From 1ba5bf993c6a3142e18e68ea6452b347f9cb5635 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Tue, 5 Jul 2016 10:18:08 +0200 Subject: xfrm: fix crash in XFRM_MSG_GETSA netlink handler If we hit any of the error conditions inside xfrm_dump_sa(), then xfrm_state_walk_init() never gets called. However, we still call xfrm_state_walk_done() from xfrm_dump_sa_done(), which will crash because the state walk was never initialized properly. We can fix this by setting cb->args[0] only after we've processed the first element and checking this before calling xfrm_state_walk_done(). Fixes: d3623099d3 ("ipsec: add support of limited SA dump") Cc: Nicolas Dichtel Cc: Steffen Klassert Signed-off-by: Vegard Nossum Acked-by: Nicolas Dichtel Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index d516845..4fb04ce 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -896,7 +896,8 @@ static int xfrm_dump_sa_done(struct netlink_callback *cb) struct sock *sk = cb->skb->sk; struct net *net = sock_net(sk); - xfrm_state_walk_done(walk, net); + if (cb->args[0]) + xfrm_state_walk_done(walk, net); return 0; } @@ -921,8 +922,6 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) u8 proto = 0; int err; - cb->args[0] = 1; - err = nlmsg_parse(cb->nlh, 0, attrs, XFRMA_MAX, xfrma_policy); if (err < 0) @@ -939,6 +938,7 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) proto = nla_get_u8(attrs[XFRMA_PROTO]); xfrm_state_walk_init(walk, proto, filter); + cb->args[0] = 1; } (void) xfrm_state_walk(net, walk, dump_one_state, &info); -- cgit v1.1 From b46211d6dcfb81a8af66b8684a42d629183670d4 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Fri, 24 Jun 2016 03:59:33 +0200 Subject: ARM: OMAP3: hwmod data: Add sysc information for DSI Add missing sysconfig/sysstatus information to OMAP3 hwmod. 
The information has been checked against OMAP34xx and OMAP36xx TRM. Without this change DSI block is not reset during boot, which is required for working Nokia N950 display. Signed-off-by: Sebastian Reichel Cc: stable@vger.kernel.org Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap_hwmod_3xxx_data.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c index d72ee61..1cc4a6f 100644 --- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c @@ -722,8 +722,20 @@ static struct omap_hwmod omap3xxx_dss_dispc_hwmod = { * display serial interface controller */ +static struct omap_hwmod_class_sysconfig omap3xxx_dsi_sysc = { + .rev_offs = 0x0000, + .sysc_offs = 0x0010, + .syss_offs = 0x0014, + .sysc_flags = (SYSC_HAS_AUTOIDLE | SYSC_HAS_CLOCKACTIVITY | + SYSC_HAS_ENAWAKEUP | SYSC_HAS_SIDLEMODE | + SYSC_HAS_SOFTRESET | SYSS_HAS_RESET_STATUS), + .idlemodes = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART), + .sysc_fields = &omap_hwmod_sysc_type1, +}; + static struct omap_hwmod_class omap3xxx_dsi_hwmod_class = { .name = "dsi", + .sysc = &omap3xxx_dsi_sysc, }; static struct omap_hwmod_irq_info omap3xxx_dsi1_irqs[] = { -- cgit v1.1 From 60a5b875047e827d0f5a1a3166df714be2ef13df Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Tue, 12 Jul 2016 12:50:31 -0500 Subject: ARM: OMAP4+: hwmod: Add hwmod flag for HWMOD_OMAP4_ZERO_CLKCTRL_OFFSET Nearly all modules on OMAP4 and newer platforms have clkctrl offsets that are non-zero except for the RTC on am335x. Because we rely on a clkctrl_offset of zero to indicate no clkctrl_offset being present, lets add a HWMOD_OMAP4_ZERO_CLKCTRL_OFFSET flag to use to indicate hwmods that have a valid clkctrl_offset of 0. 
Signed-off-by: Dave Gerlach Acked-by: Tero Kristo Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap_hwmod.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/mach-omap2/omap_hwmod.h b/arch/arm/mach-omap2/omap_hwmod.h index 4041bad..7890401 100644 --- a/arch/arm/mach-omap2/omap_hwmod.h +++ b/arch/arm/mach-omap2/omap_hwmod.h @@ -443,8 +443,12 @@ struct omap_hwmod_omap2_prcm { * HWMOD_OMAP4_NO_CONTEXT_LOSS_BIT: Some IP blocks don't have a PRCM * module-level context loss register associated with them; this * flag bit should be set in those cases + * HWMOD_OMAP4_ZERO_CLKCTRL_OFFSET: Some IP blocks have a valid CLKCTRL + * offset of zero; this flag bit should be set in those cases to + * distinguish from hwmods that have no clkctrl offset. */ #define HWMOD_OMAP4_NO_CONTEXT_LOSS_BIT (1 << 0) +#define HWMOD_OMAP4_ZERO_CLKCTRL_OFFSET (1 << 1) /** * struct omap_hwmod_omap4_prcm - OMAP4-specific PRCM data -- cgit v1.1 From ddb0d99d53fea77c568d17265b8bca6e64f2975b Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Tue, 12 Jul 2016 12:50:32 -0500 Subject: ARM: OMAP2+: AM33XX: Add HWMOD_OMAP4_ZERO_CLKCTRL_OFFSET flag to rtc hwmod The RTC hwmod on AM335x family of SoCs is unique in that the clkctrl_offs in the PRCM is 0. We rely on a clkctrl_offs of zero as indicating no clkctrl is present so we must flag this hwmod with HWMOD_OMAP4_ZERO_CLKCTRL_OFFSET to indicate the 0 clkctrl_offs is in fact valid in this case. 
Signed-off-by: Dave Gerlach Acked-by: Tero Kristo Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c b/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c index 55c5878..e2d84aa 100644 --- a/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c @@ -29,6 +29,7 @@ #define CLKCTRL(oh, clkctrl) ((oh).prcm.omap4.clkctrl_offs = (clkctrl)) #define RSTCTRL(oh, rstctrl) ((oh).prcm.omap4.rstctrl_offs = (rstctrl)) #define RSTST(oh, rstst) ((oh).prcm.omap4.rstst_offs = (rstst)) +#define PRCM_FLAGS(oh, flag) ((oh).prcm.omap4.flags = (flag)) /* * 'l3' class @@ -1296,6 +1297,7 @@ static void omap_hwmod_am33xx_clkctrl(void) CLKCTRL(am33xx_i2c1_hwmod, AM33XX_CM_WKUP_I2C0_CLKCTRL_OFFSET); CLKCTRL(am33xx_wd_timer1_hwmod, AM33XX_CM_WKUP_WDT1_CLKCTRL_OFFSET); CLKCTRL(am33xx_rtc_hwmod, AM33XX_CM_RTC_RTC_CLKCTRL_OFFSET); + PRCM_FLAGS(am33xx_rtc_hwmod, HWMOD_OMAP4_ZERO_CLKCTRL_OFFSET); CLKCTRL(am33xx_mmc2_hwmod, AM33XX_CM_PER_MMC2_CLKCTRL_OFFSET); CLKCTRL(am33xx_gpmc_hwmod, AM33XX_CM_PER_GPMC_CLKCTRL_OFFSET); CLKCTRL(am33xx_l4_ls_hwmod, AM33XX_CM_PER_L4LS_CLKCTRL_OFFSET); -- cgit v1.1 From 428929c79cc3de7f9fefd2184da6ed625676ad9e Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Tue, 12 Jul 2016 12:50:33 -0500 Subject: ARM: OMAP4+: Have _omap4_wait_target_* check for valid clkctrl_offs Previously the low-level CM call internal to the _omap4_wait_target_ready/disable calls was responsible for checking for a valid clkctrl_offs. Now we must also consider the value of the prcm.omap4.flags because if HWMOD_OMAP4_ZERO_CLKCTRL_OFFSET is set in the flags then clkctrl_offs of 0 is valid. 
Let's add this check into the _omap4_wait_target_ready/disable where we have access to both the clkctrl_offs and the flags values and simply return 0 without calling the low level CM call at all, which would have returned 0 anyway if the clkctrl_offs was zero. Signed-off-by: Dave Gerlach Acked-by: Tero Kristo Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap_hwmod.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 5b70938..1052b29 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -1053,6 +1053,10 @@ static int _omap4_wait_target_disable(struct omap_hwmod *oh) if (oh->flags & HWMOD_NO_IDLEST) return 0; + if (!oh->prcm.omap4.clkctrl_offs && + !(oh->prcm.omap4.flags & HWMOD_OMAP4_ZERO_CLKCTRL_OFFSET)) + return 0; + return omap_cm_wait_module_idle(oh->clkdm->prcm_partition, oh->clkdm->cm_inst, oh->prcm.omap4.clkctrl_offs, 0); @@ -2971,6 +2975,10 @@ static int _omap4_wait_target_ready(struct omap_hwmod *oh) if (!_find_mpu_rt_port(oh)) return 0; + if (!oh->prcm.omap4.clkctrl_offs && + !(oh->prcm.omap4.flags & HWMOD_OMAP4_ZERO_CLKCTRL_OFFSET)) + return 0; + /* XXX check module SIDLEMODE, hardreset status */ return omap_cm_wait_module_ready(oh->clkdm->prcm_partition, -- cgit v1.1 From e4e53287701b2b8087fa107b4a965fa49505615f Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Tue, 12 Jul 2016 12:50:34 -0500 Subject: ARM: OMAP4+: CM: Remove redundant checks for clkctrl_offs of zero Now that we have moved the check for valid clkctrl_offs to the caller of am33xx_cm_wait_module_ready/idle and omap4_cminst_wait_module_ready/idle let's remove the now redundant check for clkctrl_offs from these functions. 
Signed-off-by: Dave Gerlach Acked-by: Tero Kristo Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/cm33xx.c | 6 ------ arch/arm/mach-omap2/cminst44xx.c | 6 ------ 2 files changed, 12 deletions(-) diff --git a/arch/arm/mach-omap2/cm33xx.c b/arch/arm/mach-omap2/cm33xx.c index c073fb5..6f2d0ae 100644 --- a/arch/arm/mach-omap2/cm33xx.c +++ b/arch/arm/mach-omap2/cm33xx.c @@ -220,9 +220,6 @@ static int am33xx_cm_wait_module_ready(u8 part, s16 inst, u16 clkctrl_offs, { int i = 0; - if (!clkctrl_offs) - return 0; - omap_test_timeout(_is_module_ready(inst, clkctrl_offs), MAX_MODULE_READY_TIME, i); @@ -246,9 +243,6 @@ static int am33xx_cm_wait_module_idle(u8 part, s16 inst, u16 clkctrl_offs, { int i = 0; - if (!clkctrl_offs) - return 0; - omap_test_timeout((_clkctrl_idlest(inst, clkctrl_offs) == CLKCTRL_IDLEST_DISABLED), MAX_MODULE_READY_TIME, i); diff --git a/arch/arm/mach-omap2/cminst44xx.c b/arch/arm/mach-omap2/cminst44xx.c index 2c0e07e..2ab27ad 100644 --- a/arch/arm/mach-omap2/cminst44xx.c +++ b/arch/arm/mach-omap2/cminst44xx.c @@ -278,9 +278,6 @@ static int omap4_cminst_wait_module_ready(u8 part, s16 inst, u16 clkctrl_offs, { int i = 0; - if (!clkctrl_offs) - return 0; - omap_test_timeout(_is_module_ready(part, inst, clkctrl_offs), MAX_MODULE_READY_TIME, i); @@ -304,9 +301,6 @@ static int omap4_cminst_wait_module_idle(u8 part, s16 inst, u16 clkctrl_offs, { int i = 0; - if (!clkctrl_offs) - return 0; - omap_test_timeout((_clkctrl_idlest(part, inst, clkctrl_offs) == CLKCTRL_IDLEST_DISABLED), MAX_MODULE_DISABLE_TIME, i); -- cgit v1.1 From 73efc3245fd3edb3632d82a3a9c5d5d975a02efc Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Wed, 27 Jul 2016 08:03:18 +0200 Subject: xfrm: get rid of incorrect WARN AFAICT this message is just printed whenever input validation fails. This is a normal failure and we shouldn't be dumping the stack over it. 
Looks like it was originally a printk that was maybe incorrectly upgraded to a WARN: commit 62db5cfd70b1ef53aa21f144a806fe3b78c84fab Author: stephen hemminger Date: Wed May 12 06:37:06 2010 +0000 xfrm: add severity to printk Cc: Stephen Hemminger Cc: Steffen Klassert Signed-off-by: Vegard Nossum Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 4fb04ce..1a4f142 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2117,7 +2117,7 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, err = verify_newpolicy_info(&ua->policy); if (err) - goto bad_policy; + goto free_state; /* build an XP */ xp = xfrm_policy_construct(net, &ua->policy, attrs, &err); @@ -2149,8 +2149,6 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; -bad_policy: - WARN(1, "BAD policy passed\n"); free_state: kfree(x); nomem: -- cgit v1.1 From 7677c7560c3e80fde08a7e710d378dedabf950c3 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Wed, 27 Jul 2016 08:44:15 +0200 Subject: xfrm: get rid of another incorrect WARN During fuzzing I regularly run into this WARN(). According to Herbert Xu, this "certainly shouldn't be a WARN, it probably shouldn't print anything either". Cc: Stephen Hemminger Cc: Steffen Klassert Cc: Herbert Xu Signed-off-by: Vegard Nossum Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 1a4f142..cb65d91 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2051,9 +2051,6 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, if (up->hard) { xfrm_policy_delete(xp, p->dir); xfrm_audit_policy_delete(xp, 1, true); - } else { - // reset the timers here? 
- WARN(1, "Don't know what to do with soft policy expire\n"); } km_policy_expired(xp, p->dir, up->hard, nlh->nlmsg_pid); -- cgit v1.1 From 6678716751af24af09163aef54bbabb60c12e18b Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 26 Jul 2016 14:53:53 +0000 Subject: spi: qup: Remove spi_master_put in spi_qup_remove() The call to spi_master_put() in spi_qup_remove() is redundant since the master is registered using devm_spi_register_master() and no reference is held via spi_master_get() in spi_qup_remove(). This is detected by Coccinelle semantic patch. Signed-off-by: Wei Yongjun Signed-off-by: Mark Brown --- drivers/spi/spi-qup.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c index c338ef1..7f15556 100644 --- a/drivers/spi/spi-qup.c +++ b/drivers/spi/spi-qup.c @@ -1030,7 +1030,6 @@ static int spi_qup_remove(struct platform_device *pdev) pm_runtime_put_noidle(&pdev->dev); pm_runtime_disable(&pdev->dev); - spi_master_put(master); return 0; } -- cgit v1.1 From c2b08cede727387a5e19b40fa8e1a1e3a53e8527 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 26 Jul 2016 14:56:50 +0000 Subject: spi: mediatek: remove spi_master_put in mtk_spi_remove() The call to spi_master_put() in mtk_spi_remove() is redundant since the master is registered using devm_spi_register_master() and no reference is held via spi_master_get() in mtk_spi_remove(). This is detected by Coccinelle semantic patch. 
Signed-off-by: Wei Yongjun Signed-off-by: Mark Brown --- drivers/spi/spi-mt65xx.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index 0be89e0..899d7a8 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -685,7 +685,6 @@ static int mtk_spi_remove(struct platform_device *pdev) pm_runtime_disable(&pdev->dev); mtk_spi_reset(mdata); - spi_master_put(master); return 0; } -- cgit v1.1 From 2932c287108e2987454449b8214eae6db066da85 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 26 Jul 2016 14:57:55 +0000 Subject: spi: img-spfi: Remove spi_master_put in img_spfi_remove() The call to spi_master_put() in img_spfi_remove() is redundant since the master is registered using devm_spi_register_master() and no reference hold by using spi_master_get() in img_spfi_remove(). This is detected by Coccinelle semantic patch. Signed-off-by: Wei Yongjun Signed-off-by: Mark Brown --- drivers/spi/spi-img-spfi.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/spi/spi-img-spfi.c b/drivers/spi/spi-img-spfi.c index 823cbc9..7a37090 100644 --- a/drivers/spi/spi-img-spfi.c +++ b/drivers/spi/spi-img-spfi.c @@ -720,8 +720,6 @@ static int img_spfi_remove(struct platform_device *pdev) clk_disable_unprepare(spfi->sys_clk); } - spi_master_put(master); - return 0; } -- cgit v1.1 From 6916fb3b10b3cbe3b1f9f5b680675f53e4e299eb Mon Sep 17 00:00:00 2001 From: Tobias Brunner Date: Fri, 29 Jul 2016 09:57:32 +0200 Subject: xfrm: Ignore socket policies when rebuilding hash tables Whenever thresholds are changed the hash tables are rebuilt. This is done by enumerating all policies and hashing and inserting them into the right table according to the thresholds and direction. 
Because socket policies are also contained in net->xfrm.policy_all but no hash tables are defined for their direction (dir + XFRM_POLICY_MAX) this causes a NULL or invalid pointer dereference after returning from policy_hash_bysel() if the rebuild is done while any socket policies are installed. Since the rebuild after changing thresholds is scheduled this crash could even occur if the userland sets thresholds seemingly before installing any socket policies. Fixes: 53c2e285f970 ("xfrm: Do not hash socket policies") Signed-off-by: Tobias Brunner Acked-by: Herbert Xu Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b5e665b..45f9cf9 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -626,6 +626,10 @@ static void xfrm_hash_rebuild(struct work_struct *work) /* re-insert all policies by order of creation */ list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) { + if (xfrm_policy_id2dir(policy->index) >= XFRM_POLICY_MAX) { + /* skip socket policies */ + continue; + } newpos = NULL; chain = policy_hash_bysel(net, &policy->selector, policy->family, -- cgit v1.1 From b2c7f5d9c939a37c1ce7f86a642de70e3033ee9e Mon Sep 17 00:00:00 2001 From: Maarten ter Huurne Date: Fri, 29 Jul 2016 23:42:12 +0200 Subject: regmap: cache: Fix num_reg_defaults computation from reg_defaults_raw In 3245d460 (regmap: cache: Fall back to register by register read for cache defaults) non-readable registers are skipped when initializing reg_defaults, but are still included in num_reg_defaults. So there can be uninitialized entries at the end of reg_defaults, which can cause problems when the register cache initializes from the full array. Fixed it by excluding non-readable registers from the count as well. 
Signed-off-by: Maarten ter Huurne Signed-off-by: Mark Brown --- drivers/base/regmap/regcache.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c index df7ff729..4e58256 100644 --- a/drivers/base/regmap/regcache.c +++ b/drivers/base/regmap/regcache.c @@ -38,10 +38,11 @@ static int regcache_hw_init(struct regmap *map) /* calculate the size of reg_defaults */ for (count = 0, i = 0; i < map->num_reg_defaults_raw; i++) - if (!regmap_volatile(map, i * map->reg_stride)) + if (regmap_readable(map, i * map->reg_stride) && + !regmap_volatile(map, i * map->reg_stride)) count++; - /* all registers are volatile, so just bypass */ + /* all registers are unreadable or volatile, so just bypass */ if (!count) { map->cache_bypass = true; return 0; -- cgit v1.1 From 9b622e2bbcf049c82e2550d35fb54ac205965f50 Mon Sep 17 00:00:00 2001 From: Tomasz Majchrzak Date: Thu, 28 Jul 2016 10:28:25 +0200 Subject: raid10: increment write counter after bio is split md pending write counter must be incremented after bio is split, otherwise it gets decremented too many times in end bio callback and becomes negative. Signed-off-by: Tomasz Majchrzak Reviewed-by: Artur Paszkiewicz Signed-off-by: Shaohua Li --- drivers/md/raid10.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index ed29fc8..1a632a8 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1064,6 +1064,8 @@ static void __make_request(struct mddev *mddev, struct bio *bio) int max_sectors; int sectors; + md_write_start(mddev, bio); + /* * Register the new request and wait if the reconstruction * thread has put up a bar for new requests. 
@@ -1445,8 +1447,6 @@ static void raid10_make_request(struct mddev *mddev, struct bio *bio) return; } - md_write_start(mddev, bio); - do { /* -- cgit v1.1 From ff00d3b4e5e4395c825e8ec628f25932d812f31a Mon Sep 17 00:00:00 2001 From: ZhengYuan Liu Date: Thu, 28 Jul 2016 14:22:14 +0800 Subject: raid5: fix incorrectly counter of conf->empty_inactive_list_nr The counter conf->empty_inactive_list_nr is only used to determine whether the raid5 array is congested, which is dealt with in the function raid5_congested(). It is increased in get_free_stripe() when conf->inactive_list becomes empty and decreased in release_inactive_stripe_list() when temp_inactive_list is spliced onto conf->inactive_list. However, this may cause a problem when raid5_get_active_stripe or stripe_add_to_batch_list is called, because these two functions may call list_del_init(&sh->lru) to delete sh from "conf->inactive_list + hash", which may cause "conf->inactive_list + hash" to become empty when atomic_inc_not_zero(&sh->count) returns false. So a check should be done at these two points and empty_inactive_list_nr increased accordingly. Otherwise the counter may become negative, which would influence async readahead from VFS. 
Signed-off-by: ZhengYuan Liu Signed-off-by: Shaohua Li --- drivers/md/raid5.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index d189e89..e379b89 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -659,6 +659,7 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector, { struct stripe_head *sh; int hash = stripe_hash_locks_hash(sector); + int inc_empty_inactive_list_flag; pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector); @@ -703,7 +704,12 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector, atomic_inc(&conf->active_stripes); BUG_ON(list_empty(&sh->lru) && !test_bit(STRIPE_EXPANDING, &sh->state)); + inc_empty_inactive_list_flag = 0; + if (!list_empty(conf->inactive_list + hash)) + inc_empty_inactive_list_flag = 1; list_del_init(&sh->lru); + if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag) + atomic_inc(&conf->empty_inactive_list_nr); if (sh->group) { sh->group->stripes_cnt--; sh->group = NULL; @@ -762,6 +768,7 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh sector_t head_sector, tmp_sec; int hash; int dd_idx; + int inc_empty_inactive_list_flag; /* Don't cross chunks, so stripe pd_idx/qd_idx is the same */ tmp_sec = sh->sector; @@ -779,7 +786,12 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh atomic_inc(&conf->active_stripes); BUG_ON(list_empty(&head->lru) && !test_bit(STRIPE_EXPANDING, &head->state)); + inc_empty_inactive_list_flag = 0; + if (!list_empty(conf->inactive_list + hash)) + inc_empty_inactive_list_flag = 1; list_del_init(&head->lru); + if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag) + atomic_inc(&conf->empty_inactive_list_nr); if (head->group) { head->group->stripes_cnt--; head->group = NULL; -- cgit v1.1 From 1bc8da4e143c0fd8807e061a66d91d5972601ab1 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Thu, 4 Aug 2016 17:22:16 
+0200 Subject: regmap: rbtree: Avoid overlapping nodes When searching for a suitable node that should be used for inserting a new register, which does not fall within the range of any existing node, we not only look for nodes which are directly adjacent to the new register, but also for nodes within a certain proximity. This is done to avoid creating lots of small nodes with just a few registers spacing in between, which would increase memory usage as well as tree traversal time. This means there might be multiple node candidates which fall within the proximity range of the new register. If we choose the first node we encounter, under certain register insertion patterns it is possible to end up with overlapping ranges. This will break order in the rbtree and can cause the cached register value to become corrupted. E.g. take the simplified example where the proximity range is 2 and the register insertion sequence is 1, 4, 2, 3, 5. * Insert of register 1 creates a new node, this is the root of the rbtree * Insert of register 4 creates a new node, which is inserted to the right of the root. * Insert of register 2 gets inserted to the first node * Insert of register 3 gets inserted to the first node * Insert of register 5 also gets inserted into the first node since this is the first node encountered and it is within the proximity range. Now there are two overlapping nodes. To avoid this always choose the node that is closest to the new register. This will ensure that nodes will not overlap. The tree traversal is still done as a binary search, we just don't stop at the first node found. So the complexity of the algorithm stays within the same order. Ideally if a new register is in the range of two adjacent blocks those blocks should be merged, but that is a much more invasive change and left for later. 
The issue was initially introduced in commit 472fdec7380c ("regmap: rbtree: Reduce number of nodes, take 2"), but became much more exposed by commit 6399aea629b0 ("regmap: rbtree: When adding a reg do a bsearch for target node") which changed the order in which nodes are looked-up. Fixes: 6399aea629b0 ("regmap: rbtree: When adding a reg do a bsearch for target node") Signed-off-by: Lars-Peter Clausen Signed-off-by: Mark Brown --- drivers/base/regmap/regcache-rbtree.c | 38 ++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/drivers/base/regmap/regcache-rbtree.c b/drivers/base/regmap/regcache-rbtree.c index aa56af8..b11af3f 100644 --- a/drivers/base/regmap/regcache-rbtree.c +++ b/drivers/base/regmap/regcache-rbtree.c @@ -404,6 +404,7 @@ static int regcache_rbtree_write(struct regmap *map, unsigned int reg, unsigned int new_base_reg, new_top_reg; unsigned int min, max; unsigned int max_dist; + unsigned int dist, best_dist = UINT_MAX; max_dist = map->reg_stride * sizeof(*rbnode_tmp) / map->cache_word_size; @@ -423,24 +424,41 @@ static int regcache_rbtree_write(struct regmap *map, unsigned int reg, &base_reg, &top_reg); if (base_reg <= max && top_reg >= min) { - new_base_reg = min(reg, base_reg); - new_top_reg = max(reg, top_reg); - } else { - if (max < base_reg) - node = node->rb_left; + if (reg < base_reg) + dist = base_reg - reg; + else if (reg > top_reg) + dist = reg - top_reg; else - node = node->rb_right; - - continue; + dist = 0; + if (dist < best_dist) { + rbnode = rbnode_tmp; + best_dist = dist; + new_base_reg = min(reg, base_reg); + new_top_reg = max(reg, top_reg); + } } - ret = regcache_rbtree_insert_to_block(map, rbnode_tmp, + /* + * Keep looking, we want to choose the closest block, + * otherwise we might end up creating overlapping + * blocks, which breaks the rbtree. 
+ */ + if (reg < base_reg) + node = node->rb_left; + else if (reg > top_reg) + node = node->rb_right; + else + break; + } + + if (rbnode) { + ret = regcache_rbtree_insert_to_block(map, rbnode, new_base_reg, new_top_reg, reg, value); if (ret) return ret; - rbtree_ctx->cached_rbnode = rbnode_tmp; + rbtree_ctx->cached_rbnode = rbnode; return 0; } -- cgit v1.1 From c3ccf357c3d75bd2924e049b6a991f7c0c111068 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 5 Aug 2016 10:17:52 +0200 Subject: spi: sh-msiof: Avoid invalid clock generator parameters The conversion from a look-up table to a calculation for clock generator parameters forgot to take into account that BRDV x 1/1 is valid only if BRPS is x 1/1 or x 1/2, leading to undefined behavior (e.g. arbitrary clock rates). This limitation is documented for the MSIOF module in all supported SH/R-Mobile and R-Car Gen2/Gen3 ARM SoCs. Tested on r8a7791/koelsch and r8a7795/salvator-x. Fixes: 65d5665bb260b034 ("spi: sh-msiof: Update calculation of frequency dividing") Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown --- drivers/spi/spi-sh-msiof.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index 0f83ad1..1de3a77 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -262,6 +262,9 @@ static void sh_msiof_spi_set_clk_regs(struct sh_msiof_spi_priv *p, for (k = 0; k < ARRAY_SIZE(sh_msiof_spi_div_table); k++) { brps = DIV_ROUND_UP(div, sh_msiof_spi_div_table[k].div); + /* SCR_BRDV_DIV_1 is valid only if BRPS is x 1/1 or x 1/2 */ + if (sh_msiof_spi_div_table[k].div == 1 && brps > 2) + continue; if (brps <= 32) /* max of brdv is 32 */ break; } -- cgit v1.1 From 290284776bb281759b11faa287b8abccaf74bfcb Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 11 Jul 2016 14:50:07 -0700 Subject: regulator: qcom_smd: Fix voltage ranges for pm8x41 The voltage ranges listed here are wrong. 
The correct ranges can be seen in the "native" spmi regulator driver qcom_spmi-regulator.c at pldo_ranges[], ftsmps_ranges[] and boost_ranges[] for the pldo, ftsmps, and boost type regulators. Port these ranges over to the RPM SMD regulator driver so that we list the appropriate set of supported voltages on pldos. Doing this allows us to specify a voltage like 3075000 for l24, whereas before that wasn't a supported voltage. Fixes: da65e367b67e ("regulator: Regulator driver for the Qualcomm RPM") Signed-off-by: Stephen Boyd Reviewed-by: Andy Gross Signed-off-by: Mark Brown --- drivers/regulator/qcom_smd-regulator.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/regulator/qcom_smd-regulator.c b/drivers/regulator/qcom_smd-regulator.c index 5022fa8..47cd28e 100644 --- a/drivers/regulator/qcom_smd-regulator.c +++ b/drivers/regulator/qcom_smd-regulator.c @@ -221,29 +221,30 @@ static const struct regulator_desc pm8x41_hfsmps = { static const struct regulator_desc pm8841_ftsmps = { .linear_ranges = (struct regulator_linear_range[]) { REGULATOR_LINEAR_RANGE(350000, 0, 184, 5000), - REGULATOR_LINEAR_RANGE(700000, 185, 339, 10000), + REGULATOR_LINEAR_RANGE(1280000, 185, 261, 10000), }, .n_linear_ranges = 2, - .n_voltages = 340, + .n_voltages = 262, .ops = &rpm_smps_ldo_ops, }; static const struct regulator_desc pm8941_boost = { .linear_ranges = (struct regulator_linear_range[]) { - REGULATOR_LINEAR_RANGE(4000000, 0, 15, 100000), + REGULATOR_LINEAR_RANGE(4000000, 0, 30, 50000), }, .n_linear_ranges = 1, - .n_voltages = 16, + .n_voltages = 31, .ops = &rpm_smps_ldo_ops, }; static const struct regulator_desc pm8941_pldo = { .linear_ranges = (struct regulator_linear_range[]) { - REGULATOR_LINEAR_RANGE( 750000, 0, 30, 25000), - REGULATOR_LINEAR_RANGE(1500000, 31, 99, 50000), + REGULATOR_LINEAR_RANGE( 750000, 0, 63, 12500), + REGULATOR_LINEAR_RANGE(1550000, 64, 126, 25000), + REGULATOR_LINEAR_RANGE(3100000, 127, 163, 50000), }, - 
.n_linear_ranges = 2, - .n_voltages = 100, + .n_linear_ranges = 3, + .n_voltages = 164, .ops = &rpm_smps_ldo_ops, }; -- cgit v1.1 From c488f0071eacd10a290df4fb34bbdeb4eb8e7888 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 11 Jul 2016 14:50:08 -0700 Subject: regulator: qcom_smd: Fix voltage ranges for pma8084 ftsmps and pldo The voltage ranges listed here are wrong. The pma8084 pldo supports three different overlapping voltage ranges with differing step sizes and the pma8084 ftsmps supports two. These ranges can be seen in the "native" spmi regulator driver (qcom_spmi-regulator.c) at pldo_ranges[] and ftsmps_ranges[] respectively. Port these ranges over to the RPM SMD regulator driver so that we list the appropriate set of supported voltages on these types of regulators. Fixes: ee01d0c91ef1 ("regulator: qcom-smd: Add support for PMA8084") Signed-off-by: Stephen Boyd Reviewed-by: Andy Gross Signed-off-by: Mark Brown --- drivers/regulator/qcom_smd-regulator.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/regulator/qcom_smd-regulator.c b/drivers/regulator/qcom_smd-regulator.c index 47cd28e..8ed46a9 100644 --- a/drivers/regulator/qcom_smd-regulator.c +++ b/drivers/regulator/qcom_smd-regulator.c @@ -178,20 +178,21 @@ static const struct regulator_desc pma8084_hfsmps = { static const struct regulator_desc pma8084_ftsmps = { .linear_ranges = (struct regulator_linear_range[]) { REGULATOR_LINEAR_RANGE(350000, 0, 184, 5000), - REGULATOR_LINEAR_RANGE(700000, 185, 339, 10000), + REGULATOR_LINEAR_RANGE(1280000, 185, 261, 10000), }, .n_linear_ranges = 2, - .n_voltages = 340, + .n_voltages = 262, .ops = &rpm_smps_ldo_ops, }; static const struct regulator_desc pma8084_pldo = { .linear_ranges = (struct regulator_linear_range[]) { - REGULATOR_LINEAR_RANGE(750000, 0, 30, 25000), - REGULATOR_LINEAR_RANGE(1500000, 31, 99, 50000), + REGULATOR_LINEAR_RANGE( 750000, 0, 63, 12500), + REGULATOR_LINEAR_RANGE(1550000, 64, 126, 25000), + 
REGULATOR_LINEAR_RANGE(3100000, 127, 163, 50000), }, - .n_linear_ranges = 2, - .n_voltages = 100, + .n_linear_ranges = 3, + .n_voltages = 164, .ops = &rpm_smps_ldo_ops, }; -- cgit v1.1 From d9dd26b20cff88b45d861ec786d86b1c9bd2ee60 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Sat, 30 Jul 2016 10:05:31 -0700 Subject: MD: hold mddev lock to change bitmap location Changing the location changes a lot of things. Holding the lock to avoid race. This makes the .quiesce called with mddev lock hold too. Acked-by: NeilBrown Signed-off-by: Shaohua Li --- drivers/md/bitmap.c | 47 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 6fff794..13041ee 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -2183,19 +2183,29 @@ location_show(struct mddev *mddev, char *page) static ssize_t location_store(struct mddev *mddev, const char *buf, size_t len) { + int rv; + rv = mddev_lock(mddev); + if (rv) + return rv; if (mddev->pers) { - if (!mddev->pers->quiesce) - return -EBUSY; - if (mddev->recovery || mddev->sync_thread) - return -EBUSY; + if (!mddev->pers->quiesce) { + rv = -EBUSY; + goto out; + } + if (mddev->recovery || mddev->sync_thread) { + rv = -EBUSY; + goto out; + } } if (mddev->bitmap || mddev->bitmap_info.file || mddev->bitmap_info.offset) { /* bitmap already configured. 
Only option is to clear it */ - if (strncmp(buf, "none", 4) != 0) - return -EBUSY; + if (strncmp(buf, "none", 4) != 0) { + rv = -EBUSY; + goto out; + } if (mddev->pers) { mddev->pers->quiesce(mddev, 1); bitmap_destroy(mddev); @@ -2214,21 +2224,25 @@ location_store(struct mddev *mddev, const char *buf, size_t len) /* nothing to be done */; else if (strncmp(buf, "file:", 5) == 0) { /* Not supported yet */ - return -EINVAL; + rv = -EINVAL; + goto out; } else { - int rv; if (buf[0] == '+') rv = kstrtoll(buf+1, 10, &offset); else rv = kstrtoll(buf, 10, &offset); if (rv) - return rv; - if (offset == 0) - return -EINVAL; + goto out; + if (offset == 0) { + rv = -EINVAL; + goto out; + } if (mddev->bitmap_info.external == 0 && mddev->major_version == 0 && - offset != mddev->bitmap_info.default_offset) - return -EINVAL; + offset != mddev->bitmap_info.default_offset) { + rv = -EINVAL; + goto out; + } mddev->bitmap_info.offset = offset; if (mddev->pers) { struct bitmap *bitmap; @@ -2245,7 +2259,7 @@ location_store(struct mddev *mddev, const char *buf, size_t len) mddev->pers->quiesce(mddev, 0); if (rv) { bitmap_destroy(mddev); - return rv; + goto out; } } } @@ -2257,6 +2271,11 @@ location_store(struct mddev *mddev, const char *buf, size_t len) set_bit(MD_CHANGE_DEVS, &mddev->flags); md_wakeup_thread(mddev->thread); } + rv = 0; +out: + mddev_unlock(mddev); + if (rv) + return rv; return len; } -- cgit v1.1 From 11367799f3d12a5074c4a3c0fa4ea8da2a21a2a4 Mon Sep 17 00:00:00 2001 From: Alexey Obitotskiy Date: Wed, 3 Aug 2016 10:02:56 +0200 Subject: md: Prevent IO hold during accessing to faulty raid5 array After array enters in faulty state (e.g. number of failed drives becomes more then accepted for raid5 level) it sets error flags (one of this flags is MD_CHANGE_PENDING). For internal metadata arrays MD_CHANGE_PENDING cleared into md_update_sb, but not for external metadata arrays. 
MD_CHANGE_PENDING flag set prevents to finish all new or non-finished IOs to array and hold them in pending state. In some cases this can leads to deadlock situation. For example, we have faulty array (2 of 4 drives failed) and udev handle array state changes and blkid started (or other userspace application that used array to read/write) but unable to finish reads due to IO hold. At the same time we unable to get exclusive access to array (to stop array in our case) because another external application still use this array. Fix makes possible to return IO with errors immediately. So external application can finish working with array and give exclusive access to other applications to perform required management actions with array. Signed-off-by: Alexey Obitotskiy Signed-off-by: Shaohua Li --- drivers/md/raid5.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index e379b89..4f8f524 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4640,7 +4640,9 @@ finish: } if (!bio_list_empty(&s.return_bi)) { - if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags)) { + if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags) && + (s.failed <= conf->max_degraded || + conf->mddev->external == 0)) { spin_lock_irq(&conf->device_lock); bio_list_merge(&conf->return_bi, &s.return_bi); spin_unlock_irq(&conf->device_lock); -- cgit v1.1 From 4d0bd46a4d55383f7b925e6cf7865a77e0f0e020 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 8 Aug 2016 08:45:33 +0200 Subject: Revert "wext: Fix 32 bit iwpriv compatibility issue with 64 bit Kernel" This reverts commit 3d5fdff46c4b2b9534fa2f9fc78e90a48e0ff724. Ben Hutchings pointed out that the commit isn't safe since it assumes that the structure used by the driver is iw_point, when in fact there's no way to know about that. Fortunately, the only driver in the tree that ever runs this code path is the wilc1000 staging driver, so it doesn't really matter. 
Clearly I should have investigated this better before applying, sorry. Reported-by: Ben Hutchings Cc: stable@vger.kernel.org [though I guess it doesn't matter much] Fixes: 3d5fdff46c4b ("wext: Fix 32 bit iwpriv compatibility issue with 64 bit Kernel") Signed-off-by: Johannes Berg --- net/wireless/wext-core.c | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index dbb2738e..6250b1c 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -958,29 +958,8 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, return private(dev, iwr, cmd, info, handler); } /* Old driver API : call driver ioctl handler */ - if (dev->netdev_ops->ndo_do_ioctl) { -#ifdef CONFIG_COMPAT - if (info->flags & IW_REQUEST_FLAG_COMPAT) { - int ret = 0; - struct iwreq iwr_lcl; - struct compat_iw_point *iwp_compat = (void *) &iwr->u.data; - - memcpy(&iwr_lcl, iwr, sizeof(struct iwreq)); - iwr_lcl.u.data.pointer = compat_ptr(iwp_compat->pointer); - iwr_lcl.u.data.length = iwp_compat->length; - iwr_lcl.u.data.flags = iwp_compat->flags; - - ret = dev->netdev_ops->ndo_do_ioctl(dev, (void *) &iwr_lcl, cmd); - - iwp_compat->pointer = ptr_to_compat(iwr_lcl.u.data.pointer); - iwp_compat->length = iwr_lcl.u.data.length; - iwp_compat->flags = iwr_lcl.u.data.flags; - - return ret; - } else -#endif - return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd); - } + if (dev->netdev_ops->ndo_do_ioctl) + return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd); return -EOPNOTSUPP; } -- cgit v1.1 From 626d2f07de89bf6be3d7301524d0ab3375b81b9c Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Thu, 4 Aug 2016 19:59:41 +0900 Subject: dmaengine: usb-dmac: check CHCR.DE bit in usb_dmac_isr_channel() The USB-DMAC's interruption happens even if the CHCR.DE is not set to 1 because CHCR.NULLE is set to 1. So, this driver should call usb_dmac_isr_transfer_end() if the DE bit is set to 1 only. 
Otherwise, the desc is possible to be NULL in the usb_dmac_isr_transfer_end(). Fixes: 0c1c8ff32fa2 ("dmaengine: usb-dmac: Add Renesas USB DMA Controller (USB-DMAC) driver) Cc: # v4.1+ Signed-off-by: Yoshihiro Shimoda Signed-off-by: Vinod Koul --- drivers/dma/sh/usb-dmac.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/dma/sh/usb-dmac.c b/drivers/dma/sh/usb-dmac.c index 749f1bd..06ecdc3 100644 --- a/drivers/dma/sh/usb-dmac.c +++ b/drivers/dma/sh/usb-dmac.c @@ -600,27 +600,30 @@ static irqreturn_t usb_dmac_isr_channel(int irq, void *dev) { struct usb_dmac_chan *chan = dev; irqreturn_t ret = IRQ_NONE; - u32 mask = USB_DMACHCR_TE; - u32 check_bits = USB_DMACHCR_TE | USB_DMACHCR_SP; + u32 mask = 0; u32 chcr; + bool xfer_end = false; spin_lock(&chan->vc.lock); chcr = usb_dmac_chan_read(chan, USB_DMACHCR); - if (chcr & check_bits) - mask |= USB_DMACHCR_DE | check_bits; + if (chcr & (USB_DMACHCR_TE | USB_DMACHCR_SP)) { + mask |= USB_DMACHCR_DE | USB_DMACHCR_TE | USB_DMACHCR_SP; + if (chcr & USB_DMACHCR_DE) + xfer_end = true; + ret |= IRQ_HANDLED; + } if (chcr & USB_DMACHCR_NULL) { /* An interruption of TE will happen after we set FTE */ mask |= USB_DMACHCR_NULL; chcr |= USB_DMACHCR_FTE; ret |= IRQ_HANDLED; } - usb_dmac_chan_write(chan, USB_DMACHCR, chcr & ~mask); + if (mask) + usb_dmac_chan_write(chan, USB_DMACHCR, chcr & ~mask); - if (chcr & check_bits) { + if (xfer_end) usb_dmac_isr_transfer_end(chan); - ret |= IRQ_HANDLED; - } spin_unlock(&chan->vc.lock); -- cgit v1.1 From 23540d6e2f3193b946c4de43e3f9654fa6d23fe7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sun, 24 Jul 2016 14:10:58 +0200 Subject: memory: omap-gpmc: allow probe of child nodes to fail A recent commit (inadvertently?) changed how failed probe of a gpmc child node was handled. Instead of proceeding with setting up any other children as before, a single error now aborts the whole process. 
This change broke networking on some Overo boards due to probe failing for an unrelated nand node. This second issue should obviously be fixed, but let's restore the old behaviour of allowing child-node probe to fail to avoid further similar breakage on other systems. Fixes: d2d00862dfbb ("memory: omap-gpmc: Support general purpose input for WAITPINs") Cc: stable@vger.kernel.org # v4.7+ Signed-off-by: Johan Hovold Signed-off-by: Roger Quadros --- drivers/memory/omap-gpmc.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c index 869c83f..f00f3e7 100644 --- a/drivers/memory/omap-gpmc.c +++ b/drivers/memory/omap-gpmc.c @@ -2185,7 +2185,7 @@ static int gpmc_probe_dt(struct platform_device *pdev) return 0; } -static int gpmc_probe_dt_children(struct platform_device *pdev) +static void gpmc_probe_dt_children(struct platform_device *pdev) { int ret; struct device_node *child; @@ -2200,11 +2200,11 @@ static int gpmc_probe_dt_children(struct platform_device *pdev) else ret = gpmc_probe_generic_child(pdev, child); - if (ret) - return ret; + if (ret) { + dev_err(&pdev->dev, "failed to probe DT child '%s': %d\n", + child->name, ret); + } } - - return 0; } #else static int gpmc_probe_dt(struct platform_device *pdev) @@ -2212,9 +2212,8 @@ static int gpmc_probe_dt(struct platform_device *pdev) return 0; } -static int gpmc_probe_dt_children(struct platform_device *pdev) +static void gpmc_probe_dt_children(struct platform_device *pdev) { - return 0; } #endif /* CONFIG_OF */ @@ -2369,16 +2368,10 @@ static int gpmc_probe(struct platform_device *pdev) goto setup_irq_failed; } - rc = gpmc_probe_dt_children(pdev); - if (rc < 0) { - dev_err(gpmc->dev, "failed to probe DT children\n"); - goto dt_children_failed; - } + gpmc_probe_dt_children(pdev); return 0; -dt_children_failed: - gpmc_free_irq(gpmc); setup_irq_failed: gpmc_gpio_exit(gpmc); gpio_init_failed: -- cgit v1.1 From 
d9fd3c918114cfd3995947339549c7341181efb0 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 8 Jul 2016 14:58:39 +0100 Subject: ARM: dts: armada-388-clearfog: number LAN ports properly Currently, the ports as seen from the rear number as: eth0 sfp lan5 lan4 lan3 lan2 lan1 lan6 which is illogical - this came about because the rev 2.0 boards have the LEDs on the front for the DSA switch (lan5-1) reversed. Rev 2.1 boards fixed the LED issue, and the Clearfog case numbers the lan ports increasing from left to right. Maintaining this illogical numbering causes confusion, with reports that "my link isn't coming up" and "my connection negotiates 10base-Half" both of which are due to people thinking that the port next to the SFP is lan1. Fix this by renumbering the ports to match people's expectations. [gregory.clement@free-electrons.com: added the Fixes and stable tags] Fixes: 4c945e8556ec ("ARM: dts: Add SolidRun Armada 388 Clearfog A1 DT file") Cc: Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: Gregory CLEMENT --- arch/arm/boot/dts/armada-388-clearfog.dts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/armada-388-clearfog.dts b/arch/arm/boot/dts/armada-388-clearfog.dts index 2e0556a..d3e6bd8 100644 --- a/arch/arm/boot/dts/armada-388-clearfog.dts +++ b/arch/arm/boot/dts/armada-388-clearfog.dts @@ -390,12 +390,12 @@ port@0 { reg = <0>; - label = "lan1"; + label = "lan5"; }; port@1 { reg = <1>; - label = "lan2"; + label = "lan4"; }; port@2 { @@ -405,12 +405,12 @@ port@3 { reg = <3>; - label = "lan4"; + label = "lan2"; }; port@4 { reg = <4>; - label = "lan5"; + label = "lan1"; }; port@5 { -- cgit v1.1 From 0956254a2d5b9e2141385514553aeef694dfe3b5 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 8 Aug 2016 15:08:49 +0200 Subject: ovl: don't copy up opaqueness When a copy up of a directory occurs which has the opaque xattr set, the xattr remains in the upper directory. 
The immediate behavior with overlayfs is that the upper directory is not treated as opaque, however after a remount the opaque flag is used and upper directory is treated as opaque. This causes files created in the lower layer to be hidden when using multiple lower directories. Fix by not copying up the opaque flag. To reproduce: ----8<---------8<---------8<---------8<---------8<---------8<---- mkdir -p l/d/s u v w mnt mount -t overlay overlay -olowerdir=l,upperdir=u,workdir=w mnt rm -rf mnt/d/ mkdir -p mnt/d/n umount mnt mount -t overlay overlay -olowerdir=u:l,upperdir=v,workdir=w mnt touch mnt/d/foo umount mnt mount -t overlay overlay -olowerdir=u:l,upperdir=v,workdir=w mnt ls mnt/d ----8<---------8<---------8<---------8<---------8<---------8<---- output should be: "foo n" Reported-by: Derek McGowan Link: https://bugzilla.kernel.org/show_bug.cgi?id=151291 Signed-off-by: Miklos Szeredi Cc: --- fs/overlayfs/copy_up.c | 2 ++ fs/overlayfs/inode.c | 2 +- fs/overlayfs/overlayfs.h | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 54e5d66..43fdc27 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -80,6 +80,8 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new) } for (name = buf; name < (buf + list_size); name += strlen(name) + 1) { + if (ovl_is_private_xattr(name)) + continue; retry: size = vfs_getxattr(old, name, value, value_size); if (size == -ERANGE) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 1b885c1..024352f 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -191,7 +191,7 @@ static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz) return err; } -static bool ovl_is_private_xattr(const char *name) +bool ovl_is_private_xattr(const char *name) { #define OVL_XATTR_PRE_NAME OVL_XATTR_PREFIX "." 
return strncmp(name, OVL_XATTR_PRE_NAME, diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index e4f5c95..34839bd 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -193,6 +193,7 @@ int ovl_removexattr(struct dentry *dentry, const char *name); struct posix_acl *ovl_get_acl(struct inode *inode, int type); int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags); int ovl_update_time(struct inode *inode, struct timespec *ts, int flags); +bool ovl_is_private_xattr(const char *name); struct inode *ovl_new_inode(struct super_block *sb, umode_t mode); struct inode *ovl_get_inode(struct super_block *sb, struct inode *realinode); -- cgit v1.1 From 557e37c05f28bad113d65d584699e8d8f29f70a3 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Fri, 1 Jul 2016 17:41:59 +0800 Subject: bus: vexpress-config: add missing of_node_put after calling of_parse_phandle of_node_put needs to be called when the device node which is got from of_parse_phandle has finished using. Cc: Lorenzo Pieralisi Acked-by: Liviu Dudau Signed-off-by: Peter Chen Signed-off-by: Sudeep Holla --- drivers/bus/vexpress-config.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bus/vexpress-config.c b/drivers/bus/vexpress-config.c index c3cb76b..9efdf1d 100644 --- a/drivers/bus/vexpress-config.c +++ b/drivers/bus/vexpress-config.c @@ -178,6 +178,7 @@ static int vexpress_config_populate(struct device_node *node) parent = class_find_device(vexpress_config_class, NULL, bridge, vexpress_config_node_match); + of_node_put(bridge); if (WARN_ON(!parent)) return -ENODEV; -- cgit v1.1 From b079bd555f6060cd6b435a1eb58ec3b8d255ebd0 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Mon, 4 Jul 2016 14:55:57 +0800 Subject: firmware: arm_scpi: add missing of_node_put after calling of_parse_phandle of_node_put needs to be called when the device node which is got from of_parse_phandle has finished using it. 
Besides, of_address_to_resource always returns -EINVAL for error, delete the assignment for ret. Signed-off-by: Peter Chen Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scpi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/arm_scpi.c b/drivers/firmware/arm_scpi.c index 4388937..ce2bc2a 100644 --- a/drivers/firmware/arm_scpi.c +++ b/drivers/firmware/arm_scpi.c @@ -709,9 +709,10 @@ static int scpi_probe(struct platform_device *pdev) struct mbox_client *cl = &pchan->cl; struct device_node *shmem = of_parse_phandle(np, "shmem", idx); - if (of_address_to_resource(shmem, 0, &res)) { + ret = of_address_to_resource(shmem, 0, &res); + of_node_put(shmem); + if (ret) { dev_err(dev, "failed to get SCPI payload mem resource\n"); - ret = -EINVAL; goto err; } -- cgit v1.1 From b9700be51bf00bbe9e1dabcb4838d6aa420192d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20F=C3=A4rber?= Date: Tue, 5 Jul 2016 06:04:08 +0200 Subject: ARM: dts: imx6sx-sabreauto: Fix misspelled property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 99fc5ba0bfb6 ("ARM: dts: imx6sx: add i.mx6sx sabreauto board support") it should've been enable-sdio-wakeup (not -wakup). But that is now considered a legacy name for wakeup-source, so directly use the new name instead, as done in commit 26cefdd15db1 for the other occurrence. 
Fixes: 26cefdd15db1 ("ARM: dts: imx: replace legacy wakeup property with 'wakeup-source'") Signed-off-by: Andreas Färber Acked-by: Sudeep Holla Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6sx-sabreauto.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6sx-sabreauto.dts b/arch/arm/boot/dts/imx6sx-sabreauto.dts index 96ea936..240a286 100644 --- a/arch/arm/boot/dts/imx6sx-sabreauto.dts +++ b/arch/arm/boot/dts/imx6sx-sabreauto.dts @@ -64,7 +64,7 @@ cd-gpios = <&gpio7 11 GPIO_ACTIVE_LOW>; no-1-8-v; keep-power-in-suspend; - enable-sdio-wakup; + wakeup-source; status = "okay"; }; -- cgit v1.1 From 7e196aa1a011da35a04cb1f161e186563aa8d8db Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 6 Jul 2016 12:18:34 +0000 Subject: clk: sunxi: pll2: Fix return value check in sun4i_pll2_setup() In case of error, the functions clk_register_composite() and clk_register_divider() returns ERR_PTR() and never returns NULL. The NULL test in the return value check should be replaced with IS_ERR(). 
Signed-off-by: Wei Yongjun Signed-off-by: Maxime Ripard --- drivers/clk/sunxi/clk-a10-pll2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/sunxi/clk-a10-pll2.c b/drivers/clk/sunxi/clk-a10-pll2.c index 0ee1f36..d8eab90 100644 --- a/drivers/clk/sunxi/clk-a10-pll2.c +++ b/drivers/clk/sunxi/clk-a10-pll2.c @@ -73,7 +73,7 @@ static void __init sun4i_pll2_setup(struct device_node *node, SUN4I_PLL2_PRE_DIV_WIDTH, CLK_DIVIDER_ONE_BASED | CLK_DIVIDER_ALLOW_ZERO, &sun4i_a10_pll2_lock); - if (!prediv_clk) { + if (IS_ERR(prediv_clk)) { pr_err("Couldn't register the prediv clock\n"); goto err_free_array; } @@ -106,7 +106,7 @@ static void __init sun4i_pll2_setup(struct device_node *node, &mult->hw, &clk_multiplier_ops, &gate->hw, &clk_gate_ops, CLK_SET_RATE_PARENT); - if (!base_clk) { + if (IS_ERR(base_clk)) { pr_err("Couldn't register the base multiplier clock\n"); goto err_free_multiplier; } -- cgit v1.1 From fbe359f12ce40e3126bea959b135656e4a305897 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 6 Jul 2016 12:21:47 +0000 Subject: clk: sunxi: Fix return value check in sun8i_a23_mbus_setup() In case of error, the function of_io_request_and_map() returns ERR_PTR() and never returns NULL. The NULL test in the return value check should be replaced with IS_ERR(). 
Signed-off-by: Wei Yongjun Signed-off-by: Maxime Ripard --- drivers/clk/sunxi/clk-sun8i-mbus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/sunxi/clk-sun8i-mbus.c b/drivers/clk/sunxi/clk-sun8i-mbus.c index 411d303..b200ebf 100644 --- a/drivers/clk/sunxi/clk-sun8i-mbus.c +++ b/drivers/clk/sunxi/clk-sun8i-mbus.c @@ -48,7 +48,7 @@ static void __init sun8i_a23_mbus_setup(struct device_node *node) return; reg = of_io_request_and_map(node, 0, of_node_full_name(node)); - if (!reg) { + if (IS_ERR(reg)) { pr_err("Could not get registers for sun8i-mbus-clk\n"); goto err_free_parents; } -- cgit v1.1 From 156ad0d7eae4825a0d94a4bf68571e97302f2501 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 26 Jul 2016 15:04:23 +0800 Subject: clk: sunxi-ng: Fix inverted test condition in ccu_helper_wait_for_lock The condition passed to read*_poll_timeout() is the break condition, i.e. wait for this condition to happen and return success. The original code assumed the opposite, resulting in a warning when the PLL clock rate was changed but never lost its lock as far as the readout indicated. This was verified by checking the read out register value. 
Fixes: 1d80c14248d6 ("clk: sunxi-ng: Add common infrastructure") Signed-off-by: Chen-Yu Tsai Acked-by: Maxime Ripard Signed-off-by: Maxime Ripard --- drivers/clk/sunxi-ng/ccu_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/sunxi-ng/ccu_common.c b/drivers/clk/sunxi-ng/ccu_common.c index fc17b52..51d4bac 100644 --- a/drivers/clk/sunxi-ng/ccu_common.c +++ b/drivers/clk/sunxi-ng/ccu_common.c @@ -31,7 +31,7 @@ void ccu_helper_wait_for_lock(struct ccu_common *common, u32 lock) return; WARN_ON(readl_relaxed_poll_timeout(common->base + common->reg, reg, - !(reg & lock), 100, 70000)); + reg & lock, 100, 70000)); } int sunxi_ccu_probe(struct device_node *node, void __iomem *reg, -- cgit v1.1 From 06421a7821f9040467e0db2702309b72aa2b7af4 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 26 Jul 2016 15:04:24 +0800 Subject: clk: sunxi-ng: nk: Make ccu_nk_find_best static make C=2 reports: CHECK drivers/clk/sunxi-ng/ccu_nk.c drivers/clk/sunxi-ng/ccu_nk.c:17:6: warning: symbol 'ccu_nk_find_best' was not declared. Should it be static? ccu_nk_find_best is only used within ccu_nk.c. So make it static to get rid of this warning. 
Fixes: adbfb0056e03 ("clk: sunxi-ng: Add N-K-factor clock support") Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard --- drivers/clk/sunxi-ng/ccu_nk.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/clk/sunxi-ng/ccu_nk.c b/drivers/clk/sunxi-ng/ccu_nk.c index 4470ffc..d6fafb3 100644 --- a/drivers/clk/sunxi-ng/ccu_nk.c +++ b/drivers/clk/sunxi-ng/ccu_nk.c @@ -14,9 +14,9 @@ #include "ccu_gate.h" #include "ccu_nk.h" -void ccu_nk_find_best(unsigned long parent, unsigned long rate, - unsigned int max_n, unsigned int max_k, - unsigned int *n, unsigned int *k) +static void ccu_nk_find_best(unsigned long parent, unsigned long rate, + unsigned int max_n, unsigned int max_k, + unsigned int *n, unsigned int *k) { unsigned long best_rate = 0; unsigned int best_k = 0, best_n = 0; -- cgit v1.1 From 764f21665a12e99f03124b4c8de722cdbff92213 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 9 Aug 2016 17:45:33 +0100 Subject: spi: Drop io_mutex in error paths A couple of error paths were missing drops of io_mutex. 
Reported-by: Julia Lawall Signed-off-by: Mark Brown --- drivers/spi/spi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 51ad42f..57179d5 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1159,6 +1159,7 @@ static void __spi_pump_messages(struct spi_master *master, bool in_kthread) if (ret < 0) { dev_err(&master->dev, "Failed to power device: %d\n", ret); + mutex_unlock(&master->io_mutex); return; } } @@ -1174,6 +1175,7 @@ static void __spi_pump_messages(struct spi_master *master, bool in_kthread) if (master->auto_runtime_pm) pm_runtime_put(master->dev.parent); + mutex_unlock(&master->io_mutex); return; } } -- cgit v1.1 From 06f4e94898918bcad00cdd4d349313a439d6911e Mon Sep 17 00:00:00 2001 From: Zefan Li Date: Tue, 9 Aug 2016 11:25:01 +0800 Subject: cpuset: make sure new tasks conform to the current config of the cpuset A new task inherits cpus_allowed and mems_allowed masks from its parent, but if someone changes cpuset's config by writing to cpuset.cpus/cpuset.mems before this new task is inserted into the cgroup's task list, the new task won't be updated accordingly. Signed-off-by: Zefan Li Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org --- kernel/cpuset.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index c7fd277..c27e533 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2069,6 +2069,20 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css) mutex_unlock(&cpuset_mutex); } +/* + * Make sure the new task conform to the current state of its parent, + * which could have been changed by cpuset just after it inherits the + * state from the parent and before it sits on the cgroup's task list. 
+ */ +void cpuset_fork(struct task_struct *task) +{ + if (task_css_is_root(task, cpuset_cgrp_id)) + return; + + set_cpus_allowed_ptr(task, &current->cpus_allowed); + task->mems_allowed = current->mems_allowed; +} + struct cgroup_subsys cpuset_cgrp_subsys = { .css_alloc = cpuset_css_alloc, .css_online = cpuset_css_online, @@ -2079,6 +2093,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = { .attach = cpuset_attach, .post_attach = cpuset_post_attach, .bind = cpuset_bind, + .fork = cpuset_fork, .legacy_cftypes = files, .early_init = true, }; -- cgit v1.1 From e45a8a9e60ff1dd5ad118c794337a1101b46ab0d Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 9 Aug 2016 18:27:08 +0200 Subject: xfrm: constify xfrm_replay structures The xfrm_replay structures are never modified, so declare them as const. Done with the help of Coccinelle. Signed-off-by: Julia Lawall Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 2 +- net/xfrm/xfrm_replay.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index adfebd6..d2fdd6d 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -187,7 +187,7 @@ struct xfrm_state { struct xfrm_replay_state_esn *preplay_esn; /* The functions for replay detection. 
*/ - struct xfrm_replay *repl; + const struct xfrm_replay *repl; /* internal flag that only holds state for delayed aevent at the * moment diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 4fd725a..cdc2e2e 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -558,7 +558,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) x->repl->notify(x, XFRM_REPLAY_UPDATE); } -static struct xfrm_replay xfrm_replay_legacy = { +static const struct xfrm_replay xfrm_replay_legacy = { .advance = xfrm_replay_advance, .check = xfrm_replay_check, .recheck = xfrm_replay_check, @@ -566,7 +566,7 @@ static struct xfrm_replay xfrm_replay_legacy = { .overflow = xfrm_replay_overflow, }; -static struct xfrm_replay xfrm_replay_bmp = { +static const struct xfrm_replay xfrm_replay_bmp = { .advance = xfrm_replay_advance_bmp, .check = xfrm_replay_check_bmp, .recheck = xfrm_replay_check_bmp, @@ -574,7 +574,7 @@ static struct xfrm_replay xfrm_replay_bmp = { .overflow = xfrm_replay_overflow_bmp, }; -static struct xfrm_replay xfrm_replay_esn = { +static const struct xfrm_replay xfrm_replay_esn = { .advance = xfrm_replay_advance_esn, .check = xfrm_replay_check_esn, .recheck = xfrm_replay_recheck_esn, -- cgit v1.1 From ae3fb6d32140e5c5b491892105ca89066171d217 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 9 Aug 2016 12:16:04 +0200 Subject: xfrm: state: use hlist_for_each_entry_rcu helper This is required once we allow lockless access of bydst/bysrc hash tables. 
Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 9895a8c..904ab4d 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -76,18 +76,18 @@ static void xfrm_hash_transfer(struct hlist_head *list, h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr, x->props.reqid, x->props.family, nhashmask); - hlist_add_head(&x->bydst, ndsttable+h); + hlist_add_head_rcu(&x->bydst, ndsttable + h); h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family, nhashmask); - hlist_add_head(&x->bysrc, nsrctable+h); + hlist_add_head_rcu(&x->bysrc, nsrctable + h); if (x->id.spi) { h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family, nhashmask); - hlist_add_head(&x->byspi, nspitable+h); + hlist_add_head_rcu(&x->byspi, nspitable + h); } } } @@ -520,10 +520,10 @@ int __xfrm_state_delete(struct xfrm_state *x) x->km.state = XFRM_STATE_DEAD; spin_lock(&net->xfrm.xfrm_state_lock); list_del(&x->km.all); - hlist_del(&x->bydst); - hlist_del(&x->bysrc); + hlist_del_rcu(&x->bydst); + hlist_del_rcu(&x->bysrc); if (x->id.spi) - hlist_del(&x->byspi); + hlist_del_rcu(&x->byspi); net->xfrm.state_num--; spin_unlock(&net->xfrm.xfrm_state_lock); @@ -659,7 +659,7 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family); struct xfrm_state *x; - hlist_for_each_entry(x, net->xfrm.state_byspi+h, byspi) { + hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) { if (x->props.family != family || x->id.spi != spi || x->id.proto != proto || @@ -683,7 +683,7 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, unsigned int h = xfrm_src_hash(net, daddr, saddr, family); struct xfrm_state *x; - hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) { + 
hlist_for_each_entry_rcu(x, net->xfrm.state_bysrc + h, bysrc) { if (x->props.family != family || x->id.proto != proto || !xfrm_addr_equal(&x->id.daddr, daddr, family) || @@ -781,7 +781,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, spin_lock_bh(&net->xfrm.xfrm_state_lock); h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); - hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) { if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && (mark & x->mark.m) == x->mark.v && @@ -797,7 +797,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, goto found; h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family); - hlist_for_each_entry(x, net->xfrm.state_bydst+h_wildcard, bydst) { + hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) { if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && (mark & x->mark.m) == x->mark.v && @@ -852,12 +852,12 @@ found: if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; list_add(&x->km.all, &net->xfrm.state_all); - hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); + hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h); h = xfrm_src_hash(net, daddr, saddr, encap_family); - hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); + hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h); if (x->id.spi) { h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family); - hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); + hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h); } x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); @@ -945,16 +945,16 @@ static void __xfrm_state_insert(struct xfrm_state *x) h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr, x->props.reqid, x->props.family); - 
hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); + hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h); h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family); - hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); + hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h); if (x->id.spi) { h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family); - hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); + hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h); } tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL); @@ -1063,9 +1063,9 @@ static struct xfrm_state *__find_acq_core(struct net *net, xfrm_state_hold(x); tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); list_add(&x->km.all, &net->xfrm.state_all); - hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); + hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h); h = xfrm_src_hash(net, daddr, saddr, family); - hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); + hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h); net->xfrm.state_num++; @@ -1581,7 +1581,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) if (x->id.spi) { spin_lock_bh(&net->xfrm.xfrm_state_lock); h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family); - hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); + hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h); spin_unlock_bh(&net->xfrm.xfrm_state_lock); err = 0; -- cgit v1.1 From 02efdff7e209859c2755ebe93b3bd0e3d40123ab Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 9 Aug 2016 12:16:05 +0200 Subject: xfrm: state: use atomic_inc_not_zero to increment refcount Once xfrm_state_lookup_byaddr no longer acquires the state lock another cpu might be freeing the state entry at the same time. To detect this we use atomic_inc_not_zero, we then signal -EAGAIN to caller in case our result was stale. 
Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 904ab4d..84c1db6 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -37,6 +37,11 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; +static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x) +{ + return atomic_inc_not_zero(&x->refcnt); +} + static inline unsigned int xfrm_dst_hash(struct net *net, const xfrm_address_t *daddr, const xfrm_address_t *saddr, @@ -668,7 +673,8 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, if ((mark & x->mark.m) != x->mark.v) continue; - xfrm_state_hold(x); + if (!xfrm_state_hold_rcu(x)) + continue; return x; } @@ -692,7 +698,8 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, if ((mark & x->mark.m) != x->mark.v) continue; - xfrm_state_hold(x); + if (!xfrm_state_hold_rcu(x)) + continue; return x; } @@ -871,10 +878,14 @@ found: } } out: - if (x) - xfrm_state_hold(x); - else + if (x) { + if (!xfrm_state_hold_rcu(x)) { + *err = -EAGAIN; + x = NULL; + } + } else { *err = acquire_in_progress ? -EAGAIN : error; + } spin_unlock_bh(&net->xfrm.xfrm_state_lock); if (to_put) xfrm_state_put(to_put); -- cgit v1.1 From df7274eb70b7c8488170ebe8757dd94647a8e1e5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 9 Aug 2016 12:16:06 +0200 Subject: xfrm: state: delay freeing until rcu grace period has elapsed The hash table backend memory and the state structs are free'd via kfree/vfree. Once we only rely on rcu during lookups we have to make sure no other cpu is currently accessing this before doing the free. Free operations already happen from worker so we can use synchronize_rcu to wait until concurrent readers are done. 
Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 84c1db6..8e37387 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -146,6 +146,9 @@ static void xfrm_hash_resize(struct work_struct *work) spin_unlock_bh(&net->xfrm.xfrm_state_lock); osize = (ohashmask + 1) * sizeof(struct hlist_head); + + synchronize_rcu(); + xfrm_hash_free(odst, osize); xfrm_hash_free(osrc, osize); xfrm_hash_free(ospi, osize); @@ -369,6 +372,8 @@ static void xfrm_state_gc_task(struct work_struct *work) hlist_move_list(&net->xfrm.state_gc_list, &gc_list); spin_unlock_bh(&xfrm_state_gc_lock); + synchronize_rcu(); + hlist_for_each_entry_safe(x, tmp, &gc_list, gclist) xfrm_state_gc_destroy(x); } -- cgit v1.1 From b65e3d7be06fd8ff5236439254f338fe1a8d4bbd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 9 Aug 2016 12:16:07 +0200 Subject: xfrm: state: add sequence count to detect hash resizes Once xfrm_state_find is lockless we have to cope with a concurrent resize operation. We use a sequence counter to block in case a resize is in progress and to detect if we might have missed a state that got moved to a new hash table.
Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 8e37387..ac4037c 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -36,6 +36,7 @@ */ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; +static __read_mostly seqcount_t xfrm_state_hash_generation = SEQCNT_ZERO(xfrm_state_hash_generation); static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x) { @@ -127,6 +128,7 @@ static void xfrm_hash_resize(struct work_struct *work) } spin_lock_bh(&net->xfrm.xfrm_state_lock); + write_seqcount_begin(&xfrm_state_hash_generation); nhashmask = (nsize / sizeof(struct hlist_head)) - 1U; for (i = net->xfrm.state_hmask; i >= 0; i--) @@ -143,6 +145,7 @@ static void xfrm_hash_resize(struct work_struct *work) net->xfrm.state_byspi = nspi; net->xfrm.state_hmask = nhashmask; + write_seqcount_end(&xfrm_state_hash_generation); spin_unlock_bh(&net->xfrm.xfrm_state_lock); osize = (ohashmask + 1) * sizeof(struct hlist_head); @@ -787,10 +790,13 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, struct xfrm_state *best = NULL; u32 mark = pol->mark.v & pol->mark.m; unsigned short encap_family = tmpl->encap_family; + unsigned int sequence; struct km_event c; to_put = NULL; + sequence = read_seqcount_begin(&xfrm_state_hash_generation); + spin_lock_bh(&net->xfrm.xfrm_state_lock); h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) { @@ -894,6 +900,15 @@ out: spin_unlock_bh(&net->xfrm.xfrm_state_lock); if (to_put) xfrm_state_put(to_put); + + if (read_seqcount_retry(&xfrm_state_hash_generation, sequence)) { + *err = -EAGAIN; + if (x) { + xfrm_state_put(x); + x = NULL; + } + } + return x; } -- cgit v1.1 From c8406998b80183ef87895ab1de4dbed8bb2d53a0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: 
Tue, 9 Aug 2016 12:16:08 +0200 Subject: xfrm: state: use rcu_deref and assign_pointer helpers Before xfrm_state_find() can use rcu_read_lock instead of xfrm_state_lock we need to switch users of the hash table to assign/obtain the pointers with the appropriate rcu helpers. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index ac4037c..53e7867 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -28,6 +28,9 @@ #include "xfrm_hash.h" +#define xfrm_state_deref_prot(table, net) \ + rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock)) + /* Each xfrm_state may be linked to two tables: 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl) @@ -131,18 +134,17 @@ static void xfrm_hash_resize(struct work_struct *work) write_seqcount_begin(&xfrm_state_hash_generation); nhashmask = (nsize / sizeof(struct hlist_head)) - 1U; + odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net); for (i = net->xfrm.state_hmask; i >= 0; i--) - xfrm_hash_transfer(net->xfrm.state_bydst+i, ndst, nsrc, nspi, - nhashmask); + xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nhashmask); - odst = net->xfrm.state_bydst; - osrc = net->xfrm.state_bysrc; - ospi = net->xfrm.state_byspi; + osrc = xfrm_state_deref_prot(net->xfrm.state_bysrc, net); + ospi = xfrm_state_deref_prot(net->xfrm.state_byspi, net); ohashmask = net->xfrm.state_hmask; - net->xfrm.state_bydst = ndst; - net->xfrm.state_bysrc = nsrc; - net->xfrm.state_byspi = nspi; + rcu_assign_pointer(net->xfrm.state_bydst, ndst); + rcu_assign_pointer(net->xfrm.state_bysrc, nsrc); + rcu_assign_pointer(net->xfrm.state_byspi, nspi); net->xfrm.state_hmask = nhashmask; write_seqcount_end(&xfrm_state_hash_generation); -- cgit v1.1 From d737a5805581c6f99dad4caa9fdf80965d617d1a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 9 
Aug 2016 12:16:09 +0200 Subject: xfrm: state: don't use lock anymore unless acquire operation is needed push the lock down, after earlier patches we can rely on rcu to make sure state struct won't go away. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 6 +++--- net/xfrm/xfrm_state.c | 6 ++++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 24cd394..1ab51d1 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -38,9 +38,9 @@ struct netns_xfrm { * mode. Also, it can be used by ah/esp icmp error handler to find * offending SA. */ - struct hlist_head *state_bydst; - struct hlist_head *state_bysrc; - struct hlist_head *state_byspi; + struct hlist_head __rcu *state_bydst; + struct hlist_head __rcu *state_bysrc; + struct hlist_head __rcu *state_byspi; unsigned int state_hmask; unsigned int state_num; struct work_struct state_hash_work; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 53e7867..1a15b65 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -799,7 +799,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, sequence = read_seqcount_begin(&xfrm_state_hash_generation); - spin_lock_bh(&net->xfrm.xfrm_state_lock); + rcu_read_lock(); h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) { if (x->props.family == encap_family && @@ -870,6 +870,7 @@ found: } if (km_query(x, tmpl, pol) == 0) { + spin_lock_bh(&net->xfrm.xfrm_state_lock); x->km.state = XFRM_STATE_ACQ; list_add(&x->km.all, &net->xfrm.state_all); hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h); @@ -883,6 +884,7 @@ found: tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); net->xfrm.state_num++; xfrm_hash_grow_check(net, x->bydst.next != NULL); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); } 
else { x->km.state = XFRM_STATE_DEAD; to_put = x; @@ -899,7 +901,7 @@ out: } else { *err = acquire_in_progress ? -EAGAIN : error; } - spin_unlock_bh(&net->xfrm.xfrm_state_lock); + rcu_read_unlock(); if (to_put) xfrm_state_put(to_put); -- cgit v1.1 From 162f2db394a0efb270db3d93475ce466794a8eef Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 15 Jul 2016 10:12:49 +0200 Subject: ARM: dts: exynos: Properly select eMMC HighSpeed mode on Odroid XU Exynos5410 supports eMMC version 4.41 so HS200 is the top mode which should be configured. This is reflected in usage of "samsung,exynos5250-dw-mshc" compatible. However Odroid XU DTS contained also property "mmc-hs400-1_8v" which is parsed by MMC core therefore resulting in mixed configuration. MMC core set HS400 but dwmmc_exynos driver did not configure the data strobe for HS400 DDR mode. Removal of HS400 properties fixes semi-random mmc errors during boot: mmc_host mmc0: Bus speed (slot 0) = 400000000Hz (slot req 200000000Hz, actual 200000000HZ div = 1) mmc0: mmc_select_hs400 failed, error -84 mmc0: error -84 whilst initialising MMC card Signed-off-by: Krzysztof Kozlowski Reviewed-by: Alim Akhtar --- arch/arm/boot/dts/exynos5410-odroidxu.dts | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm/boot/dts/exynos5410-odroidxu.dts b/arch/arm/boot/dts/exynos5410-odroidxu.dts index d949931..f6d1352 100644 --- a/arch/arm/boot/dts/exynos5410-odroidxu.dts +++ b/arch/arm/boot/dts/exynos5410-odroidxu.dts @@ -447,14 +447,11 @@ samsung,dw-mshc-ciu-div = <3>; samsung,dw-mshc-sdr-timing = <0 4>; samsung,dw-mshc-ddr-timing = <0 2>; - samsung,dw-mshc-hs400-timing = <0 2>; - samsung,read-strobe-delay = <90>; pinctrl-names = "default"; pinctrl-0 = <&sd0_clk &sd0_cmd &sd0_bus1 &sd0_bus4 &sd0_bus8 &sd0_cd>; bus-width = <8>; cap-mmc-highspeed; mmc-hs200-1_8v; - mmc-hs400-1_8v; vmmc-supply = <&ldo20_reg>; vqmmc-supply = <&ldo11_reg>; }; -- cgit v1.1 From ed0ab110235c659fdb3f73d27907b1b45b89cf30 Mon Sep 17 00:00:00 2001 From: 
Chen-Yu Tsai Date: Thu, 14 Jul 2016 10:42:35 +0800 Subject: clk: sunxi-ng: Fix inverted test condition in ccu_helper_wait_for_lock The condition passed to read*_poll_timeout() is the break condition, i.e. wait for this condition to happen and return success. The original code assumed the opposite, resulting in a warning when the PLL clock rate was changed but never lost its lock as far as the readout indicated. This was verified by checking the read out register value. Fixes: 1d80c14248d6 ("clk: sunxi-ng: Add common infrastructure") Signed-off-by: Chen-Yu Tsai Acked-by: Maxime Ripard Signed-off-by: Stephen Boyd --- drivers/clk/sunxi-ng/ccu_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/sunxi-ng/ccu_common.c b/drivers/clk/sunxi-ng/ccu_common.c index fc17b52..51d4bac 100644 --- a/drivers/clk/sunxi-ng/ccu_common.c +++ b/drivers/clk/sunxi-ng/ccu_common.c @@ -31,7 +31,7 @@ void ccu_helper_wait_for_lock(struct ccu_common *common, u32 lock) return; WARN_ON(readl_relaxed_poll_timeout(common->base + common->reg, reg, - !(reg & lock), 100, 70000)); + reg & lock, 100, 70000)); } int sunxi_ccu_probe(struct device_node *node, void __iomem *reg, -- cgit v1.1 From 1625f4529957738be7d87cf157e107b8fb9d23b9 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Wed, 10 Aug 2016 13:54:57 +0300 Subject: net/xfrm_input: fix possible NULL deref of tunnel.ip6->parms.i_key Running LTP 'icmp-uni-basic.sh -6 -p ipcomp -m tunnel' test over openvswitch + veth can trigger kernel panic: BUG: unable to handle kernel NULL pointer dereference at 00000000000000e0 IP: [] xfrm_input+0x82/0x750 ... [] xfrm6_rcv_spi+0x1e/0x20 [] xfrm6_tunnel_rcv+0x42/0x50 [xfrm6_tunnel] [] tunnel6_rcv+0x3e/0x8c [tunnel6] [] ip6_input_finish+0xd5/0x430 [] ip6_input+0x33/0x90 [] ip6_rcv_finish+0xa5/0xb0 ... It seems that tunnel.ip6 can have garbage values and also dereferenced without a proper check, only tunnel.ip4 is being verified.
Fix it by adding one more if block for AF_INET6 and initialize tunnel.ip6 with NULL inside xfrm6_rcv_spi() (which is similar to xfrm4_rcv_spi()). Fixes: 049f8e2 ("xfrm: Override skb->mark with tunnel->parm.i_key in xfrm_input") Signed-off-by: Alexey Kodanev Signed-off-by: Steffen Klassert --- net/ipv6/xfrm6_input.c | 1 + net/xfrm/xfrm_input.c | 14 +++++++------- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 0eaab1f..00a2d40 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -23,6 +23,7 @@ int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb) int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) { + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; XFRM_SPI_SKB_CB(skb)->family = AF_INET6; XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); return xfrm_input(skb, nexthdr, spi, 0); diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 1c4ad47..6e3f025 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -207,15 +207,15 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) family = XFRM_SPI_SKB_CB(skb)->family; /* if tunnel is present override skb->mark value with tunnel i_key */ - if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4) { - switch (family) { - case AF_INET: + switch (family) { + case AF_INET: + if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4) mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4->parms.i_key); - break; - case AF_INET6: + break; + case AF_INET6: + if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6) mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6->parms.i_key); - break; - } + break; } /* Allocate new secpath or COW existing one. 
*/ -- cgit v1.1 From 9b4b3f6a062b22550e62523efe5213776cdd426b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 Aug 2016 07:26:01 -0700 Subject: ahci: disable correct irq for dummy ports irq already contains the interrupt number for the port, don't add the port index to it. Signed-off-by: Christoph Hellwig Signed-off-by: Tejun Heo Fixes: d684a90d38e2 ("ahci: per-port msix support") Cc: stable@vger.kernel.org v4.5+ --- drivers/ata/libahci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 7461a58..dcf2c72 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -2524,7 +2524,7 @@ static int ahci_host_activate_multi_irqs(struct ata_host *host, /* Do not receive interrupts sent by dummy ports */ if (!pp) { - disable_irq(irq + i); + disable_irq(irq); continue; } -- cgit v1.1 From a3f457d9636b3f5ae4fc6502cb0c95f60f5e342b Mon Sep 17 00:00:00 2001 From: Chris Zhong Date: Tue, 9 Aug 2016 11:02:33 -0700 Subject: clk: rockchip: fix rk3399 aclk_vio gate bit Fix incorrect rk3399 aclk_vio gating bit, it should be 0, not 10. 
Fixes: 115510053e5e ("clk: rockchip: add clock controller for the RK3399") Signed-off-by: Chris Zhong Reviewed-by: Xing Zheng Reviewed-by: Guenter Roeck Signed-off-by: Heiko Stuebner --- drivers/clk/rockchip/clk-rk3399.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/rockchip/clk-rk3399.c b/drivers/clk/rockchip/clk-rk3399.c index c109d80..314eab6 100644 --- a/drivers/clk/rockchip/clk-rk3399.c +++ b/drivers/clk/rockchip/clk-rk3399.c @@ -1071,7 +1071,7 @@ static struct rockchip_clk_branch rk3399_clk_branches[] __initdata = { /* vio */ COMPOSITE(ACLK_VIO, "aclk_vio", mux_pll_src_cpll_gpll_ppll_p, CLK_IGNORE_UNUSED, RK3399_CLKSEL_CON(42), 6, 2, MFLAGS, 0, 5, DFLAGS, - RK3399_CLKGATE_CON(11), 10, GFLAGS), + RK3399_CLKGATE_CON(11), 0, GFLAGS), COMPOSITE_NOMUX(PCLK_VIO, "pclk_vio", "aclk_vio", 0, RK3399_CLKSEL_CON(43), 0, 5, DFLAGS, RK3399_CLKGATE_CON(11), 1, GFLAGS), -- cgit v1.1 From e0cb1b84163720ec67ff0e54397fd3f57ad4a4dd Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 10 Aug 2016 09:29:43 +0200 Subject: clk: renesas: r8a7795: Fix SD clocks According to the datasheet, SDn clocks are from the SDSRC clock. And the SDSRC has a 1/2 divider. So, we should have ".sdsrc" as an internal core clock. Otherwise, since the sdhi driver will calculate clock for a sd card using the wrong parent clock rate, and then performance will be not good. 
Fixes: 90c073e53909da85 ("clk: shmobile: r8a7795: Add SD divider support") Signed-off-by: Yoshihiro Shimoda Acked-by: Dirk Behme Tested-by: Wolfram Sang Signed-off-by: Geert Uytterhoeven Cc: stable@vger.kernel.org Signed-off-by: Stephen Boyd --- drivers/clk/renesas/r8a7795-cpg-mssr.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/clk/renesas/r8a7795-cpg-mssr.c b/drivers/clk/renesas/r8a7795-cpg-mssr.c index d359c92..e38bf60 100644 --- a/drivers/clk/renesas/r8a7795-cpg-mssr.c +++ b/drivers/clk/renesas/r8a7795-cpg-mssr.c @@ -69,6 +69,7 @@ static const struct cpg_core_clk r8a7795_core_clks[] __initconst = { DEF_FIXED(".s1", CLK_S1, CLK_PLL1_DIV2, 3, 1), DEF_FIXED(".s2", CLK_S2, CLK_PLL1_DIV2, 4, 1), DEF_FIXED(".s3", CLK_S3, CLK_PLL1_DIV2, 6, 1), + DEF_FIXED(".sdsrc", CLK_SDSRC, CLK_PLL1_DIV2, 2, 1), /* Core Clock Outputs */ DEF_FIXED("ztr", R8A7795_CLK_ZTR, CLK_PLL1_DIV2, 6, 1), @@ -87,10 +88,10 @@ static const struct cpg_core_clk r8a7795_core_clks[] __initconst = { DEF_FIXED("s3d2", R8A7795_CLK_S3D2, CLK_S3, 2, 1), DEF_FIXED("s3d4", R8A7795_CLK_S3D4, CLK_S3, 4, 1), - DEF_GEN3_SD("sd0", R8A7795_CLK_SD0, CLK_PLL1_DIV2, 0x0074), - DEF_GEN3_SD("sd1", R8A7795_CLK_SD1, CLK_PLL1_DIV2, 0x0078), - DEF_GEN3_SD("sd2", R8A7795_CLK_SD2, CLK_PLL1_DIV2, 0x0268), - DEF_GEN3_SD("sd3", R8A7795_CLK_SD3, CLK_PLL1_DIV2, 0x026c), + DEF_GEN3_SD("sd0", R8A7795_CLK_SD0, CLK_SDSRC, 0x0074), + DEF_GEN3_SD("sd1", R8A7795_CLK_SD1, CLK_SDSRC, 0x0078), + DEF_GEN3_SD("sd2", R8A7795_CLK_SD2, CLK_SDSRC, 0x0268), + DEF_GEN3_SD("sd3", R8A7795_CLK_SD3, CLK_SDSRC, 0x026c), DEF_FIXED("cl", R8A7795_CLK_CL, CLK_PLL1_DIV2, 48, 1), DEF_FIXED("cp", R8A7795_CLK_CP, CLK_EXTAL, 2, 1), -- cgit v1.1 From a5eefc1df641f3c99fe54b309e7b79c18cec4a1e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Aug 2016 15:17:52 +0200 Subject: xfrm: policy: use rcu versions for iteration and list add/del This is required once we allow lockless readers. 
Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b5e665b..93b8ff7 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -426,14 +426,14 @@ redo: h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr, pol->family, nhashmask, dbits, sbits); if (!entry0) { - hlist_del(&pol->bydst); - hlist_add_head(&pol->bydst, ndsttable+h); + hlist_del_rcu(&pol->bydst); + hlist_add_head_rcu(&pol->bydst, ndsttable + h); h0 = h; } else { if (h != h0) continue; - hlist_del(&pol->bydst); - hlist_add_behind(&pol->bydst, entry0); + hlist_del_rcu(&pol->bydst); + hlist_add_behind_rcu(&pol->bydst, entry0); } entry0 = &pol->bydst; } @@ -1106,7 +1106,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, read_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_direct(net, daddr, saddr, family, dir); ret = NULL; - hlist_for_each_entry(pol, chain, bydst) { + hlist_for_each_entry_rcu(pol, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, dir); if (err) { if (err == -ESRCH) @@ -1122,7 +1122,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, } } chain = &net->xfrm.policy_inexact[dir]; - hlist_for_each_entry(pol, chain, bydst) { + hlist_for_each_entry_rcu(pol, chain, bydst) { if ((pol->priority >= priority) && ret) break; @@ -1271,7 +1271,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, /* Socket policies are not hashed. 
*/ if (!hlist_unhashed(&pol->bydst)) { - hlist_del(&pol->bydst); + hlist_del_rcu(&pol->bydst); hlist_del(&pol->byidx); } -- cgit v1.1 From e1e551bc56302b80ff930c966f9985095fb1b70d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Aug 2016 15:17:53 +0200 Subject: xfrm: policy: prepare policy_bydst hash for rcu lookups Since commit 56f047305dd4b6b617 ("xfrm: add rcu grace period in xfrm_policy_destroy()") xfrm policy objects are already free'd via rcu. In order to make more places lockless (i.e. use rcu_read_lock instead of grabbing read-side of policy rwlock) we only need to: - use rcu_assign_pointer to store address of new hash table backend memory - add rcu barrier so that freeing of old memory is delayed (expansion and free happens from system workqueue, so synchronize_rcu is fine) - use rcu_dereference to fetch current address of the hash table. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 93b8ff7..4a8d90a 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -385,9 +385,11 @@ static struct hlist_head *policy_hash_bysel(struct net *net, __get_hash_thresh(net, family, dir, &dbits, &sbits); hash = __sel_hash(sel, family, hmask, dbits, sbits); - return (hash == hmask + 1 ? 
- &net->xfrm.policy_inexact[dir] : - net->xfrm.policy_bydst[dir].table + hash); + if (hash == hmask + 1) + return &net->xfrm.policy_inexact[dir]; + + return rcu_dereference_check(net->xfrm.policy_bydst[dir].table, + lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash; } static struct hlist_head *policy_hash_direct(struct net *net, @@ -403,7 +405,8 @@ static struct hlist_head *policy_hash_direct(struct net *net, __get_hash_thresh(net, family, dir, &dbits, &sbits); hash = __addr_hash(daddr, saddr, family, hmask, dbits, sbits); - return net->xfrm.policy_bydst[dir].table + hash; + return rcu_dereference_check(net->xfrm.policy_bydst[dir].table, + lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash; } static void xfrm_dst_hash_transfer(struct net *net, @@ -468,8 +471,8 @@ static void xfrm_bydst_resize(struct net *net, int dir) unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; unsigned int nhashmask = xfrm_new_hash_mask(hmask); unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); - struct hlist_head *odst = net->xfrm.policy_bydst[dir].table; struct hlist_head *ndst = xfrm_hash_alloc(nsize); + struct hlist_head *odst; int i; if (!ndst) @@ -477,14 +480,19 @@ static void xfrm_bydst_resize(struct net *net, int dir) write_lock_bh(&net->xfrm.xfrm_policy_lock); + odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table, + lockdep_is_held(&net->xfrm.xfrm_policy_lock)); + for (i = hmask; i >= 0; i--) xfrm_dst_hash_transfer(net, odst + i, ndst, nhashmask, dir); - net->xfrm.policy_bydst[dir].table = ndst; + rcu_assign_pointer(net->xfrm.policy_bydst[dir].table, ndst); net->xfrm.policy_bydst[dir].hmask = nhashmask; write_unlock_bh(&net->xfrm.xfrm_policy_lock); + synchronize_rcu(); + xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); } -- cgit v1.1 From 30846090a746edfdb230deadd638cfa96f7b8c91 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Aug 2016 15:17:54 +0200 Subject: xfrm: policy: add sequence count to sync with hash resize 
Once xfrm_policy_lookup_bytype doesn't grab xfrm_policy_lock anymore it's possible for a hash resize to occur in parallel. Use sequence counter to block lookup in case a resize is in progress and to also re-lookup in case hash table was altered in the meantime (might cause us to not find the best-match). Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 4a8d90a..576d903 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -49,6 +49,7 @@ static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO] __read_mostly; static struct kmem_cache *xfrm_dst_cache __read_mostly; +static __read_mostly seqcount_t xfrm_policy_hash_generation; static void xfrm_init_pmtu(struct dst_entry *dst); static int stale_bundle(struct dst_entry *dst); @@ -479,6 +480,10 @@ static void xfrm_bydst_resize(struct net *net, int dir) return; write_lock_bh(&net->xfrm.xfrm_policy_lock); + write_seqcount_begin(&xfrm_policy_hash_generation); + + odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table, + lockdep_is_held(&net->xfrm.xfrm_policy_lock)); odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table, lockdep_is_held(&net->xfrm.xfrm_policy_lock)); @@ -489,6 +494,7 @@ static void xfrm_bydst_resize(struct net *net, int dir) rcu_assign_pointer(net->xfrm.policy_bydst[dir].table, ndst); net->xfrm.policy_bydst[dir].hmask = nhashmask; + write_seqcount_end(&xfrm_policy_hash_generation); write_unlock_bh(&net->xfrm.xfrm_policy_lock); synchronize_rcu(); @@ -1104,7 +1110,8 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, struct xfrm_policy *pol, *ret; const xfrm_address_t *daddr, *saddr; struct hlist_head *chain; - u32 priority = ~0U; + unsigned int sequence; + u32 priority; daddr = xfrm_flowi_daddr(fl, family); saddr = xfrm_flowi_saddr(fl, family); @@
-1112,7 +1119,13 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, return NULL; read_lock_bh(&net->xfrm.xfrm_policy_lock); - chain = policy_hash_direct(net, daddr, saddr, family, dir); + retry: + do { + sequence = read_seqcount_begin(&xfrm_policy_hash_generation); + chain = policy_hash_direct(net, daddr, saddr, family, dir); + } while (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)); + + priority = ~0U; ret = NULL; hlist_for_each_entry_rcu(pol, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, dir); @@ -1148,6 +1161,9 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, } } + if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) + goto retry; + xfrm_pol_hold(ret); fail: read_unlock_bh(&net->xfrm.xfrm_policy_lock); @@ -3090,6 +3106,7 @@ static struct pernet_operations __net_initdata xfrm_net_ops = { void __init xfrm_init(void) { register_pernet_subsys(&xfrm_net_ops); + seqcount_init(&xfrm_policy_hash_generation); xfrm_input_init(); } -- cgit v1.1 From e37cc8ade5afaf082f804c6d18eb23377146bec4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Aug 2016 15:17:55 +0200 Subject: xfrm: policy: use atomic_inc_not_zero in rcu section If we don't hold the policy lock anymore the refcnt might already be 0, i.e. policy struct is about to be free'd. Switch to atomic_inc_not_zero to avoid this. On removal policies are already unlinked from the tables (lists) before the last _put occurs so we are not supposed to find the same 'dead' entry on the next loop, so it's safe to just repeat the lookup.
Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 576d903..09f2e2b 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -60,6 +60,11 @@ static void __xfrm_policy_link(struct xfrm_policy *pol, int dir); static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, int dir); +static inline bool xfrm_pol_hold_rcu(struct xfrm_policy *policy) +{ + return atomic_inc_not_zero(&policy->refcnt); +} + static inline bool __xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) { @@ -1164,7 +1169,8 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) goto retry; - xfrm_pol_hold(ret); + if (ret && !xfrm_pol_hold_rcu(ret)) + goto retry; fail: read_unlock_bh(&net->xfrm.xfrm_policy_lock); -- cgit v1.1 From a7c44247f704e385c77579d65c6ee6d002832529 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Aug 2016 15:17:56 +0200 Subject: xfrm: policy: make xfrm_policy_lookup_bytype lockless side effect: no longer disables BH (should be fine). 
Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 2 +- net/xfrm/xfrm_policy.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 1ab51d1..3ab828a 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -11,7 +11,7 @@ struct ctl_table_header; struct xfrm_policy_hash { - struct hlist_head *table; + struct hlist_head __rcu *table; unsigned int hmask; u8 dbits4; u8 sbits4; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 09f2e2b..9302647 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1123,7 +1123,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, if (unlikely(!daddr || !saddr)) return NULL; - read_lock_bh(&net->xfrm.xfrm_policy_lock); + rcu_read_lock(); retry: do { sequence = read_seqcount_begin(&xfrm_policy_hash_generation); @@ -1172,7 +1172,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, if (ret && !xfrm_pol_hold_rcu(ret)) goto retry; fail: - read_unlock_bh(&net->xfrm.xfrm_policy_lock); + rcu_read_unlock(); return ret; } -- cgit v1.1 From ae33786f73a7ce5b15ce29e8f342e43606385cef Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Aug 2016 15:17:57 +0200 Subject: xfrm: policy: only use rcu in xfrm_sk_policy_lookup Don't acquire the readlock anymore and rely on rcu alone. In case writer on other CPU changed policy at the wrong moment (after we obtained sk policy pointer but before we could obtain the reference) just repeat the lookup. 
Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 9302647..3d27b9a 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1249,10 +1249,9 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, const struct flowi *fl) { struct xfrm_policy *pol; - struct net *net = sock_net(sk); rcu_read_lock(); - read_lock_bh(&net->xfrm.xfrm_policy_lock); + again: pol = rcu_dereference(sk->sk_policy[dir]); if (pol != NULL) { bool match = xfrm_selector_match(&pol->selector, fl, @@ -1267,8 +1266,8 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, err = security_xfrm_policy_lookup(pol->security, fl->flowi_secid, policy_to_flow_dir(dir)); - if (!err) - xfrm_pol_hold(pol); + if (!err && !xfrm_pol_hold_rcu(pol)) + goto again; else if (err == -ESRCH) pol = NULL; else @@ -1277,7 +1276,6 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, pol = NULL; } out: - read_unlock_bh(&net->xfrm.xfrm_policy_lock); rcu_read_unlock(); return pol; } -- cgit v1.1 From d5b8f86dc7200d16e48bb3a6aaac29c0cdf621c9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Aug 2016 15:17:58 +0200 Subject: xfrm: policy: don't acquire policy lock in xfrm_spd_getinfo It doesn't seem that important. We now get inconsistent view of the counters, but those are stale anyway right after we drop the lock. 
Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 3d27b9a..35b85a9 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -560,7 +560,6 @@ static inline int xfrm_byidx_should_resize(struct net *net, int total) void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si) { - read_lock_bh(&net->xfrm.xfrm_policy_lock); si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN]; si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT]; si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD]; @@ -569,7 +568,6 @@ void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si) si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; si->spdhcnt = net->xfrm.policy_idx_hmask; si->spdhmcnt = xfrm_policy_hashmax; - read_unlock_bh(&net->xfrm.xfrm_policy_lock); } EXPORT_SYMBOL(xfrm_spd_getinfo); -- cgit v1.1 From 9d0380df6217e8dd014118fa1c99dda9974f3613 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Aug 2016 15:17:59 +0200 Subject: xfrm: policy: convert policy_lock to spinlock After earlier patches conversions all spots acquire the writer lock and we can now convert this to a normal spinlock. 
Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 2 +- net/xfrm/xfrm_policy.c | 68 ++++++++++++++++++++++++------------------------ 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 3ab828a..177ed44 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -73,7 +73,7 @@ struct netns_xfrm { struct dst_ops xfrm6_dst_ops; #endif spinlock_t xfrm_state_lock; - rwlock_t xfrm_policy_lock; + spinlock_t xfrm_policy_lock; struct mutex xfrm_cfg_mutex; /* flow cache part */ diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 35b85a9..dd01fd2 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -484,7 +484,7 @@ static void xfrm_bydst_resize(struct net *net, int dir) if (!ndst) return; - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); write_seqcount_begin(&xfrm_policy_hash_generation); odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table, @@ -500,7 +500,7 @@ static void xfrm_bydst_resize(struct net *net, int dir) net->xfrm.policy_bydst[dir].hmask = nhashmask; write_seqcount_end(&xfrm_policy_hash_generation); - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); synchronize_rcu(); @@ -519,7 +519,7 @@ static void xfrm_byidx_resize(struct net *net, int total) if (!nidx) return; - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); for (i = hmask; i >= 0; i--) xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask); @@ -527,7 +527,7 @@ static void xfrm_byidx_resize(struct net *net, int total) net->xfrm.policy_byidx = nidx; net->xfrm.policy_idx_hmask = nhashmask; - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); } @@ -617,7 +617,7 @@ static void xfrm_hash_rebuild(struct work_struct *work) 
rbits6 = net->xfrm.policy_hthresh.rbits6; } while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq)); - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); /* reset the bydst and inexact table in all directions */ for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { @@ -659,7 +659,7 @@ static void xfrm_hash_rebuild(struct work_struct *work) hlist_add_head(&policy->bydst, chain); } - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); mutex_unlock(&hash_resize_mutex); } @@ -770,7 +770,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) struct hlist_head *chain; struct hlist_node *newpos; - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); delpol = NULL; newpos = NULL; @@ -781,7 +781,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) xfrm_sec_ctx_match(pol->security, policy->security) && !WARN_ON(delpol)) { if (excl) { - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return -EEXIST; } delpol = pol; @@ -817,7 +817,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) xfrm_pol_hold(policy); - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (delpol) xfrm_policy_kill(delpol); @@ -837,7 +837,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, struct hlist_head *chain; *err = 0; - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_bysel(net, sel, sel->family, dir); ret = NULL; hlist_for_each_entry(pol, chain, bydst) { @@ -850,7 +850,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, *err = security_xfrm_policy_delete( pol->security); if 
(*err) { - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return pol; } __xfrm_policy_unlink(pol, dir); @@ -859,7 +859,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, break; } } - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (ret && delete) xfrm_policy_kill(ret); @@ -878,7 +878,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, return NULL; *err = 0; - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = net->xfrm.policy_byidx + idx_hash(net, id); ret = NULL; hlist_for_each_entry(pol, chain, byidx) { @@ -889,7 +889,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, *err = security_xfrm_policy_delete( pol->security); if (*err) { - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return pol; } __xfrm_policy_unlink(pol, dir); @@ -898,7 +898,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, break; } } - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (ret && delete) xfrm_policy_kill(ret); @@ -956,7 +956,7 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) { int dir, err = 0, cnt = 0; - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); err = xfrm_policy_flush_secctx_check(net, type, task_valid); if (err) @@ -972,14 +972,14 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) if (pol->type != type) continue; __xfrm_policy_unlink(pol, dir); - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); cnt++; xfrm_audit_policy_delete(pol, 1, task_valid); xfrm_policy_kill(pol); - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); goto again1; } @@ -991,13 +991,13 @@ int
xfrm_policy_flush(struct net *net, u8 type, bool task_valid) if (pol->type != type) continue; __xfrm_policy_unlink(pol, dir); - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); cnt++; xfrm_audit_policy_delete(pol, 1, task_valid); xfrm_policy_kill(pol); - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); goto again2; } } @@ -1006,7 +1006,7 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) if (!cnt) err = -ESRCH; out: - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return err; } EXPORT_SYMBOL(xfrm_policy_flush); @@ -1026,7 +1026,7 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, if (list_empty(&walk->walk.all) && walk->seq != 0) return 0; - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); if (list_empty(&walk->walk.all)) x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all); else @@ -1054,7 +1054,7 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, } list_del_init(&walk->walk.all); out: - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return error; } EXPORT_SYMBOL(xfrm_policy_walk); @@ -1073,9 +1073,9 @@ void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net) if (list_empty(&walk->walk.all)) return; - write_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */ + spin_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? 
*/ list_del(&walk->walk.all); - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); } EXPORT_SYMBOL(xfrm_policy_walk_done); @@ -1321,9 +1321,9 @@ int xfrm_policy_delete(struct xfrm_policy *pol, int dir) { struct net *net = xp_net(pol); - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); pol = __xfrm_policy_unlink(pol, dir); - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (pol) { xfrm_policy_kill(pol); return 0; @@ -1342,7 +1342,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) return -EINVAL; #endif - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); old_pol = rcu_dereference_protected(sk->sk_policy[dir], lockdep_is_held(&net->xfrm.xfrm_policy_lock)); if (pol) { @@ -1360,7 +1360,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) */ xfrm_sk_policy_unlink(old_pol, dir); } - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (old_pol) { xfrm_policy_kill(old_pol); @@ -1390,9 +1390,9 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir) newp->type = old->type; memcpy(newp->xfrm_vec, old->xfrm_vec, newp->xfrm_nr*sizeof(struct xfrm_tmpl)); - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); xfrm_sk_policy_link(newp, dir); - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); xfrm_pol_put(newp); } return newp; @@ -3074,7 +3074,7 @@ static int __net_init xfrm_net_init(struct net *net) /* Initialize the per-net locks here */ spin_lock_init(&net->xfrm.xfrm_state_lock); - rwlock_init(&net->xfrm.xfrm_policy_lock); + spin_lock_init(&net->xfrm.xfrm_policy_lock); mutex_init(&net->xfrm.xfrm_cfg_mutex); return 0; @@ -3206,7 +3206,7 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct 
xfrm_selector * struct hlist_head *chain; u32 priority = ~0U; - read_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME*/ + spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir); hlist_for_each_entry(pol, chain, bydst) { if (xfrm_migrate_selector_match(sel, &pol->selector) && @@ -3230,7 +3230,7 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector * xfrm_pol_hold(ret); - read_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return ret; } -- cgit v1.1 From 20c389e656a89e2302017bf3f499cb5a31a2a7ba Mon Sep 17 00:00:00 2001 From: Xing Zheng Date: Tue, 2 Aug 2016 15:19:58 +0800 Subject: clk: rockchip: fix incorrect aclk_emmc source gate bits on rk3399 Due to an incorrect diagram, we need to fix the incorrect bits for (c/g)pll_aclk_emmc_src: cpll_aclk_emmc_src --> G6[13] gpll_aclk_emmc_src --> G6[12] Fixes: 115510053e5e ("clk: rockchip: add clock controller for the RK3399") Signed-off-by: Xing Zheng Reviewed-by: Shawn Lin Signed-off-by: Heiko Stuebner --- drivers/clk/rockchip/clk-rk3399.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/rockchip/clk-rk3399.c b/drivers/clk/rockchip/clk-rk3399.c index 314eab6..01fa60e 100644 --- a/drivers/clk/rockchip/clk-rk3399.c +++ b/drivers/clk/rockchip/clk-rk3399.c @@ -923,9 +923,9 @@ static struct rockchip_clk_branch rk3399_clk_branches[] __initdata = { RK3399_CLKGATE_CON(6), 14, GFLAGS), GATE(0, "cpll_aclk_emmc_src", "cpll", CLK_IGNORE_UNUSED, - RK3399_CLKGATE_CON(6), 12, GFLAGS), - GATE(0, "gpll_aclk_emmc_src", "gpll", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(6), 13, GFLAGS), + GATE(0, "gpll_aclk_emmc_src", "gpll", CLK_IGNORE_UNUSED, + RK3399_CLKGATE_CON(6), 12, GFLAGS), COMPOSITE_NOGATE(ACLK_EMMC, "aclk_emmc", mux_aclk_emmc_p, CLK_IGNORE_UNUSED, RK3399_CLKSEL_CON(21), 7, 1, MFLAGS, 0, 5, DFLAGS), GATE(ACLK_EMMC_CORE, "aclk_emmccore", "aclk_emmc", CLK_IGNORE_UNUSED, -- cgit v1.1 From
b33ecca87df99fa6fff8a1d455de96f436934dcf Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 8 Aug 2016 21:55:39 +0200 Subject: phy-sun4i-usb: Add support for peripheral-only mode Use the new of_usb_get_dr_mode_by_phy() function to get the dr_mode from the musb controller node instead of assuming that having an id_det gpio means otg mode, and not having one means host mode. Implement peripheral-only mode by adding a sun4i_usb_phy0_get_id_det helper which looks at the dr_mode, always registering our extcon and always monitoring vbus. If dr_mode is not specified in the dts, do not register phy0 as we then do not know how to treat it. This is actually a good thing as this means we will not be registering phy0 on devices where the otg controller is not enabled in the devicetree. Signed-off-by: Hans de Goede Acked-by: Kishon Vijay Abraham I Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/phy-sun4i-usb.c | 68 ++++++++++++++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 22 deletions(-) diff --git a/drivers/phy/phy-sun4i-usb.c b/drivers/phy/phy-sun4i-usb.c index 0a45bc6..8c7eb33 100644 --- a/drivers/phy/phy-sun4i-usb.c +++ b/drivers/phy/phy-sun4i-usb.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #define REG_ISCR 0x00 @@ -110,6 +111,7 @@ struct sun4i_usb_phy_cfg { struct sun4i_usb_phy_data { void __iomem *base; const struct sun4i_usb_phy_cfg *cfg; + enum usb_dr_mode dr_mode; struct mutex mutex; struct sun4i_usb_phy { struct phy *phy; @@ -120,6 +122,7 @@ struct sun4i_usb_phy_data { bool regulator_on; int index; } phys[MAX_PHYS]; + int first_phy; /* phy0 / otg related variables */ struct extcon_dev *extcon; bool phy0_init; @@ -285,16 +288,10 @@ static int sun4i_usb_phy_init(struct phy *_phy) sun4i_usb_phy0_update_iscr(_phy, 0, ISCR_DPDM_PULLUP_EN); sun4i_usb_phy0_update_iscr(_phy, 0, ISCR_ID_PULLUP_EN); - if (data->id_det_gpio) { - /* OTG mode, force ISCR and cable state updates */ - data->id_det = -1; - data->vbus_det = -1; - 
queue_delayed_work(system_wq, &data->detect, 0); - } else { - /* Host only mode */ - sun4i_usb_phy0_set_id_detect(_phy, 0); - sun4i_usb_phy0_set_vbus_detect(_phy, 1); - } + /* Force ISCR and cable state updates */ + data->id_det = -1; + data->vbus_det = -1; + queue_delayed_work(system_wq, &data->detect, 0); } return 0; @@ -319,6 +316,19 @@ static int sun4i_usb_phy_exit(struct phy *_phy) return 0; } +static int sun4i_usb_phy0_get_id_det(struct sun4i_usb_phy_data *data) +{ + switch (data->dr_mode) { + case USB_DR_MODE_OTG: + return gpiod_get_value_cansleep(data->id_det_gpio); + case USB_DR_MODE_HOST: + return 0; + case USB_DR_MODE_PERIPHERAL: + default: + return 1; + } +} + static int sun4i_usb_phy0_get_vbus_det(struct sun4i_usb_phy_data *data) { if (data->vbus_det_gpio) @@ -432,7 +442,10 @@ static void sun4i_usb_phy0_id_vbus_det_scan(struct work_struct *work) struct phy *phy0 = data->phys[0].phy; int id_det, vbus_det, id_notify = 0, vbus_notify = 0; - id_det = gpiod_get_value_cansleep(data->id_det_gpio); + if (phy0 == NULL) + return; + + id_det = sun4i_usb_phy0_get_id_det(data); vbus_det = sun4i_usb_phy0_get_vbus_det(data); mutex_lock(&phy0->mutex); @@ -448,7 +461,8 @@ static void sun4i_usb_phy0_id_vbus_det_scan(struct work_struct *work) * without vbus detection report vbus low for long enough for * the musb-ip to end the current device session. */ - if (!sun4i_usb_phy0_have_vbus_det(data) && id_det == 0) { + if (data->dr_mode == USB_DR_MODE_OTG && + !sun4i_usb_phy0_have_vbus_det(data) && id_det == 0) { sun4i_usb_phy0_set_vbus_detect(phy0, 0); msleep(200); sun4i_usb_phy0_set_vbus_detect(phy0, 1); @@ -474,7 +488,8 @@ static void sun4i_usb_phy0_id_vbus_det_scan(struct work_struct *work) * without vbus detection report vbus low for long enough to * the musb-ip to end the current host session. 
*/ - if (!sun4i_usb_phy0_have_vbus_det(data) && id_det == 1) { + if (data->dr_mode == USB_DR_MODE_OTG && + !sun4i_usb_phy0_have_vbus_det(data) && id_det == 1) { mutex_lock(&phy0->mutex); sun4i_usb_phy0_set_vbus_detect(phy0, 0); msleep(1000); @@ -519,7 +534,8 @@ static struct phy *sun4i_usb_phy_xlate(struct device *dev, { struct sun4i_usb_phy_data *data = dev_get_drvdata(dev); - if (args->args[0] >= data->cfg->num_phys) + if (args->args[0] < data->first_phy || + args->args[0] >= data->cfg->num_phys) return ERR_PTR(-ENODEV); return data->phys[args->args[0]].phy; @@ -593,13 +609,17 @@ static int sun4i_usb_phy_probe(struct platform_device *pdev) return -EPROBE_DEFER; } - /* vbus_det without id_det makes no sense, and is not supported */ - if (sun4i_usb_phy0_have_vbus_det(data) && !data->id_det_gpio) { - dev_err(dev, "usb0_id_det missing or invalid\n"); - return -ENODEV; - } - - if (data->id_det_gpio) { + data->dr_mode = of_usb_get_dr_mode_by_phy(np, 0); + switch (data->dr_mode) { + case USB_DR_MODE_OTG: + /* otg without id_det makes no sense, and is not supported */ + if (!data->id_det_gpio) { + dev_err(dev, "usb0_id_det missing or invalid\n"); + return -ENODEV; + } + /* fall through */ + case USB_DR_MODE_HOST: + case USB_DR_MODE_PERIPHERAL: data->extcon = devm_extcon_dev_allocate(dev, sun4i_usb_phy0_cable); if (IS_ERR(data->extcon)) @@ -610,9 +630,13 @@ static int sun4i_usb_phy_probe(struct platform_device *pdev) dev_err(dev, "failed to register extcon: %d\n", ret); return ret; } + break; + default: + dev_info(dev, "dr_mode unknown, not registering usb phy0\n"); + data->first_phy = 1; } - for (i = 0; i < data->cfg->num_phys; i++) { + for (i = data->first_phy; i < data->cfg->num_phys; i++) { struct sun4i_usb_phy *phy = data->phys + i; char name[16]; -- cgit v1.1 From 1766e7b3763a0707c2fda9689a7866dceed07b7a Mon Sep 17 00:00:00 2001 From: David Lechner Date: Wed, 10 Aug 2016 18:49:27 +0530 Subject: mfd: da8xx-cfgchip: New header file for CFGCHIP registers Create a new 
header file for TI DA8XX SoC CFGCHIPx registers. This will be used by a number of planned drivers including a new USB PHY driver and common clock framework drivers. The same defines *will* be removed from the platform_data header, once all the users start using the new syscon device header. This also fixes the following compiler error caused due to a dependent patch not merged. drivers/phy/phy-da8xx-usb.c:19:37: fatal error: linux/mfd/da8xx-cfgchip.h: No such file or directory #include Signed-off-by: David Lechner Acked-by: Lee Jones Reported-by: Arnd Bergmann Signed-off-by: Kishon Vijay Abraham I --- include/linux/mfd/da8xx-cfgchip.h | 153 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 include/linux/mfd/da8xx-cfgchip.h diff --git a/include/linux/mfd/da8xx-cfgchip.h b/include/linux/mfd/da8xx-cfgchip.h new file mode 100644 index 0000000..304985e --- /dev/null +++ b/include/linux/mfd/da8xx-cfgchip.h @@ -0,0 +1,153 @@ +/* + * TI DaVinci DA8xx CHIPCFGx registers for syscon consumers. + * + * Copyright (C) 2016 David Lechner + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#ifndef __LINUX_MFD_DA8XX_CFGCHIP_H +#define __LINUX_MFD_DA8XX_CFGCHIP_H + +#include + +/* register offset (32-bit registers) */ +#define CFGCHIP(n) ((n) * 4) + +/* CFGCHIP0 (PLL0/EDMA3_0) register bits */ +#define CFGCHIP0_PLL_MASTER_LOCK BIT(4) +#define CFGCHIP0_EDMA30TC1DBS(n) ((n) << 2) +#define CFGCHIP0_EDMA30TC1DBS_MASK CFGCHIP0_EDMA30TC1DBS(0x3) +#define CFGCHIP0_EDMA30TC1DBS_16 CFGCHIP0_EDMA30TC1DBS(0x0) +#define CFGCHIP0_EDMA30TC1DBS_32 CFGCHIP0_EDMA30TC1DBS(0x1) +#define CFGCHIP0_EDMA30TC1DBS_64 CFGCHIP0_EDMA30TC1DBS(0x2) +#define CFGCHIP0_EDMA30TC0DBS(n) ((n) << 0) +#define CFGCHIP0_EDMA30TC0DBS_MASK CFGCHIP0_EDMA30TC0DBS(0x3) +#define CFGCHIP0_EDMA30TC0DBS_16 CFGCHIP0_EDMA30TC0DBS(0x0) +#define CFGCHIP0_EDMA30TC0DBS_32 CFGCHIP0_EDMA30TC0DBS(0x1) +#define CFGCHIP0_EDMA30TC0DBS_64 CFGCHIP0_EDMA30TC0DBS(0x2) + +/* CFGCHIP1 (eCAP/HPI/EDMA3_1/eHRPWM TBCLK/McASP0 AMUTEIN) register bits */ +#define CFGCHIP1_CAP2SRC(n) ((n) << 27) +#define CFGCHIP1_CAP2SRC_MASK CFGCHIP1_CAP2SRC(0x1f) +#define CFGCHIP1_CAP2SRC_ECAP_PIN CFGCHIP1_CAP2SRC(0x0) +#define CFGCHIP1_CAP2SRC_MCASP0_TX CFGCHIP1_CAP2SRC(0x1) +#define CFGCHIP1_CAP2SRC_MCASP0_RX CFGCHIP1_CAP2SRC(0x2) +#define CFGCHIP1_CAP2SRC_EMAC_C0_RX_THRESHOLD CFGCHIP1_CAP2SRC(0x7) +#define CFGCHIP1_CAP2SRC_EMAC_C0_RX CFGCHIP1_CAP2SRC(0x8) +#define CFGCHIP1_CAP2SRC_EMAC_C0_TX CFGCHIP1_CAP2SRC(0x9) +#define CFGCHIP1_CAP2SRC_EMAC_C0_MISC CFGCHIP1_CAP2SRC(0xa) +#define CFGCHIP1_CAP2SRC_EMAC_C1_RX_THRESHOLD CFGCHIP1_CAP2SRC(0xb) +#define CFGCHIP1_CAP2SRC_EMAC_C1_RX CFGCHIP1_CAP2SRC(0xc) +#define CFGCHIP1_CAP2SRC_EMAC_C1_TX CFGCHIP1_CAP2SRC(0xd) +#define CFGCHIP1_CAP2SRC_EMAC_C1_MISC CFGCHIP1_CAP2SRC(0xe) +#define CFGCHIP1_CAP2SRC_EMAC_C2_RX_THRESHOLD CFGCHIP1_CAP2SRC(0xf) +#define CFGCHIP1_CAP2SRC_EMAC_C2_RX CFGCHIP1_CAP2SRC(0x10) +#define CFGCHIP1_CAP2SRC_EMAC_C2_TX CFGCHIP1_CAP2SRC(0x11) +#define CFGCHIP1_CAP2SRC_EMAC_C2_MISC CFGCHIP1_CAP2SRC(0x12) +#define CFGCHIP1_CAP1SRC(n) ((n) << 22) +#define 
CFGCHIP1_CAP1SRC_MASK CFGCHIP1_CAP1SRC(0x1f) +#define CFGCHIP1_CAP1SRC_ECAP_PIN CFGCHIP1_CAP1SRC(0x0) +#define CFGCHIP1_CAP1SRC_MCASP0_TX CFGCHIP1_CAP1SRC(0x1) +#define CFGCHIP1_CAP1SRC_MCASP0_RX CFGCHIP1_CAP1SRC(0x2) +#define CFGCHIP1_CAP1SRC_EMAC_C0_RX_THRESHOLD CFGCHIP1_CAP1SRC(0x7) +#define CFGCHIP1_CAP1SRC_EMAC_C0_RX CFGCHIP1_CAP1SRC(0x8) +#define CFGCHIP1_CAP1SRC_EMAC_C0_TX CFGCHIP1_CAP1SRC(0x9) +#define CFGCHIP1_CAP1SRC_EMAC_C0_MISC CFGCHIP1_CAP1SRC(0xa) +#define CFGCHIP1_CAP1SRC_EMAC_C1_RX_THRESHOLD CFGCHIP1_CAP1SRC(0xb) +#define CFGCHIP1_CAP1SRC_EMAC_C1_RX CFGCHIP1_CAP1SRC(0xc) +#define CFGCHIP1_CAP1SRC_EMAC_C1_TX CFGCHIP1_CAP1SRC(0xd) +#define CFGCHIP1_CAP1SRC_EMAC_C1_MISC CFGCHIP1_CAP1SRC(0xe) +#define CFGCHIP1_CAP1SRC_EMAC_C2_RX_THRESHOLD CFGCHIP1_CAP1SRC(0xf) +#define CFGCHIP1_CAP1SRC_EMAC_C2_RX CFGCHIP1_CAP1SRC(0x10) +#define CFGCHIP1_CAP1SRC_EMAC_C2_TX CFGCHIP1_CAP1SRC(0x11) +#define CFGCHIP1_CAP1SRC_EMAC_C2_MISC CFGCHIP1_CAP1SRC(0x12) +#define CFGCHIP1_CAP0SRC(n) ((n) << 17) +#define CFGCHIP1_CAP0SRC_MASK CFGCHIP1_CAP0SRC(0x1f) +#define CFGCHIP1_CAP0SRC_ECAP_PIN CFGCHIP1_CAP0SRC(0x0) +#define CFGCHIP1_CAP0SRC_MCASP0_TX CFGCHIP1_CAP0SRC(0x1) +#define CFGCHIP1_CAP0SRC_MCASP0_RX CFGCHIP1_CAP0SRC(0x2) +#define CFGCHIP1_CAP0SRC_EMAC_C0_RX_THRESHOLD CFGCHIP1_CAP0SRC(0x7) +#define CFGCHIP1_CAP0SRC_EMAC_C0_RX CFGCHIP1_CAP0SRC(0x8) +#define CFGCHIP1_CAP0SRC_EMAC_C0_TX CFGCHIP1_CAP0SRC(0x9) +#define CFGCHIP1_CAP0SRC_EMAC_C0_MISC CFGCHIP1_CAP0SRC(0xa) +#define CFGCHIP1_CAP0SRC_EMAC_C1_RX_THRESHOLD CFGCHIP1_CAP0SRC(0xb) +#define CFGCHIP1_CAP0SRC_EMAC_C1_RX CFGCHIP1_CAP0SRC(0xc) +#define CFGCHIP1_CAP0SRC_EMAC_C1_TX CFGCHIP1_CAP0SRC(0xd) +#define CFGCHIP1_CAP0SRC_EMAC_C1_MISC CFGCHIP1_CAP0SRC(0xe) +#define CFGCHIP1_CAP0SRC_EMAC_C2_RX_THRESHOLD CFGCHIP1_CAP0SRC(0xf) +#define CFGCHIP1_CAP0SRC_EMAC_C2_RX CFGCHIP1_CAP0SRC(0x10) +#define CFGCHIP1_CAP0SRC_EMAC_C2_TX CFGCHIP1_CAP0SRC(0x11) +#define CFGCHIP1_CAP0SRC_EMAC_C2_MISC CFGCHIP1_CAP0SRC(0x12) +#define 
CFGCHIP1_HPIBYTEAD BIT(16) +#define CFGCHIP1_HPIENA BIT(15) +#define CFGCHIP0_EDMA31TC0DBS(n) ((n) << 13) +#define CFGCHIP0_EDMA31TC0DBS_MASK CFGCHIP0_EDMA31TC0DBS(0x3) +#define CFGCHIP0_EDMA31TC0DBS_16 CFGCHIP0_EDMA31TC0DBS(0x0) +#define CFGCHIP0_EDMA31TC0DBS_32 CFGCHIP0_EDMA31TC0DBS(0x1) +#define CFGCHIP0_EDMA31TC0DBS_64 CFGCHIP0_EDMA31TC0DBS(0x2) +#define CFGCHIP1_TBCLKSYNC BIT(12) +#define CFGCHIP1_AMUTESEL0(n) ((n) << 0) +#define CFGCHIP1_AMUTESEL0_MASK CFGCHIP1_AMUTESEL0(0xf) +#define CFGCHIP1_AMUTESEL0_LOW CFGCHIP1_AMUTESEL0(0x0) +#define CFGCHIP1_AMUTESEL0_BANK_0 CFGCHIP1_AMUTESEL0(0x1) +#define CFGCHIP1_AMUTESEL0_BANK_1 CFGCHIP1_AMUTESEL0(0x2) +#define CFGCHIP1_AMUTESEL0_BANK_2 CFGCHIP1_AMUTESEL0(0x3) +#define CFGCHIP1_AMUTESEL0_BANK_3 CFGCHIP1_AMUTESEL0(0x4) +#define CFGCHIP1_AMUTESEL0_BANK_4 CFGCHIP1_AMUTESEL0(0x5) +#define CFGCHIP1_AMUTESEL0_BANK_5 CFGCHIP1_AMUTESEL0(0x6) +#define CFGCHIP1_AMUTESEL0_BANK_6 CFGCHIP1_AMUTESEL0(0x7) +#define CFGCHIP1_AMUTESEL0_BANK_7 CFGCHIP1_AMUTESEL0(0x8) + +/* CFGCHIP2 (USB PHY) register bits */ +#define CFGCHIP2_PHYCLKGD BIT(17) +#define CFGCHIP2_VBUSSENSE BIT(16) +#define CFGCHIP2_RESET BIT(15) +#define CFGCHIP2_OTGMODE(n) ((n) << 13) +#define CFGCHIP2_OTGMODE_MASK CFGCHIP2_OTGMODE(0x3) +#define CFGCHIP2_OTGMODE_NO_OVERRIDE CFGCHIP2_OTGMODE(0x0) +#define CFGCHIP2_OTGMODE_FORCE_HOST CFGCHIP2_OTGMODE(0x1) +#define CFGCHIP2_OTGMODE_FORCE_DEVICE CFGCHIP2_OTGMODE(0x2) +#define CFGCHIP2_OTGMODE_FORCE_HOST_VBUS_LOW CFGCHIP2_OTGMODE(0x3) +#define CFGCHIP2_USB1PHYCLKMUX BIT(12) +#define CFGCHIP2_USB2PHYCLKMUX BIT(11) +#define CFGCHIP2_PHYPWRDN BIT(10) +#define CFGCHIP2_OTGPWRDN BIT(9) +#define CFGCHIP2_DATPOL BIT(8) +#define CFGCHIP2_USB1SUSPENDM BIT(7) +#define CFGCHIP2_PHY_PLLON BIT(6) +#define CFGCHIP2_SESENDEN BIT(5) +#define CFGCHIP2_VBDTCTEN BIT(4) +#define CFGCHIP2_REFFREQ(n) ((n) << 0) +#define CFGCHIP2_REFFREQ_MASK CFGCHIP2_REFFREQ(0xf) +#define CFGCHIP2_REFFREQ_12MHZ CFGCHIP2_REFFREQ(0x1) +#define 
CFGCHIP2_REFFREQ_24MHZ CFGCHIP2_REFFREQ(0x2) +#define CFGCHIP2_REFFREQ_48MHZ CFGCHIP2_REFFREQ(0x3) +#define CFGCHIP2_REFFREQ_19_2MHZ CFGCHIP2_REFFREQ(0x4) +#define CFGCHIP2_REFFREQ_38_4MHZ CFGCHIP2_REFFREQ(0x5) +#define CFGCHIP2_REFFREQ_13MHZ CFGCHIP2_REFFREQ(0x6) +#define CFGCHIP2_REFFREQ_26MHZ CFGCHIP2_REFFREQ(0x7) +#define CFGCHIP2_REFFREQ_20MHZ CFGCHIP2_REFFREQ(0x8) +#define CFGCHIP2_REFFREQ_40MHZ CFGCHIP2_REFFREQ(0x9) + +/* CFGCHIP3 (EMAC/uPP/PLL1/ASYNC3/PRU/DIV4.5/EMIFA) register bits */ +#define CFGCHIP3_RMII_SEL BIT(8) +#define CFGCHIP3_UPP_TX_CLKSRC BIT(6) +#define CFGCHIP3_PLL1_MASTER_LOCK BIT(5) +#define CFGCHIP3_ASYNC3_CLKSRC BIT(4) +#define CFGCHIP3_PRUEVTSEL BIT(3) +#define CFGCHIP3_DIV45PENA BIT(2) +#define CFGCHIP3_EMA_CLKSRC BIT(1) + +/* CFGCHIP4 (McASP0 AMUNTEIN) register bits */ +#define CFGCHIP4_AMUTECLR0 BIT(0) + +#endif /* __LINUX_MFD_DA8XX_CFGCHIP_H */ -- cgit v1.1 From bf8ca651e1f8f054b39b6b3b95d6f515c3c857d5 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 10 Aug 2016 18:04:44 +0800 Subject: phy: brcm-sata: Return proper error if brcm_sata_phy_init fails Return proper error instead of 0 if brcm_sata_phy_init fails. Signed-off-by: Axel Lin Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/phy-brcm-sata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/phy-brcm-sata.c b/drivers/phy/phy-brcm-sata.c index 18d6626..8ffc44a 100644 --- a/drivers/phy/phy-brcm-sata.c +++ b/drivers/phy/phy-brcm-sata.c @@ -367,7 +367,7 @@ static int brcm_sata_phy_init(struct phy *phy) rc = -ENODEV; }; - return 0; + return rc; } static const struct phy_ops phy_ops = { -- cgit v1.1 From 017300da3a4547d85e52c2484fc0bd759e1bbcdb Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 6 Jul 2016 08:00:06 +0200 Subject: phy: sun9i-usb: fix error handling This is likely that checking 'phy->hsic_clk' instead of 'phy->clk' is expected here. 
Signed-off-by: Christophe JAILLET Acked-by: Chen-Yu Tsai --- drivers/phy/phy-sun9i-usb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/phy-sun9i-usb.c b/drivers/phy/phy-sun9i-usb.c index ac4f31a..28fce4b 100644 --- a/drivers/phy/phy-sun9i-usb.c +++ b/drivers/phy/phy-sun9i-usb.c @@ -141,9 +141,9 @@ static int sun9i_usb_phy_probe(struct platform_device *pdev) } phy->hsic_clk = devm_clk_get(dev, "hsic_12M"); - if (IS_ERR(phy->clk)) { + if (IS_ERR(phy->hsic_clk)) { dev_err(dev, "failed to get hsic_12M clock\n"); - return PTR_ERR(phy->clk); + return PTR_ERR(phy->hsic_clk); } phy->reset = devm_reset_control_get(dev, "hsic"); -- cgit v1.1 From 4608d96fb491125657fd8183a35921e4d4e27bc8 Mon Sep 17 00:00:00 2001 From: Xing Zheng Date: Tue, 2 Aug 2016 15:19:57 +0800 Subject: clk: rockchip: fix incorrect GATE bits for {c, g}pll_aclk_perihp_src on rk3399 Sorry to refer incorrect clock diagram, we double check it that the bits configuration of the Xpll_aclk_perihp_src need to be fixed: bit 1 - shows aclk_perihp_cpll_src_en bit 0 - shows aclk_perihp_gpll_src_en Through the testing that plug/unplug the USB ethernet cable on the RK3399 kevin board. 1. the hclk_host0 and hclk_host1 are endpoint clocks: cpll --> G5[1] --> aclk_perihp_cpll_src --\ |--> hclk_host0 | --> ... ---> | gpll --> G5[0] --> aclk_perihp_gpll_src --/ |--> hclk_host1 2. there is no clock below the cpll_aclk_perihp_src, and the hclk_hostX are below the gpll_aclk_perihp_src: pll_cpll 1 1 800000000 0 0 cpll 7 19 800000000 0 0 cpll_aclk_perihp_src 0 0 800000000 0 0 ... pll_gpll 1 1 594000000 0 0 gpll 10 10 594000000 0 0 gpll_aclk_perihp_src 2 2 594000000 0 0 hclk_perihp 5 5 74250000 0 0 hclk_host1_arb 2 2 74250000 0 0 hclk_host1 2 2 74250000 0 0 hclk_host0_arb 2 2 74250000 0 0 hclk_host0 2 2 74250000 0 0 3. by default, G5[0] and G5[1] are enabled: localhost ~ # mem r 0xff760314 0x000003e0 4. 
close the G5[1] (aclk_perihp_cpll_src), and plug/unplug USB ethernet cable, the DUT still works well: localhost ~ # mem w 0xff760314 0xffff03e2 localhost ~ # mem r 0xff760314 0x000003e2 plug/unplug, the working status is ok 5. close the G5[0] (aclk_perihp_gpll_src), and plug/unplug USB ethernet cable, the DUT will be crashed: localhost ~ # mem w 0xff760314 0xffff03e1 localhost ~ # mem r 0xff760314 0x000003e1 plug/unplug, the DUT is crashed Summary: bit 1 - shows aclk_perihp_cpll_src_en bit 0 - shows aclk_perihp_gpll_src_en Fixes: 3bd14ae9da91 ("clk: rockchip: fix incorrect parent for rk3399's {c,g}pll_aclk_perihp_src") Signed-off-by: Xing Zheng [here the clock-documentation in the manual was actually stating the wrong bits and thus only Xing's testing above revealed the issue] Signed-off-by: Heiko Stuebner --- drivers/clk/rockchip/clk-rk3399.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/rockchip/clk-rk3399.c b/drivers/clk/rockchip/clk-rk3399.c index 01fa60e..ec5b2fd 100644 --- a/drivers/clk/rockchip/clk-rk3399.c +++ b/drivers/clk/rockchip/clk-rk3399.c @@ -833,9 +833,9 @@ static struct rockchip_clk_branch rk3399_clk_branches[] __initdata = { /* perihp */ GATE(0, "cpll_aclk_perihp_src", "cpll", CLK_IGNORE_UNUSED, - RK3399_CLKGATE_CON(5), 0, GFLAGS), - GATE(0, "gpll_aclk_perihp_src", "gpll", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(5), 1, GFLAGS), + GATE(0, "gpll_aclk_perihp_src", "gpll", CLK_IGNORE_UNUSED, + RK3399_CLKGATE_CON(5), 0, GFLAGS), COMPOSITE(ACLK_PERIHP, "aclk_perihp", mux_aclk_perihp_p, CLK_IGNORE_UNUSED, RK3399_CLKSEL_CON(14), 7, 1, MFLAGS, 0, 5, DFLAGS, RK3399_CLKGATE_CON(5), 2, GFLAGS), -- cgit v1.1 From 1c8d477a77e2d1d3504419e7f2e02e6422becf9a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 14 Aug 2016 12:47:49 -0400 Subject: pNFS/flexfiles: Fix layoutstat periodic reporting Putting the periodicity timer in the mirror instances is causing non-scalable reporting behaviour and missed reporting intervals.
When you recall layouts and/or implement client side mirroring, it leads to consecutive reports with only a few ms between RPC calls. Signed-off-by: Trond Myklebust Fixes: d0379a5d066a9 ("pNFS/flexfiles: Support server-supplied...") --- fs/nfs/flexfilelayout/flexfilelayout.c | 8 ++++---- fs/nfs/flexfilelayout/flexfilelayout.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index e6206ea..ee1c94c 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -37,6 +37,7 @@ ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) if (ffl) { INIT_LIST_HEAD(&ffl->error_list); INIT_LIST_HEAD(&ffl->mirrors); + ffl->last_report_time = ktime_get(); return &ffl->generic_hdr; } else return NULL; @@ -640,19 +641,18 @@ nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror, { static const ktime_t notime = {0}; s64 report_interval = FF_LAYOUTSTATS_REPORT_INTERVAL; + struct nfs4_flexfile_layout *ffl = FF_LAYOUT_FROM_HDR(mirror->layout); nfs4_ff_start_busy_timer(&layoutstat->busy_timer, now); if (ktime_equal(mirror->start_time, notime)) mirror->start_time = now; - if (ktime_equal(mirror->last_report_time, notime)) - mirror->last_report_time = now; if (mirror->report_interval != 0) report_interval = (s64)mirror->report_interval * 1000LL; else if (layoutstats_timer != 0) report_interval = (s64)layoutstats_timer * 1000LL; - if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >= + if (ktime_to_ms(ktime_sub(now, ffl->last_report_time)) >= report_interval) { - mirror->last_report_time = now; + ffl->last_report_time = now; return true; } diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 1bcdb15..3ee0c9f 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -84,7 +84,6 @@ struct nfs4_ff_layout_mirror { struct nfs4_ff_layoutstat 
read_stat; struct nfs4_ff_layoutstat write_stat; ktime_t start_time; - ktime_t last_report_time; u32 report_interval; }; @@ -101,6 +100,7 @@ struct nfs4_flexfile_layout { struct pnfs_ds_commit_info commit_info; struct list_head mirrors; struct list_head error_list; /* nfs4_ff_layout_ds_err */ + ktime_t last_report_time; /* Layoutstat report times */ }; static inline struct nfs4_flexfile_layout * -- cgit v1.1 From 5a5a1d614287a647b36dff3f40c2b0ceabbc83ec Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Fri, 12 Aug 2016 01:05:08 +0300 Subject: USB: serial: mos7720: fix non-atomic allocation in write path There is an allocation with GFP_KERNEL flag in mos7720_write(), while it may be called from interrupt context. Follow-up for commit 191252837626 ("USB: kobil_sct: fix non-atomic allocation in write path") Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/mos7720.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index 5608af4..de9992b 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -1252,7 +1252,7 @@ static int mos7720_write(struct tty_struct *tty, struct usb_serial_port *port, if (urb->transfer_buffer == NULL) { urb->transfer_buffer = kmalloc(URB_TRANSFER_BUFFER_SIZE, - GFP_KERNEL); + GFP_ATOMIC); if (!urb->transfer_buffer) goto exit; } -- cgit v1.1 From 3b7c7e52efda0d4640060de747768360ba70a7c0 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Fri, 12 Aug 2016 01:05:09 +0300 Subject: USB: serial: mos7840: fix non-atomic allocation in write path There is an allocation with GFP_KERNEL flag in mos7840_write(), while it may be called from interrupt context. Follow-up for commit 191252837626 ("USB: kobil_sct: fix non-atomic allocation in write path") Found by Linux Driver Verification project (linuxtesting.org). 
Signed-off-by: Alexey Khoroshilov Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/mos7840.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index ed378fb..57426d7 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -1340,8 +1340,8 @@ static int mos7840_write(struct tty_struct *tty, struct usb_serial_port *port, } if (urb->transfer_buffer == NULL) { - urb->transfer_buffer = - kmalloc(URB_TRANSFER_BUFFER_SIZE, GFP_KERNEL); + urb->transfer_buffer = kmalloc(URB_TRANSFER_BUFFER_SIZE, + GFP_ATOMIC); if (!urb->transfer_buffer) goto exit; } -- cgit v1.1 From 7ef9153d9af5fe7ce32dcc0f558bfcfc3d2b3016 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 15 Aug 2016 15:17:41 +0200 Subject: misc: delete bh1780 driver The Rohm BH1780 ambient light sensor has a new driver with extended functionality (proper runtime PM) in the appropriate framework IIO, it can be found at: drivers/iio/light/bh1780.c The MISC driver symbol CONFIG_SENSORS_BH1780 does not appear in any defconfigs, so it should safe to delete. Cc: Hemanth V Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/misc/Kconfig | 10 -- drivers/misc/Makefile | 1 - drivers/misc/bh1780gli.c | 259 ----------------------------------------------- 3 files changed, 270 deletions(-) delete mode 100644 drivers/misc/bh1780gli.c diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index a216b46..d002528 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -345,16 +345,6 @@ config SENSORS_TSL2550 This driver can also be built as a module. If so, the module will be called tsl2550. -config SENSORS_BH1780 - tristate "ROHM BH1780GLI ambient light sensor" - depends on I2C && SYSFS - help - If you say yes here you get support for the ROHM BH1780GLI - ambient light sensor. - - This driver can also be built as a module. If so, the module - will be called bh1780gli. 
- config SENSORS_BH1770 tristate "BH1770GLC / SFH7770 combined ALS - Proximity sensor" depends on I2C diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 7410c6d..fb32516 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -19,7 +19,6 @@ obj-$(CONFIG_TIFM_CORE) += tifm_core.o obj-$(CONFIG_TIFM_7XX1) += tifm_7xx1.o obj-$(CONFIG_PHANTOM) += phantom.o obj-$(CONFIG_QCOM_COINCELL) += qcom-coincell.o -obj-$(CONFIG_SENSORS_BH1780) += bh1780gli.o obj-$(CONFIG_SENSORS_BH1770) += bh1770glc.o obj-$(CONFIG_SENSORS_APDS990X) += apds990x.o obj-$(CONFIG_SGI_IOC4) += ioc4.o diff --git a/drivers/misc/bh1780gli.c b/drivers/misc/bh1780gli.c deleted file mode 100644 index 7f90ce5..0000000 --- a/drivers/misc/bh1780gli.c +++ /dev/null @@ -1,259 +0,0 @@ -/* - * bh1780gli.c - * ROHM Ambient Light Sensor Driver - * - * Copyright (C) 2010 Texas Instruments - * Author: Hemanth V - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see . 
- */ -#include -#include -#include -#include -#include -#include -#include - -#define BH1780_REG_CONTROL 0x80 -#define BH1780_REG_PARTID 0x8A -#define BH1780_REG_MANFID 0x8B -#define BH1780_REG_DLOW 0x8C -#define BH1780_REG_DHIGH 0x8D - -#define BH1780_REVMASK (0xf) -#define BH1780_POWMASK (0x3) -#define BH1780_POFF (0x0) -#define BH1780_PON (0x3) - -/* power on settling time in ms */ -#define BH1780_PON_DELAY 2 - -struct bh1780_data { - struct i2c_client *client; - int power_state; - /* lock for sysfs operations */ - struct mutex lock; -}; - -static int bh1780_write(struct bh1780_data *ddata, u8 reg, u8 val, char *msg) -{ - int ret = i2c_smbus_write_byte_data(ddata->client, reg, val); - if (ret < 0) - dev_err(&ddata->client->dev, - "i2c_smbus_write_byte_data failed error %d Register (%s)\n", - ret, msg); - return ret; -} - -static int bh1780_read(struct bh1780_data *ddata, u8 reg, char *msg) -{ - int ret = i2c_smbus_read_byte_data(ddata->client, reg); - if (ret < 0) - dev_err(&ddata->client->dev, - "i2c_smbus_read_byte_data failed error %d Register (%s)\n", - ret, msg); - return ret; -} - -static ssize_t bh1780_show_lux(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct platform_device *pdev = to_platform_device(dev); - struct bh1780_data *ddata = platform_get_drvdata(pdev); - int lsb, msb; - - lsb = bh1780_read(ddata, BH1780_REG_DLOW, "DLOW"); - if (lsb < 0) - return lsb; - - msb = bh1780_read(ddata, BH1780_REG_DHIGH, "DHIGH"); - if (msb < 0) - return msb; - - return sprintf(buf, "%d\n", (msb << 8) | lsb); -} - -static ssize_t bh1780_show_power_state(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct platform_device *pdev = to_platform_device(dev); - struct bh1780_data *ddata = platform_get_drvdata(pdev); - int state; - - state = bh1780_read(ddata, BH1780_REG_CONTROL, "CONTROL"); - if (state < 0) - return state; - - return sprintf(buf, "%d\n", state & BH1780_POWMASK); -} - -static ssize_t 
bh1780_store_power_state(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct platform_device *pdev = to_platform_device(dev); - struct bh1780_data *ddata = platform_get_drvdata(pdev); - unsigned long val; - int error; - - error = kstrtoul(buf, 0, &val); - if (error) - return error; - - if (val < BH1780_POFF || val > BH1780_PON) - return -EINVAL; - - mutex_lock(&ddata->lock); - - error = bh1780_write(ddata, BH1780_REG_CONTROL, val, "CONTROL"); - if (error < 0) { - mutex_unlock(&ddata->lock); - return error; - } - - msleep(BH1780_PON_DELAY); - ddata->power_state = val; - mutex_unlock(&ddata->lock); - - return count; -} - -static DEVICE_ATTR(lux, S_IRUGO, bh1780_show_lux, NULL); - -static DEVICE_ATTR(power_state, S_IWUSR | S_IRUGO, - bh1780_show_power_state, bh1780_store_power_state); - -static struct attribute *bh1780_attributes[] = { - &dev_attr_power_state.attr, - &dev_attr_lux.attr, - NULL -}; - -static const struct attribute_group bh1780_attr_group = { - .attrs = bh1780_attributes, -}; - -static int bh1780_probe(struct i2c_client *client, - const struct i2c_device_id *id) -{ - int ret; - struct bh1780_data *ddata; - struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); - - if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE)) - return -EIO; - - ddata = devm_kzalloc(&client->dev, sizeof(struct bh1780_data), - GFP_KERNEL); - if (ddata == NULL) - return -ENOMEM; - - ddata->client = client; - i2c_set_clientdata(client, ddata); - - ret = bh1780_read(ddata, BH1780_REG_PARTID, "PART ID"); - if (ret < 0) - return ret; - - dev_info(&client->dev, "Ambient Light Sensor, Rev : %d\n", - (ret & BH1780_REVMASK)); - - mutex_init(&ddata->lock); - - return sysfs_create_group(&client->dev.kobj, &bh1780_attr_group); -} - -static int bh1780_remove(struct i2c_client *client) -{ - sysfs_remove_group(&client->dev.kobj, &bh1780_attr_group); - - return 0; -} - -#ifdef CONFIG_PM_SLEEP -static int bh1780_suspend(struct device 
*dev) -{ - struct bh1780_data *ddata; - int state, ret; - struct i2c_client *client = to_i2c_client(dev); - - ddata = i2c_get_clientdata(client); - state = bh1780_read(ddata, BH1780_REG_CONTROL, "CONTROL"); - if (state < 0) - return state; - - ddata->power_state = state & BH1780_POWMASK; - - ret = bh1780_write(ddata, BH1780_REG_CONTROL, BH1780_POFF, - "CONTROL"); - - if (ret < 0) - return ret; - - return 0; -} - -static int bh1780_resume(struct device *dev) -{ - struct bh1780_data *ddata; - int state, ret; - struct i2c_client *client = to_i2c_client(dev); - - ddata = i2c_get_clientdata(client); - state = ddata->power_state; - ret = bh1780_write(ddata, BH1780_REG_CONTROL, state, - "CONTROL"); - - if (ret < 0) - return ret; - - return 0; -} -#endif /* CONFIG_PM_SLEEP */ - -static SIMPLE_DEV_PM_OPS(bh1780_pm, bh1780_suspend, bh1780_resume); - -static const struct i2c_device_id bh1780_id[] = { - { "bh1780", 0 }, - { }, -}; - -MODULE_DEVICE_TABLE(i2c, bh1780_id); - -#ifdef CONFIG_OF -static const struct of_device_id of_bh1780_match[] = { - { .compatible = "rohm,bh1780gli", }, - {}, -}; - -MODULE_DEVICE_TABLE(of, of_bh1780_match); -#endif - -static struct i2c_driver bh1780_driver = { - .probe = bh1780_probe, - .remove = bh1780_remove, - .id_table = bh1780_id, - .driver = { - .name = "bh1780", - .pm = &bh1780_pm, - .of_match_table = of_match_ptr(of_bh1780_match), - }, -}; - -module_i2c_driver(bh1780_driver); - -MODULE_DESCRIPTION("BH1780GLI Ambient Light Sensor Driver"); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Hemanth V "); -- cgit v1.1 From f5a49057c71433e35a4712ab8d8f00641b3e1ec0 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Tue, 9 Aug 2016 16:24:43 +0800 Subject: ARM: imx6: add missing BM_CLPCR_BYP_MMDC_CH0_LPM_HS setting for imx6ul There is a missing BM_CLPCR_BYP_MMDC_CH0_LPM_HS setting for imx6ul, without it, the "standby" mode can't work well, the system can't be resumed. With this commit, the "standby" mode works well. 
Signed-off-by: Peter Chen Cc: Anson Huang Cc: Fixes: ee4a5f838c84 ("ARM: imx: add suspend/resume support for i.mx6ul") Signed-off-by: Shawn Guo --- arch/arm/mach-imx/pm-imx6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-imx/pm-imx6.c b/arch/arm/mach-imx/pm-imx6.c index 58924b3..67bab74 100644 --- a/arch/arm/mach-imx/pm-imx6.c +++ b/arch/arm/mach-imx/pm-imx6.c @@ -295,7 +295,7 @@ int imx6_set_lpm(enum mxc_cpu_pwr_mode mode) val &= ~BM_CLPCR_SBYOS; if (cpu_is_imx6sl()) val |= BM_CLPCR_BYPASS_PMIC_READY; - if (cpu_is_imx6sl() || cpu_is_imx6sx()) + if (cpu_is_imx6sl() || cpu_is_imx6sx() || cpu_is_imx6ul()) val |= BM_CLPCR_BYP_MMDC_CH0_LPM_HS; else val |= BM_CLPCR_BYP_MMDC_CH1_LPM_HS; -- cgit v1.1 From bb9947c3a14e781eb0f137728e7e55ec8d848991 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 14 Jul 2016 11:06:11 +0000 Subject: iio: pressure: bmp280: fix wrong pointer passed to PTR_ERR() PTR_ERR should access the value just tested by IS_ERR, otherwise the wrong error code will be returned. Signed-off-by: Wei Yongjun Reviewed-by: Linus Walleij Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/bmp280-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/pressure/bmp280-core.c b/drivers/iio/pressure/bmp280-core.c index 6943688..94e27b2 100644 --- a/drivers/iio/pressure/bmp280-core.c +++ b/drivers/iio/pressure/bmp280-core.c @@ -970,7 +970,7 @@ int bmp280_common_probe(struct device *dev, data->vdda = devm_regulator_get(dev, "vdda"); if (IS_ERR(data->vdda)) { dev_err(dev, "failed to get VDDA regulator\n"); - ret = PTR_ERR(data->vddd); + ret = PTR_ERR(data->vdda); goto out_disable_vddd; } ret = regulator_enable(data->vdda); -- cgit v1.1 From 776b645315d372e06e46167194605d308aa1a790 Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Thu, 21 Jul 2016 11:23:13 -0400 Subject: staging: iio: ad5933: Return correct value for AD5933_OUT_RANGE. 
The 'break' statement after calling ad5933_cmd only breaks out of the 'for' loop, which then unconditionally sets the return value to -EINVAL. Move the initialisation of 'ret' so we return the correct value. Signed-off-by: Phil Turnbull Signed-off-by: Jonathan Cameron --- drivers/staging/iio/impedance-analyzer/ad5933.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c index 170ac98..24c348d 100644 --- a/drivers/staging/iio/impedance-analyzer/ad5933.c +++ b/drivers/staging/iio/impedance-analyzer/ad5933.c @@ -419,6 +419,7 @@ static ssize_t ad5933_store(struct device *dev, mutex_lock(&indio_dev->mlock); switch ((u32)this_attr->address) { case AD5933_OUT_RANGE: + ret = -EINVAL; for (i = 0; i < 4; i++) if (val == st->range_avail[i]) { st->ctrl_hb &= ~AD5933_CTRL_RANGE(0x3); @@ -426,7 +427,6 @@ static ssize_t ad5933_store(struct device *dev, ret = ad5933_cmd(st, 0); break; } - ret = -EINVAL; break; case AD5933_IN_PGA_GAIN: if (sysfs_streq(buf, "1")) { -- cgit v1.1 From b2f0c09664b72b2f8c581383a9337ac3092e42c8 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Mon, 11 Jul 2016 13:50:01 +0200 Subject: iio: sw-trigger: Fix config group initialization Use the IS_ENABLED() helper macro to ensure that the configfs group is initialized either when configfs is built-in or when configfs is built as a module. Otherwise software trigger creation will result in undefined behaviour when configfs is built as a module since the configfs group for the trigger is not properly initialized.
Fixes: b662f809d410 ("iio: core: Introduce IIO software triggers") Signed-off-by: Lars-Peter Clausen Acked-by: Daniel Baluta Cc: Signed-off-by: Jonathan Cameron --- include/linux/iio/sw_trigger.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/iio/sw_trigger.h b/include/linux/iio/sw_trigger.h index 5198f8e..c97eab6 100644 --- a/include/linux/iio/sw_trigger.h +++ b/include/linux/iio/sw_trigger.h @@ -62,7 +62,7 @@ void iio_swt_group_init_type_name(struct iio_sw_trigger *t, const char *name, struct config_item_type *type) { -#ifdef CONFIG_CONFIGFS_FS +#if IS_ENABLED(CONFIG_CONFIGFS_FS) config_group_init_type_name(&t->group, name, type); #endif } -- cgit v1.1 From 7d3cc21dab5313a02f2f3ca8164529b828a030d1 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Mon, 11 Jul 2016 13:54:17 +0200 Subject: iio: ad799x: Fix buffered capture for ad7991/ad7995/ad7999 The data buffer for captured mode for the ad799x driver is allocated in the update_scan_mode() callback. This callback is not set in the iio_info struct for the ad7991/ad7995/ad7999, which means that the data buffer is not allocated when a captured transfer is started. As a result the driver crashes when the first sample is received. To fix this properly set the update_scan_mode() callback.
Fixes: d8dca33027c1 ("staging:iio:ad799x: Preallocate sample buffer") Signed-off-by: Lars-Peter Clausen Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad799x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/adc/ad799x.c b/drivers/iio/adc/ad799x.c index b616376..9704090 100644 --- a/drivers/iio/adc/ad799x.c +++ b/drivers/iio/adc/ad799x.c @@ -527,6 +527,7 @@ static struct attribute_group ad799x_event_attrs_group = { static const struct iio_info ad7991_info = { .read_raw = &ad799x_read_raw, .driver_module = THIS_MODULE, + .update_scan_mode = ad799x_update_scan_mode, }; static const struct iio_info ad7993_4_7_8_noirq_info = { -- cgit v1.1 From b234f683dde97aebb7009c6aecab651d32ad70b4 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Mon, 11 Jul 2016 08:25:07 -0700 Subject: iio: accel: bma220_spi: set up buffer timestamps for non-zero values Use the iio_pollfunc_store_time parameter during triggered buffer set-up to get valid timestamps. Signed-off-by: Alison Schofield Cc: Daniel Baluta Reviewed-By: Tiberiu Breana Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/accel/bma220_spi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/accel/bma220_spi.c b/drivers/iio/accel/bma220_spi.c index 1098d10..5099f29 100644 --- a/drivers/iio/accel/bma220_spi.c +++ b/drivers/iio/accel/bma220_spi.c @@ -253,7 +253,7 @@ static int bma220_probe(struct spi_device *spi) if (ret < 0) return ret; - ret = iio_triggered_buffer_setup(indio_dev, NULL, + ret = iio_triggered_buffer_setup(indio_dev, iio_pollfunc_store_time, bma220_trigger_handler, NULL); if (ret < 0) { dev_err(&spi->dev, "iio triggered buffer setup failed\n"); -- cgit v1.1 From 3c68858df7c2f0c4c343bb4702733fe827491f9e Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Mon, 11 Jul 2016 08:26:13 -0700 Subject: iio: humidity: am2315: set up buffer timestamps for non-zero values Use the iio_pollfunc_store_time parameter during triggered buffer set-up to get valid timestamps. 
Signed-off-by: Alison Schofield Cc: Daniel Baluta Reviewed-By: Tiberiu Breana Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/humidity/am2315.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/humidity/am2315.c b/drivers/iio/humidity/am2315.c index 3e200f6..ff96b6d 100644 --- a/drivers/iio/humidity/am2315.c +++ b/drivers/iio/humidity/am2315.c @@ -244,7 +244,7 @@ static int am2315_probe(struct i2c_client *client, indio_dev->channels = am2315_channels; indio_dev->num_channels = ARRAY_SIZE(am2315_channels); - ret = iio_triggered_buffer_setup(indio_dev, NULL, + ret = iio_triggered_buffer_setup(indio_dev, iio_pollfunc_store_time, am2315_trigger_handler, NULL); if (ret < 0) { dev_err(&client->dev, "iio triggered buffer setup failed\n"); -- cgit v1.1 From f8adf645db03345af2d9a8b6095b02327ea50885 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Mon, 11 Jul 2016 08:26:56 -0700 Subject: iio: proximity: as3935: set up buffer timestamps for non-zero values Use the iio_pollfunc_store_time parameter during triggered buffer set-up to get valid timestamps. 
Signed-off-by: Alison Schofield Cc: Daniel Baluta Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/as3935.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/proximity/as3935.c b/drivers/iio/proximity/as3935.c index 2e3a70e..5656deb 100644 --- a/drivers/iio/proximity/as3935.c +++ b/drivers/iio/proximity/as3935.c @@ -397,7 +397,7 @@ static int as3935_probe(struct spi_device *spi) return ret; } - ret = iio_triggered_buffer_setup(indio_dev, NULL, + ret = iio_triggered_buffer_setup(indio_dev, iio_pollfunc_store_time, &as3935_trigger_handler, NULL); if (ret) { -- cgit v1.1 From 45e98152850c36560484f3fa3bb857a4bfe1a419 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Tue, 19 Jul 2016 12:25:00 -0400 Subject: iio: stx104: Unregister IIO device on remove callback The devm_iio_device_register function should not be used if custom operations must be performed in the remove callback. This patch replaces the devm_iio_device_register call with an iio_device_register call and respective iio_device_unregister call in the remove callback.
Fixes: 765550e4d98d ("iio: stx104: Add GPIO support for the Apex Embedded Systems STX104") Signed-off-by: William Breathitt Gray Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/dac/stx104.c | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/drivers/iio/dac/stx104.c b/drivers/iio/dac/stx104.c index 792a971..bebbd00 100644 --- a/drivers/iio/dac/stx104.c +++ b/drivers/iio/dac/stx104.c @@ -65,6 +65,16 @@ struct stx104_gpio { unsigned int out_state; }; +/** + * struct stx104_dev - STX104 device private data structure + * @indio_dev: IIO device + * @chip: instance of the gpio_chip + */ +struct stx104_dev { + struct iio_dev *indio_dev; + struct gpio_chip *chip; +}; + static int stx104_read_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int *val, int *val2, long mask) { @@ -107,6 +117,7 @@ static const struct iio_chan_spec stx104_channels[STX104_NUM_CHAN] = { static int stx104_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) { + /* GPIO 0-3 are input only, while the rest are output only */ if (offset < 4) return 1; @@ -169,6 +180,7 @@ static int stx104_probe(struct device *dev, unsigned int id) struct iio_dev *indio_dev; struct stx104_iio *priv; struct stx104_gpio *stx104gpio; + struct stx104_dev *stx104dev; int err; indio_dev = devm_iio_device_alloc(dev, sizeof(*priv)); @@ -179,6 +191,10 @@ static int stx104_probe(struct device *dev, unsigned int id) if (!stx104gpio) return -ENOMEM; + stx104dev = devm_kzalloc(dev, sizeof(*stx104dev), GFP_KERNEL); + if (!stx104dev) + return -ENOMEM; + if (!devm_request_region(dev, base[id], STX104_EXTENT, dev_name(dev))) { dev_err(dev, "Unable to lock port addresses (0x%X-0x%X)\n", @@ -199,12 +215,6 @@ static int stx104_probe(struct device *dev, unsigned int id) outw(0, base[id] + 4); outw(0, base[id] + 6); - err = devm_iio_device_register(dev, indio_dev); - if (err) { - dev_err(dev, "IIO device registering failed (%d)\n", err); - return err; - } - 
stx104gpio->chip.label = dev_name(dev); stx104gpio->chip.parent = dev; stx104gpio->chip.owner = THIS_MODULE; @@ -220,7 +230,9 @@ static int stx104_probe(struct device *dev, unsigned int id) spin_lock_init(&stx104gpio->lock); - dev_set_drvdata(dev, stx104gpio); + stx104dev->indio_dev = indio_dev; + stx104dev->chip = &stx104gpio->chip; + dev_set_drvdata(dev, stx104dev); err = gpiochip_add_data(&stx104gpio->chip, stx104gpio); if (err) { @@ -228,14 +240,22 @@ static int stx104_probe(struct device *dev, unsigned int id) return err; } + err = iio_device_register(indio_dev); + if (err) { + dev_err(dev, "IIO device registering failed (%d)\n", err); + gpiochip_remove(&stx104gpio->chip); + return err; + } + return 0; } static int stx104_remove(struct device *dev, unsigned int id) { - struct stx104_gpio *const stx104gpio = dev_get_drvdata(dev); + struct stx104_dev *const stx104dev = dev_get_drvdata(dev); - gpiochip_remove(&stx104gpio->chip); + iio_device_unregister(stx104dev->indio_dev); + gpiochip_remove(stx104dev->chip); return 0; } -- cgit v1.1 From 193e2d4fd91c5e5d563395f9577621dac4f4df31 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Mon, 18 Jul 2016 17:56:43 -0700 Subject: iio: add Kconfig selects needed for triggered buffer compiles Select IIO_BUFFER and IIO_TRIGGERED_BUFFER to compile. Remove IIO_TRIGGER if present since IIO_BUFFER selects it. 
Signed-off-by: Alison Schofield Cc: Daniel Baluta Signed-off-by: Jonathan Cameron --- drivers/iio/accel/Kconfig | 5 ++++- drivers/iio/humidity/Kconfig | 2 ++ drivers/iio/light/Kconfig | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig index 89d7820..78f148e 100644 --- a/drivers/iio/accel/Kconfig +++ b/drivers/iio/accel/Kconfig @@ -20,6 +20,8 @@ config BMA180 config BMA220 tristate "Bosch BMA220 3-Axis Accelerometer Driver" depends on SPI + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER help Say yes here to add support for the Bosch BMA220 triaxial acceleration sensor. @@ -234,7 +236,8 @@ config STK8312 config STK8BA50 tristate "Sensortek STK8BA50 3-Axis Accelerometer Driver" depends on I2C - depends on IIO_TRIGGER + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER help Say yes here to get support for the Sensortek STK8BA50 3-axis accelerometer. diff --git a/drivers/iio/humidity/Kconfig b/drivers/iio/humidity/Kconfig index 738a86d..d041243 100644 --- a/drivers/iio/humidity/Kconfig +++ b/drivers/iio/humidity/Kconfig @@ -6,6 +6,8 @@ menu "Humidity sensors" config AM2315 tristate "Aosong AM2315 relative humidity and temperature sensor" depends on I2C + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER help If you say yes here you get support for the Aosong AM2315 relative humidity and ambient temperature sensor. diff --git a/drivers/iio/light/Kconfig b/drivers/iio/light/Kconfig index 7c566f5..12ceb11b 100644 --- a/drivers/iio/light/Kconfig +++ b/drivers/iio/light/Kconfig @@ -238,6 +238,8 @@ config MAX44000 tristate "MAX44000 Ambient and Infrared Proximity Sensor" depends on I2C select REGMAP_I2C + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER help Say Y here if you want to build support for Maxim Integrated's MAX44000 ambient and infrared proximity sensor device. 
-- cgit v1.1 From 31f453eac56bdc41f434126bc2d5933b9fb720ec Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 27 Jul 2016 22:32:58 +0200 Subject: iio: pressure: bmp280: fix runtime suspend/resume crash In commit 3d838118c6aa ("iio: pressure: bmp280: add power management") For some reason the code in the runtime suspend/resume hooks got wrong (I suspect in the ambition to cut down boilerplate) and it seems it was tested without CONFIG_PM and crashes like so for me: Unable to handle kernel NULL pointer dereference at virtual address 0000000c pgd = c0204000 [0000000c] *pgd=00000000 Internal error: Oops: 5 [#1] PREEMPT SMP ARM Modules linked in: CPU: 1 PID: 89 Comm: kworker/1:2 Not tainted 4.7.0-03348-g90dc3680458a-dirty #99 Hardware name: Generic DT based system Workqueue: pm pm_runtime_work task: df3c6300 ti: dec8a000 task.ti: dec8a000 PC is at regulator_disable+0x0/0x6c LR is at bmp280_runtime_suspend+0x3c/0xa4 Dereferencing the BMP280 state container properly fixes the problem, sorry for screwing up. 
Fixes: 3d838118c6aa ("iio: pressure: bmp280: add power management") Signed-off-by: Linus Walleij Tested-by: Jarkko Nikula Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/bmp280-core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/iio/pressure/bmp280-core.c b/drivers/iio/pressure/bmp280-core.c index 94e27b2..e5a533c 100644 --- a/drivers/iio/pressure/bmp280-core.c +++ b/drivers/iio/pressure/bmp280-core.c @@ -1079,7 +1079,8 @@ EXPORT_SYMBOL(bmp280_common_remove); #ifdef CONFIG_PM static int bmp280_runtime_suspend(struct device *dev) { - struct bmp280_data *data = dev_get_drvdata(dev); + struct iio_dev *indio_dev = dev_get_drvdata(dev); + struct bmp280_data *data = iio_priv(indio_dev); int ret; ret = regulator_disable(data->vdda); @@ -1090,7 +1091,8 @@ static int bmp280_runtime_suspend(struct device *dev) static int bmp280_runtime_resume(struct device *dev) { - struct bmp280_data *data = dev_get_drvdata(dev); + struct iio_dev *indio_dev = dev_get_drvdata(dev); + struct bmp280_data *data = iio_priv(indio_dev); int ret; ret = regulator_enable(data->vddd); -- cgit v1.1 From 7b142d8fd0bd4c9bf06ccb72ac4daedb503f0124 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 16 Jun 2016 00:45:33 +0200 Subject: android: binder: fix dangling pointer comparison If /dev/binder is opened and the opener process then e.g. calls execve, proc->vma_vm_mm will still point to the location of the now-freed mm_struct. If the process then calls ioctl(binder_fd, ...), the dangling proc->vma_vm_mm pointer will be compared to current->mm. Let the binder take a reference to the mm_struct to avoid this. 
v2: use the right refcounter Fixes: a906d6931f3c ("android: binder: Sanity check at binder ioctl") Signed-off-by: Jann Horn Reviewed-by: Chen Feng Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 16288e7..09fdb42 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2962,6 +2962,7 @@ static int binder_open(struct inode *nodp, struct file *filp) return -ENOMEM; get_task_struct(current); proc->tsk = current; + atomic_inc(&current->mm->mm_count); proc->vma_vm_mm = current->mm; INIT_LIST_HEAD(&proc->todo); init_waitqueue_head(&proc->wait); @@ -3167,6 +3168,7 @@ static void binder_deferred_release(struct binder_proc *proc) vfree(proc->buffer); } + mmdrop(proc->vma_vm_mm); put_task_struct(proc->tsk); binder_debug(BINDER_DEBUG_OPEN_CLOSE, -- cgit v1.1 From 0d9dcf852334b796bacc7020364afba3122db81e Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Mon, 8 Aug 2016 11:14:36 -0700 Subject: iio: humidity: hdc100x: fix sensor data reads of temp and humidity Replace the i2c_smbus_read_byte commands used to retrieve the sensor data with an i2c_master_recv command. The smbus read byte method fails because the device does not expect a stop condition after sending the first byte. When we issue the second read, we are getting the first byte again. Net effect is that of the 14 bits used for the measurement, the 8 most significant bits are correct, the lower 6 are not. None of the smbus read protocols follow the pattern this device requires (S Addr Rd [A] Data [A] Data NA P), hence the switch to an i2c receive transaction. Applicable from original introduction of this driver, but will require backporting due to churn in the code.
Signed-off-by: Alison Schofield Cc: Daniel Baluta Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/humidity/hdc100x.c | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/drivers/iio/humidity/hdc100x.c b/drivers/iio/humidity/hdc100x.c index a03832a..e0c9c70 100644 --- a/drivers/iio/humidity/hdc100x.c +++ b/drivers/iio/humidity/hdc100x.c @@ -142,7 +142,7 @@ static int hdc100x_get_measurement(struct hdc100x_data *data, struct i2c_client *client = data->client; int delay = data->adc_int_us[chan->address]; int ret; - int val; + __be16 val; /* start measurement */ ret = i2c_smbus_write_byte(client, chan->address); @@ -154,26 +154,13 @@ static int hdc100x_get_measurement(struct hdc100x_data *data, /* wait for integration time to pass */ usleep_range(delay, delay + 1000); - /* - * i2c_smbus_read_word_data cannot() be used here due to the command - * value not being understood and causes NAKs preventing any reading - * from being accessed. - */ - ret = i2c_smbus_read_byte(client); + /* read measurement */ + ret = i2c_master_recv(data->client, (char *)&val, sizeof(val)); if (ret < 0) { - dev_err(&client->dev, "cannot read high byte measurement"); + dev_err(&client->dev, "cannot read sensor data\n"); return ret; } - val = ret << 8; - - ret = i2c_smbus_read_byte(client); - if (ret < 0) { - dev_err(&client->dev, "cannot read low byte measurement"); - return ret; - } - val |= ret; - - return val; + return be16_to_cpu(val); } static int hdc100x_get_heater_status(struct hdc100x_data *data) @@ -272,8 +259,8 @@ static int hdc100x_probe(struct i2c_client *client, struct iio_dev *indio_dev; struct hdc100x_data *data; - if (!i2c_check_functionality(client->adapter, - I2C_FUNC_SMBUS_WORD_DATA | I2C_FUNC_SMBUS_BYTE)) + if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_WORD_DATA | + I2C_FUNC_SMBUS_BYTE | I2C_FUNC_I2C)) return -EOPNOTSUPP; indio_dev = devm_iio_device_alloc(&client->dev, sizeof(*data)); -- cgit v1.1 From 
ddbc719f99cf9aed6918cef98cb3475fd4fc4fa6 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Tue, 26 Jul 2016 20:17:43 -0700 Subject: tools: iio: iio_generic_buffer: initialize channel array pointer Uninitialized channel pointer causes segmentation fault when we call free(channel) during cleanup() with no channels initialized. This happens when you exit early for usage errors. Initialize the pointer to NULL when it is declared. Signed-off-by: Alison Schofield Cc: Daniel Baluta Tested-by: Gregor Boirie Signed-off-by: Jonathan Cameron --- tools/iio/iio_generic_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/iio/iio_generic_buffer.c b/tools/iio/iio_generic_buffer.c index 0e8a1f7..ae68bf0 100644 --- a/tools/iio/iio_generic_buffer.c +++ b/tools/iio/iio_generic_buffer.c @@ -348,7 +348,7 @@ int main(int argc, char **argv) int notrigger = 0; char *dummy; - struct iio_channel_info *channels; + struct iio_channel_info *channels = NULL; register_cleanup(); -- cgit v1.1 From 42647f947210cb9fd8a7737c0fd2a60002a81188 Mon Sep 17 00:00:00 2001 From: Teresa Remmet Date: Mon, 15 Aug 2016 09:10:39 -0700 Subject: ARM: dts: am335x: Update elm phandle binding The check for the "elm_id" binding had been removed. This causes nand boot to fail on boards still using the old binding. Update the bindings on those boards. 
Signed-off-by: Teresa Remmet Acked-by: Brian Norris Acked-by: Roger Quadros Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am335x-baltos.dtsi | 2 +- arch/arm/boot/dts/am335x-igep0033.dtsi | 2 +- arch/arm/boot/dts/am335x-phycore-som.dtsi | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/am335x-baltos.dtsi b/arch/arm/boot/dts/am335x-baltos.dtsi index c8609d8..b689172 100644 --- a/arch/arm/boot/dts/am335x-baltos.dtsi +++ b/arch/arm/boot/dts/am335x-baltos.dtsi @@ -226,7 +226,7 @@ #address-cells = <1>; #size-cells = <1>; - elm_id = <&elm>; + ti,elm-id = <&elm>; }; }; diff --git a/arch/arm/boot/dts/am335x-igep0033.dtsi b/arch/arm/boot/dts/am335x-igep0033.dtsi index df63484..e7d9ca1 100644 --- a/arch/arm/boot/dts/am335x-igep0033.dtsi +++ b/arch/arm/boot/dts/am335x-igep0033.dtsi @@ -161,7 +161,7 @@ #address-cells = <1>; #size-cells = <1>; - elm_id = <&elm>; + ti,elm-id = <&elm>; /* MTD partition table */ partition@0 { diff --git a/arch/arm/boot/dts/am335x-phycore-som.dtsi b/arch/arm/boot/dts/am335x-phycore-som.dtsi index 86f7731..1263c9d 100644 --- a/arch/arm/boot/dts/am335x-phycore-som.dtsi +++ b/arch/arm/boot/dts/am335x-phycore-som.dtsi @@ -197,7 +197,7 @@ gpmc,wr-access-ns = <30>; gpmc,wr-data-mux-bus-ns = <0>; - elm_id = <&elm>; + ti,elm-id = <&elm>; #address-cells = <1>; #size-cells = <1>; -- cgit v1.1 From 5e0568dfbfb8c13cdb69c9fd06d600593ad4b430 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 15 Aug 2016 09:10:45 -0700 Subject: ARM: dts: overo: fix gpmc nand cs0 range The gpmc ranges property for NAND at CS0 has been broken since it was first added. This currently prevents the nand gpmc child node from being probed: omap-gpmc 6e000000.gpmc: /ocp/gpmc@6e000000/nand@0,0 has malformed 'reg' property and consequently the NAND device from being registered. 
Fixes: 98ce6007efb4 ("ARM: dts: overo: Support PoP NAND") Cc: stable # 4.3 Signed-off-by: Johan Hovold Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap3-overo-base.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/omap3-overo-base.dtsi b/arch/arm/boot/dts/omap3-overo-base.dtsi index de256fa..b15c435 100644 --- a/arch/arm/boot/dts/omap3-overo-base.dtsi +++ b/arch/arm/boot/dts/omap3-overo-base.dtsi @@ -223,7 +223,7 @@ }; &gpmc { - ranges = <0 0 0x00000000 0x20000000>; + ranges = <0 0 0x30000000 0x1000000>; /* CS0 */ nand@0,0 { compatible = "ti,omap2-nand"; -- cgit v1.1 From 153b58ea932b2d0642fa5cd41c93bb0555f3f09b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 15 Aug 2016 09:10:49 -0700 Subject: ARM: dts: overo: fix gpmc nand on boards with ethernet The gpmc ranges property for NAND at CS0 was being overridden by later includes that defined gpmc ethernet nodes, effectively breaking NAND on these systems: omap-gpmc 6e000000.gpmc: /ocp/gpmc@6e000000/nand@0,0 has malformed 'reg' property Instead of redefining the NAND range in every such dtsi, define all currently used ranges in omap3-overo-base.dtsi. 
Fixes: 98ce6007efb4 ("ARM: dts: overo: Support PoP NAND") Cc: stable # 4.3 Signed-off-by: Johan Hovold Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap3-overo-base.dtsi | 4 +++- arch/arm/boot/dts/omap3-overo-chestnut43-common.dtsi | 2 -- arch/arm/boot/dts/omap3-overo-tobi-common.dtsi | 2 -- arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi | 3 --- 4 files changed, 3 insertions(+), 8 deletions(-) diff --git a/arch/arm/boot/dts/omap3-overo-base.dtsi b/arch/arm/boot/dts/omap3-overo-base.dtsi index b15c435..3e946ca 100644 --- a/arch/arm/boot/dts/omap3-overo-base.dtsi +++ b/arch/arm/boot/dts/omap3-overo-base.dtsi @@ -223,7 +223,9 @@ }; &gpmc { - ranges = <0 0 0x30000000 0x1000000>; /* CS0 */ + ranges = <0 0 0x30000000 0x1000000>, /* CS0 */ + <4 0 0x2b000000 0x1000000>, /* CS4 */ + <5 0 0x2c000000 0x1000000>; /* CS5 */ nand@0,0 { compatible = "ti,omap2-nand"; diff --git a/arch/arm/boot/dts/omap3-overo-chestnut43-common.dtsi b/arch/arm/boot/dts/omap3-overo-chestnut43-common.dtsi index 7df2792..4f4c6ef 100644 --- a/arch/arm/boot/dts/omap3-overo-chestnut43-common.dtsi +++ b/arch/arm/boot/dts/omap3-overo-chestnut43-common.dtsi @@ -55,8 +55,6 @@ #include "omap-gpmc-smsc9221.dtsi" &gpmc { - ranges = <5 0 0x2c000000 0x1000000>; /* CS5 */ - ethernet@gpmc { reg = <5 0 0xff>; interrupt-parent = <&gpio6>; diff --git a/arch/arm/boot/dts/omap3-overo-tobi-common.dtsi b/arch/arm/boot/dts/omap3-overo-tobi-common.dtsi index 9e24b6a..1b304e2 100644 --- a/arch/arm/boot/dts/omap3-overo-tobi-common.dtsi +++ b/arch/arm/boot/dts/omap3-overo-tobi-common.dtsi @@ -27,8 +27,6 @@ #include "omap-gpmc-smsc9221.dtsi" &gpmc { - ranges = <5 0 0x2c000000 0x1000000>; /* CS5 */ - ethernet@gpmc { reg = <5 0 0xff>; interrupt-parent = <&gpio6>; diff --git a/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi b/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi index 334109e..82e98ee 100644 --- a/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi +++ b/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi @@ 
-15,9 +15,6 @@ #include "omap-gpmc-smsc9221.dtsi" &gpmc { - ranges = <4 0 0x2b000000 0x1000000>, /* CS4 */ - <5 0 0x2c000000 0x1000000>; /* CS5 */ - smsc1: ethernet@gpmc { reg = <5 0 0xff>; interrupt-parent = <&gpio6>; -- cgit v1.1 From a8771a6a64226c24f4baf30b8d13a2116795487f Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Sat, 13 Aug 2016 10:13:04 -0500 Subject: ARM: dts: logicpd-torpedo-som: Provide NAND ready pin This was applied to a variety of omap3 boards, so it should probably be applied here. I did not test NAND performance, but I tested this with UBI to confirm read/write didn't break. Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/logicpd-torpedo-som.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi index 5e9a13c..1c2c746 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi +++ b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi @@ -46,6 +46,7 @@ linux,mtd-name = "micron,mt29f4g16abbda3w"; nand-bus-width = <16>; ti,nand-ecc-opt = "bch8"; + rb-gpios = <&gpmc 0 GPIO_ACTIVE_HIGH>; /* gpmc_wait0 */ gpmc,sync-clk-ps = <0>; gpmc,cs-on-ns = <0>; gpmc,cs-rd-off-ns = <44>; -- cgit v1.1 From 4875b8fcf68d8133713dd5c5df5bc79431be8be7 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Sat, 13 Aug 2016 10:21:00 -0500 Subject: ARM: dts: logicpd-somlv: Fix NAND device nodes This fix was applied to a bunch of omap3 devices including LogicPD Torpedo, but this got missed since it was new around the same times the patches were applied. This makes the GPMC parameters match the Torpedo since they have the same processor PoP memory. 
Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/logicpd-som-lv.dtsi | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi index 365f39ff..0ff1c2d 100644 --- a/arch/arm/boot/dts/logicpd-som-lv.dtsi +++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi @@ -35,10 +35,15 @@ ranges = <0 0 0x00000000 0x1000000>; /* CS0: 16MB for NAND */ nand@0,0 { - linux,mtd-name = "micron,mt29f4g16abbda3w"; + compatible = "ti,omap2-nand"; reg = <0 0 4>; /* CS0, offset 0, IO size 4 */ + interrupt-parent = <&gpmc>; + interrupts = <0 IRQ_TYPE_NONE>, /* fifoevent */ + <1 IRQ_TYPE_NONE>; /* termcount */ + linux,mtd-name = "micron,mt29f4g16abbda3w"; nand-bus-width = <16>; ti,nand-ecc-opt = "bch8"; + rb-gpios = <&gpmc 0 GPIO_ACTIVE_HIGH>; /* gpmc_wait0 */ gpmc,sync-clk-ps = <0>; gpmc,cs-on-ns = <0>; gpmc,cs-rd-off-ns = <44>; @@ -54,10 +59,6 @@ gpmc,wr-access-ns = <40>; gpmc,wr-data-mux-bus-ns = <0>; gpmc,device-width = <2>; - - gpmc,page-burst-access-ns = <5>; - gpmc,cycle2cycle-delay-ns = <50>; - #address-cells = <1>; #size-cells = <1>; -- cgit v1.1 From c2ab447454d498e709d9011c0f2d2945ee321f9b Mon Sep 17 00:00:00 2001 From: Anders Darander Date: Mon, 8 Aug 2016 14:42:16 +0200 Subject: iio: adc: at91: unbreak channel adc channel 3 The driver always assumes that an input device has been created when reading channel 3. This causes a kernel panic when dereferencing st->ts_input. The change was introduced in commit 84882b060301 ("iio: adc: at91_adc: Add support for touchscreens without TSMR"). 
Earlier versions only entered that part of the if-else statement if only the following flags are set: AT91_ADC_IER_XRDY | AT91_ADC_IER_YRDY | AT91_ADC_IER_PRDY Signed-off-by: Anders Darander Acked-by: Alexandre Belloni Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/at91_adc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c index 52430ba..0438c68 100644 --- a/drivers/iio/adc/at91_adc.c +++ b/drivers/iio/adc/at91_adc.c @@ -381,8 +381,8 @@ static irqreturn_t at91_adc_rl_interrupt(int irq, void *private) st->ts_bufferedmeasure = false; input_report_key(st->ts_input, BTN_TOUCH, 0); input_sync(st->ts_input); - } else if (status & AT91_ADC_EOC(3)) { - /* Conversion finished */ + } else if (status & AT91_ADC_EOC(3) && st->ts_input) { + /* Conversion finished and we've a touchscreen */ if (st->ts_bufferedmeasure) { /* * Last measurement is always discarded, since it can -- cgit v1.1 From 99f1c013194e64d4b67d5d318148303b0e1585e1 Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Thu, 14 Jul 2016 23:40:21 -0400 Subject: staging/lustre/llite: Close atomic_open race with several openers Right now, if it's an open of a negative dentry, a race is possible with several openers who all try to instantiate/rehash the same dentry and would hit a BUG_ON in d_add. But in fact if we got a negative dentry in atomic_open, that means we just revalidated it so no point in talking to MDS at all, just return ENOENT and make the race go away completely. 
Signed-off-by: Oleg Drokin Cc: stable # 4.7+ Signed-off-by: Greg Kroah-Hartman --- drivers/staging/lustre/lustre/llite/namei.c | 43 ++++++++++++++++------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c index 3664bfd..2c4dc69 100644 --- a/drivers/staging/lustre/lustre/llite/namei.c +++ b/drivers/staging/lustre/lustre/llite/namei.c @@ -388,6 +388,7 @@ static int ll_lookup_it_finish(struct ptlrpc_request *request, struct inode *inode = NULL; __u64 bits = 0; int rc = 0; + struct dentry *alias; /* NB 1 request reference will be taken away by ll_intent_lock() * when I return @@ -412,26 +413,12 @@ static int ll_lookup_it_finish(struct ptlrpc_request *request, */ } - /* Only hash *de if it is unhashed (new dentry). - * Atoimc_open may passing hashed dentries for open. - */ - if (d_unhashed(*de)) { - struct dentry *alias; - - alias = ll_splice_alias(inode, *de); - if (IS_ERR(alias)) { - rc = PTR_ERR(alias); - goto out; - } - *de = alias; - } else if (!it_disposition(it, DISP_LOOKUP_NEG) && - !it_disposition(it, DISP_OPEN_CREATE)) { - /* With DISP_OPEN_CREATE dentry will be - * instantiated in ll_create_it. - */ - LASSERT(!d_inode(*de)); - d_instantiate(*de, inode); + alias = ll_splice_alias(inode, *de); + if (IS_ERR(alias)) { + rc = PTR_ERR(alias); + goto out; } + *de = alias; if (!it_disposition(it, DISP_LOOKUP_NEG)) { /* we have lookup look - unhide dentry */ @@ -587,6 +574,24 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry, dentry, PFID(ll_inode2fid(dir)), dir, file, open_flags, mode, *opened); + /* Only negative dentries enter here */ + LASSERT(!d_inode(dentry)); + + if (!d_in_lookup(dentry)) { + /* A valid negative dentry that just passed revalidation, + * there's little point to try and open it server-side, + * even though there's a minuscle chance it might succeed. + * Either way it's a valid race to just return -ENOENT here. 
+ */ + if (!(open_flags & O_CREAT)) + return -ENOENT; + + /* Otherwise we just unhash it to be rehashed afresh via + * lookup if necessary + */ + d_drop(dentry); + } + it = kzalloc(sizeof(*it), GFP_NOFS); if (!it) return -ENOMEM; -- cgit v1.1 From a87eeb900dbb9f8202f96604d56e47e67c936b9d Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Fri, 12 Aug 2016 17:20:07 -0500 Subject: scsi: fix upper bounds check of sense key in scsi_sense_key_string() Commit 655ee63cf371 ("scsi constants: command, sense key + additional sense string") added a "Completed" sense string with key 0xF to snstext[], but failed to update the upper bounds check of the sense key in scsi_sense_key_string(). Fixes: 655ee63cf371 ("[SCSI] scsi constants: command, sense key + additional sense strings") Cc: # v3.12+ Signed-off-by: Tyrel Datwyler Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/constants.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/constants.c b/drivers/scsi/constants.c index 83458f7..6dc96c8 100644 --- a/drivers/scsi/constants.c +++ b/drivers/scsi/constants.c @@ -361,8 +361,9 @@ static const char * const snstext[] = { /* Get sense key string or NULL if not available */ const char * -scsi_sense_key_string(unsigned char key) { - if (key <= 0xE) +scsi_sense_key_string(unsigned char key) +{ + if (key < ARRAY_SIZE(snstext)) return snstext[key]; return NULL; } -- cgit v1.1 From c6b269ba51be70fd11852bcad2e163c734e8e92a Mon Sep 17 00:00:00 2001 From: Xose Vazquez Perez Date: Sat, 13 Aug 2016 00:56:03 +0200 Subject: scsi: blacklist all RDAC devices for BLIST_NO_ULD_ATTACH "Universal Xport" LUN is used for in-band storage array management. Cc: Sean Stewart Cc: Christophe Varoqui Cc: James E.J. Bottomley Cc: Martin K. Petersen Cc: SCSI ML Cc: device-mapper development Signed-off-by: Xose Vazquez Perez Acked-by: Sean Stewart Signed-off-by: Martin K.
Petersen --- drivers/scsi/scsi_devinfo.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index eaccd65..2464569 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -246,6 +246,10 @@ static struct { {"IBM", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, {"SUN", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, {"DELL", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, + {"STK", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, + {"NETAPP", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, + {"LSI", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, + {"ENGENIO", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, {"SMSC", "USB 2 HS-CF", NULL, BLIST_SPARSELUN | BLIST_INQUIRY_36}, {"SONY", "CD-ROM CDU-8001", NULL, BLIST_BORKEN}, {"SONY", "TSL", NULL, BLIST_FORCELUN}, /* DDS3 & DDS4 autoloaders */ -- cgit v1.1 From 83cf8df2d4fa48a80b384fea4b09b12180a2442e Mon Sep 17 00:00:00 2001 From: Valentin Rothberg Date: Tue, 16 Aug 2016 08:27:53 +0200 Subject: drivers/iio/light/Kconfig: SENSORS_BH1780 cleanup Commit 7ef9153d9af5 ("misc: delete bh1780 driver") has removed the Kconfig option SENSORS_BH1780. Remove the last reference on this option. Signed-off-by: Valentin Rothberg Signed-off-by: Greg Kroah-Hartman --- drivers/iio/light/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iio/light/Kconfig b/drivers/iio/light/Kconfig index 7c566f5..69904d5 100644 --- a/drivers/iio/light/Kconfig +++ b/drivers/iio/light/Kconfig @@ -76,7 +76,6 @@ config BH1750 config BH1780 tristate "ROHM BH1780 ambient light sensor" depends on I2C - depends on !SENSORS_BH1780 help Say Y here to build support for the ROHM BH1780GLI ambient light sensor. 
-- cgit v1.1 From d3e2773c4ede5c62d2a92dae20e3a09b1ca55b6e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 15 Aug 2016 10:36:22 -0700 Subject: builddeb: Skip gcc-plugins when not configured When attempting to build a Debian kernel package, the "scripts/gcc-plugins" directory does not exist in the output tree unless CONFIG_GCC_PLUGINS=y. To avoid errors when not defined, this wraps the failing "find" in a config test. Reported-by: Frank Paulsen Tested-by: Christian Kujau Signed-off-by: Kees Cook Signed-off-by: Michal Marek --- scripts/package/builddeb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/package/builddeb b/scripts/package/builddeb index e1c09e2..8ea9fd2 100755 --- a/scripts/package/builddeb +++ b/scripts/package/builddeb @@ -332,7 +332,9 @@ if grep -q '^CONFIG_STACK_VALIDATION=y' $KCONFIG_CONFIG ; then (cd $objtree; find tools/objtool -type f -executable) >> "$objtree/debian/hdrobjfiles" fi (cd $objtree; find arch/$SRCARCH/include Module.symvers include scripts -type f) >> "$objtree/debian/hdrobjfiles" -(cd $objtree; find scripts/gcc-plugins -name \*.so -o -name gcc-common.h) >> "$objtree/debian/hdrobjfiles" +if grep -q '^CONFIG_GCC_PLUGINS=y' $KCONFIG_CONFIG ; then + (cd $objtree; find scripts/gcc-plugins -name \*.so -o -name gcc-common.h) >> "$objtree/debian/hdrobjfiles" +fi destdir=$kernel_headers_dir/usr/src/linux-headers-$version mkdir -p "$destdir" (cd $srctree; tar -c -f - -T -) < "$objtree/debian/hdrsrcfiles" | (cd $destdir; tar -xf -) -- cgit v1.1 From 39bbee4e549fbc358b2ef9137c4bf459abd164fb Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 16 Aug 2016 09:24:39 +0100 Subject: nvme-rdma: initialize ret to zero to avoid returning garbage ret is not initialized so it contains garbage. Ensure garbage is not returned by initializing ret to 0.
Signed-off-by: Colin Ian King Signed-off-by: Sagi Grimberg --- drivers/nvme/host/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 8d2875b..9c69393 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1319,7 +1319,7 @@ out_destroy_queue_ib: static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue) { struct nvme_rdma_ctrl *ctrl = queue->ctrl; - int ret; + int ret = 0; /* Own the controller deletion */ if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) -- cgit v1.1 From 3256aaef5e9a851f6be47656868020726e102187 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20Stehl=C3=A9?= Date: Tue, 16 Aug 2016 15:11:25 +0200 Subject: nvmet-rdma: Fix use after free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid dereferencing the queue pointer in nvmet_rdma_release_queue_work() after it has been freed by nvmet_rdma_free_queue(). Fixes: d8f7750a08968b10 ("nvmet-rdma: Correctly handle RDMA device hot removal") Signed-off-by: Vincent Stehlé Cc: Sagi Grimberg Cc: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/target/rdma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index b4d6485..5de8d0a 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -978,10 +978,11 @@ static void nvmet_rdma_release_queue_work(struct work_struct *w) container_of(w, struct nvmet_rdma_queue, release_work); struct rdma_cm_id *cm_id = queue->cm_id; struct nvmet_rdma_device *dev = queue->dev; + enum nvmet_rdma_queue_state state = queue->state; nvmet_rdma_free_queue(queue); - if (queue->state != NVMET_RDMA_IN_DEVICE_REMOVAL) + if (state != NVMET_RDMA_IN_DEVICE_REMOVAL) rdma_destroy_id(cm_id); kref_put(&dev->ref, nvmet_rdma_free_dev); -- cgit v1.1 From a956beda19a6b39fbc19d0aaf21947acdc18cf74 Mon Sep 17 00:00:00 2001 From: Trond 
Myklebust Date: Tue, 16 Aug 2016 10:26:47 -0400 Subject: NFS: Allow the mount option retrans=0 We should allow retrans=0 as just meaning that every timeout is a major timeout, and that there is no increment in the timeout value. For instance, this means that we would allow TCP users to specify a flat timeout value of 60s, by specifying "timeo=600,retrans=0" in their mount option string. Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 10 +++++----- fs/nfs/internal.h | 5 ++++- fs/nfs/super.c | 19 +++++++++++++++++-- 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 003ebce..1e10678 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -426,7 +426,7 @@ EXPORT_SYMBOL_GPL(nfs_mark_client_ready); * Initialise the timeout values for a connection */ void nfs_init_timeout_values(struct rpc_timeout *to, int proto, - unsigned int timeo, unsigned int retrans) + int timeo, int retrans) { to->to_initval = timeo * HZ / 10; to->to_retries = retrans; @@ -434,9 +434,9 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto, switch (proto) { case XPRT_TRANSPORT_TCP: case XPRT_TRANSPORT_RDMA: - if (to->to_retries == 0) + if (retrans == NFS_UNSPEC_RETRANS) to->to_retries = NFS_DEF_TCP_RETRANS; - if (to->to_initval == 0) + if (timeo == NFS_UNSPEC_TIMEO || to->to_retries == 0) to->to_initval = NFS_DEF_TCP_TIMEO * HZ / 10; if (to->to_initval > NFS_MAX_TCP_TIMEOUT) to->to_initval = NFS_MAX_TCP_TIMEOUT; @@ -449,9 +449,9 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto, to->to_exponential = 0; break; case XPRT_TRANSPORT_UDP: - if (to->to_retries == 0) + if (retrans == NFS_UNSPEC_RETRANS) to->to_retries = NFS_DEF_UDP_RETRANS; - if (!to->to_initval) + if (timeo == NFS_UNSPEC_TIMEO || to->to_initval == 0) to->to_initval = NFS_DEF_UDP_TIMEO * HZ / 10; if (to->to_initval > NFS_MAX_UDP_TIMEOUT) to->to_initval = NFS_MAX_UDP_TIMEOUT; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 7ce5e02..74935a1 100644 ---
a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -58,6 +58,9 @@ struct nfs_clone_mount { */ #define NFS_UNSPEC_PORT (-1) +#define NFS_UNSPEC_RETRANS (UINT_MAX) +#define NFS_UNSPEC_TIMEO (UINT_MAX) + /* * Maximum number of pages that readdir can use for creating * a vmapped array of pages. @@ -156,7 +159,7 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *, int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *, struct nfs_fattr *); void nfs_server_insert_lists(struct nfs_server *); void nfs_server_remove_lists(struct nfs_server *); -void nfs_init_timeout_values(struct rpc_timeout *, int, unsigned int, unsigned int); +void nfs_init_timeout_values(struct rpc_timeout *to, int proto, int timeo, int retrans); int nfs_init_server_rpcclient(struct nfs_server *, const struct rpc_timeout *t, rpc_authflavor_t); struct nfs_server *nfs_alloc_server(void); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 18d446e..d396013 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -923,6 +923,8 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void) data = kzalloc(sizeof(*data), GFP_KERNEL); if (data) { + data->timeo = NFS_UNSPEC_TIMEO; + data->retrans = NFS_UNSPEC_RETRANS; data->acregmin = NFS_DEF_ACREGMIN; data->acregmax = NFS_DEF_ACREGMAX; data->acdirmin = NFS_DEF_ACDIRMIN; @@ -1189,6 +1191,19 @@ static int nfs_get_option_ul(substring_t args[], unsigned long *option) return rc; } +static int nfs_get_option_ul_bound(substring_t args[], unsigned long *option, + unsigned long l_bound, unsigned long u_bound) +{ + int ret; + + ret = nfs_get_option_ul(args, option); + if (ret != 0) + return ret; + if (*option < l_bound || *option > u_bound) + return -ERANGE; + return 0; +} + /* * Error-check and convert a string of mount options from user space into * a data structure. 
The whole mount string is processed; bad options are @@ -1352,12 +1367,12 @@ static int nfs_parse_mount_options(char *raw, mnt->bsize = option; break; case Opt_timeo: - if (nfs_get_option_ul(args, &option) || option == 0) + if (nfs_get_option_ul_bound(args, &option, 1, INT_MAX)) goto out_invalid_value; mnt->timeo = option; break; case Opt_retrans: - if (nfs_get_option_ul(args, &option) || option == 0) + if (nfs_get_option_ul_bound(args, &option, 0, INT_MAX)) goto out_invalid_value; mnt->retrans = option; break; -- cgit v1.1 From 15d03055cf39fe61714aeda8d0a722b3137531ed Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 16 Aug 2016 11:08:22 -0400 Subject: pNFS/flexfiles: Set reasonable default retrans values for the data channel Prior to this patch, the retrans value was set at 5, meaning that we could see a maximum retransmission timeout value of more than 6 minutes. That's a tad high for NFSv3 where the protocol does allow the server to drop requests at any time. Since this is a data channel, let's just set retrans to 0, and the default timeout to 60s. The user can continue to adjust these defaults using the dataserver_retrans and dataserver_timeo module parameters. 
Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 0aa36be..970efba 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -17,8 +17,8 @@ #define NFSDBG_FACILITY NFSDBG_PNFS_LD -static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO; -static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS; +static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS; +static unsigned int dataserver_retrans; void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds) { -- cgit v1.1 From b347af816ad2086c1dacf9f74973b82f83e877be Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 11 Aug 2016 17:14:45 -0700 Subject: md: do not count journal as spare in GET_ARRAY_INFO GET_ARRAY_INFO counts journal as spare (spare_disks), which is not accurate. This patch fixes this. Reported-by: Yi Zhang Signed-off-by: Song Liu Signed-off-by: Shaohua Li --- drivers/md/md.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index 2c3ab6f..d750b52 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5851,6 +5851,9 @@ static int get_array_info(struct mddev *mddev, void __user *arg) working++; if (test_bit(In_sync, &rdev->flags)) insync++; + else if (test_bit(Journal, &rdev->flags)) + /* TODO: add journal count to md_u.h */ + ; else spare++; } -- cgit v1.1 From 4e486cba285ff06a1f28f0fc2991dde1482d1dcf Mon Sep 17 00:00:00 2001 From: Pawel Moll Date: Tue, 2 Aug 2016 16:45:37 +0100 Subject: bus: arm-ccn: Fix PMU handling of MN The "Miscellaneous Node" fell through cracks of node initialisation, as its ID is shared with HN-I. This patch treats MN as a special case (which it is), adding separate validation check for it and pre-defining the node ID in relevant events descriptions. 
That way one can simply run: # perf stat -a -e ccn/mn_ecbarrier/ Additionally, direction in the MN pseudo-events XP watchpoint definitions is corrected to be "TX" (1) as they are defined from the crosspoint point of view (thus barriers are transmitted from XP to MN). Cc: stable@vger.kernel.org # 3.17+ Signed-off-by: Pawel Moll --- drivers/bus/arm-ccn.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c index 97a9185..a11b9bb 100644 --- a/drivers/bus/arm-ccn.c +++ b/drivers/bus/arm-ccn.c @@ -187,6 +187,7 @@ struct arm_ccn { struct arm_ccn_component *xp; struct arm_ccn_dt dt; + int mn_id; }; static DEFINE_MUTEX(arm_ccn_mutex); @@ -328,6 +329,7 @@ struct arm_ccn_pmu_event { static ssize_t arm_ccn_pmu_event_show(struct device *dev, struct device_attribute *attr, char *buf) { + struct arm_ccn *ccn = pmu_to_arm_ccn(dev_get_drvdata(dev)); struct arm_ccn_pmu_event *event = container_of(attr, struct arm_ccn_pmu_event, attr); ssize_t res; @@ -354,6 +356,9 @@ static ssize_t arm_ccn_pmu_event_show(struct device *dev, res += snprintf(buf + res, PAGE_SIZE - res, ",cmp_l=?,cmp_h=?,mask=?"); break; + case CCN_TYPE_MN: + res += snprintf(buf + res, PAGE_SIZE - res, ",node=%d", ccn->mn_id); + break; default: res += snprintf(buf + res, PAGE_SIZE - res, ",node=?"); break; @@ -383,9 +388,9 @@ static umode_t arm_ccn_pmu_events_is_visible(struct kobject *kobj, } static struct arm_ccn_pmu_event arm_ccn_pmu_events[] = { - CCN_EVENT_MN(eobarrier, "dir=0,vc=0,cmp_h=0x1c00", CCN_IDX_MASK_OPCODE), - CCN_EVENT_MN(ecbarrier, "dir=0,vc=0,cmp_h=0x1e00", CCN_IDX_MASK_OPCODE), - CCN_EVENT_MN(dvmop, "dir=0,vc=0,cmp_h=0x2800", CCN_IDX_MASK_OPCODE), + CCN_EVENT_MN(eobarrier, "dir=1,vc=0,cmp_h=0x1c00", CCN_IDX_MASK_OPCODE), + CCN_EVENT_MN(ecbarrier, "dir=1,vc=0,cmp_h=0x1e00", CCN_IDX_MASK_OPCODE), + CCN_EVENT_MN(dvmop, "dir=1,vc=0,cmp_h=0x2800", CCN_IDX_MASK_OPCODE), CCN_EVENT_HNI(txdatflits, "dir=1,vc=3", 
CCN_IDX_MASK_ANY), CCN_EVENT_HNI(rxdatflits, "dir=0,vc=3", CCN_IDX_MASK_ANY), CCN_EVENT_HNI(txreqflits, "dir=1,vc=0", CCN_IDX_MASK_ANY), @@ -759,6 +764,12 @@ static int arm_ccn_pmu_event_init(struct perf_event *event) /* Validate node/xp vs topology */ switch (type) { + case CCN_TYPE_MN: + if (node_xp != ccn->mn_id) { + dev_warn(ccn->dev, "Invalid MN ID %d!\n", node_xp); + return -EINVAL; + } + break; case CCN_TYPE_XP: if (node_xp >= ccn->num_xps) { dev_warn(ccn->dev, "Invalid XP ID %d!\n", node_xp); @@ -1361,6 +1372,8 @@ static int arm_ccn_init_nodes(struct arm_ccn *ccn, int region, switch (type) { case CCN_TYPE_MN: + ccn->mn_id = id; + return 0; case CCN_TYPE_DT: return 0; case CCN_TYPE_XP: -- cgit v1.1 From b7c1beb278e8e3dc664ed3df3fc786db126120a9 Mon Sep 17 00:00:00 2001 From: Pawel Moll Date: Fri, 5 Aug 2016 15:07:10 +0100 Subject: bus: arm-ccn: Do not attempt to configure XPs for cycle counter Fuzzing the CCN perf driver revealed a small but definitely dangerous mistake in the event setup code. When a cycle counter is requested, the driver should not reconfigure the events bus at all, otherwise it will corrupt (in most but the simplest cases) its configuration and may end up accessing XP array out of its bounds and corrupting control registers. 
Reported-by: Mark Rutland Reviewed-by: Mark Rutland Tested-by: Mark Rutland Cc: stable@vger.kernel.org # 3.17+ Signed-off-by: Pawel Moll --- drivers/bus/arm-ccn.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c index a11b9bb..9bbb0ab 100644 --- a/drivers/bus/arm-ccn.c +++ b/drivers/bus/arm-ccn.c @@ -897,6 +897,10 @@ static void arm_ccn_pmu_xp_dt_config(struct perf_event *event, int enable) struct arm_ccn_component *xp; u32 val, dt_cfg; + /* Nothing to do for cycle counter */ + if (hw->idx == CCN_IDX_PMU_CYCLE_COUNTER) + return; + if (CCN_CONFIG_TYPE(event->attr.config) == CCN_TYPE_XP) xp = &ccn->xp[CCN_CONFIG_XP(event->attr.config)]; else -- cgit v1.1 From b928466b2169e061822daad48ecf55b005445547 Mon Sep 17 00:00:00 2001 From: Pawel Moll Date: Wed, 10 Aug 2016 17:06:26 +0100 Subject: bus: arm-ccn: Fix XP watchpoint settings bitmask The code setting XP watchpoint comparator and mask registers should, in order to be fully compliant with specification, zero one or more most significant bits of each field. In both L cases it means zeroing bit 63. The bitmask doing this was wrong, though, zeroing bit 60 instead. Fortunately, due to a lucky coincidence, this turned out to be fairly innocent with the existing hardware. Fixed now. 
Cc: stable@vger.kernel.org # 3.17+ Signed-off-by: Pawel Moll --- drivers/bus/arm-ccn.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c index 9bbb0ab..647a27b 100644 --- a/drivers/bus/arm-ccn.c +++ b/drivers/bus/arm-ccn.c @@ -1003,7 +1003,7 @@ static void arm_ccn_pmu_xp_watchpoint_config(struct perf_event *event) /* Comparison values */ writel(cmp_l & 0xffffffff, source->base + CCN_XP_DT_CMP_VAL_L(wp)); - writel((cmp_l >> 32) & 0xefffffff, + writel((cmp_l >> 32) & 0x7fffffff, source->base + CCN_XP_DT_CMP_VAL_L(wp) + 4); writel(cmp_h & 0xffffffff, source->base + CCN_XP_DT_CMP_VAL_H(wp)); writel((cmp_h >> 32) & 0x0fffffff, @@ -1011,7 +1011,7 @@ static void arm_ccn_pmu_xp_watchpoint_config(struct perf_event *event) /* Mask */ writel(mask_l & 0xffffffff, source->base + CCN_XP_DT_CMP_MASK_L(wp)); - writel((mask_l >> 32) & 0xefffffff, + writel((mask_l >> 32) & 0x7fffffff, source->base + CCN_XP_DT_CMP_MASK_L(wp) + 4); writel(mask_h & 0xffffffff, source->base + CCN_XP_DT_CMP_MASK_H(wp)); writel((mask_h >> 32) & 0x0fffffff, -- cgit v1.1 From 90d11e267a32a25d2cb69127174a96b9e518395e Mon Sep 17 00:00:00 2001 From: Pawel Moll Date: Thu, 11 Aug 2016 11:56:28 +0100 Subject: bus: arm-ccn: Correct required arguments for XP PMU events XP can provide events from two sources: watchpoints, observing traffic on device ports and PMU looking at internal buses. Unfortunately the sysfs definition of the PMU events was requiring port number (instead of bus number) and direction (the buses are unidirectional), as these fields were shared with the watchpoint event. Although it does not introduce a major problem (port can be used as bus alias and direction is simply ignored for XP PMU events), it's better to fix it now, before external tools start depending on this behaviour. 
Signed-off-by: Pawel Moll --- Documentation/arm/CCN.txt | 16 ++++++++++------ drivers/bus/arm-ccn.c | 13 ++++++++++--- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/Documentation/arm/CCN.txt b/Documentation/arm/CCN.txt index ffca443..15cdb7b 100644 --- a/Documentation/arm/CCN.txt +++ b/Documentation/arm/CCN.txt @@ -18,13 +18,17 @@ and config2 fields of the perf_event_attr structure. The "events" directory provides configuration templates for all documented events, that can be used with perf tool. For example "xp_valid_flit" is an equivalent of "type=0x8,event=0x4". Other parameters must be -explicitly specified. For events originating from device, "node" -defines its index. All crosspoint events require "xp" (index), -"port" (device port number) and "vc" (virtual channel ID) and -"dir" (direction). Watchpoints (special "event" value 0xfe) also -require comparator values ("cmp_l" and "cmp_h") and "mask", being -index of the comparator mask. +explicitly specified. +For events originating from device, "node" defines its index. + +Crosspoint PMU events require "xp" (index), "bus" (bus number) +and "vc" (virtual channel ID). + +Crosspoint watchpoint-based events (special "event" value 0xfe) +require "xp" and "vc" as as above plus "port" (device port index), +"dir" (transmit/receive direction), comparator values ("cmp_l" +and "cmp_h") and "mask", being index of the comparator mask. 
Masks are defined separately from the event description (due to limited number of the config values) in the "cmp_mask" directory, with first 8 configurable by user and additional diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c index 647a27b..ddb65c1 100644 --- a/drivers/bus/arm-ccn.c +++ b/drivers/bus/arm-ccn.c @@ -213,6 +213,7 @@ static int arm_ccn_node_to_xp_port(int node) #define CCN_CONFIG_TYPE(_config) (((_config) >> 8) & 0xff) #define CCN_CONFIG_EVENT(_config) (((_config) >> 16) & 0xff) #define CCN_CONFIG_PORT(_config) (((_config) >> 24) & 0x3) +#define CCN_CONFIG_BUS(_config) (((_config) >> 24) & 0x3) #define CCN_CONFIG_VC(_config) (((_config) >> 26) & 0x7) #define CCN_CONFIG_DIR(_config) (((_config) >> 29) & 0x1) #define CCN_CONFIG_MASK(_config) (((_config) >> 30) & 0xf) @@ -242,6 +243,7 @@ static CCN_FORMAT_ATTR(xp, "config:0-7"); static CCN_FORMAT_ATTR(type, "config:8-15"); static CCN_FORMAT_ATTR(event, "config:16-23"); static CCN_FORMAT_ATTR(port, "config:24-25"); +static CCN_FORMAT_ATTR(bus, "config:24-25"); static CCN_FORMAT_ATTR(vc, "config:26-28"); static CCN_FORMAT_ATTR(dir, "config:29-29"); static CCN_FORMAT_ATTR(mask, "config:30-33"); @@ -254,6 +256,7 @@ static struct attribute *arm_ccn_pmu_format_attrs[] = { &arm_ccn_pmu_format_attr_type.attr.attr, &arm_ccn_pmu_format_attr_event.attr.attr, &arm_ccn_pmu_format_attr_port.attr.attr, + &arm_ccn_pmu_format_attr_bus.attr.attr, &arm_ccn_pmu_format_attr_vc.attr.attr, &arm_ccn_pmu_format_attr_dir.attr.attr, &arm_ccn_pmu_format_attr_mask.attr.attr, @@ -351,10 +354,14 @@ static ssize_t arm_ccn_pmu_event_show(struct device *dev, break; case CCN_TYPE_XP: res += snprintf(buf + res, PAGE_SIZE - res, - ",xp=?,port=?,vc=?,dir=?"); + ",xp=?,vc=?"); if (event->event == CCN_EVENT_WATCHPOINT) res += snprintf(buf + res, PAGE_SIZE - res, - ",cmp_l=?,cmp_h=?,mask=?"); + ",port=?,dir=?,cmp_l=?,cmp_h=?,mask=?"); + else + res += snprintf(buf + res, PAGE_SIZE - res, + ",bus=?"); + break; case CCN_TYPE_MN: res += 
snprintf(buf + res, PAGE_SIZE - res, ",node=%d", ccn->mn_id); @@ -1029,7 +1036,7 @@ static void arm_ccn_pmu_xp_event_config(struct perf_event *event) hw->event_base = CCN_XP_DT_CONFIG__DT_CFG__XP_PMU_EVENT(hw->config_base); id = (CCN_CONFIG_VC(event->attr.config) << 4) | - (CCN_CONFIG_PORT(event->attr.config) << 3) | + (CCN_CONFIG_BUS(event->attr.config) << 3) | (CCN_CONFIG_EVENT(event->attr.config) << 0); val = readl(source->base + CCN_XP_PMU_EVENT_SEL); -- cgit v1.1 From 3249bce459ff0bb7c1621b00a8e2d6afe24c53bb Mon Sep 17 00:00:00 2001 From: Pawel Moll Date: Thu, 11 Aug 2016 12:00:36 +0100 Subject: bus: arm-ccn: Add missing event attribute exclusions for host/guest CCN PMUs have no knowledge into VM-related origins of the memory traffic, therefore can't handle requests for host-only or guest-only events. Added appropriate exclusions (they should have been there from the beginning). This required changing the error code returned, as the userspace tool only re-negotiates the options (exclude_guest is true by default) only for EINVAL. 
Signed-off-by: Pawel Moll --- drivers/bus/arm-ccn.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c index ddb65c1..02f81e3 100644 --- a/drivers/bus/arm-ccn.c +++ b/drivers/bus/arm-ccn.c @@ -745,9 +745,10 @@ static int arm_ccn_pmu_event_init(struct perf_event *event) if (has_branch_stack(event) || event->attr.exclude_user || event->attr.exclude_kernel || event->attr.exclude_hv || - event->attr.exclude_idle) { + event->attr.exclude_idle || event->attr.exclude_host || + event->attr.exclude_guest) { dev_warn(ccn->dev, "Can't exclude execution levels!\n"); - return -EOPNOTSUPP; + return -EINVAL; } if (event->cpu < 0) { -- cgit v1.1 From cea8aa3a93d1734816e1e3f7b118e1dddf3f4aaa Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 17 Aug 2016 14:07:46 +0200 Subject: regulator: Change Krzysztof Kozlowski's email to kernel.org Change my email address to kernel.org instead of Samsung one for the purpose of any future contact. The copyrights remain untouched and are attributed to Samsung. 
Signed-off-by: Krzysztof Kozlowski Signed-off-by: Mark Brown --- drivers/regulator/max14577-regulator.c | 4 ++-- drivers/regulator/max77693-regulator.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/regulator/max14577-regulator.c b/drivers/regulator/max14577-regulator.c index b2daa66..c9ff261 100644 --- a/drivers/regulator/max14577-regulator.c +++ b/drivers/regulator/max14577-regulator.c @@ -2,7 +2,7 @@ * max14577.c - Regulator driver for the Maxim 14577/77836 * * Copyright (C) 2013,2014 Samsung Electronics - * Krzysztof Kozlowski + * Krzysztof Kozlowski * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -331,7 +331,7 @@ static void __exit max14577_regulator_exit(void) } module_exit(max14577_regulator_exit); -MODULE_AUTHOR("Krzysztof Kozlowski "); +MODULE_AUTHOR("Krzysztof Kozlowski "); MODULE_DESCRIPTION("Maxim 14577/77836 regulator driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:max14577-regulator"); diff --git a/drivers/regulator/max77693-regulator.c b/drivers/regulator/max77693-regulator.c index de730fd..cfbb951 100644 --- a/drivers/regulator/max77693-regulator.c +++ b/drivers/regulator/max77693-regulator.c @@ -3,7 +3,7 @@ * * Copyright (C) 2013-2015 Samsung Electronics * Jonghwa Lee - * Krzysztof Kozlowski + * Krzysztof Kozlowski * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -314,5 +314,5 @@ module_exit(max77693_pmic_cleanup); MODULE_DESCRIPTION("MAXIM 77693/77843 regulator driver"); MODULE_AUTHOR("Jonghwa Lee "); -MODULE_AUTHOR("Krzysztof Kozlowski "); +MODULE_AUTHOR("Krzysztof Kozlowski "); MODULE_LICENSE("GPL"); -- cgit v1.1 From 568ac888215c7fb2fabe8ea739b00ec3c1f5d440 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Wed, 10 Aug 2016 15:43:06 -0400 Subject: cgroup: reduce read locked section of cgroup_threadgroup_rwsem 
during fork cgroup_threadgroup_rwsem is acquired in read mode during process exit and fork. It is also grabbed in write mode during __cgroup_procs_write(). I've recently run into a scenario with lots of memory pressure and OOM and I am beginning to see systemd __switch_to+0x1f8/0x350 __schedule+0x30c/0x990 schedule+0x48/0xc0 percpu_down_write+0x114/0x170 __cgroup_procs_write.isra.12+0xb8/0x3c0 cgroup_file_write+0x74/0x1a0 kernfs_fop_write+0x188/0x200 __vfs_write+0x6c/0xe0 vfs_write+0xc0/0x230 SyS_write+0x6c/0x110 system_call+0x38/0xb4 This thread is waiting on the reader of cgroup_threadgroup_rwsem to exit. The reader itself is under memory pressure and has gone into reclaim after fork. There are times the reader also ends up waiting on oom_lock as well. __switch_to+0x1f8/0x350 __schedule+0x30c/0x990 schedule+0x48/0xc0 jbd2_log_wait_commit+0xd4/0x180 ext4_evict_inode+0x88/0x5c0 evict+0xf8/0x2a0 dispose_list+0x50/0x80 prune_icache_sb+0x6c/0x90 super_cache_scan+0x190/0x210 shrink_slab.part.15+0x22c/0x4c0 shrink_zone+0x288/0x3c0 do_try_to_free_pages+0x1dc/0x590 try_to_free_pages+0xdc/0x260 __alloc_pages_nodemask+0x72c/0xc90 alloc_pages_current+0xb4/0x1a0 page_table_alloc+0xc0/0x170 __pte_alloc+0x58/0x1f0 copy_page_range+0x4ec/0x950 copy_process.isra.5+0x15a0/0x1870 _do_fork+0xa8/0x4b0 ppc_clone+0x8/0xc In the meanwhile, all processes exiting/forking are blocked almost stalling the system. This patch moves the threadgroup_change_begin from before cgroup_fork() to just before cgroup_canfork(). There is no need to worry about threadgroup changes till the task is actually added to the threadgroup. This avoids having to call reclaim with cgroup_threadgroup_rwsem held. tj: Subject and description edits. 
Signed-off-by: Balbir Singh Acked-by: Zefan Li Cc: Oleg Nesterov Cc: Andrew Morton Cc: stable@vger.kernel.org # v4.2+ Signed-off-by: Tejun Heo --- kernel/fork.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 52e725d4..aaf7823 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1404,7 +1404,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->real_start_time = ktime_get_boot_ns(); p->io_context = NULL; p->audit_context = NULL; - threadgroup_change_begin(current); cgroup_fork(p); #ifdef CONFIG_NUMA p->mempolicy = mpol_dup(p->mempolicy); @@ -1556,6 +1555,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, INIT_LIST_HEAD(&p->thread_group); p->task_works = NULL; + threadgroup_change_begin(current); /* * Ensure that the cgroup subsystem policies allow the new process to be * forked. It should be noted the the new process's css_set can be changed @@ -1656,6 +1656,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, bad_fork_cancel_cgroup: cgroup_cancel_fork(p); bad_fork_free_pid: + threadgroup_change_end(current); if (pid != &init_struct_pid) free_pid(pid); bad_fork_cleanup_thread: @@ -1688,7 +1689,6 @@ bad_fork_cleanup_policy: mpol_put(p->mempolicy); bad_fork_cleanup_threadgroup_lock: #endif - threadgroup_change_end(current); delayacct_tsk_free(p); bad_fork_cleanup_count: atomic_dec(&p->cred->user->processes); -- cgit v1.1 From cfee6b5874654fe4e7388bc3112d3afb76cd8b9e Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Tue, 16 Aug 2016 10:05:15 +0800 Subject: ARM: imx6ul: populates platform device at .init_machine At imx6ul_init_machine, it calls imx6ul_pm_init which needs to find platform device for ocram, but the default populate platform device is at of_platform_default_populate_init, which is located at arch_initcall_sync, and called later than arch_initcall (.init_machine is located at that). 
So below warning message will be showed during boots up: imx6q_suspend_init: failed to find ocram device! imx6_pm_common_init: No DDR LPM support with suspend -19! Due to lack of ocram device, the suspend mode "mem" which needs ocram to store suspend routine code is invalid. This commit populates platform device before imx6ul_pm_init like other imx6 platforms do, and the suspend mode "mem" can work successfully. Signed-off-by: Peter Chen Fixes: 850bea2335e4 ("arm: Remove unnecessary of_platform_populate with default match table") Cc: Anson Huang Cc: Fabio Estevam Cc: Kefeng Wang Cc: Rob Herring Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/mach-imx/mach-imx6ul.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-imx/mach-imx6ul.c b/arch/arm/mach-imx/mach-imx6ul.c index 5d9bfab..6bb7d9c 100644 --- a/arch/arm/mach-imx/mach-imx6ul.c +++ b/arch/arm/mach-imx/mach-imx6ul.c @@ -64,6 +64,7 @@ static void __init imx6ul_init_machine(void) if (parent == NULL) pr_warn("failed to initialize soc device\n"); + of_platform_default_populate(NULL, NULL, parent); imx6ul_enet_init(); imx_anatop_init(); imx6ul_pm_init(); -- cgit v1.1 From 326dce0734b63c3b82b6a88e5645eab8b54c6692 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 16 Aug 2016 18:50:42 +0200 Subject: MAINTAINERS: Switch to kernel.org account for Krzysztof Kozlowski Change my email address to kernel.org account instead of Samsung one. Add Bartlomiej Zolnierkiewicz as a co-maintainer of Maxim and Samsung PMIC drivers. These are used on many of our boards along with Exynos SoCs and Samsung R&D Institute Poland can still take care of them. 
Signed-off-by: Krzysztof Kozlowski Cc: Kukjin Kim Cc: linux-samsung-soc@vger.kernel.org Cc: Bartlomiej Zolnierkiewicz Acked-by: Sylwester Nawrocki Acked-by: Kukjin Kim --- .mailmap | 1 + MAINTAINERS | 15 +++++++++------ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/.mailmap b/.mailmap index 2a91c14..b18912c 100644 --- a/.mailmap +++ b/.mailmap @@ -88,6 +88,7 @@ Kay Sievers Kenneth W Chen Konstantin Khlebnikov Koushik +Krzysztof Kozlowski Krzysztof Kozlowski Kuninori Morimoto Leonid I Ananiev diff --git a/MAINTAINERS b/MAINTAINERS index 20bb1d0..c9cd8d3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1613,7 +1613,7 @@ N: rockchip ARM/SAMSUNG EXYNOS ARM ARCHITECTURES M: Kukjin Kim -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-samsung-soc@vger.kernel.org (moderated for non-subscribers) S: Maintained @@ -7448,7 +7448,8 @@ F: Documentation/devicetree/bindings/sound/max9860.txt F: sound/soc/codecs/max9860.* MAXIM MUIC CHARGER DRIVERS FOR EXYNOS BASED BOARDS -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski +M: Bartlomiej Zolnierkiewicz L: linux-pm@vger.kernel.org S: Supported F: drivers/power/max14577_charger.c @@ -7464,7 +7465,8 @@ F: include/dt-bindings/*/*max77802.h MAXIM PMIC AND MUIC DRIVERS FOR EXYNOS BASED BOARDS M: Chanwoo Choi -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski +M: Bartlomiej Zolnierkiewicz L: linux-kernel@vger.kernel.org S: Supported F: drivers/*/max14577*.c @@ -9230,7 +9232,7 @@ F: drivers/pinctrl/sh-pfc/ PIN CONTROLLER - SAMSUNG M: Tomasz Figa -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski M: Sylwester Nawrocki L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-samsung-soc@vger.kernel.org (moderated for non-subscribers) @@ -10163,7 +10165,7 @@ S: Maintained F: drivers/platform/x86/samsung-laptop.c SAMSUNG AUDIO (ASoC) DRIVERS -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski M: Sangbeom Kim M: Sylwester Nawrocki L: 
alsa-devel@alsa-project.org (moderated for non-subscribers) @@ -10178,7 +10180,8 @@ F: drivers/video/fbdev/s3c-fb.c SAMSUNG MULTIFUNCTION PMIC DEVICE DRIVERS M: Sangbeom Kim -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski +M: Bartlomiej Zolnierkiewicz L: linux-kernel@vger.kernel.org L: linux-samsung-soc@vger.kernel.org S: Supported -- cgit v1.1 From 207efcd2b55e0460dfee35663fbb3d05efad990a Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 12 Aug 2016 13:42:40 +0800 Subject: md: remove obsolete ret in md_start_sync The ret is not needed anymore since we have already move resync_start into md_do_sync in commit 41a9a0d. Reviewed-by: NeilBrown Signed-off-by: Guoqing Jiang Signed-off-by: Shaohua Li --- drivers/md/md.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index d750b52..19d8e23 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8278,16 +8278,13 @@ no_add: static void md_start_sync(struct work_struct *ws) { struct mddev *mddev = container_of(ws, struct mddev, del_work); - int ret = 0; mddev->sync_thread = md_register_thread(md_do_sync, mddev, "resync"); if (!mddev->sync_thread) { - if (!(mddev_is_clustered(mddev) && ret == -EAGAIN)) - printk(KERN_ERR "%s: could not start resync" - " thread...\n", - mdname(mddev)); + printk(KERN_ERR "%s: could not start resync thread...\n", + mdname(mddev)); /* leave the spares where they are, it shouldn't hurt */ clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); -- cgit v1.1 From c622ca543bff8e73efacf4dafa0cc9851ecea511 Mon Sep 17 00:00:00 2001 From: Artur Paszkiewicz Date: Tue, 16 Aug 2016 14:26:08 +0200 Subject: md: don't print the same repeated messages about delayed sync operation This fixes a long-standing bug that caused a flood of messages like: "md: delaying data-check of md1 until md2 has finished (they share one or more physical units)" It can be reproduced like this: 1. 
Create at least 3 raid1 arrays on a pair of disks, each on different partitions. 2. Request a sync operation like 'check' or 'repair' on 2 arrays by writing to their md/sync_action attribute files. One operation should start and one should be delayed and a message like the above will be printed. 3. Issue a write to the third array. Each write will cause 2 copies of the message to be printed. This happens when wake_up(&resync_wait) is called, usually by md_check_recovery(). Then the delayed sync thread again prints the message and is put to sleep. This patch adds a check in md_do_sync() to prevent printing this message more than once for the same pair of devices. Reported-by: Sven Koehler Link: https://bugzilla.kernel.org/show_bug.cgi?id=151801 Signed-off-by: Artur Paszkiewicz Signed-off-by: Shaohua Li --- drivers/md/md.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 19d8e23..cc25cbc 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7865,6 +7865,7 @@ void md_do_sync(struct md_thread *thread) */ do { + int mddev2_minor = -1; mddev->curr_resync = 2; try_again: @@ -7894,10 +7895,14 @@ void md_do_sync(struct md_thread *thread) prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE); if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) && mddev2->curr_resync >= mddev->curr_resync) { - printk(KERN_INFO "md: delaying %s of %s" - " until %s has finished (they" - " share one or more physical units)\n", - desc, mdname(mddev), mdname(mddev2)); + if (mddev2_minor != mddev2->md_minor) { + mddev2_minor = mddev2->md_minor; + printk(KERN_INFO "md: delaying %s of %s" + " until %s has finished (they" + " share one or more physical units)\n", + desc, mdname(mddev), + mdname(mddev2)); + } mddev_put(mddev2); if (signal_pending(current)) flush_signals(current); -- cgit v1.1 From b825b44c4ef4dabfdaf4e82db2263d377ac45d67 Mon Sep 17 00:00:00 2001 From: Jay Freyensee Date: Wed, 17 Aug 2016 15:00:25 -0700 Subject: 
nvmet-rdma: +1 to *queue_size from hsqsize/hrqsize The host will be sending sqsize 0-based hsqsize value, the target need to be adjusted as well. Signed-off-by: Jay Freyensee Reviewed-by: Sagi Grimberg Signed-off-by: Sagi Grimberg --- drivers/nvme/target/rdma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 5de8d0a..1cbe6e0 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1004,10 +1004,10 @@ nvmet_rdma_parse_cm_connect_req(struct rdma_conn_param *conn, queue->host_qid = le16_to_cpu(req->qid); /* - * req->hsqsize corresponds to our recv queue size + * req->hsqsize corresponds to our recv queue size plus 1 * req->hrqsize corresponds to our send queue size */ - queue->recv_queue_size = le16_to_cpu(req->hsqsize); + queue->recv_queue_size = le16_to_cpu(req->hsqsize) + 1; queue->send_queue_size = le16_to_cpu(req->hrqsize); if (!queue->host_qid && queue->recv_queue_size > NVMF_AQ_DEPTH) -- cgit v1.1 From f994d9dc28bc27353acde2caaf718222d92a3e24 Mon Sep 17 00:00:00 2001 From: Jay Freyensee Date: Wed, 17 Aug 2016 15:00:26 -0700 Subject: fabrics: define admin sqsize min default, per spec Upon admin queue connect(), the rdma qp was being set based on NVMF_AQ_DEPTH. However, the fabrics layer was using the sqsize field value set for I/O queues for the admin queue, which threw the nvme layer and rdma layer off-whack: root@fedora23-fabrics-host1 nvmf]# dmesg [ 3507.798642] nvme_fabrics: nvmf_connect_admin_queue():admin sqsize being sent is: 128 [ 3507.798858] nvme nvme0: creating 16 I/O queues. [ 3507.896407] nvme nvme0: new ctrl: NQN "nullside-nqn", addr 192.168.1.3:4420 Thus, to have a different admin queue value, we use NVMF_AQ_DEPTH for connect() and RDMA private data as the minimum depth specified in the NVMe-over-Fabrics 1.0 spec (and in that RDMA private data we treat hrqsize as 1's-based value, per current understanding of the fabrics spec). 
Reported-by: Daniel Verkamp Signed-off-by: Jay Freyensee Reviewed-by: Daniel Verkamp Signed-off-by: Sagi Grimberg --- drivers/nvme/host/fabrics.c | 9 ++++++++- drivers/nvme/host/rdma.c | 13 +++++++++++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index dc99676..020302c 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -363,7 +363,14 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl) cmd.connect.opcode = nvme_fabrics_command; cmd.connect.fctype = nvme_fabrics_type_connect; cmd.connect.qid = 0; - cmd.connect.sqsize = cpu_to_le16(ctrl->sqsize); + + /* + * fabrics spec sets a minimum of depth 32 for admin queue, + * so set the queue with this depth always until + * justification otherwise. + */ + cmd.connect.sqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1); + /* * Set keep-alive timeout in seconds granularity (ms * 1000) * and add a grace period for controller kato enforcement diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 9c69393..d44809e 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1278,8 +1278,17 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue) priv.recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0); priv.qid = cpu_to_le16(nvme_rdma_queue_idx(queue)); - priv.hrqsize = cpu_to_le16(queue->queue_size); - priv.hsqsize = cpu_to_le16(queue->queue_size); + /* + * set the admin queue depth to the minimum size + * specified by the Fabrics standard. 
+ */ + if (priv.qid == 0) { + priv.hrqsize = cpu_to_le16(NVMF_AQ_DEPTH); + priv.hsqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1); + } else { + priv.hrqsize = cpu_to_le16(queue->queue_size); + priv.hsqsize = cpu_to_le16(queue->queue_size); + } ret = rdma_connect(queue->cm_id, &param); if (ret) { -- cgit v1.1 From c5af8654c422cfdd8480be3a244748e18cace6c5 Mon Sep 17 00:00:00 2001 From: Jay Freyensee Date: Wed, 17 Aug 2016 15:00:27 -0700 Subject: nvme-rdma: fix sqsize/hsqsize per spec Per NVMe-over-Fabrics 1.0 spec, sqsize is represented as a 0-based value. Also per spec, the RDMA binding values shall be set to sqsize, which makes hsqsize 0-based values. Thus, the sqsize during NVMf connect() is now: [root@fedora23-fabrics-host1 for-48]# dmesg [ 318.720645] nvme_fabrics: nvmf_connect_admin_queue(): sqsize for admin queue: 31 [ 318.720884] nvme nvme0: creating 16 I/O queues. [ 318.810114] nvme_fabrics: nvmf_connect_io_queue(): sqsize for i/o queue: 127 Finally, current interpretation implies hrqsize is 1's based so set it appropriately. 
Reported-by: Daniel Verkamp Signed-off-by: Jay Freyensee Signed-off-by: Sagi Grimberg --- drivers/nvme/host/rdma.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index d44809e..c133256 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -645,7 +645,8 @@ static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl) int i, ret; for (i = 1; i < ctrl->queue_count; i++) { - ret = nvme_rdma_init_queue(ctrl, i, ctrl->ctrl.sqsize); + ret = nvme_rdma_init_queue(ctrl, i, + ctrl->ctrl.opts->queue_size); if (ret) { dev_info(ctrl->ctrl.device, "failed to initialize i/o queue: %d\n", ret); @@ -1286,8 +1287,13 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue) priv.hrqsize = cpu_to_le16(NVMF_AQ_DEPTH); priv.hsqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1); } else { + /* + * current interpretation of the fabrics spec + * is at minimum you make hrqsize sqsize+1, or a + * 1's based representation of sqsize. 
+ */ priv.hrqsize = cpu_to_le16(queue->queue_size); - priv.hsqsize = cpu_to_le16(queue->queue_size); + priv.hsqsize = cpu_to_le16(queue->ctrl->ctrl.sqsize); } ret = rdma_connect(queue->cm_id, &param); @@ -1825,7 +1831,7 @@ static int nvme_rdma_create_io_queues(struct nvme_rdma_ctrl *ctrl) memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); ctrl->tag_set.ops = &nvme_rdma_mq_ops; - ctrl->tag_set.queue_depth = ctrl->ctrl.sqsize; + ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; ctrl->tag_set.reserved_tags = 1; /* fabric connect */ ctrl->tag_set.numa_node = NUMA_NO_NODE; ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; @@ -1923,7 +1929,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, spin_lock_init(&ctrl->lock); ctrl->queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */ - ctrl->ctrl.sqsize = opts->queue_size; + ctrl->ctrl.sqsize = opts->queue_size - 1; ctrl->ctrl.kato = opts->kato; ret = -ENOMEM; -- cgit v1.1 From eadb7cf44105ae8250f0d638dc880c3ed511c4e2 Mon Sep 17 00:00:00 2001 From: Jay Freyensee Date: Wed, 17 Aug 2016 15:00:28 -0700 Subject: nvme-loop: set sqsize to 0-based value, per spec Signed-off-by: Jay Freyensee Signed-off-by: Sagi Grimberg --- drivers/nvme/target/loop.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 7affd40..395e60d 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -556,7 +556,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); ctrl->tag_set.ops = &nvme_loop_mq_ops; - ctrl->tag_set.queue_depth = ctrl->ctrl.sqsize; + ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; ctrl->tag_set.reserved_tags = 1; /* fabric connect */ ctrl->tag_set.numa_node = NUMA_NO_NODE; ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; @@ -620,7 +620,7 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev, ret = -ENOMEM; - ctrl->ctrl.sqsize = 
opts->queue_size; + ctrl->ctrl.sqsize = opts->queue_size - 1; ctrl->ctrl.kato = opts->kato; ctrl->queues = kcalloc(opts->nr_io_queues + 1, sizeof(*ctrl->queues), -- cgit v1.1 From 2e365a703ea60de617287354ae63bf677e406feb Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Thu, 18 Aug 2016 16:25:55 +0900 Subject: MAINTAINERS: add myself as Samsung SPI maintainer Create a new entry for the Samsung SPI driver supported by the drivers/spi/spi-s3c* and remove it from its original place under "ARM/SAMSUNG EXYNOS ARM ARCHITECTURES". The original maintainership inherited from the Samsung Exynos ARM Architecture is kept as it was (i.e. Kukjin and Krzysztof), I will help and co-maintain the driver. Signed-off-by: Andi Shyti Acked-by: Krzysztof Kozlowski Signed-off-by: Mark Brown --- MAINTAINERS | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 20bb1d0..5051c2f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1633,7 +1633,6 @@ F: drivers/*/*s3c64xx* F: drivers/*/*s5pv210* F: drivers/memory/samsung/* F: drivers/soc/samsung/* -F: drivers/spi/spi-s3c* F: Documentation/arm/Samsung/ F: Documentation/devicetree/bindings/arm/samsung/ F: Documentation/devicetree/bindings/sram/samsung-sram.txt @@ -10237,6 +10236,17 @@ S: Supported L: linux-samsung-soc@vger.kernel.org (moderated for non-subscribers) F: drivers/clk/samsung/ +SAMSUNG SPI DRIVERS +M: Kukjin Kim +M: Krzysztof Kozlowski +M: Andi Shyti +L: linux-spi@vger.kernel.org +L: linux-samsung-soc@vger.kernel.org (moderated for non-subscribers) +S: Maintained +F: Documentation/devicetree/bindings/spi/spi-samsung.txt +F: drivers/spi/spi-s3c* +F: include/linux/platform_data/spi-s3c64xx.h + SAMSUNG SXGBE DRIVERS M: Byungho An M: Girish K S -- cgit v1.1 From 815806e39bf6f7e7b34875d4a9609dbe76661782 Mon Sep 17 00:00:00 2001 From: Elaine Zhang Date: Thu, 18 Aug 2016 17:01:55 +0800 Subject: regmap: drop cache if the bus transfer error regmap_write ->_regmap_raw_write -->regcache_write 
first and then use map->bus->write to write i2c or spi. But if the i2c or spi transfer fails, the cache has already been updated, so a later regmap_read will get the cache data, which is not the real register value. Signed-off-by: Elaine Zhang Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 51fa7d6..25d26bb 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1474,6 +1474,8 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg, ret = map->bus->write(map->bus_context, buf, len); kfree(buf); + } else if (ret != 0 && !map->cache_bypass && map->format.parse_val) { + regcache_drop_region(map, reg, reg + 1); } trace_regmap_hw_write_done(map, reg, val_len / map->format.val_bytes); -- cgit v1.1 From c81396f3da22aa8f1e8fbf7943616a0839c4d63d Mon Sep 17 00:00:00 2001 From: Cathy Luo Date: Thu, 21 Jul 2016 16:30:24 +0530 Subject: mwifiex: fix large amsdu packets causing firmware hang Sometimes host prepares and downloads a large amsdu packet to firmware which leads to a memory corruption in firmware. The reason is __dev_alloc_skb() may allocate larger buffer than required size. This patch solves the problem by checking "adapter->tx_buf_size" instead of relying on skb_tailroom(). 
Signed-off-by: Cathy Luo Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/11n_aggr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/11n_aggr.c b/drivers/net/wireless/marvell/mwifiex/11n_aggr.c index dc49c3d..c47d636 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n_aggr.c +++ b/drivers/net/wireless/marvell/mwifiex/11n_aggr.c @@ -205,7 +205,8 @@ mwifiex_11n_aggregate_pkt(struct mwifiex_private *priv, do { /* Check if AMSDU can accommodate this MSDU */ - if (skb_tailroom(skb_aggr) < (skb_src->len + LLC_SNAP_LEN)) + if ((skb_aggr->len + skb_src->len + LLC_SNAP_LEN) > + adapter->tx_buf_size) break; skb_src = skb_dequeue(&pra_list->skb_head); -- cgit v1.1 From 51c70261b2575962cb9406cd92246b1cee6a3c71 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 18 Aug 2016 17:21:37 +0200 Subject: Revert "android: binder: fix dangling pointer comparison" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 7b142d8fd0bd4c9bf06ccb72ac4daedb503f0124. It doesn't seem to be correct, no one seems to have tested it, and the email address of the submitter now bounces :( So revert it. 
Cc: Jann Horn Cc: Chen Feng Cc: stable Cc: Arve Hjønnevåg Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 09fdb42..16288e7 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2962,7 +2962,6 @@ static int binder_open(struct inode *nodp, struct file *filp) return -ENOMEM; get_task_struct(current); proc->tsk = current; - atomic_inc(&current->mm->mm_count); proc->vma_vm_mm = current->mm; INIT_LIST_HEAD(&proc->todo); init_waitqueue_head(&proc->wait); @@ -3168,7 +3167,6 @@ static void binder_deferred_release(struct binder_proc *proc) vfree(proc->buffer); } - mmdrop(proc->vma_vm_mm); put_task_struct(proc->tsk); binder_debug(BINDER_DEBUG_OPEN_CLOSE, -- cgit v1.1 From 87a713c8ffca33d8e497a8b6c02034332bd80394 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 10 Aug 2016 23:54:13 +0200 Subject: 8250/fintek: rename IRQ_MODE macro A bugfix for the fintek driver required defining some macros, but one of them clashes with a system header on ARM: drivers/tty/serial/8250/8250_fintek.c:34:0: error: "IRQ_MODE" redefined [-Werror] #define IRQ_MODE 0x70 In file included from /git/arm-soc/arch/arm/include/asm/ptrace.h:13:0, from /git/arm-soc/arch/arm/include/asm/irqflags.h:6, from /git/arm-soc/include/linux/irqflags.h:15, from /git/arm-soc/arch/arm/include/asm/bitops.h:27, from /git/arm-soc/include/linux/bitops.h:36, from /git/arm-soc/include/linux/kernel.h:10, from /git/arm-soc/include/linux/list.h:8, from /git/arm-soc/include/linux/module.h:9, from /git/arm-soc/drivers/tty/serial/8250/8250_fintek.c:11: arch/arm/include/uapi/asm/ptrace.h:55:0: note: this is the location of the previous definition This renames the newly introduced 'IRQ_MODE' macro to FINTEK_IRQ_MODE. 
Signed-off-by: Arnd Bergmann Fixes: 4da22f1418cb ("serial: 8250_fintek: fix the mismatched IRQ mode") Link: https://patchwork.kernel.org/patch/9200119/ Acked-by: Ji-Ze Hong (Peter Hong) Acked-by: Ricardo Ribalda Delgado Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_fintek.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_fintek.c b/drivers/tty/serial/8250/8250_fintek.c index 737b4b3..0facc78 100644 --- a/drivers/tty/serial/8250/8250_fintek.c +++ b/drivers/tty/serial/8250/8250_fintek.c @@ -31,7 +31,7 @@ #define IO_ADDR2 0x60 #define LDN 0x7 -#define IRQ_MODE 0x70 +#define FINTEK_IRQ_MODE 0x70 #define IRQ_SHARE BIT(4) #define IRQ_MODE_MASK (BIT(6) | BIT(5)) #define IRQ_LEVEL_LOW 0 @@ -195,7 +195,7 @@ static int fintek_8250_set_irq_mode(struct fintek_8250 *pdata, bool level_mode) outb(LDN, pdata->base_port + ADDR_PORT); outb(pdata->index, pdata->base_port + DATA_PORT); - outb(IRQ_MODE, pdata->base_port + ADDR_PORT); + outb(FINTEK_IRQ_MODE, pdata->base_port + ADDR_PORT); tmp = inb(pdata->base_port + DATA_PORT); tmp &= ~IRQ_MODE_MASK; -- cgit v1.1 From c4e94174983a86c935be1537a73e496b778b0287 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Tue, 16 Aug 2016 19:19:11 +0800 Subject: usb: chipidea: udc: don't touch DP when controller is in host mode When the controller is configured to be dual role and it's in host mode, if bind udc and gadget driver, those gadget operations will do gadget disconnect and finally pull down DP line, which will break host function. 
Cc: # 4.1+ Signed-off-by: Li Jun Signed-off-by: Peter Chen --- drivers/usb/chipidea/udc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index 065f5d9..dfec5a1 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -1596,8 +1596,11 @@ static int ci_udc_pullup(struct usb_gadget *_gadget, int is_on) { struct ci_hdrc *ci = container_of(_gadget, struct ci_hdrc, gadget); - /* Data+ pullup controlled by OTG state machine in OTG fsm mode */ - if (ci_otg_is_fsm_mode(ci)) + /* + * Data+ pullup controlled by OTG state machine in OTG fsm mode; + * and don't touch Data+ in host mode for dual role config. + */ + if (ci_otg_is_fsm_mode(ci) || ci->role == CI_ROLE_HOST) return 0; pm_runtime_get_sync(&ci->gadget.dev); -- cgit v1.1 From c1a23f6d64552b4480208aa584ec7e9c13d6d9c3 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 17 Aug 2016 11:46:16 +0200 Subject: scsi: sas: provide stub implementation for scsi_is_sas_rphy Provide a stub implementation for scsi_is_sas_rphy for kernel configurations which do not have CONFIG_SCSI_SAS_ATTRS defined. Reported-by: kbuild test robot Suggested-by: James Bottomley Reviewed-by: James E.J. Bottomley Signed-off-by: Johannes Thumshirn Signed-off-by: Martin K. 
Petersen --- include/scsi/scsi_transport_sas.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/scsi/scsi_transport_sas.h b/include/scsi/scsi_transport_sas.h index 13c0b2b..31ae074 100644 --- a/include/scsi/scsi_transport_sas.h +++ b/include/scsi/scsi_transport_sas.h @@ -15,8 +15,14 @@ static inline int is_sas_attached(struct scsi_device *sdev) { return 0; } + +static inline int scsi_is_sas_rphy(const struct device *sdev) +{ + return 0; +} #else extern int is_sas_attached(struct scsi_device *sdev); +extern int scsi_is_sas_rphy(const struct device *); #endif static inline int sas_protocol_ata(enum sas_protocol proto) @@ -202,7 +208,6 @@ extern int sas_rphy_add(struct sas_rphy *); extern void sas_rphy_remove(struct sas_rphy *); extern void sas_rphy_delete(struct sas_rphy *); extern void sas_rphy_unlink(struct sas_rphy *); -extern int scsi_is_sas_rphy(const struct device *); struct sas_port *sas_port_alloc(struct device *, int); struct sas_port *sas_port_alloc_num(struct device *); -- cgit v1.1 From 835831c57e9b0cccc24e96a812542875471d75b5 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 17 Aug 2016 11:46:17 +0200 Subject: scsi: ses: use scsi_is_sas_rphy instead of is_sas_attached Use scsi_is_sas_rphy() instead of is_sas_attached() to decide whether we should obtain the SAS address from a scsi device or not. This will prevent us from tripping on the BUG_ON() in sas_sdev_to_rdev() if the rphy isn't attached to the SAS transport class, like it is with hpsa's logical devices. Fixes: 3f8d6f2a0 ('ses: fix discovery of SATA devices in SAS enclosures') Signed-off-by: Johannes Thumshirn Reviewed-by: James E.J. Bottomley Signed-off-by: Martin K. 
Petersen --- drivers/scsi/ses.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c index 0e8601a..8c9a35c 100644 --- a/drivers/scsi/ses.c +++ b/drivers/scsi/ses.c @@ -587,7 +587,7 @@ static void ses_match_to_enclosure(struct enclosure_device *edev, ses_enclosure_data_process(edev, to_scsi_device(edev->edev.parent), 0); - if (is_sas_attached(sdev)) + if (scsi_is_sas_rphy(&sdev->sdev_gendev)) efd.addr = sas_get_address(sdev); if (efd.addr) { -- cgit v1.1 From a0f81dbeef13aa0aeaa8b955b38735dbf09de392 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 17 Aug 2016 11:46:18 +0200 Subject: scsi: sas: remove is_sas_attached() As there are no more users of is_sas_attached() left, remove it. Signed-off-by: Johannes Thumshirn Reviewed-by: James E.J. Bottomley Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_sas.c | 16 ---------------- include/scsi/scsi_transport_sas.h | 6 ------ 2 files changed, 22 deletions(-) diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 3f0ff07..60b651b 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -341,22 +341,6 @@ static int do_sas_phy_delete(struct device *dev, void *data) } /** - * is_sas_attached - check if device is SAS attached - * @sdev: scsi device to check - * - * returns true if the device is SAS attached - */ -int is_sas_attached(struct scsi_device *sdev) -{ - struct Scsi_Host *shost = sdev->host; - - return shost->transportt->host_attrs.ac.class == - &sas_host_class.class; -} -EXPORT_SYMBOL(is_sas_attached); - - -/** * sas_remove_children - tear down a devices SAS data structures * @dev: device belonging to the sas object * diff --git a/include/scsi/scsi_transport_sas.h b/include/scsi/scsi_transport_sas.h index 31ae074..73d8709 100644 --- a/include/scsi/scsi_transport_sas.h +++ b/include/scsi/scsi_transport_sas.h @@ -11,17 +11,11 @@ struct sas_rphy; struct request; #if 
!IS_ENABLED(CONFIG_SCSI_SAS_ATTRS) -static inline int is_sas_attached(struct scsi_device *sdev) -{ - return 0; -} - static inline int scsi_is_sas_rphy(const struct device *sdev) { return 0; } #else -extern int is_sas_attached(struct scsi_device *sdev); extern int scsi_is_sas_rphy(const struct device *); #endif -- cgit v1.1 From 76507fdfc9b629209ae20cd469da2f6d093a507c Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Sun, 7 Aug 2016 21:01:48 +0200 Subject: dmaengine: pxa_dma: fix hotchain corner case In the case where a descriptor is chained on a running channel, and as explained in the comment in the code 10 lines above, the success of the chaining is ensured either if : - the DMA is still running - or if the chained transfer is completed Unfortunately the transfer completeness test was done on the descriptor to which the transfer was chained, and not the transfer being chained at the end, ie. hot-chained. This corner case is extremely hard to trigger, as usually the DMA chain is still running, and the first case takes care of returning success of the hot-chaining. It was seen by hot-chaining several "small transfers" to a running "big transfer", not in a real-life usecase but by testing the robustness of the driver. 
Signed-off-by: Robert Jarzmik Signed-off-by: Vinod Koul --- drivers/dma/pxa_dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c index dc7850a..2093e52 100644 --- a/drivers/dma/pxa_dma.c +++ b/drivers/dma/pxa_dma.c @@ -638,7 +638,7 @@ static bool pxad_try_hotchain(struct virt_dma_chan *vc, vd_last_issued = list_entry(vc->desc_issued.prev, struct virt_dma_desc, node); pxad_desc_chain(vd_last_issued, vd); - if (is_chan_running(chan) || is_desc_completed(vd_last_issued)) + if (is_chan_running(chan) || is_desc_completed(vd)) return true; } -- cgit v1.1 From 7a665d2f60b457c0d77b3e4f01e21c55ffc57069 Mon Sep 17 00:00:00 2001 From: Daniel Verkamp Date: Tue, 28 Jun 2016 11:20:23 -0700 Subject: nvme-fabrics: change NQN UUID to big-endian format NVM Express 1.2.1 section 7.9, NVMe Qualified Names, specifies that the UUID format of NQN uses a UUID based on RFC 4122. RFC 4122 specifies that the UUID is encoded in big-endian byte order. Switch the NVMe over Fabrics host ID field from little-endian UUID to big-endian UUID to match the specification. 
Signed-off-by: Daniel Verkamp Reviewed-by: Jay Freyensee Signed-off-by: Sagi Grimberg --- drivers/nvme/host/fabrics.c | 10 +++++----- drivers/nvme/host/fabrics.h | 2 +- include/linux/nvme.h | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 020302c..be0b106 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -56,7 +56,7 @@ static struct nvmf_host *nvmf_host_add(const char *hostnqn) kref_init(&host->ref); memcpy(host->nqn, hostnqn, NVMF_NQN_SIZE); - uuid_le_gen(&host->id); + uuid_be_gen(&host->id); list_add_tail(&host->list, &nvmf_hosts); out_unlock: @@ -73,9 +73,9 @@ static struct nvmf_host *nvmf_host_default(void) return NULL; kref_init(&host->ref); - uuid_le_gen(&host->id); + uuid_be_gen(&host->id); snprintf(host->nqn, NVMF_NQN_SIZE, - "nqn.2014-08.org.nvmexpress:NVMf:uuid:%pUl", &host->id); + "nqn.2014-08.org.nvmexpress:NVMf:uuid:%pUb", &host->id); mutex_lock(&nvmf_hosts_mutex); list_add_tail(&host->list, &nvmf_hosts); @@ -382,7 +382,7 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl) if (!data) return -ENOMEM; - memcpy(&data->hostid, &ctrl->opts->host->id, sizeof(uuid_le)); + memcpy(&data->hostid, &ctrl->opts->host->id, sizeof(uuid_be)); data->cntlid = cpu_to_le16(0xffff); strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE); strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE); @@ -441,7 +441,7 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) if (!data) return -ENOMEM; - memcpy(&data->hostid, &ctrl->opts->host->id, sizeof(uuid_le)); + memcpy(&data->hostid, &ctrl->opts->host->id, sizeof(uuid_be)); data->cntlid = cpu_to_le16(ctrl->cntlid); strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE); strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE); diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 89df52c..46e460a 100644 --- a/drivers/nvme/host/fabrics.h +++ 
b/drivers/nvme/host/fabrics.h @@ -34,7 +34,7 @@ struct nvmf_host { struct kref ref; struct list_head list; char nqn[NVMF_NQN_SIZE]; - uuid_le id; + uuid_be id; }; /** diff --git a/include/linux/nvme.h b/include/linux/nvme.h index d8b37ba..7676557 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -794,7 +794,7 @@ struct nvmf_connect_command { }; struct nvmf_connect_data { - uuid_le hostid; + uuid_be hostid; __le16 cntlid; char resv4[238]; char subsysnqn[NVMF_NQN_FIELD_LEN]; -- cgit v1.1 From 4c51e7db87b753ce16c35ed0111eb273fe1a5135 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Thu, 18 Aug 2016 16:52:08 +0300 Subject: ath9k: fix misleading indent Fixes smatch warning: ath9k_vif_iter_set_beacon() warn if statement not indented Signed-off-by: Bob Copeland Reviewed-by: Julian Calaby Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index a394622..eb00724 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -919,7 +919,7 @@ static void ath9k_vif_iter_set_beacon(struct ath9k_vif_iter_data *iter_data, } else { if (iter_data->primary_beacon_vif->type != NL80211_IFTYPE_AP && vif->type == NL80211_IFTYPE_AP) - iter_data->primary_beacon_vif = vif; + iter_data->primary_beacon_vif = vif; } iter_data->beacons = true; -- cgit v1.1 From a5d268277ad566cbcf53a2e6f05fd6db81a61c41 Mon Sep 17 00:00:00 2001 From: Eduardo Abinader Date: Thu, 18 Aug 2016 16:52:09 +0300 Subject: ath9k: consider return code on just to comply with current ath9k_hw_nvram_read to return value, hence behaving reacting accordingly. 
Signed-off-by: Eduardo Abinader Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/ar9003_eeprom.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c index 5bd2cba..08607d7 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c @@ -3252,7 +3252,8 @@ static int ar9300_eeprom_restore_flash(struct ath_hw *ah, u8 *mptr, int i; for (i = 0; i < mdata_size / 2; i++, data++) - ath9k_hw_nvram_read(ah, i, data); + if (!ath9k_hw_nvram_read(ah, i, data)) + return -EIO; return 0; } @@ -3282,7 +3283,8 @@ static int ar9300_eeprom_restore_internal(struct ath_hw *ah, if (ath9k_hw_use_flash(ah)) { u8 txrx; - ar9300_eeprom_restore_flash(ah, mptr, mdata_size); + if (ar9300_eeprom_restore_flash(ah, mptr, mdata_size)) + return -EIO; /* check if eeprom contains valid data */ eep = (struct ar9300_eeprom *) mptr; -- cgit v1.1 From 0163b03199006a6ba0d2c991c311e8ac93fcb208 Mon Sep 17 00:00:00 2001 From: Maya Erez Date: Thu, 18 Aug 2016 16:52:10 +0300 Subject: wil6210: align to latest auto generated wmi.h Align to latest version of the auto generated wmi file describing the interface with FW. 
Signed-off-by: Maya Erez Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/wil6210/wmi.h | 640 +++++++++++++++++++++++++++++---- 1 file changed, 561 insertions(+), 79 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/wmi.h b/drivers/net/wireless/ath/wil6210/wmi.h index 685fe0d..349510c 100644 --- a/drivers/net/wireless/ath/wil6210/wmi.h +++ b/drivers/net/wireless/ath/wil6210/wmi.h @@ -120,6 +120,8 @@ enum wmi_command_id { WMI_BF_SM_MGMT_CMDID = 0x838, WMI_BF_RXSS_MGMT_CMDID = 0x839, WMI_BF_TRIG_CMDID = 0x83A, + WMI_LINK_MAINTAIN_CFG_WRITE_CMDID = 0x842, + WMI_LINK_MAINTAIN_CFG_READ_CMDID = 0x843, WMI_SET_SECTORS_CMDID = 0x849, WMI_MAINTAIN_PAUSE_CMDID = 0x850, WMI_MAINTAIN_RESUME_CMDID = 0x851, @@ -134,10 +136,15 @@ enum wmi_command_id { WMI_BF_CTRL_CMDID = 0x862, WMI_NOTIFY_REQ_CMDID = 0x863, WMI_GET_STATUS_CMDID = 0x864, + WMI_GET_RF_STATUS_CMDID = 0x866, + WMI_GET_BASEBAND_TYPE_CMDID = 0x867, WMI_UNIT_TEST_CMDID = 0x900, WMI_HICCUP_CMDID = 0x901, WMI_FLASH_READ_CMDID = 0x902, WMI_FLASH_WRITE_CMDID = 0x903, + /* Power management */ + WMI_TRAFFIC_DEFERRAL_CMDID = 0x904, + WMI_TRAFFIC_RESUME_CMDID = 0x905, /* P2P */ WMI_P2P_CFG_CMDID = 0x910, WMI_PORT_ALLOCATE_CMDID = 0x911, @@ -150,6 +157,19 @@ enum wmi_command_id { WMI_PCP_START_CMDID = 0x918, WMI_PCP_STOP_CMDID = 0x919, WMI_GET_PCP_FACTOR_CMDID = 0x91B, + /* Power Save Configuration Commands */ + WMI_PS_DEV_PROFILE_CFG_CMDID = 0x91C, + /* Not supported yet */ + WMI_PS_DEV_CFG_CMDID = 0x91D, + /* Not supported yet */ + WMI_PS_DEV_CFG_READ_CMDID = 0x91E, + /* Per MAC Power Save Configuration commands + * Not supported yet + */ + WMI_PS_MID_CFG_CMDID = 0x91F, + /* Not supported yet */ + WMI_PS_MID_CFG_READ_CMDID = 0x920, + WMI_RS_CFG_CMDID = 0x921, WMI_SET_MAC_ADDRESS_CMDID = 0xF003, WMI_ABORT_SCAN_CMDID = 0xF007, WMI_SET_PROMISCUOUS_MODE_CMDID = 0xF041, @@ -291,9 +311,8 @@ enum wmi_scan_type { /* WMI_START_SCAN_CMDID */ struct wmi_start_scan_cmd { u8 direct_scan_mac_addr[WMI_MAC_LEN]; - /* DMG Beacon 
frame is transmitted during active scanning */ + /* run scan with discovery beacon. Relevant for ACTIVE scan only. */ u8 discovery_mode; - /* reserved */ u8 reserved; /* Max duration in the home channel(ms) */ __le32 dwell_time; @@ -453,6 +472,12 @@ struct wmi_port_delete_cmd { u8 reserved[3]; } __packed; +/* WMI_TRAFFIC_DEFERRAL_CMDID */ +struct wmi_traffic_deferral_cmd { + /* Bit vector: bit[0] - wake on Unicast, bit[1] - wake on Broadcast */ + u8 wakeup_trigger; +} __packed; + /* WMI_P2P_CFG_CMDID */ enum wmi_discovery_mode { WMI_DISCOVERY_MODE_NON_OFFLOAD = 0x00, @@ -822,81 +847,99 @@ struct wmi_pmc_cmd { * List of Events (target to host) */ enum wmi_event_id { - WMI_READY_EVENTID = 0x1001, - WMI_CONNECT_EVENTID = 0x1002, - WMI_DISCONNECT_EVENTID = 0x1003, - WMI_SCAN_COMPLETE_EVENTID = 0x100A, - WMI_REPORT_STATISTICS_EVENTID = 0x100B, - WMI_RD_MEM_RSP_EVENTID = 0x1800, - WMI_FW_READY_EVENTID = 0x1801, - WMI_EXIT_FAST_MEM_ACC_MODE_EVENTID = 0x200, - WMI_ECHO_RSP_EVENTID = 0x1803, - WMI_FS_TUNE_DONE_EVENTID = 0x180A, - WMI_CORR_MEASURE_EVENTID = 0x180B, - WMI_READ_RSSI_EVENTID = 0x180C, - WMI_TEMP_SENSE_DONE_EVENTID = 0x180E, - WMI_DC_CALIB_DONE_EVENTID = 0x180F, - WMI_IQ_TX_CALIB_DONE_EVENTID = 0x1811, - WMI_IQ_RX_CALIB_DONE_EVENTID = 0x1812, - WMI_SET_WORK_MODE_DONE_EVENTID = 0x1815, - WMI_LO_LEAKAGE_CALIB_DONE_EVENTID = 0x1816, - WMI_MARLON_R_READ_DONE_EVENTID = 0x1818, - WMI_MARLON_R_WRITE_DONE_EVENTID = 0x1819, - WMI_MARLON_R_TXRX_SEL_DONE_EVENTID = 0x181A, - WMI_SILENT_RSSI_CALIB_DONE_EVENTID = 0x181D, - WMI_RF_RX_TEST_DONE_EVENTID = 0x181E, - WMI_CFG_RX_CHAIN_DONE_EVENTID = 0x1820, - WMI_VRING_CFG_DONE_EVENTID = 0x1821, - WMI_BA_STATUS_EVENTID = 0x1823, - WMI_RCP_ADDBA_REQ_EVENTID = 0x1824, - WMI_RCP_ADDBA_RESP_SENT_EVENTID = 0x1825, - WMI_DELBA_EVENTID = 0x1826, - WMI_GET_SSID_EVENTID = 0x1828, - WMI_GET_PCP_CHANNEL_EVENTID = 0x182A, - WMI_SW_TX_COMPLETE_EVENTID = 0x182B, - WMI_READ_MAC_RXQ_EVENTID = 0x1830, - WMI_READ_MAC_TXQ_EVENTID = 0x1831, - 
WMI_WRITE_MAC_RXQ_EVENTID = 0x1832, - WMI_WRITE_MAC_TXQ_EVENTID = 0x1833, - WMI_WRITE_MAC_XQ_FIELD_EVENTID = 0x1834, - WMI_BEAMFORMING_MGMT_DONE_EVENTID = 0x1836, - WMI_BF_TXSS_MGMT_DONE_EVENTID = 0x1837, - WMI_BF_RXSS_MGMT_DONE_EVENTID = 0x1839, - WMI_RS_MGMT_DONE_EVENTID = 0x1852, - WMI_RF_MGMT_STATUS_EVENTID = 0x1853, - WMI_THERMAL_THROTTLING_STATUS_EVENTID = 0x1855, - WMI_BF_SM_MGMT_DONE_EVENTID = 0x1838, - WMI_RX_MGMT_PACKET_EVENTID = 0x1840, - WMI_TX_MGMT_PACKET_EVENTID = 0x1841, - WMI_OTP_READ_RESULT_EVENTID = 0x1856, - WMI_LED_CFG_DONE_EVENTID = 0x1858, + WMI_READY_EVENTID = 0x1001, + WMI_CONNECT_EVENTID = 0x1002, + WMI_DISCONNECT_EVENTID = 0x1003, + WMI_SCAN_COMPLETE_EVENTID = 0x100A, + WMI_REPORT_STATISTICS_EVENTID = 0x100B, + WMI_RD_MEM_RSP_EVENTID = 0x1800, + WMI_FW_READY_EVENTID = 0x1801, + WMI_EXIT_FAST_MEM_ACC_MODE_EVENTID = 0x200, + WMI_ECHO_RSP_EVENTID = 0x1803, + WMI_FS_TUNE_DONE_EVENTID = 0x180A, + WMI_CORR_MEASURE_EVENTID = 0x180B, + WMI_READ_RSSI_EVENTID = 0x180C, + WMI_TEMP_SENSE_DONE_EVENTID = 0x180E, + WMI_DC_CALIB_DONE_EVENTID = 0x180F, + WMI_IQ_TX_CALIB_DONE_EVENTID = 0x1811, + WMI_IQ_RX_CALIB_DONE_EVENTID = 0x1812, + WMI_SET_WORK_MODE_DONE_EVENTID = 0x1815, + WMI_LO_LEAKAGE_CALIB_DONE_EVENTID = 0x1816, + WMI_MARLON_R_READ_DONE_EVENTID = 0x1818, + WMI_MARLON_R_WRITE_DONE_EVENTID = 0x1819, + WMI_MARLON_R_TXRX_SEL_DONE_EVENTID = 0x181A, + WMI_SILENT_RSSI_CALIB_DONE_EVENTID = 0x181D, + WMI_RF_RX_TEST_DONE_EVENTID = 0x181E, + WMI_CFG_RX_CHAIN_DONE_EVENTID = 0x1820, + WMI_VRING_CFG_DONE_EVENTID = 0x1821, + WMI_BA_STATUS_EVENTID = 0x1823, + WMI_RCP_ADDBA_REQ_EVENTID = 0x1824, + WMI_RCP_ADDBA_RESP_SENT_EVENTID = 0x1825, + WMI_DELBA_EVENTID = 0x1826, + WMI_GET_SSID_EVENTID = 0x1828, + WMI_GET_PCP_CHANNEL_EVENTID = 0x182A, + WMI_SW_TX_COMPLETE_EVENTID = 0x182B, + WMI_READ_MAC_RXQ_EVENTID = 0x1830, + WMI_READ_MAC_TXQ_EVENTID = 0x1831, + WMI_WRITE_MAC_RXQ_EVENTID = 0x1832, + WMI_WRITE_MAC_TXQ_EVENTID = 0x1833, + WMI_WRITE_MAC_XQ_FIELD_EVENTID = 
0x1834, + WMI_BEAMFORMING_MGMT_DONE_EVENTID = 0x1836, + WMI_BF_TXSS_MGMT_DONE_EVENTID = 0x1837, + WMI_BF_RXSS_MGMT_DONE_EVENTID = 0x1839, + WMI_RS_MGMT_DONE_EVENTID = 0x1852, + WMI_RF_MGMT_STATUS_EVENTID = 0x1853, + WMI_THERMAL_THROTTLING_STATUS_EVENTID = 0x1855, + WMI_BF_SM_MGMT_DONE_EVENTID = 0x1838, + WMI_RX_MGMT_PACKET_EVENTID = 0x1840, + WMI_TX_MGMT_PACKET_EVENTID = 0x1841, + WMI_LINK_MAINTAIN_CFG_WRITE_DONE_EVENTID = 0x1842, + WMI_LINK_MAINTAIN_CFG_READ_DONE_EVENTID = 0x1843, + WMI_OTP_READ_RESULT_EVENTID = 0x1856, + WMI_LED_CFG_DONE_EVENTID = 0x1858, /* Performance monitoring events */ - WMI_DATA_PORT_OPEN_EVENTID = 0x1860, - WMI_WBE_LINK_DOWN_EVENTID = 0x1861, - WMI_BF_CTRL_DONE_EVENTID = 0x1862, - WMI_NOTIFY_REQ_DONE_EVENTID = 0x1863, - WMI_GET_STATUS_DONE_EVENTID = 0x1864, - WMI_VRING_EN_EVENTID = 0x1865, - WMI_UNIT_TEST_EVENTID = 0x1900, - WMI_FLASH_READ_DONE_EVENTID = 0x1902, - WMI_FLASH_WRITE_DONE_EVENTID = 0x1903, + WMI_DATA_PORT_OPEN_EVENTID = 0x1860, + WMI_WBE_LINK_DOWN_EVENTID = 0x1861, + WMI_BF_CTRL_DONE_EVENTID = 0x1862, + WMI_NOTIFY_REQ_DONE_EVENTID = 0x1863, + WMI_GET_STATUS_DONE_EVENTID = 0x1864, + WMI_VRING_EN_EVENTID = 0x1865, + WMI_GET_RF_STATUS_EVENTID = 0x1866, + WMI_GET_BASEBAND_TYPE_EVENTID = 0x1867, + WMI_UNIT_TEST_EVENTID = 0x1900, + WMI_FLASH_READ_DONE_EVENTID = 0x1902, + WMI_FLASH_WRITE_DONE_EVENTID = 0x1903, + /* Power management */ + WMI_TRAFFIC_DEFERRAL_EVENTID = 0x1904, + WMI_TRAFFIC_RESUME_EVENTID = 0x1905, /* P2P */ - WMI_P2P_CFG_DONE_EVENTID = 0x1910, - WMI_PORT_ALLOCATED_EVENTID = 0x1911, - WMI_PORT_DELETED_EVENTID = 0x1912, - WMI_LISTEN_STARTED_EVENTID = 0x1914, - WMI_SEARCH_STARTED_EVENTID = 0x1915, - WMI_DISCOVERY_STARTED_EVENTID = 0x1916, - WMI_DISCOVERY_STOPPED_EVENTID = 0x1917, - WMI_PCP_STARTED_EVENTID = 0x1918, - WMI_PCP_STOPPED_EVENTID = 0x1919, - WMI_PCP_FACTOR_EVENTID = 0x191A, - WMI_SET_CHANNEL_EVENTID = 0x9000, - WMI_ASSOC_REQ_EVENTID = 0x9001, - WMI_EAPOL_RX_EVENTID = 0x9002, - WMI_MAC_ADDR_RESP_EVENTID = 
0x9003, - WMI_FW_VER_EVENTID = 0x9004, - WMI_ACS_PASSIVE_SCAN_COMPLETE_EVENTID = 0x9005, + WMI_P2P_CFG_DONE_EVENTID = 0x1910, + WMI_PORT_ALLOCATED_EVENTID = 0x1911, + WMI_PORT_DELETED_EVENTID = 0x1912, + WMI_LISTEN_STARTED_EVENTID = 0x1914, + WMI_SEARCH_STARTED_EVENTID = 0x1915, + WMI_DISCOVERY_STARTED_EVENTID = 0x1916, + WMI_DISCOVERY_STOPPED_EVENTID = 0x1917, + WMI_PCP_STARTED_EVENTID = 0x1918, + WMI_PCP_STOPPED_EVENTID = 0x1919, + WMI_PCP_FACTOR_EVENTID = 0x191A, + /* Power Save Configuration Events */ + WMI_PS_DEV_PROFILE_CFG_EVENTID = 0x191C, + /* Not supported yet */ + WMI_PS_DEV_CFG_EVENTID = 0x191D, + /* Not supported yet */ + WMI_PS_DEV_CFG_READ_EVENTID = 0x191E, + /* Not supported yet */ + WMI_PS_MID_CFG_EVENTID = 0x191F, + /* Not supported yet */ + WMI_PS_MID_CFG_READ_EVENTID = 0x1920, + WMI_RS_CFG_DONE_EVENTID = 0x1921, + WMI_SET_CHANNEL_EVENTID = 0x9000, + WMI_ASSOC_REQ_EVENTID = 0x9001, + WMI_EAPOL_RX_EVENTID = 0x9002, + WMI_MAC_ADDR_RESP_EVENTID = 0x9003, + WMI_FW_VER_EVENTID = 0x9004, + WMI_ACS_PASSIVE_SCAN_COMPLETE_EVENTID = 0x9005, }; /* Events data structures */ @@ -943,10 +986,78 @@ struct wmi_get_status_done_event { /* WMI_FW_VER_EVENTID */ struct wmi_fw_ver_event { - u8 major; - u8 minor; - __le16 subminor; - __le16 build; + /* FW image version */ + __le32 fw_major; + __le32 fw_minor; + __le32 fw_subminor; + __le32 fw_build; + /* FW image build time stamp */ + __le32 hour; + __le32 minute; + __le32 second; + __le32 day; + __le32 month; + __le32 year; + /* Boot Loader image version */ + __le32 bl_major; + __le32 bl_minor; + __le32 bl_subminor; + __le32 bl_build; +} __packed; + +/* WMI_GET_RF_STATUS_EVENTID */ +enum rf_type { + RF_UNKNOWN = 0x00, + RF_MARLON = 0x01, + RF_SPARROW = 0x02, +}; + +/* WMI_GET_RF_STATUS_EVENTID */ +enum board_file_rf_type { + BF_RF_MARLON = 0x00, + BF_RF_SPARROW = 0x01, +}; + +/* WMI_GET_RF_STATUS_EVENTID */ +enum rf_status { + RF_OK = 0x00, + RF_NO_COMM = 0x01, + RF_WRONG_BOARD_FILE = 0x02, +}; + +/* 
WMI_GET_RF_STATUS_EVENTID */ +struct wmi_get_rf_status_event { + /* enum rf_type */ + __le32 rf_type; + /* attached RFs bit vector */ + __le32 attached_rf_vector; + /* enabled RFs bit vector */ + __le32 enabled_rf_vector; + /* enum rf_status, refers to enabled RFs */ + u8 rf_status[32]; + /* enum board file RF type */ + __le32 board_file_rf_type; + /* board file platform type */ + __le32 board_file_platform_type; + /* board file version */ + __le32 board_file_version; + __le32 reserved[2]; +} __packed; + +/* WMI_GET_BASEBAND_TYPE_EVENTID */ +enum baseband_type { + BASEBAND_UNKNOWN = 0x00, + BASEBAND_SPARROW_M_A0 = 0x03, + BASEBAND_SPARROW_M_A1 = 0x04, + BASEBAND_SPARROW_M_B0 = 0x05, + BASEBAND_SPARROW_M_C0 = 0x06, + BASEBAND_SPARROW_M_D0 = 0x07, +}; + +/* WMI_GET_BASEBAND_TYPE_EVENTID */ +struct wmi_get_baseband_type_event { + /* enum baseband_type */ + __le32 baseband_type; } __packed; /* WMI_MAC_ADDR_RESP_EVENTID */ @@ -1410,4 +1521,375 @@ struct wmi_led_cfg_done_event { __le32 status; } __packed; +#define WMI_NUM_MCS (13) + +/* Rate search parameters configuration per connection */ +struct wmi_rs_cfg { + /* The maximal allowed PER for each MCS + * MCS will be considered as failed if PER during RS is higher + */ + u8 per_threshold[WMI_NUM_MCS]; + /* Number of MPDUs for each MCS + * this is the minimal statistic required to make an educated + * decision + */ + u8 min_frame_cnt[WMI_NUM_MCS]; + /* stop threshold [0-100] */ + u8 stop_th; + /* MCS1 stop threshold [0-100] */ + u8 mcs1_fail_th; + u8 max_back_failure_th; + /* Debug feature for disabling internal RS trigger (which is + * currently triggered by BF Done) + */ + u8 dbg_disable_internal_trigger; + __le32 back_failure_mask; + __le32 mcs_en_vec; +} __packed; + +/* WMI_RS_CFG_CMDID */ +struct wmi_rs_cfg_cmd { + /* connection id */ + u8 cid; + /* enable or disable rate search */ + u8 rs_enable; + /* rate search configuration */ + struct wmi_rs_cfg rs_cfg; +} __packed; + +/* WMI_RS_CFG_DONE_EVENTID */ +struct 
wmi_rs_cfg_done_event { + u8 cid; + /* enum wmi_fw_status */ + u8 status; + u8 reserved[2]; +} __packed; + +/* broadcast connection ID */ +#define WMI_LINK_MAINTAIN_CFG_CID_BROADCAST (0xFFFFFFFF) + +/* Types wmi_link_maintain_cfg presets for WMI_LINK_MAINTAIN_CFG_WRITE_CMD */ +enum wmi_link_maintain_cfg_type { + /* AP/PCP default normal (non-FST) configuration settings */ + WMI_LINK_MAINTAIN_CFG_TYPE_DEFAULT_NORMAL_AP = 0x00, + /* AP/PCP default FST configuration settings */ + WMI_LINK_MAINTAIN_CFG_TYPE_DEFAULT_FST_AP = 0x01, + /* STA default normal (non-FST) configuration settings */ + WMI_LINK_MAINTAIN_CFG_TYPE_DEFAULT_NORMAL_STA = 0x02, + /* STA default FST configuration settings */ + WMI_LINK_MAINTAIN_CFG_TYPE_DEFAULT_FST_STA = 0x03, + /* custom configuration settings */ + WMI_LINK_MAINTAIN_CFG_TYPE_CUSTOM = 0x04, + /* number of defined configuration types */ + WMI_LINK_MAINTAIN_CFG_TYPES_NUM = 0x05, +}; + +/* Response status codes for WMI_LINK_MAINTAIN_CFG_WRITE/READ commands */ +enum wmi_link_maintain_cfg_response_status { + /* WMI_LINK_MAINTAIN_CFG_WRITE/READ command successfully accomplished + */ + WMI_LINK_MAINTAIN_CFG_RESPONSE_STATUS_OK = 0x00, + /* ERROR due to bad argument in WMI_LINK_MAINTAIN_CFG_WRITE/READ + * command request + */ + WMI_LINK_MAINTAIN_CFG_RESPONSE_STATUS_BAD_ARGUMENT = 0x01, +}; + +/* Link Loss and Keep Alive configuration */ +struct wmi_link_maintain_cfg { + /* link_loss_enable_detectors_vec */ + __le32 link_loss_enable_detectors_vec; + /* detectors check period usec */ + __le32 check_link_loss_period_usec; + /* max allowed tx ageing */ + __le32 tx_ageing_threshold_usec; + /* keep alive period for high SNR */ + __le32 keep_alive_period_usec_high_snr; + /* keep alive period for low SNR */ + __le32 keep_alive_period_usec_low_snr; + /* lower snr limit for keep alive period update */ + __le32 keep_alive_snr_threshold_low_db; + /* upper snr limit for keep alive period update */ + __le32 keep_alive_snr_threshold_high_db; + /* num of 
successive bad bcons causing link-loss */ + __le32 bad_beacons_num_threshold; + /* SNR limit for bad_beacons_detector */ + __le32 bad_beacons_snr_threshold_db; +} __packed; + +/* WMI_LINK_MAINTAIN_CFG_WRITE_CMDID */ +struct wmi_link_maintain_cfg_write_cmd { + /* enum wmi_link_maintain_cfg_type_e - type of requested default + * configuration to be applied + */ + __le32 cfg_type; + /* requested connection ID or WMI_LINK_MAINTAIN_CFG_CID_BROADCAST */ + __le32 cid; + /* custom configuration settings to be applied (relevant only if + * cfg_type==WMI_LINK_MAINTAIN_CFG_TYPE_CUSTOM) + */ + struct wmi_link_maintain_cfg lm_cfg; +} __packed; + +/* WMI_LINK_MAINTAIN_CFG_READ_CMDID */ +struct wmi_link_maintain_cfg_read_cmd { + /* connection ID which configuration settings are requested */ + __le32 cid; +} __packed; + +/* WMI_LINK_MAINTAIN_CFG_WRITE_DONE_EVENTID */ +struct wmi_link_maintain_cfg_write_done_event { + /* requested connection ID */ + __le32 cid; + /* wmi_link_maintain_cfg_response_status_e - write status */ + __le32 status; +} __packed; + +/* \WMI_LINK_MAINTAIN_CFG_READ_DONE_EVENT */ +struct wmi_link_maintain_cfg_read_done_event { + /* requested connection ID */ + __le32 cid; + /* wmi_link_maintain_cfg_response_status_e - read status */ + __le32 status; + /* Retrieved configuration settings */ + struct wmi_link_maintain_cfg lm_cfg; +} __packed; + +enum wmi_traffic_deferral_status { + WMI_TRAFFIC_DEFERRAL_APPROVED = 0x0, + WMI_TRAFFIC_DEFERRAL_REJECTED = 0x1, +}; + +/* WMI_TRAFFIC_DEFERRAL_EVENTID */ +struct wmi_traffic_deferral_event { + /* enum wmi_traffic_deferral_status_e */ + u8 status; +} __packed; + +enum wmi_traffic_resume_status { + WMI_TRAFFIC_RESUME_SUCCESS = 0x0, + WMI_TRAFFIC_RESUME_FAILED = 0x1, +}; + +/* WMI_TRAFFIC_RESUME_EVENTID */ +struct wmi_traffic_resume_event { + /* enum wmi_traffic_resume_status_e */ + u8 status; +} __packed; + +/* Power Save command completion status codes */ +enum wmi_ps_cfg_cmd_status { + WMI_PS_CFG_CMD_STATUS_SUCCESS = 
0x00, + WMI_PS_CFG_CMD_STATUS_BAD_PARAM = 0x01, + /* other error */ + WMI_PS_CFG_CMD_STATUS_ERROR = 0x02, +}; + +/* Device Power Save Profiles */ +enum wmi_ps_profile_type { + WMI_PS_PROFILE_TYPE_DEFAULT = 0x00, + WMI_PS_PROFILE_TYPE_PS_DISABLED = 0x01, + WMI_PS_PROFILE_TYPE_MAX_PS = 0x02, + WMI_PS_PROFILE_TYPE_LOW_LATENCY_PS = 0x03, +}; + +/* WMI_PS_DEV_PROFILE_CFG_CMDID + * + * Power save profile to be used by the device + * + * Returned event: + * - WMI_PS_DEV_PROFILE_CFG_EVENTID + */ +struct wmi_ps_dev_profile_cfg_cmd { + /* wmi_ps_profile_type_e */ + u8 ps_profile; + u8 reserved[3]; +} __packed; + +/* WMI_PS_DEV_PROFILE_CFG_EVENTID */ +struct wmi_ps_dev_profile_cfg_event { + /* wmi_ps_cfg_cmd_status_e */ + __le32 status; +} __packed; + +enum wmi_ps_level { + WMI_PS_LEVEL_DEEP_SLEEP = 0x00, + WMI_PS_LEVEL_SHALLOW_SLEEP = 0x01, + /* awake = all PS mechanisms are disabled */ + WMI_PS_LEVEL_AWAKE = 0x02, +}; + +enum wmi_ps_deep_sleep_clk_level { + /* 33k */ + WMI_PS_DEEP_SLEEP_CLK_LEVEL_RTC = 0x00, + /* 10k */ + WMI_PS_DEEP_SLEEP_CLK_LEVEL_OSC = 0x01, + /* @RTC Low latency */ + WMI_PS_DEEP_SLEEP_CLK_LEVEL_RTC_LT = 0x02, + WMI_PS_DEEP_SLEEP_CLK_LEVEL_XTAL = 0x03, + WMI_PS_DEEP_SLEEP_CLK_LEVEL_SYSCLK = 0x04, + /* Not Applicable */ + WMI_PS_DEEP_SLEEP_CLK_LEVEL_N_A = 0xFF, +}; + +/* Response by the FW to a D3 entry request */ +enum wmi_ps_d3_resp_policy { + WMI_PS_D3_RESP_POLICY_DEFAULT = 0x00, + /* debug -D3 req is always denied */ + WMI_PS_D3_RESP_POLICY_DENIED = 0x01, + /* debug -D3 req is always approved */ + WMI_PS_D3_RESP_POLICY_APPROVED = 0x02, +}; + +/* Device common power save configurations */ +struct wmi_ps_dev_cfg { + /* lowest level of PS allowed while unassociated, enum wmi_ps_level_e + */ + u8 ps_unassoc_min_level; + /* lowest deep sleep clock level while nonassoc, enum + * wmi_ps_deep_sleep_clk_level_e + */ + u8 ps_unassoc_deep_sleep_min_level; + /* lowest level of PS allowed while associated, enum wmi_ps_level_e */ + u8 ps_assoc_min_level; + /* 
lowest deep sleep clock level while assoc, enum + * wmi_ps_deep_sleep_clk_level_e + */ + u8 ps_assoc_deep_sleep_min_level; + /* enum wmi_ps_deep_sleep_clk_level_e */ + u8 ps_assoc_low_latency_ds_min_level; + /* enum wmi_ps_d3_resp_policy_e */ + u8 ps_D3_response_policy; + /* BOOL */ + u8 ps_D3_pm_pme_enabled; + /* BOOL */ + u8 ps_halp_enable; + u8 ps_deep_sleep_enter_thresh_msec; + /* BOOL */ + u8 ps_voltage_scaling_en; +} __packed; + +/* WMI_PS_DEV_CFG_CMDID + * + * Configure common Power Save parameters of the device and all MIDs. + * + * Returned event: + * - WMI_PS_DEV_CFG_EVENTID + */ +struct wmi_ps_dev_cfg_cmd { + /* Device Power Save configuration to be applied */ + struct wmi_ps_dev_cfg ps_dev_cfg; + /* alignment to 32b */ + u8 reserved[2]; +} __packed; + +/* WMI_PS_DEV_CFG_EVENTID */ +struct wmi_ps_dev_cfg_event { + /* wmi_ps_cfg_cmd_status_e */ + __le32 status; +} __packed; + +/* WMI_PS_DEV_CFG_READ_CMDID + * + * request to retrieve device Power Save configuration + * (WMI_PS_DEV_CFG_CMD params) + * + * Returned event: + * - WMI_PS_DEV_CFG_READ_EVENTID + */ +struct wmi_ps_dev_cfg_read_cmd { + __le32 reserved; +} __packed; + +/* WMI_PS_DEV_CFG_READ_EVENTID */ +struct wmi_ps_dev_cfg_read_event { + /* wmi_ps_cfg_cmd_status_e */ + __le32 status; + /* Retrieved device Power Save configuration (WMI_PS_DEV_CFG_CMD + * params) + */ + struct wmi_ps_dev_cfg dev_ps_cfg; + /* alignment to 32b */ + u8 reserved[2]; +} __packed; + +/* Per Mac Power Save configurations */ +struct wmi_ps_mid_cfg { + /* Low power RX in BTI is enabled, BOOL */ + u8 beacon_lprx_enable; + /* Sync to sector ID enabled, BOOL */ + u8 beacon_sync_to_sectorId_enable; + /* Low power RX in DTI is enabled, BOOL */ + u8 frame_exchange_lprx_enable; + /* Sleep Cycle while in scheduled PS, 1-31 */ + u8 scheduled_sleep_cycle_pow2; + /* Stay Awake for k BIs every (sleep_cycle - k) BIs, 1-31 */ + u8 scheduled_num_of_awake_bis; + u8 am_to_traffic_load_thresh_mbp; + u8 traffic_to_am_load_thresh_mbps; + u8 
traffic_to_am_num_of_no_traffic_bis; + /* BOOL */ + u8 continuous_traffic_psm; + __le16 no_traffic_to_min_usec; + __le16 no_traffic_to_max_usec; + __le16 snoozing_sleep_interval_milisec; + u8 max_no_data_awake_events; + /* Trigger WEB after k failed beacons */ + u8 num_of_failed_beacons_rx_to_trigger_web; + /* Trigger BF after k failed beacons */ + u8 num_of_failed_beacons_rx_to_trigger_bf; + /* Trigger SOB after k successful beacons */ + u8 num_of_successful_beacons_rx_to_trigger_sob; +} __packed; + +/* WMI_PS_MID_CFG_CMDID + * + * Configure Power Save parameters of a specific MID. + * These parameters are relevant for the specific BSS this MID belongs to. + * + * Returned event: + * - WMI_PS_MID_CFG_EVENTID + */ +struct wmi_ps_mid_cfg_cmd { + /* MAC ID */ + u8 mid; + /* mid PS configuration to be applied */ + struct wmi_ps_mid_cfg ps_mid_cfg; +} __packed; + +/* WMI_PS_MID_CFG_EVENTID */ +struct wmi_ps_mid_cfg_event { + /* MAC ID */ + u8 mid; + /* alignment to 32b */ + u8 reserved[3]; + /* wmi_ps_cfg_cmd_status_e */ + __le32 status; +} __packed; + +/* WMI_PS_MID_CFG_READ_CMDID + * + * request to retrieve Power Save configuration of mid + * (WMI_PS_MID_CFG_CMD params) + * + * Returned event: + * - WMI_PS_MID_CFG_READ_EVENTID + */ +struct wmi_ps_mid_cfg_read_cmd { + /* MAC ID */ + u8 mid; + /* alignment to 32b */ + u8 reserved[3]; +} __packed; + +/* WMI_PS_MID_CFG_READ_EVENTID */ +struct wmi_ps_mid_cfg_read_event { + /* MAC ID */ + u8 mid; + /* Retrieved MID Power Save configuration(WMI_PS_MID_CFG_CMD params) */ + struct wmi_ps_mid_cfg mid_ps_cfg; + /* wmi_ps_cfg_cmd_status_e */ + __le32 status; +} __packed; + #endif /* __WILOCITY_WMI_H__ */ -- cgit v1.1 From f1b7764f8626b5ee7a42b6648427e71111c5cbb5 Mon Sep 17 00:00:00 2001 From: Maya Erez Date: Thu, 18 Aug 2016 16:52:12 +0300 Subject: wil6210: fix HALP handling in case of HALP vote time-out In case HALP vote times out, we need to mask the HALP IRQ, as done in case the interrupt is received, as this interrupt should 
be set until completion of the low latency operation. Signed-off-by: Maya Erez Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/wil6210/interrupt.c | 11 +++++++++-- drivers/net/wireless/ath/wil6210/main.c | 7 +++++-- drivers/net/wireless/ath/wil6210/wil6210.h | 1 + 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/interrupt.c b/drivers/net/wireless/ath/wil6210/interrupt.c index 011e741..f10c47d 100644 --- a/drivers/net/wireless/ath/wil6210/interrupt.c +++ b/drivers/net/wireless/ath/wil6210/interrupt.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2015 Qualcomm Atheros, Inc. + * Copyright (c) 2012-2016 Qualcomm Atheros, Inc. * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -101,7 +101,7 @@ static void wil6210_mask_irq_misc(struct wil6210_priv *wil, bool mask_halp) mask_halp ? WIL6210_IRQ_DISABLE : WIL6210_IRQ_DISABLE_NO_HALP); } -static void wil6210_mask_halp(struct wil6210_priv *wil) +void wil6210_mask_halp(struct wil6210_priv *wil) { wil_dbg_irq(wil, "%s()\n", __func__); @@ -503,6 +503,13 @@ static int wil6210_debug_irq_mask(struct wil6210_priv *wil, u32 pseudo_cause) offsetof(struct RGF_ICR, ICR)); u32 imv_misc = wil_r(wil, RGF_DMA_EP_MISC_ICR + offsetof(struct RGF_ICR, IMV)); + + /* HALP interrupt can be unmasked when misc interrupts are + * masked + */ + if (icr_misc & BIT_DMA_EP_MISC_ICR_HALP) + return 0; + wil_err(wil, "IRQ when it should be masked: pseudo 0x%08x\n" "Rx icm:icr:imv 0x%08x 0x%08x 0x%08x\n" "Tx icm:icr:imv 0x%08x 0x%08x 0x%08x\n" diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c index 4bc92e5..4240e81 100644 --- a/drivers/net/wireless/ath/wil6210/main.c +++ b/drivers/net/wireless/ath/wil6210/main.c @@ -1124,13 +1124,16 @@ void wil_halp_vote(struct wil6210_priv *wil) if (++wil->halp.ref_cnt == 1) { wil6210_set_halp(wil); rc = 
wait_for_completion_timeout(&wil->halp.comp, to_jiffies); - if (!rc) + if (!rc) { wil_err(wil, "%s: HALP vote timed out\n", __func__); - else + /* Mask HALP as done in case the interrupt is raised */ + wil6210_mask_halp(wil); + } else { wil_dbg_misc(wil, "%s: HALP vote completed after %d ms\n", __func__, jiffies_to_msecs(to_jiffies - rc)); + } } wil_dbg_misc(wil, "%s: end, HALP ref_cnt (%d)\n", __func__, diff --git a/drivers/net/wireless/ath/wil6210/wil6210.h b/drivers/net/wireless/ath/wil6210/wil6210.h index ecab4af..9742446 100644 --- a/drivers/net/wireless/ath/wil6210/wil6210.h +++ b/drivers/net/wireless/ath/wil6210/wil6210.h @@ -828,6 +828,7 @@ void wil_unmask_irq(struct wil6210_priv *wil); void wil_configure_interrupt_moderation(struct wil6210_priv *wil); void wil_disable_irq(struct wil6210_priv *wil); void wil_enable_irq(struct wil6210_priv *wil); +void wil6210_mask_halp(struct wil6210_priv *wil); /* P2P */ bool wil_p2p_is_social_scan(struct cfg80211_scan_request *request); -- cgit v1.1 From 74b6ac586d9cb7f45c894841e4204b3648ae865c Mon Sep 17 00:00:00 2001 From: Maya Erez Date: Thu, 18 Aug 2016 16:52:12 +0300 Subject: wil6210: support rx key setting for all TIDs According to the spec the PN should be calculated per TID. In the current implementation, the PN and key_set were set only for TID 0, therefore only traffic for TID 0 was supported. In order to support all TIDs, the key_set and PN should be set for all the TIDs. 
Signed-off-by: Maya Erez Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/wil6210/cfg80211.c | 116 +++++++++++++++++++--------- 1 file changed, 81 insertions(+), 35 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c index f0e1175..110098e 100644 --- a/drivers/net/wireless/ath/wil6210/cfg80211.c +++ b/drivers/net/wireless/ath/wil6210/cfg80211.c @@ -760,14 +760,11 @@ static enum wmi_key_usage wil_detect_key_usage(struct wil6210_priv *wil, return rc; } -static struct wil_tid_crypto_rx_single * -wil_find_crypto_ctx(struct wil6210_priv *wil, u8 key_index, - enum wmi_key_usage key_usage, const u8 *mac_addr) +static struct wil_sta_info * +wil_find_sta_by_key_usage(struct wil6210_priv *wil, + enum wmi_key_usage key_usage, const u8 *mac_addr) { int cid = -EINVAL; - int tid = 0; - struct wil_sta_info *s; - struct wil_tid_crypto_rx *c; if (key_usage == WMI_KEY_USE_TX_GROUP) return NULL; /* not needed */ @@ -778,18 +775,72 @@ wil_find_crypto_ctx(struct wil6210_priv *wil, u8 key_index, else if (key_usage == WMI_KEY_USE_RX_GROUP) cid = wil_find_cid_by_idx(wil, 0); if (cid < 0) { - wil_err(wil, "No CID for %pM %s[%d]\n", mac_addr, - key_usage_str[key_usage], key_index); + wil_err(wil, "No CID for %pM %s\n", mac_addr, + key_usage_str[key_usage]); return ERR_PTR(cid); } - s = &wil->sta[cid]; - if (key_usage == WMI_KEY_USE_PAIRWISE) - c = &s->tid_crypto_rx[tid]; - else - c = &s->group_crypto_rx; + return &wil->sta[cid]; +} + +static void wil_set_crypto_rx(u8 key_index, enum wmi_key_usage key_usage, + struct wil_sta_info *cs, + struct key_params *params) +{ + struct wil_tid_crypto_rx_single *cc; + int tid; + + if (!cs) + return; - return &c->key_id[key_index]; + switch (key_usage) { + case WMI_KEY_USE_PAIRWISE: + for (tid = 0; tid < WIL_STA_TID_NUM; tid++) { + cc = &cs->tid_crypto_rx[tid].key_id[key_index]; + if (params->seq) + memcpy(cc->pn, params->seq, + IEEE80211_GCMP_PN_LEN); + else + memset(cc->pn, 0, 
IEEE80211_GCMP_PN_LEN); + cc->key_set = true; + } + break; + case WMI_KEY_USE_RX_GROUP: + cc = &cs->group_crypto_rx.key_id[key_index]; + if (params->seq) + memcpy(cc->pn, params->seq, IEEE80211_GCMP_PN_LEN); + else + memset(cc->pn, 0, IEEE80211_GCMP_PN_LEN); + cc->key_set = true; + break; + default: + break; + } +} + +static void wil_del_rx_key(u8 key_index, enum wmi_key_usage key_usage, + struct wil_sta_info *cs) +{ + struct wil_tid_crypto_rx_single *cc; + int tid; + + if (!cs) + return; + + switch (key_usage) { + case WMI_KEY_USE_PAIRWISE: + for (tid = 0; tid < WIL_STA_TID_NUM; tid++) { + cc = &cs->tid_crypto_rx[tid].key_id[key_index]; + cc->key_set = false; + } + break; + case WMI_KEY_USE_RX_GROUP: + cc = &cs->group_crypto_rx.key_id[key_index]; + cc->key_set = false; + break; + default: + break; + } } static int wil_cfg80211_add_key(struct wiphy *wiphy, @@ -801,24 +852,26 @@ static int wil_cfg80211_add_key(struct wiphy *wiphy, int rc; struct wil6210_priv *wil = wiphy_to_wil(wiphy); enum wmi_key_usage key_usage = wil_detect_key_usage(wil, pairwise); - struct wil_tid_crypto_rx_single *cc = wil_find_crypto_ctx(wil, - key_index, - key_usage, - mac_addr); + struct wil_sta_info *cs = wil_find_sta_by_key_usage(wil, key_usage, + mac_addr); + + if (!params) { + wil_err(wil, "NULL params\n"); + return -EINVAL; + } wil_dbg_misc(wil, "%s(%pM %s[%d] PN %*phN)\n", __func__, mac_addr, key_usage_str[key_usage], key_index, params->seq_len, params->seq); - if (IS_ERR(cc)) { + if (IS_ERR(cs)) { wil_err(wil, "Not connected, %s(%pM %s[%d] PN %*phN)\n", __func__, mac_addr, key_usage_str[key_usage], key_index, params->seq_len, params->seq); return -EINVAL; } - if (cc) - cc->key_set = false; + wil_del_rx_key(key_index, key_usage, cs); if (params->seq && params->seq_len != IEEE80211_GCMP_PN_LEN) { wil_err(wil, @@ -831,13 +884,8 @@ static int wil_cfg80211_add_key(struct wiphy *wiphy, rc = wmi_add_cipher_key(wil, key_index, mac_addr, params->key_len, params->key, key_usage); - if ((rc == 
0) && cc) { - if (params->seq) - memcpy(cc->pn, params->seq, IEEE80211_GCMP_PN_LEN); - else - memset(cc->pn, 0, IEEE80211_GCMP_PN_LEN); - cc->key_set = true; - } + if (!rc) + wil_set_crypto_rx(key_index, key_usage, cs, params); return rc; } @@ -849,20 +897,18 @@ static int wil_cfg80211_del_key(struct wiphy *wiphy, { struct wil6210_priv *wil = wiphy_to_wil(wiphy); enum wmi_key_usage key_usage = wil_detect_key_usage(wil, pairwise); - struct wil_tid_crypto_rx_single *cc = wil_find_crypto_ctx(wil, - key_index, - key_usage, - mac_addr); + struct wil_sta_info *cs = wil_find_sta_by_key_usage(wil, key_usage, + mac_addr); wil_dbg_misc(wil, "%s(%pM %s[%d])\n", __func__, mac_addr, key_usage_str[key_usage], key_index); - if (IS_ERR(cc)) + if (IS_ERR(cs)) wil_info(wil, "Not connected, %s(%pM %s[%d])\n", __func__, mac_addr, key_usage_str[key_usage], key_index); - if (!IS_ERR_OR_NULL(cc)) - cc->key_set = false; + if (!IS_ERR_OR_NULL(cs)) + wil_del_rx_key(key_index, key_usage, cs); return wmi_del_cipher_key(wil, key_index, mac_addr, key_usage); } -- cgit v1.1 From ef86f249fa4980fc78fe1546e45d8cab6be424b6 Mon Sep 17 00:00:00 2001 From: Lior David Date: Thu, 18 Aug 2016 16:52:13 +0300 Subject: wil6210: change HALP logging category to IRQ Change the logging category of HALP functions from MISC to IRQ, since the HALP mechanism is closely related to interrupts. Both HALP and IRQ create a heavy load of logging messages when enabled, so their logging is typically disabled during normal debug scenarios. Having them in the same logging category will make it easier to disable logging for both in one go. 
Signed-off-by: Lior David Signed-off-by: Maya Erez Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/wil6210/interrupt.c | 4 ++-- drivers/net/wireless/ath/wil6210/main.c | 26 +++++++++++++------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/interrupt.c b/drivers/net/wireless/ath/wil6210/interrupt.c index f10c47d..64046e0 100644 --- a/drivers/net/wireless/ath/wil6210/interrupt.c +++ b/drivers/net/wireless/ath/wil6210/interrupt.c @@ -599,7 +599,7 @@ void wil6210_clear_irq(struct wil6210_priv *wil) void wil6210_set_halp(struct wil6210_priv *wil) { - wil_dbg_misc(wil, "%s()\n", __func__); + wil_dbg_irq(wil, "%s()\n", __func__); wil_w(wil, RGF_DMA_EP_MISC_ICR + offsetof(struct RGF_ICR, ICS), BIT_DMA_EP_MISC_ICR_HALP); @@ -607,7 +607,7 @@ void wil6210_set_halp(struct wil6210_priv *wil) void wil6210_clear_halp(struct wil6210_priv *wil) { - wil_dbg_misc(wil, "%s()\n", __func__); + wil_dbg_irq(wil, "%s()\n", __func__); wil_w(wil, RGF_DMA_EP_MISC_ICR + offsetof(struct RGF_ICR, ICR), BIT_DMA_EP_MISC_ICR_HALP); diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c index 4240e81..1205d76 100644 --- a/drivers/net/wireless/ath/wil6210/main.c +++ b/drivers/net/wireless/ath/wil6210/main.c @@ -1118,8 +1118,8 @@ void wil_halp_vote(struct wil6210_priv *wil) mutex_lock(&wil->halp.lock); - wil_dbg_misc(wil, "%s: start, HALP ref_cnt (%d)\n", __func__, - wil->halp.ref_cnt); + wil_dbg_irq(wil, "%s: start, HALP ref_cnt (%d)\n", __func__, + wil->halp.ref_cnt); if (++wil->halp.ref_cnt == 1) { wil6210_set_halp(wil); @@ -1129,15 +1129,15 @@ void wil_halp_vote(struct wil6210_priv *wil) /* Mask HALP as done in case the interrupt is raised */ wil6210_mask_halp(wil); } else { - wil_dbg_misc(wil, - "%s: HALP vote completed after %d ms\n", - __func__, - jiffies_to_msecs(to_jiffies - rc)); + wil_dbg_irq(wil, + "%s: HALP vote completed after %d ms\n", + __func__, + jiffies_to_msecs(to_jiffies - rc)); 
} } - wil_dbg_misc(wil, "%s: end, HALP ref_cnt (%d)\n", __func__, - wil->halp.ref_cnt); + wil_dbg_irq(wil, "%s: end, HALP ref_cnt (%d)\n", __func__, + wil->halp.ref_cnt); mutex_unlock(&wil->halp.lock); } @@ -1148,16 +1148,16 @@ void wil_halp_unvote(struct wil6210_priv *wil) mutex_lock(&wil->halp.lock); - wil_dbg_misc(wil, "%s: start, HALP ref_cnt (%d)\n", __func__, - wil->halp.ref_cnt); + wil_dbg_irq(wil, "%s: start, HALP ref_cnt (%d)\n", __func__, + wil->halp.ref_cnt); if (--wil->halp.ref_cnt == 0) { wil6210_clear_halp(wil); - wil_dbg_misc(wil, "%s: HALP unvote\n", __func__); + wil_dbg_irq(wil, "%s: HALP unvote\n", __func__); } - wil_dbg_misc(wil, "%s: end, HALP ref_cnt (%d)\n", __func__, - wil->halp.ref_cnt); + wil_dbg_irq(wil, "%s: end, HALP ref_cnt (%d)\n", __func__, + wil->halp.ref_cnt); mutex_unlock(&wil->halp.lock); } -- cgit v1.1 From d35c2b6f8ffa75d430fd0fbbc5062f738c44f6e4 Mon Sep 17 00:00:00 2001 From: Maya Erez Date: Thu, 18 Aug 2016 16:52:14 +0300 Subject: wil6210: fix stop p2p device handling fix stop p2p device handling to identify between search and listen and update the upper layers with the appropriate notification. The stop of p2p radio operations also needs to be performed in __wil_down. 
Signed-off-by: Maya Erez Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/wil6210/cfg80211.c | 25 ++++++---------- drivers/net/wireless/ath/wil6210/main.c | 2 +- drivers/net/wireless/ath/wil6210/p2p.c | 46 +++++++++++++++++++++++++++++ drivers/net/wireless/ath/wil6210/pcie_bus.c | 5 +++- drivers/net/wireless/ath/wil6210/wil6210.h | 1 + 5 files changed, 61 insertions(+), 18 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c index 110098e..310a385 100644 --- a/drivers/net/wireless/ath/wil6210/cfg80211.c +++ b/drivers/net/wireless/ath/wil6210/cfg80211.c @@ -1409,23 +1409,16 @@ static void wil_cfg80211_stop_p2p_device(struct wiphy *wiphy, struct wireless_dev *wdev) { struct wil6210_priv *wil = wiphy_to_wil(wiphy); - u8 started; + struct wil_p2p_info *p2p = &wil->p2p; + + if (!p2p->p2p_dev_started) + return; wil_dbg_misc(wil, "%s: entered\n", __func__); mutex_lock(&wil->mutex); - started = wil_p2p_stop_discovery(wil); - if (started && wil->scan_request) { - struct cfg80211_scan_info info = { - .aborted = true, - }; - - cfg80211_scan_done(wil->scan_request, &info); - wil->scan_request = NULL; - wil->radio_wdev = wil->wdev; - } + wil_p2p_stop_radio_operations(wil); + p2p->p2p_dev_started = 0; mutex_unlock(&wil->mutex); - - wil->p2p.p2p_dev_started = 0; } static struct cfg80211_ops wil_cfg80211_ops = { @@ -1544,11 +1537,11 @@ void wil_p2p_wdev_free(struct wil6210_priv *wil) mutex_lock(&wil->p2p_wdev_mutex); p2p_wdev = wil->p2p_wdev; + wil->p2p_wdev = NULL; + wil->radio_wdev = wil_to_wdev(wil); + mutex_unlock(&wil->p2p_wdev_mutex); if (p2p_wdev) { - wil->p2p_wdev = NULL; - wil->radio_wdev = wil_to_wdev(wil); cfg80211_unregister_wdev(p2p_wdev); kfree(p2p_wdev); } - mutex_unlock(&wil->p2p_wdev_mutex); } diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c index 1205d76..bbc54ee 100644 --- a/drivers/net/wireless/ath/wil6210/main.c +++ 
b/drivers/net/wireless/ath/wil6210/main.c @@ -1050,7 +1050,7 @@ int __wil_down(struct wil6210_priv *wil) } wil_enable_irq(wil); - (void)wil_p2p_stop_discovery(wil); + wil_p2p_stop_radio_operations(wil); if (wil->scan_request) { struct cfg80211_scan_info info = { diff --git a/drivers/net/wireless/ath/wil6210/p2p.c b/drivers/net/wireless/ath/wil6210/p2p.c index e0f8aa0..4087785 100644 --- a/drivers/net/wireless/ath/wil6210/p2p.c +++ b/drivers/net/wireless/ath/wil6210/p2p.c @@ -263,3 +263,49 @@ void wil_p2p_search_expired(struct work_struct *work) mutex_unlock(&wil->p2p_wdev_mutex); } } + +void wil_p2p_stop_radio_operations(struct wil6210_priv *wil) +{ + struct wil_p2p_info *p2p = &wil->p2p; + struct cfg80211_scan_info info = { + .aborted = true, + }; + + lockdep_assert_held(&wil->mutex); + + mutex_lock(&wil->p2p_wdev_mutex); + + if (wil->radio_wdev != wil->p2p_wdev) + goto out; + + if (!p2p->discovery_started) { + /* Regular scan on the p2p device */ + if (wil->scan_request && + wil->scan_request->wdev == wil->p2p_wdev) { + cfg80211_scan_done(wil->scan_request, &info); + wil->scan_request = NULL; + } + goto out; + } + + /* Search or listen on p2p device */ + mutex_unlock(&wil->p2p_wdev_mutex); + wil_p2p_stop_discovery(wil); + mutex_lock(&wil->p2p_wdev_mutex); + + if (wil->scan_request) { + /* search */ + cfg80211_scan_done(wil->scan_request, &info); + wil->scan_request = NULL; + } else { + /* listen */ + cfg80211_remain_on_channel_expired(wil->radio_wdev, + p2p->cookie, + &p2p->listen_chan, + GFP_KERNEL); + } + +out: + wil->radio_wdev = wil->wdev; + mutex_unlock(&wil->p2p_wdev_mutex); +} diff --git a/drivers/net/wireless/ath/wil6210/pcie_bus.c b/drivers/net/wireless/ath/wil6210/pcie_bus.c index 7b5c422..5b7a9d2 100644 --- a/drivers/net/wireless/ath/wil6210/pcie_bus.c +++ b/drivers/net/wireless/ath/wil6210/pcie_bus.c @@ -20,6 +20,7 @@ #include #include #include "wil6210.h" +#include static bool use_msi = true; module_param(use_msi, bool, S_IRUGO); @@ -293,6 +294,9 @@ 
static void wil_pcie_remove(struct pci_dev *pdev) #endif /* CONFIG_PM */ wil6210_debugfs_remove(wil); + rtnl_lock(); + wil_p2p_wdev_free(wil); + rtnl_unlock(); wil_if_remove(wil); wil_if_pcie_disable(wil); pci_iounmap(pdev, csr); @@ -300,7 +304,6 @@ static void wil_pcie_remove(struct pci_dev *pdev) pci_disable_device(pdev); if (wil->platform_ops.uninit) wil->platform_ops.uninit(wil->platform_handle); - wil_p2p_wdev_free(wil); wil_if_free(wil); } diff --git a/drivers/net/wireless/ath/wil6210/wil6210.h b/drivers/net/wireless/ath/wil6210/wil6210.h index 9742446..6087691 100644 --- a/drivers/net/wireless/ath/wil6210/wil6210.h +++ b/drivers/net/wireless/ath/wil6210/wil6210.h @@ -841,6 +841,7 @@ u8 wil_p2p_stop_discovery(struct wil6210_priv *wil); int wil_p2p_cancel_listen(struct wil6210_priv *wil, u64 cookie); void wil_p2p_listen_expired(struct work_struct *work); void wil_p2p_search_expired(struct work_struct *work); +void wil_p2p_stop_radio_operations(struct wil6210_priv *wil); /* WMI for P2P */ int wmi_p2p_cfg(struct wil6210_priv *wil, int channel, int bi); -- cgit v1.1 From b0c0e688e523eba14abf21ac246b7dd88f5574fa Mon Sep 17 00:00:00 2001 From: Lazar Alexei Date: Thu, 18 Aug 2016 16:52:14 +0300 Subject: wil6210: Fix driver down flow Stations disconnection is executed as part of wil_reset so no need to do it in wil_down. Removal of the disconnect operation will also preserve the lock of wil->mutex during the whole reset flow and prevent handling of connect event while resetting. Set wil_status_resetting in earlier stage in the flow to prevent double resetting call in case communication with FW fails while bringing the interface down. 
Signed-off-by: Lazar Alexei Signed-off-by: Maya Erez Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/wil6210/main.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c index bbc54ee..dd0ee7f 100644 --- a/drivers/net/wireless/ath/wil6210/main.c +++ b/drivers/net/wireless/ath/wil6210/main.c @@ -1035,10 +1035,10 @@ int wil_up(struct wil6210_priv *wil) int __wil_down(struct wil6210_priv *wil) { - int rc; - WARN_ON(!mutex_is_locked(&wil->mutex)); + set_bit(wil_status_resetting, wil->status); + if (wil->platform_ops.bus_request) wil->platform_ops.bus_request(wil->platform_handle, 0); @@ -1064,18 +1064,6 @@ int __wil_down(struct wil6210_priv *wil) wil->scan_request = NULL; } - if (test_bit(wil_status_fwconnected, wil->status) || - test_bit(wil_status_fwconnecting, wil->status)) { - - mutex_unlock(&wil->mutex); - rc = wmi_call(wil, WMI_DISCONNECT_CMDID, NULL, 0, - WMI_DISCONNECT_EVENTID, NULL, 0, - WIL6210_DISCONNECT_TO_MS); - mutex_lock(&wil->mutex); - if (rc) - wil_err(wil, "timeout waiting for disconnect\n"); - } - wil_reset(wil, false); return 0; -- cgit v1.1 From dc90506f145875b9d88160802cc5fe06a7c79dda Mon Sep 17 00:00:00 2001 From: Maya Erez Date: Thu, 18 Aug 2016 16:52:15 +0300 Subject: wil6210: prevent usage of incorrect TX hwtail txdata->enabled is used in order to determine if the TX vring is valid. As the data transmit is handled in a different context, in case txdata->enabled is set before vring->hwtail is updated, an old or corrupted vring->hwtail can be used. Protect setting of txdata->enabled and vring->hwtail to prevent a case where TX vring start handling TX packets before setting vring->hwtail. 
Signed-off-by: Maya Erez Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/wil6210/txrx.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/txrx.c b/drivers/net/wireless/ath/wil6210/txrx.c index f2f6a40..4c38520 100644 --- a/drivers/net/wireless/ath/wil6210/txrx.c +++ b/drivers/net/wireless/ath/wil6210/txrx.c @@ -873,9 +873,12 @@ int wil_vring_init_tx(struct wil6210_priv *wil, int id, int size, rc = -EINVAL; goto out_free; } - vring->hwtail = le32_to_cpu(reply.cmd.tx_vring_tail_ptr); + spin_lock_bh(&txdata->lock); + vring->hwtail = le32_to_cpu(reply.cmd.tx_vring_tail_ptr); txdata->enabled = 1; + spin_unlock_bh(&txdata->lock); + if (txdata->dot1x_open && (agg_wsize >= 0)) wil_addba_tx_request(wil, id, agg_wsize); @@ -950,9 +953,11 @@ int wil_vring_init_bcast(struct wil6210_priv *wil, int id, int size) rc = -EINVAL; goto out_free; } - vring->hwtail = le32_to_cpu(reply.cmd.tx_vring_tail_ptr); + spin_lock_bh(&txdata->lock); + vring->hwtail = le32_to_cpu(reply.cmd.tx_vring_tail_ptr); txdata->enabled = 1; + spin_unlock_bh(&txdata->lock); return 0; out_free: -- cgit v1.1 From 2690c4c0e83b1aa5aa8b8b258ae422b067720224 Mon Sep 17 00:00:00 2001 From: Lior David Date: Thu, 18 Aug 2016 16:52:16 +0300 Subject: wil6210: fix wiphy registration sequence Currently wiphy structure is initialized and registered in wil_if_alloc, before some information is available such as MAC address and capabilities. As a result there is a small chance user space will get incorrect information from calls such as NL80211_CMD_GET_WIPHY. Fix this by separating the registration and moving it to wil_if_add which is executed later, after all relevant information is known. 
Signed-off-by: Lior David Signed-off-by: Maya Erez Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/wil6210/cfg80211.c | 7 ------- drivers/net/wireless/ath/wil6210/main.c | 3 +++ drivers/net/wireless/ath/wil6210/netdev.c | 32 +++++++++++++++++++++-------- 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c index 310a385..ffacc76 100644 --- a/drivers/net/wireless/ath/wil6210/cfg80211.c +++ b/drivers/net/wireless/ath/wil6210/cfg80211.c @@ -1503,14 +1503,8 @@ struct wireless_dev *wil_cfg80211_init(struct device *dev) set_wiphy_dev(wdev->wiphy, dev); wil_wiphy_init(wdev->wiphy); - rc = wiphy_register(wdev->wiphy); - if (rc < 0) - goto out_failed_reg; - return wdev; -out_failed_reg: - wiphy_free(wdev->wiphy); out: kfree(wdev); @@ -1526,7 +1520,6 @@ void wil_wdev_free(struct wil6210_priv *wil) if (!wdev) return; - wiphy_unregister(wdev->wiphy); wiphy_free(wdev->wiphy); kfree(wdev); } diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c index dd0ee7f..d0b180c 100644 --- a/drivers/net/wireless/ath/wil6210/main.c +++ b/drivers/net/wireless/ath/wil6210/main.c @@ -232,6 +232,9 @@ static void _wil6210_disconnect(struct wil6210_priv *wil, const u8 *bssid, struct net_device *ndev = wil_to_ndev(wil); struct wireless_dev *wdev = wil->wdev; + if (unlikely(!ndev)) + return; + might_sleep(); wil_info(wil, "%s(bssid=%pM, reason=%d, ev%s)\n", __func__, bssid, reason_code, from_event ? 
"+" : "-"); diff --git a/drivers/net/wireless/ath/wil6210/netdev.c b/drivers/net/wireless/ath/wil6210/netdev.c index 09840975..4bc9bb0a 100644 --- a/drivers/net/wireless/ath/wil6210/netdev.c +++ b/drivers/net/wireless/ath/wil6210/netdev.c @@ -179,13 +179,6 @@ void *wil_if_alloc(struct device *dev) SET_NETDEV_DEV(ndev, wiphy_dev(wdev->wiphy)); wdev->netdev = ndev; - netif_napi_add(ndev, &wil->napi_rx, wil6210_netdev_poll_rx, - WIL6210_NAPI_BUDGET); - netif_tx_napi_add(ndev, &wil->napi_tx, wil6210_netdev_poll_tx, - WIL6210_NAPI_BUDGET); - - netif_tx_stop_all_queues(ndev); - return wil; out_priv: @@ -216,25 +209,46 @@ void wil_if_free(struct wil6210_priv *wil) int wil_if_add(struct wil6210_priv *wil) { + struct wireless_dev *wdev = wil_to_wdev(wil); + struct wiphy *wiphy = wdev->wiphy; struct net_device *ndev = wil_to_ndev(wil); int rc; - wil_dbg_misc(wil, "%s()\n", __func__); + wil_dbg_misc(wil, "entered"); + + rc = wiphy_register(wiphy); + if (rc < 0) { + wil_err(wil, "failed to register wiphy, err %d\n", rc); + return rc; + } + + netif_napi_add(ndev, &wil->napi_rx, wil6210_netdev_poll_rx, + WIL6210_NAPI_BUDGET); + netif_tx_napi_add(ndev, &wil->napi_tx, wil6210_netdev_poll_tx, + WIL6210_NAPI_BUDGET); + + netif_tx_stop_all_queues(ndev); rc = register_netdev(ndev); if (rc < 0) { dev_err(&ndev->dev, "Failed to register netdev: %d\n", rc); - return rc; + goto out_wiphy; } return 0; + +out_wiphy: + wiphy_unregister(wdev->wiphy); + return rc; } void wil_if_remove(struct wil6210_priv *wil) { struct net_device *ndev = wil_to_ndev(wil); + struct wireless_dev *wdev = wil_to_wdev(wil); wil_dbg_misc(wil, "%s()\n", __func__); unregister_netdev(ndev); + wiphy_unregister(wdev->wiphy); } -- cgit v1.1 From 06777c4ec78a43977b63f1d5045def057227c2c5 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Sun, 7 Aug 2016 21:01:49 +0200 Subject: dmaengine: pxa_dma: fix debug message In a very tight timeframe, the debug message in the transfer completion handler can be misleading, as the 
completion test report can change just after the message, and the code flow cannot be deduced from the debug message. This is just a cleanup to make debugging easier. Signed-off-by: Robert Jarzmik Signed-off-by: Vinod Koul --- drivers/dma/pxa_dma.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c index 2093e52..3f56f9c 100644 --- a/drivers/dma/pxa_dma.c +++ b/drivers/dma/pxa_dma.c @@ -671,6 +671,7 @@ static irqreturn_t pxad_chan_handler(int irq, void *dev_id) struct virt_dma_desc *vd, *tmp; unsigned int dcsr; unsigned long flags; + bool vd_completed; dma_cookie_t last_started = 0; BUG_ON(!chan); @@ -681,15 +682,17 @@ static irqreturn_t pxad_chan_handler(int irq, void *dev_id) spin_lock_irqsave(&chan->vc.lock, flags); list_for_each_entry_safe(vd, tmp, &chan->vc.desc_issued, node) { + vd_completed = is_desc_completed(vd); dev_dbg(&chan->vc.chan.dev->device, - "%s(): checking txd %p[%x]: completed=%d\n", - __func__, vd, vd->tx.cookie, is_desc_completed(vd)); + "%s(): checking txd %p[%x]: completed=%d dcsr=0x%x\n", + __func__, vd, vd->tx.cookie, vd_completed, + dcsr); last_started = vd->tx.cookie; if (to_pxad_sw_desc(vd)->cyclic) { vchan_cyclic_callback(vd); break; } - if (is_desc_completed(vd)) { + if (vd_completed) { list_del(&vd->node); vchan_cookie_complete(vd); } else { -- cgit v1.1 From 98096d8a787f05b1afe3869aa01e84981915c81d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 18 Aug 2016 11:16:35 -0700 Subject: nvme-fabrics: get a reference when reusing a nvme_host structure Without this we'll get a use after free after connecting two controller using the same hostnqn and then disconnecting one of them. 
Signed-off-by: Christoph Hellwig Reviewed-by: Jay Freyensee Signed-off-by: Sagi Grimberg --- drivers/nvme/host/fabrics.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index be0b106..4eff491 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -47,8 +47,10 @@ static struct nvmf_host *nvmf_host_add(const char *hostnqn) mutex_lock(&nvmf_hosts_mutex); host = __nvmf_host_find(hostnqn); - if (host) + if (host) { + kref_get(&host->ref); goto out_unlock; + } host = kmalloc(sizeof(*host), GFP_KERNEL); if (!host) -- cgit v1.1 From aa71987472a974f4f6dc4be377720564079ef42e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 18 Aug 2016 11:16:36 -0700 Subject: nvme: fabrics drivers don't need the nvme-pci driver So select the NVME_CORE symbol instead of depending on BLK_DEV_NVME. Signed-off-by: Christoph Hellwig Reviewed-by: Jay Freyensee Signed-off-by: Sagi Grimberg --- drivers/nvme/host/Kconfig | 2 +- drivers/nvme/target/Kconfig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index db39d53..0c644f7 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -31,7 +31,7 @@ config NVME_FABRICS config NVME_RDMA tristate "NVM Express over Fabrics RDMA host driver" depends on INFINIBAND - depends on BLK_DEV_NVME + select NVME_CORE select NVME_FABRICS select SG_POOL help diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index a5c31cb..3a5b9d0 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -15,8 +15,8 @@ config NVME_TARGET config NVME_TARGET_LOOP tristate "NVMe loopback device support" - depends on BLK_DEV_NVME depends on NVME_TARGET + select NVME_CORE select NVME_FABRICS select SG_POOL help -- cgit v1.1 From 9a0fe86745b8e95f7ea39933a956f5771332c430 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Aug 2016 15:33:12 -0400 
Subject: pNFS: Handle NFS4ERR_OLD_STATEID correctly in LAYOUTSTAT calls We normally want to update the stateid and then retry, Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 34 +++++++++++++++++++++++++++++----- fs/nfs/pnfs.c | 1 - 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 6f47527..64b43b4 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -318,10 +318,22 @@ static void nfs42_layoutstat_prepare(struct rpc_task *task, void *calldata) { struct nfs42_layoutstat_data *data = calldata; - struct nfs_server *server = NFS_SERVER(data->args.inode); + struct inode *inode = data->inode; + struct nfs_server *server = NFS_SERVER(inode); + struct pnfs_layout_hdr *lo; + spin_lock(&inode->i_lock); + lo = NFS_I(inode)->layout; + if (!pnfs_layout_is_valid(lo)) { + spin_unlock(&inode->i_lock); + rpc_exit(task, 0); + return; + } + nfs4_stateid_copy(&data->args.stateid, &lo->plh_stateid); + spin_unlock(&inode->i_lock); nfs41_setup_sequence(nfs4_get_session(server), &data->args.seq_args, &data->res.seq_res, task); + } static void @@ -341,11 +353,11 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata) case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_OLD_STATEID: case -NFS4ERR_BAD_STATEID: spin_lock(&inode->i_lock); lo = NFS_I(inode)->layout; - if (lo && nfs4_stateid_match(&data->args.stateid, + if (pnfs_layout_is_valid(lo) && + nfs4_stateid_match(&data->args.stateid, &lo->plh_stateid)) { LIST_HEAD(head); @@ -359,11 +371,23 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata) } else spin_unlock(&inode->i_lock); break; + case -NFS4ERR_OLD_STATEID: + spin_lock(&inode->i_lock); + lo = NFS_I(inode)->layout; + if (pnfs_layout_is_valid(lo) && + nfs4_stateid_match_other(&data->args.stateid, + &lo->plh_stateid)) { + /* Do we need to delay before resending? 
*/ + if (!nfs4_stateid_is_newer(&lo->plh_stateid, + &data->args.stateid)) + rpc_delay(task, HZ); + rpc_restart_call_prepare(task); + } + spin_unlock(&inode->i_lock); + break; case -ENOTSUPP: case -EOPNOTSUPP: NFS_SERVER(inode)->caps &= ~NFS_CAP_LAYOUTSTATS; - default: - break; } dprintk("%s server returns %d\n", __func__, task->tk_status); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 70806ca..bf98f1b 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2510,7 +2510,6 @@ pnfs_report_layoutstat(struct inode *inode, gfp_t gfp_flags) data->args.fh = NFS_FH(inode); data->args.inode = inode; - nfs4_stateid_copy(&data->args.stateid, &hdr->plh_stateid); status = ld->prepare_layoutstats(&data->args); if (status) goto out_free; -- cgit v1.1 From d5fb46e0e3b7e49ee83ba92efc3ab4e1a545ecc1 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 26 Jul 2016 13:47:10 -0700 Subject: drm/vc4: Use drm_free_large() on handles to match its allocation. If you managed to exceed the limit to switch to vmalloc, we'd use the wrong free. Signed-off-by: Eric Anholt Fixes: d5b1a78a772f ("drm/vc4: Add support for drawing 3D frames.") Cc: stable@vger.kernel.org --- drivers/gpu/drm/vc4/vc4_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 6155e8a..62df61f 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -572,7 +572,7 @@ vc4_cl_lookup_bos(struct drm_device *dev, spin_unlock(&file_priv->table_lock); fail: - kfree(handles); + drm_free_large(handles); return 0; } -- cgit v1.1 From ece7267dccf0e9e08cb6e8dc6b7ad2be9c4eb444 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 19 Jul 2016 11:32:44 -0700 Subject: drm/vc4: Use drm_malloc_ab to fix large rendering jobs. If you exceeded the size that kmalloc would return, you'd get a dmesg warning and a return from the job submit. We can handle much allocations with vmalloc, and drm_malloc_ab makes that decision. 
Fixes failure in piglit's scissor-many. Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_gem.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 62df61f..bfd1b52 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -534,8 +534,8 @@ vc4_cl_lookup_bos(struct drm_device *dev, return -EINVAL; } - exec->bo = kcalloc(exec->bo_count, sizeof(struct drm_gem_cma_object *), - GFP_KERNEL); + exec->bo = drm_calloc_large(exec->bo_count, + sizeof(struct drm_gem_cma_object *)); if (!exec->bo) { DRM_ERROR("Failed to allocate validated BO pointers\n"); return -ENOMEM; @@ -608,7 +608,7 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec) * read the contents back for validation, and I think the * bo->vaddr is uncached access. */ - temp = kmalloc(temp_size, GFP_KERNEL); + temp = drm_malloc_ab(temp_size, 1); if (!temp) { DRM_ERROR("Failed to allocate storage for copying " "in bin/render CLs.\n"); @@ -675,7 +675,7 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec) ret = vc4_validate_shader_recs(dev, exec); fail: - kfree(temp); + drm_free_large(temp); return ret; } @@ -688,7 +688,7 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec) if (exec->bo) { for (i = 0; i < exec->bo_count; i++) drm_gem_object_unreference_unlocked(&exec->bo[i]->base); - kfree(exec->bo); + drm_free_large(exec->bo); } while (!list_empty(&exec->unref_list)) { -- cgit v1.1 From 163195fc12cae0c8b5c0d74d3ba8d2c5f81773bc Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 26 Jul 2016 13:47:12 -0700 Subject: drm/vc4: Fix handling of a pm_runtime_get_sync() success case. If the device was already up, a 1 is returned instead of 0. We were erroring out, leading the 3D driver to sometimes fail at screen initialization (generally with ENOENT returned to it). 
Signed-off-by: Eric Anholt Fixes: af713795c59f ("drm/vc4: Add a getparam ioctl for getting the V3D identity regs.") --- drivers/gpu/drm/vc4/vc4_drv.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 8b42d31..9ecef93 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -57,21 +57,21 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data, switch (args->param) { case DRM_VC4_PARAM_V3D_IDENT0: ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); - if (ret) + if (ret < 0) return ret; args->value = V3D_READ(V3D_IDENT0); pm_runtime_put(&vc4->v3d->pdev->dev); break; case DRM_VC4_PARAM_V3D_IDENT1: ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); - if (ret) + if (ret < 0) return ret; args->value = V3D_READ(V3D_IDENT1); pm_runtime_put(&vc4->v3d->pdev->dev); break; case DRM_VC4_PARAM_V3D_IDENT2: ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); - if (ret) + if (ret < 0) return ret; args->value = V3D_READ(V3D_IDENT2); pm_runtime_put(&vc4->v3d->pdev->dev); -- cgit v1.1 From def96527707e1978a0c88e75d13b082f51460d5c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 26 Jul 2016 13:47:13 -0700 Subject: drm/vc4: Free hang state before destroying BO cache. The BO cache will complain if BOs are still allocated when we try to destroy it (since freeing those BOs would try to hit the cache). You could hit this if you were to unload the module after a GPU hang. 
Signed-off-by: Eric Anholt Fixes: 214613656b51 ("drm/vc4: Add an interface for capturing the GPU state after a hang.") --- drivers/gpu/drm/vc4/vc4_gem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index bfd1b52..fba2c83 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -942,8 +942,8 @@ vc4_gem_destroy(struct drm_device *dev) vc4->overflow_mem = NULL; } - vc4_bo_cache_destroy(dev); - if (vc4->hang_state) vc4_free_hang_state(dev, vc4->hang_state); + + vc4_bo_cache_destroy(dev); } -- cgit v1.1 From 9326e6f25574bbb8bd48206d245654780e3fd665 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 26 Jul 2016 13:47:14 -0700 Subject: drm/vc4: Fix overflow mem unreferencing when the binner runs dry. Overflow memory handling is tricky: While it's still referenced by the BPO registers, we want to keep it from being freed. When we are putting a new set of overflow memory in the registers, we need to assign the old one to the last rendering job using it. We were looking at "what's currently running in the binner", but since the bin/render submission split, we may end up with the binner completing and having no new job while the renderer is still processing. So, if we don't find a bin job at all, look at the highest-seqno (last) render job to attach our overflow to. 
Signed-off-by: Eric Anholt Fixes: ca26d28bbaa3 ("drm/vc4: improve throughput by pipelining binning and rendering jobs") Cc: stable@vger.kernel.org --- drivers/gpu/drm/vc4/vc4_drv.h | 9 +++++++++ drivers/gpu/drm/vc4/vc4_irq.c | 4 +++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 489e3de..428e249 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -321,6 +321,15 @@ vc4_first_render_job(struct vc4_dev *vc4) struct vc4_exec_info, head); } +static inline struct vc4_exec_info * +vc4_last_render_job(struct vc4_dev *vc4) +{ + if (list_empty(&vc4->render_job_list)) + return NULL; + return list_last_entry(&vc4->render_job_list, + struct vc4_exec_info, head); +} + /** * struct vc4_texture_sample_info - saves the offsets into the UBO for texture * setup parameters. diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c index b0104a34..094bc6a 100644 --- a/drivers/gpu/drm/vc4/vc4_irq.c +++ b/drivers/gpu/drm/vc4/vc4_irq.c @@ -83,8 +83,10 @@ vc4_overflow_mem_work(struct work_struct *work) spin_lock_irqsave(&vc4->job_lock, irqflags); current_exec = vc4_first_bin_job(vc4); + if (!current_exec) + current_exec = vc4_last_render_job(vc4); if (current_exec) { - vc4->overflow_mem->seqno = vc4->finished_seqno + 1; + vc4->overflow_mem->seqno = current_exec->seqno; list_add_tail(&vc4->overflow_mem->unref_head, &current_exec->unref_list); vc4->overflow_mem = NULL; -- cgit v1.1 From 552416c146fadc67cd9b53ef7adf88d3381c43a6 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 26 Jul 2016 13:47:15 -0700 Subject: drm/vc4: Fix oops when userspace hands in a bad BO. We'd end up NULL pointer dereferencing because we didn't take the error path out in the parent. Fixes igt vc4_lookup_fail test. 
Signed-off-by: Eric Anholt Fixes: d5b1a78a772f ("drm/vc4: Add support for drawing 3D frames.") Cc: stable@vger.kernel.org --- drivers/gpu/drm/vc4/vc4_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index fba2c83..b262c5c 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -573,7 +573,7 @@ vc4_cl_lookup_bos(struct drm_device *dev, fail: drm_free_large(handles); - return 0; + return ret; } static int -- cgit v1.1 From 6695593e4a7659db49ac6eca98c164f7b5589f72 Mon Sep 17 00:00:00 2001 From: Aleksandr Makarov Date: Sat, 20 Aug 2016 13:29:41 +0300 Subject: USB: serial: option: add WeTelecom WM-D200 Add support for WeTelecom WM-D200. T: Bus=03 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 4 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=22de ProdID=6801 Rev=00.00 S: Manufacturer=WeTelecom Incorporated S: Product=WeTelecom Mobile Products C: #Ifs= 4 Cfg#= 1 Atr=80 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) I: If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) I: If#= 3 Alt= 0 #EPs= 2 Cls=08(stor.) 
Sub=06 Prot=50 Driver=usb-storage Signed-off-by: Aleksandr Makarov Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index bc47258..bb6a711 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -525,6 +525,10 @@ static void option_instat_callback(struct urb *urb); #define VIATELECOM_VENDOR_ID 0x15eb #define VIATELECOM_PRODUCT_CDS7 0x0001 +/* WeTelecom products */ +#define WETELECOM_VENDOR_ID 0x22de +#define WETELECOM_PRODUCT_WMD200 0x6801 + struct option_blacklist_info { /* bitmask of interface numbers blacklisted for send_setup */ const unsigned long sendsetup; @@ -1991,6 +1995,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x4000, 0xff) }, /* OLICARD300 - MT6225 */ { USB_DEVICE(INOVIA_VENDOR_ID, INOVIA_SEW858) }, { USB_DEVICE(VIATELECOM_VENDOR_ID, VIATELECOM_PRODUCT_CDS7) }, + { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD200, 0xff, 0xff, 0xff) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); -- cgit v1.1 From 522caebb2c3684f4a1d154526fb5e33f1381e92a Mon Sep 17 00:00:00 2001 From: Giorgio Dal Molin Date: Tue, 16 Aug 2016 20:43:37 +0200 Subject: iio:ti-ads1015: fix a wrong pointer definition. The call to i2c_get_clientdata(client) returns a struct iio_dev*, not the needed struct ads1015_data*. We need here an intermediate step as in the function: void ads1015_get_channels_config(struct i2c_client *client). 
Signed-off-by: Giorgio Dal Molin Fixes: ecc24e72f437 ("iio: adc: Add TI ADS1015 ADC driver support") Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ti-ads1015.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/ti-ads1015.c b/drivers/iio/adc/ti-ads1015.c index 1ef39877..066abaf 100644 --- a/drivers/iio/adc/ti-ads1015.c +++ b/drivers/iio/adc/ti-ads1015.c @@ -489,7 +489,8 @@ static struct iio_info ads1115_info = { #ifdef CONFIG_OF static int ads1015_get_channels_config_of(struct i2c_client *client) { - struct ads1015_data *data = i2c_get_clientdata(client); + struct iio_dev *indio_dev = i2c_get_clientdata(client); + struct ads1015_data *data = iio_priv(indio_dev); struct device_node *node; if (!client->dev.of_node || -- cgit v1.1 From 80e162ee9b31d77d851b10f8c5299132be1e120f Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 29 Jun 2016 20:27:44 +0100 Subject: staging: comedi: daqboard2000: bug fix board type matching code `daqboard2000_find_boardinfo()` is supposed to check if the DaqBoard/2000 series model is supported, based on the PCI subvendor and subdevice ID. The current code is wrong as it is comparing the PCI device's subdevice ID to an expected, fixed value for the subvendor ID. It should be comparing the PCI device's subvendor ID to this fixed value. Correct it. 
Fixes: 7e8401b23e7f ("staging: comedi: daqboard2000: add back subsystem_device check") Signed-off-by: Ian Abbott Cc: # 3.7+ Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/daqboard2000.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/daqboard2000.c b/drivers/staging/comedi/drivers/daqboard2000.c index 65daef0..0f4eb95 100644 --- a/drivers/staging/comedi/drivers/daqboard2000.c +++ b/drivers/staging/comedi/drivers/daqboard2000.c @@ -634,7 +634,7 @@ static const void *daqboard2000_find_boardinfo(struct comedi_device *dev, const struct daq200_boardtype *board; int i; - if (pcidev->subsystem_device != PCI_VENDOR_ID_IOTECH) + if (pcidev->subsystem_vendor != PCI_VENDOR_ID_IOTECH) return NULL; for (i = 0; i < ARRAY_SIZE(boardtypes); i++) { -- cgit v1.1 From 403fe7f34e3327ddac2e06a15e76a293d613381e Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Thu, 30 Jun 2016 19:58:32 +0100 Subject: staging: comedi: comedi_test: fix timer race conditions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 73e0e4dfed4c ("staging: comedi: comedi_test: fix timer lock-up") fixed a lock-up in the timer routine `waveform_ai_timer()` (which was called `waveform_ai_interrupt()` at the time) caused by commit 240512474424 ("staging: comedi: comedi_test: use comedi_handle_events()"). However, it introduced a race condition that can result in the timer routine misbehaving, such as accessing freed memory or dereferencing a NULL pointer. 73e0... changed the timer routine to do nothing unless a `WAVEFORM_AI_RUNNING` flag was set, and changed `waveform_ai_cancel()` to clear the flag and replace a call to `del_timer_sync()` with a call to `del_timer()`. `waveform_ai_cancel()` may be called from the timer routine itself (via `comedi_handle_events()`), or from `do_cancel()`. 
(`do_cancel()` is called as a result of a file operation (usually a `COMEDI_CANCEL` ioctl command, or a release), or during device removal.) When called from `do_cancel()`, the call to `waveform_ai_cancel()` is followed by a call to `do_become_nonbusy()`, which frees up stuff for the current asynchronous command under the assumption that it is now safe to do so. The race condition occurs when the timer routine `waveform_ai_timer()` checks the `WAVEFORM_AI_RUNNING` flag just before it is cleared by `waveform_ai_cancel()`, and is still running during the call to `do_become_nonbusy()`. In particular, it can lead to a NULL pointer dereference: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] waveform_ai_timer+0x17d/0x290 [comedi_test] That corresponds to this line in `waveform_ai_timer()`: unsigned int chanspec = cmd->chanlist[async->cur_chan]; but `do_become_nonbusy()` frees `cmd->chanlist` and sets it to `NULL`. Fix the race by calling `del_timer_sync()` instead of `del_timer()` in `waveform_ai_cancel()` when not in an interrupt context. The only time `waveform_ai_cancel()` is called in an interrupt context is when it is called from the timer routine itself, via `comedi_handle_events()`. There is no longer any need for the `WAVEFORM_AI_RUNNING` flag, so get rid of it. The bug was copied from the AI subdevice to the AO when support for commands on the AO subdevice was added by commit 0cf55bbef2f9 ("staging: comedi: comedi_test: implement commands on AO subdevice"). That involves the timer routine `waveform_ao_timer()`, the comedi "cancel" routine `waveform_ao_cancel()`, and the flag `WAVEFORM_AO_RUNNING`. Fix it in the same way as for the AI subdevice. 
Fixes: 73e0e4dfed4c ("staging: comedi: comedi_test: fix timer lock-up") Fixes: 0cf55bbef2f9 ("staging: comedi: comedi_test: implement commands on AO subdevice") Reported-by: Éric Piel Signed-off-by: Ian Abbott Cc: # 4.4+ Cc: Éric Piel Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/comedi_test.c | 46 ++++++++-------------------- 1 file changed, 12 insertions(+), 34 deletions(-) diff --git a/drivers/staging/comedi/drivers/comedi_test.c b/drivers/staging/comedi/drivers/comedi_test.c index 4ab1866..ec5b9a2 100644 --- a/drivers/staging/comedi/drivers/comedi_test.c +++ b/drivers/staging/comedi/drivers/comedi_test.c @@ -56,11 +56,6 @@ #define N_CHANS 8 -enum waveform_state_bits { - WAVEFORM_AI_RUNNING, - WAVEFORM_AO_RUNNING -}; - /* Data unique to this driver */ struct waveform_private { struct timer_list ai_timer; /* timer for AI commands */ @@ -68,7 +63,6 @@ struct waveform_private { unsigned int wf_amplitude; /* waveform amplitude in microvolts */ unsigned int wf_period; /* waveform period in microseconds */ unsigned int wf_current; /* current time in waveform period */ - unsigned long state_bits; unsigned int ai_scan_period; /* AI scan period in usec */ unsigned int ai_convert_period; /* AI conversion period in usec */ struct timer_list ao_timer; /* timer for AO commands */ @@ -191,10 +185,6 @@ static void waveform_ai_timer(unsigned long arg) unsigned int nsamples; unsigned int time_increment; - /* check command is still active */ - if (!test_bit(WAVEFORM_AI_RUNNING, &devpriv->state_bits)) - return; - now = ktime_to_us(ktime_get()); nsamples = comedi_nsamples_left(s, UINT_MAX); @@ -386,11 +376,6 @@ static int waveform_ai_cmd(struct comedi_device *dev, */ devpriv->ai_timer.expires = jiffies + usecs_to_jiffies(devpriv->ai_convert_period) + 1; - - /* mark command as active */ - smp_mb__before_atomic(); - set_bit(WAVEFORM_AI_RUNNING, &devpriv->state_bits); - smp_mb__after_atomic(); add_timer(&devpriv->ai_timer); return 0; } @@ -400,11 +385,12 @@ 
static int waveform_ai_cancel(struct comedi_device *dev, { struct waveform_private *devpriv = dev->private; - /* mark command as no longer active */ - clear_bit(WAVEFORM_AI_RUNNING, &devpriv->state_bits); - smp_mb__after_atomic(); - /* cannot call del_timer_sync() as may be called from timer routine */ - del_timer(&devpriv->ai_timer); + if (in_softirq()) { + /* Assume we were called from the timer routine itself. */ + del_timer(&devpriv->ai_timer); + } else { + del_timer_sync(&devpriv->ai_timer); + } return 0; } @@ -436,10 +422,6 @@ static void waveform_ao_timer(unsigned long arg) u64 scans_since; unsigned int scans_avail = 0; - /* check command is still active */ - if (!test_bit(WAVEFORM_AO_RUNNING, &devpriv->state_bits)) - return; - /* determine number of scan periods since last time */ now = ktime_to_us(ktime_get()); scans_since = now - devpriv->ao_last_scan_time; @@ -518,11 +500,6 @@ static int waveform_ao_inttrig_start(struct comedi_device *dev, devpriv->ao_last_scan_time = ktime_to_us(ktime_get()); devpriv->ao_timer.expires = jiffies + usecs_to_jiffies(devpriv->ao_scan_period); - - /* mark command as active */ - smp_mb__before_atomic(); - set_bit(WAVEFORM_AO_RUNNING, &devpriv->state_bits); - smp_mb__after_atomic(); add_timer(&devpriv->ao_timer); return 1; @@ -608,11 +585,12 @@ static int waveform_ao_cancel(struct comedi_device *dev, struct waveform_private *devpriv = dev->private; s->async->inttrig = NULL; - /* mark command as no longer active */ - clear_bit(WAVEFORM_AO_RUNNING, &devpriv->state_bits); - smp_mb__after_atomic(); - /* cannot call del_timer_sync() as may be called from timer routine */ - del_timer(&devpriv->ao_timer); + if (in_softirq()) { + /* Assume we were called from the timer routine itself. 
*/ + del_timer(&devpriv->ao_timer); + } else { + del_timer_sync(&devpriv->ao_timer); + } return 0; } -- cgit v1.1 From 5ca05345c56cb979e1a25ab6146437002f95cac8 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 20 Jul 2016 17:07:34 +0100 Subject: staging: comedi: ni_mio_common: fix wrong insn_write handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For counter subdevices, the `s->insn_write` handler is being set to the wrong function, `ni_tio_insn_read()`. It should be `ni_tio_insn_write()`. Signed-off-by: Ian Abbott Reported-by: Éric Piel Fixes: 10f74377eec3 ("staging: comedi: ni_tio: make ni_tio_winsn() a proper comedi (*insn_write)" Cc: # 3.17+ Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/ni_mio_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/ni_mio_common.c b/drivers/staging/comedi/drivers/ni_mio_common.c index 8dabb19..3cf3c05 100644 --- a/drivers/staging/comedi/drivers/ni_mio_common.c +++ b/drivers/staging/comedi/drivers/ni_mio_common.c @@ -5480,7 +5480,7 @@ static int ni_E_init(struct comedi_device *dev, s->maxdata = (devpriv->is_m_series) ? 0xffffffff : 0x00ffffff; s->insn_read = ni_tio_insn_read; - s->insn_write = ni_tio_insn_read; + s->insn_write = ni_tio_insn_write; s->insn_config = ni_tio_insn_config; #ifdef PCIDMA if (dev->irq && devpriv->mite) { -- cgit v1.1 From c71f20ee76342376e3c4c67cdbe7421d8c4e886e Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Fri, 29 Jul 2016 09:43:56 -0400 Subject: staging: comedi: adv_pci1760: Do not return EINVAL for CMDF_ROUND_DOWN. The CMDF_ROUND_DOWN case falls through and so always returns -EINVAL. 
Fixes: 14b93bb6bbf0 ("staging: comedi: adv_pci_dio: separate out PCI-1760 support") Signed-off-by: Phil Turnbull Reviewed-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/adv_pci1760.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/comedi/drivers/adv_pci1760.c b/drivers/staging/comedi/drivers/adv_pci1760.c index d7dd1e5..9f525ff 100644 --- a/drivers/staging/comedi/drivers/adv_pci1760.c +++ b/drivers/staging/comedi/drivers/adv_pci1760.c @@ -196,6 +196,7 @@ static int pci1760_pwm_ns_to_div(unsigned int flags, unsigned int ns) break; case CMDF_ROUND_DOWN: divisor = ns / PCI1760_PWM_TIMEBASE; + break; default: return -EINVAL; } -- cgit v1.1 From 5ac5c3bcf57419d0aa3f53b12b8c07599a13fdcc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 21 Jun 2016 14:46:21 +0300 Subject: staging: comedi: dt2811: fix a precedence bug Bitwise | has higher precedence than ?: so we need to add some parentheses for this to work as intended. Fixes: 7c9574090d30 ('staging: comedi: dt2811: simplify A/D reference configuration') Signed-off-by: Dan Carpenter Reviewed-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/dt2811.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/comedi/drivers/dt2811.c b/drivers/staging/comedi/drivers/dt2811.c index 904f6377..8bbd938 100644 --- a/drivers/staging/comedi/drivers/dt2811.c +++ b/drivers/staging/comedi/drivers/dt2811.c @@ -588,8 +588,8 @@ static int dt2811_attach(struct comedi_device *dev, struct comedi_devconfig *it) s = &dev->subdevices[0]; s->type = COMEDI_SUBD_AI; s->subdev_flags = SDF_READABLE | - (it->options[2] == 1) ? SDF_DIFF : - (it->options[2] == 2) ? SDF_COMMON : SDF_GROUND; + ((it->options[2] == 1) ? SDF_DIFF : + (it->options[2] == 2) ? SDF_COMMON : SDF_GROUND); s->n_chan = (it->options[2] == 1) ? 8 : 16; s->maxdata = 0x0fff; s->range_table = board->is_pgh ? 
&dt2811_pgh_ai_ranges -- cgit v1.1 From f0f4b0cc3a8cffd983f5940d46cd0227f3f5710a Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Tue, 19 Jul 2016 12:17:39 +0100 Subject: staging: comedi: ni_mio_common: fix AO inttrig backwards compatibility Commit ebb657babfa9 ("staging: comedi: ni_mio_common: clarify the cmd->start_arg validation and use") introduced a backwards compatibility issue in the use of asynchronous commands on the AO subdevice when `start_src` is `TRIG_EXT`. Valid values for `start_src` are `TRIG_INT` (for internal, software trigger), and `TRIG_EXT` (for external trigger). In both cases, the driver relies on an internal, software trigger to set things up (allowing the user application to write sufficient samples to the data buffer before the trigger), so it acts as a software "pre-trigger" in the `TRIG_EXT` case. The software trigger is handled by `ni_ao_inttrig()`. Prior to the above change, when `start_src` was `TRIG_INT`, `start_arg` was required to be 0, and `ni_ao_inttrig()` checked that the software trigger number was also 0. After the above change, when `start_src` was `TRIG_INT`, any value was allowed for `start_arg`, and `ni_ao_inttrig()` checked that the software trigger number matched this `start_arg` value. The backwards compatibility issue is that the internal trigger number now has to match `start_arg` when `start_src` is `TRIG_EXT` when it previously had to be 0. Fix the backwards compatibility issue in `ni_ao_inttrig()` by always allowing software trigger number 0 when `start_src` is something other than `TRIG_INT`. Thanks to Spencer Olson for reporting the issue. 
Signed-off-by: Ian Abbott Reported-by: Spencer Olson Fixes: ebb657babfa9 ("staging: comedi: ni_mio_common: clarify the cmd->start_arg validation and use") Cc: stable Reviewed-by: H Hartley Sweeten Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/ni_mio_common.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/ni_mio_common.c b/drivers/staging/comedi/drivers/ni_mio_common.c index 3cf3c05..0f97d7b 100644 --- a/drivers/staging/comedi/drivers/ni_mio_common.c +++ b/drivers/staging/comedi/drivers/ni_mio_common.c @@ -2772,7 +2772,15 @@ static int ni_ao_inttrig(struct comedi_device *dev, int i; static const int timeout = 1000; - if (trig_num != cmd->start_arg) + /* + * Require trig_num == cmd->start_arg when cmd->start_src == TRIG_INT. + * For backwards compatibility, also allow trig_num == 0 when + * cmd->start_src != TRIG_INT (i.e. when cmd->start_src == TRIG_EXT); + * in that case, the internal trigger is being used as a pre-trigger + * before the external trigger. + */ + if (!(trig_num == cmd->start_arg || + (trig_num == 0 && cmd->start_src != TRIG_INT))) return -EINVAL; /* -- cgit v1.1 From fcf68f3c0bb2a541aa47a2a38b8939edf84fd529 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 8 Aug 2016 17:19:38 -0700 Subject: iio: fix sched WARNING "do not call blocking ops when !TASK_RUNNING" When using CONFIG_DEBUG_ATOMIC_SLEEP, the scheduler nicely points out that we're calling sleeping primitives within the wait_event loop, which means we might clobber the task state: [ 10.831289] do not call blocking ops when !TASK_RUNNING; state=1 set at [] [ 10.845531] ------------[ cut here ]------------ [ 10.850161] WARNING: at kernel/sched/core.c:7630 ... 
[ 12.164333] ---[ end trace 45409966a9a76438 ]--- [ 12.168942] Call trace: [ 12.171391] [] __might_sleep+0x64/0x90 [ 12.176699] [] mutex_lock_nested+0x50/0x3fc [ 12.182440] [] iio_kfifo_buf_data_available+0x28/0x4c [ 12.189043] [] iio_buffer_ready+0x60/0xe0 [ 12.194608] [] iio_buffer_read_first_n_outer+0x108/0x1a8 [ 12.201474] [] __vfs_read+0x58/0x114 [ 12.206606] [] vfs_read+0x94/0x118 [ 12.211564] [] SyS_read+0x64/0xb4 [ 12.216436] [] el0_svc_naked+0x24/0x28 To avoid this, we should (a la https://lwn.net/Articles/628628/) use the wait_woken() function, which avoids the nested sleeping while still handling races between waiting / wake-events. Signed-off-by: Brian Norris Reviewed-by: Lars-Peter Clausen Cc: # 3.19+ for introduction of wait_woken Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-buffer.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c index 90462fc..49bf9c5 100644 --- a/drivers/iio/industrialio-buffer.c +++ b/drivers/iio/industrialio-buffer.c @@ -107,6 +107,7 @@ ssize_t iio_buffer_read_first_n_outer(struct file *filp, char __user *buf, { struct iio_dev *indio_dev = filp->private_data; struct iio_buffer *rb = indio_dev->buffer; + DEFINE_WAIT_FUNC(wait, woken_wake_function); size_t datum_size; size_t to_wait; int ret; @@ -131,19 +132,29 @@ ssize_t iio_buffer_read_first_n_outer(struct file *filp, char __user *buf, else to_wait = min_t(size_t, n / datum_size, rb->watermark); + add_wait_queue(&rb->pollq, &wait); do { - ret = wait_event_interruptible(rb->pollq, - iio_buffer_ready(indio_dev, rb, to_wait, n / datum_size)); - if (ret) - return ret; + if (!indio_dev->info) { + ret = -ENODEV; + break; + } - if (!indio_dev->info) - return -ENODEV; + if (!iio_buffer_ready(indio_dev, rb, to_wait, n / datum_size)) { + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + + wait_woken(&wait, TASK_INTERRUPTIBLE, + 
MAX_SCHEDULE_TIMEOUT); + continue; + } ret = rb->access->read_first_n(rb, n, buf); if (ret == 0 && (filp->f_flags & O_NONBLOCK)) ret = -EAGAIN; } while (ret == 0); + remove_wait_queue(&rb->pollq, &wait); return ret; } -- cgit v1.1 From ca64d4bc80a88845f7e1e266dbff798f928bcc06 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 25 Jul 2016 23:06:56 +0100 Subject: iio: chemical: atlas-ph-sensor: fix typo in val assignment Fix an incorrect assignment due to a typo on a variable name. The variable val2 should be assigned 100000 and not val. Signed-off-by: Colin Ian King Reviewed-By: Matt Ranostay Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/atlas-ph-sensor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/chemical/atlas-ph-sensor.c b/drivers/iio/chemical/atlas-ph-sensor.c index ae038a5..407f141 100644 --- a/drivers/iio/chemical/atlas-ph-sensor.c +++ b/drivers/iio/chemical/atlas-ph-sensor.c @@ -434,7 +434,7 @@ static int atlas_read_raw(struct iio_dev *indio_dev, break; case IIO_ELECTRICALCONDUCTIVITY: *val = 1; /* 0.00001 */ - *val = 100000; + *val2 = 100000; break; case IIO_CONCENTRATION: *val = 0; /* 0.000000001 */ -- cgit v1.1 From d9a8594011080def9202f2c258f755647fe66683 Mon Sep 17 00:00:00 2001 From: Aditya Shankar Date: Tue, 2 Aug 2016 11:49:00 +0200 Subject: MAINTAINERS: Update maintainer entry for wilc1000 Take the maintenance of the Atmel WIFI staging driver wilc1000. Former maintainers are no more with Atmel. 
Reported-by: Loic Lefort Signed-off-by: Aditya Shankar Signed-off-by: Ganesh Krishna Acked-by: Luis de Bethencourt Signed-off-by: Nicolas Ferre Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index a306795..09c7066 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11217,12 +11217,8 @@ S: Odd Fixes F: drivers/staging/vt665?/ STAGING - WILC1000 WIFI DRIVER -M: Johnny Kim -M: Austin Shin -M: Chris Park -M: Tony Cho -M: Glen Lee -M: Leo Kim +M: Aditya Shankar +M: Ganesh Krishna L: linux-wireless@vger.kernel.org S: Supported F: drivers/staging/wilc1000/ -- cgit v1.1 From e9d766b965a15816cf9ec353d86c11f27b783d4d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 22 May 2016 11:05:38 +0200 Subject: MAINTAINERS: Add file patterns for ion device tree bindings Submitters of device tree binding documentation may forget to CC the subsystem maintainer if this is missing. Signed-off-by: Geert Uytterhoeven Cc: Sumit Semwal Cc: devel@driverdev.osuosl.org Acked-by: Laura Abbott Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 09c7066..e614392 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -798,6 +798,7 @@ M: Laura Abbott M: Sumit Semwal L: devel@driverdev.osuosl.org S: Supported +F: Documentation/devicetree/bindings/staging/ion/ F: drivers/staging/android/ion F: drivers/staging/android/uapi/ion.h F: drivers/staging/android/uapi/ion_test.h -- cgit v1.1 From 23535c1322e42e71f32bfbeae9970f4dba31e3bd Mon Sep 17 00:00:00 2001 From: Binoy Jayan Date: Thu, 21 Jul 2016 13:26:56 +0530 Subject: staging: wilc1000: txq_event: Fix coding error Fix incorrect usage of completion interface by replacing 'wait_for_completion' with 'complete'. This error was introduced accidentally while replacing semaphores with mutexes. 
Reported-by: Jiri Slaby Signed-off-by: Binoy Jayan Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wilc1000/linux_wlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/wilc1000/linux_wlan.c b/drivers/staging/wilc1000/linux_wlan.c index 3a66255..3221511 100644 --- a/drivers/staging/wilc1000/linux_wlan.c +++ b/drivers/staging/wilc1000/linux_wlan.c @@ -648,7 +648,7 @@ void wilc1000_wlan_deinit(struct net_device *dev) mutex_unlock(&wl->hif_cs); } if (&wl->txq_event) - wait_for_completion(&wl->txq_event); + complete(&wl->txq_event); wlan_deinitialize_threads(dev); deinit_irq(dev); -- cgit v1.1 From 23436825e671cdd55c45d151ddc66fd3c47d10e9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 16 Jul 2016 13:07:55 +0300 Subject: staging: wilc1000: NULL dereference on error We can't pass NULL pointers to destroy_workqueue(). Signed-off-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wilc1000/host_interface.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/staging/wilc1000/host_interface.c b/drivers/staging/wilc1000/host_interface.c index 0b1760c..78f524f 100644 --- a/drivers/staging/wilc1000/host_interface.c +++ b/drivers/staging/wilc1000/host_interface.c @@ -3363,7 +3363,7 @@ int wilc_init(struct net_device *dev, struct host_if_drv **hif_drv_handler) if (!hif_workqueue) { netdev_err(vif->ndev, "Failed to create workqueue\n"); result = -ENOMEM; - goto _fail_mq_; + goto _fail_; } setup_timer(&periodic_rssi, GetPeriodicRSSI, @@ -3391,7 +3391,6 @@ int wilc_init(struct net_device *dev, struct host_if_drv **hif_drv_handler) clients_count++; -_fail_mq_: destroy_workqueue(hif_workqueue); _fail_: return result; -- cgit v1.1 From 6c08fda0306916135291103f23cc17248c422c49 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 15 Aug 2016 17:09:52 +0100 Subject: staging: wilc1000: correctly check if associatedsta has not been found The current check for associatedsta being set to -1 to 
indicate it has not been found is not working because associatedsta is initialized to zero and will never be -1. Fix this by initializing it to ~0 and checking for ~0 instead. Signed-off-by: Colin Ian King Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wilc1000/wilc_wfi_cfgoperations.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c b/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c index 9092600..2c2e8ac 100644 --- a/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c +++ b/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c @@ -1191,7 +1191,7 @@ static int get_station(struct wiphy *wiphy, struct net_device *dev, struct wilc_priv *priv; struct wilc_vif *vif; u32 i = 0; - u32 associatedsta = 0; + u32 associatedsta = ~0; u32 inactive_time = 0; priv = wiphy_priv(wiphy); vif = netdev_priv(dev); @@ -1204,7 +1204,7 @@ static int get_station(struct wiphy *wiphy, struct net_device *dev, } } - if (associatedsta == -1) { + if (associatedsta == ~0) { netdev_err(dev, "sta required is not associated\n"); return -ENOENT; } -- cgit v1.1 From c0678b2d6648ab65b68703044709e367799ba9f9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 15 Aug 2016 15:52:23 -0700 Subject: include/linux: fix excess fence.h kernel-doc notation Fix excess fields in kernel-doc notation after some struct fields were removed.
Fixes these kernel-doc warnings: ..//include/linux/fence.h:85: warning: Excess struct/union/enum/typedef member 'child_list' description in 'fence' ..//include/linux/fence.h:85: warning: Excess struct/union/enum/typedef member 'active_list' description in 'fence' Fixes: 0431b9065f28 ("staging/android: bring struct sync_pt back") Cc: Daniel Vetter Cc: Sumit Semwal Cc: Luis de Bethencourt Signed-off-by: Randy Dunlap Reviewed-by: Gustavo Padovan Signed-off-by: Greg Kroah-Hartman --- include/linux/fence.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/fence.h b/include/linux/fence.h index 8cc719a6..2ac6fa5 100644 --- a/include/linux/fence.h +++ b/include/linux/fence.h @@ -49,8 +49,6 @@ struct fence_cb; * @timestamp: Timestamp when the fence was signaled. * @status: Optional, only valid if < 0, must be set before calling * fence_signal, indicates that the fence has completed with an error. - * @child_list: list of children fences - * @active_list: list of active fences * * the flags member must be manipulated and read using the appropriate * atomic ops (bit_*), so taking the spinlock will not be needed most -- cgit v1.1 From 90c43ec6997a892448f1f86180a515f59cafd8a3 Mon Sep 17 00:00:00 2001 From: Vignesh R Date: Wed, 17 Aug 2016 17:43:00 +0530 Subject: iio: adc: ti_am335x_adc: Protect FIFO1 from concurrent access It is possible that two or more ADC channels can be simultaneously requested for raw samples, in which case there can be race in access to FIFO data resulting in loss of samples. If am335x_tsc_se_set_once() is called again from tiadc_read_raw(), when ADC is still acquired to sample one of the channels, the second process might be put into uninterruptible sleep state. Fix these issues, by protecting FIFO access and channel configurations with a mutex. 
Since tiadc_read_raw() might take anywhere between few microseconds to few milliseconds to finish execution (depending on averaging and delay values supplied via DT), it's better to use mutex instead of spinlock. Fixes: 7ca6740cd1cd4 ("mfd: input: iio: ti_amm335x: Rework TSC/ADC synchronization") Signed-off-by: Vignesh R Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ti_am335x_adc.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c index 8a36875..bed9977 100644 --- a/drivers/iio/adc/ti_am335x_adc.c +++ b/drivers/iio/adc/ti_am335x_adc.c @@ -32,6 +32,7 @@ struct tiadc_device { struct ti_tscadc_dev *mfd_tscadc; + struct mutex fifo1_lock; /* to protect fifo access */ int channels; u8 channel_line[8]; u8 channel_step[8]; @@ -359,6 +360,7 @@ static int tiadc_read_raw(struct iio_dev *indio_dev, int *val, int *val2, long mask) { struct tiadc_device *adc_dev = iio_priv(indio_dev); + int ret = IIO_VAL_INT; int i, map_val; unsigned int fifo1count, read, stepid; bool found = false; @@ -372,6 +374,7 @@ static int tiadc_read_raw(struct iio_dev *indio_dev, if (!step_en) return -EINVAL; + mutex_lock(&adc_dev->fifo1_lock); fifo1count = tiadc_readl(adc_dev, REG_FIFO1CNT); while (fifo1count--) tiadc_readl(adc_dev, REG_FIFO1); @@ -388,7 +391,8 @@ static int tiadc_read_raw(struct iio_dev *indio_dev, if (time_after(jiffies, timeout)) { am335x_tsc_se_adc_done(adc_dev->mfd_tscadc); - return -EAGAIN; + ret = -EAGAIN; + goto err_unlock; } } map_val = adc_dev->channel_step[chan->scan_index]; @@ -414,8 +418,11 @@ static int tiadc_read_raw(struct iio_dev *indio_dev, am335x_tsc_se_adc_done(adc_dev->mfd_tscadc); if (found == false) - return -EBUSY; - return IIO_VAL_INT; + ret = -EBUSY; + +err_unlock: + mutex_unlock(&adc_dev->fifo1_lock); + return ret; } static const struct iio_info tiadc_info = { @@ -483,6 +490,7 @@ static int tiadc_probe(struct platform_device *pdev)
tiadc_step_config(indio_dev); tiadc_writel(adc_dev, REG_FIFO1THR, FIFO1_THRESHOLD); + mutex_init(&adc_dev->fifo1_lock); err = tiadc_channel_init(indio_dev, adc_dev->channels); if (err < 0) -- cgit v1.1 From 7175cce1c3f1d8c8840d2004f78f96a3904249b5 Mon Sep 17 00:00:00 2001 From: Vignesh R Date: Wed, 17 Aug 2016 17:43:01 +0530 Subject: iio: adc: ti_am335x_adc: Increase timeout value waiting for ADC sample Now that open delay and sample delay for each channel is configurable via DT, the default IDLE_TIMEOUT value is not enough as this is calculated based on hardcoded macros. This results in driver returning EBUSY sometimes. Fix this by increasing the timeout value based on maximum value possible to open delay and sample delays for each channel. Fixes: 5dc11e810676e ("iio: adc: ti_am335x_adc: make sample delay, open delay, averaging DT parameters") Signed-off-by: Vignesh R Acked-by: Lee Jones Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ti_am335x_adc.c | 2 +- include/linux/mfd/ti_am335x_tscadc.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c index bed9977..c3cfacc 100644 --- a/drivers/iio/adc/ti_am335x_adc.c +++ b/drivers/iio/adc/ti_am335x_adc.c @@ -381,7 +381,7 @@ static int tiadc_read_raw(struct iio_dev *indio_dev, am335x_tsc_se_set_once(adc_dev->mfd_tscadc, step_en); - timeout = jiffies + usecs_to_jiffies + timeout = jiffies + msecs_to_jiffies (IDLE_TIMEOUT * adc_dev->channels); /* Wait for Fifo threshold interrupt */ while (1) { diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h index 2567a87..7f55b8b 100644 --- a/include/linux/mfd/ti_am335x_tscadc.h +++ b/include/linux/mfd/ti_am335x_tscadc.h @@ -138,16 +138,16 @@ /* * time in us for processing a single channel, calculated as follows: * - * num cycles = open delay + (sample delay + conv time) * averaging + * max num cycles = open delay + (sample delay + conv time) * 
averaging * - * num cycles: 152 + (1 + 13) * 16 = 376 + * max num cycles: 262143 + (255 + 13) * 16 = 266431 * * clock frequency: 26MHz / 8 = 3.25MHz * clock period: 1 / 3.25MHz = 308ns * - * processing time: 376 * 308ns = 116us + * max processing time: 266431 * 308ns = 83ms(approx) */ -#define IDLE_TIMEOUT 116 /* microsec */ +#define IDLE_TIMEOUT 83 /* milliseconds */ #define TSCADC_CELLS 2 -- cgit v1.1 From 7ac61a062f3147dc23e3f12b9dfe7c4dd35f9cb8 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 16 Aug 2016 15:33:28 +0200 Subject: iio: accel: kxsd9: Fix raw read return Any readings from the raw interface of the KXSD9 driver will return an empty string, because it does not return IIO_VAL_INT but rather some random value from the accelerometer to the caller. Cc: stable@vger.kernel.org Signed-off-by: Linus Walleij Signed-off-by: Jonathan Cameron --- drivers/iio/accel/kxsd9.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/accel/kxsd9.c b/drivers/iio/accel/kxsd9.c index 3a9f106..da5fb67 100644 --- a/drivers/iio/accel/kxsd9.c +++ b/drivers/iio/accel/kxsd9.c @@ -160,6 +160,7 @@ static int kxsd9_read_raw(struct iio_dev *indio_dev, if (ret < 0) goto error_ret; *val = ret; + ret = IIO_VAL_INT; break; case IIO_CHAN_INFO_SCALE: ret = spi_w8r8(st->us, KXSD9_READ(KXSD9_REG_CTRL_C)); -- cgit v1.1 From 11d7a0bb95eaaba1741bb24a7c3c169c82f09c7b Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 14 Aug 2016 19:52:56 -0700 Subject: xfrm: Only add l3mdev oif to dst lookups Subash reported that commit 42a7b32b73d6 ("xfrm: Add oif to dst lookups") broke a wifi use case that uses fib rules and xfrms. The intent of 42a7b32b73d6 was driven by VRFs with IPsec. As a compromise relax the use of oif in xfrm lookups to L3 master devices only (ie., oif is either an L3 master device or is enslaved to a master device). 
Fixes: 42a7b32b73d6 ("xfrm: Add oif to dst lookups") Reported-by: Subash Abhinov Kasiviswanathan Signed-off-by: David Ahern Signed-off-by: Steffen Klassert --- net/ipv4/xfrm4_policy.c | 2 +- net/ipv6/xfrm6_policy.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 7b0edb3..e07ed8b 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -29,7 +29,7 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, memset(fl4, 0, sizeof(*fl4)); fl4->daddr = daddr->a4; fl4->flowi4_tos = tos; - fl4->flowi4_oif = oif; + fl4->flowi4_oif = l3mdev_master_ifindex_by_index(net, oif); if (saddr) fl4->saddr = saddr->a4; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index c074771..dd84ecd 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -36,7 +36,7 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, int err; memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_oif = oif; + fl6.flowi6_oif = l3mdev_master_ifindex_by_index(net, oif); fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF; memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr)); if (saddr) -- cgit v1.1 From 93e11eb1b745ee8dbd4f98f24b542d496c8f1e03 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 10 Aug 2016 13:44:56 +0000 Subject: dmaengine: fsl_raid: add missing of_node_put() in fsl_re_probe() When terminating for_each_compatible_node() iteration with break or return, of_node_put() should be used to prevent stale device node references from being left behind. Found by Coccinelle. 
Signed-off-by: Wei Yongjun Signed-off-by: Vinod Koul --- drivers/dma/fsl_raid.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/fsl_raid.c b/drivers/dma/fsl_raid.c index aad167e..de2a2a2 100644 --- a/drivers/dma/fsl_raid.c +++ b/drivers/dma/fsl_raid.c @@ -836,6 +836,7 @@ static int fsl_re_probe(struct platform_device *ofdev) rc = of_property_read_u32(np, "reg", &off); if (rc) { dev_err(dev, "Reg property not found in JQ node\n"); + of_node_put(np); return -ENODEV; } /* Find out the Job Rings present under each JQ */ -- cgit v1.1 From 6a8b0c6b18f62a277ffb2139d0c0253fe35d7feb Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 10 Aug 2016 03:17:09 +0000 Subject: dmaengine: at_xdmac: fix to pass correct device identity to free_irq() free_irq() expects the same device identity that was passed to corresponding request_irq(), otherwise the IRQ is not freed. Fixes: e1f7c9eee707 ("dmaengine: at_xdmac: creation of the atmel eXtended DMA Controller driver") Signed-off-by: Wei Yongjun Acked-by: Ludovic Desroches Signed-off-by: Vinod Koul --- drivers/dma/at_xdmac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index e434ffe..832cbd6 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -2067,7 +2067,7 @@ err_dma_unregister: err_clk_disable: clk_disable_unprepare(atxdmac->clk); err_free_irq: - free_irq(atxdmac->irq, atxdmac->dma.dev); + free_irq(atxdmac->irq, atxdmac); return ret; } @@ -2081,7 +2081,7 @@ static int at_xdmac_remove(struct platform_device *pdev) dma_async_device_unregister(&atxdmac->dma); clk_disable_unprepare(atxdmac->clk); - free_irq(atxdmac->irq, atxdmac->dma.dev); + free_irq(atxdmac->irq, atxdmac); for (i = 0; i < atxdmac->dma.chancnt; i++) { struct at_xdmac_chan *atchan = &atxdmac->chan[i]; -- cgit v1.1 From 32e80820de5d7eb778632af8f235727a32d3aeb2 Mon Sep 17 00:00:00 2001 From: LABBE Corentin Date: Tue, 16 Aug 2016 10:44:18 +0200 Subject: dmaengine: 
img-mdc: fix a possible NULL dereference of_match_device could return NULL, and so cause a NULL pointer dereference later at line 850: mdma->soc = match->data; For fixing this problem, we use of_device_get_match_data(), this will simplify the code a little by using a standard function for getting the match data. This was reported by coverity (CID 1324134) Signed-off-by: LABBE Corentin Signed-off-by: Vinod Koul --- drivers/dma/img-mdc-dma.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/dma/img-mdc-dma.c b/drivers/dma/img-mdc-dma.c index a4c53be..624f1e1 100644 --- a/drivers/dma/img-mdc-dma.c +++ b/drivers/dma/img-mdc-dma.c @@ -861,7 +861,6 @@ static int mdc_dma_probe(struct platform_device *pdev) { struct mdc_dma *mdma; struct resource *res; - const struct of_device_id *match; unsigned int i; u32 val; int ret; @@ -871,8 +870,7 @@ static int mdc_dma_probe(struct platform_device *pdev) return -ENOMEM; platform_set_drvdata(pdev, mdma); - match = of_match_device(mdc_dma_of_match, &pdev->dev); - mdma->soc = match->data; + mdma->soc = of_device_get_match_data(&pdev->dev); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); mdma->regs = devm_ioremap_resource(&pdev->dev, res); -- cgit v1.1 From f6c274e11e3b31a5f95c23962b1ba593bd6a4759 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Thu, 28 Jul 2016 10:16:12 +0300 Subject: usb: dwc3: pci: runtime_resume child device During runtime_resume of dwc3-pci.c, we need to runtime resume our child device (which is dwc3 proper) otherwise nothing will happen.
Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/dwc3-pci.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 2eb84d6..0a32430 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -243,6 +243,13 @@ static int dwc3_pci_runtime_suspend(struct device *dev) return -EBUSY; } +static int dwc3_pci_runtime_resume(struct device *dev) +{ + struct platform_device *dwc3 = dev_get_drvdata(dev); + + return pm_runtime_get(&dwc3->dev); +} + static int dwc3_pci_pm_dummy(struct device *dev) { /* @@ -259,7 +266,7 @@ static int dwc3_pci_pm_dummy(struct device *dev) static struct dev_pm_ops dwc3_pci_dev_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(dwc3_pci_pm_dummy, dwc3_pci_pm_dummy) - SET_RUNTIME_PM_OPS(dwc3_pci_runtime_suspend, dwc3_pci_pm_dummy, + SET_RUNTIME_PM_OPS(dwc3_pci_runtime_suspend, dwc3_pci_runtime_resume, NULL) }; -- cgit v1.1 From b74c2d875baaa07186f617c5617f4c5e3a8a41ad Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Thu, 28 Jul 2016 13:07:07 +0300 Subject: usb: dwc3: core: allow device to runtime_suspend several times After going through runtime_suspend/runtime_resume cycle once we would be left with an unbalanced pm_runtime_get() call. Fix that by making sure that we try to suspend right after resuming so things are balanced and device can runtime_suspend again. 
Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 9466431..35d0924 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1192,6 +1192,7 @@ static int dwc3_runtime_resume(struct device *dev) } pm_runtime_mark_last_busy(dev); + pm_runtime_put(dev); return 0; } -- cgit v1.1 From 83f8da562f8b5275fa1095b45762996971f7c607 Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Wed, 10 Aug 2016 08:53:34 -0500 Subject: usb: dwc2: Add reset control to dwc2 Allow for platforms that have a reset controller driver in place to bring the USB IP out of reset. Signed-off-by: Dinh Nguyen Acked-by: John Youn Tested-by: Stefan Wahren Signed-off-by: Felipe Balbi --- drivers/usb/dwc2/core.h | 1 + drivers/usb/dwc2/platform.c | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index 9fae029..d645512 100644 --- a/drivers/usb/dwc2/core.h +++ b/drivers/usb/dwc2/core.h @@ -868,6 +868,7 @@ struct dwc2_hsotg { void *priv; int irq; struct clk *clk; + struct reset_control *reset; unsigned int queuing_high_bandwidth:1; unsigned int srp_success:1; diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c index fc6f525..530959a 100644 --- a/drivers/usb/dwc2/platform.c +++ b/drivers/usb/dwc2/platform.c @@ -45,6 +45,7 @@ #include #include #include +#include #include @@ -337,6 +338,24 @@ static int dwc2_lowlevel_hw_init(struct dwc2_hsotg *hsotg) { int i, ret; + hsotg->reset = devm_reset_control_get_optional(hsotg->dev, "dwc2"); + if (IS_ERR(hsotg->reset)) { + ret = PTR_ERR(hsotg->reset); + switch (ret) { + case -ENOENT: + case -ENOTSUPP: + hsotg->reset = NULL; + break; + default: + dev_err(hsotg->dev, "error getting reset control %d\n", + ret); + return ret; + } + } + + if (hsotg->reset) + reset_control_deassert(hsotg->reset); + /* Set default UTMI width */ hsotg->phyif = GUSBCFG_PHYIF16; @@ 
-434,6 +453,9 @@ static int dwc2_driver_remove(struct platform_device *dev) if (hsotg->ll_hw_enabled) dwc2_lowlevel_hw_disable(hsotg); + if (hsotg->reset) + reset_control_assert(hsotg->reset); + return 0; } -- cgit v1.1 From 3295235fd70ed6d594aadee8c892a14f6a4b2d2e Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 13 Aug 2016 01:28:24 +0000 Subject: usb: renesas_usbhs: gadget: fix return value check in usbhs_mod_gadget_probe() In case of error, the function usb_get_phy() returns ERR_PTR() and never returns NULL. The NULL test in the return value check should be replaced with IS_ERR(). Fixes: b5a2875605ca ("usb: renesas_usbhs: Allow an OTG PHY driver to provide VBUS") Cc: # v4.3+ Acked-by: Yoshihiro Shimoda Signed-off-by: Wei Yongjun Signed-off-by: Felipe Balbi --- drivers/usb/renesas_usbhs/mod_gadget.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c index 92bc83b..c4c6474 100644 --- a/drivers/usb/renesas_usbhs/mod_gadget.c +++ b/drivers/usb/renesas_usbhs/mod_gadget.c @@ -1076,7 +1076,7 @@ int usbhs_mod_gadget_probe(struct usbhs_priv *priv) gpriv->transceiver = usb_get_phy(USB_PHY_TYPE_UNDEFINED); dev_info(dev, "%stransceiver found\n", - gpriv->transceiver ? "" : "no "); + !IS_ERR(gpriv->transceiver) ? "" : "no "); /* * CAUTION -- cgit v1.1 From 70237dc8efd092b93b40dc2eba812d66a5d65cb1 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Thu, 11 Aug 2016 15:51:45 +0800 Subject: usb: gadget: function: f_eem: socket buffer may be NULL In eth_start_xmit, the socket buffer may be NULL. So, add NULL pointer check at .wrap API. 
Signed-off-by: Peter Chen Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_eem.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/function/f_eem.c b/drivers/usb/gadget/function/f_eem.c index d58bfc3..8741fd7 100644 --- a/drivers/usb/gadget/function/f_eem.c +++ b/drivers/usb/gadget/function/f_eem.c @@ -341,11 +341,15 @@ static struct sk_buff *eem_wrap(struct gether *port, struct sk_buff *skb) { struct sk_buff *skb2 = NULL; struct usb_ep *in = port->in_ep; - int padlen = 0; + int headroom, tailroom, padlen = 0; u16 len = skb->len; - int headroom = skb_headroom(skb); - int tailroom = skb_tailroom(skb); + if (!skb) + return NULL; + + len = skb->len; + headroom = skb_headroom(skb); + tailroom = skb_tailroom(skb); /* When (len + EEM_HLEN + ETH_FCS_LEN) % in->maxpacket) is 0, * stick two bytes of zero-length EEM packet on the end. -- cgit v1.1 From 80d1642d7640ef00d8823f17a529785331aceb96 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Thu, 11 Aug 2016 15:51:46 +0800 Subject: usb: gadget: function: f_rndis: socket buffer may be NULL In eth_start_xmit, the socket buffer may be NULL. So, add NULL pointer check at .wrap API. 
Signed-off-by: Peter Chen Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_rndis.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/gadget/function/f_rndis.c b/drivers/usb/gadget/function/f_rndis.c index c800582..16562e4 100644 --- a/drivers/usb/gadget/function/f_rndis.c +++ b/drivers/usb/gadget/function/f_rndis.c @@ -374,6 +374,9 @@ static struct sk_buff *rndis_add_header(struct gether *port, { struct sk_buff *skb2; + if (!skb) + return NULL; + skb2 = skb_realloc_headroom(skb, sizeof(struct rndis_packet_msg_type)); rndis_add_hdr(skb2); -- cgit v1.1 From f4693b08cc901912a87369c46537b94ed4084ea0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 15 Jul 2016 14:15:47 +0300 Subject: usb: gadget: fsl_qe_udc: signedness bug in qe_get_frame() We can't assign -EINVAL to a u16. Fixes: 3948f0e0c999 ('usb: add Freescale QE/CPM USB peripheral controller driver') Acked-by: Peter Chen Signed-off-by: Dan Carpenter Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/fsl_qe_udc.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/udc/fsl_qe_udc.c b/drivers/usb/gadget/udc/fsl_qe_udc.c index cf8819a..8bb011e 100644 --- a/drivers/usb/gadget/udc/fsl_qe_udc.c +++ b/drivers/usb/gadget/udc/fsl_qe_udc.c @@ -1878,11 +1878,8 @@ static int qe_get_frame(struct usb_gadget *gadget) tmp = in_be16(&udc->usb_param->frame_n); if (tmp & 0x8000) - tmp = tmp & 0x07ff; - else - tmp = -EINVAL; - - return (int)tmp; + return tmp & 0x07ff; + return -EINVAL; } static int fsl_qe_start(struct usb_gadget *gadget, -- cgit v1.1 From d6011f6fc21b4d4ab1586f01c4f62becaa0a28d7 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Tue, 16 Aug 2016 10:22:38 +0100 Subject: usb: dwc3: gadget: don't rely on jiffies while holding spinlock __dwc3_gadget_wakeup() is called while holding a spinlock, then depends on jiffies in order to timeout while polling the USB core for a link state update. 
In the case the wakeup failed, the timeout will never happen and will also cause the cpu to stall until rcu_preempt kicks in. This switches to a "decrement variable and wait" timeout scheme. Signed-off-by: Nicolas Saenz Julienne Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/gadget.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 1f5597e..122e64d 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1433,7 +1433,7 @@ static int dwc3_gadget_get_frame(struct usb_gadget *g) static int __dwc3_gadget_wakeup(struct dwc3 *dwc) { - unsigned long timeout; + int retries; int ret; u32 reg; @@ -1484,9 +1484,9 @@ static int __dwc3_gadget_wakeup(struct dwc3 *dwc) } /* poll until Link State changes to ON */ - timeout = jiffies + msecs_to_jiffies(100); + retries = 20000; - while (!time_after(jiffies, timeout)) { + while (retries--) { reg = dwc3_readl(dwc->regs, DWC3_DSTS); /* in HS, means ON */ -- cgit v1.1 From 511a36d2f357724312bb3776d2f6eed3890928b2 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Thu, 30 Jun 2016 17:10:23 +0800 Subject: usb: gadget: Add the gserial port checking in gs_start_tx() When the usb gadget is set to the gadget serial function, it will crash in the situation below. It will clear the 'port->port_usb' pointer in gserial_disconnect() function when usb link is inactive, but it will release the lock for disabling the endpoints in this function. During the lock release period, a request may complete and invoke the gs_write_complete()--->gs_start_tx() function, but the 'port->port_usb' pointer has already been set to NULL, thus it will crash in gs_start_tx() function. This patch adds the 'port->port_usb' pointer checking in gs_start_tx() function to avoid this situation.
Signed-off-by: Baolin Wang Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/u_serial.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c index 6ded634..e0cd1e4 100644 --- a/drivers/usb/gadget/function/u_serial.c +++ b/drivers/usb/gadget/function/u_serial.c @@ -375,10 +375,15 @@ __acquires(&port->port_lock) */ { struct list_head *pool = &port->write_pool; - struct usb_ep *in = port->port_usb->in; + struct usb_ep *in; int status = 0; bool do_tty_wake = false; + if (!port->port_usb) + return status; + + in = port->port_usb->in; + while (!port->write_busy && !list_empty(pool)) { struct usb_request *req; int len; -- cgit v1.1 From a32ac2912f97d7ea9b67eb67bb4aa30b9156a88e Mon Sep 17 00:00:00 2001 From: James Hartley Date: Fri, 19 Aug 2016 12:03:23 +0100 Subject: pinctrl: pistachio: fix mfio pll_lock pinmux A previous patch attempted to fix the pinmuxes for mfio 84 - 89, but it omitted a change to pistachio_pin_group pistachio_groups, which results in incorrect pll_lock signals being routed. Apply the correct mux settings throughout the driver. 
fixes: cefc03e5995e ("pinctrl: Add Pistachio SoC pin control driver") fixes: e9adb336d0bf ("pinctrl: pistachio: fix mfio84-89 function description and pinmux.") Cc: # 4.4.x- Signed-off-by: James Hartley Reviewed-by: Sifan Naeem Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-pistachio.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/pinctrl/pinctrl-pistachio.c b/drivers/pinctrl/pinctrl-pistachio.c index 7bad200..55375b1 100644 --- a/drivers/pinctrl/pinctrl-pistachio.c +++ b/drivers/pinctrl/pinctrl-pistachio.c @@ -809,17 +809,17 @@ static const struct pistachio_pin_group pistachio_groups[] = { PADS_FUNCTION_SELECT2, 12, 0x3), MFIO_MUX_PIN_GROUP(83, MIPS_PLL_LOCK, MIPS_TRACE_DATA, USB_DEBUG, PADS_FUNCTION_SELECT2, 14, 0x3), - MFIO_MUX_PIN_GROUP(84, SYS_PLL_LOCK, MIPS_TRACE_DATA, USB_DEBUG, + MFIO_MUX_PIN_GROUP(84, AUDIO_PLL_LOCK, MIPS_TRACE_DATA, USB_DEBUG, PADS_FUNCTION_SELECT2, 16, 0x3), - MFIO_MUX_PIN_GROUP(85, WIFI_PLL_LOCK, MIPS_TRACE_DATA, SDHOST_DEBUG, + MFIO_MUX_PIN_GROUP(85, RPU_V_PLL_LOCK, MIPS_TRACE_DATA, SDHOST_DEBUG, PADS_FUNCTION_SELECT2, 18, 0x3), - MFIO_MUX_PIN_GROUP(86, BT_PLL_LOCK, MIPS_TRACE_DATA, SDHOST_DEBUG, + MFIO_MUX_PIN_GROUP(86, RPU_L_PLL_LOCK, MIPS_TRACE_DATA, SDHOST_DEBUG, PADS_FUNCTION_SELECT2, 20, 0x3), - MFIO_MUX_PIN_GROUP(87, RPU_V_PLL_LOCK, DREQ2, SOCIF_DEBUG, + MFIO_MUX_PIN_GROUP(87, SYS_PLL_LOCK, DREQ2, SOCIF_DEBUG, PADS_FUNCTION_SELECT2, 22, 0x3), - MFIO_MUX_PIN_GROUP(88, RPU_L_PLL_LOCK, DREQ3, SOCIF_DEBUG, + MFIO_MUX_PIN_GROUP(88, WIFI_PLL_LOCK, DREQ3, SOCIF_DEBUG, PADS_FUNCTION_SELECT2, 24, 0x3), - MFIO_MUX_PIN_GROUP(89, AUDIO_PLL_LOCK, DREQ4, DREQ5, + MFIO_MUX_PIN_GROUP(89, BT_PLL_LOCK, DREQ4, DREQ5, PADS_FUNCTION_SELECT2, 26, 0x3), PIN_GROUP(TCK, "tck"), PIN_GROUP(TRSTN, "trstn"), -- cgit v1.1 From bcb48cca23ec9852739e4a464307fa29515bbe48 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 22 Aug 2016 14:42:52 +0300 Subject: pinctrl: cherryview: Do not mask all interrupts 
in probe The Cherryview GPIO controller has 8 or 16 wires connected to the I/O-APIC which can be used directly by the platform/BIOS or drivers. One such wire is used as SCI (System Control Interrupt) which ACPI depends on to be able to trigger GPEs (General Purpose Events). The pinctrl driver itself uses another IRQ resource which is wire OR of all the 8 (or 16) wires and follows what BIOS has programmed to the IntSel register of each pin. Currently the driver masks all interrupts at probe time and this prevents these direct interrupts from working as expected. The reason for this is that some early stage prototypes had some pins misconfigured causing lots of spurious interrupts. We fix this by leaving the interrupt mask untouched. This allows SCI and other direct interrupts to work properly. As for the possible spurious interrupts, we switch the default handler to be handle_bad_irq() instead of handle_simple_irq() (which was not correct anyway). Reported-by: Yu C Chen Reported-by: Anisse Astier Signed-off-by: Mika Westerberg Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-cherryview.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index 5749a4ee..0fe8fad 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1539,12 +1539,11 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq) offset += range->npins; } - /* Mask and clear all interrupts */ - chv_writel(0, pctrl->regs + CHV_INTMASK); + /* Clear all interrupts */ chv_writel(0xffff, pctrl->regs + CHV_INTSTAT); ret = gpiochip_irqchip_add(chip, &chv_gpio_irqchip, 0, - handle_simple_irq, IRQ_TYPE_NONE); + handle_bad_irq, IRQ_TYPE_NONE); if (ret) { dev_err(pctrl->dev, "failed to add IRQ chip\n"); goto fail; -- cgit v1.1 From 05860bed491b114a9f2d7a4f6e09fb02c0b69056 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 19 Aug
2016 13:37:38 +0300 Subject: ath9k: fix client mode beacon configuration For pure station mode, iter_data.primary_beacon_vif was used and passed to ath_beacon_config, but not set to the station vif. This was causing the following warning: [ 100.310919] ------------[ cut here ]------------ [ 100.315683] WARNING: CPU: 0 PID: 7 at compat-wireless-2016-06-20/drivers/net/wireless/ath/ath9k/beacon.c:642 ath9k_calculate_summary_state+0x250/0x60c [ath9k]() [ 100.402028] CPU: 0 PID: 7 Comm: kworker/u2:1 Tainted: G W 4.4.15 #5 [ 100.409676] Workqueue: phy0 ieee80211_ibss_leave [mac80211] [ 100.415351] Stack : 8736e98c 870b4b20 87a25b54 800a6800 8782a080 80400d63 8039b96c 00000007 [ 100.415351] 803c5edc 87875914 80400000 800a47cc 87a25b54 800a6800 803a0fd8 80400000 [ 100.415351] 00000003 87875914 80400000 80094ae0 87a25b54 8787594c 00000000 801ef308 [ 100.415351] 803ffe70 801ef300 87193d58 87b3a400 87b3ad00 70687930 00000000 00000000 [ 100.415351] 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 100.415351] ... 
[ 100.451703] Call Trace: [ 100.454235] [<800a6800>] vprintk_default+0x24/0x30 [ 100.459110] [<800a47cc>] printk+0x2c/0x38 [ 100.463190] [<800a6800>] vprintk_default+0x24/0x30 [ 100.468072] [<80094ae0>] print_worker_info+0x148/0x174 [ 100.473378] [<801ef308>] serial8250_console_putchar+0x0/0x44 [ 100.479122] [<801ef300>] wait_for_xmitr+0xc4/0xcc [ 100.484014] [<87193d58>] ieee80211_ibss_leave+0xb90/0x1900 [mac80211] [ 100.490590] [<80081604>] warn_slowpath_common+0xa0/0xd0 [ 100.495922] [<801a359c>] dump_stack+0x14/0x28 [ 100.500350] [<80071a00>] show_stack+0x50/0x84 [ 100.504784] [<80081604>] warn_slowpath_common+0xa0/0xd0 [ 100.510106] [<87024c60>] ath9k_calculate_summary_state+0x250/0x60c [ath9k] [ 100.517105] [<800816b8>] warn_slowpath_null+0x18/0x24 [ 100.522256] [<87024c60>] ath9k_calculate_summary_state+0x250/0x60c [ath9k] [ 100.529273] [<87025418>] ath9k_set_txpower+0x148/0x498 [ath9k] [ 100.535302] [<871d2c64>] cleanup_module+0xa74/0xd4c [mac80211] [ 100.541237] [<801ef308>] serial8250_console_putchar+0x0/0x44 [ 100.547042] [<800a5d18>] wake_up_klogd+0x54/0x68 [ 100.551730] [<800a6650>] vprintk_emit+0x404/0x43c [ 100.556623] [<871b9db8>] ieee80211_sta_rx_notify+0x258/0x32c [mac80211] [ 100.563475] [<871ba6a4>] ieee80211_sta_rx_queued_mgmt+0x63c/0x734 [mac80211] [ 100.570693] [<871aa49c>] ieee80211_tx_prepare_skb+0x210/0x230 [mac80211] [ 100.577609] [<800af5d4>] mod_timer+0x15c/0x190 [ 100.582220] [<871ba8b8>] ieee80211_sta_work+0xfc/0xe1c [mac80211] [ 100.588539] [<871940b4>] ieee80211_ibss_leave+0xeec/0x1900 [mac80211] [ 100.595122] [<8009ec84>] dequeue_task_fair+0x44/0x130 [ 100.600281] [<80092a34>] process_one_work+0x1f8/0x334 [ 100.605454] [<80093830>] worker_thread+0x2b4/0x408 [ 100.610317] [<8009357c>] worker_thread+0x0/0x408 [ 100.615019] [<8009357c>] worker_thread+0x0/0x408 [ 100.619705] [<80097b68>] kthread+0xdc/0xe8 [ 100.623886] [<80097a8c>] kthread+0x0/0xe8 [ 100.627961] [<80060878>] ret_from_kernel_thread+0x14/0x1c [ 100.633448] [ 100.634956] 
---[ end trace aafbe57e9ae6862f ]--- Fixes: cfda2d8e2314 ("ath9k: Fix beacon configuration for addition/removal of interfaces") Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index a394622..098745d 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1154,6 +1154,7 @@ void ath9k_calculate_summary_state(struct ath_softc *sc, bool changed = (iter_data.primary_sta != ctx->primary_sta); if (iter_data.primary_sta) { + iter_data.primary_beacon_vif = iter_data.primary_sta; iter_data.beacons = true; ath9k_set_assoc_state(sc, iter_data.primary_sta, changed); -- cgit v1.1 From 7711aaf08ad3fc4d0e937eec1de0a63620444ce7 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 19 Aug 2016 13:37:46 +0300 Subject: ath9k: fix using sta->drv_priv before initializing it A station pointer can be passed to the driver on tx, before it has been marked as associated. Since ath9k_sta_state was initializing the entry too late, it resulted in some spurious crashes. 
Fixes: df3c6eb34da5 ("ath9k: Use sta_state() callback") Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 098745d..6ca4337 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1564,13 +1564,13 @@ static int ath9k_sta_state(struct ieee80211_hw *hw, struct ath_common *common = ath9k_hw_common(sc->sc_ah); int ret = 0; - if (old_state == IEEE80211_STA_AUTH && - new_state == IEEE80211_STA_ASSOC) { + if (old_state == IEEE80211_STA_NOTEXIST && + new_state == IEEE80211_STA_NONE) { ret = ath9k_sta_add(hw, vif, sta); ath_dbg(common, CONFIG, "Add station: %pM\n", sta->addr); - } else if (old_state == IEEE80211_STA_ASSOC && - new_state == IEEE80211_STA_AUTH) { + } else if (old_state == IEEE80211_STA_NONE && + new_state == IEEE80211_STA_NOTEXIST) { ret = ath9k_sta_remove(hw, vif, sta); ath_dbg(common, CONFIG, "Remove station: %pM\n", sta->addr); -- cgit v1.1 From 62148f0930a8e9bd5c5614f8387222f0220d7d47 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 2 Aug 2016 08:11:00 -0300 Subject: [media] cec: rename cec_devnode fhs_lock to just lock This lock will be used to protect more than just the fhs list. So rename it to just 'lock'. 
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/cec/cec-adap.c | 12 ++++++------ drivers/staging/media/cec/cec-api.c | 8 ++++---- drivers/staging/media/cec/cec-core.c | 6 +++--- include/media/cec.h | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/staging/media/cec/cec-adap.c b/drivers/staging/media/cec/cec-adap.c index b2393bba..9dcb784 100644 --- a/drivers/staging/media/cec/cec-adap.c +++ b/drivers/staging/media/cec/cec-adap.c @@ -124,10 +124,10 @@ static void cec_queue_event(struct cec_adapter *adap, u64 ts = ktime_get_ns(); struct cec_fh *fh; - mutex_lock(&adap->devnode.fhs_lock); + mutex_lock(&adap->devnode.lock); list_for_each_entry(fh, &adap->devnode.fhs, list) cec_queue_event_fh(fh, ev, ts); - mutex_unlock(&adap->devnode.fhs_lock); + mutex_unlock(&adap->devnode.lock); } /* @@ -191,12 +191,12 @@ static void cec_queue_msg_monitor(struct cec_adapter *adap, u32 monitor_mode = valid_la ? CEC_MODE_MONITOR : CEC_MODE_MONITOR_ALL; - mutex_lock(&adap->devnode.fhs_lock); + mutex_lock(&adap->devnode.lock); list_for_each_entry(fh, &adap->devnode.fhs, list) { if (fh->mode_follower >= monitor_mode) cec_queue_msg_fh(fh, msg); } - mutex_unlock(&adap->devnode.fhs_lock); + mutex_unlock(&adap->devnode.lock); } /* @@ -207,12 +207,12 @@ static void cec_queue_msg_followers(struct cec_adapter *adap, { struct cec_fh *fh; - mutex_lock(&adap->devnode.fhs_lock); + mutex_lock(&adap->devnode.lock); list_for_each_entry(fh, &adap->devnode.fhs, list) { if (fh->mode_follower == CEC_MODE_FOLLOWER) cec_queue_msg_fh(fh, msg); } - mutex_unlock(&adap->devnode.fhs_lock); + mutex_unlock(&adap->devnode.lock); } /* Notify userspace of an adapter state change. 
*/ diff --git a/drivers/staging/media/cec/cec-api.c b/drivers/staging/media/cec/cec-api.c index 7be7615..4e2696a 100644 --- a/drivers/staging/media/cec/cec-api.c +++ b/drivers/staging/media/cec/cec-api.c @@ -508,14 +508,14 @@ static int cec_open(struct inode *inode, struct file *filp) filp->private_data = fh; - mutex_lock(&devnode->fhs_lock); + mutex_lock(&devnode->lock); /* Queue up initial state events */ ev_state.state_change.phys_addr = adap->phys_addr; ev_state.state_change.log_addr_mask = adap->log_addrs.log_addr_mask; cec_queue_event_fh(fh, &ev_state, 0); list_add(&fh->list, &devnode->fhs); - mutex_unlock(&devnode->fhs_lock); + mutex_unlock(&devnode->lock); return 0; } @@ -540,9 +540,9 @@ static int cec_release(struct inode *inode, struct file *filp) cec_monitor_all_cnt_dec(adap); mutex_unlock(&adap->lock); - mutex_lock(&devnode->fhs_lock); + mutex_lock(&devnode->lock); list_del(&fh->list); - mutex_unlock(&devnode->fhs_lock); + mutex_unlock(&devnode->lock); /* Unhook pending transmits from this filehandle. 
*/ mutex_lock(&adap->lock); diff --git a/drivers/staging/media/cec/cec-core.c b/drivers/staging/media/cec/cec-core.c index 112a5fa..73792d0 100644 --- a/drivers/staging/media/cec/cec-core.c +++ b/drivers/staging/media/cec/cec-core.c @@ -117,7 +117,7 @@ static int __must_check cec_devnode_register(struct cec_devnode *devnode, /* Initialization */ INIT_LIST_HEAD(&devnode->fhs); - mutex_init(&devnode->fhs_lock); + mutex_init(&devnode->lock); /* Part 1: Find a free minor number */ mutex_lock(&cec_devnode_lock); @@ -181,10 +181,10 @@ static void cec_devnode_unregister(struct cec_devnode *devnode) if (!devnode->registered || devnode->unregistered) return; - mutex_lock(&devnode->fhs_lock); + mutex_lock(&devnode->lock); list_for_each_entry(fh, &devnode->fhs, list) wake_up_interruptible(&fh->wait); - mutex_unlock(&devnode->fhs_lock); + mutex_unlock(&devnode->lock); devnode->registered = false; devnode->unregistered = true; diff --git a/include/media/cec.h b/include/media/cec.h index dc7854b..fdb5d60 100644 --- a/include/media/cec.h +++ b/include/media/cec.h @@ -57,8 +57,8 @@ struct cec_devnode { int minor; bool registered; bool unregistered; - struct mutex fhs_lock; struct list_head fhs; + struct mutex lock; }; struct cec_adapter; -- cgit v1.1 From 2ab25d35a91098ef0f42d478cc37f6a5591a4ab0 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 2 Aug 2016 08:13:57 -0300 Subject: [media] cec: improve locking - The global lock was used in cec_get_device when it should have used the devnode lock. - cec_put_device also took the global lock, but since the release function takes that lock as well this could lead to a deadlock. Just don't take the lock here since there is no reason for it. - cec_devnode_register() should take the global lock when clearing the bit in the global bitmap. - In cec_devnode_unregister() place the devnode->(un)register tests and assignments under the devnode lock as well: this has to be in a critical block. 
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/cec/cec-core.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/staging/media/cec/cec-core.c b/drivers/staging/media/cec/cec-core.c index 73792d0..3b1e4d2 100644 --- a/drivers/staging/media/cec/cec-core.c +++ b/drivers/staging/media/cec/cec-core.c @@ -51,31 +51,29 @@ int cec_get_device(struct cec_devnode *devnode) { /* * Check if the cec device is available. This needs to be done with - * the cec_devnode_lock held to prevent an open/unregister race: + * the devnode->lock held to prevent an open/unregister race: * without the lock, the device could be unregistered and freed between * the devnode->registered check and get_device() calls, leading to * a crash. */ - mutex_lock(&cec_devnode_lock); + mutex_lock(&devnode->lock); /* * return ENXIO if the cec device has been removed * already or if it is not registered anymore. */ if (!devnode->registered) { - mutex_unlock(&cec_devnode_lock); + mutex_unlock(&devnode->lock); return -ENXIO; } /* and increase the device refcount */ get_device(&devnode->dev); - mutex_unlock(&cec_devnode_lock); + mutex_unlock(&devnode->lock); return 0; } void cec_put_device(struct cec_devnode *devnode) { - mutex_lock(&cec_devnode_lock); put_device(&devnode->dev); - mutex_unlock(&cec_devnode_lock); } /* Called when the last user of the cec device exits. 
*/ @@ -84,11 +82,10 @@ static void cec_devnode_release(struct device *cd) struct cec_devnode *devnode = to_cec_devnode(cd); mutex_lock(&cec_devnode_lock); - /* Mark device node number as free */ clear_bit(devnode->minor, cec_devnode_nums); - mutex_unlock(&cec_devnode_lock); + cec_delete_adapter(to_cec_adapter(devnode)); } @@ -160,7 +157,9 @@ static int __must_check cec_devnode_register(struct cec_devnode *devnode, cdev_del: cdev_del(&devnode->cdev); clr_bit: + mutex_lock(&cec_devnode_lock); clear_bit(devnode->minor, cec_devnode_nums); + mutex_unlock(&cec_devnode_lock); return ret; } @@ -177,17 +176,21 @@ static void cec_devnode_unregister(struct cec_devnode *devnode) { struct cec_fh *fh; + mutex_lock(&devnode->lock); + /* Check if devnode was never registered or already unregistered */ - if (!devnode->registered || devnode->unregistered) + if (!devnode->registered || devnode->unregistered) { + mutex_unlock(&devnode->lock); return; + } - mutex_lock(&devnode->lock); list_for_each_entry(fh, &devnode->fhs, list) wake_up_interruptible(&fh->wait); - mutex_unlock(&devnode->lock); devnode->registered = false; devnode->unregistered = true; + mutex_unlock(&devnode->lock); + device_del(&devnode->dev); cdev_del(&devnode->cdev); put_device(&devnode->dev); -- cgit v1.1 From 9ebf1945d757433a089ab3ee940673503e3e11ec Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 1 Aug 2016 07:29:34 -0300 Subject: [media] cec-funcs.h: fix typo: && should be & Fix typo where logical AND was used instead of bitwise AND. 
Reported-by: David Binderman Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/linux/cec-funcs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/cec-funcs.h b/include/linux/cec-funcs.h index 82c3d3b..9e054aa 100644 --- a/include/linux/cec-funcs.h +++ b/include/linux/cec-funcs.h @@ -227,7 +227,7 @@ static inline void cec_set_digital_service_id(__u8 *msg, if (digital->service_id_method == CEC_OP_SERVICE_ID_METHOD_BY_CHANNEL) { *msg++ = (digital->channel.channel_number_fmt << 2) | (digital->channel.major >> 8); - *msg++ = digital->channel.major && 0xff; + *msg++ = digital->channel.major & 0xff; *msg++ = digital->channel.minor >> 8; *msg++ = digital->channel.minor & 0xff; *msg++ = 0; @@ -1277,7 +1277,7 @@ static inline void cec_msg_user_control_pressed(struct cec_msg *msg, msg->len += 4; msg->msg[3] = (ui_cmd->channel_identifier.channel_number_fmt << 2) | (ui_cmd->channel_identifier.major >> 8); - msg->msg[4] = ui_cmd->channel_identifier.major && 0xff; + msg->msg[4] = ui_cmd->channel_identifier.major & 0xff; msg->msg[5] = ui_cmd->channel_identifier.minor >> 8; msg->msg[6] = ui_cmd->channel_identifier.minor & 0xff; break; -- cgit v1.1 From 31dc8b7302f1e48952ec8e90cd49dca843146cd0 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 10 Aug 2016 08:01:38 -0300 Subject: [media] cec-funcs.h: add reply argument for Record On/Off A reply parameter is added to the cec_msg_record_on/off functions in cec-funcs.h. The standard mandates that Record Status shall be replied to Record On, and it may be replied to Record Off. 
Signed-off-by: Johan Fjeldtvedt Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/linux/cec-funcs.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/cec-funcs.h b/include/linux/cec-funcs.h index 9e054aa..8af613e 100644 --- a/include/linux/cec-funcs.h +++ b/include/linux/cec-funcs.h @@ -162,10 +162,11 @@ static inline void cec_msg_standby(struct cec_msg *msg) /* One Touch Record Feature */ -static inline void cec_msg_record_off(struct cec_msg *msg) +static inline void cec_msg_record_off(struct cec_msg *msg, bool reply) { msg->len = 2; msg->msg[1] = CEC_MSG_RECORD_OFF; + msg->reply = reply ? CEC_MSG_RECORD_STATUS : 0; } struct cec_op_arib_data { @@ -323,6 +324,7 @@ static inline void cec_msg_record_on_phys_addr(struct cec_msg *msg, } static inline void cec_msg_record_on(struct cec_msg *msg, + bool reply, const struct cec_op_record_src *rec_src) { switch (rec_src->type) { @@ -346,6 +348,7 @@ static inline void cec_msg_record_on(struct cec_msg *msg, rec_src->ext_phys_addr.phys_addr); break; } + msg->reply = reply ? CEC_MSG_RECORD_STATUS : 0; } static inline void cec_ops_record_on(const struct cec_msg *msg, -- cgit v1.1 From 277f963cea4ec87144c6713377322fe3bf172a5e Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 10 Aug 2016 06:00:53 -0300 Subject: [media] cec: improve dqevent documentation The documentation for the cec_event_state_change struct was incomplete. This patch documents what happens in the corner cases. 
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- Documentation/media/uapi/cec/cec-ioc-dqevent.rst | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Documentation/media/uapi/cec/cec-ioc-dqevent.rst b/Documentation/media/uapi/cec/cec-ioc-dqevent.rst index 7a6d6d0..2e1e7392 100644 --- a/Documentation/media/uapi/cec/cec-ioc-dqevent.rst +++ b/Documentation/media/uapi/cec/cec-ioc-dqevent.rst @@ -64,7 +64,8 @@ it is guaranteed that the state did change in between the two events. - ``phys_addr`` - - The current physical address. + - The current physical address. This is ``CEC_PHYS_ADDR_INVALID`` if no + valid physical address is set. - .. row 2 @@ -72,7 +73,10 @@ it is guaranteed that the state did change in between the two events. - ``log_addr_mask`` - - The current set of claimed logical addresses. + - The current set of claimed logical addresses. This is 0 if no logical + addresses are claimed or if ``phys_addr`` is ``CEC_PHYS_ADDR_INVALID``. + If bit 15 is set (``1 << CEC_LOG_ADDR_UNREGISTERED``) then this device + has the unregistered logical address. In that case all other bits are 0. -- cgit v1.1 From dcceb1eaf210096831b14471bc87678375b086ed Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 10 Aug 2016 09:24:45 -0300 Subject: [media] cec: add CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK flag Currently if none of the requested logical addresses can be claimed, the framework will fall back to the Unregistered logical address. Add a flag to enable this explicitly. By default it will just go back to the unconfigured state. Usually Unregistered is not something you want since the functionality is very limited. Unless the application has support for this, it will fail to work correctly. So require that the application explicitly requests this. 
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- .../media/uapi/cec/cec-ioc-adap-g-log-addrs.rst | 21 ++++++++++++++++++++- drivers/staging/media/cec/cec-adap.c | 4 ++++ drivers/staging/media/cec/cec-api.c | 2 +- include/linux/cec.h | 5 ++++- 4 files changed, 29 insertions(+), 3 deletions(-) diff --git a/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst b/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst index 04ee900..201d483 100644 --- a/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst +++ b/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst @@ -144,7 +144,7 @@ logical address types are already defined will return with error ``EBUSY``. - ``flags`` - - Flags. No flags are defined yet, so set this to 0. + - Flags. See :ref:`cec-log-addrs-flags` for a list of available flags. - .. row 7 @@ -201,6 +201,25 @@ logical address types are already defined will return with error ``EBUSY``. give the CEC framework more information about the device type, even though the framework won't use it directly in the CEC message. +.. _cec-log-addrs-flags: + +.. flat-table:: Flags for struct cec_log_addrs + :header-rows: 0 + :stub-columns: 0 + :widths: 3 1 4 + + + - .. _`CEC-LOG-ADDRS-FL-ALLOW-UNREG-FALLBACK`: + + - ``CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK`` + + - 1 + + - By default if no logical address of the requested type can be claimed, then + it will go back to the unconfigured state. If this flag is set, then it will + fallback to the Unregistered logical address. Note that if the Unregistered + logical address was explicitly requested, then this flag has no effect. + .. _cec-versions: .. 
flat-table:: CEC Versions diff --git a/drivers/staging/media/cec/cec-adap.c b/drivers/staging/media/cec/cec-adap.c index 9dcb784..2458a6c 100644 --- a/drivers/staging/media/cec/cec-adap.c +++ b/drivers/staging/media/cec/cec-adap.c @@ -1047,6 +1047,10 @@ static int cec_config_thread_func(void *arg) dprintk(1, "could not claim LA %d\n", i); } + if (adap->log_addrs.log_addr_mask == 0 && + !(las->flags & CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK)) + goto unconfigure; + configured: if (adap->log_addrs.log_addr_mask == 0) { /* Fall back to unregistered */ diff --git a/drivers/staging/media/cec/cec-api.c b/drivers/staging/media/cec/cec-api.c index 4e2696a..6f58ee8 100644 --- a/drivers/staging/media/cec/cec-api.c +++ b/drivers/staging/media/cec/cec-api.c @@ -162,7 +162,7 @@ static long cec_adap_s_log_addrs(struct cec_adapter *adap, struct cec_fh *fh, return -ENOTTY; if (copy_from_user(&log_addrs, parg, sizeof(log_addrs))) return -EFAULT; - log_addrs.flags = 0; + log_addrs.flags &= CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK; mutex_lock(&adap->lock); if (!adap->is_configuring && (!log_addrs.num_log_addrs || !adap->is_configured) && diff --git a/include/linux/cec.h b/include/linux/cec.h index b3e2289..851968e 100644 --- a/include/linux/cec.h +++ b/include/linux/cec.h @@ -364,7 +364,7 @@ struct cec_caps { * @num_log_addrs: how many logical addresses should be claimed. Set by the * caller. * @vendor_id: the vendor ID of the device. Set by the caller. - * @flags: set to 0. + * @flags: flags. * @osd_name: the OSD name of the device. Set by the caller. * @primary_device_type: the primary device type for each logical address. * Set by the caller. 
@@ -389,6 +389,9 @@ struct cec_log_addrs { __u8 features[CEC_MAX_LOG_ADDRS][12]; }; +/* Allow a fallback to unregistered */ +#define CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK (1 << 0) + /* Events */ /* Event that occurs when the adapter state changes */ -- cgit v1.1 From 0c1d61b0e4ed68d125b21fed375c38b6e3c2a658 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sun, 14 Aug 2016 08:27:09 -0300 Subject: [media] cec: set unclaimed addresses to CEC_LOG_ADDR_INVALID Up to 4 logical addresses can be claimed. Make sure that any unclaimed logical addresses are set to CEC_LOG_ADDR_INVALID as per the documentation. Take special care in the unregistered case: when falling back to unregistered num_log_addrs may be > 1, so mark those as invalid. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/cec/cec-adap.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/staging/media/cec/cec-adap.c b/drivers/staging/media/cec/cec-adap.c index 2458a6c..6cc7d79 100644 --- a/drivers/staging/media/cec/cec-adap.c +++ b/drivers/staging/media/cec/cec-adap.c @@ -1056,6 +1056,8 @@ configured: /* Fall back to unregistered */ las->log_addr[0] = CEC_LOG_ADDR_UNREGISTERED; las->log_addr_mask = 1 << las->log_addr[0]; + for (i = 1; i < las->num_log_addrs; i++) + las->log_addr[i] = CEC_LOG_ADDR_INVALID; } adap->is_configured = true; adap->is_configuring = false; @@ -1074,6 +1076,8 @@ configured: cec_report_features(adap, i); cec_report_phys_addr(adap, i); } + for (i = las->num_log_addrs; i < CEC_MAX_LOG_ADDRS; i++) + las->log_addr[i] = CEC_LOG_ADDR_INVALID; mutex_lock(&adap->lock); adap->kthread_config = NULL; mutex_unlock(&adap->lock); -- cgit v1.1 From 260ff1144a9dd1afb85cf5da462672d68412cbc4 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 19 Jul 2016 08:44:32 -0300 Subject: [media] cec: add item to TODO Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/cec/TODO | 1 + 1 file changed, 1 insertion(+) diff --git 
a/drivers/staging/media/cec/TODO b/drivers/staging/media/cec/TODO index a10d4f8..1322469 100644 --- a/drivers/staging/media/cec/TODO +++ b/drivers/staging/media/cec/TODO @@ -12,6 +12,7 @@ Hopefully this will happen later in 2016. Other TODOs: +- There are two possible replies to CEC_MSG_INITIATE_ARC. How to handle that? - Add a flag to inhibit passing CEC RC messages to the rc subsystem. Applications should be able to choose this when calling S_LOG_ADDRS. - If the reply field of cec_msg is set then when the reply arrives it -- cgit v1.1 From 3e92d8b238e48dfb539e8112bb2cc463e35e1b71 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 12 Aug 2016 13:32:07 -0300 Subject: [media] cec: ignore messages when log_addr_mask == 0 Most CEC adapters will still receive broadcast messages, even if no logical addresses are claimed. But those messages should only be passed on for monitoring purposes, but not for processing by either kernel or userspace if userspace didn't call CEC_ADAP_S_LOG_ADDRS first. So if adap->log_addrs.log_addr_mask is 0, then just return before passing the received message on to the processing code. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/cec/cec-adap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/staging/media/cec/cec-adap.c b/drivers/staging/media/cec/cec-adap.c index 6cc7d79..e980ac9 100644 --- a/drivers/staging/media/cec/cec-adap.c +++ b/drivers/staging/media/cec/cec-adap.c @@ -851,6 +851,9 @@ void cec_received_msg(struct cec_adapter *adap, struct cec_msg *msg) if (!valid_la || msg->len <= 1) return; + if (adap->log_addrs.log_addr_mask == 0) + return; + /* * Process the message on the protocol level. 
If is_reply is true, * then cec_receive_notify() won't pass on the reply to the listener(s) -- cgit v1.1 From 73b14977549e4e1214413e7da2d0e97a9947bf8d Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sun, 14 Aug 2016 06:45:54 -0300 Subject: [media] mtk-vcodec: add HAS_DMA dependency This fixes this kbuild test robot error: tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master head: 329f4152911c276b074bec75a0443f88821afdb7 commit: c1023ba74fc77dc56dc317bd98f5060aab889ac1 [media] drivers/media/platform/Kconfig: fix VIDEO_MEDIATEK_VCODEC dependency config: m32r-allyesconfig (attached as .config) compiler: m32r-linux-gcc (GCC) 4.9.0 reproduce: wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross git checkout c1023ba74fc77dc56dc317bd98f5060aab889ac1 # save the attached .config to linux build tree make.cross ARCH=m32r All errors (new ones prefixed by >>): drivers/media/v4l2-core/videobuf2-dma-contig.c: In function 'vb2_dc_get_userptr': >> >> drivers/media/v4l2-core/videobuf2-dma-contig.c:486:2: error: implicit declaration of function 'dma_get_cache_alignment' [-Werror=implicit-function-declaration] unsigned long dma_align = dma_get_cache_alignment(); ^ cc1: some warnings being treated as errors This driver depends on HAS_DMA for dma_get_cache_alignment(). 
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig index f25344b..552b635 100644 --- a/drivers/media/platform/Kconfig +++ b/drivers/media/platform/Kconfig @@ -169,7 +169,7 @@ config VIDEO_MEDIATEK_VPU config VIDEO_MEDIATEK_VCODEC tristate "Mediatek Video Codec driver" depends on MTK_IOMMU || COMPILE_TEST - depends on VIDEO_DEV && VIDEO_V4L2 + depends on VIDEO_DEV && VIDEO_V4L2 && HAS_DMA depends on ARCH_MEDIATEK || COMPILE_TEST select VIDEOBUF2_DMA_CONTIG select V4L2_MEM2MEM_DEV -- cgit v1.1 From 1e6e97541ab51b65019bd823506af81ebb3730fc Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 12 Aug 2016 06:44:27 -0300 Subject: [media] pulse8-cec: set correct Signal Free Time Don't hardcode the signal free time to 3 bit periods, instead use the value for the signal free time as passed in by the CEC framework. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/pulse8-cec/pulse8-cec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/media/pulse8-cec/pulse8-cec.c b/drivers/staging/media/pulse8-cec/pulse8-cec.c index 94f8590..28f853c 100644 --- a/drivers/staging/media/pulse8-cec/pulse8-cec.c +++ b/drivers/staging/media/pulse8-cec/pulse8-cec.c @@ -388,7 +388,7 @@ static int pulse8_cec_adap_transmit(struct cec_adapter *adap, u8 attempts, int err; cmd[0] = MSGCODE_TRANSMIT_IDLETIME; - cmd[1] = 3; + cmd[1] = signal_free_time; err = pulse8_send_and_wait(pulse8, cmd, 2, MSGCODE_COMMAND_ACCEPTED, 1); cmd[0] = MSGCODE_TRANSMIT_ACK_POLARITY; -- cgit v1.1 From 31f58e31dc0e170e117a83584103921269b7581b Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 12 Aug 2016 06:46:06 -0300 Subject: [media] pulse8-cec: fix error handling Support more error codes and fix a bug where MSGCODE_TRANSMIT_FAILED_LINE was mapped to 
CEC_TX_STATUS_ARB_LOST, which is wrong. Thanks to Pulse-Eight for providing me with the information needed to handle this correctly (I hope). Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/pulse8-cec/pulse8-cec.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/staging/media/pulse8-cec/pulse8-cec.c b/drivers/staging/media/pulse8-cec/pulse8-cec.c index 28f853c..ed8bd95 100644 --- a/drivers/staging/media/pulse8-cec/pulse8-cec.c +++ b/drivers/staging/media/pulse8-cec/pulse8-cec.c @@ -114,14 +114,11 @@ static void pulse8_irq_work_handler(struct work_struct *work) cec_transmit_done(pulse8->adap, CEC_TX_STATUS_OK, 0, 0, 0, 0); break; - case MSGCODE_TRANSMIT_FAILED_LINE: - cec_transmit_done(pulse8->adap, CEC_TX_STATUS_ARB_LOST, - 1, 0, 0, 0); - break; case MSGCODE_TRANSMIT_FAILED_ACK: cec_transmit_done(pulse8->adap, CEC_TX_STATUS_NACK, 0, 1, 0, 0); break; + case MSGCODE_TRANSMIT_FAILED_LINE: case MSGCODE_TRANSMIT_FAILED_TIMEOUT_DATA: case MSGCODE_TRANSMIT_FAILED_TIMEOUT_LINE: cec_transmit_done(pulse8->adap, CEC_TX_STATUS_ERROR, @@ -170,6 +167,9 @@ static irqreturn_t pulse8_interrupt(struct serio *serio, unsigned char data, case MSGCODE_TRANSMIT_FAILED_TIMEOUT_LINE: schedule_work(&pulse8->work); break; + case MSGCODE_HIGH_ERROR: + case MSGCODE_LOW_ERROR: + case MSGCODE_RECEIVE_FAILED: case MSGCODE_TIMEOUT_ERROR: break; case MSGCODE_COMMAND_ACCEPTED: -- cgit v1.1 From 8ac6a1a53e9f195e8c4336a7edfba2e102fc14bb Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 18 Aug 2016 04:13:42 -0300 Subject: [media] cec-edid: check for IEEE identifier The cec_get_edid_spa_location() function did not verify that the IEEE identifier in the Vendor Specific Data Block matched the HDMI-LLC identifier. This could result in the wrong VSDB block being returned. For example, for HDMI 2.0 EDIDs there is also a HDMI Forum VSDB. So check the IEEE identifier as well. 
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/cec-edid.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/media/cec-edid.c b/drivers/media/cec-edid.c index 7001824..5719b99 100644 --- a/drivers/media/cec-edid.c +++ b/drivers/media/cec-edid.c @@ -70,7 +70,10 @@ static unsigned int cec_get_edid_spa_location(const u8 *edid, unsigned int size) u8 tag = edid[i] >> 5; u8 len = edid[i] & 0x1f; - if (tag == 3 && len >= 5 && i + len <= end) + if (tag == 3 && len >= 5 && i + len <= end && + edid[i + 1] == 0x03 && + edid[i + 2] == 0x0c && + edid[i + 3] == 0x00) return i + 4; i += len + 1; } while (i < end); -- cgit v1.1 From 4808f721627c2a23b5d749f9bbd20d4529ea2b8d Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sat, 20 Aug 2016 07:54:38 -0300 Subject: [media] cec-funcs.h: add missing vendor-specific messages The cec-funcs.h header was missing support for these three vendor-specific messages: CEC_MSG_VENDOR_COMMAND CEC_MSG_VENDOR_COMMAND_WITH_ID CEC_MSG_VENDOR_REMOTE_BUTTON_DOWN Add wrappers for these messages. I originally postponed adding these wrappers due to the fact that the argument is just a byte array which cec-ctl couldn't handle at the time, and then I just forgot to add them once the CEC framework was finalized. It wasn't until an attempt to transmit a vendor specific command was made that I realized that these wrappers were missing. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/linux/cec-funcs.h | 69 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/include/linux/cec-funcs.h b/include/linux/cec-funcs.h index 8af613e..138bbf7 100644 --- a/include/linux/cec-funcs.h +++ b/include/linux/cec-funcs.h @@ -1144,6 +1144,75 @@ static inline void cec_msg_give_device_vendor_id(struct cec_msg *msg, msg->reply = reply ? 
CEC_MSG_DEVICE_VENDOR_ID : 0; } +static inline void cec_msg_vendor_command(struct cec_msg *msg, + __u8 size, const __u8 *vendor_cmd) +{ + if (size > 14) + size = 14; + msg->len = 2 + size; + msg->msg[1] = CEC_MSG_VENDOR_COMMAND; + memcpy(msg->msg + 2, vendor_cmd, size); +} + +static inline void cec_ops_vendor_command(const struct cec_msg *msg, + __u8 *size, + const __u8 **vendor_cmd) +{ + *size = msg->len - 2; + + if (*size > 14) + *size = 14; + *vendor_cmd = msg->msg + 2; +} + +static inline void cec_msg_vendor_command_with_id(struct cec_msg *msg, + __u32 vendor_id, __u8 size, + const __u8 *vendor_cmd) +{ + if (size > 11) + size = 11; + msg->len = 5 + size; + msg->msg[1] = CEC_MSG_VENDOR_COMMAND_WITH_ID; + msg->msg[2] = vendor_id >> 16; + msg->msg[3] = (vendor_id >> 8) & 0xff; + msg->msg[4] = vendor_id & 0xff; + memcpy(msg->msg + 5, vendor_cmd, size); +} + +static inline void cec_ops_vendor_command_with_id(const struct cec_msg *msg, + __u32 *vendor_id, __u8 *size, + const __u8 **vendor_cmd) +{ + *size = msg->len - 5; + + if (*size > 11) + *size = 11; + *vendor_id = (msg->msg[2] << 16) | (msg->msg[3] << 8) | msg->msg[4]; + *vendor_cmd = msg->msg + 5; +} + +static inline void cec_msg_vendor_remote_button_down(struct cec_msg *msg, + __u8 size, + const __u8 *rc_code) +{ + if (size > 14) + size = 14; + msg->len = 2 + size; + msg->msg[1] = CEC_MSG_VENDOR_REMOTE_BUTTON_DOWN; + memcpy(msg->msg + 2, rc_code, size); +} + +static inline void cec_ops_vendor_remote_button_down(const struct cec_msg *msg, + __u8 *size, + const __u8 **rc_code) +{ + *size = msg->len - 2; + + if (*size > 14) + *size = 14; + *rc_code = msg->msg + 2; +} + static inline void cec_msg_vendor_remote_button_up(struct cec_msg *msg) { msg->len = 2; -- cgit v1.1 From 539d5c48a4aff5a4afcff117418618b49126c54c Mon Sep 17 00:00:00 2001 From: Hugo Grostabussiat Date: Tue, 16 Aug 2016 20:34:07 +0200 Subject: ARM: sun5i: Fix typo in trip point temperature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Set cpu_alert0 temperature to 85°C instead of 850°C. Fixes: 32a5d2d170cc ("ARM: dts: sun5i: Add cpu thermal zones to dtsi") Signed-off-by: Hugo Grostabussiat Acked-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard --- arch/arm/boot/dts/sun5i-a13.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sun5i-a13.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi index e012890..a17ba02 100644 --- a/arch/arm/boot/dts/sun5i-a13.dtsi +++ b/arch/arm/boot/dts/sun5i-a13.dtsi @@ -84,7 +84,7 @@ trips { cpu_alert0: cpu_alert0 { /* milliCelsius */ - temperature = <850000>; + temperature = <85000>; hysteresis = <2000>; type = "passive"; }; -- cgit v1.1 From 02ba38a5b6d6e0bc89c7b74651f1873055028a56 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 19 Aug 2016 12:44:29 +0100 Subject: ARM: sa1100: fix 3.6864MHz clock pxa_timer wants to be able to call clk_enable() etc on this clock, but our clk_enable() implementation expects non-NULL enable/disable operations. Provide these dummy implementations. 
Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = c0204000 [00000000] *pgd=00000000 Internal error: Oops: 80000005 [#1] ARM Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 4.8.0-rc2+ #887 Hardware name: Intel-Assabet task: c0644590 task.stack: c0640000 PC is at 0x0 LR is at clk_enable+0x40/0x58 pc : [<00000000>] lr : [] psr: 600000d3 sp : c0641f60 ip : c0641f4c fp : c0641f74 r10: c1ffc7a0 r9 : 6901b118 r8 : 00000001 r7 : c0639a34 r6 : 0000001b r5 : a00000d3 r4 : c0645d70 r3 : c0645d78 r2 : 00000001 r1 : c0641ef0 r0 : c0645d70 Flags: nZCv IRQs off FIQs off Mode SVC_32 ISA ARM Segment none Control: c020717f Table: c020717f DAC: 00000053 Process swapper (pid: 0, stack limit = 0xc0640188) Stack: (0xc0641f60 to 0xc0642000) 1f60: 00384000 c08762e4 c0641f98 c0641f78 c063308c c021b144 00000000 00000000 1f80: 00000000 c0660b20 ffffffff c0641fa8 c0641f9c c06220ec c0633058 c0641fb8 1fa0: c0641fac c061f114 c06220dc c0641ff4 c0641fbc c061bb68 c061f0fc ffffffff 1fc0: ffffffff 00000000 c061b6cc c0639a34 c0660cd4 c0642038 c0639a30 c0645434 1fe0: c0204000 c06380f8 00000000 c0641ff8 c0208048 c061b954 00000000 00000000 Backtrace: [] (clk_enable) from [] (pxa_timer_nodt_init+0x40/0x120) r5:c08762e4 r4:00384000 [] (pxa_timer_nodt_init) from [] (sa1100_timer_init+0x1c/0x20) r6:ffffffff r5:c0660b20 r4:00000000 [] (sa1100_timer_init) from [] (time_init+0x24/0x2c) [] (time_init) from [] (start_kernel+0x220/0x42c) [] (start_kernel) from [] (0xc0208048) r10:c06380f8 r8:c0204000 r7:c0645434 r6:c0639a30 r5:c0642038 r4:c0660cd4 Code: bad PC value ---[ end trace 0000000000000000 ]--- Kernel panic - not syncing: Attempted to kill the idle task! 
Fixes: ee3a4020f7c9 ("ARM: 8250/1: sa1100: provide OSTIMER0 clock for pxa_timer") Acked-by: Dmitry Eremin-Solenikov Signed-off-by: Russell King --- arch/arm/mach-sa1100/clock.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-sa1100/clock.c b/arch/arm/mach-sa1100/clock.c index cbf53bb..0bf32209 100644 --- a/arch/arm/mach-sa1100/clock.c +++ b/arch/arm/mach-sa1100/clock.c @@ -125,6 +125,8 @@ static unsigned long clk_36864_get_rate(struct clk *clk) } static struct clkops clk_36864_ops = { + .enable = clk_cpu_enable, + .disable = clk_cpu_disable, .get_rate = clk_36864_get_rate, }; -- cgit v1.1 From 198b51e8a6a31d3a6f8e9dd9cade3635d0291f26 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 19 Aug 2016 12:47:54 +0100 Subject: ARM: sa1100: register clocks early Since we switched to use pxa_timer, we need to provide the OSTIMER0 clock. However, as the clock is initialised early, we need to provide the clock early as well, so that pxa_timer can find it. Adding the clock to the clkdev table at core_initcall() time is way too late. Move the initialisation earlier. 
Fixes: ee3a4020f7c9 ("ARM: 8250/1: sa1100: provide OSTIMER0 clock for pxa_timer") Acked-by: Dmitry Eremin-Solenikov Signed-off-by: Russell King --- arch/arm/mach-sa1100/clock.c | 3 +-- arch/arm/mach-sa1100/generic.c | 1 + arch/arm/mach-sa1100/generic.h | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-sa1100/clock.c b/arch/arm/mach-sa1100/clock.c index 0bf32209..0db4689 100644 --- a/arch/arm/mach-sa1100/clock.c +++ b/arch/arm/mach-sa1100/clock.c @@ -142,9 +142,8 @@ static struct clk_lookup sa11xx_clkregs[] = { CLKDEV_INIT(NULL, "OSTIMER0", &clk_36864), }; -static int __init sa11xx_clk_init(void) +int __init sa11xx_clk_init(void) { clkdev_add_table(sa11xx_clkregs, ARRAY_SIZE(sa11xx_clkregs)); return 0; } -core_initcall(sa11xx_clk_init); diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c index 345e63f..2e2c35b 100644 --- a/arch/arm/mach-sa1100/generic.c +++ b/arch/arm/mach-sa1100/generic.c @@ -388,6 +388,7 @@ void __init sa1100_init_irq(void) sa11x0_init_irq_nodt(IRQ_GPIO0_SC, irq_resource.start); sa1100_init_gpio(); + sa11xx_clk_init(); } /* diff --git a/arch/arm/mach-sa1100/generic.h b/arch/arm/mach-sa1100/generic.h index 0d92e11..68199b603 100644 --- a/arch/arm/mach-sa1100/generic.h +++ b/arch/arm/mach-sa1100/generic.h @@ -44,3 +44,5 @@ int sa11x0_pm_init(void); #else static inline int sa11x0_pm_init(void) { return 0; } #endif + +int sa11xx_clk_init(void); -- cgit v1.1 From f271b779f415455e904f3f10067ab7f2fb8af497 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Thu, 18 Aug 2016 16:28:24 +0100 Subject: ARM: 8599/1: mm: pull asm/memory.h explicitly Commit d78114554939a (""ARM: 8512/1: proc-v7.S: Adjust stack address when XIP_KERNEL"") introduced a macro which lives under asm/memory.h. 
Unfortunately, for MMU-less systems (like R-class) it leads to build failure: arch/arm/mm/proc-v7.S: Assembler messages: arch/arm/mm/proc-v7.S:538: Error: unrecognised relocation suffix make[1]: *** [arch/arm/mm/proc-v7.o] Error 1 make: *** [arch/arm/mm] Error 2 since it is implicitly pulled via asm/pgtable.h for MMU capable systems only. To fix it include asm/memory.h explicitly. Signed-off-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/mm/proc-v7.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index a7123b4..d00d52c 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -16,6 +16,7 @@ #include #include #include +#include #include "proc-macros.S" -- cgit v1.1 From 1527eda3ab290f4ab519fe495c70ede5b0ad699f Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 19 Aug 2016 19:38:41 +0100 Subject: ARM: 8600/1: Enforce some NS-SVC initialisation Since the non-secure copies of banked registers lack architecturally defined reset values, there is no actual guarantee when entering in Hyp from secure-only firmware that the Non-Secure PL1 state will look the way that kernel entry (in particular the decompressor stub) expects. So far, we've been getting away with it thanks to implementation details of ARMv7 cores and/or bootloader behaviour, but for the sake of forwards compatibility let's try to ensure that we have a minimally sane state before dropping down into it. 
Cc: Russell King Reviewed-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Robin Murphy Signed-off-by: Russell King --- arch/arm/kernel/hyp-stub.S | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S index 0b1e4a9..15d073a 100644 --- a/arch/arm/kernel/hyp-stub.S +++ b/arch/arm/kernel/hyp-stub.S @@ -142,6 +142,19 @@ ARM_BE8(orr r7, r7, #(1 << 25)) @ HSCTLR.EE and r7, #0x1f @ Preserve HPMN mcr p15, 4, r7, c1, c1, 1 @ HDCR + @ Make sure NS-SVC is initialised appropriately + mrc p15, 0, r7, c1, c0, 0 @ SCTLR + orr r7, #(1 << 5) @ CP15 barriers enabled + bic r7, #(3 << 7) @ Clear SED/ITD for v8 (RES0 for v7) + bic r7, #(3 << 19) @ WXN and UWXN disabled + mcr p15, 0, r7, c1, c0, 0 @ SCTLR + + mrc p15, 0, r7, c0, c0, 0 @ MIDR + mcr p15, 4, r7, c0, c0, 0 @ VPIDR + + mrc p15, 0, r7, c0, c0, 5 @ MPIDR + mcr p15, 4, r7, c0, c0, 5 @ VMPIDR + #if !defined(ZIMAGE) && defined(CONFIG_ARM_ARCH_TIMER) @ make CNTP_* and CNTPCT accessible from PL1 mrc p15, 0, r7, c0, c1, 1 @ ID_PFR1 -- cgit v1.1 From 486095fae3a8a6b1ae07c51844699d9bd5cfbebc Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Tue, 23 Aug 2016 13:58:25 +0800 Subject: pinctrl: sunxi: fix uart1 CTS/RTS pins at PG on A23/A33 PG8, PG9 is said to be the CTS/RTS pins for UART1 according to the A23/33 datasheets. However, the function is wrongly named "uart2" in the pinctrl driver. This patch fixes this by modifying them to be named "uart1". 
Cc: stable@vger.kernel.org Signed-off-by: Icenowy Zheng Acked-by: Maxime Ripard Signed-off-by: Linus Walleij --- drivers/pinctrl/sunxi/pinctrl-sun8i-a23.c | 4 ++-- drivers/pinctrl/sunxi/pinctrl-sun8i-a33.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/sunxi/pinctrl-sun8i-a23.c b/drivers/pinctrl/sunxi/pinctrl-sun8i-a23.c index ce483b0..f9d661e5 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sun8i-a23.c +++ b/drivers/pinctrl/sunxi/pinctrl-sun8i-a23.c @@ -485,12 +485,12 @@ static const struct sunxi_desc_pin sun8i_a23_pins[] = { SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 8), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), - SUNXI_FUNCTION(0x2, "uart2"), /* RTS */ + SUNXI_FUNCTION(0x2, "uart1"), /* RTS */ SUNXI_FUNCTION_IRQ_BANK(0x4, 2, 8)), /* PG_EINT8 */ SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 9), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), - SUNXI_FUNCTION(0x2, "uart2"), /* CTS */ + SUNXI_FUNCTION(0x2, "uart1"), /* CTS */ SUNXI_FUNCTION_IRQ_BANK(0x4, 2, 9)), /* PG_EINT9 */ SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 10), SUNXI_FUNCTION(0x0, "gpio_in"), diff --git a/drivers/pinctrl/sunxi/pinctrl-sun8i-a33.c b/drivers/pinctrl/sunxi/pinctrl-sun8i-a33.c index 3040abe..3131cac 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sun8i-a33.c +++ b/drivers/pinctrl/sunxi/pinctrl-sun8i-a33.c @@ -407,12 +407,12 @@ static const struct sunxi_desc_pin sun8i_a33_pins[] = { SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 8), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), - SUNXI_FUNCTION(0x2, "uart2"), /* RTS */ + SUNXI_FUNCTION(0x2, "uart1"), /* RTS */ SUNXI_FUNCTION_IRQ_BANK(0x4, 1, 8)), /* PG_EINT8 */ SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 9), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), - SUNXI_FUNCTION(0x2, "uart2"), /* CTS */ + SUNXI_FUNCTION(0x2, "uart1"), /* CTS */ SUNXI_FUNCTION_IRQ_BANK(0x4, 1, 9)), /* PG_EINT9 */ SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 10), SUNXI_FUNCTION(0x0, "gpio_in"), -- cgit v1.1 From 
6c73358c83ce870c0cf32413e5cadb3b9a39c606 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 22 Aug 2016 16:58:53 -0400 Subject: USB: fix typo in wMaxPacketSize validation The maximum value allowed for wMaxPacketSize of a high-speed interrupt endpoint is 1024 bytes, not 1023. Signed-off-by: Alan Stern Fixes: aed9d65ac327 ("USB: validate wMaxPacketValue entries in endpoint descriptors") CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 0511631..15ce4ab 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -187,7 +187,7 @@ static const unsigned short high_speed_maxpacket_maxes[4] = { [USB_ENDPOINT_XFER_CONTROL] = 64, [USB_ENDPOINT_XFER_ISOC] = 1024, [USB_ENDPOINT_XFER_BULK] = 512, - [USB_ENDPOINT_XFER_INT] = 1023, + [USB_ENDPOINT_XFER_INT] = 1024, }; static const unsigned short super_speed_maxpacket_maxes[4] = { [USB_ENDPOINT_XFER_CONTROL] = 512, -- cgit v1.1 From b88fa69eaa8649f11828158c7b65c4bcd886ebd5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Aug 2016 11:19:33 -0400 Subject: pNFS: The client must not do I/O to the DS if its lease has expired Ensure that the client conforms to the normative behaviour described in RFC5661 Section 12.7.2: "If a client believes its lease has expired, it MUST NOT send I/O to the storage device until it has validated its lease." So ensure that we wait for the lease to be validated before using the layout.
Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v3.20+ --- fs/nfs/pnfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index bf98f1b..6daf034 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1555,6 +1555,7 @@ pnfs_update_layout(struct inode *ino, } lookup_again: + nfs4_client_recover_expired_lease(clp); first = false; spin_lock(&ino->i_lock); lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); -- cgit v1.1 From 41963c10c47a35185e68cb9049f7a3493c94d2d7 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Mon, 22 Aug 2016 14:11:16 -0400 Subject: pnfs/blocklayout: update last_write_offset atomically with extents Block/SCSI layout write completion may add committable extents to the extent tree before updating the layout's last-written byte under the inode lock. If a sync happens before this value is updated, then prepare_layoutcommit may find and encode these extents which would produce a LAYOUTCOMMIT request whose encoded extents are larger than the request's loca_length. Fix this by using a last-written byte value that is updated atomically with the extent tree so that committable extents always match.
Signed-off-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 2 +- fs/nfs/blocklayout/blocklayout.h | 3 ++- fs/nfs/blocklayout/extent_tree.c | 10 +++++++--- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index f55a4e7..2178476 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -346,7 +346,7 @@ static void bl_write_cleanup(struct work_struct *work) PAGE_SIZE - 1) & (loff_t)PAGE_MASK; ext_tree_mark_written(bl, start >> SECTOR_SHIFT, - (end - start) >> SECTOR_SHIFT); + (end - start) >> SECTOR_SHIFT, end); } pnfs_ld_write_done(hdr); diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index 18e6fd0..efc007f 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h @@ -141,6 +141,7 @@ struct pnfs_block_layout { struct rb_root bl_ext_ro; spinlock_t bl_ext_lock; /* Protects list manipulation */ bool bl_scsi_layout; + u64 bl_lwb; }; static inline struct pnfs_block_layout * @@ -182,7 +183,7 @@ int ext_tree_insert(struct pnfs_block_layout *bl, int ext_tree_remove(struct pnfs_block_layout *bl, bool rw, sector_t start, sector_t end); int ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start, - sector_t len); + sector_t len, u64 lwb); bool ext_tree_lookup(struct pnfs_block_layout *bl, sector_t isect, struct pnfs_block_extent *ret, bool rw); int ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg); diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c index 992bcb1..c85fbfd 100644 --- a/fs/nfs/blocklayout/extent_tree.c +++ b/fs/nfs/blocklayout/extent_tree.c @@ -402,7 +402,7 @@ ext_tree_split(struct rb_root *root, struct pnfs_block_extent *be, int ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start, - sector_t len) + sector_t len, u64 lwb) { struct rb_root *root = &bl->bl_ext_rw; sector_t end = start + 
len; @@ -471,6 +471,8 @@ ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start, } } out: + if (bl->bl_lwb < lwb) + bl->bl_lwb = lwb; spin_unlock(&bl->bl_ext_lock); __ext_put_deviceids(&tmp); @@ -518,7 +520,7 @@ static __be32 *encode_scsi_range(struct pnfs_block_extent *be, __be32 *p) } static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p, - size_t buffer_size, size_t *count) + size_t buffer_size, size_t *count, __u64 *lastbyte) { struct pnfs_block_extent *be; int ret = 0; @@ -542,6 +544,8 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p, p = encode_block_extent(be, p); be->be_tag = EXTENT_COMMITTING; } + *lastbyte = bl->bl_lwb - 1; + bl->bl_lwb = 0; spin_unlock(&bl->bl_ext_lock); return ret; @@ -564,7 +568,7 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg) arg->layoutupdate_pages = &arg->layoutupdate_page; retry: - ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count); + ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count, &arg->lastbytewritten); if (unlikely(ret)) { ext_tree_free_commitdata(arg, buffer_size); -- cgit v1.1 From 543852af8e5902aee8f7c72c89e1513663e0f696 Mon Sep 17 00:00:00 2001 From: Caesar Wang Date: Wed, 27 Jul 2016 22:24:04 +0800 Subject: iio: adc: rockchip_saradc: reset saradc controller before programming it SARADC controller needs to be reset before programming it, otherwise it will not function properly. 
Signed-off-by: Caesar Wang Cc: Jonathan Cameron Cc: Heiko Stuebner Cc: Rob Herring Cc: linux-iio@vger.kernel.org Cc: linux-rockchip@lists.infradead.org Tested-by: Guenter Roeck Cc: Signed-off-by: Jonathan Cameron --- .../bindings/iio/adc/rockchip-saradc.txt | 7 +++++ drivers/iio/adc/Kconfig | 1 + drivers/iio/adc/rockchip_saradc.c | 30 ++++++++++++++++++++++ 3 files changed, 38 insertions(+) diff --git a/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.txt b/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.txt index bf99e2f..205593f 100644 --- a/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.txt +++ b/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.txt @@ -16,6 +16,11 @@ Required properties: - vref-supply: The regulator supply ADC reference voltage. - #io-channel-cells: Should be 1, see ../iio-bindings.txt +Optional properties: +- resets: Must contain an entry for each entry in reset-names if need support + this option. See ../reset/reset.txt for details. +- reset-names: Must include the name "saradc-apb". + Example: saradc: saradc@2006c000 { compatible = "rockchip,saradc"; @@ -23,6 +28,8 @@ Example: interrupts = ; clocks = <&cru SCLK_SARADC>, <&cru PCLK_SARADC>; clock-names = "saradc", "apb_pclk"; + resets = <&cru SRST_SARADC>; + reset-names = "saradc-apb"; #io-channel-cells = <1>; vref-supply = <&vcc18>; }; diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 1de31bd..7675772 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -389,6 +389,7 @@ config QCOM_SPMI_VADC config ROCKCHIP_SARADC tristate "Rockchip SARADC driver" depends on ARCH_ROCKCHIP || (ARM && COMPILE_TEST) + depends on RESET_CONTROLLER help Say yes here to build support for the SARADC found in SoCs from Rockchip. 
diff --git a/drivers/iio/adc/rockchip_saradc.c b/drivers/iio/adc/rockchip_saradc.c index f9ad6c2..85d7012 100644 --- a/drivers/iio/adc/rockchip_saradc.c +++ b/drivers/iio/adc/rockchip_saradc.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include #include @@ -53,6 +55,7 @@ struct rockchip_saradc { struct clk *clk; struct completion completion; struct regulator *vref; + struct reset_control *reset; const struct rockchip_saradc_data *data; u16 last_val; }; @@ -190,6 +193,16 @@ static const struct of_device_id rockchip_saradc_match[] = { }; MODULE_DEVICE_TABLE(of, rockchip_saradc_match); +/** + * Reset SARADC Controller. + */ +static void rockchip_saradc_reset_controller(struct reset_control *reset) +{ + reset_control_assert(reset); + usleep_range(10, 20); + reset_control_deassert(reset); +} + static int rockchip_saradc_probe(struct platform_device *pdev) { struct rockchip_saradc *info = NULL; @@ -218,6 +231,20 @@ static int rockchip_saradc_probe(struct platform_device *pdev) if (IS_ERR(info->regs)) return PTR_ERR(info->regs); + /* + * The reset should be an optional property, as it should work + * with old devicetrees as well + */ + info->reset = devm_reset_control_get(&pdev->dev, "saradc-apb"); + if (IS_ERR(info->reset)) { + ret = PTR_ERR(info->reset); + if (ret != -ENOENT) + return ret; + + dev_dbg(&pdev->dev, "no reset control found\n"); + info->reset = NULL; + } + init_completion(&info->completion); irq = platform_get_irq(pdev, 0); @@ -252,6 +279,9 @@ static int rockchip_saradc_probe(struct platform_device *pdev) return PTR_ERR(info->vref); } + if (info->reset) + rockchip_saradc_reset_controller(info->reset); + /* * Use a default value for the converter clock. * This may become user-configurable in the future. 
-- cgit v1.1 From 78ec79bfd59e126e1cb394302bfa531a420b3ecd Mon Sep 17 00:00:00 2001 From: Caesar Wang Date: Wed, 27 Jul 2016 22:24:06 +0800 Subject: arm64: dts: rockchip: add reset saradc node for rk3368 SoCs SARADC controller needs to be reset before programming it, otherwise it will not function properly. Signed-off-by: Caesar Wang Acked-by: Heiko Stuebner Cc: Signed-off-by: Jonathan Cameron --- arch/arm64/boot/dts/rockchip/rk3368.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3368.dtsi b/arch/arm64/boot/dts/rockchip/rk3368.dtsi index d02a9003..4f44d11 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3368.dtsi @@ -270,6 +270,8 @@ #io-channel-cells = <1>; clocks = <&cru SCLK_SARADC>, <&cru PCLK_SARADC>; clock-names = "saradc", "apb_pclk"; + resets = <&cru SRST_SARADC>; + reset-names = "saradc-apb"; status = "disabled"; }; -- cgit v1.1 From 3d4267a5a3a4b7619b80ad1839d8b3bedd8b7a8d Mon Sep 17 00:00:00 2001 From: Caesar Wang Date: Wed, 27 Jul 2016 22:24:07 +0800 Subject: arm: dts: rockchip: add reset node for the existing saradc SoCs SARADC controller needs to be reset before programming it, otherwise it will not function properly.
Signed-off-by: Caesar Wang Acked-by: Heiko Stuebner Cc: Signed-off-by: Jonathan Cameron --- arch/arm/boot/dts/rk3066a.dtsi | 2 ++ arch/arm/boot/dts/rk3288.dtsi | 2 ++ arch/arm/boot/dts/rk3xxx.dtsi | 2 ++ 3 files changed, 6 insertions(+) diff --git a/arch/arm/boot/dts/rk3066a.dtsi b/arch/arm/boot/dts/rk3066a.dtsi index c0ba86c..0d0dae3 100644 --- a/arch/arm/boot/dts/rk3066a.dtsi +++ b/arch/arm/boot/dts/rk3066a.dtsi @@ -197,6 +197,8 @@ clock-names = "saradc", "apb_pclk"; interrupts = ; #io-channel-cells = <1>; + resets = <&cru SRST_SARADC>; + reset-names = "saradc-apb"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index cd33f01..91c4b3c 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -279,6 +279,8 @@ #io-channel-cells = <1>; clocks = <&cru SCLK_SARADC>, <&cru PCLK_SARADC>; clock-names = "saradc", "apb_pclk"; + resets = <&cru SRST_SARADC>; + reset-names = "saradc-apb"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/rk3xxx.dtsi b/arch/arm/boot/dts/rk3xxx.dtsi index 99bbcc2..e2cd683 100644 --- a/arch/arm/boot/dts/rk3xxx.dtsi +++ b/arch/arm/boot/dts/rk3xxx.dtsi @@ -399,6 +399,8 @@ #io-channel-cells = <1>; clocks = <&cru SCLK_SARADC>, <&cru PCLK_SARADC>; clock-names = "saradc", "apb_pclk"; + resets = <&cru SRST_SARADC>; + reset-names = "saradc-apb"; status = "disabled"; }; -- cgit v1.1 From 53e5f36fbd2453ad69a3369a1db62dc06c30a4aa Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 23 Aug 2016 15:32:51 -0400 Subject: USB: avoid left shift by -1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit UBSAN complains about a left shift by -1 in proc_do_submiturb(). This can occur when an URB is submitted for a bulk or control endpoint on a high-speed device, since the code doesn't bother to check the endpoint type; normally only interrupt or isochronous endpoints have a nonzero bInterval value. 
Aside from the fact that the operation is illegal, it shouldn't matter because the result isn't used. Still, in theory it could cause a hardware exception or other problem, so we should work around it. This patch avoids doing the left shift unless the shift amount is >= 0. The same piece of code has another problem. When checking the device speed (the exponential encoding for interrupt endpoints is used only by high-speed or faster devices), we need to look for speed >= USB_SPEED_SUPER as well as speed == USB_SPEED_HIGH. The patch adds this check. Signed-off-by: Alan Stern Reported-by: Vittorio Zecca Tested-by: Vittorio Zecca Suggested-by: Bjørn Mork CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index e6a6d67..09c8d9c 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1709,11 +1709,17 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb as->urb->start_frame = uurb->start_frame; as->urb->number_of_packets = number_of_packets; as->urb->stream_id = stream_id; - if (uurb->type == USBDEVFS_URB_TYPE_ISO || - ps->dev->speed == USB_SPEED_HIGH) - as->urb->interval = 1 << min(15, ep->desc.bInterval - 1); - else - as->urb->interval = ep->desc.bInterval; + + if (ep->desc.bInterval) { + if (uurb->type == USBDEVFS_URB_TYPE_ISO || + ps->dev->speed == USB_SPEED_HIGH || + ps->dev->speed >= USB_SPEED_SUPER) + as->urb->interval = 1 << + min(15, ep->desc.bInterval - 1); + else + as->urb->interval = ep->desc.bInterval; + } + as->urb->context = as; as->urb->complete = async_completed; for (totlen = u = 0; u < number_of_packets; u++) { -- cgit v1.1 From 4141b36ab16d7a66b4cf712f2d21eba61c5927e5 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 24 Aug 2016 13:08:40 +0200 Subject: xfrm: Fix xfrm_policy_lock imbalance An earlier patch accidentally replaced a write_lock_bh with
a spin_unlock_bh. Fix this by using spin_lock_bh instead. Fixes: 9d0380df6217 ("xfrm: policy: convert policy_lock to spinlock") Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index dd01fd2..f7ce626 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -979,7 +979,7 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) xfrm_policy_kill(pol); - spin_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); goto again1; } -- cgit v1.1 From 35db57bbc4b7ab810bba6e6d6954a0faf5a842cf Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 23 Aug 2016 16:00:12 +0200 Subject: xfrm: state: remove per-netns gc task After commit 5b8ef3415a21f173 ("xfrm: Remove ancient sleeping when the SA is in acquire state") gc does not need any per-netns data anymore. As far as gc is concerned all state structs are the same, so we can use a global work struct for it. 
Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 2 -- net/xfrm/xfrm_state.c | 18 +++++++++--------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 177ed44..27bb963 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -44,8 +44,6 @@ struct netns_xfrm { unsigned int state_hmask; unsigned int state_num; struct work_struct state_hash_work; - struct hlist_head state_gc_list; - struct work_struct state_gc_work; struct list_head policy_all; struct hlist_head *policy_byidx; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 1a15b65..ba8bf51 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -31,6 +31,8 @@ #define xfrm_state_deref_prot(table, net) \ rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock)) +static void xfrm_state_gc_task(struct work_struct *work); + /* Each xfrm_state may be linked to two tables: 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. 
(input,ctl) @@ -41,6 +43,9 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static __read_mostly seqcount_t xfrm_state_hash_generation = SEQCNT_ZERO(xfrm_state_hash_generation); +static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task); +static HLIST_HEAD(xfrm_state_gc_list); + static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x) { return atomic_inc_not_zero(&x->refcnt); @@ -368,13 +373,12 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) static void xfrm_state_gc_task(struct work_struct *work) { - struct net *net = container_of(work, struct net, xfrm.state_gc_work); struct xfrm_state *x; struct hlist_node *tmp; struct hlist_head gc_list; spin_lock_bh(&xfrm_state_gc_lock); - hlist_move_list(&net->xfrm.state_gc_list, &gc_list); + hlist_move_list(&xfrm_state_gc_list, &gc_list); spin_unlock_bh(&xfrm_state_gc_lock); synchronize_rcu(); @@ -515,14 +519,12 @@ EXPORT_SYMBOL(xfrm_state_alloc); void __xfrm_state_destroy(struct xfrm_state *x) { - struct net *net = xs_net(x); - WARN_ON(x->km.state != XFRM_STATE_DEAD); spin_lock_bh(&xfrm_state_gc_lock); - hlist_add_head(&x->gclist, &net->xfrm.state_gc_list); + hlist_add_head(&x->gclist, &xfrm_state_gc_list); spin_unlock_bh(&xfrm_state_gc_lock); - schedule_work(&net->xfrm.state_gc_work); + schedule_work(&xfrm_state_gc_work); } EXPORT_SYMBOL(__xfrm_state_destroy); @@ -2134,8 +2136,6 @@ int __net_init xfrm_state_init(struct net *net) net->xfrm.state_num = 0; INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize); - INIT_HLIST_HEAD(&net->xfrm.state_gc_list); - INIT_WORK(&net->xfrm.state_gc_work, xfrm_state_gc_task); spin_lock_init(&net->xfrm.xfrm_state_lock); return 0; @@ -2153,7 +2153,7 @@ void xfrm_state_fini(struct net *net) flush_work(&net->xfrm.state_hash_work); xfrm_state_flush(net, IPSEC_PROTO_ANY, false); - flush_work(&net->xfrm.state_gc_work); + flush_work(&xfrm_state_gc_work); WARN_ON(!list_empty(&net->xfrm.state_all)); -- cgit v1.1 From 
b70cd2de0ea85f5ab51a1d01893cba6415011b9d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 24 Aug 2016 14:11:30 +0300 Subject: spi: pxa2xx-pci: fix ACPI-based enumeration of SPI devices Slave devices are not enumerated by ACPI data because the ACPI handle for the core driver is NULL if it was enumerated by PCI. Propagate firmware node handle of the PCI device to the platform device. Suggested-by: Mika Westerberg Signed-off-by: Andy Shevchenko Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx-pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c index f3df522..58d2d48 100644 --- a/drivers/spi/spi-pxa2xx-pci.c +++ b/drivers/spi/spi-pxa2xx-pci.c @@ -214,6 +214,7 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev, return PTR_ERR(ssp->clk); memset(&pi, 0, sizeof(pi)); + pi.fwnode = dev->dev.fwnode; pi.parent = &dev->dev; pi.name = "pxa2xx-spi"; pi.id = ssp->port_id; -- cgit v1.1 From 40d9c32525cba79130612650b1abc47c0c0f19a8 Mon Sep 17 00:00:00 2001 From: Aleksandr Makarov Date: Wed, 24 Aug 2016 13:06:22 +0300 Subject: USB: serial: option: add WeTelecom 0x6802 and 0x6803 products These product IDs are listed in Windows driver. 0x6803 corresponds to WeTelecom WM-D300. 0x6802 name is unknown. 
Signed-off-by: Aleksandr Makarov Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index bb6a711..9894e34 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -528,6 +528,8 @@ static void option_instat_callback(struct urb *urb); /* WeTelecom products */ #define WETELECOM_VENDOR_ID 0x22de #define WETELECOM_PRODUCT_WMD200 0x6801 +#define WETELECOM_PRODUCT_6802 0x6802 +#define WETELECOM_PRODUCT_WMD300 0x6803 struct option_blacklist_info { /* bitmask of interface numbers blacklisted for send_setup */ @@ -1996,6 +1998,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(INOVIA_VENDOR_ID, INOVIA_SEW858) }, { USB_DEVICE(VIATELECOM_VENDOR_ID, VIATELECOM_PRODUCT_CDS7) }, { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD200, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_6802, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD300, 0xff, 0xff, 0xff) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); -- cgit v1.1 From f74bdd4cb5d0d4c3e89919e850e0bbb8789f32f9 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Tue, 16 Aug 2016 21:49:45 +0200 Subject: hwrng: mxc-rnga - Fix Kconfig dependency We can directly depend on SOC_IMX31 since commit c9ee94965dce ("ARM: imx: deconstruct mxc_rnga initialization") Since that commit, CONFIG_HW_RANDOM_MXC_RNGA could not be switched on with unknown symbol ARCH_HAS_RNGA and mxc-rnga.o can't be generated with ARCH=arm make M=drivers/char/hw_random Previously, HW_RANDOM_MXC_RNGA required ARCH_HAS_RNGA which was based on IMX_HAVE_PLATFORM_MXC_RNGA && ARCH_MXC. IMX_HAVE_PLATFORM_MXC_RNGA was based on SOC_IMX31. 
Fixes: c9ee94965dce ("ARM: imx: deconstruct mxc_rnga initialization") Signed-off-by: Fabian Frederick Acked-by: Arnd Bergmann Signed-off-by: Herbert Xu --- drivers/char/hw_random/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index 56ad5a5..8c0770b 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -244,7 +244,7 @@ config HW_RANDOM_TX4939 config HW_RANDOM_MXC_RNGA tristate "Freescale i.MX RNGA Random Number Generator" - depends on ARCH_HAS_RNGA + depends on SOC_IMX31 default HW_RANDOM ---help--- This driver provides kernel-side support for the Random Number -- cgit v1.1 From 10bb087ce381c812cd81a65ffd5e6f83e6399291 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Thu, 18 Aug 2016 19:53:36 +0100 Subject: crypto: qat - fix aes-xts key sizes Increase value of supported key sizes for qat_aes_xts. aes-xts keys consists of keys of equal size concatenated. Fixes: def14bfaf30d ("crypto: qat - add support for ctr(aes) and xts(aes)") Cc: stable@vger.kernel.org Reported-by: Wenqian Yu Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_algs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index 769148d..20f35df 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -1260,8 +1260,8 @@ static struct crypto_alg qat_algs[] = { { .setkey = qat_alg_ablkcipher_xts_setkey, .decrypt = qat_alg_ablkcipher_decrypt, .encrypt = qat_alg_ablkcipher_encrypt, - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, .ivsize = AES_BLOCK_SIZE, }, }, -- cgit v1.1 From 901d3d4fee83e9407d91e7178048e2fed6c91f6b Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Wed, 24 Aug 2016 15:34:40 +0800 
Subject: crypto: vmx - fix null dereference in p8_aes_xts_crypt walk.iv is not assigned a value in blkcipher_walk_init. It makes iv uninitialized. It is possibly a null value(as shown below), which is then used by aes_p8_encrypt. This patch moves iv = walk.iv after blkcipher_walk_virt, in which walk.iv is set. [17856.268050] Unable to handle kernel paging request for data at address 0x00000000 [17856.268212] Faulting instruction address: 0xd000000002ff04bc 7:mon> t [link register ] d000000002ff47b8 p8_aes_xts_crypt+0x168/0x2a0 [vmx_crypto] (938) [c000000013b77960] d000000002ff4794 p8_aes_xts_crypt+0x144/0x2a0 [vmx_crypto] (unreliable) [c000000013b77a70] c000000000544d64 skcipher_decrypt_blkcipher+0x64/0x80 [c000000013b77ac0] d000000003c0175c crypt_convert+0x53c/0x620 [dm_crypt] [c000000013b77ba0] d000000003c043fc kcryptd_crypt+0x3cc/0x440 [dm_crypt] [c000000013b77c50] c0000000000f3070 process_one_work+0x1e0/0x590 [c000000013b77ce0] c0000000000f34c8 worker_thread+0xa8/0x660 [c000000013b77d80] c0000000000fc0b0 kthread+0x110/0x130 [c000000013b77e30] c0000000000098f0 ret_from_kernel_thread+0x5c/0x6c Signed-off-by: Li Zhong Signed-off-by: Herbert Xu --- drivers/crypto/vmx/aes_xts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/vmx/aes_xts.c b/drivers/crypto/vmx/aes_xts.c index cfb2541..24353ec3 100644 --- a/drivers/crypto/vmx/aes_xts.c +++ b/drivers/crypto/vmx/aes_xts.c @@ -129,8 +129,8 @@ static int p8_aes_xts_crypt(struct blkcipher_desc *desc, blkcipher_walk_init(&walk, dst, src, nbytes); - iv = (u8 *)walk.iv; ret = blkcipher_walk_virt(desc, &walk); + iv = walk.iv; memset(tweak, 0, AES_BLOCK_SIZE); aes_p8_encrypt(iv, tweak, &ctx->tweak_key); -- cgit v1.1 From 15dacf880e49ce3ecee05eb1a0c6b8e363dbacdc Mon Sep 17 00:00:00 2001 From: "mhiramat@kernel.org" Date: Mon, 15 Aug 2016 18:40:57 +0900 Subject: brcmfmac: Check rtnl_lock is locked when removing interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Check rtnl_lock is locked in brcmf_p2p_ifp_removed() by passing rtnl_locked flag. Actually the caller brcmf_del_if() checks whether the rtnl_lock is locked, but doesn't pass it to brcmf_p2p_ifp_removed(). Without this fix, wpa_supplicant goes softlockup with rtnl_lock holding (this means all other process using netlink are locked up too) e.g. [ 4495.876627] INFO: task wpa_supplicant:7307 blocked for more than 10 seconds. [ 4495.876632] Tainted: G W 4.8.0-rc1+ #8 [ 4495.876635] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 4495.876638] wpa_supplicant D ffff974c647b39a0 0 7307 1 0x00000000 [ 4495.876644] ffff974c647b39a0 0000000000000000 ffff974c00000000 ffff974c7dc59c58 [ 4495.876651] ffff974c6b7417c0 ffff974c645017c0 ffff974c647b4000 ffffffff86f16c08 [ 4495.876657] ffff974c645017c0 0000000000000246 00000000ffffffff ffff974c647b39b8 [ 4495.876664] Call Trace: [ 4495.876671] [] schedule+0x3c/0x90 [ 4495.876676] [] schedule_preempt_disabled+0x15/0x20 [ 4495.876682] [] mutex_lock_nested+0x176/0x3b0 [ 4495.876686] [] ? rtnl_lock+0x17/0x20 [ 4495.876690] [] rtnl_lock+0x17/0x20 [ 4495.876720] [] brcmf_p2p_ifp_removed+0x4d/0x70 [brcmfmac] [ 4495.876741] [] brcmf_remove_interface+0x196/0x1b0 [brcmfmac] [ 4495.876760] [] brcmf_p2p_del_vif+0x111/0x220 [brcmfmac] [ 4495.876777] [] brcmf_cfg80211_del_iface+0x21b/0x270 [brcmfmac] [ 4495.876820] [] nl80211_del_interface+0xfe/0x3a0 [cfg80211] [ 4495.876825] [] genl_family_rcv_msg+0x1b5/0x370 [ 4495.876832] [] ? trace_hardirqs_on+0xd/0x10 [ 4495.876836] [] genl_rcv_msg+0x7d/0xb0 [ 4495.876839] [] ? genl_family_rcv_msg+0x370/0x370 [ 4495.876846] [] netlink_rcv_skb+0x97/0xb0 [ 4495.876849] [] genl_rcv+0x28/0x40 [ 4495.876854] [] netlink_unicast+0x1d3/0x2f0 [ 4495.876860] [] ? netlink_unicast+0x14b/0x2f0 [ 4495.876866] [] netlink_sendmsg+0x2eb/0x3a0 [ 4495.876870] [] sock_sendmsg+0x38/0x50 [ 4495.876874] [] ___sys_sendmsg+0x27f/0x290 [ 4495.876882] [] ? 
mntput_no_expire+0x5/0x3f0 [ 4495.876888] [] ? mntput_no_expire+0x8e/0x3f0 [ 4495.876894] [] ? mntput_no_expire+0x5/0x3f0 [ 4495.876899] [] ? mntput+0x24/0x40 [ 4495.876904] [] ? __fput+0x190/0x200 [ 4495.876909] [] __sys_sendmsg+0x45/0x80 [ 4495.876914] [] SyS_sendmsg+0x12/0x20 [ 4495.876918] [] entry_SYSCALL_64_fastpath+0x23/0xc1 [ 4495.876924] [] ? trace_hardirqs_off_caller+0x1f/0xc0 Signed-off-by: Masami Hiramatsu Acked-by: Rafał Miłecki Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c | 2 +- drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c | 8 +++++--- drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c index 8d16f02..65e8c87 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c @@ -743,7 +743,7 @@ static void brcmf_del_if(struct brcmf_pub *drvr, s32 bsscfgidx, * serious troublesome side effects. The p2p module will clean * up the ifp if needed. 
*/ - brcmf_p2p_ifp_removed(ifp); + brcmf_p2p_ifp_removed(ifp, rtnl_locked); kfree(ifp); } } diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c index 66f942f..de19c7c 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c @@ -2297,7 +2297,7 @@ int brcmf_p2p_del_vif(struct wiphy *wiphy, struct wireless_dev *wdev) return err; } -void brcmf_p2p_ifp_removed(struct brcmf_if *ifp) +void brcmf_p2p_ifp_removed(struct brcmf_if *ifp, bool rtnl_locked) { struct brcmf_cfg80211_info *cfg; struct brcmf_cfg80211_vif *vif; @@ -2306,9 +2306,11 @@ void brcmf_p2p_ifp_removed(struct brcmf_if *ifp) vif = ifp->vif; cfg = wdev_to_cfg(&vif->wdev); cfg->p2p.bss_idx[P2PAPI_BSSCFG_DEVICE].vif = NULL; - rtnl_lock(); + if (!rtnl_locked) + rtnl_lock(); cfg80211_unregister_wdev(&vif->wdev); - rtnl_unlock(); + if (!rtnl_locked) + rtnl_unlock(); brcmf_free_vif(vif); } diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h index a3bd18c..8ce9447 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h @@ -155,7 +155,7 @@ struct wireless_dev *brcmf_p2p_add_vif(struct wiphy *wiphy, const char *name, int brcmf_p2p_del_vif(struct wiphy *wiphy, struct wireless_dev *wdev); int brcmf_p2p_ifchange(struct brcmf_cfg80211_info *cfg, enum brcmf_fil_p2p_if_types if_type); -void brcmf_p2p_ifp_removed(struct brcmf_if *ifp); +void brcmf_p2p_ifp_removed(struct brcmf_if *ifp, bool rtnl_locked); int brcmf_p2p_start_device(struct wiphy *wiphy, struct wireless_dev *wdev); void brcmf_p2p_stop_device(struct wiphy *wiphy, struct wireless_dev *wdev); int brcmf_p2p_scan_prep(struct wiphy *wiphy, -- cgit v1.1 From b64abcb7dae6060c67ab0e548da3ef923c49641d Mon Sep 17 00:00:00 2001 From: "mhiramat@kernel.org" Date: Mon, 15 Aug 2016 18:41:12 
+0900 Subject: brcmfmac: Change vif_event_lock to spinlock Change vif_event_lock to spinlock from mutex, since this lock is used in wait_event_timeout() via vif_event_equals(). This caused a warning report as below. As far as I can see, this lock protects regions where updating structure members, not function calls. Also, since those regions are not called from interrupt handlers (of course, it was a mutex), spin_lock is used instead of spin_lock_irqsave. [ 186.678550] ------------[ cut here ]------------ [ 186.678556] WARNING: CPU: 2 PID: 7140 at /home/mhiramat/ksrc/linux/kernel/sched/core.c:7545 __might_sleep+0x7c/0x80 [ 186.678560] do not call blocking ops when !TASK_RUNNING; state=2 set at [] prepare_to_wait_event+0x60/0x100 [ 186.678560] Modules linked in: brcmfmac xt_CHECKSUM rfcomm ipt_MASQUERADE nf_nat_masquerade_ipv4 xt_addrtype br_netfilter xt_tcpudp ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 ipt_REJECT nf_reject_ipv4 xt_conntrack ip_set nfnetlink ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_raw ip6table_security ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_filter ip6_tables iptable_raw iptable_security iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_filter ip_tables x_tables bnep nls_iso8859_1 i2c_designware_platform i2c_designware_core snd_hda_codec_hdmi snd_hda_codec_realtek dcdbas snd_hda_codec_generic snd_hda_intel snd_hda_codec intel_rapl snd_hda_core x86_pkg_temp_thermal intel_powerclamp coretemp [ 186.678594] snd_pcm crct10dif_pclmul crc32_pclmul aesni_intel aes_x86_64 joydev glue_helper snd_hwdep lrw gf128mul uvcvideo ablk_helper snd_seq_midi cryptd snd_seq_midi_event snd_rawmidi videobuf2_vmalloc videobuf2_memops snd_seq input_leds videobuf2_v4l2 cfg80211 videobuf2_core snd_timer videodev serio_raw btusb snd_seq_device media btrtl rtsx_pci_ms snd mei_me memstick hid_multitouch mei soundcore brcmutil idma64 virt_dma intel_lpss_pci 
processor_thermal_device intel_soc_dts_iosf hci_uart btbcm btqca btintel bluetooth int3403_thermal dell_smo8800 intel_lpss_acpi intel_lpss int3402_thermal int340x_thermal_zone intel_hid mac_hid int3400_thermal shpchp sparse_keymap acpi_pad acpi_thermal_rel acpi_als kfifo_buf industrialio kvm_intel kvm irqbypass parport_pc ppdev lp parport autofs4 btrfs xor raid6_pq [ 186.678631] usbhid nouveau ttm i915 rtsx_pci_sdmmc mxm_wmi i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops psmouse drm ahci rtsx_pci nvme nvme_core libahci i2c_hid hid pinctrl_sunrisepoint video wmi pinctrl_intel fjes [last unloaded: brcmfmac] [ 186.678646] CPU: 2 PID: 7140 Comm: wpa_supplicant Not tainted 4.8.0-rc1+ #8 [ 186.678647] Hardware name: Dell Inc. XPS 15 9550/0N7TVV, BIOS 01.02.00 04/07/2016 [ 186.678648] 0000000000000000 ffff9d8c64b5b900 ffffffff98442f23 ffff9d8c64b5b950 [ 186.678651] 0000000000000000 ffff9d8c64b5b940 ffffffff9808b22b 00001d790000000d [ 186.678653] ffffffff98c75e78 000000000000026c 0000000000000000 ffff9d8c2706d058 [ 186.678655] Call Trace: [ 186.678659] [] dump_stack+0x85/0xc2 [ 186.678666] [] __warn+0xcb/0xf0 [ 186.678668] [] warn_slowpath_fmt+0x4f/0x60 [ 186.678671] [] ? prepare_to_wait_event+0x60/0x100 [ 186.678672] [] ? prepare_to_wait_event+0x60/0x100 [ 186.678674] [] __might_sleep+0x7c/0x80 [ 186.678680] [] mutex_lock_nested+0x33/0x3b0 [ 186.678682] [] ? trace_hardirqs_on+0xd/0x10 [ 186.678689] [] brcmf_cfg80211_wait_vif_event+0xcd/0x130 [brcmfmac] [ 186.678691] [] ? wake_atomic_t_function+0x60/0x60 [ 186.678697] [] brcmf_p2p_del_vif+0xf9/0x220 [brcmfmac] [ 186.678702] [] brcmf_cfg80211_del_iface+0x21b/0x270 [brcmfmac] [ 186.678716] [] nl80211_del_interface+0xfe/0x3a0 [cfg80211] [ 186.678718] [] genl_family_rcv_msg+0x1b5/0x370 [ 186.678720] [] ? trace_hardirqs_on+0xd/0x10 [ 186.678721] [] genl_rcv_msg+0x7d/0xb0 [ 186.678722] [] ? 
genl_family_rcv_msg+0x370/0x370 [ 186.678724] [] netlink_rcv_skb+0x97/0xb0 [ 186.678726] [] genl_rcv+0x28/0x40 [ 186.678727] [] netlink_unicast+0x1d3/0x2f0 [ 186.678729] [] ? netlink_unicast+0x14b/0x2f0 [ 186.678731] [] netlink_sendmsg+0x2eb/0x3a0 [ 186.678733] [] sock_sendmsg+0x38/0x50 [ 186.678734] [] ___sys_sendmsg+0x27f/0x290 [ 186.678737] [] ? mntput_no_expire+0x5/0x3f0 [ 186.678739] [] ? mntput_no_expire+0x8e/0x3f0 [ 186.678741] [] ? mntput_no_expire+0x5/0x3f0 [ 186.678743] [] ? mntput+0x24/0x40 [ 186.678744] [] ? __fput+0x190/0x200 [ 186.678746] [] __sys_sendmsg+0x45/0x80 [ 186.678748] [] SyS_sendmsg+0x12/0x20 [ 186.678749] [] entry_SYSCALL_64_fastpath+0x23/0xc1 [ 186.678751] [] ? trace_hardirqs_off_caller+0x1f/0xc0 [ 186.678752] ---[ end trace e224d66c5d8408b5 ]--- Signed-off-by: Masami Hiramatsu Acked-by: Arend van Spriel Signed-off-by: Kalle Valo --- .../broadcom/brcm80211/brcmfmac/cfg80211.c | 26 +++++++++++----------- .../broadcom/brcm80211/brcmfmac/cfg80211.h | 2 +- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 2628d5e..5db56a7 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -5635,7 +5635,7 @@ static s32 brcmf_notify_vif_event(struct brcmf_if *ifp, ifevent->action, ifevent->flags, ifevent->ifidx, ifevent->bsscfgidx); - mutex_lock(&event->vif_event_lock); + spin_lock(&event->vif_event_lock); event->action = ifevent->action; vif = event->vif; @@ -5643,7 +5643,7 @@ static s32 brcmf_notify_vif_event(struct brcmf_if *ifp, case BRCMF_E_IF_ADD: /* waiting process may have timed out */ if (!cfg->vif_event.vif) { - mutex_unlock(&event->vif_event_lock); + spin_unlock(&event->vif_event_lock); return -EBADF; } @@ -5654,24 +5654,24 @@ static s32 brcmf_notify_vif_event(struct brcmf_if *ifp, ifp->ndev->ieee80211_ptr = &vif->wdev; 
SET_NETDEV_DEV(ifp->ndev, wiphy_dev(cfg->wiphy)); } - mutex_unlock(&event->vif_event_lock); + spin_unlock(&event->vif_event_lock); wake_up(&event->vif_wq); return 0; case BRCMF_E_IF_DEL: - mutex_unlock(&event->vif_event_lock); + spin_unlock(&event->vif_event_lock); /* event may not be upon user request */ if (brcmf_cfg80211_vif_event_armed(cfg)) wake_up(&event->vif_wq); return 0; case BRCMF_E_IF_CHANGE: - mutex_unlock(&event->vif_event_lock); + spin_unlock(&event->vif_event_lock); wake_up(&event->vif_wq); return 0; default: - mutex_unlock(&event->vif_event_lock); + spin_unlock(&event->vif_event_lock); break; } return -EINVAL; @@ -5792,7 +5792,7 @@ static void wl_deinit_priv(struct brcmf_cfg80211_info *cfg) static void init_vif_event(struct brcmf_cfg80211_vif_event *event) { init_waitqueue_head(&event->vif_wq); - mutex_init(&event->vif_event_lock); + spin_lock_init(&event->vif_event_lock); } static s32 brcmf_dongle_roam(struct brcmf_if *ifp) @@ -6691,9 +6691,9 @@ static inline bool vif_event_equals(struct brcmf_cfg80211_vif_event *event, { u8 evt_action; - mutex_lock(&event->vif_event_lock); + spin_lock(&event->vif_event_lock); evt_action = event->action; - mutex_unlock(&event->vif_event_lock); + spin_unlock(&event->vif_event_lock); return evt_action == action; } @@ -6702,10 +6702,10 @@ void brcmf_cfg80211_arm_vif_event(struct brcmf_cfg80211_info *cfg, { struct brcmf_cfg80211_vif_event *event = &cfg->vif_event; - mutex_lock(&event->vif_event_lock); + spin_lock(&event->vif_event_lock); event->vif = vif; event->action = 0; - mutex_unlock(&event->vif_event_lock); + spin_unlock(&event->vif_event_lock); } bool brcmf_cfg80211_vif_event_armed(struct brcmf_cfg80211_info *cfg) @@ -6713,9 +6713,9 @@ bool brcmf_cfg80211_vif_event_armed(struct brcmf_cfg80211_info *cfg) struct brcmf_cfg80211_vif_event *event = &cfg->vif_event; bool armed; - mutex_lock(&event->vif_event_lock); + spin_lock(&event->vif_event_lock); armed = event->vif != NULL; - mutex_unlock(&event->vif_event_lock); 
+ spin_unlock(&event->vif_event_lock); return armed; } diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.h index 7d77f86..8889832 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.h @@ -227,7 +227,7 @@ struct escan_info { */ struct brcmf_cfg80211_vif_event { wait_queue_head_t vif_wq; - struct mutex vif_event_lock; + spinlock_t vif_event_lock; u8 action; struct brcmf_cfg80211_vif *vif; }; -- cgit v1.1 From 0d06108c65e572085b2d1f7c8273f417cad68734 Mon Sep 17 00:00:00 2001 From: Tiffany Lin Date: Sun, 14 Aug 2016 23:31:13 -0300 Subject: [media] vcodec:mediatek:code refine for v4l2 Encoder driver This patch removes an unused header include and an unused define from the header files Signed-off-by: Tiffany Lin Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h | 1 - drivers/media/platform/mtk-vcodec/mtk_vcodec_intr.h | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h b/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h index 94f0a42..3a8e695 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h @@ -23,7 +23,6 @@ #include #include -#include "mtk_vcodec_util.h" #define MTK_VCODEC_DRV_NAME "mtk_vcodec_drv" #define MTK_VCODEC_ENC_NAME "mtk-vcodec-enc" diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_intr.h b/drivers/media/platform/mtk-vcodec/mtk_vcodec_intr.h index 33e890f..1213185 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_intr.h +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_intr.h @@ -16,7 +16,6 @@ #define _MTK_VCODEC_INTR_H_ #define MTK_INST_IRQ_RECEIVED 0x1 -#define MTK_INST_WORK_THREAD_ABORT_DONE 0x2 struct mtk_vcodec_ctx; -- cgit v1.1 From ad34f5412d2a04a894b2cd2912538ae2e5d64e76 Mon Sep 17 00:00:00 2001 From:
Tiffany Lin Date: Sun, 14 Aug 2016 23:47:20 -0300 Subject: [media] vcodec:mediatek: Fix fops_vcodec_release flow for V4L2 Encoder This patch fixes the release flow so that mtk_vcodec_enc_release is called after v4l2_m2m_ctx_release Signed-off-by: Tiffany Lin Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c | 7 ++++++- drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c | 6 +++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c index 3ed3f2d..3b0691f 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c @@ -1288,5 +1288,10 @@ int mtk_venc_lock(struct mtk_vcodec_ctx *ctx) void mtk_vcodec_enc_release(struct mtk_vcodec_ctx *ctx) { - venc_if_deinit(ctx); + int ret = venc_if_deinit(ctx); + + if (ret) + mtk_v4l2_err("venc_if_deinit failed=%d", ret); + + ctx->state = MTK_STATE_FREE; } diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c index c7806ec..5cd2151 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c @@ -218,11 +218,15 @@ static int fops_vcodec_release(struct file *file) mtk_v4l2_debug(1, "[%d] encoder", ctx->id); mutex_lock(&dev->dev_mutex); + /* + * Call v4l2_m2m_ctx_release to make sure the worker thread is not + * running after venc_if_deinit.
+ */ + v4l2_m2m_ctx_release(ctx->m2m_ctx); mtk_vcodec_enc_release(ctx); v4l2_fh_del(&ctx->fh); v4l2_fh_exit(&ctx->fh); v4l2_ctrl_handler_free(&ctx->ctrl_hdl); - v4l2_m2m_ctx_release(ctx->m2m_ctx); list_del_init(&ctx->list); dev->num_instances--; -- cgit v1.1 From 91ae0e1ec6ec91cd297933886b424f9a4a8acbd4 Mon Sep 17 00:00:00 2001 From: Tiffany Lin Date: Mon, 15 Aug 2016 00:08:03 -0300 Subject: [media] vcodec:mediatek: Fix visible_height larger than coded_height issue in s_fmt_out The original code added an extra 32 lines to visible_height. This is incorrect; the 32 lines should have been added to coded_height. The purpose was to let user space calculate the real buffer size needed by using coded_width * coded_height. But this method makes the v4l2-compliance test fail, since g_fmt != s_fmt(g_fmt). So stop extending visible_height or coded_height; user space should just use sizeimage to get the real buffer size needed Signed-off-by: Tiffany Lin Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c index 3b0691f..9b0187e 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c @@ -487,7 +487,6 @@ static int vidioc_venc_s_fmt_out(struct file *file, void *priv, struct mtk_q_data *q_data; int ret, i; struct mtk_video_fmt *fmt; - unsigned int pitch_w_div16; struct v4l2_pix_format_mplane *pix_fmt_mp = &f->fmt.pix_mp; vq = v4l2_m2m_get_vq(ctx->m2m_ctx, f->type); @@ -530,15 +529,6 @@ static int vidioc_venc_s_fmt_out(struct file *file, void *priv, q_data->coded_width = f->fmt.pix_mp.width; q_data->coded_height = f->fmt.pix_mp.height; - pitch_w_div16 = DIV_ROUND_UP(q_data->visible_width, 16); - if (pitch_w_div16 % 8 != 0) { - /* Adjust returned width/height, so application could correctly - * allocate hw required
memory - */ - q_data->visible_height += 32; - vidioc_try_fmt(f, q_data->fmt); - } - q_data->field = f->fmt.pix_mp.field; ctx->colorspace = f->fmt.pix_mp.colorspace; ctx->ycbcr_enc = f->fmt.pix_mp.ycbcr_enc; -- cgit v1.1 From 16060f7ef660a11f282909b01fb6096e21cf5389 Mon Sep 17 00:00:00 2001 From: Tiffany Lin Date: Mon, 15 Aug 2016 00:15:44 -0300 Subject: [media] vcodec:mediatek: Add timestamp and timecode copy for V4L2 Encoder This patch add copying timestamp and timecode from src buffer to dst buffer Signed-off-by: Tiffany Lin Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c | 23 ++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c index 9b0187e..0ca230e 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c @@ -868,7 +868,8 @@ static int mtk_venc_encode_header(void *priv) { struct mtk_vcodec_ctx *ctx = priv; int ret; - struct vb2_buffer *dst_buf; + struct vb2_buffer *src_buf, *dst_buf; + struct vb2_v4l2_buffer *dst_vb2_v4l2, *src_vb2_v4l2; struct mtk_vcodec_mem bs_buf; struct venc_done_result enc_result; @@ -901,6 +902,15 @@ static int mtk_venc_encode_header(void *priv) mtk_v4l2_err("venc_if_encode failed=%d", ret); return -EINVAL; } + src_buf = v4l2_m2m_next_src_buf(ctx->m2m_ctx); + if (src_buf) { + src_vb2_v4l2 = to_vb2_v4l2_buffer(src_buf); + dst_vb2_v4l2 = to_vb2_v4l2_buffer(dst_buf); + dst_buf->timestamp = src_buf->timestamp; + dst_vb2_v4l2->timecode = src_vb2_v4l2->timecode; + } else { + mtk_v4l2_err("No timestamp for the header buffer."); + } ctx->state = MTK_STATE_HEADER; dst_buf->planes[0].bytesused = enc_result.bs_size; @@ -993,7 +1003,7 @@ static void mtk_venc_worker(struct work_struct *work) struct mtk_vcodec_mem bs_buf; struct venc_done_result enc_result; int ret, i; - struct 
vb2_v4l2_buffer *vb2_v4l2; + struct vb2_v4l2_buffer *dst_vb2_v4l2, *src_vb2_v4l2; /* check dst_buf, dst_buf may be removed in device_run * to stored encdoe header so we need check dst_buf and @@ -1033,9 +1043,14 @@ static void mtk_venc_worker(struct work_struct *work) ret = venc_if_encode(ctx, VENC_START_OPT_ENCODE_FRAME, &frm_buf, &bs_buf, &enc_result); - vb2_v4l2 = container_of(dst_buf, struct vb2_v4l2_buffer, vb2_buf); + src_vb2_v4l2 = to_vb2_v4l2_buffer(src_buf); + dst_vb2_v4l2 = to_vb2_v4l2_buffer(dst_buf); + + dst_buf->timestamp = src_buf->timestamp; + dst_vb2_v4l2->timecode = src_vb2_v4l2->timecode; + if (enc_result.is_key_frm) - vb2_v4l2->flags |= V4L2_BUF_FLAG_KEYFRAME; + dst_vb2_v4l2->flags |= V4L2_BUF_FLAG_KEYFRAME; if (ret) { v4l2_m2m_buf_done(to_vb2_v4l2_buffer(src_buf), -- cgit v1.1 From 158d6071bc0aad6663109d2fe9249c3cf570d423 Mon Sep 17 00:00:00 2001 From: Tiffany Lin Date: Mon, 15 Aug 2016 00:26:02 -0300 Subject: [media] vcodec:mediatek: change H264 profile default to profile high This patch change default H264 profile from V4L2_MPEG_VIDEO_H264_PROFILE_MAIN to V4L2_MPEG_VIDEO_H264_PROFILE_HIGH Signed-off-by: Tiffany Lin Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c index 0ca230e..2c5719a 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c @@ -1222,7 +1222,7 @@ int mtk_vcodec_enc_ctrls_setup(struct mtk_vcodec_ctx *ctx) 0, V4L2_MPEG_VIDEO_HEADER_MODE_SEPARATE); v4l2_ctrl_new_std_menu(handler, ops, V4L2_CID_MPEG_VIDEO_H264_PROFILE, V4L2_MPEG_VIDEO_H264_PROFILE_HIGH, - 0, V4L2_MPEG_VIDEO_H264_PROFILE_MAIN); + 0, V4L2_MPEG_VIDEO_H264_PROFILE_HIGH); v4l2_ctrl_new_std_menu(handler, ops, V4L2_CID_MPEG_VIDEO_H264_LEVEL, 
V4L2_MPEG_VIDEO_H264_LEVEL_4_2, 0, V4L2_MPEG_VIDEO_H264_LEVEL_4_0); -- cgit v1.1 From 2d683b6dad73b5636297ac4978f73f2c638a0b19 Mon Sep 17 00:00:00 2001 From: Tiffany Lin Date: Mon, 15 Aug 2016 00:33:32 -0300 Subject: [media] vcodec:mediatek: Refine H264 encoder driver This patch : 1. remove field and function that unused anymore 2. add support V4L2_MPEG_VIDEO_H264_LEVEL_4_2 Signed-off-by: Tiffany Lin Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c b/drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c index 9a60052..63d4be4 100644 --- a/drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c +++ b/drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c @@ -61,6 +61,8 @@ enum venc_h264_bs_mode { /* * struct venc_h264_vpu_config - Structure for h264 encoder configuration + * AP-W/R : AP is writer/reader on this item + * VPU-W/R: VPU is write/reader on this item * @input_fourcc: input fourcc * @bitrate: target bitrate (in bps) * @pic_w: picture width. Picture size is visible stream resolution, in pixels, @@ -94,13 +96,13 @@ struct venc_h264_vpu_config { /* * struct venc_h264_vpu_buf - Structure for buffer information - * @align: buffer alignment (in bytes) + * AP-W/R : AP is writer/reader on this item + * VPU-W/R: VPU is write/reader on this item * @iova: IO virtual address * @vpua: VPU side memory addr which is used by RC_CODE * @size: buffer size (in bytes) */ struct venc_h264_vpu_buf { - u32 align; u32 iova; u32 vpua; u32 size; @@ -108,6 +110,8 @@ struct venc_h264_vpu_buf { /* * struct venc_h264_vsi - Structure for VPU driver control and info share + * AP-W/R : AP is writer/reader on this item + * VPU-W/R: VPU is write/reader on this item * This structure is allocated in VPU side and shared to AP side. 
* @config: h264 encoder configuration * @work_bufs: working buffer information in VPU side @@ -150,12 +154,6 @@ struct venc_h264_inst { struct mtk_vcodec_ctx *ctx; }; -static inline void h264_write_reg(struct venc_h264_inst *inst, u32 addr, - u32 val) -{ - writel(val, inst->hw_base + addr); -} - static inline u32 h264_read_reg(struct venc_h264_inst *inst, u32 addr) { return readl(inst->hw_base + addr); @@ -214,6 +212,8 @@ static unsigned int h264_get_level(struct venc_h264_inst *inst, return 40; case V4L2_MPEG_VIDEO_H264_LEVEL_4_1: return 41; + case V4L2_MPEG_VIDEO_H264_LEVEL_4_2: + return 42; default: mtk_vcodec_debug(inst, "unsupported level %d", level); return 31; -- cgit v1.1 From 19d6837a52f1683cf448265952d559a44a7df924 Mon Sep 17 00:00:00 2001 From: Tiffany Lin Date: Mon, 15 Aug 2016 00:37:19 -0300 Subject: [media] vcodec:mediatek: Refine VP8 encoder driver This patch remove field and function that unused anymore Signed-off-by: Tiffany Lin Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/mtk-vcodec/venc/venc_vp8_if.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/media/platform/mtk-vcodec/venc/venc_vp8_if.c b/drivers/media/platform/mtk-vcodec/venc/venc_vp8_if.c index 60bbcd2..6d97584 100644 --- a/drivers/media/platform/mtk-vcodec/venc/venc_vp8_if.c +++ b/drivers/media/platform/mtk-vcodec/venc/venc_vp8_if.c @@ -56,6 +56,8 @@ enum venc_vp8_vpu_work_buf { /* * struct venc_vp8_vpu_config - Structure for vp8 encoder configuration + * AP-W/R : AP is writer/reader on this item + * VPU-W/R: VPU is write/reader on this item * @input_fourcc: input fourcc * @bitrate: target bitrate (in bps) * @pic_w: picture width. 
Picture size is visible stream resolution, in pixels, @@ -83,14 +85,14 @@ struct venc_vp8_vpu_config { }; /* - * struct venc_vp8_vpu_buf -Structure for buffer information - * @align: buffer alignment (in bytes) + * struct venc_vp8_vpu_buf - Structure for buffer information + * AP-W/R : AP is writer/reader on this item + * VPU-W/R: VPU is write/reader on this item * @iova: IO virtual address * @vpua: VPU side memory addr which is used by RC_CODE * @size: buffer size (in bytes) */ struct venc_vp8_vpu_buf { - u32 align; u32 iova; u32 vpua; u32 size; @@ -98,6 +100,8 @@ struct venc_vp8_vpu_buf { /* * struct venc_vp8_vsi - Structure for VPU driver control and info share + * AP-W/R : AP is writer/reader on this item + * VPU-W/R: VPU is write/reader on this item * This structure is allocated in VPU side and shared to AP side. * @config: vp8 encoder configuration * @work_bufs: working buffer information in VPU side @@ -138,12 +142,6 @@ struct venc_vp8_inst { struct mtk_vcodec_ctx *ctx; }; -static inline void vp8_enc_write_reg(struct venc_vp8_inst *inst, u32 addr, - u32 val) -{ - writel(val, inst->hw_base + addr); -} - static inline u32 vp8_enc_read_reg(struct venc_vp8_inst *inst, u32 addr) { return readl(inst->hw_base + addr); -- cgit v1.1 From 8fba54aebbdf1f999738121922e74bf796ad60ee Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 24 Aug 2016 18:17:04 +0200 Subject: fuse: direct-io: don't dirty ITER_BVEC pages When reading from a loop device backed by a fuse file it deadlocks on lock_page(). This is because the page is already locked by the read() operation done on the loop device. In this case we don't want to either lock the page or dirty it. So do what fs/direct-io.c does: only dirty the page for ITER_IOVEC vectors. 
Reported-by: Sheng Yang Fixes: aa4d86163e4e ("block: loop: switch to VFS ITER_BVEC") Signed-off-by: Miklos Szeredi Cc: # v4.1+ Reviewed-by: Sheng Yang Reviewed-by: Ashish Samant Tested-by: Sheng Yang Tested-by: Ashish Samant --- fs/fuse/file.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index f394aff..3988b43 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -530,13 +530,13 @@ void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos, req->out.args[0].size = count; } -static void fuse_release_user_pages(struct fuse_req *req, int write) +static void fuse_release_user_pages(struct fuse_req *req, bool should_dirty) { unsigned i; for (i = 0; i < req->num_pages; i++) { struct page *page = req->pages[i]; - if (write) + if (should_dirty) set_page_dirty_lock(page); put_page(page); } @@ -1320,6 +1320,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, loff_t *ppos, int flags) { int write = flags & FUSE_DIO_WRITE; + bool should_dirty = !write && iter_is_iovec(iter); int cuse = flags & FUSE_DIO_CUSE; struct file *file = io->file; struct inode *inode = file->f_mapping->host; @@ -1363,7 +1364,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, nres = fuse_send_read(req, io, pos, nbytes, owner); if (!io->async) - fuse_release_user_pages(req, !write); + fuse_release_user_pages(req, should_dirty); if (req->out.h.error) { err = req->out.h.error; break; -- cgit v1.1 From 486b0f7bcd64be027535811ef44195bc1027fbd3 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 19 Aug 2016 15:34:01 -0700 Subject: r5cache: set MD_JOURNAL_CLEAN correctly Currently, the code sets MD_JOURNAL_CLEAN when the array has MD_FEATURE_JOURNAL and the recovery_cp is MaxSector. The array will be MD_JOURNAL_CLEAN even if the journal device is missing. With this patch, the MD_JOURNAL_CLEAN is only set when the journal device is present.
Signed-off-by: Song Liu Signed-off-by: Shaohua Li --- drivers/md/md.c | 5 +---- drivers/md/raid5.c | 13 ++++++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index cc25cbc..4f6cf3b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1604,11 +1604,8 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) mddev->new_chunk_sectors = mddev->chunk_sectors; } - if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL) { + if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL) set_bit(MD_HAS_JOURNAL, &mddev->flags); - if (mddev->recovery_cp == MaxSector) - set_bit(MD_JOURNAL_CLEAN, &mddev->flags); - } } else if (mddev->pers == NULL) { /* Insist of good event counter while assembling, except for * spares (which don't need an event count) */ diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 4f8f524..2119e09 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6840,11 +6840,14 @@ static int raid5_run(struct mddev *mddev) if (IS_ERR(conf)) return PTR_ERR(conf); - if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !journal_dev) { - printk(KERN_ERR "md/raid:%s: journal disk is missing, force array readonly\n", - mdname(mddev)); - mddev->ro = 1; - set_disk_ro(mddev->gendisk, 1); + if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) { + if (!journal_dev) { + pr_err("md/raid:%s: journal disk is missing, force array readonly\n", + mdname(mddev)); + mddev->ro = 1; + set_disk_ro(mddev->gendisk, 1); + } else if (mddev->recovery_cp == MaxSector) + set_bit(MD_JOURNAL_CLEAN, &mddev->flags); } conf->min_offset_diff = min_offset_diff; -- cgit v1.1 From 0f6187dbe542d71ace8ba0908954b0f4f8a30a1e Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 21 Aug 2016 14:42:25 +0000 Subject: md-cluster: fix error return code in join() Fix to return error code -ENOMEM from the lockres_init() error handling case instead of 0, as done elsewhere in this function. 
Signed-off-by: Wei Yongjun Signed-off-by: Shaohua Li --- drivers/md/md-cluster.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 41573f1..34a840d 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -834,8 +834,10 @@ static int join(struct mddev *mddev, int nodes) goto err; } cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0); - if (!cinfo->ack_lockres) + if (!cinfo->ack_lockres) { + ret = -ENOMEM; goto err; + } /* get sync CR lock on ACK. */ if (dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR)) pr_err("md-cluster: failed to get a sync CR lock on ACK!(%d)\n", @@ -849,8 +851,10 @@ static int join(struct mddev *mddev, int nodes) pr_info("md-cluster: Joined cluster %s slot %d\n", str, cinfo->slot_number); snprintf(str, 64, "bitmap%04d", cinfo->slot_number - 1); cinfo->bitmap_lockres = lockres_init(mddev, str, NULL, 1); - if (!cinfo->bitmap_lockres) + if (!cinfo->bitmap_lockres) { + ret = -ENOMEM; goto err; + } if (dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW)) { pr_err("Failed to get bitmap lock\n"); ret = -EINVAL; @@ -858,8 +862,10 @@ static int join(struct mddev *mddev, int nodes) } cinfo->resync_lockres = lockres_init(mddev, "resync", NULL, 0); - if (!cinfo->resync_lockres) + if (!cinfo->resync_lockres) { + ret = -ENOMEM; goto err; + } return 0; err: -- cgit v1.1 From 27028626b4b9022dcac23688e09ea43b36e1183c Mon Sep 17 00:00:00 2001 From: Tomasz Majchrzak Date: Tue, 23 Aug 2016 10:53:57 +0200 Subject: raid10: record correct address of bad block For failed write request record block address on a device, not block address in an array. 
Signed-off-by: Tomasz Majchrzak Signed-off-by: Shaohua Li --- drivers/md/raid10.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 1a632a8..4589866 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2465,20 +2465,21 @@ static int narrow_write_error(struct r10bio *r10_bio, int i) while (sect_to_write) { struct bio *wbio; + sector_t wsector; if (sectors > sect_to_write) sectors = sect_to_write; /* Write at 'sector' for 'sectors' */ wbio = bio_clone_mddev(bio, GFP_NOIO, mddev); bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors); - wbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+ - choose_data_offset(r10_bio, rdev) + - (sector - r10_bio->sector)); + wsector = r10_bio->devs[i].addr + (sector - r10_bio->sector); + wbio->bi_iter.bi_sector = wsector + + choose_data_offset(r10_bio, rdev); wbio->bi_bdev = rdev->bdev; bio_set_op_attrs(wbio, REQ_OP_WRITE, 0); if (submit_bio_wait(wbio) < 0) /* Failure! */ - ok = rdev_set_badblocks(rdev, sector, + ok = rdev_set_badblocks(rdev, wsector, sectors, 0) && ok; -- cgit v1.1 From 5f9d1fde7d54a5d5fd8cccbee9c9c31474fcdcf2 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 22 Aug 2016 21:14:01 -0700 Subject: raid5: fix memory leak of bio integrity data Yi reported a memory leak of raid5 with DIF/DIX enabled disks. raid5 doesn't alloc/free bio, instead it reuses bios. There are two issues in current code: 1. the code calls bio_init (from init_stripe->raid5_build_block->bio_init) then bio_reset (ops_run_io). The bio is reused, so likely there is integrity data attached. bio_init will clear a pointer to integrity data and makes bio_reset can't release the data 2. bio_reset is called before dispatching bio. After bio is finished, it's possible we don't free bio's integrity data (eg, we don't call bio_reset again) Both issues will cause memory leak. The patch moves bio_init to stripe creation and bio_reset to bio end io. This will fix the two issues. 
Reported-by: Yi Zhang Signed-off-by: Shaohua Li --- drivers/md/raid5.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 2119e09..d1a279b 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1005,7 +1005,6 @@ again: set_bit(STRIPE_IO_STARTED, &sh->state); - bio_reset(bi); bi->bi_bdev = rdev->bdev; bio_set_op_attrs(bi, op, op_flags); bi->bi_end_io = op_is_write(op) @@ -1057,7 +1056,6 @@ again: set_bit(STRIPE_IO_STARTED, &sh->state); - bio_reset(rbi); rbi->bi_bdev = rrdev->bdev; bio_set_op_attrs(rbi, op, op_flags); BUG_ON(!op_is_write(op)); @@ -1990,9 +1988,11 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) put_cpu(); } -static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp) +static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp, + int disks) { struct stripe_head *sh; + int i; sh = kmem_cache_zalloc(sc, gfp); if (sh) { @@ -2001,6 +2001,12 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp) INIT_LIST_HEAD(&sh->batch_list); INIT_LIST_HEAD(&sh->lru); atomic_set(&sh->count, 1); + for (i = 0; i < disks; i++) { + struct r5dev *dev = &sh->dev[i]; + + bio_init(&dev->req); + bio_init(&dev->rreq); + } } return sh; } @@ -2008,7 +2014,7 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp) { struct stripe_head *sh; - sh = alloc_stripe(conf->slab_cache, gfp); + sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size); if (!sh) return 0; @@ -2179,7 +2185,7 @@ static int resize_stripes(struct r5conf *conf, int newsize) mutex_lock(&conf->cache_size_mutex); for (i = conf->max_nr_stripes; i; i--) { - nsh = alloc_stripe(sc, GFP_KERNEL); + nsh = alloc_stripe(sc, GFP_KERNEL, newsize); if (!nsh) break; @@ -2311,6 +2317,7 @@ static void raid5_end_read_request(struct bio * bi) (unsigned long long)sh->sector, i, atomic_read(&sh->count), bi->bi_error); if (i == disks) { + bio_reset(bi); BUG(); 
return; } @@ -2414,6 +2421,7 @@ static void raid5_end_read_request(struct bio * bi) clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); raid5_release_stripe(sh); + bio_reset(bi); } static void raid5_end_write_request(struct bio *bi) @@ -2448,6 +2456,7 @@ static void raid5_end_write_request(struct bio *bi) (unsigned long long)sh->sector, i, atomic_read(&sh->count), bi->bi_error); if (i == disks) { + bio_reset(bi); BUG(); return; } @@ -2491,18 +2500,17 @@ static void raid5_end_write_request(struct bio *bi) if (sh->batch_head && sh != sh->batch_head) raid5_release_stripe(sh->batch_head); + bio_reset(bi); } static void raid5_build_block(struct stripe_head *sh, int i, int previous) { struct r5dev *dev = &sh->dev[i]; - bio_init(&dev->req); dev->req.bi_io_vec = &dev->vec; dev->req.bi_max_vecs = 1; dev->req.bi_private = sh; - bio_init(&dev->rreq); dev->rreq.bi_io_vec = &dev->rvec; dev->rreq.bi_max_vecs = 1; dev->rreq.bi_private = sh; -- cgit v1.1 From 45c91d808ff989d950e260dab9f89e8f4a3c9c2c Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 22 Aug 2016 21:14:02 -0700 Subject: raid5: avoid unnecessary bio data set bio_reset doesn't change bi_io_vec and bi_max_vecs, so we don't need to set them every time. bi_private will be set before the bio is dispatched. 
Signed-off-by: Shaohua Li --- drivers/md/raid5.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index d1a279b..62febe8 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2005,7 +2005,12 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp, struct r5dev *dev = &sh->dev[i]; bio_init(&dev->req); + dev->req.bi_io_vec = &dev->vec; + dev->req.bi_max_vecs = 1; + bio_init(&dev->rreq); + dev->rreq.bi_io_vec = &dev->rvec; + dev->rreq.bi_max_vecs = 1; } } return sh; @@ -2507,14 +2512,6 @@ static void raid5_build_block(struct stripe_head *sh, int i, int previous) { struct r5dev *dev = &sh->dev[i]; - dev->req.bi_io_vec = &dev->vec; - dev->req.bi_max_vecs = 1; - dev->req.bi_private = sh; - - dev->rreq.bi_io_vec = &dev->rvec; - dev->rreq.bi_max_vecs = 1; - dev->rreq.bi_private = sh; - dev->flags = 0; dev->sector = raid5_compute_blocknr(sh, i, previous); } -- cgit v1.1 From af7c388a9c2e5fdd36da6eaaa35fb86fb8aefd0b Mon Sep 17 00:00:00 2001 From: Vince Hsu Date: Wed, 24 Aug 2016 15:56:56 +0200 Subject: clk: tegra: remove TEGRA_PLL_USE_LOCK for PLLD/PLLD2 Tegra114 has a HW bug that the PLLD/PLLD2 lock bit cannot be asserted when the DIS power domain is during up-powergating process but the clamp to this domain is not removed yet. That causes a timeout and aborts the power sequence, although the PLLD/PLLD2 has already locked. To remove the false alarm, we don't use the lock for PLLD/PLLD2. Just wait 1ms and treat the clocks as locked. 
Signed-off-by: Vince Hsu Tested-by: Jonathan Hunter Signed-off-by: Thierry Reding Signed-off-by: Stephen Boyd --- drivers/clk/tegra/clk-tegra114.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/tegra/clk-tegra114.c b/drivers/clk/tegra/clk-tegra114.c index 64da7b7..933b5dd 100644 --- a/drivers/clk/tegra/clk-tegra114.c +++ b/drivers/clk/tegra/clk-tegra114.c @@ -428,7 +428,7 @@ static struct tegra_clk_pll_params pll_d_params = { .div_nmp = &pllp_nmp, .freq_table = pll_d_freq_table, .flags = TEGRA_PLL_HAS_CPCON | TEGRA_PLL_SET_LFCON | - TEGRA_PLL_USE_LOCK | TEGRA_PLL_HAS_LOCK_ENABLE, + TEGRA_PLL_HAS_LOCK_ENABLE, }; static struct tegra_clk_pll_params pll_d2_params = { @@ -446,7 +446,7 @@ static struct tegra_clk_pll_params pll_d2_params = { .div_nmp = &pllp_nmp, .freq_table = pll_d_freq_table, .flags = TEGRA_PLL_HAS_CPCON | TEGRA_PLL_SET_LFCON | - TEGRA_PLL_USE_LOCK | TEGRA_PLL_HAS_LOCK_ENABLE, + TEGRA_PLL_HAS_LOCK_ENABLE, }; static const struct pdiv_map pllu_p[] = { -- cgit v1.1 From a45f9d41c9dd2c28e38b9b88f69c39bc63807de9 Mon Sep 17 00:00:00 2001 From: Xing Zheng Date: Wed, 24 Aug 2016 11:29:39 -0700 Subject: clk: rockchip: mark aclk_emmc_noc as a critical clock on rk3399 We don't have code to handle any of the noc clocks in rk3399 and they're all just listed as critical clocks. Let's do the same for aclk_emmc_noc. Without this clock being marked as critical we have problems around suspend/resume after commit 20c389e656a8 ("clk: rockchip: fix incorrect aclk_emmc source gate bits on rk3399"). Before that change we were presumably not actually gating any of these clocks because we were setting the wrong gate. 
Fixes: 20c389e656a8 ("clk: rockchip: fix incorrect aclk_emmc source gate bits on rk3399") Signed-off-by: Xing Zheng Signed-off-by: Douglas Anderson Signed-off-by: Heiko Stuebner --- drivers/clk/rockchip/clk-rk3399.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/rockchip/clk-rk3399.c b/drivers/clk/rockchip/clk-rk3399.c index ec5b2fd..cdfabeb 100644 --- a/drivers/clk/rockchip/clk-rk3399.c +++ b/drivers/clk/rockchip/clk-rk3399.c @@ -1484,6 +1484,7 @@ static const char *const rk3399_cru_critical_clocks[] __initconst = { "hclk_perilp1", "hclk_perilp1_noc", "aclk_dmac0_perilp", + "aclk_emmc_noc", "gpll_hclk_perilp1_src", "gpll_aclk_perilp0_src", "gpll_aclk_perihp_src", -- cgit v1.1 From 16590a228109e2f318d2cc6466221134cfab723a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 22 Aug 2016 14:57:42 -0400 Subject: SUNRPC: Silence WARN_ON when NFSv4.1 over RDMA is in use Using NFSv4.1 on RDMA should be safe, so broaden the new checks in rpc_create(). WARN_ON_ONCE is used, matching most other WARN call sites in clnt.c. Fixes: 39a9beab5acb ("rpc: share one xps between all backchannels") Fixes: d50039ea5ee6 ("nfsd4/rpc: move backchannel create logic...") Signed-off-by: Chuck Lever Reviewed-by: J. 
Bruce Fields Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 7f79fb7..66f23b3 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -453,7 +453,7 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, struct rpc_xprt_switch *xps; if (args->bc_xprt && args->bc_xprt->xpt_bc_xps) { - WARN_ON(args->protocol != XPRT_TRANSPORT_BC_TCP); + WARN_ON_ONCE(!(args->protocol & XPRT_TRANSPORT_BC)); xps = args->bc_xprt->xpt_bc_xps; xprt_switch_get(xps); } else { @@ -520,7 +520,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) char servername[48]; if (args->bc_xprt) { - WARN_ON(args->protocol != XPRT_TRANSPORT_BC_TCP); + WARN_ON_ONCE(!(args->protocol & XPRT_TRANSPORT_BC)); xprt = args->bc_xprt->xpt_bc_xprt; if (xprt) { xprt_get(xprt); -- cgit v1.1 From 23fd537c9508fb6e3b93ddf23982f51afc087781 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Wed, 24 Aug 2016 14:33:27 +0300 Subject: usb: gadget: udc: core: don't starve DMA resources Always unmap all SG entries as required by DMA API Fixes: a698908d3b3b ("usb: gadget: add generic map/unmap request utilities") Cc: # v3.4+ Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index 934f838..40c04bb 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -827,7 +827,7 @@ void usb_gadget_unmap_request_by_dev(struct device *dev, return; if (req->num_mapped_sgs) { - dma_unmap_sg(dev, req->sg, req->num_mapped_sgs, + dma_unmap_sg(dev, req->sg, req->num_sgs, is_in ? 
DMA_TO_DEVICE : DMA_FROM_DEVICE); req->num_mapped_sgs = 0; -- cgit v1.1 From 696fe69d7e631f00f23b0ef1694d9b90058dca54 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Wed, 24 Aug 2016 14:32:39 +0300 Subject: usb: dwc3: debug: fix ep name on trace output There was a typo when generating endpoint name which would be very confusing when debugging. Fix it. Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/debug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/debug.h b/drivers/usb/dwc3/debug.h index 22dfc3d..33ab2a2 100644 --- a/drivers/usb/dwc3/debug.h +++ b/drivers/usb/dwc3/debug.h @@ -192,7 +192,7 @@ dwc3_ep_event_string(const struct dwc3_event_depevt *event) int ret; ret = sprintf(str, "ep%d%s: ", epnum >> 1, - (epnum & 1) ? "in" : "in"); + (epnum & 1) ? "in" : "out"); if (ret < 0) return "UNKNOWN"; -- cgit v1.1 From 6f8245b4e37c2072d3daea24e19dbc0162ffd22c Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Wed, 24 Aug 2016 14:40:13 +0300 Subject: usb: dwc3: gadget: always decrement by 1 We need to decrement in both cases (enq > deq and enq < deq) Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/gadget.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 122e64d..7a8d3d8 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -884,12 +884,9 @@ static u32 dwc3_calc_trbs_left(struct dwc3_ep *dep) return DWC3_TRB_NUM - 1; } - trbs_left = dep->trb_dequeue - dep->trb_enqueue; + trbs_left = dep->trb_dequeue - dep->trb_enqueue - 1; trbs_left &= (DWC3_TRB_NUM - 1); - if (dep->trb_dequeue < dep->trb_enqueue) - trbs_left--; - return trbs_left; } -- cgit v1.1 From 89e1f6d2b956649fbe0704d543a90b8e0cf872b0 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Mon, 22 Aug 2016 01:02:18 +0800 Subject: netfilter: nft_reject: restrict to INPUT/FORWARD/OUTPUT After I add the nft rule "nft add rule filter prerouting reject with tcp reset", kernel panic 
happened on my system: NULL pointer dereference at ... IP: [] nf_send_reset+0xaf/0x400 Call Trace: [] ? nf_reject_ip_tcphdr_get+0x160/0x160 [] nft_reject_ipv4_eval+0x61/0xb0 [nft_reject_ipv4] [] nft_do_chain+0x1fa/0x890 [nf_tables] [] ? __nft_trace_packet+0x170/0x170 [nf_tables] [] ? nf_ct_invert_tuple+0xb0/0xc0 [nf_conntrack] [] ? nf_nat_setup_info+0x5d4/0x650 [nf_nat] [...] Because in the PREROUTING chain, routing information is not exist, then we will dereference the NULL pointer and oops happen. So we restrict reject expression to INPUT, FORWARD and OUTPUT chain. This is consistent with iptables REJECT target. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nft_reject.h | 4 ++++ net/ipv4/netfilter/nft_reject_ipv4.c | 1 + net/ipv6/netfilter/nft_reject_ipv6.c | 1 + net/netfilter/nft_reject.c | 16 ++++++++++++++++ net/netfilter/nft_reject_inet.c | 7 ++++++- 5 files changed, 28 insertions(+), 1 deletion(-) diff --git a/include/net/netfilter/nft_reject.h b/include/net/netfilter/nft_reject.h index 60fa153..02e28c5 100644 --- a/include/net/netfilter/nft_reject.h +++ b/include/net/netfilter/nft_reject.h @@ -8,6 +8,10 @@ struct nft_reject { extern const struct nla_policy nft_reject_policy[]; +int nft_reject_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data); + int nft_reject_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]); diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c index c24f41c..2c2553b 100644 --- a/net/ipv4/netfilter/nft_reject_ipv4.c +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -46,6 +46,7 @@ static const struct nft_expr_ops nft_reject_ipv4_ops = { .eval = nft_reject_ipv4_eval, .init = nft_reject_init, .dump = nft_reject_dump, + .validate = nft_reject_validate, }; static struct nft_expr_type nft_reject_ipv4_type __read_mostly = { diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c 
b/net/ipv6/netfilter/nft_reject_ipv6.c index 533cd57..92bda99 100644 --- a/net/ipv6/netfilter/nft_reject_ipv6.c +++ b/net/ipv6/netfilter/nft_reject_ipv6.c @@ -47,6 +47,7 @@ static const struct nft_expr_ops nft_reject_ipv6_ops = { .eval = nft_reject_ipv6_eval, .init = nft_reject_init, .dump = nft_reject_dump, + .validate = nft_reject_validate, }; static struct nft_expr_type nft_reject_ipv6_type __read_mostly = { diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c index 0522fc9..c64de3f7 100644 --- a/net/netfilter/nft_reject.c +++ b/net/netfilter/nft_reject.c @@ -26,11 +26,27 @@ const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = { }; EXPORT_SYMBOL_GPL(nft_reject_policy); +int nft_reject_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + return nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT)); +} +EXPORT_SYMBOL_GPL(nft_reject_validate); + int nft_reject_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_reject *priv = nft_expr_priv(expr); + int err; + + err = nft_reject_validate(ctx, expr, NULL); + if (err < 0) + return err; if (tb[NFTA_REJECT_TYPE] == NULL) return -EINVAL; diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c index 759ca52..e79d9ca 100644 --- a/net/netfilter/nft_reject_inet.c +++ b/net/netfilter/nft_reject_inet.c @@ -66,7 +66,11 @@ static int nft_reject_inet_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_reject *priv = nft_expr_priv(expr); - int icmp_code; + int icmp_code, err; + + err = nft_reject_validate(ctx, expr, NULL); + if (err < 0) + return err; if (tb[NFTA_REJECT_TYPE] == NULL) return -EINVAL; @@ -124,6 +128,7 @@ static const struct nft_expr_ops nft_reject_inet_ops = { .eval = nft_reject_inet_eval, .init = nft_reject_inet_init, .dump = nft_reject_inet_dump, + .validate = 
nft_reject_validate, }; static struct nft_expr_type nft_reject_inet_type __read_mostly = { -- cgit v1.1 From 93fac10b99d78eb2c50a739cba2e590c7332d539 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Mon, 22 Aug 2016 21:58:16 +0800 Subject: netfilter: nfnetlink: use list_for_each_entry_safe to delete all objects cttimeout and acct objects are deleted from the list while traversing it, so use list_for_each_entry is unsafe here. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_acct.c | 6 +++--- net/netfilter/nfnetlink_cttimeout.c | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 70eb2f6a..d44d89b 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -343,12 +343,12 @@ static int nfnl_acct_del(struct net *net, struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const tb[]) { - char *acct_name; - struct nf_acct *cur; + struct nf_acct *cur, *tmp; int ret = -ENOENT; + char *acct_name; if (!tb[NFACCT_NAME]) { - list_for_each_entry(cur, &net->nfnl_acct_list, head) + list_for_each_entry_safe(cur, tmp, &net->nfnl_acct_list, head) nfnl_acct_try_del(cur); return 0; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 68216cd..f74fee1 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -350,12 +350,13 @@ static int cttimeout_del_timeout(struct net *net, struct sock *ctnl, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { - struct ctnl_timeout *cur; + struct ctnl_timeout *cur, *tmp; int ret = -ENOENT; char *name; if (!cda[CTA_TIMEOUT_NAME]) { - list_for_each_entry(cur, &net->nfct_timeout_list, head) + list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, + head) ctnl_timeout_try_del(net, cur); return 0; -- cgit v1.1 From 23aaba5ad55547db62bada5066c8fb6412d5b1c2 Mon Sep 17 00:00:00 
2001 From: Liping Zhang Date: Mon, 22 Aug 2016 21:58:17 +0800 Subject: netfilter: cttimeout: put back l4proto when replacing timeout policy We forget to call nf_ct_l4proto_put when replacing the existing timeout policy. Actually, there's no need to get ct l4proto before doing replace, so we can move it to a later position. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_cttimeout.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index f74fee1..6844c7a 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -98,31 +98,28 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl, break; } - l4proto = nf_ct_l4proto_find_get(l3num, l4num); - - /* This protocol is not supportted, skip. */ - if (l4proto->l4proto != l4num) { - ret = -EOPNOTSUPP; - goto err_proto_put; - } - if (matching) { if (nlh->nlmsg_flags & NLM_F_REPLACE) { /* You cannot replace one timeout policy by another of * different kind, sorry. */ if (matching->l3num != l3num || - matching->l4proto->l4proto != l4num) { - ret = -EINVAL; - goto err_proto_put; - } - - ret = ctnl_timeout_parse_policy(&matching->data, - l4proto, net, - cda[CTA_TIMEOUT_DATA]); - return ret; + matching->l4proto->l4proto != l4num) + return -EINVAL; + + return ctnl_timeout_parse_policy(&matching->data, + matching->l4proto, net, + cda[CTA_TIMEOUT_DATA]); } - ret = -EBUSY; + + return -EBUSY; + } + + l4proto = nf_ct_l4proto_find_get(l3num, l4num); + + /* This protocol is not supportted, skip.
*/ + if (l4proto->l4proto != l4num) { + ret = -EOPNOTSUPP; goto err_proto_put; } -- cgit v1.1 From 533e33009897c7dd1b0424c0d4b3331b222d5681 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Mon, 22 Aug 2016 21:58:18 +0800 Subject: netfilter: cttimeout: unlink timeout objs in the unconfirmed ct lists KASAN reported this bug: BUG: KASAN: use-after-free in icmp_packet+0x25/0x50 [nf_conntrack_ipv4] at addr ffff880002db08c8 Read of size 4 by task lt-nf-queue/19041 Call Trace: [] dump_stack+0x63/0x88 [] kasan_report_error+0x528/0x560 [] kasan_report+0x58/0x60 [] ? icmp_packet+0x25/0x50 [nf_conntrack_ipv4] [] __asan_load4+0x61/0x80 [] icmp_packet+0x25/0x50 [nf_conntrack_ipv4] [] nf_conntrack_in+0x550/0x980 [nf_conntrack] [] ? __nf_conntrack_confirm+0xb10/0xb10 [nf_conntrack] [ ... ] The main reason is that we missed to unlink the timeout objects in the unconfirmed ct lists, so we will access the timeout objects that have already been freed. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_cttimeout.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 6844c7a..139e086 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -302,7 +302,16 @@ static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout) const struct hlist_nulls_node *nn; unsigned int last_hsize; spinlock_t *lock; - int i; + int i, cpu; + + for_each_possible_cpu(cpu) { + struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); + + spin_lock_bh(&pcpu->lock); + hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode) + untimeout(h, timeout); + spin_unlock_bh(&pcpu->lock); + } local_bh_disable(); restart: -- cgit v1.1 From 960fa72f67f1be6891d63a5518860d1ae4e14b88 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Mon, 22 Aug 2016 22:57:56 +0800 Subject: netfilter: nft_meta: improve the validity check of pkttype 
set expr "meta pkttype set" is only supported on prerouting chain with bridge family and ingress chain with netdev family. But the validate check is incomplete, and the user can add the nft rules on input chain with bridge family, for example: # nft add table bridge filter # nft add chain bridge filter input {type filter hook input \ priority 0 \;} # nft add chain bridge filter test # nft add rule bridge filter test meta pkttype set unicast # nft add rule bridge filter input jump test This patch fixes the problem. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nft_meta.h | 4 ++++ net/bridge/netfilter/nft_meta_bridge.c | 1 + net/netfilter/nft_meta.c | 17 +++++++++++++---- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h index d27588c..1139cde 100644 --- a/include/net/netfilter/nft_meta.h +++ b/include/net/netfilter/nft_meta.h @@ -36,4 +36,8 @@ void nft_meta_set_eval(const struct nft_expr *expr, void nft_meta_set_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr); +int nft_meta_set_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data); + #endif diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index 4b901d9..ad47a92 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -86,6 +86,7 @@ static const struct nft_expr_ops nft_meta_bridge_set_ops = { .init = nft_meta_set_init, .destroy = nft_meta_set_destroy, .dump = nft_meta_set_dump, + .validate = nft_meta_set_validate, }; static const struct nft_expr_ops * diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 2863f34..8a6bc76 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -291,10 +291,16 @@ int nft_meta_get_init(const struct nft_ctx *ctx, } EXPORT_SYMBOL_GPL(nft_meta_get_init); -static int 
nft_meta_set_init_pkttype(const struct nft_ctx *ctx) +int nft_meta_set_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) { + struct nft_meta *priv = nft_expr_priv(expr); unsigned int hooks; + if (priv->key != NFT_META_PKTTYPE) + return 0; + switch (ctx->afi->family) { case NFPROTO_BRIDGE: hooks = 1 << NF_BR_PRE_ROUTING; @@ -308,6 +314,7 @@ static int nft_meta_set_init_pkttype(const struct nft_ctx *ctx) return nft_chain_validate_hooks(ctx->chain, hooks); } +EXPORT_SYMBOL_GPL(nft_meta_set_validate); int nft_meta_set_init(const struct nft_ctx *ctx, const struct nft_expr *expr, @@ -327,15 +334,16 @@ int nft_meta_set_init(const struct nft_ctx *ctx, len = sizeof(u8); break; case NFT_META_PKTTYPE: - err = nft_meta_set_init_pkttype(ctx); - if (err) - return err; len = sizeof(u8); break; default: return -EOPNOTSUPP; } + err = nft_meta_set_validate(ctx, expr, NULL); + if (err < 0) + return err; + priv->sreg = nft_parse_register(tb[NFTA_META_SREG]); err = nft_validate_register_load(priv->sreg, len); if (err < 0) @@ -407,6 +415,7 @@ static const struct nft_expr_ops nft_meta_set_ops = { .init = nft_meta_set_init, .destroy = nft_meta_set_destroy, .dump = nft_meta_set_dump, + .validate = nft_meta_set_validate, }; static const struct nft_expr_ops * -- cgit v1.1 From 4249fc1f023a2106170bbf715e2e1a0ebc2d5b1f Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 23 Aug 2016 10:20:31 +0200 Subject: netfilter: ebtables: put module reference when an incorrect extension is found commit bcf493428840 ("netfilter: ebtables: Fix extension lookup with identical name") added a second lookup in case the extension that was found during the first lookup matched another extension with the same name, but didn't release the reference on the incorrect module. 
Fixes: bcf493428840 ("netfilter: ebtables: Fix extension lookup with identical name") Signed-off-by: Sabrina Dubroca Acked-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtables.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index cceac5b..0833c25 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -368,6 +368,8 @@ ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par, match = xt_find_match(NFPROTO_BRIDGE, m->u.name, 0); if (IS_ERR(match) || match->family != NFPROTO_BRIDGE) { + if (!IS_ERR(match)) + module_put(match->me); request_module("ebt_%s", m->u.name); match = xt_find_match(NFPROTO_BRIDGE, m->u.name, 0); } -- cgit v1.1 From a7d4b8f2565ad0dfdff9a222d1d87990c73b36e8 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 16 Aug 2016 14:38:24 +0200 Subject: KVM: s390: don't use current->thread.fpu.* when accessing registers As the meaning of these variables and pointers seems to change more frequently, let's directly access our save area, instead of going via current->thread. Right now, this is broken for set/get_fpu. They simply overwrite the host registers, as the pointers to the current save area were turned into the static host save area. 
Cc: stable@vger.kernel.org # 4.7 Fixes: 3f6813b9a5e0 ("s390/fpu: allocate 'struct fpu' with the task_struct") Reported-by: Hao QingFeng Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f142215..607ec91 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2231,9 +2231,10 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) return -EINVAL; current->thread.fpu.fpc = fpu->fpc; if (MACHINE_HAS_VX) - convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs); + convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs, + (freg_t *) fpu->fprs); else - memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs)); + memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs)); return 0; } @@ -2242,9 +2243,10 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) /* make sure we have the latest values */ save_fpu_regs(); if (MACHINE_HAS_VX) - convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs); + convert_vx_to_fp((freg_t *) fpu->fprs, + (__vector128 *) vcpu->run->s.regs.vrs); else - memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs)); + memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs)); fpu->fpc = current->thread.fpu.fpc; return 0; } -- cgit v1.1 From 9a5382e8ffe86412f442f6a34fde43f94aae530b Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 22 Aug 2016 16:52:21 +0200 Subject: i2c: mux: demux-pinctrl: invalidate properly when switching fails Make sure the index to the active channel is invalidated when switching fails. 
Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang --- drivers/i2c/muxes/i2c-demux-pinctrl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/muxes/i2c-demux-pinctrl.c b/drivers/i2c/muxes/i2c-demux-pinctrl.c index 215ac87..b6b9d25 100644 --- a/drivers/i2c/muxes/i2c-demux-pinctrl.c +++ b/drivers/i2c/muxes/i2c-demux-pinctrl.c @@ -107,6 +107,7 @@ static int i2c_demux_activate_master(struct i2c_demux_pinctrl_priv *priv, u32 ne of_changeset_revert(&priv->chan[new_chan].chgset); err: dev_err(priv->dev, "failed to setup demux-adapter %d (%d)\n", new_chan, ret); + priv->cur_chan = -EINVAL; return ret; } -- cgit v1.1 From 488d69ea72457c7566b36eb0d484d579f19963c5 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 24 Aug 2016 11:19:28 +0200 Subject: i2c: sh_mobile: use proper device with dma_mapping_error We must use the same device we used for mapping. Signed-off-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-sh_mobile.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c index 6fb3e26..05b1eea 100644 --- a/drivers/i2c/busses/i2c-sh_mobile.c +++ b/drivers/i2c/busses/i2c-sh_mobile.c @@ -610,7 +610,7 @@ static void sh_mobile_i2c_xfer_dma(struct sh_mobile_i2c_data *pd) return; dma_addr = dma_map_single(chan->device->dev, pd->msg->buf, pd->msg->len, dir); - if (dma_mapping_error(pd->dev, dma_addr)) { + if (dma_mapping_error(chan->device->dev, dma_addr)) { dev_dbg(pd->dev, "dma map failed, using PIO\n"); return; } -- cgit v1.1 From c13c29186c59b056d6ec141b5967c2ca3efc0c16 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 24 Aug 2016 11:19:29 +0200 Subject: i2c: rcar: use proper device with dma_mapping_error We must use the same device we used for mapping. 
Signed-off-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-rcar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c index 52407f3..9bd849d 100644 --- a/drivers/i2c/busses/i2c-rcar.c +++ b/drivers/i2c/busses/i2c-rcar.c @@ -378,7 +378,7 @@ static void rcar_i2c_dma(struct rcar_i2c_priv *priv) } dma_addr = dma_map_single(chan->device->dev, buf, len, dir); - if (dma_mapping_error(dev, dma_addr)) { + if (dma_mapping_error(chan->device->dev, dma_addr)) { dev_dbg(dev, "dma map failed, using PIO\n"); return; } -- cgit v1.1 From b31cc4b37e8e33e84e9f990d0d84387f37ef01a1 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 15 Aug 2016 00:47:22 +0200 Subject: i2c: bcm-kona: fix inconsistent indenting smatch rightfully says: drivers/i2c/busses/i2c-bcm-kona.c:646 bcm_kona_i2c_xfer() warn: inconsistent indenting Signed-off-by: Wolfram Sang Reviewed-by: Tim Kryger --- drivers/i2c/busses/i2c-bcm-kona.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-bcm-kona.c b/drivers/i2c/busses/i2c-bcm-kona.c index f987432..258cb9a 100644 --- a/drivers/i2c/busses/i2c-bcm-kona.c +++ b/drivers/i2c/busses/i2c-bcm-kona.c @@ -643,7 +643,7 @@ static int bcm_kona_i2c_xfer(struct i2c_adapter *adapter, if (rc < 0) { dev_err(dev->device, "restart cmd failed rc = %d\n", rc); - goto xfer_send_stop; + goto xfer_send_stop; } } -- cgit v1.1 From becc8d3ccdcfa746fc4302092da11ad763bc9e49 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 18 Aug 2016 03:08:22 +0900 Subject: MAINTAINERS: add tree entry for ARM/UniPhier architecture Add T: entry for a new git tree, which I expect UniPhier SoC updates will be pulled from. 
Signed-off-by: Masahiro Yamada Signed-off-by: Olof Johansson --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0bbe4b1..a437901 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1822,6 +1822,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-stericsson.git ARM/UNIPHIER ARCHITECTURE M: Masahiro Yamada L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +T: git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-uniphier.git S: Maintained F: arch/arm/boot/dts/uniphier* F: arch/arm/include/asm/hardware/cache-uniphier.h -- cgit v1.1 From 0a10e85b6c546c2f34ee257b955f33dbb4ece746 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Wed, 24 Aug 2016 15:14:38 +0200 Subject: ARM: tegra: Correct polarity for Tegra114 PMIC interrupt The ARM GIC only supports interrupts with either level-high or rising-edge types for SPIs. The interrupt type for the Palmas PMIC used for Tegra114 boards is specified as level-low which is invalid for the GIC. This has gone undetected because until recently, failures to set the interrupt type when the interrupts are mapped via firmware (such as device-tree) have not been reported. Since commits 4b357daed698 ("genirq: Look-up trigger type if not specified by caller") and 1e2a7d78499e ("irqdomain: Don't set type when mapping an IRQ"), failure to set the interrupt type will cause the requesting of the interrupt to fail and exposing incorrectly configured interrupts. Please note that although the interrupt type was never being set for the Palmas PMIC, it was still working fine, because the default type setting for the interrupt, 'level-high', happen to match the correct type for the interrupt. 
Finally, it should be noted that the Palmas interrupt from the PMIC is actually 'level-low', however, this interrupt signal is inverted by the Tegra PMC and so the GIC actually sees a 'level-high' interrupt which is what should be specified in the device-tree interrupt specifier. Signed-off-by: Jon Hunter Signed-off-by: Thierry Reding Signed-off-by: Olof Johansson --- arch/arm/boot/dts/tegra114-dalmore.dts | 2 +- arch/arm/boot/dts/tegra114-roth.dts | 2 +- arch/arm/boot/dts/tegra114-tn7.dts | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/tegra114-dalmore.dts b/arch/arm/boot/dts/tegra114-dalmore.dts index 1dfc492..1444fbd 100644 --- a/arch/arm/boot/dts/tegra114-dalmore.dts +++ b/arch/arm/boot/dts/tegra114-dalmore.dts @@ -897,7 +897,7 @@ palmas: tps65913@58 { compatible = "ti,palmas"; reg = <0x58>; - interrupts = <0 86 IRQ_TYPE_LEVEL_LOW>; + interrupts = <0 86 IRQ_TYPE_LEVEL_HIGH>; #interrupt-cells = <2>; interrupt-controller; diff --git a/arch/arm/boot/dts/tegra114-roth.dts b/arch/arm/boot/dts/tegra114-roth.dts index 70cf409..966a7fc 100644 --- a/arch/arm/boot/dts/tegra114-roth.dts +++ b/arch/arm/boot/dts/tegra114-roth.dts @@ -802,7 +802,7 @@ palmas: pmic@58 { compatible = "ti,palmas"; reg = <0x58>; - interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_LOW>; + interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>; #interrupt-cells = <2>; interrupt-controller; diff --git a/arch/arm/boot/dts/tegra114-tn7.dts b/arch/arm/boot/dts/tegra114-tn7.dts index 17dd145..a161fa1 100644 --- a/arch/arm/boot/dts/tegra114-tn7.dts +++ b/arch/arm/boot/dts/tegra114-tn7.dts @@ -63,7 +63,7 @@ palmas: pmic@58 { compatible = "ti,palmas"; reg = <0x58>; - interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_LOW>; + interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>; #interrupt-cells = <2>; interrupt-controller; -- cgit v1.1 From ed150e1a5cf20c04cf0b2d2c34e498fc1d6519be Mon Sep 17 00:00:00 2001 From: "Darrick J.
Wong" Date: Fri, 26 Aug 2016 15:58:40 +1000 Subject: xfs: don't perform lookups on zero-height btrees If the caller passes in a cursor to a zero-height btree (which is impossible), we never set block to anything but NULL, which causes the later dereference of it to crash. Instead, just return -EFSCORRUPTED. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_btree.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index b5c213a..33f1406 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -1814,6 +1814,10 @@ xfs_btree_lookup( XFS_BTREE_STATS_INC(cur, lookup); + /* No such thing as a zero-level tree. */ + if (cur->bc_nlevels == 0) + return -EFSCORRUPTED; + block = NULL; keyno = 0; -- cgit v1.1 From 738f57c16a2bb527c705641f0fc1c68ff8cba72a Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 26 Aug 2016 15:59:19 +1000 Subject: xfs: disallow mounting of realtime + rmap filesystems Since the kernel doesn't currently support the realtime rmapbt, don't allow such filesystems to be mounted. Support will appear in a future release. Signed-off-by: Darrick J. Wong Reviewed-by: Carlos Maiolino Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/xfs/xfs_super.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 24ef83e..fd6be45 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1574,9 +1574,16 @@ xfs_fs_fill_super( } } - if (xfs_sb_version_hasrmapbt(&mp->m_sb)) + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { + if (mp->m_sb.sb_rblocks) { + xfs_alert(mp, + "EXPERIMENTAL reverse mapping btree not compatible with realtime device!"); + error = -EINVAL; + goto out_filestream_unmount; + } xfs_alert(mp, "EXPERIMENTAL reverse mapping btree feature enabled. 
Use at your own risk!"); + } error = xfs_mountfs(mp); if (error) -- cgit v1.1 From da1f039d6947b1a49f13b39a6de0df2a3e9e1ed1 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 26 Aug 2016 15:59:31 +1000 Subject: xfs: don't log the entire end of the AGF When we're logging the last non-spare field in the AGF, we don't need to log the spare fields, so plumb in a new AGF logging flag to help us avoid that. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_alloc.c | 2 ++ fs/xfs/libxfs/xfs_format.h | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 3dd8f1d..05b5243 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2278,6 +2278,8 @@ xfs_alloc_log_agf( offsetof(xfs_agf_t, agf_btreeblks), offsetof(xfs_agf_t, agf_uuid), offsetof(xfs_agf_t, agf_rmap_blocks), + /* needed so that we don't log the whole rest of the structure: */ + offsetof(xfs_agf_t, agf_spare64), sizeof(xfs_agf_t) }; diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index e6a8bea..270fb5c 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -674,7 +674,8 @@ typedef struct xfs_agf { #define XFS_AGF_BTREEBLKS 0x00000800 #define XFS_AGF_UUID 0x00001000 #define XFS_AGF_RMAP_BLOCKS 0x00002000 -#define XFS_AGF_NUM_BITS 14 +#define XFS_AGF_SPARE64 0x00004000 +#define XFS_AGF_NUM_BITS 15 #define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1) #define XFS_AGF_FLAGS \ @@ -691,7 +692,8 @@ typedef struct xfs_agf { { XFS_AGF_LONGEST, "LONGEST" }, \ { XFS_AGF_BTREEBLKS, "BTREEBLKS" }, \ { XFS_AGF_UUID, "UUID" }, \ - { XFS_AGF_RMAP_BLOCKS, "RMAP_BLOCKS" } + { XFS_AGF_RMAP_BLOCKS, "RMAP_BLOCKS" }, \ + { XFS_AGF_SPARE64, "SPARE64" } /* disk block (xfs_daddr_t) in the AG */ #define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log)) -- cgit v1.1 From 722278997bc964349e23e7061d541f8df3133a04 Mon Sep 17 
00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 26 Aug 2016 15:59:50 +1000 Subject: xfs: fix some key handling problems in _btree_simple_query_range We only need the record's high key for the first record that we look at; for all records, we /definitely/ need the regular record key. Therefore, fix how the simple range query function gets its keys. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_btree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 33f1406..b70d9f9 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -4563,10 +4563,10 @@ xfs_btree_simple_query_range( error = xfs_btree_get_rec(cur, &recp, &stat); if (error || !stat) break; - cur->bc_ops->init_high_key_from_rec(&rec_key, recp); /* Skip if high_key(rec) < low_key. */ if (firstrec) { + cur->bc_ops->init_high_key_from_rec(&rec_key, recp); firstrec = false; diff = cur->bc_ops->diff_two_keys(cur, low_key, &rec_key); @@ -4575,6 +4575,7 @@ xfs_btree_simple_query_range( } /* Stop if high_key < low_key(rec). */ + cur->bc_ops->init_key_from_rec(&rec_key, recp); diff = cur->bc_ops->diff_two_keys(cur, &rec_key, high_key); if (diff > 0) break; -- cgit v1.1 From 5b5c2dbd3c9bcfa89fba9709c12ecc0a445c6e40 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 26 Aug 2016 16:00:10 +1000 Subject: xfs: simple btree query range should look right if LE lookup fails If the initial LOOKUP_LE in the simple query range fails to find anything, we should attempt to increment the btree cursor to see if there actually /are/ records for what we're trying to find. Without this patch, a bnobt range query of (0, $agsize) returns no results because the leftmost record never has a startblock of zero. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_btree.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index b70d9f9..0856979 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -4558,6 +4558,13 @@ xfs_btree_simple_query_range( if (error) goto out; + /* Nothing? See if there's anything to the right. */ + if (!stat) { + error = xfs_btree_increment(cur, 0, &stat); + if (error) + goto out; + } + while (stat) { /* Find the record. */ error = xfs_btree_get_rec(cur, &recp, &stat); -- cgit v1.1 From f3d7ebdeb2c297bd26272384e955033493ca291c Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 26 Aug 2016 16:01:30 +1000 Subject: xfs: fix superblock inprogress check From inspection, the superblock sb_inprogress check is done in the verifier and triggered only for the primary superblock via a "bp->b_bn == XFS_SB_DADDR" check. Unfortunately, the primary superblock is an uncached buffer, and hence it is configured by xfs_buf_read_uncached() with: bp->b_bn = XFS_BUF_DADDR_NULL; /* always null for uncached buffers */ And so this check never triggers. Fix it. cc: Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_sb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 0e3d4f5..4aecc5f 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -583,7 +583,8 @@ xfs_sb_verify( * Only check the in progress field for the primary superblock as * mkfs.xfs doesn't clear it from secondary superblocks. 
*/ - return xfs_mount_validate_sb(mp, &sb, bp->b_bn == XFS_SB_DADDR, + return xfs_mount_validate_sb(mp, &sb, + bp->b_maps[0].bm_bn == XFS_SB_DADDR, check_version); } -- cgit v1.1 From 800b2694f890cc35a1bda63501fc71c94389d517 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 26 Aug 2016 16:01:59 +1000 Subject: xfs: prevent dropping ioend completions during buftarg wait xfs_wait_buftarg() waits for all pending I/O, drains the ioend completion workqueue and walks the LRU until all buffers in the cache have been released. This is traditionally an unmount operation, but the mechanism is also reused during filesystem freeze. xfs_wait_buftarg() invokes drain_workqueue() as part of the quiesce, which is intended more for a shutdown sequence in that it indicates to the queue that new operations are not expected once the drain has begun. New work jobs after this point result in a WARN_ON_ONCE() and are otherwise dropped. With filesystem freeze, however, read operations are allowed and can proceed during or after the workqueue drain. If such a read occurs during the drain sequence, the workqueue infrastructure complains about the queued ioend completion work item and drops it on the floor. As a result, the buffer remains on the LRU and the freeze never completes. Despite the fact that the overall buffer cache cleanup is not necessary during freeze, fix up this operation such that it is safe to invoke during non-unmount quiesce operations. Replace the drain_workqueue() call with flush_workqueue(), which runs a similar serialization on pending workqueue jobs without causing new jobs to be dropped. This is safe for unmount as unmount independently locks out new operations by the time xfs_wait_buftarg() is invoked.
cc: Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/xfs/xfs_buf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 607cc29..b5b9bff 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1611,7 +1611,7 @@ xfs_wait_buftarg( */ while (percpu_counter_sum(&btp->bt_io_count)) delay(100); - drain_workqueue(btp->bt_mount->m_buf_workqueue); + flush_workqueue(btp->bt_mount->m_buf_workqueue); /* loop until there is nothing left on the lru list. */ while (list_lru_count(&btp->bt_lru)) { -- cgit v1.1 From 0811ef7e2f5470833a353426a6fbe0b845aea926 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 11 Aug 2016 10:50:41 +0100 Subject: bus: arm-ccn: fix PMU interrupt flags Currently the IRQ core is permitted to make the CCN PMU IRQ handler threaded, and will allow userspace to change the CPU affinity of the interrupt behind our back. Both of these could violate our synchronisation requirements with the core perf code, which relies upon strict CPU affinity and disabling of interrupts to guarantee mutual exclusion in some cases. As with the CPU PMU drivers, we should request the interrupt with IRQF_NOBALANCING and IRQF_NO_THREAD, to avoid these issues. 
Signed-off-by: Mark Rutland Acked-by: Pawel Moll Reviewed-by: Will Deacon Signed-off-by: Pawel Moll --- drivers/bus/arm-ccn.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c index 02f81e3..c826bb2 100644 --- a/drivers/bus/arm-ccn.c +++ b/drivers/bus/arm-ccn.c @@ -1496,8 +1496,9 @@ static int arm_ccn_probe(struct platform_device *pdev) /* Can set 'disable' bits, so can acknowledge interrupts */ writel(CCN_MN_ERRINT_STATUS__PMU_EVENTS__ENABLE, ccn->base + CCN_MN_ERRINT_STATUS); - err = devm_request_irq(ccn->dev, irq, arm_ccn_irq_handler, 0, - dev_name(ccn->dev), ccn); + err = devm_request_irq(ccn->dev, irq, arm_ccn_irq_handler, + IRQF_NOBALANCING | IRQF_NO_THREAD, + dev_name(ccn->dev), ccn); if (err) return err; -- cgit v1.1 From 5b1e01f3ce15d3a8f2af5d38cc31f0d5c3c11dae Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 11 Aug 2016 10:50:42 +0100 Subject: bus: arm-ccn: fix hrtimer registration The CCN PMU driver has a single hrtimer, used to simulate a periodic interrupt on systems where the overflow interrupt is not possible to use. The hrtimer is started when any event is started, and cancelled when any event is stopped. Thus, stopping a single event is sufficient to disable the hrtimer, and overflows (of other events) may be lost. To avoid this, this patch reworks the hrtimer start/cancel to only occur when the first event is added to a PMU, and the last event removed, making use of the existing bitmap counting active events.
Signed-off-by: Mark Rutland Signed-off-by: Pawel Moll --- drivers/bus/arm-ccn.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c index c826bb2..12c1fd1 100644 --- a/drivers/bus/arm-ccn.c +++ b/drivers/bus/arm-ccn.c @@ -940,15 +940,6 @@ static void arm_ccn_pmu_event_start(struct perf_event *event, int flags) arm_ccn_pmu_read_counter(ccn, hw->idx)); hw->state = 0; - /* - * Pin the timer, so that the overflows are handled by the chosen - * event->cpu (this is the same one as presented in "cpumask" - * attribute). - */ - if (!ccn->irq) - hrtimer_start(&ccn->dt.hrtimer, arm_ccn_pmu_timer_period(), - HRTIMER_MODE_REL_PINNED); - /* Set the DT bus input, engaging the counter */ arm_ccn_pmu_xp_dt_config(event, 1); } @@ -962,9 +953,6 @@ static void arm_ccn_pmu_event_stop(struct perf_event *event, int flags) /* Disable counting, setting the DT bus to pass-through mode */ arm_ccn_pmu_xp_dt_config(event, 0); - if (!ccn->irq) - hrtimer_cancel(&ccn->dt.hrtimer); - /* Let the DT bus drain */ timeout = arm_ccn_pmu_read_counter(ccn, CCN_IDX_PMU_CYCLE_COUNTER) + ccn->num_xps; @@ -1122,15 +1110,31 @@ static void arm_ccn_pmu_event_config(struct perf_event *event) spin_unlock(&ccn->dt.config_lock); } +static int arm_ccn_pmu_active_counters(struct arm_ccn *ccn) +{ + return bitmap_weight(ccn->dt.pmu_counters_mask, + CCN_NUM_PMU_EVENT_COUNTERS + 1); +} + static int arm_ccn_pmu_event_add(struct perf_event *event, int flags) { int err; struct hw_perf_event *hw = &event->hw; + struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu); err = arm_ccn_pmu_event_alloc(event); if (err) return err; + /* + * Pin the timer, so that the overflows are handled by the chosen + * event->cpu (this is the same one as presented in "cpumask" + * attribute). 
+ */ + if (!ccn->irq && arm_ccn_pmu_active_counters(ccn) == 1) + hrtimer_start(&ccn->dt.hrtimer, arm_ccn_pmu_timer_period(), + HRTIMER_MODE_REL_PINNED); + arm_ccn_pmu_event_config(event); hw->state = PERF_HES_STOPPED; @@ -1143,9 +1147,14 @@ static int arm_ccn_pmu_event_add(struct perf_event *event, int flags) static void arm_ccn_pmu_event_del(struct perf_event *event, int flags) { + struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu); + arm_ccn_pmu_event_stop(event, PERF_EF_UPDATE); arm_ccn_pmu_event_release(event); + + if (!ccn->irq && arm_ccn_pmu_active_counters(ccn) == 0) + hrtimer_cancel(&ccn->dt.hrtimer); } static void arm_ccn_pmu_event_read(struct perf_event *event) -- cgit v1.1 From d662ed2e50c9dab1d4c25e80fa3e01ebe257bd65 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 11 Aug 2016 10:50:43 +0100 Subject: bus: arm-ccn: make event groups reliable The CCN PMU driver leaves the counting logic always enabled, and thus events are enabled while groups are manipulated. As each event is stopped and read individually, this leads to arbitrary skew across event groups, which can be seen if counting several identical events. To avoid this, implement pmu_{enable,disable} callbacks to stop and start all counters atomically around event manipulation. As the counters are now stopped, we cannot poll the cycle counter to wait for events to drain from the bus. However, as the counters are stopped and the events will not be read regardless, we can simply allow the bus to drain naturally. 
Signed-off-by: Mark Rutland Signed-off-by: Pawel Moll --- drivers/bus/arm-ccn.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c index 12c1fd1..884c030 100644 --- a/drivers/bus/arm-ccn.c +++ b/drivers/bus/arm-ccn.c @@ -946,20 +946,11 @@ static void arm_ccn_pmu_event_start(struct perf_event *event, int flags) static void arm_ccn_pmu_event_stop(struct perf_event *event, int flags) { - struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu); struct hw_perf_event *hw = &event->hw; - u64 timeout; /* Disable counting, setting the DT bus to pass-through mode */ arm_ccn_pmu_xp_dt_config(event, 0); - /* Let the DT bus drain */ - timeout = arm_ccn_pmu_read_counter(ccn, CCN_IDX_PMU_CYCLE_COUNTER) + - ccn->num_xps; - while (arm_ccn_pmu_read_counter(ccn, CCN_IDX_PMU_CYCLE_COUNTER) < - timeout) - cpu_relax(); - if (flags & PERF_EF_UPDATE) arm_ccn_pmu_event_update(event); @@ -1162,6 +1153,24 @@ static void arm_ccn_pmu_event_read(struct perf_event *event) arm_ccn_pmu_event_update(event); } +static void arm_ccn_pmu_enable(struct pmu *pmu) +{ + struct arm_ccn *ccn = pmu_to_arm_ccn(pmu); + + u32 val = readl(ccn->dt.base + CCN_DT_PMCR); + val |= CCN_DT_PMCR__PMU_EN; + writel(val, ccn->dt.base + CCN_DT_PMCR); +} + +static void arm_ccn_pmu_disable(struct pmu *pmu) +{ + struct arm_ccn *ccn = pmu_to_arm_ccn(pmu); + + u32 val = readl(ccn->dt.base + CCN_DT_PMCR); + val &= ~CCN_DT_PMCR__PMU_EN; + writel(val, ccn->dt.base + CCN_DT_PMCR); +} + static irqreturn_t arm_ccn_pmu_overflow_handler(struct arm_ccn_dt *dt) { u32 pmovsr = readl(dt->base + CCN_DT_PMOVSR); @@ -1284,6 +1293,8 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn) .start = arm_ccn_pmu_event_start, .stop = arm_ccn_pmu_event_stop, .read = arm_ccn_pmu_event_read, + .pmu_enable = arm_ccn_pmu_enable, + .pmu_disable = arm_ccn_pmu_disable, }; /* No overflow interrupt? Have to use a timer instead. 
*/ -- cgit v1.1 From a778937888867aac17a33887d1c429120790fbc2 Mon Sep 17 00:00:00 2001 From: Simon Baatz Date: Fri, 12 Aug 2016 19:12:50 +0200 Subject: ARM: kirkwood: ib62x0: fix size of u-boot environment partition Commit 148c274ea644 ("ARM: kirkwood: ib62x0: add u-boot environment partition") split the "u-boot" partition into "u-boot" and "u-boot environment". However, instead of the size of the environment, an offset was given, resulting in overlapping partitions. Signed-off-by: Simon Baatz Fixes: 148c274ea644 ("ARM: kirkwood: ib62x0: add u-boot environment partition") Cc: Jason Cooper Cc: Andrew Lunn Cc: Gregory Clement Cc: Sebastian Hesselbarth Cc: Luka Perkov Cc: stable@vger.kernel.org # 3.13+ Reviewed-by: Andrew Lunn Signed-off-by: Gregory CLEMENT --- arch/arm/boot/dts/kirkwood-ib62x0.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/kirkwood-ib62x0.dts b/arch/arm/boot/dts/kirkwood-ib62x0.dts index ef84d86..5bf6289 100644 --- a/arch/arm/boot/dts/kirkwood-ib62x0.dts +++ b/arch/arm/boot/dts/kirkwood-ib62x0.dts @@ -113,7 +113,7 @@ partition@e0000 { label = "u-boot environment"; - reg = <0xe0000 0x100000>; + reg = <0xe0000 0x20000>; }; partition@100000 { -- cgit v1.1 From c721da1d05760ad0b4e7670896dae31b6b07d8d6 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Mon, 22 Aug 2016 18:09:36 +0200 Subject: ARM: dts: kirkwood: Fix PCIe label on OpenRD While converting PCIe node on kirkwood by using label, the following commit eb13cf8345e9 ("ARM: dts: kirkwood: Fixup pcie DT warnings") introduced a regression on the OpenRD boards: the PCIe didn't work anymore. As reported by Aaro Koskinen, the display/framebuffer was lost. This commit adds the forgotten label. 
Reported-by: Aaro Koskinen Tested-by: Aaro Koskinen Fixes: eb13cf8345e9 ("ARM: dts: kirkwood: Fixup pcie DT warnings") Cc: stable@vger.kernel.org Reviewed-by: Andrew Lunn Signed-off-by: Gregory CLEMENT --- arch/arm/boot/dts/kirkwood-openrd.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/kirkwood-openrd.dtsi b/arch/arm/boot/dts/kirkwood-openrd.dtsi index e4ecab1..7175511 100644 --- a/arch/arm/boot/dts/kirkwood-openrd.dtsi +++ b/arch/arm/boot/dts/kirkwood-openrd.dtsi @@ -116,6 +116,10 @@ }; }; +&pciec { + status = "okay"; +}; + &pcie0 { status = "okay"; }; -- cgit v1.1 From 936523441bb64cdc9a5b263e8fd2782e70313a57 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 6 Aug 2016 15:50:52 +0200 Subject: batman-adv: Add missing refcnt for last_candidate batadv_find_router dereferences last_bonding_candidate from orig_node without making sure that it has a valid reference. This reference has to be retrieved by increasing the reference counter while holding neigh_list_lock. The lock is required to avoid that batadv_last_bonding_replace removes the current last_bonding_candidate, reduces the reference counter and maybe destroys the object in this process. Fixes: f3b3d9018975 ("batman-adv: add bonding again") Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/routing.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 7602c00..3d19947 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -470,6 +470,29 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv, } /** + * batadv_last_bonding_get - Get last_bonding_candidate of orig_node + * @orig_node: originator node whose last bonding candidate should be retrieved + * + * Return: last bonding candidate of router or NULL if not found + * + * The object is returned with refcounter increased by 1. 
+ */ +static struct batadv_orig_ifinfo * +batadv_last_bonding_get(struct batadv_orig_node *orig_node) +{ + struct batadv_orig_ifinfo *last_bonding_candidate; + + spin_lock_bh(&orig_node->neigh_list_lock); + last_bonding_candidate = orig_node->last_bonding_candidate; + + if (last_bonding_candidate) + kref_get(&last_bonding_candidate->refcount); + spin_unlock_bh(&orig_node->neigh_list_lock); + + return last_bonding_candidate; +} + +/** * batadv_last_bonding_replace - Replace last_bonding_candidate of orig_node * @orig_node: originator node whose bonding candidates should be replaced * @new_candidate: new bonding candidate or NULL @@ -539,7 +562,7 @@ batadv_find_router(struct batadv_priv *bat_priv, * router - obviously there are no other candidates. */ rcu_read_lock(); - last_candidate = orig_node->last_bonding_candidate; + last_candidate = batadv_last_bonding_get(orig_node); if (last_candidate) last_cand_router = rcu_dereference(last_candidate->router); @@ -631,6 +654,9 @@ next: batadv_orig_ifinfo_put(next_candidate); } + if (last_candidate) + batadv_orig_ifinfo_put(last_candidate); + return router; } -- cgit v1.1 From 1e5d343b8f23770e8ac5d31f5c439826bdb35148 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 23 Aug 2016 03:13:03 +0200 Subject: batman-adv: fix elp packet data reservation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The skb_reserve() call only reserved headroom for the mac header, but not the elp packet header itself. Fixing this by using skb_put()'ing towards the skb tail instead of skb_push()'ing towards the skb head. 
Fixes: d6f94d91f766 ("batman-adv: ELP - adding basic infrastructure") Signed-off-by: Linus Lüssing Signed-off-by: Marek Lindner Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_v_elp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index 7d17001..ee08540 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -335,7 +335,7 @@ int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface) goto out; skb_reserve(hard_iface->bat_v.elp_skb, ETH_HLEN + NET_IP_ALIGN); - elp_buff = skb_push(hard_iface->bat_v.elp_skb, BATADV_ELP_HLEN); + elp_buff = skb_put(hard_iface->bat_v.elp_skb, BATADV_ELP_HLEN); elp_packet = (struct batadv_elp_packet *)elp_buff; memset(elp_packet, 0, BATADV_ELP_HLEN); -- cgit v1.1 From 866e0f4d73390ee6f5cd68aa92cf74eef3a2b0f2 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Thu, 25 Aug 2016 11:52:47 -0500 Subject: i40iw: Update hw_iwarp_state Update iwqp->hw_iwarp_state to reflect the new state of the CQP modify QP operation. This avoids reissuing a CQP operation to modify a QP to a state that it is already in. 
Fixes: 4e9042e647ff ("i40iw: add hw and utils files") Reported-by: Stefan Assmann Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_hw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c index 3ee0cad..0c92a40 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hw.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c @@ -265,6 +265,7 @@ void i40iw_next_iw_state(struct i40iw_qp *iwqp, info.dont_send_fin = false; if (iwqp->sc_qp.term_flags && (state == I40IW_QP_STATE_ERROR)) info.reset_tcp_conn = true; + iwqp->hw_iwarp_state = state; i40iw_hw_modify_qp(iwqp->iwdev, iwqp, &info, 0); } -- cgit v1.1 From b71121b4b70a995c0b794026e84c880c4f26c361 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Thu, 25 Aug 2016 11:53:24 -0500 Subject: i40iw: Receive notification events correctly Device notifications are not received after the first interface is closed; since there is an unregister for notifications on every interface close. Correct this by unregistering for device notifications only when the last interface is closed. Also, make all operations on the i40iw_notifiers_registered atomic as it can be read/modified concurrently. 
Fixes: 8e06af711bf2 ("i40iw: add main, hdr, status") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_main.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 0cbbe40..445e230 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -100,7 +100,7 @@ static struct notifier_block i40iw_net_notifier = { .notifier_call = i40iw_net_event }; -static int i40iw_notifiers_registered; +static atomic_t i40iw_notifiers_registered; /** * i40iw_find_i40e_handler - find a handler given a client info @@ -1342,12 +1342,11 @@ exit: */ static void i40iw_register_notifiers(void) { - if (!i40iw_notifiers_registered) { + if (atomic_inc_return(&i40iw_notifiers_registered) == 1) { register_inetaddr_notifier(&i40iw_inetaddr_notifier); register_inet6addr_notifier(&i40iw_inetaddr6_notifier); register_netevent_notifier(&i40iw_net_notifier); } - i40iw_notifiers_registered++; } /** @@ -1429,8 +1428,7 @@ static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset, bool del i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx); /* fallthrough */ case INET_NOTIFIER: - if (i40iw_notifiers_registered > 0) { - i40iw_notifiers_registered--; + if (!atomic_dec_return(&i40iw_notifiers_registered)) { unregister_netevent_notifier(&i40iw_net_notifier); unregister_inetaddr_notifier(&i40iw_inetaddr_notifier); unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier); -- cgit v1.1 From 61a28d2b690a8b6c383a075a12d62d711850f9d7 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Fri, 26 Aug 2016 06:49:25 +0200 Subject: IB/hfi1: Clean up type used and casting In all other places in this file where 'find_first_bit' is called, port_num is defined as a 'u8' and no casting is done. Do the same here in order to be more consistent. 
Signed-off-by: Christophe JAILLET Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mad.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 39e42c3..9912d2c 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -2604,7 +2604,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, u8 lq, num_vls; u8 res_lli, res_ler; u64 port_mask; - unsigned long port_num; + u8 port_num; unsigned long vl; u32 vl_select_mask; int vfi; @@ -2640,7 +2640,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, port_num = find_first_bit((unsigned long *)&port_mask, sizeof(port_mask)); - if ((u8)port_num != port) { + if (port_num != port) { pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)pmp); } -- cgit v1.1 From da60626e7d02a4f385cae80e450afc8b07035368 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 19 Aug 2016 16:34:45 +0100 Subject: ARM: sa1100: clear reset status prior to reboot Clear the current reset status prior to rebooting the platform. This adds the bit missing from 04fef228fb00 ("[ARM] pxa: introduce reset_status and clear_reset_status for driver's usage"). 
Fixes: 04fef228fb00 ("[ARM] pxa: introduce reset_status and clear_reset_status for driver's usage") Signed-off-by: Russell King --- arch/arm/mach-sa1100/generic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c index 2e2c35b..3e09bed 100644 --- a/arch/arm/mach-sa1100/generic.c +++ b/arch/arm/mach-sa1100/generic.c @@ -34,6 +34,7 @@ #include #include +#include #include "generic.h" #include @@ -95,6 +96,8 @@ static void sa1100_power_off(void) void sa11x0_restart(enum reboot_mode mode, const char *cmd) { + clear_reset_status(RESET_STATUS_ALL); + if (mode == REBOOT_SOFT) { /* Jump into ROM at address 0 */ soft_restart(0); -- cgit v1.1 From 29617e1cfc2aa869154f5ed2580b756ec2c3cb28 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Thu, 25 Aug 2016 10:56:48 -0400 Subject: MAINTAINERS: Add myself as reviewer for Samsung Exynos support I've been helping reviewing and testing Exynos SoC support patches for the last couple of years. But it would be easier for me if I'm cc'ed for patches, so I'm adding myself as reviewer for this entry. Signed-off-by: Javier Martinez Canillas Acked-by: Kukjin Kim Acked-by: Sylwester Nawrocki Signed-off-by: Krzysztof Kozlowski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index c9cd8d3..8a8a485 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1614,6 +1614,7 @@ N: rockchip ARM/SAMSUNG EXYNOS ARM ARCHITECTURES M: Kukjin Kim M: Krzysztof Kozlowski +R: Javier Martinez Canillas L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-samsung-soc@vger.kernel.org (moderated for non-subscribers) S: Maintained -- cgit v1.1 From f5b7b559e14881b27d76f9c97817ec82bfc48827 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 24 Aug 2016 12:25:56 +0300 Subject: nvme-rdma: Get rid of duplicate variable We already have need_inval in ib_mr, lets use that instead. 
Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index c133256..881ac28 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -77,7 +77,6 @@ struct nvme_rdma_request { u32 num_sge; int nents; bool inline_data; - bool need_inval; struct ib_reg_wr reg_wr; struct ib_cqe reg_cqe; struct nvme_rdma_queue *queue; @@ -286,7 +285,7 @@ static int nvme_rdma_reinit_request(void *data, struct request *rq) struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); int ret = 0; - if (!req->need_inval) + if (!req->mr->need_inval) goto out; ib_dereg_mr(req->mr); @@ -298,7 +297,7 @@ static int nvme_rdma_reinit_request(void *data, struct request *rq) req->mr = NULL; } - req->need_inval = false; + req->mr->need_inval = false; out: return ret; @@ -850,7 +849,7 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, if (!blk_rq_bytes(rq)) return; - if (req->need_inval) { + if (req->mr->need_inval) { res = nvme_rdma_inv_rkey(queue, req); if (res < 0) { dev_err(ctrl->ctrl.device, @@ -936,7 +935,7 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue, IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE; - req->need_inval = true; + req->mr->need_inval = true; sg->addr = cpu_to_le64(req->mr->iova); put_unaligned_le24(req->mr->length, sg->length); @@ -959,7 +958,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, req->num_sge = 1; req->inline_data = false; - req->need_inval = false; + req->mr->need_inval = false; c->common.flags |= NVME_CMD_SGL_METABUF; @@ -1146,7 +1145,7 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, if ((wc->wc_flags & IB_WC_WITH_INVALIDATE) && wc->ex.invalidate_rkey == req->mr->rkey) - req->need_inval = false; + req->mr->need_inval = false; blk_mq_complete_request(rq, status); @@ -1476,7 +1475,7 @@ static int nvme_rdma_queue_rq(struct 
blk_mq_hw_ctx *hctx, if (rq->cmd_type == REQ_TYPE_FS && req_op(rq) == REQ_OP_FLUSH) flush = true; ret = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, - req->need_inval ? &req->reg_wr.wr : NULL, flush); + req->mr->need_inval ? &req->reg_wr.wr : NULL, flush); if (ret) { nvme_rdma_unmap_data(queue, rq); goto err; -- cgit v1.1 From 4d8c6a7946d53648d9ed0e3852a1c81ce07d40db Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 26 Aug 2016 00:37:52 +0300 Subject: nvme-rdma: Get rid of redundant defines Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 881ac28..ab545fb 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -43,10 +43,6 @@ #define NVME_RDMA_MAX_INLINE_SEGMENTS 1 -#define NVME_RDMA_MAX_PAGES_PER_MR 512 - -#define NVME_RDMA_DEF_RECONNECT_DELAY 20 - /* * We handle AEN commands ourselves and don't even let the * block layer know about them. -- cgit v1.1 From 89f82cbb0d5c0ab768c8d02914188aa2211cd2e3 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 22 Aug 2016 15:15:23 -0400 Subject: drm/msm: fix use of copy_from_user() while holding spinlock Use instead __copy_from_user_inatomic() and fall back to slow-path where we drop and re-acquire the lock in case of fault. 
Cc: stable@vger.kernel.org Reported-by: Vaishali Thakkar Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem_submit.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 9766f9a..408da40 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -64,6 +64,14 @@ void msm_gem_submit_free(struct msm_gem_submit *submit) kfree(submit); } +static inline unsigned long __must_check +copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) +{ + if (access_ok(VERIFY_READ, from, n)) + return __copy_from_user_inatomic(to, from, n); + return -EFAULT; +} + static int submit_lookup_objects(struct msm_gem_submit *submit, struct drm_msm_gem_submit *args, struct drm_file *file) { @@ -71,6 +79,7 @@ static int submit_lookup_objects(struct msm_gem_submit *submit, int ret = 0; spin_lock(&file->table_lock); + pagefault_disable(); for (i = 0; i < args->nr_bos; i++) { struct drm_msm_gem_submit_bo submit_bo; @@ -84,10 +93,15 @@ static int submit_lookup_objects(struct msm_gem_submit *submit, */ submit->bos[i].flags = 0; - ret = copy_from_user(&submit_bo, userptr, sizeof(submit_bo)); - if (ret) { - ret = -EFAULT; - goto out_unlock; + ret = copy_from_user_inatomic(&submit_bo, userptr, sizeof(submit_bo)); + if (unlikely(ret)) { + pagefault_enable(); + spin_unlock(&file->table_lock); + ret = copy_from_user(&submit_bo, userptr, sizeof(submit_bo)); + if (ret) + goto out; + spin_lock(&file->table_lock); + pagefault_disable(); } if (submit_bo.flags & ~MSM_SUBMIT_BO_FLAGS) { @@ -127,9 +141,12 @@ static int submit_lookup_objects(struct msm_gem_submit *submit, } out_unlock: - submit->nr_bos = i; + pagefault_enable(); spin_unlock(&file->table_lock); +out: + submit->nr_bos = i; + return ret; } -- cgit v1.1 From d78d383ab354b0b9e1d23404ae0d9fbdeb9aa035 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 22 Aug 2016 
15:28:38 -0400 Subject: drm/msm: protect against faults from copy_from_user() in submit ioctl An evil userspace could try to cause deadlock by passing an unfaulted-in GEM bo as submit->bos (or submit->cmds) table. Which will trigger msm_gem_fault() while we already hold struct_mutex. See: https://github.com/freedreno/msmtest/blob/master/evilsubmittest.c Cc: stable@vger.kernel.org Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_drv.h | 6 ++++++ drivers/gpu/drm/msm/msm_gem.c | 9 +++++++++ drivers/gpu/drm/msm/msm_gem_submit.c | 3 +++ 3 files changed, 18 insertions(+) diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index b4bc7f1..d0da52f 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -157,6 +157,12 @@ struct msm_drm_private { struct shrinker shrinker; struct msm_vblank_ctrl vblank_ctrl; + + /* task holding struct_mutex.. currently only used in submit path + * to detect and reject faults from copy_from_user() for submit + * ioctl. + */ + struct task_struct *struct_mutex_task; }; struct msm_format { diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 6cd4af4..85f3047 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -196,11 +196,20 @@ int msm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { struct drm_gem_object *obj = vma->vm_private_data; struct drm_device *dev = obj->dev; + struct msm_drm_private *priv = dev->dev_private; struct page **pages; unsigned long pfn; pgoff_t pgoff; int ret; + /* This should only happen if userspace tries to pass a mmap'd + * but unfaulted gem bo vaddr into submit ioctl, triggering + * a page fault while struct_mutex is already held. This is + * not a valid use-case so just bail. 
+ */ + if (priv->struct_mutex_task == current) + return VM_FAULT_SIGBUS; + /* Make sure we don't parallel update on a fault, nor move or remove * something from beneath our feet */ diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 408da40..880d6a9 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -394,6 +394,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (ret) return ret; + priv->struct_mutex_task = current; + submit = submit_create(dev, gpu, args->nr_bos, args->nr_cmds); if (!submit) { ret = -ENOMEM; @@ -485,6 +487,7 @@ out: if (ret) msm_gem_submit_free(submit); out_unlock: + priv->struct_mutex_task = NULL; mutex_unlock(&dev->struct_mutex); return ret; } -- cgit v1.1 From e09c978aae5bedfdb379be80363b024b7d82638b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 27 Aug 2016 23:44:04 -0400 Subject: NFSv4.1: Fix Oopsable condition in server callback races The slot table hasn't been an array since v3.7. Ensure that we use nfs4_lookup_slot() to access the slot correctly. 
Fixes: 87dda67e7386 ("NFSv4.1: Allow SEQUENCE to resize the slot table...") Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v3.8+ --- fs/nfs/callback_proc.c | 5 +---- fs/nfs/nfs4session.c | 33 +++++++++++++++++++++++++++++++++ fs/nfs/nfs4session.h | 1 + 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index c92a75e..a4cf6d2 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -454,11 +454,8 @@ static bool referring_call_exists(struct nfs_client *clp, ((u32 *)&rclist->rcl_sessionid.data)[3], ref->rc_sequenceid, ref->rc_slotid); - spin_lock(&tbl->slot_tbl_lock); - status = (test_bit(ref->rc_slotid, tbl->used_slots) && - tbl->slots[ref->rc_slotid].seq_nr == + status = nfs4_slot_seqid_in_use(tbl, ref->rc_slotid, ref->rc_sequenceid); - spin_unlock(&tbl->slot_tbl_lock); if (status) goto out; } diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index 332d06e..c1f4c20 100644 --- a/fs/nfs/nfs4session.c +++ b/fs/nfs/nfs4session.c @@ -172,6 +172,39 @@ struct nfs4_slot *nfs4_lookup_slot(struct nfs4_slot_table *tbl, u32 slotid) return ERR_PTR(-E2BIG); } +static int nfs4_slot_get_seqid(struct nfs4_slot_table *tbl, u32 slotid, + u32 *seq_nr) + __must_hold(&tbl->slot_tbl_lock) +{ + struct nfs4_slot *slot; + + slot = nfs4_lookup_slot(tbl, slotid); + if (IS_ERR(slot)) + return PTR_ERR(slot); + *seq_nr = slot->seq_nr; + return 0; +} + +/* + * nfs4_slot_seqid_in_use - test if a slot sequence id is still in use + * + * Given a slot table, slot id and sequence number, determine if the + * RPC call in question is still in flight. This function is mainly + * intended for use by the callback channel. 
+ */ +bool nfs4_slot_seqid_in_use(struct nfs4_slot_table *tbl, u32 slotid, u32 seq_nr) +{ + u32 cur_seq; + bool ret = false; + + spin_lock(&tbl->slot_tbl_lock); + if (nfs4_slot_get_seqid(tbl, slotid, &cur_seq) == 0 && + cur_seq == seq_nr && test_bit(slotid, tbl->used_slots)) + ret = true; + spin_unlock(&tbl->slot_tbl_lock); + return ret; +} + /* * nfs4_alloc_slot - efficiently look for a free slot * diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index 5b51298..33cace6 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -78,6 +78,7 @@ extern int nfs4_setup_slot_table(struct nfs4_slot_table *tbl, extern void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl); extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl); extern struct nfs4_slot *nfs4_lookup_slot(struct nfs4_slot_table *tbl, u32 slotid); +extern bool nfs4_slot_seqid_in_use(struct nfs4_slot_table *tbl, u32 slotid, u32 seq_nr); extern bool nfs4_try_to_lock_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot); extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot); extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl); -- cgit v1.1 From 045d2a6d076a2ecd7043ea543ea198af943f8b16 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 28 Aug 2016 13:25:43 -0400 Subject: NFSv4.1: Delay callback processing when there are referring triples If CB_SEQUENCE tells us that the processing of this request depends on the completion of one or more referring triples (see RFC 5661 Section 2.10.6.3), delay the callback processing until after the RPC requests being referred to have completed. If we end up delaying for more than 1/2 second, then fall back to returning NFS4ERR_DELAY in reply to the callback. 
Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 4 ++-- fs/nfs/nfs4proc.c | 2 ++ fs/nfs/nfs4session.c | 22 +++++++++++++++++++++- fs/nfs/nfs4session.h | 5 ++++- 4 files changed, 29 insertions(+), 4 deletions(-) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index a4cf6d2..c359329 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -454,8 +454,8 @@ static bool referring_call_exists(struct nfs_client *clp, ((u32 *)&rclist->rcl_sessionid.data)[3], ref->rc_sequenceid, ref->rc_slotid); - status = nfs4_slot_seqid_in_use(tbl, ref->rc_slotid, - ref->rc_sequenceid); + status = nfs4_slot_wait_on_seqid(tbl, ref->rc_slotid, + ref->rc_sequenceid, HZ >> 1) < 0; if (status) goto out; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1949bbd..0cc0c31 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -686,6 +686,8 @@ out_unlock: res->sr_slot = NULL; if (send_new_highest_used_slotid) nfs41_notify_server(session->clp); + if (waitqueue_active(&tbl->slot_waitq)) + wake_up_all(&tbl->slot_waitq); } int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index c1f4c20..b629730 100644 --- a/fs/nfs/nfs4session.c +++ b/fs/nfs/nfs4session.c @@ -28,6 +28,7 @@ static void nfs4_init_slot_table(struct nfs4_slot_table *tbl, const char *queue) tbl->highest_used_slotid = NFS4_NO_SLOT; spin_lock_init(&tbl->slot_tbl_lock); rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, queue); + init_waitqueue_head(&tbl->slot_waitq); init_completion(&tbl->complete); } @@ -192,7 +193,8 @@ static int nfs4_slot_get_seqid(struct nfs4_slot_table *tbl, u32 slotid, * RPC call in question is still in flight. This function is mainly * intended for use by the callback channel. 
*/ -bool nfs4_slot_seqid_in_use(struct nfs4_slot_table *tbl, u32 slotid, u32 seq_nr) +static bool nfs4_slot_seqid_in_use(struct nfs4_slot_table *tbl, + u32 slotid, u32 seq_nr) { u32 cur_seq; bool ret = false; @@ -206,6 +208,24 @@ bool nfs4_slot_seqid_in_use(struct nfs4_slot_table *tbl, u32 slotid, u32 seq_nr) } /* + * nfs4_slot_wait_on_seqid - wait until a slot sequence id is complete + * + * Given a slot table, slot id and sequence number, wait until the + * corresponding RPC call completes. This function is mainly + * intended for use by the callback channel. + */ +int nfs4_slot_wait_on_seqid(struct nfs4_slot_table *tbl, + u32 slotid, u32 seq_nr, + unsigned long timeout) +{ + if (wait_event_timeout(tbl->slot_waitq, + !nfs4_slot_seqid_in_use(tbl, slotid, seq_nr), + timeout) == 0) + return -ETIMEDOUT; + return 0; +} + +/* * nfs4_alloc_slot - efficiently look for a free slot * * nfs4_alloc_slot looks for an unset bit in the used_slots bitmap. diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index 33cace6..fa75d7d 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -36,6 +36,7 @@ struct nfs4_slot_table { unsigned long used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */ spinlock_t slot_tbl_lock; struct rpc_wait_queue slot_tbl_waitq; /* allocators may wait here */ + wait_queue_head_t slot_waitq; /* Completion wait on slot */ u32 max_slots; /* # slots in table */ u32 max_slotid; /* Max allowed slotid value */ u32 highest_used_slotid; /* sent to server on each SEQ. 
@@ -78,7 +79,9 @@ extern int nfs4_setup_slot_table(struct nfs4_slot_table *tbl, extern void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl); extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl); extern struct nfs4_slot *nfs4_lookup_slot(struct nfs4_slot_table *tbl, u32 slotid); -extern bool nfs4_slot_seqid_in_use(struct nfs4_slot_table *tbl, u32 slotid, u32 seq_nr); +extern int nfs4_slot_wait_on_seqid(struct nfs4_slot_table *tbl, + u32 slotid, u32 seq_nr, + unsigned long timeout); extern bool nfs4_try_to_lock_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot); extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot); extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl); -- cgit v1.1 From 07e8dcbda71ef87e9cbdc42b5bb16a44c1ab839b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 28 Aug 2016 10:28:25 -0400 Subject: NFSv4.1: Defer bumping the slot sequence number until we free the slot For operations like OPEN or LAYOUTGET, which return recallable state (i.e. delegations and layouts) we want to enable the mechanism for resolving recall races in RFC5661 Section 2.10.6.3. To do so, we will want to defer bumping the slot's sequence number until we have finished processing the RPC results. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 9 +++++++-- fs/nfs/nfs4session.h | 3 ++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0cc0c31..de4a89d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -666,6 +666,11 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) tbl = slot->table; session = tbl->session; + /* Bump the slot sequence number */ + if (slot->seq_done) + slot->seq_nr++; + slot->seq_done = 0; + spin_lock(&tbl->slot_tbl_lock); /* Be nice to the server: try to ensure that the last transmitted * value for highest_user_slotid <= target_highest_slotid @@ -716,7 +721,7 @@ int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) switch (res->sr_status) { case 0: /* Update the slot's sequence and clientid lease timer */ - ++slot->seq_nr; + slot->seq_done = 1; clp = session->clp; do_renew_lease(clp, res->sr_timestamp); /* Check sequence flags */ @@ -771,7 +776,7 @@ int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) goto retry_nowait; default: /* Just update the slot sequence no. */ - ++slot->seq_nr; + slot->seq_done = 1; } out: /* The session may be reset by one of the error handlers. */ diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index fa75d7d..f703b75 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -21,7 +21,8 @@ struct nfs4_slot { unsigned long generation; u32 slot_nr; u32 seq_nr; - unsigned int interrupted : 1; + unsigned int interrupted : 1, + seq_done : 1; }; /* Sessions */ -- cgit v1.1 From 2e80dbe7ac51a911e8a828407b1a48c5ba938cd2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 28 Aug 2016 11:50:26 -0400 Subject: NFSv4.1: Close callback races for OPEN, LAYOUTGET and LAYOUTRETURN Defer freeing the slot until after we have processed the results from OPEN and LAYOUTGET. 
This means that the server can rely on the mechanism in RFC5661 Section 2.10.6.3 to ensure that replies to an OPEN or LAYOUTGET/RETURN RPC call don't race with the callbacks that apply to them. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 78 +++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 65 insertions(+), 13 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index de4a89d..f5aecaa 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -634,15 +634,11 @@ out_sleep: } EXPORT_SYMBOL_GPL(nfs40_setup_sequence); -static int nfs40_sequence_done(struct rpc_task *task, - struct nfs4_sequence_res *res) +static void nfs40_sequence_free_slot(struct nfs4_sequence_res *res) { struct nfs4_slot *slot = res->sr_slot; struct nfs4_slot_table *tbl; - if (slot == NULL) - goto out; - tbl = slot->table; spin_lock(&tbl->slot_tbl_lock); if (!nfs41_wake_and_assign_slot(tbl, slot)) @@ -650,7 +646,13 @@ static int nfs40_sequence_done(struct rpc_task *task, spin_unlock(&tbl->slot_tbl_lock); res->sr_slot = NULL; -out: +} + +static int nfs40_sequence_done(struct rpc_task *task, + struct nfs4_sequence_res *res) +{ + if (res->sr_slot != NULL) + nfs40_sequence_free_slot(res); return 1; } @@ -695,7 +697,8 @@ out_unlock: wake_up_all(&tbl->slot_waitq); } -int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) +static int nfs41_sequence_process(struct rpc_task *task, + struct nfs4_sequence_res *res) { struct nfs4_session *session; struct nfs4_slot *slot = res->sr_slot; @@ -781,11 +784,11 @@ int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) out: /* The session may be reset by one of the error handlers. 
*/ dprintk("%s: Error %d free the slot \n", __func__, res->sr_status); - nfs41_sequence_free_slot(res); out_noaction: return ret; retry_nowait: if (rpc_restart_call_prepare(task)) { + nfs41_sequence_free_slot(res); task->tk_status = 0; ret = 0; } @@ -796,8 +799,37 @@ out_retry: rpc_delay(task, NFS4_POLL_RETRY_MAX); return 0; } + +int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) +{ + if (!nfs41_sequence_process(task, res)) + return 0; + if (res->sr_slot != NULL) + nfs41_sequence_free_slot(res); + return 1; + +} EXPORT_SYMBOL_GPL(nfs41_sequence_done); +static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) +{ + if (res->sr_slot == NULL) + return 1; + if (res->sr_slot->table->session != NULL) + return nfs41_sequence_process(task, res); + return nfs40_sequence_done(task, res); +} + +static void nfs4_sequence_free_slot(struct nfs4_sequence_res *res) +{ + if (res->sr_slot != NULL) { + if (res->sr_slot->table->session != NULL) + nfs41_sequence_free_slot(res); + else + nfs40_sequence_free_slot(res); + } +} + int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) { if (res->sr_slot == NULL) @@ -927,6 +959,17 @@ static int nfs4_setup_sequence(const struct nfs_server *server, args, res, task); } +static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) +{ + return nfs40_sequence_done(task, res); +} + +static void nfs4_sequence_free_slot(struct nfs4_sequence_res *res) +{ + if (res->sr_slot != NULL) + nfs40_sequence_free_slot(res); +} + int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) { @@ -1204,6 +1247,7 @@ static void nfs4_opendata_free(struct kref *kref) struct super_block *sb = p->dentry->d_sb; nfs_free_seqid(p->o_arg.seqid); + nfs4_sequence_free_slot(&p->o_res.seq_res); if (p->state != NULL) nfs4_put_open_state(p->state); nfs4_put_state_owner(p->owner); @@ -1663,9 +1707,14 @@ err: static struct nfs4_state * 
nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) { + struct nfs4_state *ret; + if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) - return _nfs4_opendata_reclaim_to_nfs4_state(data); - return _nfs4_opendata_to_nfs4_state(data); + ret =_nfs4_opendata_reclaim_to_nfs4_state(data); + else + ret = _nfs4_opendata_to_nfs4_state(data); + nfs4_sequence_free_slot(&data->o_res.seq_res); + return ret; } static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state) @@ -2063,7 +2112,7 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata) data->rpc_status = task->tk_status; - if (!nfs4_sequence_done(task, &data->o_res.seq_res)) + if (!nfs4_sequence_process(task, &data->o_res.seq_res)) return; if (task->tk_status == 0) { @@ -7871,7 +7920,7 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) struct nfs4_layoutget *lgp = calldata; dprintk("--> %s\n", __func__); - nfs41_sequence_done(task, &lgp->res.seq_res); + nfs41_sequence_process(task, &lgp->res.seq_res); dprintk("<-- %s\n", __func__); } @@ -8087,6 +8136,7 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags) /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */ if (status == 0 && lgp->res.layoutp->len) lseg = pnfs_layout_process(lgp); + nfs4_sequence_free_slot(&lgp->res.seq_res); rpc_put_task(task); dprintk("<-- %s status=%d\n", __func__, status); if (status) @@ -8113,7 +8163,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) dprintk("--> %s\n", __func__); - if (!nfs41_sequence_done(task, &lrp->res.seq_res)) + if (!nfs41_sequence_process(task, &lrp->res.seq_res)) return; server = NFS_SERVER(lrp->args.inode); @@ -8125,6 +8175,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) case -NFS4ERR_DELAY: if (nfs4_async_handle_error(task, server, NULL, NULL) != -EAGAIN) break; + nfs4_sequence_free_slot(&lrp->res.seq_res); rpc_restart_call_prepare(task); return; } @@ -8145,6 
+8196,7 @@ static void nfs4_layoutreturn_release(void *calldata) pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); pnfs_clear_layoutreturn_waitbit(lo); spin_unlock(&lo->plh_inode->i_lock); + nfs4_sequence_free_slot(&lrp->res.seq_res); pnfs_free_lseg_list(&freeme); pnfs_put_layout_hdr(lrp->args.layout); nfs_iput_and_deactive(lrp->inode); -- cgit v1.1 From d138027a8256a3e9d7657c8d0dae84c08ef2cfe1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 28 Aug 2016 12:19:04 -0400 Subject: NFSv4.1: Remove obsolete and incorrect assignment in nfs4_callback_sequence Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index c359329..f953ef6 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -484,7 +484,6 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, goto out; tbl = &clp->cl_session->bc_slot_table; - slot = tbl->slots + args->csa_slotid; /* Set up res before grabbing the spinlock */ memcpy(&res->csr_sessionid, &args->csa_sessionid, -- cgit v1.1 From 17de0a9ff3df8f54f2f47746d118112d4e61d973 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 29 Aug 2016 11:33:58 +1000 Subject: iomap: don't set FIEMAP_EXTENT_MERGED for extent based filesystems Filesystems like XFS that use extents should not set the FIEMAP_EXTENT_MERGED flag in the fiemap extent structures. To allow for both behaviors for the upcoming gfs2 usage split the iomap type field into type and flags, and only set FIEMAP_EXTENT_MERGED if the IOMAP_F_MERGED flag is set. The flags field will also come in handy for future features such as shared extents on reflink-enabled file systems. Reported-by: Andreas Gruenbacher Signed-off-by: Christoph Hellwig Acked-by: Darrick J. 
Wong Signed-off-by: Dave Chinner --- fs/iomap.c | 5 ++++- include/linux/iomap.h | 8 +++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/iomap.c b/fs/iomap.c index 0342254..706270f 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -428,9 +428,12 @@ static int iomap_to_fiemap(struct fiemap_extent_info *fi, break; } + if (iomap->flags & IOMAP_F_MERGED) + flags |= FIEMAP_EXTENT_MERGED; + return fiemap_fill_next_extent(fi, iomap->offset, iomap->blkno != IOMAP_NULL_BLOCK ? iomap->blkno << 9: 0, - iomap->length, flags | FIEMAP_EXTENT_MERGED); + iomap->length, flags); } diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 3267df4..3d70ece 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -19,6 +19,11 @@ struct vm_fault; #define IOMAP_UNWRITTEN 0x04 /* blocks allocated @blkno in unwritten state */ /* + * Flags for iomap mappings: + */ +#define IOMAP_F_MERGED 0x01 /* contains multiple blocks/extents */ + +/* * Magic value for blkno: */ #define IOMAP_NULL_BLOCK -1LL /* blkno is not valid */ @@ -27,7 +32,8 @@ struct iomap { sector_t blkno; /* 1st sector of mapping, 512b units */ loff_t offset; /* file offset of mapping, bytes */ u64 length; /* length of mapping, bytes */ - int type; /* type of mapping */ + u16 type; /* type of mapping */ + u16 flags; /* flags for mapping */ struct block_device *bdev; /* block device for I/O */ }; -- cgit v1.1 From 6654674cb7b5953ac04fc9d7f5f511676ae97e29 Mon Sep 17 00:00:00 2001 From: Jorik Jonker Date: Sat, 27 Aug 2016 21:04:33 +0200 Subject: clk: sunxi-ng: Fix wrong reset register offsets The reset register offsets for UART*, I2C* and SCR were off by a few bytes. 
Signed-off-by: Jorik Jonker Signed-off-by: Maxime Ripard --- drivers/clk/sunxi-ng/ccu-sun8i-h3.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-h3.c b/drivers/clk/sunxi-ng/ccu-sun8i-h3.c index 9af35954..267f995 100644 --- a/drivers/clk/sunxi-ng/ccu-sun8i-h3.c +++ b/drivers/clk/sunxi-ng/ccu-sun8i-h3.c @@ -783,14 +783,14 @@ static struct ccu_reset_map sun8i_h3_ccu_resets[] = { [RST_BUS_I2S1] = { 0x2d0, BIT(13) }, [RST_BUS_I2S2] = { 0x2d0, BIT(14) }, - [RST_BUS_I2C0] = { 0x2d4, BIT(0) }, - [RST_BUS_I2C1] = { 0x2d4, BIT(1) }, - [RST_BUS_I2C2] = { 0x2d4, BIT(2) }, - [RST_BUS_UART0] = { 0x2d4, BIT(16) }, - [RST_BUS_UART1] = { 0x2d4, BIT(17) }, - [RST_BUS_UART2] = { 0x2d4, BIT(18) }, - [RST_BUS_UART3] = { 0x2d4, BIT(19) }, - [RST_BUS_SCR] = { 0x2d4, BIT(20) }, + [RST_BUS_I2C0] = { 0x2d8, BIT(0) }, + [RST_BUS_I2C1] = { 0x2d8, BIT(1) }, + [RST_BUS_I2C2] = { 0x2d8, BIT(2) }, + [RST_BUS_UART0] = { 0x2d8, BIT(16) }, + [RST_BUS_UART1] = { 0x2d8, BIT(17) }, + [RST_BUS_UART2] = { 0x2d8, BIT(18) }, + [RST_BUS_UART3] = { 0x2d8, BIT(19) }, + [RST_BUS_SCR] = { 0x2d8, BIT(20) }, }; static const struct sunxi_ccu_desc sun8i_h3_ccu_desc = { -- cgit v1.1 From 6b760bb2c63a9e322c0e4a0b5daf335ad93d5a33 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Mon, 29 Aug 2016 00:33:50 +0200 Subject: ALSA: timer: fix division by zero after SNDRV_TIMER_IOCTL_CONTINUE I got this: divide error: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 1327 Comm: a.out Not tainted 4.8.0-rc2+ #189 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org 04/01/2014 task: ffff8801120a9580 task.stack: ffff8801120b0000 RIP: 0010:[] [] snd_hrtimer_callback+0x1da/0x3f0 RSP: 0018:ffff88011aa87da8 EFLAGS: 00010006 RAX: 0000000000004f76 RBX: ffff880112655e88 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff880112655ea0 RDI: 0000000000000001 RBP: ffff88011aa87e00 R08: ffff88013fff905c R09: ffff88013fff9048 R10: 
ffff88013fff9050 R11: 00000001050a7b8c R12: ffff880114778a00 R13: ffff880114778ab4 R14: ffff880114778b30 R15: 0000000000000000 FS: 00007f071647c700(0000) GS:ffff88011aa80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000603001 CR3: 0000000112021000 CR4: 00000000000006e0 Stack: 0000000000000000 ffff880114778ab8 ffff880112655ea0 0000000000004f76 ffff880112655ec8 ffff880112655e80 ffff880112655e88 ffff88011aa98fc0 00000000b97ccf2b dffffc0000000000 ffff88011aa98fc0 ffff88011aa87ef0 Call Trace: [] __hrtimer_run_queues+0x347/0xa00 [] ? snd_hrtimer_close+0x130/0x130 [] ? retrigger_next_event+0x1b0/0x1b0 [] ? hrtimer_interrupt+0x136/0x4b0 [] hrtimer_interrupt+0x1b0/0x4b0 [] local_apic_timer_interrupt+0x6e/0xf0 [] ? kvm_guest_apic_eoi_write+0x13/0xc0 [] smp_apic_timer_interrupt+0x76/0xa0 [] apic_timer_interrupt+0x8c/0xa0 [] ? _raw_spin_unlock_irqrestore+0x2c/0x60 [] snd_timer_start1+0xdd/0x670 [] snd_timer_continue+0x45/0x80 [] snd_timer_user_ioctl+0x1030/0x2830 [] ? __follow_pte.isra.49+0x430/0x430 [] ? snd_timer_pause+0x80/0x80 [] ? do_wp_page+0x3aa/0x1c90 [] ? handle_mm_fault+0xbc8/0x27f0 [] ? __pmd_alloc+0x370/0x370 [] ? snd_timer_pause+0x80/0x80 [] do_vfs_ioctl+0x193/0x1050 [] ? ioctl_preallocate+0x200/0x200 [] ? syscall_trace_enter+0x3cf/0xdb0 [] ? __context_tracking_exit.part.4+0x9a/0x1e0 [] ? exit_to_usermode_loop+0x190/0x190 [] ? check_preemption_disabled+0x37/0x1e0 [] ? security_file_ioctl+0x89/0xb0 [] SyS_ioctl+0x8f/0xc0 [] ? 
do_vfs_ioctl+0x1050/0x1050 [] do_syscall_64+0x1c4/0x4e0 [] entry_SYSCALL64_slow_path+0x25/0x25 Code: e8 fc 42 7b fe 8b 0d 06 8a 50 03 49 0f af cf 48 85 c9 0f 88 7c 01 00 00 48 89 4d a8 e8 e0 42 7b fe 48 8b 45 c0 48 8b 4d a8 48 99 <48> f7 f9 49 01 c7 e8 cb 42 7b fe 48 8b 55 d0 48 b8 00 00 00 00 RIP [] snd_hrtimer_callback+0x1da/0x3f0 RSP ---[ end trace 6aa380f756a21074 ]--- The problem happens when you call ioctl(SNDRV_TIMER_IOCTL_CONTINUE) on a completely new/unused timer -- it will have ->sticks == 0, which causes a divide by 0 in snd_hrtimer_callback(). Signed-off-by: Vegard Nossum Cc: Signed-off-by: Takashi Iwai --- sound/core/timer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/core/timer.c b/sound/core/timer.c index 9a6157e..0061870 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -813,6 +813,7 @@ int snd_timer_new(struct snd_card *card, char *id, struct snd_timer_id *tid, timer->tmr_subdevice = tid->subdevice; if (id) strlcpy(timer->id, id, sizeof(timer->id)); + timer->sticks = 1; INIT_LIST_HEAD(&timer->device_list); INIT_LIST_HEAD(&timer->open_list_head); INIT_LIST_HEAD(&timer->active_list_head); -- cgit v1.1 From 8ddc05638ee42b18ba4fe99b5fb647fa3ad20456 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Mon, 29 Aug 2016 00:33:51 +0200 Subject: ALSA: timer: fix NULL pointer dereference on memory allocation failure I hit this with syzkaller: kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 1327 Comm: a.out Not tainted 4.8.0-rc2+ #190 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org 04/01/2014 task: ffff88011278d600 task.stack: ffff8801120c0000 RIP: 0010:[] [] snd_hrtimer_start+0x77/0x100 RSP: 0018:ffff8801120c7a60 EFLAGS: 00010006 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000007 RDX: 0000000000000009 RSI: 1ffff10023483091 RDI: 0000000000000048 
RBP: ffff8801120c7a78 R08: ffff88011a5cf768 R09: ffff88011a5ba790 R10: 0000000000000002 R11: ffffed00234b9ef1 R12: ffff880114843980 R13: ffffffff84213c00 R14: ffff880114843ab0 R15: 0000000000000286 FS: 00007f72958f3700(0000) GS:ffff88011aa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000603001 CR3: 00000001126ab000 CR4: 00000000000006f0 Stack: ffff880114843980 ffff880111eb2dc0 ffff880114843a34 ffff8801120c7ad0 ffffffff82c81ab1 0000000000000000 ffffffff842138e0 0000000100000000 ffff880111eb2dd0 ffff880111eb2dc0 0000000000000001 ffff880111eb2dc0 Call Trace: [] snd_timer_start1+0x331/0x670 [] snd_timer_start+0x5d/0xa0 [] snd_timer_user_ioctl+0x88e/0x2830 [] ? __follow_pte.isra.49+0x430/0x430 [] ? snd_timer_pause+0x80/0x80 [] ? do_wp_page+0x3aa/0x1c90 [] ? put_prev_entity+0x108f/0x21a0 [] ? snd_timer_pause+0x80/0x80 [] do_vfs_ioctl+0x193/0x1050 [] ? cpuacct_account_field+0x12f/0x1a0 [] ? ioctl_preallocate+0x200/0x200 [] ? syscall_trace_enter+0x3cf/0xdb0 [] ? __context_tracking_exit.part.4+0x9a/0x1e0 [] ? exit_to_usermode_loop+0x190/0x190 [] ? check_preemption_disabled+0x37/0x1e0 [] ? security_file_ioctl+0x89/0xb0 [] SyS_ioctl+0x8f/0xc0 [] ? 
do_vfs_ioctl+0x1050/0x1050 [] do_syscall_64+0x1c4/0x4e0 [] entry_SYSCALL64_slow_path+0x25/0x25 Code: c7 c7 c4 b9 c8 82 48 89 d9 4c 89 ee e8 63 88 7f fe e8 7e 46 7b fe 48 8d 7b 48 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 04 02 84 c0 74 04 84 c0 7e 65 80 7b 48 00 74 0e e8 52 46 RIP [] snd_hrtimer_start+0x77/0x100 RSP ---[ end trace 5955b08db7f2b029 ]--- This can happen if snd_hrtimer_open() fails to allocate memory and returns an error, which is currently not checked by snd_timer_open(): ioctl(SNDRV_TIMER_IOCTL_SELECT) - snd_timer_user_tselect() - snd_timer_close() - snd_hrtimer_close() - (struct snd_timer *) t->private_data = NULL - snd_timer_open() - snd_hrtimer_open() - kzalloc() fails; t->private_data is still NULL ioctl(SNDRV_TIMER_IOCTL_START) - snd_timer_user_start() - snd_timer_start() - snd_timer_start1() - snd_hrtimer_start() - t->private_data == NULL // boom Signed-off-by: Vegard Nossum Cc: Signed-off-by: Takashi Iwai --- sound/core/timer.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/sound/core/timer.c b/sound/core/timer.c index 0061870..2d6e3e7 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -294,8 +294,21 @@ int snd_timer_open(struct snd_timer_instance **ti, get_device(&timer->card->card_dev); timeri->slave_class = tid->dev_sclass; timeri->slave_id = slave_id; - if (list_empty(&timer->open_list_head) && timer->hw.open) - timer->hw.open(timer); + + if (list_empty(&timer->open_list_head) && timer->hw.open) { + int err = timer->hw.open(timer); + if (err) { + kfree(timeri->owner); + kfree(timeri); + + if (timer->card) + put_device(&timer->card->card_dev); + module_put(timer->module); + mutex_unlock(&register_mutex); + return err; + } + } + list_add_tail(&timeri->open_list, &timer->open_list_head); snd_timer_check_master(timeri); mutex_unlock(&register_mutex); -- cgit v1.1 From 98744b408c757901df57fa50cbd5826245dc3a1f Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 29 Aug 2016 08:44:03 
+0200 Subject: clocksource/drivers/atmel-pit: Fix compilation error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous fix introduced a check against the ret variable which is not defined, hence producing a compilation error: linux/drivers/clocksource/timer-atmel-pit.c: In function ‘at91sam926x_pit_dt_init’: linux/drivers/clocksource/timer-atmel-pit.c:264:2: error: ‘ret’ undeclared (first use in this function) ret = clk_prepare_enable(data->mck); ^ linux/drivers/clocksource/timer-atmel-pit.c:264:2: note: each undeclared identifier is reported only once for each function it appears in Add the missing variable 'ret'. Fixes: 504f34c9e45c "clocksource/drivers/atmel-pit: Convert init function to return error" Signed-off-by: Daniel Lezcano Cc: alexandre.belloni@free-electrons.com Cc: motobud@gmail.com Cc: realbright@lgcns.com Link: http://lkml.kernel.org/r/1472453043-24287-1-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Thomas Gleixner --- drivers/clocksource/timer-atmel-pit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clocksource/timer-atmel-pit.c b/drivers/clocksource/timer-atmel-pit.c index 3494bc5..7f0f5b2 100644 --- a/drivers/clocksource/timer-atmel-pit.c +++ b/drivers/clocksource/timer-atmel-pit.c @@ -240,6 +240,7 @@ static int __init at91sam926x_pit_common_init(struct pit_data *data) static int __init at91sam926x_pit_dt_init(struct device_node *node) { struct pit_data *data; + int ret; data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) -- cgit v1.1 From 8a07fed44b126f48020f122b9e6bf05d8c48f281 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 23 Aug 2016 10:25:58 +0100 Subject: drm/i915/dvo: Remove dangling call to drm_encoder_cleanup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we hit the error path, we have never called drm_encoder_init() and so have nothing to cleanup. 
Doing so hits a null dereference: [ 10.066261] BUG: unable to handle kernel NULL pointer dereference at 00000104 [ 10.066273] IP: [] mutex_lock+0xa/0x15 [ 10.066287] *pde = 00000000 [ 10.066295] Oops: 0002 [#1] [ 10.066302] Modules linked in: i915(+) video i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm iTCO_wdt iTCO_vendor_support ppdev evdev snd_intel8x0 snd_ac97_codec ac97_bus psmouse snd_pcm snd_timer snd pcspkr uhci_hcd ehci_pci soundcore sr_mod ehci_hcd serio_raw i2c_i801 usbcore i2c_smbus cdrom lpc_ich mfd_core rng_core e100 mii floppy parport_pc parport acpi_cpufreq button processor usb_common eeprom lm85 hwmon_vid autofs4 [ 10.066378] CPU: 0 PID: 132 Comm: systemd-udevd Not tainted 4.8.0-rc3-00013-gef0e1ea #34 [ 10.066389] Hardware name: MicroLink /D865GLC , BIOS BF86510A.86A.0077.P25.0508040031 08/04/2005 [ 10.066401] task: f62db800 task.stack: f5970000 [ 10.066409] EIP: 0060:[] EFLAGS: 00010286 CPU: 0 [ 10.066417] EIP is at mutex_lock+0xa/0x15 [ 10.066424] EAX: 00000104 EBX: 00000104 ECX: 00000000 EDX: 80000000 [ 10.066432] ESI: 00000000 EDI: 00000104 EBP: f5be8000 ESP: f5971b58 [ 10.066439] DS: 007b ES: 007b FS: 0000 GS: 00e0 SS: 0068 [ 10.066446] CR0: 80050033 CR2: 00000104 CR3: 35945000 CR4: 000006d0 [ 10.066453] Stack: [ 10.066459] f503d740 f824dddf 00000000 f61170c0 f61170c0 f82371ae f850f40e 00000001 [ 10.066476] f61170c0 f5971bcc f5be8000 f9c2d401 00000001 f8236fcc 00000001 00000000 [ 10.066491] f5144014 f5be8104 00000008 f9c5267c 00000007 f61170c0 f5144400 f9c4ff00 [ 10.066507] Call Trace: [ 10.066526] [] ? drm_modeset_lock_all+0x27/0xb3 [drm] [ 10.066545] [] ? drm_encoder_cleanup+0x1a/0x132 [drm] [ 10.066559] [] ? drm_atomic_helper_connector_reset+0x3f/0x5c [drm_kms_helper] [ 10.066644] [] ? intel_dvo_init+0x569/0x788 [i915] [ 10.066663] [] ? drm_encoder_init+0x43/0x20b [drm] [ 10.066734] [] ? intel_modeset_init+0x1436/0x17dd [i915] [ 10.066791] [] ? i915_driver_load+0x85a/0x15d3 [i915] [ 10.066846] [] ? 
i915_driver_open+0x5/0x5 [i915] [ 10.066857] [] ? firmware_map_add_entry.part.2+0xc/0xc [ 10.066868] [] ? pci_device_probe+0x8e/0x11c [ 10.066878] [] ? driver_probe_device+0x1db/0x62e [ 10.066888] [] ? kernfs_new_node+0x29/0x9c [ 10.066897] [] ? pci_match_device+0xd9/0x161 [ 10.066905] [] ? kernfs_create_dir_ns+0x42/0x88 [ 10.066914] [] ? __driver_attach+0xe6/0x11b [ 10.066924] [] ? kobject_add_internal+0x1bb/0x44f [ 10.066933] [] ? driver_probe_device+0x62e/0x62e [ 10.066941] [] ? bus_for_each_dev+0x46/0x7f [ 10.066950] [] ? driver_attach+0x1a/0x34 [ 10.066958] [] ? driver_probe_device+0x62e/0x62e [ 10.066966] [] ? bus_add_driver+0x217/0x32a [ 10.066975] [] ? 0xf8403000 [ 10.066982] [] ? driver_register+0x5f/0x108 [ 10.066991] [] ? do_one_initcall+0x49/0x1f6 [ 10.067000] [] ? pick_next_task_fair+0x14b/0x2a3 [ 10.067008] [] ? __schedule+0x15c/0x4fe [ 10.067016] [] ? preempt_schedule_common+0x19/0x3c [ 10.067027] [] ? do_init_module+0x17/0x230 [ 10.067035] [] ? _cond_resched+0x12/0x1a [ 10.067044] [] ? kmem_cache_alloc+0x8f/0x11f [ 10.067052] [] ? do_init_module+0x17/0x230 [ 10.067060] [] ? kfree+0x137/0x203 [ 10.067068] [] ? do_init_module+0x76/0x230 [ 10.067078] [] ? load_module+0x2a39/0x333f [ 10.067087] [] ? SyS_finit_module+0x96/0xd5 [ 10.067096] [] ? vm_mmap_pgoff+0x79/0xa0 [ 10.067105] [] ? do_fast_syscall_32+0xb5/0x1b0 [ 10.067114] [] ? 
sysenter_past_esp+0x47/0x75 [ 10.067121] Code: c8 f7 76 c1 e8 8e cc d2 ff e9 45 fe ff ff 66 90 66 90 66 90 66 90 90 ff 00 7f 05 e8 4e 0c 00 00 c3 53 89 c3 e8 75 ec ff ff 89 d8 08 79 05 e8 fa 0a 00 00 5b c3 53 89 c3 85 c0 74 1b 8b 03 83 [ 10.067180] EIP: [] mutex_lock+0xa/0x15 SS:ESP 0068:f5971b58 [ 10.067190] CR2: 0000000000000104 [ 10.067222] ---[ end trace 049f1f09da45a856 ]--- Reported-by: Meelis Roos Fixes: 580d8ed522e0 ("drm/i915: Give encoders useful names") Reviewed-by: David Weinehall Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: drm-intel-fixes@lists.freedesktop.org Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20160823092558.14931-1-chris@chris-wilson.co.uk (cherry picked from commit 8f76aa0ebe0b7787afe768d9df80031e832d2520) --- drivers/gpu/drm/i915/intel_dvo.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index 47bdf9d..b9e5a63 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -554,7 +554,6 @@ void intel_dvo_init(struct drm_device *dev) return; } - drm_encoder_cleanup(&intel_encoder->base); kfree(intel_dvo); kfree(intel_connector); } -- cgit v1.1 From e0c6fba45ab730afc22fa01ac1c42459893252ec Mon Sep 17 00:00:00 2001 From: Rob Rice Date: Mon, 1 Aug 2016 13:03:41 -0400 Subject: mailbox: Add HAS_DMA Kconfig dependency to BCM_PDC_MBOX Add HAS_DMA Kconfig dependency to BCM_PDC_MBOX to avoid link error on some platforms. 
Reported-by: Fengguang Wu Signed-off-by: Rob Rice Acked-by: Geert Uytterhoeven Signed-off-by: Jassi Brar --- drivers/mailbox/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig index 97c3729..7817d40 100644 --- a/drivers/mailbox/Kconfig +++ b/drivers/mailbox/Kconfig @@ -127,6 +127,7 @@ config XGENE_SLIMPRO_MBOX config BCM_PDC_MBOX tristate "Broadcom PDC Mailbox" depends on ARM64 || COMPILE_TEST + depends on HAS_DMA default ARCH_BCM_IPROC help Mailbox implementation for the Broadcom PDC ring manager, -- cgit v1.1 From 068cf29eca4ef25556496635b978143b170b862c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 4 Aug 2016 08:30:31 +0300 Subject: mailbox: bcm-pdc: potential NULL dereference in pdc_shutdown() We can't pass NULL pointers to pdc_ring_free() so I moved the check for NULL. Signed-off-by: Dan Carpenter Signed-off-by: Jassi Brar --- drivers/mailbox/bcm-pdc-mailbox.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/mailbox/bcm-pdc-mailbox.c b/drivers/mailbox/bcm-pdc-mailbox.c index cbe0c1e..c56d4d0 100644 --- a/drivers/mailbox/bcm-pdc-mailbox.c +++ b/drivers/mailbox/bcm-pdc-mailbox.c @@ -1191,10 +1191,11 @@ static void pdc_shutdown(struct mbox_chan *chan) { struct pdc_state *pdcs = chan->con_priv; - if (pdcs) - dev_dbg(&pdcs->pdev->dev, - "Shutdown mailbox channel for PDC %u", pdcs->pdc_idx); + if (!pdcs) + return; + dev_dbg(&pdcs->pdev->dev, + "Shutdown mailbox channel for PDC %u", pdcs->pdc_idx); pdc_ring_free(pdcs); } -- cgit v1.1 From a75e4a85f49e7f5d71cf0e425bc009c15ad3b5c4 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Sun, 28 Aug 2016 01:15:24 +0800 Subject: fix:mailbox:bcm-pdc-mailbox:mark symbols static where possible We get 2 warnings when building the kernel with W=1: drivers/mailbox/bcm-pdc-mailbox.c:472:6: warning: no previous prototype for 'pdc_setup_debugfs' [-Wmissing-prototypes] drivers/mailbox/bcm-pdc-mailbox.c:488:6: warning: no previous prototype for 
'pdc_free_debugfs' [-Wmissing-prototypes] In fact, these functions are only used in the file in which they are declared and don't need a declaration, but can be made static. so this patch marks these functions with 'static'. Signed-off-by: Baoyou Xie Acked-by: Arnd Bergmann Signed-off-by: Jassi Brar --- drivers/mailbox/bcm-pdc-mailbox.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mailbox/bcm-pdc-mailbox.c b/drivers/mailbox/bcm-pdc-mailbox.c index c56d4d0..c19dd82 100644 --- a/drivers/mailbox/bcm-pdc-mailbox.c +++ b/drivers/mailbox/bcm-pdc-mailbox.c @@ -469,7 +469,7 @@ static const struct file_operations pdc_debugfs_stats = { * this directory for a SPU. * @pdcs: PDC state structure */ -void pdc_setup_debugfs(struct pdc_state *pdcs) +static void pdc_setup_debugfs(struct pdc_state *pdcs) { char spu_stats_name[16]; @@ -485,7 +485,7 @@ void pdc_setup_debugfs(struct pdc_state *pdcs) &pdc_debugfs_stats); } -void pdc_free_debugfs(void) +static void pdc_free_debugfs(void) { if (debugfs_dir && simple_empty(debugfs_dir)) { debugfs_remove_recursive(debugfs_dir); -- cgit v1.1 From f7d3586f2dbe330a84f8d0d02c39ddb3fbde67d3 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Mon, 29 Aug 2016 04:41:50 +0300 Subject: ARM: dts: imx7d-sdb: fix ti,x-plate-ohms property name Fix misspelled "ti,x-plate-ohms" property name of TI TSC2046 touchscreen controller. 
Fixes: d09e6beafa23 ("ARM: dts: imx7d-sdb: Add support for touchscreen") Signed-off-by: Vladimir Zapolskiy Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx7d-sdb.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx7d-sdb.dts b/arch/arm/boot/dts/imx7d-sdb.dts index 95ee268..2f33c46 100644 --- a/arch/arm/boot/dts/imx7d-sdb.dts +++ b/arch/arm/boot/dts/imx7d-sdb.dts @@ -131,7 +131,7 @@ ti,y-min = /bits/ 16 <0>; ti,y-max = /bits/ 16 <0>; ti,pressure-max = /bits/ 16 <0>; - ti,x-plat-ohms = /bits/ 16 <400>; + ti,x-plate-ohms = /bits/ 16 <400>; wakeup-source; }; }; -- cgit v1.1 From c6c1f9bc798bee7cfc2e172cd2c9b48187d801a7 Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Mon, 15 Aug 2016 16:41:13 +0800 Subject: drm/imx: Add active plane reconfiguration support We don't support configuring active plane on-the-fly for imx-drm. The relevant CRTC should be disabled before the plane configuration. Of course, the plane itself should be disabled as well. This patch adds active plane reconfiguration support by forcing CRTC mode change and disabling-enabling plane in plane's ->atomic_update callback. 
Suggested-by: Daniel Vetter Cc: Philipp Zabel Cc: David Airlie Cc: Russell King Cc: Daniel Vetter Cc: Peter Senna Tschudin Signed-off-by: Liu Ying Tested-by: Peter Senna Tschudin Tested-by: Lucas Stach Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/imx-drm-core.c | 26 +++++++++++++++++++++++++- drivers/gpu/drm/imx/ipuv3-plane.c | 21 ++++++++++++++------- 2 files changed, 39 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c index 9f7dafc..7bf90e9 100644 --- a/drivers/gpu/drm/imx/imx-drm-core.c +++ b/drivers/gpu/drm/imx/imx-drm-core.c @@ -171,10 +171,34 @@ static void imx_drm_output_poll_changed(struct drm_device *drm) drm_fbdev_cma_hotplug_event(imxdrm->fbhelper); } +static int imx_drm_atomic_check(struct drm_device *dev, + struct drm_atomic_state *state) +{ + int ret; + + ret = drm_atomic_helper_check_modeset(dev, state); + if (ret) + return ret; + + ret = drm_atomic_helper_check_planes(dev, state); + if (ret) + return ret; + + /* + * Check modeset again in case crtc_state->mode_changed is + * updated in plane's ->atomic_check callback. 
+ */ + ret = drm_atomic_helper_check_modeset(dev, state); + if (ret) + return ret; + + return ret; +} + static const struct drm_mode_config_funcs imx_drm_mode_config_funcs = { .fb_create = drm_fb_cma_create, .output_poll_changed = imx_drm_output_poll_changed, - .atomic_check = drm_atomic_helper_check, + .atomic_check = imx_drm_atomic_check, .atomic_commit = drm_atomic_helper_commit, }; diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index 4ad67d0..29423e75 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -319,13 +319,14 @@ static int ipu_plane_atomic_check(struct drm_plane *plane, return -EINVAL; /* - * since we cannot touch active IDMAC channels, we do not support - * resizing the enabled plane or changing its format + * We support resizing active plane or changing its format by + * forcing CRTC mode change and disabling-enabling plane in plane's + * ->atomic_update callback. */ if (old_fb && (state->src_w != old_state->src_w || state->src_h != old_state->src_h || fb->pixel_format != old_fb->pixel_format)) - return -EINVAL; + crtc_state->mode_changed = true; eba = drm_plane_state_to_eba(state); @@ -336,7 +337,7 @@ static int ipu_plane_atomic_check(struct drm_plane *plane, return -EINVAL; if (old_fb && fb->pitches[0] != old_fb->pitches[0]) - return -EINVAL; + crtc_state->mode_changed = true; switch (fb->pixel_format) { case DRM_FORMAT_YUV420: @@ -372,7 +373,7 @@ static int ipu_plane_atomic_check(struct drm_plane *plane, return -EINVAL; if (old_fb && old_fb->pitches[1] != fb->pitches[1]) - return -EINVAL; + crtc_state->mode_changed = true; } return 0; @@ -392,8 +393,14 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, enum ipu_color_space ics; if (old_state->fb) { - ipu_plane_atomic_set_base(ipu_plane, old_state); - return; + struct drm_crtc_state *crtc_state = state->crtc->state; + + if (!crtc_state->mode_changed) { + ipu_plane_atomic_set_base(ipu_plane, old_state); + return; 
+ } + + ipu_disable_plane(plane); } switch (ipu_plane->dp_flow) { -- cgit v1.1 From 83d9956b7e6b310c1062df7894257251c625b22e Mon Sep 17 00:00:00 2001 From: Ken Lin Date: Fri, 12 Aug 2016 14:08:47 -0400 Subject: ALSA: usb-audio: Add sample rate inquiry quirk for B850V3 CP2114 Avoid getting sample rate on B850V3 CP2114 as it is unsupported and causes noisy "current rate is different from the runtime rate" messages when playback starts. Signed-off-by: Ken Lin Signed-off-by: Akshay Bhat Cc: Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 6cf1f35..152292e 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1141,6 +1141,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) case USB_ID(0x0556, 0x0014): /* Phoenix Audio TMX320VC */ case USB_ID(0x05A3, 0x9420): /* ELP HD USB Camera */ case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */ + case USB_ID(0x1901, 0x0191): /* GE B850V3 CP2114 audio interface */ case USB_ID(0x1de7, 0x0013): /* Phoenix Audio MT202exe */ case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */ case USB_ID(0x1de7, 0x0114): /* Phoenix Audio MT202pcs */ -- cgit v1.1 From f8e81d7e496ea89f377c6935b92022032bed62e5 Mon Sep 17 00:00:00 2001 From: Gregor Boirie Date: Wed, 24 Aug 2016 15:28:27 +0200 Subject: tools:iio:iio_generic_buffer: fix trigger-less mode Passing the trigger-less mode option on the command line causes iio_generic_buffer to fail searching for an IIO trigger. Fix this by skipping trigger initialization if trigger-less mode is requested. Technically it actually fixes: 7c7e9dad70 where the bug was introduced but as the window to the patch below that changes the context was very small let's mark it with that. 
Signed-off-by: Gregor Boirie Fixes: deb4d1fdcb5af ("iio: generic_buffer: Fix --trigger-num option") Signed-off-by: Jonathan Cameron --- tools/iio/iio_generic_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/iio/iio_generic_buffer.c b/tools/iio/iio_generic_buffer.c index ae68bf0..f39c0e9 100644 --- a/tools/iio/iio_generic_buffer.c +++ b/tools/iio/iio_generic_buffer.c @@ -456,7 +456,7 @@ int main(int argc, char **argv) if (notrigger) { printf("trigger-less mode selected\n"); - } if (trig_num >= 0) { + } else if (trig_num >= 0) { char *trig_dev_name; ret = asprintf(&trig_dev_name, "%strigger%d", iio_dir, trig_num); if (ret < 0) { -- cgit v1.1 From 3dc147359e3dcdf0648f1e2c11f62cfae3160df0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 29 Aug 2016 15:12:54 -0400 Subject: pNFS/flexfiles: Fix an Oopsable condition when connection to the DS fails If the attempt to connect to a DS fails inside ff_layout_pg_init_read or ff_layout_pg_init_write, then we currently end up clearing the layout segment carried by the struct nfs_pageio_descriptor, causing an Oops when we later call into ff_layout_read_pagelist/ff_layout_write_pagelist. The fix is to ensure we return the layout and then retry. 
Fixes: 446ca2195303 ("pNFS/flexfiles: When initing reads or writes, we...") Cc: stable@vger.kernel.org # v4.7+ Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 37 +++++++++++++++---------------- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 19 ++++++++-------- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index ee1c94c..51b5136 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -806,11 +806,14 @@ ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg, { struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); struct nfs4_pnfs_ds *ds; + bool fail_return = false; int idx; /* mirrors are sorted by efficiency */ for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) { - ds = nfs4_ff_layout_prepare_ds(lseg, idx, false); + if (idx+1 == fls->mirror_array_cnt) + fail_return = true; + ds = nfs4_ff_layout_prepare_ds(lseg, idx, fail_return); if (ds) { *best_idx = idx; return ds; @@ -859,6 +862,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs4_pnfs_ds *ds; int ds_idx; +retry: /* Use full layout for now */ if (!pgio->pg_lseg) ff_layout_pg_get_read(pgio, req, false); @@ -871,10 +875,13 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, ds = ff_layout_choose_best_ds_for_read(pgio->pg_lseg, 0, &ds_idx); if (!ds) { - if (ff_layout_no_fallback_to_mds(pgio->pg_lseg)) - goto out_pnfs; - else + if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg)) goto out_mds; + pnfs_put_lseg(pgio->pg_lseg); + pgio->pg_lseg = NULL; + /* Sleep for 1 second before retrying */ + ssleep(1); + goto retry; } mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx); @@ -890,12 +897,6 @@ out_mds: pnfs_put_lseg(pgio->pg_lseg); pgio->pg_lseg = NULL; nfs_pageio_reset_read_mds(pgio); - return; - -out_pnfs: - pnfs_set_lo_fail(pgio->pg_lseg); - pnfs_put_lseg(pgio->pg_lseg); - pgio->pg_lseg = NULL; } static 
void @@ -909,6 +910,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio, int i; int status; +retry: if (!pgio->pg_lseg) { pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, @@ -940,10 +942,13 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio, for (i = 0; i < pgio->pg_mirror_count; i++) { ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, i, true); if (!ds) { - if (ff_layout_no_fallback_to_mds(pgio->pg_lseg)) - goto out_pnfs; - else + if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg)) goto out_mds; + pnfs_put_lseg(pgio->pg_lseg); + pgio->pg_lseg = NULL; + /* Sleep for 1 second before retrying */ + ssleep(1); + goto retry; } pgm = &pgio->pg_mirrors[i]; mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i); @@ -956,12 +961,6 @@ out_mds: pnfs_put_lseg(pgio->pg_lseg); pgio->pg_lseg = NULL; nfs_pageio_reset_write_mds(pgio); - return; - -out_pnfs: - pnfs_set_lo_fail(pgio->pg_lseg); - pnfs_put_lseg(pgio->pg_lseg); - pgio->pg_lseg = NULL; } static unsigned int diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 970efba..f7a3f6b 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -379,7 +379,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, devid = &mirror->mirror_ds->id_node; if (ff_layout_test_devid_unavailable(devid)) - goto out; + goto out_fail; ds = mirror->mirror_ds->ds; /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */ @@ -405,15 +405,16 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, mirror->mirror_ds->ds_versions[0].rsize = max_payload; if (mirror->mirror_ds->ds_versions[0].wsize > max_payload) mirror->mirror_ds->ds_versions[0].wsize = max_payload; - } else { - ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), - mirror, lseg->pls_range.offset, - lseg->pls_range.length, NFS4ERR_NXIO, - OP_ILLEGAL, GFP_NOIO); - if (fail_return || !ff_layout_has_available_ds(lseg)) - 
pnfs_error_mark_layout_for_return(ino, lseg); - ds = NULL; + goto out; } + ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), + mirror, lseg->pls_range.offset, + lseg->pls_range.length, NFS4ERR_NXIO, + OP_ILLEGAL, GFP_NOIO); +out_fail: + if (fail_return || !ff_layout_has_available_ds(lseg)) + pnfs_error_mark_layout_for_return(ino, lseg); + ds = NULL; out: return ds; } -- cgit v1.1 From 360647959323ec066cc55d82133df73e30530824 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 11 Jul 2016 09:25:45 +0300 Subject: iwlwifi: mvm: consider P2p device type for firmware dump triggers When the user really wanted a dump on P2P Client, he couldn't get it because we checked vif->type but didn't take vif->p2p into account. Fix that. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.h index f7dff76..e9f1be9 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.h @@ -105,7 +105,8 @@ iwl_fw_dbg_trigger_vif_match(struct iwl_fw_dbg_trigger_tlv *trig, { u32 trig_vif = le32_to_cpu(trig->vif_type); - return trig_vif == IWL_FW_DBG_CONF_VIF_ANY || vif->type == trig_vif; + return trig_vif == IWL_FW_DBG_CONF_VIF_ANY || + ieee80211_vif_type_p2p(vif) == trig_vif; } static inline bool -- cgit v1.1 From bdc98b13716eafc454457e068d6c8aaffaaa71e4 Mon Sep 17 00:00:00 2001 From: Sharon Dvir Date: Mon, 25 Jul 2016 16:11:05 +0300 Subject: iwlwifi: mvm: check if vif is NULL before using it wdev_to_ieee80211_vif() might return NULL. Check that vif != NULL before dereferencing it. 
Signed-off-by: Sharon Dvir Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index b4fc86d..6a615bb 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -467,6 +467,8 @@ struct iwl_mvm_vif { static inline struct iwl_mvm_vif * iwl_mvm_vif_from_mac80211(struct ieee80211_vif *vif) { + if (!vif) + return NULL; return (void *)vif->drv_priv; } -- cgit v1.1 From ff6e58e648ed5f3cc43891767811d5c3c88bbd41 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 3 Aug 2016 22:06:43 +0300 Subject: iwlwifi: mvm: don't use ret when not initialised fw-dbg code return ret but that variable was either 0 or not initialised. Return 0 always. Signed-off-by: Emmanuel Grumbach Fixes: 6a95126763fb ("iwlwifi: mvm: send dbg config hcmds to fw if set in tlv") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c index 1abcabb..46b52bf 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c @@ -960,5 +960,6 @@ int iwl_mvm_start_fw_dbg_conf(struct iwl_mvm *mvm, u8 conf_id) } mvm->fw_dbg_conf = conf_id; - return ret; + + return 0; } -- cgit v1.1 From a904a08b5fee5317ff0f7b8212aa5d0776795a52 Mon Sep 17 00:00:00 2001 From: "Peer, Ilan" Date: Wed, 10 Aug 2016 13:48:35 +0300 Subject: iwlwifi: mvm: Advertise support for AP channel width change The iwlmvm driver supports channel width change in AP mode. Add the proper flag. 
Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 6d60645..5dd77e3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -624,6 +624,7 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) hw->wiphy->features |= NL80211_FEATURE_P2P_GO_CTWIN | NL80211_FEATURE_LOW_PRIORITY_SCAN | NL80211_FEATURE_P2P_GO_OPPPS | + NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE | NL80211_FEATURE_DYNAMIC_SMPS | NL80211_FEATURE_STATIC_SMPS | NL80211_FEATURE_SUPPORTS_WMM_ADMISSION; -- cgit v1.1 From f1ae02b186d9b37ee621c7e922ecf5db96f5fb5c Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Sun, 6 Mar 2016 15:08:55 +0200 Subject: iwlwifi: mvm: allow same PN for de-aggregated AMSDU The 9000 hardware will de-aggregate AMSDUs. In the process it will copy the mac header "as is" to the new MPDUs. This means driver should allow the same PN for MPDUs originated from the same AMSDU. Do that by incrementing the PN only for the last MPDU in the sequence. 
Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index df6c32c..08d8a8a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -132,7 +132,8 @@ static inline int iwl_mvm_check_pn(struct iwl_mvm *mvm, struct sk_buff *skb, IEEE80211_CCMP_PN_LEN) <= 0) return -1; - memcpy(ptk_pn->q[queue].pn[tid], pn, IEEE80211_CCMP_PN_LEN); + if (!(stats->flag & RX_FLAG_AMSDU_MORE)) + memcpy(ptk_pn->q[queue].pn[tid], pn, IEEE80211_CCMP_PN_LEN); stats->flag |= RX_FLAG_PN_VALIDATED; return 0; @@ -883,6 +884,9 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, u8 *qc = ieee80211_get_qos_ctl(hdr); *qc &= ~IEEE80211_QOS_CTL_A_MSDU_PRESENT; + if (!(desc->amsdu_info & + IWL_RX_MPDU_AMSDU_LAST_SUBFRAME)) + rx_status->flag |= RX_FLAG_AMSDU_MORE; } if (baid != IWL_RX_REORDER_DATA_INVALID_BAID) iwl_mvm_agg_rx_received(mvm, baid); -- cgit v1.1 From 8e160ab83a32a16cd45d82778aca1ec3e51b802b Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Mon, 11 Apr 2016 11:37:38 +0300 Subject: iwlwifi: mvm: support GMAC protocol Add support for installing and removing GMAC key for newer FW versions that support GCM and MFP. GMAC provides authentication and integrity for multicast management frames. Firmware API was changed, update the driver accordingly. 
Signed-off-by: Ayala Beker Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/mvm/fw-api-sta.h | 31 ++++++++++++---- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 18 ++++++++-- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 42 ++++++++++++++++++---- 4 files changed, 76 insertions(+), 17 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-sta.h index d1c4fb8..6c8e3ca 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-sta.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-sta.h @@ -433,25 +433,42 @@ struct iwl_mvm_rm_sta_cmd { } __packed; /* REMOVE_STA_CMD_API_S_VER_2 */ /** + * struct iwl_mvm_mgmt_mcast_key_cmd_v1 + * ( MGMT_MCAST_KEY = 0x1f ) + * @ctrl_flags: %iwl_sta_key_flag + * @igtk: + * @k1: unused + * @k2: unused + * @sta_id: station ID that support IGTK + * @key_id: + * @receive_seq_cnt: initial RSC/PN needed for replay check + */ +struct iwl_mvm_mgmt_mcast_key_cmd_v1 { + __le32 ctrl_flags; + u8 igtk[16]; + u8 k1[16]; + u8 k2[16]; + __le32 key_id; + __le32 sta_id; + __le64 receive_seq_cnt; +} __packed; /* SEC_MGMT_MULTICAST_KEY_CMD_API_S_VER_1 */ + +/** * struct iwl_mvm_mgmt_mcast_key_cmd * ( MGMT_MCAST_KEY = 0x1f ) * @ctrl_flags: %iwl_sta_key_flag - * @IGTK: - * @K1: unused - * @K2: unused + * @igtk: IGTK master key * @sta_id: station ID that support IGTK * @key_id: * @receive_seq_cnt: initial RSC/PN needed for replay check */ struct iwl_mvm_mgmt_mcast_key_cmd { __le32 ctrl_flags; - u8 IGTK[16]; - u8 K1[16]; - u8 K2[16]; + u8 igtk[32]; __le32 key_id; __le32 sta_id; __le64 receive_seq_cnt; -} __packed; /* SEC_MGMT_MULTICAST_KEY_CMD_API_S_VER_1 */ +} __packed; /* SEC_MGMT_MULTICAST_KEY_CMD_API_S_VER_2 */ struct iwl_mvm_wep_key { u8 key_index; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 6d60645..f5290c4 100644 --- 
a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -465,7 +465,7 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) hw->uapsd_queues = IWL_MVM_UAPSD_QUEUES; hw->uapsd_max_sp_len = IWL_UAPSD_MAX_SP; - BUILD_BUG_ON(ARRAY_SIZE(mvm->ciphers) < ARRAY_SIZE(mvm_ciphers) + 4); + BUILD_BUG_ON(ARRAY_SIZE(mvm->ciphers) < ARRAY_SIZE(mvm_ciphers) + 6); memcpy(mvm->ciphers, mvm_ciphers, sizeof(mvm_ciphers)); hw->wiphy->n_cipher_suites = ARRAY_SIZE(mvm_ciphers); hw->wiphy->cipher_suites = mvm->ciphers; @@ -490,6 +490,14 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) mvm->ciphers[hw->wiphy->n_cipher_suites] = WLAN_CIPHER_SUITE_AES_CMAC; hw->wiphy->n_cipher_suites++; + if (iwl_mvm_has_new_rx_api(mvm)) { + mvm->ciphers[hw->wiphy->n_cipher_suites] = + WLAN_CIPHER_SUITE_BIP_GMAC_128; + hw->wiphy->n_cipher_suites++; + mvm->ciphers[hw->wiphy->n_cipher_suites] = + WLAN_CIPHER_SUITE_BIP_GMAC_256; + hw->wiphy->n_cipher_suites++; + } } /* currently FW API supports only one optional cipher scheme */ @@ -2746,6 +2754,8 @@ static int iwl_mvm_mac_set_key(struct ieee80211_hw *hw, key->flags |= IEEE80211_KEY_FLAG_PUT_IV_SPACE; break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: WARN_ON_ONCE(!ieee80211_hw_check(hw, MFP_CAPABLE)); break; case WLAN_CIPHER_SUITE_WEP40: @@ -2779,9 +2789,11 @@ static int iwl_mvm_mac_set_key(struct ieee80211_hw *hw, * GTK on AP interface is a TX-only key, return 0; * on IBSS they're per-station and because we're lazy * we don't support them for RX, so do the same. - * CMAC in AP/IBSS modes must be done in software. + * CMAC/GMAC in AP/IBSS modes must be done in software. 
*/ - if (key->cipher == WLAN_CIPHER_SUITE_AES_CMAC) + if (key->cipher == WLAN_CIPHER_SUITE_AES_CMAC || + key->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 || + key->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256) ret = -EOPNOTSUPP; else ret = 0; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index b4fc86d..0b0855a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -707,7 +707,7 @@ enum iwl_mvm_queue_status { }; #define IWL_MVM_DQA_QUEUE_TIMEOUT (5 * HZ) -#define IWL_MVM_NUM_CIPHERS 8 +#define IWL_MVM_NUM_CIPHERS 10 struct iwl_mvm { /* for logger access */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 3130b9c..5960eb4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -2412,9 +2412,15 @@ static int iwl_mvm_send_sta_igtk(struct iwl_mvm *mvm, struct iwl_mvm_mgmt_mcast_key_cmd igtk_cmd = {}; /* verify the key details match the required command's expectations */ - if (WARN_ON((keyconf->cipher != WLAN_CIPHER_SUITE_AES_CMAC) || - (keyconf->flags & IEEE80211_KEY_FLAG_PAIRWISE) || - (keyconf->keyidx != 4 && keyconf->keyidx != 5))) + if (WARN_ON((keyconf->flags & IEEE80211_KEY_FLAG_PAIRWISE) || + (keyconf->keyidx != 4 && keyconf->keyidx != 5) || + (keyconf->cipher != WLAN_CIPHER_SUITE_AES_CMAC && + keyconf->cipher != WLAN_CIPHER_SUITE_BIP_GMAC_128 && + keyconf->cipher != WLAN_CIPHER_SUITE_BIP_GMAC_256))) + return -EINVAL; + + if (WARN_ON(!iwl_mvm_has_new_rx_api(mvm) && + keyconf->cipher != WLAN_CIPHER_SUITE_AES_CMAC)) return -EINVAL; igtk_cmd.key_id = cpu_to_le32(keyconf->keyidx); @@ -2430,11 +2436,18 @@ static int iwl_mvm_send_sta_igtk(struct iwl_mvm *mvm, case WLAN_CIPHER_SUITE_AES_CMAC: igtk_cmd.ctrl_flags |= cpu_to_le32(STA_KEY_FLG_CCM); break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + 
igtk_cmd.ctrl_flags |= cpu_to_le32(STA_KEY_FLG_GCMP); + break; default: return -EINVAL; } - memcpy(igtk_cmd.IGTK, keyconf->key, keyconf->keylen); + memcpy(igtk_cmd.igtk, keyconf->key, keyconf->keylen); + if (keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256) + igtk_cmd.ctrl_flags |= + cpu_to_le32(STA_KEY_FLG_KEY_32BYTES); ieee80211_get_key_rx_seq(keyconf, 0, &seq); pn = seq.aes_cmac.pn; igtk_cmd.receive_seq_cnt = cpu_to_le64(((u64) pn[5] << 0) | @@ -2449,6 +2462,19 @@ static int iwl_mvm_send_sta_igtk(struct iwl_mvm *mvm, remove_key ? "removing" : "installing", igtk_cmd.sta_id); + if (!iwl_mvm_has_new_rx_api(mvm)) { + struct iwl_mvm_mgmt_mcast_key_cmd_v1 igtk_cmd_v1 = { + .ctrl_flags = igtk_cmd.ctrl_flags, + .key_id = igtk_cmd.key_id, + .sta_id = igtk_cmd.sta_id, + .receive_seq_cnt = igtk_cmd.receive_seq_cnt + }; + + memcpy(igtk_cmd_v1.igtk, igtk_cmd.igtk, + ARRAY_SIZE(igtk_cmd_v1.igtk)); + return iwl_mvm_send_cmd_pdu(mvm, MGMT_MCAST_KEY, 0, + sizeof(igtk_cmd_v1), &igtk_cmd_v1); + } return iwl_mvm_send_cmd_pdu(mvm, MGMT_MCAST_KEY, 0, sizeof(igtk_cmd), &igtk_cmd); } @@ -2573,7 +2599,9 @@ int iwl_mvm_set_sta_key(struct iwl_mvm *mvm, } sta_id = mvm_sta->sta_id; - if (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC) { + if (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC || + keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 || + keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256) { ret = iwl_mvm_send_sta_igtk(mvm, keyconf, sta_id, false); goto end; } @@ -2659,7 +2687,9 @@ int iwl_mvm_remove_sta_key(struct iwl_mvm *mvm, IWL_DEBUG_WEP(mvm, "mvm remove dynamic key: idx=%d sta=%d\n", keyconf->keyidx, sta_id); - if (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC) + if (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC || + keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 || + keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256) return iwl_mvm_send_sta_igtk(mvm, keyconf, sta_id, true); if (!__test_and_clear_bit(keyconf->hw_key_idx, mvm->fw_key_table)) { -- cgit v1.1 From 
d975d72016bb2540eff3018c3c0dd96688711748 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 4 Jul 2016 11:52:07 +0300 Subject: iwlwifi: mvm: support new paging command format For a000 devices there is a support of 64 bit DMA addressing. The paging command was changed accordingly - support it. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h | 8 +++++-- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 28 ++++++++++++++++++------- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 6 ++++++ 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h index 71076f0..57b574b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h @@ -482,13 +482,17 @@ struct iwl_nvm_access_cmd { * @block_size: the block size in powers of 2 * @block_num: number of blocks specified in the command. * @device_phy_addr: virtual addresses from device side + * 32 bit address for API version 1, 64 bit address for API version 2. 
*/ struct iwl_fw_paging_cmd { __le32 flags; __le32 block_size; __le32 block_num; - __le32 device_phy_addr[NUM_OF_FW_PAGING_BLOCKS]; -} __packed; /* FW_PAGING_BLOCK_CMD_API_S_VER_1 */ + union { + __le32 addr32[NUM_OF_FW_PAGING_BLOCKS]; + __le64 addr64[NUM_OF_FW_PAGING_BLOCKS]; + } device_phy_addr; +} __packed; /* FW_PAGING_BLOCK_CMD_API_S_VER_2 */ /* * Fw items ID's diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 7e0cdbf..47e8e70 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -385,9 +385,7 @@ static int iwl_save_fw_paging(struct iwl_mvm *mvm, /* send paging cmd to FW in case CPU2 has paging image */ static int iwl_send_paging_cmd(struct iwl_mvm *mvm, const struct fw_img *fw) { - int blk_idx; - __le32 dev_phy_addr; - struct iwl_fw_paging_cmd fw_paging_cmd = { + struct iwl_fw_paging_cmd paging_cmd = { .flags = cpu_to_le32(PAGING_CMD_IS_SECURED | PAGING_CMD_IS_ENABLED | @@ -396,18 +394,32 @@ static int iwl_send_paging_cmd(struct iwl_mvm *mvm, const struct fw_img *fw) .block_size = cpu_to_le32(BLOCK_2_EXP_SIZE), .block_num = cpu_to_le32(mvm->num_of_paging_blk), }; + int blk_idx, size = sizeof(paging_cmd); + + /* A bit hard coded - but this is the old API and will be deprecated */ + if (!iwl_mvm_has_new_tx_api(mvm)) + size -= NUM_OF_FW_PAGING_BLOCKS * 4; /* loop for for all paging blocks + CSS block */ for (blk_idx = 0; blk_idx < mvm->num_of_paging_blk + 1; blk_idx++) { - dev_phy_addr = - cpu_to_le32(mvm->fw_paging_db[blk_idx].fw_paging_phys >> - PAGE_2_EXP_SIZE); - fw_paging_cmd.device_phy_addr[blk_idx] = dev_phy_addr; + dma_addr_t addr = mvm->fw_paging_db[blk_idx].fw_paging_phys; + + addr = addr >> PAGE_2_EXP_SIZE; + + if (iwl_mvm_has_new_tx_api(mvm)) { + __le64 phy_addr = cpu_to_le64(addr); + + paging_cmd.device_phy_addr.addr64[blk_idx] = phy_addr; + } else { + __le32 phy_addr = cpu_to_le32(addr); + + paging_cmd.device_phy_addr.addr32[blk_idx] = 
phy_addr; + } } return iwl_mvm_send_cmd_pdu(mvm, iwl_cmd_id(FW_PAGING_BLOCK_CMD, IWL_ALWAYS_LONG_GROUP, 0), - 0, sizeof(fw_paging_cmd), &fw_paging_cmd); + 0, size, &paging_cmd); } /* diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 0b0855a..28ebc12 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1192,6 +1192,12 @@ static inline bool iwl_mvm_has_new_rx_api(struct iwl_mvm *mvm) IWL_UCODE_TLV_CAPA_MULTI_QUEUE_RX_SUPPORT); } +static inline bool iwl_mvm_has_new_tx_api(struct iwl_mvm *mvm) +{ + /* TODO - replace with TLV once defined */ + return mvm->trans->cfg->use_tfh; +} + static inline bool iwl_mvm_is_tt_in_fw(struct iwl_mvm *mvm) { #ifdef CONFIG_THERMAL -- cgit v1.1 From ea78d80866ce375defb2fdd1c8a3aafec95e0f85 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Aug 2016 13:51:39 +1000 Subject: xfs: track log done items directly in the deferred pending work item Christoph reports slab corruption when a deferred refcount update aborts during _defer_finish(). The cause of this was broken log item state tracking in xfs_defer_pending -- upon an abort, _defer_trans_abort() will call abort_intent on all intent items, including the ones that have already had a done item attached. This is incorrect because each intent item has 2 refcount: the first is released when the intent item is committed to the log; and the second is released when the _done_ item is committed to the log, or by the intent creator if there is no done item. In other words, once we log the done item, responsibility for releasing the intent item's second refcount is transferred to the done item and /must not/ be performed by anything else. The dfp_committed flag should have been tracking whether or not we had a done item so that _defer_trans_abort could decide if it needs to abort the intent item, but due to a thinko this was not the case. 
Rip it out and track the done item directly so that we do the right thing w.r.t. intent item freeing. Signed-off-by: Darrick J. Wong Reported-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_defer.c | 17 ++++------------- fs/xfs/libxfs/xfs_defer.h | 2 +- fs/xfs/xfs_trace.h | 2 +- 3 files changed, 6 insertions(+), 15 deletions(-) diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index 054a203..c221d0e 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -194,7 +194,7 @@ xfs_defer_trans_abort( /* Abort intent items. */ list_for_each_entry(dfp, &dop->dop_pending, dfp_list) { trace_xfs_defer_pending_abort(tp->t_mountp, dfp); - if (dfp->dfp_committed) + if (!dfp->dfp_done) dfp->dfp_type->abort_intent(dfp->dfp_intent); } @@ -290,7 +290,6 @@ xfs_defer_finish( struct xfs_defer_pending *dfp; struct list_head *li; struct list_head *n; - void *done_item = NULL; void *state; int error = 0; void (*cleanup_fn)(struct xfs_trans *, void *, int); @@ -309,19 +308,11 @@ xfs_defer_finish( if (error) goto out; - /* Mark all pending intents as committed. */ - list_for_each_entry_reverse(dfp, &dop->dop_pending, dfp_list) { - if (dfp->dfp_committed) - break; - trace_xfs_defer_pending_commit((*tp)->t_mountp, dfp); - dfp->dfp_committed = true; - } - /* Log an intent-done item for the first pending item. */ dfp = list_first_entry(&dop->dop_pending, struct xfs_defer_pending, dfp_list); trace_xfs_defer_pending_finish((*tp)->t_mountp, dfp); - done_item = dfp->dfp_type->create_done(*tp, dfp->dfp_intent, + dfp->dfp_done = dfp->dfp_type->create_done(*tp, dfp->dfp_intent, dfp->dfp_count); cleanup_fn = dfp->dfp_type->finish_cleanup; @@ -331,7 +322,7 @@ xfs_defer_finish( list_del(li); dfp->dfp_count--; error = dfp->dfp_type->finish_item(*tp, dop, li, - done_item, &state); + dfp->dfp_done, &state); if (error) { /* * Clean up after ourselves and jump out. 
@@ -428,8 +419,8 @@ xfs_defer_add( dfp = kmem_alloc(sizeof(struct xfs_defer_pending), KM_SLEEP | KM_NOFS); dfp->dfp_type = defer_op_types[type]; - dfp->dfp_committed = false; dfp->dfp_intent = NULL; + dfp->dfp_done = NULL; dfp->dfp_count = 0; INIT_LIST_HEAD(&dfp->dfp_work); list_add_tail(&dfp->dfp_list, &dop->dop_intake); diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h index cc3981c..e96533d 100644 --- a/fs/xfs/libxfs/xfs_defer.h +++ b/fs/xfs/libxfs/xfs_defer.h @@ -30,8 +30,8 @@ struct xfs_defer_op_type; struct xfs_defer_pending { const struct xfs_defer_op_type *dfp_type; /* function pointers */ struct list_head dfp_list; /* pending items */ - bool dfp_committed; /* committed trans? */ void *dfp_intent; /* log intent item */ + void *dfp_done; /* log done item */ struct list_head dfp_work; /* work items */ unsigned int dfp_count; /* # extent items */ }; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 7e88bec..d303a66 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -2295,7 +2295,7 @@ DECLARE_EVENT_CLASS(xfs_defer_pending_class, __entry->dev = mp ? mp->m_super->s_dev : 0; __entry->type = dfp->dfp_type->type; __entry->intent = dfp->dfp_intent; - __entry->committed = dfp->dfp_committed; + __entry->committed = dfp->dfp_done != NULL; __entry->nr = dfp->dfp_count; ), TP_printk("dev %d:%d optype %d intent %p committed %d nr %d\n", -- cgit v1.1 From 311042d1b67d9a1856a8e1294e7729fb86f64014 Mon Sep 17 00:00:00 2001 From: Shrirang Bagul Date: Mon, 29 Aug 2016 15:19:27 +0800 Subject: ALSA: hda - Add headset mic quirk for Dell Inspiron 5468 This patch enables headset microphone on some variants of Dell Inspiron 5468. 
(Dell SSID 0x07ad) BugLink: https://bugs.launchpad.net/bugs/1617900 Signed-off-by: Shrirang Bagul Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 7100f05..02fd252 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5895,6 +5895,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x12, 0x90a60170}, {0x14, 0x90170120}, {0x21, 0x02211030}), + SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell Inspiron 5468", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x12, 0x90a60180}, + {0x14, 0x90170120}, + {0x21, 0x02211030}), SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, ALC256_STANDARD_PINS), SND_HDA_PIN_QUIRK(0x10ec0280, 0x103c, "HP", ALC280_FIXUP_HP_GPIO4, -- cgit v1.1 From 554d072e7bc3e56de5893c8181110a547b2062c9 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Mon, 29 Aug 2016 12:37:35 +0300 Subject: mac80211: TDLS: don't require beaconing for AP BW Stop downgrading TDLS chandef when reaching the AP BW. The AP provides the necessary regulatory protection in this case. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=153961, which reported an infinite loop here. 
Reported-by: Kamil Toman Signed-off-by: Arik Nemtsov Signed-off-by: Johannes Berg --- net/mac80211/tdls.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index b5d28f1..afca7d1 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -333,10 +333,11 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata, if (!uc.center_freq1) return; - /* proceed to downgrade the chandef until usable or the same */ + /* proceed to downgrade the chandef until usable or the same as AP BW */ while (uc.width > max_width || - !cfg80211_reg_can_beacon_relax(sdata->local->hw.wiphy, &uc, - sdata->wdev.iftype)) + (uc.width > sta->tdls_chandef.width && + !cfg80211_reg_can_beacon_relax(sdata->local->hw.wiphy, &uc, + sdata->wdev.iftype))) ieee80211_chandef_downgrade(&uc); if (!cfg80211_chandef_identical(&uc, &sta->tdls_chandef)) { -- cgit v1.1 From 61aaa0e8c1c15d9e045f0577f046be50f2f571ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Fri, 19 Aug 2016 22:02:48 +0200 Subject: cfg80211: Add stub for cfg80211_get_station() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows modules using this function (currently: batman-adv) to compile even if cfg80211 is not built at all, thus relaxing dependencies. 
Signed-off-by: Linus Lüssing Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 9c23f4d3..beb7610 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1102,6 +1102,7 @@ struct station_info { struct cfg80211_tid_stats pertid[IEEE80211_NUM_TIDS + 1]; }; +#if IS_ENABLED(CONFIG_CFG80211) /** * cfg80211_get_station - retrieve information about a given station * @dev: the device where the station is supposed to be connected to @@ -1114,6 +1115,14 @@ struct station_info { */ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo); +#else +static inline int cfg80211_get_station(struct net_device *dev, + const u8 *mac_addr, + struct station_info *sinfo) +{ + return -ENOENT; +} +#endif /** * enum monitor_flags - monitor flags -- cgit v1.1 From a474478642d57641ea06645104a15acc0420f01a Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 29 Aug 2016 17:51:24 +0200 Subject: drm/imx: fix crtc vblank state regression The atomic conversion lost the notification to let the DRM core know about the current state of the CRTC vblank interrupts. This regressed the ability of the core to reject page flip attempts on currently disabled CRTCs. Add back the notifications. 
Signed-off-by: Lucas Stach Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/ipuv3-crtc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c index 08e188b..462056e 100644 --- a/drivers/gpu/drm/imx/ipuv3-crtc.c +++ b/drivers/gpu/drm/imx/ipuv3-crtc.c @@ -76,6 +76,8 @@ static void ipu_crtc_disable(struct drm_crtc *crtc) crtc->state->event = NULL; } spin_unlock_irq(&crtc->dev->event_lock); + + drm_crtc_vblank_off(crtc); } static void imx_drm_crtc_reset(struct drm_crtc *crtc) @@ -175,6 +177,8 @@ static int ipu_crtc_atomic_check(struct drm_crtc *crtc, static void ipu_crtc_atomic_begin(struct drm_crtc *crtc, struct drm_crtc_state *old_crtc_state) { + drm_crtc_vblank_on(crtc); + spin_lock_irq(&crtc->dev->event_lock); if (crtc->state->event) { WARN_ON(drm_crtc_vblank_get(crtc)); -- cgit v1.1 From fd06c77eb9200b53d421da5fffe0dcd894b5d72a Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 30 Aug 2016 15:36:34 +0800 Subject: ALSA: hda - Enable subwoofer on Dell Inspiron 7559 The subwoofer on Inspiron 7559 was disabled originally. Applying a pin fixup to node 0x1b can enable it and make it work. 
Old pin: 0x411111f0 New pin: 0x90170151 Signed-off-by: Kai-Heng Feng Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 02fd252..575cefd 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4855,6 +4855,7 @@ enum { ALC221_FIXUP_HP_FRONT_MIC, ALC292_FIXUP_TPT460, ALC298_FIXUP_SPK_VOLUME, + ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER, }; static const struct hda_fixup alc269_fixups[] = { @@ -5516,6 +5517,15 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC298_FIXUP_DELL1_MIC_NO_PRESENCE, }, + [ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1b, 0x90170151 }, + { } + }, + .chained = true, + .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -5560,6 +5570,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x06df, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK), SND_PCI_QUIRK(0x1028, 0x06e0, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK), SND_PCI_QUIRK(0x1028, 0x0704, "Dell XPS 13 9350", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE), + SND_PCI_QUIRK(0x1028, 0x0706, "Dell Inspiron 7559", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER), SND_PCI_QUIRK(0x1028, 0x0725, "Dell Inspiron 3162", ALC255_FIXUP_DELL_SPK_NOISE), SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE), SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME), -- cgit v1.1 From b030485220caf862c71db6fb8b8ad016ce7f7565 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Sun, 21 Aug 2016 03:27:45 -0400 Subject: ARM: EXYNOS: Clear OF_POPULATED flag from PMU node in IRQ init callback The Exynos PMU node is an interrupt, clock and PMU (Power Management Unit) controller, and these 
functionalities are supported by different drivers that match the same compatible strings. Since commit 15cc2ed6dcf9 ("of/irq: Mark initialised interrupt controllers as populated") the OF core flags interrupt controllers registered with the IRQCHIP_DECLARE() macro as OF_POPULATED, so platform devices with the same compatible string as the interrupt controller will not be registered. This prevents the PMU platform device from being registered so the Exynos PMU driver is never probed. This breaks (among other things) Suspend-to-RAM. Fix this by clearing the OF_POPULATED flag in the PMU IRQ init callback, to allow the Exynos PMU platform driver to be probed. The patch is based on Philipp Zabel's "ARM: imx6: mark GPC node as not populated after irq init to probe pm domain driver". Fixes: 15cc2ed6dcf9 ("of/irq: Mark initialised interrupt controllers as populated") Signed-off-by: Javier Martinez Canillas Signed-off-by: Krzysztof Kozlowski --- arch/arm/mach-exynos/suspend.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c index 3750575..06332f6 100644 --- a/arch/arm/mach-exynos/suspend.c +++ b/arch/arm/mach-exynos/suspend.c @@ -255,6 +255,12 @@ static int __init exynos_pmu_irq_init(struct device_node *node, return -ENOMEM; } + /* + * Clear the OF_POPULATED flag set in of_irq_init so that + * later the Exynos PMU platform device won't be skipped. + */ + of_node_clear_flag(node, OF_POPULATED); + return 0; } -- cgit v1.1 From c73c2484901139c28383b58eabcbf4d613e91518 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 28 Aug 2016 16:59:52 +0800 Subject: netfilter: nf_tables_netdev: remove redundant ip_hdr assignment We have already used skb_header_pointer to get the ip header pointer, so there's no need to use ip_hdr again. Moreover, in NETDEV INGRESS hook, the ip header may not be linear, so using ip_hdr is not appropriate; remove it. 
Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_netdev.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 5eefe4a..75d696f 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -30,7 +30,6 @@ nft_netdev_set_pktinfo_ipv4(struct nft_pktinfo *pkt, if (!iph) return; - iph = ip_hdr(skb); if (iph->ihl < 5 || iph->version != 4) return; -- cgit v1.1 From 9f9af3d7d303a5f622ceb219bd03bba3af553e76 Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Wed, 23 Dec 2015 16:03:46 +0200 Subject: iwlwifi: mvm: re-aggregate shared queue after unsharing When a shared queue becomes unshared, aggregations should be re-enabled if they've existed before. Make sure that they do this, if required. Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 17 +++ drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 166 +++++++++++++++++++++---- drivers/net/wireless/intel/iwlwifi/mvm/sta.h | 4 + drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 38 +++++- drivers/net/wireless/intel/iwlwifi/mvm/utils.c | 8 +- 5 files changed, 205 insertions(+), 28 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 28ebc12..ee5a9ad 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -697,6 +697,10 @@ struct iwl_mvm_baid_data { * it. In this state, when a new queue is needed to be allocated but no * such free queue exists, an inactive queue might be freed and given to * the new RA/TID. + * @IWL_MVM_QUEUE_RECONFIGURING: queue is being reconfigured + * This is the state of a queue that has had traffic pass through it, but + * needs to be reconfigured for some reason, e.g. the queue needs to + * become unshared and aggregations re-enabled on. 
*/ enum iwl_mvm_queue_status { IWL_MVM_QUEUE_FREE, @@ -704,6 +708,7 @@ enum iwl_mvm_queue_status { IWL_MVM_QUEUE_READY, IWL_MVM_QUEUE_SHARED, IWL_MVM_QUEUE_INACTIVE, + IWL_MVM_QUEUE_RECONFIGURING, }; #define IWL_MVM_DQA_QUEUE_TIMEOUT (5 * HZ) @@ -1122,6 +1127,18 @@ static inline bool iwl_mvm_enter_d0i3_on_suspend(struct iwl_mvm *mvm) (mvm->trans->runtime_pm_mode != IWL_PLAT_PM_MODE_D0I3); } +static inline bool iwl_mvm_is_dqa_data_queue(struct iwl_mvm *mvm, u8 queue) +{ + return (queue >= IWL_MVM_DQA_MIN_DATA_QUEUE) && + (queue <= IWL_MVM_DQA_MAX_DATA_QUEUE); +} + +static inline bool iwl_mvm_is_dqa_mgmt_queue(struct iwl_mvm *mvm, u8 queue) +{ + return (queue >= IWL_MVM_DQA_MIN_MGMT_QUEUE) && + (queue <= IWL_MVM_DQA_MAX_MGMT_QUEUE); +} + static inline bool iwl_mvm_is_lar_supported(struct iwl_mvm *mvm) { bool nvm_lar = mvm->nvm_data->lar_enabled; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 5960eb4..1f235e8 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -468,6 +468,11 @@ static int iwl_mvm_get_shared_queue(struct iwl_mvm *mvm, i != IWL_MVM_DQA_BSS_CLIENT_QUEUE) continue; + /* Don't try and take queues being reconfigured */ + if (mvm->queue_info[queue].status == + IWL_MVM_QUEUE_RECONFIGURING) + continue; + ac_to_queue[mvm->queue_info[i].mac80211_ac] = i; } @@ -501,27 +506,33 @@ static int iwl_mvm_get_shared_queue(struct iwl_mvm *mvm, queue = ac_to_queue[IEEE80211_AC_VO]; /* Make sure queue found (or not) is legal */ - if (!((queue >= IWL_MVM_DQA_MIN_MGMT_QUEUE && - queue <= IWL_MVM_DQA_MAX_MGMT_QUEUE) || - (queue >= IWL_MVM_DQA_MIN_DATA_QUEUE && - queue <= IWL_MVM_DQA_MAX_DATA_QUEUE) || - (queue == IWL_MVM_DQA_BSS_CLIENT_QUEUE))) { + if (!iwl_mvm_is_dqa_data_queue(mvm, queue) && + !iwl_mvm_is_dqa_mgmt_queue(mvm, queue) && + (queue != IWL_MVM_DQA_BSS_CLIENT_QUEUE)) { IWL_ERR(mvm, "No DATA queues available to share\n"); - queue = -ENOSPC; + 
return -ENOSPC; + } + + /* Make sure the queue isn't in the middle of being reconfigured */ + if (mvm->queue_info[queue].status == IWL_MVM_QUEUE_RECONFIGURING) { + IWL_ERR(mvm, + "TXQ %d is in the middle of re-config - try again\n", + queue); + return -EBUSY; } return queue; } /* - * If a given queue has a higher AC than the TID stream that is being added to - * it, the queue needs to be redirected to the lower AC. This function does that + * If a given queue has a higher AC than the TID stream that is being compared + * to, the queue needs to be redirected to the lower AC. This function does that * in such a case, otherwise - if no redirection required - it does nothing, * unless the %force param is true. */ -static int iwl_mvm_scd_queue_redirect(struct iwl_mvm *mvm, int queue, int tid, - int ac, int ssn, unsigned int wdg_timeout, - bool force) +int iwl_mvm_scd_queue_redirect(struct iwl_mvm *mvm, int queue, int tid, + int ac, int ssn, unsigned int wdg_timeout, + bool force) { struct iwl_scd_txq_cfg_cmd cmd = { .scd_queue = queue, @@ -555,7 +566,7 @@ static int iwl_mvm_scd_queue_redirect(struct iwl_mvm *mvm, int queue, int tid, shared_queue = (mvm->queue_info[queue].hw_queue_refcount > 1); spin_unlock_bh(&mvm->queue_info_lock); - IWL_DEBUG_TX_QUEUES(mvm, "Redirecting shared TXQ #%d to FIFO #%d\n", + IWL_DEBUG_TX_QUEUES(mvm, "Redirecting TXQ #%d to FIFO #%d\n", queue, iwl_mvm_ac_to_tx_fifo[ac]); /* Stop MAC queues and wait for this queue to empty */ @@ -709,7 +720,7 @@ static int iwl_mvm_sta_alloc_queue(struct iwl_mvm *mvm, if (WARN_ON(queue <= 0)) { IWL_ERR(mvm, "No available queues for tid %d on sta_id %d\n", tid, cfg.sta_id); - return -ENOSPC; + return queue; } /* @@ -827,6 +838,84 @@ out_err: return ret; } +static void iwl_mvm_unshare_queue(struct iwl_mvm *mvm, int queue) +{ + struct ieee80211_sta *sta; + struct iwl_mvm_sta *mvmsta; + s8 sta_id; + int tid = -1; + unsigned long tid_bitmap; + unsigned int wdg_timeout; + int ssn; + int ret = true; + + 
lockdep_assert_held(&mvm->mutex); + + spin_lock_bh(&mvm->queue_info_lock); + sta_id = mvm->queue_info[queue].ra_sta_id; + tid_bitmap = mvm->queue_info[queue].tid_bitmap; + spin_unlock_bh(&mvm->queue_info_lock); + + /* Find TID for queue, and make sure it is the only one on the queue */ + tid = find_first_bit(&tid_bitmap, IWL_MAX_TID_COUNT + 1); + if (tid_bitmap != BIT(tid)) { + IWL_ERR(mvm, "Failed to unshare q %d, active tids=0x%lx\n", + queue, tid_bitmap); + return; + } + + IWL_DEBUG_TX_QUEUES(mvm, "Unsharing TXQ %d, keeping tid %d\n", queue, + tid); + + sta = rcu_dereference_protected(mvm->fw_id_to_mac_id[sta_id], + lockdep_is_held(&mvm->mutex)); + + if (WARN_ON_ONCE(IS_ERR_OR_NULL(sta))) + return; + + mvmsta = iwl_mvm_sta_from_mac80211(sta); + wdg_timeout = iwl_mvm_get_wd_timeout(mvm, mvmsta->vif, false, false); + + ssn = IEEE80211_SEQ_TO_SN(mvmsta->tid_data[tid].seq_number); + + ret = iwl_mvm_scd_queue_redirect(mvm, queue, tid, + tid_to_mac80211_ac[tid], ssn, + wdg_timeout, true); + if (ret) { + IWL_ERR(mvm, "Failed to redirect TXQ %d\n", queue); + return; + } + + /* If aggs should be turned back on - do it */ + if (mvmsta->tid_data[tid].state == IWL_AGG_ON) { + struct iwl_mvm_add_sta_cmd cmd; + + mvmsta->tid_disable_agg &= ~BIT(tid); + + cmd.mac_id_n_color = cpu_to_le32(mvmsta->mac_id_n_color); + cmd.sta_id = mvmsta->sta_id; + cmd.add_modify = STA_MODE_MODIFY; + cmd.modify_mask = STA_MODIFY_TID_DISABLE_TX; + cmd.tfd_queue_msk = cpu_to_le32(mvmsta->tfd_queue_msk); + cmd.tid_disable_tx = cpu_to_le16(mvmsta->tid_disable_agg); + + ret = iwl_mvm_send_cmd_pdu(mvm, ADD_STA, CMD_ASYNC, + iwl_mvm_add_sta_cmd_size(mvm), &cmd); + if (!ret) { + IWL_DEBUG_TX_QUEUES(mvm, + "TXQ #%d is now aggregated again\n", + queue); + + /* Mark queue intenally as aggregating again */ + iwl_trans_txq_set_shared_mode(mvm->trans, queue, false); + } + } + + spin_lock_bh(&mvm->queue_info_lock); + mvm->queue_info[queue].status = IWL_MVM_QUEUE_READY; + spin_unlock_bh(&mvm->queue_info_lock); +} 
+ static inline u8 iwl_mvm_tid_to_ac_queue(int tid) { if (tid == IWL_MAX_TID_COUNT) @@ -894,13 +983,26 @@ void iwl_mvm_add_new_dqa_stream_wk(struct work_struct *wk) struct ieee80211_sta *sta; struct iwl_mvm_sta *mvmsta; unsigned long deferred_tid_traffic; - int sta_id, tid; + int queue, sta_id, tid; /* Check inactivity of queues */ iwl_mvm_inactivity_check(mvm); mutex_lock(&mvm->mutex); + /* Reconfigure queues requiring reconfiguation */ + for (queue = 0; queue < IWL_MAX_HW_QUEUES; queue++) { + bool reconfig; + + spin_lock_bh(&mvm->queue_info_lock); + reconfig = (mvm->queue_info[queue].status == + IWL_MVM_QUEUE_RECONFIGURING); + spin_unlock_bh(&mvm->queue_info_lock); + + if (reconfig) + iwl_mvm_unshare_queue(mvm, queue); + } + /* Go over all stations with deferred traffic */ for_each_set_bit(sta_id, mvm->sta_deferred_frames, IWL_MVM_STATION_COUNT) { @@ -1956,7 +2058,7 @@ int iwl_mvm_sta_tx_agg_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif, return -EIO; } - spin_lock_bh(&mvm->queue_info_lock); + spin_lock(&mvm->queue_info_lock); /* * Note the possible cases: @@ -1967,14 +2069,20 @@ int iwl_mvm_sta_tx_agg_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif, * non-DQA mode, since the TXQ hasn't yet been allocated */ txq_id = mvmsta->tid_data[tid].txq_id; - if (!iwl_mvm_is_dqa_supported(mvm) || + if (iwl_mvm_is_dqa_supported(mvm) && + unlikely(mvm->queue_info[txq_id].status == IWL_MVM_QUEUE_SHARED)) { + ret = -ENXIO; + IWL_DEBUG_TX_QUEUES(mvm, + "Can't start tid %d agg on shared queue!\n", + tid); + goto release_locks; + } else if (!iwl_mvm_is_dqa_supported(mvm) || mvm->queue_info[txq_id].status != IWL_MVM_QUEUE_READY) { txq_id = iwl_mvm_find_free_queue(mvm, mvmsta->sta_id, mvm->first_agg_queue, mvm->last_agg_queue); if (txq_id < 0) { ret = txq_id; - spin_unlock_bh(&mvm->queue_info_lock); IWL_ERR(mvm, "Failed to allocate agg queue\n"); goto release_locks; } @@ -1982,7 +2090,8 @@ int iwl_mvm_sta_tx_agg_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif, /* TXQ 
hasn't yet been enabled, so mark it only as reserved */ mvm->queue_info[txq_id].status = IWL_MVM_QUEUE_RESERVED; } - spin_unlock_bh(&mvm->queue_info_lock); + + spin_unlock(&mvm->queue_info_lock); IWL_DEBUG_TX_QUEUES(mvm, "AGG for tid %d will be on queue #%d\n", @@ -2006,8 +2115,11 @@ int iwl_mvm_sta_tx_agg_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif, } ret = 0; + goto out; release_locks: + spin_unlock(&mvm->queue_info_lock); +out: spin_unlock_bh(&mvmsta->lock); return ret; @@ -2023,6 +2135,7 @@ int iwl_mvm_sta_tx_agg_oper(struct iwl_mvm *mvm, struct ieee80211_vif *vif, iwl_mvm_get_wd_timeout(mvm, vif, sta->tdls, false); int queue, ret; bool alloc_queue = true; + enum iwl_mvm_queue_status queue_status; u16 ssn; struct iwl_trans_txq_scd_cfg cfg = { @@ -2048,13 +2161,15 @@ int iwl_mvm_sta_tx_agg_oper(struct iwl_mvm *mvm, struct ieee80211_vif *vif, cfg.fifo = iwl_mvm_ac_to_tx_fifo[tid_to_mac80211_ac[tid]]; + spin_lock_bh(&mvm->queue_info_lock); + queue_status = mvm->queue_info[queue].status; + spin_unlock_bh(&mvm->queue_info_lock); + /* In DQA mode, the existing queue might need to be reconfigured */ if (iwl_mvm_is_dqa_supported(mvm)) { - spin_lock_bh(&mvm->queue_info_lock); /* Maybe there is no need to even alloc a queue... 
*/ if (mvm->queue_info[queue].status == IWL_MVM_QUEUE_READY) alloc_queue = false; - spin_unlock_bh(&mvm->queue_info_lock); /* * Only reconfig the SCD for the queue if the window size has @@ -2089,9 +2204,12 @@ int iwl_mvm_sta_tx_agg_oper(struct iwl_mvm *mvm, struct ieee80211_vif *vif, vif->hw_queue[tid_to_mac80211_ac[tid]], ssn, &cfg, wdg_timeout); - ret = iwl_mvm_sta_tx_agg(mvm, sta, tid, queue, true); - if (ret) - return -EIO; + /* Send ADD_STA command to enable aggs only if the queue isn't shared */ + if (queue_status != IWL_MVM_QUEUE_SHARED) { + ret = iwl_mvm_sta_tx_agg(mvm, sta, tid, queue, true); + if (ret) + return -EIO; + } /* No need to mark as reserved */ spin_lock_bh(&mvm->queue_info_lock); @@ -2123,7 +2241,6 @@ int iwl_mvm_sta_tx_agg_stop(struct iwl_mvm *mvm, struct ieee80211_vif *vif, u16 txq_id; int err; - /* * If mac80211 is cleaning its state, then say that we finished since * our state has been cleared anyway. @@ -2152,6 +2269,7 @@ int iwl_mvm_sta_tx_agg_stop(struct iwl_mvm *mvm, struct ieee80211_vif *vif, */ if (mvm->queue_info[txq_id].status == IWL_MVM_QUEUE_RESERVED) mvm->queue_info[txq_id].status = IWL_MVM_QUEUE_FREE; + spin_unlock_bh(&mvm->queue_info_lock); switch (tid_data->state) { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h index bbc1cab..709542b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h @@ -554,4 +554,8 @@ void iwl_mvm_modify_all_sta_disable_tx(struct iwl_mvm *mvm, void iwl_mvm_csa_client_absent(struct iwl_mvm *mvm, struct ieee80211_vif *vif); void iwl_mvm_add_new_dqa_stream_wk(struct work_struct *wk); +int iwl_mvm_scd_queue_redirect(struct iwl_mvm *mvm, int queue, int tid, + int ac, int ssn, unsigned int wdg_timeout, + bool force); + #endif /* __sta_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index c6585ab..8b91544 100644 --- 
a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -838,6 +838,22 @@ static void iwl_mvm_tx_add_stream(struct iwl_mvm *mvm, } } +/* Check if there are any timed-out TIDs on a given shared TXQ */ +static bool iwl_mvm_txq_should_update(struct iwl_mvm *mvm, int txq_id) +{ + unsigned long queue_tid_bitmap = mvm->queue_info[txq_id].tid_bitmap; + unsigned long now = jiffies; + int tid; + + for_each_set_bit(tid, &queue_tid_bitmap, IWL_MAX_TID_COUNT + 1) { + if (time_before(mvm->queue_info[txq_id].last_frame_time[tid] + + IWL_MVM_DQA_QUEUE_TIMEOUT, now)) + return true; + } + + return false; +} + /* * Sets the fields in the Tx cmd that are crypto related */ @@ -940,7 +956,6 @@ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb, iwl_trans_free_tx_cmd(mvm->trans, dev_cmd); spin_unlock(&mvmsta->lock); return 0; - } /* If we are here - TXQ exists and needs to be re-activated */ @@ -953,8 +968,25 @@ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb, txq_id); } - /* Keep track of the time of the last frame for this RA/TID */ - mvm->queue_info[txq_id].last_frame_time[tid] = jiffies; + if (iwl_mvm_is_dqa_supported(mvm)) { + /* Keep track of the time of the last frame for this RA/TID */ + mvm->queue_info[txq_id].last_frame_time[tid] = jiffies; + + /* + * If we have timed-out TIDs - schedule the worker that will + * reconfig the queues and update them + * + * Note that the mvm->queue_info_lock isn't being taken here in + * order to not serialize the TX flow. This isn't dangerous + * because scheduling mvm->add_stream_wk can't ruin the state, + * and if we DON'T schedule it due to some race condition then + * next TX we get here we will. 
+ */ + if (unlikely(mvm->queue_info[txq_id].status == + IWL_MVM_QUEUE_SHARED && + iwl_mvm_txq_should_update(mvm, txq_id))) + schedule_work(&mvm->add_stream_wk); + } IWL_DEBUG_TX(mvm, "TX to [%d|%d] Q:%d - seq: 0x%x\n", mvmsta->sta_id, tid, txq_id, IEEE80211_SEQ_TO_SN(seq_number)); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index 68f4e7f..dae64a6 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -1131,7 +1131,13 @@ static void iwl_mvm_remove_inactive_tids(struct iwl_mvm *mvm, BIT(mvmsta->vif->hw_queue[tid_to_mac80211_ac[tid]]); } - /* TODO: if queue was shared - need to re-enable AGGs */ + /* If the queue is marked as shared - "unshare" it */ + if (mvm->queue_info[queue].hw_queue_refcount == 1 && + mvm->queue_info[queue].status == IWL_MVM_QUEUE_SHARED) { + mvm->queue_info[queue].status = IWL_MVM_QUEUE_RECONFIGURING; + IWL_DEBUG_TX_QUEUES(mvm, "Marking Q:%d for reconfig\n", + queue); + } } void iwl_mvm_inactivity_check(struct iwl_mvm *mvm) -- cgit v1.1 From edbe961cf44eed1d3b78f3b1eee0dad013ad927f Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Tue, 2 Feb 2016 15:43:32 +0200 Subject: iwlwifi: mvm: keep track of tid associated with each queue When sending the SCD_QUEUE_CONFIG command, the queue is associated to a specific TID. If later there is a need to use this TID on a different queue instead, it first needs to be unassociated from the first queue. Keep track for every queue what TID is associated with it. 
Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 1 + drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 7 +++++++ drivers/net/wireless/intel/iwlwifi/mvm/utils.c | 3 +++ 3 files changed, 11 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index ee5a9ad..1806495 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -772,6 +772,7 @@ struct iwl_mvm { u8 ra_sta_id; /* The RA this queue is mapped to, if exists */ bool reserved; /* Is this the TXQ reserved for a STA */ u8 mac80211_ac; /* The mac80211 AC this queue is mapped to */ + u8 txq_tid; /* The TID "owner" of this queue*/ u16 tid_bitmap; /* Bitmap of the TIDs mapped to this queue */ /* Timestamp for inactivation per TID of this queue */ unsigned long last_frame_time[IWL_MAX_TID_COUNT + 1]; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 1f235e8..730ba78 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -562,6 +562,7 @@ int iwl_mvm_scd_queue_redirect(struct iwl_mvm *mvm, int queue, int tid, cmd.sta_id = mvm->queue_info[queue].ra_sta_id; cmd.tx_fifo = iwl_mvm_ac_to_tx_fifo[mvm->queue_info[queue].mac80211_ac]; + cmd.tid = mvm->queue_info[queue].txq_tid; mq = mvm->queue_info[queue].hw_queue_to_mac80211; shared_queue = (mvm->queue_info[queue].hw_queue_refcount > 1); spin_unlock_bh(&mvm->queue_info_lock); @@ -591,6 +592,11 @@ int iwl_mvm_scd_queue_redirect(struct iwl_mvm *mvm, int queue, int tid, cmd.sta_id, tid, LINK_QUAL_AGG_FRAME_LIMIT_DEF, ssn, wdg_timeout); + /* Update the TID "owner" of the queue */ + spin_lock_bh(&mvm->queue_info_lock); + mvm->queue_info[queue].txq_tid = tid; + spin_unlock_bh(&mvm->queue_info_lock); + /* TODO: Work-around SCD bug when moving back by multiples of 0x40 */ /* Redirect to lower AC */ 
@@ -749,6 +755,7 @@ static int iwl_mvm_sta_alloc_queue(struct iwl_mvm *mvm, ac = mvm->queue_info[queue].mac80211_ac; cmd.sta_id = mvm->queue_info[queue].ra_sta_id; cmd.tx_fifo = iwl_mvm_ac_to_tx_fifo[ac]; + cmd.tid = mvm->queue_info[queue].txq_tid; spin_unlock_bh(&mvm->queue_info_lock); /* Disable the queue */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index dae64a6..423efab 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -669,6 +669,8 @@ void iwl_mvm_enable_txq(struct iwl_mvm *mvm, int queue, int mac80211_queue, tid_to_mac80211_ac[cfg->tid]; else mvm->queue_info[queue].mac80211_ac = IEEE80211_AC_VO; + + mvm->queue_info[queue].txq_tid = cfg->tid; } IWL_DEBUG_TX_QUEUES(mvm, @@ -761,6 +763,7 @@ void iwl_mvm_disable_txq(struct iwl_mvm *mvm, int queue, int mac80211_queue, } cmd.sta_id = mvm->queue_info[queue].ra_sta_id; + cmd.tid = mvm->queue_info[queue].txq_tid; /* Make sure queue info is correct even though we overwrite it */ WARN(mvm->queue_info[queue].hw_queue_refcount || -- cgit v1.1 From 8d98ae6eb0d51f75a7af51758072558ffbb8270f Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Tue, 2 Feb 2016 16:02:46 +0200 Subject: iwlwifi: mvm: re-assign old queues after hw restart in dqa mode When working in DQA mode, if a queue is shared and a HW restart occurs, there might be a possible race condition between stations on the queues, and an existing queue might be left with no queues. To solve this, make sure in DQA mode to re-assign the same queues as before the HW restart. 
Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 72 +++++++++++++++++++++++++++- 1 file changed, 70 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 730ba78..1ddcbea 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -759,8 +759,9 @@ static int iwl_mvm_sta_alloc_queue(struct iwl_mvm *mvm, spin_unlock_bh(&mvm->queue_info_lock); /* Disable the queue */ - iwl_mvm_invalidate_sta_queue(mvm, queue, disable_agg_tids, - true); + if (disable_agg_tids) + iwl_mvm_invalidate_sta_queue(mvm, queue, + disable_agg_tids, false); iwl_trans_txq_disable(mvm->trans, queue, false); ret = iwl_mvm_send_cmd_pdu(mvm, SCD_QUEUE_CFG, 0, sizeof(cmd), &cmd); @@ -776,6 +777,10 @@ static int iwl_mvm_sta_alloc_queue(struct iwl_mvm *mvm, return ret; } + + /* If TXQ is allocated to another STA, update removal in FW */ + if (cmd.sta_id != mvmsta->sta_id) + iwl_mvm_invalidate_sta_queue(mvm, queue, 0, true); } IWL_DEBUG_TX_QUEUES(mvm, @@ -1072,6 +1077,61 @@ static int iwl_mvm_reserve_sta_stream(struct iwl_mvm *mvm, return 0; } +/* + * In DQA mode, after a HW restart the queues should be allocated as before, in + * order to avoid race conditions when there are shared queues. This function + * does the re-mapping and queue allocation. + * + * Note that re-enabling aggregations isn't done in this function. 
+ */ +static void iwl_mvm_realloc_queues_after_restart(struct iwl_mvm *mvm, + struct iwl_mvm_sta *mvm_sta) +{ + unsigned int wdg_timeout = + iwl_mvm_get_wd_timeout(mvm, mvm_sta->vif, false, false); + int i; + struct iwl_trans_txq_scd_cfg cfg = { + .sta_id = mvm_sta->sta_id, + .frame_limit = IWL_FRAME_LIMIT, + }; + + /* Make sure reserved queue is still marked as such (or allocated) */ + mvm->queue_info[mvm_sta->reserved_queue].status = + IWL_MVM_QUEUE_RESERVED; + + for (i = 0; i <= IWL_MAX_TID_COUNT; i++) { + struct iwl_mvm_tid_data *tid_data = &mvm_sta->tid_data[i]; + int txq_id = tid_data->txq_id; + int ac; + u8 mac_queue; + + if (txq_id == IEEE80211_INVAL_HW_QUEUE) + continue; + + skb_queue_head_init(&tid_data->deferred_tx_frames); + + ac = tid_to_mac80211_ac[i]; + mac_queue = mvm_sta->vif->hw_queue[ac]; + + cfg.tid = i; + cfg.fifo = iwl_mvm_ac_to_tx_fifo[ac]; + cfg.aggregate = (txq_id >= IWL_MVM_DQA_MIN_DATA_QUEUE || + txq_id == IWL_MVM_DQA_BSS_CLIENT_QUEUE); + + IWL_DEBUG_TX_QUEUES(mvm, + "Re-mapping sta %d tid %d to queue %d\n", + mvm_sta->sta_id, i, txq_id); + + iwl_mvm_enable_txq(mvm, txq_id, mac_queue, + IEEE80211_SEQ_TO_SN(tid_data->seq_number), + &cfg, wdg_timeout); + + mvm->queue_info[txq_id].status = IWL_MVM_QUEUE_READY; + } + + atomic_set(&mvm->pending_frames[mvm_sta->sta_id], 0); +} + int iwl_mvm_add_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct ieee80211_sta *sta) @@ -1094,6 +1154,13 @@ int iwl_mvm_add_sta(struct iwl_mvm *mvm, spin_lock_init(&mvm_sta->lock); + /* In DQA mode, if this is a HW restart, re-alloc existing queues */ + if (iwl_mvm_is_dqa_supported(mvm) && + test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) { + iwl_mvm_realloc_queues_after_restart(mvm, mvm_sta); + goto update_fw; + } + mvm_sta->sta_id = sta_id; mvm_sta->mac_id_n_color = FW_CMD_ID_AND_COLOR(mvmvif->id, mvmvif->color); @@ -1157,6 +1224,7 @@ int iwl_mvm_add_sta(struct iwl_mvm *mvm, goto err; } +update_fw: ret = iwl_mvm_sta_send_to_fw(mvm, sta, false, 0); if 
(ret) goto err; -- cgit v1.1 From f7c692deef19ea953e3f792cc1c148dfcc74ba17 Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Tue, 8 Mar 2016 10:41:32 +0200 Subject: iwlwifi: mvm: use defines for SCD_CONFIG_CMD enablement Due to the addition of another option in the SCD_CONFIG_CMD's %enable field, change the assignment of this field to use defines rather than hard-code the value itself. Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h | 12 ++++++++++-- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 4 ++-- drivers/net/wireless/intel/iwlwifi/mvm/utils.c | 13 +++++++------ 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h index 4144623..6b4c63a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h @@ -675,13 +675,21 @@ static inline u32 iwl_mvm_get_scd_ssn(struct iwl_mvm_tx_resp *tx_resp) tx_resp->frame_count) & 0xfff; } +/* Available options for the SCD_QUEUE_CFG HCMD */ +enum iwl_scd_cfg_actions { + SCD_CFG_DISABLE_QUEUE = 0x0, + SCD_CFG_ENABLE_QUEUE = 0x1, + SCD_CFG_UPDATE_QUEUE_TID = 0x2, +}; + /** * struct iwl_scd_txq_cfg_cmd - New txq hw scheduler config command * @token: * @sta_id: station id * @tid: * @scd_queue: scheduler queue to confiug - * @enable: 1 queue enable, 0 queue disable + * @action: 1 queue enable, 0 queue disable, 2 change txq's tid owner + * Value is one of %iwl_scd_cfg_actions options * @aggregate: 1 aggregated queue, 0 otherwise * @tx_fifo: %enum iwl_mvm_tx_fifo * @window: BA window size @@ -692,7 +700,7 @@ struct iwl_scd_txq_cfg_cmd { u8 sta_id; u8 tid; u8 scd_queue; - u8 enable; + u8 action; u8 aggregate; u8 tx_fifo; u8 window; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 1ddcbea..e87473a 100644 --- 
a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -536,7 +536,7 @@ int iwl_mvm_scd_queue_redirect(struct iwl_mvm *mvm, int queue, int tid, { struct iwl_scd_txq_cfg_cmd cmd = { .scd_queue = queue, - .enable = 0, + .action = SCD_CFG_DISABLE_QUEUE, }; bool shared_queue; unsigned long mq; @@ -745,7 +745,7 @@ static int iwl_mvm_sta_alloc_queue(struct iwl_mvm *mvm, if (using_inactive_queue) { struct iwl_scd_txq_cfg_cmd cmd = { .scd_queue = queue, - .enable = 0, + .action = SCD_CFG_DISABLE_QUEUE, }; u8 ac; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index 423efab..7c138fe 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -610,7 +610,7 @@ int iwl_mvm_reconfig_scd(struct iwl_mvm *mvm, int queue, int fifo, int sta_id, { struct iwl_scd_txq_cfg_cmd cmd = { .scd_queue = queue, - .enable = 1, + .action = SCD_CFG_ENABLE_QUEUE, .window = frame_limit, .sta_id = sta_id, .ssn = cpu_to_le16(ssn), @@ -684,7 +684,7 @@ void iwl_mvm_enable_txq(struct iwl_mvm *mvm, int queue, int mac80211_queue, if (enable_queue) { struct iwl_scd_txq_cfg_cmd cmd = { .scd_queue = queue, - .enable = 1, + .action = SCD_CFG_ENABLE_QUEUE, .window = cfg->frame_limit, .sta_id = cfg->sta_id, .ssn = cpu_to_le16(ssn), @@ -711,7 +711,7 @@ void iwl_mvm_disable_txq(struct iwl_mvm *mvm, int queue, int mac80211_queue, { struct iwl_scd_txq_cfg_cmd cmd = { .scd_queue = queue, - .enable = 0, + .action = SCD_CFG_DISABLE_QUEUE, }; bool remove_mac_queue = true; int ret; @@ -746,8 +746,9 @@ void iwl_mvm_disable_txq(struct iwl_mvm *mvm, int queue, int mac80211_queue, ~BIT(mac80211_queue); mvm->queue_info[queue].hw_queue_refcount--; - cmd.enable = mvm->queue_info[queue].hw_queue_refcount ? 1 : 0; - if (!cmd.enable) + cmd.action = mvm->queue_info[queue].hw_queue_refcount ? 
+ SCD_CFG_ENABLE_QUEUE : SCD_CFG_DISABLE_QUEUE; + if (cmd.action == SCD_CFG_DISABLE_QUEUE) mvm->queue_info[queue].status = IWL_MVM_QUEUE_FREE; IWL_DEBUG_TX_QUEUES(mvm, @@ -757,7 +758,7 @@ void iwl_mvm_disable_txq(struct iwl_mvm *mvm, int queue, int mac80211_queue, mvm->queue_info[queue].hw_queue_to_mac80211); /* If the queue is still enabled - nothing left to do in this func */ - if (cmd.enable) { + if (cmd.action == SCD_CFG_ENABLE_QUEUE) { spin_unlock_bh(&mvm->queue_info_lock); return; } -- cgit v1.1 From 19aefa45941d2d1f6220f0b9768cdef907e15086 Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Tue, 8 Mar 2016 14:29:51 +0200 Subject: iwlwifi: mvm: support txq tid owner change Every active TXQ is assigned to a TID given through the SCD_CONFIG_CMD, and acts as an identifier in the FW. However, there may be cases this ownership needs to be changed. For example, in the following scenario: 1. TID x is owner of a queue 2. Due to a shortage of queues, TID y and z share with x 3. TID x becomes inactive and needs to be removed from the shared queue. In this scenario, if another queue is freed and traffic on x continues, we can't allocate it a new queue as long as it is the owner of the first queue. Support moving ownership of a TXQ to a different TID (same STA) without stopping the queue. 
Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 51 ++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index e87473a..30fc3af 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -850,6 +850,41 @@ out_err: return ret; } +static void iwl_mvm_change_queue_owner(struct iwl_mvm *mvm, int queue) +{ + struct iwl_scd_txq_cfg_cmd cmd = { + .scd_queue = queue, + .action = SCD_CFG_UPDATE_QUEUE_TID, + }; + s8 sta_id; + int tid; + unsigned long tid_bitmap; + int ret; + + lockdep_assert_held(&mvm->mutex); + + spin_lock_bh(&mvm->queue_info_lock); + sta_id = mvm->queue_info[queue].ra_sta_id; + tid_bitmap = mvm->queue_info[queue].tid_bitmap; + spin_unlock_bh(&mvm->queue_info_lock); + + if (WARN(!tid_bitmap, "TXQ %d has no tids assigned to it\n", queue)) + return; + + /* Find any TID for queue */ + tid = find_first_bit(&tid_bitmap, IWL_MAX_TID_COUNT + 1); + cmd.tid = tid; + cmd.tx_fifo = iwl_mvm_ac_to_tx_fifo[tid_to_mac80211_ac[tid]]; + + ret = iwl_mvm_send_cmd_pdu(mvm, SCD_QUEUE_CFG, 0, sizeof(cmd), &cmd); + if (ret) + IWL_ERR(mvm, "Failed to update owner of TXQ %d (ret=%d)\n", + queue, ret); + else + IWL_DEBUG_TX_QUEUES(mvm, "Changed TXQ %d ownership to tid %d\n", + queue, tid); +} + static void iwl_mvm_unshare_queue(struct iwl_mvm *mvm, int queue) { struct ieee80211_sta *sta; @@ -1005,14 +1040,30 @@ void iwl_mvm_add_new_dqa_stream_wk(struct work_struct *wk) /* Reconfigure queues requiring reconfiguation */ for (queue = 0; queue < IWL_MAX_HW_QUEUES; queue++) { bool reconfig; + bool change_owner; spin_lock_bh(&mvm->queue_info_lock); reconfig = (mvm->queue_info[queue].status == IWL_MVM_QUEUE_RECONFIGURING); + + /* + * We need to take into account a situation in which a TXQ was + * allocated to TID x, and then turned shared by adding TIDs y + * and z. 
If TID x becomes inactive and is removed from the TXQ, + * ownership must be given to one of the remaining TIDs. + * This is mainly because if TID x continues - a new queue can't + * be allocated for it as long as it is an owner of another TXQ. + */ + change_owner = !(mvm->queue_info[queue].tid_bitmap & + BIT(mvm->queue_info[queue].txq_tid)) && + (mvm->queue_info[queue].status == + IWL_MVM_QUEUE_SHARED); spin_unlock_bh(&mvm->queue_info_lock); if (reconfig) iwl_mvm_unshare_queue(mvm, queue); + else if (change_owner) + iwl_mvm_change_queue_owner(mvm, queue); } /* Go over all stations with deferred traffic */ -- cgit v1.1 From 486c96a753177a37e8dff4195df698df35d5c2bc Mon Sep 17 00:00:00 2001 From: Oren Givon Date: Thu, 23 Jun 2016 14:51:41 +0300 Subject: iwlwifi: rename and reorder 9000 series configuration structs Rename and reorder the 9000 series configuration structs: - struct containing configuration of 5165 was renamed to 9000. Signed-off-by: Oren Givon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-9000.c | 63 +++++++++++++------------ drivers/net/wireless/intel/iwlwifi/iwl-config.h | 4 +- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 34 +++++++------ 3 files changed, 50 insertions(+), 51 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c index fbaf705..b8356a8 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c @@ -72,15 +72,15 @@ #define IWL9000_SMEM_OFFSET 0x400000 #define IWL9000_SMEM_LEN 0x68000 -#define IWL9000_FW_PRE "iwlwifi-9000-pu-a0-lc-a0-" +#define IWL9000_FW_PRE "iwlwifi-9000-pu-a0-jf-a0-" #define IWL9260_FW_PRE "iwlwifi-9260-th-a0-jf-a0-" -#define IWL9260LC_FW_PRE "iwlwifi-9260-th-a0-lc-a0-" +#define IWL9000LC_FW_PRE "iwlwifi-9000-pu-a0-lc-a0-" #define IWL9000_MODULE_FIRMWARE(api) \ IWL9000_FW_PRE "-" __stringify(api) ".ucode" #define IWL9260_MODULE_FIRMWARE(api) \ IWL9260_FW_PRE "-" 
__stringify(api) ".ucode" -#define IWL9260LC_MODULE_FIRMWARE(api) \ - IWL9260LC_FW_PRE "-" __stringify(api) ".ucode" +#define IWL9000LC_MODULE_FIRMWARE(api) \ + IWL9000LC_FW_PRE "-" __stringify(api) ".ucode" #define NVM_HW_SECTION_NUM_FAMILY_9000 10 @@ -147,40 +147,41 @@ static const struct iwl_tt_params iwl9000_tt_params = { .rf_id = true const struct iwl_cfg iwl9260_2ac_cfg = { - .name = "Intel(R) Dual Band Wireless AC 9260", - .fw_name_pre = IWL9260_FW_PRE, - IWL_DEVICE_9000, - .ht_params = &iwl9000_ht_params, - .nvm_ver = IWL9000_NVM_VERSION, - .nvm_calib_ver = IWL9000_TX_POWER_VERSION, - .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, + .name = "Intel(R) Dual Band Wireless AC 9260", + .fw_name_pre = IWL9260_FW_PRE, + IWL_DEVICE_9000, + .ht_params = &iwl9000_ht_params, + .nvm_ver = IWL9000_NVM_VERSION, + .nvm_calib_ver = IWL9000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, +}; + +const struct iwl_cfg iwl9000_2ac_cfg = { + .name = "Intel(R) Dual Band Wireless AC 9000", + .fw_name_pre = IWL9000_FW_PRE, + IWL_DEVICE_9000, + .ht_params = &iwl9000_ht_params, + .nvm_ver = IWL9000_NVM_VERSION, + .nvm_calib_ver = IWL9000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, + .integrated = true, }; /* * TODO the struct below is for internal testing only this should be * removed by EO 2016~ */ -const struct iwl_cfg iwl9260lc_2ac_cfg = { - .name = "Intel(R) Dual Band Wireless AC 9260", - .fw_name_pre = IWL9260LC_FW_PRE, - IWL_DEVICE_9000, - .ht_params = &iwl9000_ht_params, - .nvm_ver = IWL9000_NVM_VERSION, - .nvm_calib_ver = IWL9000_TX_POWER_VERSION, - .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, -}; - -const struct iwl_cfg iwl5165_2ac_cfg = { - .name = "Intel(R) Dual Band Wireless AC 5165", - .fw_name_pre = IWL9000_FW_PRE, - IWL_DEVICE_9000, - .ht_params = &iwl9000_ht_params, - .nvm_ver = IWL9000_NVM_VERSION, - .nvm_calib_ver = IWL9000_TX_POWER_VERSION, - .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, - 
.integrated = true, +const struct iwl_cfg iwl9000lc_2ac_cfg = { + .name = "Intel(R) Dual Band Wireless AC 9000", + .fw_name_pre = IWL9000LC_FW_PRE, + IWL_DEVICE_9000, + .ht_params = &iwl9000_ht_params, + .nvm_ver = IWL9000_NVM_VERSION, + .nvm_calib_ver = IWL9000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, + .integrated = true, }; MODULE_FIRMWARE(IWL9000_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL9260_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX)); -MODULE_FIRMWARE(IWL9260LC_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX)); +MODULE_FIRMWARE(IWL9000LC_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX)); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 423b233..04260fc 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -449,9 +449,9 @@ extern const struct iwl_cfg iwl4165_2ac_cfg; extern const struct iwl_cfg iwl8260_2ac_sdio_cfg; extern const struct iwl_cfg iwl8265_2ac_sdio_cfg; extern const struct iwl_cfg iwl4165_2ac_sdio_cfg; +extern const struct iwl_cfg iwl9000_2ac_cfg; +extern const struct iwl_cfg iwl9000lc_2ac_cfg; extern const struct iwl_cfg iwl9260_2ac_cfg; -extern const struct iwl_cfg iwl9260lc_2ac_cfg; -extern const struct iwl_cfg iwl5165_2ac_cfg; extern const struct iwl_cfg iwla000_2ac_cfg; #endif /* CONFIG_IWLMVM */ diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 78cf9a7..bf523f5 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -504,18 +504,18 @@ static const struct pci_device_id iwl_hw_card_ids[] = { /* 9000 Series */ {IWL_PCI_DEVICE(0x2526, 0x0000, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0010, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0A10, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0010, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0210, iwl9260_2ac_cfg)}, - 
{IWL_PCI_DEVICE(0x9DF0, 0x0410, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0610, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0310, iwl5165_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0000, iwl5165_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0510, iwl5165_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x2010, iwl5165_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x1420, iwl5165_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0710, iwl5165_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x2A10, iwl5165_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0A10, iwl9000_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0010, iwl9000_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0210, iwl9000_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0410, iwl9000_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0610, iwl9000_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0310, iwl9000_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0000, iwl9000_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0510, iwl9000_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x2010, iwl9000_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x1420, iwl9000_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0710, iwl9000_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x2A10, iwl9000_2ac_cfg)}, /* a000 Series */ {IWL_PCI_DEVICE(0x2720, 0x0A10, iwla000_2ac_cfg)}, @@ -608,7 +608,6 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { const struct iwl_cfg *cfg = (struct iwl_cfg *)(ent->driver_data); const struct iwl_cfg *cfg_7265d __maybe_unused = NULL; - const struct iwl_cfg *cfg_9260lc __maybe_unused = NULL; struct iwl_trans *iwl_trans; int ret; @@ -637,11 +636,10 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } if (iwl_trans->cfg->rf_id) { - if (cfg == &iwl9260_2ac_cfg) - cfg_9260lc = &iwl9260lc_2ac_cfg; - if (cfg_9260lc && iwl_trans->hw_rf_id == CSR_HW_RF_ID_TYPE_LC) { - cfg = cfg_9260lc; - iwl_trans->cfg = cfg_9260lc; + if (cfg == &iwl9000_2ac_cfg && + iwl_trans->hw_rf_id == CSR_HW_RF_ID_TYPE_LC) { + cfg = &iwl9000lc_2ac_cfg; + iwl_trans->cfg = cfg; } } #endif -- cgit v1.1 From 
827e9ab85402dc876d23e27c64b837255460108e Mon Sep 17 00:00:00 2001 From: Oren Givon Date: Sun, 26 Jun 2016 14:15:27 +0300 Subject: iwlwifi: add a new series 9460 with new PCI ID Add a new series to the 9000 series called 9460. In addition, add a new PCI ID that is the 9460 new series. Signed-off-by: Oren Givon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-9000.c | 4 ++-- drivers/net/wireless/intel/iwlwifi/iwl-config.h | 2 +- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 27 +++++++++++++------------ 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c index b8356a8..5621cf2 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c @@ -156,8 +156,8 @@ const struct iwl_cfg iwl9260_2ac_cfg = { .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, }; -const struct iwl_cfg iwl9000_2ac_cfg = { - .name = "Intel(R) Dual Band Wireless AC 9000", +const struct iwl_cfg iwl9460_2ac_cfg = { + .name = "Intel(R) Dual Band Wireless AC 9460", .fw_name_pre = IWL9000_FW_PRE, IWL_DEVICE_9000, .ht_params = &iwl9000_ht_params, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 04260fc..a18b57f 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -449,9 +449,9 @@ extern const struct iwl_cfg iwl4165_2ac_cfg; extern const struct iwl_cfg iwl8260_2ac_sdio_cfg; extern const struct iwl_cfg iwl8265_2ac_sdio_cfg; extern const struct iwl_cfg iwl4165_2ac_sdio_cfg; -extern const struct iwl_cfg iwl9000_2ac_cfg; extern const struct iwl_cfg iwl9000lc_2ac_cfg; extern const struct iwl_cfg iwl9260_2ac_cfg; +extern const struct iwl_cfg iwl9460_2ac_cfg; extern const struct iwl_cfg iwla000_2ac_cfg; #endif /* CONFIG_IWLMVM */ diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c 
b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index bf523f5..d053183 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -504,18 +504,19 @@ static const struct pci_device_id iwl_hw_card_ids[] = { /* 9000 Series */ {IWL_PCI_DEVICE(0x2526, 0x0000, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0010, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0A10, iwl9000_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0010, iwl9000_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0210, iwl9000_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0410, iwl9000_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0610, iwl9000_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0310, iwl9000_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0000, iwl9000_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0510, iwl9000_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x2010, iwl9000_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x1420, iwl9000_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0710, iwl9000_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x2A10, iwl9000_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0A10, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0010, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0210, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0410, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0610, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0310, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0000, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0510, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x2010, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x1420, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0710, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x2A10, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x30DC, 0x0060, iwl9460_2ac_cfg)}, /* a000 Series */ {IWL_PCI_DEVICE(0x2720, 0x0A10, iwla000_2ac_cfg)}, @@ -636,7 +637,7 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } if (iwl_trans->cfg->rf_id) { - if (cfg == &iwl9000_2ac_cfg && + if (cfg == &iwl9460_2ac_cfg && iwl_trans->hw_rf_id == 
CSR_HW_RF_ID_TYPE_LC) { cfg = &iwl9000lc_2ac_cfg; iwl_trans->cfg = cfg; -- cgit v1.1 From c62446d2b028eab024e45f9f73e9496089f5fa7a Mon Sep 17 00:00:00 2001 From: Oren Givon Date: Thu, 7 Jul 2016 10:31:17 +0300 Subject: iwlwifi: add new 9460 series PCI IDs Add 4 more new 9460 series PCI IDs. Signed-off-by: Oren Givon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index d053183..3e5a11a 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -517,6 +517,10 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x9DF0, 0x0710, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x9DF0, 0x2A10, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x30DC, 0x0060, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0060, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0060, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0xA370, 0x0060, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x31DC, 0x0060, iwl9460_2ac_cfg)}, /* a000 Series */ {IWL_PCI_DEVICE(0x2720, 0x0A10, iwla000_2ac_cfg)}, -- cgit v1.1 From 22ccabf17a2c0e4adf1b6e4ef0d2df79e93cf7b6 Mon Sep 17 00:00:00 2001 From: Oren Givon Date: Thu, 7 Jul 2016 09:23:56 +0300 Subject: iwlwifi: add the new 9270 series Add a new config struct for the new 9270 series and add the first PCI ID for it. 
Signed-off-by: Oren Givon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-9000.c | 10 ++++++++++ drivers/net/wireless/intel/iwlwifi/iwl-config.h | 1 + drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 1 + 3 files changed, 12 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c index 5621cf2..e442650 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c @@ -156,6 +156,16 @@ const struct iwl_cfg iwl9260_2ac_cfg = { .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, }; +const struct iwl_cfg iwl9270_2ac_cfg = { + .name = "Intel(R) Dual Band Wireless AC 9270", + .fw_name_pre = IWL9260_FW_PRE, + IWL_DEVICE_9000, + .ht_params = &iwl9000_ht_params, + .nvm_ver = IWL9000_NVM_VERSION, + .nvm_calib_ver = IWL9000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, +}; + const struct iwl_cfg iwl9460_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 9460", .fw_name_pre = IWL9000_FW_PRE, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index a18b57f..0ab415c 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -451,6 +451,7 @@ extern const struct iwl_cfg iwl8265_2ac_sdio_cfg; extern const struct iwl_cfg iwl4165_2ac_sdio_cfg; extern const struct iwl_cfg iwl9000lc_2ac_cfg; extern const struct iwl_cfg iwl9260_2ac_cfg; +extern const struct iwl_cfg iwl9270_2ac_cfg; extern const struct iwl_cfg iwl9460_2ac_cfg; extern const struct iwl_cfg iwla000_2ac_cfg; #endif /* CONFIG_IWLMVM */ diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 3e5a11a..1be0ac2 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -504,6 +504,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { /* 
9000 Series */ {IWL_PCI_DEVICE(0x2526, 0x0000, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0010, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x1410, iwl9270_2ac_cfg)}, {IWL_PCI_DEVICE(0x9DF0, 0x0A10, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x9DF0, 0x0010, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x9DF0, 0x0210, iwl9460_2ac_cfg)}, -- cgit v1.1 From fe4a7249732de1fe18e7ceb41924e329a572cb2d Mon Sep 17 00:00:00 2001 From: Oren Givon Date: Thu, 7 Jul 2016 09:40:12 +0300 Subject: iwlwifi: add the new 9170 series Add a new config struct for the new 9170 series and add the first PCI ID for it. Signed-off-by: Oren Givon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-9000.c | 10 ++++++++++ drivers/net/wireless/intel/iwlwifi/iwl-config.h | 1 + drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 1 + 3 files changed, 12 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c index e442650..1fec6af 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c @@ -146,6 +146,16 @@ static const struct iwl_tt_params iwl9000_tt_params = { .mac_addr_from_csr = true, \ .rf_id = true +const struct iwl_cfg iwl9160_2ac_cfg = { + .name = "Intel(R) Dual Band Wireless AC 9160", + .fw_name_pre = IWL9260_FW_PRE, + IWL_DEVICE_9000, + .ht_params = &iwl9000_ht_params, + .nvm_ver = IWL9000_NVM_VERSION, + .nvm_calib_ver = IWL9000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, +}; + const struct iwl_cfg iwl9260_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 9260", .fw_name_pre = IWL9260_FW_PRE, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 0ab415c..7008319 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -450,6 +450,7 @@ extern const struct iwl_cfg iwl8260_2ac_sdio_cfg; extern const struct iwl_cfg 
iwl8265_2ac_sdio_cfg; extern const struct iwl_cfg iwl4165_2ac_sdio_cfg; extern const struct iwl_cfg iwl9000lc_2ac_cfg; +extern const struct iwl_cfg iwl9160_2ac_cfg; extern const struct iwl_cfg iwl9260_2ac_cfg; extern const struct iwl_cfg iwl9270_2ac_cfg; extern const struct iwl_cfg iwl9460_2ac_cfg; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 1be0ac2..c6e24fb 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -502,6 +502,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x24FD, 0x0850, iwl8265_2ac_cfg)}, /* 9000 Series */ + {IWL_PCI_DEVICE(0x271B, 0x0010, iwl9160_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0000, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0010, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x1410, iwl9270_2ac_cfg)}, -- cgit v1.1 From ae79785f13972b2180d3d460510b7d1981d08cbc Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Thu, 30 Jun 2016 16:36:24 +0300 Subject: iwlwifi: pcie: refrain from SCD accesses Up till now we accessed SCD configuration only for initial configuration and for enabling command queue. For a000 generation the command queue is open by default and firmware configures the rest. No driver SCD accesses are expected. Make sure this is the case. 
Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 4 ++++ drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 13 ++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 74f2f03..559f118 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -1960,6 +1960,10 @@ void iwl_trans_pcie_log_scd_error(struct iwl_trans *trans, struct iwl_txq *txq) IWL_ERR(trans, "Current SW read_ptr %d write_ptr %d\n", txq->q.read_ptr, txq->q.write_ptr); + if (trans->cfg->use_tfh) + /* TODO: access new SCD registers and dump them */ + return; + scd_sram_addr = trans_pcie->scd_base_addr + SCD_TX_STTS_QUEUE_OFFSET(txq->q.id); iwl_trans_read_mem_bytes(trans, scd_sram_addr, buf, sizeof(buf)); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 18650dc..9636dc89 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -703,6 +703,9 @@ void iwl_pcie_tx_start(struct iwl_trans *trans, u32 scd_base_addr) memset(trans_pcie->queue_stopped, 0, sizeof(trans_pcie->queue_stopped)); memset(trans_pcie->queue_used, 0, sizeof(trans_pcie->queue_used)); + if (trans->cfg->use_tfh) + return; + trans_pcie->scd_base_addr = iwl_read_prph(trans, SCD_SRAM_BASE_ADDR); @@ -970,11 +973,13 @@ int iwl_pcie_tx_init(struct iwl_trans *trans) } } - if (trans->cfg->use_tfh) + if (trans->cfg->use_tfh) { iwl_write_direct32(trans, TFH_TRANSFER_MODE, TFH_TRANSFER_MAX_PENDING_REQ | TFH_CHUNK_SIZE_128 | TFH_CHUNK_SPLIT_MODE); + return 0; + } iwl_set_bits_prph(trans, SCD_GP_CTRL, SCD_GP_CTRL_AUTO_ACTIVE_MODE); if (trans->cfg->base_params->num_of_queues > 20) @@ -1249,6 +1254,9 @@ void iwl_trans_pcie_txq_enable(struct iwl_trans *trans, int txq_id, u16 ssn, if (test_and_set_bit(txq_id, 
trans_pcie->queue_used)) WARN_ONCE(1, "queue %d already used - expect issues", txq_id); + if (cfg && trans->cfg->use_tfh) + WARN_ONCE(1, "Expected no calls to SCD configuration"); + txq->wd_timeout = msecs_to_jiffies(wdg_timeout); if (cfg) { @@ -1366,6 +1374,9 @@ void iwl_trans_pcie_txq_disable(struct iwl_trans *trans, int txq_id, return; } + if (configure_scd && trans->cfg->use_tfh) + WARN_ONCE(1, "Expected no calls to SCD configuration"); + if (configure_scd) { iwl_scd_txq_set_inactive(trans, txq_id); -- cgit v1.1 From d6a2c5c78dcbbbe9dc20ff6e126b83f088cd0501 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Wed, 29 Jun 2016 12:08:48 +0300 Subject: iwlwifi: pcie: fix ucode load flow for a000 devices Turns out we should access TFH relative addresses. Also, the FH_UCODE_LOAD_STATUS was replaced by UREG_UCODE_LOAD_STATUS. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-fh.h | 15 ++++++----- drivers/net/wireless/intel/iwlwifi/iwl-prph.h | 8 ++++++ drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 33 +++++++++++++++++++------ 3 files changed, 40 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-fh.h b/drivers/net/wireless/intel/iwlwifi/iwl-fh.h index 1d6f5d2..dd75ea7 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-fh.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-fh.h @@ -77,7 +77,6 @@ */ #define FH_MEM_LOWER_BOUND (0x1000) #define FH_MEM_UPPER_BOUND (0x2000) -#define TFH_MEM_LOWER_BOUND (0xA06000) /** * Keep-Warm (KW) buffer base address. 
@@ -120,7 +119,7 @@ #define FH_MEM_CBBC_20_31_LOWER_BOUND (FH_MEM_LOWER_BOUND + 0xB20) #define FH_MEM_CBBC_20_31_UPPER_BOUND (FH_MEM_LOWER_BOUND + 0xB80) /* a000 TFD table address, 64 bit */ -#define TFH_TFDQ_CBB_TABLE (TFH_MEM_LOWER_BOUND + 0x1C00) +#define TFH_TFDQ_CBB_TABLE (0x1C00) /* Find TFD CB base pointer for given queue */ static inline unsigned int FH_MEM_CBBC_QUEUE(struct iwl_trans *trans, @@ -156,7 +155,7 @@ static inline unsigned int FH_MEM_CBBC_QUEUE(struct iwl_trans *trans, * In case of DRAM read address which is not aligned to 128B, the TFH will * enable transfer size which doesn't cross 64B DRAM address boundary. */ -#define TFH_TRANSFER_MODE (TFH_MEM_LOWER_BOUND + 0x1F40) +#define TFH_TRANSFER_MODE (0x1F40) #define TFH_TRANSFER_MAX_PENDING_REQ 0xc #define TFH_CHUNK_SIZE_128 BIT(8) #define TFH_CHUNK_SPLIT_MODE BIT(10) @@ -167,7 +166,7 @@ static inline unsigned int FH_MEM_CBBC_QUEUE(struct iwl_trans *trans, * the start of the TFD first TB. * In case of a DRAM Tx CMD update the TFH will update PN and Key ID */ -#define TFH_TXCMD_UPDATE_CFG (TFH_MEM_LOWER_BOUND + 0x1F48) +#define TFH_TXCMD_UPDATE_CFG (0x1F48) /* * Controls TX DMA operation * @@ -181,22 +180,22 @@ static inline unsigned int FH_MEM_CBBC_QUEUE(struct iwl_trans *trans, * set to 1 - interrupt is sent to the driver * Bit 0: Indicates the snoop configuration */ -#define TFH_SRV_DMA_CHNL0_CTRL (TFH_MEM_LOWER_BOUND + 0x1F60) +#define TFH_SRV_DMA_CHNL0_CTRL (0x1F60) #define TFH_SRV_DMA_SNOOP BIT(0) #define TFH_SRV_DMA_TO_DRIVER BIT(24) #define TFH_SRV_DMA_START BIT(31) /* Defines the DMA SRAM write start address to transfer a data block */ -#define TFH_SRV_DMA_CHNL0_SRAM_ADDR (TFH_MEM_LOWER_BOUND + 0x1F64) +#define TFH_SRV_DMA_CHNL0_SRAM_ADDR (0x1F64) /* Defines the 64bits DRAM start address to read the DMA data block from */ -#define TFH_SRV_DMA_CHNL0_DRAM_ADDR (TFH_MEM_LOWER_BOUND + 0x1F68) +#define TFH_SRV_DMA_CHNL0_DRAM_ADDR (0x1F68) /* * Defines the number of bytes to transfer from DRAM to 
SRAM. * Note that this register may be configured with non-dword aligned size. */ -#define TFH_SRV_DMA_CHNL0_BC (TFH_MEM_LOWER_BOUND + 0x1F70) +#define TFH_SRV_DMA_CHNL0_BC (0x1F70) /** * Rx SRAM Control and Status Registers (RSCSR) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h index 459bf73..849ee79 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h @@ -303,6 +303,14 @@ #define FH_UCODE_LOAD_STATUS (0x1AF0) #define CSR_UCODE_LOAD_STATUS_ADDR (0x1E70) + +/* + * Replacing FH_UCODE_LOAD_STATUS + * This register is writen by driver and is read by uCode during boot flow. + * Note this address is cleared after MAC reset. + */ +#define UREG_UCODE_LOAD_STATUS (0xa05c40) + enum secure_load_status_reg { LMPM_CPU_UCODE_LOADING_STARTED = 0x00000001, LMPM_CPU_HDRS_LOADING_COMPLETED = 0x00000003, diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 559f118..039eeca 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -827,10 +827,16 @@ static int iwl_pcie_load_cpu_sections_8000(struct iwl_trans *trans, if (ret) return ret; - /* Notify the ucode of the loaded section number and status */ - val = iwl_read_direct32(trans, FH_UCODE_LOAD_STATUS); - val = val | (sec_num << shift_param); - iwl_write_direct32(trans, FH_UCODE_LOAD_STATUS, val); + /* Notify ucode of loaded section number and status */ + if (trans->cfg->use_tfh) { + val = iwl_read_prph(trans, UREG_UCODE_LOAD_STATUS); + val = val | (sec_num << shift_param); + iwl_write_prph(trans, UREG_UCODE_LOAD_STATUS, val); + } else { + val = iwl_read_direct32(trans, FH_UCODE_LOAD_STATUS); + val = val | (sec_num << shift_param); + iwl_write_direct32(trans, FH_UCODE_LOAD_STATUS, val); + } sec_num = (sec_num << 1) | 0x1; } @@ -838,10 +844,21 @@ static int iwl_pcie_load_cpu_sections_8000(struct 
iwl_trans *trans, iwl_enable_interrupts(trans); - if (cpu == 1) - iwl_write_direct32(trans, FH_UCODE_LOAD_STATUS, 0xFFFF); - else - iwl_write_direct32(trans, FH_UCODE_LOAD_STATUS, 0xFFFFFFFF); + if (trans->cfg->use_tfh) { + if (cpu == 1) + iwl_write_prph(trans, UREG_UCODE_LOAD_STATUS, + 0xFFFF); + else + iwl_write_prph(trans, UREG_UCODE_LOAD_STATUS, + 0xFFFFFFFF); + } else { + if (cpu == 1) + iwl_write_direct32(trans, FH_UCODE_LOAD_STATUS, + 0xFFFF); + else + iwl_write_direct32(trans, FH_UCODE_LOAD_STATUS, + 0xFFFFFFFF); + } return 0; } -- cgit v1.1 From 76f8c0e17edc6eba43f84952e5a87c7f50f69370 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Wed, 29 Jun 2016 12:23:06 +0300 Subject: iwlwifi: pcie: remove dead code If device family is 8000 then iwl_pcie_load_cpu_sections() won't be called at all (iwl_pcie_load_cpu_sections_8000() is called in that case) so this piece of code never gets called. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-prph.h | 13 ------------- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 8 -------- 2 files changed, 21 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h index 849ee79..406ef30 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h @@ -302,7 +302,6 @@ #define OSC_CLK_FORCE_CONTROL (0x8) #define FH_UCODE_LOAD_STATUS (0x1AF0) -#define CSR_UCODE_LOAD_STATUS_ADDR (0x1E70) /* * Replacing FH_UCODE_LOAD_STATUS @@ -311,21 +310,9 @@ */ #define UREG_UCODE_LOAD_STATUS (0xa05c40) -enum secure_load_status_reg { - LMPM_CPU_UCODE_LOADING_STARTED = 0x00000001, - LMPM_CPU_HDRS_LOADING_COMPLETED = 0x00000003, - LMPM_CPU_UCODE_LOADING_COMPLETED = 0x00000007, - LMPM_CPU_STATUS_NUM_OF_LAST_COMPLETED = 0x000000F8, - LMPM_CPU_STATUS_NUM_OF_LAST_LOADED_BLOCK = 0x0000FF00, -}; - -#define LMPM_SECURE_INSPECTOR_CODE_ADDR (0x1E38) -#define LMPM_SECURE_INSPECTOR_DATA_ADDR (0x1E3C) 
#define LMPM_SECURE_UCODE_LOAD_CPU1_HDR_ADDR (0x1E78) #define LMPM_SECURE_UCODE_LOAD_CPU2_HDR_ADDR (0x1E7C) -#define LMPM_SECURE_INSPECTOR_CODE_MEM_SPACE (0x400000) -#define LMPM_SECURE_INSPECTOR_DATA_MEM_SPACE (0x402000) #define LMPM_SECURE_CPU1_HDR_MEM_SPACE (0x420000) #define LMPM_SECURE_CPU2_HDR_MEM_SPACE (0x420400) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 039eeca..2f46eed 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -903,14 +903,6 @@ static int iwl_pcie_load_cpu_sections(struct iwl_trans *trans, return ret; } - if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) - iwl_set_bits_prph(trans, - CSR_UCODE_LOAD_STATUS_ADDR, - (LMPM_CPU_UCODE_LOADING_COMPLETED | - LMPM_CPU_HDRS_LOADING_COMPLETED | - LMPM_CPU_UCODE_LOADING_STARTED) << - shift_param); - *first_ucode_section = last_read_idx; return 0; -- cgit v1.1 From 8aade778f787305fdbfd3c1d54e6b583601b5902 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Mon, 22 Aug 2016 23:53:25 +0800 Subject: ARM: imx6: add missing BM_CLPCR_BYPASS_PMIC_READY setting for imx6sx i.MX6SX has bypass PMIC ready function, as this function is normally NOT enabled on the board design, so we need to bypass the PMIC ready pin check during DSM mode resume flow, otherwise, the internal DSM resume logic will be waiting for this signal to be ready forever and cause resume fail. 
Signed-off-by: Anson Huang Fixes: ff843d621bfc ("ARM: imx: add suspend support for i.mx6sx") Cc: Tested-by: Peter Chen Signed-off-by: Shawn Guo --- arch/arm/mach-imx/pm-imx6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-imx/pm-imx6.c b/arch/arm/mach-imx/pm-imx6.c index 67bab74..fe708e2 100644 --- a/arch/arm/mach-imx/pm-imx6.c +++ b/arch/arm/mach-imx/pm-imx6.c @@ -310,7 +310,7 @@ int imx6_set_lpm(enum mxc_cpu_pwr_mode mode) val |= 0x3 << BP_CLPCR_STBY_COUNT; val |= BM_CLPCR_VSTBY; val |= BM_CLPCR_SBYOS; - if (cpu_is_imx6sl()) + if (cpu_is_imx6sl() || cpu_is_imx6sx()) val |= BM_CLPCR_BYPASS_PMIC_READY; if (cpu_is_imx6sl() || cpu_is_imx6sx() || cpu_is_imx6ul()) val |= BM_CLPCR_BYP_MMDC_CH0_LPM_HS; -- cgit v1.1 From 2c5575401e34de3d2fc90af1c95bc73435784093 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 26 Aug 2016 16:28:39 -0500 Subject: usb: musb: Fix locking errors for host only mode If we have USB gadgets disabled and USB_MUSB_HOST set, we get "possible irq lock inversion dependency detected" errors during boot. Let's fix the issue by adding start_musb flag and start the controller after we're out of the spinlock protected section. Reported-by: Ladislav Michl Tested-by: Ladislav Michl Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_virthub.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/musb/musb_virthub.c b/drivers/usb/musb/musb_virthub.c index 192248f..fe08e77 100644 --- a/drivers/usb/musb/musb_virthub.c +++ b/drivers/usb/musb/musb_virthub.c @@ -290,6 +290,7 @@ int musb_hub_control( u32 temp; int retval = 0; unsigned long flags; + bool start_musb = false; spin_lock_irqsave(&musb->lock, flags); @@ -390,7 +391,7 @@ int musb_hub_control( * logic relating to VBUS power-up.
*/ if (!hcd->self.is_b_host && musb_has_gadget(musb)) - musb_start(musb); + start_musb = true; break; case USB_PORT_FEAT_RESET: musb_port_reset(musb, true); @@ -451,5 +452,9 @@ error: retval = -EPIPE; } spin_unlock_irqrestore(&musb->lock, flags); + + if (start_musb) + musb_start(musb); + return retval; } -- cgit v1.1 From 8c57cac1457f3125a5d13dc03635c0708c61bff0 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Wed, 20 Jul 2016 10:24:02 +0300 Subject: mei: me: disable driver on SPT SPS firmware Sunrise Point PCH with SPS Firmware doesn't expose working MEI interface, we need to quirk it out. The SPS Firmware is identifiable only on the first PCI function of the device. Cc: #4.6+ Tested-by: Sujith Pandel Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me.c | 10 ++++++++-- drivers/misc/mei/pci-me.c | 4 ++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index e2fb44c..dc3a854 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -1263,8 +1263,14 @@ static bool mei_me_fw_type_nm(struct pci_dev *pdev) static bool mei_me_fw_type_sps(struct pci_dev *pdev) { u32 reg; - /* Read ME FW Status check for SPS Firmware */ - pci_read_config_dword(pdev, PCI_CFG_HFS_1, ®); + unsigned int devfn; + + /* + * Read ME FW Status register to check for SPS Firmware + * The SPS FW is only signaled in pci function 0 + */ + devfn = PCI_DEVFN(PCI_SLOT(pdev->devfn), 0); + pci_bus_read_config_dword(pdev->bus, devfn, PCI_CFG_HFS_1, ®); trace_mei_pci_cfg_read(&pdev->dev, "PCI_CFG_HFS_1", PCI_CFG_HFS_1, reg); /* if bits [19:16] = 15, running SPS Firmware */ return (reg & 0xf0000) == 0xf0000; diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 64e64da..71cea9b 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -85,8 +85,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_SPT, mei_me_pch8_cfg)}, 
{MEI_PCI_DEVICE(MEI_DEV_ID_SPT_2, mei_me_pch8_cfg)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, mei_me_pch8_cfg)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, mei_me_pch8_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, mei_me_pch8_sps_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, mei_me_pch8_sps_cfg)}, {MEI_PCI_DEVICE(MEI_DEV_ID_BXT_M, mei_me_pch8_cfg)}, {MEI_PCI_DEVICE(MEI_DEV_ID_APL_I, mei_me_pch8_cfg)}, -- cgit v1.1 From 52442f9b11b7e5d4a38d99143011831fd171f8d9 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Tue, 30 Aug 2016 09:20:32 -0400 Subject: NFS4: Avoid migration loops If a server returns itself as a location while migrating, the client may end up getting stuck attempting to migrate twice to the same server. Catch this by checking if the nfs_client found is the same as the existing client. For the other two callers to nfs4_set_client, the nfs_client will always be ERR_PTR(-EINVAL). Signed-off-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/nfs4client.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 8d7d08d..cd3b7cf 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -817,6 +817,11 @@ static int nfs4_set_client(struct nfs_server *server, goto error; } + if (server->nfs_client == clp) { + error = -ELOOP; + goto error; + } + /* * Query for the lease time on clientid setup or renewal * -- cgit v1.1 From 98b0f80c2396224bbbed81792b526e6c72ba9efa Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 29 Aug 2016 11:15:36 -0400 Subject: NFSv4.x: Fix a refcount leak in nfs_callback_up_net On error, the callers expect us to return without bumping nn->cb_users[]. 
Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v3.7+ --- fs/nfs/callback.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index a7f2e6e..52a2831 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -275,6 +275,7 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, err_socks: svc_rpcb_cleanup(serv, net); err_bind: + nn->cb_users[minorversion]--; dprintk("NFS: Couldn't create callback socket: err = %d; " "net = %p\n", ret, net); return ret; -- cgit v1.1 From 9ebae9e4bcd7dff22536af8a969d8f66e6f23900 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Tue, 30 Aug 2016 16:47:02 +0100 Subject: pata_ninja32: Avoid corrupting status flags Ninja32 needs to set some flags to indicate it does 32bit IO. However it currently assigns this which loses the initializing flag and causes a warning spew. Fix it to use a logical or as is intended. Signed-off-by: Alan Cox Tested-by: Ellmar Stelnberger Signed-off-by: Tejun Heo --- drivers/ata/pata_ninja32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/pata_ninja32.c b/drivers/ata/pata_ninja32.c index 633aa29..44f97ad 100644 --- a/drivers/ata/pata_ninja32.c +++ b/drivers/ata/pata_ninja32.c @@ -144,7 +144,7 @@ static int ninja32_init_one(struct pci_dev *dev, const struct pci_device_id *id) ap->ioaddr.altstatus_addr = base + 0x1E; ap->ioaddr.bmdma_addr = base; ata_sff_std_ports(&ap->ioaddr); - ap->pflags = ATA_PFLAG_PIO32 | ATA_PFLAG_PIO32CHANGE; + ap->pflags |= ATA_PFLAG_PIO32 | ATA_PFLAG_PIO32CHANGE; ninja32_program(base); /* FIXME: Should we disable them at remove ? 
*/ -- cgit v1.1 From 0d025d271e55f3de21f0aaaf54b42d20404d2b23 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 30 Aug 2016 08:04:16 -0500 Subject: mm/usercopy: get rid of CONFIG_DEBUG_STRICT_USER_COPY_CHECKS There are three usercopy warnings which are currently being silenced for gcc 4.6 and newer: 1) "copy_from_user() buffer size is too small" compile warning/error This is a static warning which happens when object size and copy size are both const, and copy size > object size. I didn't see any false positives for this one. So the function warning attribute seems to be working fine here. Note this scenario is always a bug and so I think it should be changed to *always* be an error, regardless of CONFIG_DEBUG_STRICT_USER_COPY_CHECKS. 2) "copy_from_user() buffer size is not provably correct" compile warning This is another static warning which happens when I enable __compiletime_object_size() for new compilers (and CONFIG_DEBUG_STRICT_USER_COPY_CHECKS). It happens when object size is const, but copy size is *not*. In this case there's no way to compare the two at build time, so it gives the warning. (Note the warning is a byproduct of the fact that gcc has no way of knowing whether the overflow function will be called, so the call isn't dead code and the warning attribute is activated.) So this warning seems to only indicate "this is an unusual pattern, maybe you should check it out" rather than "this is a bug". I get 102(!) of these warnings with allyesconfig and the __compiletime_object_size() gcc check removed. I don't know if there are any real bugs hiding in there, but from looking at a small sample, I didn't see any. According to Kees, it does sometimes find real bugs. But the false positive rate seems high. 3) "Buffer overflow detected" runtime warning This is a runtime warning where object size is const, and copy size > object size. 
All three warnings (both static and runtime) were completely disabled for gcc 4.6 with the following commit: 2fb0815c9ee6 ("gcc4: disable __compiletime_object_size for GCC 4.6+") That commit mistakenly assumed that the false positives were caused by a gcc bug in __compiletime_object_size(). But in fact, __compiletime_object_size() seems to be working fine. The false positives were instead triggered by #2 above. (Though I don't have an explanation for why the warnings supposedly only started showing up in gcc 4.6.) So remove warning #2 to get rid of all the false positives, and re-enable warnings #1 and #3 by reverting the above commit. Furthermore, since #1 is a real bug which is detected at compile time, upgrade it to always be an error. Having done all that, CONFIG_DEBUG_STRICT_USER_COPY_CHECKS is no longer needed. Signed-off-by: Josh Poimboeuf Cc: Kees Cook Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H . Peter Anvin" Cc: Andy Lutomirski Cc: Steven Rostedt Cc: Brian Gerst Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Byungchul Park Cc: Nilay Vaish Signed-off-by: Linus Torvalds --- arch/parisc/Kconfig | 1 - arch/parisc/configs/c8000_defconfig | 1 - arch/parisc/configs/generic-64bit_defconfig | 1 - arch/parisc/include/asm/uaccess.h | 22 ++++----- arch/s390/Kconfig | 1 - arch/s390/configs/default_defconfig | 1 - arch/s390/configs/gcov_defconfig | 1 - arch/s390/configs/performance_defconfig | 1 - arch/s390/defconfig | 1 - arch/s390/include/asm/uaccess.h | 19 +++++--- arch/tile/Kconfig | 1 - arch/tile/include/asm/uaccess.h | 22 +++++---- arch/x86/Kconfig | 1 - arch/x86/include/asm/uaccess.h | 69 ++++------------------------- include/asm-generic/uaccess.h | 1 + include/linux/compiler-gcc.h | 2 +- lib/Kconfig.debug | 18 -------- lib/Makefile | 1 - lib/usercopy.c | 9 ---- 19 files changed, 45 insertions(+), 128 deletions(-) delete mode 100644 lib/usercopy.c diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index cd87781..af12c2d 100644 --- a/arch/parisc/Kconfig +++ 
b/arch/parisc/Kconfig @@ -1,6 +1,5 @@ config PARISC def_bool y - select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS select ARCH_MIGHT_HAVE_PC_PARPORT select HAVE_IDE select HAVE_OPROFILE diff --git a/arch/parisc/configs/c8000_defconfig b/arch/parisc/configs/c8000_defconfig index 1a8f6f95..f6a4c01 100644 --- a/arch/parisc/configs/c8000_defconfig +++ b/arch/parisc/configs/c8000_defconfig @@ -245,7 +245,6 @@ CONFIG_DEBUG_RT_MUTEXES=y CONFIG_PROVE_RCU_DELAY=y CONFIG_DEBUG_BLOCK_EXT_DEVT=y CONFIG_LATENCYTOP=y -CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y CONFIG_KEYS=y # CONFIG_CRYPTO_HW is not set CONFIG_FONTS=y diff --git a/arch/parisc/configs/generic-64bit_defconfig b/arch/parisc/configs/generic-64bit_defconfig index 7e07926..c564e6e 100644 --- a/arch/parisc/configs/generic-64bit_defconfig +++ b/arch/parisc/configs/generic-64bit_defconfig @@ -291,7 +291,6 @@ CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y CONFIG_BOOTPARAM_HUNG_TASK_PANIC=y # CONFIG_SCHED_DEBUG is not set CONFIG_TIMER_STATS=y -CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index 0f59fd9..e915048 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -208,13 +208,13 @@ unsigned long copy_in_user(void __user *dst, const void __user *src, unsigned lo #define __copy_to_user_inatomic __copy_to_user #define __copy_from_user_inatomic __copy_from_user -extern void copy_from_user_overflow(void) -#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS - __compiletime_error("copy_from_user() buffer size is not provably correct") -#else - __compiletime_warning("copy_from_user() buffer size is not provably correct") -#endif -; +extern void __compiletime_error("usercopy buffer size is too small") +__bad_copy_user(void); + +static inline void copy_user_overflow(int size, unsigned long count) +{ + WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count); +} static inline 
unsigned long __must_check copy_from_user(void *to, const void __user *from, @@ -223,10 +223,12 @@ static inline unsigned long __must_check copy_from_user(void *to, int sz = __compiletime_object_size(to); int ret = -EFAULT; - if (likely(sz == -1 || !__builtin_constant_p(n) || sz >= n)) + if (likely(sz == -1 || sz >= n)) ret = __copy_from_user(to, from, n); - else - copy_from_user_overflow(); + else if (!__builtin_constant_p(n)) + copy_user_overflow(sz, n); + else + __bad_copy_user(); return ret; } diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index e751fe2..c109f07 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -68,7 +68,6 @@ config DEBUG_RODATA config S390 def_bool y select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE - select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_GCOV_PROFILE_ALL diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index 26e0c7f..412b1bd 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -602,7 +602,6 @@ CONFIG_FAIL_FUTEX=y CONFIG_FAULT_INJECTION_DEBUG_FS=y CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y CONFIG_LATENCYTOP=y -CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y CONFIG_IRQSOFF_TRACER=y CONFIG_PREEMPT_TRACER=y CONFIG_SCHED_TRACER=y diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig index 24879da..bec279e 100644 --- a/arch/s390/configs/gcov_defconfig +++ b/arch/s390/configs/gcov_defconfig @@ -552,7 +552,6 @@ CONFIG_NOTIFIER_ERROR_INJECTION=m CONFIG_CPU_NOTIFIER_ERROR_INJECT=m CONFIG_PM_NOTIFIER_ERROR_INJECT=m CONFIG_LATENCYTOP=y -CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y CONFIG_BLK_DEV_IO_TRACE=y # CONFIG_KPROBE_EVENT is not set CONFIG_TRACE_ENUM_MAP_FILE=y diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index a5c1e5f..1751446 100644 --- a/arch/s390/configs/performance_defconfig +++ 
b/arch/s390/configs/performance_defconfig @@ -549,7 +549,6 @@ CONFIG_TIMER_STATS=y CONFIG_RCU_TORTURE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_LATENCYTOP=y -CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 73610f2..2d40ef0 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -172,7 +172,6 @@ CONFIG_DEBUG_NOTIFIERS=y CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_RCU_TRACE=y CONFIG_LATENCYTOP=y -CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 9b49cf1..95aefdb 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -311,6 +311,14 @@ int __get_user_bad(void) __attribute__((noreturn)); #define __put_user_unaligned __put_user #define __get_user_unaligned __get_user +extern void __compiletime_error("usercopy buffer size is too small") +__bad_copy_user(void); + +static inline void copy_user_overflow(int size, unsigned long count) +{ + WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count); +} + /** * copy_to_user: - Copy a block of data into user space. * @to: Destination address, in user space. @@ -332,12 +340,6 @@ copy_to_user(void __user *to, const void *from, unsigned long n) return __copy_to_user(to, from, n); } -void copy_from_user_overflow(void) -#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS -__compiletime_warning("copy_from_user() buffer size is not provably correct") -#endif -; - /** * copy_from_user: - Copy a block of data from user space. * @to: Destination address, in kernel space. 
@@ -362,7 +364,10 @@ copy_from_user(void *to, const void __user *from, unsigned long n) might_fault(); if (unlikely(sz != -1 && sz < n)) { - copy_from_user_overflow(); + if (!__builtin_constant_p(n)) + copy_user_overflow(sz, n); + else + __bad_copy_user(); return n; } return __copy_from_user(to, from, n); diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 4820a02..78da75b 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -4,7 +4,6 @@ config TILE def_bool y select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE - select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_WANT_FRAME_POINTERS diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h index 0a9c4265..a77369e 100644 --- a/arch/tile/include/asm/uaccess.h +++ b/arch/tile/include/asm/uaccess.h @@ -416,14 +416,13 @@ _copy_from_user(void *to, const void __user *from, unsigned long n) return n; } -#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS -/* - * There are still unprovable places in the generic code as of 2.6.34, so this - * option is not really compatible with -Werror, which is more useful in - * general. 
- */ -extern void copy_from_user_overflow(void) - __compiletime_warning("copy_from_user() size is not provably correct"); +extern void __compiletime_error("usercopy buffer size is too small") +__bad_copy_user(void); + +static inline void copy_user_overflow(int size, unsigned long count) +{ + WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count); +} static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, @@ -433,14 +432,13 @@ static inline unsigned long __must_check copy_from_user(void *to, if (likely(sz == -1 || sz >= n)) n = _copy_from_user(to, from, n); + else if (!__builtin_constant_p(n)) + copy_user_overflow(sz, n); else - copy_from_user_overflow(); + __bad_copy_user(); return n; } -#else -#define copy_from_user _copy_from_user -#endif #ifdef __tilegx__ /** diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c580d8c..2a1f0ce 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -24,7 +24,6 @@ config X86 select ARCH_DISCARD_MEMBLOCK select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE - select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FAST_MULTIPLIER diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index a0ae610..c3f2911 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -697,43 +697,14 @@ unsigned long __must_check _copy_from_user(void *to, const void __user *from, unsigned long __must_check _copy_to_user(void __user *to, const void *from, unsigned n); -#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS -# define copy_user_diag __compiletime_error -#else -# define copy_user_diag __compiletime_warning -#endif - -extern void copy_user_diag("copy_from_user() buffer size is too small") -copy_from_user_overflow(void); -extern void copy_user_diag("copy_to_user() buffer size is too small") -copy_to_user_overflow(void) __asm__("copy_from_user_overflow"); - 
-#undef copy_user_diag - -#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS - -extern void -__compiletime_warning("copy_from_user() buffer size is not provably correct") -__copy_from_user_overflow(void) __asm__("copy_from_user_overflow"); -#define __copy_from_user_overflow(size, count) __copy_from_user_overflow() - -extern void -__compiletime_warning("copy_to_user() buffer size is not provably correct") -__copy_to_user_overflow(void) __asm__("copy_from_user_overflow"); -#define __copy_to_user_overflow(size, count) __copy_to_user_overflow() - -#else +extern void __compiletime_error("usercopy buffer size is too small") +__bad_copy_user(void); -static inline void -__copy_from_user_overflow(int size, unsigned long count) +static inline void copy_user_overflow(int size, unsigned long count) { WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count); } -#define __copy_to_user_overflow __copy_from_user_overflow - -#endif - static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { @@ -743,31 +714,13 @@ copy_from_user(void *to, const void __user *from, unsigned long n) kasan_check_write(to, n); - /* - * While we would like to have the compiler do the checking for us - * even in the non-constant size case, any false positives there are - * a problem (especially when DEBUG_STRICT_USER_COPY_CHECKS, but even - * without - the [hopefully] dangerous looking nature of the warning - * would make people go look at the respecitive call sites over and - * over again just to find that there's no problem). - * - * And there are cases where it's just not realistic for the compiler - * to prove the count to be in range. For example when multiple call - * sites of a helper function - perhaps in different source files - - * all doing proper range checking, yet the helper function not doing - * so again. - * - * Therefore limit the compile time checking to the constant size - * case, and do only runtime checking for non-constant sizes. 
- */ - if (likely(sz < 0 || sz >= n)) { check_object_size(to, n, false); n = _copy_from_user(to, from, n); - } else if (__builtin_constant_p(n)) - copy_from_user_overflow(); + } else if (!__builtin_constant_p(n)) + copy_user_overflow(sz, n); else - __copy_from_user_overflow(sz, n); + __bad_copy_user(); return n; } @@ -781,21 +734,17 @@ copy_to_user(void __user *to, const void *from, unsigned long n) might_fault(); - /* See the comment in copy_from_user() above. */ if (likely(sz < 0 || sz >= n)) { check_object_size(from, n, true); n = _copy_to_user(to, from, n); - } else if (__builtin_constant_p(n)) - copy_to_user_overflow(); + } else if (!__builtin_constant_p(n)) + copy_user_overflow(sz, n); else - __copy_to_user_overflow(sz, n); + __bad_copy_user(); return n; } -#undef __copy_from_user_overflow -#undef __copy_to_user_overflow - /* * We rely on the nested NMI work to allow atomic faults from the NMI path; the * nested NMI paths are careful to preserve CR2. diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h index 1bfa602..5dea1fb 100644 --- a/include/asm-generic/uaccess.h +++ b/include/asm-generic/uaccess.h @@ -72,6 +72,7 @@ struct exception_table_entry /* Returns 0 if exception not found and fixup otherwise. 
*/ extern unsigned long search_exception_table(unsigned long); + /* * architectures with an MMU should override these two */ diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 8dbc892..573c5a1 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -158,7 +158,7 @@ #define __compiler_offsetof(a, b) \ __builtin_offsetof(a, b) -#if GCC_VERSION >= 40100 && GCC_VERSION < 40600 +#if GCC_VERSION >= 40100 # define __compiletime_object_size(obj) __builtin_object_size(obj, 0) #endif diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 2307d7c..2e2cca5 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1686,24 +1686,6 @@ config LATENCYTOP Enable this option if you want to use the LatencyTOP tool to find out which userspace is blocking on what kernel operations. -config ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS - bool - -config DEBUG_STRICT_USER_COPY_CHECKS - bool "Strict user copy size checks" - depends on ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS - depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING - help - Enabling this option turns a certain set of sanity checks for user - copy operations into compile time failures. - - The copy_from_user() etc checks are there to help test if there - are sufficient security checks on the length argument of - the copy operation, by having gcc prove that the argument is - within bounds. - - If unsure, say N. 
- source kernel/trace/Kconfig menu "Runtime Testing" diff --git a/lib/Makefile b/lib/Makefile index cfa68eb..5dc77a8 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -24,7 +24,6 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o -obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o lib-$(CONFIG_HAS_DMA) += dma-noop.o diff --git a/lib/usercopy.c b/lib/usercopy.c deleted file mode 100644 index 4f5b1dd..0000000 --- a/lib/usercopy.c +++ /dev/null @@ -1,9 +0,0 @@ -#include -#include -#include - -void copy_from_user_overflow(void) -{ - WARN(1, "Buffer overflow detected!\n"); -} -EXPORT_SYMBOL(copy_from_user_overflow); -- cgit v1.1 From a5d60783df61fbb67b7596b8a0f6b4b2e05251d5 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 30 Aug 2016 16:11:53 -0400 Subject: dm log writes: move IO accounting earlier to fix error path Move log_one_block()'s atomic_inc(&lc->io_blocks) before bio_alloc() to fix a bug that the target hangs if bio_alloc() fails. The error path does put_io_block(lc), so atomic_inc(&lc->io_blocks) must occur before invoking the error path to avoid underflow of lc->io_blocks. 
Signed-off-by: Mikulas Patocka Reviewed-by: Josef Bacik Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org --- drivers/md/dm-log-writes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index 4ab6803..4cc78ae 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -259,12 +259,12 @@ static int log_one_block(struct log_writes_c *lc, goto out; sector++; + atomic_inc(&lc->io_blocks); bio = bio_alloc(GFP_KERNEL, block->vec_cnt); if (!bio) { DMERR("Couldn't alloc log bio"); goto error; } - atomic_inc(&lc->io_blocks); bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = sector; bio->bi_bdev = lc->logdev->bdev; -- cgit v1.1 From 7efb367320f56fc4d549875b6f3a6940018ef2e5 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 30 Aug 2016 16:20:55 -0400 Subject: dm log writes: fix bug with too large bios bio_alloc() can allocate a bio with at most BIO_MAX_PAGES (256) vector entries. However, the incoming bio may have more vector entries if it was allocated by other means. For example, bcache submits bios with more than BIO_MAX_PAGES entries. This results in bio_alloc() failure. To avoid the failure, change the code so that it allocates bio with at most BIO_MAX_PAGES entries. If the incoming bio has more entries, bio_add_page() will fail and a new bio will be allocated - the code that handles bio_add_page() failure already exists in the dm-log-writes target. 
Signed-off-by: Mikulas Patocka Reviewed-by: Josef Bacik Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org # v4.1+ --- drivers/md/dm-log-writes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index 4cc78ae..ba24f4f 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -260,7 +260,7 @@ static int log_one_block(struct log_writes_c *lc, sector++; atomic_inc(&lc->io_blocks); - bio = bio_alloc(GFP_KERNEL, block->vec_cnt); + bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt, BIO_MAX_PAGES)); if (!bio) { DMERR("Couldn't alloc log bio"); goto error; @@ -282,7 +282,7 @@ static int log_one_block(struct log_writes_c *lc, if (ret != block->vecs[i].bv_len) { atomic_inc(&lc->io_blocks); submit_bio(bio); - bio = bio_alloc(GFP_KERNEL, block->vec_cnt - i); + bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt - i, BIO_MAX_PAGES)); if (!bio) { DMERR("Couldn't alloc log bio"); goto error; -- cgit v1.1 From 313a61d30761217ce4383018de1cc0d5d503a376 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 30 Aug 2016 13:57:38 -0700 Subject: drm/vc4: Allow some more signals to be packed with uniform resets. The intent was to make sure people don't sneak in a small immediate or something to change the interpretation of the uniform update args, but these signals are just fine. Fixes a validation failure in the current X server on some Render operation. Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_validate_shaders.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c index 46527e9..2543cf5 100644 --- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c +++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c @@ -309,8 +309,14 @@ validate_uniform_address_write(struct vc4_validated_shader_info *validated_shade * of uniforms on each side. 
However, this scheme is easy to * validate so it's all we allow for now. */ - - if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_NONE) { + switch (QPU_GET_FIELD(inst, QPU_SIG)) { + case QPU_SIG_NONE: + case QPU_SIG_SCOREBOARD_UNLOCK: + case QPU_SIG_COLOR_LOAD: + case QPU_SIG_LOAD_TMU0: + case QPU_SIG_LOAD_TMU1: + break; + default: DRM_ERROR("uniforms address change must be " "normal math\n"); return false; -- cgit v1.1 From 485a252a5559b45d7df04c819ec91177c62c270b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 10 Aug 2016 16:28:09 -0700 Subject: seccomp: Fix tracer exit notifications during fatal signals This fixes a ptrace vs fatal pending signals bug as manifested in seccomp now that seccomp was reordered to happen after ptrace. The short version is that seccomp should not attempt to call do_exit() while fatal signals are pending under a tracer. The existing code was trying to be as defensively paranoid as possible, but it now ends up confusing ptrace. Instead, the syscall can just be skipped (which solves the original concern that the do_exit() was addressing) and normal signal handling, tracer notification, and process death can happen. Paraphrasing from the original bug report: If a tracee task is in a PTRACE_EVENT_SECCOMP trap, or has been resumed after such a trap but not yet been scheduled, and another task in the thread-group calls exit_group(), then the tracee task exits without the ptracer receiving a PTRACE_EVENT_EXIT notification. Test case here: https://gist.github.com/khuey/3c43ac247c72cef8c956ca73281c9be7 The bug happens because when __seccomp_filter() detects fatal_signal_pending(), it calls do_exit() without dequeuing the fatal signal. When do_exit() sends the PTRACE_EVENT_EXIT notification and that task is descheduled, __schedule() notices that there is a fatal signal pending and changes its state from TASK_TRACED to TASK_RUNNING. That prevents the ptracer's waitpid() from returning the ptrace event. 
A more detailed analysis is here: https://github.com/mozilla/rr/issues/1762#issuecomment-237396255. Reported-by: Robert O'Callahan Reported-by: Kyle Huey Tested-by: Kyle Huey Fixes: 93e35efb8de4 ("x86/ptrace: run seccomp after ptrace") Signed-off-by: Kees Cook Acked-by: Oleg Nesterov Acked-by: James Morris --- kernel/seccomp.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index ef6c6c3..0db7c8a 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -605,12 +605,16 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, ptrace_event(PTRACE_EVENT_SECCOMP, data); /* * The delivery of a fatal signal during event - * notification may silently skip tracer notification. - * Terminating the task now avoids executing a system - * call that may not be intended. + * notification may silently skip tracer notification, + * which could leave us with a potentially unmodified + * syscall that the tracer would have liked to have + * changed. Since the process is about to die, we just + * force the syscall to be skipped and let the signal + * kill the process and correctly handle any tracer exit + * notifications. */ if (fatal_signal_pending(current)) - do_exit(SIGSYS); + goto skip; /* Check if the tracer forced the syscall to be skipped. */ this_syscall = syscall_get_nr(current, task_pt_regs(current)); if (this_syscall < 0) -- cgit v1.1 From 91e630d9ae6de6f740ef7c8176736eb55366833e Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Thu, 10 Mar 2016 01:22:19 +0200 Subject: dm log writes: fix check of kthread_run() return value The kthread_run() function returns either a valid task_struct or ERR_PTR() value, check for NULL is invalid. This change fixes potential for oops, e.g. in OOM situation. 
Signed-off-by: Vladimir Zapolskiy Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org --- drivers/md/dm-log-writes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index ba24f4f..49e4d8d 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -459,9 +459,9 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - ret = -EINVAL; lc->log_kthread = kthread_run(log_writes_kthread, lc, "log-write"); - if (!lc->log_kthread) { + if (IS_ERR(lc->log_kthread)) { + ret = PTR_ERR(lc->log_kthread); ti->error = "Couldn't alloc kthread"; dm_put_device(ti, lc->dev); dm_put_device(ti, lc->logdev); -- cgit v1.1 From 4e870e948fbabf62b78e8410f04c67703e7c816b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 30 Aug 2016 16:38:42 -0400 Subject: dm crypt: fix error with too large bios When dm-crypt processes writes, it allocates a new bio in crypt_alloc_buffer(). The bio is allocated from a bio set and it can have at most BIO_MAX_PAGES vector entries, however the incoming bio can be larger (e.g. if it was allocated by bcache). If the incoming bio is larger, bio_alloc_bioset() fails and an error is returned. To avoid the error, we test for a too large bio in the function crypt_map() and use dm_accept_partial_bio() to split the bio. dm_accept_partial_bio() trims the current bio to the desired size and asks DM core to send another bio with the rest of the data. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org # v3.16+ --- drivers/md/dm-crypt.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index eedba67..d609566 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1924,6 +1924,13 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; } + /* + * Check if bio is too large, split as needed. 
+ */ + if (unlikely(bio->bi_iter.bi_size > (BIO_MAX_PAGES << PAGE_SHIFT)) && + bio_data_dir(bio) == WRITE) + dm_accept_partial_bio(bio, ((BIO_MAX_PAGES << PAGE_SHIFT) >> SECTOR_SHIFT)); + io = dm_per_bio_data(bio, cc->per_bio_data_size); crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector)); io->ctx.req = (struct skcipher_request *)(io + 1); -- cgit v1.1 From 5d0be84ec0cacfc7a6d6ea548afdd07d481324cd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 30 Aug 2016 09:51:44 -0700 Subject: dm crypt: fix free of bad values after tfm allocation failure If crypt_alloc_tfms() had to allocate multiple tfms and it failed before the last allocation, then it would call crypt_free_tfms() and could free pointers from uninitialized memory -- due to the crypt_free_tfms() check for non-zero cc->tfms[i]. Fix by allocating zeroed memory. Signed-off-by: Eric Biggers Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org --- drivers/md/dm-crypt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index d609566..8742957 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1453,7 +1453,7 @@ static int crypt_alloc_tfms(struct crypt_config *cc, char *ciphermode) unsigned i; int err; - cc->tfms = kmalloc(cc->tfms_count * sizeof(struct crypto_skcipher *), + cc->tfms = kzalloc(cc->tfms_count * sizeof(struct crypto_skcipher *), GFP_KERNEL); if (!cc->tfms) return -ENOMEM; -- cgit v1.1 From edd1ea2a8a2549e4fe58e817d539445729491ecf Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Tue, 30 Aug 2016 22:19:11 +0530 Subject: dm bufio: remove use of deprecated create_singlethread_workqueue() The workqueue "dm_bufio_wq" queues a single work item &dm_bufio_work so it doesn't require execution ordering. Hence, alloc_workqueue() has been used to replace the deprecated create_singlethread_workqueue(). The WQ_MEM_RECLAIM flag has been set since DM requires forward progress under memory pressure. 
Since there are fixed number of work items, explicit concurrency limit is unnecessary here. Signed-off-by: Bhaktipriya Shridhar Signed-off-by: Mike Snitzer --- drivers/md/dm-bufio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 6571c81..8625040 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1879,7 +1879,7 @@ static int __init dm_bufio_init(void) __cache_size_refresh(); mutex_unlock(&dm_bufio_clients_lock); - dm_bufio_wq = create_singlethread_workqueue("dm_bufio_cache"); + dm_bufio_wq = alloc_workqueue("dm_bufio_cache", WQ_MEM_RECLAIM, 0); if (!dm_bufio_wq) return -ENOMEM; -- cgit v1.1 From bd37e022e334757a5dc1dae41baa29e16befe4ec Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 21 Aug 2016 15:41:44 +0000 Subject: cpufreq: dt: Add terminate entry for of_device_id tables Make sure of_device_id tables are NULL terminated. Signed-off-by: Wei Yongjun Acked-by: Viresh Kumar Fixes: f56aad1d98f1 (cpufreq: dt: Add generic platform-device creation support) CC: 4.7+ # 4.7+ Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq-dt-platdev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 0bb44d5..2ee40fd 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -74,6 +74,8 @@ static const struct of_device_id machines[] __initconst = { { .compatible = "ti,omap5", }, { .compatible = "xlnx,zynq-7000", }, + + { } }; static int __init cpufreq_dt_platdev_init(void) -- cgit v1.1 From d44c950e9398e639e124014e5872480a37b67259 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 26 Apr 2016 13:38:55 -0700 Subject: PM / runtime: Add _rcuidle suffix to allow rpm_resume() to be called from idle This commit applies another _rcuidle suffix to fix an RCU use from idle. > =============================== > [ INFO: suspicious RCU usage. 
] > 4.6.0-rc5-next-20160426+ #1122 Not tainted > ------------------------------- > include/trace/events/rpm.h:69 suspicious rcu_dereference_check() usage! > > other info that might help us debug this: > > > RCU used illegally from idle CPU! > rcu_scheduler_active = 1, debug_locks = 0 > RCU used illegally from extended quiescent state! > 1 lock held by swapper/0/0: > #0: (&(&dev->power.lock)->rlock){-.-...}, at: [] __pm_runtime_resume+0x3c/0x64 > > stack backtrace: > CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc5-next-20160426+ #1122 > Hardware name: Generic OMAP36xx (Flattened Device Tree) > [] (unwind_backtrace) from [] (show_stack+0x10/0x14) > [] (show_stack) from [] (dump_stack+0xb0/0xe4) > [] (dump_stack) from [] (rpm_resume+0x5cc/0x7f4) > [] (rpm_resume) from [] (__pm_runtime_resume+0x4c/0x64) > [] (__pm_runtime_resume) from [] (omap2_gpio_resume_after_idle+0x54/0x68) > [] (omap2_gpio_resume_after_idle) from [] (omap3_enter_idle_bm+0xfc/0x1ec) > [] (omap3_enter_idle_bm) from [] (cpuidle_enter_state+0x80/0x3d4) > [] (cpuidle_enter_state) from [] (cpu_startup_entry+0x198/0x3a0) > [] (cpu_startup_entry) from [] (start_kernel+0x354/0x3c8) > [] (start_kernel) from [<8000807c>] (0x8000807c) Reported-by: Tony Lindgren Signed-off-by: Paul E. McKenney Tested-by: Tony Lindgren Tested-by: Guenter Roeck Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/runtime.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index e097d35..76127e1 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -601,7 +601,7 @@ static int rpm_resume(struct device *dev, int rpmflags) struct device *parent = NULL; int retval = 0; - trace_rpm_resume(dev, rpmflags); + trace_rpm_resume_rcuidle(dev, rpmflags); repeat: if (dev->power.runtime_error) @@ -764,7 +764,7 @@ static int rpm_resume(struct device *dev, int rpmflags) spin_lock_irq(&dev->power.lock); } - trace_rpm_return_int(dev, _THIS_IP_, retval); + trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval); return retval; } -- cgit v1.1 From d7737ce964d944dd07e25b0f569edcd550ede18c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 26 Apr 2016 13:03:51 -0700 Subject: PM / runtime: Add _rcuidle suffix to allow rpm_idle() use from idle This commit appends a few _rcuidle suffixes to fix the following RCU-used-from-idle bug: > =============================== > [ INFO: suspicious RCU usage. ] > 4.6.0-rc5-next-20160426+ #1116 Not tainted > ------------------------------- > include/trace/events/rpm.h:95 suspicious rcu_dereference_check() usage! > > other info that might help us debug this: > > > RCU used illegally from idle CPU! > rcu_scheduler_active = 1, debug_locks = 0 > RCU used illegally from extended quiescent state! 
> 1 lock held by swapper/0/0: > #0: (&(&dev->power.lock)->rlock){-.-...}, at: [] __rpm_callback+0x58/0x60 > > stack backtrace: > CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc5-next-20160426+ #1116 > Hardware name: Generic OMAP36xx (Flattened Device Tree) > [] (unwind_backtrace) from [] (show_stack+0x10/0x14) > [] (show_stack) from [] (dump_stack+0xb0/0xe4) > [] (dump_stack) from [] (rpm_suspend+0x580/0x768) > [] (rpm_suspend) from [] (__pm_runtime_suspend+0x64/0x84) > [] (__pm_runtime_suspend) from [] (omap2_gpio_prepare_for_idle+0x5c/0x70) > [] (omap2_gpio_prepare_for_idle) from [] (omap_sram_idle+0x140/0x244) > [] (omap_sram_idle) from [] (omap3_enter_idle_bm+0xfc/0x1ec) > [] (omap3_enter_idle_bm) from [] (cpuidle_enter_state+0x80/0x3d4) > [] (cpuidle_enter_state) from [] (cpu_startup_entry+0x198/0x3a0) > [] (cpu_startup_entry) from [] (start_kernel+0x354/0x3c8) > [] (start_kernel) from [<8000807c>] (0x8000807c) In the immortal words of Steven Rostedt, "*Whack* *Whack* *Whack*!!!" Reported-by: Tony Lindgren Signed-off-by: Paul E. McKenney Tested-by: Tony Lindgren Tested-by: Guenter Roeck WhACKED-by: Steven Rostedt Signed-off-by: Rafael J. Wysocki --- drivers/base/power/runtime.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 76127e1..17995fa 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -301,7 +301,7 @@ static int rpm_idle(struct device *dev, int rpmflags) int (*callback)(struct device *); int retval; - trace_rpm_idle(dev, rpmflags); + trace_rpm_idle_rcuidle(dev, rpmflags); retval = rpm_check_suspend_allowed(dev); if (retval < 0) ; /* Conditions are wrong. 
*/ @@ -337,7 +337,7 @@ static int rpm_idle(struct device *dev, int rpmflags) dev->power.request_pending = true; queue_work(pm_wq, &dev->power.work); } - trace_rpm_return_int(dev, _THIS_IP_, 0); + trace_rpm_return_int_rcuidle(dev, _THIS_IP_, 0); return 0; } @@ -352,7 +352,7 @@ static int rpm_idle(struct device *dev, int rpmflags) wake_up_all(&dev->power.wait_queue); out: - trace_rpm_return_int(dev, _THIS_IP_, retval); + trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval); return retval ? retval : rpm_suspend(dev, rpmflags | RPM_AUTO); } -- cgit v1.1 From 279cf3f23870f7eb8ca071115e06d3d5ca0a2b9e Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Fri, 26 Aug 2016 01:00:54 +0200 Subject: drm/nouveau/acpi: use DSM if bridge does not support D3cold Even if PR3 support is available on the bridge, it will not be used if the PCI layer considers it unavailable (i.e. on all laptops from 2013 and 2014). Ensure that this condition is checked to allow a fallback to the Optimus DSM for device poweroff. Initially I wanted to call pci_d3cold_enable before checking bridge_d3 (in case the user changed d3cold_allowed), but that is such an unlikely case and likely fragile anyway. The current patch is suggested by Mika in http://www.spinics.net/lists/linux-pci/msg52599.html Cc: Mika Westerberg Signed-off-by: Peter Wu Reviewed-by: Mika Westerberg Signed-off-by: Dave Airlie --- drivers/gpu/drm/nouveau/nouveau_acpi.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c index f2ad17a..dc57b62 100644 --- a/drivers/gpu/drm/nouveau/nouveau_acpi.c +++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c @@ -225,6 +225,17 @@ static bool nouveau_pr3_present(struct pci_dev *pdev) if (!parent_pdev) return false; + if (!parent_pdev->bridge_d3) { + /* + * Parent PCI bridge is currently not power managed. 
+ * Since userspace can change these afterwards to be on + * the safe side we stick with _DSM and prevent usage of + * _PR3 from the bridge. + */ + pci_d3cold_disable(pdev); + return false; + } + parent_adev = ACPI_COMPANION(&parent_pdev->dev); if (!parent_adev) return false; -- cgit v1.1 From 237e15dfd5d651868726111c3a9d828bec700490 Mon Sep 17 00:00:00 2001 From: Ashok Raj Nagarajan Date: Fri, 19 Aug 2016 13:37:37 +0300 Subject: ath10k: fix get rx_status from htt context On handling amsdu on rx path, get the rx_status from htt context. Without this fix, we are seeing warnings when running DBDC traffic like this. WARNING: CPU: 0 PID: 0 at net/mac80211/rx.c:4105 ieee80211_rx_napi+0x88/0x7d8 [mac80211]() [ 1715.878248] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G W 3.18.21 #1 [ 1715.878273] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 1715.878293] [] (show_stack) from [] (dump_stack+0x70/0xbc) [ 1715.878315] [] (dump_stack) from [] (warn_slowpath_common+0x64/0x88) [ 1715.878339] [] (warn_slowpath_common) from [] (warn_slowpath_null+0x18/0x20) [ 1715.878395] [] (warn_slowpath_null) from [] (ieee80211_rx_napi+0x88/0x7d8 [mac80211]) [ 1715.878474] [] (ieee80211_rx_napi [mac80211]) from [] (ath10k_htt_t2h_msg_handler+0xb48/0xbfc [ath10k_core]) [ 1715.878535] [] (ath10k_htt_t2h_msg_handler [ath10k_core]) from [] (ath10k_htt_t2h_msg_handler+0xbf8/0xbfc [ath10k_core]) [ 1715.878597] [] (ath10k_htt_t2h_msg_handler [ath10k_core]) from [] (ath10k_htt_txrx_compl_task+0xa54/0x1170 [ath10k_core]) [ 1715.878639] [] (ath10k_htt_txrx_compl_task [ath10k_core]) from [] (tasklet_action+0xb4/0x130) [ 1715.878659] [] (tasklet_action) from [] (__do_softirq+0xe0/0x210) [ 1715.878678] [] (__do_softirq) from [] (irq_exit+0x84/0xe0) [ 1715.878700] [] (irq_exit) from [] (__handle_domain_irq+0x98/0xd0) [ 1715.878722] [] (__handle_domain_irq) from [] (gic_handle_irq+0x38/0x5c) [ 1715.878741] [] (gic_handle_irq) from [] (__irq_svc+0x40/0x74) [ 1715.878753] Exception stack(0xc05f9f50 to 
0xc05f9f98) [ 1715.878767] 9f40: ffffffed 00000000 00399e1e c000a220 [ 1715.878786] 9f60: 00000000 c05f6780 c05f8000 00000000 c05f5db8 ffffffed c05f8000 c04d1980 [ 1715.878802] 9f80: 00000000 c05f9f98 c0018110 c0018114 60000013 ffffffff [ 1715.878822] [] (__irq_svc) from [] (arch_cpu_idle+0x2c/0x50) [ 1715.878844] [] (arch_cpu_idle) from [] (cpu_startup_entry+0x108/0x234) [ 1715.878866] [] (cpu_startup_entry) from [] (start_kernel+0x33c/0x3b8) [ 1715.878879] ---[ end trace 6d5e1cc0fef8ed6a ]--- [ 1715.878899] ------------[ cut here ]------------ Fixes: 18235664e7f9 ("ath10k: cleanup amsdu processing for rx indication") Signed-off-by: Ashok Raj Nagarajan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/htt_rx.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index 78db5d6..24c8d65 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -1525,7 +1525,7 @@ static void ath10k_htt_rx_h_filter(struct ath10k *ar, static int ath10k_htt_rx_handle_amsdu(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; - static struct ieee80211_rx_status rx_status; + struct ieee80211_rx_status *rx_status = &htt->rx_status; struct sk_buff_head amsdu; int ret; @@ -1549,11 +1549,11 @@ static int ath10k_htt_rx_handle_amsdu(struct ath10k_htt *htt) return ret; } - ath10k_htt_rx_h_ppdu(ar, &amsdu, &rx_status, 0xffff); + ath10k_htt_rx_h_ppdu(ar, &amsdu, rx_status, 0xffff); ath10k_htt_rx_h_unchain(ar, &amsdu, ret > 0); - ath10k_htt_rx_h_filter(ar, &amsdu, &rx_status); - ath10k_htt_rx_h_mpdu(ar, &amsdu, &rx_status); - ath10k_htt_rx_h_deliver(ar, &amsdu, &rx_status); + ath10k_htt_rx_h_filter(ar, &amsdu, rx_status); + ath10k_htt_rx_h_mpdu(ar, &amsdu, rx_status); + ath10k_htt_rx_h_deliver(ar, &amsdu, rx_status); return 0; } -- cgit v1.1 From 4e80ffab0e722911842b59299dbf8033709673e5 Mon Sep 17 00:00:00 2001 From: Mohammed Shafi 
Shajakhan Date: Fri, 19 Aug 2016 13:37:37 +0300 Subject: ath10k: Remove driver log suggesting QCA9887 support is experimental Support for QCA9887 is no longer experimental, and if there are any issues we need to address them. Signed-off-by: Mohammed Shafi Shajakhan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/pci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index 9a22c47..07933c5 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -3162,7 +3162,6 @@ static int ath10k_pci_probe(struct pci_dev *pdev, pci_hard_reset = ath10k_pci_qca988x_chip_reset; break; case QCA9887_1_0_DEVICE_ID: - dev_warn(&pdev->dev, "QCA9887 support is still experimental, there are likely bugs. You have been warned.\n"); hw_rev = ATH10K_HW_QCA9887; pci_ps = false; pci_soft_reset = ath10k_pci_warm_reset; -- cgit v1.1 From 5459c5d47608e7d66c89face4bb6084d0c4136a3 Mon Sep 17 00:00:00 2001 From: Tamizh chelvam Date: Fri, 19 Aug 2016 13:37:39 +0300 Subject: ath10k: move firmware_swap_code_seg_info to ath10k_fw_file Preparation to make use of firmware_swap_code_seg_info for UTF binary. 
Signed-off-by: Tamizh chelvam Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 6 +++--- drivers/net/wireless/ath/ath10k/core.h | 13 +++++++++---- drivers/net/wireless/ath/ath10k/swap.c | 26 ++++++++++++++------------ drivers/net/wireless/ath/ath10k/swap.h | 11 ++++++++--- 4 files changed, 34 insertions(+), 22 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index e889829..f82877d 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -745,7 +745,7 @@ static int ath10k_download_fw(struct ath10k *ar) data = ar->running_fw->fw_file.firmware_data; data_len = ar->running_fw->fw_file.firmware_len; - ret = ath10k_swap_code_seg_configure(ar); + ret = ath10k_swap_code_seg_configure(ar, &ar->running_fw->fw_file); if (ret) { ath10k_err(ar, "failed to configure fw code swap: %d\n", ret); @@ -787,7 +787,7 @@ static void ath10k_core_free_firmware_files(struct ath10k *ar) if (!IS_ERR(ar->pre_cal_file)) release_firmware(ar->pre_cal_file); - ath10k_swap_code_seg_release(ar); + ath10k_swap_code_seg_release(ar, &ar->normal_mode_fw.fw_file); ar->normal_mode_fw.fw_file.otp_data = NULL; ar->normal_mode_fw.fw_file.otp_len = 0; @@ -2031,7 +2031,7 @@ static int ath10k_core_probe_fw(struct ath10k *ar) goto err_free_firmware_files; } - ret = ath10k_swap_code_seg_init(ar); + ret = ath10k_swap_code_seg_init(ar, &ar->normal_mode_fw.fw_file); if (ret) { ath10k_err(ar, "failed to initialize code swap segment: %d\n", ret); diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index 30ae5bf..cc6e66f 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -663,6 +663,15 @@ struct ath10k_fw_file { const void *codeswap_data; size_t codeswap_len; + + /* The original idea of struct ath10k_fw_file was that it only + * contains struct firmware and pointers to various parts (actual + * firmware binary, 
otp, metadata etc) of the file. This seg_info + * is actually created separate but as this is used similarly as + * the other firmware components it's more convenient to have it + * here. + */ + struct ath10k_swap_code_seg_info *firmware_swap_code_seg_info; }; struct ath10k_fw_components { @@ -775,10 +784,6 @@ struct ath10k { const struct firmware *cal_file; struct { - struct ath10k_swap_code_seg_info *firmware_swap_code_seg_info; - } swap; - - struct { u32 vendor; u32 device; u32 subsystem_vendor; diff --git a/drivers/net/wireless/ath/ath10k/swap.c b/drivers/net/wireless/ath/ath10k/swap.c index 0c5f586..adf4592 100644 --- a/drivers/net/wireless/ath/ath10k/swap.c +++ b/drivers/net/wireless/ath/ath10k/swap.c @@ -134,17 +134,18 @@ ath10k_swap_code_seg_alloc(struct ath10k *ar, size_t swap_bin_len) return seg_info; } -int ath10k_swap_code_seg_configure(struct ath10k *ar) +int ath10k_swap_code_seg_configure(struct ath10k *ar, + const struct ath10k_fw_file *fw_file) { int ret; struct ath10k_swap_code_seg_info *seg_info = NULL; - if (!ar->swap.firmware_swap_code_seg_info) + if (!fw_file->firmware_swap_code_seg_info) return 0; ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot found firmware code swap binary\n"); - seg_info = ar->swap.firmware_swap_code_seg_info; + seg_info = fw_file->firmware_swap_code_seg_info; ret = ath10k_bmi_write_memory(ar, seg_info->target_addr, &seg_info->seg_hw_info, @@ -158,28 +159,29 @@ int ath10k_swap_code_seg_configure(struct ath10k *ar) return 0; } -void ath10k_swap_code_seg_release(struct ath10k *ar) +void ath10k_swap_code_seg_release(struct ath10k *ar, + struct ath10k_fw_file *fw_file) { - ath10k_swap_code_seg_free(ar, ar->swap.firmware_swap_code_seg_info); + ath10k_swap_code_seg_free(ar, fw_file->firmware_swap_code_seg_info); /* FIXME: these two assignments look to bein wrong place! Shouldn't * they be in ath10k_core_free_firmware_files() like the rest? 
*/ - ar->normal_mode_fw.fw_file.codeswap_data = NULL; - ar->normal_mode_fw.fw_file.codeswap_len = 0; + fw_file->codeswap_data = NULL; + fw_file->codeswap_len = 0; - ar->swap.firmware_swap_code_seg_info = NULL; + fw_file->firmware_swap_code_seg_info = NULL; } -int ath10k_swap_code_seg_init(struct ath10k *ar) +int ath10k_swap_code_seg_init(struct ath10k *ar, struct ath10k_fw_file *fw_file) { int ret; struct ath10k_swap_code_seg_info *seg_info; const void *codeswap_data; size_t codeswap_len; - codeswap_data = ar->normal_mode_fw.fw_file.codeswap_data; - codeswap_len = ar->normal_mode_fw.fw_file.codeswap_len; + codeswap_data = fw_file->codeswap_data; + codeswap_len = fw_file->codeswap_len; if (!codeswap_len || !codeswap_data) return 0; @@ -200,7 +202,7 @@ int ath10k_swap_code_seg_init(struct ath10k *ar) return ret; } - ar->swap.firmware_swap_code_seg_info = seg_info; + fw_file->firmware_swap_code_seg_info = seg_info; return 0; } diff --git a/drivers/net/wireless/ath/ath10k/swap.h b/drivers/net/wireless/ath/ath10k/swap.h index 36991c7..f5dc047 100644 --- a/drivers/net/wireless/ath/ath10k/swap.h +++ b/drivers/net/wireless/ath/ath10k/swap.h @@ -23,6 +23,8 @@ /* Currently only one swap segment is supported */ #define ATH10K_SWAP_CODE_SEG_NUM_SUPPORTED 1 +struct ath10k_fw_file; + struct ath10k_swap_code_seg_tlv { __le32 address; __le32 length; @@ -58,8 +60,11 @@ struct ath10k_swap_code_seg_info { dma_addr_t paddr[ATH10K_SWAP_CODE_SEG_NUM_SUPPORTED]; }; -int ath10k_swap_code_seg_configure(struct ath10k *ar); -void ath10k_swap_code_seg_release(struct ath10k *ar); -int ath10k_swap_code_seg_init(struct ath10k *ar); +int ath10k_swap_code_seg_configure(struct ath10k *ar, + const struct ath10k_fw_file *fw_file); +void ath10k_swap_code_seg_release(struct ath10k *ar, + struct ath10k_fw_file *fw_file); +int ath10k_swap_code_seg_init(struct ath10k *ar, + struct ath10k_fw_file *fw_file); #endif -- cgit v1.1 From d912fc094bc458c7df5e1d8eadf74a35ffc07380 Mon Sep 17 00:00:00 2001 From: 
Tamizh chelvam Date: Fri, 19 Aug 2016 13:37:40 +0300 Subject: ath10k: handle testmode events for 10.2 and 10.4 based firmware Currently testmode events for 10.x firmware are processed from 10.1 wmi event processing. This patch is used to handle testmode events in 10.2 and 10.4 based firmware. Signed-off-by: Tamizh chelvam Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/wmi.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index d246288..e67b254 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -5124,6 +5124,7 @@ static void ath10k_wmi_10_2_op_rx(struct ath10k *ar, struct sk_buff *skb) { struct wmi_cmd_hdr *cmd_hdr; enum wmi_10_2_event_id id; + bool consumed; cmd_hdr = (struct wmi_cmd_hdr *)skb->data; id = MS(__le32_to_cpu(cmd_hdr->cmd_id), WMI_CMD_HDR_CMD_ID); @@ -5133,6 +5134,18 @@ static void ath10k_wmi_10_2_op_rx(struct ath10k *ar, struct sk_buff *skb) trace_ath10k_wmi_event(ar, id, skb->data, skb->len); + consumed = ath10k_tm_event_wmi(ar, id, skb); + + /* Ready event must be handled normally also in UTF mode so that we + * know the UTF firmware has booted, others we are just bypass WMI + * events to testmode. 
+ */ + if (consumed && id != WMI_10_2_READY_EVENTID) { + ath10k_dbg(ar, ATH10K_DBG_WMI, + "wmi testmode consumed 0x%x\n", id); + goto out; + } + switch (id) { case WMI_10_2_MGMT_RX_EVENTID: ath10k_wmi_event_mgmt_rx(ar, skb); @@ -5248,6 +5261,7 @@ static void ath10k_wmi_10_4_op_rx(struct ath10k *ar, struct sk_buff *skb) { struct wmi_cmd_hdr *cmd_hdr; enum wmi_10_4_event_id id; + bool consumed; cmd_hdr = (struct wmi_cmd_hdr *)skb->data; id = MS(__le32_to_cpu(cmd_hdr->cmd_id), WMI_CMD_HDR_CMD_ID); @@ -5257,6 +5271,18 @@ static void ath10k_wmi_10_4_op_rx(struct ath10k *ar, struct sk_buff *skb) trace_ath10k_wmi_event(ar, id, skb->data, skb->len); + consumed = ath10k_tm_event_wmi(ar, id, skb); + + /* Ready event must be handled normally also in UTF mode so that we + * know the UTF firmware has booted, others we are just bypass WMI + * events to testmode. + */ + if (consumed && id != WMI_10_4_READY_EVENTID) { + ath10k_dbg(ar, ATH10K_DBG_WMI, + "wmi testmode consumed 0x%x\n", id); + goto out; + } + switch (id) { case WMI_10_4_MGMT_RX_EVENTID: ath10k_wmi_event_mgmt_rx(ar, skb); -- cgit v1.1 From ebce1a5e3a0e2c035f201c21ae9f403b42efcbcb Mon Sep 17 00:00:00 2001 From: Tamizh chelvam Date: Fri, 19 Aug 2016 13:37:40 +0300 Subject: ath10k: add testmode support for 10.4 firmware This patch adds testmode support for 10.4 based chipsets and added code swap support for UTF binary. 
Signed-off-by: Tamizh chelvam Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/testmode.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/testmode.c b/drivers/net/wireless/ath/ath10k/testmode.c index 120f423..091f29d 100644 --- a/drivers/net/wireless/ath/ath10k/testmode.c +++ b/drivers/net/wireless/ath/ath10k/testmode.c @@ -23,6 +23,7 @@ #include "wmi.h" #include "hif.h" #include "hw.h" +#include "core.h" #include "testmode_i.h" @@ -240,6 +241,18 @@ static int ath10k_tm_cmd_utf_start(struct ath10k *ar, struct nlattr *tb[]) goto err; } + if (ar->testmode.utf_mode_fw.fw_file.codeswap_data && + ar->testmode.utf_mode_fw.fw_file.codeswap_len) { + ret = ath10k_swap_code_seg_init(ar, + &ar->testmode.utf_mode_fw.fw_file); + if (ret) { + ath10k_warn(ar, + "failed to init utf code swap segment: %d\n", + ret); + goto err_release_utf_mode_fw; + } + } + spin_lock_bh(&ar->data_lock); ar->testmode.utf_monitor = true; spin_unlock_bh(&ar->data_lock); @@ -279,6 +292,11 @@ err_power_down: ath10k_hif_power_down(ar); err_release_utf_mode_fw: + if (ar->testmode.utf_mode_fw.fw_file.codeswap_data && + ar->testmode.utf_mode_fw.fw_file.codeswap_len) + ath10k_swap_code_seg_release(ar, + &ar->testmode.utf_mode_fw.fw_file); + release_firmware(ar->testmode.utf_mode_fw.fw_file.firmware); ar->testmode.utf_mode_fw.fw_file.firmware = NULL; @@ -301,6 +319,11 @@ static void __ath10k_tm_cmd_utf_stop(struct ath10k *ar) spin_unlock_bh(&ar->data_lock); + if (ar->testmode.utf_mode_fw.fw_file.codeswap_data && + ar->testmode.utf_mode_fw.fw_file.codeswap_len) + ath10k_swap_code_seg_release(ar, + &ar->testmode.utf_mode_fw.fw_file); + release_firmware(ar->testmode.utf_mode_fw.fw_file.firmware); ar->testmode.utf_mode_fw.fw_file.firmware = NULL; -- cgit v1.1 From e25854f2404cc92882e42fe8002b0fd75a77d842 Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Fri, 19 Aug 2016 13:37:41 +0300 Subject: ath10k: implement wmi echo command Will be 
useful for implementing command barriers. Signed-off-by: Michal Kazior Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/wmi-ops.h | 17 +++++++++++++++++ drivers/net/wireless/ath/ath10k/wmi-tlv.c | 29 +++++++++++++++++++++++++++++ drivers/net/wireless/ath/ath10k/wmi.c | 23 +++++++++++++++++++++++ 3 files changed, 69 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/wmi-ops.h b/drivers/net/wireless/ath/ath10k/wmi-ops.h index 64ebd30..b1d88fa 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-ops.h +++ b/drivers/net/wireless/ath/ath10k/wmi-ops.h @@ -194,6 +194,7 @@ struct wmi_ops { struct sk_buff *(*gen_pdev_bss_chan_info_req) (struct ath10k *ar, enum wmi_bss_survey_req_type type); + struct sk_buff *(*gen_echo)(struct ath10k *ar, u32 value); }; int ath10k_wmi_cmd_send(struct ath10k *ar, struct sk_buff *skb, u32 cmd_id); @@ -1382,4 +1383,20 @@ ath10k_wmi_pdev_bss_chan_info_request(struct ath10k *ar, wmi->cmd->pdev_bss_chan_info_request_cmdid); } +static inline int +ath10k_wmi_echo(struct ath10k *ar, u32 value) +{ + struct ath10k_wmi *wmi = &ar->wmi; + struct sk_buff *skb; + + if (!wmi->ops->gen_echo) + return -EOPNOTSUPP; + + skb = wmi->ops->gen_echo(ar, value); + if (IS_ERR(skb)) + return PTR_ERR(skb); + + return ath10k_wmi_cmd_send(ar, skb, wmi->cmd->echo_cmdid); +} + #endif diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c index e09337e..cd59585 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c +++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c @@ -3081,6 +3081,34 @@ ath10k_wmi_tlv_op_gen_adaptive_qcs(struct ath10k *ar, bool enable) return skb; } +static struct sk_buff * +ath10k_wmi_tlv_op_gen_echo(struct ath10k *ar, u32 value) +{ + struct wmi_echo_cmd *cmd; + struct wmi_tlv *tlv; + struct sk_buff *skb; + void *ptr; + size_t len; + + len = sizeof(*tlv) + sizeof(*cmd); + skb = ath10k_wmi_alloc_skb(ar, len); + if (!skb) + return ERR_PTR(-ENOMEM); + + ptr = (void *)skb->data; + tlv = ptr; + tlv->tag 
= __cpu_to_le16(WMI_TLV_TAG_STRUCT_ECHO_CMD); + tlv->len = __cpu_to_le16(sizeof(*cmd)); + cmd = (void *)tlv->value; + cmd->value = cpu_to_le32(value); + + ptr += sizeof(*tlv); + ptr += sizeof(*cmd); + + ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi tlv echo value 0x%08x\n", value); + return skb; +} + /****************/ /* TLV mappings */ /****************/ @@ -3485,6 +3513,7 @@ static const struct wmi_ops wmi_tlv_ops = { .gen_adaptive_qcs = ath10k_wmi_tlv_op_gen_adaptive_qcs, .fw_stats_fill = ath10k_wmi_main_op_fw_stats_fill, .get_vdev_subtype = ath10k_wmi_op_get_vdev_subtype, + .gen_echo = ath10k_wmi_tlv_op_gen_echo, }; static const struct wmi_peer_flags_map wmi_tlv_peer_flags_map = { diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index e67b254..014c310 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -7675,6 +7675,24 @@ ath10k_wmi_10_4_ext_resource_config(struct ath10k *ar, return skb; } +static struct sk_buff * +ath10k_wmi_op_gen_echo(struct ath10k *ar, u32 value) +{ + struct wmi_echo_cmd *cmd; + struct sk_buff *skb; + + skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); + if (!skb) + return ERR_PTR(-ENOMEM); + + cmd = (struct wmi_echo_cmd *)skb->data; + cmd->value = cpu_to_le32(value); + + ath10k_dbg(ar, ATH10K_DBG_WMI, + "wmi echo value 0x%08x\n", value); + return skb; +} + static const struct wmi_ops wmi_ops = { .rx = ath10k_wmi_op_rx, .map_svc = wmi_main_svc_map, @@ -7735,6 +7753,7 @@ static const struct wmi_ops wmi_ops = { .gen_delba_send = ath10k_wmi_op_gen_delba_send, .fw_stats_fill = ath10k_wmi_main_op_fw_stats_fill, .get_vdev_subtype = ath10k_wmi_op_get_vdev_subtype, + .gen_echo = ath10k_wmi_op_gen_echo, /* .gen_bcn_tmpl not implemented */ /* .gen_prb_tmpl not implemented */ /* .gen_p2p_go_bcn_ie not implemented */ @@ -7803,6 +7822,7 @@ static const struct wmi_ops wmi_10_1_ops = { .gen_delba_send = ath10k_wmi_op_gen_delba_send, .fw_stats_fill = ath10k_wmi_10x_op_fw_stats_fill, 
.get_vdev_subtype = ath10k_wmi_op_get_vdev_subtype, + .gen_echo = ath10k_wmi_op_gen_echo, /* .gen_bcn_tmpl not implemented */ /* .gen_prb_tmpl not implemented */ /* .gen_p2p_go_bcn_ie not implemented */ @@ -7822,6 +7842,7 @@ static const struct wmi_ops wmi_10_2_ops = { .pull_svc_rdy = ath10k_wmi_10x_op_pull_svc_rdy_ev, .gen_pdev_set_rd = ath10k_wmi_10x_op_gen_pdev_set_rd, .gen_start_scan = ath10k_wmi_10x_op_gen_start_scan, + .gen_echo = ath10k_wmi_op_gen_echo, .pull_scan = ath10k_wmi_op_pull_scan_ev, .pull_mgmt_rx = ath10k_wmi_op_pull_mgmt_rx_ev, @@ -7888,6 +7909,7 @@ static const struct wmi_ops wmi_10_2_4_ops = { .pull_svc_rdy = ath10k_wmi_10x_op_pull_svc_rdy_ev, .gen_pdev_set_rd = ath10k_wmi_10x_op_gen_pdev_set_rd, .gen_start_scan = ath10k_wmi_10x_op_gen_start_scan, + .gen_echo = ath10k_wmi_op_gen_echo, .pull_scan = ath10k_wmi_op_pull_scan_ev, .pull_mgmt_rx = ath10k_wmi_op_pull_mgmt_rx_ev, @@ -8010,6 +8032,7 @@ static const struct wmi_ops wmi_10_4_ops = { .gen_pdev_get_temperature = ath10k_wmi_10_2_op_gen_pdev_get_temperature, .get_vdev_subtype = ath10k_wmi_10_4_op_get_vdev_subtype, .gen_pdev_bss_chan_info_req = ath10k_wmi_10_2_op_gen_pdev_bss_chan_info, + .gen_echo = ath10k_wmi_op_gen_echo, }; int ath10k_wmi_attach(struct ath10k *ar) -- cgit v1.1 From 84d4911b7184dfa911ea089c2d6728b994de6cd9 Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Fri, 19 Aug 2016 13:37:42 +0300 Subject: ath10k: implement wmi echo event Will be useful for implementing command barriers. 
Signed-off-by: Michal Kazior Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/wmi-ops.h | 12 ++++++++++++ drivers/net/wireless/ath/ath10k/wmi-tlv.c | 28 ++++++++++++++++++++++++++++ drivers/net/wireless/ath/ath10k/wmi.c | 29 ++++++++++++++++++++++++++++- drivers/net/wireless/ath/ath10k/wmi.h | 4 ++++ 4 files changed, 72 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/wmi-ops.h b/drivers/net/wireless/ath/ath10k/wmi-ops.h index b1d88fa..c67eda7 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-ops.h +++ b/drivers/net/wireless/ath/ath10k/wmi-ops.h @@ -51,6 +51,8 @@ struct wmi_ops { struct wmi_roam_ev_arg *arg); int (*pull_wow_event)(struct ath10k *ar, struct sk_buff *skb, struct wmi_wow_ev_arg *arg); + int (*pull_echo_ev)(struct ath10k *ar, struct sk_buff *skb, + struct wmi_echo_ev_arg *arg); enum wmi_txbf_conf (*get_txbf_conf_scheme)(struct ath10k *ar); struct sk_buff *(*gen_pdev_suspend)(struct ath10k *ar, u32 suspend_opt); @@ -350,6 +352,16 @@ ath10k_wmi_pull_wow_event(struct ath10k *ar, struct sk_buff *skb, return ar->wmi.ops->pull_wow_event(ar, skb, arg); } +static inline int +ath10k_wmi_pull_echo_ev(struct ath10k *ar, struct sk_buff *skb, + struct wmi_echo_ev_arg *arg) +{ + if (!ar->wmi.ops->pull_echo_ev) + return -EOPNOTSUPP; + + return ar->wmi.ops->pull_echo_ev(ar, skb, arg); +} + static inline enum wmi_txbf_conf ath10k_wmi_get_txbf_conf_scheme(struct ath10k *ar) { diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c index cd59585..a42f52d 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c +++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c @@ -1223,6 +1223,33 @@ ath10k_wmi_tlv_op_pull_wow_ev(struct ath10k *ar, struct sk_buff *skb, return 0; } +static int ath10k_wmi_tlv_op_pull_echo_ev(struct ath10k *ar, + struct sk_buff *skb, + struct wmi_echo_ev_arg *arg) +{ + const void **tb; + const struct wmi_echo_event *ev; + int ret; + + tb = ath10k_wmi_tlv_parse_alloc(ar, skb->data, 
skb->len, GFP_ATOMIC); + if (IS_ERR(tb)) { + ret = PTR_ERR(tb); + ath10k_warn(ar, "failed to parse tlv: %d\n", ret); + return ret; + } + + ev = tb[WMI_TLV_TAG_STRUCT_ECHO_EVENT]; + if (!ev) { + kfree(tb); + return -EPROTO; + } + + arg->value = ev->value; + + kfree(tb); + return 0; +} + static struct sk_buff * ath10k_wmi_tlv_op_gen_pdev_suspend(struct ath10k *ar, u32 opt) { @@ -3457,6 +3484,7 @@ static const struct wmi_ops wmi_tlv_ops = { .pull_fw_stats = ath10k_wmi_tlv_op_pull_fw_stats, .pull_roam_ev = ath10k_wmi_tlv_op_pull_roam_ev, .pull_wow_event = ath10k_wmi_tlv_op_pull_wow_ev, + .pull_echo_ev = ath10k_wmi_tlv_op_pull_echo_ev, .get_txbf_conf_scheme = ath10k_wmi_tlv_txbf_conf_scheme, .gen_pdev_suspend = ath10k_wmi_tlv_op_gen_pdev_suspend, diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 014c310..b802ca9 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -2495,7 +2495,18 @@ exit: void ath10k_wmi_event_echo(struct ath10k *ar, struct sk_buff *skb) { - ath10k_dbg(ar, ATH10K_DBG_WMI, "WMI_ECHO_EVENTID\n"); + struct wmi_echo_ev_arg arg = {}; + int ret; + + ret = ath10k_wmi_pull_echo_ev(ar, skb, &arg); + if (ret) { + ath10k_warn(ar, "failed to parse echo: %d\n", ret); + return; + } + + ath10k_dbg(ar, ATH10K_DBG_WMI, + "wmi event echo value 0x%08x\n", + le32_to_cpu(arg.value)); } int ath10k_wmi_event_debug_mesg(struct ath10k *ar, struct sk_buff *skb) @@ -4792,6 +4803,17 @@ static int ath10k_wmi_op_pull_roam_ev(struct ath10k *ar, struct sk_buff *skb, return 0; } +static int ath10k_wmi_op_pull_echo_ev(struct ath10k *ar, + struct sk_buff *skb, + struct wmi_echo_ev_arg *arg) +{ + struct wmi_echo_event *ev = (void *)skb->data; + + arg->value = ev->value; + + return 0; +} + int ath10k_wmi_event_ready(struct ath10k *ar, struct sk_buff *skb) { struct wmi_rdy_ev_arg arg = {}; @@ -7709,6 +7731,7 @@ static const struct wmi_ops wmi_ops = { .pull_rdy = ath10k_wmi_op_pull_rdy_ev, 
.pull_fw_stats = ath10k_wmi_main_op_pull_fw_stats, .pull_roam_ev = ath10k_wmi_op_pull_roam_ev, + .pull_echo_ev = ath10k_wmi_op_pull_echo_ev, .gen_pdev_suspend = ath10k_wmi_op_gen_pdev_suspend, .gen_pdev_resume = ath10k_wmi_op_gen_pdev_resume, @@ -7783,6 +7806,7 @@ static const struct wmi_ops wmi_10_1_ops = { .pull_phyerr = ath10k_wmi_op_pull_phyerr_ev, .pull_rdy = ath10k_wmi_op_pull_rdy_ev, .pull_roam_ev = ath10k_wmi_op_pull_roam_ev, + .pull_echo_ev = ath10k_wmi_op_pull_echo_ev, .gen_pdev_suspend = ath10k_wmi_op_gen_pdev_suspend, .gen_pdev_resume = ath10k_wmi_op_gen_pdev_resume, @@ -7854,6 +7878,7 @@ static const struct wmi_ops wmi_10_2_ops = { .pull_phyerr = ath10k_wmi_op_pull_phyerr_ev, .pull_rdy = ath10k_wmi_op_pull_rdy_ev, .pull_roam_ev = ath10k_wmi_op_pull_roam_ev, + .pull_echo_ev = ath10k_wmi_op_pull_echo_ev, .gen_pdev_suspend = ath10k_wmi_op_gen_pdev_suspend, .gen_pdev_resume = ath10k_wmi_op_gen_pdev_resume, @@ -7921,6 +7946,7 @@ static const struct wmi_ops wmi_10_2_4_ops = { .pull_phyerr = ath10k_wmi_op_pull_phyerr_ev, .pull_rdy = ath10k_wmi_op_pull_rdy_ev, .pull_roam_ev = ath10k_wmi_op_pull_roam_ev, + .pull_echo_ev = ath10k_wmi_op_pull_echo_ev, .gen_pdev_suspend = ath10k_wmi_op_gen_pdev_suspend, .gen_pdev_resume = ath10k_wmi_op_gen_pdev_resume, @@ -8028,6 +8054,7 @@ static const struct wmi_ops wmi_10_4_ops = { .ext_resource_config = ath10k_wmi_10_4_ext_resource_config, /* shared with 10.2 */ + .pull_echo_ev = ath10k_wmi_op_pull_echo_ev, .gen_request_stats = ath10k_wmi_op_gen_request_stats, .gen_pdev_get_temperature = ath10k_wmi_10_2_op_gen_pdev_get_temperature, .get_vdev_subtype = ath10k_wmi_10_4_op_get_vdev_subtype, diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h index 3ef4688..086d788 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.h +++ b/drivers/net/wireless/ath/ath10k/wmi.h @@ -6296,6 +6296,10 @@ struct wmi_roam_ev_arg { __le32 rssi; }; +struct wmi_echo_ev_arg { + __le32 value; +}; + struct 
wmi_pdev_temperature_event { /* temperature value in Celcius degree */ __le32 temperature; -- cgit v1.1 From 20ddca21dcf84fcae063f2f75f49cfd545bf5237 Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Fri, 19 Aug 2016 13:37:42 +0300 Subject: ath10k: add wmi command barrier utility This allows placing command barriers for explicit serializing and synchronizing state. Useful for future driver development. Signed-off-by: Michal Kazior Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.h | 1 + drivers/net/wireless/ath/ath10k/wmi.c | 31 +++++++++++++++++++++++++++++++ drivers/net/wireless/ath/ath10k/wmi.h | 1 + 3 files changed, 33 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index cc6e66f..56daeb7 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -142,6 +142,7 @@ struct ath10k_wmi { enum ath10k_htc_ep_id eid; struct completion service_ready; struct completion unified_ready; + struct completion barrier; wait_queue_head_t tx_credits_wq; DECLARE_BITMAP(svc_map, WMI_SERVICE_MAX); struct wmi_cmd_map *cmd; diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index b802ca9..ae5f541 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -29,6 +29,9 @@ #include "p2p.h" #include "hw.h" +#define ATH10K_WMI_BARRIER_ECHO_ID 0xBA991E9 +#define ATH10K_WMI_BARRIER_TIMEOUT_HZ (3 * HZ) + /* MAIN WMI cmd track */ static struct wmi_cmd_map wmi_cmd_map = { .init_cmdid = WMI_INIT_CMDID, @@ -2507,6 +2510,9 @@ void ath10k_wmi_event_echo(struct ath10k *ar, struct sk_buff *skb) ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi event echo value 0x%08x\n", le32_to_cpu(arg.value)); + + if (le32_to_cpu(arg.value) == ATH10K_WMI_BARRIER_ECHO_ID) + complete(&ar->wmi.barrier); } int ath10k_wmi_event_debug_mesg(struct ath10k *ar, struct sk_buff *skb) @@ -7715,6 +7721,30 @@ ath10k_wmi_op_gen_echo(struct ath10k 
*ar, u32 value) return skb; } +int +ath10k_wmi_barrier(struct ath10k *ar) +{ + int ret; + int time_left; + + spin_lock_bh(&ar->data_lock); + reinit_completion(&ar->wmi.barrier); + spin_unlock_bh(&ar->data_lock); + + ret = ath10k_wmi_echo(ar, ATH10K_WMI_BARRIER_ECHO_ID); + if (ret) { + ath10k_warn(ar, "failed to submit wmi echo: %d\n", ret); + return ret; + } + + time_left = wait_for_completion_timeout(&ar->wmi.barrier, + ATH10K_WMI_BARRIER_TIMEOUT_HZ); + if (!time_left) + return -ETIMEDOUT; + + return 0; +} + static const struct wmi_ops wmi_ops = { .rx = ath10k_wmi_op_rx, .map_svc = wmi_main_svc_map, @@ -8112,6 +8142,7 @@ int ath10k_wmi_attach(struct ath10k *ar) init_completion(&ar->wmi.service_ready); init_completion(&ar->wmi.unified_ready); + init_completion(&ar->wmi.barrier); INIT_WORK(&ar->svc_rdy_work, ath10k_wmi_event_service_ready_work); diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h index 086d788..89adfa9 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.h +++ b/drivers/net/wireless/ath/ath10k/wmi.h @@ -6628,5 +6628,6 @@ void ath10k_wmi_10_4_op_fw_stats_fill(struct ath10k *ar, char *buf); int ath10k_wmi_op_get_vdev_subtype(struct ath10k *ar, enum wmi_vdev_subtype subtype); +int ath10k_wmi_barrier(struct ath10k *ar); #endif /* _WMI_H_ */ -- cgit v1.1 From 47b1848d9fde5daf102f599be6e589a1d3c8da7d Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Fri, 19 Aug 2016 13:37:43 +0300 Subject: ath10k: fix spurious tx/rx during boot HW Rx filters and masks are not configured properly by firmware during boot sequences. The MAC_PCU_ADDR1 is set to 0s instead of 1s which allows the HW to ACK any frame that passes through MAC_PCU_RX_FILTER. The MAC_PCU_RX_FILTER itself is misconfigured on boot as well. The combination of these bugs ended up with the following manifestations: - "no channel configured; ignoring frame(s)!" 
warnings in the driver - spurious ACKs (transmission) on the air during firmware bootup sequences The former was a long-standing and known bug originally thought mostly harmless. However Marek recently discovered that this problem also involves ACKing *all* frames the HW receives (including beacons ;). Such frames are delivered to host and generate the former warning as well. This could be a problem with regulatory compliance in some rare cases (e.g. Taiwan which forbids transmissions on channel 36 which is the default bootup channel on 5 GHz band cards). The good news is that it'd require someone else to violate regulatory first to coerce our device to generate and transmit an ACK. The problem could be reproduced in a rather busy environment that has a lot of APs. The likelihood could be increased by injecting an msleep() of 5000 or longer immediately after ath10k_htt_setup() in ath10k_core_start(). The reason why the former warnings were only showing up seldom is because the device was either quickly reset again (i.e. during firmware probing) or wmi vdev was created (which fixes hw and fw states). It is technically possible for host driver to override adequate hw registers however this can't work reliably because the bug root cause lies in incorrect firmware state on boot (internal structure used to program MAC_PCU_ADDR1 is not properly initialized) and only vdev create/delete events can fix it. This is why the patch takes the dummy vdev approach. This could be fixed in firmware as well but having this fixed in driver is more robust, most notably when thinking of users of older firmware such as 999.999.0.636. 
Reported-by: Marek Puzyniak Signed-off-by: Michal Kazior Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 68 ++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index f82877d..6b49374 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -1705,6 +1705,55 @@ static int ath10k_core_init_firmware_features(struct ath10k *ar) return 0; } +static int ath10k_core_reset_rx_filter(struct ath10k *ar) +{ + int ret; + int vdev_id; + int vdev_type; + int vdev_subtype; + const u8 *vdev_addr; + + vdev_id = 0; + vdev_type = WMI_VDEV_TYPE_STA; + vdev_subtype = ath10k_wmi_get_vdev_subtype(ar, WMI_VDEV_SUBTYPE_NONE); + vdev_addr = ar->mac_addr; + + ret = ath10k_wmi_vdev_create(ar, vdev_id, vdev_type, vdev_subtype, + vdev_addr); + if (ret) { + ath10k_err(ar, "failed to create dummy vdev: %d\n", ret); + return ret; + } + + ret = ath10k_wmi_vdev_delete(ar, vdev_id); + if (ret) { + ath10k_err(ar, "failed to delete dummy vdev: %d\n", ret); + return ret; + } + + /* WMI and HTT may use separate HIF pipes and are not guaranteed to be + * serialized properly implicitly. + * + * Moreover (most) WMI commands have no explicit acknowledges. It is + * possible to infer it implicitly by poking firmware with echo + * command - getting a reply means all preceding comments have been + * (mostly) processed. + * + * In case of vdev create/delete this is sufficient. + * + * Without this it's possible to end up with a race when HTT Rx ring is + * started before vdev create/delete hack is complete allowing a short + * window of opportunity to receive (and Tx ACK) a bunch of frames. 
+ */ + ret = ath10k_wmi_barrier(ar); + if (ret) { + ath10k_err(ar, "failed to ping firmware: %d\n", ret); + return ret; + } + + return 0; +} + int ath10k_core_start(struct ath10k *ar, enum ath10k_firmware_mode mode, const struct ath10k_fw_components *fw) { @@ -1872,6 +1921,25 @@ int ath10k_core_start(struct ath10k *ar, enum ath10k_firmware_mode mode, goto err_hif_stop; } + /* Some firmware revisions do not properly set up hardware rx filter + * registers. + * + * A known example from QCA9880 and 10.2.4 is that MAC_PCU_ADDR1_MASK + * is filled with 0s instead of 1s allowing HW to respond with ACKs to + * any frames that matches MAC_PCU_RX_FILTER which is also + * misconfigured to accept anything. + * + * The ADDR1 is programmed using internal firmware structure field and + * can't be (easily/sanely) reached from the driver explicitly. It is + * possible to implicitly make it correct by creating a dummy vdev and + * then deleting it. + */ + status = ath10k_core_reset_rx_filter(ar); + if (status) { + ath10k_err(ar, "failed to reset rx filter: %d\n", status); + goto err_hif_stop; + } + /* If firmware indicates Full Rx Reorder support it must be used in a * slightly different manner. Let HTT code know. */ -- cgit v1.1 From 5ffae43208ec160e584117fabee2cddc5ad0e39a Mon Sep 17 00:00:00 2001 From: Lior David Date: Mon, 22 Aug 2016 12:42:19 +0300 Subject: wil6210: fix protection of wil->scan_request Currently the places that check wil->scan_request and call cfg80211_scan_done are not consistently protected, so there is a risk that cfg80211_scan_done will be called with NULL scan_request, causing a kernel crash. Fix this by using p2p_wdev_mutex in few other places that access scan_request. This makes sense since scan_request may point to p2p_wdev, and it is not worth the extra complexity of adding a new mutex. 
Signed-off-by: Lior David Signed-off-by: Maya Erez Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/wil6210/cfg80211.c | 3 +++ drivers/net/wireless/ath/wil6210/main.c | 4 ++++ drivers/net/wireless/ath/wil6210/wil6210.h | 2 +- drivers/net/wireless/ath/wil6210/wmi.c | 4 ++-- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c index ffacc76..d117240 100644 --- a/drivers/net/wireless/ath/wil6210/cfg80211.c +++ b/drivers/net/wireless/ath/wil6210/cfg80211.c @@ -354,10 +354,13 @@ static int wil_cfg80211_scan(struct wiphy *wiphy, wil_dbg_misc(wil, "%s(), wdev=0x%p iftype=%d\n", __func__, wdev, wdev->iftype); + mutex_lock(&wil->p2p_wdev_mutex); if (wil->scan_request) { wil_err(wil, "Already scanning\n"); + mutex_unlock(&wil->p2p_wdev_mutex); return -EAGAIN; } + mutex_unlock(&wil->p2p_wdev_mutex); /* check we are client side */ switch (wdev->iftype) { diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c index d0b180c..7b7619c 100644 --- a/drivers/net/wireless/ath/wil6210/main.c +++ b/drivers/net/wireless/ath/wil6210/main.c @@ -852,6 +852,7 @@ int wil_reset(struct wil6210_priv *wil, bool load_fw) bitmap_zero(wil->status, wil_status_last); mutex_unlock(&wil->wmi_mutex); + mutex_lock(&wil->p2p_wdev_mutex); if (wil->scan_request) { struct cfg80211_scan_info info = { .aborted = true, @@ -863,6 +864,7 @@ int wil_reset(struct wil6210_priv *wil, bool load_fw) cfg80211_scan_done(wil->scan_request, &info); wil->scan_request = NULL; } + mutex_unlock(&wil->p2p_wdev_mutex); wil_mask_irq(wil); @@ -1055,6 +1057,7 @@ int __wil_down(struct wil6210_priv *wil) wil_p2p_stop_radio_operations(wil); + mutex_lock(&wil->p2p_wdev_mutex); if (wil->scan_request) { struct cfg80211_scan_info info = { .aborted = true, @@ -1066,6 +1069,7 @@ int __wil_down(struct wil6210_priv *wil) cfg80211_scan_done(wil->scan_request, &info); wil->scan_request = NULL; } + 
mutex_unlock(&wil->p2p_wdev_mutex); wil_reset(wil, false); diff --git a/drivers/net/wireless/ath/wil6210/wil6210.h b/drivers/net/wireless/ath/wil6210/wil6210.h index 6087691..1eb7fe7 100644 --- a/drivers/net/wireless/ath/wil6210/wil6210.h +++ b/drivers/net/wireless/ath/wil6210/wil6210.h @@ -657,7 +657,7 @@ struct wil6210_priv { /* P2P_DEVICE vif */ struct wireless_dev *p2p_wdev; - struct mutex p2p_wdev_mutex; /* protect @p2p_wdev */ + struct mutex p2p_wdev_mutex; /* protect @p2p_wdev and @scan_request */ struct wireless_dev *radio_wdev; /* High Access Latency Policy voting */ diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c index 4d92541..0b109b2 100644 --- a/drivers/net/wireless/ath/wil6210/wmi.c +++ b/drivers/net/wireless/ath/wil6210/wmi.c @@ -424,6 +424,7 @@ static void wmi_evt_tx_mgmt(struct wil6210_priv *wil, int id, void *d, int len) static void wmi_evt_scan_complete(struct wil6210_priv *wil, int id, void *d, int len) { + mutex_lock(&wil->p2p_wdev_mutex); if (wil->scan_request) { struct wmi_scan_complete_event *data = d; struct cfg80211_scan_info info = { @@ -435,14 +436,13 @@ static void wmi_evt_scan_complete(struct wil6210_priv *wil, int id, wil->scan_request, info.aborted); del_timer_sync(&wil->scan_timer); - mutex_lock(&wil->p2p_wdev_mutex); cfg80211_scan_done(wil->scan_request, &info); wil->radio_wdev = wil->wdev; - mutex_unlock(&wil->p2p_wdev_mutex); wil->scan_request = NULL; } else { wil_err(wil, "SCAN_COMPLETE while not scanning\n"); } + mutex_unlock(&wil->p2p_wdev_mutex); } static void wmi_evt_connect(struct wil6210_priv *wil, int id, void *d, int len) -- cgit v1.1 From 08989f9640a03939ec170916f80d371a8e3504b0 Mon Sep 17 00:00:00 2001 From: Lior David Date: Mon, 22 Aug 2016 12:42:20 +0300 Subject: wil6210: align to latest auto generated wmi.h Align to latest version of the auto generated wmi file describing the interface with FW. 
Signed-off-by: Lior David Signed-off-by: Maya Erez Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/wil6210/wmi.h | 292 +++++++++++++++++++++++++++++++++ 1 file changed, 292 insertions(+) diff --git a/drivers/net/wireless/ath/wil6210/wmi.h b/drivers/net/wireless/ath/wil6210/wmi.h index 349510c..f430e8a 100644 --- a/drivers/net/wireless/ath/wil6210/wmi.h +++ b/drivers/net/wireless/ath/wil6210/wmi.h @@ -46,6 +46,16 @@ enum wmi_mid { MID_BROADCAST = 0xFF, }; +/* FW capability IDs + * Each ID maps to a bit in a 32-bit bitmask value provided by the FW to + * the host + */ +enum wmi_fw_capability { + WMI_FW_CAPABILITY_FTM = 0, + WMI_FW_CAPABILITY_PS_CONFIG = 1, + WMI_FW_CAPABILITY_MAX, +}; + /* WMI_CMD_HDR */ struct wmi_cmd_hdr { u8 mid; @@ -170,6 +180,13 @@ enum wmi_command_id { /* Not supported yet */ WMI_PS_MID_CFG_READ_CMDID = 0x920, WMI_RS_CFG_CMDID = 0x921, + WMI_GET_DETAILED_RS_RES_CMDID = 0x922, + WMI_AOA_MEAS_CMDID = 0x923, + WMI_TOF_SESSION_START_CMDID = 0x991, + WMI_TOF_GET_CAPABILITIES_CMDID = 0x992, + WMI_TOF_SET_LCR_CMDID = 0x993, + WMI_TOF_SET_LCI_CMDID = 0x994, + WMI_TOF_CHANNEL_INFO_CMDID = 0x995, WMI_SET_MAC_ADDRESS_CMDID = 0xF003, WMI_ABORT_SCAN_CMDID = 0xF007, WMI_SET_PROMISCUOUS_MODE_CMDID = 0xF041, @@ -843,6 +860,88 @@ struct wmi_pmc_cmd { __le64 mem_base; } __packed; +enum wmi_aoa_meas_type { + WMI_AOA_PHASE_MEAS = 0x00, + WMI_AOA_PHASE_AMP_MEAS = 0x01, +}; + +/* WMI_AOA_MEAS_CMDID */ +struct wmi_aoa_meas_cmd { + u8 mac_addr[WMI_MAC_LEN]; + /* channels IDs: + * 0 - 58320 MHz + * 1 - 60480 MHz + * 2 - 62640 MHz + */ + u8 channel; + /* enum wmi_aoa_meas_type */ + u8 aoa_meas_type; + __le32 meas_rf_mask; +} __packed; + +enum wmi_tof_burst_duration { + WMI_TOF_BURST_DURATION_250_USEC = 2, + WMI_TOF_BURST_DURATION_500_USEC = 3, + WMI_TOF_BURST_DURATION_1_MSEC = 4, + WMI_TOF_BURST_DURATION_2_MSEC = 5, + WMI_TOF_BURST_DURATION_4_MSEC = 6, + WMI_TOF_BURST_DURATION_8_MSEC = 7, + WMI_TOF_BURST_DURATION_16_MSEC = 8, + WMI_TOF_BURST_DURATION_32_MSEC = 9, 
+ WMI_TOF_BURST_DURATION_64_MSEC = 10, + WMI_TOF_BURST_DURATION_128_MSEC = 11, + WMI_TOF_BURST_DURATION_NO_PREFERENCES = 15, +}; + +enum wmi_tof_session_start_flags { + WMI_TOF_SESSION_START_FLAG_SECURED = 0x1, + WMI_TOF_SESSION_START_FLAG_ASAP = 0x2, + WMI_TOF_SESSION_START_FLAG_LCI_REQ = 0x4, + WMI_TOF_SESSION_START_FLAG_LCR_REQ = 0x8, +}; + +/* WMI_TOF_SESSION_START_CMDID */ +struct wmi_ftm_dest_info { + u8 channel; + /* wmi_tof_session_start_flags_e */ + u8 flags; + u8 initial_token; + u8 num_of_ftm_per_burst; + u8 num_of_bursts_exp; + /* wmi_tof_burst_duration_e */ + u8 burst_duration; + /* Burst Period indicate interval between two consecutive burst + * instances, in units of 100 ms + */ + __le16 burst_period; + u8 dst_mac[WMI_MAC_LEN]; + __le16 reserved; +} __packed; + +/* WMI_TOF_SESSION_START_CMDID */ +struct wmi_tof_session_start_cmd { + __le32 session_id; + u8 num_of_aoa_measures; + u8 aoa_type; + __le16 num_of_dest; + u8 reserved[4]; + struct wmi_ftm_dest_info ftm_dest_info[0]; +} __packed; + +enum wmi_tof_channel_info_report_type { + WMI_TOF_CHANNEL_INFO_TYPE_CIR = 0x1, + WMI_TOF_CHANNEL_INFO_TYPE_RSSI = 0x2, + WMI_TOF_CHANNEL_INFO_TYPE_SNR = 0x4, + WMI_TOF_CHANNEL_INFO_TYPE_DEBUG_DATA = 0x8, + WMI_TOF_CHANNEL_INFO_TYPE_VENDOR_SPECIFIC = 0x10, +}; + +/* WMI_TOF_CHANNEL_INFO_CMDID */ +struct wmi_tof_channel_info_cmd { + /* wmi_tof_channel_info_report_type_e */ + __le32 channel_info_report_request; +} __packed; + /* WMI Events * List of Events (target to host) */ @@ -934,6 +1033,14 @@ enum wmi_event_id { /* Not supported yet */ WMI_PS_MID_CFG_READ_EVENTID = 0x1920, WMI_RS_CFG_DONE_EVENTID = 0x1921, + WMI_GET_DETAILED_RS_RES_EVENTID = 0x1922, + WMI_AOA_MEAS_EVENTID = 0x1923, + WMI_TOF_SESSION_END_EVENTID = 0x1991, + WMI_TOF_GET_CAPABILITIES_EVENTID = 0x1992, + WMI_TOF_SET_LCR_EVENTID = 0x1993, + WMI_TOF_SET_LCI_EVENTID = 0x1994, + WMI_TOF_FTM_PER_DEST_RES_EVENTID = 0x1995, + WMI_TOF_CHANNEL_INFO_EVENTID = 0x1996, WMI_SET_CHANNEL_EVENTID = 0x9000, 
WMI_ASSOC_REQ_EVENTID = 0x9001, WMI_EAPOL_RX_EVENTID = 0x9002, @@ -1003,6 +1110,13 @@ struct wmi_fw_ver_event { __le32 bl_minor; __le32 bl_subminor; __le32 bl_build; + /* The number of entries in the FW capabilies array */ + u8 fw_capabilities_len; + u8 reserved[3]; + /* FW capabilities info + * Must be the last member of the struct + */ + __le32 fw_capabilities[0]; } __packed; /* WMI_GET_RF_STATUS_EVENTID */ @@ -1565,6 +1679,41 @@ struct wmi_rs_cfg_done_event { u8 reserved[2]; } __packed; +/* WMI_GET_DETAILED_RS_RES_CMDID */ +struct wmi_get_detailed_rs_res_cmd { + /* connection id */ + u8 cid; + u8 reserved[3]; +} __packed; + +/* RS results status */ +enum wmi_rs_results_status { + WMI_RS_RES_VALID = 0x00, + WMI_RS_RES_INVALID = 0x01, +}; + +/* Rate search results */ +struct wmi_rs_results { + /* number of sent MPDUs */ + u8 num_of_tx_pkt[WMI_NUM_MCS]; + /* number of non-acked MPDUs */ + u8 num_of_non_acked_pkt[WMI_NUM_MCS]; + /* RS timestamp */ + __le32 tsf; + /* RS selected MCS */ + u8 mcs; +} __packed; + +/* WMI_GET_DETAILED_RS_RES_EVENTID */ +struct wmi_get_detailed_rs_res_event { + u8 cid; + /* enum wmi_rs_results_status */ + u8 status; + /* detailed rs results */ + struct wmi_rs_results rs_results; + u8 reserved[3]; +} __packed; + /* broadcast connection ID */ #define WMI_LINK_MAINTAIN_CFG_CID_BROADCAST (0xFFFFFFFF) @@ -1892,4 +2041,147 @@ struct wmi_ps_mid_cfg_read_event { __le32 status; } __packed; +#define WMI_AOA_MAX_DATA_SIZE (128) + +enum wmi_aoa_meas_status { + WMI_AOA_MEAS_SUCCESS = 0x00, + WMI_AOA_MEAS_PEER_INCAPABLE = 0x01, + WMI_AOA_MEAS_FAILURE = 0x02, +}; + +/* WMI_AOA_MEAS_EVENTID */ +struct wmi_aoa_meas_event { + u8 mac_addr[WMI_MAC_LEN]; + /* channels IDs: + * 0 - 58320 MHz + * 1 - 60480 MHz + * 2 - 62640 MHz + */ + u8 channel; + /* enum wmi_aoa_meas_type */ + u8 aoa_meas_type; + /* Measurments are from RFs, defined by the mask */ + __le32 meas_rf_mask; + /* enum wmi_aoa_meas_status */ + u8 meas_status; + u8 reserved; + /* Length of meas_data 
in bytes */ + __le16 length; + u8 meas_data[WMI_AOA_MAX_DATA_SIZE]; +} __packed; + +/* WMI_TOF_GET_CAPABILITIES_EVENTID */ +struct wmi_tof_get_capabilities_event { + u8 ftm_capability; + /* maximum supported number of destination to start TOF */ + u8 max_num_of_dest; + /* maximum supported number of measurements per burst */ + u8 max_num_of_meas_per_burst; + u8 reserved; + /* maximum supported multi bursts */ + __le16 max_multi_bursts_sessions; + /* maximum supported FTM burst duration , wmi_tof_burst_duration_e */ + __le16 max_ftm_burst_duration; + /* AOA supported types */ + __le32 aoa_supported_types; +} __packed; + +enum wmi_tof_session_end_status { + WMI_TOF_SESSION_END_NO_ERROR = 0x00, + WMI_TOF_SESSION_END_FAIL = 0x01, + WMI_TOF_SESSION_END_PARAMS_ERROR = 0x02, + WMI_TOF_SESSION_END_ABORTED = 0x03, +}; + +/* WMI_TOF_SESSION_END_EVENTID */ +struct wmi_tof_session_end_event { + /* FTM session ID */ + __le32 session_id; + /* wmi_tof_session_end_status_e */ + u8 status; + u8 reserved[3]; +} __packed; + +/* Responder FTM Results */ +struct wmi_responder_ftm_res { + u8 t1[6]; + u8 t2[6]; + u8 t3[6]; + u8 t4[6]; + __le16 tod_err; + __le16 toa_err; + __le16 tod_err_initiator; + __le16 toa_err_initiator; +} __packed; + +enum wmi_tof_ftm_per_dest_res_status { + WMI_PER_DEST_RES_NO_ERROR = 0x00, + WMI_PER_DEST_RES_TX_RX_FAIL = 0x01, + WMI_PER_DEST_RES_PARAM_DONT_MATCH = 0x02, +}; + +enum wmi_tof_ftm_per_dest_res_flags { + WMI_PER_DEST_RES_REQ_START = 0x01, + WMI_PER_DEST_RES_BURST_REPORT_END = 0x02, + WMI_PER_DEST_RES_REQ_END = 0x04, + WMI_PER_DEST_RES_PARAM_UPDATE = 0x08, +}; + +/* WMI_TOF_FTM_PER_DEST_RES_EVENTID */ +struct wmi_tof_ftm_per_dest_res_event { + /* FTM session ID */ + __le32 session_id; + /* destination MAC address */ + u8 dst_mac[WMI_MAC_LEN]; + /* wmi_tof_ftm_per_dest_res_flags_e */ + u8 flags; + /* wmi_tof_ftm_per_dest_res_status_e */ + u8 status; + /* responder ASAP */ + u8 responder_asap; + /* responder number of FTM per burst */ + u8 
responder_num_ftm_per_burst; + /* responder number of FTM burst exponent */ + u8 responder_num_ftm_bursts_exp; + /* responder burst duration ,wmi_tof_burst_duration_e */ + u8 responder_burst_duration; + /* responder burst period, indicate interval between two consecutive + * burst instances, in units of 100 ms + */ + __le16 responder_burst_period; + /* receive burst counter */ + __le16 bursts_cnt; + /* tsf of responder start burst */ + __le32 tsf_sync; + /* actual received ftm per burst */ + u8 actual_ftm_per_burst; + u8 reserved0[7]; + struct wmi_responder_ftm_res responder_ftm_res[0]; +} __packed; + +enum wmi_tof_channel_info_type { + WMI_TOF_CHANNEL_INFO_AOA = 0x00, + WMI_TOF_CHANNEL_INFO_LCI = 0x01, + WMI_TOF_CHANNEL_INFO_LCR = 0x02, + WMI_TOF_CHANNEL_INFO_VENDOR_SPECIFIC = 0x03, + WMI_TOF_CHANNEL_INFO_CIR = 0x04, + WMI_TOF_CHANNEL_INFO_RSSI = 0x05, + WMI_TOF_CHANNEL_INFO_SNR = 0x06, + WMI_TOF_CHANNEL_INFO_DEBUG = 0x07, +}; + +/* WMI_TOF_CHANNEL_INFO_EVENTID */ +struct wmi_tof_channel_info_event { + /* FTM session ID */ + __le32 session_id; + /* destination MAC address */ + u8 dst_mac[WMI_MAC_LEN]; + /* wmi_tof_channel_info_type_e */ + u8 type; + /* data report length */ + u8 len; + /* data report payload */ + u8 report[0]; +} __packed; + #endif /* __WILOCITY_WMI_H__ */ -- cgit v1.1 From 12bace75704ec0d64621be6ebf6e51772ce2cb0f Mon Sep 17 00:00:00 2001 From: Lior David Date: Mon, 22 Aug 2016 12:42:21 +0300 Subject: wil6210: extract firmware capabilities from FW file When driver is loaded, extract a capabilities record from the FW file. This record contains bits indicating which optional features are supported by this FW. The driver can use this information to determine which functionality to support and/or expose to user space. The extraction is done before wiphy structure is registered, because the capabilities can affect information published by the this structure. 
Signed-off-by: Lior David Signed-off-by: Maya Erez Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/wil6210/debugfs.c | 25 +++++++++ drivers/net/wireless/ath/wil6210/fw.h | 11 +++- drivers/net/wireless/ath/wil6210/fw_inc.c | 85 +++++++++++++++++++++-------- drivers/net/wireless/ath/wil6210/main.c | 4 +- drivers/net/wireless/ath/wil6210/pcie_bus.c | 4 ++ drivers/net/wireless/ath/wil6210/wil6210.h | 4 +- 6 files changed, 106 insertions(+), 27 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/debugfs.c b/drivers/net/wireless/ath/wil6210/debugfs.c index a8098b4..a244a36 100644 --- a/drivers/net/wireless/ath/wil6210/debugfs.c +++ b/drivers/net/wireless/ath/wil6210/debugfs.c @@ -1553,6 +1553,30 @@ static const struct file_operations fops_led_blink_time = { .open = simple_open, }; +/*---------FW capabilities------------*/ +static int wil_fw_capabilities_debugfs_show(struct seq_file *s, void *data) +{ + struct wil6210_priv *wil = s->private; + + seq_printf(s, "fw_capabilities : %*pb\n", WMI_FW_CAPABILITY_MAX, + wil->fw_capabilities); + + return 0; +} + +static int wil_fw_capabilities_seq_open(struct inode *inode, struct file *file) +{ + return single_open(file, wil_fw_capabilities_debugfs_show, + inode->i_private); +} + +static const struct file_operations fops_fw_capabilities = { + .open = wil_fw_capabilities_seq_open, + .release = single_release, + .read = seq_read, + .llseek = seq_lseek, +}; + /*----------------*/ static void wil6210_debugfs_init_blobs(struct wil6210_priv *wil, struct dentry *dbg) @@ -1603,6 +1627,7 @@ static const struct { {"recovery", S_IRUGO | S_IWUSR, &fops_recovery}, {"led_cfg", S_IRUGO | S_IWUSR, &fops_led_cfg}, {"led_blink_time", S_IRUGO | S_IWUSR, &fops_led_blink_time}, + {"fw_capabilities", S_IRUGO, &fops_fw_capabilities}, }; static void wil6210_debugfs_init_files(struct wil6210_priv *wil, diff --git a/drivers/net/wireless/ath/wil6210/fw.h b/drivers/net/wireless/ath/wil6210/fw.h index 7a2c6c1..c3191c6 100644 --- 
a/drivers/net/wireless/ath/wil6210/fw.h +++ b/drivers/net/wireless/ath/wil6210/fw.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Qualcomm Atheros, Inc. + * Copyright (c) 2014,2016 Qualcomm Atheros, Inc. * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -58,6 +58,15 @@ struct wil_fw_record_comment { /* type == wil_fw_type_comment */ u8 data[0]; /* free-form data [data_size], see above */ } __packed; +/* FW capabilities encoded inside a comment record */ +#define WIL_FW_CAPABILITIES_MAGIC (0xabcddcba) +struct wil_fw_record_capabilities { /* type == wil_fw_type_comment */ + /* identifies capabilities record */ + __le32 magic; + /* capabilities (variable size), see enum wmi_fw_capability */ + u8 capabilities[0]; +}; + /* perform action * data_size = @head.size - offsetof(struct wil_fw_record_action, data) */ diff --git a/drivers/net/wireless/ath/wil6210/fw_inc.c b/drivers/net/wireless/ath/wil6210/fw_inc.c index d30657e..3860238 100644 --- a/drivers/net/wireless/ath/wil6210/fw_inc.c +++ b/drivers/net/wireless/ath/wil6210/fw_inc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2015 Qualcomm Atheros, Inc. + * Copyright (c) 2014-2016 Qualcomm Atheros, Inc. 
* * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -118,6 +118,12 @@ static int wil_fw_verify(struct wil6210_priv *wil, const u8 *data, size_t size) return (int)dlen; } +static int fw_ignore_section(struct wil6210_priv *wil, const void *data, + size_t size) +{ + return 0; +} + static int fw_handle_comment(struct wil6210_priv *wil, const void *data, size_t size) { @@ -126,6 +132,27 @@ static int fw_handle_comment(struct wil6210_priv *wil, const void *data, return 0; } +static int +fw_handle_capabilities(struct wil6210_priv *wil, const void *data, + size_t size) +{ + const struct wil_fw_record_capabilities *rec = data; + size_t capa_size; + + if (size < sizeof(*rec) || + le32_to_cpu(rec->magic) != WIL_FW_CAPABILITIES_MAGIC) + return 0; + + capa_size = size - offsetof(struct wil_fw_record_capabilities, + capabilities); + bitmap_zero(wil->fw_capabilities, WMI_FW_CAPABILITY_MAX); + memcpy(wil->fw_capabilities, rec->capabilities, + min(sizeof(wil->fw_capabilities), capa_size)); + wil_hex_dump_fw("CAPA", DUMP_PREFIX_OFFSET, 16, 1, + rec->capabilities, capa_size, false); + return 0; +} + static int fw_handle_data(struct wil6210_priv *wil, const void *data, size_t size) { @@ -383,42 +410,51 @@ static int fw_handle_gateway_data4(struct wil6210_priv *wil, const void *data, static const struct { int type; - int (*handler)(struct wil6210_priv *wil, const void *data, size_t size); + int (*load_handler)(struct wil6210_priv *wil, const void *data, + size_t size); + int (*parse_handler)(struct wil6210_priv *wil, const void *data, + size_t size); } wil_fw_handlers[] = { - {wil_fw_type_comment, fw_handle_comment}, - {wil_fw_type_data, fw_handle_data}, - {wil_fw_type_fill, fw_handle_fill}, + {wil_fw_type_comment, fw_handle_comment, fw_handle_capabilities}, + {wil_fw_type_data, fw_handle_data, fw_ignore_section}, + {wil_fw_type_fill, fw_handle_fill, fw_ignore_section}, /* 
wil_fw_type_action */ /* wil_fw_type_verify */ - {wil_fw_type_file_header, fw_handle_file_header}, - {wil_fw_type_direct_write, fw_handle_direct_write}, - {wil_fw_type_gateway_data, fw_handle_gateway_data}, - {wil_fw_type_gateway_data4, fw_handle_gateway_data4}, + {wil_fw_type_file_header, fw_handle_file_header, + fw_handle_file_header}, + {wil_fw_type_direct_write, fw_handle_direct_write, fw_ignore_section}, + {wil_fw_type_gateway_data, fw_handle_gateway_data, fw_ignore_section}, + {wil_fw_type_gateway_data4, fw_handle_gateway_data4, + fw_ignore_section}, }; static int wil_fw_handle_record(struct wil6210_priv *wil, int type, - const void *data, size_t size) + const void *data, size_t size, bool load) { int i; - for (i = 0; i < ARRAY_SIZE(wil_fw_handlers); i++) { + for (i = 0; i < ARRAY_SIZE(wil_fw_handlers); i++) if (wil_fw_handlers[i].type == type) - return wil_fw_handlers[i].handler(wil, data, size); - } + return load ? + wil_fw_handlers[i].load_handler( + wil, data, size) : + wil_fw_handlers[i].parse_handler( + wil, data, size); wil_err_fw(wil, "unknown record type: %d\n", type); return -EINVAL; } /** - * wil_fw_load - load FW into device - * - * Load the FW and uCode code and data to the corresponding device - * memory regions + * wil_fw_process - process section from FW file + * if load is true: Load the FW and uCode code and data to the + * corresponding device memory regions, + * otherwise only parse and look for capabilities * * Return error code */ -static int wil_fw_load(struct wil6210_priv *wil, const void *data, size_t size) +static int wil_fw_process(struct wil6210_priv *wil, const void *data, + size_t size, bool load) { int rc = 0; const struct wil_fw_record_head *hdr; @@ -437,7 +473,7 @@ static int wil_fw_load(struct wil6210_priv *wil, const void *data, size_t size) return -EINVAL; } rc = wil_fw_handle_record(wil, le16_to_cpu(hdr->type), - &hdr[1], hdr_sz); + &hdr[1], hdr_sz, load); if (rc) return rc; } @@ -456,13 +492,16 @@ static int 
wil_fw_load(struct wil6210_priv *wil, const void *data, size_t size) } /** - * wil_request_firmware - Request firmware and load to device + * wil_request_firmware - Request firmware * - * Request firmware image from the file and load it to device + * Request firmware image from the file + * If load is true, load firmware to device, otherwise + * only parse and extract capabilities * * Return error code */ -int wil_request_firmware(struct wil6210_priv *wil, const char *name) +int wil_request_firmware(struct wil6210_priv *wil, const char *name, + bool load) { int rc, rc1; const struct firmware *fw; @@ -482,7 +521,7 @@ int wil_request_firmware(struct wil6210_priv *wil, const char *name) rc = rc1; goto out; } - rc = wil_fw_load(wil, d, rc1); + rc = wil_fw_process(wil, d, rc1, load); if (rc < 0) goto out; } diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c index 7b7619c..7198c86 100644 --- a/drivers/net/wireless/ath/wil6210/main.c +++ b/drivers/net/wireless/ath/wil6210/main.c @@ -894,10 +894,10 @@ int wil_reset(struct wil6210_priv *wil, bool load_fw) wil_halt_cpu(wil); /* Loading f/w from the file */ - rc = wil_request_firmware(wil, WIL_FW_NAME); + rc = wil_request_firmware(wil, WIL_FW_NAME, true); if (rc) return rc; - rc = wil_request_firmware(wil, WIL_FW2_NAME); + rc = wil_request_firmware(wil, WIL_FW2_NAME, true); if (rc) return rc; diff --git a/drivers/net/wireless/ath/wil6210/pcie_bus.c b/drivers/net/wireless/ath/wil6210/pcie_bus.c index 5b7a9d2..44746ca 100644 --- a/drivers/net/wireless/ath/wil6210/pcie_bus.c +++ b/drivers/net/wireless/ath/wil6210/pcie_bus.c @@ -39,6 +39,7 @@ void wil_set_capabilities(struct wil6210_priv *wil) u32 rev_id = wil_r(wil, RGF_USER_JTAG_DEV_ID); bitmap_zero(wil->hw_capabilities, hw_capability_last); + bitmap_zero(wil->fw_capabilities, WMI_FW_CAPABILITY_MAX); switch (rev_id) { case JTAG_DEV_ID_SPARROW_B0: @@ -52,6 +53,9 @@ void wil_set_capabilities(struct wil6210_priv *wil) } wil_info(wil, 
"Board hardware is %s\n", wil->hw_name); + + /* extract FW capabilities from file without loading the FW */ + wil_request_firmware(wil, WIL_FW_NAME, false); } void wil_disable_irq(struct wil6210_priv *wil) diff --git a/drivers/net/wireless/ath/wil6210/wil6210.h b/drivers/net/wireless/ath/wil6210/wil6210.h index 1eb7fe7..979536c 100644 --- a/drivers/net/wireless/ath/wil6210/wil6210.h +++ b/drivers/net/wireless/ath/wil6210/wil6210.h @@ -580,6 +580,7 @@ struct wil6210_priv { u32 hw_version; const char *hw_name; DECLARE_BITMAP(hw_capabilities, hw_capability_last); + DECLARE_BITMAP(fw_capabilities, WMI_FW_CAPABILITY_MAX); u8 n_mids; /* number of additional MIDs as reported by FW */ u32 recovery_count; /* num of FW recovery attempts in a short time */ u32 recovery_state; /* FW recovery state machine */ @@ -895,7 +896,8 @@ void wil6210_unmask_irq_rx(struct wil6210_priv *wil); int wil_iftype_nl2wmi(enum nl80211_iftype type); int wil_ioctl(struct wil6210_priv *wil, void __user *data, int cmd); -int wil_request_firmware(struct wil6210_priv *wil, const char *name); +int wil_request_firmware(struct wil6210_priv *wil, const char *name, + bool load); int wil_can_suspend(struct wil6210_priv *wil, bool is_runtime); int wil_suspend(struct wil6210_priv *wil, bool is_runtime); -- cgit v1.1 From 13cd9f758a555d1ab547b0dbed3f9d1e529230c3 Mon Sep 17 00:00:00 2001 From: Lior David Date: Mon, 22 Aug 2016 12:42:22 +0300 Subject: wil6210: extract firmware version from file header Currently the FW version is taken from the sw_version field of the FW ready event. This version is based on internal version control revision and it is difficult to map to actual FW version. Fix this by using the actual FW version stored in the FW file header record. 
Signed-off-by: Lior David Signed-off-by: Maya Erez Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/wil6210/debugfs.c | 28 +++++++++++++++++++++++++++- drivers/net/wireless/ath/wil6210/fw.h | 3 +++ drivers/net/wireless/ath/wil6210/fw_inc.c | 7 +++++++ drivers/net/wireless/ath/wil6210/main.c | 1 + drivers/net/wireless/ath/wil6210/netdev.c | 2 ++ drivers/net/wireless/ath/wil6210/wil6210.h | 3 ++- drivers/net/wireless/ath/wil6210/wmi.c | 8 ++++---- 7 files changed, 46 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/debugfs.c b/drivers/net/wireless/ath/wil6210/debugfs.c index a244a36..5e4058a 100644 --- a/drivers/net/wireless/ath/wil6210/debugfs.c +++ b/drivers/net/wireless/ath/wil6210/debugfs.c @@ -1577,6 +1577,32 @@ static const struct file_operations fops_fw_capabilities = { .llseek = seq_lseek, }; +/*---------FW version------------*/ +static int wil_fw_version_debugfs_show(struct seq_file *s, void *data) +{ + struct wil6210_priv *wil = s->private; + + if (wil->fw_version[0]) + seq_printf(s, "%s\n", wil->fw_version); + else + seq_puts(s, "N/A\n"); + + return 0; +} + +static int wil_fw_version_seq_open(struct inode *inode, struct file *file) +{ + return single_open(file, wil_fw_version_debugfs_show, + inode->i_private); +} + +static const struct file_operations fops_fw_version = { + .open = wil_fw_version_seq_open, + .release = single_release, + .read = seq_read, + .llseek = seq_lseek, +}; + /*----------------*/ static void wil6210_debugfs_init_blobs(struct wil6210_priv *wil, struct dentry *dbg) @@ -1628,6 +1654,7 @@ static const struct { {"led_cfg", S_IRUGO | S_IWUSR, &fops_led_cfg}, {"led_blink_time", S_IRUGO | S_IWUSR, &fops_led_blink_time}, {"fw_capabilities", S_IRUGO, &fops_fw_capabilities}, + {"fw_version", S_IRUGO, &fops_fw_version}, }; static void wil6210_debugfs_init_files(struct wil6210_priv *wil, @@ -1668,7 +1695,6 @@ static void wil6210_debugfs_init_isr(struct wil6210_priv *wil, static const struct dbg_off dbg_wil_off[] = 
{ WIL_FIELD(privacy, S_IRUGO, doff_u32), WIL_FIELD(status[0], S_IRUGO | S_IWUSR, doff_ulong), - WIL_FIELD(fw_version, S_IRUGO, doff_u32), WIL_FIELD(hw_version, S_IRUGO, doff_x32), WIL_FIELD(recovery_count, S_IRUGO, doff_u32), WIL_FIELD(ap_isolate, S_IRUGO, doff_u32), diff --git a/drivers/net/wireless/ath/wil6210/fw.h b/drivers/net/wireless/ath/wil6210/fw.h index c3191c6..2f2b910 100644 --- a/drivers/net/wireless/ath/wil6210/fw.h +++ b/drivers/net/wireless/ath/wil6210/fw.h @@ -102,6 +102,9 @@ struct wil_fw_record_verify { /* type == wil_fw_verify */ /* file header * First record of every file */ +/* the FW version prefix in the comment */ +#define WIL_FW_VERSION_PREFIX "FW version: " +#define WIL_FW_VERSION_PREFIX_LEN (sizeof(WIL_FW_VERSION_PREFIX) - 1) struct wil_fw_record_file_header { __le32 signature ; /* Wilocity signature */ __le32 reserved; diff --git a/drivers/net/wireless/ath/wil6210/fw_inc.c b/drivers/net/wireless/ath/wil6210/fw_inc.c index 3860238..8f40eb3 100644 --- a/drivers/net/wireless/ath/wil6210/fw_inc.c +++ b/drivers/net/wireless/ath/wil6210/fw_inc.c @@ -223,6 +223,13 @@ static int fw_handle_file_header(struct wil6210_priv *wil, const void *data, wil_hex_dump_fw("", DUMP_PREFIX_OFFSET, 16, 1, d->comment, sizeof(d->comment), true); + if (!memcmp(d->comment, WIL_FW_VERSION_PREFIX, + WIL_FW_VERSION_PREFIX_LEN)) + memcpy(wil->fw_version, + d->comment + WIL_FW_VERSION_PREFIX_LEN, + min(sizeof(d->comment) - WIL_FW_VERSION_PREFIX_LEN, + sizeof(wil->fw_version) - 1)); + return 0; } diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c index 7198c86..e7130b5 100644 --- a/drivers/net/wireless/ath/wil6210/main.c +++ b/drivers/net/wireless/ath/wil6210/main.c @@ -893,6 +893,7 @@ int wil_reset(struct wil6210_priv *wil, bool load_fw) WIL_FW2_NAME); wil_halt_cpu(wil); + memset(wil->fw_version, 0, sizeof(wil->fw_version)); /* Loading f/w from the file */ rc = wil_request_firmware(wil, WIL_FW_NAME, true); if (rc) diff --git 
a/drivers/net/wireless/ath/wil6210/netdev.c b/drivers/net/wireless/ath/wil6210/netdev.c index 4bc9bb0a..61de5e9 100644 --- a/drivers/net/wireless/ath/wil6210/netdev.c +++ b/drivers/net/wireless/ath/wil6210/netdev.c @@ -216,6 +216,8 @@ int wil_if_add(struct wil6210_priv *wil) wil_dbg_misc(wil, "entered"); + strlcpy(wiphy->fw_version, wil->fw_version, sizeof(wiphy->fw_version)); + rc = wiphy_register(wiphy); if (rc < 0) { wil_err(wil, "failed to register wiphy, err %d\n", rc); diff --git a/drivers/net/wireless/ath/wil6210/wil6210.h b/drivers/net/wireless/ath/wil6210/wil6210.h index 979536c..a949cd6 100644 --- a/drivers/net/wireless/ath/wil6210/wil6210.h +++ b/drivers/net/wireless/ath/wil6210/wil6210.h @@ -17,6 +17,7 @@ #ifndef __WIL6210_H__ #define __WIL6210_H__ +#include #include #include #include @@ -576,7 +577,7 @@ struct wil6210_priv { struct wireless_dev *wdev; void __iomem *csr; DECLARE_BITMAP(status, wil_status_last); - u32 fw_version; + u8 fw_version[ETHTOOL_FWVERS_LEN]; u32 hw_version; const char *hw_name; DECLARE_BITMAP(hw_capabilities, hw_capability_last); diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c index 0b109b2..fae4f12 100644 --- a/drivers/net/wireless/ath/wil6210/wmi.c +++ b/drivers/net/wireless/ath/wil6210/wmi.c @@ -312,14 +312,14 @@ static void wmi_evt_ready(struct wil6210_priv *wil, int id, void *d, int len) struct wireless_dev *wdev = wil->wdev; struct wmi_ready_event *evt = d; - wil->fw_version = le32_to_cpu(evt->sw_version); wil->n_mids = evt->numof_additional_mids; - wil_info(wil, "FW ver. %d; MAC %pM; %d MID's\n", wil->fw_version, + wil_info(wil, "FW ver. 
%s(SW %d); MAC %pM; %d MID's\n", + wil->fw_version, le32_to_cpu(evt->sw_version), evt->mac, wil->n_mids); /* ignore MAC address, we already have it from the boot loader */ - snprintf(wdev->wiphy->fw_version, sizeof(wdev->wiphy->fw_version), - "%d", wil->fw_version); + strlcpy(wdev->wiphy->fw_version, wil->fw_version, + sizeof(wdev->wiphy->fw_version)); wil_set_recovery_state(wil, fw_recovery_idle); set_bit(wil_status_fwready, wil->status); -- cgit v1.1 From c012268b37db6b10b59dac9b7f45956cb9a8bcb2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 5 Aug 2016 14:25:53 -0700 Subject: lkdtm: Mark lkdtm_rodata_do_nothing() notrace lkdtm_rodata_do_nothing() is an empty function which is generated in order to test the non-executability of rodata. Currently if function tracing is enabled then an mcount callsite will be generated for lkdtm_rodata_do_nothing(), and it will appear in the list of available functions for function tracing (available_filter_functions). Given its purpose purely as a test function, it seems preferable for lkdtm_rodata_do_nothing() to be marked notrace, so it doesn't appear as traceable. This also avoids triggering a linker bug on powerpc: https://sourceware.org/bugzilla/show_bug.cgi?id=20428 When the linker sees code that needs to generate a call stub, e.g. a branch to mcount(), it assumes the section is executable and dereferences a NULL pointer leading to a linker segfault. Marking lkdtm_rodata_do_nothing() notrace avoids triggering the bug because the function contains no other function calls.
Signed-off-by: Michael Ellerman Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- drivers/misc/lkdtm_rodata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/lkdtm_rodata.c b/drivers/misc/lkdtm_rodata.c index 166b1db..3564477 100644 --- a/drivers/misc/lkdtm_rodata.c +++ b/drivers/misc/lkdtm_rodata.c @@ -4,7 +4,7 @@ */ #include "lkdtm.h" -void lkdtm_rodata_do_nothing(void) +void notrace lkdtm_rodata_do_nothing(void) { /* Does nothing. We just want an architecture agnostic "return". */ } -- cgit v1.1 From 25eb7e5c7454c9e8407c5382b9fe32d3b45fe163 Mon Sep 17 00:00:00 2001 From: Andreas Noever Date: Tue, 26 Jul 2016 18:40:37 +0200 Subject: thunderbolt: Fix resume quirk for Falcon Ridge 4C. The quirk 'quirk_apple_wait_for_thunderbolt' did not fire on Falcon Ridge 4C controllers with subdevice/subvendor set to zero. This led to lost pci devices on system resume. Older thunderbolt controllers (pre Falcon Ridge) used the same device id for bridges and for the controller. On Apple hardware the subvendor- & subdevice-ids were set for the controller, but not for bridges. So that is what was used to differentiate between the two. Starting with Falcon Ridge bridges and controllers received different device ids. Additionally on some MacBookPro models (but not all) the subvendor/subdevice was zeroed. Starting with a42fb351c (thunderbolt: Allow loading of module on recent Apple MacBooks with thunderbolt 2 controller) the thunderbolt driver binds to all Falcon Ridge 4C controllers (regardless of subvendor/subdevice). The corresponding quirk was not updated. This commit changes the quirk to check the device class instead of its subvendor-/subdeviceids. This works for all generations of Thunderbolt controllers.
Signed-off-by: Andreas Noever Reviewed-by: Lukas Wunner Signed-off-by: Greg Kroah-Hartman --- drivers/pci/quirks.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 37ff015..6ff6469 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3328,8 +3328,7 @@ static void quirk_apple_wait_for_thunderbolt(struct pci_dev *dev) || (nhi->device != PCI_DEVICE_ID_INTEL_LIGHT_RIDGE && nhi->device != PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C && nhi->device != PCI_DEVICE_ID_INTEL_FALCON_RIDGE_4C_NHI) - || nhi->subsystem_vendor != 0x2222 - || nhi->subsystem_device != 0x1111) + || nhi->class != PCI_CLASS_SYSTEM_OTHER << 8) goto out; dev_info(&dev->dev, "quirk: waiting for thunderbolt to reestablish PCI tunnels...\n"); device_pm_wait_for_dev(&dev->dev, &nhi->dev); -- cgit v1.1 From 82a6a81c2a38aa7a7813a0c532637877773c50ae Mon Sep 17 00:00:00 2001 From: Xavier Gnata Date: Tue, 26 Jul 2016 18:40:38 +0200 Subject: thunderbolt: Add support for INTEL_FALCON_RIDGE_2C controller. From: Xavier Gnata Add support to INTEL_FALCON_RIDGE_2C controller and corresponding quirk to support suspend/resume. Tested against 4.7 master on a MacBook Air 11" 2015. 
Signed-off-by: Andreas Noever Reviewed-by: Lukas Wunner Signed-off-by: Greg Kroah-Hartman --- drivers/pci/quirks.c | 4 ++++ drivers/thunderbolt/nhi.c | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 6ff6469..44e0ff3 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3327,6 +3327,7 @@ static void quirk_apple_wait_for_thunderbolt(struct pci_dev *dev) if (nhi->vendor != PCI_VENDOR_ID_INTEL || (nhi->device != PCI_DEVICE_ID_INTEL_LIGHT_RIDGE && nhi->device != PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C && + nhi->device != PCI_DEVICE_ID_INTEL_FALCON_RIDGE_2C_NHI && nhi->device != PCI_DEVICE_ID_INTEL_FALCON_RIDGE_4C_NHI) || nhi->class != PCI_CLASS_SYSTEM_OTHER << 8) goto out; @@ -3343,6 +3344,9 @@ DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C, quirk_apple_wait_for_thunderbolt); DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_FALCON_RIDGE_2C_BRIDGE, + quirk_apple_wait_for_thunderbolt); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_FALCON_RIDGE_4C_BRIDGE, quirk_apple_wait_for_thunderbolt); #endif diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index 9c15344..a8c2041 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -651,6 +651,12 @@ static struct pci_device_id nhi_ids[] = { { .class = PCI_CLASS_SYSTEM_OTHER << 8, .class_mask = ~0, .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_FALCON_RIDGE_2C_NHI, + .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, + }, + { + .class = PCI_CLASS_SYSTEM_OTHER << 8, .class_mask = ~0, + .vendor = PCI_VENDOR_ID_INTEL, .device = PCI_DEVICE_ID_INTEL_FALCON_RIDGE_4C_NHI, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, }, -- cgit v1.1 From ccdf3b888d87df1b914fedde91ed1848f0651c65 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 3 Aug 2016 10:44:12 +0200 Subject: thunderbolt: Don't declare Falcon Ridge unsupported Falcon Ridge 
4C has been supported by the driver from the beginning, Falcon Ridge 2C support was just added. Don't irritate users with a warning declaring the opposite. Signed-off-by: Lukas Wunner Signed-off-by: Andreas Noever Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/switch.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 1e116f5..9840fde 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -372,7 +372,9 @@ struct tb_switch *tb_switch_alloc(struct tb *tb, u64 route) if (sw->config.device_id != PCI_DEVICE_ID_INTEL_LIGHT_RIDGE && sw->config.device_id != PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C && - sw->config.device_id != PCI_DEVICE_ID_INTEL_PORT_RIDGE) + sw->config.device_id != PCI_DEVICE_ID_INTEL_PORT_RIDGE && + sw->config.device_id != PCI_DEVICE_ID_INTEL_FALCON_RIDGE_2C_BRIDGE && + sw->config.device_id != PCI_DEVICE_ID_INTEL_FALCON_RIDGE_4C_BRIDGE) tb_sw_warn(sw, "unsupported switch device id %#x\n", sw->config.device_id); -- cgit v1.1 From 04b2d9c9c319277ad4fbbb71855c256a9f4d5f98 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Wed, 31 Aug 2016 20:15:32 +0900 Subject: ALSA: firewire-tascam: accessing to user space outside spinlock In hwdep interface of firewire-tascam driver, accessing to user space is in a critical section with disabled local interrupt. Depending on architecture, accessing to user space can cause page fault exception. Then local processor stores machine status and handle the synchronous event. A handler corresponding to the event can call task scheduler to wait for preparing pages. In a case of usage of single core processor, the state to disable local interrupt is worse because it doesn't handle usual interrupts from hardware. This commit fixes this bug, by performing the accessing outside spinlock. 
Reported-by: Vaishali Thakkar Cc: stable@vger.kernel.org Fixes: e5e0c3dd257b('ALSA: firewire-tascam: add hwdep interface') Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai --- sound/firewire/tascam/tascam-hwdep.c | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/sound/firewire/tascam/tascam-hwdep.c b/sound/firewire/tascam/tascam-hwdep.c index 131267c..106406c 100644 --- a/sound/firewire/tascam/tascam-hwdep.c +++ b/sound/firewire/tascam/tascam-hwdep.c @@ -16,31 +16,14 @@ #include "tascam.h" -static long hwdep_read_locked(struct snd_tscm *tscm, char __user *buf, - long count) -{ - union snd_firewire_event event; - - memset(&event, 0, sizeof(event)); - - event.lock_status.type = SNDRV_FIREWIRE_EVENT_LOCK_STATUS; - event.lock_status.status = (tscm->dev_lock_count > 0); - tscm->dev_lock_changed = false; - - count = min_t(long, count, sizeof(event.lock_status)); - - if (copy_to_user(buf, &event, count)) - return -EFAULT; - - return count; -} - static long hwdep_read(struct snd_hwdep *hwdep, char __user *buf, long count, loff_t *offset) { struct snd_tscm *tscm = hwdep->private_data; DEFINE_WAIT(wait); - union snd_firewire_event event; + union snd_firewire_event event = { + .lock_status.type = SNDRV_FIREWIRE_EVENT_LOCK_STATUS, + }; spin_lock_irq(&tscm->lock); @@ -54,10 +37,16 @@ static long hwdep_read(struct snd_hwdep *hwdep, char __user *buf, long count, spin_lock_irq(&tscm->lock); } - memset(&event, 0, sizeof(event)); - count = hwdep_read_locked(tscm, buf, count); + event.lock_status.status = (tscm->dev_lock_count > 0); + tscm->dev_lock_changed = false; + spin_unlock_irq(&tscm->lock); + count = min_t(long, count, sizeof(event.lock_status)); + + if (copy_to_user(buf, &event, count)) + return -EFAULT; + return count; } -- cgit v1.1 From df6a58c5c5aa8ecb1e088ecead3fa33ae70181f1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 17 Jun 2016 17:51:17 -0400 Subject: kernfs: don't depend on d_find_any_alias() when 
generating notifications kernfs_notify_workfn() sends out file modified events for the scheduled kernfs_nodes. Because the modifications aren't from userland, it doesn't have the matching file struct at hand and can't use fsnotify_modify(). Instead, it looked up the inode and then used d_find_any_alias() to find the dentry and used fsnotify_parent() and fsnotify() directly to generate notifications. The assumption was that the relevant dentries would have been pinned if there are listeners, which isn't true as inotify doesn't pin dentries at all and watching the parent doesn't pin the child dentries even for dnotify. This led to, for example, inotify watchers not getting notifications if the system is under memory pressure and the matching dentries got reclaimed. It can also be triggered through /proc/sys/vm/drop_caches or a remount attempt which involves shrinking dcache. fsnotify_parent() only uses the dentry to access the parent inode, which kernfs can do easily. Update kernfs_notify_workfn() so that it uses fsnotify() directly for both the parent and target inodes without going through d_find_any_alias(). While at it, supply the target file name to fsnotify() from kernfs_node->name. 
Signed-off-by: Tejun Heo Reported-by: Evgeny Vereshchagin Fixes: d911d9874801 ("kernfs: make kernfs_notify() trigger inotify events too") Cc: John McCutchan Cc: Robert Love Cc: Eric Paris Cc: stable@vger.kernel.org # v3.16+ Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/file.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index e157400..2bcb86e 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -840,21 +840,35 @@ repeat: mutex_lock(&kernfs_mutex); list_for_each_entry(info, &kernfs_root(kn)->supers, node) { + struct kernfs_node *parent; struct inode *inode; - struct dentry *dentry; + /* + * We want fsnotify_modify() on @kn but as the + * modifications aren't originating from userland don't + * have the matching @file available. Look up the inodes + * and generate the events manually. + */ inode = ilookup(info->sb, kn->ino); if (!inode) continue; - dentry = d_find_any_alias(inode); - if (dentry) { - fsnotify_parent(NULL, dentry, FS_MODIFY); - fsnotify(inode, FS_MODIFY, inode, FSNOTIFY_EVENT_INODE, - NULL, 0); - dput(dentry); + parent = kernfs_get_parent(kn); + if (parent) { + struct inode *p_inode; + + p_inode = ilookup(info->sb, parent->ino); + if (p_inode) { + fsnotify(p_inode, FS_MODIFY | FS_EVENT_ON_CHILD, + inode, FSNOTIFY_EVENT_INODE, kn->name, 0); + iput(p_inode); + } + + kernfs_put(parent); } + fsnotify(inode, FS_MODIFY, inode, FSNOTIFY_EVENT_INODE, + kn->name, 0); iput(inode); } -- cgit v1.1 From 96b0af4b729cabd44e237c5a6b9bd4e0ea4ed457 Mon Sep 17 00:00:00 2001 From: Martin Fuzzey Date: Fri, 20 May 2016 14:23:38 +0200 Subject: documentation: drivers/core/of: fix name of of_node symlink commit 5590f3196b29 ("drivers/core/of: Add symlink to device-tree from devices with an OF node") added a symlink called "of_node" to sysfs however the documentation describes it as "of_path". Fix the documentation to match what the code actually does. 
Signed-off-by: Martin Fuzzey Acked-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/stable/sysfs-devices | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/ABI/stable/sysfs-devices b/Documentation/ABI/stable/sysfs-devices index 43f78b88d..df449d7 100644 --- a/Documentation/ABI/stable/sysfs-devices +++ b/Documentation/ABI/stable/sysfs-devices @@ -1,7 +1,7 @@ # Note: This documents additional properties of any device beyond what # is documented in Documentation/sysfs-rules.txt -What: /sys/devices/*/of_path +What: /sys/devices/*/of_node Date: February 2015 Contact: Device Tree mailing list Description: -- cgit v1.1 From 17d0774f80681020eccc9638d925a23f1fc4f671 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 22 Jun 2016 21:42:16 +0300 Subject: sysfs: correctly handle read offset on PREALLOC attrs Attributes declared with __ATTR_PREALLOC use sysfs_kf_read() which returns zero bytes for non-zero offset. This breaks script checkarray in mdadm tool in debian where /bin/sh is 'dash' because its builtin 'read' reads only one byte at a time. Script gets 'i' instead of 'idle' when reads current action from /sys/block/$dev/md/sync_action and as a result does nothing. This patch adds trivial implementation of partial read: generate whole string and move required part into buffer head. 
Signed-off-by: Konstantin Khlebnikov Fixes: 4ef67a8c95f3 ("sysfs/kernfs: make read requests on pre-alloc files use the buffer.") Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=787950 Cc: Stable # v3.19+ Acked-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/file.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index f35523d..b803213 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -114,9 +114,15 @@ static ssize_t sysfs_kf_read(struct kernfs_open_file *of, char *buf, * If buf != of->prealloc_buf, we don't know how * large it is, so cannot safely pass it to ->show */ - if (pos || WARN_ON_ONCE(buf != of->prealloc_buf)) + if (WARN_ON_ONCE(buf != of->prealloc_buf)) return 0; len = ops->show(kobj, of->kn->priv, buf); + if (pos) { + if (len <= pos) + return 0; + len -= pos; + memmove(buf, buf + pos, len); + } return min(count, len); } -- cgit v1.1 From 5db4f7f80d165fc9725f356e99feec409e446baa Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 16 Aug 2016 15:06:54 +0300 Subject: Revert "tty/serial/8250: use mctrl_gpio helpers" Serial console is broken in v4.8-rcX. Mika and I independently bisected down to commit 4ef03d328769 ("tty/serial/8250: use mctrl_gpio helpers"). Since neither the author nor anyone else has proposed a solution, we had better revert it for now. This reverts commit 4ef03d328769eddbfeca1f1c958fdb181a69c341.
Link: https://lkml.kernel.org/r/20160809130229.GN1729@lahna.fi.intel.com Signed-off-by: Andy Shevchenko Tested-by: Heikki Krogerus Tested-by: Mika Westerberg Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/serial/8250.txt | 19 ------------ drivers/tty/serial/8250/8250.h | 35 +---------------------- drivers/tty/serial/8250/8250_core.c | 9 ------ drivers/tty/serial/8250/8250_omap.c | 31 +++++++++----------- drivers/tty/serial/8250/8250_port.c | 7 +---- drivers/tty/serial/8250/Kconfig | 1 - include/linux/serial_8250.h | 1 - 7 files changed, 15 insertions(+), 88 deletions(-) diff --git a/Documentation/devicetree/bindings/serial/8250.txt b/Documentation/devicetree/bindings/serial/8250.txt index f5561ac..936ab5b 100644 --- a/Documentation/devicetree/bindings/serial/8250.txt +++ b/Documentation/devicetree/bindings/serial/8250.txt @@ -42,9 +42,6 @@ Optional properties: - auto-flow-control: one way to enable automatic flow control support. The driver is allowed to detect support for the capability even without this property. -- {rts,cts,dtr,dsr,rng,dcd}-gpios: specify a GPIO for RTS/CTS/DTR/DSR/RI/DCD - line respectively. It will use specified GPIO instead of the peripheral - function pin for the UART feature. If unsure, don't specify this property. 
Note: * fsl,ns16550: @@ -66,19 +63,3 @@ Example: interrupts = <10>; reg-shift = <2>; }; - -Example for OMAP UART using GPIO-based modem control signals: - - uart4: serial@49042000 { - compatible = "ti,omap3-uart"; - reg = <0x49042000 0x400>; - interrupts = <80>; - ti,hwmods = "uart4"; - clock-frequency = <48000000>; - cts-gpios = <&gpio3 5 GPIO_ACTIVE_LOW>; - rts-gpios = <&gpio3 6 GPIO_ACTIVE_LOW>; - dtr-gpios = <&gpio1 12 GPIO_ACTIVE_LOW>; - dsr-gpios = <&gpio1 13 GPIO_ACTIVE_LOW>; - dcd-gpios = <&gpio1 14 GPIO_ACTIVE_LOW>; - rng-gpios = <&gpio1 15 GPIO_ACTIVE_LOW>; - }; diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h index 122e0e4..1a16fea 100644 --- a/drivers/tty/serial/8250/8250.h +++ b/drivers/tty/serial/8250/8250.h @@ -15,8 +15,6 @@ #include #include -#include "../serial_mctrl_gpio.h" - struct uart_8250_dma { int (*tx_dma)(struct uart_8250_port *p); int (*rx_dma)(struct uart_8250_port *p); @@ -133,43 +131,12 @@ void serial8250_em485_destroy(struct uart_8250_port *p); static inline void serial8250_out_MCR(struct uart_8250_port *up, int value) { - int mctrl_gpio = 0; - serial_out(up, UART_MCR, value); - - if (value & UART_MCR_RTS) - mctrl_gpio |= TIOCM_RTS; - if (value & UART_MCR_DTR) - mctrl_gpio |= TIOCM_DTR; - - mctrl_gpio_set(up->gpios, mctrl_gpio); } static inline int serial8250_in_MCR(struct uart_8250_port *up) { - int mctrl, mctrl_gpio = 0; - - mctrl = serial_in(up, UART_MCR); - - /* save current MCR values */ - if (mctrl & UART_MCR_RTS) - mctrl_gpio |= TIOCM_RTS; - if (mctrl & UART_MCR_DTR) - mctrl_gpio |= TIOCM_DTR; - - mctrl_gpio = mctrl_gpio_get_outputs(up->gpios, &mctrl_gpio); - - if (mctrl_gpio & TIOCM_RTS) - mctrl |= UART_MCR_RTS; - else - mctrl &= ~UART_MCR_RTS; - - if (mctrl_gpio & TIOCM_DTR) - mctrl |= UART_MCR_DTR; - else - mctrl &= ~UART_MCR_DTR; - - return mctrl; + return serial_in(up, UART_MCR); } #if defined(__alpha__) && !defined(CONFIG_PCI) diff --git a/drivers/tty/serial/8250/8250_core.c 
b/drivers/tty/serial/8250/8250_core.c index 13ad5c3..dcf43f6 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -974,8 +974,6 @@ int serial8250_register_8250_port(struct uart_8250_port *up) uart = serial8250_find_match_or_unused(&up->port); if (uart && uart->port.type != PORT_8250_CIR) { - struct mctrl_gpios *gpios; - if (uart->port.dev) uart_remove_one_port(&serial8250_reg, &uart->port); @@ -1013,13 +1011,6 @@ int serial8250_register_8250_port(struct uart_8250_port *up) if (up->port.flags & UPF_FIXED_TYPE) uart->port.type = up->port.type; - gpios = mctrl_gpio_init(&uart->port, 0); - if (IS_ERR(gpios)) { - if (PTR_ERR(gpios) != -ENOSYS) - return PTR_ERR(gpios); - } else - uart->gpios = gpios; - serial8250_set_defaults(uart); /* Possibly override default I/O functions. */ diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index e14982f..61ad6c3 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -134,21 +134,18 @@ static void omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl) serial8250_do_set_mctrl(port, mctrl); - if (IS_ERR_OR_NULL(mctrl_gpio_to_gpiod(up->gpios, - UART_GPIO_RTS))) { - /* - * Turn off autoRTS if RTS is lowered and restore autoRTS - * setting if RTS is raised - */ - lcr = serial_in(up, UART_LCR); - serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); - if ((mctrl & TIOCM_RTS) && (port->status & UPSTAT_AUTORTS)) - priv->efr |= UART_EFR_RTS; - else - priv->efr &= ~UART_EFR_RTS; - serial_out(up, UART_EFR, priv->efr); - serial_out(up, UART_LCR, lcr); - } + /* + * Turn off autoRTS if RTS is lowered and restore autoRTS setting + * if RTS is raised + */ + lcr = serial_in(up, UART_LCR); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); + if ((mctrl & TIOCM_RTS) && (port->status & UPSTAT_AUTORTS)) + priv->efr |= UART_EFR_RTS; + else + priv->efr &= ~UART_EFR_RTS; + serial_out(up, UART_EFR, priv->efr); + serial_out(up, UART_LCR, lcr); 
} /* @@ -449,9 +446,7 @@ static void omap_8250_set_termios(struct uart_port *port, priv->efr = 0; up->port.status &= ~(UPSTAT_AUTOCTS | UPSTAT_AUTORTS | UPSTAT_AUTOXOFF); - if (termios->c_cflag & CRTSCTS && up->port.flags & UPF_HARD_FLOW - && IS_ERR_OR_NULL(mctrl_gpio_to_gpiod(up->gpios, - UART_GPIO_RTS))) { + if (termios->c_cflag & CRTSCTS && up->port.flags & UPF_HARD_FLOW) { /* Enable AUTOCTS (autoRTS is enabled when RTS is raised) */ up->port.status |= UPSTAT_AUTOCTS | UPSTAT_AUTORTS; priv->efr |= UART_EFR_CTS; diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 7481b95..bdfa659 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1618,8 +1618,6 @@ static void serial8250_disable_ms(struct uart_port *port) if (up->bugs & UART_BUG_NOMSR) return; - mctrl_gpio_disable_ms(up->gpios); - up->ier &= ~UART_IER_MSI; serial_port_out(port, UART_IER, up->ier); } @@ -1632,8 +1630,6 @@ static void serial8250_enable_ms(struct uart_port *port) if (up->bugs & UART_BUG_NOMSR) return; - mctrl_gpio_enable_ms(up->gpios); - up->ier |= UART_IER_MSI; serial8250_rpm_get(up); @@ -1917,8 +1913,7 @@ unsigned int serial8250_do_get_mctrl(struct uart_port *port) ret |= TIOCM_DSR; if (status & UART_MSR_CTS) ret |= TIOCM_CTS; - - return mctrl_gpio_get(up->gpios, &ret); + return ret; } EXPORT_SYMBOL_GPL(serial8250_do_get_mctrl); diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig index c9ec839..7c6f7af 100644 --- a/drivers/tty/serial/8250/Kconfig +++ b/drivers/tty/serial/8250/Kconfig @@ -6,7 +6,6 @@ config SERIAL_8250 tristate "8250/16550 and compatible serial support" select SERIAL_CORE - select SERIAL_MCTRL_GPIO if GPIOLIB ---help--- This selects whether you want to include the driver for the standard serial ports. The standard answer is Y. 
People who might say N diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 923266c..48ec765 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -111,7 +111,6 @@ struct uart_8250_port { * if no_console_suspend */ unsigned char probe; - struct mctrl_gpios *gpios; #define UART_PROBE_RSA (1 << 0) /* -- cgit v1.1 From 47b34d2ef266e2c283b514d65c8963c2ccd42474 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 1 Jul 2016 17:21:49 +0300 Subject: serial: 8250_mid: fix divide error bug if baud rate is 0 Since the commit c1a67b48f6a5 ("serial: 8250_pci: replace switch-case by formula for Intel MID"), the 8250 driver crashes in the byt_set_termios() function with a divide error. This is caused by the fact that a baud rate of 0 (B0) is not handled properly. Fix it by falling back to B9600 in this case. Reported-by: "Mendez Salinas, Fernando" Fixes: c1a67b48f6a5 ("serial: 8250_pci: replace switch-case by formula for Intel MID") Cc: stable@vger.kernel.org Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_mid.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/tty/serial/8250/8250_mid.c b/drivers/tty/serial/8250/8250_mid.c index 339de9c..20c5db2 100644 --- a/drivers/tty/serial/8250/8250_mid.c +++ b/drivers/tty/serial/8250/8250_mid.c @@ -168,6 +168,9 @@ static void mid8250_set_termios(struct uart_port *p, unsigned long w = BIT(24) - 1; unsigned long mul, div; + /* Gracefully handle the B0 case: fall back to B9600 */ + fuart = fuart ? 
fuart : 9600 * 16; + if (mid->board->freq < fuart) { /* Find prescaler value that satisfies Fuart < Fref */ if (mid->board->freq > baud) -- cgit v1.1 From 6b1ca4bcadf9ef077cc5f03c6822ba276ed14902 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Wed, 31 Aug 2016 22:58:42 +0900 Subject: ALSA: fireworks: accessing to user space outside spinlock In hwdep interface of fireworks driver, accessing to user space is in a critical section with disabled local interrupt. Depending on architecture, accessing to user space can cause page fault exception. Then local processor stores machine status and handles the synchronous event. A handler corresponding to the event can call task scheduler to wait for preparing pages. In a case of usage of single core processor, the state to disable local interrupt is worse because it doesn't handle usual interrupts from hardware. This commit fixes this bug, performing the accessing outside spinlock. This commit also gives up counting the number of queued response messages to simplify ring-buffer management.
Reported-by: Vaishali Thakkar Cc: stable@vger.kernel.org Fixes: 555e8a8f7f14('ALSA: fireworks: Add command/response functionality into hwdep interface') Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai --- sound/firewire/fireworks/fireworks.h | 1 - sound/firewire/fireworks/fireworks_hwdep.c | 71 +++++++++++++++++------- sound/firewire/fireworks/fireworks_proc.c | 4 +- sound/firewire/fireworks/fireworks_transaction.c | 5 +- 4 files changed, 56 insertions(+), 25 deletions(-) diff --git a/sound/firewire/fireworks/fireworks.h b/sound/firewire/fireworks/fireworks.h index 03ed352..d73c12b 100644 --- a/sound/firewire/fireworks/fireworks.h +++ b/sound/firewire/fireworks/fireworks.h @@ -108,7 +108,6 @@ struct snd_efw { u8 *resp_buf; u8 *pull_ptr; u8 *push_ptr; - unsigned int resp_queues; }; int snd_efw_transaction_cmd(struct fw_unit *unit, diff --git a/sound/firewire/fireworks/fireworks_hwdep.c b/sound/firewire/fireworks/fireworks_hwdep.c index 33df865..2e1d9a2 100644 --- a/sound/firewire/fireworks/fireworks_hwdep.c +++ b/sound/firewire/fireworks/fireworks_hwdep.c @@ -25,6 +25,7 @@ hwdep_read_resp_buf(struct snd_efw *efw, char __user *buf, long remained, { unsigned int length, till_end, type; struct snd_efw_transaction *t; + u8 *pull_ptr; long count = 0; if (remained < sizeof(type) + sizeof(struct snd_efw_transaction)) @@ -38,8 +39,17 @@ hwdep_read_resp_buf(struct snd_efw *efw, char __user *buf, long remained, buf += sizeof(type); /* write into buffer as many responses as possible */ - while (efw->resp_queues > 0) { - t = (struct snd_efw_transaction *)(efw->pull_ptr); + spin_lock_irq(&efw->lock); + + /* + * When another task reaches here during this task's access to user + * space, it picks up current position in buffer and can read the same + * series of responses. 
+ */ + pull_ptr = efw->pull_ptr; + + while (efw->push_ptr != pull_ptr) { + t = (struct snd_efw_transaction *)(pull_ptr); length = be32_to_cpu(t->length) * sizeof(__be32); /* confirm enough space for this response */ @@ -49,26 +59,39 @@ hwdep_read_resp_buf(struct snd_efw *efw, char __user *buf, long remained, /* copy from ring buffer to user buffer */ while (length > 0) { till_end = snd_efw_resp_buf_size - - (unsigned int)(efw->pull_ptr - efw->resp_buf); + (unsigned int)(pull_ptr - efw->resp_buf); till_end = min_t(unsigned int, length, till_end); - if (copy_to_user(buf, efw->pull_ptr, till_end)) + spin_unlock_irq(&efw->lock); + + if (copy_to_user(buf, pull_ptr, till_end)) return -EFAULT; - efw->pull_ptr += till_end; - if (efw->pull_ptr >= efw->resp_buf + - snd_efw_resp_buf_size) - efw->pull_ptr -= snd_efw_resp_buf_size; + spin_lock_irq(&efw->lock); + + pull_ptr += till_end; + if (pull_ptr >= efw->resp_buf + snd_efw_resp_buf_size) + pull_ptr -= snd_efw_resp_buf_size; length -= till_end; buf += till_end; count += till_end; remained -= till_end; } - - efw->resp_queues--; } + /* + * All of tasks can read from the buffer nearly simultaneously, but the + * last position for each task is different depending on the length of + * given buffer. Here, for simplicity, a position of buffer is set by + * the latest task. It's better for a listening application to allow one + * thread to read from the buffer. Unless, each task can read different + * sequence of responses depending on variation of buffer length. 
+ */ + efw->pull_ptr = pull_ptr; + + spin_unlock_irq(&efw->lock); + return count; } @@ -76,14 +99,17 @@ static long hwdep_read_locked(struct snd_efw *efw, char __user *buf, long count, loff_t *offset) { - union snd_firewire_event event; + union snd_firewire_event event = { + .lock_status.type = SNDRV_FIREWIRE_EVENT_LOCK_STATUS, + }; - memset(&event, 0, sizeof(event)); + spin_lock_irq(&efw->lock); - event.lock_status.type = SNDRV_FIREWIRE_EVENT_LOCK_STATUS; event.lock_status.status = (efw->dev_lock_count > 0); efw->dev_lock_changed = false; + spin_unlock_irq(&efw->lock); + count = min_t(long, count, sizeof(event.lock_status)); if (copy_to_user(buf, &event, count)) @@ -98,10 +124,15 @@ hwdep_read(struct snd_hwdep *hwdep, char __user *buf, long count, { struct snd_efw *efw = hwdep->private_data; DEFINE_WAIT(wait); + bool dev_lock_changed; + bool queued; spin_lock_irq(&efw->lock); - while ((!efw->dev_lock_changed) && (efw->resp_queues == 0)) { + dev_lock_changed = efw->dev_lock_changed; + queued = efw->push_ptr != efw->pull_ptr; + + while (!dev_lock_changed && !queued) { prepare_to_wait(&efw->hwdep_wait, &wait, TASK_INTERRUPTIBLE); spin_unlock_irq(&efw->lock); schedule(); @@ -109,15 +140,17 @@ hwdep_read(struct snd_hwdep *hwdep, char __user *buf, long count, if (signal_pending(current)) return -ERESTARTSYS; spin_lock_irq(&efw->lock); + dev_lock_changed = efw->dev_lock_changed; + queued = efw->push_ptr != efw->pull_ptr; } - if (efw->dev_lock_changed) + spin_unlock_irq(&efw->lock); + + if (dev_lock_changed) count = hwdep_read_locked(efw, buf, count, offset); - else if (efw->resp_queues > 0) + else if (queued) count = hwdep_read_resp_buf(efw, buf, count, offset); - spin_unlock_irq(&efw->lock); - return count; } @@ -160,7 +193,7 @@ hwdep_poll(struct snd_hwdep *hwdep, struct file *file, poll_table *wait) poll_wait(file, &efw->hwdep_wait, wait); spin_lock_irq(&efw->lock); - if (efw->dev_lock_changed || (efw->resp_queues > 0)) + if (efw->dev_lock_changed || efw->pull_ptr != 
efw->push_ptr) events = POLLIN | POLLRDNORM; else events = 0; diff --git a/sound/firewire/fireworks/fireworks_proc.c b/sound/firewire/fireworks/fireworks_proc.c index 0639dcb..beb0a0f 100644 --- a/sound/firewire/fireworks/fireworks_proc.c +++ b/sound/firewire/fireworks/fireworks_proc.c @@ -188,8 +188,8 @@ proc_read_queues_state(struct snd_info_entry *entry, else consumed = (unsigned int)(efw->push_ptr - efw->pull_ptr); - snd_iprintf(buffer, "%d %d/%d\n", - efw->resp_queues, consumed, snd_efw_resp_buf_size); + snd_iprintf(buffer, "%d/%d\n", + consumed, snd_efw_resp_buf_size); } static void diff --git a/sound/firewire/fireworks/fireworks_transaction.c b/sound/firewire/fireworks/fireworks_transaction.c index f550808..36a08ba 100644 --- a/sound/firewire/fireworks/fireworks_transaction.c +++ b/sound/firewire/fireworks/fireworks_transaction.c @@ -121,11 +121,11 @@ copy_resp_to_buf(struct snd_efw *efw, void *data, size_t length, int *rcode) size_t capacity, till_end; struct snd_efw_transaction *t; - spin_lock_irq(&efw->lock); - t = (struct snd_efw_transaction *)data; length = min_t(size_t, be32_to_cpu(t->length) * sizeof(u32), length); + spin_lock_irq(&efw->lock); + if (efw->push_ptr < efw->pull_ptr) capacity = (unsigned int)(efw->pull_ptr - efw->push_ptr); else @@ -155,7 +155,6 @@ copy_resp_to_buf(struct snd_efw *efw, void *data, size_t length, int *rcode) } /* for hwdep */ - efw->resp_queues++; wake_up(&efw->hwdep_wait); *rcode = RCODE_COMPLETE; -- cgit v1.1 From c8d192428f52f244130b84650ad616df09f2b1e1 Mon Sep 17 00:00:00 2001 From: Jimi Damon Date: Wed, 20 Jul 2016 17:00:40 -0700 Subject: serial: 8250: added acces i/o products quad and octal serial cards Added devices ids for acces i/o products quad and octal serial cards that make use of existing Pericom PI7C9X7954 and PI7C9X7958 configurations . 
Signed-off-by: Jimi Damon Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 139 +++++++++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 20ebaea..bc51b32 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -1950,6 +1950,43 @@ pci_wch_ch38x_setup(struct serial_private *priv, #define PCI_DEVICE_ID_PERICOM_PI7C9X7954 0x7954 #define PCI_DEVICE_ID_PERICOM_PI7C9X7958 0x7958 +#define PCI_VENDOR_ID_ACCESIO 0x494f +#define PCI_DEVICE_ID_ACCESIO_PCIE_COM_2SDB 0x1051 +#define PCI_DEVICE_ID_ACCESIO_MPCIE_COM_2S 0x1053 +#define PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SDB 0x105C +#define PCI_DEVICE_ID_ACCESIO_MPCIE_COM_4S 0x105E +#define PCI_DEVICE_ID_ACCESIO_PCIE_COM232_2DB 0x1091 +#define PCI_DEVICE_ID_ACCESIO_MPCIE_COM232_2 0x1093 +#define PCI_DEVICE_ID_ACCESIO_PCIE_COM232_4DB 0x1099 +#define PCI_DEVICE_ID_ACCESIO_MPCIE_COM232_4 0x109B +#define PCI_DEVICE_ID_ACCESIO_PCIE_COM_2SMDB 0x10D1 +#define PCI_DEVICE_ID_ACCESIO_MPCIE_COM_2SM 0x10D3 +#define PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SMDB 0x10DA +#define PCI_DEVICE_ID_ACCESIO_MPCIE_COM_4SM 0x10DC +#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_1 0x1108 +#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM422_2 0x1110 +#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_2 0x1111 +#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM422_4 0x1118 +#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_4 0x1119 +#define PCI_DEVICE_ID_ACCESIO_PCIE_ICM_2S 0x1152 +#define PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4S 0x115A +#define PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_2 0x1190 +#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM232_2 0x1191 +#define PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_4 0x1198 +#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM232_4 0x1199 +#define PCI_DEVICE_ID_ACCESIO_PCIE_ICM_2SM 0x11D0 +#define PCI_DEVICE_ID_ACCESIO_PCIE_COM422_4 0x105A +#define PCI_DEVICE_ID_ACCESIO_PCIE_COM485_4 0x105B +#define PCI_DEVICE_ID_ACCESIO_PCIE_COM422_8 
0x106A +#define PCI_DEVICE_ID_ACCESIO_PCIE_COM485_8 0x106B +#define PCI_DEVICE_ID_ACCESIO_PCIE_COM232_4 0x1098 +#define PCI_DEVICE_ID_ACCESIO_PCIE_COM232_8 0x10A9 +#define PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SM 0x10D9 +#define PCI_DEVICE_ID_ACCESIO_PCIE_COM_8SM 0x10E9 +#define PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4SM 0x11D8 + + + /* Unknown vendors/cards - this should not be in linux/pci_ids.h */ #define PCI_SUBDEVICE_ID_UNKNOWN_0x1584 0x1584 #define PCI_SUBDEVICE_ID_UNKNOWN_0x1588 0x1588 @@ -5113,6 +5150,108 @@ static struct pci_device_id serial_pci_tbl[] = { 0, 0, pbn_pericom_PI7C9X7958 }, /* + * ACCES I/O Products quad + */ + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_2SDB, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM_2S, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SDB, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM_4S, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM232_2DB, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM232_2, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM232_4DB, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM232_4, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_2SMDB, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM_2SM, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SMDB, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { 
PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM_4SM, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_1, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM422_2, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_2, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM422_4, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_4, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM_2S, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4S, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_2, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM232_2, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_4, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM232_4, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM_2SM, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM422_4, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7958 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM485_4, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7958 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM422_8, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7958 }, + { 
PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM485_8, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7958 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM232_4, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7958 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM232_8, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7958 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SM, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7958 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_8SM, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7958 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4SM, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7958 }, + /* * Topic TP560 Data/Fax/Voice 56k modem (reported by Evan Clarke) */ { PCI_VENDOR_ID_TOPIC, PCI_DEVICE_ID_TOPIC_TP560, -- cgit v1.1 From 31bd44e7687be8a70e1e744e5cb84974b798d568 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 25 Aug 2016 16:49:51 +0800 Subject: crypto: cryptd - Use correct tfm object for AEAD tracking The AEAD code path incorrectly uses the child tfm to track the cryptd refcnt, and then potentially frees the child tfm. 
Fixes: 81760ea6a95a ("crypto: cryptd - Add helpers to check...") Reported-by: Sowmini Varadhan Signed-off-by: Herbert Xu --- crypto/cryptd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/cryptd.c b/crypto/cryptd.c index cf8037a..77207b4 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -733,13 +733,14 @@ static void cryptd_aead_crypt(struct aead_request *req, rctx = aead_request_ctx(req); compl = rctx->complete; + tfm = crypto_aead_reqtfm(req); + if (unlikely(err == -EINPROGRESS)) goto out; aead_request_set_tfm(req, child); err = crypt( req ); out: - tfm = crypto_aead_reqtfm(req); ctx = crypto_aead_ctx(tfm); refcnt = atomic_read(&ctx->refcnt); -- cgit v1.1 From 8b18e2359aff2ab810aba84cebffc9da07fef78f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Mon, 29 Aug 2016 14:52:14 +0300 Subject: crypto: caam - fix IV loading for authenc (giv)decryption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For algorithms that implement IV generators before the crypto ops, the IV needed for decryption is initially located in req->src scatterlist, not in req->iv. Avoid copying the IV into req->iv by modifying the (givdecrypt) descriptors to load it directly from req->src. aead_givdecrypt() is no longer needed and goes away. 
Cc: # 4.3+ Fixes: 479bcc7c5b9e ("crypto: caam - Convert authenc to new AEAD interface") Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamalg.c | 77 +++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 40 deletions(-) diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index 6dc5971..b304421 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -556,7 +556,10 @@ skip_enc: /* Read and write assoclen bytes */ append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); - append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + if (alg->caam.geniv) + append_math_add_imm_u32(desc, VARSEQOUTLEN, REG3, IMM, ivsize); + else + append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); /* Skip assoc data */ append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); @@ -565,6 +568,14 @@ skip_enc: append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG | KEY_VLF); + if (alg->caam.geniv) { + append_seq_load(desc, ivsize, LDST_CLASS_1_CCB | + LDST_SRCDST_BYTE_CONTEXT | + (ctx1_iv_off << LDST_OFFSET_SHIFT)); + append_move(desc, MOVE_SRC_CLASS1CTX | MOVE_DEST_CLASS2INFIFO | + (ctx1_iv_off << MOVE_OFFSET_SHIFT) | ivsize); + } + /* Load Counter into CONTEXT1 reg */ if (is_rfc3686) append_load_imm_u32(desc, be32_to_cpu(1), LDST_IMM | @@ -2150,7 +2161,7 @@ static void init_authenc_job(struct aead_request *req, init_aead_job(req, edesc, all_contig, encrypt); - if (ivsize && (is_rfc3686 || !(alg->caam.geniv && encrypt))) + if (ivsize && ((is_rfc3686 && encrypt) || !alg->caam.geniv)) append_load_as_imm(desc, req->iv, ivsize, LDST_CLASS_1_CCB | LDST_SRCDST_BYTE_CONTEXT | @@ -2537,20 +2548,6 @@ static int aead_decrypt(struct aead_request *req) return ret; } -static int aead_givdecrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - unsigned int ivsize = crypto_aead_ivsize(aead); - - if (req->cryptlen < ivsize) - return 
-EINVAL; - - req->cryptlen -= ivsize; - req->assoclen += ivsize; - - return aead_decrypt(req); -} - /* * allocate and map the ablkcipher extended descriptor for ablkcipher */ @@ -3210,7 +3207,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = AES_BLOCK_SIZE, .maxauthsize = MD5_DIGEST_SIZE, }, @@ -3256,7 +3253,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = AES_BLOCK_SIZE, .maxauthsize = SHA1_DIGEST_SIZE, }, @@ -3302,7 +3299,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = AES_BLOCK_SIZE, .maxauthsize = SHA224_DIGEST_SIZE, }, @@ -3348,7 +3345,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = AES_BLOCK_SIZE, .maxauthsize = SHA256_DIGEST_SIZE, }, @@ -3394,7 +3391,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = AES_BLOCK_SIZE, .maxauthsize = SHA384_DIGEST_SIZE, }, @@ -3440,7 +3437,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = AES_BLOCK_SIZE, .maxauthsize = SHA512_DIGEST_SIZE, }, @@ -3486,7 +3483,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = 
aead_decrypt, .ivsize = DES3_EDE_BLOCK_SIZE, .maxauthsize = MD5_DIGEST_SIZE, }, @@ -3534,7 +3531,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = DES3_EDE_BLOCK_SIZE, .maxauthsize = SHA1_DIGEST_SIZE, }, @@ -3582,7 +3579,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = DES3_EDE_BLOCK_SIZE, .maxauthsize = SHA224_DIGEST_SIZE, }, @@ -3630,7 +3627,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = DES3_EDE_BLOCK_SIZE, .maxauthsize = SHA256_DIGEST_SIZE, }, @@ -3678,7 +3675,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = DES3_EDE_BLOCK_SIZE, .maxauthsize = SHA384_DIGEST_SIZE, }, @@ -3726,7 +3723,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = DES3_EDE_BLOCK_SIZE, .maxauthsize = SHA512_DIGEST_SIZE, }, @@ -3772,7 +3769,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = DES_BLOCK_SIZE, .maxauthsize = MD5_DIGEST_SIZE, }, @@ -3818,7 +3815,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = DES_BLOCK_SIZE, .maxauthsize = SHA1_DIGEST_SIZE, }, @@ 
-3864,7 +3861,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = DES_BLOCK_SIZE, .maxauthsize = SHA224_DIGEST_SIZE, }, @@ -3910,7 +3907,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = DES_BLOCK_SIZE, .maxauthsize = SHA256_DIGEST_SIZE, }, @@ -3956,7 +3953,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = DES_BLOCK_SIZE, .maxauthsize = SHA384_DIGEST_SIZE, }, @@ -4002,7 +3999,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = DES_BLOCK_SIZE, .maxauthsize = SHA512_DIGEST_SIZE, }, @@ -4051,7 +4048,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = CTR_RFC3686_IV_SIZE, .maxauthsize = MD5_DIGEST_SIZE, }, @@ -4102,7 +4099,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = CTR_RFC3686_IV_SIZE, .maxauthsize = SHA1_DIGEST_SIZE, }, @@ -4153,7 +4150,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = CTR_RFC3686_IV_SIZE, .maxauthsize = SHA224_DIGEST_SIZE, }, @@ -4204,7 +4201,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, 
.setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = CTR_RFC3686_IV_SIZE, .maxauthsize = SHA256_DIGEST_SIZE, }, @@ -4255,7 +4252,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = CTR_RFC3686_IV_SIZE, .maxauthsize = SHA384_DIGEST_SIZE, }, @@ -4306,7 +4303,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = CTR_RFC3686_IV_SIZE, .maxauthsize = SHA512_DIGEST_SIZE, }, -- cgit v1.1 From 8e018c21da3febb558586b48c8db0d6d66cb6593 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 25 Aug 2016 10:09:39 -0700 Subject: raid5-cache: fix a deadlock in superblock write There is a potential deadlock in superblock write. Discard could zero data, so before discard we must make sure superblock is updated to new log tail. Updating superblock (either directly call md_update_sb() or depend on md thread) must hold reconfig mutex. On the other hand, raid5_quiesce is called with reconfig_mutex held. The first step of raid5_quiesce() is waiting for all IO finish, hence waiting for reclaim thread, while reclaim thread is calling this function and waiting for reconfig mutex. So there is a deadlock. We workaround this issue with a trylock. The downside of the solution is we could miss discard if we can't take reconfig mutex. But this should happen rarely (mainly in raid array stop), so miss discard shouldn't be a big problem.
Cc: NeilBrown Signed-off-by: Shaohua Li --- drivers/md/raid5-cache.c | 46 +++++++++++++++------------------------------- 1 file changed, 15 insertions(+), 31 deletions(-) diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 51f76dd..1b1ab4a 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -96,7 +96,6 @@ struct r5l_log { spinlock_t no_space_stripes_lock; bool need_cache_flush; - bool in_teardown; }; /* @@ -704,31 +703,22 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log, mddev = log->rdev->mddev; /* - * This is to avoid a deadlock. r5l_quiesce holds reconfig_mutex and - * wait for this thread to finish. This thread waits for - * MD_CHANGE_PENDING clear, which is supposed to be done in - * md_check_recovery(). md_check_recovery() tries to get - * reconfig_mutex. Since r5l_quiesce already holds the mutex, - * md_check_recovery() fails, so the PENDING never get cleared. The - * in_teardown check workaround this issue. + * Discard could zero data, so before discard we must make sure + * superblock is updated to new log tail. Updating superblock (either + * directly call md_update_sb() or depend on md thread) must hold + * reconfig mutex. On the other hand, raid5_quiesce is called with + * reconfig_mutex hold. The first step of raid5_quiesce() is waitting + * for all IO finish, hence waitting for reclaim thread, while reclaim + * thread is calling this function and waitting for reconfig mutex. So + * there is a deadlock. We workaround this issue with a trylock. + * FIXME: we could miss discard if we can't take reconfig mutex */ - if (!log->in_teardown) { - set_mask_bits(&mddev->flags, 0, - BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING)); - md_wakeup_thread(mddev->thread); - wait_event(mddev->sb_wait, - !test_bit(MD_CHANGE_PENDING, &mddev->flags) || - log->in_teardown); - /* - * r5l_quiesce could run after in_teardown check and hold - * mutex first. Superblock might get updated twice. 
- */ - if (log->in_teardown) - md_update_sb(mddev, 1); - } else { - WARN_ON(!mddev_is_locked(mddev)); - md_update_sb(mddev, 1); - } + set_mask_bits(&mddev->flags, 0, + BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING)); + if (!mddev_trylock(mddev)) + return; + md_update_sb(mddev, 1); + mddev_unlock(mddev); /* discard IO error really doesn't matter, ignore it */ if (log->last_checkpoint < end) { @@ -827,7 +817,6 @@ void r5l_quiesce(struct r5l_log *log, int state) if (!log || state == 2) return; if (state == 0) { - log->in_teardown = 0; /* * This is a special case for hotadd. In suspend, the array has * no journal. In resume, journal is initialized as well as the @@ -838,11 +827,6 @@ void r5l_quiesce(struct r5l_log *log, int state) log->reclaim_thread = md_register_thread(r5l_reclaim_thread, log->rdev->mddev, "reclaim"); } else if (state == 1) { - /* - * at this point all stripes are finished, so io_unit is at - * least in STRIPE_END state - */ - log->in_teardown = 1; /* make sure r5l_write_super_and_discard_space exits */ mddev = log->rdev->mddev; wake_up(&mddev->sb_wait); -- cgit v1.1 From ad5b0f7685dbfc4730987cd16af3c5ebe8133f10 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 30 Aug 2016 10:29:33 -0700 Subject: raid5: guarantee enough stripes to avoid reshape hang If there aren't enough stripes, reshape will hang. We have a check for this in new reshape, but miss it for reshape resume, hence we could see hang in reshape resume. This patch forces enough stripes existed if reshape resumes. 
Reviewed-by: NeilBrown Signed-off-by: Shaohua Li --- drivers/md/raid5.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index da583bb..b95c54c 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6639,6 +6639,16 @@ static struct r5conf *setup_conf(struct mddev *mddev) } conf->min_nr_stripes = NR_STRIPES; + if (mddev->reshape_position != MaxSector) { + int stripes = max_t(int, + ((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4, + ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4); + conf->min_nr_stripes = max(NR_STRIPES, stripes); + if (conf->min_nr_stripes != NR_STRIPES) + printk(KERN_INFO + "md/raid:%s: force stripe size %d for reshape\n", + mdname(mddev), conf->min_nr_stripes); + } memory = conf->min_nr_stripes * (sizeof(struct stripe_head) + max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024; atomic_set(&conf->empty_inactive_list_nr, NR_STRIPE_HASH_LOCKS); -- cgit v1.1 From 9f834ec18defc369d73ccf9e87a2790bfa05bf46 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 22 Aug 2016 16:41:46 -0700 Subject: binfmt_elf: switch to new creds when switching to new mm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We used to delay switching to the new credentials until after we had mapped the executable (and possible elf interpreter). That was kind of odd to begin with, since the new executable will actually then _run_ with the new creds, but whatever. The bigger problem was that we also want to make sure that we turn off prof events and tracing before we start mapping the new executable state. So while this is a cleanup, it's also a fix for a possible information leak. Reported-by: Robert Święcki Tested-by: Peter Zijlstra Acked-by: David Howells Acked-by: Oleg Nesterov Acked-by: Andy Lutomirski Acked-by: Eric W. 
Biederman Cc: Willy Tarreau Cc: Kees Cook Cc: Al Viro Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 7f6aff3f..e5495f3 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -853,6 +853,7 @@ static int load_elf_binary(struct linux_binprm *bprm) current->flags |= PF_RANDOMIZE; setup_new_exec(bprm); + install_exec_creds(bprm); /* Do this so that we can load the interpreter, if need be. We will change some of these later */ @@ -1044,7 +1045,6 @@ static int load_elf_binary(struct linux_binprm *bprm) goto out; #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */ - install_exec_creds(bprm); retval = create_elf_tables(bprm, &loc->elf_ex, load_addr, interp_load_addr); if (retval < 0) -- cgit v1.1 From 9264251ee2a55bce8fb93826b3f581fb9eb7e2c2 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 31 Aug 2016 14:16:44 +0200 Subject: bridge: re-introduce 'fix parsing of MLDv2 reports' commit bc8c20acaea1 ("bridge: multicast: treat igmpv3 report with INCLUDE and no sources as a leave") seems to have accidentally reverted commit 47cc84ce0c2f ("bridge: fix parsing of MLDv2 reports"). This commit brings back a change to br_ip6_multicast_mld2_report() where parsing of MLDv2 reports stops when the first group is successfully added to the MDB cache. Fixes: bc8c20acaea1 ("bridge: multicast: treat igmpv3 report with INCLUDE and no sources as a leave") Signed-off-by: Davide Caratti Acked-by: Nikolay Aleksandrov Acked-by: Thadeu Lima de Souza Cascardo Signed-off-by: David S. 
Miller --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index a5423a1..c5fea93 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1138,7 +1138,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, } else { err = br_ip6_multicast_add_group(br, port, &grec->grec_mca, vid); - if (!err) + if (err) break; } } -- cgit v1.1 From 10ea9434065e56fe14287f89258ecf2fb684ed1a Mon Sep 17 00:00:00 2001 From: jimqu Date: Tue, 30 Aug 2016 08:59:42 +0800 Subject: drm/amd/amdgpu: sdma resume fail during S4 on CI SDMA could fail in the thaw() and restore() processes, do software reset if each SDMA engine is busy. Signed-off-by: JimQu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index ee64669..77fdd99 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -52,6 +52,7 @@ static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev); static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev); static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev); static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev); +static int cik_sdma_soft_reset(void *handle); MODULE_FIRMWARE("radeon/bonaire_sdma.bin"); MODULE_FIRMWARE("radeon/bonaire_sdma1.bin"); @@ -1037,6 +1038,8 @@ static int cik_sdma_resume(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + cik_sdma_soft_reset(handle); + return cik_sdma_hw_init(adev); } -- cgit v1.1 From 53960b4f89db58bc155d6f8aa0a44ccc59ccb26f Mon Sep 17 00:00:00 2001 From: jimqu Date: Tue, 30 Aug 2016 09:03:16 +0800 Subject: drm/amd/amdgpu: compute ring test fail during S4 on CI unhalt Instruction Fetch Unit after all rings are inited.
Signed-off-by: JimQu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index d869d05..425413f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2755,8 +2755,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) u64 wb_gpu_addr; u32 *buf; struct bonaire_mqd *mqd; - - gfx_v7_0_cp_compute_enable(adev, true); + struct amdgpu_ring *ring; /* fix up chicken bits */ tmp = RREG32(mmCP_CPF_DEBUG); @@ -2791,7 +2790,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) /* init the queues. Just two for now. */ for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; + ring = &adev->gfx.compute_ring[i]; if (ring->mqd_obj == NULL) { r = amdgpu_bo_create(adev, @@ -2970,6 +2969,13 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) amdgpu_bo_unreserve(ring->mqd_obj); ring->ready = true; + } + + gfx_v7_0_cp_compute_enable(adev, true); + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + r = amdgpu_ring_test_ring(ring); if (r) ring->ready = false; -- cgit v1.1 From 1f703e6679f373f5bba4efe7093aa82e91af4037 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Tue, 30 Aug 2016 17:59:11 +0800 Subject: drm/amdgpu: record error code when ring test failed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we may miss errors. 
Signed-off-by: Chunming Zhou Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index a31d7ef..ec1282a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -280,7 +280,7 @@ void amdgpu_ib_pool_fini(struct amdgpu_device *adev) int amdgpu_ib_ring_tests(struct amdgpu_device *adev) { unsigned i; - int r; + int r, ret = 0; for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; @@ -301,10 +301,11 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev) } else { /* still not good, but we can live with it */ DRM_ERROR("amdgpu: failed testing IB on ring %d (%d).\n", i, r); + ret = r; } } } - return 0; + return ret; } /* -- cgit v1.1 From cd81a9170e69e018bbaba547c1fd85a585f5697a Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Tue, 23 Aug 2016 16:20:38 +0200 Subject: mm: introduce get_task_exe_file For more convenient access if one has a pointer to the task. As a minor nit take advantage of the fact that only task lock + rcu are needed to safely grab ->exe_file. This saves mm refcount dance. Use the helper in proc_exe_link. 
Signed-off-by: Mateusz Guzik Acked-by: Konstantin Khlebnikov Acked-by: Richard Guy Briggs Cc: # 4.3.x Signed-off-by: Paul Moore --- fs/proc/base.c | 7 +------ include/linux/mm.h | 1 + kernel/fork.c | 23 +++++++++++++++++++++++ 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 0d163a8..da8b194 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1552,18 +1552,13 @@ static const struct file_operations proc_pid_set_comm_operations = { static int proc_exe_link(struct dentry *dentry, struct path *exe_path) { struct task_struct *task; - struct mm_struct *mm; struct file *exe_file; task = get_proc_task(d_inode(dentry)); if (!task) return -ENOENT; - mm = get_task_mm(task); + exe_file = get_task_exe_file(task); put_task_struct(task); - if (!mm) - return -ENOENT; - exe_file = get_mm_exe_file(mm); - mmput(mm); if (exe_file) { *exe_path = exe_file->f_path; path_get(&exe_file->f_path); diff --git a/include/linux/mm.h b/include/linux/mm.h index 8f468e0..004c73a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1987,6 +1987,7 @@ extern void mm_drop_all_locks(struct mm_struct *mm); extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); extern struct file *get_mm_exe_file(struct mm_struct *mm); +extern struct file *get_task_exe_file(struct task_struct *task); extern bool may_expand_vm(struct mm_struct *, vm_flags_t, unsigned long npages); extern void vm_stat_account(struct mm_struct *, vm_flags_t, long npages); diff --git a/kernel/fork.c b/kernel/fork.c index d277e83..42451ae 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -774,6 +774,29 @@ struct file *get_mm_exe_file(struct mm_struct *mm) EXPORT_SYMBOL(get_mm_exe_file); /** + * get_task_exe_file - acquire a reference to the task's executable file + * + * Returns %NULL if task's mm (if any) has no associated executable file or + * this is a kernel thread with borrowed mm (see the comment above get_task_mm). + * User must release file via fput(). 
+ */ +struct file *get_task_exe_file(struct task_struct *task) +{ + struct file *exe_file = NULL; + struct mm_struct *mm; + + task_lock(task); + mm = task->mm; + if (mm) { + if (!(task->flags & PF_KTHREAD)) + exe_file = get_mm_exe_file(mm); + } + task_unlock(task); + return exe_file; +} +EXPORT_SYMBOL(get_task_exe_file); + +/** * get_task_mm - acquire a reference to the task's mm * * Returns %NULL if the task has no mm. Checks PF_KTHREAD (meaning -- cgit v1.1 From 5efc244346f9f338765da3d592f7947b0afdc4b5 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Tue, 23 Aug 2016 16:20:39 +0200 Subject: audit: fix exe_file access in audit_exe_compare Prior to the change the function would blindly dereference mm, exe_file and exe_file->f_inode, each of which could have been NULL or freed. Use get_task_exe_file to safely obtain stable exe_file. Signed-off-by: Mateusz Guzik Acked-by: Konstantin Khlebnikov Acked-by: Richard Guy Briggs Cc: # 4.3.x Signed-off-by: Paul Moore --- kernel/audit_watch.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 3cf1c59..4846691 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -19,6 +19,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include #include #include #include @@ -544,10 +545,11 @@ int audit_exe_compare(struct task_struct *tsk, struct audit_fsnotify_mark *mark) unsigned long ino; dev_t dev; - rcu_read_lock(); - exe_file = rcu_dereference(tsk->mm->exe_file); + exe_file = get_task_exe_file(tsk); + if (!exe_file) + return 0; ino = exe_file->f_inode->i_ino; dev = exe_file->f_inode->i_sb->s_dev; - rcu_read_unlock(); + fput(exe_file); return audit_mark_compare(mark, ino, dev); } -- cgit v1.1 From c6f1dc4d9ce275c1bd9d90b5630f36ca6abccc97 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 1 Sep 2016 10:47:27 +0800 Subject: net: ethernet: mediatek: fix fails from TX housekeeping due to incorrect port setup which net device
the SKB is complete for depends on the forward port on txd4 on the corresponding TX descriptor, but the information isn't set up well in case of SKB fragments that would lead to watchdog timeout from the upper layer, so fix it up. Signed-off-by: Sean Wang Acked-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index f160954..7fc2ff0 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -588,14 +588,15 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, dma_addr_t mapped_addr; unsigned int nr_frags; int i, n_desc = 1; - u32 txd4 = 0; + u32 txd4 = 0, fport; itxd = ring->next_free; if (itxd == ring->last_free) return -ENOMEM; /* set the forward port */ - txd4 |= (mac->id + 1) << TX_DMA_FPORT_SHIFT; + fport = (mac->id + 1) << TX_DMA_FPORT_SHIFT; + txd4 |= fport; tx_buf = mtk_desc_to_tx_buf(ring, itxd); memset(tx_buf, 0, sizeof(*tx_buf)); @@ -653,7 +654,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, WRITE_ONCE(txd->txd3, (TX_DMA_SWC | TX_DMA_PLEN0(frag_map_size) | last_frag * TX_DMA_LS0)); - WRITE_ONCE(txd->txd4, 0); + WRITE_ONCE(txd->txd4, fport); tx_buf->skb = (struct sk_buff *)MTK_DMA_DUMMY_DESC; tx_buf = mtk_desc_to_tx_buf(ring, txd); -- cgit v1.1 From 549e5495467ac2a76489f1304e2b4a7ad85e1644 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 1 Sep 2016 10:47:28 +0800 Subject: net: ethernet: mediatek: fix incorrect return value of devm_clk_get with EPROBE_DEFER 1) If the return value of devm_clk_get is EPROBE_DEFER, we should defer probing the driver. The change is verified and works based on 4.8-rc1 staying with the latest clk-next code for MT7623. 
2) Changing with the usage of loops to work out if all clocks required are fine Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 39 ++++++++++++++++------------- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 22 ++++++++++------ 2 files changed, 36 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 7fc2ff0..a5dcf57 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -50,6 +50,10 @@ static const struct mtk_ethtool_stats { MTK_ETHTOOL_STAT(rx_flow_control_packets), }; +static const char * const mtk_clks_source_name[] = { + "ethif", "esw", "gp1", "gp2" +}; + void mtk_w32(struct mtk_eth *eth, u32 val, unsigned reg) { __raw_writel(val, eth->base + reg); @@ -1814,6 +1818,7 @@ static int mtk_probe(struct platform_device *pdev) if (!eth) return -ENOMEM; + eth->dev = &pdev->dev; eth->base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(eth->base)) return PTR_ERR(eth->base); @@ -1848,21 +1853,21 @@ static int mtk_probe(struct platform_device *pdev) return -ENXIO; } } + for (i = 0; i < ARRAY_SIZE(eth->clks); i++) { + eth->clks[i] = devm_clk_get(eth->dev, + mtk_clks_source_name[i]); + if (IS_ERR(eth->clks[i])) { + if (PTR_ERR(eth->clks[i]) == -EPROBE_DEFER) + return -EPROBE_DEFER; + return -ENODEV; + } + } - eth->clk_ethif = devm_clk_get(&pdev->dev, "ethif"); - eth->clk_esw = devm_clk_get(&pdev->dev, "esw"); - eth->clk_gp1 = devm_clk_get(&pdev->dev, "gp1"); - eth->clk_gp2 = devm_clk_get(&pdev->dev, "gp2"); - if (IS_ERR(eth->clk_esw) || IS_ERR(eth->clk_gp1) || - IS_ERR(eth->clk_gp2) || IS_ERR(eth->clk_ethif)) - return -ENODEV; - - clk_prepare_enable(eth->clk_ethif); - clk_prepare_enable(eth->clk_esw); - clk_prepare_enable(eth->clk_gp1); - clk_prepare_enable(eth->clk_gp2); + clk_prepare_enable(eth->clks[MTK_CLK_ETHIF]); + clk_prepare_enable(eth->clks[MTK_CLK_ESW]); + 
clk_prepare_enable(eth->clks[MTK_CLK_GP1]); + clk_prepare_enable(eth->clks[MTK_CLK_GP2]); - eth->dev = &pdev->dev; eth->msg_enable = netif_msg_init(mtk_msg_level, MTK_DEFAULT_MSG_ENABLE); INIT_WORK(&eth->pending_work, mtk_pending_work); @@ -1905,10 +1910,10 @@ static int mtk_remove(struct platform_device *pdev) { struct mtk_eth *eth = platform_get_drvdata(pdev); - clk_disable_unprepare(eth->clk_ethif); - clk_disable_unprepare(eth->clk_esw); - clk_disable_unprepare(eth->clk_gp1); - clk_disable_unprepare(eth->clk_gp2); + clk_disable_unprepare(eth->clks[MTK_CLK_ETHIF]); + clk_disable_unprepare(eth->clks[MTK_CLK_ESW]); + clk_disable_unprepare(eth->clks[MTK_CLK_GP1]); + clk_disable_unprepare(eth->clks[MTK_CLK_GP2]); netif_napi_del(&eth->tx_napi); netif_napi_del(&eth->rx_napi); diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index f82e3ac..6e1ade7 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -290,6 +290,17 @@ enum mtk_tx_flags { MTK_TX_FLAGS_PAGE0 = 0x02, }; +/* This enum allows us to identify how the clock is defined on the array of the + * clock in the order + */ +enum mtk_clks_map { + MTK_CLK_ETHIF, + MTK_CLK_ESW, + MTK_CLK_GP1, + MTK_CLK_GP2, + MTK_CLK_MAX +}; + /* struct mtk_tx_buf - This struct holds the pointers to the memory pointed at * by the TX descriptor s * @skb: The SKB pointer of the packet being sent @@ -370,10 +381,7 @@ struct mtk_rx_ring { * @scratch_ring: Newer SoCs need memory for a second HW managed TX ring * @phy_scratch_ring: physical address of scratch_ring * @scratch_head: The scratch memory that scratch_ring points to.
- * @clk_ethif: The ethif clock - * @clk_esw: The switch clock - * @clk_gp1: The gmac1 clock - * @clk_gp2: The gmac2 clock + * @clks: clock array for all clocks required * @mii_bus: If there is a bus we need to create an instance for it * @pending_work: The workqueue used to reset the dma ring */ @@ -400,10 +408,8 @@ struct mtk_eth { struct mtk_tx_dma *scratch_ring; dma_addr_t phy_scratch_ring; void *scratch_head; - struct clk *clk_ethif; - struct clk *clk_esw; - struct clk *clk_gp1; - struct clk *clk_gp2; + struct clk *clks[MTK_CLK_MAX]; + struct mii_bus *mii_bus; struct work_struct pending_work; }; -- cgit v1.1 From 1b43079906367d4d6e1ad00f04fff525a32e26c4 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 1 Sep 2016 10:47:29 +0800 Subject: net: ethernet: mediatek: fix API usage with skb_free_frag use skb_free_frag() instead of legacy put_page() Signed-off-by: Sean Wang Acked-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index a5dcf57..c9e25a7 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -870,7 +870,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, /* receive data */ skb = build_skb(data, ring->frag_size); if (unlikely(!skb)) { - put_page(virt_to_head_page(new_data)); + skb_free_frag(new_data); netdev->stats.rx_dropped++; goto release_desc; } -- cgit v1.1 From d3bd1ce4db8e843dce421e2f8f123e5251a9c7d3 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 1 Sep 2016 10:47:30 +0800 Subject: net: ethernet: mediatek: remove redundant free_irq for devm_request_irq allocated irq these irqs are not used for shared irq and disabled during ethernet stops. irq requested by devm_request_irq is safe to be freed automatically on driver detach. 
Signed-off-by: Sean Wang Acked-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index c9e25a7..1ffde91 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1513,8 +1513,6 @@ static void mtk_uninit(struct net_device *dev) phy_disconnect(mac->phy_dev); mtk_mdio_cleanup(eth); mtk_irq_disable(eth, ~0); - free_irq(eth->irq[1], dev); - free_irq(eth->irq[2], dev); } static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) -- cgit v1.1 From 7c6b0d76fa02213393815e3b6d5e4a415bf3f0e2 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 1 Sep 2016 10:47:31 +0800 Subject: net: ethernet: mediatek: fix logic unbalance between probe and remove original mdio_cleanup is not in the symmetric place against where mdio_init is, so relocate mdio_cleanup to the right one. Signed-off-by: Sean Wang Acked-by: John Crispin Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 1ffde91..bf5b7e1 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1511,7 +1511,6 @@ static void mtk_uninit(struct net_device *dev) struct mtk_eth *eth = mac->hw; phy_disconnect(mac->phy_dev); - mtk_mdio_cleanup(eth); mtk_irq_disable(eth, ~0); } @@ -1916,6 +1915,7 @@ static int mtk_remove(struct platform_device *pdev) netif_napi_del(&eth->tx_napi); netif_napi_del(&eth->rx_napi); mtk_cleanup(eth); + mtk_mdio_cleanup(eth); platform_set_drvdata(pdev, NULL); return 0; -- cgit v1.1 From 79e9a41438527a4cc2b426a2dfb92cd0825d7d29 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 1 Sep 2016 10:47:32 +0800 Subject: net: ethernet: mediatek: fix issue of driver removal with interface is up mtk_stop() must be called to stop for freeing DMA resources acquired and restoring state changed by mtk_open() firstly when module removal. Signed-off-by: Sean Wang Signed-off-by: David S.
Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index bf5b7e1..556951e 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1906,6 +1906,14 @@ err_free_dev: static int mtk_remove(struct platform_device *pdev) { struct mtk_eth *eth = platform_get_drvdata(pdev); + int i; + + /* stop all devices to make sure that dma is properly shut down */ + for (i = 0; i < MTK_MAC_COUNT; i++) { + if (!eth->netdev[i]) + continue; + mtk_stop(eth->netdev[i]); + } clk_disable_unprepare(eth->clks[MTK_CLK_ETHIF]); clk_disable_unprepare(eth->clks[MTK_CLK_ESW]); -- cgit v1.1 From b5776f01bf1f36a6c89c6dc407dda6509002e0d7 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 1 Sep 2016 10:47:33 +0800 Subject: net: ethernet: mediatek: fix the missing of_node_put() after node is used done inside mtk_mdio_init This patch adds the missing of_node_put() after finishing the usage of of_get_child_by_name. Signed-off-by: Sean Wang Acked-by: John Crispin Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 556951e..409efcf 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -324,6 +324,7 @@ static int mtk_mdio_init(struct mtk_eth *eth) err = of_mdiobus_register(eth->mii_bus, mii_np); if (err) goto err_free_bus; + of_node_put(mii_np); return 0; -- cgit v1.1 From 1e515b7fdbaeef29b1025c09b50ebbb132a39208 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 1 Sep 2016 10:47:34 +0800 Subject: net: ethernet: mediatek: use devm_mdiobus_alloc instead of mdiobus_alloc inside mtk_mdio_init a lot of parts in the driver uses devm_* APIs to gain benefits from the device resource management, so devm_mdiobus_alloc is also used instead of mdiobus_alloc to have more elegant code flow. Using common code provided by the devm_* helps to 1) have simplified the code flow as [1] says 2) decrease the risk of incorrect error handling by human 3) only a few drivers used it since it was proposed on linux 3.16, so just hope to promote for this. Ref: [1] https://patchwork.ozlabs.org/patch/344093/ Signed-off-by: Sean Wang Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 409efcf..0367f51 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -295,7 +295,7 @@ err_phy: static int mtk_mdio_init(struct mtk_eth *eth) { struct device_node *mii_np; - int err; + int ret; mii_np = of_get_child_by_name(eth->dev->of_node, "mdio-bus"); if (!mii_np) { @@ -304,13 +304,13 @@ static int mtk_mdio_init(struct mtk_eth *eth) } if (!of_device_is_available(mii_np)) { - err = 0; + ret = 0; goto err_put_node; } - eth->mii_bus = mdiobus_alloc(); + eth->mii_bus = devm_mdiobus_alloc(eth->dev); if (!eth->mii_bus) { - err = -ENOMEM; + ret = -ENOMEM; goto err_put_node; } @@ -321,20 +321,11 @@ static int mtk_mdio_init(struct mtk_eth *eth) eth->mii_bus->parent = eth->dev; snprintf(eth->mii_bus->id, MII_BUS_ID_SIZE, "%s", mii_np->name); - err = of_mdiobus_register(eth->mii_bus, mii_np); - if (err) - goto err_free_bus; - of_node_put(mii_np); - - return 0; - -err_free_bus: - mdiobus_free(eth->mii_bus); + ret = of_mdiobus_register(eth->mii_bus, mii_np); err_put_node: of_node_put(mii_np); - eth->mii_bus = NULL; - return err; + return ret; } static void mtk_mdio_cleanup(struct mtk_eth *eth) @@ -343,8 +334,6 @@ static void mtk_mdio_cleanup(struct mtk_eth *eth) return; mdiobus_unregister(eth->mii_bus); - of_node_put(eth->mii_bus->dev.of_node); - mdiobus_free(eth->mii_bus); } static inline void mtk_irq_disable(struct mtk_eth *eth, u32 mask) -- cgit v1.1 From aa6e8a54f653daaf34ff7fdceb2d992d714eea7c Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 1 Sep 2016 10:47:35 +0800 Subject: net: ethernet: mediatek: fix error handling inside mtk_mdio_init Return -ENODEV if the MDIO bus is disabled in the device tree. 
Signed-off-by: Sean Wang Acked-by: John Crispin Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 0367f51..d919915 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -304,7 +304,7 @@ static int mtk_mdio_init(struct mtk_eth *eth) } if (!of_device_is_available(mii_np)) { - ret = 0; + ret = -ENODEV; goto err_put_node; } -- cgit v1.1 From c0338aff2260ea6c092806312dbb154cec07a242 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sun, 28 Aug 2016 21:28:26 -0700 Subject: kcm: fix a socket double free Dmitry reported a double free on kcm socket, which could be easily reproduced by: #include #include int main() { int fd = syscall(SYS_socket, 0x29ul, 0x5ul, 0x0ul, 0, 0, 0); syscall(SYS_ioctl, fd, 0x89e2ul, 0x20a98000ul, 0, 0, 0); return 0; } This is because on the error path, after we install the new socket file, we call sock_release() to clean up the socket, which leaves the fd pointing to a freed socket. Fix this by calling sys_close() on that fd directly. Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Reported-by: Dmitry Vyukov Cc: Tom Herbert Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- net/kcm/kcmsock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index cb39e05..4116932 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -2029,7 +2030,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) if (copy_to_user((void __user *)arg, &info, sizeof(info))) { err = -EFAULT; - sock_release(newsock); + sys_close(info.fd); } } -- cgit v1.1 From 5fe118c9fd6b8e129788c88a10ac0af82b255f81 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Mon, 29 Aug 2016 08:29:52 -0400 Subject: qed*: Disallow dcbx configuration for VF interfaces. Dcbx configuration is not supported for VF interfaces. Hence don't populate the callbacks for VFs and also fail the dcbx-query for VFs. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 4 ++++ drivers/net/ethernet/qlogic/qede/qede_main.c | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index 226cb08..7ad1667 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -19,6 +19,7 @@ #include "qed_dcbx.h" #include "qed_hsi.h" #include "qed_sp.h" +#include "qed_sriov.h" #ifdef CONFIG_DCB #include #endif @@ -945,6 +946,9 @@ static int qed_dcbx_query_params(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt; int rc; + if (IS_VF(p_hwfn->cdev)) + return -EINVAL; + p_ptt = qed_ptt_acquire(p_hwfn); if (!p_ptt) return -EBUSY; diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index a6eb6af..9544e4c 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -2520,7 +2520,8 @@ static int 
__qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, edev->ops->register_ops(cdev, &qede_ll_ops, edev); #ifdef CONFIG_DCB - qede_set_dcbnl_ops(edev->ndev); + if (!IS_VF(edev)) + qede_set_dcbnl_ops(edev->ndev); #endif INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task); -- cgit v1.1 From 5ec5dfa4d29448002bb05018ba12620e7fafffd1 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Mon, 29 Aug 2016 08:29:53 -0400 Subject: qed: Set selection-field while configuring the app entry in ieee mode. Management firmware requires the selection-field (SF) to be set for configuring the application/protocol entry in IEEE mode. Without this setting, the app entry will be configured incorrectly in MFW. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index 7ad1667..192a886 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -1063,23 +1063,31 @@ qed_dcbx_set_app_data(struct qed_hwfn *p_hwfn, for (i = 0; i < DCBX_MAX_APP_PROTOCOL; i++) { entry = &p_app->app_pri_tbl[i].entry; if (ieee) { - *entry &= ~DCBX_APP_SF_IEEE_MASK; + *entry &= ~(DCBX_APP_SF_IEEE_MASK | DCBX_APP_SF_MASK); switch (p_params->app_entry[i].sf_ieee) { case QED_DCBX_SF_IEEE_ETHTYPE: *entry |= ((u32)DCBX_APP_SF_IEEE_ETHTYPE << DCBX_APP_SF_IEEE_SHIFT); + *entry |= ((u32)DCBX_APP_SF_ETHTYPE << + DCBX_APP_SF_SHIFT); break; case QED_DCBX_SF_IEEE_TCP_PORT: *entry |= ((u32)DCBX_APP_SF_IEEE_TCP_PORT << DCBX_APP_SF_IEEE_SHIFT); + *entry |= ((u32)DCBX_APP_SF_PORT << + DCBX_APP_SF_SHIFT); break; case QED_DCBX_SF_IEEE_UDP_PORT: *entry |= ((u32)DCBX_APP_SF_IEEE_UDP_PORT << DCBX_APP_SF_IEEE_SHIFT); + *entry |= ((u32)DCBX_APP_SF_PORT << + DCBX_APP_SF_SHIFT); break; case QED_DCBX_SF_IEEE_TCP_UDP_PORT: *entry |= 
((u32)DCBX_APP_SF_IEEE_TCP_UDP_PORT << DCBX_APP_SF_IEEE_SHIFT); + *entry |= ((u32)DCBX_APP_SF_PORT << + DCBX_APP_SF_SHIFT); break; } } else { -- cgit v1.1 From c5e801dac4daff972013613ae3edae6668b2aa7c Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Mon, 29 Aug 2016 08:29:54 -0400 Subject: qed: Clear dcbx memory buffers before the usage. This patch takes care of clearing the uninitialized buffer before using it. 1. pfc pri-enable bitmap need to be cleared before setting the requested enable bits. Without this, the un-touched values will be merged with requested values and sent to MFW. 2. The data in app-entry field need to be cleared before using it. 3. Clear the output data buffer used in qed_dcbx_query_params(). Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index 192a886..ad023fe 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -988,6 +988,7 @@ qed_dcbx_set_pfc_data(struct qed_hwfn *p_hwfn, if (p_params->pfc.prio[i]) pfc_map |= BIT(i); + *pfc &= ~DCBX_PFC_PRI_EN_BITMAP_MASK; *pfc |= (pfc_map << DCBX_PFC_PRI_EN_BITMAP_SHIFT); DP_VERBOSE(p_hwfn, QED_MSG_DCB, "pfc = 0x%x\n", *pfc); @@ -1062,6 +1063,7 @@ qed_dcbx_set_app_data(struct qed_hwfn *p_hwfn, for (i = 0; i < DCBX_MAX_APP_PROTOCOL; i++) { entry = &p_app->app_pri_tbl[i].entry; + *entry = 0; if (ieee) { *entry &= ~(DCBX_APP_SF_IEEE_MASK | DCBX_APP_SF_MASK); switch (p_params->app_entry[i].sf_ieee) { @@ -1193,6 +1195,7 @@ int qed_dcbx_get_config_params(struct qed_hwfn *p_hwfn, return -ENOMEM; } + memset(dcbx_info, 0, sizeof(*dcbx_info)); rc = qed_dcbx_query_params(p_hwfn, dcbx_info, QED_DCBX_OPERATIONAL_MIB); if (rc) { kfree(dcbx_info); @@ -1230,6 +1233,7 @@ static struct qed_dcbx_get 
*qed_dcbnl_get_dcbx(struct qed_hwfn *hwfn, return NULL; } + memset(dcbx_info, 0, sizeof(*dcbx_info)); if (qed_dcbx_query_params(hwfn, dcbx_info, type)) { kfree(dcbx_info); return NULL; -- cgit v1.1 From 38b256973ea90fc7c2b7e1b734fa0e8b83538d50 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 1 Sep 2016 11:11:59 +0200 Subject: ovl: handle umask and posix_acl_default correctly on creation Setting MS_POSIXACL in sb->s_flags has the side effect of passing mode to create functions without masking against umask. Another problem when creating over a whiteout is that the default posix acl is not inherited from the parent dir (because the real parent dir at the time of creation is the work directory). Fix these problems by: a) If upper fs does not have MS_POSIXACL, then mask mode with umask. b) If creating over a whiteout, call posix_acl_create() to get the inherited acls. After creation (but before moving to the final destination) set these acls on the created file. posix_acl_create() also updates the file creation mode as appropriate. 
Fixes: 39a25b2b3762 ("ovl: define ->get_acl() for overlay inodes") Signed-off-by: Miklos Szeredi --- fs/overlayfs/dir.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 12bcd07..f485dd4 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include "overlayfs.h" void ovl_cleanup(struct inode *wdir, struct dentry *wdentry) @@ -186,6 +188,9 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode, struct dentry *newdentry; int err; + if (!hardlink && !IS_POSIXACL(udir)) + stat->mode &= ~current_umask(); + inode_lock_nested(udir, I_MUTEX_PARENT); newdentry = lookup_one_len(dentry->d_name.name, upperdir, dentry->d_name.len); @@ -335,6 +340,32 @@ out_free: return ret; } +static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name, + const struct posix_acl *acl) +{ + void *buffer; + size_t size; + int err; + + if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !acl) + return 0; + + size = posix_acl_to_xattr(NULL, acl, NULL, 0); + buffer = kmalloc(size, GFP_KERNEL); + if (!buffer) + return -ENOMEM; + + size = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); + err = size; + if (err < 0) + goto out_free; + + err = vfs_setxattr(upperdentry, name, buffer, size, XATTR_CREATE); +out_free: + kfree(buffer); + return err; +} + static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, struct kstat *stat, const char *link, struct dentry *hardlink) @@ -346,10 +377,18 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, struct dentry *upper; struct dentry *newdentry; int err; + struct posix_acl *acl, *default_acl; if (WARN_ON(!workdir)) return -EROFS; + if (!hardlink) { + err = posix_acl_create(dentry->d_parent->d_inode, + &stat->mode, &default_acl, &acl); + if (err) + return err; + } + err = ovl_lock_rename_workdir(workdir, upperdir); if 
(err) goto out; @@ -384,6 +423,17 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, if (err) goto out_cleanup; } + if (!hardlink) { + err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_ACCESS, + acl); + if (err) + goto out_cleanup; + + err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_DEFAULT, + default_acl); + if (err) + goto out_cleanup; + } if (!hardlink && S_ISDIR(stat->mode)) { err = ovl_set_opaque(newdentry); @@ -410,6 +460,10 @@ out_dput: out_unlock: unlock_rename(workdir, upperdir); out: + if (!hardlink) { + posix_acl_release(acl); + posix_acl_release(default_acl); + } return err; out_cleanup: -- cgit v1.1 From c11b9fdd6a612f376a5e886505f1c54c16d8c380 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 1 Sep 2016 11:11:59 +0200 Subject: ovl: remove posix_acl_default from workdir Clear out posix acl xattrs on workdir and also reset the mode after creation so that an inherited sgid bit is cleared. Signed-off-by: Miklos Szeredi Cc: --- fs/overlayfs/super.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 4036132..452fb71 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -814,6 +814,10 @@ retry: struct kstat stat = { .mode = S_IFDIR | 0, }; + struct iattr attr = { + .ia_valid = ATTR_MODE, + .ia_mode = stat.mode, + }; if (work->d_inode) { err = -EEXIST; @@ -829,6 +833,21 @@ retry: err = ovl_create_real(dir, work, &stat, NULL, NULL, true); if (err) goto out_dput; + + err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT); + if (err && err != -ENODATA) + goto out_dput; + + err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS); + if (err && err != -ENODATA) + goto out_dput; + + /* Clear any inherited mode bits */ + inode_lock(work->d_inode); + err = notify_change(work, &attr, NULL); + inode_unlock(work->d_inode); + if (err) + goto out_dput; } out_unlock: inode_unlock(dir); -- cgit v1.1 From 
eea2fb4851e9dcbab6b991aaf47e2e024f1f55a0 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 1 Sep 2016 11:11:59 +0200 Subject: ovl: proper cleanup of workdir When mounting overlayfs it needs a clean "work" directory under the supplied workdir. Previously the mount code removed this directory if it already existed and created a new one. If the removal failed (e.g. directory was not empty) then it fell back to a read-only mount not using the workdir. While this has never been reported, it is possible to get a non-empty "work" dir from a previous mount of overlayfs in case of crash in the middle of an operation using the work directory. In this case the left over state should be discarded and the overlay filesystem will be consistent, guaranteed by the atomicity of operations on moving to/from the workdir to the upper layer. This patch implements cleaning out any files left in workdir. It is implemented using real recursion for simplicity, but the depth is limited to 2, because the worst case is that of a directory containing whiteouts under "work". 
Signed-off-by: Miklos Szeredi Cc: --- fs/overlayfs/overlayfs.h | 2 ++ fs/overlayfs/readdir.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++- fs/overlayfs/super.c | 2 +- 3 files changed, 65 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 34839bd..9a95e2c 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -179,6 +179,8 @@ int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list); void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list); void ovl_cache_free(struct list_head *list); int ovl_check_d_type_supported(struct path *realpath); +void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, + struct dentry *dentry, int level); /* inode.c */ int ovl_setattr(struct dentry *dentry, struct iattr *attr); diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index cf37fc7..f241b4e 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -248,7 +248,7 @@ static inline int ovl_dir_read(struct path *realpath, err = rdd->err; } while (!err && rdd->count); - if (!err && rdd->first_maybe_whiteout) + if (!err && rdd->first_maybe_whiteout && rdd->dentry) err = ovl_check_whiteouts(realpath->dentry, rdd); fput(realfile); @@ -606,3 +606,64 @@ int ovl_check_d_type_supported(struct path *realpath) return rdd.d_type_supported; } + +static void ovl_workdir_cleanup_recurse(struct path *path, int level) +{ + int err; + struct inode *dir = path->dentry->d_inode; + LIST_HEAD(list); + struct ovl_cache_entry *p; + struct ovl_readdir_data rdd = { + .ctx.actor = ovl_fill_merge, + .dentry = NULL, + .list = &list, + .root = RB_ROOT, + .is_lowest = false, + }; + + err = ovl_dir_read(path, &rdd); + if (err) + goto out; + + inode_lock_nested(dir, I_MUTEX_PARENT); + list_for_each_entry(p, &list, l_node) { + struct dentry *dentry; + + if (p->name[0] == '.') { + if (p->len == 1) + continue; + if (p->len == 2 && p->name[1] == '.') + continue; + } + dentry = 
lookup_one_len(p->name, path->dentry, p->len); + if (IS_ERR(dentry)) + continue; + if (dentry->d_inode) + ovl_workdir_cleanup(dir, path->mnt, dentry, level); + dput(dentry); + } + inode_unlock(dir); +out: + ovl_cache_free(&list); +} + +void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, + struct dentry *dentry, int level) +{ + int err; + + if (!d_is_dir(dentry) || level > 1) { + ovl_cleanup(dir, dentry); + return; + } + + err = ovl_do_rmdir(dir, dentry); + if (err) { + struct path path = { .mnt = mnt, .dentry = dentry }; + + inode_unlock(dir); + ovl_workdir_cleanup_recurse(&path, level + 1); + inode_lock_nested(dir, I_MUTEX_PARENT); + ovl_cleanup(dir, dentry); + } +} diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 452fb71..219534e 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -825,7 +825,7 @@ retry: goto out_dput; retried = true; - ovl_cleanup(dir, work); + ovl_workdir_cleanup(dir, mnt, work, 0); dput(work); goto retry; } -- cgit v1.1 From 5201dc449e4b6b6d7e92f7f974269b11681f98b5 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 1 Sep 2016 11:11:59 +0200 Subject: ovl: use cached acl on underlying layer Instead of calling ->get_acl() directly, use get_acl() to get the cached value. We will have the acl cached on the underlying inode anyway, because we do permission checking on both the overlay and the underlying fs. So, since we already have double caching, this improves performance without any cost. 
Signed-off-by: Miklos Szeredi --- fs/overlayfs/inode.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 024352f..d50d1ea 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "overlayfs.h" static int ovl_copy_up_truncate(struct dentry *dentry) @@ -314,14 +315,14 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type) const struct cred *old_cred; struct posix_acl *acl; - if (!IS_POSIXACL(realinode)) + if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode)) return NULL; if (!realinode->i_op->get_acl) return NULL; old_cred = ovl_override_creds(inode->i_sb); - acl = realinode->i_op->get_acl(realinode, type); + acl = get_acl(realinode, type); revert_creds(old_cred); return acl; -- cgit v1.1 From 2a3a2a3f35249412e35fbb48b743348c40373409 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 1 Sep 2016 11:11:59 +0200 Subject: ovl: don't cache acl on overlay layer Some operations (setxattr/chmod) can make the cached acl stale. We either need to clear overlay's acl cache for the affected inode or prevent acl caching on the overlay altogether. Preventing caching has the following advantages: - no double caching, less memory used - overlay cache doesn't go stale when fs clears its own cache Possible disadvantage is performance loss. If that becomes a problem get_acl() can be optimized for overlayfs. This patch disables caching by pre setting i_*acl to a value that - has bit 0 set, so is_uncached_acl() will return true - is not equal to ACL_NOT_CACHED, so get_acl() will not overwrite it The constant -3 was chosen for this purpose. 
Fixes: 39a25b2b3762 ("ovl: define ->get_acl() for overlay inodes") Signed-off-by: Miklos Szeredi --- fs/overlayfs/inode.c | 3 +++ include/linux/fs.h | 1 + 2 files changed, 4 insertions(+) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index d50d1ea..47a4f33 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -416,6 +416,9 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode) inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_flags |= S_NOCMTIME; +#ifdef CONFIG_FS_POSIX_ACL + inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE; +#endif mode &= S_IFMT; switch (mode) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 3523bf6..901e25d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -574,6 +574,7 @@ static inline void mapping_allow_writable(struct address_space *mapping) struct posix_acl; #define ACL_NOT_CACHED ((void *)(-1)) +#define ACL_DONT_CACHE ((void *)(-3)) static inline struct posix_acl * uncached_acl_sentinel(struct task_struct *task) -- cgit v1.1 From fd36570a8805f39b40a0ebde19b08603aa201d17 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 18 Aug 2016 16:58:35 +0100 Subject: ovl: fix spelling mistake: "directries" -> "directories" Trivial fix to spelling mistake in pr_err message. 
Signed-off-by: Colin Ian King Signed-off-by: Miklos Szeredi --- fs/overlayfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 219534e..6aad7d4 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1151,7 +1151,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) err = -EINVAL; stacklen = ovl_split_lowerdirs(lowertmp); if (stacklen > OVL_MAX_STACK) { - pr_err("overlayfs: too many lower directries, limit is %d\n", + pr_err("overlayfs: too many lower directories, limit is %d\n", OVL_MAX_STACK); goto out_free_lowertmp; } else if (!ufs->config.upperdir && stacklen == 1) { -- cgit v1.1 From fe2b75952347762a21f67d9df1199137ae5988b2 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 22 Aug 2016 17:59:22 +0200 Subject: ovl: Fix OVL_XATTR_PREFIX Make sure ovl_own_xattr_handler only matches attribute names starting with "overlay.", not "overlayXXX". Signed-off-by: Andreas Gruenbacher Fixes: d837a49bd57f ("ovl: fix POSIX ACL setting") Signed-off-by: Miklos Szeredi --- fs/overlayfs/inode.c | 5 ++--- fs/overlayfs/overlayfs.h | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 47a4f33..f523511 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -194,9 +194,8 @@ static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz) bool ovl_is_private_xattr(const char *name) { -#define OVL_XATTR_PRE_NAME OVL_XATTR_PREFIX "." 
- return strncmp(name, OVL_XATTR_PRE_NAME, - sizeof(OVL_XATTR_PRE_NAME) - 1) == 0; + return strncmp(name, OVL_XATTR_PREFIX, + sizeof(OVL_XATTR_PREFIX) - 1) == 0; } int ovl_setxattr(struct dentry *dentry, struct inode *inode, diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 9a95e2c..f50c390 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -24,8 +24,8 @@ enum ovl_path_type { (OVL_TYPE_MERGE(type) || !OVL_TYPE_UPPER(type)) -#define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay" -#define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX ".opaque" +#define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay." +#define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX "opaque" #define OVL_ISUPPER_MASK 1UL -- cgit v1.1 From 0c97be22f928b85110504c4bbb8574facb4bd0c0 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 22 Aug 2016 16:36:49 +0200 Subject: ovl: Get rid of ovl_xattr_noacl_handlers array Use an ordinary #ifdef to conditionally include the POSIX ACL handlers in ovl_xattr_handlers, like the other filesystems do. Flag the code that is now only used conditionally with __maybe_unused. 
Signed-off-by: Andreas Gruenbacher Signed-off-by: Miklos Szeredi --- fs/overlayfs/super.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 6aad7d4..c356191 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -986,10 +986,11 @@ static unsigned int ovl_split_lowerdirs(char *str) return ctr; } -static int ovl_posix_acl_xattr_set(const struct xattr_handler *handler, - struct dentry *dentry, struct inode *inode, - const char *name, const void *value, - size_t size, int flags) +static int __maybe_unused +ovl_posix_acl_xattr_set(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) { struct dentry *workdir = ovl_workdir(dentry); struct inode *realinode = ovl_inode_real(inode, NULL); @@ -1040,13 +1041,15 @@ static int ovl_own_xattr_set(const struct xattr_handler *handler, return -EPERM; } -static const struct xattr_handler ovl_posix_acl_access_xattr_handler = { +static const struct xattr_handler __maybe_unused +ovl_posix_acl_access_xattr_handler = { .name = XATTR_NAME_POSIX_ACL_ACCESS, .flags = ACL_TYPE_ACCESS, .set = ovl_posix_acl_xattr_set, }; -static const struct xattr_handler ovl_posix_acl_default_xattr_handler = { +static const struct xattr_handler __maybe_unused +ovl_posix_acl_default_xattr_handler = { .name = XATTR_NAME_POSIX_ACL_DEFAULT, .flags = ACL_TYPE_DEFAULT, .set = ovl_posix_acl_xattr_set, @@ -1063,19 +1066,15 @@ static const struct xattr_handler ovl_other_xattr_handler = { }; static const struct xattr_handler *ovl_xattr_handlers[] = { +#ifdef CONFIG_FS_POSIX_ACL &ovl_posix_acl_access_xattr_handler, &ovl_posix_acl_default_xattr_handler, +#endif &ovl_own_xattr_handler, &ovl_other_xattr_handler, NULL }; -static const struct xattr_handler *ovl_xattr_noacl_handlers[] = { - &ovl_own_xattr_handler, - &ovl_other_xattr_handler, - NULL, -}; - static int 
ovl_fill_super(struct super_block *sb, void *data, int silent) { struct path upperpath = { NULL, NULL }; @@ -1288,10 +1287,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) sb->s_magic = OVERLAYFS_SUPER_MAGIC; sb->s_op = &ovl_super_operations; - if (IS_ENABLED(CONFIG_FS_POSIX_ACL)) - sb->s_xattr = ovl_xattr_handlers; - else - sb->s_xattr = ovl_xattr_noacl_handlers; + sb->s_xattr = ovl_xattr_handlers; sb->s_root = root_dentry; sb->s_fs_info = ufs; sb->s_flags |= MS_POSIXACL; -- cgit v1.1 From 0e585ccc13b3edbb187fb4f1b7cc9397f17d64a9 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 22 Aug 2016 17:22:11 +0200 Subject: ovl: Switch to generic_removexattr Commit d837a49bd57f ("ovl: fix POSIX ACL setting") switches from iop->setxattr from ovl_setxattr to generic_setxattr, so switch from ovl_removexattr to generic_removexattr as well. As far as permission checking goes, the same rules should apply in either case. While doing that, rename ovl_setxattr to ovl_xattr_set to indicate that this is not an iop->setxattr implementation and remove the unused inode argument. Move ovl_other_xattr_set above ovl_own_xattr_set so that they match the order of handlers in ovl_xattr_handlers. 
Signed-off-by: Andreas Gruenbacher Fixes: d837a49bd57f ("ovl: fix POSIX ACL setting") Signed-off-by: Miklos Szeredi --- fs/overlayfs/dir.c | 2 +- fs/overlayfs/inode.c | 65 ++++++++++++++++-------------------------------- fs/overlayfs/overlayfs.h | 6 ++--- fs/overlayfs/super.c | 18 +++++++------- 4 files changed, 33 insertions(+), 58 deletions(-) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index f485dd4..791c6a2 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -1006,7 +1006,7 @@ const struct inode_operations ovl_dir_inode_operations = { .setxattr = generic_setxattr, .getxattr = ovl_getxattr, .listxattr = ovl_listxattr, - .removexattr = ovl_removexattr, + .removexattr = generic_removexattr, .get_acl = ovl_get_acl, .update_time = ovl_update_time, }; diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index f523511..94bca71 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -198,25 +198,38 @@ bool ovl_is_private_xattr(const char *name) sizeof(OVL_XATTR_PREFIX) - 1) == 0; } -int ovl_setxattr(struct dentry *dentry, struct inode *inode, - const char *name, const void *value, - size_t size, int flags) +int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value, + size_t size, int flags) { int err; - struct dentry *upperdentry; + struct path realpath; + enum ovl_path_type type = ovl_path_real(dentry, &realpath); const struct cred *old_cred; err = ovl_want_write(dentry); if (err) goto out; + if (!value && !OVL_TYPE_UPPER(type)) { + err = vfs_getxattr(realpath.dentry, name, NULL, 0); + if (err < 0) + goto out_drop_write; + } + err = ovl_copy_up(dentry); if (err) goto out_drop_write; - upperdentry = ovl_dentry_upper(dentry); + if (!OVL_TYPE_UPPER(type)) + ovl_path_upper(dentry, &realpath); + old_cred = ovl_override_creds(dentry->d_sb); - err = vfs_setxattr(upperdentry, name, value, size, flags); + if (value) + err = vfs_setxattr(realpath.dentry, name, value, size, flags); + else { + WARN_ON(flags != XATTR_REPLACE); + err 
= vfs_removexattr(realpath.dentry, name); + } revert_creds(old_cred); out_drop_write: @@ -272,42 +285,6 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) return res; } -int ovl_removexattr(struct dentry *dentry, const char *name) -{ - int err; - struct path realpath; - enum ovl_path_type type = ovl_path_real(dentry, &realpath); - const struct cred *old_cred; - - err = ovl_want_write(dentry); - if (err) - goto out; - - err = -ENODATA; - if (ovl_is_private_xattr(name)) - goto out_drop_write; - - if (!OVL_TYPE_UPPER(type)) { - err = vfs_getxattr(realpath.dentry, name, NULL, 0); - if (err < 0) - goto out_drop_write; - - err = ovl_copy_up(dentry); - if (err) - goto out_drop_write; - - ovl_path_upper(dentry, &realpath); - } - - old_cred = ovl_override_creds(dentry->d_sb); - err = vfs_removexattr(realpath.dentry, name); - revert_creds(old_cred); -out_drop_write: - ovl_drop_write(dentry); -out: - return err; -} - struct posix_acl *ovl_get_acl(struct inode *inode, int type) { struct inode *realinode = ovl_inode_real(inode, NULL); @@ -393,7 +370,7 @@ static const struct inode_operations ovl_file_inode_operations = { .setxattr = generic_setxattr, .getxattr = ovl_getxattr, .listxattr = ovl_listxattr, - .removexattr = ovl_removexattr, + .removexattr = generic_removexattr, .get_acl = ovl_get_acl, .update_time = ovl_update_time, }; @@ -406,7 +383,7 @@ static const struct inode_operations ovl_symlink_inode_operations = { .setxattr = generic_setxattr, .getxattr = ovl_getxattr, .listxattr = ovl_listxattr, - .removexattr = ovl_removexattr, + .removexattr = generic_removexattr, .update_time = ovl_update_time, }; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index f50c390..5769aaf 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -185,13 +185,11 @@ void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, /* inode.c */ int ovl_setattr(struct dentry *dentry, struct iattr *attr); int ovl_permission(struct inode 
*inode, int mask); -int ovl_setxattr(struct dentry *dentry, struct inode *inode, - const char *name, const void *value, - size_t size, int flags); +int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value, + size_t size, int flags); ssize_t ovl_getxattr(struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size); ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); -int ovl_removexattr(struct dentry *dentry, const char *name); struct posix_acl *ovl_get_acl(struct inode *inode, int type); int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags); int ovl_update_time(struct inode *inode, struct timespec *ts, int flags); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index c356191..45a2eb0 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1018,21 +1018,13 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler, posix_acl_release(acl); - return ovl_setxattr(dentry, inode, handler->name, value, size, flags); + return ovl_xattr_set(dentry, handler->name, value, size, flags); out_acl_release: posix_acl_release(acl); return err; } -static int ovl_other_xattr_set(const struct xattr_handler *handler, - struct dentry *dentry, struct inode *inode, - const char *name, const void *value, - size_t size, int flags) -{ - return ovl_setxattr(dentry, inode, name, value, size, flags); -} - static int ovl_own_xattr_set(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, const void *value, @@ -1041,6 +1033,14 @@ static int ovl_own_xattr_set(const struct xattr_handler *handler, return -EPERM; } +static int ovl_other_xattr_set(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) +{ + return ovl_xattr_set(dentry, name, value, size, flags); +} + static const struct xattr_handler __maybe_unused ovl_posix_acl_access_xattr_handler = { .name 
= XATTR_NAME_POSIX_ACL_ACCESS, -- cgit v1.1 From ce31513a9114f74fe3e9caa6534d201bdac7238d Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 1 Sep 2016 11:12:00 +0200 Subject: ovl: copyattr after setting POSIX ACL Setting POSIX acl may also modify the file mode, so need to copy that up to the overlay inode. Reported-by: Eryu Guan Fixes: d837a49bd57f ("ovl: fix POSIX ACL setting") Signed-off-by: Miklos Szeredi --- fs/overlayfs/super.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 45a2eb0..cba2c9f 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1018,7 +1018,11 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler, posix_acl_release(acl); - return ovl_xattr_set(dentry, handler->name, value, size, flags); + err = ovl_xattr_set(dentry, handler->name, value, size, flags); + if (!err) + ovl_copyattr(ovl_inode_real(inode, NULL), inode); + + return err; out_acl_release: posix_acl_release(acl); -- cgit v1.1 From 0eb45fc3bb7a2cf9c9c93d9e95986a841e5f4625 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 22 Aug 2016 17:52:55 +0200 Subject: ovl: Switch to generic_getxattr Now that overlayfs has xattr handlers for iop->{set,remove}xattr, use those same handlers for iop->getxattr as well. 
Signed-off-by: Andreas Gruenbacher Signed-off-by: Miklos Szeredi --- fs/overlayfs/dir.c | 2 +- fs/overlayfs/inode.c | 11 ++++------- fs/overlayfs/overlayfs.h | 4 ++-- fs/overlayfs/super.c | 26 ++++++++++++++++++++++++++ 4 files changed, 33 insertions(+), 10 deletions(-) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 791c6a2..1560fdc 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -1004,7 +1004,7 @@ const struct inode_operations ovl_dir_inode_operations = { .permission = ovl_permission, .getattr = ovl_dir_getattr, .setxattr = generic_setxattr, - .getxattr = ovl_getxattr, + .getxattr = generic_getxattr, .listxattr = ovl_listxattr, .removexattr = generic_removexattr, .get_acl = ovl_get_acl, diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 94bca71..1878591 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -238,16 +238,13 @@ out: return err; } -ssize_t ovl_getxattr(struct dentry *dentry, struct inode *inode, - const char *name, void *value, size_t size) +int ovl_xattr_get(struct dentry *dentry, const char *name, + void *value, size_t size) { struct dentry *realdentry = ovl_dentry_real(dentry); ssize_t res; const struct cred *old_cred; - if (ovl_is_private_xattr(name)) - return -ENODATA; - old_cred = ovl_override_creds(dentry->d_sb); res = vfs_getxattr(realdentry, name, value, size); revert_creds(old_cred); @@ -368,7 +365,7 @@ static const struct inode_operations ovl_file_inode_operations = { .permission = ovl_permission, .getattr = ovl_getattr, .setxattr = generic_setxattr, - .getxattr = ovl_getxattr, + .getxattr = generic_getxattr, .listxattr = ovl_listxattr, .removexattr = generic_removexattr, .get_acl = ovl_get_acl, @@ -381,7 +378,7 @@ static const struct inode_operations ovl_symlink_inode_operations = { .readlink = ovl_readlink, .getattr = ovl_getattr, .setxattr = generic_setxattr, - .getxattr = ovl_getxattr, + .getxattr = generic_getxattr, .listxattr = ovl_listxattr, .removexattr = generic_removexattr, 
.update_time = ovl_update_time, diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 5769aaf..5813ccf 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -187,8 +187,8 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr); int ovl_permission(struct inode *inode, int mask); int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); -ssize_t ovl_getxattr(struct dentry *dentry, struct inode *inode, - const char *name, void *value, size_t size); +int ovl_xattr_get(struct dentry *dentry, const char *name, + void *value, size_t size); ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); struct posix_acl *ovl_get_acl(struct inode *inode, int type); int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index cba2c9f..a4585f9 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -987,6 +987,14 @@ static unsigned int ovl_split_lowerdirs(char *str) } static int __maybe_unused +ovl_posix_acl_xattr_get(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) +{ + return ovl_xattr_get(dentry, handler->name, buffer, size); +} + +static int __maybe_unused ovl_posix_acl_xattr_set(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, const void *value, @@ -1029,6 +1037,13 @@ out_acl_release: return err; } +static int ovl_own_xattr_get(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) +{ + return -EPERM; +} + static int ovl_own_xattr_set(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, const void *value, @@ -1037,6 +1052,13 @@ static int ovl_own_xattr_set(const struct xattr_handler *handler, return -EPERM; } +static int ovl_other_xattr_get(const 
struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) +{ + return ovl_xattr_get(dentry, name, buffer, size); +} + static int ovl_other_xattr_set(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, const void *value, @@ -1049,6 +1071,7 @@ static const struct xattr_handler __maybe_unused ovl_posix_acl_access_xattr_handler = { .name = XATTR_NAME_POSIX_ACL_ACCESS, .flags = ACL_TYPE_ACCESS, + .get = ovl_posix_acl_xattr_get, .set = ovl_posix_acl_xattr_set, }; @@ -1056,16 +1079,19 @@ static const struct xattr_handler __maybe_unused ovl_posix_acl_default_xattr_handler = { .name = XATTR_NAME_POSIX_ACL_DEFAULT, .flags = ACL_TYPE_DEFAULT, + .get = ovl_posix_acl_xattr_get, .set = ovl_posix_acl_xattr_set, }; static const struct xattr_handler ovl_own_xattr_handler = { .prefix = OVL_XATTR_PREFIX, + .get = ovl_own_xattr_get, .set = ovl_own_xattr_set, }; static const struct xattr_handler ovl_other_xattr_handler = { .prefix = "", /* catch all */ + .get = ovl_other_xattr_get, .set = ovl_other_xattr_set, }; -- cgit v1.1 From 7cb35119d067191ce9ebc380a599db0b03cbd9d9 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 1 Sep 2016 11:12:00 +0200 Subject: ovl: listxattr: use strnlen() Be defensive about what underlying fs provides us in the returned xattr list buffer. If it's not properly null terminated, bail out with a warning instead of BUG. 
Signed-off-by: Miklos Szeredi Cc: --- fs/overlayfs/inode.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 1878591..c75625c 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -255,7 +255,8 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) { struct dentry *realdentry = ovl_dentry_real(dentry); ssize_t res; - int off; + size_t len; + char *s; const struct cred *old_cred; old_cred = ovl_override_creds(dentry->d_sb); @@ -265,17 +266,19 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) return res; /* filter out private xattrs */ - for (off = 0; off < res;) { - char *s = list + off; - size_t slen = strlen(s) + 1; + for (s = list, len = res; len;) { + size_t slen = strnlen(s, len) + 1; - BUG_ON(off + slen > res); + /* underlying fs providing us with an broken xattr list? */ + if (WARN_ON(slen > len)) + return -EIO; + len -= slen; if (ovl_is_private_xattr(s)) { res -= slen; - memmove(s, s + slen, res - off); + memmove(s, s + slen, len); } else { - off += slen; + s += slen; } } -- cgit v1.1 From 026e5e0cc12474495515275d9c176ef823238c70 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 1 Sep 2016 11:12:00 +0200 Subject: ovl: update doc Some of the documented quirks no longer apply. Signed-off-by: Miklos Szeredi --- Documentation/filesystems/overlayfs.txt | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt index d6259c7..bcbf971 100644 --- a/Documentation/filesystems/overlayfs.txt +++ b/Documentation/filesystems/overlayfs.txt @@ -183,12 +183,10 @@ The copy_up operation essentially creates a new, identical file and moves it over to the old name. The new file may be on a different filesystem, so both st_dev and st_ino of the file may change. -Any open files referring to this inode will access the old data and -metadata. 
Similarly any file locks obtained before copy_up will not -apply to the copied up file. +Any open files referring to this inode will access the old data. -On a file opened with O_RDONLY fchmod(2), fchown(2), futimesat(2) and -fsetxattr(2) will fail with EROFS. +Any file locks (and leases) obtained before copy_up will not apply +to the copied up file. If a file with multiple hard links is copied up, then this will "break" the link. Changes will not be propagated to other names -- cgit v1.1 From 6af7e4f77259ee946103387372cb159f2e99a6d4 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 1 Sep 2016 08:52:29 -0500 Subject: PCI: Mark Haswell Power Control Unit as having non-compliant BARs The Haswell Power Control Unit has a non-PCI register (CONFIG_TDP_NOMINAL) where BAR 0 is supposed to be. This is erratum HSE43 in the spec update referenced below: The PCIe* Base Specification indicates that Configuration Space Headers have a base address register at offset 0x10. Due to this erratum, the Power Control Unit's CONFIG_TDP_NOMINAL CSR (Bus 1; Device 30; Function 3; Offset 0x10) is located where a base register is expected. Mark the PCU as having non-compliant BARs so we don't try to probe any of them. There are no other BARs on this device. Rename the quirk so it's not Broadwell-specific. 
Link: http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v3-spec-update.html Link: http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v3-datasheet-vol-2.html (section 5.4, Device 30 Function 3) Link: https://bugzilla.kernel.org/show_bug.cgi?id=153881 Reported-by: Paul Menzel Tested-by: Prarit Bhargava Signed-off-by: Bjorn Helgaas Acked-by: Myron Stowe --- arch/x86/pci/fixup.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 837ea36..6d52b94 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -553,15 +553,21 @@ static void twinhead_reserve_killing_zone(struct pci_dev *dev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x27B9, twinhead_reserve_killing_zone); /* - * Broadwell EP Home Agent BARs erroneously return non-zero values when read. + * Device [8086:2fc0] + * Erratum HSE43 + * CONFIG_TDP_NOMINAL CSR Implemented at Incorrect Offset + * http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v3-spec-update.html * - * See http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v4-spec-update.html - * entry BDF2. 
+ * Devices [8086:6f60,6fa0,6fc0] + * Erratum BDF2 + * PCI BARs in the Home Agent Will Return Non-Zero Values During Enumeration + * http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v4-spec-update.html */ -static void pci_bdwep_bar(struct pci_dev *dev) +static void pci_invalid_bar(struct pci_dev *dev) { dev->non_compliant_bars = 1; } -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6f60, pci_bdwep_bar); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fa0, pci_bdwep_bar); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, pci_bdwep_bar); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, pci_invalid_bar); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6f60, pci_invalid_bar); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fa0, pci_invalid_bar); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, pci_invalid_bar); -- cgit v1.1 From 3dc09ec895f098cedd789a620c90ff1bf7f779a1 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 24 Aug 2016 11:57:52 -0400 Subject: Btrfs: kill invalid ASSERT() in process_all_refs() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Suppose you have the following tree in snap1 on a file system mounted with -o inode_cache so that inode numbers are recycled └── [ 258] a └── [ 257] b and then you remove b, rename a to c, and then re-create b in c so you have the following tree └── [ 258] c └── [ 257] b and then you try to do an incremental send you will hit ASSERT(pending_move == 0); in process_all_refs(). This is because we assume that any recycling of inodes will not have a pending change in our path, which isn't the case. This is the case for the DELETE side, since we want to remove the old file using the old path, but on the create side we could have a pending move and need to do the normal pending rename dance. So remove this ASSERT() and put a comment about why we ignore pending_move. 
Thanks, Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/send.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index efe129f..a87675f 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -4268,10 +4268,12 @@ static int process_all_refs(struct send_ctx *sctx, } btrfs_release_path(path); + /* + * We don't actually care about pending_move as we are simply + * re-creating this inode and will be rename'ing it into place once we + * rename the parent directory. + */ ret = process_recorded_refs(sctx, &pending_move); - /* Only applicable to an incremental send. */ - ASSERT(pending_move == 0); - out: btrfs_free_path(path); return ret; -- cgit v1.1 From a9b1fc851db054ddec703dc7951ed00620600b26 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 31 Aug 2016 16:43:33 -0700 Subject: Btrfs: fix endless loop in balancing block groups Qgroup function may overwrite the saved error 'err' with 0 in case quota is not enabled, and this ends up with an endless loop in balance because we keep going back to balance the same block group. It really should use 'ret' instead.
Signed-off-by: Liu Bo Reviewed-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 8a2c2a0..c0c13dc 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4200,9 +4200,11 @@ restart: err = PTR_ERR(trans); goto out_free; } - err = qgroup_fix_relocated_data_extents(trans, rc); - if (err < 0) { - btrfs_abort_transaction(trans, err); + ret = qgroup_fix_relocated_data_extents(trans, rc); + if (ret < 0) { + btrfs_abort_transaction(trans, ret); + if (!err) + err = ret; goto out_free; } btrfs_commit_transaction(trans, rc->extent_root); -- cgit v1.1 From e0af24849efb0eea572cf22d22bb65d164cb8a6f Mon Sep 17 00:00:00 2001 From: Wang Xiaoguang Date: Wed, 31 Aug 2016 19:46:16 +0800 Subject: btrfs: fix one bug that process may endlessly wait for ticket in wait_reserve_ticket() If can_overcommit() in btrfs_calc_reclaim_metadata_size() returns true, btrfs_async_reclaim_metadata_space() will not reclaim metadata space; it just returns directly and also forgets to wake up the processes which are waiting for their tickets, so these processes will wait endlessly. Fstests case generic/172 with mount option "-o compress=lzo" has revealed this bug on my test machine. Here if we have tickets to handle, we must handle them first.
Signed-off-by: Wang Xiaoguang Reviewed-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 60d4ae7..64676a1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4901,11 +4901,6 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root, u64 expected; u64 to_reclaim = 0; - to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M); - if (can_overcommit(root, space_info, to_reclaim, - BTRFS_RESERVE_FLUSH_ALL)) - return 0; - list_for_each_entry(ticket, &space_info->tickets, list) to_reclaim += ticket->bytes; list_for_each_entry(ticket, &space_info->priority_tickets, list) @@ -4913,6 +4908,11 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root, if (to_reclaim) return to_reclaim; + to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M); + if (can_overcommit(root, space_info, to_reclaim, + BTRFS_RESERVE_FLUSH_ALL)) + return 0; + used = space_info->bytes_used + space_info->bytes_reserved + space_info->bytes_pinned + space_info->bytes_readonly + space_info->bytes_may_use; -- cgit v1.1 From 5b004412e2b7894105ea6043d380b7ab21f244ef Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 1 Sep 2016 10:37:40 +0200 Subject: mlxsw: spectrum_router: Fix failure caused by double fib removal from HW In mlxsw we squash tables 254 and 255 together into HW. Kernel adds/dels /32 ip to/from both 254 and 255. On del path, that causes the same prefix being removed twice. Fix this by introducing reference counting for private mlxsw fib entries. That required a bit of code reshuffle. Also put dev into fib entry key so the same prefix could be represented once per every router interface. Fixes: 61c503f976b5 ("mlxsw: spectrum_router: Implement fib4 add/del switchdev obj ops") Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 126 +++++++++++++++------ 1 file changed, 92 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 917ddd1..ed61814 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -107,6 +107,7 @@ mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage, } struct mlxsw_sp_fib_key { + struct net_device *dev; unsigned char addr[sizeof(struct in6_addr)]; unsigned char prefix_len; }; @@ -123,6 +124,7 @@ struct mlxsw_sp_fib_entry { struct rhash_head ht_node; struct mlxsw_sp_fib_key key; enum mlxsw_sp_fib_entry_type type; + unsigned int ref_count; u8 added:1; u16 rif; /* used for action local */ struct mlxsw_sp_vr *vr; @@ -171,13 +173,15 @@ static void mlxsw_sp_fib_entry_remove(struct mlxsw_sp_fib *fib, static struct mlxsw_sp_fib_entry * mlxsw_sp_fib_entry_create(struct mlxsw_sp_fib *fib, const void *addr, - size_t addr_len, unsigned char prefix_len) + size_t addr_len, unsigned char prefix_len, + struct net_device *dev) { struct mlxsw_sp_fib_entry *fib_entry; fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL); if (!fib_entry) return NULL; + fib_entry->key.dev = dev; memcpy(fib_entry->key.addr, addr, addr_len); fib_entry->key.prefix_len = prefix_len; return fib_entry; @@ -190,10 +194,13 @@ static void mlxsw_sp_fib_entry_destroy(struct mlxsw_sp_fib_entry *fib_entry) static struct mlxsw_sp_fib_entry * mlxsw_sp_fib_entry_lookup(struct mlxsw_sp_fib *fib, const void *addr, - size_t addr_len, unsigned char prefix_len) + size_t addr_len, unsigned char prefix_len, + struct net_device *dev) { - struct mlxsw_sp_fib_key key = {{ 0 } }; + struct mlxsw_sp_fib_key key; + memset(&key, 0, sizeof(key)); + key.dev = dev; memcpy(key.addr, addr, addr_len); key.prefix_len = prefix_len; return rhashtable_lookup_fast(&fib->ht, &key, 
mlxsw_sp_fib_ht_params); @@ -1695,34 +1702,93 @@ mlxsw_sp_router_fib4_entry_fini(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry); } -static int -mlxsw_sp_router_fib4_add_prepare(struct mlxsw_sp_port *mlxsw_sp_port, - const struct switchdev_obj_ipv4_fib *fib4, - struct switchdev_trans *trans) +static struct mlxsw_sp_fib_entry * +mlxsw_sp_fib_entry_get(struct mlxsw_sp *mlxsw_sp, + const struct switchdev_obj_ipv4_fib *fib4) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - struct mlxsw_sp_router_fib4_add_info *info; struct mlxsw_sp_fib_entry *fib_entry; + struct fib_info *fi = fib4->fi; struct mlxsw_sp_vr *vr; int err; vr = mlxsw_sp_vr_get(mlxsw_sp, fib4->dst_len, fib4->tb_id, MLXSW_SP_L3_PROTO_IPV4); if (IS_ERR(vr)) - return PTR_ERR(vr); + return ERR_CAST(vr); + fib_entry = mlxsw_sp_fib_entry_lookup(vr->fib, &fib4->dst, + sizeof(fib4->dst), + fib4->dst_len, fi->fib_dev); + if (fib_entry) { + /* Already exists, just take a reference */ + fib_entry->ref_count++; + return fib_entry; + } fib_entry = mlxsw_sp_fib_entry_create(vr->fib, &fib4->dst, - sizeof(fib4->dst), fib4->dst_len); + sizeof(fib4->dst), + fib4->dst_len, fi->fib_dev); if (!fib_entry) { err = -ENOMEM; goto err_fib_entry_create; } fib_entry->vr = vr; + fib_entry->ref_count = 1; err = mlxsw_sp_router_fib4_entry_init(mlxsw_sp, fib4, fib_entry); if (err) goto err_fib4_entry_init; + return fib_entry; + +err_fib4_entry_init: + mlxsw_sp_fib_entry_destroy(fib_entry); +err_fib_entry_create: + mlxsw_sp_vr_put(mlxsw_sp, vr); + + return ERR_PTR(err); +} + +static struct mlxsw_sp_fib_entry * +mlxsw_sp_fib_entry_find(struct mlxsw_sp *mlxsw_sp, + const struct switchdev_obj_ipv4_fib *fib4) +{ + struct mlxsw_sp_vr *vr; + + vr = mlxsw_sp_vr_find(mlxsw_sp, fib4->tb_id, MLXSW_SP_L3_PROTO_IPV4); + if (!vr) + return NULL; + + return mlxsw_sp_fib_entry_lookup(vr->fib, &fib4->dst, + sizeof(fib4->dst), fib4->dst_len, + fib4->fi->fib_dev); +} + +void mlxsw_sp_fib_entry_put(struct mlxsw_sp 
*mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_vr *vr = fib_entry->vr; + + if (--fib_entry->ref_count == 0) { + mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry); + mlxsw_sp_fib_entry_destroy(fib_entry); + } + mlxsw_sp_vr_put(mlxsw_sp, vr); +} + +static int +mlxsw_sp_router_fib4_add_prepare(struct mlxsw_sp_port *mlxsw_sp_port, + const struct switchdev_obj_ipv4_fib *fib4, + struct switchdev_trans *trans) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_router_fib4_add_info *info; + struct mlxsw_sp_fib_entry *fib_entry; + int err; + + fib_entry = mlxsw_sp_fib_entry_get(mlxsw_sp, fib4); + if (IS_ERR(fib_entry)) + return PTR_ERR(fib_entry); + info = kmalloc(sizeof(*info), GFP_KERNEL); if (!info) { err = -ENOMEM; @@ -1736,11 +1802,7 @@ mlxsw_sp_router_fib4_add_prepare(struct mlxsw_sp_port *mlxsw_sp_port, return 0; err_alloc_info: - mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry); -err_fib4_entry_init: - mlxsw_sp_fib_entry_destroy(fib_entry); -err_fib_entry_create: - mlxsw_sp_vr_put(mlxsw_sp, vr); + mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry); return err; } @@ -1759,11 +1821,14 @@ mlxsw_sp_router_fib4_add_commit(struct mlxsw_sp_port *mlxsw_sp_port, fib_entry = info->fib_entry; kfree(info); + if (fib_entry->ref_count != 1) + return 0; + vr = fib_entry->vr; - err = mlxsw_sp_fib_entry_insert(fib_entry->vr->fib, fib_entry); + err = mlxsw_sp_fib_entry_insert(vr->fib, fib_entry); if (err) goto err_fib_entry_insert; - err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); + err = mlxsw_sp_fib_entry_update(mlxsw_sp_port->mlxsw_sp, fib_entry); if (err) goto err_fib_entry_add; return 0; @@ -1771,9 +1836,7 @@ mlxsw_sp_router_fib4_add_commit(struct mlxsw_sp_port *mlxsw_sp_port, err_fib_entry_add: mlxsw_sp_fib_entry_remove(vr->fib, fib_entry); err_fib_entry_insert: - mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry); - mlxsw_sp_fib_entry_destroy(fib_entry); - mlxsw_sp_vr_put(mlxsw_sp, vr); + 
mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry); return err; } @@ -1793,23 +1856,18 @@ int mlxsw_sp_router_fib4_del(struct mlxsw_sp_port *mlxsw_sp_port, { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_fib_entry *fib_entry; - struct mlxsw_sp_vr *vr; - vr = mlxsw_sp_vr_find(mlxsw_sp, fib4->tb_id, MLXSW_SP_L3_PROTO_IPV4); - if (!vr) { - dev_warn(mlxsw_sp->bus_info->dev, "Failed to find virtual router for FIB4 entry being removed.\n"); - return -ENOENT; - } - fib_entry = mlxsw_sp_fib_entry_lookup(vr->fib, &fib4->dst, - sizeof(fib4->dst), fib4->dst_len); + fib_entry = mlxsw_sp_fib_entry_find(mlxsw_sp, fib4); if (!fib_entry) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to find FIB4 entry being removed.\n"); return -ENOENT; } - mlxsw_sp_fib_entry_del(mlxsw_sp_port->mlxsw_sp, fib_entry); - mlxsw_sp_fib_entry_remove(vr->fib, fib_entry); - mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry); - mlxsw_sp_fib_entry_destroy(fib_entry); - mlxsw_sp_vr_put(mlxsw_sp, vr); + + if (fib_entry->ref_count == 1) { + mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry); + mlxsw_sp_fib_entry_remove(fib_entry->vr->fib, fib_entry); + } + + mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry); return 0; } -- cgit v1.1 From 7146da31817aa21b19d7a01e95b94686c3f4bd97 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 1 Sep 2016 10:37:41 +0200 Subject: mlxsw: spectrum_router: Fix fib entry update path Originally, I expected that there would be needed to call update operation in case RALUE record action is changed. However, that is not needed since write operation takes care of that nicely. Remove prepared construct and always call the write operation. Fixes: 61c503f976b5 ("mlxsw: spectrum_router: Implement fib4 add/del switchdev obj ops") Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index ed61814..49f4e06 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -125,7 +125,6 @@ struct mlxsw_sp_fib_entry { struct mlxsw_sp_fib_key key; enum mlxsw_sp_fib_entry_type type; unsigned int ref_count; - u8 added:1; u16 rif; /* used for action local */ struct mlxsw_sp_vr *vr; struct list_head nexthop_group_node; @@ -1633,11 +1632,8 @@ static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry) { - enum mlxsw_reg_ralue_op op; - - op = !fib_entry->added ? MLXSW_REG_RALUE_OP_WRITE_WRITE : - MLXSW_REG_RALUE_OP_WRITE_UPDATE; - return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op); + return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, + MLXSW_REG_RALUE_OP_WRITE_WRITE); } static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp, -- cgit v1.1 From de7d62952b01755f9885bbe945b141d9d4338e55 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 1 Sep 2016 10:37:42 +0200 Subject: mlxsw: spectrum: Fix error path in mlxsw_sp_module_init Add forgotten notifier unregister. Fixes: 99724c18fc66 ("mlxsw: spectrum: Introduce support for router interfaces") Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 7291f2c..ec1f7d8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4553,6 +4553,7 @@ static int __init mlxsw_sp_module_init(void) return 0; err_core_driver_register: + unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb); return err; } -- cgit v1.1 From e73226384948e9b35823150cadd9a3ea4ca9fc97 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 1 Sep 2016 10:37:43 +0200 Subject: mlxsw: spectrum_router: Fix netevent notifier registration Currently the notifier is registered once for every ASIC instance, even though each registration uses the same notifier block. Fix this by moving the registration to module init. Fixes: c723c735fa6b ("mlxsw: spectrum_router: Periodically update the kernel's neigh table") Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 9 +++++++++ drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 2 ++ drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 17 ++--------------- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index ec1f7d8..e16b347 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -56,6 +56,7 @@ #include #include #include +#include #include "spectrum.h" #include "core.h" @@ -4541,18 +4542,25 @@ static struct notifier_block mlxsw_sp_inetaddr_nb __read_mostly = { .priority = 10, /* Must be called before FIB notifier block */ }; +static struct notifier_block mlxsw_sp_router_netevent_nb __read_mostly = { + .notifier_call = mlxsw_sp_router_netevent_event, +}; + static int __init mlxsw_sp_module_init(void) { int err; register_netdevice_notifier(&mlxsw_sp_netdevice_nb); register_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); + register_netevent_notifier(&mlxsw_sp_router_netevent_nb); + err = mlxsw_core_driver_register(&mlxsw_sp_driver); if (err) goto err_core_driver_register; return 0; err_core_driver_register: + unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb); return err; @@ -4561,6 +4569,7 @@ err_core_driver_register: static void __exit mlxsw_sp_module_exit(void) { mlxsw_core_driver_unregister(&mlxsw_sp_driver); + unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index ab3feb8..ac48abe 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -587,6 +587,8 @@ 
int mlxsw_sp_router_neigh_construct(struct net_device *dev, struct neighbour *n); void mlxsw_sp_router_neigh_destroy(struct net_device *dev, struct neighbour *n); +int mlxsw_sp_router_netevent_event(struct notifier_block *unused, + unsigned long event, void *ptr); int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count); void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 49f4e06..352259b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -944,8 +944,8 @@ static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work) mlxsw_sp_port_dev_put(mlxsw_sp_port); } -static int mlxsw_sp_router_netevent_event(struct notifier_block *unused, - unsigned long event, void *ptr) +int mlxsw_sp_router_netevent_event(struct notifier_block *unused, + unsigned long event, void *ptr) { struct mlxsw_sp_neigh_entry *neigh_entry; struct mlxsw_sp_port *mlxsw_sp_port; @@ -1015,10 +1015,6 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *unused, return NOTIFY_DONE; } -static struct notifier_block mlxsw_sp_router_netevent_nb __read_mostly = { - .notifier_call = mlxsw_sp_router_netevent_event, -}; - static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp) { int err; @@ -1033,10 +1029,6 @@ static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp) */ mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp); - err = register_netevent_notifier(&mlxsw_sp_router_netevent_nb); - if (err) - goto err_register_netevent_notifier; - /* Create the delayed works for the activity_update */ INIT_DELAYED_WORK(&mlxsw_sp->router.neighs_update.dw, mlxsw_sp_router_neighs_update_work); @@ -1045,17 +1037,12 @@ static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp) mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, 0); 
mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, 0); return 0; - -err_register_netevent_notifier: - rhashtable_destroy(&mlxsw_sp->router.neigh_ht); - return err; } static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp) { cancel_delayed_work_sync(&mlxsw_sp->router.neighs_update.dw); cancel_delayed_work_sync(&mlxsw_sp->router.nexthop_probe_dw); - unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); rhashtable_destroy(&mlxsw_sp->router.neigh_ht); } -- cgit v1.1 From f1de7a28d53cfde8fa290d6c535eb690a4e1b5d5 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 1 Sep 2016 10:37:44 +0200 Subject: mlxsw: spectrum: Don't take multiple references on a FID In commit 14d39461b3f4 ("mlxsw: spectrum: Use per-FID struct for the VLAN-aware bridge") I added a per-FID struct, which member ports can take a reference on upon VLAN membership configuration. However, sometimes only the VLAN flags (e.g. egress untagged) are toggled without changing the VLAN membership. In these cases we shouldn't take another reference on the FID. Fixes: 14d39461b3f4 ("mlxsw: spectrum: Use per-FID struct for the VLAN-aware bridge") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index d1b59cd..e0d95c4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -460,6 +460,9 @@ static int __mlxsw_sp_port_fid_join(struct mlxsw_sp_port *mlxsw_sp_port, { struct mlxsw_sp_fid *f; + if (test_bit(fid, mlxsw_sp_port->active_vlans)) + return 0; + f = mlxsw_sp_fid_find(mlxsw_sp_port->mlxsw_sp, fid); if (!f) { f = mlxsw_sp_fid_create(mlxsw_sp_port->mlxsw_sp, fid); -- cgit v1.1 From aad8b6bae7c63fcf860c4c7ce693cb8ac5d4a511 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 1 Sep 2016 10:37:45 +0200 Subject: mlxsw: spectrum: Use existing flood setup when adding VLANs When a VLAN is added on a bridge port we should use the existing unicast flood configuration of the port instead of assuming it's enabled. Fixes: 0293038e0c36 ("mlxsw: spectrum: Add support for flood control") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../ethernet/mellanox/mlxsw/spectrum_switchdev.c | 25 ++++++++-------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index e0d95c4..7b654c51 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -167,8 +167,8 @@ static int mlxsw_sp_port_attr_stp_state_set(struct mlxsw_sp_port *mlxsw_sp_port, } static int __mlxsw_sp_port_flood_set(struct mlxsw_sp_port *mlxsw_sp_port, - u16 idx_begin, u16 idx_end, bool set, - bool only_uc) + u16 idx_begin, u16 idx_end, bool uc_set, + bool bm_set) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; u16 local_port = mlxsw_sp_port->local_port; @@ -187,28 +187,22 @@ static int __mlxsw_sp_port_flood_set(struct mlxsw_sp_port *mlxsw_sp_port, return -ENOMEM; mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_UC, idx_begin, - table_type, range, local_port, set); + table_type, range, local_port, uc_set); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); if (err) goto buffer_out; - /* Flooding control allows one to decide whether a given port will - * flood unicast traffic for which there is no FDB entry. 
- */ - if (only_uc) - goto buffer_out; - mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BM, idx_begin, - table_type, range, local_port, set); + table_type, range, local_port, bm_set); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); if (err) goto err_flood_bm_set; - else - goto buffer_out; + + goto buffer_out; err_flood_bm_set: mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_UC, idx_begin, - table_type, range, local_port, !set); + table_type, range, local_port, !uc_set); mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); buffer_out: kfree(sftr_pl); @@ -257,8 +251,7 @@ int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 fid, * the start of the vFIDs range. */ vfid = mlxsw_sp_fid_to_vfid(fid); - return __mlxsw_sp_port_flood_set(mlxsw_sp_vport, vfid, vfid, set, - false); + return __mlxsw_sp_port_flood_set(mlxsw_sp_vport, vfid, vfid, set, set); } static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port, @@ -520,7 +513,7 @@ static int mlxsw_sp_port_fid_join(struct mlxsw_sp_port *mlxsw_sp_port, } err = __mlxsw_sp_port_flood_set(mlxsw_sp_port, fid_begin, fid_end, - true, false); + mlxsw_sp_port->uc_flood, true); if (err) goto err_port_flood_set; -- cgit v1.1 From 561ed23331dfefea4c8eec616463ee4d0c8fa4b7 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Thu, 1 Sep 2016 14:45:12 +0800 Subject: qed: fix kzalloc-simple.cocci warnings drivers/net/ethernet/qlogic/qed/qed_dcbx.c:1230:13-20: WARNING: kzalloc should be used for dcbx_info, instead of kmalloc/memset drivers/net/ethernet/qlogic/qed/qed_dcbx.c:1192:13-20: WARNING: kzalloc should be used for dcbx_info, instead of kmalloc/memset Use kzalloc rather than kmalloc followed by memset with 0 This considers some simple cases that are common and easy to validate Note in particular that there are no ...s in the rule, so all of the matched code has to be contiguous Generated by: scripts/coccinelle/api/alloc/kzalloc-simple.cocci CC: Sudarsana Reddy Kalluru 
Signed-off-by: Fengguang Wu Acked-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index ad023fe..3656d2f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -1189,13 +1189,12 @@ int qed_dcbx_get_config_params(struct qed_hwfn *p_hwfn, return 0; } - dcbx_info = kmalloc(sizeof(*dcbx_info), GFP_KERNEL); + dcbx_info = kzalloc(sizeof(*dcbx_info), GFP_KERNEL); if (!dcbx_info) { DP_ERR(p_hwfn, "Failed to allocate struct qed_dcbx_info\n"); return -ENOMEM; } - memset(dcbx_info, 0, sizeof(*dcbx_info)); rc = qed_dcbx_query_params(p_hwfn, dcbx_info, QED_DCBX_OPERATIONAL_MIB); if (rc) { kfree(dcbx_info); @@ -1227,13 +1226,12 @@ static struct qed_dcbx_get *qed_dcbnl_get_dcbx(struct qed_hwfn *hwfn, { struct qed_dcbx_get *dcbx_info; - dcbx_info = kmalloc(sizeof(*dcbx_info), GFP_KERNEL); + dcbx_info = kzalloc(sizeof(*dcbx_info), GFP_KERNEL); if (!dcbx_info) { DP_ERR(hwfn->cdev, "Failed to allocate memory for dcbx_info\n"); return NULL; } - memset(dcbx_info, 0, sizeof(*dcbx_info)); if (qed_dcbx_query_params(hwfn, dcbx_info, type)) { kfree(dcbx_info); return NULL; -- cgit v1.1 From aabdd09d535073c35f746e46c3a5d3286088be3a Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 1 Sep 2016 11:28:59 +0200 Subject: tg3: Fix for disallow tx coalescing time to be 0 The recent commit 087d7a8c9174 "tg3: Fix for diasllow rx coalescing time to be 0" disallows setting the Rx coalescing time to 0, as this stops the generation of interrupts for incoming packets. I found that a zero Tx coalescing time stops the generation of interrupts for outgoing packets as well and later fires the Tx watchdog. To avoid this, don't allow the Tx coalescing time to be set to 0, and also remove the subsequent checks that become redundant.
Cc: satish.baddipadige@broadcom.com Cc: siva.kallam@broadcom.com Cc: michael.chan@broadcom.com Signed-off-by: Ivan Vecera Acked-by: Siva Reddy Kallam Acked-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 6592612..a2551bc 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -14012,6 +14012,7 @@ static int tg3_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec) if ((ec->rx_coalesce_usecs > MAX_RXCOL_TICKS) || (!ec->rx_coalesce_usecs) || (ec->tx_coalesce_usecs > MAX_TXCOL_TICKS) || + (!ec->tx_coalesce_usecs) || (ec->rx_max_coalesced_frames > MAX_RXMAX_FRAMES) || (ec->tx_max_coalesced_frames > MAX_TXMAX_FRAMES) || (ec->rx_coalesce_usecs_irq > max_rxcoal_tick_int) || @@ -14022,16 +14023,6 @@ static int tg3_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec) (ec->stats_block_coalesce_usecs < min_stat_coal_ticks)) return -EINVAL; - /* No rx interrupts will be generated if both are zero */ - if ((ec->rx_coalesce_usecs == 0) && - (ec->rx_max_coalesced_frames == 0)) - return -EINVAL; - - /* No tx interrupts will be generated if both are zero */ - if ((ec->tx_coalesce_usecs == 0) && - (ec->tx_max_coalesced_frames == 0)) - return -EINVAL; - /* Only copy relevant parameters, ignore all others. */ tp->coal.rx_coalesce_usecs = ec->rx_coalesce_usecs; tp->coal.tx_coalesce_usecs = ec->tx_coalesce_usecs; -- cgit v1.1 From d2f394dc4816b7bd1b44981d83509f18f19c53f0 Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Thu, 1 Sep 2016 16:22:16 +0200 Subject: tipc: fix random link resets while adding a second bearer In a dual bearer configuration, if the second tipc link becomes active while the first link still has pending nametable "bulk" updates, it randomly leads to reset of the second link. 
When a link is established, the function named_distribute(), fills the skb based on node mtu (allows room for TUNNEL_PROTOCOL) with NAME_DISTRIBUTOR message for each PUBLICATION. However, the function named_distribute() allocates the buffer by increasing the node mtu by INT_H_SIZE (to insert NAME_DISTRIBUTOR). This consumes the space allocated for TUNNEL_PROTOCOL. When establishing the second link, the link shall tunnel all the messages in the first link queue including the "bulk" update. As size of the NAME_DISTRIBUTOR messages while tunnelling, exceeds the link mtu the transmission fails (-EMSGSIZE). Thus, the synch point based on the message count of the tunnel packets is never reached leading to link timeout. In this commit, we adjust the size of name distributor message so that they can be tunnelled. Reviewed-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: David S. Miller --- net/tipc/name_distr.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 6b626a6..a04fe9b 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -62,6 +62,8 @@ static void publ_to_item(struct distr_item *i, struct publication *p) /** * named_prepare_buf - allocate & initialize a publication message + * + * The buffer returned is of size INT_H_SIZE + payload size */ static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size, u32 dest) @@ -141,9 +143,9 @@ static void named_distribute(struct net *net, struct sk_buff_head *list, struct publication *publ; struct sk_buff *skb = NULL; struct distr_item *item = NULL; - uint msg_dsz = (tipc_node_get_mtu(net, dnode, 0) / ITEM_SIZE) * - ITEM_SIZE; - uint msg_rem = msg_dsz; + u32 msg_dsz = ((tipc_node_get_mtu(net, dnode, 0) - INT_H_SIZE) / + ITEM_SIZE) * ITEM_SIZE; + u32 msg_rem = msg_dsz; list_for_each_entry(publ, pls, local_list) { /* Prepare next buffer: */ -- cgit v1.1 From da29838dcbde86b3bdf3b5702abb5937b4c31d18 Mon Sep 
17 00:00:00 2001 From: Andy Gospodarek Date: Mon, 29 Aug 2016 16:51:30 -0400 Subject: MAINTAINERS: update to working email address Signed-off-by: Andy Gospodarek Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 71aa5da..0537211 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2484,7 +2484,7 @@ F: include/net/bluetooth/ BONDING DRIVER M: Jay Vosburgh M: Veaceslav Falico -M: Andy Gospodarek +M: Andy Gospodarek L: netdev@vger.kernel.org W: http://sourceforge.net/projects/bonding/ S: Supported -- cgit v1.1 From a036244c068612a43fa8c0f33a0eb4daa4d8dba0 Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Mon, 29 Aug 2016 17:38:26 -0700 Subject: i40e: Fix kernel panic on enable/disable LLDP If DCB is configured on the link partner switch with an unsupported traffic class configuration (e.g. non-contiguous TCs), the driver is flagging DCB as disabled. But, for future DCB LLDPDUs, the driver was checking if the interface was DCB capable instead of enabled. This was causing a kernel panic when LLDP was enabled/disabled on the link partner switch. This patch corrects the situation by having the LLDP event handler check the correct flag in the pf structure. It also cleans up the setting and clearing of the enabled flag for other checks. Signed-off-by: Dave Ertman Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: David S. 
Miller --- drivers/net/ethernet/intel/i40e/i40e_main.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 828ed28..d0b3a1b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5113,9 +5113,13 @@ static int i40e_init_pf_dcb(struct i40e_pf *pf) DCB_CAP_DCBX_VER_IEEE; pf->flags |= I40E_FLAG_DCB_CAPABLE; - /* Enable DCB tagging only when more than one TC */ + /* Enable DCB tagging only when more than one TC + * or explicitly disable if only one TC + */ if (i40e_dcb_get_num_tc(&hw->local_dcbx_config) > 1) pf->flags |= I40E_FLAG_DCB_ENABLED; + else + pf->flags &= ~I40E_FLAG_DCB_ENABLED; dev_dbg(&pf->pdev->dev, "DCBX offload is supported for this PF.\n"); } @@ -5716,7 +5720,7 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf, u8 type; /* Not DCB capable or capability disabled */ - if (!(pf->flags & I40E_FLAG_DCB_CAPABLE)) + if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) return ret; /* Ignore if event is not for Nearest Bridge */ @@ -7896,6 +7900,7 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf) #endif I40E_FLAG_RSS_ENABLED | I40E_FLAG_DCB_CAPABLE | + I40E_FLAG_DCB_ENABLED | I40E_FLAG_SRIOV_ENABLED | I40E_FLAG_FD_SB_ENABLED | I40E_FLAG_FD_ATR_ENABLED | @@ -10502,6 +10507,7 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf) I40E_FLAG_FD_SB_ENABLED | I40E_FLAG_FD_ATR_ENABLED | I40E_FLAG_DCB_CAPABLE | + I40E_FLAG_DCB_ENABLED | I40E_FLAG_SRIOV_ENABLED | I40E_FLAG_VMDQ_ENABLED); } else if (!(pf->flags & (I40E_FLAG_RSS_ENABLED | @@ -10525,7 +10531,8 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf) /* Not enough queues for all TCs */ if ((pf->flags & I40E_FLAG_DCB_CAPABLE) && (queues_left < I40E_MAX_TRAFFIC_CLASS)) { - pf->flags &= ~I40E_FLAG_DCB_CAPABLE; + pf->flags &= ~(I40E_FLAG_DCB_CAPABLE | + I40E_FLAG_DCB_ENABLED); dev_info(&pf->pdev->dev, "not enough 
queues for DCB. DCB is disabled.\n"); } pf->num_lan_qps = max_t(int, pf->rss_size_max, @@ -10922,7 +10929,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = i40e_init_pf_dcb(pf); if (err) { dev_info(&pdev->dev, "DCB init failed %d, disabled\n", err); - pf->flags &= ~I40E_FLAG_DCB_CAPABLE; + pf->flags &= ~(I40E_FLAG_DCB_CAPABLE | I40E_FLAG_DCB_ENABLED); /* Continue without DCB enabled */ } #endif /* CONFIG_I40E_DCB */ -- cgit v1.1 From 57e81d44b0e1aa4dcb479ff8de8fc34cf635d0e8 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Tue, 30 Aug 2016 11:36:26 +0530 Subject: net: thunderx: Fix for HW issue while padding TSO packet There is a issue in HW where-in while sending GSO sized pkts as part of TSO, if pkt len falls below configured min packet size i.e 60, NIC will zero PAD packet and also updates IP total length. Hence set this value to lessthan min pkt size of MAC + IP + TCP headers, BGX will anyway do the padding to transmit 64 byte pkt including FCS. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nic_main.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index 16ed203..85cc782 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -251,9 +251,14 @@ static void nic_set_tx_pkt_pad(struct nicpf *nic, int size) int lmac; u64 lmac_cfg; - /* Max value that can be set is 60 */ - if (size > 60) - size = 60; + /* There is a issue in HW where-in while sending GSO sized + * pkts as part of TSO, if pkt len falls below this size + * NIC will zero PAD packet and also updates IP total length. + * Hence set this value to lessthan min pkt size of MAC+IP+TCP + * headers, BGX will do the padding to transmit 64 byte pkt.
+ */ + if (size > 52) + size = 52; for (lmac = 0; lmac < (MAX_BGX_PER_CN88XX * MAX_LMAC_PER_BGX); lmac++) { lmac_cfg = nic_reg_read(nic, NIC_PF_LMAC_0_7_CFG | (lmac << 3)); -- cgit v1.1 From 7ceb8a1319ec64954459d474dd4a8c3c60ff0999 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Tue, 30 Aug 2016 11:36:27 +0530 Subject: net: thunderx: Fix for issues with multiple CQEs posted for a TSO packet On ThunderX 88xx pass 2.x chips when TSO is offloaded to HW, HW posts a CQE for every TSO segment transmitted. Current code does handle this, but is prone to issues when segment sizes are small, resulting in SW processing too many CQEs and also at times freeing an SKB which is not yet transmitted. This patch handles the errata in a different way and eliminates issues with the earlier approach: the TSO packet is submitted to HW with post_cqe=0, so that no CQE is posted upon completion of transmission of the TSO packet, but additional HDR + IMMEDIATE descriptors are added to SQ due to which a CQE is posted and will have required info to be used while cleanup in napi. This way only one CQE is posted for a TSO packet. Signed-off-by: Sunil Goutham Signed-off-by: David S. 
Miller --- drivers/net/ethernet/cavium/thunder/nic.h | 1 + drivers/net/ethernet/cavium/thunder/nicvf_main.c | 20 +++++-- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 64 +++++++++++++++++++--- 3 files changed, 73 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h index 83025bb..e29815d 100644 --- a/drivers/net/ethernet/cavium/thunder/nic.h +++ b/drivers/net/ethernet/cavium/thunder/nic.h @@ -279,6 +279,7 @@ struct nicvf { u8 sqs_id; bool sqs_mode; bool hw_tso; + bool t88; /* Receive buffer alloc */ u32 rb_page_offset; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index a19e73f..3240349 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -513,6 +513,7 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev, struct nicvf *nic = netdev_priv(netdev); struct snd_queue *sq; struct sq_hdr_subdesc *hdr; + struct sq_hdr_subdesc *tso_sqe; sq = &nic->qs->sq[cqe_tx->sq_idx]; @@ -527,17 +528,21 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev, nicvf_check_cqe_tx_errs(nic, cq, cqe_tx); skb = (struct sk_buff *)sq->skbuff[cqe_tx->sqe_ptr]; - /* For TSO offloaded packets only one SQE will have a valid SKB */ if (skb) { + /* Check for dummy descriptor used for HW TSO offload on 88xx */ + if (hdr->dont_send) { + /* Get actual TSO descriptors and free them */ + tso_sqe = + (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2); + nicvf_put_sq_desc(sq, tso_sqe->subdesc_cnt + 1); + } nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1); prefetch(skb); dev_consume_skb_any(skb); sq->skbuff[cqe_tx->sqe_ptr] = (u64)NULL; } else { - /* In case of HW TSO, HW sends a CQE for each segment of a TSO - * packet instead of a single CQE for the whole TSO packet - * transmitted. Each of this CQE points to the same SQE, so - * avoid freeing same SQE multiple times. 
+ /* In case of SW TSO on 88xx, only last segment will have + * a SKB attached, so just free SQEs here. */ if (!nic->hw_tso) nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1); @@ -1502,6 +1507,7 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct net_device *netdev; struct nicvf *nic; int err, qcount; + u16 sdevid; err = pci_enable_device(pdev); if (err) { @@ -1575,6 +1581,10 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!pass1_silicon(nic->pdev)) nic->hw_tso = true; + pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid); + if (sdevid == 0xA134) + nic->t88 = true; + /* Check if this VF is in QS only mode */ if (nic->sqs_mode) return 0; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index 0ff8e60..dda3ea3 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -938,6 +938,8 @@ static int nicvf_tso_count_subdescs(struct sk_buff *skb) return num_edescs + sh->gso_segs; } +#define POST_CQE_DESC_COUNT 2 + /* Get the number of SQ descriptors needed to xmit this skb */ static int nicvf_sq_subdesc_required(struct nicvf *nic, struct sk_buff *skb) { @@ -948,6 +950,10 @@ static int nicvf_sq_subdesc_required(struct nicvf *nic, struct sk_buff *skb) return subdesc_cnt; } + /* Dummy descriptors to get TSO pkt completion notification */ + if (nic->t88 && nic->hw_tso && skb_shinfo(skb)->gso_size) + subdesc_cnt += POST_CQE_DESC_COUNT; + if (skb_shinfo(skb)->nr_frags) subdesc_cnt += skb_shinfo(skb)->nr_frags; @@ -965,14 +971,21 @@ nicvf_sq_add_hdr_subdesc(struct nicvf *nic, struct snd_queue *sq, int qentry, struct sq_hdr_subdesc *hdr; hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry); - sq->skbuff[qentry] = (u64)skb; - memset(hdr, 0, SND_QUEUE_DESC_SIZE); hdr->subdesc_type = SQ_DESC_TYPE_HEADER; - /* Enable notification via CQE after processing SQE */ - hdr->post_cqe = 
1; - /* No of subdescriptors following this */ - hdr->subdesc_cnt = subdesc_cnt; + + if (nic->t88 && nic->hw_tso && skb_shinfo(skb)->gso_size) { + /* post_cqe = 0, to avoid HW posting a CQE for every TSO + * segment transmitted on 88xx. + */ + hdr->subdesc_cnt = subdesc_cnt - POST_CQE_DESC_COUNT; + } else { + sq->skbuff[qentry] = (u64)skb; + /* Enable notification via CQE after processing SQE */ + hdr->post_cqe = 1; + /* No of subdescriptors following this */ + hdr->subdesc_cnt = subdesc_cnt; + } hdr->tot_len = len; /* Offload checksum calculation to HW */ @@ -1023,6 +1036,37 @@ static inline void nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry, gather->addr = data; } +/* Add HDR + IMMEDIATE subdescriptors right after descriptors of a TSO + * packet so that a CQE is posted as a notifation for transmission of + * TSO packet. + */ +static inline void nicvf_sq_add_cqe_subdesc(struct snd_queue *sq, int qentry, + int tso_sqe, struct sk_buff *skb) +{ + struct sq_imm_subdesc *imm; + struct sq_hdr_subdesc *hdr; + + sq->skbuff[qentry] = (u64)skb; + + hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry); + memset(hdr, 0, SND_QUEUE_DESC_SIZE); + hdr->subdesc_type = SQ_DESC_TYPE_HEADER; + /* Enable notification via CQE after processing SQE */ + hdr->post_cqe = 1; + /* There is no packet to transmit here */ + hdr->dont_send = 1; + hdr->subdesc_cnt = POST_CQE_DESC_COUNT - 1; + hdr->tot_len = 1; + /* Actual TSO header SQE index, needed for cleanup */ + hdr->rsvd2 = tso_sqe; + + qentry = nicvf_get_nxt_sqentry(sq, qentry); + imm = (struct sq_imm_subdesc *)GET_SQ_DESC(sq, qentry); + memset(imm, 0, SND_QUEUE_DESC_SIZE); + imm->subdesc_type = SQ_DESC_TYPE_IMMEDIATE; + imm->len = 1; +} + /* Segment a TSO packet into 'gso_size' segments and append * them to SQ for transfer */ @@ -1096,7 +1140,7 @@ static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq, int nicvf_sq_append_skb(struct nicvf *nic, struct sk_buff *skb) { int i, size; - int subdesc_cnt; + int 
subdesc_cnt, tso_sqe = 0; int sq_num, qentry; struct queue_set *qs; struct snd_queue *sq; @@ -1131,6 +1175,7 @@ int nicvf_sq_append_skb(struct nicvf *nic, struct sk_buff *skb) /* Add SQ header subdesc */ nicvf_sq_add_hdr_subdesc(nic, sq, qentry, subdesc_cnt - 1, skb, skb->len); + tso_sqe = qentry; /* Add SQ gather subdescs */ qentry = nicvf_get_nxt_sqentry(sq, qentry); @@ -1154,6 +1199,11 @@ int nicvf_sq_append_skb(struct nicvf *nic, struct sk_buff *skb) } doorbell: + if (nic->t88 && skb_shinfo(skb)->gso_size) { + qentry = nicvf_get_nxt_sqentry(sq, qentry); + nicvf_sq_add_cqe_subdesc(sq, qentry, tso_sqe, skb); + } + /* make sure all memory stores are done before ringing doorbell */ smp_wmb(); -- cgit v1.1 From d26c638c16cb54f6fb1507e27df93ede692db572 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 30 Aug 2016 10:09:21 +0200 Subject: ipv6: add missing netconf notif when 'all' is updated The 'default' value was not advertised. Fixes: f3a1bfb11ccb ("rtnl/ipv6: use netconf msg to advertise forwarding status") Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f418d2e..2a68817 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -778,7 +778,14 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) } if (p == &net->ipv6.devconf_all->forwarding) { + int old_dflt = net->ipv6.devconf_dflt->forwarding; + net->ipv6.devconf_dflt->forwarding = newf; + if ((!newf) ^ (!old_dflt)) + inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING, + NETCONFA_IFINDEX_DEFAULT, + net->ipv6.devconf_dflt); + addrconf_forward_change(net, newf); if ((!newf) ^ (!old)) inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING, -- cgit v1.1 From 29c994e361009142ec0bca6493cc8f7b0d3c561a Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 30 Aug 2016 10:09:22 +0200 Subject: netconf: add a notif when settings are created All changes are notified, but the initial state was missing. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- net/ipv4/devinet.c | 11 +++++++---- net/ipv6/addrconf.c | 9 ++++++++- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 415e117..062a67c 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2232,7 +2232,7 @@ static struct devinet_sysctl_table { }; static int __devinet_sysctl_register(struct net *net, char *dev_name, - struct ipv4_devconf *p) + int ifindex, struct ipv4_devconf *p) { int i; struct devinet_sysctl_table *t; @@ -2255,6 +2255,8 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name, goto free; p->sysctl = t; + + inet_netconf_notify_devconf(net, NETCONFA_ALL, ifindex, p); return 0; free: @@ -2286,7 +2288,7 @@ static int devinet_sysctl_register(struct in_device *idev) if (err) return err; err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name, - &idev->cnf); + idev->dev->ifindex, &idev->cnf); if (err) neigh_sysctl_unregister(idev->arp_parms); return err; @@ -2347,11 +2349,12 @@ static __net_init int devinet_init_net(struct net *net) } #ifdef CONFIG_SYSCTL - err = __devinet_sysctl_register(net, "all", all); + err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all); if (err < 0) goto err_reg_all; - err = __devinet_sysctl_register(net, "default", dflt); + err = __devinet_sysctl_register(net, "default", + NETCONFA_IFINDEX_DEFAULT, dflt); if (err < 0) goto err_reg_dflt; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2a68817..bdf368e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6032,7 +6032,7 @@ static const struct ctl_table addrconf_sysctl[] = { static int __addrconf_sysctl_register(struct net *net, char *dev_name, struct inet6_dev *idev, struct ipv6_devconf *p) { - int i; + int i, ifindex; struct ctl_table *table; char path[sizeof("net/ipv6/conf/") + IFNAMSIZ]; @@ -6052,6 +6052,13 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, if (!p->sysctl_header) goto free; + if 
(!strcmp(dev_name, "all")) + ifindex = NETCONFA_IFINDEX_ALL; + else if (!strcmp(dev_name, "default")) + ifindex = NETCONFA_IFINDEX_DEFAULT; + else + ifindex = idev->dev->ifindex; + inet6_netconf_notify_devconf(net, NETCONFA_ALL, ifindex, p); return 0; free: -- cgit v1.1 From 85a3d4a9356b595d5440c3f1bf07ee7cecca1567 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 30 Aug 2016 17:44:29 +0200 Subject: net: bridge: don't increment tx_dropped in br_do_proxy_arp pskb_may_pull may fail due to various reasons (e.g. alloc failure), but the skb isn't changed/dropped and processing continues so we shouldn't increment tx_dropped. CC: Kyeyoon Park CC: Roopa Prabhu CC: Stephen Hemminger CC: bridge@lists.linux-foundation.org Fixes: 958501163ddd ("bridge: Add support for IEEE 802.11 Proxy ARP") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_input.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 8e48620..abe11f0 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -80,13 +80,10 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, BR_INPUT_SKB_CB(skb)->proxyarp_replied = false; - if (dev->flags & IFF_NOARP) + if ((dev->flags & IFF_NOARP) || + !pskb_may_pull(skb, arp_hdr_len(dev))) return; - if (!pskb_may_pull(skb, arp_hdr_len(dev))) { - dev->stats.tx_dropped++; - return; - } parp = arp_hdr(skb); if (parp->ar_pro != htons(ETH_P_IP) || -- cgit v1.1 From 28b346cbc0715ae45b2814d857f1d8a7e6817ed8 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Tue, 30 Aug 2016 11:55:23 -0400 Subject: tcp: fastopen: fix rcv_wup initialization for TFO server on SYN/data Yuchung noticed that on the first TFO server data packet sent after the (TFO) handshake, the server echoed the TCP timestamp value in the SYN/data instead of the timestamp value in the final ACK of the handshake. This problem did not happen on regular opens. 
The tcp_replace_ts_recent() logic that decides whether to remember an incoming TS value needs tp->rcv_wup to hold the latest receive sequence number that we have ACKed (latest tp->rcv_nxt we have ACKed). This commit fixes this issue by ensuring that a TFO server properly updates tp->rcv_wup to match tp->rcv_nxt at the time it sends a SYN/ACK for the SYN/data. Reported-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Fixes: 168a8f58059a ("tcp: TCP Fast Open Server - main code path") Signed-off-by: David S. Miller --- net/ipv4/tcp_fastopen.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 54d9f9b..62a5751 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -226,6 +226,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, tcp_fastopen_add_skb(child, skb); tcp_rsk(req)->rcv_nxt = tp->rcv_nxt; + tp->rcv_wup = tp->rcv_nxt; /* tcp_conn_request() is sending the SYNACK, * and queues the child into listener accept queue. */ -- cgit v1.1 From 6b4e3181d7bd5ca5ab6f45929e4a5ffa7ab4ab7f Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 1 Sep 2016 16:14:41 -0700 Subject: mm, oom: prevent premature OOM killer invocation for high order request There have been several reports about pre-mature OOM killer invocation in 4.7 kernel when order-2 allocation request (for the kernel stack) invoked OOM killer even during basic workloads (light IO or even kernel compile on some filesystems). In all reported cases the memory is fragmented and there are no order-2+ pages available. There is usually a large amount of slab memory (usually dentries/inodes) and further debugging has shown that there are way too many unmovable blocks which are skipped during the compaction. Multiple reporters have confirmed that the current linux-next which includes [1] and [2] helped and OOMs are not reproducible anymore. 
A simpler fix for the late rc and stable is to simply ignore the compaction feedback and retry as long as there is reclaim progress and we are not getting OOM for order-0 pages. We already do that for CONFIG_COMPACTION=n so let's reuse the same code when compaction is enabled as well. [1] http://lkml.kernel.org/r/20160810091226.6709-1-vbabka@suse.cz [2] http://lkml.kernel.org/r/f7a9ea9d-bb88-bfd6-e340-3a933559305a@suse.cz Fixes: 0a0337e0d1d1 ("mm, oom: rework oom detection") Link: http://lkml.kernel.org/r/20160823074339.GB23577@dhcp22.suse.cz Signed-off-by: Michal Hocko Tested-by: Olaf Hering Tested-by: Ralf-Peter Rohbeck Cc: Markus Trippelsdorf Cc: Arkadiusz Miskiewicz Cc: Ralf-Peter Rohbeck Cc: Jiri Slaby Cc: Vlastimil Babka Cc: Joonsoo Kim Cc: Tetsuo Handa Cc: David Rientjes Cc: [4.7.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 51 ++------------------------------------------------- 1 file changed, 2 insertions(+), 49 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3fbe73a..7791a03 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3137,54 +3137,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, return NULL; } -static inline bool -should_compact_retry(struct alloc_context *ac, int order, int alloc_flags, - enum compact_result compact_result, - enum compact_priority *compact_priority, - int compaction_retries) -{ - int max_retries = MAX_COMPACT_RETRIES; - - if (!order) - return false; - - /* - * compaction considers all the zone as desperately out of memory - * so it doesn't really make much sense to retry except when the - * failure could be caused by insufficient priority - */ - if (compaction_failed(compact_result)) { - if (*compact_priority > MIN_COMPACT_PRIORITY) { - (*compact_priority)--; - return true; - } - return false; - } - - /* - * make sure the compaction wasn't deferred or didn't bail out early - * due to locks contention before we declare that we should give up. 
- * But do not retry if the given zonelist is not suitable for - * compaction. - */ - if (compaction_withdrawn(compact_result)) - return compaction_zonelist_suitable(ac, order, alloc_flags); - - /* - * !costly requests are much more important than __GFP_REPEAT - * costly ones because they are de facto nofail and invoke OOM - * killer to move on while costly can fail and users are ready - * to cope with that. 1/4 retries is rather arbitrary but we - * would need much more detailed feedback from compaction to - * make a better decision. - */ - if (order > PAGE_ALLOC_COSTLY_ORDER) - max_retries /= 4; - if (compaction_retries <= max_retries) - return true; - - return false; -} #else static inline struct page * __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, @@ -3195,6 +3147,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, return NULL; } +#endif /* CONFIG_COMPACTION */ + static inline bool should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_flags, enum compact_result compact_result, @@ -3221,7 +3175,6 @@ should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_fla } return false; } -#endif /* CONFIG_COMPACTION */ /* Perform direct synchronous page reclaim */ static int -- cgit v1.1 From 070c43eea5043e950daa423707ae3c77e2f48edb Mon Sep 17 00:00:00 2001 From: Thiago Jung Bauermann Date: Thu, 1 Sep 2016 16:14:44 -0700 Subject: kexec: fix double-free when failing to relocate the purgatory If kexec_apply_relocations fails, kexec_load_purgatory frees pi->sechdrs and pi->purgatory_buf. This is redundant, because in case of error kimage_file_prepare_segments calls kimage_file_post_load_cleanup, which will also free those buffers. 
This causes two warnings like the following, one for pi->sechdrs and the other for pi->purgatory_buf: kexec-bzImage64: Loading purgatory failed ------------[ cut here ]------------ WARNING: CPU: 1 PID: 2119 at mm/vmalloc.c:1490 __vunmap+0xc1/0xd0 Trying to vfree() nonexistent vm area (ffffc90000e91000) Modules linked in: CPU: 1 PID: 2119 Comm: kexec Not tainted 4.8.0-rc3+ #5 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: dump_stack+0x4d/0x65 __warn+0xcb/0xf0 warn_slowpath_fmt+0x4f/0x60 ? find_vmap_area+0x19/0x70 ? kimage_file_post_load_cleanup+0x47/0xb0 __vunmap+0xc1/0xd0 vfree+0x2e/0x70 kimage_file_post_load_cleanup+0x5e/0xb0 SyS_kexec_file_load+0x448/0x680 ? putname+0x54/0x60 ? do_sys_open+0x190/0x1f0 entry_SYSCALL_64_fastpath+0x13/0x8f ---[ end trace 158bb74f5950ca2b ]--- Fix by setting pi->sechdrs and pi->purgatory_buf to NULL, since vfree won't try to free a NULL pointer. Link: http://lkml.kernel.org/r/1472083546-23683-1-git-send-email-bauerman@linux.vnet.ibm.com Signed-off-by: Thiago Jung Bauermann Acked-by: Baoquan He Cc: "Eric W. 
Biederman" Cc: Vivek Goyal Cc: Dave Young Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kexec_file.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 503bc2d..037c321 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -887,7 +887,10 @@ int kexec_load_purgatory(struct kimage *image, unsigned long min, return 0; out: vfree(pi->sechdrs); + pi->sechdrs = NULL; + vfree(pi->purgatory_buf); + pi->purgatory_buf = NULL; return ret; } -- cgit v1.1 From 236dec051078a8691950f56949612b4b74107e48 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 1 Sep 2016 16:14:47 -0700 Subject: kconfig: tinyconfig: provide whole choice blocks to avoid warnings Using "make tinyconfig" produces a couple of annoying warnings that show up for build test machines all the time: .config:966:warning: override: NOHIGHMEM changes choice state .config:965:warning: override: SLOB changes choice state .config:963:warning: override: KERNEL_XZ changes choice state .config:962:warning: override: CC_OPTIMIZE_FOR_SIZE changes choice state .config:933:warning: override: SLOB changes choice state .config:930:warning: override: CC_OPTIMIZE_FOR_SIZE changes choice state .config:870:warning: override: SLOB changes choice state .config:868:warning: override: KERNEL_XZ changes choice state .config:867:warning: override: CC_OPTIMIZE_FOR_SIZE changes choice state I've made a previous attempt at fixing them and we discussed a number of alternatives. I tried changing the Makefile to use "merge_config.sh -n $(fragment-list)" but couldn't get that to work properly. This is yet another approach, based on the observation that we do want to see a warning for conflicting 'choice' options, and that we can simply make them non-conflicting by listing all other options as disabled. This is a trivial patch that we can apply independent of plans for other changes. 
Link: http://lkml.kernel.org/r/20160829214952.1334674-2-arnd@arndb.de Link: https://storage.kernelci.org/mainline/v4.7-rc6/x86-tinyconfig/build.log https://patchwork.kernel.org/patch/9212749/ Signed-off-by: Arnd Bergmann Reviewed-by: Josh Triplett Reviewed-by: Masahiro Yamada Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/configs/tiny.config | 2 ++ kernel/configs/tiny.config | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config index 4e2ecfa..4b429df 100644 --- a/arch/x86/configs/tiny.config +++ b/arch/x86/configs/tiny.config @@ -1 +1,3 @@ CONFIG_NOHIGHMEM=y +# CONFIG_HIGHMEM4G is not set +# CONFIG_HIGHMEM64G is not set diff --git a/kernel/configs/tiny.config b/kernel/configs/tiny.config index c2de56a..7fa0c4a 100644 --- a/kernel/configs/tiny.config +++ b/kernel/configs/tiny.config @@ -1,4 +1,12 @@ +# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE is not set CONFIG_CC_OPTIMIZE_FOR_SIZE=y +# CONFIG_KERNEL_GZIP is not set +# CONFIG_KERNEL_BZIP2 is not set +# CONFIG_KERNEL_LZMA is not set CONFIG_KERNEL_XZ=y +# CONFIG_KERNEL_LZO is not set +# CONFIG_KERNEL_LZ4 is not set CONFIG_OPTIMIZE_INLINING=y +# CONFIG_SLAB is not set +# CONFIG_SLUB is not set CONFIG_SLOB=y -- cgit v1.1 From ed76b7a131f41c91b0c725d472f9b969d75ce888 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 1 Sep 2016 16:14:50 -0700 Subject: lib/test_hash.c: fix warning in two-dimensional array init lib/test_hash.c: In function 'test_hash_init': lib/test_hash.c:146:2: warning: missing braces around initializer [-Wmissing-braces] Fixes: 468a9428521e7d00 (": Add support for architecture-specific functions") Link: http://lkml.kernel.org/r/20160829214952.1334674-3-arnd@arndb.de Signed-off-by: Geert Uytterhoeven Signed-off-by: Arnd Bergmann Acked-by: George Spelvin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/lib/test_hash.c b/lib/test_hash.c index 66c5fc8..81702ee 100644 --- a/lib/test_hash.c +++ b/lib/test_hash.c @@ -143,7 +143,7 @@ static int __init test_hash_init(void) { char buf[SIZE+1]; - u32 string_or = 0, hash_or[2][33] = { 0 }; + u32 string_or = 0, hash_or[2][33] = { { 0, } }; unsigned tests = 0; unsigned long long h64 = 0; int i, j; -- cgit v1.1 From e6173ba42bbdba05fd4f3021c0beda0506271507 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 1 Sep 2016 16:14:53 -0700 Subject: lib/test_hash.c: fix warning in preprocessor symbol evaluation Some versions of gcc don't like tests for the value of an undefined preprocessor symbol, even in the #else branch of an #ifndef: lib/test_hash.c:224:7: warning: "HAVE_ARCH__HASH_32" is not defined [-Wundef] #elif HAVE_ARCH__HASH_32 != 1 ^ lib/test_hash.c:229:7: warning: "HAVE_ARCH_HASH_32" is not defined [-Wundef] #elif HAVE_ARCH_HASH_32 != 1 ^ lib/test_hash.c:234:7: warning: "HAVE_ARCH_HASH_64" is not defined [-Wundef] #elif HAVE_ARCH_HASH_64 != 1 ^ Seen with gcc 4.9, not seen with 4.1.2. Change the logic to only check the value inside an #ifdef to fix this. Fixes: 468a9428521e7d00 (": Add support for architecture-specific functions") Link: http://lkml.kernel.org/r/20160829214952.1334674-4-arnd@arndb.de Signed-off-by: Geert Uytterhoeven Signed-off-by: Arnd Bergmann Acked-by: George Spelvin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_hash.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/lib/test_hash.c b/lib/test_hash.c index 81702ee..cac20c5 100644 --- a/lib/test_hash.c +++ b/lib/test_hash.c @@ -219,21 +219,27 @@ test_hash_init(void) } /* Issue notices about skipped tests. 
*/ -#ifndef HAVE_ARCH__HASH_32 - pr_info("__hash_32() has no arch implementation to test."); -#elif HAVE_ARCH__HASH_32 != 1 +#ifdef HAVE_ARCH__HASH_32 +#if HAVE_ARCH__HASH_32 != 1 pr_info("__hash_32() is arch-specific; not compared to generic."); #endif -#ifndef HAVE_ARCH_HASH_32 - pr_info("hash_32() has no arch implementation to test."); -#elif HAVE_ARCH_HASH_32 != 1 +#else + pr_info("__hash_32() has no arch implementation to test."); +#endif +#ifdef HAVE_ARCH_HASH_32 +#if HAVE_ARCH_HASH_32 != 1 pr_info("hash_32() is arch-specific; not compared to generic."); #endif -#ifndef HAVE_ARCH_HASH_64 - pr_info("hash_64() has no arch implementation to test."); -#elif HAVE_ARCH_HASH_64 != 1 +#else + pr_info("hash_32() has no arch implementation to test."); +#endif +#ifdef HAVE_ARCH_HASH_64 +#if HAVE_ARCH_HASH_64 != 1 pr_info("hash_64() is arch-specific; not compared to generic."); #endif +#else + pr_info("hash_64() has no arch implementation to test."); +#endif pr_notice("%u tests passed.", tests); -- cgit v1.1 From 6aa303defb7454a2520c4ddcdf6b081f62a15890 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 1 Sep 2016 16:14:55 -0700 Subject: mm, vmscan: only allocate and reclaim from zones with pages managed by the buddy allocator Firmware Assisted Dump (FA_DUMP) on ppc64 reserves substantial amounts of memory when booting a secondary kernel. Srikar Dronamraju reported that multiple nodes may have no memory managed by the buddy allocator but still return true for populated_zone(). Commit 1d82de618ddd ("mm, vmscan: make kswapd reclaim in terms of nodes") was reported to cause kswapd to spin at 100% CPU usage when fadump was enabled. The old code happened to deal with the situation of a populated node with zero free pages by co-incidence but the current code tries to reclaim populated zones without realising that is impossible. We cannot just convert populated_zone() as many existing users really need to check for present_pages. 
This patch introduces a managed_zone() helper and uses it in the few cases where it is critical that the check is made for managed pages -- zonelist construction and page reclaim. Link: http://lkml.kernel.org/r/20160831195104.GB8119@techsingularity.net Signed-off-by: Mel Gorman Reported-by: Srikar Dronamraju Tested-by: Srikar Dronamraju Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 16 ++++++++++++++-- mm/page_alloc.c | 4 ++-- mm/vmscan.c | 22 +++++++++++----------- 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index d572b78..7f2ae99 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -828,9 +828,21 @@ unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); */ #define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones) -static inline int populated_zone(struct zone *zone) +/* + * Returns true if a zone has pages managed by the buddy allocator. + * All the reclaim decisions have to use this function rather than + * populated_zone(). If the whole zone is reserved then we can easily + * end up with populated_zone() && !managed_zone(). 
+ */ +static inline bool managed_zone(struct zone *zone) +{ + return zone->managed_pages; +} + +/* Returns true if a zone has memory */ +static inline bool populated_zone(struct zone *zone) { - return (!!zone->present_pages); + return zone->present_pages; } extern int movable_zone; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7791a03..a2214c6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4360,7 +4360,7 @@ static int build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist, do { zone_type--; zone = pgdat->node_zones + zone_type; - if (populated_zone(zone)) { + if (managed_zone(zone)) { zoneref_set_zone(zone, &zonelist->_zonerefs[nr_zones++]); check_highest_zone(zone_type); @@ -4598,7 +4598,7 @@ static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes) for (j = 0; j < nr_nodes; j++) { node = node_order[j]; z = &NODE_DATA(node)->node_zones[zone_type]; - if (populated_zone(z)) { + if (managed_zone(z)) { zoneref_set_zone(z, &zonelist->_zonerefs[pos++]); check_highest_zone(zone_type); diff --git a/mm/vmscan.c b/mm/vmscan.c index 374d95d..b1e12a1 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1665,7 +1665,7 @@ static bool inactive_reclaimable_pages(struct lruvec *lruvec, for (zid = sc->reclaim_idx; zid >= 0; zid--) { zone = &pgdat->node_zones[zid]; - if (!populated_zone(zone)) + if (!managed_zone(zone)) continue; if (zone_page_state_snapshot(zone, NR_ZONE_LRU_BASE + @@ -2036,7 +2036,7 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, struct zone *zone = &pgdat->node_zones[zid]; unsigned long inactive_zone, active_zone; - if (!populated_zone(zone)) + if (!managed_zone(zone)) continue; inactive_zone = zone_page_state(zone, @@ -2171,7 +2171,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, for (z = 0; z < MAX_NR_ZONES; z++) { struct zone *zone = &pgdat->node_zones[z]; - if (!populated_zone(zone)) + if (!managed_zone(zone)) continue; total_high_wmark += high_wmark_pages(zone); @@ 
-2510,7 +2510,7 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat, /* If compaction would go ahead or the allocation would succeed, stop */ for (z = 0; z <= sc->reclaim_idx; z++) { struct zone *zone = &pgdat->node_zones[z]; - if (!populated_zone(zone)) + if (!managed_zone(zone)) continue; switch (compaction_suitable(zone, sc->order, 0, sc->reclaim_idx)) { @@ -2840,7 +2840,7 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat) for (i = 0; i <= ZONE_NORMAL; i++) { zone = &pgdat->node_zones[i]; - if (!populated_zone(zone) || + if (!managed_zone(zone) || pgdat_reclaimable_pages(pgdat) == 0) continue; @@ -3141,7 +3141,7 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, int classzone_idx) for (i = 0; i <= classzone_idx; i++) { struct zone *zone = pgdat->node_zones + i; - if (!populated_zone(zone)) + if (!managed_zone(zone)) continue; if (!zone_balanced(zone, order, classzone_idx)) @@ -3169,7 +3169,7 @@ static bool kswapd_shrink_node(pg_data_t *pgdat, sc->nr_to_reclaim = 0; for (z = 0; z <= sc->reclaim_idx; z++) { zone = pgdat->node_zones + z; - if (!populated_zone(zone)) + if (!managed_zone(zone)) continue; sc->nr_to_reclaim += max(high_wmark_pages(zone), SWAP_CLUSTER_MAX); @@ -3242,7 +3242,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) if (buffer_heads_over_limit) { for (i = MAX_NR_ZONES - 1; i >= 0; i--) { zone = pgdat->node_zones + i; - if (!populated_zone(zone)) + if (!managed_zone(zone)) continue; sc.reclaim_idx = i; @@ -3262,7 +3262,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) */ for (i = classzone_idx; i >= 0; i--) { zone = pgdat->node_zones + i; - if (!populated_zone(zone)) + if (!managed_zone(zone)) continue; if (zone_balanced(zone, sc.order, classzone_idx)) @@ -3508,7 +3508,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx) pg_data_t *pgdat; int z; - if (!populated_zone(zone)) + if (!managed_zone(zone)) return; if 
(!cpuset_zone_allowed(zone, GFP_KERNEL | __GFP_HARDWALL)) @@ -3522,7 +3522,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx) /* Only wake kswapd if all zones are unbalanced */ for (z = 0; z <= classzone_idx; z++) { zone = pgdat->node_zones + z; - if (!populated_zone(zone)) + if (!managed_zone(zone)) continue; if (zone_balanced(zone, order, classzone_idx)) -- cgit v1.1 From 8a793bea2745d9876030f892a84fc8be180f2e78 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 1 Sep 2016 16:14:58 -0700 Subject: drivers/scsi/wd719x.c: remove last declaration using DEFINE_PCI_DEVICE_TABLE Convert it to the preferred const struct pci_device_id instead. Link: http://lkml.kernel.org/r/95c5e4100c3cd4eda643624f5b70e8d7abceb86c.1472660229.git.joe@perches.com Signed-off-by: Joe Perches Reviewed-by: Bart Van Assche Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/scsi/wd719x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/wd719x.c b/drivers/scsi/wd719x.c index e3da1a2..2a9da2e 100644 --- a/drivers/scsi/wd719x.c +++ b/drivers/scsi/wd719x.c @@ -962,7 +962,7 @@ static void wd719x_pci_remove(struct pci_dev *pdev) scsi_host_put(sh); } -static DEFINE_PCI_DEVICE_TABLE(wd719x_pci_table) = { +static const struct pci_device_id wd719x_pci_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_WD, 0x3296) }, {} }; -- cgit v1.1 From 7e932159901183283cd82d797bc9a7c681e48e9c Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 1 Sep 2016 16:15:01 -0700 Subject: treewide: remove references to the now unnecessary DEFINE_PCI_DEVICE_TABLE It's been eliminated from the sources, remove it from everywhere else. Link: http://lkml.kernel.org/r/076eff466fd7edb550c25c8b25d76924ca0eba62.1472660229.git.joe@perches.com Signed-off-by: Joe Perches Cc: "James E.J. Bottomley" Cc: "Martin K. 
Petersen" Cc: Bjorn Helgaas Cc: Andy Whitcroft Cc: Jonathan Corbet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/PCI/pci.txt | 1 - include/linux/pci.h | 9 --------- scripts/checkpatch.pl | 9 --------- scripts/tags.sh | 1 - 4 files changed, 20 deletions(-) diff --git a/Documentation/PCI/pci.txt b/Documentation/PCI/pci.txt index 123881f..77f49dc 100644 --- a/Documentation/PCI/pci.txt +++ b/Documentation/PCI/pci.txt @@ -124,7 +124,6 @@ initialization with a pointer to a structure describing the driver The ID table is an array of struct pci_device_id entries ending with an all-zero entry. Definitions with static const are generally preferred. -Use of the deprecated macro DEFINE_PCI_DEVICE_TABLE should be avoided. Each entry consists of: diff --git a/include/linux/pci.h b/include/linux/pci.h index fbc1fa6..0ab8359 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -683,15 +683,6 @@ struct pci_driver { #define to_pci_driver(drv) container_of(drv, struct pci_driver, driver) /** - * DEFINE_PCI_DEVICE_TABLE - macro used to describe a pci device table - * @_table: device table name - * - * This macro is deprecated and should not be used in new code. - */ -#define DEFINE_PCI_DEVICE_TABLE(_table) \ - const struct pci_device_id _table[] - -/** * PCI_DEVICE - macro used to describe a specific pci device * @vend: the 16 bit PCI Vendor ID * @dev: the 16 bit PCI Device ID diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 4de3cc4..206a6b3 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3570,15 +3570,6 @@ sub process { } } -# check for uses of DEFINE_PCI_DEVICE_TABLE - if ($line =~ /\bDEFINE_PCI_DEVICE_TABLE\s*\(\s*(\w+)\s*\)\s*=/) { - if (WARN("DEFINE_PCI_DEVICE_TABLE", - "Prefer struct pci_device_id over deprecated DEFINE_PCI_DEVICE_TABLE\n" . 
$herecurr) && - $fix) { - $fixed[$fixlinenr] =~ s/\b(?:static\s+|)DEFINE_PCI_DEVICE_TABLE\s*\(\s*(\w+)\s*\)\s*=\s*/static const struct pci_device_id $1\[\] = /; - } - } - # check for new typedefs, only function parameters and sparse annotations # make sense. if ($line =~ /\btypedef\s/ && diff --git a/scripts/tags.sh b/scripts/tags.sh index ed7eef2..b3775a96 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -206,7 +206,6 @@ regex_c=( '/\ Date: Thu, 1 Sep 2016 16:15:04 -0700 Subject: printk/nmi: avoid direct printk()-s from __printk_nmi_flush() __printk_nmi_flush() can be called from nmi_panic(), therefore it has to test whether it's executed in NMI context and thus must route the messages through deferred printk() or via direct printk(). This is to avoid potential deadlocks, as described in commit cf9b1106c81c ("printk/nmi: flush NMI messages on the system panic"). However there remain two places where __printk_nmi_flush() does unconditional direct printk() calls: - pr_err("printk_nmi_flush: internal error ...") - pr_cont("\n") Factor out print_nmi_seq_line() parts into a new printk_nmi_flush_line() function, which takes care of in_nmi(), and use it in __printk_nmi_flush() for printing and error-reporting. Link: http://lkml.kernel.org/r/20160830161354.581-1-sergey.senozhatsky@gmail.com Signed-off-by: Sergey Senozhatsky Cc: Petr Mladek Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk/nmi.c | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/kernel/printk/nmi.c b/kernel/printk/nmi.c index b69eb8a..16bab47 100644 --- a/kernel/printk/nmi.c +++ b/kernel/printk/nmi.c @@ -99,27 +99,33 @@ again: return add; } -/* - * printk one line from the temporary buffer from @start index until - * and including the @end index. 
- */ -static void print_nmi_seq_line(struct nmi_seq_buf *s, int start, int end) +static void printk_nmi_flush_line(const char *text, int len) { - const char *buf = s->buffer + start; - /* * The buffers are flushed in NMI only on panic. The messages must * go only into the ring buffer at this stage. Consoles will get * explicitly called later when a crashdump is not generated. */ if (in_nmi()) - printk_deferred("%.*s", (end - start) + 1, buf); + printk_deferred("%.*s", len, text); else - printk("%.*s", (end - start) + 1, buf); + printk("%.*s", len, text); } /* + * printk one line from the temporary buffer from @start index until + * and including the @end index. + */ +static void printk_nmi_flush_seq_line(struct nmi_seq_buf *s, + int start, int end) +{ + const char *buf = s->buffer + start; + + printk_nmi_flush_line(buf, (end - start) + 1); +} + +/* * Flush data from the associated per_CPU buffer. The function * can be called either via IRQ work or independently. */ @@ -150,9 +156,11 @@ more: * the buffer an unexpected way. If we printed something then * @len must only increase. */ - if (i && i >= len) - pr_err("printk_nmi_flush: internal error: i=%d >= len=%zu\n", - i, len); + if (i && i >= len) { + const char *msg = "printk_nmi_flush: internal error\n"; + + printk_nmi_flush_line(msg, strlen(msg)); + } if (!len) goto out; /* Someone else has already flushed the buffer. */ @@ -166,14 +174,14 @@ more: /* Print line by line. */ for (; i < size; i++) { if (s->buffer[i] == '\n') { - print_nmi_seq_line(s, last_i, i); + printk_nmi_flush_seq_line(s, last_i, i); last_i = i + 1; } } /* Check if there was a partial line. 
*/ if (last_i < size) { - print_nmi_seq_line(s, last_i, size - 1); - pr_cont("\n"); + printk_nmi_flush_seq_line(s, last_i, size - 1); + printk_nmi_flush_line("\n", strlen("\n")); } /* -- cgit v1.1 From c11600e4fed67ae4cd6a8096936afd445410e8ed Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 1 Sep 2016 16:15:07 -0700 Subject: mm, mempolicy: task->mempolicy must be NULL before dropping final reference KASAN allocates memory from the page allocator as part of kmem_cache_free(), and that can reference current->mempolicy through any number of allocation functions. It needs to be NULL'd out before the final reference is dropped to prevent a use-after-free bug: BUG: KASAN: use-after-free in alloc_pages_current+0x363/0x370 at addr ffff88010b48102c CPU: 0 PID: 15425 Comm: trinity-c2 Not tainted 4.8.0-rc2+ #140 ... Call Trace: dump_stack kasan_object_err kasan_report_error __asan_report_load2_noabort alloc_pages_current <-- use after free depot_save_stack save_stack kasan_slab_free kmem_cache_free __mpol_put <-- free do_exit This patch sets current->mempolicy to NULL before dropping the final reference. Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1608301442180.63329@chino.kir.corp.google.com Fixes: cd11016e5f52 ("mm, kasan: stackdepot implementation. 
Enable stackdepot for SLAB") Signed-off-by: David Rientjes Reported-by: Vegard Nossum Acked-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: [4.6+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 4 ++++ kernel/exit.c | 7 +------ mm/mempolicy.c | 17 +++++++++++++++++ 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 4429d25..5e5b296 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -195,6 +195,7 @@ static inline bool vma_migratable(struct vm_area_struct *vma) } extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long); +extern void mpol_put_task_policy(struct task_struct *); #else @@ -297,5 +298,8 @@ static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma, return -1; /* no node preference */ } +static inline void mpol_put_task_policy(struct task_struct *task) +{ +} #endif /* CONFIG_NUMA */ #endif diff --git a/kernel/exit.c b/kernel/exit.c index 2f974ae..091a78b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -848,12 +848,7 @@ void do_exit(long code) TASKS_RCU(preempt_enable()); exit_notify(tsk, group_dead); proc_exit_connector(tsk); -#ifdef CONFIG_NUMA - task_lock(tsk); - mpol_put(tsk->mempolicy); - tsk->mempolicy = NULL; - task_unlock(tsk); -#endif + mpol_put_task_policy(tsk); #ifdef CONFIG_FUTEX if (unlikely(current->pi_state_cache)) kfree(current->pi_state_cache); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index d8c4e38..2da72a5 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2336,6 +2336,23 @@ out: return ret; } +/* + * Drop the (possibly final) reference to task->mempolicy. It needs to be + * dropped after task->mempolicy is set to NULL so that any allocation done as + * part of its kmem_cache_free(), such as by KASAN, doesn't reference a freed + * policy. 
+ */ +void mpol_put_task_policy(struct task_struct *task) +{ + struct mempolicy *pol; + + task_lock(task); + pol = task->mempolicy; + task->mempolicy = NULL; + task_unlock(task); + mpol_put(pol); +} + static void sp_delete(struct shared_policy *sp, struct sp_node *n) { pr_debug("deleting %lx-l%lx\n", n->start, n->end); -- cgit v1.1 From c4e297386bd1621b83f6f7d58a729fb770597a91 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 1 Sep 2016 16:15:09 -0700 Subject: MAINTAINERS: Vladimir has moved vdavydov@{parallels,virtuozzo}.com will bounce from now on. Link: http://lkml.kernel.org/r/20160831180752.GB10353@esperanza Signed-off-by: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .mailmap | 2 ++ MAINTAINERS | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index 2a91c14..093a8ac 100644 --- a/.mailmap +++ b/.mailmap @@ -158,6 +158,8 @@ Valdis Kletnieks Viresh Kumar Viresh Kumar Viresh Kumar +Vladimir Davydov +Vladimir Davydov Takashi YOSHII Yusuke Goda Gustavo Padovan diff --git a/MAINTAINERS b/MAINTAINERS index 71aa5da..d44be8a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3247,7 +3247,7 @@ F: kernel/cpuset.c CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG) M: Johannes Weiner M: Michal Hocko -M: Vladimir Davydov +M: Vladimir Davydov L: cgroups@vger.kernel.org L: linux-mm@kvack.org S: Maintained -- cgit v1.1 From 735f2770a770156100f534646158cb58cb8b2939 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 1 Sep 2016 16:15:13 -0700 Subject: kernel/fork: fix CLONE_CHILD_CLEARTID regression in nscd Commit fec1d0115240 ("[PATCH] Disable CLONE_CHILD_CLEARTID for abnormal exit") has caused a subtle regression in nscd which uses CLONE_CHILD_CLEARTID to clear the nscd_certainly_running flag in the shared databases, so that the clients are notified when nscd is restarted. 
Now, when nscd uses a non-persistent database, clients that have it mapped keep thinking the database is being updated by nscd, when in fact nscd has created a new (anonymous) one (for non-persistent databases it uses an unlinked file as backend). The original proposal for the CLONE_CHILD_CLEARTID change claimed (https://lkml.org/lkml/2006/10/25/233): : The NPTL library uses the CLONE_CHILD_CLEARTID flag on clone() syscalls : on behalf of pthread_create() library calls. This feature is used to : request that the kernel clear the thread-id in user space (at an address : provided in the syscall) when the thread disassociates itself from the : address space, which is done in mm_release(). : : Unfortunately, when a multi-threaded process incurs a core dump (such as : from a SIGSEGV), the core-dumping thread sends SIGKILL signals to all of : the other threads, which then proceed to clear their user-space tids : before synchronizing in exit_mm() with the start of core dumping. This : misrepresents the state of process's address space at the time of the : SIGSEGV and makes it more difficult for someone to debug NPTL and glibc : problems (misleading him/her to conclude that the threads had gone away : before the fault). : : The fix below is to simply avoid the CLONE_CHILD_CLEARTID action if a : core dump has been initiated. The resulting patch from Roland (https://lkml.org/lkml/2006/10/26/269) seems to have a larger scope than the original patch asked for. It seems that limiting the scope of the check to core dumping should work for the SIGSEGV issue described above.
[Changelog partly based on Andreas' description] Fixes: fec1d0115240 ("[PATCH] Disable CLONE_CHILD_CLEARTID for abnormal exit") Link: http://lkml.kernel.org/r/1471968749-26173-1-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Tested-by: William Preston Acked-by: Oleg Nesterov Cc: Roland McGrath Cc: Andreas Schwab Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index aaf7823..93bdba1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -913,14 +913,12 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) deactivate_mm(tsk, mm); /* - * If we're exiting normally, clear a user-space tid field if - * requested. We leave this alone when dying by signal, to leave - * the value intact in a core dump, and to save the unnecessary - * trouble, say, a killed vfork parent shouldn't touch this mm. - * Userland only wants this done for a sys_exit. + * Signal userspace if we're not exiting with a core dump + * because we want to leave the value intact for debugging + * purposes. */ if (tsk->clear_child_tid) { - if (!(tsk->flags & PF_SIGNALED) && + if (!(tsk->signal->flags & SIGNAL_GROUP_COREDUMP) && atomic_read(&mm->mm_users) > 1) { /* * We don't check the error code - if userspace has -- cgit v1.1 From 1e1011af7a5725141aa4c8d30132acd93fe3da4e Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Thu, 1 Sep 2016 16:15:15 -0700 Subject: rapidio/documentation/mport_cdev: add missing parameter description Add missing description for rio_mport_cdev driver parameter 'dma_timeout'. This patch is applicable to kernel versions starting from v4.6. 
Link: http://lkml.kernel.org/r/20160901173104.2928-1-alexandre.bounine@idt.com Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Andre van Herk Cc: Barry Wood Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/rapidio/mport_cdev.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/rapidio/mport_cdev.txt b/Documentation/rapidio/mport_cdev.txt index 6e491a6..a53f786 100644 --- a/Documentation/rapidio/mport_cdev.txt +++ b/Documentation/rapidio/mport_cdev.txt @@ -80,6 +80,10 @@ functionality of their platform when planning to use this driver: III. Module parameters +- 'dma_timeout' - DMA transfer completion timeout (in msec, default value 3000). + This parameter set a maximum completion wait time for SYNC mode DMA + transfer requests and for RIO_WAIT_FOR_ASYNC ioctl requests. + - 'dbg_level' - This parameter allows to control amount of debug information generated by this device driver. This parameter is formed by set of bit masks that correspond to the specific functional blocks. -- cgit v1.1 From b30069291dc7f9b9a073c33d619818fe4a8e50de Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Thu, 1 Sep 2016 16:15:18 -0700 Subject: rapidio/tsi721: fix incorrect detection of address translation condition Fix incorrect condition to identify involvement of an address translation mechanism. This bug results in NULL pointer kernel crash dump in cases when mapping of inbound RapidIO address range is requested within existing aperture.
Link: http://lkml.kernel.org/r/20160901173144.2983-1-alexandre.bounine@idt.com Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Andre van Herk Cc: Barry Wood Cc: [4.6+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/devices/tsi721.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c index 32f0f01..9d19b9a 100644 --- a/drivers/rapidio/devices/tsi721.c +++ b/drivers/rapidio/devices/tsi721.c @@ -1161,7 +1161,7 @@ static int tsi721_rio_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart, } else if (ibw_start < (ib_win->rstart + ib_win->size) && (ibw_start + ibw_size) > ib_win->rstart) { /* Return error if address translation involved */ - if (direct && ib_win->xlat) { + if (!direct || ib_win->xlat) { ret = -EFAULT; break; } -- cgit v1.1 From 635c223cfa05af9523146b2f37e119d945f449ae Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Wed, 31 Aug 2016 14:15:05 +0800 Subject: rps: flow_dissector: Fix uninitialized flow_keys used in __skb_get_hash possibly The original codes depend on that the function parameters are evaluated from left to right. But the parameter's evaluation order is not defined in C standard actually. When flow_keys_have_l4(&keys) is invoked before ___skb_get_hash(skb, &keys, hashrnd) with some compilers or environment, the keys passed to flow_keys_have_l4 is not initialized. Fixes: 6db61d79c1e1 ("flow_dissector: Ignore flow dissector return value from ___skb_get_hash") Acked-by: Eric Dumazet Signed-off-by: Gao Feng Signed-off-by: David S. 
Miller --- net/core/flow_dissector.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 61ad43f..52742a0 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -680,11 +680,13 @@ EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric); void __skb_get_hash(struct sk_buff *skb) { struct flow_keys keys; + u32 hash; __flow_hash_secret_init(); - __skb_set_sw_hash(skb, ___skb_get_hash(skb, &keys, hashrnd), - flow_keys_have_l4(&keys)); + hash = ___skb_get_hash(skb, &keys, hashrnd); + + __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys)); } EXPORT_SYMBOL(__skb_get_hash); -- cgit v1.1 From b44e108b6f322eb5f20aa6eba39b468a1ffc10ff Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Wed, 31 Aug 2016 12:11:57 -0300 Subject: bnx2x: don't reset chip on cleanup if PCI function is offline When PCI error is detected, in some architectures (like PowerPC) a slot reset is performed - the driver's error handlers are in charge of "disable" device before the reset, and re-enable it after a successful slot reset. There are two cases though that another path is taken on the code: if the slot reset is not successful or if too many errors already happened in the specific adapter (meaning that possibly the device is experiencing a HW failure that slot reset is not able to solve), the core PCI error mechanism (called EEH in PowerPC) will remove the adapter from the system, since it will consider this as a permanent failure on device. In this case, a path is taken that leads to bnx2x_chip_cleanup() calling bnx2x_reset_hw(), which then tries to perform a HW reset on chip. This reset won't succeed since the HW is in a fault state, which can be seen by multiple messages on kernel log like below: bnx2x: [bnx2x_issue_dmae_with_comp:552(eth1)]DMAE timeout! 
bnx2x: [bnx2x_write_dmae:600(eth1)]DMAE returned failure -1 After some time, the PCI error mechanism gives up on waiting the driver's correct removal procedure and forcibly remove the adapter from the system. We can see soft lockup while core PCI error mechanism is waiting for driver to accomplish the right removal process. This patch adds a verification to avoid a chip reset whenever the function is in PCI error state - since this case is only reached when we have a device being removed because of a permanent failure, the HW chip reset is not expected to work fine neither is necessary. Also, as a minor improvement in error path, we avoid the MCP information dump in case of non-recoverable PCI error (when adapter is about to be removed), since it will certainly fail. Reported-by: Harsha Thyagaraja Signed-off-by: Guilherme G. Piccoli Acked-By: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 97e8925..fa3386b 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -772,6 +772,11 @@ void bnx2x_fw_dump_lvl(struct bnx2x *bp, const char *lvl) (bp->common.bc_ver & 0xff00) >> 8, (bp->common.bc_ver & 0xff)); + if (pci_channel_offline(bp->pdev)) { + BNX2X_ERR("Cannot dump MCP info while in PCI error\n"); + return; + } + val = REG_RD(bp, MCP_REG_MCPR_CPU_PROGRAM_COUNTER); if (val == REG_RD(bp, MCP_REG_MCPR_CPU_PROGRAM_COUNTER)) BNX2X_ERR("%s" "MCP PC at 0x%x\n", lvl, val); @@ -9415,10 +9420,16 @@ unload_error: /* Release IRQs */ bnx2x_free_irq(bp); - /* Reset the chip */ - rc = bnx2x_reset_hw(bp, reset_code); - if (rc) - BNX2X_ERR("HW_RESET failed\n"); + /* Reset the chip, unless PCI function is offline. 
If we reach this + * point following a PCI error handling, it means device is really + * in a bad state and we're about to remove it, so reset the chip + * is not a good idea. + */ + if (!pci_channel_offline(bp->pdev)) { + rc = bnx2x_reset_hw(bp, reset_code); + if (rc) + BNX2X_ERR("HW_RESET failed\n"); + } /* Report UNLOAD_DONE to MCP */ bnx2x_send_unload_done(bp, keep_link); -- cgit v1.1 From ab34380162cbc9b5172afdadf5136643c687bb73 Mon Sep 17 00:00:00 2001 From: Eli Cooper Date: Fri, 26 Aug 2016 23:52:29 +0800 Subject: ipv6: Don't unset flowi6_proto in ipxip6_tnl_xmit() Commit 8eb30be0352d0916 ("ipv6: Create ip6_tnl_xmit") unsets flowi6_proto in ip4ip6_tnl_xmit() and ip6ip6_tnl_xmit(). Since xfrm_selector_match() relies on this info, IPv6 packets sent by an ip6tunnel cannot be properly selected by their protocols after removing it. This patch puts flowi6_proto back. Cc: stable@vger.kernel.org Fixes: 8eb30be0352d ("ipv6: Create ip6_tnl_xmit") Signed-off-by: Eli Cooper Signed-off-by: David S. 
Miller --- net/ipv6/ip6_tunnel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 7b0481e..888543d 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1174,6 +1174,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) encap_limit = t->parms.encap_limit; memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_IPIP; dsfield = ipv4_get_dsfield(iph); @@ -1233,6 +1234,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) encap_limit = t->parms.encap_limit; memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_IPV6; dsfield = ipv6_get_dsfield(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) -- cgit v1.1 From c2f321126e31cd69365e65ecd4a7c774e4fc71d2 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 30 Aug 2016 21:50:22 +0200 Subject: ARM: shmobile: fix regulator quirk for Gen2 The current implementation only works if the da9xxx devices are added before their drivers are registered. Only then it can apply the fixes to both devices. Otherwise, the driver for the first device gets probed before the fix for the second device can be applied. This is what fails when using the IP core switcher or when having the i2c master driver as a module. So, we need to disable both da9xxx once we detected one of them. We now use i2c_transfer with hardcoded i2c_messages and device addresses, so we don't need the da9xxx client devices to be instantiated. Because the fixup is used on specific boards only, the addresses are not going to change. 
Fixes: 663fbb52159cca ("ARM: shmobile: R-Car Gen2: Add da9063/da9210 regulator quirk") Signed-off-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven (r8a7791/koelsch) Tested-by: Kuninori Morimoto Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c | 62 +++++++++------------- 1 file changed, 26 insertions(+), 36 deletions(-) diff --git a/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c b/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c index 62437b5..73e3adb 100644 --- a/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c +++ b/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c @@ -41,39 +41,26 @@ #define REGULATOR_IRQ_MASK BIT(2) /* IRQ2, active low */ -static void __iomem *irqc; - -static const u8 da9063_mask_regs[] = { - DA9063_REG_IRQ_MASK_A, - DA9063_REG_IRQ_MASK_B, - DA9063_REG_IRQ_MASK_C, - DA9063_REG_IRQ_MASK_D, -}; - -/* DA9210 System Control and Event Registers */ +/* start of DA9210 System Control and Event Registers */ #define DA9210_REG_MASK_A 0x54 -#define DA9210_REG_MASK_B 0x55 - -static const u8 da9210_mask_regs[] = { - DA9210_REG_MASK_A, - DA9210_REG_MASK_B, -}; - -static void da9xxx_mask_irqs(struct i2c_client *client, const u8 regs[], - unsigned int nregs) -{ - unsigned int i; - dev_info(&client->dev, "Masking %s interrupt sources\n", client->name); +static void __iomem *irqc; - for (i = 0; i < nregs; i++) { - int error = i2c_smbus_write_byte_data(client, regs[i], ~0); - if (error) { - dev_err(&client->dev, "i2c error %d\n", error); - return; - } - } -} +/* first byte sets the memory pointer, following are consecutive reg values */ +static u8 da9063_irq_clr[] = { DA9063_REG_IRQ_MASK_A, 0xff, 0xff, 0xff, 0xff }; +static u8 da9210_irq_clr[] = { DA9210_REG_MASK_A, 0xff, 0xff }; + +static struct i2c_msg da9xxx_msgs[2] = { + { + .addr = 0x58, + .len = ARRAY_SIZE(da9063_irq_clr), + .buf = da9063_irq_clr, + }, { + .addr = 0x68, + .len = ARRAY_SIZE(da9210_irq_clr), + .buf = da9210_irq_clr, + }, 
+}; static int regulator_quirk_notify(struct notifier_block *nb, unsigned long action, void *data) @@ -93,12 +80,15 @@ static int regulator_quirk_notify(struct notifier_block *nb, client = to_i2c_client(dev); dev_dbg(dev, "Detected %s\n", client->name); - if ((client->addr == 0x58 && !strcmp(client->name, "da9063"))) - da9xxx_mask_irqs(client, da9063_mask_regs, - ARRAY_SIZE(da9063_mask_regs)); - else if (client->addr == 0x68 && !strcmp(client->name, "da9210")) - da9xxx_mask_irqs(client, da9210_mask_regs, - ARRAY_SIZE(da9210_mask_regs)); + if ((client->addr == 0x58 && !strcmp(client->name, "da9063")) || + (client->addr == 0x68 && !strcmp(client->name, "da9210"))) { + int ret; + + dev_info(&client->dev, "clearing da9063/da9210 interrupts\n"); + ret = i2c_transfer(client->adapter, da9xxx_msgs, ARRAY_SIZE(da9xxx_msgs)); + if (ret != ARRAY_SIZE(da9xxx_msgs)) + dev_err(&client->dev, "i2c error %d\n", ret); + } mon = ioread32(irqc + IRQC_MONITOR); if (mon & REGULATOR_IRQ_MASK) -- cgit v1.1 From 08d072599234c959b0b82b63fa252c129225a899 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 2 Sep 2016 14:38:23 +0800 Subject: tick/nohz: Fix softlockup on scheduler stalls in kvm guest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit tick_nohz_start_idle() is prevented to be called if the idle tick can't be stopped since commit 1f3b0f8243cb934 ("tick/nohz: Optimize nohz idle enter"). As a result, after suspend/resume the host machine, full dynticks kvm guest will softlockup: NMI watchdog: BUG: soft lockup - CPU#0 stuck for 26s! [swapper/0:0] Call Trace: default_idle+0x31/0x1a0 arch_cpu_idle+0xf/0x20 default_idle_call+0x2a/0x50 cpu_startup_entry+0x39b/0x4d0 rest_init+0x138/0x140 ? rest_init+0x5/0x140 start_kernel+0x4c1/0x4ce ? set_init_arg+0x55/0x55 ? 
early_idt_handler_array+0x120/0x120 x86_64_start_reservations+0x24/0x26 x86_64_start_kernel+0x142/0x14f In addition, cat /proc/stat | grep cpu in guest or host: cpu 398 16 5049 15754 5490 0 1 46 0 0 cpu0 206 5 450 0 0 0 1 14 0 0 cpu1 81 0 3937 3149 1514 0 0 9 0 0 cpu2 45 6 332 6052 2243 0 0 11 0 0 cpu3 65 2 328 6552 1732 0 0 11 0 0 The idle and iowait states are weirdly 0 for cpu0 (housekeeping). The bug is present in both guest and host kernels, and both show the cpu0 idle and iowait accounting issue; however, the host kernel's suspend/resume path etc. will touch the watchdog to avoid the softlockup. - The watchdog will not be touched in the tick_nohz_stop_idle path (it needs to be touched since the scheduler stall is expected) if the idle_active flag is not set. - The idle and iowait states will not be accounted when exiting the idle loop (resched or interrupt) if the idle start time and idle_active flag are not set. This patch fixes it by reverting commit 1f3b0f8243cb934, since being unable to stop the idle tick doesn't mean the CPU can't be idle.
Fixes: 1f3b0f8243cb934 ("tick/nohz: Optimize nohz idle enter") Signed-off-by: Wanpeng Li Cc: Sanjeev Yadav Cc: Gaurav Jindal Cc: stable@vger.kernel.org Cc: kvm@vger.kernel.org Cc: Radim Krčmář Cc: Peter Zijlstra Cc: Paolo Bonzini Link: http://lkml.kernel.org/r/1472798303-4154-1-git-send-email-wanpeng.li@hotmail.com Signed-off-by: Thomas Gleixner --- kernel/time/tick-sched.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 204fdc8..2ec7c00 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -908,10 +908,11 @@ static void __tick_nohz_idle_enter(struct tick_sched *ts) ktime_t now, expires; int cpu = smp_processor_id(); + now = tick_nohz_start_idle(ts); + if (can_stop_idle_tick(cpu, ts)) { int was_stopped = ts->tick_stopped; - now = tick_nohz_start_idle(ts); ts->idle_calls++; expires = tick_nohz_stop_sched_tick(ts, now, cpu); -- cgit v1.1 From 11749e086b2766cccf6217a527ef5c5604ba069c Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Sun, 28 Aug 2016 10:13:07 +0200 Subject: ALSA: timer: fix NULL pointer dereference in read()/ioctl() race I got this with syzkaller: ================================================================== BUG: KASAN: null-ptr-deref on address 0000000000000020 Read of size 32 by task syz-executor/22519 CPU: 1 PID: 22519 Comm: syz-executor Not tainted 4.8.0-rc2+ #169 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org 04/01/2 014 0000000000000001 ffff880111a17a00 ffffffff81f9f141 ffff880111a17a90 ffff880111a17c50 ffff880114584a58 ffff880114584a10 ffff880111a17a80 ffffffff8161fe3f ffff880100000000 ffff880118d74a48 ffff880118d74a68 Call Trace: [] dump_stack+0x83/0xb2 [] kasan_report_error+0x41f/0x4c0 [] kasan_report+0x34/0x40 [] ? snd_timer_user_read+0x554/0x790 [] check_memory_region+0x13e/0x1a0 [] kasan_check_read+0x11/0x20 [] snd_timer_user_read+0x554/0x790 [] ? 
snd_timer_user_info_compat.isra.5+0x2b0/0x2b0 [] ? proc_fault_inject_write+0x1c1/0x250 [] ? next_tgid+0x2a0/0x2a0 [] ? do_group_exit+0x108/0x330 [] ? fsnotify+0x72a/0xca0 [] __vfs_read+0x10e/0x550 [] ? snd_timer_user_info_compat.isra.5+0x2b0/0x2b0 [] ? do_sendfile+0xc50/0xc50 [] ? __fsnotify_update_child_dentry_flags+0x60/0x60 [] ? kcov_ioctl+0x56/0x190 [] ? common_file_perm+0x2e2/0x380 [] ? __fsnotify_parent+0x5e/0x2b0 [] ? security_file_permission+0x86/0x1e0 [] ? rw_verify_area+0xe5/0x2b0 [] vfs_read+0x115/0x330 [] SyS_read+0xd1/0x1a0 [] ? vfs_write+0x4b0/0x4b0 [] ? __this_cpu_preempt_check+0x1c/0x20 [] ? __context_tracking_exit.part.4+0x3a/0x1e0 [] ? vfs_write+0x4b0/0x4b0 [] do_syscall_64+0x1c4/0x4e0 [] ? syscall_return_slowpath+0x16c/0x1d0 [] entry_SYSCALL64_slow_path+0x25/0x25 ================================================================== There are a couple of problems that I can see: - ioctl(SNDRV_TIMER_IOCTL_SELECT), which potentially sets tu->queue/tu->tqueue to NULL on memory allocation failure, so read() would get a NULL pointer dereference like the above splat - the same ioctl() can free tu->queue/tu->tqueue which means read() could potentially see (and dereference) the freed pointer We can fix both by taking the ioctl_lock mutex when dereferencing ->queue/->tqueue, since that's always held over all the ioctl() code. Just looking at the code I find it likely that there are more problems here such as tu->qhead pointing outside the buffer if the size is changed concurrently using SNDRV_TIMER_IOCTL_PARAMS.
Signed-off-by: Vegard Nossum Cc: Signed-off-by: Takashi Iwai --- sound/core/timer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/core/timer.c b/sound/core/timer.c index 2d6e3e7..2706061 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -1972,6 +1972,7 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer, tu->qused--; spin_unlock_irq(&tu->qlock); + mutex_lock(&tu->ioctl_lock); if (tu->tread) { if (copy_to_user(buffer, &tu->tqueue[qhead], sizeof(struct snd_timer_tread))) @@ -1981,6 +1982,7 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer, sizeof(struct snd_timer_read))) err = -EFAULT; } + mutex_unlock(&tu->ioctl_lock); spin_lock_irq(&tu->qlock); if (err < 0) -- cgit v1.1 From 64ed5771aca2fcfb8ea440fc679741054011fd7e Mon Sep 17 00:00:00 2001 From: Tamizh chelvam Date: Tue, 2 Aug 2016 16:13:14 +0530 Subject: ath10k: Add WMI_SERVICE_PERIODIC_CHAN_STAT_SUPPORT wmi service WMI_SERVICE_PERIODIC_CHAN_STAT_SUPPORT service has missed in the commit 7e247a9e88dc ("ath10k: add dynamic tx mode switch config support for qca4019"). This patch adds the service to avoid mismatch between host and target. 
Fixes: 7e247a9e88dc ("ath10k: add dynamic tx mode switch config support for qca4019") Signed-off-by: Tamizh chelvam Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/wmi.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h index 89adfa9..2f89c4b 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.h +++ b/drivers/net/wireless/ath/ath10k/wmi.h @@ -180,6 +180,7 @@ enum wmi_service { WMI_SERVICE_MESH_NON_11S, WMI_SERVICE_PEER_STATS, WMI_SERVICE_RESTRT_CHNL_SUPPORT, + WMI_SERVICE_PERIODIC_CHAN_STAT_SUPPORT, WMI_SERVICE_TX_MODE_PUSH_ONLY, WMI_SERVICE_TX_MODE_PUSH_PULL, WMI_SERVICE_TX_MODE_DYNAMIC, @@ -305,6 +306,7 @@ enum wmi_10_4_service { WMI_10_4_SERVICE_RESTRT_CHNL_SUPPORT, WMI_10_4_SERVICE_PEER_STATS, WMI_10_4_SERVICE_MESH_11S, + WMI_10_4_SERVICE_PERIODIC_CHAN_STAT_SUPPORT, WMI_10_4_SERVICE_TX_MODE_PUSH_ONLY, WMI_10_4_SERVICE_TX_MODE_PUSH_PULL, WMI_10_4_SERVICE_TX_MODE_DYNAMIC, @@ -402,6 +404,7 @@ static inline char *wmi_service_name(int service_id) SVCSTR(WMI_SERVICE_MESH_NON_11S); SVCSTR(WMI_SERVICE_PEER_STATS); SVCSTR(WMI_SERVICE_RESTRT_CHNL_SUPPORT); + SVCSTR(WMI_SERVICE_PERIODIC_CHAN_STAT_SUPPORT); SVCSTR(WMI_SERVICE_TX_MODE_PUSH_ONLY); SVCSTR(WMI_SERVICE_TX_MODE_PUSH_PULL); SVCSTR(WMI_SERVICE_TX_MODE_DYNAMIC); @@ -652,6 +655,8 @@ static inline void wmi_10_4_svc_map(const __le32 *in, unsigned long *out, WMI_SERVICE_PEER_STATS, len); SVCMAP(WMI_10_4_SERVICE_MESH_11S, WMI_SERVICE_MESH_11S, len); + SVCMAP(WMI_10_4_SERVICE_PERIODIC_CHAN_STAT_SUPPORT, + WMI_SERVICE_PERIODIC_CHAN_STAT_SUPPORT, len); SVCMAP(WMI_10_4_SERVICE_TX_MODE_PUSH_ONLY, WMI_SERVICE_TX_MODE_PUSH_ONLY, len); SVCMAP(WMI_10_4_SERVICE_TX_MODE_PUSH_PULL, -- cgit v1.1 From 75b34800a228b5cadc7196485fa0fdabfb9e7684 Mon Sep 17 00:00:00 2001 From: Maharaja Kennadyrajan Date: Thu, 4 Aug 2016 19:21:51 +0530 Subject: ath10k: hide kernel addresses from logs using %pK format specifier With the %pK format specifier we hide 
the kernel addresses with the help of kptr_restrict sysctl. In this patch, %p is changed to %pK in the driver code. The sysctl is documented in Documentation/sysctl/kernel.txt. Signed-off-by: Maharaja Kennadyrajan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/ahb.c | 2 +- drivers/net/wireless/ath/ath10k/bmi.c | 4 ++-- drivers/net/wireless/ath/ath10k/ce.c | 4 ++-- drivers/net/wireless/ath/ath10k/core.c | 4 ++-- drivers/net/wireless/ath/ath10k/htc.c | 6 +++--- drivers/net/wireless/ath/ath10k/htt_rx.c | 2 +- drivers/net/wireless/ath/ath10k/mac.c | 20 ++++++++++---------- drivers/net/wireless/ath/ath10k/pci.c | 2 +- drivers/net/wireless/ath/ath10k/testmode.c | 4 ++-- drivers/net/wireless/ath/ath10k/txrx.c | 2 +- drivers/net/wireless/ath/ath10k/wmi.c | 4 ++-- 11 files changed, 27 insertions(+), 27 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/ahb.c b/drivers/net/wireless/ath/ath10k/ahb.c index acec16b..dede026 100644 --- a/drivers/net/wireless/ath/ath10k/ahb.c +++ b/drivers/net/wireless/ath/ath10k/ahb.c @@ -577,7 +577,7 @@ static int ath10k_ahb_resource_init(struct ath10k *ar) ath10k_dbg(ar, ATH10K_DBG_BOOT, "irq: %d\n", ar_ahb->irq); - ath10k_dbg(ar, ATH10K_DBG_BOOT, "mem: 0x%p mem_len: %lu gcc mem: 0x%p tcsr_mem: 0x%p\n", + ath10k_dbg(ar, ATH10K_DBG_BOOT, "mem: 0x%pK mem_len: %lu gcc mem: 0x%pK tcsr_mem: 0x%pK\n", ar_ahb->mem, ar_ahb->mem_len, ar_ahb->gcc_mem, ar_ahb->tcsr_mem); return 0; diff --git a/drivers/net/wireless/ath/ath10k/bmi.c b/drivers/net/wireless/ath/ath10k/bmi.c index 3d29b08..2872d34 100644 --- a/drivers/net/wireless/ath/ath10k/bmi.c +++ b/drivers/net/wireless/ath/ath10k/bmi.c @@ -221,7 +221,7 @@ int ath10k_bmi_lz_data(struct ath10k *ar, const void *buffer, u32 length) u32 txlen; int ret; - ath10k_dbg(ar, ATH10K_DBG_BMI, "bmi lz data buffer 0x%p length %d\n", + ath10k_dbg(ar, ATH10K_DBG_BMI, "bmi lz data buffer 0x%pK length %d\n", buffer, length); if (ar->bmi.done_sent) { @@ -287,7 +287,7 @@ int 
ath10k_bmi_fast_download(struct ath10k *ar, int ret; ath10k_dbg(ar, ATH10K_DBG_BMI, - "bmi fast download address 0x%x buffer 0x%p length %d\n", + "bmi fast download address 0x%x buffer 0x%pK length %d\n", address, buffer, length); ret = ath10k_bmi_lz_stream_start(ar, address); diff --git a/drivers/net/wireless/ath/ath10k/ce.c b/drivers/net/wireless/ath/ath10k/ce.c index 9fb8d74..65d8d71 100644 --- a/drivers/net/wireless/ath/ath10k/ce.c +++ b/drivers/net/wireless/ath/ath10k/ce.c @@ -840,7 +840,7 @@ static int ath10k_ce_init_src_ring(struct ath10k *ar, ath10k_ce_src_ring_highmark_set(ar, ctrl_addr, nentries); ath10k_dbg(ar, ATH10K_DBG_BOOT, - "boot init ce src ring id %d entries %d base_addr %p\n", + "boot init ce src ring id %d entries %d base_addr %pK\n", ce_id, nentries, src_ring->base_addr_owner_space); return 0; @@ -874,7 +874,7 @@ static int ath10k_ce_init_dest_ring(struct ath10k *ar, ath10k_ce_dest_ring_highmark_set(ar, ctrl_addr, nentries); ath10k_dbg(ar, ATH10K_DBG_BOOT, - "boot ce dest ring id %d entries %d base_addr %p\n", + "boot ce dest ring id %d entries %d base_addr %pK\n", ce_id, nentries, dest_ring->base_addr_owner_space); return 0; diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index 6b49374..f46f916 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -699,7 +699,7 @@ static int ath10k_download_and_run_otp(struct ath10k *ar) if (!ar->running_fw->fw_file.otp_data || !ar->running_fw->fw_file.otp_len) { - ath10k_warn(ar, "Not running otp, calibration will be incorrect (otp-data %p otp_len %zd)!\n", + ath10k_warn(ar, "Not running otp, calibration will be incorrect (otp-data %pK otp_len %zd)!\n", ar->running_fw->fw_file.otp_data, ar->running_fw->fw_file.otp_len); return 0; @@ -753,7 +753,7 @@ static int ath10k_download_fw(struct ath10k *ar) } ath10k_dbg(ar, ATH10K_DBG_BOOT, - "boot uploading firmware image %p len %d\n", + "boot uploading firmware image %pK len 
%d\n", data, data_len); ret = ath10k_bmi_fast_download(ar, address, data, data_len); diff --git a/drivers/net/wireless/ath/ath10k/htc.c b/drivers/net/wireless/ath/ath10k/htc.c index 5b3c6bc..175aae3 100644 --- a/drivers/net/wireless/ath/ath10k/htc.c +++ b/drivers/net/wireless/ath/ath10k/htc.c @@ -44,7 +44,7 @@ static struct sk_buff *ath10k_htc_build_tx_ctrl_skb(void *ar) skb_cb = ATH10K_SKB_CB(skb); memset(skb_cb, 0, sizeof(*skb_cb)); - ath10k_dbg(ar, ATH10K_DBG_HTC, "%s: skb %p\n", __func__, skb); + ath10k_dbg(ar, ATH10K_DBG_HTC, "%s: skb %pK\n", __func__, skb); return skb; } @@ -62,7 +62,7 @@ static void ath10k_htc_notify_tx_completion(struct ath10k_htc_ep *ep, { struct ath10k *ar = ep->htc->ar; - ath10k_dbg(ar, ATH10K_DBG_HTC, "%s: ep %d skb %p\n", __func__, + ath10k_dbg(ar, ATH10K_DBG_HTC, "%s: ep %d skb %pK\n", __func__, ep->eid, skb); ath10k_htc_restore_tx_skb(ep->htc, skb); @@ -404,7 +404,7 @@ void ath10k_htc_rx_completion_handler(struct ath10k *ar, struct sk_buff *skb) goto out; } - ath10k_dbg(ar, ATH10K_DBG_HTC, "htc rx completion ep %d skb %p\n", + ath10k_dbg(ar, ATH10K_DBG_HTC, "htc rx completion ep %d skb %pK\n", eid, skb); ep->ep_ops.ep_rx_complete(ar, skb); diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index 78db5d6..ae6931b 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -931,7 +931,7 @@ static void ath10k_process_rx(struct ath10k *ar, *status = *rx_status; ath10k_dbg(ar, ATH10K_DBG_DATA, - "rx skb %p len %u peer %pM %s %s sn %u %s%s%s%s%s %srate_idx %u vht_nss %u freq %u band %u flag 0x%llx fcs-err %i mic-err %i amsdu-more %i\n", + "rx skb %pK len %u peer %pM %s %s sn %u %s%s%s%s%s %srate_idx %u vht_nss %u freq %u band %u flag 0x%llx fcs-err %i mic-err %i amsdu-more %i\n", skb, skb->len, ieee80211_get_SA(hdr), diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 0bbd0a0..2a1d9fd 100644 --- 
a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -824,7 +824,7 @@ static void ath10k_peer_cleanup(struct ath10k *ar, u32 vdev_id) */ for (i = 0; i < ARRAY_SIZE(ar->peer_map); i++) { if (ar->peer_map[i] == peer) { - ath10k_warn(ar, "removing stale peer_map entry for %pM (ptr %p idx %d)\n", + ath10k_warn(ar, "removing stale peer_map entry for %pM (ptr %pK idx %d)\n", peer->addr, peer, i); ar->peer_map[i] = NULL; } @@ -3524,7 +3524,7 @@ static int ath10k_mac_tx(struct ath10k *ar, if (info->flags & IEEE80211_TX_CTL_TX_OFFCHAN) { if (!ath10k_mac_tx_frm_has_freq(ar)) { - ath10k_dbg(ar, ATH10K_DBG_MAC, "queued offchannel skb %p\n", + ath10k_dbg(ar, ATH10K_DBG_MAC, "queued offchannel skb %pK\n", skb); skb_queue_tail(&ar->offchan_tx_queue, skb); @@ -3586,7 +3586,7 @@ void ath10k_offchan_tx_work(struct work_struct *work) mutex_lock(&ar->conf_mutex); - ath10k_dbg(ar, ATH10K_DBG_MAC, "mac offchannel skb %p\n", + ath10k_dbg(ar, ATH10K_DBG_MAC, "mac offchannel skb %pK\n", skb); hdr = (struct ieee80211_hdr *)skb->data; @@ -3643,7 +3643,7 @@ void ath10k_offchan_tx_work(struct work_struct *work) time_left = wait_for_completion_timeout(&ar->offchan_tx_completed, 3 * HZ); if (time_left == 0) - ath10k_warn(ar, "timed out waiting for offchannel skb %p\n", + ath10k_warn(ar, "timed out waiting for offchannel skb %pK\n", skb); if (!peer && tmp_peer_created) { @@ -6001,7 +6001,7 @@ static int ath10k_sta_state(struct ieee80211_hw *hw, continue; if (peer->sta == sta) { - ath10k_warn(ar, "found sta peer %pM (ptr %p id %d) entry on vdev %i after it was supposedly removed\n", + ath10k_warn(ar, "found sta peer %pM (ptr %pK id %d) entry on vdev %i after it was supposedly removed\n", sta->addr, peer, i, arvif->vdev_id); peer->sta = NULL; @@ -7134,7 +7134,7 @@ ath10k_mac_op_add_chanctx(struct ieee80211_hw *hw, struct ath10k *ar = hw->priv; ath10k_dbg(ar, ATH10K_DBG_MAC, - "mac chanctx add freq %hu width %d ptr %p\n", + "mac chanctx add freq %hu width %d ptr 
%pK\n", ctx->def.chan->center_freq, ctx->def.width, ctx); mutex_lock(&ar->conf_mutex); @@ -7158,7 +7158,7 @@ ath10k_mac_op_remove_chanctx(struct ieee80211_hw *hw, struct ath10k *ar = hw->priv; ath10k_dbg(ar, ATH10K_DBG_MAC, - "mac chanctx remove freq %hu width %d ptr %p\n", + "mac chanctx remove freq %hu width %d ptr %pK\n", ctx->def.chan->center_freq, ctx->def.width, ctx); mutex_lock(&ar->conf_mutex); @@ -7223,7 +7223,7 @@ ath10k_mac_op_change_chanctx(struct ieee80211_hw *hw, mutex_lock(&ar->conf_mutex); ath10k_dbg(ar, ATH10K_DBG_MAC, - "mac chanctx change freq %hu width %d ptr %p changed %x\n", + "mac chanctx change freq %hu width %d ptr %pK changed %x\n", ctx->def.chan->center_freq, ctx->def.width, ctx, changed); /* This shouldn't really happen because channel switching should use @@ -7281,7 +7281,7 @@ ath10k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw, mutex_lock(&ar->conf_mutex); ath10k_dbg(ar, ATH10K_DBG_MAC, - "mac chanctx assign ptr %p vdev_id %i\n", + "mac chanctx assign ptr %pK vdev_id %i\n", ctx, arvif->vdev_id); if (WARN_ON(arvif->is_started)) { @@ -7342,7 +7342,7 @@ ath10k_mac_op_unassign_vif_chanctx(struct ieee80211_hw *hw, mutex_lock(&ar->conf_mutex); ath10k_dbg(ar, ATH10K_DBG_MAC, - "mac chanctx unassign ptr %p vdev_id %i\n", + "mac chanctx unassign ptr %pK vdev_id %i\n", ctx, arvif->vdev_id); WARN_ON(!arvif->is_started); diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index 9a22c47..1b841ad 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -3062,7 +3062,7 @@ static int ath10k_pci_claim(struct ath10k *ar) goto err_master; } - ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot pci_mem 0x%p\n", ar_pci->mem); + ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot pci_mem 0x%pK\n", ar_pci->mem); return 0; err_master: diff --git a/drivers/net/wireless/ath/ath10k/testmode.c b/drivers/net/wireless/ath/ath10k/testmode.c index 091f29d..ed85f93 100644 --- 
a/drivers/net/wireless/ath/ath10k/testmode.c +++ b/drivers/net/wireless/ath/ath10k/testmode.c @@ -46,7 +46,7 @@ bool ath10k_tm_event_wmi(struct ath10k *ar, u32 cmd_id, struct sk_buff *skb) int ret; ath10k_dbg(ar, ATH10K_DBG_TESTMODE, - "testmode event wmi cmd_id %d skb %p skb->len %d\n", + "testmode event wmi cmd_id %d skb %pK skb->len %d\n", cmd_id, skb, skb->len); ath10k_dbg_dump(ar, ATH10K_DBG_TESTMODE, NULL, "", skb->data, skb->len); @@ -383,7 +383,7 @@ static int ath10k_tm_cmd_wmi(struct ath10k *ar, struct nlattr *tb[]) cmd_id = nla_get_u32(tb[ATH10K_TM_ATTR_WMI_CMDID]); ath10k_dbg(ar, ATH10K_DBG_TESTMODE, - "testmode cmd wmi cmd_id %d buf %p buf_len %d\n", + "testmode cmd wmi cmd_id %d buf %pK buf_len %d\n", cmd_id, buf, buf_len); ath10k_dbg_dump(ar, ATH10K_DBG_TESTMODE, NULL, "", buf, buf_len); diff --git a/drivers/net/wireless/ath/ath10k/txrx.c b/drivers/net/wireless/ath/ath10k/txrx.c index b29a86a..1e695d1 100644 --- a/drivers/net/wireless/ath/ath10k/txrx.c +++ b/drivers/net/wireless/ath/ath10k/txrx.c @@ -44,7 +44,7 @@ static void ath10k_report_offchan_tx(struct ath10k *ar, struct sk_buff *skb) complete(&ar->offchan_tx_completed); ar->offchan_tx_skb = NULL; /* just for sanity */ - ath10k_dbg(ar, ATH10K_DBG_HTT, "completed offchannel skb %p\n", skb); + ath10k_dbg(ar, ATH10K_DBG_HTT, "completed offchannel skb %pK\n", skb); out: spin_unlock_bh(&ar->data_lock); } diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index ae5f541..b9a3b09 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -1877,7 +1877,7 @@ ath10k_wmi_op_gen_mgmt_tx(struct ath10k *ar, struct sk_buff *msdu) ether_addr_copy(cmd->hdr.peer_macaddr.addr, ieee80211_get_DA(hdr)); memcpy(cmd->buf, msdu->data, msdu->len); - ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi mgmt tx skb %p len %d ftype %02x stype %02x\n", + ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi mgmt tx skb %pK len %d ftype %02x stype %02x\n", msdu, skb->len, fc & 
IEEE80211_FCTL_FTYPE, fc & IEEE80211_FCTL_STYPE); trace_ath10k_tx_hdr(ar, skb->data, skb->len); @@ -2350,7 +2350,7 @@ int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb) ath10k_mac_handle_beacon(ar, skb); ath10k_dbg(ar, ATH10K_DBG_MGMT, - "event mgmt rx skb %p len %d ftype %02x stype %02x\n", + "event mgmt rx skb %pK len %d ftype %02x stype %02x\n", skb, skb->len, fc & IEEE80211_FCTL_FTYPE, fc & IEEE80211_FCTL_STYPE); -- cgit v1.1 From 7d42298eb43d27442e64d1e52e9f55f9cf9387e1 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Tue, 9 Aug 2016 12:01:51 +0530 Subject: ath10k: fix group privacy action frame decryption for qca4019 Recent commit 46f6b06050b7 ("mac80211: Encrypt "Group addressed privacy" action frames") encrypts group privacy action frames. But qca99x0 family chipset delivers broadcast/multicast management frames as encrypted and it should be decrypted by mac80211. Setting RX_FLAG_DECRYPTED stats for those frames is breaking mesh connection establishment. 
Signed-off-by: Rajkumar Manoharan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 4 ++++ drivers/net/wireless/ath/ath10k/core.h | 5 +++++ drivers/net/wireless/ath/ath10k/wmi.c | 29 ++++++++++++++++++++++++----- 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index f46f916..ffedc03 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -182,6 +182,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA99X0_BOARD_DATA_SZ, .board_ext_size = QCA99X0_BOARD_EXT_DATA_SZ, }, + .sw_decrypt_mcast_mgmt = true, }, { .id = QCA9984_HW_1_0_DEV_VERSION, @@ -205,6 +206,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA99X0_BOARD_DATA_SZ, .board_ext_size = QCA99X0_BOARD_EXT_DATA_SZ, }, + .sw_decrypt_mcast_mgmt = true, }, { .id = QCA9888_HW_2_0_DEV_VERSION, @@ -227,6 +229,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA99X0_BOARD_DATA_SZ, .board_ext_size = QCA99X0_BOARD_EXT_DATA_SZ, }, + .sw_decrypt_mcast_mgmt = true, }, { .id = QCA9377_HW_1_0_DEV_VERSION, @@ -285,6 +288,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA4019_BOARD_DATA_SZ, .board_ext_size = QCA4019_BOARD_EXT_DATA_SZ, }, + .sw_decrypt_mcast_mgmt = true, }, }; diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index 56daeb7..7e329dc 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -771,6 +771,11 @@ struct ath10k { size_t board_size; size_t board_ext_size; } fw; + + /* qca99x0 family chips deliver broadcast/multicast management + * frames encrypted and expect software do decryption. 
+ */ + bool sw_decrypt_mcast_mgmt; } hw_params; /* contains the firmware images used with ATH10K_FIRMWARE_MODE_NORMAL */ diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index b9a3b09..4b4bf23 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -2243,6 +2243,29 @@ static int ath10k_wmi_10_4_op_pull_mgmt_rx_ev(struct ath10k *ar, return 0; } +static bool ath10k_wmi_rx_is_decrypted(struct ath10k *ar, + struct ieee80211_hdr *hdr) +{ + if (!ieee80211_has_protected(hdr->frame_control)) + return false; + + /* FW delivers WEP Shared Auth frame with Protected Bit set and + * encrypted payload. However in case of PMF it delivers decrypted + * frames with Protected Bit set. + */ + if (ieee80211_is_auth(hdr->frame_control)) + return false; + + /* qca99x0 based FW delivers broadcast or multicast management frames + * (ex: group privacy action frames in mesh) as encrypted payload. + */ + if (is_multicast_ether_addr(ieee80211_get_DA(hdr)) && + ar->hw_params.sw_decrypt_mcast_mgmt) + return false; + + return true; +} + int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb) { struct wmi_mgmt_rx_ev_arg arg = {}; @@ -2329,11 +2352,7 @@ int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb) ath10k_wmi_handle_wep_reauth(ar, skb, status); - /* FW delivers WEP Shared Auth frame with Protected Bit set and - * encrypted payload. However in case of PMF it delivers decrypted - * frames with Protected Bit set. 
*/ - if (ieee80211_has_protected(hdr->frame_control) && - !ieee80211_is_auth(hdr->frame_control)) { + if (ath10k_wmi_rx_is_decrypted(ar, hdr)) { status->flag |= RX_FLAG_DECRYPTED; if (!ieee80211_is_action(hdr->frame_control) && -- cgit v1.1 From 03c41cc126c8868ef483c2480acfcd5490a844b3 Mon Sep 17 00:00:00 2001 From: Mohammed Shafi Shajakhan Date: Wed, 17 Aug 2016 16:58:00 +0530 Subject: ath10k: suppress warnings when getting wmi WDS peer event id 'WMI_10_4_WDS_PEER_EVENTID' is not yet handled/implemented for WDS mode, as of now suppress the warning message "Unknown eventid: 36903" Signed-off-by: Mohammed Shafi Shajakhan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 4b4bf23..15b7efc 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -5379,6 +5379,7 @@ static void ath10k_wmi_10_4_op_rx(struct ath10k *ar, struct sk_buff *skb) break; case WMI_10_4_WOW_WAKEUP_HOST_EVENTID: case WMI_10_4_PEER_RATECODE_LIST_EVENTID: + case WMI_10_4_WDS_PEER_EVENTID: ath10k_dbg(ar, ATH10K_DBG_WMI, "received event id %d not implemented\n", id); break; -- cgit v1.1 From 83e164b7679d46a6a172ca0fd0ead68b48e22103 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Wed, 17 Aug 2016 21:02:53 +0530 Subject: ath10k: improve wake_tx_queue ops performance txqs_lock is interfering with wake_tx_queue submitting more frames. so queues don't get filled in and don't keep firmware/hardware busy enough. This change helps to reduce the txqs_lock contention and wake_tx_queue() blockage to being possible in txrx_unref(). To reduce turn around time of wake_tx_queue ops and to maintain fairness among all txqs, the callback is updated to push first txq alone from pending list for every wake_tx_queue call. Remaining txqs will be processed later upon tx completion. 
Below improvements are observed in push-only mode and validated on IPQ4019 platform. With this change, in AP mode ~10Mbps increase is observed in downlink (AP -> STA) traffic and approx. 5-10% of CPU usage is reduced. Major improvement is observed in 1-hop Mesh mode topology in 11ACVHT80. Compared to Infra mode, CPU overhead is higher in Mesh mode due to path lookup and no fast-xmit support. So reducing spin lock contention is helping in Mesh. TOT +change -------- -------- TCP DL 545 Mbps 595 Mbps TCP UL 555 Mbps 585 Mbps Signed-off-by: Rajkumar Manoharan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/mac.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 2a1d9fd..05250af 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -4100,13 +4100,29 @@ static void ath10k_mac_op_wake_tx_queue(struct ieee80211_hw *hw, { struct ath10k *ar = hw->priv; struct ath10k_txq *artxq = (void *)txq->drv_priv; + struct ieee80211_txq *f_txq; + struct ath10k_txq *f_artxq; + int ret = 0; + int max = 16; spin_lock_bh(&ar->txqs_lock); if (list_empty(&artxq->list)) list_add_tail(&artxq->list, &ar->txqs); + + f_artxq = list_first_entry(&ar->txqs, struct ath10k_txq, list); + f_txq = container_of((void *)f_artxq, struct ieee80211_txq, drv_priv); + list_del_init(&f_artxq->list); + + while (ath10k_mac_tx_can_push(hw, f_txq) && max--) { + ret = ath10k_mac_tx_push_txq(hw, f_txq); + if (ret) + break; + } + if (ret != -ENOENT) + list_add_tail(&f_artxq->list, &ar->txqs); spin_unlock_bh(&ar->txqs_lock); - ath10k_mac_tx_push_pending(ar); + ath10k_htt_tx_txq_update(hw, f_txq); ath10k_htt_tx_txq_update(hw, txq); } -- cgit v1.1 From e4fd726f21cdae0dc9cea6cbfcb7e27f21393f88 Mon Sep 17 00:00:00 2001 From: Ashok Raj Nagarajan Date: Thu, 18 Aug 2016 15:30:04 +0530 Subject: ath10k: fix sending frame in management path in push txq logic 
In the wake tx queue path, we are not checking if the frame to be sent takes management path or not. For example, a QoS null func frame coming here will take the management path. Since we are not incrementing the descriptor counter (num_pending_mgmt_tx) w.r.t tx management, on tx completion it is possible to see negative values. When the above counter reaches a negative value, we will not be sending a probe response out. if (is_presp && ar->hw_params.max_probe_resp_desc_thres < htt->num_pending_mgmt_tx) For IPQ4019, max_probe_resp_desc_thres (u32), which is 24, is compared against num_pending_mgmt_tx (int), and the above condition becomes true if the counter is negative, so we drop the probe response. To avoid this, check on the wake tx queue path as well for the tx path of the frame and increment the appropriate counters Fixes: cac085524cf1 "ath10k: move mgmt descriptor limit handle under mgmt_tx" Signed-off-by: Ashok Raj Nagarajan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/mac.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 05250af..ac7a368 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -3777,7 +3777,9 @@ int ath10k_mac_tx_push_txq(struct ieee80211_hw *hw, enum ath10k_hw_txrx_mode txmode; enum ath10k_mac_tx_path txpath; struct sk_buff *skb; + struct ieee80211_hdr *hdr; size_t skb_len; + bool is_mgmt, is_presp; int ret; spin_lock_bh(&ar->htt.tx_lock); @@ -3801,6 +3803,22 @@ int ath10k_mac_tx_push_txq(struct ieee80211_hw *hw, skb_len = skb->len; txmode = ath10k_mac_tx_h_get_txmode(ar, vif, sta, skb); txpath = ath10k_mac_tx_h_get_txpath(ar, skb, txmode); + is_mgmt = (txpath == ATH10K_MAC_TX_HTT_MGMT); + + if (is_mgmt) { + hdr = (struct ieee80211_hdr *)skb->data; + is_presp = ieee80211_is_probe_resp(hdr->frame_control); + + spin_lock_bh(&ar->htt.tx_lock); + ret = ath10k_htt_tx_mgmt_inc_pending(htt, is_mgmt,
is_presp); + + if (ret) { + ath10k_htt_tx_dec_pending(htt); + spin_unlock_bh(&ar->htt.tx_lock); + return ret; + } + spin_unlock_bh(&ar->htt.tx_lock); + } ret = ath10k_mac_tx(ar, vif, sta, txmode, txpath, skb); if (unlikely(ret)) { @@ -3808,6 +3826,8 @@ int ath10k_mac_tx_push_txq(struct ieee80211_hw *hw, spin_lock_bh(&ar->htt.tx_lock); ath10k_htt_tx_dec_pending(htt); + if (is_mgmt) + ath10k_htt_tx_mgmt_dec_pending(htt); spin_unlock_bh(&ar->htt.tx_lock); return ret; -- cgit v1.1 From 881ed54ecc138776adc20058c43d93f9b24f8b6d Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Thu, 18 Aug 2016 15:12:06 +0200 Subject: ath10k: use complete() instead complete_all() There is only one waiter for the completion, therefore there is no need to use complete_all(). Let's make that clear by using complete() instead of complete_all(). The usage pattern of the completion is: waiter context waker context scan.started ------------ ath10k_start_scan() lockdep_assert_held(conf_mutex) ath10k_wmi_start_scan() wait_for_completion_timeout(scan.started) ath10k_wmi_event_scan_start_failed() complete(scan.started) ath10k_wmi_event_scan_started() complete(scan.started) scan.completed -------------- ath10k_scan_stop() lockdep_assert_held(conf_mutex) ath10k_wmi_stop_scan() wait_for_completion_timeout(scan.completed) __ath10k_scan_finish() complete(scan.completed) scan.on_channel --------------- ath10k_remain_on_channel() mutex_lock(conf_mutex) ath10k_start_scan() wait_for_completion_timeout(scan.on_channel) ath10k_wmi_event_scan_foreign_chan() complete(scan.on_channel) offchan_tx_completed -------------------- ath10k_offchan_tx_work() mutex_lock(conf_mutex) reinit_completion(offchan_tx_completed) wait_for_completion_timeout(offchan_tx_completed) ath10k_report_offchan_tx() complete(offchan_tx_completed) install_key_done ---------------- ath10k_install_key() lockdep_assert_held(conf_mutex) reinit_completion(install_key_done) wait_for_completion_timeout(install_key_done)
ath10k_htt_t2h_msg_handler() complete(install_key_done) vdev_setup_done --------------- ath10k_monitor_vdev_start() lockdep_assert_held(conf_mutex) reinit_completion(vdev_setup_done) ath10k_vdev_setup_sync() wait_for_completion_timeout(vdev_setup_done) ath10k_wmi_event_vdev_start_resp() complete(vdev_setup_done) ath10k_monitor_vdev_stop() lockdep_assert_held(conf_mutex) reinit_completion(vdev_setup_done() ath10k_vdev_setup_sync() wait_for_completion_timeout(vdev_setup_done) ath10k_wmi_event_vdev_stopped() complete(vdev_setup_done) thermal.wmi_sync ---------------- ath10k_thermal_show_temp() mutex_lock(conf_mutex) reinit_completion(thermal.wmi_sync) wait_for_completion_timeout(thermal.wmi_sync) ath10k_thermal_event_temperature() complete(thermal.wmi_sync) bss_survey_done --------------- ath10k_mac_update_bss_chan_survey lockdep_assert_held(conf_mutex) reinit_completion(bss_survey_done) wait_for_completion_timeout(bss_survey_done) ath10k_wmi_event_pdev_bss_chan_info() complete(bss_survey_done) All complete() calls happen while the conf_mutex is taken. That means at max one waiter is possible. 
Signed-off-by: Daniel Wagner Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 16 ++++++++-------- drivers/net/wireless/ath/ath10k/mac.c | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index ffedc03..a9b9fb0 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -1501,14 +1501,14 @@ static void ath10k_core_restart(struct work_struct *work) ieee80211_stop_queues(ar->hw); ath10k_drain_tx(ar); - complete_all(&ar->scan.started); - complete_all(&ar->scan.completed); - complete_all(&ar->scan.on_channel); - complete_all(&ar->offchan_tx_completed); - complete_all(&ar->install_key_done); - complete_all(&ar->vdev_setup_done); - complete_all(&ar->thermal.wmi_sync); - complete_all(&ar->bss_survey_done); + complete(&ar->scan.started); + complete(&ar->scan.completed); + complete(&ar->scan.on_channel); + complete(&ar->offchan_tx_completed); + complete(&ar->install_key_done); + complete(&ar->vdev_setup_done); + complete(&ar->thermal.wmi_sync); + complete(&ar->bss_survey_done); wake_up(&ar->htt.empty_tx_wq); wake_up(&ar->wmi.tx_credits_wq); wake_up(&ar->peer_mapping_wq); diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index ac7a368..de6e65f 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -3914,7 +3914,7 @@ void __ath10k_scan_finish(struct ath10k *ar) ar->scan.roc_freq = 0; ath10k_offchan_tx_purge(ar); cancel_delayed_work(&ar->scan.timeout); - complete_all(&ar->scan.completed); + complete(&ar->scan.completed); break; } } -- cgit v1.1 From afcbc82cea527a046d66ff3088a75e56417abfc5 Mon Sep 17 00:00:00 2001 From: Maharaja Kennadyrajan Date: Tue, 23 Aug 2016 15:35:36 +0530 Subject: ath10k: Added support for extended dbglog module id for 10.4 For 10.4 fw versions, dbglog module id has been extended from u32 to u64, hence this patch 
fixes the same in the ath10k driver side. This patch doesn't break the older 10.4 releases. The FW change is already present in the older FWs. Signed-off-by: Maharaja Kennadyrajan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.h | 2 +- drivers/net/wireless/ath/ath10k/debug.c | 11 ++++---- drivers/net/wireless/ath/ath10k/wmi-ops.h | 4 +-- drivers/net/wireless/ath/ath10k/wmi-tlv.c | 2 +- drivers/net/wireless/ath/ath10k/wmi.c | 42 +++++++++++++++++++++++++++++-- drivers/net/wireless/ath/ath10k/wmi.h | 14 +++++++++++ 6 files changed, 64 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index 7e329dc..e13e078 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -441,7 +441,7 @@ struct ath10k_debug { struct completion tpc_complete; /* protected by conf_mutex */ - u32 fw_dbglog_mask; + u64 fw_dbglog_mask; u32 fw_dbglog_level; u32 pktlog_filter; u32 reg_addr; diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c index 8f0fd41..832da6e 100644 --- a/drivers/net/wireless/ath/ath10k/debug.c +++ b/drivers/net/wireless/ath/ath10k/debug.c @@ -1228,9 +1228,9 @@ static ssize_t ath10k_read_fw_dbglog(struct file *file, { struct ath10k *ar = file->private_data; unsigned int len; - char buf[64]; + char buf[96]; - len = scnprintf(buf, sizeof(buf), "0x%08x %u\n", + len = scnprintf(buf, sizeof(buf), "0x%16llx %u\n", ar->debug.fw_dbglog_mask, ar->debug.fw_dbglog_level); return simple_read_from_buffer(user_buf, count, ppos, buf, len); @@ -1242,15 +1242,16 @@ static ssize_t ath10k_write_fw_dbglog(struct file *file, { struct ath10k *ar = file->private_data; int ret; - char buf[64]; - unsigned int log_level, mask; + char buf[96]; + unsigned int log_level; + u64 mask; simple_write_to_buffer(buf, sizeof(buf) - 1, ppos, user_buf, count); /* make sure that buf is null terminated */ buf[sizeof(buf) - 1] = 0; - ret = 
sscanf(buf, "%x %u", &mask, &log_level); + ret = sscanf(buf, "%llx %u", &mask, &log_level); if (!ret) return -EINVAL; diff --git a/drivers/net/wireless/ath/ath10k/wmi-ops.h b/drivers/net/wireless/ath/ath10k/wmi-ops.h index c67eda7..c9a8bb1 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-ops.h +++ b/drivers/net/wireless/ath/ath10k/wmi-ops.h @@ -125,7 +125,7 @@ struct wmi_ops { enum wmi_force_fw_hang_type type, u32 delay_ms); struct sk_buff *(*gen_mgmt_tx)(struct ath10k *ar, struct sk_buff *skb); - struct sk_buff *(*gen_dbglog_cfg)(struct ath10k *ar, u32 module_enable, + struct sk_buff *(*gen_dbglog_cfg)(struct ath10k *ar, u64 module_enable, u32 log_level); struct sk_buff *(*gen_pktlog_enable)(struct ath10k *ar, u32 filter); struct sk_buff *(*gen_pktlog_disable)(struct ath10k *ar); @@ -945,7 +945,7 @@ ath10k_wmi_force_fw_hang(struct ath10k *ar, } static inline int -ath10k_wmi_dbglog_cfg(struct ath10k *ar, u32 module_enable, u32 log_level) +ath10k_wmi_dbglog_cfg(struct ath10k *ar, u64 module_enable, u32 log_level) { struct sk_buff *skb; diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c index a42f52d..e64f593 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c +++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c @@ -2468,7 +2468,7 @@ ath10k_wmi_tlv_op_gen_force_fw_hang(struct ath10k *ar, } static struct sk_buff * -ath10k_wmi_tlv_op_gen_dbglog_cfg(struct ath10k *ar, u32 module_enable, +ath10k_wmi_tlv_op_gen_dbglog_cfg(struct ath10k *ar, u64 module_enable, u32 log_level) { struct wmi_tlv_dbglog_cmd *cmd; struct wmi_tlv *tlv; diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 15b7efc..eb4ab6f 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -6937,7 +6937,7 @@ ath10k_wmi_op_gen_force_fw_hang(struct ath10k *ar, } static struct sk_buff * -ath10k_wmi_op_gen_dbglog_cfg(struct ath10k *ar, u32 module_enable, 
+ath10k_wmi_op_gen_dbglog_cfg(struct ath10k *ar, u64 module_enable, u32 log_level) { struct wmi_dbglog_cfg_cmd *cmd; @@ -6975,6 +6975,44 @@ ath10k_wmi_op_gen_dbglog_cfg(struct ath10k *ar, u32 module_enable, } static struct sk_buff * +ath10k_wmi_10_4_op_gen_dbglog_cfg(struct ath10k *ar, u64 module_enable, + u32 log_level) +{ + struct wmi_10_4_dbglog_cfg_cmd *cmd; + struct sk_buff *skb; + u32 cfg; + + skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); + if (!skb) + return ERR_PTR(-ENOMEM); + + cmd = (struct wmi_10_4_dbglog_cfg_cmd *)skb->data; + + if (module_enable) { + cfg = SM(log_level, + ATH10K_DBGLOG_CFG_LOG_LVL); + } else { + /* set back defaults, all modules with WARN level */ + cfg = SM(ATH10K_DBGLOG_LEVEL_WARN, + ATH10K_DBGLOG_CFG_LOG_LVL); + module_enable = ~0; + } + + cmd->module_enable = __cpu_to_le64(module_enable); + cmd->module_valid = __cpu_to_le64(~0); + cmd->config_enable = __cpu_to_le32(cfg); + cmd->config_valid = __cpu_to_le32(ATH10K_DBGLOG_CFG_LOG_LVL_MASK); + + ath10k_dbg(ar, ATH10K_DBG_WMI, + "wmi dbglog cfg modules 0x%016llx 0x%016llx config %08x %08x\n", + __le64_to_cpu(cmd->module_enable), + __le64_to_cpu(cmd->module_valid), + __le32_to_cpu(cmd->config_enable), + __le32_to_cpu(cmd->config_valid)); + return skb; +} + +static struct sk_buff * ath10k_wmi_op_gen_pktlog_enable(struct ath10k *ar, u32 ev_bitmap) { struct wmi_pdev_pktlog_enable_cmd *cmd; @@ -8092,7 +8130,7 @@ static const struct wmi_ops wmi_10_4_ops = { .gen_pdev_set_wmm = ath10k_wmi_op_gen_pdev_set_wmm, .gen_force_fw_hang = ath10k_wmi_op_gen_force_fw_hang, .gen_mgmt_tx = ath10k_wmi_op_gen_mgmt_tx, - .gen_dbglog_cfg = ath10k_wmi_op_gen_dbglog_cfg, + .gen_dbglog_cfg = ath10k_wmi_10_4_op_gen_dbglog_cfg, .gen_pktlog_enable = ath10k_wmi_op_gen_pktlog_enable, .gen_pktlog_disable = ath10k_wmi_op_gen_pktlog_disable, .gen_pdev_set_quiet_mode = ath10k_wmi_op_gen_pdev_set_quiet_mode, diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h index 2f89c4b..48e04b9 
100644 --- a/drivers/net/wireless/ath/ath10k/wmi.h +++ b/drivers/net/wireless/ath/ath10k/wmi.h @@ -6174,6 +6174,20 @@ struct wmi_dbglog_cfg_cmd { __le32 config_valid; } __packed; +struct wmi_10_4_dbglog_cfg_cmd { + /* bitmask to hold mod id config*/ + __le64 module_enable; + + /* see ATH10K_DBGLOG_CFG_ */ + __le32 config_enable; + + /* mask of module id bits to be changed */ + __le64 module_valid; + + /* mask of config bits to be changed, see ATH10K_DBGLOG_CFG_ */ + __le32 config_valid; +} __packed; + enum wmi_roam_reason { WMI_ROAM_REASON_BETTER_AP = 1, WMI_ROAM_REASON_BEACON_MISS = 2, -- cgit v1.1 From 749bc03ae2cd763df19ab8000d21b4342ed3383c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 24 Aug 2016 01:27:26 +0900 Subject: ath10k: replace config_enabled() with IS_REACHABLE() Commit 97f2645f358b ("tree-wide: replace config_enabled() with IS_ENABLED()") mostly did away with config_enabled(). This is one of the postponed TODO items as config_enabled() is used for a tristate option here. Theoretically, config_enabled() is equivalent to IS_BUILTIN(), but I guess IS_REACHABLE() is the best fit for this case because both CONFIG_HWMON and CONFIG_ATH10K are tristate. Signed-off-by: Masahiro Yamada Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/thermal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/thermal.c b/drivers/net/wireless/ath/ath10k/thermal.c index 444b52c..0a47269 100644 --- a/drivers/net/wireless/ath/ath10k/thermal.c +++ b/drivers/net/wireless/ath/ath10k/thermal.c @@ -192,7 +192,7 @@ int ath10k_thermal_register(struct ath10k *ar) /* Avoid linking error on devm_hwmon_device_register_with_groups, I * guess linux/hwmon.h is missing proper stubs. 
*/ - if (!config_enabled(CONFIG_HWMON)) + if (!IS_REACHABLE(CONFIG_HWMON)) return 0; hwmon_dev = devm_hwmon_device_register_with_groups(ar->dev, -- cgit v1.1 From 2cdce425aa3301648e3a68a361f7f48b681fc5a6 Mon Sep 17 00:00:00 2001 From: Mohammed Shafi Shajakhan Date: Fri, 26 Aug 2016 13:42:20 +0530 Subject: ath10k: Fix broken NULL func data frame status for 10.4 Older firmware with HTT delivers incorrect tx status for null func frames to driver, but this fixed in 10.2 and 10.4 firmware versions. Also this workaround results in reporting of incorrect null func status for 10.4. Fix this is by introducing a firmware feature flag for 10.4 so that this workaround is skipped and proper tx status for null func frames are reported Signed-off-by: Tamizh chelvam Signed-off-by: Mohammed Shafi Shajakhan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 1 + drivers/net/wireless/ath/ath10k/core.h | 7 +++++++ drivers/net/wireless/ath/ath10k/mac.c | 2 ++ 3 files changed, 10 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index a9b9fb0..c9d163e 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -308,6 +308,7 @@ static const char *const ath10k_core_fw_feature_str[] = { [ATH10K_FW_FEATURE_MFP_SUPPORT] = "mfp", [ATH10K_FW_FEATURE_PEER_FLOW_CONTROL] = "peer-flow-ctrl", [ATH10K_FW_FEATURE_BTCOEX_PARAM] = "btcoex-param", + [ATH10K_FW_FEATURE_SKIP_NULL_FUNC_WAR] = "skip-null-func-war", }; static unsigned int ath10k_core_get_fw_feature_str(char *buf, diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index e13e078..b367e9c 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -552,6 +552,13 @@ enum ath10k_fw_features { */ ATH10K_FW_FEATURE_BTCOEX_PARAM = 14, + /* Older firmware with HTT delivers incorrect tx status for null func + * frames to driver, but this fixed in 10.2 and 
10.4 firmware versions. + * Also this workaround results in reporting of incorrect null func + * status for 10.4. This flag is used to skip the workaround. + */ + ATH10K_FW_FEATURE_SKIP_NULL_FUNC_WAR = 15, + /* keep last */ ATH10K_FW_FEATURE_COUNT, }; diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index de6e65f..a110325 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -3255,6 +3255,8 @@ ath10k_mac_tx_h_get_txmode(struct ath10k *ar, if (ar->htt.target_version_major < 3 && (ieee80211_is_nullfunc(fc) || ieee80211_is_qos_nullfunc(fc)) && !test_bit(ATH10K_FW_FEATURE_HAS_WMI_MGMT_TX, + ar->running_fw->fw_file.fw_features) && + !test_bit(ATH10K_FW_FEATURE_SKIP_NULL_FUNC_WAR, ar->running_fw->fw_file.fw_features)) return ATH10K_HW_TXRX_MGMT; -- cgit v1.1 From 7f03d3069381266278c058c4ce8349a0d172da7b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 26 Aug 2016 19:08:52 +0100 Subject: ath10k: fix spelling mistake "montior" -> "monitor" Trivial fix to spelling mistake in ath10k_warn message. 
Signed-off-by: Colin Ian King Reviewed-by: Julian Calaby Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index a110325..4565321 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -5224,7 +5224,7 @@ static void ath10k_configure_filter(struct ieee80211_hw *hw, ret = ath10k_monitor_recalc(ar); if (ret) - ath10k_warn(ar, "failed to recalc montior: %d\n", ret); + ath10k_warn(ar, "failed to recalc monitor: %d\n", ret); mutex_unlock(&ar->conf_mutex); } -- cgit v1.1 From c39265f72ae6dbcb0367be808837e2f182095d15 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Mon, 29 Aug 2016 20:21:13 +0800 Subject: ath9k: mark ath_fill_led_pin() static We get 1 warning about global functions without a declaration in the ath9k gpio driver when building with W=1: drivers/net/wireless/ath/ath9k/gpio.c:25:6: warning: no previous prototype for 'ath_fill_led_pin' [-Wmissing-prototypes] In fact, this function is only used in the file in which it is declared and don't need a declaration, but can be made static. so this patch marks it 'static'. 
Signed-off-by: Baoyou Xie Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/gpio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/gpio.c b/drivers/net/wireless/ath/ath9k/gpio.c index 490f74d..ddb2886 100644 --- a/drivers/net/wireless/ath/ath9k/gpio.c +++ b/drivers/net/wireless/ath/ath9k/gpio.c @@ -22,7 +22,7 @@ #ifdef CONFIG_MAC80211_LEDS -void ath_fill_led_pin(struct ath_softc *sc) +static void ath_fill_led_pin(struct ath_softc *sc) { struct ath_hw *ah = sc->sc_ah; -- cgit v1.1 From 753246840d012ae34ea80a1d40bc1546c62fb957 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sat, 27 Aug 2016 16:19:49 +0000 Subject: drivers/perf: arm_pmu: Fix leak in error path In case of a IRQ type mismatch in of_pmu_irq_cfg() the device node for interrupt affinity isn't freed. So fix this issue by calling of_node_put(). Signed-off-by: Stefan Wahren Fixes: fa8ad7889d83 ("arm: perf: factor arm_pmu core out to drivers") Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- drivers/perf/arm_pmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index c494613..1b48bf0 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -925,6 +925,7 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu) if (i > 0 && spi != using_spi) { pr_err("PPI/SPI IRQ type mismatch for %s!\n", dn->name); + of_node_put(dn); kfree(irqs); return -EINVAL; } -- cgit v1.1 From 63fb0a9516b2c4e23293d7253c14c40aa9c2b7d1 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sat, 27 Aug 2016 16:19:50 +0000 Subject: drivers/perf: arm_pmu: Fix NULL pointer dereference during probe Patch 7f1d642fbb5c ("drivers/perf: arm-pmu: Fix handling of SPI lacking interrupt-affinity property") unintended also fixes perf_event support for bcm2835 which doesn't have PMU interrupts. 
Unfortunately this change introduce a NULL pointer dereference on bcm2835, because irq_is_percpu always expected to be called with a valid IRQ. So fix this regression by validating the IRQ before. Tested-by: Kevin Hilman Signed-off-by: Stefan Wahren Fixes: 7f1d642fbb5c ("drivers/perf: arm-pmu: Fix handling of SPI lacking "interrupt-affinity" property") Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- drivers/perf/arm_pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 1b48bf0..f5e1008 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -970,7 +970,7 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu) if (cpumask_weight(&pmu->supported_cpus) == 0) { int irq = platform_get_irq(pdev, 0); - if (irq_is_percpu(irq)) { + if (irq >= 0 && irq_is_percpu(irq)) { /* If using PPIs, check the affinity of the partition */ int ret; -- cgit v1.1 From 744c6c37cc18705d19e179622f927f5b781fe9cc Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 26 Aug 2016 16:03:42 +0100 Subject: arm64: kernel: Fix unmasked debug exceptions when restoring mdscr_el1 Changes to make the resume from cpu_suspend() code behave more like secondary boot caused debug exceptions to be unmasked early by __cpu_setup(). We then go on to restore mdscr_el1 in cpu_do_resume(), potentially taking break or watch points based on uninitialised registers. Mask debug exceptions in cpu_do_resume(), which is specific to resume from cpu_suspend(). Debug exceptions will be restored to their original state by local_dbg_restore() in cpu_suspend(), which runs after hw_breakpoint_restore() has re-initialised the other registers. 
Reported-by: Lorenzo Pieralisi Fixes: cabe1c81ea5b ("arm64: Change cpu_resume() to enable mmu early then access sleep_sp by va") Cc: # 4.7+ Signed-off-by: James Morse Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/mm/proc.S | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 5bb61de..9d37e96 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -100,7 +100,16 @@ ENTRY(cpu_do_resume) msr tcr_el1, x8 msr vbar_el1, x9 + + /* + * __cpu_setup() cleared MDSCR_EL1.MDE and friends, before unmasking + * debug exceptions. By restoring MDSCR_EL1 here, we may take a debug + * exception. Mask them until local_dbg_restore() in cpu_suspend() + * resets them. + */ + disable_dbg msr mdscr_el1, x10 + msr sctlr_el1, x12 /* * Restore oslsr_el1 by writing oslar_el1 -- cgit v1.1 From db7b542e4a789156a4fb61278f2074795fe2eb9c Mon Sep 17 00:00:00 2001 From: Miaoqing Pan Date: Thu, 4 Aug 2016 15:48:34 +0800 Subject: ath9k: fix AR5416 access GPIO warning The warning was seen on AR5416 chip, which invoke ath9k_hw_gio_get() before the GPIO initialized correctly. WARNING: CPU: 1 PID: 1159 at ~/drivers/net/wireless/ath/ath9k/hw.c:2776 ath9k_hw_gpio_get+0x148/0x1a0 [ath9k_hw] ... CPU: 1 PID: 1159 Comm: systemd-udevd Not tainted 4.7.0-rc7-aptosid-amd64 #1 aptosid 4.7~rc7-1~git92.slh.3 Hardware name: /DH67CL, BIOS BLH6710H.86A.0160.2012.1204.1156 12/04/2012 0000000000000286 00000000f912d633 ffffffff81290fd3 0000000000000000 0000000000000000 ffffffff81063fd4 ffff88040c6dc018 0000000000000000 0000000000000002 0000000000000000 0000000000000100 ffff88040c6dc018 Call Trace: [] ? dump_stack+0x5c/0x79 [] ? __warn+0xb4/0xd0 [] ? 
ath9k_hw_gpio_get+0x148/0x1a0 [ath9k_hw] Signed-off-by: Miaoqing Pan Reported-by: Stefan Lippers-Hollmann Tested-by: Stefan Lippers-Hollmann Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/hw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index d1d0c06..14b13f0 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -2482,6 +2482,8 @@ int ath9k_hw_fill_cap_info(struct ath_hw *ah) return -EINVAL; } + ath9k_gpio_cap_init(ah); + if (AR_SREV_9485(ah) || AR_SREV_9285(ah) || AR_SREV_9330(ah) || @@ -2531,8 +2533,6 @@ int ath9k_hw_fill_cap_info(struct ath_hw *ah) else pCap->hw_caps &= ~ATH9K_HW_CAP_HT; - ath9k_gpio_cap_init(ah); - if (AR_SREV_9160_10_OR_LATER(ah) || AR_SREV_9100(ah)) pCap->rts_aggr_limit = ATH_AMPDU_LIMIT_MAX; else -- cgit v1.1 From 15301a570754c7af60335d094dd2d1808b0641a5 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 25 May 2016 13:47:26 -0400 Subject: x86/paravirt: Do not trace _paravirt_ident_*() functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Łukasz Daniluk reported that on a RHEL kernel his machine would lock up after enabling function tracer. I asked him to bisect the functions within available_filter_functions, which he did and it came down to three: _paravirt_nop(), _paravirt_ident_32() and _paravirt_ident_64() It was found that this is only an issue when noreplace-paravirt is added to the kernel command line. This means that those functions are most likely called within critical sections of the function tracer, and must not be traced. In newer kernels _paravirt_nop() is defined within gcc asm(), and is no longer an issue. 
But both _paravirt_ident_{32,64}() causes the following splat when they are traced: mm/pgtable-generic.c:33: bad pmd ffff8800d2435150(0000000001d00054) mm/pgtable-generic.c:33: bad pmd ffff8800d3624190(0000000001d00070) mm/pgtable-generic.c:33: bad pmd ffff8800d36a5110(0000000001d00054) mm/pgtable-generic.c:33: bad pmd ffff880118eb1450(0000000001d00054) NMI watchdog: BUG: soft lockup - CPU#2 stuck for 22s! [systemd-journal:469] Modules linked in: e1000e CPU: 2 PID: 469 Comm: systemd-journal Not tainted 4.6.0-rc4-test+ #513 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v02.05 05/07/2012 task: ffff880118f740c0 ti: ffff8800d4aec000 task.ti: ffff8800d4aec000 RIP: 0010:[] [] queued_spin_lock_slowpath+0x118/0x1a0 RSP: 0018:ffff8800d4aefb90 EFLAGS: 00000246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff88011eb16d40 RDX: ffffffff82485760 RSI: 000000001f288820 RDI: ffffea0000008030 RBP: ffff8800d4aefb90 R08: 00000000000c0000 R09: 0000000000000000 R10: ffffffff821c8e0e R11: 0000000000000000 R12: ffff880000200fb8 R13: 00007f7a4e3f7000 R14: ffffea000303f600 R15: ffff8800d4b562e0 FS: 00007f7a4e3d7840(0000) GS:ffff88011eb00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f7a4e3f7000 CR3: 00000000d3e71000 CR4: 00000000001406e0 Call Trace: _raw_spin_lock+0x27/0x30 handle_pte_fault+0x13db/0x16b0 handle_mm_fault+0x312/0x670 __do_page_fault+0x1b1/0x4e0 do_page_fault+0x22/0x30 page_fault+0x28/0x30 __vfs_read+0x28/0xe0 vfs_read+0x86/0x130 SyS_read+0x46/0xa0 entry_SYSCALL_64_fastpath+0x1e/0xa8 Code: 12 48 c1 ea 0c 83 e8 01 83 e2 30 48 98 48 81 c2 40 6d 01 00 48 03 14 c5 80 6a 5d 82 48 89 0a 8b 41 08 85 c0 75 09 f3 90 8b 41 08 <85> c0 74 f7 4c 8b 09 4d 85 c9 74 08 41 0f 18 09 eb 02 f3 90 8b Reported-by: Łukasz Daniluk Signed-off-by: Steven Rostedt Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- arch/x86/kernel/paravirt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index ad5bc95..1acfd76 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -56,12 +56,12 @@ asm (".pushsection .entry.text, \"ax\"\n" ".popsection"); /* identity function, which can be inlined */ -u32 _paravirt_ident_32(u32 x) +u32 notrace _paravirt_ident_32(u32 x) { return x; } -u64 _paravirt_ident_64(u64 x) +u64 notrace _paravirt_ident_64(u64 x) { return x; } -- cgit v1.1 From fffd68734dc685e208e86d8c5f6522cd695a8d60 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Fri, 26 Aug 2016 07:16:17 +0200 Subject: IB/mlx5: Fix the size parameter to find_first_bit The 2nd parameter of 'find_first_bit' is the number of bits to search. In this case, we are passing 'sizeof(tmp)' which is likely to be 4 or 8 because 'tmp' is an 'unsigned long'. It is likely that the number of bits of 'tmp' was expected here. So use BITS_PER_LONG instead. It has been spotted by the following coccinelle script: @@ expression ret, x; @@ * ret = \(find_first_bit \| find_first_zero_bit\) (x, sizeof(...)); Signed-off-by: Christophe JAILLET Acked-by: Majd Dibbiny Acked-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mem.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 40df2cc..996b54e 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -71,7 +71,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, addr = addr >> page_shift; tmp = (unsigned long)addr; - m = find_first_bit(&tmp, sizeof(tmp)); + m = find_first_bit(&tmp, BITS_PER_LONG); skip = 1 << m; mask = skip - 1; i = 0; @@ -81,7 +81,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, for (k = 0; k < len; k++) { if (!(i & mask)) { tmp = (unsigned long)pfn; - m = min_t(unsigned long, m, find_first_bit(&tmp, sizeof(tmp))); + m = 
min_t(unsigned long, m, find_first_bit(&tmp, BITS_PER_LONG)); skip = 1 << m; mask = skip - 1; base = pfn; @@ -89,7 +89,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, } else { if (base + p != pfn) { tmp = (unsigned long)p; - m = find_first_bit(&tmp, sizeof(tmp)); + m = find_first_bit(&tmp, BITS_PER_LONG); skip = 1 << m; mask = skip - 1; base = pfn; -- cgit v1.1 From 6aaa382f1267644072f288916476879684502f73 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Fri, 26 Aug 2016 06:49:09 +0200 Subject: IB/hfi1: Fix the size parameter to find_first_bit The 2nd parameter of 'find_first_bit' is the number of bits to search. In this case, we are passing 'sizeof(u64)' which is 8. It is likely that the number of bits of 'port_mask' was expected here. Use sizeof() * 8 to get the correct number. It has been spotted by the following coccinelle script: @@ expression ret, x; @@ * ret = \(find_first_bit \| find_first_zero_bit\) (x, sizeof(...)); Signed-off-by: Christophe JAILLET Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mad.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 9912d2c..7ffc14f 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -2638,7 +2638,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, */ port_mask = be64_to_cpu(req->port_select_mask[3]); port_num = find_first_bit((unsigned long *)&port_mask, - sizeof(port_mask)); + sizeof(port_mask) * 8); if (port_num != port) { pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; @@ -2842,7 +2842,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp, */ port_mask = be64_to_cpu(req->port_select_mask[3]); port_num = find_first_bit((unsigned long *)&port_mask, - sizeof(port_mask)); + sizeof(port_mask) * 8); if (port_num != port) { pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; @@ -3015,7 +3015,7 @@ static int 
pma_get_opa_errorinfo(struct opa_pma_mad *pmp, */ port_mask = be64_to_cpu(req->port_select_mask[3]); port_num = find_first_bit((unsigned long *)&port_mask, - sizeof(port_mask)); + sizeof(port_mask) * 8); if (port_num != port) { pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; @@ -3252,7 +3252,7 @@ static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp, */ port_mask = be64_to_cpu(req->port_select_mask[3]); port_num = find_first_bit((unsigned long *)&port_mask, - sizeof(port_mask)); + sizeof(port_mask) * 8); if (port_num != port) { pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; -- cgit v1.1 From 63b268d232b869dfbc92e49c77f7e0648e1d039c Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Mon, 29 Aug 2016 17:15:49 +0530 Subject: IB/isert: Properly release resources on DEVICE_REMOVAL When the low level driver exercises the hot unplug they would call rdma_cm cma_remove_one which would fire DEVICE_REMOVAL event to all cma consumers. Now, if consumer doesn't make sure they destroy all IB objects created on that IB device instance prior to finalizing all processing of DEVICE_REMOVAL callback, rdma_cm will let the lld to de-register with IB core and destroy the IB device instance. And if the consumer calls (say) ib_dereg_mr(), it will crash since that dev object is NULL. In the current implementation, iser-target just initiates the cleanup and returns from DEVICE_REMOVAL callback. This deferred work creates a race between iser-target cleaning IB objects(say MR) and lld destroying IB device instance. 
This patch includes the following fixes -> make sure that consumer frees all IB objects associated with device instance -> return non-zero from the callback to destroy the rdma_cm id Signed-off-by: Raju Rangoju Acked-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/isert/ib_isert.c | 23 ++++++++++++++++++++--- drivers/infiniband/ulp/isert/ib_isert.h | 2 ++ 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 7914c14..cae9bbc 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -403,6 +403,7 @@ isert_init_conn(struct isert_conn *isert_conn) INIT_LIST_HEAD(&isert_conn->node); init_completion(&isert_conn->login_comp); init_completion(&isert_conn->login_req_comp); + init_waitqueue_head(&isert_conn->rem_wait); kref_init(&isert_conn->kref); mutex_init(&isert_conn->mutex); INIT_WORK(&isert_conn->release_work, isert_release_work); @@ -578,7 +579,8 @@ isert_connect_release(struct isert_conn *isert_conn) BUG_ON(!device); isert_free_rx_descriptors(isert_conn); - if (isert_conn->cm_id) + if (isert_conn->cm_id && + !isert_conn->dev_removed) rdma_destroy_id(isert_conn->cm_id); if (isert_conn->qp) { @@ -593,7 +595,10 @@ isert_connect_release(struct isert_conn *isert_conn) isert_device_put(device); - kfree(isert_conn); + if (isert_conn->dev_removed) + wake_up_interruptible(&isert_conn->rem_wait); + else + kfree(isert_conn); } static void @@ -753,6 +758,7 @@ static int isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { struct isert_np *isert_np = cma_id->context; + struct isert_conn *isert_conn; int ret = 0; isert_info("%s (%d): status %d id %p np %p\n", @@ -773,10 +779,21 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) break; case RDMA_CM_EVENT_ADDR_CHANGE: /* FALLTHRU */ case RDMA_CM_EVENT_DISCONNECTED: /* FALLTHRU */ - case RDMA_CM_EVENT_DEVICE_REMOVAL: /* 
FALLTHRU */ case RDMA_CM_EVENT_TIMEWAIT_EXIT: /* FALLTHRU */ ret = isert_disconnected_handler(cma_id, event->event); break; + case RDMA_CM_EVENT_DEVICE_REMOVAL: + isert_conn = cma_id->qp->qp_context; + isert_conn->dev_removed = true; + isert_disconnected_handler(cma_id, event->event); + wait_event_interruptible(isert_conn->rem_wait, + isert_conn->state == ISER_CONN_DOWN); + kfree(isert_conn); + /* + * return non-zero from the callback to destroy + * the rdma cm id + */ + return 1; case RDMA_CM_EVENT_REJECTED: /* FALLTHRU */ case RDMA_CM_EVENT_UNREACHABLE: /* FALLTHRU */ case RDMA_CM_EVENT_CONNECT_ERROR: diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index fc791ef..c02ada5 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -158,6 +158,8 @@ struct isert_conn { struct work_struct release_work; bool logout_posted; bool snd_w_inv; + wait_queue_head_t rem_wait; + bool dev_removed; }; #define ISERT_MAX_CQ 64 -- cgit v1.1 From 656aacea6c90ce8e15c2bdef4f89b74b73e2e34a Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Sun, 28 Aug 2016 22:57:11 +0800 Subject: IB/cxgb4: Make _free_qp static to silence build warning We get 1 warning when build kernel with W=1: drivers/infiniband/hw/cxgb4/qp.c:686:6: warning: no previous prototype for '_free_qp' [-Wmissing-prototypes] In fact, this function is only used in the file in which it is declared and don't need a declaration, but can be made static. so this patch marks it 'static'. 
Signed-off-by: Baoyou Xie Reviewed-by: Yuval Shaia Reviewed-by: Leon Romanovsky Acked-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index edb1172..6904352 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -683,7 +683,7 @@ static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, return 0; } -void _free_qp(struct kref *kref) +static void _free_qp(struct kref *kref) { struct c4iw_qp *qhp; -- cgit v1.1 From 68c6bcdd8bd00394c234b915ab9b97c74104130c Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Sun, 28 Aug 2016 10:58:30 +0300 Subject: IB/core: Fix use after free in send_leave function The function send_leave sets the member: group->query_id (group->query_id = ret) after calling the sa_query, but leave_handler can be executed before the setting and it might delete the group object, and will get a memory corruption. Additionally, this patch gets rid of group->query_id variable which is not used. 
Fixes: faec2f7b96b5 ('IB/sa: Track multicast join/leave requests') Signed-off-by: Erez Shitrit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/multicast.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 3a3c5d7..51c79b2 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -106,7 +106,6 @@ struct mcast_group { atomic_t refcount; enum mcast_group_state state; struct ib_sa_query *query; - int query_id; u16 pkey_index; u8 leave_state; int retries; @@ -340,11 +339,7 @@ static int send_join(struct mcast_group *group, struct mcast_member *member) member->multicast.comp_mask, 3000, GFP_KERNEL, join_handler, group, &group->query); - if (ret >= 0) { - group->query_id = ret; - ret = 0; - } - return ret; + return (ret > 0) ? 0 : ret; } static int send_leave(struct mcast_group *group, u8 leave_state) @@ -364,11 +359,7 @@ static int send_leave(struct mcast_group *group, u8 leave_state) IB_SA_MCMEMBER_REC_JOIN_STATE, 3000, GFP_KERNEL, leave_handler, group, &group->query); - if (ret >= 0) { - group->query_id = ret; - ret = 0; - } - return ret; + return (ret > 0) ? 0 : ret; } static void join_group(struct mcast_group *group, struct mcast_member *member, -- cgit v1.1 From 546481c2816ea3c061ee9d5658eb48070f69212e Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Sun, 28 Aug 2016 10:58:31 +0300 Subject: IB/ipoib: Fix memory corruption in ipoib cm mode connect flow When a new CM connection is being requested, the ipoib driver copies data from the path pointer in the CM/tx object. The path object might be invalid at that point, and memory corruption will happen later when the CM driver tries to use that data.
The next scenario demonstrates it: neigh_add_path --> ipoib_cm_create_tx --> queue_work (pointer to path is in the cm/tx struct) #while the work is still in the queue, #the port goes down and causes the ipoib_flush_paths: ipoib_flush_paths --> path_free --> kfree(path) #at this point the work scheduled starts. ipoib_cm_tx_start --> copy from the (invalid)path pointer: (memcpy(&pathrec, &p->path->pathrec, sizeof pathrec);) -> memory corruption. To fix that the driver now starts the CM/tx connection only if that specific path exists in the general paths database. This check is protected with the relevant locks, and uses the gid from the neigh member in the CM/tx object which is valid according to the ref count that was taken by the CM/tx. Fixes: 839fcaba35 ('IPoIB: Connected mode experimental support') Signed-off-by: Erez Shitrit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib.h | 1 + drivers/infiniband/ulp/ipoib/ipoib_cm.c | 16 ++++++++++++++++ drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 +- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 4f7d9b4..9dbfcc0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -478,6 +478,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_ah *address, u32 qpn); void ipoib_reap_ah(struct work_struct *work); +struct ipoib_path *__path_find(struct net_device *dev, void *gid); void ipoib_mark_paths_invalid(struct net_device *dev); void ipoib_flush_paths(struct net_device *dev); int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 951d9ab..4ad297d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1318,6 +1318,8 @@ void ipoib_cm_destroy_tx(struct 
ipoib_cm_tx *tx) } } +#define QPN_AND_OPTIONS_OFFSET 4 + static void ipoib_cm_tx_start(struct work_struct *work) { struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, @@ -1326,6 +1328,7 @@ static void ipoib_cm_tx_start(struct work_struct *work) struct ipoib_neigh *neigh; struct ipoib_cm_tx *p; unsigned long flags; + struct ipoib_path *path; int ret; struct ib_sa_path_rec pathrec; @@ -1338,7 +1341,19 @@ static void ipoib_cm_tx_start(struct work_struct *work) p = list_entry(priv->cm.start_list.next, typeof(*p), list); list_del_init(&p->list); neigh = p->neigh; + qpn = IPOIB_QPN(neigh->daddr); + /* + * As long as the search is with these 2 locks, + * path existence indicates its validity. + */ + path = __path_find(dev, neigh->daddr + QPN_AND_OPTIONS_OFFSET); + if (!path) { + pr_info("%s ignore not valid path %pI6\n", + __func__, + neigh->daddr + QPN_AND_OPTIONS_OFFSET); + goto free_neigh; + } memcpy(&pathrec, &p->path->pathrec, sizeof pathrec); spin_unlock_irqrestore(&priv->lock, flags); @@ -1350,6 +1365,7 @@ static void ipoib_cm_tx_start(struct work_struct *work) spin_lock_irqsave(&priv->lock, flags); if (ret) { +free_neigh: neigh = p->neigh; if (neigh) { neigh->cm = NULL; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 74bcaa0..cc1c1b0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -485,7 +485,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf) return -EINVAL; } -static struct ipoib_path *__path_find(struct net_device *dev, void *gid) +struct ipoib_path *__path_find(struct net_device *dev, void *gid) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct rb_node *n = priv->path_tree.rb_node; -- cgit v1.1 From 25b64fc5f2c0779b53ec155d69cc06b7cdc6e99f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 28 Aug 2016 10:58:32 +0300 Subject: Revert "IB/mlx4: Return EAGAIN for any error in mlx4_ib_poll_one" By Mellanox 
HW design and SW implementation, poll_cq never fails and returns errors, so all these printks are to catch ULP bugs. In case of such bug, the reverted patch will cause reentry of the function, resulting in a printk storm. This reverts commit 5412352fcd8f ("IB/mlx4: Return EAGAIN for any error in mlx4_ib_poll_one") Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/cq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 006db64..15b6289 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -690,7 +690,7 @@ repoll: if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP && is_send)) { pr_warn("Completion for NOP opcode detected!\n"); - return -EAGAIN; + return -EINVAL; } /* Resize CQ in progress */ @@ -721,7 +721,7 @@ repoll: if (unlikely(!mqp)) { pr_warn("CQ %06x with entry for unknown QPN %06x\n", cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK); - return -EAGAIN; + return -EINVAL; } *cur_qp = to_mibqp(mqp); @@ -739,7 +739,7 @@ repoll: if (unlikely(!msrq)) { pr_warn("CQ %06x with entry for unknown SRQN %06x\n", cq->mcq.cqn, srq_num); - return -EAGAIN; + return -EINVAL; } } -- cgit v1.1 From 20697434b6ea9c6d895ebc5217a46f18850a109f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 28 Aug 2016 10:58:33 +0300 Subject: IB/mlx4: Don't return errors from poll_cq Remove returning errors from mlx4 poll_cq function. Polling CQ operation in kernel never fails by Mellanox HCA architecture and respective driver design. 
Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/cq.c | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 15b6289..5df63da 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -687,12 +687,6 @@ repoll: is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_ERROR; - if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP && - is_send)) { - pr_warn("Completion for NOP opcode detected!\n"); - return -EINVAL; - } - /* Resize CQ in progress */ if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_RESIZE)) { if (cq->resize_buf) { @@ -718,12 +712,6 @@ repoll: */ mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev, be32_to_cpu(cqe->vlan_my_qpn)); - if (unlikely(!mqp)) { - pr_warn("CQ %06x with entry for unknown QPN %06x\n", - cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK); - return -EINVAL; - } - *cur_qp = to_mibqp(mqp); } @@ -736,11 +724,6 @@ repoll: /* SRQ is also in the radix tree */ msrq = mlx4_srq_lookup(to_mdev(cq->ibcq.device)->dev, srq_num); - if (unlikely(!msrq)) { - pr_warn("CQ %06x with entry for unknown SRQN %06x\n", - cq->mcq.cqn, srq_num); - return -EINVAL; - } } if (is_send) { @@ -891,7 +874,6 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) struct mlx4_ib_qp *cur_qp = NULL; unsigned long flags; int npolled; - int err = 0; struct mlx4_ib_dev *mdev = to_mdev(cq->ibcq.device); spin_lock_irqsave(&cq->lock, flags); @@ -901,8 +883,7 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) } for (npolled = 0; npolled < num_entries; ++npolled) { - err = mlx4_ib_poll_one(cq, &cur_qp, wc + npolled); - if (err) + if (mlx4_ib_poll_one(cq, &cur_qp, wc + npolled)) break; } @@ -911,10 +892,7 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, 
struct ib_wc *wc) out: spin_unlock_irqrestore(&cq->lock, flags); - if (err == 0 || err == -EAGAIN) - return npolled; - else - return err; + return npolled; } int mlx4_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) -- cgit v1.1 From 24be409beede1a7cbe95b1740c4cdade3b6a8187 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 28 Aug 2016 10:58:34 +0300 Subject: IB/mlx5: Return EINVAL when caller specifies too many SGEs The returned value should be EINVAL, because it is caused by wrong caller and not by internal overflow event. Signed-off-by: Chuck Lever Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 0dd7d93..acb3b72 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3758,7 +3758,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, num_sge = wr->num_sge; if (unlikely(num_sge > qp->sq.max_gs)) { mlx5_ib_warn(dev, "\n"); - err = -ENOMEM; + err = -EINVAL; *bad_wr = wr; goto out; } -- cgit v1.1 From b2a232d21f301d600b02c6f9ccbc9f977331bb39 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 28 Aug 2016 10:58:35 +0300 Subject: IB/mlx5: Simplify code by removing return variable Return variable was set in a line before the actual return was called in begin_wqe function. This patch removes such variable and simplifies the code. 
Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index acb3b72..174d09b 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3658,12 +3658,8 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, struct ib_send_wr *wr, unsigned *idx, int *size, int nreq) { - int err = 0; - - if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) { - err = -ENOMEM; - return err; - } + if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) + return -ENOMEM; *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); *seg = mlx5_get_send_wqe(qp, *idx); @@ -3679,7 +3675,7 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, *seg += sizeof(**ctrl); *size = sizeof(**ctrl) / 16; - return err; + return 0; } static void finish_wqe(struct mlx5_ib_qp *qp, -- cgit v1.1 From d9f88e5ab9a73058ebdde589219c0d37da250f06 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 28 Aug 2016 10:58:37 +0300 Subject: IB/mlx5: Use TIR number based on selector Use TIR number based on selector, it should be done to differentiate between RSS QP to RAW one. 
Reported-by: Sagi Grimberg Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Tested-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 6 +++++- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + drivers/infiniband/hw/mlx5/qp.c | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 1b4094b..8150ea3 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1849,6 +1849,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, int domain) { struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_ib_qp *mqp = to_mqp(qp); struct mlx5_ib_flow_handler *handler = NULL; struct mlx5_flow_destination *dst = NULL; struct mlx5_ib_flow_prio *ft_prio; @@ -1875,7 +1876,10 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, } dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; - dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn; + if (mqp->flags & MLX5_IB_QP_RSS) + dst->tir_num = mqp->rss_qp.tirn; + else + dst->tir_num = mqp->raw_packet_qp.rq.tirn; if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) { diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 372385d..95146f4 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -402,6 +402,7 @@ enum mlx5_ib_qp_flags { /* QP uses 1 as its source QP number */ MLX5_IB_QP_SQPN_QP1 = 1 << 6, MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7, + MLX5_IB_QP_RSS = 1 << 8, }; struct mlx5_umr_wr { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 174d09b..affc3f6 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1449,6 +1449,7 @@ create_tir: kvfree(in); /* qpn is reserved for that QP */ qp->trans_qp.base.mqp.qpn = 0; + qp->flags |= MLX5_IB_QP_RSS; return 0; err: -- cgit v1.1 From 
dbdf7d4e7f911f79ceb08365a756bbf6eecac81c Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 28 Aug 2016 10:58:38 +0300 Subject: IB/mlx5: Don't return errors from poll_cq Remove returning errors from mlx5 poll_cq function. Polling CQ operation in kernel never fails by Mellanox HCA architecture and respective driver design. Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/cq.c | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 308a358..e4fac929 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -553,12 +553,6 @@ repoll: * from the table. */ mqp = __mlx5_qp_lookup(dev->mdev, qpn); - if (unlikely(!mqp)) { - mlx5_ib_warn(dev, "CQE@CQ %06x for unknown QPN %6x\n", - cq->mcq.cqn, qpn); - return -EINVAL; - } - *cur_qp = to_mibqp(mqp); } @@ -619,13 +613,6 @@ repoll: read_lock(&dev->mdev->priv.mkey_table.lock); mmkey = __mlx5_mr_lookup(dev->mdev, mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); - if (unlikely(!mmkey)) { - read_unlock(&dev->mdev->priv.mkey_table.lock); - mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n", - cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey)); - return -EINVAL; - } - mr = to_mibmr(mmkey); get_sig_err_item(sig_err_cqe, &mr->sig->err_item); mr->sig->sig_err_exists = true; @@ -676,7 +663,6 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) unsigned long flags; int soft_polled = 0; int npolled; - int err = 0; spin_lock_irqsave(&cq->lock, flags); if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { @@ -688,8 +674,7 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) soft_polled = poll_soft_wc(cq, num_entries, wc); for (npolled = 0; npolled < num_entries - soft_polled; npolled++) { - err = mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled); - if (err) + if (mlx5_poll_one(cq, &cur_qp, wc + soft_polled + 
npolled)) break; } @@ -698,10 +683,7 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) out: spin_unlock_irqrestore(&cq->lock, flags); - if (err == 0 || err == -EAGAIN) - return soft_polled + npolled; - else - return err; + return soft_polled + npolled; } int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) -- cgit v1.1 From 3e6c3b0fd5d071ed17bf91586aae35a6cfb8cdb3 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Wed, 31 Aug 2016 07:24:20 -0700 Subject: IB/hfi1: Fix SGE length for misaligned PIO copy When trying to align the source pointer and there's a byte carry in an SGE copy, bytes are borrowed from the next quad-word X to complete the required quad-word copy. Then, the SGE length is reduced by the number of borrowed bytes. After this, if the remaining number of bytes from quad-word X (extra bytes) is greater than the new SGE length, the number of extra bytes needs to be updated to the new SGE length. Otherwise, when the SGE length gets updated again after the extra bytes are read to create the new byte carry, it goes negative, which then becomes a very large number as the SGE length is an unsigned integer. This causes SGE buffer to be over-read. 
Reviewed-by: Dean Luick Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/pio_copy.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c index 8c25e1b..3a1ef30 100644 --- a/drivers/infiniband/hw/hfi1/pio_copy.c +++ b/drivers/infiniband/hw/hfi1/pio_copy.c @@ -771,6 +771,9 @@ void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes) read_extra_bytes(pbuf, from, to_fill); from += to_fill; nbytes -= to_fill; + /* may not be enough valid bytes left to align */ + if (extra > nbytes) + extra = nbytes; /* ...now write carry */ dest = pbuf->start + (pbuf->qw_written * sizeof(u64)); @@ -798,6 +801,15 @@ void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes) read_low_bytes(pbuf, from, extra); from += extra; nbytes -= extra; + /* + * If no bytes are left, return early - we are done. + * NOTE: This short-circuit is *required* because + * "extra" may have been reduced in size and "from" + * is not aligned, as required when leaving this + * if block. + */ + if (nbytes == 0) + return; } /* at this point, from is QW aligned */ -- cgit v1.1 From af53493916693343955930556aaa83f875b8436a Mon Sep 17 00:00:00 2001 From: Jubin John Date: Wed, 31 Aug 2016 07:24:27 -0700 Subject: IB/hfi1: Fix AHG KDETH Intr shift In set_txreq_header_ahg(), the KDETH Intr bit is obtained from the header in the user sdma request using a KDETH_GET shift and mask macro. This value is then further right shifted by 16, causing us to lose the value, i.e., it is shifted to zero, leading to the following smatch warning: drivers/infiniband/hw/hfi1/user_sdma.c:1482 set_txreq_header_ahg() warn: mask and shift to zero The Intr bit should be left shifted into its correct position in the KDETH header before the AHG update.
Reported-by: Dan Carpenter Reviewed-by: Mitko Haralanov Reviewed-by: Harish Chegondi Signed-off-by: Jubin John Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 0ecf279..1694037 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -114,6 +114,8 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 #define KDETH_HCRC_LOWER_SHIFT 24 #define KDETH_HCRC_LOWER_MASK 0xff +#define AHG_KDETH_INTR_SHIFT 12 + #define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4) #define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff) @@ -1480,7 +1482,8 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, /* Clear KDETH.SH on last packet */ if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) { val |= cpu_to_le16(KDETH_GET(hdr->kdeth.ver_tid_offset, - INTR) >> 16); + INTR) << + AHG_KDETH_INTR_SHIFT); val &= cpu_to_le16(~(1U << 13)); AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val); } else { -- cgit v1.1 From 673b975f1fbad5382f76afdb987e7513c5f4b71b Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Wed, 31 Aug 2016 07:24:33 -0700 Subject: IB/hfi1: Add QSFP sanity pre-check Sometimes a QSFP device does not respond in the expected time after a power-on. Add a read pre-check/retry when starting the link on driver load. 
Reviewed-by: Easwar Hariharan Signed-off-by: Dean Luick Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 86 +++++++++++++++++++++++++++++++++++---- drivers/infiniband/hw/hfi1/chip.h | 1 + drivers/infiniband/hw/hfi1/hfi.h | 2 + drivers/infiniband/hw/hfi1/init.c | 1 + 4 files changed, 82 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index b32638d..ec3635a 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -9490,6 +9490,78 @@ static void init_lcb(struct hfi1_devdata *dd) write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0x00); } +/* + * Perform a test read on the QSFP. Return 0 on success, -ERRNO + * on error. + */ +static int test_qsfp_read(struct hfi1_pportdata *ppd) +{ + int ret; + u8 status; + + /* report success if not a QSFP */ + if (ppd->port_type != PORT_TYPE_QSFP) + return 0; + + /* read byte 2, the status byte */ + ret = one_qsfp_read(ppd, ppd->dd->hfi1_id, 2, &status, 1); + if (ret < 0) + return ret; + if (ret != 1) + return -EIO; + + return 0; /* success */ +} + +/* + * Values for QSFP retry. + * + * Give up after 10s (20 x 500ms). The overall timeout was empirically + * arrived at from experience on a large cluster. + */ +#define MAX_QSFP_RETRIES 20 +#define QSFP_RETRY_WAIT 500 /* msec */ + +/* + * Try a QSFP read. If it fails, schedule a retry for later. + * Called on first link activation after driver load. 
+ */ +static void try_start_link(struct hfi1_pportdata *ppd) +{ + if (test_qsfp_read(ppd)) { + /* read failed */ + if (ppd->qsfp_retry_count >= MAX_QSFP_RETRIES) { + dd_dev_err(ppd->dd, "QSFP not responding, giving up\n"); + return; + } + dd_dev_info(ppd->dd, + "QSFP not responding, waiting and retrying %d\n", + (int)ppd->qsfp_retry_count); + ppd->qsfp_retry_count++; + queue_delayed_work(ppd->hfi1_wq, &ppd->start_link_work, + msecs_to_jiffies(QSFP_RETRY_WAIT)); + return; + } + ppd->qsfp_retry_count = 0; + + /* + * Tune the SerDes to a ballpark setting for optimal signal and bit + * error rate. Needs to be done before starting the link. + */ + tune_serdes(ppd); + start_link(ppd); +} + +/* + * Workqueue function to start the link after a delay. + */ +void handle_start_link(struct work_struct *work) +{ + struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata, + start_link_work.work); + try_start_link(ppd); +} + int bringup_serdes(struct hfi1_pportdata *ppd) { struct hfi1_devdata *dd = ppd->dd; @@ -9525,14 +9597,8 @@ int bringup_serdes(struct hfi1_pportdata *ppd) set_qsfp_int_n(ppd, 1); } - /* - * Tune the SerDes to a ballpark setting for - * optimal signal and bit error rate - * Needs to be done before starting the link - */ - tune_serdes(ppd); - - return start_link(ppd); + try_start_link(ppd); + return 0; } void hfi1_quiet_serdes(struct hfi1_pportdata *ppd) @@ -9549,6 +9615,10 @@ void hfi1_quiet_serdes(struct hfi1_pportdata *ppd) ppd->driver_link_ready = 0; ppd->link_enabled = 0; + ppd->qsfp_retry_count = MAX_QSFP_RETRIES; /* prevent more retries */ + flush_delayed_work(&ppd->start_link_work); + cancel_delayed_work_sync(&ppd->start_link_work); + ppd->offline_disabled_reason = HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED); set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0, diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index ed11107..e295737 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ 
b/drivers/infiniband/hw/hfi1/chip.h @@ -706,6 +706,7 @@ void handle_link_up(struct work_struct *work); void handle_link_down(struct work_struct *work); void handle_link_downgrade(struct work_struct *work); void handle_link_bounce(struct work_struct *work); +void handle_start_link(struct work_struct *work); void handle_sma_message(struct work_struct *work); void reset_qsfp(struct hfi1_pportdata *ppd); void qsfp_event(struct work_struct *work); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index a021e66..28b9128 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -605,6 +605,7 @@ struct hfi1_pportdata { struct work_struct freeze_work; struct work_struct link_downgrade_work; struct work_struct link_bounce_work; + struct delayed_work start_link_work; /* host link state variables */ struct mutex hls_lock; u32 host_link_state; @@ -659,6 +660,7 @@ struct hfi1_pportdata { u8 linkinit_reason; u8 local_tx_rate; /* rate given to 8051 firmware */ u8 last_pstate; /* info only */ + u8 qsfp_retry_count; /* placeholders for IB MAD packet settings */ u8 overrun_threshold; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index b793545..000dc07 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -500,6 +500,7 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, INIT_WORK(&ppd->link_downgrade_work, handle_link_downgrade); INIT_WORK(&ppd->sma_message_work, handle_sma_message); INIT_WORK(&ppd->link_bounce_work, handle_link_bounce); + INIT_DELAYED_WORK(&ppd->start_link_work, handle_start_link); INIT_WORK(&ppd->linkstate_active_work, receive_interrupt_work); INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event); -- cgit v1.1 From 429b6a721727d49d8565b50a6bc0dc42432383a9 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Wed, 31 Aug 2016 07:24:40 -0700 Subject: IB/hfi1: Make n_krcvqs be an unsigned long integer The global 
variable n_krcvqs stores the sum of the number of kernel receive queues of VLs 0-7 which the user can pass to the driver through the module parameter array krcvqs which is of type unsigned integer. If the user passes large value(s) into krcvqs parameter array, it can cause an arithmetic overflow while calculating n_krcvqs which is also of type unsigned int. The overflow results in an incorrect value of n_krcvqs which can lead to kernel crash while loading the driver. Fix by changing the data type of n_krcvqs to unsigned long. This patch also changes the data type of other variables that get their values from n_krcvqs. Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 6 +++--- drivers/infiniband/hw/hfi1/hfi.h | 2 +- drivers/infiniband/hw/hfi1/init.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index ec3635a..cc38004 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -12935,7 +12935,7 @@ fail: */ static int set_up_context_variables(struct hfi1_devdata *dd) { - int num_kernel_contexts; + unsigned long num_kernel_contexts; int total_contexts; int ret; unsigned ngroups; @@ -12964,9 +12964,9 @@ static int set_up_context_variables(struct hfi1_devdata *dd) */ if (num_kernel_contexts > (dd->chip_send_contexts - num_vls - 1)) { dd_dev_err(dd, - "Reducing # kernel rcv contexts to: %d, from %d\n", + "Reducing # kernel rcv contexts to: %d, from %lu\n", (int)(dd->chip_send_contexts - num_vls - 1), - (int)num_kernel_contexts); + num_kernel_contexts); num_kernel_contexts = dd->chip_send_contexts - num_vls - 1; } /* diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 28b9128..325ec21 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1806,7 +1806,7 @@ extern unsigned 
int hfi1_max_mtu; extern unsigned int hfi1_cu; extern unsigned int user_credit_return_threshold; extern int num_user_contexts; -extern unsigned n_krcvqs; +extern unsigned long n_krcvqs; extern uint krcvqs[]; extern int krcvqsset; extern uint kdeth_qp; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 000dc07..384b43d 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -94,7 +94,7 @@ module_param_array(krcvqs, uint, &krcvqsset, S_IRUGO); MODULE_PARM_DESC(krcvqs, "Array of the number of non-control kernel receive queues by VL"); /* computed based on above array */ -unsigned n_krcvqs; +unsigned long n_krcvqs; static unsigned hfi1_rcvarr_split = 25; module_param_named(rcvarr_split, hfi1_rcvarr_split, uint, S_IRUGO); -- cgit v1.1 From 16170d9c102764f76c58aad244e947f4e3f44590 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 31 Aug 2016 07:24:46 -0700 Subject: IB/hfi1: Rework debugfs to use SRCU The debugfs RCU trips many debug kernel warnings because of potential sleeps with an RCU read lock held. This includes both user copy calls and slab allocations throughout the file. This patch switches the RCU to use SRCU for file remove/access race protection. In one case, the SRCU is implicit in the use of the raw debugfs file object and just works. In the seq_file case, a wrapper around seq_read() and seq_lseek() is used to enforce the SRCU using the debugfs supplied functions debugfs_use_file_start() and debugfs_use_file_finish(). The synchronize_rcu() is deleted since the SRCU prevents the remove access race. The RCU locking is kept for qp_stats since the QP hash list is protected using the non-sleepable RCU.
Reviewed-by: Sebastian Sanchez Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/debugfs.c | 132 ++++++++++++++--------------------- 1 file changed, 52 insertions(+), 80 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index a49cc88..5e9be16 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -59,6 +59,40 @@ static struct dentry *hfi1_dbg_root; +/* wrappers to enforce srcu in seq file */ +static ssize_t hfi1_seq_read( + struct file *file, + char __user *buf, + size_t size, + loff_t *ppos) +{ + struct dentry *d = file->f_path.dentry; + int srcu_idx; + ssize_t r; + + r = debugfs_use_file_start(d, &srcu_idx); + if (likely(!r)) + r = seq_read(file, buf, size, ppos); + debugfs_use_file_finish(srcu_idx); + return r; +} + +static loff_t hfi1_seq_lseek( + struct file *file, + loff_t offset, + int whence) +{ + struct dentry *d = file->f_path.dentry; + int srcu_idx; + loff_t r; + + r = debugfs_use_file_start(d, &srcu_idx); + if (likely(!r)) + r = seq_lseek(file, offset, whence); + debugfs_use_file_finish(srcu_idx); + return r; +} + #define private2dd(file) (file_inode(file)->i_private) #define private2ppd(file) (file_inode(file)->i_private) @@ -87,8 +121,8 @@ static int _##name##_open(struct inode *inode, struct file *s) \ static const struct file_operations _##name##_file_ops = { \ .owner = THIS_MODULE, \ .open = _##name##_open, \ - .read = seq_read, \ - .llseek = seq_lseek, \ + .read = hfi1_seq_read, \ + .llseek = hfi1_seq_lseek, \ .release = seq_release \ } @@ -105,11 +139,9 @@ do { \ DEBUGFS_FILE_CREATE(#name, parent, data, &_##name##_file_ops, S_IRUGO) static void *_opcode_stats_seq_start(struct seq_file *s, loff_t *pos) -__acquires(RCU) { struct hfi1_opcode_stats_perctx *opstats; - rcu_read_lock(); if (*pos >= ARRAY_SIZE(opstats->stats)) return NULL; return pos; @@ -126,9 +158,7 @@ static void 
*_opcode_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) } static void _opcode_stats_seq_stop(struct seq_file *s, void *v) -__releases(RCU) { - rcu_read_unlock(); } static int _opcode_stats_seq_show(struct seq_file *s, void *v) @@ -285,12 +315,10 @@ DEBUGFS_SEQ_FILE_OPEN(qp_stats) DEBUGFS_FILE_OPS(qp_stats); static void *_sdes_seq_start(struct seq_file *s, loff_t *pos) -__acquires(RCU) { struct hfi1_ibdev *ibd; struct hfi1_devdata *dd; - rcu_read_lock(); ibd = (struct hfi1_ibdev *)s->private; dd = dd_from_dev(ibd); if (!dd->per_sdma || *pos >= dd->num_sdma) @@ -310,9 +338,7 @@ static void *_sdes_seq_next(struct seq_file *s, void *v, loff_t *pos) } static void _sdes_seq_stop(struct seq_file *s, void *v) -__releases(RCU) { - rcu_read_unlock(); } static int _sdes_seq_show(struct seq_file *s, void *v) @@ -339,11 +365,9 @@ static ssize_t dev_counters_read(struct file *file, char __user *buf, struct hfi1_devdata *dd; ssize_t rval; - rcu_read_lock(); dd = private2dd(file); avail = hfi1_read_cntrs(dd, NULL, &counters); rval = simple_read_from_buffer(buf, count, ppos, counters, avail); - rcu_read_unlock(); return rval; } @@ -356,11 +380,9 @@ static ssize_t dev_names_read(struct file *file, char __user *buf, struct hfi1_devdata *dd; ssize_t rval; - rcu_read_lock(); dd = private2dd(file); avail = hfi1_read_cntrs(dd, &names, NULL); rval = simple_read_from_buffer(buf, count, ppos, names, avail); - rcu_read_unlock(); return rval; } @@ -383,11 +405,9 @@ static ssize_t portnames_read(struct file *file, char __user *buf, struct hfi1_devdata *dd; ssize_t rval; - rcu_read_lock(); dd = private2dd(file); avail = hfi1_read_portcntrs(dd->pport, &names, NULL); rval = simple_read_from_buffer(buf, count, ppos, names, avail); - rcu_read_unlock(); return rval; } @@ -400,11 +420,9 @@ static ssize_t portcntrs_debugfs_read(struct file *file, char __user *buf, struct hfi1_pportdata *ppd; ssize_t rval; - rcu_read_lock(); ppd = private2ppd(file); avail = hfi1_read_portcntrs(ppd, NULL, 
&counters); rval = simple_read_from_buffer(buf, count, ppos, counters, avail); - rcu_read_unlock(); return rval; } @@ -434,16 +452,13 @@ static ssize_t asic_flags_read(struct file *file, char __user *buf, int used; int i; - rcu_read_lock(); ppd = private2ppd(file); dd = ppd->dd; size = PAGE_SIZE; used = 0; tmp = kmalloc(size, GFP_KERNEL); - if (!tmp) { - rcu_read_unlock(); + if (!tmp) return -ENOMEM; - } scratch0 = read_csr(dd, ASIC_CFG_SCRATCH); used += scnprintf(tmp + used, size - used, @@ -470,7 +485,6 @@ static ssize_t asic_flags_read(struct file *file, char __user *buf, used += scnprintf(tmp + used, size - used, "Write bits to clear\n"); ret = simple_read_from_buffer(buf, count, ppos, tmp, used); - rcu_read_unlock(); kfree(tmp); return ret; } @@ -486,15 +500,12 @@ static ssize_t asic_flags_write(struct file *file, const char __user *buf, u64 scratch0; u64 clear; - rcu_read_lock(); ppd = private2ppd(file); dd = ppd->dd; buff = kmalloc(count + 1, GFP_KERNEL); - if (!buff) { - ret = -ENOMEM; - goto do_return; - } + if (!buff) + return -ENOMEM; ret = copy_from_user(buff, buf, count); if (ret > 0) { @@ -527,8 +538,6 @@ static ssize_t asic_flags_write(struct file *file, const char __user *buf, do_free: kfree(buff); - do_return: - rcu_read_unlock(); return ret; } @@ -542,18 +551,14 @@ static ssize_t qsfp_debugfs_dump(struct file *file, char __user *buf, char *tmp; int ret; - rcu_read_lock(); ppd = private2ppd(file); tmp = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!tmp) { - rcu_read_unlock(); + if (!tmp) return -ENOMEM; - } ret = qsfp_dump(ppd, tmp, PAGE_SIZE); if (ret > 0) ret = simple_read_from_buffer(buf, count, ppos, tmp, ret); - rcu_read_unlock(); kfree(tmp); return ret; } @@ -569,7 +574,6 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf, int offset; int total_written; - rcu_read_lock(); ppd = private2ppd(file); /* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */ @@ -577,16 +581,12 @@ static ssize_t 
__i2c_debugfs_write(struct file *file, const char __user *buf, offset = *ppos & 0xffff; /* explicitly reject invalid address 0 to catch cp and cat */ - if (i2c_addr == 0) { - ret = -EINVAL; - goto _return; - } + if (i2c_addr == 0) + return -EINVAL; buff = kmalloc(count, GFP_KERNEL); - if (!buff) { - ret = -ENOMEM; - goto _return; - } + if (!buff) + return -ENOMEM; ret = copy_from_user(buff, buf, count); if (ret > 0) { @@ -606,8 +606,6 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf, _free: kfree(buff); - _return: - rcu_read_unlock(); return ret; } @@ -636,7 +634,6 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf, int offset; int total_read; - rcu_read_lock(); ppd = private2ppd(file); /* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */ @@ -644,16 +641,12 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf, offset = *ppos & 0xffff; /* explicitly reject invalid address 0 to catch cp and cat */ - if (i2c_addr == 0) { - ret = -EINVAL; - goto _return; - } + if (i2c_addr == 0) + return -EINVAL; buff = kmalloc(count, GFP_KERNEL); - if (!buff) { - ret = -ENOMEM; - goto _return; - } + if (!buff) + return -ENOMEM; total_read = i2c_read(ppd, target, i2c_addr, offset, buff, count); if (total_read < 0) { @@ -673,8 +666,6 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf, _free: kfree(buff); - _return: - rcu_read_unlock(); return ret; } @@ -701,26 +692,20 @@ static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf, int ret; int total_written; - rcu_read_lock(); - if (*ppos + count > QSFP_PAGESIZE * 4) { /* base page + page00-page03 */ - ret = -EINVAL; - goto _return; - } + if (*ppos + count > QSFP_PAGESIZE * 4) /* base page + page00-page03 */ + return -EINVAL; ppd = private2ppd(file); buff = kmalloc(count, GFP_KERNEL); - if (!buff) { - ret = -ENOMEM; - goto _return; - } + if (!buff) + return -ENOMEM; ret = copy_from_user(buff, buf, count); if 
(ret > 0) { ret = -EFAULT; goto _free; } - total_written = qsfp_write(ppd, target, *ppos, buff, count); if (total_written < 0) { ret = total_written; @@ -733,8 +718,6 @@ static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf, _free: kfree(buff); - _return: - rcu_read_unlock(); return ret; } @@ -761,7 +744,6 @@ static ssize_t __qsfp_debugfs_read(struct file *file, char __user *buf, int ret; int total_read; - rcu_read_lock(); if (*ppos + count > QSFP_PAGESIZE * 4) { /* base page + page00-page03 */ ret = -EINVAL; goto _return; @@ -794,7 +776,6 @@ static ssize_t __qsfp_debugfs_read(struct file *file, char __user *buf, _free: kfree(buff); _return: - rcu_read_unlock(); return ret; } @@ -1010,7 +991,6 @@ void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd) debugfs_remove_recursive(ibd->hfi1_ibdev_dbg); out: ibd->hfi1_ibdev_dbg = NULL; - synchronize_rcu(); } /* @@ -1035,9 +1015,7 @@ static const char * const hfi1_statnames[] = { }; static void *_driver_stats_names_seq_start(struct seq_file *s, loff_t *pos) -__acquires(RCU) { - rcu_read_lock(); if (*pos >= ARRAY_SIZE(hfi1_statnames)) return NULL; return pos; @@ -1055,9 +1033,7 @@ static void *_driver_stats_names_seq_next( } static void _driver_stats_names_seq_stop(struct seq_file *s, void *v) -__releases(RCU) { - rcu_read_unlock(); } static int _driver_stats_names_seq_show(struct seq_file *s, void *v) @@ -1073,9 +1049,7 @@ DEBUGFS_SEQ_FILE_OPEN(driver_stats_names) DEBUGFS_FILE_OPS(driver_stats_names); static void *_driver_stats_seq_start(struct seq_file *s, loff_t *pos) -__acquires(RCU) { - rcu_read_lock(); if (*pos >= ARRAY_SIZE(hfi1_statnames)) return NULL; return pos; @@ -1090,9 +1064,7 @@ static void *_driver_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) } static void _driver_stats_seq_stop(struct seq_file *s, void *v) -__releases(RCU) { - rcu_read_unlock(); } static u64 hfi1_sps_ints(void) -- cgit v1.1 From d1992996753132e2dafe955cccb2fb0714d3cfc4 Mon Sep 17 00:00:00 2001 From: Emanuel 
Czirai Date: Fri, 2 Sep 2016 07:35:50 +0200 Subject: x86/AMD: Apply erratum 665 on machines without a BIOS fix AMD F12h machines have an erratum which can cause DIV/IDIV to behave unpredictably. The workaround is to set MSRC001_1029[31] but sometimes there is no BIOS update containing that workaround so let's do it ourselves unconditionally. It is simple enough. [ Borislav: Wrote commit message. ] Signed-off-by: Emanuel Czirai Signed-off-by: Borislav Petkov Cc: Yaowu Xu Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20160902053550.18097-1-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/amd.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index f5c69d8..b81fe2d 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -669,6 +669,17 @@ static void init_amd_gh(struct cpuinfo_x86 *c) set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH); } +#define MSR_AMD64_DE_CFG 0xC0011029 + +static void init_amd_ln(struct cpuinfo_x86 *c) +{ + /* + * Apply erratum 665 fix unconditionally so machines without a BIOS + * fix work. + */ + msr_set_bit(MSR_AMD64_DE_CFG, 31); +} + static void init_amd_bd(struct cpuinfo_x86 *c) { u64 value; @@ -726,6 +737,7 @@ static void init_amd(struct cpuinfo_x86 *c) case 6: init_amd_k7(c); break; case 0xf: init_amd_k8(c); break; case 0x10: init_amd_gh(c); break; + case 0x12: init_amd_ln(c); break; case 0x15: init_amd_bd(c); break; } -- cgit v1.1 From 2f86953e7436c9b9a4690909c5e2db24799e173b Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 2 Sep 2016 10:22:54 +0200 Subject: l2tp: fix use-after-free during module unload Tunnel deletion is delayed by both a workqueue (l2tp_tunnel_delete -> wq -> l2tp_tunnel_del_work) and RCU (sk_destruct -> RCU -> l2tp_tunnel_destruct). 
By the time l2tp_tunnel_destruct() runs to destroy the tunnel and finish destroying the socket, the private data reserved via the net_generic mechanism has already been freed, but l2tp_tunnel_destruct() actually uses this data. Make sure tunnel deletion for the netns has completed before returning from l2tp_exit_net() by first flushing the tunnel removal workqueue, and then waiting for RCU callbacks to complete. Fixes: 167eb17e0b17 ("l2tp: create tunnel sockets in the right namespace") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 1e40dac..a2ed3bd 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1855,6 +1855,9 @@ static __net_exit void l2tp_exit_net(struct net *net) (void)l2tp_tunnel_delete(tunnel); } rcu_read_unlock_bh(); + + flush_workqueue(l2tp_wq); + rcu_barrier(); } static struct pernet_operations l2tp_net_ops = { -- cgit v1.1 From 3feab13c919f99b0a17d0ca22ae00cf90f5d3fd1 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 16 Aug 2016 16:59:52 +0100 Subject: ACPI / drivers: fix typo in ACPI_DECLARE_PROBE_ENTRY macro When the ACPI_DECLARE_PROBE_ENTRY macro was added in commit e647b532275b ("ACPI: Add early device probing infrastructure"), a stub macro adding an unused entry was added for the !CONFIG_ACPI Kconfig option case to make sure kernel code making use of the macro did not require to be guarded within CONFIG_ACPI in order to be compiled. The stub macro was never used since all kernel code that defines ACPI_DECLARE_PROBE_ENTRY entries is currently guarded within CONFIG_ACPI; it contains a typo that should be nonetheless fixed. Fix the typo in the stub (ie !CONFIG_ACPI) ACPI_DECLARE_PROBE_ENTRY() macro so that it can actually be used if needed. Signed-off-by: Lorenzo Pieralisi Fixes: e647b532275b (ACPI: Add early device probing infrastructure) Cc: 4.4+ # 4.4+ Signed-off-by: Rafael J. 
Wysocki --- include/linux/acpi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 4d8452c..c5eaf2f 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1056,7 +1056,7 @@ static inline struct fwnode_handle *acpi_get_next_subnode(struct device *dev, return NULL; } -#define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, validate, data, fn) \ +#define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, valid, data, fn) \ static const void * __acpi_table_##name[] \ __attribute__((unused)) \ = { (void *) table_id, \ -- cgit v1.1 From 5331d9cab32ef640b4cd38a43b0858874fbb7168 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 16 Aug 2016 16:59:53 +0100 Subject: ACPI / drivers: replace acpi_probe_lock spinlock with mutex Commit e647b532275b ("ACPI: Add early device probing infrastructure") introduced code that allows inserting driver specific struct acpi_probe_entry probe entries into ACPI linker sections (one per-subsystem, eg irqchip, clocksource) that are then walked to retrieve the data and function hooks required to probe the respective kernel components. Probing for all entries in a section is triggered through the __acpi_probe_device_table() function, that in turn, according to the table ID a given probe entry reports parses the table with the function retrieved from the respective section structures (ie struct acpi_probe_entry). Owing to the current ACPI table parsing implementation, the __acpi_probe_device_table() function has to share global variables with the acpi_match_madt() function, so in order to guarantee mutual exclusion locking is required between the two functions. Current kernel code implements the locking through the acpi_probe_lock spinlock; this has the side effect of requiring all code called within the lock (ie struct acpi_probe_entry.probe_{table/subtbl} hooks) not to sleep. 
However, kernel subsystems that make use of the early probing infrastructure are relying on kernel APIs that may sleep (eg irq_domain_alloc_fwnode(), among others) in the function calls pointed at by struct acpi_probe_entry.{probe_table/subtbl} entries (eg gic_v2_acpi_init()), which is a bug. Since __acpi_probe_device_table() is called from context that is allowed to sleep the acpi_probe_lock spinlock can be replaced with a mutex; this fixes the issue whilst still guaranteeing mutual exclusion. Signed-off-by: Lorenzo Pieralisi Fixes: e647b532275b (ACPI: Add early device probing infrastructure) Cc: 4.4+ # 4.4+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/scan.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index ad9fc84..e878fc7 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -2054,7 +2054,7 @@ int __init acpi_scan_init(void) static struct acpi_probe_entry *ape; static int acpi_probe_count; -static DEFINE_SPINLOCK(acpi_probe_lock); +static DEFINE_MUTEX(acpi_probe_mutex); static int __init acpi_match_madt(struct acpi_subtable_header *header, const unsigned long end) @@ -2073,7 +2073,7 @@ int __init __acpi_probe_device_table(struct acpi_probe_entry *ap_head, int nr) if (acpi_disabled) return 0; - spin_lock(&acpi_probe_lock); + mutex_lock(&acpi_probe_mutex); for (ape = ap_head; nr; ape++, nr--) { if (ACPI_COMPARE_NAME(ACPI_SIG_MADT, ape->id)) { acpi_probe_count = 0; @@ -2086,7 +2086,7 @@ int __init __acpi_probe_device_table(struct acpi_probe_entry *ap_head, int nr) count++; } } - spin_unlock(&acpi_probe_lock); + mutex_unlock(&acpi_probe_mutex); return count; } -- cgit v1.1 From 4a29b3484f857ff350f203a2d9bf3b428c9e0bf2 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Fri, 2 Sep 2016 02:36:58 +0300 Subject: tpm: invalid self test error message The driver emits invalid self test error message even though the init succeeds. 
Signed-off-by: Jarkko Sakkinen Fixes: cae8b441fc20 ("tpm: Factor out common startup code") Reviewed-by: James Morris Signed-off-by: James Morris --- drivers/char/tpm/tpm2-cmd.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c index 08c7e23..0c75c3f 100644 --- a/drivers/char/tpm/tpm2-cmd.c +++ b/drivers/char/tpm/tpm2-cmd.c @@ -957,7 +957,7 @@ int tpm2_auto_startup(struct tpm_chip *chip) goto out; rc = tpm2_do_selftest(chip); - if (rc != TPM2_RC_INITIALIZE) { + if (rc != 0 && rc != TPM2_RC_INITIALIZE) { dev_err(&chip->dev, "TPM self test failed\n"); goto out; } @@ -974,7 +974,6 @@ int tpm2_auto_startup(struct tpm_chip *chip) } } - return rc; out: if (rc > 0) rc = -ENODEV; -- cgit v1.1 From 1358bd5a7477b346dfb6b502051d61f29b11a200 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Thu, 1 Sep 2016 15:15:06 -0500 Subject: net: smsc911x: Remove multiple exit points from smsc911x_open Rework the error handling in smsc911x open in preparation for the mdio startup being moved here. Signed-off-by: Jeremy Linton Signed-off-by: David S. 
Miller --- drivers/net/ethernet/smsc/smsc911x.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index ca31345..c9b0e05 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -1520,17 +1520,20 @@ static int smsc911x_open(struct net_device *dev) unsigned int timeout; unsigned int temp; unsigned int intcfg; + int retval; /* if the phy is not yet registered, retry later*/ if (!dev->phydev) { SMSC_WARN(pdata, hw, "phy_dev is NULL"); - return -EAGAIN; + retval = -EAGAIN; + goto out; } /* Reset the LAN911x */ - if (smsc911x_soft_reset(pdata)) { + retval = smsc911x_soft_reset(pdata); + if (retval) { SMSC_WARN(pdata, hw, "soft reset failed"); - return -EIO; + goto out; } smsc911x_reg_write(pdata, HW_CFG, 0x00050000); @@ -1600,7 +1603,8 @@ static int smsc911x_open(struct net_device *dev) if (!pdata->software_irq_signal) { netdev_warn(dev, "ISR failed signaling test (IRQ %d)\n", dev->irq); - return -ENODEV; + retval = -ENODEV; + goto out; } SMSC_TRACE(pdata, ifup, "IRQ handler passed test using IRQ %d", dev->irq); @@ -1646,6 +1650,8 @@ static int smsc911x_open(struct net_device *dev) netif_start_queue(dev); return 0; +out: + return retval; } /* Entry point for stopping the interface */ -- cgit v1.1 From aea95dd52db436f406f3f45a455a710774a3a210 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Thu, 1 Sep 2016 15:15:07 -0500 Subject: net: smsc911x: Fix register_netdev, phy startup, driver unload ordering Move phy startup/shutdown into the smsc911x_open/stop routines. This allows the module to be unloaded because phy_connect_direct is no longer always holding the module use count. This one change also resolves a number of other problems. The link status of a downed interface no longer reflects a stale state. Errors caused by the net device being opened before the mdio/phy was configured. 
There is also a potential power savings as the phy's don't remain powered when the interface isn't running. Signed-off-by: Jeremy Linton Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/smsc911x.c | 48 ++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index c9b0e05..823ad3f 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -1099,15 +1099,8 @@ static int smsc911x_mii_init(struct platform_device *pdev, goto err_out_free_bus_2; } - if (smsc911x_mii_probe(dev) < 0) { - SMSC_WARN(pdata, probe, "Error registering mii bus"); - goto err_out_unregister_bus_3; - } - return 0; -err_out_unregister_bus_3: - mdiobus_unregister(pdata->mii_bus); err_out_free_bus_2: mdiobus_free(pdata->mii_bus); err_out_1: @@ -1522,18 +1515,20 @@ static int smsc911x_open(struct net_device *dev) unsigned int intcfg; int retval; - /* if the phy is not yet registered, retry later*/ + /* find and start the given phy */ if (!dev->phydev) { - SMSC_WARN(pdata, hw, "phy_dev is NULL"); - retval = -EAGAIN; - goto out; + retval = smsc911x_mii_probe(dev); + if (retval < 0) { + SMSC_WARN(pdata, probe, "Error starting phy"); + goto out; + } } /* Reset the LAN911x */ retval = smsc911x_soft_reset(pdata); if (retval) { SMSC_WARN(pdata, hw, "soft reset failed"); - goto out; + goto mii_free_out; } smsc911x_reg_write(pdata, HW_CFG, 0x00050000); @@ -1604,7 +1599,7 @@ static int smsc911x_open(struct net_device *dev) netdev_warn(dev, "ISR failed signaling test (IRQ %d)\n", dev->irq); retval = -ENODEV; - goto out; + goto mii_free_out; } SMSC_TRACE(pdata, ifup, "IRQ handler passed test using IRQ %d", dev->irq); @@ -1650,6 +1645,10 @@ static int smsc911x_open(struct net_device *dev) netif_start_queue(dev); return 0; + +mii_free_out: + phy_disconnect(dev->phydev); + dev->phydev = NULL; out: return retval; } @@ -1674,8 +1673,12 @@ static int 
smsc911x_stop(struct net_device *dev) smsc911x_tx_update_txcounters(dev); /* Bring the PHY down */ - if (dev->phydev) + if (dev->phydev) { phy_stop(dev->phydev); + phy_disconnect(dev->phydev); + dev->phydev = NULL; + } + netif_carrier_off(dev); SMSC_TRACE(pdata, ifdown, "Interface stopped"); return 0; @@ -2297,11 +2300,10 @@ static int smsc911x_drv_remove(struct platform_device *pdev) pdata = netdev_priv(dev); BUG_ON(!pdata); BUG_ON(!pdata->ioaddr); - BUG_ON(!dev->phydev); + WARN_ON(dev->phydev); SMSC_TRACE(pdata, ifdown, "Stopping driver"); - phy_disconnect(dev->phydev); mdiobus_unregister(pdata->mii_bus); mdiobus_free(pdata->mii_bus); @@ -2500,6 +2502,12 @@ static int smsc911x_drv_probe(struct platform_device *pdev) netif_carrier_off(dev); + retval = smsc911x_mii_init(pdev, dev); + if (retval) { + SMSC_WARN(pdata, probe, "Error %i initialising mii", retval); + goto out_free_irq; + } + retval = register_netdev(dev); if (retval) { SMSC_WARN(pdata, probe, "Error %i registering device", retval); @@ -2509,12 +2517,6 @@ static int smsc911x_drv_probe(struct platform_device *pdev) "Network interface: \"%s\"", dev->name); } - retval = smsc911x_mii_init(pdev, dev); - if (retval) { - SMSC_WARN(pdata, probe, "Error %i initialising mii", retval); - goto out_unregister_netdev_5; - } - spin_lock_irq(&pdata->mac_lock); /* Check if mac address has been specified when bringing interface up */ @@ -2550,8 +2552,6 @@ static int smsc911x_drv_probe(struct platform_device *pdev) return 0; -out_unregister_netdev_5: - unregister_netdev(dev); out_free_irq: free_irq(dev->irq, dev); out_disable_resources: -- cgit v1.1 From a85f00c36ef53ea3cb5ebf3dee4ce9cc6726671f Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Thu, 1 Sep 2016 15:15:08 -0500 Subject: net: smsc911x: Move interrupt handler before open In preparation for the allocating/enabling interrupts in the ndo_open routine move the irq handler before it. Signed-off-by: Jeremy Linton Signed-off-by: David S. 
Miller --- drivers/net/ethernet/smsc/smsc911x.c | 122 +++++++++++++++++------------------ 1 file changed, 61 insertions(+), 61 deletions(-) diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index 823ad3f..c2e56f0 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -1507,6 +1507,67 @@ static void smsc911x_disable_irq_chip(struct net_device *dev) smsc911x_reg_write(pdata, INT_STS, 0xFFFFFFFF); } +static irqreturn_t smsc911x_irqhandler(int irq, void *dev_id) +{ + struct net_device *dev = dev_id; + struct smsc911x_data *pdata = netdev_priv(dev); + u32 intsts = smsc911x_reg_read(pdata, INT_STS); + u32 inten = smsc911x_reg_read(pdata, INT_EN); + int serviced = IRQ_NONE; + u32 temp; + + if (unlikely(intsts & inten & INT_STS_SW_INT_)) { + temp = smsc911x_reg_read(pdata, INT_EN); + temp &= (~INT_EN_SW_INT_EN_); + smsc911x_reg_write(pdata, INT_EN, temp); + smsc911x_reg_write(pdata, INT_STS, INT_STS_SW_INT_); + pdata->software_irq_signal = 1; + smp_wmb(); + serviced = IRQ_HANDLED; + } + + if (unlikely(intsts & inten & INT_STS_RXSTOP_INT_)) { + /* Called when there is a multicast update scheduled and + * it is now safe to complete the update */ + SMSC_TRACE(pdata, intr, "RX Stop interrupt"); + smsc911x_reg_write(pdata, INT_STS, INT_STS_RXSTOP_INT_); + if (pdata->multicast_update_pending) + smsc911x_rx_multicast_update_workaround(pdata); + serviced = IRQ_HANDLED; + } + + if (intsts & inten & INT_STS_TDFA_) { + temp = smsc911x_reg_read(pdata, FIFO_INT); + temp |= FIFO_INT_TX_AVAIL_LEVEL_; + smsc911x_reg_write(pdata, FIFO_INT, temp); + smsc911x_reg_write(pdata, INT_STS, INT_STS_TDFA_); + netif_wake_queue(dev); + serviced = IRQ_HANDLED; + } + + if (unlikely(intsts & inten & INT_STS_RXE_)) { + SMSC_TRACE(pdata, intr, "RX Error interrupt"); + smsc911x_reg_write(pdata, INT_STS, INT_STS_RXE_); + serviced = IRQ_HANDLED; + } + + if (likely(intsts & inten & INT_STS_RSFL_)) { + if 
(likely(napi_schedule_prep(&pdata->napi))) { + /* Disable Rx interrupts */ + temp = smsc911x_reg_read(pdata, INT_EN); + temp &= (~INT_EN_RSFL_EN_); + smsc911x_reg_write(pdata, INT_EN, temp); + /* Schedule a NAPI poll */ + __napi_schedule(&pdata->napi); + } else { + SMSC_WARN(pdata, rx_err, "napi_schedule_prep failed"); + } + serviced = IRQ_HANDLED; + } + + return serviced; +} + static int smsc911x_open(struct net_device *dev) { struct smsc911x_data *pdata = netdev_priv(dev); @@ -1820,67 +1881,6 @@ static void smsc911x_set_multicast_list(struct net_device *dev) spin_unlock_irqrestore(&pdata->mac_lock, flags); } -static irqreturn_t smsc911x_irqhandler(int irq, void *dev_id) -{ - struct net_device *dev = dev_id; - struct smsc911x_data *pdata = netdev_priv(dev); - u32 intsts = smsc911x_reg_read(pdata, INT_STS); - u32 inten = smsc911x_reg_read(pdata, INT_EN); - int serviced = IRQ_NONE; - u32 temp; - - if (unlikely(intsts & inten & INT_STS_SW_INT_)) { - temp = smsc911x_reg_read(pdata, INT_EN); - temp &= (~INT_EN_SW_INT_EN_); - smsc911x_reg_write(pdata, INT_EN, temp); - smsc911x_reg_write(pdata, INT_STS, INT_STS_SW_INT_); - pdata->software_irq_signal = 1; - smp_wmb(); - serviced = IRQ_HANDLED; - } - - if (unlikely(intsts & inten & INT_STS_RXSTOP_INT_)) { - /* Called when there is a multicast update scheduled and - * it is now safe to complete the update */ - SMSC_TRACE(pdata, intr, "RX Stop interrupt"); - smsc911x_reg_write(pdata, INT_STS, INT_STS_RXSTOP_INT_); - if (pdata->multicast_update_pending) - smsc911x_rx_multicast_update_workaround(pdata); - serviced = IRQ_HANDLED; - } - - if (intsts & inten & INT_STS_TDFA_) { - temp = smsc911x_reg_read(pdata, FIFO_INT); - temp |= FIFO_INT_TX_AVAIL_LEVEL_; - smsc911x_reg_write(pdata, FIFO_INT, temp); - smsc911x_reg_write(pdata, INT_STS, INT_STS_TDFA_); - netif_wake_queue(dev); - serviced = IRQ_HANDLED; - } - - if (unlikely(intsts & inten & INT_STS_RXE_)) { - SMSC_TRACE(pdata, intr, "RX Error interrupt"); - 
smsc911x_reg_write(pdata, INT_STS, INT_STS_RXE_); - serviced = IRQ_HANDLED; - } - - if (likely(intsts & inten & INT_STS_RSFL_)) { - if (likely(napi_schedule_prep(&pdata->napi))) { - /* Disable Rx interrupts */ - temp = smsc911x_reg_read(pdata, INT_EN); - temp &= (~INT_EN_RSFL_EN_); - smsc911x_reg_write(pdata, INT_EN, temp); - /* Schedule a NAPI poll */ - __napi_schedule(&pdata->napi); - } else { - SMSC_WARN(pdata, rx_err, "napi_schedule_prep failed"); - } - serviced = IRQ_HANDLED; - } - - return serviced; -} - #ifdef CONFIG_NET_POLL_CONTROLLER static void smsc911x_poll_controller(struct net_device *dev) { -- cgit v1.1 From f252974eaa64f64b940894f24bfa162a8e7f6b0d Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Thu, 1 Sep 2016 15:15:09 -0500 Subject: net: smsc911x: Move interrupt allocation to open/stop The /proc/irq/xx information is incorrect for smsc911x because the request_irq is happening before the register_netdev has the proper device name. Moving it to the open also fixes the case of when the device is renamed. Reported-by: Will Deacon Signed-off-by: Jeremy Linton Tested-by: Will Deacon Signed-off-by: David S. 
Miller --- drivers/net/ethernet/smsc/smsc911x.c | 47 ++++++++++++++---------------------- 1 file changed, 18 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index c2e56f0..4f8910b 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -1575,6 +1575,7 @@ static int smsc911x_open(struct net_device *dev) unsigned int temp; unsigned int intcfg; int retval; + int irq_flags; /* find and start the given phy */ if (!dev->phydev) { @@ -1645,6 +1646,15 @@ static int smsc911x_open(struct net_device *dev) pdata->software_irq_signal = 0; smp_wmb(); + irq_flags = irq_get_trigger_type(dev->irq); + retval = request_irq(dev->irq, smsc911x_irqhandler, + irq_flags | IRQF_SHARED, dev->name, dev); + if (retval) { + SMSC_WARN(pdata, probe, + "Unable to claim requested irq: %d", dev->irq); + goto mii_free_out; + } + temp = smsc911x_reg_read(pdata, INT_EN); temp |= INT_EN_SW_INT_EN_; smsc911x_reg_write(pdata, INT_EN, temp); @@ -1660,7 +1670,7 @@ static int smsc911x_open(struct net_device *dev) netdev_warn(dev, "ISR failed signaling test (IRQ %d)\n", dev->irq); retval = -ENODEV; - goto mii_free_out; + goto irq_stop_out; } SMSC_TRACE(pdata, ifup, "IRQ handler passed test using IRQ %d", dev->irq); @@ -1707,6 +1717,8 @@ static int smsc911x_open(struct net_device *dev) netif_start_queue(dev); return 0; +irq_stop_out: + free_irq(dev->irq, dev); mii_free_out: phy_disconnect(dev->phydev); dev->phydev = NULL; @@ -1733,6 +1745,8 @@ static int smsc911x_stop(struct net_device *dev) dev->stats.rx_dropped += smsc911x_reg_read(pdata, RX_DROP); smsc911x_tx_update_txcounters(dev); + free_irq(dev->irq, dev); + /* Bring the PHY down */ if (dev->phydev) { phy_stop(dev->phydev); @@ -2308,7 +2322,6 @@ static int smsc911x_drv_remove(struct platform_device *pdev) mdiobus_free(pdata->mii_bus); unregister_netdev(dev); - free_irq(dev->irq, dev); res = platform_get_resource_byname(pdev, IORESOURCE_MEM, 
"smsc911x-memory"); if (!res) @@ -2393,8 +2406,7 @@ static int smsc911x_drv_probe(struct platform_device *pdev) struct smsc911x_data *pdata; struct smsc911x_platform_config *config = dev_get_platdata(&pdev->dev); struct resource *res; - unsigned int intcfg = 0; - int res_size, irq, irq_flags; + int res_size, irq; int retval; res = platform_get_resource_byname(pdev, IORESOURCE_MEM, @@ -2433,7 +2445,6 @@ static int smsc911x_drv_probe(struct platform_device *pdev) pdata = netdev_priv(dev); dev->irq = irq; - irq_flags = irq_get_trigger_type(irq); pdata->ioaddr = ioremap_nocache(res->start, res_size); pdata->dev = dev; @@ -2480,38 +2491,18 @@ static int smsc911x_drv_probe(struct platform_device *pdev) if (retval < 0) goto out_disable_resources; - /* configure irq polarity and type before connecting isr */ - if (pdata->config.irq_polarity == SMSC911X_IRQ_POLARITY_ACTIVE_HIGH) - intcfg |= INT_CFG_IRQ_POL_; - - if (pdata->config.irq_type == SMSC911X_IRQ_TYPE_PUSH_PULL) - intcfg |= INT_CFG_IRQ_TYPE_; - - smsc911x_reg_write(pdata, INT_CFG, intcfg); - - /* Ensure interrupts are globally disabled before connecting ISR */ - smsc911x_disable_irq_chip(dev); - - retval = request_irq(dev->irq, smsc911x_irqhandler, - irq_flags | IRQF_SHARED, dev->name, dev); - if (retval) { - SMSC_WARN(pdata, probe, - "Unable to claim requested irq: %d", dev->irq); - goto out_disable_resources; - } - netif_carrier_off(dev); retval = smsc911x_mii_init(pdev, dev); if (retval) { SMSC_WARN(pdata, probe, "Error %i initialising mii", retval); - goto out_free_irq; + goto out_disable_resources; } retval = register_netdev(dev); if (retval) { SMSC_WARN(pdata, probe, "Error %i registering device", retval); - goto out_free_irq; + goto out_disable_resources; } else { SMSC_TRACE(pdata, probe, "Network interface: \"%s\"", dev->name); @@ -2552,8 +2543,6 @@ static int smsc911x_drv_probe(struct platform_device *pdev) return 0; -out_free_irq: - free_irq(dev->irq, dev); out_disable_resources: 
pm_runtime_put(&pdev->dev); pm_runtime_disable(&pdev->dev); -- cgit v1.1 From 8432ebd66205ef1e088005ae3738600dedc7d9b4 Mon Sep 17 00:00:00 2001 From: Rajan Vaja Date: Thu, 21 Jul 2016 13:44:44 +0530 Subject: hostap: Use memdup_user() to reuse code Fix coccicheck warning which recommends to use memdup_user() instead of reimplementing its code. This patch fixes below coccicheck warnings: drivers/net/wireless/intersil/hostap/hostap_ioctl.c:3044:9-16: WARNING opportunity for memdup_user drivers/net/wireless/intersil/hostap/hostap_ioctl.c:3806:9-16: WARNING opportunity for memdup_user Signed-off-by: Rajan Vaja Reviewed-by: Julian Calaby Signed-off-by: Kalle Valo --- drivers/net/wireless/intersil/hostap/hostap_ioctl.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/intersil/hostap/hostap_ioctl.c b/drivers/net/wireless/intersil/hostap/hostap_ioctl.c index 3e5fa78..a5656bc 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_ioctl.c +++ b/drivers/net/wireless/intersil/hostap/hostap_ioctl.c @@ -3041,13 +3041,9 @@ static int prism2_ioctl_priv_download(local_info_t *local, struct iw_point *p) p->length > 1024 || !p->pointer) return -EINVAL; - param = kmalloc(p->length, GFP_KERNEL); - if (param == NULL) - return -ENOMEM; - - if (copy_from_user(param, p->pointer, p->length)) { - ret = -EFAULT; - goto out; + param = memdup_user(p->pointer, p->length); + if (IS_ERR(param)) { + return PTR_ERR(param); } if (p->length < sizeof(struct prism2_download_param) + @@ -3803,13 +3799,9 @@ static int prism2_ioctl_priv_hostapd(local_info_t *local, struct iw_point *p) p->length > PRISM2_HOSTAPD_MAX_BUF_SIZE || !p->pointer) return -EINVAL; - param = kmalloc(p->length, GFP_KERNEL); - if (param == NULL) - return -ENOMEM; - - if (copy_from_user(param, p->pointer, p->length)) { - ret = -EFAULT; - goto out; + param = memdup_user(p->pointer, p->length); + if (IS_ERR(param)) { + return PTR_ERR(param); } switch (param->cmd) { -- cgit v1.1 
From 4ad0579a28c0a02613c1d4a53c03ae746f14b0ac Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 22 Jul 2016 14:08:08 +0000 Subject: wlcore: spi: fix non static symbol warning Fixes the following sparse warning: drivers/net/wireless/ti/wlcore/spi.c:87:34: warning: symbol 'wilink_data' was not declared. Should it be static? Signed-off-by: Wei Yongjun Signed-off-by: Kalle Valo --- drivers/net/wireless/ti/wlcore/spi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c index 6d24040..0ed526e 100644 --- a/drivers/net/wireless/ti/wlcore/spi.c +++ b/drivers/net/wireless/ti/wlcore/spi.c @@ -84,7 +84,7 @@ struct wilink_familiy_data { char name[8]; }; -const struct wilink_familiy_data *wilink_data; +static const struct wilink_familiy_data *wilink_data; static const struct wilink_familiy_data wl18xx_data = { .name = "wl18xx", -- cgit v1.1 From 902831a7629b8b72d333d214b031a717309bb1eb Mon Sep 17 00:00:00 2001 From: Karthik D A Date: Mon, 25 Jul 2016 21:21:04 +0530 Subject: mwifiex: Fixed endianness problem for big endian platform The driver sends and receives information to and from the firmware. Correct endianness should be ensured as firmware follows little endian format and host can be little/big endian.
Signed-off-by: Karthik D A Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/fw.h | 30 +++++++++++----------- drivers/net/wireless/marvell/mwifiex/sta_cmd.c | 27 ++++++++++--------- drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c | 2 +- 3 files changed, 31 insertions(+), 28 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/fw.h b/drivers/net/wireless/marvell/mwifiex/fw.h index 5596b6b..c46267b 100644 --- a/drivers/net/wireless/marvell/mwifiex/fw.h +++ b/drivers/net/wireless/marvell/mwifiex/fw.h @@ -1646,7 +1646,7 @@ struct mwifiex_ie_types_sta_info { }; struct host_cmd_ds_sta_list { - u16 sta_count; + __le16 sta_count; u8 tlv[0]; } __packed; @@ -2034,26 +2034,26 @@ struct host_cmd_ds_set_bss_mode { struct host_cmd_ds_pcie_details { /* TX buffer descriptor ring address */ - u32 txbd_addr_lo; - u32 txbd_addr_hi; + __le32 txbd_addr_lo; + __le32 txbd_addr_hi; /* TX buffer descriptor ring count */ - u32 txbd_count; + __le32 txbd_count; /* RX buffer descriptor ring address */ - u32 rxbd_addr_lo; - u32 rxbd_addr_hi; + __le32 rxbd_addr_lo; + __le32 rxbd_addr_hi; /* RX buffer descriptor ring count */ - u32 rxbd_count; + __le32 rxbd_count; /* Event buffer descriptor ring address */ - u32 evtbd_addr_lo; - u32 evtbd_addr_hi; + __le32 evtbd_addr_lo; + __le32 evtbd_addr_hi; /* Event buffer descriptor ring count */ - u32 evtbd_count; + __le32 evtbd_count; /* Sleep cookie buffer physical address */ - u32 sleep_cookie_addr_lo; - u32 sleep_cookie_addr_hi; + __le32 sleep_cookie_addr_lo; + __le32 sleep_cookie_addr_hi; } __packed; struct mwifiex_ie_types_rssi_threshold { @@ -2093,8 +2093,8 @@ struct mwifiex_ie_types_mc_group_info { u8 chan_buf_weight; u8 band_config; u8 chan_num; - u32 chan_time; - u32 reserved; + __le32 chan_time; + __le32 reserved; union { u8 sdio_func_num; u8 usb_ep_num; @@ -2185,7 +2185,7 @@ struct host_cmd_ds_robust_coex { } __packed; struct host_cmd_ds_wakeup_reason { - u16 wakeup_reason; + __le16 
wakeup_reason; } __packed; struct host_cmd_ds_gtk_rekey_params { diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c index 7897037..108c11c 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c @@ -1244,20 +1244,23 @@ mwifiex_cmd_pcie_host_spec(struct mwifiex_private *priv, return 0; /* Send the ring base addresses and count to firmware */ - host_spec->txbd_addr_lo = (u32)(card->txbd_ring_pbase); - host_spec->txbd_addr_hi = (u32)(((u64)card->txbd_ring_pbase)>>32); - host_spec->txbd_count = MWIFIEX_MAX_TXRX_BD; - host_spec->rxbd_addr_lo = (u32)(card->rxbd_ring_pbase); - host_spec->rxbd_addr_hi = (u32)(((u64)card->rxbd_ring_pbase)>>32); - host_spec->rxbd_count = MWIFIEX_MAX_TXRX_BD; - host_spec->evtbd_addr_lo = (u32)(card->evtbd_ring_pbase); - host_spec->evtbd_addr_hi = (u32)(((u64)card->evtbd_ring_pbase)>>32); - host_spec->evtbd_count = MWIFIEX_MAX_EVT_BD; + host_spec->txbd_addr_lo = cpu_to_le32((u32)(card->txbd_ring_pbase)); + host_spec->txbd_addr_hi = + cpu_to_le32((u32)(((u64)card->txbd_ring_pbase) >> 32)); + host_spec->txbd_count = cpu_to_le32(MWIFIEX_MAX_TXRX_BD); + host_spec->rxbd_addr_lo = cpu_to_le32((u32)(card->rxbd_ring_pbase)); + host_spec->rxbd_addr_hi = + cpu_to_le32((u32)(((u64)card->rxbd_ring_pbase) >> 32)); + host_spec->rxbd_count = cpu_to_le32(MWIFIEX_MAX_TXRX_BD); + host_spec->evtbd_addr_lo = cpu_to_le32((u32)(card->evtbd_ring_pbase)); + host_spec->evtbd_addr_hi = + cpu_to_le32((u32)(((u64)card->evtbd_ring_pbase) >> 32)); + host_spec->evtbd_count = cpu_to_le32(MWIFIEX_MAX_EVT_BD); if (card->sleep_cookie_vbase) { host_spec->sleep_cookie_addr_lo = - (u32)(card->sleep_cookie_pbase); - host_spec->sleep_cookie_addr_hi = - (u32)(((u64)(card->sleep_cookie_pbase)) >> 32); + cpu_to_le32((u32)(card->sleep_cookie_pbase)); + host_spec->sleep_cookie_addr_hi = cpu_to_le32((u32)(((u64) + (card->sleep_cookie_pbase)) >> 32)); 
mwifiex_dbg(priv->adapter, INFO, "sleep_cook_lo phy addr: 0x%x\n", host_spec->sleep_cookie_addr_lo); diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c index ccf54932..90e191b 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c @@ -962,7 +962,7 @@ static int mwifiex_ret_uap_sta_list(struct mwifiex_private *priv, int i; struct mwifiex_sta_node *sta_node; - for (i = 0; i < sta_list->sta_count; i++) { + for (i = 0; i < (le16_to_cpu(sta_list->sta_count)); i++) { sta_node = mwifiex_get_sta_entry(priv, sta_info->mac); if (unlikely(!sta_node)) continue; -- cgit v1.1 From e5988c62b9e6e5fb279188db916c51fdb5981403 Mon Sep 17 00:00:00 2001 From: Karthik D A Date: Mon, 25 Jul 2016 21:21:05 +0530 Subject: mwifiex: add region code information in debugfs region code is an EEPROM setting received from firmware. Let's display this in debugfs along with other information. 
Signed-off-by: Karthik D A Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/debugfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/marvell/mwifiex/debugfs.c b/drivers/net/wireless/marvell/mwifiex/debugfs.c index bccf17a..b9284b5 100644 --- a/drivers/net/wireless/marvell/mwifiex/debugfs.c +++ b/drivers/net/wireless/marvell/mwifiex/debugfs.c @@ -118,6 +118,8 @@ mwifiex_info_read(struct file *file, char __user *ubuf, p += sprintf(p, "bssid=\"%pM\"\n", info.bssid); p += sprintf(p, "channel=\"%d\"\n", (int) info.bss_chan); p += sprintf(p, "country_code = \"%s\"\n", info.country_code); + p += sprintf(p, "region_code=\"0x%x\"\n", + priv->adapter->region_code); netdev_for_each_mc_addr(ha, netdev) p += sprintf(p, "multicast_address[%d]=\"%pM\"\n", -- cgit v1.1 From c8ccf3ade7851054f82bf88f5fcd393a394038a3 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Mon, 25 Jul 2016 21:21:06 +0530 Subject: mwifiex: fix failed to reconnect after interface disabled/enabled Recent patch "mwifiex: fix NULL pointer" skips extended scan event handling when suspend is in progress. It created a problem for scan after interface disabled/enabled case. This patch solves the problem by checking netif_running() status. 
Fixes:16d25da94f3d654 ("mwifiex: fix NULL pointer dereference during suspend") Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/sta_event.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/sta_event.c b/drivers/net/wireless/marvell/mwifiex/sta_event.c index a422f33..7e394d4 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_event.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_event.c @@ -708,7 +708,11 @@ int mwifiex_process_sta_event(struct mwifiex_private *priv) case EVENT_EXT_SCAN_REPORT: mwifiex_dbg(adapter, EVENT, "event: EXT_SCAN Report\n"); - if (adapter->ext_scan && !priv->scan_aborting) + /* We intend to skip this event during suspend, but handle + * it in interface disabled case + */ + if (adapter->ext_scan && (!priv->scan_aborting || + !netif_running(priv->netdev))) ret = mwifiex_handle_event_ext_scan_report(priv, adapter->event_skb->data); -- cgit v1.1 From c2a8f0ff9c6ca8d04adb68b7959a56a3cbb665b3 Mon Sep 17 00:00:00 2001 From: Ganapathi Bhat Date: Mon, 25 Jul 2016 21:21:07 +0530 Subject: mwifiex: support random MAC address for scanning This patch advertises RANDOM_MAC_ADDR feature to cfg80211. It allow the application to issue scan with a MAC address and mask. Random MACs are generated and used in probe requests sent for scanning until it is changed by the application or device is restarted. 
Signed-off-by: Ganapathi Bhat Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 15 ++++++++++++++- drivers/net/wireless/marvell/mwifiex/fw.h | 7 +++++++ drivers/net/wireless/marvell/mwifiex/main.h | 1 + drivers/net/wireless/marvell/mwifiex/scan.c | 16 ++++++++++++++++ 4 files changed, 38 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index a8ff969..871ad8a 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -2485,6 +2485,16 @@ mwifiex_cfg80211_scan(struct wiphy *wiphy, priv->scan_request = request; + if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) { + ether_addr_copy(priv->random_mac, request->mac_addr); + for (i = 0; i < ETH_ALEN; i++) { + priv->random_mac[i] &= request->mac_addr_mask[i]; + priv->random_mac[i] |= get_random_int() & + ~(request->mac_addr_mask[i]); + } + } + + ether_addr_copy(user_scan_cfg->random_mac, priv->random_mac); user_scan_cfg->num_ssids = request->n_ssids; user_scan_cfg->ssid_list = request->ssids; @@ -4173,7 +4183,10 @@ int mwifiex_register_cfg80211(struct mwifiex_adapter *adapter) wiphy->features |= NL80211_FEATURE_HT_IBSS | NL80211_FEATURE_INACTIVITY_TIMER | NL80211_FEATURE_LOW_PRIORITY_SCAN | - NL80211_FEATURE_NEED_OBSS_SCAN; + NL80211_FEATURE_NEED_OBSS_SCAN | + NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR | + NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR | + NL80211_FEATURE_ND_RANDOM_MAC_ADDR; if (ISSUPP_TDLS_ENABLED(adapter->fw_cap_info)) wiphy->features |= NL80211_FEATURE_TDLS_CHANNEL_SWITCH; diff --git a/drivers/net/wireless/marvell/mwifiex/fw.h b/drivers/net/wireless/marvell/mwifiex/fw.h index c46267b..3797ef4 100644 --- a/drivers/net/wireless/marvell/mwifiex/fw.h +++ b/drivers/net/wireless/marvell/mwifiex/fw.h @@ -188,6 +188,7 @@ enum MWIFIEX_802_11_PRIVACY_FILTER { #define TLV_BTCOEX_WL_AGGR_WINSIZE 
(PROPRIETARY_TLV_BASE_ID + 202) #define TLV_BTCOEX_WL_SCANTIME (PROPRIETARY_TLV_BASE_ID + 203) #define TLV_TYPE_BSS_MODE (PROPRIETARY_TLV_BASE_ID + 206) +#define TLV_TYPE_RANDOM_MAC (PROPRIETARY_TLV_BASE_ID + 236) #define MWIFIEX_TX_DATA_BUF_SIZE_2K 2048 @@ -780,6 +781,11 @@ struct mwifiex_ie_types_scan_chan_gap { __le16 chan_gap; } __packed; +struct mwifiex_ie_types_random_mac { + struct mwifiex_ie_types_header header; + u8 mac[ETH_ALEN]; +} __packed; + struct mwifiex_ietypes_chanstats { struct mwifiex_ie_types_header header; struct mwifiex_fw_chan_stats chanstats[0]; @@ -1464,6 +1470,7 @@ struct mwifiex_user_scan_cfg { /* Variable number (fixed maximum) of channels to scan up */ struct mwifiex_user_scan_chan chan_list[MWIFIEX_USER_SCAN_CHAN_MAX]; u16 scan_chan_gap; + u8 random_mac[ETH_ALEN]; } __packed; #define MWIFIEX_BG_SCAN_CHAN_MAX 38 diff --git a/drivers/net/wireless/marvell/mwifiex/main.h b/drivers/net/wireless/marvell/mwifiex/main.h index 9f6bb40..5902600 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.h +++ b/drivers/net/wireless/marvell/mwifiex/main.h @@ -675,6 +675,7 @@ struct mwifiex_private { struct mwifiex_user_scan_chan hidden_chan[MWIFIEX_USER_SCAN_CHAN_MAX]; u8 assoc_resp_ht_param; bool ht_param_present; + u8 random_mac[ETH_ALEN]; }; diff --git a/drivers/net/wireless/marvell/mwifiex/scan.c b/drivers/net/wireless/marvell/mwifiex/scan.c index 21ec847..8daf0d86 100644 --- a/drivers/net/wireless/marvell/mwifiex/scan.c +++ b/drivers/net/wireless/marvell/mwifiex/scan.c @@ -820,6 +820,7 @@ mwifiex_config_scan(struct mwifiex_private *priv, struct mwifiex_adapter *adapter = priv->adapter; struct mwifiex_ie_types_num_probes *num_probes_tlv; struct mwifiex_ie_types_scan_chan_gap *chan_gap_tlv; + struct mwifiex_ie_types_random_mac *random_mac_tlv; struct mwifiex_ie_types_wildcard_ssid_params *wildcard_ssid_tlv; struct mwifiex_ie_types_bssid_list *bssid_tlv; u8 *tlv_pos; @@ -835,6 +836,7 @@ mwifiex_config_scan(struct mwifiex_private *priv, u8 
ssid_filter; struct mwifiex_ie_types_htcap *ht_cap; struct mwifiex_ie_types_bss_mode *bss_mode; + const u8 zero_mac[6] = {0, 0, 0, 0, 0, 0}; /* The tlv_buf_len is calculated for each scan command. The TLVs added in this routine will be preserved since the routine that sends the @@ -967,6 +969,18 @@ mwifiex_config_scan(struct mwifiex_private *priv, tlv_pos += sizeof(struct mwifiex_ie_types_scan_chan_gap); } + + if (!ether_addr_equal(user_scan_in->random_mac, zero_mac)) { + random_mac_tlv = (void *)tlv_pos; + random_mac_tlv->header.type = + cpu_to_le16(TLV_TYPE_RANDOM_MAC); + random_mac_tlv->header.len = + cpu_to_le16(sizeof(random_mac_tlv->mac)); + ether_addr_copy(random_mac_tlv->mac, + user_scan_in->random_mac); + tlv_pos += + sizeof(struct mwifiex_ie_types_random_mac); + } } else { scan_cfg_out->bss_mode = (u8) adapter->scan_mode; num_probes = adapter->scan_probes; @@ -1922,6 +1936,7 @@ mwifiex_active_scan_req_for_passive_chan(struct mwifiex_private *priv) } adapter->active_scan_triggered = true; + ether_addr_copy(user_scan_cfg->random_mac, priv->random_mac); user_scan_cfg->num_ssids = priv->scan_request->n_ssids; user_scan_cfg->ssid_list = priv->scan_request->ssids; @@ -2761,6 +2776,7 @@ static int mwifiex_scan_specific_ssid(struct mwifiex_private *priv, if (!scan_cfg) return -ENOMEM; + ether_addr_copy(scan_cfg->random_mac, priv->random_mac); scan_cfg->ssid_list = req_ssid; scan_cfg->num_ssids = 1; -- cgit v1.1 From 99ffe72cdae4f7c326d094c85167802ee0ecacbb Mon Sep 17 00:00:00 2001 From: Xinming Hu Date: Mon, 25 Jul 2016 21:21:08 +0530 Subject: mwifiex: process rxba_sync event Firmware may filter and drop packets under certain condition, for example, ARP SA=DA packet. this event will be used to synchronize the Rx Block Acknowledgment (BA) window bitmap and to fill any holes in driver side. 
Signed-off-by: Xinming Hu Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- .../net/wireless/marvell/mwifiex/11n_rxreorder.c | 78 +++++++++++++++++++++- .../net/wireless/marvell/mwifiex/11n_rxreorder.h | 3 +- drivers/net/wireless/marvell/mwifiex/fw.h | 12 ++++ drivers/net/wireless/marvell/mwifiex/sta_event.c | 6 ++ drivers/net/wireless/marvell/mwifiex/uap_event.c | 7 +- 5 files changed, 103 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c index a74cc43..9448012 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c +++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c @@ -78,8 +78,15 @@ static int mwifiex_11n_dispatch_amsdu_pkt(struct mwifiex_private *priv, */ static int mwifiex_11n_dispatch_pkt(struct mwifiex_private *priv, void *payload) { - int ret = mwifiex_11n_dispatch_amsdu_pkt(priv, payload); + int ret; + + if (!payload) { + mwifiex_dbg(priv->adapter, INFO, "info: fw drop data\n"); + return 0; + } + + ret = mwifiex_11n_dispatch_amsdu_pkt(priv, payload); if (!ret) return 0; @@ -921,3 +928,72 @@ void mwifiex_coex_ampdu_rxwinsize(struct mwifiex_adapter *adapter) else mwifiex_update_ampdu_rxwinsize(adapter, false); } + +/* This function handles rxba_sync event + */ +void mwifiex_11n_rxba_sync_event(struct mwifiex_private *priv, + u8 *event_buf, u16 len) +{ + struct mwifiex_ie_types_rxba_sync *tlv_rxba = (void *)event_buf; + u16 tlv_type, tlv_len; + struct mwifiex_rx_reorder_tbl *rx_reor_tbl_ptr; + u8 i, j; + u16 seq_num, tlv_seq_num, tlv_bitmap_len; + int tlv_buf_left = len; + int ret; + u8 *tmp; + + mwifiex_dbg_dump(priv->adapter, EVT_D, "RXBA_SYNC event:", + event_buf, len); + while (tlv_buf_left >= sizeof(*tlv_rxba)) { + tlv_type = le16_to_cpu(tlv_rxba->header.type); + tlv_len = le16_to_cpu(tlv_rxba->header.len); + if (tlv_type != TLV_TYPE_RXBA_SYNC) { + mwifiex_dbg(priv->adapter, ERROR, + "Wrong TLV id=0x%x\n", tlv_type); + 
return; + } + + tlv_seq_num = le16_to_cpu(tlv_rxba->seq_num); + tlv_bitmap_len = le16_to_cpu(tlv_rxba->bitmap_len); + mwifiex_dbg(priv->adapter, INFO, + "%pM tid=%d seq_num=%d bitmap_len=%d\n", + tlv_rxba->mac, tlv_rxba->tid, tlv_seq_num, + tlv_bitmap_len); + + rx_reor_tbl_ptr = + mwifiex_11n_get_rx_reorder_tbl(priv, tlv_rxba->tid, + tlv_rxba->mac); + if (!rx_reor_tbl_ptr) { + mwifiex_dbg(priv->adapter, ERROR, + "Can not find rx_reorder_tbl!"); + return; + } + + for (i = 0; i < tlv_bitmap_len; i++) { + for (j = 0 ; j < 8; j++) { + if (tlv_rxba->bitmap[i] & (1 << j)) { + seq_num = (MAX_TID_VALUE - 1) & + (tlv_seq_num + i * 8 + j); + + mwifiex_dbg(priv->adapter, ERROR, + "drop packet,seq=%d\n", + seq_num); + + ret = mwifiex_11n_rx_reorder_pkt + (priv, seq_num, tlv_rxba->tid, + tlv_rxba->mac, 0, NULL); + + if (ret) + mwifiex_dbg(priv->adapter, + ERROR, + "Fail to drop packet"); + } + } + } + + tlv_buf_left -= (sizeof(*tlv_rxba) + tlv_len); + tmp = (u8 *)tlv_rxba + tlv_len + sizeof(*tlv_rxba); + tlv_rxba = (struct mwifiex_ie_types_rxba_sync *)tmp; + } +} diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.h b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.h index 63ecea8..22d991f 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.h +++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.h @@ -81,5 +81,6 @@ struct mwifiex_rx_reorder_tbl * mwifiex_11n_get_rx_reorder_tbl(struct mwifiex_private *priv, int tid, u8 *ta); void mwifiex_11n_del_rx_reorder_tbl_by_ta(struct mwifiex_private *priv, u8 *ta); void mwifiex_update_rxreor_flags(struct mwifiex_adapter *adapter, u8 flags); - +void mwifiex_11n_rxba_sync_event(struct mwifiex_private *priv, + u8 *event_buf, u16 len); #endif /* _MWIFIEX_11N_RXREORDER_H_ */ diff --git a/drivers/net/wireless/marvell/mwifiex/fw.h b/drivers/net/wireless/marvell/mwifiex/fw.h index 3797ef4..3b40e0d 100644 --- a/drivers/net/wireless/marvell/mwifiex/fw.h +++ b/drivers/net/wireless/marvell/mwifiex/fw.h @@ -176,6 +176,7 @@ 
enum MWIFIEX_802_11_PRIVACY_FILTER { #define TLV_TYPE_PWK_CIPHER (PROPRIETARY_TLV_BASE_ID + 145) #define TLV_TYPE_GWK_CIPHER (PROPRIETARY_TLV_BASE_ID + 146) #define TLV_TYPE_TX_PAUSE (PROPRIETARY_TLV_BASE_ID + 148) +#define TLV_TYPE_RXBA_SYNC (PROPRIETARY_TLV_BASE_ID + 153) #define TLV_TYPE_COALESCE_RULE (PROPRIETARY_TLV_BASE_ID + 154) #define TLV_TYPE_KEY_PARAM_V2 (PROPRIETARY_TLV_BASE_ID + 156) #define TLV_TYPE_REPEAT_COUNT (PROPRIETARY_TLV_BASE_ID + 176) @@ -532,6 +533,7 @@ enum P2P_MODES { #define EVENT_CHANNEL_REPORT_RDY 0x00000054 #define EVENT_TX_DATA_PAUSE 0x00000055 #define EVENT_EXT_SCAN_REPORT 0x00000058 +#define EVENT_RXBA_SYNC 0x00000059 #define EVENT_BG_SCAN_STOPPED 0x00000065 #define EVENT_REMAIN_ON_CHAN_EXPIRED 0x0000005f #define EVENT_MULTI_CHAN_INFO 0x0000006a @@ -735,6 +737,16 @@ struct mwifiex_ie_types_chan_list_param_set { struct mwifiex_chan_scan_param_set chan_scan_param[1]; } __packed; +struct mwifiex_ie_types_rxba_sync { + struct mwifiex_ie_types_header header; + u8 mac[ETH_ALEN]; + u8 tid; + u8 reserved; + __le16 seq_num; + __le16 bitmap_len; + u8 bitmap[1]; +} __packed; + struct chan_band_param_set { u8 radio_type; u8 chan_number; diff --git a/drivers/net/wireless/marvell/mwifiex/sta_event.c b/drivers/net/wireless/marvell/mwifiex/sta_event.c index 7e394d4..b973ee8 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_event.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_event.c @@ -873,6 +873,12 @@ int mwifiex_process_sta_event(struct mwifiex_private *priv) mwifiex_bt_coex_wlan_param_update_event(priv, adapter->event_skb); break; + case EVENT_RXBA_SYNC: + dev_dbg(adapter->dev, "EVENT: RXBA_SYNC\n"); + mwifiex_11n_rxba_sync_event(priv, adapter->event_body, + adapter->event_skb->len - + sizeof(eventcause)); + break; default: mwifiex_dbg(adapter, ERROR, "event: unknown event id: %#x\n", eventcause); diff --git a/drivers/net/wireless/marvell/mwifiex/uap_event.c b/drivers/net/wireless/marvell/mwifiex/uap_event.c index 86ff542..d24eca3 100644 
--- a/drivers/net/wireless/marvell/mwifiex/uap_event.c +++ b/drivers/net/wireless/marvell/mwifiex/uap_event.c @@ -306,7 +306,12 @@ int mwifiex_process_uap_event(struct mwifiex_private *priv) mwifiex_dbg(adapter, EVENT, "event: multi-chan info\n"); mwifiex_process_multi_chan_event(priv, adapter->event_skb); break; - + case EVENT_RXBA_SYNC: + dev_dbg(adapter->dev, "EVENT: RXBA_SYNC\n"); + mwifiex_11n_rxba_sync_event(priv, adapter->event_body, + adapter->event_skb->len - + sizeof(eventcause)); + break; default: mwifiex_dbg(adapter, EVENT, "event: unknown event id: %#x\n", eventcause); -- cgit v1.1 From 5536c4aafcac094fb7ea6c3c1e6d999ae586171d Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Mon, 25 Jul 2016 21:21:09 +0530 Subject: mwifiex: remove misleading disconnect message Disconnect message in mwifiex_reset_connect_state() already displays the necessary information. We unnecessarily have exactly the same message in cfg80211_disconnect(). As priv->cfg_bssid is cleared at this point of time, it prints incorrect (all zero) MAC. This message is removed here.
Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index 871ad8a..235fb39 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -2012,10 +2012,6 @@ mwifiex_cfg80211_disconnect(struct wiphy *wiphy, struct net_device *dev, if (mwifiex_deauthenticate(priv, NULL)) return -EFAULT; - mwifiex_dbg(priv->adapter, MSG, - "info: successfully disconnected from %pM:\t" - "reason code %d\n", priv->cfg_bssid, reason_code); - eth_zero_addr(priv->cfg_bssid); priv->hs2_enabled = false; -- cgit v1.1 From 432da7d243da32651e1fae677f3a83c16b346d47 Mon Sep 17 00:00:00 2001 From: Xinming Hu Date: Mon, 25 Jul 2016 21:21:10 +0530 Subject: mwifiex: add HT aggregation support for adhoc mode This patch adds HT support for adhoc station. Firmware will upload ibss sta connect event with beacon data, whenever a new station joins the adhoc network. Driver will check the HT IE and decide whether to support HT aggregation or not.
Signed-off-by: Xinming Hu Signed-off-by: Cathy Luo Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/11n.h | 7 +- drivers/net/wireless/marvell/mwifiex/fw.h | 9 ++ drivers/net/wireless/marvell/mwifiex/sta_event.c | 132 ++++++++++++++++++++++- 3 files changed, 144 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/11n.h b/drivers/net/wireless/marvell/mwifiex/11n.h index afdd58a..ea0fa68 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n.h +++ b/drivers/net/wireless/marvell/mwifiex/11n.h @@ -171,9 +171,10 @@ mwifiex_find_stream_to_delete(struct mwifiex_private *priv, int ptr_tid, static inline int mwifiex_is_sta_11n_enabled(struct mwifiex_private *priv, struct mwifiex_sta_node *node) { - - if (!node || (priv->bss_role != MWIFIEX_BSS_ROLE_UAP) || - !priv->ap_11n_enabled) + if (!node || ((priv->bss_role == MWIFIEX_BSS_ROLE_UAP) && + !priv->ap_11n_enabled) || + ((priv->bss_mode == NL80211_IFTYPE_ADHOC) && + !priv->adapter->adhoc_11n_enabled)) return 0; return node->is_11n_enabled; diff --git a/drivers/net/wireless/marvell/mwifiex/fw.h b/drivers/net/wireless/marvell/mwifiex/fw.h index 3b40e0d..a88030a 100644 --- a/drivers/net/wireless/marvell/mwifiex/fw.h +++ b/drivers/net/wireless/marvell/mwifiex/fw.h @@ -210,6 +210,7 @@ enum MWIFIEX_802_11_PRIVACY_FILTER { #define MWIFIEX_TX_DATA_BUF_SIZE_4K 4096 #define MWIFIEX_TX_DATA_BUF_SIZE_8K 8192 +#define MWIFIEX_TX_DATA_BUF_SIZE_12K 12288 #define ISSUPP_11NENABLED(FwCapInfo) (FwCapInfo & BIT(11)) #define ISSUPP_TDLS_ENABLED(FwCapInfo) (FwCapInfo & BIT(14)) @@ -506,6 +507,8 @@ enum P2P_MODES { #define EVENT_RSSI_HIGH 0x0000001c #define EVENT_SNR_HIGH 0x0000001d #define EVENT_IBSS_COALESCED 0x0000001e +#define EVENT_IBSS_STA_CONNECT 0x00000020 +#define EVENT_IBSS_STA_DISCONNECT 0x00000021 #define EVENT_DATA_RSSI_LOW 0x00000024 #define EVENT_DATA_SNR_LOW 0x00000025 #define EVENT_DATA_RSSI_HIGH 0x00000026 @@ -1686,6 +1689,12 @@ struct 
mwifiex_ie_types_wmm_param_set { u8 wmm_ie[1]; }; +struct mwifiex_ie_types_mgmt_frame { + struct mwifiex_ie_types_header header; + __le16 frame_control; + u8 frame_contents[0]; +}; + struct mwifiex_ie_types_wmm_queue_status { struct mwifiex_ie_types_header header; u8 queue_index; diff --git a/drivers/net/wireless/marvell/mwifiex/sta_event.c b/drivers/net/wireless/marvell/mwifiex/sta_event.c index b973ee8..9df0c4d 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_event.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_event.c @@ -25,6 +25,99 @@ #include "wmm.h" #include "11n.h" +#define MWIFIEX_IBSS_CONNECT_EVT_FIX_SIZE 12 + +static int mwifiex_check_ibss_peer_capabilties(struct mwifiex_private *priv, + struct mwifiex_sta_node *sta_ptr, + struct sk_buff *event) +{ + int evt_len, ele_len; + u8 *curr; + struct ieee_types_header *ele_hdr; + struct mwifiex_ie_types_mgmt_frame *tlv_mgmt_frame; + const struct ieee80211_ht_cap *ht_cap; + const struct ieee80211_vht_cap *vht_cap; + + skb_pull(event, MWIFIEX_IBSS_CONNECT_EVT_FIX_SIZE); + evt_len = event->len; + curr = event->data; + + mwifiex_dbg_dump(priv->adapter, EVT_D, "ibss peer capabilties:", + event->data, event->len); + + skb_push(event, MWIFIEX_IBSS_CONNECT_EVT_FIX_SIZE); + + tlv_mgmt_frame = (void *)curr; + if (evt_len >= sizeof(*tlv_mgmt_frame) && + le16_to_cpu(tlv_mgmt_frame->header.type) == + TLV_TYPE_UAP_MGMT_FRAME) { + /* Locate curr pointer to the start of beacon tlv, + * timestamp 8 bytes, beacon intervel 2 bytes, + * capability info 2 bytes, totally 12 byte beacon header + */ + evt_len = le16_to_cpu(tlv_mgmt_frame->header.len); + curr += (sizeof(*tlv_mgmt_frame) + 12); + } else { + mwifiex_dbg(priv->adapter, MSG, + "management frame tlv not found!\n"); + return 0; + } + + while (evt_len >= sizeof(*ele_hdr)) { + ele_hdr = (struct ieee_types_header *)curr; + ele_len = ele_hdr->len; + + if (evt_len < ele_len + sizeof(*ele_hdr)) + break; + + switch (ele_hdr->element_id) { + case WLAN_EID_HT_CAPABILITY: + 
sta_ptr->is_11n_enabled = true; + ht_cap = (void *)(ele_hdr + 2); + sta_ptr->max_amsdu = le16_to_cpu(ht_cap->cap_info) & + IEEE80211_HT_CAP_MAX_AMSDU ? + MWIFIEX_TX_DATA_BUF_SIZE_8K : + MWIFIEX_TX_DATA_BUF_SIZE_4K; + mwifiex_dbg(priv->adapter, INFO, + "11n enabled!, max_amsdu : %d\n", + sta_ptr->max_amsdu); + break; + + case WLAN_EID_VHT_CAPABILITY: + sta_ptr->is_11ac_enabled = true; + vht_cap = (void *)(ele_hdr + 2); + /* check VHT MAXMPDU capability */ + switch (le32_to_cpu(vht_cap->vht_cap_info) & 0x3) { + case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454: + sta_ptr->max_amsdu = + MWIFIEX_TX_DATA_BUF_SIZE_12K; + break; + case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991: + sta_ptr->max_amsdu = + MWIFIEX_TX_DATA_BUF_SIZE_8K; + break; + case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895: + sta_ptr->max_amsdu = + MWIFIEX_TX_DATA_BUF_SIZE_4K; + default: + break; + } + + mwifiex_dbg(priv->adapter, INFO, + "11ac enabled!, max_amsdu : %d\n", + sta_ptr->max_amsdu); + break; + default: + break; + } + + curr += (ele_len + sizeof(*ele_hdr)); + evt_len -= (ele_len + sizeof(*ele_hdr)); + } + + return 0; +} + /* * This function resets the connection state. 
* @@ -519,6 +612,8 @@ void mwifiex_bt_coex_wlan_param_update_event(struct mwifiex_private *priv, * - EVENT_LINK_QUALITY * - EVENT_PRE_BEACON_LOST * - EVENT_IBSS_COALESCED + * - EVENT_IBSS_STA_CONNECT + * - EVENT_IBSS_STA_DISCONNECT * - EVENT_WEP_ICV_ERR * - EVENT_BW_CHANGE * - EVENT_HOSTWAKE_STAIE @@ -547,9 +642,11 @@ void mwifiex_bt_coex_wlan_param_update_event(struct mwifiex_private *priv, int mwifiex_process_sta_event(struct mwifiex_private *priv) { struct mwifiex_adapter *adapter = priv->adapter; - int ret = 0; + int ret = 0, i; u32 eventcause = adapter->event_cause; u16 ctrl, reason_code; + u8 ibss_sta_addr[ETH_ALEN]; + struct mwifiex_sta_node *sta_ptr; switch (eventcause) { case EVENT_DUMMY_HOST_WAKEUP_SIGNAL: @@ -775,6 +872,39 @@ int mwifiex_process_sta_event(struct mwifiex_private *priv) HostCmd_CMD_802_11_IBSS_COALESCING_STATUS, HostCmd_ACT_GEN_GET, 0, NULL, false); break; + case EVENT_IBSS_STA_CONNECT: + ether_addr_copy(ibss_sta_addr, adapter->event_body + 2); + mwifiex_dbg(adapter, EVENT, "event: IBSS_STA_CONNECT %pM\n", + ibss_sta_addr); + sta_ptr = mwifiex_add_sta_entry(priv, ibss_sta_addr); + if (sta_ptr && adapter->adhoc_11n_enabled) { + mwifiex_check_ibss_peer_capabilties(priv, sta_ptr, + adapter->event_skb); + if (sta_ptr->is_11n_enabled) + for (i = 0; i < MAX_NUM_TID; i++) + sta_ptr->ampdu_sta[i] = + priv->aggr_prio_tbl[i].ampdu_user; + else + for (i = 0; i < MAX_NUM_TID; i++) + sta_ptr->ampdu_sta[i] = + BA_STREAM_NOT_ALLOWED; + memset(sta_ptr->rx_seq, 0xff, sizeof(sta_ptr->rx_seq)); + } + + break; + case EVENT_IBSS_STA_DISCONNECT: + ether_addr_copy(ibss_sta_addr, adapter->event_body + 2); + mwifiex_dbg(adapter, EVENT, "event: IBSS_STA_DISCONNECT %pM\n", + ibss_sta_addr); + sta_ptr = mwifiex_get_sta_entry(priv, ibss_sta_addr); + if (sta_ptr && sta_ptr->is_11n_enabled) { + mwifiex_11n_del_rx_reorder_tbl_by_ta(priv, + ibss_sta_addr); + mwifiex_del_tx_ba_stream_tbl_by_ra(priv, ibss_sta_addr); + } + mwifiex_wmm_del_peer_ra_list(priv, ibss_sta_addr); + 
mwifiex_del_sta_entry(priv, ibss_sta_addr); + break; case EVENT_ADDBA: mwifiex_dbg(adapter, EVENT, "event: ADDBA Request\n"); mwifiex_send_cmd(priv, HostCmd_CMD_11N_ADDBA_RSP, -- cgit v1.1 From 441756b6a6e3818dc6f2e76b9526558d450ce778 Mon Sep 17 00:00:00 2001 From: Ganapathi Bhat Date: Mon, 25 Jul 2016 21:21:11 +0530 Subject: mwifiex: fix radar detection issue It's been observed that firmware sends RADAR detected event without specifying bss_num/bss_type. Also, the event body is empty. Currently the event is being ignored by driver. This patch checks on which interface 11H is active, accordingly fills bss_num/bss_type and handles the event. Condition "if (le32_to_cpu(rdr_event->passed))" which always fails is also removed. Signed-off-by: Ganapathi Bhat Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/11h.c | 27 +++++++++++---------------- drivers/net/wireless/marvell/mwifiex/cmdevt.c | 18 ++++++++++++++++-- 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/11h.c b/drivers/net/wireless/marvell/mwifiex/11h.c index 81c60d0..43dccd5 100644 --- a/drivers/net/wireless/marvell/mwifiex/11h.c +++ b/drivers/net/wireless/marvell/mwifiex/11h.c @@ -260,22 +260,17 @@ int mwifiex_11h_handle_radar_detected(struct mwifiex_private *priv, rdr_event = (void *)(skb->data + sizeof(u32)); - if (le32_to_cpu(rdr_event->passed)) { - mwifiex_dbg(priv->adapter, MSG, - "radar detected; indicating kernel\n"); - if (mwifiex_stop_radar_detection(priv, &priv->dfs_chandef)) - mwifiex_dbg(priv->adapter, ERROR, - "Failed to stop CAC in FW\n"); - cfg80211_radar_event(priv->adapter->wiphy, &priv->dfs_chandef, - GFP_KERNEL); - mwifiex_dbg(priv->adapter, MSG, "regdomain: %d\n", - rdr_event->reg_domain); - mwifiex_dbg(priv->adapter, MSG, "radar detection type: %d\n", - rdr_event->det_type); - } else { - mwifiex_dbg(priv->adapter, MSG, - "false radar detection event!\n"); - } + mwifiex_dbg(priv->adapter, 
MSG, + "radar detected; indicating kernel\n"); + if (mwifiex_stop_radar_detection(priv, &priv->dfs_chandef)) + mwifiex_dbg(priv->adapter, ERROR, + "Failed to stop CAC in FW\n"); + cfg80211_radar_event(priv->adapter->wiphy, &priv->dfs_chandef, + GFP_KERNEL); + mwifiex_dbg(priv->adapter, MSG, "regdomain: %d\n", + rdr_event->reg_domain); + mwifiex_dbg(priv->adapter, MSG, "radar detection type: %d\n", + rdr_event->det_type); return 0; } diff --git a/drivers/net/wireless/marvell/mwifiex/cmdevt.c b/drivers/net/wireless/marvell/mwifiex/cmdevt.c index c29f26d..d433aa0 100644 --- a/drivers/net/wireless/marvell/mwifiex/cmdevt.c +++ b/drivers/net/wireless/marvell/mwifiex/cmdevt.c @@ -480,13 +480,27 @@ int mwifiex_free_cmd_buffer(struct mwifiex_adapter *adapter) */ int mwifiex_process_event(struct mwifiex_adapter *adapter) { - int ret; + int ret, i; struct mwifiex_private *priv = mwifiex_get_priv(adapter, MWIFIEX_BSS_ROLE_ANY); struct sk_buff *skb = adapter->event_skb; - u32 eventcause = adapter->event_cause; + u32 eventcause; struct mwifiex_rxinfo *rx_info; + if ((adapter->event_cause & EVENT_ID_MASK) == EVENT_RADAR_DETECTED) { + for (i = 0; i < adapter->priv_num; i++) { + priv = adapter->priv[i]; + if (priv && mwifiex_is_11h_active(priv)) { + adapter->event_cause |= + ((priv->bss_num & 0xff) << 16) | + ((priv->bss_type & 0xff) << 24); + break; + } + } + } + + eventcause = adapter->event_cause; + /* Save the last event to debug log */ adapter->dbg.last_event_index = (adapter->dbg.last_event_index + 1) % DBG_CMD_NUM; -- cgit v1.1 From 3f37ec79dd21fbdbbab8143a48a87272b22fef22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Mon, 25 Jul 2016 20:33:56 +0200 Subject: bcma: support BCM53573 series of wireless SoCs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BCM53573 seems to be the first series of Northstar family with wireless on the chip. 
The base models are BCM53573-s (A0, A1) and there is also BCM47189B0 which seems to be some small modification. The only problem with these chipsets seems to be watchdog. It's totally unavailable on 53573A0 / 53573A1 and preferable PMU watchdog is broken on 53573B0 / 53573B1. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- drivers/bcma/driver_chipcommon.c | 32 +++++++++++++++++++++++++++++--- include/linux/bcma/bcma.h | 3 +++ 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/drivers/bcma/driver_chipcommon.c b/drivers/bcma/driver_chipcommon.c index 921ce18..b4f6520 100644 --- a/drivers/bcma/driver_chipcommon.c +++ b/drivers/bcma/driver_chipcommon.c @@ -36,12 +36,31 @@ u32 bcma_chipco_get_alp_clock(struct bcma_drv_cc *cc) } EXPORT_SYMBOL_GPL(bcma_chipco_get_alp_clock); +static bool bcma_core_cc_has_pmu_watchdog(struct bcma_drv_cc *cc) +{ + struct bcma_bus *bus = cc->core->bus; + + if (cc->capabilities & BCMA_CC_CAP_PMU) { + if (bus->chipinfo.id == BCMA_CHIP_ID_BCM53573) { + WARN(bus->chipinfo.rev <= 1, "No watchdog available\n"); + /* 53573B0 and 53573B1 have bugged PMU watchdog. It can + * be enabled but timer can't be bumped. Use CC one + * instead. 
+ */ + return false; + } + return true; + } else { + return false; + } +} + static u32 bcma_chipco_watchdog_get_max_timer(struct bcma_drv_cc *cc) { struct bcma_bus *bus = cc->core->bus; u32 nb; - if (cc->capabilities & BCMA_CC_CAP_PMU) { + if (bcma_core_cc_has_pmu_watchdog(cc)) { if (bus->chipinfo.id == BCMA_CHIP_ID_BCM4706) nb = 32; else if (cc->core->id.rev < 26) @@ -95,9 +114,16 @@ static int bcma_chipco_watchdog_ticks_per_ms(struct bcma_drv_cc *cc) int bcma_chipco_watchdog_register(struct bcma_drv_cc *cc) { + struct bcma_bus *bus = cc->core->bus; struct bcm47xx_wdt wdt = {}; struct platform_device *pdev; + if (bus->chipinfo.id == BCMA_CHIP_ID_BCM53573 && + bus->chipinfo.rev <= 1) { + pr_debug("No watchdog on 53573A0 / 53573A1\n"); + return 0; + } + wdt.driver_data = cc; wdt.timer_set = bcma_chipco_watchdog_timer_set_wdt; wdt.timer_set_ms = bcma_chipco_watchdog_timer_set_ms_wdt; @@ -105,7 +131,7 @@ int bcma_chipco_watchdog_register(struct bcma_drv_cc *cc) bcma_chipco_watchdog_get_max_timer(cc) / cc->ticks_per_ms; pdev = platform_device_register_data(NULL, "bcm47xx-wdt", - cc->core->bus->num, &wdt, + bus->num, &wdt, sizeof(wdt)); if (IS_ERR(pdev)) return PTR_ERR(pdev); @@ -217,7 +243,7 @@ u32 bcma_chipco_watchdog_timer_set(struct bcma_drv_cc *cc, u32 ticks) u32 maxt; maxt = bcma_chipco_watchdog_get_max_timer(cc); - if (cc->capabilities & BCMA_CC_CAP_PMU) { + if (bcma_core_cc_has_pmu_watchdog(cc)) { if (ticks == 1) ticks = 2; else if (ticks > maxt) diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 3db25df..8eeedb2 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -205,6 +205,9 @@ struct bcma_host_ops { #define BCMA_PKG_ID_BCM4709 0 #define BCMA_CHIP_ID_BCM47094 53030 #define BCMA_CHIP_ID_BCM53018 53018 +#define BCMA_CHIP_ID_BCM53573 53573 +#define BCMA_PKG_ID_BCM53573 0 +#define BCMA_PKG_ID_BCM47189 1 /* Board types (on PCI usually equals to the subsystem dev id) */ /* BCM4313 */ -- cgit v1.1 From 
2f69e67058fbe9750a4f66ea30b4b6a8648a2fdc Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Sun, 31 Jul 2016 12:39:15 +0200 Subject: mwifiex: remove superfluous condition for_each_property_of_node is only executed if the property prop is not NULL. Signed-off-by: Heinrich Schuchardt Acked-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/sta_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c index 108c11c..d1f8011 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c @@ -1485,7 +1485,7 @@ int mwifiex_dnld_dt_cfgdata(struct mwifiex_private *priv, continue; /* property header is 6 bytes, data must fit in cmd buffer */ - if (prop && prop->value && prop->length > 6 && + if (prop->value && prop->length > 6 && prop->length <= MWIFIEX_SIZE_OF_CMD_BUFFER - S_DS_GEN) { ret = mwifiex_send_cmd(priv, HostCmd_CMD_CFG_DATA, HostCmd_ACT_GEN_SET, 0, -- cgit v1.1 From b0d80f19c14fc5752e806860fe2c702448f5b442 Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Sun, 31 Jul 2016 14:11:21 +0200 Subject: mwifiex: key_material_v2 remove superfluous condition We are using mac as source address in a memcpy. In the lines below we can assume mac is not NULL. 
Signed-off-by: Heinrich Schuchardt Acked-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/sta_cmd.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c index d1f8011..1d8f284 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c @@ -706,15 +706,10 @@ mwifiex_cmd_802_11_key_material_v2(struct mwifiex_private *priv, (priv->wep_key_curr_index & KEY_INDEX_MASK)) key_info |= KEY_DEFAULT; } else { - if (mac) { - if (is_broadcast_ether_addr(mac)) - key_info |= KEY_MCAST; - else - key_info |= KEY_UNICAST | - KEY_DEFAULT; - } else { + if (is_broadcast_ether_addr(mac)) key_info |= KEY_MCAST; - } + else + key_info |= KEY_UNICAST | KEY_DEFAULT; } } km->key_param_set.key_info = cpu_to_le16(key_info); -- cgit v1.1 From bd6b0242652a8a284eed2adec5ac1fc043b8f2ef Mon Sep 17 00:00:00 2001 From: Pavel Andrianov Date: Tue, 2 Aug 2016 12:41:53 +0300 Subject: wl3501_cs: Add spinlock to wl3501_reset Likely wl3501_reset should acquire spinlock as wl3501_{open, close}. One of calls of wl3501_reset has been already protected. The others were unprotected and might lead to a race condition. The patch adds spinlock into the wl3501_reset and removes it from wl3501_tx_timeout. 
Found by Linux Driver Verification project (linuxtesting.org) Signed-off-by: Pavel Andrianov Acked-by: Vaishali Thakkar Signed-off-by: Kalle Valo --- drivers/net/wireless/wl3501_cs.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c index 82d94f8..932f3f81 100644 --- a/drivers/net/wireless/wl3501_cs.c +++ b/drivers/net/wireless/wl3501_cs.c @@ -1258,7 +1258,9 @@ static int wl3501_reset(struct net_device *dev) { struct wl3501_card *this = netdev_priv(dev); int rc = -ENODEV; + unsigned long flags; + spin_lock_irqsave(&this->lock, flags); wl3501_block_interrupt(this); if (wl3501_init_firmware(this)) { @@ -1280,20 +1282,17 @@ static int wl3501_reset(struct net_device *dev) pr_debug("%s: device reset", dev->name); rc = 0; out: + spin_unlock_irqrestore(&this->lock, flags); return rc; } static void wl3501_tx_timeout(struct net_device *dev) { - struct wl3501_card *this = netdev_priv(dev); struct net_device_stats *stats = &dev->stats; - unsigned long flags; int rc; stats->tx_errors++; - spin_lock_irqsave(&this->lock, flags); rc = wl3501_reset(dev); - spin_unlock_irqrestore(&this->lock, flags); if (rc) printk(KERN_ERR "%s: Error %d resetting card on Tx timeout!\n", dev->name, rc); -- cgit v1.1 From af8a9a67c3466f70ab28a112d18eb5d327d40ca2 Mon Sep 17 00:00:00 2001 From: Sergey Ryazanov Date: Tue, 2 Aug 2016 14:19:28 +0300 Subject: ath5k: fix EEPROM dumping via debugfs EEPROM size calculated in 16-bit words, so we should take into account this fact during buffer allocation. CC: Jiri Slaby CC: Nick Kossifidis CC: Luis R. 
Rodriguez Signed-off-by: Sergey Ryazanov Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath5k/debug.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath5k/debug.c b/drivers/net/wireless/ath/ath5k/debug.c index 929d7cc..4f8d9ed 100644 --- a/drivers/net/wireless/ath/ath5k/debug.c +++ b/drivers/net/wireless/ath/ath5k/debug.c @@ -909,7 +909,7 @@ static int open_file_eeprom(struct inode *inode, struct file *file) struct ath5k_hw *ah = inode->i_private; bool res; int i, ret; - u32 eesize; + u32 eesize; /* NB: in 16-bit words */ u16 val, *buf; /* Get eeprom size */ @@ -932,7 +932,7 @@ static int open_file_eeprom(struct inode *inode, struct file *file) /* Create buffer and read in eeprom */ - buf = vmalloc(eesize); + buf = vmalloc(eesize * 2); if (!buf) { ret = -ENOMEM; goto err; @@ -952,7 +952,7 @@ static int open_file_eeprom(struct inode *inode, struct file *file) } ep->buf = buf; - ep->len = i; + ep->len = eesize * 2; file->private_data = (void *)ep; -- cgit v1.1 From f898005ff99f348febba88dff8840df6e4367758 Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Tue, 2 Aug 2016 21:26:21 +0200 Subject: rtlwifi: remove superfluous condition If sta == NULL, the changed line will not be reached. So no need to check that sta != NULL here. 
Signed-off-by: Heinrich Schuchardt Acked-by: Larry Finger Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c index 41f77f8..7aee5ebb1 100644 --- a/drivers/net/wireless/realtek/rtlwifi/core.c +++ b/drivers/net/wireless/realtek/rtlwifi/core.c @@ -1135,7 +1135,7 @@ static void rtl_op_bss_info_changed(struct ieee80211_hw *hw, mac->mode = WIRELESS_MODE_AC_24G; } - if (vif->type == NL80211_IFTYPE_STATION && sta) + if (vif->type == NL80211_IFTYPE_STATION) rtlpriv->cfg->ops->update_rate_tbl(hw, sta, 0); rcu_read_unlock(); -- cgit v1.1 From ba852018d493c99d3183fdcc7e41b725f2ec1321 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Mon, 8 Aug 2016 09:38:48 +0200 Subject: mwifiex: fix the length parameter of a memset In 'mwifiex_get_ver_ext', we have: struct mwifiex_ver_ext ver_ext; memset(&ver_ext, 0, sizeof(struct host_cmd_ds_version_ext)); This is likely that memset'ing sizeof(struct mwifiex_ver_ext) was expected. Remove the ambiguity by using the variable name directly instead of its type. 
Signed-off-by: Christophe JAILLET Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/sta_ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c index e06647a..78819e8 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c @@ -1180,7 +1180,7 @@ mwifiex_get_ver_ext(struct mwifiex_private *priv, u32 version_str_sel) { struct mwifiex_ver_ext ver_ext; - memset(&ver_ext, 0, sizeof(struct host_cmd_ds_version_ext)); + memset(&ver_ext, 0, sizeof(ver_ext)); ver_ext.version_str_sel = version_str_sel; if (mwifiex_send_cmd(priv, HostCmd_CMD_VERSION_EXT, HostCmd_ACT_GEN_GET, 0, &ver_ext, true)) -- cgit v1.1 From 6a1622000ac92244ae605e2345c348c0bf281122 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Mon, 8 Aug 2016 09:39:00 +0200 Subject: mwifiex: simplify length computation for some memset This patch should be a no-op. It just simplifies code by using the name of a variable instead of its type when calling 'sizeof'. 
Signed-off-by: Christophe JAILLET Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/sta_ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c index 78819e8..644f3a2 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c @@ -574,7 +574,7 @@ int mwifiex_enable_hs(struct mwifiex_adapter *adapter) adapter->hs_activate_wait_q_woken = false; - memset(&hscfg, 0, sizeof(struct mwifiex_ds_hs_cfg)); + memset(&hscfg, 0, sizeof(hscfg)); hscfg.is_invoke_hostcmd = true; adapter->hs_enabling = true; @@ -1138,7 +1138,7 @@ int mwifiex_set_encode(struct mwifiex_private *priv, struct key_params *kp, { struct mwifiex_ds_encrypt_key encrypt_key; - memset(&encrypt_key, 0, sizeof(struct mwifiex_ds_encrypt_key)); + memset(&encrypt_key, 0, sizeof(encrypt_key)); encrypt_key.key_len = key_len; encrypt_key.key_index = key_index; -- cgit v1.1 From b64db1b252e9974a43a51ba083fa7d03e4716167 Mon Sep 17 00:00:00 2001 From: Xinming Hu Date: Tue, 9 Aug 2016 20:20:44 +0530 Subject: mwifiex: correct aid value during tdls setup AID gets updated during TDLS setup, but modified value isn't reflected in "priv->assoc_rsp_buf". This causes TDLS setup failure. The problem is fixed here. 
Fixes: 4aff53ef18e4a4 ("mwifiex: parsing aid while receiving..") Signed-off-by: Xinming Hu Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/join.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/join.c b/drivers/net/wireless/marvell/mwifiex/join.c index 1c7b006..b89596c 100644 --- a/drivers/net/wireless/marvell/mwifiex/join.c +++ b/drivers/net/wireless/marvell/mwifiex/join.c @@ -669,9 +669,8 @@ int mwifiex_ret_802_11_associate(struct mwifiex_private *priv, priv->assoc_rsp_size = min(le16_to_cpu(resp->size) - S_DS_GEN, sizeof(priv->assoc_rsp_buf)); - memcpy(priv->assoc_rsp_buf, &resp->params, priv->assoc_rsp_size); - assoc_rsp->a_id = cpu_to_le16(aid); + memcpy(priv->assoc_rsp_buf, &resp->params, priv->assoc_rsp_size); if (status_code) { priv->adapter->dbg.num_cmd_assoc_failure++; -- cgit v1.1 From 41960b4dfdfce7d669dbec6a492202d1b18accb7 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Tue, 9 Aug 2016 20:20:45 +0530 Subject: mwifiex: add CHAN_REGION_CFG command This patch adds command preparation and response handling for CHAN_REGION_CFG command. These changes are prerequisites for adding custom regulatory domain support. 
Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/fw.h | 7 ++++ drivers/net/wireless/marvell/mwifiex/sta_cmd.c | 21 +++++++++++ drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c | 41 ++++++++++++++++++++++ 3 files changed, 69 insertions(+) diff --git a/drivers/net/wireless/marvell/mwifiex/fw.h b/drivers/net/wireless/marvell/mwifiex/fw.h index a88030a..085db99 100644 --- a/drivers/net/wireless/marvell/mwifiex/fw.h +++ b/drivers/net/wireless/marvell/mwifiex/fw.h @@ -190,6 +190,7 @@ enum MWIFIEX_802_11_PRIVACY_FILTER { #define TLV_BTCOEX_WL_SCANTIME (PROPRIETARY_TLV_BASE_ID + 203) #define TLV_TYPE_BSS_MODE (PROPRIETARY_TLV_BASE_ID + 206) #define TLV_TYPE_RANDOM_MAC (PROPRIETARY_TLV_BASE_ID + 236) +#define TLV_TYPE_CHAN_ATTR_CFG (PROPRIETARY_TLV_BASE_ID + 237) #define MWIFIEX_TX_DATA_BUF_SIZE_2K 2048 @@ -382,6 +383,7 @@ enum MWIFIEX_802_11_PRIVACY_FILTER { #define HostCmd_CMD_MC_POLICY 0x0121 #define HostCmd_CMD_TDLS_OPER 0x0122 #define HostCmd_CMD_SDIO_SP_RX_AGGR_CFG 0x0223 +#define HostCmd_CMD_CHAN_REGION_CFG 0x0242 #define PROTOCOL_NO_SECURITY 0x01 #define PROTOCOL_STATIC_WEP 0x02 @@ -2224,6 +2226,10 @@ struct host_cmd_ds_gtk_rekey_params { __le32 replay_ctr_high; } __packed; +struct host_cmd_ds_chan_region_cfg { + __le16 action; +} __packed; + struct host_cmd_ds_command { __le16 command; __le16 size; @@ -2298,6 +2304,7 @@ struct host_cmd_ds_command { struct host_cmd_ds_robust_coex coex; struct host_cmd_ds_wakeup_reason hs_wakeup_reason; struct host_cmd_ds_gtk_rekey_params rekey; + struct host_cmd_ds_chan_region_cfg reg_cfg; } params; } __packed; diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c index 1d8f284..49048b4 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c @@ -1594,6 +1594,21 @@ static int mwifiex_cmd_gtk_rekey_offload(struct mwifiex_private *priv, return 0; } +static int 
mwifiex_cmd_chan_region_cfg(struct mwifiex_private *priv, + struct host_cmd_ds_command *cmd, + u16 cmd_action) +{ + struct host_cmd_ds_chan_region_cfg *reg = &cmd->params.reg_cfg; + + cmd->command = cpu_to_le16(HostCmd_CMD_CHAN_REGION_CFG); + cmd->size = cpu_to_le16(sizeof(*reg) + S_DS_GEN); + + if (cmd_action == HostCmd_ACT_GEN_GET) + reg->action = cpu_to_le16(cmd_action); + + return 0; +} + static int mwifiex_cmd_coalesce_cfg(struct mwifiex_private *priv, struct host_cmd_ds_command *cmd, @@ -2134,6 +2149,9 @@ int mwifiex_sta_prepare_cmd(struct mwifiex_private *priv, uint16_t cmd_no, ret = mwifiex_cmd_gtk_rekey_offload(priv, cmd_ptr, cmd_action, data_buf); break; + case HostCmd_CMD_CHAN_REGION_CFG: + ret = mwifiex_cmd_chan_region_cfg(priv, cmd_ptr, cmd_action); + break; default: mwifiex_dbg(priv->adapter, ERROR, "PREP_CMD: unknown cmd- %#x\n", cmd_no); @@ -2271,6 +2289,9 @@ int mwifiex_sta_init_cmd(struct mwifiex_private *priv, u8 first_sta, bool init) if (ret) return -1; } + + mwifiex_send_cmd(priv, HostCmd_CMD_CHAN_REGION_CFG, + HostCmd_ACT_GEN_GET, 0, NULL, true); } /* get tx rate */ diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c index 90e191b..db85330 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c @@ -1022,6 +1022,44 @@ static int mwifiex_ret_robust_coex(struct mwifiex_private *priv, return 0; } +static int mwifiex_ret_chan_region_cfg(struct mwifiex_private *priv, + struct host_cmd_ds_command *resp) +{ + struct host_cmd_ds_chan_region_cfg *reg = &resp->params.reg_cfg; + u16 action = le16_to_cpu(reg->action); + u16 tlv, tlv_buf_len, tlv_buf_left; + struct mwifiex_ie_types_header *head; + u8 *tlv_buf; + + if (action != HostCmd_ACT_GEN_GET) + return 0; + + tlv_buf = (u8 *)reg + sizeof(*reg); + tlv_buf_left = le16_to_cpu(resp->size) - S_DS_GEN - sizeof(*reg); + + while (tlv_buf_left >= sizeof(*head)) { + head = (struct 
mwifiex_ie_types_header *)tlv_buf; + tlv = le16_to_cpu(head->type); + tlv_buf_len = le16_to_cpu(head->len); + + if (tlv_buf_left < (sizeof(*head) + tlv_buf_len)) + break; + + switch (tlv) { + case TLV_TYPE_CHAN_ATTR_CFG: + mwifiex_dbg_dump(priv->adapter, CMD_D, "CHAN:", + (u8 *)head + sizeof(*head), + tlv_buf_len); + break; + } + + tlv_buf += (sizeof(*head) + tlv_buf_len); + tlv_buf_left -= (sizeof(*head) + tlv_buf_len); + } + + return 0; +} + /* * This function handles the command responses. * @@ -1239,6 +1277,9 @@ int mwifiex_process_sta_cmdresp(struct mwifiex_private *priv, u16 cmdresp_no, break; case HostCmd_CMD_GTK_REKEY_OFFLOAD_CFG: break; + case HostCmd_CMD_CHAN_REGION_CFG: + ret = mwifiex_ret_chan_region_cfg(priv, resp); + break; default: mwifiex_dbg(adapter, ERROR, "CMD_RESP: unknown cmd response %#x\n", -- cgit v1.1 From 72539799104d4d70c2afcb8f0fe2a7a507a41c81 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Tue, 9 Aug 2016 20:20:46 +0530 Subject: mwifiex: add custom regulatory domain support This patch creates custom regulatory rules based on the information received from firmware and enable them during wiphy registration. 
Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 39 ++++++---- drivers/net/wireless/marvell/mwifiex/fw.h | 8 ++ drivers/net/wireless/marvell/mwifiex/main.c | 2 + drivers/net/wireless/marvell/mwifiex/main.h | 1 + drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c | 91 ++++++++++++++++++++++ 5 files changed, 127 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index 235fb39..876d420 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -4141,9 +4141,12 @@ int mwifiex_register_cfg80211(struct mwifiex_adapter *adapter) wiphy->cipher_suites = mwifiex_cipher_suites; wiphy->n_cipher_suites = ARRAY_SIZE(mwifiex_cipher_suites); - if (adapter->region_code) - wiphy->regulatory_flags |= REGULATORY_DISABLE_BEACON_HINTS | + if (adapter->regd) { + wiphy->regulatory_flags |= REGULATORY_CUSTOM_REG | + REGULATORY_DISABLE_BEACON_HINTS | REGULATORY_COUNTRY_IE_IGNORE; + wiphy_apply_custom_regulatory(wiphy, adapter->regd); + } ether_addr_copy(wiphy->perm_addr, adapter->perm_addr); wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM; @@ -4209,19 +4212,27 @@ int mwifiex_register_cfg80211(struct mwifiex_adapter *adapter) return ret; } - if (reg_alpha2 && mwifiex_is_valid_alpha2(reg_alpha2)) { - mwifiex_dbg(adapter, INFO, - "driver hint alpha2: %2.2s\n", reg_alpha2); - regulatory_hint(wiphy, reg_alpha2); - } else { - if (adapter->region_code == 0x00) { - mwifiex_dbg(adapter, WARN, "Ignore world regulatory domain\n"); + if (!adapter->regd) { + if (reg_alpha2 && mwifiex_is_valid_alpha2(reg_alpha2)) { + mwifiex_dbg(adapter, INFO, + "driver hint alpha2: %2.2s\n", reg_alpha2); + regulatory_hint(wiphy, reg_alpha2); } else { - country_code = - mwifiex_11d_code_2_region(adapter->region_code); - if (country_code && - regulatory_hint(wiphy, country_code)) - mwifiex_dbg(priv->adapter, ERROR, 
"regulatory_hint() failed\n"); + if (adapter->region_code == 0x00) { + mwifiex_dbg(adapter, WARN, + "Ignore world regulatory domain\n"); + } else { + wiphy->regulatory_flags |= + REGULATORY_DISABLE_BEACON_HINTS | + REGULATORY_COUNTRY_IE_IGNORE; + country_code = + mwifiex_11d_code_2_region( + adapter->region_code); + if (country_code && + regulatory_hint(wiphy, country_code)) + mwifiex_dbg(priv->adapter, ERROR, + "regulatory_hint() failed\n"); + } } } diff --git a/drivers/net/wireless/marvell/mwifiex/fw.h b/drivers/net/wireless/marvell/mwifiex/fw.h index 085db99..18aa525 100644 --- a/drivers/net/wireless/marvell/mwifiex/fw.h +++ b/drivers/net/wireless/marvell/mwifiex/fw.h @@ -416,6 +416,14 @@ enum P2P_MODES { P2P_MODE_CLIENT = 3, }; +enum mwifiex_channel_flags { + MWIFIEX_CHANNEL_PASSIVE = BIT(0), + MWIFIEX_CHANNEL_DFS = BIT(1), + MWIFIEX_CHANNEL_NOHT40 = BIT(2), + MWIFIEX_CHANNEL_NOHT80 = BIT(3), + MWIFIEX_CHANNEL_DISABLED = BIT(7), +}; + #define HostCmd_RET_BIT 0x8000 #define HostCmd_ACT_GEN_GET 0x0000 #define HostCmd_ACT_GEN_SET 0x0001 diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c index db4925d..51d4dfc 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.c +++ b/drivers/net/wireless/marvell/mwifiex/main.c @@ -139,6 +139,8 @@ static int mwifiex_unregister(struct mwifiex_adapter *adapter) adapter->nd_info = NULL; } + kfree(adapter->regd); + vfree(adapter->chan_stats); kfree(adapter); return 0; diff --git a/drivers/net/wireless/marvell/mwifiex/main.h b/drivers/net/wireless/marvell/mwifiex/main.h index 5902600..cd9a4f1 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.h +++ b/drivers/net/wireless/marvell/mwifiex/main.h @@ -1005,6 +1005,7 @@ struct mwifiex_adapter { bool usb_mc_status; bool usb_mc_setup; struct cfg80211_wowlan_nd_info *nd_info; + struct ieee80211_regdomain *regd; }; void mwifiex_process_tx_queue(struct mwifiex_adapter *adapter); diff --git 
a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c index db85330..3344a26 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c @@ -1022,6 +1022,93 @@ static int mwifiex_ret_robust_coex(struct mwifiex_private *priv, return 0; } +static struct ieee80211_regdomain * +mwifiex_create_custom_regdomain(struct mwifiex_private *priv, + u8 *buf, u16 buf_len) +{ + u16 num_chan = buf_len / 2; + struct ieee80211_regdomain *regd; + struct ieee80211_reg_rule *rule; + bool new_rule; + int regd_size, idx, freq, prev_freq = 0; + u32 bw, prev_bw = 0; + u8 chflags, prev_chflags = 0, valid_rules = 0; + + if (WARN_ON_ONCE(num_chan > NL80211_MAX_SUPP_REG_RULES)) + return ERR_PTR(-EINVAL); + + regd_size = sizeof(struct ieee80211_regdomain) + + num_chan * sizeof(struct ieee80211_reg_rule); + + regd = kzalloc(regd_size, GFP_KERNEL); + if (!regd) + return ERR_PTR(-ENOMEM); + + for (idx = 0; idx < num_chan; idx++) { + u8 chan; + enum nl80211_band band; + + chan = *buf++; + if (!chan) + return NULL; + chflags = *buf++; + band = (chan <= 14) ? 
NL80211_BAND_2GHZ : NL80211_BAND_5GHZ; + freq = ieee80211_channel_to_frequency(chan, band); + new_rule = false; + + if (chflags & MWIFIEX_CHANNEL_DISABLED) + continue; + + if (band == NL80211_BAND_5GHZ) { + if (!(chflags & MWIFIEX_CHANNEL_NOHT80)) + bw = MHZ_TO_KHZ(80); + else if (!(chflags & MWIFIEX_CHANNEL_NOHT40)) + bw = MHZ_TO_KHZ(40); + else + bw = MHZ_TO_KHZ(20); + } else { + if (!(chflags & MWIFIEX_CHANNEL_NOHT40)) + bw = MHZ_TO_KHZ(40); + else + bw = MHZ_TO_KHZ(20); + } + + if (idx == 0 || prev_chflags != chflags || prev_bw != bw || + freq - prev_freq > 20) { + valid_rules++; + new_rule = true; + } + + rule = ®d->reg_rules[valid_rules - 1]; + + rule->freq_range.end_freq_khz = MHZ_TO_KHZ(freq + 10); + + prev_chflags = chflags; + prev_freq = freq; + prev_bw = bw; + + if (!new_rule) + continue; + + rule->freq_range.start_freq_khz = MHZ_TO_KHZ(freq - 10); + rule->power_rule.max_eirp = DBM_TO_MBM(19); + + if (chflags & MWIFIEX_CHANNEL_PASSIVE) + rule->flags = NL80211_RRF_NO_IR; + + if (chflags & MWIFIEX_CHANNEL_DFS) + rule->flags = NL80211_RRF_DFS; + + rule->freq_range.max_bandwidth_khz = bw; + } + + regd->n_reg_rules = valid_rules; + regd->alpha2[0] = '9'; + regd->alpha2[1] = '9'; + + return regd; +} + static int mwifiex_ret_chan_region_cfg(struct mwifiex_private *priv, struct host_cmd_ds_command *resp) { @@ -1050,6 +1137,10 @@ static int mwifiex_ret_chan_region_cfg(struct mwifiex_private *priv, mwifiex_dbg_dump(priv->adapter, CMD_D, "CHAN:", (u8 *)head + sizeof(*head), tlv_buf_len); + priv->adapter->regd = + mwifiex_create_custom_regdomain(priv, + (u8 *)head + + sizeof(*head), tlv_buf_len); break; } -- cgit v1.1 From 410280bac6224e066f4cf0b87db08f0418a135b6 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Thu, 11 Aug 2016 16:38:54 +0200 Subject: rt2x00usb: Fix error return code We know that 'retval = 0' because it has been tested a few lines above. So, if 'devm_kmalloc' fails, 0 will be returned instead of an error code. Return -ENOMEM instead. 
Fixes: 8b4c0009313f ("rt2x00usb: Use usb anchor to manage URB") Signed-off-by: Christophe JAILLET Acked-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/ralink/rt2x00/rt2x00usb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c b/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c index 7cf26c6..6005e14 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c @@ -831,8 +831,10 @@ int rt2x00usb_probe(struct usb_interface *usb_intf, rt2x00dev->anchor = devm_kmalloc(&usb_dev->dev, sizeof(struct usb_anchor), GFP_KERNEL); - if (!rt2x00dev->anchor) + if (!rt2x00dev->anchor) { + retval = -ENOMEM; goto exit_free_reg; + } init_usb_anchor(rt2x00dev->anchor); return 0; -- cgit v1.1 From d393be3ed0bebb30a4666d7f5ed4486cd6b31716 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 16 Aug 2016 13:10:13 +0100 Subject: mwifiex: fix missing break on IEEE80211_STYPE_ACTION case The IEEE80211_STYPE_ACTION case is missing a break in the switch statement, causing it to fall through to the default case that reports a debug message about an unknown frame subtype. Fix this by adding in the missing break statement. 
Signed-off-by: Colin Ian King Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/util.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/marvell/mwifiex/util.c b/drivers/net/wireless/marvell/mwifiex/util.c index 6681be0..18fbb96 100644 --- a/drivers/net/wireless/marvell/mwifiex/util.c +++ b/drivers/net/wireless/marvell/mwifiex/util.c @@ -386,6 +386,7 @@ mwifiex_parse_mgmt_packet(struct mwifiex_private *priv, u8 *payload, u16 len, "unknown public action frame category %d\n", category); } + break; default: mwifiex_dbg(priv->adapter, INFO, "unknown mgmt frame subtype %#x\n", stype); -- cgit v1.1 From 8af92af3f2d55db143417a5d401696f4b642009a Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Mon, 29 Aug 2016 20:39:35 +0800 Subject: brcmfmac: add missing header dependencies We get 1 warning when building kernel with W=1: drivers/net/wireless/broadcom/brcm80211/brcmfmac/tracepoint.c:23:6: warning: no previous prototype for '__brcmf_err' [-Wmissing-prototypes] In fact, this function is declared in brcmfmac/debug.h, so this patch adds missing header dependencies. Signed-off-by: Baoyou Xie Acked-by: Arnd Bergmann Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/tracepoint.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/tracepoint.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/tracepoint.c index a10f35c..fe67559 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/tracepoint.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/tracepoint.c @@ -19,6 +19,7 @@ #ifndef __CHECKER__ #define CREATE_TRACE_POINTS #include "tracepoint.h" +#include "debug.h" void __brcmf_err(const char *func, const char *fmt, ...) 
{ -- cgit v1.1 From d0716dde375eb6bff332763bb2137302120d263d Mon Sep 17 00:00:00 2001 From: Sien Wu Date: Thu, 1 Sep 2016 18:24:29 -0500 Subject: spi: Prevent unexpected SPI time out due to arithmetic overflow When reading SPI flash as MTD device, the transfer length is directly passed to the spi driver. If the requested data size exceeds 512KB, it will cause the time out calculation to overflow since transfer length is 32-bit unsigned integer. This issue is resolved by using 64-bit unsigned integer to perform the arithmetic. Signed-off-by: Sien Wu Acked-by: Brad Keryan Acked-by: Gratian Crisan Acked-by: Brad Mouring Natinst-ReviewBoard-ID 150232 Signed-off-by: Mark Brown --- drivers/spi/spi.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 51ad42f..ac889df 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -960,7 +960,7 @@ static int spi_transfer_one_message(struct spi_master *master, struct spi_transfer *xfer; bool keep_cs = false; int ret = 0; - unsigned long ms = 1; + unsigned long long ms = 1; struct spi_statistics *statm = &master->statistics; struct spi_statistics *stats = &msg->spi->statistics; @@ -991,9 +991,13 @@ static int spi_transfer_one_message(struct spi_master *master, if (ret > 0) { ret = 0; - ms = xfer->len * 8 * 1000 / xfer->speed_hz; + ms = 8LL * 1000LL * xfer->len; + do_div(ms, xfer->speed_hz); ms += ms + 100; /* some tolerance */ + if (ms > UINT_MAX) + ms = UINT_MAX; + ms = wait_for_completion_timeout(&master->xfer_completion, msecs_to_jiffies(ms)); } -- cgit v1.1 From a41bd25ae67d3e4052c7f00ee9f2b4ba9219309e Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 25 Aug 2016 18:42:35 +0200 Subject: sunrpc: fix UDP memory accounting The commit f9b2ee714c5c ("SUNRPC: Move UDP receive data path into a workqueue context"), as a side effect, moved the skb_free_datagram() call outside the scope of the related socket lock, but UDP sockets require such lock to be held for 
proper memory accounting. Fix it by replacing skb_free_datagram() with skb_free_datagram_locked(). Fixes: f9b2ee714c5c ("SUNRPC: Move UDP receive data path into a workqueue context") Reported-and-tested-by: Jan Stancek Signed-off-by: Paolo Abeni Cc: stable@vger.kernel.org # 4.4+ Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 8ede3bc..bf16883 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1074,7 +1074,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport) skb = skb_recv_datagram(sk, 0, 1, &err); if (skb != NULL) { xs_udp_data_read_skb(&transport->xprt, sk, skb); - skb_free_datagram(sk, skb); + skb_free_datagram_locked(sk, skb); continue; } if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) -- cgit v1.1 From 36afb176d3c9580651d7f410ed7f000ec48b5137 Mon Sep 17 00:00:00 2001 From: "Kweh, Hock Leong" Date: Mon, 29 Aug 2016 18:50:56 +0800 Subject: iio: fix pressure data output unit in hid-sensor-attributes According to the IIO ABI definition, the IIO_PRESSURE data output unit is kilopascal: http://lxr.free-electrons.com/source/Documentation/ABI/testing/sysfs-bus-iio This patch fixes the output unit of the HID pressure sensor IIO driver from pascal to kilopascal to follow the IIO ABI definition. 
Signed-off-by: Kweh, Hock Leong Reviewed-by: Srinivas Pandruvada Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/common/hid-sensors/hid-sensor-attributes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c index e81f434..dc33c1d 100644 --- a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c +++ b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c @@ -56,8 +56,8 @@ static struct { {HID_USAGE_SENSOR_ALS, 0, 1, 0}, {HID_USAGE_SENSOR_ALS, HID_USAGE_SENSOR_UNITS_LUX, 1, 0}, - {HID_USAGE_SENSOR_PRESSURE, 0, 100000, 0}, - {HID_USAGE_SENSOR_PRESSURE, HID_USAGE_SENSOR_UNITS_PASCAL, 1, 0}, + {HID_USAGE_SENSOR_PRESSURE, 0, 100, 0}, + {HID_USAGE_SENSOR_PRESSURE, HID_USAGE_SENSOR_UNITS_PASCAL, 0, 1000}, }; static int pow_10(unsigned power) -- cgit v1.1 From c49edecd513693ea7530ab18efbd7d6d5b7cbf90 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 3 Sep 2016 12:05:31 -0400 Subject: NFS: Fix error reporting in nfs_file_write() When doing O_DSYNC writes, the actual write errors are reported through generic_write_sync(), so we must test the result. Reported-by: J. R. 
Okajima Fixes: 18290650b1c8 ("NFS: Move buffered I/O locking into nfs_file_write()") Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 7d62097..ca699dd 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -657,7 +657,10 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) if (result <= 0) goto out; - written = generic_write_sync(iocb, result); + result = generic_write_sync(iocb, result); + if (result < 0) + goto out; + written = result; iocb->ki_pos += written; /* Return error values */ -- cgit v1.1 From bf0291dd2267a2b9a4cd74d65249553d11bb45d6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 3 Sep 2016 10:39:51 -0400 Subject: pNFS: Ensure LAYOUTGET and LAYOUTRETURN are properly serialised According to RFC5661, the client is responsible for serialising LAYOUTGET and LAYOUTRETURN to avoid ambiguity. Consider the case where we send both in parallel. Client Server ====== ====== LAYOUTGET(seqid=X) LAYOUTRETURN(seqid=X) LAYOUTGET return seqid=X+1 LAYOUTRETURN return seqid=X+2 Process LAYOUTRETURN Forget layout stateid Process LAYOUTGET Set seqid=X+1 The client processes the layoutget/layoutreturn in the wrong order, and since the result of the layoutreturn was to clear the only existing layout segment, the client forgets the layout stateid. When the LAYOUTGET comes in, it is treated as having a completely new stateid, and so the client sets the wrong sequence id... The fix is to check if there are outstanding LAYOUTGET requests before we send the LAYOUTRETURN (note that LAYOUTGET will already wait if it sees an outstanding LAYOUTRETURN). 
Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v4.5+ Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 6daf034..519ad32 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -899,6 +899,9 @@ pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid, enum pnfs_iomode *iomode) { + /* Serialise LAYOUTGET/LAYOUTRETURN */ + if (atomic_read(&lo->plh_outstanding) != 0) + return false; if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) return false; pnfs_get_layout_hdr(lo); -- cgit v1.1 From 2a59a0411671ef9daf17ba21da57809c696f4119 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 3 Sep 2016 11:20:04 -0400 Subject: pNFS: Fix pnfs_set_layout_stateid() to clear NFS_LAYOUT_INVALID_STID If the layout was marked as invalid, we want to ensure to initialise the layout header fields correctly. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 519ad32..cd8b5fc 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -768,17 +768,32 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) pnfs_destroy_layouts_byclid(clp, false); } +static void +pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo) +{ + lo->plh_return_iomode = 0; + lo->plh_return_seq = 0; + clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); +} + /* update lo->plh_stateid with new if is more recent */ void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, bool update_barrier) { u32 oldseq, newseq, new_barrier = 0; - bool invalid = !pnfs_layout_is_valid(lo); oldseq = be32_to_cpu(lo->plh_stateid.seqid); newseq = be32_to_cpu(new->seqid); - if (invalid || pnfs_seqid_is_newer(newseq, oldseq)) { + + if (!pnfs_layout_is_valid(lo)) { + nfs4_stateid_copy(&lo->plh_stateid, new); + lo->plh_barrier = newseq; + pnfs_clear_layoutreturn_info(lo); 
+ clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); + return; + } + if (pnfs_seqid_is_newer(newseq, oldseq)) { nfs4_stateid_copy(&lo->plh_stateid, new); /* * Because of wraparound, we want to keep the barrier @@ -790,7 +805,7 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, new_barrier = be32_to_cpu(new->seqid); else if (new_barrier == 0) return; - if (invalid || pnfs_seqid_is_newer(new_barrier, lo->plh_barrier)) + if (pnfs_seqid_is_newer(new_barrier, lo->plh_barrier)) lo->plh_barrier = new_barrier; } @@ -886,14 +901,6 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); } -static void -pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo) -{ - lo->plh_return_iomode = 0; - lo->plh_return_seq = 0; - clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); -} - static bool pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid, @@ -1801,16 +1808,11 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) */ pnfs_mark_layout_stateid_invalid(lo, &free_me); - nfs4_stateid_copy(&lo->plh_stateid, &res->stateid); - lo->plh_barrier = be32_to_cpu(res->stateid.seqid); + pnfs_set_layout_stateid(lo, &res->stateid, true); } pnfs_get_lseg(lseg); pnfs_layout_insert_lseg(lo, lseg, &free_me); - if (!pnfs_layout_is_valid(lo)) { - pnfs_clear_layoutreturn_info(lo); - clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); - } if (res->return_on_close) -- cgit v1.1 From 52ec7be2e27392201adf77892ba883f68df88c99 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 3 Sep 2016 11:05:28 -0400 Subject: pNFS: Clear out all layout segments if the server unsets lrp->res.lrs_present If the server fails to set lrp->res.lrs_present in the LAYOUTRETURN reply, then that means it believes the client holds no more layout state for that file, and that the layout stateid is now invalid. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f5aecaa..c380d2e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8190,10 +8190,13 @@ static void nfs4_layoutreturn_release(void *calldata) dprintk("--> %s\n", __func__); spin_lock(&lo->plh_inode->i_lock); - pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range, - be32_to_cpu(lrp->args.stateid.seqid)); - if (lrp->res.lrs_present && pnfs_layout_is_valid(lo)) + if (lrp->res.lrs_present) { + pnfs_mark_matching_lsegs_invalid(lo, &freeme, + &lrp->args.range, + be32_to_cpu(lrp->args.stateid.seqid)); pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); + } else + pnfs_mark_layout_stateid_invalid(lo, &freeme); pnfs_clear_layoutreturn_waitbit(lo); spin_unlock(&lo->plh_inode->i_lock); nfs4_sequence_free_slot(&lrp->res.seq_res); -- cgit v1.1 From 1c500840934a138bd6b13556c210516e9301fbee Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Thu, 25 Aug 2016 09:45:33 -0700 Subject: iio: accel: bmc150: reset chip at init time In at least one known setup, the chip comes up in a state where reading the chip ID returns garbage unless it's been reset, due to noise on the wires during system boot. All supported chips have the same reset method, and based on the datasheets they all need 1.3 or 1.8ms to recover after reset. So, do the conservative thing here and always reset the chip. 
Signed-off-by: Olof Johansson Reviewed-by: Srinivas Pandruvada Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/accel/bmc150-accel-core.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/iio/accel/bmc150-accel-core.c b/drivers/iio/accel/bmc150-accel-core.c index bf17aae..59b380d 100644 --- a/drivers/iio/accel/bmc150-accel-core.c +++ b/drivers/iio/accel/bmc150-accel-core.c @@ -67,6 +67,9 @@ #define BMC150_ACCEL_REG_PMU_BW 0x10 #define BMC150_ACCEL_DEF_BW 125 +#define BMC150_ACCEL_REG_RESET 0x14 +#define BMC150_ACCEL_RESET_VAL 0xB6 + #define BMC150_ACCEL_REG_INT_MAP_0 0x19 #define BMC150_ACCEL_INT_MAP_0_BIT_SLOPE BIT(2) @@ -1497,6 +1500,14 @@ static int bmc150_accel_chip_init(struct bmc150_accel_data *data) int ret, i; unsigned int val; + /* + * Reset chip to get it in a known good state. A delay of 1.8ms after + * reset is required according to the data sheets of supported chips. + */ + regmap_write(data->regmap, BMC150_ACCEL_REG_RESET, + BMC150_ACCEL_RESET_VAL); + usleep_range(1800, 2500); + ret = regmap_read(data->regmap, BMC150_ACCEL_REG_CHIP_ID, &val); if (ret < 0) { dev_err(dev, "Error: Reading chip id\n"); -- cgit v1.1 From a0c7858e74793242733a09a3e34356f434bc1571 Mon Sep 17 00:00:00 2001 From: Christian Engelmayer Date: Tue, 9 Aug 2016 21:19:57 +0200 Subject: rtlwifi: rtl8192de: Fix leak in _rtl92de_read_adapter_info() In case rtl_get_hwinfo() fails, the function directly returns and leaks the already allocated hwinfo memory. Go through the correct exit path. 
Signed-off-by: Christian Engelmayer Acked-by: Larry Finger Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c index b0f6324..5720551 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c @@ -1757,7 +1757,7 @@ static void _rtl92de_read_adapter_info(struct ieee80211_hw *hw) return; if (rtl_get_hwinfo(hw, rtlpriv, HWSET_MAX_SIZE, hwinfo, params)) - return; + goto exit; _rtl92de_efuse_update_chip_version(hw); _rtl92de_read_macphymode_and_bandtype(hw, hwinfo); @@ -1790,6 +1790,7 @@ static void _rtl92de_read_adapter_info(struct ieee80211_hw *hw) break; } rtlefuse->txpwr_fromeprom = true; +exit: kfree(hwinfo); } -- cgit v1.1 From 3eeacaa902a31bdf06bc53f23087dcb1c5f260d6 Mon Sep 17 00:00:00 2001 From: Christian Engelmayer Date: Tue, 9 Aug 2016 21:54:12 +0200 Subject: rtlwifi: rtl8723ae: Fix leak in _rtl8723e_read_adapter_info() In case of (rtlhal->oem_id != RT_CID_DEFAULT), the function directly returns and leaks the already allocated hwinfo memory. Go through the correct exit path. 
Signed-off-by: Christian Engelmayer Acked-by: Larry Finger Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c index b88c7ee..ba30efc 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c @@ -1654,7 +1654,7 @@ static void _rtl8723e_read_adapter_info(struct ieee80211_hw *hw, rtlefuse->autoload_failflag, hwinfo); if (rtlhal->oem_id != RT_CID_DEFAULT) - return; + goto exit; switch (rtlefuse->eeprom_oemid) { case EEPROM_CID_DEFAULT: -- cgit v1.1 From d8c872b57e0f71b5eccb83b06898d678c7f3353f Mon Sep 17 00:00:00 2001 From: Maxim Altshul Date: Mon, 15 Aug 2016 11:23:38 +0300 Subject: wlcore: Remove wl pointer from wl_sta structure This field was added to wl_sta struct to get hw in situations where it was not given to driver by mac80211. In our case, get_expected_throughput op did not send hw to driver. This patch reverts the change, as it is no longer needed due to commit 4fdbc67a25ce ("mac80211: call get_expected_throughput only after adding station") as hw is now sent as a parameter. 
Signed-off-by: Maxim Altshul Signed-off-by: Kalle Valo --- drivers/net/wireless/ti/wlcore/main.c | 1 - drivers/net/wireless/ti/wlcore/wlcore_i.h | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 9e1f2d9..ef6c15b 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -4986,7 +4986,6 @@ static int wl12xx_sta_add(struct wl1271 *wl, return ret; wl_sta = (struct wl1271_station *)sta->drv_priv; - wl_sta->wl = wl; hlid = wl_sta->hlid; ret = wl12xx_cmd_add_peer(wl, wlvif, sta, hlid); diff --git a/drivers/net/wireless/ti/wlcore/wlcore_i.h b/drivers/net/wireless/ti/wlcore/wlcore_i.h index 242b4e3..0277ae5 100644 --- a/drivers/net/wireless/ti/wlcore/wlcore_i.h +++ b/drivers/net/wireless/ti/wlcore/wlcore_i.h @@ -347,7 +347,6 @@ struct wl1271_station { * Used in both AP and STA mode. */ u64 total_freed_pkts; - struct wl1271 *wl; }; struct wl12xx_vif { -- cgit v1.1 From b81669b9e0b4864f59421e7681512731fae01ab9 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:23 -0400 Subject: rtl8xxxu: Mark 0x20f4:0x648b as tested Successfully tested by Jocelyn Mayer Reported-by: J. 
Mayer Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 77048db..47d0868 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -5971,6 +5971,10 @@ static int rtl8xxxu_probe(struct usb_interface *interface, if (id->idProduct == 0x1004) untested = 0; break; + case 0x20f4: + if (id->idProduct == 0x648b) + untested = 0; + break; default: break; } @@ -6140,6 +6144,9 @@ static struct usb_device_id dev_table[] = { /* Tested by Andrea Merello */ {USB_DEVICE_AND_INTERFACE_INFO(0x050d, 0x1004, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, +/* Tested by Jocelyn Mayer */ +{USB_DEVICE_AND_INTERFACE_INFO(0x20f4, 0x648b, 0xff, 0xff, 0xff), + .driver_info = (unsigned long)&rtl8192cu_fops}, /* Currently untested 8188 series devices */ {USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_REALTEK, 0x8191, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, @@ -6199,8 +6206,6 @@ static struct usb_device_id dev_table[] = { .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x2019, 0xed17, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, -{USB_DEVICE_AND_INTERFACE_INFO(0x20f4, 0x648b, 0xff, 0xff, 0xff), - .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x4855, 0x0090, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x4856, 0x0091, 0xff, 0xff, 0xff), -- cgit v1.1 From 76a8e07d49b65294ed006ff1e37fa152a1bfd230 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:24 -0400 Subject: rtl8xxxu: Mark 0x2001:0x3308 as tested D-Link DWA-121 is reported as working. 
Reported-by: Stefano Bravi Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 47d0868..ca4c40d 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -5975,6 +5975,10 @@ static int rtl8xxxu_probe(struct usb_interface *interface, if (id->idProduct == 0x648b) untested = 0; break; + case 0x2001: + if (id->idProduct == 0x3308) + untested = 0; + break; default: break; } @@ -6147,6 +6151,9 @@ static struct usb_device_id dev_table[] = { /* Tested by Jocelyn Mayer */ {USB_DEVICE_AND_INTERFACE_INFO(0x20f4, 0x648b, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, +/* Tested by Stefano Bravi */ +{USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x3308, 0xff, 0xff, 0xff), + .driver_info = (unsigned long)&rtl8192cu_fops}, /* Currently untested 8188 series devices */ {USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_REALTEK, 0x8191, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, @@ -6194,8 +6201,6 @@ static struct usb_device_id dev_table[] = { .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x13d3, 0x3357, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, -{USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x3308, 0xff, 0xff, 0xff), - .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x330b, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x2019, 0x4902, 0xff, 0xff, 0xff), -- cgit v1.1 From deb6176e561324884652dd6a49a862bd8b152959 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:25 -0400 Subject: rtl8xxxu: Fix error handling if rtl8xxxu_init_device() fails For some reason we lost the code 
bailing if rtl8xxxu_init_device() returned an error. This catches the error and also cleans up the error handling. Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- .../net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index ca4c40d..9c6305b 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -5947,7 +5947,7 @@ static int rtl8xxxu_probe(struct usb_interface *interface, struct ieee80211_hw *hw; struct usb_device *udev; struct ieee80211_supported_band *sband; - int ret = 0; + int ret; int untested = 1; udev = usb_get_dev(interface_to_usbdev(interface)); @@ -5995,6 +5995,7 @@ static int rtl8xxxu_probe(struct usb_interface *interface, hw = ieee80211_alloc_hw(sizeof(struct rtl8xxxu_priv), &rtl8xxxu_ops); if (!hw) { ret = -ENOMEM; + priv = NULL; goto exit; } @@ -6043,6 +6044,8 @@ static int rtl8xxxu_probe(struct usb_interface *interface, } ret = rtl8xxxu_init_device(hw); + if (ret) + goto exit; hw->wiphy->max_scan_ssids = 1; hw->wiphy->max_scan_ie_len = IEEE80211_MAX_DATA_LEN; @@ -6093,9 +6096,20 @@ static int rtl8xxxu_probe(struct usb_interface *interface, goto exit; } + return 0; + exit: - if (ret < 0) - usb_put_dev(udev); + usb_set_intfdata(interface, NULL); + + if (priv) { + kfree(priv->fw_data); + mutex_destroy(&priv->usb_buf_mutex); + mutex_destroy(&priv->h2c_mutex); + } + usb_put_dev(udev); + + ieee80211_free_hw(hw); + return ret; } -- cgit v1.1 From 690a6d268bdf85f8d233823a18d3200b99e5568d Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:26 -0400 Subject: rtl8xxxu: Add TP-Link TL-WN823N v2 to list of supported devices This is an rtl8192eu based dongle (the v1 is an rtl8192cu). Reported and tested by Myckel Habets. 
Reported-by: Myckel Habets Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 9c6305b..948f153 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -5979,6 +5979,10 @@ static int rtl8xxxu_probe(struct usb_interface *interface, if (id->idProduct == 0x3308) untested = 0; break; + case 0x2357: + if (id->idProduct == 0x0109) + untested = 0; + break; default: break; } @@ -6146,6 +6150,9 @@ static struct usb_device_id dev_table[] = { .driver_info = (unsigned long)&rtl8723au_fops}, {USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_REALTEK, 0x818b, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192eu_fops}, +/* Tested by Myckel Habets */ +{USB_DEVICE_AND_INTERFACE_INFO(0x2357, 0x0109, 0xff, 0xff, 0xff), + .driver_info = (unsigned long)&rtl8192eu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_REALTEK, 0xb720, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8723bu_fops}, #ifdef CONFIG_RTL8XXXU_UNTESTED -- cgit v1.1 From 44abaa08d002235e1bbc2b9e0fd46a64e4694596 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:27 -0400 Subject: rtl8xxxu: Add TX page defines for 8723b This switches the 8723b driver to use the new rtl8xxxu_init_queue_reserved_page() function. 
Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h | 6 ++++++ drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h index 4341d56..da4f148 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h @@ -43,6 +43,7 @@ #define TX_TOTAL_PAGE_NUM 0xf8 #define TX_TOTAL_PAGE_NUM_8192E 0xf3 +#define TX_TOTAL_PAGE_NUM_8723B 0xf7 /* (HPQ + LPQ + NPQ + PUBQ) = TX_TOTAL_PAGE_NUM */ #define TX_PAGE_NUM_PUBQ 0xe7 #define TX_PAGE_NUM_HI_PQ 0x0c @@ -54,6 +55,11 @@ #define TX_PAGE_NUM_LO_PQ_8192E 0x0c #define TX_PAGE_NUM_NORM_PQ_8192E 0x00 +#define TX_PAGE_NUM_PUBQ_8723B 0xe7 +#define TX_PAGE_NUM_HI_PQ_8723B 0x0c +#define TX_PAGE_NUM_LO_PQ_8723B 0x02 +#define TX_PAGE_NUM_NORM_PQ_8723B 0x02 + #define RTL_FW_PAGE_SIZE 4096 #define RTL8XXXU_FIRMWARE_POLL_MAX 1000 diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c index 9d45afb..c1323f0 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c @@ -1674,4 +1674,8 @@ struct rtl8xxxu_fileops rtl8723bu_fops = { .pbp_rx = PBP_PAGE_SIZE_256, .pbp_tx = PBP_PAGE_SIZE_256, .mactable = rtl8723b_mac_init_table, + .total_page_num = TX_TOTAL_PAGE_NUM_8723B, + .page_num_hi = TX_PAGE_NUM_HI_PQ_8723B, + .page_num_lo = TX_PAGE_NUM_LO_PQ_8723B, + .page_num_norm = TX_PAGE_NUM_NORM_PQ_8723B, }; -- cgit v1.1 From e366f45d36275b80a5615c68123c88a3de75cc22 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:28 -0400 Subject: rtl8xxxu: Switch 8723a to use new rtl8xxxu_init_queue_reserved_page() routine This changes the pub-queue value written to REG_RQPN, however the old code used a hard coded minimum value assuming there would 
always be an active lo-queue, even when no USB EP was found for it. Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c index 686c551..d6f4968 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c @@ -396,4 +396,8 @@ struct rtl8xxxu_fileops rtl8723au_fops = { .pbp_rx = PBP_PAGE_SIZE_128, .pbp_tx = PBP_PAGE_SIZE_128, .mactable = rtl8xxxu_gen1_mac_init_table, + .total_page_num = TX_TOTAL_PAGE_NUM, + .page_num_hi = TX_PAGE_NUM_HI_PQ, + .page_num_lo = TX_PAGE_NUM_LO_PQ, + .page_num_norm = TX_PAGE_NUM_NORM_PQ, }; -- cgit v1.1 From b492940dc1f7372fb95930dc5bde8d7dbc560cd3 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:29 -0400 Subject: rtl8xxxu: Switch 8192cu/8188cu devices to use rtl8xxxu_init_queue_reserved_page() This was the last user of the old rtl8xxxu_old_init_queue_reserved_page() which can now be removed. 
Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c index 69d1a14..cd13c25 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c @@ -579,5 +579,9 @@ struct rtl8xxxu_fileops rtl8192cu_fops = { .pbp_rx = PBP_PAGE_SIZE_128, .pbp_tx = PBP_PAGE_SIZE_128, .mactable = rtl8xxxu_gen1_mac_init_table, + .total_page_num = TX_TOTAL_PAGE_NUM, + .page_num_hi = TX_PAGE_NUM_HI_PQ, + .page_num_lo = TX_PAGE_NUM_LO_PQ, + .page_num_norm = TX_PAGE_NUM_NORM_PQ, }; #endif -- cgit v1.1 From efeb8ce7a98cfb60932c6a53ce86359bf6b33b67 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:30 -0400 Subject: rtl8xxxu: Remove now obsolete rtl8xxxu_old_init_queue_reserved_page() Switching over the old devices to use the new function allows us to get rid of this legacy. 
Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- .../net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 30 ++-------------------- 1 file changed, 2 insertions(+), 28 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 948f153..efe24a6 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -3847,28 +3847,6 @@ void rtl8xxxu_gen2_disable_rf(struct rtl8xxxu_priv *priv) rtl8xxxu_write32(priv, REG_RX_WAIT_CCA, val32); } -static void rtl8xxxu_old_init_queue_reserved_page(struct rtl8xxxu_priv *priv) -{ - u8 val8; - u32 val32; - - if (priv->ep_tx_normal_queue) - val8 = TX_PAGE_NUM_NORM_PQ; - else - val8 = 0; - - rtl8xxxu_write8(priv, REG_RQPN_NPQ, val8); - - val32 = (TX_PAGE_NUM_PUBQ << RQPN_PUB_PQ_SHIFT) | RQPN_LOAD; - - if (priv->ep_tx_high_queue) - val32 |= (TX_PAGE_NUM_HI_PQ << RQPN_HI_PQ_SHIFT); - if (priv->ep_tx_low_queue) - val32 |= (TX_PAGE_NUM_LO_PQ << RQPN_LO_PQ_SHIFT); - - rtl8xxxu_write32(priv, REG_RQPN, val32); -} - static void rtl8xxxu_init_queue_reserved_page(struct rtl8xxxu_priv *priv) { struct rtl8xxxu_fileops *fops = priv->fops; @@ -3929,12 +3907,8 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) goto exit; } - if (!macpower) { - if (priv->fops->total_page_num) - rtl8xxxu_init_queue_reserved_page(priv); - else - rtl8xxxu_old_init_queue_reserved_page(priv); - } + if (!macpower) + rtl8xxxu_init_queue_reserved_page(priv); ret = rtl8xxxu_init_queue_priority(priv); dev_dbg(dev, "%s: init_queue_priority %i\n", __func__, ret); -- cgit v1.1 From e02aa3eef786aab82f9929fea25f5f7e9f77ba69 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:31 -0400 Subject: rtl8xxxu: Simplify code setting TX buffer boundary With all devices now offering fops->total_page_num, get rid of the if mess for setting the TX buffer boundary. 
Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index efe24a6..7a697c0 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -3968,13 +3968,7 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) /* * Set TX buffer boundary */ - if (priv->rtl_chip == RTL8192E) - val8 = TX_TOTAL_PAGE_NUM_8192E + 1; - else - val8 = TX_TOTAL_PAGE_NUM + 1; - - if (priv->rtl_chip == RTL8723B) - val8 -= 1; + val8 = priv->fops->total_page_num + 1; rtl8xxxu_write8(priv, REG_TXPKTBUF_BCNQ_BDNY, val8); rtl8xxxu_write8(priv, REG_TXPKTBUF_MGQ_BDNY, val8); -- cgit v1.1 From dce7548fd9700c4c4ecda936dcf7326b9241a6d3 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:32 -0400 Subject: rtl8xxxu: Add bit definitions for REG_FPGA0_TX_INFO This adds TX antenna selection bit defines for OFDM mode. 
Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h index 921c565..a338890 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h @@ -780,6 +780,10 @@ #define FPGA_RF_MODE_OFDM BIT(25) #define REG_FPGA0_TX_INFO 0x0804 +#define FPGA0_TX_INFO_OFDM_PATH_A BIT(0) +#define FPGA0_TX_INFO_OFDM_PATH_B BIT(1) +#define FPGA0_TX_INFO_OFDM_PATH_C BIT(2) +#define FPGA0_TX_INFO_OFDM_PATH_D BIT(3) #define REG_FPGA0_PSD_FUNC 0x0808 #define REG_FPGA0_TX_GAIN 0x080c #define REG_FPGA0_RF_TIMING1 0x0810 -- cgit v1.1 From 0b09628948bce970e14ef61a6788caa93285a132 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:33 -0400 Subject: rtl8xxxu: Add interrupt bit definitions for gen2 parts These are primarily needed for SDIO/PCI parts, but the vendor driver still sets them for some USB devices. 
Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- .../net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h | 56 ++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h index a338890..3555a2f 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h @@ -213,10 +213,66 @@ #define REG_HMBOX_EXT_1 0x008a #define REG_HMBOX_EXT_2 0x008c #define REG_HMBOX_EXT_3 0x008e + /* Interrupt registers for 8192e/8723bu/8812 */ #define REG_HIMR0 0x00b0 +#define IMR0_TXCCK BIT(30) /* TXRPT interrupt when CCX bit + of the packet is set */ +#define IMR0_PSTIMEOUT BIT(29) /* Power Save Time Out Int */ +#define IMR0_GTINT4 BIT(28) /* Set when GTIMER4 expires */ +#define IMR0_GTINT3 BIT(27) /* Set when GTIMER3 expires */ +#define IMR0_TBDER BIT(26) /* Transmit Beacon0 Error */ +#define IMR0_TBDOK BIT(25) /* Transmit Beacon0 OK */ +#define IMR0_TSF_BIT32_TOGGLE BIT(24) /* TSF Timer BIT32 toggle + indication interrupt */ +#define IMR0_BCNDMAINT0 BIT(20) /* Beacon DMA Interrupt 0 */ +#define IMR0_BCNDERR0 BIT(16) /* Beacon Queue DMA Error 0 */ +#define IMR0_HSISR_IND_ON_INT BIT(15) /* HSISR Indicator (HSIMR & + HSISR is true) */ +#define IMR0_BCNDMAINT_E BIT(14) /* Beacon DMA Interrupt + Extension for Win7 */ +#define IMR0_ATIMEND BIT(12) /* CTWidnow End or + ATIM Window End */ +#define IMR0_HISR1_IND_INT BIT(11) /* HISR1 Indicator + (HISR1 & HIMR1 is true) */ +#define IMR0_C2HCMD BIT(10) /* CPU to Host Command INT + Status, Write 1 to clear */ +#define IMR0_CPWM2 BIT(9) /* CPU power Mode exchange INT + Status, Write 1 to clear */ +#define IMR0_CPWM BIT(8) /* CPU power Mode exchange INT + Status, Write 1 to clear */ +#define IMR0_HIGHDOK BIT(7) /* High Queue DMA OK */ +#define IMR0_MGNTDOK BIT(6) /* Management Queue DMA OK */ +#define IMR0_BKDOK BIT(5) /* AC_BK DMA OK */ +#define IMR0_BEDOK BIT(4) /* 
AC_BE DMA OK */ +#define IMR0_VIDOK BIT(3) /* AC_VI DMA OK */ +#define IMR0_VODOK BIT(2) /* AC_VO DMA OK */ +#define IMR0_RDU BIT(1) /* Rx Descriptor Unavailable */ +#define IMR0_ROK BIT(0) /* Receive DMA OK */ #define REG_HISR0 0x00b4 #define REG_HIMR1 0x00b8 +#define IMR1_BCNDMAINT7 BIT(27) /* Beacon DMA Interrupt 7 */ +#define IMR1_BCNDMAINT6 BIT(26) /* Beacon DMA Interrupt 6 */ +#define IMR1_BCNDMAINT5 BIT(25) /* Beacon DMA Interrupt 5 */ +#define IMR1_BCNDMAINT4 BIT(24) /* Beacon DMA Interrupt 4 */ +#define IMR1_BCNDMAINT3 BIT(23) /* Beacon DMA Interrupt 3 */ +#define IMR1_BCNDMAINT2 BIT(22) /* Beacon DMA Interrupt 2 */ +#define IMR1_BCNDMAINT1 BIT(21) /* Beacon DMA Interrupt 1 */ +#define IMR1_BCNDERR7 BIT(20) /* Beacon Queue DMA Err Int 7 */ +#define IMR1_BCNDERR6 BIT(19) /* Beacon Queue DMA Err Int 6 */ +#define IMR1_BCNDERR5 BIT(18) /* Beacon Queue DMA Err Int 5 */ +#define IMR1_BCNDERR4 BIT(17) /* Beacon Queue DMA Err Int 4 */ +#define IMR1_BCNDERR3 BIT(16) /* Beacon Queue DMA Err Int 3 */ +#define IMR1_BCNDERR2 BIT(15) /* Beacon Queue DMA Err Int 2 */ +#define IMR1_BCNDERR1 BIT(14) /* Beacon Queue DMA Err Int 1 */ +#define IMR1_ATIMEND_E BIT(13) /* ATIM Window End Extension + for Win7 */ +#define IMR1_TXERR BIT(11) /* Tx Error Flag Int Status, + write 1 to clear */ +#define IMR1_RXERR BIT(10) /* Rx Error Flag Int Status, + write 1 to clear */ +#define IMR1_TXFOVW BIT(9) /* Transmit FIFO Overflow */ +#define IMR1_RXFOVW BIT(8) /* Receive FIFO Overflow */ #define REG_HISR1 0x00bc /* Host suspend counter on FPGA platform */ -- cgit v1.1 From e3ebcd7428c142fb7b74ded9a624cddfb403d2c5 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:34 -0400 Subject: rtl8xxxu: Use flag to indicate whether device has TX report timer support Use a fileops flag to indicate whether the device has TX report timer support. This will make it easier to include future devices such as 8188eu to use the TX report timer. 
Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h | 1 + drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c | 1 + drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 5 ++--- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h index da4f148..c8d7075 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h @@ -1341,6 +1341,7 @@ struct rtl8xxxu_fileops { char tx_desc_size; char rx_desc_size; char has_s0s1; + char has_tx_report; u32 adda_1t_init; u32 adda_1t_path_on; u32 adda_2t_path_on_a; diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c index c1323f0..0b6a6ca 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c @@ -1666,6 +1666,7 @@ struct rtl8xxxu_fileops rtl8723bu_fops = { .tx_desc_size = sizeof(struct rtl8xxxu_txdesc40), .rx_desc_size = sizeof(struct rtl8xxxu_rxdesc24), .has_s0s1 = 1, + .has_tx_report = 1, .adda_1t_init = 0x01c00014, .adda_1t_path_on = 0x01c00014, .adda_2t_path_on_a = 0x01c00014, diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 7a697c0..81b9582 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -4000,10 +4000,9 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) priv->fops->usb_quirks(priv); /* - * Presumably this is for 8188EU as well - * Enable TX report and TX report timer + * Enable TX report and TX report timer for 8723bu/8188eu/... 
*/ - if (priv->rtl_chip == RTL8723B) { + if (priv->fops->has_tx_report) { val8 = rtl8xxxu_read8(priv, REG_TX_REPORT_CTRL); val8 |= TX_REPORT_CTRL_TIMER_ENABLE; rtl8xxxu_write8(priv, REG_TX_REPORT_CTRL, val8); -- cgit v1.1 From ee675cc30e07cc925489e9663539e69873b04100 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:35 -0400 Subject: rtl8xxxu: Convert flags in rtl8xxxu_fileops to bitflags This leaves space for a few more flags within the same space. Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h index c8d7075..c5b6846 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h @@ -1340,8 +1340,8 @@ struct rtl8xxxu_fileops { int rx_agg_buf_size; char tx_desc_size; char rx_desc_size; - char has_s0s1; - char has_tx_report; + u8 has_s0s1:1; + u8 has_tx_report:1; u32 adda_1t_init; u32 adda_1t_path_on; u32 adda_2t_path_on_a; -- cgit v1.1 From eed145ab25a3eeba4caf2d4be0b5c7a3097f39bd Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:36 -0400 Subject: rtl8xxxu: Introduce fops bitflag indicating type of thermal meter Do not rely on TX descriptor size to determine the thermal meter type. 
Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h | 1 + drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c | 1 + drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c | 1 + drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 2 +- 4 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h index c5b6846..68d80c4 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h @@ -1342,6 +1342,7 @@ struct rtl8xxxu_fileops { char rx_desc_size; u8 has_s0s1:1; u8 has_tx_report:1; + u8 gen2_thermal_meter:1; u32 adda_1t_init; u32 adda_1t_path_on; u32 adda_2t_path_on_a; diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c index 9a1994f..9901199 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c @@ -1505,6 +1505,7 @@ struct rtl8xxxu_fileops rtl8192eu_fops = { .tx_desc_size = sizeof(struct rtl8xxxu_txdesc40), .rx_desc_size = sizeof(struct rtl8xxxu_rxdesc24), .has_s0s1 = 0, + .gen2_thermal_meter = 1, .adda_1t_init = 0x0fc01616, .adda_1t_path_on = 0x0fc01616, .adda_2t_path_on_a = 0x0fc01616, diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c index 0b6a6ca..dd2c0f0 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c @@ -1667,6 +1667,7 @@ struct rtl8xxxu_fileops rtl8723bu_fops = { .rx_desc_size = sizeof(struct rtl8xxxu_rxdesc24), .has_s0s1 = 1, .has_tx_report = 1, + .gen2_thermal_meter = 1, .adda_1t_init = 0x01c00014, .adda_1t_path_on = 0x01c00014, .adda_2t_path_on_a = 0x01c00014, diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c 
b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 81b9582..6de5b8f 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -4195,7 +4195,7 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) /* * This should enable thermal meter */ - if (priv->fops->tx_desc_size == sizeof(struct rtl8xxxu_txdesc40)) + if (priv->fops->gen2_thermal_meter) rtl8xxxu_write_rfreg(priv, RF_A, RF6052_REG_T_METER_8723B, 0x37cf8); else -- cgit v1.1 From be49b1f111c76ce20a60a90587e2b8e8ab809a06 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:37 -0400 Subject: rtl8xxxu: Simplify calculating of hw value used for setting TX rate Calculating the value in one place rather than using one calculation in one place and a different one for management frames in another location makes little sense. Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 6de5b8f..2f951cf7 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -4848,7 +4848,8 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, } } - if (rate_flag & IEEE80211_TX_RC_MCS) + if (rate_flag & IEEE80211_TX_RC_MCS && + !ieee80211_is_mgmt(hdr->frame_control)) rate = tx_info->control.rates[0].idx + DESC_RATE_MCS0; else rate = tx_rate->hw_value; @@ -4869,7 +4870,7 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, tx_desc->txdw1 |= cpu_to_le32(TXDESC32_AGG_BREAK); if (ieee80211_is_mgmt(hdr->frame_control)) { - tx_desc->txdw5 = cpu_to_le32(tx_rate->hw_value); + tx_desc->txdw5 = cpu_to_le32(rate); tx_desc->txdw4 |= cpu_to_le32(TXDESC32_USE_DRIVER_RATE); tx_desc->txdw5 |= @@ -4923,7 +4924,7 @@ static void 
rtl8xxxu_tx(struct ieee80211_hw *hw, tx_desc40->txdw2 |= cpu_to_le32(TXDESC40_AGG_BREAK); if (ieee80211_is_mgmt(hdr->frame_control)) { - tx_desc40->txdw4 = cpu_to_le32(tx_rate->hw_value); + tx_desc40->txdw4 = cpu_to_le32(rate); tx_desc40->txdw3 |= cpu_to_le32(TXDESC40_USE_DRIVER_RATE); tx_desc40->txdw4 |= -- cgit v1.1 From 3972cc579140e48e8390fff5e94f6e9e78c3dd87 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:38 -0400 Subject: rtl8xxxu: Determine the need for SGI before handling specific TX desc formats In order to be able to split out the TX descriptor handling code, determine in advance the need to mark SGI. Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 2f951cf7..6dc3f52 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -4770,7 +4770,7 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, u16 rate_flag = tx_info->control.rates[0].flags; int tx_desc_size = priv->fops->tx_desc_size; int ret; - bool usedesc40, ampdu_enable; + bool usedesc40, ampdu_enable, sgi = false; if (skb_headroom(skb) < tx_desc_size) { dev_warn(dev, @@ -4854,6 +4854,12 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, else rate = tx_rate->hw_value; + if (rate_flag & IEEE80211_TX_RC_SHORT_GI || + (ieee80211_is_data_qos(hdr->frame_control) && + sta && sta->ht_cap.cap & + (IEEE80211_HT_CAP_SGI_40 | IEEE80211_HT_CAP_SGI_20))) + sgi = true; + seq_number = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl)); if (!usedesc40) { tx_desc->txdw5 = cpu_to_le32(rate); @@ -4886,12 +4892,8 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, (sta && vif && vif->bss_conf.use_short_preamble)) tx_desc->txdw4 |= cpu_to_le32(TXDESC32_SHORT_PREAMBLE); - if 
(rate_flag & IEEE80211_TX_RC_SHORT_GI || - (ieee80211_is_data_qos(hdr->frame_control) && - sta && sta->ht_cap.cap & - (IEEE80211_HT_CAP_SGI_40 | IEEE80211_HT_CAP_SGI_20))) { + if (sgi) tx_desc->txdw5 |= cpu_to_le32(TXDESC32_SHORT_GI); - } if (rate_flag & IEEE80211_TX_RC_USE_RTS_CTS) { /* -- cgit v1.1 From 99afaac4278c9581cef17ddc0c842b51f9b6206d Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:39 -0400 Subject: rtl8xxxu: Determine need for short preamble before updating TX descriptors Another patch to move this detection out of the code handling the TX descriptor update. Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 6dc3f52..cfb2dfd 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -4770,7 +4770,7 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, u16 rate_flag = tx_info->control.rates[0].flags; int tx_desc_size = priv->fops->tx_desc_size; int ret; - bool usedesc40, ampdu_enable, sgi = false; + bool usedesc40, ampdu_enable, sgi = false, short_preamble = false; if (skb_headroom(skb) < tx_desc_size) { dev_warn(dev, @@ -4860,6 +4860,10 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, (IEEE80211_HT_CAP_SGI_40 | IEEE80211_HT_CAP_SGI_20))) sgi = true; + if (rate_flag & IEEE80211_TX_RC_USE_SHORT_PREAMBLE || + (sta && vif && vif->bss_conf.use_short_preamble)) + short_preamble = true; + seq_number = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl)); if (!usedesc40) { tx_desc->txdw5 = cpu_to_le32(rate); @@ -4888,8 +4892,7 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, if (ieee80211_is_data_qos(hdr->frame_control)) tx_desc->txdw4 |= cpu_to_le32(TXDESC32_QOS); - if (rate_flag & IEEE80211_TX_RC_USE_SHORT_PREAMBLE
|| - (sta && vif && vif->bss_conf.use_short_preamble)) + if (short_preamble) tx_desc->txdw4 |= cpu_to_le32(TXDESC32_SHORT_PREAMBLE); if (sgi) @@ -4935,8 +4938,7 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, cpu_to_le32(TXDESC40_RETRY_LIMIT_ENABLE); } - if (rate_flag & IEEE80211_TX_RC_USE_SHORT_PREAMBLE || - (sta && vif && vif->bss_conf.use_short_preamble)) + if (short_preamble) tx_desc40->txdw5 |= cpu_to_le32(TXDESC40_SHORT_PREAMBLE); -- cgit v1.1 From b59415c2dd088ffce62fbde737c8b2d04fb6e015 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 19 Aug 2016 17:46:40 -0400 Subject: rtl8xxxu: Split filling of TX descriptors into separate functions Split the filling of TX descriptors into a generic portion used on all devices, and format specific helper functions provided in the fops structure. This also cleaned up some mess, even if non harmful, in the handling of txdesc40 descriptors, where the code randomly would switch between the pointer to tx_desc and tx_desc40. Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h | 12 ++ .../net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c | 1 + .../net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c | 1 + .../net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c | 1 + .../net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c | 1 + .../net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 198 ++++++++++++--------- 6 files changed, 125 insertions(+), 89 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h index 68d80c4..1f54b89 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h @@ -1336,6 +1336,10 @@ struct rtl8xxxu_fileops { u32 ramask, int sgi); void (*report_connect) (struct rtl8xxxu_priv *priv, u8 macid, bool connect); + void (*fill_txdesc) (struct ieee80211_hdr *hdr, + struct rtl8xxxu_txdesc32 *tx_desc, u32 rate, + u16 rate_flag, bool sgi, bool 
short_preamble, + bool ampdu_enable); int writeN_block_size; int rx_agg_buf_size; char tx_desc_size; @@ -1429,6 +1433,14 @@ int rtl8xxxu_parse_rxdesc24(struct rtl8xxxu_priv *priv, struct sk_buff *skb); int rtl8xxxu_gen2_channel_to_group(int channel); bool rtl8xxxu_gen2_simularity_compare(struct rtl8xxxu_priv *priv, int result[][8], int c1, int c2); +void rtl8xxxu_fill_txdesc_v1(struct ieee80211_hdr *hdr, + struct rtl8xxxu_txdesc32 *tx_desc, u32 rate, + u16 rate_flag, bool sgi, bool short_preamble, + bool ampdu_enable); +void rtl8xxxu_fill_txdesc_v2(struct ieee80211_hdr *hdr, + struct rtl8xxxu_txdesc32 *tx_desc32, u32 rate, + u16 rate_flag, bool sgi, bool short_preamble, + bool ampdu_enable); extern struct rtl8xxxu_fileops rtl8192cu_fops; extern struct rtl8xxxu_fileops rtl8192eu_fops; diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c index cd13c25..f9e2050 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c @@ -567,6 +567,7 @@ struct rtl8xxxu_fileops rtl8192cu_fops = { .set_tx_power = rtl8xxxu_gen1_set_tx_power, .update_rate_mask = rtl8xxxu_update_rate_mask, .report_connect = rtl8xxxu_gen1_report_connect, + .fill_txdesc = rtl8xxxu_fill_txdesc_v1, .writeN_block_size = 128, .rx_agg_buf_size = 16000, .tx_desc_size = sizeof(struct rtl8xxxu_txdesc32), diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c index 9901199..841522e 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c @@ -1501,6 +1501,7 @@ struct rtl8xxxu_fileops rtl8192eu_fops = { .set_tx_power = rtl8192e_set_tx_power, .update_rate_mask = rtl8xxxu_gen2_update_rate_mask, .report_connect = rtl8xxxu_gen2_report_connect, + .fill_txdesc = rtl8xxxu_fill_txdesc_v2, .writeN_block_size = 128, .tx_desc_size = sizeof(struct 
rtl8xxxu_txdesc40), .rx_desc_size = sizeof(struct rtl8xxxu_rxdesc24), diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c index d6f4968..aef3730 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c @@ -384,6 +384,7 @@ struct rtl8xxxu_fileops rtl8723au_fops = { .set_tx_power = rtl8xxxu_gen1_set_tx_power, .update_rate_mask = rtl8xxxu_update_rate_mask, .report_connect = rtl8xxxu_gen1_report_connect, + .fill_txdesc = rtl8xxxu_fill_txdesc_v1, .writeN_block_size = 1024, .rx_agg_buf_size = 16000, .tx_desc_size = sizeof(struct rtl8xxxu_txdesc32), diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c index dd2c0f0..6c086b5 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c @@ -1662,6 +1662,7 @@ struct rtl8xxxu_fileops rtl8723bu_fops = { .set_tx_power = rtl8723b_set_tx_power, .update_rate_mask = rtl8xxxu_gen2_update_rate_mask, .report_connect = rtl8xxxu_gen2_report_connect, + .fill_txdesc = rtl8xxxu_fill_txdesc_v2, .writeN_block_size = 1024, .tx_desc_size = sizeof(struct rtl8xxxu_txdesc40), .rx_desc_size = sizeof(struct rtl8xxxu_rxdesc24), diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index cfb2dfd..e02fab0 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -4750,6 +4750,113 @@ static void rtl8xxxu_dump_action(struct device *dev, } } +/* + * Fill in v1 (gen1) specific TX descriptor bits. 
+ * This format is used on 8188cu/8192cu/8723au + */ +void +rtl8xxxu_fill_txdesc_v1(struct ieee80211_hdr *hdr, + struct rtl8xxxu_txdesc32 *tx_desc, u32 rate, + u16 rate_flag, bool sgi, bool short_preamble, + bool ampdu_enable) +{ + u16 seq_number; + + seq_number = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl)); + + tx_desc->txdw5 = cpu_to_le32(rate); + + if (ieee80211_is_data(hdr->frame_control)) + tx_desc->txdw5 |= cpu_to_le32(0x0001ff00); + + tx_desc->txdw3 = cpu_to_le32((u32)seq_number << TXDESC32_SEQ_SHIFT); + + if (ampdu_enable) + tx_desc->txdw1 |= cpu_to_le32(TXDESC32_AGG_ENABLE); + else + tx_desc->txdw1 |= cpu_to_le32(TXDESC32_AGG_BREAK); + + if (ieee80211_is_mgmt(hdr->frame_control)) { + tx_desc->txdw5 = cpu_to_le32(rate); + tx_desc->txdw4 |= cpu_to_le32(TXDESC32_USE_DRIVER_RATE); + tx_desc->txdw5 |= cpu_to_le32(6 << TXDESC32_RETRY_LIMIT_SHIFT); + tx_desc->txdw5 |= cpu_to_le32(TXDESC32_RETRY_LIMIT_ENABLE); + } + + if (ieee80211_is_data_qos(hdr->frame_control)) + tx_desc->txdw4 |= cpu_to_le32(TXDESC32_QOS); + + if (short_preamble) + tx_desc->txdw4 |= cpu_to_le32(TXDESC32_SHORT_PREAMBLE); + + if (sgi) + tx_desc->txdw5 |= cpu_to_le32(TXDESC32_SHORT_GI); + + if (rate_flag & IEEE80211_TX_RC_USE_RTS_CTS) { + /* + * Use RTS rate 24M - does the mac80211 tell + * us which to use? + */ + tx_desc->txdw4 |= cpu_to_le32(DESC_RATE_24M << + TXDESC32_RTS_RATE_SHIFT); + tx_desc->txdw4 |= cpu_to_le32(TXDESC32_RTS_CTS_ENABLE); + tx_desc->txdw4 |= cpu_to_le32(TXDESC32_HW_RTS_ENABLE); + } +} + +/* + * Fill in v2 (gen2) specific TX descriptor bits. 
+ * This format is used on 8192eu/8723bu + */ +void +rtl8xxxu_fill_txdesc_v2(struct ieee80211_hdr *hdr, + struct rtl8xxxu_txdesc32 *tx_desc32, u32 rate, + u16 rate_flag, bool sgi, bool short_preamble, + bool ampdu_enable) +{ + struct rtl8xxxu_txdesc40 *tx_desc40; + u16 seq_number; + + tx_desc40 = (struct rtl8xxxu_txdesc40 *)tx_desc32; + + seq_number = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl)); + + tx_desc40->txdw4 = cpu_to_le32(rate); + if (ieee80211_is_data(hdr->frame_control)) { + tx_desc40->txdw4 |= cpu_to_le32(0x1f << + TXDESC40_DATA_RATE_FB_SHIFT); + } + + tx_desc40->txdw9 = cpu_to_le32((u32)seq_number << TXDESC40_SEQ_SHIFT); + + if (ampdu_enable) + tx_desc40->txdw2 |= cpu_to_le32(TXDESC40_AGG_ENABLE); + else + tx_desc40->txdw2 |= cpu_to_le32(TXDESC40_AGG_BREAK); + + if (ieee80211_is_mgmt(hdr->frame_control)) { + tx_desc40->txdw4 = cpu_to_le32(rate); + tx_desc40->txdw3 |= cpu_to_le32(TXDESC40_USE_DRIVER_RATE); + tx_desc40->txdw4 |= + cpu_to_le32(6 << TXDESC40_RETRY_LIMIT_SHIFT); + tx_desc40->txdw4 |= cpu_to_le32(TXDESC40_RETRY_LIMIT_ENABLE); + } + + if (short_preamble) + tx_desc40->txdw5 |= cpu_to_le32(TXDESC40_SHORT_PREAMBLE); + + if (rate_flag & IEEE80211_TX_RC_USE_RTS_CTS) { + /* + * Use RTS rate 24M - does the mac80211 tell + * us which to use? 
+ */ + tx_desc40->txdw4 |= cpu_to_le32(DESC_RATE_24M << + TXDESC40_RTS_RATE_SHIFT); + tx_desc40->txdw3 |= cpu_to_le32(TXDESC40_RTS_CTS_ENABLE); + tx_desc40->txdw3 |= cpu_to_le32(TXDESC40_HW_RTS_ENABLE); + } +} + static void rtl8xxxu_tx(struct ieee80211_hw *hw, struct ieee80211_tx_control *control, struct sk_buff *skb) @@ -4759,7 +4866,6 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, struct ieee80211_rate *tx_rate = ieee80211_get_tx_rate(hw, tx_info); struct rtl8xxxu_priv *priv = hw->priv; struct rtl8xxxu_txdesc32 *tx_desc; - struct rtl8xxxu_txdesc40 *tx_desc40; struct rtl8xxxu_tx_urb *tx_urb; struct ieee80211_sta *sta = NULL; struct ieee80211_vif *vif = tx_info->control.vif; @@ -4865,95 +4971,9 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, short_preamble = true; seq_number = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl)); - if (!usedesc40) { - tx_desc->txdw5 = cpu_to_le32(rate); - - if (ieee80211_is_data(hdr->frame_control)) - tx_desc->txdw5 |= cpu_to_le32(0x0001ff00); - - tx_desc->txdw3 = - cpu_to_le32((u32)seq_number << TXDESC32_SEQ_SHIFT); - - if (ampdu_enable) - tx_desc->txdw1 |= cpu_to_le32(TXDESC32_AGG_ENABLE); - else - tx_desc->txdw1 |= cpu_to_le32(TXDESC32_AGG_BREAK); - - if (ieee80211_is_mgmt(hdr->frame_control)) { - tx_desc->txdw5 = cpu_to_le32(rate); - tx_desc->txdw4 |= - cpu_to_le32(TXDESC32_USE_DRIVER_RATE); - tx_desc->txdw5 |= - cpu_to_le32(6 << TXDESC32_RETRY_LIMIT_SHIFT); - tx_desc->txdw5 |= - cpu_to_le32(TXDESC32_RETRY_LIMIT_ENABLE); - } - - if (ieee80211_is_data_qos(hdr->frame_control)) - tx_desc->txdw4 |= cpu_to_le32(TXDESC32_QOS); - if (short_preamble) - tx_desc->txdw4 |= cpu_to_le32(TXDESC32_SHORT_PREAMBLE); - - if (sgi) - tx_desc->txdw5 |= cpu_to_le32(TXDESC32_SHORT_GI); - - if (rate_flag & IEEE80211_TX_RC_USE_RTS_CTS) { - /* - * Use RTS rate 24M - does the mac80211 tell - * us which to use? 
- */ - tx_desc->txdw4 |= - cpu_to_le32(DESC_RATE_24M << - TXDESC32_RTS_RATE_SHIFT); - tx_desc->txdw4 |= - cpu_to_le32(TXDESC32_RTS_CTS_ENABLE); - tx_desc->txdw4 |= cpu_to_le32(TXDESC32_HW_RTS_ENABLE); - } - } else { - tx_desc40 = (struct rtl8xxxu_txdesc40 *)tx_desc; - - tx_desc40->txdw4 = cpu_to_le32(rate); - if (ieee80211_is_data(hdr->frame_control)) { - tx_desc->txdw4 |= - cpu_to_le32(0x1f << - TXDESC40_DATA_RATE_FB_SHIFT); - } - - tx_desc40->txdw9 = - cpu_to_le32((u32)seq_number << TXDESC40_SEQ_SHIFT); - - if (ampdu_enable) - tx_desc40->txdw2 |= cpu_to_le32(TXDESC40_AGG_ENABLE); - else - tx_desc40->txdw2 |= cpu_to_le32(TXDESC40_AGG_BREAK); - - if (ieee80211_is_mgmt(hdr->frame_control)) { - tx_desc40->txdw4 = cpu_to_le32(rate); - tx_desc40->txdw3 |= - cpu_to_le32(TXDESC40_USE_DRIVER_RATE); - tx_desc40->txdw4 |= - cpu_to_le32(6 << TXDESC40_RETRY_LIMIT_SHIFT); - tx_desc40->txdw4 |= - cpu_to_le32(TXDESC40_RETRY_LIMIT_ENABLE); - } - - if (short_preamble) - tx_desc40->txdw5 |= - cpu_to_le32(TXDESC40_SHORT_PREAMBLE); - - if (rate_flag & IEEE80211_TX_RC_USE_RTS_CTS) { - /* - * Use RTS rate 24M - does the mac80211 tell - * us which to use? - */ - tx_desc->txdw4 |= - cpu_to_le32(DESC_RATE_24M << - TXDESC40_RTS_RATE_SHIFT); - tx_desc->txdw3 |= cpu_to_le32(TXDESC40_RTS_CTS_ENABLE); - tx_desc->txdw3 |= cpu_to_le32(TXDESC40_HW_RTS_ENABLE); - } - } + priv->fops->fill_txdesc(hdr, tx_desc, rate, rate_flag, + sgi, short_preamble, ampdu_enable); rtl8xxxu_calc_tx_desc_csum(tx_desc); -- cgit v1.1 From 77e3980201e7d54b2e63653b46a6baba26040cef Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 19 Aug 2016 17:46:41 -0400 Subject: rtl8xxxu: gen1: Fix non static symbol warning Fixes the following sparse warning: drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c:898:1: warning: symbol 'rtl8xxxu_gen1_h2c_cmd' was not declared. Should it be static? 
Signed-off-by: Wei Yongjun Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index e02fab0..508d46f 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -894,7 +894,7 @@ int rtl8xxxu_write_rfreg(struct rtl8xxxu_priv *priv, return retval; } -int +static int rtl8xxxu_gen1_h2c_cmd(struct rtl8xxxu_priv *priv, struct h2c_cmd *h2c, int len) { struct device *dev = &priv->udev->dev; -- cgit v1.1 From 7329dc13107b2315a7b8ba5a75048935304c55a0 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 19 Aug 2016 17:46:42 -0400 Subject: rtl8xxxu: Make rtl8xxxu_ampdu_action less chatty On my home network rtl8xxxu is spamming the log with IEEE80211_AMPDU_RX_START / IEEE80211_AMPDU_RX_STOP every few seconds; turn these messages into debug messages.
Signed-off-by: Hans de Goede Signed-off-by: Jes Sorensen [kvalo@codeaurora.org: fix commit title] Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 508d46f..c362083 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -5696,7 +5696,7 @@ rtl8xxxu_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, switch (action) { case IEEE80211_AMPDU_TX_START: - dev_info(dev, "%s: IEEE80211_AMPDU_TX_START\n", __func__); + dev_dbg(dev, "%s: IEEE80211_AMPDU_TX_START\n", __func__); ampdu_factor = sta->ht_cap.ampdu_factor; ampdu_density = sta->ht_cap.ampdu_density; rtl8xxxu_set_ampdu_factor(priv, ampdu_factor); @@ -5706,21 +5706,21 @@ rtl8xxxu_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, ampdu_factor, ampdu_density); break; case IEEE80211_AMPDU_TX_STOP_FLUSH: - dev_info(dev, "%s: IEEE80211_AMPDU_TX_STOP_FLUSH\n", __func__); + dev_dbg(dev, "%s: IEEE80211_AMPDU_TX_STOP_FLUSH\n", __func__); rtl8xxxu_set_ampdu_factor(priv, 0); rtl8xxxu_set_ampdu_min_space(priv, 0); break; case IEEE80211_AMPDU_TX_STOP_FLUSH_CONT: - dev_info(dev, "%s: IEEE80211_AMPDU_TX_STOP_FLUSH_CONT\n", + dev_dbg(dev, "%s: IEEE80211_AMPDU_TX_STOP_FLUSH_CONT\n", __func__); rtl8xxxu_set_ampdu_factor(priv, 0); rtl8xxxu_set_ampdu_min_space(priv, 0); break; case IEEE80211_AMPDU_RX_START: - dev_info(dev, "%s: IEEE80211_AMPDU_RX_START\n", __func__); + dev_dbg(dev, "%s: IEEE80211_AMPDU_RX_START\n", __func__); break; case IEEE80211_AMPDU_RX_STOP: - dev_info(dev, "%s: IEEE80211_AMPDU_RX_STOP\n", __func__); + dev_dbg(dev, "%s: IEEE80211_AMPDU_RX_STOP\n", __func__); break; default: break; -- cgit v1.1 From b46b599328e6d46397ca353266d7ec5b7a04bb02 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 
22 Aug 2016 19:35:05 +0100 Subject: zd1211rw: fix spelling mistake "firmeware" -> "firmware" Trivial fix to spelling mistake in dev_err message. Signed-off-by: Colin Ian King Reviewed-by: Julian Calaby Signed-off-by: Kalle Valo --- drivers/net/wireless/zydas/zd1211rw/zd_usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c index a912dc0..c5effd6c 100644 --- a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c +++ b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c @@ -193,7 +193,7 @@ static int upload_code(struct usb_device *udev, 0, 0, p, sizeof(ret), 5000 /* ms */); if (r != sizeof(ret)) { dev_err(&udev->dev, - "control request firmeware confirmation failed." + "control request firmware confirmation failed." " Return value %d\n", r); if (r >= 0) r = -ENODEV; -- cgit v1.1 From 0c9d3491530773858ff9d705ec2a9c382f449230 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 22 Aug 2016 14:27:59 -0500 Subject: rtlwifi: Fix missing country code for Great Britain Some RTL8821AE devices sold in Great Britain have the country code of 0x25 encoded in their EEPROM. This value is not tested in the routine that establishes the regulatory info for the chip. The fix is to set this code to have the same capabilities as the EU countries. In addition, the channels allowed for COUNTRY_CODE_ETSI were more properly suited for China and Israel, not the EU. This problem has also been fixed. 
Signed-off-by: Larry Finger Cc: Stable Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/regd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/regd.c b/drivers/net/wireless/realtek/rtlwifi/regd.c index 3524441..6ee6bf8 100644 --- a/drivers/net/wireless/realtek/rtlwifi/regd.c +++ b/drivers/net/wireless/realtek/rtlwifi/regd.c @@ -345,9 +345,9 @@ static const struct ieee80211_regdomain *_rtl_regdomain_select( return &rtl_regdom_no_midband; case COUNTRY_CODE_IC: return &rtl_regdom_11; - case COUNTRY_CODE_ETSI: case COUNTRY_CODE_TELEC_NETGEAR: return &rtl_regdom_60_64; + case COUNTRY_CODE_ETSI: case COUNTRY_CODE_SPAIN: case COUNTRY_CODE_FRANCE: case COUNTRY_CODE_ISRAEL: @@ -406,6 +406,8 @@ static u8 channel_plan_to_country_code(u8 channelplan) return COUNTRY_CODE_WORLD_WIDE_13; case 0x22: return COUNTRY_CODE_IC; + case 0x25: + return COUNTRY_CODE_ETSI; case 0x32: return COUNTRY_CODE_TELEC_NETGEAR; case 0x41: -- cgit v1.1 From bccf3ffc8c6d8e0251a15541bb4d12b423c4f729 Mon Sep 17 00:00:00 2001 From: Ismael Luceno Date: Mon, 22 Aug 2016 19:40:07 -0300 Subject: brcmfmac: Add USB ID for Cisco Linksys AE1200 The AE1200 comes with different revisions of the BCM43235 chipset, but all have the same USB ID. Only revision 3 can be supported. 
Signed-off-by: Ismael Luceno Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c | 4 ++++ drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c index fa26619..2f978a3 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c @@ -1458,11 +1458,15 @@ static int brcmf_usb_reset_resume(struct usb_interface *intf) #define BRCMF_USB_DEVICE(dev_id) \ { USB_DEVICE(BRCM_USB_VENDOR_ID_BROADCOM, dev_id) } +#define LINKSYS_USB_DEVICE(dev_id) \ + { USB_DEVICE(BRCM_USB_VENDOR_ID_LINKSYS, dev_id) } + static struct usb_device_id brcmf_usb_devid_table[] = { BRCMF_USB_DEVICE(BRCM_USB_43143_DEVICE_ID), BRCMF_USB_DEVICE(BRCM_USB_43236_DEVICE_ID), BRCMF_USB_DEVICE(BRCM_USB_43242_DEVICE_ID), BRCMF_USB_DEVICE(BRCM_USB_43569_DEVICE_ID), + LINKSYS_USB_DEVICE(BRCM_USB_43235_LINKSYS_DEVICE_ID), { USB_DEVICE(BRCM_USB_VENDOR_ID_LG, BRCM_USB_43242_LG_DEVICE_ID) }, /* special entry for device with firmware loaded and running */ BRCMF_USB_DEVICE(BRCM_USB_BCMFW_DEVICE_ID), diff --git a/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h b/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h index 3cc42be..d0407d9 100644 --- a/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h +++ b/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h @@ -22,6 +22,7 @@ #define BRCM_USB_VENDOR_ID_BROADCOM 0x0a5c #define BRCM_USB_VENDOR_ID_LG 0x043e +#define BRCM_USB_VENDOR_ID_LINKSYS 0x13b1 #define BRCM_PCIE_VENDOR_ID_BROADCOM PCI_VENDOR_ID_BROADCOM /* Chipcommon Core Chip IDs */ @@ -58,6 +59,7 @@ /* USB Device IDs */ #define BRCM_USB_43143_DEVICE_ID 0xbd1e +#define BRCM_USB_43235_LINKSYS_DEVICE_ID 0x0039 #define BRCM_USB_43236_DEVICE_ID 0xbd17 #define BRCM_USB_43242_DEVICE_ID 0xbd1f #define 
BRCM_USB_43242_LG_DEVICE_ID 0x3101 -- cgit v1.1 From 7703773ef1d85b40433902a8da20167331597e4a Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Tue, 23 Aug 2016 11:37:17 +0200 Subject: brcmfmac: fix pmksa->bssid usage The struct cfg80211_pmksa defines its bssid field as: const u8 *bssid; contrary to struct brcmf_pmksa, which uses: u8 bssid[ETH_ALEN]; Therefore in brcmf_cfg80211_del_pmksa(), &pmksa->bssid takes the address of this field (of type u8**), not the one of its content (which would be u8*). Remove the & operator to make brcmf_dbg("%pM") and memcmp() behave as expected. This bug has been found using a custom static checker (which checks the usage of %p... attributes at build time). It has been introduced in commit 6c404f34f2bd ("brcmfmac: Cleanup pmksa cache handling code"), which replaced pmksa->bssid by &pmksa->bssid while refactoring the code, without modifying struct cfg80211_pmksa definition. Replace &pmk[i].bssid with pmk[i].bssid too to make the code clearer; this change does not affect the semantics.
Fixes: 6c404f34f2bd ("brcmfmac: Cleanup pmksa cache handling code") Cc: stable@vger.kernel.org Signed-off-by: Nicolas Iooss Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 2628d5e..201a9801 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -3884,11 +3884,11 @@ brcmf_cfg80211_del_pmksa(struct wiphy *wiphy, struct net_device *ndev, if (!check_vif_up(ifp->vif)) return -EIO; - brcmf_dbg(CONN, "del_pmksa - PMK bssid = %pM\n", &pmksa->bssid); + brcmf_dbg(CONN, "del_pmksa - PMK bssid = %pM\n", pmksa->bssid); npmk = le32_to_cpu(cfg->pmk_list.npmk); for (i = 0; i < npmk; i++) - if (!memcmp(&pmksa->bssid, &pmk[i].bssid, ETH_ALEN)) + if (!memcmp(pmksa->bssid, pmk[i].bssid, ETH_ALEN)) break; if ((npmk > 0) && (i < npmk)) { -- cgit v1.1 From c5aa9541818a1aacf05ab9a30c3f525841cdc1c9 Mon Sep 17 00:00:00 2001 From: Guy Mishol Date: Wed, 24 Aug 2016 14:35:27 +0300 Subject: wl18xx: add time sync configuration api Add time sync configuration api. The new api allows to configure the synchronization mode (STA/AP/MESH) and (in case of Mesh mode) the master address of each zone. 
Signed-off-by: Guy Mishol Signed-off-by: Kalle Valo --- drivers/net/wireless/ti/wl18xx/acx.c | 29 +++++++++++++++++++++++++++++ drivers/net/wireless/ti/wl18xx/acx.h | 13 +++++++++++++ drivers/net/wireless/ti/wl18xx/event.c | 1 + drivers/net/wireless/ti/wlcore/wlcore.h | 3 +++ 4 files changed, 46 insertions(+) diff --git a/drivers/net/wireless/ti/wl18xx/acx.c b/drivers/net/wireless/ti/wl18xx/acx.c index 4be0409..b5525a3 100644 --- a/drivers/net/wireless/ti/wl18xx/acx.c +++ b/drivers/net/wireless/ti/wl18xx/acx.c @@ -309,3 +309,32 @@ out: kfree(acx); return ret; } + +int wl18xx_acx_time_sync_cfg(struct wl1271 *wl) +{ + struct acx_time_sync_cfg *acx; + int ret; + + wl1271_debug(DEBUG_ACX, "acx time sync cfg: mode %d, addr: %pM", + wl->conf.sg.params[WL18XX_CONF_SG_TIME_SYNC], + wl->zone_master_mac_addr); + + acx = kzalloc(sizeof(*acx), GFP_KERNEL); + if (!acx) { + ret = -ENOMEM; + goto out; + } + + acx->sync_mode = wl->conf.sg.params[WL18XX_CONF_SG_TIME_SYNC]; + memcpy(acx->zone_mac_addr, wl->zone_master_mac_addr, ETH_ALEN); + + ret = wl1271_cmd_configure(wl, ACX_TIME_SYNC_CFG, + acx, sizeof(*acx)); + if (ret < 0) { + wl1271_warning("acx time sync cfg failed: %d", ret); + goto out; + } +out: + kfree(acx); + return ret; +} diff --git a/drivers/net/wireless/ti/wl18xx/acx.h b/drivers/net/wireless/ti/wl18xx/acx.h index 342a299..2edbbbf 100644 --- a/drivers/net/wireless/ti/wl18xx/acx.h +++ b/drivers/net/wireless/ti/wl18xx/acx.h @@ -37,6 +37,7 @@ enum { ACX_RX_BA_FILTER = 0x0058, ACX_AP_SLEEP_CFG = 0x0059, ACX_DYNAMIC_TRACES_CFG = 0x005A, + ACX_TIME_SYNC_CFG = 0x005B, }; /* numbers of bits the length field takes (add 1 for the actual number) */ @@ -388,6 +389,17 @@ struct acx_dynamic_fw_traces_cfg { __le32 dynamic_fw_traces; } __packed; +/* + * ACX_TIME_SYNC_CFG + * configure the time sync parameters + */ +struct acx_time_sync_cfg { + struct acx_header header; + u8 sync_mode; + u8 zone_mac_addr[ETH_ALEN]; + u8 padding[1]; +} __packed; + int 
wl18xx_acx_host_if_cfg_bitmap(struct wl1271 *wl, u32 host_cfg_bitmap, u32 sdio_blk_size, u32 extra_mem_blks, u32 len_field_size); @@ -402,5 +414,6 @@ int wl18xx_acx_interrupt_notify_config(struct wl1271 *wl, bool action); int wl18xx_acx_rx_ba_filter(struct wl1271 *wl, bool action); int wl18xx_acx_ap_sleep(struct wl1271 *wl); int wl18xx_acx_dynamic_fw_traces(struct wl1271 *wl); +int wl18xx_acx_time_sync_cfg(struct wl1271 *wl); #endif /* __WL18XX_ACX_H__ */ diff --git a/drivers/net/wireless/ti/wl18xx/event.c b/drivers/net/wireless/ti/wl18xx/event.c index 2c5df43..b36ce18 100644 --- a/drivers/net/wireless/ti/wl18xx/event.c +++ b/drivers/net/wireless/ti/wl18xx/event.c @@ -22,6 +22,7 @@ #include #include "event.h" #include "scan.h" +#include "conf.h" #include "../wlcore/cmd.h" #include "../wlcore/debug.h" #include "../wlcore/vendor_cmd.h" diff --git a/drivers/net/wireless/ti/wlcore/wlcore.h b/drivers/net/wireless/ti/wlcore/wlcore.h index 8f28aa0..1827546 100644 --- a/drivers/net/wireless/ti/wlcore/wlcore.h +++ b/drivers/net/wireless/ti/wlcore/wlcore.h @@ -501,6 +501,9 @@ struct wl1271 { /* dynamic fw traces */ u32 dynamic_fw_traces; + + /* time sync zone master */ + u8 zone_master_mac_addr[ETH_ALEN]; }; int wlcore_probe(struct wl1271 *wl, struct platform_device *pdev); -- cgit v1.1 From fd3fbb65cab86c07f5881ccb919a440497c0960d Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 25 Aug 2016 17:05:16 +0200 Subject: mwifiex: make "PCI-E is not the winner" print more informative Printing ret and adapter->winner does not provide any useful information, as those are always 0 at the point where the message is printed. Print the value read from the reg->fw_status register instead.
Signed-off-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/pcie.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c index 453ab6a..8abbbfe 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.c +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c @@ -2074,8 +2074,7 @@ mwifiex_check_winner_status(struct mwifiex_adapter *adapter) adapter->winner = 1; } else { mwifiex_dbg(adapter, ERROR, - "PCI-E is not the winner <%#x,%d>, exit dnld\n", - ret, adapter->winner); + "PCI-E is not the winner <%#x>", winner); } return ret; -- cgit v1.1 From 09dd9ec598c3fdcb9874d13d9bbab0e2ece6748b Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 25 Aug 2016 17:05:17 +0200 Subject: mwifiex: print status of FW ready event For debugging purposes, print the content of the reg->fw_status register and other variable values when waiting for the firmware ready event. Signed-off-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/pcie.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c index 8abbbfe..2f555b0 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.c +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c @@ -2043,6 +2043,10 @@ mwifiex_check_fw_status(struct mwifiex_adapter *adapter, u32 poll_num) ret = -1; else ret = 0; + + mwifiex_dbg(adapter, INFO, "Try %d if FW is ready <%d,%#x>", + tries, ret, firmware_stat); + if (ret) continue; if (firmware_stat == FIRMWARE_READY_PCIE) { -- cgit v1.1 From b9aebb69ecd33f5163b24c2d0f928260caf6a86b Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 25 Aug 2016 17:05:18 +0200 Subject: mwifiex: do not print dot when downloading FW Printing about 3000 lines like this [ 20.691850] mwifiex_pcie 0000:02:00.0: . [ 20.693466] mwifiex_pcie 0000:02:00.0: . is not useful.
If FW downloading will be interrupted, we will get proper error message about that. Signed-off-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/pcie.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c index 2f555b0..50a6a53 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.c +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c @@ -1956,8 +1956,6 @@ static int mwifiex_prog_fw_w_helper(struct mwifiex_adapter *adapter, if (firmware_len - offset < txlen) txlen = firmware_len - offset; - mwifiex_dbg(adapter, INFO, "."); - tx_blocks = (txlen + card->pcie.blksz_fw_dl - 1) / card->pcie.blksz_fw_dl; -- cgit v1.1 From 5856cd5b8dda5ee013a2b0abbab0552a6f14d72d Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Fri, 26 Aug 2016 23:12:23 -0400 Subject: rtlwifi/rtl8192de: Fix print format string %ul was likely meant as %lu to print an unsigned long, not an unsigned with a letter l at the end. But in fact the value printed is u32 anyway, so just drop the l completely. Signed-off-by: Oleg Drokin Acked-by: Larry Finger Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c index d334d2a..2a4810d 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c @@ -588,7 +588,7 @@ static bool _rtl92d_phy_config_bb_with_headerfile(struct ieee80211_hw *hw, * setting. 
*/ udelay(1); RT_TRACE(rtlpriv, COMP_INIT, DBG_TRACE, - "The Rtl819XAGCTAB_Array_Table[0] is %ul Rtl819XPHY_REGArray[1] is %ul\n", + "The Rtl819XAGCTAB_Array_Table[0] is %u Rtl819XPHY_REGArray[1] is %u\n", agctab_array_table[i], agctab_array_table[i + 1]); } @@ -604,7 +604,7 @@ static bool _rtl92d_phy_config_bb_with_headerfile(struct ieee80211_hw *hw, * setting. */ udelay(1); RT_TRACE(rtlpriv, COMP_INIT, DBG_TRACE, - "The Rtl819XAGCTAB_Array_Table[0] is %ul Rtl819XPHY_REGArray[1] is %ul\n", + "The Rtl819XAGCTAB_Array_Table[0] is %u Rtl819XPHY_REGArray[1] is %u\n", agctab_array_table[i], agctab_array_table[i + 1]); } @@ -620,7 +620,7 @@ static bool _rtl92d_phy_config_bb_with_headerfile(struct ieee80211_hw *hw, * setting. */ udelay(1); RT_TRACE(rtlpriv, COMP_INIT, DBG_TRACE, - "The Rtl819XAGCTAB_5GArray_Table[0] is %ul Rtl819XPHY_REGArray[1] is %ul\n", + "The Rtl819XAGCTAB_5GArray_Table[0] is %u Rtl819XPHY_REGArray[1] is %u\n", agctab_5garray_table[i], agctab_5garray_table[i + 1]); } -- cgit v1.1 From 307fe9dd11ae44d4f8881ee449a7cbac36e1f5de Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 1 Sep 2016 11:44:35 +0200 Subject: iio: accel: kxsd9: Fix scaling bug All the scaling of the KXSD9 involves multiplication with a fraction number < 1. However the scaling value returned from IIO_INFO_SCALE was unpredictable as only the micros of the value was assigned, and not the integer part, resulting in scaling like this: $cat in_accel_scale -1057462640.011978 Fix this by assigning zero to the integer part. 
Cc: stable@vger.kernel.org Tested-by: Jonathan Cameron Signed-off-by: Linus Walleij Signed-off-by: Jonathan Cameron --- drivers/iio/accel/kxsd9.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/accel/kxsd9.c b/drivers/iio/accel/kxsd9.c index da5fb67..9d72d4b 100644 --- a/drivers/iio/accel/kxsd9.c +++ b/drivers/iio/accel/kxsd9.c @@ -166,6 +166,7 @@ static int kxsd9_read_raw(struct iio_dev *indio_dev, ret = spi_w8r8(st->us, KXSD9_READ(KXSD9_REG_CTRL_C)); if (ret < 0) goto error_ret; + *val = 0; *val2 = kxsd9_micro_scales[ret & KXSD9_FS_MASK]; ret = IIO_VAL_INT_PLUS_MICRO; break; -- cgit v1.1 From 4c3cb6e9a9d94d1553807854a565cd27ff4c22aa Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 3 Sep 2016 10:36:00 -0700 Subject: dax: fix mapping size check pgoff_to_phys() validates that both the starting address and the length of the mapping against the resource list. We need to check for a mapping size of PMD_SIZE not PAGE_SIZE in the pmd fault path. Signed-off-by: Dan Williams --- drivers/dax/dax.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c index 803f395..29f600f 100644 --- a/drivers/dax/dax.c +++ b/drivers/dax/dax.c @@ -459,7 +459,7 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, } pgoff = linear_page_index(vma, pmd_addr); - phys = pgoff_to_phys(dax_dev, pgoff, PAGE_SIZE); + phys = pgoff_to_phys(dax_dev, pgoff, PMD_SIZE); if (phys == -1) { dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__, pgoff); -- cgit v1.1 From 3e423945ea94412283eaba8bfbe9d6e0a80b434f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 3 Sep 2016 11:02:50 -0700 Subject: devpts: return NULL pts 'priv' entry for non-devpts nodes In commit 8ead9dd54716 ("devpts: more pty driver interface cleanups") I made devpts_get_priv() just return the dentry->fs_data directly. And because I thought it wouldn't happen, I added a warning if you ever saw a pts node that wasn't on devpts. 
And no, that warning never triggered under any actual real use, but you can trigger it by creating nonsensical pts nodes by hand. So just revert the warning, and make devpts_get_priv() return NULL for that case like it used to. Reported-by: Dmitry Vyukov Cc: stable@vger.kernel.org # 4.6+ Cc: Eric W Biederman" Signed-off-by: Linus Torvalds --- fs/devpts/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index d116453..79a5941 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -585,7 +585,8 @@ struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv) */ void *devpts_get_priv(struct dentry *dentry) { - WARN_ON_ONCE(dentry->d_sb->s_magic != DEVPTS_SUPER_MAGIC); + if (dentry->d_sb->s_magic != DEVPTS_SUPER_MAGIC) + return NULL; return dentry->d_fsdata; } -- cgit v1.1 From 609e941a6bcd7ceb1cbb561941c997f6465e8698 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 1 Sep 2016 06:43:46 -0700 Subject: iw_cxgb4: call dev_put() on l2t allocation failure Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Steve Wise Signed-off-by: Sagi Grimberg --- drivers/infiniband/hw/cxgb4/cm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index a3a6721..57b5bb5 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2077,8 +2077,10 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, } ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, n, pdev, rt_tos2priority(tos)); - if (!ep->l2t) + if (!ep->l2t) { + dev_put(pdev); goto out; + } ep->mtu = pdev->mtu; ep->tx_chan = cxgb4_port_chan(pdev); ep->smac_idx = cxgb4_tp_smt_idx(adapter_type, -- cgit v1.1 From 37eb816c0867b1b0db273d22b530780a0a083980 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 1 Sep 2016 06:44:52 -0700 Subject: iw_cxgb4: block module unload until all ep resources are released 
Otherwise an endpoint can be still closing down causing a touch after free crash. Also WARN_ON if ulps have failed to destroy various resources during device removal. Fixes: ad61a4c7a9b7 ("iw_cxgb4: don't block in destroy_qp awaiting the last deref") Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Steve Wise Signed-off-by: Sagi Grimberg --- drivers/infiniband/hw/cxgb4/cm.c | 2 ++ drivers/infiniband/hw/cxgb4/device.c | 5 +++++ drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 1 + 3 files changed, 8 insertions(+) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 57b5bb5..5621270 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -314,6 +314,8 @@ static void remove_ep_tid(struct c4iw_ep *ep) spin_lock_irqsave(&ep->com.dev->lock, flags); _remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid, 0); + if (idr_is_empty(&ep->com.dev->hwtid_idr)) + wake_up(&ep->com.dev->wait); spin_unlock_irqrestore(&ep->com.dev->lock, flags); } diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index ae2e8b2..63561aa 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -872,9 +872,13 @@ static void c4iw_rdev_close(struct c4iw_rdev *rdev) static void c4iw_dealloc(struct uld_ctx *ctx) { c4iw_rdev_close(&ctx->dev->rdev); + WARN_ON_ONCE(!idr_is_empty(&ctx->dev->cqidr)); idr_destroy(&ctx->dev->cqidr); + WARN_ON_ONCE(!idr_is_empty(&ctx->dev->qpidr)); idr_destroy(&ctx->dev->qpidr); + WARN_ON_ONCE(!idr_is_empty(&ctx->dev->mmidr)); idr_destroy(&ctx->dev->mmidr); + wait_event(ctx->dev->wait, idr_is_empty(&ctx->dev->hwtid_idr)); idr_destroy(&ctx->dev->hwtid_idr); idr_destroy(&ctx->dev->stid_idr); idr_destroy(&ctx->dev->atid_idr); @@ -992,6 +996,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) mutex_init(&devp->rdev.stats.lock); mutex_init(&devp->db_mutex); INIT_LIST_HEAD(&devp->db_fc_list); + 
init_waitqueue_head(&devp->wait); devp->avail_ird = devp->rdev.lldi.max_ird_adapter; if (c4iw_debugfs_root) { diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index f6f34a7..0e9cd44 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -263,6 +263,7 @@ struct c4iw_dev { struct idr stid_idr; struct list_head db_fc_list; u32 avail_ird; + wait_queue_head_t wait; }; static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev) -- cgit v1.1 From cdbecc8d24b642b67ae79a0acc2ff18d3d0e677e Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 1 Sep 2016 09:12:25 -0700 Subject: nvme_rdma: keep a ref on the ctrl during delete/flush Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Steve Wise Signed-off-by: Sagi Grimberg --- drivers/nvme/host/rdma.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index ab545fb..15b0c1d 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1351,9 +1351,15 @@ static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue) ret = 1; } - /* Queue controller deletion */ + /* + * Queue controller deletion. 
Keep a reference until all + * work is flushed since delete_work will free the ctrl mem + */ + kref_get(&ctrl->ctrl.kref); queue_work(nvme_rdma_wq, &ctrl->delete_work); flush_work(&ctrl->delete_work); + nvme_put_ctrl(&ctrl->ctrl); + return ret; } @@ -1700,15 +1706,19 @@ static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl) static int nvme_rdma_del_ctrl(struct nvme_ctrl *nctrl) { struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); - int ret; + int ret = 0; + /* + * Keep a reference until all work is flushed since + * __nvme_rdma_del_ctrl can free the ctrl mem + */ + if (!kref_get_unless_zero(&ctrl->ctrl.kref)) + return -EBUSY; ret = __nvme_rdma_del_ctrl(ctrl); - if (ret) - return ret; - - flush_work(&ctrl->delete_work); - - return 0; + if (!ret) + flush_work(&ctrl->delete_work); + nvme_put_ctrl(&ctrl->ctrl); + return ret; } static void nvme_rdma_remove_ctrl_work(struct work_struct *work) -- cgit v1.1 From f361e5a01ed35c0f9a00816d76a910d8a5cb4547 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 2 Sep 2016 09:01:27 -0700 Subject: nvme-rdma: destroy nvme queue rdma resources on connect failure After address resolution, the nvme_rdma_queue rdma resources are allocated. If rdma route resolution or the connect fails, or the controller reconnect times out and gives up, then the rdma resources need to be freed. Otherwise, rdma resources are leaked. 
Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Steve Wise Signed-off-by: Sagi Grimberg --- drivers/nvme/host/rdma.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 15b0c1d..a9d43f0 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -82,6 +82,7 @@ struct nvme_rdma_request { enum nvme_rdma_queue_flags { NVME_RDMA_Q_CONNECTED = (1 << 0), + NVME_RDMA_IB_QUEUE_ALLOCATED = (1 << 1), }; struct nvme_rdma_queue { @@ -480,9 +481,14 @@ out_err: static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue) { - struct nvme_rdma_device *dev = queue->device; - struct ib_device *ibdev = dev->dev; + struct nvme_rdma_device *dev; + struct ib_device *ibdev; + if (!test_and_clear_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags)) + return; + + dev = queue->device; + ibdev = dev->dev; rdma_destroy_qp(queue->cm_id); ib_free_cq(queue->ib_cq); @@ -533,6 +539,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue, ret = -ENOMEM; goto out_destroy_qp; } + set_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags); return 0; @@ -590,6 +597,7 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl, return 0; out_destroy_cm_id: + nvme_rdma_destroy_queue_ib(queue); rdma_destroy_id(queue->cm_id); return ret; } @@ -652,7 +660,7 @@ static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl) return 0; out_free_queues: - for (; i >= 1; i--) + for (i--; i >= 1; i--) nvme_rdma_stop_and_free_queue(&ctrl->queues[i]); return ret; -- cgit v1.1 From 334a8f37115bf35e38617315a360a91ac4f2b2c6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 4 Sep 2016 12:46:35 -0400 Subject: pNFS: Don't forget the layout stateid if there are outstanding LAYOUTGETs If there are outstanding LAYOUTGET rpc calls, then we want to ensure that we keep the layout stateid around so we that don't inadvertently pick up an old/misordered sequence id. 
The race is as follows: Client Server ====== ====== LAYOUTGET(seqid) LAYOUTGET(seqid) return LAYOUTGET(seqid+1) return LAYOUTGET(seqid+2) process LAYOUTGET(seqid+2) forget layout process LAYOUTGET(seqid+1) If it forgets the layout stateid before processing seqid+1, then the client will not check the layout->plh_barrier, and so will set the stateid with seqid+1. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index cd8b5fc..2c93a85 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -365,7 +365,8 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ atomic_dec(&lo->plh_refcount); if (list_empty(&lo->plh_segs)) { - set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); + if (atomic_read(&lo->plh_outstanding) == 0) + set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); } rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); -- cgit v1.1 From 24b27fc4cdf9e10c5e79e5923b6b7c2c5c95096c Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Thu, 1 Sep 2016 22:18:34 -0700 Subject: bonding: Fix bonding crash Following few steps will crash kernel - (a) Create bonding master > modprobe bonding miimon=50 (b) Create macvlan bridge on eth2 > ip link add link eth2 dev mvl0 address aa:0:0:0:0:01 \ type macvlan (c) Now try adding eth2 into the bond > echo +eth2 > /sys/class/net/bond0/bonding/slaves Bonding does lots of things before checking if the device enslaved is busy or not. In this case when the notifier call-chain sends notifications, the bond_netdev_event() assumes that the rx_handler /rx_handler_data is registered while the bond_enslave() hasn't progressed far enough to register rx_handler for the new slave. This patch adds a rx_handler check that can be performed right at the beginning of the enslave code to avoid getting into this situation. 
Signed-off-by: Mahesh Bandewar Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 7 ++++--- include/linux/netdevice.h | 1 + net/core/dev.c | 16 ++++++++++++++++ 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 217e8da..9599ed6 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1341,9 +1341,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) slave_dev->name); } - /* already enslaved */ - if (slave_dev->flags & IFF_SLAVE) { - netdev_dbg(bond_dev, "Error: Device was already enslaved\n"); + /* already in-use? */ + if (netdev_is_rx_handler_busy(slave_dev)) { + netdev_err(bond_dev, + "Error: Device is in use and cannot be enslaved\n"); return -EBUSY; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3a788bf..e8d79d4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3267,6 +3267,7 @@ static inline void napi_free_frags(struct napi_struct *napi) napi->skb = NULL; } +bool netdev_is_rx_handler_busy(struct net_device *dev); int netdev_rx_handler_register(struct net_device *dev, rx_handler_func_t *rx_handler, void *rx_handler_data); diff --git a/net/core/dev.c b/net/core/dev.c index dd6ce59..ea63120 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3975,6 +3975,22 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, } /** + * netdev_is_rx_handler_busy - check if receive handler is registered + * @dev: device to check + * + * Check if a receive handler is already registered for a given device. + * Return true if there one. + * + * The caller must hold the rtnl_mutex. 
+ */ +bool netdev_is_rx_handler_busy(struct net_device *dev) +{ + ASSERT_RTNL(); + return dev && rtnl_dereference(dev->rx_handler); +} +EXPORT_SYMBOL_GPL(netdev_is_rx_handler_busy); + +/** * netdev_rx_handler_register - register receive handler * @dev: device to register a handler for * @rx_handler: receive handler to register -- cgit v1.1 From 9b4cdd516dadc1b68c55ba24520194a06adff10c Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Fri, 2 Sep 2016 13:37:11 +0200 Subject: vxlan: reject multicast destination without an interface Currently, the kernel accepts configurations such as: ip l a type vxlan dstport 4789 id 1 group 239.192.0.1 ip l a type vxlan dstport 4789 id 1 group ff0e::110 However, neither of those really works. In the IPv4 case, the interface cannot be brought up ("RTNETLINK answers: No such device"). This is because multicast join will be rejected without the interface being specified. In the IPv6 case, multicast will be joined on the first interface found. This is not what the user wants as it depends on random factors (order of interfaces). Note that it's possible to add a local address but it doesn't solve anything. For IPv4, it's not considered in the multicast join (thus the same error as above is returned on ifup). This could be added but it wouldn't help for IPv6 anyway. For IPv6, we do need the interface. Just reject a configuration that sets multicast address and does not provide an interface. Nobody can depend on the previous behavior as it never worked. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index c0dda6f..6358e35 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2842,6 +2842,9 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, dev->mtu = lowerdev->mtu - (use_ipv6 ?
VXLAN6_HEADROOM : VXLAN_HEADROOM); needed_headroom = lowerdev->hard_header_len; + } else if (vxlan_addr_multicast(&dst->remote_ip)) { + pr_info("multicast destination requires interface to be specified\n"); + return -EINVAL; } if (conf->mtu) { -- cgit v1.1 From 3555621de7fcceb79c4850f3d07d1ae4f652acf6 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Fri, 2 Sep 2016 13:37:12 +0200 Subject: vxlan: fix duplicated and wrong error messages vxlan_dev_configure outputs error messages before returning, no need to print again the same messages in vxlan_newlink. Also, vxlan_dev_configure may return a particular error code for a different reason than vxlan_newlink thinks. Move the remaining error messages into vxlan_dev_configure and let vxlan_newlink just pass on the error code. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 35 +++++++++-------------------------- 1 file changed, 9 insertions(+), 26 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 6358e35..6e65832 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2782,14 +2782,15 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, struct net_device *lowerdev = NULL; if (conf->flags & VXLAN_F_GPE) { - if (conf->flags & ~VXLAN_F_ALLOWED_GPE) - return -EINVAL; /* For now, allow GPE only together with COLLECT_METADATA. * This can be relaxed later; in such case, the other side * of the PtP link will have to be provided.
*/ - if (!(conf->flags & VXLAN_F_COLLECT_METADATA)) + if ((conf->flags & ~VXLAN_F_ALLOWED_GPE) || + !(conf->flags & VXLAN_F_COLLECT_METADATA)) { + pr_info("unsupported combination of extensions\n"); return -EINVAL; + } vxlan_raw_setup(dev); } else { @@ -2877,8 +2878,10 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, tmp->cfg.saddr.sa.sa_family == AF_INET6) == use_ipv6 && tmp->cfg.dst_port == vxlan->cfg.dst_port && (tmp->flags & VXLAN_F_RCV_FLAGS) == - (vxlan->flags & VXLAN_F_RCV_FLAGS)) - return -EEXIST; + (vxlan->flags & VXLAN_F_RCV_FLAGS)) { + pr_info("duplicate VNI %u\n", be32_to_cpu(conf->vni)); + return -EEXIST; + } } dev->ethtool_ops = &vxlan_ethtool_ops; @@ -2912,7 +2915,6 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct vxlan_config conf; - int err; memset(&conf, 0, sizeof(conf)); @@ -3021,26 +3023,7 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev, if (tb[IFLA_MTU]) conf.mtu = nla_get_u32(tb[IFLA_MTU]); - err = vxlan_dev_configure(src_net, dev, &conf); - switch (err) { - case -ENODEV: - pr_info("ifindex %d does not exist\n", conf.remote_ifindex); - break; - - case -EPERM: - pr_info("IPv6 is disabled via sysctl\n"); - break; - - case -EEXIST: - pr_info("duplicate VNI %u\n", be32_to_cpu(conf.vni)); - break; - - case -EINVAL: - pr_info("unsupported combination of extensions\n"); - break; - } - - return err; + return vxlan_dev_configure(src_net, dev, &conf); } static void vxlan_dellink(struct net_device *dev, struct list_head *head) -- cgit v1.1 From 38f7bd94a97b542de86a2be9229289717e33a7a4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 1 Sep 2016 14:56:49 -0700 Subject: Revert "af_unix: Fix splice-bind deadlock" This reverts commit c845acb324aa85a39650a14e7696982ceea75dc1. 
It turns out that it just replaces one deadlock with another one: we can still get the wrong lock ordering with the readlock due to overlayfs calling back into the filesystem layer and still taking the vfs locks after the readlock. The proper solution ends up being to just split the readlock into two pieces: the bind lock (taken *outside* the vfs locks) and the IO lock (taken *inside* the filesystem locks). The two locks are independent anyway. Signed-off-by: Linus Torvalds Reviewed-by: Shmulik Ladkani Signed-off-by: David S. Miller --- net/unix/af_unix.c | 66 +++++++++++++++++++++--------------------------------- 1 file changed, 26 insertions(+), 40 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f1dffe8..433ae1b 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -954,20 +954,32 @@ fail: return NULL; } -static int unix_mknod(struct dentry *dentry, const struct path *path, umode_t mode, - struct path *res) +static int unix_mknod(const char *sun_path, umode_t mode, struct path *res) { - int err; + struct dentry *dentry; + struct path path; + int err = 0; + /* + * Get the parent directory, calculate the hash for last + * component. + */ + dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0); + err = PTR_ERR(dentry); + if (IS_ERR(dentry)) + return err; - err = security_path_mknod(path, dentry, mode, 0); + /* + * All right, let's create it. 
+ */ + err = security_path_mknod(&path, dentry, mode, 0); if (!err) { - err = vfs_mknod(d_inode(path->dentry), dentry, mode, 0); + err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0); if (!err) { - res->mnt = mntget(path->mnt); + res->mnt = mntget(path.mnt); res->dentry = dget(dentry); } } - + done_path_create(&path, dentry); return err; } @@ -978,12 +990,10 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct unix_sock *u = unix_sk(sk); struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; char *sun_path = sunaddr->sun_path; - int err, name_err; + int err; unsigned int hash; struct unix_address *addr; struct hlist_head *list; - struct path path; - struct dentry *dentry; err = -EINVAL; if (sunaddr->sun_family != AF_UNIX) @@ -999,34 +1009,14 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; addr_len = err; - name_err = 0; - dentry = NULL; - if (sun_path[0]) { - /* Get the parent directory, calculate the hash for last - * component. - */ - dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0); - - if (IS_ERR(dentry)) { - /* delay report until after 'already bound' check */ - name_err = PTR_ERR(dentry); - dentry = NULL; - } - } - err = mutex_lock_interruptible(&u->readlock); if (err) - goto out_path; + goto out; err = -EINVAL; if (u->addr) goto out_up; - if (name_err) { - err = name_err == -EEXIST ? 
-EADDRINUSE : name_err; - goto out_up; - } - err = -ENOMEM; addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL); if (!addr) @@ -1037,11 +1027,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) addr->hash = hash ^ sk->sk_type; atomic_set(&addr->refcnt, 1); - if (dentry) { - struct path u_path; + if (sun_path[0]) { + struct path path; umode_t mode = S_IFSOCK | (SOCK_INODE(sock)->i_mode & ~current_umask()); - err = unix_mknod(dentry, &path, mode, &u_path); + err = unix_mknod(sun_path, mode, &path); if (err) { if (err == -EEXIST) err = -EADDRINUSE; @@ -1049,9 +1039,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out_up; } addr->hash = UNIX_HASH_SIZE; - hash = d_real_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1); + hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1); spin_lock(&unix_table_lock); - u->path = u_path; + u->path = path; list = &unix_socket_table[hash]; } else { spin_lock(&unix_table_lock); @@ -1074,10 +1064,6 @@ out_unlock: spin_unlock(&unix_table_lock); out_up: mutex_unlock(&u->readlock); -out_path: - if (dentry) - done_path_create(&path, dentry); - out: return err; } -- cgit v1.1 From 6e1ce3c3451291142a57c4f3f6f999a29fb5b3bc Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 1 Sep 2016 14:43:53 -0700 Subject: af_unix: split 'u->readlock' into two: 'iolock' and 'bindlock' Right now we use the 'readlock' both for protecting some of the af_unix IO path and for making the bind be single-threaded. The two are independent, but using the same lock makes for a nasty deadlock due to ordering with regards to filesystem locking. The bind locking would want to nest outside the VFS pathname locking, but the IO locking wants to nest inside some of those same locks.
We tried to fix this earlier with commit c845acb324aa ("af_unix: Fix splice-bind deadlock") which moved the readlock inside the vfs locks, but that caused problems with overlayfs that will then call back into filesystem routines that take the lock in the wrong order anyway. Splitting the locks means that we can go back to having the bind lock be the outermost lock, and we don't have any deadlocks with lock ordering. Acked-by: Rainer Weikusat Acked-by: Al Viro Signed-off-by: Linus Torvalds Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/af_unix.h | 2 +- net/unix/af_unix.c | 45 +++++++++++++++++++++++---------------------- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 9b4c418..fd60ecc 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -52,7 +52,7 @@ struct unix_sock { struct sock sk; struct unix_address *addr; struct path path; - struct mutex readlock; + struct mutex iolock, bindlock; struct sock *peer; struct list_head link; atomic_long_t inflight; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 433ae1b..8309687 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -661,11 +661,11 @@ static int unix_set_peek_off(struct sock *sk, int val) { struct unix_sock *u = unix_sk(sk); - if (mutex_lock_interruptible(&u->readlock)) + if (mutex_lock_interruptible(&u->iolock)) return -EINTR; sk->sk_peek_off = val; - mutex_unlock(&u->readlock); + mutex_unlock(&u->iolock); return 0; } @@ -779,7 +779,8 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern) spin_lock_init(&u->lock); atomic_long_set(&u->inflight, 0); INIT_LIST_HEAD(&u->link); - mutex_init(&u->readlock); /* single task reading lock */ + mutex_init(&u->iolock); /* single task reading lock */ + mutex_init(&u->bindlock); /* single task binding lock */ init_waitqueue_head(&u->peer_wait); init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); 
unix_insert_socket(unix_sockets_unbound(sk), sk); @@ -848,7 +849,7 @@ static int unix_autobind(struct socket *sock) int err; unsigned int retries = 0; - err = mutex_lock_interruptible(&u->readlock); + err = mutex_lock_interruptible(&u->bindlock); if (err) return err; @@ -895,7 +896,7 @@ retry: spin_unlock(&unix_table_lock); err = 0; -out: mutex_unlock(&u->readlock); +out: mutex_unlock(&u->bindlock); return err; } @@ -1009,7 +1010,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; addr_len = err; - err = mutex_lock_interruptible(&u->readlock); + err = mutex_lock_interruptible(&u->bindlock); if (err) goto out; @@ -1063,7 +1064,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) out_unlock: spin_unlock(&unix_table_lock); out_up: - mutex_unlock(&u->readlock); + mutex_unlock(&u->bindlock); out: return err; } @@ -1955,17 +1956,17 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, if (false) { alloc_skb: unix_state_unlock(other); - mutex_unlock(&unix_sk(other)->readlock); + mutex_unlock(&unix_sk(other)->iolock); newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT, &err, 0); if (!newskb) goto err; } - /* we must acquire readlock as we modify already present + /* we must acquire iolock as we modify already present * skbs in the sk_receive_queue and mess with skb->len */ - err = mutex_lock_interruptible(&unix_sk(other)->readlock); + err = mutex_lock_interruptible(&unix_sk(other)->iolock); if (err) { err = flags & MSG_DONTWAIT ? 
-EAGAIN : -ERESTARTSYS; goto err; @@ -2032,7 +2033,7 @@ alloc_skb: } unix_state_unlock(other); - mutex_unlock(&unix_sk(other)->readlock); + mutex_unlock(&unix_sk(other)->iolock); other->sk_data_ready(other); scm_destroy(&scm); @@ -2041,7 +2042,7 @@ alloc_skb: err_state_unlock: unix_state_unlock(other); err_unlock: - mutex_unlock(&unix_sk(other)->readlock); + mutex_unlock(&unix_sk(other)->iolock); err: kfree_skb(newskb); if (send_sigpipe && !(flags & MSG_NOSIGNAL)) @@ -2109,7 +2110,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); do { - mutex_lock(&u->readlock); + mutex_lock(&u->iolock); skip = sk_peek_offset(sk, flags); skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err, @@ -2117,14 +2118,14 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, if (skb) break; - mutex_unlock(&u->readlock); + mutex_unlock(&u->iolock); if (err != -EAGAIN) break; } while (timeo && !__skb_wait_for_more_packets(sk, &err, &timeo, last)); - if (!skb) { /* implies readlock unlocked */ + if (!skb) { /* implies iolock unlocked */ unix_state_lock(sk); /* Signal EOF on disconnected non-blocking SEQPACKET socket. 
*/ if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN && @@ -2189,7 +2190,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, out_free: skb_free_datagram(sk, skb); - mutex_unlock(&u->readlock); + mutex_unlock(&u->iolock); out: return err; } @@ -2284,7 +2285,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state) /* Lock the socket to prevent queue disordering * while sleeps in memcpy_tomsg */ - mutex_lock(&u->readlock); + mutex_lock(&u->iolock); if (flags & MSG_PEEK) skip = sk_peek_offset(sk, flags); @@ -2326,7 +2327,7 @@ again: break; } - mutex_unlock(&u->readlock); + mutex_unlock(&u->iolock); timeo = unix_stream_data_wait(sk, timeo, last, last_len); @@ -2337,7 +2338,7 @@ again: goto out; } - mutex_lock(&u->readlock); + mutex_lock(&u->iolock); goto redo; unlock: unix_state_unlock(sk); @@ -2440,7 +2441,7 @@ unlock: } } while (size); - mutex_unlock(&u->readlock); + mutex_unlock(&u->iolock); if (state->msg) scm_recv(sock, state->msg, &scm, flags); else @@ -2481,9 +2482,9 @@ static ssize_t skb_unix_socket_splice(struct sock *sk, int ret; struct unix_sock *u = unix_sk(sk); - mutex_unlock(&u->readlock); + mutex_unlock(&u->iolock); ret = splice_to_pipe(pipe, spd); - mutex_lock(&u->readlock); + mutex_lock(&u->iolock); return ret; } -- cgit v1.1 From c6935931c1894ff857616ff8549b61236a19148f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 4 Sep 2016 14:31:46 -0700 Subject: Linux 4.8-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 67f42d5..a4e6cc5 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 8 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Psychotic Stoned Sheep # *DOCUMENTATION* -- cgit v1.1 From f065e9e4addd75c21bb976bb2558648bf4f61de6 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 31 Aug 2016 10:56:48 -0300 Subject: ARM: dts: imx6qdl: Fix SPDIF regression Commit 833f2cbf7091 ("ARM: dts: imx6: change 
the core clock of spdif") changed many more clocks than only the SPDIF core clock as stated in the commit message. The MLB clock has been added and this causes SPDIF regression as reported by Xavi Drudis Ferran and also in this forum post: https://forum.digikey.com/thread/34240 The MX6Q Reference Manual does not mention that MLB is a clock related to SPDIF, so change it back to a dummy clock to restore SPDIF functionality. Thanks to Ambika for providing the fix at: https://community.nxp.com/thread/387131 Fixes: 833f2cbf7091 ("ARM: dts: imx6: change the core clock of spdif") Cc: # 4.4.x Reported-by: Xavi Drudis Ferran Signed-off-by: Fabio Estevam Tested-by: Xavi Drudis Ferran Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6qdl.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index b620ac8..b13b0b2 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -243,7 +243,7 @@ clocks = <&clks IMX6QDL_CLK_SPDIF_GCLK>, <&clks IMX6QDL_CLK_OSC>, <&clks IMX6QDL_CLK_SPDIF>, <&clks IMX6QDL_CLK_ASRC>, <&clks IMX6QDL_CLK_DUMMY>, <&clks IMX6QDL_CLK_ESAI_EXTAL>, - <&clks IMX6QDL_CLK_IPG>, <&clks IMX6QDL_CLK_MLB>, + <&clks IMX6QDL_CLK_IPG>, <&clks IMX6QDL_CLK_DUMMY>, <&clks IMX6QDL_CLK_DUMMY>, <&clks IMX6QDL_CLK_SPBA>; clock-names = "core", "rxtx0", "rxtx1", "rxtx2", -- cgit v1.1 From cc2187a6e037bc64404f63c6d650ff263c2200c0 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 4 Sep 2016 11:37:36 +0200 Subject: x86/microcode/AMD: Fix load of builtin microcode with randomized memory We do not need to add the randomization offset when the microcode is built in. 
Reported-and-tested-by: Emanuel Czirai Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/20160904093736.GA11939@pd.tnic Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/microcode/amd.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index b816971..620ab06 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -54,6 +54,7 @@ static LIST_HEAD(pcache); */ static u8 *container; static size_t container_size; +static bool ucode_builtin; static u32 ucode_new_rev; static u8 amd_ucode_patch[PATCH_MAX_SIZE]; @@ -281,18 +282,22 @@ static bool __init load_builtin_amd_microcode(struct cpio_data *cp, void __init load_ucode_amd_bsp(unsigned int family) { struct cpio_data cp; + bool *builtin; void **data; size_t *size; #ifdef CONFIG_X86_32 data = (void **)__pa_nodebug(&ucode_cpio.data); size = (size_t *)__pa_nodebug(&ucode_cpio.size); + builtin = (bool *)__pa_nodebug(&ucode_builtin); #else data = &ucode_cpio.data; size = &ucode_cpio.size; + builtin = &ucode_builtin; #endif - if (!load_builtin_amd_microcode(&cp, family)) + *builtin = load_builtin_amd_microcode(&cp, family); + if (!*builtin) cp = find_ucode_in_initrd(); if (!(cp.data && cp.size)) @@ -373,7 +378,8 @@ void load_ucode_amd_ap(void) return; /* Add CONFIG_RANDOMIZE_MEMORY offset. */ - cont += PAGE_OFFSET - __PAGE_OFFSET_BASE; + if (!ucode_builtin) + cont += PAGE_OFFSET - __PAGE_OFFSET_BASE; eax = cpuid_eax(0x00000001); eq = (struct equiv_cpu_entry *)(cont + CONTAINER_HDR_SZ); @@ -439,7 +445,8 @@ int __init save_microcode_in_initrd_amd(void) container = cont_va; /* Add CONFIG_RANDOMIZE_MEMORY offset. 
*/ - container += PAGE_OFFSET - __PAGE_OFFSET_BASE; + if (!ucode_builtin) + container += PAGE_OFFSET - __PAGE_OFFSET_BASE; eax = cpuid_eax(0x00000001); eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); -- cgit v1.1 From d7127b5e5fa0551be21b86640f1648b224e36d43 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 26 Aug 2016 08:16:00 +0200 Subject: locking/barriers: Don't use sizeof(void) in lockless_dereference() My previous commit: 112dc0c8069e ("locking/barriers: Suppress sparse warnings in lockless_dereference()") caused sparse to complain that (in radix-tree.h) we use sizeof(void) since that rcu_dereference()s a void *. Really, all we need is to have the expression *p in here somewhere to make sure p is a pointer type, and sizeof(*p) was the thing that came to my mind first to make sure that's done without really doing anything at runtime. Another thing I had considered was using typeof(*p), but obviously we can't just declare a typeof(*p) variable either, since that may end up being void. Declaring a variable as typeof(*p)* gets around that, and still checks that typeof(*p) is valid, so do that. This type construction can't be done for _________p1 because that will actually be used and causes sparse address space warnings, so keep a separate unused variable for it. Reported-by: Fengguang Wu Signed-off-by: Johannes Berg Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Paul E . 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kbuild-all@01.org Fixes: 112dc0c8069e ("locking/barriers: Suppress sparse warnings in lockless_dereference()") Link: http://lkml.kernel.org/r/1472192160-4049-1-git-send-email-johannes@sipsolutions.net Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 436aa4e..6685698 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -527,13 +527,14 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s * object's lifetime is managed by something other than RCU. That * "something other" might be reference counting or simple immortality. * - * The seemingly unused size_t variable is to validate @p is indeed a pointer - * type by making sure it can be dereferenced. + * The seemingly unused variable ___typecheck_p validates that @p is + * indeed a pointer type by using a pointer to typeof(*p) as the type. + * Taking a pointer to typeof(*p) again is needed in case p is void *. */ #define lockless_dereference(p) \ ({ \ typeof(p) _________p1 = READ_ONCE(p); \ - size_t __maybe_unused __size_of_ptr = sizeof(*(p)); \ + typeof(*(p)) *___typecheck_p __maybe_unused; \ smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ (_________p1); \ }) -- cgit v1.1 From 58763148758057ffc447bf990321d3ea86d199a0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 30 Aug 2016 10:15:03 +0200 Subject: perf/core: Remove WARN from perf_event_read() This effectively reverts commit: 71e7bc2bab77 ("perf/core: Check return value of the perf_event_read() IPI") ... and puts in a comment explaining why we ignore the return value. 
Reported-by: Vegard Nossum Signed-off-by: Peter Zijlstra (Intel) Cc: David Carrillo-Cisneros Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 71e7bc2bab77 ("perf/core: Check return value of the perf_event_read() IPI") Signed-off-by: Ingo Molnar --- kernel/events/core.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 3cfabdf..07ac859 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3549,10 +3549,18 @@ static int perf_event_read(struct perf_event *event, bool group) .group = group, .ret = 0, }; - ret = smp_call_function_single(event->oncpu, __perf_event_read, &data, 1); - /* The event must have been read from an online CPU: */ - WARN_ON_ONCE(ret); - ret = ret ? : data.ret; + /* + * Purposely ignore the smp_call_function_single() return + * value. + * + * If event->oncpu isn't a valid CPU it means the event got + * scheduled out and that will have updated the event count. + * + * Therefore, either way, we'll have an up-to-date event count + * after this. + */ + (void)smp_call_function_single(event->oncpu, __perf_event_read, &data, 1); + ret = data.ret; } else if (event->state == PERF_EVENT_STATE_INACTIVE) { struct perf_event_context *ctx = event->ctx; unsigned long flags; -- cgit v1.1 From 135e8c9250dd5c8c9aae5984fde6f230d0cbfeaf Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Mon, 5 Sep 2016 13:16:40 +1000 Subject: sched/core: Fix a race between try_to_wake_up() and a woken up task The origin of the issue I've seen is related to a missing memory barrier between check for task->state and the check for task->on_rq. 
The task being woken up is already awake from a schedule() and is doing the following: do { schedule() set_current_state(TASK_(UN)INTERRUPTIBLE); } while (!cond); The waker actually gets stuck doing the following in try_to_wake_up(): while (p->on_cpu) cpu_relax(); Analysis: The instance I've seen involves the following race: CPU1 CPU2 while () { if (cond) break; do { schedule(); set_current_state(TASK_UN..) } while (!cond); wakeup_routine() spin_lock_irqsave(wait_lock) raw_spin_lock_irqsave(wait_lock) wake_up_process() } try_to_wake_up() set_current_state(TASK_RUNNING); .. list_del(&waiter.list); CPU2 wakes up CPU1, but before it can get the wait_lock and set current state to TASK_RUNNING the following occurs: CPU3 wakeup_routine() raw_spin_lock_irqsave(wait_lock) if (!list_empty) wake_up_process() try_to_wake_up() raw_spin_lock_irqsave(p->pi_lock) .. if (p->on_rq && ttwu_wakeup()) .. while (p->on_cpu) cpu_relax() .. CPU3 tries to wake up the task on CPU1 again since it finds it on the wait_queue, CPU1 is spinning on wait_lock, but immediately after CPU2, CPU3 got it. CPU3 checks the state of p on CPU1, it is TASK_UNINTERRUPTIBLE and the task is spinning on the wait_lock. Interestingly since p->on_rq is checked under pi_lock, I've noticed that try_to_wake_up() finds p->on_rq to be 0. This was the most confusing bit of the analysis, but p->on_rq is changed under runqueue lock, rq_lock, the p->on_rq check is not reliable without this fix IMHO. The race is visible (based on the analysis) only when ttwu_queue() does a remote wakeup via ttwu_queue_remote. In which case the p->on_rq change is not done under the pi_lock. The result is that after a while the entire system locks up on the raw_spin_lock_irqsave(wait_lock) and the holder spins infinitely. Reproduction of the issue: The issue can be reproduced after a long run on my system with 80 threads and having to tweak available memory to very low and running memory stress-ng mmapfork test.
It usually takes a long time to reproduce. I am trying to work on a test case that can reproduce the issue faster, but thats work in progress. I am still testing the changes on my still in a loop and the tests seem OK thus far. Big thanks to Benjamin and Nick for helping debug this as well. Ben helped catch the missing barrier, Nick caught every missing bit in my theory. Signed-off-by: Balbir Singh [ Updated comment to clarify matching barriers. Many architectures do not have a full barrier in switch_to() so that cannot be relied upon. ] Signed-off-by: Peter Zijlstra (Intel) Acked-by: Benjamin Herrenschmidt Cc: Alexey Kardashevskiy Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Nicholas Piggin Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Link: http://lkml.kernel.org/r/e02cce7b-d9ca-1ad0-7a61-ea97c7582b37@gmail.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2a906f2..44817c6 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2016,6 +2016,28 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) success = 1; /* we're going to change ->state */ cpu = task_cpu(p); + /* + * Ensure we load p->on_rq _after_ p->state, otherwise it would + * be possible to, falsely, observe p->on_rq == 0 and get stuck + * in smp_cond_load_acquire() below. + * + * sched_ttwu_pending() try_to_wake_up() + * [S] p->on_rq = 1; [L] P->state + * UNLOCK rq->lock -----. + * \ + * +--- RMB + * schedule() / + * LOCK rq->lock -----' + * UNLOCK rq->lock + * + * [task p] + * [S] p->state = UNINTERRUPTIBLE [L] p->on_rq + * + * Pairs with the UNLOCK+LOCK on rq->lock from the + * last wakeup of our task and the schedule that got our task + * current. 
+ */ + smp_rmb(); if (p->on_rq && ttwu_remote(p, wake_flags)) goto stat; -- cgit v1.1 From d4c4fed08f31f3746000c46cb1b20bed2959547a Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 15 Aug 2016 09:05:45 -0600 Subject: efi: Make for_each_efi_memory_desc_in_map() cope with running on Xen While commit 55f1ea15216 ("efi: Fix for_each_efi_memory_desc_in_map() for empty memmaps") made an attempt to deal with empty memory maps, it didn't address the case where the map field never gets set, as is apparently the case when running under Xen. Reported-by: Tested-by: Cc: Vitaly Kuznetsov Cc: Jiri Slaby Cc: Mark Rutland Cc: # v4.7+ Signed-off-by: Jan Beulich [ Guard the loop with a NULL check instead of pointer underflow ] Signed-off-by: Matt Fleming --- include/linux/efi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/efi.h b/include/linux/efi.h index 7f5a582..23cd3ce 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -946,7 +946,7 @@ extern int efi_memattr_apply_permissions(struct mm_struct *mm, /* Iterate through an efi_memory_map */ #define for_each_efi_memory_desc_in_map(m, md) \ for ((md) = (m)->map; \ - ((void *)(md) + (m)->desc_size) <= (m)->map_end; \ + (md) && ((void *)(md) + (m)->desc_size) <= (m)->map_end; \ (md) = (void *)(md) + (m)->desc_size) /** -- cgit v1.1 From 4af9ed578a50cd331a725322cfd9d555251ce788 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Tue, 30 Aug 2016 12:41:37 +0200 Subject: efi: Fix handling error value in fdt_find_uefi_params of_get_flat_dt_subnode_by_name can return negative value in case of error. Assigning the result to unsigned variable and checking if the variable is lesser than zero is incorrect and always false. The patch fixes it by using signed variable to check the result. 
The problem has been detected using semantic patch scripts/coccinelle/tests/unsigned_lesser_than_zero.cocci Signed-off-by: Andrzej Hajda Cc: Bartlomiej Zolnierkiewicz Cc: Marek Szyprowski Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Shawn Lin Cc: Mark Rutland Cc: Signed-off-by: Matt Fleming --- drivers/firmware/efi/efi.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 5a2631a..7dd2e2d 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -657,9 +657,12 @@ static int __init fdt_find_uefi_params(unsigned long node, const char *uname, } if (subnode) { - node = of_get_flat_dt_subnode_by_name(node, subnode); - if (node < 0) + int err = of_get_flat_dt_subnode_by_name(node, subnode); + + if (err < 0) return 0; + + node = err; } return __find_uefi_params(node, info, dt_params[i].params); -- cgit v1.1 From 9d7aba7786b6c9eec6d083e43fd639228c400c3a Mon Sep 17 00:00:00 2001 From: John Youn Date: Fri, 26 Aug 2016 18:43:01 -0700 Subject: Revert "usb: dwc3: gadget: always decrement by 1" This reverts commit 6f8245b4e37c ("usb: dwc3: gadget: always decrement by 1"). We can't always decrement this value. We should decrement only if the calculation of free slots results in a LINK TRB being among one of the free slots (dequeue < enqueue). Otherwise, if the LINK TRB is not among the free slots then it should not be decremented. 
Signed-off-by: John Youn Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/gadget.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 7a8d3d8..122e64d 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -884,9 +884,12 @@ static u32 dwc3_calc_trbs_left(struct dwc3_ep *dep) return DWC3_TRB_NUM - 1; } - trbs_left = dep->trb_dequeue - dep->trb_enqueue - 1; + trbs_left = dep->trb_dequeue - dep->trb_enqueue; trbs_left &= (DWC3_TRB_NUM - 1); + if (dep->trb_dequeue < dep->trb_enqueue) + trbs_left--; + return trbs_left; } -- cgit v1.1 From b2f1eaaee564c5593c303f4d15d827924cb6d20d Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 23 Aug 2016 21:11:13 +0900 Subject: usb: gadget: udc: renesas-usb3: clear VBOUT bit in DRD_CON This driver should clear the bit. Otherwise, the VBUS will output wrongly if the usb port on a board has VBUS output capability. Fixes: 746bfe63bba3 ("usb: gadget: renesas_usb3: add support for Renesas USB3.0 peripheral controller") Cc: # v4.5+ Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/renesas_usb3.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c index 93a3bec..fb8fc34 100644 --- a/drivers/usb/gadget/udc/renesas_usb3.c +++ b/drivers/usb/gadget/udc/renesas_usb3.c @@ -106,6 +106,7 @@ /* DRD_CON */ #define DRD_CON_PERI_CON BIT(24) +#define DRD_CON_VBOUT BIT(0) /* USB_INT_ENA_1 and USB_INT_STA_1 */ #define USB_INT_1_B3_PLLWKUP BIT(31) @@ -363,6 +364,7 @@ static void usb3_init_epc_registers(struct renesas_usb3 *usb3) { /* FIXME: How to change host / peripheral mode as well? 
*/ usb3_set_bit(usb3, DRD_CON_PERI_CON, USB3_DRD_CON); + usb3_clear_bit(usb3, DRD_CON_VBOUT, USB3_DRD_CON); usb3_write(usb3, ~0, USB3_USB_INT_STA_1); usb3_enable_irq_1(usb3, USB_INT_1_VBUS_CNG); -- cgit v1.1 From 7c113f7df710df2aed63709815e518608dbd338c Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 22 Aug 2016 22:45:36 -0300 Subject: usb: phy: phy-generic: Check clk_prepare_enable() error clk_prepare_enable() may fail, so we should better check its return value and propagate it in the case of failure. Signed-off-by: Fabio Estevam Signed-off-by: Felipe Balbi --- drivers/usb/phy/phy-generic.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/usb/phy/phy-generic.c b/drivers/usb/phy/phy-generic.c index 980c9de..427efb5 100644 --- a/drivers/usb/phy/phy-generic.c +++ b/drivers/usb/phy/phy-generic.c @@ -144,14 +144,18 @@ static irqreturn_t nop_gpio_vbus_thread(int irq, void *data) int usb_gen_phy_init(struct usb_phy *phy) { struct usb_phy_generic *nop = dev_get_drvdata(phy->dev); + int ret; if (!IS_ERR(nop->vcc)) { if (regulator_enable(nop->vcc)) dev_err(phy->dev, "Failed to enable power\n"); } - if (!IS_ERR(nop->clk)) - clk_prepare_enable(nop->clk); + if (!IS_ERR(nop->clk)) { + ret = clk_prepare_enable(nop->clk); + if (ret) + return ret; + } nop_reset(nop); -- cgit v1.1 From 519d8bd4b5d3d82c413eac5bb42b106bb4b9ec15 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 29 Aug 2016 18:00:38 +0900 Subject: usb: renesas_usbhs: fix clearing the {BRDY,BEMP}STS condition The previous driver is possible to stop the transfer wrongly. For example: 1) An interrupt happens, but not BRDY interruption. 2) Read INTSTS0. And than state->intsts0 is not set to BRDY. 3) BRDY is set to 1 here. 4) Read BRDYSTS. 5) Clear the BRDYSTS. And then. the BRDY is cleared wrongly. Remarks: - The INTSTS0.BRDY is read only. - If any bits of BRDYSTS are set to 1, the BRDY is set to 1. - If BRDYSTS is 0, the BRDY is set to 0. 
So, this patch adds condition to avoid such situation. (And about NRDYSTS, this is not used for now. But, avoiding any side effects, this patch doesn't touch it.) Fixes: d5c6a1e024dd ("usb: renesas_usbhs: fixup interrupt status clear method") Cc: # v3.8+ Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- drivers/usb/renesas_usbhs/mod.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/usb/renesas_usbhs/mod.c b/drivers/usb/renesas_usbhs/mod.c index d4be5d5..28965ef 100644 --- a/drivers/usb/renesas_usbhs/mod.c +++ b/drivers/usb/renesas_usbhs/mod.c @@ -282,9 +282,16 @@ static irqreturn_t usbhs_interrupt(int irq, void *data) if (usbhs_mod_is_host(priv)) usbhs_write(priv, INTSTS1, ~irq_state.intsts1 & INTSTS1_MAGIC); - usbhs_write(priv, BRDYSTS, ~irq_state.brdysts); + /* + * The driver should not clear the xxxSTS after the line of + * "call irq callback functions" because each "if" statement is + * possible to call the callback function for avoiding any side effects. + */ + if (irq_state.intsts0 & BRDY) + usbhs_write(priv, BRDYSTS, ~irq_state.brdysts); usbhs_write(priv, NRDYSTS, ~irq_state.nrdysts); - usbhs_write(priv, BEMPSTS, ~irq_state.bempsts); + if (irq_state.intsts0 & BEMP) + usbhs_write(priv, BEMPSTS, ~irq_state.bempsts); /* * call irq callback functions -- cgit v1.1 From dadb57abc37499f565b23933dbf49b435c3ba8af Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Mon, 29 Aug 2016 14:38:51 -0600 Subject: efi/libstub: Allocate headspace in efi_get_memory_map() efi_get_memory_map() allocates a buffer to store the memory map that it retrieves. This buffer may need to be reused by the client after ExitBootServices() is called, at which point allocations are not longer permitted. 
To support this usecase, provide the allocated buffer size back to the client, and allocate some additional headroom to account for any reasonable growth in the map that is likely to happen between the call to efi_get_memory_map() and the client reusing the buffer. Signed-off-by: Jeffrey Hugo Cc: Ard Biesheuvel Cc: Mark Rutland Cc: Leif Lindholm Cc: Ingo Molnar Cc: Signed-off-by: Matt Fleming --- arch/x86/boot/compressed/eboot.c | 20 ++++-- drivers/firmware/efi/libstub/efi-stub-helper.c | 96 ++++++++++++++++++-------- drivers/firmware/efi/libstub/fdt.c | 17 +++-- drivers/firmware/efi/libstub/random.c | 12 +++- include/linux/efi.h | 15 ++-- 5 files changed, 111 insertions(+), 49 deletions(-) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index ff574da..c5b7c7b 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -1008,7 +1008,7 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle, bool is64) { struct efi_info *efi = &boot_params->efi_info; - unsigned long map_sz, key, desc_size; + unsigned long map_sz, key, desc_size, buff_size; efi_memory_desc_t *mem_map; struct setup_data *e820ext; const char *signature; @@ -1019,14 +1019,20 @@ static efi_status_t exit_boot(struct boot_params *boot_params, bool called_exit = false; u8 nr_entries; int i; - - nr_desc = 0; - e820ext = NULL; - e820ext_size = 0; + struct efi_boot_memmap map; + + nr_desc = 0; + e820ext = NULL; + e820ext_size = 0; + map.map = &mem_map; + map.map_size = &map_sz; + map.desc_size = &desc_size; + map.desc_ver = &desc_version; + map.key_ptr = &key; + map.buff_size = &buff_size; get_map: - status = efi_get_memory_map(sys_table, &mem_map, &map_sz, &desc_size, - &desc_version, &key); + status = efi_get_memory_map(sys_table, &map); if (status != EFI_SUCCESS) return status; diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index 3bd127f9..29368ac 100644 --- 
a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -41,6 +41,8 @@ static unsigned long __chunk_size = EFI_READ_CHUNK_SIZE; #define EFI_ALLOC_ALIGN EFI_PAGE_SIZE #endif +#define EFI_MMAP_NR_SLACK_SLOTS 8 + struct file_info { efi_file_handle_t *handle; u64 size; @@ -63,49 +65,62 @@ void efi_printk(efi_system_table_t *sys_table_arg, char *str) } } +static inline bool mmap_has_headroom(unsigned long buff_size, + unsigned long map_size, + unsigned long desc_size) +{ + unsigned long slack = buff_size - map_size; + + return slack / desc_size >= EFI_MMAP_NR_SLACK_SLOTS; +} + efi_status_t efi_get_memory_map(efi_system_table_t *sys_table_arg, - efi_memory_desc_t **map, - unsigned long *map_size, - unsigned long *desc_size, - u32 *desc_ver, - unsigned long *key_ptr) + struct efi_boot_memmap *map) { efi_memory_desc_t *m = NULL; efi_status_t status; unsigned long key; u32 desc_version; - *map_size = sizeof(*m) * 32; + *map->desc_size = sizeof(*m); + *map->map_size = *map->desc_size * 32; + *map->buff_size = *map->map_size; again: - /* - * Add an additional efi_memory_desc_t because we're doing an - * allocation which may be in a new descriptor region. - */ - *map_size += sizeof(*m); status = efi_call_early(allocate_pool, EFI_LOADER_DATA, - *map_size, (void **)&m); + *map->map_size, (void **)&m); if (status != EFI_SUCCESS) goto fail; - *desc_size = 0; + *map->desc_size = 0; key = 0; - status = efi_call_early(get_memory_map, map_size, m, - &key, desc_size, &desc_version); - if (status == EFI_BUFFER_TOO_SMALL) { + status = efi_call_early(get_memory_map, map->map_size, m, + &key, map->desc_size, &desc_version); + if (status == EFI_BUFFER_TOO_SMALL || + !mmap_has_headroom(*map->buff_size, *map->map_size, + *map->desc_size)) { efi_call_early(free_pool, m); + /* + * Make sure there is some entries of headroom so that the + * buffer can be reused for a new map after allocations are + * no longer permitted. 
Its unlikely that the map will grow to + * exceed this headroom once we are ready to trigger + * ExitBootServices() + */ + *map->map_size += *map->desc_size * EFI_MMAP_NR_SLACK_SLOTS; + *map->buff_size = *map->map_size; goto again; } if (status != EFI_SUCCESS) efi_call_early(free_pool, m); - if (key_ptr && status == EFI_SUCCESS) - *key_ptr = key; - if (desc_ver && status == EFI_SUCCESS) - *desc_ver = desc_version; + if (map->key_ptr && status == EFI_SUCCESS) + *map->key_ptr = key; + if (map->desc_ver && status == EFI_SUCCESS) + *map->desc_ver = desc_version; fail: - *map = m; + *map->map = m; return status; } @@ -113,13 +128,20 @@ fail: unsigned long get_dram_base(efi_system_table_t *sys_table_arg) { efi_status_t status; - unsigned long map_size; + unsigned long map_size, buff_size; unsigned long membase = EFI_ERROR; struct efi_memory_map map; efi_memory_desc_t *md; + struct efi_boot_memmap boot_map; - status = efi_get_memory_map(sys_table_arg, (efi_memory_desc_t **)&map.map, - &map_size, &map.desc_size, NULL, NULL); + boot_map.map = (efi_memory_desc_t **)&map.map; + boot_map.map_size = &map_size; + boot_map.desc_size = &map.desc_size; + boot_map.desc_ver = NULL; + boot_map.key_ptr = NULL; + boot_map.buff_size = &buff_size; + + status = efi_get_memory_map(sys_table_arg, &boot_map); if (status != EFI_SUCCESS) return membase; @@ -144,15 +166,22 @@ efi_status_t efi_high_alloc(efi_system_table_t *sys_table_arg, unsigned long size, unsigned long align, unsigned long *addr, unsigned long max) { - unsigned long map_size, desc_size; + unsigned long map_size, desc_size, buff_size; efi_memory_desc_t *map; efi_status_t status; unsigned long nr_pages; u64 max_addr = 0; int i; + struct efi_boot_memmap boot_map; + + boot_map.map = &map; + boot_map.map_size = &map_size; + boot_map.desc_size = &desc_size; + boot_map.desc_ver = NULL; + boot_map.key_ptr = NULL; + boot_map.buff_size = &buff_size; - status = efi_get_memory_map(sys_table_arg, &map, &map_size, &desc_size, - NULL, NULL); + 
status = efi_get_memory_map(sys_table_arg, &boot_map); if (status != EFI_SUCCESS) goto fail; @@ -230,14 +259,21 @@ efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, unsigned long size, unsigned long align, unsigned long *addr) { - unsigned long map_size, desc_size; + unsigned long map_size, desc_size, buff_size; efi_memory_desc_t *map; efi_status_t status; unsigned long nr_pages; int i; + struct efi_boot_memmap boot_map; + + boot_map.map = &map; + boot_map.map_size = &map_size; + boot_map.desc_size = &desc_size; + boot_map.desc_ver = NULL; + boot_map.key_ptr = NULL; + boot_map.buff_size = &buff_size; - status = efi_get_memory_map(sys_table_arg, &map, &map_size, &desc_size, - NULL, NULL); + status = efi_get_memory_map(sys_table_arg, &boot_map); if (status != EFI_SUCCESS) goto fail; diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index e58abfa..bec0fa8 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -175,13 +175,21 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, unsigned long fdt_addr, unsigned long fdt_size) { - unsigned long map_size, desc_size; + unsigned long map_size, desc_size, buff_size; u32 desc_ver; unsigned long mmap_key; efi_memory_desc_t *memory_map, *runtime_map; unsigned long new_fdt_size; efi_status_t status; int runtime_entry_count = 0; + struct efi_boot_memmap map; + + map.map = &runtime_map; + map.map_size = &map_size; + map.desc_size = &desc_size; + map.desc_ver = &desc_ver; + map.key_ptr = &mmap_key; + map.buff_size = &buff_size; /* * Get a copy of the current memory map that we will use to prepare @@ -189,8 +197,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, * subsequent allocations adding entries, since they could not affect * the number of EFI_MEMORY_RUNTIME regions. 
*/ - status = efi_get_memory_map(sys_table, &runtime_map, &map_size, - &desc_size, &desc_ver, &mmap_key); + status = efi_get_memory_map(sys_table, &map); if (status != EFI_SUCCESS) { pr_efi_err(sys_table, "Unable to retrieve UEFI memory map.\n"); return status; @@ -199,6 +206,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, pr_efi(sys_table, "Exiting boot services and installing virtual address map...\n"); + map.map = &memory_map; /* * Estimate size of new FDT, and allocate memory for it. We * will allocate a bigger buffer if this ends up being too @@ -218,8 +226,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, * we can get the memory map key needed for * exit_boot_services(). */ - status = efi_get_memory_map(sys_table, &memory_map, &map_size, - &desc_size, &desc_ver, &mmap_key); + status = efi_get_memory_map(sys_table, &map); if (status != EFI_SUCCESS) goto fail_free_new_fdt; diff --git a/drivers/firmware/efi/libstub/random.c b/drivers/firmware/efi/libstub/random.c index 53f6d3f..0c9f58c 100644 --- a/drivers/firmware/efi/libstub/random.c +++ b/drivers/firmware/efi/libstub/random.c @@ -73,12 +73,20 @@ efi_status_t efi_random_alloc(efi_system_table_t *sys_table_arg, unsigned long random_seed) { unsigned long map_size, desc_size, total_slots = 0, target_slot; + unsigned long buff_size; efi_status_t status; efi_memory_desc_t *memory_map; int map_offset; + struct efi_boot_memmap map; - status = efi_get_memory_map(sys_table_arg, &memory_map, &map_size, - &desc_size, NULL, NULL); + map.map = &memory_map; + map.map_size = &map_size; + map.desc_size = &desc_size; + map.desc_ver = NULL; + map.key_ptr = NULL; + map.buff_size = &buff_size; + + status = efi_get_memory_map(sys_table_arg, &map); if (status != EFI_SUCCESS) return status; diff --git a/include/linux/efi.h b/include/linux/efi.h index 23cd3ce..943fee5 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -118,6 +118,15 @@ typedef struct { u32 
imagesize; } efi_capsule_header_t; +struct efi_boot_memmap { + efi_memory_desc_t **map; + unsigned long *map_size; + unsigned long *desc_size; + u32 *desc_ver; + unsigned long *key_ptr; + unsigned long *buff_size; +}; + /* * EFI capsule flags */ @@ -1371,11 +1380,7 @@ char *efi_convert_cmdline(efi_system_table_t *sys_table_arg, efi_loaded_image_t *image, int *cmd_line_len); efi_status_t efi_get_memory_map(efi_system_table_t *sys_table_arg, - efi_memory_desc_t **map, - unsigned long *map_size, - unsigned long *desc_size, - u32 *desc_ver, - unsigned long *key_ptr); + struct efi_boot_memmap *map); efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, unsigned long size, unsigned long align, -- cgit v1.1 From fc07716ba803483be91bc4b2344f9c84985e6f07 Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Mon, 29 Aug 2016 14:38:52 -0600 Subject: efi/libstub: Introduce ExitBootServices helper The spec allows ExitBootServices to fail with EFI_INVALID_PARAMETER if a race condition has occurred where the EFI has updated the memory map after the stub grabbed a reference to the map. The spec defines a retry proceedure with specific requirements to handle this scenario. This scenario was previously observed on x86 - commit d3768d885c6c ("x86, efi: retry ExitBootServices() on failure") but the current fix is not spec compliant and the scenario is now observed on the Qualcomm Technologies QDF2432 via the FDT stub which does not handle the error and thus causes boot failures. The user will notice the boot failure as the kernel is not executed and the system may drop back to a UEFI shell, but will be unresponsive to input and the system will require a power cycle to recover. Add a helper to the stub library that correctly adheres to the spec in the case of EFI_INVALID_PARAMETER from ExitBootServices and can be universally used across all stub implementations. 
Signed-off-by: Jeffrey Hugo Cc: Ard Biesheuvel Cc: Mark Rutland Cc: Leif Lindholm Cc: Ingo Molnar Cc: Signed-off-by: Matt Fleming --- drivers/firmware/efi/libstub/efi-stub-helper.c | 73 ++++++++++++++++++++++++++ include/linux/efi.h | 10 ++++ 2 files changed, 83 insertions(+) diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index 29368ac..aded106 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -740,3 +740,76 @@ char *efi_convert_cmdline(efi_system_table_t *sys_table_arg, *cmd_line_len = options_bytes; return (char *)cmdline_addr; } + +/* + * Handle calling ExitBootServices according to the requirements set out by the + * spec. Obtains the current memory map, and returns that info after calling + * ExitBootServices. The client must specify a function to perform any + * processing of the memory map data prior to ExitBootServices. A client + * specific structure may be passed to the function via priv. The client + * function may be called multiple times. + */ +efi_status_t efi_exit_boot_services(efi_system_table_t *sys_table_arg, + void *handle, + struct efi_boot_memmap *map, + void *priv, + efi_exit_boot_map_processing priv_func) +{ + efi_status_t status; + + status = efi_get_memory_map(sys_table_arg, map); + + if (status != EFI_SUCCESS) + goto fail; + + status = priv_func(sys_table_arg, map, priv); + if (status != EFI_SUCCESS) + goto free_map; + + status = efi_call_early(exit_boot_services, handle, *map->key_ptr); + + if (status == EFI_INVALID_PARAMETER) { + /* + * The memory map changed between efi_get_memory_map() and + * exit_boot_services(). Per the UEFI Spec v2.6, Section 6.4: + * EFI_BOOT_SERVICES.ExitBootServices we need to get the + * updated map, and try again. The spec implies one retry + * should be sufficent, which is confirmed against the EDK2 + * implementation. 
Per the spec, we can only invoke + * get_memory_map() and exit_boot_services() - we cannot alloc + * so efi_get_memory_map() cannot be used, and we must reuse + * the buffer. For all practical purposes, the headroom in the + * buffer should account for any changes in the map so the call + * to get_memory_map() is expected to succeed here. + */ + *map->map_size = *map->buff_size; + status = efi_call_early(get_memory_map, + map->map_size, + *map->map, + map->key_ptr, + map->desc_size, + map->desc_ver); + + /* exit_boot_services() was called, thus cannot free */ + if (status != EFI_SUCCESS) + goto fail; + + status = priv_func(sys_table_arg, map, priv); + /* exit_boot_services() was called, thus cannot free */ + if (status != EFI_SUCCESS) + goto fail; + + status = efi_call_early(exit_boot_services, handle, *map->key_ptr); + } + + /* exit_boot_services() was called, thus cannot free */ + if (status != EFI_SUCCESS) + goto fail; + + return EFI_SUCCESS; + +free_map: + efi_call_early(free_pool, *map->map); +fail: + return status; +} diff --git a/include/linux/efi.h b/include/linux/efi.h index 943fee5..0148a30 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1462,4 +1462,14 @@ extern void efi_call_virt_check_flags(unsigned long flags, const char *call); arch_efi_call_virt_teardown(); \ }) +typedef efi_status_t (*efi_exit_boot_map_processing)( + efi_system_table_t *sys_table_arg, + struct efi_boot_memmap *map, + void *priv); + +efi_status_t efi_exit_boot_services(efi_system_table_t *sys_table, + void *handle, + struct efi_boot_memmap *map, + void *priv, + efi_exit_boot_map_processing priv_func); #endif /* _LINUX_EFI_H */ -- cgit v1.1 From ed9cc156c42ff0c0bf9b1d09df48a12bf0873473 Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Mon, 29 Aug 2016 14:38:53 -0600 Subject: efi/libstub: Use efi_exit_boot_services() in FDT The FDT code directly calls ExitBootServices. 
This is inadvisable as the UEFI spec details a complex set of errors, race conditions, and API interactions that the caller of ExitBootServices must get correct. The FDT code does not handle EFI_INVALID_PARAMETER as required by the spec, which causes intermittent boot failures on the Qualcomm Technologies QDF2432. Call the efi_exit_boot_services() helper intead, which handles the EFI_INVALID_PARAMETER scenario properly. Signed-off-by: Jeffrey Hugo Cc: Ard Biesheuvel Cc: Mark Rutland Cc: Leif Lindholm Cc: Ingo Molnar Cc: Signed-off-by: Matt Fleming --- drivers/firmware/efi/libstub/fdt.c | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index bec0fa8..a6a9311 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -152,6 +152,27 @@ fdt_set_fail: #define EFI_FDT_ALIGN EFI_PAGE_SIZE #endif +struct exit_boot_struct { + efi_memory_desc_t *runtime_map; + int *runtime_entry_count; +}; + +static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg, + struct efi_boot_memmap *map, + void *priv) +{ + struct exit_boot_struct *p = priv; + /* + * Update the memory map with virtual addresses. The function will also + * populate @runtime_map with copies of just the EFI_MEMORY_RUNTIME + * entries so that we can pass it straight to SetVirtualAddressMap() + */ + efi_get_virtmap(*map->map, *map->map_size, *map->desc_size, + p->runtime_map, p->runtime_entry_count); + + return EFI_SUCCESS; +} + /* * Allocate memory for a new FDT, then add EFI, commandline, and * initrd related fields to the FDT. 
This routine increases the @@ -183,6 +204,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, efi_status_t status; int runtime_entry_count = 0; struct efi_boot_memmap map; + struct exit_boot_struct priv; map.map = &runtime_map; map.map_size = &map_size; @@ -257,16 +279,11 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, } } - /* - * Update the memory map with virtual addresses. The function will also - * populate @runtime_map with copies of just the EFI_MEMORY_RUNTIME - * entries so that we can pass it straight into SetVirtualAddressMap() - */ - efi_get_virtmap(memory_map, map_size, desc_size, runtime_map, - &runtime_entry_count); - - /* Now we are ready to exit_boot_services.*/ - status = sys_table->boottime->exit_boot_services(handle, mmap_key); + sys_table->boottime->free_pool(memory_map); + priv.runtime_map = runtime_map; + priv.runtime_entry_count = &runtime_entry_count; + status = efi_exit_boot_services(sys_table, handle, &map, &priv, + exit_boot_func); if (status == EFI_SUCCESS) { efi_set_virtual_address_map_t *svam; -- cgit v1.1 From d64934019f6cc39202e2f78063709f61ca5cb364 Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Mon, 29 Aug 2016 14:38:54 -0600 Subject: x86/efi: Use efi_exit_boot_services() The eboot code directly calls ExitBootServices. This is inadvisable as the UEFI spec details a complex set of errors, race conditions, and API interactions that the caller of ExitBootServices must get correct. The eboot code attempts allocations after calling ExitBootSerives which is not permitted per the spec. Call the efi_exit_boot_services() helper intead, which handles the allocation scenario properly. 
Signed-off-by: Jeffrey Hugo Cc: Ard Biesheuvel Cc: Mark Rutland Cc: Leif Lindholm Cc: Ingo Molnar Cc: Signed-off-by: Matt Fleming --- arch/x86/boot/compressed/eboot.c | 136 +++++++++++++++++++-------------------- 1 file changed, 67 insertions(+), 69 deletions(-) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index c5b7c7b..94dd4a3 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -1004,85 +1004,87 @@ static efi_status_t alloc_e820ext(u32 nr_desc, struct setup_data **e820ext, return status; } +struct exit_boot_struct { + struct boot_params *boot_params; + struct efi_info *efi; + struct setup_data *e820ext; + __u32 e820ext_size; + bool is64; +}; + +static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg, + struct efi_boot_memmap *map, + void *priv) +{ + static bool first = true; + const char *signature; + __u32 nr_desc; + efi_status_t status; + struct exit_boot_struct *p = priv; + + if (first) { + nr_desc = *map->buff_size / *map->desc_size; + if (nr_desc > ARRAY_SIZE(p->boot_params->e820_map)) { + u32 nr_e820ext = nr_desc - + ARRAY_SIZE(p->boot_params->e820_map); + + status = alloc_e820ext(nr_e820ext, &p->e820ext, + &p->e820ext_size); + if (status != EFI_SUCCESS) + return status; + } + first = false; + } + + signature = p->is64 ? 
EFI64_LOADER_SIGNATURE : EFI32_LOADER_SIGNATURE; + memcpy(&p->efi->efi_loader_signature, signature, sizeof(__u32)); + + p->efi->efi_systab = (unsigned long)sys_table_arg; + p->efi->efi_memdesc_size = *map->desc_size; + p->efi->efi_memdesc_version = *map->desc_ver; + p->efi->efi_memmap = (unsigned long)*map->map; + p->efi->efi_memmap_size = *map->map_size; + +#ifdef CONFIG_X86_64 + p->efi->efi_systab_hi = (unsigned long)sys_table_arg >> 32; + p->efi->efi_memmap_hi = (unsigned long)*map->map >> 32; +#endif + + return EFI_SUCCESS; +} + static efi_status_t exit_boot(struct boot_params *boot_params, void *handle, bool is64) { - struct efi_info *efi = &boot_params->efi_info; unsigned long map_sz, key, desc_size, buff_size; efi_memory_desc_t *mem_map; struct setup_data *e820ext; - const char *signature; __u32 e820ext_size; - __u32 nr_desc, prev_nr_desc; efi_status_t status; __u32 desc_version; - bool called_exit = false; - u8 nr_entries; - int i; struct efi_boot_memmap map; + struct exit_boot_struct priv; + + map.map = &mem_map; + map.map_size = &map_sz; + map.desc_size = &desc_size; + map.desc_ver = &desc_version; + map.key_ptr = &key; + map.buff_size = &buff_size; + priv.boot_params = boot_params; + priv.efi = &boot_params->efi_info; + priv.e820ext = NULL; + priv.e820ext_size = 0; + priv.is64 = is64; - nr_desc = 0; - e820ext = NULL; - e820ext_size = 0; - map.map = &mem_map; - map.map_size = &map_sz; - map.desc_size = &desc_size; - map.desc_ver = &desc_version; - map.key_ptr = &key; - map.buff_size = &buff_size; - -get_map: - status = efi_get_memory_map(sys_table, &map); - + /* Might as well exit boot services now */ + status = efi_exit_boot_services(sys_table, handle, &map, &priv, + exit_boot_func); if (status != EFI_SUCCESS) return status; - prev_nr_desc = nr_desc; - nr_desc = map_sz / desc_size; - if (nr_desc > prev_nr_desc && - nr_desc > ARRAY_SIZE(boot_params->e820_map)) { - u32 nr_e820ext = nr_desc - ARRAY_SIZE(boot_params->e820_map); - - status = 
alloc_e820ext(nr_e820ext, &e820ext, &e820ext_size); - if (status != EFI_SUCCESS) - goto free_mem_map; - - efi_call_early(free_pool, mem_map); - goto get_map; /* Allocated memory, get map again */ - } - - signature = is64 ? EFI64_LOADER_SIGNATURE : EFI32_LOADER_SIGNATURE; - memcpy(&efi->efi_loader_signature, signature, sizeof(__u32)); - - efi->efi_systab = (unsigned long)sys_table; - efi->efi_memdesc_size = desc_size; - efi->efi_memdesc_version = desc_version; - efi->efi_memmap = (unsigned long)mem_map; - efi->efi_memmap_size = map_sz; - -#ifdef CONFIG_X86_64 - efi->efi_systab_hi = (unsigned long)sys_table >> 32; - efi->efi_memmap_hi = (unsigned long)mem_map >> 32; -#endif - - /* Might as well exit boot services now */ - status = efi_call_early(exit_boot_services, handle, key); - if (status != EFI_SUCCESS) { - /* - * ExitBootServices() will fail if any of the event - * handlers change the memory map. In which case, we - * must be prepared to retry, but only once so that - * we're guaranteed to exit on repeated failures instead - * of spinning forever. - */ - if (called_exit) - goto free_mem_map; - - called_exit = true; - efi_call_early(free_pool, mem_map); - goto get_map; - } - + e820ext = priv.e820ext; + e820ext_size = priv.e820ext_size; /* Historic? */ boot_params->alt_mem_k = 32 * 1024; @@ -1091,10 +1093,6 @@ get_map: return status; return EFI_SUCCESS; - -free_mem_map: - efi_call_early(free_pool, mem_map); - return status; } /* -- cgit v1.1 From 4d21cef3ea00ba3ac508eb61fb8db70e3e31df67 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 2 Sep 2016 12:33:49 +0200 Subject: KVM: s390: vsie: fix riccbd We store the address of riccbd at the wrong location, overwriting gvrd. This means that our nested guest will not be able to use runtime instrumentation. Also, a memory leak, if our KVM guest actually sets gvrd. Not noticed until now, as KVM guests never make use of gvrd and runtime instrumentation wasn't completely tested yet. 
Reported-by: Fan Zhang Reviewed-by: Cornelia Huck Signed-off-by: David Hildenbrand Signed-off-by: Cornelia Huck --- arch/s390/kvm/vsie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index c106488..d8673e2 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -584,7 +584,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) /* Validity 0x0044 will be checked by SIE */ if (rc) goto unpin; - scb_s->gvrd = hpa; + scb_s->riccbd = hpa; } return 0; unpin: -- cgit v1.1 From e1ff3dd1ae52cef5b5373c8cc4ad949c2c25a71c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 5 Sep 2016 13:55:20 +0200 Subject: ovl: fix workdir creation Workdir creation fails in latest kernel. Fix by allowing EOPNOTSUPP as a valid return value from vfs_removexattr(XATTR_NAME_POSIX_ACL_*). Upper filesystem may not support ACL and still be perfectly able to support overlayfs. Reported-by: Martin Ziegler Signed-off-by: Miklos Szeredi Fixes: c11b9fdd6a61 ("ovl: remove posix_acl_default from workdir") Cc: --- fs/overlayfs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index a4585f9..e2a94a2 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -835,11 +835,11 @@ retry: goto out_dput; err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT); - if (err && err != -ENODATA) + if (err && err != -ENODATA && err != -EOPNOTSUPP) goto out_dput; err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS); - if (err && err != -ENODATA) + if (err && err != -ENODATA && err != -EOPNOTSUPP) goto out_dput; /* Clear any inherited mode bits */ -- cgit v1.1 From 0f5aa88a7bb28b73253fb42b3df8202142769f39 Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Sun, 28 Aug 2016 18:47:12 +0200 Subject: ceph: do not modify fi->frag in need_reset_readdir() Commit f3c4ebe65ea1 ("ceph: using hash value to compose dentry offset") modified "if 
(fpos_frag(new_pos) != fi->frag)" to "if (fi->frag |= fpos_frag(new_pos))" in need_reset_readdir(), thus replacing a comparison operator with an assignment one. This looks like a typo which is reported by clang when building the kernel with some warning flags: fs/ceph/dir.c:600:22: error: using the result of an assignment as a condition without parentheses [-Werror,-Wparentheses] } else if (fi->frag |= fpos_frag(new_pos)) { ~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~ fs/ceph/dir.c:600:22: note: place parentheses around the assignment to silence this warning } else if (fi->frag |= fpos_frag(new_pos)) { ^ ( ) fs/ceph/dir.c:600:22: note: use '!=' to turn this compound assignment into an inequality comparison } else if (fi->frag |= fpos_frag(new_pos)) { ^~ != Fixes: f3c4ebe65ea1 ("ceph: using hash value to compose dentry offset") Signed-off-by: Nicolas Iooss Signed-off-by: Ilya Dryomov --- fs/ceph/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index c64a0b7..df4b3e6 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -597,7 +597,7 @@ static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos) if (is_hash_order(new_pos)) { /* no need to reset last_name for a forward seek when * dentries are sotred in hash order */ - } else if (fi->frag |= fpos_frag(new_pos)) { + } else if (fi->frag != fpos_frag(new_pos)) { return true; } rinfo = fi->last_readdir ? &fi->last_readdir->r_reply_info : NULL; -- cgit v1.1 From c86d06ba2818c5126078cb0cf4e0175ec381045b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Sep 2016 08:38:13 -0400 Subject: PM / QoS: avoid calling cancel_delayed_work_sync() during early boot of_clk_init() ends up calling into pm_qos_update_request() very early during boot where irq is expected to stay disabled. pm_qos_update_request() uses cancel_delayed_work_sync() which correctly assumes that irq is enabled on invocation and unconditionally disables and re-enables it. 
Gate cancel_delayed_work_sync() invocation with keventd_up() to avoid enabling irq unexpectedly during early boot. Signed-off-by: Tejun Heo Reported-and-tested-by: Qiao Zhou Link: http://lkml.kernel.org/r/d2501c4c-8e7b-bea3-1b01-000b36b5dfe9@asrmicro.com Signed-off-by: Rafael J. Wysocki --- kernel/power/qos.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/kernel/power/qos.c b/kernel/power/qos.c index 97b0df7..168ff44 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c @@ -482,7 +482,16 @@ void pm_qos_update_request(struct pm_qos_request *req, return; } - cancel_delayed_work_sync(&req->work); + /* + * This function may be called very early during boot, for example, + * from of_clk_init(), where irq needs to stay disabled. + * cancel_delayed_work_sync() assumes that irq is enabled on + * invocation and re-enables it on return. Avoid calling it until + * workqueue is initialized. + */ + if (keventd_up()) + cancel_delayed_work_sync(&req->work); + __pm_qos_update_request(req, new_value); } EXPORT_SYMBOL_GPL(pm_qos_update_request); -- cgit v1.1 From e12c8f36f3f7a60d55938c5aed5999278fa92bcb Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 30 Aug 2016 16:14:00 +0800 Subject: KVM: lapic: adjust preemption timer correctly when goes TSC backward MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TSC_OFFSET will be adjusted if discovers TSC backward during vCPU load. The preemption timer, which relies on the guest tsc to reprogram its preemption timer value, is also reprogrammed if vCPU is scheded in to a different pCPU. However, the current implementation reprogram preemption timer before TSC_OFFSET is adjusted to the right value, resulting in the preemption timer firing prematurely. This patch fix it by adjusting TSC_OFFSET before reprogramming preemption timer if TSC backward. 
Cc: Paolo Bonzini Cc: Radim Krċmář Cc: Yunhong Jiang Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 19f9f9e..699f872 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2743,16 +2743,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (tsc_delta < 0) mark_tsc_unstable("KVM discovered backwards TSC"); - if (kvm_lapic_hv_timer_in_use(vcpu) && - kvm_x86_ops->set_hv_timer(vcpu, - kvm_get_lapic_tscdeadline_msr(vcpu))) - kvm_lapic_switch_to_sw_timer(vcpu); if (check_tsc_unstable()) { u64 offset = kvm_compute_tsc_offset(vcpu, vcpu->arch.last_guest_tsc); kvm_x86_ops->write_tsc_offset(vcpu, offset); vcpu->arch.tsc_catchup = 1; } + if (kvm_lapic_hv_timer_in_use(vcpu) && + kvm_x86_ops->set_hv_timer(vcpu, + kvm_get_lapic_tscdeadline_msr(vcpu))) + kvm_lapic_switch_to_sw_timer(vcpu); /* * On a host with synchronized TSC, there is no need to update * kvmclock on vcpu->cpu migration -- cgit v1.1 From ed7a6948394305b810d0c6203268648715e5006f Mon Sep 17 00:00:00 2001 From: Wang Xiaoguang Date: Fri, 26 Aug 2016 11:33:14 +0800 Subject: btrfs: do not decrease bytes_may_use when replaying extents When replaying extents, there is no need to update bytes_may_use in btrfs_alloc_logged_file_extent(), otherwise it'll trigger a WARN_ON about bytes_may_use. 
Fixes: ("btrfs: update btrfs_space_info's bytes_may_use timely") Signed-off-by: Wang Xiaoguang Reviewed-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 64676a1..4483487 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -8216,6 +8216,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, { int ret; struct btrfs_block_group_cache *block_group; + struct btrfs_space_info *space_info; /* * Mixed block groups will exclude before processing the log so we only @@ -8231,9 +8232,14 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, if (!block_group) return -EINVAL; - ret = btrfs_add_reserved_bytes(block_group, ins->offset, - ins->offset, 0); - BUG_ON(ret); /* logic error */ + space_info = block_group->space_info; + spin_lock(&space_info->lock); + spin_lock(&block_group->lock); + space_info->bytes_reserved += ins->offset; + block_group->reserved += ins->offset; + spin_unlock(&block_group->lock); + spin_unlock(&space_info->lock); + ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 0, owner, offset, ins, 1); btrfs_put_block_group(block_group); -- cgit v1.1 From 5210d393ef84e5d2a4854671a9af2d97fd1b8dd4 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Fri, 2 Sep 2016 20:49:12 +0800 Subject: netfilter: nf_tables_trace: fix endiness when dump chain policy NFTA_TRACE_POLICY attribute is big endian, but we forget to call htonl to convert it. Fortunately, this attribute is parsed as big endian in libnftnl. 
Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index 39eb1cc..fa24a5b 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -237,7 +237,7 @@ void nft_trace_notify(struct nft_traceinfo *info) break; case NFT_TRACETYPE_POLICY: if (nla_put_be32(skb, NFTA_TRACE_POLICY, - info->basechain->policy)) + htonl(info->basechain->policy))) goto nla_put_failure; break; } -- cgit v1.1 From 5dba4b14bafe801083d01e1f400816df7e5a8f2e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 5 Sep 2016 15:39:06 +0100 Subject: iio: ensure ret is initialized to zero before entering do loop A recent fix to iio_buffer_read_first_n_outer removed ret from being set by a return from wait_event_interruptible and also added a continue in a loop which causes the variable ret to not be set when it reaches the end of the loop. Fix this by initializing ret to zero. Also remove extraneous white space at the end of the loop. 
Fixes: fcf68f3c0bb2a5 ("fix sched WARNING "do not call blocking ops when !TASK_RUNNING") Signed-off-by: Colin Ian King Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c index 49bf9c5..158aaf4 100644 --- a/drivers/iio/industrialio-buffer.c +++ b/drivers/iio/industrialio-buffer.c @@ -110,7 +110,7 @@ ssize_t iio_buffer_read_first_n_outer(struct file *filp, char __user *buf, DEFINE_WAIT_FUNC(wait, woken_wake_function); size_t datum_size; size_t to_wait; - int ret; + int ret = 0; if (!indio_dev->info) return -ENODEV; @@ -153,7 +153,7 @@ ssize_t iio_buffer_read_first_n_outer(struct file *filp, char __user *buf, ret = rb->access->read_first_n(rb, n, buf); if (ret == 0 && (filp->f_flags & O_NONBLOCK)) ret = -EAGAIN; - } while (ret == 0); + } while (ret == 0); remove_wait_queue(&rb->pollq, &wait); return ret; -- cgit v1.1 From 171c0091837c81ed5c949fec6966bb5afff2d1cf Mon Sep 17 00:00:00 2001 From: Gregor Boirie Date: Fri, 2 Sep 2016 20:27:46 +0200 Subject: iio:core: fix IIO_VAL_FRACTIONAL sign handling 7985e7c100 ("iio: Introduce a new fractional value type") introduced a new IIO_VAL_FRACTIONAL value type meant to represent rational type numbers expressed by a numerator and denominator combination. Formatting of IIO_VAL_FRACTIONAL values relies upon do_div() usage. This fails handling negative values properly since parameters are reevaluated as unsigned values. Fix this by using div_s64_rem() instead. Computed integer part will carry properly signed value. Formatted fractional part will always be positive.
Fixes: 7985e7c100 ("iio: Introduce a new fractional value type") Signed-off-by: Gregor Boirie Reviewed-by: Lars-Peter Clausen Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index f914d5d..d2b8899 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -613,9 +613,8 @@ ssize_t iio_format_value(char *buf, unsigned int type, int size, int *vals) return sprintf(buf, "%d.%09u\n", vals[0], vals[1]); case IIO_VAL_FRACTIONAL: tmp = div_s64((s64)vals[0] * 1000000000LL, vals[1]); - vals[1] = do_div(tmp, 1000000000LL); - vals[0] = tmp; - return sprintf(buf, "%d.%09u\n", vals[0], vals[1]); + vals[0] = (int)div_s64_rem(tmp, 1000000000, &vals[1]); + return sprintf(buf, "%d.%09u\n", vals[0], abs(vals[1])); case IIO_VAL_FRACTIONAL_LOG2: tmp = (s64)vals[0] * 1000000000LL >> vals[1]; vals[1] = do_div(tmp, 1000000000LL); -- cgit v1.1 From d81d8258218228d3137055afe8acf981c3ebed46 Mon Sep 17 00:00:00 2001 From: Thiago Jung Bauermann Date: Tue, 23 Aug 2016 19:57:39 -0300 Subject: powerpc/pseries: Fix little endian build with CONFIG_KEXEC=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On ppc64le, builds with CONFIG_KEXEC=n fail with: arch/powerpc/platforms/pseries/setup.c: In function ‘pseries_big_endian_exceptions’: arch/powerpc/platforms/pseries/setup.c:403:13: error: implicit declaration of function ‘kdump_in_progress’ if (rc && !kdump_in_progress()) This is because pseries/setup.c includes <linux/kexec.h>, but kdump_in_progress() is defined in <asm/kexec.h>. This is a problem because the former only includes the latter if CONFIG_KEXEC_CORE=y. Fix it by including <asm/kexec.h> directly, as is done in powernv/setup.c.
Fixes: d3cbff1b5a90 ("powerpc: Put exception configuration in a common place") Signed-off-by: Thiago Jung Bauermann Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 4ffcaa6..a39d20e 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include @@ -66,6 +65,7 @@ #include #include #include +#include #include "pseries.h" -- cgit v1.1 From f8e33475b0da98c4bffc91017a3fbbfb443a7bdb Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Sep 2016 13:43:45 +1000 Subject: powerpc/xics/opal: Fix processor numbers in OPAL ICP When using the OPAL ICP backend we incorrectly pass Linux CPU numbers rather than HW CPU numbers to OPAL. Fixes: d74361881f0d ("powerpc/xics: Add ICP OPAL backend") Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/xics/icp-opal.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c index 57d72f1..9114243 100644 --- a/arch/powerpc/sysdev/xics/icp-opal.c +++ b/arch/powerpc/sysdev/xics/icp-opal.c @@ -23,10 +23,10 @@ static void icp_opal_teardown_cpu(void) { - int cpu = smp_processor_id(); + int hw_cpu = hard_smp_processor_id(); /* Clear any pending IPI */ - opal_int_set_mfrr(cpu, 0xff); + opal_int_set_mfrr(hw_cpu, 0xff); } static void icp_opal_flush_ipi(void) @@ -101,14 +101,16 @@ static void icp_opal_eoi(struct irq_data *d) static void icp_opal_cause_ipi(int cpu, unsigned long data) { - opal_int_set_mfrr(cpu, IPI_PRIORITY); + int hw_cpu = get_hard_smp_processor_id(cpu); + + opal_int_set_mfrr(hw_cpu, IPI_PRIORITY); } static irqreturn_t icp_opal_ipi_action(int irq, void *dev_id) { - int cpu = smp_processor_id(); + int hw_cpu 
= hard_smp_processor_id(); - opal_int_set_mfrr(cpu, 0xff); + opal_int_set_mfrr(hw_cpu, 0xff); return smp_ipi_demux(); } -- cgit v1.1 From b314427a52fd8daa1215d30e56de01e32edec531 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 6 Sep 2016 14:16:44 +1000 Subject: powerpc/powernv: Fix crash on releasing compound PE The compound PE is created to accommodate the devices attached to one specific PCI bus that consume multiple M64 segments. The compound PE is made up of one master PE and possibly multiple slave PEs. The slave PEs should be destroyed when releasing the master PE. A kernel crash happens when dereferencing @pe->pdev on releasing the slave PE in pnv_ioda_deconfigure_pe(). # echo 0 > /sys/bus/pci/slots/C7/power iommu: Removing device 0000:01:00.1 from group 0 iommu: Removing device 0000:01:00.0 from group 0 Unable to handle kernel paging request for data at address 0x00000010 Faulting instruction address: 0xc00000000005d898 cpu 0x1: Vector: 300 (Data Access) at [c000000fe8217620] pc: c00000000005d898: pnv_ioda_release_pe+0x288/0x610 lr: c00000000005dbdc: pnv_ioda_release_pe+0x5cc/0x610 sp: c000000fe82178a0 msr: 9000000000009033 dar: 10 dsisr: 40000000 current = 0xc000000fe815ab80 paca = 0xc00000000ff00400 softe: 0 irq_happened: 0x01 pid = 2709, comm = sh Linux version 4.8.0-rc5-gavin-00006-g745efdb (gwshan@gwshan) \ (gcc version 4.9.3 (Buildroot 2016.02-rc2-00093-g5ea3bce) ) #586 SMP \ Tue Sep 6 13:37:29 AEST 2016 enter ?
for help [c000000fe8217940] c00000000005d684 pnv_ioda_release_pe+0x74/0x610 [c000000fe82179e0] c000000000034460 pcibios_release_device+0x50/0x70 [c000000fe8217a10] c0000000004aba80 pci_release_dev+0x50/0xa0 [c000000fe8217a40] c000000000704898 device_release+0x58/0xf0 [c000000fe8217ac0] c000000000470510 kobject_release+0x80/0xf0 [c000000fe8217b00] c000000000704dd4 put_device+0x24/0x40 [c000000fe8217b20] c0000000004af94c pci_remove_bus_device+0x12c/0x150 [c000000fe8217b60] c000000000034244 pci_hp_remove_devices+0x94/0xd0 [c000000fe8217ba0] c0000000004ca444 pnv_php_disable_slot+0x64/0xb0 [c000000fe8217bd0] c0000000004c88c0 power_write_file+0xa0/0x190 [c000000fe8217c50] c0000000004c248c pci_slot_attr_store+0x3c/0x60 [c000000fe8217c70] c0000000002d6494 sysfs_kf_write+0x94/0xc0 [c000000fe8217cb0] c0000000002d50f0 kernfs_fop_write+0x180/0x260 [c000000fe8217d00] c0000000002334a0 __vfs_write+0x40/0x190 [c000000fe8217d90] c000000000234738 vfs_write+0xc8/0x240 [c000000fe8217de0] c000000000236250 SyS_write+0x60/0x110 [c000000fe8217e30] c000000000009524 system_call+0x38/0x108 It fixes the kernel crash by bypassing releasing resources (DMA, IO and memory segments, PELTM) because there are no resources assigned to the slave PE. 
Fixes: c5f7700bbd2e ("powerpc/powernv: Dynamically release PE") Reported-by: Frederic Barrat Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 1321826..18f6fd1 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -3402,12 +3402,6 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe) struct pnv_phb *phb = pe->phb; struct pnv_ioda_pe *slave, *tmp; - /* Release slave PEs in compound PE */ - if (pe->flags & PNV_IODA_PE_MASTER) { - list_for_each_entry_safe(slave, tmp, &pe->slaves, list) - pnv_ioda_release_pe(slave); - } - list_del(&pe->list); switch (phb->type) { case PNV_PHB_IODA1: @@ -3422,6 +3416,15 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe) pnv_ioda_release_pe_seg(pe); pnv_ioda_deconfigure_pe(pe->phb, pe); + + /* Release slave PEs in the compound PE */ + if (pe->flags & PNV_IODA_PE_MASTER) { + list_for_each_entry_safe(slave, tmp, &pe->slaves, list) { + list_del(&slave->list); + pnv_ioda_free_pe(slave); + } + } + pnv_ioda_free_pe(pe); } -- cgit v1.1 From 3ff488ab6000f1338684b227c499450317519cc1 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 5 Sep 2016 16:37:12 +0100 Subject: usb: gadget: prevent potenial null pointer dereference on skb->len An earlier fix partially fixed the null pointer dereference on skb->len by moving the assignment of len after the check on skb being non-null, however it failed to remove the erroneous dereference when assigning len. Correctly fix this by removing the initialisation of len as was originally intended. 
Fixes: 70237dc8efd092 ("usb: gadget: function: f_eem: socket buffer may be NULL") Acked-by: Peter Chen Signed-off-by: Colin Ian King Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_eem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_eem.c b/drivers/usb/gadget/function/f_eem.c index 8741fd7..007ec6e 100644 --- a/drivers/usb/gadget/function/f_eem.c +++ b/drivers/usb/gadget/function/f_eem.c @@ -342,7 +342,7 @@ static struct sk_buff *eem_wrap(struct gether *port, struct sk_buff *skb) struct sk_buff *skb2 = NULL; struct usb_ep *in = port->in_ep; int headroom, tailroom, padlen = 0; - u16 len = skb->len; + u16 len; if (!skb) return NULL; -- cgit v1.1 From 79d102cbfd2e9d94257fcc7c82807ef1cdf80322 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 5 Sep 2016 17:30:07 +0200 Subject: perf/x86/intel/cqm: Check cqm/mbm enabled state in event init Yanqiu Zhang reported kernel panic when using mbm event on system where CQM is detected but without mbm event support, like with perf: # perf stat -e 'intel_cqm/event=3/' -a BUG: unable to handle kernel NULL pointer dereference at 0000000000000020 IP: [] update_sample+0xbc/0xe0 ... [] __intel_mbm_event_init+0x18/0x20 [] flush_smp_call_function_queue+0x7b/0x160 [] generic_smp_call_function_single_interrupt+0x13/0x60 [] smp_call_function_interrupt+0x27/0x40 [] call_function_interrupt+0x8c/0xa0 ... The reason is that we currently allow to init mbm event even if mbm support is not detected. Adding checks for both cqm and mbm events and support into cqm's event_init. 
Fixes: 33c3cc7acfd9 ("perf/x86/mbm: Add Intel Memory B/W Monitoring enumeration and init") Reported-by: Yanqiu Zhang Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Vikas Shivappa Cc: Tony Luck Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1473089407-21857-1-git-send-email-jolsa@kernel.org Signed-off-by: Thomas Gleixner --- arch/x86/events/intel/cqm.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c index 783c49d..8f82b02 100644 --- a/arch/x86/events/intel/cqm.c +++ b/arch/x86/events/intel/cqm.c @@ -458,6 +458,11 @@ static void __intel_cqm_event_count(void *info); static void init_mbm_sample(u32 rmid, u32 evt_type); static void __intel_mbm_event_count(void *info); +static bool is_cqm_event(int e) +{ + return (e == QOS_L3_OCCUP_EVENT_ID); +} + static bool is_mbm_event(int e) { return (e >= QOS_MBM_TOTAL_EVENT_ID && e <= QOS_MBM_LOCAL_EVENT_ID); @@ -1366,6 +1371,10 @@ static int intel_cqm_event_init(struct perf_event *event) (event->attr.config > QOS_MBM_LOCAL_EVENT_ID)) return -EINVAL; + if ((is_cqm_event(event->attr.config) && !cqm_enabled) || + (is_mbm_event(event->attr.config) && !mbm_enabled)) + return -EINVAL; + /* unsupported modes and filters */ if (event->attr.exclude_user || event->attr.exclude_kernel || -- cgit v1.1 From d2896d4b55b2e32b423072a4124d7da4dc1e6cb1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 22 Aug 2016 09:01:17 +0100 Subject: arm: KVM: Fix idmap overlap detection when the kernel is idmap'ed We're trying hard to detect when the HYP idmap overlaps with the HYP va, as it makes the teardown of a cpu dangerous. But there is one case where an overlap is completely safe, which is when the whole of the kernel is idmap'ed, which is likely to happen on 32bit when RAM is at 0x8000000 and we're using a 2G/2G VA split. In that case, we can proceed safely. 
Reported-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/kvm/mmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 29d0b23..a3faafe 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1714,7 +1714,8 @@ int kvm_mmu_init(void) kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL)); if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) && - hyp_idmap_start < kern_hyp_va(~0UL)) { + hyp_idmap_start < kern_hyp_va(~0UL) && + hyp_idmap_start != (unsigned long)__hyp_idmap_text_start) { /* * The idmap page is intersecting with the VA space, * it is not safe to continue further. -- cgit v1.1 From 87260d3f7aecba9a5fadc6886c338b2a8fccfca9 Mon Sep 17 00:00:00 2001 From: Dirk Behme Date: Thu, 21 Apr 2016 12:24:55 +0200 Subject: thermal: rcar_thermal: Fix priv->zone error handling In case thermal_zone_xxx_register() returns an error, priv->zone isn't NULL any more, but contains the error code. This is passed to thermal_zone_device_unregister(), then. This checks for priv->zone being NULL, but the error code is != NULL. So it works with the error code as a pointer. Crashing immediately. To fix this, reset priv->zone to NULL before entering rcar_gen3_thermal_remove(). 
Signed-off-by: Dirk Behme Reviewed-by: Geert Uytterhoeven Signed-off-by: Zhang Rui --- drivers/thermal/rcar_thermal.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c index 71a3392..5f81792 100644 --- a/drivers/thermal/rcar_thermal.c +++ b/drivers/thermal/rcar_thermal.c @@ -504,6 +504,7 @@ static int rcar_thermal_probe(struct platform_device *pdev) if (IS_ERR(priv->zone)) { dev_err(dev, "can't register thermal zone\n"); ret = PTR_ERR(priv->zone); + priv->zone = NULL; goto error_unregister; } -- cgit v1.1 From d31ed3f05763644840c654a384eaefa94c097ba2 Mon Sep 17 00:00:00 2001 From: Jan Leupold Date: Wed, 6 Jul 2016 13:22:35 +0200 Subject: drm: atmel-hlcdc: Fix vertical scaling The code is applying the same scaling for the X and Y components, thus making the scaling feature only functional when both components have the same scaling factor. Do the s/_w/_h/ replacement where appropriate to fix vertical scaling. Signed-off-by: Jan Leupold Fixes: 1a396789f65a2 ("drm: add Atmel HLCDC Display Controller support") Cc: Signed-off-by: Boris Brezillon --- drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c index 016c191..52c527f 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c @@ -320,19 +320,19 @@ atmel_hlcdc_plane_update_pos_and_size(struct atmel_hlcdc_plane *plane, u32 *coeff_tab = heo_upscaling_ycoef; u32 max_memsize; - if (state->crtc_w < state->src_w) + if (state->crtc_h < state->src_h) coeff_tab = heo_downscaling_ycoef; for (i = 0; i < ARRAY_SIZE(heo_upscaling_ycoef); i++) atmel_hlcdc_layer_update_cfg(&plane->layer, 33 + i, 0xffffffff, coeff_tab[i]); - factor = ((8 * 256 * state->src_w) - (256 * 4)) / - state->crtc_w; + factor = ((8 * 256 * state->src_h) - (256 * 4)) / + 
state->crtc_h; factor++; - max_memsize = ((factor * state->crtc_w) + (256 * 4)) / + max_memsize = ((factor * state->crtc_h) + (256 * 4)) / 2048; - if (max_memsize > state->src_w) + if (max_memsize > state->src_h) factor--; factor_reg |= (factor << 16) | 0x80000000; } -- cgit v1.1 From 1ba7db07ccc2825669d6e376632316813a072887 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 11 Jul 2016 12:19:40 +0200 Subject: drm/atmel-hlcdc: Make ->reset() implementation static The atmel_hlcdc_crtc_reset() function is never used outside the file and can be static. This avoids a warning from sparse. Signed-off-by: Thierry Reding --- drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c index a978381..9b17a66 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c @@ -387,7 +387,7 @@ void atmel_hlcdc_crtc_irq(struct drm_crtc *c) atmel_hlcdc_crtc_finish_page_flip(drm_crtc_to_atmel_hlcdc_crtc(c)); } -void atmel_hlcdc_crtc_reset(struct drm_crtc *crtc) +static void atmel_hlcdc_crtc_reset(struct drm_crtc *crtc) { struct atmel_hlcdc_crtc_state *state; -- cgit v1.1 From cbd60aa7cd17d81a434234268c55192862147439 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 6 Sep 2016 05:37:40 -0700 Subject: Btrfs: remove root_log_ctx from ctx list before btrfs_sync_log returns We use a btrfs_log_ctx structure to pass information into the tree log commit, and get error values out. It gets added to a per log-transaction list which we walk when things go bad. Commit d1433debe added an optimization to skip waiting for the log commit, but didn't take root_log_ctx out of the list. This patch makes sure we remove things before exiting. 
Signed-off-by: Chris Mason Fixes: d1433debe7f4346cf9fc0dafc71c3137d2a97bc4 cc: stable@vger.kernel.org # 3.15+ --- fs/btrfs/tree-log.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index e935035..ef9c55b 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2867,6 +2867,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, if (log_root_tree->log_transid_committed >= root_log_ctx.log_transid) { blk_finish_plug(&plug); + list_del_init(&root_log_ctx.list); mutex_unlock(&log_root_tree->log_mutex); ret = root_log_ctx.log_ret; goto out; -- cgit v1.1 From cd28e716c6869d2f06e64bcd679d0a45dd8a6295 Mon Sep 17 00:00:00 2001 From: Ping Gao Date: Tue, 6 Sep 2016 12:04:11 +0800 Subject: drm/i915: enable vGPU detection for all vGPU capability is handled by GVT-g host driver, not needed to put extra HW check for vGPU detection. And we'll actually support vGPU from BDW. Signed-off-by: Ping Gao Signed-off-by: Zhenyu Wang Reviewed-by: Joonas Lahtinen Acked-by: Chris Wilson Cc: drm-intel-fixes@lists.freedesktop.org Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20160906040412.1274-2-zhenyuw@linux.intel.com (cherry picked from commit 8ef89995c735f978d5dfcb3ca6bce70d41728c91) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_vgpu.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index f6acb5a..b81cfb3 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -65,9 +65,6 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv) BUILD_BUG_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE); - if (!IS_HASWELL(dev_priv)) - return; - magic = __raw_i915_read64(dev_priv, vgtif_reg(magic)); if (magic != VGT_MAGIC) return; -- cgit v1.1 From 557b1a8cae25e36ac2f125d93f003e60a7d0d014 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Tue, 6 Sep 2016 12:04:12 +0800 Subject: drm/i915: disable 48bit full PPGTT when 
vGPU is active Disable 48bit full PPGTT on vGPU too for now. Signed-off-by: Zhi Wang Signed-off-by: Zhenyu Wang Reviewed-by: Joonas Lahtinen Acked-by: Chris Wilson Cc: drm-intel-fixes@lists.freedesktop.org Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20160906040412.1274-3-zhenyuw@linux.intel.com (cherry picked from commit e320d40022128845dfff900422ea9fd69f576c98) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_gtt.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 7a30af7..f38ceff 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -122,8 +122,11 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, has_full_48bit_ppgtt = IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9; - if (intel_vgpu_active(dev_priv)) - has_full_ppgtt = false; /* emulation is too hard */ + if (intel_vgpu_active(dev_priv)) { + /* emulation is too hard */ + has_full_ppgtt = false; + has_full_48bit_ppgtt = false; + } if (!has_aliasing_ppgtt) return 0; @@ -158,7 +161,7 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, return 0; } - if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists) + if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists && has_full_ppgtt) return has_full_48bit_ppgtt ? 3 : 2; else return has_aliasing_ppgtt ? 1 : 0; -- cgit v1.1 From ce129655c9d9aaa7b3bcc46529db1b36693575ed Mon Sep 17 00:00:00 2001 From: Wang Xiaoguang Date: Fri, 2 Sep 2016 10:58:46 +0800 Subject: btrfs: introduce tickets_id to determine whether asynchronous metadata reclaim work makes progress In btrfs_async_reclaim_metadata_space(), we use ticket's address to determine whether asynchronous metadata reclaim work is making progress. 
ticket = list_first_entry(&space_info->tickets, struct reserve_ticket, list); if (last_ticket == ticket) { flush_state++; } else { last_ticket = ticket; flush_state = FLUSH_DELAYED_ITEMS_NR; if (commit_cycles) commit_cycles--; } But indeed it's wrong, we should not rely on local variable's address to do this check, because addresses may be same. In my test environment, I dd one 168MB file in a 256MB fs, found that for this file, every time wait_reserve_ticket() called, local variable ticket's address is same, For above codes, assume a previous ticket's address is addrA, last_ticket is addrA. Btrfs_async_reclaim_metadata_space() finished this ticket and wake up it, then another ticket is added, but with the same address addrA, now last_ticket will be same to current ticket, then current ticket's flush work will start from current flush_state, not initial FLUSH_DELAYED_ITEMS_NR, which may result in some enospc issues(I have seen this in my test machine). Signed-off-by: Wang Xiaoguang Reviewed-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 1 + fs/btrfs/extent-tree.c | 11 ++++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ec4154f..146d1c7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -427,6 +427,7 @@ struct btrfs_space_info { struct list_head ro_bgs; struct list_head priority_tickets; struct list_head tickets; + u64 tickets_id; struct rw_semaphore groups_sem; /* for block groups in our same type */ diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4483487..d09cf7a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4966,12 +4966,12 @@ static void wake_all_tickets(struct list_head *head) */ static void btrfs_async_reclaim_metadata_space(struct work_struct *work) { - struct reserve_ticket *last_ticket = NULL; struct btrfs_fs_info *fs_info; struct btrfs_space_info *space_info; u64 to_reclaim; int flush_state; int commit_cycles = 0; + u64 
last_tickets_id; fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work); space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); @@ -4984,8 +4984,7 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work) spin_unlock(&space_info->lock); return; } - last_ticket = list_first_entry(&space_info->tickets, - struct reserve_ticket, list); + last_tickets_id = space_info->tickets_id; spin_unlock(&space_info->lock); flush_state = FLUSH_DELAYED_ITEMS_NR; @@ -5005,10 +5004,10 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work) space_info); ticket = list_first_entry(&space_info->tickets, struct reserve_ticket, list); - if (last_ticket == ticket) { + if (last_tickets_id == space_info->tickets_id) { flush_state++; } else { - last_ticket = ticket; + last_tickets_id = space_info->tickets_id; flush_state = FLUSH_DELAYED_ITEMS_NR; if (commit_cycles) commit_cycles--; @@ -5384,6 +5383,7 @@ again: list_del_init(&ticket->list); num_bytes -= ticket->bytes; ticket->bytes = 0; + space_info->tickets_id++; wake_up(&ticket->wait); } else { ticket->bytes -= num_bytes; @@ -5426,6 +5426,7 @@ again: num_bytes -= ticket->bytes; space_info->bytes_may_use += ticket->bytes; ticket->bytes = 0; + space_info->tickets_id++; wake_up(&ticket->wait); } else { trace_btrfs_space_reservation(fs_info, "space_info", -- cgit v1.1 From d1a6cba576fc7c43e476538fe5aa72fe04bd80e1 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Tue, 6 Sep 2016 22:31:02 +0800 Subject: netfilter: nft_chain_route: re-route before skb is queued to userspace Imagine such situation, user add the following nft rules, and queue the packets to userspace for further check: # ip rule add fwmark 0x0/0x1 lookup eth0 # ip rule add fwmark 0x1/0x1 lookup eth1 # nft add table filter # nft add chain filter output {type route hook output priority 0 \;} # nft add rule filter output mark set 0x1 # nft add rule filter output queue num 0 But after we reinject the skbuff, the packet will 
be sent via the wrong route, i.e. in this case, the packet will be routed via eth0 table, not eth1 table. Because we skip to do re-route when verdict is NF_QUEUE, even if the mark was changed. Actually, we should not touch sk_buff if verdict is NF_DROP or NF_STOLEN, and when re-route fails, return NF_DROP with error code. This is consistent with the mangle table in iptables. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nft_chain_route_ipv4.c | 11 +++++++---- net/ipv6/netfilter/nft_chain_route_ipv6.c | 10 +++++++--- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c index 2375b0a..30493be 100644 --- a/net/ipv4/netfilter/nft_chain_route_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c @@ -31,6 +31,7 @@ static unsigned int nf_route_table_hook(void *priv, __be32 saddr, daddr; u_int8_t tos; const struct iphdr *iph; + int err; /* root is playing with raw sockets.
*/ if (skb->len < sizeof(struct iphdr) || @@ -46,15 +47,17 @@ static unsigned int nf_route_table_hook(void *priv, tos = iph->tos; ret = nft_do_chain(&pkt, priv); - if (ret != NF_DROP && ret != NF_QUEUE) { + if (ret != NF_DROP && ret != NF_STOLEN) { iph = ip_hdr(skb); if (iph->saddr != saddr || iph->daddr != daddr || skb->mark != mark || - iph->tos != tos) - if (ip_route_me_harder(state->net, skb, RTN_UNSPEC)) - ret = NF_DROP; + iph->tos != tos) { + err = ip_route_me_harder(state->net, skb, RTN_UNSPEC); + if (err < 0) + ret = NF_DROP_ERR(err); + } } return ret; } diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c index 71d995f..2535223 100644 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -31,6 +31,7 @@ static unsigned int nf_route_table_hook(void *priv, struct in6_addr saddr, daddr; u_int8_t hop_limit; u32 mark, flowlabel; + int err; /* malformed packet, drop it */ if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0) @@ -46,13 +47,16 @@ static unsigned int nf_route_table_hook(void *priv, flowlabel = *((u32 *)ipv6_hdr(skb)); ret = nft_do_chain(&pkt, priv); - if (ret != NF_DROP && ret != NF_QUEUE && + if (ret != NF_DROP && ret != NF_STOLEN && (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || skb->mark != mark || ipv6_hdr(skb)->hop_limit != hop_limit || - flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) - return ip6_route_me_harder(state->net, skb) == 0 ? ret : NF_DROP; + flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) { + err = ip6_route_me_harder(state->net, skb); + if (err < 0) + ret = NF_DROP_ERR(err); + } return ret; } -- cgit v1.1 From e6971009a95a74f28c58bbae415c40effad1226c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 6 Sep 2016 11:56:01 -0700 Subject: x86/uaccess: force copy_*_user() to be inlined As already done with __copy_*_user(), mark copy_*_user() as __always_inline. 
Without this, the checks for things like __builtin_constant_p() won't work consistently in either hardened usercopy or the recent adjustments for detecting usercopy overflows at compile time. The change in kernel text size is detectable, but very small: text data bss dec hex filename 12118735 5768608 14229504 32116847 1ea106f vmlinux.before 12120207 5768608 14229504 32118319 1ea162f vmlinux.after Signed-off-by: Kees Cook --- arch/x86/include/asm/uaccess.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index c3f2911..e3af86f 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -705,7 +705,7 @@ static inline void copy_user_overflow(int size, unsigned long count) WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count); } -static inline unsigned long __must_check +static __always_inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { int sz = __compiletime_object_size(to); @@ -725,7 +725,7 @@ copy_from_user(void *to, const void __user *from, unsigned long n) return n; } -static inline unsigned long __must_check +static __always_inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) { int sz = __compiletime_object_size(from); -- cgit v1.1 From 81409e9e28058811c9ea865345e1753f8f677e44 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 31 Aug 2016 16:04:21 -0700 Subject: usercopy: fold builtin_const check into inline function Instead of having each caller of check_object_size() need to remember to check for a const size parameter, move the check into check_object_size() itself. This actually matches the original implementation in PaX, though this commit cleans up the now-redundant builtin_const() calls in the various architectures. 
Signed-off-by: Kees Cook --- arch/ia64/include/asm/uaccess.h | 12 ++++-------- arch/powerpc/include/asm/uaccess.h | 19 +++++++------------ arch/sparc/include/asm/uaccess_32.h | 9 +++------ arch/sparc/include/asm/uaccess_64.h | 7 +++---- include/linux/thread_info.h | 3 ++- 5 files changed, 19 insertions(+), 31 deletions(-) diff --git a/arch/ia64/include/asm/uaccess.h b/arch/ia64/include/asm/uaccess.h index 465c709..0472927 100644 --- a/arch/ia64/include/asm/uaccess.h +++ b/arch/ia64/include/asm/uaccess.h @@ -241,8 +241,7 @@ extern unsigned long __must_check __copy_user (void __user *to, const void __use static inline unsigned long __copy_to_user (void __user *to, const void *from, unsigned long count) { - if (!__builtin_constant_p(count)) - check_object_size(from, count, true); + check_object_size(from, count, true); return __copy_user(to, (__force void __user *) from, count); } @@ -250,8 +249,7 @@ __copy_to_user (void __user *to, const void *from, unsigned long count) static inline unsigned long __copy_from_user (void *to, const void __user *from, unsigned long count) { - if (!__builtin_constant_p(count)) - check_object_size(to, count, false); + check_object_size(to, count, false); return __copy_user((__force void __user *) to, from, count); } @@ -265,8 +263,7 @@ __copy_from_user (void *to, const void __user *from, unsigned long count) long __cu_len = (n); \ \ if (__access_ok(__cu_to, __cu_len, get_fs())) { \ - if (!__builtin_constant_p(n)) \ - check_object_size(__cu_from, __cu_len, true); \ + check_object_size(__cu_from, __cu_len, true); \ __cu_len = __copy_user(__cu_to, (__force void __user *) __cu_from, __cu_len); \ } \ __cu_len; \ @@ -280,8 +277,7 @@ __copy_from_user (void *to, const void __user *from, unsigned long count) \ __chk_user_ptr(__cu_from); \ if (__access_ok(__cu_from, __cu_len, get_fs())) { \ - if (!__builtin_constant_p(n)) \ - check_object_size(__cu_to, __cu_len, false); \ + check_object_size(__cu_to, __cu_len, false); \ __cu_len = 
__copy_user((__force void __user *) __cu_to, __cu_from, __cu_len); \ } \ __cu_len; \ diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index c1dc6c1..f1e3824 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -311,14 +311,12 @@ static inline unsigned long copy_from_user(void *to, unsigned long over; if (access_ok(VERIFY_READ, from, n)) { - if (!__builtin_constant_p(n)) - check_object_size(to, n, false); + check_object_size(to, n, false); return __copy_tofrom_user((__force void __user *)to, from, n); } if ((unsigned long)from < TASK_SIZE) { over = (unsigned long)from + n - TASK_SIZE; - if (!__builtin_constant_p(n - over)) - check_object_size(to, n - over, false); + check_object_size(to, n - over, false); return __copy_tofrom_user((__force void __user *)to, from, n - over) + over; } @@ -331,14 +329,12 @@ static inline unsigned long copy_to_user(void __user *to, unsigned long over; if (access_ok(VERIFY_WRITE, to, n)) { - if (!__builtin_constant_p(n)) - check_object_size(from, n, true); + check_object_size(from, n, true); return __copy_tofrom_user(to, (__force void __user *)from, n); } if ((unsigned long)to < TASK_SIZE) { over = (unsigned long)to + n - TASK_SIZE; - if (!__builtin_constant_p(n)) - check_object_size(from, n - over, true); + check_object_size(from, n - over, true); return __copy_tofrom_user(to, (__force void __user *)from, n - over) + over; } @@ -383,8 +379,7 @@ static inline unsigned long __copy_from_user_inatomic(void *to, return 0; } - if (!__builtin_constant_p(n)) - check_object_size(to, n, false); + check_object_size(to, n, false); return __copy_tofrom_user((__force void __user *)to, from, n); } @@ -412,8 +407,8 @@ static inline unsigned long __copy_to_user_inatomic(void __user *to, if (ret == 0) return 0; } - if (!__builtin_constant_p(n)) - check_object_size(from, n, true); + + check_object_size(from, n, true); return __copy_tofrom_user(to, (__force const void __user *)from, 
n); } diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h index 341a5a1..e722c51 100644 --- a/arch/sparc/include/asm/uaccess_32.h +++ b/arch/sparc/include/asm/uaccess_32.h @@ -249,8 +249,7 @@ unsigned long __copy_user(void __user *to, const void __user *from, unsigned lon static inline unsigned long copy_to_user(void __user *to, const void *from, unsigned long n) { if (n && __access_ok((unsigned long) to, n)) { - if (!__builtin_constant_p(n)) - check_object_size(from, n, true); + check_object_size(from, n, true); return __copy_user(to, (__force void __user *) from, n); } else return n; @@ -258,16 +257,14 @@ static inline unsigned long copy_to_user(void __user *to, const void *from, unsi static inline unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n) { - if (!__builtin_constant_p(n)) - check_object_size(from, n, true); + check_object_size(from, n, true); return __copy_user(to, (__force void __user *) from, n); } static inline unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) { if (n && __access_ok((unsigned long) from, n)) { - if (!__builtin_constant_p(n)) - check_object_size(to, n, false); + check_object_size(to, n, false); return __copy_user((__force void __user *) to, from, n); } else return n; diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h index 8bda94f..37a315d 100644 --- a/arch/sparc/include/asm/uaccess_64.h +++ b/arch/sparc/include/asm/uaccess_64.h @@ -212,8 +212,7 @@ copy_from_user(void *to, const void __user *from, unsigned long size) { unsigned long ret; - if (!__builtin_constant_p(size)) - check_object_size(to, size, false); + check_object_size(to, size, false); ret = ___copy_from_user(to, from, size); if (unlikely(ret)) @@ -233,8 +232,8 @@ copy_to_user(void __user *to, const void *from, unsigned long size) { unsigned long ret; - if (!__builtin_constant_p(size)) - check_object_size(from, size, true); + 
check_object_size(from, size, true); + ret = ___copy_to_user(to, from, size); if (unlikely(ret)) ret = copy_to_user_fixup(to, from, size); diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index cbd8990..10c9e60 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -121,7 +121,8 @@ extern void __check_object_size(const void *ptr, unsigned long n, static inline void check_object_size(const void *ptr, unsigned long n, bool to_user) { - __check_object_size(ptr, n, to_user); + if (!__builtin_constant_p(n)) + __check_object_size(ptr, n, to_user); } #else static inline void check_object_size(const void *ptr, unsigned long n, -- cgit v1.1 From 3c17648c2816f6d28bd2be9293032a2901994a36 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 6 Sep 2016 11:26:12 -0700 Subject: lkdtm: adjust usercopy tests to bypass const checks The hardened usercopy is now consistently avoiding checks against const sizes, since we really only want to perform runtime bounds checking on lengths that weren't known at build time. To test the hardened usercopy code, we must force the length arguments to be seen as non-const. Signed-off-by: Kees Cook --- drivers/misc/lkdtm_usercopy.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/misc/lkdtm_usercopy.c b/drivers/misc/lkdtm_usercopy.c index 5525a20..1dd6114 100644 --- a/drivers/misc/lkdtm_usercopy.c +++ b/drivers/misc/lkdtm_usercopy.c @@ -9,7 +9,15 @@ #include #include -static size_t cache_size = 1024; +/* + * Many of the tests here end up using const sizes, but those would + * normally be ignored by hardened usercopy, so force the compiler + * into choosing the non-const path to make sure we trigger the + * hardened usercopy checks by added "unconst" to all the const copies, + * and making sure "cache_size" isn't optimized into a const. 
+ */ +static volatile size_t unconst = 0; +static volatile size_t cache_size = 1024; static struct kmem_cache *bad_cache; static const unsigned char test_text[] = "This is a test.\n"; @@ -67,14 +75,14 @@ static noinline void do_usercopy_stack(bool to_user, bool bad_frame) if (to_user) { pr_info("attempting good copy_to_user of local stack\n"); if (copy_to_user((void __user *)user_addr, good_stack, - sizeof(good_stack))) { + unconst + sizeof(good_stack))) { pr_warn("copy_to_user failed unexpectedly?!\n"); goto free_user; } pr_info("attempting bad copy_to_user of distant stack\n"); if (copy_to_user((void __user *)user_addr, bad_stack, - sizeof(good_stack))) { + unconst + sizeof(good_stack))) { pr_warn("copy_to_user failed, but lacked Oops\n"); goto free_user; } @@ -88,14 +96,14 @@ static noinline void do_usercopy_stack(bool to_user, bool bad_frame) pr_info("attempting good copy_from_user of local stack\n"); if (copy_from_user(good_stack, (void __user *)user_addr, - sizeof(good_stack))) { + unconst + sizeof(good_stack))) { pr_warn("copy_from_user failed unexpectedly?!\n"); goto free_user; } pr_info("attempting bad copy_from_user of distant stack\n"); if (copy_from_user(bad_stack, (void __user *)user_addr, - sizeof(good_stack))) { + unconst + sizeof(good_stack))) { pr_warn("copy_from_user failed, but lacked Oops\n"); goto free_user; } @@ -109,7 +117,7 @@ static void do_usercopy_heap_size(bool to_user) { unsigned long user_addr; unsigned char *one, *two; - const size_t size = 1024; + size_t size = unconst + 1024; one = kmalloc(size, GFP_KERNEL); two = kmalloc(size, GFP_KERNEL); @@ -285,13 +293,14 @@ void lkdtm_USERCOPY_KERNEL(void) pr_info("attempting good copy_to_user from kernel rodata\n"); if (copy_to_user((void __user *)user_addr, test_text, - sizeof(test_text))) { + unconst + sizeof(test_text))) { pr_warn("copy_to_user failed unexpectedly?!\n"); goto free_user; } pr_info("attempting bad copy_to_user from kernel text\n"); - if (copy_to_user((void __user *)user_addr, 
vm_mmap, PAGE_SIZE)) { + if (copy_to_user((void __user *)user_addr, vm_mmap, + unconst + PAGE_SIZE)) { pr_warn("copy_to_user failed, but lacked Oops\n"); goto free_user; } -- cgit v1.1 From 03c2778a938aaba0893f6d6cdc29511d91a79848 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 2 Sep 2016 14:39:50 -0400 Subject: ipv6: release dst in ping_v6_sendmsg Neither the failure or success paths of ping_v6_sendmsg release the dst it acquires. This leads to a flood of warnings from "net/core/dst.c:288 dst_release" on older kernels that don't have 8bf4ada2e21378816b28205427ee6b0e1ca4c5f1 backported. That patch optimistically hoped this had been fixed post 3.10, but it seems at least one case wasn't, where I've seen this triggered a lot from machines doing unprivileged icmp sockets. Cc: Martin Lau Signed-off-by: Dave Jones Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- net/ipv6/ping.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 0900352..0e983b6 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -126,8 +126,10 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) rt = (struct rt6_info *) dst; np = inet6_sk(sk); - if (!np) - return -EBADF; + if (!np) { + err = -EBADF; + goto dst_err_out; + } if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) fl6.flowi6_oif = np->mcast_oif; @@ -163,6 +165,9 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } release_sock(sk); +dst_err_out: + dst_release(dst); + if (err) return err; -- cgit v1.1 From 78d506e1b7071b24850fd5ac22b896c459b0a04c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 6 Sep 2016 11:22:49 -0400 Subject: xprtrdma: Revert 3d4cf35bd4fa ("xprtrdma: Reply buffer exhaustion...") Receive buffer exhaustion, if it were to actually occur, would be catastrophic. 
However, when there are no reply buffers to post, that means all of them have already been posted and are waiting for incoming replies. By design, there can never be more RPCs in flight than there are available receive buffers. A receive buffer can be left posted after an RPC exits without a received reply; say, due to a credential problem or a soft timeout. This does not result in fewer posted receive buffers than there are pending RPCs, and there is already logic in xprtrdma to deal appropriately with this case. It also looks like the "+ 2" that was removed was accidentally accommodating the number of extra receive buffers needed for receiving backchannel requests. That will need to be addressed by another patch. Fixes: 3d4cf35bd4fa ("xprtrdma: Reply buffer exhaustion can be...") Signed-off-by: Chuck Lever Reviewed-by: Anna Schumaker Signed-off-by: Trond Myklebust --- net/sunrpc/xprtrdma/verbs.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 536d0be..fefcba9 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -923,7 +923,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) } INIT_LIST_HEAD(&buf->rb_recv_bufs); - for (i = 0; i < buf->rb_max_requests; i++) { + for (i = 0; i < buf->rb_max_requests + 2; i++) { struct rpcrdma_rep *rep; rep = rpcrdma_create_rep(r_xprt); @@ -1076,6 +1076,8 @@ rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw) /* * Get a set of request/reply buffers. + * + * Reply buffer (if available) is attached to send buffer upon return. 
*/ struct rpcrdma_req * rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) @@ -1094,13 +1096,13 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) out_reqbuf: spin_unlock(&buffers->rb_lock); - pr_warn("rpcrdma: out of request buffers (%p)\n", buffers); + pr_warn("RPC: %s: out of request buffers\n", __func__); return NULL; out_repbuf: - list_add(&req->rl_free, &buffers->rb_send_bufs); spin_unlock(&buffers->rb_lock); - pr_warn("rpcrdma: out of reply buffers (%p)\n", buffers); - return NULL; + pr_warn("RPC: %s: out of reply buffers\n", __func__); + req->rl_reply = NULL; + return req; } /* -- cgit v1.1 From 05c974669ecec510a85d8534099bb75404e82c41 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 6 Sep 2016 11:22:58 -0400 Subject: xprtrdma: Fix receive buffer accounting An RPC can terminate before its reply arrives, if a credential problem or a soft timeout occurs. After this happens, xprtrdma reports it is out of Receive buffers. A Receive buffer is posted before each RPC is sent, and returned to the buffer pool when a reply is received. If no reply is received for an RPC, that Receive buffer remains posted. But xprtrdma tries to post another when the next RPC is sent. If this happens a few dozen times, there are no receive buffers left to be posted at send time. I don't see a way for a transport connection to recover at that point, and it will spit warnings and unnecessarily delay RPCs on occasion for its remaining lifetime. Commit 1e465fd4ff47 ("xprtrdma: Replace send and receive arrays") removed a little bit of logic to detect this case and not provide a Receive buffer so no more buffers are posted, and then transport operation continues correctly. We didn't understand what that logic did, and it wasn't commented, so it was removed as part of the overhaul to support backchannel requests. Restore it, but be wary of the need to keep extra Receives posted to deal with backchannel requests. 
Fixes: 1e465fd4ff47 ("xprtrdma: Replace send and receive arrays") Signed-off-by: Chuck Lever Reviewed-by: Anna Schumaker Signed-off-by: Trond Myklebust --- net/sunrpc/xprtrdma/verbs.c | 41 +++++++++++++++++++++++++++++------------ net/sunrpc/xprtrdma/xprt_rdma.h | 1 + 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index fefcba9..799cce6 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include /* try_module_get()/module_put() */ @@ -923,7 +924,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) } INIT_LIST_HEAD(&buf->rb_recv_bufs); - for (i = 0; i < buf->rb_max_requests + 2; i++) { + for (i = 0; i < buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS; i++) { struct rpcrdma_rep *rep; rep = rpcrdma_create_rep(r_xprt); @@ -1018,6 +1019,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) rep = rpcrdma_buffer_get_rep_locked(buf); rpcrdma_destroy_rep(ia, rep); } + buf->rb_send_count = 0; spin_lock(&buf->rb_reqslock); while (!list_empty(&buf->rb_allreqs)) { @@ -1032,6 +1034,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) spin_lock(&buf->rb_reqslock); } spin_unlock(&buf->rb_reqslock); + buf->rb_recv_count = 0; rpcrdma_destroy_mrs(buf); } @@ -1074,6 +1077,23 @@ rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw) spin_unlock(&buf->rb_mwlock); } +static struct rpcrdma_rep * +rpcrdma_buffer_get_rep(struct rpcrdma_buffer *buffers) +{ + /* If an RPC previously completed without a reply (say, a + * credential problem or a soft timeout occurs) then hold off + * on supplying more Receive buffers until the number of new + * pending RPCs catches up to the number of posted Receives. 
+ */ + if (unlikely(buffers->rb_send_count < buffers->rb_recv_count)) + return NULL; + + if (unlikely(list_empty(&buffers->rb_recv_bufs))) + return NULL; + buffers->rb_recv_count++; + return rpcrdma_buffer_get_rep_locked(buffers); +} + /* * Get a set of request/reply buffers. * @@ -1087,10 +1107,9 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) spin_lock(&buffers->rb_lock); if (list_empty(&buffers->rb_send_bufs)) goto out_reqbuf; + buffers->rb_send_count++; req = rpcrdma_buffer_get_req_locked(buffers); - if (list_empty(&buffers->rb_recv_bufs)) - goto out_repbuf; - req->rl_reply = rpcrdma_buffer_get_rep_locked(buffers); + req->rl_reply = rpcrdma_buffer_get_rep(buffers); spin_unlock(&buffers->rb_lock); return req; @@ -1098,11 +1117,6 @@ out_reqbuf: spin_unlock(&buffers->rb_lock); pr_warn("RPC: %s: out of request buffers\n", __func__); return NULL; -out_repbuf: - spin_unlock(&buffers->rb_lock); - pr_warn("RPC: %s: out of reply buffers\n", __func__); - req->rl_reply = NULL; - return req; } /* @@ -1119,9 +1133,12 @@ rpcrdma_buffer_put(struct rpcrdma_req *req) req->rl_reply = NULL; spin_lock(&buffers->rb_lock); + buffers->rb_send_count--; list_add_tail(&req->rl_free, &buffers->rb_send_bufs); - if (rep) + if (rep) { + buffers->rb_recv_count--; list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs); + } spin_unlock(&buffers->rb_lock); } @@ -1135,8 +1152,7 @@ rpcrdma_recv_buffer_get(struct rpcrdma_req *req) struct rpcrdma_buffer *buffers = req->rl_buffer; spin_lock(&buffers->rb_lock); - if (!list_empty(&buffers->rb_recv_bufs)) - req->rl_reply = rpcrdma_buffer_get_rep_locked(buffers); + req->rl_reply = rpcrdma_buffer_get_rep(buffers); spin_unlock(&buffers->rb_lock); } @@ -1150,6 +1166,7 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf; spin_lock(&buffers->rb_lock); + buffers->rb_recv_count--; list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs); spin_unlock(&buffers->rb_lock); } diff --git 
a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 670fad5..a71b0f5 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -321,6 +321,7 @@ struct rpcrdma_buffer { char *rb_pool; spinlock_t rb_lock; /* protect buf lists */ + int rb_send_count, rb_recv_count; struct list_head rb_send_bufs; struct list_head rb_recv_bufs; u32 rb_max_requests; -- cgit v1.1 From 007e4ba3ee137f4700f39aa6dbaf01a71047c5f6 Mon Sep 17 00:00:00 2001 From: Helmut Buchsbaum Date: Sun, 4 Sep 2016 18:09:47 +0200 Subject: net: macb: initialize checksum when using checksum offloading I'm still struggling to get this fix right.. Changes since v2: - do not blindly modify SKB contents according to Dave's legitimate objection Changes since v1: - dropped disabling HW checksum offload for Zynq - initialize checksum similar to net/ethernet/freescale/fec_main.c -- >8 -- MACB/GEM needs the checksum field initialized to 0 to get correct results on transmit in all cases, e.g. on Zynq, UDP packets with payload <= 2 otherwise contain a wrong checksums. Signed-off-by: Helmut Buchsbaum Signed-off-by: David S. 
Miller --- drivers/net/ethernet/cadence/macb.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 89c0cfa..d954a97 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -1323,6 +1323,24 @@ dma_error: return 0; } +static inline int macb_clear_csum(struct sk_buff *skb) +{ + /* no change for packets without checksum offloading */ + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + /* make sure we can modify the header */ + if (unlikely(skb_cow_head(skb, 0))) + return -1; + + /* initialize checksum field + * This is required - at least for Zynq, which otherwise calculates + * wrong UDP header checksums for UDP packets with UDP data len <=2 + */ + *(__sum16 *)(skb_checksum_start(skb) + skb->csum_offset) = 0; + return 0; +} + static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev) { u16 queue_index = skb_get_queue_mapping(skb); @@ -1362,6 +1380,11 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_BUSY; } + if (macb_clear_csum(skb)) { + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + /* Map socket buffer for DMA transfer */ if (!macb_tx_map(bp, queue, skb)) { dev_kfree_skb_any(skb); -- cgit v1.1 From daa7ee8dfa64233789221685e73a6ef8159f80de Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Sun, 4 Sep 2016 23:02:21 +0530 Subject: net: smsc: remove build warning of duplicate definition The build of m32r was giving warning: In file included from drivers/net/ethernet/smsc/smc91x.c:92:0: drivers/net/ethernet/smsc/smc91x.h:448:0: warning: "SMC_inb" redefined #define SMC_inb(ioaddr, reg) ({ BUG(); 0; }) drivers/net/ethernet/smsc/smc91x.h:106:0: note: this is the location of the previous definition #define SMC_inb(a, r) inb(((u32)a) + (r)) drivers/net/ethernet/smsc/smc91x.h:449:0: warning: "SMC_outb" redefined #define SMC_outb(x, ioaddr, reg) BUG() 
drivers/net/ethernet/smsc/smc91x.h:108:0: note: this is the location of the previous definition #define SMC_outb(v, a, r) outb(v, ((u32)a) + (r)) Signed-off-by: Sudip Mukherjee Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/smc91x.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/smsc/smc91x.h b/drivers/net/ethernet/smsc/smc91x.h index e17671c..ea84654 100644 --- a/drivers/net/ethernet/smsc/smc91x.h +++ b/drivers/net/ethernet/smsc/smc91x.h @@ -470,7 +470,9 @@ smc_pxa_dma_insw(void __iomem *ioaddr, struct smc_local *lp, int reg, int dma, #endif #if ! SMC_CAN_USE_8BIT +#undef SMC_inb #define SMC_inb(ioaddr, reg) ({ BUG(); 0; }) +#undef SMC_outb #define SMC_outb(x, ioaddr, reg) BUG() #define SMC_insb(a, r, p, l) BUG() #define SMC_outsb(a, r, p, l) BUG() -- cgit v1.1 From 5a56a0b3a45dd0cc5b2f7bec6afd053a474ed9f5 Mon Sep 17 00:00:00 2001 From: Mark Tomlinson Date: Mon, 5 Sep 2016 10:20:20 +1200 Subject: net: Don't delete routes in different VRFs When deleting an IP address from an interface, there is a clean-up of routes which refer to this local address. However, there was no check to see that the VRF matched. This meant that deletion wasn't confined to the VRF it should have been. To solve this, a new field has been added to fib_info to hold a table id. When removing fib entries corresponding to a local ip address, this table id is also used in the comparison. The table id is populated when the fib_info is created. This was already done in some places, but not in ip_rt_ioctl(). This has now been fixed. Fixes: 021dd3b8a142 ("net: Add routes to the table associated with the device") Acked-by: David Ahern Tested-by: David Ahern Signed-off-by: Mark Tomlinson Signed-off-by: David S. 
Miller --- include/net/ip_fib.h | 3 ++- net/ipv4/fib_frontend.c | 3 ++- net/ipv4/fib_semantics.c | 8 ++++++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 4079fc1..7d4a72e 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -111,6 +111,7 @@ struct fib_info { unsigned char fib_scope; unsigned char fib_type; __be32 fib_prefsrc; + u32 fib_tb_id; u32 fib_priority; u32 *fib_metrics; #define fib_mtu fib_metrics[RTAX_MTU-1] @@ -319,7 +320,7 @@ void fib_flush_external(struct net *net); /* Exported by fib_semantics.c */ int ip_fib_check_default(__be32 gw, struct net_device *dev); int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force); -int fib_sync_down_addr(struct net *net, __be32 local); +int fib_sync_down_addr(struct net_device *dev, __be32 local); int fib_sync_up(struct net_device *dev, unsigned int nh_flags); extern u32 fib_multipath_secret __read_mostly; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index ef2ebeb..1b25daf 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -509,6 +509,7 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, if (!dev) return -ENODEV; cfg->fc_oif = dev->ifindex; + cfg->fc_table = l3mdev_fib_table(dev); if (colon) { struct in_ifaddr *ifa; struct in_device *in_dev = __in_dev_get_rtnl(dev); @@ -1027,7 +1028,7 @@ no_promotions: * First of all, we scan fib_info list searching * for stray nexthop entries, then ignite fib_flush. 
*/ - if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local)) + if (fib_sync_down_addr(dev, ifa->ifa_local)) fib_flush(dev_net(dev)); } } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 539fa26..e9f5622 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1057,6 +1057,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) fi->fib_priority = cfg->fc_priority; fi->fib_prefsrc = cfg->fc_prefsrc; fi->fib_type = cfg->fc_type; + fi->fib_tb_id = cfg->fc_table; fi->fib_nhs = nhs; change_nexthops(fi) { @@ -1337,18 +1338,21 @@ nla_put_failure: * referring to it. * - device went down -> we must shutdown all nexthops going via it. */ -int fib_sync_down_addr(struct net *net, __be32 local) +int fib_sync_down_addr(struct net_device *dev, __be32 local) { int ret = 0; unsigned int hash = fib_laddr_hashfn(local); struct hlist_head *head = &fib_info_laddrhash[hash]; + struct net *net = dev_net(dev); + int tb_id = l3mdev_fib_table(dev); struct fib_info *fi; if (!fib_info_laddrhash || local == 0) return 0; hlist_for_each_entry(fi, head, fib_lhash) { - if (!net_eq(fi->fib_net, net)) + if (!net_eq(fi->fib_net, net) || + fi->fib_tb_id != tb_id) continue; if (fi->fib_prefsrc == local) { fi->fib_flags |= RTNH_F_DEAD; -- cgit v1.1 From 9d13744bb75078175ab49408f2abb980e4dbccc9 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 5 Sep 2016 01:57:35 -0400 Subject: bnxt_en: Fix TX push operation on ARM64. There is a code path where we are calling __iowrite64_copy() on an address that is not 64-bit aligned. This causes an exception on some architectures such as arm64. Fix that code path by using __iowrite32_copy(). Reported-by: JD Zheng Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 2cf7910..228c964 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -353,8 +353,8 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) push_len = (length + sizeof(*tx_push) + 7) / 8; if (push_len > 16) { __iowrite64_copy(txr->tx_doorbell, tx_push_buf, 16); - __iowrite64_copy(txr->tx_doorbell + 4, tx_push_buf + 1, - push_len - 16); + __iowrite32_copy(txr->tx_doorbell + 4, tx_push_buf + 1, + (push_len - 16) << 1); } else { __iowrite64_copy(txr->tx_doorbell, tx_push_buf, push_len); -- cgit v1.1 From 751eb6b6042a596b0080967c1a529a9fe98dac1d Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 5 Sep 2016 16:06:31 +0800 Subject: ipv6: addrconf: fix dev refcont leak when DAD failed In general, when DAD detected IPv6 duplicate address, ifp->state will be set to INET6_IFADDR_STATE_ERRDAD and DAD is stopped by a delayed work, the call tree should be like this: ndisc_recv_ns -> addrconf_dad_failure <- missing ifp put -> addrconf_mod_dad_work -> schedule addrconf_dad_work() -> addrconf_dad_stop() <- missing ifp hold before call it addrconf_dad_failure() called with ifp refcont holding but not put. addrconf_dad_work() call addrconf_dad_stop() without extra holding refcount. This will not cause any issue normally. But the race between addrconf_dad_failure() and addrconf_dad_work() may cause ifp refcount leak and netdevice can not be unregister, dmesg show the following messages: IPv6: eth0: IPv6 duplicate address fe80::XX:XXXX:XXXX:XX detected! ... unregister_netdevice: waiting for eth0 to become free. Usage count = 1 Cc: stable@vger.kernel.org Fixes: c15b1ccadb32 ("ipv6: move DAD and addrconf_verify processing to workqueue") Signed-off-by: Wei Yongjun Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index bdf368e..2f1f5d4 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1948,6 +1948,7 @@ errdad: spin_unlock_bh(&ifp->lock); addrconf_mod_dad_work(ifp, 0); + in6_ifa_put(ifp); } /* Join to solicited addr multicast group. @@ -3857,6 +3858,7 @@ static void addrconf_dad_work(struct work_struct *w) addrconf_dad_begin(ifp); goto out; } else if (action == DAD_ABORT) { + in6_ifa_hold(ifp); addrconf_dad_stop(ifp, 1); if (disable_ipv6) addrconf_ifdown(idev->dev, 0); -- cgit v1.1 From 696118c016dd5f5caaa05360f13f8acd8fb9d1a7 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Wed, 7 Sep 2016 13:39:37 +0300 Subject: usb: dwc3: pci: fix build warning on !PM_SLEEP When building a kernel with CONFIG_PM_SLEEP=n, we get the following warning: drivers/usb/dwc3/dwc3-pci.c:253:12: warning: 'dwc3_pci_pm_dummy' defined but not used In order to fix this, we should only define dwc3_pci_pm_dummy() when CONFIG_PM_SLEEP is defined. 
Fixes: f6c274e11e3b ("usb: dwc3: pci: runtime_resume child device") Reported-by: Arnd Bergmann Acked-by: Arnd Bergmann Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/dwc3-pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 0a32430..6df0f5d 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -249,7 +249,9 @@ static int dwc3_pci_runtime_resume(struct device *dev) return pm_runtime_get(&dwc3->dev); } +#endif /* CONFIG_PM */ +#ifdef CONFIG_PM_SLEEP static int dwc3_pci_pm_dummy(struct device *dev) { /* @@ -262,7 +264,7 @@ static int dwc3_pci_pm_dummy(struct device *dev) */ return 0; } -#endif /* CONFIG_PM */ +#endif /* CONFIG_PM_SLEEP */ static struct dev_pm_ops dwc3_pci_dev_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(dwc3_pci_pm_dummy, dwc3_pci_pm_dummy) -- cgit v1.1 From 0bd2223594a4dcddc1e34b15774a3a4776f7749e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 1 Sep 2016 14:25:43 +0100 Subject: crypto: cryptd - initialize child shash_desc on import When calling .import() on a cryptd ahash_request, the structure members that describe the child transform in the shash_desc need to be initialized like they are when calling .init() Cc: stable@vger.kernel.org Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/cryptd.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/crypto/cryptd.c b/crypto/cryptd.c index 77207b4..0c654e5 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -631,9 +631,14 @@ static int cryptd_hash_export(struct ahash_request *req, void *out) static int cryptd_hash_import(struct ahash_request *req, const void *in) { - struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct shash_desc *desc = cryptd_shash_desc(req); + + desc->tfm = ctx->child; + desc->flags = req->base.flags; - 
return crypto_shash_import(&rctx->desc, in); + return crypto_shash_import(desc, in); } static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, -- cgit v1.1 From e34f2ff40e0339f6a379e1ecf49e8f2759056453 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Giedrius=20Statkevi=C4=8Dius?= Date: Thu, 1 Sep 2016 20:47:02 +0300 Subject: ath9k: bring back direction setting in ath9k_{start_stop} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A regression was introduced in commit id 79d4db1214a ("ath9k: cleanup led_pin initial") that broke the WLAN status led on my laptop with AR9287 after suspending and resuming. Steps to reproduce: * Suspend (laptop) * Resume (laptop) * Observe that the WLAN led no longer turns ON/OFF depending on the status and is always red In my case it only needs to be set to OUT in ath9k_start, but for consistency bring back the IN direction setting as well. Fixes: 79d4db1214a0 ("ath9k: cleanup led_pin initial") Cc: Miaoqing Pan Cc: Kalle Valo Cc: # 4.7+ Link: https://bugzilla.kernel.org/show_bug.cgi?id=151711 Signed-off-by: Giedrius Statkevičius [kvalo@qca.qualcomm.com: improve commit log] Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/main.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 6ca4337..7cb65c3 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -718,9 +718,12 @@ static int ath9k_start(struct ieee80211_hw *hw) if (!ath_complete_reset(sc, false)) ah->reset_power_on = false; - if (ah->led_pin >= 0) + if (ah->led_pin >= 0) { ath9k_hw_set_gpio(ah, ah->led_pin, (ah->config.led_active_high) ? 
1 : 0); + ath9k_hw_gpio_request_out(ah, ah->led_pin, NULL, + AR_GPIO_OUTPUT_MUX_AS_OUTPUT); + } /* * Reset key cache to sane defaults (all entries cleared) instead of @@ -864,9 +867,11 @@ static void ath9k_stop(struct ieee80211_hw *hw) spin_lock_bh(&sc->sc_pcu_lock); - if (ah->led_pin >= 0) + if (ah->led_pin >= 0) { ath9k_hw_set_gpio(ah, ah->led_pin, (ah->config.led_active_high) ? 0 : 1); + ath9k_hw_gpio_request_in(ah, ah->led_pin, NULL); + } ath_prepare_reset(sc); -- cgit v1.1 From 71a17de30733822b1ca6fbb3792581f5e7ee13de Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 7 Sep 2016 14:43:39 +0100 Subject: rxrpc: Whitespace cleanup Remove some whitespace. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index bb342f5..ad702f9 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -720,7 +720,6 @@ static inline void rxrpc_put_connection(struct rxrpc_connection *conn) } } - static inline bool rxrpc_queue_conn(struct rxrpc_connection *conn) { if (!rxrpc_get_connection_maybe(conn)) @@ -879,7 +878,7 @@ int __init rxrpc_init_security(void); void rxrpc_exit_security(void); int rxrpc_init_client_conn_security(struct rxrpc_connection *); int rxrpc_init_server_conn_security(struct rxrpc_connection *); - + /* * sendmsg.c */ -- cgit v1.1 From e796cb419237f54b96442ae7feca1859c693865c Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 7 Sep 2016 14:42:15 +0100 Subject: rxrpc: Delete unused rxrpc_kernel_free_skb() Delete rxrpc_kernel_free_skb() as it's unused. 
Signed-off-by: David Howells --- net/rxrpc/skbuff.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 9752f8b..a546a2b 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -148,19 +148,6 @@ void rxrpc_packet_destructor(struct sk_buff *skb) _leave(""); } -/** - * rxrpc_kernel_free_skb - Free an RxRPC socket buffer - * @skb: The socket buffer to be freed - * - * Let RxRPC free its own socket buffer, permitting it to maintain debug - * accounting. - */ -void rxrpc_kernel_free_skb(struct sk_buff *skb) -{ - rxrpc_free_skb(skb); -} -EXPORT_SYMBOL(rxrpc_kernel_free_skb); - /* * Note the existence of a new-to-us socket buffer (allocated or dequeued). */ -- cgit v1.1 From ded89912156b1a47d940a0c954c43afbabd0c42c Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Mon, 5 Sep 2016 10:45:47 +0100 Subject: brcmfmac: avoid potential stack overflow in brcmf_cfg80211_start_ap() User-space can choose to omit NL80211_ATTR_SSID and only provide raw IE TLV data. When doing so it can provide SSID IE with length exceeding the allowed size. The driver further processes this IE copying it into a local variable without checking the length. Hence stack can be corrupted and used as exploit. 
Cc: stable@vger.kernel.org # v4.7 Reported-by: Daxing Guo Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 5db56a7..b8aec5e5 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -4527,7 +4527,7 @@ brcmf_cfg80211_start_ap(struct wiphy *wiphy, struct net_device *ndev, (u8 *)&settings->beacon.head[ie_offset], settings->beacon.head_len - ie_offset, WLAN_EID_SSID); - if (!ssid_ie) + if (!ssid_ie || ssid_ie->len > IEEE80211_MAX_SSID_LEN) return -EINVAL; memcpy(ssid_le.SSID, ssid_ie->data, ssid_ie->len); -- cgit v1.1 From fff72429c2e83bdbe32dc7f1ad6398dfe50750c6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 7 Sep 2016 14:34:21 +0100 Subject: rxrpc: Improve the call tracking tracepoint Improve the call tracking tracepoint by showing more differentiation between some of the put and get events, including: (1) Getting and putting refs for the socket call user ID tree. (2) Getting and putting refs for queueing and failing to queue the call processor work item. Note that these aren't necessarily used in this patch, but will be taken advantage of in future patches. An enum is added for the event subtype numbers rather than coding them directly as decimal numbers and a table of 3-letter strings is provided rather than a sequence of ?: operators. 
Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 11 +++------- net/rxrpc/af_rxrpc.c | 2 +- net/rxrpc/ar-internal.h | 22 ++++++++++++++++++-- net/rxrpc/call_accept.c | 10 ++++----- net/rxrpc/call_event.c | 2 +- net/rxrpc/call_object.c | 48 ++++++++++++++++++++++++++++---------------- net/rxrpc/input.c | 6 +++--- net/rxrpc/recvmsg.c | 23 +++++++++++---------- net/rxrpc/sendmsg.c | 4 ++-- net/rxrpc/skbuff.c | 2 +- 10 files changed, 79 insertions(+), 51 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index cbe574e..3016489 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -17,7 +17,8 @@ #include TRACE_EVENT(rxrpc_call, - TP_PROTO(struct rxrpc_call *call, int op, int usage, int nskb, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_call_trace op, + int usage, int nskb, const void *where, const void *aux), TP_ARGS(call, op, usage, nskb, where, aux), @@ -42,13 +43,7 @@ TRACE_EVENT(rxrpc_call, TP_printk("c=%p %s u=%d s=%d p=%pSR a=%p", __entry->call, - (__entry->op == 0 ? "NWc" : - __entry->op == 1 ? "NWs" : - __entry->op == 2 ? "SEE" : - __entry->op == 3 ? "GET" : - __entry->op == 4 ? "Gsb" : - __entry->op == 5 ? 
"PUT" : - "Psb"), + rxrpc_call_traces[__entry->op], __entry->usage, __entry->nskb, __entry->where, diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index b66a9e6..8356cd0 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -296,7 +296,7 @@ void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call) _enter("%d{%d}", call->debug_id, atomic_read(&call->usage)); rxrpc_remove_user_ID(rxrpc_sk(sock->sk), call); rxrpc_purge_queue(&call->knlrecv_queue); - rxrpc_put_call(call); + rxrpc_put_call(call, rxrpc_call_put); } EXPORT_SYMBOL(rxrpc_kernel_end_call); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index ad702f9..913255a 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -508,6 +508,24 @@ struct rxrpc_call { unsigned long ackr_window[RXRPC_ACKR_WINDOW_ASZ + 1]; }; +enum rxrpc_call_trace { + rxrpc_call_new_client, + rxrpc_call_new_service, + rxrpc_call_queued, + rxrpc_call_queued_ref, + rxrpc_call_seen, + rxrpc_call_got, + rxrpc_call_got_skb, + rxrpc_call_got_userid, + rxrpc_call_put, + rxrpc_call_put_skb, + rxrpc_call_put_userid, + rxrpc_call_put_noqueue, + rxrpc_call__nr_trace +}; + +extern const char rxrpc_call_traces[rxrpc_call__nr_trace][4]; + #include /* @@ -555,8 +573,8 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *, void rxrpc_release_call(struct rxrpc_call *); void rxrpc_release_calls_on_socket(struct rxrpc_sock *); void rxrpc_see_call(struct rxrpc_call *); -void rxrpc_get_call(struct rxrpc_call *); -void rxrpc_put_call(struct rxrpc_call *); +void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace); +void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_get_call_for_skb(struct rxrpc_call *, struct sk_buff *); void rxrpc_put_call_for_skb(struct rxrpc_call *, struct sk_buff *); void __exit rxrpc_destroy_all_calls(void); diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 68a439e..487ae7a 100644 --- a/net/rxrpc/call_accept.c +++ 
b/net/rxrpc/call_accept.c @@ -115,7 +115,7 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local, write_lock(&rx->call_lock); if (!test_and_set_bit(RXRPC_CALL_INIT_ACCEPT, &call->flags)) { - rxrpc_get_call(call); + rxrpc_get_call(call, rxrpc_call_got); spin_lock(&call->conn->state_lock); if (sp->hdr.securityIndex > 0 && @@ -155,7 +155,7 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local, _debug("done"); read_unlock_bh(&local->services_lock); rxrpc_free_skb(notification); - rxrpc_put_call(call); + rxrpc_put_call(call, rxrpc_call_put); _leave(" = 0"); return 0; @@ -166,11 +166,11 @@ invalid_service: read_lock_bh(&call->state_lock); if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) { - rxrpc_get_call(call); + rxrpc_get_call(call, rxrpc_call_got); rxrpc_queue_call(call); } read_unlock_bh(&call->state_lock); - rxrpc_put_call(call); + rxrpc_put_call(call, rxrpc_call_put); ret = -ECONNREFUSED; error: rxrpc_free_skb(notification); @@ -341,6 +341,7 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, } /* formalise the acceptance */ + rxrpc_get_call(call, rxrpc_call_got_userid); call->notify_rx = notify_rx; call->user_call_ID = user_call_ID; rb_link_node(&call->sock_node, parent, pp); @@ -351,7 +352,6 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, BUG(); rxrpc_queue_call(call); - rxrpc_get_call(call); write_unlock_bh(&call->state_lock); write_unlock(&rx->call_lock); _leave(" = %p{%d}", call, call->debug_id); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 4754c7f..fee8b6d 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -1246,7 +1246,7 @@ send_message_2: kill_ACKs: del_timer_sync(&call->ack_timer); if (test_and_clear_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events)) - rxrpc_put_call(call); + rxrpc_put_call(call, rxrpc_call_put); clear_bit(RXRPC_CALL_EV_ACK, &call->events); maybe_reschedule: diff --git a/net/rxrpc/call_object.c 
b/net/rxrpc/call_object.c index 6569174..3166b52 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -55,6 +55,21 @@ const char *const rxrpc_call_completions[NR__RXRPC_CALL_COMPLETIONS] = { [RXRPC_CALL_NETWORK_ERROR] = "NetError", }; +const char rxrpc_call_traces[rxrpc_call__nr_trace][4] = { + [rxrpc_call_new_client] = "NWc", + [rxrpc_call_new_service] = "NWs", + [rxrpc_call_queued] = "QUE", + [rxrpc_call_queued_ref] = "QUR", + [rxrpc_call_seen] = "SEE", + [rxrpc_call_got] = "GOT", + [rxrpc_call_got_skb] = "Gsk", + [rxrpc_call_got_userid] = "Gus", + [rxrpc_call_put] = "PUT", + [rxrpc_call_put_skb] = "Psk", + [rxrpc_call_put_userid] = "Pus", + [rxrpc_call_put_noqueue] = "PNQ", +}; + struct kmem_cache *rxrpc_call_jar; LIST_HEAD(rxrpc_calls); DEFINE_RWLOCK(rxrpc_call_lock); @@ -96,7 +111,7 @@ struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *rx, return NULL; found_extant_call: - rxrpc_get_call(call); + rxrpc_get_call(call, rxrpc_call_got); read_unlock(&rx->call_lock); _leave(" = %p [%d]", call, atomic_read(&call->usage)); return call; @@ -252,8 +267,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, goto found_user_ID_now_present; } - rxrpc_get_call(call); - + rxrpc_get_call(call, rxrpc_call_got_userid); rb_link_node(&call->sock_node, parent, pp); rb_insert_color(&call->sock_node, &rx->calls); write_unlock(&rx->call_lock); @@ -275,7 +289,7 @@ error: write_lock(&rx->call_lock); rb_erase(&call->sock_node, &rx->calls); write_unlock(&rx->call_lock); - rxrpc_put_call(call); + rxrpc_put_call(call, rxrpc_call_put_userid); write_lock_bh(&rxrpc_call_lock); list_del_init(&call->link); @@ -283,7 +297,7 @@ error: set_bit(RXRPC_CALL_RELEASED, &call->flags); call->state = RXRPC_CALL_DEAD; - rxrpc_put_call(call); + rxrpc_put_call(call, rxrpc_call_put); _leave(" = %d", ret); return ERR_PTR(ret); @@ -296,7 +310,7 @@ found_user_ID_now_present: write_unlock(&rx->call_lock); set_bit(RXRPC_CALL_RELEASED, &call->flags); call->state = 
RXRPC_CALL_DEAD; - rxrpc_put_call(call); + rxrpc_put_call(call, rxrpc_call_put); _leave(" = -EEXIST [%p]", call); return ERR_PTR(-EEXIST); } @@ -322,8 +336,8 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, if (!candidate) return ERR_PTR(-EBUSY); - trace_rxrpc_call(candidate, 1, atomic_read(&candidate->usage), - 0, here, NULL); + trace_rxrpc_call(candidate, rxrpc_call_new_service, + atomic_read(&candidate->usage), 0, here, NULL); chan = sp->hdr.cid & RXRPC_CHANNELMASK; candidate->socket = rx; @@ -358,7 +372,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, read_unlock(&call->state_lock); goto aborted_call; default: - rxrpc_get_call(call); + rxrpc_get_call(call, rxrpc_call_got); read_unlock(&call->state_lock); goto extant_call; } @@ -447,20 +461,20 @@ void rxrpc_see_call(struct rxrpc_call *call) int n = atomic_read(&call->usage); int m = atomic_read(&call->skb_count); - trace_rxrpc_call(call, 2, n, m, here, 0); + trace_rxrpc_call(call, rxrpc_call_seen, n, m, here, NULL); } } /* * Note the addition of a ref on a call. 
*/ -void rxrpc_get_call(struct rxrpc_call *call) +void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) { const void *here = __builtin_return_address(0); int n = atomic_inc_return(&call->usage); int m = atomic_read(&call->skb_count); - trace_rxrpc_call(call, 3, n, m, here, 0); + trace_rxrpc_call(call, op, n, m, here, NULL); } /* @@ -472,7 +486,7 @@ void rxrpc_get_call_for_skb(struct rxrpc_call *call, struct sk_buff *skb) int n = atomic_inc_return(&call->usage); int m = atomic_inc_return(&call->skb_count); - trace_rxrpc_call(call, 4, n, m, here, skb); + trace_rxrpc_call(call, rxrpc_call_got_skb, n, m, here, skb); } /* @@ -575,7 +589,7 @@ static void rxrpc_dead_call_expired(unsigned long _call) write_lock_bh(&call->state_lock); call->state = RXRPC_CALL_DEAD; write_unlock_bh(&call->state_lock); - rxrpc_put_call(call); + rxrpc_put_call(call, rxrpc_call_put); } /* @@ -632,7 +646,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) /* * release a call */ -void rxrpc_put_call(struct rxrpc_call *call) +void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op) { const void *here = __builtin_return_address(0); int n, m; @@ -641,7 +655,7 @@ void rxrpc_put_call(struct rxrpc_call *call) n = atomic_dec_return(&call->usage); m = atomic_read(&call->skb_count); - trace_rxrpc_call(call, 5, n, m, here, NULL); + trace_rxrpc_call(call, op, n, m, here, NULL); ASSERTCMP(n, >=, 0); if (n == 0) { _debug("call %d dead", call->debug_id); @@ -661,7 +675,7 @@ void rxrpc_put_call_for_skb(struct rxrpc_call *call, struct sk_buff *skb) n = atomic_dec_return(&call->usage); m = atomic_dec_return(&call->skb_count); - trace_rxrpc_call(call, 6, n, m, here, skb); + trace_rxrpc_call(call, rxrpc_call_put_skb, n, m, here, skb); ASSERTCMP(n, >=, 0); if (n == 0) { _debug("call %d dead", call->debug_id); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 72f016c..f7239a6 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -537,7 +537,7 @@ static void 
rxrpc_post_packet_to_call(struct rxrpc_call *call, } read_unlock(&call->state_lock); - rxrpc_get_call(call); + rxrpc_get_call(call, rxrpc_call_got); if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && sp->hdr.flags & RXRPC_JUMBO_PACKET) @@ -545,12 +545,12 @@ static void rxrpc_post_packet_to_call(struct rxrpc_call *call, else rxrpc_fast_process_packet(call, skb); - rxrpc_put_call(call); + rxrpc_put_call(call, rxrpc_call_put); goto done; resend_final_ack: _debug("final ack again"); - rxrpc_get_call(call); + rxrpc_get_call(call, rxrpc_call_got); set_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); rxrpc_queue_call(call); goto free_unlock; diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 0ab7b33..97f8ee7 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -79,7 +79,8 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) { release_sock(&rx->sk); if (continue_call) - rxrpc_put_call(continue_call); + rxrpc_put_call(continue_call, + rxrpc_call_put); return -ENODATA; } } @@ -137,13 +138,13 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (call != continue_call || skb->mark != RXRPC_SKB_MARK_DATA) { release_sock(&rx->sk); - rxrpc_put_call(continue_call); + rxrpc_put_call(continue_call, rxrpc_call_put); _leave(" = %d [noncont]", copied); return copied; } } - rxrpc_get_call(call); + rxrpc_get_call(call, rxrpc_call_got); /* copy the peer address and timestamp */ if (!continue_call) { @@ -233,7 +234,7 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (!continue_call) continue_call = sp->call; else - rxrpc_put_call(call); + rxrpc_put_call(call, rxrpc_call_put); call = NULL; if (flags & MSG_PEEK) { @@ -255,9 +256,9 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, out: release_sock(&rx->sk); if (call) - rxrpc_put_call(call); + rxrpc_put_call(call, rxrpc_call_put); if (continue_call) - rxrpc_put_call(continue_call); + 
rxrpc_put_call(continue_call, rxrpc_call_put); _leave(" = %d [data]", copied); return copied; @@ -341,18 +342,18 @@ terminal_message: } release_sock(&rx->sk); - rxrpc_put_call(call); + rxrpc_put_call(call, rxrpc_call_put); if (continue_call) - rxrpc_put_call(continue_call); + rxrpc_put_call(continue_call, rxrpc_call_put); _leave(" = %d", ret); return ret; copy_error: _debug("copy error"); release_sock(&rx->sk); - rxrpc_put_call(call); + rxrpc_put_call(call, rxrpc_call_put); if (continue_call) - rxrpc_put_call(continue_call); + rxrpc_put_call(continue_call, rxrpc_call_put); _leave(" = %d", ret); return ret; @@ -361,7 +362,7 @@ wait_interrupted: wait_error: finish_wait(sk_sleep(&rx->sk), &wait); if (continue_call) - rxrpc_put_call(continue_call); + rxrpc_put_call(continue_call, rxrpc_call_put); if (copied) copied = ret; _leave(" = %d [waitfail %d]", copied, ret); diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 7376794..803078b 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -534,7 +534,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) call = rxrpc_accept_call(rx, user_call_ID, NULL); if (IS_ERR(call)) return PTR_ERR(call); - rxrpc_put_call(call); + rxrpc_put_call(call, rxrpc_call_put); return 0; } @@ -573,7 +573,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) ret = rxrpc_send_data(rx, call, msg, len); } - rxrpc_put_call(call); + rxrpc_put_call(call, rxrpc_call_put); _leave(" = %d", ret); return ret; } diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index a546a2b..c0613ab 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -35,7 +35,7 @@ static void rxrpc_request_final_ACK(struct rxrpc_call *call) /* get an extra ref on the call for the final-ACK generator to * release */ - rxrpc_get_call(call); + rxrpc_get_call(call, rxrpc_call_got); set_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); if (try_to_del_timer_sync(&call->ack_timer) >= 0) rxrpc_queue_call(call); -- cgit 
v1.1 From f4fdb3525b247e1b4f9cc563641494c96c80f91a Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 7 Sep 2016 14:45:26 +0100 Subject: rxrpc: Use call->peer rather than call->conn->params.peer Use call->peer rather than call->conn->params.peer to avoid the possibility of call->conn being NULL and, whilst we're at it, check it for NULL before we access it. Signed-off-by: David Howells --- net/rxrpc/call_object.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 3166b52..060ddc3 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -514,9 +514,11 @@ void rxrpc_release_call(struct rxrpc_call *call) */ _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn); - spin_lock(&conn->params.peer->lock); - hlist_del_init(&call->error_link); - spin_unlock(&conn->params.peer->lock); + if (call->peer) { + spin_lock(&call->peer->lock); + hlist_del_init(&call->error_link); + spin_unlock(&call->peer->lock); + } write_lock_bh(&rx->call_lock); if (!list_empty(&call->accept_link)) { -- cgit v1.1 From 278ac0cdd5e516bdef2b9b8f5a4dd6366a5bccfe Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 7 Sep 2016 15:19:25 +0100 Subject: rxrpc: Cache the security index in the rxrpc_call struct Cache the security index in the rxrpc_call struct so that we can get at it even when the call has been disconnected and the connection pointer cleared. 
Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/call_object.c | 1 + net/rxrpc/conn_client.c | 3 +++ net/rxrpc/input.c | 2 +- net/rxrpc/sendmsg.c | 2 +- 5 files changed, 7 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 913255a..e3dfc9d 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -475,6 +475,7 @@ struct rxrpc_call { atomic_t skb_count; /* Outstanding packets on this call */ atomic_t sequence; /* Tx data packet sequence counter */ u16 service_id; /* service ID */ + u8 security_ix; /* Security type */ u32 call_id; /* call ID on connection */ u32 cid; /* connection ID plus channel index */ int debug_id; /* debug ID for printks */ diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 060ddc3..83019e4 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -345,6 +345,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, candidate->peer = conn->params.peer; candidate->cid = sp->hdr.cid; candidate->call_id = sp->hdr.callNumber; + candidate->security_ix = sp->hdr.securityIndex; candidate->rx_data_post = 0; candidate->state = RXRPC_CALL_SERVER_ACCEPTING; candidate->flags |= (1 << RXRPC_CALL_IS_SERVICE); diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 82de1ae..9344a84 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -348,6 +348,7 @@ static int rxrpc_get_client_conn(struct rxrpc_call *call, if (cp->exclusive) { call->conn = candidate; + call->security_ix = candidate->security_ix; _leave(" = 0 [exclusive %d]", candidate->debug_id); return 0; } @@ -395,6 +396,7 @@ static int rxrpc_get_client_conn(struct rxrpc_call *call, candidate_published: set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags); call->conn = candidate; + call->security_ix = candidate->security_ix; spin_unlock(&local->client_conns_lock); _leave(" = 0 [new %d]", candidate->debug_id); return 0; @@ -412,6 +414,7 @@ 
found_extant_conn: spin_lock(&conn->channel_lock); call->conn = conn; + call->security_ix = conn->security_ix; list_add(&call->chan_wait_link, &conn->waiting_calls); spin_unlock(&conn->channel_lock); _leave(" = 0 [extant %d]", conn->debug_id); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index f7239a6..9242fef 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -198,7 +198,7 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call, /* if the packet need security things doing to it, then it goes down * the slow path */ - if (call->conn->security_ix) + if (call->security_ix) goto enqueue_packet; sp->call = call; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 803078b..2439aff 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -322,7 +322,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, sp->hdr.serial = atomic_inc_return(&conn->serial); sp->hdr.type = RXRPC_PACKET_TYPE_DATA; sp->hdr.userStatus = 0; - sp->hdr.securityIndex = conn->security_ix; + sp->hdr.securityIndex = call->security_ix; sp->hdr._rsvd = 0; sp->hdr.serviceId = call->service_id; -- cgit v1.1 From 8b7fac50ab7f2668c43795c135025c472922a344 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 7 Sep 2016 15:28:54 +0100 Subject: rxrpc: Pass the connection pointer to rxrpc_post_packet_to_call() Pass the connection pointer to rxrpc_post_packet_to_call() as the call might get disconnected whilst we're looking at it, but the connection pointer determined by rxrpc_data_ready() is guaranteed by RCU for the duration of the call. 
Signed-off-by: David Howells --- net/rxrpc/input.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 9242fef..52da437 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -497,7 +497,8 @@ protocol_error: * post an incoming packet to the appropriate call/socket to deal with * - must get rid of the sk_buff, either by freeing it or by queuing it */ -static void rxrpc_post_packet_to_call(struct rxrpc_call *call, +static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn, + struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp; @@ -558,7 +559,7 @@ resend_final_ack: dead_call: if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) { skb->priority = RX_CALL_DEAD; - rxrpc_reject_packet(call->conn->params.local, skb); + rxrpc_reject_packet(conn->params.local, skb); goto unlock; } free_unlock: @@ -754,7 +755,7 @@ void rxrpc_data_ready(struct sock *sk) goto cant_route_call; rxrpc_see_call(call); - rxrpc_post_packet_to_call(call, skb); + rxrpc_post_packet_to_call(conn, call, skb); goto out_unlock; } -- cgit v1.1 From 6543ac523558b2392271f3f8088e6455b3f00bb1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 7 Sep 2016 15:26:39 +0100 Subject: rxrpc: Use rxrpc_is_service_call() rather than rxrpc_conn_is_service() Use rxrpc_is_service_call() rather than rxrpc_conn_is_service() if the call is available just in case call->conn is NULL. 
Signed-off-by: David Howells --- net/rxrpc/input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 52da437..8267f42 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -357,7 +357,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb) case RXRPC_PACKET_TYPE_BUSY: _proto("Rx BUSY %%%u", sp->hdr.serial); - if (rxrpc_conn_is_service(call->conn)) + if (rxrpc_is_service_call(call)) goto protocol_error; write_lock_bh(&call->state_lock); @@ -525,7 +525,7 @@ static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn, default: goto dead_call; case RXRPC_CALL_SUCCEEDED: - if (rxrpc_conn_is_service(call->conn)) + if (rxrpc_is_service_call(call)) goto dead_call; goto resend_final_ack; } -- cgit v1.1 From 8d94aa381dab19f3c0f524f5d255248b0ae50125 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 7 Sep 2016 09:19:31 +0100 Subject: rxrpc: Calls shouldn't hold socket refs rxrpc calls shouldn't hold refs on the sock struct. This was done so that the socket wouldn't go away whilst the call was in progress, such that the call could reach the socket's queues. However, we can mark the socket as requiring an RCU release and rely on the RCU read lock. To make this work, we do: (1) rxrpc_release_call() removes the call's call user ID. This is now only called from socket operations and not from the call processor: rxrpc_accept_call() / rxrpc_kernel_accept_call() rxrpc_reject_call() / rxrpc_kernel_reject_call() rxrpc_kernel_end_call() rxrpc_release_calls_on_socket() rxrpc_recvmsg() Though it is also called in the cleanup path of rxrpc_accept_incoming_call() before we assign a user ID. (2) Pass the socket pointer into rxrpc_release_call() rather than getting it from the call so that we can get rid of uninitialised calls. 
(3) Fix call processor queueing to pass a ref to the work queue and to release that ref at the end of the processor function (or to pass it back to the work queue if we have to requeue). (4) Skip out of the call processor function asap if the call is complete and don't requeue it if the call is complete. (5) Clean up the call immediately that the refcount reaches 0 rather than trying to defer it. Actual deallocation is deferred to RCU, however. (6) Don't hold socket refs for allocated calls. (7) Use the RCU read lock when queueing a message on a socket and treat the call's socket pointer according to RCU rules and check it for NULL. We also need to use the RCU read lock when viewing a call through procfs. (8) Transmit the final ACK/ABORT to a client call in rxrpc_release_call() if this hasn't been done yet so that we can then disconnect the call. Once the call is disconnected, it won't have any access to the connection struct and the UDP socket for the call work processor to be able to send the ACK. Terminal retransmission will be handled by the connection processor. (9) Release all calls immediately on the closing of a socket rather than trying to defer this. Incomplete calls will be aborted. The call refcount model is much simplified. Refs are held on the call by: (1) A socket's user ID tree. (2) A socket's incoming call secureq and acceptq. (3) A kernel service that has a call in progress. (4) A queued call work processor. We have to take care to put any call that we failed to queue. (5) sk_buffs on a socket's receive queue. A future patch will get rid of this. Whilst we're at it, we can do: (1) Get rid of the RXRPC_CALL_EV_RELEASE event. Release is now done entirely from the socket routines and never from the call's processor. (2) Get rid of the RXRPC_CALL_DEAD state. Calls now end in the RXRPC_CALL_COMPLETE state. (3) Get rid of the rxrpc_call::destroyer work item. 
Calls are now torn down when their refcount reaches 0 and then handed over to RCU for final cleanup. (4) Get rid of the rxrpc_call::deadspan timer. Calls are cleaned up immediately they're finished with and don't hang around. Post-completion retransmission is handled by the connection processor once the call is disconnected. (5) Get rid of the dead call expiry setting as there's no longer a timer to set. (6) rxrpc_destroy_all_calls() can just check that the call list is empty. Signed-off-by: David Howells --- net/rxrpc/af_rxrpc.c | 4 +- net/rxrpc/ar-internal.h | 15 ++-- net/rxrpc/call_accept.c | 55 ++++-------- net/rxrpc/call_event.c | 74 +++++++--------- net/rxrpc/call_object.c | 224 +++++++++++++++++++----------------------------- net/rxrpc/input.c | 26 +++--- net/rxrpc/output.c | 145 +++++++++++++++++++++++++++++++ net/rxrpc/proc.c | 4 +- net/rxrpc/recvmsg.c | 24 +----- net/rxrpc/skbuff.c | 3 - net/rxrpc/sysctl.c | 8 -- 11 files changed, 303 insertions(+), 279 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 8356cd0..77a132a 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -294,8 +294,7 @@ EXPORT_SYMBOL(rxrpc_kernel_begin_call); void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call) { _enter("%d{%d}", call->debug_id, atomic_read(&call->usage)); - rxrpc_remove_user_ID(rxrpc_sk(sock->sk), call); - rxrpc_purge_queue(&call->knlrecv_queue); + rxrpc_release_call(rxrpc_sk(sock->sk), call); rxrpc_put_call(call, rxrpc_call_put); } EXPORT_SYMBOL(rxrpc_kernel_end_call); @@ -558,6 +557,7 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, return -ENOMEM; sock_init_data(sock, sk); + sock_set_flag(sk, SOCK_RCU_FREE); sk->sk_state = RXRPC_UNBOUND; sk->sk_write_space = rxrpc_write_space; sk->sk_max_ack_backlog = 0; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index e3dfc9d..3addda4 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -35,8 +35,6 @@ struct 
rxrpc_crypt { #define rxrpc_queue_delayed_work(WS,D) \ queue_delayed_work(rxrpc_workqueue, (WS), (D)) -#define rxrpc_queue_call(CALL) rxrpc_queue_work(&(CALL)->processor) - struct rxrpc_connection; /* @@ -397,7 +395,6 @@ enum rxrpc_call_event { RXRPC_CALL_EV_ACCEPTED, /* incoming call accepted by userspace app */ RXRPC_CALL_EV_SECURED, /* incoming call's connection is now secure */ RXRPC_CALL_EV_POST_ACCEPT, /* need to post an "accept?" message to the app */ - RXRPC_CALL_EV_RELEASE, /* need to release the call's resources */ }; /* @@ -417,7 +414,6 @@ enum rxrpc_call_state { RXRPC_CALL_SERVER_SEND_REPLY, /* - server sending reply */ RXRPC_CALL_SERVER_AWAIT_ACK, /* - server awaiting final ACK */ RXRPC_CALL_COMPLETE, /* - call complete */ - RXRPC_CALL_DEAD, /* - call is dead */ NR__RXRPC_CALL_STATES }; @@ -442,12 +438,10 @@ struct rxrpc_call { struct rcu_head rcu; struct rxrpc_connection *conn; /* connection carrying call */ struct rxrpc_peer *peer; /* Peer record for remote address */ - struct rxrpc_sock *socket; /* socket responsible */ + struct rxrpc_sock __rcu *socket; /* socket responsible */ struct timer_list lifetimer; /* lifetime remaining on call */ - struct timer_list deadspan; /* reap timer for re-ACK'ing, etc */ struct timer_list ack_timer; /* ACK generation timer */ struct timer_list resend_timer; /* Tx resend timer */ - struct work_struct destroyer; /* call destroyer */ struct work_struct processor; /* packet processor and ACK generator */ rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ struct list_head link; /* link in master call list */ @@ -558,7 +552,6 @@ void rxrpc_process_call(struct work_struct *); extern const char *const rxrpc_call_states[]; extern const char *const rxrpc_call_completions[]; extern unsigned int rxrpc_max_call_lifetime; -extern unsigned int rxrpc_dead_call_expiry; extern struct kmem_cache *rxrpc_call_jar; extern struct list_head rxrpc_calls; extern rwlock_t rxrpc_call_lock; @@ -571,8 +564,10 @@ struct 
rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_connection *, struct sk_buff *); -void rxrpc_release_call(struct rxrpc_call *); +void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *); void rxrpc_release_calls_on_socket(struct rxrpc_sock *); +bool __rxrpc_queue_call(struct rxrpc_call *); +bool rxrpc_queue_call(struct rxrpc_call *); void rxrpc_see_call(struct rxrpc_call *); void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace); @@ -835,6 +830,7 @@ extern const char *rxrpc_acks(u8 reason); /* * output.c */ +int rxrpc_send_call_packet(struct rxrpc_call *, u8); int rxrpc_send_data_packet(struct rxrpc_connection *, struct sk_buff *); /* @@ -880,7 +876,6 @@ extern const struct file_operations rxrpc_connection_seq_fops; /* * recvmsg.c */ -void rxrpc_remove_user_ID(struct rxrpc_sock *, struct rxrpc_call *); int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int); /* diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 487ae7a..879a964 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -163,13 +163,7 @@ invalid_service: _debug("invalid"); read_unlock_bh(&local->services_lock); - read_lock_bh(&call->state_lock); - if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && - !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) { - rxrpc_get_call(call, rxrpc_call_got); - rxrpc_queue_call(call); - } - read_unlock_bh(&call->state_lock); + rxrpc_release_call(rx, call); rxrpc_put_call(call, rxrpc_call_put); ret = -ECONNREFUSED; error: @@ -236,13 +230,11 @@ found_service: if (sk_acceptq_is_full(&rx->sk)) goto backlog_full; sk_acceptq_added(&rx->sk); - sock_hold(&rx->sk); read_unlock_bh(&local->services_lock); ret = rxrpc_accept_incoming_call(local, rx, skb, &srx); if (ret < 0) sk_acceptq_removed(&rx->sk); - sock_put(&rx->sk); switch (ret) { case -ECONNRESET: /* old calls are 
ignored */ case -ECONNABORTED: /* aborted calls are reaborted or ignored */ @@ -333,9 +325,6 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, case RXRPC_CALL_COMPLETE: ret = call->error; goto out_release; - case RXRPC_CALL_DEAD: - ret = -ETIME; - goto out_discard; default: BUG(); } @@ -350,24 +339,20 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, BUG(); if (test_and_set_bit(RXRPC_CALL_EV_ACCEPTED, &call->events)) BUG(); - rxrpc_queue_call(call); write_unlock_bh(&call->state_lock); write_unlock(&rx->call_lock); + rxrpc_queue_call(call); _leave(" = %p{%d}", call, call->debug_id); return call; - /* if the call is already dying or dead, then we leave the socket's ref - * on it to be released by rxrpc_dead_call_expired() as induced by - * rxrpc_release_call() */ out_release: - _debug("release %p", call); - if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && - !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) - rxrpc_queue_call(call); -out_discard: write_unlock_bh(&call->state_lock); - _debug("discard %p", call); + write_unlock(&rx->call_lock); + _debug("release %p", call); + rxrpc_release_call(rx, call); + _leave(" = %d", ret); + return ERR_PTR(ret); out: write_unlock(&rx->call_lock); _leave(" = %d", ret); @@ -390,8 +375,11 @@ int rxrpc_reject_call(struct rxrpc_sock *rx) write_lock(&rx->call_lock); ret = -ENODATA; - if (list_empty(&rx->acceptq)) - goto out; + if (list_empty(&rx->acceptq)) { + write_unlock(&rx->call_lock); + _leave(" = -ENODATA"); + return -ENODATA; + } /* dequeue the first call and check it's still valid */ call = list_entry(rx->acceptq.next, struct rxrpc_call, accept_link); @@ -407,30 +395,17 @@ int rxrpc_reject_call(struct rxrpc_sock *rx) if (test_and_set_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events)) rxrpc_queue_call(call); ret = 0; - goto out_release; + break; case RXRPC_CALL_COMPLETE: ret = call->error; - goto out_release; - case RXRPC_CALL_DEAD: - ret = -ETIME; - goto out_discard; + break; default: BUG(); } - 
/* if the call is already dying or dead, then we leave the socket's ref - * on it to be released by rxrpc_dead_call_expired() as induced by - * rxrpc_release_call() */ -out_release: - _debug("release %p", call); - if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && - !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) - rxrpc_queue_call(call); -out_discard: write_unlock_bh(&call->state_lock); - _debug("discard %p", call); -out: write_unlock(&rx->call_lock); + rxrpc_release_call(rx, call); _leave(" = %d", ret); return ret; } diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index fee8b6d..8365d33 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -811,8 +811,9 @@ static int rxrpc_post_message(struct rxrpc_call *call, u32 mark, u32 error, } /* - * handle background processing of incoming call packets and ACK / abort - * generation + * Handle background processing of incoming call packets and ACK / abort + * generation. A ref on the call is donated to us by whoever queued the work + * item. 
*/ void rxrpc_process_call(struct work_struct *work) { @@ -827,6 +828,7 @@ void rxrpc_process_call(struct work_struct *work) unsigned long bits; __be32 data, pad; size_t len; + bool requeue = false; int loop, nbit, ioc, ret, mtu; u32 serial, abort_code = RX_PROTOCOL_ERROR; u8 *acks = NULL; @@ -838,6 +840,11 @@ void rxrpc_process_call(struct work_struct *work) call->debug_id, rxrpc_call_states[call->state], call->events, (jiffies - call->creation_jif) / (HZ / 10)); + if (call->state >= RXRPC_CALL_COMPLETE) { + rxrpc_put_call(call, rxrpc_call_put); + return; + } + if (!call->conn) goto skip_msg_init; @@ -1088,16 +1095,21 @@ skip_msg_init: spin_lock_bh(&call->lock); if (call->state == RXRPC_CALL_SERVER_SECURING) { + struct rxrpc_sock *rx; _debug("securing"); - write_lock(&call->socket->call_lock); - if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && - !test_bit(RXRPC_CALL_EV_RELEASE, &call->events)) { - _debug("not released"); - call->state = RXRPC_CALL_SERVER_ACCEPTING; - list_move_tail(&call->accept_link, - &call->socket->acceptq); + rcu_read_lock(); + rx = rcu_dereference(call->socket); + if (rx) { + write_lock(&rx->call_lock); + if (!test_bit(RXRPC_CALL_RELEASED, &call->flags)) { + _debug("not released"); + call->state = RXRPC_CALL_SERVER_ACCEPTING; + list_move_tail(&call->accept_link, + &rx->acceptq); + } + write_unlock(&rx->call_lock); } - write_unlock(&call->socket->call_lock); + rcu_read_unlock(); read_lock(&call->state_lock); if (call->state < RXRPC_CALL_COMPLETE) set_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events); @@ -1139,11 +1151,6 @@ skip_msg_init: goto maybe_reschedule; } - if (test_bit(RXRPC_CALL_EV_RELEASE, &call->events)) { - rxrpc_release_call(call); - clear_bit(RXRPC_CALL_EV_RELEASE, &call->events); - } - /* other events may have been raised since we started checking */ goto maybe_reschedule; @@ -1209,10 +1216,8 @@ send_message_2: &msg, iov, ioc, len); if (ret < 0) { _debug("sendmsg failed: %d", ret); - read_lock_bh(&call->state_lock); - if 
(call->state < RXRPC_CALL_DEAD) - rxrpc_queue_call(call); - read_unlock_bh(&call->state_lock); + if (call->state < RXRPC_CALL_COMPLETE) + requeue = true; goto error; } @@ -1245,41 +1250,22 @@ send_message_2: kill_ACKs: del_timer_sync(&call->ack_timer); - if (test_and_clear_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events)) - rxrpc_put_call(call, rxrpc_call_put); clear_bit(RXRPC_CALL_EV_ACK, &call->events); maybe_reschedule: if (call->events || !skb_queue_empty(&call->rx_queue)) { - read_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_DEAD) - rxrpc_queue_call(call); - read_unlock_bh(&call->state_lock); - } - - /* don't leave aborted connections on the accept queue */ - if (call->state >= RXRPC_CALL_COMPLETE && - !list_empty(&call->accept_link)) { - _debug("X unlinking once-pending call %p { e=%lx f=%lx c=%x }", - call, call->events, call->flags, call->conn->proto.cid); - - read_lock_bh(&call->state_lock); - if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && - !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) - rxrpc_queue_call(call); - read_unlock_bh(&call->state_lock); + if (call->state < RXRPC_CALL_COMPLETE) + requeue = true; } error: kfree(acks); - /* because we don't want two CPUs both processing the work item for one - * call at the same time, we use a flag to note when it's busy; however - * this means there's a race between clearing the flag and setting the - * work pending bit and the work item being processed again */ - if (call->events && !work_pending(&call->processor)) { + if ((requeue || call->events) && !work_pending(&call->processor)) { _debug("jumpstart %x", call->conn->proto.cid); - rxrpc_queue_call(call); + __rxrpc_queue_call(call); + } else { + rxrpc_put_call(call, rxrpc_call_put); } _leave(""); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 83019e4..be5733d 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -24,11 +24,6 @@ */ unsigned int rxrpc_max_call_lifetime = 60 * HZ; -/* - * Time till 
dead call expires after last use (in jiffies). - */ -unsigned int rxrpc_dead_call_expiry = 2 * HZ; - const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { [RXRPC_CALL_UNINITIALISED] = "Uninit ", [RXRPC_CALL_CLIENT_AWAIT_CONN] = "ClWtConn", @@ -43,7 +38,6 @@ const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { [RXRPC_CALL_SERVER_SEND_REPLY] = "SvSndRpl", [RXRPC_CALL_SERVER_AWAIT_ACK] = "SvAwtACK", [RXRPC_CALL_COMPLETE] = "Complete", - [RXRPC_CALL_DEAD] = "Dead ", }; const char *const rxrpc_call_completions[NR__RXRPC_CALL_COMPLETIONS] = { @@ -74,11 +68,10 @@ struct kmem_cache *rxrpc_call_jar; LIST_HEAD(rxrpc_calls); DEFINE_RWLOCK(rxrpc_call_lock); -static void rxrpc_destroy_call(struct work_struct *work); static void rxrpc_call_life_expired(unsigned long _call); -static void rxrpc_dead_call_expired(unsigned long _call); static void rxrpc_ack_time_expired(unsigned long _call); static void rxrpc_resend_time_expired(unsigned long _call); +static void rxrpc_cleanup_call(struct rxrpc_call *call); /* * find an extant server call @@ -138,13 +131,10 @@ static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) setup_timer(&call->lifetimer, &rxrpc_call_life_expired, (unsigned long) call); - setup_timer(&call->deadspan, &rxrpc_dead_call_expired, - (unsigned long) call); setup_timer(&call->ack_timer, &rxrpc_ack_time_expired, (unsigned long) call); setup_timer(&call->resend_timer, &rxrpc_resend_time_expired, (unsigned long) call); - INIT_WORK(&call->destroyer, &rxrpc_destroy_call); INIT_WORK(&call->processor, &rxrpc_process_call); INIT_LIST_HEAD(&call->link); INIT_LIST_HEAD(&call->chan_wait_link); @@ -185,11 +175,9 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, if (!call) return ERR_PTR(-ENOMEM); call->state = RXRPC_CALL_CLIENT_AWAIT_CONN; - - sock_hold(&rx->sk); - call->socket = rx; call->rx_data_post = 1; call->service_id = srx->srx_service; + rcu_assign_pointer(call->socket, rx); _leave(" = %p", call); return call; @@ -244,8 +232,9 
@@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, return call; } - trace_rxrpc_call(call, 0, atomic_read(&call->usage), 0, here, - (const void *)user_call_ID); + trace_rxrpc_call(call, rxrpc_call_new_client, + atomic_read(&call->usage), 0, + here, (const void *)user_call_ID); /* Publish the call, even though it is incompletely set up as yet */ call->user_call_ID = user_call_ID; @@ -295,8 +284,10 @@ error: list_del_init(&call->link); write_unlock_bh(&rxrpc_call_lock); +error_out: + __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, + RX_CALL_DEAD, ret); set_bit(RXRPC_CALL_RELEASED, &call->flags); - call->state = RXRPC_CALL_DEAD; rxrpc_put_call(call, rxrpc_call_put); _leave(" = %d", ret); return ERR_PTR(ret); @@ -308,11 +299,8 @@ error: */ found_user_ID_now_present: write_unlock(&rx->call_lock); - set_bit(RXRPC_CALL_RELEASED, &call->flags); - call->state = RXRPC_CALL_DEAD; - rxrpc_put_call(call, rxrpc_call_put); - _leave(" = -EEXIST [%p]", call); - return ERR_PTR(-EEXIST); + ret = -EEXIST; + goto error_out; } /* @@ -340,7 +328,6 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, atomic_read(&candidate->usage), 0, here, NULL); chan = sp->hdr.cid & RXRPC_CHANNELMASK; - candidate->socket = rx; candidate->conn = conn; candidate->peer = conn->params.peer; candidate->cid = sp->hdr.cid; @@ -351,6 +338,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, candidate->flags |= (1 << RXRPC_CALL_IS_SERVICE); if (conn->security_ix > 0) candidate->state = RXRPC_CALL_SERVER_SECURING; + rcu_assign_pointer(candidate->socket, rx); spin_lock(&conn->channel_lock); @@ -411,7 +399,6 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, candidate = NULL; conn->channels[chan].call_counter = call_id; rcu_assign_pointer(conn->channels[chan].call, call); - sock_hold(&rx->sk); rxrpc_get_connection(conn); rxrpc_get_peer(call->peer); spin_unlock(&conn->channel_lock); @@ -453,6 +440,39 @@ old_call: } /* + * Queue a call's work 
processor, getting a ref to pass to the work queue. + */ +bool rxrpc_queue_call(struct rxrpc_call *call) +{ + const void *here = __builtin_return_address(0); + int n = __atomic_add_unless(&call->usage, 1, 0); + int m = atomic_read(&call->skb_count); + if (n == 0) + return false; + if (rxrpc_queue_work(&call->processor)) + trace_rxrpc_call(call, rxrpc_call_queued, n + 1, m, here, NULL); + else + rxrpc_put_call(call, rxrpc_call_put_noqueue); + return true; +} + +/* + * Queue a call's work processor, passing the callers ref to the work queue. + */ +bool __rxrpc_queue_call(struct rxrpc_call *call) +{ + const void *here = __builtin_return_address(0); + int n = atomic_read(&call->usage); + int m = atomic_read(&call->skb_count); + ASSERTCMP(n, >=, 1); + if (rxrpc_queue_work(&call->processor)) + trace_rxrpc_call(call, rxrpc_call_queued_ref, n, m, here, NULL); + else + rxrpc_put_call(call, rxrpc_call_put_noqueue); + return true; +} + +/* * Note the re-emergence of a call. */ void rxrpc_see_call(struct rxrpc_call *call) @@ -493,11 +513,8 @@ void rxrpc_get_call_for_skb(struct rxrpc_call *call, struct sk_buff *skb) /* * detach a call from a socket and set up for release */ -void rxrpc_release_call(struct rxrpc_call *call) +void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) { - struct rxrpc_connection *conn = call->conn; - struct rxrpc_sock *rx = call->socket; - _enter("{%d,%d,%d,%d}", call->debug_id, atomic_read(&call->usage), atomic_read(&call->ackr_not_idle), @@ -513,7 +530,7 @@ void rxrpc_release_call(struct rxrpc_call *call) /* dissociate from the socket * - the socket's ref on the call is passed to the death timer */ - _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn); + _debug("RELEASE CALL %p (%d)", call, call->debug_id); if (call->peer) { spin_lock(&call->peer->lock); @@ -532,20 +549,30 @@ void rxrpc_release_call(struct rxrpc_call *call) rb_erase(&call->sock_node, &rx->calls); memset(&call->sock_node, 0xdd, 
sizeof(call->sock_node)); clear_bit(RXRPC_CALL_HAS_USERID, &call->flags); + rxrpc_put_call(call, rxrpc_call_put_userid); } write_unlock_bh(&rx->call_lock); /* free up the channel for reuse */ - write_lock_bh(&call->state_lock); + if (call->state == RXRPC_CALL_CLIENT_FINAL_ACK) { + clear_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + rxrpc_call_completed(call); + } else { + write_lock_bh(&call->state_lock); + + if (call->state < RXRPC_CALL_COMPLETE) { + _debug("+++ ABORTING STATE %d +++\n", call->state); + __rxrpc_abort_call(call, RX_CALL_DEAD, ECONNRESET); + clear_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + } - if (call->state < RXRPC_CALL_COMPLETE && - call->state != RXRPC_CALL_CLIENT_FINAL_ACK) { - _debug("+++ ABORTING STATE %d +++\n", call->state); - __rxrpc_abort_call(call, RX_CALL_DEAD, ECONNRESET); + write_unlock_bh(&call->state_lock); } - write_unlock_bh(&call->state_lock); - rxrpc_disconnect_call(call); + if (call->conn) + rxrpc_disconnect_call(call); /* clean up the Rx queue */ if (!skb_queue_empty(&call->rx_queue) || @@ -569,53 +596,16 @@ void rxrpc_release_call(struct rxrpc_call *call) } spin_unlock_bh(&call->lock); } + rxrpc_purge_queue(&call->knlrecv_queue); del_timer_sync(&call->resend_timer); del_timer_sync(&call->ack_timer); del_timer_sync(&call->lifetimer); - call->deadspan.expires = jiffies + rxrpc_dead_call_expiry; - add_timer(&call->deadspan); _leave(""); } /* - * handle a dead call being ready for reaping - */ -static void rxrpc_dead_call_expired(unsigned long _call) -{ - struct rxrpc_call *call = (struct rxrpc_call *) _call; - - _enter("{%d}", call->debug_id); - - rxrpc_see_call(call); - write_lock_bh(&call->state_lock); - call->state = RXRPC_CALL_DEAD; - write_unlock_bh(&call->state_lock); - rxrpc_put_call(call, rxrpc_call_put); -} - -/* - * mark a call as to be released, aborting it if it's still in progress - * - called with 
softirqs disabled - */ -static void rxrpc_mark_call_released(struct rxrpc_call *call) -{ - bool sched = false; - - rxrpc_see_call(call); - write_lock(&call->state_lock); - if (call->state < RXRPC_CALL_DEAD) { - sched = __rxrpc_abort_call(call, RX_CALL_DEAD, ECONNRESET); - if (!test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) - sched = true; - } - write_unlock(&call->state_lock); - if (sched) - rxrpc_queue_call(call); -} - -/* * release all the calls associated with a socket */ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) @@ -629,17 +619,17 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) /* kill the not-yet-accepted incoming calls */ list_for_each_entry(call, &rx->secureq, accept_link) { - rxrpc_mark_call_released(call); + rxrpc_release_call(rx, call); } list_for_each_entry(call, &rx->acceptq, accept_link) { - rxrpc_mark_call_released(call); + rxrpc_release_call(rx, call); } /* mark all the calls as no longer wanting incoming packets */ for (p = rb_first(&rx->calls); p; p = rb_next(p)) { call = rb_entry(p, struct rxrpc_call, sock_node); - rxrpc_mark_call_released(call); + rxrpc_release_call(rx, call); } read_unlock_bh(&rx->call_lock); @@ -663,8 +653,7 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op) if (n == 0) { _debug("call %d dead", call->debug_id); WARN_ON(m != 0); - ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD); - rxrpc_queue_work(&call->destroyer); + rxrpc_cleanup_call(call); } } @@ -683,8 +672,7 @@ void rxrpc_put_call_for_skb(struct rxrpc_call *call, struct sk_buff *skb) if (n == 0) { _debug("call %d dead", call->debug_id); WARN_ON(m != 0); - ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD); - rxrpc_queue_work(&call->destroyer); + rxrpc_cleanup_call(call); } } @@ -708,23 +696,19 @@ static void rxrpc_cleanup_call(struct rxrpc_call *call) { _net("DESTROY CALL %d", call->debug_id); - ASSERT(call->socket); + write_lock_bh(&rxrpc_call_lock); + list_del_init(&call->link); + write_unlock_bh(&rxrpc_call_lock); 
memset(&call->sock_node, 0xcd, sizeof(call->sock_node)); del_timer_sync(&call->lifetimer); - del_timer_sync(&call->deadspan); del_timer_sync(&call->ack_timer); del_timer_sync(&call->resend_timer); + ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags)); - ASSERTCMP(call->events, ==, 0); - if (work_pending(&call->processor)) { - _debug("defer destroy"); - rxrpc_queue_work(&call->destroyer); - return; - } - + ASSERT(!work_pending(&call->processor)); ASSERTCMP(call->conn, ==, NULL); if (call->acks_window) { @@ -753,40 +737,21 @@ static void rxrpc_cleanup_call(struct rxrpc_call *call) rxrpc_purge_queue(&call->rx_queue); ASSERT(skb_queue_empty(&call->rx_oos_queue)); rxrpc_purge_queue(&call->knlrecv_queue); - sock_put(&call->socket->sk); call_rcu(&call->rcu, rxrpc_rcu_destroy_call); } /* - * destroy a call - */ -static void rxrpc_destroy_call(struct work_struct *work) -{ - struct rxrpc_call *call = - container_of(work, struct rxrpc_call, destroyer); - - _enter("%p{%d,%x,%p}", - call, atomic_read(&call->usage), call->cid, call->conn); - - ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD); - - write_lock_bh(&rxrpc_call_lock); - list_del_init(&call->link); - write_unlock_bh(&rxrpc_call_lock); - - rxrpc_cleanup_call(call); - _leave(""); -} - -/* - * preemptively destroy all the call records from a transport endpoint rather - * than waiting for them to time out + * Make sure that all calls are gone. 
*/ void __exit rxrpc_destroy_all_calls(void) { struct rxrpc_call *call; _enter(""); + + if (list_empty(&rxrpc_calls)) + return; + write_lock_bh(&rxrpc_call_lock); while (!list_empty(&rxrpc_calls)) { @@ -796,28 +761,15 @@ void __exit rxrpc_destroy_all_calls(void) rxrpc_see_call(call); list_del_init(&call->link); - switch (atomic_read(&call->usage)) { - case 0: - ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD); - break; - case 1: - if (del_timer_sync(&call->deadspan) != 0 && - call->state != RXRPC_CALL_DEAD) - rxrpc_dead_call_expired((unsigned long) call); - if (call->state != RXRPC_CALL_DEAD) - break; - default: - pr_err("Call %p still in use (%d,%d,%s,%lx,%lx)!\n", - call, atomic_read(&call->usage), - atomic_read(&call->ackr_not_idle), - rxrpc_call_states[call->state], - call->flags, call->events); - if (!skb_queue_empty(&call->rx_queue)) - pr_err("Rx queue occupied\n"); - if (!skb_queue_empty(&call->rx_oos_queue)) - pr_err("OOS queue occupied\n"); - break; - } + pr_err("Call %p still in use (%d,%d,%s,%lx,%lx)!\n", + call, atomic_read(&call->usage), + atomic_read(&call->ackr_not_idle), + rxrpc_call_states[call->state], + call->flags, call->events); + if (!skb_queue_empty(&call->rx_queue)) + pr_err("Rx queue occupied\n"); + if (!skb_queue_empty(&call->rx_oos_queue)) + pr_err("OOS queue occupied\n"); write_unlock_bh(&rxrpc_call_lock); cond_resched(); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 8267f42..79f3f58 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -39,7 +39,7 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb, bool force, bool terminal) { struct rxrpc_skb_priv *sp; - struct rxrpc_sock *rx = call->socket; + struct rxrpc_sock *rx; struct sock *sk; int ret; @@ -59,7 +59,15 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb, return 0; } + /* The socket may go away under us */ + ret = 0; + rcu_read_lock(); + rx = rcu_dereference(call->socket); + if (!rx) + goto out; sk = &rx->sk; + if (sock_flag(sk, 
SOCK_DEAD)) + goto out; if (!force) { /* cast skb->rcvbuf to unsigned... It's pointless, but @@ -78,7 +86,7 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb, spin_lock_bh(&sk->sk_receive_queue.lock); if (!test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags) && !test_bit(RXRPC_CALL_RELEASED, &call->flags) && - call->socket->sk.sk_state != RXRPC_CLOSE) { + sk->sk_state != RXRPC_CLOSE) { skb->destructor = rxrpc_packet_destructor; skb->dev = NULL; skb->sk = sk; @@ -104,8 +112,7 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb, __skb_queue_tail(&sk->sk_receive_queue, skb); spin_unlock_bh(&sk->sk_receive_queue.lock); - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk); + sk->sk_data_ready(sk); } skb = NULL; } else { @@ -115,6 +122,7 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb, out: rxrpc_free_skb(skb); + rcu_read_unlock(); _leave(" = %d", ret); return ret; @@ -266,7 +274,7 @@ enqueue_packet: skb_queue_tail(&call->rx_queue, skb); atomic_inc(&call->ackr_not_idle); read_lock(&call->state_lock); - if (call->state < RXRPC_CALL_DEAD) + if (call->state < RXRPC_CALL_COMPLETE) rxrpc_queue_call(call); read_unlock(&call->state_lock); _leave(" = 0 [queued]"); @@ -408,7 +416,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb) case RXRPC_PACKET_TYPE_ACK: /* ACK processing is done in process context */ read_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_DEAD) { + if (call->state < RXRPC_CALL_COMPLETE) { skb_queue_tail(&call->rx_queue, skb); rxrpc_queue_call(call); skb = NULL; @@ -511,9 +519,6 @@ static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn, read_lock(&call->state_lock); switch (call->state) { - case RXRPC_CALL_DEAD: - goto dead_call; - case RXRPC_CALL_COMPLETE: switch (call->completion) { case RXRPC_CALL_LOCALLY_ABORTED: @@ -538,7 +543,6 @@ static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn, } read_unlock(&call->state_lock); - 
rxrpc_get_call(call, rxrpc_call_got); if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && sp->hdr.flags & RXRPC_JUMBO_PACKET) @@ -546,12 +550,10 @@ static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn, else rxrpc_fast_process_packet(call, skb); - rxrpc_put_call(call, rxrpc_call_put); goto done; resend_final_ack: _debug("final ack again"); - rxrpc_get_call(call, rxrpc_call_got); set_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); rxrpc_queue_call(call); goto free_unlock; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 5b5508f..8756d74 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -19,6 +19,151 @@ #include #include "ar-internal.h" +struct rxrpc_pkt_buffer { + struct rxrpc_wire_header whdr; + union { + struct { + struct rxrpc_ackpacket ack; + u8 acks[255]; + u8 pad[3]; + }; + __be32 abort_code; + }; + struct rxrpc_ackinfo ackinfo; +}; + +/* + * Fill out an ACK packet. + */ +static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, + struct rxrpc_pkt_buffer *pkt) +{ + u32 mtu, jmax; + u8 *ackp = pkt->acks; + + pkt->ack.bufferSpace = htons(8); + pkt->ack.maxSkew = htons(0); + pkt->ack.firstPacket = htonl(call->rx_data_eaten + 1); + pkt->ack.previousPacket = htonl(call->ackr_prev_seq); + pkt->ack.serial = htonl(call->ackr_serial); + pkt->ack.reason = RXRPC_ACK_IDLE; + pkt->ack.nAcks = 0; + + mtu = call->peer->if_mtu; + mtu -= call->peer->hdrsize; + jmax = rxrpc_rx_jumbo_max; + pkt->ackinfo.rxMTU = htonl(rxrpc_rx_mtu); + pkt->ackinfo.maxMTU = htonl(mtu); + pkt->ackinfo.rwind = htonl(rxrpc_rx_window_size); + pkt->ackinfo.jumbo_max = htonl(jmax); + + *ackp++ = 0; + *ackp++ = 0; + *ackp++ = 0; + return 3; +} + +/* + * Send a final ACK or ABORT call packet. 
+ */ +int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) +{ + struct rxrpc_connection *conn = NULL; + struct rxrpc_pkt_buffer *pkt; + struct msghdr msg; + struct kvec iov[2]; + rxrpc_serial_t serial; + size_t len, n; + int ioc, ret; + u32 abort_code; + + _enter("%u,%s", call->debug_id, rxrpc_pkts[type]); + + spin_lock_bh(&call->lock); + if (call->conn) + conn = rxrpc_get_connection_maybe(call->conn); + spin_unlock_bh(&call->lock); + if (!conn) + return -ECONNRESET; + + pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); + if (!pkt) { + rxrpc_put_connection(conn); + return -ENOMEM; + } + + serial = atomic_inc_return(&conn->serial); + + msg.msg_name = &call->peer->srx.transport; + msg.msg_namelen = call->peer->srx.transport_len; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + pkt->whdr.epoch = htonl(conn->proto.epoch); + pkt->whdr.cid = htonl(call->cid); + pkt->whdr.callNumber = htonl(call->call_id); + pkt->whdr.seq = 0; + pkt->whdr.serial = htonl(serial); + pkt->whdr.type = type; + pkt->whdr.flags = conn->out_clientflag; + pkt->whdr.userStatus = 0; + pkt->whdr.securityIndex = call->security_ix; + pkt->whdr._rsvd = 0; + pkt->whdr.serviceId = htons(call->service_id); + + iov[0].iov_base = pkt; + iov[0].iov_len = sizeof(pkt->whdr); + len = sizeof(pkt->whdr); + + switch (type) { + case RXRPC_PACKET_TYPE_ACK: + spin_lock_bh(&call->lock); + n = rxrpc_fill_out_ack(call, pkt); + call->ackr_reason = 0; + + spin_unlock_bh(&call->lock); + + _proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", + serial, + ntohs(pkt->ack.maxSkew), + ntohl(pkt->ack.firstPacket), + ntohl(pkt->ack.previousPacket), + ntohl(pkt->ack.serial), + rxrpc_acks(pkt->ack.reason), + pkt->ack.nAcks); + + iov[0].iov_len += sizeof(pkt->ack) + n; + iov[1].iov_base = &pkt->ackinfo; + iov[1].iov_len = sizeof(pkt->ackinfo); + len += sizeof(pkt->ack) + n + sizeof(pkt->ackinfo); + ioc = 2; + break; + + case RXRPC_PACKET_TYPE_ABORT: + abort_code = call->abort_code; + pkt->abort_code 
= htonl(abort_code); + _proto("Tx ABORT %%%u { %d }", serial, abort_code); + iov[0].iov_len += sizeof(pkt->abort_code); + len += sizeof(pkt->abort_code); + ioc = 1; + break; + + default: + BUG(); + ret = -ENOANO; + goto out; + } + + ret = kernel_sendmsg(conn->params.local->socket, + &msg, iov, ioc, len); + +out: + rxrpc_put_connection(conn); + kfree(pkt); + return ret; +} + /* * send a packet through the transport endpoint */ diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index 82c6405..dfad238 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -29,6 +29,7 @@ static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = { */ static void *rxrpc_call_seq_start(struct seq_file *seq, loff_t *_pos) { + rcu_read_lock(); read_lock(&rxrpc_call_lock); return seq_list_start_head(&rxrpc_calls, *_pos); } @@ -41,6 +42,7 @@ static void *rxrpc_call_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void rxrpc_call_seq_stop(struct seq_file *seq, void *v) { read_unlock(&rxrpc_call_lock); + rcu_read_unlock(); } static int rxrpc_call_seq_show(struct seq_file *seq, void *v) @@ -61,7 +63,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) call = list_entry(v, struct rxrpc_call, link); - rx = READ_ONCE(call->socket); + rx = rcu_dereference(call->socket); if (rx) { local = READ_ONCE(rx->local); if (local) diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 97f8ee7..6876ffb 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -19,28 +19,6 @@ #include "ar-internal.h" /* - * removal a call's user ID from the socket tree to make the user ID available - * again and so that it won't be seen again in association with that call - */ -void rxrpc_remove_user_ID(struct rxrpc_sock *rx, struct rxrpc_call *call) -{ - _debug("RELEASE CALL %d", call->debug_id); - - if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { - write_lock_bh(&rx->call_lock); - rb_erase(&call->sock_node, &call->socket->calls); - clear_bit(RXRPC_CALL_HAS_USERID, &call->flags); 
- write_unlock_bh(&rx->call_lock); - } - - read_lock_bh(&call->state_lock); - if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && - !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) - rxrpc_queue_call(call); - read_unlock_bh(&call->state_lock); -} - -/* * receive a message from an RxRPC socket * - we need to be careful about two or more threads calling recvmsg * simultaneously @@ -338,7 +316,7 @@ terminal_message: if (skb_dequeue(&rx->sk.sk_receive_queue) != skb) BUG(); rxrpc_free_skb(skb); - rxrpc_remove_user_ID(rx, call); + rxrpc_release_call(rx, call); } release_sock(&rx->sk); diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index c0613ab..9b8f845 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -33,9 +33,6 @@ static void rxrpc_request_final_ACK(struct rxrpc_call *call) call->state = RXRPC_CALL_CLIENT_FINAL_ACK; _debug("request final ACK"); - /* get an extra ref on the call for the final-ACK generator to - * release */ - rxrpc_get_call(call, rxrpc_call_got); set_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); if (try_to_del_timer_sync(&call->ack_timer) >= 0) rxrpc_queue_call(call); diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index dc380af..b7ca8cf 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -88,14 +88,6 @@ static struct ctl_table rxrpc_sysctl_table[] = { .proc_handler = proc_dointvec_jiffies, .extra1 = (void *)&one, }, - { - .procname = "dead_call_expiry", - .data = &rxrpc_dead_call_expiry, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - .extra1 = (void *)&one, - }, /* Non-time values */ { -- cgit v1.1 From fc2780b66b15092ac68272644a522c1624c48547 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 26 Aug 2016 11:59:26 +0100 Subject: drm/i915: Add GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE to SNB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the CI test machines, SNB also uses the 
GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE value to report a bad GEN6_PCODE_MIN_FREQ_TABLE request. [ 157.744641] WARNING: CPU: 5 PID: 9238 at drivers/gpu/drm/i915/intel_pm.c:7760 sandybridge_pcode_write+0x141/0x200 [i915] [ 157.744642] Missing switch case (16) in gen6_check_mailbox_status [ 157.744642] Modules linked in: snd_hda_intel i915 ax88179_178a usbnet mii x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic snd_hda_codec snd_hwdep snd_hda_core mei_me lpc_ich snd_pcm mei broadcom bcm_phy_lib tg3 ptp pps_core [last unloaded: vgem] [ 157.744658] CPU: 5 PID: 9238 Comm: drv_hangman Tainted: G U W 4.8.0-rc3-CI-CI_DRM_1589+ #1 [ 157.744658] Hardware name: Dell Inc. XPS 8300 /0Y2MRG, BIOS A06 10/17/2011 [ 157.744659] 0000000000000000 ffff88011f093a98 ffffffff81426415 ffff88011f093ae8 [ 157.744662] 0000000000000000 ffff88011f093ad8 ffffffff8107d2a6 00001e50810d3c9f [ 157.744663] ffff880128680000 0000000000000008 0000000000000000 ffff88012868a650 [ 157.744665] Call Trace: [ 157.744669] [] dump_stack+0x67/0x92 [ 157.744672] [] __warn+0xc6/0xe0 [ 157.744673] [] warn_slowpath_fmt+0x4a/0x50 [ 157.744685] [] sandybridge_pcode_write+0x141/0x200 [i915] [ 157.744697] [] intel_enable_gt_powersave+0x64a/0x1330 [i915] [ 157.744712] [] ? i9xx_emit_request+0x1b/0x80 [i915] [ 157.744725] [] __i915_add_request+0x1e3/0x370 [i915] [ 157.744738] [] i915_gem_do_execbuffer.isra.16+0xced/0x1b80 [i915] [ 157.744740] [] ? __might_fault+0x3e/0x90 [ 157.744752] [] i915_gem_execbuffer2+0xc2/0x2a0 [i915] [ 157.744753] [] drm_ioctl+0x207/0x4c0 [ 157.744765] [] ? i915_gem_execbuffer+0x360/0x360 [i915] [ 157.744767] [] ? debug_lockdep_rcu_enabled+0x1d/0x20 [ 157.744769] [] do_vfs_ioctl+0x8e/0x680 [ 157.744770] [] ? __might_fault+0x87/0x90 [ 157.744771] [] ? __might_fault+0x3e/0x90 [ 157.744773] [] ? 
trace_hardirqs_on_caller+0x122/0x1b0 [ 157.744774] [] SyS_ioctl+0x3c/0x70 [ 157.744776] [] entry_SYSCALL_64_fastpath+0x1c/0xac Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97491 Fixes: 87660502f1a4 ("drm/i915/gen6+: Interpret mailbox error flags") Signed-off-by: Chris Wilson Cc: Lyude Cc: Matt Roper Cc: Maarten Lankhorst Cc: Daniel Vetter Cc: Ville Syrjälä Cc: stable@vger.kernel.org Link: http://patchwork.freedesktop.org/patch/msgid/20160826105926.3413-1-chris@chris-wilson.co.uk Acked-by: Mika Kuoppala (cherry picked from commit 7850d1c35344c7bd6a357240f2f9f60fc2c097b5) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_pm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 53e13c1..2d24813 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7859,6 +7859,7 @@ static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv) case GEN6_PCODE_ILLEGAL_CMD: return -ENXIO; case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE: + case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE: return -EOVERFLOW; case GEN6_PCODE_TIMEOUT: return -ETIMEDOUT; -- cgit v1.1 From 82469c59d222f839ded5cd282172258e026f9112 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Tue, 6 Sep 2016 17:39:13 -0300 Subject: nvme: Don't suspend admin queue that wasn't created This fixes a regression in my previous commit c21377f8366c ("nvme: Suspend all queues before deletion"), which provoked an Oops in the removal path when removing a device that became IO incapable very early at probe (i.e. after a failed EEH recovery). Turns out, if the error occurred very early at the probe path, before even configuring the admin queue, we might try to suspend the uninitialized admin queue, accessing bad memory. 
Fixes: c21377f8366c ("nvme: Suspend all queues before deletion") Signed-off-by: Gabriel Krisman Bertazi Reviewed-by: Jay Freyensee Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 8dcf5a9..be84a84 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1693,7 +1693,12 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) nvme_suspend_queue(dev->queues[i]); if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) { - nvme_suspend_queue(dev->queues[0]); + /* A device might become IO incapable very soon during + * probe, before the admin queue is configured. Thus, + * queue_count can be 0 here. + */ + if (dev->queue_count) + nvme_suspend_queue(dev->queues[0]); } else { nvme_disable_io_queues(dev); nvme_disable_admin_queue(dev, shutdown); -- cgit v1.1 From e8d6bbb05aa5cb985c3661d0db4f858f1d251326 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 7 Sep 2016 16:34:12 +0100 Subject: rxrpc: Fix returns of call completion helpers rxrpc_set_call_completion() returns bool, not int, so the ret variable should match this. rxrpc_call_completed() and __rxrpc_call_completed() should return the value of rxrpc_set_call_completion(). Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 3addda4..0353399 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -608,7 +608,7 @@ static inline bool rxrpc_set_call_completion(struct rxrpc_call *call, u32 abort_code, int error) { - int ret; + bool ret; write_lock_bh(&call->state_lock); ret = __rxrpc_set_call_completion(call, compl, abort_code, error); @@ -619,16 +619,19 @@ static inline bool rxrpc_set_call_completion(struct rxrpc_call *call, /* * Record that a call successfully completed. 
*/ -static inline void __rxrpc_call_completed(struct rxrpc_call *call) +static inline bool __rxrpc_call_completed(struct rxrpc_call *call) { - __rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0); + return __rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0); } -static inline void rxrpc_call_completed(struct rxrpc_call *call) +static inline bool rxrpc_call_completed(struct rxrpc_call *call) { + bool ret; + write_lock_bh(&call->state_lock); - __rxrpc_call_completed(call); + ret = __rxrpc_call_completed(call); write_unlock_bh(&call->state_lock); + return ret; } /* -- cgit v1.1 From 5a42976d4fe5d7fddce133de995c742c87b1b7e3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 6 Sep 2016 22:19:51 +0100 Subject: rxrpc: Add tracepoint for working out where aborts happen Add a tracepoint for working out where local aborts happen. Each tracepoint call is labelled with a 3-letter code so that they can be distinguished - and the DATA sequence number is added too where available. rxrpc_kernel_abort_call() also takes a 3-letter code so that AFS can indicate the circumstances when it aborts a call. 
Signed-off-by: David Howells --- fs/afs/rxrpc.c | 17 ++++--- include/net/af_rxrpc.h | 3 +- include/trace/events/rxrpc.h | 29 ++++++++++++ net/rxrpc/ar-internal.h | 14 ++++-- net/rxrpc/call_event.c | 7 +-- net/rxrpc/call_object.c | 2 +- net/rxrpc/conn_event.c | 6 +++ net/rxrpc/input.c | 7 +-- net/rxrpc/insecure.c | 19 ++++---- net/rxrpc/rxkad.c | 108 +++++++++++++++++++------------------------ net/rxrpc/sendmsg.c | 18 ++++---- 11 files changed, 132 insertions(+), 98 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 37608be..53750de 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -377,7 +377,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, return wait_mode->wait(call); error_do_abort: - rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT); + rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, -ret, "KSD"); error_kill_call: afs_end_call(call); _leave(" = %d", ret); @@ -425,12 +425,12 @@ static void afs_deliver_to_call(struct afs_call *call) case -ENOTCONN: abort_code = RX_CALL_DEAD; rxrpc_kernel_abort_call(afs_socket, call->rxcall, - abort_code); + abort_code, -ret, "KNC"); goto do_abort; case -ENOTSUPP: abort_code = RX_INVALID_OPERATION; rxrpc_kernel_abort_call(afs_socket, call->rxcall, - abort_code); + abort_code, -ret, "KIV"); goto do_abort; case -ENODATA: case -EBADMSG: @@ -440,7 +440,7 @@ static void afs_deliver_to_call(struct afs_call *call) if (call->state != AFS_CALL_AWAIT_REPLY) abort_code = RXGEN_SS_UNMARSHAL; rxrpc_kernel_abort_call(afs_socket, call->rxcall, - abort_code); + abort_code, EBADMSG, "KUM"); goto do_abort; } } @@ -463,6 +463,7 @@ do_abort: */ static int afs_wait_for_call_to_complete(struct afs_call *call) { + const char *abort_why; int ret; DECLARE_WAITQUEUE(myself, current); @@ -481,9 +482,11 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) continue; } + abort_why = "KWC"; ret = call->error; if (call->state == AFS_CALL_COMPLETE) break; + abort_why = "KWI"; ret = 
-EINTR; if (signal_pending(current)) break; @@ -497,7 +500,7 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) if (call->state < AFS_CALL_COMPLETE) { _debug("call incomplete"); rxrpc_kernel_abort_call(afs_socket, call->rxcall, - RX_CALL_DEAD); + RX_CALL_DEAD, -ret, abort_why); } _debug("call complete"); @@ -695,7 +698,7 @@ void afs_send_empty_reply(struct afs_call *call) case -ENOMEM: _debug("oom"); rxrpc_kernel_abort_call(afs_socket, call->rxcall, - RX_USER_ABORT); + RX_USER_ABORT, ENOMEM, "KOO"); default: afs_end_call(call); _leave(" [error]"); @@ -734,7 +737,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) if (n == -ENOMEM) { _debug("oom"); rxrpc_kernel_abort_call(afs_socket, call->rxcall, - RX_USER_ABORT); + RX_USER_ABORT, ENOMEM, "KOO"); } afs_end_call(call); _leave(" [error]"); diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index b4b6a36..08ed872 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -35,7 +35,8 @@ int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *, struct msghdr *, size_t); int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *, void *, size_t, size_t *, bool, u32 *); -void rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, u32); +void rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, + u32, int, const char *); void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *); struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *, unsigned long, rxrpc_notify_rx_t); diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 3016489..85ee035 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -84,6 +84,35 @@ TRACE_EVENT(rxrpc_skb, __entry->where) ); +TRACE_EVENT(rxrpc_abort, + TP_PROTO(const char *why, u32 cid, u32 call_id, rxrpc_seq_t seq, + int abort_code, int error), + + TP_ARGS(why, cid, call_id, seq, abort_code, error), + + TP_STRUCT__entry( + __array(char, why, 4 
) + __field(u32, cid ) + __field(u32, call_id ) + __field(rxrpc_seq_t, seq ) + __field(int, abort_code ) + __field(int, error ) + ), + + TP_fast_assign( + memcpy(__entry->why, why, 4); + __entry->cid = cid; + __entry->call_id = call_id; + __entry->abort_code = abort_code; + __entry->error = error; + __entry->seq = seq; + ), + + TP_printk("%08x:%08x s=%u a=%d e=%d %s", + __entry->cid, __entry->call_id, __entry->seq, + __entry->abort_code, __entry->error, __entry->why) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 0353399..dbfb9ed 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -155,7 +155,8 @@ struct rxrpc_security { void *); /* verify the security on a received packet */ - int (*verify_packet)(struct rxrpc_call *, struct sk_buff *, u32 *); + int (*verify_packet)(struct rxrpc_call *, struct sk_buff *, + rxrpc_seq_t, u16); /* issue a challenge */ int (*issue_challenge)(struct rxrpc_connection *); @@ -637,9 +638,12 @@ static inline bool rxrpc_call_completed(struct rxrpc_call *call) /* * Record that a call is locally aborted. 
*/ -static inline bool __rxrpc_abort_call(struct rxrpc_call *call, +static inline bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call, + rxrpc_seq_t seq, u32 abort_code, int error) { + trace_rxrpc_abort(why, call->cid, call->call_id, seq, + abort_code, error); if (__rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED, abort_code, error)) { @@ -649,13 +653,13 @@ static inline bool __rxrpc_abort_call(struct rxrpc_call *call, return false; } -static inline bool rxrpc_abort_call(struct rxrpc_call *call, - u32 abort_code, int error) +static inline bool rxrpc_abort_call(const char *why, struct rxrpc_call *call, + rxrpc_seq_t seq, u32 abort_code, int error) { bool ret; write_lock_bh(&call->state_lock); - ret = __rxrpc_abort_call(call, abort_code, error); + ret = __rxrpc_abort_call(why, call, seq, abort_code, error); write_unlock_bh(&call->state_lock); return ret; } diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 8365d33..af88ad7 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -598,7 +598,8 @@ process_further: /* secured packets must be verified and possibly decrypted */ if (call->conn->security->verify_packet(call, skb, - _abort_code) < 0) + sp->hdr.seq, + sp->hdr.cksum) < 0) goto protocol_error; rxrpc_insert_oos_packet(call, skb); @@ -982,7 +983,7 @@ skip_msg_init: } if (test_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events)) { - rxrpc_abort_call(call, RX_CALL_TIMEOUT, ETIME); + rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, ETIME); _debug("post timeout"); if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR, @@ -1005,7 +1006,7 @@ skip_msg_init: case -EKEYEXPIRED: case -EKEYREJECTED: case -EPROTO: - rxrpc_abort_call(call, abort_code, -ret); + rxrpc_abort_call("PRO", call, 0, abort_code, -ret); goto kill_ACKs; } } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index be5733d..9efd9b0 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -563,7 +563,7 @@ void 
rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) if (call->state < RXRPC_CALL_COMPLETE) { _debug("+++ ABORTING STATE %d +++\n", call->state); - __rxrpc_abort_call(call, RX_CALL_DEAD, ECONNRESET); + __rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, ECONNRESET); clear_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); } diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 9db90f4..8c7938b 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -158,6 +158,11 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, lockdep_is_held(&conn->channel_lock)); if (call) { rxrpc_see_call(call); + if (compl == RXRPC_CALL_LOCALLY_ABORTED) + trace_rxrpc_abort("CON", call->cid, + call->call_id, 0, + abort_code, error); + write_lock_bh(&call->state_lock); if (rxrpc_set_call_completion(call, compl, abort_code, error)) { @@ -167,6 +172,7 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, write_unlock_bh(&call->state_lock); if (queue) rxrpc_queue_call(call); + } } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 79f3f58..8e62410 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -429,7 +429,7 @@ protocol_error: _debug("protocol error"); write_lock_bh(&call->state_lock); protocol_error_locked: - if (__rxrpc_abort_call(call, RX_PROTOCOL_ERROR, EPROTO)) + if (__rxrpc_abort_call("FPR", call, 0, RX_PROTOCOL_ERROR, EPROTO)) rxrpc_queue_call(call); free_packet_unlock: write_unlock_bh(&call->state_lock); @@ -495,9 +495,10 @@ static void rxrpc_process_jumbo_packet(struct rxrpc_call *call, protocol_error: _debug("protocol error"); rxrpc_free_skb(part); - rxrpc_free_skb(jumbo); - if (rxrpc_abort_call(call, RX_PROTOCOL_ERROR, EPROTO)) + if (rxrpc_abort_call("PJP", call, sp->hdr.seq, + RX_PROTOCOL_ERROR, EPROTO)) rxrpc_queue_call(call); + rxrpc_free_skb(jumbo); _leave(""); } diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index c21ad21..a4aba02 100644 --- 
a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -23,31 +23,32 @@ static int none_prime_packet_security(struct rxrpc_connection *conn) } static int none_secure_packet(struct rxrpc_call *call, - struct sk_buff *skb, - size_t data_size, - void *sechdr) + struct sk_buff *skb, + size_t data_size, + void *sechdr) { return 0; } static int none_verify_packet(struct rxrpc_call *call, - struct sk_buff *skb, - u32 *_abort_code) + struct sk_buff *skb, + rxrpc_seq_t seq, + u16 expected_cksum) { return 0; } static int none_respond_to_challenge(struct rxrpc_connection *conn, - struct sk_buff *skb, - u32 *_abort_code) + struct sk_buff *skb, + u32 *_abort_code) { *_abort_code = RX_PROTOCOL_ERROR; return -EPROTO; } static int none_verify_response(struct rxrpc_connection *conn, - struct sk_buff *skb, - u32 *_abort_code) + struct sk_buff *skb, + u32 *_abort_code) { *_abort_code = RX_PROTOCOL_ERROR; return -EPROTO; diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 89f475f..3777432 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -316,12 +316,10 @@ static int rxkad_secure_packet(struct rxrpc_call *call, /* * decrypt partial encryption on a packet (level 1 security) */ -static int rxkad_verify_packet_auth(const struct rxrpc_call *call, - struct sk_buff *skb, - u32 *_abort_code) +static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, + rxrpc_seq_t seq) { struct rxkad_level1_hdr sechdr; - struct rxrpc_skb_priv *sp; SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); struct rxrpc_crypt iv; struct scatterlist sg[16]; @@ -332,7 +330,10 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call, _enter(""); - sp = rxrpc_skb(skb); + if (skb->len < 8) { + rxrpc_abort_call("V1H", call, seq, RXKADSEALEDINCON, EPROTO); + goto protocol_error; + } /* we want to decrypt the skbuff in-place */ nsg = skb_cow_data(skb, 0, &trailer); @@ -351,9 +352,11 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call, crypto_skcipher_decrypt(req); 
skcipher_request_zero(req); - /* remove the decrypted packet length */ - if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0) - goto datalen_error; + /* Extract the decrypted packet length */ + if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0) { + rxrpc_abort_call("XV1", call, seq, RXKADDATALEN, EPROTO); + goto protocol_error; + } if (!skb_pull(skb, sizeof(sechdr))) BUG(); @@ -361,24 +364,24 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call, data_size = buf & 0xffff; check = buf >> 16; - check ^= sp->hdr.seq ^ sp->hdr.callNumber; + check ^= seq ^ call->call_id; check &= 0xffff; if (check != 0) { - *_abort_code = RXKADSEALEDINCON; + rxrpc_abort_call("V1C", call, seq, RXKADSEALEDINCON, EPROTO); goto protocol_error; } /* shorten the packet to remove the padding */ - if (data_size > skb->len) - goto datalen_error; - else if (data_size < skb->len) + if (data_size > skb->len) { + rxrpc_abort_call("V1L", call, seq, RXKADDATALEN, EPROTO); + goto protocol_error; + } + if (data_size < skb->len) skb->len = data_size; _leave(" = 0 [dlen=%x]", data_size); return 0; -datalen_error: - *_abort_code = RXKADDATALEN; protocol_error: _leave(" = -EPROTO"); return -EPROTO; @@ -391,13 +394,11 @@ nomem: /* * wholly decrypt a packet (level 2 security) */ -static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call, - struct sk_buff *skb, - u32 *_abort_code) +static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, + rxrpc_seq_t seq) { const struct rxrpc_key_token *token; struct rxkad_level2_hdr sechdr; - struct rxrpc_skb_priv *sp; SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); struct rxrpc_crypt iv; struct scatterlist _sg[4], *sg; @@ -408,7 +409,10 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call, _enter(",{%d}", skb->len); - sp = rxrpc_skb(skb); + if (skb->len < 8) { + rxrpc_abort_call("V2H", call, seq, RXKADSEALEDINCON, EPROTO); + goto protocol_error; + } /* we want to decrypt the skbuff in-place */ 
nsg = skb_cow_data(skb, 0, &trailer); @@ -437,9 +441,11 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call, if (sg != _sg) kfree(sg); - /* remove the decrypted packet length */ - if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0) - goto datalen_error; + /* Extract the decrypted packet length */ + if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0) { + rxrpc_abort_call("XV2", call, seq, RXKADDATALEN, EPROTO); + goto protocol_error; + } if (!skb_pull(skb, sizeof(sechdr))) BUG(); @@ -447,24 +453,23 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call, data_size = buf & 0xffff; check = buf >> 16; - check ^= sp->hdr.seq ^ sp->hdr.callNumber; + check ^= seq ^ call->call_id; check &= 0xffff; if (check != 0) { - *_abort_code = RXKADSEALEDINCON; + rxrpc_abort_call("V2C", call, seq, RXKADSEALEDINCON, EPROTO); goto protocol_error; } - /* shorten the packet to remove the padding */ - if (data_size > skb->len) - goto datalen_error; - else if (data_size < skb->len) + if (data_size > skb->len) { + rxrpc_abort_call("V2L", call, seq, RXKADDATALEN, EPROTO); + goto protocol_error; + } + if (data_size < skb->len) skb->len = data_size; _leave(" = 0 [dlen=%x]", data_size); return 0; -datalen_error: - *_abort_code = RXKADDATALEN; protocol_error: _leave(" = -EPROTO"); return -EPROTO; @@ -475,40 +480,30 @@ nomem: } /* - * verify the security on a received packet + * Verify the security on a received packet or subpacket (if part of a + * jumbo packet). 
*/ -static int rxkad_verify_packet(struct rxrpc_call *call, - struct sk_buff *skb, - u32 *_abort_code) +static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, + rxrpc_seq_t seq, u16 expected_cksum) { SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); - struct rxrpc_skb_priv *sp; struct rxrpc_crypt iv; struct scatterlist sg; u16 cksum; u32 x, y; - int ret; - - sp = rxrpc_skb(skb); _enter("{%d{%x}},{#%u}", - call->debug_id, key_serial(call->conn->params.key), sp->hdr.seq); + call->debug_id, key_serial(call->conn->params.key), seq); if (!call->conn->cipher) return 0; - if (sp->hdr.securityIndex != RXRPC_SECURITY_RXKAD) { - *_abort_code = RXKADINCONSISTENCY; - _leave(" = -EPROTO [not rxkad]"); - return -EPROTO; - } - /* continue encrypting from where we left off */ memcpy(&iv, call->conn->csum_iv.x, sizeof(iv)); /* validate the security checksum */ x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT); - x |= sp->hdr.seq & 0x3fffffff; + x |= seq & 0x3fffffff; call->crypto_buf[0] = htonl(call->call_id); call->crypto_buf[1] = htonl(x); @@ -524,29 +519,22 @@ static int rxkad_verify_packet(struct rxrpc_call *call, if (cksum == 0) cksum = 1; /* zero checksums are not permitted */ - if (sp->hdr.cksum != cksum) { - *_abort_code = RXKADSEALEDINCON; + if (cksum != expected_cksum) { + rxrpc_abort_call("VCK", call, seq, RXKADSEALEDINCON, EPROTO); _leave(" = -EPROTO [csum failed]"); return -EPROTO; } switch (call->conn->params.security_level) { case RXRPC_SECURITY_PLAIN: - ret = 0; - break; + return 0; case RXRPC_SECURITY_AUTH: - ret = rxkad_verify_packet_auth(call, skb, _abort_code); - break; + return rxkad_verify_packet_1(call, skb, seq); case RXRPC_SECURITY_ENCRYPT: - ret = rxkad_verify_packet_encrypt(call, skb, _abort_code); - break; + return rxkad_verify_packet_2(call, skb, seq); default: - ret = -ENOANO; - break; + return -ENOANO; } - - _leave(" = %d", ret); - return ret; } /* diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 
2439aff..9a4af99 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -454,14 +454,15 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, /* * abort a call, sending an ABORT packet to the peer */ -static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code) +static void rxrpc_send_abort(struct rxrpc_call *call, const char *why, + u32 abort_code, int error) { if (call->state >= RXRPC_CALL_COMPLETE) return; write_lock_bh(&call->state_lock); - if (__rxrpc_abort_call(call, abort_code, ECONNABORTED)) { + if (__rxrpc_abort_call(why, call, 0, abort_code, error)) { del_timer_sync(&call->resend_timer); del_timer_sync(&call->ack_timer); clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events); @@ -556,7 +557,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) /* it's too late for this call */ ret = -ESHUTDOWN; } else if (cmd == RXRPC_CMD_SEND_ABORT) { - rxrpc_send_abort(call, abort_code); + rxrpc_send_abort(call, "CMD", abort_code, ECONNABORTED); ret = 0; } else if (cmd != RXRPC_CMD_SEND_DATA) { ret = -EINVAL; @@ -626,20 +627,19 @@ EXPORT_SYMBOL(rxrpc_kernel_send_data); * @sock: The socket the call is on * @call: The call to be aborted * @abort_code: The abort code to stick into the ABORT packet + * @error: Local error value + * @why: 3-char string indicating why. * * Allow a kernel service to abort a call, if it's still in an abortable state. 
*/ void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, - u32 abort_code) + u32 abort_code, int error, const char *why) { - _enter("{%d},%d", call->debug_id, abort_code); + _enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why); lock_sock(sock->sk); - _debug("CALL %d USR %lx ST %d on CONN %p", - call->debug_id, call->user_call_ID, call->state, call->conn); - - rxrpc_send_abort(call, abort_code); + rxrpc_send_abort(call, why, abort_code, error); release_sock(sock->sk); _leave(""); -- cgit v1.1 From 972939e28592ec61e2e8334786152be2c80de677 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Mon, 1 Aug 2016 23:01:55 +0200 Subject: um/ptrace: Fix the syscall_trace_leave call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Keep the same semantics as before the commit 26703c636c1f: deallocate audit context and fake a proper syscall exit. This fixes a kernel panic triggered by the seccomp_bpf test: > [ RUN ] global.ERRNO_valid > BUG: failure at kernel/auditsc.c:1504/__audit_syscall_entry()! > Kernel panic - not syncing: BUG! Fixes: 26703c636c1f ("um/ptrace: run seccomp after ptrace") Signed-off-by: Mickaël Salaün Acked-by: Kees Cook Cc: Jeff Dike Cc: Richard Weinberger Cc: James Morris Cc: user-mode-linux-devel@lists.sourceforge.net Signed-off-by: James Morris Signed-off-by: Kees Cook --- arch/um/kernel/skas/syscall.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c index ef4b8f9..0728fee 100644 --- a/arch/um/kernel/skas/syscall.c +++ b/arch/um/kernel/skas/syscall.c @@ -21,11 +21,11 @@ void handle_syscall(struct uml_pt_regs *r) PT_REGS_SET_SYSCALL_RETURN(regs, -ENOSYS); if (syscall_trace_enter(regs)) - return; + goto out; /* Do the seccomp check after ptrace; failures should be fast. 
*/ if (secure_computing(NULL) == -1) - return; + goto out; /* Update the syscall number after orig_ax has potentially been updated * with ptrace. @@ -37,5 +37,6 @@ void handle_syscall(struct uml_pt_regs *r) PT_REGS_SET_SYSCALL_RETURN(regs, EXECUTE_SYSCALL(syscall, regs)); +out: syscall_trace_leave(regs); } -- cgit v1.1 From ce29856a5e1aabe52e18b2c60db1490769a6ab55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Mon, 1 Aug 2016 23:01:56 +0200 Subject: um/ptrace: Fix the syscall number update after a ptrace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update the syscall number after each PTRACE_SETREGS on ORIG_*AX. This is needed to get the potentially altered syscall number in the seccomp filters after RET_TRACE. This fixes four seccomp_bpf tests: > [ RUN ] TRACE_syscall.skip_after_RET_TRACE > seccomp_bpf.c:1560:TRACE_syscall.skip_after_RET_TRACE:Expected -1 (18446744073709551615) == syscall(39) (26) > seccomp_bpf.c:1561:TRACE_syscall.skip_after_RET_TRACE:Expected 1 (1) == (*__errno_location ()) (22) > [ FAIL ] TRACE_syscall.skip_after_RET_TRACE > [ RUN ] TRACE_syscall.kill_after_RET_TRACE > TRACE_syscall.kill_after_RET_TRACE: Test exited normally instead of by signal (code: 1) > [ FAIL ] TRACE_syscall.kill_after_RET_TRACE > [ RUN ] TRACE_syscall.skip_after_ptrace > seccomp_bpf.c:1622:TRACE_syscall.skip_after_ptrace:Expected -1 (18446744073709551615) == syscall(39) (26) > seccomp_bpf.c:1623:TRACE_syscall.skip_after_ptrace:Expected 1 (1) == (*__errno_location ()) (22) > [ FAIL ] TRACE_syscall.skip_after_ptrace > [ RUN ] TRACE_syscall.kill_after_ptrace > TRACE_syscall.kill_after_ptrace: Test exited normally instead of by signal (code: 1) > [ FAIL ] TRACE_syscall.kill_after_ptrace Fixes: 26703c636c1f ("um/ptrace: run seccomp after ptrace") Signed-off-by: Mickaël Salaün Acked-by: Kees Cook Cc: Jeff Dike Cc: Richard Weinberger Cc: James Morris Cc: user-mode-linux-devel@lists.sourceforge.net 
Signed-off-by: James Morris Signed-off-by: Kees Cook --- arch/um/kernel/skas/syscall.c | 5 ----- arch/x86/um/ptrace_32.c | 3 +++ arch/x86/um/ptrace_64.c | 4 ++++ 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c index 0728fee..b783ac8 100644 --- a/arch/um/kernel/skas/syscall.c +++ b/arch/um/kernel/skas/syscall.c @@ -27,12 +27,7 @@ void handle_syscall(struct uml_pt_regs *r) if (secure_computing(NULL) == -1) goto out; - /* Update the syscall number after orig_ax has potentially been updated - * with ptrace. - */ - UPT_SYSCALL_NR(r) = PT_SYSCALL_NR(r->gp); syscall = UPT_SYSCALL_NR(r); - if (syscall >= 0 && syscall <= __NR_syscall_max) PT_REGS_SET_SYSCALL_RETURN(regs, EXECUTE_SYSCALL(syscall, regs)); diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c index ebd4dd6..a7ef7b1 100644 --- a/arch/x86/um/ptrace_32.c +++ b/arch/x86/um/ptrace_32.c @@ -84,7 +84,10 @@ int putreg(struct task_struct *child, int regno, unsigned long value) case EAX: case EIP: case UESP: + break; case ORIG_EAX: + /* Update the syscall number. */ + UPT_SYSCALL_NR(&child->thread.regs.regs) = value; break; case FS: if (value && (value & 3) != 3) diff --git a/arch/x86/um/ptrace_64.c b/arch/x86/um/ptrace_64.c index faab418..0b5c184 100644 --- a/arch/x86/um/ptrace_64.c +++ b/arch/x86/um/ptrace_64.c @@ -78,7 +78,11 @@ int putreg(struct task_struct *child, int regno, unsigned long value) case RSI: case RDI: case RBP: + break; + case ORIG_RAX: + /* Update the syscall number. 
*/ + UPT_SYSCALL_NR(&child->thread.regs.regs) = value; break; case FS: -- cgit v1.1 From 4fadd04d504a82f7f1fdeaddc144a9c396d1285e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Mon, 1 Aug 2016 23:01:57 +0200 Subject: seccomp: Remove 2-phase API documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes: 8112c4f140fa ("seccomp: remove 2-phase API") Signed-off-by: Mickaël Salaün Acked-by: Kees Cook Cc: Andy Lutomirski Cc: James Morris Signed-off-by: James Morris Signed-off-by: Kees Cook --- arch/Kconfig | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index e9c9334..fd6e971 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -336,17 +336,6 @@ config HAVE_ARCH_SECCOMP_FILTER results in the system call being skipped immediately. - seccomp syscall wired up - For best performance, an arch should use seccomp_phase1 and - seccomp_phase2 directly. It should call seccomp_phase1 for all - syscalls if TIF_SECCOMP is set, but seccomp_phase1 does not - need to be called from a ptrace-safe context. It must then - call seccomp_phase2 if seccomp_phase1 returns anything other - than SECCOMP_PHASE1_OK or SECCOMP_PHASE1_SKIP. - - As an additional optimization, an arch may provide seccomp_data - directly to seccomp_phase1; this avoids multiple calls - to the syscall_xyz helpers for every syscall. - config SECCOMP_FILTER def_bool y depends on HAVE_ARCH_SECCOMP_FILTER && SECCOMP && NET -- cgit v1.1 From a85d6b8242dc78ef3f4542a0f979aebcbe77fc4e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 7 Sep 2016 09:39:32 -0700 Subject: usercopy: force check_object_size() inline Just for good measure, make sure that check_object_size() is always inlined too, as already done for copy_*_user() and __copy_*_user(). 
Suggested-by: Linus Torvalds Signed-off-by: Kees Cook --- include/linux/thread_info.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 10c9e60..2b5b10e 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -118,8 +118,8 @@ static inline int arch_within_stack_frames(const void * const stack, extern void __check_object_size(const void *ptr, unsigned long n, bool to_user); -static inline void check_object_size(const void *ptr, unsigned long n, - bool to_user) +static __always_inline void check_object_size(const void *ptr, unsigned long n, + bool to_user) { if (!__builtin_constant_p(n)) __check_object_size(ptr, n, to_user); -- cgit v1.1 From 8e1f74ea02cf4562404c48c6882214821552c13f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 7 Sep 2016 09:54:34 -0700 Subject: usercopy: remove page-spanning test for now A custom allocator without __GFP_COMP that copies to userspace has been found in vmw_execbuf_process[1], so this disables the page-span checker by placing it behind a CONFIG for future work where such things can be tracked down later. [1] https://bugzilla.redhat.com/show_bug.cgi?id=1373326 Reported-by: Vinson Lee Fixes: f5509cc18daa ("mm: Hardened usercopy") Signed-off-by: Kees Cook --- mm/usercopy.c | 61 ++++++++++++++++++++++++++++++++------------------------ security/Kconfig | 11 ++++++++++ 2 files changed, 46 insertions(+), 26 deletions(-) diff --git a/mm/usercopy.c b/mm/usercopy.c index a3cc305..089328f 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -134,31 +134,16 @@ static inline const char *check_bogus_address(const void *ptr, unsigned long n) return NULL; } -static inline const char *check_heap_object(const void *ptr, unsigned long n, - bool to_user) +/* Checks for allocs that are marked in some way as spanning multiple pages. 
*/ +static inline const char *check_page_span(const void *ptr, unsigned long n, + struct page *page, bool to_user) { - struct page *page, *endpage; +#ifdef CONFIG_HARDENED_USERCOPY_PAGESPAN const void *end = ptr + n - 1; + struct page *endpage; bool is_reserved, is_cma; /* - * Some architectures (arm64) return true for virt_addr_valid() on - * vmalloced addresses. Work around this by checking for vmalloc - * first. - */ - if (is_vmalloc_addr(ptr)) - return NULL; - - if (!virt_addr_valid(ptr)) - return NULL; - - page = virt_to_head_page(ptr); - - /* Check slab allocator for flags and size. */ - if (PageSlab(page)) - return __check_heap_object(ptr, n, page); - - /* * Sometimes the kernel data regions are not marked Reserved (see * check below). And sometimes [_sdata,_edata) does not cover * rodata and/or bss, so check each range explicitly. @@ -186,7 +171,7 @@ static inline const char *check_heap_object(const void *ptr, unsigned long n, ((unsigned long)end & (unsigned long)PAGE_MASK))) return NULL; - /* Allow if start and end are inside the same compound page. */ + /* Allow if fully inside the same compound (__GFP_COMP) page. */ endpage = virt_to_head_page(end); if (likely(endpage == page)) return NULL; @@ -199,20 +184,44 @@ static inline const char *check_heap_object(const void *ptr, unsigned long n, is_reserved = PageReserved(page); is_cma = is_migrate_cma_page(page); if (!is_reserved && !is_cma) - goto reject; + return ""; for (ptr += PAGE_SIZE; ptr <= end; ptr += PAGE_SIZE) { page = virt_to_head_page(ptr); if (is_reserved && !PageReserved(page)) - goto reject; + return ""; if (is_cma && !is_migrate_cma_page(page)) - goto reject; + return ""; } +#endif return NULL; +} + +static inline const char *check_heap_object(const void *ptr, unsigned long n, + bool to_user) +{ + struct page *page; + + /* + * Some architectures (arm64) return true for virt_addr_valid() on + * vmalloced addresses. Work around this by checking for vmalloc + * first. 
+ */ + if (is_vmalloc_addr(ptr)) + return NULL; + + if (!virt_addr_valid(ptr)) + return NULL; + + page = virt_to_head_page(ptr); + + /* Check slab allocator for flags and size. */ + if (PageSlab(page)) + return __check_heap_object(ptr, n, page); -reject: - return ""; + /* Verify object does not incorrectly span multiple pages. */ + return check_page_span(ptr, n, page, to_user); } /* diff --git a/security/Kconfig b/security/Kconfig index da10d9b..2dfc0ce 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -147,6 +147,17 @@ config HARDENED_USERCOPY or are part of the kernel text. This kills entire classes of heap overflow exploits and similar kernel memory exposures. +config HARDENED_USERCOPY_PAGESPAN + bool "Refuse to copy allocations that span multiple pages" + depends on HARDENED_USERCOPY + depends on !COMPILE_TEST + help + When a multi-page allocation is done without __GFP_COMP, + hardened usercopy will reject attempts to copy it. There are, + however, several cases of this in the kernel that have not all + been removed. This config is intended to be used only while + trying to find such users. + source security/selinux/Kconfig source security/smack/Kconfig source security/tomoyo/Kconfig -- cgit v1.1 From c6c864993d9a20f8d7cacb4feaac5c46a2f2e4db Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 24 Aug 2016 13:51:33 +0200 Subject: Revert "gpio: include in gpiolib-of" This reverts commit 7d4defe21c682c934a19fce1ba8b54b7bde61b08. The commit was pointless, manically trembling in the dark for a solution. 
The real fixes are: commit 048c28c91e56 ("gpio: make any OF dependent driver depend on OF_GPIO") commit 2527ecc9195e ("gpio: Fix OF build problem on UM") Reported-by: Chris Wilson Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-of.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 75e7b39..a28feb3 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include -- cgit v1.1 From 60f749f8e4cfdfffa5f29c966050ed680eeedac2 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 7 Sep 2016 23:13:20 +0200 Subject: gpio: mcp23s08: make driver depend on OF_GPIO The MCP23S08 driver certainly accesses fields inside the struct gpio_chip that are only available under CONFIG_OF_GPIO not just CONFIG_OF, so update the Kconfig and driver to reflect this. Cc: Alexander Stein Cc: Phil Reid Reported-by: kbuild test robot Signed-off-by: Linus Walleij --- drivers/gpio/Kconfig | 1 + drivers/gpio/gpio-mcp23s08.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 66a9410..24caedb 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -1131,6 +1131,7 @@ menu "SPI or I2C GPIO expanders" config GPIO_MCP23S08 tristate "Microchip MCP23xxx I/O expander" + depends on OF_GPIO select GPIOLIB_IRQCHIP help SPI/I2C driver for Microchip MCP23S08/MCP23S17/MCP23008/MCP23017 diff --git a/drivers/gpio/gpio-mcp23s08.c b/drivers/gpio/gpio-mcp23s08.c index ac22efc..99d37b5 100644 --- a/drivers/gpio/gpio-mcp23s08.c +++ b/drivers/gpio/gpio-mcp23s08.c @@ -564,7 +564,7 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, mcp->chip.direction_output = mcp23s08_direction_output; mcp->chip.set = mcp23s08_set; mcp->chip.dbg_show = mcp23s08_dbg_show; -#ifdef CONFIG_OF +#ifdef CONFIG_OF_GPIO mcp->chip.of_gpio_n_cells = 2; mcp->chip.of_node = dev->of_node; #endif -- cgit 
v1.1 From 56beac95cb88c188d2a885825a5da131edb41fe3 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 29 Aug 2016 11:24:10 +0100 Subject: gpio: sa1100: fix irq probing for ucb1x00 ucb1x00 has used IRQ probing since its dawn to find the GPIO interrupt that it's connected to. However, commit 23393d49fb75 ("gpio: kill off set_irq_flags usage") broke this by disabling IRQ probing on GPIO interrupts. Fix this. Fixes: 23393d49fb75 ("gpio: kill off set_irq_flags usage") Signed-off-by: Russell King Signed-off-by: Linus Walleij --- drivers/gpio/gpio-sa1100.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-sa1100.c b/drivers/gpio/gpio-sa1100.c index 0c99e8f..8d8ee0e 100644 --- a/drivers/gpio/gpio-sa1100.c +++ b/drivers/gpio/gpio-sa1100.c @@ -155,7 +155,7 @@ static int sa1100_gpio_irqdomain_map(struct irq_domain *d, { irq_set_chip_and_handler(irq, &sa1100_gpio_irq_chip, handle_edge_irq); - irq_set_noprobe(irq); + irq_set_probe(irq); return 0; } -- cgit v1.1 From 9dd4aaef194e45d96c2d0b6232a9cbc5430ad789 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 6 Sep 2016 20:22:21 -0700 Subject: MAINTAINERS: Update CPMAC email address Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 0537211..c747921 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3268,7 +3268,7 @@ S: Maintained F: drivers/net/wan/cosa* CPMAC ETHERNET DRIVER -M: Florian Fainelli +M: Florian Fainelli L: netdev@vger.kernel.org S: Maintained F: drivers/net/ethernet/ti/cpmac.c -- cgit v1.1 From 733ade23de1b72c1f11c5e4a1a9020a6f48decd2 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Tue, 6 Sep 2016 21:31:17 -0700 Subject: netlink: don't forget to release a rhashtable_iter structure This bug was detected by kmemleak: unreferenced object 0xffff8804269cc3c0 (size 64): comm "criu", pid 1042, jiffies 4294907360 (age 13.713s) hex dump (first 32 bytes): a0 32 cc 2c 04 88 ff ff 00 00 00 00 00 00 00 00 .2.,............ 00 01 00 00 00 00 ad de 00 02 00 00 00 00 ad de ................ backtrace: [] kmemleak_alloc+0x4a/0xa0 [] kmem_cache_alloc_trace+0x10f/0x280 [] __netlink_diag_dump+0x26c/0x290 [netlink_diag] v2: don't remove a reference on a rhashtable_iter structure to release it from netlink_diag_dump_done Cc: Herbert Xu Fixes: ad202074320c ("netlink: Use rhashtable walk interface in diag dump") Signed-off-by: Andrei Vagin Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/netlink/diag.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/netlink/diag.c b/net/netlink/diag.c index 3e3e253..b2f0e98 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -127,7 +127,6 @@ stop: goto done; rhashtable_walk_exit(hti); - cb->args[2] = 0; num++; mc_list: -- cgit v1.1 From f95bf346226b9b79352e05508beececc807cc37a Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 7 Sep 2016 13:38:35 +0900 Subject: net: diag: make udp_diag_destroy work for mapped addresses. udp_diag_destroy does look up the IPv4 UDP hashtable for mapped addresses, but it gets the IPv4 address to look up from the beginning of the IPv6 address instead of the end. 
Tested: https://android-review.googlesource.com/269874 Fixes: 5d77dca82839 ("net: diag: support SOCK_DESTROY for UDP sockets") Signed-off-by: Lorenzo Colitti Acked-by: Eric Dumazet Acked-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/udp_diag.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 8a9f6e5..58b79c0 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -186,8 +186,8 @@ static int __udp_diag_destroy(struct sk_buff *in_skb, if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) && ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src)) sk = __udp4_lib_lookup(net, - req->id.idiag_dst[0], req->id.idiag_dport, - req->id.idiag_src[0], req->id.idiag_sport, + req->id.idiag_dst[3], req->id.idiag_dport, + req->id.idiag_src[3], req->id.idiag_sport, req->id.idiag_if, tbl, NULL); else -- cgit v1.1 From 72e8d5fdf58b7d398b31612e63cc376f43c9da1b Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Wed, 7 Sep 2016 19:07:00 +0800 Subject: qed: add missing header dependencies We get 4 warnings when building kernel with W=1: drivers/net/ethernet/qlogic/qed/qed_selftest.c:6:5: warning: no previous prototype for 'qed_selftest_memory' [-Wmissing-prototypes] drivers/net/ethernet/qlogic/qed/qed_selftest.c:19:5: warning: no previous prototype for 'qed_selftest_interrupt' [-Wmissing-prototypes] drivers/net/ethernet/qlogic/qed/qed_selftest.c:32:5: warning: no previous prototype for 'qed_selftest_register' [-Wmissing-prototypes] drivers/net/ethernet/qlogic/qed/qed_selftest.c:55:5: warning: no previous prototype for 'qed_selftest_clock' [-Wmissing-prototypes] In fact, these functions are declared in qed_selftest.h, so this patch add missing header dependencies. Signed-off-by: Baoyou Xie Acked-by: Yuval Mintz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/qed/qed_selftest.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_selftest.c b/drivers/net/ethernet/qlogic/qed/qed_selftest.c index a342bfe..9b7678f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_selftest.c +++ b/drivers/net/ethernet/qlogic/qed/qed_selftest.c @@ -2,6 +2,7 @@ #include "qed_dev_api.h" #include "qed_mcp.h" #include "qed_sp.h" +#include "qed_selftest.h" int qed_selftest_memory(struct qed_dev *cdev) { -- cgit v1.1 From 936f0600de541416ec8d82037e0e277538c9f945 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 7 Sep 2016 15:27:09 +0200 Subject: kaweth: remove obsolete debugging statements Some statements in the driver only served to inform which functions were entered. Ftrace can do that just as well without needing memory. Remove the statements. Signed-off-by: Oliver Neukum Signed-off-by: David S. Miller --- drivers/net/usb/kaweth.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c index 528b9c9..66b34dd 100644 --- a/drivers/net/usb/kaweth.c +++ b/drivers/net/usb/kaweth.c @@ -265,8 +265,6 @@ static int kaweth_control(struct kaweth_device *kaweth, struct usb_ctrlrequest *dr; int retval; - netdev_dbg(kaweth->net, "kaweth_control()\n"); - if(in_interrupt()) { netdev_dbg(kaweth->net, "in_interrupt()\n"); return -EBUSY; @@ -300,8 +298,6 @@ static int kaweth_read_configuration(struct kaweth_device *kaweth) { int retval; - netdev_dbg(kaweth->net, "Reading kaweth configuration\n"); - retval = kaweth_control(kaweth, usb_rcvctrlpipe(kaweth->dev, 0), KAWETH_COMMAND_GET_ETHERNET_DESC, @@ -451,8 +447,6 @@ static int kaweth_trigger_firmware(struct kaweth_device *kaweth, kaweth->firmware_buf[6] = 0x00; kaweth->firmware_buf[7] = 0x00; - netdev_dbg(kaweth->net, "Triggering firmware\n"); - return kaweth_control(kaweth, usb_sndctrlpipe(kaweth->dev, 0), KAWETH_COMMAND_SCAN, @@ -471,7 +465,6 @@ static int 
kaweth_reset(struct kaweth_device *kaweth) { int result; - netdev_dbg(kaweth->net, "kaweth_reset(%p)\n", kaweth); result = usb_reset_configuration(kaweth->dev); mdelay(10); @@ -685,8 +678,6 @@ static int kaweth_open(struct net_device *net) struct kaweth_device *kaweth = netdev_priv(net); int res; - netdev_dbg(kaweth->net, "Opening network device.\n"); - res = usb_autopm_get_interface(kaweth->intf); if (res) { dev_err(&kaweth->intf->dev, "Interface cannot be resumed.\n"); @@ -951,7 +942,6 @@ static int kaweth_suspend(struct usb_interface *intf, pm_message_t message) struct kaweth_device *kaweth = usb_get_intfdata(intf); unsigned long flags; - dev_dbg(&intf->dev, "Suspending device\n"); spin_lock_irqsave(&kaweth->device_lock, flags); kaweth->status |= KAWETH_STATUS_SUSPENDING; spin_unlock_irqrestore(&kaweth->device_lock, flags); @@ -968,7 +958,6 @@ static int kaweth_resume(struct usb_interface *intf) struct kaweth_device *kaweth = usb_get_intfdata(intf); unsigned long flags; - dev_dbg(&intf->dev, "Resuming device\n"); spin_lock_irqsave(&kaweth->device_lock, flags); kaweth->status &= ~KAWETH_STATUS_SUSPENDING; spin_unlock_irqrestore(&kaweth->device_lock, flags); @@ -1190,8 +1179,6 @@ err_fw: dev_info(dev, "kaweth interface created at %s\n", kaweth->net->name); - dev_dbg(dev, "Kaweth probe returning.\n"); - return 0; err_intfdata: @@ -1219,8 +1206,6 @@ static void kaweth_disconnect(struct usb_interface *intf) struct kaweth_device *kaweth = usb_get_intfdata(intf); struct net_device *netdev; - dev_info(&intf->dev, "Unregistering\n"); - usb_set_intfdata(intf, NULL); if (!kaweth) { dev_warn(&intf->dev, "unregistering non-existent device\n"); -- cgit v1.1 From c965db44462919f613973aa618271f6c3f5a1e64 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Wed, 7 Sep 2016 16:36:24 +0300 Subject: qed: Add support for debug data collection This patch adds the support for dumping and formatting the HW/FW debug data. 
Signed-off-by: Tomer Tayar Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/Makefile | 2 +- drivers/net/ethernet/qlogic/qed/qed.h | 20 + drivers/net/ethernet/qlogic/qed/qed_debug.c | 6898 ++++++++++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_debug.h | 54 + drivers/net/ethernet/qlogic/qed/qed_hsi.h | 1056 +++- drivers/net/ethernet/qlogic/qed/qed_main.c | 4 + drivers/net/ethernet/qlogic/qed/qed_reg_addr.h | 894 +++ include/linux/qed/common_hsi.h | 3 + 8 files changed, 8919 insertions(+), 12 deletions(-) create mode 100644 drivers/net/ethernet/qlogic/qed/qed_debug.c create mode 100644 drivers/net/ethernet/qlogic/qed/qed_debug.h diff --git a/drivers/net/ethernet/qlogic/qed/Makefile b/drivers/net/ethernet/qlogic/qed/Makefile index d1f157e..86a5b4f 100644 --- a/drivers/net/ethernet/qlogic/qed/Makefile +++ b/drivers/net/ethernet/qlogic/qed/Makefile @@ -2,5 +2,5 @@ obj-$(CONFIG_QED) := qed.o qed-y := qed_cxt.o qed_dev.o qed_hw.o qed_init_fw_funcs.o qed_init_ops.o \ qed_int.o qed_main.o qed_mcp.o qed_sp_commands.o qed_spq.o qed_l2.o \ - qed_selftest.o qed_dcbx.o + qed_selftest.o qed_dcbx.o qed_debug.o qed-$(CONFIG_QED_SRIOV) += qed_sriov.o qed_vf.o diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 2d67469..0929582 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -23,6 +23,7 @@ #include #include #include +#include "qed_debug.h" #include "qed_hsi.h" extern const struct qed_common_ops qed_common_ops_pass; @@ -395,6 +396,8 @@ struct qed_hwfn { /* Buffer for unzipping firmware data */ void *unzip_buf; + struct dbg_tools_data dbg_info; + struct qed_simd_fp_handler simd_proto_handler[64]; #ifdef CONFIG_QED_SRIOV @@ -430,6 +433,19 @@ struct qed_int_params { u8 fp_msix_cnt; }; +struct qed_dbg_feature { + struct dentry *dentry; + u8 *dump_buf; + u32 buf_size; + u32 dumped_dwords; +}; + +struct qed_dbg_params { + struct qed_dbg_feature 
features[DBG_FEATURE_NUM]; + u8 engine_for_debug; + bool print_data; +}; + struct qed_dev { u32 dp_module; u8 dp_level; @@ -444,6 +460,8 @@ struct qed_dev { CHIP_REV_IS_A0(dev)) #define QED_IS_BB_B0(dev) (QED_IS_BB(dev) && \ CHIP_REV_IS_B0(dev)) +#define QED_IS_AH(dev) ((dev)->type == QED_DEV_TYPE_AH) +#define QED_IS_K2(dev) QED_IS_AH(dev) #define QED_GET_TYPE(dev) (QED_IS_BB_A0(dev) ? CHIP_BB_A0 : \ QED_IS_BB_B0(dev) ? CHIP_BB_B0 : CHIP_K2) @@ -544,6 +562,8 @@ struct qed_dev { } protocol_ops; void *ops_cookie; + struct qed_dbg_params dbg_params; + const struct firmware *firmware; }; diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c new file mode 100644 index 0000000..88e7d5b --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c @@ -0,0 +1,6898 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. 
+ */ + +#include +#include +#include +#include "qed.h" +#include "qed_hsi.h" +#include "qed_hw.h" +#include "qed_mcp.h" +#include "qed_reg_addr.h" + +/* Chip IDs enum */ +enum chip_ids { + CHIP_RESERVED, + CHIP_BB_B0, + CHIP_K2, + MAX_CHIP_IDS +}; + +/* Memory groups enum */ +enum mem_groups { + MEM_GROUP_PXP_MEM, + MEM_GROUP_DMAE_MEM, + MEM_GROUP_CM_MEM, + MEM_GROUP_QM_MEM, + MEM_GROUP_TM_MEM, + MEM_GROUP_BRB_RAM, + MEM_GROUP_BRB_MEM, + MEM_GROUP_PRS_MEM, + MEM_GROUP_SDM_MEM, + MEM_GROUP_PBUF, + MEM_GROUP_IOR, + MEM_GROUP_RAM, + MEM_GROUP_BTB_RAM, + MEM_GROUP_RDIF_CTX, + MEM_GROUP_TDIF_CTX, + MEM_GROUP_CONN_CFC_MEM, + MEM_GROUP_TASK_CFC_MEM, + MEM_GROUP_CAU_PI, + MEM_GROUP_CAU_MEM, + MEM_GROUP_PXP_ILT, + MEM_GROUP_MULD_MEM, + MEM_GROUP_BTB_MEM, + MEM_GROUP_IGU_MEM, + MEM_GROUP_IGU_MSIX, + MEM_GROUP_CAU_SB, + MEM_GROUP_BMB_RAM, + MEM_GROUP_BMB_MEM, + MEM_GROUPS_NUM +}; + +/* Memory groups names */ +static const char * const s_mem_group_names[] = { + "PXP_MEM", + "DMAE_MEM", + "CM_MEM", + "QM_MEM", + "TM_MEM", + "BRB_RAM", + "BRB_MEM", + "PRS_MEM", + "SDM_MEM", + "PBUF", + "IOR", + "RAM", + "BTB_RAM", + "RDIF_CTX", + "TDIF_CTX", + "CONN_CFC_MEM", + "TASK_CFC_MEM", + "CAU_PI", + "CAU_MEM", + "PXP_ILT", + "MULD_MEM", + "BTB_MEM", + "IGU_MEM", + "IGU_MSIX", + "CAU_SB", + "BMB_RAM", + "BMB_MEM", +}; + +/* Idle check conditions */ +static u32 cond4(const u32 *r, const u32 *imm) +{ + return ((r[0] & imm[0]) != imm[1]) && ((r[1] & imm[2]) != imm[3]); +} + +static u32 cond6(const u32 *r, const u32 *imm) +{ + return ((r[0] >> imm[0]) & imm[1]) != imm[2]; +} + +static u32 cond5(const u32 *r, const u32 *imm) +{ + return (r[0] & imm[0]) != imm[1]; +} + +static u32 cond8(const u32 *r, const u32 *imm) +{ + return ((r[0] & imm[0]) >> imm[1]) != + (((r[0] & imm[2]) >> imm[3]) | ((r[1] & imm[4]) << imm[5])); +} + +static u32 cond9(const u32 *r, const u32 *imm) +{ + return ((r[0] & imm[0]) >> imm[1]) != (r[0] & imm[2]); +} + +static u32 cond1(const u32 *r, const u32 *imm) +{ + return 
(r[0] & ~imm[0]) != imm[1]; +} + +static u32 cond0(const u32 *r, const u32 *imm) +{ + return r[0] != imm[0]; +} + +static u32 cond10(const u32 *r, const u32 *imm) +{ + return r[0] != r[1] && r[2] == imm[0]; +} + +static u32 cond11(const u32 *r, const u32 *imm) +{ + return r[0] != r[1] && r[2] > imm[0]; +} + +static u32 cond3(const u32 *r, const u32 *imm) +{ + return r[0] != r[1]; +} + +static u32 cond12(const u32 *r, const u32 *imm) +{ + return r[0] & imm[0]; +} + +static u32 cond7(const u32 *r, const u32 *imm) +{ + return r[0] < (r[1] - imm[0]); +} + +static u32 cond2(const u32 *r, const u32 *imm) +{ + return r[0] > imm[0]; +} + +/* Array of Idle Check conditions */ +static u32(*cond_arr[]) (const u32 *r, const u32 *imm) = { + cond0, + cond1, + cond2, + cond3, + cond4, + cond5, + cond6, + cond7, + cond8, + cond9, + cond10, + cond11, + cond12, +}; + +/******************************* Data Types **********************************/ + +enum platform_ids { + PLATFORM_ASIC, + PLATFORM_RESERVED, + PLATFORM_RESERVED2, + PLATFORM_RESERVED3, + MAX_PLATFORM_IDS +}; + +struct dbg_array { + const u32 *ptr; + u32 size_in_dwords; +}; + +/* Chip constant definitions */ +struct chip_defs { + const char *name; + struct { + u8 num_ports; + u8 num_pfs; + } per_platform[MAX_PLATFORM_IDS]; +}; + +/* Platform constant definitions */ +struct platform_defs { + const char *name; + u32 delay_factor; +}; + +/* Storm constant definitions */ +struct storm_defs { + char letter; + enum block_id block_id; + enum dbg_bus_clients dbg_client_id[MAX_CHIP_IDS]; + bool has_vfc; + u32 sem_fast_mem_addr; + u32 sem_frame_mode_addr; + u32 sem_slow_enable_addr; + u32 sem_slow_mode_addr; + u32 sem_slow_mode1_conf_addr; + u32 sem_sync_dbg_empty_addr; + u32 sem_slow_dbg_empty_addr; + u32 cm_ctx_wr_addr; + u32 cm_conn_ag_ctx_lid_size; /* In quad-regs */ + u32 cm_conn_ag_ctx_rd_addr; + u32 cm_conn_st_ctx_lid_size; /* In quad-regs */ + u32 cm_conn_st_ctx_rd_addr; + u32 cm_task_ag_ctx_lid_size; /* In quad-regs */ + 
u32 cm_task_ag_ctx_rd_addr; + u32 cm_task_st_ctx_lid_size; /* In quad-regs */ + u32 cm_task_st_ctx_rd_addr; +}; + +/* Block constant definitions */ +struct block_defs { + const char *name; + bool has_dbg_bus[MAX_CHIP_IDS]; + bool associated_to_storm; + u32 storm_id; /* Valid only if associated_to_storm is true */ + enum dbg_bus_clients dbg_client_id[MAX_CHIP_IDS]; + u32 dbg_select_addr; + u32 dbg_cycle_enable_addr; + u32 dbg_shift_addr; + u32 dbg_force_valid_addr; + u32 dbg_force_frame_addr; + bool has_reset_bit; + bool unreset; /* If true, the block is taken out of reset before dump */ + enum dbg_reset_regs reset_reg; + u8 reset_bit_offset; /* Bit offset in reset register */ +}; + +/* Reset register definitions */ +struct reset_reg_defs { + u32 addr; + u32 unreset_val; + bool exists[MAX_CHIP_IDS]; +}; + +struct grc_param_defs { + u32 default_val[MAX_CHIP_IDS]; + u32 min; + u32 max; + bool is_preset; + u32 exclude_all_preset_val; + u32 crash_preset_val; +}; + +struct rss_mem_defs { + const char *mem_name; + const char *type_name; + u32 addr; /* In 128b units */ + u32 num_entries[MAX_CHIP_IDS]; + u32 entry_width[MAX_CHIP_IDS]; /* In bits */ +}; + +struct vfc_ram_defs { + const char *mem_name; + const char *type_name; + u32 base_row; + u32 num_rows; +}; + +struct big_ram_defs { + const char *instance_name; + enum mem_groups mem_group_id; + enum mem_groups ram_mem_group_id; + enum dbg_grc_params grc_param; + u32 addr_reg_addr; + u32 data_reg_addr; + u32 num_of_blocks[MAX_CHIP_IDS]; +}; + +struct phy_defs { + const char *phy_name; + u32 base_addr; + u32 tbus_addr_lo_addr; + u32 tbus_addr_hi_addr; + u32 tbus_data_lo_addr; + u32 tbus_data_hi_addr; +}; + +/******************************** Constants **********************************/ + +#define MAX_LCIDS 320 +#define MAX_LTIDS 320 +#define NUM_IOR_SETS 2 +#define IORS_PER_SET 176 +#define IOR_SET_OFFSET(set_id) ((set_id) * 256) +#define BYTES_IN_DWORD sizeof(u32) + +/* In the macros below, size and offset are specified in 
bits */ +#define CEIL_DWORDS(size) DIV_ROUND_UP(size, 32) +#define FIELD_BIT_OFFSET(type, field) type ## _ ## field ## _ ## OFFSET +#define FIELD_BIT_SIZE(type, field) type ## _ ## field ## _ ## SIZE +#define FIELD_DWORD_OFFSET(type, field) \ + (int)(FIELD_BIT_OFFSET(type, field) / 32) +#define FIELD_DWORD_SHIFT(type, field) (FIELD_BIT_OFFSET(type, field) % 32) +#define FIELD_BIT_MASK(type, field) \ + (((1 << FIELD_BIT_SIZE(type, field)) - 1) << \ + FIELD_DWORD_SHIFT(type, field)) +#define SET_VAR_FIELD(var, type, field, val) \ + do { \ + var[FIELD_DWORD_OFFSET(type, field)] &= \ + (~FIELD_BIT_MASK(type, field)); \ + var[FIELD_DWORD_OFFSET(type, field)] |= \ + (val) << FIELD_DWORD_SHIFT(type, field); \ + } while (0) +#define ARR_REG_WR(dev, ptt, addr, arr, arr_size) \ + do { \ + for (i = 0; i < (arr_size); i++) \ + qed_wr(dev, ptt, addr, (arr)[i]); \ + } while (0) +#define ARR_REG_RD(dev, ptt, addr, arr, arr_size) \ + do { \ + for (i = 0; i < (arr_size); i++) \ + (arr)[i] = qed_rd(dev, ptt, addr); \ + } while (0) + +#define DWORDS_TO_BYTES(dwords) ((dwords) * BYTES_IN_DWORD) +#define BYTES_TO_DWORDS(bytes) ((bytes) / BYTES_IN_DWORD) +#define RAM_LINES_TO_DWORDS(lines) ((lines) * 2) +#define RAM_LINES_TO_BYTES(lines) \ + DWORDS_TO_BYTES(RAM_LINES_TO_DWORDS(lines)) +#define REG_DUMP_LEN_SHIFT 24 +#define MEM_DUMP_ENTRY_SIZE_DWORDS \ + BYTES_TO_DWORDS(sizeof(struct dbg_dump_mem)) +#define IDLE_CHK_RULE_SIZE_DWORDS \ + BYTES_TO_DWORDS(sizeof(struct dbg_idle_chk_rule)) +#define IDLE_CHK_RESULT_HDR_DWORDS \ + BYTES_TO_DWORDS(sizeof(struct dbg_idle_chk_result_hdr)) +#define IDLE_CHK_RESULT_REG_HDR_DWORDS \ + BYTES_TO_DWORDS(sizeof(struct dbg_idle_chk_result_reg_hdr)) +#define IDLE_CHK_MAX_ENTRIES_SIZE 32 + +/* The sizes and offsets below are specified in bits */ +#define VFC_CAM_CMD_STRUCT_SIZE 64 +#define VFC_CAM_CMD_ROW_OFFSET 48 +#define VFC_CAM_CMD_ROW_SIZE 9 +#define VFC_CAM_ADDR_STRUCT_SIZE 16 +#define VFC_CAM_ADDR_OP_OFFSET 0 +#define VFC_CAM_ADDR_OP_SIZE 4 
+#define VFC_CAM_RESP_STRUCT_SIZE 256 +#define VFC_RAM_ADDR_STRUCT_SIZE 16 +#define VFC_RAM_ADDR_OP_OFFSET 0 +#define VFC_RAM_ADDR_OP_SIZE 2 +#define VFC_RAM_ADDR_ROW_OFFSET 2 +#define VFC_RAM_ADDR_ROW_SIZE 10 +#define VFC_RAM_RESP_STRUCT_SIZE 256 +#define VFC_CAM_CMD_DWORDS CEIL_DWORDS(VFC_CAM_CMD_STRUCT_SIZE) +#define VFC_CAM_ADDR_DWORDS CEIL_DWORDS(VFC_CAM_ADDR_STRUCT_SIZE) +#define VFC_CAM_RESP_DWORDS CEIL_DWORDS(VFC_CAM_RESP_STRUCT_SIZE) +#define VFC_RAM_CMD_DWORDS VFC_CAM_CMD_DWORDS +#define VFC_RAM_ADDR_DWORDS CEIL_DWORDS(VFC_RAM_ADDR_STRUCT_SIZE) +#define VFC_RAM_RESP_DWORDS CEIL_DWORDS(VFC_RAM_RESP_STRUCT_SIZE) +#define NUM_VFC_RAM_TYPES 4 +#define VFC_CAM_NUM_ROWS 512 +#define VFC_OPCODE_CAM_RD 14 +#define VFC_OPCODE_RAM_RD 0 +#define NUM_RSS_MEM_TYPES 5 +#define NUM_BIG_RAM_TYPES 3 +#define BIG_RAM_BLOCK_SIZE_BYTES 128 +#define BIG_RAM_BLOCK_SIZE_DWORDS \ + BYTES_TO_DWORDS(BIG_RAM_BLOCK_SIZE_BYTES) +#define NUM_PHY_TBUS_ADDRESSES 2048 +#define PHY_DUMP_SIZE_DWORDS (NUM_PHY_TBUS_ADDRESSES / 2) +#define RESET_REG_UNRESET_OFFSET 4 +#define STALL_DELAY_MS 500 +#define STATIC_DEBUG_LINE_DWORDS 9 +#define NUM_DBG_BUS_LINES 256 +#define NUM_COMMON_GLOBAL_PARAMS 8 +#define FW_IMG_MAIN 1 +#define REG_FIFO_DEPTH_ELEMENTS 32 +#define REG_FIFO_ELEMENT_DWORDS 2 +#define REG_FIFO_DEPTH_DWORDS \ + (REG_FIFO_ELEMENT_DWORDS * REG_FIFO_DEPTH_ELEMENTS) +#define IGU_FIFO_DEPTH_ELEMENTS 64 +#define IGU_FIFO_ELEMENT_DWORDS 4 +#define IGU_FIFO_DEPTH_DWORDS \ + (IGU_FIFO_ELEMENT_DWORDS * IGU_FIFO_DEPTH_ELEMENTS) +#define PROTECTION_OVERRIDE_DEPTH_ELEMENTS 20 +#define PROTECTION_OVERRIDE_ELEMENT_DWORDS 2 +#define PROTECTION_OVERRIDE_DEPTH_DWORDS \ + (PROTECTION_OVERRIDE_DEPTH_ELEMENTS * \ + PROTECTION_OVERRIDE_ELEMENT_DWORDS) +#define MCP_SPAD_TRACE_OFFSIZE_ADDR \ + (MCP_REG_SCRATCH + \ + offsetof(struct static_init, sections[SPAD_SECTION_TRACE])) +#define MCP_TRACE_META_IMAGE_SIGNATURE 0x669955aa +#define EMPTY_FW_VERSION_STR "???_???_???_???" 
+#define EMPTY_FW_IMAGE_STR "???????????????" + +/***************************** Constant Arrays *******************************/ + +/* Debug arrays */ +static struct dbg_array s_dbg_arrays[MAX_BIN_DBG_BUFFER_TYPE] = { {0} }; + +/* Chip constant definitions array */ +static struct chip_defs s_chip_defs[MAX_CHIP_IDS] = { + { "reserved", { {0, 0}, {0, 0}, {0, 0}, {0, 0} } }, + { "bb_b0", + { {MAX_NUM_PORTS_BB, MAX_NUM_PFS_BB}, {0, 0}, {0, 0}, {0, 0} } }, + { "k2", { {MAX_NUM_PORTS_K2, MAX_NUM_PFS_K2}, {0, 0}, {0, 0}, {0, 0} } } +}; + +/* Storm constant definitions array. + * One entry per Storm, in the order T, M, U, X, Y, P; the array is indexed by + * Storm ID. Every register named in an entry belongs to that Storm's own SEM + * and CM blocks. + */ +static struct storm_defs s_storm_defs[] = { + /* Tstorm */ + {'T', BLOCK_TSEM, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, + DBG_BUS_CLIENT_RBCT}, true, + TSEM_REG_FAST_MEMORY, + TSEM_REG_DBG_FRAME_MODE, TSEM_REG_SLOW_DBG_ACTIVE, + TSEM_REG_SLOW_DBG_MODE, TSEM_REG_DBG_MODE1_CFG, + TSEM_REG_SYNC_DBG_EMPTY, TSEM_REG_SLOW_DBG_EMPTY, + TCM_REG_CTX_RBC_ACCS, + 4, TCM_REG_AGG_CON_CTX, + 16, TCM_REG_SM_CON_CTX, + 2, TCM_REG_AGG_TASK_CTX, + 4, TCM_REG_SM_TASK_CTX}, + /* Mstorm */ + {'M', BLOCK_MSEM, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, + DBG_BUS_CLIENT_RBCM}, false, + MSEM_REG_FAST_MEMORY, + MSEM_REG_DBG_FRAME_MODE, MSEM_REG_SLOW_DBG_ACTIVE, + MSEM_REG_SLOW_DBG_MODE, MSEM_REG_DBG_MODE1_CFG, + MSEM_REG_SYNC_DBG_EMPTY, MSEM_REG_SLOW_DBG_EMPTY, + MCM_REG_CTX_RBC_ACCS, + 1, MCM_REG_AGG_CON_CTX, + 10, MCM_REG_SM_CON_CTX, + 2, MCM_REG_AGG_TASK_CTX, + 7, MCM_REG_SM_TASK_CTX}, + /* Ustorm */ + {'U', BLOCK_USEM, + {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, + DBG_BUS_CLIENT_RBCU}, false, + USEM_REG_FAST_MEMORY, + USEM_REG_DBG_FRAME_MODE, USEM_REG_SLOW_DBG_ACTIVE, + USEM_REG_SLOW_DBG_MODE, USEM_REG_DBG_MODE1_CFG, + USEM_REG_SYNC_DBG_EMPTY, USEM_REG_SLOW_DBG_EMPTY, + UCM_REG_CTX_RBC_ACCS, + 2, UCM_REG_AGG_CON_CTX, + 13, UCM_REG_SM_CON_CTX, + 3, UCM_REG_AGG_TASK_CTX, + 3, UCM_REG_SM_TASK_CTX}, + /* Xstorm */ + {'X', BLOCK_XSEM, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, + DBG_BUS_CLIENT_RBCX}, false, + 
XSEM_REG_FAST_MEMORY, + XSEM_REG_DBG_FRAME_MODE, XSEM_REG_SLOW_DBG_ACTIVE, + XSEM_REG_SLOW_DBG_MODE, XSEM_REG_DBG_MODE1_CFG, + XSEM_REG_SYNC_DBG_EMPTY, XSEM_REG_SLOW_DBG_EMPTY, + XCM_REG_CTX_RBC_ACCS, + 9, XCM_REG_AGG_CON_CTX, + 15, XCM_REG_SM_CON_CTX, + 0, 0, + 0, 0}, + /* Ystorm */ + {'Y', BLOCK_YSEM, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, + DBG_BUS_CLIENT_RBCY}, false, + YSEM_REG_FAST_MEMORY, + YSEM_REG_DBG_FRAME_MODE, YSEM_REG_SLOW_DBG_ACTIVE, + YSEM_REG_SLOW_DBG_MODE, YSEM_REG_DBG_MODE1_CFG, + YSEM_REG_SYNC_DBG_EMPTY, YSEM_REG_SLOW_DBG_EMPTY, + YCM_REG_CTX_RBC_ACCS, + 2, YCM_REG_AGG_CON_CTX, + 3, YCM_REG_SM_CON_CTX, + 2, YCM_REG_AGG_TASK_CTX, + 12, YCM_REG_SM_TASK_CTX}, + /* Pstorm */ + {'P', BLOCK_PSEM, + {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, + DBG_BUS_CLIENT_RBCS}, true, + PSEM_REG_FAST_MEMORY, + PSEM_REG_DBG_FRAME_MODE, PSEM_REG_SLOW_DBG_ACTIVE, + PSEM_REG_SLOW_DBG_MODE, PSEM_REG_DBG_MODE1_CFG, + PSEM_REG_SYNC_DBG_EMPTY, PSEM_REG_SLOW_DBG_EMPTY, + PCM_REG_CTX_RBC_ACCS, + 0, 0, + 10, PCM_REG_SM_CON_CTX, + 0, 0, + 0, 0} +}; + +/* Block definitions array */ +static struct block_defs block_grc_defs = { + "grc", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCN, DBG_BUS_CLIENT_RBCN, DBG_BUS_CLIENT_RBCN}, + GRC_REG_DBG_SELECT, GRC_REG_DBG_DWORD_ENABLE, + GRC_REG_DBG_SHIFT, GRC_REG_DBG_FORCE_VALID, + GRC_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISC_PL_UA, 1 +}; + +static struct block_defs block_miscs_defs = { + "miscs", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs block_misc_defs = { + "misc", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs block_dbu_defs = { + "dbu", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, 
MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs block_pglue_b_defs = { + "pglue_b", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCH, DBG_BUS_CLIENT_RBCH, DBG_BUS_CLIENT_RBCH}, + PGLUE_B_REG_DBG_SELECT, PGLUE_B_REG_DBG_DWORD_ENABLE, + PGLUE_B_REG_DBG_SHIFT, PGLUE_B_REG_DBG_FORCE_VALID, + PGLUE_B_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISCS_PL_HV, 1 +}; + +static struct block_defs block_cnig_defs = { + "cnig", {false, false, true}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCW}, + CNIG_REG_DBG_SELECT_K2, CNIG_REG_DBG_DWORD_ENABLE_K2, + CNIG_REG_DBG_SHIFT_K2, CNIG_REG_DBG_FORCE_VALID_K2, + CNIG_REG_DBG_FORCE_FRAME_K2, + true, false, DBG_RESET_REG_MISCS_PL_HV, 0 +}; + +static struct block_defs block_cpmu_defs = { + "cpmu", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + true, false, DBG_RESET_REG_MISCS_PL_HV, 8 +}; + +static struct block_defs block_ncsi_defs = { + "ncsi", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ}, + NCSI_REG_DBG_SELECT, NCSI_REG_DBG_DWORD_ENABLE, + NCSI_REG_DBG_SHIFT, NCSI_REG_DBG_FORCE_VALID, + NCSI_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISCS_PL_HV, 5 +}; + +static struct block_defs block_opte_defs = { + "opte", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + true, false, DBG_RESET_REG_MISCS_PL_HV, 4 +}; + +static struct block_defs block_bmb_defs = { + "bmb", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCB}, + BMB_REG_DBG_SELECT, BMB_REG_DBG_DWORD_ENABLE, + BMB_REG_DBG_SHIFT, BMB_REG_DBG_FORCE_VALID, + BMB_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISCS_PL_UA, 7 +}; + +static struct block_defs block_pcie_defs = { + "pcie", {false, false, true}, false, 0, + {MAX_DBG_BUS_CLIENTS, 
MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCH}, + PCIE_REG_DBG_COMMON_SELECT, PCIE_REG_DBG_COMMON_DWORD_ENABLE, + PCIE_REG_DBG_COMMON_SHIFT, PCIE_REG_DBG_COMMON_FORCE_VALID, + PCIE_REG_DBG_COMMON_FORCE_FRAME, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs block_mcp_defs = { + "mcp", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs block_mcp2_defs = { + "mcp2", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ}, + MCP2_REG_DBG_SELECT, MCP2_REG_DBG_DWORD_ENABLE, + MCP2_REG_DBG_SHIFT, MCP2_REG_DBG_FORCE_VALID, + MCP2_REG_DBG_FORCE_FRAME, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs block_pswhst_defs = { + "pswhst", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + PSWHST_REG_DBG_SELECT, PSWHST_REG_DBG_DWORD_ENABLE, + PSWHST_REG_DBG_SHIFT, PSWHST_REG_DBG_FORCE_VALID, + PSWHST_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISC_PL_HV, 0 +}; + +static struct block_defs block_pswhst2_defs = { + "pswhst2", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + PSWHST2_REG_DBG_SELECT, PSWHST2_REG_DBG_DWORD_ENABLE, + PSWHST2_REG_DBG_SHIFT, PSWHST2_REG_DBG_FORCE_VALID, + PSWHST2_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISC_PL_HV, 0 +}; + +static struct block_defs block_pswrd_defs = { + "pswrd", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + PSWRD_REG_DBG_SELECT, PSWRD_REG_DBG_DWORD_ENABLE, + PSWRD_REG_DBG_SHIFT, PSWRD_REG_DBG_FORCE_VALID, + PSWRD_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISC_PL_HV, 2 +}; + +static struct block_defs block_pswrd2_defs = { + "pswrd2", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + PSWRD2_REG_DBG_SELECT, 
PSWRD2_REG_DBG_DWORD_ENABLE, + PSWRD2_REG_DBG_SHIFT, PSWRD2_REG_DBG_FORCE_VALID, + PSWRD2_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISC_PL_HV, 2 +}; + +static struct block_defs block_pswwr_defs = { + "pswwr", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + PSWWR_REG_DBG_SELECT, PSWWR_REG_DBG_DWORD_ENABLE, + PSWWR_REG_DBG_SHIFT, PSWWR_REG_DBG_FORCE_VALID, + PSWWR_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISC_PL_HV, 3 +}; + +static struct block_defs block_pswwr2_defs = { + "pswwr2", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + true, false, DBG_RESET_REG_MISC_PL_HV, 3 +}; + +static struct block_defs block_pswrq_defs = { + "pswrq", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + PSWRQ_REG_DBG_SELECT, PSWRQ_REG_DBG_DWORD_ENABLE, + PSWRQ_REG_DBG_SHIFT, PSWRQ_REG_DBG_FORCE_VALID, + PSWRQ_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISC_PL_HV, 1 +}; + +static struct block_defs block_pswrq2_defs = { + "pswrq2", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + PSWRQ2_REG_DBG_SELECT, PSWRQ2_REG_DBG_DWORD_ENABLE, + PSWRQ2_REG_DBG_SHIFT, PSWRQ2_REG_DBG_FORCE_VALID, + PSWRQ2_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISC_PL_HV, 1 +}; + +static struct block_defs block_pglcs_defs = { + "pglcs", {false, false, true}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCH}, + PGLCS_REG_DBG_SELECT, PGLCS_REG_DBG_DWORD_ENABLE, + PGLCS_REG_DBG_SHIFT, PGLCS_REG_DBG_FORCE_VALID, + PGLCS_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISCS_PL_HV, 2 +}; + +static struct block_defs block_ptu_defs = { + "ptu", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + PTU_REG_DBG_SELECT, PTU_REG_DBG_DWORD_ENABLE, + PTU_REG_DBG_SHIFT, PTU_REG_DBG_FORCE_VALID, + 
PTU_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 20 +}; + +static struct block_defs block_dmae_defs = { + "dmae", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + DMAE_REG_DBG_SELECT, DMAE_REG_DBG_DWORD_ENABLE, + DMAE_REG_DBG_SHIFT, DMAE_REG_DBG_FORCE_VALID, + DMAE_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 28 +}; + +static struct block_defs block_tcm_defs = { + "tcm", {true, true, true}, true, DBG_TSTORM_ID, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT}, + TCM_REG_DBG_SELECT, TCM_REG_DBG_DWORD_ENABLE, + TCM_REG_DBG_SHIFT, TCM_REG_DBG_FORCE_VALID, + TCM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 5 +}; + +static struct block_defs block_mcm_defs = { + "mcm", {true, true, true}, true, DBG_MSTORM_ID, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM}, + MCM_REG_DBG_SELECT, MCM_REG_DBG_DWORD_ENABLE, + MCM_REG_DBG_SHIFT, MCM_REG_DBG_FORCE_VALID, + MCM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 3 +}; + +static struct block_defs block_ucm_defs = { + "ucm", {true, true, true}, true, DBG_USTORM_ID, + {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU}, + UCM_REG_DBG_SELECT, UCM_REG_DBG_DWORD_ENABLE, + UCM_REG_DBG_SHIFT, UCM_REG_DBG_FORCE_VALID, + UCM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 8 +}; + +static struct block_defs block_xcm_defs = { + "xcm", {true, true, true}, true, DBG_XSTORM_ID, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX}, + XCM_REG_DBG_SELECT, XCM_REG_DBG_DWORD_ENABLE, + XCM_REG_DBG_SHIFT, XCM_REG_DBG_FORCE_VALID, + XCM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 19 +}; + +static struct block_defs block_ycm_defs = { + "ycm", {true, true, true}, true, DBG_YSTORM_ID, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY}, + YCM_REG_DBG_SELECT, YCM_REG_DBG_DWORD_ENABLE, + 
YCM_REG_DBG_SHIFT, YCM_REG_DBG_FORCE_VALID, + YCM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 5 +}; + +static struct block_defs block_pcm_defs = { + "pcm", {true, true, true}, true, DBG_PSTORM_ID, + {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS}, + PCM_REG_DBG_SELECT, PCM_REG_DBG_DWORD_ENABLE, + PCM_REG_DBG_SHIFT, PCM_REG_DBG_FORCE_VALID, + PCM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 4 +}; + +static struct block_defs block_qm_defs = { + "qm", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCQ}, + QM_REG_DBG_SELECT, QM_REG_DBG_DWORD_ENABLE, + QM_REG_DBG_SHIFT, QM_REG_DBG_FORCE_VALID, + QM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 16 +}; + +static struct block_defs block_tm_defs = { + "tm", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS}, + TM_REG_DBG_SELECT, TM_REG_DBG_DWORD_ENABLE, + TM_REG_DBG_SHIFT, TM_REG_DBG_FORCE_VALID, + TM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 17 +}; + +static struct block_defs block_dorq_defs = { + "dorq", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY}, + DORQ_REG_DBG_SELECT, DORQ_REG_DBG_DWORD_ENABLE, + DORQ_REG_DBG_SHIFT, DORQ_REG_DBG_FORCE_VALID, + DORQ_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 18 +}; + +static struct block_defs block_brb_defs = { + "brb", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR}, + BRB_REG_DBG_SELECT, BRB_REG_DBG_DWORD_ENABLE, + BRB_REG_DBG_SHIFT, BRB_REG_DBG_FORCE_VALID, + BRB_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 0 +}; + +static struct block_defs block_src_defs = { + "src", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF}, + SRC_REG_DBG_SELECT, SRC_REG_DBG_DWORD_ENABLE, + SRC_REG_DBG_SHIFT, 
SRC_REG_DBG_FORCE_VALID, + SRC_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 2 +}; + +static struct block_defs block_prs_defs = { + "prs", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR}, + PRS_REG_DBG_SELECT, PRS_REG_DBG_DWORD_ENABLE, + PRS_REG_DBG_SHIFT, PRS_REG_DBG_FORCE_VALID, + PRS_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 1 +}; + +static struct block_defs block_tsdm_defs = { + "tsdm", {true, true, true}, true, DBG_TSTORM_ID, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT}, + TSDM_REG_DBG_SELECT, TSDM_REG_DBG_DWORD_ENABLE, + TSDM_REG_DBG_SHIFT, TSDM_REG_DBG_FORCE_VALID, + TSDM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 3 +}; + +static struct block_defs block_msdm_defs = { + "msdm", {true, true, true}, true, DBG_MSTORM_ID, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM}, + MSDM_REG_DBG_SELECT, MSDM_REG_DBG_DWORD_ENABLE, + MSDM_REG_DBG_SHIFT, MSDM_REG_DBG_FORCE_VALID, + MSDM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 6 +}; + +static struct block_defs block_usdm_defs = { + "usdm", {true, true, true}, true, DBG_USTORM_ID, + {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU}, + USDM_REG_DBG_SELECT, USDM_REG_DBG_DWORD_ENABLE, + USDM_REG_DBG_SHIFT, USDM_REG_DBG_FORCE_VALID, + USDM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 7 +}; + +static struct block_defs block_xsdm_defs = { + "xsdm", {true, true, true}, true, DBG_XSTORM_ID, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX}, + XSDM_REG_DBG_SELECT, XSDM_REG_DBG_DWORD_ENABLE, + XSDM_REG_DBG_SHIFT, XSDM_REG_DBG_FORCE_VALID, + XSDM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 20 +}; + +static struct block_defs block_ysdm_defs = { + "ysdm", {true, true, true}, true, DBG_YSTORM_ID, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY}, + 
YSDM_REG_DBG_SELECT, YSDM_REG_DBG_DWORD_ENABLE, + YSDM_REG_DBG_SHIFT, YSDM_REG_DBG_FORCE_VALID, + YSDM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 8 +}; + +static struct block_defs block_psdm_defs = { + "psdm", {true, true, true}, true, DBG_PSTORM_ID, + {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS}, + PSDM_REG_DBG_SELECT, PSDM_REG_DBG_DWORD_ENABLE, + PSDM_REG_DBG_SHIFT, PSDM_REG_DBG_FORCE_VALID, + PSDM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 7 +}; + +static struct block_defs block_tsem_defs = { + "tsem", {true, true, true}, true, DBG_TSTORM_ID, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT}, + TSEM_REG_DBG_SELECT, TSEM_REG_DBG_DWORD_ENABLE, + TSEM_REG_DBG_SHIFT, TSEM_REG_DBG_FORCE_VALID, + TSEM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 4 +}; + +static struct block_defs block_msem_defs = { + "msem", {true, true, true}, true, DBG_MSTORM_ID, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM}, + MSEM_REG_DBG_SELECT, MSEM_REG_DBG_DWORD_ENABLE, + MSEM_REG_DBG_SHIFT, MSEM_REG_DBG_FORCE_VALID, + MSEM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 9 +}; + +static struct block_defs block_usem_defs = { + "usem", {true, true, true}, true, DBG_USTORM_ID, + {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU}, + USEM_REG_DBG_SELECT, USEM_REG_DBG_DWORD_ENABLE, + USEM_REG_DBG_SHIFT, USEM_REG_DBG_FORCE_VALID, + USEM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 9 +}; + +static struct block_defs block_xsem_defs = { + "xsem", {true, true, true}, true, DBG_XSTORM_ID, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX}, + XSEM_REG_DBG_SELECT, XSEM_REG_DBG_DWORD_ENABLE, + XSEM_REG_DBG_SHIFT, XSEM_REG_DBG_FORCE_VALID, + XSEM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 21 +}; + +static struct block_defs block_ysem_defs = { + "ysem", {true, true, true}, true, 
DBG_YSTORM_ID, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY}, + YSEM_REG_DBG_SELECT, YSEM_REG_DBG_DWORD_ENABLE, + YSEM_REG_DBG_SHIFT, YSEM_REG_DBG_FORCE_VALID, + YSEM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 11 +}; + +static struct block_defs block_psem_defs = { + "psem", {true, true, true}, true, DBG_PSTORM_ID, + {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS}, + PSEM_REG_DBG_SELECT, PSEM_REG_DBG_DWORD_ENABLE, + PSEM_REG_DBG_SHIFT, PSEM_REG_DBG_FORCE_VALID, + PSEM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 10 +}; + +static struct block_defs block_rss_defs = { + "rss", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT}, + RSS_REG_DBG_SELECT, RSS_REG_DBG_DWORD_ENABLE, + RSS_REG_DBG_SHIFT, RSS_REG_DBG_FORCE_VALID, + RSS_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 18 +}; + +static struct block_defs block_tmld_defs = { + "tmld", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM}, + TMLD_REG_DBG_SELECT, TMLD_REG_DBG_DWORD_ENABLE, + TMLD_REG_DBG_SHIFT, TMLD_REG_DBG_FORCE_VALID, + TMLD_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 13 +}; + +static struct block_defs block_muld_defs = { + "muld", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU}, + MULD_REG_DBG_SELECT, MULD_REG_DBG_DWORD_ENABLE, + MULD_REG_DBG_SHIFT, MULD_REG_DBG_FORCE_VALID, + MULD_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 14 +}; + +static struct block_defs block_yuld_defs = { + "yuld", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU}, + YULD_REG_DBG_SELECT, YULD_REG_DBG_DWORD_ENABLE, + YULD_REG_DBG_SHIFT, YULD_REG_DBG_FORCE_VALID, + YULD_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 15 +}; + +static struct block_defs block_xyld_defs = 
{ + "xyld", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX}, + XYLD_REG_DBG_SELECT, XYLD_REG_DBG_DWORD_ENABLE, + XYLD_REG_DBG_SHIFT, XYLD_REG_DBG_FORCE_VALID, + XYLD_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 12 +}; + +static struct block_defs block_prm_defs = { + "prm", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM}, + PRM_REG_DBG_SELECT, PRM_REG_DBG_DWORD_ENABLE, + PRM_REG_DBG_SHIFT, PRM_REG_DBG_FORCE_VALID, + PRM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 21 +}; + +static struct block_defs block_pbf_pb1_defs = { + "pbf_pb1", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCV}, + PBF_PB1_REG_DBG_SELECT, PBF_PB1_REG_DBG_DWORD_ENABLE, + PBF_PB1_REG_DBG_SHIFT, PBF_PB1_REG_DBG_FORCE_VALID, + PBF_PB1_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, + 11 +}; + +static struct block_defs block_pbf_pb2_defs = { + "pbf_pb2", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCV}, + PBF_PB2_REG_DBG_SELECT, PBF_PB2_REG_DBG_DWORD_ENABLE, + PBF_PB2_REG_DBG_SHIFT, PBF_PB2_REG_DBG_FORCE_VALID, + PBF_PB2_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, + 12 +}; + +static struct block_defs block_rpb_defs = { + "rpb", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM}, + RPB_REG_DBG_SELECT, RPB_REG_DBG_DWORD_ENABLE, + RPB_REG_DBG_SHIFT, RPB_REG_DBG_FORCE_VALID, + RPB_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 13 +}; + +static struct block_defs block_btb_defs = { + "btb", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCV}, + BTB_REG_DBG_SELECT, BTB_REG_DBG_DWORD_ENABLE, + BTB_REG_DBG_SHIFT, BTB_REG_DBG_FORCE_VALID, + BTB_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 10 
+}; + +static struct block_defs block_pbf_defs = { + "pbf", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCV}, + PBF_REG_DBG_SELECT, PBF_REG_DBG_DWORD_ENABLE, + PBF_REG_DBG_SHIFT, PBF_REG_DBG_FORCE_VALID, + PBF_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 15 +}; + +static struct block_defs block_rdif_defs = { + "rdif", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM}, + RDIF_REG_DBG_SELECT, RDIF_REG_DBG_DWORD_ENABLE, + RDIF_REG_DBG_SHIFT, RDIF_REG_DBG_FORCE_VALID, + RDIF_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 16 +}; + +static struct block_defs block_tdif_defs = { + "tdif", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS}, + TDIF_REG_DBG_SELECT, TDIF_REG_DBG_DWORD_ENABLE, + TDIF_REG_DBG_SHIFT, TDIF_REG_DBG_FORCE_VALID, + TDIF_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 17 +}; + +static struct block_defs block_cdu_defs = { + "cdu", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF}, + CDU_REG_DBG_SELECT, CDU_REG_DBG_DWORD_ENABLE, + CDU_REG_DBG_SHIFT, CDU_REG_DBG_FORCE_VALID, + CDU_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 23 +}; + +static struct block_defs block_ccfc_defs = { + "ccfc", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF}, + CCFC_REG_DBG_SELECT, CCFC_REG_DBG_DWORD_ENABLE, + CCFC_REG_DBG_SHIFT, CCFC_REG_DBG_FORCE_VALID, + CCFC_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 24 +}; + +static struct block_defs block_tcfc_defs = { + "tcfc", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF}, + TCFC_REG_DBG_SELECT, TCFC_REG_DBG_DWORD_ENABLE, + TCFC_REG_DBG_SHIFT, TCFC_REG_DBG_FORCE_VALID, + TCFC_REG_DBG_FORCE_FRAME, + true, true, 
DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 25 +}; + +static struct block_defs block_igu_defs = { + "igu", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + IGU_REG_DBG_SELECT, IGU_REG_DBG_DWORD_ENABLE, + IGU_REG_DBG_SHIFT, IGU_REG_DBG_FORCE_VALID, + IGU_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 27 +}; + +static struct block_defs block_cau_defs = { + "cau", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + CAU_REG_DBG_SELECT, CAU_REG_DBG_DWORD_ENABLE, + CAU_REG_DBG_SHIFT, CAU_REG_DBG_FORCE_VALID, + CAU_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 19 +}; + +static struct block_defs block_umac_defs = { + "umac", {false, false, true}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCZ}, + UMAC_REG_DBG_SELECT, UMAC_REG_DBG_DWORD_ENABLE, + UMAC_REG_DBG_SHIFT, UMAC_REG_DBG_FORCE_VALID, + UMAC_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISCS_PL_HV, 6 +}; + +static struct block_defs block_xmac_defs = { + "xmac", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs block_dbg_defs = { + "dbg", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + true, true, DBG_RESET_REG_MISC_PL_PDA_VAUX, 3 +}; + +static struct block_defs block_nig_defs = { + "nig", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCN, DBG_BUS_CLIENT_RBCN, DBG_BUS_CLIENT_RBCN}, + NIG_REG_DBG_SELECT, NIG_REG_DBG_DWORD_ENABLE, + NIG_REG_DBG_SHIFT, NIG_REG_DBG_FORCE_VALID, + NIG_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VAUX, 0 +}; + +static struct block_defs block_wol_defs = { + "wol", {false, false, true}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCZ}, + WOL_REG_DBG_SELECT, 
WOL_REG_DBG_DWORD_ENABLE, + WOL_REG_DBG_SHIFT, WOL_REG_DBG_FORCE_VALID, + WOL_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VAUX, 7 +}; + +static struct block_defs block_bmbn_defs = { + "bmbn", {false, false, true}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCB}, + BMBN_REG_DBG_SELECT, BMBN_REG_DBG_DWORD_ENABLE, + BMBN_REG_DBG_SHIFT, BMBN_REG_DBG_FORCE_VALID, + BMBN_REG_DBG_FORCE_FRAME, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs block_ipc_defs = { + "ipc", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + true, false, DBG_RESET_REG_MISCS_PL_UA, 8 +}; + +static struct block_defs block_nwm_defs = { + "nwm", {false, false, true}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCW}, + NWM_REG_DBG_SELECT, NWM_REG_DBG_DWORD_ENABLE, + NWM_REG_DBG_SHIFT, NWM_REG_DBG_FORCE_VALID, + NWM_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISCS_PL_HV_2, 0 +}; + +static struct block_defs block_nws_defs = { + "nws", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + true, false, DBG_RESET_REG_MISCS_PL_HV, 12 +}; + +static struct block_defs block_ms_defs = { + "ms", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + true, false, DBG_RESET_REG_MISCS_PL_HV, 13 +}; + +static struct block_defs block_phy_pcie_defs = { + "phy_pcie", {false, false, true}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCH}, + PCIE_REG_DBG_COMMON_SELECT, PCIE_REG_DBG_COMMON_DWORD_ENABLE, + PCIE_REG_DBG_COMMON_SHIFT, PCIE_REG_DBG_COMMON_FORCE_VALID, + PCIE_REG_DBG_COMMON_FORCE_FRAME, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs block_led_defs = { + "led", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 
0, 0, + true, true, DBG_RESET_REG_MISCS_PL_HV, 14 /* NOTE(review): every other block in + * this file whose reset register is not one of the PDA registers + * has 'true, false' here; 'led' is the only 'true, true'. Confirm + * that this is intended. + */ +}; + +static struct block_defs block_misc_aeu_defs = { + "misc_aeu", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs block_bar0_map_defs = { + "bar0_map", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs *s_block_defs[MAX_BLOCK_ID] = { + &block_grc_defs, + &block_miscs_defs, + &block_misc_defs, + &block_dbu_defs, + &block_pglue_b_defs, + &block_cnig_defs, + &block_cpmu_defs, + &block_ncsi_defs, + &block_opte_defs, + &block_bmb_defs, + &block_pcie_defs, + &block_mcp_defs, + &block_mcp2_defs, + &block_pswhst_defs, + &block_pswhst2_defs, + &block_pswrd_defs, + &block_pswrd2_defs, + &block_pswwr_defs, + &block_pswwr2_defs, + &block_pswrq_defs, + &block_pswrq2_defs, + &block_pglcs_defs, + &block_dmae_defs, /* NOTE(review): dmae precedes ptu here although + * block_ptu_defs is defined first; presumably this + * array follows the block ID enum order - confirm. + */ + &block_ptu_defs, + &block_tcm_defs, + &block_mcm_defs, + &block_ucm_defs, + &block_xcm_defs, + &block_ycm_defs, + &block_pcm_defs, + &block_qm_defs, + &block_tm_defs, + &block_dorq_defs, + &block_brb_defs, + &block_src_defs, + &block_prs_defs, + &block_tsdm_defs, + &block_msdm_defs, + &block_usdm_defs, + &block_xsdm_defs, + &block_ysdm_defs, + &block_psdm_defs, + &block_tsem_defs, + &block_msem_defs, + &block_usem_defs, + &block_xsem_defs, + &block_ysem_defs, + &block_psem_defs, + &block_rss_defs, + &block_tmld_defs, + &block_muld_defs, + &block_yuld_defs, + &block_xyld_defs, + &block_prm_defs, + &block_pbf_pb1_defs, + &block_pbf_pb2_defs, + &block_rpb_defs, + &block_btb_defs, + &block_pbf_defs, + &block_rdif_defs, + &block_tdif_defs, + &block_cdu_defs, + &block_ccfc_defs, + &block_tcfc_defs, + &block_igu_defs, + &block_cau_defs, + &block_umac_defs, + &block_xmac_defs, + &block_dbg_defs, + &block_nig_defs, + &block_wol_defs, + &block_bmbn_defs, 
+ &block_ipc_defs, + &block_nwm_defs, + &block_nws_defs, + &block_ms_defs, + &block_phy_pcie_defs, + &block_led_defs, + &block_misc_aeu_defs, + &block_bar0_map_defs, +}; + +static struct platform_defs s_platform_defs[] = { + {"asic", 1}, + {"reserved", 0}, + {"reserved2", 0}, + {"reserved3", 0} +}; + +static struct grc_param_defs s_grc_param_defs[] = { + {{1, 1, 1}, 0, 1, false, 1, 1}, /* DBG_GRC_PARAM_DUMP_TSTORM */ + {{1, 1, 1}, 0, 1, false, 1, 1}, /* DBG_GRC_PARAM_DUMP_MSTORM */ + {{1, 1, 1}, 0, 1, false, 1, 1}, /* DBG_GRC_PARAM_DUMP_USTORM */ + {{1, 1, 1}, 0, 1, false, 1, 1}, /* DBG_GRC_PARAM_DUMP_XSTORM */ + {{1, 1, 1}, 0, 1, false, 1, 1}, /* DBG_GRC_PARAM_DUMP_YSTORM */ + {{1, 1, 1}, 0, 1, false, 1, 1}, /* DBG_GRC_PARAM_DUMP_PSTORM */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_REGS */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_RAM */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_PBUF */ + {{0, 0, 0}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_IOR */ + {{0, 0, 0}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_VFC */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_CM_CTX */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_ILT */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_RSS */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_CAU */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_QM */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_MCP */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_RESERVED */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_CFC */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_IGU */ + {{0, 0, 0}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_BRB */ + {{0, 0, 0}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_BTB */ + {{0, 0, 0}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_BMB */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_NIG */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_MULD */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* 
DBG_GRC_PARAM_DUMP_PRS */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_DMAE */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_TM */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_SDM */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_DIF */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_STATIC */ + {{0, 0, 0}, 0, 1, false, 0, 0}, /* DBG_GRC_PARAM_UNSTALL */ + {{MAX_LCIDS, MAX_LCIDS, MAX_LCIDS}, 1, MAX_LCIDS, false, MAX_LCIDS, + MAX_LCIDS}, /* DBG_GRC_PARAM_NUM_LCIDS */ + {{MAX_LTIDS, MAX_LTIDS, MAX_LTIDS}, 1, MAX_LTIDS, false, MAX_LTIDS, + MAX_LTIDS}, /* DBG_GRC_PARAM_NUM_LTIDS */ + {{0, 0, 0}, 0, 1, true, 0, 0}, /* DBG_GRC_PARAM_EXCLUDE_ALL */ + {{0, 0, 0}, 0, 1, true, 0, 0}, /* DBG_GRC_PARAM_CRASH */ + {{0, 0, 0}, 0, 1, false, 1, 0}, /* DBG_GRC_PARAM_PARITY_SAFE */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_CM */ + {{1, 1, 1}, 0, 1, false, 0, 1} /* DBG_GRC_PARAM_DUMP_PHY */ +}; + +static struct rss_mem_defs s_rss_mem_defs[] = { + { "rss_mem_cid", "rss_cid", 0, + {256, 256, 320}, + {32, 32, 32} }, + { "rss_mem_key_msb", "rss_key", 1024, + {128, 128, 208}, + {256, 256, 256} }, + { "rss_mem_key_lsb", "rss_key", 2048, + {128, 128, 208}, + {64, 64, 64} }, + { "rss_mem_info", "rss_info", 3072, + {128, 128, 208}, + {16, 16, 16} }, + { "rss_mem_ind", "rss_ind", 4096, + {(128 * 128), (128 * 128), (128 * 208)}, + {16, 16, 16} } +}; + +static struct vfc_ram_defs s_vfc_ram_defs[] = { + {"vfc_ram_tt1", "vfc_ram", 0, 512}, + {"vfc_ram_mtt2", "vfc_ram", 512, 128}, + {"vfc_ram_stt2", "vfc_ram", 640, 32}, + {"vfc_ram_ro_vect", "vfc_ram", 672, 32} +}; + +static struct big_ram_defs s_big_ram_defs[] = { + { "BRB", MEM_GROUP_BRB_MEM, MEM_GROUP_BRB_RAM, DBG_GRC_PARAM_DUMP_BRB, + BRB_REG_BIG_RAM_ADDRESS, BRB_REG_BIG_RAM_DATA, + {4800, 4800, 5632} }, + { "BTB", MEM_GROUP_BTB_MEM, MEM_GROUP_BTB_RAM, DBG_GRC_PARAM_DUMP_BTB, + BTB_REG_BIG_RAM_ADDRESS, BTB_REG_BIG_RAM_DATA, + {2880, 2880, 3680} }, + { "BMB", MEM_GROUP_BMB_MEM, 
MEM_GROUP_BMB_RAM, DBG_GRC_PARAM_DUMP_BMB, + BMB_REG_BIG_RAM_ADDRESS, BMB_REG_BIG_RAM_DATA, + {1152, 1152, 1152} } +}; + +static struct reset_reg_defs s_reset_regs_defs[] = { + { MISCS_REG_RESET_PL_UA, 0x0, + {true, true, true} }, /* DBG_RESET_REG_MISCS_PL_UA */ + { MISCS_REG_RESET_PL_HV, 0x0, + {true, true, true} }, /* DBG_RESET_REG_MISCS_PL_HV */ + { MISCS_REG_RESET_PL_HV_2, 0x0, + {false, false, true} }, /* DBG_RESET_REG_MISCS_PL_HV_2 */ + { MISC_REG_RESET_PL_UA, 0x0, + {true, true, true} }, /* DBG_RESET_REG_MISC_PL_UA */ + { MISC_REG_RESET_PL_HV, 0x0, + {true, true, true} }, /* DBG_RESET_REG_MISC_PL_HV */ + { MISC_REG_RESET_PL_PDA_VMAIN_1, 0x4404040, + {true, true, true} }, /* DBG_RESET_REG_MISC_PL_PDA_VMAIN_1 */ + { MISC_REG_RESET_PL_PDA_VMAIN_2, 0x7c00007, + {true, true, true} }, /* DBG_RESET_REG_MISC_PL_PDA_VMAIN_2 */ + { MISC_REG_RESET_PL_PDA_VAUX, 0x2, + {true, true, true} }, /* DBG_RESET_REG_MISC_PL_PDA_VAUX */ +}; + +static struct phy_defs s_phy_defs[] = { + {"nw_phy", NWS_REG_NWS_CMU, PHY_NW_IP_REG_PHY0_TOP_TBUS_ADDR_7_0, + PHY_NW_IP_REG_PHY0_TOP_TBUS_ADDR_15_8, + PHY_NW_IP_REG_PHY0_TOP_TBUS_DATA_7_0, + PHY_NW_IP_REG_PHY0_TOP_TBUS_DATA_11_8}, + {"sgmii_phy", MS_REG_MS_CMU, PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X132, + PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X133, + PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X130, + PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X131}, + {"pcie_phy0", PHY_PCIE_REG_PHY0, PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X132, + PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X133, + PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X130, + PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X131}, + {"pcie_phy1", PHY_PCIE_REG_PHY1, PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X132, + PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X133, + PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X130, + PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X131}, +}; + +/**************************** Private Functions ******************************/ + +/* Reads and returns a single dword from the specified unaligned buffer */ +static u32 qed_read_unaligned_dword(u8 *buf) +{ + u32 dword; + + memcpy((u8 *)&dword, buf, 
sizeof(dword)); + return dword; +} + +/* Initializes debug data for the specified device */ +static enum dbg_status qed_dbg_dev_init(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + + if (dev_data->initialized) + return DBG_STATUS_OK; + + if (QED_IS_K2(p_hwfn->cdev)) { + dev_data->chip_id = CHIP_K2; + dev_data->mode_enable[MODE_K2] = 1; + } else if (QED_IS_BB_B0(p_hwfn->cdev)) { + dev_data->chip_id = CHIP_BB_B0; + dev_data->mode_enable[MODE_BB_B0] = 1; + } else { + return DBG_STATUS_UNKNOWN_CHIP; + } + + dev_data->platform_id = PLATFORM_ASIC; + dev_data->mode_enable[MODE_ASIC] = 1; + dev_data->initialized = true; + return DBG_STATUS_OK; +} + +/* Reads the FW info structure for the specified Storm from the chip, + * and writes it to the specified fw_info pointer. + */ +static void qed_read_fw_info(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u8 storm_id, struct fw_info *fw_info) +{ + /* Read first the address that points to fw_info location. + * The address is located in the last line of the Storm RAM. + */ + u32 addr = s_storm_defs[storm_id].sem_fast_mem_addr + + SEM_FAST_REG_INT_RAM + + DWORDS_TO_BYTES(SEM_FAST_REG_INT_RAM_SIZE) - + sizeof(struct fw_info_location); + struct fw_info_location fw_info_location; + u32 *dest = (u32 *)&fw_info_location; + u32 i; + + memset(&fw_info_location, 0, sizeof(fw_info_location)); + memset(fw_info, 0, sizeof(*fw_info)); + for (i = 0; i < BYTES_TO_DWORDS(sizeof(fw_info_location)); + i++, addr += BYTES_IN_DWORD) + dest[i] = qed_rd(p_hwfn, p_ptt, addr); + if (fw_info_location.size > 0 && fw_info_location.size <= + sizeof(*fw_info)) { + /* Read FW version info from Storm RAM */ + addr = fw_info_location.grc_addr; + dest = (u32 *)fw_info; + for (i = 0; i < BYTES_TO_DWORDS(fw_info_location.size); + i++, addr += BYTES_IN_DWORD) + dest[i] = qed_rd(p_hwfn, p_ptt, addr); + } +} + +/* Dumps the specified string to the specified buffer. 
Returns the dumped size + * in bytes (actual length + 1 for the null character termination). + */ +static u32 qed_dump_str(char *dump_buf, bool dump, const char *str) +{ + if (dump) + strcpy(dump_buf, str); + return (u32)strlen(str) + 1; +} + +/* Dumps zeros to align the specified buffer to dwords. Returns the dumped size + * in bytes. + */ +static u32 qed_dump_align(char *dump_buf, bool dump, u32 byte_offset) +{ + u8 offset_in_dword = (u8)(byte_offset & 0x3), align_size; + + align_size = offset_in_dword ? BYTES_IN_DWORD - offset_in_dword : 0; + + if (dump && align_size) + memset(dump_buf, 0, align_size); + return align_size; +} + +/* Writes the specified string param to the specified buffer. + * Returns the dumped size in dwords. + */ +static u32 qed_dump_str_param(u32 *dump_buf, + bool dump, + const char *param_name, const char *param_val) +{ + char *char_buf = (char *)dump_buf; + u32 offset = 0; + + /* Dump param name */ + offset += qed_dump_str(char_buf + offset, dump, param_name); + + /* Indicate a string param value */ + if (dump) + *(char_buf + offset) = 1; + offset++; + + /* Dump param value */ + offset += qed_dump_str(char_buf + offset, dump, param_val); + + /* Align buffer to next dword */ + offset += qed_dump_align(char_buf + offset, dump, offset); + return BYTES_TO_DWORDS(offset); +} + +/* Writes the specified numeric param to the specified buffer. + * Returns the dumped size in dwords. 
+ */ +static u32 qed_dump_num_param(u32 *dump_buf, + bool dump, const char *param_name, u32 param_val) +{ + char *char_buf = (char *)dump_buf; + u32 offset = 0; + + /* Dump param name */ + offset += qed_dump_str(char_buf + offset, dump, param_name); + + /* Indicate a numeric param value */ + if (dump) + *(char_buf + offset) = 0; + offset++; + + /* Align buffer to next dword */ + offset += qed_dump_align(char_buf + offset, dump, offset); + + /* Dump param value (and change offset from bytes to dwords) */ + offset = BYTES_TO_DWORDS(offset); + if (dump) + *(dump_buf + offset) = param_val; + offset++; + return offset; +} + +/* Reads the FW version and writes it as a param to the specified buffer. + * Returns the dumped size in dwords. + */ +static u32 qed_dump_fw_ver_param(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, bool dump) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + char fw_ver_str[16] = EMPTY_FW_VERSION_STR; + char fw_img_str[16] = EMPTY_FW_IMAGE_STR; + struct fw_info fw_info = { {0}, {0} }; + int printed_chars; + u32 offset = 0; + + if (dump) { + /* Read FW image/version from PRAM in a non-reset SEMI */ + bool found = false; + u8 storm_id; + + for (storm_id = 0; storm_id < MAX_DBG_STORMS && !found; + storm_id++) { + /* Read FW version/image */ + if (!dev_data->block_in_reset + [s_storm_defs[storm_id].block_id]) { + /* read FW info for the current Storm */ + qed_read_fw_info(p_hwfn, + p_ptt, storm_id, &fw_info); + + /* Create FW version/image strings */ + printed_chars = + snprintf(fw_ver_str, + sizeof(fw_ver_str), + "%d_%d_%d_%d", + fw_info.ver.num.major, + fw_info.ver.num.minor, + fw_info.ver.num.rev, + fw_info.ver.num.eng); + if (printed_chars < 0 || printed_chars >= + sizeof(fw_ver_str)) + DP_NOTICE(p_hwfn, + "Unexpected debug error: invalid FW version string\n"); + switch (fw_info.ver.image_id) { + case FW_IMG_MAIN: + strcpy(fw_img_str, "main"); + break; + default: + strcpy(fw_img_str, "unknown"); + break; + } + + found 
= true; + } + } + } + + /* Dump FW version, image and timestamp */ + offset += qed_dump_str_param(dump_buf + offset, + dump, "fw-version", fw_ver_str); + offset += qed_dump_str_param(dump_buf + offset, + dump, "fw-image", fw_img_str); + offset += qed_dump_num_param(dump_buf + offset, + dump, + "fw-timestamp", fw_info.ver.timestamp); + return offset; +} + +/* Reads the MFW version and writes it as a param to the specified buffer. + * Returns the dumped size in dwords. + */ +static u32 qed_dump_mfw_ver_param(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, bool dump) +{ + char mfw_ver_str[16] = EMPTY_FW_VERSION_STR; + + if (dump) { + u32 global_section_offsize, global_section_addr, mfw_ver; + u32 public_data_addr, global_section_offsize_addr; + int printed_chars; + + /* Find MCP public data GRC address. + * Needs to be ORed with MCP_REG_SCRATCH due to a HW bug. + */ + public_data_addr = qed_rd(p_hwfn, p_ptt, + MISC_REG_SHARED_MEM_ADDR) | + MCP_REG_SCRATCH; + + /* Find MCP public global section offset */ + global_section_offsize_addr = public_data_addr + + offsetof(struct mcp_public_data, + sections) + + sizeof(offsize_t) * PUBLIC_GLOBAL; + global_section_offsize = qed_rd(p_hwfn, p_ptt, + global_section_offsize_addr); + global_section_addr = MCP_REG_SCRATCH + + (global_section_offsize & + OFFSIZE_OFFSET_MASK) * 4; + + /* Read MFW version from MCP public global section */ + mfw_ver = qed_rd(p_hwfn, p_ptt, + global_section_addr + + offsetof(struct public_global, mfw_ver)); + + /* Dump MFW version param */ + printed_chars = snprintf(mfw_ver_str, sizeof(mfw_ver_str), + "%d_%d_%d_%d", + (u8) (mfw_ver >> 24), + (u8) (mfw_ver >> 16), + (u8) (mfw_ver >> 8), + (u8) mfw_ver); + if (printed_chars < 0 || printed_chars >= sizeof(mfw_ver_str)) + DP_NOTICE(p_hwfn, + "Unexpected debug error: invalid MFW version string\n"); + } + + return qed_dump_str_param(dump_buf, dump, "mfw-version", mfw_ver_str); +} + +/* Writes a section header to the specified buffer. 
+ * Returns the dumped size in dwords. + */ +static u32 qed_dump_section_hdr(u32 *dump_buf, + bool dump, const char *name, u32 num_params) +{ + return qed_dump_num_param(dump_buf, dump, name, num_params); +} + +/* Writes the common global params to the specified buffer. + * Returns the dumped size in dwords. + */ +static u32 qed_dump_common_global_params(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, + u8 num_specific_global_params) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 offset = 0; + + /* Find platform string and dump global params section header */ + offset += qed_dump_section_hdr(dump_buf + offset, + dump, + "global_params", + NUM_COMMON_GLOBAL_PARAMS + + num_specific_global_params); + + /* Store params */ + offset += qed_dump_fw_ver_param(p_hwfn, p_ptt, dump_buf + offset, dump); + offset += qed_dump_mfw_ver_param(p_hwfn, + p_ptt, dump_buf + offset, dump); + offset += qed_dump_num_param(dump_buf + offset, + dump, "tools-version", TOOLS_VERSION); + offset += qed_dump_str_param(dump_buf + offset, + dump, + "chip", + s_chip_defs[dev_data->chip_id].name); + offset += qed_dump_str_param(dump_buf + offset, + dump, + "platform", + s_platform_defs[dev_data->platform_id]. + name); + offset += + qed_dump_num_param(dump_buf + offset, dump, "pci-func", + p_hwfn->abs_pf_id); + return offset; +} + +/* Writes the last section to the specified buffer at the given offset. + * Returns the dumped size in dwords. + */ +static u32 qed_dump_last_section(u32 *dump_buf, u32 offset, bool dump) +{ + u32 start_offset = offset, crc = ~0; + + /* Dump CRC section header */ + offset += qed_dump_section_hdr(dump_buf + offset, dump, "last", 0); + + /* Calculate CRC32 and add it to the dword following the "last" section. 
+ */ + if (dump) + *(dump_buf + offset) = ~crc32(crc, (u8 *)dump_buf, + DWORDS_TO_BYTES(offset)); + offset++; + return offset - start_offset; +} + +/* Update blocks reset state */ +static void qed_update_blocks_reset_state(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 reg_val[MAX_DBG_RESET_REGS] = { 0 }; + u32 i; + + /* Read reset registers */ + for (i = 0; i < MAX_DBG_RESET_REGS; i++) + if (s_reset_regs_defs[i].exists[dev_data->chip_id]) + reg_val[i] = qed_rd(p_hwfn, + p_ptt, s_reset_regs_defs[i].addr); + + /* Check if blocks are in reset */ + for (i = 0; i < MAX_BLOCK_ID; i++) + dev_data->block_in_reset[i] = + s_block_defs[i]->has_reset_bit && + !(reg_val[s_block_defs[i]->reset_reg] & + BIT(s_block_defs[i]->reset_bit_offset)); +} + +/* Enable / disable the Debug block */ +static void qed_bus_enable_dbg_block(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, bool enable) +{ + qed_wr(p_hwfn, p_ptt, DBG_REG_DBG_BLOCK_ON, enable ? 1 : 0); +} + +/* Resets the Debug block */ +static void qed_bus_reset_dbg_block(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + u32 dbg_reset_reg_addr, old_reset_reg_val, new_reset_reg_val; + + dbg_reset_reg_addr = + s_reset_regs_defs[s_block_defs[BLOCK_DBG]->reset_reg].addr; + old_reset_reg_val = qed_rd(p_hwfn, p_ptt, dbg_reset_reg_addr); + new_reset_reg_val = old_reset_reg_val & + ~BIT(s_block_defs[BLOCK_DBG]->reset_bit_offset); + + qed_wr(p_hwfn, p_ptt, dbg_reset_reg_addr, new_reset_reg_val); + qed_wr(p_hwfn, p_ptt, dbg_reset_reg_addr, old_reset_reg_val); +} + +static void qed_bus_set_framing_mode(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum dbg_bus_frame_modes mode) +{ + qed_wr(p_hwfn, p_ptt, DBG_REG_FRAMING_MODE, (u8)mode); +} + +/* Enable / disable Debug Bus clients according to the specified mask. 
+ * (1 = enable, 0 = disable) + */ +static void qed_bus_enable_clients(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 client_mask) +{ + qed_wr(p_hwfn, p_ptt, DBG_REG_CLIENT_ENABLE, client_mask); +} + +static bool qed_is_mode_match(struct qed_hwfn *p_hwfn, u16 *modes_buf_offset) +{ + const u32 *ptr = s_dbg_arrays[BIN_BUF_DBG_MODE_TREE].ptr; + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u8 tree_val = ((u8 *)ptr)[(*modes_buf_offset)++]; + bool arg1, arg2; + + switch (tree_val) { + case INIT_MODE_OP_NOT: + return !qed_is_mode_match(p_hwfn, modes_buf_offset); + case INIT_MODE_OP_OR: + case INIT_MODE_OP_AND: + arg1 = qed_is_mode_match(p_hwfn, modes_buf_offset); + arg2 = qed_is_mode_match(p_hwfn, modes_buf_offset); + return (tree_val == INIT_MODE_OP_OR) ? (arg1 || + arg2) : (arg1 && arg2); + default: + return dev_data->mode_enable[tree_val - MAX_INIT_MODE_OPS] > 0; + } +} + +/* Returns the value of the specified GRC param */ +static u32 qed_grc_get_param(struct qed_hwfn *p_hwfn, + enum dbg_grc_params grc_param) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + + return dev_data->grc.param_val[grc_param]; +} + +/* Clear all GRC params */ +static void qed_dbg_grc_clear_params(struct qed_hwfn *p_hwfn) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 i; + + for (i = 0; i < MAX_DBG_GRC_PARAMS; i++) + dev_data->grc.param_set_by_user[i] = 0; +} + +/* Assign default GRC param values */ +static void qed_dbg_grc_set_params_default(struct qed_hwfn *p_hwfn) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 i; + + for (i = 0; i < MAX_DBG_GRC_PARAMS; i++) + if (!dev_data->grc.param_set_by_user[i]) + dev_data->grc.param_val[i] = + s_grc_param_defs[i].default_val[dev_data->chip_id]; +} + +/* Returns true if the specified entity (indicated by GRC param) should be + * included in the dump, false otherwise. 
+ */ +static bool qed_grc_is_included(struct qed_hwfn *p_hwfn, + enum dbg_grc_params grc_param) +{ + return qed_grc_get_param(p_hwfn, grc_param) > 0; +} + +/* Returns true of the specified Storm should be included in the dump, false + * otherwise. + */ +static bool qed_grc_is_storm_included(struct qed_hwfn *p_hwfn, + enum dbg_storms storm) +{ + return qed_grc_get_param(p_hwfn, (enum dbg_grc_params)storm) > 0; +} + +/* Returns true if the specified memory should be included in the dump, false + * otherwise. + */ +static bool qed_grc_is_mem_included(struct qed_hwfn *p_hwfn, + enum block_id block_id, u8 mem_group_id) +{ + u8 i; + + /* Check Storm match */ + if (s_block_defs[block_id]->associated_to_storm && + !qed_grc_is_storm_included(p_hwfn, + (enum dbg_storms)s_block_defs[block_id]->storm_id)) + return false; + + for (i = 0; i < NUM_BIG_RAM_TYPES; i++) + if (mem_group_id == s_big_ram_defs[i].mem_group_id || + mem_group_id == s_big_ram_defs[i].ram_mem_group_id) + return qed_grc_is_included(p_hwfn, + s_big_ram_defs[i].grc_param); + if (mem_group_id == MEM_GROUP_PXP_ILT || mem_group_id == + MEM_GROUP_PXP_MEM) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_PXP); + if (mem_group_id == MEM_GROUP_RAM) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_RAM); + if (mem_group_id == MEM_GROUP_PBUF) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_PBUF); + if (mem_group_id == MEM_GROUP_CAU_MEM || + mem_group_id == MEM_GROUP_CAU_SB || + mem_group_id == MEM_GROUP_CAU_PI) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_CAU); + if (mem_group_id == MEM_GROUP_QM_MEM) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_QM); + if (mem_group_id == MEM_GROUP_CONN_CFC_MEM || + mem_group_id == MEM_GROUP_TASK_CFC_MEM) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_CFC); + if (mem_group_id == MEM_GROUP_IGU_MEM || mem_group_id == + MEM_GROUP_IGU_MSIX) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_IGU); + if (mem_group_id == 
MEM_GROUP_MULD_MEM) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_MULD); + if (mem_group_id == MEM_GROUP_PRS_MEM) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_PRS); + if (mem_group_id == MEM_GROUP_DMAE_MEM) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_DMAE); + if (mem_group_id == MEM_GROUP_TM_MEM) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_TM); + if (mem_group_id == MEM_GROUP_SDM_MEM) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_SDM); + if (mem_group_id == MEM_GROUP_TDIF_CTX || mem_group_id == + MEM_GROUP_RDIF_CTX) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_DIF); + if (mem_group_id == MEM_GROUP_CM_MEM) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_CM); + if (mem_group_id == MEM_GROUP_IOR) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_IOR); + + return true; +} + +/* Stalls all Storms */ +static void qed_grc_stall_storms(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, bool stall) +{ + u8 reg_val = stall ? 
1 : 0; + u8 storm_id; + + for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) { + if (qed_grc_is_storm_included(p_hwfn, + (enum dbg_storms)storm_id)) { + u32 reg_addr = + s_storm_defs[storm_id].sem_fast_mem_addr + + SEM_FAST_REG_STALL_0; + + qed_wr(p_hwfn, p_ptt, reg_addr, reg_val); + } + } + + msleep(STALL_DELAY_MS); +} + +/* Takes all blocks out of reset */ +static void qed_grc_unreset_blocks(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 reg_val[MAX_DBG_RESET_REGS] = { 0 }; + u32 i; + + /* Fill reset regs values */ + for (i = 0; i < MAX_BLOCK_ID; i++) + if (s_block_defs[i]->has_reset_bit && s_block_defs[i]->unreset) + reg_val[s_block_defs[i]->reset_reg] |= + BIT(s_block_defs[i]->reset_bit_offset); + + /* Write reset registers */ + for (i = 0; i < MAX_DBG_RESET_REGS; i++) { + if (s_reset_regs_defs[i].exists[dev_data->chip_id]) { + reg_val[i] |= s_reset_regs_defs[i].unreset_val; + if (reg_val[i]) + qed_wr(p_hwfn, + p_ptt, + s_reset_regs_defs[i].addr + + RESET_REG_UNRESET_OFFSET, reg_val[i]); + } + } +} + +/* Returns the attention name offsets of the specified block */ +static const struct dbg_attn_block_type_data * +qed_get_block_attn_data(enum block_id block_id, enum dbg_attn_type attn_type) +{ + const struct dbg_attn_block *base_attn_block_arr = + (const struct dbg_attn_block *) + s_dbg_arrays[BIN_BUF_DBG_ATTN_BLOCKS].ptr; + + return &base_attn_block_arr[block_id].per_type_data[attn_type]; +} + +/* Returns the attention registers of the specified block */ +static const struct dbg_attn_reg * +qed_get_block_attn_regs(enum block_id block_id, enum dbg_attn_type attn_type, + u8 *num_attn_regs) +{ + const struct dbg_attn_block_type_data *block_type_data = + qed_get_block_attn_data(block_id, attn_type); + + *num_attn_regs = block_type_data->num_regs; + return &((const struct dbg_attn_reg *) + s_dbg_arrays[BIN_BUF_DBG_ATTN_REGS].ptr)[block_type_data-> + regs_offset]; +} + +/* For each block, clear 
the status of all parities */ +static void qed_grc_clear_all_prty(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u8 reg_idx, num_attn_regs; + u32 block_id; + + for (block_id = 0; block_id < MAX_BLOCK_ID; block_id++) { + const struct dbg_attn_reg *attn_reg_arr; + + if (dev_data->block_in_reset[block_id]) + continue; + + attn_reg_arr = qed_get_block_attn_regs((enum block_id)block_id, + ATTN_TYPE_PARITY, + &num_attn_regs); + for (reg_idx = 0; reg_idx < num_attn_regs; reg_idx++) { + const struct dbg_attn_reg *reg_data = + &attn_reg_arr[reg_idx]; + + /* Check mode */ + bool eval_mode = GET_FIELD(reg_data->mode.data, + DBG_MODE_HDR_EVAL_MODE) > 0; + u16 modes_buf_offset = + GET_FIELD(reg_data->mode.data, + DBG_MODE_HDR_MODES_BUF_OFFSET); + + if (!eval_mode || + qed_is_mode_match(p_hwfn, &modes_buf_offset)) + /* Mode match - read parity status read-clear + * register. + */ + qed_rd(p_hwfn, p_ptt, + DWORDS_TO_BYTES(reg_data-> + sts_clr_address)); + } + } +} + +/* Dumps GRC registers section header. Returns the dumped size in dwords. + * The following parameters are dumped: + * - 'count' = num_dumped_entries + * - 'split' = split_type + * - 'id'i = split_id (dumped only if split_id >= 0) + * - 'param_name' = param_val (user param, dumped only if param_name != NULL and + * param_val != NULL) + */ +static u32 qed_grc_dump_regs_hdr(u32 *dump_buf, + bool dump, + u32 num_reg_entries, + const char *split_type, + int split_id, + const char *param_name, const char *param_val) +{ + u8 num_params = 2 + (split_id >= 0 ? 1 : 0) + (param_name ? 
1 : 0); + u32 offset = 0; + + offset += qed_dump_section_hdr(dump_buf + offset, + dump, "grc_regs", num_params); + offset += qed_dump_num_param(dump_buf + offset, + dump, "count", num_reg_entries); + offset += qed_dump_str_param(dump_buf + offset, + dump, "split", split_type); + if (split_id >= 0) + offset += qed_dump_num_param(dump_buf + offset, + dump, "id", split_id); + if (param_name && param_val) + offset += qed_dump_str_param(dump_buf + offset, + dump, param_name, param_val); + return offset; +} + +/* Dumps GRC register/memory. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_reg_entry(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *dump_buf, + bool dump, u32 addr, u32 len) +{ + u32 offset = 0, i; + + if (dump) { + *(dump_buf + offset++) = addr | (len << REG_DUMP_LEN_SHIFT); + for (i = 0; i < len; i++, addr++, offset++) + *(dump_buf + offset) = qed_rd(p_hwfn, + p_ptt, + DWORDS_TO_BYTES(addr)); + } else { + offset += len + 1; + } + + return offset; +} + +/* Dumps GRC registers entries. Returns the dumped size in dwords. 
*/ +static u32 qed_grc_dump_regs_entries(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct dbg_array input_regs_arr, + u32 *dump_buf, + bool dump, + bool block_enable[MAX_BLOCK_ID], + u32 *num_dumped_reg_entries) +{ + u32 i, offset = 0, input_offset = 0; + bool mode_match = true; + + *num_dumped_reg_entries = 0; + while (input_offset < input_regs_arr.size_in_dwords) { + const struct dbg_dump_cond_hdr *cond_hdr = + (const struct dbg_dump_cond_hdr *) + &input_regs_arr.ptr[input_offset++]; + bool eval_mode = GET_FIELD(cond_hdr->mode.data, + DBG_MODE_HDR_EVAL_MODE) > 0; + + /* Check mode/block */ + if (eval_mode) { + u16 modes_buf_offset = + GET_FIELD(cond_hdr->mode.data, + DBG_MODE_HDR_MODES_BUF_OFFSET); + mode_match = qed_is_mode_match(p_hwfn, + &modes_buf_offset); + } + + if (mode_match && block_enable[cond_hdr->block_id]) { + for (i = 0; i < cond_hdr->data_size; + i++, input_offset++) { + const struct dbg_dump_reg *reg = + (const struct dbg_dump_reg *) + &input_regs_arr.ptr[input_offset]; + + offset += + qed_grc_dump_reg_entry(p_hwfn, p_ptt, + dump_buf + offset, dump, + GET_FIELD(reg->data, + DBG_DUMP_REG_ADDRESS), + GET_FIELD(reg->data, + DBG_DUMP_REG_LENGTH)); + (*num_dumped_reg_entries)++; + } + } else { + input_offset += cond_hdr->data_size; + } + } + + return offset; +} + +/* Dumps GRC registers entries. Returns the dumped size in dwords. 
*/ +static u32 qed_grc_dump_split_data(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct dbg_array input_regs_arr, + u32 *dump_buf, + bool dump, + bool block_enable[MAX_BLOCK_ID], + const char *split_type_name, + u32 split_id, + const char *param_name, + const char *param_val) +{ + u32 num_dumped_reg_entries, offset; + + /* Calculate register dump header size (and skip it for now) */ + offset = qed_grc_dump_regs_hdr(dump_buf, + false, + 0, + split_type_name, + split_id, param_name, param_val); + + /* Dump registers */ + offset += qed_grc_dump_regs_entries(p_hwfn, + p_ptt, + input_regs_arr, + dump_buf + offset, + dump, + block_enable, + &num_dumped_reg_entries); + + /* Write register dump header */ + if (dump && num_dumped_reg_entries > 0) + qed_grc_dump_regs_hdr(dump_buf, + dump, + num_dumped_reg_entries, + split_type_name, + split_id, param_name, param_val); + + return num_dumped_reg_entries > 0 ? offset : 0; +} + +/* Dumps registers according to the input registers array. + * Returns the dumped size in dwords. 
+ */ +static u32 qed_grc_dump_registers(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, + bool block_enable[MAX_BLOCK_ID], + const char *param_name, const char *param_val) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 offset = 0, input_offset = 0; + u8 port_id, pf_id; + + if (dump) + DP_VERBOSE(p_hwfn, QED_MSG_DEBUG, "Dumping registers...\n"); + while (input_offset < + s_dbg_arrays[BIN_BUF_DBG_DUMP_REG].size_in_dwords) { + const struct dbg_dump_split_hdr *split_hdr = + (const struct dbg_dump_split_hdr *) + &s_dbg_arrays[BIN_BUF_DBG_DUMP_REG].ptr[input_offset++]; + u8 split_type_id = GET_FIELD(split_hdr->hdr, + DBG_DUMP_SPLIT_HDR_SPLIT_TYPE_ID); + u32 split_data_size = GET_FIELD(split_hdr->hdr, + DBG_DUMP_SPLIT_HDR_DATA_SIZE); + struct dbg_array curr_input_regs_arr = { + &s_dbg_arrays[BIN_BUF_DBG_DUMP_REG].ptr[input_offset], + split_data_size}; + + switch (split_type_id) { + case SPLIT_TYPE_NONE: + case SPLIT_TYPE_VF: + offset += qed_grc_dump_split_data(p_hwfn, + p_ptt, + curr_input_regs_arr, + dump_buf + offset, + dump, + block_enable, + "eng", + (u32)(-1), + param_name, + param_val); + break; + case SPLIT_TYPE_PORT: + for (port_id = 0; + port_id < + s_chip_defs[dev_data->chip_id]. + per_platform[dev_data->platform_id].num_ports; + port_id++) { + if (dump) + qed_port_pretend(p_hwfn, p_ptt, + port_id); + offset += + qed_grc_dump_split_data(p_hwfn, p_ptt, + curr_input_regs_arr, + dump_buf + offset, + dump, block_enable, + "port", port_id, + param_name, + param_val); + } + break; + case SPLIT_TYPE_PF: + case SPLIT_TYPE_PORT_PF: + for (pf_id = 0; + pf_id < + s_chip_defs[dev_data->chip_id]. 
+ per_platform[dev_data->platform_id].num_pfs; + pf_id++) { + if (dump) + qed_fid_pretend(p_hwfn, p_ptt, pf_id); + offset += qed_grc_dump_split_data(p_hwfn, + p_ptt, + curr_input_regs_arr, + dump_buf + offset, + dump, block_enable, + "pf", pf_id, param_name, + param_val); + } + break; + default: + break; + } + + input_offset += split_data_size; + } + + /* Pretend to original PF */ + if (dump) + qed_fid_pretend(p_hwfn, p_ptt, p_hwfn->rel_pf_id); + return offset; +} + +/* Dump reset registers. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_reset_regs(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, bool dump) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 i, offset = 0, num_regs = 0; + + /* Calculate header size */ + offset += qed_grc_dump_regs_hdr(dump_buf, + false, 0, "eng", -1, NULL, NULL); + + /* Write reset registers */ + for (i = 0; i < MAX_DBG_RESET_REGS; i++) { + if (s_reset_regs_defs[i].exists[dev_data->chip_id]) { + offset += qed_grc_dump_reg_entry(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + BYTES_TO_DWORDS + (s_reset_regs_defs + [i].addr), 1); + num_regs++; + } + } + + /* Write header */ + if (dump) + qed_grc_dump_regs_hdr(dump_buf, + true, num_regs, "eng", -1, NULL, NULL); + return offset; +} + +/* Dump registers that are modified during GRC Dump and therefore must be dumped + * first. Returns the dumped size in dwords. 
+ */ +static u32 qed_grc_dump_modified_regs(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, bool dump) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 offset = 0, num_reg_entries = 0, block_id; + u8 storm_id, reg_idx, num_attn_regs; + + /* Calculate header size */ + offset += qed_grc_dump_regs_hdr(dump_buf, + false, 0, "eng", -1, NULL, NULL); + + /* Write parity registers */ + for (block_id = 0; block_id < MAX_BLOCK_ID; block_id++) { + const struct dbg_attn_reg *attn_reg_arr; + + if (dev_data->block_in_reset[block_id] && dump) + continue; + + attn_reg_arr = qed_get_block_attn_regs((enum block_id)block_id, + ATTN_TYPE_PARITY, + &num_attn_regs); + for (reg_idx = 0; reg_idx < num_attn_regs; reg_idx++) { + const struct dbg_attn_reg *reg_data = + &attn_reg_arr[reg_idx]; + u16 modes_buf_offset; + bool eval_mode; + + /* Check mode */ + eval_mode = GET_FIELD(reg_data->mode.data, + DBG_MODE_HDR_EVAL_MODE) > 0; + modes_buf_offset = + GET_FIELD(reg_data->mode.data, + DBG_MODE_HDR_MODES_BUF_OFFSET); + if (!eval_mode || + qed_is_mode_match(p_hwfn, &modes_buf_offset)) { + /* Mode match - read and dump registers */ + offset += qed_grc_dump_reg_entry(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + reg_data->mask_address, + 1); + offset += qed_grc_dump_reg_entry(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + GET_FIELD(reg_data->data, + DBG_ATTN_REG_STS_ADDRESS), + 1); + num_reg_entries += 2; + } + } + } + + /* Write storm stall status registers */ + for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) { + if (dev_data->block_in_reset[s_storm_defs[storm_id].block_id] && + dump) + continue; + + offset += qed_grc_dump_reg_entry(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + BYTES_TO_DWORDS(s_storm_defs[storm_id]. 
+ sem_fast_mem_addr + + SEM_FAST_REG_STALLED), + 1); + num_reg_entries++; + } + + /* Write header */ + if (dump) + qed_grc_dump_regs_hdr(dump_buf, + true, + num_reg_entries, "eng", -1, NULL, NULL); + return offset; +} + +/* Dumps a GRC memory header (section and params). + * The following parameters are dumped: + * name - name is dumped only if it's not NULL. + * addr - byte_addr is dumped only if name is NULL. + * len - dword_len is always dumped. + * width - bit_width is dumped if it's not zero. + * packed - packed=1 is dumped if it's not false. + * mem_group - mem_group is always dumped. + * is_storm - true only if the memory is related to a Storm. + * storm_letter - storm letter (valid only if is_storm is true). + * Returns the dumped size in dwords. + */ +static u32 qed_grc_dump_mem_hdr(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + bool dump, + const char *name, + u32 byte_addr, + u32 dword_len, + u32 bit_width, + bool packed, + const char *mem_group, + bool is_storm, char storm_letter) +{ + u8 num_params = 3; + u32 offset = 0; + char buf[64]; + + if (!dword_len) + DP_NOTICE(p_hwfn, + "Unexpected GRC Dump error: dumped memory size must be non-zero\n"); + if (bit_width) + num_params++; + if (packed) + num_params++; + + /* Dump section header */ + offset += qed_dump_section_hdr(dump_buf + offset, + dump, "grc_mem", num_params); + if (name) { + /* Dump name */ + if (is_storm) { + strcpy(buf, "?STORM_"); + buf[0] = storm_letter; + strcpy(buf + strlen(buf), name); + } else { + strcpy(buf, name); + } + + offset += qed_dump_str_param(dump_buf + offset, + dump, "name", buf); + if (dump) + DP_VERBOSE(p_hwfn, + QED_MSG_DEBUG, + "Dumping %d registers from %s...\n", + dword_len, buf); + } else { + /* Dump address */ + offset += qed_dump_num_param(dump_buf + offset, + dump, "addr", byte_addr); + if (dump && dword_len > 64) + DP_VERBOSE(p_hwfn, + QED_MSG_DEBUG, + "Dumping %d registers from address 0x%x...\n", + dword_len, byte_addr); + } + + /* Dump len */ + offset += 
qed_dump_num_param(dump_buf + offset, dump, "len", dword_len); + + /* Dump bit width */ + if (bit_width) + offset += qed_dump_num_param(dump_buf + offset, + dump, "width", bit_width); + + /* Dump packed */ + if (packed) + offset += qed_dump_num_param(dump_buf + offset, + dump, "packed", 1); + + /* Dump reg type */ + if (is_storm) { + strcpy(buf, "?STORM_"); + buf[0] = storm_letter; + strcpy(buf + strlen(buf), mem_group); + } else { + strcpy(buf, mem_group); + } + + offset += qed_dump_str_param(dump_buf + offset, dump, "type", buf); + return offset; +} + +/* Dumps a single GRC memory. If name is NULL, the memory is stored by address. + * Returns the dumped size in dwords. + */ +static u32 qed_grc_dump_mem(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, + const char *name, + u32 byte_addr, + u32 dword_len, + u32 bit_width, + bool packed, + const char *mem_group, + bool is_storm, char storm_letter) +{ + u32 offset = 0; + + offset += qed_grc_dump_mem_hdr(p_hwfn, + dump_buf + offset, + dump, + name, + byte_addr, + dword_len, + bit_width, + packed, + mem_group, is_storm, storm_letter); + if (dump) { + u32 i; + + for (i = 0; i < dword_len; + i++, byte_addr += BYTES_IN_DWORD, offset++) + *(dump_buf + offset) = qed_rd(p_hwfn, p_ptt, byte_addr); + } else { + offset += dword_len; + } + + return offset; +} + +/* Dumps GRC memories entries. Returns the dumped size in dwords. 
*/ +static u32 qed_grc_dump_mem_entries(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct dbg_array input_mems_arr, + u32 *dump_buf, bool dump) +{ + u32 i, offset = 0, input_offset = 0; + bool mode_match = true; + + while (input_offset < input_mems_arr.size_in_dwords) { + const struct dbg_dump_cond_hdr *cond_hdr; + u32 num_entries; + bool eval_mode; + + cond_hdr = (const struct dbg_dump_cond_hdr *) + &input_mems_arr.ptr[input_offset++]; + eval_mode = GET_FIELD(cond_hdr->mode.data, + DBG_MODE_HDR_EVAL_MODE) > 0; + + /* Check required mode */ + if (eval_mode) { + u16 modes_buf_offset = + GET_FIELD(cond_hdr->mode.data, + DBG_MODE_HDR_MODES_BUF_OFFSET); + + mode_match = qed_is_mode_match(p_hwfn, + &modes_buf_offset); + } + + if (!mode_match) { + input_offset += cond_hdr->data_size; + continue; + } + + num_entries = cond_hdr->data_size / MEM_DUMP_ENTRY_SIZE_DWORDS; + for (i = 0; i < num_entries; + i++, input_offset += MEM_DUMP_ENTRY_SIZE_DWORDS) { + const struct dbg_dump_mem *mem = + (const struct dbg_dump_mem *) + &input_mems_arr.ptr[input_offset]; + u8 mem_group_id; + + mem_group_id = GET_FIELD(mem->dword0, + DBG_DUMP_MEM_MEM_GROUP_ID); + if (mem_group_id >= MEM_GROUPS_NUM) { + DP_NOTICE(p_hwfn, "Invalid mem_group_id\n"); + return 0; + } + + if (qed_grc_is_mem_included(p_hwfn, + (enum block_id)cond_hdr->block_id, + mem_group_id)) { + u32 mem_byte_addr = + DWORDS_TO_BYTES(GET_FIELD(mem->dword0, + DBG_DUMP_MEM_ADDRESS)); + u32 mem_len = GET_FIELD(mem->dword1, + DBG_DUMP_MEM_LENGTH); + char storm_letter = 'a'; + bool is_storm = false; + + /* Update memory length for CCFC/TCFC memories + * according to number of LCIDs/LTIDs. 
+ */ + if (mem_group_id == MEM_GROUP_CONN_CFC_MEM) + mem_len = qed_grc_get_param(p_hwfn, + DBG_GRC_PARAM_NUM_LCIDS) + * (mem_len / MAX_LCIDS); + else if (mem_group_id == MEM_GROUP_TASK_CFC_MEM) + mem_len = qed_grc_get_param(p_hwfn, + DBG_GRC_PARAM_NUM_LTIDS) + * (mem_len / MAX_LTIDS); + + /* If memory is associated with Storm, update + * Storm details. + */ + if (s_block_defs[cond_hdr->block_id]-> + associated_to_storm) { + is_storm = true; + storm_letter = + s_storm_defs[s_block_defs[ + cond_hdr->block_id]-> + storm_id].letter; + } + + /* Dump memory */ + offset += qed_grc_dump_mem(p_hwfn, p_ptt, + dump_buf + offset, dump, NULL, + mem_byte_addr, mem_len, 0, + false, + s_mem_group_names[mem_group_id], + is_storm, storm_letter); + } + } + } + + return offset; +} + +/* Dumps GRC memories according to the input array dump_mem. + * Returns the dumped size in dwords. + */ +static u32 qed_grc_dump_memories(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, bool dump) +{ + u32 offset = 0, input_offset = 0; + + while (input_offset < + s_dbg_arrays[BIN_BUF_DBG_DUMP_MEM].size_in_dwords) { + const struct dbg_dump_split_hdr *split_hdr = + (const struct dbg_dump_split_hdr *) + &s_dbg_arrays[BIN_BUF_DBG_DUMP_MEM].ptr[input_offset++]; + u8 split_type_id = GET_FIELD(split_hdr->hdr, + DBG_DUMP_SPLIT_HDR_SPLIT_TYPE_ID); + u32 split_data_size = GET_FIELD(split_hdr->hdr, + DBG_DUMP_SPLIT_HDR_DATA_SIZE); + struct dbg_array curr_input_mems_arr = { + &s_dbg_arrays[BIN_BUF_DBG_DUMP_MEM].ptr[input_offset], + split_data_size}; + + switch (split_type_id) { + case SPLIT_TYPE_NONE: + offset += qed_grc_dump_mem_entries(p_hwfn, + p_ptt, + curr_input_mems_arr, + dump_buf + offset, + dump); + break; + default: + DP_NOTICE(p_hwfn, + "Dumping split memories is currently not supported\n"); + break; + } + + input_offset += split_data_size; + } + + return offset; +} + +/* Dumps GRC context data for the specified Storm. + * Returns the dumped size in dwords. 
+ */ +static u32 qed_grc_dump_ctx_data(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, + const char *name, + u32 num_lids, + u32 lid_size, + u32 rd_reg_addr, + u8 storm_id) +{ + u32 i, lid, total_size; + u32 offset = 0; + + if (!lid_size) + return 0; + lid_size *= BYTES_IN_DWORD; + total_size = num_lids * lid_size; + offset += qed_grc_dump_mem_hdr(p_hwfn, + dump_buf + offset, + dump, + name, + 0, + total_size, + lid_size * 32, + false, + name, + true, s_storm_defs[storm_id].letter); + + /* Dump context data */ + if (dump) { + for (lid = 0; lid < num_lids; lid++) { + for (i = 0; i < lid_size; i++, offset++) { + qed_wr(p_hwfn, + p_ptt, + s_storm_defs[storm_id].cm_ctx_wr_addr, + BIT(9) | lid); + *(dump_buf + offset) = qed_rd(p_hwfn, + p_ptt, + rd_reg_addr); + } + } + } else { + offset += total_size; + } + + return offset; +} + +/* Dumps GRC contexts. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_ctx(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) +{ + u32 offset = 0; + u8 storm_id; + + for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) { + if (!qed_grc_is_storm_included(p_hwfn, + (enum dbg_storms)storm_id)) + continue; + + /* Dump Conn AG context size */ + offset += + qed_grc_dump_ctx_data(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + "CONN_AG_CTX", + qed_grc_get_param(p_hwfn, + DBG_GRC_PARAM_NUM_LCIDS), + s_storm_defs[storm_id]. + cm_conn_ag_ctx_lid_size, + s_storm_defs[storm_id]. + cm_conn_ag_ctx_rd_addr, + storm_id); + + /* Dump Conn ST context size */ + offset += + qed_grc_dump_ctx_data(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + "CONN_ST_CTX", + qed_grc_get_param(p_hwfn, + DBG_GRC_PARAM_NUM_LCIDS), + s_storm_defs[storm_id]. + cm_conn_st_ctx_lid_size, + s_storm_defs[storm_id]. 
+ cm_conn_st_ctx_rd_addr, + storm_id); + + /* Dump Task AG context size */ + offset += + qed_grc_dump_ctx_data(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + "TASK_AG_CTX", + qed_grc_get_param(p_hwfn, + DBG_GRC_PARAM_NUM_LTIDS), + s_storm_defs[storm_id]. + cm_task_ag_ctx_lid_size, + s_storm_defs[storm_id]. + cm_task_ag_ctx_rd_addr, + storm_id); + + /* Dump Task ST context size */ + offset += + qed_grc_dump_ctx_data(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + "TASK_ST_CTX", + qed_grc_get_param(p_hwfn, + DBG_GRC_PARAM_NUM_LTIDS), + s_storm_defs[storm_id]. + cm_task_st_ctx_lid_size, + s_storm_defs[storm_id]. + cm_task_st_ctx_rd_addr, + storm_id); + } + + return offset; +} + +/* Dumps GRC IORs data. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_iors(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) +{ + char buf[10] = "IOR_SET_?"; + u8 storm_id, set_id; + u32 offset = 0; + + for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) { + if (qed_grc_is_storm_included(p_hwfn, + (enum dbg_storms)storm_id)) { + for (set_id = 0; set_id < NUM_IOR_SETS; set_id++) { + u32 addr = + s_storm_defs[storm_id].sem_fast_mem_addr + + SEM_FAST_REG_STORM_REG_FILE + + DWORDS_TO_BYTES(IOR_SET_OFFSET(set_id)); + + buf[strlen(buf) - 1] = '0' + set_id; + offset += qed_grc_dump_mem(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + buf, + addr, + IORS_PER_SET, + 32, + false, + "ior", + true, + s_storm_defs + [storm_id].letter); + } + } + } + + return offset; +} + +/* Dump VFC CAM. Returns the dumped size in dwords. 
*/
/* Emits a "vfc_cam" memory header and then, for each of the
 * VFC_CAM_NUM_ROWS rows, writes a read command and address to the Storm's
 * SEM fast memory and reads back VFC_CAM_RESP_DWORDS response dwords.
 */
static u32 qed_grc_dump_vfc_cam(struct qed_hwfn *p_hwfn,
				struct qed_ptt *p_ptt,
				u32 *dump_buf, bool dump, u8 storm_id)
{
	u32 total_size = VFC_CAM_NUM_ROWS * VFC_CAM_RESP_DWORDS;
	u32 cam_addr[VFC_CAM_ADDR_DWORDS] = { 0 };
	u32 cam_cmd[VFC_CAM_CMD_DWORDS] = { 0 };
	u32 offset = 0;
	/* NOTE(review): 'i' has no direct use here; it is presumably the
	 * index consumed by the ARR_REG_WR/ARR_REG_RD macros - verify before
	 * removing.
	 */
	u32 row, i;

	offset += qed_grc_dump_mem_hdr(p_hwfn,
				       dump_buf + offset,
				       dump,
				       "vfc_cam",
				       0,
				       total_size,
				       256,
				       false,
				       "vfc_cam",
				       true, s_storm_defs[storm_id].letter);

	/* Size calculation only */
	if (!dump)
		return offset + total_size;

	/* Prepare CAM address */
	SET_VAR_FIELD(cam_addr, VFC_CAM_ADDR, OP, VFC_OPCODE_CAM_RD);

	row = 0;
	while (row < VFC_CAM_NUM_ROWS) {
		/* Write VFC CAM command */
		SET_VAR_FIELD(cam_cmd, VFC_CAM_CMD, ROW, row);
		ARR_REG_WR(p_hwfn,
			   p_ptt,
			   s_storm_defs[storm_id].sem_fast_mem_addr +
			   SEM_FAST_REG_VFC_DATA_WR,
			   cam_cmd, VFC_CAM_CMD_DWORDS);

		/* Write VFC CAM address */
		ARR_REG_WR(p_hwfn,
			   p_ptt,
			   s_storm_defs[storm_id].sem_fast_mem_addr +
			   SEM_FAST_REG_VFC_ADDR,
			   cam_addr, VFC_CAM_ADDR_DWORDS);

		/* Read VFC CAM read response */
		ARR_REG_RD(p_hwfn,
			   p_ptt,
			   s_storm_defs[storm_id].sem_fast_mem_addr +
			   SEM_FAST_REG_VFC_DATA_RD,
			   dump_buf + offset, VFC_CAM_RESP_DWORDS);

		row++;
		offset += VFC_CAM_RESP_DWORDS;
	}

	return offset;
}

/* Dump VFC RAM. Returns the dumped size in dwords.
*/
/* Emits a memory header named after ram_defs and then reads
 * ram_defs->num_rows rows starting at ram_defs->base_row, with
 * VFC_RAM_RESP_DWORDS response dwords per row.
 */
static u32 qed_grc_dump_vfc_ram(struct qed_hwfn *p_hwfn,
				struct qed_ptt *p_ptt,
				u32 *dump_buf,
				bool dump,
				u8 storm_id, struct vfc_ram_defs *ram_defs)
{
	u32 total_size = ram_defs->num_rows * VFC_RAM_RESP_DWORDS;
	u32 ram_addr[VFC_RAM_ADDR_DWORDS] = { 0 };
	u32 ram_cmd[VFC_RAM_CMD_DWORDS] = { 0 };
	u32 offset = 0;
	/* NOTE(review): 'i' has no direct use here; it is presumably the
	 * index consumed by the ARR_REG_WR/ARR_REG_RD macros - verify before
	 * removing.
	 */
	u32 row, i;
	u32 end_row;

	offset += qed_grc_dump_mem_hdr(p_hwfn,
				       dump_buf + offset,
				       dump,
				       ram_defs->mem_name,
				       0,
				       total_size,
				       256,
				       false,
				       ram_defs->type_name,
				       true, s_storm_defs[storm_id].letter);

	/* Prepare RAM address */
	SET_VAR_FIELD(ram_addr, VFC_RAM_ADDR, OP, VFC_OPCODE_RAM_RD);

	/* Size calculation only */
	if (!dump)
		return offset + total_size;

	end_row = ram_defs->base_row + ram_defs->num_rows;
	for (row = ram_defs->base_row; row < end_row; row++) {
		/* Write VFC RAM command (the command words stay all-zero) */
		ARR_REG_WR(p_hwfn,
			   p_ptt,
			   s_storm_defs[storm_id].sem_fast_mem_addr +
			   SEM_FAST_REG_VFC_DATA_WR,
			   ram_cmd, VFC_RAM_CMD_DWORDS);

		/* Write VFC RAM address */
		SET_VAR_FIELD(ram_addr, VFC_RAM_ADDR, ROW, row);
		ARR_REG_WR(p_hwfn,
			   p_ptt,
			   s_storm_defs[storm_id].sem_fast_mem_addr +
			   SEM_FAST_REG_VFC_ADDR,
			   ram_addr, VFC_RAM_ADDR_DWORDS);

		/* Read VFC RAM read response */
		ARR_REG_RD(p_hwfn,
			   p_ptt,
			   s_storm_defs[storm_id].sem_fast_mem_addr +
			   SEM_FAST_REG_VFC_DATA_RD,
			   dump_buf + offset, VFC_RAM_RESP_DWORDS);

		offset += VFC_RAM_RESP_DWORDS;
	}

	return offset;
}

/* Dumps GRC VFC data. Returns the dumped size in dwords.
*/ +static u32 qed_grc_dump_vfc(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u8 storm_id, i; + u32 offset = 0; + + for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) { + if (qed_grc_is_storm_included(p_hwfn, + (enum dbg_storms)storm_id) && + s_storm_defs[storm_id].has_vfc && + (storm_id != DBG_PSTORM_ID || + dev_data->platform_id == PLATFORM_ASIC)) { + /* Read CAM */ + offset += qed_grc_dump_vfc_cam(p_hwfn, + p_ptt, + dump_buf + offset, + dump, storm_id); + + /* Read RAM */ + for (i = 0; i < NUM_VFC_RAM_TYPES; i++) + offset += qed_grc_dump_vfc_ram(p_hwfn, + p_ptt, + dump_buf + + offset, + dump, + storm_id, + &s_vfc_ram_defs + [i]); + } + } + + return offset; +} + +/* Dumps GRC RSS data. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_rss(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 offset = 0; + u8 rss_mem_id; + + for (rss_mem_id = 0; rss_mem_id < NUM_RSS_MEM_TYPES; rss_mem_id++) { + struct rss_mem_defs *rss_defs = &s_rss_mem_defs[rss_mem_id]; + u32 num_entries = rss_defs->num_entries[dev_data->chip_id]; + u32 entry_width = rss_defs->entry_width[dev_data->chip_id]; + u32 total_size = (num_entries * entry_width) / 32; + bool packed = (entry_width == 16); + u32 addr = rss_defs->addr; + u32 i, j; + + offset += qed_grc_dump_mem_hdr(p_hwfn, + dump_buf + offset, + dump, + rss_defs->mem_name, + addr, + total_size, + entry_width, + packed, + rss_defs->type_name, false, 0); + + if (!dump) { + offset += total_size; + continue; + } + + /* Dump RSS data */ + for (i = 0; i < BYTES_TO_DWORDS(total_size); i++, addr++) { + qed_wr(p_hwfn, p_ptt, RSS_REG_RSS_RAM_ADDR, addr); + for (j = 0; j < BYTES_IN_DWORD; j++, offset++) + *(dump_buf + offset) = + qed_rd(p_hwfn, p_ptt, + RSS_REG_RSS_RAM_DATA + + DWORDS_TO_BYTES(j)); + } + } + + return offset; +} + +/* Dumps GRC Big RAM. 
Returns the dumped size in dwords. */ +static u32 qed_grc_dump_big_ram(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, bool dump, u8 big_ram_id) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + char mem_name[12] = "???_BIG_RAM"; + char type_name[8] = "???_RAM"; + u32 ram_size, total_blocks; + u32 offset = 0, i, j; + + total_blocks = + s_big_ram_defs[big_ram_id].num_of_blocks[dev_data->chip_id]; + ram_size = total_blocks * BIG_RAM_BLOCK_SIZE_DWORDS; + + strncpy(type_name, s_big_ram_defs[big_ram_id].instance_name, + strlen(s_big_ram_defs[big_ram_id].instance_name)); + strncpy(mem_name, s_big_ram_defs[big_ram_id].instance_name, + strlen(s_big_ram_defs[big_ram_id].instance_name)); + + /* Dump memory header */ + offset += qed_grc_dump_mem_hdr(p_hwfn, + dump_buf + offset, + dump, + mem_name, + 0, + ram_size, + BIG_RAM_BLOCK_SIZE_BYTES * 8, + false, type_name, false, 0); + + if (!dump) + return offset + ram_size; + + /* Read and dump Big RAM data */ + for (i = 0; i < total_blocks / 2; i++) { + qed_wr(p_hwfn, p_ptt, s_big_ram_defs[big_ram_id].addr_reg_addr, + i); + for (j = 0; j < 2 * BIG_RAM_BLOCK_SIZE_DWORDS; j++, offset++) + *(dump_buf + offset) = qed_rd(p_hwfn, p_ptt, + s_big_ram_defs[big_ram_id]. 
+ data_reg_addr + + DWORDS_TO_BYTES(j)); + } + + return offset; +} + +static u32 qed_grc_dump_mcp(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) +{ + bool block_enable[MAX_BLOCK_ID] = { 0 }; + bool halted = false; + u32 offset = 0; + + /* Halt MCP */ + if (dump) { + halted = !qed_mcp_halt(p_hwfn, p_ptt); + if (!halted) + DP_NOTICE(p_hwfn, "MCP halt failed!\n"); + } + + /* Dump MCP scratchpad */ + offset += qed_grc_dump_mem(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + NULL, + MCP_REG_SCRATCH, + MCP_REG_SCRATCH_SIZE, + 0, false, "MCP", false, 0); + + /* Dump MCP cpu_reg_file */ + offset += qed_grc_dump_mem(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + NULL, + MCP_REG_CPU_REG_FILE, + MCP_REG_CPU_REG_FILE_SIZE, + 0, false, "MCP", false, 0); + + /* Dump MCP registers */ + block_enable[BLOCK_MCP] = true; + offset += qed_grc_dump_registers(p_hwfn, + p_ptt, + dump_buf + offset, + dump, block_enable, "block", "MCP"); + + /* Dump required non-MCP registers */ + offset += qed_grc_dump_regs_hdr(dump_buf + offset, + dump, 1, "eng", -1, "block", "MCP"); + offset += qed_grc_dump_reg_entry(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + BYTES_TO_DWORDS + (MISC_REG_SHARED_MEM_ADDR), 1); + + /* Release MCP */ + if (halted && qed_mcp_resume(p_hwfn, p_ptt)) + DP_NOTICE(p_hwfn, "Failed to resume MCP after halt!\n"); + return offset; +} + +/* Dumps the tbus indirect memory for all PHYs. 
*/ +static u32 qed_grc_dump_phy(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) +{ + u32 offset = 0, tbus_lo_offset, tbus_hi_offset; + char mem_name[32]; + u8 phy_id; + + for (phy_id = 0; phy_id < ARRAY_SIZE(s_phy_defs); phy_id++) { + struct phy_defs *phy_defs = &s_phy_defs[phy_id]; + int printed_chars; + + printed_chars = snprintf(mem_name, sizeof(mem_name), "tbus_%s", + phy_defs->phy_name); + if (printed_chars < 0 || printed_chars >= sizeof(mem_name)) + DP_NOTICE(p_hwfn, + "Unexpected debug error: invalid PHY memory name\n"); + offset += qed_grc_dump_mem_hdr(p_hwfn, + dump_buf + offset, + dump, + mem_name, + 0, + PHY_DUMP_SIZE_DWORDS, + 16, true, mem_name, false, 0); + if (dump) { + u32 addr_lo_addr = phy_defs->base_addr + + phy_defs->tbus_addr_lo_addr; + u32 addr_hi_addr = phy_defs->base_addr + + phy_defs->tbus_addr_hi_addr; + u32 data_lo_addr = phy_defs->base_addr + + phy_defs->tbus_data_lo_addr; + u32 data_hi_addr = phy_defs->base_addr + + phy_defs->tbus_data_hi_addr; + u8 *bytes_buf = (u8 *)(dump_buf + offset); + + for (tbus_hi_offset = 0; + tbus_hi_offset < (NUM_PHY_TBUS_ADDRESSES >> 8); + tbus_hi_offset++) { + qed_wr(p_hwfn, + p_ptt, addr_hi_addr, tbus_hi_offset); + for (tbus_lo_offset = 0; tbus_lo_offset < 256; + tbus_lo_offset++) { + qed_wr(p_hwfn, + p_ptt, + addr_lo_addr, tbus_lo_offset); + *(bytes_buf++) = + (u8)qed_rd(p_hwfn, p_ptt, + data_lo_addr); + *(bytes_buf++) = + (u8)qed_rd(p_hwfn, p_ptt, + data_hi_addr); + } + } + } + + offset += PHY_DUMP_SIZE_DWORDS; + } + + return offset; +} + +static void qed_config_dbg_line(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum block_id block_id, + u8 line_id, + u8 cycle_en, + u8 right_shift, u8 force_valid, u8 force_frame) +{ + struct block_defs *p_block_defs = s_block_defs[block_id]; + + qed_wr(p_hwfn, p_ptt, p_block_defs->dbg_select_addr, line_id); + qed_wr(p_hwfn, p_ptt, p_block_defs->dbg_cycle_enable_addr, cycle_en); + qed_wr(p_hwfn, p_ptt, p_block_defs->dbg_shift_addr, 
right_shift); + qed_wr(p_hwfn, p_ptt, p_block_defs->dbg_force_valid_addr, force_valid); + qed_wr(p_hwfn, p_ptt, p_block_defs->dbg_force_frame_addr, force_frame); +} + +/* Dumps Static Debug data. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_static_debug(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, bool dump) +{ + u32 block_dwords = NUM_DBG_BUS_LINES * STATIC_DEBUG_LINE_DWORDS; + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 offset = 0, block_id, line_id, addr, i; + struct block_defs *p_block_defs; + + if (dump) { + DP_VERBOSE(p_hwfn, + QED_MSG_DEBUG, "Dumping static debug data...\n"); + + /* Disable all blocks debug output */ + for (block_id = 0; block_id < MAX_BLOCK_ID; block_id++) { + p_block_defs = s_block_defs[block_id]; + + if (p_block_defs->has_dbg_bus[dev_data->chip_id]) + qed_wr(p_hwfn, p_ptt, + p_block_defs->dbg_cycle_enable_addr, 0); + } + + qed_bus_reset_dbg_block(p_hwfn, p_ptt); + qed_bus_set_framing_mode(p_hwfn, + p_ptt, DBG_BUS_FRAME_MODE_8HW_0ST); + qed_wr(p_hwfn, + p_ptt, DBG_REG_DEBUG_TARGET, DBG_BUS_TARGET_ID_INT_BUF); + qed_wr(p_hwfn, p_ptt, DBG_REG_FULL_MODE, 1); + qed_bus_enable_dbg_block(p_hwfn, p_ptt, true); + } + + /* Dump all static debug lines for each relevant block */ + for (block_id = 0; block_id < MAX_BLOCK_ID; block_id++) { + p_block_defs = s_block_defs[block_id]; + + if (!p_block_defs->has_dbg_bus[dev_data->chip_id]) + continue; + + /* Dump static section params */ + offset += qed_grc_dump_mem_hdr(p_hwfn, + dump_buf + offset, + dump, + p_block_defs->name, 0, + block_dwords, 32, false, + "STATIC", false, 0); + + if (dump && !dev_data->block_in_reset[block_id]) { + u8 dbg_client_id = + p_block_defs->dbg_client_id[dev_data->chip_id]; + + /* Enable block's client */ + qed_bus_enable_clients(p_hwfn, p_ptt, + BIT(dbg_client_id)); + + for (line_id = 0; line_id < NUM_DBG_BUS_LINES; + line_id++) { + /* Configure debug line ID */ + qed_config_dbg_line(p_hwfn, + p_ptt, + (enum 
block_id)block_id, + (u8)line_id, + 0xf, 0, 0, 0); + + /* Read debug line info */ + for (i = 0, addr = DBG_REG_CALENDAR_OUT_DATA; + i < STATIC_DEBUG_LINE_DWORDS; + i++, offset++, addr += BYTES_IN_DWORD) + dump_buf[offset] = qed_rd(p_hwfn, p_ptt, + addr); + } + + /* Disable block's client and debug output */ + qed_bus_enable_clients(p_hwfn, p_ptt, 0); + qed_wr(p_hwfn, p_ptt, + p_block_defs->dbg_cycle_enable_addr, 0); + } else { + /* All lines are invalid - dump zeros */ + if (dump) + memset(dump_buf + offset, 0, + DWORDS_TO_BYTES(block_dwords)); + offset += block_dwords; + } + } + + if (dump) { + qed_bus_enable_dbg_block(p_hwfn, p_ptt, false); + qed_bus_enable_clients(p_hwfn, p_ptt, 0); + } + + return offset; +} + +/* Performs GRC Dump to the specified buffer. + * Returns the dumped size in dwords. + */ +static enum dbg_status qed_grc_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, u32 *num_dumped_dwords) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + bool parities_masked = false; + u8 i, port_mode = 0; + u32 offset = 0; + + /* Check if emulation platform */ + *num_dumped_dwords = 0; + + /* Fill GRC parameters that were not set by the user with their default + * value. 
+ */ + qed_dbg_grc_set_params_default(p_hwfn); + + /* Find port mode */ + if (dump) { + switch (qed_rd(p_hwfn, p_ptt, MISC_REG_PORT_MODE)) { + case 0: + port_mode = 1; + break; + case 1: + port_mode = 2; + break; + case 2: + port_mode = 4; + break; + } + } + + /* Update reset state */ + if (dump) + qed_update_blocks_reset_state(p_hwfn, p_ptt); + + /* Dump global params */ + offset += qed_dump_common_global_params(p_hwfn, + p_ptt, + dump_buf + offset, dump, 4); + offset += qed_dump_str_param(dump_buf + offset, + dump, "dump-type", "grc-dump"); + offset += qed_dump_num_param(dump_buf + offset, + dump, + "num-lcids", + qed_grc_get_param(p_hwfn, + DBG_GRC_PARAM_NUM_LCIDS)); + offset += qed_dump_num_param(dump_buf + offset, + dump, + "num-ltids", + qed_grc_get_param(p_hwfn, + DBG_GRC_PARAM_NUM_LTIDS)); + offset += qed_dump_num_param(dump_buf + offset, + dump, "num-ports", port_mode); + + /* Dump reset registers (dumped before taking blocks out of reset ) */ + if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_REGS)) + offset += qed_grc_dump_reset_regs(p_hwfn, + p_ptt, + dump_buf + offset, dump); + + /* Take all blocks out of reset (using reset registers) */ + if (dump) { + qed_grc_unreset_blocks(p_hwfn, p_ptt); + qed_update_blocks_reset_state(p_hwfn, p_ptt); + } + + /* Disable all parities using MFW command */ + if (dump) { + parities_masked = !qed_mcp_mask_parities(p_hwfn, p_ptt, 1); + if (!parities_masked) { + if (qed_grc_get_param + (p_hwfn, DBG_GRC_PARAM_PARITY_SAFE)) + return DBG_STATUS_MCP_COULD_NOT_MASK_PRTY; + else + DP_NOTICE(p_hwfn, + "Failed to mask parities using MFW\n"); + } + } + + /* Dump modified registers (dumped before modifying them) */ + if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_REGS)) + offset += qed_grc_dump_modified_regs(p_hwfn, + p_ptt, + dump_buf + offset, dump); + + /* Stall storms */ + if (dump && + (qed_grc_is_included(p_hwfn, + DBG_GRC_PARAM_DUMP_IOR) || + qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_VFC))) + 
qed_grc_stall_storms(p_hwfn, p_ptt, true); + + /* Dump all regs */ + if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_REGS)) { + /* Dump all blocks except MCP */ + bool block_enable[MAX_BLOCK_ID]; + + for (i = 0; i < MAX_BLOCK_ID; i++) + block_enable[i] = true; + block_enable[BLOCK_MCP] = false; + offset += qed_grc_dump_registers(p_hwfn, + p_ptt, + dump_buf + + offset, + dump, + block_enable, NULL, NULL); + } + + /* Dump memories */ + offset += qed_grc_dump_memories(p_hwfn, p_ptt, dump_buf + offset, dump); + + /* Dump MCP */ + if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_MCP)) + offset += qed_grc_dump_mcp(p_hwfn, + p_ptt, dump_buf + offset, dump); + + /* Dump context */ + if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_CM_CTX)) + offset += qed_grc_dump_ctx(p_hwfn, + p_ptt, dump_buf + offset, dump); + + /* Dump RSS memories */ + if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_RSS)) + offset += qed_grc_dump_rss(p_hwfn, + p_ptt, dump_buf + offset, dump); + + /* Dump Big RAM */ + for (i = 0; i < NUM_BIG_RAM_TYPES; i++) + if (qed_grc_is_included(p_hwfn, s_big_ram_defs[i].grc_param)) + offset += qed_grc_dump_big_ram(p_hwfn, + p_ptt, + dump_buf + offset, + dump, i); + + /* Dump IORs */ + if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_IOR)) + offset += qed_grc_dump_iors(p_hwfn, + p_ptt, dump_buf + offset, dump); + + /* Dump VFC */ + if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_VFC)) + offset += qed_grc_dump_vfc(p_hwfn, + p_ptt, dump_buf + offset, dump); + + /* Dump PHY tbus */ + if (qed_grc_is_included(p_hwfn, + DBG_GRC_PARAM_DUMP_PHY) && dev_data->chip_id == + CHIP_K2 && dev_data->platform_id == PLATFORM_ASIC) + offset += qed_grc_dump_phy(p_hwfn, + p_ptt, dump_buf + offset, dump); + + /* Dump static debug data */ + if (qed_grc_is_included(p_hwfn, + DBG_GRC_PARAM_DUMP_STATIC) && + dev_data->bus.state == DBG_BUS_STATE_IDLE) + offset += qed_grc_dump_static_debug(p_hwfn, + p_ptt, + dump_buf + offset, dump); + + /* Dump last section */ + offset += 
qed_dump_last_section(dump_buf, offset, dump); + if (dump) { + /* Unstall storms */ + if (qed_grc_get_param(p_hwfn, DBG_GRC_PARAM_UNSTALL)) + qed_grc_stall_storms(p_hwfn, p_ptt, false); + + /* Clear parity status */ + qed_grc_clear_all_prty(p_hwfn, p_ptt); + + /* Enable all parities using MFW command */ + if (parities_masked) + qed_mcp_mask_parities(p_hwfn, p_ptt, 0); + } + + *num_dumped_dwords = offset; + + return DBG_STATUS_OK; +} + +/* Writes the specified failing Idle Check rule to the specified buffer. + * Returns the dumped size in dwords. + */ +static u32 qed_idle_chk_dump_failure(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 * + dump_buf, + bool dump, + u16 rule_id, + const struct dbg_idle_chk_rule *rule, + u16 fail_entry_id, u32 *cond_reg_values) +{ + const union dbg_idle_chk_reg *regs = &((const union dbg_idle_chk_reg *) + s_dbg_arrays + [BIN_BUF_DBG_IDLE_CHK_REGS]. + ptr)[rule->reg_offset]; + const struct dbg_idle_chk_cond_reg *cond_regs = ®s[0].cond_reg; + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + struct dbg_idle_chk_result_hdr *hdr = + (struct dbg_idle_chk_result_hdr *)dump_buf; + const struct dbg_idle_chk_info_reg *info_regs = + ®s[rule->num_cond_regs].info_reg; + u32 next_reg_offset = 0, i, offset = 0; + u8 reg_id; + + /* Dump rule data */ + if (dump) { + memset(hdr, 0, sizeof(*hdr)); + hdr->rule_id = rule_id; + hdr->mem_entry_id = fail_entry_id; + hdr->severity = rule->severity; + hdr->num_dumped_cond_regs = rule->num_cond_regs; + } + + offset += IDLE_CHK_RESULT_HDR_DWORDS; + + /* Dump condition register values */ + for (reg_id = 0; reg_id < rule->num_cond_regs; reg_id++) { + const struct dbg_idle_chk_cond_reg *reg = &cond_regs[reg_id]; + + /* Write register header */ + if (dump) { + struct dbg_idle_chk_result_reg_hdr *reg_hdr = + (struct dbg_idle_chk_result_reg_hdr *)(dump_buf + + offset); + offset += IDLE_CHK_RESULT_REG_HDR_DWORDS; + memset(reg_hdr, 0, + sizeof(struct dbg_idle_chk_result_reg_hdr)); + reg_hdr->start_entry = 
reg->start_entry; + reg_hdr->size = reg->entry_size; + SET_FIELD(reg_hdr->data, + DBG_IDLE_CHK_RESULT_REG_HDR_IS_MEM, + reg->num_entries > 1 || reg->start_entry > 0 + ? 1 : 0); + SET_FIELD(reg_hdr->data, + DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID, reg_id); + + /* Write register values */ + for (i = 0; i < reg_hdr->size; + i++, next_reg_offset++, offset++) + dump_buf[offset] = + cond_reg_values[next_reg_offset]; + } else { + offset += IDLE_CHK_RESULT_REG_HDR_DWORDS + + reg->entry_size; + } + } + + /* Dump info register values */ + for (reg_id = 0; reg_id < rule->num_info_regs; reg_id++) { + const struct dbg_idle_chk_info_reg *reg = &info_regs[reg_id]; + u32 block_id; + + if (!dump) { + offset += IDLE_CHK_RESULT_REG_HDR_DWORDS + reg->size; + continue; + } + + /* Check if register's block is in reset */ + block_id = GET_FIELD(reg->data, DBG_IDLE_CHK_INFO_REG_BLOCK_ID); + if (block_id >= MAX_BLOCK_ID) { + DP_NOTICE(p_hwfn, "Invalid block_id\n"); + return 0; + } + + if (!dev_data->block_in_reset[block_id]) { + bool eval_mode = GET_FIELD(reg->mode.data, + DBG_MODE_HDR_EVAL_MODE) > 0; + bool mode_match = true; + + /* Check mode */ + if (eval_mode) { + u16 modes_buf_offset = + GET_FIELD(reg->mode.data, + DBG_MODE_HDR_MODES_BUF_OFFSET); + mode_match = + qed_is_mode_match(p_hwfn, + &modes_buf_offset); + } + + if (mode_match) { + u32 grc_addr = + DWORDS_TO_BYTES(GET_FIELD(reg->data, + DBG_IDLE_CHK_INFO_REG_ADDRESS)); + + /* Write register header */ + struct dbg_idle_chk_result_reg_hdr *reg_hdr = + (struct dbg_idle_chk_result_reg_hdr *) + (dump_buf + offset); + + offset += IDLE_CHK_RESULT_REG_HDR_DWORDS; + hdr->num_dumped_info_regs++; + memset(reg_hdr, 0, sizeof(*reg_hdr)); + reg_hdr->size = reg->size; + SET_FIELD(reg_hdr->data, + DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID, + rule->num_cond_regs + reg_id); + + /* Write register values */ + for (i = 0; i < reg->size; + i++, offset++, grc_addr += 4) + dump_buf[offset] = + qed_rd(p_hwfn, p_ptt, grc_addr); + } + } + } + + return offset; +} + 
+/* Dumps idle check rule entries. Returns the dumped size in dwords. */ +static u32 +qed_idle_chk_dump_rule_entries(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + u32 *dump_buf, bool dump, + const struct dbg_idle_chk_rule *input_rules, + u32 num_input_rules, u32 *num_failing_rules) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 cond_reg_values[IDLE_CHK_MAX_ENTRIES_SIZE]; + u32 i, j, offset = 0; + u16 entry_id; + u8 reg_id; + + *num_failing_rules = 0; + for (i = 0; i < num_input_rules; i++) { + const struct dbg_idle_chk_cond_reg *cond_regs; + const struct dbg_idle_chk_rule *rule; + const union dbg_idle_chk_reg *regs; + u16 num_reg_entries = 1; + bool check_rule = true; + const u32 *imm_values; + + rule = &input_rules[i]; + regs = &((const union dbg_idle_chk_reg *) + s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_REGS].ptr) + [rule->reg_offset]; + cond_regs = ®s[0].cond_reg; + imm_values = &s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_IMMS].ptr + [rule->imm_offset]; + + /* Check if all condition register blocks are out of reset, and + * find maximal number of entries (all condition registers that + * are memories must have the same size, which is > 1). + */ + for (reg_id = 0; reg_id < rule->num_cond_regs && check_rule; + reg_id++) { + u32 block_id = GET_FIELD(cond_regs[reg_id].data, + DBG_IDLE_CHK_COND_REG_BLOCK_ID); + + if (block_id >= MAX_BLOCK_ID) { + DP_NOTICE(p_hwfn, "Invalid block_id\n"); + return 0; + } + + check_rule = !dev_data->block_in_reset[block_id]; + if (cond_regs[reg_id].num_entries > num_reg_entries) + num_reg_entries = cond_regs[reg_id].num_entries; + } + + if (!check_rule && dump) + continue; + + /* Go over all register entries (number of entries is the same + * for all condition registers). 
+ */ + for (entry_id = 0; entry_id < num_reg_entries; entry_id++) { + /* Read current entry of all condition registers */ + if (dump) { + u32 next_reg_offset = 0; + + for (reg_id = 0; + reg_id < rule->num_cond_regs; + reg_id++) { + const struct dbg_idle_chk_cond_reg + *reg = &cond_regs[reg_id]; + + /* Find GRC address (if it's a memory, + * the address of the specific entry is + * calculated). + */ + u32 grc_addr = + DWORDS_TO_BYTES( + GET_FIELD(reg->data, + DBG_IDLE_CHK_COND_REG_ADDRESS)); + + if (reg->num_entries > 1 || + reg->start_entry > 0) { + u32 padded_entry_size = + reg->entry_size > 1 ? + roundup_pow_of_two + (reg->entry_size) : 1; + + grc_addr += + DWORDS_TO_BYTES( + (reg->start_entry + + entry_id) + * padded_entry_size); + } + + /* Read registers */ + if (next_reg_offset + reg->entry_size >= + IDLE_CHK_MAX_ENTRIES_SIZE) { + DP_NOTICE(p_hwfn, + "idle check registers entry is too large\n"); + return 0; + } + + for (j = 0; j < reg->entry_size; + j++, next_reg_offset++, + grc_addr += 4) + cond_reg_values[next_reg_offset] = + qed_rd(p_hwfn, p_ptt, grc_addr); + } + } + + /* Call rule's condition function - a return value of + * true indicates failure. + */ + if ((*cond_arr[rule->cond_id])(cond_reg_values, + imm_values) || !dump) { + offset += + qed_idle_chk_dump_failure(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + rule->rule_id, + rule, + entry_id, + cond_reg_values); + (*num_failing_rules)++; + break; + } + } + } + + return offset; +} + +/* Performs Idle Check Dump to the specified buffer. + * Returns the dumped size in dwords. 
+ */ +static u32 qed_idle_chk_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) +{ + u32 offset = 0, input_offset = 0, num_failing_rules = 0; + u32 num_failing_rules_offset; + + /* Dump global params */ + offset += qed_dump_common_global_params(p_hwfn, + p_ptt, + dump_buf + offset, dump, 1); + offset += qed_dump_str_param(dump_buf + offset, + dump, "dump-type", "idle-chk"); + + /* Dump idle check section header with a single parameter */ + offset += qed_dump_section_hdr(dump_buf + offset, dump, "idle_chk", 1); + num_failing_rules_offset = offset; + offset += qed_dump_num_param(dump_buf + offset, dump, "num_rules", 0); + while (input_offset < + s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_RULES].size_in_dwords) { + const struct dbg_idle_chk_cond_hdr *cond_hdr = + (const struct dbg_idle_chk_cond_hdr *) + &s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_RULES].ptr + [input_offset++]; + bool eval_mode = GET_FIELD(cond_hdr->mode.data, + DBG_MODE_HDR_EVAL_MODE) > 0; + bool mode_match = true; + + /* Check mode */ + if (eval_mode) { + u16 modes_buf_offset = + GET_FIELD(cond_hdr->mode.data, + DBG_MODE_HDR_MODES_BUF_OFFSET); + + mode_match = qed_is_mode_match(p_hwfn, + &modes_buf_offset); + } + + if (mode_match) { + u32 curr_failing_rules; + + offset += + qed_idle_chk_dump_rule_entries(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + (const struct dbg_idle_chk_rule *) + &s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_RULES]. + ptr[input_offset], + cond_hdr->data_size / IDLE_CHK_RULE_SIZE_DWORDS, + &curr_failing_rules); + num_failing_rules += curr_failing_rules; + } + + input_offset += cond_hdr->data_size; + } + + /* Overwrite num_rules parameter */ + if (dump) + qed_dump_num_param(dump_buf + num_failing_rules_offset, + dump, "num_rules", num_failing_rules); + + return offset; +} + +/* Finds the meta data image in NVRAM. 
*/ +static enum dbg_status qed_find_nvram_image(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 image_type, + u32 *nvram_offset_bytes, + u32 *nvram_size_bytes) +{ + u32 ret_mcp_resp, ret_mcp_param, ret_txn_size; + struct mcp_file_att file_att; + + /* Call NVRAM get file command */ + if (qed_mcp_nvm_rd_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_NVM_GET_FILE_ATT, + image_type, &ret_mcp_resp, &ret_mcp_param, + &ret_txn_size, (u32 *)&file_att) != 0) + return DBG_STATUS_NVRAM_GET_IMAGE_FAILED; + + /* Check response */ + if ((ret_mcp_resp & FW_MSG_CODE_MASK) != FW_MSG_CODE_NVM_OK) + return DBG_STATUS_NVRAM_GET_IMAGE_FAILED; + + /* Update return values */ + *nvram_offset_bytes = file_att.nvm_start_addr; + *nvram_size_bytes = file_att.len; + DP_VERBOSE(p_hwfn, + QED_MSG_DEBUG, + "find_nvram_image: found NVRAM image of type %d in NVRAM offset %d bytes with size %d bytes\n", + image_type, *nvram_offset_bytes, *nvram_size_bytes); + + /* Check alignment */ + if (*nvram_size_bytes & 0x3) + return DBG_STATUS_NON_ALIGNED_NVRAM_IMAGE; + return DBG_STATUS_OK; +} + +static enum dbg_status qed_nvram_read(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 nvram_offset_bytes, + u32 nvram_size_bytes, u32 *ret_buf) +{ + u32 ret_mcp_resp, ret_mcp_param, ret_read_size; + u32 bytes_to_copy, read_offset = 0; + s32 bytes_left = nvram_size_bytes; + + DP_VERBOSE(p_hwfn, + QED_MSG_DEBUG, + "nvram_read: reading image of size %d bytes from NVRAM\n", + nvram_size_bytes); + do { + bytes_to_copy = + (bytes_left > + MCP_DRV_NVM_BUF_LEN) ? 
MCP_DRV_NVM_BUF_LEN : bytes_left; + + /* Call NVRAM read command */ + if (qed_mcp_nvm_rd_cmd(p_hwfn, p_ptt, + DRV_MSG_CODE_NVM_READ_NVRAM, + (nvram_offset_bytes + + read_offset) | + (bytes_to_copy << + DRV_MB_PARAM_NVM_LEN_SHIFT), + &ret_mcp_resp, &ret_mcp_param, + &ret_read_size, + (u32 *)((u8 *)ret_buf + + read_offset)) != 0) + return DBG_STATUS_NVRAM_READ_FAILED; + + /* Check response */ + if ((ret_mcp_resp & FW_MSG_CODE_MASK) != FW_MSG_CODE_NVM_OK) + return DBG_STATUS_NVRAM_READ_FAILED; + + /* Update read offset */ + read_offset += ret_read_size; + bytes_left -= ret_read_size; + } while (bytes_left > 0); + + return DBG_STATUS_OK; +} + +/* Get info on the MCP Trace data in the scratchpad: + * - trace_data_grc_addr - the GRC address of the trace data + * - trace_data_size_bytes - the size in bytes of the MCP Trace data (without + * the header) + */ +static enum dbg_status qed_mcp_trace_get_data_info(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *trace_data_grc_addr, + u32 *trace_data_size_bytes) +{ + /* Read MCP trace section offsize structure from MCP scratchpad */ + u32 spad_trace_offsize = qed_rd(p_hwfn, + p_ptt, + MCP_SPAD_TRACE_OFFSIZE_ADDR); + u32 signature; + + /* Extract MCP trace section GRC address from offsize structure (within + * scratchpad). + */ + *trace_data_grc_addr = + MCP_REG_SCRATCH + SECTION_OFFSET(spad_trace_offsize); + + /* Read signature from MCP trace section */ + signature = qed_rd(p_hwfn, p_ptt, + *trace_data_grc_addr + + offsetof(struct mcp_trace, signature)); + if (signature != MFW_TRACE_SIGNATURE) + return DBG_STATUS_INVALID_TRACE_SIGNATURE; + + /* Read trace size from MCP trace section */ + *trace_data_size_bytes = qed_rd(p_hwfn, + p_ptt, + *trace_data_grc_addr + + offsetof(struct mcp_trace, size)); + return DBG_STATUS_OK; +} + +/* Reads MCP trace meta data image from NVRAM. 
+ * - running_bundle_id (OUT) - the running bundle ID (invalid when loaded from
+ *   file)
+ * - trace_meta_offset_bytes (OUT) - the NVRAM offset in bytes in which the MCP
+ *   Trace meta data starts (invalid when loaded from file)
+ * - trace_meta_size_bytes (OUT) - the size in bytes of the MCP Trace meta data
+ *
+ * Returns DBG_STATUS_INVALID_NVRAM_BUNDLE when the bundle ID read from the
+ * scratchpad is greater than 1, otherwise the status of the NVRAM image
+ * lookup.
+ */
+static enum dbg_status qed_mcp_trace_get_meta_info(struct qed_hwfn *p_hwfn,
+						   struct qed_ptt *p_ptt,
+						   u32 trace_data_size_bytes,
+						   u32 *running_bundle_id,
+						   u32 *trace_meta_offset_bytes,
+						   u32 *trace_meta_size_bytes)
+{
+	u32 spad_trace_offsize, bundle_id_addr, image_type;
+
+	/* The offsize word in the scratchpad describes the trace section */
+	spad_trace_offsize = qed_rd(p_hwfn, p_ptt,
+				    MCP_SPAD_TRACE_OFFSIZE_ADDR);
+
+	/* The running bundle ID is read from scratchpad base + section
+	 * offset + section size + trace data size.
+	 */
+	bundle_id_addr = MCP_REG_SCRATCH +
+			 SECTION_OFFSET(spad_trace_offsize) +
+			 QED_SECTION_SIZE(spad_trace_offsize) +
+			 trace_data_size_bytes;
+	*running_bundle_id = qed_rd(p_hwfn, p_ptt, bundle_id_addr);
+	if (*running_bundle_id > 1)
+		return DBG_STATUS_INVALID_NVRAM_BUNDLE;
+
+	/* Each bundle has its own trace meta image type in NVRAM */
+	if (*running_bundle_id == DIR_ID_1)
+		image_type = NVM_TYPE_MFW_TRACE1;
+	else
+		image_type = NVM_TYPE_MFW_TRACE2;
+
+	return qed_find_nvram_image(p_hwfn, p_ptt, image_type,
+				    trace_meta_offset_bytes,
+				    trace_meta_size_bytes);
+}
+
+/* Copies size_in_dwords consecutive dwords, starting at GRC address
+ * grc_addr, into buf using one register read per dword.
+ */
+static void qed_mcp_trace_read_data(struct qed_hwfn *p_hwfn,
+				    struct qed_ptt *p_ptt,
+				    u32 grc_addr, u32 size_in_dwords, u32 *buf)
+{
+	u32 dword_idx = 0;
+
+	DP_VERBOSE(p_hwfn,
+		   QED_MSG_DEBUG,
+		   "mcp_trace_read_data: reading trace data of size %d dwords from GRC address 0x%x\n",
+		   size_in_dwords, grc_addr);
+
+	while (dword_idx < size_in_dwords) {
+		buf[dword_idx] = qed_rd(p_hwfn, p_ptt, grc_addr);
+		grc_addr += BYTES_IN_DWORD;
+		dword_idx++;
+	}
+}
+
+/* Reads the MCP Trace meta data (from NVRAM or buffer) into the specified
+ * buffer.
+ */ +static enum dbg_status qed_mcp_trace_read_meta(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 nvram_offset_in_bytes, + u32 size_in_bytes, u32 *buf) +{ + u8 *byte_buf = (u8 *)buf; + u8 modules_num, i; + u32 signature; + + /* Read meta data from NVRAM */ + enum dbg_status status = qed_nvram_read(p_hwfn, + p_ptt, + nvram_offset_in_bytes, + size_in_bytes, + buf); + + if (status != DBG_STATUS_OK) + return status; + + /* Extract and check first signature */ + signature = qed_read_unaligned_dword(byte_buf); + byte_buf += sizeof(u32); + if (signature != MCP_TRACE_META_IMAGE_SIGNATURE) + return DBG_STATUS_INVALID_TRACE_SIGNATURE; + + /* Extract number of modules */ + modules_num = *(byte_buf++); + + /* Skip all modules */ + for (i = 0; i < modules_num; i++) { + u8 module_len = *(byte_buf++); + + byte_buf += module_len; + } + + /* Extract and check second signature */ + signature = qed_read_unaligned_dword(byte_buf); + byte_buf += sizeof(u32); + if (signature != MCP_TRACE_META_IMAGE_SIGNATURE) + return DBG_STATUS_INVALID_TRACE_SIGNATURE; + return DBG_STATUS_OK; +} + +/* Dump MCP Trace */ +enum dbg_status qed_mcp_trace_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, u32 *num_dumped_dwords) +{ + u32 trace_data_grc_addr, trace_data_size_bytes, trace_data_size_dwords; + u32 trace_meta_size_dwords, running_bundle_id, offset = 0; + u32 trace_meta_offset_bytes, trace_meta_size_bytes; + enum dbg_status status; + int halted = 0; + + *num_dumped_dwords = 0; + + /* Get trace data info */ + status = qed_mcp_trace_get_data_info(p_hwfn, + p_ptt, + &trace_data_grc_addr, + &trace_data_size_bytes); + if (status != DBG_STATUS_OK) + return status; + + /* Dump global params */ + offset += qed_dump_common_global_params(p_hwfn, + p_ptt, + dump_buf + offset, dump, 1); + offset += qed_dump_str_param(dump_buf + offset, + dump, "dump-type", "mcp-trace"); + + /* Halt MCP while reading from scratchpad so the read data will be + * consistent if halt 
fails, MCP trace is taken anyway, with a small + * risk that it may be corrupt. + */ + if (dump) { + halted = !qed_mcp_halt(p_hwfn, p_ptt); + if (!halted) + DP_NOTICE(p_hwfn, "MCP halt failed!\n"); + } + + /* Find trace data size */ + trace_data_size_dwords = + DIV_ROUND_UP(trace_data_size_bytes + sizeof(struct mcp_trace), + BYTES_IN_DWORD); + + /* Dump trace data section header and param */ + offset += qed_dump_section_hdr(dump_buf + offset, + dump, "mcp_trace_data", 1); + offset += qed_dump_num_param(dump_buf + offset, + dump, "size", trace_data_size_dwords); + + /* Read trace data from scratchpad into dump buffer */ + if (dump) + qed_mcp_trace_read_data(p_hwfn, + p_ptt, + trace_data_grc_addr, + trace_data_size_dwords, + dump_buf + offset); + offset += trace_data_size_dwords; + + /* Resume MCP (only if halt succeeded) */ + if (halted && qed_mcp_resume(p_hwfn, p_ptt) != 0) + DP_NOTICE(p_hwfn, "Failed to resume MCP after halt!\n"); + + /* Dump trace meta section header */ + offset += qed_dump_section_hdr(dump_buf + offset, + dump, "mcp_trace_meta", 1); + + /* Read trace meta info */ + status = qed_mcp_trace_get_meta_info(p_hwfn, + p_ptt, + trace_data_size_bytes, + &running_bundle_id, + &trace_meta_offset_bytes, + &trace_meta_size_bytes); + if (status != DBG_STATUS_OK) + return status; + + /* Dump trace meta size param (trace_meta_size_bytes is always + * dword-aligned). 
+ */ + trace_meta_size_dwords = BYTES_TO_DWORDS(trace_meta_size_bytes); + offset += qed_dump_num_param(dump_buf + offset, dump, "size", + trace_meta_size_dwords); + + /* Read trace meta image into dump buffer */ + if (dump) { + status = qed_mcp_trace_read_meta(p_hwfn, + p_ptt, + trace_meta_offset_bytes, + trace_meta_size_bytes, + dump_buf + offset); + if (status != DBG_STATUS_OK) + return status; + } + + offset += trace_meta_size_dwords; + + *num_dumped_dwords = offset; + + return DBG_STATUS_OK; +} + +/* Dump GRC FIFO */ +enum dbg_status qed_reg_fifo_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, u32 *num_dumped_dwords) +{ + u32 offset = 0, dwords_read, size_param_offset; + bool fifo_has_data; + + *num_dumped_dwords = 0; + + /* Dump global params */ + offset += qed_dump_common_global_params(p_hwfn, + p_ptt, + dump_buf + offset, dump, 1); + offset += qed_dump_str_param(dump_buf + offset, + dump, "dump-type", "reg-fifo"); + + /* Dump fifo data section header and param. The size param is 0 for now, + * and is overwritten after reading the FIFO. + */ + offset += qed_dump_section_hdr(dump_buf + offset, + dump, "reg_fifo_data", 1); + size_param_offset = offset; + offset += qed_dump_num_param(dump_buf + offset, dump, "size", 0); + + if (!dump) { + /* FIFO max size is REG_FIFO_DEPTH_DWORDS. There is no way to + * test how much data is available, except for reading it. + */ + offset += REG_FIFO_DEPTH_DWORDS; + *num_dumped_dwords = offset; + return DBG_STATUS_OK; + } + + fifo_has_data = qed_rd(p_hwfn, p_ptt, + GRC_REG_TRACE_FIFO_VALID_DATA) > 0; + + /* Pull available data from fifo. Use DMAE since this is widebus memory + * and must be accessed atomically. Test for dwords_read not passing + * buffer size since more entries could be added to the buffer as we are + * emptying it. 
+ */ + for (dwords_read = 0; + fifo_has_data && dwords_read < REG_FIFO_DEPTH_DWORDS; + dwords_read += REG_FIFO_ELEMENT_DWORDS, offset += + REG_FIFO_ELEMENT_DWORDS) { + if (qed_dmae_grc2host(p_hwfn, p_ptt, GRC_REG_TRACE_FIFO, + (u64)(uintptr_t)(&dump_buf[offset]), + REG_FIFO_ELEMENT_DWORDS, 0)) + return DBG_STATUS_DMAE_FAILED; + fifo_has_data = qed_rd(p_hwfn, p_ptt, + GRC_REG_TRACE_FIFO_VALID_DATA) > 0; + } + + qed_dump_num_param(dump_buf + size_param_offset, dump, "size", + dwords_read); + + *num_dumped_dwords = offset; + return DBG_STATUS_OK; +} + +/* Dump IGU FIFO */ +enum dbg_status qed_igu_fifo_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, u32 *num_dumped_dwords) +{ + u32 offset = 0, dwords_read, size_param_offset; + bool fifo_has_data; + + *num_dumped_dwords = 0; + + /* Dump global params */ + offset += qed_dump_common_global_params(p_hwfn, + p_ptt, + dump_buf + offset, dump, 1); + offset += qed_dump_str_param(dump_buf + offset, + dump, "dump-type", "igu-fifo"); + + /* Dump fifo data section header and param. The size param is 0 for now, + * and is overwritten after reading the FIFO. + */ + offset += qed_dump_section_hdr(dump_buf + offset, + dump, "igu_fifo_data", 1); + size_param_offset = offset; + offset += qed_dump_num_param(dump_buf + offset, dump, "size", 0); + + if (!dump) { + /* FIFO max size is IGU_FIFO_DEPTH_DWORDS. There is no way to + * test how much data is available, except for reading it. + */ + offset += IGU_FIFO_DEPTH_DWORDS; + *num_dumped_dwords = offset; + return DBG_STATUS_OK; + } + + fifo_has_data = qed_rd(p_hwfn, p_ptt, + IGU_REG_ERROR_HANDLING_DATA_VALID) > 0; + + /* Pull available data from fifo. Use DMAE since this is widebus memory + * and must be accessed atomically. Test for dwords_read not passing + * buffer size since more entries could be added to the buffer as we are + * emptying it. 
+	 */
+	for (dwords_read = 0;
+	     fifo_has_data && dwords_read < IGU_FIFO_DEPTH_DWORDS;
+	     dwords_read += IGU_FIFO_ELEMENT_DWORDS, offset +=
+	     IGU_FIFO_ELEMENT_DWORDS) {
+		if (qed_dmae_grc2host(p_hwfn, p_ptt,
+				      IGU_REG_ERROR_HANDLING_MEMORY,
+				      (u64)(uintptr_t)(&dump_buf[offset]),
+				      IGU_FIFO_ELEMENT_DWORDS, 0))
+			return DBG_STATUS_DMAE_FAILED;
+		fifo_has_data = qed_rd(p_hwfn, p_ptt,
+				       IGU_REG_ERROR_HANDLING_DATA_VALID) > 0;
+	}
+
+	qed_dump_num_param(dump_buf + size_param_offset, dump, "size",
+			   dwords_read);
+
+	*num_dumped_dwords = offset;
+	return DBG_STATUS_OK;
+}
+
+/* Protection Override dump.
+ *
+ * Writes the common global params, the "dump-type" string and a
+ * "protection_override_data" section whose "size" param is followed by the
+ * override window contents. *num_dumped_dwords receives the number of dwords
+ * produced; it is 0 when DBG_STATUS_DMAE_FAILED is returned.
+ *
+ * When dump is false only the size is computed: the data area is accounted
+ * as PROTECTION_OVERRIDE_DEPTH_DWORDS. The dump pass therefore never copies
+ * more than that many dwords, so a buffer sized by the sizing pass cannot be
+ * overrun.
+ */
+enum dbg_status qed_protection_override_dump(struct qed_hwfn *p_hwfn,
+					     struct qed_ptt *p_ptt,
+					     u32 *dump_buf,
+					     bool dump, u32 *num_dumped_dwords)
+{
+	u32 offset = 0, size_param_offset, override_window_dwords;
+
+	*num_dumped_dwords = 0;
+
+	/* Dump global params */
+	offset += qed_dump_common_global_params(p_hwfn,
+						p_ptt,
+						dump_buf + offset, dump, 1);
+	offset += qed_dump_str_param(dump_buf + offset,
+				     dump, "dump-type", "protection-override");
+
+	/* Dump data section header and param. The size param is 0 for now, and
+	 * is overwritten after reading the data.
+	 */
+	offset += qed_dump_section_hdr(dump_buf + offset,
+				       dump, "protection_override_data", 1);
+	size_param_offset = offset;
+	offset += qed_dump_num_param(dump_buf + offset, dump, "size", 0);
+
+	if (!dump) {
+		offset += PROTECTION_OVERRIDE_DEPTH_DWORDS;
+		*num_dumped_dwords = offset;
+		return DBG_STATUS_OK;
+	}
+
+	/* Add override window info to buffer */
+	override_window_dwords =
+		qed_rd(p_hwfn, p_ptt,
+		       GRC_REG_NUMBER_VALID_OVERRIDE_WINDOW) *
+		       PROTECTION_OVERRIDE_ELEMENT_DWORDS;
+
+	/* The count comes straight from a register; cap it at the amount of
+	 * space the sizing pass reserved for the data area.
+	 */
+	if (override_window_dwords > PROTECTION_OVERRIDE_DEPTH_DWORDS)
+		override_window_dwords = PROTECTION_OVERRIDE_DEPTH_DWORDS;
+
+	if (qed_dmae_grc2host(p_hwfn, p_ptt,
+			      GRC_REG_PROTECTION_OVERRIDE_WINDOW,
+			      (u64)(uintptr_t)(dump_buf + offset),
+			      override_window_dwords, 0))
+		return DBG_STATUS_DMAE_FAILED;
+	offset += override_window_dwords;
+	qed_dump_num_param(dump_buf + size_param_offset, dump, "size",
+			   override_window_dwords);
+
+	*num_dumped_dwords = offset;
+	return DBG_STATUS_OK;
+}
+
+/* Performs FW Asserts Dump to the specified buffer.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_fw_asserts_dump(struct qed_hwfn *p_hwfn,
+			       struct qed_ptt *p_ptt, u32 *dump_buf, bool dump)
+{
+	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+	char storm_letter_str[2] = "?";
+	struct fw_info fw_info;
+	u32 offset = 0, i;
+	u8 storm_id;
+
+	/* Dump global params */
+	offset += qed_dump_common_global_params(p_hwfn,
+						p_ptt,
+						dump_buf + offset, dump, 1);
+	offset += qed_dump_str_param(dump_buf + offset,
+				     dump, "dump-type", "fw-asserts");
+	for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) {
+		u32 fw_asserts_section_addr, next_list_idx_addr, next_list_idx,
+			last_list_idx, element_addr;
+
+		if (dev_data->block_in_reset[s_storm_defs[storm_id].block_id])
+			continue;
+
+		/* Read FW info for the current Storm */
+		qed_read_fw_info(p_hwfn, p_ptt, storm_id, &fw_info);
+
+		/* Dump FW Asserts section header and params */
+		storm_letter_str[0] = s_storm_defs[storm_id].letter;
+		offset += qed_dump_section_hdr(dump_buf + offset, dump,
+					       "fw_asserts", 2);
+		offset += qed_dump_str_param(dump_buf + offset,
dump, "storm", + storm_letter_str); + offset += qed_dump_num_param(dump_buf + offset, dump, "size", + fw_info.fw_asserts_section. + list_element_dword_size); + + if (!dump) { + offset += fw_info.fw_asserts_section. + list_element_dword_size; + continue; + } + + /* Read and dump FW Asserts data */ + fw_asserts_section_addr = + s_storm_defs[storm_id].sem_fast_mem_addr + + SEM_FAST_REG_INT_RAM + + RAM_LINES_TO_BYTES(fw_info.fw_asserts_section. + section_ram_line_offset); + next_list_idx_addr = + fw_asserts_section_addr + + DWORDS_TO_BYTES(fw_info.fw_asserts_section. + list_next_index_dword_offset); + next_list_idx = qed_rd(p_hwfn, p_ptt, next_list_idx_addr); + last_list_idx = (next_list_idx > 0 + ? next_list_idx + : fw_info.fw_asserts_section.list_num_elements) + - 1; + element_addr = + fw_asserts_section_addr + + DWORDS_TO_BYTES(fw_info.fw_asserts_section. + list_dword_offset) + + last_list_idx * + DWORDS_TO_BYTES(fw_info.fw_asserts_section. + list_element_dword_size); + for (i = 0; + i < fw_info.fw_asserts_section.list_element_dword_size; + i++, offset++, element_addr += BYTES_IN_DWORD) + dump_buf[offset] = qed_rd(p_hwfn, p_ptt, element_addr); + } + + /* Dump last section */ + offset += qed_dump_section_hdr(dump_buf + offset, dump, "last", 0); + return offset; +} + +/***************************** Public Functions *******************************/ + +enum dbg_status qed_dbg_set_bin_ptr(const u8 * const bin_ptr) +{ + /* Convert binary data to debug arrays */ + u32 num_of_buffers = *(u32 *)bin_ptr; + struct bin_buffer_hdr *buf_array; + u8 buf_id; + + buf_array = (struct bin_buffer_hdr *)((u32 *)bin_ptr + 1); + + for (buf_id = 0; buf_id < num_of_buffers; buf_id++) { + s_dbg_arrays[buf_id].ptr = + (u32 *)(bin_ptr + buf_array[buf_id].offset); + s_dbg_arrays[buf_id].size_in_dwords = + BYTES_TO_DWORDS(buf_array[buf_id].length); + } + + return DBG_STATUS_OK; +} + +enum dbg_status qed_dbg_grc_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size) 
+{ + enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt); + + *buf_size = 0; + if (status != DBG_STATUS_OK) + return status; + if (!s_dbg_arrays[BIN_BUF_DBG_MODE_TREE].ptr || + !s_dbg_arrays[BIN_BUF_DBG_DUMP_REG].ptr || + !s_dbg_arrays[BIN_BUF_DBG_DUMP_MEM].ptr || + !s_dbg_arrays[BIN_BUF_DBG_ATTN_BLOCKS].ptr || + !s_dbg_arrays[BIN_BUF_DBG_ATTN_REGS].ptr) + return DBG_STATUS_DBG_ARRAY_NOT_SET; + return qed_grc_dump(p_hwfn, p_ptt, NULL, false, buf_size); +} + +enum dbg_status qed_dbg_grc_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords) +{ + u32 needed_buf_size_in_dwords; + enum dbg_status status; + + status = qed_dbg_grc_get_dump_buf_size(p_hwfn, p_ptt, + &needed_buf_size_in_dwords); + + *num_dumped_dwords = 0; + if (status != DBG_STATUS_OK) + return status; + if (buf_size_in_dwords < needed_buf_size_in_dwords) + return DBG_STATUS_DUMP_BUF_TOO_SMALL; + + /* GRC Dump */ + status = qed_grc_dump(p_hwfn, p_ptt, dump_buf, true, num_dumped_dwords); + + /* Clear all GRC params */ + qed_dbg_grc_clear_params(p_hwfn); + return status; +} + +enum dbg_status qed_dbg_idle_chk_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size) +{ + enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt); + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + + *buf_size = 0; + if (status != DBG_STATUS_OK) + return status; + if (!s_dbg_arrays[BIN_BUF_DBG_MODE_TREE].ptr || + !s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_REGS].ptr || + !s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_IMMS].ptr || + !s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_RULES].ptr) + return DBG_STATUS_DBG_ARRAY_NOT_SET; + if (!dev_data->idle_chk.buf_size_set) { + dev_data->idle_chk.buf_size = qed_idle_chk_dump(p_hwfn, + p_ptt, + NULL, false); + dev_data->idle_chk.buf_size_set = true; + } + + *buf_size = dev_data->idle_chk.buf_size; + return DBG_STATUS_OK; +} + +enum dbg_status qed_dbg_idle_chk_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, 
+					  u32 *dump_buf,
+					  u32 buf_size_in_dwords,
+					  u32 *num_dumped_dwords)
+{
+	u32 needed_buf_size_in_dwords;
+	enum dbg_status status;
+
+	status = qed_dbg_idle_chk_get_dump_buf_size(p_hwfn, p_ptt,
+						    &needed_buf_size_in_dwords);
+
+	*num_dumped_dwords = 0;
+	if (status != DBG_STATUS_OK)
+		return status;
+	if (buf_size_in_dwords < needed_buf_size_in_dwords)
+		return DBG_STATUS_DUMP_BUF_TOO_SMALL;
+
+	/* Update reset state */
+	qed_update_blocks_reset_state(p_hwfn, p_ptt);
+
+	/* Idle Check Dump */
+	*num_dumped_dwords = qed_idle_chk_dump(p_hwfn, p_ptt, dump_buf, true);
+	return DBG_STATUS_OK;
+}
+
+/* Computes the buffer size, in dwords, needed for an MCP Trace dump by
+ * running the dump routine with dump == false and a NULL buffer.
+ * *buf_size is 0 when device init fails.
+ */
+enum dbg_status qed_dbg_mcp_trace_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+						    struct qed_ptt *p_ptt,
+						    u32 *buf_size)
+{
+	enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt);
+
+	*buf_size = 0;
+	if (status != DBG_STATUS_OK)
+		return status;
+	return qed_mcp_trace_dump(p_hwfn, p_ptt, NULL, false, buf_size);
+}
+
+/* Dumps the MCP Trace into dump_buf, which holds buf_size_in_dwords dwords.
+ *
+ * The required size is computed first; the dump is only performed when the
+ * supplied buffer is large enough. *num_dumped_dwords is reset to 0 before
+ * any early return, matching the other qed_dbg_*_dump() entry points, so
+ * callers never observe a stale or uninitialized count on failure.
+ *
+ * Returns the sizing status if sizing fails, DBG_STATUS_DUMP_BUF_TOO_SMALL if
+ * the buffer is too small, otherwise the status of the dump itself.
+ */
+enum dbg_status qed_dbg_mcp_trace_dump(struct qed_hwfn *p_hwfn,
+				       struct qed_ptt *p_ptt,
+				       u32 *dump_buf,
+				       u32 buf_size_in_dwords,
+				       u32 *num_dumped_dwords)
+{
+	u32 needed_buf_size_in_dwords;
+	enum dbg_status status;
+
+	status = qed_dbg_mcp_trace_get_dump_buf_size(p_hwfn, p_ptt,
+						&needed_buf_size_in_dwords);
+
+	*num_dumped_dwords = 0;
+	if (status != DBG_STATUS_OK)
+		return status;
+	if (buf_size_in_dwords < needed_buf_size_in_dwords)
+		return DBG_STATUS_DUMP_BUF_TOO_SMALL;
+
+	/* Update reset state */
+	qed_update_blocks_reset_state(p_hwfn, p_ptt);
+
+	/* Perform dump */
+	return qed_mcp_trace_dump(p_hwfn,
+				  p_ptt, dump_buf, true, num_dumped_dwords);
+}
+
+/* Computes the buffer size, in dwords, needed for a GRC (reg) FIFO dump by
+ * running the dump routine with dump == false and a NULL buffer.
+ * *buf_size is 0 when device init fails.
+ */
+enum dbg_status qed_dbg_reg_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+						   struct qed_ptt *p_ptt,
+						   u32 *buf_size)
+{
+	enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt);
+
+	*buf_size = 0;
+	if (status != DBG_STATUS_OK)
+		return status;
+	return qed_reg_fifo_dump(p_hwfn, p_ptt, NULL, false, buf_size);
+}
+
+enum dbg_status qed_dbg_reg_fifo_dump(struct qed_hwfn *p_hwfn,
+				      struct qed_ptt *p_ptt,
+				      u32 *dump_buf,
+				      u32
buf_size_in_dwords, + u32 *num_dumped_dwords) +{ + u32 needed_buf_size_in_dwords; + enum dbg_status status; + + status = qed_dbg_reg_fifo_get_dump_buf_size(p_hwfn, p_ptt, + &needed_buf_size_in_dwords); + + *num_dumped_dwords = 0; + if (status != DBG_STATUS_OK) + return status; + if (buf_size_in_dwords < needed_buf_size_in_dwords) + return DBG_STATUS_DUMP_BUF_TOO_SMALL; + + /* Update reset state */ + qed_update_blocks_reset_state(p_hwfn, p_ptt); + return qed_reg_fifo_dump(p_hwfn, + p_ptt, dump_buf, true, num_dumped_dwords); +} + +enum dbg_status qed_dbg_igu_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size) +{ + enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt); + + *buf_size = 0; + if (status != DBG_STATUS_OK) + return status; + return qed_igu_fifo_dump(p_hwfn, p_ptt, NULL, false, buf_size); +} + +enum dbg_status qed_dbg_igu_fifo_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords) +{ + u32 needed_buf_size_in_dwords; + enum dbg_status status; + + status = qed_dbg_igu_fifo_get_dump_buf_size(p_hwfn, p_ptt, + &needed_buf_size_in_dwords); + + *num_dumped_dwords = 0; + if (status != DBG_STATUS_OK) + return status; + if (buf_size_in_dwords < needed_buf_size_in_dwords) + return DBG_STATUS_DUMP_BUF_TOO_SMALL; + + /* Update reset state */ + qed_update_blocks_reset_state(p_hwfn, p_ptt); + return qed_igu_fifo_dump(p_hwfn, + p_ptt, dump_buf, true, num_dumped_dwords); +} + +enum dbg_status +qed_dbg_protection_override_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size) +{ + enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt); + + *buf_size = 0; + if (status != DBG_STATUS_OK) + return status; + return qed_protection_override_dump(p_hwfn, + p_ptt, NULL, false, buf_size); +} + +enum dbg_status qed_dbg_protection_override_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 
*num_dumped_dwords) +{ + u32 needed_buf_size_in_dwords; + enum dbg_status status; + + status = qed_dbg_protection_override_get_dump_buf_size(p_hwfn, p_ptt, + &needed_buf_size_in_dwords); + + *num_dumped_dwords = 0; + if (status != DBG_STATUS_OK) + return status; + if (buf_size_in_dwords < needed_buf_size_in_dwords) + return DBG_STATUS_DUMP_BUF_TOO_SMALL; + + /* Update reset state */ + qed_update_blocks_reset_state(p_hwfn, p_ptt); + return qed_protection_override_dump(p_hwfn, + p_ptt, + dump_buf, true, num_dumped_dwords); +} + +enum dbg_status qed_dbg_fw_asserts_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size) +{ + enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt); + + *buf_size = 0; + if (status != DBG_STATUS_OK) + return status; + + /* Update reset state */ + qed_update_blocks_reset_state(p_hwfn, p_ptt); + *buf_size = qed_fw_asserts_dump(p_hwfn, p_ptt, NULL, false); + return DBG_STATUS_OK; +} + +enum dbg_status qed_dbg_fw_asserts_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords) +{ + u32 needed_buf_size_in_dwords; + enum dbg_status status; + + status = qed_dbg_fw_asserts_get_dump_buf_size(p_hwfn, p_ptt, + &needed_buf_size_in_dwords); + + *num_dumped_dwords = 0; + if (status != DBG_STATUS_OK) + return status; + if (buf_size_in_dwords < needed_buf_size_in_dwords) + return DBG_STATUS_DUMP_BUF_TOO_SMALL; + + *num_dumped_dwords = qed_fw_asserts_dump(p_hwfn, p_ptt, dump_buf, true); + return DBG_STATUS_OK; +} + +/******************************* Data Types **********************************/ + +struct mcp_trace_format { + u32 data; +#define MCP_TRACE_FORMAT_MODULE_MASK 0x0000ffff +#define MCP_TRACE_FORMAT_MODULE_SHIFT 0 +#define MCP_TRACE_FORMAT_LEVEL_MASK 0x00030000 +#define MCP_TRACE_FORMAT_LEVEL_SHIFT 16 +#define MCP_TRACE_FORMAT_P1_SIZE_MASK 0x000c0000 +#define MCP_TRACE_FORMAT_P1_SIZE_SHIFT 18 +#define MCP_TRACE_FORMAT_P2_SIZE_MASK 0x00300000 +#define 
MCP_TRACE_FORMAT_P2_SIZE_SHIFT 20 +#define MCP_TRACE_FORMAT_P3_SIZE_MASK 0x00c00000 +#define MCP_TRACE_FORMAT_P3_SIZE_SHIFT 22 +#define MCP_TRACE_FORMAT_LEN_MASK 0xff000000 +#define MCP_TRACE_FORMAT_LEN_SHIFT 24 + char *format_str; +}; + +struct mcp_trace_meta { + u32 modules_num; + char **modules; + u32 formats_num; + struct mcp_trace_format *formats; +}; + +/* Reg fifo element */ +struct reg_fifo_element { + u64 data; +#define REG_FIFO_ELEMENT_ADDRESS_SHIFT 0 +#define REG_FIFO_ELEMENT_ADDRESS_MASK 0x7fffff +#define REG_FIFO_ELEMENT_ACCESS_SHIFT 23 +#define REG_FIFO_ELEMENT_ACCESS_MASK 0x1 +#define REG_FIFO_ELEMENT_PF_SHIFT 24 +#define REG_FIFO_ELEMENT_PF_MASK 0xf +#define REG_FIFO_ELEMENT_VF_SHIFT 28 +#define REG_FIFO_ELEMENT_VF_MASK 0xff +#define REG_FIFO_ELEMENT_PORT_SHIFT 36 +#define REG_FIFO_ELEMENT_PORT_MASK 0x3 +#define REG_FIFO_ELEMENT_PRIVILEGE_SHIFT 38 +#define REG_FIFO_ELEMENT_PRIVILEGE_MASK 0x3 +#define REG_FIFO_ELEMENT_PROTECTION_SHIFT 40 +#define REG_FIFO_ELEMENT_PROTECTION_MASK 0x7 +#define REG_FIFO_ELEMENT_MASTER_SHIFT 43 +#define REG_FIFO_ELEMENT_MASTER_MASK 0xf +#define REG_FIFO_ELEMENT_ERROR_SHIFT 47 +#define REG_FIFO_ELEMENT_ERROR_MASK 0x1f +}; + +/* IGU fifo element */ +struct igu_fifo_element { + u32 dword0; +#define IGU_FIFO_ELEMENT_DWORD0_FID_SHIFT 0 +#define IGU_FIFO_ELEMENT_DWORD0_FID_MASK 0xff +#define IGU_FIFO_ELEMENT_DWORD0_IS_PF_SHIFT 8 +#define IGU_FIFO_ELEMENT_DWORD0_IS_PF_MASK 0x1 +#define IGU_FIFO_ELEMENT_DWORD0_SOURCE_SHIFT 9 +#define IGU_FIFO_ELEMENT_DWORD0_SOURCE_MASK 0xf +#define IGU_FIFO_ELEMENT_DWORD0_ERR_TYPE_SHIFT 13 +#define IGU_FIFO_ELEMENT_DWORD0_ERR_TYPE_MASK 0xf +#define IGU_FIFO_ELEMENT_DWORD0_CMD_ADDR_SHIFT 17 +#define IGU_FIFO_ELEMENT_DWORD0_CMD_ADDR_MASK 0x7fff + u32 dword1; + u32 dword2; +#define IGU_FIFO_ELEMENT_DWORD12_IS_WR_CMD_SHIFT 0 +#define IGU_FIFO_ELEMENT_DWORD12_IS_WR_CMD_MASK 0x1 +#define IGU_FIFO_ELEMENT_DWORD12_WR_DATA_SHIFT 1 +#define IGU_FIFO_ELEMENT_DWORD12_WR_DATA_MASK 0xffffffff + u32 reserved; 
+}; + +struct igu_fifo_wr_data { + u32 data; +#define IGU_FIFO_WR_DATA_PROD_CONS_SHIFT 0 +#define IGU_FIFO_WR_DATA_PROD_CONS_MASK 0xffffff +#define IGU_FIFO_WR_DATA_UPDATE_FLAG_SHIFT 24 +#define IGU_FIFO_WR_DATA_UPDATE_FLAG_MASK 0x1 +#define IGU_FIFO_WR_DATA_EN_DIS_INT_FOR_SB_SHIFT 25 +#define IGU_FIFO_WR_DATA_EN_DIS_INT_FOR_SB_MASK 0x3 +#define IGU_FIFO_WR_DATA_SEGMENT_SHIFT 27 +#define IGU_FIFO_WR_DATA_SEGMENT_MASK 0x1 +#define IGU_FIFO_WR_DATA_TIMER_MASK_SHIFT 28 +#define IGU_FIFO_WR_DATA_TIMER_MASK_MASK 0x1 +#define IGU_FIFO_WR_DATA_CMD_TYPE_SHIFT 31 +#define IGU_FIFO_WR_DATA_CMD_TYPE_MASK 0x1 +}; + +struct igu_fifo_cleanup_wr_data { + u32 data; +#define IGU_FIFO_CLEANUP_WR_DATA_RESERVED_SHIFT 0 +#define IGU_FIFO_CLEANUP_WR_DATA_RESERVED_MASK 0x7ffffff +#define IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_VAL_SHIFT 27 +#define IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_VAL_MASK 0x1 +#define IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_TYPE_SHIFT 28 +#define IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_TYPE_MASK 0x7 +#define IGU_FIFO_CLEANUP_WR_DATA_CMD_TYPE_SHIFT 31 +#define IGU_FIFO_CLEANUP_WR_DATA_CMD_TYPE_MASK 0x1 +}; + +/* Protection override element */ +struct protection_override_element { + u64 data; +#define PROTECTION_OVERRIDE_ELEMENT_ADDRESS_SHIFT 0 +#define PROTECTION_OVERRIDE_ELEMENT_ADDRESS_MASK 0x7fffff +#define PROTECTION_OVERRIDE_ELEMENT_WINDOW_SIZE_SHIFT 23 +#define PROTECTION_OVERRIDE_ELEMENT_WINDOW_SIZE_MASK 0xffffff +#define PROTECTION_OVERRIDE_ELEMENT_READ_SHIFT 47 +#define PROTECTION_OVERRIDE_ELEMENT_READ_MASK 0x1 +#define PROTECTION_OVERRIDE_ELEMENT_WRITE_SHIFT 48 +#define PROTECTION_OVERRIDE_ELEMENT_WRITE_MASK 0x1 +#define PROTECTION_OVERRIDE_ELEMENT_READ_PROTECTION_SHIFT 49 +#define PROTECTION_OVERRIDE_ELEMENT_READ_PROTECTION_MASK 0x7 +#define PROTECTION_OVERRIDE_ELEMENT_WRITE_PROTECTION_SHIFT 52 +#define PROTECTION_OVERRIDE_ELEMENT_WRITE_PROTECTION_MASK 0x7 +}; + +enum igu_fifo_sources { + IGU_SRC_PXP0, + IGU_SRC_PXP1, + IGU_SRC_PXP2, + IGU_SRC_PXP3, + IGU_SRC_PXP4, + IGU_SRC_PXP5, 
+ IGU_SRC_PXP6, + IGU_SRC_PXP7, + IGU_SRC_CAU, + IGU_SRC_ATTN, + IGU_SRC_GRC +}; + +enum igu_fifo_addr_types { + IGU_ADDR_TYPE_MSIX_MEM, + IGU_ADDR_TYPE_WRITE_PBA, + IGU_ADDR_TYPE_WRITE_INT_ACK, + IGU_ADDR_TYPE_WRITE_ATTN_BITS, + IGU_ADDR_TYPE_READ_INT, + IGU_ADDR_TYPE_WRITE_PROD_UPDATE, + IGU_ADDR_TYPE_RESERVED +}; + +struct igu_fifo_addr_data { + u16 start_addr; + u16 end_addr; + char *desc; + char *vf_desc; + enum igu_fifo_addr_types type; +}; + +/******************************** Constants **********************************/ + +#define MAX_MSG_LEN 1024 +#define MCP_TRACE_MAX_MODULE_LEN 8 +#define MCP_TRACE_FORMAT_MAX_PARAMS 3 +#define MCP_TRACE_FORMAT_PARAM_WIDTH \ + (MCP_TRACE_FORMAT_P2_SIZE_SHIFT - MCP_TRACE_FORMAT_P1_SIZE_SHIFT) +#define REG_FIFO_ELEMENT_ADDR_FACTOR 4 +#define REG_FIFO_ELEMENT_IS_PF_VF_VAL 127 +#define PROTECTION_OVERRIDE_ELEMENT_ADDR_FACTOR 4 + +/********************************* Macros ************************************/ + +#define BYTES_TO_DWORDS(bytes) ((bytes) / BYTES_IN_DWORD) + +/***************************** Constant Arrays *******************************/ + +/* Status string array */ +static const char * const s_status_str[] = { + "Operation completed successfully", + "Debug application version wasn't set", + "Unsupported debug application version", + "The debug block wasn't reset since the last recording", + "Invalid arguments", + "The debug output was already set", + "Invalid PCI buffer size", + "PCI buffer allocation failed", + "A PCI buffer wasn't allocated", + "Too many inputs were enabled. 
Enabled less inputs, or set 'unifyInputs' to true", + "GRC/Timestamp input overlap in cycle dword 0", + "Cannot record Storm data since the entire recording cycle is used by HW", + "The Storm was already enabled", + "The specified Storm wasn't enabled", + "The block was already enabled", + "The specified block wasn't enabled", + "No input was enabled for recording", + "Filters and triggers are not allowed when recording in 64b units", + "The filter was already enabled", + "The trigger was already enabled", + "The trigger wasn't enabled", + "A constraint can be added only after a filter was enabled or a trigger state was added", + "Cannot add more than 3 trigger states", + "Cannot add more than 4 constraints per filter or trigger state", + "The recording wasn't started", + "A trigger was configured, but it didn't trigger", + "No data was recorded", + "Dump buffer is too small", + "Dumped data is not aligned to chunks", + "Unknown chip", + "Failed allocating virtual memory", + "The input block is in reset", + "Invalid MCP trace signature found in NVRAM", + "Invalid bundle ID found in NVRAM", + "Failed getting NVRAM image", + "NVRAM image is not dword-aligned", + "Failed reading from NVRAM", + "Idle check parsing failed", + "MCP Trace data is corrupt", + "Dump doesn't contain meta data - it must be provided in an image file", + "Failed to halt MCP", + "Failed to resume MCP after halt", + "DMAE transaction failed", + "Failed to empty SEMI sync FIFO", + "IGU FIFO data is corrupt", + "MCP failed to mask parities", + "FW Asserts parsing failed", + "GRC FIFO data is corrupt", + "Protection Override data is corrupt", + "Debug arrays were not set (when using binary files, dbg_set_bin_ptr must be called)", + "When a block is filtered, no other blocks can be recorded unless inputs are unified (due to a HW bug)" +}; + +/* Idle check severity names array */ +static const char * const s_idle_chk_severity_str[] = { + "Error", + "Error if no traffic", + "Warning" +}; + +/* MCP 
Trace level names array */ +static const char * const s_mcp_trace_level_str[] = { + "ERROR", + "TRACE", + "DEBUG" +}; + +/* Parsing strings */ +static const char * const s_access_strs[] = { + "read", + "write" +}; + +static const char * const s_privilege_strs[] = { + "VF", + "PDA", + "HV", + "UA" +}; + +static const char * const s_protection_strs[] = { + "(default)", + "(default)", + "(default)", + "(default)", + "override VF", + "override PDA", + "override HV", + "override UA" +}; + +static const char * const s_master_strs[] = { + "???", + "pxp", + "mcp", + "msdm", + "psdm", + "ysdm", + "usdm", + "tsdm", + "xsdm", + "dbu", + "dmae", + "???", + "???", + "???", + "???", + "???" +}; + +static const char * const s_reg_fifo_error_strs[] = { + "grc timeout", + "address doesn't belong to any block", + "reserved address in block or write to read-only address", + "privilege/protection mismatch", + "path isolation error" +}; + +static const char * const s_igu_fifo_source_strs[] = { + "TSTORM", + "MSTORM", + "USTORM", + "XSTORM", + "YSTORM", + "PSTORM", + "PCIE", + "NIG_QM_PBF", + "CAU", + "ATTN", + "GRC", +}; + +static const char * const s_igu_fifo_error_strs[] = { + "no error", + "length error", + "function disabled", + "VF sent command to attnetion address", + "host sent prod update command", + "read of during interrupt register while in MIMD mode", + "access to PXP BAR reserved address", + "producer update command to attention index", + "unknown error", + "SB index not valid", + "SB relative index and FID not found", + "FID not match", + "command with error flag asserted (PCI error or CAU discard)", + "VF sent cleanup and RF cleanup is disabled", + "cleanup command on type bigger than 4" +}; + +/* IGU FIFO address data */ +static const struct igu_fifo_addr_data s_igu_fifo_addr_data[] = { + {0x0, 0x101, "MSI-X Memory", NULL, IGU_ADDR_TYPE_MSIX_MEM}, + {0x102, 0x1ff, "reserved", NULL, IGU_ADDR_TYPE_RESERVED}, + {0x200, 0x200, "Write PBA[0:63]", NULL, 
IGU_ADDR_TYPE_WRITE_PBA}, + {0x201, 0x201, "Write PBA[64:127]", "reserved", + IGU_ADDR_TYPE_WRITE_PBA}, + {0x202, 0x202, "Write PBA[128]", "reserved", IGU_ADDR_TYPE_WRITE_PBA}, + {0x203, 0x3ff, "reserved", NULL, IGU_ADDR_TYPE_RESERVED}, + {0x400, 0x5ef, "Write interrupt acknowledgment", NULL, + IGU_ADDR_TYPE_WRITE_INT_ACK}, + {0x5f0, 0x5f0, "Attention bits update", NULL, + IGU_ADDR_TYPE_WRITE_ATTN_BITS}, + {0x5f1, 0x5f1, "Attention bits set", NULL, + IGU_ADDR_TYPE_WRITE_ATTN_BITS}, + {0x5f2, 0x5f2, "Attention bits clear", NULL, + IGU_ADDR_TYPE_WRITE_ATTN_BITS}, + {0x5f3, 0x5f3, "Read interrupt 0:63 with mask", NULL, + IGU_ADDR_TYPE_READ_INT}, + {0x5f4, 0x5f4, "Read interrupt 0:31 with mask", NULL, + IGU_ADDR_TYPE_READ_INT}, + {0x5f5, 0x5f5, "Read interrupt 32:63 with mask", NULL, + IGU_ADDR_TYPE_READ_INT}, + {0x5f6, 0x5f6, "Read interrupt 0:63 without mask", NULL, + IGU_ADDR_TYPE_READ_INT}, + {0x5f7, 0x5ff, "reserved", NULL, IGU_ADDR_TYPE_RESERVED}, + {0x600, 0x7ff, "Producer update", NULL, IGU_ADDR_TYPE_WRITE_PROD_UPDATE} +}; + +/******************************** Variables **********************************/ + +/* MCP Trace meta data - used in case the dump doesn't contain the meta data + * (e.g. due to no NVRAM access). 
+ */
+static struct dbg_array s_mcp_trace_meta = { NULL, 0 };
+
+/* Scratch buffer that absorbs formatted output when only the printed size is
+ * being calculated.
+ */
+static char s_temp_buf[MAX_MSG_LEN];
+
+/***************************** Public Functions *******************************/
+
+/* Registers the debug arrays contained in a binary image. The first dword of
+ * the image is the number of buffers; it is followed by one bin_buffer_hdr per
+ * buffer giving the byte offset and byte length of that buffer inside the
+ * image. Always returns DBG_STATUS_OK.
+ *
+ * NOTE(review): the buffer count comes from the image and is not checked
+ * against the capacity of s_dbg_arrays, and the 8-bit index can never reach a
+ * count above 255 - confirm that the image is always trusted.
+ */
+enum dbg_status qed_dbg_user_set_bin_ptr(const u8 * const bin_ptr)
+{
+	const u32 hdr_count = *(u32 *)bin_ptr;
+	struct bin_buffer_hdr *hdrs =
+	    (struct bin_buffer_hdr *)((u32 *)bin_ptr + 1);
+	u8 idx = 0;
+
+	while (idx < hdr_count) {
+		struct bin_buffer_hdr *hdr = &hdrs[idx];
+
+		s_dbg_arrays[idx].ptr = (u32 *)(bin_ptr + hdr->offset);
+		s_dbg_arrays[idx].size_in_dwords =
+		    BYTES_TO_DWORDS(hdr->length);
+		idx++;
+	}
+
+	return DBG_STATUS_OK;
+}
+
+/* Returns (a + b) modulo size. */
+static u32 qed_cyclic_add(u32 a, u32 b, u32 size)
+{
+	u32 sum = a + b;
+
+	return sum % size;
+}
+
+/* Returns (size + a - b) modulo size. */
+static u32 qed_cyclic_sub(u32 a, u32 b, u32 size)
+{
+	u32 diff = a + size - b;
+
+	return diff % size;
+}
+
+/* Gathers num_bytes_to_read bytes (at most 4) from a cyclic buffer of buf_size
+ * bytes into a dword: the first byte read is stored in the lowest-addressed
+ * byte of the result and the unread bytes stay zero. The buffer offset is
+ * advanced cyclically past the bytes that were read.
+ */
+static u32 qed_read_from_cyclic_buf(void *buf,
+				    u32 *offset,
+				    u32 buf_size, u8 num_bytes_to_read)
+{
+	const u8 *src = (const u8 *)buf;
+	u32 val = 0;
+	u8 *dst = (u8 *)&val;
+	u8 remaining = num_bytes_to_read;
+
+	while (remaining--) {
+		*dst++ = src[*offset];
+		*offset = qed_cyclic_add(*offset, 1, buf_size);
+	}
+
+	return val;
+}
+
+/* Returns the byte found at the current offset of the specified buffer and
+ * moves the offset one byte forward.
+ */
+static u8 qed_read_byte_from_buf(void *buf, u32 *offset)
+{
+	const u8 *bytes = (const u8 *)buf;
+	u8 val = bytes[*offset];
+
+	(*offset)++;
+	return val;
+}
+
+/* Reads and returns the next dword from the specified buffer.
+ * The specified buffer offset is updated.
+ */
+static u32 qed_read_dword_from_buf(void *buf, u32 *offset)
+{
+	u32 dword_val;
+
+	/* The offset counts bytes and is not necessarily dword-aligned (byte
+	 * sized fields and strings may precede the dword), so copy the bytes
+	 * instead of dereferencing a possibly misaligned u32 pointer.
+	 */
+	memcpy(&dword_val, (const u8 *)buf + *offset, sizeof(dword_val));
+	*offset += sizeof(dword_val);
+	return dword_val;
+}
+
+/* Copies at most size bytes of the string found at the current offset of the
+ * specified buffer to dest, and advances the buffer offset by size.
+ * The last of the size destination bytes is always set to a NUL terminator,
+ * so dest must have room for size bytes.
+ * A size of zero copies nothing and leaves both dest and the offset untouched.
+ */
+static void qed_read_str_from_buf(void *buf, u32 *offset, u32 size, char *dest)
+{
+	const char *source_str = &((const char *)buf)[*offset];
+
+	/* Without this guard dest[size - 1] would be written before dest */
+	if (!size)
+		return;
+
+	strncpy(dest, source_str, size);
+	dest[size - 1] = '\0';
+	*offset += size;
+}
+
+/* Returns a pointer to the specified offset (in bytes) of the specified buffer.
+ * If the specified buffer is NULL, a pointer to the start of the temporary
+ * buffer is returned instead (the offset is ignored in that case).
+ */
+static char *qed_get_buf_ptr(void *buf, u32 offset)
+{
+	return buf ? (char *)buf + offset : s_temp_buf;
+}
+
+/* Reads a param from the specified buffer. Returns the number of dwords read.
+ * A param is a NUL-terminated name followed by a type byte: a non-zero type
+ * byte is followed by a NUL-terminated string value, a zero type byte is
+ * followed (after padding to a dword boundary) by a dword value.
+ * If the returned param_str_val is NULL, the param is numeric and its value is
+ * returned in param_num_val.
+ * Otherwise, the param is a string, its pointer is returned in param_str_val
+ * and param_num_val is left untouched.
+ */
+static u32 qed_read_param(u32 *dump_buf,
+			  const char **param_name,
+			  const char **param_str_val, u32 *param_num_val)
+{
+	char *char_buf = (char *)dump_buf;
+	u32 offset = 0; /* In bytes */
+
+	/* Extract param name */
+	*param_name = char_buf;
+	offset += strlen(*param_name) + 1;
+
+	/* Check param type */
+	if (*(char_buf + offset++)) {
+		/* String param */
+		*param_str_val = char_buf + offset;
+		offset += strlen(*param_str_val) + 1;
+
+		/* Round up to the next dword boundary */
+		if (offset & 0x3)
+			offset += (4 - (offset & 0x3));
+	} else {
+		/* Numeric param - the value starts on a dword boundary */
+		*param_str_val = NULL;
+		if (offset & 0x3)
+			offset += (4 - (offset & 0x3));
+		*param_num_val = *(u32 *)(char_buf + offset);
+		offset += 4;
+	}
+
+	return offset / 4;
+}
+
+/* Reads a section header from the specified buffer.
+ * Returns the number of dwords read.
+ */
+static u32 qed_read_section_hdr(u32 *dump_buf,
+				const char **section_name,
+				u32 *num_section_params)
+{
+	const char *param_str_val;
+
+	/* qed_read_param() stores a number only for numeric params, so give
+	 * the caller a defined count even if the header is malformed.
+	 */
+	*num_section_params = 0;
+
+	return qed_read_param(dump_buf,
+			      section_name, &param_str_val, num_section_params);
+}
+
+/* Reads section params from the specified buffer and prints them to the results
+ * buffer (or to the temporary buffer if results_buf is NULL), one
+ * "name: value" line per param followed by an empty line.
+ * The numeric "fw-timestamp" param is read but not printed.
+ * The number of printed characters is returned in num_chars_printed.
+ * Returns the number of dwords read.
+ */
+static u32 qed_print_section_params(u32 *dump_buf,
+				    u32 num_section_params,
+				    char *results_buf, u32 *num_chars_printed)
+{
+	u32 i, dump_offset = 0, results_offset = 0;
+
+	for (i = 0; i < num_section_params; i++) {
+		const char *param_name;
+		const char *param_str_val;
+		u32 param_num_val = 0;
+
+		dump_offset += qed_read_param(dump_buf + dump_offset,
+					      &param_name,
+					      &param_str_val, &param_num_val);
+		if (param_str_val)
+			/* String param */
+			results_offset +=
+			    sprintf(qed_get_buf_ptr(results_buf,
+						    results_offset),
+				    "%s: %s\n", param_name, param_str_val);
+		else if (strcmp(param_name, "fw-timestamp"))
+			/* Numeric param */
+			results_offset +=
+			    sprintf(qed_get_buf_ptr(results_buf,
+						    results_offset),
+				    "%s: %d\n", param_name, param_num_val);
+	}
+
+	results_offset +=
+	    sprintf(qed_get_buf_ptr(results_buf, results_offset), "\n");
+	*num_chars_printed = results_offset;
+	return dump_offset;
+}
+
+/* Returns the description string of the specified debug status, or a fixed
+ * "invalid" string if the status has no entry in the status string table.
+ */
+const char *qed_dbg_get_status_str(enum dbg_status status)
+{
+	/* Also bound the lookup by the table itself, in case the enum and the
+	 * table ever get out of sync.
+	 */
+	if (status < MAX_DBG_STATUS && status < ARRAY_SIZE(s_status_str))
+		return s_status_str[status];
+
+	return "Invalid debug status";
+}
+
+/* Parses the idle check rules and returns the number of characters printed.
+ * In case of parsing error, returns 0.
+ */
+static u32 qed_parse_idle_chk_dump_rules(struct qed_hwfn *p_hwfn,
+					 u32 *dump_buf,
+					 u32 *dump_buf_end,
+					 u32 num_rules,
+					 bool print_fw_idle_chk,
+					 char *results_buf,
+					 u32 *num_errors, u32 *num_warnings)
+{
+	const u32 rule_hdr_dwords =
+	    sizeof(struct dbg_idle_chk_result_hdr) / 4;
+	const u32 reg_hdr_dwords =
+	    sizeof(struct dbg_idle_chk_result_reg_hdr) / 4;
+	u32 rule_idx, results_offset = 0; /* Offset in results_buf in bytes */
+	u16 i, j;
+
+	*num_errors = 0;
+	*num_warnings = 0;
+
+	/* Go over dumped results */
+	for (rule_idx = 0; rule_idx < num_rules && dump_buf < dump_buf_end;
+	     rule_idx++) {
+		const struct dbg_idle_chk_rule_parsing_data *rule_parsing_data;
+		struct dbg_idle_chk_result_hdr *hdr;
+		const char *parsing_str;
+		u32 parsing_str_offset;
+		const char *lsi_msg;
+		u8 curr_reg_id = 0;
+		bool has_fw_msg;
+
+		/* Fail on a truncated dump before its rule header is read,
+		 * rather than after reading past the end of the buffer.
+		 */
+		if ((u32)(dump_buf_end - dump_buf) < rule_hdr_dwords)
+			return 0;
+
+		/* NOTE(review): rule_id and the string offset below come from
+		 * the dump / debug arrays and are not range-checked here.
+		 */
+		hdr = (struct dbg_idle_chk_result_hdr *)dump_buf;
+		rule_parsing_data =
+		    (const struct dbg_idle_chk_rule_parsing_data *)
+		    &s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_PARSING_DATA].
+		    ptr[hdr->rule_id];
+		parsing_str_offset =
+		    GET_FIELD(rule_parsing_data->data,
+			      DBG_IDLE_CHK_RULE_PARSING_DATA_STR_OFFSET);
+		has_fw_msg =
+		    GET_FIELD(rule_parsing_data->data,
+			      DBG_IDLE_CHK_RULE_PARSING_DATA_HAS_FW_MSG) > 0;
+		parsing_str = &((const char *)
+				s_dbg_arrays[BIN_BUF_DBG_PARSING_STRINGS].ptr)
+		    [parsing_str_offset];
+		lsi_msg = parsing_str;
+
+		if (hdr->severity >= MAX_DBG_IDLE_CHK_SEVERITY_TYPES)
+			return 0;
+
+		/* Skip rule header */
+		dump_buf += rule_hdr_dwords;
+
+		/* Update errors/warnings count */
+		if (hdr->severity == IDLE_CHK_SEVERITY_ERROR ||
+		    hdr->severity == IDLE_CHK_SEVERITY_ERROR_NO_TRAFFIC)
+			(*num_errors)++;
+		else
+			(*num_warnings)++;
+
+		/* Print rule severity */
+		results_offset +=
+		    sprintf(qed_get_buf_ptr(results_buf,
+					    results_offset), "%s: ",
+			    s_idle_chk_severity_str[hdr->severity]);
+
+		/* Print rule message. The first parsing string is the LSI
+		 * message; if the rule has a FW message it is the second
+		 * string. The register names follow the message(s).
+		 */
+		if (has_fw_msg)
+			parsing_str += strlen(parsing_str) + 1;
+		results_offset +=
+		    sprintf(qed_get_buf_ptr(results_buf,
+					    results_offset), "%s.",
+			    has_fw_msg &&
+			    print_fw_idle_chk ? parsing_str : lsi_msg);
+		parsing_str += strlen(parsing_str) + 1;
+
+		/* Print register values */
+		results_offset +=
+		    sprintf(qed_get_buf_ptr(results_buf,
+					    results_offset), " Registers:");
+		for (i = 0;
+		     i < hdr->num_dumped_cond_regs + hdr->num_dumped_info_regs;
+		     i++) {
+			struct dbg_idle_chk_result_reg_hdr *reg_hdr;
+			bool is_mem;
+			u8 reg_id;
+
+			/* The register header must lie inside the dump */
+			if ((u32)(dump_buf_end - dump_buf) < reg_hdr_dwords)
+				return 0;
+
+			reg_hdr = (struct dbg_idle_chk_result_reg_hdr *)
+			    dump_buf;
+			is_mem =
+			    GET_FIELD(reg_hdr->data,
+				      DBG_IDLE_CHK_RESULT_REG_HDR_IS_MEM);
+			reg_id =
+			    GET_FIELD(reg_hdr->data,
+				      DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID);
+
+			/* Skip reg header */
+			dump_buf += reg_hdr_dwords;
+
+			/* The register values must lie inside the dump too */
+			if (reg_hdr->size > (u32)(dump_buf_end - dump_buf))
+				return 0;
+
+			/* Skip register names until the required reg_id is
+			 * reached.
+			 */
+			while (curr_reg_id < reg_id) {
+				parsing_str += strlen(parsing_str) + 1;
+				curr_reg_id++;
+			}
+
+			results_offset +=
+			    sprintf(qed_get_buf_ptr(results_buf,
+						    results_offset), " %s",
+				    parsing_str);
+			if (i < hdr->num_dumped_cond_regs && is_mem)
+				results_offset +=
+				    sprintf(qed_get_buf_ptr(results_buf,
+							    results_offset),
+					    "[%d]", hdr->mem_entry_id +
+					    reg_hdr->start_entry);
+			results_offset +=
+			    sprintf(qed_get_buf_ptr(results_buf,
+						    results_offset), "=");
+			for (j = 0; j < reg_hdr->size; j++, dump_buf++) {
+				results_offset +=
+				    sprintf(qed_get_buf_ptr(results_buf,
+							    results_offset),
+					    "0x%x", *dump_buf);
+				if (j < reg_hdr->size - 1)
+					results_offset +=
+					    sprintf(qed_get_buf_ptr
+						    (results_buf,
+						     results_offset), ",");
+			}
+		}
+
+		results_offset +=
+		    sprintf(qed_get_buf_ptr(results_buf, results_offset), "\n");
+	}
+
+	/* Check if end of dump buffer was exceeded */
+	if (dump_buf > dump_buf_end)
+		return 0;
+	return results_offset;
+}
+
+/* Parses an idle check dump buffer.
+ * If result_buf is not NULL, the idle check results are printed to it.
+ * In any case, the required results buffer size is assigned to
+ * parsed_results_bytes.
+ * The parsing status is returned.
+ */
+static enum dbg_status qed_parse_idle_chk_dump(struct qed_hwfn *p_hwfn,
+					       u32 *dump_buf,
+					       u32 num_dumped_dwords,
+					       char *results_buf,
+					       u32 *parsed_results_bytes,
+					       u32 *num_errors,
+					       u32 *num_warnings)
+{
+	const char *section_name, *param_name, *param_str_val;
+	u32 *dump_buf_end = dump_buf + num_dumped_dwords;
+	u32 num_section_params = 0, num_rules = 0;
+	u32 results_offset = 0; /* Offset in results_buf in bytes */
+
+	*parsed_results_bytes = 0;
+	*num_errors = 0;
+	*num_warnings = 0;
+	if (!s_dbg_arrays[BIN_BUF_DBG_PARSING_STRINGS].ptr ||
+	    !s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_PARSING_DATA].ptr)
+		return DBG_STATUS_DBG_ARRAY_NOT_SET;
+
+	/* Read global_params section */
+	dump_buf += qed_read_section_hdr(dump_buf,
+					 &section_name, &num_section_params);
+	if (strcmp(section_name, "global_params"))
+		return DBG_STATUS_IDLE_CHK_PARSE_FAILED;
+
+	/* Print global params */
+	dump_buf += qed_print_section_params(dump_buf,
+					     num_section_params,
+					     results_buf, &results_offset);
+
+	/* Read idle_chk section */
+	dump_buf += qed_read_section_hdr(dump_buf,
+					 &section_name, &num_section_params);
+	if (strcmp(section_name, "idle_chk") || num_section_params != 1)
+		return DBG_STATUS_IDLE_CHK_PARSE_FAILED;
+
+	/* The rules count must be a numeric param: for a string param
+	 * qed_read_param() does not store any number.
+	 */
+	dump_buf += qed_read_param(dump_buf,
+				   &param_name, &param_str_val, &num_rules);
+	if (strcmp(param_name, "num_rules") != 0 || param_str_val)
+		return DBG_STATUS_IDLE_CHK_PARSE_FAILED;
+
+	if (num_rules) {
+		u32 rules_print_size;
+
+		/* Print FW output */
+		results_offset +=
+		    sprintf(qed_get_buf_ptr(results_buf,
+					    results_offset),
+			    "FW_IDLE_CHECK:\n");
+		rules_print_size =
+		    qed_parse_idle_chk_dump_rules(p_hwfn, dump_buf,
+						  dump_buf_end, num_rules,
+						  true,
+						  results_buf ?
+						  results_buf +
+						  results_offset : NULL,
+						  num_errors, num_warnings);
+		results_offset += rules_print_size;
+		if (rules_print_size == 0)
+			return DBG_STATUS_IDLE_CHK_PARSE_FAILED;
+
+		/* Print LSI output (same rules, parsed a second time) */
+		results_offset +=
+		    sprintf(qed_get_buf_ptr(results_buf,
+					    results_offset),
+			    "\nLSI_IDLE_CHECK:\n");
+		rules_print_size =
+		    qed_parse_idle_chk_dump_rules(p_hwfn, dump_buf,
+						  dump_buf_end, num_rules,
+						  false,
+						  results_buf ?
+						  results_buf +
+						  results_offset : NULL,
+						  num_errors, num_warnings);
+		results_offset += rules_print_size;
+		if (rules_print_size == 0)
+			return DBG_STATUS_IDLE_CHK_PARSE_FAILED;
+	}
+
+	/* Print errors/warnings count */
+	if (*num_errors) {
+		results_offset +=
+		    sprintf(qed_get_buf_ptr(results_buf,
+					    results_offset),
+			    "\nIdle Check failed!!! (with %d errors and %d warnings)\n",
+			    *num_errors, *num_warnings);
+	} else if (*num_warnings) {
+		results_offset +=
+		    sprintf(qed_get_buf_ptr(results_buf,
+					    results_offset),
+			    "\nIdle Check completed successfuly (with %d warnings)\n",
+			    *num_warnings);
+	} else {
+		results_offset +=
+		    sprintf(qed_get_buf_ptr(results_buf,
+					    results_offset),
+			    "\nIdle Check completed successfuly\n");
+	}
+
+	/* Add 1 for string NULL termination */
+	*parsed_results_bytes = results_offset + 1;
+	return DBG_STATUS_OK;
+}
+
+/* Calculates the size (in bytes, including the string terminator) of the
+ * buffer required for printing the specified idle check dump.
+ * The parsing status is returned.
+ */
+enum dbg_status qed_get_idle_chk_results_buf_size(struct qed_hwfn *p_hwfn,
+						  u32 *dump_buf,
+						  u32 num_dumped_dwords,
+						  u32 *results_buf_size)
+{
+	u32 num_errors, num_warnings;
+
+	return qed_parse_idle_chk_dump(p_hwfn,
+				       dump_buf,
+				       num_dumped_dwords,
+				       NULL,
+				       results_buf_size,
+				       &num_errors, &num_warnings);
+}
+
+/* Prints the specified idle check dump to results_buf, and returns the number
+ * of errors and warnings found in it.
+ * The parsing status is returned.
+ */
+enum dbg_status qed_print_idle_chk_results(struct qed_hwfn *p_hwfn,
+					   u32 *dump_buf,
+					   u32 num_dumped_dwords,
+					   char *results_buf,
+					   u32 *num_errors, u32 *num_warnings)
+{
+	u32 parsed_buf_size;
+
+	return qed_parse_idle_chk_dump(p_hwfn,
+				       dump_buf,
+				       num_dumped_dwords,
+				       results_buf,
+				       &parsed_buf_size,
+				       num_errors, num_warnings);
+}
+
+/* Frees the specified MCP Trace meta data */
+static void qed_mcp_trace_free_meta(struct qed_hwfn *p_hwfn,
+				    struct mcp_trace_meta *meta)
+{
+	u32 i;
+
+	/* Release modules */
+	if (meta->modules) {
+		for (i = 0; i < meta->modules_num; i++)
+			kfree(meta->modules[i]);
+		kfree(meta->modules);
+	}
+
+	/* Release formats */
+	if (meta->formats) {
+		for (i = 0; i < meta->formats_num; i++)
+			kfree(meta->formats[i].format_str);
+		kfree(meta->formats);
+	}
+}
+
+/* Allocates and fills MCP Trace meta data based on the specified meta data
+ * dump buffer.
+ * The buffer holds a signature, a byte with the number of modules, the module
+ * strings (each preceded by a length byte), a second signature, a dword with
+ * the number of formats, and the formats (each a data dword followed by the
+ * format string, whose length is encoded in the data dword).
+ * On failure, whatever was allocated so far is left in meta with the counts
+ * adjusted, so that the meta data can still be passed to
+ * qed_mcp_trace_free_meta().
+ * Returns debug status code.
+ */
+static enum dbg_status qed_mcp_trace_alloc_meta(struct qed_hwfn *p_hwfn,
+						const u32 *meta_buf,
+						struct mcp_trace_meta *meta)
+{
+	u8 *meta_buf_bytes = (u8 *)meta_buf;
+	u32 offset = 0, signature, i;
+
+	memset(meta, 0, sizeof(*meta));
+
+	/* Read first signature */
+	signature = qed_read_dword_from_buf(meta_buf_bytes, &offset);
+	if (signature != MCP_TRACE_META_IMAGE_SIGNATURE)
+		return DBG_STATUS_INVALID_TRACE_SIGNATURE;
+
+	/* Read number of modules and allocate memory for all the modules
+	 * pointers.
+	 */
+	meta->modules_num = qed_read_byte_from_buf(meta_buf_bytes, &offset);
+	meta->modules = kcalloc(meta->modules_num, sizeof(char *), GFP_KERNEL);
+	if (!meta->modules)
+		return DBG_STATUS_VIRT_MEM_ALLOC_FAILED;
+
+	/* Allocate and read all module strings */
+	for (i = 0; i < meta->modules_num; i++) {
+		u8 module_len = qed_read_byte_from_buf(meta_buf_bytes, &offset);
+		char *module;
+
+		/* One spare byte keeps a zero-length entry a valid "" string */
+		module = kzalloc(module_len + 1, GFP_KERNEL);
+		meta->modules[i] = module;
+		if (!module) {
+			/* Modules 0..i-1 were allocated and must be released */
+			meta->modules_num = i;
+			return DBG_STATUS_VIRT_MEM_ALLOC_FAILED;
+		}
+
+		if (module_len)
+			qed_read_str_from_buf(meta_buf_bytes, &offset,
+					      module_len, module);
+		if (module_len > MCP_TRACE_MAX_MODULE_LEN)
+			module[MCP_TRACE_MAX_MODULE_LEN] = '\0';
+	}
+
+	/* Read second signature */
+	signature = qed_read_dword_from_buf(meta_buf_bytes, &offset);
+	if (signature != MCP_TRACE_META_IMAGE_SIGNATURE)
+		return DBG_STATUS_INVALID_TRACE_SIGNATURE;
+
+	/* Read number of formats and allocate memory for all formats.
+	 * The count is a full dword taken from the buffer, so let kcalloc()
+	 * catch an overflowing array size.
+	 */
+	meta->formats_num = qed_read_dword_from_buf(meta_buf_bytes, &offset);
+	meta->formats = kcalloc(meta->formats_num,
+				sizeof(struct mcp_trace_format),
+				GFP_KERNEL);
+	if (!meta->formats)
+		return DBG_STATUS_VIRT_MEM_ALLOC_FAILED;
+
+	/* Allocate and read all strings */
+	for (i = 0; i < meta->formats_num; i++) {
+		struct mcp_trace_format *format_ptr = &meta->formats[i];
+		u8 format_len;
+
+		format_ptr->data = qed_read_dword_from_buf(meta_buf_bytes,
+							   &offset);
+		format_len =
+		    (format_ptr->data &
+		     MCP_TRACE_FORMAT_LEN_MASK) >> MCP_TRACE_FORMAT_LEN_SHIFT;
+
+		/* One spare byte keeps a zero-length entry a valid "" string */
+		format_ptr->format_str = kzalloc(format_len + 1, GFP_KERNEL);
+		if (!format_ptr->format_str) {
+			/* Formats 0..i-1 were allocated and must be released */
+			meta->formats_num = i;
+			return DBG_STATUS_VIRT_MEM_ALLOC_FAILED;
+		}
+
+		if (format_len)
+			qed_read_str_from_buf(meta_buf_bytes,
+					      &offset,
+					      format_len,
+					      format_ptr->format_str);
+	}
+
+	return DBG_STATUS_OK;
+}
+
+/* Parses an MCP Trace dump buffer.
+ * If result_buf is not NULL, the MCP Trace results are printed to it.
+ * In any case, the required results buffer size is assigned to
+ * parsed_results_bytes.
+ * The parsing status is returned.
+ */
+static enum dbg_status qed_parse_mcp_trace_dump(struct qed_hwfn *p_hwfn,
+						u32 *dump_buf,
+						u32 num_dumped_dwords,
+						char *results_buf,
+						u32 *parsed_results_bytes)
+{
+	u32 results_offset = 0, param_mask, param_shift, param_num_val = 0;
+	u32 num_section_params = 0, offset, end_offset, bytes_left;
+	const char *section_name, *param_name, *param_str_val;
+	u32 trace_data_dwords, trace_meta_dwords;
+	struct mcp_trace_meta meta;
+	struct mcp_trace *trace;
+	enum dbg_status status;
+	const u32 *meta_buf;
+	u8 *trace_buf;
+
+	*parsed_results_bytes = 0;
+
+	/* Read global_params section */
+	dump_buf += qed_read_section_hdr(dump_buf,
+					 &section_name, &num_section_params);
+	if (strcmp(section_name, "global_params"))
+		return DBG_STATUS_MCP_TRACE_BAD_DATA;
+
+	/* Print global params */
+	dump_buf += qed_print_section_params(dump_buf,
+					     num_section_params,
+					     results_buf, &results_offset);
+
+	/* Read trace_data section */
+	dump_buf += qed_read_section_hdr(dump_buf,
+					 &section_name, &num_section_params);
+	if (strcmp(section_name, "mcp_trace_data") || num_section_params != 1)
+		return DBG_STATUS_MCP_TRACE_BAD_DATA;
+
+	/* The size must be a numeric param: for a string param no number is
+	 * stored by qed_read_param().
+	 */
+	dump_buf += qed_read_param(dump_buf,
+				   &param_name, &param_str_val, &param_num_val);
+	if (strcmp(param_name, "size") || param_str_val)
+		return DBG_STATUS_MCP_TRACE_BAD_DATA;
+	trace_data_dwords = param_num_val;
+
+	/* Prepare trace info */
+	trace = (struct mcp_trace *)dump_buf;
+
+	/* The size is the modulus of all cyclic offset calculations below */
+	if (!trace->size)
+		return DBG_STATUS_MCP_TRACE_BAD_DATA;
+
+	trace_buf = (u8 *)dump_buf + sizeof(struct mcp_trace);
+	offset = trace->trace_oldest;
+	end_offset = trace->trace_prod;
+	bytes_left = qed_cyclic_sub(end_offset, offset, trace->size);
+	dump_buf += trace_data_dwords;
+
+	/* Read meta_data section */
+	dump_buf += qed_read_section_hdr(dump_buf,
+					 &section_name, &num_section_params);
+	if (strcmp(section_name, "mcp_trace_meta"))
+		return DBG_STATUS_MCP_TRACE_BAD_DATA;
+	dump_buf += qed_read_param(dump_buf,
+				   &param_name, &param_str_val, &param_num_val);
+	if (strcmp(param_name, "size") != 0 || param_str_val)
+		return DBG_STATUS_MCP_TRACE_BAD_DATA;
+	trace_meta_dwords = param_num_val;
+
+	/* Choose meta data buffer */
+	if (!trace_meta_dwords) {
+		/* Dump doesn't include meta data */
+		if (!s_mcp_trace_meta.ptr)
+			return DBG_STATUS_MCP_TRACE_NO_META;
+		meta_buf = s_mcp_trace_meta.ptr;
+	} else {
+		/* Dump includes meta data */
+		meta_buf = dump_buf;
+	}
+
+	/* Allocate meta data memory. On failure the partially allocated meta
+	 * data is released at free_mem.
+	 */
+	status = qed_mcp_trace_alloc_meta(p_hwfn, meta_buf, &meta);
+	if (status != DBG_STATUS_OK)
+		goto free_mem;
+
+	/* Ignore the level and modules masks - just print everything that is
+	 * already in the buffer.
+	 */
+	while (bytes_left) {
+		struct mcp_trace_format *format_ptr;
+		u8 format_level, format_module;
+		u32 params[3] = { 0, 0, 0 };
+		u32 header, format_idx, i;
+
+		if (bytes_left < MFW_TRACE_ENTRY_SIZE) {
+			status = DBG_STATUS_MCP_TRACE_BAD_DATA;
+			goto free_mem;
+		}
+
+		header = qed_read_from_cyclic_buf(trace_buf,
+						  &offset,
+						  trace->size,
+						  MFW_TRACE_ENTRY_SIZE);
+		bytes_left -= MFW_TRACE_ENTRY_SIZE;
+		format_idx = header & MFW_TRACE_EVENTID_MASK;
+
+		/* Skip message if its index doesn't exist in the meta data.
+		 * Valid indices are 0..formats_num-1.
+		 */
+		if (format_idx >= meta.formats_num) {
+			u8 format_size =
+			    (u8)((header &
+				  MFW_TRACE_PRM_SIZE_MASK) >>
+				 MFW_TRACE_PRM_SIZE_SHIFT);
+
+			if (bytes_left < format_size) {
+				status = DBG_STATUS_MCP_TRACE_BAD_DATA;
+				goto free_mem;
+			}
+
+			offset = qed_cyclic_add(offset,
+						format_size, trace->size);
+			bytes_left -= format_size;
+			continue;
+		}
+
+		format_ptr = &meta.formats[format_idx];
+		for (i = 0,
+		     param_mask = MCP_TRACE_FORMAT_P1_SIZE_MASK, param_shift =
+		     MCP_TRACE_FORMAT_P1_SIZE_SHIFT;
+		     i < MCP_TRACE_FORMAT_MAX_PARAMS;
+		     i++, param_mask <<= MCP_TRACE_FORMAT_PARAM_WIDTH,
+		     param_shift += MCP_TRACE_FORMAT_PARAM_WIDTH) {
+			/* Extract param size (0..3) */
+			u8 param_size =
+			    (u8)((format_ptr->data &
+				  param_mask) >> param_shift);
+
+			/* If the param size is zero, there are no other
+			 * parameters.
+			 */
+			if (!param_size)
+				break;
+
+			/* Size is encoded using 2 bits, where 3 is used to
+			 * encode 4.
+			 */
+			if (param_size == 3)
+				param_size = 4;
+			if (bytes_left < param_size) {
+				status = DBG_STATUS_MCP_TRACE_BAD_DATA;
+				goto free_mem;
+			}
+
+			params[i] = qed_read_from_cyclic_buf(trace_buf,
+							     &offset,
+							     trace->size,
+							     param_size);
+			bytes_left -= param_size;
+		}
+
+		format_level =
+		    (u8)((format_ptr->data &
+			  MCP_TRACE_FORMAT_LEVEL_MASK) >>
+			 MCP_TRACE_FORMAT_LEVEL_SHIFT);
+		format_module =
+		    (u8)((format_ptr->data &
+			  MCP_TRACE_FORMAT_MODULE_MASK) >>
+			 MCP_TRACE_FORMAT_MODULE_SHIFT);
+
+		/* Both values index tables, so reject out-of-range ones */
+		if (format_level >= ARRAY_SIZE(s_mcp_trace_level_str) ||
+		    format_module >= meta.modules_num) {
+			status = DBG_STATUS_MCP_TRACE_BAD_DATA;
+			goto free_mem;
+		}
+
+		/* Print current message to results buffer.
+		 * NOTE(review): the format string comes from the meta data and
+		 * is used as-is as a printf format - confirm that the meta
+		 * data is trusted.
+		 */
+		results_offset +=
+		    sprintf(qed_get_buf_ptr(results_buf,
+					    results_offset), "%s %-8s: ",
+			    s_mcp_trace_level_str[format_level],
+			    meta.modules[format_module]);
+		results_offset +=
+		    sprintf(qed_get_buf_ptr(results_buf,
+					    results_offset),
+			    format_ptr->format_str, params[0], params[1],
+			    params[2]);
+	}
+
+free_mem:
+	/* Add 1 for string NULL termination */
+	*parsed_results_bytes = results_offset + 1;
+	qed_mcp_trace_free_meta(p_hwfn, &meta);
+	return status;
+}
+
+/* Calculates the size (in bytes, including the string terminator) of the
+ * buffer required for printing the specified MCP Trace dump.
+ * The parsing status is returned.
+ */
+enum dbg_status qed_get_mcp_trace_results_buf_size(struct qed_hwfn *p_hwfn,
+						   u32 *dump_buf,
+						   u32 num_dumped_dwords,
+						   u32 *results_buf_size)
+{
+	return qed_parse_mcp_trace_dump(p_hwfn,
+					dump_buf,
+					num_dumped_dwords,
+					NULL, results_buf_size);
+}
+
+/* Prints the specified MCP Trace dump to results_buf.
+ * The parsing status is returned.
+ */
+enum dbg_status qed_print_mcp_trace_results(struct qed_hwfn *p_hwfn,
+					    u32 *dump_buf,
+					    u32 num_dumped_dwords,
+					    char *results_buf)
+{
+	u32 parsed_buf_size;
+
+	return qed_parse_mcp_trace_dump(p_hwfn,
+					dump_buf,
+					num_dumped_dwords,
+					results_buf, &parsed_buf_size);
+}
+
+/* Parses a Reg FIFO dump buffer.
+ * If result_buf is not NULL, the Reg FIFO results are printed to it.
+ * In any case, the required results buffer size is assigned to
+ * parsed_results_bytes.
+ * The parsing status is returned.
+ */
+static enum dbg_status qed_parse_reg_fifo_dump(struct qed_hwfn *p_hwfn,
+					       u32 *dump_buf,
+					       u32 num_dumped_dwords,
+					       char *results_buf,
+					       u32 *parsed_results_bytes)
+{
+	u32 results_offset = 0, param_num_val = 0, num_section_params = 0;
+	const char *section_name, *param_name, *param_str_val;
+	struct reg_fifo_element *elements;
+	u32 num_elements, i;
+	u8 j, err_val, vf_val;
+	char vf_str[4];
+
+	/* Give the caller a defined size on every error path */
+	*parsed_results_bytes = 0;
+
+	/* Read global_params section */
+	dump_buf += qed_read_section_hdr(dump_buf,
+					 &section_name, &num_section_params);
+	if (strcmp(section_name, "global_params"))
+		return DBG_STATUS_REG_FIFO_BAD_DATA;
+
+	/* Print global params */
+	dump_buf += qed_print_section_params(dump_buf,
+					     num_section_params,
+					     results_buf, &results_offset);
+
+	/* Read reg_fifo_data section */
+	dump_buf += qed_read_section_hdr(dump_buf,
+					 &section_name, &num_section_params);
+	if (strcmp(section_name, "reg_fifo_data"))
+		return DBG_STATUS_REG_FIFO_BAD_DATA;
+
+	/* The size must be a numeric param: for a string param no number is
+	 * stored by qed_read_param().
+	 */
+	dump_buf += qed_read_param(dump_buf,
+				   &param_name, &param_str_val, &param_num_val);
+	if (strcmp(param_name, "size") || param_str_val)
+		return DBG_STATUS_REG_FIFO_BAD_DATA;
+	if (param_num_val % REG_FIFO_ELEMENT_DWORDS)
+		return DBG_STATUS_REG_FIFO_BAD_DATA;
+	num_elements = param_num_val / REG_FIFO_ELEMENT_DWORDS;
+	elements = (struct reg_fifo_element *)dump_buf;
+
+	/* Decode elements. The index is as wide as the element count, so the
+	 * loop terminates for any count.
+	 */
+	for (i = 0; i < num_elements; i++) {
+		bool err_printed = false;
+
+		/* Discover if element belongs to a VF or a PF */
+		vf_val = GET_FIELD(elements[i].data, REG_FIFO_ELEMENT_VF);
+		if (vf_val == REG_FIFO_ELEMENT_IS_PF_VF_VAL)
+			sprintf(vf_str, "%s", "N/A");
+		else
+			sprintf(vf_str, "%d", vf_val);
+
+		/* Add parsed element to parsed buffer */
+		results_offset +=
+		    sprintf(qed_get_buf_ptr(results_buf,
+					    results_offset),
+			    "raw: 0x%016llx, address: 0x%07llx, access: %-5s, pf: %2lld, vf: %s, port: %lld, privilege: %-3s, protection: %-12s, master: %-4s, errors: ",
+			    elements[i].data,
+			    GET_FIELD(elements[i].data,
+				      REG_FIFO_ELEMENT_ADDRESS) *
+			    REG_FIFO_ELEMENT_ADDR_FACTOR,
+			    s_access_strs[GET_FIELD(elements[i].data,
+						    REG_FIFO_ELEMENT_ACCESS)],
+			    GET_FIELD(elements[i].data,
+				      REG_FIFO_ELEMENT_PF), vf_str,
+			    GET_FIELD(elements[i].data,
+				      REG_FIFO_ELEMENT_PORT),
+			    s_privilege_strs[GET_FIELD(elements[i].
+						       data,
+						       REG_FIFO_ELEMENT_PRIVILEGE)],
+			    s_protection_strs[GET_FIELD(elements[i].data,
+							REG_FIFO_ELEMENT_PROTECTION)],
+			    s_master_strs[GET_FIELD(elements[i].data,
+						    REG_FIFO_ELEMENT_MASTER)]);
+
+		/* Print errors - one name per set bit of the error field,
+		 * separated by commas.
+		 */
+		for (j = 0,
+		     err_val = GET_FIELD(elements[i].data,
+					 REG_FIFO_ELEMENT_ERROR);
+		     j < ARRAY_SIZE(s_reg_fifo_error_strs);
+		     j++, err_val >>= 1) {
+			if (!(err_val & 0x1))
+				continue;
+			if (err_printed)
+				results_offset +=
+				    sprintf(qed_get_buf_ptr(results_buf,
+							    results_offset),
+					    ", ");
+			results_offset +=
+			    sprintf(qed_get_buf_ptr(results_buf,
+						    results_offset), "%s",
+				    s_reg_fifo_error_strs[j]);
+			err_printed = true;
+		}
+
+		results_offset +=
+		    sprintf(qed_get_buf_ptr(results_buf, results_offset), "\n");
+	}
+
+	results_offset += sprintf(qed_get_buf_ptr(results_buf,
+						  results_offset),
+				  "fifo contained %d elements", num_elements);
+
+	/* Add 1 for string NULL termination */
+	*parsed_results_bytes = results_offset + 1;
+	return DBG_STATUS_OK;
+}
+
+/* Calculates the size (in bytes, including the string terminator) of the
+ * buffer required for printing the specified Reg FIFO dump.
+ * The parsing status is returned.
+ */
+enum dbg_status qed_get_reg_fifo_results_buf_size(struct qed_hwfn *p_hwfn,
+						  u32 *dump_buf,
+						  u32 num_dumped_dwords,
+						  u32 *results_buf_size)
+{
+	return qed_parse_reg_fifo_dump(p_hwfn,
+				       dump_buf,
+				       num_dumped_dwords,
+				       NULL, results_buf_size);
+}
+
+/* Prints the specified Reg FIFO dump to results_buf.
+ * The parsing status is returned.
+ */
+enum dbg_status qed_print_reg_fifo_results(struct qed_hwfn *p_hwfn,
+					   u32 *dump_buf,
+					   u32 num_dumped_dwords,
+					   char *results_buf)
+{
+	u32 parsed_buf_size;
+
+	return qed_parse_reg_fifo_dump(p_hwfn,
+				       dump_buf,
+				       num_dumped_dwords,
+				       results_buf, &parsed_buf_size);
+}
+
+/* Parses an IGU FIFO dump buffer.
+ * If result_buf is not NULL, the IGU FIFO results are printed to it.
+ * In any case, the required results buffer size is assigned to
+ * parsed_results_bytes.
+ * The parsing status is returned.
+ */ +static enum dbg_status qed_parse_igu_fifo_dump(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf, + u32 *parsed_results_bytes) +{ + u32 results_offset = 0, param_num_val, num_section_params, num_elements; + const char *section_name, *param_name, *param_str_val; + struct igu_fifo_element *elements; + char parsed_addr_data[32]; + char parsed_wr_data[256]; + u8 i, j; + + /* Read global_params section */ + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, &num_section_params); + if (strcmp(section_name, "global_params")) + return DBG_STATUS_IGU_FIFO_BAD_DATA; + + /* Print global params */ + dump_buf += qed_print_section_params(dump_buf, + num_section_params, + results_buf, &results_offset); + + /* Read igu_fifo_data section */ + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, &num_section_params); + if (strcmp(section_name, "igu_fifo_data")) + return DBG_STATUS_IGU_FIFO_BAD_DATA; + dump_buf += qed_read_param(dump_buf, + ¶m_name, ¶m_str_val, ¶m_num_val); + if (strcmp(param_name, "size")) + return DBG_STATUS_IGU_FIFO_BAD_DATA; + if (param_num_val % IGU_FIFO_ELEMENT_DWORDS) + return DBG_STATUS_IGU_FIFO_BAD_DATA; + num_elements = param_num_val / IGU_FIFO_ELEMENT_DWORDS; + elements = (struct igu_fifo_element *)dump_buf; + + /* Decode elements */ + for (i = 0; i < num_elements; i++) { + /* dword12 (dword index 1 and 2) contains bits 32..95 of the + * FIFO element. 
+ */ + u64 dword12 = + ((u64)elements[i].dword2 << 32) | elements[i].dword1; + bool is_wr_cmd = GET_FIELD(dword12, + IGU_FIFO_ELEMENT_DWORD12_IS_WR_CMD); + bool is_pf = GET_FIELD(elements[i].dword0, + IGU_FIFO_ELEMENT_DWORD0_IS_PF); + u16 cmd_addr = GET_FIELD(elements[i].dword0, + IGU_FIFO_ELEMENT_DWORD0_CMD_ADDR); + u8 source = GET_FIELD(elements[i].dword0, + IGU_FIFO_ELEMENT_DWORD0_SOURCE); + u8 err_type = GET_FIELD(elements[i].dword0, + IGU_FIFO_ELEMENT_DWORD0_ERR_TYPE); + const struct igu_fifo_addr_data *addr_data = NULL; + + if (source >= ARRAY_SIZE(s_igu_fifo_source_strs)) + return DBG_STATUS_IGU_FIFO_BAD_DATA; + if (err_type >= ARRAY_SIZE(s_igu_fifo_error_strs)) + return DBG_STATUS_IGU_FIFO_BAD_DATA; + + /* Find address data */ + for (j = 0; j < ARRAY_SIZE(s_igu_fifo_addr_data) && !addr_data; + j++) + if (cmd_addr >= s_igu_fifo_addr_data[j].start_addr && + cmd_addr <= s_igu_fifo_addr_data[j].end_addr) + addr_data = &s_igu_fifo_addr_data[j]; + if (!addr_data) + return DBG_STATUS_IGU_FIFO_BAD_DATA; + + /* Prepare parsed address data */ + switch (addr_data->type) { + case IGU_ADDR_TYPE_MSIX_MEM: + sprintf(parsed_addr_data, + " vector_num=0x%x", cmd_addr / 2); + break; + case IGU_ADDR_TYPE_WRITE_INT_ACK: + case IGU_ADDR_TYPE_WRITE_PROD_UPDATE: + sprintf(parsed_addr_data, + " SB=0x%x", cmd_addr - addr_data->start_addr); + break; + default: + parsed_addr_data[0] = '\0'; + } + + /* Prepare parsed write data */ + if (is_wr_cmd) { + u32 wr_data = GET_FIELD(dword12, + IGU_FIFO_ELEMENT_DWORD12_WR_DATA); + u32 prod_cons = GET_FIELD(wr_data, + IGU_FIFO_WR_DATA_PROD_CONS); + u8 is_cleanup = GET_FIELD(wr_data, + IGU_FIFO_WR_DATA_CMD_TYPE); + + if (source == IGU_SRC_ATTN) { + sprintf(parsed_wr_data, + "prod: 0x%x, ", prod_cons); + } else { + if (is_cleanup) { + u8 cleanup_val = GET_FIELD(wr_data, + IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_VAL); + u8 cleanup_type = GET_FIELD(wr_data, + IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_TYPE); + + sprintf(parsed_wr_data, + "cmd_type: cleanup, 
cleanup_val: %s, cleanup_type: %d, ", + cleanup_val ? "set" : "clear", + cleanup_type); + } else { + u8 update_flag = GET_FIELD(wr_data, + IGU_FIFO_WR_DATA_UPDATE_FLAG); + u8 en_dis_int_for_sb = + GET_FIELD(wr_data, + IGU_FIFO_WR_DATA_EN_DIS_INT_FOR_SB); + u8 segment = GET_FIELD(wr_data, + IGU_FIFO_WR_DATA_SEGMENT); + u8 timer_mask = GET_FIELD(wr_data, + IGU_FIFO_WR_DATA_TIMER_MASK); + + sprintf(parsed_wr_data, + "cmd_type: prod/cons update, prod/cons: 0x%x, update_flag: %s, en_dis_int_for_sb: %s, segment: %s, timer_mask=%d, ", + prod_cons, + update_flag ? "update" : "nop", + en_dis_int_for_sb + ? (en_dis_int_for_sb == + 1 ? "disable" : "nop") : + "enable", + segment ? "attn" : "regular", + timer_mask); + } + } + } else { + parsed_wr_data[0] = '\0'; + } + + /* Add parsed element to parsed buffer */ + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "raw: 0x%01x%08x%08x, %s: %d, source: %s, type: %s, cmd_addr: 0x%x (%s%s), %serror: %s\n", + elements[i].dword2, elements[i].dword1, + elements[i].dword0, + is_pf ? "pf" : "vf", + GET_FIELD(elements[i].dword0, + IGU_FIFO_ELEMENT_DWORD0_FID), + s_igu_fifo_source_strs[source], + is_wr_cmd ? "wr" : "rd", cmd_addr, + (!is_pf && addr_data->vf_desc) + ? 
addr_data->vf_desc : addr_data->desc, + parsed_addr_data, parsed_wr_data, + s_igu_fifo_error_strs[err_type]); + } + + results_offset += sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "fifo contained %d elements", num_elements); + + /* Add 1 for string NULL termination */ + *parsed_results_bytes = results_offset + 1; + return DBG_STATUS_OK; +} + +enum dbg_status qed_get_igu_fifo_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size) +{ + return qed_parse_igu_fifo_dump(p_hwfn, + dump_buf, + num_dumped_dwords, + NULL, results_buf_size); +} + +enum dbg_status qed_print_igu_fifo_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf) +{ + u32 parsed_buf_size; + + return qed_parse_igu_fifo_dump(p_hwfn, + dump_buf, + num_dumped_dwords, + results_buf, &parsed_buf_size); +} + +static enum dbg_status +qed_parse_protection_override_dump(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf, + u32 *parsed_results_bytes) +{ + u32 results_offset = 0, param_num_val, num_section_params, num_elements; + const char *section_name, *param_name, *param_str_val; + struct protection_override_element *elements; + u8 i; + + /* Read global_params section */ + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, &num_section_params); + if (strcmp(section_name, "global_params")) + return DBG_STATUS_PROTECTION_OVERRIDE_BAD_DATA; + + /* Print global params */ + dump_buf += qed_print_section_params(dump_buf, + num_section_params, + results_buf, &results_offset); + + /* Read protection_override_data section */ + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, &num_section_params); + if (strcmp(section_name, "protection_override_data")) + return DBG_STATUS_PROTECTION_OVERRIDE_BAD_DATA; + dump_buf += qed_read_param(dump_buf, + ¶m_name, ¶m_str_val, ¶m_num_val); + if (strcmp(param_name, "size")) + return DBG_STATUS_PROTECTION_OVERRIDE_BAD_DATA; + if 
(param_num_val % PROTECTION_OVERRIDE_ELEMENT_DWORDS != 0) + return DBG_STATUS_PROTECTION_OVERRIDE_BAD_DATA; + num_elements = param_num_val / PROTECTION_OVERRIDE_ELEMENT_DWORDS; + elements = (struct protection_override_element *)dump_buf; + + /* Decode elements */ + for (i = 0; i < num_elements; i++) { + u32 address = GET_FIELD(elements[i].data, + PROTECTION_OVERRIDE_ELEMENT_ADDRESS) * + PROTECTION_OVERRIDE_ELEMENT_ADDR_FACTOR; + + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "window %2d, address: 0x%07x, size: %7lld regs, read: %lld, write: %lld, read protection: %-12s, write protection: %-12s\n", + i, address, + GET_FIELD(elements[i].data, + PROTECTION_OVERRIDE_ELEMENT_WINDOW_SIZE), + GET_FIELD(elements[i].data, + PROTECTION_OVERRIDE_ELEMENT_READ), + GET_FIELD(elements[i].data, + PROTECTION_OVERRIDE_ELEMENT_WRITE), + s_protection_strs[GET_FIELD(elements[i].data, + PROTECTION_OVERRIDE_ELEMENT_READ_PROTECTION)], + s_protection_strs[GET_FIELD(elements[i].data, + PROTECTION_OVERRIDE_ELEMENT_WRITE_PROTECTION)]); + } + + results_offset += sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "protection override contained %d elements", + num_elements); + + /* Add 1 for string NULL termination */ + *parsed_results_bytes = results_offset + 1; + return DBG_STATUS_OK; +} + +enum dbg_status +qed_get_protection_override_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size) +{ + return qed_parse_protection_override_dump(p_hwfn, + dump_buf, + num_dumped_dwords, + NULL, results_buf_size); +} + +enum dbg_status qed_print_protection_override_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf) +{ + u32 parsed_buf_size; + + return qed_parse_protection_override_dump(p_hwfn, + dump_buf, + num_dumped_dwords, + results_buf, + &parsed_buf_size); +} + +/* Parses a FW Asserts dump buffer. 
+ * If result_buf is not NULL, the FW Asserts results are printed to it. + * In any case, the required results buffer size is assigned to + * parsed_results_bytes. + * The parsing status is returned. + */ +static enum dbg_status qed_parse_fw_asserts_dump(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf, + u32 *parsed_results_bytes) +{ + u32 results_offset = 0, num_section_params, param_num_val, i; + const char *param_name, *param_str_val, *section_name; + bool last_section_found = false; + + *parsed_results_bytes = 0; + + /* Read global_params section */ + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, &num_section_params); + if (strcmp(section_name, "global_params")) + return DBG_STATUS_FW_ASSERTS_PARSE_FAILED; + + /* Print global params */ + dump_buf += qed_print_section_params(dump_buf, + num_section_params, + results_buf, &results_offset); + while (!last_section_found) { + const char *storm_letter = NULL; + u32 storm_dump_size = 0; + + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, + &num_section_params); + if (!strcmp(section_name, "last")) { + last_section_found = true; + continue; + } else if (strcmp(section_name, "fw_asserts")) { + return DBG_STATUS_FW_ASSERTS_PARSE_FAILED; + } + + /* Extract params */ + for (i = 0; i < num_section_params; i++) { + dump_buf += qed_read_param(dump_buf, + ¶m_name, + ¶m_str_val, + ¶m_num_val); + if (!strcmp(param_name, "storm")) + storm_letter = param_str_val; + else if (!strcmp(param_name, "size")) + storm_dump_size = param_num_val; + else + return DBG_STATUS_FW_ASSERTS_PARSE_FAILED; + } + + if (!storm_letter || !storm_dump_size) + return DBG_STATUS_FW_ASSERTS_PARSE_FAILED; + + /* Print data */ + results_offset += sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "\n%sSTORM_ASSERT: size=%d\n", + storm_letter, storm_dump_size); + for (i = 0; i < storm_dump_size; i++, dump_buf++) + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + 
"%08x\n", *dump_buf); + } + + /* Add 1 for string NULL termination */ + *parsed_results_bytes = results_offset + 1; + return DBG_STATUS_OK; +} + +enum dbg_status qed_get_fw_asserts_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size) +{ + return qed_parse_fw_asserts_dump(p_hwfn, + dump_buf, + num_dumped_dwords, + NULL, results_buf_size); +} + +enum dbg_status qed_print_fw_asserts_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf) +{ + u32 parsed_buf_size; + + return qed_parse_fw_asserts_dump(p_hwfn, + dump_buf, + num_dumped_dwords, + results_buf, &parsed_buf_size); +} + +/* Wrapper for unifying the idle_chk and mcp_trace api */ +enum dbg_status qed_print_idle_chk_results_wrapper(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf) +{ + u32 num_errors, num_warnnings; + + return qed_print_idle_chk_results(p_hwfn, dump_buf, num_dumped_dwords, + results_buf, &num_errors, + &num_warnnings); +} + +/* Feature meta data lookup table */ +static struct { + char *name; + enum dbg_status (*get_size)(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *size); + enum dbg_status (*perform_dump)(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *dump_buf, + u32 buf_size, u32 *dumped_dwords); + enum dbg_status (*print_results)(struct qed_hwfn *p_hwfn, + u32 *dump_buf, u32 num_dumped_dwords, + char *results_buf); + enum dbg_status (*results_buf_size)(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size); +} qed_features_lookup[] = { + { + "grc", qed_dbg_grc_get_dump_buf_size, + qed_dbg_grc_dump, NULL, NULL}, { + "idle_chk", + qed_dbg_idle_chk_get_dump_buf_size, + qed_dbg_idle_chk_dump, + qed_print_idle_chk_results_wrapper, + qed_get_idle_chk_results_buf_size}, { + "mcp_trace", + qed_dbg_mcp_trace_get_dump_buf_size, + qed_dbg_mcp_trace_dump, qed_print_mcp_trace_results, + 
qed_get_mcp_trace_results_buf_size}, { + "reg_fifo", + qed_dbg_reg_fifo_get_dump_buf_size, + qed_dbg_reg_fifo_dump, qed_print_reg_fifo_results, + qed_get_reg_fifo_results_buf_size}, { + "igu_fifo", + qed_dbg_igu_fifo_get_dump_buf_size, + qed_dbg_igu_fifo_dump, qed_print_igu_fifo_results, + qed_get_igu_fifo_results_buf_size}, { + "protection_override", + qed_dbg_protection_override_get_dump_buf_size, + qed_dbg_protection_override_dump, + qed_print_protection_override_results, + qed_get_protection_override_results_buf_size}, { + "fw_asserts", + qed_dbg_fw_asserts_get_dump_buf_size, + qed_dbg_fw_asserts_dump, + qed_print_fw_asserts_results, + qed_get_fw_asserts_results_buf_size},}; + +static void qed_dbg_print_feature(u8 *p_text_buf, u32 text_size) +{ + u32 i, precision = 80; + + if (!p_text_buf) + return; + + pr_notice("\n%.*s", precision, p_text_buf); + for (i = precision; i < text_size; i += precision) + pr_cont("%.*s", precision, p_text_buf + i); + pr_cont("\n"); +} + +#define QED_RESULTS_BUF_MIN_SIZE 16 +/* Generic function for decoding debug feature info */ +enum dbg_status format_feature(struct qed_hwfn *p_hwfn, + enum qed_dbg_features feature_idx) +{ + struct qed_dbg_feature *feature = + &p_hwfn->cdev->dbg_params.features[feature_idx]; + u32 text_size_bytes, null_char_pos, i; + enum dbg_status rc; + char *text_buf; + + /* Check if feature supports formatting capability */ + if (!qed_features_lookup[feature_idx].results_buf_size) + return DBG_STATUS_OK; + + /* Obtain size of formatted output */ + rc = qed_features_lookup[feature_idx]. + results_buf_size(p_hwfn, (u32 *)feature->dump_buf, + feature->dumped_dwords, &text_size_bytes); + if (rc != DBG_STATUS_OK) + return rc; + + /* Make sure that the allocated size is a multiple of dword (4 bytes) */ + null_char_pos = text_size_bytes - 1; + text_size_bytes = (text_size_bytes + 3) & ~0x3; + + if (text_size_bytes < QED_RESULTS_BUF_MIN_SIZE) { + DP_NOTICE(p_hwfn->cdev, + "formatted size of feature was too small %d. 
Aborting\n", + text_size_bytes); + return DBG_STATUS_INVALID_ARGS; + } + + /* Allocate temp text buf */ + text_buf = vzalloc(text_size_bytes); + if (!text_buf) + return DBG_STATUS_VIRT_MEM_ALLOC_FAILED; + + /* Decode feature opcodes to string on temp buf */ + rc = qed_features_lookup[feature_idx]. + print_results(p_hwfn, (u32 *)feature->dump_buf, + feature->dumped_dwords, text_buf); + if (rc != DBG_STATUS_OK) { + vfree(text_buf); + return rc; + } + + /* Replace the original null character with a '\n' character. + * The bytes that were added as a result of the dword alignment are also + * padded with '\n' characters. + */ + for (i = null_char_pos; i < text_size_bytes; i++) + text_buf[i] = '\n'; + + /* Dump printable feature to log */ + if (p_hwfn->cdev->dbg_params.print_data) + qed_dbg_print_feature(text_buf, text_size_bytes); + + /* Free the old dump_buf and point the dump_buf to the newly allocagted + * and formatted text buffer. + */ + vfree(feature->dump_buf); + feature->dump_buf = text_buf; + feature->buf_size = text_size_bytes; + feature->dumped_dwords = text_size_bytes / 4; + return rc; +} + +/* Generic function for performing the dump of a debug feature. */ +enum dbg_status qed_dbg_dump(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + enum qed_dbg_features feature_idx) +{ + struct qed_dbg_feature *feature = + &p_hwfn->cdev->dbg_params.features[feature_idx]; + u32 buf_size_dwords; + enum dbg_status rc; + + DP_NOTICE(p_hwfn->cdev, "Collecting a debug feature [\"%s\"]\n", + qed_features_lookup[feature_idx].name); + + /* Dump_buf was already allocated need to free (this can happen if dump + * was called but file was never read). + * We can't use the buffer as is since size may have changed. + */ + if (feature->dump_buf) { + vfree(feature->dump_buf); + feature->dump_buf = NULL; + } + + /* Get buffer size from hsi, allocate accordingly, and perform the + * dump. 
+ */ + rc = qed_features_lookup[feature_idx].get_size(p_hwfn, p_ptt, + &buf_size_dwords); + if (rc != DBG_STATUS_OK) + return rc; + feature->buf_size = buf_size_dwords * sizeof(u32); + feature->dump_buf = vmalloc(feature->buf_size); + if (!feature->dump_buf) + return DBG_STATUS_VIRT_MEM_ALLOC_FAILED; + + rc = qed_features_lookup[feature_idx]. + perform_dump(p_hwfn, p_ptt, (u32 *)feature->dump_buf, + feature->buf_size / sizeof(u32), + &feature->dumped_dwords); + + /* If mcp is stuck we get DBG_STATUS_NVRAM_GET_IMAGE_FAILED error. + * In this case the buffer holds valid binary data, but we wont able + * to parse it (since parsing relies on data in NVRAM which is only + * accessible when MFW is responsive). skip the formatting but return + * success so that binary data is provided. + */ + if (rc == DBG_STATUS_NVRAM_GET_IMAGE_FAILED) + return DBG_STATUS_OK; + + if (rc != DBG_STATUS_OK) + return rc; + + /* Format output */ + rc = format_feature(p_hwfn, feature_idx); + return rc; +} + +int qed_dbg_grc(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes) +{ + return qed_dbg_feature(cdev, buffer, DBG_FEATURE_GRC, num_dumped_bytes); +} + +int qed_dbg_grc_size(struct qed_dev *cdev) +{ + return qed_dbg_feature_size(cdev, DBG_FEATURE_GRC); +} + +int qed_dbg_idle_chk(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes) +{ + return qed_dbg_feature(cdev, buffer, DBG_FEATURE_IDLE_CHK, + num_dumped_bytes); +} + +int qed_dbg_idle_chk_size(struct qed_dev *cdev) +{ + return qed_dbg_feature_size(cdev, DBG_FEATURE_IDLE_CHK); +} + +int qed_dbg_reg_fifo(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes) +{ + return qed_dbg_feature(cdev, buffer, DBG_FEATURE_REG_FIFO, + num_dumped_bytes); +} + +int qed_dbg_reg_fifo_size(struct qed_dev *cdev) +{ + return qed_dbg_feature_size(cdev, DBG_FEATURE_REG_FIFO); +} + +int qed_dbg_igu_fifo(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes) +{ + return qed_dbg_feature(cdev, buffer, DBG_FEATURE_IGU_FIFO, + num_dumped_bytes); 
+} + +int qed_dbg_igu_fifo_size(struct qed_dev *cdev) +{ + return qed_dbg_feature_size(cdev, DBG_FEATURE_IGU_FIFO); +} + +int qed_dbg_protection_override(struct qed_dev *cdev, void *buffer, + u32 *num_dumped_bytes) +{ + return qed_dbg_feature(cdev, buffer, DBG_FEATURE_PROTECTION_OVERRIDE, + num_dumped_bytes); +} + +int qed_dbg_protection_override_size(struct qed_dev *cdev) +{ + return qed_dbg_feature_size(cdev, DBG_FEATURE_PROTECTION_OVERRIDE); +} + +int qed_dbg_fw_asserts(struct qed_dev *cdev, void *buffer, + u32 *num_dumped_bytes) +{ + return qed_dbg_feature(cdev, buffer, DBG_FEATURE_FW_ASSERTS, + num_dumped_bytes); +} + +int qed_dbg_fw_asserts_size(struct qed_dev *cdev) +{ + return qed_dbg_feature_size(cdev, DBG_FEATURE_FW_ASSERTS); +} + +int qed_dbg_mcp_trace(struct qed_dev *cdev, void *buffer, + u32 *num_dumped_bytes) +{ + return qed_dbg_feature(cdev, buffer, DBG_FEATURE_MCP_TRACE, + num_dumped_bytes); +} + +int qed_dbg_mcp_trace_size(struct qed_dev *cdev) +{ + return qed_dbg_feature_size(cdev, DBG_FEATURE_MCP_TRACE); +} + +/* Defines the amount of bytes allocated for recording the length of debugfs + * feature buffer. + */ +#define REGDUMP_HEADER_SIZE sizeof(u32) +#define REGDUMP_HEADER_FEATURE_SHIFT 24 +#define REGDUMP_HEADER_ENGINE_SHIFT 31 +#define REGDUMP_HEADER_OMIT_ENGINE_SHIFT 30 +enum debug_print_features { + OLD_MODE = 0, + IDLE_CHK = 1, + GRC_DUMP = 2, + MCP_TRACE = 3, + REG_FIFO = 4, + PROTECTION_OVERRIDE = 5, + IGU_FIFO = 6, + PHY = 7, + FW_ASSERTS = 8, +}; + +static u32 qed_calc_regdump_header(enum debug_print_features feature, + int engine, u32 feature_size, u8 omit_engine) +{ + /* Insert the engine, feature and mode inside the header and combine it + * with feature size. 
+ */ + return feature_size | (feature << REGDUMP_HEADER_FEATURE_SHIFT) | + (omit_engine << REGDUMP_HEADER_OMIT_ENGINE_SHIFT) | + (engine << REGDUMP_HEADER_ENGINE_SHIFT); +} + +int qed_dbg_all_data(struct qed_dev *cdev, void *buffer) +{ + u8 cur_engine, omit_engine = 0, org_engine; + u32 offset = 0, feature_size; + int rc; + + if (cdev->num_hwfns == 1) + omit_engine = 1; + + org_engine = qed_get_debug_engine(cdev); + for (cur_engine = 0; cur_engine < cdev->num_hwfns; cur_engine++) { + /* Collect idle_chks and grcDump for each hw function */ + DP_VERBOSE(cdev, QED_MSG_DEBUG, + "obtaining idle_chk and grcdump for current engine\n"); + qed_set_debug_engine(cdev, cur_engine); + + /* First idle_chk */ + rc = qed_dbg_idle_chk(cdev, (u8 *)buffer + offset + + REGDUMP_HEADER_SIZE, &feature_size); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(IDLE_CHK, cur_engine, + feature_size, omit_engine); + offset += (feature_size + REGDUMP_HEADER_SIZE); + } else { + DP_ERR(cdev, "qed_dbg_idle_chk failed. rc = %d\n", rc); + } + + /* Second idle_chk */ + rc = qed_dbg_idle_chk(cdev, (u8 *)buffer + offset + + REGDUMP_HEADER_SIZE, &feature_size); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(IDLE_CHK, cur_engine, + feature_size, omit_engine); + offset += (feature_size + REGDUMP_HEADER_SIZE); + } else { + DP_ERR(cdev, "qed_dbg_idle_chk failed. rc = %d\n", rc); + } + + /* reg_fifo dump */ + rc = qed_dbg_reg_fifo(cdev, (u8 *)buffer + offset + + REGDUMP_HEADER_SIZE, &feature_size); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(REG_FIFO, cur_engine, + feature_size, omit_engine); + offset += (feature_size + REGDUMP_HEADER_SIZE); + } else { + DP_ERR(cdev, "qed_dbg_reg_fifo failed. 
rc = %d\n", rc); + } + + /* igu_fifo dump */ + rc = qed_dbg_igu_fifo(cdev, (u8 *)buffer + offset + + REGDUMP_HEADER_SIZE, &feature_size); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(IGU_FIFO, cur_engine, + feature_size, omit_engine); + offset += (feature_size + REGDUMP_HEADER_SIZE); + } else { + DP_ERR(cdev, "qed_dbg_igu_fifo failed. rc = %d", rc); + } + + /* protection_override dump */ + rc = qed_dbg_protection_override(cdev, (u8 *)buffer + offset + + REGDUMP_HEADER_SIZE, + &feature_size); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(PROTECTION_OVERRIDE, + cur_engine, + feature_size, omit_engine); + offset += (feature_size + REGDUMP_HEADER_SIZE); + } else { + DP_ERR(cdev, + "qed_dbg_protection_override failed. rc = %d\n", + rc); + } + + /* fw_asserts dump */ + rc = qed_dbg_fw_asserts(cdev, (u8 *)buffer + offset + + REGDUMP_HEADER_SIZE, &feature_size); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(FW_ASSERTS, cur_engine, + feature_size, omit_engine); + offset += (feature_size + REGDUMP_HEADER_SIZE); + } else { + DP_ERR(cdev, "qed_dbg_fw_asserts failed. rc = %d\n", + rc); + } + + /* GRC dump - must be last because when mcp stuck it will + * clutter idle_chk, reg_fifo, ... + */ + rc = qed_dbg_grc(cdev, (u8 *)buffer + offset + + REGDUMP_HEADER_SIZE, &feature_size); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(GRC_DUMP, cur_engine, + feature_size, omit_engine); + offset += (feature_size + REGDUMP_HEADER_SIZE); + } else { + DP_ERR(cdev, "qed_dbg_grc failed. rc = %d", rc); + } + } + + /* mcp_trace */ + rc = qed_dbg_mcp_trace(cdev, (u8 *)buffer + offset + + REGDUMP_HEADER_SIZE, &feature_size); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(MCP_TRACE, cur_engine, + feature_size, omit_engine); + offset += (feature_size + REGDUMP_HEADER_SIZE); + } else { + DP_ERR(cdev, "qed_dbg_mcp_trace failed. 
rc = %d\n", rc); + } + + qed_set_debug_engine(cdev, org_engine); + + return 0; +} + +int qed_dbg_all_data_size(struct qed_dev *cdev) +{ + u8 cur_engine, org_engine; + u32 regs_len = 0; + + org_engine = qed_get_debug_engine(cdev); + for (cur_engine = 0; cur_engine < cdev->num_hwfns; cur_engine++) { + /* Engine specific */ + DP_VERBOSE(cdev, QED_MSG_DEBUG, + "calculating idle_chk and grcdump register length for current engine\n"); + qed_set_debug_engine(cdev, cur_engine); + regs_len += REGDUMP_HEADER_SIZE + qed_dbg_idle_chk_size(cdev) + + REGDUMP_HEADER_SIZE + qed_dbg_idle_chk_size(cdev) + + REGDUMP_HEADER_SIZE + qed_dbg_grc_size(cdev) + + REGDUMP_HEADER_SIZE + qed_dbg_reg_fifo_size(cdev) + + REGDUMP_HEADER_SIZE + qed_dbg_igu_fifo_size(cdev) + + REGDUMP_HEADER_SIZE + + qed_dbg_protection_override_size(cdev) + + REGDUMP_HEADER_SIZE + qed_dbg_fw_asserts_size(cdev); + } + + /* Engine common */ + regs_len += REGDUMP_HEADER_SIZE + qed_dbg_mcp_trace_size(cdev); + qed_set_debug_engine(cdev, org_engine); + + return regs_len; +} + +int qed_dbg_feature(struct qed_dev *cdev, void *buffer, + enum qed_dbg_features feature, u32 *num_dumped_bytes) +{ + struct qed_hwfn *p_hwfn = + &cdev->hwfns[cdev->dbg_params.engine_for_debug]; + struct qed_dbg_feature *qed_feature = + &cdev->dbg_params.features[feature]; + enum dbg_status dbg_rc; + struct qed_ptt *p_ptt; + int rc = 0; + + /* Acquire ptt */ + p_ptt = qed_ptt_acquire(p_hwfn); + if (!p_ptt) + return -EINVAL; + + /* Get dump */ + dbg_rc = qed_dbg_dump(p_hwfn, p_ptt, feature); + if (dbg_rc != DBG_STATUS_OK) { + DP_VERBOSE(cdev, QED_MSG_DEBUG, "%s\n", + qed_dbg_get_status_str(dbg_rc)); + *num_dumped_bytes = 0; + rc = -EINVAL; + goto out; + } + + DP_VERBOSE(cdev, QED_MSG_DEBUG, + "copying debugfs feature to external buffer\n"); + memcpy(buffer, qed_feature->dump_buf, qed_feature->buf_size); + *num_dumped_bytes = cdev->dbg_params.features[feature].dumped_dwords * + 4; + +out: + qed_ptt_release(p_hwfn, p_ptt); + return rc; +} + +int 
qed_dbg_feature_size(struct qed_dev *cdev, enum qed_dbg_features feature) +{ + struct qed_hwfn *p_hwfn = + &cdev->hwfns[cdev->dbg_params.engine_for_debug]; + struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn); + struct qed_dbg_feature *qed_feature = + &cdev->dbg_params.features[feature]; + u32 buf_size_dwords; + enum dbg_status rc; + + if (!p_ptt) + return -EINVAL; + + rc = qed_features_lookup[feature].get_size(p_hwfn, p_ptt, + &buf_size_dwords); + if (rc != DBG_STATUS_OK) + buf_size_dwords = 0; + + qed_ptt_release(p_hwfn, p_ptt); + qed_feature->buf_size = buf_size_dwords * sizeof(u32); + return qed_feature->buf_size; +} + +u8 qed_get_debug_engine(struct qed_dev *cdev) +{ + return cdev->dbg_params.engine_for_debug; +} + +void qed_set_debug_engine(struct qed_dev *cdev, int engine_number) +{ + DP_VERBOSE(cdev, QED_MSG_DEBUG, "set debug engine to %d\n", + engine_number); + cdev->dbg_params.engine_for_debug = engine_number; +} + +void qed_dbg_pf_init(struct qed_dev *cdev) +{ + const u8 *dbg_values; + + /* Debug values are after init values. + * The offset is the first dword of the file. + */ + dbg_values = cdev->firmware->data + *(u32 *)cdev->firmware->data; + qed_dbg_set_bin_ptr((u8 *)dbg_values); + qed_dbg_user_set_bin_ptr((u8 *)dbg_values); +} + +void qed_dbg_pf_exit(struct qed_dev *cdev) +{ + struct qed_dbg_feature *feature = NULL; + enum qed_dbg_features feature_idx; + + /* Debug features' buffers may be allocated if debug feature was used + * but dump wasn't called. 
+ */ + for (feature_idx = 0; feature_idx < DBG_FEATURE_NUM; feature_idx++) { + feature = &cdev->dbg_params.features[feature_idx]; + if (feature->dump_buf) { + vfree(feature->dump_buf); + feature->dump_buf = NULL; + } + } +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.h b/drivers/net/ethernet/qlogic/qed/qed_debug.h new file mode 100644 index 0000000..f872d73 --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.h @@ -0,0 +1,54 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#ifndef _QED_DEBUGFS_H +#define _QED_DEBUGFS_H + +enum qed_dbg_features { + DBG_FEATURE_GRC, + DBG_FEATURE_IDLE_CHK, + DBG_FEATURE_MCP_TRACE, + DBG_FEATURE_REG_FIFO, + DBG_FEATURE_IGU_FIFO, + DBG_FEATURE_PROTECTION_OVERRIDE, + DBG_FEATURE_FW_ASSERTS, + DBG_FEATURE_NUM +}; + +int qed_dbg_grc(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes); +int qed_dbg_grc_size(struct qed_dev *cdev); +int qed_dbg_idle_chk(struct qed_dev *cdev, void *buffer, + u32 *num_dumped_bytes); +int qed_dbg_idle_chk_size(struct qed_dev *cdev); +int qed_dbg_reg_fifo(struct qed_dev *cdev, void *buffer, + u32 *num_dumped_bytes); +int qed_dbg_reg_fifo_size(struct qed_dev *cdev); +int qed_dbg_igu_fifo(struct qed_dev *cdev, void *buffer, + u32 *num_dumped_bytes); +int qed_dbg_igu_fifo_size(struct qed_dev *cdev); +int qed_dbg_protection_override(struct qed_dev *cdev, void *buffer, + u32 *num_dumped_bytes); +int qed_dbg_protection_override_size(struct qed_dev *cdev); +int qed_dbg_fw_asserts(struct qed_dev *cdev, void *buffer, + u32 *num_dumped_bytes); +int qed_dbg_fw_asserts_size(struct qed_dev *cdev); +int qed_dbg_mcp_trace(struct qed_dev *cdev, void *buffer, + u32 *num_dumped_bytes); +int qed_dbg_mcp_trace_size(struct qed_dev *cdev); +int qed_dbg_all_data(struct qed_dev *cdev, void 
*buffer); +int qed_dbg_all_data_size(struct qed_dev *cdev); +u8 qed_get_debug_engine(struct qed_dev *cdev); +void qed_set_debug_engine(struct qed_dev *cdev, int engine_number); +int qed_dbg_feature(struct qed_dev *cdev, void *buffer, + enum qed_dbg_features feature, u32 *num_dumped_bytes); +int qed_dbg_feature_size(struct qed_dev *cdev, enum qed_dbg_features feature); + +void qed_dbg_pf_init(struct qed_dev *cdev); +void qed_dbg_pf_exit(struct qed_dev *cdev); + +#endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 855691f..2777d5b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -1728,13 +1728,6 @@ enum bin_dbg_buffer_type { MAX_BIN_DBG_BUFFER_TYPE }; -/* Chip IDs */ -enum chip_ids { - CHIP_RESERVED, - CHIP_BB_B0, - CHIP_RESERVED2, - MAX_CHIP_IDS -}; /* Attention bit mapping */ struct dbg_attn_bit_mapping { @@ -1813,6 +1806,371 @@ enum dbg_attn_type { MAX_DBG_ATTN_TYPE }; +/* condition header for registers dump */ +struct dbg_dump_cond_hdr { + struct dbg_mode_hdr mode; /* Mode header */ + u8 block_id; /* block ID */ + u8 data_size; /* size in dwords of the data following this header */ +}; + +/* memory data for registers dump */ +struct dbg_dump_mem { + __le32 dword0; +#define DBG_DUMP_MEM_ADDRESS_MASK 0xFFFFFF +#define DBG_DUMP_MEM_ADDRESS_SHIFT 0 +#define DBG_DUMP_MEM_MEM_GROUP_ID_MASK 0xFF +#define DBG_DUMP_MEM_MEM_GROUP_ID_SHIFT 24 + __le32 dword1; +#define DBG_DUMP_MEM_LENGTH_MASK 0xFFFFFF +#define DBG_DUMP_MEM_LENGTH_SHIFT 0 +#define DBG_DUMP_MEM_RESERVED_MASK 0xFF +#define DBG_DUMP_MEM_RESERVED_SHIFT 24 +}; + +/* register data for registers dump */ +struct dbg_dump_reg { + __le32 data; +#define DBG_DUMP_REG_ADDRESS_MASK 0xFFFFFF /* register address (in dwords) */ +#define DBG_DUMP_REG_ADDRESS_SHIFT 0 +#define DBG_DUMP_REG_LENGTH_MASK 0xFF /* register size (in dwords) */ +#define DBG_DUMP_REG_LENGTH_SHIFT 24 +}; + +/* split header for registers 
dump */ +struct dbg_dump_split_hdr { + __le32 hdr; +#define DBG_DUMP_SPLIT_HDR_DATA_SIZE_MASK 0xFFFFFF +#define DBG_DUMP_SPLIT_HDR_DATA_SIZE_SHIFT 0 +#define DBG_DUMP_SPLIT_HDR_SPLIT_TYPE_ID_MASK 0xFF +#define DBG_DUMP_SPLIT_HDR_SPLIT_TYPE_ID_SHIFT 24 +}; + +/* condition header for idle check */ +struct dbg_idle_chk_cond_hdr { + struct dbg_mode_hdr mode; /* Mode header */ + __le16 data_size; /* size in dwords of the data following this header */ +}; + +/* Idle Check condition register */ +struct dbg_idle_chk_cond_reg { + __le32 data; +#define DBG_IDLE_CHK_COND_REG_ADDRESS_MASK 0xFFFFFF +#define DBG_IDLE_CHK_COND_REG_ADDRESS_SHIFT 0 +#define DBG_IDLE_CHK_COND_REG_BLOCK_ID_MASK 0xFF +#define DBG_IDLE_CHK_COND_REG_BLOCK_ID_SHIFT 24 + __le16 num_entries; /* number of registers entries to check */ + u8 entry_size; /* size of registers entry (in dwords) */ + u8 start_entry; /* index of the first entry to check */ +}; + +/* Idle Check info register */ +struct dbg_idle_chk_info_reg { + __le32 data; +#define DBG_IDLE_CHK_INFO_REG_ADDRESS_MASK 0xFFFFFF +#define DBG_IDLE_CHK_INFO_REG_ADDRESS_SHIFT 0 +#define DBG_IDLE_CHK_INFO_REG_BLOCK_ID_MASK 0xFF +#define DBG_IDLE_CHK_INFO_REG_BLOCK_ID_SHIFT 24 + __le16 size; /* register size in dwords */ + struct dbg_mode_hdr mode; /* Mode header */ +}; + +/* Idle Check register */ +union dbg_idle_chk_reg { + struct dbg_idle_chk_cond_reg cond_reg; /* condition register */ + struct dbg_idle_chk_info_reg info_reg; /* info register */ +}; + +/* Idle Check result header */ +struct dbg_idle_chk_result_hdr { + __le16 rule_id; /* Failing rule index */ + __le16 mem_entry_id; /* Failing memory entry index */ + u8 num_dumped_cond_regs; /* number of dumped condition registers */ + u8 num_dumped_info_regs; /* number of dumped condition registers */ + u8 severity; /* from dbg_idle_chk_severity_types enum */ + u8 reserved; +}; + +/* Idle Check result register header */ +struct dbg_idle_chk_result_reg_hdr { + u8 data; +#define 
DBG_IDLE_CHK_RESULT_REG_HDR_IS_MEM_MASK 0x1 +#define DBG_IDLE_CHK_RESULT_REG_HDR_IS_MEM_SHIFT 0 +#define DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID_MASK 0x7F +#define DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID_SHIFT 1 + u8 start_entry; /* index of the first checked entry */ + __le16 size; /* register size in dwords */ +}; + +/* Idle Check rule */ +struct dbg_idle_chk_rule { + __le16 rule_id; /* Idle Check rule ID */ + u8 severity; /* value from dbg_idle_chk_severity_types enum */ + u8 cond_id; /* Condition ID */ + u8 num_cond_regs; /* number of condition registers */ + u8 num_info_regs; /* number of info registers */ + u8 num_imms; /* number of immediates in the condition */ + u8 reserved1; + __le16 reg_offset; /* offset of this rules registers in the idle check + * register array (in dbg_idle_chk_reg units). + */ + __le16 imm_offset; /* offset of this rules immediate values in the + * immediate values array (in dwords). + */ +}; + +/* Idle Check rule parsing data */ +struct dbg_idle_chk_rule_parsing_data { + __le32 data; +#define DBG_IDLE_CHK_RULE_PARSING_DATA_HAS_FW_MSG_MASK 0x1 +#define DBG_IDLE_CHK_RULE_PARSING_DATA_HAS_FW_MSG_SHIFT 0 +#define DBG_IDLE_CHK_RULE_PARSING_DATA_STR_OFFSET_MASK 0x7FFFFFFF +#define DBG_IDLE_CHK_RULE_PARSING_DATA_STR_OFFSET_SHIFT 1 +}; + +/* idle check severity types */ +enum dbg_idle_chk_severity_types { + /* idle check failure should cause an error */ + IDLE_CHK_SEVERITY_ERROR, + /* idle check failure should cause an error only if theres no traffic */ + IDLE_CHK_SEVERITY_ERROR_NO_TRAFFIC, + /* idle check failure should cause a warning */ + IDLE_CHK_SEVERITY_WARNING, + MAX_DBG_IDLE_CHK_SEVERITY_TYPES +}; + +/* Debug Bus block data */ +struct dbg_bus_block_data { + u8 enabled; /* Indicates if the block is enabled for recording (0/1) */ + u8 hw_id; /* HW ID associated with the block */ + u8 line_num; /* Debug line number to select */ + u8 right_shift; /* Number of units to right the debug data (0-3) */ + u8 cycle_en; /* 4-bit value: bit i set -> unit 
i is enabled. */ + u8 force_valid; /* 4-bit value: bit i set -> unit i is forced valid. */ + u8 force_frame; /* 4-bit value: bit i set -> unit i frame bit is forced. + */ + u8 reserved; +}; + +/* Debug Bus Clients */ +enum dbg_bus_clients { + DBG_BUS_CLIENT_RBCN, + DBG_BUS_CLIENT_RBCP, + DBG_BUS_CLIENT_RBCR, + DBG_BUS_CLIENT_RBCT, + DBG_BUS_CLIENT_RBCU, + DBG_BUS_CLIENT_RBCF, + DBG_BUS_CLIENT_RBCX, + DBG_BUS_CLIENT_RBCS, + DBG_BUS_CLIENT_RBCH, + DBG_BUS_CLIENT_RBCZ, + DBG_BUS_CLIENT_OTHER_ENGINE, + DBG_BUS_CLIENT_TIMESTAMP, + DBG_BUS_CLIENT_CPU, + DBG_BUS_CLIENT_RBCY, + DBG_BUS_CLIENT_RBCQ, + DBG_BUS_CLIENT_RBCM, + DBG_BUS_CLIENT_RBCB, + DBG_BUS_CLIENT_RBCW, + DBG_BUS_CLIENT_RBCV, + MAX_DBG_BUS_CLIENTS +}; + +/* Debug Bus memory address */ +struct dbg_bus_mem_addr { + __le32 lo; + __le32 hi; +}; + +/* Debug Bus PCI buffer data */ +struct dbg_bus_pci_buf_data { + struct dbg_bus_mem_addr phys_addr; /* PCI buffer physical address */ + struct dbg_bus_mem_addr virt_addr; /* PCI buffer virtual address */ + __le32 size; /* PCI buffer size in bytes */ +}; + +/* Debug Bus Storm EID range filter params */ +struct dbg_bus_storm_eid_range_params { + u8 min; /* Minimal event ID to filter on */ + u8 max; /* Maximal event ID to filter on */ +}; + +/* Debug Bus Storm EID mask filter params */ +struct dbg_bus_storm_eid_mask_params { + u8 val; /* Event ID value */ + u8 mask; /* Event ID mask. 1s in the mask = dont care bits. 
*/ +}; + +/* Debug Bus Storm EID filter params */ +union dbg_bus_storm_eid_params { + struct dbg_bus_storm_eid_range_params range; + struct dbg_bus_storm_eid_mask_params mask; +}; + +/* Debug Bus Storm data */ +struct dbg_bus_storm_data { + u8 fast_enabled; + u8 fast_mode; + u8 slow_enabled; + u8 slow_mode; + u8 hw_id; + u8 eid_filter_en; + u8 eid_range_not_mask; + u8 cid_filter_en; + union dbg_bus_storm_eid_params eid_filter_params; + __le16 reserved; + __le32 cid; +}; + +/* Debug Bus data */ +struct dbg_bus_data { + __le32 app_version; /* The tools version number of the application */ + u8 state; /* The current debug bus state */ + u8 hw_dwords; /* HW dwords per cycle */ + u8 next_hw_id; /* Next HW ID to be associated with an input */ + u8 num_enabled_blocks; /* Number of blocks enabled for recording */ + u8 num_enabled_storms; /* Number of Storms enabled for recording */ + u8 target; /* Output target */ + u8 next_trigger_state; /* ID of next trigger state to be added */ + u8 next_constraint_id; /* ID of next filter/trigger constraint to be + * added. + */ + u8 one_shot_en; /* Indicates if one-shot mode is enabled (0/1) */ + u8 grc_input_en; /* Indicates if GRC recording is enabled (0/1) */ + u8 timestamp_input_en; /* Indicates if timestamp recording is enabled + * (0/1). + */ + u8 filter_en; /* Indicates if the recording filter is enabled (0/1) */ + u8 trigger_en; /* Indicates if the recording trigger is enabled (0/1) */ + u8 adding_filter; /* If true, the next added constraint belong to the + * filter. Otherwise, it belongs to the last added + * trigger state. Valid only if either filter or + * triggers are enabled. + */ + u8 filter_pre_trigger; /* Indicates if the recording filter should be + * applied before the trigger. Valid only if both + * filter and trigger are enabled (0/1). + */ + u8 filter_post_trigger; /* Indicates if the recording filter should be + * applied after the trigger. Valid only if both + * filter and trigger are enabled (0/1). 
+ */ + u8 unify_inputs; /* If true, all inputs are associated with HW ID 0. + * Otherwise, each input is assigned a different HW ID + * (0/1). + */ + u8 rcv_from_other_engine; /* Indicates if the other engine sends it NW + * recording to this engine (0/1). + */ + struct dbg_bus_pci_buf_data pci_buf; /* Debug Bus PCI buffer data. Valid + * only when the target is + * DBG_BUS_TARGET_ID_PCI. + */ + __le16 reserved; + struct dbg_bus_block_data blocks[80];/* Debug Bus data for each block */ + struct dbg_bus_storm_data storms[6]; /* Debug Bus data for each block */ +}; + +/* Debug bus frame modes */ +enum dbg_bus_frame_modes { + DBG_BUS_FRAME_MODE_0HW_4ST = 0, /* 0 HW dwords, 4 Storm dwords */ + DBG_BUS_FRAME_MODE_4HW_0ST = 3, /* 4 HW dwords, 0 Storm dwords */ + DBG_BUS_FRAME_MODE_8HW_0ST = 4, /* 8 HW dwords, 0 Storm dwords */ + MAX_DBG_BUS_FRAME_MODES +}; + +/* Debug bus states */ +enum dbg_bus_states { + DBG_BUS_STATE_IDLE, /* debug bus idle state (not recording) */ + DBG_BUS_STATE_READY, /* debug bus is ready for configuration and + * recording. + */ + DBG_BUS_STATE_RECORDING, /* debug bus is currently recording */ + DBG_BUS_STATE_STOPPED, /* debug bus recording has stopped */ + MAX_DBG_BUS_STATES +}; + +/* Debug bus target IDs */ +enum dbg_bus_targets { + /* records debug bus to DBG block internal buffer */ + DBG_BUS_TARGET_ID_INT_BUF, + /* records debug bus to the NW */ + DBG_BUS_TARGET_ID_NIG, + /* records debug bus to a PCI buffer */ + DBG_BUS_TARGET_ID_PCI, + MAX_DBG_BUS_TARGETS +}; + +/* GRC Dump data */ +struct dbg_grc_data { + __le32 param_val[40]; /* Value of each GRC parameter. Array size must + * match the enum dbg_grc_params. + */ + u8 param_set_by_user[40]; /* Indicates for each GRC parameter if it was + * set by the user (0/1). Array size must + * match the enum dbg_grc_params. 
+ */ +}; + +/* Debug GRC params */ +enum dbg_grc_params { + DBG_GRC_PARAM_DUMP_TSTORM, /* dump Tstorm memories (0/1) */ + DBG_GRC_PARAM_DUMP_MSTORM, /* dump Mstorm memories (0/1) */ + DBG_GRC_PARAM_DUMP_USTORM, /* dump Ustorm memories (0/1) */ + DBG_GRC_PARAM_DUMP_XSTORM, /* dump Xstorm memories (0/1) */ + DBG_GRC_PARAM_DUMP_YSTORM, /* dump Ystorm memories (0/1) */ + DBG_GRC_PARAM_DUMP_PSTORM, /* dump Pstorm memories (0/1) */ + DBG_GRC_PARAM_DUMP_REGS, /* dump non-memory registers (0/1) */ + DBG_GRC_PARAM_DUMP_RAM, /* dump Storm internal RAMs (0/1) */ + DBG_GRC_PARAM_DUMP_PBUF, /* dump Storm passive buffer (0/1) */ + DBG_GRC_PARAM_DUMP_IOR, /* dump Storm IORs (0/1) */ + DBG_GRC_PARAM_DUMP_VFC, /* dump VFC memories (0/1) */ + DBG_GRC_PARAM_DUMP_CM_CTX, /* dump CM contexts (0/1) */ + DBG_GRC_PARAM_DUMP_PXP, /* dump PXP memories (0/1) */ + DBG_GRC_PARAM_DUMP_RSS, /* dump RSS memories (0/1) */ + DBG_GRC_PARAM_DUMP_CAU, /* dump CAU memories (0/1) */ + DBG_GRC_PARAM_DUMP_QM, /* dump QM memories (0/1) */ + DBG_GRC_PARAM_DUMP_MCP, /* dump MCP memories (0/1) */ + DBG_GRC_PARAM_RESERVED, /* reserved */ + DBG_GRC_PARAM_DUMP_CFC, /* dump CFC memories (0/1) */ + DBG_GRC_PARAM_DUMP_IGU, /* dump IGU memories (0/1) */ + DBG_GRC_PARAM_DUMP_BRB, /* dump BRB memories (0/1) */ + DBG_GRC_PARAM_DUMP_BTB, /* dump BTB memories (0/1) */ + DBG_GRC_PARAM_DUMP_BMB, /* dump BMB memories (0/1) */ + DBG_GRC_PARAM_DUMP_NIG, /* dump NIG memories (0/1) */ + DBG_GRC_PARAM_DUMP_MULD, /* dump MULD memories (0/1) */ + DBG_GRC_PARAM_DUMP_PRS, /* dump PRS memories (0/1) */ + DBG_GRC_PARAM_DUMP_DMAE, /* dump PRS memories (0/1) */ + DBG_GRC_PARAM_DUMP_TM, /* dump TM (timers) memories (0/1) */ + DBG_GRC_PARAM_DUMP_SDM, /* dump SDM memories (0/1) */ + DBG_GRC_PARAM_DUMP_DIF, /* dump DIF memories (0/1) */ + DBG_GRC_PARAM_DUMP_STATIC, /* dump static debug data (0/1) */ + DBG_GRC_PARAM_UNSTALL, /* un-stall Storms after dump (0/1) */ + DBG_GRC_PARAM_NUM_LCIDS, /* number of LCIDs (0..320) */ + 
DBG_GRC_PARAM_NUM_LTIDS, /* number of LTIDs (0..320) */ + /* preset: exclude all memories from dump (1 only) */ + DBG_GRC_PARAM_EXCLUDE_ALL, + /* preset: include memories for crash dump (1 only) */ + DBG_GRC_PARAM_CRASH, + /* perform dump only if MFW is responding (0/1) */ + DBG_GRC_PARAM_PARITY_SAFE, + DBG_GRC_PARAM_DUMP_CM, /* dump CM memories (0/1) */ + DBG_GRC_PARAM_DUMP_PHY, /* dump PHY memories (0/1) */ + MAX_DBG_GRC_PARAMS +}; + +/* Debug reset registers */ +enum dbg_reset_regs { + DBG_RESET_REG_MISCS_PL_UA, + DBG_RESET_REG_MISCS_PL_HV, + DBG_RESET_REG_MISCS_PL_HV_2, + DBG_RESET_REG_MISC_PL_UA, + DBG_RESET_REG_MISC_PL_HV, + DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, + DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, + DBG_RESET_REG_MISC_PL_PDA_VAUX, + MAX_DBG_RESET_REGS +}; + /* Debug status codes */ enum dbg_status { DBG_STATUS_OK, @@ -1869,6 +2227,41 @@ enum dbg_status { MAX_DBG_STATUS }; +/* Debug Storms IDs */ +enum dbg_storms { + DBG_TSTORM_ID, + DBG_MSTORM_ID, + DBG_USTORM_ID, + DBG_XSTORM_ID, + DBG_YSTORM_ID, + DBG_PSTORM_ID, + MAX_DBG_STORMS +}; + +/* Idle Check data */ +struct idle_chk_data { + __le32 buf_size; /* Idle check buffer size in dwords */ + u8 buf_size_set; /* Indicates if the idle check buffer size was set + * (0/1). + */ + u8 reserved1; + __le16 reserved2; +}; + +/* Debug Tools data (per HW function) */ +struct dbg_tools_data { + struct dbg_grc_data grc; /* GRC Dump data */ + struct dbg_bus_data bus; /* Debug Bus data */ + struct idle_chk_data idle_chk; /* Idle Check data */ + u8 mode_enable[40]; /* Indicates if a mode is enabled (0/1) */ + u8 block_in_reset[80]; /* Indicates if a block is in reset state (0/1). 
+ */ + u8 chip_id; /* Chip ID (from enum chip_ids) */ + u8 platform_id; /* Platform ID (from enum platform_ids) */ + u8 initialized; /* Indicates if the data was initialized */ + u8 reserved; +}; + /********************************/ /* HSI Init Functions constants */ /********************************/ @@ -1948,15 +2341,50 @@ struct init_qm_vport_params { /* Max size in dwords of a zipped array */ #define MAX_ZIPPED_SIZE 8192 +struct fw_asserts_ram_section { + __le16 section_ram_line_offset; + __le16 section_ram_line_size; + u8 list_dword_offset; + u8 list_element_dword_size; + u8 list_num_elements; + u8 list_next_index_dword_offset; +}; + +struct fw_ver_num { + u8 major; /* Firmware major version number */ + u8 minor; /* Firmware minor version number */ + u8 rev; /* Firmware revision version number */ + u8 eng; /* Firmware engineering version number (for bootleg versions) */ +}; + +struct fw_ver_info { + __le16 tools_ver; /* Tools version number */ + u8 image_id; /* FW image ID (e.g. main) */ + u8 reserved1; + struct fw_ver_num num; /* FW version number */ + __le32 timestamp; /* FW Timestamp in unix time (sec. since 1970) */ + __le32 reserved2; +}; + +struct fw_info { + struct fw_ver_info ver; + struct fw_asserts_ram_section fw_asserts_section; +}; + +struct fw_info_location { + __le32 grc_addr; + __le32 size; +}; + enum init_modes { MODE_RESERVED, MODE_BB_B0, - MODE_RESERVED2, + MODE_K2, MODE_ASIC, + MODE_RESERVED2, MODE_RESERVED3, MODE_RESERVED4, MODE_RESERVED5, - MODE_RESERVED6, MODE_SF, MODE_MF_SD, MODE_MF_SI, @@ -1965,7 +2393,7 @@ enum init_modes { MODE_PORTS_PER_ENG_4, MODE_100G, MODE_40G, - MODE_RESERVED7, + MODE_RESERVED6, MAX_INIT_MODES }; @@ -2223,8 +2651,276 @@ struct iro { __le16 size; }; +/***************************** Public Functions *******************************/ +/** + * @brief qed_dbg_set_bin_ptr - Sets a pointer to the binary data with debug + * arrays. + * + * @param bin_ptr - a pointer to the binary data with debug arrays. 
+ */ +enum dbg_status qed_dbg_set_bin_ptr(const u8 * const bin_ptr); +/** + * @brief qed_dbg_grc_get_dump_buf_size - Returns the required buffer size for + * GRC Dump. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param buf_size - OUT: required buffer size (in dwords) for the GRC Dump + * data. + * + * @return error if one of the following holds: + * - the version wasn't set + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_grc_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size); +/** + * @brief qed_dbg_grc_dump - Dumps GRC data into the specified buffer. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param dump_buf - Pointer to write the collected GRC data into. + * @param buf_size_in_dwords - Size of the specified buffer in dwords. + * @param num_dumped_dwords - OUT: number of dumped dwords. + * + * @return error if one of the following holds: + * - the version wasn't set + * - the specified dump buffer is too small + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_grc_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords); +/** + * @brief qed_dbg_idle_chk_get_dump_buf_size - Returns the required buffer size + * for idle check results. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param buf_size - OUT: required buffer size (in dwords) for the idle check + * data. + * + * @return error if one of the following holds: + * - the version wasn't set + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_idle_chk_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size); +/** + * @brief qed_dbg_idle_chk_dump - Performs idle check and writes the results + * into the specified buffer. 
+ * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param dump_buf - Pointer to write the idle check data into. + * @param buf_size_in_dwords - Size of the specified buffer in dwords. + * @param num_dumped_dwords - OUT: number of dumped dwords. + * + * @return error if one of the following holds: + * - the version wasn't set + * - the specified buffer is too small + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_idle_chk_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords); +/** + * @brief qed_dbg_mcp_trace_get_dump_buf_size - Returns the required buffer size + * for mcp trace results. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param buf_size - OUT: required buffer size (in dwords) for mcp trace data. + * + * @return error if one of the following holds: + * - the version wasn't set + * - the trace data in MCP scratchpad contain an invalid signature + * - the bundle ID in NVRAM is invalid + * - the trace meta data cannot be found (in NVRAM or image file) + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_mcp_trace_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size); +/** + * @brief qed_dbg_mcp_trace_dump - Performs mcp trace and writes the results + * into the specified buffer. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param dump_buf - Pointer to write the mcp trace data into. + * @param buf_size_in_dwords - Size of the specified buffer in dwords. + * @param num_dumped_dwords - OUT: number of dumped dwords. 
+ * + * @return error if one of the following holds: + * - the version wasn't set + * - the specified buffer is too small + * - the trace data in MCP scratchpad contain an invalid signature + * - the bundle ID in NVRAM is invalid + * - the trace meta data cannot be found (in NVRAM or image file) + * - the trace meta data cannot be read (from NVRAM or image file) + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_mcp_trace_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords); +/** + * @brief qed_dbg_reg_fifo_get_dump_buf_size - Returns the required buffer size + * for grc trace fifo results. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param buf_size - OUT: required buffer size (in dwords) for reg fifo data. + * + * @return error if one of the following holds: + * - the version wasn't set + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_reg_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size); +/** + * @brief qed_dbg_reg_fifo_dump - Reads the reg fifo and writes the results into + * the specified buffer. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param dump_buf - Pointer to write the reg fifo data into. + * @param buf_size_in_dwords - Size of the specified buffer in dwords. + * @param num_dumped_dwords - OUT: number of dumped dwords. + * + * @return error if one of the following holds: + * - the version wasn't set + * - the specified buffer is too small + * - DMAE transaction failed + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_reg_fifo_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords); +/** + * @brief qed_dbg_igu_fifo_get_dump_buf_size - Returns the required buffer size + * for the IGU fifo results. 
+ * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param buf_size - OUT: required buffer size (in dwords) for the IGU fifo + * data. + * + * @return error if one of the following holds: + * - the version wasn't set + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_igu_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size); +/** + * @brief qed_dbg_igu_fifo_dump - Reads the IGU fifo and writes the results into + * the specified buffer. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param dump_buf - Pointer to write the IGU fifo data into. + * @param buf_size_in_dwords - Size of the specified buffer in dwords. + * @param num_dumped_dwords - OUT: number of dumped dwords. + * + * @return error if one of the following holds: + * - the version wasn't set + * - the specified buffer is too small + * - DMAE transaction failed + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_igu_fifo_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords); +/** + * @brief qed_dbg_protection_override_get_dump_buf_size - Returns the required + * buffer size for protection override window results. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param buf_size - OUT: required buffer size (in dwords) for protection + * override data. + * + * @return error if one of the following holds: + * - the version wasn't set + * Otherwise, returns ok. + */ +enum dbg_status +qed_dbg_protection_override_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size); +/** + * @brief qed_dbg_protection_override_dump - Reads protection override window + * entries and writes the results into the specified buffer. 
+ * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param dump_buf - Pointer to write the protection override data into. + * @param buf_size_in_dwords - Size of the specified buffer in dwords. + * @param num_dumped_dwords - OUT: number of dumped dwords. + * + * @return error if one of the following holds: + * - the version wasn't set + * - the specified buffer is too small + * - DMAE transaction failed + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_protection_override_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords); +/** + * @brief qed_dbg_fw_asserts_get_dump_buf_size - Returns the required buffer + * size for FW Asserts results. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param buf_size - OUT: required buffer size (in dwords) for FW Asserts data. + * + * @return error if one of the following holds: + * - the version wasn't set + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_fw_asserts_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size); +/** + * @brief qed_dbg_fw_asserts_dump - Reads the FW Asserts and writes the results + * into the specified buffer. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param dump_buf - Pointer to write the FW Asserts data into. + * @param buf_size_in_dwords - Size of the specified buffer in dwords. + * @param num_dumped_dwords - OUT: number of dumped dwords. + * + * @return error if one of the following holds: + * - the version wasn't set + * - the specified buffer is too small + * Otherwise, returns ok. 
+ */ +enum dbg_status qed_dbg_fw_asserts_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords); /** - * @brief qed_dbg_print_attn - Prints attention registers values in the specified results struct. + * @brief qed_dbg_print_attn - Prints attention registers values in the + * specified results struct. * * @param p_hwfn * @param results - Pointer to the attention read results @@ -2236,8 +2932,212 @@ struct iro { enum dbg_status qed_dbg_print_attn(struct qed_hwfn *p_hwfn, struct dbg_attn_block_result *results); +/******************************** Constants **********************************/ + #define MAX_NAME_LEN 16 +/***************************** Public Functions *******************************/ +/** + * @brief qed_dbg_user_set_bin_ptr - Sets a pointer to the binary data with + * debug arrays. + * + * @param bin_ptr - a pointer to the binary data with debug arrays. + */ +enum dbg_status qed_dbg_user_set_bin_ptr(const u8 * const bin_ptr); +/** + * @brief qed_dbg_get_status_str - Returns a string for the specified status. + * + * @param status - a debug status code. + * + * @return a string for the specified status + */ +const char *qed_dbg_get_status_str(enum dbg_status status); +/** + * @brief qed_get_idle_chk_results_buf_size - Returns the required buffer size + * for idle check results (in bytes). + * + * @param p_hwfn - HW device data + * @param dump_buf - idle check dump buffer. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed + * results. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_get_idle_chk_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size); +/** + * @brief qed_print_idle_chk_results - Prints idle check results + * + * @param p_hwfn - HW device data + * @param dump_buf - idle check dump buffer. 
+ * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf - buffer for printing the idle check results. + * @param num_errors - OUT: number of errors found in idle check. + * @param num_warnings - OUT: number of warnings found in idle check. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_print_idle_chk_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf, + u32 *num_errors, + u32 *num_warnings); +/** + * @brief qed_get_mcp_trace_results_buf_size - Returns the required buffer size + * for MCP Trace results (in bytes). + * + * @param p_hwfn - HW device data + * @param dump_buf - MCP Trace dump buffer. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed + * results. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_get_mcp_trace_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size); +/** + * @brief qed_print_mcp_trace_results - Prints MCP Trace results + * + * @param p_hwfn - HW device data + * @param dump_buf - mcp trace dump buffer, starting from the header. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf - buffer for printing the mcp trace results. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_print_mcp_trace_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf); +/** + * @brief qed_get_reg_fifo_results_buf_size - Returns the required buffer size + * for reg_fifo results (in bytes). + * + * @param p_hwfn - HW device data + * @param dump_buf - reg fifo dump buffer. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed + * results. 
+ * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_get_reg_fifo_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size); +/** + * @brief qed_print_reg_fifo_results - Prints reg fifo results + * + * @param p_hwfn - HW device data + * @param dump_buf - reg fifo dump buffer, starting from the header. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf - buffer for printing the reg fifo results. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_print_reg_fifo_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf); +/** + * @brief qed_get_igu_fifo_results_buf_size - Returns the required buffer size + * for igu_fifo results (in bytes). + * + * @param p_hwfn - HW device data + * @param dump_buf - IGU fifo dump buffer. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed + * results. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_get_igu_fifo_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size); +/** + * @brief qed_print_igu_fifo_results - Prints IGU fifo results + * + * @param p_hwfn - HW device data + * @param dump_buf - IGU fifo dump buffer, starting from the header. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf - buffer for printing the IGU fifo results. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_print_igu_fifo_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf); +/** + * @brief qed_get_protection_override_results_buf_size - Returns the required + * buffer size for protection override results (in bytes). 
+ * + * @param p_hwfn - HW device data + * @param dump_buf - protection override dump buffer. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed + * results. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status +qed_get_protection_override_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size); +/** + * @brief qed_print_protection_override_results - Prints protection override + * results. + * + * @param p_hwfn - HW device data + * @param dump_buf - protection override dump buffer, starting from the header. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf - buffer for printing the reg fifo results. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_print_protection_override_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf); +/** + * @brief qed_get_fw_asserts_results_buf_size - Returns the required buffer size + * for FW Asserts results (in bytes). + * + * @param p_hwfn - HW device data + * @param dump_buf - FW Asserts dump buffer. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed + * results. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_get_fw_asserts_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size); +/** + * @brief qed_print_fw_asserts_results - Prints FW Asserts results + * + * @param p_hwfn - HW device data + * @param dump_buf - FW Asserts dump buffer, starting from the header. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf - buffer for printing the FW Asserts results. + * + * @return error if the parsing fails, ok otherwise. 
+ */ +enum dbg_status qed_print_fw_asserts_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf); /* Win 2 */ #define GTT_BAR0_MAP_REG_IGU_CMD 0x00f000UL @@ -7039,6 +7939,35 @@ struct ystorm_iscsi_conn_ag_ctx { __le32 reg2; __le32 reg3; }; + +#define MFW_TRACE_SIGNATURE 0x25071946 + +/* The trace in the buffer */ +#define MFW_TRACE_EVENTID_MASK 0x00ffff +#define MFW_TRACE_PRM_SIZE_MASK 0x0f0000 +#define MFW_TRACE_PRM_SIZE_SHIFT 16 +#define MFW_TRACE_ENTRY_SIZE 3 + +struct mcp_trace { + u32 signature; /* Help to identify that the trace is valid */ + u32 size; /* the size of the trace buffer in bytes */ + u32 curr_level; /* 2 - all will be written to the buffer + * 1 - debug trace will not be written + * 0 - just errors will be written to the buffer + */ + u32 modules_mask[2]; /* a bit per module, 1 means write it, 0 means + * mask it. + */ + + /* Warning: the following pointers are assumed to be 32bits as they are + * used only in the MFW. + */ + u32 trace_prod; /* The next trace will be written to this offset */ + u32 trace_oldest; /* The oldest valid trace starts at this offset + * (usually very close after the current producer). + */ +}; + #define VF_MAX_STATIC 192 #define MCP_GLOB_PATH_MAX 2 @@ -7046,6 +7975,7 @@ struct ystorm_iscsi_conn_ag_ctx { #define MCP_GLOB_PORT_MAX 4 #define MCP_GLOB_FUNC_MAX 16 +typedef u32 offsize_t; /* In DWORDS !!! 
*/ /* Offset from the beginning of the MCP scratchpad */ #define OFFSIZE_OFFSET_SHIFT 0 #define OFFSIZE_OFFSET_MASK 0x0000ffff @@ -7636,6 +8566,8 @@ struct public_drv_mb { #define DRV_MSG_CODE_NIG_DRAIN 0x30000000 #define DRV_MSG_CODE_VF_DISABLED_DONE 0xc0000000 #define DRV_MSG_CODE_CFG_VF_MSIX 0xc0010000 +#define DRV_MSG_CODE_NVM_GET_FILE_ATT 0x00030000 +#define DRV_MSG_CODE_NVM_READ_NVRAM 0x00050000 #define DRV_MSG_CODE_MCP_RESET 0x00090000 #define DRV_MSG_CODE_SET_VERSION 0x000f0000 #define DRV_MSG_CODE_MCP_HALT 0x00100000 @@ -7657,6 +8589,9 @@ struct public_drv_mb { #define DRV_MB_PARAM_UNLOAD_WOL_MCP 0x00000001 #define DRV_MB_PARAM_DCBX_NOTIFY_MASK 0x000000FF #define DRV_MB_PARAM_DCBX_NOTIFY_SHIFT 3 + +#define DRV_MB_PARAM_NVM_LEN_SHIFT 24 + #define DRV_MB_PARAM_CFG_VF_MSIX_VF_ID_SHIFT 0 #define DRV_MB_PARAM_CFG_VF_MSIX_VF_ID_MASK 0x000000FF #define DRV_MB_PARAM_CFG_VF_MSIX_SB_NUM_SHIFT 8 @@ -7694,6 +8629,8 @@ struct public_drv_mb { #define FW_MSG_CODE_DRV_UNLOAD_FUNCTION 0x20130000 #define FW_MSG_CODE_DRV_UNLOAD_DONE 0x21100000 #define FW_MSG_CODE_DRV_CFG_VF_MSIX_DONE 0xb0010000 + +#define FW_MSG_CODE_NVM_OK 0x00010000 #define FW_MSG_CODE_OK 0x00160000 #define FW_MSG_SEQ_NUMBER_MASK 0x0000ffff @@ -7930,4 +8867,101 @@ struct nvm_cfg1 { struct nvm_cfg1_port port[MCP_GLOB_PORT_MAX]; struct nvm_cfg1_func func[MCP_GLOB_FUNC_MAX]; }; + +enum spad_sections { + SPAD_SECTION_TRACE, + SPAD_SECTION_NVM_CFG, + SPAD_SECTION_PUBLIC, + SPAD_SECTION_PRIVATE, + SPAD_SECTION_MAX +}; + +#define MCP_TRACE_SIZE 2048 /* 2kb */ + +/* This section is located at a fixed location in the beginning of the + * scratchpad, to ensure that the MCP trace is not run over during MFW upgrade. + * All the rest of data has a floating location which differs from version to + * version, and is pointed by the mcp_meta_data below. + * Moreover, the spad_layout section is part of the MFW firmware, and is loaded + * with it from nvram in order to clear this portion. 
+ */ +struct static_init { + u32 num_sections; + offsize_t sections[SPAD_SECTION_MAX]; +#define SECTION(_sec_) (*((offsize_t *)(STRUCT_OFFSET(sections[_sec_])))) + + struct mcp_trace trace; +#define MCP_TRACE_P ((struct mcp_trace *)(STRUCT_OFFSET(trace))) + u8 trace_buffer[MCP_TRACE_SIZE]; +#define MCP_TRACE_BUF ((u8 *)(STRUCT_OFFSET(trace_buffer))) + /* running_mfw has the same definition as in nvm_map.h. + * This bit indicate both the running dir, and the running bundle. + * It is set once when the LIM is loaded. + */ + u32 running_mfw; +#define RUNNING_MFW (*((u32 *)(STRUCT_OFFSET(running_mfw)))) + u32 build_time; +#define MFW_BUILD_TIME (*((u32 *)(STRUCT_OFFSET(build_time)))) + u32 reset_type; +#define RESET_TYPE (*((u32 *)(STRUCT_OFFSET(reset_type)))) + u32 mfw_secure_mode; +#define MFW_SECURE_MODE (*((u32 *)(STRUCT_OFFSET(mfw_secure_mode)))) + u16 pme_status_pf_bitmap; +#define PME_STATUS_PF_BITMAP (*((u16 *)(STRUCT_OFFSET(pme_status_pf_bitmap)))) + u16 pme_enable_pf_bitmap; +#define PME_ENABLE_PF_BITMAP (*((u16 *)(STRUCT_OFFSET(pme_enable_pf_bitmap)))) + u32 mim_nvm_addr; + u32 mim_start_addr; + u32 ah_pcie_link_params; +#define AH_PCIE_LINK_PARAMS_LINK_SPEED_MASK (0x000000ff) +#define AH_PCIE_LINK_PARAMS_LINK_SPEED_SHIFT (0) +#define AH_PCIE_LINK_PARAMS_LINK_WIDTH_MASK (0x0000ff00) +#define AH_PCIE_LINK_PARAMS_LINK_WIDTH_SHIFT (8) +#define AH_PCIE_LINK_PARAMS_ASPM_MODE_MASK (0x00ff0000) +#define AH_PCIE_LINK_PARAMS_ASPM_MODE_SHIFT (16) +#define AH_PCIE_LINK_PARAMS_ASPM_CAP_MASK (0xff000000) +#define AH_PCIE_LINK_PARAMS_ASPM_CAP_SHIFT (24) +#define AH_PCIE_LINK_PARAMS (*((u32 *)(STRUCT_OFFSET(ah_pcie_link_params)))) + + u32 rsrv_persist[5]; /* Persist reserved for MFW upgrades */ +}; + +enum nvm_image_type { + NVM_TYPE_TIM1 = 0x01, + NVM_TYPE_TIM2 = 0x02, + NVM_TYPE_MIM1 = 0x03, + NVM_TYPE_MIM2 = 0x04, + NVM_TYPE_MBA = 0x05, + NVM_TYPE_MODULES_PN = 0x06, + NVM_TYPE_VPD = 0x07, + NVM_TYPE_MFW_TRACE1 = 0x08, + NVM_TYPE_MFW_TRACE2 = 0x09, + NVM_TYPE_NVM_CFG1 = 
0x0a, + NVM_TYPE_L2B = 0x0b, + NVM_TYPE_DIR1 = 0x0c, + NVM_TYPE_EAGLE_FW1 = 0x0d, + NVM_TYPE_FALCON_FW1 = 0x0e, + NVM_TYPE_PCIE_FW1 = 0x0f, + NVM_TYPE_HW_SET = 0x10, + NVM_TYPE_LIM = 0x11, + NVM_TYPE_AVS_FW1 = 0x12, + NVM_TYPE_DIR2 = 0x13, + NVM_TYPE_CCM = 0x14, + NVM_TYPE_EAGLE_FW2 = 0x15, + NVM_TYPE_FALCON_FW2 = 0x16, + NVM_TYPE_PCIE_FW2 = 0x17, + NVM_TYPE_AVS_FW2 = 0x18, + NVM_TYPE_INIT_HW = 0x19, + NVM_TYPE_DEFAULT_CFG = 0x1a, + NVM_TYPE_MDUMP = 0x1b, + NVM_TYPE_META = 0x1c, + NVM_TYPE_ISCSI_CFG = 0x1d, + NVM_TYPE_FCOE_CFG = 0x1f, + NVM_TYPE_ETH_PHY_FW1 = 0x20, + NVM_TYPE_ETH_PHY_FW2 = 0x21, + NVM_TYPE_MAX, +}; + +#define DIR_ID_1 (0) + #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index d22e3f88..250efd1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -588,6 +588,8 @@ static int qed_nic_stop(struct qed_dev *cdev) } } + qed_dbg_pf_exit(cdev); + return rc; } @@ -846,6 +848,8 @@ static int qed_slowpath_start(struct qed_dev *cdev, /* First Dword used to diffrentiate between various sources */ data = cdev->firmware->data + sizeof(u32); + + qed_dbg_pf_init(cdev); } memset(&tunn_info, 0, sizeof(tunn_info)); diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h index b44f09b..759cb04 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h +++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h @@ -528,9 +528,903 @@ #define QM_REG_WFQPFWEIGHT 0x2f4e80UL #define QM_REG_WFQVPWEIGHT 0x2fa000UL +#define PGLCS_REG_DBG_SELECT \ + 0x001d14UL +#define PGLCS_REG_DBG_DWORD_ENABLE \ + 0x001d18UL +#define PGLCS_REG_DBG_SHIFT \ + 0x001d1cUL +#define PGLCS_REG_DBG_FORCE_VALID \ + 0x001d20UL +#define PGLCS_REG_DBG_FORCE_FRAME \ + 0x001d24UL +#define MISC_REG_RESET_PL_PDA_VMAIN_1 \ + 0x008070UL +#define MISC_REG_RESET_PL_PDA_VMAIN_2 \ + 0x008080UL +#define MISC_REG_RESET_PL_PDA_VAUX \ + 0x008090UL +#define 
MISCS_REG_RESET_PL_UA \ + 0x009050UL +#define MISCS_REG_RESET_PL_HV \ + 0x009060UL +#define MISCS_REG_RESET_PL_HV_2 \ + 0x009150UL +#define DMAE_REG_DBG_SELECT \ + 0x00c510UL +#define DMAE_REG_DBG_DWORD_ENABLE \ + 0x00c514UL +#define DMAE_REG_DBG_SHIFT \ + 0x00c518UL +#define DMAE_REG_DBG_FORCE_VALID \ + 0x00c51cUL +#define DMAE_REG_DBG_FORCE_FRAME \ + 0x00c520UL +#define NCSI_REG_DBG_SELECT \ + 0x040474UL +#define NCSI_REG_DBG_DWORD_ENABLE \ + 0x040478UL +#define NCSI_REG_DBG_SHIFT \ + 0x04047cUL +#define NCSI_REG_DBG_FORCE_VALID \ + 0x040480UL +#define NCSI_REG_DBG_FORCE_FRAME \ + 0x040484UL +#define GRC_REG_DBG_SELECT \ + 0x0500a4UL +#define GRC_REG_DBG_DWORD_ENABLE \ + 0x0500a8UL +#define GRC_REG_DBG_SHIFT \ + 0x0500acUL +#define GRC_REG_DBG_FORCE_VALID \ + 0x0500b0UL +#define GRC_REG_DBG_FORCE_FRAME \ + 0x0500b4UL +#define UMAC_REG_DBG_SELECT \ + 0x051094UL +#define UMAC_REG_DBG_DWORD_ENABLE \ + 0x051098UL +#define UMAC_REG_DBG_SHIFT \ + 0x05109cUL +#define UMAC_REG_DBG_FORCE_VALID \ + 0x0510a0UL +#define UMAC_REG_DBG_FORCE_FRAME \ + 0x0510a4UL +#define MCP2_REG_DBG_SELECT \ + 0x052400UL +#define MCP2_REG_DBG_DWORD_ENABLE \ + 0x052404UL +#define MCP2_REG_DBG_SHIFT \ + 0x052408UL +#define MCP2_REG_DBG_FORCE_VALID \ + 0x052440UL +#define MCP2_REG_DBG_FORCE_FRAME \ + 0x052444UL +#define PCIE_REG_DBG_SELECT \ + 0x0547e8UL +#define PCIE_REG_DBG_DWORD_ENABLE \ + 0x0547ecUL +#define PCIE_REG_DBG_SHIFT \ + 0x0547f0UL +#define PCIE_REG_DBG_FORCE_VALID \ + 0x0547f4UL +#define PCIE_REG_DBG_FORCE_FRAME \ + 0x0547f8UL +#define DORQ_REG_DBG_SELECT \ + 0x100ad0UL +#define DORQ_REG_DBG_DWORD_ENABLE \ + 0x100ad4UL +#define DORQ_REG_DBG_SHIFT \ + 0x100ad8UL +#define DORQ_REG_DBG_FORCE_VALID \ + 0x100adcUL +#define DORQ_REG_DBG_FORCE_FRAME \ + 0x100ae0UL +#define IGU_REG_DBG_SELECT \ + 0x181578UL +#define IGU_REG_DBG_DWORD_ENABLE \ + 0x18157cUL +#define IGU_REG_DBG_SHIFT \ + 0x181580UL +#define IGU_REG_DBG_FORCE_VALID \ + 0x181584UL +#define IGU_REG_DBG_FORCE_FRAME \ + 
0x181588UL +#define CAU_REG_DBG_SELECT \ + 0x1c0ea8UL +#define CAU_REG_DBG_DWORD_ENABLE \ + 0x1c0eacUL +#define CAU_REG_DBG_SHIFT \ + 0x1c0eb0UL +#define CAU_REG_DBG_FORCE_VALID \ + 0x1c0eb4UL +#define CAU_REG_DBG_FORCE_FRAME \ + 0x1c0eb8UL +#define PRS_REG_DBG_SELECT \ + 0x1f0b6cUL +#define PRS_REG_DBG_DWORD_ENABLE \ + 0x1f0b70UL +#define PRS_REG_DBG_SHIFT \ + 0x1f0b74UL +#define PRS_REG_DBG_FORCE_VALID \ + 0x1f0ba0UL +#define PRS_REG_DBG_FORCE_FRAME \ + 0x1f0ba4UL +#define CNIG_REG_DBG_SELECT_K2 \ + 0x218254UL +#define CNIG_REG_DBG_DWORD_ENABLE_K2 \ + 0x218258UL +#define CNIG_REG_DBG_SHIFT_K2 \ + 0x21825cUL +#define CNIG_REG_DBG_FORCE_VALID_K2 \ + 0x218260UL +#define CNIG_REG_DBG_FORCE_FRAME_K2 \ + 0x218264UL +#define PRM_REG_DBG_SELECT \ + 0x2306a8UL +#define PRM_REG_DBG_DWORD_ENABLE \ + 0x2306acUL +#define PRM_REG_DBG_SHIFT \ + 0x2306b0UL +#define PRM_REG_DBG_FORCE_VALID \ + 0x2306b4UL +#define PRM_REG_DBG_FORCE_FRAME \ + 0x2306b8UL +#define SRC_REG_DBG_SELECT \ + 0x238700UL +#define SRC_REG_DBG_DWORD_ENABLE \ + 0x238704UL +#define SRC_REG_DBG_SHIFT \ + 0x238708UL +#define SRC_REG_DBG_FORCE_VALID \ + 0x23870cUL +#define SRC_REG_DBG_FORCE_FRAME \ + 0x238710UL +#define RSS_REG_DBG_SELECT \ + 0x238c4cUL +#define RSS_REG_DBG_DWORD_ENABLE \ + 0x238c50UL +#define RSS_REG_DBG_SHIFT \ + 0x238c54UL +#define RSS_REG_DBG_FORCE_VALID \ + 0x238c58UL +#define RSS_REG_DBG_FORCE_FRAME \ + 0x238c5cUL +#define RPB_REG_DBG_SELECT \ + 0x23c728UL +#define RPB_REG_DBG_DWORD_ENABLE \ + 0x23c72cUL +#define RPB_REG_DBG_SHIFT \ + 0x23c730UL +#define RPB_REG_DBG_FORCE_VALID \ + 0x23c734UL +#define RPB_REG_DBG_FORCE_FRAME \ + 0x23c738UL +#define PSWRQ2_REG_DBG_SELECT \ + 0x240100UL +#define PSWRQ2_REG_DBG_DWORD_ENABLE \ + 0x240104UL +#define PSWRQ2_REG_DBG_SHIFT \ + 0x240108UL +#define PSWRQ2_REG_DBG_FORCE_VALID \ + 0x24010cUL +#define PSWRQ2_REG_DBG_FORCE_FRAME \ + 0x240110UL +#define PSWRQ_REG_DBG_SELECT \ + 0x280020UL +#define PSWRQ_REG_DBG_DWORD_ENABLE \ + 0x280024UL +#define 
PSWRQ_REG_DBG_SHIFT \ + 0x280028UL +#define PSWRQ_REG_DBG_FORCE_VALID \ + 0x28002cUL +#define PSWRQ_REG_DBG_FORCE_FRAME \ + 0x280030UL +#define PSWWR_REG_DBG_SELECT \ + 0x29a084UL +#define PSWWR_REG_DBG_DWORD_ENABLE \ + 0x29a088UL +#define PSWWR_REG_DBG_SHIFT \ + 0x29a08cUL +#define PSWWR_REG_DBG_FORCE_VALID \ + 0x29a090UL +#define PSWWR_REG_DBG_FORCE_FRAME \ + 0x29a094UL +#define PSWRD_REG_DBG_SELECT \ + 0x29c040UL +#define PSWRD_REG_DBG_DWORD_ENABLE \ + 0x29c044UL +#define PSWRD_REG_DBG_SHIFT \ + 0x29c048UL +#define PSWRD_REG_DBG_FORCE_VALID \ + 0x29c04cUL +#define PSWRD_REG_DBG_FORCE_FRAME \ + 0x29c050UL +#define PSWRD2_REG_DBG_SELECT \ + 0x29d400UL +#define PSWRD2_REG_DBG_DWORD_ENABLE \ + 0x29d404UL +#define PSWRD2_REG_DBG_SHIFT \ + 0x29d408UL +#define PSWRD2_REG_DBG_FORCE_VALID \ + 0x29d40cUL +#define PSWRD2_REG_DBG_FORCE_FRAME \ + 0x29d410UL +#define PSWHST2_REG_DBG_SELECT \ + 0x29e058UL +#define PSWHST2_REG_DBG_DWORD_ENABLE \ + 0x29e05cUL +#define PSWHST2_REG_DBG_SHIFT \ + 0x29e060UL +#define PSWHST2_REG_DBG_FORCE_VALID \ + 0x29e064UL +#define PSWHST2_REG_DBG_FORCE_FRAME \ + 0x29e068UL +#define PSWHST_REG_DBG_SELECT \ + 0x2a0100UL +#define PSWHST_REG_DBG_DWORD_ENABLE \ + 0x2a0104UL +#define PSWHST_REG_DBG_SHIFT \ + 0x2a0108UL +#define PSWHST_REG_DBG_FORCE_VALID \ + 0x2a010cUL +#define PSWHST_REG_DBG_FORCE_FRAME \ + 0x2a0110UL +#define PGLUE_B_REG_DBG_SELECT \ + 0x2a8400UL +#define PGLUE_B_REG_DBG_DWORD_ENABLE \ + 0x2a8404UL +#define PGLUE_B_REG_DBG_SHIFT \ + 0x2a8408UL +#define PGLUE_B_REG_DBG_FORCE_VALID \ + 0x2a840cUL +#define PGLUE_B_REG_DBG_FORCE_FRAME \ + 0x2a8410UL +#define TM_REG_DBG_SELECT \ + 0x2c07a8UL +#define TM_REG_DBG_DWORD_ENABLE \ + 0x2c07acUL +#define TM_REG_DBG_SHIFT \ + 0x2c07b0UL +#define TM_REG_DBG_FORCE_VALID \ + 0x2c07b4UL +#define TM_REG_DBG_FORCE_FRAME \ + 0x2c07b8UL +#define TCFC_REG_DBG_SELECT \ + 0x2d0500UL +#define TCFC_REG_DBG_DWORD_ENABLE \ + 0x2d0504UL +#define TCFC_REG_DBG_SHIFT \ + 0x2d0508UL +#define 
TCFC_REG_DBG_FORCE_VALID \ + 0x2d050cUL +#define TCFC_REG_DBG_FORCE_FRAME \ + 0x2d0510UL +#define CCFC_REG_DBG_SELECT \ + 0x2e0500UL +#define CCFC_REG_DBG_DWORD_ENABLE \ + 0x2e0504UL +#define CCFC_REG_DBG_SHIFT \ + 0x2e0508UL +#define CCFC_REG_DBG_FORCE_VALID \ + 0x2e050cUL +#define CCFC_REG_DBG_FORCE_FRAME \ + 0x2e0510UL +#define QM_REG_DBG_SELECT \ + 0x2f2e74UL +#define QM_REG_DBG_DWORD_ENABLE \ + 0x2f2e78UL +#define QM_REG_DBG_SHIFT \ + 0x2f2e7cUL +#define QM_REG_DBG_FORCE_VALID \ + 0x2f2e80UL +#define QM_REG_DBG_FORCE_FRAME \ + 0x2f2e84UL +#define RDIF_REG_DBG_SELECT \ + 0x300500UL +#define RDIF_REG_DBG_DWORD_ENABLE \ + 0x300504UL +#define RDIF_REG_DBG_SHIFT \ + 0x300508UL +#define RDIF_REG_DBG_FORCE_VALID \ + 0x30050cUL +#define RDIF_REG_DBG_FORCE_FRAME \ + 0x300510UL +#define TDIF_REG_DBG_SELECT \ + 0x310500UL +#define TDIF_REG_DBG_DWORD_ENABLE \ + 0x310504UL +#define TDIF_REG_DBG_SHIFT \ + 0x310508UL +#define TDIF_REG_DBG_FORCE_VALID \ + 0x31050cUL +#define TDIF_REG_DBG_FORCE_FRAME \ + 0x310510UL +#define BRB_REG_DBG_SELECT \ + 0x340ed0UL +#define BRB_REG_DBG_DWORD_ENABLE \ + 0x340ed4UL +#define BRB_REG_DBG_SHIFT \ + 0x340ed8UL +#define BRB_REG_DBG_FORCE_VALID \ + 0x340edcUL +#define BRB_REG_DBG_FORCE_FRAME \ + 0x340ee0UL +#define XYLD_REG_DBG_SELECT \ + 0x4c1600UL +#define XYLD_REG_DBG_DWORD_ENABLE \ + 0x4c1604UL +#define XYLD_REG_DBG_SHIFT \ + 0x4c1608UL +#define XYLD_REG_DBG_FORCE_VALID \ + 0x4c160cUL +#define XYLD_REG_DBG_FORCE_FRAME \ + 0x4c1610UL +#define YULD_REG_DBG_SELECT \ + 0x4c9600UL +#define YULD_REG_DBG_DWORD_ENABLE \ + 0x4c9604UL +#define YULD_REG_DBG_SHIFT \ + 0x4c9608UL +#define YULD_REG_DBG_FORCE_VALID \ + 0x4c960cUL +#define YULD_REG_DBG_FORCE_FRAME \ + 0x4c9610UL +#define TMLD_REG_DBG_SELECT \ + 0x4d1600UL +#define TMLD_REG_DBG_DWORD_ENABLE \ + 0x4d1604UL +#define TMLD_REG_DBG_SHIFT \ + 0x4d1608UL +#define TMLD_REG_DBG_FORCE_VALID \ + 0x4d160cUL +#define TMLD_REG_DBG_FORCE_FRAME \ + 0x4d1610UL +#define MULD_REG_DBG_SELECT \ + 0x4e1600UL 
+#define MULD_REG_DBG_DWORD_ENABLE \ + 0x4e1604UL +#define MULD_REG_DBG_SHIFT \ + 0x4e1608UL +#define MULD_REG_DBG_FORCE_VALID \ + 0x4e160cUL +#define MULD_REG_DBG_FORCE_FRAME \ + 0x4e1610UL +#define NIG_REG_DBG_SELECT \ + 0x502140UL +#define NIG_REG_DBG_DWORD_ENABLE \ + 0x502144UL +#define NIG_REG_DBG_SHIFT \ + 0x502148UL +#define NIG_REG_DBG_FORCE_VALID \ + 0x50214cUL +#define NIG_REG_DBG_FORCE_FRAME \ + 0x502150UL +#define BMB_REG_DBG_SELECT \ + 0x540a7cUL +#define BMB_REG_DBG_DWORD_ENABLE \ + 0x540a80UL +#define BMB_REG_DBG_SHIFT \ + 0x540a84UL +#define BMB_REG_DBG_FORCE_VALID \ + 0x540a88UL +#define BMB_REG_DBG_FORCE_FRAME \ + 0x540a8cUL +#define PTU_REG_DBG_SELECT \ + 0x560100UL +#define PTU_REG_DBG_DWORD_ENABLE \ + 0x560104UL +#define PTU_REG_DBG_SHIFT \ + 0x560108UL +#define PTU_REG_DBG_FORCE_VALID \ + 0x56010cUL +#define PTU_REG_DBG_FORCE_FRAME \ + 0x560110UL +#define CDU_REG_DBG_SELECT \ + 0x580704UL +#define CDU_REG_DBG_DWORD_ENABLE \ + 0x580708UL +#define CDU_REG_DBG_SHIFT \ + 0x58070cUL +#define CDU_REG_DBG_FORCE_VALID \ + 0x580710UL +#define CDU_REG_DBG_FORCE_FRAME \ + 0x580714UL +#define WOL_REG_DBG_SELECT \ + 0x600140UL +#define WOL_REG_DBG_DWORD_ENABLE \ + 0x600144UL +#define WOL_REG_DBG_SHIFT \ + 0x600148UL +#define WOL_REG_DBG_FORCE_VALID \ + 0x60014cUL +#define WOL_REG_DBG_FORCE_FRAME \ + 0x600150UL +#define BMBN_REG_DBG_SELECT \ + 0x610140UL +#define BMBN_REG_DBG_DWORD_ENABLE \ + 0x610144UL +#define BMBN_REG_DBG_SHIFT \ + 0x610148UL +#define BMBN_REG_DBG_FORCE_VALID \ + 0x61014cUL +#define BMBN_REG_DBG_FORCE_FRAME \ + 0x610150UL +#define NWM_REG_DBG_SELECT \ + 0x8000ecUL +#define NWM_REG_DBG_DWORD_ENABLE \ + 0x8000f0UL +#define NWM_REG_DBG_SHIFT \ + 0x8000f4UL +#define NWM_REG_DBG_FORCE_VALID \ + 0x8000f8UL +#define NWM_REG_DBG_FORCE_FRAME \ + 0x8000fcUL +#define PBF_REG_DBG_SELECT \ + 0xd80060UL +#define PBF_REG_DBG_DWORD_ENABLE \ + 0xd80064UL +#define PBF_REG_DBG_SHIFT \ + 0xd80068UL +#define PBF_REG_DBG_FORCE_VALID \ + 0xd8006cUL +#define 
PBF_REG_DBG_FORCE_FRAME \ + 0xd80070UL +#define PBF_PB1_REG_DBG_SELECT \ + 0xda0728UL +#define PBF_PB1_REG_DBG_DWORD_ENABLE \ + 0xda072cUL +#define PBF_PB1_REG_DBG_SHIFT \ + 0xda0730UL +#define PBF_PB1_REG_DBG_FORCE_VALID \ + 0xda0734UL +#define PBF_PB1_REG_DBG_FORCE_FRAME \ + 0xda0738UL +#define PBF_PB2_REG_DBG_SELECT \ + 0xda4728UL +#define PBF_PB2_REG_DBG_DWORD_ENABLE \ + 0xda472cUL +#define PBF_PB2_REG_DBG_SHIFT \ + 0xda4730UL +#define PBF_PB2_REG_DBG_FORCE_VALID \ + 0xda4734UL +#define PBF_PB2_REG_DBG_FORCE_FRAME \ + 0xda4738UL +#define BTB_REG_DBG_SELECT \ + 0xdb08c8UL +#define BTB_REG_DBG_DWORD_ENABLE \ + 0xdb08ccUL +#define BTB_REG_DBG_SHIFT \ + 0xdb08d0UL +#define BTB_REG_DBG_FORCE_VALID \ + 0xdb08d4UL +#define BTB_REG_DBG_FORCE_FRAME \ + 0xdb08d8UL +#define XSDM_REG_DBG_SELECT \ + 0xf80e28UL +#define XSDM_REG_DBG_DWORD_ENABLE \ + 0xf80e2cUL +#define XSDM_REG_DBG_SHIFT \ + 0xf80e30UL +#define XSDM_REG_DBG_FORCE_VALID \ + 0xf80e34UL +#define XSDM_REG_DBG_FORCE_FRAME \ + 0xf80e38UL +#define YSDM_REG_DBG_SELECT \ + 0xf90e28UL +#define YSDM_REG_DBG_DWORD_ENABLE \ + 0xf90e2cUL +#define YSDM_REG_DBG_SHIFT \ + 0xf90e30UL +#define YSDM_REG_DBG_FORCE_VALID \ + 0xf90e34UL +#define YSDM_REG_DBG_FORCE_FRAME \ + 0xf90e38UL +#define PSDM_REG_DBG_SELECT \ + 0xfa0e28UL +#define PSDM_REG_DBG_DWORD_ENABLE \ + 0xfa0e2cUL +#define PSDM_REG_DBG_SHIFT \ + 0xfa0e30UL +#define PSDM_REG_DBG_FORCE_VALID \ + 0xfa0e34UL +#define PSDM_REG_DBG_FORCE_FRAME \ + 0xfa0e38UL +#define TSDM_REG_DBG_SELECT \ + 0xfb0e28UL +#define TSDM_REG_DBG_DWORD_ENABLE \ + 0xfb0e2cUL +#define TSDM_REG_DBG_SHIFT \ + 0xfb0e30UL +#define TSDM_REG_DBG_FORCE_VALID \ + 0xfb0e34UL +#define TSDM_REG_DBG_FORCE_FRAME \ + 0xfb0e38UL +#define MSDM_REG_DBG_SELECT \ + 0xfc0e28UL +#define MSDM_REG_DBG_DWORD_ENABLE \ + 0xfc0e2cUL +#define MSDM_REG_DBG_SHIFT \ + 0xfc0e30UL +#define MSDM_REG_DBG_FORCE_VALID \ + 0xfc0e34UL +#define MSDM_REG_DBG_FORCE_FRAME \ + 0xfc0e38UL +#define USDM_REG_DBG_SELECT \ + 0xfd0e28UL +#define 
USDM_REG_DBG_DWORD_ENABLE \ + 0xfd0e2cUL +#define USDM_REG_DBG_SHIFT \ + 0xfd0e30UL +#define USDM_REG_DBG_FORCE_VALID \ + 0xfd0e34UL +#define USDM_REG_DBG_FORCE_FRAME \ + 0xfd0e38UL +#define XCM_REG_DBG_SELECT \ + 0x1000040UL +#define XCM_REG_DBG_DWORD_ENABLE \ + 0x1000044UL +#define XCM_REG_DBG_SHIFT \ + 0x1000048UL +#define XCM_REG_DBG_FORCE_VALID \ + 0x100004cUL +#define XCM_REG_DBG_FORCE_FRAME \ + 0x1000050UL +#define YCM_REG_DBG_SELECT \ + 0x1080040UL +#define YCM_REG_DBG_DWORD_ENABLE \ + 0x1080044UL +#define YCM_REG_DBG_SHIFT \ + 0x1080048UL +#define YCM_REG_DBG_FORCE_VALID \ + 0x108004cUL +#define YCM_REG_DBG_FORCE_FRAME \ + 0x1080050UL +#define PCM_REG_DBG_SELECT \ + 0x1100040UL +#define PCM_REG_DBG_DWORD_ENABLE \ + 0x1100044UL +#define PCM_REG_DBG_SHIFT \ + 0x1100048UL +#define PCM_REG_DBG_FORCE_VALID \ + 0x110004cUL +#define PCM_REG_DBG_FORCE_FRAME \ + 0x1100050UL +#define TCM_REG_DBG_SELECT \ + 0x1180040UL +#define TCM_REG_DBG_DWORD_ENABLE \ + 0x1180044UL +#define TCM_REG_DBG_SHIFT \ + 0x1180048UL +#define TCM_REG_DBG_FORCE_VALID \ + 0x118004cUL +#define TCM_REG_DBG_FORCE_FRAME \ + 0x1180050UL +#define MCM_REG_DBG_SELECT \ + 0x1200040UL +#define MCM_REG_DBG_DWORD_ENABLE \ + 0x1200044UL +#define MCM_REG_DBG_SHIFT \ + 0x1200048UL +#define MCM_REG_DBG_FORCE_VALID \ + 0x120004cUL +#define MCM_REG_DBG_FORCE_FRAME \ + 0x1200050UL +#define UCM_REG_DBG_SELECT \ + 0x1280050UL +#define UCM_REG_DBG_DWORD_ENABLE \ + 0x1280054UL +#define UCM_REG_DBG_SHIFT \ + 0x1280058UL +#define UCM_REG_DBG_FORCE_VALID \ + 0x128005cUL +#define UCM_REG_DBG_FORCE_FRAME \ + 0x1280060UL +#define XSEM_REG_DBG_SELECT \ + 0x1401528UL +#define XSEM_REG_DBG_DWORD_ENABLE \ + 0x140152cUL +#define XSEM_REG_DBG_SHIFT \ + 0x1401530UL +#define XSEM_REG_DBG_FORCE_VALID \ + 0x1401534UL +#define XSEM_REG_DBG_FORCE_FRAME \ + 0x1401538UL +#define YSEM_REG_DBG_SELECT \ + 0x1501528UL +#define YSEM_REG_DBG_DWORD_ENABLE \ + 0x150152cUL +#define YSEM_REG_DBG_SHIFT \ + 0x1501530UL +#define 
YSEM_REG_DBG_FORCE_VALID \ + 0x1501534UL +#define YSEM_REG_DBG_FORCE_FRAME \ + 0x1501538UL +#define PSEM_REG_DBG_SELECT \ + 0x1601528UL +#define PSEM_REG_DBG_DWORD_ENABLE \ + 0x160152cUL +#define PSEM_REG_DBG_SHIFT \ + 0x1601530UL +#define PSEM_REG_DBG_FORCE_VALID \ + 0x1601534UL +#define PSEM_REG_DBG_FORCE_FRAME \ + 0x1601538UL +#define TSEM_REG_DBG_SELECT \ + 0x1701528UL +#define TSEM_REG_DBG_DWORD_ENABLE \ + 0x170152cUL +#define TSEM_REG_DBG_SHIFT \ + 0x1701530UL +#define TSEM_REG_DBG_FORCE_VALID \ + 0x1701534UL +#define TSEM_REG_DBG_FORCE_FRAME \ + 0x1701538UL +#define MSEM_REG_DBG_SELECT \ + 0x1801528UL +#define MSEM_REG_DBG_DWORD_ENABLE \ + 0x180152cUL +#define MSEM_REG_DBG_SHIFT \ + 0x1801530UL +#define MSEM_REG_DBG_FORCE_VALID \ + 0x1801534UL +#define MSEM_REG_DBG_FORCE_FRAME \ + 0x1801538UL +#define USEM_REG_DBG_SELECT \ + 0x1901528UL +#define USEM_REG_DBG_DWORD_ENABLE \ + 0x190152cUL +#define USEM_REG_DBG_SHIFT \ + 0x1901530UL +#define USEM_REG_DBG_FORCE_VALID \ + 0x1901534UL +#define USEM_REG_DBG_FORCE_FRAME \ + 0x1901538UL +#define PCIE_REG_DBG_COMMON_SELECT \ + 0x054398UL +#define PCIE_REG_DBG_COMMON_DWORD_ENABLE \ + 0x05439cUL +#define PCIE_REG_DBG_COMMON_SHIFT \ + 0x0543a0UL +#define PCIE_REG_DBG_COMMON_FORCE_VALID \ + 0x0543a4UL +#define PCIE_REG_DBG_COMMON_FORCE_FRAME \ + 0x0543a8UL +#define MISC_REG_RESET_PL_UA \ + 0x008050UL +#define MISC_REG_RESET_PL_HV \ + 0x008060UL +#define XCM_REG_CTX_RBC_ACCS \ + 0x1001800UL +#define XCM_REG_AGG_CON_CTX \ + 0x1001804UL +#define XCM_REG_SM_CON_CTX \ + 0x1001808UL +#define YCM_REG_CTX_RBC_ACCS \ + 0x1081800UL +#define YCM_REG_AGG_CON_CTX \ + 0x1081804UL +#define YCM_REG_AGG_TASK_CTX \ + 0x1081808UL +#define YCM_REG_SM_CON_CTX \ + 0x108180cUL +#define YCM_REG_SM_TASK_CTX \ + 0x1081810UL +#define PCM_REG_CTX_RBC_ACCS \ + 0x1101440UL +#define PCM_REG_SM_CON_CTX \ + 0x1101444UL +#define TCM_REG_CTX_RBC_ACCS \ + 0x11814c0UL +#define TCM_REG_AGG_CON_CTX \ + 0x11814c4UL +#define TCM_REG_AGG_TASK_CTX \ + 0x11814c8UL 
+#define TCM_REG_SM_CON_CTX \ + 0x11814ccUL +#define TCM_REG_SM_TASK_CTX \ + 0x11814d0UL +#define MCM_REG_CTX_RBC_ACCS \ + 0x1201800UL +#define MCM_REG_AGG_CON_CTX \ + 0x1201804UL +#define MCM_REG_AGG_TASK_CTX \ + 0x1201808UL +#define MCM_REG_SM_CON_CTX \ + 0x120180cUL +#define MCM_REG_SM_TASK_CTX \ + 0x1201810UL +#define UCM_REG_CTX_RBC_ACCS \ + 0x1281700UL +#define UCM_REG_AGG_CON_CTX \ + 0x1281704UL +#define UCM_REG_AGG_TASK_CTX \ + 0x1281708UL +#define UCM_REG_SM_CON_CTX \ + 0x128170cUL +#define UCM_REG_SM_TASK_CTX \ + 0x1281710UL +#define XSEM_REG_SLOW_DBG_EMPTY \ + 0x1401140UL +#define XSEM_REG_SYNC_DBG_EMPTY \ + 0x1401160UL +#define XSEM_REG_SLOW_DBG_ACTIVE \ + 0x1401400UL +#define XSEM_REG_SLOW_DBG_MODE \ + 0x1401404UL +#define XSEM_REG_DBG_FRAME_MODE \ + 0x1401408UL +#define XSEM_REG_DBG_MODE1_CFG \ + 0x1401420UL +#define XSEM_REG_FAST_MEMORY \ + 0x1440000UL +#define YSEM_REG_SYNC_DBG_EMPTY \ + 0x1501160UL +#define YSEM_REG_SLOW_DBG_ACTIVE \ + 0x1501400UL +#define YSEM_REG_SLOW_DBG_MODE \ + 0x1501404UL +#define YSEM_REG_DBG_FRAME_MODE \ + 0x1501408UL +#define YSEM_REG_DBG_MODE1_CFG \ + 0x1501420UL +#define YSEM_REG_FAST_MEMORY \ + 0x1540000UL +#define PSEM_REG_SLOW_DBG_EMPTY \ + 0x1601140UL +#define PSEM_REG_SYNC_DBG_EMPTY \ + 0x1601160UL +#define PSEM_REG_SLOW_DBG_ACTIVE \ + 0x1601400UL +#define PSEM_REG_SLOW_DBG_MODE \ + 0x1601404UL +#define PSEM_REG_DBG_FRAME_MODE \ + 0x1601408UL +#define PSEM_REG_DBG_MODE1_CFG \ + 0x1601420UL +#define PSEM_REG_FAST_MEMORY \ + 0x1640000UL +#define TSEM_REG_SLOW_DBG_EMPTY \ + 0x1701140UL +#define TSEM_REG_SYNC_DBG_EMPTY \ + 0x1701160UL +#define TSEM_REG_SLOW_DBG_ACTIVE \ + 0x1701400UL +#define TSEM_REG_SLOW_DBG_MODE \ + 0x1701404UL +#define TSEM_REG_DBG_FRAME_MODE \ + 0x1701408UL +#define TSEM_REG_DBG_MODE1_CFG \ + 0x1701420UL +#define TSEM_REG_FAST_MEMORY \ + 0x1740000UL +#define MSEM_REG_SLOW_DBG_EMPTY \ + 0x1801140UL +#define MSEM_REG_SYNC_DBG_EMPTY \ + 0x1801160UL +#define MSEM_REG_SLOW_DBG_ACTIVE \ + 0x1801400UL 
+#define MSEM_REG_SLOW_DBG_MODE \ + 0x1801404UL +#define MSEM_REG_DBG_FRAME_MODE \ + 0x1801408UL +#define MSEM_REG_DBG_MODE1_CFG \ + 0x1801420UL +#define MSEM_REG_FAST_MEMORY \ + 0x1840000UL +#define USEM_REG_SLOW_DBG_EMPTY \ + 0x1901140UL +#define USEM_REG_SYNC_DBG_EMPTY \ + 0x1901160UL +#define USEM_REG_SLOW_DBG_ACTIVE \ + 0x1901400UL +#define USEM_REG_SLOW_DBG_MODE \ + 0x1901404UL +#define USEM_REG_DBG_FRAME_MODE \ + 0x1901408UL +#define USEM_REG_DBG_MODE1_CFG \ + 0x1901420UL +#define USEM_REG_FAST_MEMORY \ + 0x1940000UL +#define SEM_FAST_REG_INT_RAM \ + 0x020000UL +#define SEM_FAST_REG_INT_RAM_SIZE \ + 20480 +#define GRC_REG_TRACE_FIFO_VALID_DATA \ + 0x050064UL +#define GRC_REG_NUMBER_VALID_OVERRIDE_WINDOW \ + 0x05040cUL +#define GRC_REG_PROTECTION_OVERRIDE_WINDOW \ + 0x050500UL +#define IGU_REG_ERROR_HANDLING_MEMORY \ + 0x181520UL #define MCP_REG_CPU_MODE \ 0xe05000UL #define MCP_REG_CPU_MODE_SOFT_HALT \ (0x1 << 10) +#define BRB_REG_BIG_RAM_ADDRESS \ + 0x340800UL +#define BRB_REG_BIG_RAM_DATA \ + 0x341500UL +#define SEM_FAST_REG_STALL_0 \ + 0x000488UL +#define SEM_FAST_REG_STALLED \ + 0x000494UL +#define BTB_REG_BIG_RAM_ADDRESS \ + 0xdb0800UL +#define BTB_REG_BIG_RAM_DATA \ + 0xdb0c00UL +#define BMB_REG_BIG_RAM_ADDRESS \ + 0x540800UL +#define BMB_REG_BIG_RAM_DATA \ + 0x540f00UL +#define SEM_FAST_REG_STORM_REG_FILE \ + 0x008000UL +#define RSS_REG_RSS_RAM_ADDR \ + 0x238c30UL +#define MISCS_REG_BLOCK_256B_EN \ + 0x009074UL +#define MCP_REG_SCRATCH_SIZE \ + 57344 +#define MCP_REG_CPU_REG_FILE \ + 0xe05200UL +#define MCP_REG_CPU_REG_FILE_SIZE \ + 32 +#define DBG_REG_DEBUG_TARGET \ + 0x01005cUL +#define DBG_REG_FULL_MODE \ + 0x010060UL +#define DBG_REG_CALENDAR_OUT_DATA \ + 0x010480UL +#define GRC_REG_TRACE_FIFO \ + 0x050068UL +#define IGU_REG_ERROR_HANDLING_DATA_VALID \ + 0x181530UL +#define DBG_REG_DBG_BLOCK_ON \ + 0x010454UL +#define DBG_REG_FRAMING_MODE \ + 0x010058UL +#define SEM_FAST_REG_VFC_DATA_WR \ + 0x000b40UL +#define SEM_FAST_REG_VFC_ADDR \ + 0x000b44UL 
+#define SEM_FAST_REG_VFC_DATA_RD \ + 0x000b48UL +#define RSS_REG_RSS_RAM_DATA \ + 0x238c20UL +#define MISC_REG_BLOCK_256B_EN \ + 0x008c14UL +#define NWS_REG_NWS_CMU \ + 0x720000UL +#define PHY_NW_IP_REG_PHY0_TOP_TBUS_ADDR_7_0 \ + 0x000680UL +#define PHY_NW_IP_REG_PHY0_TOP_TBUS_ADDR_15_8 \ + 0x000684UL +#define PHY_NW_IP_REG_PHY0_TOP_TBUS_DATA_7_0 \ + 0x0006c0UL +#define PHY_NW_IP_REG_PHY0_TOP_TBUS_DATA_11_8 \ + 0x0006c4UL +#define MS_REG_MS_CMU \ + 0x6a4000UL +#define PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X130 \ + 0x000208UL +#define PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X132 \ + 0x000210UL +#define PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X131 \ + 0x00020cUL +#define PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X133 \ + 0x000214UL +#define PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X130 \ + 0x000208UL +#define PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X131 \ + 0x00020cUL +#define PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X132 \ + 0x000210UL +#define PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X133 \ + 0x000214UL +#define PHY_PCIE_REG_PHY0 \ + 0x620000UL +#define PHY_PCIE_REG_PHY1 \ + 0x624000UL #endif diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 70b30e4..1902763 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -143,6 +143,9 @@ #define GTT_BYTE_SIZE_BITS (GTT_DWORD_SIZE_BITS + 2) #define GTT_DWORD_SIZE BIT(GTT_DWORD_SIZE_BITS) +/* Tools Version */ +#define TOOLS_VERSION 10 + /*****************/ /* CDU CONSTANTS */ /*****************/ -- cgit v1.1 From e0971c832af4cd906ab931c9f6e9e1791a62fc98 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Wed, 7 Sep 2016 16:36:25 +0300 Subject: qed*: Add support for the ethtool get_regs operation Signed-off-by: Tomer Tayar Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/qed/qed_main.c | 2 ++ drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 24 ++++++++++++++++++++++++ include/linux/qed/qed_if.h | 4 ++++ 3 files changed, 30 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 250efd1..b730a63 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1398,6 +1398,8 @@ const struct qed_common_ops qed_common_ops_pass = { .get_link = &qed_get_current_link, .drain = &qed_drain, .update_msglvl = &qed_init_dp, + .dbg_all_data = &qed_dbg_all_data, + .dbg_all_data_size = &qed_dbg_all_data_size, .chain_alloc = &qed_chain_alloc, .chain_free = &qed_chain_free, .get_coalesce = &qed_get_coalesce, diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 14d5328..25a9b29 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -695,6 +695,28 @@ static int qede_set_pauseparam(struct net_device *dev, return 0; } +static void qede_get_regs(struct net_device *ndev, + struct ethtool_regs *regs, void *buffer) +{ + struct qede_dev *edev = netdev_priv(ndev); + + regs->version = 0; + memset(buffer, 0, regs->len); + + if (edev->ops && edev->ops->common) + edev->ops->common->dbg_all_data(edev->cdev, buffer); +} + +static int qede_get_regs_len(struct net_device *ndev) +{ + struct qede_dev *edev = netdev_priv(ndev); + + if (edev->ops && edev->ops->common) + return edev->ops->common->dbg_all_data_size(edev->cdev); + else + return -EINVAL; +} + static void qede_update_mtu(struct qede_dev *edev, union qede_reload_args *args) { edev->ndev->mtu = args->mtu; @@ -1395,6 +1417,8 @@ static const struct ethtool_ops qede_ethtool_ops = { .get_link_ksettings = qede_get_link_ksettings, .set_link_ksettings = qede_set_link_ksettings, .get_drvinfo = qede_get_drvinfo, + .get_regs_len = qede_get_regs_len, 
+ .get_regs = qede_get_regs, .get_msglevel = qede_get_msglevel, .set_msglevel = qede_set_msglevel, .nway_reset = qede_nway_reset, diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index d8dc5c2..e4546ab 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -455,6 +455,10 @@ struct qed_common_ops { void (*simd_handler_clean)(struct qed_dev *cdev, int index); + int (*dbg_all_data) (struct qed_dev *cdev, void *buffer); + + int (*dbg_all_data_size) (struct qed_dev *cdev); + /** * @brief can_link_change - can the instance change the link or not * -- cgit v1.1 From caa58f808834fca9a4443233fd09df5ab639690d Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 6 Sep 2016 14:17:18 +1000 Subject: powerpc/powernv: Fix corrupted PE allocation bitmap on releasing PE In pnv_ioda_free_pe(), the PE object (including the associated PE number) is cleared before resetting the corresponding bit in the PE allocation bitmap. It means PE#0 is always released to the bitmap wrongly. This fixes above issue by caching the PE number before the PE object is cleared. 
Fixes: 1e9167726c41 ("powerpc/powernv: Use PE instead of number during setup and release") Cc: stable@vger.kernel.org # v4.7+ Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 18f6fd1..c16d790 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -162,11 +162,12 @@ static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb) static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe) { struct pnv_phb *phb = pe->phb; + unsigned int pe_num = pe->pe_number; WARN_ON(pe->pdev); memset(pe, 0, sizeof(struct pnv_ioda_pe)); - clear_bit(pe->pe_number, phb->ioda.pe_alloc); + clear_bit(pe_num, phb->ioda.pe_alloc); } /* The default M64 BAR is shared by all PEs */ -- cgit v1.1 From 8540571e01f973d321b0821f4f32ed6e9ae8263c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 26 Aug 2016 16:45:13 +0200 Subject: powerpc/32: Fix again csum_partial_copy_generic() Commit 7aef4136566b0 ("powerpc32: rewrite csum_partial_copy_generic() based on copy_tofrom_user()") introduced a bug when destination address is odd and len is lower than cacheline size. In that case the resulting csum value doesn't have to be rotated one byte because the cache-aligned copy part is skipped so no alignment is performed.
Fixes: 7aef4136566b0 ("powerpc32: rewrite csum_partial_copy_generic() based on copy_tofrom_user()") Cc: stable@vger.kernel.org # v4.6+ Reported-by: Alessio Igor Bogani Signed-off-by: Christophe Leroy Tested-by: Alessio Igor Bogani Signed-off-by: Michael Ellerman --- arch/powerpc/lib/checksum_32.S | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S index 0a57fe6..aa8214f 100644 --- a/arch/powerpc/lib/checksum_32.S +++ b/arch/powerpc/lib/checksum_32.S @@ -127,18 +127,19 @@ _GLOBAL(csum_partial_copy_generic) stw r7,12(r1) stw r8,8(r1) - rlwinm r0,r4,3,0x8 - rlwnm r6,r6,r0,0,31 /* odd destination address: rotate one byte */ - cmplwi cr7,r0,0 /* is destination address even ? */ addic r12,r6,0 addi r6,r4,-4 neg r0,r4 addi r4,r3,-4 andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */ + crset 4*cr7+eq beq 58f cmplw 0,r5,r0 /* is this more than total to do? */ blt 63f /* if not much to do */ + rlwinm r7,r6,3,0x8 + rlwnm r12,r12,r7,0,31 /* odd destination address: rotate one byte */ + cmplwi cr7,r7,0 /* is destination address even ? */ andi. r8,r0,3 /* get it word-aligned first */ mtctr r8 beq+ 61f -- cgit v1.1 From f077aaf0754bcba0fffdbd925bc12f09cd1e38aa Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 2 Sep 2016 21:47:59 +1000 Subject: powerpc/mm: Don't alias user region to other regions below PAGE_OFFSET In commit c60ac5693c47 ("powerpc: Update kernel VSID range", 2013-03-13) we lost a check on the region number (the top four bits of the effective address) for addresses below PAGE_OFFSET. That commit replaced a check that the top 18 bits were all zero with a check that bits 46 - 59 were zero (performed for all addresses, not just user addresses). This means that userspace can access an address like 0x1000_0xxx_xxxx_xxxx and we will insert a valid SLB entry for it. 
The VSID used will be the same as if the top 4 bits were 0, but the page size will be some random value obtained by indexing beyond the end of the mm_ctx_high_slices_psize array in the paca. If that page size is the same as would be used for region 0, then userspace just has an alias of the region 0 space. If the page size is different, then no HPTE will be found for the access, and the process will get a SIGSEGV (since hash_page_mm() will refuse to create a HPTE for the bogus address). The access beyond the end of the mm_ctx_high_slices_psize can be at most 5.5MB past the array, and so will be in RAM somewhere. Since the access is a load performed in real mode, it won't fault or crash the kernel. At most this bug could perhaps leak a little bit of information about blocks of 32 bytes of memory located at offsets of i * 512kB past the paca->mm_ctx_high_slices_psize array, for 1 <= i <= 11. Fixes: c60ac5693c47 ("powerpc: Update kernel VSID range") Cc: stable@vger.kernel.org # v3.9+ Signed-off-by: Paul Mackerras Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/slb_low.S | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index dfdb90c..9f19834 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -113,7 +113,12 @@ BEGIN_FTR_SECTION END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) b slb_finish_load_1T -0: +0: /* + * For userspace addresses, make sure this is region 0. + */ + cmpdi r9, 0 + bne 8f + /* when using slices, we extract the psize off the slice bitmaps * and then we need to get the sllp encoding off the mmu_psize_defs * array. 
-- cgit v1.1 From 216559d9032704a7a5d2fcd3c9c086dd9f7cd557 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 7 Sep 2016 15:53:31 +0200 Subject: net: smsc911x: augment device tree bindings This adds device tree bindings for: - An optional GPIO line for releasing the RESET signal to the SMSC911x devices - An optional PME (power management event) interrupt line that can be utilized to wake up the system on network activity. This signal exists on all the SMSC911x devices; it is just not very often routed. Both these lines are routed to the SoC on the Qualcomm APQ8060 Dragonboard and thus need to be bound in the device tree. Cc: devicetree@vger.kernel.org Cc: Jeremy Linton Signed-off-by: Linus Walleij Acked-by: Arnd Bergmann Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/smsc911x.txt | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/net/smsc911x.txt b/Documentation/devicetree/bindings/net/smsc911x.txt index 3fed3c1..16c3a950 100644 --- a/Documentation/devicetree/bindings/net/smsc911x.txt +++ b/Documentation/devicetree/bindings/net/smsc911x.txt @@ -3,9 +3,11 @@ Required properties: - compatible : Should be "smsc,lan", "smsc,lan9115" - reg : Address and length of the io space for SMSC LAN -- interrupts : Should contain SMSC LAN interrupt line -- interrupt-parent : Should be the phandle for the interrupt controller - that services interrupts for this device +- interrupts : one or two interrupt specifiers + - The first interrupt is the SMSC LAN interrupt line + - The second interrupt (if present) is the PME (power + management event) interrupt that is able to wake up the host + system with a 50ms pulse on network activity - phy-mode : See ethernet.txt file in the same directory Optional properties: @@ -21,6 +23,10 @@ Optional properties: external PHY - smsc,save-mac-address : Indicates that mac address needs to be saved before resetting the controller +- reset-gpios : a GPIO
line connected to the RESET (active low) signal + of the device. On many systems this is wired high so the device goes + out of reset at power-on, but if it is under program control, this + optional GPIO can wake up in response to it. Examples: @@ -29,7 +35,8 @@ lan9220@f4000000 { reg = <0xf4000000 0x2000000>; phy-mode = "mii"; interrupt-parent = <&gpio1>; - interrupts = <31>; + interrupts = <31>, <32>; + reset-gpios = <&gpio1 30 GPIO_ACTIVE_LOW>; reg-io-width = <4>; smsc,irq-push-pull; }; -- cgit v1.1 From dd0cb7dbb065f4acdd8d0597f122d0ed9e93f12e Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 7 Sep 2016 15:53:42 +0200 Subject: net: smsc911x: request and deassert optional RESET GPIO On some systems (such as the Qualcomm APQ8060 Dragonboard) the RESET signal of the SMSC911x is not pulled up by a resistor (or the internal pull-up that will pull it up if the pin is not even connected) but instead connected to a GPIO line, so that the operating system must explicitly deassert RESET before use. Support this in the SMSC911x driver so this ethernet connector can be used on such targets. Notice that we request the line to go logical low (deassert) whilst the line on the actual component is active low. This is managed in the respective hardware description when specifying the GPIO line with e.g. device tree or ACPI. With device tree it looks like this in one case: reset-gpios = <&tlmm 30 GPIO_ACTIVE_LOW>; Which means that logically requesting the RESET line to be deasserted will result in the line being driven high, taking the device out of reset. Cc: Jeremy Linton Signed-off-by: Linus Walleij Reviewed-by: Jeremy Linton Signed-off-by: David S. 
Miller --- drivers/net/ethernet/smsc/smsc911x.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index ca31345..8ab8d4b 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -62,6 +62,7 @@ #include #include #include +#include #include "smsc911x.h" @@ -147,6 +148,9 @@ struct smsc911x_data { /* regulators */ struct regulator_bulk_data supplies[SMSC911X_NUM_SUPPLIES]; + /* Reset GPIO */ + struct gpio_desc *reset_gpiod; + /* clock */ struct clk *clk; }; @@ -438,6 +442,11 @@ static int smsc911x_request_resources(struct platform_device *pdev) netdev_err(ndev, "couldn't get regulators %d\n", ret); + /* Request optional RESET GPIO */ + pdata->reset_gpiod = devm_gpiod_get_optional(&pdev->dev, + "reset", + GPIOD_OUT_LOW); + /* Request clock */ pdata->clk = clk_get(&pdev->dev, NULL); if (IS_ERR(pdata->clk)) -- cgit v1.1 From d8b795f5e3a3f81e0635919b3c9cd746631a6a76 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 7 Sep 2016 21:16:40 -0700 Subject: Revert "ARM: tegra: fix erroneous address in dts" This reverts commit b5c86b7496d74f6e454bcab5166efa023e1f0459. This is no longer needed due to other changes going into 4.8 to rename the unit addresses on a large number of device nodes. So it was picked up for v4.8-rc1 in error. 
Reported-by: Ralf Ramsauer Signed-off-by: Olof Johansson --- arch/arm/boot/dts/tegra124-jetson-tk1.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/tegra124-jetson-tk1.dts b/arch/arm/boot/dts/tegra124-jetson-tk1.dts index 6403e0d..e52b824 100644 --- a/arch/arm/boot/dts/tegra124-jetson-tk1.dts +++ b/arch/arm/boot/dts/tegra124-jetson-tk1.dts @@ -1382,7 +1382,7 @@ * Pin 41: BR_UART1_TXD * Pin 44: BR_UART1_RXD */ - serial@0,70006000 { + serial@70006000 { compatible = "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart"; status = "okay"; }; @@ -1394,7 +1394,7 @@ * Pin 71: UART2_CTS_L * Pin 74: UART2_RTS_L */ - serial@0,70006040 { + serial@70006040 { compatible = "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart"; status = "okay"; }; -- cgit v1.1 From 1d3ef9c2dc699fcc09320a4b642e84eb3c038f26 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 5 Sep 2016 16:27:53 +0100 Subject: arm-cci: pmu: Fix typo in event name For one of the CCI events exposed under sysfs, "snoop" was typo'd as "snopp". Correct this such that users see the expected event name when enumerating events via sysfs. 
Cc: arm@kernel.org Acked-by: Mark Rutland Signed-off-by: Suzuki K Poulose Signed-off-by: Olof Johansson --- drivers/bus/arm-cci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index 5755907f..ffa7c9d 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c @@ -551,7 +551,7 @@ static struct attribute *cci5xx_pmu_event_attrs[] = { CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_wrq, 0xB), CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_cd_hs, 0xC), CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_rq_stall_addr_hazard, 0xD), - CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snopp_rq_stall_tt_full, 0xE), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_rq_stall_tt_full, 0xE), CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_rq_tzmp1_prot, 0xF), NULL }; -- cgit v1.1 From 710f3e5961a71dd58fe367eac48deecd5af45a48 Mon Sep 17 00:00:00 2001 From: Sriharsha Basavapatna Date: Wed, 7 Sep 2016 19:57:49 +0530 Subject: be2net: Support UE recovery in BEx/Skyhawk adapters This patch supports recovery from UEs caused due to Transient Parity Errors (TPE), in BE2, BE3 and Skyhawk adapters. This change avoids system reboot when such errors occur. The driver recovers from these errors such that the adapter resumes full operational status as prior to the UE. Following is the list of changes in the driver to support this: o The driver registers its UE recoverable capability with ARM FW at init time. This also allows the driver to know if the feature is supported in the FW. o As the UE recovery requires precise time bound processing, the driver creates its own error recovery work queue with a single worker thread (per module, shared across functions). o Each function runs an error detection task at an interval of 1 second as required by the FW. The error detection logic already exists for BEx/SH, but it now runs in the context of a separate worker thread. 
o When an error is detected by the task, if it is recoverable, the PF0 driver instance initiates a soft reset, while other PF driver instances wait for the reset to complete and the chip to become ready. Once the chip is ready, all driver instances, including PF0, resume to reinitialize the respective functions. o The PF0 driver checks for some recovery criteria, to determine if the recovery can be initiated. If the criteria are not met, the PF0 driver does not initiate a soft reset; it retains the existing behavior to stop further processing and requires a reboot to get the chip to an operational state again. o To allow each function to share the workq, while also making progress in its recovery process, a per-function recovery state machine is used. The per-function tasks avoid blocking operations like msleep() while in this state machine (until reinit state) and instead reschedule for the required delay. o With these changes, the existing error recovery code for Lancer also runs in the context of the new worker thread. Signed-off-by: Sriharsha Basavapatna Signed-off-by: David S.
Miller --- drivers/net/ethernet/emulex/benet/be.h | 68 ++++++- drivers/net/ethernet/emulex/benet/be_cmds.c | 53 ++++- drivers/net/ethernet/emulex/benet/be_cmds.h | 41 +++- drivers/net/ethernet/emulex/benet/be_ethtool.c | 40 ++++ drivers/net/ethernet/emulex/benet/be_hw.h | 7 +- drivers/net/ethernet/emulex/benet/be_main.c | 256 ++++++++++++++++++++++--- 6 files changed, 430 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 86780b5..eecf24e 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -399,13 +399,13 @@ enum vf_state { #define BE_FLAGS_PHY_MISCONFIGURED BIT(10) #define BE_FLAGS_ERR_DETECTION_SCHEDULED BIT(11) #define BE_FLAGS_OS2BMC BIT(12) +#define BE_FLAGS_TRY_RECOVERY BIT(13) #define BE_UC_PMAC_COUNT 30 #define BE_VF_UC_PMAC_COUNT 2 #define MAX_ERR_RECOVERY_RETRY_COUNT 3 #define ERR_DETECTION_DELAY 1000 -#define ERR_RECOVERY_RETRY_DELAY 30000 /* Ethtool set_dump flags */ #define LANCER_INITIATE_FW_DUMP 0x1 @@ -512,6 +512,66 @@ struct be_eth_addr { unsigned char mac[ETH_ALEN]; }; +#define BE_SEC 1000 /* in msec */ +#define BE_MIN (60 * BE_SEC) /* in msec */ +#define BE_HOUR (60 * BE_MIN) /* in msec */ + +#define ERR_RECOVERY_MAX_RETRY_COUNT 3 +#define ERR_RECOVERY_DETECTION_DELAY BE_SEC +#define ERR_RECOVERY_RETRY_DELAY (30 * BE_SEC) + +/* UE-detection-duration in BEx/Skyhawk: + * All PFs must wait for this duration after they detect UE before reading + * SLIPORT_SEMAPHORE register. At the end of this duration, the Firmware + * guarantees that the SLIPORT_SEMAPHORE register is updated to indicate + * if the UE is recoverable. + */ +#define ERR_RECOVERY_UE_DETECT_DURATION BE_SEC + +/* Initial idle time (in msec) to elapse after driver load, + * before UE recovery is allowed. 
+ */ +#define ERR_IDLE_HR 24 +#define ERR_RECOVERY_IDLE_TIME (ERR_IDLE_HR * BE_HOUR) + +/* Time interval (in msec) after which UE recovery can be repeated */ +#define ERR_INTERVAL_HR 72 +#define ERR_RECOVERY_INTERVAL (ERR_INTERVAL_HR * BE_HOUR) + +/* BEx/SH UE recovery state machine */ +enum { + ERR_RECOVERY_ST_NONE = 0, /* No Recovery */ + ERR_RECOVERY_ST_DETECT = 1, /* UE detection duration */ + ERR_RECOVERY_ST_RESET = 2, /* Reset Phase (PF0 only) */ + ERR_RECOVERY_ST_PRE_POLL = 3, /* Pre-Poll Phase (all PFs) */ + ERR_RECOVERY_ST_REINIT = 4 /* Re-initialize Phase */ +}; + +struct be_error_recovery { + /* Lancer error recovery variables */ + u8 recovery_retries; + + /* BEx/Skyhawk error recovery variables */ + u8 recovery_state; + u16 ue_to_reset_time; /* Time after UE, to soft reset + * the chip - PF0 only + */ + u16 ue_to_poll_time; /* Time after UE, to Restart Polling + * of SLIPORT_SEMAPHORE reg + */ + u16 last_err_code; + bool recovery_supported; + unsigned long probe_time; + unsigned long last_recovery_time; + + /* Common to both Lancer & BEx/SH error recovery */ + u32 resched_delay; + struct delayed_work err_detection_work; +}; + +/* Ethtool priv_flags */ +#define BE_DISABLE_TPE_RECOVERY 0x1 + struct be_adapter { struct pci_dev *pdev; struct net_device *netdev; @@ -560,7 +620,6 @@ struct be_adapter { struct delayed_work work; u16 work_counter; - struct delayed_work be_err_detection_work; u8 recovery_retries; u8 err_flags; bool pcicfg_mapped; /* pcicfg obtained via pci_iomap() */ @@ -634,6 +693,8 @@ struct be_adapter { u32 fat_dump_len; u16 serial_num[CNTL_SERIAL_NUM_WORDS]; u8 phy_state; /* state of sfp optics (functional, faulted, etc.,) */ + u32 priv_flags; /* ethtool get/set_priv_flags() */ + struct be_error_recovery error_recovery; }; /* Used for defered FW config cmds. 
Add fields to this struct as reqd */ @@ -867,6 +928,9 @@ static inline bool is_ipv4_pkt(struct sk_buff *skb) return skb->protocol == htons(ETH_P_IP) && ip_hdr(skb)->version == 4; } +#define be_error_recovering(adapter) \ + (adapter->flags & BE_FLAGS_TRY_RECOVERY) + #define BE_ERROR_EEH 1 #define BE_ERROR_UE BIT(1) #define BE_ERROR_FW BIT(2) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index fa11a5a..92794f3 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -705,7 +705,7 @@ static int be_mbox_notify_wait(struct be_adapter *adapter) return 0; } -static u16 be_POST_stage_get(struct be_adapter *adapter) +u16 be_POST_stage_get(struct be_adapter *adapter) { u32 sem; @@ -4954,6 +4954,57 @@ int be_cmd_set_logical_link_config(struct be_adapter *adapter, 1, domain); return status; } + +int be_cmd_set_features(struct be_adapter *adapter) +{ + struct be_cmd_resp_set_features *resp; + struct be_cmd_req_set_features *req; + struct be_mcc_wrb *wrb; + int status; + + if (mutex_lock_interruptible(&adapter->mcc_lock)) + return -1; + + wrb = wrb_from_mccq(adapter); + if (!wrb) { + status = -EBUSY; + goto err; + } + + req = embedded_payload(wrb); + + be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON, + OPCODE_COMMON_SET_FEATURES, + sizeof(*req), wrb, NULL); + + req->features = cpu_to_le32(BE_FEATURE_UE_RECOVERY); + req->parameter_len = cpu_to_le32(sizeof(struct be_req_ue_recovery)); + req->parameter.req.uer = cpu_to_le32(BE_UE_RECOVERY_UER_MASK); + + status = be_mcc_notify_wait(adapter); + if (status) + goto err; + + resp = embedded_payload(wrb); + + adapter->error_recovery.ue_to_poll_time = + le16_to_cpu(resp->parameter.resp.ue2rp); + adapter->error_recovery.ue_to_reset_time = + le16_to_cpu(resp->parameter.resp.ue2sr); + adapter->error_recovery.recovery_supported = true; +err: + /* Checking "MCC_STATUS_INVALID_LENGTH" for SKH as FW + * returns this error in older 
firmware versions + */ + if (base_status(status) == MCC_STATUS_ILLEGAL_REQUEST || + base_status(status) == MCC_STATUS_INVALID_LENGTH) + dev_info(&adapter->pdev->dev, + "Adapter does not support HW error recovery\n"); + + mutex_unlock(&adapter->mcc_lock); + return status; +} + int be_roce_mcc_cmd(void *netdev_handle, void *wrb_payload, int wrb_payload_size, u16 *cmd_status, u16 *ext_status) { diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index 0d6be22..686cbe0 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -58,7 +58,8 @@ enum mcc_base_status { MCC_STATUS_INSUFFICIENT_BUFFER = 4, MCC_STATUS_UNAUTHORIZED_REQUEST = 5, MCC_STATUS_NOT_SUPPORTED = 66, - MCC_STATUS_FEATURE_NOT_SUPPORTED = 68 + MCC_STATUS_FEATURE_NOT_SUPPORTED = 68, + MCC_STATUS_INVALID_LENGTH = 116 }; /* Additional status */ @@ -308,6 +309,7 @@ struct be_mcc_mailbox { #define OPCODE_COMMON_READ_OBJECT 171 #define OPCODE_COMMON_WRITE_OBJECT 172 #define OPCODE_COMMON_DELETE_OBJECT 174 +#define OPCODE_COMMON_SET_FEATURES 191 #define OPCODE_COMMON_MANAGE_IFACE_FILTERS 193 #define OPCODE_COMMON_GET_IFACE_LIST 194 #define OPCODE_COMMON_ENABLE_DISABLE_VF 196 @@ -2315,6 +2317,41 @@ struct be_cmd_resp_get_iface_list { struct be_if_desc if_desc; }; +/************** Set Features *******************/ +#define BE_FEATURE_UE_RECOVERY 0x10 +#define BE_UE_RECOVERY_UER_MASK 0x1 + +struct be_req_ue_recovery { + u32 uer; + u32 rsvd; +}; + +struct be_cmd_req_set_features { + struct be_cmd_req_hdr hdr; + u32 features; + u32 parameter_len; + union { + struct be_req_ue_recovery req; + u32 rsvd[2]; + } parameter; +}; + +struct be_resp_ue_recovery { + u32 uer; + u16 ue2rp; + u16 ue2sr; +}; + +struct be_cmd_resp_set_features { + struct be_cmd_resp_hdr hdr; + u32 features; + u32 parameter_len; + union { + struct be_resp_ue_recovery resp; + u32 rsvd[2]; + } parameter; +}; + /*************** Set logical link 
********************/ #define PLINK_ENABLE BIT(0) #define PLINK_TRACK BIT(8) @@ -2343,6 +2380,7 @@ struct be_cmd_req_manage_iface_filters { u32 cap_control_flags; } __packed; +u16 be_POST_stage_get(struct be_adapter *adapter); int be_pci_fnum_get(struct be_adapter *adapter); int be_fw_wait_ready(struct be_adapter *adapter); int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr, @@ -2470,3 +2508,4 @@ int be_cmd_manage_iface(struct be_adapter *adapter, u32 iface, u8 op); int be_cmd_set_sriov_config(struct be_adapter *adapter, struct be_resources res, u16 num_vfs, struct be_resources *vft_res); +int be_cmd_set_features(struct be_adapter *adapter); diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index 50e7be5..0a48a31 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -421,6 +421,10 @@ static void be_get_ethtool_stats(struct net_device *netdev, } } +static const char be_priv_flags[][ETH_GSTRING_LEN] = { + "disable-tpe-recovery" +}; + static void be_get_stat_strings(struct net_device *netdev, uint32_t stringset, uint8_t *data) { @@ -454,6 +458,10 @@ static void be_get_stat_strings(struct net_device *netdev, uint32_t stringset, data += ETH_GSTRING_LEN; } break; + case ETH_SS_PRIV_FLAGS: + for (i = 0; i < ARRAY_SIZE(be_priv_flags); i++) + strcpy(data + i * ETH_GSTRING_LEN, be_priv_flags[i]); + break; } } @@ -468,6 +476,8 @@ static int be_get_sset_count(struct net_device *netdev, int stringset) return ETHTOOL_STATS_NUM + adapter->num_rx_qs * ETHTOOL_RXSTATS_NUM + adapter->num_tx_qs * ETHTOOL_TXSTATS_NUM; + case ETH_SS_PRIV_FLAGS: + return ARRAY_SIZE(be_priv_flags); default: return -EINVAL; } @@ -1360,6 +1370,34 @@ err: return be_cmd_status(status); } +static u32 be_get_priv_flags(struct net_device *netdev) +{ + struct be_adapter *adapter = netdev_priv(netdev); + + return adapter->priv_flags; +} + +static int be_set_priv_flags(struct 
net_device *netdev, u32 flags) +{ + struct be_adapter *adapter = netdev_priv(netdev); + bool tpe_old = !!(adapter->priv_flags & BE_DISABLE_TPE_RECOVERY); + bool tpe_new = !!(flags & BE_DISABLE_TPE_RECOVERY); + + if (tpe_old != tpe_new) { + if (tpe_new) { + adapter->priv_flags |= BE_DISABLE_TPE_RECOVERY; + dev_info(&adapter->pdev->dev, + "HW error recovery is disabled\n"); + } else { + adapter->priv_flags &= ~BE_DISABLE_TPE_RECOVERY; + dev_info(&adapter->pdev->dev, + "HW error recovery is enabled\n"); + } + } + + return 0; +} + const struct ethtool_ops be_ethtool_ops = { .get_settings = be_get_settings, .get_drvinfo = be_get_drvinfo, @@ -1373,6 +1411,8 @@ const struct ethtool_ops be_ethtool_ops = { .get_ringparam = be_get_ringparam, .get_pauseparam = be_get_pauseparam, .set_pauseparam = be_set_pauseparam, + .set_priv_flags = be_set_priv_flags, + .get_priv_flags = be_get_priv_flags, .get_strings = be_get_stat_strings, .set_phys_id = be_set_phys_id, .set_dump = be_set_dump, diff --git a/drivers/net/ethernet/emulex/benet/be_hw.h b/drivers/net/ethernet/emulex/benet/be_hw.h index c684bb3..92942c8 100644 --- a/drivers/net/ethernet/emulex/benet/be_hw.h +++ b/drivers/net/ethernet/emulex/benet/be_hw.h @@ -32,18 +32,23 @@ #define MPU_EP_CONTROL 0 /********** MPU semphore: used for SH & BE *************/ +#define SLIPORT_SOFTRESET_OFFSET 0x5c /* CSR BAR offset */ #define SLIPORT_SEMAPHORE_OFFSET_BEx 0xac /* CSR BAR offset */ #define SLIPORT_SEMAPHORE_OFFSET_SH 0x94 /* PCI-CFG offset */ #define POST_STAGE_MASK 0x0000FFFF #define POST_ERR_MASK 0x1 #define POST_ERR_SHIFT 31 +#define POST_ERR_RECOVERY_CODE_MASK 0xFFF + +/* Soft Reset register masks */ +#define SLIPORT_SOFTRESET_SR_MASK 0x00000080 /* SR bit */ /* MPU semphore POST stage values */ #define POST_STAGE_AWAITING_HOST_RDY 0x1 /* FW awaiting goahead from host */ #define POST_STAGE_HOST_RDY 0x2 /* Host has given go-ahed to FW */ #define POST_STAGE_BE_RESET 0x3 /* Host wants to reset chip */ #define POST_STAGE_ARMFW_RDY 
0xc000 /* FW is done with POST */ - +#define POST_STAGE_RECOVERABLE_ERR 0xE000 /* Recoverable err detected */ /* Lancer SLIPORT registers */ #define SLIPORT_STATUS_OFFSET 0x404 diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index f7584d4..3be5d61 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -41,6 +41,11 @@ static ushort rx_frag_size = 2048; module_param(rx_frag_size, ushort, S_IRUGO); MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data."); +/* Per-module error detection/recovery workq shared across all functions. + * Each function schedules its own work request on this shared workq. + */ +struct workqueue_struct *be_err_recovery_workq; + static const struct pci_device_id be_dev_ids[] = { { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) }, { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) }, @@ -3358,9 +3363,7 @@ void be_detect_error(struct be_adapter *adapter) */ if (ue_lo || ue_hi) { - dev_err(dev, - "Unrecoverable Error detected in the adapter"); - dev_err(dev, "Please reboot server to recover"); + dev_err(dev, "Error detected in the adapter"); if (skyhawk_chip(adapter)) be_set_error(adapter, BE_ERROR_UE); @@ -3903,8 +3906,13 @@ static void be_cancel_worker(struct be_adapter *adapter) static void be_cancel_err_detection(struct be_adapter *adapter) { + struct be_error_recovery *err_rec = &adapter->error_recovery; + + if (!be_err_recovery_workq) + return; + if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) { - cancel_delayed_work_sync(&adapter->be_err_detection_work); + cancel_delayed_work_sync(&err_rec->err_detection_work); adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED; } } @@ -4503,10 +4511,25 @@ static void be_schedule_worker(struct be_adapter *adapter) adapter->flags |= BE_FLAGS_WORKER_SCHEDULED; } +static void be_destroy_err_recovery_workq(void) +{ + if (!be_err_recovery_workq) + return; + + 
flush_workqueue(be_err_recovery_workq); + destroy_workqueue(be_err_recovery_workq); + be_err_recovery_workq = NULL; +} + static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay) { - schedule_delayed_work(&adapter->be_err_detection_work, - msecs_to_jiffies(delay)); + struct be_error_recovery *err_rec = &adapter->error_recovery; + + if (!be_err_recovery_workq) + return; + + queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work, + msecs_to_jiffies(delay)); adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED; } @@ -4635,10 +4658,15 @@ static inline int fw_major_num(const char *fw_ver) return fw_major; } -/* If any VFs are already enabled don't FLR the PF */ +/* If it is error recovery, FLR the PF + * Else if any VFs are already enabled don't FLR the PF + */ static bool be_reset_required(struct be_adapter *adapter) { - return pci_num_vf(adapter->pdev) ? false : true; + if (be_error_recovering(adapter)) + return true; + else + return pci_num_vf(adapter->pdev) == 0; } /* Wait for the FW to be ready and perform the required initialization */ @@ -4650,6 +4678,9 @@ static int be_func_init(struct be_adapter *adapter) if (status) return status; + /* FW is now ready; clear errors to allow cmds/doorbell */ + be_clear_error(adapter, BE_CLEAR_ALL); + if (be_reset_required(adapter)) { status = be_cmd_reset_function(adapter); if (status) @@ -4657,9 +4688,6 @@ static int be_func_init(struct be_adapter *adapter) /* Wait for interrupts to quiesce after an FLR */ msleep(100); - - /* We can clear all errors when function reset succeeds */ - be_clear_error(adapter, BE_CLEAR_ALL); } /* Tell FW we're ready to fire cmds */ @@ -4767,6 +4795,9 @@ static int be_setup(struct be_adapter *adapter) if (!status && be_pause_supported(adapter)) adapter->phy.fc_autoneg = 1; + if (be_physfn(adapter) && !lancer_chip(adapter)) + be_cmd_set_features(adapter); + be_schedule_worker(adapter); adapter->flags |= BE_FLAGS_SETUP_DONE; return 0; @@ -5210,13 +5241,145 @@ static 
int be_resume(struct be_adapter *adapter) return 0; } +static void be_soft_reset(struct be_adapter *adapter) +{ + u32 val; + + dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n"); + val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET); + val |= SLIPORT_SOFTRESET_SR_MASK; + iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET); +} + +static bool be_err_is_recoverable(struct be_adapter *adapter) +{ + struct be_error_recovery *err_rec = &adapter->error_recovery; + unsigned long initial_idle_time = + msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME); + unsigned long recovery_interval = + msecs_to_jiffies(ERR_RECOVERY_INTERVAL); + u16 ue_err_code; + u32 val; + + val = be_POST_stage_get(adapter); + if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR) + return false; + ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK; + if (ue_err_code == 0) + return false; + + dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n", + ue_err_code); + + if (jiffies - err_rec->probe_time <= initial_idle_time) { + dev_err(&adapter->pdev->dev, + "Cannot recover within %lu sec from driver load\n", + jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC); + return false; + } + + if (err_rec->last_recovery_time && + (jiffies - err_rec->last_recovery_time <= recovery_interval)) { + dev_err(&adapter->pdev->dev, + "Cannot recover within %lu sec from last recovery\n", + jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC); + return false; + } + + if (ue_err_code == err_rec->last_err_code) { + dev_err(&adapter->pdev->dev, + "Cannot recover from a consecutive TPE error\n"); + return false; + } + + err_rec->last_recovery_time = jiffies; + err_rec->last_err_code = ue_err_code; + return true; +} + +static int be_tpe_recover(struct be_adapter *adapter) +{ + struct be_error_recovery *err_rec = &adapter->error_recovery; + int status = -EAGAIN; + u32 val; + + switch (err_rec->recovery_state) { + case ERR_RECOVERY_ST_NONE: + err_rec->recovery_state = ERR_RECOVERY_ST_DETECT; + 
err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION; + break; + + case ERR_RECOVERY_ST_DETECT: + val = be_POST_stage_get(adapter); + if ((val & POST_STAGE_RECOVERABLE_ERR) != + POST_STAGE_RECOVERABLE_ERR) { + dev_err(&adapter->pdev->dev, + "Unrecoverable HW error detected: 0x%x\n", val); + status = -EINVAL; + err_rec->resched_delay = 0; + break; + } + + dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n"); + + /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR + * milliseconds before it checks for final error status in + * SLIPORT_SEMAPHORE to determine if recovery criteria is met. + * If it does, then PF0 initiates a Soft Reset. + */ + if (adapter->pf_num == 0) { + err_rec->recovery_state = ERR_RECOVERY_ST_RESET; + err_rec->resched_delay = err_rec->ue_to_reset_time - + ERR_RECOVERY_UE_DETECT_DURATION; + break; + } + + err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL; + err_rec->resched_delay = err_rec->ue_to_poll_time - + ERR_RECOVERY_UE_DETECT_DURATION; + break; + + case ERR_RECOVERY_ST_RESET: + if (!be_err_is_recoverable(adapter)) { + dev_err(&adapter->pdev->dev, + "Failed to meet recovery criteria\n"); + status = -EIO; + err_rec->resched_delay = 0; + break; + } + be_soft_reset(adapter); + err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL; + err_rec->resched_delay = err_rec->ue_to_poll_time - + err_rec->ue_to_reset_time; + break; + + case ERR_RECOVERY_ST_PRE_POLL: + err_rec->recovery_state = ERR_RECOVERY_ST_REINIT; + err_rec->resched_delay = 0; + status = 0; /* done */ + break; + + default: + status = -EINVAL; + err_rec->resched_delay = 0; + break; + } + + return status; +} + static int be_err_recover(struct be_adapter *adapter) { int status; - /* Error recovery is supported only Lancer as of now */ - if (!lancer_chip(adapter)) - return -EIO; + if (!lancer_chip(adapter)) { + if (!adapter->error_recovery.recovery_supported || + adapter->priv_flags & BE_DISABLE_TPE_RECOVERY) + return -EIO; + status = be_tpe_recover(adapter); + if 
(status) + goto err; + } /* Wait for adapter to reach quiescent state before * destroying queues @@ -5225,59 +5388,74 @@ static int be_err_recover(struct be_adapter *adapter) if (status) goto err; + adapter->flags |= BE_FLAGS_TRY_RECOVERY; + be_cleanup(adapter); status = be_resume(adapter); if (status) goto err; - return 0; + adapter->flags &= ~BE_FLAGS_TRY_RECOVERY; + err: return status; } static void be_err_detection_task(struct work_struct *work) { + struct be_error_recovery *err_rec = + container_of(work, struct be_error_recovery, + err_detection_work.work); struct be_adapter *adapter = - container_of(work, struct be_adapter, - be_err_detection_work.work); + container_of(err_rec, struct be_adapter, + error_recovery); + u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY; struct device *dev = &adapter->pdev->dev; int recovery_status; - int delay = ERR_DETECTION_DELAY; be_detect_error(adapter); - - if (be_check_error(adapter, BE_ERROR_HW)) - recovery_status = be_err_recover(adapter); - else + if (!be_check_error(adapter, BE_ERROR_HW)) goto reschedule_task; + recovery_status = be_err_recover(adapter); if (!recovery_status) { - adapter->recovery_retries = 0; + err_rec->recovery_retries = 0; + err_rec->recovery_state = ERR_RECOVERY_ST_NONE; dev_info(dev, "Adapter recovery successful\n"); goto reschedule_task; - } else if (be_virtfn(adapter)) { + } else if (!lancer_chip(adapter) && err_rec->resched_delay) { + /* BEx/SH recovery state machine */ + if (adapter->pf_num == 0 && + err_rec->recovery_state > ERR_RECOVERY_ST_DETECT) + dev_err(&adapter->pdev->dev, + "Adapter recovery in progress\n"); + resched_delay = err_rec->resched_delay; + goto reschedule_task; + } else if (lancer_chip(adapter) && be_virtfn(adapter)) { /* For VFs, check if PF have allocated resources * every second. 
*/ dev_err(dev, "Re-trying adapter recovery\n"); goto reschedule_task; - } else if (adapter->recovery_retries++ < - MAX_ERR_RECOVERY_RETRY_COUNT) { + } else if (lancer_chip(adapter) && err_rec->recovery_retries++ < + ERR_RECOVERY_MAX_RETRY_COUNT) { /* In case of another error during recovery, it takes 30 sec * for adapter to come out of error. Retry error recovery after * this time interval. */ dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n"); - delay = ERR_RECOVERY_RETRY_DELAY; + resched_delay = ERR_RECOVERY_RETRY_DELAY; goto reschedule_task; } else { dev_err(dev, "Adapter recovery failed\n"); + dev_err(dev, "Please reboot server to recover\n"); } return; + reschedule_task: - be_schedule_err_detection(adapter, delay); + be_schedule_err_detection(adapter, resched_delay); } static void be_log_sfp_info(struct be_adapter *adapter) @@ -5490,7 +5668,10 @@ static int be_drv_init(struct be_adapter *adapter) pci_save_state(adapter->pdev); INIT_DELAYED_WORK(&adapter->work, be_worker); - INIT_DELAYED_WORK(&adapter->be_err_detection_work, + + adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE; + adapter->error_recovery.resched_delay = 0; + INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work, be_err_detection_task); adapter->rx_fc = true; @@ -5681,6 +5862,7 @@ static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id) be_roce_dev_add(adapter); be_schedule_err_detection(adapter, ERR_DETECTION_DELAY); + adapter->error_recovery.probe_time = jiffies; /* On Die temperature not supported for VF. 
*/ if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) { @@ -5926,6 +6108,8 @@ static struct pci_driver be_driver = { static int __init be_init_module(void) { + int status; + if (rx_frag_size != 8192 && rx_frag_size != 4096 && rx_frag_size != 2048) { printk(KERN_WARNING DRV_NAME @@ -5945,7 +6129,17 @@ static int __init be_init_module(void) return -1; } - return pci_register_driver(&be_driver); + be_err_recovery_workq = + create_singlethread_workqueue("be_err_recover"); + if (!be_err_recovery_workq) + pr_warn(DRV_NAME "Could not create error recovery workqueue\n"); + + status = pci_register_driver(&be_driver); + if (status) { + destroy_workqueue(be_wq); + be_destroy_err_recovery_workq(); + } + return status; } module_init(be_init_module); @@ -5953,6 +6147,8 @@ static void __exit be_exit_module(void) { pci_unregister_driver(&be_driver); + be_destroy_err_recovery_workq(); + if (be_wq) destroy_workqueue(be_wq); } -- cgit v1.1 From f72099e057c0b3ea3cfd16301cff9202c4db8ef4 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Wed, 7 Sep 2016 19:57:50 +0530 Subject: be2net: Issue COMMON_RESET_FUNCTION cmd during driver unload As per SLI guideline, drivers need to issue COMMON_RESET_FUNCTION SLI cmd during driver unload to clean up any non-persistent state information. Issue this cmd only if VFs are not assigned to VMs as it is possible for PF driver to unload while its VF remains functional and assigned to a VM. Signed-off-by: Somnath Kotur Signed-off-by: Sriharsha Basavapatna Signed-off-by: David S.
Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 3be5d61..95d2fa3 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -5706,6 +5706,9 @@ static void be_remove(struct pci_dev *pdev) be_clear(adapter); + if (!pci_vfs_assigned(adapter->pdev)) + be_cmd_reset_function(adapter); + /* tell fw we're done with firing cmds */ be_cmd_fw_clean(adapter); -- cgit v1.1 From 62259ac4b36e348077635e673f253cc139dd6032 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Wed, 7 Sep 2016 19:57:51 +0530 Subject: be2net: Add privilege level check for OPCODE_COMMON_GET_EXT_FAT_CAPABILITIES SLI cmd. Driver issues OPCODE_COMMON_GET_EXT_FAT_CAPABILITIES cmd during init which when issued by VFs results in the logging of a cmd failure message since they don't have the required privilege for this cmd. Fix by checking privilege before issuing the cmd. Also fixed typo in CAPABILITIES. Signed-off-by: Somnath Kotur Signed-off-by: Sriharsha Basavapatna Signed-off-by: David S. 
Miller --- drivers/net/ethernet/emulex/benet/be_cmds.c | 13 +++++++++++-- drivers/net/ethernet/emulex/benet/be_cmds.h | 4 ++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 92794f3..15d02da 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -92,6 +92,11 @@ static struct be_cmd_priv_map cmd_priv_map[] = { CMD_SUBSYSTEM_COMMON, BE_PRIV_DEVCFG | BE_PRIV_VHADM }, + { + OPCODE_COMMON_GET_EXT_FAT_CAPABILITIES, + CMD_SUBSYSTEM_COMMON, + BE_PRIV_DEVCFG + } }; static bool be_cmd_allowed(struct be_adapter *adapter, u8 opcode, u8 subsystem) @@ -4127,6 +4132,10 @@ int be_cmd_get_ext_fat_capabilites(struct be_adapter *adapter, struct be_cmd_req_get_ext_fat_caps *req; int status; + if (!be_cmd_allowed(adapter, OPCODE_COMMON_GET_EXT_FAT_CAPABILITIES, + CMD_SUBSYSTEM_COMMON)) + return -EPERM; + if (mutex_lock_interruptible(&adapter->mbox_lock)) return -1; @@ -4138,7 +4147,7 @@ int be_cmd_get_ext_fat_capabilites(struct be_adapter *adapter, req = cmd->va; be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON, - OPCODE_COMMON_GET_EXT_FAT_CAPABILITES, + OPCODE_COMMON_GET_EXT_FAT_CAPABILITIES, cmd->size, wrb, cmd); req->parameter_type = cpu_to_le32(1); @@ -4167,7 +4176,7 @@ int be_cmd_set_ext_fat_capabilites(struct be_adapter *adapter, req = cmd->va; memcpy(&req->set_params, configs, sizeof(struct be_fat_conf_params)); be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON, - OPCODE_COMMON_SET_EXT_FAT_CAPABILITES, + OPCODE_COMMON_SET_EXT_FAT_CAPABILITIES, cmd->size, wrb, cmd); status = be_mcc_notify_wait(adapter); diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index 686cbe0..1bd82bc 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -295,8 +295,8 @@ struct be_mcc_mailbox { #define 
OPCODE_COMMON_GET_PHY_DETAILS 102 #define OPCODE_COMMON_SET_DRIVER_FUNCTION_CAP 103 #define OPCODE_COMMON_GET_CNTL_ADDITIONAL_ATTRIBUTES 121 -#define OPCODE_COMMON_GET_EXT_FAT_CAPABILITES 125 -#define OPCODE_COMMON_SET_EXT_FAT_CAPABILITES 126 +#define OPCODE_COMMON_GET_EXT_FAT_CAPABILITIES 125 +#define OPCODE_COMMON_SET_EXT_FAT_CAPABILITIES 126 #define OPCODE_COMMON_GET_MAC_LIST 147 #define OPCODE_COMMON_SET_MAC_LIST 148 #define OPCODE_COMMON_GET_HSW_CONFIG 152 -- cgit v1.1 From 988d44b1636d7f608bd7926493e0f61a034b61db Mon Sep 17 00:00:00 2001 From: Suresh Reddy Date: Wed, 7 Sep 2016 19:57:52 +0530 Subject: be2net: Avoid redundant addition of mac address in HW If a mac address is added to the uc_list and later the same mac address is added via ndo_set_mac_address() or vice versa, the driver does not detect this condition and tries to add it again. This results in a mac address collision error when the FW rejects it. Fix this by checking if the given mac address is present in uc_list while setting the device mac address and vice versa. Similarly skip deletion if the address is still in use in the other form. Signed-off-by: Suresh Reddy Signed-off-by: Sathya Perla Signed-off-by: Sriharsha Basavapatna Signed-off-by: David S. 
Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 86 ++++++++++++++++++++++------- 1 file changed, 66 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 95d2fa3..a1c9920 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -269,6 +269,38 @@ void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped) iowrite32(val, adapter->db + DB_CQ_OFFSET); } +static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac) +{ + int i; + + /* Check if mac has already been added as part of uc-list */ + for (i = 0; i < adapter->uc_macs; i++) { + if (ether_addr_equal((u8 *)&adapter->uc_list[i * ETH_ALEN], + mac)) { + /* mac already added, skip addition */ + adapter->pmac_id[0] = adapter->pmac_id[i + 1]; + return 0; + } + } + + return be_cmd_pmac_add(adapter, mac, adapter->if_handle, + &adapter->pmac_id[0], 0); +} + +static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id) +{ + int i; + + /* Skip deletion if the programmed mac is + * being used in uc-list + */ + for (i = 0; i < adapter->uc_macs; i++) { + if (adapter->pmac_id[i + 1] == pmac_id) + return; + } + be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0); +} + static int be_mac_addr_set(struct net_device *netdev, void *p) { struct be_adapter *adapter = netdev_priv(netdev); @@ -276,7 +308,7 @@ static int be_mac_addr_set(struct net_device *netdev, void *p) struct sockaddr *addr = p; int status; u8 mac[ETH_ALEN]; - u32 old_pmac_id = adapter->pmac_id[0], curr_pmac_id = 0; + u32 old_pmac_id = adapter->pmac_id[0]; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; @@ -297,23 +329,22 @@ static int be_mac_addr_set(struct net_device *netdev, void *p) * FILTMGMT privilege. This failure is OK, only if the PF programmed * the MAC for the VF. 
*/ - status = be_cmd_pmac_add(adapter, (u8 *)addr->sa_data, - adapter->if_handle, &adapter->pmac_id[0], 0); + mutex_lock(&adapter->rx_filter_lock); + status = be_dev_mac_add(adapter, (u8 *)addr->sa_data); if (!status) { - curr_pmac_id = adapter->pmac_id[0]; /* Delete the old programmed MAC. This call may fail if the * old MAC was already deleted by the PF driver. */ if (adapter->pmac_id[0] != old_pmac_id) - be_cmd_pmac_del(adapter, adapter->if_handle, - old_pmac_id, 0); + be_dev_mac_del(adapter, old_pmac_id); } + mutex_unlock(&adapter->rx_filter_lock); /* Decide if the new MAC is successfully activated only after * querying the FW */ - status = be_cmd_get_active_mac(adapter, curr_pmac_id, mac, + status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac, adapter->if_handle, true, 0); if (status) goto err; @@ -1628,6 +1659,28 @@ static void be_clear_mc_list(struct be_adapter *adapter) adapter->mc_count = 0; } +static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx) +{ + if (ether_addr_equal((u8 *)&adapter->uc_list[uc_idx * ETH_ALEN], + adapter->netdev->dev_addr)) { + adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0]; + return 0; + } + + return be_cmd_pmac_add(adapter, + (u8 *)&adapter->uc_list[uc_idx * ETH_ALEN], + adapter->if_handle, + &adapter->pmac_id[uc_idx + 1], 0); +} + +static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id) +{ + if (pmac_id == adapter->pmac_id[0]) + return; + + be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0); +} + static void be_set_uc_list(struct be_adapter *adapter) { struct net_device *netdev = adapter->netdev; @@ -1668,13 +1721,10 @@ static void be_set_uc_list(struct be_adapter *adapter) be_clear_uc_promisc(adapter); for (i = 0; i < adapter->uc_macs; i++) - be_cmd_pmac_del(adapter, adapter->if_handle, - adapter->pmac_id[i + 1], 0); + be_uc_mac_del(adapter, adapter->pmac_id[i + 1]); for (i = 0; i < curr_uc_macs; i++) - be_cmd_pmac_add(adapter, adapter->uc_list[i].mac, - adapter->if_handle, - 
&adapter->pmac_id[i + 1], 0); + be_uc_mac_add(adapter, i); adapter->uc_macs = curr_uc_macs; adapter->update_uc_list = false; } @@ -1687,8 +1737,8 @@ static void be_clear_uc_list(struct be_adapter *adapter) __dev_uc_unsync(netdev, NULL); for (i = 0; i < adapter->uc_macs; i++) - be_cmd_pmac_del(adapter, adapter->if_handle, - adapter->pmac_id[i + 1], 0); + be_uc_mac_del(adapter, adapter->pmac_id[i + 1]); + adapter->uc_macs = 0; } @@ -3566,9 +3616,7 @@ static void be_rx_qs_destroy(struct be_adapter *adapter) static void be_disable_if_filters(struct be_adapter *adapter) { - be_cmd_pmac_del(adapter, adapter->if_handle, - adapter->pmac_id[0], 0); - + be_dev_mac_del(adapter, adapter->pmac_id[0]); be_clear_uc_list(adapter); be_clear_mc_list(adapter); @@ -3723,9 +3771,7 @@ static int be_enable_if_filters(struct be_adapter *adapter) /* For BE3 VFs, the PF programs the initial MAC address */ if (!(BEx_chip(adapter) && be_virtfn(adapter))) { - status = be_cmd_pmac_add(adapter, adapter->netdev->dev_addr, - adapter->if_handle, - &adapter->pmac_id[0], 0); + status = be_dev_mac_add(adapter, adapter->netdev->dev_addr); if (status) return status; } -- cgit v1.1 From c27ebf58517536c0006813007680b24db17def47 Mon Sep 17 00:00:00 2001 From: Suresh Reddy Date: Wed, 7 Sep 2016 19:57:53 +0530 Subject: be2net: Fix mac address collision in some configurations If the device mac address is updated using ndo_set_mac_address(), while the same mac address is already programmed, the driver does not detect this condition if its netdev->dev_addr has been changed. The driver tries to add the same mac address resulting in mac address collision error. This has been observed in bonding mode-5 configuration. To fix this, store the mac address configured in HW in the adapter structure. Use this to compare against the new address being updated to avoid collision. Signed-off-by: Suresh Reddy Signed-off-by: Sathya Perla Signed-off-by: Sriharsha Basavapatna Signed-off-by: David S. 
Miller --- drivers/net/ethernet/emulex/benet/be.h | 1 + drivers/net/ethernet/emulex/benet/be_main.c | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index eecf24e..30a26aa 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -693,6 +693,7 @@ struct be_adapter { u32 fat_dump_len; u16 serial_num[CNTL_SERIAL_NUM_WORDS]; u8 phy_state; /* state of sfp optics (functional, faulted, etc.,) */ + u8 dev_mac[ETH_ALEN]; u32 priv_flags; /* ethtool get/set_priv_flags() */ struct be_error_recovery error_recovery; }; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index a1c9920..34f63ef 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -316,7 +316,7 @@ static int be_mac_addr_set(struct net_device *netdev, void *p) /* Proceed further only if, User provided MAC is different * from active MAC */ - if (ether_addr_equal(addr->sa_data, netdev->dev_addr)) + if (ether_addr_equal(addr->sa_data, adapter->dev_mac)) return 0; /* if device is not running, copy MAC to netdev->dev_addr */ @@ -357,6 +357,7 @@ static int be_mac_addr_set(struct net_device *netdev, void *p) goto err; } done: + ether_addr_copy(adapter->dev_mac, addr->sa_data); ether_addr_copy(netdev->dev_addr, addr->sa_data); dev_info(dev, "MAC address changed to %pM\n", addr->sa_data); return 0; @@ -1662,7 +1663,7 @@ static void be_clear_mc_list(struct be_adapter *adapter) static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx) { if (ether_addr_equal((u8 *)&adapter->uc_list[uc_idx * ETH_ALEN], - adapter->netdev->dev_addr)) { + adapter->dev_mac)) { adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0]; return 0; } @@ -3774,6 +3775,7 @@ static int be_enable_if_filters(struct be_adapter *adapter) status = be_dev_mac_add(adapter, adapter->netdev->dev_addr); if (status) 
return status; + ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr); } if (adapter->vlans_added) -- cgit v1.1 From 368f2f137f5401f37d2acb42c4ca4e5867570495 Mon Sep 17 00:00:00 2001 From: Sriharsha Basavapatna Date: Wed, 7 Sep 2016 19:57:54 +0530 Subject: be2net: Update the driver version to 11.1.0.0 Signed-off-by: Sriharsha Basavapatna Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 30a26aa..6cfa63a 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -37,7 +37,7 @@ #include "be_hw.h" #include "be_roce.h" -#define DRV_VER "11.0.0.0" +#define DRV_VER "11.1.0.0" #define DRV_NAME "be2net" #define BE_NAME "Emulex BladeEngine2" #define BE3_NAME "Emulex BladeEngine3" -- cgit v1.1 From db91e2370e087967cb6b6425c092188767fb5e00 Mon Sep 17 00:00:00 2001 From: Daniel Baluta Date: Wed, 7 Sep 2016 19:49:34 +0930 Subject: tools/lguest: Don't bork the terminal in case of wrong args Running lguest without arguments or with a wrong argument name borks the terminal, because the cleanup handler is set up too late in the initialization process. Signed-off-by: Daniel Baluta Signed-off-by: Rusty Russell Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- tools/lguest/lguest.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c index d9836c5..11c8d9b 100644 --- a/tools/lguest/lguest.c +++ b/tools/lguest/lguest.c @@ -3266,6 +3266,9 @@ int main(int argc, char *argv[]) } } + /* If we exit via err(), this kills all the threads, restores tty. */ + atexit(cleanup_devices); + /* We always have a console device, and it's always device 1. 
*/ setup_console(); @@ -3369,9 +3372,6 @@ int main(int argc, char *argv[]) /* Ensure that we terminate if a device-servicing child dies. */ signal(SIGCHLD, kill_launcher); - /* If we exit via err(), this kills all the threads, restores tty. */ - atexit(cleanup_devices); - /* If requested, chroot to a directory */ if (chroot_path) { if (chroot(chroot_path) != 0) -- cgit v1.1 From c291b015158577be533dd5a959dfc09bab119eed Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Wed, 7 Sep 2016 10:21:33 +0800 Subject: x86/apic: Fix num_processors value in case of failure If the topology package map check of the APIC ID and the CPU is a failure, we don't generate the processor info for that APIC ID yet we increase disabled_cpus by one - which is buggy. Only increase num_processors once we are sure we don't fail. Signed-off-by: Dou Liyang Acked-by: David Rientjes Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1473214893-16481-1-git-send-email-douly.fnst@cn.fujitsu.com [ Rewrote the changelog. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 50c95af..f3e9b2d 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2093,7 +2093,6 @@ int generic_processor_info(int apicid, int version) return -EINVAL; } - num_processors++; if (apicid == boot_cpu_physical_apicid) { /* * x86_bios_cpu_apicid is required to have processors listed @@ -2116,10 +2115,13 @@ int generic_processor_info(int apicid, int version) pr_warning("APIC: Package limit reached. 
Processor %d/0x%x ignored.\n", thiscpu, apicid); + disabled_cpus++; return -ENOSPC; } + num_processors++; + /* * Validate version */ -- cgit v1.1 From 0f76d2564469fd3a337de088f533364cef206130 Mon Sep 17 00:00:00 2001 From: "subashab@codeaurora.org" Date: Tue, 6 Sep 2016 18:09:31 -0600 Subject: net: xfrm: Change u32 sysctl entries to use proc_douintvec proc_dointvec limits the values to INT_MAX in u32 sysctl entries. proc_douintvec allows writing values up to UINT_MAX. Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- net/xfrm/xfrm_sysctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c index 05a6e3d..35a7e79 100644 --- a/net/xfrm/xfrm_sysctl.c +++ b/net/xfrm/xfrm_sysctl.c @@ -17,13 +17,13 @@ static struct ctl_table xfrm_table[] = { .procname = "xfrm_aevent_etime", .maxlen = sizeof(u32), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_douintvec }, { .procname = "xfrm_aevent_rseqth", .maxlen = sizeof(u32), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_douintvec }, { .procname = "xfrm_larval_drop", -- cgit v1.1 From bcf42aa60c2832510b9be0f30c090bfd35bb172d Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Wed, 7 Sep 2016 17:26:33 +0300 Subject: xhci: fix null pointer dereference in stop command timeout function The stop endpoint command has its own 5 second timeout timer. If the timeout function is triggered between USB3 and USB2 host removal it will try to call usb_hc_died(xhci_to_hcd(xhci)->primary_hcd), but the ->primary_hcd will be set to NULL at USB3 hcd removal. Fix this by first checking if the PCI host is being removed, and also by using only xhci_to_hcd() as it will always return the primary hcd. 
CC: Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index fd9fd12..797137e 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -850,6 +850,10 @@ void xhci_stop_endpoint_command_watchdog(unsigned long arg) spin_lock_irqsave(&xhci->lock, flags); ep->stop_cmds_pending--; + if (xhci->xhc_state & XHCI_STATE_REMOVING) { + spin_unlock_irqrestore(&xhci->lock, flags); + return; + } if (xhci->xhc_state & XHCI_STATE_DYING) { xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, "Stop EP timer ran, but another timer marked " @@ -903,7 +907,7 @@ void xhci_stop_endpoint_command_watchdog(unsigned long arg) spin_unlock_irqrestore(&xhci->lock, flags); xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, "Calling usb_hc_died()"); - usb_hc_died(xhci_to_hcd(xhci)->primary_hcd); + usb_hc_died(xhci_to_hcd(xhci)); xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, "xHCI host controller is dead."); } -- cgit v1.1 From 9b41b92bbae6a4d3e1f3adde87864fd481037814 Mon Sep 17 00:00:00 2001 From: Allen Hung Date: Fri, 15 Jul 2016 17:42:22 +0800 Subject: dmi-id: don't free dev structure after calling device_register dmi_dev is freed in error exit code but, according to the document of device_register, it should never directly free device structure after calling this function, even if it returned an error! Use put_device() instead. 
Signed-off-by: Allen Hung Signed-off-by: Jean Delvare --- drivers/firmware/dmi-id.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/firmware/dmi-id.c b/drivers/firmware/dmi-id.c index 94a58a0..44c0139 100644 --- a/drivers/firmware/dmi-id.c +++ b/drivers/firmware/dmi-id.c @@ -229,14 +229,14 @@ static int __init dmi_id_init(void) ret = device_register(dmi_dev); if (ret) - goto fail_free_dmi_dev; + goto fail_put_dmi_dev; return 0; -fail_free_dmi_dev: - kfree(dmi_dev); -fail_class_unregister: +fail_put_dmi_dev: + put_device(dmi_dev); +fail_class_unregister: class_unregister(&dmi_class); return ret; -- cgit v1.1 From 9f8a7658bcafb2a7853f7a2eae8a94e87e6e695b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 7 Sep 2016 15:45:31 +0200 Subject: ALSA: timer: Fix zero-division by continue of uninitialized instance When a user timer instance is continued without the explicit start beforehand, the system gets eventually zero-division error like: divide error: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN CPU: 1 PID: 27320 Comm: syz-executor Not tainted 4.8.0-rc3-next-20160825+ #8 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff88003c9b2280 task.stack: ffff880027280000 RIP: 0010:[] [< inline >] ktime_divns include/linux/ktime.h:195 RIP: 0010:[] [] snd_hrtimer_callback+0x1bc/0x3c0 sound/core/hrtimer.c:62 Call Trace: [< inline >] __run_hrtimer kernel/time/hrtimer.c:1238 [] __hrtimer_run_queues+0x325/0xe70 kernel/time/hrtimer.c:1302 [] hrtimer_interrupt+0x18b/0x420 kernel/time/hrtimer.c:1336 [] local_apic_timer_interrupt+0x6f/0xe0 arch/x86/kernel/apic/apic.c:933 [] smp_apic_timer_interrupt+0x76/0xa0 arch/x86/kernel/apic/apic.c:957 [] apic_timer_interrupt+0x8c/0xa0 arch/x86/entry/entry_64.S:487 ..... Although a similar issue was spotted and a fix patch was merged in commit [6b760bb2c63a: ALSA: timer: fix division by zero after SNDRV_TIMER_IOCTL_CONTINUE], it seems covering only a part of iceberg. 
In this patch, we fix the issue a bit more drastically. Basically the continue of an uninitialized timer is supposed to be a fresh start, so we do it for user timers. For the direct snd_timer_continue() call, there is no way to pass the initial tick value, so we kick out for the uninitialized case. Reported-by: Dmitry Vyukov Cc: Signed-off-by: Takashi Iwai --- sound/core/timer.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sound/core/timer.c b/sound/core/timer.c index 2706061..fc144f4 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -35,6 +35,9 @@ #include #include +/* internal flags */ +#define SNDRV_TIMER_IFLG_PAUSED 0x00010000 + #if IS_ENABLED(CONFIG_SND_HRTIMER) #define DEFAULT_TIMER_LIMIT 4 #else @@ -539,6 +542,10 @@ static int snd_timer_stop1(struct snd_timer_instance *timeri, bool stop) } } timeri->flags &= ~(SNDRV_TIMER_IFLG_RUNNING | SNDRV_TIMER_IFLG_START); + if (stop) + timeri->flags &= ~SNDRV_TIMER_IFLG_PAUSED; + else + timeri->flags |= SNDRV_TIMER_IFLG_PAUSED; snd_timer_notify1(timeri, stop ? SNDRV_TIMER_EVENT_STOP : SNDRV_TIMER_EVENT_CONTINUE); unlock: @@ -600,6 +607,10 @@ int snd_timer_stop(struct snd_timer_instance *timeri) */ int snd_timer_continue(struct snd_timer_instance *timeri) { + /* timer can continue only after pause */ + if (!(timeri->flags & SNDRV_TIMER_IFLG_PAUSED)) + return -EINVAL; + if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE) return snd_timer_start_slave(timeri, false); else @@ -1831,6 +1842,9 @@ static int snd_timer_user_continue(struct file *file) tu = file->private_data; if (!tu->timeri) return -EBADFD; + /* start timer instead of continue if it's not used before */ + if (!(tu->timeri->flags & SNDRV_TIMER_IFLG_PAUSED)) + return snd_timer_user_start(file); tu->timeri->lost = 0; return (err = snd_timer_continue(tu->timeri)) < 0 ? 
err : 0; } -- cgit v1.1 From 816f318b2364262a51024096da7ca3b84e78e3b5 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 30 Aug 2016 14:45:46 +0200 Subject: ALSA: rawmidi: Fix possible deadlock with virmidi registration When a seq-virmidi driver is initialized, it registers a rawmidi instance with its callback to create an associated seq kernel client. Currently it's done entirely in rawmidi's register_mutex context. Recently it was found that this may lead to a deadlock when another rawmidi device that is being attached with the sequencer is accessed, as both open with the same register_mutex. This was actually triggered by syzkaller, as Dmitry Vyukov reported: ====================================================== [ INFO: possible circular locking dependency detected ] 4.8.0-rc1+ #11 Not tainted ------------------------------------------------------- syz-executor/7154 is trying to acquire lock: (register_mutex#5){+.+.+.}, at: [] snd_rawmidi_kernel_open+0x4b/0x260 sound/core/rawmidi.c:341 but task is already holding lock: (&grp->list_mutex){++++.+}, at: [] check_and_subscribe_port+0x5b/0x5c0 sound/core/seq/seq_ports.c:495 which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #1 (&grp->list_mutex){++++.+}: [] lock_acquire+0x208/0x430 kernel/locking/lockdep.c:3746 [] down_read+0x49/0xc0 kernel/locking/rwsem.c:22 [< inline >] deliver_to_subscribers sound/core/seq/seq_clientmgr.c:681 [] snd_seq_deliver_event+0x35e/0x890 sound/core/seq/seq_clientmgr.c:822 [] > snd_seq_kernel_client_dispatch+0x126/0x170 sound/core/seq/seq_clientmgr.c:2418 [] snd_seq_system_broadcast+0xb2/0xf0 sound/core/seq/seq_system.c:101 [] snd_seq_create_kernel_client+0x24a/0x330 sound/core/seq/seq_clientmgr.c:2297 [< inline >] snd_virmidi_dev_attach_seq sound/core/seq/seq_virmidi.c:383 [] snd_virmidi_dev_register+0x29f/0x750 sound/core/seq/seq_virmidi.c:450 [] snd_rawmidi_dev_register+0x30c/0xd40 sound/core/rawmidi.c:1645 [] __snd_device_register.part.0+0x63/0xc0 sound/core/device.c:164 [< inline >] __snd_device_register sound/core/device.c:162 [] snd_device_register_all+0xad/0x110 sound/core/device.c:212 [] snd_card_register+0xef/0x6c0 sound/core/init.c:749 [] snd_virmidi_probe+0x3ef/0x590 sound/drivers/virmidi.c:123 [] platform_drv_probe+0x8b/0x170 drivers/base/platform.c:564 ...... 
-> #0 (register_mutex#5){+.+.+.}: [< inline >] check_prev_add kernel/locking/lockdep.c:1829 [< inline >] check_prevs_add kernel/locking/lockdep.c:1939 [< inline >] validate_chain kernel/locking/lockdep.c:2266 [] __lock_acquire+0x4d44/0x4d80 kernel/locking/lockdep.c:3335 [] lock_acquire+0x208/0x430 kernel/locking/lockdep.c:3746 [< inline >] __mutex_lock_common kernel/locking/mutex.c:521 [] mutex_lock_nested+0xb1/0xa20 kernel/locking/mutex.c:621 [] snd_rawmidi_kernel_open+0x4b/0x260 sound/core/rawmidi.c:341 [] midisynth_subscribe+0xf7/0x350 sound/core/seq/seq_midi.c:188 [< inline >] subscribe_port sound/core/seq/seq_ports.c:427 [] check_and_subscribe_port+0x467/0x5c0 sound/core/seq/seq_ports.c:510 [] snd_seq_port_connect+0x2c9/0x500 sound/core/seq/seq_ports.c:579 [] snd_seq_ioctl_subscribe_port+0x1d8/0x2b0 sound/core/seq/seq_clientmgr.c:1480 [] snd_seq_do_ioctl+0x184/0x1e0 sound/core/seq/seq_clientmgr.c:2225 [] snd_seq_kernel_client_ctl+0xa8/0x110 sound/core/seq/seq_clientmgr.c:2440 [] snd_seq_oss_midi_open+0x3b4/0x610 sound/core/seq/oss/seq_oss_midi.c:375 [] snd_seq_oss_synth_setup_midi+0x107/0x4c0 sound/core/seq/oss/seq_oss_synth.c:281 [] snd_seq_oss_open+0x748/0x8d0 sound/core/seq/oss/seq_oss_init.c:274 [] odev_open+0x6a/0x90 sound/core/seq/oss/seq_oss.c:138 [] soundcore_open+0x30f/0x640 sound/sound_core.c:639 ...... other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&grp->list_mutex); lock(register_mutex#5); lock(&grp->list_mutex); lock(register_mutex#5); *** DEADLOCK *** ====================================================== The fix is to simply move the registration parts in snd_rawmidi_dev_register() to the outside of the register_mutex lock. The lock is needed only to manage the linked list, and it's not necessarily to cover the whole initialization process. 
Reported-by: Dmitry Vyukov Cc: Signed-off-by: Takashi Iwai --- sound/core/rawmidi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index 795437b..b450a27 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -1633,11 +1633,13 @@ static int snd_rawmidi_dev_register(struct snd_device *device) return -EBUSY; } list_add_tail(&rmidi->list, &snd_rawmidi_devices); + mutex_unlock(&register_mutex); err = snd_register_device(SNDRV_DEVICE_TYPE_RAWMIDI, rmidi->card, rmidi->device, &snd_rawmidi_f_ops, rmidi, &rmidi->dev); if (err < 0) { rmidi_err(rmidi, "unable to register\n"); + mutex_lock(&register_mutex); list_del(&rmidi->list); mutex_unlock(&register_mutex); return err; @@ -1645,6 +1647,7 @@ static int snd_rawmidi_dev_register(struct snd_device *device) if (rmidi->ops && rmidi->ops->dev_register && (err = rmidi->ops->dev_register(rmidi)) < 0) { snd_unregister_device(&rmidi->dev); + mutex_lock(&register_mutex); list_del(&rmidi->list); mutex_unlock(&register_mutex); return err; @@ -1677,7 +1680,6 @@ static int snd_rawmidi_dev_register(struct snd_device *device) } } #endif /* CONFIG_SND_OSSEMUL */ - mutex_unlock(&register_mutex); sprintf(name, "midi%d", rmidi->device); entry = snd_info_create_card_entry(rmidi->card, name, rmidi->card->proc_root); if (entry) { -- cgit v1.1 From cf13258fd4cb86478dfcb7e2c93a0d844307abc6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 8 Sep 2016 11:10:11 +0100 Subject: rxrpc: Fix ASSERTCMP and ASSERTIFCMP to handle signed values Fix ASSERTCMP and ASSERTIFCMP to be able to handle signed values by casting both parameters to the type of the first before comparing. Without this, both values are cast to unsigned long, which means that checks for values less than zero don't work. The downside of this is that the state enum values in struct rxrpc_call and struct rxrpc_connection can't be bitfields as __typeof__ can't handle them. 
Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index dbfb9ed..6dc3a59 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -344,8 +344,8 @@ struct rxrpc_connection { unsigned long events; unsigned long idle_timestamp; /* Time at which last became idle */ spinlock_t state_lock; /* state-change lock */ - enum rxrpc_conn_cache_state cache_state : 8; - enum rxrpc_conn_proto_state state : 8; /* current state of connection */ + enum rxrpc_conn_cache_state cache_state; + enum rxrpc_conn_proto_state state; /* current state of connection */ u32 local_abort; /* local abort code */ u32 remote_abort; /* remote abort code */ int debug_id; /* debug ID for printks */ @@ -464,8 +464,8 @@ struct rxrpc_call { rwlock_t state_lock; /* lock for state transition */ u32 abort_code; /* Local/remote abort code */ int error; /* Local error incurred */ - enum rxrpc_call_state state : 8; /* current state of call */ - enum rxrpc_call_completion completion : 8; /* Call completion condition */ + enum rxrpc_call_state state; /* current state of call */ + enum rxrpc_call_completion completion; /* Call completion condition */ atomic_t usage; atomic_t skb_count; /* Outstanding packets on this call */ atomic_t sequence; /* Tx data packet sequence counter */ @@ -1014,11 +1014,12 @@ do { \ #define ASSERTCMP(X, OP, Y) \ do { \ - unsigned long _x = (unsigned long)(X); \ - unsigned long _y = (unsigned long)(Y); \ + __typeof__(X) _x = (X); \ + __typeof__(Y) _y = (__typeof__(X))(Y); \ if (unlikely(!(_x OP _y))) { \ - pr_err("Assertion failed - %lu(0x%lx) %s %lu(0x%lx) is false\n", \ - _x, _x, #OP, _y, _y); \ + pr_err("Assertion failed - %lu(0x%lx) %s %lu(0x%lx) is false\n", \ + (unsigned long)_x, (unsigned long)_x, #OP, \ + (unsigned long)_y, (unsigned long)_y); \ BUG(); \ } \ } while (0) @@ -1033,11 +1034,12 @@ do { \ #define 
ASSERTIFCMP(C, X, OP, Y) \ do { \ - unsigned long _x = (unsigned long)(X); \ - unsigned long _y = (unsigned long)(Y); \ + __typeof__(X) _x = (X); \ + __typeof__(Y) _y = (__typeof__(X))(Y); \ if (unlikely((C) && !(_x OP _y))) { \ pr_err("Assertion failed - %lu(0x%lx) %s %lu(0x%lx) is false\n", \ - _x, _x, #OP, _y, _y); \ + (unsigned long)_x, (unsigned long)_x, #OP, \ + (unsigned long)_y, (unsigned long)_y); \ BUG(); \ } \ } while (0) -- cgit v1.1 From 18f1387c7d7c6827b3ed6adf6ae20f65a58dc7b0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 8 Sep 2016 11:10:11 +0100 Subject: rxrpc: Update protocol definitions slightly Update the protocol definitions in include/rxrpc/packet.h slightly: (1) Get rid of RXRPC_PROCESS_MAXCALLS as it's redundant (same as RXRPC_MAXCALLS). (2) In struct rxrpc_jumbo_header, put _rsvd in a union with a field called cksum to match struct rxrpc_wire_header. (3) Provide RXRPC_JUMBO_SUBPKTLEN which is the total of the amount of data in a non-terminal subpacket plus the following secondary header for the next packet included in the jumbo packet. 
Signed-off-by: David Howells --- include/rxrpc/packet.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/rxrpc/packet.h b/include/rxrpc/packet.h index 3c6128e..b0ae5c1 100644 --- a/include/rxrpc/packet.h +++ b/include/rxrpc/packet.h @@ -34,8 +34,6 @@ struct rxrpc_wire_header { #define RXRPC_CID_INC (1 << RXRPC_CIDSHIFT) /* connection ID increment */ __be32 callNumber; /* call ID (0 for connection-level packets) */ -#define RXRPC_PROCESS_MAXCALLS (1<<2) /* maximum number of active calls per conn (power of 2) */ - __be32 seq; /* sequence number of pkt in call stream */ __be32 serial; /* serial number of pkt sent to network */ @@ -93,10 +91,14 @@ struct rxrpc_wire_header { struct rxrpc_jumbo_header { uint8_t flags; /* packet flags (as per rxrpc_header) */ uint8_t pad; - __be16 _rsvd; /* reserved (used by kerberos security as cksum) */ + union { + __be16 _rsvd; /* reserved */ + __be16 cksum; /* kerberos security checksum */ + }; }; #define RXRPC_JUMBO_DATALEN 1412 /* non-terminal jumbo packet data length */ +#define RXRPC_JUMBO_SUBPKTLEN (RXRPC_JUMBO_DATALEN + sizeof(struct rxrpc_jumbo_header)) /*****************************************************************************/ /* -- cgit v1.1 From de8d6c7401ae8f25db3788804c86887ad7347bee Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 8 Sep 2016 11:10:11 +0100 Subject: rxrpc: Convert rxrpc_local::services to an hlist Convert the rxrpc_local::services list to an hlist so that it can be accessed under RCU conditions more readily. 
Signed-off-by: David Howells --- net/rxrpc/af_rxrpc.c | 10 +++++----- net/rxrpc/ar-internal.h | 4 ++-- net/rxrpc/call_accept.c | 2 +- net/rxrpc/local_object.c | 4 ++-- net/rxrpc/security.c | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 77a132a..f13cca1 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -156,13 +156,13 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) if (rx->srx.srx_service) { write_lock_bh(&local->services_lock); - list_for_each_entry(prx, &local->services, listen_link) { + hlist_for_each_entry(prx, &local->services, listen_link) { if (prx->srx.srx_service == rx->srx.srx_service) goto service_in_use; } rx->local = local; - list_add_tail(&rx->listen_link, &local->services); + hlist_add_head_rcu(&rx->listen_link, &local->services); write_unlock_bh(&local->services_lock); rx->sk.sk_state = RXRPC_SERVER_BOUND; @@ -567,7 +567,7 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, rx->family = protocol; rx->calls = RB_ROOT; - INIT_LIST_HEAD(&rx->listen_link); + INIT_HLIST_NODE(&rx->listen_link); INIT_LIST_HEAD(&rx->secureq); INIT_LIST_HEAD(&rx->acceptq); rwlock_init(&rx->call_lock); @@ -615,9 +615,9 @@ static int rxrpc_release_sock(struct sock *sk) ASSERTCMP(rx->listen_link.next, !=, LIST_POISON1); - if (!list_empty(&rx->listen_link)) { + if (!hlist_unhashed(&rx->listen_link)) { write_lock_bh(&rx->local->services_lock); - list_del(&rx->listen_link); + hlist_del_rcu(&rx->listen_link); write_unlock_bh(&rx->local->services_lock); } diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 6dc3a59..fd438dc 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -71,7 +71,7 @@ struct rxrpc_sock { struct sock sk; rxrpc_notify_new_call_t notify_new_call; /* Func to notify of new call */ struct rxrpc_local *local; /* local endpoint */ - struct list_head listen_link; /* link in the local endpoint's 
listen list */ + struct hlist_node listen_link; /* link in the local endpoint's listen list */ struct list_head secureq; /* calls awaiting connection security clearance */ struct list_head acceptq; /* calls awaiting acceptance */ struct key *key; /* security for this socket */ @@ -186,7 +186,7 @@ struct rxrpc_local { struct list_head link; struct socket *socket; /* my UDP socket */ struct work_struct processor; - struct list_head services; /* services listening on this endpoint */ + struct hlist_head services; /* services listening on this endpoint */ struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */ struct sk_buff_head accept_queue; /* incoming calls awaiting acceptance */ struct sk_buff_head reject_queue; /* packets awaiting rejection */ diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 879a964..4c71efc 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -217,7 +217,7 @@ void rxrpc_accept_incoming_calls(struct rxrpc_local *local) /* get the socket providing the service */ read_lock_bh(&local->services_lock); - list_for_each_entry(rx, &local->services, listen_link) { + hlist_for_each_entry(rx, &local->services, listen_link) { if (rx->srx.srx_service == sp->hdr.serviceId && rx->sk.sk_state != RXRPC_CLOSE) goto found_service; diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index a753796..610916f 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -75,7 +75,7 @@ static struct rxrpc_local *rxrpc_alloc_local(const struct sockaddr_rxrpc *srx) atomic_set(&local->usage, 1); INIT_LIST_HEAD(&local->link); INIT_WORK(&local->processor, rxrpc_local_processor); - INIT_LIST_HEAD(&local->services); + INIT_HLIST_HEAD(&local->services); init_rwsem(&local->defrag_sem); skb_queue_head_init(&local->accept_queue); skb_queue_head_init(&local->reject_queue); @@ -296,7 +296,7 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local) mutex_unlock(&rxrpc_local_mutex); 
ASSERT(RB_EMPTY_ROOT(&local->client_conns)); - ASSERT(list_empty(&local->services)); + ASSERT(hlist_empty(&local->services)); if (socket) { local->socket = NULL; diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c index 814d285..5d79d5a 100644 --- a/net/rxrpc/security.c +++ b/net/rxrpc/security.c @@ -131,7 +131,7 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *conn) /* find the service */ read_lock_bh(&local->services_lock); - list_for_each_entry(rx, &local->services, listen_link) { + hlist_for_each_entry(rx, &local->services, listen_link) { if (rx->srx.srx_service == conn->params.service_id) goto found_service; } -- cgit v1.1 From 2ab27215ea27475a0b279732ba8a934bfab57ef0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 8 Sep 2016 11:10:12 +0100 Subject: rxrpc: Remove skb_count from struct rxrpc_call Remove the sk_buff count from the rxrpc_call struct as it's less useful once we stop queueing sk_buffs. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 10 +++------- net/rxrpc/ar-internal.h | 1 - net/rxrpc/call_object.c | 34 ++++++++++++---------------------- 3 files changed, 15 insertions(+), 30 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 85ee035..6b06cf0 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -18,16 +18,14 @@ TRACE_EVENT(rxrpc_call, TP_PROTO(struct rxrpc_call *call, enum rxrpc_call_trace op, - int usage, int nskb, - const void *where, const void *aux), + int usage, const void *where, const void *aux), - TP_ARGS(call, op, usage, nskb, where, aux), + TP_ARGS(call, op, usage, where, aux), TP_STRUCT__entry( __field(struct rxrpc_call *, call ) __field(int, op ) __field(int, usage ) - __field(int, nskb ) __field(const void *, where ) __field(const void *, aux ) ), @@ -36,16 +34,14 @@ TRACE_EVENT(rxrpc_call, __entry->call = call; __entry->op = op; __entry->usage = usage; - __entry->nskb = nskb; __entry->where = where; __entry->aux = aux; ), - 
TP_printk("c=%p %s u=%d s=%d p=%pSR a=%p", + TP_printk("c=%p %s u=%d sp=%pSR a=%p", __entry->call, rxrpc_call_traces[__entry->op], __entry->usage, - __entry->nskb, __entry->where, __entry->aux) ); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index fd438dc..0277912 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -467,7 +467,6 @@ struct rxrpc_call { enum rxrpc_call_state state; /* current state of call */ enum rxrpc_call_completion completion; /* Call completion condition */ atomic_t usage; - atomic_t skb_count; /* Outstanding packets on this call */ atomic_t sequence; /* Tx data packet sequence counter */ u16 service_id; /* service ID */ u8 security_ix; /* Security type */ diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 9efd9b0..f843397 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -232,9 +232,8 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, return call; } - trace_rxrpc_call(call, rxrpc_call_new_client, - atomic_read(&call->usage), 0, - here, (const void *)user_call_ID); + trace_rxrpc_call(call, 0, atomic_read(&call->usage), here, + (const void *)user_call_ID); /* Publish the call, even though it is incompletely set up as yet */ call->user_call_ID = user_call_ID; @@ -325,7 +324,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, return ERR_PTR(-EBUSY); trace_rxrpc_call(candidate, rxrpc_call_new_service, - atomic_read(&candidate->usage), 0, here, NULL); + atomic_read(&candidate->usage), here, NULL); chan = sp->hdr.cid & RXRPC_CHANNELMASK; candidate->conn = conn; @@ -446,11 +445,10 @@ bool rxrpc_queue_call(struct rxrpc_call *call) { const void *here = __builtin_return_address(0); int n = __atomic_add_unless(&call->usage, 1, 0); - int m = atomic_read(&call->skb_count); if (n == 0) return false; if (rxrpc_queue_work(&call->processor)) - trace_rxrpc_call(call, rxrpc_call_queued, n + 1, m, here, NULL); + trace_rxrpc_call(call, rxrpc_call_queued, n + 1, 
here, NULL); else rxrpc_put_call(call, rxrpc_call_put_noqueue); return true; @@ -463,10 +461,9 @@ bool __rxrpc_queue_call(struct rxrpc_call *call) { const void *here = __builtin_return_address(0); int n = atomic_read(&call->usage); - int m = atomic_read(&call->skb_count); ASSERTCMP(n, >=, 1); if (rxrpc_queue_work(&call->processor)) - trace_rxrpc_call(call, rxrpc_call_queued_ref, n, m, here, NULL); + trace_rxrpc_call(call, rxrpc_call_queued_ref, n, here, NULL); else rxrpc_put_call(call, rxrpc_call_put_noqueue); return true; @@ -480,9 +477,8 @@ void rxrpc_see_call(struct rxrpc_call *call) const void *here = __builtin_return_address(0); if (call) { int n = atomic_read(&call->usage); - int m = atomic_read(&call->skb_count); - trace_rxrpc_call(call, rxrpc_call_seen, n, m, here, NULL); + trace_rxrpc_call(call, rxrpc_call_seen, n, here, NULL); } } @@ -493,9 +489,8 @@ void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) { const void *here = __builtin_return_address(0); int n = atomic_inc_return(&call->usage); - int m = atomic_read(&call->skb_count); - trace_rxrpc_call(call, op, n, m, here, NULL); + trace_rxrpc_call(call, op, n, here, NULL); } /* @@ -505,9 +500,8 @@ void rxrpc_get_call_for_skb(struct rxrpc_call *call, struct sk_buff *skb) { const void *here = __builtin_return_address(0); int n = atomic_inc_return(&call->usage); - int m = atomic_inc_return(&call->skb_count); - trace_rxrpc_call(call, rxrpc_call_got_skb, n, m, here, skb); + trace_rxrpc_call(call, rxrpc_call_got_skb, n, here, skb); } /* @@ -642,17 +636,15 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op) { const void *here = __builtin_return_address(0); - int n, m; + int n; ASSERT(call != NULL); n = atomic_dec_return(&call->usage); - m = atomic_read(&call->skb_count); - trace_rxrpc_call(call, op, n, m, here, NULL); + trace_rxrpc_call(call, op, n, here, NULL); ASSERTCMP(n, >=, 0); if (n == 0) { _debug("call %d 
dead", call->debug_id); - WARN_ON(m != 0); rxrpc_cleanup_call(call); } } @@ -663,15 +655,13 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op) void rxrpc_put_call_for_skb(struct rxrpc_call *call, struct sk_buff *skb) { const void *here = __builtin_return_address(0); - int n, m; + int n; n = atomic_dec_return(&call->usage); - m = atomic_dec_return(&call->skb_count); - trace_rxrpc_call(call, rxrpc_call_put_skb, n, m, here, skb); + trace_rxrpc_call(call, rxrpc_call_put_skb, n, here, skb); ASSERTCMP(n, >=, 0); if (n == 0) { _debug("call %d dead", call->debug_id); - WARN_ON(m != 0); rxrpc_cleanup_call(call); } } -- cgit v1.1 From 49e19ec7d3499f79d2b3a45bb28418e89512fd7a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 8 Sep 2016 11:10:12 +0100 Subject: rxrpc: Add tracepoints to record received packets and end of data_ready Add two tracepoints: (1) Record the RxRPC protocol header of packets retrieved from the UDP socket by the data_ready handler. (2) Record the outcome of the data_ready handler. 
Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 38 ++++++++++++++++++++++++++++++++++++++ net/rxrpc/input.c | 8 ++++++-- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 6b06cf0..ea3b10e 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -80,6 +80,44 @@ TRACE_EVENT(rxrpc_skb, __entry->where) ); +TRACE_EVENT(rxrpc_rx_packet, + TP_PROTO(struct rxrpc_skb_priv *sp), + + TP_ARGS(sp), + + TP_STRUCT__entry( + __field_struct(struct rxrpc_host_header, hdr ) + ), + + TP_fast_assign( + memcpy(&__entry->hdr, &sp->hdr, sizeof(__entry->hdr)); + ), + + TP_printk("%08x:%08x:%08x:%04x %08x %08x %02x %02x", + __entry->hdr.epoch, __entry->hdr.cid, + __entry->hdr.callNumber, __entry->hdr.serviceId, + __entry->hdr.serial, __entry->hdr.seq, + __entry->hdr.type, __entry->hdr.flags) + ); + +TRACE_EVENT(rxrpc_rx_done, + TP_PROTO(int result, int abort_code), + + TP_ARGS(result, abort_code), + + TP_STRUCT__entry( + __field(int, result ) + __field(int, abort_code ) + ), + + TP_fast_assign( + __entry->result = result; + __entry->abort_code = abort_code; + ), + + TP_printk("r=%d a=%d", __entry->result, __entry->abort_code) + ); + TRACE_EVENT(rxrpc_abort, TP_PROTO(const char *why, u32 cid, u32 call_id, rxrpc_seq_t seq, int abort_code, int error), diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 8e62410..6c4b7df 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -683,6 +683,7 @@ void rxrpc_data_ready(struct sock *sk) /* dig out the RxRPC connection details */ if (rxrpc_extract_header(sp, skb) < 0) goto bad_message; + trace_rxrpc_rx_packet(sp); _net("Rx RxRPC %s ep=%x call=%x:%x", sp->hdr.flags & RXRPC_CLIENT_INITIATED ? 
"ToServer" : "ToClient", @@ -767,6 +768,7 @@ discard_unlock: out_unlock: rcu_read_unlock(); out: + trace_rxrpc_rx_done(0, 0); return; cant_route_call: @@ -780,7 +782,7 @@ cant_route_call: skb_queue_tail(&local->accept_queue, skb); rxrpc_queue_work(&local->processor); _leave(" [incoming]"); - return; + goto out; } skb->priority = RX_INVALID_OPERATION; } else { @@ -789,7 +791,7 @@ cant_route_call: if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) { _debug("reject type %d",sp->hdr.type); - rxrpc_reject_packet(local, skb); + goto reject_packet; } else { rxrpc_free_skb(skb); } @@ -798,6 +800,8 @@ cant_route_call: bad_message: skb->priority = RX_PROTOCOL_ERROR; +reject_packet: + trace_rxrpc_rx_done(skb->mark, skb->priority); rxrpc_reject_packet(local, skb); _leave(" [badmsg]"); } -- cgit v1.1 From 00e907127e6f86d0f9b122d9b4347a8aa09a8b61 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 8 Sep 2016 11:10:12 +0100 Subject: rxrpc: Preallocate peers, conns and calls for incoming service requests Make it possible for the data_ready handler called from the UDP transport socket to completely instantiate an rxrpc_call structure and make it immediately live by preallocating all the memory it might need. The idea is to cut out the background thread usage as much as possible. [Note that the preallocated structs are not actually used in this patch - that will be done in a future patch.] If insufficient resources are available in the preallocation buffers, it will be possible to discard the DATA packet in the data_ready handler or schedule a BUSY packet without the need to schedule an attempt at allocation in a background thread. To this end: (1) Preallocate rxrpc_peer, rxrpc_connection and rxrpc_call structs to a maximum number each of the listen backlog size. The backlog size is limited to a maximum of 32. Only this many of each can be in the preallocation buffer. 
(2) For userspace sockets, the preallocation is charged initially by listen() and will be recharged by accepting or rejecting pending new incoming calls. (3) For kernel services {,re,dis}charging of the preallocation buffers is handled manually. Two notifier callbacks have to be provided before kernel_listen() is invoked: (a) An indication that a new call has been instantiated. This can be used to trigger background recharging. (b) An indication that a call is being discarded. This is used when the socket is being released. A function, rxrpc_kernel_charge_accept() is called by the kernel service to preallocate a single call. It should be passed the user ID to be used for that call and a callback to associate the rxrpc call with the kernel service's side of the ID. (4) Discard the preallocation when the socket is closed. (5) Temporarily bump the refcount on the call allocated in rxrpc_incoming_call() so that rxrpc_release_call() can ditch the preallocation ref on service calls unconditionally. This will no longer be necessary once the preallocation is used. Note that this does not yet control the number of active service calls on a client - that will come in a later patch. A future development would be to provide a setsockopt() call that allows a userspace server to manually charge the preallocation buffer. This would allow user call IDs to be provided in advance and the awkward manual accept stage to be bypassed. 
Signed-off-by: David Howells --- fs/afs/rxrpc.c | 71 ++++++++++++++- include/net/af_rxrpc.h | 10 ++- net/rxrpc/af_rxrpc.c | 16 +++- net/rxrpc/ar-internal.h | 32 ++++++- net/rxrpc/call_accept.c | 229 +++++++++++++++++++++++++++++++++++++++++++++++ net/rxrpc/call_object.c | 12 ++- net/rxrpc/conn_object.c | 2 + net/rxrpc/conn_service.c | 24 +++++ net/rxrpc/input.c | 2 +- net/rxrpc/proc.c | 8 +- 10 files changed, 391 insertions(+), 15 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 53750de..720ef05 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -18,6 +18,7 @@ struct socket *afs_socket; /* my RxRPC socket */ static struct workqueue_struct *afs_async_calls; +static struct afs_call *afs_spare_incoming_call; static atomic_t afs_outstanding_calls; static void afs_free_call(struct afs_call *); @@ -26,7 +27,8 @@ static int afs_wait_for_call_to_complete(struct afs_call *); static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long); static int afs_dont_wait_for_call_to_complete(struct afs_call *); static void afs_process_async_call(struct work_struct *); -static void afs_rx_new_call(struct sock *); +static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long); +static void afs_rx_discard_new_call(struct rxrpc_call *, unsigned long); static int afs_deliver_cm_op_id(struct afs_call *); /* synchronous call management */ @@ -54,8 +56,10 @@ static const struct afs_call_type afs_RXCMxxxx = { }; static void afs_collect_incoming_call(struct work_struct *); +static void afs_charge_preallocation(struct work_struct *); static DECLARE_WORK(afs_collect_incoming_call_work, afs_collect_incoming_call); +static DECLARE_WORK(afs_charge_preallocation_work, afs_charge_preallocation); static int afs_wait_atomic_t(atomic_t *p) { @@ -100,13 +104,15 @@ int afs_open_socket(void) if (ret < 0) goto error_2; - rxrpc_kernel_new_call_notification(socket, afs_rx_new_call); + rxrpc_kernel_new_call_notification(socket, afs_rx_new_call, + 
afs_rx_discard_new_call); ret = kernel_listen(socket, INT_MAX); if (ret < 0) goto error_2; afs_socket = socket; + afs_charge_preallocation(NULL); _leave(" = 0"); return 0; @@ -126,6 +132,12 @@ void afs_close_socket(void) { _enter(""); + if (afs_spare_incoming_call) { + atomic_inc(&afs_outstanding_calls); + afs_free_call(afs_spare_incoming_call); + afs_spare_incoming_call = NULL; + } + _debug("outstanding %u", atomic_read(&afs_outstanding_calls)); wait_on_atomic_t(&afs_outstanding_calls, afs_wait_atomic_t, TASK_UNINTERRUPTIBLE); @@ -635,12 +647,65 @@ static void afs_collect_incoming_call(struct work_struct *work) afs_free_call(call); } +static void afs_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID) +{ + struct afs_call *call = (struct afs_call *)user_call_ID; + + call->rxcall = rxcall; +} + +/* + * Charge the incoming call preallocation. + */ +static void afs_charge_preallocation(struct work_struct *work) +{ + struct afs_call *call = afs_spare_incoming_call; + + for (;;) { + if (!call) { + call = kzalloc(sizeof(struct afs_call), GFP_KERNEL); + if (!call) + break; + + INIT_WORK(&call->async_work, afs_process_async_call); + call->wait_mode = &afs_async_incoming_call; + call->type = &afs_RXCMxxxx; + init_waitqueue_head(&call->waitq); + call->state = AFS_CALL_AWAIT_OP_ID; + } + + if (rxrpc_kernel_charge_accept(afs_socket, + afs_wake_up_async_call, + afs_rx_attach, + (unsigned long)call, + GFP_KERNEL) < 0) + break; + call = NULL; + } + afs_spare_incoming_call = call; +} + +/* + * Discard a preallocated call when a socket is shut down. + */ +static void afs_rx_discard_new_call(struct rxrpc_call *rxcall, + unsigned long user_call_ID) +{ + struct afs_call *call = (struct afs_call *)user_call_ID; + + atomic_inc(&afs_outstanding_calls); + call->rxcall = NULL; + afs_free_call(call); +} + /* * Notification of an incoming call. 
*/ -static void afs_rx_new_call(struct sock *sk) +static void afs_rx_new_call(struct sock *sk, struct rxrpc_call *rxcall, + unsigned long user_call_ID) { queue_work(afs_wq, &afs_collect_incoming_call_work); + queue_work(afs_wq, &afs_charge_preallocation_work); } /* diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 08ed872..9cf551be 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -21,10 +21,14 @@ struct rxrpc_call; typedef void (*rxrpc_notify_rx_t)(struct sock *, struct rxrpc_call *, unsigned long); -typedef void (*rxrpc_notify_new_call_t)(struct sock *); +typedef void (*rxrpc_notify_new_call_t)(struct sock *, struct rxrpc_call *, + unsigned long); +typedef void (*rxrpc_discard_new_call_t)(struct rxrpc_call *, unsigned long); +typedef void (*rxrpc_user_attach_call_t)(struct rxrpc_call *, unsigned long); void rxrpc_kernel_new_call_notification(struct socket *, - rxrpc_notify_new_call_t); + rxrpc_notify_new_call_t, + rxrpc_discard_new_call_t); struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *, struct sockaddr_rxrpc *, struct key *, @@ -43,5 +47,7 @@ struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *, unsigned long, int rxrpc_kernel_reject_call(struct socket *); void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, struct sockaddr_rxrpc *); +int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, + rxrpc_user_attach_call_t, unsigned long, gfp_t); #endif /* _NET_RXRPC_H */ diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index f13cca1..1e8cf3d 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -193,7 +193,7 @@ static int rxrpc_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; struct rxrpc_sock *rx = rxrpc_sk(sk); - unsigned int max; + unsigned int max, old; int ret; _enter("%p,%d", rx, backlog); @@ -212,9 +212,13 @@ static int rxrpc_listen(struct socket *sock, int backlog) backlog = max; else if (backlog < 0 || backlog > max) break; + old = 
sk->sk_max_ack_backlog; sk->sk_max_ack_backlog = backlog; - rx->sk.sk_state = RXRPC_SERVER_LISTENING; - ret = 0; + ret = rxrpc_service_prealloc(rx, GFP_KERNEL); + if (ret == 0) + rx->sk.sk_state = RXRPC_SERVER_LISTENING; + else + sk->sk_max_ack_backlog = old; break; default: ret = -EBUSY; @@ -303,16 +307,19 @@ EXPORT_SYMBOL(rxrpc_kernel_end_call); * rxrpc_kernel_new_call_notification - Get notifications of new calls * @sock: The socket to intercept received messages on * @notify_new_call: Function to be called when new calls appear + * @discard_new_call: Function to discard preallocated calls * * Allow a kernel service to be given notifications about new calls. */ void rxrpc_kernel_new_call_notification( struct socket *sock, - rxrpc_notify_new_call_t notify_new_call) + rxrpc_notify_new_call_t notify_new_call, + rxrpc_discard_new_call_t discard_new_call) { struct rxrpc_sock *rx = rxrpc_sk(sock->sk); rx->notify_new_call = notify_new_call; + rx->discard_new_call = discard_new_call; } EXPORT_SYMBOL(rxrpc_kernel_new_call_notification); @@ -622,6 +629,7 @@ static int rxrpc_release_sock(struct sock *sk) } /* try to flush out this socket */ + rxrpc_discard_prealloc(rx); rxrpc_release_calls_on_socket(rx); flush_workqueue(rxrpc_workqueue); rxrpc_purge_queue(&sk->sk_receive_queue); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 0277912..45e1c26 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -64,19 +64,42 @@ enum { }; /* + * Service backlog preallocation. + * + * This contains circular buffers of preallocated peers, connections and calls + * for incoming service calls and their head and tail pointers. This allows + * calls to be set up in the data_ready handler, thereby avoiding the need to + * shuffle packets around so much. 
+ */ +struct rxrpc_backlog { + unsigned short peer_backlog_head; + unsigned short peer_backlog_tail; + unsigned short conn_backlog_head; + unsigned short conn_backlog_tail; + unsigned short call_backlog_head; + unsigned short call_backlog_tail; +#define RXRPC_BACKLOG_MAX 32 + struct rxrpc_peer *peer_backlog[RXRPC_BACKLOG_MAX]; + struct rxrpc_connection *conn_backlog[RXRPC_BACKLOG_MAX]; + struct rxrpc_call *call_backlog[RXRPC_BACKLOG_MAX]; +}; + +/* * RxRPC socket definition */ struct rxrpc_sock { /* WARNING: sk has to be the first member */ struct sock sk; rxrpc_notify_new_call_t notify_new_call; /* Func to notify of new call */ + rxrpc_discard_new_call_t discard_new_call; /* Func to discard a new call */ struct rxrpc_local *local; /* local endpoint */ struct hlist_node listen_link; /* link in the local endpoint's listen list */ struct list_head secureq; /* calls awaiting connection security clearance */ struct list_head acceptq; /* calls awaiting acceptance */ + struct rxrpc_backlog *backlog; /* Preallocation for services */ struct key *key; /* security for this socket */ struct key *securities; /* list of server security descriptors */ - struct rb_root calls; /* outstanding calls on this socket */ + struct rb_root calls; /* User ID -> call mapping */ unsigned long flags; #define RXRPC_SOCK_CONNECTED 0 /* connect_srx is set */ rwlock_t call_lock; /* lock for calls */ @@ -290,6 +313,7 @@ enum rxrpc_conn_cache_state { enum rxrpc_conn_proto_state { RXRPC_CONN_UNUSED, /* Connection not yet attempted */ RXRPC_CONN_CLIENT, /* Client connection */ + RXRPC_CONN_SERVICE_PREALLOC, /* Service connection preallocation */ RXRPC_CONN_SERVICE_UNSECURED, /* Service unsecured connection */ RXRPC_CONN_SERVICE_CHALLENGING, /* Service challenging for security */ RXRPC_CONN_SERVICE, /* Service secured connection */ @@ -408,6 +432,7 @@ enum rxrpc_call_state { RXRPC_CALL_CLIENT_AWAIT_REPLY, /* - client awaiting reply */ RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */ 
RXRPC_CALL_CLIENT_FINAL_ACK, /* - client sending final ACK phase */ + RXRPC_CALL_SERVER_PREALLOC, /* - service preallocation */ RXRPC_CALL_SERVER_SECURING, /* - server securing request connection */ RXRPC_CALL_SERVER_ACCEPTING, /* - server accepting request */ RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */ @@ -534,6 +559,8 @@ extern struct workqueue_struct *rxrpc_workqueue; /* * call_accept.c */ +int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t); +void rxrpc_discard_prealloc(struct rxrpc_sock *); void rxrpc_accept_incoming_calls(struct rxrpc_local *); struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long, rxrpc_notify_rx_t); @@ -557,6 +584,7 @@ extern struct list_head rxrpc_calls; extern rwlock_t rxrpc_call_lock; struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long); +struct rxrpc_call *rxrpc_alloc_call(gfp_t); struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, @@ -573,6 +601,7 @@ void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_get_call_for_skb(struct rxrpc_call *, struct sk_buff *); void rxrpc_put_call_for_skb(struct rxrpc_call *, struct sk_buff *); +void rxrpc_cleanup_call(struct rxrpc_call *); void __exit rxrpc_destroy_all_calls(void); static inline bool rxrpc_is_service_call(const struct rxrpc_call *call) @@ -757,6 +786,7 @@ struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *, struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *, struct sockaddr_rxrpc *, struct sk_buff *); +struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t); void rxrpc_unpublish_service_conn(struct rxrpc_connection *); /* diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 4c71efc..cc7194e 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -20,12 +20,210 @@ #include #include 
#include +#include #include #include #include #include "ar-internal.h" /* + * Preallocate a single service call, connection and peer and, if possible, + * give them a user ID and attach the user's side of the ID to them. + */ +static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, + struct rxrpc_backlog *b, + rxrpc_notify_rx_t notify_rx, + rxrpc_user_attach_call_t user_attach_call, + unsigned long user_call_ID, gfp_t gfp) +{ + const void *here = __builtin_return_address(0); + struct rxrpc_call *call; + int max, tmp; + unsigned int size = RXRPC_BACKLOG_MAX; + unsigned int head, tail, call_head, call_tail; + + max = rx->sk.sk_max_ack_backlog; + tmp = rx->sk.sk_ack_backlog; + if (tmp >= max) { + _leave(" = -ENOBUFS [full %u]", max); + return -ENOBUFS; + } + max -= tmp; + + /* We don't need more conns and peers than we have calls, but on the + * other hand, we shouldn't ever use more peers than conns or conns + * than calls. + */ + call_head = b->call_backlog_head; + call_tail = READ_ONCE(b->call_backlog_tail); + tmp = CIRC_CNT(call_head, call_tail, size); + if (tmp >= max) { + _leave(" = -ENOBUFS [enough %u]", tmp); + return -ENOBUFS; + } + max = tmp + 1; + + head = b->peer_backlog_head; + tail = READ_ONCE(b->peer_backlog_tail); + if (CIRC_CNT(head, tail, size) < max) { + struct rxrpc_peer *peer = rxrpc_alloc_peer(rx->local, gfp); + if (!peer) + return -ENOMEM; + b->peer_backlog[head] = peer; + smp_store_release(&b->peer_backlog_head, + (head + 1) & (size - 1)); + } + + head = b->conn_backlog_head; + tail = READ_ONCE(b->conn_backlog_tail); + if (CIRC_CNT(head, tail, size) < max) { + struct rxrpc_connection *conn; + + conn = rxrpc_prealloc_service_connection(gfp); + if (!conn) + return -ENOMEM; + b->conn_backlog[head] = conn; + smp_store_release(&b->conn_backlog_head, + (head + 1) & (size - 1)); + } + + /* Now it gets complicated, because calls get registered with the + * socket here, particularly if a user ID is preassigned by the user. 
+ */ + call = rxrpc_alloc_call(gfp); + if (!call) + return -ENOMEM; + call->flags |= (1 << RXRPC_CALL_IS_SERVICE); + call->state = RXRPC_CALL_SERVER_PREALLOC; + + trace_rxrpc_call(call, rxrpc_call_new_service, + atomic_read(&call->usage), + here, (const void *)user_call_ID); + + write_lock(&rx->call_lock); + if (user_attach_call) { + struct rxrpc_call *xcall; + struct rb_node *parent, **pp; + + /* Check the user ID isn't already in use */ + pp = &rx->calls.rb_node; + parent = NULL; + while (*pp) { + parent = *pp; + xcall = rb_entry(parent, struct rxrpc_call, sock_node); + if (user_call_ID < call->user_call_ID) + pp = &(*pp)->rb_left; + else if (user_call_ID > call->user_call_ID) + pp = &(*pp)->rb_right; + else + goto id_in_use; + } + + call->user_call_ID = user_call_ID; + call->notify_rx = notify_rx; + rxrpc_get_call(call, rxrpc_call_got); + user_attach_call(call, user_call_ID); + rxrpc_get_call(call, rxrpc_call_got_userid); + rb_link_node(&call->sock_node, parent, pp); + rb_insert_color(&call->sock_node, &rx->calls); + set_bit(RXRPC_CALL_HAS_USERID, &call->flags); + } + + write_unlock(&rx->call_lock); + + write_lock(&rxrpc_call_lock); + list_add_tail(&call->link, &rxrpc_calls); + write_unlock(&rxrpc_call_lock); + + b->call_backlog[call_head] = call; + smp_store_release(&b->call_backlog_head, (call_head + 1) & (size - 1)); + _leave(" = 0 [%d -> %lx]", call->debug_id, user_call_ID); + return 0; + +id_in_use: + write_unlock(&rx->call_lock); + rxrpc_cleanup_call(call); + _leave(" = -EBADSLT"); + return -EBADSLT; +} + +/* + * Preallocate sufficient service connections, calls and peers to cover the + * entire backlog of a socket. When a new call comes in, if we don't have + * sufficient of each available, the call gets rejected as busy or ignored. + * + * The backlog is replenished when a connection is accepted or rejected. 
+ */ +int rxrpc_service_prealloc(struct rxrpc_sock *rx, gfp_t gfp) +{ + struct rxrpc_backlog *b = rx->backlog; + + if (!b) { + b = kzalloc(sizeof(struct rxrpc_backlog), gfp); + if (!b) + return -ENOMEM; + rx->backlog = b; + } + + if (rx->discard_new_call) + return 0; + + while (rxrpc_service_prealloc_one(rx, b, NULL, NULL, 0, gfp) == 0) + ; + + return 0; +} + +/* + * Discard the preallocation on a service. + */ +void rxrpc_discard_prealloc(struct rxrpc_sock *rx) +{ + struct rxrpc_backlog *b = rx->backlog; + unsigned int size = RXRPC_BACKLOG_MAX, head, tail; + + if (!b) + return; + rx->backlog = NULL; + + head = b->peer_backlog_head; + tail = b->peer_backlog_tail; + while (CIRC_CNT(head, tail, size) > 0) { + struct rxrpc_peer *peer = b->peer_backlog[tail]; + kfree(peer); + tail = (tail + 1) & (size - 1); + } + + head = b->conn_backlog_head; + tail = b->conn_backlog_tail; + while (CIRC_CNT(head, tail, size) > 0) { + struct rxrpc_connection *conn = b->conn_backlog[tail]; + write_lock(&rxrpc_connection_lock); + list_del(&conn->link); + list_del(&conn->proc_link); + write_unlock(&rxrpc_connection_lock); + kfree(conn); + tail = (tail + 1) & (size - 1); + } + + head = b->call_backlog_head; + tail = b->call_backlog_tail; + while (CIRC_CNT(head, tail, size) > 0) { + struct rxrpc_call *call = b->call_backlog[tail]; + if (rx->discard_new_call) { + _debug("discard %lx", call->user_call_ID); + rx->discard_new_call(call, call->user_call_ID); + } + rxrpc_call_completed(call); + rxrpc_release_call(rx, call); + rxrpc_put_call(call, rxrpc_call_put); + tail = (tail + 1) & (size - 1); + } + + kfree(b); +} + +/* * generate a connection-level abort */ static int rxrpc_busy(struct rxrpc_local *local, struct sockaddr_rxrpc *srx, @@ -450,3 +648,34 @@ int rxrpc_kernel_reject_call(struct socket *sock) return ret; } EXPORT_SYMBOL(rxrpc_kernel_reject_call); + +/* + * rxrpc_kernel_charge_accept - Charge up socket with preallocated calls + * @sock: The socket on which to preallocate + * 
@notify_rx: Event notification function for the call + * @user_attach_call: Func to attach call to user_call_ID + * @user_call_ID: The tag to attach to the preallocated call + * @gfp: The allocation conditions. + * + * Charge up the socket with preallocated calls, each with a user ID. A + * function should be provided to effect the attachment from the user's side. + * The user is given a ref to hold on the call. + * + * Note that the call may be come connected before this function returns. + */ +int rxrpc_kernel_charge_accept(struct socket *sock, + rxrpc_notify_rx_t notify_rx, + rxrpc_user_attach_call_t user_attach_call, + unsigned long user_call_ID, gfp_t gfp) +{ + struct rxrpc_sock *rx = rxrpc_sk(sock->sk); + struct rxrpc_backlog *b = rx->backlog; + + if (sock->sk->sk_state == RXRPC_CLOSE) + return -ESHUTDOWN; + + return rxrpc_service_prealloc_one(rx, b, notify_rx, + user_attach_call, user_call_ID, + gfp); +} +EXPORT_SYMBOL(rxrpc_kernel_charge_accept); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index f843397..d233adc 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -31,6 +31,7 @@ const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl", [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl", [RXRPC_CALL_CLIENT_FINAL_ACK] = "ClFnlACK", + [RXRPC_CALL_SERVER_PREALLOC] = "SvPrealc", [RXRPC_CALL_SERVER_SECURING] = "SvSecure", [RXRPC_CALL_SERVER_ACCEPTING] = "SvAccept", [RXRPC_CALL_SERVER_RECV_REQUEST] = "SvRcvReq", @@ -71,7 +72,6 @@ DEFINE_RWLOCK(rxrpc_call_lock); static void rxrpc_call_life_expired(unsigned long _call); static void rxrpc_ack_time_expired(unsigned long _call); static void rxrpc_resend_time_expired(unsigned long _call); -static void rxrpc_cleanup_call(struct rxrpc_call *call); /* * find an extant server call @@ -113,7 +113,7 @@ found_extant_call: /* * allocate a new call */ -static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) +struct rxrpc_call *rxrpc_alloc_call(gfp_t 
gfp) { struct rxrpc_call *call; @@ -392,6 +392,9 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, if (call_id <= conn->channels[chan].call_counter) goto old_call; /* TODO: Just drop packet */ + /* Temporary: Mirror the backlog prealloc ref (TODO: use prealloc) */ + rxrpc_get_call(candidate, rxrpc_call_got); + /* make the call available */ _debug("new call"); call = candidate; @@ -596,6 +599,9 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) del_timer_sync(&call->ack_timer); del_timer_sync(&call->lifetimer); + /* We have to release the prealloc backlog ref */ + if (rxrpc_is_service_call(call)) + rxrpc_put_call(call, rxrpc_call_put); _leave(""); } @@ -682,7 +688,7 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu) /* * clean up a call */ -static void rxrpc_cleanup_call(struct rxrpc_call *call) +void rxrpc_cleanup_call(struct rxrpc_call *call) { _net("DESTROY CALL %d", call->debug_id); diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 9c6685b..8da82e3 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -286,6 +286,8 @@ static void rxrpc_connection_reaper(struct work_struct *work) ASSERTCMP(atomic_read(&conn->usage), >, 0); if (likely(atomic_read(&conn->usage) > 1)) continue; + if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) + continue; idle_timestamp = READ_ONCE(conn->idle_timestamp); _debug("reap CONN %d { u=%d,t=%ld }", diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index 316a921..189338a 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -119,6 +119,30 @@ replace_old_connection: } /* + * Preallocate a service connection. The connection is placed on the proc and + * reap lists so that we don't have to get the lock from BH context. 
+ */ +struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t gfp) +{ + struct rxrpc_connection *conn = rxrpc_alloc_connection(gfp); + + if (conn) { + /* We maintain an extra ref on the connection whilst it is on + * the rxrpc_connections list. + */ + conn->state = RXRPC_CONN_SERVICE_PREALLOC; + atomic_set(&conn->usage, 2); + + write_lock(&rxrpc_connection_lock); + list_add_tail(&conn->link, &rxrpc_connections); + list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list); + write_unlock(&rxrpc_connection_lock); + } + + return conn; +} + +/* * get a record of an incoming connection */ struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *local, diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 6c4b7df..5906579 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -102,7 +102,7 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb, rx->notify_new_call) { spin_unlock_bh(&sk->sk_receive_queue.lock); skb_queue_tail(&call->knlrecv_queue, skb); - rx->notify_new_call(&rx->sk); + rx->notify_new_call(&rx->sk, NULL, 0); } else if (call->notify_rx) { spin_unlock_bh(&sk->sk_receive_queue.lock); skb_queue_tail(&call->knlrecv_queue, skb); diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index dfad238..d529d1b 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -17,6 +17,7 @@ static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = { [RXRPC_CONN_UNUSED] = "Unused ", [RXRPC_CONN_CLIENT] = "Client ", + [RXRPC_CONN_SERVICE_PREALLOC] = "SvPrealc", [RXRPC_CONN_SERVICE_UNSECURED] = "SvUnsec ", [RXRPC_CONN_SERVICE_CHALLENGING] = "SvChall ", [RXRPC_CONN_SERVICE] = "SvSecure", @@ -156,6 +157,11 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) } conn = list_entry(v, struct rxrpc_connection, proc_link); + if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) { + strcpy(lbuff, "no_local"); + strcpy(rbuff, "no_connection"); + goto print; + } sprintf(lbuff, "%pI4:%u", 
&conn->params.local->srx.transport.sin.sin_addr, @@ -164,7 +170,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) sprintf(rbuff, "%pI4:%u", &conn->params.peer->srx.transport.sin.sin_addr, ntohs(conn->params.peer->srx.transport.sin.sin_port)); - +print: seq_printf(seq, "UDP %-22.22s %-22.22s %4x %08x %s %3u" " %s %08x %08x %08x\n", -- cgit v1.1 From 248f219cb8bcbfbd7f132752d44afa2df7c241d1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 8 Sep 2016 11:10:12 +0100 Subject: rxrpc: Rewrite the data and ack handling code Rewrite the data and ack handling code such that: (1) Parsing of received ACK and ABORT packets and the distribution and the filing of DATA packets happens entirely within the data_ready context called from the UDP socket. This allows us to process and discard ACK and ABORT packets much more quickly (they're no longer stashed on a queue for a background thread to process). (2) We avoid calling skb_clone(), pskb_pull() and pskb_trim(). We instead keep track of the offset and length of the content of each packet in the sk_buff metadata. This means we don't do any allocation in the receive path. (3) Jumbo DATA packet parsing is now done in data_ready context. Rather than cloning the packet once for each subpacket and pulling/trimming it, we file the packet multiple times with an annotation for each indicating which subpacket is there. From that we can directly calculate the offset and length. (4) A call's receive queue can be accessed without taking locks (memory barriers do have to be used, though). (5) Incoming calls are set up from preallocated resources and immediately made live. They can then have packets queued upon them and ACKs generated. If insufficient resources exist, DATA packet #1 is given a BUSY reply and other DATA packets are discarded. (6) sk_buffs no longer take a ref on their parent call.
To make this work, the following changes are made: (1) Each call's receive buffer is now a circular buffer of sk_buff pointers (rxtx_buffer) rather than a number of sk_buff_heads spread between the call and the socket. This permits each sk_buff to be in the buffer multiple times. The receive buffer is reused for the transmit buffer. (2) A circular buffer of annotations (rxtx_annotations) is kept parallel to the data buffer. Transmission phase annotations indicate whether a buffered packet has been ACK'd or not and whether it needs retransmission. Receive phase annotations indicate whether a slot holds a whole packet or a jumbo subpacket and, if the latter, which subpacket. They also note whether the packet has been decrypted in place. (3) DATA packet window tracking is much simplified. Each phase has just two numbers representing the window (rx_hard_ack/rx_top and tx_hard_ack/tx_top). The hard_ack number is the sequence number before the base of the window, representing the last packet the other side says it has consumed. hard_ack starts from 0 and the first packet is sequence number 1. The top number is the sequence number of the highest-numbered packet residing in the buffer. Packets between hard_ack+1 and top are soft-ACK'd to indicate they've been received, but not yet consumed. Four inline functions, before(), before_eq(), after() and after_eq(), are added to compare sequence numbers within the window. This allows for the top of the window to wrap when the hard-ack sequence number gets close to the limit. Two flags, RXRPC_CALL_RX_LAST and RXRPC_CALL_TX_LAST, are also added to indicate when rx_top and tx_top point at the packets with the LAST_PACKET bit set, indicating the end of the phase. (4) Calls are queued on the socket 'receive queue' rather than packets.
This means that we don't have to invent dummy packets to queue to indicate abnormal/terminal states and we don't have to keep metadata packets (such as ABORTs) around. (5) The offset and length of a (sub)packet's content are now passed to the verify_packet security op. This is currently expected to decrypt the packet in place and validate it. However, there's now nowhere to store the revised offset and length of the actual data within the decrypted blob (there may be a header and padding to skip) because an sk_buff may represent multiple packets, so a locate_data security op is added to retrieve these details from the sk_buff content when needed. (6) recvmsg() now has to handle jumbo subpackets, where each subpacket is individually secured and needs to be individually decrypted. The code to do this is broken out into rxrpc_recvmsg_data() and shared with the kernel API. It now iterates over the call's receive buffer rather than walking the socket receive queue. Additional changes: (1) The timers are condensed to a single timer that is set for the soonest of three timeouts (delayed ACK generation, DATA retransmission and call lifespan). (2) Transmission of ACK and ABORT packets is effected immediately from process-context socket ops/kernel API calls that cause them instead of them being punted off to a background work item. The data_ready handler still has to defer to the background, though. (3) A shutdown op is added to the AF_RXRPC socket so that the AFS filesystem can shut down the socket and flush its own work items before closing the socket to deal with any in-progress service calls. Future additional changes that will need to be considered: (1) Make sure that a call doesn't hog the front of the queue by receiving data from the network as fast as userspace is consuming it to the exclusion of other calls. (2) Transmit delayed ACKs from within recvmsg() when we've consumed sufficiently more packets to avoid the background work item needing to run.
Signed-off-by: David Howells --- fs/afs/rxrpc.c | 51 +- include/net/af_rxrpc.h | 3 - include/rxrpc/packet.h | 7 + net/rxrpc/af_rxrpc.c | 57 +- net/rxrpc/ar-internal.h | 177 +++--- net/rxrpc/call_accept.c | 472 +++++++--------- net/rxrpc/call_event.c | 1357 +++++++--------------------------------------- net/rxrpc/call_object.c | 535 +++++------------- net/rxrpc/conn_event.c | 137 +---- net/rxrpc/conn_object.c | 6 +- net/rxrpc/conn_service.c | 101 +--- net/rxrpc/input.c | 1044 ++++++++++++++++++----------------- net/rxrpc/insecure.c | 13 +- net/rxrpc/local_event.c | 2 +- net/rxrpc/local_object.c | 7 - net/rxrpc/misc.c | 2 +- net/rxrpc/output.c | 125 ++++- net/rxrpc/peer_event.c | 17 +- net/rxrpc/peer_object.c | 82 ++- net/rxrpc/recvmsg.c | 764 ++++++++++++++------------ net/rxrpc/rxkad.c | 108 +++- net/rxrpc/security.c | 10 +- net/rxrpc/sendmsg.c | 126 ++--- net/rxrpc/skbuff.c | 127 ----- 24 files changed, 1993 insertions(+), 3337 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 720ef05..59bdaa7 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -55,10 +55,8 @@ static const struct afs_call_type afs_RXCMxxxx = { .abort_to_error = afs_abort_to_error, }; -static void afs_collect_incoming_call(struct work_struct *); static void afs_charge_preallocation(struct work_struct *); -static DECLARE_WORK(afs_collect_incoming_call_work, afs_collect_incoming_call); static DECLARE_WORK(afs_charge_preallocation_work, afs_charge_preallocation); static int afs_wait_atomic_t(atomic_t *p) @@ -144,6 +142,8 @@ void afs_close_socket(void) _debug("no outstanding calls"); flush_workqueue(afs_async_calls); + kernel_sock_shutdown(afs_socket, SHUT_RDWR); + flush_workqueue(afs_async_calls); sock_release(afs_socket); _debug("dework"); @@ -602,51 +602,6 @@ static void afs_process_async_call(struct work_struct *work) _leave(""); } -/* - * accept the backlog of incoming calls - */ -static void afs_collect_incoming_call(struct work_struct *work) -{ - struct rxrpc_call *rxcall; - struct 
afs_call *call = NULL; - - _enter(""); - - do { - if (!call) { - call = kzalloc(sizeof(struct afs_call), GFP_KERNEL); - if (!call) { - rxrpc_kernel_reject_call(afs_socket); - return; - } - - INIT_WORK(&call->async_work, afs_process_async_call); - call->wait_mode = &afs_async_incoming_call; - call->type = &afs_RXCMxxxx; - init_waitqueue_head(&call->waitq); - call->state = AFS_CALL_AWAIT_OP_ID; - - _debug("CALL %p{%s} [%d]", - call, call->type->name, - atomic_read(&afs_outstanding_calls)); - atomic_inc(&afs_outstanding_calls); - } - - rxcall = rxrpc_kernel_accept_call(afs_socket, - (unsigned long)call, - afs_wake_up_async_call); - if (!IS_ERR(rxcall)) { - call->rxcall = rxcall; - call->need_attention = true; - queue_work(afs_async_calls, &call->async_work); - call = NULL; - } - } while (!call); - - if (call) - afs_free_call(call); -} - static void afs_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID) { struct afs_call *call = (struct afs_call *)user_call_ID; @@ -704,7 +659,7 @@ static void afs_rx_discard_new_call(struct rxrpc_call *rxcall, static void afs_rx_new_call(struct sock *sk, struct rxrpc_call *rxcall, unsigned long user_call_ID) { - queue_work(afs_wq, &afs_collect_incoming_call_work); + atomic_inc(&afs_outstanding_calls); queue_work(afs_wq, &afs_charge_preallocation_work); } diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 9cf551be..1061a47 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -42,9 +42,6 @@ int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *, void rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, u32, int, const char *); void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *); -struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *, unsigned long, - rxrpc_notify_rx_t); -int rxrpc_kernel_reject_call(struct socket *); void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, struct sockaddr_rxrpc *); int rxrpc_kernel_charge_accept(struct socket *, 
rxrpc_notify_rx_t, diff --git a/include/rxrpc/packet.h b/include/rxrpc/packet.h index b0ae5c1..fd6eb3a 100644 --- a/include/rxrpc/packet.h +++ b/include/rxrpc/packet.h @@ -133,6 +133,13 @@ struct rxrpc_ackpacket { } __packed; +/* Some ACKs refer to specific packets and some are general and can be updated. */ +#define RXRPC_ACK_UPDATEABLE ((1 << RXRPC_ACK_REQUESTED) | \ + (1 << RXRPC_ACK_PING_RESPONSE) | \ + (1 << RXRPC_ACK_DELAY) | \ + (1 << RXRPC_ACK_IDLE)) + + /* * ACK packets can have a further piece of information tagged on the end */ diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 1e8cf3d..caa226d 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -155,7 +155,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) } if (rx->srx.srx_service) { - write_lock_bh(&local->services_lock); + write_lock(&local->services_lock); hlist_for_each_entry(prx, &local->services, listen_link) { if (prx->srx.srx_service == rx->srx.srx_service) goto service_in_use; @@ -163,7 +163,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) rx->local = local; hlist_add_head_rcu(&rx->listen_link, &local->services); - write_unlock_bh(&local->services_lock); + write_unlock(&local->services_lock); rx->sk.sk_state = RXRPC_SERVER_BOUND; } else { @@ -176,7 +176,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) return 0; service_in_use: - write_unlock_bh(&local->services_lock); + write_unlock(&local->services_lock); rxrpc_put_local(local); ret = -EADDRINUSE; error_unlock: @@ -515,15 +515,16 @@ error: static unsigned int rxrpc_poll(struct file *file, struct socket *sock, poll_table *wait) { - unsigned int mask; struct sock *sk = sock->sk; + struct rxrpc_sock *rx = rxrpc_sk(sk); + unsigned int mask; sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; /* the socket is readable if there are any messages waiting on the Rx * queue */ - if (!skb_queue_empty(&sk->sk_receive_queue)) + if 
(!list_empty(&rx->recvmsg_q)) mask |= POLLIN | POLLRDNORM; /* the socket is writable if there is space to add new data to the @@ -575,8 +576,11 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, rx->calls = RB_ROOT; INIT_HLIST_NODE(&rx->listen_link); - INIT_LIST_HEAD(&rx->secureq); - INIT_LIST_HEAD(&rx->acceptq); + spin_lock_init(&rx->incoming_lock); + INIT_LIST_HEAD(&rx->sock_calls); + INIT_LIST_HEAD(&rx->to_be_accepted); + INIT_LIST_HEAD(&rx->recvmsg_q); + rwlock_init(&rx->recvmsg_lock); rwlock_init(&rx->call_lock); memset(&rx->srx, 0, sizeof(rx->srx)); @@ -585,6 +589,39 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, } /* + * Kill all the calls on a socket and shut it down. + */ +static int rxrpc_shutdown(struct socket *sock, int flags) +{ + struct sock *sk = sock->sk; + struct rxrpc_sock *rx = rxrpc_sk(sk); + int ret = 0; + + _enter("%p,%d", sk, flags); + + if (flags != SHUT_RDWR) + return -EOPNOTSUPP; + if (sk->sk_state == RXRPC_CLOSE) + return -ESHUTDOWN; + + lock_sock(sk); + + spin_lock_bh(&sk->sk_receive_queue.lock); + if (sk->sk_state < RXRPC_CLOSE) { + sk->sk_state = RXRPC_CLOSE; + sk->sk_shutdown = SHUTDOWN_MASK; + } else { + ret = -ESHUTDOWN; + } + spin_unlock_bh(&sk->sk_receive_queue.lock); + + rxrpc_discard_prealloc(rx); + + release_sock(sk); + return ret; +} + +/* * RxRPC socket destructor */ static void rxrpc_sock_destructor(struct sock *sk) @@ -623,9 +660,9 @@ static int rxrpc_release_sock(struct sock *sk) ASSERTCMP(rx->listen_link.next, !=, LIST_POISON1); if (!hlist_unhashed(&rx->listen_link)) { - write_lock_bh(&rx->local->services_lock); + write_lock(&rx->local->services_lock); hlist_del_rcu(&rx->listen_link); - write_unlock_bh(&rx->local->services_lock); + write_unlock(&rx->local->services_lock); } /* try to flush out this socket */ @@ -678,7 +715,7 @@ static const struct proto_ops rxrpc_rpc_ops = { .poll = rxrpc_poll, .ioctl = sock_no_ioctl, .listen = rxrpc_listen, - .shutdown = 
sock_no_shutdown, + .shutdown = rxrpc_shutdown, .setsockopt = rxrpc_setsockopt, .getsockopt = sock_no_getsockopt, .sendmsg = rxrpc_sendmsg, diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 45e1c26..b1cb79e 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -94,9 +94,12 @@ struct rxrpc_sock { rxrpc_discard_new_call_t discard_new_call; /* Func to discard a new call */ struct rxrpc_local *local; /* local endpoint */ struct hlist_node listen_link; /* link in the local endpoint's listen list */ - struct list_head secureq; /* calls awaiting connection security clearance */ - struct list_head acceptq; /* calls awaiting acceptance */ struct rxrpc_backlog *backlog; /* Preallocation for services */ + spinlock_t incoming_lock; /* Incoming call vs service shutdown lock */ + struct list_head sock_calls; /* List of calls owned by this socket */ + struct list_head to_be_accepted; /* calls awaiting acceptance */ + struct list_head recvmsg_q; /* Calls awaiting recvmsg's attention */ + rwlock_t recvmsg_lock; /* Lock for recvmsg_q */ struct key *key; /* security for this socket */ struct key *securities; /* list of server security descriptors */ struct rb_root calls; /* User ID -> call mapping */ @@ -138,13 +141,16 @@ struct rxrpc_host_header { * - max 48 bytes (struct sk_buff::cb) */ struct rxrpc_skb_priv { - struct rxrpc_call *call; /* call with which associated */ - unsigned long resend_at; /* time in jiffies at which to resend */ + union { + unsigned long resend_at; /* time in jiffies at which to resend */ + struct { + u8 nr_jumbo; /* Number of jumbo subpackets */ + }; + }; union { unsigned int offset; /* offset into buffer of next read */ int remain; /* amount of space remaining for next write */ u32 error; /* network error code */ - bool need_resend; /* T if needs resending */ }; struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */ @@ -179,7 +185,11 @@ struct rxrpc_security { /* verify the security on a received packet 
*/ int (*verify_packet)(struct rxrpc_call *, struct sk_buff *, - rxrpc_seq_t, u16); + unsigned int, unsigned int, rxrpc_seq_t, u16); + + /* Locate the data in a received packet that has been verified. */ + void (*locate_data)(struct rxrpc_call *, struct sk_buff *, + unsigned int *, unsigned int *); /* issue a challenge */ int (*issue_challenge)(struct rxrpc_connection *); @@ -211,7 +221,6 @@ struct rxrpc_local { struct work_struct processor; struct hlist_head services; /* services listening on this endpoint */ struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */ - struct sk_buff_head accept_queue; /* incoming calls awaiting acceptance */ struct sk_buff_head reject_queue; /* packets awaiting rejection */ struct sk_buff_head event_queue; /* endpoint event packets awaiting processing */ struct rb_root client_conns; /* Client connections by socket params */ @@ -388,38 +397,21 @@ struct rxrpc_connection { */ enum rxrpc_call_flag { RXRPC_CALL_RELEASED, /* call has been released - no more message to userspace */ - RXRPC_CALL_TERMINAL_MSG, /* call has given the socket its final message */ - RXRPC_CALL_RCVD_LAST, /* all packets received */ - RXRPC_CALL_RUN_RTIMER, /* Tx resend timer started */ - RXRPC_CALL_TX_SOFT_ACK, /* sent some soft ACKs */ - RXRPC_CALL_INIT_ACCEPT, /* acceptance was initiated */ RXRPC_CALL_HAS_USERID, /* has a user ID attached */ - RXRPC_CALL_EXPECT_OOS, /* expect out of sequence packets */ RXRPC_CALL_IS_SERVICE, /* Call is service call */ RXRPC_CALL_EXPOSED, /* The call was exposed to the world */ - RXRPC_CALL_RX_NO_MORE, /* Don't indicate MSG_MORE from recvmsg() */ + RXRPC_CALL_RX_LAST, /* Received the last packet (at rxtx_top) */ + RXRPC_CALL_TX_LAST, /* Last packet in Tx buffer (at rxtx_top) */ }; /* * Events that can be raised on a call. 
*/ enum rxrpc_call_event { - RXRPC_CALL_EV_RCVD_ACKALL, /* ACKALL or reply received */ - RXRPC_CALL_EV_RCVD_BUSY, /* busy packet received */ - RXRPC_CALL_EV_RCVD_ABORT, /* abort packet received */ - RXRPC_CALL_EV_RCVD_ERROR, /* network error received */ - RXRPC_CALL_EV_ACK_FINAL, /* need to generate final ACK (and release call) */ RXRPC_CALL_EV_ACK, /* need to generate ACK */ - RXRPC_CALL_EV_REJECT_BUSY, /* need to generate busy message */ RXRPC_CALL_EV_ABORT, /* need to generate abort */ - RXRPC_CALL_EV_CONN_ABORT, /* local connection abort generated */ - RXRPC_CALL_EV_RESEND_TIMER, /* Tx resend timer expired */ + RXRPC_CALL_EV_TIMER, /* Timer expired */ RXRPC_CALL_EV_RESEND, /* Tx resend required */ - RXRPC_CALL_EV_DRAIN_RX_OOS, /* drain the Rx out of sequence queue */ - RXRPC_CALL_EV_LIFE_TIMER, /* call's lifetimer ran out */ - RXRPC_CALL_EV_ACCEPTED, /* incoming call accepted by userspace app */ - RXRPC_CALL_EV_SECURED, /* incoming call's connection is now secure */ - RXRPC_CALL_EV_POST_ACCEPT, /* need to post an "accept?" 
message to the app */ }; /* @@ -431,7 +423,6 @@ enum rxrpc_call_state { RXRPC_CALL_CLIENT_SEND_REQUEST, /* - client sending request phase */ RXRPC_CALL_CLIENT_AWAIT_REPLY, /* - client awaiting reply */ RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */ - RXRPC_CALL_CLIENT_FINAL_ACK, /* - client sending final ACK phase */ RXRPC_CALL_SERVER_PREALLOC, /* - service preallocation */ RXRPC_CALL_SERVER_SECURING, /* - server securing request connection */ RXRPC_CALL_SERVER_ACCEPTING, /* - server accepting request */ @@ -448,7 +439,6 @@ enum rxrpc_call_state { */ enum rxrpc_call_completion { RXRPC_CALL_SUCCEEDED, /* - Normal termination */ - RXRPC_CALL_SERVER_BUSY, /* - call rejected by busy server */ RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */ RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */ RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */ @@ -465,24 +455,23 @@ struct rxrpc_call { struct rxrpc_connection *conn; /* connection carrying call */ struct rxrpc_peer *peer; /* Peer record for remote address */ struct rxrpc_sock __rcu *socket; /* socket responsible */ - struct timer_list lifetimer; /* lifetime remaining on call */ - struct timer_list ack_timer; /* ACK generation timer */ - struct timer_list resend_timer; /* Tx resend timer */ - struct work_struct processor; /* packet processor and ACK generator */ + unsigned long ack_at; /* When deferred ACK needs to happen */ + unsigned long resend_at; /* When next resend needs to happen */ + unsigned long expire_at; /* When the call times out */ + struct timer_list timer; /* Combined event timer */ + struct work_struct processor; /* Event processor */ rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ struct list_head link; /* link in master call list */ struct list_head chan_wait_link; /* Link in conn->waiting_calls */ struct hlist_node error_link; /* link in error distribution list */ - struct list_head accept_link; /* calls awaiting 
acceptance */ - struct rb_node sock_node; /* node in socket call tree */ - struct sk_buff_head rx_queue; /* received packets */ - struct sk_buff_head rx_oos_queue; /* packets received out of sequence */ - struct sk_buff_head knlrecv_queue; /* Queue for kernel_recv [TODO: replace this] */ + struct list_head accept_link; /* Link in rx->acceptq */ + struct list_head recvmsg_link; /* Link in rx->recvmsg_q */ + struct list_head sock_link; /* Link in rx->sock_calls */ + struct rb_node sock_node; /* Node in rx->calls */ struct sk_buff *tx_pending; /* Tx socket buffer being filled */ wait_queue_head_t waitq; /* Wait queue for channel or Tx */ __be32 crypto_buf[2]; /* Temporary packet crypto buffer */ unsigned long user_call_ID; /* user-defined call ID */ - unsigned long creation_jif; /* time of call creation */ unsigned long flags; unsigned long events; spinlock_t lock; @@ -492,40 +481,55 @@ struct rxrpc_call { enum rxrpc_call_state state; /* current state of call */ enum rxrpc_call_completion completion; /* Call completion condition */ atomic_t usage; - atomic_t sequence; /* Tx data packet sequence counter */ u16 service_id; /* service ID */ u8 security_ix; /* Security type */ u32 call_id; /* call ID on connection */ u32 cid; /* connection ID plus channel index */ int debug_id; /* debug ID for printks */ - /* transmission-phase ACK management */ - u8 acks_head; /* offset into window of first entry */ - u8 acks_tail; /* offset into window of last entry */ - u8 acks_winsz; /* size of un-ACK'd window */ - u8 acks_unacked; /* lowest unacked packet in last ACK received */ - int acks_latest; /* serial number of latest ACK received */ - rxrpc_seq_t acks_hard; /* highest definitively ACK'd msg seq */ - unsigned long *acks_window; /* sent packet window - * - elements are pointers with LSB set if ACK'd + /* Rx/Tx circular buffer, depending on phase. + * + * In the Rx phase, packets are annotated with 0 or the number of the + * segment of a jumbo packet each buffer refers to. 
There can be up to + * 47 segments in a maximum-size UDP packet. + * + * In the Tx phase, packets are annotated with which buffers have been + * acked. + */ +#define RXRPC_RXTX_BUFF_SIZE 64 +#define RXRPC_RXTX_BUFF_MASK (RXRPC_RXTX_BUFF_SIZE - 1) + struct sk_buff **rxtx_buffer; + u8 *rxtx_annotations; +#define RXRPC_TX_ANNO_ACK 0 +#define RXRPC_TX_ANNO_UNACK 1 +#define RXRPC_TX_ANNO_NAK 2 +#define RXRPC_TX_ANNO_RETRANS 3 +#define RXRPC_RX_ANNO_JUMBO 0x3f /* Jumbo subpacket number + 1 if not zero */ +#define RXRPC_RX_ANNO_JLAST 0x40 /* Set if last element of a jumbo packet */ +#define RXRPC_RX_ANNO_VERIFIED 0x80 /* Set if verified and decrypted */ + rxrpc_seq_t tx_hard_ack; /* Dead slot in buffer; the first transmitted but + * not hard-ACK'd packet follows this. + */ + rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */ + rxrpc_seq_t rx_hard_ack; /* Dead slot in buffer; the first received but not + * consumed packet follows this. */ + rxrpc_seq_t rx_top; /* Highest Rx slot allocated. */ + rxrpc_seq_t rx_expect_next; /* Expected next packet sequence number */ + u8 rx_winsize; /* Size of Rx window */ + u8 tx_winsize; /* Maximum size of Tx window */ + u8 nr_jumbo_dup; /* Number of jumbo duplicates */ /* receive-phase ACK management */ - rxrpc_seq_t rx_data_expect; /* next data seq ID expected to be received */ - rxrpc_seq_t rx_data_post; /* next data seq ID expected to be posted */ - rxrpc_seq_t rx_data_recv; /* last data seq ID encountered by recvmsg */ - rxrpc_seq_t rx_data_eaten; /* last data seq ID consumed by recvmsg */ - rxrpc_seq_t rx_first_oos; /* first packet in rx_oos_queue (or 0) */ - rxrpc_seq_t ackr_win_top; /* top of ACK window (rx_data_eaten is bottom) */ - rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ u8 ackr_reason; /* reason to ACK */ u16 ackr_skew; /* skew on packet being ACK'd */ rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ - atomic_t ackr_not_idle; /* number of packets in Rx queue */ + rxrpc_seq_t 
ackr_prev_seq; /* previous sequence number received */ + unsigned short rx_pkt_offset; /* Current recvmsg packet offset */ + unsigned short rx_pkt_len; /* Current recvmsg packet len */ - /* received packet records, 1 bit per record */ -#define RXRPC_ACKR_WINDOW_ASZ DIV_ROUND_UP(RXRPC_MAXACKS, BITS_PER_LONG) - unsigned long ackr_window[RXRPC_ACKR_WINDOW_ASZ + 1]; + /* transmission-phase ACK management */ + rxrpc_serial_t acks_latest; /* serial number of latest ACK received */ }; enum rxrpc_call_trace { @@ -535,10 +539,8 @@ enum rxrpc_call_trace { rxrpc_call_queued_ref, rxrpc_call_seen, rxrpc_call_got, - rxrpc_call_got_skb, rxrpc_call_got_userid, rxrpc_call_put, - rxrpc_call_put_skb, rxrpc_call_put_userid, rxrpc_call_put_noqueue, rxrpc_call__nr_trace @@ -561,6 +563,9 @@ extern struct workqueue_struct *rxrpc_workqueue; */ int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t); void rxrpc_discard_prealloc(struct rxrpc_sock *); +struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *, + struct rxrpc_connection *, + struct sk_buff *); void rxrpc_accept_incoming_calls(struct rxrpc_local *); struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long, rxrpc_notify_rx_t); @@ -569,8 +574,7 @@ int rxrpc_reject_call(struct rxrpc_sock *); /* * call_event.c */ -void __rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool); -void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool); +void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool); void rxrpc_process_call(struct work_struct *); /* @@ -589,9 +593,8 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, unsigned long, gfp_t); -struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *, - struct rxrpc_connection *, - struct sk_buff *); +void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *, + struct sk_buff *); void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *); void 
rxrpc_release_calls_on_socket(struct rxrpc_sock *); bool __rxrpc_queue_call(struct rxrpc_call *); @@ -599,8 +602,6 @@ bool rxrpc_queue_call(struct rxrpc_call *); void rxrpc_see_call(struct rxrpc_call *); void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace); -void rxrpc_get_call_for_skb(struct rxrpc_call *, struct sk_buff *); -void rxrpc_put_call_for_skb(struct rxrpc_call *, struct sk_buff *); void rxrpc_cleanup_call(struct rxrpc_call *); void __exit rxrpc_destroy_all_calls(void); @@ -672,13 +673,8 @@ static inline bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call, { trace_rxrpc_abort(why, call->cid, call->call_id, seq, abort_code, error); - if (__rxrpc_set_call_completion(call, - RXRPC_CALL_LOCALLY_ABORTED, - abort_code, error)) { - set_bit(RXRPC_CALL_EV_ABORT, &call->events); - return true; - } - return false; + return __rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED, + abort_code, error); } static inline bool rxrpc_abort_call(const char *why, struct rxrpc_call *call, @@ -713,8 +709,6 @@ void __exit rxrpc_destroy_all_client_connections(void); * conn_event.c */ void rxrpc_process_connection(struct work_struct *); -void rxrpc_reject_packet(struct rxrpc_local *, struct sk_buff *); -void rxrpc_reject_packets(struct rxrpc_local *); /* * conn_object.c @@ -783,18 +777,14 @@ static inline bool rxrpc_queue_conn(struct rxrpc_connection *conn) */ struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *, struct sk_buff *); -struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *, - struct sockaddr_rxrpc *, - struct sk_buff *); struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t); +void rxrpc_new_incoming_connection(struct rxrpc_connection *, struct sk_buff *); void rxrpc_unpublish_service_conn(struct rxrpc_connection *); /* * input.c */ void rxrpc_data_ready(struct sock *); -int rxrpc_queue_rcv_skb(struct rxrpc_call *, struct 
sk_buff *, bool, bool); -void rxrpc_fast_process_packet(struct rxrpc_call *, struct sk_buff *); /* * insecure.c @@ -868,6 +858,7 @@ extern const char *rxrpc_acks(u8 reason); */ int rxrpc_send_call_packet(struct rxrpc_call *, u8); int rxrpc_send_data_packet(struct rxrpc_connection *, struct sk_buff *); +void rxrpc_reject_packets(struct rxrpc_local *); /* * peer_event.c @@ -883,6 +874,8 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *, struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *, struct sockaddr_rxrpc *, gfp_t); struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t); +struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *, + struct rxrpc_peer *); static inline struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer) { @@ -912,6 +905,7 @@ extern const struct file_operations rxrpc_connection_seq_fops; /* * recvmsg.c */ +void rxrpc_notify_socket(struct rxrpc_call *); int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int); /* @@ -961,6 +955,23 @@ static inline void rxrpc_sysctl_exit(void) {} */ int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *); +static inline bool before(u32 seq1, u32 seq2) +{ + return (s32)(seq1 - seq2) < 0; +} +static inline bool before_eq(u32 seq1, u32 seq2) +{ + return (s32)(seq1 - seq2) <= 0; +} +static inline bool after(u32 seq1, u32 seq2) +{ + return (s32)(seq1 - seq2) > 0; +} +static inline bool after_eq(u32 seq1, u32 seq2) +{ + return (s32)(seq1 - seq2) >= 0; +} + /* * debug tracing */ diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index cc7194e..b8acec0 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -129,6 +129,8 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, set_bit(RXRPC_CALL_HAS_USERID, &call->flags); } + list_add(&call->sock_link, &rx->sock_calls); + write_unlock(&rx->call_lock); write_lock(&rxrpc_call_lock); @@ -186,6 +188,12 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) return; 
rx->backlog = NULL; + /* Make sure that there aren't any incoming calls in progress before we + * clear the preallocation buffers. + */ + spin_lock_bh(&rx->incoming_lock); + spin_unlock_bh(&rx->incoming_lock); + head = b->peer_backlog_head; tail = b->peer_backlog_tail; while (CIRC_CNT(head, tail, size) > 0) { @@ -224,251 +232,179 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) } /* - * generate a connection-level abort + * Allocate a new incoming call from the prealloc pool, along with a connection + * and a peer as necessary. */ -static int rxrpc_busy(struct rxrpc_local *local, struct sockaddr_rxrpc *srx, - struct rxrpc_wire_header *whdr) +static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, + struct rxrpc_local *local, + struct rxrpc_connection *conn, + struct sk_buff *skb) { - struct msghdr msg; - struct kvec iov[1]; - size_t len; - int ret; - - _enter("%d,,", local->debug_id); - - whdr->type = RXRPC_PACKET_TYPE_BUSY; - whdr->serial = htonl(1); - - msg.msg_name = &srx->transport.sin; - msg.msg_namelen = sizeof(srx->transport.sin); - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = 0; - - iov[0].iov_base = whdr; - iov[0].iov_len = sizeof(*whdr); - - len = iov[0].iov_len; - - _proto("Tx BUSY %%1"); + struct rxrpc_backlog *b = rx->backlog; + struct rxrpc_peer *peer, *xpeer; + struct rxrpc_call *call; + unsigned short call_head, conn_head, peer_head; + unsigned short call_tail, conn_tail, peer_tail; + unsigned short call_count, conn_count; + + /* #calls >= #conns >= #peers must hold true. 
*/ + call_head = smp_load_acquire(&b->call_backlog_head); + call_tail = b->call_backlog_tail; + call_count = CIRC_CNT(call_head, call_tail, RXRPC_BACKLOG_MAX); + conn_head = smp_load_acquire(&b->conn_backlog_head); + conn_tail = b->conn_backlog_tail; + conn_count = CIRC_CNT(conn_head, conn_tail, RXRPC_BACKLOG_MAX); + ASSERTCMP(conn_count, >=, call_count); + peer_head = smp_load_acquire(&b->peer_backlog_head); + peer_tail = b->peer_backlog_tail; + ASSERTCMP(CIRC_CNT(peer_head, peer_tail, RXRPC_BACKLOG_MAX), >=, + conn_count); + + if (call_count == 0) + return NULL; + + if (!conn) { + /* No connection. We're going to need a peer to start off + * with. If one doesn't yet exist, use a spare from the + * preallocation set. We dump the address into the spare in + * anticipation - and to save on stack space. + */ + xpeer = b->peer_backlog[peer_tail]; + if (rxrpc_extract_addr_from_skb(&xpeer->srx, skb) < 0) + return NULL; + + peer = rxrpc_lookup_incoming_peer(local, xpeer); + if (peer == xpeer) { + b->peer_backlog[peer_tail] = NULL; + smp_store_release(&b->peer_backlog_tail, + (peer_tail + 1) & + (RXRPC_BACKLOG_MAX - 1)); + } - ret = kernel_sendmsg(local->socket, &msg, iov, 1, len); - if (ret < 0) { - _leave(" = -EAGAIN [sendmsg failed: %d]", ret); - return -EAGAIN; + /* Now allocate and set up the connection */ + conn = b->conn_backlog[conn_tail]; + b->conn_backlog[conn_tail] = NULL; + smp_store_release(&b->conn_backlog_tail, + (conn_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); + rxrpc_get_local(local); + conn->params.local = local; + conn->params.peer = peer; + rxrpc_new_incoming_connection(conn, skb); + } else { + rxrpc_get_connection(conn); } - _leave(" = 0"); - return 0; + /* And now we can allocate and set up a new call */ + call = b->call_backlog[call_tail]; + b->call_backlog[call_tail] = NULL; + smp_store_release(&b->call_backlog_tail, + (call_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); + + call->conn = conn; + call->peer = rxrpc_get_peer(conn->params.peer); + return call; } /* 
- * accept an incoming call that needs peer, transport and/or connection setting - * up + * Set up a new incoming call. Called in BH context with the RCU read lock + * held. + * + * If this is for a kernel service, when we allocate the call, it will have + * three refs on it: (1) the kernel service, (2) the user_call_ID tree, (3) the + * retainer ref obtained from the backlog buffer. Prealloc calls for userspace + * services only have the ref from the backlog buffer. We want to pass this + * ref to non-BH context to dispose of. + * + * If we want to report an error, we mark the skb with the packet type and + * abort code and return NULL. */ -static int rxrpc_accept_incoming_call(struct rxrpc_local *local, - struct rxrpc_sock *rx, - struct sk_buff *skb, - struct sockaddr_rxrpc *srx) +struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, + struct rxrpc_connection *conn, + struct sk_buff *skb) { - struct rxrpc_connection *conn; - struct rxrpc_skb_priv *sp, *nsp; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_sock *rx; struct rxrpc_call *call; - struct sk_buff *notification; - int ret; _enter(""); - sp = rxrpc_skb(skb); - - /* get a notification message to send to the server app */ - notification = alloc_skb(0, GFP_NOFS); - if (!notification) { - _debug("no memory"); - ret = -ENOMEM; - goto error_nofree; - } - rxrpc_new_skb(notification); - notification->mark = RXRPC_SKB_MARK_NEW_CALL; - - conn = rxrpc_incoming_connection(local, srx, skb); - if (IS_ERR(conn)) { - _debug("no conn"); - ret = PTR_ERR(conn); - goto error; - } - - call = rxrpc_incoming_call(rx, conn, skb); - rxrpc_put_connection(conn); - if (IS_ERR(call)) { - _debug("no call"); - ret = PTR_ERR(call); - goto error; + /* Get the socket providing the service */ + hlist_for_each_entry_rcu_bh(rx, &local->services, listen_link) { + if (rx->srx.srx_service == sp->hdr.serviceId) + goto found_service; } - /* attach the call to the socket */ - read_lock_bh(&local->services_lock); - if 
(rx->sk.sk_state == RXRPC_CLOSE) - goto invalid_service; - - write_lock(&rx->call_lock); - if (!test_and_set_bit(RXRPC_CALL_INIT_ACCEPT, &call->flags)) { - rxrpc_get_call(call, rxrpc_call_got); - - spin_lock(&call->conn->state_lock); - if (sp->hdr.securityIndex > 0 && - call->conn->state == RXRPC_CONN_SERVICE_UNSECURED) { - _debug("await conn sec"); - list_add_tail(&call->accept_link, &rx->secureq); - call->conn->state = RXRPC_CONN_SERVICE_CHALLENGING; - set_bit(RXRPC_CONN_EV_CHALLENGE, &call->conn->events); - rxrpc_queue_conn(call->conn); - } else { - _debug("conn ready"); - call->state = RXRPC_CALL_SERVER_ACCEPTING; - list_add_tail(&call->accept_link, &rx->acceptq); - rxrpc_get_call_for_skb(call, notification); - nsp = rxrpc_skb(notification); - nsp->call = call; - - ASSERTCMP(atomic_read(&call->usage), >=, 3); - - _debug("notify"); - spin_lock(&call->lock); - ret = rxrpc_queue_rcv_skb(call, notification, true, - false); - spin_unlock(&call->lock); - notification = NULL; - BUG_ON(ret < 0); - } - spin_unlock(&call->conn->state_lock); + trace_rxrpc_abort("INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RX_INVALID_OPERATION, EOPNOTSUPP); + skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; + skb->priority = RX_INVALID_OPERATION; + _leave(" = NULL [service]"); + return NULL; - _debug("queued"); +found_service: + spin_lock(&rx->incoming_lock); + if (rx->sk.sk_state == RXRPC_CLOSE) { + trace_rxrpc_abort("CLS", sp->hdr.cid, sp->hdr.callNumber, + sp->hdr.seq, RX_INVALID_OPERATION, ESHUTDOWN); + skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; + skb->priority = RX_INVALID_OPERATION; + _leave(" = NULL [close]"); + call = NULL; + goto out; } - write_unlock(&rx->call_lock); - _debug("process"); - rxrpc_fast_process_packet(call, skb); - - _debug("done"); - read_unlock_bh(&local->services_lock); - rxrpc_free_skb(notification); - rxrpc_put_call(call, rxrpc_call_put); - _leave(" = 0"); - return 0; - -invalid_service: - _debug("invalid"); - read_unlock_bh(&local->services_lock); - - 
rxrpc_release_call(rx, call); - rxrpc_put_call(call, rxrpc_call_put); - ret = -ECONNREFUSED; -error: - rxrpc_free_skb(notification); -error_nofree: - _leave(" = %d", ret); - return ret; -} + call = rxrpc_alloc_incoming_call(rx, local, conn, skb); + if (!call) { + skb->mark = RXRPC_SKB_MARK_BUSY; + _leave(" = NULL [busy]"); + call = NULL; + goto out; + } -/* - * accept incoming calls that need peer, transport and/or connection setting up - * - the packets we get are all incoming client DATA packets that have seq == 1 - */ -void rxrpc_accept_incoming_calls(struct rxrpc_local *local) -{ - struct rxrpc_skb_priv *sp; - struct sockaddr_rxrpc srx; - struct rxrpc_sock *rx; - struct rxrpc_wire_header whdr; - struct sk_buff *skb; - int ret; + /* Make the call live. */ + rxrpc_incoming_call(rx, call, skb); + conn = call->conn; - _enter("%d", local->debug_id); + if (rx->notify_new_call) + rx->notify_new_call(&rx->sk, call, call->user_call_ID); - skb = skb_dequeue(&local->accept_queue); - if (!skb) { - _leave("\n"); - return; - } + spin_lock(&conn->state_lock); + switch (conn->state) { + case RXRPC_CONN_SERVICE_UNSECURED: + conn->state = RXRPC_CONN_SERVICE_CHALLENGING; + set_bit(RXRPC_CONN_EV_CHALLENGE, &call->conn->events); + rxrpc_queue_conn(call->conn); + break; - _net("incoming call skb %p", skb); - - rxrpc_see_skb(skb); - sp = rxrpc_skb(skb); - - /* Set up a response packet header in case we need it */ - whdr.epoch = htonl(sp->hdr.epoch); - whdr.cid = htonl(sp->hdr.cid); - whdr.callNumber = htonl(sp->hdr.callNumber); - whdr.seq = htonl(sp->hdr.seq); - whdr.serial = 0; - whdr.flags = 0; - whdr.type = 0; - whdr.userStatus = 0; - whdr.securityIndex = sp->hdr.securityIndex; - whdr._rsvd = 0; - whdr.serviceId = htons(sp->hdr.serviceId); - - if (rxrpc_extract_addr_from_skb(&srx, skb) < 0) - goto drop; - - /* get the socket providing the service */ - read_lock_bh(&local->services_lock); - hlist_for_each_entry(rx, &local->services, listen_link) { - if (rx->srx.srx_service == 
sp->hdr.serviceId && - rx->sk.sk_state != RXRPC_CLOSE) - goto found_service; - } - read_unlock_bh(&local->services_lock); - goto invalid_service; + case RXRPC_CONN_SERVICE: + write_lock(&call->state_lock); + if (rx->discard_new_call) + call->state = RXRPC_CALL_SERVER_RECV_REQUEST; + else + call->state = RXRPC_CALL_SERVER_ACCEPTING; + write_unlock(&call->state_lock); + break; -found_service: - _debug("found service %hd", rx->srx.srx_service); - if (sk_acceptq_is_full(&rx->sk)) - goto backlog_full; - sk_acceptq_added(&rx->sk); - read_unlock_bh(&local->services_lock); - - ret = rxrpc_accept_incoming_call(local, rx, skb, &srx); - if (ret < 0) - sk_acceptq_removed(&rx->sk); - switch (ret) { - case -ECONNRESET: /* old calls are ignored */ - case -ECONNABORTED: /* aborted calls are reaborted or ignored */ - case 0: - return; - case -ECONNREFUSED: - goto invalid_service; - case -EBUSY: - goto busy; - case -EKEYREJECTED: - goto security_mismatch; + case RXRPC_CONN_REMOTELY_ABORTED: + rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, + conn->remote_abort, ECONNABORTED); + break; + case RXRPC_CONN_LOCALLY_ABORTED: + rxrpc_abort_call("CON", call, sp->hdr.seq, + conn->local_abort, ECONNABORTED); + break; default: BUG(); } + spin_unlock(&conn->state_lock); -backlog_full: - read_unlock_bh(&local->services_lock); -busy: - rxrpc_busy(local, &srx, &whdr); - rxrpc_free_skb(skb); - return; - -drop: - rxrpc_free_skb(skb); - return; + if (call->state == RXRPC_CALL_SERVER_ACCEPTING) + rxrpc_notify_socket(call); -invalid_service: - skb->priority = RX_INVALID_OPERATION; - rxrpc_reject_packet(local, skb); - return; - - /* can't change connection security type mid-flow */ -security_mismatch: - skb->priority = RX_PROTOCOL_ERROR; - rxrpc_reject_packet(local, skb); - return; + _leave(" = %p{%d}", call, call->debug_id); +out: + spin_unlock(&rx->incoming_lock); + return call; } /* @@ -490,11 +426,10 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, 
write_lock(&rx->call_lock); ret = -ENODATA; - if (list_empty(&rx->acceptq)) + if (list_empty(&rx->to_be_accepted)) goto out; /* check the user ID isn't already in use */ - ret = -EBADSLT; pp = &rx->calls.rb_node; parent = NULL; while (*pp) { @@ -506,11 +441,14 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, else if (user_call_ID > call->user_call_ID) pp = &(*pp)->rb_right; else - goto out; + goto id_in_use; } - /* dequeue the first call and check it's still valid */ - call = list_entry(rx->acceptq.next, struct rxrpc_call, accept_link); + /* Dequeue the first call and check it's still valid. We gain + * responsibility for the queue's reference. + */ + call = list_entry(rx->to_be_accepted.next, + struct rxrpc_call, accept_link); list_del_init(&call->accept_link); sk_acceptq_removed(&rx->sk); rxrpc_see_call(call); @@ -528,31 +466,35 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, } /* formalise the acceptance */ - rxrpc_get_call(call, rxrpc_call_got_userid); + rxrpc_get_call(call, rxrpc_call_got); call->notify_rx = notify_rx; call->user_call_ID = user_call_ID; + rxrpc_get_call(call, rxrpc_call_got_userid); rb_link_node(&call->sock_node, parent, pp); rb_insert_color(&call->sock_node, &rx->calls); if (test_and_set_bit(RXRPC_CALL_HAS_USERID, &call->flags)) BUG(); - if (test_and_set_bit(RXRPC_CALL_EV_ACCEPTED, &call->events)) - BUG(); write_unlock_bh(&call->state_lock); write_unlock(&rx->call_lock); - rxrpc_queue_call(call); + rxrpc_notify_socket(call); + rxrpc_service_prealloc(rx, GFP_KERNEL); _leave(" = %p{%d}", call, call->debug_id); return call; out_release: + _debug("release %p", call); write_unlock_bh(&call->state_lock); write_unlock(&rx->call_lock); - _debug("release %p", call); rxrpc_release_call(rx, call); - _leave(" = %d", ret); - return ERR_PTR(ret); -out: + rxrpc_put_call(call, rxrpc_call_put); + goto out; + +id_in_use: + ret = -EBADSLT; write_unlock(&rx->call_lock); +out: + rxrpc_service_prealloc(rx, GFP_KERNEL); _leave(" = %d", 
ret); return ERR_PTR(ret); } @@ -564,6 +506,7 @@ out: int rxrpc_reject_call(struct rxrpc_sock *rx) { struct rxrpc_call *call; + bool abort = false; int ret; _enter(""); @@ -572,15 +515,16 @@ int rxrpc_reject_call(struct rxrpc_sock *rx) write_lock(&rx->call_lock); - ret = -ENODATA; - if (list_empty(&rx->acceptq)) { + if (list_empty(&rx->to_be_accepted)) { write_unlock(&rx->call_lock); - _leave(" = -ENODATA"); return -ENODATA; } - /* dequeue the first call and check it's still valid */ - call = list_entry(rx->acceptq.next, struct rxrpc_call, accept_link); + /* Dequeue the first call and check it's still valid. We gain + * responsibility for the queue's reference. + */ + call = list_entry(rx->to_be_accepted.next, + struct rxrpc_call, accept_link); list_del_init(&call->accept_link); sk_acceptq_removed(&rx->sk); rxrpc_see_call(call); @@ -588,66 +532,28 @@ int rxrpc_reject_call(struct rxrpc_sock *rx) write_lock_bh(&call->state_lock); switch (call->state) { case RXRPC_CALL_SERVER_ACCEPTING: - __rxrpc_set_call_completion(call, RXRPC_CALL_SERVER_BUSY, - 0, ECONNABORTED); - if (test_and_set_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events)) - rxrpc_queue_call(call); - ret = 0; - break; + __rxrpc_abort_call("REJ", call, 1, RX_USER_ABORT, ECONNABORTED); + abort = true; + /* fall through */ case RXRPC_CALL_COMPLETE: ret = call->error; - break; + goto out_discard; default: BUG(); } +out_discard: write_unlock_bh(&call->state_lock); write_unlock(&rx->call_lock); - rxrpc_release_call(rx, call); - _leave(" = %d", ret); - return ret; -} - -/** - * rxrpc_kernel_accept_call - Allow a kernel service to accept an incoming call - * @sock: The socket on which the impending call is waiting - * @user_call_ID: The tag to attach to the call - * @notify_rx: Where to send notifications instead of socket queue - * - * Allow a kernel service to accept an incoming call, assuming the incoming - * call is still valid. 
The caller should immediately trigger their own - * notification as there must be data waiting. - */ -struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *sock, - unsigned long user_call_ID, - rxrpc_notify_rx_t notify_rx) -{ - struct rxrpc_call *call; - - _enter(",%lx", user_call_ID); - call = rxrpc_accept_call(rxrpc_sk(sock->sk), user_call_ID, notify_rx); - _leave(" = %p", call); - return call; -} -EXPORT_SYMBOL(rxrpc_kernel_accept_call); - -/** - * rxrpc_kernel_reject_call - Allow a kernel service to reject an incoming call - * @sock: The socket on which the impending call is waiting - * - * Allow a kernel service to reject an incoming call with a BUSY message, - * assuming the incoming call is still valid. - */ -int rxrpc_kernel_reject_call(struct socket *sock) -{ - int ret; - - _enter(""); - ret = rxrpc_reject_call(rxrpc_sk(sock->sk)); + if (abort) { + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + rxrpc_release_call(rx, call); + rxrpc_put_call(call, rxrpc_call_put); + } + rxrpc_service_prealloc(rx, GFP_KERNEL); _leave(" = %d", ret); return ret; } -EXPORT_SYMBOL(rxrpc_kernel_reject_call); /* * rxrpc_kernel_charge_accept - Charge up socket with preallocated calls diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index af88ad7..2b976e7 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -22,1257 +22,286 @@ #include "ar-internal.h" /* - * propose an ACK be sent + * Set the timer */ -void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, - u16 skew, u32 serial, bool immediate) +static void rxrpc_set_timer(struct rxrpc_call *call) { - unsigned long expiry; - s8 prior = rxrpc_ack_priority[ack_reason]; - - ASSERTCMP(prior, >, 0); - - _enter("{%d},%s,%%%x,%u", - call->debug_id, rxrpc_acks(ack_reason), serial, immediate); + unsigned long t, now = jiffies; - if (prior < rxrpc_ack_priority[call->ackr_reason]) { - if (immediate) - goto cancel_timer; - return; - } - - /* update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK 
serial - * numbers */ - if (prior == rxrpc_ack_priority[call->ackr_reason]) { - if (prior <= 4) { - call->ackr_skew = skew; - call->ackr_serial = serial; - } - if (immediate) - goto cancel_timer; - return; - } - - call->ackr_reason = ack_reason; - call->ackr_serial = serial; - - switch (ack_reason) { - case RXRPC_ACK_DELAY: - _debug("run delay timer"); - expiry = rxrpc_soft_ack_delay; - goto run_timer; - - case RXRPC_ACK_IDLE: - if (!immediate) { - _debug("run defer timer"); - expiry = rxrpc_idle_ack_delay; - goto run_timer; - } - goto cancel_timer; + _enter("{%ld,%ld,%ld:%ld}", + call->ack_at - now, call->resend_at - now, call->expire_at - now, + call->timer.expires - now); + + read_lock_bh(&call->state_lock); - case RXRPC_ACK_REQUESTED: - expiry = rxrpc_requested_ack_delay; - if (!expiry) - goto cancel_timer; - if (!immediate || serial == 1) { - _debug("run defer timer"); - goto run_timer; + if (call->state < RXRPC_CALL_COMPLETE) { + t = call->ack_at; + if (time_before(call->resend_at, t)) + t = call->resend_at; + if (time_before(call->expire_at, t)) + t = call->expire_at; + if (!timer_pending(&call->timer) || + time_before(t, call->timer.expires)) { + _debug("set timer %ld", t - now); + mod_timer(&call->timer, t); } - - default: - _debug("immediate ACK"); - goto cancel_timer; } - -run_timer: - expiry += jiffies; - if (!timer_pending(&call->ack_timer) || - time_after(call->ack_timer.expires, expiry)) - mod_timer(&call->ack_timer, expiry); - return; - -cancel_timer: - _debug("cancel timer %%%u", serial); - try_to_del_timer_sync(&call->ack_timer); - read_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE && - !test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events)) - rxrpc_queue_call(call); read_unlock_bh(&call->state_lock); } /* - * propose an ACK be sent, locking the call structure + * propose an ACK be sent */ -void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, - u16 skew, u32 serial, bool immediate) +static void 
__rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, + u16 skew, u32 serial, bool immediate, + bool background) { + unsigned long now, ack_at, expiry = rxrpc_soft_ack_delay; s8 prior = rxrpc_ack_priority[ack_reason]; - if (prior > rxrpc_ack_priority[call->ackr_reason]) { - spin_lock_bh(&call->lock); - __rxrpc_propose_ACK(call, ack_reason, skew, serial, immediate); - spin_unlock_bh(&call->lock); - } -} - -/* - * set the resend timer - */ -static void rxrpc_set_resend(struct rxrpc_call *call, u8 resend, - unsigned long resend_at) -{ - read_lock_bh(&call->state_lock); - if (call->state == RXRPC_CALL_COMPLETE) - resend = 0; - - if (resend & 1) { - _debug("SET RESEND"); - set_bit(RXRPC_CALL_EV_RESEND, &call->events); - } - - if (resend & 2) { - _debug("MODIFY RESEND TIMER"); - set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - mod_timer(&call->resend_timer, resend_at); - } else { - _debug("KILL RESEND TIMER"); - del_timer_sync(&call->resend_timer); - clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events); - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - } - read_unlock_bh(&call->state_lock); -} - -/* - * resend packets - */ -static void rxrpc_resend(struct rxrpc_call *call) -{ - struct rxrpc_wire_header *whdr; - struct rxrpc_skb_priv *sp; - struct sk_buff *txb; - unsigned long *p_txb, resend_at; - bool stop; - int loop; - u8 resend; - - _enter("{%d,%d,%d,%d},", - call->acks_hard, call->acks_unacked, - atomic_read(&call->sequence), - CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz)); - - stop = false; - resend = 0; - resend_at = 0; - - for (loop = call->acks_tail; - loop != call->acks_head || stop; - loop = (loop + 1) & (call->acks_winsz - 1) - ) { - p_txb = call->acks_window + loop; - smp_read_barrier_depends(); - if (*p_txb & 1) - continue; - - txb = (struct sk_buff *) *p_txb; - sp = rxrpc_skb(txb); - - if (sp->need_resend) { - sp->need_resend = false; - - /* each Tx packet has a new serial number */ - sp->hdr.serial = 
atomic_inc_return(&call->conn->serial); - - whdr = (struct rxrpc_wire_header *)txb->head; - whdr->serial = htonl(sp->hdr.serial); - - _proto("Tx DATA %%%u { #%d }", - sp->hdr.serial, sp->hdr.seq); - if (rxrpc_send_data_packet(call->conn, txb) < 0) { - stop = true; - sp->resend_at = jiffies + 3; - } else { - if (rxrpc_is_client_call(call)) - rxrpc_expose_client_call(call); - sp->resend_at = - jiffies + rxrpc_resend_timeout; - } - } - - if (time_after_eq(jiffies + 1, sp->resend_at)) { - sp->need_resend = true; - resend |= 1; - } else if (resend & 2) { - if (time_before(sp->resend_at, resend_at)) - resend_at = sp->resend_at; - } else { - resend_at = sp->resend_at; - resend |= 2; - } - } - - rxrpc_set_resend(call, resend, resend_at); - _leave(""); -} - -/* - * handle resend timer expiry - */ -static void rxrpc_resend_timer(struct rxrpc_call *call) -{ - struct rxrpc_skb_priv *sp; - struct sk_buff *txb; - unsigned long *p_txb, resend_at; - int loop; - u8 resend; - - _enter("%d,%d,%d", - call->acks_tail, call->acks_unacked, call->acks_head); - - if (call->state == RXRPC_CALL_COMPLETE) - return; - - resend = 0; - resend_at = 0; - - for (loop = call->acks_unacked; - loop != call->acks_head; - loop = (loop + 1) & (call->acks_winsz - 1) - ) { - p_txb = call->acks_window + loop; - smp_read_barrier_depends(); - txb = (struct sk_buff *) (*p_txb & ~1); - sp = rxrpc_skb(txb); - - ASSERT(!(*p_txb & 1)); + _enter("{%d},%s,%%%x,%u", + call->debug_id, rxrpc_acks(ack_reason), serial, immediate); - if (sp->need_resend) { - ; - } else if (time_after_eq(jiffies + 1, sp->resend_at)) { - sp->need_resend = true; - resend |= 1; - } else if (resend & 2) { - if (time_before(sp->resend_at, resend_at)) - resend_at = sp->resend_at; - } else { - resend_at = sp->resend_at; - resend |= 2; + /* Update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial + * numbers, but we don't alter the timeout. 
+ */ + _debug("prior %u %u vs %u %u", + ack_reason, prior, + call->ackr_reason, rxrpc_ack_priority[call->ackr_reason]); + if (ack_reason == call->ackr_reason) { + if (RXRPC_ACK_UPDATEABLE & (1 << ack_reason)) { + call->ackr_serial = serial; + call->ackr_skew = skew; } + if (!immediate) + return; + } else if (prior > rxrpc_ack_priority[call->ackr_reason]) { + call->ackr_reason = ack_reason; + call->ackr_serial = serial; + call->ackr_skew = skew; } - rxrpc_set_resend(call, resend, resend_at); - _leave(""); -} - -/* - * process soft ACKs of our transmitted packets - * - these indicate packets the peer has or has not received, but hasn't yet - * given to the consumer, and so can still be discarded and re-requested - */ -static int rxrpc_process_soft_ACKs(struct rxrpc_call *call, - struct rxrpc_ackpacket *ack, - struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp; - struct sk_buff *txb; - unsigned long *p_txb, resend_at; - int loop; - u8 sacks[RXRPC_MAXACKS], resend; - - _enter("{%d,%d},{%d},", - call->acks_hard, - CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz), - ack->nAcks); + switch (ack_reason) { + case RXRPC_ACK_REQUESTED: + if (rxrpc_requested_ack_delay < expiry) + expiry = rxrpc_requested_ack_delay; + if (serial == 1) + immediate = false; + break; - if (skb_copy_bits(skb, 0, sacks, ack->nAcks) < 0) - goto protocol_error; + case RXRPC_ACK_DELAY: + if (rxrpc_soft_ack_delay < expiry) + expiry = rxrpc_soft_ack_delay; + break; - resend = 0; - resend_at = 0; - for (loop = 0; loop < ack->nAcks; loop++) { - p_txb = call->acks_window; - p_txb += (call->acks_tail + loop) & (call->acks_winsz - 1); - smp_read_barrier_depends(); - txb = (struct sk_buff *) (*p_txb & ~1); - sp = rxrpc_skb(txb); + case RXRPC_ACK_IDLE: + if (rxrpc_soft_ack_delay < expiry) + expiry = rxrpc_idle_ack_delay; + break; - switch (sacks[loop]) { - case RXRPC_ACK_TYPE_ACK: - sp->need_resend = false; - *p_txb |= 1; - break; - case RXRPC_ACK_TYPE_NACK: - sp->need_resend = true; - *p_txb &= ~1; 
- resend = 1; - break; - default: - _debug("Unsupported ACK type %d", sacks[loop]); - goto protocol_error; - } + default: + immediate = true; + break; } - smp_mb(); - call->acks_unacked = (call->acks_tail + loop) & (call->acks_winsz - 1); - - /* anything not explicitly ACK'd is implicitly NACK'd, but may just not - * have been received or processed yet by the far end */ - for (loop = call->acks_unacked; - loop != call->acks_head; - loop = (loop + 1) & (call->acks_winsz - 1) - ) { - p_txb = call->acks_window + loop; - smp_read_barrier_depends(); - txb = (struct sk_buff *) (*p_txb & ~1); - sp = rxrpc_skb(txb); - - if (*p_txb & 1) { - /* packet must have been discarded */ - sp->need_resend = true; - *p_txb &= ~1; - resend |= 1; - } else if (sp->need_resend) { - ; - } else if (time_after_eq(jiffies + 1, sp->resend_at)) { - sp->need_resend = true; - resend |= 1; - } else if (resend & 2) { - if (time_before(sp->resend_at, resend_at)) - resend_at = sp->resend_at; - } else { - resend_at = sp->resend_at; - resend |= 2; + now = jiffies; + if (test_bit(RXRPC_CALL_EV_ACK, &call->events)) { + _debug("already scheduled"); + } else if (immediate || expiry == 0) { + _debug("immediate ACK %lx", call->events); + if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events) && + background) + rxrpc_queue_call(call); + } else { + ack_at = now + expiry; + _debug("deferred ACK %ld < %ld", expiry, call->ack_at - now); + if (time_before(ack_at, call->ack_at)) { + call->ack_at = ack_at; + rxrpc_set_timer(call); } } - - rxrpc_set_resend(call, resend, resend_at); - _leave(" = 0"); - return 0; - -protocol_error: - _leave(" = -EPROTO"); - return -EPROTO; } /* - * discard hard-ACK'd packets from the Tx window - */ -static void rxrpc_rotate_tx_window(struct rxrpc_call *call, u32 hard) -{ - unsigned long _skb; - int tail = call->acks_tail, old_tail; - int win = CIRC_CNT(call->acks_head, tail, call->acks_winsz); - - _enter("{%u,%u},%u", call->acks_hard, win, hard); - - ASSERTCMP(hard - call->acks_hard, 
<=, win); - - while (call->acks_hard < hard) { - smp_read_barrier_depends(); - _skb = call->acks_window[tail] & ~1; - rxrpc_free_skb((struct sk_buff *) _skb); - old_tail = tail; - tail = (tail + 1) & (call->acks_winsz - 1); - call->acks_tail = tail; - if (call->acks_unacked == old_tail) - call->acks_unacked = tail; - call->acks_hard++; - } - - wake_up(&call->waitq); -} - -/* - * clear the Tx window in the event of a failure + * propose an ACK be sent, locking the call structure */ -static void rxrpc_clear_tx_window(struct rxrpc_call *call) +void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, + u16 skew, u32 serial, bool immediate, bool background) { - rxrpc_rotate_tx_window(call, atomic_read(&call->sequence)); + spin_lock_bh(&call->lock); + __rxrpc_propose_ACK(call, ack_reason, skew, serial, + immediate, background); + spin_unlock_bh(&call->lock); } /* - * drain the out of sequence received packet queue into the packet Rx queue + * Perform retransmission of NAK'd and unack'd packets. */ -static int rxrpc_drain_rx_oos_queue(struct rxrpc_call *call) +static void rxrpc_resend(struct rxrpc_call *call) { + struct rxrpc_wire_header *whdr; struct rxrpc_skb_priv *sp; struct sk_buff *skb; - bool terminal; - int ret; + rxrpc_seq_t cursor, seq, top; + unsigned long resend_at, now; + int ix; + u8 annotation; - _enter("{%d,%d}", call->rx_data_post, call->rx_first_oos); + _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); spin_lock_bh(&call->lock); - ret = -ECONNRESET; - if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) - goto socket_unavailable; + cursor = call->tx_hard_ack; + top = call->tx_top; + ASSERT(before_eq(cursor, top)); + if (cursor == top) + goto out_unlock; + + /* Scan the packet list without dropping the lock and decide which of + * the packets in the Tx buffer we're going to resend and what the new + * resend timeout will be. 
+ */ + now = jiffies; + resend_at = now + rxrpc_resend_timeout; + seq = cursor + 1; + do { + ix = seq & RXRPC_RXTX_BUFF_MASK; + annotation = call->rxtx_annotations[ix]; + if (annotation == RXRPC_TX_ANNO_ACK) + continue; - skb = skb_dequeue(&call->rx_oos_queue); - if (skb) { + skb = call->rxtx_buffer[ix]; rxrpc_see_skb(skb); sp = rxrpc_skb(skb); - _debug("drain OOS packet %d [%d]", - sp->hdr.seq, call->rx_first_oos); - - if (sp->hdr.seq != call->rx_first_oos) { - skb_queue_head(&call->rx_oos_queue, skb); - call->rx_first_oos = rxrpc_skb(skb)->hdr.seq; - _debug("requeue %p {%u}", skb, call->rx_first_oos); - } else { - skb->mark = RXRPC_SKB_MARK_DATA; - terminal = ((sp->hdr.flags & RXRPC_LAST_PACKET) && - !(sp->hdr.flags & RXRPC_CLIENT_INITIATED)); - ret = rxrpc_queue_rcv_skb(call, skb, true, terminal); - BUG_ON(ret < 0); - _debug("drain #%u", call->rx_data_post); - call->rx_data_post++; - - /* find out what the next packet is */ - skb = skb_peek(&call->rx_oos_queue); - rxrpc_see_skb(skb); - if (skb) - call->rx_first_oos = rxrpc_skb(skb)->hdr.seq; - else - call->rx_first_oos = 0; - _debug("peek %p {%u}", skb, call->rx_first_oos); - } - } - - ret = 0; -socket_unavailable: - spin_unlock_bh(&call->lock); - _leave(" = %d", ret); - return ret; -} - -/* - * insert an out of sequence packet into the buffer - */ -static void rxrpc_insert_oos_packet(struct rxrpc_call *call, - struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp, *psp; - struct sk_buff *p; - u32 seq; - - sp = rxrpc_skb(skb); - seq = sp->hdr.seq; - _enter(",,{%u}", seq); - - skb->destructor = rxrpc_packet_destructor; - ASSERTCMP(sp->call, ==, NULL); - sp->call = call; - rxrpc_get_call_for_skb(call, skb); - - /* insert into the buffer in sequence order */ - spin_lock_bh(&call->lock); - - skb_queue_walk(&call->rx_oos_queue, p) { - psp = rxrpc_skb(p); - if (psp->hdr.seq > seq) { - _debug("insert oos #%u before #%u", seq, psp->hdr.seq); - skb_insert(p, skb, &call->rx_oos_queue); - goto inserted; - } - } - - 
_debug("append oos #%u", seq); - skb_queue_tail(&call->rx_oos_queue, skb); -inserted: - - /* we might now have a new front to the queue */ - if (call->rx_first_oos == 0 || seq < call->rx_first_oos) - call->rx_first_oos = seq; - - read_lock(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE && - call->rx_data_post == call->rx_first_oos) { - _debug("drain rx oos now"); - set_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events); - } - read_unlock(&call->state_lock); - - spin_unlock_bh(&call->lock); - _leave(" [stored #%u]", call->rx_first_oos); -} - -/* - * clear the Tx window on final ACK reception - */ -static void rxrpc_zap_tx_window(struct rxrpc_call *call) -{ - struct rxrpc_skb_priv *sp; - struct sk_buff *skb; - unsigned long _skb, *acks_window; - u8 winsz = call->acks_winsz; - int tail; - - acks_window = call->acks_window; - call->acks_window = NULL; - - while (CIRC_CNT(call->acks_head, call->acks_tail, winsz) > 0) { - tail = call->acks_tail; - smp_read_barrier_depends(); - _skb = acks_window[tail] & ~1; - smp_mb(); - call->acks_tail = (call->acks_tail + 1) & (winsz - 1); - - skb = (struct sk_buff *) _skb; - sp = rxrpc_skb(skb); - _debug("+++ clear Tx %u", sp->hdr.seq); - rxrpc_free_skb(skb); - } - - kfree(acks_window); -} - -/* - * process the extra information that may be appended to an ACK packet - */ -static void rxrpc_extract_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, - unsigned int latest, int nAcks) -{ - struct rxrpc_ackinfo ackinfo; - struct rxrpc_peer *peer; - unsigned int mtu; - - if (skb_copy_bits(skb, nAcks + 3, &ackinfo, sizeof(ackinfo)) < 0) { - _leave(" [no ackinfo]"); - return; - } - - _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }", - latest, - ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU), - ntohl(ackinfo.rwind), ntohl(ackinfo.jumbo_max)); - - mtu = min(ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU)); - - peer = call->peer; - if (mtu < peer->maxdata) { - spin_lock_bh(&peer->lock); - peer->maxdata = mtu; - peer->mtu = mtu + 
peer->hdrsize; - spin_unlock_bh(&peer->lock); - _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata); - } -} - -/* - * process packets in the reception queue - */ -static int rxrpc_process_rx_queue(struct rxrpc_call *call, - u32 *_abort_code) -{ - struct rxrpc_ackpacket ack; - struct rxrpc_skb_priv *sp; - struct sk_buff *skb; - bool post_ACK; - int latest; - u32 hard, tx; - - _enter(""); - -process_further: - skb = skb_dequeue(&call->rx_queue); - if (!skb) - return -EAGAIN; - - rxrpc_see_skb(skb); - _net("deferred skb %p", skb); - - sp = rxrpc_skb(skb); - - _debug("process %s [st %d]", rxrpc_pkts[sp->hdr.type], call->state); - - post_ACK = false; - - switch (sp->hdr.type) { - /* data packets that wind up here have been received out of - * order, need security processing or are jumbo packets */ - case RXRPC_PACKET_TYPE_DATA: - _proto("OOSQ DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq); - - /* secured packets must be verified and possibly decrypted */ - if (call->conn->security->verify_packet(call, skb, - sp->hdr.seq, - sp->hdr.cksum) < 0) - goto protocol_error; - - rxrpc_insert_oos_packet(call, skb); - goto process_further; - - /* partial ACK to process */ - case RXRPC_PACKET_TYPE_ACK: - if (skb_copy_bits(skb, 0, &ack, sizeof(ack)) < 0) { - _debug("extraction failure"); - goto protocol_error; - } - if (!skb_pull(skb, sizeof(ack))) - BUG(); - - latest = sp->hdr.serial; - hard = ntohl(ack.firstPacket); - tx = atomic_read(&call->sequence); - - _proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", - latest, - ntohs(ack.maxSkew), - hard, - ntohl(ack.previousPacket), - ntohl(ack.serial), - rxrpc_acks(ack.reason), - ack.nAcks); - - rxrpc_extract_ackinfo(call, skb, latest, ack.nAcks); - - if (ack.reason == RXRPC_ACK_PING) { - _proto("Rx ACK %%%u PING Request", latest); - rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE, - skb->priority, sp->hdr.serial, true); - } - - /* discard any out-of-order or duplicate ACKs */ - if (latest - call->acks_latest <= 0) { - 
_debug("discard ACK %d <= %d", - latest, call->acks_latest); - goto discard; - } - call->acks_latest = latest; - - if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST && - call->state != RXRPC_CALL_CLIENT_AWAIT_REPLY && - call->state != RXRPC_CALL_SERVER_SEND_REPLY && - call->state != RXRPC_CALL_SERVER_AWAIT_ACK) - goto discard; - - _debug("Tx=%d H=%u S=%d", tx, call->acks_hard, call->state); - - if (hard > 0) { - if (hard - 1 > tx) { - _debug("hard-ACK'd packet %d not transmitted" - " (%d top)", - hard - 1, tx); - goto protocol_error; - } - - if ((call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY || - call->state == RXRPC_CALL_SERVER_AWAIT_ACK) && - hard > tx) { - call->acks_hard = tx; - goto all_acked; + if (annotation == RXRPC_TX_ANNO_UNACK) { + if (time_after(sp->resend_at, now)) { + if (time_before(sp->resend_at, resend_at)) + resend_at = sp->resend_at; + continue; } - - smp_rmb(); - rxrpc_rotate_tx_window(call, hard - 1); - } - - if (ack.nAcks > 0) { - if (hard - 1 + ack.nAcks > tx) { - _debug("soft-ACK'd packet %d+%d not" - " transmitted (%d top)", - hard - 1, ack.nAcks, tx); - goto protocol_error; - } - - if (rxrpc_process_soft_ACKs(call, &ack, skb) < 0) - goto protocol_error; } - goto discard; - /* complete ACK to process */ - case RXRPC_PACKET_TYPE_ACKALL: - goto all_acked; - - /* abort and busy are handled elsewhere */ - case RXRPC_PACKET_TYPE_BUSY: - case RXRPC_PACKET_TYPE_ABORT: - BUG(); - - /* connection level events - also handled elsewhere */ - case RXRPC_PACKET_TYPE_CHALLENGE: - case RXRPC_PACKET_TYPE_RESPONSE: - case RXRPC_PACKET_TYPE_DEBUG: - BUG(); - } - - /* if we've had a hard ACK that covers all the packets we've sent, then - * that ends that phase of the operation */ -all_acked: - write_lock_bh(&call->state_lock); - _debug("ack all %d", call->state); - - switch (call->state) { - case RXRPC_CALL_CLIENT_AWAIT_REPLY: - call->state = RXRPC_CALL_CLIENT_RECV_REPLY; - break; - case RXRPC_CALL_SERVER_AWAIT_ACK: - _debug("srv complete"); - 
__rxrpc_call_completed(call); - post_ACK = true; - break; - case RXRPC_CALL_CLIENT_SEND_REQUEST: - case RXRPC_CALL_SERVER_RECV_REQUEST: - goto protocol_error_unlock; /* can't occur yet */ - default: - write_unlock_bh(&call->state_lock); - goto discard; /* assume packet left over from earlier phase */ - } - - write_unlock_bh(&call->state_lock); - - /* if all the packets we sent are hard-ACK'd, then we can discard - * whatever we've got left */ - _debug("clear Tx %d", - CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz)); - - del_timer_sync(&call->resend_timer); - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events); - - if (call->acks_window) - rxrpc_zap_tx_window(call); + /* Okay, we need to retransmit a packet. */ + call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS; + seq++; + } while (before_eq(seq, top)); + + call->resend_at = resend_at; + + /* Now go through the Tx window and perform the retransmissions. We + * have to drop the lock for each send. If an ACK comes in whilst the + * lock is dropped, it may clear some of the retransmission markers for + * packets that it soft-ACKs. 
+ */ + seq = cursor + 1; + do { + ix = seq & RXRPC_RXTX_BUFF_MASK; + annotation = call->rxtx_annotations[ix]; + if (annotation != RXRPC_TX_ANNO_RETRANS) + continue; - if (post_ACK) { - /* post the final ACK message for userspace to pick up */ - _debug("post ACK"); - skb->mark = RXRPC_SKB_MARK_FINAL_ACK; - sp->call = call; - rxrpc_get_call_for_skb(call, skb); - spin_lock_bh(&call->lock); - if (rxrpc_queue_rcv_skb(call, skb, true, true) < 0) - BUG(); + skb = call->rxtx_buffer[ix]; + rxrpc_get_skb(skb); spin_unlock_bh(&call->lock); - goto process_further; - } - -discard: - rxrpc_free_skb(skb); - goto process_further; - -protocol_error_unlock: - write_unlock_bh(&call->state_lock); -protocol_error: - rxrpc_free_skb(skb); - _leave(" = -EPROTO"); - return -EPROTO; -} - -/* - * post a message to the socket Rx queue for recvmsg() to pick up - */ -static int rxrpc_post_message(struct rxrpc_call *call, u32 mark, u32 error, - bool fatal) -{ - struct rxrpc_skb_priv *sp; - struct sk_buff *skb; - int ret; - - _enter("{%d,%lx},%u,%u,%d", - call->debug_id, call->flags, mark, error, fatal); - - /* remove timers and things for fatal messages */ - if (fatal) { - del_timer_sync(&call->resend_timer); - del_timer_sync(&call->ack_timer); - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - } + sp = rxrpc_skb(skb); - if (mark != RXRPC_SKB_MARK_NEW_CALL && - !test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { - _leave("[no userid]"); - return 0; - } + /* Each Tx packet needs a new serial number */ + sp->hdr.serial = atomic_inc_return(&call->conn->serial); - if (!test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) { - skb = alloc_skb(0, GFP_NOFS); - if (!skb) - return -ENOMEM; + whdr = (struct rxrpc_wire_header *)skb->head; + whdr->serial = htonl(sp->hdr.serial); - rxrpc_new_skb(skb); + if (rxrpc_send_data_packet(call->conn, skb) < 0) { + call->resend_at = now + 2; + rxrpc_free_skb(skb); + return; + } - skb->mark = mark; - - sp = rxrpc_skb(skb); - memset(sp, 0, sizeof(*sp)); - sp->error = error; - 
sp->call = call; - rxrpc_get_call_for_skb(call, skb); + if (rxrpc_is_client_call(call)) + rxrpc_expose_client_call(call); + sp->resend_at = now + rxrpc_resend_timeout; + rxrpc_free_skb(skb); spin_lock_bh(&call->lock); - ret = rxrpc_queue_rcv_skb(call, skb, true, fatal); - spin_unlock_bh(&call->lock); - BUG_ON(ret < 0); - } - return 0; + /* We need to clear the retransmit state, but there are two + * things we need to be aware of: A new ACK/NAK might have been + * received and the packet might have been hard-ACK'd (in which + * case it will no longer be in the buffer). + */ + if (after(seq, call->tx_hard_ack) && + (call->rxtx_annotations[ix] == RXRPC_TX_ANNO_RETRANS || + call->rxtx_annotations[ix] == RXRPC_TX_ANNO_NAK)) + call->rxtx_annotations[ix] = RXRPC_TX_ANNO_UNACK; + + if (after(call->tx_hard_ack, seq)) + seq = call->tx_hard_ack; + seq++; + } while (before_eq(seq, top)); + +out_unlock: + spin_unlock_bh(&call->lock); + _leave(""); } /* - * Handle background processing of incoming call packets and ACK / abort - * generation. A ref on the call is donated to us by whoever queued the work - * item. + * Handle retransmission and deferred ACK/abort generation. 
*/ void rxrpc_process_call(struct work_struct *work) { struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor); - struct rxrpc_wire_header whdr; - struct rxrpc_ackpacket ack; - struct rxrpc_ackinfo ackinfo; - struct msghdr msg; - struct kvec iov[5]; - enum rxrpc_call_event genbit; - unsigned long bits; - __be32 data, pad; - size_t len; - bool requeue = false; - int loop, nbit, ioc, ret, mtu; - u32 serial, abort_code = RX_PROTOCOL_ERROR; - u8 *acks = NULL; + unsigned long now; rxrpc_see_call(call); //printk("\n--------------------\n"); - _enter("{%d,%s,%lx} [%lu]", - call->debug_id, rxrpc_call_states[call->state], call->events, - (jiffies - call->creation_jif) / (HZ / 10)); - - if (call->state >= RXRPC_CALL_COMPLETE) { - rxrpc_put_call(call, rxrpc_call_put); - return; - } - - if (!call->conn) - goto skip_msg_init; - - /* there's a good chance we're going to have to send a message, so set - * one up in advance */ - msg.msg_name = &call->peer->srx.transport; - msg.msg_namelen = call->peer->srx.transport_len; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = 0; + _enter("{%d,%s,%lx}", + call->debug_id, rxrpc_call_states[call->state], call->events); - whdr.epoch = htonl(call->conn->proto.epoch); - whdr.cid = htonl(call->cid); - whdr.callNumber = htonl(call->call_id); - whdr.seq = 0; - whdr.type = RXRPC_PACKET_TYPE_ACK; - whdr.flags = call->conn->out_clientflag; - whdr.userStatus = 0; - whdr.securityIndex = call->conn->security_ix; - whdr._rsvd = 0; - whdr.serviceId = htons(call->service_id); - - memset(iov, 0, sizeof(iov)); - iov[0].iov_base = &whdr; - iov[0].iov_len = sizeof(whdr); -skip_msg_init: - - /* deal with events of a final nature */ - if (test_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events)) { - enum rxrpc_skb_mark mark; - - clear_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events); - clear_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events); - clear_bit(RXRPC_CALL_EV_ABORT, &call->events); - - if (call->completion == 
RXRPC_CALL_NETWORK_ERROR) { - mark = RXRPC_SKB_MARK_NET_ERROR; - _debug("post net error %d", call->error); - } else { - mark = RXRPC_SKB_MARK_LOCAL_ERROR; - _debug("post net local error %d", call->error); - } - - if (rxrpc_post_message(call, mark, call->error, true) < 0) - goto no_mem; - clear_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events); - goto kill_ACKs; - } - - if (test_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events)) { - ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); - - clear_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events); - clear_bit(RXRPC_CALL_EV_ABORT, &call->events); - - _debug("post conn abort"); - - if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR, - call->error, true) < 0) - goto no_mem; - clear_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events); - goto kill_ACKs; - } - - if (test_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events)) { - whdr.type = RXRPC_PACKET_TYPE_BUSY; - genbit = RXRPC_CALL_EV_REJECT_BUSY; - goto send_message; - } - - if (test_bit(RXRPC_CALL_EV_ABORT, &call->events)) { - ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); - - if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR, - call->error, true) < 0) - goto no_mem; - whdr.type = RXRPC_PACKET_TYPE_ABORT; - data = htonl(call->abort_code); - iov[1].iov_base = &data; - iov[1].iov_len = sizeof(data); - genbit = RXRPC_CALL_EV_ABORT; - goto send_message; - } - - if (test_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events)) { - genbit = RXRPC_CALL_EV_ACK_FINAL; - - ack.bufferSpace = htons(8); - ack.maxSkew = 0; - ack.serial = 0; - ack.reason = RXRPC_ACK_IDLE; - ack.nAcks = 0; - call->ackr_reason = 0; - - spin_lock_bh(&call->lock); - ack.serial = htonl(call->ackr_serial); - ack.previousPacket = htonl(call->ackr_prev_seq); - ack.firstPacket = htonl(call->rx_data_eaten + 1); - spin_unlock_bh(&call->lock); - - pad = 0; - - iov[1].iov_base = &ack; - iov[1].iov_len = sizeof(ack); - iov[2].iov_base = &pad; - iov[2].iov_len = 3; - iov[3].iov_base = &ackinfo; - iov[3].iov_len = sizeof(ackinfo); - goto send_ACK; 
+recheck_state: + if (test_and_clear_bit(RXRPC_CALL_EV_ABORT, &call->events)) { + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + goto recheck_state; } - if (call->events & ((1 << RXRPC_CALL_EV_RCVD_BUSY) | - (1 << RXRPC_CALL_EV_RCVD_ABORT)) - ) { - u32 mark; - - if (test_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events)) - mark = RXRPC_SKB_MARK_REMOTE_ABORT; - else - mark = RXRPC_SKB_MARK_BUSY; - - _debug("post abort/busy"); - rxrpc_clear_tx_window(call); - if (rxrpc_post_message(call, mark, ECONNABORTED, true) < 0) - goto no_mem; - - clear_bit(RXRPC_CALL_EV_RCVD_BUSY, &call->events); - clear_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events); - goto kill_ACKs; + if (call->state == RXRPC_CALL_COMPLETE) { + del_timer_sync(&call->timer); + goto out_put; } - if (test_and_clear_bit(RXRPC_CALL_EV_RCVD_ACKALL, &call->events)) { - _debug("do implicit ackall"); - rxrpc_clear_tx_window(call); - } - - if (test_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events)) { + now = jiffies; + if (time_after_eq(now, call->expire_at)) { rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, ETIME); - - _debug("post timeout"); - if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR, - ETIME, true) < 0) - goto no_mem; - - clear_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events); - goto kill_ACKs; + set_bit(RXRPC_CALL_EV_ABORT, &call->events); } - /* deal with assorted inbound messages */ - if (!skb_queue_empty(&call->rx_queue)) { - ret = rxrpc_process_rx_queue(call, &abort_code); - switch (ret) { - case 0: - case -EAGAIN: - break; - case -ENOMEM: - goto no_mem; - case -EKEYEXPIRED: - case -EKEYREJECTED: - case -EPROTO: - rxrpc_abort_call("PRO", call, 0, abort_code, -ret); - goto kill_ACKs; + if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events) || + time_after_eq(now, call->ack_at)) { + call->ack_at = call->expire_at; + if (call->ackr_reason) { + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + goto recheck_state; } } - /* handle resending */ - if (test_and_clear_bit(RXRPC_CALL_EV_RESEND_TIMER, 
&call->events)) - rxrpc_resend_timer(call); - if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events)) + if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events) || + time_after_eq(now, call->resend_at)) { rxrpc_resend(call); - - /* consider sending an ordinary ACK */ - if (test_bit(RXRPC_CALL_EV_ACK, &call->events)) { - _debug("send ACK: window: %d - %d { %lx }", - call->rx_data_eaten, call->ackr_win_top, - call->ackr_window[0]); - - if (call->state > RXRPC_CALL_SERVER_ACK_REQUEST && - call->ackr_reason != RXRPC_ACK_PING_RESPONSE) { - /* ACK by sending reply DATA packet in this state */ - clear_bit(RXRPC_CALL_EV_ACK, &call->events); - goto maybe_reschedule; - } - - genbit = RXRPC_CALL_EV_ACK; - - acks = kzalloc(call->ackr_win_top - call->rx_data_eaten, - GFP_NOFS); - if (!acks) - goto no_mem; - - //hdr.flags = RXRPC_SLOW_START_OK; - ack.bufferSpace = htons(8); - ack.maxSkew = 0; - - spin_lock_bh(&call->lock); - ack.reason = call->ackr_reason; - ack.serial = htonl(call->ackr_serial); - ack.previousPacket = htonl(call->ackr_prev_seq); - ack.firstPacket = htonl(call->rx_data_eaten + 1); - - ack.nAcks = 0; - for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) { - nbit = loop * BITS_PER_LONG; - for (bits = call->ackr_window[loop]; bits; bits >>= 1 - ) { - _debug("- l=%d n=%d b=%lx", loop, nbit, bits); - if (bits & 1) { - acks[nbit] = RXRPC_ACK_TYPE_ACK; - ack.nAcks = nbit + 1; - } - nbit++; - } - } - call->ackr_reason = 0; - spin_unlock_bh(&call->lock); - - pad = 0; - - iov[1].iov_base = &ack; - iov[1].iov_len = sizeof(ack); - iov[2].iov_base = acks; - iov[2].iov_len = ack.nAcks; - iov[3].iov_base = &pad; - iov[3].iov_len = 3; - iov[4].iov_base = &ackinfo; - iov[4].iov_len = sizeof(ackinfo); - - switch (ack.reason) { - case RXRPC_ACK_REQUESTED: - case RXRPC_ACK_DUPLICATE: - case RXRPC_ACK_OUT_OF_SEQUENCE: - case RXRPC_ACK_EXCEEDS_WINDOW: - case RXRPC_ACK_NOSPACE: - case RXRPC_ACK_PING: - case RXRPC_ACK_PING_RESPONSE: - goto send_ACK_with_skew; - case 
RXRPC_ACK_DELAY: - case RXRPC_ACK_IDLE: - goto send_ACK; - } + goto recheck_state; } - /* handle completion of security negotiations on an incoming - * connection */ - if (test_and_clear_bit(RXRPC_CALL_EV_SECURED, &call->events)) { - _debug("secured"); - spin_lock_bh(&call->lock); - - if (call->state == RXRPC_CALL_SERVER_SECURING) { - struct rxrpc_sock *rx; - _debug("securing"); - rcu_read_lock(); - rx = rcu_dereference(call->socket); - if (rx) { - write_lock(&rx->call_lock); - if (!test_bit(RXRPC_CALL_RELEASED, &call->flags)) { - _debug("not released"); - call->state = RXRPC_CALL_SERVER_ACCEPTING; - list_move_tail(&call->accept_link, - &rx->acceptq); - } - write_unlock(&rx->call_lock); - } - rcu_read_unlock(); - read_lock(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) - set_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events); - read_unlock(&call->state_lock); - } - - spin_unlock_bh(&call->lock); - if (!test_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events)) - goto maybe_reschedule; - } - - /* post a notification of an acceptable connection to the app */ - if (test_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events)) { - _debug("post accept"); - if (rxrpc_post_message(call, RXRPC_SKB_MARK_NEW_CALL, - 0, false) < 0) - goto no_mem; - clear_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events); - goto maybe_reschedule; - } - - /* handle incoming call acceptance */ - if (test_and_clear_bit(RXRPC_CALL_EV_ACCEPTED, &call->events)) { - _debug("accepted"); - ASSERTCMP(call->rx_data_post, ==, 0); - call->rx_data_post = 1; - read_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) - set_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events); - read_unlock_bh(&call->state_lock); - } - - /* drain the out of sequence received packet queue into the packet Rx - * queue */ - if (test_and_clear_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events)) { - while (call->rx_data_post == call->rx_first_oos) - if (rxrpc_drain_rx_oos_queue(call) < 0) - break; - goto maybe_reschedule; - } + 
rxrpc_set_timer(call); /* other events may have been raised since we started checking */ - goto maybe_reschedule; - -send_ACK_with_skew: - ack.maxSkew = htons(call->ackr_skew); -send_ACK: - mtu = call->peer->if_mtu; - mtu -= call->peer->hdrsize; - ackinfo.maxMTU = htonl(mtu); - ackinfo.rwind = htonl(rxrpc_rx_window_size); - - /* permit the peer to send us jumbo packets if it wants to */ - ackinfo.rxMTU = htonl(rxrpc_rx_mtu); - ackinfo.jumbo_max = htonl(rxrpc_rx_jumbo_max); - - serial = atomic_inc_return(&call->conn->serial); - whdr.serial = htonl(serial); - _proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", - serial, - ntohs(ack.maxSkew), - ntohl(ack.firstPacket), - ntohl(ack.previousPacket), - ntohl(ack.serial), - rxrpc_acks(ack.reason), - ack.nAcks); - - del_timer_sync(&call->ack_timer); - if (ack.nAcks > 0) - set_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags); - goto send_message_2; - -send_message: - _debug("send message"); - - serial = atomic_inc_return(&call->conn->serial); - whdr.serial = htonl(serial); - _proto("Tx %s %%%u", rxrpc_pkts[whdr.type], serial); -send_message_2: - - len = iov[0].iov_len; - ioc = 1; - if (iov[4].iov_len) { - ioc = 5; - len += iov[4].iov_len; - len += iov[3].iov_len; - len += iov[2].iov_len; - len += iov[1].iov_len; - } else if (iov[3].iov_len) { - ioc = 4; - len += iov[3].iov_len; - len += iov[2].iov_len; - len += iov[1].iov_len; - } else if (iov[2].iov_len) { - ioc = 3; - len += iov[2].iov_len; - len += iov[1].iov_len; - } else if (iov[1].iov_len) { - ioc = 2; - len += iov[1].iov_len; - } - - ret = kernel_sendmsg(call->conn->params.local->socket, - &msg, iov, ioc, len); - if (ret < 0) { - _debug("sendmsg failed: %d", ret); - if (call->state < RXRPC_CALL_COMPLETE) - requeue = true; - goto error; - } - - switch (genbit) { - case RXRPC_CALL_EV_ABORT: - clear_bit(genbit, &call->events); - clear_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events); - goto kill_ACKs; - - case RXRPC_CALL_EV_ACK_FINAL: - rxrpc_call_completed(call); - goto 
kill_ACKs; - - default: - clear_bit(genbit, &call->events); - switch (call->state) { - case RXRPC_CALL_CLIENT_AWAIT_REPLY: - case RXRPC_CALL_CLIENT_RECV_REPLY: - case RXRPC_CALL_SERVER_RECV_REQUEST: - case RXRPC_CALL_SERVER_ACK_REQUEST: - _debug("start ACK timer"); - rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, - call->ackr_skew, call->ackr_serial, - false); - default: - break; - } - goto maybe_reschedule; - } - -kill_ACKs: - del_timer_sync(&call->ack_timer); - clear_bit(RXRPC_CALL_EV_ACK, &call->events); - -maybe_reschedule: - if (call->events || !skb_queue_empty(&call->rx_queue)) { - if (call->state < RXRPC_CALL_COMPLETE) - requeue = true; - } - -error: - kfree(acks); - - if ((requeue || call->events) && !work_pending(&call->processor)) { - _debug("jumpstart %x", call->conn->proto.cid); + if (call->events && call->state < RXRPC_CALL_COMPLETE) { __rxrpc_queue_call(call); - } else { - rxrpc_put_call(call, rxrpc_call_put); + goto out; } +out_put: + rxrpc_put_call(call, rxrpc_call_put); +out: _leave(""); - return; - -no_mem: - _debug("out of memory"); - goto maybe_reschedule; } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index d233adc..18ab13f 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -30,7 +30,6 @@ const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { [RXRPC_CALL_CLIENT_SEND_REQUEST] = "ClSndReq", [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl", [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl", - [RXRPC_CALL_CLIENT_FINAL_ACK] = "ClFnlACK", [RXRPC_CALL_SERVER_PREALLOC] = "SvPrealc", [RXRPC_CALL_SERVER_SECURING] = "SvSecure", [RXRPC_CALL_SERVER_ACCEPTING] = "SvAccept", @@ -43,7 +42,6 @@ const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { const char *const rxrpc_call_completions[NR__RXRPC_CALL_COMPLETIONS] = { [RXRPC_CALL_SUCCEEDED] = "Complete", - [RXRPC_CALL_SERVER_BUSY] = "SvBusy ", [RXRPC_CALL_REMOTELY_ABORTED] = "RmtAbort", [RXRPC_CALL_LOCALLY_ABORTED] = "LocAbort", [RXRPC_CALL_LOCAL_ERROR] = "LocError", @@ 
-57,10 +55,8 @@ const char rxrpc_call_traces[rxrpc_call__nr_trace][4] = { [rxrpc_call_queued_ref] = "QUR", [rxrpc_call_seen] = "SEE", [rxrpc_call_got] = "GOT", - [rxrpc_call_got_skb] = "Gsk", [rxrpc_call_got_userid] = "Gus", [rxrpc_call_put] = "PUT", - [rxrpc_call_put_skb] = "Psk", [rxrpc_call_put_userid] = "Pus", [rxrpc_call_put_noqueue] = "PNQ", }; @@ -69,9 +65,15 @@ struct kmem_cache *rxrpc_call_jar; LIST_HEAD(rxrpc_calls); DEFINE_RWLOCK(rxrpc_call_lock); -static void rxrpc_call_life_expired(unsigned long _call); -static void rxrpc_ack_time_expired(unsigned long _call); -static void rxrpc_resend_time_expired(unsigned long _call); +static void rxrpc_call_timer_expired(unsigned long _call) +{ + struct rxrpc_call *call = (struct rxrpc_call *)_call; + + _enter("%d", call->debug_id); + + if (call->state < RXRPC_CALL_COMPLETE) + rxrpc_queue_call(call); +} /* * find an extant server call @@ -121,27 +123,24 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) if (!call) return NULL; - call->acks_winsz = 16; - call->acks_window = kmalloc(call->acks_winsz * sizeof(unsigned long), + call->rxtx_buffer = kcalloc(RXRPC_RXTX_BUFF_SIZE, + sizeof(struct sk_buff *), gfp); - if (!call->acks_window) { - kmem_cache_free(rxrpc_call_jar, call); - return NULL; - } + if (!call->rxtx_buffer) + goto nomem; - setup_timer(&call->lifetimer, &rxrpc_call_life_expired, - (unsigned long) call); - setup_timer(&call->ack_timer, &rxrpc_ack_time_expired, - (unsigned long) call); - setup_timer(&call->resend_timer, &rxrpc_resend_time_expired, - (unsigned long) call); + call->rxtx_annotations = kcalloc(RXRPC_RXTX_BUFF_SIZE, sizeof(u8), gfp); + if (!call->rxtx_annotations) + goto nomem_2; + + setup_timer(&call->timer, rxrpc_call_timer_expired, + (unsigned long)call); INIT_WORK(&call->processor, &rxrpc_process_call); INIT_LIST_HEAD(&call->link); INIT_LIST_HEAD(&call->chan_wait_link); INIT_LIST_HEAD(&call->accept_link); - skb_queue_head_init(&call->rx_queue); - skb_queue_head_init(&call->rx_oos_queue); - 
skb_queue_head_init(&call->knlrecv_queue); + INIT_LIST_HEAD(&call->recvmsg_link); + INIT_LIST_HEAD(&call->sock_link); init_waitqueue_head(&call->waitq); spin_lock_init(&call->lock); rwlock_init(&call->state_lock); @@ -150,63 +149,52 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) memset(&call->sock_node, 0xed, sizeof(call->sock_node)); - call->rx_data_expect = 1; - call->rx_data_eaten = 0; - call->rx_first_oos = 0; - call->ackr_win_top = call->rx_data_eaten + 1 + rxrpc_rx_window_size; - call->creation_jif = jiffies; + /* Leave space in the ring to handle a maxed-out jumbo packet */ + call->rx_winsize = RXRPC_RXTX_BUFF_SIZE - 1 - 46; + call->tx_winsize = 16; + call->rx_expect_next = 1; return call; + +nomem_2: + kfree(call->rxtx_buffer); +nomem: + kmem_cache_free(rxrpc_call_jar, call); + return NULL; } /* * Allocate a new client call. */ -static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, - struct sockaddr_rxrpc *srx, +static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx, gfp_t gfp) { struct rxrpc_call *call; _enter(""); - ASSERT(rx->local != NULL); - call = rxrpc_alloc_call(gfp); if (!call) return ERR_PTR(-ENOMEM); call->state = RXRPC_CALL_CLIENT_AWAIT_CONN; - call->rx_data_post = 1; call->service_id = srx->srx_service; - rcu_assign_pointer(call->socket, rx); _leave(" = %p", call); return call; } /* - * Begin client call. + * Initiate the call ack/resend/expiry timer. */ -static int rxrpc_begin_client_call(struct rxrpc_call *call, - struct rxrpc_conn_parameters *cp, - struct sockaddr_rxrpc *srx, - gfp_t gfp) +static void rxrpc_start_call_timer(struct rxrpc_call *call) { - int ret; - - /* Set up or get a connection record and set the protocol parameters, - * including channel number and call ID. 
- */ - ret = rxrpc_connect_call(call, cp, srx, gfp); - if (ret < 0) - return ret; - - spin_lock(&call->conn->params.peer->lock); - hlist_add_head(&call->error_link, &call->conn->params.peer->error_targets); - spin_unlock(&call->conn->params.peer->lock); - - call->lifetimer.expires = jiffies + rxrpc_max_call_lifetime; - add_timer(&call->lifetimer); - return 0; + unsigned long expire_at; + + expire_at = jiffies + rxrpc_max_call_lifetime; + call->expire_at = expire_at; + call->ack_at = expire_at; + call->resend_at = expire_at; + call->timer.expires = expire_at; + add_timer(&call->timer); } /* @@ -226,7 +214,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, _enter("%p,%lx", rx, user_call_ID); - call = rxrpc_alloc_client_call(rx, srx, gfp); + call = rxrpc_alloc_client_call(srx, gfp); if (IS_ERR(call)) { _leave(" = %ld", PTR_ERR(call)); return call; @@ -255,19 +243,32 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, goto found_user_ID_now_present; } + rcu_assign_pointer(call->socket, rx); rxrpc_get_call(call, rxrpc_call_got_userid); rb_link_node(&call->sock_node, parent, pp); rb_insert_color(&call->sock_node, &rx->calls); + list_add(&call->sock_link, &rx->sock_calls); + write_unlock(&rx->call_lock); - write_lock_bh(&rxrpc_call_lock); + write_lock(&rxrpc_call_lock); list_add_tail(&call->link, &rxrpc_calls); - write_unlock_bh(&rxrpc_call_lock); + write_unlock(&rxrpc_call_lock); - ret = rxrpc_begin_client_call(call, cp, srx, gfp); + /* Set up or get a connection record and set the protocol parameters, + * including channel number and call ID. 
+ */ + ret = rxrpc_connect_call(call, cp, srx, gfp); if (ret < 0) goto error; + spin_lock_bh(&call->conn->params.peer->lock); + hlist_add_head(&call->error_link, + &call->conn->params.peer->error_targets); + spin_unlock_bh(&call->conn->params.peer->lock); + + rxrpc_start_call_timer(call); + _net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id); _leave(" = %p [new]", call); @@ -279,9 +280,9 @@ error: write_unlock(&rx->call_lock); rxrpc_put_call(call, rxrpc_call_put_userid); - write_lock_bh(&rxrpc_call_lock); + write_lock(&rxrpc_call_lock); list_del_init(&call->link); - write_unlock_bh(&rxrpc_call_lock); + write_unlock(&rxrpc_call_lock); error_out: __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, @@ -303,142 +304,46 @@ found_user_ID_now_present: } /* - * set up an incoming call - * - called in process context with IRQs enabled + * Set up an incoming call. call->conn points to the connection. + * This is called in BH context and isn't allowed to fail. */ -struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, - struct rxrpc_connection *conn, - struct sk_buff *skb) +void rxrpc_incoming_call(struct rxrpc_sock *rx, + struct rxrpc_call *call, + struct sk_buff *skb) { + struct rxrpc_connection *conn = call->conn; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct rxrpc_call *call, *candidate; - const void *here = __builtin_return_address(0); - u32 call_id, chan; - - _enter(",%d", conn->debug_id); - - ASSERT(rx != NULL); - - candidate = rxrpc_alloc_call(GFP_NOIO); - if (!candidate) - return ERR_PTR(-EBUSY); + u32 chan; - trace_rxrpc_call(candidate, rxrpc_call_new_service, - atomic_read(&candidate->usage), here, NULL); + _enter(",%d", call->conn->debug_id); - chan = sp->hdr.cid & RXRPC_CHANNELMASK; - candidate->conn = conn; - candidate->peer = conn->params.peer; - candidate->cid = sp->hdr.cid; - candidate->call_id = sp->hdr.callNumber; - candidate->security_ix = sp->hdr.securityIndex; - candidate->rx_data_post = 0; - candidate->state = 
RXRPC_CALL_SERVER_ACCEPTING; - candidate->flags |= (1 << RXRPC_CALL_IS_SERVICE); - if (conn->security_ix > 0) - candidate->state = RXRPC_CALL_SERVER_SECURING; - rcu_assign_pointer(candidate->socket, rx); - - spin_lock(&conn->channel_lock); - - /* set the channel for this call */ - call = rcu_dereference_protected(conn->channels[chan].call, - lockdep_is_held(&conn->channel_lock)); - - _debug("channel[%u] is %p", candidate->cid & RXRPC_CHANNELMASK, call); - if (call && call->call_id == sp->hdr.callNumber) { - /* already set; must've been a duplicate packet */ - _debug("extant call [%d]", call->state); - ASSERTCMP(call->conn, ==, conn); - - read_lock(&call->state_lock); - switch (call->state) { - case RXRPC_CALL_LOCALLY_ABORTED: - if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, &call->events)) - rxrpc_queue_call(call); - case RXRPC_CALL_REMOTELY_ABORTED: - read_unlock(&call->state_lock); - goto aborted_call; - default: - rxrpc_get_call(call, rxrpc_call_got); - read_unlock(&call->state_lock); - goto extant_call; - } - } - - if (call) { - /* it seems the channel is still in use from the previous call - * - ditch the old binding if its call is now complete */ - _debug("CALL: %u { %s }", - call->debug_id, rxrpc_call_states[call->state]); - - if (call->state == RXRPC_CALL_COMPLETE) { - __rxrpc_disconnect_call(conn, call); - } else { - spin_unlock(&conn->channel_lock); - kmem_cache_free(rxrpc_call_jar, candidate); - _leave(" = -EBUSY"); - return ERR_PTR(-EBUSY); - } - } - - /* check the call number isn't duplicate */ - _debug("check dup"); - call_id = sp->hdr.callNumber; - - /* We just ignore calls prior to the current call ID. Terminated calls - * are handled via the connection. + rcu_assign_pointer(call->socket, rx); + call->call_id = sp->hdr.callNumber; + call->service_id = sp->hdr.serviceId; + call->cid = sp->hdr.cid; + call->state = RXRPC_CALL_SERVER_ACCEPTING; + if (sp->hdr.securityIndex > 0) + call->state = RXRPC_CALL_SERVER_SECURING; + + /* Set the channel for this call. 
We don't get channel_lock as we're + * only defending against the data_ready handler (which we're called + * from) and the RESPONSE packet parser (which is only really + * interested in call_counter and can cope with a disagreement with the + * call pointer). */ - if (call_id <= conn->channels[chan].call_counter) - goto old_call; /* TODO: Just drop packet */ - - /* Temporary: Mirror the backlog prealloc ref (TODO: use prealloc) */ - rxrpc_get_call(candidate, rxrpc_call_got); - - /* make the call available */ - _debug("new call"); - call = candidate; - candidate = NULL; - conn->channels[chan].call_counter = call_id; + chan = sp->hdr.cid & RXRPC_CHANNELMASK; + conn->channels[chan].call_counter = call->call_id; + conn->channels[chan].call_id = call->call_id; rcu_assign_pointer(conn->channels[chan].call, call); - rxrpc_get_connection(conn); - rxrpc_get_peer(call->peer); - spin_unlock(&conn->channel_lock); spin_lock(&conn->params.peer->lock); hlist_add_head(&call->error_link, &conn->params.peer->error_targets); spin_unlock(&conn->params.peer->lock); - write_lock_bh(&rxrpc_call_lock); - list_add_tail(&call->link, &rxrpc_calls); - write_unlock_bh(&rxrpc_call_lock); - - call->service_id = conn->params.service_id; - _net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id); - call->lifetimer.expires = jiffies + rxrpc_max_call_lifetime; - add_timer(&call->lifetimer); - _leave(" = %p {%d} [new]", call, call->debug_id); - return call; - -extant_call: - spin_unlock(&conn->channel_lock); - kmem_cache_free(rxrpc_call_jar, candidate); - _leave(" = %p {%d} [extant]", call, call ? 
call->debug_id : -1); - return call; - -aborted_call: - spin_unlock(&conn->channel_lock); - kmem_cache_free(rxrpc_call_jar, candidate); - _leave(" = -ECONNABORTED"); - return ERR_PTR(-ECONNABORTED); - -old_call: - spin_unlock(&conn->channel_lock); - kmem_cache_free(rxrpc_call_jar, candidate); - _leave(" = -ECONNRESET [old]"); - return ERR_PTR(-ECONNRESET); + rxrpc_start_call_timer(call); + _leave(""); } /* @@ -497,25 +402,17 @@ void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) } /* - * Note the addition of a ref on a call for a socket buffer. + * Detach a call from its owning socket. */ -void rxrpc_get_call_for_skb(struct rxrpc_call *call, struct sk_buff *skb) +void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) { - const void *here = __builtin_return_address(0); - int n = atomic_inc_return(&call->usage); + struct rxrpc_connection *conn = call->conn; + bool put = false; + int i; - trace_rxrpc_call(call, rxrpc_call_got_skb, n, here, skb); -} + _enter("{%d,%d}", call->debug_id, atomic_read(&call->usage)); -/* - * detach a call from a socket and set up for release - */ -void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) -{ - _enter("{%d,%d,%d,%d}", - call->debug_id, atomic_read(&call->usage), - atomic_read(&call->ackr_not_idle), - call->rx_first_oos); + ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); rxrpc_see_call(call); @@ -524,80 +421,46 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) BUG(); spin_unlock_bh(&call->lock); - /* dissociate from the socket - * - the socket's ref on the call is passed to the death timer - */ - _debug("RELEASE CALL %p (%d)", call, call->debug_id); + del_timer_sync(&call->timer); - if (call->peer) { - spin_lock(&call->peer->lock); - hlist_del_init(&call->error_link); - spin_unlock(&call->peer->lock); - } + /* Make sure we don't get any more notifications */ + write_lock_bh(&rx->recvmsg_lock); - write_lock_bh(&rx->call_lock); - if 
(!list_empty(&call->accept_link)) { + if (!list_empty(&call->recvmsg_link)) { _debug("unlinking once-pending call %p { e=%lx f=%lx }", call, call->events, call->flags); - ASSERT(!test_bit(RXRPC_CALL_HAS_USERID, &call->flags)); - list_del_init(&call->accept_link); - sk_acceptq_removed(&rx->sk); - } else if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { + list_del(&call->recvmsg_link); + put = true; + } + + /* list_empty() must return false in rxrpc_notify_socket() */ + call->recvmsg_link.next = NULL; + call->recvmsg_link.prev = NULL; + + write_unlock_bh(&rx->recvmsg_lock); + if (put) + rxrpc_put_call(call, rxrpc_call_put); + + write_lock(&rx->call_lock); + + if (test_and_clear_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { rb_erase(&call->sock_node, &rx->calls); memset(&call->sock_node, 0xdd, sizeof(call->sock_node)); - clear_bit(RXRPC_CALL_HAS_USERID, &call->flags); rxrpc_put_call(call, rxrpc_call_put_userid); } - write_unlock_bh(&rx->call_lock); - - /* free up the channel for reuse */ - if (call->state == RXRPC_CALL_CLIENT_FINAL_ACK) { - clear_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); - rxrpc_call_completed(call); - } else { - write_lock_bh(&call->state_lock); - - if (call->state < RXRPC_CALL_COMPLETE) { - _debug("+++ ABORTING STATE %d +++\n", call->state); - __rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, ECONNRESET); - clear_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); - } - - write_unlock_bh(&call->state_lock); - } - if (call->conn) + list_del(&call->sock_link); + write_unlock(&rx->call_lock); + + _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn); + + if (conn) rxrpc_disconnect_call(call); - /* clean up the Rx queue */ - if (!skb_queue_empty(&call->rx_queue) || - !skb_queue_empty(&call->rx_oos_queue)) { - struct rxrpc_skb_priv *sp; - struct sk_buff *skb; - - _debug("purge Rx queues"); - - spin_lock_bh(&call->lock); - while ((skb = 
skb_dequeue(&call->rx_queue)) || - (skb = skb_dequeue(&call->rx_oos_queue))) { - spin_unlock_bh(&call->lock); - - sp = rxrpc_skb(skb); - _debug("- zap %s %%%u #%u", - rxrpc_pkts[sp->hdr.type], - sp->hdr.serial, sp->hdr.seq); - rxrpc_free_skb(skb); - spin_lock_bh(&call->lock); - } - spin_unlock_bh(&call->lock); + for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) { + rxrpc_free_skb(call->rxtx_buffer[i]); + call->rxtx_buffer[i] = NULL; } - rxrpc_purge_queue(&call->knlrecv_queue); - - del_timer_sync(&call->resend_timer); - del_timer_sync(&call->ack_timer); - del_timer_sync(&call->lifetimer); /* We have to release the prealloc backlog ref */ if (rxrpc_is_service_call(call)) @@ -611,28 +474,19 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) { struct rxrpc_call *call; - struct rb_node *p; _enter("%p", rx); - read_lock_bh(&rx->call_lock); - - /* kill the not-yet-accepted incoming calls */ - list_for_each_entry(call, &rx->secureq, accept_link) { - rxrpc_release_call(rx, call); - } - - list_for_each_entry(call, &rx->acceptq, accept_link) { - rxrpc_release_call(rx, call); - } - - /* mark all the calls as no longer wanting incoming packets */ - for (p = rb_first(&rx->calls); p; p = rb_next(p)) { - call = rb_entry(p, struct rxrpc_call, sock_node); + while (!list_empty(&rx->sock_calls)) { + call = list_entry(rx->sock_calls.next, + struct rxrpc_call, sock_link); + rxrpc_get_call(call, rxrpc_call_got); + rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, ECONNRESET); + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); rxrpc_release_call(rx, call); + rxrpc_put_call(call, rxrpc_call_put); } - read_unlock_bh(&rx->call_lock); _leave(""); } @@ -651,23 +505,12 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op) ASSERTCMP(n, >=, 0); if (n == 0) { _debug("call %d dead", call->debug_id); - rxrpc_cleanup_call(call); - } -} + ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); -/* - * Release a 
call ref held by a socket buffer. - */ -void rxrpc_put_call_for_skb(struct rxrpc_call *call, struct sk_buff *skb) -{ - const void *here = __builtin_return_address(0); - int n; + write_lock(&rxrpc_call_lock); + list_del_init(&call->link); + write_unlock(&rxrpc_call_lock); - n = atomic_dec_return(&call->usage); - trace_rxrpc_call(call, rxrpc_call_put_skb, n, here, skb); - ASSERTCMP(n, >=, 0); - if (n == 0) { - _debug("call %d dead", call->debug_id); rxrpc_cleanup_call(call); } } @@ -679,9 +522,9 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu) { struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu); - rxrpc_purge_queue(&call->rx_queue); - rxrpc_purge_queue(&call->knlrecv_queue); rxrpc_put_peer(call->peer); + kfree(call->rxtx_buffer); + kfree(call->rxtx_annotations); kmem_cache_free(rxrpc_call_jar, call); } @@ -690,49 +533,24 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu) */ void rxrpc_cleanup_call(struct rxrpc_call *call) { - _net("DESTROY CALL %d", call->debug_id); + int i; - write_lock_bh(&rxrpc_call_lock); - list_del_init(&call->link); - write_unlock_bh(&rxrpc_call_lock); + _net("DESTROY CALL %d", call->debug_id); memset(&call->sock_node, 0xcd, sizeof(call->sock_node)); - del_timer_sync(&call->lifetimer); - del_timer_sync(&call->ack_timer); - del_timer_sync(&call->resend_timer); + del_timer_sync(&call->timer); ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags)); - ASSERT(!work_pending(&call->processor)); ASSERTCMP(call->conn, ==, NULL); - if (call->acks_window) { - _debug("kill Tx window %d", - CIRC_CNT(call->acks_head, call->acks_tail, - call->acks_winsz)); - smp_mb(); - while (CIRC_CNT(call->acks_head, call->acks_tail, - call->acks_winsz) > 0) { - struct rxrpc_skb_priv *sp; - unsigned long _skb; - - _skb = call->acks_window[call->acks_tail] & ~1; - sp = rxrpc_skb((struct sk_buff *)_skb); - _debug("+++ clear Tx %u", sp->hdr.seq); - rxrpc_free_skb((struct sk_buff *)_skb); - 
call->acks_tail = - (call->acks_tail + 1) & (call->acks_winsz - 1); - } - - kfree(call->acks_window); - } + /* Clean up the Rx/Tx buffer */ + for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) + rxrpc_free_skb(call->rxtx_buffer[i]); rxrpc_free_skb(call->tx_pending); - rxrpc_purge_queue(&call->rx_queue); - ASSERT(skb_queue_empty(&call->rx_oos_queue)); - rxrpc_purge_queue(&call->knlrecv_queue); call_rcu(&call->rcu, rxrpc_rcu_destroy_call); } @@ -747,8 +565,8 @@ void __exit rxrpc_destroy_all_calls(void) if (list_empty(&rxrpc_calls)) return; - - write_lock_bh(&rxrpc_call_lock); + + write_lock(&rxrpc_call_lock); while (!list_empty(&rxrpc_calls)) { call = list_entry(rxrpc_calls.next, struct rxrpc_call, link); @@ -757,74 +575,15 @@ void __exit rxrpc_destroy_all_calls(void) rxrpc_see_call(call); list_del_init(&call->link); - pr_err("Call %p still in use (%d,%d,%s,%lx,%lx)!\n", + pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n", call, atomic_read(&call->usage), - atomic_read(&call->ackr_not_idle), rxrpc_call_states[call->state], call->flags, call->events); - if (!skb_queue_empty(&call->rx_queue)) - pr_err("Rx queue occupied\n"); - if (!skb_queue_empty(&call->rx_oos_queue)) - pr_err("OOS queue occupied\n"); - write_unlock_bh(&rxrpc_call_lock); + write_unlock(&rxrpc_call_lock); cond_resched(); - write_lock_bh(&rxrpc_call_lock); + write_lock(&rxrpc_call_lock); } - write_unlock_bh(&rxrpc_call_lock); - _leave(""); -} - -/* - * handle call lifetime being exceeded - */ -static void rxrpc_call_life_expired(unsigned long _call) -{ - struct rxrpc_call *call = (struct rxrpc_call *) _call; - - _enter("{%d}", call->debug_id); - - rxrpc_see_call(call); - if (call->state >= RXRPC_CALL_COMPLETE) - return; - - set_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events); - rxrpc_queue_call(call); -} - -/* - * handle resend timer expiry - * - may not take call->state_lock as this can deadlock against del_timer_sync() - */ -static void rxrpc_resend_time_expired(unsigned long _call) -{ - struct rxrpc_call *call = 
(struct rxrpc_call *) _call; - - _enter("{%d}", call->debug_id); - - rxrpc_see_call(call); - if (call->state >= RXRPC_CALL_COMPLETE) - return; - - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - if (!test_and_set_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events)) - rxrpc_queue_call(call); -} - -/* - * handle ACK timer expiry - */ -static void rxrpc_ack_time_expired(unsigned long _call) -{ - struct rxrpc_call *call = (struct rxrpc_call *) _call; - - _enter("{%d}", call->debug_id); - - rxrpc_see_call(call); - if (call->state >= RXRPC_CALL_COMPLETE) - return; - - if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events)) - rxrpc_queue_call(call); + write_unlock(&rxrpc_call_lock); } diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 8c7938b..0691007 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -15,10 +15,6 @@ #include #include #include -#include -#include -#include -#include #include #include #include @@ -140,16 +136,10 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, u32 abort_code, int error) { struct rxrpc_call *call; - bool queue; - int i, bit; + int i; _enter("{%d},%x", conn->debug_id, abort_code); - if (compl == RXRPC_CALL_LOCALLY_ABORTED) - bit = RXRPC_CALL_EV_CONN_ABORT; - else - bit = RXRPC_CALL_EV_RCVD_ABORT; - spin_lock(&conn->channel_lock); for (i = 0; i < RXRPC_MAXCALLS; i++) { @@ -157,22 +147,13 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, conn->channels[i].call, lockdep_is_held(&conn->channel_lock)); if (call) { - rxrpc_see_call(call); if (compl == RXRPC_CALL_LOCALLY_ABORTED) trace_rxrpc_abort("CON", call->cid, call->call_id, 0, abort_code, error); - - write_lock_bh(&call->state_lock); - if (rxrpc_set_call_completion(call, compl, abort_code, - error)) { - set_bit(bit, &call->events); - queue = true; - } - write_unlock_bh(&call->state_lock); - if (queue) - rxrpc_queue_call(call); - + if (rxrpc_set_call_completion(call, compl, + abort_code, error)) + rxrpc_notify_socket(call); } } @@ 
-251,17 +232,18 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, /* * mark a call as being on a now-secured channel - * - must be called with softirqs disabled + * - must be called with BH's disabled. */ static void rxrpc_call_is_secure(struct rxrpc_call *call) { _enter("%p", call); if (call) { - read_lock(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE && - !test_and_set_bit(RXRPC_CALL_EV_SECURED, &call->events)) - rxrpc_queue_call(call); - read_unlock(&call->state_lock); + write_lock_bh(&call->state_lock); + if (call->state == RXRPC_CALL_SERVER_SECURING) { + call->state = RXRPC_CALL_SERVER_ACCEPTING; + rxrpc_notify_socket(call); + } + write_unlock_bh(&call->state_lock); } } @@ -278,7 +260,7 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, int loop, ret; if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) { - kleave(" = -ECONNABORTED [%u]", conn->state); + _leave(" = -ECONNABORTED [%u]", conn->state); return -ECONNABORTED; } @@ -291,14 +273,14 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, return 0; case RXRPC_PACKET_TYPE_ABORT: - if (skb_copy_bits(skb, 0, &wtmp, sizeof(wtmp)) < 0) + if (skb_copy_bits(skb, sp->offset, &wtmp, sizeof(wtmp)) < 0) return -EPROTO; abort_code = ntohl(wtmp); _proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code); conn->state = RXRPC_CONN_REMOTELY_ABORTED; - rxrpc_abort_calls(conn, 0, RXRPC_CALL_REMOTELY_ABORTED, - abort_code); + rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, + abort_code, ECONNABORTED); return -ECONNABORTED; case RXRPC_PACKET_TYPE_CHALLENGE: @@ -323,14 +305,16 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) { conn->state = RXRPC_CONN_SERVICE; + spin_unlock(&conn->state_lock); for (loop = 0; loop < RXRPC_MAXCALLS; loop++) rxrpc_call_is_secure( rcu_dereference_protected( conn->channels[loop].call, lockdep_is_held(&conn->channel_lock))); + } else { + spin_unlock(&conn->state_lock); } 
- spin_unlock(&conn->state_lock); spin_unlock(&conn->channel_lock); return 0; @@ -433,88 +417,3 @@ protocol_error: _leave(" [EPROTO]"); goto out; } - -/* - * put a packet up for transport-level abort - */ -void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) -{ - CHECK_SLAB_OKAY(&local->usage); - - skb_queue_tail(&local->reject_queue, skb); - rxrpc_queue_local(local); -} - -/* - * reject packets through the local endpoint - */ -void rxrpc_reject_packets(struct rxrpc_local *local) -{ - union { - struct sockaddr sa; - struct sockaddr_in sin; - } sa; - struct rxrpc_skb_priv *sp; - struct rxrpc_wire_header whdr; - struct sk_buff *skb; - struct msghdr msg; - struct kvec iov[2]; - size_t size; - __be32 code; - - _enter("%d", local->debug_id); - - iov[0].iov_base = &whdr; - iov[0].iov_len = sizeof(whdr); - iov[1].iov_base = &code; - iov[1].iov_len = sizeof(code); - size = sizeof(whdr) + sizeof(code); - - msg.msg_name = &sa; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = 0; - - memset(&sa, 0, sizeof(sa)); - sa.sa.sa_family = local->srx.transport.family; - switch (sa.sa.sa_family) { - case AF_INET: - msg.msg_namelen = sizeof(sa.sin); - break; - default: - msg.msg_namelen = 0; - break; - } - - memset(&whdr, 0, sizeof(whdr)); - whdr.type = RXRPC_PACKET_TYPE_ABORT; - - while ((skb = skb_dequeue(&local->reject_queue))) { - rxrpc_see_skb(skb); - sp = rxrpc_skb(skb); - switch (sa.sa.sa_family) { - case AF_INET: - sa.sin.sin_port = udp_hdr(skb)->source; - sa.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; - code = htonl(skb->priority); - - whdr.epoch = htonl(sp->hdr.epoch); - whdr.cid = htonl(sp->hdr.cid); - whdr.callNumber = htonl(sp->hdr.callNumber); - whdr.serviceId = htons(sp->hdr.serviceId); - whdr.flags = sp->hdr.flags; - whdr.flags ^= RXRPC_CLIENT_INITIATED; - whdr.flags &= RXRPC_CLIENT_INITIATED; - - kernel_sendmsg(local->socket, &msg, iov, 2, size); - break; - - default: - break; - } - - rxrpc_free_skb(skb); - } - - _leave(""); -} diff 
--git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 8da82e3..ffa9add 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -169,7 +169,7 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn, chan->last_abort = call->abort_code; chan->last_type = RXRPC_PACKET_TYPE_ABORT; } else { - chan->last_seq = call->rx_data_eaten; + chan->last_seq = call->rx_hard_ack; chan->last_type = RXRPC_PACKET_TYPE_ACK; } /* Sync with rxrpc_conn_retransmit(). */ @@ -191,6 +191,10 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) { struct rxrpc_connection *conn = call->conn; + spin_lock_bh(&conn->params.peer->lock); + hlist_del_init(&call->error_link); + spin_unlock_bh(&conn->params.peer->lock); + if (rxrpc_is_client_call(call)) return rxrpc_disconnect_client_call(call); diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index 189338a..83d54da 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -65,9 +65,8 @@ done: * Insert a service connection into a peer's tree, thereby making it a target * for incoming packets. */ -static struct rxrpc_connection * -rxrpc_publish_service_conn(struct rxrpc_peer *peer, - struct rxrpc_connection *conn) +static void rxrpc_publish_service_conn(struct rxrpc_peer *peer, + struct rxrpc_connection *conn) { struct rxrpc_connection *cursor = NULL; struct rxrpc_conn_proto k = conn->proto; @@ -96,7 +95,7 @@ conn_published: set_bit(RXRPC_CONN_IN_SERVICE_CONNS, &conn->flags); write_sequnlock_bh(&peer->service_conn_lock); _leave(" = %d [new]", conn->debug_id); - return conn; + return; found_extant_conn: if (atomic_read(&cursor->usage) == 0) @@ -143,106 +142,30 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t gfp) } /* - * get a record of an incoming connection + * Set up an incoming connection. This is called in BH context with the RCU + * read lock held. 
*/ -struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *local, - struct sockaddr_rxrpc *srx, - struct sk_buff *skb) +void rxrpc_new_incoming_connection(struct rxrpc_connection *conn, + struct sk_buff *skb) { - struct rxrpc_connection *conn; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct rxrpc_peer *peer; - const char *new = "old"; _enter(""); - peer = rxrpc_lookup_peer(local, srx, GFP_NOIO); - if (!peer) { - _debug("no peer"); - return ERR_PTR(-EBUSY); - } - - ASSERT(sp->hdr.flags & RXRPC_CLIENT_INITIATED); - - rcu_read_lock(); - peer = rxrpc_lookup_peer_rcu(local, srx); - if (peer) { - conn = rxrpc_find_service_conn_rcu(peer, skb); - if (conn) { - if (sp->hdr.securityIndex != conn->security_ix) - goto security_mismatch_rcu; - if (rxrpc_get_connection_maybe(conn)) - goto found_extant_connection_rcu; - - /* The conn has expired but we can't remove it without - * the appropriate lock, so we attempt to replace it - * when we have a new candidate. - */ - } - - if (!rxrpc_get_peer_maybe(peer)) - peer = NULL; - } - rcu_read_unlock(); - - if (!peer) { - peer = rxrpc_lookup_peer(local, srx, GFP_NOIO); - if (!peer) - goto enomem; - } - - /* We don't have a matching record yet. */ - conn = rxrpc_alloc_connection(GFP_NOIO); - if (!conn) - goto enomem_peer; - conn->proto.epoch = sp->hdr.epoch; conn->proto.cid = sp->hdr.cid & RXRPC_CIDMASK; - conn->params.local = local; - conn->params.peer = peer; conn->params.service_id = sp->hdr.serviceId; conn->security_ix = sp->hdr.securityIndex; conn->out_clientflag = 0; - conn->state = RXRPC_CONN_SERVICE; - if (conn->params.service_id) + if (conn->security_ix) conn->state = RXRPC_CONN_SERVICE_UNSECURED; - - rxrpc_get_local(local); - - /* We maintain an extra ref on the connection whilst it is on - * the rxrpc_connections list. 
- */ - atomic_set(&conn->usage, 2); - - write_lock(&rxrpc_connection_lock); - list_add_tail(&conn->link, &rxrpc_connections); - list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list); - write_unlock(&rxrpc_connection_lock); + else + conn->state = RXRPC_CONN_SERVICE; /* Make the connection a target for incoming packets. */ - rxrpc_publish_service_conn(peer, conn); - - new = "new"; - -success: - _net("CONNECTION %s %d {%x}", new, conn->debug_id, conn->proto.cid); - _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage)); - return conn; - -found_extant_connection_rcu: - rcu_read_unlock(); - goto success; - -security_mismatch_rcu: - rcu_read_unlock(); - _leave(" = -EKEYREJECTED"); - return ERR_PTR(-EKEYREJECTED); + rxrpc_publish_service_conn(conn->params.peer, conn); -enomem_peer: - rxrpc_put_peer(peer); -enomem: - _leave(" = -ENOMEM"); - return ERR_PTR(-ENOMEM); + _net("CONNECTION new %d {%x}", conn->debug_id, conn->proto.cid); } /* diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 5906579..afeba98 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1,6 +1,6 @@ /* RxRPC packet reception * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved. 
* Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -27,549 +27,547 @@ #include #include "ar-internal.h" +static void rxrpc_proto_abort(const char *why, + struct rxrpc_call *call, rxrpc_seq_t seq) +{ + if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, EBADMSG)) { + set_bit(RXRPC_CALL_EV_ABORT, &call->events); + rxrpc_queue_call(call); + } +} + /* - * queue a packet for recvmsg to pass to userspace - * - the caller must hold a lock on call->lock - * - must not be called with interrupts disabled (sk_filter() disables BH's) - * - eats the packet whether successful or not - * - there must be just one reference to the packet, which the caller passes to - * this function + * Apply a hard ACK by advancing the Tx window. */ -int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb, - bool force, bool terminal) +static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to) { - struct rxrpc_skb_priv *sp; - struct rxrpc_sock *rx; - struct sock *sk; - int ret; + struct sk_buff *skb, *list = NULL; + int ix; - _enter(",,%d,%d", force, terminal); + spin_lock(&call->lock); - ASSERT(!irqs_disabled()); + while (before(call->tx_hard_ack, to)) { + call->tx_hard_ack++; + ix = call->tx_hard_ack & RXRPC_RXTX_BUFF_MASK; + skb = call->rxtx_buffer[ix]; + rxrpc_see_skb(skb); + call->rxtx_buffer[ix] = NULL; + call->rxtx_annotations[ix] = 0; + skb->next = list; + list = skb; + } - sp = rxrpc_skb(skb); - ASSERTCMP(sp->call, ==, call); + spin_unlock(&call->lock); - /* if we've already posted the terminal message for a call, then we - * don't post any more */ - if (test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) { - _debug("already terminated"); - ASSERTCMP(call->state, >=, RXRPC_CALL_COMPLETE); + while (list) { + skb = list; + list = skb->next; + skb->next = NULL; rxrpc_free_skb(skb); - return 0; } +} - /* The socket may go away under us */ - ret = 0; - rcu_read_lock(); - rx = 
rcu_dereference(call->socket); - if (!rx) - goto out; - sk = &rx->sk; - if (sock_flag(sk, SOCK_DEAD)) - goto out; +/* + * End the transmission phase of a call. + * + * This occurs when we get an ACKALL packet, the first DATA packet of a reply, + * or a final ACK packet. + */ +static bool rxrpc_end_tx_phase(struct rxrpc_call *call, const char *abort_why) +{ + _enter(""); - if (!force) { - /* cast skb->rcvbuf to unsigned... It's pointless, but - * reduces number of warnings when compiling with -W - * --ANK */ -// ret = -ENOBUFS; -// if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= -// (unsigned int) sk->sk_rcvbuf) -// goto out; - - ret = sk_filter(sk, skb); - if (ret < 0) - goto out; + switch (call->state) { + case RXRPC_CALL_CLIENT_RECV_REPLY: + return true; + case RXRPC_CALL_CLIENT_AWAIT_REPLY: + case RXRPC_CALL_SERVER_AWAIT_ACK: + break; + default: + rxrpc_proto_abort(abort_why, call, call->tx_top); + return false; } - spin_lock_bh(&sk->sk_receive_queue.lock); - if (!test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags) && - !test_bit(RXRPC_CALL_RELEASED, &call->flags) && - sk->sk_state != RXRPC_CLOSE) { - skb->destructor = rxrpc_packet_destructor; - skb->dev = NULL; - skb->sk = sk; - atomic_add(skb->truesize, &sk->sk_rmem_alloc); - - if (terminal) { - _debug("<<<< TERMINAL MESSAGE >>>>"); - set_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags); - } + rxrpc_rotate_tx_window(call, call->tx_top); - /* allow interception by a kernel service */ - if (skb->mark == RXRPC_SKB_MARK_NEW_CALL && - rx->notify_new_call) { - spin_unlock_bh(&sk->sk_receive_queue.lock); - skb_queue_tail(&call->knlrecv_queue, skb); - rx->notify_new_call(&rx->sk, NULL, 0); - } else if (call->notify_rx) { - spin_unlock_bh(&sk->sk_receive_queue.lock); - skb_queue_tail(&call->knlrecv_queue, skb); - call->notify_rx(&rx->sk, call, call->user_call_ID); - } else { - _net("post skb %p", skb); - __skb_queue_tail(&sk->sk_receive_queue, skb); - spin_unlock_bh(&sk->sk_receive_queue.lock); + 
write_lock(&call->state_lock); - sk->sk_data_ready(sk); - } - skb = NULL; - } else { - spin_unlock_bh(&sk->sk_receive_queue.lock); + switch (call->state) { + default: + break; + case RXRPC_CALL_CLIENT_AWAIT_REPLY: + call->state = RXRPC_CALL_CLIENT_RECV_REPLY; + break; + case RXRPC_CALL_SERVER_AWAIT_ACK: + __rxrpc_call_completed(call); + rxrpc_notify_socket(call); + break; } - ret = 0; -out: - rxrpc_free_skb(skb); - rcu_read_unlock(); + write_unlock(&call->state_lock); + _leave(" = ok"); + return true; +} + +/* + * Scan a jumbo packet to validate its structure and to work out how many + * subpackets it contains. + * + * A jumbo packet is a collection of consecutive packets glued together with + * little headers between that indicate how to change the initial header for + * each subpacket. + * + * RXRPC_JUMBO_PACKET must be set on all but the last subpacket - and all but + * the last are RXRPC_JUMBO_DATALEN in size. The last subpacket may be of any + * size. + */ +static bool rxrpc_validate_jumbo(struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + unsigned int offset = sp->offset; + unsigned int len = skb->data_len; + int nr_jumbo = 1; + u8 flags = sp->hdr.flags; + + do { + nr_jumbo++; + if (len - offset < RXRPC_JUMBO_SUBPKTLEN) + goto protocol_error; + if (flags & RXRPC_LAST_PACKET) + goto protocol_error; + offset += RXRPC_JUMBO_DATALEN; + if (skb_copy_bits(skb, offset, &flags, 1) < 0) + goto protocol_error; + offset += sizeof(struct rxrpc_jumbo_header); + } while (flags & RXRPC_JUMBO_PACKET); + + sp->nr_jumbo = nr_jumbo; + return true; - _leave(" = %d", ret); - return ret; +protocol_error: + return false; } /* - * process a DATA packet, posting the packet to the appropriate queue - * - eats the packet if successful + * Handle reception of a duplicate packet. 
+ * + * We have to take care to avoid an attack here whereby we're given a series of + * jumbograms, each with a sequence number one before the preceding one and + * filled up to maximum UDP size. If they never send us the first packet in + * the sequence, they can cause us to have to hold on to around 2MiB of kernel + * space until the call times out. + * + * We limit the space usage by only accepting three duplicate jumbo packets per + * call. After that, we tell the other side we're no longer accepting jumbos + * (that information is encoded in the ACK packet). */ -static int rxrpc_fast_process_data(struct rxrpc_call *call, - struct sk_buff *skb, u32 seq) +static void rxrpc_input_dup_data(struct rxrpc_call *call, rxrpc_seq_t seq, + u8 annotation, bool *_jumbo_dup) { - struct rxrpc_skb_priv *sp; - bool terminal; - int ret, ackbit, ack; - u32 serial; - u16 skew; - u8 flags; + /* Discard normal packets that are duplicates. */ + if (annotation == 0) + return; - _enter("{%u,%u},,{%u}", call->rx_data_post, call->rx_first_oos, seq); + /* Skip jumbo subpackets that are duplicates. When we've had three or + * more partially duplicate jumbo packets, we refuse to take any more + * jumbos for this call. + */ + if (!*_jumbo_dup) { + call->nr_jumbo_dup++; + *_jumbo_dup = true; + } +} - sp = rxrpc_skb(skb); - ASSERTCMP(sp->call, ==, NULL); - flags = sp->hdr.flags; - serial = sp->hdr.serial; - skew = skb->priority; +/* + * Process a DATA packet, adding the packet to the Rx ring. 
+ */ +static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, + u16 skew) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + unsigned int offset = sp->offset; + unsigned int ix; + rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0; + rxrpc_seq_t seq = sp->hdr.seq, hard_ack; + bool immediate_ack = false, jumbo_dup = false, queued; + u16 len; + u8 ack = 0, flags, annotation = 0; - spin_lock(&call->lock); + _enter("{%u,%u},{%u,%u}", + call->rx_hard_ack, call->rx_top, skb->data_len, seq); - if (call->state > RXRPC_CALL_COMPLETE) - goto discard; + _proto("Rx DATA %%%u { #%u f=%02x }", + sp->hdr.serial, seq, sp->hdr.flags); - ASSERTCMP(call->rx_data_expect, >=, call->rx_data_post); - ASSERTCMP(call->rx_data_post, >=, call->rx_data_recv); - ASSERTCMP(call->rx_data_recv, >=, call->rx_data_eaten); + if (call->state >= RXRPC_CALL_COMPLETE) + return; - if (seq < call->rx_data_post) { - _debug("dup #%u [-%u]", seq, call->rx_data_post); - ack = RXRPC_ACK_DUPLICATE; - ret = -ENOBUFS; - goto discard_and_ack; - } + /* Received data implicitly ACKs all of the request packets we sent + * when we're acting as a client. 
+ */ + if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY && + !rxrpc_end_tx_phase(call, "ETD")) + return; - /* we may already have the packet in the out of sequence queue */ - ackbit = seq - (call->rx_data_eaten + 1); - ASSERTCMP(ackbit, >=, 0); - if (__test_and_set_bit(ackbit, call->ackr_window)) { - _debug("dup oos #%u [%u,%u]", - seq, call->rx_data_eaten, call->rx_data_post); - ack = RXRPC_ACK_DUPLICATE; - goto discard_and_ack; - } + call->ackr_prev_seq = seq; - if (seq >= call->ackr_win_top) { - _debug("exceed #%u [%u]", seq, call->ackr_win_top); - __clear_bit(ackbit, call->ackr_window); + hard_ack = READ_ONCE(call->rx_hard_ack); + if (after(seq, hard_ack + call->rx_winsize)) { ack = RXRPC_ACK_EXCEEDS_WINDOW; - goto discard_and_ack; + ack_serial = serial; + goto ack; } - if (seq == call->rx_data_expect) { - clear_bit(RXRPC_CALL_EXPECT_OOS, &call->flags); - call->rx_data_expect++; - } else if (seq > call->rx_data_expect) { - _debug("oos #%u [%u]", seq, call->rx_data_expect); - call->rx_data_expect = seq + 1; - if (test_and_set_bit(RXRPC_CALL_EXPECT_OOS, &call->flags)) { - ack = RXRPC_ACK_OUT_OF_SEQUENCE; - goto enqueue_and_ack; + flags = sp->hdr.flags; + if (flags & RXRPC_JUMBO_PACKET) { + if (call->nr_jumbo_dup > 3) { + ack = RXRPC_ACK_NOSPACE; + ack_serial = serial; + goto ack; } - goto enqueue_packet; + annotation = 1; } - if (seq != call->rx_data_post) { - _debug("ahead #%u [%u]", seq, call->rx_data_post); - goto enqueue_packet; +next_subpacket: + queued = false; + ix = seq & RXRPC_RXTX_BUFF_MASK; + len = skb->data_len; + if (flags & RXRPC_JUMBO_PACKET) + len = RXRPC_JUMBO_DATALEN; + + if (flags & RXRPC_LAST_PACKET) { + if (test_and_set_bit(RXRPC_CALL_RX_LAST, &call->flags) && + seq != call->rx_top) + return rxrpc_proto_abort("LSN", call, seq); + } else { + if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && + after_eq(seq, call->rx_top)) + return rxrpc_proto_abort("LSA", call, seq); } - if (test_bit(RXRPC_CALL_RCVD_LAST, &call->flags)) - goto protocol_error; - 
- /* if the packet need security things doing to it, then it goes down - * the slow path */ - if (call->security_ix) - goto enqueue_packet; - - sp->call = call; - rxrpc_get_call_for_skb(call, skb); - terminal = ((flags & RXRPC_LAST_PACKET) && - !(flags & RXRPC_CLIENT_INITIATED)); - ret = rxrpc_queue_rcv_skb(call, skb, false, terminal); - if (ret < 0) { - if (ret == -ENOMEM || ret == -ENOBUFS) { - __clear_bit(ackbit, call->ackr_window); - ack = RXRPC_ACK_NOSPACE; - goto discard_and_ack; + if (before_eq(seq, hard_ack)) { + ack = RXRPC_ACK_DUPLICATE; + ack_serial = serial; + goto skip; + } + + if (flags & RXRPC_REQUEST_ACK && !ack) { + ack = RXRPC_ACK_REQUESTED; + ack_serial = serial; + } + + if (call->rxtx_buffer[ix]) { + rxrpc_input_dup_data(call, seq, annotation, &jumbo_dup); + if (ack != RXRPC_ACK_DUPLICATE) { + ack = RXRPC_ACK_DUPLICATE; + ack_serial = serial; } - goto out; + immediate_ack = true; + goto skip; } - skb = NULL; - sp = NULL; - - _debug("post #%u", seq); - ASSERTCMP(call->rx_data_post, ==, seq); - call->rx_data_post++; - - if (flags & RXRPC_LAST_PACKET) - set_bit(RXRPC_CALL_RCVD_LAST, &call->flags); - - /* if we've reached an out of sequence packet then we need to drain - * that queue into the socket Rx queue now */ - if (call->rx_data_post == call->rx_first_oos) { - _debug("drain rx oos now"); - read_lock(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE && - !test_and_set_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events)) - rxrpc_queue_call(call); - read_unlock(&call->state_lock); + /* Queue the packet. We use a couple of memory barriers here as need + * to make sure that rx_top is perceived to be set after the buffer + * pointer and that the buffer pointer is set after the annotation and + * the skb data. + * + * Barriers against rxrpc_recvmsg_data() and rxrpc_rotate_rx_window() + * and also rxrpc_fill_out_ack(). 
+ */ + rxrpc_get_skb(skb); + call->rxtx_annotations[ix] = annotation; + smp_wmb(); + call->rxtx_buffer[ix] = skb; + if (after(seq, call->rx_top)) + smp_store_release(&call->rx_top, seq); + queued = true; + + if (after_eq(seq, call->rx_expect_next)) { + if (after(seq, call->rx_expect_next)) { + _net("OOS %u > %u", seq, call->rx_expect_next); + ack = RXRPC_ACK_OUT_OF_SEQUENCE; + ack_serial = serial; + } + call->rx_expect_next = seq + 1; } - spin_unlock(&call->lock); - atomic_inc(&call->ackr_not_idle); - rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, skew, serial, false); - _leave(" = 0 [posted]"); - return 0; +skip: + offset += len; + if (flags & RXRPC_JUMBO_PACKET) { + if (skb_copy_bits(skb, offset, &flags, 1) < 0) + return rxrpc_proto_abort("XJF", call, seq); + offset += sizeof(struct rxrpc_jumbo_header); + seq++; + serial++; + annotation++; + if (flags & RXRPC_JUMBO_PACKET) + annotation |= RXRPC_RX_ANNO_JLAST; + + _proto("Rx DATA Jumbo %%%u", serial); + goto next_subpacket; + } -protocol_error: - ret = -EBADMSG; -out: - spin_unlock(&call->lock); - _leave(" = %d", ret); - return ret; + if (queued && flags & RXRPC_LAST_PACKET && !ack) { + ack = RXRPC_ACK_DELAY; + ack_serial = serial; + } -discard_and_ack: - _debug("discard and ACK packet %p", skb); - __rxrpc_propose_ACK(call, ack, skew, serial, true); -discard: - spin_unlock(&call->lock); - rxrpc_free_skb(skb); - _leave(" = 0 [discarded]"); - return 0; +ack: + if (ack) + rxrpc_propose_ACK(call, ack, skew, ack_serial, + immediate_ack, true); -enqueue_and_ack: - __rxrpc_propose_ACK(call, ack, skew, serial, true); -enqueue_packet: - _net("defer skb %p", skb); - spin_unlock(&call->lock); - skb_queue_tail(&call->rx_queue, skb); - atomic_inc(&call->ackr_not_idle); - read_lock(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) - rxrpc_queue_call(call); - read_unlock(&call->state_lock); - _leave(" = 0 [queued]"); - return 0; + if (sp->hdr.seq == READ_ONCE(call->rx_hard_ack) + 1) + rxrpc_notify_socket(call); + _leave(" 
[queued]"); } /* - * assume an implicit ACKALL of the transmission phase of a client socket upon - * reception of the first reply packet + * Process the extra information that may be appended to an ACK packet */ -static void rxrpc_assume_implicit_ackall(struct rxrpc_call *call, u32 serial) +static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, + struct rxrpc_ackinfo *ackinfo) { - write_lock_bh(&call->state_lock); - - switch (call->state) { - case RXRPC_CALL_CLIENT_AWAIT_REPLY: - call->state = RXRPC_CALL_CLIENT_RECV_REPLY; - call->acks_latest = serial; - - _debug("implicit ACKALL %%%u", call->acks_latest); - set_bit(RXRPC_CALL_EV_RCVD_ACKALL, &call->events); - write_unlock_bh(&call->state_lock); + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_peer *peer; + unsigned int mtu; + + _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }", + sp->hdr.serial, + ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU), + ntohl(ackinfo->rwind), ntohl(ackinfo->jumbo_max)); + + if (call->tx_winsize > ntohl(ackinfo->rwind)) + call->tx_winsize = ntohl(ackinfo->rwind); + + mtu = min(ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU)); + + peer = call->peer; + if (mtu < peer->maxdata) { + spin_lock_bh(&peer->lock); + peer->maxdata = mtu; + peer->mtu = mtu + peer->hdrsize; + spin_unlock_bh(&peer->lock); + _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata); + } +} - if (try_to_del_timer_sync(&call->resend_timer) >= 0) { - clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events); - clear_bit(RXRPC_CALL_EV_RESEND, &call->events); - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); +/* + * Process individual soft ACKs. + * + * Each ACK in the array corresponds to one packet and can be either an ACK or + * a NAK. If we get find an explicitly NAK'd packet we resend immediately; + * packets that lie beyond the end of the ACK list are scheduled for resend by + * the timer on the basis that the peer might just not have processed them at + * the time the ACK was sent. 
+ */ +static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, + rxrpc_seq_t seq, int nr_acks) +{ + bool resend = false; + int ix; + + for (; nr_acks > 0; nr_acks--, seq++) { + ix = seq & RXRPC_RXTX_BUFF_MASK; + switch (*acks) { + case RXRPC_ACK_TYPE_ACK: + call->rxtx_annotations[ix] = RXRPC_TX_ANNO_ACK; + break; + case RXRPC_ACK_TYPE_NACK: + if (call->rxtx_annotations[ix] == RXRPC_TX_ANNO_NAK) + continue; + call->rxtx_annotations[ix] = RXRPC_TX_ANNO_NAK; + resend = true; + break; + default: + return rxrpc_proto_abort("SFT", call, 0); } - break; - - default: - write_unlock_bh(&call->state_lock); - break; } + + if (resend && + !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) + rxrpc_queue_call(call); } /* - * post an incoming packet to the nominated call to deal with - * - must get rid of the sk_buff, either by freeing it or by queuing it + * Process an ACK packet. + * + * ack.firstPacket is the sequence number of the first soft-ACK'd/NAK'd packet + * in the ACK array. Anything before that is hard-ACK'd and may be discarded. + * + * A hard-ACK means that a packet has been processed and may be discarded; a + * soft-ACK means that the packet may be discarded and retransmission + * requested. A phase is complete when all packets are hard-ACK'd. 
*/ -void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb) +static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, + u16 skew) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - __be32 wtmp; - u32 abort_code; - - _enter("%p,%p", call, skb); - - ASSERT(!irqs_disabled()); - -#if 0 // INJECT RX ERROR - if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA) { - static int skip = 0; - if (++skip == 3) { - printk("DROPPED 3RD PACKET!!!!!!!!!!!!!\n"); - skip = 0; - goto free_packet; - } + union { + struct rxrpc_ackpacket ack; + struct rxrpc_ackinfo info; + u8 acks[RXRPC_MAXACKS]; + } buf; + rxrpc_seq_t first_soft_ack, hard_ack; + int nr_acks, offset; + + _enter(""); + + if (skb_copy_bits(skb, sp->offset, &buf.ack, sizeof(buf.ack)) < 0) { + _debug("extraction failure"); + return rxrpc_proto_abort("XAK", call, 0); } -#endif - - /* request ACK generation for any ACK or DATA packet that requests - * it */ - if (sp->hdr.flags & RXRPC_REQUEST_ACK) { - _proto("ACK Requested on %%%u", sp->hdr.serial); + sp->offset += sizeof(buf.ack); + + first_soft_ack = ntohl(buf.ack.firstPacket); + hard_ack = first_soft_ack - 1; + nr_acks = buf.ack.nAcks; + + _proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", + sp->hdr.serial, + ntohs(buf.ack.maxSkew), + first_soft_ack, + ntohl(buf.ack.previousPacket), + ntohl(buf.ack.serial), + rxrpc_acks(buf.ack.reason), + buf.ack.nAcks); + + if (buf.ack.reason == RXRPC_ACK_PING) { + _proto("Rx ACK %%%u PING Request", sp->hdr.serial); + rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE, + skew, sp->hdr.serial, true, true); + } else if (sp->hdr.flags & RXRPC_REQUEST_ACK) { rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, - skb->priority, sp->hdr.serial, false); + skew, sp->hdr.serial, true, true); } - switch (sp->hdr.type) { - case RXRPC_PACKET_TYPE_ABORT: - _debug("abort"); - - if (skb_copy_bits(skb, 0, &wtmp, sizeof(wtmp)) < 0) - goto protocol_error; - - abort_code = ntohl(wtmp); - _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, 
abort_code); - - if (__rxrpc_set_call_completion(call, - RXRPC_CALL_REMOTELY_ABORTED, - abort_code, ECONNABORTED)) { - set_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events); - rxrpc_queue_call(call); - } - goto free_packet; - - case RXRPC_PACKET_TYPE_BUSY: - _proto("Rx BUSY %%%u", sp->hdr.serial); - - if (rxrpc_is_service_call(call)) - goto protocol_error; + offset = sp->offset + nr_acks + 3; + if (skb->data_len >= offset + sizeof(buf.info)) { + if (skb_copy_bits(skb, offset, &buf.info, sizeof(buf.info)) < 0) + return rxrpc_proto_abort("XAI", call, 0); + rxrpc_input_ackinfo(call, skb, &buf.info); + } - write_lock_bh(&call->state_lock); - switch (call->state) { - case RXRPC_CALL_CLIENT_SEND_REQUEST: - __rxrpc_set_call_completion(call, - RXRPC_CALL_SERVER_BUSY, - 0, EBUSY); - set_bit(RXRPC_CALL_EV_RCVD_BUSY, &call->events); - rxrpc_queue_call(call); - case RXRPC_CALL_SERVER_BUSY: - goto free_packet_unlock; - default: - goto protocol_error_locked; - } + if (first_soft_ack == 0) + return rxrpc_proto_abort("AK0", call, 0); + /* Ignore ACKs unless we are or have just been transmitting. */ + switch (call->state) { + case RXRPC_CALL_CLIENT_SEND_REQUEST: + case RXRPC_CALL_CLIENT_AWAIT_REPLY: + case RXRPC_CALL_SERVER_SEND_REPLY: + case RXRPC_CALL_SERVER_AWAIT_ACK: + break; default: - _proto("Rx %s %%%u", rxrpc_pkts[sp->hdr.type], sp->hdr.serial); - goto protocol_error; - - case RXRPC_PACKET_TYPE_DATA: - _proto("Rx DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq); - - if (sp->hdr.seq == 0) - goto protocol_error; - - call->ackr_prev_seq = sp->hdr.seq; + return; + } - /* received data implicitly ACKs all of the request packets we - * sent when we're acting as a client */ - if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) - rxrpc_assume_implicit_ackall(call, sp->hdr.serial); + /* Discard any out-of-order or duplicate ACKs. 
*/ + if ((int)sp->hdr.serial - (int)call->acks_latest <= 0) { + _debug("discard ACK %d <= %d", + sp->hdr.serial, call->acks_latest); + return; + } + call->acks_latest = sp->hdr.serial; - switch (rxrpc_fast_process_data(call, skb, sp->hdr.seq)) { - case 0: - skb = NULL; - goto done; + if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) && + hard_ack == call->tx_top) { + rxrpc_end_tx_phase(call, "ETA"); + return; + } - default: - BUG(); + if (before(hard_ack, call->tx_hard_ack) || + after(hard_ack, call->tx_top)) + return rxrpc_proto_abort("AKW", call, 0); - /* data packet received beyond the last packet */ - case -EBADMSG: - goto protocol_error; - } + if (after(hard_ack, call->tx_hard_ack)) + rxrpc_rotate_tx_window(call, hard_ack); - case RXRPC_PACKET_TYPE_ACKALL: - case RXRPC_PACKET_TYPE_ACK: - /* ACK processing is done in process context */ - read_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) { - skb_queue_tail(&call->rx_queue, skb); - rxrpc_queue_call(call); - skb = NULL; - } - read_unlock_bh(&call->state_lock); - goto free_packet; - } + if (after(first_soft_ack, call->tx_top)) + return; -protocol_error: - _debug("protocol error"); - write_lock_bh(&call->state_lock); -protocol_error_locked: - if (__rxrpc_abort_call("FPR", call, 0, RX_PROTOCOL_ERROR, EPROTO)) - rxrpc_queue_call(call); -free_packet_unlock: - write_unlock_bh(&call->state_lock); -free_packet: - rxrpc_free_skb(skb); -done: - _leave(""); + if (nr_acks > call->tx_top - first_soft_ack + 1) + nr_acks = first_soft_ack - call->tx_top + 1; + if (skb_copy_bits(skb, sp->offset, buf.acks, nr_acks) < 0) + return rxrpc_proto_abort("XSA", call, 0); + rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks); } /* - * split up a jumbo data packet + * Process an ACKALL packet. 
*/ -static void rxrpc_process_jumbo_packet(struct rxrpc_call *call, - struct sk_buff *jumbo) +static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) { - struct rxrpc_jumbo_header jhdr; - struct rxrpc_skb_priv *sp; - struct sk_buff *part; - - _enter(",{%u,%u}", jumbo->data_len, jumbo->len); - - sp = rxrpc_skb(jumbo); - - do { - sp->hdr.flags &= ~RXRPC_JUMBO_PACKET; - - /* make a clone to represent the first subpacket in what's left - * of the jumbo packet */ - part = skb_clone(jumbo, GFP_ATOMIC); - if (!part) { - /* simply ditch the tail in the event of ENOMEM */ - pskb_trim(jumbo, RXRPC_JUMBO_DATALEN); - break; - } - rxrpc_new_skb(part); - - pskb_trim(part, RXRPC_JUMBO_DATALEN); - - if (!pskb_pull(jumbo, RXRPC_JUMBO_DATALEN)) - goto protocol_error; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - if (skb_copy_bits(jumbo, 0, &jhdr, sizeof(jhdr)) < 0) - goto protocol_error; - if (!pskb_pull(jumbo, sizeof(jhdr))) - BUG(); + _proto("Rx ACKALL %%%u", sp->hdr.serial); - sp->hdr.seq += 1; - sp->hdr.serial += 1; - sp->hdr.flags = jhdr.flags; - sp->hdr._rsvd = ntohs(jhdr._rsvd); + rxrpc_end_tx_phase(call, "ETL"); +} - _proto("Rx DATA Jumbo %%%u", sp->hdr.serial - 1); +/* + * Process an ABORT packet. 
+ */ +static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + __be32 wtmp; + u32 abort_code = RX_CALL_DEAD; - rxrpc_fast_process_packet(call, part); - part = NULL; + _enter(""); - } while (sp->hdr.flags & RXRPC_JUMBO_PACKET); + if (skb->len >= 4 && + skb_copy_bits(skb, sp->offset, &wtmp, sizeof(wtmp)) >= 0) + abort_code = ntohl(wtmp); - rxrpc_fast_process_packet(call, jumbo); - _leave(""); - return; + _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code); -protocol_error: - _debug("protocol error"); - rxrpc_free_skb(part); - if (rxrpc_abort_call("PJP", call, sp->hdr.seq, - RX_PROTOCOL_ERROR, EPROTO)) - rxrpc_queue_call(call); - rxrpc_free_skb(jumbo); - _leave(""); + if (rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, + abort_code, ECONNABORTED)) + rxrpc_notify_socket(call); } /* - * post an incoming packet to the appropriate call/socket to deal with - * - must get rid of the sk_buff, either by freeing it or by queuing it + * Process an incoming call packet. 
*/ -static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn, - struct rxrpc_call *call, - struct sk_buff *skb) +static void rxrpc_input_call_packet(struct rxrpc_call *call, + struct sk_buff *skb, u16 skew) { - struct rxrpc_skb_priv *sp; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); _enter("%p,%p", call, skb); - sp = rxrpc_skb(skb); - - _debug("extant call [%d]", call->state); - - read_lock(&call->state_lock); - switch (call->state) { - case RXRPC_CALL_COMPLETE: - switch (call->completion) { - case RXRPC_CALL_LOCALLY_ABORTED: - if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, - &call->events)) { - rxrpc_queue_call(call); - goto free_unlock; - } - default: - goto dead_call; - case RXRPC_CALL_SUCCEEDED: - if (rxrpc_is_service_call(call)) - goto dead_call; - goto resend_final_ack; - } - - case RXRPC_CALL_CLIENT_FINAL_ACK: - goto resend_final_ack; + switch (sp->hdr.type) { + case RXRPC_PACKET_TYPE_DATA: + rxrpc_input_data(call, skb, skew); + break; - default: + case RXRPC_PACKET_TYPE_ACK: + rxrpc_input_ack(call, skb, skew); break; - } - read_unlock(&call->state_lock); + case RXRPC_PACKET_TYPE_BUSY: + _proto("Rx BUSY %%%u", sp->hdr.serial); - if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && - sp->hdr.flags & RXRPC_JUMBO_PACKET) - rxrpc_process_jumbo_packet(call, skb); - else - rxrpc_fast_process_packet(call, skb); + /* Just ignore BUSY packets from the server; the retry and + * lifespan timers will take care of business. BUSY packets + * from the client don't make sense. 
+ */ + break; - goto done; + case RXRPC_PACKET_TYPE_ABORT: + rxrpc_input_abort(call, skb); + break; -resend_final_ack: - _debug("final ack again"); - set_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); - rxrpc_queue_call(call); - goto free_unlock; + case RXRPC_PACKET_TYPE_ACKALL: + rxrpc_input_ackall(call, skb); + break; -dead_call: - if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) { - skb->priority = RX_CALL_DEAD; - rxrpc_reject_packet(conn->params.local, skb); - goto unlock; + default: + _proto("Rx %s %%%u", rxrpc_pkts[sp->hdr.type], sp->hdr.serial); + break; } -free_unlock: - rxrpc_free_skb(skb); -unlock: - read_unlock(&call->state_lock); -done: + _leave(""); } @@ -601,6 +599,17 @@ static void rxrpc_post_packet_to_local(struct rxrpc_local *local, } /* + * put a packet up for transport-level abort + */ +static void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) +{ + CHECK_SLAB_OKAY(&local->usage); + + skb_queue_tail(&local->reject_queue, skb); + rxrpc_queue_local(local); +} + +/* * Extract the wire header from a packet and translate the byte order. */ static noinline @@ -611,8 +620,6 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) /* dig out the RxRPC connection details */ if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) return -EBADMSG; - if (!pskb_pull(skb, sizeof(whdr))) - BUG(); memset(sp, 0, sizeof(*sp)); sp->hdr.epoch = ntohl(whdr.epoch); @@ -626,6 +633,7 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) sp->hdr.securityIndex = whdr.securityIndex; sp->hdr._rsvd = ntohs(whdr._rsvd); sp->hdr.serviceId = ntohs(whdr.serviceId); + sp->offset = sizeof(whdr); return 0; } @@ -637,19 +645,22 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) * shut down and the local endpoint from going away, thus sk_user_data will not * be cleared until this function returns. 
*/ -void rxrpc_data_ready(struct sock *sk) +void rxrpc_data_ready(struct sock *udp_sk) { struct rxrpc_connection *conn; + struct rxrpc_channel *chan; + struct rxrpc_call *call; struct rxrpc_skb_priv *sp; - struct rxrpc_local *local = sk->sk_user_data; + struct rxrpc_local *local = udp_sk->sk_user_data; struct sk_buff *skb; + unsigned int channel; int ret, skew; - _enter("%p", sk); + _enter("%p", udp_sk); ASSERT(!irqs_disabled()); - skb = skb_recv_datagram(sk, 0, 1, &ret); + skb = skb_recv_datagram(udp_sk, 0, 1, &ret); if (!skb) { if (ret == -EAGAIN) return; @@ -695,111 +706,122 @@ void rxrpc_data_ready(struct sock *sk) goto bad_message; } - if (sp->hdr.type == RXRPC_PACKET_TYPE_VERSION) { + switch (sp->hdr.type) { + case RXRPC_PACKET_TYPE_VERSION: rxrpc_post_packet_to_local(local, skb); goto out; - } - if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && - (sp->hdr.callNumber == 0 || sp->hdr.seq == 0)) - goto bad_message; + case RXRPC_PACKET_TYPE_BUSY: + if (sp->hdr.flags & RXRPC_CLIENT_INITIATED) + goto discard; + + case RXRPC_PACKET_TYPE_DATA: + if (sp->hdr.callNumber == 0) + goto bad_message; + if (sp->hdr.flags & RXRPC_JUMBO_PACKET && + !rxrpc_validate_jumbo(skb)) + goto bad_message; + break; + } rcu_read_lock(); conn = rxrpc_find_connection_rcu(local, skb); - if (!conn) { - skb->priority = 0; - goto cant_route_call; - } + if (conn) { + if (sp->hdr.securityIndex != conn->security_ix) + goto wrong_security; - /* Note the serial number skew here */ - skew = (int)sp->hdr.serial - (int)conn->hi_serial; - if (skew >= 0) { - if (skew > 0) - conn->hi_serial = sp->hdr.serial; - skb->priority = 0; - } else { - skew = -skew; - skb->priority = min(skew, 65535); - } + if (sp->hdr.callNumber == 0) { + /* Connection-level packet */ + _debug("CONN %p {%d}", conn, conn->debug_id); + rxrpc_post_packet_to_conn(conn, skb); + goto out_unlock; + } + + /* Note the serial number skew here */ + skew = (int)sp->hdr.serial - (int)conn->hi_serial; + if (skew >= 0) { + if (skew > 0) + 
conn->hi_serial = sp->hdr.serial; + } else { + skew = -skew; + skew = min(skew, 65535); + } - if (sp->hdr.callNumber == 0) { - /* Connection-level packet */ - _debug("CONN %p {%d}", conn, conn->debug_id); - rxrpc_post_packet_to_conn(conn, skb); - goto out_unlock; - } else { /* Call-bound packets are routed by connection channel. */ - unsigned int channel = sp->hdr.cid & RXRPC_CHANNELMASK; - struct rxrpc_channel *chan = &conn->channels[channel]; - struct rxrpc_call *call; + channel = sp->hdr.cid & RXRPC_CHANNELMASK; + chan = &conn->channels[channel]; /* Ignore really old calls */ if (sp->hdr.callNumber < chan->last_call) goto discard_unlock; if (sp->hdr.callNumber == chan->last_call) { - /* For the previous service call, if completed - * successfully, we discard all further packets. + /* For the previous service call, if completed successfully, we + * discard all further packets. */ if (rxrpc_conn_is_service(conn) && (chan->last_type == RXRPC_PACKET_TYPE_ACK || sp->hdr.type == RXRPC_PACKET_TYPE_ABORT)) goto discard_unlock; - /* But otherwise we need to retransmit the final packet - * from data cached in the connection record. + /* But otherwise we need to retransmit the final packet from + * data cached in the connection record. 
*/ rxrpc_post_packet_to_conn(conn, skb); goto out_unlock; } call = rcu_dereference(chan->call); - if (!call || atomic_read(&call->usage) == 0) - goto cant_route_call; + } else { + skew = 0; + call = NULL; + } - rxrpc_see_call(call); - rxrpc_post_packet_to_call(conn, call, skb); - goto out_unlock; + if (!call || atomic_read(&call->usage) == 0) { + if (!(sp->hdr.type & RXRPC_CLIENT_INITIATED) || + sp->hdr.callNumber == 0 || + sp->hdr.type != RXRPC_PACKET_TYPE_DATA) + goto bad_message_unlock; + if (sp->hdr.seq != 1) + goto discard_unlock; + call = rxrpc_new_incoming_call(local, conn, skb); + if (!call) { + rcu_read_unlock(); + goto reject_packet; + } } + rxrpc_input_call_packet(call, skb, skew); + goto discard_unlock; + discard_unlock: - rxrpc_free_skb(skb); -out_unlock: rcu_read_unlock(); +discard: + rxrpc_free_skb(skb); out: trace_rxrpc_rx_done(0, 0); return; -cant_route_call: +out_unlock: rcu_read_unlock(); + goto out; - _debug("can't route call"); - if (sp->hdr.flags & RXRPC_CLIENT_INITIATED && - sp->hdr.type == RXRPC_PACKET_TYPE_DATA) { - if (sp->hdr.seq == 1) { - _debug("first packet"); - skb_queue_tail(&local->accept_queue, skb); - rxrpc_queue_work(&local->processor); - _leave(" [incoming]"); - goto out; - } - skb->priority = RX_INVALID_OPERATION; - } else { - skb->priority = RX_CALL_DEAD; - } - - if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) { - _debug("reject type %d",sp->hdr.type); - goto reject_packet; - } else { - rxrpc_free_skb(skb); - } - _leave(" [no call]"); - return; +wrong_security: + rcu_read_unlock(); + trace_rxrpc_abort("SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RXKADINCONSISTENCY, EBADMSG); + skb->priority = RXKADINCONSISTENCY; + goto post_abort; +bad_message_unlock: + rcu_read_unlock(); bad_message: + trace_rxrpc_abort("BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RX_PROTOCOL_ERROR, EBADMSG); skb->priority = RX_PROTOCOL_ERROR; +post_abort: + skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; reject_packet: trace_rxrpc_rx_done(skb->mark, 
skb->priority); rxrpc_reject_packet(local, skb); diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index a4aba02..7d4375e 100644 --- a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -30,14 +30,18 @@ static int none_secure_packet(struct rxrpc_call *call, return 0; } -static int none_verify_packet(struct rxrpc_call *call, - struct sk_buff *skb, - rxrpc_seq_t seq, - u16 expected_cksum) +static int none_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int offset, unsigned int len, + rxrpc_seq_t seq, u16 expected_cksum) { return 0; } +static void none_locate_data(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int *_offset, unsigned int *_len) +{ +} + static int none_respond_to_challenge(struct rxrpc_connection *conn, struct sk_buff *skb, u32 *_abort_code) @@ -79,6 +83,7 @@ const struct rxrpc_security rxrpc_no_security = { .prime_packet_security = none_prime_packet_security, .secure_packet = none_secure_packet, .verify_packet = none_verify_packet, + .locate_data = none_locate_data, .respond_to_challenge = none_respond_to_challenge, .verify_response = none_verify_response, .clear = none_clear, diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c index bcc6593..cdd58e6 100644 --- a/net/rxrpc/local_event.c +++ b/net/rxrpc/local_event.c @@ -98,7 +98,7 @@ void rxrpc_process_local_events(struct rxrpc_local *local) switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_VERSION: - if (skb_copy_bits(skb, 0, &v, 1) < 0) + if (skb_copy_bits(skb, sp->offset, &v, 1) < 0) return; _proto("Rx VERSION { %02x }", v); if (v == 0) diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 610916f..782b9ad 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -77,7 +77,6 @@ static struct rxrpc_local *rxrpc_alloc_local(const struct sockaddr_rxrpc *srx) INIT_WORK(&local->processor, rxrpc_local_processor); INIT_HLIST_HEAD(&local->services); init_rwsem(&local->defrag_sem); - skb_queue_head_init(&local->accept_queue); 
skb_queue_head_init(&local->reject_queue); skb_queue_head_init(&local->event_queue); local->client_conns = RB_ROOT; @@ -308,7 +307,6 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local) /* At this point, there should be no more packets coming in to the * local endpoint. */ - rxrpc_purge_queue(&local->accept_queue); rxrpc_purge_queue(&local->reject_queue); rxrpc_purge_queue(&local->event_queue); @@ -332,11 +330,6 @@ static void rxrpc_local_processor(struct work_struct *work) if (atomic_read(&local->usage) == 0) return rxrpc_local_destroyer(local); - if (!skb_queue_empty(&local->accept_queue)) { - rxrpc_accept_incoming_calls(local); - again = true; - } - if (!skb_queue_empty(&local->reject_queue)) { rxrpc_reject_packets(local); again = true; diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 39e7cc3..fd096f7 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -50,7 +50,7 @@ unsigned int rxrpc_idle_ack_delay = 0.5 * HZ; * limit is hit, we should generate an EXCEEDS_WINDOW ACK and discard further * packets. */ -unsigned int rxrpc_rx_window_size = 32; +unsigned int rxrpc_rx_window_size = RXRPC_RXTX_BUFF_SIZE - 46; /* * Maximum Rx MTU size. This indicates to the sender the size of jumbo packet diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 8756d74..719a4c2 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include #include #include "ar-internal.h" @@ -38,20 +40,38 @@ struct rxrpc_pkt_buffer { static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, struct rxrpc_pkt_buffer *pkt) { + rxrpc_seq_t hard_ack, top, seq; + int ix; u32 mtu, jmax; u8 *ackp = pkt->acks; + /* Barrier against rxrpc_input_data(). 
*/ + hard_ack = READ_ONCE(call->rx_hard_ack); + top = smp_load_acquire(&call->rx_top); + pkt->ack.bufferSpace = htons(8); - pkt->ack.maxSkew = htons(0); - pkt->ack.firstPacket = htonl(call->rx_data_eaten + 1); + pkt->ack.maxSkew = htons(call->ackr_skew); + pkt->ack.firstPacket = htonl(hard_ack + 1); pkt->ack.previousPacket = htonl(call->ackr_prev_seq); pkt->ack.serial = htonl(call->ackr_serial); - pkt->ack.reason = RXRPC_ACK_IDLE; - pkt->ack.nAcks = 0; + pkt->ack.reason = call->ackr_reason; + pkt->ack.nAcks = top - hard_ack; + + if (after(top, hard_ack)) { + seq = hard_ack + 1; + do { + ix = seq & RXRPC_RXTX_BUFF_MASK; + if (call->rxtx_buffer[ix]) + *ackp++ = RXRPC_ACK_TYPE_ACK; + else + *ackp++ = RXRPC_ACK_TYPE_NACK; + seq++; + } while (before_eq(seq, top)); + } - mtu = call->peer->if_mtu; - mtu -= call->peer->hdrsize; - jmax = rxrpc_rx_jumbo_max; + mtu = call->conn->params.peer->if_mtu; + mtu -= call->conn->params.peer->hdrsize; + jmax = (call->nr_jumbo_dup > 3) ? 1 : rxrpc_rx_jumbo_max; pkt->ackinfo.rxMTU = htonl(rxrpc_rx_mtu); pkt->ackinfo.maxMTU = htonl(mtu); pkt->ackinfo.rwind = htonl(rxrpc_rx_window_size); @@ -60,11 +80,11 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, *ackp++ = 0; *ackp++ = 0; *ackp++ = 0; - return 3; + return top - hard_ack + 3; } /* - * Send a final ACK or ABORT call packet. + * Send an ACK or ABORT call packet. 
*/ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) { @@ -158,6 +178,19 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len); + if (ret < 0 && call->state < RXRPC_CALL_COMPLETE) { + switch (pkt->whdr.type) { + case RXRPC_PACKET_TYPE_ACK: + rxrpc_propose_ACK(call, pkt->ack.reason, + ntohs(pkt->ack.maxSkew), + ntohl(pkt->ack.serial), + true, true); + break; + case RXRPC_PACKET_TYPE_ABORT: + break; + } + } + out: rxrpc_put_connection(conn); kfree(pkt); @@ -233,3 +266,77 @@ send_fragmentable: _leave(" = %d [frag %u]", ret, conn->params.peer->maxdata); return ret; } + +/* + * reject packets through the local endpoint + */ +void rxrpc_reject_packets(struct rxrpc_local *local) +{ + union { + struct sockaddr sa; + struct sockaddr_in sin; + } sa; + struct rxrpc_skb_priv *sp; + struct rxrpc_wire_header whdr; + struct sk_buff *skb; + struct msghdr msg; + struct kvec iov[2]; + size_t size; + __be32 code; + + _enter("%d", local->debug_id); + + iov[0].iov_base = &whdr; + iov[0].iov_len = sizeof(whdr); + iov[1].iov_base = &code; + iov[1].iov_len = sizeof(code); + size = sizeof(whdr) + sizeof(code); + + msg.msg_name = &sa; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + memset(&sa, 0, sizeof(sa)); + sa.sa.sa_family = local->srx.transport.family; + switch (sa.sa.sa_family) { + case AF_INET: + msg.msg_namelen = sizeof(sa.sin); + break; + default: + msg.msg_namelen = 0; + break; + } + + memset(&whdr, 0, sizeof(whdr)); + whdr.type = RXRPC_PACKET_TYPE_ABORT; + + while ((skb = skb_dequeue(&local->reject_queue))) { + rxrpc_see_skb(skb); + sp = rxrpc_skb(skb); + switch (sa.sa.sa_family) { + case AF_INET: + sa.sin.sin_port = udp_hdr(skb)->source; + sa.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; + code = htonl(skb->priority); + + whdr.epoch = htonl(sp->hdr.epoch); + whdr.cid = htonl(sp->hdr.cid); + whdr.callNumber = htonl(sp->hdr.callNumber); + whdr.serviceId = 
htons(sp->hdr.serviceId); + whdr.flags = sp->hdr.flags; + whdr.flags ^= RXRPC_CLIENT_INITIATED; + whdr.flags &= RXRPC_CLIENT_INITIATED; + + kernel_sendmsg(local->socket, &msg, iov, 2, size); + break; + + default: + break; + } + + rxrpc_free_skb(skb); + } + + _leave(""); +} diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 27b9eca..c894893 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -129,15 +129,14 @@ void rxrpc_error_report(struct sock *sk) _leave("UDP socket errqueue empty"); return; } + rxrpc_new_skb(skb); serr = SKB_EXT_ERR(skb); if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) { _leave("UDP empty message"); - kfree_skb(skb); + rxrpc_free_skb(skb); return; } - rxrpc_new_skb(skb); - rcu_read_lock(); peer = rxrpc_lookup_peer_icmp_rcu(local, skb); if (peer && !rxrpc_get_peer_maybe(peer)) @@ -249,7 +248,6 @@ void rxrpc_peer_error_distributor(struct work_struct *work) container_of(work, struct rxrpc_peer, error_distributor); struct rxrpc_call *call; enum rxrpc_call_completion compl; - bool queue; int error; _enter(""); @@ -272,15 +270,8 @@ void rxrpc_peer_error_distributor(struct work_struct *work) hlist_del_init(&call->error_link); rxrpc_see_call(call); - queue = false; - write_lock(&call->state_lock); - if (__rxrpc_set_call_completion(call, compl, 0, error)) { - set_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events); - queue = true; - } - write_unlock(&call->state_lock); - if (queue) - rxrpc_queue_call(call); + if (rxrpc_set_call_completion(call, compl, 0, error)) + rxrpc_notify_socket(call); } spin_unlock_bh(&peer->lock); diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index aebc73a..2efe29a 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -199,6 +199,32 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) } /* + * Initialise peer record. 
+ */ +static void rxrpc_init_peer(struct rxrpc_peer *peer, unsigned long hash_key) +{ + rxrpc_assess_MTU_size(peer); + peer->mtu = peer->if_mtu; + + if (peer->srx.transport.family == AF_INET) { + peer->hdrsize = sizeof(struct iphdr); + switch (peer->srx.transport_type) { + case SOCK_DGRAM: + peer->hdrsize += sizeof(struct udphdr); + break; + default: + BUG(); + break; + } + } else { + BUG(); + } + + peer->hdrsize += sizeof(struct rxrpc_wire_header); + peer->maxdata = peer->mtu - peer->hdrsize; +} + +/* * Set up a new peer. */ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local, @@ -214,29 +240,39 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local, if (peer) { peer->hash_key = hash_key; memcpy(&peer->srx, srx, sizeof(*srx)); + rxrpc_init_peer(peer, hash_key); + } - rxrpc_assess_MTU_size(peer); - peer->mtu = peer->if_mtu; - - if (srx->transport.family == AF_INET) { - peer->hdrsize = sizeof(struct iphdr); - switch (srx->transport_type) { - case SOCK_DGRAM: - peer->hdrsize += sizeof(struct udphdr); - break; - default: - BUG(); - break; - } - } else { - BUG(); - } + _leave(" = %p", peer); + return peer; +} - peer->hdrsize += sizeof(struct rxrpc_wire_header); - peer->maxdata = peer->mtu - peer->hdrsize; +/* + * Set up a new incoming peer. The address is prestored in the preallocated + * peer. 
+ */ +struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *local, + struct rxrpc_peer *prealloc) +{ + struct rxrpc_peer *peer; + unsigned long hash_key; + + hash_key = rxrpc_peer_hash_key(local, &prealloc->srx); + prealloc->local = local; + rxrpc_init_peer(prealloc, hash_key); + + spin_lock(&rxrpc_peer_hash_lock); + + /* Need to check that we aren't racing with someone else */ + peer = __rxrpc_lookup_peer_rcu(local, &prealloc->srx, hash_key); + if (peer && !rxrpc_get_peer_maybe(peer)) + peer = NULL; + if (!peer) { + peer = prealloc; + hash_add_rcu(rxrpc_peer_hash, &peer->hash_link, hash_key); } - _leave(" = %p", peer); + spin_unlock(&rxrpc_peer_hash_lock); return peer; } @@ -272,7 +308,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, return NULL; } - spin_lock(&rxrpc_peer_hash_lock); + spin_lock_bh(&rxrpc_peer_hash_lock); /* Need to check that we aren't racing with someone else */ peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key); @@ -282,7 +318,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, hash_add_rcu(rxrpc_peer_hash, &candidate->hash_link, hash_key); - spin_unlock(&rxrpc_peer_hash_lock); + spin_unlock_bh(&rxrpc_peer_hash_lock); if (peer) kfree(candidate); @@ -307,9 +343,9 @@ void __rxrpc_put_peer(struct rxrpc_peer *peer) { ASSERT(hlist_empty(&peer->error_targets)); - spin_lock(&rxrpc_peer_hash_lock); + spin_lock_bh(&rxrpc_peer_hash_lock); hash_del_rcu(&peer->hash_link); - spin_unlock(&rxrpc_peer_hash_lock); + spin_unlock_bh(&rxrpc_peer_hash_lock); kfree_rcu(peer, rcu); } diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 6876ffb..20d0b5c 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -19,319 +19,479 @@ #include "ar-internal.h" /* - * receive a message from an RxRPC socket - * - we need to be careful about two or more threads calling recvmsg - * simultaneously + * Post a call for attention by the socket or kernel service. 
Further + * notifications are suppressed by putting recvmsg_link on a dummy queue. */ -int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, - int flags) +void rxrpc_notify_socket(struct rxrpc_call *call) { - struct rxrpc_skb_priv *sp; - struct rxrpc_call *call = NULL, *continue_call = NULL; - struct rxrpc_sock *rx = rxrpc_sk(sock->sk); - struct sk_buff *skb; - long timeo; - int copy, ret, ullen, offset, copied = 0; - u32 abort_code; + struct rxrpc_sock *rx; + struct sock *sk; - DEFINE_WAIT(wait); + _enter("%d", call->debug_id); - _enter(",,,%zu,%d", len, flags); + if (!list_empty(&call->recvmsg_link)) + return; + + rcu_read_lock(); + + rx = rcu_dereference(call->socket); + sk = &rx->sk; + if (rx && sk->sk_state < RXRPC_CLOSE) { + if (call->notify_rx) { + call->notify_rx(sk, call, call->user_call_ID); + } else { + write_lock_bh(&rx->recvmsg_lock); + if (list_empty(&call->recvmsg_link)) { + rxrpc_get_call(call, rxrpc_call_got); + list_add_tail(&call->recvmsg_link, &rx->recvmsg_q); + } + write_unlock_bh(&rx->recvmsg_lock); - if (flags & (MSG_OOB | MSG_TRUNC)) - return -EOPNOTSUPP; + if (!sock_flag(sk, SOCK_DEAD)) { + _debug("call %ps", sk->sk_data_ready); + sk->sk_data_ready(sk); + } + } + } - ullen = msg->msg_flags & MSG_CMSG_COMPAT ? 4 : sizeof(unsigned long); + rcu_read_unlock(); + _leave(""); +} - timeo = sock_rcvtimeo(&rx->sk, flags & MSG_DONTWAIT); - msg->msg_flags |= MSG_MORE; +/* + * Pass a call terminating message to userspace. 
+ */ +static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) +{ + u32 tmp = 0; + int ret; - lock_sock(&rx->sk); + switch (call->completion) { + case RXRPC_CALL_SUCCEEDED: + ret = 0; + if (rxrpc_is_service_call(call)) + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ACK, 0, &tmp); + break; + case RXRPC_CALL_REMOTELY_ABORTED: + tmp = call->abort_code; + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp); + break; + case RXRPC_CALL_LOCALLY_ABORTED: + tmp = call->abort_code; + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp); + break; + case RXRPC_CALL_NETWORK_ERROR: + tmp = call->error; + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &tmp); + break; + case RXRPC_CALL_LOCAL_ERROR: + tmp = call->error; + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, &tmp); + break; + default: + pr_err("Invalid terminal call state %u\n", call->state); + BUG(); + break; + } - for (;;) { - /* return immediately if a client socket has no outstanding - * calls */ - if (RB_EMPTY_ROOT(&rx->calls)) { - if (copied) - goto out; - if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) { - release_sock(&rx->sk); - if (continue_call) - rxrpc_put_call(continue_call, - rxrpc_call_put); - return -ENODATA; - } - } + return ret; +} - /* get the next message on the Rx queue */ - skb = skb_peek(&rx->sk.sk_receive_queue); - if (!skb) { - /* nothing remains on the queue */ - if (copied && - (flags & MSG_PEEK || timeo == 0)) - goto out; +/* + * Pass back notification of a new call. The call is added to the + * to-be-accepted list. This means that the next call to be accepted might not + * be the last call seen awaiting acceptance, but unless we leave this on the + * front of the queue and block all other messages until someone gives us a + * user_ID for it, there's not a lot we can do. 
+ */ +static int rxrpc_recvmsg_new_call(struct rxrpc_sock *rx, + struct rxrpc_call *call, + struct msghdr *msg, int flags) +{ + int tmp = 0, ret; - /* wait for a message to turn up */ - release_sock(&rx->sk); - prepare_to_wait_exclusive(sk_sleep(&rx->sk), &wait, - TASK_INTERRUPTIBLE); - ret = sock_error(&rx->sk); - if (ret) - goto wait_error; - - if (skb_queue_empty(&rx->sk.sk_receive_queue)) { - if (signal_pending(current)) - goto wait_interrupted; - timeo = schedule_timeout(timeo); - } - finish_wait(sk_sleep(&rx->sk), &wait); - lock_sock(&rx->sk); - continue; - } + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NEW_CALL, 0, &tmp); - peek_next_packet: - rxrpc_see_skb(skb); - sp = rxrpc_skb(skb); - call = sp->call; - ASSERT(call != NULL); - rxrpc_see_call(call); - - _debug("next pkt %s", rxrpc_pkts[sp->hdr.type]); - - /* make sure we wait for the state to be updated in this call */ - spin_lock_bh(&call->lock); - spin_unlock_bh(&call->lock); - - if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) { - _debug("packet from released call"); - if (skb_dequeue(&rx->sk.sk_receive_queue) != skb) - BUG(); - rxrpc_free_skb(skb); - continue; - } + if (ret == 0 && !(flags & MSG_PEEK)) { + _debug("to be accepted"); + write_lock_bh(&rx->recvmsg_lock); + list_del_init(&call->recvmsg_link); + write_unlock_bh(&rx->recvmsg_lock); - /* determine whether to continue last data receive */ - if (continue_call) { - _debug("maybe cont"); - if (call != continue_call || - skb->mark != RXRPC_SKB_MARK_DATA) { - release_sock(&rx->sk); - rxrpc_put_call(continue_call, rxrpc_call_put); - _leave(" = %d [noncont]", copied); - return copied; - } - } + write_lock(&rx->call_lock); + list_add_tail(&call->accept_link, &rx->to_be_accepted); + write_unlock(&rx->call_lock); + } - rxrpc_get_call(call, rxrpc_call_got); + return ret; +} - /* copy the peer address and timestamp */ - if (!continue_call) { - if (msg->msg_name) { - size_t len = - sizeof(call->conn->params.peer->srx); - memcpy(msg->msg_name, - 
&call->conn->params.peer->srx, len); - msg->msg_namelen = len; - } - sock_recv_timestamp(msg, &rx->sk, skb); - } +/* + * End the packet reception phase. + */ +static void rxrpc_end_rx_phase(struct rxrpc_call *call) +{ + _enter("%d,%s", call->debug_id, rxrpc_call_states[call->state]); - /* receive the message */ - if (skb->mark != RXRPC_SKB_MARK_DATA) - goto receive_non_data_message; + if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { + rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, true, false); + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + } else { + rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, false, false); + } - _debug("recvmsg DATA #%u { %d, %d }", - sp->hdr.seq, skb->len, sp->offset); + write_lock_bh(&call->state_lock); - if (!continue_call) { - /* only set the control data once per recvmsg() */ - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, - ullen, &call->user_call_ID); - if (ret < 0) - goto copy_error; - ASSERT(test_bit(RXRPC_CALL_HAS_USERID, &call->flags)); - } + switch (call->state) { + case RXRPC_CALL_CLIENT_RECV_REPLY: + __rxrpc_call_completed(call); + break; - ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv); - ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1); - call->rx_data_recv = sp->hdr.seq; + case RXRPC_CALL_SERVER_RECV_REQUEST: + call->state = RXRPC_CALL_SERVER_ACK_REQUEST; + break; + default: + break; + } - ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten); + write_unlock_bh(&call->state_lock); +} - offset = sp->offset; - copy = skb->len - offset; - if (copy > len - copied) - copy = len - copied; +/* + * Discard a packet we've used up and advance the Rx window by one. 
+ */ +static void rxrpc_rotate_rx_window(struct rxrpc_call *call) +{ + struct sk_buff *skb; + rxrpc_seq_t hard_ack, top; + int ix; + + _enter("%d", call->debug_id); + + hard_ack = call->rx_hard_ack; + top = smp_load_acquire(&call->rx_top); + ASSERT(before(hard_ack, top)); + + hard_ack++; + ix = hard_ack & RXRPC_RXTX_BUFF_MASK; + skb = call->rxtx_buffer[ix]; + rxrpc_see_skb(skb); + call->rxtx_buffer[ix] = NULL; + call->rxtx_annotations[ix] = 0; + /* Barrier against rxrpc_input_data(). */ + smp_store_release(&call->rx_hard_ack, hard_ack); - ret = skb_copy_datagram_msg(skb, offset, msg, copy); + rxrpc_free_skb(skb); + _debug("%u,%u,%lx", hard_ack, top, call->flags); + if (hard_ack == top && test_bit(RXRPC_CALL_RX_LAST, &call->flags)) + rxrpc_end_rx_phase(call); +} + +/* + * Decrypt and verify a (sub)packet. The packet's length may be changed due to + * padding, but if this is the case, the packet length will be resident in the + * socket buffer. Note that we can't modify the master skb info as the skb may + * be the home to multiple subpackets. + */ +static int rxrpc_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, + u8 annotation, + unsigned int offset, unsigned int len) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + rxrpc_seq_t seq = sp->hdr.seq; + u16 cksum = sp->hdr.cksum; + + _enter(""); + + /* For all but the head jumbo subpacket, the security checksum is in a + * jumbo header immediately prior to the data. + */ + if ((annotation & RXRPC_RX_ANNO_JUMBO) > 1) { + __be16 tmp; + if (skb_copy_bits(skb, offset - 2, &tmp, 2) < 0) + BUG(); + cksum = ntohs(tmp); + seq += (annotation & RXRPC_RX_ANNO_JUMBO) - 1; + } + + return call->conn->security->verify_packet(call, skb, offset, len, + seq, cksum); +} + +/* + * Locate the data within a packet. This is complicated by: + * + * (1) An skb may contain a jumbo packet - so we have to find the appropriate + * subpacket. 
+ * + * (2) The (sub)packets may be encrypted and, if so, the encrypted portion + * contains an extra header which includes the true length of the data, + * excluding any encrypted padding. + */ +static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb, + u8 *_annotation, + unsigned int *_offset, unsigned int *_len) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + unsigned int offset = *_offset; + unsigned int len = *_len; + int ret; + u8 annotation = *_annotation; + + if (offset > 0) + return 0; + + /* Locate the subpacket */ + offset = sp->offset; + len = skb->len - sp->offset; + if ((annotation & RXRPC_RX_ANNO_JUMBO) > 0) { + offset += (((annotation & RXRPC_RX_ANNO_JUMBO) - 1) * + RXRPC_JUMBO_SUBPKTLEN); + len = (annotation & RXRPC_RX_ANNO_JLAST) ? + skb->len - offset : RXRPC_JUMBO_SUBPKTLEN; + } + + if (!(annotation & RXRPC_RX_ANNO_VERIFIED)) { + ret = rxrpc_verify_packet(call, skb, annotation, offset, len); if (ret < 0) - goto copy_error; + return ret; + *_annotation |= RXRPC_RX_ANNO_VERIFIED; + } - /* handle piecemeal consumption of data packets */ - _debug("copied %d+%d", copy, copied); + *_offset = offset; + *_len = len; + call->conn->security->locate_data(call, skb, _offset, _len); + return 0; +} - offset += copy; - copied += copy; +/* + * Deliver messages to a call. This keeps processing packets until the buffer + * is filled and we find either more DATA (returns 0) or the end of the DATA + * (returns 1). If more packets are required, it returns -EAGAIN. + */ +static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, + struct msghdr *msg, struct iov_iter *iter, + size_t len, int flags, size_t *_offset) +{ + struct rxrpc_skb_priv *sp; + struct sk_buff *skb; + rxrpc_seq_t hard_ack, top, seq; + size_t remain; + bool last; + unsigned int rx_pkt_offset, rx_pkt_len; + int ix, copy, ret = 0; + + _enter(""); + + rx_pkt_offset = call->rx_pkt_offset; + rx_pkt_len = call->rx_pkt_len; + + /* Barriers against rxrpc_input_data(). 
*/ + hard_ack = call->rx_hard_ack; + top = smp_load_acquire(&call->rx_top); + for (seq = hard_ack + 1; before_eq(seq, top); seq++) { + ix = seq & RXRPC_RXTX_BUFF_MASK; + skb = call->rxtx_buffer[ix]; + if (!skb) + break; + smp_rmb(); + rxrpc_see_skb(skb); + sp = rxrpc_skb(skb); - if (!(flags & MSG_PEEK)) - sp->offset = offset; + if (msg) + sock_recv_timestamp(msg, sock->sk, skb); + + ret = rxrpc_locate_data(call, skb, &call->rxtx_annotations[ix], + &rx_pkt_offset, &rx_pkt_len); + _debug("recvmsg %x DATA #%u { %d, %d }", + sp->hdr.callNumber, seq, rx_pkt_offset, rx_pkt_len); + + /* We have to handle short, empty and used-up DATA packets. */ + remain = len - *_offset; + copy = rx_pkt_len; + if (copy > remain) + copy = remain; + if (copy > 0) { + ret = skb_copy_datagram_iter(skb, rx_pkt_offset, iter, + copy); + if (ret < 0) + goto out; + + /* handle piecemeal consumption of data packets */ + _debug("copied %d @%zu", copy, *_offset); + + rx_pkt_offset += copy; + rx_pkt_len -= copy; + *_offset += copy; + } - if (sp->offset < skb->len) { + if (rx_pkt_len > 0) { _debug("buffer full"); - ASSERTCMP(copied, ==, len); + ASSERTCMP(*_offset, ==, len); break; } - /* we transferred the whole data packet */ + /* The whole packet has been transferred. 
*/ + last = sp->hdr.flags & RXRPC_LAST_PACKET; if (!(flags & MSG_PEEK)) - rxrpc_kernel_data_consumed(call, skb); - - if (sp->hdr.flags & RXRPC_LAST_PACKET) { - _debug("last"); - if (rxrpc_conn_is_client(call->conn)) { - /* last byte of reply received */ - ret = copied; - goto terminal_message; - } + rxrpc_rotate_rx_window(call); + rx_pkt_offset = 0; + rx_pkt_len = 0; - /* last bit of request received */ - if (!(flags & MSG_PEEK)) { - _debug("eat packet"); - if (skb_dequeue(&rx->sk.sk_receive_queue) != - skb) - BUG(); - rxrpc_free_skb(skb); - } - msg->msg_flags &= ~MSG_MORE; - break; - } + ASSERTIFCMP(last, seq, ==, top); + } - /* move on to the next data message */ - _debug("next"); - if (!continue_call) - continue_call = sp->call; - else - rxrpc_put_call(call, rxrpc_call_put); - call = NULL; - - if (flags & MSG_PEEK) { - _debug("peek next"); - skb = skb->next; - if (skb == (struct sk_buff *) &rx->sk.sk_receive_queue) - break; - goto peek_next_packet; - } + if (after(seq, top)) { + ret = -EAGAIN; + if (test_bit(RXRPC_CALL_RX_LAST, &call->flags)) + ret = 1; + } +out: + if (!(flags & MSG_PEEK)) { + call->rx_pkt_offset = rx_pkt_offset; + call->rx_pkt_len = rx_pkt_len; + } + _leave(" = %d [%u/%u]", ret, seq, top); + return ret; +} - _debug("eat packet"); - if (skb_dequeue(&rx->sk.sk_receive_queue) != skb) - BUG(); - rxrpc_free_skb(skb); +/* + * Receive a message from an RxRPC socket + * - we need to be careful about two or more threads calling recvmsg + * simultaneously + */ +int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) +{ + struct rxrpc_call *call; + struct rxrpc_sock *rx = rxrpc_sk(sock->sk); + struct list_head *l; + size_t copied = 0; + long timeo; + int ret; + + DEFINE_WAIT(wait); + + _enter(",,,%zu,%d", len, flags); + + if (flags & (MSG_OOB | MSG_TRUNC)) + return -EOPNOTSUPP; + + timeo = sock_rcvtimeo(&rx->sk, flags & MSG_DONTWAIT); + +try_again: + lock_sock(&rx->sk); + + /* Return immediately if a client socket has no 
outstanding calls */ + if (RB_EMPTY_ROOT(&rx->calls) && + list_empty(&rx->recvmsg_q) && + rx->sk.sk_state != RXRPC_SERVER_LISTENING) { + release_sock(&rx->sk); + return -ENODATA; } - /* end of non-terminal data packet reception for the moment */ - _debug("end rcv data"); -out: - release_sock(&rx->sk); - if (call) - rxrpc_put_call(call, rxrpc_call_put); - if (continue_call) - rxrpc_put_call(continue_call, rxrpc_call_put); - _leave(" = %d [data]", copied); - return copied; - - /* handle non-DATA messages such as aborts, incoming connections and - * final ACKs */ -receive_non_data_message: - _debug("non-data"); - - if (skb->mark == RXRPC_SKB_MARK_NEW_CALL) { - _debug("RECV NEW CALL"); - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NEW_CALL, 0, &abort_code); - if (ret < 0) - goto copy_error; - if (!(flags & MSG_PEEK)) { - if (skb_dequeue(&rx->sk.sk_receive_queue) != skb) - BUG(); - rxrpc_free_skb(skb); + if (list_empty(&rx->recvmsg_q)) { + ret = -EWOULDBLOCK; + if (timeo == 0) + goto error_no_call; + + release_sock(&rx->sk); + + /* Wait for something to happen */ + prepare_to_wait_exclusive(sk_sleep(&rx->sk), &wait, + TASK_INTERRUPTIBLE); + ret = sock_error(&rx->sk); + if (ret) + goto wait_error; + + if (list_empty(&rx->recvmsg_q)) { + if (signal_pending(current)) + goto wait_interrupted; + timeo = schedule_timeout(timeo); } - goto out; + finish_wait(sk_sleep(&rx->sk), &wait); + goto try_again; } - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, - ullen, &call->user_call_ID); - if (ret < 0) - goto copy_error; - ASSERT(test_bit(RXRPC_CALL_HAS_USERID, &call->flags)); + /* Find the next call and dequeue it if we're not just peeking. If we + * do dequeue it, that comes with a ref that we will need to release. 
+ */ + write_lock_bh(&rx->recvmsg_lock); + l = rx->recvmsg_q.next; + call = list_entry(l, struct rxrpc_call, recvmsg_link); + if (!(flags & MSG_PEEK)) + list_del_init(&call->recvmsg_link); + else + rxrpc_get_call(call, rxrpc_call_got); + write_unlock_bh(&rx->recvmsg_lock); - switch (skb->mark) { - case RXRPC_SKB_MARK_DATA: + _debug("recvmsg call %p", call); + + if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) BUG(); - case RXRPC_SKB_MARK_FINAL_ACK: - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ACK, 0, &abort_code); - break; - case RXRPC_SKB_MARK_BUSY: - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_BUSY, 0, &abort_code); - break; - case RXRPC_SKB_MARK_REMOTE_ABORT: - abort_code = call->abort_code; - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &abort_code); - break; - case RXRPC_SKB_MARK_LOCAL_ABORT: - abort_code = call->abort_code; - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &abort_code); - if (call->error) { - abort_code = call->error; - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, - &abort_code); + + if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { + if (flags & MSG_CMSG_COMPAT) { + unsigned int id32 = call->user_call_ID; + + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, + sizeof(unsigned int), &id32); + } else { + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, + sizeof(unsigned long), + &call->user_call_ID); } + if (ret < 0) + goto error; + } + + if (msg->msg_name) { + size_t len = sizeof(call->conn->params.peer->srx); + memcpy(msg->msg_name, &call->conn->params.peer->srx, len); + msg->msg_namelen = len; + } + + switch (call->state) { + case RXRPC_CALL_SERVER_ACCEPTING: + ret = rxrpc_recvmsg_new_call(rx, call, msg, flags); break; - case RXRPC_SKB_MARK_NET_ERROR: - _debug("RECV NET ERROR %d", sp->error); - abort_code = sp->error; - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &abort_code); - break; - case RXRPC_SKB_MARK_LOCAL_ERROR: - _debug("RECV LOCAL ERROR %d", sp->error); - abort_code = sp->error; - ret = put_cmsg(msg, SOL_RXRPC, 
RXRPC_LOCAL_ERROR, 4, - &abort_code); + case RXRPC_CALL_CLIENT_RECV_REPLY: + case RXRPC_CALL_SERVER_RECV_REQUEST: + case RXRPC_CALL_SERVER_ACK_REQUEST: + ret = rxrpc_recvmsg_data(sock, call, msg, &msg->msg_iter, len, + flags, &copied); + if (ret == -EAGAIN) + ret = 0; break; default: - pr_err("Unknown packet mark %u\n", skb->mark); - BUG(); + ret = 0; break; } if (ret < 0) - goto copy_error; - -terminal_message: - _debug("terminal"); - msg->msg_flags &= ~MSG_MORE; - msg->msg_flags |= MSG_EOR; + goto error; - if (!(flags & MSG_PEEK)) { - _net("free terminal skb %p", skb); - if (skb_dequeue(&rx->sk.sk_receive_queue) != skb) - BUG(); - rxrpc_free_skb(skb); - rxrpc_release_call(rx, call); + if (call->state == RXRPC_CALL_COMPLETE) { + ret = rxrpc_recvmsg_term(call, msg); + if (ret < 0) + goto error; + if (!(flags & MSG_PEEK)) + rxrpc_release_call(rx, call); + msg->msg_flags |= MSG_EOR; + ret = 1; } - release_sock(&rx->sk); - rxrpc_put_call(call, rxrpc_call_put); - if (continue_call) - rxrpc_put_call(continue_call, rxrpc_call_put); - _leave(" = %d", ret); - return ret; + if (ret == 0) + msg->msg_flags |= MSG_MORE; + else + msg->msg_flags &= ~MSG_MORE; + ret = copied; -copy_error: - _debug("copy error"); - release_sock(&rx->sk); +error: rxrpc_put_call(call, rxrpc_call_put); - if (continue_call) - rxrpc_put_call(continue_call, rxrpc_call_put); +error_no_call: + release_sock(&rx->sk); _leave(" = %d", ret); return ret; @@ -339,85 +499,8 @@ wait_interrupted: ret = sock_intr_errno(timeo); wait_error: finish_wait(sk_sleep(&rx->sk), &wait); - if (continue_call) - rxrpc_put_call(continue_call, rxrpc_call_put); - if (copied) - copied = ret; - _leave(" = %d [waitfail %d]", copied, ret); - return copied; - -} - -/* - * Deliver messages to a call. This keeps processing packets until the buffer - * is filled and we find either more DATA (returns 0) or the end of the DATA - * (returns 1). If more packets are required, it returns -EAGAIN. 
- * - * TODO: Note that this is hacked in at the moment and will be replaced. - */ -static int temp_deliver_data(struct socket *sock, struct rxrpc_call *call, - struct iov_iter *iter, size_t size, - size_t *_offset) -{ - struct rxrpc_skb_priv *sp; - struct sk_buff *skb; - size_t remain; - int ret, copy; - - _enter("%d", call->debug_id); - -next: - local_bh_disable(); - skb = skb_dequeue(&call->knlrecv_queue); - local_bh_enable(); - if (!skb) { - if (test_bit(RXRPC_CALL_RX_NO_MORE, &call->flags)) - return 1; - _leave(" = -EAGAIN [empty]"); - return -EAGAIN; - } - - sp = rxrpc_skb(skb); - _debug("dequeued %p %u/%zu", skb, sp->offset, size); - - switch (skb->mark) { - case RXRPC_SKB_MARK_DATA: - remain = size - *_offset; - if (remain > 0) { - copy = skb->len - sp->offset; - if (copy > remain) - copy = remain; - ret = skb_copy_datagram_iter(skb, sp->offset, iter, - copy); - if (ret < 0) - goto requeue_and_leave; - - /* handle piecemeal consumption of data packets */ - sp->offset += copy; - *_offset += copy; - } - - if (sp->offset < skb->len) - goto partially_used_skb; - - /* We consumed the whole packet */ - ASSERTCMP(sp->offset, ==, skb->len); - if (sp->hdr.flags & RXRPC_LAST_PACKET) - set_bit(RXRPC_CALL_RX_NO_MORE, &call->flags); - rxrpc_kernel_data_consumed(call, skb); - rxrpc_free_skb(skb); - goto next; - - default: - rxrpc_free_skb(skb); - goto next; - } - -partially_used_skb: - ASSERTCMP(*_offset, ==, size); - ret = 0; -requeue_and_leave: - skb_queue_head(&call->knlrecv_queue, skb); + release_sock(&rx->sk); + _leave(" = %d [wait]", ret); return ret; } @@ -453,8 +536,9 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, struct kvec iov; int ret; - _enter("{%d,%s},%zu,%d", - call->debug_id, rxrpc_call_states[call->state], size, want_more); + _enter("{%d,%s},%zu/%zu,%d", + call->debug_id, rxrpc_call_states[call->state], + *_offset, size, want_more); ASSERTCMP(*_offset, <=, size); ASSERTCMP(call->state, !=, RXRPC_CALL_SERVER_ACCEPTING); @@ 
-469,7 +553,8 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, case RXRPC_CALL_CLIENT_RECV_REPLY: case RXRPC_CALL_SERVER_RECV_REQUEST: case RXRPC_CALL_SERVER_ACK_REQUEST: - ret = temp_deliver_data(sock, call, &iter, size, _offset); + ret = rxrpc_recvmsg_data(sock, call, NULL, &iter, size, 0, + _offset); if (ret < 0) goto out; @@ -494,7 +579,6 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, goto call_complete; default: - *_offset = 0; ret = -EINPROGRESS; goto out; } diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 3777432..ae39255 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -317,6 +317,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call, * decrypt partial encryption on a packet (level 1 security) */ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int offset, unsigned int len, rxrpc_seq_t seq) { struct rxkad_level1_hdr sechdr; @@ -330,18 +331,20 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, _enter(""); - if (skb->len < 8) { + if (len < 8) { rxrpc_abort_call("V1H", call, seq, RXKADSEALEDINCON, EPROTO); goto protocol_error; } - /* we want to decrypt the skbuff in-place */ + /* Decrypt the skbuff in-place. TODO: We really want to decrypt + * directly into the target buffer. 
+ */ nsg = skb_cow_data(skb, 0, &trailer); if (nsg < 0 || nsg > 16) goto nomem; sg_init_table(sg, nsg); - skb_to_sgvec(skb, sg, 0, 8); + skb_to_sgvec(skb, sg, offset, 8); /* start the decryption afresh */ memset(&iv, 0, sizeof(iv)); @@ -353,12 +356,12 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, skcipher_request_zero(req); /* Extract the decrypted packet length */ - if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0) { + if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) { rxrpc_abort_call("XV1", call, seq, RXKADDATALEN, EPROTO); goto protocol_error; } - if (!skb_pull(skb, sizeof(sechdr))) - BUG(); + offset += sizeof(sechdr); + len -= sizeof(sechdr); buf = ntohl(sechdr.data_size); data_size = buf & 0xffff; @@ -371,18 +374,16 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, goto protocol_error; } - /* shorten the packet to remove the padding */ - if (data_size > skb->len) { + if (data_size > len) { rxrpc_abort_call("V1L", call, seq, RXKADDATALEN, EPROTO); goto protocol_error; } - if (data_size < skb->len) - skb->len = data_size; _leave(" = 0 [dlen=%x]", data_size); return 0; protocol_error: + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); _leave(" = -EPROTO"); return -EPROTO; @@ -395,6 +396,7 @@ nomem: * wholly decrypt a packet (level 2 security) */ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int offset, unsigned int len, rxrpc_seq_t seq) { const struct rxrpc_key_token *token; @@ -409,12 +411,14 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, _enter(",{%d}", skb->len); - if (skb->len < 8) { + if (len < 8) { rxrpc_abort_call("V2H", call, seq, RXKADSEALEDINCON, EPROTO); goto protocol_error; } - /* we want to decrypt the skbuff in-place */ + /* Decrypt the skbuff in-place. TODO: We really want to decrypt + * directly into the target buffer. 
+ */ nsg = skb_cow_data(skb, 0, &trailer); if (nsg < 0) goto nomem; @@ -427,7 +431,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, } sg_init_table(sg, nsg); - skb_to_sgvec(skb, sg, 0, skb->len); + skb_to_sgvec(skb, sg, offset, len); /* decrypt from the session key */ token = call->conn->params.key->payload.data[0]; @@ -435,19 +439,19 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, skcipher_request_set_tfm(req, call->conn->cipher); skcipher_request_set_callback(req, 0, NULL, NULL); - skcipher_request_set_crypt(req, sg, sg, skb->len, iv.x); + skcipher_request_set_crypt(req, sg, sg, len, iv.x); crypto_skcipher_decrypt(req); skcipher_request_zero(req); if (sg != _sg) kfree(sg); /* Extract the decrypted packet length */ - if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0) { + if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) { rxrpc_abort_call("XV2", call, seq, RXKADDATALEN, EPROTO); goto protocol_error; } - if (!skb_pull(skb, sizeof(sechdr))) - BUG(); + offset += sizeof(sechdr); + len -= sizeof(sechdr); buf = ntohl(sechdr.data_size); data_size = buf & 0xffff; @@ -460,17 +464,16 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, goto protocol_error; } - if (data_size > skb->len) { + if (data_size > len) { rxrpc_abort_call("V2L", call, seq, RXKADDATALEN, EPROTO); goto protocol_error; } - if (data_size < skb->len) - skb->len = data_size; _leave(" = 0 [dlen=%x]", data_size); return 0; protocol_error: + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); _leave(" = -EPROTO"); return -EPROTO; @@ -484,6 +487,7 @@ nomem: * jumbo packet). 
*/ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int offset, unsigned int len, rxrpc_seq_t seq, u16 expected_cksum) { SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); @@ -521,6 +525,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, if (cksum != expected_cksum) { rxrpc_abort_call("VCK", call, seq, RXKADSEALEDINCON, EPROTO); + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); _leave(" = -EPROTO [csum failed]"); return -EPROTO; } @@ -529,15 +534,61 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, case RXRPC_SECURITY_PLAIN: return 0; case RXRPC_SECURITY_AUTH: - return rxkad_verify_packet_1(call, skb, seq); + return rxkad_verify_packet_1(call, skb, offset, len, seq); case RXRPC_SECURITY_ENCRYPT: - return rxkad_verify_packet_2(call, skb, seq); + return rxkad_verify_packet_2(call, skb, offset, len, seq); default: return -ENOANO; } } /* + * Locate the data contained in a packet that was partially encrypted. + */ +static void rxkad_locate_data_1(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int *_offset, unsigned int *_len) +{ + struct rxkad_level1_hdr sechdr; + + if (skb_copy_bits(skb, *_offset, &sechdr, sizeof(sechdr)) < 0) + BUG(); + *_offset += sizeof(sechdr); + *_len = ntohl(sechdr.data_size) & 0xffff; +} + +/* + * Locate the data contained in a packet that was completely encrypted. + */ +static void rxkad_locate_data_2(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int *_offset, unsigned int *_len) +{ + struct rxkad_level2_hdr sechdr; + + if (skb_copy_bits(skb, *_offset, &sechdr, sizeof(sechdr)) < 0) + BUG(); + *_offset += sizeof(sechdr); + *_len = ntohl(sechdr.data_size) & 0xffff; +} + +/* + * Locate the data contained in an already decrypted packet. 
+ */ +static void rxkad_locate_data(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int *_offset, unsigned int *_len) +{ + switch (call->conn->params.security_level) { + case RXRPC_SECURITY_AUTH: + rxkad_locate_data_1(call, skb, _offset, _len); + return; + case RXRPC_SECURITY_ENCRYPT: + rxkad_locate_data_2(call, skb, _offset, _len); + return; + default: + return; + } +} + +/* * issue a challenge */ static int rxkad_issue_challenge(struct rxrpc_connection *conn) @@ -704,7 +755,7 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, struct rxkad_challenge challenge; struct rxkad_response resp __attribute__((aligned(8))); /* must be aligned for crypto */ - struct rxrpc_skb_priv *sp; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); u32 version, nonce, min_level, abort_code; int ret; @@ -722,8 +773,7 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, } abort_code = RXKADPACKETSHORT; - sp = rxrpc_skb(skb); - if (skb_copy_bits(skb, 0, &challenge, sizeof(challenge)) < 0) + if (skb_copy_bits(skb, sp->offset, &challenge, sizeof(challenge)) < 0) goto protocol_error; version = ntohl(challenge.version); @@ -969,7 +1019,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, { struct rxkad_response response __attribute__((aligned(8))); /* must be aligned for crypto */ - struct rxrpc_skb_priv *sp; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt session_key; time_t expiry; void *ticket; @@ -980,7 +1030,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, _enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key)); abort_code = RXKADPACKETSHORT; - if (skb_copy_bits(skb, 0, &response, sizeof(response)) < 0) + if (skb_copy_bits(skb, sp->offset, &response, sizeof(response)) < 0) goto protocol_error; if (!pskb_pull(skb, sizeof(response))) BUG(); @@ -988,7 +1038,6 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, version = ntohl(response.version); ticket_len = 
ntohl(response.ticket_len); kvno = ntohl(response.kvno); - sp = rxrpc_skb(skb); _proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }", sp->hdr.serial, version, kvno, ticket_len); @@ -1010,7 +1059,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, return -ENOMEM; abort_code = RXKADPACKETSHORT; - if (skb_copy_bits(skb, 0, ticket, ticket_len) < 0) + if (skb_copy_bits(skb, sp->offset, ticket, ticket_len) < 0) goto protocol_error_free; ret = rxkad_decrypt_ticket(conn, ticket, ticket_len, &session_key, @@ -1135,6 +1184,7 @@ const struct rxrpc_security rxkad = { .prime_packet_security = rxkad_prime_packet_security, .secure_packet = rxkad_secure_packet, .verify_packet = rxkad_verify_packet, + .locate_data = rxkad_locate_data, .issue_challenge = rxkad_issue_challenge, .respond_to_challenge = rxkad_respond_to_challenge, .verify_response = rxkad_verify_response, diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c index 5d79d5a..82d8134 100644 --- a/net/rxrpc/security.c +++ b/net/rxrpc/security.c @@ -130,20 +130,20 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *conn) } /* find the service */ - read_lock_bh(&local->services_lock); + read_lock(&local->services_lock); hlist_for_each_entry(rx, &local->services, listen_link) { if (rx->srx.srx_service == conn->params.service_id) goto found_service; } /* the service appears to have died */ - read_unlock_bh(&local->services_lock); + read_unlock(&local->services_lock); _leave(" = -ENOENT"); return -ENOENT; found_service: if (!rx->securities) { - read_unlock_bh(&local->services_lock); + read_unlock(&local->services_lock); _leave(" = -ENOKEY"); return -ENOKEY; } @@ -152,13 +152,13 @@ found_service: kref = keyring_search(make_key_ref(rx->securities, 1UL), &key_type_rxrpc_s, kdesc); if (IS_ERR(kref)) { - read_unlock_bh(&local->services_lock); + read_unlock(&local->services_lock); _leave(" = %ld [search]", PTR_ERR(kref)); return PTR_ERR(kref); } key = key_ref_to_ptr(kref); - 
read_unlock_bh(&local->services_lock); + read_unlock(&local->services_lock); conn->server_key = key; conn->security = sec; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 9a4af99..cba2365 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include "ar-internal.h" @@ -38,19 +37,20 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, DECLARE_WAITQUEUE(myself, current); int ret; - _enter(",{%d},%ld", - CIRC_SPACE(call->acks_head, ACCESS_ONCE(call->acks_tail), - call->acks_winsz), - *timeo); + _enter(",{%u,%u,%u}", + call->tx_hard_ack, call->tx_top, call->tx_winsize); add_wait_queue(&call->waitq, &myself); for (;;) { set_current_state(TASK_INTERRUPTIBLE); ret = 0; - if (CIRC_SPACE(call->acks_head, ACCESS_ONCE(call->acks_tail), - call->acks_winsz) > 0) + if (call->tx_top - call->tx_hard_ack < call->tx_winsize) break; + if (call->state >= RXRPC_CALL_COMPLETE) { + ret = -call->error; + break; + } if (signal_pending(current)) { ret = sock_intr_errno(*timeo); break; @@ -68,36 +68,44 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, } /* - * attempt to schedule an instant Tx resend + * Schedule an instant Tx resend. 
*/ -static inline void rxrpc_instant_resend(struct rxrpc_call *call) +static inline void rxrpc_instant_resend(struct rxrpc_call *call, int ix) { - read_lock_bh(&call->state_lock); - if (try_to_del_timer_sync(&call->resend_timer) >= 0) { - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - if (call->state < RXRPC_CALL_COMPLETE && - !test_and_set_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events)) + spin_lock_bh(&call->lock); + + if (call->state < RXRPC_CALL_COMPLETE) { + call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS; + if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) rxrpc_queue_call(call); } - read_unlock_bh(&call->state_lock); + + spin_unlock_bh(&call->lock); } /* - * queue a packet for transmission, set the resend timer and attempt - * to send the packet immediately + * Queue a DATA packet for transmission, set the resend timeout and send the + * packet immediately */ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, bool last) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - int ret; + rxrpc_seq_t seq = sp->hdr.seq; + int ret, ix; + + _net("queue skb %p [%d]", skb, seq); - _net("queue skb %p [%d]", skb, call->acks_head); + ASSERTCMP(seq, ==, call->tx_top + 1); - ASSERT(call->acks_window != NULL); - call->acks_window[call->acks_head] = (unsigned long) skb; + ix = seq & RXRPC_RXTX_BUFF_MASK; + rxrpc_get_skb(skb); + call->rxtx_annotations[ix] = RXRPC_TX_ANNO_UNACK; smp_wmb(); - call->acks_head = (call->acks_head + 1) & (call->acks_winsz - 1); + call->rxtx_buffer[ix] = skb; + call->tx_top = seq; + if (last) + set_bit(RXRPC_CALL_TX_LAST, &call->flags); if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) { _debug("________awaiting reply/ACK__________"); @@ -121,34 +129,17 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, _proto("Tx DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq); - sp->need_resend = false; - sp->resend_at = jiffies + rxrpc_resend_timeout; - if 
(!test_and_set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags)) { - _debug("run timer"); - call->resend_timer.expires = sp->resend_at; - add_timer(&call->resend_timer); - } - - /* attempt to cancel the rx-ACK timer, deferring reply transmission if - * we're ACK'ing the request phase of an incoming call */ - ret = -EAGAIN; - if (try_to_del_timer_sync(&call->ack_timer) >= 0) { - /* the packet may be freed by rxrpc_process_call() before this - * returns */ - if (rxrpc_is_client_call(call)) - rxrpc_expose_client_call(call); - ret = rxrpc_send_data_packet(call->conn, skb); - _net("sent skb %p", skb); - } else { - _debug("failed to delete ACK timer"); - } + if (seq == 1 && rxrpc_is_client_call(call)) + rxrpc_expose_client_call(call); + sp->resend_at = jiffies + rxrpc_resend_timeout; + ret = rxrpc_send_data_packet(call->conn, skb); if (ret < 0) { _debug("need instant resend %d", ret); - sp->need_resend = true; - rxrpc_instant_resend(call); + rxrpc_instant_resend(call, ix); } + rxrpc_free_skb(skb); _leave(""); } @@ -212,9 +203,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, _debug("alloc"); - if (CIRC_SPACE(call->acks_head, - ACCESS_ONCE(call->acks_tail), - call->acks_winsz) <= 0) { + if (call->tx_top - call->tx_hard_ack >= + call->tx_winsize) { ret = -EAGAIN; if (msg->msg_flags & MSG_DONTWAIT) goto maybe_error; @@ -313,7 +303,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, memset(skb_put(skb, pad), 0, pad); } - seq = atomic_inc_return(&call->sequence); + seq = call->tx_top + 1; sp->hdr.epoch = conn->proto.epoch; sp->hdr.cid = call->cid; @@ -329,9 +319,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, sp->hdr.flags = conn->out_clientflag; if (msg_data_left(msg) == 0 && !more) sp->hdr.flags |= RXRPC_LAST_PACKET; - else if (CIRC_SPACE(call->acks_head, - ACCESS_ONCE(call->acks_tail), - call->acks_winsz) > 1) + else if (call->tx_top - call->tx_hard_ack < + call->tx_winsize) sp->hdr.flags |= RXRPC_MORE_PACKETS; if (more && seq & 1) sp->hdr.flags |= 
RXRPC_REQUEST_ACK; @@ -358,7 +347,7 @@ out: call_terminated: rxrpc_free_skb(skb); _leave(" = %d", -call->error); - return ret; + return -call->error; maybe_error: if (copied) @@ -452,29 +441,6 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, } /* - * abort a call, sending an ABORT packet to the peer - */ -static void rxrpc_send_abort(struct rxrpc_call *call, const char *why, - u32 abort_code, int error) -{ - if (call->state >= RXRPC_CALL_COMPLETE) - return; - - write_lock_bh(&call->state_lock); - - if (__rxrpc_abort_call(why, call, 0, abort_code, error)) { - del_timer_sync(&call->resend_timer); - del_timer_sync(&call->ack_timer); - clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events); - clear_bit(RXRPC_CALL_EV_ACK, &call->events); - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - rxrpc_queue_call(call); - } - - write_unlock_bh(&call->state_lock); -} - -/* * Create a new client call for sendmsg(). */ static struct rxrpc_call * @@ -549,7 +515,6 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) return PTR_ERR(call); } - rxrpc_see_call(call); _debug("CALL %d USR %lx ST %d on CONN %p", call->debug_id, call->user_call_ID, call->state, call->conn); @@ -557,8 +522,10 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) /* it's too late for this call */ ret = -ESHUTDOWN; } else if (cmd == RXRPC_CMD_SEND_ABORT) { - rxrpc_send_abort(call, "CMD", abort_code, ECONNABORTED); ret = 0; + if (rxrpc_abort_call("CMD", call, 0, abort_code, ECONNABORTED)) + ret = rxrpc_send_call_packet(call, + RXRPC_PACKET_TYPE_ABORT); } else if (cmd != RXRPC_CMD_SEND_DATA) { ret = -EINVAL; } else if (rxrpc_is_client_call(call) && @@ -639,7 +606,8 @@ void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, lock_sock(sock->sk); - rxrpc_send_abort(call, why, abort_code, error); + if (rxrpc_abort_call(why, call, 0, abort_code, error)) + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); release_sock(sock->sk); _leave(""); 
diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 9b8f845..620d9cc 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -19,133 +19,6 @@ #include "ar-internal.h" /* - * set up for the ACK at the end of the receive phase when we discard the final - * receive phase data packet - * - called with softirqs disabled - */ -static void rxrpc_request_final_ACK(struct rxrpc_call *call) -{ - /* the call may be aborted before we have a chance to ACK it */ - write_lock(&call->state_lock); - - switch (call->state) { - case RXRPC_CALL_CLIENT_RECV_REPLY: - call->state = RXRPC_CALL_CLIENT_FINAL_ACK; - _debug("request final ACK"); - - set_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); - if (try_to_del_timer_sync(&call->ack_timer) >= 0) - rxrpc_queue_call(call); - break; - - case RXRPC_CALL_SERVER_RECV_REQUEST: - call->state = RXRPC_CALL_SERVER_ACK_REQUEST; - default: - break; - } - - write_unlock(&call->state_lock); -} - -/* - * drop the bottom ACK off of the call ACK window and advance the window - */ -static void rxrpc_hard_ACK_data(struct rxrpc_call *call, struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - int loop; - u32 seq; - - spin_lock_bh(&call->lock); - - _debug("hard ACK #%u", sp->hdr.seq); - - for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) { - call->ackr_window[loop] >>= 1; - call->ackr_window[loop] |= - call->ackr_window[loop + 1] << (BITS_PER_LONG - 1); - } - - seq = sp->hdr.seq; - ASSERTCMP(seq, ==, call->rx_data_eaten + 1); - call->rx_data_eaten = seq; - - if (call->ackr_win_top < UINT_MAX) - call->ackr_win_top++; - - ASSERTIFCMP(call->state <= RXRPC_CALL_COMPLETE, - call->rx_data_post, >=, call->rx_data_recv); - ASSERTIFCMP(call->state <= RXRPC_CALL_COMPLETE, - call->rx_data_recv, >=, call->rx_data_eaten); - - if (sp->hdr.flags & RXRPC_LAST_PACKET) { - rxrpc_request_final_ACK(call); - } else if (atomic_dec_and_test(&call->ackr_not_idle) && - test_and_clear_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags)) { - /* We previously 
soft-ACK'd some received packets that have now - * been consumed, so send a hard-ACK if no more packets are - * immediately forthcoming to allow the transmitter to free up - * its Tx bufferage. - */ - _debug("send Rx idle ACK"); - __rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, - skb->priority, sp->hdr.serial, false); - } - - spin_unlock_bh(&call->lock); -} - -/** - * rxrpc_kernel_data_consumed - Record consumption of data message - * @call: The call to which the message pertains. - * @skb: Message holding data - * - * Record the consumption of a data message and generate an ACK if appropriate. - * The call state is shifted if this was the final packet. The caller must be - * in process context with no spinlocks held. - * - * TODO: Actually generate the ACK here rather than punting this to the - * workqueue. - */ -void rxrpc_kernel_data_consumed(struct rxrpc_call *call, struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - - _enter("%d,%p{%u}", call->debug_id, skb, sp->hdr.seq); - - ASSERTCMP(sp->call, ==, call); - ASSERTCMP(sp->hdr.type, ==, RXRPC_PACKET_TYPE_DATA); - - /* TODO: Fix the sequence number tracking */ - ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv); - ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1); - ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten); - - call->rx_data_recv = sp->hdr.seq; - rxrpc_hard_ACK_data(call, skb); -} - -/* - * Destroy a packet that has an RxRPC control buffer - */ -void rxrpc_packet_destructor(struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct rxrpc_call *call = sp->call; - - _enter("%p{%p}", skb, call); - - if (call) { - rxrpc_put_call_for_skb(call, skb); - sp->call = NULL; - } - - if (skb->sk) - sock_rfree(skb); - _leave(""); -} - -/* * Note the existence of a new-to-us socket buffer (allocated or dequeued). 
*/ void rxrpc_new_skb(struct sk_buff *skb) -- cgit v1.1 From 7e9d2850a8db4e0d85a20bb692198bf2cc4be3b7 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 8 Sep 2016 11:11:00 +0200 Subject: ARM: dts: STiH410: Handle interconnect clock required by EHCI/OHCI (USB) The STiH4{07,10} platform contains some interconnect clocks which are used by various IPs. If this clock isn't handled correctly by ST's EHCI/OHCI drivers, their hub won't be found, the following error will be shown and the result will be non-working USB: [ 97.221963] hub 2-1:1.0: hub_ext_port_status failed (err = -110) Cc: stable@vger.kernel.org Tested-by: Peter Griffin Signed-off-by: Lee Jones Acked-by: Patrice Chotard --- arch/arm/boot/dts/stih410.dtsi | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/stih410.dtsi b/arch/arm/boot/dts/stih410.dtsi index 18ed1ad..4031886 100644 --- a/arch/arm/boot/dts/stih410.dtsi +++ b/arch/arm/boot/dts/stih410.dtsi @@ -41,7 +41,8 @@ compatible = "st,st-ohci-300x"; reg = <0x9a03c00 0x100>; interrupts = ; - clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>; + clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>, + <&clk_s_c0_flexgen CLK_RX_ICN_DISP_0>; resets = <&powerdown STIH407_USB2_PORT0_POWERDOWN>, <&softreset STIH407_USB2_PORT0_SOFTRESET>; reset-names = "power", "softreset"; @@ -57,7 +58,8 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb0>; - clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>; + clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>, + <&clk_s_c0_flexgen CLK_RX_ICN_DISP_0>; resets = <&powerdown STIH407_USB2_PORT0_POWERDOWN>, <&softreset STIH407_USB2_PORT0_SOFTRESET>; reset-names = "power", "softreset"; @@ -71,7 +73,8 @@ compatible = "st,st-ohci-300x"; reg = <0x9a83c00 0x100>; interrupts = ; - clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>; + clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>, + <&clk_s_c0_flexgen CLK_RX_ICN_DISP_0>; resets = <&powerdown STIH407_USB2_PORT1_POWERDOWN>, <&softreset
STIH407_USB2_PORT1_SOFTRESET>; reset-names = "power", "softreset"; @@ -87,7 +90,8 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb1>; - clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>; + clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>, + <&clk_s_c0_flexgen CLK_RX_ICN_DISP_0>; resets = <&powerdown STIH407_USB2_PORT1_POWERDOWN>, <&softreset STIH407_USB2_PORT1_SOFTRESET>; reset-names = "power", "softreset"; -- cgit v1.1 From 78567f135d9bbbaf4538f63656d3e4d957c35fe9 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 8 Sep 2016 11:11:00 +0200 Subject: ARM: dts: STiH407-family: Provide interconnect clock for consumption in ST SDHCI The STiH4{07,10} platform contains some interconnect clocks which are used by various IPs. If these clocks aren't handled correctly by ST's SDHCI driver MMC will break and the following output can be observed: [ 13.916949] mmc0: Timeout waiting for hardware interrupt. [ 13.922349] sdhci: =========== REGISTER DUMP (mmc0)=========== [ 13.928175] sdhci: Sys addr: 0x00000000 | Version: 0x00001002 [ 13.933999] sdhci: Blk size: 0x00007040 | Blk cnt: 0x00000001 [ 13.939825] sdhci: Argument: 0x00fffff0 | Trn mode: 0x00000013 [ 13.945650] sdhci: Present: 0x1fff0206 | Host ctl: 0x00000011 [ 13.951475] sdhci: Power: 0x0000000f | Blk gap: 0x00000080 [ 13.957300] sdhci: Wake-up: 0x00000000 | Clock: 0x00003f07 [ 13.963126] sdhci: Timeout: 0x00000004 | Int stat: 0x00000000 [ 13.968952] sdhci: Int enab: 0x02ff008b | Sig enab: 0x02ff008b [ 13.974777] sdhci: AC12 err: 0x00000000 | Slot int: 0x00000000 [ 13.980602] sdhci: Caps: 0x21ed3281 | Caps_1: 0x00000000 [ 13.986428] sdhci: Cmd: 0x0000063a | Max curr: 0x00000000 [ 13.992252] sdhci: Host ctl2: 0x00000000 [ 13.996166] sdhci: ADMA Err: 0x00000000 | ADMA Ptr: 0x7c048200 [ 14.001990] sdhci: =========================================== [ 14.009802] mmc0: Got data interrupt 0x02000000 even though no data operation was in progress. 
Cc: stable@vger.kernel.org Tested-by: Peter Griffin Signed-off-by: Lee Jones Acked-by: Patrice Chotard --- arch/arm/boot/dts/stih407-family.dtsi | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/stih407-family.dtsi b/arch/arm/boot/dts/stih407-family.dtsi index d294e82..8b063ab 100644 --- a/arch/arm/boot/dts/stih407-family.dtsi +++ b/arch/arm/boot/dts/stih407-family.dtsi @@ -550,8 +550,9 @@ interrupt-names = "mmcirq"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_mmc0>; - clock-names = "mmc"; - clocks = <&clk_s_c0_flexgen CLK_MMC_0>; + clock-names = "mmc", "icn"; + clocks = <&clk_s_c0_flexgen CLK_MMC_0>, + <&clk_s_c0_flexgen CLK_RX_ICN_HVA>; bus-width = <8>; non-removable; }; @@ -565,8 +566,9 @@ interrupt-names = "mmcirq"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_sd1>; - clock-names = "mmc"; - clocks = <&clk_s_c0_flexgen CLK_MMC_1>; + clock-names = "mmc", "icn"; + clocks = <&clk_s_c0_flexgen CLK_MMC_1>, + <&clk_s_c0_flexgen CLK_RX_ICN_HVA>; resets = <&softreset STIH407_MMC1_SOFTRESET>; bus-width = <4>; }; -- cgit v1.1 From a4497a86fb9b855c5ac8503fdc959393b00bb643 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Thu, 8 Sep 2016 10:15:28 -0400 Subject: x86, clock: Fix kvm guest tsc initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When booting a kvm guest on AMD with the latest kernel the following messages are displayed in the boot log: tsc: Unable to calibrate against PIT tsc: HPET/PMTIMER calibration failed aa297292d708 ("x86/tsc: Enumerate SKL cpu_khz and tsc_khz via CPUID") introduced a change to account for a difference in cpu and tsc frequencies for Intel SKL processors. 
Before this change the native tsc set x86_platform.calibrate_tsc to native_calibrate_tsc() which is a hardware calibration of the tsc, and in tsc_init() executed tsc_khz = x86_platform.calibrate_tsc(); cpu_khz = tsc_khz; The kvm code changed x86_platform.calibrate_tsc to kvm_get_tsc_khz() and executed the same tsc_init() function. This meant that KVM guests did not execute the native hardware calibration function. After aa297292d708, there are separate native calibrations for cpu_khz and tsc_khz. The code sets x86_platform.calibrate_tsc to native_calibrate_tsc() which is now an Intel specific calibration function, and x86_platform.calibrate_cpu to native_calibrate_cpu() which is the "old" native_calibrate_tsc() function (ie, the native hardware calibration function). tsc_init() now does cpu_khz = x86_platform.calibrate_cpu(); tsc_khz = x86_platform.calibrate_tsc(); if (tsc_khz == 0) tsc_khz = cpu_khz; else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz) cpu_khz = tsc_khz; The kvm code should not call the hardware initialization in native_calibrate_cpu(), as it isn't applicable for kvm and it didn't do that prior to aa297292d708. This patch resolves this issue by setting x86_platform.calibrate_cpu to kvm_get_tsc_khz(). v2: I had originally set x86_platform.calibrate_cpu to cpu_khz_from_cpuid(), however, pbonzini pointed out that the CPUID leaf in that function is not available in KVM. I have changed the function pointer to kvm_get_tsc_khz(). Fixes: aa297292d708 ("x86/tsc: Enumerate SKL cpu_khz and tsc_khz via CPUID") Signed-off-by: Prarit Bhargava Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: x86@kernel.org Cc: Len Brown Cc: "Peter Zijlstra (Intel)" Cc: Borislav Petkov Cc: Adrian Hunter Cc: "Christopher S. 
Hall" Cc: David Woodhouse Cc: kvm@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvmclock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 1d39bfb..3692249 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -289,6 +289,7 @@ void __init kvmclock_init(void) put_cpu(); x86_platform.calibrate_tsc = kvm_get_tsc_khz; + x86_platform.calibrate_cpu = kvm_get_tsc_khz; x86_platform.get_wallclock = kvm_get_wallclock; x86_platform.set_wallclock = kvm_set_wallclock; #ifdef CONFIG_X86_LOCAL_APIC -- cgit v1.1 From e35478eac030990e23a56bf11dc074c5a069124a Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 23 Aug 2016 17:28:03 +0200 Subject: i2c: mux: demux-pinctrl: run properly with multiple instances We can't use a static property for all the changesets, so we now create dynamic ones for each changeset. Signed-off-by: Wolfram Sang Fixes: 50a5ba87690814 ("i2c: mux: demux-pinctrl: add driver") Signed-off-by: Wolfram Sang --- drivers/i2c/muxes/i2c-demux-pinctrl.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/muxes/i2c-demux-pinctrl.c b/drivers/i2c/muxes/i2c-demux-pinctrl.c index b6b9d25..b3893f6 100644 --- a/drivers/i2c/muxes/i2c-demux-pinctrl.c +++ b/drivers/i2c/muxes/i2c-demux-pinctrl.c @@ -37,8 +37,6 @@ struct i2c_demux_pinctrl_priv { struct i2c_demux_pinctrl_chan chan[]; }; -static struct property status_okay = { .name = "status", .length = 3, .value = "ok" }; - static int i2c_demux_master_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) { struct i2c_demux_pinctrl_priv *priv = adap->algo_data; @@ -193,6 +191,7 @@ static int i2c_demux_pinctrl_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; struct i2c_demux_pinctrl_priv *priv; + struct property *props; int num_chan, i, j, err; num_chan = of_count_phandle_with_args(np, "i2c-parent", NULL); @@ -203,7 +202,10 @@ static int 
i2c_demux_pinctrl_probe(struct platform_device *pdev) priv = devm_kzalloc(&pdev->dev, sizeof(*priv) + num_chan * sizeof(struct i2c_demux_pinctrl_chan), GFP_KERNEL); - if (!priv) + + props = devm_kcalloc(&pdev->dev, num_chan, sizeof(*props), GFP_KERNEL); + + if (!priv || !props) return -ENOMEM; err = of_property_read_string(np, "i2c-bus-name", &priv->bus_name); @@ -221,8 +223,12 @@ static int i2c_demux_pinctrl_probe(struct platform_device *pdev) } priv->chan[i].parent_np = adap_np; + props[i].name = devm_kstrdup(&pdev->dev, "status", GFP_KERNEL); + props[i].value = devm_kstrdup(&pdev->dev, "ok", GFP_KERNEL); + props[i].length = 3; + of_changeset_init(&priv->chan[i].chgset); - of_changeset_update_property(&priv->chan[i].chgset, adap_np, &status_okay); + of_changeset_update_property(&priv->chan[i].chgset, adap_np, &props[i]); } priv->num_chan = num_chan; -- cgit v1.1 From 30851a7c2155d0b321485e66386ea99191d8b3f5 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 25 Aug 2016 19:27:40 +0200 Subject: Documentation: i2c: slave-interface: add note for driver development Make it clear that adding slave support shall not disable master functionality. We can have both, so we should. Signed-off-by: Wolfram Sang --- Documentation/i2c/slave-interface | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/i2c/slave-interface b/Documentation/i2c/slave-interface index 80807ad..7e2a228 100644 --- a/Documentation/i2c/slave-interface +++ b/Documentation/i2c/slave-interface @@ -145,6 +145,11 @@ If you want to add slave support to the bus driver: * Catch the slave interrupts and send appropriate i2c_slave_events to the backend. +Note that most hardware supports being master _and_ slave on the same bus. So, +if you extend a bus driver, please make sure that the driver supports that as +well. In almost all cases, slave support does not need to disable the master +functionality. + Check the i2c-rcar driver as an example. 
-- cgit v1.1 From 015282c9eb6da05bfad6ff009078f91e06c0c98f Mon Sep 17 00:00:00 2001 From: Wenbo Wang Date: Thu, 8 Sep 2016 12:12:11 -0400 Subject: nvme/quirk: Add a delay before checking device ready for memblaze device Signed-off-by: Wenbo Wang Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index be84a84..60f7eab 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2117,6 +2117,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, { PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, + { PCI_DEVICE(0x1c5f, 0x0540), /* Memblaze Pblaze4 adapter */ + .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) }, { 0, } -- cgit v1.1 From 47a7b0d8888c04c9746812820b6e60553cc77bbc Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Sun, 4 Sep 2016 22:17:28 -0400 Subject: md-cluster: make md-cluster also can work when compiled into kernel The md-cluster is compiled as module by default, if it is compiled by built-in way, then we can't make md-cluster works. [64782.630008] md/raid1:md127: active with 2 out of 2 mirrors [64782.630528] md-cluster module not found. 
[64782.630530] md127: Could not setup cluster service (-2) Fixes: edb39c9 ("Introduce md_cluster_operations to handle cluster functions") Cc: stable@vger.kernel.org (v4.1+) Reported-by: Marc Smith Reviewed-by: NeilBrown Signed-off-by: Guoqing Jiang Signed-off-by: Shaohua Li --- drivers/md/md.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 67642ba..915e84d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7610,16 +7610,12 @@ EXPORT_SYMBOL(unregister_md_cluster_operations); int md_setup_cluster(struct mddev *mddev, int nodes) { - int err; - - err = request_module("md-cluster"); - if (err) { - pr_err("md-cluster module not found.\n"); - return -ENOENT; - } - + if (!md_cluster_ops) + request_module("md-cluster"); spin_lock(&pers_lock); + /* ensure module won't be unloaded */ if (!md_cluster_ops || !try_module_get(md_cluster_mod)) { + pr_err("can't find md-cluster module or get it's reference.\n"); spin_unlock(&pers_lock); return -ENOENT; } -- cgit v1.1 From 664d58bf4d3406dc4404e29bcb8c89fd22589d57 Mon Sep 17 00:00:00 2001 From: Zhuo-hao Lee Date: Sat, 27 Aug 2016 15:39:30 +0800 Subject: i2c: designware: save the preset value of DW_IC_SDA_HOLD There are several ways to set the SDA hold time for i2c controller, including: Device Tree, built-in device properties and ACPI. However, if the SDA hold time is not specified by above method, we should read the value, where it is preset by firmware, and save it to sda_hold_time. This is needed because when i2c controller enters runtime suspend, the DW_IC_SDA_HOLD value will be reset to chipset default value. And during runtime resume, i2c_dw_init will be called to reconfigure i2c controller. If sda_hold_time is zero, the chipset default hold time will be used, that will be too short for some platforms. Therefore, to have a better tolerance, the DW_IC_SDA_HOLD value should be kept by sda_hold_time. 
Signed-off-by: Zhuo-hao Lee Reviewed-by: Andy Shevchenko Acked-by: Jarkko Nikula Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-core.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-core.c b/drivers/i2c/busses/i2c-designware-core.c index c6922b8..fcd973d 100644 --- a/drivers/i2c/busses/i2c-designware-core.c +++ b/drivers/i2c/busses/i2c-designware-core.c @@ -367,13 +367,17 @@ int i2c_dw_init(struct dw_i2c_dev *dev) dev_dbg(dev->dev, "Fast-mode HCNT:LCNT = %d:%d\n", hcnt, lcnt); /* Configure SDA Hold Time if required */ - if (dev->sda_hold_time) { - reg = dw_readl(dev, DW_IC_COMP_VERSION); - if (reg >= DW_IC_SDA_HOLD_MIN_VERS) + reg = dw_readl(dev, DW_IC_COMP_VERSION); + if (reg >= DW_IC_SDA_HOLD_MIN_VERS) { + if (dev->sda_hold_time) { dw_writel(dev, dev->sda_hold_time, DW_IC_SDA_HOLD); - else - dev_warn(dev->dev, - "Hardware too old to adjust SDA hold time."); + } else { + /* Keep previous hold time setting if no one set it */ + dev->sda_hold_time = dw_readl(dev, DW_IC_SDA_HOLD); + } + } else { + dev_warn(dev->dev, + "Hardware too old to adjust SDA hold time.\n"); } /* Configure Tx/Rx FIFO threshold levels */ -- cgit v1.1 From e0603c8dd298171bd64227c65c6bbd6a861e1a78 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 31 Aug 2016 11:38:48 +0200 Subject: i2c: Spelling s/acknowedge/acknowledge/ Signed-off-by: Geert Uytterhoeven Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-cadence.c | 2 +- drivers/i2c/busses/i2c-rk3x.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index 90bbd9f..3c16a2f 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -767,7 +767,7 @@ static int cdns_i2c_setclk(unsigned long clk_in, struct cdns_i2c *id) * depending on the scaling direction. 
* * Return: NOTIFY_STOP if the rate change should be aborted, NOTIFY_OK - * to acknowedge the change, NOTIFY_DONE if the notification is + * to acknowledge the change, NOTIFY_DONE if the notification is * considered irrelevant. */ static int cdns_i2c_clk_notifier_cb(struct notifier_block *nb, unsigned long diff --git a/drivers/i2c/busses/i2c-rk3x.c b/drivers/i2c/busses/i2c-rk3x.c index 2bc8b01..dce1abd 100644 --- a/drivers/i2c/busses/i2c-rk3x.c +++ b/drivers/i2c/busses/i2c-rk3x.c @@ -918,7 +918,7 @@ static void rk3x_i2c_adapt_div(struct rk3x_i2c *i2c, unsigned long clk_rate) * Code adapted from i2c-cadence.c. * * Return: NOTIFY_STOP if the rate change should be aborted, NOTIFY_OK - * to acknowedge the change, NOTIFY_DONE if the notification is + * to acknowledge the change, NOTIFY_DONE if the notification is * considered irrelevant. */ static int rk3x_i2c_clk_notifier_cb(struct notifier_block *nb, unsigned long -- cgit v1.1 From cbfff439c54f37fc363b1d365183fa61af43585c Mon Sep 17 00:00:00 2001 From: Doug Anderson Date: Mon, 29 Aug 2016 14:22:36 -0700 Subject: i2c: rk3x: Restore clock settings at resume time Depending on a number of factors including: - Which exact Rockchip SoC we're working with - How deep we suspend - Which i2c port we're on We might lose the state of the i2c registers at suspend time. Specifically we've found that on rk3399 the i2c ports that are not in the PMU power domain lose their state with the current suspend depth configured by ARM Trusted Firmware. Note that there are very few actual i2c registers that aren't configured per transfer anyway so all we actually need to re-configure are the clock config registers. We'll just add a call to rk3x_i2c_adapt_div() at resume time and be done with it. NOTE: On rk3399 on ports whose power was lost, I put printouts in at resume time.
I saw things like: before: con=0x00010300, div=0x00060006 after: con=0x00010200, div=0x00180025 Signed-off-by: Douglas Anderson Reviewed-by: David Wu Tested-by: David Wu [wsa: removed duplicate const] Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-rk3x.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/i2c/busses/i2c-rk3x.c b/drivers/i2c/busses/i2c-rk3x.c index dce1abd..5c5b7ca 100644 --- a/drivers/i2c/busses/i2c-rk3x.c +++ b/drivers/i2c/busses/i2c-rk3x.c @@ -1111,6 +1111,15 @@ static int rk3x_i2c_xfer(struct i2c_adapter *adap, return ret < 0 ? ret : num; } +static __maybe_unused int rk3x_i2c_resume(struct device *dev) +{ + struct rk3x_i2c *i2c = dev_get_drvdata(dev); + + rk3x_i2c_adapt_div(i2c, clk_get_rate(i2c->clk)); + + return 0; +} + static u32 rk3x_i2c_func(struct i2c_adapter *adap) { return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL | I2C_FUNC_PROTOCOL_MANGLING; @@ -1334,12 +1343,15 @@ static int rk3x_i2c_remove(struct platform_device *pdev) return 0; } +static SIMPLE_DEV_PM_OPS(rk3x_i2c_pm_ops, NULL, rk3x_i2c_resume); + static struct platform_driver rk3x_i2c_driver = { .probe = rk3x_i2c_probe, .remove = rk3x_i2c_remove, .driver = { .name = "rk3x-i2c", .of_match_table = rk3x_i2c_match, + .pm = &rk3x_i2c_pm_ops, }, }; -- cgit v1.1 From 3732b30a7d88dc32bef76e117067fb397e811703 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 8 Sep 2016 23:05:07 +0200 Subject: cpufreq-stats: Minor documentation fix The cpufreq-stats code can no longer be built as a module, so it now appears with square brackets in menuconfig. Signed-off-by: Jean Delvare Fixes: 1aefc75b2449 (cpufreq: stats: Make the stats code non-modular) Acked-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki --- Documentation/cpu-freq/cpufreq-stats.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/cpu-freq/cpufreq-stats.txt b/Documentation/cpu-freq/cpufreq-stats.txt index fc64749..8d9773f 100644 --- a/Documentation/cpu-freq/cpufreq-stats.txt +++ b/Documentation/cpu-freq/cpufreq-stats.txt @@ -103,7 +103,7 @@ Config Main Menu Power management options (ACPI, APM) ---> CPU Frequency scaling ---> [*] CPU Frequency scaling - <*> CPU frequency translation statistics + [*] CPU frequency translation statistics [*] CPU frequency translation statistics details -- cgit v1.1 From 74f13c80e210ff5a9e7b13b9853d8a866972f385 Mon Sep 17 00:00:00 2001 From: Zubair Lutfullah Kakakhel Date: Mon, 5 Sep 2016 13:07:54 +0100 Subject: net: ethernet: xilinx: Enable emaclite for MIPS The MIPS based xilfpga platform uses this driver. Enable it for MIPS Signed-off-by: Zubair Lutfullah Kakakhel Signed-off-by: David S. Miller --- drivers/net/ethernet/xilinx/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/xilinx/Kconfig b/drivers/net/ethernet/xilinx/Kconfig index 4f5c024..6d68c8a 100644 --- a/drivers/net/ethernet/xilinx/Kconfig +++ b/drivers/net/ethernet/xilinx/Kconfig @@ -5,7 +5,7 @@ config NET_VENDOR_XILINX bool "Xilinx devices" default y - depends on PPC || PPC32 || MICROBLAZE || ARCH_ZYNQ + depends on PPC || PPC32 || MICROBLAZE || ARCH_ZYNQ || MIPS ---help--- If you have a network (Ethernet) card belonging to this class, say Y. @@ -18,7 +18,7 @@ if NET_VENDOR_XILINX config XILINX_EMACLITE tristate "Xilinx 10/100 Ethernet Lite support" - depends on (PPC32 || MICROBLAZE || ARCH_ZYNQ) + depends on PPC32 || MICROBLAZE || ARCH_ZYNQ || MIPS select PHYLIB ---help--- This driver supports the 10/100 Ethernet Lite from Xilinx. 
-- cgit v1.1 From 76061f631c2ea4ab9c4d66f3a96ecc5737f5aaf7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 7 Sep 2016 08:34:11 -0700 Subject: tcp: fastopen: avoid negative sk_forward_alloc When DATA and/or FIN are carried in a SYN/ACK message or SYN message, we append an skb in socket receive queue, but we forget to call sk_forced_mem_schedule(). Effect is that the socket has a negative sk->sk_forward_alloc as long as the message is not read by the application. Josh Hunt fixed a similar issue in commit d22e15371811 ("tcp: fix tcp fin memory accounting") Fixes: 168a8f58059a ("tcp: TCP Fast Open Server - main code path") Signed-off-by: Eric Dumazet Reviewed-by: Josh Hunt Signed-off-by: David S. Miller --- net/ipv4/tcp_fastopen.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 62a5751..4e777a3 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -150,6 +150,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) tp->segs_in = 0; tcp_segs_in(tp, skb); __skb_pull(skb, tcp_hdrlen(skb)); + sk_forced_mem_schedule(sk, skb->truesize); skb_set_owner_r(skb, sk); TCP_SKB_CB(skb)->seq++; -- cgit v1.1 From d545caca827b65aab557a9e9dcdcf1e5a3823c2d Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Thu, 8 Sep 2016 00:42:25 +0900 Subject: net: inet: diag: expose the socket mark to privileged processes. This adds the capability for a process that has CAP_NET_ADMIN on a socket to see the socket mark in socket dumps. Commit a52e95abf772 ("net: diag: allow socket bytecode filters to match socket marks") recently gave privileged processes the ability to filter socket dumps based on mark. This patch is complementary: it ensures that the mark is also passed to userspace in the socket's netlink attributes. It is useful for tools like ss which display information about sockets. Tested: https://android-review.googlesource.com/270210 Signed-off-by: Lorenzo Colitti Signed-off-by: David S. 
Miller --- include/linux/inet_diag.h | 4 ++-- include/uapi/linux/inet_diag.h | 1 + net/ipv4/inet_diag.c | 49 ++++++++++++++++++++++++++++-------------- net/ipv4/udp_diag.c | 10 +++++---- net/sctp/sctp_diag.c | 20 +++++++++++------ 5 files changed, 56 insertions(+), 28 deletions(-) diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index feb04ea..65da430 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -37,7 +37,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, const struct inet_diag_req_v2 *req, struct user_namespace *user_ns, u32 pid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh); + const struct nlmsghdr *unlh, bool net_admin); void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r, @@ -56,7 +56,7 @@ void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk); int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, struct inet_diag_msg *r, int ext, - struct user_namespace *user_ns); + struct user_namespace *user_ns, bool net_admin); extern int inet_diag_register(const struct inet_diag_handler *handler); extern void inet_diag_unregister(const struct inet_diag_handler *handler); diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 5581206..b5c366f 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -123,6 +123,7 @@ enum { INET_DIAG_LOCALS, INET_DIAG_PEERS, INET_DIAG_PAD, + INET_DIAG_MARK, __INET_DIAG_MAX, }; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index abfbe49..e4d16fc 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -99,6 +99,7 @@ static size_t inet_sk_attr_size(void) + nla_total_size(1) /* INET_DIAG_SHUTDOWN */ + nla_total_size(1) /* INET_DIAG_TOS */ + nla_total_size(1) /* INET_DIAG_TCLASS */ + + nla_total_size(4) /* INET_DIAG_MARK */ + nla_total_size(sizeof(struct 
inet_diag_meminfo)) + nla_total_size(sizeof(struct inet_diag_msg)) + nla_total_size(SK_MEMINFO_VARS * sizeof(u32)) @@ -109,7 +110,8 @@ static size_t inet_sk_attr_size(void) int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, struct inet_diag_msg *r, int ext, - struct user_namespace *user_ns) + struct user_namespace *user_ns, + bool net_admin) { const struct inet_sock *inet = inet_sk(sk); @@ -136,6 +138,9 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, } #endif + if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark)) + goto errout; + r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); r->idiag_inode = sock_i_ino(sk); @@ -149,7 +154,8 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, const struct inet_diag_req_v2 *req, struct user_namespace *user_ns, u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh) + const struct nlmsghdr *unlh, + bool net_admin) { const struct tcp_congestion_ops *ca_ops; const struct inet_diag_handler *handler; @@ -175,7 +181,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->idiag_timer = 0; r->idiag_retrans = 0; - if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns)) + if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin)) goto errout; if (ext & (1 << (INET_DIAG_MEMINFO - 1))) { @@ -274,10 +280,11 @@ static int inet_csk_diag_fill(struct sock *sk, const struct inet_diag_req_v2 *req, struct user_namespace *user_ns, u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh) + const struct nlmsghdr *unlh, + bool net_admin) { - return inet_sk_diag_fill(sk, inet_csk(sk), skb, req, - user_ns, portid, seq, nlmsg_flags, unlh); + return inet_sk_diag_fill(sk, inet_csk(sk), skb, req, user_ns, + portid, seq, nlmsg_flags, unlh, net_admin); } static int inet_twsk_diag_fill(struct sock *sk, @@ -319,8 +326,9 @@ static int inet_twsk_diag_fill(struct sock *sk, static int 
inet_req_diag_fill(struct sock *sk, struct sk_buff *skb, u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh) + const struct nlmsghdr *unlh, bool net_admin) { + struct request_sock *reqsk = inet_reqsk(sk); struct inet_diag_msg *r; struct nlmsghdr *nlh; long tmo; @@ -334,7 +342,7 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb, inet_diag_msg_common_fill(r, sk); r->idiag_state = TCP_SYN_RECV; r->idiag_timer = 1; - r->idiag_retrans = inet_reqsk(sk)->num_retrans; + r->idiag_retrans = reqsk->num_retrans; BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) != offsetof(struct sock, sk_cookie)); @@ -346,6 +354,10 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb, r->idiag_uid = 0; r->idiag_inode = 0; + if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, + inet_rsk(reqsk)->ir_mark)) + return -EMSGSIZE; + nlmsg_end(skb, nlh); return 0; } @@ -354,7 +366,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, const struct inet_diag_req_v2 *r, struct user_namespace *user_ns, u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh) + const struct nlmsghdr *unlh, bool net_admin) { if (sk->sk_state == TCP_TIME_WAIT) return inet_twsk_diag_fill(sk, skb, portid, seq, @@ -362,10 +374,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, if (sk->sk_state == TCP_NEW_SYN_RECV) return inet_req_diag_fill(sk, skb, portid, seq, - nlmsg_flags, unlh); + nlmsg_flags, unlh, net_admin); return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, - nlmsg_flags, unlh); + nlmsg_flags, unlh, net_admin); } struct sock *inet_diag_find_one_icsk(struct net *net, @@ -435,7 +447,8 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, err = sk_diag_fill(sk, rep, req, sk_user_ns(NETLINK_CB(in_skb).sk), NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0, nlh); + nlh->nlmsg_seq, 0, nlh, + netlink_net_capable(in_skb, CAP_NET_ADMIN)); if (err < 0) { WARN_ON(err == -EMSGSIZE); nlmsg_free(rep); @@ -796,7 +809,8 
@@ static int inet_csk_diag_dump(struct sock *sk, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r, - const struct nlattr *bc) + const struct nlattr *bc, + bool net_admin) { if (!inet_diag_bc_sk(bc, sk)) return 0; @@ -804,7 +818,8 @@ static int inet_csk_diag_dump(struct sock *sk, return inet_csk_diag_fill(sk, skb, r, sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); + cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh, + net_admin); } static void twsk_build_assert(void) @@ -840,6 +855,7 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, struct net *net = sock_net(skb->sk); int i, num, s_i, s_num; u32 idiag_states = r->idiag_states; + bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); if (idiag_states & TCPF_SYN_RECV) idiag_states |= TCPF_NEW_SYN_RECV; @@ -880,7 +896,8 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, cb->args[3] > 0) goto next_listen; - if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) { + if (inet_csk_diag_dump(sk, skb, cb, r, + bc, net_admin) < 0) { spin_unlock_bh(&ilb->lock); goto done; } @@ -948,7 +965,7 @@ skip_listen_ht: sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - cb->nlh); + cb->nlh, net_admin); if (res < 0) { spin_unlock_bh(lock); goto done; diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 58b79c0..9a89c10 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -20,7 +20,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *req, - struct nlattr *bc) + struct nlattr *bc, bool net_admin) { if (!inet_diag_bc_sk(bc, sk)) return 0; @@ -28,7 +28,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, return inet_sk_diag_fill(sk, NULL, skb, req, sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, 
cb->nlh); + cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh, net_admin); } static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, @@ -76,7 +76,8 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, err = inet_sk_diag_fill(sk, NULL, rep, req, sk_user_ns(NETLINK_CB(in_skb).sk), NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0, nlh); + nlh->nlmsg_seq, 0, nlh, + netlink_net_capable(in_skb, CAP_NET_ADMIN)); if (err < 0) { WARN_ON(err == -EMSGSIZE); kfree_skb(rep); @@ -97,6 +98,7 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r, struct nlattr *bc) { + bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); struct net *net = sock_net(skb->sk); int num, s_num, slot, s_slot; @@ -132,7 +134,7 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb, r->id.idiag_dport) goto next; - if (sk_diag_dump(sk, skb, cb, r, bc) < 0) { + if (sk_diag_dump(sk, skb, cb, r, bc, net_admin) < 0) { spin_unlock_bh(&hslot->lock); goto done; } diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index f3508aa..807158e3 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -106,7 +106,8 @@ static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc, const struct inet_diag_req_v2 *req, struct user_namespace *user_ns, int portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh) + const struct nlmsghdr *unlh, + bool net_admin) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct list_head *addr_list; @@ -133,7 +134,7 @@ static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc, r->idiag_retrans = 0; } - if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns)) + if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin)) goto errout; if (ext & (1 << (INET_DIAG_SKMEMINFO - 1))) { @@ -203,6 +204,7 @@ struct sctp_comm_param { struct netlink_callback *cb; const struct inet_diag_req_v2 *r; const struct nlmsghdr 
*nlh; + bool net_admin; }; static size_t inet_assoc_attr_size(struct sctp_association *asoc) @@ -219,6 +221,7 @@ static size_t inet_assoc_attr_size(struct sctp_association *asoc) + nla_total_size(1) /* INET_DIAG_SHUTDOWN */ + nla_total_size(1) /* INET_DIAG_TOS */ + nla_total_size(1) /* INET_DIAG_TCLASS */ + + nla_total_size(4) /* INET_DIAG_MARK */ + nla_total_size(addrlen * asoc->peer.transport_count) + nla_total_size(addrlen * addrcnt) + nla_total_size(sizeof(struct inet_diag_meminfo)) @@ -256,7 +259,8 @@ static int sctp_tsp_dump_one(struct sctp_transport *tsp, void *p) err = inet_sctp_diag_fill(sk, assoc, rep, req, sk_user_ns(NETLINK_CB(in_skb).sk), NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0, nlh); + nlh->nlmsg_seq, 0, nlh, + commp->net_admin); release_sock(sk); if (err < 0) { WARN_ON(err == -EMSGSIZE); @@ -310,7 +314,8 @@ static int sctp_tsp_dump(struct sctp_transport *tsp, void *p) sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - NLM_F_MULTI, cb->nlh) < 0) { + NLM_F_MULTI, cb->nlh, + commp->net_admin) < 0) { cb->args[3] = 1; err = 2; goto release; @@ -320,7 +325,8 @@ static int sctp_tsp_dump(struct sctp_transport *tsp, void *p) if (inet_sctp_diag_fill(sk, assoc, skb, r, sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, 0, cb->nlh) < 0) { + cb->nlh->nlmsg_seq, 0, cb->nlh, + commp->net_admin) < 0) { err = 2; goto release; } @@ -375,7 +381,7 @@ static int sctp_ep_dump(struct sctp_endpoint *ep, void *p) sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - cb->nlh) < 0) { + cb->nlh, commp->net_admin) < 0) { err = 2; goto out; } @@ -412,6 +418,7 @@ static int sctp_diag_dump_one(struct sk_buff *in_skb, .skb = in_skb, .r = req, .nlh = nlh, + .net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN), }; if (req->sdiag_family == AF_INET) { @@ -447,6 +454,7 @@ static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, .skb = skb, .cb = 
cb, .r = r, + .net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN), }; /* eps hashtable dumps -- cgit v1.1 From 0dbf657c3993a269fa0cd7dabbb9c02741d4aefa Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 7 Sep 2016 19:07:57 +0300 Subject: net/mlx5e: Fix xmit_more counter race issue Update the xmit_more counter before notifying the HW, to prevent a possible use-after-free of the skb. Fixes: c8cf78fe100b ("net/mlx5e: Add ethtool counter for TX xmit_more") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 988eca9..eb0e725 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -356,6 +356,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) sq->stats.stopped++; } + sq->stats.xmit_more += skb->xmit_more; if (!skb->xmit_more || netif_xmit_stopped(sq->txq)) { int bf_sz = 0; @@ -375,7 +376,6 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) sq->stats.packets++; sq->stats.bytes += num_bytes; - sq->stats.xmit_more += skb->xmit_more; return NETDEV_TX_OK; dma_unmap_wqe_err: -- cgit v1.1 From e7e31ca43d6bedf1c551b1f9c7e78d51c9a45790 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Wed, 7 Sep 2016 19:07:58 +0300 Subject: net/mlx5e: Move an_disable_cap bit to a new position Previous an_disable_cap position bit31 is deprecated to be used in driver with newer firmware. New firmware will advertise the same capability in bit29. Old capability didn't allow setting more than one protocol for a specific speed when autoneg is off, while newer firmware will allow this and it is indicated in the new capability location. Signed-off-by: Bodong Wang Signed-off-by: Saeed Mahameed Signed-off-by: David S.
Miller --- include/linux/mlx5/mlx5_ifc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 21bc455..d1f9a58 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -6710,9 +6710,10 @@ struct mlx5_ifc_pude_reg_bits { }; struct mlx5_ifc_ptys_reg_bits { - u8 an_disable_cap[0x1]; + u8 reserved_at_0[0x1]; u8 an_disable_admin[0x1]; - u8 reserved_at_2[0x6]; + u8 an_disable_cap[0x1]; + u8 reserved_at_3[0x5]; u8 local_port[0x8]; u8 reserved_at_10[0xd]; u8 proto_mask[0x3]; -- cgit v1.1 From 7abc2110779b133e54ce681a4d0be3d89304eda1 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 7 Sep 2016 19:07:59 +0300 Subject: net/mlx5e: Prevent casting overflow On 64 bits architectures unsigned long is longer than u32, casting to unsigned long will result in overflow. We need to first allocate an unsigned long variable, then assign the wanted value. Fixes: 665bc53969d7 ('net/mlx5e: Use new ethtool get/set link ksettings API') Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index d0cf8fa..98e1a4a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -659,9 +659,10 @@ out: static void ptys2ethtool_supported_link(unsigned long *supported_modes, u32 eth_proto_cap) { + unsigned long proto_cap = eth_proto_cap; int proto; - for_each_set_bit(proto, (unsigned long *)&eth_proto_cap, MLX5E_LINK_MODES_NUMBER) + for_each_set_bit(proto, &proto_cap, MLX5E_LINK_MODES_NUMBER) bitmap_or(supported_modes, supported_modes, ptys2ethtool_table[proto].supported, __ETHTOOL_LINK_MODE_MASK_NBITS); @@ -670,9 +671,10 @@ static void ptys2ethtool_supported_link(unsigned long *supported_modes, static void ptys2ethtool_adver_link(unsigned long *advertising_modes, u32 eth_proto_cap) { + unsigned long proto_cap = eth_proto_cap; int proto; - for_each_set_bit(proto, (unsigned long *)&eth_proto_cap, MLX5E_LINK_MODES_NUMBER) + for_each_set_bit(proto, &proto_cap, MLX5E_LINK_MODES_NUMBER) bitmap_or(advertising_modes, advertising_modes, ptys2ethtool_table[proto].advertised, __ETHTOOL_LINK_MODE_MASK_NBITS); -- cgit v1.1 From 4e39883d9c7426f8246ef86a32dc6a6f06f5aace Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 7 Sep 2016 19:08:00 +0300 Subject: net/mlx5e: Fix global PFC counters replication Currently when reading global PFC statistics we left the counter iterator out of the equation and we ended up reading the same counter over and over again. Instead of reading the counter at index 0 on every iteration we now read the counter at index (i). Fixes: e989d5a532ce ('net/mlx5e: Expose flow control counters to ethtool') Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 98e1a4a..7a346bb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -331,7 +331,7 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, if (mlx5e_query_global_pause_combined(priv)) { for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0], - pport_per_prio_pfc_stats_desc, 0); + pport_per_prio_pfc_stats_desc, i); } } -- cgit v1.1 From cd17d230dd060a12f7451c0caeedb3fd5158eaf9 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 7 Sep 2016 19:08:01 +0300 Subject: net/mlx5e: Fix parsing of vlan packets when updating lro header Currently vlan tagged packets were not parsed correctly and assumed to be regular IPv4/IPv6 packets. We should check for 802.1Q/802.1ad tags and update the lro header accordingly. This fixes the use case where LRO is on and rxvlan is off (vlan stripping is off). Fixes: e586b3b0baee ('net/mlx5: Ethernet Datapath files') Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index b6f8ebb..e7c969d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -637,24 +637,32 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, u32 cqe_bcnt) { - struct ethhdr *eth = (struct ethhdr *)(skb->data); - struct iphdr *ipv4 = (struct iphdr *)(skb->data + ETH_HLEN); - struct ipv6hdr *ipv6 = (struct ipv6hdr *)(skb->data + ETH_HLEN); + struct ethhdr *eth = (struct ethhdr *)(skb->data); + struct iphdr *ipv4; + struct ipv6hdr *ipv6; struct tcphdr *tcp; + int network_depth = 0; + __be16 proto; + u16 tot_len; u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe); int tcp_ack = ((CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA == l4_hdr_type) || (CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA == l4_hdr_type)); - u16 tot_len = cqe_bcnt - ETH_HLEN; + skb->mac_len = ETH_HLEN; + proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth); - if (eth->h_proto == htons(ETH_P_IP)) { - tcp = (struct tcphdr *)(skb->data + ETH_HLEN + + ipv4 = (struct iphdr *)(skb->data + network_depth); + ipv6 = (struct ipv6hdr *)(skb->data + network_depth); + tot_len = cqe_bcnt - network_depth; + + if (proto == htons(ETH_P_IP)) { + tcp = (struct tcphdr *)(skb->data + network_depth + sizeof(struct iphdr)); ipv6 = NULL; skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; } else { - tcp = (struct tcphdr *)(skb->data + ETH_HLEN + + tcp = (struct tcphdr *)(skb->data + network_depth + sizeof(struct ipv6hdr)); ipv4 = NULL; skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; -- cgit v1.1 From 8c146bb9d59aa2ac45222171916ece186c4b3943 Mon Sep 17 00:00:00 2001 From: Thomas F Herbert Date: Wed, 7 Sep 2016 12:56:57 -0400 Subject: openvswitch: 802.1ad uapi changes. 
openvswitch: Add support for 8021.AD Change the description of the VLAN tpid field. Signed-off-by: Thomas F Herbert Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 54c3b4f..59ed399 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -605,13 +605,13 @@ struct ovs_action_push_mpls { * @vlan_tci: Tag control identifier (TCI) to push. The CFI bit must be set * (but it will not be set in the 802.1Q header that is pushed). * - * The @vlan_tpid value is typically %ETH_P_8021Q. The only acceptable TPID - * values are those that the kernel module also parses as 802.1Q headers, to - * prevent %OVS_ACTION_ATTR_PUSH_VLAN followed by %OVS_ACTION_ATTR_POP_VLAN - * from having surprising results. + * The @vlan_tpid value is typically %ETH_P_8021Q or %ETH_P_8021AD. + * The only acceptable TPID values are those that the kernel module also parses + * as 802.1Q or 802.1AD headers, to prevent %OVS_ACTION_ATTR_PUSH_VLAN followed + * by %OVS_ACTION_ATTR_POP_VLAN from having surprising results. */ struct ovs_action_push_vlan { - __be16 vlan_tpid; /* 802.1Q TPID. */ + __be16 vlan_tpid; /* 802.1Q or 802.1ad TPID. */ __be16 vlan_tci; /* 802.1Q TCI (VLAN ID and priority). */ }; @@ -721,9 +721,10 @@ enum ovs_nat_attr { * is copied from the value to the packet header field, rest of the bits are * left unchanged. The non-masked value bits must be passed in as zeroes. * Masking is not supported for the %OVS_KEY_ATTR_TUNNEL attribute. - * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the - * packet. - * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet. + * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q or 802.1ad header + * onto the packet. 
+ * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q or 802.1ad header + * from the packet. * @OVS_ACTION_ATTR_SAMPLE: Probabilitically executes actions, as specified in * the nested %OVS_SAMPLE_ATTR_* attributes. * @OVS_ACTION_ATTR_PUSH_MPLS: Push a new MPLS label stack entry onto the -- cgit v1.1 From fe19c4f971a55cea3be442d8032a5f6021702791 Mon Sep 17 00:00:00 2001 From: Eric Garver Date: Wed, 7 Sep 2016 12:56:58 -0400 Subject: vlan: Check for vlan ethernet types for 8021.q or 802.1ad This is to simplify using double tagged vlans. This function allows all valid vlan ethertypes to be checked in a single function call. Also replace some instances that check for both ETH_P_8021Q and ETH_P_8021AD. Patch based on one originally by Thomas F Herbert. Signed-off-by: Thomas F Herbert Signed-off-by: Eric Garver Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 49d4aef..3319d97 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -272,6 +272,23 @@ static inline int vlan_get_encap_level(struct net_device *dev) } #endif +/** + * eth_type_vlan - check for valid vlan ether type. + * @ethertype: ether type to check + * + * Returns true if the ether type is a vlan ether type. 
+ */ +static inline bool eth_type_vlan(__be16 ethertype) +{ + switch (ethertype) { + case htons(ETH_P_8021Q): + case htons(ETH_P_8021AD): + return true; + default: + return false; + } +} + static inline bool vlan_hw_offload_capable(netdev_features_t features, __be16 proto) { @@ -425,8 +442,7 @@ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) { struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb->data; - if (veth->h_vlan_proto != htons(ETH_P_8021Q) && - veth->h_vlan_proto != htons(ETH_P_8021AD)) + if (!eth_type_vlan(veth->h_vlan_proto)) return -EINVAL; *vlan_tci = ntohs(veth->h_vlan_TCI); @@ -488,7 +504,7 @@ static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type, * present at mac_len - VLAN_HLEN (if mac_len > 0), or at * ETH_HLEN otherwise */ - if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) { + if (eth_type_vlan(type)) { if (vlan_depth) { if (WARN_ON(vlan_depth < VLAN_HLEN)) return 0; @@ -506,8 +522,7 @@ static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type, vh = (struct vlan_hdr *)(skb->data + vlan_depth); type = vh->h_vlan_encapsulated_proto; vlan_depth += VLAN_HLEN; - } while (type == htons(ETH_P_8021Q) || - type == htons(ETH_P_8021AD)); + } while (eth_type_vlan(type)); } if (depth) @@ -572,8 +587,7 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb, static inline bool skb_vlan_tagged(const struct sk_buff *skb) { if (!skb_vlan_tag_present(skb) && - likely(skb->protocol != htons(ETH_P_8021Q) && - skb->protocol != htons(ETH_P_8021AD))) + likely(!eth_type_vlan(skb->protocol))) return false; return true; @@ -593,15 +607,14 @@ static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb) if (!skb_vlan_tag_present(skb)) { struct vlan_ethhdr *veh; - if (likely(protocol != htons(ETH_P_8021Q) && - protocol != htons(ETH_P_8021AD))) + if (likely(!eth_type_vlan(protocol))) return false; veh = (struct vlan_ethhdr *)skb->data; protocol = veh->h_vlan_encapsulated_proto; } - if 
(protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD)) + if (!eth_type_vlan(protocol)) return false; return true; -- cgit v1.1 From 018c1dda5ff1e7bd1fe2d9fd1d0f5b82dc6fc0cd Mon Sep 17 00:00:00 2001 From: Eric Garver Date: Wed, 7 Sep 2016 12:56:59 -0400 Subject: openvswitch: 802.1AD Flow handling, actions, vlan parsing, netlink attributes Add support for 802.1ad including the ability to push and pop double tagged vlans. Add support for 802.1ad to netlink parsing and flow conversion. Uses double nested encap attributes to represent double tagged vlan. Inner TPID encoded along with ctci in nested attributes. This is based on Thomas F Herbert's original v20 patch. I made some small clean ups and bug fixes. Signed-off-by: Thomas F Herbert Signed-off-by: Eric Garver Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/actions.c | 16 ++- net/openvswitch/flow.c | 65 ++++++--- net/openvswitch/flow.h | 8 +- net/openvswitch/flow_netlink.c | 310 ++++++++++++++++++++++++++++------------- net/openvswitch/vport.c | 7 +- 5 files changed, 282 insertions(+), 124 deletions(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index ca91fc3..4fe9032 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -246,20 +246,24 @@ static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key) int err; err = skb_vlan_pop(skb); - if (skb_vlan_tag_present(skb)) + if (skb_vlan_tag_present(skb)) { invalidate_flow_key(key); - else - key->eth.tci = 0; + } else { + key->eth.vlan.tci = 0; + key->eth.vlan.tpid = 0; + } return err; } static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key, const struct ovs_action_push_vlan *vlan) { - if (skb_vlan_tag_present(skb)) + if (skb_vlan_tag_present(skb)) { invalidate_flow_key(key); - else - key->eth.tci = vlan->vlan_tci; + } else { + key->eth.vlan.tci = vlan->vlan_tci; + key->eth.vlan.tpid = vlan->vlan_tpid; + } return skb_vlan_push(skb, vlan->vlan_tpid, ntohs(vlan->vlan_tci) & 
~VLAN_TAG_PRESENT); } diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 0ea128e..1240ae3 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -302,24 +302,57 @@ static bool icmp6hdr_ok(struct sk_buff *skb) sizeof(struct icmp6hdr)); } -static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) +/** + * Parse vlan tag from vlan header. + * Returns ERROR on memory error. + * Returns 0 if it encounters a non-vlan or incomplete packet. + * Returns 1 after successfully parsing vlan tag. + */ +static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh) { - struct qtag_prefix { - __be16 eth_type; /* ETH_P_8021Q */ - __be16 tci; - }; - struct qtag_prefix *qp; + struct vlan_head *vh = (struct vlan_head *)skb->data; - if (unlikely(skb->len < sizeof(struct qtag_prefix) + sizeof(__be16))) + if (likely(!eth_type_vlan(vh->tpid))) return 0; - if (unlikely(!pskb_may_pull(skb, sizeof(struct qtag_prefix) + - sizeof(__be16)))) + if (unlikely(skb->len < sizeof(struct vlan_head) + sizeof(__be16))) + return 0; + + if (unlikely(!pskb_may_pull(skb, sizeof(struct vlan_head) + + sizeof(__be16)))) return -ENOMEM; - qp = (struct qtag_prefix *) skb->data; - key->eth.tci = qp->tci | htons(VLAN_TAG_PRESENT); - __skb_pull(skb, sizeof(struct qtag_prefix)); + vh = (struct vlan_head *)skb->data; + key_vh->tci = vh->tci | htons(VLAN_TAG_PRESENT); + key_vh->tpid = vh->tpid; + + __skb_pull(skb, sizeof(struct vlan_head)); + return 1; +} + +static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) +{ + int res; + + key->eth.vlan.tci = 0; + key->eth.vlan.tpid = 0; + key->eth.cvlan.tci = 0; + key->eth.cvlan.tpid = 0; + + if (likely(skb_vlan_tag_present(skb))) { + key->eth.vlan.tci = htons(skb->vlan_tci); + key->eth.vlan.tpid = skb->vlan_proto; + } else { + /* Parse outer vlan tag in the non-accelerated case. */ + res = parse_vlan_tag(skb, &key->eth.vlan); + if (res <= 0) + return res; + } + + /* Parse inner vlan tag. 
*/ + res = parse_vlan_tag(skb, &key->eth.cvlan); + if (res <= 0) + return res; return 0; } @@ -480,12 +513,8 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) * update skb->csum here. */ - key->eth.tci = 0; - if (skb_vlan_tag_present(skb)) - key->eth.tci = htons(skb->vlan_tci); - else if (eth->h_proto == htons(ETH_P_8021Q)) - if (unlikely(parse_vlan(skb, key))) - return -ENOMEM; + if (unlikely(parse_vlan(skb, key))) + return -ENOMEM; key->eth.type = parse_ethertype(skb); if (unlikely(key->eth.type == htons(0))) diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 03378e7..156a302 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -50,6 +50,11 @@ struct ovs_tunnel_info { struct metadata_dst *tun_dst; }; +struct vlan_head { + __be16 tpid; /* Vlan type. Generally 802.1q or 802.1ad.*/ + __be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */ +}; + #define OVS_SW_FLOW_KEY_METADATA_SIZE \ (offsetof(struct sw_flow_key, recirc_id) + \ FIELD_SIZEOF(struct sw_flow_key, recirc_id)) @@ -69,7 +74,8 @@ struct sw_flow_key { struct { u8 src[ETH_ALEN]; /* Ethernet source address. */ u8 dst[ETH_ALEN]; /* Ethernet destination address. */ - __be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */ + struct vlan_head vlan; + struct vlan_head cvlan; __be16 type; /* Ethernet frame type. 
*/ } eth; union { diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index c78a6a1..8efa718 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -808,6 +808,167 @@ int ovs_nla_put_tunnel_info(struct sk_buff *skb, ip_tunnel_info_af(tun_info)); } +static int encode_vlan_from_nlattrs(struct sw_flow_match *match, + const struct nlattr *a[], + bool is_mask, bool inner) +{ + __be16 tci = 0; + __be16 tpid = 0; + + if (a[OVS_KEY_ATTR_VLAN]) + tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); + + if (a[OVS_KEY_ATTR_ETHERTYPE]) + tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); + + if (likely(!inner)) { + SW_FLOW_KEY_PUT(match, eth.vlan.tpid, tpid, is_mask); + SW_FLOW_KEY_PUT(match, eth.vlan.tci, tci, is_mask); + } else { + SW_FLOW_KEY_PUT(match, eth.cvlan.tpid, tpid, is_mask); + SW_FLOW_KEY_PUT(match, eth.cvlan.tci, tci, is_mask); + } + return 0; +} + +static int validate_vlan_from_nlattrs(const struct sw_flow_match *match, + u64 key_attrs, bool inner, + const struct nlattr **a, bool log) +{ + __be16 tci = 0; + + if (!((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) && + (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) && + eth_type_vlan(nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE])))) { + /* Not a VLAN. */ + return 0; + } + + if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) && + (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) { + OVS_NLERR(log, "Invalid %s frame", (inner) ? "C-VLAN" : "VLAN"); + return -EINVAL; + } + + if (a[OVS_KEY_ATTR_VLAN]) + tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); + + if (!(tci & htons(VLAN_TAG_PRESENT))) { + if (tci) { + OVS_NLERR(log, "%s TCI does not have VLAN_TAG_PRESENT bit set.", + (inner) ? "C-VLAN" : "VLAN"); + return -EINVAL; + } else if (nla_len(a[OVS_KEY_ATTR_ENCAP])) { + /* Corner case for truncated VLAN header. */ + OVS_NLERR(log, "Truncated %s header has non-zero encap attribute.", + (inner) ? 
"C-VLAN" : "VLAN"); + return -EINVAL; + } + } + + return 1; +} + +static int validate_vlan_mask_from_nlattrs(const struct sw_flow_match *match, + u64 key_attrs, bool inner, + const struct nlattr **a, bool log) +{ + __be16 tci = 0; + __be16 tpid = 0; + bool encap_valid = !!(match->key->eth.vlan.tci & + htons(VLAN_TAG_PRESENT)); + bool i_encap_valid = !!(match->key->eth.cvlan.tci & + htons(VLAN_TAG_PRESENT)); + + if (!(key_attrs & (1 << OVS_KEY_ATTR_ENCAP))) { + /* Not a VLAN. */ + return 0; + } + + if ((!inner && !encap_valid) || (inner && !i_encap_valid)) { + OVS_NLERR(log, "Encap mask attribute is set for non-%s frame.", + (inner) ? "C-VLAN" : "VLAN"); + return -EINVAL; + } + + if (a[OVS_KEY_ATTR_VLAN]) + tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); + + if (a[OVS_KEY_ATTR_ETHERTYPE]) + tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); + + if (tpid != htons(0xffff)) { + OVS_NLERR(log, "Must have an exact match on %s TPID (mask=%x).", + (inner) ? "C-VLAN" : "VLAN", ntohs(tpid)); + return -EINVAL; + } + if (!(tci & htons(VLAN_TAG_PRESENT))) { + OVS_NLERR(log, "%s TCI mask does not have exact match for VLAN_TAG_PRESENT bit.", + (inner) ? 
"C-VLAN" : "VLAN"); + return -EINVAL; + } + + return 1; +} + +static int __parse_vlan_from_nlattrs(struct sw_flow_match *match, + u64 *key_attrs, bool inner, + const struct nlattr **a, bool is_mask, + bool log) +{ + int err; + const struct nlattr *encap; + + if (!is_mask) + err = validate_vlan_from_nlattrs(match, *key_attrs, inner, + a, log); + else + err = validate_vlan_mask_from_nlattrs(match, *key_attrs, inner, + a, log); + if (err <= 0) + return err; + + err = encode_vlan_from_nlattrs(match, a, is_mask, inner); + if (err) + return err; + + *key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); + *key_attrs &= ~(1 << OVS_KEY_ATTR_VLAN); + *key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); + + encap = a[OVS_KEY_ATTR_ENCAP]; + + if (!is_mask) + err = parse_flow_nlattrs(encap, a, key_attrs, log); + else + err = parse_flow_mask_nlattrs(encap, a, key_attrs, log); + + return err; +} + +static int parse_vlan_from_nlattrs(struct sw_flow_match *match, + u64 *key_attrs, const struct nlattr **a, + bool is_mask, bool log) +{ + int err; + bool encap_valid = false; + + err = __parse_vlan_from_nlattrs(match, key_attrs, false, a, + is_mask, log); + if (err) + return err; + + encap_valid = !!(match->key->eth.vlan.tci & htons(VLAN_TAG_PRESENT)); + if (encap_valid) { + err = __parse_vlan_from_nlattrs(match, key_attrs, true, a, + is_mask, log); + if (err) + return err; + } + + return 0; +} + static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, u64 *attrs, const struct nlattr **a, bool is_mask, bool log) @@ -923,20 +1084,11 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, } if (attrs & (1 << OVS_KEY_ATTR_VLAN)) { - __be16 tci; - - tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); - if (!(tci & htons(VLAN_TAG_PRESENT))) { - if (is_mask) - OVS_NLERR(log, "VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit."); - else - OVS_NLERR(log, "VLAN TCI does not have VLAN_TAG_PRESENT bit set."); - - return -EINVAL; - } - - SW_FLOW_KEY_PUT(match, 
eth.tci, tci, is_mask); - attrs &= ~(1 << OVS_KEY_ATTR_VLAN); + /* VLAN attribute is always parsed before getting here since it + * may occur multiple times. + */ + OVS_NLERR(log, "VLAN attribute unexpected."); + return -EINVAL; } if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { @@ -1182,49 +1334,18 @@ int ovs_nla_get_match(struct net *net, struct sw_flow_match *match, bool log) { const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; - const struct nlattr *encap; struct nlattr *newmask = NULL; u64 key_attrs = 0; u64 mask_attrs = 0; - bool encap_valid = false; int err; err = parse_flow_nlattrs(nla_key, a, &key_attrs, log); if (err) return err; - if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) && - (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) && - (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) { - __be16 tci; - - if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) && - (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) { - OVS_NLERR(log, "Invalid Vlan frame."); - return -EINVAL; - } - - key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); - tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); - encap = a[OVS_KEY_ATTR_ENCAP]; - key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); - encap_valid = true; - - if (tci & htons(VLAN_TAG_PRESENT)) { - err = parse_flow_nlattrs(encap, a, &key_attrs, log); - if (err) - return err; - } else if (!tci) { - /* Corner case for truncated 802.1Q header. */ - if (nla_len(encap)) { - OVS_NLERR(log, "Truncated 802.1Q header has non-zero encap attribute."); - return -EINVAL; - } - } else { - OVS_NLERR(log, "Encap attr is set for non-VLAN frame"); - return -EINVAL; - } - } + err = parse_vlan_from_nlattrs(match, &key_attrs, a, false, log); + if (err) + return err; err = ovs_key_from_nlattrs(net, match, key_attrs, a, false, log); if (err) @@ -1265,46 +1386,12 @@ int ovs_nla_get_match(struct net *net, struct sw_flow_match *match, goto free_newmask; /* Always match on tci. 
*/ - SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true); - - if (mask_attrs & 1 << OVS_KEY_ATTR_ENCAP) { - __be16 eth_type = 0; - __be16 tci = 0; - - if (!encap_valid) { - OVS_NLERR(log, "Encap mask attribute is set for non-VLAN frame."); - err = -EINVAL; - goto free_newmask; - } - - mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); - if (a[OVS_KEY_ATTR_ETHERTYPE]) - eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); - - if (eth_type == htons(0xffff)) { - mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); - encap = a[OVS_KEY_ATTR_ENCAP]; - err = parse_flow_mask_nlattrs(encap, a, - &mask_attrs, log); - if (err) - goto free_newmask; - } else { - OVS_NLERR(log, "VLAN frames must have an exact match on the TPID (mask=%x).", - ntohs(eth_type)); - err = -EINVAL; - goto free_newmask; - } - - if (a[OVS_KEY_ATTR_VLAN]) - tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); + SW_FLOW_KEY_PUT(match, eth.vlan.tci, htons(0xffff), true); + SW_FLOW_KEY_PUT(match, eth.cvlan.tci, htons(0xffff), true); - if (!(tci & htons(VLAN_TAG_PRESENT))) { - OVS_NLERR(log, "VLAN tag present bit must have an exact match (tci_mask=%x).", - ntohs(tci)); - err = -EINVAL; - goto free_newmask; - } - } + err = parse_vlan_from_nlattrs(match, &mask_attrs, a, true, log); + if (err) + goto free_newmask; err = ovs_key_from_nlattrs(net, match, mask_attrs, a, true, log); @@ -1410,12 +1497,25 @@ int ovs_nla_get_flow_metadata(struct net *net, const struct nlattr *attr, return metadata_from_nlattrs(net, &match, &attrs, a, false, log); } +static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh, + bool is_mask) +{ + __be16 eth_type = !is_mask ? 
vh->tpid : htons(0xffff); + + if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) || + nla_put_be16(skb, OVS_KEY_ATTR_VLAN, vh->tci)) + return -EMSGSIZE; + return 0; +} + static int __ovs_nla_put_key(const struct sw_flow_key *swkey, const struct sw_flow_key *output, bool is_mask, struct sk_buff *skb) { struct ovs_key_ethernet *eth_key; - struct nlattr *nla, *encap; + struct nlattr *nla; + struct nlattr *encap = NULL; + struct nlattr *in_encap = NULL; if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id)) goto nla_put_failure; @@ -1464,17 +1564,21 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, ether_addr_copy(eth_key->eth_src, output->eth.src); ether_addr_copy(eth_key->eth_dst, output->eth.dst); - if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) { - __be16 eth_type; - eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff); - if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) || - nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci)) + if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) { + if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask)) goto nla_put_failure; encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); - if (!swkey->eth.tci) + if (!swkey->eth.vlan.tci) goto unencap; - } else - encap = NULL; + + if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) { + if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask)) + goto nla_put_failure; + in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); + if (!swkey->eth.cvlan.tci) + goto unencap; + } + } if (swkey->eth.type == htons(ETH_P_802_2)) { /* @@ -1493,6 +1597,14 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type)) goto nla_put_failure; + if (eth_type_vlan(swkey->eth.type)) { + /* There are 3 VLAN tags, we don't know anything about the rest + * of the packet, so truncate here. 
+ */ + WARN_ON_ONCE(!(encap && in_encap)); + goto unencap; + } + if (swkey->eth.type == htons(ETH_P_IP)) { struct ovs_key_ipv4 *ipv4_key; @@ -1619,6 +1731,8 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, } unencap: + if (in_encap) + nla_nest_end(skb, in_encap); if (encap) nla_nest_end(skb, encap); @@ -2283,7 +2397,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, case OVS_ACTION_ATTR_PUSH_VLAN: vlan = nla_data(a); - if (vlan->vlan_tpid != htons(ETH_P_8021Q)) + if (!eth_type_vlan(vlan->vlan_tpid)) return -EINVAL; if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) return -EINVAL; @@ -2388,7 +2502,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, (*sfa)->orig_len = nla_len(attr); err = __ovs_nla_copy_actions(net, attr, key, 0, sfa, key->eth.type, - key->eth.tci, log); + key->eth.vlan.tci, log); if (err) ovs_nla_free_flow_actions(*sfa); diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 6b21fd0..8f19843 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -485,9 +485,14 @@ static unsigned int packet_length(const struct sk_buff *skb) { unsigned int length = skb->len - ETH_HLEN; - if (skb->protocol == htons(ETH_P_8021Q)) + if (skb_vlan_tagged(skb)) length -= VLAN_HLEN; + /* Don't subtract for multiple VLAN tags. Most (all?) drivers allow + * (ETH_LEN + VLAN_HLEN) in addition to the mtu value, but almost none + * account for 802.1ad. e.g. is_skb_forwardable(). + */ + return length; } -- cgit v1.1 From db7196a0d0984b933ccf2cd6a60e26abf466e8a3 Mon Sep 17 00:00:00 2001 From: Artem Germanov Date: Wed, 7 Sep 2016 10:49:36 -0700 Subject: tcp: cwnd does not increase in TCP YeAH Commit 76174004a0f19785a328f40388e87e982bbf69b9 (tcp: do not slow start when cwnd equals ssthresh ) introduced regression in TCP YeAH. 
Using 100ms delay 1% loss virtual ethernet link kernel 4.2 shows bandwidth ~500KB/s for single TCP connection and kernel 4.3 and above (including 4.8-rc4) shows bandwidth ~100KB/s. That is caused by stalled cwnd when cwnd equals ssthresh. This patch fixes it by proper increasing cwnd in this case. Signed-off-by: Artem Germanov Acked-by: Dmitry Adamushko Signed-off-by: David S. Miller --- net/ipv4/tcp_yeah.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 028eb04..9c5fc97 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -76,7 +76,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!tcp_is_cwnd_limited(sk)) return; - if (tp->snd_cwnd <= tp->snd_ssthresh) + if (tcp_in_slow_start(tp)) tcp_slow_start(tp, acked); else if (!yeah->doing_reno_now) { -- cgit v1.1 From 312fada1f9f87fb55ace4b5a55a70a9eea5100fd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 7 Sep 2016 19:45:35 +0100 Subject: nfp: remove linux/version.h includes Remove unnecessary version.h includes. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Dinan Gunawardena Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 1 - drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 1 - drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c | 1 - 3 files changed, 3 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 88678c1..e4fe0f0 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -41,7 +41,6 @@ * Chris Telfer */ -#include #include #include #include diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 7d7933d..4c98972 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -40,7 +40,6 @@ * Brad Petrus */ -#include #include #include #include diff --git a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c index 37abef0..6f22b0e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c @@ -38,7 +38,6 @@ * Rolf Neugebauer */ -#include #include #include #include -- cgit v1.1 From 313b345cbff566340022c82267a377e1e493ef90 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 7 Sep 2016 19:45:36 +0100 Subject: nfp: drop support for old firmware ABIs Be more strict about FW versions. Drop support for old transitional revisions which were never used in production. Dropping support for FW ABI version 0.0.0.0 is particularly useful because 0 could just be uninitialized memory. Signed-off-by: Jakub Kicinski Reviewed-by: Dinan Gunawardena Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c index 6f22b0e..f7062cb 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c @@ -133,7 +133,7 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev, } nfp_net_get_fw_version(&fw_ver, ctrl_bar); - if (fw_ver.class != NFP_NET_CFG_VERSION_CLASS_GENERIC) { + if (fw_ver.resv || fw_ver.class != NFP_NET_CFG_VERSION_CLASS_GENERIC) { dev_err(&pdev->dev, "Unknown Firmware ABI %d.%d.%d.%d\n", fw_ver.resv, fw_ver.class, fw_ver.major, fw_ver.minor); err = -EINVAL; @@ -141,9 +141,7 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev, } /* Determine stride */ - if (nfp_net_fw_ver_eq(&fw_ver, 0, 0, 0, 0) || - nfp_net_fw_ver_eq(&fw_ver, 0, 0, 0, 1) || - nfp_net_fw_ver_eq(&fw_ver, 0, 0, 0x12, 0x48)) { + if (nfp_net_fw_ver_eq(&fw_ver, 0, 0, 0, 1)) { stride = 2; tx_bar_no = NFP_NET_Q0_BAR; rx_bar_no = NFP_NET_Q1_BAR; -- cgit v1.1 From ebecefc820ec791a216382a68b937577bb69d3f7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 7 Sep 2016 19:45:37 +0100 Subject: nfp: don't pad frames on receive There is no need to pad frames to ETH_ZLEN on RX. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Dinan Gunawardena Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index e4fe0f0..252e492 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1440,10 +1440,6 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) nfp_net_set_hash(nn->netdev, skb, rxd); - /* Pad small frames to minimum */ - if (skb_put_padto(skb, 60)) - break; - /* Stats update */ u64_stats_update_begin(&r_vec->rx_sync); r_vec->rx_pkts++; -- cgit v1.1 From 9f5afeae51526b3ad7b7cb21ee8b145ce6ea7a7a Mon Sep 17 00:00:00 2001 From: Yaogong Wang Date: Wed, 7 Sep 2016 14:49:28 -0700 Subject: tcp: use an RB tree for ooo receive queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Over the years, TCP BDP has increased by several orders of magnitude, and some people are considering to reach the 2 Gbytes limit. Even with current window scale limit of 14, ~1 Gbytes maps to ~740,000 MSS. In presence of packet losses (or reorders), TCP stores incoming packets into an out of order queue, and number of skbs sitting there waiting for the missing packets to be received can be in the 10^5 range. Most packets are appended to the tail of this queue, and when packets can finally be transferred to receive queue, we scan the queue from its head. However, in presence of heavy losses, we might have to find an arbitrary point in this queue, involving a linear scan for every incoming packet, throwing away cpu caches. This patch converts it to a RB tree, to get bounded latencies. Yaogong wrote a preliminary patch about 2 years ago. Eric did the rebase, added ofo_last_skb cache, polishing and tests. 
Tested with network dropping between 1 and 10 % packets, with good success (about 30 % increase of throughput in stress tests) Next step would be to also use an RB tree for the write queue at sender side ;) Signed-off-by: Yaogong Wang Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Neal Cardwell Cc: Ilpo Järvinen Acked-By: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 + include/linux/tcp.h | 7 +- include/net/tcp.h | 2 +- net/core/skbuff.c | 19 +++ net/ipv4/tcp.c | 4 +- net/ipv4/tcp_input.c | 330 +++++++++++++++++++++++++++-------------------- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_minisocks.c | 1 - 8 files changed, 218 insertions(+), 149 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index cfb7219..4c5662f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2402,6 +2402,8 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) kfree_skb(skb); } +void skb_rbtree_purge(struct rb_root *root); + void *netdev_alloc_frag(unsigned int fragsz); struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length, diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 7be9b12..c723a46 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -281,10 +281,9 @@ struct tcp_sock { struct sk_buff* lost_skb_hint; struct sk_buff *retransmit_skb_hint; - /* OOO segments go in this list. Note that socket lock must be held, - * as we do not use sk_buff_head lock. - */ - struct sk_buff_head out_of_order_queue; + /* OOO segments go in this rbtree. Socket lock must be held. 
*/ + struct rb_root out_of_order_queue; + struct sk_buff *ooo_last_skb; /* cache rb_last(out_of_order_queue) */ /* SACKs data, these 2 need to be together (see tcp_options_write) */ struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */ diff --git a/include/net/tcp.h b/include/net/tcp.h index d6ae365..fdfbedd 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -640,7 +640,7 @@ static inline void tcp_fast_path_check(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - if (skb_queue_empty(&tp->out_of_order_queue) && + if (RB_EMPTY_ROOT(&tp->out_of_order_queue) && tp->rcv_wnd && atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf && !tp->urg_data) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3864b4b6..1e329d4 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2445,6 +2445,25 @@ void skb_queue_purge(struct sk_buff_head *list) EXPORT_SYMBOL(skb_queue_purge); /** + * skb_rbtree_purge - empty a skb rbtree + * @root: root of the rbtree to empty + * + * Delete all buffers on an &sk_buff rbtree. Each buffer is removed from + * the list and one reference dropped. This function does not take + * any lock. Synchronization should be handled by the caller (e.g., TCP + * out-of-order queue is protected by the socket lock). 
+ */ +void skb_rbtree_purge(struct rb_root *root) +{ + struct sk_buff *skb, *next; + + rbtree_postorder_for_each_entry_safe(skb, next, root, rbnode) + kfree_skb(skb); + + *root = RB_ROOT; +} + +/** * skb_queue_head - queue a buffer at the list head * @list: list to use * @newsk: buffer to queue diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 77311a9..a13fcb3 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -380,7 +380,7 @@ void tcp_init_sock(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - __skb_queue_head_init(&tp->out_of_order_queue); + tp->out_of_order_queue = RB_ROOT; tcp_init_xmit_timers(sk); tcp_prequeue_init(tp); INIT_LIST_HEAD(&tp->tsq_node); @@ -2243,7 +2243,7 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_clear_xmit_timers(sk); __skb_queue_purge(&sk->sk_receive_queue); tcp_write_queue_purge(sk); - __skb_queue_purge(&tp->out_of_order_queue); + skb_rbtree_purge(&tp->out_of_order_queue); inet->inet_dport = 0; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8cd02c0..a5934c4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4108,7 +4108,7 @@ void tcp_fin(struct sock *sk) /* It _is_ possible, that we have something out-of-order _after_ FIN. * Probably, we should reset in this case. For now drop them. */ - __skb_queue_purge(&tp->out_of_order_queue); + skb_rbtree_purge(&tp->out_of_order_queue); if (tcp_is_sack(tp)) tcp_sack_reset(&tp->rx_opt); sk_mem_reclaim(sk); @@ -4268,7 +4268,7 @@ static void tcp_sack_remove(struct tcp_sock *tp) int this_sack; /* Empty ofo queue, hence, all the SACKs are eaten. Clear. 
*/ - if (skb_queue_empty(&tp->out_of_order_queue)) { + if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) { tp->rx_opt.num_sacks = 0; return; } @@ -4344,10 +4344,13 @@ static void tcp_ofo_queue(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); __u32 dsack_high = tp->rcv_nxt; + bool fin, fragstolen, eaten; struct sk_buff *skb, *tail; - bool fragstolen, eaten; + struct rb_node *p; - while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) { + p = rb_first(&tp->out_of_order_queue); + while (p) { + skb = rb_entry(p, struct sk_buff, rbnode); if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) break; @@ -4357,9 +4360,10 @@ static void tcp_ofo_queue(struct sock *sk) dsack_high = TCP_SKB_CB(skb)->end_seq; tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack); } + p = rb_next(p); + rb_erase(&skb->rbnode, &tp->out_of_order_queue); - __skb_unlink(skb, &tp->out_of_order_queue); - if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { + if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) { SOCK_DEBUG(sk, "ofo packet was already received\n"); tcp_drop(sk, skb); continue; @@ -4371,12 +4375,19 @@ static void tcp_ofo_queue(struct sock *sk) tail = skb_peek_tail(&sk->sk_receive_queue); eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen); tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); + fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; if (!eaten) __skb_queue_tail(&sk->sk_receive_queue, skb); - if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) - tcp_fin(sk); - if (eaten) + else kfree_skb_partial(skb, fragstolen); + + if (unlikely(fin)) { + tcp_fin(sk); + /* tcp_fin() purges tp->out_of_order_queue, + * so we must end this loop right now. 
+ */ + break; + } } } @@ -4403,8 +4414,10 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); + struct rb_node **p, *q, *parent; struct sk_buff *skb1; u32 seq, end_seq; + bool fragstolen; tcp_ecn_check_ce(tp, skb); @@ -4419,88 +4432,85 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) inet_csk_schedule_ack(sk); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE); + seq = TCP_SKB_CB(skb)->seq; + end_seq = TCP_SKB_CB(skb)->end_seq; SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n", - tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); + tp->rcv_nxt, seq, end_seq); - skb1 = skb_peek_tail(&tp->out_of_order_queue); - if (!skb1) { + p = &tp->out_of_order_queue.rb_node; + if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) { /* Initial out of order segment, build 1 SACK. */ if (tcp_is_sack(tp)) { tp->rx_opt.num_sacks = 1; - tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq; - tp->selective_acks[0].end_seq = - TCP_SKB_CB(skb)->end_seq; + tp->selective_acks[0].start_seq = seq; + tp->selective_acks[0].end_seq = end_seq; } - __skb_queue_head(&tp->out_of_order_queue, skb); + rb_link_node(&skb->rbnode, NULL, p); + rb_insert_color(&skb->rbnode, &tp->out_of_order_queue); + tp->ooo_last_skb = skb; goto end; } - seq = TCP_SKB_CB(skb)->seq; - end_seq = TCP_SKB_CB(skb)->end_seq; - - if (seq == TCP_SKB_CB(skb1)->end_seq) { - bool fragstolen; - - if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { - __skb_queue_after(&tp->out_of_order_queue, skb1, skb); - } else { - tcp_grow_window(sk, skb); - kfree_skb_partial(skb, fragstolen); - skb = NULL; + /* In the typical case, we are adding an skb to the end of the list. + * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup. 
+ */ + if (tcp_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) { +coalesce_done: + tcp_grow_window(sk, skb); + kfree_skb_partial(skb, fragstolen); + skb = NULL; + goto add_sack; + } + + /* Find place to insert this segment. Handle overlaps on the way. */ + parent = NULL; + while (*p) { + parent = *p; + skb1 = rb_entry(parent, struct sk_buff, rbnode); + if (before(seq, TCP_SKB_CB(skb1)->seq)) { + p = &parent->rb_left; + continue; } - - if (!tp->rx_opt.num_sacks || - tp->selective_acks[0].end_seq != seq) - goto add_sack; - - /* Common case: data arrive in order after hole. */ - tp->selective_acks[0].end_seq = end_seq; - goto end; - } - - /* Find place to insert this segment. */ - while (1) { - if (!after(TCP_SKB_CB(skb1)->seq, seq)) - break; - if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) { - skb1 = NULL; - break; + if (before(seq, TCP_SKB_CB(skb1)->end_seq)) { + if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { + /* All the bits are present. Drop. */ + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPOFOMERGE); + __kfree_skb(skb); + skb = NULL; + tcp_dsack_set(sk, seq, end_seq); + goto add_sack; + } + if (after(seq, TCP_SKB_CB(skb1)->seq)) { + /* Partial overlap. */ + tcp_dsack_set(sk, seq, TCP_SKB_CB(skb1)->end_seq); + } else { + /* skb's seq == skb1's seq and skb covers skb1. + * Replace skb1 with skb. + */ + rb_replace_node(&skb1->rbnode, &skb->rbnode, + &tp->out_of_order_queue); + tcp_dsack_extend(sk, + TCP_SKB_CB(skb1)->seq, + TCP_SKB_CB(skb1)->end_seq); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPOFOMERGE); + __kfree_skb(skb1); + goto add_sack; + } + } else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { + goto coalesce_done; } - skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1); + p = &parent->rb_right; } - /* Do skb overlap to previous one? */ - if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) { - if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { - /* All the bits are present. Drop. 
*/ - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE); - tcp_drop(sk, skb); - skb = NULL; - tcp_dsack_set(sk, seq, end_seq); - goto add_sack; - } - if (after(seq, TCP_SKB_CB(skb1)->seq)) { - /* Partial overlap. */ - tcp_dsack_set(sk, seq, - TCP_SKB_CB(skb1)->end_seq); - } else { - if (skb_queue_is_first(&tp->out_of_order_queue, - skb1)) - skb1 = NULL; - else - skb1 = skb_queue_prev( - &tp->out_of_order_queue, - skb1); - } - } - if (!skb1) - __skb_queue_head(&tp->out_of_order_queue, skb); - else - __skb_queue_after(&tp->out_of_order_queue, skb1, skb); + /* Insert segment into RB tree. */ + rb_link_node(&skb->rbnode, parent, p); + rb_insert_color(&skb->rbnode, &tp->out_of_order_queue); - /* And clean segments covered by new one as whole. */ - while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) { - skb1 = skb_queue_next(&tp->out_of_order_queue, skb); + /* Remove other segments covered by skb. */ + while ((q = rb_next(&skb->rbnode)) != NULL) { + skb1 = rb_entry(q, struct sk_buff, rbnode); if (!after(end_seq, TCP_SKB_CB(skb1)->seq)) break; @@ -4509,12 +4519,15 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) end_seq); break; } - __skb_unlink(skb1, &tp->out_of_order_queue); + rb_erase(&skb1->rbnode, &tp->out_of_order_queue); tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE); tcp_drop(sk, skb1); } + /* If there is no skb after us, we are the last_skb ! */ + if (!q) + tp->ooo_last_skb = skb; add_sack: if (tcp_is_sack(tp)) @@ -4651,13 +4664,13 @@ queue_and_out: if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) tcp_fin(sk); - if (!skb_queue_empty(&tp->out_of_order_queue)) { + if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) { tcp_ofo_queue(sk); /* RFC2581. 4.2. SHOULD send immediate ACK, when * gap in queue is filled. 
*/ - if (skb_queue_empty(&tp->out_of_order_queue)) + if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) inet_csk(sk)->icsk_ack.pingpong = 0; } @@ -4711,48 +4724,76 @@ drop: tcp_data_queue_ofo(sk, skb); } +static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *list) +{ + if (list) + return !skb_queue_is_last(list, skb) ? skb->next : NULL; + + return rb_entry_safe(rb_next(&skb->rbnode), struct sk_buff, rbnode); +} + static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, - struct sk_buff_head *list) + struct sk_buff_head *list, + struct rb_root *root) { - struct sk_buff *next = NULL; + struct sk_buff *next = tcp_skb_next(skb, list); - if (!skb_queue_is_last(list, skb)) - next = skb_queue_next(list, skb); + if (list) + __skb_unlink(skb, list); + else + rb_erase(&skb->rbnode, root); - __skb_unlink(skb, list); __kfree_skb(skb); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); return next; } +/* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */ +static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct sk_buff *skb1; + + while (*p) { + parent = *p; + skb1 = rb_entry(parent, struct sk_buff, rbnode); + if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq)) + p = &parent->rb_left; + else + p = &parent->rb_right; + } + rb_link_node(&skb->rbnode, parent, p); + rb_insert_color(&skb->rbnode, root); +} + /* Collapse contiguous sequence of skbs head..tail with * sequence numbers start..end. * - * If tail is NULL, this means until the end of the list. + * If tail is NULL, this means until the end of the queue. 
* * Segments with FIN/SYN are not collapsed (only because this * simplifies code) */ static void -tcp_collapse(struct sock *sk, struct sk_buff_head *list, - struct sk_buff *head, struct sk_buff *tail, - u32 start, u32 end) +tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root, + struct sk_buff *head, struct sk_buff *tail, u32 start, u32 end) { - struct sk_buff *skb, *n; + struct sk_buff *skb = head, *n; + struct sk_buff_head tmp; bool end_of_skbs; /* First, check that queue is collapsible and find - * the point where collapsing can be useful. */ - skb = head; + * the point where collapsing can be useful. + */ restart: - end_of_skbs = true; - skb_queue_walk_from_safe(list, skb, n) { - if (skb == tail) - break; + for (end_of_skbs = true; skb != NULL && skb != tail; skb = n) { + n = tcp_skb_next(skb, list); + /* No new bits? It is possible on ofo queue. */ if (!before(start, TCP_SKB_CB(skb)->end_seq)) { - skb = tcp_collapse_one(sk, skb, list); + skb = tcp_collapse_one(sk, skb, list, root); if (!skb) break; goto restart; @@ -4770,13 +4811,10 @@ restart: break; } - if (!skb_queue_is_last(list, skb)) { - struct sk_buff *next = skb_queue_next(list, skb); - if (next != tail && - TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(next)->seq) { - end_of_skbs = false; - break; - } + if (n && n != tail && + TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) { + end_of_skbs = false; + break; } /* Decided to skip this, advance start seq. 
*/ @@ -4786,17 +4824,22 @@ restart: (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN))) return; + __skb_queue_head_init(&tmp); + while (before(start, end)) { int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start); struct sk_buff *nskb; nskb = alloc_skb(copy, GFP_ATOMIC); if (!nskb) - return; + break; memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; - __skb_queue_before(list, skb, nskb); + if (list) + __skb_queue_before(list, skb, nskb); + else + __skb_queue_tail(&tmp, nskb); /* defer rbtree insertion */ skb_set_owner_r(nskb, sk); /* Copy data, releasing collapsed skbs. */ @@ -4814,14 +4857,17 @@ restart: start += size; } if (!before(start, TCP_SKB_CB(skb)->end_seq)) { - skb = tcp_collapse_one(sk, skb, list); + skb = tcp_collapse_one(sk, skb, list, root); if (!skb || skb == tail || (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN))) - return; + goto end; } } } +end: + skb_queue_walk_safe(&tmp, skb, n) + tcp_rbtree_insert(root, skb); } /* Collapse ofo queue. Algorithm: select contiguous sequence of skbs @@ -4830,43 +4876,43 @@ restart: static void tcp_collapse_ofo_queue(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb = skb_peek(&tp->out_of_order_queue); - struct sk_buff *head; + struct sk_buff *skb, *head; + struct rb_node *p; u32 start, end; - if (!skb) + p = rb_first(&tp->out_of_order_queue); + skb = rb_entry_safe(p, struct sk_buff, rbnode); +new_range: + if (!skb) { + p = rb_last(&tp->out_of_order_queue); + /* Note: This is possible p is NULL here. We do not + * use rb_entry_safe(), as ooo_last_skb is valid only + * if rbtree is not empty. 
+ */ + tp->ooo_last_skb = rb_entry(p, struct sk_buff, rbnode); return; - + } start = TCP_SKB_CB(skb)->seq; end = TCP_SKB_CB(skb)->end_seq; - head = skb; - - for (;;) { - struct sk_buff *next = NULL; - if (!skb_queue_is_last(&tp->out_of_order_queue, skb)) - next = skb_queue_next(&tp->out_of_order_queue, skb); - skb = next; + for (head = skb;;) { + skb = tcp_skb_next(skb, NULL); - /* Segment is terminated when we see gap or when - * we are at the end of all the queue. */ + /* Range is terminated when we see a gap or when + * we are at the queue end. + */ if (!skb || after(TCP_SKB_CB(skb)->seq, end) || before(TCP_SKB_CB(skb)->end_seq, start)) { - tcp_collapse(sk, &tp->out_of_order_queue, + tcp_collapse(sk, NULL, &tp->out_of_order_queue, head, skb, start, end); - head = skb; - if (!skb) - break; - /* Start new segment */ + goto new_range; + } + + if (unlikely(before(TCP_SKB_CB(skb)->seq, start))) start = TCP_SKB_CB(skb)->seq; + if (after(TCP_SKB_CB(skb)->end_seq, end)) end = TCP_SKB_CB(skb)->end_seq; - } else { - if (before(TCP_SKB_CB(skb)->seq, start)) - start = TCP_SKB_CB(skb)->seq; - if (after(TCP_SKB_CB(skb)->end_seq, end)) - end = TCP_SKB_CB(skb)->end_seq; - } } } @@ -4883,20 +4929,24 @@ static void tcp_collapse_ofo_queue(struct sock *sk) static bool tcp_prune_ofo_queue(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; + struct rb_node *node, *prev; - if (skb_queue_empty(&tp->out_of_order_queue)) + if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) return false; NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED); - - while ((skb = __skb_dequeue_tail(&tp->out_of_order_queue)) != NULL) { - tcp_drop(sk, skb); + node = &tp->ooo_last_skb->rbnode; + do { + prev = rb_prev(node); + rb_erase(node, &tp->out_of_order_queue); + tcp_drop(sk, rb_entry(node, struct sk_buff, rbnode)); sk_mem_reclaim(sk); if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && !tcp_under_memory_pressure(sk)) break; - } + node = prev; + } while (node); + tp->ooo_last_skb = 
rb_entry(prev, struct sk_buff, rbnode); /* Reset SACK state. A conforming SACK implementation will * do the same at a timeout based retransmit. When a connection @@ -4930,7 +4980,7 @@ static int tcp_prune_queue(struct sock *sk) tcp_collapse_ofo_queue(sk); if (!skb_queue_empty(&sk->sk_receive_queue)) - tcp_collapse(sk, &sk->sk_receive_queue, + tcp_collapse(sk, &sk->sk_receive_queue, NULL, skb_peek(&sk->sk_receive_queue), NULL, tp->copied_seq, tp->rcv_nxt); @@ -5035,7 +5085,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) /* We ACK each frame or... */ tcp_in_quickack_mode(sk) || /* We have out of order data. */ - (ofo_possible && skb_peek(&tp->out_of_order_queue))) { + (ofo_possible && !RB_EMPTY_ROOT(&tp->out_of_order_queue))) { /* Then ack it now */ tcp_send_ack(sk); } else { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a75bf48..04b9893 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1845,7 +1845,7 @@ void tcp_v4_destroy_sock(struct sock *sk) tcp_write_queue_purge(sk); /* Cleans up our, hopefully empty, out_of_order_queue. 
*/ - __skb_queue_purge(&tp->out_of_order_queue); + skb_rbtree_purge(&tp->out_of_order_queue); #ifdef CONFIG_TCP_MD5SIG /* Clean up the MD5 key list, if any */ diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 4b95ec4..f63c73d 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -488,7 +488,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->snd_cwnd_cnt = 0; tcp_init_xmit_timers(newsk); - __skb_queue_head_init(&newtp->out_of_order_queue); newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; newtp->rx_opt.saw_tstamp = 0; -- cgit v1.1 From 2d2be8cab26ed918e94d2deae89580003242a123 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 8 Sep 2016 01:03:42 +0200 Subject: bpf: fix range propagation on direct packet access LLVM can generate code that tests for direct packet access via skb->data/data_end in a way that currently gets rejected by the verifier, example: [...] 7: (61) r3 = *(u32 *)(r6 +80) 8: (61) r9 = *(u32 *)(r6 +76) 9: (bf) r2 = r9 10: (07) r2 += 54 11: (3d) if r3 >= r2 goto pc+12 R1=inv R2=pkt(id=0,off=54,r=0) R3=pkt_end R4=inv R6=ctx R9=pkt(id=0,off=0,r=0) R10=fp 12: (18) r4 = 0xffffff7a 14: (05) goto pc+430 [...] from 11 to 24: R1=inv R2=pkt(id=0,off=54,r=0) R3=pkt_end R4=inv R6=ctx R9=pkt(id=0,off=0,r=0) R10=fp 24: (7b) *(u64 *)(r10 -40) = r1 25: (b7) r1 = 0 26: (63) *(u32 *)(r6 +56) = r1 27: (b7) r2 = 40 28: (71) r8 = *(u8 *)(r9 +20) invalid access to packet, off=20 size=1, R9(id=0,off=0,r=0) The reason why this gets rejected despite a proper test is that we currently call find_good_pkt_pointers() only in case where we detect tests like rX > pkt_end, where rX is of type pkt(id=Y,off=Z,r=0) and derived, for example, from a register of type pkt(id=Y,off=0,r=0) pointing to skb->data. find_good_pkt_pointers() then fills the range in the current branch to pkt(id=Y,off=0,r=Z) on success. 
For above case, we need to extend that to recognize pkt_end >= rX pattern and mark the other branch that is taken on success with the appropriate pkt(id=Y,off=0,r=Z) type via find_good_pkt_pointers(). Since eBPF operates on BPF_JGT (>) and BPF_JGE (>=), these are the only two practical options to test for from what LLVM could have generated, since there's no such thing as BPF_JLT (<) or BPF_JLE (<=) that we would need to take into account as well. After the fix: [...] 7: (61) r3 = *(u32 *)(r6 +80) 8: (61) r9 = *(u32 *)(r6 +76) 9: (bf) r2 = r9 10: (07) r2 += 54 11: (3d) if r3 >= r2 goto pc+12 R1=inv R2=pkt(id=0,off=54,r=0) R3=pkt_end R4=inv R6=ctx R9=pkt(id=0,off=0,r=0) R10=fp 12: (18) r4 = 0xffffff7a 14: (05) goto pc+430 [...] from 11 to 24: R1=inv R2=pkt(id=0,off=54,r=54) R3=pkt_end R4=inv R6=ctx R9=pkt(id=0,off=0,r=54) R10=fp 24: (7b) *(u64 *)(r10 -40) = r1 25: (b7) r1 = 0 26: (63) *(u32 *)(r6 +56) = r1 27: (b7) r2 = 40 28: (71) r8 = *(u8 *)(r9 +20) 29: (bf) r1 = r8 30: (25) if r8 > 0x3c goto pc+47 R1=inv56 R2=imm40 R3=pkt_end R4=inv R6=ctx R8=inv56 R9=pkt(id=0,off=0,r=54) R10=fp 31: (b7) r1 = 1 [...] Verifier test cases are also added in this work, one that demonstrates the mentioned example here and one that tries a bad packet access for the current/fall-through branch (the one with types pkt(id=X,off=Y,r=0), pkt(id=X,off=0,r=0)), then a case with good and bad accesses, and two with both test variants (>, >=). Fixes: 969bf05eb3ce ("bpf: direct packet access") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- kernel/bpf/verifier.c | 55 +++++++++++++++++------- samples/bpf/test_verifier.c | 102 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 142 insertions(+), 15 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 48c2705..90493a6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1637,21 +1637,42 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn) return 0; } -static void find_good_pkt_pointers(struct verifier_env *env, - struct reg_state *dst_reg) +static void find_good_pkt_pointers(struct verifier_state *state, + const struct reg_state *dst_reg) { - struct verifier_state *state = &env->cur_state; struct reg_state *regs = state->regs, *reg; int i; - /* r2 = r3; - * r2 += 8 - * if (r2 > pkt_end) goto somewhere - * r2 == dst_reg, pkt_end == src_reg, - * r2=pkt(id=n,off=8,r=0) - * r3=pkt(id=n,off=0,r=0) - * find register r3 and mark its range as r3=pkt(id=n,off=0,r=8) - * so that range of bytes [r3, r3 + 8) is safe to access + + /* LLVM can generate two kind of checks: + * + * Type 1: + * + * r2 = r3; + * r2 += 8; + * if (r2 > pkt_end) goto <handle exception> + * <access okay> + * + * Where: + * r2 == dst_reg, pkt_end == src_reg + * r2=pkt(id=n,off=8,r=0) + * r3=pkt(id=n,off=0,r=0) + * + * Type 2: + * + * r2 = r3; + * r2 += 8; + * if (pkt_end >= r2) goto <access okay> + * <handle exception> + * + * Where: + * pkt_end == dst_reg, r2 == src_reg + * r2=pkt(id=n,off=8,r=0) + * r3=pkt(id=n,off=0,r=0) + * + * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8) + * so that range of bytes [r3, r3 + 8) is safe to access. 
*/ + for (i = 0; i < MAX_BPF_REG; i++) if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id) regs[i].range = dst_reg->off; @@ -1668,8 +1689,8 @@ static void find_good_pkt_pointers(struct verifier_env *env, static int check_cond_jmp_op(struct verifier_env *env, struct bpf_insn *insn, int *insn_idx) { - struct reg_state *regs = env->cur_state.regs, *dst_reg; - struct verifier_state *other_branch; + struct verifier_state *other_branch, *this_branch = &env->cur_state; + struct reg_state *regs = this_branch->regs, *dst_reg; u8 opcode = BPF_OP(insn->code); int err; @@ -1750,13 +1771,17 @@ static int check_cond_jmp_op(struct verifier_env *env, } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT && dst_reg->type == PTR_TO_PACKET && regs[insn->src_reg].type == PTR_TO_PACKET_END) { - find_good_pkt_pointers(env, dst_reg); + find_good_pkt_pointers(this_branch, dst_reg); + } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE && + dst_reg->type == PTR_TO_PACKET_END && + regs[insn->src_reg].type == PTR_TO_PACKET) { + find_good_pkt_pointers(other_branch, &regs[insn->src_reg]); } else if (is_pointer_value(env, insn->dst_reg)) { verbose("R%d pointer comparison prohibited\n", insn->dst_reg); return -EACCES; } if (log_level) - print_verifier_state(&env->cur_state); + print_verifier_state(this_branch); return 0; } diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c index 78c6f13..1f6cc9b 100644 --- a/samples/bpf/test_verifier.c +++ b/samples/bpf/test_verifier.c @@ -1529,6 +1529,108 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { + "direct packet access: test5 (pkt_end >= reg, good access)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 2), + 
BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "direct packet access: test6 (pkt_end >= reg, bad access)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 3), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid access to packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "direct packet access: test7 (pkt_end >= reg, both accesses)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 3), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid access to packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "direct packet access: test8 (double test, variant 1)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 4), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + 
BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "direct packet access: test9 (double test, variant 2)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { "helper access to packet: test1, valid packet_ptr range", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, -- cgit v1.1 From e895cdce683161081e3626c4f5a5c55cb72089f8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 7 Sep 2016 21:52:56 -0700 Subject: ipv4: accept u8 in IP_TOS ancillary data In commit f02db315b8d8 ("ipv4: IP_TOS and IP_TTL can be specified as ancillary data") Francesco added IP_TOS values specified as integer. However, kernel sends to userspace (at recvmsg() time) an IP_TOS value in a single byte, when IP_RECVTOS is set on the socket. It can be very useful to reflect all ancillary options as given by the kernel in a subsequent sendmsg(), instead of aborting the sendmsg() with EINVAL after Francesco patch. So this patch extends IP_TOS ancillary to accept an u8, so that an UDP server can simply reuse same ancillary block without having to mangle it. 
Jesper can then augment https://github.com/netoptimizer/network-testing/blob/master/src/udp_example02.c to add TOS reflection ;) Fixes: f02db315b8d8 ("ipv4: IP_TOS and IP_TTL can be specified as ancillary data") Signed-off-by: Eric Dumazet Cc: Francesco Fusco Cc: Jesper Dangaard Brouer Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 71a52f4d..af49197 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -284,9 +284,12 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, ipc->ttl = val; break; case IP_TOS: - if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) + if (cmsg->cmsg_len == CMSG_LEN(sizeof(int))) + val = *(int *)CMSG_DATA(cmsg); + else if (cmsg->cmsg_len == CMSG_LEN(sizeof(u8))) + val = *(u8 *)CMSG_DATA(cmsg); + else return -EINVAL; - val = *(int *)CMSG_DATA(cmsg); if (val < 0 || val > 255) return -EINVAL; ipc->tos = val; -- cgit v1.1 From 2f30ea5090cbc57ea573cdc66421264b3de3fb0a Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 8 Sep 2016 18:09:57 +0200 Subject: xfrm_user: propagate sec ctx allocation errors When we fail to attach the security context in xfrm_state_construct() we'll return 0 as error value which, in turn, will wrongly claim success to userland when, in fact, we won't be adding / updating the XFRM state. This is a regression introduced by commit fd21150a0fe1 ("[XFRM] netlink: Inline attach_encap_tmpl(), attach_sec_ctx(), and attach_one_addr()"). Fix it by propagating the error returned by security_xfrm_state_alloc() in this case. 
Fixes: fd21150a0fe1 ("[XFRM] netlink: Inline attach_encap_tmpl()...") Signed-off-by: Mathias Krause Cc: Thomas Graf Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index cb65d91..0889209 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -581,9 +581,12 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, if (err) goto error; - if (attrs[XFRMA_SEC_CTX] && - security_xfrm_state_alloc(x, nla_data(attrs[XFRMA_SEC_CTX]))) - goto error; + if (attrs[XFRMA_SEC_CTX]) { + err = security_xfrm_state_alloc(x, + nla_data(attrs[XFRMA_SEC_CTX])); + if (err) + goto error; + } if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn, attrs[XFRMA_REPLAY_ESN_VAL]))) -- cgit v1.1 From 1fb81e09d487656aa23f2acb1232c7f56b4c2367 Mon Sep 17 00:00:00 2001 From: "thomas.zeitlhofer+lkml@ze-it.at" Date: Wed, 7 Sep 2016 20:40:38 +0200 Subject: vti: use right inner_mode for inbound inter address family policy checks In case of inter address family tunneling (IPv6 over vti4 or IPv4 over vti6), the inbound policy checks in vti_rcv_cb() and vti6_rcv_cb() are using the wrong address family. As a result, all inbound inter address family traffic is dropped. Use the xfrm_ip2inner_mode() helper, as done in xfrm_input() (i.e., also increment LINUX_MIB_XFRMINSTATEMODEERROR in case of error), to select the inner_mode that contains the right address family for the inbound policy checks. 
Signed-off-by: Thomas Zeitlhofer Signed-off-by: Steffen Klassert --- net/ipv4/ip_vti.c | 15 ++++++++++++++- net/ipv6/ip6_vti.c | 15 ++++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index cc701fa..5d7944f 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -88,6 +88,7 @@ static int vti_rcv_cb(struct sk_buff *skb, int err) struct net_device *dev; struct pcpu_sw_netstats *tstats; struct xfrm_state *x; + struct xfrm_mode *inner_mode; struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4; u32 orig_mark = skb->mark; int ret; @@ -105,7 +106,19 @@ static int vti_rcv_cb(struct sk_buff *skb, int err) } x = xfrm_input_state(skb); - family = x->inner_mode->afinfo->family; + + inner_mode = x->inner_mode; + + if (x->sel.family == AF_UNSPEC) { + inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); + if (inner_mode == NULL) { + XFRM_INC_STATS(dev_net(skb->dev), + LINUX_MIB_XFRMINSTATEMODEERROR); + return -EINVAL; + } + } + + family = inner_mode->afinfo->family; skb->mark = be32_to_cpu(tunnel->parms.i_key); ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index d90a11f..52a2f73 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -340,6 +340,7 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err) struct net_device *dev; struct pcpu_sw_netstats *tstats; struct xfrm_state *x; + struct xfrm_mode *inner_mode; struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6; u32 orig_mark = skb->mark; int ret; @@ -357,7 +358,19 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err) } x = xfrm_input_state(skb); - family = x->inner_mode->afinfo->family; + + inner_mode = x->inner_mode; + + if (x->sel.family == AF_UNSPEC) { + inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); + if (inner_mode == NULL) { + XFRM_INC_STATS(dev_net(skb->dev), + LINUX_MIB_XFRMINSTATEMODEERROR); + return -EINVAL; + } + } + + family = 
inner_mode->afinfo->family; skb->mark = be32_to_cpu(t->parms.i_key); ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family); -- cgit v1.1 From 34a3d4b2d1f1b7c81af79f6f93a6cef4c3a0f54a Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Thu, 8 Sep 2016 13:55:56 -0400 Subject: xfrm: fix header file comment reference to struct xfrm_replay_state_esn Reported-by: Paul Wouters Signed-off-by: Richard Guy Briggs Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 1433389..1fc62b2 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -298,7 +298,7 @@ enum xfrm_attr_type_t { XFRMA_ALG_AUTH_TRUNC, /* struct xfrm_algo_auth */ XFRMA_MARK, /* struct xfrm_mark */ XFRMA_TFCPAD, /* __u32 */ - XFRMA_REPLAY_ESN_VAL, /* struct xfrm_replay_esn */ + XFRMA_REPLAY_ESN_VAL, /* struct xfrm_replay_state_esn */ XFRMA_SA_EXTRA_FLAGS, /* __u32 */ XFRMA_PROTO, /* __u8 */ XFRMA_ADDRESS_FILTER, /* struct xfrm_address_filter */ -- cgit v1.1 From defb893fffef89ac6db4e68fccae1783d7c93977 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 17 Mar 2016 10:39:17 +0100 Subject: bcma: use of_dma_configure() to set initial dma mask While fixing another bug, I noticed that bcma manually sets up a dma_mask pointer for its child devices. We have a generic helper for that now, which should be able to cope better with any variations that might be needed to deal with cache coherency, unusual DMA address offsets, iommus, or limited DMA masks, none of which are currently handled here. This changes the core to use the of_dma_configure(), like we do for platform devices that are probed directly from DT. 
Signed-off-by: Arnd Bergmann Signed-off-by: Kalle Valo --- drivers/bcma/main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c index 1f63547..2c1798e 100644 --- a/drivers/bcma/main.c +++ b/drivers/bcma/main.c @@ -209,6 +209,8 @@ static void bcma_of_fill_device(struct platform_device *parent, core->dev.of_node = node; core->irq = bcma_of_get_irq(parent, core, 0); + + of_dma_configure(&core->dev, node); } unsigned int bcma_core_irq(struct bcma_device *core, int num) @@ -248,12 +250,12 @@ void bcma_prepare_core(struct bcma_bus *bus, struct bcma_device *core) core->irq = bus->host_pci->irq; break; case BCMA_HOSTTYPE_SOC: - core->dev.dma_mask = &core->dev.coherent_dma_mask; - if (bus->host_pdev) { + if (IS_ENABLED(CONFIG_OF) && bus->host_pdev) { core->dma_dev = &bus->host_pdev->dev; core->dev.parent = &bus->host_pdev->dev; bcma_of_fill_device(bus->host_pdev, core); } else { + core->dev.dma_mask = &core->dev.coherent_dma_mask; core->dma_dev = &core->dev; } break; -- cgit v1.1 From cf5383b088d07f304d189986fdbd4efbd7d41538 Mon Sep 17 00:00:00 2001 From: Xinming Hu Date: Fri, 2 Sep 2016 13:05:06 +0530 Subject: mwifiex: add manufacturing mode support By default normal mode is chosen when driver is loaded. This patch adds a provision to choose manufacturing mode via module parameters. Below command loads driver in manufacturing mode insmod mwifiex.ko mfg_mode=1. 
Tested-by: chunfan chen Signed-off-by: Xinming Hu Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/cmdevt.c | 8 ++++++++ drivers/net/wireless/marvell/mwifiex/init.c | 22 +++++++++++++++------- drivers/net/wireless/marvell/mwifiex/main.c | 26 ++++++++++++++++++++++---- drivers/net/wireless/marvell/mwifiex/main.h | 2 ++ drivers/net/wireless/marvell/mwifiex/pcie.c | 2 +- drivers/net/wireless/marvell/mwifiex/sdio.c | 2 +- drivers/net/wireless/marvell/mwifiex/usb.c | 2 +- 7 files changed, 50 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/cmdevt.c b/drivers/net/wireless/marvell/mwifiex/cmdevt.c index d433aa0..5347728 100644 --- a/drivers/net/wireless/marvell/mwifiex/cmdevt.c +++ b/drivers/net/wireless/marvell/mwifiex/cmdevt.c @@ -595,6 +595,14 @@ int mwifiex_send_cmd(struct mwifiex_private *priv, u16 cmd_no, return -1; } } + /* We don't expect commands in manufacturing mode. They are cooked + * in application and ready to download buffer is passed to the driver + */ + if (adapter->mfg_mode && cmd_no) { + dev_dbg(adapter->dev, "Ignoring commands in manufacturing mode\n"); + return -1; + } + /* Get a new command node */ cmd_node = mwifiex_get_cmd_node(adapter); diff --git a/drivers/net/wireless/marvell/mwifiex/init.c b/drivers/net/wireless/marvell/mwifiex/init.c index 1489c90..82839d9 100644 --- a/drivers/net/wireless/marvell/mwifiex/init.c +++ b/drivers/net/wireless/marvell/mwifiex/init.c @@ -298,6 +298,7 @@ static void mwifiex_init_adapter(struct mwifiex_adapter *adapter) memset(&adapter->arp_filter, 0, sizeof(adapter->arp_filter)); adapter->arp_filter_size = 0; adapter->max_mgmt_ie_index = MAX_MGMT_IE_INDEX; + adapter->mfg_mode = mfg_mode; adapter->key_api_major_ver = 0; adapter->key_api_minor_ver = 0; eth_broadcast_addr(adapter->perm_addr); @@ -553,15 +554,22 @@ int mwifiex_init_fw(struct mwifiex_adapter *adapter) return -1; } } + if (adapter->mfg_mode) { + adapter->hw_status = 
MWIFIEX_HW_STATUS_READY; + ret = -EINPROGRESS; + } else { + for (i = 0; i < adapter->priv_num; i++) { + if (adapter->priv[i]) { + ret = mwifiex_sta_init_cmd(adapter->priv[i], + first_sta, true); + if (ret == -1) + return -1; + + first_sta = false; + } + - for (i = 0; i < adapter->priv_num; i++) { - if (adapter->priv[i]) { - ret = mwifiex_sta_init_cmd(adapter->priv[i], first_sta, - true); - if (ret == -1) - return -1; - first_sta = false; } } diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c index 51d4dfc..029e5da 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.c +++ b/drivers/net/wireless/marvell/mwifiex/main.c @@ -23,6 +23,7 @@ #include "11n.h" #define VERSION "1.0" +#define MFG_FIRMWARE "mwifiex_mfg.bin" static unsigned int debug_mask = MWIFIEX_DEFAULT_DEBUG_MASK; module_param(debug_mask, uint, 0); @@ -37,6 +38,10 @@ module_param(driver_mode, ushort, 0); MODULE_PARM_DESC(driver_mode, "station=0x1(default), ap-sta=0x3, station-p2p=0x5, ap-sta-p2p=0x7"); +bool mfg_mode; +module_param(mfg_mode, bool, 0); +MODULE_PARM_DESC(mfg_mode, "manufacturing mode enable:1, disable:0"); + /* * This function registers the device and performs all the necessary * initializations. 
@@ -561,10 +566,12 @@ static void mwifiex_fw_dpc(const struct firmware *firmware, void *context) goto done; } /* Wait for mwifiex_init to complete */ - wait_event_interruptible(adapter->init_wait_q, - adapter->init_wait_q_woken); - if (adapter->hw_status != MWIFIEX_HW_STATUS_READY) - goto err_init_fw; + if (!adapter->mfg_mode) { + wait_event_interruptible(adapter->init_wait_q, + adapter->init_wait_q_woken); + if (adapter->hw_status != MWIFIEX_HW_STATUS_READY) + goto err_init_fw; + } priv = adapter->priv[MWIFIEX_BSS_ROLE_STA]; if (mwifiex_register_cfg80211(adapter)) { @@ -668,6 +675,17 @@ static int mwifiex_init_hw_fw(struct mwifiex_adapter *adapter) { int ret; + /* Override default firmware with manufacturing one if + * manufacturing mode is enabled + */ + if (mfg_mode) { + if (strlcpy(adapter->fw_name, MFG_FIRMWARE, + sizeof(adapter->fw_name)) >= + sizeof(adapter->fw_name)) { + pr_err("%s: fw_name too long!\n", __func__); + return -1; + } + } ret = request_firmware_nowait(THIS_MODULE, 1, adapter->fw_name, adapter->dev, GFP_KERNEL, adapter, mwifiex_fw_dpc); diff --git a/drivers/net/wireless/marvell/mwifiex/main.h b/drivers/net/wireless/marvell/mwifiex/main.h index cd9a4f1..17221c4 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.h +++ b/drivers/net/wireless/marvell/mwifiex/main.h @@ -58,6 +58,7 @@ #include "sdio.h" extern const char driver_version[]; +extern bool mfg_mode; struct mwifiex_adapter; struct mwifiex_private; @@ -990,6 +991,7 @@ struct mwifiex_adapter { u32 drv_info_size; bool scan_chan_gap_enabled; struct sk_buff_head rx_data_q; + bool mfg_mode; struct mwifiex_chan_stats *chan_stats; u32 num_in_chan_stats; int survey_idx; diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c index 50a6a53..c1b3e8e 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.c +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c @@ -225,7 +225,7 @@ static void mwifiex_pcie_remove(struct pci_dev *pdev) if (!adapter || 
!adapter->priv_num) return; - if (user_rmmod) { + if (user_rmmod && !adapter->mfg_mode) { #ifdef CONFIG_PM_SLEEP if (adapter->is_suspended) mwifiex_pcie_resume(&pdev->dev); diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c b/drivers/net/wireless/marvell/mwifiex/sdio.c index d3e1561..6dba409 100644 --- a/drivers/net/wireless/marvell/mwifiex/sdio.c +++ b/drivers/net/wireless/marvell/mwifiex/sdio.c @@ -289,7 +289,7 @@ mwifiex_sdio_remove(struct sdio_func *func) mwifiex_dbg(adapter, INFO, "info: SDIO func num=%d\n", func->num); - if (user_rmmod) { + if (user_rmmod && !adapter->mfg_mode) { if (adapter->is_suspended) mwifiex_sdio_resume(adapter->dev); diff --git a/drivers/net/wireless/marvell/mwifiex/usb.c b/drivers/net/wireless/marvell/mwifiex/usb.c index 3bd04f5..9213516 100644 --- a/drivers/net/wireless/marvell/mwifiex/usb.c +++ b/drivers/net/wireless/marvell/mwifiex/usb.c @@ -611,7 +611,7 @@ static void mwifiex_usb_disconnect(struct usb_interface *intf) if (!adapter->priv_num) return; - if (user_rmmod) { + if (user_rmmod && !adapter->mfg_mode) { #ifdef CONFIG_PM if (adapter->is_suspended) mwifiex_usb_resume(intf); -- cgit v1.1 From 3935ccc14d2c68488bd96448fc073da48eaeebf0 Mon Sep 17 00:00:00 2001 From: Xinming Hu Date: Fri, 2 Sep 2016 13:05:07 +0530 Subject: mwifiex: add cfg80211 testmode support This patch adds cfg80211 testmode support so that userspace tools can download necessary commands to firmware during manufacturing mode tests. 
Signed-off-by: Xinming Hu Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 83 +++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index 876d420..0a03d3f 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -3919,6 +3919,88 @@ static int mwifiex_cfg80211_get_channel(struct wiphy *wiphy, return ret; } +#ifdef CONFIG_NL80211_TESTMODE + +enum mwifiex_tm_attr { + __MWIFIEX_TM_ATTR_INVALID = 0, + MWIFIEX_TM_ATTR_CMD = 1, + MWIFIEX_TM_ATTR_DATA = 2, + + /* keep last */ + __MWIFIEX_TM_ATTR_AFTER_LAST, + MWIFIEX_TM_ATTR_MAX = __MWIFIEX_TM_ATTR_AFTER_LAST - 1, +}; + +static const struct nla_policy mwifiex_tm_policy[MWIFIEX_TM_ATTR_MAX + 1] = { + [MWIFIEX_TM_ATTR_CMD] = { .type = NLA_U32 }, + [MWIFIEX_TM_ATTR_DATA] = { .type = NLA_BINARY, + .len = MWIFIEX_SIZE_OF_CMD_BUFFER }, +}; + +enum mwifiex_tm_command { + MWIFIEX_TM_CMD_HOSTCMD = 0, +}; + +static int mwifiex_tm_cmd(struct wiphy *wiphy, struct wireless_dev *wdev, + void *data, int len) +{ + struct mwifiex_private *priv = mwifiex_netdev_get_priv(wdev->netdev); + struct mwifiex_ds_misc_cmd *hostcmd; + struct nlattr *tb[MWIFIEX_TM_ATTR_MAX + 1]; + struct mwifiex_adapter *adapter; + struct sk_buff *skb; + int err; + + if (!priv) + return -EINVAL; + adapter = priv->adapter; + + err = nla_parse(tb, MWIFIEX_TM_ATTR_MAX, data, len, + mwifiex_tm_policy); + if (err) + return err; + + if (!tb[MWIFIEX_TM_ATTR_CMD]) + return -EINVAL; + + switch (nla_get_u32(tb[MWIFIEX_TM_ATTR_CMD])) { + case MWIFIEX_TM_CMD_HOSTCMD: + if (!tb[MWIFIEX_TM_ATTR_DATA]) + return -EINVAL; + + hostcmd = kzalloc(sizeof(*hostcmd), GFP_KERNEL); + if (!hostcmd) + return -ENOMEM; + + hostcmd->len = nla_len(tb[MWIFIEX_TM_ATTR_DATA]); + memcpy(hostcmd->cmd, nla_data(tb[MWIFIEX_TM_ATTR_DATA]), + hostcmd->len); + + if 
(mwifiex_send_cmd(priv, 0, 0, 0, hostcmd, true)) { + dev_err(priv->adapter->dev, "Failed to process hostcmd\n"); + return -EFAULT; + } + + /* process hostcmd response*/ + skb = cfg80211_testmode_alloc_reply_skb(wiphy, hostcmd->len); + if (!skb) + return -ENOMEM; + err = nla_put(skb, MWIFIEX_TM_ATTR_DATA, + hostcmd->len, hostcmd->cmd); + if (err) { + kfree_skb(skb); + return -EMSGSIZE; + } + + err = cfg80211_testmode_reply(skb); + kfree(hostcmd); + return err; + default: + return -EOPNOTSUPP; + } +} +#endif + static int mwifiex_cfg80211_start_radar_detection(struct wiphy *wiphy, struct net_device *dev, @@ -4031,6 +4113,7 @@ static struct cfg80211_ops mwifiex_cfg80211_ops = { .tdls_cancel_channel_switch = mwifiex_cfg80211_tdls_cancel_chan_switch, .add_station = mwifiex_cfg80211_add_station, .change_station = mwifiex_cfg80211_change_station, + CFG80211_TESTMODE_CMD(mwifiex_tm_cmd) .get_channel = mwifiex_cfg80211_get_channel, .start_radar_detection = mwifiex_cfg80211_start_radar_detection, .channel_switch = mwifiex_cfg80211_channel_switch, -- cgit v1.1 From 3e9b3112ec74f192eaab976c3889e34255cae940 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 31 Aug 2016 12:46:44 +0100 Subject: add basic register-field manipulation macros Common approach to accessing register fields is to define structures or sets of macros containing mask and shift pair. Operations on the register are then performed as follows: field = (reg >> shift) & mask; reg &= ~(mask << shift); reg |= (field & mask) << shift; Defining shift and mask separately is tedious. Ivo van Doorn came up with an idea of computing them at compilation time based on a single shifted mask (later refined by Felix) which can be used like this: #define REG_FIELD 0x000ff000 field = FIELD_GET(REG_FIELD, reg); reg &= ~REG_FIELD; reg |= FIELD_PREP(REG_FIELD, field); FIELD_{GET,PREP} macros take care of finding out what the appropriate shift is based on compilation time ffs operation. 
GENMASK can be used to define registers (which is usually less error-prone and easier to match with datasheets). This approach is the most convenient I've seen so to limit code multiplication let's move the macros to a global header file. Attempts to use static inlines instead of macros failed due to false positive triggering of BUILD_BUG_ON()s, especially with GCC < 6.0. Signed-off-by: Jakub Kicinski Reviewed-by: Dinan Gunawardena Signed-off-by: Kalle Valo --- include/linux/bitfield.h | 93 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/bug.h | 3 ++ 2 files changed, 96 insertions(+) create mode 100644 include/linux/bitfield.h diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h new file mode 100644 index 0000000..f6505d8 --- /dev/null +++ b/include/linux/bitfield.h @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2014 Felix Fietkau + * Copyright (C) 2004 - 2009 Ivo van Doorn + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _LINUX_BITFIELD_H +#define _LINUX_BITFIELD_H + +#include <linux/bug.h> + +/* + * Bitfield access macros + * + * FIELD_{GET,PREP} macros take as first parameter shifted mask + * from which they extract the base mask and shift amount. + * Mask must be a compilation time constant.
+ * + * Example: + * + * #define REG_FIELD_A GENMASK(6, 0) + * #define REG_FIELD_B BIT(7) + * #define REG_FIELD_C GENMASK(15, 8) + * #define REG_FIELD_D GENMASK(31, 16) + * + * Get: + * a = FIELD_GET(REG_FIELD_A, reg); + * b = FIELD_GET(REG_FIELD_B, reg); + * + * Set: + * reg = FIELD_PREP(REG_FIELD_A, 1) | + * FIELD_PREP(REG_FIELD_B, 0) | + * FIELD_PREP(REG_FIELD_C, c) | + * FIELD_PREP(REG_FIELD_D, 0x40); + * + * Modify: + * reg &= ~REG_FIELD_C; + * reg |= FIELD_PREP(REG_FIELD_C, c); + */ + +#define __bf_shf(x) (__builtin_ffsll(x) - 1) + +#define __BF_FIELD_CHECK(_mask, _reg, _val, _pfx) \ + ({ \ + BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask), \ + _pfx "mask is not constant"); \ + BUILD_BUG_ON_MSG(!(_mask), _pfx "mask is zero"); \ + BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \ + ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \ + _pfx "value too large for the field"); \ + BUILD_BUG_ON_MSG((_mask) > (typeof(_reg))~0ull, \ + _pfx "type of reg too small for mask"); \ + __BUILD_BUG_ON_NOT_POWER_OF_2((_mask) + \ + (1ULL << __bf_shf(_mask))); \ + }) + +/** + * FIELD_PREP() - prepare a bitfield element + * @_mask: shifted mask defining the field's length and position + * @_val: value to put in the field + * + * FIELD_PREP() masks and shifts up the value. The result should + * be combined with other fields of the bitfield using logical OR. + */ +#define FIELD_PREP(_mask, _val) \ + ({ \ + __BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_PREP: "); \ + ((typeof(_mask))(_val) << __bf_shf(_mask)) & (_mask); \ + }) + +/** + * FIELD_GET() - extract a bitfield element + * @_mask: shifted mask defining the field's length and position + * @_reg: 32bit value of entire bitfield + * + * FIELD_GET() extracts the field specified by @_mask from the + * bitfield passed in as @_reg by masking and shifting it down. 
+ */ +#define FIELD_GET(_mask, _reg) \ + ({ \ + __BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: "); \ + (typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \ + }) + +#endif diff --git a/include/linux/bug.h b/include/linux/bug.h index e51b070..292d6a1 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -13,6 +13,7 @@ enum bug_trap_type { struct pt_regs; #ifdef __CHECKER__ +#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) (0) #define BUILD_BUG_ON_NOT_POWER_OF_2(n) (0) #define BUILD_BUG_ON_ZERO(e) (0) #define BUILD_BUG_ON_NULL(e) ((void*)0) @@ -24,6 +25,8 @@ struct pt_regs; #else /* __CHECKER__ */ /* Force a compilation error if a constant expression is not a power of 2 */ +#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) \ + BUILD_BUG_ON(((n) & ((n) - 1)) != 0) #define BUILD_BUG_ON_NOT_POWER_OF_2(n) \ BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0)) -- cgit v1.1 From faad5433b7227e0091c390c68be9076fe846627f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 31 Aug 2016 12:46:45 +0100 Subject: mt7601u: remove redefinition of GENMASK Remove redefinition of GENMASK which should not be there in the upstream version of the code. Signed-off-by: Jakub Kicinski Reviewed-by: Dinan Gunawardena Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt7601u/regs.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt7601u/regs.h b/drivers/net/wireless/mediatek/mt7601u/regs.h index afd8978..27a429d 100644 --- a/drivers/net/wireless/mediatek/mt7601u/regs.h +++ b/drivers/net/wireless/mediatek/mt7601u/regs.h @@ -17,10 +17,6 @@ #include -#ifndef GENMASK -#define GENMASK(h, l) (((U32_C(1) << ((h) - (l) + 1)) - 1) << (l)) -#endif - #define MT_ASIC_VERSION 0x0000 #define MT76XX_REV_E3 0x22 -- cgit v1.1 From adcc710d0a9e260ae315bc7d31b68c5697f58b43 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 31 Aug 2016 12:46:46 +0100 Subject: mt7601u: remove unnecessary include There is no need to include linux/version.h in a in-tree driver. 
Signed-off-by: Jakub Kicinski Reviewed-by: Dinan Gunawardena Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt7601u/main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt7601u/main.c b/drivers/net/wireless/mediatek/mt7601u/main.c index e70dd95..43ebd46 100644 --- a/drivers/net/wireless/mediatek/mt7601u/main.c +++ b/drivers/net/wireless/mediatek/mt7601u/main.c @@ -15,7 +15,6 @@ #include "mt7601u.h" #include "mac.h" #include -#include <linux/version.h> static int mt7601u_start(struct ieee80211_hw *hw) { -- cgit v1.1 From d43af50566b43fb4abce42789ba999a7e9dc45bb Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 31 Aug 2016 12:46:47 +0100 Subject: mt7601u: use linux/bitfield.h Use the newly added linux/bitfield.h. Signed-off-by: Jakub Kicinski Reviewed-by: Dinan Gunawardena Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt7601u/dma.c | 2 +- drivers/net/wireless/mediatek/mt7601u/dma.h | 10 ++-- drivers/net/wireless/mediatek/mt7601u/eeprom.c | 12 ++-- drivers/net/wireless/mediatek/mt7601u/init.c | 10 ++-- drivers/net/wireless/mediatek/mt7601u/mac.c | 38 ++++++------ drivers/net/wireless/mediatek/mt7601u/mcu.c | 20 +++---- drivers/net/wireless/mediatek/mt7601u/mt7601u.h | 4 +- drivers/net/wireless/mediatek/mt7601u/phy.c | 44 +++++++------- drivers/net/wireless/mediatek/mt7601u/tx.c | 19 +++--- drivers/net/wireless/mediatek/mt7601u/util.h | 77 ------------------------- 10 files changed, 81 insertions(+), 155 deletions(-) delete mode 100644 drivers/net/wireless/mediatek/mt7601u/util.h diff --git a/drivers/net/wireless/mediatek/mt7601u/dma.c b/drivers/net/wireless/mediatek/mt7601u/dma.c index 57a80cf..a8bc064 100644 --- a/drivers/net/wireless/mediatek/mt7601u/dma.c +++ b/drivers/net/wireless/mediatek/mt7601u/dma.c @@ -103,7 +103,7 @@ static void mt7601u_rx_process_seg(struct mt7601u_dev *dev, u8 *data, if (unlikely(rxwi->zero[0] || rxwi->zero[1] || rxwi->zero[2])) dev_err_once(dev->dev, "Error: RXWI zero fields are set\n"); -
if (unlikely(MT76_GET(MT_RXD_INFO_TYPE, fce_info))) + if (unlikely(FIELD_GET(MT_RXD_INFO_TYPE, fce_info))) dev_err_once(dev->dev, "Error: RX path seen a non-pkt urb\n"); trace_mt_rx(dev, rxwi, fce_info); diff --git a/drivers/net/wireless/mediatek/mt7601u/dma.h b/drivers/net/wireless/mediatek/mt7601u/dma.h index 978e8a9..270d126 100644 --- a/drivers/net/wireless/mediatek/mt7601u/dma.h +++ b/drivers/net/wireless/mediatek/mt7601u/dma.h @@ -18,8 +18,6 @@ #include #include -#include "util.h" - #define MT_DMA_HDR_LEN 4 #define MT_RX_INFO_LEN 4 #define MT_FCE_INFO_LEN 4 @@ -79,9 +77,9 @@ static inline int mt7601u_dma_skb_wrap(struct sk_buff *skb, */ info = flags | - MT76_SET(MT_TXD_INFO_LEN, round_up(skb->len, 4)) | - MT76_SET(MT_TXD_INFO_D_PORT, d_port) | - MT76_SET(MT_TXD_INFO_TYPE, type); + FIELD_PREP(MT_TXD_INFO_LEN, round_up(skb->len, 4)) | + FIELD_PREP(MT_TXD_INFO_D_PORT, d_port) | + FIELD_PREP(MT_TXD_INFO_TYPE, type); put_unaligned_le32(info, skb_push(skb, sizeof(info))); return skb_put_padto(skb, round_up(skb->len, 4) + 4); @@ -90,7 +88,7 @@ static inline int mt7601u_dma_skb_wrap(struct sk_buff *skb, static inline int mt7601u_dma_skb_wrap_pkt(struct sk_buff *skb, enum mt76_qsel qsel, u32 flags) { - flags |= MT76_SET(MT_TXD_PKT_INFO_QSEL, qsel); + flags |= FIELD_PREP(MT_TXD_PKT_INFO_QSEL, qsel); return mt7601u_dma_skb_wrap(skb, WLAN_PORT, DMA_PACKET, flags); } diff --git a/drivers/net/wireless/mediatek/mt7601u/eeprom.c b/drivers/net/wireless/mediatek/mt7601u/eeprom.c index 8d8ee03..da6faea 100644 --- a/drivers/net/wireless/mediatek/mt7601u/eeprom.c +++ b/drivers/net/wireless/mediatek/mt7601u/eeprom.c @@ -45,8 +45,8 @@ mt7601u_efuse_read(struct mt7601u_dev *dev, u16 addr, u8 *data, val = mt76_rr(dev, MT_EFUSE_CTRL); val &= ~(MT_EFUSE_CTRL_AIN | MT_EFUSE_CTRL_MODE); - val |= MT76_SET(MT_EFUSE_CTRL_AIN, addr & ~0xf) | - MT76_SET(MT_EFUSE_CTRL_MODE, mode) | + val |= FIELD_PREP(MT_EFUSE_CTRL_AIN, addr & ~0xf) | + FIELD_PREP(MT_EFUSE_CTRL_MODE, mode) | 
MT_EFUSE_CTRL_KICK; mt76_wr(dev, MT_EFUSE_CTRL, val); @@ -128,8 +128,8 @@ mt7601u_set_chip_cap(struct mt7601u_dev *dev, u8 *eeprom) if (!field_valid(nic_conf0 >> 8)) return; - if (MT76_GET(MT_EE_NIC_CONF_0_RX_PATH, nic_conf0) > 1 || - MT76_GET(MT_EE_NIC_CONF_0_TX_PATH, nic_conf0) > 1) + if (FIELD_GET(MT_EE_NIC_CONF_0_RX_PATH, nic_conf0) > 1 || + FIELD_GET(MT_EE_NIC_CONF_0_TX_PATH, nic_conf0) > 1) dev_err(dev->dev, "Error: device has more than 1 RX/TX stream!\n"); } @@ -150,7 +150,7 @@ mt7601u_set_macaddr(struct mt7601u_dev *dev, const u8 *eeprom) mt76_wr(dev, MT_MAC_ADDR_DW0, get_unaligned_le32(dev->macaddr)); mt76_wr(dev, MT_MAC_ADDR_DW1, get_unaligned_le16(dev->macaddr + 4) | - MT76_SET(MT_MAC_ADDR_DW1_U2ME_MASK, 0xff)); + FIELD_PREP(MT_MAC_ADDR_DW1_U2ME_MASK, 0xff)); return 0; } @@ -176,7 +176,7 @@ mt7601u_set_channel_power(struct mt7601u_dev *dev, u8 *eeprom) u8 max_pwr; val = mt7601u_rr(dev, MT_TX_ALC_CFG_0); - max_pwr = MT76_GET(MT_TX_ALC_CFG_0_LIMIT_0, val); + max_pwr = FIELD_GET(MT_TX_ALC_CFG_0_LIMIT_0, val); if (mt7601u_has_tssi(dev, eeprom)) { mt7601u_set_channel_target_power(dev, eeprom, max_pwr); diff --git a/drivers/net/wireless/mediatek/mt7601u/init.c b/drivers/net/wireless/mediatek/mt7601u/init.c index 8fa78d7..44d46e2 100644 --- a/drivers/net/wireless/mediatek/mt7601u/init.c +++ b/drivers/net/wireless/mediatek/mt7601u/init.c @@ -108,8 +108,9 @@ static void mt7601u_init_usb_dma(struct mt7601u_dev *dev) { u32 val; - val = MT76_SET(MT_USB_DMA_CFG_RX_BULK_AGG_TOUT, MT_USB_AGGR_TIMEOUT) | - MT76_SET(MT_USB_DMA_CFG_RX_BULK_AGG_LMT, MT_USB_AGGR_SIZE_LIMIT) | + val = FIELD_PREP(MT_USB_DMA_CFG_RX_BULK_AGG_TOUT, MT_USB_AGGR_TIMEOUT) | + FIELD_PREP(MT_USB_DMA_CFG_RX_BULK_AGG_LMT, + MT_USB_AGGR_SIZE_LIMIT) | MT_USB_DMA_CFG_RX_BULK_EN | MT_USB_DMA_CFG_TX_BULK_EN; if (dev->in_max_packet == 512) @@ -396,8 +397,9 @@ int mt7601u_init_hardware(struct mt7601u_dev *dev) mt7601u_rmw(dev, MT_US_CYC_CFG, MT_US_CYC_CNT, 0x1e); - mt7601u_wr(dev, MT_TXOP_CTRL_CFG, 
MT76_SET(MT_TXOP_TRUN_EN, 0x3f) | - MT76_SET(MT_TXOP_EXT_CCA_DLY, 0x58)); + mt7601u_wr(dev, MT_TXOP_CTRL_CFG, + FIELD_PREP(MT_TXOP_TRUN_EN, 0x3f) | + FIELD_PREP(MT_TXOP_EXT_CCA_DLY, 0x58)); ret = mt7601u_eeprom_init(dev); if (ret) diff --git a/drivers/net/wireless/mediatek/mt7601u/mac.c b/drivers/net/wireless/mediatek/mt7601u/mac.c index e21c53e..3c57639 100644 --- a/drivers/net/wireless/mediatek/mt7601u/mac.c +++ b/drivers/net/wireless/mediatek/mt7601u/mac.c @@ -19,13 +19,13 @@ static void mt76_mac_process_tx_rate(struct ieee80211_tx_rate *txrate, u16 rate) { - u8 idx = MT76_GET(MT_TXWI_RATE_MCS, rate); + u8 idx = FIELD_GET(MT_TXWI_RATE_MCS, rate); txrate->idx = 0; txrate->flags = 0; txrate->count = 1; - switch (MT76_GET(MT_TXWI_RATE_PHY_MODE, rate)) { + switch (FIELD_GET(MT_TXWI_RATE_PHY_MODE, rate)) { case MT_PHY_TYPE_OFDM: txrate->idx = idx + 4; return; @@ -47,7 +47,7 @@ mt76_mac_process_tx_rate(struct ieee80211_tx_rate *txrate, u16 rate) return; } - if (MT76_GET(MT_TXWI_RATE_BW, rate) == MT_PHY_BW_40) + if (FIELD_GET(MT_TXWI_RATE_BW, rate) == MT_PHY_BW_40) txrate->flags |= IEEE80211_TX_RC_40_MHZ_WIDTH; if (rate & MT_TXWI_RATE_SGI) @@ -125,9 +125,9 @@ u16 mt76_mac_tx_rate_val(struct mt7601u_dev *dev, bw = 0; } - rateval = MT76_SET(MT_RXWI_RATE_MCS, rate_idx); - rateval |= MT76_SET(MT_RXWI_RATE_PHY, phy); - rateval |= MT76_SET(MT_RXWI_RATE_BW, bw); + rateval = FIELD_PREP(MT_RXWI_RATE_MCS, rate_idx); + rateval |= FIELD_PREP(MT_RXWI_RATE_PHY, phy); + rateval |= FIELD_PREP(MT_RXWI_RATE_BW, bw); if (rate->flags & IEEE80211_TX_RC_SHORT_GI) rateval |= MT_RXWI_RATE_SGI; @@ -156,9 +156,9 @@ struct mt76_tx_status mt7601u_mac_fetch_tx_status(struct mt7601u_dev *dev) stat.success = !!(val & MT_TX_STAT_FIFO_SUCCESS); stat.aggr = !!(val & MT_TX_STAT_FIFO_AGGR); stat.ack_req = !!(val & MT_TX_STAT_FIFO_ACKREQ); - stat.pktid = MT76_GET(MT_TX_STAT_FIFO_PID_TYPE, val); - stat.wcid = MT76_GET(MT_TX_STAT_FIFO_WCID, val); - stat.rate = MT76_GET(MT_TX_STAT_FIFO_RATE, val); + 
stat.pktid = FIELD_GET(MT_TX_STAT_FIFO_PID_TYPE, val); + stat.wcid = FIELD_GET(MT_TX_STAT_FIFO_WCID, val); + stat.rate = FIELD_GET(MT_TX_STAT_FIFO_RATE, val); return stat; } @@ -270,7 +270,7 @@ void mt7601u_mac_config_tsf(struct mt7601u_dev *dev, bool enable, int interval) } val &= ~MT_BEACON_TIME_CFG_INTVAL; - val |= MT76_SET(MT_BEACON_TIME_CFG_INTVAL, interval << 4) | + val |= FIELD_PREP(MT_BEACON_TIME_CFG_INTVAL, interval << 4) | MT_BEACON_TIME_CFG_TIMER_EN | MT_BEACON_TIME_CFG_SYNC_MODE | MT_BEACON_TIME_CFG_TBTT_EN; @@ -349,8 +349,8 @@ mt7601u_mac_wcid_setup(struct mt7601u_dev *dev, u8 idx, u8 vif_idx, u8 *mac) u8 zmac[ETH_ALEN] = {}; u32 attr; - attr = MT76_SET(MT_WCID_ATTR_BSS_IDX, vif_idx & 7) | - MT76_SET(MT_WCID_ATTR_BSS_IDX_EXT, !!(vif_idx & 8)); + attr = FIELD_PREP(MT_WCID_ATTR_BSS_IDX, vif_idx & 7) | + FIELD_PREP(MT_WCID_ATTR_BSS_IDX_EXT, !!(vif_idx & 8)); mt76_wr(dev, MT_WCID_ATTR(idx), attr); @@ -382,15 +382,15 @@ void mt7601u_mac_set_ampdu_factor(struct mt7601u_dev *dev) rcu_read_unlock(); mt7601u_wr(dev, MT_MAX_LEN_CFG, 0xa0fff | - MT76_SET(MT_MAX_LEN_CFG_AMPDU, min_factor)); + FIELD_PREP(MT_MAX_LEN_CFG_AMPDU, min_factor)); } static void mt76_mac_process_rate(struct ieee80211_rx_status *status, u16 rate) { - u8 idx = MT76_GET(MT_RXWI_RATE_MCS, rate); + u8 idx = FIELD_GET(MT_RXWI_RATE_MCS, rate); - switch (MT76_GET(MT_RXWI_RATE_PHY, rate)) { + switch (FIELD_GET(MT_RXWI_RATE_PHY, rate)) { case MT_PHY_TYPE_OFDM: if (WARN_ON(idx >= 8)) idx = 0; @@ -436,7 +436,7 @@ mt7601u_rx_monitor_beacon(struct mt7601u_dev *dev, struct mt7601u_rxwi *rxwi, u16 rate, int rssi) { dev->bcn_freq_off = rxwi->freq_off; - dev->bcn_phy_mode = MT76_GET(MT_RXWI_RATE_PHY, rate); + dev->bcn_phy_mode = FIELD_GET(MT_RXWI_RATE_PHY, rate); dev->avg_rssi = (dev->avg_rssi * 15) / 16 + (rssi << 8); } @@ -458,7 +458,7 @@ u32 mt76_mac_process_rx(struct mt7601u_dev *dev, struct sk_buff *skb, u16 rate = le16_to_cpu(rxwi->rate); int rssi; - len = MT76_GET(MT_RXWI_CTL_MPDU_LEN, ctl); + len = 
FIELD_GET(MT_RXWI_CTL_MPDU_LEN, ctl); if (len < 10) return 0; @@ -542,8 +542,8 @@ int mt76_mac_wcid_set_key(struct mt7601u_dev *dev, u8 idx, val = mt7601u_rr(dev, MT_WCID_ATTR(idx)); val &= ~MT_WCID_ATTR_PKEY_MODE & ~MT_WCID_ATTR_PKEY_MODE_EXT; - val |= MT76_SET(MT_WCID_ATTR_PKEY_MODE, cipher & 7) | - MT76_SET(MT_WCID_ATTR_PKEY_MODE_EXT, cipher >> 3); + val |= FIELD_PREP(MT_WCID_ATTR_PKEY_MODE, cipher & 7) | + FIELD_PREP(MT_WCID_ATTR_PKEY_MODE_EXT, cipher >> 3); val &= ~MT_WCID_ATTR_PAIRWISE; val |= MT_WCID_ATTR_PAIRWISE * !!(key && key->flags & IEEE80211_KEY_FLAG_PAIRWISE); diff --git a/drivers/net/wireless/mediatek/mt7601u/mcu.c b/drivers/net/wireless/mediatek/mt7601u/mcu.c index 91c4b34..dbdfb3f 100644 --- a/drivers/net/wireless/mediatek/mt7601u/mcu.c +++ b/drivers/net/wireless/mediatek/mt7601u/mcu.c @@ -43,8 +43,8 @@ static inline void mt7601u_dma_skb_wrap_cmd(struct sk_buff *skb, u8 seq, enum mcu_cmd cmd) { WARN_ON(mt7601u_dma_skb_wrap(skb, CPU_TX_PORT, DMA_COMMAND, - MT76_SET(MT_TXD_CMD_INFO_SEQ, seq) | - MT76_SET(MT_TXD_CMD_INFO_TYPE, cmd))); + FIELD_PREP(MT_TXD_CMD_INFO_SEQ, seq) | + FIELD_PREP(MT_TXD_CMD_INFO_TYPE, cmd))); } static inline void trace_mt_mcu_msg_send_cs(struct mt7601u_dev *dev, @@ -100,13 +100,13 @@ static int mt7601u_mcu_wait_resp(struct mt7601u_dev *dev, u8 seq) dev_err(dev->dev, "Error: MCU resp urb failed:%d\n", urb_status); - if (MT76_GET(MT_RXD_CMD_INFO_CMD_SEQ, rxfce) == seq && - MT76_GET(MT_RXD_CMD_INFO_EVT_TYPE, rxfce) == CMD_DONE) + if (FIELD_GET(MT_RXD_CMD_INFO_CMD_SEQ, rxfce) == seq && + FIELD_GET(MT_RXD_CMD_INFO_EVT_TYPE, rxfce) == CMD_DONE) return 0; - dev_err(dev->dev, "Error: MCU resp evt:%hhx seq:%hhx-%hhx!\n", - MT76_GET(MT_RXD_CMD_INFO_EVT_TYPE, rxfce), - seq, MT76_GET(MT_RXD_CMD_INFO_CMD_SEQ, rxfce)); + dev_err(dev->dev, "Error: MCU resp evt:%lx seq:%hhx-%lx!\n", + FIELD_GET(MT_RXD_CMD_INFO_EVT_TYPE, rxfce), + seq, FIELD_GET(MT_RXD_CMD_INFO_CMD_SEQ, rxfce)); } dev_err(dev->dev, "Error: %s timed out\n", __func__); @@ 
-291,9 +291,9 @@ static int __mt7601u_dma_fw(struct mt7601u_dev *dev, u32 val; int ret; - reg = cpu_to_le32(MT76_SET(MT_TXD_INFO_TYPE, DMA_PACKET) | - MT76_SET(MT_TXD_INFO_D_PORT, CPU_TX_PORT) | - MT76_SET(MT_TXD_INFO_LEN, len)); + reg = cpu_to_le32(FIELD_PREP(MT_TXD_INFO_TYPE, DMA_PACKET) | + FIELD_PREP(MT_TXD_INFO_D_PORT, CPU_TX_PORT) | + FIELD_PREP(MT_TXD_INFO_LEN, len)); memcpy(buf.buf, &reg, sizeof(reg)); memcpy(buf.buf + sizeof(reg), data, len); memset(buf.buf + sizeof(reg) + len, 0, 8); diff --git a/drivers/net/wireless/mediatek/mt7601u/mt7601u.h b/drivers/net/wireless/mediatek/mt7601u/mt7601u.h index 428bd2f1..c7ec404 100644 --- a/drivers/net/wireless/mediatek/mt7601u/mt7601u.h +++ b/drivers/net/wireless/mediatek/mt7601u/mt7601u.h @@ -15,6 +15,7 @@ #ifndef MT7601U_H #define MT7601U_H +#include <linux/bitfield.h> #include #include #include @@ -24,7 +25,6 @@ #include #include "regs.h" -#include "util.h" #define MT_CALIBRATE_INTERVAL (4 * HZ) @@ -299,7 +299,7 @@ bool mt76_poll_msec(struct mt7601u_dev *dev, u32 offset, u32 mask, u32 val, /* Compatibility with mt76 */ #define mt76_rmw_field(_dev, _reg, _field, _val) \ - mt76_rmw(_dev, _reg, _field, MT76_SET(_field, _val)) + mt76_rmw(_dev, _reg, _field, FIELD_PREP(_field, _val)) static inline u32 mt76_rr(struct mt7601u_dev *dev, u32 offset) { diff --git a/drivers/net/wireless/mediatek/mt7601u/phy.c b/drivers/net/wireless/mediatek/mt7601u/phy.c index 1908af6..ca09a5d 100644 --- a/drivers/net/wireless/mediatek/mt7601u/phy.c +++ b/drivers/net/wireless/mediatek/mt7601u/phy.c @@ -41,11 +41,12 @@ mt7601u_rf_wr(struct mt7601u_dev *dev, u8 bank, u8 offset, u8 value) goto out; } - mt7601u_wr(dev, MT_RF_CSR_CFG, MT76_SET(MT_RF_CSR_CFG_DATA, value) | - MT76_SET(MT_RF_CSR_CFG_REG_BANK, bank) | - MT76_SET(MT_RF_CSR_CFG_REG_ID, offset) | - MT_RF_CSR_CFG_WR | - MT_RF_CSR_CFG_KICK); + mt7601u_wr(dev, MT_RF_CSR_CFG, + FIELD_PREP(MT_RF_CSR_CFG_DATA, value) | + FIELD_PREP(MT_RF_CSR_CFG_REG_BANK, bank) | + FIELD_PREP(MT_RF_CSR_CFG_REG_ID, offset) | +
MT_RF_CSR_CFG_WR | + MT_RF_CSR_CFG_KICK); trace_rf_write(dev, bank, offset, value); out: mutex_unlock(&dev->reg_atomic_mutex); @@ -74,17 +75,18 @@ mt7601u_rf_rr(struct mt7601u_dev *dev, u8 bank, u8 offset) if (!mt76_poll(dev, MT_RF_CSR_CFG, MT_RF_CSR_CFG_KICK, 0, 100)) goto out; - mt7601u_wr(dev, MT_RF_CSR_CFG, MT76_SET(MT_RF_CSR_CFG_REG_BANK, bank) | - MT76_SET(MT_RF_CSR_CFG_REG_ID, offset) | - MT_RF_CSR_CFG_KICK); + mt7601u_wr(dev, MT_RF_CSR_CFG, + FIELD_PREP(MT_RF_CSR_CFG_REG_BANK, bank) | + FIELD_PREP(MT_RF_CSR_CFG_REG_ID, offset) | + MT_RF_CSR_CFG_KICK); if (!mt76_poll(dev, MT_RF_CSR_CFG, MT_RF_CSR_CFG_KICK, 0, 100)) goto out; val = mt7601u_rr(dev, MT_RF_CSR_CFG); - if (MT76_GET(MT_RF_CSR_CFG_REG_ID, val) == offset && - MT76_GET(MT_RF_CSR_CFG_REG_BANK, val) == bank) { - ret = MT76_GET(MT_RF_CSR_CFG_DATA, val); + if (FIELD_GET(MT_RF_CSR_CFG_REG_ID, val) == offset && + FIELD_GET(MT_RF_CSR_CFG_REG_BANK, val) == bank) { + ret = FIELD_GET(MT_RF_CSR_CFG_DATA, val); trace_rf_read(dev, bank, offset, ret); } out: @@ -139,8 +141,8 @@ static void mt7601u_bbp_wr(struct mt7601u_dev *dev, u8 offset, u8 val) } mt7601u_wr(dev, MT_BBP_CSR_CFG, - MT76_SET(MT_BBP_CSR_CFG_VAL, val) | - MT76_SET(MT_BBP_CSR_CFG_REG_NUM, offset) | + FIELD_PREP(MT_BBP_CSR_CFG_VAL, val) | + FIELD_PREP(MT_BBP_CSR_CFG_REG_NUM, offset) | MT_BBP_CSR_CFG_RW_MODE | MT_BBP_CSR_CFG_BUSY); trace_bbp_write(dev, offset, val); out: @@ -163,7 +165,7 @@ static int mt7601u_bbp_rr(struct mt7601u_dev *dev, u8 offset) goto out; mt7601u_wr(dev, MT_BBP_CSR_CFG, - MT76_SET(MT_BBP_CSR_CFG_REG_NUM, offset) | + FIELD_PREP(MT_BBP_CSR_CFG_REG_NUM, offset) | MT_BBP_CSR_CFG_RW_MODE | MT_BBP_CSR_CFG_BUSY | MT_BBP_CSR_CFG_READ); @@ -171,8 +173,8 @@ static int mt7601u_bbp_rr(struct mt7601u_dev *dev, u8 offset) goto out; val = mt7601u_rr(dev, MT_BBP_CSR_CFG); - if (MT76_GET(MT_BBP_CSR_CFG_REG_NUM, val) == offset) { - ret = MT76_GET(MT_BBP_CSR_CFG_VAL, val); + if (FIELD_GET(MT_BBP_CSR_CFG_REG_NUM, val) == offset) { + ret = 
FIELD_GET(MT_BBP_CSR_CFG_VAL, val); trace_bbp_read(dev, offset, ret); } out: @@ -249,9 +251,9 @@ int mt7601u_phy_get_rssi(struct mt7601u_dev *dev, /* bw40 */ { -2, 16, 34 } } }; - int bw = MT76_GET(MT_RXWI_RATE_BW, rate); - int aux_lna = MT76_GET(MT_RXWI_ANT_AUX_LNA, rxwi->ant); - int lna_id = MT76_GET(MT_RXWI_GAIN_RSSI_LNA_ID, rxwi->gain); + int bw = FIELD_GET(MT_RXWI_RATE_BW, rate); + int aux_lna = FIELD_GET(MT_RXWI_ANT_AUX_LNA, rxwi->ant); + int lna_id = FIELD_GET(MT_RXWI_GAIN_RSSI_LNA_ID, rxwi->gain); int val; if (lna_id) /* LNA id can be 0, 2, 3. */ @@ -259,7 +261,7 @@ int mt7601u_phy_get_rssi(struct mt7601u_dev *dev, val = 8; val -= lna[aux_lna][bw][lna_id]; - val -= MT76_GET(MT_RXWI_GAIN_RSSI_VAL, rxwi->gain); + val -= FIELD_GET(MT_RXWI_GAIN_RSSI_VAL, rxwi->gain); val -= dev->ee->lna_gain; val -= dev->ee->rssi_offset[0]; @@ -939,7 +941,7 @@ static int mt7601u_tssi_cal(struct mt7601u_dev *dev) dev_dbg(dev->dev, "final diff: %08x\n", diff_pwr); val = mt7601u_rr(dev, MT_TX_ALC_CFG_1); - curr_pwr = s6_to_int(MT76_GET(MT_TX_ALC_CFG_1_TEMP_COMP, val)); + curr_pwr = s6_to_int(FIELD_GET(MT_TX_ALC_CFG_1_TEMP_COMP, val)); diff_pwr += curr_pwr; val = (val & ~MT_TX_ALC_CFG_1_TEMP_COMP) | int_to_s6(diff_pwr); mt7601u_wr(dev, MT_TX_ALC_CFG_1, val); diff --git a/drivers/net/wireless/mediatek/mt7601u/tx.c b/drivers/net/wireless/mediatek/mt7601u/tx.c index a0a33dc..ad77bec 100644 --- a/drivers/net/wireless/mediatek/mt7601u/tx.c +++ b/drivers/net/wireless/mediatek/mt7601u/tx.c @@ -175,11 +175,12 @@ mt7601u_push_txwi(struct mt7601u_dev *dev, struct sk_buff *skb, ba_size = min_t(int, 63, ba_size); if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) ba_size = 0; - txwi->ack_ctl |= MT76_SET(MT_TXWI_ACK_CTL_BA_WINDOW, ba_size); + txwi->ack_ctl |= FIELD_PREP(MT_TXWI_ACK_CTL_BA_WINDOW, ba_size); - txwi->flags = cpu_to_le16(MT_TXWI_FLAGS_AMPDU | - MT76_SET(MT_TXWI_FLAGS_MPDU_DENSITY, - sta->ht_cap.ampdu_density)); + txwi->flags = + cpu_to_le16(MT_TXWI_FLAGS_AMPDU | + 
FIELD_PREP(MT_TXWI_FLAGS_MPDU_DENSITY, + sta->ht_cap.ampdu_density)); if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) txwi->flags = 0; } @@ -188,7 +189,7 @@ mt7601u_push_txwi(struct mt7601u_dev *dev, struct sk_buff *skb, is_probe = !!(info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE); pkt_id = mt7601u_tx_pktid_enc(dev, rate_ctl & 0x7, is_probe); - pkt_len |= MT76_SET(MT_TXWI_LEN_PKTID, pkt_id); + pkt_len |= FIELD_PREP(MT_TXWI_LEN_PKTID, pkt_id); txwi->len_ctl = cpu_to_le16(pkt_len); return txwi; @@ -285,9 +286,9 @@ int mt7601u_conf_tx(struct ieee80211_hw *hw, struct ieee80211_vif *vif, WARN_ON(cw_min > 0xf); WARN_ON(cw_max > 0xf); - val = MT76_SET(MT_EDCA_CFG_AIFSN, params->aifs) | - MT76_SET(MT_EDCA_CFG_CWMIN, cw_min) | - MT76_SET(MT_EDCA_CFG_CWMAX, cw_max); + val = FIELD_PREP(MT_EDCA_CFG_AIFSN, params->aifs) | + FIELD_PREP(MT_EDCA_CFG_CWMIN, cw_min) | + FIELD_PREP(MT_EDCA_CFG_CWMAX, cw_max); /* TODO: based on user-controlled EnableTxBurst var vendor drv sets * a really long txop on AC0 (see connect.c:2009) but only on * connect? When not connected should be 0. 
@@ -295,7 +296,7 @@ int mt7601u_conf_tx(struct ieee80211_hw *hw, struct ieee80211_vif *vif, if (!hw_q) val |= 0x60; else - val |= MT76_SET(MT_EDCA_CFG_TXOP, params->txop); + val |= FIELD_PREP(MT_EDCA_CFG_TXOP, params->txop); mt76_wr(dev, MT_EDCA_CFG_AC(hw_q), val); val = mt76_rr(dev, MT_WMM_TXOP(hw_q)); diff --git a/drivers/net/wireless/mediatek/mt7601u/util.h b/drivers/net/wireless/mediatek/mt7601u/util.h deleted file mode 100644 index b89140b..0000000 --- a/drivers/net/wireless/mediatek/mt7601u/util.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (C) 2014 Felix Fietkau - * Copyright (C) 2004 - 2009 Ivo van Doorn - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#ifndef __MT76_UTIL_H -#define __MT76_UTIL_H - -/* - * Power of two check, this will check - * if the mask that has been given contains and contiguous set of bits. - * Note that we cannot use the is_power_of_2() function since this - * check must be done at compile-time. - */ -#define is_power_of_two(x) ( !((x) & ((x)-1)) ) -#define low_bit_mask(x) ( ((x)-1) & ~(x) ) -#define is_valid_mask(x) is_power_of_two(1LU + (x) + low_bit_mask(x)) - -/* - * Macros to find first set bit in a variable. - * These macros behave the same as the __ffs() functions but - * the most important difference that this is done during - * compile-time rather then run-time. 
- */ -#define compile_ffs2(__x) \ - __builtin_choose_expr(((__x) & 0x1), 0, 1) - -#define compile_ffs4(__x) \ - __builtin_choose_expr(((__x) & 0x3), \ - (compile_ffs2((__x))), \ - (compile_ffs2((__x) >> 2) + 2)) - -#define compile_ffs8(__x) \ - __builtin_choose_expr(((__x) & 0xf), \ - (compile_ffs4((__x))), \ - (compile_ffs4((__x) >> 4) + 4)) - -#define compile_ffs16(__x) \ - __builtin_choose_expr(((__x) & 0xff), \ - (compile_ffs8((__x))), \ - (compile_ffs8((__x) >> 8) + 8)) - -#define compile_ffs32(__x) \ - __builtin_choose_expr(((__x) & 0xffff), \ - (compile_ffs16((__x))), \ - (compile_ffs16((__x) >> 16) + 16)) - -/* - * This macro will check the requirements for the FIELD{8,16,32} macros - * The mask should be a constant non-zero contiguous set of bits which - * does not exceed the given typelimit. - */ -#define FIELD_CHECK(__mask) \ - BUILD_BUG_ON(!(__mask) || !is_valid_mask(__mask)) - -#define MT76_SET(_mask, _val) \ - ({ \ - FIELD_CHECK(_mask); \ - (((u32) (_val)) << compile_ffs32(_mask)) & _mask; \ - }) - -#define MT76_GET(_mask, _val) \ - ({ \ - FIELD_CHECK(_mask); \ - (u32) (((_val) & _mask) >> compile_ffs32(_mask)); \ - }) - -#endif -- cgit v1.1 From 634faf3686900ccdee87b77e2c56df8b2159912b Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Mon, 5 Sep 2016 11:42:12 +0100 Subject: brcmfmac: add support for bcm4339 chip with modalias sdio:c00v02D0d4339 The driver already supports the bcm4339 chipset but only for the variant that shares the same modalias as the bcm4335, ie. sdio:c00v02D0d4335. It turns out that there are also bcm4339 devices out there that have a more distinguishable modalias sdio:c00v02D0d4339.
Reported-by: Steve deRosier Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c | 1 + drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 3 ++- include/linux/mmc/sdio_ids.h | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c index f549c25..03404cb 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c @@ -1101,6 +1101,7 @@ static const struct sdio_device_id brcmf_sdmmc_ids[] = { BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43341), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43362), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4335_4339), + BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4339), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43430), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4345), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4354), diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 68ab3ac..589a49c 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -3757,7 +3757,8 @@ static u32 brcmf_sdio_buscore_read32(void *ctx, u32 addr) u32 val, rev; val = brcmf_sdiod_regrl(sdiodev, addr, NULL); - if (sdiodev->func[0]->device == SDIO_DEVICE_ID_BROADCOM_4335_4339 && + if ((sdiodev->func[0]->device == SDIO_DEVICE_ID_BROADCOM_4335_4339 || + sdiodev->func[0]->device == SDIO_DEVICE_ID_BROADCOM_4339) && addr == CORE_CC_REG(SI_ENUM_BASE, chipid)) { rev = (val & CID_REV_MASK) >> CID_REV_SHIFT; if (rev >= 2) { diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 0d126ae..d43ef96 100644 --- a/include/linux/mmc/sdio_ids.h +++ 
b/include/linux/mmc/sdio_ids.h @@ -32,6 +32,7 @@ #define SDIO_DEVICE_ID_BROADCOM_43340 0xa94c #define SDIO_DEVICE_ID_BROADCOM_43341 0xa94d #define SDIO_DEVICE_ID_BROADCOM_4335_4339 0x4335 +#define SDIO_DEVICE_ID_BROADCOM_4339 0x4339 #define SDIO_DEVICE_ID_BROADCOM_43362 0xa962 #define SDIO_DEVICE_ID_BROADCOM_43430 0xa9a6 #define SDIO_DEVICE_ID_BROADCOM_4345 0x4345 -- cgit v1.1 From 5251b6be8bb5c5675bdf12347c7b83937a5c91e5 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Mon, 5 Sep 2016 11:42:13 +0100 Subject: brcmfmac: sdio: shorten retry loop in brcmf_sdio_kso_control() In brcmf_sdio_kso_control() there is a retry loop as hardware may take time to settle. However, when the call to brcmf_sdiod_regrb() returns an error it is due to SDIO access failure and it makes no sense to wait for hardware to settle. This patch aborts the loop after a number of subsequent access errors. Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 589a49c..b892dac 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -313,6 +313,7 @@ struct rte_console { #define KSO_WAIT_US 50 #define MAX_KSO_ATTEMPTS (PMU_MAX_TRANSITION_DLY/KSO_WAIT_US) +#define BRCMF_SDIO_MAX_ACCESS_ERRORS 5 /* * Conversion of 802.1D priority to precedence level @@ -677,6 +678,7 @@ brcmf_sdio_kso_control(struct brcmf_sdio *bus, bool on) { u8 wr_val = 0, rd_val, cmp_val, bmask; int err = 0; + int err_cnt = 0; int try_cnt = 0; brcmf_dbg(TRACE, "Enter: on=%d\n", on); @@ -712,9 +714,14 @@ brcmf_sdio_kso_control(struct brcmf_sdio *bus, bool on) */ rd_val = brcmf_sdiod_regrb(bus->sdiodev, 
SBSDIO_FUNC1_SLEEPCSR, &err); - if (((rd_val & bmask) == cmp_val) && !err) + if (!err) { + if ((rd_val & bmask) == cmp_val) + break; + err_cnt = 0; + } + /* bail out upon subsequent access errors */ + if (err && (err_cnt++ > BRCMF_SDIO_MAX_ACCESS_ERRORS)) break; - udelay(KSO_WAIT_US); brcmf_sdiod_regwb(bus->sdiodev, SBSDIO_FUNC1_SLEEPCSR, wr_val, &err); -- cgit v1.1 From 4c5dae59d2e9386c706a2f3c7c2746ae277bf568 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Tue, 26 Jul 2016 19:31:44 +0530 Subject: mwifiex: add PCIe function level reset support This patch implements pre and post FLR handlers to support PCIe FLR functionality. Software cleanup is performed in pre-FLR whereas firmware is downloaded and software is re-initialised in post-FLR handler. Following command triggers FLR. echo "1" > /sys/bus/pci/devices/$NUMBER/reset This feature can be used as a recovery mechanism when firmware gets hang. Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/main.c | 242 ++++++++++++++++++++++++++-- drivers/net/wireless/marvell/mwifiex/main.h | 3 + drivers/net/wireless/marvell/mwifiex/pcie.c | 136 +++++++++++++++- drivers/net/wireless/marvell/mwifiex/pcie.h | 1 + 4 files changed, 365 insertions(+), 17 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c index 029e5da..9b2e98c 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.c +++ b/drivers/net/wireless/marvell/mwifiex/main.c @@ -493,9 +493,11 @@ static void mwifiex_free_adapter(struct mwifiex_adapter *adapter) */ static void mwifiex_terminate_workqueue(struct mwifiex_adapter *adapter) { - flush_workqueue(adapter->workqueue); - destroy_workqueue(adapter->workqueue); - adapter->workqueue = NULL; + if (adapter->workqueue) { + flush_workqueue(adapter->workqueue); + destroy_workqueue(adapter->workqueue); + adapter->workqueue = NULL; + } if (adapter->rx_workqueue) { flush_workqueue(adapter->rx_workqueue); @@ 
-574,10 +576,13 @@ static void mwifiex_fw_dpc(const struct firmware *firmware, void *context) } priv = adapter->priv[MWIFIEX_BSS_ROLE_STA]; - if (mwifiex_register_cfg80211(adapter)) { - mwifiex_dbg(adapter, ERROR, - "cannot register with cfg80211\n"); - goto err_init_fw; + + if (!adapter->wiphy) { + if (mwifiex_register_cfg80211(adapter)) { + mwifiex_dbg(adapter, ERROR, + "cannot register with cfg80211\n"); + goto err_init_fw; + } } if (mwifiex_init_channel_scan_gap(adapter)) { @@ -671,7 +676,8 @@ done: /* * This function initializes the hardware and gets firmware. */ -static int mwifiex_init_hw_fw(struct mwifiex_adapter *adapter) +static int mwifiex_init_hw_fw(struct mwifiex_adapter *adapter, + bool req_fw_nowait) { int ret; @@ -686,12 +692,25 @@ static int mwifiex_init_hw_fw(struct mwifiex_adapter *adapter) return -1; } } - ret = request_firmware_nowait(THIS_MODULE, 1, adapter->fw_name, - adapter->dev, GFP_KERNEL, adapter, - mwifiex_fw_dpc); - if (ret < 0) - mwifiex_dbg(adapter, ERROR, - "request_firmware_nowait error %d\n", ret); + + if (req_fw_nowait) { + ret = request_firmware_nowait(THIS_MODULE, 1, adapter->fw_name, + adapter->dev, GFP_KERNEL, adapter, + mwifiex_fw_dpc); + if (ret < 0) + mwifiex_dbg(adapter, ERROR, + "request_firmware_nowait error %d\n", ret); + } else { + ret = request_firmware(&adapter->firmware, + adapter->fw_name, + adapter->dev); + if (ret < 0) + mwifiex_dbg(adapter, ERROR, + "request_firmware error %d\n", ret); + else + mwifiex_fw_dpc(adapter->firmware, (void *)adapter); + } + return ret; } @@ -1341,6 +1360,199 @@ static void mwifiex_main_work_queue(struct work_struct *work) } /* + * This function gets called during PCIe function level reset. 
Required + * code is extracted from mwifiex_remove_card() + */ +static int +mwifiex_shutdown_sw(struct mwifiex_adapter *adapter, struct semaphore *sem) +{ + struct mwifiex_private *priv; + int i; + + if (down_interruptible(sem)) + goto exit_sem_err; + + if (!adapter) + goto exit_remove; + + priv = mwifiex_get_priv(adapter, MWIFIEX_BSS_ROLE_ANY); + mwifiex_deauthenticate(priv, NULL); + + /* We can no longer handle interrupts once we start doing the teardown + * below. + */ + if (adapter->if_ops.disable_int) + adapter->if_ops.disable_int(adapter); + + adapter->surprise_removed = true; + mwifiex_terminate_workqueue(adapter); + + /* Stop data */ + for (i = 0; i < adapter->priv_num; i++) { + priv = adapter->priv[i]; + if (priv && priv->netdev) { + mwifiex_stop_net_dev_queue(priv->netdev, adapter); + if (netif_carrier_ok(priv->netdev)) + netif_carrier_off(priv->netdev); + netif_device_detach(priv->netdev); + } + } + + mwifiex_dbg(adapter, CMD, "cmd: calling mwifiex_shutdown_drv...\n"); + adapter->init_wait_q_woken = false; + + if (mwifiex_shutdown_drv(adapter) == -EINPROGRESS) + wait_event_interruptible(adapter->init_wait_q, + adapter->init_wait_q_woken); + if (adapter->if_ops.down_dev) + adapter->if_ops.down_dev(adapter); + + mwifiex_dbg(adapter, CMD, "cmd: mwifiex_shutdown_drv done\n"); + if (atomic_read(&adapter->rx_pending) || + atomic_read(&adapter->tx_pending) || + atomic_read(&adapter->cmd_pending)) { + mwifiex_dbg(adapter, ERROR, + "rx_pending=%d, tx_pending=%d,\t" + "cmd_pending=%d\n", + atomic_read(&adapter->rx_pending), + atomic_read(&adapter->tx_pending), + atomic_read(&adapter->cmd_pending)); + } + + for (i = 0; i < adapter->priv_num; i++) { + priv = adapter->priv[i]; + if (!priv) + continue; + rtnl_lock(); + if (priv->netdev && + priv->wdev.iftype != NL80211_IFTYPE_UNSPECIFIED) + mwifiex_del_virtual_intf(adapter->wiphy, &priv->wdev); + rtnl_unlock(); + } + +exit_remove: + up(sem); +exit_sem_err: + mwifiex_dbg(adapter, INFO, "%s, successful\n", __func__); + 
return 0; +} + +/* This function gets called during PCIe function level reset. Required + * code is extracted from mwifiex_add_card() + */ +static int +mwifiex_reinit_sw(struct mwifiex_adapter *adapter, struct semaphore *sem, + struct mwifiex_if_ops *if_ops, u8 iface_type) +{ + char fw_name[32]; + struct pcie_service_card *card = adapter->card; + + if (down_interruptible(sem)) + goto exit_sem_err; + + mwifiex_init_lock_list(adapter); + if (adapter->if_ops.up_dev) + adapter->if_ops.up_dev(adapter); + + adapter->iface_type = iface_type; + adapter->card_sem = sem; + + adapter->hw_status = MWIFIEX_HW_STATUS_INITIALIZING; + adapter->surprise_removed = false; + init_waitqueue_head(&adapter->init_wait_q); + adapter->is_suspended = false; + adapter->hs_activated = false; + init_waitqueue_head(&adapter->hs_activate_wait_q); + init_waitqueue_head(&adapter->cmd_wait_q.wait); + adapter->cmd_wait_q.status = 0; + adapter->scan_wait_q_woken = false; + + if ((num_possible_cpus() > 1) || adapter->iface_type == MWIFIEX_USB) + adapter->rx_work_enabled = true; + + adapter->workqueue = + alloc_workqueue("MWIFIEX_WORK_QUEUE", + WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_UNBOUND, 1); + if (!adapter->workqueue) + goto err_kmalloc; + + INIT_WORK(&adapter->main_work, mwifiex_main_work_queue); + + if (adapter->rx_work_enabled) { + adapter->rx_workqueue = alloc_workqueue("MWIFIEX_RX_WORK_QUEUE", + WQ_HIGHPRI | + WQ_MEM_RECLAIM | + WQ_UNBOUND, 1); + if (!adapter->rx_workqueue) + goto err_kmalloc; + INIT_WORK(&adapter->rx_work, mwifiex_rx_work_queue); + } + + /* Register the device. Fill up the private data structure with + * relevant information from the card. 
Some code extracted from + * mwifiex_register_dev() + */ + mwifiex_dbg(adapter, INFO, "%s, mwifiex_init_hw_fw()...\n", __func__); + strcpy(fw_name, adapter->fw_name); + strcpy(adapter->fw_name, PCIE8997_DEFAULT_WIFIFW_NAME); + + adapter->tx_buf_size = card->pcie.tx_buf_size; + adapter->ext_scan = card->pcie.can_ext_scan; + if (mwifiex_init_hw_fw(adapter, false)) { + strcpy(adapter->fw_name, fw_name); + mwifiex_dbg(adapter, ERROR, + "%s: firmware init failed\n", __func__); + goto err_init_fw; + } + strcpy(adapter->fw_name, fw_name); + mwifiex_dbg(adapter, INFO, "%s, successful\n", __func__); + up(sem); + return 0; + +err_init_fw: + mwifiex_dbg(adapter, ERROR, "info: %s: unregister device\n", __func__); + if (adapter->if_ops.unregister_dev) + adapter->if_ops.unregister_dev(adapter); + if (adapter->hw_status == MWIFIEX_HW_STATUS_READY) { + mwifiex_dbg(adapter, ERROR, + "info: %s: shutdown mwifiex\n", __func__); + adapter->init_wait_q_woken = false; + + if (mwifiex_shutdown_drv(adapter) == -EINPROGRESS) + wait_event_interruptible(adapter->init_wait_q, + adapter->init_wait_q_woken); + } + +err_kmalloc: + mwifiex_terminate_workqueue(adapter); + adapter->surprise_removed = true; + up(sem); +exit_sem_err: + mwifiex_dbg(adapter, INFO, "%s, error\n", __func__); + + return -1; +} + +/* This function processes pre and post PCIe function level resets. + * It performs software cleanup without touching PCIe specific code. + * Also, during initialization PCIe stuff is skipped. + */ +void mwifiex_do_flr(struct mwifiex_adapter *adapter, bool prepare) +{ + struct mwifiex_if_ops if_ops; + + if (!prepare) { + mwifiex_reinit_sw(adapter, adapter->card_sem, &if_ops, + adapter->iface_type); + } else { + memcpy(&if_ops, &adapter->if_ops, + sizeof(struct mwifiex_if_ops)); + mwifiex_shutdown_sw(adapter, adapter->card_sem); + } +} +EXPORT_SYMBOL_GPL(mwifiex_do_flr); + +/* * This function adds the card. 
* * This function follows the following major steps to set up the device - @@ -1411,7 +1623,7 @@ mwifiex_add_card(void *card, struct semaphore *sem, goto err_registerdev; } - if (mwifiex_init_hw_fw(adapter)) { + if (mwifiex_init_hw_fw(adapter, true)) { pr_err("%s: firmware init failed\n", __func__); goto err_init_fw; } diff --git a/drivers/net/wireless/marvell/mwifiex/main.h b/drivers/net/wireless/marvell/mwifiex/main.h index 17221c4..26df28f 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.h +++ b/drivers/net/wireless/marvell/mwifiex/main.h @@ -829,6 +829,8 @@ struct mwifiex_if_ops { void (*deaggr_pkt)(struct mwifiex_adapter *, struct sk_buff *); void (*multi_port_resync)(struct mwifiex_adapter *); bool (*is_port_ready)(struct mwifiex_private *); + void (*down_dev)(struct mwifiex_adapter *); + void (*up_dev)(struct mwifiex_adapter *); }; struct mwifiex_adapter { @@ -1629,4 +1631,5 @@ void mwifiex_debugfs_remove(void); void mwifiex_dev_debugfs_init(struct mwifiex_private *priv); void mwifiex_dev_debugfs_remove(struct mwifiex_private *priv); #endif +void mwifiex_do_flr(struct mwifiex_adapter *adapter, bool prepare); #endif /* !_MWIFIEX_MAIN_H_ */ diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c index c1b3e8e..e4ef90e 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.c +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c @@ -277,6 +277,52 @@ static const struct pci_device_id mwifiex_ids[] = { MODULE_DEVICE_TABLE(pci, mwifiex_ids); +static void mwifiex_pcie_reset_notify(struct pci_dev *pdev, bool prepare) +{ + struct mwifiex_adapter *adapter; + struct pcie_service_card *card; + + if (!pdev) { + pr_err("%s: PCIe device is not specified\n", __func__); + return; + } + + card = (struct pcie_service_card *)pci_get_drvdata(pdev); + if (!card || !card->adapter) { + pr_err("%s: Card or adapter structure is not valid (%ld)\n", + __func__, (long)card); + return; + } + + adapter = card->adapter; + mwifiex_dbg(adapter, 
INFO, + "%s: vendor=0x%4.04x device=0x%4.04x rev=%d %s\n", + __func__, pdev->vendor, pdev->device, + pdev->revision, + prepare ? "Pre-FLR" : "Post-FLR"); + + if (prepare) { + /* Kernel would be performing FLR after this notification. + * Cleanup all software without cleaning anything related to + * PCIe and HW. + */ + mwifiex_do_flr(adapter, prepare); + adapter->surprise_removed = true; + } else { + /* Kernel stores and restores PCIe function context before and + * after performing FLR respectively. Reconfigure the software + * and firmware including firmware redownload + */ + adapter->surprise_removed = false; + mwifiex_do_flr(adapter, prepare); + } + mwifiex_dbg(adapter, INFO, "%s, successful\n", __func__); +} + +static const struct pci_error_handlers mwifiex_pcie_err_handler[] = { + { .reset_notify = mwifiex_pcie_reset_notify, }, +}; + #ifdef CONFIG_PM_SLEEP /* Power Management Hooks */ static SIMPLE_DEV_PM_OPS(mwifiex_pcie_pm_ops, mwifiex_pcie_suspend, @@ -295,6 +341,7 @@ static struct pci_driver __refdata mwifiex_pcie = { }, #endif .shutdown = mwifiex_pcie_shutdown, + .err_handler = mwifiex_pcie_err_handler, }; /* @@ -2953,7 +3000,6 @@ static int mwifiex_register_dev(struct mwifiex_adapter *adapter) static void mwifiex_unregister_dev(struct mwifiex_adapter *adapter) { struct pcie_service_card *card = adapter->card; - const struct mwifiex_pcie_card_reg *reg; struct pci_dev *pdev; int i; @@ -2977,8 +3023,90 @@ static void mwifiex_unregister_dev(struct mwifiex_adapter *adapter) if (card->msi_enable) pci_disable_msi(pdev); } + } +} - reg = card->pcie.reg; +/* This function initializes the PCI-E host memory space, WCB rings, etc. 
+ * + * The following initializations steps are followed - + * - Allocate TXBD ring buffers + * - Allocate RXBD ring buffers + * - Allocate event BD ring buffers + * - Allocate command response ring buffer + * - Allocate sleep cookie buffer + * Part of mwifiex_pcie_init(), not reset the PCIE registers + */ +static void mwifiex_pcie_up_dev(struct mwifiex_adapter *adapter) +{ + struct pcie_service_card *card = adapter->card; + int ret; + struct pci_dev *pdev = card->dev; + const struct mwifiex_pcie_card_reg *reg = card->pcie.reg; + + card->cmdrsp_buf = NULL; + ret = mwifiex_pcie_create_txbd_ring(adapter); + if (ret) { + mwifiex_dbg(adapter, ERROR, "Failed to create txbd ring\n"); + goto err_cre_txbd; + } + + ret = mwifiex_pcie_create_rxbd_ring(adapter); + if (ret) { + mwifiex_dbg(adapter, ERROR, "Failed to create rxbd ring\n"); + goto err_cre_rxbd; + } + + ret = mwifiex_pcie_create_evtbd_ring(adapter); + if (ret) { + mwifiex_dbg(adapter, ERROR, "Failed to create evtbd ring\n"); + goto err_cre_evtbd; + } + + ret = mwifiex_pcie_alloc_cmdrsp_buf(adapter); + if (ret) { + mwifiex_dbg(adapter, ERROR, "Failed to allocate cmdbuf buffer\n"); + goto err_alloc_cmdbuf; + } + + if (reg->sleep_cookie) { + ret = mwifiex_pcie_alloc_sleep_cookie_buf(adapter); + if (ret) { + mwifiex_dbg(adapter, ERROR, "Failed to allocate sleep_cookie buffer\n"); + goto err_alloc_cookie; + } + } else { + card->sleep_cookie_vbase = NULL; + } + return; + +err_alloc_cookie: + mwifiex_pcie_delete_cmdrsp_buf(adapter); +err_alloc_cmdbuf: + mwifiex_pcie_delete_evtbd_ring(adapter); +err_cre_evtbd: + mwifiex_pcie_delete_rxbd_ring(adapter); +err_cre_rxbd: + mwifiex_pcie_delete_txbd_ring(adapter); +err_cre_txbd: + pci_iounmap(pdev, card->pci_mmap1); +} + +/* This function cleans up the PCI-E host memory space. 
+ * Some code is extracted from mwifiex_unregister_dev() + * + */ +static void mwifiex_pcie_down_dev(struct mwifiex_adapter *adapter) +{ + struct pcie_service_card *card = adapter->card; + const struct mwifiex_pcie_card_reg *reg = card->pcie.reg; + + if (mwifiex_write_reg(adapter, reg->drv_rdy, 0x00000000)) + mwifiex_dbg(adapter, ERROR, "Failed to write driver not-ready signature\n"); + + adapter->seq_num = 0; + adapter->tx_buf_size = MWIFIEX_TX_DATA_BUF_SIZE_4K; + + if (card) { if (reg->sleep_cookie) mwifiex_pcie_delete_sleep_cookie_buf(adapter); @@ -2988,6 +3116,8 @@ static void mwifiex_unregister_dev(struct mwifiex_adapter *adapter) mwifiex_pcie_delete_txbd_ring(adapter); card->cmdrsp_buf = NULL; } + + return; } static struct mwifiex_if_ops pcie_ops = { @@ -3014,6 +3144,8 @@ static struct mwifiex_if_ops pcie_ops = { .clean_pcie_ring = mwifiex_clean_pcie_ring_buf, .reg_dump = mwifiex_pcie_reg_dump, .device_dump = mwifiex_pcie_device_dump, + .down_dev = mwifiex_pcie_down_dev, + .up_dev = mwifiex_pcie_up_dev, }; /* diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.h b/drivers/net/wireless/marvell/mwifiex/pcie.h index f05061c..f6992f0 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.h +++ b/drivers/net/wireless/marvell/mwifiex/pcie.h @@ -37,6 +37,7 @@ #define PCIEUART8997_FW_NAME_V2 "mrvl/pcieuart8997_combo_v2.bin" #define PCIEUSB8997_FW_NAME_Z "mrvl/pcieusb8997_combo.bin" #define PCIEUSB8997_FW_NAME_V2 "mrvl/pcieusb8997_combo_v2.bin" +#define PCIE8997_DEFAULT_WIFIFW_NAME "mrvl/pcie8997_wlan.bin" #define PCIE_VENDOR_ID_MARVELL (0x11ab) #define PCIE_VENDOR_ID_V2_MARVELL (0x1b4b) -- cgit v1.1 From 6f3c4fb6d05e63c9c6d8968302491c3a5457be61 Mon Sep 17 00:00:00 2001 From: Clemens Gruber Date: Mon, 5 Sep 2016 19:29:58 +0200 Subject: usb: chipidea: udc: fix NULL ptr dereference in isr_setup_status_phase Problems with the signal integrity of the high speed USB data lines or noise on reference ground lines can cause the i.MX6 USB controller to violate USB specs and 
exhibit unexpected behavior. It was observed that USBi_UI interrupts were triggered first and when isr_setup_status_phase was called, ci->status was NULL, which led to a NULL pointer dereference kernel panic. This patch fixes the kernel panic, emits a warning once and returns -EPIPE to halt the device and let the host get stalled. It also adds a comment to point people, who are experiencing this issue, to their USB hardware design. Cc: #4.1+ Signed-off-by: Clemens Gruber Signed-off-by: Peter Chen --- drivers/usb/chipidea/udc.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index dfec5a1..b933568 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -949,6 +949,15 @@ static int isr_setup_status_phase(struct ci_hdrc *ci) int retval; struct ci_hw_ep *hwep; + /* + * Unexpected USB controller behavior, caused by bad signal integrity + * or ground reference problems, can lead to isr_setup_status_phase + * being called with ci->status equal to NULL. + * If this situation occurs, you should review your USB hardware design. + */ + if (WARN_ON_ONCE(!ci->status)) + return -EPIPE; + hwep = (ci->ep0_dir == TX) ? ci->ep0out : ci->ep0in; ci->status->context = ci; ci->status->complete = isr_setup_status_complete; -- cgit v1.1 From b711657616947e7b4c15f6825d259324216b23f2 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Wed, 31 Aug 2016 13:50:59 +0200 Subject: mwifiex: scan: Simplify code This patch: - improves code layout - removes a useless memset(0) for some memory allocated with kzalloc - removes a useless if.
We know that 'if (chan_band_tlv)' will succeed because it has been tested a few lines above Signed-off-by: Christophe JAILLET Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/scan.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/scan.c b/drivers/net/wireless/marvell/mwifiex/scan.c index 8daf0d86..97c9765 100644 --- a/drivers/net/wireless/marvell/mwifiex/scan.c +++ b/drivers/net/wireless/marvell/mwifiex/scan.c @@ -2194,18 +2194,14 @@ int mwifiex_ret_802_11_scan(struct mwifiex_private *priv, if (chan_band_tlv && adapter->nd_info) { adapter->nd_info->matches[idx] = - kzalloc(sizeof(*pmatch) + - sizeof(u32), GFP_ATOMIC); + kzalloc(sizeof(*pmatch) + sizeof(u32), + GFP_ATOMIC); pmatch = adapter->nd_info->matches[idx]; if (pmatch) { - memset(pmatch, 0, sizeof(*pmatch)); - if (chan_band_tlv) { - pmatch->n_channels = 1; - pmatch->channels[0] = - chan_band->chan_number; - } + pmatch->n_channels = 1; + pmatch->channels[0] = chan_band->chan_number; } } -- cgit v1.1 From 293f293637b55db4f9f522a5a72514e98a541076 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 8 Sep 2016 16:25:49 +0100 Subject: kvm-arm: Unmap shadow pagetables properly On arm/arm64, we depend on the kvm_unmap_hva* callbacks (via mmu_notifiers::invalidate_*) to unmap the stage2 pagetables when the userspace buffer gets unmapped. However, when the Hypervisor process exits without explicit unmap of the guest buffers, the only notifier we get is kvm_arch_flush_shadow_all() (via mmu_notifier::release ) which does nothing on arm. Later this causes us to access pages that were already released [via exit_mmap() -> unmap_vmas()] when we actually get to unmap the stage2 pagetable [via kvm_arch_destroy_vm() -> kvm_free_stage2_pgd()]. This triggers crashes with CONFIG_DEBUG_PAGEALLOC, which unmaps any free'd pages from the linear map. 
[ 757.644120] Unable to handle kernel paging request at virtual address ffff800661e00000 [ 757.652046] pgd = ffff20000b1a2000 [ 757.655471] [ffff800661e00000] *pgd=00000047fffe3003, *pud=00000047fcd8c003, *pmd=00000047fcc7c003, *pte=00e8004661e00712 [ 757.666492] Internal error: Oops: 96000147 [#3] PREEMPT SMP [ 757.672041] Modules linked in: [ 757.675100] CPU: 7 PID: 3630 Comm: qemu-system-aar Tainted: G D 4.8.0-rc1 #3 [ 757.683240] Hardware name: AppliedMicro X-Gene Mustang Board/X-Gene Mustang Board, BIOS 3.06.15 Aug 19 2016 [ 757.692938] task: ffff80069cdd3580 task.stack: ffff8006adb7c000 [ 757.698840] PC is at __flush_dcache_area+0x1c/0x40 [ 757.703613] LR is at kvm_flush_dcache_pmd+0x60/0x70 [ 757.708469] pc : [] lr : [] pstate: 20000145 ... [ 758.357249] [] __flush_dcache_area+0x1c/0x40 [ 758.363059] [] unmap_stage2_range+0x458/0x5f0 [ 758.368954] [] kvm_free_stage2_pgd+0x34/0x60 [ 758.374761] [] kvm_arch_destroy_vm+0x20/0x68 [ 758.380570] [] kvm_put_kvm+0x210/0x358 [ 758.385860] [] kvm_vm_release+0x2c/0x40 [ 758.391239] [] __fput+0x114/0x2e8 [ 758.396096] [] ____fput+0xc/0x18 [ 758.400869] [] task_work_run+0x108/0x138 [ 758.406332] [] do_exit+0x48c/0x10e8 [ 758.411363] [] do_group_exit+0x6c/0x130 [ 758.416739] [] get_signal+0x284/0xa18 [ 758.421943] [] do_signal+0x158/0x860 [ 758.427060] [] do_notify_resume+0x6c/0x88 [ 758.432608] [] work_pending+0x10/0x14 [ 758.437812] Code: 9ac32042 8b010001 d1000443 8a230000 (d50b7e20) This patch fixes the issue by moving the kvm_free_stage2_pgd() to kvm_arch_flush_shadow_all(). 
Cc: # 3.9+ Tested-by: Itaru Kitayama Reported-by: Itaru Kitayama Reported-by: James Morse Cc: Marc Zyngier Cc: Catalin Marinas Cc: Christoffer Dall Signed-off-by: Suzuki K Poulose Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 2 -- arch/arm/kvm/mmu.c | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 75f130e..c94b90d 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -158,8 +158,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) { int i; - kvm_free_stage2_pgd(kvm); - for (i = 0; i < KVM_MAX_VCPUS; ++i) { if (kvm->vcpus[i]) { kvm_arch_vcpu_free(kvm->vcpus[i]); diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index a3faafe..e9a5c0e 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1894,6 +1894,7 @@ void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) void kvm_arch_flush_shadow_all(struct kvm *kvm) { + kvm_free_stage2_pgd(kvm); } void kvm_arch_flush_shadow_memslot(struct kvm *kvm, -- cgit v1.1 From 75696fe704774039e0e2ca65be24d79739ed206d Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Fri, 29 Jul 2016 16:08:51 +0530 Subject: mwifiex: PCIe8997 chip specific handling The patch corrects the revision id register and uses it along with magic value and chip version registers to download appropriate firmware image. PCIe8997 Z chipset variant code has been removed, as it won't be used in production. 
Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/pcie.c | 35 ++++++++++------------------- drivers/net/wireless/marvell/mwifiex/pcie.h | 14 +++++------- 2 files changed, 18 insertions(+), 31 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c index e4ef90e..3c3c4f1 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.c +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c @@ -2911,7 +2911,7 @@ static int mwifiex_pcie_request_irq(struct mwifiex_adapter *adapter) static void mwifiex_pcie_get_fw_name(struct mwifiex_adapter *adapter) { int revision_id = 0; - int version; + int version, magic; struct pcie_service_card *card = adapter->card; switch (card->dev->device) { @@ -2936,30 +2936,19 @@ static void mwifiex_pcie_get_fw_name(struct mwifiex_adapter *adapter) } break; case PCIE_DEVICE_ID_MARVELL_88W8997: - mwifiex_read_reg(adapter, 0x0c48, &revision_id); + mwifiex_read_reg(adapter, 0x8, &revision_id); mwifiex_read_reg(adapter, 0x0cd0, &version); + mwifiex_read_reg(adapter, 0x0cd4, &magic); + revision_id &= 0xff; version &= 0x7; - switch (revision_id) { - case PCIE8997_V2: - if (version == CHIP_VER_PCIEUART) - strcpy(adapter->fw_name, - PCIEUART8997_FW_NAME_V2); - else - strcpy(adapter->fw_name, - PCIEUSB8997_FW_NAME_V2); - break; - case PCIE8997_Z: - if (version == CHIP_VER_PCIEUART) - strcpy(adapter->fw_name, - PCIEUART8997_FW_NAME_Z); - else - strcpy(adapter->fw_name, - PCIEUSB8997_FW_NAME_Z); - break; - default: - strcpy(adapter->fw_name, PCIE8997_DEFAULT_FW_NAME); - break; - } + magic &= 0xff; + if (revision_id == PCIE8997_A1 && + magic == CHIP_MAGIC_VALUE && + version == CHIP_VER_PCIEUART) + strcpy(adapter->fw_name, PCIEUART8997_FW_NAME_V4); + else + strcpy(adapter->fw_name, PCIEUSB8997_FW_NAME_V4); + break; default: break; } diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.h b/drivers/net/wireless/marvell/mwifiex/pcie.h index f6992f0..46f99ca 
100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.h +++ b/drivers/net/wireless/marvell/mwifiex/pcie.h @@ -32,12 +32,9 @@ #define PCIE8897_DEFAULT_FW_NAME "mrvl/pcie8897_uapsta.bin" #define PCIE8897_A0_FW_NAME "mrvl/pcie8897_uapsta_a0.bin" #define PCIE8897_B0_FW_NAME "mrvl/pcie8897_uapsta.bin" -#define PCIE8997_DEFAULT_FW_NAME "mrvl/pcieusb8997_combo_v2.bin" -#define PCIEUART8997_FW_NAME_Z "mrvl/pcieuart8997_combo.bin" -#define PCIEUART8997_FW_NAME_V2 "mrvl/pcieuart8997_combo_v2.bin" -#define PCIEUSB8997_FW_NAME_Z "mrvl/pcieusb8997_combo.bin" -#define PCIEUSB8997_FW_NAME_V2 "mrvl/pcieusb8997_combo_v2.bin" -#define PCIE8997_DEFAULT_WIFIFW_NAME "mrvl/pcie8997_wlan.bin" +#define PCIEUART8997_FW_NAME_V4 "mrvl/pcieuart8997_combo_v4.bin" +#define PCIEUSB8997_FW_NAME_V4 "mrvl/pcieusb8997_combo_v4.bin" +#define PCIE8997_DEFAULT_WIFIFW_NAME "mrvl/pcie8997_wlan_v4.bin" #define PCIE_VENDOR_ID_MARVELL (0x11ab) #define PCIE_VENDOR_ID_V2_MARVELL (0x1b4b) @@ -47,9 +44,10 @@ #define PCIE8897_A0 0x1100 #define PCIE8897_B0 0x1200 -#define PCIE8997_Z 0x0 -#define PCIE8997_V2 0x471 +#define PCIE8997_A0 0x10 +#define PCIE8997_A1 0x11 #define CHIP_VER_PCIEUART 0x3 +#define CHIP_MAGIC_VALUE 0x24 /* Constants for Buffer Descriptor (BD) rings */ #define MWIFIEX_MAX_TXRX_BD 0x20 -- cgit v1.1 From 872c63fbf9e153146b07f0cece4da0d70b283eeb Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 5 Sep 2016 11:56:05 +0100 Subject: arm64: spinlocks: implement smp_mb__before_spinlock() as smp_mb() smp_mb__before_spinlock() is intended to upgrade a spin_lock() operation to a full barrier, such that prior stores are ordered with respect to loads and stores occurring inside the critical section. Unfortunately, the core code defines the barrier as smp_wmb(), which is insufficient to provide the required ordering guarantees when used in conjunction with our load-acquire-based spinlock implementation. This patch overrides the arm64 definition of smp_mb__before_spinlock() to map to a full smp_mb(). 
Cc: Cc: Peter Zijlstra Reported-by: Alan Stern Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/spinlock.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index e875a5a..89206b5 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -363,4 +363,14 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) #define arch_read_relax(lock) cpu_relax() #define arch_write_relax(lock) cpu_relax() +/* + * Accesses appearing in program order before a spin_lock() operation + * can be reordered with accesses inside the critical section, by virtue + * of arch_spin_lock being constructed using acquire semantics. + * + * In cases where this is problematic (e.g. try_to_wake_up), an + * smp_mb__before_spinlock() can restore the required ordering. + */ +#define smp_mb__before_spinlock() smp_mb() + #endif /* __ASM_SPINLOCK_H */ -- cgit v1.1 From 2b9743441a312e0b0a2d87deae363eccbe9d0f00 Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Thu, 8 Sep 2016 20:46:42 +0800 Subject: arm64: use preempt_disable_notrace in _percpu_read/write When debug preempt or preempt tracer is enabled, preempt_count_add/sub() can be traced by function and function graph tracing, and preempt_disable/enable() would call preempt_count_add/sub(), so in Ftrace subsystem we should use preempt_disable/enable_notrace instead. In the commit 345ddcc882d8 ("ftrace: Have set_ftrace_pid use the bitmap like events do") the function this_cpu_read() was added to trace_graph_entry(), and if this_cpu_read() calls preempt_disable(), graph tracer will go into a recursive loop, even if the tracing_on is disabled. So this patch changes this_cpu_read() to use preempt_enable/disable_notrace instead. Since Yonghui Yang helped a lot to find the root cause of this problem, also add his SOB. 
Signed-off-by: Yonghui Yang Signed-off-by: Chunyan Zhang Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/percpu.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 0a456be..2fee2f5 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -199,19 +199,19 @@ static inline unsigned long __percpu_xchg(void *ptr, unsigned long val, #define _percpu_read(pcp) \ ({ \ typeof(pcp) __retval; \ - preempt_disable(); \ + preempt_disable_notrace(); \ __retval = (typeof(pcp))__percpu_read(raw_cpu_ptr(&(pcp)), \ sizeof(pcp)); \ - preempt_enable(); \ + preempt_enable_notrace(); \ __retval; \ }) #define _percpu_write(pcp, val) \ do { \ - preempt_disable(); \ + preempt_disable_notrace(); \ __percpu_write(raw_cpu_ptr(&(pcp)), (unsigned long)(val), \ sizeof(pcp)); \ - preempt_enable(); \ + preempt_enable_notrace(); \ } while(0) \ #define _pcp_protect(operation, pcp, val) \ -- cgit v1.1 From 3c97f5de1f282492335a6aec1f94b77f7f899b8c Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Fri, 2 Sep 2016 19:46:09 +0300 Subject: ath10k: implement NAPI support Add NAPI support for rx and tx completion. NAPI poll is scheduled from interrupt handler. The design is as below - on interrupt - schedule napi and mask interrupts - on poll - process all pipes (no actual Tx/Rx) - process Rx within budget - if quota exceeds budget reschedule napi poll by returning budget - process Tx completions and update budget if necessary - process Tx fetch indications (pull-push) - push any other pending Tx (if possible) - before resched or napi completion replenish htt rx ring buffer - if work done < budget, complete napi poll and unmask interrupts This change also gets rid of two tasklets (intr_tq and txrx_compl_task). Measured peak throughput with NAPI on IPQ4019 platform in controlled environment. 
No noticeable reduction in throughput is seen and also observed improvements in CPU usage. Approx. 15% CPU usage got reduced in UDP uplink case. DL: AP DUT Tx UL: AP DUT Rx IPQ4019 (avg. cpu usage %) ======== TOT +NAPI =========== ============= TCP DL 644 Mbps (42%) 645 Mbps (36%) TCP UL 673 Mbps (30%) 675 Mbps (26%) UDP DL 682 Mbps (49%) 680 Mbps (49%) UDP UL 720 Mbps (28%) 717 Mbps (11%) Signed-off-by: Rajkumar Manoharan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/ahb.c | 10 +- drivers/net/wireless/ath/ath10k/core.c | 2 + drivers/net/wireless/ath/ath10k/core.h | 8 ++ drivers/net/wireless/ath/ath10k/htt.h | 2 +- drivers/net/wireless/ath/ath10k/htt_rx.c | 154 +++++++++++++++++++------------ drivers/net/wireless/ath/ath10k/htt_tx.c | 2 - drivers/net/wireless/ath/ath10k/pci.c | 71 ++++++++------ drivers/net/wireless/ath/ath10k/pci.h | 6 +- 8 files changed, 159 insertions(+), 96 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/ahb.c b/drivers/net/wireless/ath/ath10k/ahb.c index dede026..b99ad5d 100644 --- a/drivers/net/wireless/ath/ath10k/ahb.c +++ b/drivers/net/wireless/ath/ath10k/ahb.c @@ -462,13 +462,13 @@ static void ath10k_ahb_halt_chip(struct ath10k *ar) static irqreturn_t ath10k_ahb_interrupt_handler(int irq, void *arg) { struct ath10k *ar = arg; - struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); if (!ath10k_pci_irq_pending(ar)) return IRQ_NONE; ath10k_pci_disable_and_clear_legacy_irq(ar); - tasklet_schedule(&ar_pci->intr_tq); + ath10k_pci_irq_msi_fw_mask(ar); + napi_schedule(&ar->napi); return IRQ_HANDLED; } @@ -717,6 +717,9 @@ static void ath10k_ahb_hif_stop(struct ath10k *ar) synchronize_irq(ar_ahb->irq); ath10k_pci_flush(ar); + + napi_synchronize(&ar->napi); + napi_disable(&ar->napi); } static int ath10k_ahb_hif_power_up(struct ath10k *ar) @@ -748,6 +751,7 @@ static int ath10k_ahb_hif_power_up(struct ath10k *ar) ath10k_err(ar, "could not wake up target CPU: %d\n", ret); goto err_ce_deinit; } + napi_enable(&ar->napi); return 0; @@ 
-831,7 +835,7 @@ static int ath10k_ahb_probe(struct platform_device *pdev) goto err_resource_deinit; } - ath10k_pci_init_irq_tasklets(ar); + ath10k_pci_init_napi(ar); ret = ath10k_ahb_request_irq_legacy(ar); if (ret) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index c9d163e..3abf8d6 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -2322,6 +2322,8 @@ struct ath10k *ath10k_core_create(size_t priv_size, struct device *dev, INIT_WORK(&ar->register_work, ath10k_core_register_work); INIT_WORK(&ar->restart_work, ath10k_core_restart); + init_dummy_netdev(&ar->napi_dev); + ret = ath10k_debug_create(ar); if (ret) goto err_free_aux_wq; diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index b367e9c..c223913 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -65,6 +65,10 @@ #define ATH10K_KEEPALIVE_MAX_IDLE 3895 #define ATH10K_KEEPALIVE_MAX_UNRESPONSIVE 3900 +/* NAPI poll budget */ +#define ATH10K_NAPI_BUDGET 64 +#define ATH10K_NAPI_QUOTA_LIMIT 60 + struct ath10k; enum ath10k_bus { @@ -954,6 +958,10 @@ struct ath10k { struct ath10k_thermal thermal; struct ath10k_wow wow; + /* NAPI */ + struct net_device napi_dev; + struct napi_struct napi; + /* must be last */ u8 drv_priv[0] __aligned(sizeof(void *)); }; diff --git a/drivers/net/wireless/ath/ath10k/htt.h b/drivers/net/wireless/ath/ath10k/htt.h index 430a83e..98c1424 100644 --- a/drivers/net/wireless/ath/ath10k/htt.h +++ b/drivers/net/wireless/ath/ath10k/htt.h @@ -1665,7 +1665,6 @@ struct ath10k_htt { /* This is used to group tx/rx completions separately and process them * in batches to reduce cache stalls */ - struct tasklet_struct txrx_compl_task; struct sk_buff_head rx_compl_q; struct sk_buff_head rx_in_ord_compl_q; struct sk_buff_head tx_fetch_ind_q; @@ -1798,5 +1797,6 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu); 
void ath10k_htt_rx_pktlog_completion_handler(struct ath10k *ar, struct sk_buff *skb); +int ath10k_htt_txrx_compl_task(struct ath10k *ar, int budget); #endif diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index ae6931b..740b0fa 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -34,7 +34,6 @@ #define HTT_RX_RING_REFILL_RESCHED_MS 5 static int ath10k_htt_rx_get_csum_state(struct sk_buff *skb); -static void ath10k_htt_txrx_compl_task(unsigned long ptr); static struct sk_buff * ath10k_htt_rx_find_skb_paddr(struct ath10k *ar, u32 paddr) @@ -226,7 +225,6 @@ int ath10k_htt_rx_ring_refill(struct ath10k *ar) void ath10k_htt_rx_free(struct ath10k_htt *htt) { del_timer_sync(&htt->rx_ring.refill_retry_timer); - tasklet_kill(&htt->txrx_compl_task); skb_queue_purge(&htt->rx_compl_q); skb_queue_purge(&htt->rx_in_ord_compl_q); @@ -520,9 +518,6 @@ int ath10k_htt_rx_alloc(struct ath10k_htt *htt) skb_queue_head_init(&htt->tx_fetch_ind_q); atomic_set(&htt->num_mpdus_ready, 0); - tasklet_init(&htt->txrx_compl_task, ath10k_htt_txrx_compl_task, - (unsigned long)htt); - ath10k_dbg(ar, ATH10K_DBG_BOOT, "htt rx ring size %d fill_level %d\n", htt->rx_ring.size, htt->rx_ring.fill_level); return 0; @@ -958,7 +953,7 @@ static void ath10k_process_rx(struct ath10k *ar, trace_ath10k_rx_hdr(ar, skb->data, skb->len); trace_ath10k_rx_payload(ar, skb->data, skb->len); - ieee80211_rx(ar->hw, skb); + ieee80211_rx_napi(ar->hw, NULL, skb, &ar->napi); } static int ath10k_htt_rx_nwifi_hdrlen(struct ath10k *ar, @@ -1527,7 +1522,7 @@ static int ath10k_htt_rx_handle_amsdu(struct ath10k_htt *htt) struct ath10k *ar = htt->ar; static struct ieee80211_rx_status rx_status; struct sk_buff_head amsdu; - int ret; + int ret, num_msdus; __skb_queue_head_init(&amsdu); @@ -1549,13 +1544,14 @@ static int ath10k_htt_rx_handle_amsdu(struct ath10k_htt *htt) return ret; } + num_msdus = skb_queue_len(&amsdu); 
ath10k_htt_rx_h_ppdu(ar, &amsdu, &rx_status, 0xffff); ath10k_htt_rx_h_unchain(ar, &amsdu, ret > 0); ath10k_htt_rx_h_filter(ar, &amsdu, &rx_status); ath10k_htt_rx_h_mpdu(ar, &amsdu, &rx_status); ath10k_htt_rx_h_deliver(ar, &amsdu, &rx_status); - return 0; + return num_msdus; } static void ath10k_htt_rx_proc_rx_ind(struct ath10k_htt *htt, @@ -1579,15 +1575,6 @@ static void ath10k_htt_rx_proc_rx_ind(struct ath10k_htt *htt, mpdu_count += mpdu_ranges[i].mpdu_count; atomic_add(mpdu_count, &htt->num_mpdus_ready); - - tasklet_schedule(&htt->txrx_compl_task); -} - -static void ath10k_htt_rx_frag_handler(struct ath10k_htt *htt) -{ - atomic_inc(&htt->num_mpdus_ready); - - tasklet_schedule(&htt->txrx_compl_task); } static void ath10k_htt_rx_tx_compl_ind(struct ath10k *ar, @@ -1772,14 +1759,15 @@ static void ath10k_htt_rx_h_rx_offload_prot(struct ieee80211_rx_status *status, RX_FLAG_MMIC_STRIPPED; } -static void ath10k_htt_rx_h_rx_offload(struct ath10k *ar, - struct sk_buff_head *list) +static int ath10k_htt_rx_h_rx_offload(struct ath10k *ar, + struct sk_buff_head *list) { struct ath10k_htt *htt = &ar->htt; struct ieee80211_rx_status *status = &htt->rx_status; struct htt_rx_offload_msdu *rx; struct sk_buff *msdu; size_t offset; + int num_msdu = 0; while ((msdu = __skb_dequeue(list))) { /* Offloaded frames don't have Rx descriptor. 
Instead they have @@ -1819,10 +1807,12 @@ static void ath10k_htt_rx_h_rx_offload(struct ath10k *ar, ath10k_htt_rx_h_rx_offload_prot(status, msdu); ath10k_htt_rx_h_channel(ar, status, NULL, rx->vdev_id); ath10k_process_rx(ar, status, msdu); + num_msdu++; } + return num_msdu; } -static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) +static int ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) { struct ath10k_htt *htt = &ar->htt; struct htt_resp *resp = (void *)skb->data; @@ -1835,12 +1825,12 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) u8 tid; bool offload; bool frag; - int ret; + int ret, num_msdus = 0; lockdep_assert_held(&htt->rx_ring.lock); if (htt->rx_confused) - return; + return -EIO; skb_pull(skb, sizeof(resp->hdr)); skb_pull(skb, sizeof(resp->rx_in_ord_ind)); @@ -1859,7 +1849,7 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) if (skb->len < msdu_count * sizeof(*resp->rx_in_ord_ind.msdu_descs)) { ath10k_warn(ar, "dropping invalid in order rx indication\n"); - return; + return -EINVAL; } /* The event can deliver more than 1 A-MSDU. Each A-MSDU is later @@ -1870,14 +1860,14 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) if (ret < 0) { ath10k_warn(ar, "failed to pop paddr list: %d\n", ret); htt->rx_confused = true; - return; + return -EIO; } /* Offloaded frames are very different and need to be handled * separately. */ if (offload) - ath10k_htt_rx_h_rx_offload(ar, &list); + num_msdus = ath10k_htt_rx_h_rx_offload(ar, &list); while (!skb_queue_empty(&list)) { __skb_queue_head_init(&amsdu); @@ -1890,6 +1880,7 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) * better to report something than nothing though. This * should still give an idea about rx rate to the user. 
*/ + num_msdus += skb_queue_len(&amsdu); ath10k_htt_rx_h_ppdu(ar, &amsdu, status, vdev_id); ath10k_htt_rx_h_filter(ar, &amsdu, status); ath10k_htt_rx_h_mpdu(ar, &amsdu, status); @@ -1902,9 +1893,10 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) ath10k_warn(ar, "failed to extract amsdu: %d\n", ret); htt->rx_confused = true; __skb_queue_purge(&list); - return; + return -EIO; } } + return num_msdus; } static void ath10k_htt_rx_tx_fetch_resp_id_confirm(struct ath10k *ar, @@ -2267,7 +2259,6 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb) } case HTT_T2H_MSG_TYPE_TX_COMPL_IND: ath10k_htt_rx_tx_compl_ind(htt->ar, skb); - tasklet_schedule(&htt->txrx_compl_task); break; case HTT_T2H_MSG_TYPE_SEC_IND: { struct ath10k *ar = htt->ar; @@ -2284,7 +2275,7 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb) case HTT_T2H_MSG_TYPE_RX_FRAG_IND: { ath10k_dbg_dump(ar, ATH10K_DBG_HTT_DUMP, NULL, "htt event: ", skb->data, skb->len); - ath10k_htt_rx_frag_handler(htt); + atomic_inc(&htt->num_mpdus_ready); break; } case HTT_T2H_MSG_TYPE_TEST: @@ -2320,8 +2311,7 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb) break; } case HTT_T2H_MSG_TYPE_RX_IN_ORD_PADDR_IND: { - skb_queue_tail(&htt->rx_in_ord_compl_q, skb); - tasklet_schedule(&htt->txrx_compl_task); + __skb_queue_tail(&htt->rx_in_ord_compl_q, skb); return false; } case HTT_T2H_MSG_TYPE_TX_CREDIT_UPDATE_IND: @@ -2347,7 +2337,6 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb) break; } skb_queue_tail(&htt->tx_fetch_ind_q, tx_fetch_ind); - tasklet_schedule(&htt->txrx_compl_task); break; } case HTT_T2H_MSG_TYPE_TX_FETCH_CONFIRM: @@ -2376,27 +2365,77 @@ void ath10k_htt_rx_pktlog_completion_handler(struct ath10k *ar, } EXPORT_SYMBOL(ath10k_htt_rx_pktlog_completion_handler); -static void ath10k_htt_txrx_compl_task(unsigned long ptr) +int ath10k_htt_txrx_compl_task(struct ath10k *ar, int budget) { - struct 
ath10k_htt *htt = (struct ath10k_htt *)ptr; - struct ath10k *ar = htt->ar; + struct ath10k_htt *htt = &ar->htt; struct htt_tx_done tx_done = {}; - struct sk_buff_head rx_ind_q; struct sk_buff_head tx_ind_q; struct sk_buff *skb; unsigned long flags; - int num_mpdus; + int quota = 0, done, num_rx_msdus; + bool resched_napi = false; - __skb_queue_head_init(&rx_ind_q); __skb_queue_head_init(&tx_ind_q); - spin_lock_irqsave(&htt->rx_in_ord_compl_q.lock, flags); - skb_queue_splice_init(&htt->rx_in_ord_compl_q, &rx_ind_q); - spin_unlock_irqrestore(&htt->rx_in_ord_compl_q.lock, flags); + /* Since in-ord-ind can deliver more than 1 A-MSDU in single event, + * process it first to utilize full available quota. + */ + while (quota < budget) { + if (skb_queue_empty(&htt->rx_in_ord_compl_q)) + break; - spin_lock_irqsave(&htt->tx_fetch_ind_q.lock, flags); - skb_queue_splice_init(&htt->tx_fetch_ind_q, &tx_ind_q); - spin_unlock_irqrestore(&htt->tx_fetch_ind_q.lock, flags); + skb = __skb_dequeue(&htt->rx_in_ord_compl_q); + if (!skb) { + resched_napi = true; + goto exit; + } + + spin_lock_bh(&htt->rx_ring.lock); + num_rx_msdus = ath10k_htt_rx_in_ord_ind(ar, skb); + spin_unlock_bh(&htt->rx_ring.lock); + if (num_rx_msdus < 0) { + resched_napi = true; + goto exit; + } + + dev_kfree_skb_any(skb); + if (num_rx_msdus > 0) + quota += num_rx_msdus; + + if ((quota > ATH10K_NAPI_QUOTA_LIMIT) && + !skb_queue_empty(&htt->rx_in_ord_compl_q)) { + resched_napi = true; + goto exit; + } + } + + while (quota < budget) { + /* no more data to receive */ + if (!atomic_read(&htt->num_mpdus_ready)) + break; + + num_rx_msdus = ath10k_htt_rx_handle_amsdu(htt); + if (num_rx_msdus < 0) { + resched_napi = true; + goto exit; + } + + quota += num_rx_msdus; + atomic_dec(&htt->num_mpdus_ready); + if ((quota > ATH10K_NAPI_QUOTA_LIMIT) && + atomic_read(&htt->num_mpdus_ready)) { + resched_napi = true; + goto exit; + } + } + + /* From NAPI documentation: + * The napi poll() function may also process TX completions, in 
which + * case if it processes the entire TX ring then it should count that + * work as the rest of the budget. + */ + if ((quota < budget) && !kfifo_is_empty(&htt->txdone_fifo)) + quota = budget; /* kfifo_get: called only within txrx_tasklet so it's neatly serialized. * From kfifo_get() documentation: @@ -2406,27 +2445,22 @@ static void ath10k_htt_txrx_compl_task(unsigned long ptr) while (kfifo_get(&htt->txdone_fifo, &tx_done)) ath10k_txrx_tx_unref(htt, &tx_done); + spin_lock_irqsave(&htt->tx_fetch_ind_q.lock, flags); + skb_queue_splice_init(&htt->tx_fetch_ind_q, &tx_ind_q); + spin_unlock_irqrestore(&htt->tx_fetch_ind_q.lock, flags); + while ((skb = __skb_dequeue(&tx_ind_q))) { ath10k_htt_rx_tx_fetch_ind(ar, skb); dev_kfree_skb_any(skb); } - num_mpdus = atomic_read(&htt->num_mpdus_ready); - - while (num_mpdus) { - if (ath10k_htt_rx_handle_amsdu(htt)) - break; - - num_mpdus--; - atomic_dec(&htt->num_mpdus_ready); - } - - while ((skb = __skb_dequeue(&rx_ind_q))) { - spin_lock_bh(&htt->rx_ring.lock); - ath10k_htt_rx_in_ord_ind(ar, skb); - spin_unlock_bh(&htt->rx_ring.lock); - dev_kfree_skb_any(skb); - } - +exit: ath10k_htt_rx_msdu_buff_replenish(htt); + /* In case of rx failure or more data to read, report budget + * to reschedule NAPI poll + */ + done = resched_napi ? 
budget : quota; + + return done; } +EXPORT_SYMBOL(ath10k_htt_txrx_compl_task); diff --git a/drivers/net/wireless/ath/ath10k/htt_tx.c b/drivers/net/wireless/ath/ath10k/htt_tx.c index 7c072b6..ae5b33f 100644 --- a/drivers/net/wireless/ath/ath10k/htt_tx.c +++ b/drivers/net/wireless/ath/ath10k/htt_tx.c @@ -390,8 +390,6 @@ void ath10k_htt_tx_free(struct ath10k_htt *htt) { int size; - tasklet_kill(&htt->txrx_compl_task); - idr_for_each(&htt->pending_tx, ath10k_htt_tx_clean_up_pending, htt->ar); idr_destroy(&htt->pending_tx); diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index 1b841ad..930e951 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -1506,12 +1506,10 @@ void ath10k_pci_hif_send_complete_check(struct ath10k *ar, u8 pipe, ath10k_ce_per_engine_service(ar, pipe); } -void ath10k_pci_kill_tasklet(struct ath10k *ar) +static void ath10k_pci_rx_retry_sync(struct ath10k *ar) { struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); - tasklet_kill(&ar_pci->intr_tq); - del_timer_sync(&ar_pci->rx_post_retry); } @@ -1570,7 +1568,7 @@ void ath10k_pci_hif_get_default_pipe(struct ath10k *ar, ul_pipe, dl_pipe); } -static void ath10k_pci_irq_msi_fw_mask(struct ath10k *ar) +void ath10k_pci_irq_msi_fw_mask(struct ath10k *ar) { u32 val; @@ -1753,7 +1751,7 @@ void ath10k_pci_ce_deinit(struct ath10k *ar) void ath10k_pci_flush(struct ath10k *ar) { - ath10k_pci_kill_tasklet(ar); + ath10k_pci_rx_retry_sync(ar); ath10k_pci_buffer_cleanup(ar); } @@ -1780,6 +1778,8 @@ static void ath10k_pci_hif_stop(struct ath10k *ar) ath10k_pci_irq_disable(ar); ath10k_pci_irq_sync(ar); ath10k_pci_flush(ar); + napi_synchronize(&ar->napi); + napi_disable(&ar->napi); spin_lock_irqsave(&ar_pci->ps_lock, flags); WARN_ON(ar_pci->ps_wake_refcount > 0); @@ -2533,6 +2533,7 @@ static int ath10k_pci_hif_power_up(struct ath10k *ar) ath10k_err(ar, "could not wake up target CPU: %d\n", ret); goto err_ce; } + napi_enable(&ar->napi); 
return 0; @@ -2772,35 +2773,53 @@ static irqreturn_t ath10k_pci_interrupt_handler(int irq, void *arg) return IRQ_NONE; } - if (ar_pci->oper_irq_mode == ATH10K_PCI_IRQ_LEGACY) { - if (!ath10k_pci_irq_pending(ar)) - return IRQ_NONE; - - ath10k_pci_disable_and_clear_legacy_irq(ar); - } + if ((ar_pci->oper_irq_mode == ATH10K_PCI_IRQ_LEGACY) && + !ath10k_pci_irq_pending(ar)) + return IRQ_NONE; - tasklet_schedule(&ar_pci->intr_tq); + ath10k_pci_disable_and_clear_legacy_irq(ar); + ath10k_pci_irq_msi_fw_mask(ar); + napi_schedule(&ar->napi); return IRQ_HANDLED; } -static void ath10k_pci_tasklet(unsigned long data) +static int ath10k_pci_napi_poll(struct napi_struct *ctx, int budget) { - struct ath10k *ar = (struct ath10k *)data; - struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); + struct ath10k *ar = container_of(ctx, struct ath10k, napi); + int done = 0; if (ath10k_pci_has_fw_crashed(ar)) { - ath10k_pci_irq_disable(ar); ath10k_pci_fw_crashed_clear(ar); ath10k_pci_fw_crashed_dump(ar); - return; + napi_complete(ctx); + return done; } ath10k_ce_per_engine_service_any(ar); - /* Re-enable legacy irq that was disabled in the irq handler */ - if (ar_pci->oper_irq_mode == ATH10K_PCI_IRQ_LEGACY) + done = ath10k_htt_txrx_compl_task(ar, budget); + + if (done < budget) { + napi_complete(ctx); + /* In case of MSI, it is possible that interrupts are received + * while NAPI poll is inprogress. So pending interrupts that are + * received after processing all copy engine pipes by NAPI poll + * will not be handled again. This is causing failure to + * complete boot sequence in x86 platform. So before enabling + * interrupts safer to check for pending interrupts for + * immediate servicing. 
+ */ + if (CE_INTERRUPT_SUMMARY(ar)) { + napi_reschedule(ctx); + goto out; + } ath10k_pci_enable_legacy_irq(ar); + ath10k_pci_irq_msi_fw_unmask(ar); + } + +out: + return done; } static int ath10k_pci_request_irq_msi(struct ath10k *ar) @@ -2858,11 +2877,10 @@ static void ath10k_pci_free_irq(struct ath10k *ar) free_irq(ar_pci->pdev->irq, ar); } -void ath10k_pci_init_irq_tasklets(struct ath10k *ar) +void ath10k_pci_init_napi(struct ath10k *ar) { - struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); - - tasklet_init(&ar_pci->intr_tq, ath10k_pci_tasklet, (unsigned long)ar); + netif_napi_add(&ar->napi_dev, &ar->napi, ath10k_pci_napi_poll, + ATH10K_NAPI_BUDGET); } static int ath10k_pci_init_irq(struct ath10k *ar) @@ -2870,7 +2888,7 @@ static int ath10k_pci_init_irq(struct ath10k *ar) struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); int ret; - ath10k_pci_init_irq_tasklets(ar); + ath10k_pci_init_napi(ar); if (ath10k_pci_irq_mode != ATH10K_PCI_IRQ_AUTO) ath10k_info(ar, "limiting irq mode to: %d\n", @@ -3131,7 +3149,8 @@ int ath10k_pci_setup_resource(struct ath10k *ar) void ath10k_pci_release_resource(struct ath10k *ar) { - ath10k_pci_kill_tasklet(ar); + ath10k_pci_rx_retry_sync(ar); + netif_napi_del(&ar->napi); ath10k_pci_ce_deinit(ar); ath10k_pci_free_pipes(ar); } @@ -3298,7 +3317,7 @@ static int ath10k_pci_probe(struct pci_dev *pdev, err_free_irq: ath10k_pci_free_irq(ar); - ath10k_pci_kill_tasklet(ar); + ath10k_pci_rx_retry_sync(ar); err_deinit_irq: ath10k_pci_deinit_irq(ar); diff --git a/drivers/net/wireless/ath/ath10k/pci.h b/drivers/net/wireless/ath/ath10k/pci.h index 6eca1df..9854ad5 100644 --- a/drivers/net/wireless/ath/ath10k/pci.h +++ b/drivers/net/wireless/ath/ath10k/pci.h @@ -177,8 +177,6 @@ struct ath10k_pci { /* Operating interrupt mode */ enum ath10k_pci_irq_mode oper_irq_mode; - struct tasklet_struct intr_tq; - struct ath10k_pci_pipe pipe_info[CE_COUNT_MAX]; /* Copy Engine used for Diagnostic Accesses */ @@ -294,8 +292,7 @@ void ath10k_pci_free_pipes(struct 
ath10k *ar); void ath10k_pci_free_pipes(struct ath10k *ar); void ath10k_pci_rx_replenish_retry(unsigned long ptr); void ath10k_pci_ce_deinit(struct ath10k *ar); -void ath10k_pci_init_irq_tasklets(struct ath10k *ar); -void ath10k_pci_kill_tasklet(struct ath10k *ar); +void ath10k_pci_init_napi(struct ath10k *ar); int ath10k_pci_init_pipes(struct ath10k *ar); int ath10k_pci_init_config(struct ath10k *ar); void ath10k_pci_rx_post(struct ath10k *ar); @@ -303,6 +300,7 @@ void ath10k_pci_flush(struct ath10k *ar); void ath10k_pci_enable_legacy_irq(struct ath10k *ar); bool ath10k_pci_irq_pending(struct ath10k *ar); void ath10k_pci_disable_and_clear_legacy_irq(struct ath10k *ar); +void ath10k_pci_irq_msi_fw_mask(struct ath10k *ar); int ath10k_pci_wait_for_target_init(struct ath10k *ar); int ath10k_pci_setup_resource(struct ath10k *ar); void ath10k_pci_release_resource(struct ath10k *ar); -- cgit v1.1 From 18f53fe0f30331e826b075709ed7b26b9283235e Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Fri, 2 Sep 2016 19:46:10 +0300 Subject: ath10k: fix throughput regression in multi client mode commit 7a0adc83f34d ("ath10k: improve tx scheduling") is causing severe throughput drop in multi client mode. This issue is originally reported in veriwave setup with 50 clients with TCP downlink traffic. While increasing number of clients, the average throughput drops gradually. With 50 clients, the combined peak throughput is decreased to 98 Mbps whereas reverting given commit restored it to 550 Mbps. Processing txqs for every tx completion is causing overhead. Ideally for management frame tx completion, pending txqs processing can be avoided. The change partly reverts the commit "ath10k: improve tx scheduling". Processing pending txqs after all skbs tx completion will yield enough room to burst tx frames. 
Fixes: 7a0adc83f34d ("ath10k: improve tx scheduling") Signed-off-by: Rajkumar Manoharan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/htt_rx.c | 2 ++ drivers/net/wireless/ath/ath10k/txrx.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index 740b0fa..2067ac2 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -2445,6 +2445,8 @@ int ath10k_htt_txrx_compl_task(struct ath10k *ar, int budget) while (kfifo_get(&htt->txdone_fifo, &tx_done)) ath10k_txrx_tx_unref(htt, &tx_done); + ath10k_mac_tx_push_pending(ar); + spin_lock_irqsave(&htt->tx_fetch_ind_q.lock, flags); skb_queue_splice_init(&htt->tx_fetch_ind_q, &tx_ind_q); spin_unlock_irqrestore(&htt->tx_fetch_ind_q.lock, flags); diff --git a/drivers/net/wireless/ath/ath10k/txrx.c b/drivers/net/wireless/ath/ath10k/txrx.c index 1e695d1..9852c5d 100644 --- a/drivers/net/wireless/ath/ath10k/txrx.c +++ b/drivers/net/wireless/ath/ath10k/txrx.c @@ -119,8 +119,6 @@ int ath10k_txrx_tx_unref(struct ath10k_htt *htt, ieee80211_tx_status(htt->ar->hw, msdu); /* we do not own the msdu anymore */ - ath10k_mac_tx_push_pending(ar); - return 0; } -- cgit v1.1 From d94a461d7a7df68991fb9663531173f60ef89c68 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 2 Sep 2016 19:46:12 +0300 Subject: ath9k: use ieee80211_tx_status_noskb where possible It removes the need for undoing the padding changes to skb->data and it improves performance by eliminating one tx status lookup per MPDU in the status path. It is also useful for preparing a follow-up fix to better handle powersave filtering. 
A side effect is that these counters, available via debugfs, become now invalid: * dot11TransmittedFragmentCount * dot11FrameDuplicateCount, * dot11ReceivedFragmentCount * dot11MulticastReceivedFrameCount Signed-off-by: Felix Fietkau [kvalo@qca.qualcomm.com: add a note about counters, thanks to Zefir Kurtisi] Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/xmit.c | 94 +++++++++++++++++++++++------------ 1 file changed, 62 insertions(+), 32 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index 8ddd604..b99ffa0 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -50,9 +50,11 @@ static u16 bits_per_symbol[][2] = { static void ath_tx_send_normal(struct ath_softc *sc, struct ath_txq *txq, struct ath_atx_tid *tid, struct sk_buff *skb); static void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb, - int tx_flags, struct ath_txq *txq); + int tx_flags, struct ath_txq *txq, + struct ieee80211_sta *sta); static void ath_tx_complete_buf(struct ath_softc *sc, struct ath_buf *bf, struct ath_txq *txq, struct list_head *bf_q, + struct ieee80211_sta *sta, struct ath_tx_status *ts, int txok); static void ath_tx_txqaddbuf(struct ath_softc *sc, struct ath_txq *txq, struct list_head *head, bool internal); @@ -77,6 +79,22 @@ enum { /* Aggregation logic */ /*********************/ +static void ath_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_sta *sta = info->status.status_driver_data[0]; + + if (info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS) { + ieee80211_tx_status(hw, skb); + return; + } + + if (sta) + ieee80211_tx_status_noskb(hw, sta, info); + + dev_kfree_skb(skb); +} + void ath_txq_lock(struct ath_softc *sc, struct ath_txq *txq) __acquires(&txq->axq_lock) { @@ -92,6 +110,7 @@ void ath_txq_unlock(struct ath_softc *sc, struct ath_txq *txq) void ath_txq_unlock_complete(struct 
ath_softc *sc, struct ath_txq *txq) __releases(&txq->axq_lock) { + struct ieee80211_hw *hw = sc->hw; struct sk_buff_head q; struct sk_buff *skb; @@ -100,7 +119,7 @@ void ath_txq_unlock_complete(struct ath_softc *sc, struct ath_txq *txq) spin_unlock_bh(&txq->axq_lock); while ((skb = __skb_dequeue(&q))) - ieee80211_tx_status(sc->hw, skb); + ath_tx_status(hw, skb); } static void ath_tx_queue_tid(struct ath_softc *sc, struct ath_txq *txq, @@ -253,7 +272,7 @@ static void ath_tx_flush_tid(struct ath_softc *sc, struct ath_atx_tid *tid) } list_add_tail(&bf->list, &bf_head); - ath_tx_complete_buf(sc, bf, txq, &bf_head, &ts, 0); + ath_tx_complete_buf(sc, bf, txq, &bf_head, NULL, &ts, 0); } if (sendbar) { @@ -318,12 +337,12 @@ static void ath_tid_drain(struct ath_softc *sc, struct ath_txq *txq, bf = fi->bf; if (!bf) { - ath_tx_complete(sc, skb, ATH_TX_ERROR, txq); + ath_tx_complete(sc, skb, ATH_TX_ERROR, txq, NULL); continue; } list_add_tail(&bf->list, &bf_head); - ath_tx_complete_buf(sc, bf, txq, &bf_head, &ts, 0); + ath_tx_complete_buf(sc, bf, txq, &bf_head, NULL, &ts, 0); } } @@ -426,12 +445,11 @@ static void ath_tx_count_frames(struct ath_softc *sc, struct ath_buf *bf, static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq, struct ath_buf *bf, struct list_head *bf_q, + struct ieee80211_sta *sta, struct ath_tx_status *ts, int txok) { struct ath_node *an = NULL; struct sk_buff *skb; - struct ieee80211_sta *sta; - struct ieee80211_hw *hw = sc->hw; struct ieee80211_hdr *hdr; struct ieee80211_tx_info *tx_info; struct ath_atx_tid *tid = NULL; @@ -460,12 +478,7 @@ static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq, for (i = 0; i < ts->ts_rateindex; i++) retries += rates[i].count; - rcu_read_lock(); - - sta = ieee80211_find_sta_by_ifaddr(hw, hdr->addr1, hdr->addr2); if (!sta) { - rcu_read_unlock(); - INIT_LIST_HEAD(&bf_head); while (bf) { bf_next = bf->bf_next; @@ -473,7 +486,7 @@ static void ath_tx_complete_aggr(struct ath_softc *sc, 
struct ath_txq *txq, if (!bf->bf_state.stale || bf_next != NULL) list_move_tail(&bf->list, &bf_head); - ath_tx_complete_buf(sc, bf, txq, &bf_head, ts, 0); + ath_tx_complete_buf(sc, bf, txq, &bf_head, NULL, ts, 0); bf = bf_next; } @@ -583,7 +596,7 @@ static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq, ts); } - ath_tx_complete_buf(sc, bf, txq, &bf_head, ts, + ath_tx_complete_buf(sc, bf, txq, &bf_head, sta, ts, !txfail); } else { if (tx_info->flags & IEEE80211_TX_STATUS_EOSP) { @@ -604,7 +617,8 @@ static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq, ath_tx_update_baw(sc, tid, seqno); ath_tx_complete_buf(sc, bf, txq, - &bf_head, ts, 0); + &bf_head, NULL, ts, + 0); bar_index = max_t(int, bar_index, ATH_BA_INDEX(seq_first, seqno)); break; @@ -648,8 +662,6 @@ static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq, ath_txq_lock(sc, txq); } - rcu_read_unlock(); - if (needreset) ath9k_queue_reset(sc, RESET_TYPE_TX_ERROR); } @@ -664,7 +676,10 @@ static void ath_tx_process_buffer(struct ath_softc *sc, struct ath_txq *txq, struct ath_tx_status *ts, struct ath_buf *bf, struct list_head *bf_head) { + struct ieee80211_hw *hw = sc->hw; struct ieee80211_tx_info *info; + struct ieee80211_sta *sta; + struct ieee80211_hdr *hdr; bool txok, flush; txok = !(ts->ts_status & ATH9K_TXERR_MASK); @@ -677,6 +692,10 @@ static void ath_tx_process_buffer(struct ath_softc *sc, struct ath_txq *txq, ts->duration = ath9k_hw_get_duration(sc->sc_ah, bf->bf_desc, ts->ts_rateindex); + + hdr = (struct ieee80211_hdr *) bf->bf_mpdu->data; + sta = ieee80211_find_sta_by_ifaddr(hw, hdr->addr1, hdr->addr2); + if (!bf_isampdu(bf)) { if (!flush) { info = IEEE80211_SKB_CB(bf->bf_mpdu); @@ -685,9 +704,9 @@ static void ath_tx_process_buffer(struct ath_softc *sc, struct ath_txq *txq, ath_tx_rc_status(sc, bf, ts, 1, txok ? 
0 : 1, txok); ath_dynack_sample_tx_ts(sc->sc_ah, bf->bf_mpdu, ts); } - ath_tx_complete_buf(sc, bf, txq, bf_head, ts, txok); + ath_tx_complete_buf(sc, bf, txq, bf_head, sta, ts, txok); } else - ath_tx_complete_aggr(sc, txq, bf, bf_head, ts, txok); + ath_tx_complete_aggr(sc, txq, bf, bf_head, sta, ts, txok); if (!flush) ath_txq_schedule(sc, txq); @@ -923,7 +942,7 @@ ath_tx_get_tid_subframe(struct ath_softc *sc, struct ath_txq *txq, list_add(&bf->list, &bf_head); __skb_unlink(skb, *q); ath_tx_update_baw(sc, tid, seqno); - ath_tx_complete_buf(sc, bf, txq, &bf_head, &ts, 0); + ath_tx_complete_buf(sc, bf, txq, &bf_head, NULL, &ts, 0); continue; } @@ -1832,6 +1851,7 @@ static void ath_drain_txq_list(struct ath_softc *sc, struct ath_txq *txq, */ void ath_draintxq(struct ath_softc *sc, struct ath_txq *txq) { + rcu_read_lock(); ath_txq_lock(sc, txq); if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_EDMA) { @@ -1850,6 +1870,7 @@ void ath_draintxq(struct ath_softc *sc, struct ath_txq *txq) ath_drain_txq_list(sc, txq, &txq->axq_q); ath_txq_unlock_complete(sc, txq); + rcu_read_unlock(); } bool ath_drain_all_txq(struct ath_softc *sc) @@ -2472,7 +2493,8 @@ void ath_tx_cabq(struct ieee80211_hw *hw, struct ieee80211_vif *vif, /*****************/ static void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb, - int tx_flags, struct ath_txq *txq) + int tx_flags, struct ath_txq *txq, + struct ieee80211_sta *sta) { struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb); struct ath_common *common = ath9k_hw_common(sc->sc_ah); @@ -2492,15 +2514,17 @@ static void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb, tx_info->flags |= IEEE80211_TX_STAT_ACK; } - padpos = ieee80211_hdrlen(hdr->frame_control); - padsize = padpos & 3; - if (padsize && skb->len>padpos+padsize) { - /* - * Remove MAC header padding before giving the frame back to - * mac80211. 
- */ - memmove(skb->data + padsize, skb->data, padpos); - skb_pull(skb, padsize); + if (tx_info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS) { + padpos = ieee80211_hdrlen(hdr->frame_control); + padsize = padpos & 3; + if (padsize && skb->len>padpos+padsize) { + /* + * Remove MAC header padding before giving the frame back to + * mac80211. + */ + memmove(skb->data + padsize, skb->data, padpos); + skb_pull(skb, padsize); + } } spin_lock_irqsave(&sc->sc_pm_lock, flags); @@ -2515,12 +2539,14 @@ static void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb, } spin_unlock_irqrestore(&sc->sc_pm_lock, flags); - __skb_queue_tail(&txq->complete_q, skb); ath_txq_skb_done(sc, txq, skb); + tx_info->status.status_driver_data[0] = sta; + __skb_queue_tail(&txq->complete_q, skb); } static void ath_tx_complete_buf(struct ath_softc *sc, struct ath_buf *bf, struct ath_txq *txq, struct list_head *bf_q, + struct ieee80211_sta *sta, struct ath_tx_status *ts, int txok) { struct sk_buff *skb = bf->bf_mpdu; @@ -2548,7 +2574,7 @@ static void ath_tx_complete_buf(struct ath_softc *sc, struct ath_buf *bf, complete(&sc->paprd_complete); } else { ath_debug_stat_tx(sc, bf, ts, txq, tx_flags); - ath_tx_complete(sc, skb, tx_flags, txq); + ath_tx_complete(sc, skb, tx_flags, txq, sta); } skip_tx_complete: /* At this point, skb (bf->bf_mpdu) is consumed...make sure we don't @@ -2700,10 +2726,12 @@ void ath_tx_tasklet(struct ath_softc *sc) u32 qcumask = ((1 << ATH9K_NUM_TX_QUEUES) - 1) & ah->intr_txqs; int i; + rcu_read_lock(); for (i = 0; i < ATH9K_NUM_TX_QUEUES; i++) { if (ATH_TXQ_SETUP(sc, i) && (qcumask & (1 << i))) ath_tx_processq(sc, &sc->tx.txq[i]); } + rcu_read_unlock(); } void ath_tx_edma_tasklet(struct ath_softc *sc) @@ -2717,6 +2745,7 @@ void ath_tx_edma_tasklet(struct ath_softc *sc) struct list_head *fifo_list; int status; + rcu_read_lock(); for (;;) { if (test_bit(ATH_OP_HW_RESET, &common->op_flags)) break; @@ -2787,6 +2816,7 @@ void ath_tx_edma_tasklet(struct ath_softc *sc) 
ath_tx_process_buffer(sc, txq, &ts, bf, &bf_head); ath_txq_unlock_complete(sc, txq); } + rcu_read_unlock(); } /*****************/ -- cgit v1.1 From 315c457ff123d5e36eb5fa41ed41512609f64bee Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 2 Sep 2016 19:46:13 +0300 Subject: ath9k: improve powersave filter handling For non-aggregated frames, ath9k was leaving handling of powersave filtered packets to mac80211. This can be too slow if the intermediate queue is already filled with packets and mac80211 does not immediately send a new packet via drv_tx(). Improve response time with filtered frames by triggering clearing the powersave filter internally. Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/xmit.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index b99ffa0..52bfbb9 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -446,13 +446,13 @@ static void ath_tx_count_frames(struct ath_softc *sc, struct ath_buf *bf, static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq, struct ath_buf *bf, struct list_head *bf_q, struct ieee80211_sta *sta, + struct ath_atx_tid *tid, struct ath_tx_status *ts, int txok) { struct ath_node *an = NULL; struct sk_buff *skb; struct ieee80211_hdr *hdr; struct ieee80211_tx_info *tx_info; - struct ath_atx_tid *tid = NULL; struct ath_buf *bf_next, *bf_last = bf->bf_lastbf; struct list_head bf_head; struct sk_buff_head bf_pending; @@ -494,7 +494,6 @@ static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq, } an = (struct ath_node *)sta->drv_priv; - tid = ath_get_skb_tid(sc, an, skb); seq_first = tid->seq_start; isba = ts->ts_flags & ATH9K_TX_BA; @@ -680,6 +679,7 @@ static void ath_tx_process_buffer(struct ath_softc *sc, struct ath_txq *txq, struct ieee80211_tx_info *info; struct ieee80211_sta *sta; struct 
ieee80211_hdr *hdr; + struct ath_atx_tid *tid = NULL; bool txok, flush; txok = !(ts->ts_status & ATH9K_TXERR_MASK); @@ -695,6 +695,12 @@ static void ath_tx_process_buffer(struct ath_softc *sc, struct ath_txq *txq, hdr = (struct ieee80211_hdr *) bf->bf_mpdu->data; sta = ieee80211_find_sta_by_ifaddr(hw, hdr->addr1, hdr->addr2); + if (sta) { + struct ath_node *an = (struct ath_node *)sta->drv_priv; + tid = ath_get_skb_tid(sc, an, bf->bf_mpdu); + if (ts->ts_status & (ATH9K_TXERR_FILT | ATH9K_TXERR_XRETRY)) + tid->clear_ps_filter = true; + } if (!bf_isampdu(bf)) { if (!flush) { @@ -706,7 +712,7 @@ static void ath_tx_process_buffer(struct ath_softc *sc, struct ath_txq *txq, } ath_tx_complete_buf(sc, bf, txq, bf_head, sta, ts, txok); } else - ath_tx_complete_aggr(sc, txq, bf, bf_head, sta, ts, txok); + ath_tx_complete_aggr(sc, txq, bf, bf_head, sta, tid, ts, txok); if (!flush) ath_txq_schedule(sc, txq); -- cgit v1.1 From fe79f6314a717cf031a6c04f180910583633c37e Mon Sep 17 00:00:00 2001 From: Mohammed Shafi Shajakhan Date: Thu, 1 Sep 2016 21:43:36 +0530 Subject: ath10k: remove unnecessary error code assignment The error assigned does not seems to be used anywhere, fixes nothing just a small cleanup Signed-off-by: Mohammed Shafi Shajakhan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/wmi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index eb4ab6f..38993d7 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -3563,7 +3563,6 @@ void ath10k_wmi_event_host_swba(struct ath10k *ar, struct sk_buff *skb) ath10k_warn(ar, "failed to map beacon: %d\n", ret); dev_kfree_skb_any(bcn); - ret = -EIO; goto skip; } -- cgit v1.1 From 77eb3d693182b4eaa88c6ba406fbb92b1f1bd636 Mon Sep 17 00:00:00 2001 From: Ashok Raj Nagarajan Date: Fri, 2 Sep 2016 10:59:53 +0530 Subject: ath10k: fix reporting channel survey data When user requests for survey dump data, 
driver is providing wrong survey information. This information we sent is the survey data that we have collected during previous user request. This issue occurs because we request survey dump for wrong channel. With this change, we correctly display the correct and current survey information to userspace. Fixes: fa7937e3d5c2 ("ath10k: update bss channel survey information") Signed-off-by: Ashok Raj Nagarajan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 4565321..c4d965f 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -6576,7 +6576,7 @@ static int ath10k_get_survey(struct ieee80211_hw *hw, int idx, goto exit; } - ath10k_mac_update_bss_chan_survey(ar, survey->channel); + ath10k_mac_update_bss_chan_survey(ar, &sband->channels[idx]); spin_lock_bh(&ar->data_lock); memcpy(survey, ar_survey, sizeof(*survey)); -- cgit v1.1 From b5182e157d3a1d94d7ee6b4f4cb8267f9d7ad606 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Sat, 16 Apr 2016 10:54:37 -0400 Subject: ath9k: remove repetitions of mask array size The constant "123", which is the number of elements in mask_m / mask_p, is repeated several times in this function. Replace memsets with array initialization, and replace a loop conditional with ARRAY_SIZE() so that we don't repeat ourselves. 
Signed-off-by: Bob Copeland Reviewed-by: Oleksij Rempel Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/ar5008_phy.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ar5008_phy.c b/drivers/net/wireless/ath/ath9k/ar5008_phy.c index 1b271b9..8eea8d2 100644 --- a/drivers/net/wireless/ath/ath9k/ar5008_phy.c +++ b/drivers/net/wireless/ath/ath9k/ar5008_phy.c @@ -260,8 +260,8 @@ void ar5008_hw_cmn_spur_mitigate(struct ath_hw *ah, int cur_bin; int upper, lower, cur_vit_mask; int i; - int8_t mask_m[123]; - int8_t mask_p[123]; + int8_t mask_m[123] = {0}; + int8_t mask_p[123] = {0}; int8_t mask_amt; int tmp_mask; static const int pilot_mask_reg[4] = { @@ -274,9 +274,6 @@ void ar5008_hw_cmn_spur_mitigate(struct ath_hw *ah, }; static const int inc[4] = { 0, 100, 0, 0 }; - memset(&mask_m, 0, sizeof(int8_t) * 123); - memset(&mask_p, 0, sizeof(int8_t) * 123); - cur_bin = -6000; upper = bin + 100; lower = bin - 100; @@ -302,7 +299,7 @@ void ar5008_hw_cmn_spur_mitigate(struct ath_hw *ah, upper = bin + 120; lower = bin - 120; - for (i = 0; i < 123; i++) { + for (i = 0; i < ARRAY_SIZE(mask_m); i++) { if ((cur_vit_mask > lower) && (cur_vit_mask < upper)) { /* workaround for gcc bug #37014 */ volatile int tmp_v = abs(cur_vit_mask - bin); -- cgit v1.1 From f3fa63144482d6dffd26d8b0a94b06a55d22d940 Mon Sep 17 00:00:00 2001 From: Dan Kephart Date: Wed, 3 Aug 2016 16:43:43 -0400 Subject: ath6kl: enable firmware crash dumps on the AR6004 The firmware crash dumps on the 6004 are the same as the 6003. Remove the statement guarding it from dumping on the 6004. Renamed the REG_DUMP_COUNT_AR6003 to reflect support on both chips. 
Signed-off-by: Dan Kephart Reviewed-by: Steve deRosier Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath6kl/hif.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/ath/ath6kl/hif.c b/drivers/net/wireless/ath/ath6kl/hif.c index 18c0708..d194253 100644 --- a/drivers/net/wireless/ath/ath6kl/hif.c +++ b/drivers/net/wireless/ath/ath6kl/hif.c @@ -64,7 +64,7 @@ int ath6kl_hif_rw_comp_handler(void *context, int status) } EXPORT_SYMBOL(ath6kl_hif_rw_comp_handler); -#define REG_DUMP_COUNT_AR6003 60 +#define REGISTER_DUMP_COUNT 60 #define REGISTER_DUMP_LEN_MAX 60 static void ath6kl_hif_dump_fw_crash(struct ath6kl *ar) @@ -73,9 +73,6 @@ static void ath6kl_hif_dump_fw_crash(struct ath6kl *ar) u32 i, address, regdump_addr = 0; int ret; - if (ar->target_type != TARGET_TYPE_AR6003) - return; - /* the reg dump pointer is copied to the host interest area */ address = ath6kl_get_hi_item_addr(ar, HI_ITEM(hi_failure_state)); address = TARG_VTOP(ar->target_type, address); @@ -95,7 +92,7 @@ static void ath6kl_hif_dump_fw_crash(struct ath6kl *ar) /* fetch register dump data */ ret = ath6kl_diag_read(ar, regdump_addr, (u8 *)&regdump_val[0], - REG_DUMP_COUNT_AR6003 * (sizeof(u32))); + REGISTER_DUMP_COUNT * (sizeof(u32))); if (ret) { ath6kl_warn("failed to get register dump: %d\n", ret); return; @@ -105,9 +102,9 @@ static void ath6kl_hif_dump_fw_crash(struct ath6kl *ar) ath6kl_info("hw 0x%x fw %s\n", ar->wiphy->hw_version, ar->wiphy->fw_version); - BUILD_BUG_ON(REG_DUMP_COUNT_AR6003 % 4); + BUILD_BUG_ON(REGISTER_DUMP_COUNT % 4); - for (i = 0; i < REG_DUMP_COUNT_AR6003; i += 4) { + for (i = 0; i < REGISTER_DUMP_COUNT; i += 4) { ath6kl_info("%d: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", i, le32_to_cpu(regdump_val[i]), -- cgit v1.1 From cabd34d0e9c47ac8747e7a1d871e10acdb8e980f Mon Sep 17 00:00:00 2001 From: Eric Bentley Date: Tue, 30 Aug 2016 02:16:19 -0400 Subject: ath6kl: Allow the radio to report 0 dbm txpower without timing out The ath6kl
driver attempts to get the txpower value from the radio by first clearing the existing stored value and then watching for the value to become non-zero. APs allow setting client power to values from -127..127, but this radio is not capable of setting values less than 0 and so will report txpower as 0dbm for both negative and 0 client power values. When the radio has txpower set to 0dbm txpower (equivalent to 1mw) the ath6kl_cfg80211_get_txpower() function will remain in the wait_event_interruptible_timeout() loop waiting for the value to be non-zero, and will eventually time out. This results in a 5 second delay in response. However, the correct value of zero is eventually returned. The 6004 defaults to 63dbm which is then limited by regulatory and hardware limits with max of 18dbm (6003 max is 16dbm), therefore we can use values larger than these to be able to determine when the value has been updated. To correct the issue, set the value to a nonsensical value (255) and wait for it to change to the valid value. Tested on both 6003 and 6004 based radios. Return value of zero is correctly returned in an expected amount of time (similar to when returning non-zero values) when AP client power is set to both 0 and negative values.
Signed-off-by: Eric Bentley Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index 72e2ec6..b7fe0af 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -1449,14 +1449,14 @@ static int ath6kl_cfg80211_get_txpower(struct wiphy *wiphy, return -EIO; if (test_bit(CONNECTED, &vif->flags)) { - ar->tx_pwr = 0; + ar->tx_pwr = 255; if (ath6kl_wmi_get_tx_pwr_cmd(ar->wmi, vif->fw_vif_idx) != 0) { ath6kl_err("ath6kl_wmi_get_tx_pwr_cmd failed\n"); return -EIO; } - wait_event_interruptible_timeout(ar->event_wq, ar->tx_pwr != 0, + wait_event_interruptible_timeout(ar->event_wq, ar->tx_pwr != 255, 5 * HZ); if (signal_pending(current)) { -- cgit v1.1 From 78a9e170388b672f609cb6e8e097e0ddca24e6f5 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Thu, 18 Aug 2016 15:12:05 +0200 Subject: carl9170: Fix wrong completion usage carl9170_usb_stop() is used from several places to flush and clean up any pending work. The normal pattern is to send a request and wait for the irq handler to call complete(). The completion is not reinitialized during normal operation and as the old comment indicates it is important to keep calls to wait_for_completion_timeout() and complete() balanced. Calling complete_all() brings this equilibrium out of balance and needs to be fixed by a reinit_completion(). But that opens a small race window. It is possible that the sequence of complete_all(), reinit_completion() is faster than the wait_for_completion_timeout() can do its work. The wake up is not lost but the done counter test is after reinit_completion() has been executed. The only reason we don't see carl9170_exec_cmd() hang forever is we use the timeout version of wait_for_completion().
Let's fix this by reinitializing the completion (that is just setting done counter to 0) just before we send out a request. Now, carl9170_usb_stop() can be sure a complete() call is enough to make progress since there is only one waiter at max. This is a common pattern also seen in various drivers which use completion. Signed-off-by: Daniel Wagner Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/carl9170/usb.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/carl9170/usb.c b/drivers/net/wireless/ath/carl9170/usb.c index 76842e6..99ab203 100644 --- a/drivers/net/wireless/ath/carl9170/usb.c +++ b/drivers/net/wireless/ath/carl9170/usb.c @@ -670,6 +670,7 @@ int carl9170_exec_cmd(struct ar9170 *ar, const enum carl9170_cmd_oids cmd, ar->readlen = outlen; spin_unlock_bh(&ar->cmd_lock); + reinit_completion(&ar->cmd_wait); err = __carl9170_exec_cmd(ar, &ar->cmd, false); if (!(cmd & CARL9170_CMD_ASYNC_FLAG)) { @@ -778,10 +779,7 @@ void carl9170_usb_stop(struct ar9170 *ar) spin_lock_bh(&ar->cmd_lock); ar->readlen = 0; spin_unlock_bh(&ar->cmd_lock); - complete_all(&ar->cmd_wait); - - /* This is required to prevent an early completion on _start */ - reinit_completion(&ar->cmd_wait); + complete(&ar->cmd_wait); /* * Note: -- cgit v1.1 From a179b69359feb26ddb148bb6a2c0c53a8d1dc5be Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 24 Aug 2016 05:36:53 -0300 Subject: [media] cec: don't Feature Abort broadcast msgs when unregistered If the adapter is configured as 'Unregistered', then cec_receive_notify incorrectly thinks that broadcast messages are directed messages. The destination for broadcast messages is 0xf, and the logical address assigned to Unregistered devices is also 0xf and the logic didn't handle that correctly.
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/cec/cec-adap.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/staging/media/cec/cec-adap.c b/drivers/staging/media/cec/cec-adap.c index e980ac9..946986f 100644 --- a/drivers/staging/media/cec/cec-adap.c +++ b/drivers/staging/media/cec/cec-adap.c @@ -1409,7 +1409,6 @@ static int cec_receive_notify(struct cec_adapter *adap, struct cec_msg *msg, u8 init_laddr = cec_msg_initiator(msg); u8 devtype = cec_log_addr2dev(adap, dest_laddr); int la_idx = cec_log_addr2idx(adap, dest_laddr); - bool is_directed = la_idx >= 0; bool from_unregistered = init_laddr == 0xf; struct cec_msg tx_cec_msg = { }; @@ -1571,7 +1570,7 @@ static int cec_receive_notify(struct cec_adapter *adap, struct cec_msg *msg, * Unprocessed messages are aborted if userspace isn't doing * any processing either. */ - if (is_directed && !is_reply && !adap->follower_cnt && + if (!is_broadcast && !is_reply && !adap->follower_cnt && !adap->cec_follower && msg->msg[1] != CEC_MSG_FEATURE_ABORT) return cec_feature_abort(adap, msg); break; -- cgit v1.1 From 60815d4a78204915f5cdf79a536bc96d5d23ae5f Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 24 Aug 2016 07:17:22 -0300 Subject: [media] cec: fix ioctl return code when not registered Don't return the confusing -EIO error code when the device is not registered, instead return -ENODEV which is the proper thing to do in this situation. 
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/cec/cec-api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/media/cec/cec-api.c b/drivers/staging/media/cec/cec-api.c index 6f58ee8..e274e2f 100644 --- a/drivers/staging/media/cec/cec-api.c +++ b/drivers/staging/media/cec/cec-api.c @@ -435,7 +435,7 @@ static long cec_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) void __user *parg = (void __user *)arg; if (!devnode->registered) - return -EIO; + return -ENODEV; switch (cmd) { case CEC_ADAP_G_CAPS: -- cgit v1.1 From 6b7b554d34fdbc5dc9fae9d4ca9dd37f6346be3d Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 3 Aug 2016 15:12:44 +0100 Subject: ARM: dts: Remove use of skeleton.dtsi from bcm283x.dtsi This file is included from DTS files under arch/arm64 too (via broadcom/bcm2837-rpi-3-b.dts and broadcom/bcm2837.dtsi). There is a desire not to have skeleton.dtsi for ARM64. See commit 3ebee5a2e141 ("arm64: dts: kill skeleton.dtsi") for rationale for its removal. As well as the addition of #*-cells also requires adding the device_type to the rpi memory node explicitly. Note that this change results in the removal of an empty /aliases node from bcm2835-rpi-a.dtb and bcm2835-rpi-a-plus.dtb. I have no hardware to check if this is a problem or not. It also results in some reordering of the nodes in the DTBs (the /aliases and /memory nodes come later). This isn't supposed to matter but, again, I've no hardware to check if it is true in this particular case. 
Signed-off-by: Ian Campbell Acked-by: Mark Rutland Tested-by: Stefan Wahren Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: Rob Herring Cc: Frank Rowand Cc: Eric Anholt Cc: Stephen Warren Cc: Lee Jones Cc: Gerd Hoffmann Cc: devicetree@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-rpi-kernel@lists.infradead.org Cc: arm@kernel.org Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/bcm2835-rpi.dtsi | 1 + arch/arm/boot/dts/bcm283x.dtsi | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/bcm2835-rpi.dtsi b/arch/arm/boot/dts/bcm2835-rpi.dtsi index caf2707..e9b47b2 100644 --- a/arch/arm/boot/dts/bcm2835-rpi.dtsi +++ b/arch/arm/boot/dts/bcm2835-rpi.dtsi @@ -2,6 +2,7 @@ / { memory { + device_type = "memory"; reg = <0 0x10000000>; }; diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi index b982522..445624a 100644 --- a/arch/arm/boot/dts/bcm283x.dtsi +++ b/arch/arm/boot/dts/bcm283x.dtsi @@ -2,7 +2,6 @@ #include #include #include -#include "skeleton.dtsi" /* This include file covers the common peripherals and configuration between * bcm2835 and bcm2836 implementations, leaving the CPU configuration to @@ -13,6 +12,8 @@ compatible = "brcm,bcm2835"; model = "BCM2835"; interrupt-parent = <&intc>; + #address-cells = <1>; + #size-cells = <1>; chosen { bootargs = "earlyprintk console=ttyAMA0"; -- cgit v1.1 From 76aa7591688001839ec9ca838041f2d55d49ab92 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 3 Aug 2016 15:12:45 +0100 Subject: ARM64: dts: bcm: Use a symlink to R-Pi dtsi files from arch=arm The ../../../arm... style cross-references added by commit 9d56c22a7861 ("ARM: bcm2835: Add devicetree for the Raspberry Pi 3.") do not work in the context of the split device-tree repository[0] (where the directory structure differs). As with commit 8ee57b8182c4 ("ARM64: dts: vexpress: Use a symlink to vexpress-v2m-rs1.dtsi from arch=arm") use symlinks instead. 
[0] https://git.kernel.org/cgit/linux/kernel/git/devicetree/devicetree-rebasing.git/ Signed-off-by: Ian Campbell Acked-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: Rob Herring Cc: Frank Rowand Cc: Eric Anholt Cc: Stephen Warren Cc: Lee Jones Cc: Gerd Hoffmann Cc: devicetree@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-rpi-kernel@lists.infradead.org Cc: arm@kernel.org Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/broadcom/bcm2835-rpi.dtsi | 1 + arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts | 4 ++-- arch/arm64/boot/dts/broadcom/bcm2837.dtsi | 2 +- arch/arm64/boot/dts/broadcom/bcm283x-rpi-smsc9514.dtsi | 1 + arch/arm64/boot/dts/broadcom/bcm283x.dtsi | 1 + 5 files changed, 6 insertions(+), 3 deletions(-) create mode 120000 arch/arm64/boot/dts/broadcom/bcm2835-rpi.dtsi create mode 120000 arch/arm64/boot/dts/broadcom/bcm283x-rpi-smsc9514.dtsi create mode 120000 arch/arm64/boot/dts/broadcom/bcm283x.dtsi diff --git a/arch/arm64/boot/dts/broadcom/bcm2835-rpi.dtsi b/arch/arm64/boot/dts/broadcom/bcm2835-rpi.dtsi new file mode 120000 index 0000000..3937b77 --- /dev/null +++ b/arch/arm64/boot/dts/broadcom/bcm2835-rpi.dtsi @@ -0,0 +1 @@ +../../../../arm/boot/dts/bcm2835-rpi.dtsi \ No newline at end of file diff --git a/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts b/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts index 6f47dd2..7841b72 100644 --- a/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts +++ b/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts @@ -1,7 +1,7 @@ /dts-v1/; #include "bcm2837.dtsi" -#include "../../../../arm/boot/dts/bcm2835-rpi.dtsi" -#include "../../../../arm/boot/dts/bcm283x-rpi-smsc9514.dtsi" +#include "bcm2835-rpi.dtsi" +#include "bcm283x-rpi-smsc9514.dtsi" / { compatible = "raspberrypi,3-model-b", "brcm,bcm2837"; diff --git a/arch/arm64/boot/dts/broadcom/bcm2837.dtsi b/arch/arm64/boot/dts/broadcom/bcm2837.dtsi index f2a31d0..8216bbb 100644 --- 
a/arch/arm64/boot/dts/broadcom/bcm2837.dtsi +++ b/arch/arm64/boot/dts/broadcom/bcm2837.dtsi @@ -1,4 +1,4 @@ -#include "../../../../arm/boot/dts/bcm283x.dtsi" +#include "bcm283x.dtsi" / { compatible = "brcm,bcm2836"; diff --git a/arch/arm64/boot/dts/broadcom/bcm283x-rpi-smsc9514.dtsi b/arch/arm64/boot/dts/broadcom/bcm283x-rpi-smsc9514.dtsi new file mode 120000 index 0000000..dca7c05 --- /dev/null +++ b/arch/arm64/boot/dts/broadcom/bcm283x-rpi-smsc9514.dtsi @@ -0,0 +1 @@ +../../../../arm/boot/dts/bcm283x-rpi-smsc9514.dtsi \ No newline at end of file diff --git a/arch/arm64/boot/dts/broadcom/bcm283x.dtsi b/arch/arm64/boot/dts/broadcom/bcm283x.dtsi new file mode 120000 index 0000000..5f54e4c --- /dev/null +++ b/arch/arm64/boot/dts/broadcom/bcm283x.dtsi @@ -0,0 +1 @@ +../../../../arm/boot/dts/bcm283x.dtsi \ No newline at end of file -- cgit v1.1 From c94455558337eece474eebb6a16b905f98930418 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 8 Sep 2016 10:43:58 -0700 Subject: raid5: fix a small race condition commit 5f9d1fde7d54a5(raid5: fix memory leak of bio integrity data) moves bio_reset to bio_endio. But it introduces a small race condition. It does bio_reset after raid5_release_stripe, which could make the stripe reusable and hence reuse the bio just before bio_reset. Moving bio_reset before raid5_release_stripe is called should fix the race. 
Reported-and-tested-by: Stefan Priebe - Profihost AG Signed-off-by: Shaohua Li --- drivers/md/raid5.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b95c54c..ee7fc37 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2423,10 +2423,10 @@ static void raid5_end_read_request(struct bio * bi) } } rdev_dec_pending(rdev, conf->mddev); + bio_reset(bi); clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); raid5_release_stripe(sh); - bio_reset(bi); } static void raid5_end_write_request(struct bio *bi) @@ -2498,6 +2498,7 @@ static void raid5_end_write_request(struct bio *bi) if (sh->batch_head && bi->bi_error && !replacement) set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state); + bio_reset(bi); if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags)) clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); @@ -2505,7 +2506,6 @@ static void raid5_end_write_request(struct bio *bi) if (sh->batch_head && sh != sh->batch_head) raid5_release_stripe(sh->batch_head); - bio_reset(bi); } static void raid5_build_block(struct stripe_head *sh, int i, int previous) -- cgit v1.1 From af7c1beccfd98bad752644dc14ea93805d65b2c9 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Thu, 1 Sep 2016 19:02:57 +0800 Subject: virtio: mark vring_dma_dev() static We get 1 warning when building kernel with W=1: drivers/virtio/virtio_ring.c:170:16: warning: no previous prototype for 'vring_dma_dev' [-Wmissing-prototypes] In fact, this function is only used in the file in which it is declared and don't need a declaration, but can be made static. so this patch marks this function with 'static'. Signed-off-by: Baoyou Xie Acked-by: Arnd Bergmann Signed-off-by: Michael S. 
Tsirkin --- drivers/virtio/virtio_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index e383ecd..ed9c9ee 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -167,7 +167,7 @@ static bool vring_use_dma_api(struct virtio_device *vdev) * making all of the arch DMA ops work on the vring device itself * is a mess. For now, we use the parent device for DMA ops. */ -struct device *vring_dma_dev(const struct vring_virtqueue *vq) +static struct device *vring_dma_dev(const struct vring_virtqueue *vq) { return vq->vq.vdev->dev.parent; } -- cgit v1.1 From 5e59d9a1aed26abcc79abe78af5cfd34e53cbe7f Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 30 Aug 2016 08:04:15 -0700 Subject: virtio_console: Stop doing DMA on the stack virtio_console uses a small DMA buffer for control requests. Move that buffer into heap memory. Doing virtio DMA on the stack is normally okay on non-DMA-API virtio systems (which is currently most of them), but it breaks completely if the stack is virtually mapped. Tested by typing both directions using picocom aimed at /dev/hvc0. Signed-off-by: Andy Lutomirski Signed-off-by: Michael S. Tsirkin Reviewed-by: Amit Shah --- drivers/char/virtio_console.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index d2406fe..5da47e26 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -165,6 +165,12 @@ struct ports_device { */ struct virtqueue *c_ivq, *c_ovq; + /* + * A control packet buffer for guest->host requests, protected + * by c_ovq_lock. 
+ */ + struct virtio_console_control cpkt; + /* Array of per-port IO virtqueues */ struct virtqueue **in_vqs, **out_vqs; @@ -560,28 +566,29 @@ static ssize_t __send_control_msg(struct ports_device *portdev, u32 port_id, unsigned int event, unsigned int value) { struct scatterlist sg[1]; - struct virtio_console_control cpkt; struct virtqueue *vq; unsigned int len; if (!use_multiport(portdev)) return 0; - cpkt.id = cpu_to_virtio32(portdev->vdev, port_id); - cpkt.event = cpu_to_virtio16(portdev->vdev, event); - cpkt.value = cpu_to_virtio16(portdev->vdev, value); - vq = portdev->c_ovq; - sg_init_one(sg, &cpkt, sizeof(cpkt)); - spin_lock(&portdev->c_ovq_lock); - if (virtqueue_add_outbuf(vq, sg, 1, &cpkt, GFP_ATOMIC) == 0) { + + portdev->cpkt.id = cpu_to_virtio32(portdev->vdev, port_id); + portdev->cpkt.event = cpu_to_virtio16(portdev->vdev, event); + portdev->cpkt.value = cpu_to_virtio16(portdev->vdev, value); + + sg_init_one(sg, &portdev->cpkt, sizeof(struct virtio_console_control)); + + if (virtqueue_add_outbuf(vq, sg, 1, &portdev->cpkt, GFP_ATOMIC) == 0) { virtqueue_kick(vq); while (!virtqueue_get_buf(vq, &len) && !virtqueue_is_broken(vq)) cpu_relax(); } + spin_unlock(&portdev->c_ovq_lock); return 0; } -- cgit v1.1 From ea90383837334bcebe842e719ad4d8c966f4ef51 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Thu, 8 Sep 2016 19:03:20 +0800 Subject: drm/vc4: mark vc4_bo_cache_purge() static We get 1 warning when building kernel with W=1: drivers/gpu/drm/vc4/vc4_bo.c:147:6: warning: no previous prototype for 'vc4_bo_cache_purge' [-Wmissing-prototypes] In fact, this function is only used in the file in which it is declared and don't need a declaration, but can be made static. So this patch marks it 'static'. 
Signed-off-by: Baoyou Xie Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_bo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index 59adcf8..3f6704c 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -144,7 +144,7 @@ static struct list_head *vc4_get_cache_list_for_size(struct drm_device *dev, return &vc4->bo_cache.size_list[page_index]; } -void vc4_bo_cache_purge(struct drm_device *dev) +static void vc4_bo_cache_purge(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); -- cgit v1.1 From 7d762e49c2117d3829eb3355f2617aea080ed3a7 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 9 Sep 2016 18:08:23 +0200 Subject: perf/x86/amd/uncore: Prevent use after free The recent conversion of the cpu hotplug support in the uncore driver introduced a regression due to the way the callbacks are invoked at initialization time. The old code called the prepare/starting/online function on each online cpu as a block. The new code registers the hotplug callbacks in the core for each state. The core invokes the callbacks at each registration on all online cpus. The code implicitly relied on the prepare/starting/online callbacks being called as combo on a particular cpu, which was not obvious and completely undocumented. The resulting subtle wreckage happens due to the way how the uncore code manages shared data structures for cpus which share an uncore resource in hardware. The sharing is determined in the cpu starting callback, but the prepare callback allocates per cpu data for the upcoming cpu because potential sharing is unknown at this point. If the starting callback finds an online cpu which shares the hardware resource it takes a refcount on the percpu data of that cpu and puts the own data structure into a 'free_at_online' pointer of that shared data structure. The online callback frees that.
With the old model this worked because in a starting callback only one non unused structure (the one of the starting cpu) was available. The new code allocates the data structures for all cpus when the prepare callback is registered. Now the starting function iterates through all online cpus and looks for a data structure (skipping its own) which has a matching hardware id. The id member of the data structure is initialized to 0, but the hardware id can be 0 as well. The resulting wreckage is: CPU0 finds a matching id on CPU1, takes a refcount on CPU1 data and puts its own data structure into CPU1s data structure to be freed. CPU1 skips CPU0 because the data structure is its allegedly unused own. It finds a matching id on CPU2, takes a refcount on CPU2 data and puts its own data structure into CPU2s data structure to be freed. .... Now the online callbacks are invoked. CPU0 has a pointer to CPU1s data and frees the original CPU0 data. So far so good. CPU1 has a pointer to CPU2s data and frees the original CPU1 data, which is still referenced by CPU0 ---> Booom So there are two issues to be solved here: 1) The id field must be initialized at allocation time to a value which cannot be a valid hardware id, i.e. -1 This prevents the above scenario, but now CPU1 and CPU2 both stick their own data structure into the free_at_online pointer of CPU0. So we leak CPU1s data structure. 2) Fix the memory leak described in #1 Instead of having a single pointer, use a hlist to enqueue the superfluous data structures which are then freed by the first cpu invoking the online callback. Ideally we should know the sharing _before_ invoking the prepare callback, but that's way beyond the scope of this bug fix.
[ tglx: Rewrote changelog ] Fixes: 96b2bd3866a0 ("perf/x86/amd/uncore: Convert to hotplug state machine") Reported-and-tested-by: Eric Sandeen Signed-off-by: Sebastian Andrzej Siewior Cc: Borislav Petkov Link: http://lkml.kernel.org/r/20160909160822.lowgmkdwms2dheyv@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/events/amd/uncore.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index e6131d4..65577f0 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -29,6 +29,8 @@ #define COUNTER_SHIFT 16 +static HLIST_HEAD(uncore_unused_list); + struct amd_uncore { int id; int refcnt; @@ -39,7 +41,7 @@ struct amd_uncore { cpumask_t *active_mask; struct pmu *pmu; struct perf_event *events[MAX_COUNTERS]; - struct amd_uncore *free_when_cpu_online; + struct hlist_node node; }; static struct amd_uncore * __percpu *amd_uncore_nb; @@ -306,6 +308,7 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu) uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL; uncore_nb->active_mask = &amd_nb_active_mask; uncore_nb->pmu = &amd_nb_pmu; + uncore_nb->id = -1; *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb; } @@ -319,6 +322,7 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu) uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL; uncore_l2->active_mask = &amd_l2_active_mask; uncore_l2->pmu = &amd_l2_pmu; + uncore_l2->id = -1; *per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2; } @@ -348,7 +352,7 @@ amd_uncore_find_online_sibling(struct amd_uncore *this, continue; if (this->id == that->id) { - that->free_when_cpu_online = this; + hlist_add_head(&this->node, &uncore_unused_list); this = that; break; } @@ -388,13 +392,23 @@ static int amd_uncore_cpu_starting(unsigned int cpu) return 0; } +static void uncore_clean_online(void) +{ + struct amd_uncore *uncore; + struct hlist_node *n; + + hlist_for_each_entry_safe(uncore, n, &uncore_unused_list, node) { + hlist_del(&uncore->node); 
+ kfree(uncore); + } +} + static void uncore_online(unsigned int cpu, struct amd_uncore * __percpu *uncores) { struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu); - kfree(uncore->free_when_cpu_online); - uncore->free_when_cpu_online = NULL; + uncore_clean_online(); if (cpu == uncore->cpu) cpumask_set_cpu(cpu, uncore->active_mask); -- cgit v1.1 From 2561d309dfd1555e781484af757ed0115035ddb3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 17 Aug 2016 16:02:32 -0400 Subject: alpha: fix copy_from_user() it should clear the destination even when access_ok() fails. Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/alpha/include/asm/uaccess.h | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/arch/alpha/include/asm/uaccess.h b/arch/alpha/include/asm/uaccess.h index c419b43..466e42e 100644 --- a/arch/alpha/include/asm/uaccess.h +++ b/arch/alpha/include/asm/uaccess.h @@ -371,14 +371,6 @@ __copy_tofrom_user_nocheck(void *to, const void *from, long len) return __cu_len; } -extern inline long -__copy_tofrom_user(void *to, const void *from, long len, const void __user *validate) -{ - if (__access_ok((unsigned long)validate, len, get_fs())) - len = __copy_tofrom_user_nocheck(to, from, len); - return len; -} - #define __copy_to_user(to, from, n) \ ({ \ __chk_user_ptr(to); \ @@ -393,17 +385,22 @@ __copy_tofrom_user(void *to, const void *from, long len, const void __user *vali #define __copy_to_user_inatomic __copy_to_user #define __copy_from_user_inatomic __copy_from_user - extern inline long copy_to_user(void __user *to, const void *from, long n) { - return __copy_tofrom_user((__force void *)to, from, n, to); + if (likely(__access_ok((unsigned long)to, n, get_fs()))) + n = __copy_tofrom_user_nocheck((__force void *)to, from, n); + return n; } extern inline long copy_from_user(void *to, const void __user *from, long n) { - return __copy_tofrom_user(to, (__force void *)from, n, from); + if (likely(__access_ok((unsigned long)from, n, 
get_fs()))) + n = __copy_tofrom_user_nocheck(to, (__force void *)from, n); + else + memset(to, 0, n); + return n; } extern void __do_clear_user(void); -- cgit v1.1 From 2545e5da080b4839dd859e3b09343a884f6ab0e3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 17 Aug 2016 16:36:37 -0400 Subject: asm-generic: make copy_from_user() zero the destination properly ... in all cases, including the failing access_ok() Note that some architectures using asm-generic/uaccess.h have __copy_from_user() not zeroing the tail on failure halfway through. This variant works either way. Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- include/asm-generic/uaccess.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h index 1bfa602..04e21a4 100644 --- a/include/asm-generic/uaccess.h +++ b/include/asm-generic/uaccess.h @@ -257,11 +257,13 @@ extern int __get_user_bad(void) __attribute__((noreturn)); static inline long copy_from_user(void *to, const void __user * from, unsigned long n) { + unsigned long res = n; might_fault(); - if (access_ok(VERIFY_READ, from, n)) - return __copy_from_user(to, from, n); - else - return n; + if (likely(access_ok(VERIFY_READ, from, n))) + res = __copy_from_user(to, from, n); + if (unlikely(res)) + memset(to + (n - res), 0, res); + return res; } static inline long copy_to_user(void __user *to, -- cgit v1.1 From b93e1fa7106582e3a81cc818b719e0341585ff1b Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 7 Sep 2016 17:20:46 +0200 Subject: ipv4: fix value of ->nlmsg_flags reported in RTM_NEWROUTE events fib_table_insert() inconsistently fills the nlmsg_flags field in its notification messages. Since commit b8f558313506 ("[RTNETLINK]: Fix sending netlink message when replace route."), the netlink message has its nlmsg_flags set to NLM_F_REPLACE if the route replaced a preexisting one. 
Then commit a2bb6d7d6f42 ("ipv4: include NLM_F_APPEND flag in append route notifications") started setting nlmsg_flags to NLM_F_APPEND if the route matched a preexisting one but was appended. In other cases (exclusive creation or prepend), nlmsg_flags is 0. This patch sets ->nlmsg_flags in all situations, preserving the semantic of the NLM_F_* bits: * NLM_F_CREATE: a new fib entry has been created for this route. * NLM_F_EXCL: no other fib entry existed for this route. * NLM_F_REPLACE: this route has overwritten a preexisting fib entry. * NLM_F_APPEND: the new fib entry was added after other entries for the same route. As a result, the possible flag combination can now be reported (iproute2's terminology into parentheses): * NLM_F_CREATE | NLM_F_EXCL: route didn't exist, exclusive creation ("add"). * NLM_F_CREATE | NLM_F_APPEND: route did already exist, new route added after preexisting ones ("append"). * NLM_F_CREATE: route did already exist, new route added before preexisting ones ("prepend"). * NLM_F_REPLACE: route did already exist, new route replaced the first preexisting one ("change"). Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index e2ffc2a..241f27b 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1081,7 +1081,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) struct trie *t = (struct trie *)tb->tb_data; struct fib_alias *fa, *new_fa; struct key_vector *l, *tp; - unsigned int nlflags = 0; + u16 nlflags = NLM_F_EXCL; struct fib_info *fi; u8 plen = cfg->fc_dst_len; u8 slen = KEYLENGTH - plen; @@ -1126,6 +1126,8 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) if (cfg->fc_nlflags & NLM_F_EXCL) goto out; + nlflags &= ~NLM_F_EXCL; + /* We have 2 goals: * 1. 
Find exact match for type, scope, fib_info to avoid * duplicate routes @@ -1151,6 +1153,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) struct fib_info *fi_drop; u8 state; + nlflags |= NLM_F_REPLACE; fa = fa_first; if (fa_match) { if (fa == fa_match) @@ -1191,7 +1194,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) if (state & FA_S_ACCESSED) rt_cache_flush(cfg->fc_nlinfo.nl_net); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, - tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE); + tb->tb_id, &cfg->fc_nlinfo, nlflags); goto succeeded; } @@ -1203,7 +1206,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) goto out; if (cfg->fc_nlflags & NLM_F_APPEND) - nlflags = NLM_F_APPEND; + nlflags |= NLM_F_APPEND; else fa = fa_first; } @@ -1211,6 +1214,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) if (!(cfg->fc_nlflags & NLM_F_CREATE)) goto out; + nlflags |= NLM_F_CREATE; err = -ENOBUFS; new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); if (!new_fa) -- cgit v1.1 From 73483c1289d148282be3aac3ad30b4aa1f8fac87 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 7 Sep 2016 17:21:40 +0200 Subject: ipv6: report NLM_F_CREATE and NLM_F_EXCL flags in RTM_NEWROUTE events Since commit 37a1d3611c12 ("ipv6: include NLM_F_REPLACE in route replace notifications"), RTM_NEWROUTE notifications have their NLM_F_REPLACE flag set if the new route replaced a preexisting one. However, other flags aren't set. This patch reports the missing NLM_F_CREATE and NLM_F_EXCL flag bits. NLM_F_APPEND is not reported, because in ipv6 a NLM_F_CREATE request is interpreted as an append request (contrary to ipv4, "prepend" is not supported, so if NLM_F_EXCL is not set then NLM_F_APPEND is implicit). As a result, the possible flag combination can now be reported (iproute2's terminology into parentheses): * NLM_F_CREATE | NLM_F_EXCL: route didn't exist, exclusive creation ("add"). 
* NLM_F_CREATE: route did already exist, new route added after preexisting ones ("append"). * NLM_F_REPLACE: route did already exist, new route replaced the first preexisting one ("change"). Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 771be1f..ef54852 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -743,6 +743,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, (info->nlh->nlmsg_flags & NLM_F_CREATE)); int found = 0; bool rt_can_ecmp = rt6_qualify_for_ecmp(rt); + u16 nlflags = NLM_F_EXCL; int err; ins = &fn->leaf; @@ -759,6 +760,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, if (info->nlh && (info->nlh->nlmsg_flags & NLM_F_EXCL)) return -EEXIST; + + nlflags &= ~NLM_F_EXCL; if (replace) { if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) { found++; @@ -856,6 +859,7 @@ next_iter: pr_warn("NLM_F_CREATE should be set when creating new route\n"); add: + nlflags |= NLM_F_CREATE; err = fib6_commit_metrics(&rt->dst, mxc); if (err) return err; @@ -864,7 +868,7 @@ add: *ins = rt; rt->rt6i_node = fn; atomic_inc(&rt->rt6i_ref); - inet6_rt_notify(RTM_NEWROUTE, rt, info, 0); + inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); info->nl_net->ipv6.rt6_stats->fib_rt_entries++; if (!(fn->fn_flags & RTN_RTINFO)) { -- cgit v1.1 From b8b867e132d2c32f16a49b3ce5c11ee289a92c4e Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 7 Sep 2016 13:57:36 -0700 Subject: rtnetlink: remove unused ifla_stats_policy This structure is defined but never used. Flagged with W=1 Signed-off-by: Stephen Hemminger Acked-by: Roopa Prabhu Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1dfca1c..937e459 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3669,10 +3669,6 @@ nla_put_failure: return -EMSGSIZE; } -static const struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] = { - [IFLA_STATS_LINK_64] = { .len = sizeof(struct rtnl_link_stats64) }, -}; - static size_t if_nlmsg_stats_size(const struct net_device *dev, u32 filter_mask) { -- cgit v1.1 From c24acf03c7352bd10a99e58b0366b2acf8722856 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 7 Sep 2016 14:07:32 -0700 Subject: macsec: set network devtype The netdevice type structure for macsec was being defined but never used. To set the network device type the macro SET_NETDEV_DEVTYPE must be called. Compile tested only, I don't use macsec. Signed-off-by: Stephen Hemminger Acked-by: Sabrina Dubroca Signed-off-by: David S. Miller --- drivers/net/macsec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 351e701..3ea47f2 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2973,6 +2973,7 @@ static void macsec_setup(struct net_device *dev) dev->priv_flags |= IFF_NO_QUEUE; dev->netdev_ops = &macsec_netdev_ops; dev->destructor = macsec_free_netdev; + SET_NETDEV_DEVTYPE(dev, &macsec_type); eth_zero_addr(dev->broadcast); } -- cgit v1.1 From 40d259045500c6431633f215516c1f94bac8c62f Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 8 Sep 2016 08:16:01 +0200 Subject: mlxsw: spectrum_router: Fix error path in mlxsw_sp_router_init When neigh_init fails, we have to do proper cleanup including router_fini call. Fixes: 6cf3c971dc84cb ("mlxsw: spectrum_router: Add private neigh table") Signed-off-by: Jiri Pirko Acked-by: Ido Schimmel Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 352259b..3f5c51d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1517,7 +1517,14 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) return err; mlxsw_sp_lpm_init(mlxsw_sp); mlxsw_sp_vrs_init(mlxsw_sp); - return mlxsw_sp_neigh_init(mlxsw_sp); + err = mlxsw_sp_neigh_init(mlxsw_sp); + if (err) + goto err_neigh_init; + return 0; + +err_neigh_init: + __mlxsw_sp_router_fini(mlxsw_sp); + return err; } void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) -- cgit v1.1 From 3247ff2b318f3eeaaf32163ce39ddaac23e9a6f9 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 8 Sep 2016 08:16:02 +0200 Subject: mlxsw: spectrum: Set port type before setting its address During port init, we currently set the port's type to Ethernet after setting its MAC address. However, the hardware documentation states this should be the other way around. Align the driver with the hardware documentation and set the port's MAC address after setting its type. Fixes: 56ade8fe3fe1 ("mlxsw: spectrum: Add initial support for Spectrum ASIC") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index e16b347..d48873b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2106,6 +2106,13 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, dev->netdev_ops = &mlxsw_sp_port_netdev_ops; dev->ethtool_ops = &mlxsw_sp_port_ethtool_ops; + err = mlxsw_sp_port_swid_set(mlxsw_sp_port, 0); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set SWID\n", + mlxsw_sp_port->local_port); + goto err_port_swid_set; + } + err = mlxsw_sp_port_dev_addr_init(mlxsw_sp_port); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Port %d: Unable to init port mac address\n", @@ -2131,13 +2138,6 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_port_system_port_mapping_set; } - err = mlxsw_sp_port_swid_set(mlxsw_sp_port, 0); - if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set SWID\n", - mlxsw_sp_port->local_port); - goto err_port_swid_set; - } - err = mlxsw_sp_port_speed_by_width_set(mlxsw_sp_port, width); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to enable speeds\n", @@ -2219,10 +2219,10 @@ err_port_buffers_init: err_port_admin_status_set: err_port_mtu_set: err_port_speed_by_width_set: - mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT); -err_port_swid_set: err_port_system_port_mapping_set: err_dev_addr_init: + mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT); +err_port_swid_set: free_percpu(mlxsw_sp_port->pcpu_stats); err_alloc_stats: kfree(mlxsw_sp_port->untagged_vlans); -- cgit v1.1 From ca120cf688874f4423e579e7cc5ddf7244aeca45 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 3 Sep 2016 10:38:03 -0700 Subject: mm: fix show_smap() for 
zone_device-pmd ranges Attempting to dump /proc/<pid>/smaps for a process with pmd dax mappings currently results in the following VM_BUG_ONs: kernel BUG at mm/huge_memory.c:1105! task: ffff88045f16b140 task.stack: ffff88045be14000 RIP: 0010:[] [] follow_trans_huge_pmd+0x2cb/0x340 [..] Call Trace: [] smaps_pte_range+0xa0/0x4b0 [] ? vsnprintf+0x255/0x4c0 [] __walk_page_range+0x1fe/0x4d0 [] walk_page_vma+0x62/0x80 [] show_smap+0xa6/0x2b0 kernel BUG at fs/proc/task_mmu.c:585! RIP: 0010:[] [] smaps_pte_range+0x499/0x4b0 Call Trace: [] ? vsnprintf+0x255/0x4c0 [] __walk_page_range+0x1fe/0x4d0 [] walk_page_vma+0x62/0x80 [] show_smap+0xa6/0x2b0 These locations are sanity checking page flags that must be set for an anonymous transparent huge page, but are not set for the zone_device pages associated with dax mappings. Cc: Ross Zwisler Cc: Kirill A. Shutemov Acked-by: Andrew Morton Signed-off-by: Dan Williams --- fs/proc/task_mmu.c | 2 ++ mm/huge_memory.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 187d84e..f6fa99e 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -581,6 +581,8 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, mss->anonymous_thp += HPAGE_PMD_SIZE; else if (PageSwapBacked(page)) mss->shmem_thp += HPAGE_PMD_SIZE; + else if (is_zone_device_page(page)) + /* pass */; else VM_BUG_ON_PAGE(1, page); smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd)); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 2db2112..a6abd76 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1078,7 +1078,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, goto out; page = pmd_page(*pmd); - VM_BUG_ON_PAGE(!PageHead(page), page); + VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page); if (flags & FOLL_TOUCH) touch_pmd(vma, addr, pmd); if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) { @@ -1116,7 +1116,7 @@ struct page
*follow_trans_huge_pmd(struct vm_area_struct *vma, } skip_mlock: page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT; - VM_BUG_ON_PAGE(!PageCompound(page), page); + VM_BUG_ON_PAGE(!PageCompound(page) && !is_zone_device_page(page), page); if (flags & FOLL_GET) get_page(page); -- cgit v1.1 From 9049771f7d5490a302589976984810064c83ab40 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 7 Sep 2016 08:51:21 -0700 Subject: mm: fix cache mode of dax pmd mappings track_pfn_insert() in vmf_insert_pfn_pmd() is marking dax mappings as uncacheable rendering them impractical for application usage. DAX-pte mappings are cached and the goal of establishing DAX-pmd mappings is to attain more performance, not dramatically less (3 orders of magnitude). track_pfn_insert() relies on a previous call to reserve_memtype() to establish the expected page_cache_mode for the range. While memremap() arranges for reserve_memtype() to be called, devm_memremap_pages() does not. So, teach track_pfn_insert() and untrack_pfn() how to handle tracking without a vma, and arrange for devm_memremap_pages() to establish the write-back-cache reservation in the memtype tree. Cc: Cc: Matthew Wilcox Cc: Ross Zwisler Cc: Nilesh Choudhury Cc: Kirill A. Shutemov Reported-by: Toshi Kani Reported-by: Kai Zhang Acked-by: Andrew Morton Signed-off-by: Dan Williams --- arch/x86/mm/pat.c | 17 ++++++++++------- kernel/memremap.c | 9 +++++++++ 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index ecb1b69..170cc4f 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -927,9 +927,10 @@ int track_pfn_copy(struct vm_area_struct *vma) } /* - * prot is passed in as a parameter for the new mapping. If the vma has a - * linear pfn mapping for the entire range reserve the entire vma range with - * single reserve_pfn_range call. + * prot is passed in as a parameter for the new mapping. 
If the vma has + * a linear pfn mapping for the entire range, or no vma is provided, + * reserve the entire pfn + size range with single reserve_pfn_range + * call. */ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, unsigned long pfn, unsigned long addr, unsigned long size) @@ -938,11 +939,12 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, enum page_cache_mode pcm; /* reserve the whole chunk starting from paddr */ - if (addr == vma->vm_start && size == (vma->vm_end - vma->vm_start)) { + if (!vma || (addr == vma->vm_start + && size == (vma->vm_end - vma->vm_start))) { int ret; ret = reserve_pfn_range(paddr, size, prot, 0); - if (!ret) + if (ret == 0 && vma) vma->vm_flags |= VM_PAT; return ret; } @@ -997,7 +999,7 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, resource_size_t paddr; unsigned long prot; - if (!(vma->vm_flags & VM_PAT)) + if (vma && !(vma->vm_flags & VM_PAT)) return; /* free the chunk starting from pfn or the whole chunk */ @@ -1011,7 +1013,8 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, size = vma->vm_end - vma->vm_start; } free_pfn_range(paddr, size); - vma->vm_flags &= ~VM_PAT; + if (vma) + vma->vm_flags &= ~VM_PAT; } /* diff --git a/kernel/memremap.c b/kernel/memremap.c index 251d16b..b501e39 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -247,6 +247,7 @@ static void devm_memremap_pages_release(struct device *dev, void *data) align_start = res->start & ~(SECTION_SIZE - 1); align_size = ALIGN(resource_size(res), SECTION_SIZE); arch_remove_memory(align_start, align_size); + untrack_pfn(NULL, PHYS_PFN(align_start), align_size); pgmap_radix_release(res); dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc, "%s: failed to free all reserved pages\n", __func__); @@ -282,6 +283,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, struct percpu_ref *ref, struct vmem_altmap *altmap) { resource_size_t key, align_start, align_size, align_end; + 
pgprot_t pgprot = PAGE_KERNEL; struct dev_pagemap *pgmap; struct page_map *page_map; int error, nid, is_ram; @@ -351,6 +353,11 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, if (nid < 0) nid = numa_mem_id(); + error = track_pfn_remap(NULL, &pgprot, PHYS_PFN(align_start), 0, + align_size); + if (error) + goto err_pfn_remap; + error = arch_add_memory(nid, align_start, align_size, true); if (error) goto err_add_memory; @@ -371,6 +378,8 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, return __va(res->start); err_add_memory: + untrack_pfn(NULL, PHYS_PFN(align_start), align_size); + err_pfn_remap: err_radix: pgmap_radix_release(res); devres_free(page_map); -- cgit v1.1 From 2e21807d4b131dfd4a8e5c82116a85b62f28aeec Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Fri, 2 Sep 2016 17:27:30 -0600 Subject: nfit, mce: Fix SPA matching logic in MCE handler The check for a 'pmem' type SPA in the MCE handler was inverted due to a merge/rebase error. Fixes: 6839a6d nfit: do an ARS scrub on hitting a latent media error Cc: linux-acpi@vger.kernel.org Cc: Dan Williams Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/acpi/nfit/mce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c index 4c745bf..161f915 100644 --- a/drivers/acpi/nfit/mce.c +++ b/drivers/acpi/nfit/mce.c @@ -42,7 +42,7 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val, list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { struct acpi_nfit_system_address *spa = nfit_spa->spa; - if (nfit_spa_type(spa) == NFIT_SPA_PM) + if (nfit_spa_type(spa) != NFIT_SPA_PM) continue; /* find the spa that covers the mce addr */ if (spa->address > mce->addr) -- cgit v1.1 From 1e8b8d9619f9476e94f32eb20cab000d50d236aa Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 9 Sep 2016 09:10:08 -0700 Subject: libnvdimm: allow legacy (e820) pmem region to clear bad blocks Bad blocks can be 
injected via /sys/block/pmemN/badblocks. In a situation where legacy pmem is being used or a pmem region created by using memmap kernel parameter, the injected bad blocks are not cleared due to nvdimm_clear_poison() failing from lack of ndctl function pointer. In this case we need to just return as handled and allow the bad blocks to be cleared rather than fail. Reviewed-by: Vishal Verma Signed-off-by: Dave Jiang Signed-off-by: Dan Williams --- drivers/nvdimm/bus.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 458daf9..935866f 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -185,8 +185,12 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, return -ENXIO; nd_desc = nvdimm_bus->nd_desc; + /* + * if ndctl does not exist, it's PMEM_LEGACY and + * we want to just pretend everything is handled. + */ if (!nd_desc->ndctl) - return -ENXIO; + return len; memset(&ars_cap, 0, sizeof(ars_cap)); ars_cap.address = phys; -- cgit v1.1 From 05f1b12f71a49848730a0eb9acda032d5c43432b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 8 Sep 2016 08:42:06 +0100 Subject: net: x25: remove null checks on arrays calling_ae and called_ae dtefacs.calling_ae and called_ae are both 20 element __u8 arrays and cannot be null and hence are redundant checks. Remove these. Signed-off-by: Colin Ian King Signed-off-by: David S. 
Miller --- net/x25/af_x25.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index a750f33..f83b74d 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1500,12 +1500,8 @@ out_fac_release: goto out_dtefac_release; if (dtefacs.calling_len > X25_MAX_AE_LEN) goto out_dtefac_release; - if (dtefacs.calling_ae == NULL) - goto out_dtefac_release; if (dtefacs.called_len > X25_MAX_AE_LEN) goto out_dtefac_release; - if (dtefacs.called_ae == NULL) - goto out_dtefac_release; x25->dte_facilities = dtefacs; rc = 0; out_dtefac_release: -- cgit v1.1 From 4ffd03f5e47d18e06543f585d71a5540e7e61f0e Mon Sep 17 00:00:00 2001 From: Raju Lakkaraju Date: Thu, 8 Sep 2016 14:09:31 +0530 Subject: net: phy: Fixed checkpatch errors for Microsemi PHYs. The existing VSC85xx PHY driver did not follow the coding style and caused "checkpatch" to complain. This commit fixes this. Signed-off-by: Raju Lakkaraju Signed-off-by: David S. Miller --- drivers/net/phy/Kconfig | 6 +- drivers/net/phy/mscc.c | 178 ++++++++++++++++++++++++------------------------ 2 files changed, 92 insertions(+), 92 deletions(-) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 1c3e07c..87b566f 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -274,9 +274,9 @@ config MICROCHIP_PHY Supports the LAN88XX PHYs. 
config MICROSEMI_PHY - tristate "Microsemi PHYs" - ---help--- - Currently supports the VSC8531 and VSC8541 PHYs + tristate "Microsemi PHYs" + ---help--- + Currently supports the VSC8531 and VSC8541 PHYs config NATIONAL_PHY tristate "National Semiconductor PHYs" diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c index ad33390..c09cc4a 100644 --- a/drivers/net/phy/mscc.c +++ b/drivers/net/phy/mscc.c @@ -13,135 +13,135 @@ #include enum rgmii_rx_clock_delay { - RGMII_RX_CLK_DELAY_0_2_NS = 0, - RGMII_RX_CLK_DELAY_0_8_NS = 1, - RGMII_RX_CLK_DELAY_1_1_NS = 2, - RGMII_RX_CLK_DELAY_1_7_NS = 3, - RGMII_RX_CLK_DELAY_2_0_NS = 4, - RGMII_RX_CLK_DELAY_2_3_NS = 5, - RGMII_RX_CLK_DELAY_2_6_NS = 6, - RGMII_RX_CLK_DELAY_3_4_NS = 7 + RGMII_RX_CLK_DELAY_0_2_NS = 0, + RGMII_RX_CLK_DELAY_0_8_NS = 1, + RGMII_RX_CLK_DELAY_1_1_NS = 2, + RGMII_RX_CLK_DELAY_1_7_NS = 3, + RGMII_RX_CLK_DELAY_2_0_NS = 4, + RGMII_RX_CLK_DELAY_2_3_NS = 5, + RGMII_RX_CLK_DELAY_2_6_NS = 6, + RGMII_RX_CLK_DELAY_3_4_NS = 7 }; -#define MII_VSC85XX_INT_MASK 25 -#define MII_VSC85XX_INT_MASK_MASK 0xa000 -#define MII_VSC85XX_INT_STATUS 26 +#define MII_VSC85XX_INT_MASK 25 +#define MII_VSC85XX_INT_MASK_MASK 0xa000 +#define MII_VSC85XX_INT_STATUS 26 -#define MSCC_EXT_PAGE_ACCESS 31 -#define MSCC_PHY_PAGE_STANDARD 0x0000 /* Standard registers */ -#define MSCC_PHY_PAGE_EXTENDED_2 0x0002 /* Extended reg - page 2 */ +#define MSCC_EXT_PAGE_ACCESS 31 +#define MSCC_PHY_PAGE_STANDARD 0x0000 /* Standard registers */ +#define MSCC_PHY_PAGE_EXTENDED_2 0x0002 /* Extended reg - page 2 */ /* Extended Page 2 Registers */ -#define MSCC_PHY_RGMII_CNTL 20 -#define RGMII_RX_CLK_DELAY_MASK 0x0070 -#define RGMII_RX_CLK_DELAY_POS 4 +#define MSCC_PHY_RGMII_CNTL 20 +#define RGMII_RX_CLK_DELAY_MASK 0x0070 +#define RGMII_RX_CLK_DELAY_POS 4 /* Microsemi PHY ID's */ -#define PHY_ID_VSC8531 0x00070570 -#define PHY_ID_VSC8541 0x00070770 +#define PHY_ID_VSC8531 0x00070570 +#define PHY_ID_VSC8541 0x00070770 static int vsc85xx_phy_page_set(struct 
phy_device *phydev, u8 page) { - int rc; + int rc; - rc = phy_write(phydev, MSCC_EXT_PAGE_ACCESS, page); - return rc; + rc = phy_write(phydev, MSCC_EXT_PAGE_ACCESS, page); + return rc; } static int vsc85xx_default_config(struct phy_device *phydev) { - int rc; - u16 reg_val; + int rc; + u16 reg_val; - mutex_lock(&phydev->lock); - rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_EXTENDED_2); - if (rc != 0) - goto out_unlock; + mutex_lock(&phydev->lock); + rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_EXTENDED_2); + if (rc != 0) + goto out_unlock; - reg_val = phy_read(phydev, MSCC_PHY_RGMII_CNTL); - reg_val &= ~(RGMII_RX_CLK_DELAY_MASK); - reg_val |= (RGMII_RX_CLK_DELAY_1_1_NS << RGMII_RX_CLK_DELAY_POS); - phy_write(phydev, MSCC_PHY_RGMII_CNTL, reg_val); - rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_STANDARD); + reg_val = phy_read(phydev, MSCC_PHY_RGMII_CNTL); + reg_val &= ~(RGMII_RX_CLK_DELAY_MASK); + reg_val |= (RGMII_RX_CLK_DELAY_1_1_NS << RGMII_RX_CLK_DELAY_POS); + phy_write(phydev, MSCC_PHY_RGMII_CNTL, reg_val); + rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_STANDARD); out_unlock: - mutex_unlock(&phydev->lock); + mutex_unlock(&phydev->lock); - return rc; + return rc; } static int vsc85xx_config_init(struct phy_device *phydev) { - int rc; + int rc; - rc = vsc85xx_default_config(phydev); - if (rc) - return rc; - rc = genphy_config_init(phydev); + rc = vsc85xx_default_config(phydev); + if (rc) + return rc; + rc = genphy_config_init(phydev); - return rc; + return rc; } static int vsc85xx_ack_interrupt(struct phy_device *phydev) { - int rc = 0; + int rc = 0; - if (phydev->interrupts == PHY_INTERRUPT_ENABLED) - rc = phy_read(phydev, MII_VSC85XX_INT_STATUS); + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) + rc = phy_read(phydev, MII_VSC85XX_INT_STATUS); - return (rc < 0) ? rc : 0; + return (rc < 0) ? 
rc : 0; } static int vsc85xx_config_intr(struct phy_device *phydev) { - int rc; - - if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { - rc = phy_write(phydev, MII_VSC85XX_INT_MASK, - MII_VSC85XX_INT_MASK_MASK); - } else { - rc = phy_write(phydev, MII_VSC85XX_INT_MASK, 0); - if (rc < 0) - return rc; - rc = phy_read(phydev, MII_VSC85XX_INT_STATUS); - } - - return rc; + int rc; + + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + rc = phy_write(phydev, MII_VSC85XX_INT_MASK, + MII_VSC85XX_INT_MASK_MASK); + } else { + rc = phy_write(phydev, MII_VSC85XX_INT_MASK, 0); + if (rc < 0) + return rc; + rc = phy_read(phydev, MII_VSC85XX_INT_STATUS); + } + + return rc; } /* Microsemi VSC85xx PHYs */ static struct phy_driver vsc85xx_driver[] = { { - .phy_id = PHY_ID_VSC8531, - .name = "Microsemi VSC8531", - .phy_id_mask = 0xfffffff0, - .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, - .soft_reset = &genphy_soft_reset, - .config_init = &vsc85xx_config_init, - .config_aneg = &genphy_config_aneg, - .aneg_done = &genphy_aneg_done, - .read_status = &genphy_read_status, - .ack_interrupt = &vsc85xx_ack_interrupt, - .config_intr = &vsc85xx_config_intr, - .suspend = &genphy_suspend, - .resume = &genphy_resume, + .phy_id = PHY_ID_VSC8531, + .name = "Microsemi VSC8531", + .phy_id_mask = 0xfffffff0, + .features = PHY_GBIT_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .soft_reset = &genphy_soft_reset, + .config_init = &vsc85xx_config_init, + .config_aneg = &genphy_config_aneg, + .aneg_done = &genphy_aneg_done, + .read_status = &genphy_read_status, + .ack_interrupt = &vsc85xx_ack_interrupt, + .config_intr = &vsc85xx_config_intr, + .suspend = &genphy_suspend, + .resume = &genphy_resume, }, { - .phy_id = PHY_ID_VSC8541, - .name = "Microsemi VSC8541 SyncE", - .phy_id_mask = 0xfffffff0, - .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, - .soft_reset = &genphy_soft_reset, - .config_init = &vsc85xx_config_init, - .config_aneg = &genphy_config_aneg, - .aneg_done = 
&genphy_aneg_done, - .read_status = &genphy_read_status, - .ack_interrupt = &vsc85xx_ack_interrupt, - .config_intr = &vsc85xx_config_intr, - .suspend = &genphy_suspend, - .resume = &genphy_resume, + .phy_id = PHY_ID_VSC8541, + .name = "Microsemi VSC8541 SyncE", + .phy_id_mask = 0xfffffff0, + .features = PHY_GBIT_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .soft_reset = &genphy_soft_reset, + .config_init = &vsc85xx_config_init, + .config_aneg = &genphy_config_aneg, + .aneg_done = &genphy_aneg_done, + .read_status = &genphy_read_status, + .ack_interrupt = &vsc85xx_ack_interrupt, + .config_intr = &vsc85xx_config_intr, + .suspend = &genphy_suspend, + .resume = &genphy_resume, } }; @@ -149,9 +149,9 @@ static struct phy_driver vsc85xx_driver[] = { module_phy_driver(vsc85xx_driver); static struct mdio_device_id __maybe_unused vsc85xx_tbl[] = { - { PHY_ID_VSC8531, 0xfffffff0, }, - { PHY_ID_VSC8541, 0xfffffff0, }, - { } + { PHY_ID_VSC8531, 0xfffffff0, }, + { PHY_ID_VSC8541, 0xfffffff0, }, + { } }; MODULE_DEVICE_TABLE(mdio, vsc85xx_tbl); -- cgit v1.1 From 9438451e7325815fb38db04b1da0670ecc601b5e Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Thu, 8 Sep 2016 16:43:23 +0800 Subject: qede: mark qede_set_features() static We get 1 warning when building kernel with W=1: drivers/net/ethernet/qlogic/qede/qede_main.c:2113:5: warning: no previous prototype for 'qede_set_features' [-Wmissing-prototypes] In fact, this function is only used in the file in which it is declared and don't need a declaration, but can be made static. so this patch marks this function with 'static'. Signed-off-by: Baoyou Xie Acked-by: Yuval Mintz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/qede/qede_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index b4a56e6..578bbec 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -2116,7 +2116,7 @@ static void qede_vlan_mark_nonconfigured(struct qede_dev *edev) edev->accept_any_vlan = false; } -int qede_set_features(struct net_device *dev, netdev_features_t features) +static int qede_set_features(struct net_device *dev, netdev_features_t features) { struct qede_dev *edev = netdev_priv(dev); netdev_features_t changes = features ^ dev->features; -- cgit v1.1 From 46dfc23e9e0823616abee670cd24acde0d900ca9 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 8 Sep 2016 10:04:24 +0100 Subject: via-velocity: remove null pointer check on array tdinfo->skb_dma tdinfo->skb_dma is a 7 element array of dma_addr_t hence cannot be null, so the null pointer check on tdinfo->skb_dma is redundant. Remove it. Signed-off-by: Colin Ian King Acked-by: Francois Romieu Signed-off-by: David S. 
Miller --- drivers/net/ethernet/via/via-velocity.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c index f38696c..908e72e 100644 --- a/drivers/net/ethernet/via/via-velocity.c +++ b/drivers/net/ethernet/via/via-velocity.c @@ -1724,24 +1724,21 @@ static void velocity_free_tx_buf(struct velocity_info *vptr, struct velocity_td_info *tdinfo, struct tx_desc *td) { struct sk_buff *skb = tdinfo->skb; + int i; /* * Don't unmap the pre-allocated tx_bufs */ - if (tdinfo->skb_dma) { - int i; + for (i = 0; i < tdinfo->nskb_dma; i++) { + size_t pktlen = max_t(size_t, skb->len, ETH_ZLEN); - for (i = 0; i < tdinfo->nskb_dma; i++) { - size_t pktlen = max_t(size_t, skb->len, ETH_ZLEN); + /* For scatter-gather */ + if (skb_shinfo(skb)->nr_frags > 0) + pktlen = max_t(size_t, pktlen, + td->td_buf[i].size & ~TD_QUEUE); - /* For scatter-gather */ - if (skb_shinfo(skb)->nr_frags > 0) - pktlen = max_t(size_t, pktlen, - td->td_buf[i].size & ~TD_QUEUE); - - dma_unmap_single(vptr->dev, tdinfo->skb_dma[i], - le16_to_cpu(pktlen), DMA_TO_DEVICE); - } + dma_unmap_single(vptr->dev, tdinfo->skb_dma[i], + le16_to_cpu(pktlen), DMA_TO_DEVICE); } dev_kfree_skb_irq(skb); tdinfo->skb = NULL; -- cgit v1.1 From 7303a1475008bee5c3e82a06a282568415690d72 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 8 Sep 2016 17:54:11 +0800 Subject: sctp: identify chunks that need to be fragmented at IP level Previously, without GSO, it was easy to identify it: if the chunk didn't fit and there was no data chunk in the packet yet, we could fragment at IP level. So if there was an auth chunk and we were bundling a big data chunk, it would fragment regardless of the size of the auth chunk. This also works for the context of PMTU reductions. But with GSO, we cannot distinguish such PMTU events anymore, as the packet is allowed to exceed PMTU. 
So we need another check: to ensure that the chunk that we are adding, actually fits the current PMTU. If it doesn't, trigger a flush and let it be fragmented at IP level in the next round. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/output.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/sctp/output.c b/net/sctp/output.c index 1f1682b..31b7bc3 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -878,7 +878,7 @@ static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet, struct sctp_chunk *chunk, u16 chunk_len) { - size_t psize, pmtu; + size_t psize, pmtu, maxsize; sctp_xmit_t retval = SCTP_XMIT_OK; psize = packet->size; @@ -906,6 +906,17 @@ static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet, goto out; } + /* Similarly, if this chunk was built before a PMTU + * reduction, we have to fragment it at IP level now. So + * if the packet already contains something, we need to + * flush. + */ + maxsize = pmtu - packet->overhead; + if (packet->auth) + maxsize -= WORD_ROUND(packet->auth->skb->len); + if (chunk_len > maxsize) + retval = SCTP_XMIT_PMTU_FULL; + /* It is also okay to fragment if the chunk we are * adding is a control chunk, but only if current packet * is not a GSO one otherwise it causes fragmentation of -- cgit v1.1 From 47b02f7294a483387772a46931da942b2ca9d845 Mon Sep 17 00:00:00 2001 From: Lars Persson Date: Thu, 8 Sep 2016 13:24:21 +0200 Subject: dwc_eth_qos: do not register semi-initialized device We move register_netdev() to the end of dwceqos_probe() to close any races where the netdev callbacks are called before the initialization has finished. Reported-by: Pavel Andrianov Signed-off-by: Lars Persson Signed-off-by: David S. 
Miller --- drivers/net/ethernet/synopsys/dwc_eth_qos.c | 38 ++++++++++++++--------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/synopsys/dwc_eth_qos.c b/drivers/net/ethernet/synopsys/dwc_eth_qos.c index 5a3941b..4490eba 100644 --- a/drivers/net/ethernet/synopsys/dwc_eth_qos.c +++ b/drivers/net/ethernet/synopsys/dwc_eth_qos.c @@ -1246,7 +1246,7 @@ static int dwceqos_mii_init(struct net_local *lp) lp->mii_bus->read = &dwceqos_mdio_read; lp->mii_bus->write = &dwceqos_mdio_write; lp->mii_bus->priv = lp; - lp->mii_bus->parent = &lp->ndev->dev; + lp->mii_bus->parent = &lp->pdev->dev; of_address_to_resource(lp->pdev->dev.of_node, 0, &res); snprintf(lp->mii_bus->id, MII_BUS_ID_SIZE, "%.8llx", @@ -2853,25 +2853,17 @@ static int dwceqos_probe(struct platform_device *pdev) ndev->features = ndev->hw_features; - netif_napi_add(ndev, &lp->napi, dwceqos_rx_poll, NAPI_POLL_WEIGHT); - - ret = register_netdev(ndev); - if (ret) { - dev_err(&pdev->dev, "Cannot register net device, aborting.\n"); - goto err_out_clk_dis_aper; - } - lp->phy_ref_clk = devm_clk_get(&pdev->dev, "phy_ref_clk"); if (IS_ERR(lp->phy_ref_clk)) { dev_err(&pdev->dev, "phy_ref_clk clock not found.\n"); ret = PTR_ERR(lp->phy_ref_clk); - goto err_out_unregister_netdev; + goto err_out_clk_dis_aper; } ret = clk_prepare_enable(lp->phy_ref_clk); if (ret) { dev_err(&pdev->dev, "Unable to enable device clock.\n"); - goto err_out_unregister_netdev; + goto err_out_clk_dis_aper; } lp->phy_node = of_parse_phandle(lp->pdev->dev.of_node, @@ -2880,7 +2872,7 @@ static int dwceqos_probe(struct platform_device *pdev) ret = of_phy_register_fixed_link(lp->pdev->dev.of_node); if (ret < 0) { dev_err(&pdev->dev, "invalid fixed-link"); - goto err_out_unregister_clk_notifier; + goto err_out_clk_dis_phy; } lp->phy_node = of_node_get(lp->pdev->dev.of_node); @@ -2889,7 +2881,7 @@ static int dwceqos_probe(struct platform_device *pdev) ret = of_get_phy_mode(lp->pdev->dev.of_node); if (ret < 0) { 
dev_err(&lp->pdev->dev, "error in getting phy i/f\n"); - goto err_out_unregister_clk_notifier; + goto err_out_clk_dis_phy; } lp->phy_interface = ret; @@ -2897,14 +2889,14 @@ static int dwceqos_probe(struct platform_device *pdev) ret = dwceqos_mii_init(lp); if (ret) { dev_err(&lp->pdev->dev, "error in dwceqos_mii_init\n"); - goto err_out_unregister_clk_notifier; + goto err_out_clk_dis_phy; } ret = dwceqos_mii_probe(ndev); if (ret != 0) { netdev_err(ndev, "mii_probe fail.\n"); ret = -ENXIO; - goto err_out_unregister_clk_notifier; + goto err_out_clk_dis_phy; } dwceqos_set_umac_addr(lp, lp->ndev->dev_addr, 0); @@ -2922,7 +2914,7 @@ static int dwceqos_probe(struct platform_device *pdev) if (ret) { dev_err(&lp->pdev->dev, "Unable to retrieve DT, error %d\n", ret); - goto err_out_unregister_clk_notifier; + goto err_out_clk_dis_phy; } dev_info(&lp->pdev->dev, "pdev->id %d, baseaddr 0x%08lx, irq %d\n", pdev->id, ndev->base_addr, ndev->irq); @@ -2932,18 +2924,24 @@ static int dwceqos_probe(struct platform_device *pdev) if (ret) { dev_err(&lp->pdev->dev, "Unable to request IRQ %d, error %d\n", ndev->irq, ret); - goto err_out_unregister_clk_notifier; + goto err_out_clk_dis_phy; } if (netif_msg_probe(lp)) netdev_dbg(ndev, "net_local@%p\n", lp); + netif_napi_add(ndev, &lp->napi, dwceqos_rx_poll, NAPI_POLL_WEIGHT); + + ret = register_netdev(ndev); + if (ret) { + dev_err(&pdev->dev, "Cannot register net device, aborting.\n"); + goto err_out_clk_dis_phy; + } + return 0; -err_out_unregister_clk_notifier: +err_out_clk_dis_phy: clk_disable_unprepare(lp->phy_ref_clk); -err_out_unregister_netdev: - unregister_netdev(ndev); err_out_clk_dis_aper: clk_disable_unprepare(lp->apb_pclk); err_out_free_netdev: -- cgit v1.1 From d9e6620c8ee108f068cd703b3b82d9a8d38c1ada Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 8 Sep 2016 14:20:17 +0200 Subject: ATM-ENI: Use kmalloc_array() in eni_start() * A multiplication for the size determination of a memory allocation indicated that an array 
data structure should be processed. Thus use the corresponding function "kmalloc_array". This issue was detected by using the Coccinelle software. * Replace the specification of a data structure by a pointer dereference to make the corresponding size determination a bit safer according to the Linux coding style convention. Signed-off-by: Markus Elfring Signed-off-by: David S. Miller --- drivers/atm/eni.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index 6339efd..f2aaf9e 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -1845,8 +1845,9 @@ static int eni_start(struct atm_dev *dev) /* initialize memory management */ buffer_mem = eni_dev->mem - (buf - eni_dev->ram); eni_dev->free_list_size = buffer_mem/MID_MIN_BUF_SIZE/2; - eni_dev->free_list = kmalloc( - sizeof(struct eni_free)*(eni_dev->free_list_size+1),GFP_KERNEL); + eni_dev->free_list = kmalloc_array(eni_dev->free_list_size + 1, + sizeof(*eni_dev->free_list), + GFP_KERNEL); if (!eni_dev->free_list) { printk(KERN_ERR DEV_LABEL "(itf %d): couldn't get free page\n", dev->number); -- cgit v1.1 From 2c4f414f0290e019d463f1f7f447807fd6a6470c Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 8 Sep 2016 15:43:37 +0200 Subject: ATM-ForeRunnerHE: Use kmalloc_array() in he_init_group() * Multiplications for the size determination of memory allocations indicated that array data structures should be processed. Thus use the corresponding function "kmalloc_array". This issue was detected by using the Coccinelle software. * Replace the specification of data types by pointer dereferences to make the corresponding size determination a bit safer according to the Linux coding style convention. Signed-off-by: Markus Elfring Signed-off-by: David S. 
Miller --- drivers/atm/he.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/atm/he.c b/drivers/atm/he.c index 0f5cb37..31b513a 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -779,8 +779,9 @@ static int he_init_group(struct he_dev *he_dev, int group) G0_RBPS_BS + (group * 32)); /* bitmap table */ - he_dev->rbpl_table = kmalloc(BITS_TO_LONGS(RBPL_TABLE_SIZE) - * sizeof(unsigned long), GFP_KERNEL); + he_dev->rbpl_table = kmalloc_array(BITS_TO_LONGS(RBPL_TABLE_SIZE), + sizeof(*he_dev->rbpl_table), + GFP_KERNEL); if (!he_dev->rbpl_table) { hprintk("unable to allocate rbpl bitmap table\n"); return -ENOMEM; @@ -788,8 +789,9 @@ static int he_init_group(struct he_dev *he_dev, int group) bitmap_zero(he_dev->rbpl_table, RBPL_TABLE_SIZE); /* rbpl_virt 64-bit pointers */ - he_dev->rbpl_virt = kmalloc(RBPL_TABLE_SIZE - * sizeof(struct he_buff *), GFP_KERNEL); + he_dev->rbpl_virt = kmalloc_array(RBPL_TABLE_SIZE, + sizeof(*he_dev->rbpl_virt), + GFP_KERNEL); if (!he_dev->rbpl_virt) { hprintk("unable to allocate rbpl virt table\n"); goto out_free_rbpl_table; -- cgit v1.1 From c2f57fb97da5730509a50e316f353d3da17f6c25 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 8 Sep 2016 16:25:15 +0200 Subject: drivers: net: phy: mdio-xgene: Add hardware dependency The mdio-xgene driver is only useful on X-Gene SoC. Signed-off-by: Jean Delvare Cc: Iyappan Subramanian Cc: David S. Miller Acked-by: Iyappan Subramanian Signed-off-by: David S. Miller --- drivers/net/phy/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 47a6434..b4863e4 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -303,6 +303,7 @@ config MDIO_HISI_FEMAC config MDIO_XGENE tristate "APM X-Gene SoC MDIO bus controller" + depends on ARCH_XGENE || COMPILE_TEST help This module provides a driver for the MDIO busses found in the APM X-Gene SoC's. 
-- cgit v1.1 From bf8d85d4f907d2156e6e5d2831378527957d9bde Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Sep 2016 15:40:48 -0700 Subject: ip_tunnel: do not clear l4 hashes If skb has a valid l4 hash, there is no point clearing the hash and forcing a further flow dissection when a tunnel encapsulation is added. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 0f227db..777bc18 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -69,7 +69,7 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, skb_scrub_packet(skb, xnet); - skb_clear_hash(skb); + skb_clear_hash_if_not_l4(skb); skb_dst_set(skb, &rt->dst); memset(IPCB(skb), 0, sizeof(*IPCB(skb))); -- cgit v1.1 From 6088b5823b4cb132a838878747384cbfb5ce6646 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 9 Sep 2016 02:45:28 +0200 Subject: bpf: minor cleanups in helpers Some minor misc cleanups, f.e. use sizeof(__u32) instead of hardcoding and in __bpf_skb_max_len(), I missed that we always have skb->dev valid anyway, so we can drop the unneeded test for dev; also few more other misc bits addressed here. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- kernel/bpf/helpers.c | 6 +++--- net/core/filter.c | 7 +++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 1ea3afb..6df73bd 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -126,7 +126,7 @@ static u64 bpf_get_current_pid_tgid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) { struct task_struct *task = current; - if (!task) + if (unlikely(!task)) return -EINVAL; return (u64) task->tgid << 32 | task->pid; @@ -144,12 +144,12 @@ static u64 bpf_get_current_uid_gid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) kuid_t uid; kgid_t gid; - if (!task) + if (unlikely(!task)) return -EINVAL; current_uid_gid(&uid, &gid); return (u64) from_kgid(&init_user_ns, gid) << 32 | - from_kuid(&init_user_ns, uid); + from_kuid(&init_user_ns, uid); } const struct bpf_func_proto bpf_get_current_uid_gid_proto = { diff --git a/net/core/filter.c b/net/core/filter.c index a83766b..628ed8c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2010,8 +2010,7 @@ static u32 __bpf_skb_min_len(const struct sk_buff *skb) static u32 __bpf_skb_max_len(const struct sk_buff *skb) { - return skb->dev ? skb->dev->mtu + skb->dev->hard_header_len : - 65536; + return skb->dev->mtu + skb->dev->hard_header_len; } static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len) @@ -2605,7 +2604,7 @@ static bool __is_valid_xdp_access(int off, int size, return false; if (off % size != 0) return false; - if (size != 4) + if (size != sizeof(__u32)) return false; return true; @@ -2727,7 +2726,7 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, dst_reg, src_reg, insn); case offsetof(struct __sk_buff, cb[0]) ... 
- offsetof(struct __sk_buff, cb[4]): + offsetof(struct __sk_buff, cb[4]): BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20); prog->cb_access = 1; -- cgit v1.1 From f035a51536af9802f55d8c79bd87f184ebffb093 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 9 Sep 2016 02:45:29 +0200 Subject: bpf: add BPF_SIZEOF and BPF_FIELD_SIZEOF macros Add BPF_SIZEOF() and BPF_FIELD_SIZEOF() macros to improve the code a bit which otherwise often result in overly long bytes_to_bpf_size(sizeof()) and bytes_to_bpf_size(FIELD_SIZEOF()) lines. So place them into a macro helper instead. Moreover, we currently have a BUILD_BUG_ON(BPF_FIELD_SIZEOF()) check in convert_bpf_extensions(), but we should rather make that generic as well and add a BUILD_BUG_ON() test in all BPF_SIZEOF()/BPF_FIELD_SIZEOF() users to detect any rewriter size issues at compile time. Note, there are currently none, but we want to assert that it stays this way. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 14 ++++++++++++++ kernel/trace/bpf_trace.c | 12 ++++++------ net/core/filter.c | 15 +++++++-------- 3 files changed, 27 insertions(+), 14 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index a16439b..7fabad8 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -314,6 +314,20 @@ struct bpf_prog_aux; bpf_size; \ }) +#define BPF_SIZEOF(type) \ + ({ \ + const int __size = bytes_to_bpf_size(sizeof(type)); \ + BUILD_BUG_ON(__size < 0); \ + __size; \ + }) + +#define BPF_FIELD_SIZEOF(type, field) \ + ({ \ + const int __size = bytes_to_bpf_size(FIELD_SIZEOF(type, field)); \ + BUILD_BUG_ON(__size < 0); \ + __size; \ + }) + #ifdef CONFIG_COMPAT /* A struct sock_filter is architecture independent. 
*/ struct compat_sock_fprog { diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index d3869b0..e63d7d4 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -583,18 +583,18 @@ static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, int dst_reg, switch (ctx_off) { case offsetof(struct bpf_perf_event_data, sample_period): BUILD_BUG_ON(FIELD_SIZEOF(struct perf_sample_data, period) != sizeof(u64)); - *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct bpf_perf_event_data_kern, data)), - dst_reg, src_reg, + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern, + data), dst_reg, src_reg, offsetof(struct bpf_perf_event_data_kern, data)); *insn++ = BPF_LDX_MEM(BPF_DW, dst_reg, dst_reg, offsetof(struct perf_sample_data, period)); break; default: - *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct bpf_perf_event_data_kern, regs)), - dst_reg, src_reg, + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern, + regs), dst_reg, src_reg, offsetof(struct bpf_perf_event_data_kern, regs)); - *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(sizeof(long)), - dst_reg, dst_reg, ctx_off); + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), dst_reg, dst_reg, ctx_off); break; } diff --git a/net/core/filter.c b/net/core/filter.c index 628ed8c..120c813 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -233,9 +233,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_HATYPE: BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2); - BUILD_BUG_ON(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)) < 0); - *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)), + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), BPF_REG_TMP, BPF_REG_CTX, offsetof(struct sk_buff, dev)); /* if (tmp != 0) goto pc + 1 */ @@ -2685,7 +2684,7 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type 
type, int dst_reg, case offsetof(struct __sk_buff, ifindex): BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); - *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)), + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), dst_reg, src_reg, offsetof(struct sk_buff, dev)); *insn++ = BPF_JMP_IMM(BPF_JEQ, dst_reg, 0, 1); @@ -2750,7 +2749,7 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, break; case offsetof(struct __sk_buff, data): - *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, data)), + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data), dst_reg, src_reg, offsetof(struct sk_buff, data)); break; @@ -2759,8 +2758,8 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, ctx_off -= offsetof(struct __sk_buff, data_end); ctx_off += offsetof(struct sk_buff, cb); ctx_off += offsetof(struct bpf_skb_data_end, data_end); - *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(sizeof(void *)), - dst_reg, src_reg, ctx_off); + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), dst_reg, src_reg, + ctx_off); break; case offsetof(struct __sk_buff, tc_index): @@ -2795,12 +2794,12 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, int dst_reg, switch (ctx_off) { case offsetof(struct xdp_md, data): - *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct xdp_buff, data)), + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data), dst_reg, src_reg, offsetof(struct xdp_buff, data)); break; case offsetof(struct xdp_md, data_end): - *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct xdp_buff, data_end)), + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end), dst_reg, src_reg, offsetof(struct xdp_buff, data_end)); break; -- cgit v1.1 From 374fb54eeaaa6b2cb82bca73a11273687bb2a96a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 9 Sep 2016 02:45:30 +0200 Subject: bpf: add own ctx rewriter on ifindex for clsact progs When fetching 
ifindex, we don't need to test dev for being NULL since we're always guaranteed to have a valid dev for clsact programs. Thus, avoid this test in fast path. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/filter.c | 37 +++++++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 120c813..d6d9bb8 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2634,10 +2634,10 @@ void bpf_warn_invalid_xdp_action(u32 act) } EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); -static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, - int src_reg, int ctx_off, - struct bpf_insn *insn_buf, - struct bpf_prog *prog) +static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg, + int src_reg, int ctx_off, + struct bpf_insn *insn_buf, + struct bpf_prog *prog) { struct bpf_insn *insn = insn_buf; @@ -2785,6 +2785,31 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, return insn - insn_buf; } +static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, int dst_reg, + int src_reg, int ctx_off, + struct bpf_insn *insn_buf, + struct bpf_prog *prog) +{ + struct bpf_insn *insn = insn_buf; + + switch (ctx_off) { + case offsetof(struct __sk_buff, ifindex): + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), + dst_reg, src_reg, + offsetof(struct sk_buff, dev)); + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, dst_reg, + offsetof(struct net_device, ifindex)); + break; + default: + return sk_filter_convert_ctx_access(type, dst_reg, src_reg, + ctx_off, insn_buf, prog); + } + + return insn - insn_buf; +} + static u32 xdp_convert_ctx_access(enum bpf_access_type type, int dst_reg, int src_reg, int ctx_off, struct bpf_insn *insn_buf, @@ -2811,13 +2836,13 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, int 
dst_reg, static const struct bpf_verifier_ops sk_filter_ops = { .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, - .convert_ctx_access = bpf_net_convert_ctx_access, + .convert_ctx_access = sk_filter_convert_ctx_access, }; static const struct bpf_verifier_ops tc_cls_act_ops = { .get_func_proto = tc_cls_act_func_proto, .is_valid_access = tc_cls_act_is_valid_access, - .convert_ctx_access = bpf_net_convert_ctx_access, + .convert_ctx_access = tc_cls_act_convert_ctx_access, }; static const struct bpf_verifier_ops xdp_ops = { -- cgit v1.1 From f3694e00123802d688180e7ae90b240669910e3c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 9 Sep 2016 02:45:31 +0200 Subject: bpf: add BPF_CALL_x macros for declaring helpers This work adds BPF_CALL_() macros and converts all the eBPF helper functions to use them, in a similar fashion like we do with SYSCALL_DEFINE() macros that are used today. Motivation for this is to hide all the register handling and all necessary casts from the user, so that it is done automatically in the background when adding a BPF_CALL_() call. This makes current helpers easier to review, eases to write future helpers, avoids getting the casting mess wrong, and allows for extending all helpers at once (f.e. build time checks, etc). It also helps detecting more easily in code reviews that unused registers are not instrumented in the code by accident, breaking compatibility with existing programs. BPF_CALL_() internals are quite similar to SYSCALL_DEFINE() ones with some fundamental differences, for example, for generating the actual helper function that carries all u64 regs, we need to fill unused regs, so that we always end up with 5 u64 regs as an argument. I reviewed several 0-5 generated BPF_CALL_() variants of the .i results and they look all as expected. No sparse issue spotted. We let this also sit for a few days with Fengguang's kbuild test robot, and there were no issues seen. 
On s390, it barked on the "uses dynamic stack allocation" notice, which is an old one from bpf_perf_event_output{,_tp}() reappearing here due to the conversion to the call wrapper, just telling that the perf raw record/frag sits on stack (gcc with s390's -mwarn-dynamicstack), but that's all. Did various runtime tests and they were fine as well. All eBPF helpers are now converted to use these macros, getting rid of a good chunk of all the raw castings. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 50 ++++++++++++++++++ kernel/bpf/core.c | 2 +- kernel/bpf/helpers.c | 46 +++++------------ kernel/bpf/stackmap.c | 5 +- kernel/trace/bpf_trace.c | 75 +++++++++++++-------------- net/core/filter.c | 129 ++++++++++++++++++----------------------------- 6 files changed, 149 insertions(+), 158 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 7fabad8..1f09c52 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -328,6 +328,56 @@ struct bpf_prog_aux; __size; \ }) +#define __BPF_MAP_0(m, v, ...) v +#define __BPF_MAP_1(m, v, t, a, ...) m(t, a) +#define __BPF_MAP_2(m, v, t, a, ...) m(t, a), __BPF_MAP_1(m, v, __VA_ARGS__) +#define __BPF_MAP_3(m, v, t, a, ...) m(t, a), __BPF_MAP_2(m, v, __VA_ARGS__) +#define __BPF_MAP_4(m, v, t, a, ...) m(t, a), __BPF_MAP_3(m, v, __VA_ARGS__) +#define __BPF_MAP_5(m, v, t, a, ...) m(t, a), __BPF_MAP_4(m, v, __VA_ARGS__) + +#define __BPF_REG_0(...) __BPF_PAD(5) +#define __BPF_REG_1(...) __BPF_MAP(1, __VA_ARGS__), __BPF_PAD(4) +#define __BPF_REG_2(...) __BPF_MAP(2, __VA_ARGS__), __BPF_PAD(3) +#define __BPF_REG_3(...) __BPF_MAP(3, __VA_ARGS__), __BPF_PAD(2) +#define __BPF_REG_4(...) __BPF_MAP(4, __VA_ARGS__), __BPF_PAD(1) +#define __BPF_REG_5(...) __BPF_MAP(5, __VA_ARGS__) + +#define __BPF_MAP(n, ...) __BPF_MAP_##n(__VA_ARGS__) +#define __BPF_REG(n, ...) 
__BPF_REG_##n(__VA_ARGS__) + +#define __BPF_CAST(t, a) \ + (__force t) \ + (__force \ + typeof(__builtin_choose_expr(sizeof(t) == sizeof(unsigned long), \ + (unsigned long)0, (t)0))) a +#define __BPF_V void +#define __BPF_N + +#define __BPF_DECL_ARGS(t, a) t a +#define __BPF_DECL_REGS(t, a) u64 a + +#define __BPF_PAD(n) \ + __BPF_MAP(n, __BPF_DECL_ARGS, __BPF_N, u64, __ur_1, u64, __ur_2, \ + u64, __ur_3, u64, __ur_4, u64, __ur_5) + +#define BPF_CALL_x(x, name, ...) \ + static __always_inline \ + u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ + u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \ + u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \ + { \ + return ____##name(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\ + } \ + static __always_inline \ + u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)) + +#define BPF_CALL_0(name, ...) BPF_CALL_x(0, name, __VA_ARGS__) +#define BPF_CALL_1(name, ...) BPF_CALL_x(1, name, __VA_ARGS__) +#define BPF_CALL_2(name, ...) BPF_CALL_x(2, name, __VA_ARGS__) +#define BPF_CALL_3(name, ...) BPF_CALL_x(3, name, __VA_ARGS__) +#define BPF_CALL_4(name, ...) BPF_CALL_x(4, name, __VA_ARGS__) +#define BPF_CALL_5(name, ...) BPF_CALL_x(5, name, __VA_ARGS__) + #ifdef CONFIG_COMPAT /* A struct sock_filter is architecture independent. 
*/ struct compat_sock_fprog { diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 03fd23d..7b7baae 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1018,7 +1018,7 @@ void bpf_user_rnd_init_once(void) prandom_init_once(&bpf_user_rnd_state); } -u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_0(bpf_user_rnd_u32) { /* Should someone ever have the rather unwise idea to use some * of the registers passed into this function, then note that diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 6df73bd..a5b8bf8 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -16,6 +16,7 @@ #include #include #include +#include /* If kernel subsystem is allowing eBPF programs to call this function, * inside its own verifier_ops->get_func_proto() callback it should return @@ -26,24 +27,10 @@ * if program is allowed to access maps, so check rcu_read_lock_held in * all three functions. */ -static u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key) { - /* verifier checked that R1 contains a valid pointer to bpf_map - * and R2 points to a program stack and map->key_size bytes were - * initialized - */ - struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; - void *key = (void *) (unsigned long) r2; - void *value; - WARN_ON_ONCE(!rcu_read_lock_held()); - - value = map->ops->map_lookup_elem(map, key); - - /* lookup() returns either pointer to element value or NULL - * which is the meaning of PTR_TO_MAP_VALUE_OR_NULL type - */ - return (unsigned long) value; + return (unsigned long) map->ops->map_lookup_elem(map, key); } const struct bpf_func_proto bpf_map_lookup_elem_proto = { @@ -54,15 +41,11 @@ const struct bpf_func_proto bpf_map_lookup_elem_proto = { .arg2_type = ARG_PTR_TO_MAP_KEY, }; -static u64 bpf_map_update_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key, + void *, value, u64, 
flags) { - struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; - void *key = (void *) (unsigned long) r2; - void *value = (void *) (unsigned long) r3; - WARN_ON_ONCE(!rcu_read_lock_held()); - - return map->ops->map_update_elem(map, key, value, r4); + return map->ops->map_update_elem(map, key, value, flags); } const struct bpf_func_proto bpf_map_update_elem_proto = { @@ -75,13 +58,9 @@ const struct bpf_func_proto bpf_map_update_elem_proto = { .arg4_type = ARG_ANYTHING, }; -static u64 bpf_map_delete_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key) { - struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; - void *key = (void *) (unsigned long) r2; - WARN_ON_ONCE(!rcu_read_lock_held()); - return map->ops->map_delete_elem(map, key); } @@ -99,7 +78,7 @@ const struct bpf_func_proto bpf_get_prandom_u32_proto = { .ret_type = RET_INTEGER, }; -static u64 bpf_get_smp_processor_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_0(bpf_get_smp_processor_id) { return smp_processor_id(); } @@ -110,7 +89,7 @@ const struct bpf_func_proto bpf_get_smp_processor_id_proto = { .ret_type = RET_INTEGER, }; -static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_0(bpf_ktime_get_ns) { /* NMI safe access to clock monotonic */ return ktime_get_mono_fast_ns(); @@ -122,7 +101,7 @@ const struct bpf_func_proto bpf_ktime_get_ns_proto = { .ret_type = RET_INTEGER, }; -static u64 bpf_get_current_pid_tgid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_0(bpf_get_current_pid_tgid) { struct task_struct *task = current; @@ -138,7 +117,7 @@ const struct bpf_func_proto bpf_get_current_pid_tgid_proto = { .ret_type = RET_INTEGER, }; -static u64 bpf_get_current_uid_gid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_0(bpf_get_current_uid_gid) { struct task_struct *task = current; kuid_t uid; @@ -158,10 +137,9 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto = { .ret_type = RET_INTEGER, }; 
-static u64 bpf_get_current_comm(u64 r1, u64 size, u64 r3, u64 r4, u64 r5) +BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size) { struct task_struct *task = current; - char *buf = (char *) (long) r1; if (unlikely(!task)) goto err_clear; diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index bf4495f..732ae16 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -116,10 +116,9 @@ free_smap: return ERR_PTR(err); } -u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5) +BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, + u64, flags) { - struct pt_regs *regs = (struct pt_regs *) (long) r1; - struct bpf_map *map = (struct bpf_map *) (long) r2; struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); struct perf_callchain_entry *trace; struct stack_map_bucket *bucket, *new_bucket, *old_bucket; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index e63d7d4..5dcb992 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -61,11 +61,9 @@ unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) } EXPORT_SYMBOL_GPL(trace_call_bpf); -static u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr) { - void *dst = (void *) (long) r1; - int ret, size = (int) r2; - void *unsafe_ptr = (void *) (long) r3; + int ret; ret = probe_kernel_read(dst, unsafe_ptr, size); if (unlikely(ret < 0)) @@ -83,12 +81,9 @@ static const struct bpf_func_proto bpf_probe_read_proto = { .arg3_type = ARG_ANYTHING, }; -static u64 bpf_probe_write_user(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src, + u32, size) { - void *unsafe_ptr = (void *) (long) r1; - void *src = (void *) (long) r2; - int size = (int) r3; - /* * Ensure we're in user context which is safe for the helper to * run. This helper has no business in a kthread. 
@@ -130,9 +125,9 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void) * limited trace_printk() * only %d %u %x %ld %lu %lx %lld %llu %llx %p %s conversion specifiers allowed */ -static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5) +BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, + u64, arg2, u64, arg3) { - char *fmt = (char *) (long) r1; bool str_seen = false; int mod[3] = {}; int fmt_cnt = 0; @@ -178,16 +173,16 @@ static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5) switch (fmt_cnt) { case 1: - unsafe_addr = r3; - r3 = (long) buf; + unsafe_addr = arg1; + arg1 = (long) buf; break; case 2: - unsafe_addr = r4; - r4 = (long) buf; + unsafe_addr = arg2; + arg2 = (long) buf; break; case 3: - unsafe_addr = r5; - r5 = (long) buf; + unsafe_addr = arg3; + arg3 = (long) buf; break; } buf[0] = 0; @@ -209,9 +204,9 @@ static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5) } return __trace_printk(1/* fake ip will not be printed */, fmt, - mod[0] == 2 ? r3 : mod[0] == 1 ? (long) r3 : (u32) r3, - mod[1] == 2 ? r4 : mod[1] == 1 ? (long) r4 : (u32) r4, - mod[2] == 2 ? r5 : mod[2] == 1 ? (long) r5 : (u32) r5); + mod[0] == 2 ? arg1 : mod[0] == 1 ? (long) arg1 : (u32) arg1, + mod[1] == 2 ? arg2 : mod[1] == 1 ? (long) arg2 : (u32) arg2, + mod[2] == 2 ? arg3 : mod[2] == 1 ? 
(long) arg3 : (u32) arg3); } static const struct bpf_func_proto bpf_trace_printk_proto = { @@ -233,9 +228,8 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void) return &bpf_trace_printk_proto; } -static u64 bpf_perf_event_read(u64 r1, u64 flags, u64 r3, u64 r4, u64 r5) +BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags) { - struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; struct bpf_array *array = container_of(map, struct bpf_array, map); unsigned int cpu = smp_processor_id(); u64 index = flags & BPF_F_INDEX_MASK; @@ -312,11 +306,9 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, return 0; } -static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size) +BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, + u64, flags, void *, data, u64, size) { - struct pt_regs *regs = (struct pt_regs *)(long) r1; - struct bpf_map *map = (struct bpf_map *)(long) r2; - void *data = (void *)(long) r4; struct perf_raw_record raw = { .frag = { .size = size, @@ -367,7 +359,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, return __bpf_perf_event_output(regs, map, flags, &raw); } -static u64 bpf_get_current_task(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_0(bpf_get_current_task) { return (long) current; } @@ -378,16 +370,13 @@ static const struct bpf_func_proto bpf_get_current_task_proto = { .ret_type = RET_INTEGER, }; -static u64 bpf_current_task_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx) { - struct bpf_map *map = (struct bpf_map *)(long)r1; struct bpf_array *array = container_of(map, struct bpf_array, map); struct cgroup *cgrp; - u32 idx = (u32)r2; if (unlikely(in_interrupt())) return -EINVAL; - if (unlikely(idx >= array->map.max_entries)) return -E2BIG; @@ -481,16 +470,17 @@ static struct bpf_prog_type_list kprobe_tl = { .type = BPF_PROG_TYPE_KPROBE, }; 
-static u64 bpf_perf_event_output_tp(u64 r1, u64 r2, u64 index, u64 r4, u64 size) +BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map, + u64, flags, void *, data, u64, size) { + struct pt_regs *regs = *(struct pt_regs **)tp_buff; + /* * r1 points to perf tracepoint buffer where first 8 bytes are hidden * from bpf program and contain a pointer to 'struct pt_regs'. Fetch it - * from there and call the same bpf_perf_event_output() helper + * from there and call the same bpf_perf_event_output() helper inline. */ - u64 ctx = *(long *)(uintptr_t)r1; - - return bpf_perf_event_output(ctx, r2, index, r4, size); + return ____bpf_perf_event_output(regs, map, flags, data, size); } static const struct bpf_func_proto bpf_perf_event_output_proto_tp = { @@ -504,11 +494,18 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_tp = { .arg5_type = ARG_CONST_STACK_SIZE, }; -static u64 bpf_get_stackid_tp(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map, + u64, flags) { - u64 ctx = *(long *)(uintptr_t)r1; + struct pt_regs *regs = *(struct pt_regs **)tp_buff; - return bpf_get_stackid(ctx, r2, r3, r4, r5); + /* + * Same comment as in bpf_perf_event_output_tp(), only that this time + * the other helper's function body cannot be inlined due to being + * external, thus we need to call raw helper function. 
+ */ + return bpf_get_stackid((unsigned long) regs, (unsigned long) map, + flags, 0, 0); } static const struct bpf_func_proto bpf_get_stackid_proto_tp = { diff --git a/net/core/filter.c b/net/core/filter.c index d6d9bb8..298b146 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -94,14 +94,13 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap) } EXPORT_SYMBOL(sk_filter_trim_cap); -static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) +BPF_CALL_1(__skb_get_pay_offset, struct sk_buff *, skb) { - return skb_get_poff((struct sk_buff *)(unsigned long) ctx); + return skb_get_poff(skb); } -static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) +BPF_CALL_3(__skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x) { - struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx; struct nlattr *nla; if (skb_is_nonlinear(skb)) @@ -120,9 +119,8 @@ static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) return 0; } -static u64 __skb_get_nlattr_nest(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) +BPF_CALL_3(__skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x) { - struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx; struct nlattr *nla; if (skb_is_nonlinear(skb)) @@ -145,7 +143,7 @@ static u64 __skb_get_nlattr_nest(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) return 0; } -static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) +BPF_CALL_0(__get_raw_cpu_id) { return raw_smp_processor_id(); } @@ -1376,12 +1374,9 @@ static inline void bpf_pull_mac_rcsum(struct sk_buff *skb) skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len); } -static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) +BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset, + const void *, from, u32, len, u64, flags) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; - unsigned int offset = (unsigned int) r2; - void *from = (void *) (long) r3; - unsigned int len = (unsigned int) r4; void 
*ptr; if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH))) @@ -1416,12 +1411,9 @@ static const struct bpf_func_proto bpf_skb_store_bytes_proto = { .arg5_type = ARG_ANYTHING, }; -static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset, + void *, to, u32, len) { - const struct sk_buff *skb = (const struct sk_buff *)(unsigned long) r1; - unsigned int offset = (unsigned int) r2; - void *to = (void *)(unsigned long) r3; - unsigned int len = (unsigned int) r4; void *ptr; if (unlikely(offset > 0xffff)) @@ -1449,10 +1441,9 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = { .arg4_type = ARG_CONST_STACK_SIZE, }; -static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) +BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset, + u64, from, u64, to, u64, flags) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; - unsigned int offset = (unsigned int) r2; __sum16 *ptr; if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK))) @@ -1494,12 +1485,11 @@ static const struct bpf_func_proto bpf_l3_csum_replace_proto = { .arg5_type = ARG_ANYTHING, }; -static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) +BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset, + u64, from, u64, to, u64, flags) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; bool is_pseudo = flags & BPF_F_PSEUDO_HDR; bool is_mmzero = flags & BPF_F_MARK_MANGLED_0; - unsigned int offset = (unsigned int) r2; __sum16 *ptr; if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_PSEUDO_HDR | @@ -1547,12 +1537,11 @@ static const struct bpf_func_proto bpf_l4_csum_replace_proto = { .arg5_type = ARG_ANYTHING, }; -static u64 bpf_csum_diff(u64 r1, u64 from_size, u64 r3, u64 to_size, u64 seed) +BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size, + __be32 *, to, u32, to_size, __wsum, seed) { struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp); - u64 
diff_size = from_size + to_size; - __be32 *from = (__be32 *) (long) r1; - __be32 *to = (__be32 *) (long) r3; + u32 diff_size = from_size + to_size; int i, j = 0; /* This is quite flexible, some examples: @@ -1610,9 +1599,8 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) return ret; } -static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) +BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; struct net_device *dev; if (unlikely(flags & ~(BPF_F_INGRESS))) @@ -1648,7 +1636,7 @@ struct redirect_info { static DEFINE_PER_CPU(struct redirect_info, redirect_info); -static u64 bpf_redirect(u64 ifindex, u64 flags, u64 r3, u64 r4, u64 r5) +BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags) { struct redirect_info *ri = this_cpu_ptr(&redirect_info); @@ -1687,9 +1675,9 @@ static const struct bpf_func_proto bpf_redirect_proto = { .arg2_type = ARG_ANYTHING, }; -static u64 bpf_get_cgroup_classid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb) { - return task_get_classid((struct sk_buff *) (unsigned long) r1); + return task_get_classid(skb); } static const struct bpf_func_proto bpf_get_cgroup_classid_proto = { @@ -1699,9 +1687,9 @@ static const struct bpf_func_proto bpf_get_cgroup_classid_proto = { .arg1_type = ARG_PTR_TO_CTX, }; -static u64 bpf_get_route_realm(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_1(bpf_get_route_realm, const struct sk_buff *, skb) { - return dst_tclassid((struct sk_buff *) (unsigned long) r1); + return dst_tclassid(skb); } static const struct bpf_func_proto bpf_get_route_realm_proto = { @@ -1711,14 +1699,14 @@ static const struct bpf_func_proto bpf_get_route_realm_proto = { .arg1_type = ARG_PTR_TO_CTX, }; -static u64 bpf_get_hash_recalc(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_1(bpf_get_hash_recalc, struct sk_buff *, skb) { /* If skb_clear_hash() was 
called due to mangling, we can * trigger SW recalculation here. Later access to hash * can then use the inline skb->hash via context directly * instead of calling this helper again. */ - return skb_get_hash((struct sk_buff *) (unsigned long) r1); + return skb_get_hash(skb); } static const struct bpf_func_proto bpf_get_hash_recalc_proto = { @@ -1728,10 +1716,9 @@ static const struct bpf_func_proto bpf_get_hash_recalc_proto = { .arg1_type = ARG_PTR_TO_CTX, }; -static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5) +BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto, + u16, vlan_tci) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; - __be16 vlan_proto = (__force __be16) r2; int ret; if (unlikely(vlan_proto != htons(ETH_P_8021Q) && @@ -1756,9 +1743,8 @@ const struct bpf_func_proto bpf_skb_vlan_push_proto = { }; EXPORT_SYMBOL_GPL(bpf_skb_vlan_push_proto); -static u64 bpf_skb_vlan_pop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; int ret; bpf_push_mac_rcsum(skb); @@ -1933,10 +1919,9 @@ static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto) return -ENOTSUPP; } -static u64 bpf_skb_change_proto(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5) +BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto, + u64, flags) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; - __be16 proto = (__force __be16) r2; int ret; if (unlikely(flags)) @@ -1973,11 +1958,8 @@ static const struct bpf_func_proto bpf_skb_change_proto_proto = { .arg3_type = ARG_ANYTHING, }; -static u64 bpf_skb_change_type(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_2(bpf_skb_change_type, struct sk_buff *, skb, u32, pkt_type) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; - u32 pkt_type = r2; - /* We only allow a restricted subset to be changed for now. 
*/ if (unlikely(!skb_pkt_type_ok(skb->pkt_type) || !skb_pkt_type_ok(pkt_type))) @@ -2028,12 +2010,11 @@ static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len) return __skb_trim_rcsum(skb, new_len); } -static u64 bpf_skb_change_tail(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5) +BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len, + u64, flags) { - struct sk_buff *skb = (struct sk_buff *)(long) r1; u32 max_len = __bpf_skb_max_len(skb); u32 min_len = __bpf_skb_min_len(skb); - u32 new_len = (u32) r2; int ret; if (unlikely(flags || new_len > max_len || new_len < min_len)) @@ -2113,13 +2094,10 @@ static unsigned long bpf_skb_copy(void *dst_buff, const void *skb, return 0; } -static u64 bpf_skb_event_output(u64 r1, u64 r2, u64 flags, u64 r4, - u64 meta_size) +BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map, + u64, flags, void *, meta, u64, meta_size) { - struct sk_buff *skb = (struct sk_buff *)(long) r1; - struct bpf_map *map = (struct bpf_map *)(long) r2; u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32; - void *meta = (void *)(long) r4; if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK))) return -EINVAL; @@ -2146,10 +2124,9 @@ static unsigned short bpf_tunnel_key_af(u64 flags) return flags & BPF_F_TUNINFO_IPV6 ? 
AF_INET6 : AF_INET; } -static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) +BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key *, to, + u32, size, u64, flags) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; - struct bpf_tunnel_key *to = (struct bpf_tunnel_key *) (long) r2; const struct ip_tunnel_info *info = skb_tunnel_info(skb); u8 compat[sizeof(struct bpf_tunnel_key)]; void *to_orig = to; @@ -2214,10 +2191,8 @@ static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = { .arg4_type = ARG_ANYTHING, }; -static u64 bpf_skb_get_tunnel_opt(u64 r1, u64 r2, u64 size, u64 r4, u64 r5) +BPF_CALL_3(bpf_skb_get_tunnel_opt, struct sk_buff *, skb, u8 *, to, u32, size) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; - u8 *to = (u8 *) (long) r2; const struct ip_tunnel_info *info = skb_tunnel_info(skb); int err; @@ -2252,10 +2227,9 @@ static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = { static struct metadata_dst __percpu *md_dst; -static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) +BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, + const struct bpf_tunnel_key *, from, u32, size, u64, flags) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; - struct bpf_tunnel_key *from = (struct bpf_tunnel_key *) (long) r2; struct metadata_dst *md = this_cpu_ptr(md_dst); u8 compat[sizeof(struct bpf_tunnel_key)]; struct ip_tunnel_info *info; @@ -2273,7 +2247,7 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) */ memcpy(compat, from, size); memset(compat + size, 0, sizeof(compat) - size); - from = (struct bpf_tunnel_key *)compat; + from = (const struct bpf_tunnel_key *) compat; break; default: return -EINVAL; @@ -2323,10 +2297,9 @@ static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = { .arg4_type = ARG_ANYTHING, }; -static u64 bpf_skb_set_tunnel_opt(u64 r1, u64 r2, u64 size, u64 r4, u64 r5) 
+BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb, + const u8 *, from, u32, size) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; - u8 *from = (u8 *) (long) r2; struct ip_tunnel_info *info = skb_tunnel_info(skb); const struct metadata_dst *md = this_cpu_ptr(md_dst); @@ -2372,23 +2345,20 @@ bpf_get_skb_set_tunnel_proto(enum bpf_func_id which) } } -static u64 bpf_skb_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_3(bpf_skb_under_cgroup, struct sk_buff *, skb, struct bpf_map *, map, + u32, idx) { - struct sk_buff *skb = (struct sk_buff *)(long)r1; - struct bpf_map *map = (struct bpf_map *)(long)r2; struct bpf_array *array = container_of(map, struct bpf_array, map); struct cgroup *cgrp; struct sock *sk; - u32 i = (u32)r3; sk = skb->sk; if (!sk || !sk_fullsock(sk)) return -ENOENT; - - if (unlikely(i >= array->map.max_entries)) + if (unlikely(idx >= array->map.max_entries)) return -E2BIG; - cgrp = READ_ONCE(array->ptrs[i]); + cgrp = READ_ONCE(array->ptrs[idx]); if (unlikely(!cgrp)) return -EAGAIN; @@ -2411,13 +2381,10 @@ static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff, return 0; } -static u64 bpf_xdp_event_output(u64 r1, u64 r2, u64 flags, u64 r4, - u64 meta_size) +BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map, + u64, flags, void *, meta, u64, meta_size) { - struct xdp_buff *xdp = (struct xdp_buff *)(long) r1; - struct bpf_map *map = (struct bpf_map *)(long) r2; u64 xdp_size = (flags & BPF_F_CTXLEN_MASK) >> 32; - void *meta = (void *)(long) r4; if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK))) return -EINVAL; -- cgit v1.1 From ba56947a33541fd8c2e2e6fafd0126a5f6faaf15 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Fri, 9 Sep 2016 09:21:15 +0800 Subject: qed: mark symbols static where possible We get a few warnings when building kernel with W=1: drivers/net/ethernet/qlogic/qed/qed_l2.c:112:5: warning: no previous prototype for 'qed_sp_vport_start' [-Wmissing-prototypes] 
drivers/net/ethernet/qlogic/qed/qed_sriov.c:110:6: warning: no previous prototype for 'qed_iov_is_valid_vfid' [-Wmissing-prototypes] drivers/net/ethernet/qlogic/qed/qed_sriov.c:188:5: warning: no previous prototype for 'qed_iov_post_vf_bulletin' [-Wmissing-prototypes] drivers/net/ethernet/qlogic/qed/qed_sriov.c:578:6: warning: no previous prototype for 'qed_iov_set_vfs_to_disable' [-Wmissing-prototypes] drivers/net/ethernet/qlogic/qed/qed_sriov.c:1135:28: warning: no previous prototype for 'qed_iov_get_public_vf_info' [-Wmissing-prototypes] drivers/net/ethernet/qlogic/qed/qed_sriov.c:1148:6: warning: no previous prototype for 'qed_iov_clean_vf' [-Wmissing-prototypes] drivers/net/ethernet/qlogic/qed/qed_sriov.c:2444:5: warning: no previous prototype for 'qed_iov_chk_ucast' [-Wmissing-prototypes] drivers/net/ethernet/qlogic/qed/qed_sriov.c:2762:5: warning: no previous prototype for 'qed_iov_vf_flr_cleanup' [-Wmissing-prototypes] .... In fact, these functions are only used in the file in which they are defined, so they don't need a separate declaration and can be made static. This patch therefore marks these functions with 'static'. Signed-off-by: Baoyou Xie Signed-off-by: David S.
Miller --- drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 10 ++++--- drivers/net/ethernet/qlogic/qed/qed_l2.c | 4 +-- drivers/net/ethernet/qlogic/qed/qed_sriov.c | 45 ++++++++++++++++------------- drivers/net/ethernet/qlogic/qed/qed_vf.c | 4 +-- 4 files changed, 35 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index be7b3dc..12c399b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -2130,17 +2130,19 @@ static int qed_dcbnl_ieee_setets(struct qed_dev *cdev, struct ieee_ets *ets) return rc; } -int qed_dcbnl_ieee_peer_getets(struct qed_dev *cdev, struct ieee_ets *ets) +static int +qed_dcbnl_ieee_peer_getets(struct qed_dev *cdev, struct ieee_ets *ets) { return qed_dcbnl_get_ieee_ets(cdev, ets, true); } -int qed_dcbnl_ieee_peer_getpfc(struct qed_dev *cdev, struct ieee_pfc *pfc) +static int +qed_dcbnl_ieee_peer_getpfc(struct qed_dev *cdev, struct ieee_pfc *pfc) { return qed_dcbnl_get_ieee_pfc(cdev, pfc, true); } -int qed_dcbnl_ieee_getapp(struct qed_dev *cdev, struct dcb_app *app) +static int qed_dcbnl_ieee_getapp(struct qed_dev *cdev, struct dcb_app *app) { struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); struct qed_dcbx_get *dcbx_info; @@ -2184,7 +2186,7 @@ int qed_dcbnl_ieee_getapp(struct qed_dev *cdev, struct dcb_app *app) return 0; } -int qed_dcbnl_ieee_setapp(struct qed_dev *cdev, struct dcb_app *app) +static int qed_dcbnl_ieee_setapp(struct qed_dev *cdev, struct dcb_app *app) { struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); struct qed_dcbx_get *dcbx_info; diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 4409ea3..ddd410a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -111,8 +111,8 @@ int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn, return qed_spq_post(p_hwfn, p_ent, NULL); } -int qed_sp_vport_start(struct qed_hwfn 
*p_hwfn, - struct qed_sp_vport_start_params *p_params) +static int qed_sp_vport_start(struct qed_hwfn *p_hwfn, + struct qed_sp_vport_start_params *p_params) { if (IS_VF(p_hwfn->cdev)) { return qed_vf_pf_vport_start(p_hwfn, p_params->vport_id, diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index d7c21cd..a4a3cea 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -108,8 +108,8 @@ static int qed_sp_vf_stop(struct qed_hwfn *p_hwfn, return qed_spq_post(p_hwfn, p_ent, NULL); } -bool qed_iov_is_valid_vfid(struct qed_hwfn *p_hwfn, - int rel_vf_id, bool b_enabled_only) +static bool qed_iov_is_valid_vfid(struct qed_hwfn *p_hwfn, + int rel_vf_id, bool b_enabled_only) { if (!p_hwfn->pf_iov_info) { DP_NOTICE(p_hwfn->cdev, "No iov info\n"); @@ -186,8 +186,8 @@ static bool qed_iov_validate_sb(struct qed_hwfn *p_hwfn, return false; } -int qed_iov_post_vf_bulletin(struct qed_hwfn *p_hwfn, - int vfid, struct qed_ptt *p_ptt) +static int qed_iov_post_vf_bulletin(struct qed_hwfn *p_hwfn, + int vfid, struct qed_ptt *p_ptt) { struct qed_bulletin_content *p_bulletin; int crc_size = sizeof(p_bulletin->crc); @@ -573,7 +573,7 @@ static void qed_iov_set_vf_to_disable(struct qed_dev *cdev, } } -void qed_iov_set_vfs_to_disable(struct qed_dev *cdev, u8 to_disable) +static void qed_iov_set_vfs_to_disable(struct qed_dev *cdev, u8 to_disable) { u16 i; @@ -1130,9 +1130,10 @@ static void qed_iov_prepare_resp(struct qed_hwfn *p_hwfn, qed_iov_send_response(p_hwfn, p_ptt, vf_info, length, status); } -struct qed_public_vf_info *qed_iov_get_public_vf_info(struct qed_hwfn *p_hwfn, - u16 relative_vf_id, - bool b_enabled_only) +static struct +qed_public_vf_info *qed_iov_get_public_vf_info(struct qed_hwfn *p_hwfn, + u16 relative_vf_id, + bool b_enabled_only) { struct qed_vf_info *vf = NULL; @@ -1143,7 +1144,7 @@ struct qed_public_vf_info *qed_iov_get_public_vf_info(struct qed_hwfn *p_hwfn, return 
&vf->p_vf_info; } -void qed_iov_clean_vf(struct qed_hwfn *p_hwfn, u8 vfid) +static void qed_iov_clean_vf(struct qed_hwfn *p_hwfn, u8 vfid) { struct qed_public_vf_info *vf_info; @@ -2510,8 +2511,8 @@ qed_iov_vf_update_unicast_shadow(struct qed_hwfn *p_hwfn, return rc; } -int qed_iov_chk_ucast(struct qed_hwfn *hwfn, - int vfid, struct qed_filter_ucast *params) +static int qed_iov_chk_ucast(struct qed_hwfn *hwfn, + int vfid, struct qed_filter_ucast *params) { struct qed_public_vf_info *vf; @@ -2828,7 +2829,8 @@ cleanup: return rc; } -int qed_iov_vf_flr_cleanup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +static int +qed_iov_vf_flr_cleanup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { u32 ack_vfs[VF_MAX_STATIC / 32]; int rc = 0; @@ -3015,7 +3017,7 @@ static void qed_iov_process_mbx_req(struct qed_hwfn *p_hwfn, } } -void qed_iov_pf_add_pending_events(struct qed_hwfn *p_hwfn, u8 vfid) +static void qed_iov_pf_add_pending_events(struct qed_hwfn *p_hwfn, u8 vfid) { u64 add_bit = 1ULL << (vfid % 64); @@ -3138,8 +3140,8 @@ static void qed_iov_bulletin_set_forced_mac(struct qed_hwfn *p_hwfn, qed_iov_configure_vport_forced(p_hwfn, vf_info, feature); } -void qed_iov_bulletin_set_forced_vlan(struct qed_hwfn *p_hwfn, - u16 pvid, int vfid) +static void qed_iov_bulletin_set_forced_vlan(struct qed_hwfn *p_hwfn, + u16 pvid, int vfid) { struct qed_vf_info *vf_info; u64 feature; @@ -3172,7 +3174,7 @@ static bool qed_iov_vf_has_vport_instance(struct qed_hwfn *p_hwfn, int vfid) return !!p_vf_info->vport_instance; } -bool qed_iov_is_vf_stopped(struct qed_hwfn *p_hwfn, int vfid) +static bool qed_iov_is_vf_stopped(struct qed_hwfn *p_hwfn, int vfid) { struct qed_vf_info *p_vf_info; @@ -3194,7 +3196,7 @@ static bool qed_iov_spoofchk_get(struct qed_hwfn *p_hwfn, int vfid) return vf_info->spoof_chk; } -int qed_iov_spoofchk_set(struct qed_hwfn *p_hwfn, int vfid, bool val) +static int qed_iov_spoofchk_set(struct qed_hwfn *p_hwfn, int vfid, bool val) { struct qed_vf_info *vf; int rc = 
-EINVAL; @@ -3237,7 +3239,8 @@ static u8 *qed_iov_bulletin_get_forced_mac(struct qed_hwfn *p_hwfn, return p_vf->bulletin.p_virt->mac; } -u16 qed_iov_bulletin_get_forced_vlan(struct qed_hwfn *p_hwfn, u16 rel_vf_id) +static u16 +qed_iov_bulletin_get_forced_vlan(struct qed_hwfn *p_hwfn, u16 rel_vf_id) { struct qed_vf_info *p_vf; @@ -3269,7 +3272,8 @@ static int qed_iov_configure_tx_rate(struct qed_hwfn *p_hwfn, return qed_init_vport_rl(p_hwfn, p_ptt, abs_vp_id, (u32)val); } -int qed_iov_configure_min_tx_rate(struct qed_dev *cdev, int vfid, u32 rate) +static int +qed_iov_configure_min_tx_rate(struct qed_dev *cdev, int vfid, u32 rate) { struct qed_vf_info *vf; u8 vport_id; @@ -3828,7 +3832,8 @@ static void qed_handle_bulletin_post(struct qed_hwfn *hwfn) qed_ptt_release(hwfn, ptt); } -void qed_iov_pf_task(struct work_struct *work) +static void qed_iov_pf_task(struct work_struct *work) + { struct qed_hwfn *hwfn = container_of(work, struct qed_hwfn, iov_task.work); diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index de0acbc..85334ce 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -1186,8 +1186,8 @@ bool qed_vf_check_mac(struct qed_hwfn *p_hwfn, u8 *mac) return false; } -bool qed_vf_bulletin_get_forced_mac(struct qed_hwfn *hwfn, - u8 *dst_mac, u8 *p_is_forced) +static bool qed_vf_bulletin_get_forced_mac(struct qed_hwfn *hwfn, + u8 *dst_mac, u8 *p_is_forced) { struct qed_bulletin_content *bulletin; -- cgit v1.1 From 163ae1c6ad6299b19e22b4a35d5ab24a89791a98 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 8 Sep 2016 10:57:08 -0700 Subject: fscrypto: add authorization check for setting encryption policy On an ext4 or f2fs filesystem with file encryption supported, a user could set an encryption policy on any empty directory(*) to which they had readonly access. 
This is obviously problematic, since such a directory might be owned by another user and the new encryption policy would prevent that other user from creating files in their own directory (for example). Fix this by requiring inode_owner_or_capable() permission to set an encryption policy. This means that either the caller must own the file, or the caller must have the capability CAP_FOWNER. (*) Or also on any regular file, for f2fs v4.6 and later and ext4 v4.8-rc1 and later; a separate bug fix is coming for that. Signed-off-by: Eric Biggers Cc: stable@vger.kernel.org # 4.1+; check fs/{ext4,f2fs} Signed-off-by: Theodore Ts'o --- fs/crypto/policy.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index 0f9961e..c9800b1 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -95,6 +95,9 @@ static int create_encryption_context_from_policy(struct inode *inode, int fscrypt_process_policy(struct inode *inode, const struct fscrypt_policy *policy) { + if (!inode_owner_or_capable(inode)) + return -EACCES; + if (policy->version != 0) return -EINVAL; -- cgit v1.1 From 002ced4be6429918800ce3e41d5cbc2d7c01822c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 8 Sep 2016 11:36:39 -0700 Subject: fscrypto: only allow setting encryption policy on directories The FS_IOC_SET_ENCRYPTION_POLICY ioctl allowed setting an encryption policy on nondirectory files. This was unintentional, and in the case of nonempty regular files did not behave as expected because existing data was not actually encrypted by the ioctl. In the case of ext4, the user could also trigger filesystem errors in ->empty_dir(), e.g. due to mismatched "directory" checksums when the kernel incorrectly tried to interpret a regular file as a directory. This bug affected ext4 with kernels v4.8-rc1 or later and f2fs with kernels v4.6 and later. 
It appears that older kernels only permitted directories and that the check was accidentally lost during the refactoring to share the file encryption code between ext4 and f2fs. This patch restores the !S_ISDIR() check that was present in older kernels. Signed-off-by: Eric Biggers Cc: stable@vger.kernel.org Signed-off-by: Theodore Ts'o --- fs/crypto/policy.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index c9800b1..f96547f 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -102,6 +102,8 @@ int fscrypt_process_policy(struct inode *inode, return -EINVAL; if (!inode_has_encryption_context(inode)) { + if (!S_ISDIR(inode->i_mode)) + return -EINVAL; if (!inode->i_sb->s_cop->empty_dir) return -EOPNOTSUPP; if (!inode->i_sb->s_cop->empty_dir(inode)) -- cgit v1.1 From 9ee7b683ea6313e9cd27bf9c4f70a3d360abe5df Mon Sep 17 00:00:00 2001 From: Tobias Regnery Date: Fri, 9 Sep 2016 12:19:52 +0200 Subject: alx: refactor msi enablement and disablement Introduce a new flag field for the advanced interrupt capabilities and add new functions to enable and disable msi interrupts. These functions will be extended later to cover msi-x interrupts. We enable msi interrupts earlier in alx_init_intr because with msi-x and multi queue support the number of queues must be set before we allocate resources for the rx and tx paths. Signed-off-by: Tobias Regnery Signed-off-by: David S.
Miller --- drivers/net/ethernet/atheros/alx/alx.h | 5 ++++- drivers/net/ethernet/atheros/alx/main.c | 30 ++++++++++++++++++++++-------- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/atheros/alx/alx.h b/drivers/net/ethernet/atheros/alx/alx.h index 8fc93c5..16ca3f4 100644 --- a/drivers/net/ethernet/atheros/alx/alx.h +++ b/drivers/net/ethernet/atheros/alx/alx.h @@ -76,6 +76,9 @@ enum alx_device_quirks { ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG = BIT(0), }; +#define ALX_FLAG_USING_MSIX BIT(0) +#define ALX_FLAG_USING_MSI BIT(1) + struct alx_priv { struct net_device *dev; @@ -105,7 +108,7 @@ struct alx_priv { u16 msg_enable; - bool msi; + int flags; /* protects hw.stats */ spinlock_t stats_lock; diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index d29a4f3..6dc1539 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -620,6 +620,22 @@ static void alx_config_vector_mapping(struct alx_priv *alx) alx_write_mem32(hw, ALX_MSI_ID_MAP, 0); } +static void alx_init_intr(struct alx_priv *alx, bool msix) +{ + if (!(alx->flags & ALX_FLAG_USING_MSIX)) { + if (!pci_enable_msi(alx->hw.pdev)) + alx->flags |= ALX_FLAG_USING_MSI; + } +} + +static void alx_disable_advanced_intr(struct alx_priv *alx) +{ + if (alx->flags & ALX_FLAG_USING_MSI) { + pci_disable_msi(alx->hw.pdev); + alx->flags &= ~ALX_FLAG_USING_MSI; + } +} + static void alx_irq_enable(struct alx_priv *alx) { struct alx_hw *hw = &alx->hw; @@ -650,9 +666,7 @@ static int alx_request_irq(struct alx_priv *alx) msi_ctrl = (hw->imt >> 1) << ALX_MSI_RETRANS_TM_SHIFT; - if (!pci_enable_msi(alx->hw.pdev)) { - alx->msi = true; - + if (alx->flags & ALX_FLAG_USING_MSI) { alx_write_mem32(hw, ALX_MSI_RETRANS_TIMER, msi_ctrl | ALX_MSI_MASK_SEL_LINE); err = request_irq(pdev->irq, alx_intr_msi, 0, @@ -660,6 +674,7 @@ static int alx_request_irq(struct alx_priv *alx) if (!err) goto out; /* fall back to legacy interrupt */ 
+ alx->flags &= ~ALX_FLAG_USING_MSI; pci_disable_msi(alx->hw.pdev); } @@ -678,10 +693,7 @@ static void alx_free_irq(struct alx_priv *alx) free_irq(pdev->irq, alx); - if (alx->msi) { - pci_disable_msi(alx->hw.pdev); - alx->msi = false; - } + alx_disable_advanced_intr(alx); } static int alx_identify_hw(struct alx_priv *alx) @@ -847,6 +859,8 @@ static int __alx_open(struct alx_priv *alx, bool resume) { int err; + alx_init_intr(alx, false); + if (!resume) netif_carrier_off(alx->dev); @@ -1236,7 +1250,7 @@ static void alx_poll_controller(struct net_device *netdev) { struct alx_priv *alx = netdev_priv(netdev); - if (alx->msi) + if (alx->flags & ALX_FLAG_USING_MSI) alx_intr_msi(0, alx); else alx_intr_legacy(0, alx); -- cgit v1.1 From a0373aef3ecf12d97a8332f953f0e16092f068b4 Mon Sep 17 00:00:00 2001 From: Tobias Regnery Date: Fri, 9 Sep 2016 12:19:53 +0200 Subject: alx: factor out part of the interrupt handler Factor out the handling of misc interrupts into a new function. This function can be reused later for msi-x interrupts. Signed-off-by: Tobias Regnery Signed-off-by: David S. 
Miller --- drivers/net/ethernet/atheros/alx/main.c | 34 +++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index 6dc1539..b34f7b6 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -302,22 +302,15 @@ static int alx_poll(struct napi_struct *napi, int budget) return work; } -static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr) +static bool alx_intr_handle_misc(struct alx_priv *alx, u32 intr) { struct alx_hw *hw = &alx->hw; - bool write_int_mask = false; - - spin_lock(&alx->irq_lock); - - /* ACK interrupt */ - alx_write_mem32(hw, ALX_ISR, intr | ALX_ISR_DIS); - intr &= alx->int_mask; if (intr & ALX_ISR_FATAL) { netif_warn(alx, hw, alx->dev, "fatal interrupt 0x%x, resetting\n", intr); alx_schedule_reset(alx); - goto out; + return true; } if (intr & ALX_ISR_ALERT) @@ -329,19 +322,32 @@ static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr) * is cleared, the interrupt status could be cleared. 
*/ alx->int_mask &= ~ALX_ISR_PHY; - write_int_mask = true; + alx_write_mem32(hw, ALX_IMR, alx->int_mask); alx_schedule_link_check(alx); } + return false; +} + +static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr) +{ + struct alx_hw *hw = &alx->hw; + + spin_lock(&alx->irq_lock); + + /* ACK interrupt */ + alx_write_mem32(hw, ALX_ISR, intr | ALX_ISR_DIS); + intr &= alx->int_mask; + + if (alx_intr_handle_misc(alx, intr)) + goto out; + if (intr & (ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0)) { napi_schedule(&alx->napi); /* mask rx/tx interrupt, enable them when napi complete */ alx->int_mask &= ~ALX_ISR_ALL_QUEUES; - write_int_mask = true; - } - - if (write_int_mask) alx_write_mem32(hw, ALX_IMR, alx->int_mask); + } alx_write_mem32(hw, ALX_ISR, 0); -- cgit v1.1 From dc39a78b3c6113dcad5e0f52e3b9deba7ad2fa3d Mon Sep 17 00:00:00 2001 From: Tobias Regnery Date: Fri, 9 Sep 2016 12:19:54 +0200 Subject: alx: add msi-x support Add msi-x support to the alx driver. This is in preparation for multi queue support. msi-x interrupts are disabled by default because without multi queue support there is no advantage over msi interrupts. The performance numbers observed with iperf stay the same. Based on information of the downstream driver at github.com/qca/alx Signed-off-by: Tobias Regnery Signed-off-by: David S. 
Miller --- drivers/net/ethernet/atheros/alx/alx.h | 5 + drivers/net/ethernet/atheros/alx/hw.c | 14 +++ drivers/net/ethernet/atheros/alx/hw.h | 1 + drivers/net/ethernet/atheros/alx/main.c | 172 ++++++++++++++++++++++++++++++-- 4 files changed, 184 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/atheros/alx/alx.h b/drivers/net/ethernet/atheros/alx/alx.h index 16ca3f4..6cac919 100644 --- a/drivers/net/ethernet/atheros/alx/alx.h +++ b/drivers/net/ethernet/atheros/alx/alx.h @@ -84,6 +84,11 @@ struct alx_priv { struct alx_hw hw; + /* msi-x vectors */ + int num_vec; + struct msix_entry *msix_entries; + char irq_lbl[IFNAMSIZ + 8]; + /* all descriptor memory */ struct { dma_addr_t dma; diff --git a/drivers/net/ethernet/atheros/alx/hw.c b/drivers/net/ethernet/atheros/alx/hw.c index 1fe35e4..6ac40b0 100644 --- a/drivers/net/ethernet/atheros/alx/hw.c +++ b/drivers/net/ethernet/atheros/alx/hw.c @@ -1031,6 +1031,20 @@ void alx_configure_basic(struct alx_hw *hw) alx_write_mem32(hw, ALX_WRR, val); } +void alx_mask_msix(struct alx_hw *hw, int index, bool mask) +{ + u32 reg, val; + + reg = ALX_MSIX_ENTRY_BASE + index * PCI_MSIX_ENTRY_SIZE + + PCI_MSIX_ENTRY_VECTOR_CTRL; + + val = mask ? 
PCI_MSIX_ENTRY_CTRL_MASKBIT : 0; + + alx_write_mem32(hw, reg, val); + alx_post_write(hw); +} + + bool alx_get_phy_info(struct alx_hw *hw) { u16 devs1, devs2; diff --git a/drivers/net/ethernet/atheros/alx/hw.h b/drivers/net/ethernet/atheros/alx/hw.h index f289c05..0191477 100644 --- a/drivers/net/ethernet/atheros/alx/hw.h +++ b/drivers/net/ethernet/atheros/alx/hw.h @@ -562,6 +562,7 @@ int alx_reset_mac(struct alx_hw *hw); void alx_set_macaddr(struct alx_hw *hw, const u8 *addr); bool alx_phy_configured(struct alx_hw *hw); void alx_configure_basic(struct alx_hw *hw); +void alx_mask_msix(struct alx_hw *hw, int index, bool mask); void alx_disable_rss(struct alx_hw *hw); bool alx_get_phy_info(struct alx_hw *hw); void alx_update_hw_stats(struct alx_hw *hw); diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index b34f7b6..a4f74d4 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -292,10 +292,14 @@ static int alx_poll(struct napi_struct *napi, int budget) napi_complete(&alx->napi); /* enable interrupt */ - spin_lock_irqsave(&alx->irq_lock, flags); - alx->int_mask |= ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0; - alx_write_mem32(hw, ALX_IMR, alx->int_mask); - spin_unlock_irqrestore(&alx->irq_lock, flags); + if (alx->flags & ALX_FLAG_USING_MSIX) { + alx_mask_msix(hw, 1, false); + } else { + spin_lock_irqsave(&alx->irq_lock, flags); + alx->int_mask |= ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0; + alx_write_mem32(hw, ALX_IMR, alx->int_mask); + spin_unlock_irqrestore(&alx->irq_lock, flags); + } alx_post_write(hw); @@ -356,6 +360,46 @@ static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr) return IRQ_HANDLED; } +static irqreturn_t alx_intr_msix_ring(int irq, void *data) +{ + struct alx_priv *alx = data; + struct alx_hw *hw = &alx->hw; + + /* mask interrupt to ACK chip */ + alx_mask_msix(hw, 1, true); + /* clear interrupt status */ + alx_write_mem32(hw, ALX_ISR, (ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0)); + + 
napi_schedule(&alx->napi); + + return IRQ_HANDLED; +} + +static irqreturn_t alx_intr_msix_misc(int irq, void *data) +{ + struct alx_priv *alx = data; + struct alx_hw *hw = &alx->hw; + u32 intr; + + /* mask interrupt to ACK chip */ + alx_mask_msix(hw, 0, true); + + /* read interrupt status */ + intr = alx_read_mem32(hw, ALX_ISR); + intr &= (alx->int_mask & ~ALX_ISR_ALL_QUEUES); + + if (alx_intr_handle_misc(alx, intr)) + return IRQ_HANDLED; + + /* clear interrupt status */ + alx_write_mem32(hw, ALX_ISR, intr); + + /* enable interrupt again */ + alx_mask_msix(hw, 0, false); + + return IRQ_HANDLED; +} + static irqreturn_t alx_intr_msi(int irq, void *data) { struct alx_priv *alx = data; @@ -620,15 +664,84 @@ static void alx_free_rings(struct alx_priv *alx) static void alx_config_vector_mapping(struct alx_priv *alx) { struct alx_hw *hw = &alx->hw; + u32 tbl = 0; + + if (alx->flags & ALX_FLAG_USING_MSIX) { + tbl |= 1 << ALX_MSI_MAP_TBL1_TXQ0_SHIFT; + tbl |= 1 << ALX_MSI_MAP_TBL1_RXQ0_SHIFT; + } - alx_write_mem32(hw, ALX_MSI_MAP_TBL1, 0); + alx_write_mem32(hw, ALX_MSI_MAP_TBL1, tbl); alx_write_mem32(hw, ALX_MSI_MAP_TBL2, 0); alx_write_mem32(hw, ALX_MSI_ID_MAP, 0); } +static bool alx_enable_msix(struct alx_priv *alx) +{ + int i, err, num_vec = 2; + + alx->msix_entries = kcalloc(num_vec, sizeof(struct msix_entry), + GFP_KERNEL); + if (!alx->msix_entries) { + netdev_warn(alx->dev, "Allocation of msix entries failed!\n"); + return false; + } + + for (i = 0; i < num_vec; i++) + alx->msix_entries[i].entry = i; + + err = pci_enable_msix(alx->hw.pdev, alx->msix_entries, num_vec); + if (err) { + kfree(alx->msix_entries); + netdev_warn(alx->dev, "Enabling MSI-X interrupts failed!\n"); + return false; + } + + alx->num_vec = num_vec; + return true; +} + +static int alx_request_msix(struct alx_priv *alx) +{ + struct net_device *netdev = alx->dev; + int i, err, vector = 0, free_vector = 0; + + err = request_irq(alx->msix_entries[0].vector, alx_intr_msix_misc, + 0, netdev->name, alx); + 
if (err) + goto out_err; + + vector++; + sprintf(alx->irq_lbl, "%s-TxRx-0", netdev->name); + + err = request_irq(alx->msix_entries[vector].vector, + alx_intr_msix_ring, 0, alx->irq_lbl, alx); + if (err) + goto out_free; + + return 0; + +out_free: + free_irq(alx->msix_entries[free_vector++].vector, alx); + + vector--; + for (i = 0; i < vector; i++) + free_irq(alx->msix_entries[free_vector++].vector, alx); + +out_err: + return err; +} + static void alx_init_intr(struct alx_priv *alx, bool msix) { + if (msix) { + if (alx_enable_msix(alx)) + alx->flags |= ALX_FLAG_USING_MSIX; + } + if (!(alx->flags & ALX_FLAG_USING_MSIX)) { + alx->num_vec = 1; + if (!pci_enable_msi(alx->hw.pdev)) alx->flags |= ALX_FLAG_USING_MSI; } @@ -636,6 +749,12 @@ static void alx_init_intr(struct alx_priv *alx, bool msix) static void alx_disable_advanced_intr(struct alx_priv *alx) { + if (alx->flags & ALX_FLAG_USING_MSIX) { + kfree(alx->msix_entries); + pci_disable_msix(alx->hw.pdev); + alx->flags &= ~ALX_FLAG_USING_MSIX; + } + if (alx->flags & ALX_FLAG_USING_MSI) { pci_disable_msi(alx->hw.pdev); alx->flags &= ~ALX_FLAG_USING_MSI; @@ -645,22 +764,36 @@ static void alx_disable_advanced_intr(struct alx_priv *alx) static void alx_irq_enable(struct alx_priv *alx) { struct alx_hw *hw = &alx->hw; + int i; /* level-1 interrupt switch */ alx_write_mem32(hw, ALX_ISR, 0); alx_write_mem32(hw, ALX_IMR, alx->int_mask); alx_post_write(hw); + + if (alx->flags & ALX_FLAG_USING_MSIX) + /* enable all msix irqs */ + for (i = 0; i < alx->num_vec; i++) + alx_mask_msix(hw, i, false); } static void alx_irq_disable(struct alx_priv *alx) { struct alx_hw *hw = &alx->hw; + int i; alx_write_mem32(hw, ALX_ISR, ALX_ISR_DIS); alx_write_mem32(hw, ALX_IMR, 0); alx_post_write(hw); - synchronize_irq(alx->hw.pdev->irq); + if (alx->flags & ALX_FLAG_USING_MSIX) { + for (i = 0; i < alx->num_vec; i++) { + alx_mask_msix(hw, i, true); + synchronize_irq(alx->msix_entries[i].vector); + } + } else { + synchronize_irq(alx->hw.pdev->irq); + } 
} static int alx_request_irq(struct alx_priv *alx) @@ -672,6 +805,17 @@ static int alx_request_irq(struct alx_priv *alx) msi_ctrl = (hw->imt >> 1) << ALX_MSI_RETRANS_TM_SHIFT; + if (alx->flags & ALX_FLAG_USING_MSIX) { + alx_write_mem32(hw, ALX_MSI_RETRANS_TIMER, msi_ctrl); + err = alx_request_msix(alx); + if (!err) + goto out; + + /* msix request failed, realloc resources */ + alx_disable_advanced_intr(alx); + alx_init_intr(alx, false); + } + if (alx->flags & ALX_FLAG_USING_MSI) { alx_write_mem32(hw, ALX_MSI_RETRANS_TIMER, msi_ctrl | ALX_MSI_MASK_SEL_LINE); @@ -690,14 +834,23 @@ static int alx_request_irq(struct alx_priv *alx) out: if (!err) alx_config_vector_mapping(alx); + else + netdev_err(alx->dev, "IRQ registration failed!\n"); return err; } static void alx_free_irq(struct alx_priv *alx) { struct pci_dev *pdev = alx->hw.pdev; + int i; - free_irq(pdev->irq, alx); + if (alx->flags & ALX_FLAG_USING_MSIX) { + /* we have only 2 vectors without multi queue support */ + for (i = 0; i < 2; i++) + free_irq(alx->msix_entries[i].vector, alx); + } else { + free_irq(pdev->irq, alx); + } alx_disable_advanced_intr(alx); } @@ -1256,7 +1409,10 @@ static void alx_poll_controller(struct net_device *netdev) { struct alx_priv *alx = netdev_priv(netdev); - if (alx->flags & ALX_FLAG_USING_MSI) + if (alx->flags & ALX_FLAG_USING_MSIX) { + alx_intr_msix_misc(0, alx); + alx_intr_msix_ring(0, alx); + } else if (alx->flags & ALX_FLAG_USING_MSI) alx_intr_msi(0, alx); else alx_intr_legacy(0, alx); -- cgit v1.1 From 0c58ee0bfa28ad06dbc2b6305b1b950f7c392cdf Mon Sep 17 00:00:00 2001 From: Tobias Regnery Date: Fri, 9 Sep 2016 12:19:55 +0200 Subject: alx: add module parameter to enable msi-x support msi-x support is default disabled in the alx driver. In order to test msi-x interrupts for regressions add a module parameter to the driver. Signed-off-by: Tobias Regnery Signed-off-by: David S. 
Miller --- drivers/net/ethernet/atheros/alx/main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index a4f74d4..9887cee 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -51,6 +51,9 @@ const char alx_drv_name[] = "alx"; +static bool msix = false; +module_param(msix, bool, 0); +MODULE_PARM_DESC(msix, "Enable msi-x interrupt support"); static void alx_free_txbuf(struct alx_priv *alx, int entry) { @@ -1018,7 +1021,7 @@ static int __alx_open(struct alx_priv *alx, bool resume) { int err; - alx_init_intr(alx, false); + alx_init_intr(alx, msix); if (!resume) netif_carrier_off(alx->dev); -- cgit v1.1 From e808bb6ed042020d3a15cb6e85ca646bc7c9eda8 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Fri, 9 Sep 2016 20:40:16 +0200 Subject: ATM-iphase: Use kmalloc_array() in tx_init() * Multiplications for the size determination of memory allocations indicated that array data structures should be processed. Thus use the corresponding function "kmalloc_array". This issue was detected by using the Coccinelle software. * Replace the specification of data types by pointer dereferences to make the corresponding size determination a bit safer according to the Linux coding style convention. Signed-off-by: Markus Elfring Signed-off-by: David S. 
Miller --- drivers/atm/iphase.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c index 809dd1e..9d8807e 100644 --- a/drivers/atm/iphase.c +++ b/drivers/atm/iphase.c @@ -1975,7 +1975,9 @@ static int tx_init(struct atm_dev *dev) buf_desc_ptr++; tx_pkt_start += iadev->tx_buf_sz; } - iadev->tx_buf = kmalloc(iadev->num_tx_desc*sizeof(struct cpcs_trailer_desc), GFP_KERNEL); + iadev->tx_buf = kmalloc_array(iadev->num_tx_desc, + sizeof(*iadev->tx_buf), + GFP_KERNEL); if (!iadev->tx_buf) { printk(KERN_ERR DEV_LABEL " couldn't get mem\n"); goto err_free_dle; @@ -1995,8 +1997,9 @@ static int tx_init(struct atm_dev *dev) sizeof(*cpcs), DMA_TO_DEVICE); } - iadev->desc_tbl = kmalloc(iadev->num_tx_desc * - sizeof(struct desc_tbl_t), GFP_KERNEL); + iadev->desc_tbl = kmalloc_array(iadev->num_tx_desc, + sizeof(*iadev->desc_tbl), + GFP_KERNEL); if (!iadev->desc_tbl) { printk(KERN_ERR DEV_LABEL " couldn't get mem\n"); goto err_free_all_tx_bufs; @@ -2124,7 +2127,9 @@ static int tx_init(struct atm_dev *dev) memset((caddr_t)(iadev->seg_ram+i), 0, iadev->num_vc*4); vc = (struct main_vc *)iadev->MAIN_VC_TABLE_ADDR; evc = (struct ext_vc *)iadev->EXT_VC_TABLE_ADDR; - iadev->testTable = kmalloc(sizeof(long)*iadev->num_vc, GFP_KERNEL); + iadev->testTable = kmalloc_array(iadev->num_vc, + sizeof(*iadev->testTable), + GFP_KERNEL); if (!iadev->testTable) { printk("Get freepage failed\n"); goto err_free_desc_tbl; -- cgit v1.1 From 4214ebf4654798309364d0c678b799e402f38288 Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Fri, 29 Jul 2016 22:38:19 +0100 Subject: Fix memory leaks in cifs_do_mount() Fix memory leaks introduced by the patch fs/cifs: make share unaccessible at root level mountable Also move allocation of cifs_sb->prepath to cifs_setup_cifs_sb(). 
Signed-off-by: Sachin Prabhu Tested-by: Aurelien Aptel Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 20 ++++++++------------ fs/cifs/cifsproto.h | 2 +- fs/cifs/connect.c | 10 +++++++++- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 6bbec5e..cc9cdab 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -686,26 +686,22 @@ cifs_do_mount(struct file_system_type *fs_type, cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL); if (cifs_sb->mountdata == NULL) { root = ERR_PTR(-ENOMEM); - goto out_cifs_sb; + goto out_free; } - if (volume_info->prepath) { - cifs_sb->prepath = kstrdup(volume_info->prepath, GFP_KERNEL); - if (cifs_sb->prepath == NULL) { - root = ERR_PTR(-ENOMEM); - goto out_cifs_sb; - } + rc = cifs_setup_cifs_sb(volume_info, cifs_sb); + if (rc) { + root = ERR_PTR(rc); + goto out_free; } - cifs_setup_cifs_sb(volume_info, cifs_sb); - rc = cifs_mount(cifs_sb, volume_info); if (rc) { if (!(flags & MS_SILENT)) cifs_dbg(VFS, "cifs_mount failed w/return code = %d\n", rc); root = ERR_PTR(rc); - goto out_mountdata; + goto out_free; } mnt_data.vol = volume_info; @@ -752,9 +748,9 @@ out: cifs_cleanup_volume_info(volume_info); return root; -out_mountdata: +out_free: + kfree(cifs_sb->prepath); kfree(cifs_sb->mountdata); -out_cifs_sb: kfree(cifs_sb); out_nls: unload_nls(volume_info->local_nls); diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 1243bd3..95dab43 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -184,7 +184,7 @@ extern int cifs_read_from_socket(struct TCP_Server_Info *server, char *buf, unsigned int to_read); extern int cifs_read_page_from_socket(struct TCP_Server_Info *server, struct page *page, unsigned int to_read); -extern void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, +extern int cifs_setup_cifs_sb(struct smb_vol *pvolume_info, struct cifs_sb_info *cifs_sb); extern int cifs_match_super(struct super_block *, void *); extern void 
cifs_cleanup_volume_info(struct smb_vol *pvolume_info); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 7ae0328..4546926 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3222,7 +3222,7 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon, } } -void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, +int cifs_setup_cifs_sb(struct smb_vol *pvolume_info, struct cifs_sb_info *cifs_sb) { INIT_DELAYED_WORK(&cifs_sb->prune_tlinks, cifs_prune_tlinks); @@ -3316,6 +3316,14 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, if ((pvolume_info->cifs_acl) && (pvolume_info->dynperm)) cifs_dbg(VFS, "mount option dynperm ignored if cifsacl mount option supported\n"); + + if (pvolume_info->prepath) { + cifs_sb->prepath = kstrdup(pvolume_info->prepath, GFP_KERNEL); + if (cifs_sb->prepath == NULL) + return -ENOMEM; + } + + return 0; } static void -- cgit v1.1 From c1d8b24d18192764fe82067ec6aa8d4c3bf094e0 Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Fri, 29 Jul 2016 22:38:20 +0100 Subject: Compare prepaths when comparing superblocks The patch fs/cifs: make share unaccessible at root level mountable makes use of prepaths when any component of the underlying path is inaccessible. When mounting 2 separate shares that have different prepaths but are otherwise similar, we end up sharing superblocks when we shouldn't be doing so.
Signed-off-by: Sachin Prabhu Tested-by: Aurelien Aptel Signed-off-by: Steve French --- fs/cifs/connect.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 4546926..2e4f4ba 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2781,6 +2781,24 @@ compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data) return 1; } +static int +match_prepath(struct super_block *sb, struct cifs_mnt_data *mnt_data) +{ + struct cifs_sb_info *old = CIFS_SB(sb); + struct cifs_sb_info *new = mnt_data->cifs_sb; + + if (old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) { + if (!(new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH)) + return 0; + /* The prepath should be null terminated strings */ + if (strcmp(new->prepath, old->prepath)) + return 0; + + return 1; + } + return 0; +} + int cifs_match_super(struct super_block *sb, void *data) { @@ -2808,7 +2826,8 @@ cifs_match_super(struct super_block *sb, void *data) if (!match_server(tcp_srv, volume_info) || !match_session(ses, volume_info) || - !match_tcon(tcon, volume_info->UNC)) { + !match_tcon(tcon, volume_info->UNC) || + !match_prepath(sb, mnt_data)) { rc = 0; goto out; } -- cgit v1.1 From 348c1bfa84dfc47da1f1234b7f2bf09fa798edea Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Fri, 29 Jul 2016 22:38:21 +0100 Subject: Move check for prefix path to within cifs_get_root() Signed-off-by: Sachin Prabhu Tested-by: Aurelien Aptel Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index cc9cdab..14ae4b8 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -609,6 +609,9 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) char *s, *p; char sep; + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) + return dget(sb->s_root); + full_path = cifs_build_path_to_root(vol, cifs_sb, cifs_sb_master_tcon(cifs_sb)); if 
(full_path == NULL) @@ -731,11 +734,7 @@ cifs_do_mount(struct file_system_type *fs_type, sb->s_flags |= MS_ACTIVE; } - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) - root = dget(sb->s_root); - else - root = cifs_get_root(volume_info, sb); - + root = cifs_get_root(volume_info, sb); if (IS_ERR(root)) goto out_super; -- cgit v1.1 From ba63f23d69a3a10e7e527a02702023da68ef8a6d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 8 Sep 2016 14:20:38 -0700 Subject: fscrypto: require write access to mount to set encryption policy Since setting an encryption policy requires writing metadata to the filesystem, it should be guarded by mnt_want_write/mnt_drop_write. Otherwise, a user could cause a write to a frozen or readonly filesystem. This was handled correctly by f2fs but not by ext4. Make fscrypt_process_policy() handle it rather than relying on the filesystem to get it right. Signed-off-by: Eric Biggers Cc: stable@vger.kernel.org # 4.1+; check fs/{ext4,f2fs} Signed-off-by: Theodore Ts'o Acked-by: Jaegeuk Kim --- fs/crypto/policy.c | 38 +++++++++++++++++++++++++------------- fs/ext4/ioctl.c | 2 +- fs/f2fs/file.c | 9 +-------- include/linux/fscrypto.h | 5 ++--- 4 files changed, 29 insertions(+), 25 deletions(-) diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index f96547f..ed115ac 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -11,6 +11,7 @@ #include #include #include +#include static int inode_has_encryption_context(struct inode *inode) { @@ -92,31 +93,42 @@ static int create_encryption_context_from_policy(struct inode *inode, return inode->i_sb->s_cop->set_context(inode, &ctx, sizeof(ctx), NULL); } -int fscrypt_process_policy(struct inode *inode, +int fscrypt_process_policy(struct file *filp, const struct fscrypt_policy *policy) { + struct inode *inode = file_inode(filp); + int ret; + if (!inode_owner_or_capable(inode)) return -EACCES; if (policy->version != 0) return -EINVAL; + ret = mnt_want_write_file(filp); + if (ret) + return ret; + 
if (!inode_has_encryption_context(inode)) { if (!S_ISDIR(inode->i_mode)) - return -EINVAL; - if (!inode->i_sb->s_cop->empty_dir) - return -EOPNOTSUPP; - if (!inode->i_sb->s_cop->empty_dir(inode)) - return -ENOTEMPTY; - return create_encryption_context_from_policy(inode, policy); + ret = -EINVAL; + else if (!inode->i_sb->s_cop->empty_dir) + ret = -EOPNOTSUPP; + else if (!inode->i_sb->s_cop->empty_dir(inode)) + ret = -ENOTEMPTY; + else + ret = create_encryption_context_from_policy(inode, + policy); + } else if (!is_encryption_context_consistent_with_policy(inode, + policy)) { + printk(KERN_WARNING + "%s: Policy inconsistent with encryption context\n", + __func__); + ret = -EINVAL; } - if (is_encryption_context_consistent_with_policy(inode, policy)) - return 0; - - printk(KERN_WARNING "%s: Policy inconsistent with encryption context\n", - __func__); - return -EINVAL; + mnt_drop_write_file(filp); + return ret; } EXPORT_SYMBOL(fscrypt_process_policy); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 10686fd..1bb7df5 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -776,7 +776,7 @@ resizefs_out: (struct fscrypt_policy __user *)arg, sizeof(policy))) return -EFAULT; - return fscrypt_process_policy(inode, &policy); + return fscrypt_process_policy(filp, &policy); #else return -EOPNOTSUPP; #endif diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 47abb96..28f4f4c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1757,21 +1757,14 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) { struct fscrypt_policy policy; struct inode *inode = file_inode(filp); - int ret; if (copy_from_user(&policy, (struct fscrypt_policy __user *)arg, sizeof(policy))) return -EFAULT; - ret = mnt_want_write_file(filp); - if (ret) - return ret; - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); - ret = fscrypt_process_policy(inode, &policy); - mnt_drop_write_file(filp); - return ret; + return fscrypt_process_policy(filp, &policy); } static int 
f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h index cfa6cde..76cff18 100644 --- a/include/linux/fscrypto.h +++ b/include/linux/fscrypto.h @@ -274,8 +274,7 @@ extern void fscrypt_restore_control_page(struct page *); extern int fscrypt_zeroout_range(struct inode *, pgoff_t, sector_t, unsigned int); /* policy.c */ -extern int fscrypt_process_policy(struct inode *, - const struct fscrypt_policy *); +extern int fscrypt_process_policy(struct file *, const struct fscrypt_policy *); extern int fscrypt_get_policy(struct inode *, struct fscrypt_policy *); extern int fscrypt_has_permitted_context(struct inode *, struct inode *); extern int fscrypt_inherit_context(struct inode *, struct inode *, @@ -345,7 +344,7 @@ static inline int fscrypt_notsupp_zeroout_range(struct inode *i, pgoff_t p, } /* policy.c */ -static inline int fscrypt_notsupp_process_policy(struct inode *i, +static inline int fscrypt_notsupp_process_policy(struct file *f, const struct fscrypt_policy *p) { return -EOPNOTSUPP; -- cgit v1.1 From 767ae08678c2c796bcd7f582ee457aee20a28a1e Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 6 Sep 2016 16:23:49 +0300 Subject: perf/core: Fix a race between mmap_close() and set_output() of AUX events In the mmap_close() path we need to stop all the AUX events that are writing data to the AUX area that we are unmapping, before we can safely free the pages. To determine if an event needs to be stopped, we're comparing its ->rb against the one that's getting unmapped. However, a SET_OUTPUT ioctl may turn up inside an AUX transaction and swizzle event::rb to some other ring buffer, but the transaction will keep writing data to the old ring buffer until the event gets scheduled out. 
At this point, mmap_close() will skip over such an event and will proceed to free the AUX area, while it's still being used by this event, which will set off a warning in the mmap_close() path and cause a memory corruption. To avoid this, always stop an AUX event before its ->rb is updated; this will release the (potentially) last reference on the AUX area of the buffer. If the event gets restarted, its new ring buffer will be used. If another SET_OUTPUT comes and switches it back to the old ring buffer that's getting unmapped, it's also fine: this ring buffer's aux_mmap_count will be zero and AUX transactions won't start any more. Reported-by: Vince Weaver Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160906132353.19887-2-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 07ac859..a54f2c2 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2496,11 +2496,11 @@ static int __perf_event_stop(void *info) return 0; } -static int perf_event_restart(struct perf_event *event) +static int perf_event_stop(struct perf_event *event, int restart) { struct stop_event_data sd = { .event = event, - .restart = 1, + .restart = restart, }; int ret = 0; @@ -4845,6 +4845,19 @@ static void ring_buffer_attach(struct perf_event *event, spin_unlock_irqrestore(&rb->event_lock, flags); } + /* + * Avoid racing with perf_mmap_close(AUX): stop the event + * before swizzling the event::rb pointer; if it's getting + * unmapped, its aux_mmap_count will be 0 and it won't + * restart. See the comment in __perf_pmu_output_stop(). 
+ * + * Data will inevitably be lost when set_output is done in + * mid-air, but then again, whoever does it like this is + * not in for the data anyway. + */ + if (has_aux(event)) + perf_event_stop(event, 0); + rcu_assign_pointer(event->rb, rb); if (old_rb) { @@ -6120,7 +6133,7 @@ static void perf_event_addr_filters_exec(struct perf_event *event, void *data) raw_spin_unlock_irqrestore(&ifh->lock, flags); if (restart) - perf_event_restart(event); + perf_event_stop(event, 1); } void perf_event_exec(void) @@ -6164,7 +6177,13 @@ static void __perf_event_output_stop(struct perf_event *event, void *data) /* * In case of inheritance, it will be the parent that links to the - * ring-buffer, but it will be the child that's actually using it: + * ring-buffer, but it will be the child that's actually using it. + * + * We are using event::rb to determine if the event should be stopped, + * however this may race with ring_buffer_attach() (through set_output), + * which will make us skip the event that actually needs to be stopped. + * So ring_buffer_attach() has to stop an aux event before re-assigning + * its rb pointer. 
*/ if (rcu_dereference(parent->rb) == rb) ro->err = __perf_event_stop(&sd); @@ -6678,7 +6697,7 @@ static void __perf_addr_filters_adjust(struct perf_event *event, void *data) raw_spin_unlock_irqrestore(&ifh->lock, flags); if (restart) - perf_event_restart(event); + perf_event_stop(event, 1); } /* @@ -7867,7 +7886,7 @@ static void perf_event_addr_filters_apply(struct perf_event *event) mmput(mm); restart: - perf_event_restart(event); + perf_event_stop(event, 1); } /* -- cgit v1.1 From b79ccadd6bb10e72cf784a298ca6dc1398eb9a24 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 6 Sep 2016 16:23:50 +0300 Subject: perf/core: Fix aux_mmap_count vs aux_refcount order The order of accesses to ring buffer's aux_mmap_count and aux_refcount has to be preserved across the users, namely perf_mmap_close() and perf_aux_output_begin(), otherwise the inversion can result in the latter holding the last reference to the aux buffer and subsequently free'ing it in atomic context, triggering a warning. > ------------[ cut here ]------------ > WARNING: CPU: 0 PID: 257 at kernel/events/ring_buffer.c:541 __rb_free_aux+0x11a/0x130 > CPU: 0 PID: 257 Comm: stopbug Not tainted 4.8.0-rc1+ #2596 > Call Trace: > [] __warn+0xcb/0xf0 > [] warn_slowpath_null+0x1d/0x20 > [] __rb_free_aux+0x11a/0x130 > [] rb_free_aux+0x18/0x20 > [] perf_aux_output_begin+0x163/0x1e0 > [] bts_event_start+0x3a/0xd0 > [] bts_event_add+0x5d/0x80 > [] event_sched_in.isra.104+0xf6/0x2f0 > [] group_sched_in+0x6e/0x190 > [] ctx_sched_in+0x2fe/0x5f0 > [] perf_event_sched_in+0x60/0x80 > [] ctx_resched+0x5b/0x90 > [] __perf_event_enable+0x1e1/0x240 > [] event_function+0xa9/0x180 > [] ? perf_cgroup_attach+0x70/0x70 > [] remote_function+0x3f/0x50 > [] flush_smp_call_function_queue+0x83/0x150 > [] generic_smp_call_function_single_interrupt+0x13/0x60 > [] smp_call_function_single_interrupt+0x27/0x40 > [] call_function_single_interrupt+0x89/0x90 > [] finish_task_switch+0xa6/0x210 > [] ? 
finish_task_switch+0x67/0x210 > [] __schedule+0x3dd/0xb50 > [] schedule+0x35/0x80 > [] sys_sched_yield+0x61/0x70 > [] entry_SYSCALL_64_fastpath+0x18/0xa8 > ---[ end trace 6235f556f5ea83a9 ]--- This patch puts the checks in perf_aux_output_begin() in the same order as that of perf_mmap_close(). Reported-by: Vince Weaver Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160906132353.19887-3-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- kernel/events/ring_buffer.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index ae9b90d..257fa46 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -330,15 +330,22 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, if (!rb) return NULL; - if (!rb_has_aux(rb) || !atomic_inc_not_zero(&rb->aux_refcount)) + if (!rb_has_aux(rb)) goto err; /* - * If rb::aux_mmap_count is zero (and rb_has_aux() above went through), - * the aux buffer is in perf_mmap_close(), about to get freed. + * If aux_mmap_count is zero, the aux buffer is in perf_mmap_close(), + * about to get freed, so we leave immediately. + * + * Checking rb::aux_mmap_count and rb::refcount has to be done in + * the same order, see perf_mmap_close. Otherwise we end up freeing + * aux pages in this path, which is a bug, because in_atomic(). 
*/ if (!atomic_read(&rb->aux_mmap_count)) - goto err_put; + goto err; + + if (!atomic_inc_not_zero(&rb->aux_refcount)) + goto err; /* * Nesting is not supported for AUX area, make sure nested -- cgit v1.1 From a9a94401c2b5805c71e39427b1af1bf1b9f67cd0 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 6 Sep 2016 16:23:51 +0300 Subject: perf/x86/intel/bts: Fix confused ordering of PMU callbacks The intel_bts driver is using a CPU-local 'started' variable to order callbacks and PMIs and make sure that AUX transactions don't get messed up. However, the ordering rules in regard to this variable are a complete mess, which recently resulted in perf_fuzzer-triggered warnings and panics. The general ordering rule that this patch is enforcing is that this cpu-local variable be set only when the cpu-local AUX transaction is active; consequently, this variable is to be checked before the AUX related bits can be touched. Reported-by: Vince Weaver Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160906132353.19887-4-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/bts.c | 104 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 80 insertions(+), 24 deletions(-) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 0a6e393..61e1d71 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -31,7 +31,17 @@ struct bts_ctx { struct perf_output_handle handle; struct debug_store ds_back; - int started; + int state; +}; + +/* BTS context states: */ +enum { + /* no ongoing AUX transactions */ + BTS_STATE_STOPPED = 0, + /* AUX transaction is on, BTS tracing is disabled */ + BTS_STATE_INACTIVE, + /* AUX transaction is on, BTS tracing is running */ + BTS_STATE_ACTIVE, };
static DEFINE_PER_CPU(struct bts_ctx, bts_ctx); @@ -204,6 +214,15 @@ static void bts_update(struct bts_ctx *bts) static int bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle); +/* + * Ordering PMU callbacks wrt themselves and the PMI is done by means + * of bts::state, which: + * - is set when bts::handle::event is valid, that is, between + * perf_aux_output_begin() and perf_aux_output_end(); + * - is zero otherwise; + * - is ordered against bts::handle::event with a compiler barrier. + */ + static void __bts_event_start(struct perf_event *event) { struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); @@ -221,10 +240,13 @@ static void __bts_event_start(struct perf_event *event) /* * local barrier to make sure that ds configuration made it - * before we enable BTS + * before we enable BTS and bts::state goes ACTIVE */ wmb(); + /* INACTIVE/STOPPED -> ACTIVE */ + WRITE_ONCE(bts->state, BTS_STATE_ACTIVE); + intel_pmu_enable_bts(config); } @@ -251,9 +273,6 @@ static void bts_event_start(struct perf_event *event, int flags) __bts_event_start(event); - /* PMI handler: this counter is running and likely generating PMIs */ - ACCESS_ONCE(bts->started) = 1; - return; fail_end_stop: @@ -263,30 +282,34 @@ fail_stop: event->hw.state = PERF_HES_STOPPED; } -static void __bts_event_stop(struct perf_event *event) +static void __bts_event_stop(struct perf_event *event, int state) { + struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + + /* ACTIVE -> INACTIVE(PMI)/STOPPED(->stop()) */ + WRITE_ONCE(bts->state, state); + /* * No extra synchronization is mandated by the documentation to have * BTS data stores globally visible. 
*/ intel_pmu_disable_bts(); - - if (event->hw.state & PERF_HES_STOPPED) - return; - - ACCESS_ONCE(event->hw.state) |= PERF_HES_STOPPED; } static void bts_event_stop(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); - struct bts_buffer *buf = perf_get_aux(&bts->handle); + struct bts_buffer *buf = NULL; + int state = READ_ONCE(bts->state); + + if (state == BTS_STATE_ACTIVE) + __bts_event_stop(event, BTS_STATE_STOPPED); - /* PMI handler: don't restart this counter */ - ACCESS_ONCE(bts->started) = 0; + if (state != BTS_STATE_STOPPED) + buf = perf_get_aux(&bts->handle); - __bts_event_stop(event); + event->hw.state |= PERF_HES_STOPPED; if (flags & PERF_EF_UPDATE) { bts_update(bts); @@ -296,6 +319,7 @@ static void bts_event_stop(struct perf_event *event, int flags) bts->handle.head = local_xchg(&buf->data_size, buf->nr_pages << PAGE_SHIFT); + perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0), !!local_xchg(&buf->lost, 0)); } @@ -310,8 +334,20 @@ static void bts_event_stop(struct perf_event *event, int flags) void intel_bts_enable_local(void) { struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + int state = READ_ONCE(bts->state); + + /* + * Here we transition from INACTIVE to ACTIVE; + * if we instead are STOPPED from the interrupt handler, + * stay that way. Can't be ACTIVE here though. + */ + if (WARN_ON_ONCE(state == BTS_STATE_ACTIVE)) + return; + + if (state == BTS_STATE_STOPPED) + return; - if (bts->handle.event && bts->started) + if (bts->handle.event) __bts_event_start(bts->handle.event); } @@ -319,8 +355,15 @@ void intel_bts_disable_local(void) { struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + /* + * Here we transition from ACTIVE to INACTIVE; + * do nothing for STOPPED or INACTIVE. 
+ */ + if (READ_ONCE(bts->state) != BTS_STATE_ACTIVE) + return; + if (bts->handle.event) - __bts_event_stop(bts->handle.event); + __bts_event_stop(bts->handle.event, BTS_STATE_INACTIVE); } static int @@ -407,9 +450,13 @@ int intel_bts_interrupt(void) struct perf_event *event = bts->handle.event; struct bts_buffer *buf; s64 old_head; - int err; + int err = -ENOSPC; - if (!event || !bts->started) + /* + * this is wrapped in intel_bts_enable_local/intel_bts_disable_local, + * so we can only be INACTIVE or STOPPED + */ + if (READ_ONCE(bts->state) == BTS_STATE_STOPPED) return 0; buf = perf_get_aux(&bts->handle); @@ -432,12 +479,21 @@ int intel_bts_interrupt(void) !!local_xchg(&buf->lost, 0)); buf = perf_aux_output_begin(&bts->handle, event); - if (!buf) - return 1; + if (buf) + err = bts_buffer_reset(buf, &bts->handle); - err = bts_buffer_reset(buf, &bts->handle); - if (err) - perf_aux_output_end(&bts->handle, 0, false); + if (err) { + WRITE_ONCE(bts->state, BTS_STATE_STOPPED); + + if (buf) { + /* + * BTS_STATE_STOPPED should be visible before + * cleared handle::event + */ + barrier(); + perf_aux_output_end(&bts->handle, 0, false); + } + } return 1; } -- cgit v1.1 From 4d4c474124649198d9b0a065c06f9362cf18e14e Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 6 Sep 2016 16:23:52 +0300 Subject: perf/x86/intel/bts: Fix BTS PMI detection Since BTS doesn't have a dedicated PMI status bit, the driver needs to take extra care to check for the condition that triggers it to avoid spurious NMI warnings. Regardless of the local BTS context state, the only way of knowing that the NMI is ours is to compare the write pointer against the interrupt threshold. 
Reported-by: Vince Weaver Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160906132353.19887-5-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/bts.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 61e1d71..9233edf 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -446,26 +446,37 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle) int intel_bts_interrupt(void) { + struct debug_store *ds = this_cpu_ptr(&cpu_hw_events)->ds; struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); struct perf_event *event = bts->handle.event; struct bts_buffer *buf; s64 old_head; - int err = -ENOSPC; + int err = -ENOSPC, handled = 0; + + /* + * The only surefire way of knowing if this NMI is ours is by checking + * the write ptr against the PMI threshold. 
+ */ + if (ds->bts_index >= ds->bts_interrupt_threshold) + handled = 1; /* * this is wrapped in intel_bts_enable_local/intel_bts_disable_local, * so we can only be INACTIVE or STOPPED */ if (READ_ONCE(bts->state) == BTS_STATE_STOPPED) - return 0; + return handled; buf = perf_get_aux(&bts->handle); + if (!buf) + return handled; + /* * Skip snapshot counters: they don't use the interrupt, but * there's no other way of telling, because the pointer will * keep moving */ - if (!buf || buf->snapshot) + if (buf->snapshot) return 0; old_head = local_read(&buf->head); @@ -473,7 +484,7 @@ int intel_bts_interrupt(void) /* no new data */ if (old_head == local_read(&buf->head)) - return 0; + return handled; perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0), !!local_xchg(&buf->lost, 0)); -- cgit v1.1 From ef9ef3befa0d76008e988a9ed9fe439e803351b9 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 6 Sep 2016 16:23:53 +0300 Subject: perf/x86/intel/bts: Kill a silly warning At the moment, intel_bts will WARN() out if there is more than one event writing to the same ring buffer, via SET_OUTPUT, and will only send data from one event to a buffer. There is no reason to have this warning in, so kill it. 
Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160906132353.19887-6-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/bts.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 9233edf..bdcd651 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -378,8 +378,6 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle) return 0; head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1); - if (WARN_ON_ONCE(head != local_read(&buf->head))) - return -EINVAL; phys = &buf->buf[buf->cur_buf]; space = phys->offset + phys->displacement + phys->size - head; -- cgit v1.1 From 8ef9b8455a2a3049efa9e46e8a6402b972a3eb41 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 7 Sep 2016 14:42:55 +0200 Subject: perf/x86/intel: Fix PEBSv3 record drain Alexander hit the WARN_ON_ONCE(!event) on his Skylake while running the perf fuzzer. This means the PEBSv3 record included a status bit for an inactive event, something that _should_ not happen. Move the code that filters the status bits against our known PEBS events up a spot to guarantee we only deal with events we know about. Further add "continue" statements to the WARN_ON_ONCE()s such that we'll not die nor generate silly events in case we ever do hit them again. 
Reported-by: Alexander Shishkin Tested-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: stable@vger.kernel.org Fixes: a3d86542de88 ("perf/x86/intel/pebs: Add PEBSv3 decoding") Signed-off-by: Ingo Molnar --- arch/x86/events/intel/ds.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 7ce9f3f..9b983a4 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1274,18 +1274,18 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) struct pebs_record_nhm *p = at; u64 pebs_status; - /* PEBS v3 has accurate status bits */ + pebs_status = p->status & cpuc->pebs_enabled; + pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1; + + /* PEBS v3 has more accurate status bits */ if (x86_pmu.intel_cap.pebs_format >= 3) { - for_each_set_bit(bit, (unsigned long *)&p->status, - MAX_PEBS_EVENTS) + for_each_set_bit(bit, (unsigned long *)&pebs_status, + x86_pmu.max_pebs_events) counts[bit]++; continue; } - pebs_status = p->status & cpuc->pebs_enabled; - pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1; - /* * On some CPUs the PEBS status can be zero when PEBS is * racing with clearing of GLOBAL_STATUS. 
@@ -1333,8 +1333,11 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) continue; event = cpuc->events[bit]; - WARN_ON_ONCE(!event); - WARN_ON_ONCE(!event->attr.precise_ip); + if (WARN_ON_ONCE(!event)) + continue; + + if (WARN_ON_ONCE(!event->attr.precise_ip)) + continue; /* log dropped samples number */ if (error[bit]) -- cgit v1.1 From d817f432c2ab7639a4f69de73eafdc55e57c45ad Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 8 Sep 2016 16:23:45 +0300 Subject: net/ip_tunnels: Introduce tunnel_id_to_key32() and key32_to_tunnel_id() Add utility functions to convert a 32 bits key into a 64 bits tunnel and vice versa. These functions will be used instead of cloning code in GRE and VXLAN, and in tc act_iptunnel which will be introduced in a following patch in this patchset. Signed-off-by: Amir Vadai Signed-off-by: Hadar Hen Zion Reviewed-by: Shmulik Ladkani Acked-by: Jiri Benc Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 4 ++-- include/net/ip_tunnels.h | 19 +++++++++++++++++++ include/net/vxlan.h | 18 ------------------ net/ipv4/ip_gre.c | 23 ++--------------------- 4 files changed, 23 insertions(+), 41 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 199dec0..4bfeb97 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1291,7 +1291,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) struct metadata_dst *tun_dst; tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY, - vxlan_vni_to_tun_id(vni), sizeof(*md)); + key32_to_tunnel_id(vni), sizeof(*md)); if (!tun_dst) goto drop; @@ -1945,7 +1945,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, goto drop; } dst_port = info->key.tp_dst ? 
: vxlan->cfg.dst_port; - vni = vxlan_tun_id_to_vni(info->key.tun_id); + vni = tunnel_id_to_key32(info->key.tun_id); remote_ip.sa.sa_family = ip_tunnel_info_af(info); if (remote_ip.sa.sa_family == AF_INET) { remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index a5e7035..e598c63 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -222,6 +222,25 @@ static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info return tun_info->mode & IP_TUNNEL_INFO_IPV6 ? AF_INET6 : AF_INET; } +static inline __be64 key32_to_tunnel_id(__be32 key) +{ +#ifdef __BIG_ENDIAN + return (__force __be64)key; +#else + return (__force __be64)((__force u64)key << 32); +#endif +} + +/* Returns the least-significant 32 bits of a __be64. */ +static inline __be32 tunnel_id_to_key32(__be64 tun_id) +{ +#ifdef __BIG_ENDIAN + return (__force __be32)tun_id; +#else + return (__force __be32)((__force u64)tun_id >> 32); +#endif +} + #ifdef CONFIG_INET int ip_tunnel_init(struct net_device *dev); diff --git a/include/net/vxlan.h b/include/net/vxlan.h index b96d036..0255613 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -350,24 +350,6 @@ static inline __be32 vxlan_vni_field(__be32 vni) #endif } -static inline __be32 vxlan_tun_id_to_vni(__be64 tun_id) -{ -#if defined(__BIG_ENDIAN) - return (__force __be32)tun_id; -#else - return (__force __be32)((__force u64)tun_id >> 32); -#endif -} - -static inline __be64 vxlan_vni_to_tun_id(__be32 vni) -{ -#if defined(__BIG_ENDIAN) - return (__force __be64)vni; -#else - return (__force __be64)((u64)(__force u32)vni << 32); -#endif -} - static inline size_t vxlan_rco_start(__be32 vni_field) { return be32_to_cpu(vni_field & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 113cc43..576f705 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -246,25 +246,6 @@ static void gre_err(struct sk_buff *skb, u32 info) 
ipgre_err(skb, info, &tpi); } -static __be64 key_to_tunnel_id(__be32 key) -{ -#ifdef __BIG_ENDIAN - return (__force __be64)((__force u32)key); -#else - return (__force __be64)((__force u64)key << 32); -#endif -} - -/* Returns the least-significant 32 bits of a __be64. */ -static __be32 tunnel_id_to_key(__be64 x) -{ -#ifdef __BIG_ENDIAN - return (__force __be32)x; -#else - return (__force __be32)((__force u64)x >> 32); -#endif -} - static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi, struct ip_tunnel_net *itn, int hdr_len, bool raw_proto) { @@ -290,7 +271,7 @@ static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi, __be64 tun_id; flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY); - tun_id = key_to_tunnel_id(tpi->key); + tun_id = key32_to_tunnel_id(tpi->key); tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0); if (!tun_dst) return PACKET_REJECT; @@ -446,7 +427,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY); gre_build_header(skb, tunnel_hlen, flags, proto, - tunnel_id_to_key(tun_info->key.tun_id), 0); + tunnel_id_to_key32(tun_info->key.tun_id), 0); df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; -- cgit v1.1 From 2ff378b7474feac1ec665d01e4dfc6907cccc11c Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 8 Sep 2016 16:23:46 +0300 Subject: net/dst: Utility functions to build dst_metadata without supplying an skb Extract __ip_tun_set_dst() and __ipv6_tun_set_dst() out of ip_tun_rx_dst() and ipv6_tun_rx_dst(), to be used without supplying an skb. Signed-off-by: Amir Vadai Signed-off-by: Hadar Hen Zion Acked-by: Jiri Pirko Reviewed-by: Shmulik Ladkani Signed-off-by: David S. 
Miller --- include/net/dst_metadata.h | 52 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 5db9f59..6965c8f 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -112,12 +112,13 @@ static inline struct ip_tunnel_info *skb_tunnel_info_unclone(struct sk_buff *skb return &dst->u.tun_info; } -static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, - __be16 flags, - __be64 tunnel_id, - int md_size) +static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr, + __be32 daddr, + __u8 tos, __u8 ttl, + __be16 flags, + __be64 tunnel_id, + int md_size) { - const struct iphdr *iph = ip_hdr(skb); struct metadata_dst *tun_dst; tun_dst = tun_rx_dst(md_size); @@ -125,17 +126,30 @@ static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, return NULL; ip_tunnel_key_init(&tun_dst->u.tun_info.key, - iph->saddr, iph->daddr, iph->tos, iph->ttl, + saddr, daddr, tos, ttl, 0, 0, 0, tunnel_id, flags); return tun_dst; } -static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, +static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, __be16 flags, __be64 tunnel_id, int md_size) { - const struct ipv6hdr *ip6h = ipv6_hdr(skb); + const struct iphdr *iph = ip_hdr(skb); + + return __ip_tun_set_dst(iph->saddr, iph->daddr, iph->tos, iph->ttl, + flags, tunnel_id, md_size); +} + +static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u8 tos, __u8 ttl, + __be32 label, + __be16 flags, + __be64 tunnel_id, + int md_size) +{ struct metadata_dst *tun_dst; struct ip_tunnel_info *info; @@ -150,14 +164,26 @@ static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, info->key.tp_src = 0; info->key.tp_dst = 0; - info->key.u.ipv6.src = ip6h->saddr; - info->key.u.ipv6.dst = ip6h->daddr; + info->key.u.ipv6.src = *saddr; + 
info->key.u.ipv6.dst = *daddr; - info->key.tos = ipv6_get_dsfield(ip6h); - info->key.ttl = ip6h->hop_limit; - info->key.label = ip6_flowlabel(ip6h); + info->key.tos = tos; + info->key.ttl = ttl; + info->key.label = label; return tun_dst; } +static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, + __be16 flags, + __be64 tunnel_id, + int md_size) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + + return __ipv6_tun_set_dst(&ip6h->saddr, &ip6h->daddr, + ipv6_get_dsfield(ip6h), ip6h->hop_limit, + ip6_flowlabel(ip6h), flags, tunnel_id, + md_size); +} #endif /* __NET_DST_METADATA_H */ -- cgit v1.1 From bc3103f1ed405de587fa43d8b0671e615505a700 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 8 Sep 2016 16:23:47 +0300 Subject: net/sched: cls_flower: Classify packet in ip tunnels Introduce classifying by metadata extracted by the tunnel device. Outer header fields - source/dest ip and tunnel id, are extracted from the metadata when classifying. For example, the following will add a filter on the ingress Qdisc of shared vxlan device named 'vxlan0'. To forward packets with outer src ip 11.11.0.2, dst ip 11.11.0.1 and tunnel id 11. The packets will be forwarded to tap device 'vnet0' (after metadata is released): $ tc filter add dev vxlan0 protocol ip parent ffff: \ flower \ enc_src_ip 11.11.0.2 \ enc_dst_ip 11.11.0.1 \ enc_key_id 11 \ dst_ip 11.11.11.1 \ action tunnel_key release \ action mirred egress redirect dev vnet0 The action tunnel_key, will be introduced in the next patch in this series. Signed-off-by: Amir Vadai Signed-off-by: Hadar Hen Zion Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/uapi/linux/pkt_cls.h | 11 +++++ net/sched/cls_flower.c | 100 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 110 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 51b5b24..f9c287c 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -431,6 +431,17 @@ enum { TCA_FLOWER_KEY_VLAN_ID, TCA_FLOWER_KEY_VLAN_PRIO, TCA_FLOWER_KEY_VLAN_ETH_TYPE, + + TCA_FLOWER_KEY_ENC_KEY_ID, /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_SRC, /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,/* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_DST, /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,/* be32 */ + TCA_FLOWER_KEY_ENC_IPV6_SRC, /* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,/* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_DST, /* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,/* struct in6_addr */ + __TCA_FLOWER_MAX, }; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index cf9ad5b..b084b2a 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -23,9 +23,13 @@ #include #include +#include +#include + struct fl_flow_key { int indev_ifindex; struct flow_dissector_key_control control; + struct flow_dissector_key_control enc_control; struct flow_dissector_key_basic basic; struct flow_dissector_key_eth_addrs eth; struct flow_dissector_key_vlan vlan; @@ -35,6 +39,11 @@ struct fl_flow_key { struct flow_dissector_key_ipv6_addrs ipv6; }; struct flow_dissector_key_ports tp; + struct flow_dissector_key_keyid enc_key_id; + union { + struct flow_dissector_key_ipv4_addrs enc_ipv4; + struct flow_dissector_key_ipv6_addrs enc_ipv6; + }; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. 
*/ struct fl_flow_mask_range { @@ -124,11 +133,31 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct cls_fl_filter *f; struct fl_flow_key skb_key; struct fl_flow_key skb_mkey; + struct ip_tunnel_info *info; if (!atomic_read(&head->ht.nelems)) return -1; fl_clear_masked_range(&skb_key, &head->mask); + + info = skb_tunnel_info(skb); + if (info) { + struct ip_tunnel_key *key = &info->key; + + switch (ip_tunnel_info_af(info)) { + case AF_INET: + skb_key.enc_ipv4.src = key->u.ipv4.src; + skb_key.enc_ipv4.dst = key->u.ipv4.dst; + break; + case AF_INET6: + skb_key.enc_ipv6.src = key->u.ipv6.src; + skb_key.enc_ipv6.dst = key->u.ipv6.dst; + break; + } + + skb_key.enc_key_id.keyid = tunnel_id_to_key32(key->tun_id); + } + skb_key.indev_ifindex = skb->skb_iif; /* skb_flow_dissect() does not set n_proto in case an unknown protocol, * so do it rather here. @@ -297,7 +326,15 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_VLAN_ID] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_VLAN_PRIO] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_VLAN_ETH_TYPE] = { .type = NLA_U16 }, - + [TCA_FLOWER_KEY_ENC_KEY_ID] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_ENC_IPV4_SRC] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_ENC_IPV4_DST] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_ENC_IPV4_DST_MASK] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_ENC_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_ENC_IPV6_DST] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_ENC_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) }, }; static void fl_set_key_val(struct nlattr **tb, @@ -409,6 +446,40 @@ static int fl_set_key(struct net *net, struct nlattr **tb, sizeof(key->tp.dst)); } + if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] || + tb[TCA_FLOWER_KEY_ENC_IPV4_DST]) { + key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + 
fl_set_key_val(tb, &key->enc_ipv4.src, + TCA_FLOWER_KEY_ENC_IPV4_SRC, + &mask->enc_ipv4.src, + TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK, + sizeof(key->enc_ipv4.src)); + fl_set_key_val(tb, &key->enc_ipv4.dst, + TCA_FLOWER_KEY_ENC_IPV4_DST, + &mask->enc_ipv4.dst, + TCA_FLOWER_KEY_ENC_IPV4_DST_MASK, + sizeof(key->enc_ipv4.dst)); + } + + if (tb[TCA_FLOWER_KEY_ENC_IPV6_SRC] || + tb[TCA_FLOWER_KEY_ENC_IPV6_DST]) { + key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + fl_set_key_val(tb, &key->enc_ipv6.src, + TCA_FLOWER_KEY_ENC_IPV6_SRC, + &mask->enc_ipv6.src, + TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK, + sizeof(key->enc_ipv6.src)); + fl_set_key_val(tb, &key->enc_ipv6.dst, + TCA_FLOWER_KEY_ENC_IPV6_DST, + &mask->enc_ipv6.dst, + TCA_FLOWER_KEY_ENC_IPV6_DST_MASK, + sizeof(key->enc_ipv6.dst)); + } + + fl_set_key_val(tb, &key->enc_key_id.keyid, TCA_FLOWER_KEY_ENC_KEY_ID, + &mask->enc_key_id.keyid, TCA_FLOWER_KEY_ENC_KEY_ID, + sizeof(key->enc_key_id.keyid)); + return 0; } @@ -821,6 +892,33 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, sizeof(key->tp.dst)))) goto nla_put_failure; + if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS && + (fl_dump_key_val(skb, &key->enc_ipv4.src, + TCA_FLOWER_KEY_ENC_IPV4_SRC, &mask->enc_ipv4.src, + TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK, + sizeof(key->enc_ipv4.src)) || + fl_dump_key_val(skb, &key->enc_ipv4.dst, + TCA_FLOWER_KEY_ENC_IPV4_DST, &mask->enc_ipv4.dst, + TCA_FLOWER_KEY_ENC_IPV4_DST_MASK, + sizeof(key->enc_ipv4.dst)))) + goto nla_put_failure; + else if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS && + (fl_dump_key_val(skb, &key->enc_ipv6.src, + TCA_FLOWER_KEY_ENC_IPV6_SRC, &mask->enc_ipv6.src, + TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK, + sizeof(key->enc_ipv6.src)) || + fl_dump_key_val(skb, &key->enc_ipv6.dst, + TCA_FLOWER_KEY_ENC_IPV6_DST, + &mask->enc_ipv6.dst, + TCA_FLOWER_KEY_ENC_IPV6_DST_MASK, + sizeof(key->enc_ipv6.dst)))) + goto nla_put_failure; + + if (fl_dump_key_val(skb, 
&key->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID, + &mask->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID, + sizeof(key->enc_key_id))) + goto nla_put_failure; + nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags); if (tcf_exts_dump(skb, &f->exts)) -- cgit v1.1 From d0f6dd8a914f42c6f1a3a8c08caa16559d3d9a1b Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 8 Sep 2016 16:23:48 +0300 Subject: net/sched: Introduce act_tunnel_key This action could be used before redirecting packets to a shared tunnel device, or when redirecting packets arriving from such a device. The action will release the metadata created by the tunnel device (decap), or set the metadata with the specified values for encap operation. For example, the following flower filter will forward all ICMP packets destined to 11.11.11.2 through the shared vxlan device 'vxlan0'. Before redirecting, a metadata for the vxlan tunnel is created using the tunnel_key action and its arguments: $ tc filter add dev net0 protocol ip parent ffff: \ flower \ ip_proto 1 \ dst_ip 11.11.11.2 \ action tunnel_key set \ src_ip 11.11.0.1 \ dst_ip 11.11.0.2 \ id 11 \ action mirred egress redirect dev vxlan0 Signed-off-by: Amir Vadai Signed-off-by: Hadar Hen Zion Reviewed-by: Shmulik Ladkani Acked-by: Jamal Hadi Salim Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/tc_act/tc_tunnel_key.h | 30 +++ include/uapi/linux/tc_act/tc_tunnel_key.h | 41 ++++ net/sched/Kconfig | 11 + net/sched/Makefile | 1 + net/sched/act_tunnel_key.c | 351 ++++++++++++++++++++++++++++++ 5 files changed, 434 insertions(+) create mode 100644 include/net/tc_act/tc_tunnel_key.h create mode 100644 include/uapi/linux/tc_act/tc_tunnel_key.h create mode 100644 net/sched/act_tunnel_key.c diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h new file mode 100644 index 0000000..253f8da --- /dev/null +++ b/include/net/tc_act/tc_tunnel_key.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2016, Amir Vadai + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __NET_TC_TUNNEL_KEY_H +#define __NET_TC_TUNNEL_KEY_H + +#include + +struct tcf_tunnel_key_params { + struct rcu_head rcu; + int tcft_action; + int action; + struct metadata_dst *tcft_enc_metadata; +}; + +struct tcf_tunnel_key { + struct tc_action common; + struct tcf_tunnel_key_params __rcu *params; +}; + +#define to_tunnel_key(a) ((struct tcf_tunnel_key *)a) + +#endif /* __NET_TC_TUNNEL_KEY_H */ diff --git a/include/uapi/linux/tc_act/tc_tunnel_key.h b/include/uapi/linux/tc_act/tc_tunnel_key.h new file mode 100644 index 0000000..890106f --- /dev/null +++ b/include/uapi/linux/tc_act/tc_tunnel_key.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2016, Amir Vadai + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#ifndef __LINUX_TC_TUNNEL_KEY_H +#define __LINUX_TC_TUNNEL_KEY_H + +#include + +#define TCA_ACT_TUNNEL_KEY 17 + +#define TCA_TUNNEL_KEY_ACT_SET 1 +#define TCA_TUNNEL_KEY_ACT_RELEASE 2 + +struct tc_tunnel_key { + tc_gen; + int t_action; +}; + +enum { + TCA_TUNNEL_KEY_UNSPEC, + TCA_TUNNEL_KEY_TM, + TCA_TUNNEL_KEY_PARMS, + TCA_TUNNEL_KEY_ENC_IPV4_SRC, /* be32 */ + TCA_TUNNEL_KEY_ENC_IPV4_DST, /* be32 */ + TCA_TUNNEL_KEY_ENC_IPV6_SRC, /* struct in6_addr */ + TCA_TUNNEL_KEY_ENC_IPV6_DST, /* struct in6_addr */ + TCA_TUNNEL_KEY_ENC_KEY_ID, /* be64 */ + TCA_TUNNEL_KEY_PAD, + __TCA_TUNNEL_KEY_MAX, +}; + +#define TCA_TUNNEL_KEY_MAX (__TCA_TUNNEL_KEY_MAX - 1) + +#endif diff --git a/net/sched/Kconfig b/net/sched/Kconfig index ccf931b..72e3426 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -761,6 +761,17 @@ config NET_ACT_IFE To compile this code as a module, choose M here: the module will be called act_ife. +config NET_ACT_TUNNEL_KEY + tristate "IP tunnel metadata manipulation" + depends on NET_CLS_ACT + ---help--- + Say Y here to set/release ip tunnel metadata. + + If unsure, say N. + + To compile this code as a module, choose M here: the + module will be called act_tunnel_key. 
+ config NET_IFE_SKBMARK tristate "Support to encoding decoding skb mark on IFE action" depends on NET_ACT_IFE diff --git a/net/sched/Makefile b/net/sched/Makefile index ae088a5..b9d046b 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -22,6 +22,7 @@ obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o obj-$(CONFIG_NET_ACT_IFE) += act_ife.o obj-$(CONFIG_NET_IFE_SKBMARK) += act_meta_mark.o obj-$(CONFIG_NET_IFE_SKBPRIO) += act_meta_skbprio.o +obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c new file mode 100644 index 0000000..dceff74 --- /dev/null +++ b/net/sched/act_tunnel_key.c @@ -0,0 +1,351 @@ +/* + * Copyright (c) 2016, Amir Vadai + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define TUNNEL_KEY_TAB_MASK 15 + +static int tunnel_key_net_id; +static struct tc_action_ops act_tunnel_key_ops; + +static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params; + int action; + + rcu_read_lock(); + + params = rcu_dereference(t->params); + + tcf_lastuse_update(&t->tcf_tm); + bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb); + action = params->action; + + switch (params->tcft_action) { + case TCA_TUNNEL_KEY_ACT_RELEASE: + skb_dst_drop(skb); + break; + case TCA_TUNNEL_KEY_ACT_SET: + skb_dst_drop(skb); + skb_dst_set(skb, dst_clone(&params->tcft_enc_metadata->dst)); + break; + default: + WARN_ONCE(1, "Bad tunnel_key action %d.\n", + params->tcft_action); + break; + } + + rcu_read_unlock(); + + return action; +} + +static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = { + [TCA_TUNNEL_KEY_PARMS] = { .len = sizeof(struct tc_tunnel_key) }, + [TCA_TUNNEL_KEY_ENC_IPV4_SRC] = { .type = NLA_U32 }, + [TCA_TUNNEL_KEY_ENC_IPV4_DST] = { .type = NLA_U32 }, + [TCA_TUNNEL_KEY_ENC_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, + [TCA_TUNNEL_KEY_ENC_IPV6_DST] = { .len = sizeof(struct in6_addr) }, + [TCA_TUNNEL_KEY_ENC_KEY_ID] = { .type = NLA_U32 }, +}; + +static int tunnel_key_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action **a, + int ovr, int bind) +{ + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1]; + struct tcf_tunnel_key_params *params_old; + struct tcf_tunnel_key_params *params_new; + struct metadata_dst *metadata = NULL; + struct tc_tunnel_key *parm; + struct tcf_tunnel_key *t; + bool exists = false; + __be64 key_id; + int ret = 0; + int err; + + if (!nla) + return -EINVAL; + + err = 
nla_parse_nested(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy); + if (err < 0) + return err; + + if (!tb[TCA_TUNNEL_KEY_PARMS]) + return -EINVAL; + + parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]); + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; + + switch (parm->t_action) { + case TCA_TUNNEL_KEY_ACT_RELEASE: + break; + case TCA_TUNNEL_KEY_ACT_SET: + if (!tb[TCA_TUNNEL_KEY_ENC_KEY_ID]) { + ret = -EINVAL; + goto err_out; + } + + key_id = key32_to_tunnel_id(nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID])); + + if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] && + tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) { + __be32 saddr; + __be32 daddr; + + saddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC]); + daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]); + + metadata = __ip_tun_set_dst(saddr, daddr, 0, 0, + TUNNEL_KEY, key_id, 0); + } else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] && + tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]) { + struct in6_addr saddr; + struct in6_addr daddr; + + saddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC]); + daddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]); + + metadata = __ipv6_tun_set_dst(&saddr, &daddr, 0, 0, 0, + TUNNEL_KEY, key_id, 0); + } + + if (!metadata) { + ret = -EINVAL; + goto err_out; + } + + metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX; + break; + default: + goto err_out; + } + + if (!exists) { + ret = tcf_hash_create(tn, parm->index, est, a, + &act_tunnel_key_ops, bind, true); + if (ret) + return ret; + + ret = ACT_P_CREATED; + } else { + tcf_hash_release(*a, bind); + if (!ovr) + return -EEXIST; + } + + t = to_tunnel_key(*a); + + ASSERT_RTNL(); + params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); + if (unlikely(!params_new)) { + if (ret == ACT_P_CREATED) + tcf_hash_release(*a, bind); + return -ENOMEM; + } + + params_old = rtnl_dereference(t->params); + + params_new->action = parm->action; + params_new->tcft_action = parm->t_action; + params_new->tcft_enc_metadata = metadata; + + 
rcu_assign_pointer(t->params, params_new); + + if (params_old) + kfree_rcu(params_old, rcu); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(tn, *a); + + return ret; + +err_out: + if (exists) + tcf_hash_release(*a, bind); + return ret; +} + +static void tunnel_key_release(struct tc_action *a, int bind) +{ + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params; + + rcu_read_lock(); + params = rcu_dereference(t->params); + + if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) + dst_release(&params->tcft_enc_metadata->dst); + + kfree_rcu(params, rcu); + + rcu_read_unlock(); +} + +static int tunnel_key_dump_addresses(struct sk_buff *skb, + const struct ip_tunnel_info *info) +{ + unsigned short family = ip_tunnel_info_af(info); + + if (family == AF_INET) { + __be32 saddr = info->key.u.ipv4.src; + __be32 daddr = info->key.u.ipv4.dst; + + if (!nla_put_in_addr(skb, TCA_TUNNEL_KEY_ENC_IPV4_SRC, saddr) && + !nla_put_in_addr(skb, TCA_TUNNEL_KEY_ENC_IPV4_DST, daddr)) + return 0; + } + + if (family == AF_INET6) { + const struct in6_addr *saddr6 = &info->key.u.ipv6.src; + const struct in6_addr *daddr6 = &info->key.u.ipv6.dst; + + if (!nla_put_in6_addr(skb, + TCA_TUNNEL_KEY_ENC_IPV6_SRC, saddr6) && + !nla_put_in6_addr(skb, + TCA_TUNNEL_KEY_ENC_IPV6_DST, daddr6)) + return 0; + } + + return -EINVAL; +} + +static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *b = skb_tail_pointer(skb); + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params; + struct tc_tunnel_key opt = { + .index = t->tcf_index, + .refcnt = t->tcf_refcnt - ref, + .bindcnt = t->tcf_bindcnt - bind, + }; + struct tcf_t tm; + int ret = -1; + + rcu_read_lock(); + params = rcu_dereference(t->params); + + opt.t_action = params->tcft_action; + opt.action = params->action; + + if (nla_put(skb, TCA_TUNNEL_KEY_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + + if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) { 
+ struct ip_tunnel_key *key = + &params->tcft_enc_metadata->u.tun_info.key; + __be32 key_id = tunnel_id_to_key32(key->tun_id); + + if (nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id) || + tunnel_key_dump_addresses(skb, + &params->tcft_enc_metadata->u.tun_info)) + goto nla_put_failure; + } + + tcf_tm_dump(&tm, &t->tcf_tm); + if (nla_put_64bit(skb, TCA_TUNNEL_KEY_TM, sizeof(tm), + &tm, TCA_TUNNEL_KEY_PAD)) + goto nla_put_failure; + + ret = skb->len; + goto out; + +nla_put_failure: + nlmsg_trim(skb, b); +out: + rcu_read_unlock(); + + return ret; +} + +static int tunnel_key_walker(struct net *net, struct sk_buff *skb, + struct netlink_callback *cb, int type, + const struct tc_action_ops *ops) +{ + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + + return tcf_generic_walker(tn, skb, cb, type, ops); +} + +static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index) +{ + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + + return tcf_hash_search(tn, a, index); +} + +static struct tc_action_ops act_tunnel_key_ops = { + .kind = "tunnel_key", + .type = TCA_ACT_TUNNEL_KEY, + .owner = THIS_MODULE, + .act = tunnel_key_act, + .dump = tunnel_key_dump, + .init = tunnel_key_init, + .cleanup = tunnel_key_release, + .walk = tunnel_key_walker, + .lookup = tunnel_key_search, + .size = sizeof(struct tcf_tunnel_key), +}; + +static __net_init int tunnel_key_init_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + + return tc_action_net_init(tn, &act_tunnel_key_ops, TUNNEL_KEY_TAB_MASK); +} + +static void __net_exit tunnel_key_exit_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + + tc_action_net_exit(tn); +} + +static struct pernet_operations tunnel_key_net_ops = { + .init = tunnel_key_init_net, + .exit = tunnel_key_exit_net, + .id = &tunnel_key_net_id, + .size = sizeof(struct tc_action_net), +}; + +static int __init tunnel_key_init_module(void) +{ + return 
tcf_register_action(&act_tunnel_key_ops, &tunnel_key_net_ops); +} + +static void __exit tunnel_key_cleanup_module(void) +{ + tcf_unregister_action(&act_tunnel_key_ops, &tunnel_key_net_ops); +} + +module_init(tunnel_key_init_module); +module_exit(tunnel_key_cleanup_module); + +MODULE_AUTHOR("Amir Vadai "); +MODULE_DESCRIPTION("ip tunnel manipulation actions"); +MODULE_LICENSE("GPL v2"); -- cgit v1.1 From e1487888eccc83e9eb5a3659955f79b039dc7945 Mon Sep 17 00:00:00 2001 From: Chris Brandt Date: Wed, 7 Sep 2016 14:57:09 -0400 Subject: net: ethernet: renesas: sh_eth: add POST registers for rz Due to a mistake in the hardware manual, the FWSLC and POST1-4 registers were not documented and left out of the driver for RZ/A making the CAM feature non-operational. Additionally, when the offset values for POST1-4 are left blank, the driver attempts to set them using an offset of 0xFFFF which can cause a memory corruption or panic. This patch fixes the panic and properly enables CAM. Reported-by: Daniel Palmer Signed-off-by: Chris Brandt Acked-by: Sergei Shtylyov Signed-off-by: David S. 
Miller --- drivers/net/ethernet/renesas/sh_eth.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 799d58d..054e795 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -201,9 +201,14 @@ static const u16 sh_eth_offset_fast_rz[SH_ETH_MAX_REGISTER_OFFSET] = { [ARSTR] = 0x0000, [TSU_CTRST] = 0x0004, + [TSU_FWSLC] = 0x0038, [TSU_VTAG0] = 0x0058, [TSU_ADSBSY] = 0x0060, [TSU_TEN] = 0x0064, + [TSU_POST1] = 0x0070, + [TSU_POST2] = 0x0074, + [TSU_POST3] = 0x0078, + [TSU_POST4] = 0x007c, [TSU_ADRH0] = 0x0100, [TXNLCR0] = 0x0080, @@ -2786,6 +2791,8 @@ static void sh_eth_tsu_init(struct sh_eth_private *mdp) { if (sh_eth_is_rz_fast_ether(mdp)) { sh_eth_tsu_write(mdp, 0, TSU_TEN); /* Disable all CAM entry */ + sh_eth_tsu_write(mdp, TSU_FWSLC_POSTENU | TSU_FWSLC_POSTENL, + TSU_FWSLC); /* Enable POST registers */ return; } -- cgit v1.1 From 8572763af48728561f8bf708b6c0ea9f4db5929e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 9 Sep 2016 14:26:21 +0200 Subject: net: fs_enet: merge NAPI RX and NAPI TX Initially, a NAPI TX routine was implemented separately from NAPI RX, as done on the freescale/gianfar driver. By merging NAPI RX and NAPI TX, we reduce the number of TX completion interrupts. Handling of the budget in association with TX interrupts is based on indications provided at https://wiki.linuxfoundation.org/networking/napi We never process more than the complete TX ring on a single run. At the same time, we fix an issue in the handling of fep->tx_free: It is only when fep->tx_free goes up to MAX_SKB_FRAGS that we need to wake up the queue. There is no need to call netif_wake_queue() at every packet successfully transmitted. Signed-off-by: Christophe Leroy Signed-off-by: David S. 
Miller --- .../net/ethernet/freescale/fs_enet/fs_enet-main.c | 268 +++++++++------------ drivers/net/ethernet/freescale/fs_enet/fs_enet.h | 16 +- drivers/net/ethernet/freescale/fs_enet/mac-fcc.c | 57 +---- drivers/net/ethernet/freescale/fs_enet/mac-fec.c | 57 +---- drivers/net/ethernet/freescale/fs_enet/mac-scc.c | 57 +---- 5 files changed, 160 insertions(+), 295 deletions(-) diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index 61fd486..37574a9 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -60,6 +60,9 @@ module_param(fs_enet_debug, int, 0); MODULE_PARM_DESC(fs_enet_debug, "Freescale bitmapped debugging message enable value"); +#define RX_RING_SIZE 32 +#define TX_RING_SIZE 64 + #ifdef CONFIG_NET_POLL_CONTROLLER static void fs_enet_netpoll(struct net_device *dev); #endif @@ -79,8 +82,8 @@ static void skb_align(struct sk_buff *skb, int align) skb_reserve(skb, align - off); } -/* NAPI receive function */ -static int fs_enet_rx_napi(struct napi_struct *napi, int budget) +/* NAPI function */ +static int fs_enet_napi(struct napi_struct *napi, int budget) { struct fs_enet_private *fep = container_of(napi, struct fs_enet_private, napi); struct net_device *dev = fep->ndev; @@ -90,9 +93,102 @@ static int fs_enet_rx_napi(struct napi_struct *napi, int budget) int received = 0; u16 pkt_len, sc; int curidx; + int dirtyidx, do_wake, do_restart; + int tx_left = TX_RING_SIZE; - if (budget <= 0) - return received; + spin_lock(&fep->tx_lock); + bdp = fep->dirty_tx; + + /* clear status bits for napi*/ + (*fep->ops->napi_clear_event)(dev); + + do_wake = do_restart = 0; + while (((sc = CBDR_SC(bdp)) & BD_ENET_TX_READY) == 0 && tx_left) { + dirtyidx = bdp - fep->tx_bd_base; + + if (fep->tx_free == fep->tx_ring) + break; + + skb = fep->tx_skbuff[dirtyidx]; + + /* + * Check for errors. 
+ */ + if (sc & (BD_ENET_TX_HB | BD_ENET_TX_LC | + BD_ENET_TX_RL | BD_ENET_TX_UN | BD_ENET_TX_CSL)) { + + if (sc & BD_ENET_TX_HB) /* No heartbeat */ + fep->stats.tx_heartbeat_errors++; + if (sc & BD_ENET_TX_LC) /* Late collision */ + fep->stats.tx_window_errors++; + if (sc & BD_ENET_TX_RL) /* Retrans limit */ + fep->stats.tx_aborted_errors++; + if (sc & BD_ENET_TX_UN) /* Underrun */ + fep->stats.tx_fifo_errors++; + if (sc & BD_ENET_TX_CSL) /* Carrier lost */ + fep->stats.tx_carrier_errors++; + + if (sc & (BD_ENET_TX_LC | BD_ENET_TX_RL | BD_ENET_TX_UN)) { + fep->stats.tx_errors++; + do_restart = 1; + } + } else + fep->stats.tx_packets++; + + if (sc & BD_ENET_TX_READY) { + dev_warn(fep->dev, + "HEY! Enet xmit interrupt and TX_READY.\n"); + } + + /* + * Deferred means some collisions occurred during transmit, + * but we eventually sent the packet OK. + */ + if (sc & BD_ENET_TX_DEF) + fep->stats.collisions++; + + /* unmap */ + if (fep->mapped_as_page[dirtyidx]) + dma_unmap_page(fep->dev, CBDR_BUFADDR(bdp), + CBDR_DATLEN(bdp), DMA_TO_DEVICE); + else + dma_unmap_single(fep->dev, CBDR_BUFADDR(bdp), + CBDR_DATLEN(bdp), DMA_TO_DEVICE); + + /* + * Free the sk buffer associated with this last transmit. + */ + if (skb) { + dev_kfree_skb(skb); + fep->tx_skbuff[dirtyidx] = NULL; + } + + /* + * Update pointer to next buffer descriptor to be transmitted. + */ + if ((sc & BD_ENET_TX_WRAP) == 0) + bdp++; + else + bdp = fep->tx_bd_base; + + /* + * Since we have freed up a buffer, the ring is no longer + * full. + */ + if (++fep->tx_free == MAX_SKB_FRAGS) + do_wake = 1; + tx_left--; + } + + fep->dirty_tx = bdp; + + if (do_restart) + (*fep->ops->tx_restart)(dev); + + spin_unlock(&fep->tx_lock); + + if (do_wake) + netif_wake_queue(dev); /* * First, grab all of the stats for the incoming packet. 
@@ -100,10 +196,8 @@ static int fs_enet_rx_napi(struct napi_struct *napi, int budget) */ bdp = fep->cur_rx; - /* clear RX status bits for napi*/ - (*fep->ops->napi_clear_rx_event)(dev); - - while (((sc = CBDR_SC(bdp)) & BD_ENET_RX_EMPTY) == 0) { + while (((sc = CBDR_SC(bdp)) & BD_ENET_RX_EMPTY) == 0 && + received < budget) { curidx = bdp - fep->rx_bd_base; /* @@ -197,134 +291,19 @@ static int fs_enet_rx_napi(struct napi_struct *napi, int budget) bdp = fep->rx_bd_base; (*fep->ops->rx_bd_done)(dev); - - if (received >= budget) - break; } fep->cur_rx = bdp; - if (received < budget) { + if (received < budget && tx_left) { /* done */ napi_complete(napi); - (*fep->ops->napi_enable_rx)(dev); - } - return received; -} + (*fep->ops->napi_enable)(dev); -static int fs_enet_tx_napi(struct napi_struct *napi, int budget) -{ - struct fs_enet_private *fep = container_of(napi, struct fs_enet_private, - napi_tx); - struct net_device *dev = fep->ndev; - cbd_t __iomem *bdp; - struct sk_buff *skb; - int dirtyidx, do_wake, do_restart; - u16 sc; - int has_tx_work = 0; - - spin_lock(&fep->tx_lock); - bdp = fep->dirty_tx; - - /* clear TX status bits for napi*/ - (*fep->ops->napi_clear_tx_event)(dev); - - do_wake = do_restart = 0; - while (((sc = CBDR_SC(bdp)) & BD_ENET_TX_READY) == 0) { - dirtyidx = bdp - fep->tx_bd_base; - - if (fep->tx_free == fep->tx_ring) - break; - - skb = fep->tx_skbuff[dirtyidx]; - - /* - * Check for errors. 
- */ - if (sc & (BD_ENET_TX_HB | BD_ENET_TX_LC | - BD_ENET_TX_RL | BD_ENET_TX_UN | BD_ENET_TX_CSL)) { - - if (sc & BD_ENET_TX_HB) /* No heartbeat */ - fep->stats.tx_heartbeat_errors++; - if (sc & BD_ENET_TX_LC) /* Late collision */ - fep->stats.tx_window_errors++; - if (sc & BD_ENET_TX_RL) /* Retrans limit */ - fep->stats.tx_aborted_errors++; - if (sc & BD_ENET_TX_UN) /* Underrun */ - fep->stats.tx_fifo_errors++; - if (sc & BD_ENET_TX_CSL) /* Carrier lost */ - fep->stats.tx_carrier_errors++; - - if (sc & (BD_ENET_TX_LC | BD_ENET_TX_RL | BD_ENET_TX_UN)) { - fep->stats.tx_errors++; - do_restart = 1; - } - } else - fep->stats.tx_packets++; - - if (sc & BD_ENET_TX_READY) { - dev_warn(fep->dev, - "HEY! Enet xmit interrupt and TX_READY.\n"); - } - - /* - * Deferred means some collisions occurred during transmit, - * but we eventually sent the packet OK. - */ - if (sc & BD_ENET_TX_DEF) - fep->stats.collisions++; - - /* unmap */ - if (fep->mapped_as_page[dirtyidx]) - dma_unmap_page(fep->dev, CBDR_BUFADDR(bdp), - CBDR_DATLEN(bdp), DMA_TO_DEVICE); - else - dma_unmap_single(fep->dev, CBDR_BUFADDR(bdp), - CBDR_DATLEN(bdp), DMA_TO_DEVICE); - - /* - * Free the sk buffer associated with this last transmit. - */ - if (skb) { - dev_kfree_skb(skb); - fep->tx_skbuff[dirtyidx] = NULL; - } - - /* - * Update pointer to next buffer descriptor to be transmitted. - */ - if ((sc & BD_ENET_TX_WRAP) == 0) - bdp++; - else - bdp = fep->tx_bd_base; - - /* - * Since we have freed up a buffer, the ring is no longer - * full. 
- */ - if (++fep->tx_free >= MAX_SKB_FRAGS) - do_wake = 1; - has_tx_work = 1; - } - - fep->dirty_tx = bdp; - - if (do_restart) - (*fep->ops->tx_restart)(dev); - - if (!has_tx_work) { - napi_complete(napi); - (*fep->ops->napi_enable_tx)(dev); + return received; } - spin_unlock(&fep->tx_lock); - - if (do_wake) - netif_wake_queue(dev); - - if (has_tx_work) - return budget; - return 0; + return budget; } /* @@ -350,18 +329,18 @@ fs_enet_interrupt(int irq, void *dev_id) nr++; int_clr_events = int_events; - int_clr_events &= ~fep->ev_napi_rx; + int_clr_events &= ~fep->ev_napi; (*fep->ops->clear_int_events)(dev, int_clr_events); if (int_events & fep->ev_err) (*fep->ops->ev_error)(dev, int_events); - if (int_events & fep->ev_rx) { + if (int_events & fep->ev) { napi_ok = napi_schedule_prep(&fep->napi); - (*fep->ops->napi_disable_rx)(dev); - (*fep->ops->clear_int_events)(dev, fep->ev_napi_rx); + (*fep->ops->napi_disable)(dev); + (*fep->ops->clear_int_events)(dev, fep->ev_napi); /* NOTE: it is possible for FCCs in NAPI mode */ /* to submit a spurious interrupt while in poll */ @@ -369,17 +348,6 @@ fs_enet_interrupt(int irq, void *dev_id) __napi_schedule(&fep->napi); } - if (int_events & fep->ev_tx) { - napi_ok = napi_schedule_prep(&fep->napi_tx); - - (*fep->ops->napi_disable_tx)(dev); - (*fep->ops->clear_int_events)(dev, fep->ev_napi_tx); - - /* NOTE: it is possible for FCCs in NAPI mode */ - /* to submit a spurious interrupt while in poll */ - if (napi_ok) - __napi_schedule(&fep->napi_tx); - } } handled = nr > 0; @@ -659,7 +627,8 @@ static void fs_timeout(struct net_device *dev) } phy_start(dev->phydev); - wake = fep->tx_free && !(CBDR_SC(fep->cur_tx) & BD_ENET_TX_READY); + wake = fep->tx_free >= MAX_SKB_FRAGS && + !(CBDR_SC(fep->cur_tx) & BD_ENET_TX_READY); spin_unlock_irqrestore(&fep->lock, flags); if (wake) @@ -751,11 +720,10 @@ static int fs_enet_open(struct net_device *dev) int err; /* to initialize the fep->cur_rx,... 
*/ - /* not doing this, will cause a crash in fs_enet_rx_napi */ + /* not doing this, will cause a crash in fs_enet_napi */ fs_init_bds(fep->ndev); napi_enable(&fep->napi); - napi_enable(&fep->napi_tx); /* Install our interrupt handler. */ r = request_irq(fep->interrupt, fs_enet_interrupt, IRQF_SHARED, @@ -763,7 +731,6 @@ static int fs_enet_open(struct net_device *dev) if (r != 0) { dev_err(fep->dev, "Could not allocate FS_ENET IRQ!"); napi_disable(&fep->napi); - napi_disable(&fep->napi_tx); return -EINVAL; } @@ -771,7 +738,6 @@ static int fs_enet_open(struct net_device *dev) if (err) { free_irq(fep->interrupt, dev); napi_disable(&fep->napi); - napi_disable(&fep->napi_tx); return err; } phy_start(dev->phydev); @@ -789,7 +755,6 @@ static int fs_enet_close(struct net_device *dev) netif_stop_queue(dev); netif_carrier_off(dev); napi_disable(&fep->napi); - napi_disable(&fep->napi_tx); phy_stop(dev->phydev); spin_lock_irqsave(&fep->lock, flags); @@ -939,8 +904,8 @@ static int fs_enet_probe(struct platform_device *ofdev) fpi->cp_command = *data; } - fpi->rx_ring = 32; - fpi->tx_ring = 64; + fpi->rx_ring = RX_RING_SIZE; + fpi->tx_ring = TX_RING_SIZE; fpi->rx_copybreak = 240; fpi->napi_weight = 17; fpi->phy_node = of_parse_phandle(ofdev->dev.of_node, "phy-handle", 0); @@ -1024,8 +989,7 @@ static int fs_enet_probe(struct platform_device *ofdev) ndev->netdev_ops = &fs_enet_netdev_ops; ndev->watchdog_timeo = 2 * HZ; - netif_napi_add(ndev, &fep->napi, fs_enet_rx_napi, fpi->napi_weight); - netif_tx_napi_add(ndev, &fep->napi_tx, fs_enet_tx_napi, 2); + netif_napi_add(ndev, &fep->napi, fs_enet_napi, fpi->napi_weight); ndev->ethtool_ops = &fs_ethtool_ops; diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h index e29f54a..fee24c8 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h @@ -81,12 +81,9 @@ struct fs_ops { void (*adjust_link)(struct net_device *dev); 
void (*restart)(struct net_device *dev); void (*stop)(struct net_device *dev); - void (*napi_clear_rx_event)(struct net_device *dev); - void (*napi_enable_rx)(struct net_device *dev); - void (*napi_disable_rx)(struct net_device *dev); - void (*napi_clear_tx_event)(struct net_device *dev); - void (*napi_enable_tx)(struct net_device *dev); - void (*napi_disable_tx)(struct net_device *dev); + void (*napi_clear_event)(struct net_device *dev); + void (*napi_enable)(struct net_device *dev); + void (*napi_disable)(struct net_device *dev); void (*rx_bd_done)(struct net_device *dev); void (*tx_kickstart)(struct net_device *dev); u32 (*get_int_events)(struct net_device *dev); @@ -122,7 +119,6 @@ struct phy_info { struct fs_enet_private { struct napi_struct napi; - struct napi_struct napi_tx; struct device *dev; /* pointer back to the device (must be initialized first) */ struct net_device *ndev; spinlock_t lock; /* during all ops except TX pckt processing */ @@ -152,10 +148,8 @@ struct fs_enet_private { int oldduplex, oldspeed, oldlink; /* current settings */ /* event masks */ - u32 ev_napi_rx; /* mask of NAPI rx events */ - u32 ev_napi_tx; /* mask of NAPI rx events */ - u32 ev_rx; /* rx event mask */ - u32 ev_tx; /* tx event mask */ + u32 ev_napi; /* mask of NAPI events */ + u32 ev; /* event mask */ u32 ev_err; /* error event mask */ u16 bd_rx_empty; /* mask of BD rx empty */ diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c index d71761a..7919896 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c @@ -124,10 +124,8 @@ out: return ret; } -#define FCC_NAPI_RX_EVENT_MSK (FCC_ENET_RXF | FCC_ENET_RXB) -#define FCC_NAPI_TX_EVENT_MSK (FCC_ENET_TXB) -#define FCC_RX_EVENT (FCC_ENET_RXF) -#define FCC_TX_EVENT (FCC_ENET_TXB) +#define FCC_NAPI_EVENT_MSK (FCC_ENET_RXF | FCC_ENET_RXB | FCC_ENET_TXB) +#define FCC_EVENT (FCC_ENET_RXF | FCC_ENET_TXB) #define 
FCC_ERR_EVENT_MSK (FCC_ENET_TXE) static int setup_data(struct net_device *dev) @@ -137,10 +135,8 @@ static int setup_data(struct net_device *dev) if (do_pd_setup(fep) != 0) return -EINVAL; - fep->ev_napi_rx = FCC_NAPI_RX_EVENT_MSK; - fep->ev_napi_tx = FCC_NAPI_TX_EVENT_MSK; - fep->ev_rx = FCC_RX_EVENT; - fep->ev_tx = FCC_TX_EVENT; + fep->ev_napi = FCC_NAPI_EVENT_MSK; + fep->ev = FCC_EVENT; fep->ev_err = FCC_ERR_EVENT_MSK; return 0; @@ -424,52 +420,28 @@ static void stop(struct net_device *dev) fs_cleanup_bds(dev); } -static void napi_clear_rx_event(struct net_device *dev) +static void napi_clear_event_fs(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); fcc_t __iomem *fccp = fep->fcc.fccp; - W16(fccp, fcc_fcce, FCC_NAPI_RX_EVENT_MSK); + W16(fccp, fcc_fcce, FCC_NAPI_EVENT_MSK); } -static void napi_enable_rx(struct net_device *dev) +static void napi_enable_fs(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); fcc_t __iomem *fccp = fep->fcc.fccp; - S16(fccp, fcc_fccm, FCC_NAPI_RX_EVENT_MSK); + S16(fccp, fcc_fccm, FCC_NAPI_EVENT_MSK); } -static void napi_disable_rx(struct net_device *dev) +static void napi_disable_fs(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); fcc_t __iomem *fccp = fep->fcc.fccp; - C16(fccp, fcc_fccm, FCC_NAPI_RX_EVENT_MSK); -} - -static void napi_clear_tx_event(struct net_device *dev) -{ - struct fs_enet_private *fep = netdev_priv(dev); - fcc_t __iomem *fccp = fep->fcc.fccp; - - W16(fccp, fcc_fcce, FCC_NAPI_TX_EVENT_MSK); -} - -static void napi_enable_tx(struct net_device *dev) -{ - struct fs_enet_private *fep = netdev_priv(dev); - fcc_t __iomem *fccp = fep->fcc.fccp; - - S16(fccp, fcc_fccm, FCC_NAPI_TX_EVENT_MSK); -} - -static void napi_disable_tx(struct net_device *dev) -{ - struct fs_enet_private *fep = netdev_priv(dev); - fcc_t __iomem *fccp = fep->fcc.fccp; - - C16(fccp, fcc_fccm, FCC_NAPI_TX_EVENT_MSK); + C16(fccp, fcc_fccm, FCC_NAPI_EVENT_MSK); } static void 
rx_bd_done(struct net_device *dev) @@ -595,12 +567,9 @@ const struct fs_ops fs_fcc_ops = { .set_multicast_list = set_multicast_list, .restart = restart, .stop = stop, - .napi_clear_rx_event = napi_clear_rx_event, - .napi_enable_rx = napi_enable_rx, - .napi_disable_rx = napi_disable_rx, - .napi_clear_tx_event = napi_clear_tx_event, - .napi_enable_tx = napi_enable_tx, - .napi_disable_tx = napi_disable_tx, + .napi_clear_event = napi_clear_event_fs, + .napi_enable = napi_enable_fs, + .napi_disable = napi_disable_fs, .rx_bd_done = rx_bd_done, .tx_kickstart = tx_kickstart, .get_int_events = get_int_events, diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c index 35a318e..21fbaaf 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c @@ -109,10 +109,8 @@ static int do_pd_setup(struct fs_enet_private *fep) return 0; } -#define FEC_NAPI_RX_EVENT_MSK (FEC_ENET_RXF | FEC_ENET_RXB) -#define FEC_NAPI_TX_EVENT_MSK (FEC_ENET_TXF) -#define FEC_RX_EVENT (FEC_ENET_RXF) -#define FEC_TX_EVENT (FEC_ENET_TXF) +#define FEC_NAPI_EVENT_MSK (FEC_ENET_RXF | FEC_ENET_RXB | FEC_ENET_TXF) +#define FEC_EVENT (FEC_ENET_RXF | FEC_ENET_TXF) #define FEC_ERR_EVENT_MSK (FEC_ENET_HBERR | FEC_ENET_BABR | \ FEC_ENET_BABT | FEC_ENET_EBERR) @@ -126,10 +124,8 @@ static int setup_data(struct net_device *dev) fep->fec.hthi = 0; fep->fec.htlo = 0; - fep->ev_napi_rx = FEC_NAPI_RX_EVENT_MSK; - fep->ev_napi_tx = FEC_NAPI_TX_EVENT_MSK; - fep->ev_rx = FEC_RX_EVENT; - fep->ev_tx = FEC_TX_EVENT; + fep->ev_napi = FEC_NAPI_EVENT_MSK; + fep->ev = FEC_EVENT; fep->ev_err = FEC_ERR_EVENT_MSK; return 0; @@ -396,52 +392,28 @@ static void stop(struct net_device *dev) } } -static void napi_clear_rx_event(struct net_device *dev) +static void napi_clear_event_fs(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); struct fec __iomem *fecp = fep->fec.fecp; - FW(fecp, ievent, 
FEC_NAPI_RX_EVENT_MSK); + FW(fecp, ievent, FEC_NAPI_EVENT_MSK); } -static void napi_enable_rx(struct net_device *dev) +static void napi_enable_fs(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); struct fec __iomem *fecp = fep->fec.fecp; - FS(fecp, imask, FEC_NAPI_RX_EVENT_MSK); + FS(fecp, imask, FEC_NAPI_EVENT_MSK); } -static void napi_disable_rx(struct net_device *dev) +static void napi_disable_fs(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); struct fec __iomem *fecp = fep->fec.fecp; - FC(fecp, imask, FEC_NAPI_RX_EVENT_MSK); -} - -static void napi_clear_tx_event(struct net_device *dev) -{ - struct fs_enet_private *fep = netdev_priv(dev); - struct fec __iomem *fecp = fep->fec.fecp; - - FW(fecp, ievent, FEC_NAPI_TX_EVENT_MSK); -} - -static void napi_enable_tx(struct net_device *dev) -{ - struct fs_enet_private *fep = netdev_priv(dev); - struct fec __iomem *fecp = fep->fec.fecp; - - FS(fecp, imask, FEC_NAPI_TX_EVENT_MSK); -} - -static void napi_disable_tx(struct net_device *dev) -{ - struct fs_enet_private *fep = netdev_priv(dev); - struct fec __iomem *fecp = fep->fec.fecp; - - FC(fecp, imask, FEC_NAPI_TX_EVENT_MSK); + FC(fecp, imask, FEC_NAPI_EVENT_MSK); } static void rx_bd_done(struct net_device *dev) @@ -513,12 +485,9 @@ const struct fs_ops fs_fec_ops = { .set_multicast_list = set_multicast_list, .restart = restart, .stop = stop, - .napi_clear_rx_event = napi_clear_rx_event, - .napi_enable_rx = napi_enable_rx, - .napi_disable_rx = napi_disable_rx, - .napi_clear_tx_event = napi_clear_tx_event, - .napi_enable_tx = napi_enable_tx, - .napi_disable_tx = napi_disable_tx, + .napi_clear_event = napi_clear_event_fs, + .napi_enable = napi_enable_fs, + .napi_disable = napi_disable_fs, .rx_bd_done = rx_bd_done, .tx_kickstart = tx_kickstart, .get_int_events = get_int_events, diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c index e8b9c33..9d52e1e 100644 --- 
a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c @@ -115,10 +115,8 @@ static int do_pd_setup(struct fs_enet_private *fep) return 0; } -#define SCC_NAPI_RX_EVENT_MSK (SCCE_ENET_RXF | SCCE_ENET_RXB) -#define SCC_NAPI_TX_EVENT_MSK (SCCE_ENET_TXB) -#define SCC_RX_EVENT (SCCE_ENET_RXF) -#define SCC_TX_EVENT (SCCE_ENET_TXB) +#define SCC_NAPI_EVENT_MSK (SCCE_ENET_RXF | SCCE_ENET_RXB | SCCE_ENET_TXB) +#define SCC_EVENT (SCCE_ENET_RXF | SCCE_ENET_TXB) #define SCC_ERR_EVENT_MSK (SCCE_ENET_TXE | SCCE_ENET_BSY) static int setup_data(struct net_device *dev) @@ -130,10 +128,8 @@ static int setup_data(struct net_device *dev) fep->scc.hthi = 0; fep->scc.htlo = 0; - fep->ev_napi_rx = SCC_NAPI_RX_EVENT_MSK; - fep->ev_napi_tx = SCC_NAPI_TX_EVENT_MSK; - fep->ev_rx = SCC_RX_EVENT; - fep->ev_tx = SCC_TX_EVENT | SCCE_ENET_TXE; + fep->ev_napi = SCC_NAPI_EVENT_MSK; + fep->ev = SCC_EVENT | SCCE_ENET_TXE; fep->ev_err = SCC_ERR_EVENT_MSK; return 0; @@ -379,52 +375,28 @@ static void stop(struct net_device *dev) fs_cleanup_bds(dev); } -static void napi_clear_rx_event(struct net_device *dev) +static void napi_clear_event_fs(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); scc_t __iomem *sccp = fep->scc.sccp; - W16(sccp, scc_scce, SCC_NAPI_RX_EVENT_MSK); + W16(sccp, scc_scce, SCC_NAPI_EVENT_MSK); } -static void napi_enable_rx(struct net_device *dev) +static void napi_enable_fs(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); scc_t __iomem *sccp = fep->scc.sccp; - S16(sccp, scc_sccm, SCC_NAPI_RX_EVENT_MSK); + S16(sccp, scc_sccm, SCC_NAPI_EVENT_MSK); } -static void napi_disable_rx(struct net_device *dev) +static void napi_disable_fs(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); scc_t __iomem *sccp = fep->scc.sccp; - C16(sccp, scc_sccm, SCC_NAPI_RX_EVENT_MSK); -} - -static void napi_clear_tx_event(struct net_device *dev) -{ - struct fs_enet_private *fep = 
netdev_priv(dev); - scc_t __iomem *sccp = fep->scc.sccp; - - W16(sccp, scc_scce, SCC_NAPI_TX_EVENT_MSK); -} - -static void napi_enable_tx(struct net_device *dev) -{ - struct fs_enet_private *fep = netdev_priv(dev); - scc_t __iomem *sccp = fep->scc.sccp; - - S16(sccp, scc_sccm, SCC_NAPI_TX_EVENT_MSK); -} - -static void napi_disable_tx(struct net_device *dev) -{ - struct fs_enet_private *fep = netdev_priv(dev); - scc_t __iomem *sccp = fep->scc.sccp; - - C16(sccp, scc_sccm, SCC_NAPI_TX_EVENT_MSK); + C16(sccp, scc_sccm, SCC_NAPI_EVENT_MSK); } static void rx_bd_done(struct net_device *dev) @@ -497,12 +469,9 @@ const struct fs_ops fs_scc_ops = { .set_multicast_list = set_multicast_list, .restart = restart, .stop = stop, - .napi_clear_rx_event = napi_clear_rx_event, - .napi_enable_rx = napi_enable_rx, - .napi_disable_rx = napi_disable_rx, - .napi_clear_tx_event = napi_clear_tx_event, - .napi_enable_tx = napi_enable_tx, - .napi_disable_tx = napi_disable_tx, + .napi_clear_event = napi_clear_event_fs, + .napi_enable = napi_enable_fs, + .napi_disable = napi_disable_fs, .rx_bd_done = rx_bd_done, .tx_kickstart = tx_kickstart, .get_int_events = get_int_events, -- cgit v1.1 From 070e1f01827c658b76bef6e3fa79046b4e4a7693 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 9 Sep 2016 14:26:23 +0200 Subject: net: fs_enet: don't unmap DMA when packet len is below copybreak When the length of the packet is below the defined copybreak limit, the received packet is copied into a newly allocated skb in order to reuse the skb. This is only interesting if it allows us to avoid a new DMA mapping. We shall therefore not DMA unmap and remap the skb->data. Instead, we invalidate the cache with dma_sync_single_for_cpu() once the received data has been copied into the new skb. The following measurements have been obtained on an MPC885 running at 132 MHz.
Measurement is done using the timebase with packets sent to the target with 'ping -s 1' (packet len is 60): * Without this patch: 182 TB ticks * With this patch: 143 TB ticks As a comparison, if we set the copybreak limit to 0, then we get 148 TB ticks. This means that without this patch, the duration is even worse when copying received data to a new skb than when allocating a new skb for the next packet to be received. Signed-off-by: Christophe Leroy Signed-off-by: David S. Miller --- .../net/ethernet/freescale/fs_enet/fs_enet-main.c | 36 ++++++++++++---------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index 37574a9..f2a60cd 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -226,21 +226,10 @@ static int fs_enet_napi(struct napi_struct *napi, int budget) if (sc & BD_ENET_RX_OV) fep->stats.rx_crc_errors++; - skb = fep->rx_skbuff[curidx]; - - dma_unmap_single(fep->dev, CBDR_BUFADDR(bdp), - L1_CACHE_ALIGN(PKT_MAXBUF_SIZE), - DMA_FROM_DEVICE); - - skbn = skb; - + skbn = fep->rx_skbuff[curidx]; } else { skb = fep->rx_skbuff[curidx]; - dma_unmap_single(fep->dev, CBDR_BUFADDR(bdp), - L1_CACHE_ALIGN(PKT_MAXBUF_SIZE), - DMA_FROM_DEVICE); - /* * Process the incoming frame.
*/ @@ -256,12 +245,30 @@ static int fs_enet_napi(struct napi_struct *napi, int budget) skb_copy_from_linear_data(skb, skbn->data, pkt_len); swap(skb, skbn); + dma_sync_single_for_cpu(fep->dev, + CBDR_BUFADDR(bdp), + L1_CACHE_ALIGN(pkt_len), + DMA_FROM_DEVICE); } } else { skbn = netdev_alloc_skb(dev, ENET_RX_FRSIZE); - if (skbn) + if (skbn) { + dma_addr_t dma; + skb_align(skbn, ENET_RX_ALIGN); + + dma_unmap_single(fep->dev, + CBDR_BUFADDR(bdp), + L1_CACHE_ALIGN(PKT_MAXBUF_SIZE), + DMA_FROM_DEVICE); + + dma = dma_map_single(fep->dev, + skbn->data, + L1_CACHE_ALIGN(PKT_MAXBUF_SIZE), + DMA_FROM_DEVICE); + CBDW_BUFADDR(bdp, dma); + } } if (skbn != NULL) { @@ -276,9 +283,6 @@ static int fs_enet_napi(struct napi_struct *napi, int budget) } fep->rx_skbuff[curidx] = skbn; - CBDW_BUFADDR(bdp, dma_map_single(fep->dev, skbn->data, - L1_CACHE_ALIGN(PKT_MAXBUF_SIZE), - DMA_FROM_DEVICE)); CBDW_DATLEN(bdp, 0); CBDW_SC(bdp, (sc & ~BD_ENET_RX_STATS) | BD_ENET_RX_EMPTY); -- cgit v1.1 From b0ba357bfb463bb10fe486c99c5dff892fa207fa Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 9 Sep 2016 14:26:25 +0200 Subject: net: fs_enet: make rx_copybreak value configurable Measurement shows that on a MPC8xx running at 132MHz, the optimal limit is 112: * 114 bytes packets are processed in 147 TB ticks with higher copybreak * 114 bytes packets are processed in 148 TB ticks with lower copybreak * 128 bytes packets are processed in 154 TB ticks with higher copybreak * 128 bytes packets are processed in 148 TB ticks with lower copybreak * 238 bytes packets are processed in 172 TB ticks with higher copybreak * 238 bytes packets are processed in 148 TB ticks with lower copybreak However it might be different on other processors and/or frequencies. So it is useful to make it configurable. Signed-off-by: Christophe Leroy Signed-off-by: David S. 
Miller --- .../net/ethernet/freescale/fs_enet/fs_enet-main.c | 40 ++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index f2a60cd..dc120c1 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -830,6 +830,44 @@ static void fs_set_msglevel(struct net_device *dev, u32 value) fep->msg_enable = value; } +static int fs_get_tunable(struct net_device *dev, + const struct ethtool_tunable *tuna, void *data) +{ + struct fs_enet_private *fep = netdev_priv(dev); + struct fs_platform_info *fpi = fep->fpi; + int ret = 0; + + switch (tuna->id) { + case ETHTOOL_RX_COPYBREAK: + *(u32 *)data = fpi->rx_copybreak; + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static int fs_set_tunable(struct net_device *dev, + const struct ethtool_tunable *tuna, const void *data) +{ + struct fs_enet_private *fep = netdev_priv(dev); + struct fs_platform_info *fpi = fep->fpi; + int ret = 0; + + switch (tuna->id) { + case ETHTOOL_RX_COPYBREAK: + fpi->rx_copybreak = *(u32 *)data; + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + static const struct ethtool_ops fs_ethtool_ops = { .get_drvinfo = fs_get_drvinfo, .get_regs_len = fs_get_regs_len, @@ -841,6 +879,8 @@ static const struct ethtool_ops fs_ethtool_ops = { .get_ts_info = ethtool_op_get_ts_info, .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, + .get_tunable = fs_get_tunable, + .set_tunable = fs_set_tunable, }; static int fs_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) -- cgit v1.1 From a73ec314a0d28cdbc29b4e4ad10871df0829986d Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 9 Sep 2016 08:43:13 -0400 Subject: appletalk: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a 
Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- net/appletalk/ddp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index f066781..10d2bdc 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1278,7 +1278,7 @@ out: return err; } -#if defined(CONFIG_IPDDP) || defined(CONFIG_IPDDP_MODULE) +#if IS_ENABLED(CONFIG_IPDDP) static __inline__ int is_ip_over_ddp(struct sk_buff *skb) { return skb->data[12] == 22; -- cgit v1.1 From 9a81c34ace3598188f633d4654a2a57b7f7a2c2a Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 9 Sep 2016 08:43:14 -0400 Subject: lec: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. 
Miller --- net/atm/lec.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/atm/lec.c b/net/atm/lec.c index e574a7e..5d26938 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -31,7 +31,7 @@ #include /* Proxy LEC knows about bridging */ -#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) +#if IS_ENABLED(CONFIG_BRIDGE) #include "../bridge/br_private.h" static unsigned char bridge_ula_lec[] = { 0x01, 0x80, 0xc2, 0x00, 0x00 }; @@ -121,7 +121,7 @@ static unsigned char bus_mac[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; /* Device structures */ static struct net_device *dev_lec[MAX_LEC_ITF]; -#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) +#if IS_ENABLED(CONFIG_BRIDGE) static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev) { char *buff; @@ -155,7 +155,7 @@ static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev) sk->sk_data_ready(sk); } } -#endif /* defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) */ +#endif /* IS_ENABLED(CONFIG_BRIDGE) */ /* * Open/initialize the netdevice. 
This is called (in the current kernel) @@ -222,7 +222,7 @@ static netdev_tx_t lec_start_xmit(struct sk_buff *skb, pr_debug("skbuff head:%lx data:%lx tail:%lx end:%lx\n", (long)skb->head, (long)skb->data, (long)skb_tail_pointer(skb), (long)skb_end_pointer(skb)); -#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) +#if IS_ENABLED(CONFIG_BRIDGE) if (memcmp(skb->data, bridge_ula_lec, sizeof(bridge_ula_lec)) == 0) lec_handle_bridge(skb, dev); #endif @@ -426,7 +426,7 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) (unsigned short)(0xffff & mesg->content.normal.flag); break; case l_should_bridge: -#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) +#if IS_ENABLED(CONFIG_BRIDGE) { pr_debug("%s: bridge zeppelin asks about %pM\n", dev->name, mesg->content.proxy.mac_addr); @@ -452,7 +452,7 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) sk->sk_data_ready(sk); } } -#endif /* defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) */ +#endif /* IS_ENABLED(CONFIG_BRIDGE) */ break; default: pr_info("%s: Unknown message type %d\n", dev->name, mesg->type); -- cgit v1.1 From 181402a5c7899fad945485130ded47ca2bf1161e Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 9 Sep 2016 08:43:15 -0400 Subject: net: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. 
Miller --- net/core/dev.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 34b5322..b0d307b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3904,8 +3904,7 @@ static void net_tx_action(struct softirq_action *h) } } -#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \ - (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)) +#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_ATM_LANE) /* This hook is defined here for ATM LANE */ int (*br_fdb_test_addr_hook)(struct net_device *dev, unsigned char *addr) __read_mostly; -- cgit v1.1 From 6ca40d4e8463c53e6b778010b9331268865725a6 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 9 Sep 2016 08:43:16 -0400 Subject: ipv4: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. 
Miller --- net/ipv4/ip_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 6556927..b913f5b 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -490,7 +490,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->tc_index = from->tc_index; #endif nf_copy(to, from); -#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) +#if IS_ENABLED(CONFIG_IP_VS) to->ipvs_property = from->ipvs_property; #endif skb_copy_secmark(to, from); -- cgit v1.1 From 9dd79945b0f846ca5282c7df7ecf3823f0243898 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 9 Sep 2016 08:43:17 -0400 Subject: l2tp: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. 
Miller --- net/l2tp/l2tp_core.h | 2 +- net/l2tp/l2tp_eth.c | 4 ++-- net/l2tp/l2tp_ppp.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 5871537..2599af6 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -139,7 +139,7 @@ struct l2tp_session { void (*session_close)(struct l2tp_session *session); void (*ref)(struct l2tp_session *session); void (*deref)(struct l2tp_session *session); -#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE) +#if IS_ENABLED(CONFIG_L2TP_DEBUGFS) void (*show)(struct seq_file *m, void *priv); #endif uint8_t priv[0]; /* private data */ diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 57fc5a4..ef2cd30 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -195,7 +195,7 @@ static void l2tp_eth_delete(struct l2tp_session *session) } } -#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE) +#if IS_ENABLED(CONFIG_L2TP_DEBUGFS) static void l2tp_eth_show(struct seq_file *m, void *arg) { struct l2tp_session *session = arg; @@ -268,7 +268,7 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p priv->tunnel_sock = tunnel->sock; session->recv_skb = l2tp_eth_dev_recv; session->session_close = l2tp_eth_delete; -#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE) +#if IS_ENABLED(CONFIG_L2TP_DEBUGFS) session->show = l2tp_eth_show; #endif diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 34eff77..41d47bf 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -552,7 +552,7 @@ out: return error; } -#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE) +#if IS_ENABLED(CONFIG_L2TP_DEBUGFS) static void pppol2tp_show(struct seq_file *m, void *arg) { struct l2tp_session *session = arg; @@ -723,7 +723,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, session->recv_skb = pppol2tp_recv; session->session_close = 
pppol2tp_session_close; -#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE) +#if IS_ENABLED(CONFIG_L2TP_DEBUGFS) session->show = pppol2tp_show; #endif -- cgit v1.1 From 0013de38a829db3f83a36e3e178ff386eb589c51 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 9 Sep 2016 08:43:18 -0400 Subject: net: sched: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- net/sched/cls_flow.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 2c1ae54..a379bae 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -29,7 +29,7 @@ #include #include -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) #include #endif @@ -125,14 +125,14 @@ static u32 flow_get_mark(const struct sk_buff *skb) static u32 flow_get_nfct(const struct sk_buff *skb) { -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) return addr_fold(skb->nfct); #else return 0; #endif } -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) #define CTTUPLE(skb, member) \ ({ \ enum ip_conntrack_info ctinfo; \ -- cgit v1.1 From aebf5de07aabd44db740c9d33b6daa1abd19fa56 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 9 Sep 2016 08:43:19 -0400 Subject: sctp: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. 
Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/auth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 912eb16..f99d485 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -48,7 +48,7 @@ static struct sctp_hmac sctp_hmac_list[SCTP_AUTH_NUM_HMACS] = { /* id 2 is reserved as well */ .hmac_id = SCTP_AUTH_HMAC_ID_RESERVED_2, }, -#if defined (CONFIG_CRYPTO_SHA256) || defined (CONFIG_CRYPTO_SHA256_MODULE) +#if IS_ENABLED(CONFIG_CRYPTO_SHA256) { .hmac_id = SCTP_AUTH_HMAC_ID_SHA256, .hmac_name = "hmac(sha256)", -- cgit v1.1 From 65b323e2ffbb05db4136ee822e08a9b0ec6ac716 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 9 Sep 2016 08:43:20 -0400 Subject: xfrm: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_algo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 250e567..44ac85f 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -17,7 +17,7 @@ #include #include #include -#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE) +#if IS_ENABLED(CONFIG_INET_ESP) || IS_ENABLED(CONFIG_INET6_ESP) #include #endif -- cgit v1.1 From d62292e85028e553943a285cb6006de0f17dea1e Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Fri, 9 Sep 2016 17:35:17 +0300 Subject: net/mlx5: Skip waiting for vf pages in internal error In case of device in internal error state there is no need to wait for vf pages since they will be reclaimed manually later in the unload flow. Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 673a7c9..d458515 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -326,6 +326,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev, { struct fw_page *fwp; struct rb_node *p; + u32 func_id; u32 npages; u32 i = 0; @@ -334,12 +335,16 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev, /* No hard feelings, we want our pages back! 
*/ npages = MLX5_GET(manage_pages_in, in, input_num_entries); + func_id = MLX5_GET(manage_pages_in, in, function_id); p = rb_first(&dev->priv.page_root); while (p && i < npages) { fwp = rb_entry(p, struct fw_page, rb_node); - MLX5_SET64(manage_pages_out, out, pas[i], fwp->addr); p = rb_next(p); + if (fwp->func_id != func_id) + continue; + + MLX5_SET64(manage_pages_out, out, pas[i], fwp->addr); i++; } @@ -540,6 +545,12 @@ int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev) unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); int prev_vfs_pages = dev->priv.vfs_pages; + /* In case of internal error we will free the pages manually later */ + if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + mlx5_core_warn(dev, "Skipping wait for vf pages stage"); + return 0; + } + mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_vfs_pages, dev->priv.name); while (dev->priv.vfs_pages) { -- cgit v1.1 From 6b6adee3dad25bbe568ee24fc843372d02fb425f Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Fri, 9 Sep 2016 17:35:18 +0300 Subject: net/mlx5: SRIOV core code refactoring Simplify the code and make it look modular and symmetric. Split sriov enable/disable into two levels: device level and pci level. When the user enables/disables sriov (via the sriov_configure driver callback) we will enable/disable both device and pci sriov. When the driver loads/unloads we will enable/disable (on demand) only device sriov while keeping the PCI sriov enabled for the next driver load. On internal/pci error, VFs will be kept enabled on PCI and the reset is done only at the device level. Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 12 +- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2 + drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 216 +++++++++------------ include/linux/mlx5/driver.h | 2 - 4 files changed, 101 insertions(+), 131 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index c132ef1..baba53f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1180,8 +1180,7 @@ out: return 0; err_sriov: - if (mlx5_sriov_cleanup(dev)) - dev_err(&dev->pdev->dev, "sriov cleanup failed\n"); + mlx5_sriov_cleanup(dev); #ifdef CONFIG_MLX5_CORE_EN mlx5_eswitch_cleanup(dev->priv.eswitch); @@ -1241,19 +1240,14 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) { int err = 0; - err = mlx5_sriov_cleanup(dev); - if (err) { - dev_warn(&dev->pdev->dev, "%s: sriov cleanup failed - abort\n", - __func__); - return err; - } - mutex_lock(&dev->intf_state_mutex); if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) { dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n", __func__); goto out; } + + mlx5_sriov_cleanup(dev); mlx5_unregister_device(dev); #ifdef CONFIG_MLX5_CORE_EN mlx5_eswitch_cleanup(dev->priv.eswitch); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 714b71b..7dd14cf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -89,6 +89,8 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); void mlx5_enter_error_state(struct mlx5_core_dev *dev); void mlx5_disable_device(struct mlx5_core_dev *dev); +int mlx5_sriov_init(struct mlx5_core_dev *dev); +void mlx5_sriov_cleanup(struct mlx5_core_dev *dev); int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs); bool 
mlx5_sriov_is_enabled(struct mlx5_core_dev *dev); int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 78e7892..72a8215 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -44,108 +44,132 @@ bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev) return !!sriov->num_vfs; } -static void enable_vfs(struct mlx5_core_dev *dev, int num_vfs) +static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; int err; int vf; - for (vf = 1; vf <= num_vfs; vf++) { - err = mlx5_core_enable_hca(dev, vf); + if (sriov->enabled_vfs) { + mlx5_core_warn(dev, + "failed to enable SRIOV on device, already enabled with %d vfs\n", + sriov->enabled_vfs); + return -EBUSY; + } + +#ifdef CONFIG_MLX5_CORE_EN + err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY); + if (err) { + mlx5_core_warn(dev, + "failed to enable eswitch SRIOV (%d)\n", err); + return err; + } +#endif + + for (vf = 0; vf < num_vfs; vf++) { + err = mlx5_core_enable_hca(dev, vf + 1); if (err) { - mlx5_core_warn(dev, "failed to enable VF %d\n", vf - 1); - } else { - sriov->vfs_ctx[vf - 1].enabled = 1; - mlx5_core_dbg(dev, "successfully enabled VF %d\n", vf - 1); + mlx5_core_warn(dev, "failed to enable VF %d (%d)\n", vf, err); + continue; } + sriov->vfs_ctx[vf].enabled = 1; + sriov->enabled_vfs++; + mlx5_core_dbg(dev, "successfully enabled VF* %d\n", vf); + } + + return 0; } -static void disable_vfs(struct mlx5_core_dev *dev, int num_vfs) +static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int err; int vf; - for (vf = 1; vf <= num_vfs; vf++) { - if (sriov->vfs_ctx[vf - 1].enabled) { - if (mlx5_core_disable_hca(dev, vf)) - mlx5_core_warn(dev, "failed to disable VF %d\n", vf - 1); - else 
- sriov->vfs_ctx[vf - 1].enabled = 0; + if (!sriov->enabled_vfs) + return; + + for (vf = 0; vf < sriov->num_vfs; vf++) { + if (!sriov->vfs_ctx[vf].enabled) + continue; + err = mlx5_core_disable_hca(dev, vf + 1); + if (err) { + mlx5_core_warn(dev, "failed to disable VF %d\n", vf); + continue; } + sriov->vfs_ctx[vf].enabled = 0; + sriov->enabled_vfs--; } + +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_disable_sriov(dev->priv.eswitch); +#endif + + if (mlx5_wait_for_vf_pages(dev)) + mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); } -static int mlx5_core_create_vfs(struct pci_dev *pdev, int num_vfs) +static int mlx5_pci_enable_sriov(struct pci_dev *pdev, int num_vfs) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - int err; - - if (pci_num_vf(pdev)) - pci_disable_sriov(pdev); - - enable_vfs(dev, num_vfs); + int err = 0; - err = pci_enable_sriov(pdev, num_vfs); - if (err) { - dev_warn(&pdev->dev, "enable sriov failed %d\n", err); - goto ex; + if (pci_num_vf(pdev)) { + mlx5_core_warn(dev, "Unable to enable pci sriov, already enabled\n"); + return -EBUSY; } - return 0; + err = pci_enable_sriov(pdev, num_vfs); + if (err) + mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err); -ex: - disable_vfs(dev, num_vfs); return err; } -static int mlx5_core_sriov_enable(struct pci_dev *pdev, int num_vfs) +static void mlx5_pci_disable_sriov(struct pci_dev *pdev) +{ + pci_disable_sriov(pdev); +} + +static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct mlx5_core_sriov *sriov = &dev->priv.sriov; - int err; + int err = 0; - kfree(sriov->vfs_ctx); - sriov->vfs_ctx = kcalloc(num_vfs, sizeof(*sriov->vfs_ctx), GFP_ATOMIC); - if (!sriov->vfs_ctx) - return -ENOMEM; + err = mlx5_device_enable_sriov(dev, num_vfs); + if (err) { + mlx5_core_warn(dev, "mlx5_device_enable_sriov failed : %d\n", err); + return err; + } - sriov->enabled_vfs = num_vfs; - err = mlx5_core_create_vfs(pdev, num_vfs); + err = 
mlx5_pci_enable_sriov(pdev, num_vfs); if (err) { - kfree(sriov->vfs_ctx); - sriov->vfs_ctx = NULL; + mlx5_core_warn(dev, "mlx5_pci_enable_sriov failed : %d\n", err); + mlx5_device_disable_sriov(dev); return err; } + sriov->num_vfs = num_vfs; + return 0; } -static void mlx5_core_init_vfs(struct mlx5_core_dev *dev, int num_vfs) +static void mlx5_sriov_disable(struct pci_dev *pdev) { + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct mlx5_core_sriov *sriov = &dev->priv.sriov; - sriov->num_vfs = num_vfs; -} - -static void mlx5_core_cleanup_vfs(struct mlx5_core_dev *dev) -{ - struct mlx5_core_sriov *sriov; - - sriov = &dev->priv.sriov; - disable_vfs(dev, sriov->num_vfs); - - if (mlx5_wait_for_vf_pages(dev)) - mlx5_core_warn(dev, "timeout claiming VFs pages\n"); - + mlx5_pci_disable_sriov(pdev); + mlx5_device_disable_sriov(dev); sriov->num_vfs = 0; } int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - struct mlx5_core_sriov *sriov = &dev->priv.sriov; - int err; + int err = 0; mlx5_core_dbg(dev, "requested num_vfs %d\n", num_vfs); if (!mlx5_core_is_pf(dev)) @@ -156,92 +180,44 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) return -EINVAL; } - mlx5_core_cleanup_vfs(dev); - - if (!num_vfs) { -#ifdef CONFIG_MLX5_CORE_EN - mlx5_eswitch_disable_sriov(dev->priv.eswitch); -#endif - kfree(sriov->vfs_ctx); - sriov->vfs_ctx = NULL; - if (!pci_vfs_assigned(pdev)) - pci_disable_sriov(pdev); - else - mlx5_core_info(dev, "unloading PF driver while leaving orphan VFs\n"); - return 0; - } - - err = mlx5_core_sriov_enable(pdev, num_vfs); - if (err) { - mlx5_core_warn(dev, "mlx5_core_sriov_enable failed %d\n", err); - return err; - } + if (num_vfs) + err = mlx5_sriov_enable(pdev, num_vfs); + else + mlx5_sriov_disable(pdev); - mlx5_core_init_vfs(dev, num_vfs); -#ifdef CONFIG_MLX5_CORE_EN - mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY); -#endif - - return num_vfs; -} - 
-static int sync_required(struct pci_dev *pdev) -{ - struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - struct mlx5_core_sriov *sriov = &dev->priv.sriov; - int cur_vfs = pci_num_vf(pdev); - - if (cur_vfs != sriov->num_vfs) { - mlx5_core_warn(dev, "current VFs %d, registered %d - sync needed\n", - cur_vfs, sriov->num_vfs); - return 1; - } - - return 0; + return err ? err : num_vfs; } int mlx5_sriov_init(struct mlx5_core_dev *dev) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; struct pci_dev *pdev = dev->pdev; - int cur_vfs; + int total_vfs; if (!mlx5_core_is_pf(dev)) return 0; - if (!sync_required(dev->pdev)) - return 0; - - cur_vfs = pci_num_vf(pdev); - sriov->vfs_ctx = kcalloc(cur_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL); + total_vfs = pci_sriov_get_totalvfs(pdev); + sriov->num_vfs = pci_num_vf(pdev); + sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL); if (!sriov->vfs_ctx) return -ENOMEM; - sriov->enabled_vfs = cur_vfs; - - mlx5_core_init_vfs(dev, cur_vfs); -#ifdef CONFIG_MLX5_CORE_EN - if (cur_vfs) - mlx5_eswitch_enable_sriov(dev->priv.eswitch, cur_vfs, - SRIOV_LEGACY); -#endif - - enable_vfs(dev, cur_vfs); + /* If sriov VFs exist in PCI level, enable them in device level */ + if (!sriov->num_vfs) + return 0; + mlx5_device_enable_sriov(dev, sriov->num_vfs); return 0; } -int mlx5_sriov_cleanup(struct mlx5_core_dev *dev) +void mlx5_sriov_cleanup(struct mlx5_core_dev *dev) { - struct pci_dev *pdev = dev->pdev; - int err; + struct mlx5_core_sriov *sriov = &dev->priv.sriov; if (!mlx5_core_is_pf(dev)) - return 0; + return; - err = mlx5_core_sriov_configure(pdev, 0); - if (err) - return err; - - return 0; + mlx5_device_disable_sriov(dev); + kfree(sriov->vfs_ctx); } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5cb9fa7..0d7aedf 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -828,8 +828,6 @@ void mlx5_pagealloc_init(struct mlx5_core_dev *dev); void mlx5_pagealloc_cleanup(struct 
mlx5_core_dev *dev); int mlx5_pagealloc_start(struct mlx5_core_dev *dev); void mlx5_pagealloc_stop(struct mlx5_core_dev *dev); -int mlx5_sriov_init(struct mlx5_core_dev *dev); -int mlx5_sriov_cleanup(struct mlx5_core_dev *dev); void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, s32 npages); int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot); -- cgit v1.1 From 737a234bb6384800a5b632be85c6b0ad6221d137 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Fri, 9 Sep 2016 17:35:19 +0300 Subject: net/mlx5: Introduce attach/detach to interface API Add attach/detach callbacks to interface API. This is crucial for implementing seamless reset flow which releases the hardware and its resources upon detach while keeping software structures and state (e.g. netdev) then reset and reallocate the hardware needed resources upon attach. Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 149 +++++++++++++++++++++---- include/linux/mlx5/driver.h | 2 + 2 files changed, 131 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index baba53f..108d8f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -81,6 +81,7 @@ struct mlx5_device_context { struct list_head list; struct mlx5_interface *intf; void *context; + unsigned long state; }; enum { @@ -778,6 +779,11 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev) return -ENOTSUPP; } +enum { + MLX5_INTERFACE_ADDED, + MLX5_INTERFACE_ATTACHED, +}; + static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) { struct mlx5_device_context *dev_ctx; @@ -786,12 +792,15 @@ static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) if (!mlx5_lag_intf_add(intf, priv)) return; - dev_ctx = kmalloc(sizeof(*dev_ctx),
GFP_KERNEL); + dev_ctx = kzalloc(sizeof(*dev_ctx), GFP_KERNEL); if (!dev_ctx) return; - dev_ctx->intf = intf; + dev_ctx->intf = intf; dev_ctx->context = intf->add(dev); + set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); + if (intf->attach) + set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); if (dev_ctx->context) { spin_lock_irq(&priv->ctx_lock); @@ -802,21 +811,114 @@ static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) } } +static struct mlx5_device_context *mlx5_get_device(struct mlx5_interface *intf, + struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + + list_for_each_entry(dev_ctx, &priv->ctx_list, list) + if (dev_ctx->intf == intf) + return dev_ctx; + return NULL; +} + static void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv) { struct mlx5_device_context *dev_ctx; struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); - list_for_each_entry(dev_ctx, &priv->ctx_list, list) - if (dev_ctx->intf == intf) { - spin_lock_irq(&priv->ctx_lock); - list_del(&dev_ctx->list); - spin_unlock_irq(&priv->ctx_lock); + dev_ctx = mlx5_get_device(intf, priv); + if (!dev_ctx) + return; + + spin_lock_irq(&priv->ctx_lock); + list_del(&dev_ctx->list); + spin_unlock_irq(&priv->ctx_lock); + + if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) + intf->remove(dev, dev_ctx->context); - intf->remove(dev, dev_ctx->context); - kfree(dev_ctx); + kfree(dev_ctx); +} + +static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); + + dev_ctx = mlx5_get_device(intf, priv); + if (!dev_ctx) + return; + + if (intf->attach) { + if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)) return; - } + intf->attach(dev, dev_ctx->context); + set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); + } else { + if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) + return; + 
dev_ctx->context = intf->add(dev); + set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); + } +} + +static void mlx5_attach_device(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_interface *intf; + + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry(intf, &intf_list, list) + mlx5_attach_interface(intf, priv); + mutex_unlock(&mlx5_intf_mutex); +} + +static void mlx5_detach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); + + dev_ctx = mlx5_get_device(intf, priv); + if (!dev_ctx) + return; + + if (intf->detach) { + if (!test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)) + return; + intf->detach(dev, dev_ctx->context); + clear_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); + } else { + if (!test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) + return; + intf->remove(dev, dev_ctx->context); + clear_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); + } +} + +static void mlx5_detach_device(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_interface *intf; + + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry(intf, &intf_list, list) + mlx5_detach_interface(intf, priv); + mutex_unlock(&mlx5_intf_mutex); +} + +static bool mlx5_device_registered(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv; + bool found = false; + + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry(priv, &mlx5_dev_list, dev_list) + if (priv == &dev->priv) + found = true; + mutex_unlock(&mlx5_intf_mutex); + + return found; } static int mlx5_register_device(struct mlx5_core_dev *dev) @@ -1162,16 +1264,16 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto err_sriov; } - err = mlx5_register_device(dev); - if (err) { - dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err); - goto err_reg_dev; + if (mlx5_device_registered(dev)) { + mlx5_attach_device(dev); + } else { + err 
= mlx5_register_device(dev); + if (err) { + dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err); + goto err_reg_dev; + } } - err = request_module_nowait(MLX5_IB_MOD); - if (err) - pr_info("failed request module on %s\n", MLX5_IB_MOD); - clear_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state); set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); out: @@ -1247,12 +1349,13 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto out; } + if (mlx5_device_registered(dev)) + mlx5_detach_device(dev); + mlx5_sriov_cleanup(dev); - mlx5_unregister_device(dev); #ifdef CONFIG_MLX5_CORE_EN mlx5_eswitch_cleanup(dev->priv.eswitch); #endif - mlx5_cleanup_rl_table(dev); mlx5_cleanup_fs(dev); mlx5_cleanup_mkey_table(dev); @@ -1364,6 +1467,9 @@ static int init_one(struct pci_dev *pdev, dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err); goto clean_health; } + err = request_module_nowait(MLX5_IB_MOD); + if (err) + pr_info("failed request module on %s\n", MLX5_IB_MOD); err = devlink_register(devlink, &pdev->dev); if (err) @@ -1391,11 +1497,14 @@ static void remove_one(struct pci_dev *pdev) struct mlx5_priv *priv = &dev->priv; devlink_unregister(devlink); + mlx5_unregister_device(dev); + if (mlx5_unload_one(dev, priv)) { dev_err(&dev->pdev->dev, "mlx5_unload_one failed\n"); mlx5_health_cleanup(dev); return; } + mlx5_health_cleanup(dev); mlx5_pci_close(dev, priv); pci_set_drvdata(pdev, NULL); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 0d7aedf..85c4786 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -930,6 +930,8 @@ enum { struct mlx5_interface { void * (*add)(struct mlx5_core_dev *dev); void (*remove)(struct mlx5_core_dev *dev, void *context); + int (*attach)(struct mlx5_core_dev *dev, void *context); + void (*detach)(struct mlx5_core_dev *dev, void *context); void (*event)(struct mlx5_core_dev *dev, void *context, enum mlx5_dev_event event, unsigned long param); void * 
(*get_dev)(void *context); -- cgit v1.1 From 59211bd3b6329c3e5f4a90ac3d7f87ffa7867073 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Fri, 9 Sep 2016 17:35:20 +0300 Subject: net/mlx5: Split the load/unload flow into hardware and software flows Gather all software context creating/destroying in one function and call it once in the first load and in the last unload. load/unload functions will now receive indication if we need to create/destroy the software contexts. In internal/pci error do the unload/load flows without releasing the software objects. In this way we preserve the sw core state and it helps us restore old driver state after PCI error/shutdown. Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 171 ++++++++++++++++--------- 1 file changed, 107 insertions(+), 64 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 108d8f2..966647f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1093,8 +1093,76 @@ static void mlx5_pci_close(struct mlx5_core_dev *dev, struct mlx5_priv *priv) debugfs_remove(priv->dbg_root); } -#define MLX5_IB_MOD "mlx5_ib" -static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) +static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) +{ + struct pci_dev *pdev = dev->pdev; + int err; + + err = mlx5_query_hca_caps(dev); + if (err) { + dev_err(&pdev->dev, "query hca failed\n"); + goto out; + } + + err = mlx5_query_board_id(dev); + if (err) { + dev_err(&pdev->dev, "query board id failed\n"); + goto out; + } + + err = mlx5_eq_init(dev); + if (err) { + dev_err(&pdev->dev, "failed to initialize eq\n"); + goto out; + } + + MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock); + + err = mlx5_init_cq_table(dev); + if (err) { + dev_err(&pdev->dev, "failed to initialize cq
table\n"); + goto err_eq_cleanup; + } + + mlx5_init_qp_table(dev); + + mlx5_init_srq_table(dev); + + mlx5_init_mkey_table(dev); + + err = mlx5_init_rl_table(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init rate limiting\n"); + goto err_tables_cleanup; + } + + return 0; + +err_tables_cleanup: + mlx5_cleanup_mkey_table(dev); + mlx5_cleanup_srq_table(dev); + mlx5_cleanup_qp_table(dev); + mlx5_cleanup_cq_table(dev); + +err_eq_cleanup: + mlx5_eq_cleanup(dev); + +out: + return err; +} + +static void mlx5_cleanup_once(struct mlx5_core_dev *dev) +{ + mlx5_cleanup_rl_table(dev); + mlx5_cleanup_mkey_table(dev); + mlx5_cleanup_srq_table(dev); + mlx5_cleanup_qp_table(dev); + mlx5_cleanup_cq_table(dev); + mlx5_eq_cleanup(dev); +} + +static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, + bool boot) { struct pci_dev *pdev = dev->pdev; int err; @@ -1127,12 +1195,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto out_err; } - mlx5_pagealloc_init(dev); - err = mlx5_core_enable_hca(dev, 0); if (err) { dev_err(&pdev->dev, "enable hca failed\n"); - goto err_pagealloc_cleanup; + goto err_cmd_cleanup; } err = mlx5_core_set_issi(dev); @@ -1185,34 +1251,21 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_start_health_poll(dev); - err = mlx5_query_hca_caps(dev); - if (err) { - dev_err(&pdev->dev, "query hca failed\n"); - goto err_stop_poll; - } - - err = mlx5_query_board_id(dev); - if (err) { - dev_err(&pdev->dev, "query board id failed\n"); + if (boot && mlx5_init_once(dev, priv)) { + dev_err(&pdev->dev, "sw objs init failed\n"); goto err_stop_poll; } err = mlx5_enable_msix(dev); if (err) { dev_err(&pdev->dev, "enable msix failed\n"); - goto err_stop_poll; - } - - err = mlx5_eq_init(dev); - if (err) { - dev_err(&pdev->dev, "failed to initialize eq\n"); - goto disable_msix; + goto err_cleanup_once; } err = mlx5_alloc_uuars(dev, &priv->uuari); if (err) { dev_err(&pdev->dev, "Failed 
allocating uar, aborting\n"); - goto err_eq_cleanup; + goto err_disable_msix; } err = mlx5_start_eqs(dev); @@ -1228,15 +1281,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) } err = mlx5_irq_set_affinity_hints(dev); - if (err) + if (err) { dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n"); - - MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock); - - mlx5_init_cq_table(dev); - mlx5_init_qp_table(dev); - mlx5_init_srq_table(dev); - mlx5_init_mkey_table(dev); + goto err_affinity_hints; + } err = mlx5_init_fs(dev); if (err) { @@ -1244,12 +1292,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto err_fs; } - err = mlx5_init_rl_table(dev); - if (err) { - dev_err(&pdev->dev, "Failed to init rate limiting\n"); - goto err_rl; - } - #ifdef CONFIG_MLX5_CORE_EN err = mlx5_eswitch_init(dev); if (err) { @@ -1281,22 +1323,19 @@ out: return 0; -err_sriov: +err_reg_dev: mlx5_sriov_cleanup(dev); +err_sriov: #ifdef CONFIG_MLX5_CORE_EN mlx5_eswitch_cleanup(dev->priv.eswitch); #endif -err_reg_dev: - mlx5_cleanup_rl_table(dev); -err_rl: mlx5_cleanup_fs(dev); + err_fs: - mlx5_cleanup_mkey_table(dev); - mlx5_cleanup_srq_table(dev); - mlx5_cleanup_qp_table(dev); - mlx5_cleanup_cq_table(dev); mlx5_irq_clear_affinity_hints(dev); + +err_affinity_hints: free_comp_eqs(dev); err_stop_eqs: @@ -1305,12 +1344,13 @@ err_stop_eqs: err_free_uar: mlx5_free_uuars(dev, &priv->uuari); -err_eq_cleanup: - mlx5_eq_cleanup(dev); - -disable_msix: +err_disable_msix: mlx5_disable_msix(dev); +err_cleanup_once: + if (boot) + mlx5_cleanup_once(dev); + err_stop_poll: mlx5_stop_health_poll(dev); if (mlx5_cmd_teardown_hca(dev)) { @@ -1327,8 +1367,7 @@ reclaim_boot_pages: err_disable_hca: mlx5_core_disable_hca(dev, 0); -err_pagealloc_cleanup: - mlx5_pagealloc_cleanup(dev); +err_cmd_cleanup: mlx5_cmd_cleanup(dev); out_err: @@ -1338,7 +1377,8 @@ out_err: return err; } -static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) 
+static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, + bool cleanup) { int err = 0; @@ -1346,6 +1386,8 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) { dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n", __func__); + if (cleanup) + mlx5_cleanup_once(dev); goto out; } @@ -1356,18 +1398,14 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) #ifdef CONFIG_MLX5_CORE_EN mlx5_eswitch_cleanup(dev->priv.eswitch); #endif - mlx5_cleanup_rl_table(dev); mlx5_cleanup_fs(dev); - mlx5_cleanup_mkey_table(dev); - mlx5_cleanup_srq_table(dev); - mlx5_cleanup_qp_table(dev); - mlx5_cleanup_cq_table(dev); mlx5_irq_clear_affinity_hints(dev); free_comp_eqs(dev); mlx5_stop_eqs(dev); mlx5_free_uuars(dev, &priv->uuari); - mlx5_eq_cleanup(dev); mlx5_disable_msix(dev); + if (cleanup) + mlx5_cleanup_once(dev); mlx5_stop_health_poll(dev); err = mlx5_cmd_teardown_hca(dev); if (err) { @@ -1377,7 +1415,6 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_pagealloc_stop(dev); mlx5_reclaim_startup_pages(dev); mlx5_core_disable_hca(dev, 0); - mlx5_pagealloc_cleanup(dev); mlx5_cmd_cleanup(dev); out: @@ -1416,6 +1453,7 @@ static const struct devlink_ops mlx5_devlink_ops = { #endif }; +#define MLX5_IB_MOD "mlx5_ib" static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) { @@ -1462,11 +1500,14 @@ static int init_one(struct pci_dev *pdev, goto close_pci; } - err = mlx5_load_one(dev, priv); + mlx5_pagealloc_init(dev); + + err = mlx5_load_one(dev, priv, true); if (err) { dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err); goto clean_health; } + err = request_module_nowait(MLX5_IB_MOD); if (err) pr_info("failed request module on %s\n", MLX5_IB_MOD); @@ -1478,8 +1519,9 @@ static int init_one(struct pci_dev *pdev, return 0; clean_load: - mlx5_unload_one(dev, priv); + mlx5_unload_one(dev, 
priv, true); clean_health: + mlx5_pagealloc_cleanup(dev); mlx5_health_cleanup(dev); close_pci: mlx5_pci_close(dev, priv); @@ -1499,12 +1541,13 @@ static void remove_one(struct pci_dev *pdev) devlink_unregister(devlink); mlx5_unregister_device(dev); - if (mlx5_unload_one(dev, priv)) { + if (mlx5_unload_one(dev, priv, true)) { dev_err(&dev->pdev->dev, "mlx5_unload_one failed\n"); mlx5_health_cleanup(dev); return; } + mlx5_pagealloc_cleanup(dev); mlx5_health_cleanup(dev); mlx5_pci_close(dev, priv); pci_set_drvdata(pdev, NULL); @@ -1519,7 +1562,7 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, dev_info(&pdev->dev, "%s was called\n", __func__); mlx5_enter_error_state(dev); - mlx5_unload_one(dev, priv); + mlx5_unload_one(dev, priv, false); pci_save_state(pdev); mlx5_pci_disable_device(dev); return state == pci_channel_io_perm_failure ? @@ -1591,7 +1634,7 @@ static void mlx5_pci_resume(struct pci_dev *pdev) dev_info(&pdev->dev, "%s was called\n", __func__); - err = mlx5_load_one(dev, priv); + err = mlx5_load_one(dev, priv, false); if (err) dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n" , __func__, err); @@ -1613,7 +1656,7 @@ static void shutdown(struct pci_dev *pdev) dev_info(&pdev->dev, "Shutdown was called\n"); /* Notify mlx5 clients that the kernel is being shut down */ set_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &dev->intf_state); - mlx5_unload_one(dev, priv); + mlx5_unload_one(dev, priv, false); mlx5_pci_disable_device(dev); } -- cgit v1.1 From acab721b5d8d9431cc80acc827973eeeda4dec24 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Fri, 9 Sep 2016 17:35:21 +0300 Subject: net/mlx5: Implement SRIOV attach/detach flows Needed for lightweight and modular internal/pci error handling. Implement sriov attach function which enables pre-saved number of vfs on the device side. Implement sriov detach function which disable the current vfs on the device side. 
Init/cleanup function only handles sriov software context allocation and destruction. Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 29 ++++++++++++++++------ 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 7dd14cf..04b719a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -91,6 +91,8 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev); void mlx5_disable_device(struct mlx5_core_dev *dev); int mlx5_sriov_init(struct mlx5_core_dev *dev); void mlx5_sriov_cleanup(struct mlx5_core_dev *dev); +int mlx5_sriov_attach(struct mlx5_core_dev *dev); +void mlx5_sriov_detach(struct mlx5_core_dev *dev); int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs); bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev); int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 72a8215..f4f02b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -188,6 +188,25 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) return err ? 
err : num_vfs; } +int mlx5_sriov_attach(struct mlx5_core_dev *dev) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + + if (!mlx5_core_is_pf(dev) || !sriov->num_vfs) + return 0; + + /* If sriov VFs exist in PCI level, enable them in device level */ + return mlx5_device_enable_sriov(dev, sriov->num_vfs); +} + +void mlx5_sriov_detach(struct mlx5_core_dev *dev) +{ + if (!mlx5_core_is_pf(dev)) + return; + + mlx5_device_disable_sriov(dev); +} + int mlx5_sriov_init(struct mlx5_core_dev *dev) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; @@ -203,12 +222,7 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev) if (!sriov->vfs_ctx) return -ENOMEM; - /* If sriov VFs exist in PCI level, enable them in device level */ - if (!sriov->num_vfs) - return 0; - - mlx5_device_enable_sriov(dev, sriov->num_vfs); - return 0; + return mlx5_sriov_attach(dev); } void mlx5_sriov_cleanup(struct mlx5_core_dev *dev) @@ -217,7 +231,6 @@ void mlx5_sriov_cleanup(struct mlx5_core_dev *dev) if (!mlx5_core_is_pf(dev)) return; - - mlx5_device_disable_sriov(dev); + mlx5_sriov_detach(dev); kfree(sriov->vfs_ctx); } -- cgit v1.1 From 62a9b90ad83ebe584bf22a2e716a96e75f82e137 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Fri, 9 Sep 2016 17:35:22 +0300 Subject: net/mlx5: Implement eswitch attach/detach flows Needed for lightweight and modular internal/pci error handling. Implement eswitch attach function which allocates/starts hw related resources. Implement eswitch detach function which releases/stops hw related resources. Init/cleanup function only handle eswitch software context allocation and destruction. Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 24 ++++++++++++++++++++--- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 2 ++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 1014305..24058894 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1559,6 +1559,25 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) esw_enable_vport(esw, 0, UC_ADDR_CHANGE); } +void mlx5_eswitch_attach(struct mlx5_eswitch *esw) +{ + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || + MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return; + + esw_enable_vport(esw, 0, UC_ADDR_CHANGE); + /* VF Vports will be enabled when SRIOV is enabled */ +} + +void mlx5_eswitch_detach(struct mlx5_eswitch *esw) +{ + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || + MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return; + + esw_disable_vport(esw, 0); +} + int mlx5_eswitch_init(struct mlx5_core_dev *dev) { int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); @@ -1635,9 +1654,8 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) esw->enabled_vports = 0; esw->mode = SRIOV_NONE; + mlx5_eswitch_attach(esw); dev->priv.eswitch = esw; - esw_enable_vport(esw, 0, UC_ADDR_CHANGE); - /* VF Vports will be enabled when SRIOV is enabled */ return 0; abort: if (esw->work_queue) @@ -1656,8 +1674,8 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) return; esw_info(esw->dev, "cleanup\n"); - esw_disable_vport(esw, 0); + mlx5_eswitch_detach(esw); esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); kfree(esw->l2_table.bitmap); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index a961409..48c273d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -204,6 +204,8 @@ struct mlx5_eswitch { /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); +void mlx5_eswitch_attach(struct mlx5_eswitch *esw); +void mlx5_eswitch_detach(struct mlx5_eswitch *esw); void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe); int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode); void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw); -- cgit v1.1 From c2d6e31a0008f8188f935f8dd81c81c44697b256 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Fri, 9 Sep 2016 17:35:23 +0300 Subject: net/mlx5: Align sriov/eswitch modules with the new load/unload flow. Init/cleanup sriov/eswitch in the core software context init/cleanup flows. Attach/detach sriov/eswitch in the core load/unload flows. Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2 -- drivers/net/ethernet/mellanox/mlx5/core/main.c | 42 +++++++++++++++++------ drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 4 +-- 3 files changed, 34 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 24058894..015f1bfe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1654,7 +1654,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) esw->enabled_vports = 0; esw->mode = SRIOV_NONE; - mlx5_eswitch_attach(esw); dev->priv.eswitch = esw; return 0; abort: @@ -1675,7 +1674,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw_info(esw->dev, "cleanup\n"); - mlx5_eswitch_detach(esw); esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); kfree(esw->l2_table.bitmap); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 966647f..16660cf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1136,8 +1136,30 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto err_tables_cleanup; } +#ifdef CONFIG_MLX5_CORE_EN + err = mlx5_eswitch_init(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init eswitch %d\n", err); + goto err_rl_cleanup; + } +#endif + + err = mlx5_sriov_init(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init sriov %d\n", err); + goto err_eswitch_cleanup; + } + return 0; +err_eswitch_cleanup: +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_cleanup(dev->priv.eswitch); + +err_rl_cleanup: +#endif + mlx5_cleanup_rl_table(dev); + err_tables_cleanup: mlx5_cleanup_mkey_table(dev); mlx5_cleanup_srq_table(dev); @@ -1153,6 +1175,10 @@ out: static void mlx5_cleanup_once(struct mlx5_core_dev *dev) { + mlx5_sriov_cleanup(dev); +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_cleanup(dev->priv.eswitch); +#endif mlx5_cleanup_rl_table(dev); mlx5_cleanup_mkey_table(dev); mlx5_cleanup_srq_table(dev); @@ -1293,14 +1319,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, } #ifdef CONFIG_MLX5_CORE_EN - err = mlx5_eswitch_init(dev); - if (err) { - dev_err(&pdev->dev, "eswitch init failed %d\n", err); - goto err_reg_dev; - } + mlx5_eswitch_attach(dev->priv.eswitch); #endif - err = mlx5_sriov_init(dev); + err = mlx5_sriov_attach(dev); if (err) { dev_err(&pdev->dev, "sriov init failed %d\n", err); goto err_sriov; @@ -1324,11 +1346,11 @@ out: return 0; err_reg_dev: - mlx5_sriov_cleanup(dev); + mlx5_sriov_detach(dev); err_sriov: #ifdef CONFIG_MLX5_CORE_EN - mlx5_eswitch_cleanup(dev->priv.eswitch); + mlx5_eswitch_detach(dev->priv.eswitch); #endif mlx5_cleanup_fs(dev); @@ -1394,9 +1416,9 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, if (mlx5_device_registered(dev)) mlx5_detach_device(dev); - 
mlx5_sriov_cleanup(dev); + mlx5_sriov_detach(dev); #ifdef CONFIG_MLX5_CORE_EN - mlx5_eswitch_cleanup(dev->priv.eswitch); + mlx5_eswitch_detach(dev->priv.eswitch); #endif mlx5_cleanup_fs(dev); mlx5_irq_clear_affinity_hints(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index f4f02b6..e086277 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -222,7 +222,7 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev) if (!sriov->vfs_ctx) return -ENOMEM; - return mlx5_sriov_attach(dev); + return 0; } void mlx5_sriov_cleanup(struct mlx5_core_dev *dev) @@ -231,6 +231,6 @@ void mlx5_sriov_cleanup(struct mlx5_core_dev *dev) if (!mlx5_core_is_pf(dev)) return; - mlx5_sriov_detach(dev); + kfree(sriov->vfs_ctx); } -- cgit v1.1 From 1ab2068a4c663cbb2e0e0cfea934bc4e163abed0 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Fri, 9 Sep 2016 17:35:24 +0300 Subject: net/mlx5: Implement vports admin state backup/restore Save the user configuration in the vport struct. Restore saved old configuration upon vport enable. Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 249 ++++++++++------------ drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 16 +- 2 files changed, 124 insertions(+), 141 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 015f1bfe..654b76f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -116,57 +116,6 @@ static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, } /* E-Switch vport context HW commands */ -static int query_esw_vport_context_cmd(struct mlx5_core_dev *mdev, u32 vport, - u32 *out, int outlen) -{ - u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {0}; - - MLX5_SET(query_nic_vport_context_in, in, opcode, - MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT); - MLX5_SET(query_esw_vport_context_in, in, vport_number, vport); - if (vport) - MLX5_SET(query_esw_vport_context_in, in, other_vport, 1); - return mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); -} - -static int query_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, - u16 *vlan, u8 *qos) -{ - u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {0}; - int err; - bool cvlan_strip; - bool cvlan_insert; - - *vlan = 0; - *qos = 0; - - if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) || - !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist)) - return -ENOTSUPP; - - err = query_esw_vport_context_cmd(dev, vport, out, sizeof(out)); - if (err) - goto out; - - cvlan_strip = MLX5_GET(query_esw_vport_context_out, out, - esw_vport_context.vport_cvlan_strip); - - cvlan_insert = MLX5_GET(query_esw_vport_context_out, out, - esw_vport_context.vport_cvlan_insert); - - if (cvlan_strip || cvlan_insert) { - *vlan = MLX5_GET(query_esw_vport_context_out, out, - esw_vport_context.cvlan_id); - *qos = MLX5_GET(query_esw_vport_context_out, out, - esw_vport_context.cvlan_pcp); - } - - esw_debug(dev, "Query Vport[%d] cvlan: VLAN %d qos=%d\n", - vport, 
*vlan, *qos); -out: - return err; -} - static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, void *in, int inlen) { @@ -921,7 +870,7 @@ static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num) esw_debug(esw->dev, "vport[%d] context update rx mode promisc_all=%d, all_multi=%d\n", vport_num, promisc_all, promisc_mc); - if (!vport->trusted || !vport->enabled) { + if (!vport->info.trusted || !vport->enabled) { promisc_uc = 0; promisc_mc = 0; promisc_all = 0; @@ -1257,30 +1206,20 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { struct mlx5_flow_spec *spec; - u8 smac[ETH_ALEN]; int err = 0; u8 *smac_v; - if (vport->spoofchk) { - err = mlx5_query_nic_vport_mac_address(esw->dev, vport->vport, smac); - if (err) { - esw_warn(esw->dev, - "vport[%d] configure ingress rules failed, query smac failed, err(%d)\n", - vport->vport, err); - return err; - } + if (vport->info.spoofchk && !is_valid_ether_addr(vport->info.mac)) { + mlx5_core_warn(esw->dev, + "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n", + vport->vport); + return -EPERM; - if (!is_valid_ether_addr(smac)) { - mlx5_core_warn(esw->dev, - "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n", - vport->vport); - return -EPERM; - } } esw_vport_cleanup_ingress_rules(esw, vport); - if (!vport->vlan && !vport->qos && !vport->spoofchk) { + if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) { esw_vport_disable_ingress_acl(esw, vport); return 0; } @@ -1289,7 +1228,7 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, esw_debug(esw->dev, "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n", - vport->vport, vport->vlan, vport->qos); + vport->vport, vport->info.vlan, vport->info.qos); spec = mlx5_vzalloc(sizeof(*spec)); if (!spec) { @@ -1299,16 +1238,16 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, goto out; } - if (vport->vlan || vport->qos) + if 
(vport->info.vlan || vport->info.qos) MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag); - if (vport->spoofchk) { + if (vport->info.spoofchk) { MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16); MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_15_0); smac_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.smac_47_16); - ether_addr_copy(smac_v, smac); + ether_addr_copy(smac_v, vport->info.mac); } spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; @@ -1354,7 +1293,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, esw_vport_cleanup_egress_rules(esw, vport); - if (!vport->vlan && !vport->qos) { + if (!vport->info.vlan && !vport->info.qos) { esw_vport_disable_egress_acl(esw, vport); return 0; } @@ -1363,7 +1302,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, esw_debug(esw->dev, "vport[%d] configure egress rules, vlan(%d) qos(%d)\n", - vport->vport, vport->vlan, vport->qos); + vport->vport, vport->info.vlan, vport->info.qos); spec = mlx5_vzalloc(sizeof(*spec)); if (!spec) { @@ -1377,7 +1316,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag); MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.vlan_tag); MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid); - MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->vlan); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->info.vlan); spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; vport->egress.allowed_vlan = @@ -1411,6 +1350,41 @@ out: return err; } +static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN]) +{ + ((u8 *)node_guid)[7] = mac[0]; + ((u8 *)node_guid)[6] = mac[1]; + ((u8 *)node_guid)[5] = mac[2]; + ((u8 *)node_guid)[4] = 0xff; + ((u8 *)node_guid)[3] = 0xfe; + ((u8 
*)node_guid)[2] = mac[3]; + ((u8 *)node_guid)[1] = mac[4]; + ((u8 *)node_guid)[0] = mac[5]; +} + +static void esw_apply_vport_conf(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int vport_num = vport->vport; + + if (!vport_num) + return; + + mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport_num, + vport->info.link_state); + mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, vport->info.mac); + mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, vport->info.node_guid); + modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos, + (vport->info.vlan || vport->info.qos)); + + /* Only legacy mode needs ACLs */ + if (esw->mode == SRIOV_LEGACY) { + esw_vport_ingress_config(esw, vport); + esw_vport_egress_config(esw, vport); + } +} static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, int enable_events) { @@ -1421,23 +1395,17 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); - /* Only VFs need ACLs for VST and spoofchk filtering */ - if (vport_num && esw->mode == SRIOV_LEGACY) { - esw_vport_ingress_config(esw, vport); - esw_vport_egress_config(esw, vport); - } - - mlx5_modify_vport_admin_state(esw->dev, - MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, - vport_num, - MLX5_ESW_VPORT_ADMIN_STATE_AUTO); + /* Restore old vport configuration */ + esw_apply_vport_conf(esw, vport); /* Sync with current vport context */ vport->enabled_events = enable_events; vport->enabled = true; /* only PF is trusted by default */ - vport->trusted = (vport_num) ? 
false : true; + if (!vport_num) + vport->info.trusted = true; + esw_vport_change_handle_locked(vport); esw->enabled_vports++; @@ -1457,11 +1425,6 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) vport->enabled = false; synchronize_irq(mlx5_get_msix_vec(esw->dev, MLX5_EQ_VEC_ASYNC)); - - mlx5_modify_vport_admin_state(esw->dev, - MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, - vport_num, - MLX5_ESW_VPORT_ADMIN_STATE_DOWN); /* Wait for current already scheduled events to complete */ flush_workqueue(esw->work_queue); /* Disable events from this vport */ @@ -1473,7 +1436,12 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) */ esw_vport_change_handle_locked(vport); vport->enabled_events = 0; + if (vport_num && esw->mode == SRIOV_LEGACY) { + mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport_num, + MLX5_ESW_VPORT_ADMIN_STATE_DOWN); esw_vport_disable_egress_acl(esw, vport); esw_vport_disable_ingress_acl(esw, vport); } @@ -1645,6 +1613,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) struct mlx5_vport *vport = &esw->vports[vport_num]; vport->vport = vport_num; + vport->info.link_state = MLX5_ESW_VPORT_ADMIN_STATE_AUTO; vport->dev = dev; INIT_WORK(&vport->vport_change_handler, esw_vport_change_handler); @@ -1705,18 +1674,6 @@ void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) (esw && MLX5_CAP_GEN(esw->dev, vport_group_manager) && mlx5_core_is_pf(esw->dev)) #define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports) -static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN]) -{ - ((u8 *)node_guid)[7] = mac[0]; - ((u8 *)node_guid)[6] = mac[1]; - ((u8 *)node_guid)[5] = mac[2]; - ((u8 *)node_guid)[4] = 0xff; - ((u8 *)node_guid)[3] = 0xfe; - ((u8 *)node_guid)[2] = mac[3]; - ((u8 *)node_guid)[1] = mac[4]; - ((u8 *)node_guid)[0] = mac[5]; -} - int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, int vport, u8 mac[ETH_ALEN]) { @@ 
-1729,13 +1686,15 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, if (!LEGAL_VPORT(esw, vport)) return -EINVAL; + mutex_lock(&esw->state_lock); evport = &esw->vports[vport]; - if (evport->spoofchk && !is_valid_ether_addr(mac)) { + if (evport->info.spoofchk && !is_valid_ether_addr(mac)) { mlx5_core_warn(esw->dev, "MAC invalidation is not allowed when spoofchk is on, vport(%d)\n", vport); - return -EPERM; + err = -EPERM; + goto unlock; } err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac); @@ -1743,7 +1702,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, mlx5_core_warn(esw->dev, "Failed to mlx5_modify_nic_vport_mac vport(%d) err=(%d)\n", vport, err); - return err; + goto unlock; } node_guid_gen_from_mac(&node_guid, mac); @@ -1753,9 +1712,12 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, "Failed to set vport %d node guid, err = %d. RDMA_CM will not function properly for this VF.\n", vport, err); - mutex_lock(&esw->state_lock); + ether_addr_copy(evport->info.mac, mac); + evport->info.node_guid = node_guid; if (evport->enabled && esw->mode == SRIOV_LEGACY) err = esw_vport_ingress_config(esw, evport); + +unlock: mutex_unlock(&esw->state_lock); return err; } @@ -1763,22 +1725,38 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, int vport, int link_state) { + struct mlx5_vport *evport; + int err = 0; + if (!ESW_ALLOWED(esw)) return -EPERM; if (!LEGAL_VPORT(esw, vport)) return -EINVAL; - return mlx5_modify_vport_admin_state(esw->dev, - MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, - vport, link_state); + mutex_lock(&esw->state_lock); + evport = &esw->vports[vport]; + + err = mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport, link_state); + if (err) { + mlx5_core_warn(esw->dev, + "Failed to set vport %d link state, err = %d", + vport, err); + goto unlock; + } + + evport->info.link_state = link_state; + +unlock: + 
mutex_unlock(&esw->state_lock); + return 0; } int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, int vport, struct ifla_vf_info *ivi) { struct mlx5_vport *evport; - u16 vlan; - u8 qos; if (!ESW_ALLOWED(esw)) return -EPERM; @@ -1790,14 +1768,14 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, memset(ivi, 0, sizeof(*ivi)); ivi->vf = vport - 1; - mlx5_query_nic_vport_mac_address(esw->dev, vport, ivi->mac); - ivi->linkstate = mlx5_query_vport_admin_state(esw->dev, - MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, - vport); - query_esw_vport_cvlan(esw->dev, vport, &vlan, &qos); - ivi->vlan = vlan; - ivi->qos = qos; - ivi->spoofchk = evport->spoofchk; + mutex_lock(&esw->state_lock); + ether_addr_copy(ivi->mac, evport->info.mac); + ivi->linkstate = evport->info.link_state; + ivi->vlan = evport->info.vlan; + ivi->qos = evport->info.qos; + ivi->spoofchk = evport->info.spoofchk; + ivi->trusted = evport->info.trusted; + mutex_unlock(&esw->state_lock); return 0; } @@ -1817,23 +1795,23 @@ int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, if (vlan || qos) set = 1; + mutex_lock(&esw->state_lock); evport = &esw->vports[vport]; err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set); if (err) - return err; + goto unlock; - mutex_lock(&esw->state_lock); - evport->vlan = vlan; - evport->qos = qos; + evport->info.vlan = vlan; + evport->info.qos = qos; if (evport->enabled && esw->mode == SRIOV_LEGACY) { err = esw_vport_ingress_config(esw, evport); if (err) - goto out; + goto unlock; err = esw_vport_egress_config(esw, evport); } -out: +unlock: mutex_unlock(&esw->state_lock); return err; } @@ -1850,16 +1828,14 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, if (!LEGAL_VPORT(esw, vport)) return -EINVAL; - evport = &esw->vports[vport]; - mutex_lock(&esw->state_lock); - pschk = evport->spoofchk; - evport->spoofchk = spoofchk; - if (evport->enabled && esw->mode == SRIOV_LEGACY) { + evport = &esw->vports[vport]; + pschk = 
evport->info.spoofchk; + evport->info.spoofchk = spoofchk; + if (evport->enabled && esw->mode == SRIOV_LEGACY) err = esw_vport_ingress_config(esw, evport); - if (err) - evport->spoofchk = pschk; - } + if (err) + evport->info.spoofchk = pschk; mutex_unlock(&esw->state_lock); return err; @@ -1875,10 +1851,9 @@ int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, if (!LEGAL_VPORT(esw, vport)) return -EINVAL; - evport = &esw->vports[vport]; - mutex_lock(&esw->state_lock); - evport->trusted = setting; + evport = &esw->vports[vport]; + evport->info.trusted = setting; if (evport->enabled) esw_vport_change_handle_locked(evport); mutex_unlock(&esw->state_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 48c273d..6855783 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -109,6 +109,16 @@ struct vport_egress { struct mlx5_flow_rule *drop_rule; }; +struct mlx5_vport_info { + u8 mac[ETH_ALEN]; + u16 vlan; + u8 qos; + u64 node_guid; + int link_state; + bool spoofchk; + bool trusted; +}; + struct mlx5_vport { struct mlx5_core_dev *dev; int vport; @@ -121,10 +131,8 @@ struct mlx5_vport { struct vport_ingress ingress; struct vport_egress egress; - u16 vlan; - u8 qos; - bool spoofchk; - bool trusted; + struct mlx5_vport_info info; + bool enabled; u16 enabled_events; }; -- cgit v1.1 From 26e59d8077a31972dc81fe5ff75aa4fd5b260b71 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Fri, 9 Sep 2016 17:35:25 +0300 Subject: net/mlx5e: Implement mlx5e interface attach/detach callbacks Needed to support seamless and lightweight PCI/Internal error recovery. Implement the attach/detach interface callbacks. In attach callback we only allocate HW resources. In detach callback we only deallocate HW resources. All SW/kernel objects initializing/destroying is kept in add/remove callbacks.
Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 7 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 200 ++++++++++++++++------ drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 39 ++++- 3 files changed, 183 insertions(+), 63 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 9699560..a9358cf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -844,9 +844,12 @@ void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv); int mlx5e_close(struct net_device *netdev); int mlx5e_open(struct net_device *netdev); void mlx5e_update_stats_work(struct work_struct *work); -void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, - const struct mlx5e_profile *profile, void *ppriv); +struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile, + void *ppriv); void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv); +int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev); +void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev); struct rtnl_link_stats64 * mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 03586ee..af4c61e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1883,6 +1883,9 @@ int mlx5e_close(struct net_device *netdev) struct mlx5e_priv *priv = netdev_priv(netdev); int err; + if (!netif_device_present(netdev)) + return -ENODEV; + mutex_lock(&priv->state_lock); err = mlx5e_close_locked(netdev); mutex_unlock(&priv->state_lock); @@ -3401,13 +3404,13 @@ static const struct mlx5e_profile mlx5e_nic_profile = { .max_tc = 
MLX5E_MAX_NUM_TC, }; -void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, - const struct mlx5e_profile *profile, void *ppriv) +struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile, + void *ppriv) { + int nch = profile->max_nch(mdev); struct net_device *netdev; struct mlx5e_priv *priv; - int nch = profile->max_nch(mdev); - int err; netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), nch * profile->max_tc, @@ -3425,12 +3428,31 @@ void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, priv->wq = create_singlethread_workqueue("mlx5e"); if (!priv->wq) - goto err_free_netdev; + goto err_cleanup_nic; + + return netdev; + +err_cleanup_nic: + profile->cleanup(priv); + free_netdev(netdev); + + return NULL; +} + +int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) +{ + const struct mlx5e_profile *profile; + struct mlx5e_priv *priv; + int err; + + priv = netdev_priv(netdev); + profile = priv->profile; + clear_bit(MLX5E_STATE_DESTROYING, &priv->state); err = mlx5e_create_umr_mkey(priv); if (err) { mlx5_core_err(mdev, "create umr mkey failed, %d\n", err); - goto err_destroy_wq; + goto out; } err = profile->init_tx(priv); @@ -3453,20 +3475,16 @@ void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, mlx5e_set_dev_port_mtu(netdev); - err = register_netdev(netdev); - if (err) { - mlx5_core_err(mdev, "register_netdev failed, %d\n", err); - goto err_dealloc_q_counters; - } - if (profile->enable) profile->enable(priv); - return priv; + rtnl_lock(); + if (netif_running(netdev)) + mlx5e_open(netdev); + netif_device_attach(netdev); + rtnl_unlock(); -err_dealloc_q_counters: - mlx5e_destroy_q_counter(priv); - profile->cleanup_rx(priv); + return 0; err_close_drop_rq: mlx5e_close_drop_rq(priv); @@ -3477,13 +3495,8 @@ err_cleanup_tx: err_destroy_umr_mkey: mlx5_core_destroy_mkey(mdev, &priv->umr_mkey); -err_destroy_wq: - destroy_workqueue(priv->wq); - -err_free_netdev: - free_netdev(netdev); - - return NULL; 
+out: + return err; } static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev) @@ -3509,16 +3522,80 @@ static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev) } } +void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + const struct mlx5e_profile *profile = priv->profile; + + set_bit(MLX5E_STATE_DESTROYING, &priv->state); + if (profile->disable) + profile->disable(priv); + + flush_workqueue(priv->wq); + + rtnl_lock(); + if (netif_running(netdev)) + mlx5e_close(netdev); + netif_device_detach(netdev); + rtnl_unlock(); + + mlx5e_destroy_q_counter(priv); + profile->cleanup_rx(priv); + mlx5e_close_drop_rq(priv); + profile->cleanup_tx(priv); + mlx5_core_destroy_mkey(priv->mdev, &priv->umr_mkey); + cancel_delayed_work_sync(&priv->update_stats_work); +} + +/* mlx5e_attach and mlx5e_detach scope should be only creating/destroying + * hardware contexts and to connect it to the current netdev. + */ +static int mlx5e_attach(struct mlx5_core_dev *mdev, void *vpriv) +{ + struct mlx5e_priv *priv = vpriv; + struct net_device *netdev = priv->netdev; + int err; + + if (netif_device_present(netdev)) + return 0; + + err = mlx5e_create_mdev_resources(mdev); + if (err) + return err; + + err = mlx5e_attach_netdev(mdev, netdev); + if (err) { + mlx5e_destroy_mdev_resources(mdev); + return err; + } + + return 0; +} + +static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv) +{ + struct mlx5e_priv *priv = vpriv; + struct net_device *netdev = priv->netdev; + + if (!netif_device_present(netdev)) + return; + + mlx5e_detach_netdev(mdev, netdev); + mlx5e_destroy_mdev_resources(mdev); +} + static void *mlx5e_add(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; + int total_vfs = MLX5_TOTAL_VPORTS(mdev); void *ppriv = NULL; - void *ret; - - if (mlx5e_check_required_hca_cap(mdev)) - return NULL; + void *priv; + int vport; + int err; + struct net_device *netdev; - if 
(mlx5e_create_mdev_resources(mdev)) + err = mlx5e_check_required_hca_cap(mdev); + if (err) return NULL; mlx5e_register_vport_rep(mdev); @@ -3526,12 +3603,39 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) if (MLX5_CAP_GEN(mdev, vport_group_manager)) ppriv = &esw->offloads.vport_reps[0]; - ret = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, ppriv); - if (!ret) { - mlx5e_destroy_mdev_resources(mdev); - return NULL; + netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, ppriv); + if (!netdev) { + mlx5_core_err(mdev, "mlx5e_create_netdev failed\n"); + goto err_unregister_reps; + } + + priv = netdev_priv(netdev); + + err = mlx5e_attach(mdev, priv); + if (err) { + mlx5_core_err(mdev, "mlx5e_attach failed, %d\n", err); + goto err_destroy_netdev; + } + + err = register_netdev(netdev); + if (err) { + mlx5_core_err(mdev, "register_netdev failed, %d\n", err); + goto err_detach; } - return ret; + + return priv; + +err_detach: + mlx5e_detach(mdev, priv); + +err_destroy_netdev: + mlx5e_destroy_netdev(mdev, priv); + +err_unregister_reps: + for (vport = 1; vport < total_vfs; vport++) + mlx5_eswitch_unregister_vport_rep(esw, vport); + + return NULL; } void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv) @@ -3539,30 +3643,11 @@ void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv) const struct mlx5e_profile *profile = priv->profile; struct net_device *netdev = priv->netdev; - set_bit(MLX5E_STATE_DESTROYING, &priv->state); - if (profile->disable) - profile->disable(priv); - - flush_workqueue(priv->wq); - if (test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state)) { - netif_device_detach(netdev); - mlx5e_close(netdev); - } else { - unregister_netdev(netdev); - } - - mlx5e_destroy_q_counter(priv); - profile->cleanup_rx(priv); - mlx5e_close_drop_rq(priv); - profile->cleanup_tx(priv); - mlx5_core_destroy_mkey(priv->mdev, &priv->umr_mkey); - cancel_delayed_work_sync(&priv->update_stats_work); + unregister_netdev(netdev); 
destroy_workqueue(priv->wq); if (profile->cleanup) profile->cleanup(priv); - - if (!test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state)) - free_netdev(netdev); + free_netdev(netdev); } static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) @@ -3572,12 +3657,11 @@ static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) struct mlx5e_priv *priv = vpriv; int vport; - mlx5e_destroy_netdev(mdev, priv); - for (vport = 1; vport < total_vfs; vport++) mlx5_eswitch_unregister_vport_rep(esw, vport); - mlx5e_destroy_mdev_resources(mdev); + mlx5e_detach(mdev, vpriv); + mlx5e_destroy_netdev(mdev, priv); } static void *mlx5e_get_netdev(void *vpriv) @@ -3590,6 +3674,8 @@ static void *mlx5e_get_netdev(void *vpriv) static struct mlx5_interface mlx5e_interface = { .add = mlx5e_add, .remove = mlx5e_remove, + .attach = mlx5e_attach, + .detach = mlx5e_detach, .event = mlx5e_async_event, .protocol = MLX5_INTERFACE_PROTOCOL_ETH, .get_dev = mlx5e_get_netdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 29db473..3c97da1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -413,19 +413,50 @@ static struct mlx5e_profile mlx5e_rep_profile = { int mlx5e_vport_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) { - rep->priv_data = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rep); - if (!rep->priv_data) { - mlx5_core_warn(esw->dev, "Failed to create representor for vport %d\n", - rep->vport); + struct net_device *netdev; + int err; + + netdev = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rep); + if (!netdev) { + pr_warn("Failed to create representor netdev for vport %d\n", + rep->vport); return -EINVAL; } + + rep->priv_data = netdev_priv(netdev); + + err = mlx5e_attach_netdev(esw->dev, netdev); + if (err) { + pr_warn("Failed to attach representor netdev for vport %d\n", + rep->vport); + goto 
err_destroy_netdev; + } + + err = register_netdev(netdev); + if (err) { + pr_warn("Failed to register representor netdev for vport %d\n", + rep->vport); + goto err_detach_netdev; + } + return 0; + +err_detach_netdev: + mlx5e_detach_netdev(esw->dev, netdev); + +err_destroy_netdev: + mlx5e_destroy_netdev(esw->dev, rep->priv_data); + + return err; + } void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) { struct mlx5e_priv *priv = rep->priv_data; + struct net_device *netdev = priv->netdev; + mlx5e_detach_netdev(esw->dev, netdev); mlx5e_destroy_netdev(esw->dev, priv); } -- cgit v1.1 From 9df30601c843aeb9877c966d9d75d4947117c923 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Fri, 9 Sep 2016 17:35:26 +0300 Subject: net/mlx5e: Restore vlan filter after seamless reset When detaching the mlx5e interface clear all the vlans rules from the vlan flow table. When attaching it back restore all the active vlans rules to the HW. Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 38 +++++++++++++++++++++---- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 1587a9f..36fbc6b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -294,6 +294,36 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, return 0; } +static void mlx5e_add_vlan_rules(struct mlx5e_priv *priv) +{ + int i; + + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); + + for_each_set_bit(i, priv->fs.vlan.active_vlans, VLAN_N_VID) { + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, i); + } + + if (priv->fs.vlan.filter_disabled && + !(priv->netdev->flags & IFF_PROMISC)) + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); +} + +static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv) +{ + int i; + + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); + + for_each_set_bit(i, priv->fs.vlan.active_vlans, VLAN_N_VID) { + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, i); + } + + if (priv->fs.vlan.filter_disabled && + !(priv->netdev->flags & IFF_PROMISC)) + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); +} + #define mlx5e_for_each_hash_node(hn, tmp, hash, i) \ for (i = 0; i < MLX5E_L2_ADDR_HASH_SIZE; i++) \ hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist) @@ -1024,14 +1054,10 @@ static int mlx5e_create_vlan_table(struct mlx5e_priv *priv) if (err) goto err_free_g; - err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); - if (err) - goto err_destroy_vlan_flow_groups; + mlx5e_add_vlan_rules(priv); return 0; -err_destroy_vlan_flow_groups: - mlx5e_destroy_groups(ft); err_free_g: kfree(ft->g); err_destroy_vlan_table: @@ -1043,6 +1069,7 @@ err_destroy_vlan_table: static void mlx5e_destroy_vlan_table(struct mlx5e_priv *priv) { + 
mlx5e_del_vlan_rules(priv); mlx5e_destroy_flow_table(&priv->fs.vlan.ft); } @@ -1100,7 +1127,6 @@ err_destroy_arfs_tables: void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv) { - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); mlx5e_destroy_vlan_table(priv); mlx5e_destroy_l2_table(priv); mlx5e_destroy_ttc_table(priv); -- cgit v1.1 From f1ee87fe55c86d4c5adc804db15b3ed06169fba5 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Fri, 9 Sep 2016 17:35:27 +0300 Subject: net/mlx5: Organize device list API in one place Hide the exposed (external) mlx5_dev_list and mlx5_intf_mutex and expose an organized modular API to manage and manipulate mlx5 devices list. Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 345 +++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/lag.c | 24 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 270 ---------------- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 17 +- 5 files changed, 362 insertions(+), 296 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/dev.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index dad326c..0343725 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ - fs_counters.o rl.o lag.o + fs_counters.o rl.o lag.o dev.o mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \ en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c new file 
mode 100644 index 0000000..a9dbc28 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -0,0 +1,345 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include "mlx5_core.h" + +static LIST_HEAD(intf_list); +static LIST_HEAD(mlx5_dev_list); +/* intf dev list mutex */ +static DEFINE_MUTEX(mlx5_intf_mutex); + +struct mlx5_device_context { + struct list_head list; + struct mlx5_interface *intf; + void *context; + unsigned long state; +}; + +enum { + MLX5_INTERFACE_ADDED, + MLX5_INTERFACE_ATTACHED, +}; + +void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); + + if (!mlx5_lag_intf_add(intf, priv)) + return; + + dev_ctx = kzalloc(sizeof(*dev_ctx), GFP_KERNEL); + if (!dev_ctx) + return; + + dev_ctx->intf = intf; + dev_ctx->context = intf->add(dev); + set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); + if (intf->attach) + set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); + + if (dev_ctx->context) { + spin_lock_irq(&priv->ctx_lock); + list_add_tail(&dev_ctx->list, &priv->ctx_list); + spin_unlock_irq(&priv->ctx_lock); + } else { + kfree(dev_ctx); + } +} + +static struct mlx5_device_context *mlx5_get_device(struct mlx5_interface *intf, + struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + + list_for_each_entry(dev_ctx, &priv->ctx_list, list) + if (dev_ctx->intf == intf) + return dev_ctx; + return NULL; +} + +void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); + + dev_ctx = mlx5_get_device(intf, priv); + if (!dev_ctx) + return; + + spin_lock_irq(&priv->ctx_lock); + list_del(&dev_ctx->list); + spin_unlock_irq(&priv->ctx_lock); + + if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) + intf->remove(dev, dev_ctx->context); + + kfree(dev_ctx); +} + +static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + struct mlx5_core_dev *dev = 
container_of(priv, struct mlx5_core_dev, priv); + + dev_ctx = mlx5_get_device(intf, priv); + if (!dev_ctx) + return; + + if (intf->attach) { + if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)) + return; + intf->attach(dev, dev_ctx->context); + set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); + } else { + if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) + return; + dev_ctx->context = intf->add(dev); + set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); + } +} + +void mlx5_attach_device(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_interface *intf; + + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry(intf, &intf_list, list) + mlx5_attach_interface(intf, priv); + mutex_unlock(&mlx5_intf_mutex); +} + +static void mlx5_detach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); + + dev_ctx = mlx5_get_device(intf, priv); + if (!dev_ctx) + return; + + if (intf->detach) { + if (!test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)) + return; + intf->detach(dev, dev_ctx->context); + clear_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); + } else { + if (!test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) + return; + intf->remove(dev, dev_ctx->context); + clear_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); + } +} + +void mlx5_detach_device(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_interface *intf; + + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry(intf, &intf_list, list) + mlx5_detach_interface(intf, priv); + mutex_unlock(&mlx5_intf_mutex); +} + +bool mlx5_device_registered(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv; + bool found = false; + + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry(priv, &mlx5_dev_list, dev_list) + if (priv == &dev->priv) + found = true; + mutex_unlock(&mlx5_intf_mutex); + + return found; +} + +int 
mlx5_register_device(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_interface *intf; + + mutex_lock(&mlx5_intf_mutex); + list_add_tail(&priv->dev_list, &mlx5_dev_list); + list_for_each_entry(intf, &intf_list, list) + mlx5_add_device(intf, priv); + mutex_unlock(&mlx5_intf_mutex); + + return 0; +} + +void mlx5_unregister_device(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_interface *intf; + + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry(intf, &intf_list, list) + mlx5_remove_device(intf, priv); + list_del(&priv->dev_list); + mutex_unlock(&mlx5_intf_mutex); +} + +int mlx5_register_interface(struct mlx5_interface *intf) +{ + struct mlx5_priv *priv; + + if (!intf->add || !intf->remove) + return -EINVAL; + + mutex_lock(&mlx5_intf_mutex); + list_add_tail(&intf->list, &intf_list); + list_for_each_entry(priv, &mlx5_dev_list, dev_list) + mlx5_add_device(intf, priv); + mutex_unlock(&mlx5_intf_mutex); + + return 0; +} +EXPORT_SYMBOL(mlx5_register_interface); + +void mlx5_unregister_interface(struct mlx5_interface *intf) +{ + struct mlx5_priv *priv; + + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry(priv, &mlx5_dev_list, dev_list) + mlx5_remove_device(intf, priv); + list_del(&intf->list); + mutex_unlock(&mlx5_intf_mutex); +} +EXPORT_SYMBOL(mlx5_unregister_interface); + +void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol) +{ + struct mlx5_priv *priv = &mdev->priv; + struct mlx5_device_context *dev_ctx; + unsigned long flags; + void *result = NULL; + + spin_lock_irqsave(&priv->ctx_lock, flags); + + list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list) + if ((dev_ctx->intf->protocol == protocol) && + dev_ctx->intf->get_dev) { + result = dev_ctx->intf->get_dev(dev_ctx->context); + break; + } + + spin_unlock_irqrestore(&priv->ctx_lock, flags); + + return result; +} +EXPORT_SYMBOL(mlx5_get_protocol_dev); + +/* Must be called with intf_mutex held */ +void 
mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol) +{ + struct mlx5_interface *intf; + + list_for_each_entry(intf, &intf_list, list) + if (intf->protocol == protocol) { + mlx5_add_device(intf, &dev->priv); + break; + } +} + +/* Must be called with intf_mutex held */ +void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol) +{ + struct mlx5_interface *intf; + + list_for_each_entry(intf, &intf_list, list) + if (intf->protocol == protocol) { + mlx5_remove_device(intf, &dev->priv); + break; + } +} + +static u16 mlx5_gen_pci_id(struct mlx5_core_dev *dev) +{ + return (u16)((dev->pdev->bus->number << 8) | + PCI_SLOT(dev->pdev->devfn)); +} + +/* Must be called with intf_mutex held */ +struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev) +{ + u16 pci_id = mlx5_gen_pci_id(dev); + struct mlx5_core_dev *res = NULL; + struct mlx5_core_dev *tmp_dev; + struct mlx5_priv *priv; + + list_for_each_entry(priv, &mlx5_dev_list, dev_list) { + tmp_dev = container_of(priv, struct mlx5_core_dev, priv); + if ((dev != tmp_dev) && (mlx5_gen_pci_id(tmp_dev) == pci_id)) { + res = tmp_dev; + break; + } + } + + return res; +} + +void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, + unsigned long param) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_device_context *dev_ctx; + unsigned long flags; + + spin_lock_irqsave(&priv->ctx_lock, flags); + + list_for_each_entry(dev_ctx, &priv->ctx_list, list) + if (dev_ctx->intf->event) + dev_ctx->intf->event(dev, dev_ctx->context, event, param); + + spin_unlock_irqrestore(&priv->ctx_lock, flags); +} + +void mlx5_dev_list_lock(void) +{ + mutex_lock(&mlx5_intf_mutex); +} + +void mlx5_dev_list_unlock(void) +{ + mutex_unlock(&mlx5_intf_mutex); +} + +int mlx5_dev_list_trylock(void) +{ + return mutex_trylock(&mlx5_intf_mutex); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 92c3e0d..5595724 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -277,7 +277,7 @@ static void mlx5_do_bond_work(struct work_struct *work) bond_work); int status; - status = mutex_trylock(&mlx5_intf_mutex); + status = mlx5_dev_list_trylock(); if (!status) { /* 1 sec delay. */ mlx5_queue_bond_work(ldev, HZ); @@ -285,7 +285,7 @@ static void mlx5_do_bond_work(struct work_struct *work) } mlx5_do_bond(ldev); - mutex_unlock(&mlx5_intf_mutex); + mlx5_dev_list_unlock(); } static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, @@ -466,35 +466,21 @@ static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev, mutex_unlock(&lag_mutex); } -static u16 mlx5_gen_pci_id(struct mlx5_core_dev *dev) -{ - return (u16)((dev->pdev->bus->number << 8) | - PCI_SLOT(dev->pdev->devfn)); -} /* Must be called with intf_mutex held */ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) { struct mlx5_lag *ldev = NULL; struct mlx5_core_dev *tmp_dev; - struct mlx5_priv *priv; - u16 pci_id; if (!MLX5_CAP_GEN(dev, vport_group_manager) || !MLX5_CAP_GEN(dev, lag_master) || (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)) return; - pci_id = mlx5_gen_pci_id(dev); - - mlx5_core_for_each_priv(priv) { - tmp_dev = container_of(priv, struct mlx5_core_dev, priv); - if ((dev != tmp_dev) && - (mlx5_gen_pci_id(tmp_dev) == pci_id)) { - ldev = tmp_dev->priv.lag; - break; - } - } + tmp_dev = mlx5_get_next_phys_dev(dev); + if (tmp_dev) + ldev = tmp_dev->priv.lag; if (!ldev) { ldev = mlx5_lag_dev_alloc(); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 16660cf..d9c3c70 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -72,18 +72,6 @@ static int prof_sel = MLX5_DEFAULT_PROF; module_param_named(prof_sel, prof_sel, int, 0444); MODULE_PARM_DESC(prof_sel, "profile selector. 
Valid range 0 - 2"); -static LIST_HEAD(intf_list); - -LIST_HEAD(mlx5_dev_list); -DEFINE_MUTEX(mlx5_intf_mutex); - -struct mlx5_device_context { - struct list_head list; - struct mlx5_interface *intf; - void *context; - unsigned long state; -}; - enum { MLX5_ATOMIC_REQ_MODE_BE = 0x0, MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1, @@ -779,248 +767,6 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev) return -ENOTSUPP; } -enum { - MLX5_INTERFACE_ADDED, - MLX5_INTERFACE_ATTACHED, -}; - -static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) -{ - struct mlx5_device_context *dev_ctx; - struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); - - if (!mlx5_lag_intf_add(intf, priv)) - return; - - dev_ctx = kzalloc(sizeof(*dev_ctx), GFP_KERNEL); - if (!dev_ctx) - return; - - dev_ctx->intf = intf; - dev_ctx->context = intf->add(dev); - set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); - if (intf->attach) - set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); - - if (dev_ctx->context) { - spin_lock_irq(&priv->ctx_lock); - list_add_tail(&dev_ctx->list, &priv->ctx_list); - spin_unlock_irq(&priv->ctx_lock); - } else { - kfree(dev_ctx); - } -} - -static struct mlx5_device_context *mlx5_get_device(struct mlx5_interface *intf, - struct mlx5_priv *priv) -{ - struct mlx5_device_context *dev_ctx; - - list_for_each_entry(dev_ctx, &priv->ctx_list, list) - if (dev_ctx->intf == intf) - return dev_ctx; - return NULL; -} - -static void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv) -{ - struct mlx5_device_context *dev_ctx; - struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); - - dev_ctx = mlx5_get_device(intf, priv); - if (!dev_ctx) - return; - - spin_lock_irq(&priv->ctx_lock); - list_del(&dev_ctx->list); - spin_unlock_irq(&priv->ctx_lock); - - if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) - intf->remove(dev, dev_ctx->context); - - kfree(dev_ctx); -} - -static void 
mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv) -{ - struct mlx5_device_context *dev_ctx; - struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); - - dev_ctx = mlx5_get_device(intf, priv); - if (!dev_ctx) - return; - - if (intf->attach) { - if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)) - return; - intf->attach(dev, dev_ctx->context); - set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); - } else { - if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) - return; - dev_ctx->context = intf->add(dev); - set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); - } -} - -static void mlx5_attach_device(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_interface *intf; - - mutex_lock(&mlx5_intf_mutex); - list_for_each_entry(intf, &intf_list, list) - mlx5_attach_interface(intf, priv); - mutex_unlock(&mlx5_intf_mutex); -} - -static void mlx5_detach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv) -{ - struct mlx5_device_context *dev_ctx; - struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); - - dev_ctx = mlx5_get_device(intf, priv); - if (!dev_ctx) - return; - - if (intf->detach) { - if (!test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)) - return; - intf->detach(dev, dev_ctx->context); - clear_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); - } else { - if (!test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) - return; - intf->remove(dev, dev_ctx->context); - clear_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); - } -} - -static void mlx5_detach_device(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_interface *intf; - - mutex_lock(&mlx5_intf_mutex); - list_for_each_entry(intf, &intf_list, list) - mlx5_detach_interface(intf, priv); - mutex_unlock(&mlx5_intf_mutex); -} - -static bool mlx5_device_registered(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv; - bool found = false; - - mutex_lock(&mlx5_intf_mutex); - 
list_for_each_entry(priv, &mlx5_dev_list, dev_list) - if (priv == &dev->priv) - found = true; - mutex_unlock(&mlx5_intf_mutex); - - return found; -} - -static int mlx5_register_device(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_interface *intf; - - mutex_lock(&mlx5_intf_mutex); - list_add_tail(&priv->dev_list, &mlx5_dev_list); - list_for_each_entry(intf, &intf_list, list) - mlx5_add_device(intf, priv); - mutex_unlock(&mlx5_intf_mutex); - - return 0; -} - -static void mlx5_unregister_device(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_interface *intf; - - mutex_lock(&mlx5_intf_mutex); - list_for_each_entry(intf, &intf_list, list) - mlx5_remove_device(intf, priv); - list_del(&priv->dev_list); - mutex_unlock(&mlx5_intf_mutex); -} - -int mlx5_register_interface(struct mlx5_interface *intf) -{ - struct mlx5_priv *priv; - - if (!intf->add || !intf->remove) - return -EINVAL; - - mutex_lock(&mlx5_intf_mutex); - list_add_tail(&intf->list, &intf_list); - list_for_each_entry(priv, &mlx5_dev_list, dev_list) - mlx5_add_device(intf, priv); - mutex_unlock(&mlx5_intf_mutex); - - return 0; -} -EXPORT_SYMBOL(mlx5_register_interface); - -void mlx5_unregister_interface(struct mlx5_interface *intf) -{ - struct mlx5_priv *priv; - - mutex_lock(&mlx5_intf_mutex); - list_for_each_entry(priv, &mlx5_dev_list, dev_list) - mlx5_remove_device(intf, priv); - list_del(&intf->list); - mutex_unlock(&mlx5_intf_mutex); -} -EXPORT_SYMBOL(mlx5_unregister_interface); - -void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol) -{ - struct mlx5_priv *priv = &mdev->priv; - struct mlx5_device_context *dev_ctx; - unsigned long flags; - void *result = NULL; - - spin_lock_irqsave(&priv->ctx_lock, flags); - - list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list) - if ((dev_ctx->intf->protocol == protocol) && - dev_ctx->intf->get_dev) { - result = dev_ctx->intf->get_dev(dev_ctx->context); - break; - } - - 
spin_unlock_irqrestore(&priv->ctx_lock, flags); - - return result; -} -EXPORT_SYMBOL(mlx5_get_protocol_dev); - -/* Must be called with intf_mutex held */ -void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol) -{ - struct mlx5_interface *intf; - - list_for_each_entry(intf, &intf_list, list) - if (intf->protocol == protocol) { - mlx5_add_device(intf, &dev->priv); - break; - } -} - -/* Must be called with intf_mutex held */ -void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol) -{ - struct mlx5_interface *intf; - - list_for_each_entry(intf, &intf_list, list) - if (intf->protocol == protocol) { - mlx5_remove_device(intf, &dev->priv); - break; - } -} static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv) { @@ -1446,22 +1192,6 @@ out: return err; } -void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, - unsigned long param) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_device_context *dev_ctx; - unsigned long flags; - - spin_lock_irqsave(&priv->ctx_lock, flags); - - list_for_each_entry(dev_ctx, &priv->ctx_list, list) - if (dev_ctx->intf->event) - dev_ctx->intf->event(dev, dev_ctx->context, event, param); - - spin_unlock_irqrestore(&priv->ctx_lock, flags); -} - struct mlx5_core_event_handler { void (*event)(struct mlx5_core_dev *dev, enum mlx5_dev_event event, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 04b719a..3d0cfb9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -46,9 +46,6 @@ extern int mlx5_core_debug_mask; -extern struct list_head mlx5_dev_list; -extern struct mutex mlx5_intf_mutex; - #define mlx5_core_dbg(__dev, format, ...) \ dev_dbg(&(__dev)->pdev->dev, "%s:%s:%d:(pid %d): " format, \ (__dev)->priv.name, __func__, __LINE__, current->pid, \ @@ -73,9 +70,6 @@ do { \ #define mlx5_core_info(__dev, format, ...) 
\ dev_info(&(__dev)->pdev->dev, format, ##__VA_ARGS__) -#define mlx5_core_for_each_priv(__priv) \ - list_for_each_entry(__priv, &mlx5_dev_list, dev_list) - enum { MLX5_CMD_DATA, /* print command payload only */ MLX5_CMD_TIME, /* print command execution time */ @@ -106,8 +100,19 @@ void mlx5_cq_tasklet_cb(unsigned long data); void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev); void mlx5_lag_remove(struct mlx5_core_dev *dev); +void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv); +void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv); +void mlx5_attach_device(struct mlx5_core_dev *dev); +void mlx5_detach_device(struct mlx5_core_dev *dev); +bool mlx5_device_registered(struct mlx5_core_dev *dev); +int mlx5_register_device(struct mlx5_core_dev *dev); +void mlx5_unregister_device(struct mlx5_core_dev *dev); void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol); void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol); +struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev); +void mlx5_dev_list_lock(void); +void mlx5_dev_list_unlock(void); +int mlx5_dev_list_trylock(void); bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv); -- cgit v1.1 From 3a8963acc70e69606729404713cfa9a03b58b18c Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 9 Sep 2016 12:45:24 -0700 Subject: Revert "hv_netvsc: make inline functions static" These functions are used by other code in the misc-next tree. This reverts commit 30d1de08c87ddde6f73936c3350e7e153988fe02. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- drivers/net/hyperv/netvsc.c | 85 +-------------------------------------------- include/linux/hyperv.h | 84 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 84 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 2a9ccc4..ff05b9b 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -34,89 +34,6 @@ #include "hyperv_net.h" /* - * An API to support in-place processing of incoming VMBUS packets. - */ -#define VMBUS_PKT_TRAILER 8 - -static struct vmpacket_descriptor * -get_next_pkt_raw(struct vmbus_channel *channel) -{ - struct hv_ring_buffer_info *ring_info = &channel->inbound; - u32 read_loc = ring_info->priv_read_index; - void *ring_buffer = hv_get_ring_buffer(ring_info); - struct vmpacket_descriptor *cur_desc; - u32 packetlen; - u32 dsize = ring_info->ring_datasize; - u32 delta = read_loc - ring_info->ring_buffer->read_index; - u32 bytes_avail_toread = (hv_get_bytes_to_read(ring_info) - delta); - - if (bytes_avail_toread < sizeof(struct vmpacket_descriptor)) - return NULL; - - if ((read_loc + sizeof(*cur_desc)) > dsize) - return NULL; - - cur_desc = ring_buffer + read_loc; - packetlen = cur_desc->len8 << 3; - - /* - * If the packet under consideration is wrapping around, - * return failure. - */ - if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > (dsize - 1)) - return NULL; - - return cur_desc; -} - -/* - * A helper function to step through packets "in-place" - * This API is to be called after each successful call - * get_next_pkt_raw(). - */ -static void put_pkt_raw(struct vmbus_channel *channel, - struct vmpacket_descriptor *desc) -{ - struct hv_ring_buffer_info *ring_info = &channel->inbound; - u32 read_loc = ring_info->priv_read_index; - u32 packetlen = desc->len8 << 3; - u32 dsize = ring_info->ring_datasize; - - BUG_ON((read_loc + packetlen + VMBUS_PKT_TRAILER) > dsize); - - /* - * Include the packet trailer. 
- */ - ring_info->priv_read_index += packetlen + VMBUS_PKT_TRAILER; -} - -/* - * This call commits the read index and potentially signals the host. - * Here is the pattern for using the "in-place" consumption APIs: - * - * while (get_next_pkt_raw() { - * process the packet "in-place"; - * put_pkt_raw(); - * } - * if (packets processed in place) - * commit_rd_index(); - */ -static void commit_rd_index(struct vmbus_channel *channel) -{ - struct hv_ring_buffer_info *ring_info = &channel->inbound; - /* - * Make sure all reads are done before we update the read index since - * the writer may start writing to the read area once the read index - * is updated. - */ - virt_rmb(); - ring_info->ring_buffer->read_index = ring_info->priv_read_index; - - if (hv_need_to_signal_on_read(ring_info)) - vmbus_set_event(channel); -} - -/* * Switch the data path from the synthetic interface to the VF * interface. */ @@ -840,7 +757,7 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device, return msg_size; } -static int netvsc_send_pkt( +static inline int netvsc_send_pkt( struct hv_device *device, struct hv_netvsc_packet *packet, struct netvsc_device *net_device, diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index b01c8c3..5df444b 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1429,4 +1429,88 @@ static inline bool hv_need_to_signal_on_read(struct hv_ring_buffer_info *rbi) return false; } +/* + * An API to support in-place processing of incoming VMBUS packets. 
+ */ +#define VMBUS_PKT_TRAILER 8 + +static inline struct vmpacket_descriptor * +get_next_pkt_raw(struct vmbus_channel *channel) +{ + struct hv_ring_buffer_info *ring_info = &channel->inbound; + u32 read_loc = ring_info->priv_read_index; + void *ring_buffer = hv_get_ring_buffer(ring_info); + struct vmpacket_descriptor *cur_desc; + u32 packetlen; + u32 dsize = ring_info->ring_datasize; + u32 delta = read_loc - ring_info->ring_buffer->read_index; + u32 bytes_avail_toread = (hv_get_bytes_to_read(ring_info) - delta); + + if (bytes_avail_toread < sizeof(struct vmpacket_descriptor)) + return NULL; + + if ((read_loc + sizeof(*cur_desc)) > dsize) + return NULL; + + cur_desc = ring_buffer + read_loc; + packetlen = cur_desc->len8 << 3; + + /* + * If the packet under consideration is wrapping around, + * return failure. + */ + if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > (dsize - 1)) + return NULL; + + return cur_desc; +} + +/* + * A helper function to step through packets "in-place" + * This API is to be called after each successful call + * get_next_pkt_raw(). + */ +static inline void put_pkt_raw(struct vmbus_channel *channel, + struct vmpacket_descriptor *desc) +{ + struct hv_ring_buffer_info *ring_info = &channel->inbound; + u32 read_loc = ring_info->priv_read_index; + u32 packetlen = desc->len8 << 3; + u32 dsize = ring_info->ring_datasize; + + if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > dsize) + BUG(); + /* + * Include the packet trailer. + */ + ring_info->priv_read_index += packetlen + VMBUS_PKT_TRAILER; +} + +/* + * This call commits the read index and potentially signals the host. 
+ * Here is the pattern for using the "in-place" consumption APIs: + * + * while (get_next_pkt_raw() { + * process the packet "in-place"; + * put_pkt_raw(); + * } + * if (packets processed in place) + * commit_rd_index(); + */ +static inline void commit_rd_index(struct vmbus_channel *channel) +{ + struct hv_ring_buffer_info *ring_info = &channel->inbound; + /* + * Make sure all reads are done before we update the read index since + * the writer may start writing to the read area once the read index + * is updated. + */ + virt_rmb(); + ring_info->ring_buffer->read_index = ring_info->priv_read_index; + + if (hv_need_to_signal_on_read(ring_info)) + vmbus_set_event(channel); +} + + #endif /* _HYPERV_H */ -- cgit v1.1 From ed227099dac95128e2aecd62af51bb9d922e5977 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 9 Sep 2016 17:42:30 -0300 Subject: openvswitch: use alias for genetlink family names When userspace tries to create datapaths and the module is not loaded, it will simply fail. With this patch, the module will be automatically loaded. Signed-off-by: Thadeu Lima de Souza Cascardo Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- net/openvswitch/datapath.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 524c0fd..0536ab3 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -2437,3 +2437,7 @@ module_exit(dp_cleanup); MODULE_DESCRIPTION("Open vSwitch switching datapath"); MODULE_LICENSE("GPL"); +MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY); +MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY); +MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY); +MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY); -- cgit v1.1 From 2594a2a928a010bf27e6545f90bc2de7ed5ed075 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 9 Sep 2016 14:22:45 -0700 Subject: tcp: better use ooo_last_skb in tcp_data_queue_ofo() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Willem noticed that we could avoid an rbtree lookup if the attempt to coalesce incoming skb to the last skb failed for some reason. Since most ooo additions are at the tail, this is definitely worth adding a test and fast path. Suggested-by: Willem de Bruijn Signed-off-by: Eric Dumazet Cc: Yaogong Wang Cc: Yuchung Cheng Cc: Neal Cardwell Cc: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a5934c4..70b892d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4461,6 +4461,12 @@ coalesce_done: skb = NULL; goto add_sack; } + /* Can avoid an rbtree lookup if we are adding skb after ooo_last_skb */ + if (!before(seq, TCP_SKB_CB(tp->ooo_last_skb)->end_seq)) { + parent = &tp->ooo_last_skb->rbnode; + p = &parent->rb_right; + goto insert; + } /* Find place to insert this segment. Handle overlaps on the way. */ parent = NULL; @@ -4503,7 +4509,7 @@ coalesce_done: } p = &parent->rb_right; } - +insert: /* Insert segment into RB tree. 
*/ rb_link_node(&skb->rbnode, parent, p); rb_insert_color(&skb->rbnode, &tp->out_of_order_queue); -- cgit v1.1 From 78706121d59d0692534d087df6eefef5469fc5a1 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 10 Sep 2016 08:02:06 +0200 Subject: ATM-nicstar: Use kmalloc_array() in get_scq() * A multiplication for the size determination of a memory allocation indicated that an array data structure should be processed. Thus use the corresponding function "kmalloc_array". This issue was detected by using the Coccinelle software. * Replace the specification of a data type by a pointer dereference to make the corresponding size determination a bit safer according to the Linux coding style convention. Signed-off-by: Markus Elfring Signed-off-by: David S. Miller --- drivers/atm/nicstar.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index 700ed15..50dec13 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -871,8 +871,9 @@ static scq_info *get_scq(ns_dev *card, int size, u32 scd) kfree(scq); return NULL; } - scq->skb = kmalloc(sizeof(struct sk_buff *) * - (size / NS_SCQE_SIZE), GFP_KERNEL); + scq->skb = kmalloc_array(size / NS_SCQE_SIZE, + sizeof(*scq->skb), + GFP_KERNEL); if (!scq->skb) { dma_free_coherent(&card->pcidev->dev, 2 * size, scq->org, scq->dma); -- cgit v1.1 From 24310fd565b16ec7c39e3db5b0240f0cdbbd0475 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 10 Sep 2016 08:18:10 +0200 Subject: ATM-nicstar: Improve another size determination in get_scq() Replace the specification of a data structure by a pointer dereference as the parameter for the operator "sizeof" to make the corresponding size determination a bit safer according to the Linux coding style convention. Signed-off-by: Markus Elfring Signed-off-by: David S. 
Miller --- drivers/atm/nicstar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index 50dec13..96062e1 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -862,7 +862,7 @@ static scq_info *get_scq(ns_dev *card, int size, u32 scd) if (size != VBR_SCQSIZE && size != CBR_SCQSIZE) return NULL; - scq = kmalloc(sizeof(scq_info), GFP_KERNEL); + scq = kmalloc(sizeof(*scq), GFP_KERNEL); if (!scq) return NULL; scq->org = dma_alloc_coherent(&card->pcidev->dev, -- cgit v1.1 From ee41f07c2f2c944465ade7c9591a6139a783be41 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 10 Sep 2016 08:30:09 +0200 Subject: ATM-nicstar: Improve another size determination in ns_init_card() Replace the specification of a data structure by a reference for a field in a local variable as the parameter for the operator "sizeof" to make the corresponding size determination a bit safer according to the Linux coding style convention. Signed-off-by: Markus Elfring Signed-off-by: David S. Miller --- drivers/atm/nicstar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index 96062e1..ef977bf 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -611,7 +611,7 @@ static int ns_init_card(int i, struct pci_dev *pcidev) for (j = 0; j < card->rct_size; j++) ns_write_sram(card, j * 4, u32d, 4); - memset(card->vcmap, 0, NS_MAX_RCTSIZE * sizeof(vc_map)); + memset(card->vcmap, 0, sizeof(card->vcmap)); for (j = 0; j < NS_FRSCD_NUM; j++) card->scd2vc[j] = NULL; -- cgit v1.1 From 304f0a4edbbb2d273d7c6043df4126b623cd472c Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 10 Sep 2016 08:48:17 +0200 Subject: ATM-nicstar: Refactor a kmalloc() call in ns_init_card() * The script "checkpatch.pl" can point out that assignments should usually not be performed within condition checks. 
Thus move an assignment for a local variable to a separate statement in this function. * Replace the specification of a data structure by a pointer dereference as the parameter for the operator "sizeof" to make the corresponding size determination a bit safer according to the Linux coding style convention. Signed-off-by: Markus Elfring Signed-off-by: David S. Miller --- drivers/atm/nicstar.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index ef977bf..04f5781 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -370,7 +370,8 @@ static int ns_init_card(int i, struct pci_dev *pcidev) return error; } - if ((card = kmalloc(sizeof(ns_dev), GFP_KERNEL)) == NULL) { + card = kmalloc(sizeof(*card), GFP_KERNEL); + if (!card) { printk ("nicstar%d: can't allocate memory for device structure.\n", i); -- cgit v1.1 From 0ba8abb770a8771ffb05cce2a5a7441530f06a55 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 10 Sep 2016 08:56:03 +0200 Subject: ATM-nicstar: Refactor a dev_alloc_skb() call in dequeue_rx() The script "checkpatch.pl" can point out that assignments should usually not be performed within condition checks. Thus move an assignment for a local variable to a separate statement in this function. Signed-off-by: Markus Elfring Signed-off-by: David S. 
Miller --- drivers/atm/nicstar.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index 04f5781..c7296b5 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -2023,7 +2023,8 @@ static void dequeue_rx(ns_dev * card, ns_rsqe * rsqe) cell = skb->data; for (i = ns_rsqe_cellcount(rsqe); i; i--) { - if ((sb = dev_alloc_skb(NS_SMSKBSIZE)) == NULL) { + sb = dev_alloc_skb(NS_SMSKBSIZE); + if (!sb) { printk ("nicstar%d: Can't allocate buffers for aal0.\n", card->index); -- cgit v1.1 From 32230ac1ccbd66f36bd6955eddc45fc06861c1b5 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 10 Sep 2016 09:55:53 +0200 Subject: ATM-ZeitNet: Use kmalloc_array() in start_tx() * A multiplication for the size determination of a memory allocation indicated that an array data structure should be processed. Thus use the corresponding function "kmalloc_array". This issue was detected by using the Coccinelle software. * Replace the specification of a data type by a pointer dereference to make the corresponding size determination a bit safer according to the Linux coding style convention. Signed-off-by: Markus Elfring Signed-off-by: David S. 
Miller --- drivers/atm/zatm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index cecfb94..d378ff2 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -998,8 +998,9 @@ static int start_tx(struct atm_dev *dev) DPRINTK("start_tx\n"); zatm_dev = ZATM_DEV(dev); - zatm_dev->tx_map = kmalloc(sizeof(struct atm_vcc *)* - zatm_dev->chans,GFP_KERNEL); + zatm_dev->tx_map = kmalloc_array(zatm_dev->chans, + sizeof(*zatm_dev->tx_map), + GFP_KERNEL); if (!zatm_dev->tx_map) return -ENOMEM; zatm_dev->tx_bw = ATM_OC3_PCR; zatm_dev->free_shapers = (1 << NR_SHAPERS)-1; -- cgit v1.1 From 5ad3ea3d3952dcbb8047f97fbfa49804ea53a53a Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 10 Sep 2016 10:07:38 +0200 Subject: ATM-ZeitNet: Improve a size determination in zatm_open() Replace the specification of a data structure by a pointer dereference as the parameter for the operator "sizeof" to make the corresponding size determination a bit safer according to the Linux coding style convention. Signed-off-by: Markus Elfring Signed-off-by: David S. Miller --- drivers/atm/zatm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index d378ff2..218c6af 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -1399,7 +1399,7 @@ static int zatm_open(struct atm_vcc *vcc) DPRINTK(DEV_LABEL "(itf %d): open %d.%d\n",vcc->dev->number,vcc->vpi, vcc->vci); if (!test_bit(ATM_VF_PARTIAL,&vcc->flags)) { - zatm_vcc = kmalloc(sizeof(struct zatm_vcc),GFP_KERNEL); + zatm_vcc = kmalloc(sizeof(*zatm_vcc), GFP_KERNEL); if (!zatm_vcc) { clear_bit(ATM_VF_ADDR,&vcc->flags); return -ENOMEM; -- cgit v1.1 From 0f0d0ed0870eca21e36dc520d7d9be292c103f80 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 10 Sep 2016 10:21:15 +0200 Subject: ATM-ZeitNet: Replace one kzalloc() call by kcalloc() * The script "checkpatch.pl" can point information out like the following. 
WARNING: Prefer kcalloc over kzalloc with multiply. Thus fix the affected place in the source code. * Replace the specification of a data type by a pointer dereference to make the corresponding size determination a bit safer according to the Linux coding style convention. * Delete the local variable "size" which became unnecessary with this refactoring. Signed-off-by: Markus Elfring Signed-off-by: David S. Miller --- drivers/atm/zatm.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index 218c6af..2cc9e2a 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -598,12 +598,13 @@ static void close_rx(struct atm_vcc *vcc) static int start_rx(struct atm_dev *dev) { struct zatm_dev *zatm_dev; - int size,i; + int i; DPRINTK("start_rx\n"); zatm_dev = ZATM_DEV(dev); - size = sizeof(struct atm_vcc *)*zatm_dev->chans; - zatm_dev->rx_map = kzalloc(size,GFP_KERNEL); + zatm_dev->rx_map = kcalloc(zatm_dev->chans, + sizeof(*zatm_dev->rx_map), + GFP_KERNEL); if (!zatm_dev->rx_map) return -ENOMEM; /* set VPI/VCI split (use all VCIs and give what's left to VPIs) */ zpokel(zatm_dev,(1 << dev->ci_range.vci_bits)-1,uPD98401_VRR); -- cgit v1.1 From cf9932a9414e241571008edd7412ab22f02b5704 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 10 Sep 2016 10:38:04 +0200 Subject: ATM-ZeitNet: Fix indentation for one DPRINTK() call in start_rx() Adjust the indentation for a call of the macro "DPRINTK" in this function. Signed-off-by: Markus Elfring Signed-off-by: David S. 
Miller --- drivers/atm/zatm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index 2cc9e2a..d3dc954 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -600,7 +600,7 @@ static int start_rx(struct atm_dev *dev) struct zatm_dev *zatm_dev; int i; -DPRINTK("start_rx\n"); + DPRINTK("start_rx\n"); zatm_dev = ZATM_DEV(dev); zatm_dev->rx_map = kcalloc(zatm_dev->chans, sizeof(*zatm_dev->rx_map), -- cgit v1.1 From 9ee0034b8f49aaaa7e7c2da8db1038915db99c19 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:09:52 -0700 Subject: net: flow: Add l3mdev flow update Add l3mdev hook to set FLOWI_FLAG_SKIP_NH_OIF flag and update oif/iif in flow struct if its oif or iif points to a device enslaved to an L3 Master device. Only 1 needs to be converted to match the l3mdev FIB rule. This moves the flow adjustment for l3mdev to a single point catching all lookups. It is redundant for existing hooks (those are removed in later patches) but is needed for missed lookups such as PMTU updates. Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- include/net/l3mdev.h | 6 ++++++ net/ipv4/fib_rules.c | 3 +++ net/ipv6/fib6_rules.c | 3 +++ net/l3mdev/l3mdev.c | 35 +++++++++++++++++++++++++++++++++++ 4 files changed, 47 insertions(+) diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index e900950..81e175e 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -49,6 +49,8 @@ struct l3mdev_ops { int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, struct fib_lookup_arg *arg); +void l3mdev_update_flow(struct net *net, struct flowi *fl); + int l3mdev_master_ifindex_rcu(const struct net_device *dev); static inline int l3mdev_master_ifindex(struct net_device *dev) { @@ -290,6 +292,10 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, { return 1; } +static inline +void l3mdev_update_flow(struct net *net, struct flowi *fl) +{ +} #endif #endif /* _NET_L3MDEV_H_ */ diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 6e9ea69..770bebe 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -56,6 +56,9 @@ int __fib_lookup(struct net *net, struct flowi4 *flp, }; int err; + /* update flow if oif or iif point to device enslaved to l3mdev */ + l3mdev_update_flow(net, flowi4_to_flowi(flp)); + err = fib_rules_lookup(net->ipv4.rules_ops, flowi4_to_flowi(flp), 0, &arg); #ifdef CONFIG_IP_ROUTE_CLASSID if (arg.rule) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 5857c1f..eea23b5 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -38,6 +38,9 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, .flags = FIB_LOOKUP_NOREF, }; + /* update flow if oif or iif point to device enslaved to l3mdev */ + l3mdev_update_flow(net, flowi6_to_flowi(fl6)); + fib_rules_lookup(net->ipv6.fib6_rules_ops, flowi6_to_flowi(fl6), flags, &arg); diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index c4a1c3e..43610e5 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -222,3 +222,38 @@ out: return rc; } + +void 
l3mdev_update_flow(struct net *net, struct flowi *fl) +{ + struct net_device *dev; + int ifindex; + + rcu_read_lock(); + + if (fl->flowi_oif) { + dev = dev_get_by_index_rcu(net, fl->flowi_oif); + if (dev) { + ifindex = l3mdev_master_ifindex_rcu(dev); + if (ifindex) { + fl->flowi_oif = ifindex; + fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF; + goto out; + } + } + } + + if (fl->flowi_iif) { + dev = dev_get_by_index_rcu(net, fl->flowi_iif); + if (dev) { + ifindex = l3mdev_master_ifindex_rcu(dev); + if (ifindex) { + fl->flowi_iif = ifindex; + fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF; + } + } + } + +out: + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(l3mdev_update_flow); -- cgit v1.1 From a8e3e1a9f02094145580ea7920c6a1d9aabd5539 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:09:53 -0700 Subject: net: l3mdev: Add hook to output path This patch adds the infrastructure to the output path to pass an skb to an l3mdev device if it has a hook registered. This is the Tx parallel to l3mdev_ip{6}_rcv in the receive path and is the basis for removing the existing hook that returns the vrf dst on the fib lookup. Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- include/net/l3mdev.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/ip_output.c | 8 ++++++++ net/ipv6/ip6_output.c | 8 ++++++++ net/ipv6/output_core.c | 7 +++++++ net/ipv6/raw.c | 7 +++++++ 5 files changed, 78 insertions(+) diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 81e175e..53d5274 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -11,6 +11,7 @@ #ifndef _NET_L3MDEV_H_ #define _NET_L3MDEV_H_ +#include #include /** @@ -18,6 +19,10 @@ * * @l3mdev_fib_table: Get FIB table id to use for lookups * + * @l3mdev_l3_rcv: Hook in L3 receive path + * + * @l3mdev_l3_out: Hook in L3 output path + * * @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device * * @l3mdev_get_saddr: Get source address for a flow @@ -29,6 +34,9 @@ struct l3mdev_ops { u32 (*l3mdev_fib_table)(const struct net_device *dev); struct sk_buff * (*l3mdev_l3_rcv)(struct net_device *dev, struct sk_buff *skb, u16 proto); + struct sk_buff * (*l3mdev_l3_out)(struct net_device *dev, + struct sock *sk, struct sk_buff *skb, + u16 proto); /* IPv4 ops */ struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev, @@ -201,6 +209,34 @@ struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb) return l3mdev_l3_rcv(skb, AF_INET6); } +static inline +struct sk_buff *l3mdev_l3_out(struct sock *sk, struct sk_buff *skb, u16 proto) +{ + struct net_device *dev = skb_dst(skb)->dev; + + if (netif_is_l3_slave(dev)) { + struct net_device *master; + + master = netdev_master_upper_dev_get_rcu(dev); + if (master && master->l3mdev_ops->l3mdev_l3_out) + skb = master->l3mdev_ops->l3mdev_l3_out(master, sk, + skb, proto); + } + + return skb; +} + +static inline +struct sk_buff *l3mdev_ip_out(struct sock *sk, struct sk_buff *skb) +{ + return l3mdev_l3_out(sk, skb, AF_INET); +} + +static inline +struct sk_buff *l3mdev_ip6_out(struct sock *sk, struct sk_buff *skb) +{ + return l3mdev_l3_out(sk, skb, AF_INET6); +} #else static inline int l3mdev_master_ifindex_rcu(const 
struct net_device *dev) @@ -287,6 +323,18 @@ struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb) } static inline +struct sk_buff *l3mdev_ip_out(struct sock *sk, struct sk_buff *skb) +{ + return skb; +} + +static inline +struct sk_buff *l3mdev_ip6_out(struct sock *sk, struct sk_buff *skb) +{ + return skb; +} + +static inline int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, struct fib_lookup_arg *arg) { diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b913f5b..41e10e3 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -99,6 +99,14 @@ int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) iph->tot_len = htons(skb->len); ip_send_check(iph); + + /* if egress device is enslaved to an L3 master device pass the + * skb to its handler for processing + */ + skb = l3mdev_ip_out(sk, skb); + if (unlikely(!skb)) + return 0; + return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 993fd96..6ea6caa 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -236,6 +236,14 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUT, skb->len); + + /* if egress device is enslaved to an L3 master device pass the + * skb to its handler for processing + */ + skb = l3mdev_ip6_out((struct sock *)sk, skb); + if (unlikely(!skb)) + return 0; + /* hooks should never assume socket lock is held. 
* we promote our socket to non const */ diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 462f2a76b..7cca8ac 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -148,6 +148,13 @@ int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) ipv6_hdr(skb)->payload_len = htons(len); IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); + /* if egress device is enslaved to an L3 master device pass the + * skb to its handler for processing + */ + skb = l3mdev_ip6_out(sk, skb); + if (unlikely(!skb)) + return 0; + return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 590dd1f..54404f0 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -653,6 +653,13 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, if (err) goto error_fault; + /* if egress device is enslaved to an L3 master device pass the + * skb to its handler for processing + */ + skb = l3mdev_ip6_out(sk, skb); + if (unlikely(!skb)) + return 0; + IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, rt->dst.dev, dst_output); -- cgit v1.1 From 5f02ce24c2696fec33f2a5dfcf753996f5fdd211 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:09:54 -0700 Subject: net: l3mdev: Allow the l3mdev to be a loopback Allow an L3 master device to act as the loopback for that L3 domain. For IPv4 the device can also have the address 127.0.0.1. Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- include/net/l3mdev.h | 6 +++--- net/ipv4/route.c | 8 ++++++-- net/ipv6/route.c | 12 ++++++++++-- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 53d5274..3ee1105 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -90,7 +90,7 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) } static inline -const struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev) +struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev) { /* netdev_master_upper_dev_get_rcu calls * list_first_or_null_rcu to walk the upper dev list. @@ -99,7 +99,7 @@ const struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev) * typecast to remove the const */ struct net_device *dev = (struct net_device *)_dev; - const struct net_device *master; + struct net_device *master; if (!dev) return NULL; @@ -254,7 +254,7 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) } static inline -const struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev) +struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev) { return NULL; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 3e99278..f49b2c5 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2018,7 +2018,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res, return ERR_PTR(-EINVAL); if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) - if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK)) + if (ipv4_is_loopback(fl4->saddr) && + !(dev_out->flags & IFF_LOOPBACK) && + !netif_is_l3_master(dev_out)) return ERR_PTR(-EINVAL); if (ipv4_is_lbcast(fl4->daddr)) @@ -2302,7 +2304,9 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, else fl4->saddr = fl4->daddr; } - dev_out = net->loopback_dev; + + /* L3 master device is the loopback for that domain */ + dev_out = l3mdev_master_dev_rcu(dev_out) 
? : net->loopback_dev; fl4->flowi4_oif = dev_out->ifindex; flags |= RTCF_LOCAL; goto make_route; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 09d43ff..2c68111 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2558,8 +2558,16 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, { u32 tb_id; struct net *net = dev_net(idev->dev); - struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, - DST_NOCOUNT); + struct net_device *dev = net->loopback_dev; + struct rt6_info *rt; + + /* use L3 Master device as loopback for host routes if device + * is enslaved and address is not link local or multicast + */ + if (!rt6_need_strict(addr)) + dev = l3mdev_master_dev_rcu(idev->dev) ? : dev; + + rt = ip6_dst_alloc(net, dev, DST_NOCOUNT); if (!rt) return ERR_PTR(-ENOMEM); -- cgit v1.1 From ebfc102c566d0d9c174ff9b721fd35ebda01f7eb Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:09:55 -0700 Subject: net: vrf: Flip IPv4 output path from FIB lookup hook to out hook Flip the IPv4 output path to use the l3mdev tx out hook. The VRF dst is not returned on the first FIB lookup. Instead, the dst on the skb is switched at the beginning of the IPv4 output processing to send the packet to the VRF driver on xmit. Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- drivers/net/vrf.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- net/ipv4/route.c | 4 ---- 2 files changed, 63 insertions(+), 5 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 1ce7420..08540b9 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -227,6 +227,20 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, } #endif +/* based on ip_local_out; can't use it b/c the dst is switched pointing to us */ +static int vrf_ip_local_out(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + int err; + + err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, + skb, NULL, skb_dst(skb)->dev, dst_output); + if (likely(err == 1)) + err = dst_output(net, sk, skb); + + return err; +} + static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, struct net_device *vrf_dev) { @@ -292,7 +306,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, RT_SCOPE_LINK); } - ret = ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); + ret = vrf_ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); if (unlikely(net_xmit_eval(ret))) vrf_dev->stats.tx_errors++; else @@ -531,6 +545,53 @@ static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb) !(IPCB(skb)->flags & IPSKB_REROUTED)); } +/* set dst on skb to send packet to us via dev_xmit path. Allows + * packet to go through device based features such as qdisc, netfilter + * hooks and packet sockets with skb->dev set to vrf device. 
+ */ +static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev, + struct sock *sk, + struct sk_buff *skb) +{ + struct net_vrf *vrf = netdev_priv(vrf_dev); + struct dst_entry *dst = NULL; + struct rtable *rth; + + rcu_read_lock(); + + rth = rcu_dereference(vrf->rth); + if (likely(rth)) { + dst = &rth->dst; + dst_hold(dst); + } + + rcu_read_unlock(); + + if (unlikely(!dst)) { + vrf_tx_error(vrf_dev, skb); + return NULL; + } + + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + return skb; +} + +/* called with rcu lock held */ +static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev, + struct sock *sk, + struct sk_buff *skb, + u16 proto) +{ + switch (proto) { + case AF_INET: + return vrf_ip_out(vrf_dev, sk, skb); + } + + return skb; +} + /* holding rtnl */ static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf) { @@ -1067,6 +1128,7 @@ static const struct l3mdev_ops vrf_l3mdev_ops = { .l3mdev_get_rtable = vrf_get_rtable, .l3mdev_get_saddr = vrf_get_saddr, .l3mdev_l3_rcv = vrf_l3_rcv, + .l3mdev_l3_out = vrf_l3_out, #if IS_ENABLED(CONFIG_IPV6) .l3mdev_get_rt6_dst = vrf_get_rt6_dst, .l3mdev_get_saddr6 = vrf_get_saddr6, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f49b2c5..ad83f85 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2246,10 +2246,6 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, fl4->saddr = inet_select_addr(dev_out, 0, RT_SCOPE_HOST); } - - rth = l3mdev_get_rtable(dev_out, fl4); - if (rth) - goto out; } if (!fl4->daddr) { -- cgit v1.1 From 4c1feac58e06270321cc500b85c2d94a11495775 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:09:56 -0700 Subject: net: vrf: Flip IPv6 output path from FIB lookup hook to out hook Flip the IPv6 output path to use the l3mdev tx out hook. The VRF dst is not returned on the first FIB lookup. Instead, the dst on the skb is switched at the beginning of the IPv6 output processing to send the packet to the VRF driver on xmit. 
Link scope addresses (linklocal and multicast) need special handling: specifically the oif in the flow struct cannot be changed because we want the lookup tied to the enslaved interface. i.e., the source address and the returned route MUST point to the interface scope passed in. Convert the existing vrf_get_rt6_dst to handle only link scope addresses. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 124 ++++++++++++++++++++++++++++++++++----------------- include/net/l3mdev.h | 8 ++-- net/ipv6/route.c | 11 +++-- net/l3mdev/l3mdev.c | 15 +++---- 4 files changed, 100 insertions(+), 58 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 08540b9..f5372ed 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -137,6 +137,20 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev, } #if IS_ENABLED(CONFIG_IPV6) +static int vrf_ip6_local_out(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + int err; + + err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, + sk, skb, NULL, skb_dst(skb)->dev, dst_output); + + if (likely(err == 1)) + err = dst_output(net, sk, skb); + + return err; +} + static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, struct net_device *dev) { @@ -207,7 +221,7 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, /* strip the ethernet header added for pass through VRF device */ __skb_pull(skb, skb_network_offset(skb)); - ret = ip6_local_out(net, skb->sk, skb); + ret = vrf_ip6_local_out(net, skb->sk, skb); if (unlikely(net_xmit_eval(ret))) dev->stats.tx_errors++; else @@ -391,6 +405,43 @@ static int vrf_output6(struct net *net, struct sock *sk, struct sk_buff *skb) !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } +/* set dst on skb to send packet to us via dev_xmit path. Allows + * packet to go through device based features such as qdisc, netfilter + * hooks and packet sockets with skb->dev set to vrf device. 
+ */ +static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, + struct sock *sk, + struct sk_buff *skb) +{ + struct net_vrf *vrf = netdev_priv(vrf_dev); + struct dst_entry *dst = NULL; + struct rt6_info *rt6; + + /* don't divert link scope packets */ + if (rt6_need_strict(&ipv6_hdr(skb)->daddr)) + return skb; + + rcu_read_lock(); + + rt6 = rcu_dereference(vrf->rt6); + if (likely(rt6)) { + dst = &rt6->dst; + dst_hold(dst); + } + + rcu_read_unlock(); + + if (unlikely(!dst)) { + vrf_tx_error(vrf_dev, skb); + return NULL; + } + + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + return skb; +} + /* holding rtnl */ static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf) { @@ -477,6 +528,13 @@ out: return rc; } #else +static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, + struct sock *sk, + struct sk_buff *skb) +{ + return skb; +} + static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf) { } @@ -587,6 +645,8 @@ static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev, switch (proto) { case AF_INET: return vrf_ip_out(vrf_dev, sk, skb); + case AF_INET6: + return vrf_ip6_out(vrf_dev, sk, skb); } return skb; @@ -1031,53 +1091,33 @@ static struct sk_buff *vrf_l3_rcv(struct net_device *vrf_dev, } #if IS_ENABLED(CONFIG_IPV6) -static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev, - struct flowi6 *fl6) +/* send to link-local or multicast address via interface enslaved to + * VRF device. 
Force lookup to VRF table without changing flow struct + */ +static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev, + struct flowi6 *fl6) { - bool need_strict = rt6_need_strict(&fl6->daddr); - struct net_vrf *vrf = netdev_priv(dev); struct net *net = dev_net(dev); + int flags = RT6_LOOKUP_F_IFACE; struct dst_entry *dst = NULL; struct rt6_info *rt; - /* send to link-local or multicast address */ - if (need_strict) { - int flags = RT6_LOOKUP_F_IFACE; - - /* VRF device does not have a link-local address and - * sending packets to link-local or mcast addresses over - * a VRF device does not make sense - */ - if (fl6->flowi6_oif == dev->ifindex) { - struct dst_entry *dst = &net->ipv6.ip6_null_entry->dst; - - dst_hold(dst); - return dst; - } - - if (!ipv6_addr_any(&fl6->saddr)) - flags |= RT6_LOOKUP_F_HAS_SADDR; - - rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags); - if (rt) - dst = &rt->dst; - - } else if (!(fl6->flowi6_flags & FLOWI_FLAG_L3MDEV_SRC)) { - - rcu_read_lock(); - - rt = rcu_dereference(vrf->rt6); - if (likely(rt)) { - dst = &rt->dst; - dst_hold(dst); - } - - rcu_read_unlock(); + /* VRF device does not have a link-local address and + * sending packets to link-local or mcast addresses over + * a VRF device does not make sense + */ + if (fl6->flowi6_oif == dev->ifindex) { + dst = &net->ipv6.ip6_null_entry->dst; + dst_hold(dst); + return dst; } - /* make sure oif is set to VRF device for lookup */ - if (!need_strict) - fl6->flowi6_oif = dev->ifindex; + if (!ipv6_addr_any(&fl6->saddr)) + flags |= RT6_LOOKUP_F_HAS_SADDR; + + rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags); + if (rt) + dst = &rt->dst; return dst; } @@ -1130,7 +1170,7 @@ static const struct l3mdev_ops vrf_l3mdev_ops = { .l3mdev_l3_rcv = vrf_l3_rcv, .l3mdev_l3_out = vrf_l3_out, #if IS_ENABLED(CONFIG_IPV6) - .l3mdev_get_rt6_dst = vrf_get_rt6_dst, + .l3mdev_link_scope_lookup = vrf_link_scope_lookup, .l3mdev_get_saddr6 = vrf_get_saddr6, #endif }; 
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 3ee1105..51aab20 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -27,7 +27,7 @@ * * @l3mdev_get_saddr: Get source address for a flow * - * @l3mdev_get_rt6_dst: Get cached IPv6 rt6_info (dst_entry) for device + * @l3mdev_link_scope_lookup: IPv6 lookup for linklocal and mcast destinations */ struct l3mdev_ops { @@ -45,7 +45,7 @@ struct l3mdev_ops { struct flowi4 *fl4); /* IPv6 ops */ - struct dst_entry * (*l3mdev_get_rt6_dst)(const struct net_device *dev, + struct dst_entry * (*l3mdev_link_scope_lookup)(const struct net_device *dev, struct flowi6 *fl6); int (*l3mdev_get_saddr6)(struct net_device *dev, const struct sock *sk, @@ -177,7 +177,7 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex) int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4); -struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6); +struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6); int l3mdev_get_saddr6(struct net *net, const struct sock *sk, struct flowi6 *fl6); @@ -299,7 +299,7 @@ static inline int l3mdev_get_saddr(struct net *net, int ifindex, } static inline -struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6) +struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6) { return NULL; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 2c68111..87e0a01 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1188,12 +1188,15 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, struct flowi6 *fl6, int flags) { - struct dst_entry *dst; bool any_src; - dst = l3mdev_get_rt6_dst(net, fl6); - if (dst) - return dst; + if (rt6_need_strict(&fl6->daddr)) { + struct dst_entry *dst; + + dst = l3mdev_link_scope_lookup(net, fl6); + if (dst) + return dst; + } fl6->flowi6_iif = 
LOOPBACK_IFINDEX; diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index 43610e5..ac9d928d 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -100,15 +100,14 @@ u32 l3mdev_fib_table_by_index(struct net *net, int ifindex) EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index); /** - * l3mdev_get_rt6_dst - IPv6 route lookup based on flow. Returns - * cached route for L3 master device if relevant - * to flow + * l3mdev_link_scope_lookup - IPv6 route lookup based on flow for link + * local and multicast addresses * @net: network namespace for device index lookup * @fl6: IPv6 flow struct for lookup */ -struct dst_entry *l3mdev_get_rt6_dst(struct net *net, - struct flowi6 *fl6) +struct dst_entry *l3mdev_link_scope_lookup(struct net *net, + struct flowi6 *fl6) { struct dst_entry *dst = NULL; struct net_device *dev; @@ -121,15 +120,15 @@ struct dst_entry *l3mdev_get_rt6_dst(struct net *net, dev = netdev_master_upper_dev_get_rcu(dev); if (dev && netif_is_l3_master(dev) && - dev->l3mdev_ops->l3mdev_get_rt6_dst) - dst = dev->l3mdev_ops->l3mdev_get_rt6_dst(dev, fl6); + dev->l3mdev_ops->l3mdev_link_scope_lookup) + dst = dev->l3mdev_ops->l3mdev_link_scope_lookup(dev, fl6); rcu_read_unlock(); } return dst; } -EXPORT_SYMBOL_GPL(l3mdev_get_rt6_dst); +EXPORT_SYMBOL_GPL(l3mdev_link_scope_lookup); /** * l3mdev_get_saddr - get source address for a flow based on an interface -- cgit v1.1 From e0d56fdd734224666e7bd5fafbc620286d2a7ee8 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:09:57 -0700 Subject: net: l3mdev: remove redundant calls A previous patch added l3mdev flow update making these hooks redundant. Remove them. Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- net/ipv4/ip_output.c | 3 +-- net/ipv4/route.c | 12 ++---------- net/ipv4/xfrm4_policy.c | 2 +- net/ipv6/ip6_output.c | 2 -- net/ipv6/ndisc.c | 11 ++--------- net/ipv6/route.c | 7 +------ net/ipv6/tcp_ipv6.c | 8 ++------ net/ipv6/xfrm6_policy.c | 2 +- 8 files changed, 10 insertions(+), 37 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 41e10e3..05d1058 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1582,8 +1582,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, } oif = arg->bound_dev_if; - if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) - oif = skb->skb_iif; + oif = oif ? : skb->skb_iif; flowi4_init_output(&fl4, oif, IP4_REPLY_MARK(net, skb->mark), diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ad83f85..b52496f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1831,7 +1831,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, * Now we are ready to route packet. */ fl4.flowi4_oif = 0; - fl4.flowi4_iif = l3mdev_fib_oif_rcu(dev); + fl4.flowi4_iif = dev->ifindex; fl4.flowi4_mark = skb->mark; fl4.flowi4_tos = tos; fl4.flowi4_scope = RT_SCOPE_UNIVERSE; @@ -2150,7 +2150,6 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, unsigned int flags = 0; struct fib_result res; struct rtable *rth; - int master_idx; int orig_oif; int err = -ENETUNREACH; @@ -2160,9 +2159,6 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, orig_oif = fl4->flowi4_oif; - master_idx = l3mdev_master_ifindex_by_index(net, fl4->flowi4_oif); - if (master_idx) - fl4->flowi4_oif = master_idx; fl4->flowi4_iif = LOOPBACK_IFINDEX; fl4->flowi4_tos = tos & IPTOS_RT_MASK; fl4->flowi4_scope = ((tos & RTO_ONLINK) ? 
@@ -2263,8 +2259,7 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, if (err) { res.fi = NULL; res.table = NULL; - if (fl4->flowi4_oif && - !netif_index_is_l3_master(net, fl4->flowi4_oif)) { + if (fl4->flowi4_oif) { /* Apparently, routing tables are wrong. Assume, that the destination is on link. @@ -2577,9 +2572,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0; fl4.flowi4_mark = mark; - if (netif_index_is_l3_master(net, fl4.flowi4_oif)) - fl4.flowi4_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF; - if (iif) { struct net_device *dev; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index b644a23..3155ed7 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -112,7 +112,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) int oif = 0; if (skb_dst(skb)) - oif = l3mdev_fib_oif(skb_dst(skb)->dev); + oif = skb_dst(skb)->dev->ifindex; memset(fl4, 0, sizeof(struct flowi4)); fl4->flowi4_mark = skb->mark; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 6ea6caa..1cb41b3 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1070,8 +1070,6 @@ struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6, return ERR_PTR(err); if (final_dst) fl6->daddr = *final_dst; - if (!fl6->flowi6_oif) - fl6->flowi6_oif = l3mdev_fib_oif(dst->dev); return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index fe65cdc..d8e6714 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -67,7 +67,6 @@ #include #include #include -#include #include #include @@ -457,11 +456,9 @@ static void ndisc_send_skb(struct sk_buff *skb, if (!dst) { struct flowi6 fl6; - int oif = l3mdev_fib_oif(skb->dev); + int oif = skb->dev->ifindex; icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif); - if (oif != skb->dev->ifindex) 
- fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC; dst = icmp6_dst_alloc(skb->dev, &fl6); if (IS_ERR(dst)) { kfree_skb(skb); @@ -1538,7 +1535,6 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) int rd_len; u8 ha_buf[MAX_ADDR_LEN], *ha = NULL, ops_data_buf[NDISC_OPS_REDIRECT_DATA_SPACE], *ops_data = NULL; - int oif = l3mdev_fib_oif(dev); bool ret; if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) { @@ -1555,10 +1551,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) } icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT, - &saddr_buf, &ipv6_hdr(skb)->saddr, oif); - - if (oif != skb->dev->ifindex) - fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC; + &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex); dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 87e0a01..ad4a7ff 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1164,7 +1164,7 @@ void ip6_route_input(struct sk_buff *skb) int flags = RT6_LOOKUP_F_HAS_SADDR; struct ip_tunnel_info *tun_info; struct flowi6 fl6 = { - .flowi6_iif = l3mdev_fib_oif(skb->dev), + .flowi6_iif = skb->dev->ifindex, .daddr = iph->daddr, .saddr = iph->saddr, .flowlabel = ip6_flowinfo(iph), @@ -3349,11 +3349,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) } else { fl6.flowi6_oif = oif; - if (netif_index_is_l3_master(net, oif)) { - fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC | - FLOWI_FLAG_SKIP_NH_OIF; - } - rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 04529a3..54cf719 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -818,12 +818,8 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 fl6.flowi6_proto = IPPROTO_TCP; if (rt6_need_strict(&fl6.daddr) && !oif) fl6.flowi6_oif = tcp_v6_iif(skb); - else { - if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) - oif = skb->skb_iif; - - 
fl6.flowi6_oif = oif; - } + else + fl6.flowi6_oif = oif ? : skb->skb_iif; fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 6cc9700..b7b7e86 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -134,7 +134,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) nexthdr = nh[nhoff]; if (skb_dst(skb)) - oif = l3mdev_fib_oif(skb_dst(skb)->dev); + oif = skb_dst(skb)->dev->ifindex; memset(fl6, 0, sizeof(struct flowi6)); fl6->flowi6_mark = skb->mark; -- cgit v1.1 From d66f6c0a8f3c0bcc4ee7a9b1da4b0ebe7ee555a3 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:09:58 -0700 Subject: net: ipv4: Remove l3mdev_get_saddr No longer needed Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 38 -------------------------------------- include/net/l3mdev.h | 12 ------------ include/net/route.h | 10 ---------- net/ipv4/raw.c | 6 ------ net/ipv4/udp.c | 6 ------ net/l3mdev/l3mdev.c | 31 ------------------------------- 6 files changed, 103 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index f5372ed..9ad2a16 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -863,43 +863,6 @@ static struct rtable *vrf_get_rtable(const struct net_device *dev, return rth; } -/* called under rcu_read_lock */ -static int vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4) -{ - struct fib_result res = { .tclassid = 0 }; - struct net *net = dev_net(dev); - u32 orig_tos = fl4->flowi4_tos; - u8 flags = fl4->flowi4_flags; - u8 scope = fl4->flowi4_scope; - u8 tos = RT_FL_TOS(fl4); - int rc; - - if (unlikely(!fl4->daddr)) - return 0; - - fl4->flowi4_flags |= FLOWI_FLAG_SKIP_NH_OIF; - fl4->flowi4_iif = LOOPBACK_IFINDEX; - /* make sure oif is set to VRF device for lookup */ - fl4->flowi4_oif = dev->ifindex; - fl4->flowi4_tos = tos & IPTOS_RT_MASK; - fl4->flowi4_scope = ((tos & RTO_ONLINK) ? 
- RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); - - rc = fib_lookup(net, fl4, &res, 0); - if (!rc) { - if (res.type == RTN_LOCAL) - fl4->saddr = res.fi->fib_prefsrc ? : fl4->daddr; - else - fib_select_path(net, &res, fl4, -1); - } - - fl4->flowi4_flags = flags; - fl4->flowi4_tos = orig_tos; - fl4->flowi4_scope = scope; - - return rc; -} - static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { return 0; @@ -1166,7 +1129,6 @@ static int vrf_get_saddr6(struct net_device *dev, const struct sock *sk, static const struct l3mdev_ops vrf_l3mdev_ops = { .l3mdev_fib_table = vrf_fib_table, .l3mdev_get_rtable = vrf_get_rtable, - .l3mdev_get_saddr = vrf_get_saddr, .l3mdev_l3_rcv = vrf_l3_rcv, .l3mdev_l3_out = vrf_l3_out, #if IS_ENABLED(CONFIG_IPV6) diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 51aab20..1129e1d 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -25,8 +25,6 @@ * * @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device * - * @l3mdev_get_saddr: Get source address for a flow - * * @l3mdev_link_scope_lookup: IPv6 lookup for linklocal and mcast destinations */ @@ -41,8 +39,6 @@ struct l3mdev_ops { /* IPv4 ops */ struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev, const struct flowi4 *fl4); - int (*l3mdev_get_saddr)(struct net_device *dev, - struct flowi4 *fl4); /* IPv6 ops */ struct dst_entry * (*l3mdev_link_scope_lookup)(const struct net_device *dev, @@ -175,8 +171,6 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex) return rc; } -int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4); - struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6); int l3mdev_get_saddr6(struct net *net, const struct sock *sk, struct flowi6 *fl6); @@ -292,12 +286,6 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex) return false; } -static inline int l3mdev_get_saddr(struct net *net, int ifindex, - struct flowi4 *fl4) -{ - 
return 0; -} - static inline struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6) { diff --git a/include/net/route.h b/include/net/route.h index ad777d7..0429d47 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include @@ -285,15 +284,6 @@ static inline struct rtable *ip_route_connect(struct flowi4 *fl4, ip_route_connect_init(fl4, dst, src, tos, oif, protocol, sport, dport, sk); - if (!src && oif) { - int rc; - - rc = l3mdev_get_saddr(net, oif, fl4); - if (rc < 0) - return ERR_PTR(rc); - - src = fl4->saddr; - } if (!dst || !src) { rt = __ip_route_output_key(net, fl4); if (IS_ERR(rt)) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 438f50c..90a85c9 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -606,12 +606,6 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), daddr, saddr, 0, 0); - if (!saddr && ipc.oif) { - err = l3mdev_get_saddr(net, ipc.oif, &fl4); - if (err < 0) - goto done; - } - if (!inet->hdrincl) { rfv.msg = msg; rfv.hlen = 0; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 058c312..7d96dc2 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1021,12 +1021,6 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) flow_flags, faddr, saddr, dport, inet->inet_sport); - if (!saddr && ipc.oif) { - err = l3mdev_get_saddr(net, ipc.oif, fl4); - if (err < 0) - goto out; - } - security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) { diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index ac9d928d..be40df6 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -130,37 +130,6 @@ struct dst_entry *l3mdev_link_scope_lookup(struct net *net, } EXPORT_SYMBOL_GPL(l3mdev_link_scope_lookup); -/** - * l3mdev_get_saddr - get source address for a flow based on an interface - * enslaved to an L3 master device - * @net: 
network namespace for device index lookup - * @ifindex: Interface index - * @fl4: IPv4 flow struct - */ - -int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4) -{ - struct net_device *dev; - int rc = 0; - - if (ifindex) { - rcu_read_lock(); - - dev = dev_get_by_index_rcu(net, ifindex); - if (dev && netif_is_l3_slave(dev)) - dev = netdev_master_upper_dev_get_rcu(dev); - - if (dev && netif_is_l3_master(dev) && - dev->l3mdev_ops->l3mdev_get_saddr) - rc = dev->l3mdev_ops->l3mdev_get_saddr(dev, fl4); - - rcu_read_unlock(); - } - - return rc; -} -EXPORT_SYMBOL_GPL(l3mdev_get_saddr); - int l3mdev_get_saddr6(struct net *net, const struct sock *sk, struct flowi6 *fl6) { -- cgit v1.1 From 8a966fc016b67d2a8ab4a83d22ded8cde032a0eb Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:09:59 -0700 Subject: net: ipv6: Remove l3mdev_get_saddr6 No longer needed Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 41 ----------------------------------------- include/net/l3mdev.h | 11 ----------- net/ipv6/ip6_output.c | 9 +-------- net/l3mdev/l3mdev.c | 24 ------------------------ 4 files changed, 1 insertion(+), 84 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 9ad2a16..3a34f54 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1084,46 +1084,6 @@ static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev, return dst; } - -/* called under rcu_read_lock */ -static int vrf_get_saddr6(struct net_device *dev, const struct sock *sk, - struct flowi6 *fl6) -{ - struct net *net = dev_net(dev); - struct dst_entry *dst; - struct rt6_info *rt; - int err; - - if (rt6_need_strict(&fl6->daddr)) { - rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, - RT6_LOOKUP_F_IFACE); - if (unlikely(!rt)) - return 0; - - dst = &rt->dst; - } else { - __u8 flags = fl6->flowi6_flags; - - fl6->flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC; - fl6->flowi6_flags |= FLOWI_FLAG_SKIP_NH_OIF; - - dst = 
ip6_route_output(net, sk, fl6); - rt = (struct rt6_info *)dst; - - fl6->flowi6_flags = flags; - } - - err = dst->error; - if (!err) { - err = ip6_route_get_saddr(net, rt, &fl6->daddr, - sk ? inet6_sk(sk)->srcprefs : 0, - &fl6->saddr); - } - - dst_release(dst); - - return err; -} #endif static const struct l3mdev_ops vrf_l3mdev_ops = { @@ -1133,7 +1093,6 @@ static const struct l3mdev_ops vrf_l3mdev_ops = { .l3mdev_l3_out = vrf_l3_out, #if IS_ENABLED(CONFIG_IPV6) .l3mdev_link_scope_lookup = vrf_link_scope_lookup, - .l3mdev_get_saddr6 = vrf_get_saddr6, #endif }; diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 1129e1d..a5e506e 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -43,9 +43,6 @@ struct l3mdev_ops { /* IPv6 ops */ struct dst_entry * (*l3mdev_link_scope_lookup)(const struct net_device *dev, struct flowi6 *fl6); - int (*l3mdev_get_saddr6)(struct net_device *dev, - const struct sock *sk, - struct flowi6 *fl6); }; #ifdef CONFIG_NET_L3_MASTER_DEV @@ -172,8 +169,6 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex) } struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6); -int l3mdev_get_saddr6(struct net *net, const struct sock *sk, - struct flowi6 *fl6); static inline struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto) @@ -292,12 +287,6 @@ struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6) return NULL; } -static inline int l3mdev_get_saddr6(struct net *net, const struct sock *sk, - struct flowi6 *fl6) -{ - return 0; -} - static inline struct sk_buff *l3mdev_ip_rcv(struct sk_buff *skb) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1cb41b3..6001e78 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -926,13 +926,6 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, int err; int flags = 0; - if (ipv6_addr_any(&fl6->saddr) && fl6->flowi6_oif && - (!*dst || !(*dst)->error)) { - err = 
l3mdev_get_saddr6(net, sk, fl6); - if (err) - goto out_err; - } - /* The correct way to handle this would be to do * ip6_route_get_saddr, and then ip6_route_output; however, * the route-specific preferred source forces the @@ -1024,7 +1017,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, out_err_release: dst_release(*dst); *dst = NULL; -out_err: + if (err == -ENETUNREACH) IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES); return err; diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index be40df6..8da86ce 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -130,30 +130,6 @@ struct dst_entry *l3mdev_link_scope_lookup(struct net *net, } EXPORT_SYMBOL_GPL(l3mdev_link_scope_lookup); -int l3mdev_get_saddr6(struct net *net, const struct sock *sk, - struct flowi6 *fl6) -{ - struct net_device *dev; - int rc = 0; - - if (fl6->flowi6_oif) { - rcu_read_lock(); - - dev = dev_get_by_index_rcu(net, fl6->flowi6_oif); - if (dev && netif_is_l3_slave(dev)) - dev = netdev_master_upper_dev_get_rcu(dev); - - if (dev && netif_is_l3_master(dev) && - dev->l3mdev_ops->l3mdev_get_saddr6) - rc = dev->l3mdev_ops->l3mdev_get_saddr6(dev, sk, fl6); - - rcu_read_unlock(); - } - - return rc; -} -EXPORT_SYMBOL_GPL(l3mdev_get_saddr6); - /** * l3mdev_fib_rule_match - Determine if flowi references an * L3 master device -- cgit v1.1 From ca28b8f2b8f316b9973693c72770c98da3e9500e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:10:00 -0700 Subject: net: l3mdev: Remove l3mdev_fib_oif No longer used Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/l3mdev.h | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index a5e506e..a586035 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -107,26 +107,6 @@ struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev) return master; } -/* get index of an interface to use for FIB lookups. 
For devices - * enslaved to an L3 master device FIB lookups are based on the - * master index - */ -static inline int l3mdev_fib_oif_rcu(struct net_device *dev) -{ - return l3mdev_master_ifindex_rcu(dev) ? : dev->ifindex; -} - -static inline int l3mdev_fib_oif(struct net_device *dev) -{ - int oif; - - rcu_read_lock(); - oif = l3mdev_fib_oif_rcu(dev); - rcu_read_unlock(); - - return oif; -} - u32 l3mdev_fib_table_rcu(const struct net_device *dev); u32 l3mdev_fib_table_by_index(struct net *net, int ifindex); static inline u32 l3mdev_fib_table(const struct net_device *dev) @@ -248,15 +228,6 @@ struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev) return NULL; } -static inline int l3mdev_fib_oif_rcu(struct net_device *dev) -{ - return dev ? dev->ifindex : 0; -} -static inline int l3mdev_fib_oif(struct net_device *dev) -{ - return dev ? dev->ifindex : 0; -} - static inline u32 l3mdev_fib_table_rcu(const struct net_device *dev) { return 0; -- cgit v1.1 From afb460fe0ef0af6d98ed51006153acb01278df2d Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:10:01 -0700 Subject: net: l3mdev: remove get_rtable method No longer used Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- drivers/net/vrf.c | 21 --------------------- include/net/l3mdev.h | 21 --------------------- 2 files changed, 42 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 3a34f54..ccce59f 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -843,26 +843,6 @@ static u32 vrf_fib_table(const struct net_device *dev) return vrf->tb_id; } -static struct rtable *vrf_get_rtable(const struct net_device *dev, - const struct flowi4 *fl4) -{ - struct rtable *rth = NULL; - - if (!(fl4->flowi4_flags & FLOWI_FLAG_L3MDEV_SRC)) { - struct net_vrf *vrf = netdev_priv(dev); - - rcu_read_lock(); - - rth = rcu_dereference(vrf->rth); - if (likely(rth)) - dst_hold(&rth->dst); - - rcu_read_unlock(); - } - - return rth; -} - static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { return 0; @@ -1088,7 +1068,6 @@ static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev, static const struct l3mdev_ops vrf_l3mdev_ops = { .l3mdev_fib_table = vrf_fib_table, - .l3mdev_get_rtable = vrf_get_rtable, .l3mdev_l3_rcv = vrf_l3_rcv, .l3mdev_l3_out = vrf_l3_out, #if IS_ENABLED(CONFIG_IPV6) diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index a586035..3832099 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -23,8 +23,6 @@ * * @l3mdev_l3_out: Hook in L3 output path * - * @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device - * * @l3mdev_link_scope_lookup: IPv6 lookup for linklocal and mcast destinations */ @@ -36,10 +34,6 @@ struct l3mdev_ops { struct sock *sk, struct sk_buff *skb, u16 proto); - /* IPv4 ops */ - struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev, - const struct flowi4 *fl4); - /* IPv6 ops */ struct dst_entry * (*l3mdev_link_scope_lookup)(const struct net_device *dev, struct flowi6 *fl6); @@ -120,15 +114,6 @@ static inline u32 l3mdev_fib_table(const struct net_device *dev) return tb_id; } -static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev, - 
const struct flowi4 *fl4) -{ - if (netif_is_l3_master(dev) && dev->l3mdev_ops->l3mdev_get_rtable) - return dev->l3mdev_ops->l3mdev_get_rtable(dev, fl4); - - return NULL; -} - static inline bool netif_index_is_l3_master(struct net *net, int ifindex) { struct net_device *dev; @@ -241,12 +226,6 @@ static inline u32 l3mdev_fib_table_by_index(struct net *net, int ifindex) return 0; } -static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev, - const struct flowi4 *fl4) -{ - return NULL; -} - static inline bool netif_index_is_l3_master(struct net *net, int ifindex) { return false; -- cgit v1.1 From c71ad3d45a5e928e617ca436f3ce88bb773fb766 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:10:02 -0700 Subject: net: flow: Remove FLOWI_FLAG_L3MDEV_SRC flag No longer used Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 5 ++--- include/net/flow.h | 3 +-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index ccce59f..55674b0 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -165,7 +165,7 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, .flowlabel = ip6_flowinfo(iph), .flowi6_mark = skb->mark, .flowi6_proto = iph->nexthdr, - .flowi6_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF, + .flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF, }; int ret = NET_XMIT_DROP; struct dst_entry *dst; @@ -265,8 +265,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, .flowi4_oif = vrf_dev->ifindex, .flowi4_iif = LOOPBACK_IFINDEX, .flowi4_tos = RT_TOS(ip4h->tos), - .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_L3MDEV_SRC | - FLOWI_FLAG_SKIP_NH_OIF, + .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF, .daddr = ip4h->daddr, }; struct net *net = dev_net(vrf_dev); diff --git a/include/net/flow.h b/include/net/flow.h index d47ef4b..035aa77 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -34,8 +34,7 @@ struct flowi_common 
{ __u8 flowic_flags; #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_KNOWN_NH 0x02 -#define FLOWI_FLAG_L3MDEV_SRC 0x04 -#define FLOWI_FLAG_SKIP_NH_OIF 0x08 +#define FLOWI_FLAG_SKIP_NH_OIF 0x04 __u32 flowic_secid; struct flowi_tunnel flowic_tun_key; }; -- cgit v1.1 From b519d408ea32040b1c7e10b155a3ee9a36660947 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 11 Sep 2016 14:50:01 -0400 Subject: NFSv4.1: Fix the CREATE_SESSION slot number accounting Ensure that we conform to the algorithm described in RFC5661, section 18.36.4 for when to bump the sequence id. In essence we do it for all cases except when the RPC call timed out, or in case of the server returning NFS4ERR_DELAY or NFS4ERR_STALE_CLIENTID. Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/nfs4proc.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c380d2e..a9dec32 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7570,12 +7570,20 @@ static int _nfs4_proc_create_session(struct nfs_client *clp, status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); trace_nfs4_create_session(clp, status); + switch (status) { + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_DELAY: + case -ETIMEDOUT: + case -EACCES: + case -EAGAIN: + goto out; + }; + + clp->cl_seqid++; if (!status) { /* Verify the session's negotiated channel_attrs values */ status = nfs4_verify_channel_attrs(&args, &res); /* Increment the clientid slot sequence id */ - if (clp->cl_seqid == res.seqid) - clp->cl_seqid++; if (status) goto out; nfs4_update_session(session, &res); -- cgit v1.1 From bd0b841fee49de421f615cc173ccff063303672f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 11 Sep 2016 14:41:49 -0700 Subject: nvme: make NVME_RDMA depend on BLOCK Commit aa71987472a9 ("nvme: fabrics drivers don't need the nvme-pci driver") removed the dependency on BLK_DEV_NVME, but the code does depend on the block layer (which
used to be an implicit dependency through BLK_DEV_NVME). Otherwise you get various errors from the kbuild test robot random config testing when that happens to hit a configuration with BLOCK device support disabled. Cc: Christoph Hellwig Cc: Jay Freyensee Cc: Sagi Grimberg Signed-off-by: Linus Torvalds --- drivers/nvme/host/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index 0c644f7..f7d37a6 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -30,7 +30,7 @@ config NVME_FABRICS config NVME_RDMA tristate "NVM Express over Fabrics RDMA host driver" - depends on INFINIBAND + depends on INFINIBAND && BLOCK select NVME_CORE select NVME_FABRICS select SG_POOL -- cgit v1.1 From 02154927c115c7599677df57203988e05b576346 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 10 Sep 2016 12:39:03 -0700 Subject: net: dsa: bcm_sf2: Get VLAN_PORT_MASK from b53_device While migrating the bcm_sf2 driver to use b53_common, we left a small piece untouched where we kept our local copy of the per-port port_vlan_ctl bitmask value. This value is now maintained by b53_device so we need to use it instead of our local (and now stale) copy of it. Fixes: f458995b9ad8 ("net: dsa: bcm_sf2: Utilize core B53 driver when possible") Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/dsa/bcm_sf2.c | 2 +- drivers/net/dsa/bcm_sf2.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 51f1fc0..5bf4f34 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -256,7 +256,7 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port, reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(port)); reg &= ~PORT_VLAN_CTRL_MASK; reg |= (1 << port); - reg |= priv->port_sts[port].vlan_ctl_mask; + reg |= priv->dev->ports[port].vlan_ctl_mask; core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(port)); bcm_sf2_imp_vlan_setup(ds, cpu_port); diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h index 46c4ea7..4469267 100644 --- a/drivers/net/dsa/bcm_sf2.h +++ b/drivers/net/dsa/bcm_sf2.h @@ -50,8 +50,6 @@ struct bcm_sf2_port_status { unsigned int link; struct ethtool_eee eee; - - u16 vlan_ctl_mask; }; struct bcm_sf2_priv { -- cgit v1.1 From 74a9e9054456658f047bf71d9108844defde793d Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Sun, 11 Sep 2016 10:56:17 +0300 Subject: net/mlx4_en: Fix the return value of mlx4_en_dcbnl_set_all() mlx4_en_dcbnl_set_all() returns u8, so return value can't be negative in case of failure. Fixes: af7d51852631 ("net/mlx4_en: Add DCB PFC support through CEE netlink commands") Signed-off-by: Kamal Heib Signed-off-by: Rana Shahout Reported-by: Dan Carpenter Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c index 99c6bbd..97081e5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c @@ -158,10 +158,9 @@ static u8 mlx4_en_dcbnl_set_all(struct net_device *netdev) struct mlx4_en_priv *priv = netdev_priv(netdev); struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_cee_config *dcb_cfg = &priv->cee_params.dcb_cfg; - int err = 0; if (!(priv->cee_params.dcbx_cap & DCB_CAP_DCBX_VER_CEE)) - return -EINVAL; + return 1; if (dcb_cfg->pfc_state) { int tc; @@ -199,15 +198,17 @@ static u8 mlx4_en_dcbnl_set_all(struct net_device *netdev) en_dbg(DRV, priv, "Set pfc off\n"); } - err = mlx4_SET_PORT_general(mdev->dev, priv->port, - priv->rx_skb_size + ETH_FCS_LEN, - priv->prof->tx_pause, - priv->prof->tx_ppp, - priv->prof->rx_pause, - priv->prof->rx_ppp); - if (err) + if (mlx4_SET_PORT_general(mdev->dev, priv->port, + priv->rx_skb_size + ETH_FCS_LEN, + priv->prof->tx_pause, + priv->prof->tx_ppp, + priv->prof->rx_pause, + priv->prof->rx_ppp)) { en_err(priv, "Failed setting pause params\n"); - return err; + return 1; + } + + return 0; } static u8 mlx4_en_dcbnl_get_state(struct net_device *dev) -- cgit v1.1 From c677071741343381f4f555867c04af7ec4a90869 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Sun, 11 Sep 2016 10:56:18 +0300 Subject: net/mlx4_en: Fix the return value of mlx4_en_dcbnl_set_state() mlx4_en_dcbnl_set_state() returns u8, the return value from mlx4_en_setup_tc() could be negative in case of failure, so fix that. Fixes: af7d51852631 ("net/mlx4_en: Add DCB PFC support through CEE netlink commands") Signed-off-by: Kamal Heib Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c index 97081e5..316a707 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c @@ -239,7 +239,10 @@ static u8 mlx4_en_dcbnl_set_state(struct net_device *dev, u8 state) priv->flags &= ~MLX4_EN_FLAG_DCB_ENABLED; } - return mlx4_en_setup_tc(dev, num_tcs); + if (mlx4_en_setup_tc(dev, num_tcs)) + return 1; + + return 0; } /* On success returns a non-zero 802.1p user priority bitmap -- cgit v1.1 From 564ed9b187082da79726a2f25bb88a58ebeec7e8 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 11 Sep 2016 10:56:19 +0300 Subject: net/mlx4_en: Fixes for DCBX This patch adds a capability check before enabling DCBX. In addition, it re-organizes the relevant data structures, and fixes a typo in a define. Fixes: af7d51852631 ("net/mlx4_en: Add DCB PFC support through CEE netlink commands") Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c | 31 +++++++++++++------------- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 21 +++++++---------- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 15 +++---------- drivers/net/ethernet/mellanox/mlx4/port.c | 4 ++-- 4 files changed, 28 insertions(+), 43 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c index 316a707..b04760a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c @@ -94,7 +94,7 @@ static u8 mlx4_en_dcbnl_getcap(struct net_device *dev, int capid, u8 *cap) *cap = true; break; case DCB_CAP_ATTR_DCBX: - *cap = priv->cee_params.dcbx_cap; + *cap = priv->dcbx_cap; break; case DCB_CAP_ATTR_PFC_TCS: *cap = 1 << mlx4_max_tc(priv->mdev->dev); @@ -111,14 +111,14 @@ static u8 mlx4_en_dcbnl_getpfcstate(struct net_device *netdev) { struct mlx4_en_priv *priv = netdev_priv(netdev); - return priv->cee_params.dcb_cfg.pfc_state; + return priv->cee_config.pfc_state; } static void mlx4_en_dcbnl_setpfcstate(struct net_device *netdev, u8 state) { struct mlx4_en_priv *priv = netdev_priv(netdev); - priv->cee_params.dcb_cfg.pfc_state = state; + priv->cee_config.pfc_state = state; } static void mlx4_en_dcbnl_get_pfc_cfg(struct net_device *netdev, int priority, @@ -126,7 +126,7 @@ static void mlx4_en_dcbnl_get_pfc_cfg(struct net_device *netdev, int priority, { struct mlx4_en_priv *priv = netdev_priv(netdev); - *setting = priv->cee_params.dcb_cfg.tc_config[priority].dcb_pfc; + *setting = priv->cee_config.dcb_pfc[priority]; } static void mlx4_en_dcbnl_set_pfc_cfg(struct net_device *netdev, int priority, @@ -134,8 +134,8 @@ static void mlx4_en_dcbnl_set_pfc_cfg(struct net_device *netdev, int priority, { struct mlx4_en_priv *priv = netdev_priv(netdev); - priv->cee_params.dcb_cfg.tc_config[priority].dcb_pfc = setting; - priv->cee_params.dcb_cfg.pfc_state = true; + 
priv->cee_config.dcb_pfc[priority] = setting; + priv->cee_config.pfc_state = true; } static int mlx4_en_dcbnl_getnumtcs(struct net_device *netdev, int tcid, u8 *num) @@ -157,12 +157,11 @@ static u8 mlx4_en_dcbnl_set_all(struct net_device *netdev) { struct mlx4_en_priv *priv = netdev_priv(netdev); struct mlx4_en_dev *mdev = priv->mdev; - struct mlx4_en_cee_config *dcb_cfg = &priv->cee_params.dcb_cfg; - if (!(priv->cee_params.dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + if (!(priv->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) return 1; - if (dcb_cfg->pfc_state) { + if (priv->cee_config.pfc_state) { int tc; priv->prof->rx_pause = 0; @@ -170,7 +169,7 @@ static u8 mlx4_en_dcbnl_set_all(struct net_device *netdev) for (tc = 0; tc < CEE_DCBX_MAX_PRIO; tc++) { u8 tc_mask = 1 << tc; - switch (dcb_cfg->tc_config[tc].dcb_pfc) { + switch (priv->cee_config.dcb_pfc[tc]) { case pfc_disabled: priv->prof->tx_ppp &= ~tc_mask; priv->prof->rx_ppp &= ~tc_mask; @@ -226,7 +225,7 @@ static u8 mlx4_en_dcbnl_set_state(struct net_device *dev, u8 state) struct mlx4_en_priv *priv = netdev_priv(dev); int num_tcs = 0; - if (!(priv->cee_params.dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + if (!(priv->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) return 1; if (!!(state) == !!(priv->flags & MLX4_EN_FLAG_DCB_ENABLED)) @@ -256,7 +255,7 @@ static int mlx4_en_dcbnl_getapp(struct net_device *netdev, u8 idtype, u16 id) .selector = idtype, .protocol = id, }; - if (!(priv->cee_params.dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + if (!(priv->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) return 0; return dcb_getapp(netdev, &app); @@ -268,7 +267,7 @@ static int mlx4_en_dcbnl_setapp(struct net_device *netdev, u8 idtype, struct mlx4_en_priv *priv = netdev_priv(netdev); struct dcb_app app; - if (!(priv->cee_params.dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + if (!(priv->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) return -EINVAL; memset(&app, 0, sizeof(struct dcb_app)); @@ -437,7 +436,7 @@ static u8 mlx4_en_dcbnl_getdcbx(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); - 
return priv->cee_params.dcbx_cap; + return priv->dcbx_cap; } static u8 mlx4_en_dcbnl_setdcbx(struct net_device *dev, u8 mode) @@ -446,7 +445,7 @@ static u8 mlx4_en_dcbnl_setdcbx(struct net_device *dev, u8 mode) struct ieee_ets ets = {0}; struct ieee_pfc pfc = {0}; - if (mode == priv->cee_params.dcbx_cap) + if (mode == priv->dcbx_cap) return 0; if ((mode & DCB_CAP_DCBX_LLD_MANAGED) || @@ -455,7 +454,7 @@ static u8 mlx4_en_dcbnl_setdcbx(struct net_device *dev, u8 mode) !(mode & DCB_CAP_DCBX_HOST)) goto err; - priv->cee_params.dcbx_cap = mode; + priv->dcbx_cap = mode; ets.ets_cap = IEEE_8021QAZ_MAX_TCS; pfc.pfc_cap = IEEE_8021QAZ_MAX_TCS; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 4198e9b..fedb829 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -71,10 +71,11 @@ int mlx4_en_setup_tc(struct net_device *dev, u8 up) #ifdef CONFIG_MLX4_EN_DCB if (!mlx4_is_slave(priv->mdev->dev)) { if (up) { - priv->flags |= MLX4_EN_FLAG_DCB_ENABLED; + if (priv->dcbx_cap) + priv->flags |= MLX4_EN_FLAG_DCB_ENABLED; } else { priv->flags &= ~MLX4_EN_FLAG_DCB_ENABLED; - priv->cee_params.dcb_cfg.pfc_state = false; + priv->cee_config.pfc_state = false; } } #endif /* CONFIG_MLX4_EN_DCB */ @@ -3048,9 +3049,6 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, struct mlx4_en_priv *priv; int i; int err; -#ifdef CONFIG_MLX4_EN_DCB - struct tc_configuration *tc; -#endif dev = alloc_etherdev_mqs(sizeof(struct mlx4_en_priv), MAX_TX_RINGS, MAX_RX_RINGS); @@ -3117,16 +3115,13 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, priv->msg_enable = MLX4_EN_MSG_LEVEL; #ifdef CONFIG_MLX4_EN_DCB if (!mlx4_is_slave(priv->mdev->dev)) { - priv->cee_params.dcbx_cap = DCB_CAP_DCBX_VER_CEE | - DCB_CAP_DCBX_HOST | - DCB_CAP_DCBX_VER_IEEE; + priv->dcbx_cap = DCB_CAP_DCBX_VER_CEE | DCB_CAP_DCBX_HOST | + DCB_CAP_DCBX_VER_IEEE; priv->flags |= MLX4_EN_DCB_ENABLED; 
- priv->cee_params.dcb_cfg.pfc_state = false; + priv->cee_config.pfc_state = false; - for (i = 0; i < MLX4_EN_NUM_UP; i++) { - tc = &priv->cee_params.dcb_cfg.tc_config[i]; - tc->dcb_pfc = pfc_disabled; - } + for (i = 0; i < MLX4_EN_NUM_UP; i++) + priv->cee_config.dcb_pfc[i] = pfc_disabled; if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) { dev->dcbnl_ops = &mlx4_en_dcbnl_ops; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 2c2913d..9099dbd 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -482,20 +482,10 @@ enum dcb_pfc_type { pfc_enabled_rx }; -struct tc_configuration { - enum dcb_pfc_type dcb_pfc; -}; - struct mlx4_en_cee_config { bool pfc_state; - struct tc_configuration tc_config[MLX4_EN_NUM_UP]; + enum dcb_pfc_type dcb_pfc[MLX4_EN_NUM_UP]; }; - -struct mlx4_en_cee_params { - u8 dcbx_cap; - struct mlx4_en_cee_config dcb_cfg; -}; - #endif struct ethtool_flow_id { @@ -624,7 +614,8 @@ struct mlx4_en_priv { struct ieee_ets ets; u16 maxrate[IEEE_8021QAZ_MAX_TCS]; enum dcbnl_cndd_states cndd_state[IEEE_8021QAZ_MAX_TCS]; - struct mlx4_en_cee_params cee_params; + struct mlx4_en_cee_config cee_config; + u8 dcbx_cap; #endif #ifdef CONFIG_RFS_ACCEL spinlock_t filters_lock; diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 3d2095e..c5b2064 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -52,7 +52,7 @@ #define MLX4_FLAG_V_IGNORE_FCS_MASK 0x2 #define MLX4_IGNORE_FCS_MASK 0x1 -#define MLNX4_TX_MAX_NUMBER 8 +#define MLX4_TC_MAX_NUMBER 8 void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table) { @@ -2022,7 +2022,7 @@ int mlx4_max_tc(struct mlx4_dev *dev) u8 num_tc = dev->caps.max_tc_eth; if (!num_tc) - num_tc = MLNX4_TX_MAX_NUMBER; + num_tc = MLX4_TC_MAX_NUMBER; return num_tc; } -- cgit v1.1 From 
7a61fc86af4a0c957c5b8c5777ab21d5cc286748 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Sun, 11 Sep 2016 10:56:20 +0300 Subject: net/mlx4_en: Fix panic on xmit while port is down When port is down, tx drop counter update is not needed. Updating the counter in this case can cause a kernel panic as when the port is down, ring can be NULL. Fixes: 63a664b7e92b ("net/mlx4_en: fix tx_dropped bug") Signed-off-by: Moshe Shemesh Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 9df87ca..e2509bb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -818,7 +818,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) real_size = get_real_size(skb, shinfo, dev, &lso_header_size, &inline_ok, &fragptr); if (unlikely(!real_size)) - goto tx_drop; + goto tx_drop_count; /* Align descriptor to TXBB size */ desc_size = ALIGN(real_size, TXBB_SIZE); @@ -826,7 +826,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(nr_txbb > MAX_DESC_TXBBS)) { if (netif_msg_tx_err(priv)) en_warn(priv, "Oversized header or SG list\n"); - goto tx_drop; + goto tx_drop_count; } bf_ok = ring->bf_enabled; @@ -1071,9 +1071,10 @@ tx_drop_unmap: PCI_DMA_TODEVICE); } +tx_drop_count: + ring->tx_dropped++; tx_drop: dev_kfree_skb_any(skb); - ring->tx_dropped++; return NETDEV_TX_OK; } @@ -1106,7 +1107,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame, goto tx_drop; if (mlx4_en_is_tx_ring_full(ring)) - goto tx_drop; + goto tx_drop_count; /* fetch ring->cons far ahead before needing it to avoid stall */ ring_cons = READ_ONCE(ring->cons); @@ -1176,7 +1177,8 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame, return NETDEV_TX_OK; -tx_drop: +tx_drop_count: 
ring->tx_dropped++; +tx_drop: return NETDEV_TX_BUSY; } -- cgit v1.1 From bd00a240dc52e28706fbbe3aceda63e6c291b433 Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Wed, 7 Sep 2016 10:46:30 +0530 Subject: powerpc/powernv: Fix restore of SPRs upon wake up from hypervisor state loss pnv_wakeup_tb_loss() currently expects cr4 to be "eq" if the CPU is waking up from a complete hypervisor state loss. Hence, it currently restores the SPR contents only if cr4 is "eq". However, after commit bcef83a00dc4 ("powerpc/powernv: Add platform support for stop instruction"), on ISA v3.0 CPUs, the function pnv_restore_hyp_resource() sets cr4 to contain the result of the comparison between the state the CPU has woken up from and the first deep stop state before calling pnv_wakeup_tb_loss(). Thus if the CPU woke up from a state that is deeper than the first deep stop state, cr4 will have "gt" set and hence, pnv_wakeup_tb_loss() will fail to restore the SPRs on waking up from such a state. Fix the code in pnv_wakeup_tb_loss() to restore the SPR states when cr4 is "eq" or "gt". Fixes: bcef83a00dc4 ("powerpc/powernv: Add platform support for stop instruction") Signed-off-by: Gautham R. Shenoy Reviewed-by: Shreyas B. Prabhu Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/idle_book3s.S | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 2265c63..bd739fe 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -411,7 +411,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) * * r13 - PACA * cr3 - gt if waking up with partial/complete hypervisor state loss - * cr4 - eq if waking up from complete hypervisor state loss. + * cr4 - gt or eq if waking up from complete hypervisor state loss. 
*/ _GLOBAL(pnv_wakeup_tb_loss) ld r1,PACAR1(r13) @@ -453,7 +453,7 @@ lwarx_loop2: * At this stage * cr2 - eq if first thread to wakeup in core * cr3- gt if waking up with partial/complete hypervisor state loss - * cr4 - eq if waking up from complete hypervisor state loss. + * cr4 - gt or eq if waking up from complete hypervisor state loss. */ ori r15,r15,PNV_CORE_IDLE_LOCK_BIT @@ -481,7 +481,7 @@ first_thread_in_subcore: * If waking up from sleep, subcore state is not lost. Hence * skip subcore state restore */ - bne cr4,subcore_state_restored + blt cr4,subcore_state_restored /* Restore per-subcore state */ ld r4,_SDR1(r1) @@ -526,7 +526,7 @@ timebase_resync: * If waking up from sleep, per core state is not lost, skip to * clear_lock. */ - bne cr4,clear_lock + blt cr4,clear_lock /* * First thread in the core to wake up and its waking up with @@ -557,7 +557,7 @@ common_exit: * If waking up from sleep, hypervisor state is not lost. Hence * skip hypervisor state restore. */ - bne cr4,hypervisor_state_restored + blt cr4,hypervisor_state_restored /* Waking up from winkle */ -- cgit v1.1 From ffed15d3ce3f710b94e6f402e1ca2318f7d7c0e2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 12 Sep 2016 12:48:28 +1000 Subject: powerpc/kernel: Fix size of NUM_CPU_FTR_KEYS on 32-bit The number of CPU feature keys is meant to map 1:1 to the number of CPU feature flags defined in cputable.h, and the latter must fit in an unsigned long. In commit 4db7327194db ("powerpc: Add option to use jump label for cpu_has_feature()"), I incorrectly defined NUM_CPU_FTR_KEYS to 64. There should be no real adverse consequences of this bug, other than us allocating too many keys. Fix it by using BITS_PER_LONG. 
Fixes: 4db7327194db ("powerpc: Add option to use jump label for cpu_has_feature()") Tested-by: Meelis Roos Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cpu_has_feature.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/cpu_has_feature.h b/arch/powerpc/include/asm/cpu_has_feature.h index 2ef55f8..b312b15 100644 --- a/arch/powerpc/include/asm/cpu_has_feature.h +++ b/arch/powerpc/include/asm/cpu_has_feature.h @@ -15,7 +15,7 @@ static inline bool early_cpu_has_feature(unsigned long feature) #ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS #include -#define NUM_CPU_FTR_KEYS 64 +#define NUM_CPU_FTR_KEYS BITS_PER_LONG extern struct static_key_true cpu_feature_keys[NUM_CPU_FTR_KEYS]; -- cgit v1.1 From 9395452b4aab7bc2475ef8935b4a4fb99d778d70 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 11 Sep 2016 20:02:25 -0700 Subject: Linux 4.8-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a4e6cc5..1a8c8dd 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 8 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Psychotic Stoned Sheep # *DOCUMENTATION* -- cgit v1.1 From f190fd92458da3e869b4e2c6289e2c617490ae53 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Fri, 2 Sep 2016 10:37:56 +0200 Subject: USB: serial: simple: add support for another Infineon flashloader This patch adds support for Infineon flashloader 0x8087/0x0801. The flashloader is used in Telit LE940B modem family with Telit flashing application. 
Signed-off-by: Daniele Palmas Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/usb-serial-simple.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index a204782..e98b6e5 100644 --- a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -54,7 +54,8 @@ DEVICE(funsoft, FUNSOFT_IDS); /* Infineon Flashloader driver */ #define FLASHLOADER_IDS() \ { USB_DEVICE_INTERFACE_CLASS(0x058b, 0x0041, USB_CLASS_CDC_DATA) }, \ - { USB_DEVICE(0x8087, 0x0716) } + { USB_DEVICE(0x8087, 0x0716) }, \ + { USB_DEVICE(0x8087, 0x0801) } DEVICE(flashloader, FLASHLOADER_IDS); /* Google Serial USB SubClass */ -- cgit v1.1 From 981b178964d03f6f8e6cca01568c17d1dbafdf0e Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 8 Sep 2016 11:11:35 +0200 Subject: dt-bindings: mmc: sdhci-st: Mention the discretionary "icn" clock The interconnect (ICN) clock is required for functional working of MMC on some ST platforms. When not supplied it can result in broken MMC and the following output: [ 13.916949] mmc0: Timeout waiting for hardware interrupt. 
[ 13.922349] sdhci: =========== REGISTER DUMP (mmc0)=========== [ 13.928175] sdhci: Sys addr: 0x00000000 | Version: 0x00001002 [ 13.933999] sdhci: Blk size: 0x00007040 | Blk cnt: 0x00000001 [ 13.939825] sdhci: Argument: 0x00fffff0 | Trn mode: 0x00000013 [ 13.945650] sdhci: Present: 0x1fff0206 | Host ctl: 0x00000011 [ 13.951475] sdhci: Power: 0x0000000f | Blk gap: 0x00000080 [ 13.957300] sdhci: Wake-up: 0x00000000 | Clock: 0x00003f07 [ 13.963126] sdhci: Timeout: 0x00000004 | Int stat: 0x00000000 [ 13.968952] sdhci: Int enab: 0x02ff008b | Sig enab: 0x02ff008b [ 13.974777] sdhci: AC12 err: 0x00000000 | Slot int: 0x00000000 [ 13.980602] sdhci: Caps: 0x21ed3281 | Caps_1: 0x00000000 [ 13.986428] sdhci: Cmd: 0x0000063a | Max curr: 0x00000000 [ 13.992252] sdhci: Host ctl2: 0x00000000 [ 13.996166] sdhci: ADMA Err: 0x00000000 | ADMA Ptr: 0x7c048200 [ 14.001990] sdhci: =========================================== [ 14.009802] mmc0: Got data interrupt 0x02000000 even though no data operation was in progress. Signed-off-by: Lee Jones Signed-off-by: Ulf Hansson --- Documentation/devicetree/bindings/mmc/sdhci-st.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/mmc/sdhci-st.txt b/Documentation/devicetree/bindings/mmc/sdhci-st.txt index 88faa91..3cd4c43 100644 --- a/Documentation/devicetree/bindings/mmc/sdhci-st.txt +++ b/Documentation/devicetree/bindings/mmc/sdhci-st.txt @@ -10,7 +10,7 @@ Required properties: subsystem (mmcss) inside the FlashSS (available in STiH407 SoC family). -- clock-names: Should be "mmc". +- clock-names: Should be "mmc" and "icn". (NB: The latter is not compulsory) See: Documentation/devicetree/bindings/resource-names.txt - clocks: Phandle to the clock. 
See: Documentation/devicetree/bindings/clock/clock-bindings.txt -- cgit v1.1 From 3ae50f4512ce831e8b63eb54ad969417ff30ada7 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 8 Sep 2016 11:11:36 +0200 Subject: mmc: sdhci-st: Handle interconnect clock Some ST platforms contain interconnect (ICN) clocks which must be handled correctly in order to obtain full functionality of a given IP. In this case, if the ICN clocks are not handled properly by the ST SDHCI driver MMC will break and the following output can be observed: [ 13.916949] mmc0: Timeout waiting for hardware interrupt. [ 13.922349] sdhci: =========== REGISTER DUMP (mmc0)=========== [ 13.928175] sdhci: Sys addr: 0x00000000 | Version: 0x00001002 [ 13.933999] sdhci: Blk size: 0x00007040 | Blk cnt: 0x00000001 [ 13.939825] sdhci: Argument: 0x00fffff0 | Trn mode: 0x00000013 [ 13.945650] sdhci: Present: 0x1fff0206 | Host ctl: 0x00000011 [ 13.951475] sdhci: Power: 0x0000000f | Blk gap: 0x00000080 [ 13.957300] sdhci: Wake-up: 0x00000000 | Clock: 0x00003f07 [ 13.963126] sdhci: Timeout: 0x00000004 | Int stat: 0x00000000 [ 13.968952] sdhci: Int enab: 0x02ff008b | Sig enab: 0x02ff008b [ 13.974777] sdhci: AC12 err: 0x00000000 | Slot int: 0x00000000 [ 13.980602] sdhci: Caps: 0x21ed3281 | Caps_1: 0x00000000 [ 13.986428] sdhci: Cmd: 0x0000063a | Max curr: 0x00000000 [ 13.992252] sdhci: Host ctl2: 0x00000000 [ 13.996166] sdhci: ADMA Err: 0x00000000 | ADMA Ptr: 0x7c048200 [ 14.001990] sdhci: =========================================== [ 14.009802] mmc0: Got data interrupt 0x02000000 even though no data operation was in progress. A decent point was raised about minimising the use of a local variable that we 'could' do without. I've chosen consistency over the possibility of reducing the local variable count by 1. Thinking that it's more important for the code to be grouped and authored in a similar manner/style for greater maintainability/readability.
Cc: stable@vger.kernel.org Tested-by: Peter Griffin Signed-off-by: Lee Jones Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-st.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-st.c b/drivers/mmc/host/sdhci-st.c index c95ba83..ed92ce72 100644 --- a/drivers/mmc/host/sdhci-st.c +++ b/drivers/mmc/host/sdhci-st.c @@ -28,6 +28,7 @@ struct st_mmc_platform_data { struct reset_control *rstc; + struct clk *icnclk; void __iomem *top_ioaddr; }; @@ -353,7 +354,7 @@ static int sdhci_st_probe(struct platform_device *pdev) struct sdhci_host *host; struct st_mmc_platform_data *pdata; struct sdhci_pltfm_host *pltfm_host; - struct clk *clk; + struct clk *clk, *icnclk; int ret = 0; u16 host_version; struct resource *res; @@ -365,6 +366,11 @@ static int sdhci_st_probe(struct platform_device *pdev) return PTR_ERR(clk); } + /* ICN clock isn't compulsory, but use it if it's provided. */ + icnclk = devm_clk_get(&pdev->dev, "icn"); + if (IS_ERR(icnclk)) + icnclk = NULL; + rstc = devm_reset_control_get(&pdev->dev, NULL); if (IS_ERR(rstc)) rstc = NULL; @@ -389,6 +395,7 @@ static int sdhci_st_probe(struct platform_device *pdev) } clk_prepare_enable(clk); + clk_prepare_enable(icnclk); /* Configure the FlashSS Top registers for setting eMMC TX/RX delay */ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, @@ -400,6 +407,7 @@ static int sdhci_st_probe(struct platform_device *pdev) } pltfm_host->clk = clk; + pdata->icnclk = icnclk; /* Configure the Arasan HC inside the flashSS */ st_mmcss_cconfig(np, host); @@ -422,6 +430,7 @@ static int sdhci_st_probe(struct platform_device *pdev) return 0; err_out: + clk_disable_unprepare(icnclk); clk_disable_unprepare(clk); err_of: sdhci_pltfm_free(pdev); @@ -442,6 +451,8 @@ static int sdhci_st_remove(struct platform_device *pdev) ret = sdhci_pltfm_unregister(pdev); + clk_disable_unprepare(pdata->icnclk); + if (rstc) reset_control_assert(rstc); @@ -462,6 +473,7 @@ static int 
sdhci_st_suspend(struct device *dev) if (pdata->rstc) reset_control_assert(pdata->rstc); + clk_disable_unprepare(pdata->icnclk); clk_disable_unprepare(pltfm_host->clk); out: return ret; @@ -475,6 +487,7 @@ static int sdhci_st_resume(struct device *dev) struct device_node *np = dev->of_node; clk_prepare_enable(pltfm_host->clk); + clk_prepare_enable(pdata->icnclk); if (pdata->rstc) reset_control_deassert(pdata->rstc); -- cgit v1.1 From b7fb44dacae04219c82f20897382ba34860d1a16 Mon Sep 17 00:00:00 2001 From: Denis Kenzior Date: Wed, 3 Aug 2016 17:02:15 -0500 Subject: nl80211: Allow GET_INTERFACE dumps to be filtered This patch allows GET_INTERFACE dumps to be filtered based on NL80211_ATTR_WIPHY or NL80211_ATTR_WDEV. The documentation for GET_INTERFACE mentions that this is possible: "Request an interface's configuration; either a dump request on a %NL80211_ATTR_WIPHY or ..." However, this behavior has not been implemented until now. Johannes: rewrite most of the patch: * use nl80211_dump_wiphy_parse() to also allow passing an interface to be able to dump its siblings * fix locking (must hold rtnl around using nl80211_fam.attrbuf) * make init self-contained instead of relying on other cb->args Signed-off-by: Denis Kenzior Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4997857..7ebad35 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2525,10 +2525,35 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * int if_idx = 0; int wp_start = cb->args[0]; int if_start = cb->args[1]; + int filter_wiphy = -1; struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; rtnl_lock(); + if (!cb->args[2]) { + struct nl80211_dump_wiphy_state state = { + .filter_wiphy = -1, + }; + int ret; + + ret = nl80211_dump_wiphy_parse(skb, cb, &state); + if (ret) + return ret; + + filter_wiphy = 
state.filter_wiphy; + + /* + * if filtering, set cb->args[2] to +1 since 0 is the default + * value needed to determine that parsing is necessary. + */ + if (filter_wiphy >= 0) + cb->args[2] = filter_wiphy + 1; + else + cb->args[2] = -1; + } else if (cb->args[2] > 0) { + filter_wiphy = cb->args[2] - 1; + } + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { if (!net_eq(wiphy_net(&rdev->wiphy), sock_net(skb->sk))) continue; @@ -2536,6 +2561,10 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * wp_idx++; continue; } + + if (filter_wiphy >= 0 && filter_wiphy != rdev->wiphy_idx) + continue; + if_idx = 0; list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { -- cgit v1.1 From 5a1f044b5048e834f936fbb33a93e5d8410779ec Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 29 Aug 2016 23:25:14 +0300 Subject: cfg80211: clarify the requirements of .disconnect() cfg80211 expects the .disconnect() handler to call cfg80211_disconnected() when done. Make this requirement more explicit. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 9c23f4d3..d5e7f69 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2423,7 +2423,8 @@ struct cfg80211_qos_map { * cases, the result of roaming is indicated with a call to * cfg80211_roamed() or cfg80211_roamed_bss(). * (invoked with the wireless_dev mutex held) - * @disconnect: Disconnect from the BSS/ESS. + * @disconnect: Disconnect from the BSS/ESS. Once done, call + * cfg80211_disconnected(). * (invoked with the wireless_dev mutex held) * * @join_ibss: Join the specified IBSS (or create if necessary).
Once done, call -- cgit v1.1 From d82121845d44334f5ec3c98906d1e4a592350beb Mon Sep 17 00:00:00 2001 From: Aviya Erenfeld Date: Mon, 29 Aug 2016 23:25:15 +0300 Subject: mac80211: refactor monitor representation in sdata Insert the u32 monitor flags variable in a new structure that represents a monitor interface. This will allow to add more configuration variables to that structure which will happen in an upcoming change. Signed-off-by: Aviya Erenfeld Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 8 ++++---- net/mac80211/driver-ops.c | 2 +- net/mac80211/ieee80211_i.h | 6 +++++- net/mac80211/iface.c | 16 ++++++++-------- net/mac80211/rx.c | 4 ++-- net/mac80211/status.c | 2 +- net/mac80211/tx.c | 2 +- net/mac80211/util.c | 2 +- 8 files changed, 23 insertions(+), 19 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 543b1d4..f2c8cd2 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -39,7 +39,7 @@ static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy, if (type == NL80211_IFTYPE_MONITOR && flags) { sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); - sdata->u.mntr_flags = *flags; + sdata->u.mntr.flags = *flags; } return wdev; @@ -89,11 +89,11 @@ static int ieee80211_change_iface(struct wiphy *wiphy, * cooked_mntrs, monitor and all fif_* counters * reconfigure hardware */ - if ((*flags & mask) != (sdata->u.mntr_flags & mask)) + if ((*flags & mask) != (sdata->u.mntr.flags & mask)) return -EBUSY; ieee80211_adjust_monitor_flags(sdata, -1); - sdata->u.mntr_flags = *flags; + sdata->u.mntr.flags = *flags; ieee80211_adjust_monitor_flags(sdata, 1); ieee80211_configure_filter(local); @@ -103,7 +103,7 @@ static int ieee80211_change_iface(struct wiphy *wiphy, * and ieee80211_do_open take care of "everything" * mentioned in the comment above. 
*/ - sdata->u.mntr_flags = *flags; + sdata->u.mntr.flags = *flags; } } diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index c258f10..c701b64 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -62,7 +62,7 @@ int drv_add_interface(struct ieee80211_local *local, if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN || (sdata->vif.type == NL80211_IFTYPE_MONITOR && !ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) && - !(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)))) + !(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE)))) return -EINVAL; trace_drv_add_interface(local, sdata); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index f56d342..9211cce 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -824,6 +824,10 @@ struct txq_info { struct ieee80211_txq txq; }; +struct ieee80211_if_mntr { + u32 flags; +}; + struct ieee80211_sub_if_data { struct list_head list; @@ -922,7 +926,7 @@ struct ieee80211_sub_if_data { struct ieee80211_if_ibss ibss; struct ieee80211_if_mesh mesh; struct ieee80211_if_ocb ocb; - u32 mntr_flags; + struct ieee80211_if_mntr mntr; } u; #ifdef CONFIG_MAC80211_DEBUGFS diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index b123a9e..c8509d9 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -188,7 +188,7 @@ static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr, continue; if (iter->vif.type == NL80211_IFTYPE_MONITOR && - !(iter->u.mntr_flags & MONITOR_FLAG_ACTIVE)) + !(iter->u.mntr.flags & MONITOR_FLAG_ACTIVE)) continue; m = iter->vif.addr; @@ -217,7 +217,7 @@ static int ieee80211_change_mac(struct net_device *dev, void *addr) return -EBUSY; if (sdata->vif.type == NL80211_IFTYPE_MONITOR && - !(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)) + !(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE)) check_dup = false; ret = ieee80211_verify_mac(sdata, sa->sa_data, check_dup); @@ -357,7 +357,7 @@ void ieee80211_adjust_monitor_flags(struct 
ieee80211_sub_if_data *sdata, const int offset) { struct ieee80211_local *local = sdata->local; - u32 flags = sdata->u.mntr_flags; + u32 flags = sdata->u.mntr.flags; #define ADJUST(_f, _s) do { \ if (flags & MONITOR_FLAG_##_f) \ @@ -589,12 +589,12 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) } break; case NL80211_IFTYPE_MONITOR: - if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) { + if (sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES) { local->cooked_mntrs++; break; } - if (sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE) { + if (sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) { res = drv_add_interface(local, sdata); if (res) goto err_stop; @@ -926,7 +926,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, /* no need to tell driver */ break; case NL80211_IFTYPE_MONITOR: - if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) { + if (sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES) { local->cooked_mntrs--; break; } @@ -1012,7 +1012,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, ieee80211_recalc_idle(local); mutex_unlock(&local->mtx); - if (!(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)) + if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE)) break; /* fall through */ @@ -1444,7 +1444,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_MONITOR: sdata->dev->type = ARPHRD_IEEE80211_RADIOTAP; sdata->dev->netdev_ops = &ieee80211_monitorif_ops; - sdata->u.mntr_flags = MONITOR_FLAG_CONTROL | + sdata->u.mntr.flags = MONITOR_FLAG_CONTROL | MONITOR_FLAG_OTHER_BSS; break; case NL80211_IFTYPE_WDS: diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 9dce3b1..708c3b1 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -567,7 +567,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, if (sdata->vif.type != NL80211_IFTYPE_MONITOR) continue; - if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) + if (sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES) 
continue; if (!ieee80211_sdata_running(sdata)) @@ -3147,7 +3147,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx, continue; if (sdata->vif.type != NL80211_IFTYPE_MONITOR || - !(sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES)) + !(sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES)) continue; if (prev_dev) { diff --git a/net/mac80211/status.c b/net/mac80211/status.c index a2a6826..fabd9ff 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -709,7 +709,7 @@ void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb, if (!ieee80211_sdata_running(sdata)) continue; - if ((sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) && + if ((sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES) && !send_to_cooked) continue; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 1d0746d..efc38e7 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1643,7 +1643,7 @@ static bool __ieee80211_tx(struct ieee80211_local *local, switch (sdata->vif.type) { case NL80211_IFTYPE_MONITOR: - if (sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE) { + if (sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) { vif = &sdata->vif; break; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 42bf0b6..e777c2a 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -598,7 +598,7 @@ static void __iterate_interfaces(struct ieee80211_local *local, list_for_each_entry_rcu(sdata, &local->interfaces, list) { switch (sdata->vif.type) { case NL80211_IFTYPE_MONITOR: - if (!(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)) + if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE)) continue; break; case NL80211_IFTYPE_AP_VLAN: -- cgit v1.1 From e4819013840bbad025ed6da660c1e8b3e9e8430a Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Wed, 31 Aug 2016 00:35:07 +0530 Subject: cfg80211: Remove deprecated create_singlethread_workqueue The workqueue "cfg80211_wq" is involved in cleanup, scan and event related works. 
It queues multiple work items &rdev->event_work, &rdev->dfs_update_channels_wk, &wiphy_to_rdev(request->wiphy)->scan_done_wk, &wiphy_to_rdev(wiphy)->sched_scan_results_wk, which require strict execution ordering. Hence, an ordered dedicated workqueue has been used. Since it's a wireless driver, WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar Acked-by: Tejun Heo Signed-off-by: Johannes Berg --- net/wireless/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/core.c b/net/wireless/core.c index 2029b49a..4911cd9 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1252,7 +1252,7 @@ static int __init cfg80211_init(void) if (err) goto out_fail_reg; - cfg80211_wq = create_singlethread_workqueue("cfg80211"); + cfg80211_wq = alloc_ordered_workqueue("cfg80211", WQ_MEM_RECLAIM); if (!cfg80211_wq) { err = -ENOMEM; goto out_fail_wq; -- cgit v1.1 From 480dd46b9d6812e5fb7172c305ee0f1154c26eed Mon Sep 17 00:00:00 2001 From: Maxim Altshul Date: Mon, 22 Aug 2016 17:14:04 +0300 Subject: mac80211: RX BA support for sta max_rx_aggregation_subframes The ability to change the max_rx_aggregation frames is useful in cases of IOP. There exist some devices (latest mobile phones and some APs) that tend to not respect a BA session's maximum size (in Kbps). These devices won't respect the AMPDU size that was negotiated during association (even though they do respect the maximal number of packets). This violation is characterized by a valid number of packets in a single AMPDU. Even so, the total size will exceed the size negotiated during association. Eventually, this will cause some undefined behavior, which in turn causes the hw to drop packets, causing the throughput to plummet. This patch will make the subframe limitation be held by each station, instead of being held only by hw.
Signed-off-by: Maxim Altshul Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ++++ net/mac80211/agg-rx.c | 7 +++++-- net/mac80211/sta_info.c | 3 +++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index cca510a..a1457ca 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1735,6 +1735,9 @@ struct ieee80211_sta_rates { * @supp_rates: Bitmap of supported rates (per band) * @ht_cap: HT capabilities of this STA; restricted to our own capabilities * @vht_cap: VHT capabilities of this STA; restricted to our own capabilities + * @max_rx_aggregation_subframes: maximal amount of frames in a single AMPDU + * that this station is allowed to transmit to us. + * Can be modified by driver. * @wme: indicates whether the STA supports QoS/WME (if local devices does, * otherwise always false) * @drv_priv: data area for driver use, will always be aligned to @@ -1775,6 +1778,7 @@ struct ieee80211_sta { u16 aid; struct ieee80211_sta_ht_cap ht_cap; struct ieee80211_sta_vht_cap vht_cap; + u8 max_rx_aggregation_subframes; bool wme; u8 uapsd_queues; u8 max_sp; diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index a9aff60..282e99b 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -298,10 +298,13 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, buf_size = IEEE80211_MAX_AMPDU_BUF; /* make sure the size doesn't exceed the maximum supported by the hw */ - if (buf_size > local->hw.max_rx_aggregation_subframes) - buf_size = local->hw.max_rx_aggregation_subframes; + if (buf_size > sta->sta.max_rx_aggregation_subframes) + buf_size = sta->sta.max_rx_aggregation_subframes; params.buf_size = buf_size; + ht_dbg(sta->sdata, "AddBA Req buf_size=%d for %pM\n", + buf_size, sta->sta.addr); + /* examine state machine */ mutex_lock(&sta->ampdu_mlme.mtx); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 19f14c9..5e70fa5 100644 --- a/net/mac80211/sta_info.c +++ 
b/net/mac80211/sta_info.c @@ -340,6 +340,9 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, memcpy(sta->addr, addr, ETH_ALEN); memcpy(sta->sta.addr, addr, ETH_ALEN); + sta->sta.max_rx_aggregation_subframes = + local->hw.max_rx_aggregation_subframes; + sta->local = local; sta->sdata = sdata; sta->rx_stats.last_rx = jiffies; -- cgit v1.1 From 42bd20d99857e69e368d5421ea402127d5835cd3 Mon Sep 17 00:00:00 2001 From: Aviya Erenfeld Date: Mon, 29 Aug 2016 23:25:16 +0300 Subject: mac80211: add support for MU-MIMO air sniffer Add support for a MU-MIMO air sniffer according to groupID: in monitor mode, use a given MU-MIMO groupID to monitor stations that belong to that group using MU-MIMO. Add support for following a station according to its MAC address using VHT MU-MIMO sniffer: the monitor waits until it gets an action MU-MIMO notification frame, then parses it in order to find the groupID that corresponds to the given MAC address and monitors packets destined to that groupID using VHT MU-MIMO. 
Signed-off-by: Aviya Erenfeld Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 23 ++++++++++++++++++++++- net/mac80211/driver-ops.h | 3 ++- net/mac80211/ieee80211_i.h | 3 ++- net/mac80211/iface.c | 5 +++++ net/mac80211/rx.c | 20 ++++++++++++++++++++ 5 files changed, 51 insertions(+), 3 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index f2c8cd2..5d4afea 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -73,8 +73,29 @@ static int ieee80211_change_iface(struct wiphy *wiphy, sdata->u.mgd.use_4addr = params->use_4addr; } - if (sdata->vif.type == NL80211_IFTYPE_MONITOR && flags) { + if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { struct ieee80211_local *local = sdata->local; + struct ieee80211_sub_if_data *monitor_sdata; + u32 mu_mntr_cap_flag = NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER; + + monitor_sdata = rtnl_dereference(local->monitor_sdata); + if (monitor_sdata && + wiphy_ext_feature_isset(wiphy, mu_mntr_cap_flag)) { + memcpy(monitor_sdata->vif.bss_conf.mu_group.membership, + params->vht_mumimo_groups, WLAN_MEMBERSHIP_LEN); + memcpy(monitor_sdata->vif.bss_conf.mu_group.position, + params->vht_mumimo_groups + WLAN_MEMBERSHIP_LEN, + WLAN_USER_POSITION_LEN); + monitor_sdata->vif.mu_mimo_owner = true; + ieee80211_bss_info_change_notify(monitor_sdata, + BSS_CHANGED_MU_GROUPS); + + ether_addr_copy(monitor_sdata->u.mntr.mu_follow_addr, + params->macaddr); + } + + if (!flags) + return 0; if (ieee80211_sdata_running(sdata)) { u32 mask = MONITOR_FLAG_COOK_FRAMES | diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 42a41ae..c39f93b 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -162,7 +162,8 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local, return; if (WARN_ON_ONCE(sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE || - sdata->vif.type == NL80211_IFTYPE_MONITOR)) + (sdata->vif.type == NL80211_IFTYPE_MONITOR && + !sdata->vif.mu_mimo_owner))) return; 
if (!check_sdata_in_driver(sdata)) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 9211cce..7576168 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -3,7 +3,7 @@ * Copyright 2005, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc * Copyright 2007-2010 Johannes Berg - * Copyright 2013-2014 Intel Mobile Communications GmbH + * Copyright 2013-2015 Intel Mobile Communications GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -826,6 +826,7 @@ struct txq_info { struct ieee80211_if_mntr { u32 flags; + u8 mu_follow_addr[ETH_ALEN] __aligned(2); }; struct ieee80211_sub_if_data { diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index c8509d9..b0abddc7 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -43,6 +43,8 @@ * by either the RTNL, the iflist_mtx or RCU. */ +static void ieee80211_iface_work(struct work_struct *work); + bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata) { struct ieee80211_chanctx_conf *chanctx_conf; @@ -448,6 +450,9 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) return ret; } + skb_queue_head_init(&sdata->skb_queue); + INIT_WORK(&sdata->work, ieee80211_iface_work); + return 0; } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 708c3b1..6a265aa 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -485,6 +485,9 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, struct net_device *prev_dev = NULL; int present_fcs_len = 0; unsigned int rtap_vendor_space = 0; + struct ieee80211_mgmt *mgmt; + struct ieee80211_sub_if_data *monitor_sdata = + rcu_dereference(local->monitor_sdata); if (unlikely(status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA)) { struct ieee80211_vendor_radiotap *rtap = (void *)origskb->data; @@ -585,6 +588,23 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, 
ieee80211_rx_stats(sdata->dev, skb->len); } + mgmt = (void *)skb->data; + if (monitor_sdata && + skb->len >= IEEE80211_MIN_ACTION_SIZE + 1 + VHT_MUMIMO_GROUPS_DATA_LEN && + ieee80211_is_action(mgmt->frame_control) && + mgmt->u.action.category == WLAN_CATEGORY_VHT && + mgmt->u.action.u.vht_group_notif.action_code == WLAN_VHT_ACTION_GROUPID_MGMT && + is_valid_ether_addr(monitor_sdata->u.mntr.mu_follow_addr) && + ether_addr_equal(mgmt->da, monitor_sdata->u.mntr.mu_follow_addr)) { + struct sk_buff *mu_skb = skb_copy(skb, GFP_ATOMIC); + + if (mu_skb) { + mu_skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; + skb_queue_tail(&monitor_sdata->skb_queue, mu_skb); + ieee80211_queue_work(&local->hw, &monitor_sdata->work); + } + } + if (prev_dev) { skb->dev = prev_dev; netif_receive_skb(skb); -- cgit v1.1 From 99ee7cae3bf3ce04e90d7b193d9f4f59a7044d91 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Aug 2016 23:25:17 +0300 Subject: mac80211: add support for radiotap timestamp field Use the existing device timestamp from the RX status information to add support for the new radiotap timestamp field. Currently only 32-bit counters are supported, but we also add the radiotap mactime where applicable. This new field allows more flexibility in where the timestamp is taken etc. The non-timestamp data in the field is taken from a new field in the hw struct. Signed-off-by: Johannes Berg --- include/net/ieee80211_radiotap.h | 21 +++++++++++++++++++++ include/net/mac80211.h | 12 ++++++++++++ net/mac80211/main.c | 3 +++ net/mac80211/rx.c | 30 ++++++++++++++++++++++++++++++ 4 files changed, 66 insertions(+) diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index b0fd947..ba07b9d 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -190,6 +190,10 @@ struct ieee80211_radiotap_header { * IEEE80211_RADIOTAP_VHT u16, u8, u8, u8[4], u8, u8, u16 * * Contains VHT information about this frame. 
+ * + * IEEE80211_RADIOTAP_TIMESTAMP u64, u16, u8, u8 variable + * + * Contains timestamp information for this frame. */ enum ieee80211_radiotap_type { IEEE80211_RADIOTAP_TSFT = 0, @@ -214,6 +218,7 @@ enum ieee80211_radiotap_type { IEEE80211_RADIOTAP_MCS = 19, IEEE80211_RADIOTAP_AMPDU_STATUS = 20, IEEE80211_RADIOTAP_VHT = 21, + IEEE80211_RADIOTAP_TIMESTAMP = 22, /* valid in every it_present bitmap, even vendor namespaces */ IEEE80211_RADIOTAP_RADIOTAP_NAMESPACE = 29, @@ -321,6 +326,22 @@ enum ieee80211_radiotap_type { #define IEEE80211_RADIOTAP_CODING_LDPC_USER2 0x04 #define IEEE80211_RADIOTAP_CODING_LDPC_USER3 0x08 +/* For IEEE80211_RADIOTAP_TIMESTAMP */ +#define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_MASK 0x000F +#define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_MS 0x0000 +#define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_US 0x0001 +#define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_NS 0x0003 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_MASK 0x00F0 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_BEGIN_MDPU 0x0000 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_MPDU 0x0010 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_PPDU 0x0020 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_PLCP_SIG_ACQ 0x0030 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_UNKNOWN 0x00F0 + +#define IEEE80211_RADIOTAP_TIMESTAMP_FLAG_64BIT 0x00 +#define IEEE80211_RADIOTAP_TIMESTAMP_FLAG_32BIT 0x01 +#define IEEE80211_RADIOTAP_TIMESTAMP_FLAG_ACCURACY 0x02 + /* helpers */ static inline int ieee80211_get_radiotap_len(unsigned char *data) { diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a1457ca..08bac23 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2145,6 +2145,14 @@ enum ieee80211_hw_flags { * the default is _GI | _BANDWIDTH. * Use the %IEEE80211_RADIOTAP_VHT_KNOWN_* values. 
* + * @radiotap_timestamp: Information for the radiotap timestamp field; if the + * 'units_pos' member is set to a non-negative value it must be set to + * a combination of a IEEE80211_RADIOTAP_TIMESTAMP_UNIT_* and a + * IEEE80211_RADIOTAP_TIMESTAMP_SPOS_* value, and then the timestamp + * field will be added and populated from the &struct ieee80211_rx_status + * device_timestamp. If the 'accuracy' member is non-negative, it's put + * into the accuracy radiotap field and the accuracy known flag is set. + * * @netdev_features: netdev features to be set in each netdev created * from this HW. Note that not all features are usable with mac80211, * other features will be rejected during HW registration. @@ -2188,6 +2196,10 @@ struct ieee80211_hw { u8 offchannel_tx_hw_queue; u8 radiotap_mcs_details; u16 radiotap_vht_details; + struct { + int units_pos; + s16 accuracy; + } radiotap_timestamp; netdev_features_t netdev_features; u8 uapsd_queues; u8 uapsd_max_sp_len; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index d00ea9b..ac053a9 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -660,6 +660,9 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, ieee80211_roc_setup(local); + local->hw.radiotap_timestamp.units_pos = -1; + local->hw.radiotap_timestamp.accuracy = -1; + return &local->hw; err_free: wiphy_free(wiphy); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 6a265aa..284f0f2 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -180,6 +180,11 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local, len += 12; } + if (local->hw.radiotap_timestamp.units_pos >= 0) { + len = ALIGN(len, 8); + len += 12; + } + if (status->chains) { /* antenna and antenna signal fields */ len += 2 * hweight8(status->chains); @@ -447,6 +452,31 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, pos += 2; } + if (local->hw.radiotap_timestamp.units_pos >= 0) { + u16 accuracy = 0; + u8 flags = 
IEEE80211_RADIOTAP_TIMESTAMP_FLAG_32BIT; + + rthdr->it_present |= + cpu_to_le32(1 << IEEE80211_RADIOTAP_TIMESTAMP); + + /* ensure 8 byte alignment */ + while ((pos - (u8 *)rthdr) & 7) + pos++; + + put_unaligned_le64(status->device_timestamp, pos); + pos += sizeof(u64); + + if (local->hw.radiotap_timestamp.accuracy >= 0) { + accuracy = local->hw.radiotap_timestamp.accuracy; + flags |= IEEE80211_RADIOTAP_TIMESTAMP_FLAG_ACCURACY; + } + put_unaligned_le16(accuracy, pos); + pos += sizeof(u16); + + *pos++ = local->hw.radiotap_timestamp.units_pos; + *pos++ = flags; + } + for_each_set_bit(chain, &chains, IEEE80211_MAX_CHAINS) { *pos++ = status->chain_signal[chain]; *pos++ = chain; -- cgit v1.1 From bfe40fa395ddc41e45310a4426574703a05e1177 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Aug 2016 23:25:18 +0300 Subject: mac80211: send delBA on unexpected BlockAck data frames When we receive data frames with ACK policy BlockAck, send delBA as requested by the 802.11 spec. Since this would be happening for every frame inside an A-MPDU if it's really received outside a session, limit it to a single attempt. 
Signed-off-by: Johannes Berg --- net/mac80211/agg-rx.c | 4 +++- net/mac80211/rx.c | 9 ++++++++- net/mac80211/sta_info.h | 3 +++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 282e99b..a5d69df 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -409,8 +409,10 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, } end: - if (status == WLAN_STATUS_SUCCESS) + if (status == WLAN_STATUS_SUCCESS) { __set_bit(tid, sta->ampdu_mlme.agg_session_valid); + __clear_bit(tid, sta->ampdu_mlme.unexpected_agg); + } mutex_unlock(&sta->ampdu_mlme.mtx); end_no_lock: diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 284f0f2..ad636c9 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1122,8 +1122,15 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]); - if (!tid_agg_rx) + if (!tid_agg_rx) { + if (ack_policy == IEEE80211_QOS_CTL_ACK_POLICY_BLOCKACK && + !test_bit(tid, rx->sta->ampdu_mlme.agg_session_valid) && + !test_and_set_bit(tid, rx->sta->ampdu_mlme.unexpected_agg)) + ieee80211_send_delba(rx->sdata, rx->sta->sta.addr, tid, + WLAN_BACK_RECIPIENT, + WLAN_REASON_QSTA_REQUIRE_SETUP); goto dont_reorder; + } /* qos null data frames are excluded */ if (unlikely(hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_NULLFUNC))) diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 0556be3..530231b 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -230,6 +230,8 @@ struct tid_ampdu_rx { * @tid_rx_stop_requested: bitmap indicating which BA sessions per TID the * driver requested to close until the work for it runs * @agg_session_valid: bitmap indicating which TID has a rx BA session open on + * @unexpected_agg: bitmap indicating which TID already sent a delBA due to + * unexpected aggregation related frames outside a session 
* @work: work struct for starting/stopping aggregation * @tid_tx: aggregation info for Tx per TID * @tid_start_tx: sessions where start was requested @@ -244,6 +246,7 @@ struct sta_ampdu_mlme { unsigned long tid_rx_timer_expired[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; unsigned long tid_rx_stop_requested[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; unsigned long agg_session_valid[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; + unsigned long unexpected_agg[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; /* tx */ struct work_struct work; struct tid_ampdu_tx __rcu *tid_tx[IEEE80211_NUM_TIDS]; -- cgit v1.1 From 53f249747d5ec6434415a6895b5690bf4f1d5d7d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Aug 2016 23:25:19 +0300 Subject: mac80211: send delBA on unexpected BlockAck Request If we don't have a BA session, send delBA, as requested by the IEEE 802.11 spec. Apply the same limit of sending such a delBA only once as in the previous patch. Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index ad636c9..e796060 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2592,6 +2592,12 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) tid = le16_to_cpu(bar_data.control) >> 12; + if (!test_bit(tid, rx->sta->ampdu_mlme.agg_session_valid) && + !test_and_set_bit(tid, rx->sta->ampdu_mlme.unexpected_agg)) + ieee80211_send_delba(rx->sdata, rx->sta->sta.addr, tid, + WLAN_BACK_RECIPIENT, + WLAN_REASON_QSTA_REQUIRE_SETUP); + tid_agg_rx = rcu_dereference(rx->sta->ampdu_mlme.tid_rx[tid]); if (!tid_agg_rx) return RX_DROP_MONITOR; -- cgit v1.1 From 83843c80dcf11a78995d167255b03072a1e49c2c Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 28 Aug 2016 13:10:37 +0200 Subject: mac80211: fix tim recalculation after PS response Handle the case where the mac80211 intermediate queues are empty and the driver has buffered frames Fixes: ba8c3d6f16a1 ("mac80211: add an intermediate 
software queue implementation") Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 76b737d..aa58df8 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1616,7 +1616,6 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, sta_info_recalc_tim(sta); } else { - unsigned long tids = sta->txq_buffered_tids & driver_release_tids; int tid; /* @@ -1648,7 +1647,8 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) { struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]); - if (!(tids & BIT(tid)) || txqi->tin.backlog_packets) + if (!(driver_release_tids & BIT(tid)) || + txqi->tin.backlog_packets) continue; sta_info_recalc_tim(sta); -- cgit v1.1 From df6ef5d8a87ace995d5c10a7bd684be05911a321 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 4 Sep 2016 18:00:59 +0200 Subject: mac80211: fix sequence number assignment for PS response frames When using intermediate queues, sequence number allocation is deferred until dequeue. This doesn't work for PS response frames, which bypass those queues. 
Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 65 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 5023966..cc8e955 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -796,6 +796,36 @@ static __le16 ieee80211_tx_next_seq(struct sta_info *sta, int tid) return ret; } +static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local, + struct ieee80211_vif *vif, + struct ieee80211_sta *pubsta, + struct sk_buff *skb) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_txq *txq = NULL; + + if ((info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) || + (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE)) + return NULL; + + if (!ieee80211_is_data(hdr->frame_control)) + return NULL; + + if (pubsta) { + u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; + + txq = pubsta->txq[tid]; + } else if (vif) { + txq = vif->txq; + } + + if (!txq) + return NULL; + + return to_txq_info(txq); +} + static ieee80211_tx_result debug_noinline ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) { @@ -853,7 +883,8 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) tid = *qc & IEEE80211_QOS_CTL_TID_MASK; tx->sta->tx_stats.msdu[tid]++; - if (!tx->sta->sta.txq[0]) + if (!ieee80211_get_txq(tx->local, info->control.vif, &tx->sta->sta, + tx->skb)) hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid); return TX_CONTINUE; @@ -1243,36 +1274,6 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, return TX_CONTINUE; } -static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local, - struct ieee80211_vif *vif, - struct ieee80211_sta *pubsta, - struct sk_buff *skb) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct ieee80211_txq *txq = NULL; - - 
if ((info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) || - (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE)) - return NULL; - - if (!ieee80211_is_data(hdr->frame_control)) - return NULL; - - if (pubsta) { - u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; - - txq = pubsta->txq[tid]; - } else if (vif) { - txq = vif->txq; - } - - if (!txq) - return NULL; - - return to_txq_info(txq); -} - static void ieee80211_set_skb_enqueue_time(struct sk_buff *skb) { IEEE80211_SKB_CB(skb)->control.enqueue_time = codel_get_time(); @@ -3264,7 +3265,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) { *ieee80211_get_qos_ctl(hdr) = tid; - if (!sta->sta.txq[0]) + if (!ieee80211_get_txq(local, &sdata->vif, &sta->sta, skb)) hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid); } else { info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; -- cgit v1.1 From ad8d52b897a14711e026889053befbbee7fd51ba Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 31 Aug 2016 08:49:43 +0100 Subject: pcmcia: ds: fix suspend/resume PCMCIA suspend/resume no longer works since the commit mentioned below, as the callbacks are no longer made. Convert the driver to the new dev_pm_ops, which restores the suspend/resume functionality. Tested on the arm arch Assabet platform. 
Fixes: aa8e54b559479 ("PM / sleep: Go direct_complete if driver has no callbacks") Signed-off-by: Russell King --- drivers/pcmcia/ds.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c index 489ea10..69b5e81 100644 --- a/drivers/pcmcia/ds.c +++ b/drivers/pcmcia/ds.c @@ -977,7 +977,7 @@ static int pcmcia_bus_uevent(struct device *dev, struct kobj_uevent_env *env) /************************ runtime PM support ***************************/ -static int pcmcia_dev_suspend(struct device *dev, pm_message_t state); +static int pcmcia_dev_suspend(struct device *dev); static int pcmcia_dev_resume(struct device *dev); static int runtime_suspend(struct device *dev) @@ -985,7 +985,7 @@ static int runtime_suspend(struct device *dev) int rc; device_lock(dev); - rc = pcmcia_dev_suspend(dev, PMSG_SUSPEND); + rc = pcmcia_dev_suspend(dev); device_unlock(dev); return rc; } @@ -1135,7 +1135,7 @@ ATTRIBUTE_GROUPS(pcmcia_dev); /* PM support, also needed for reset */ -static int pcmcia_dev_suspend(struct device *dev, pm_message_t state) +static int pcmcia_dev_suspend(struct device *dev) { struct pcmcia_device *p_dev = to_pcmcia_dev(dev); struct pcmcia_driver *p_drv = NULL; @@ -1410,6 +1410,9 @@ static struct class_interface pcmcia_bus_interface __refdata = { .remove_dev = &pcmcia_bus_remove_socket, }; +static const struct dev_pm_ops pcmcia_bus_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(pcmcia_dev_suspend, pcmcia_dev_resume) +}; struct bus_type pcmcia_bus_type = { .name = "pcmcia", @@ -1418,8 +1421,7 @@ struct bus_type pcmcia_bus_type = { .dev_groups = pcmcia_dev_groups, .probe = pcmcia_device_probe, .remove = pcmcia_device_remove, - .suspend = pcmcia_dev_suspend, - .resume = pcmcia_dev_resume, + .pm = &pcmcia_bus_pm_ops, }; -- cgit v1.1 From 6dec04e8f30a8cf1e782500244d8601c1f8505ad Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 31 Aug 2016 08:49:43 +0100 Subject: pcmcia: sa11xx_base: fix reporting of timing information 
Fix the reporting of the currently programmed timing information. These entries have been showing zero due to the clock rate being a factor of 1000 too big. With this change, we go from: I/O : 165 (0) attribute: 300 (0) common : 300 (0) to: I/O : 165 (172) attribute: 300 (316) common : 300 (316) Signed-off-by: Russell King --- drivers/pcmcia/sa11xx_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pcmcia/sa11xx_base.c b/drivers/pcmcia/sa11xx_base.c index 9f6ec87..af37b7e 100644 --- a/drivers/pcmcia/sa11xx_base.c +++ b/drivers/pcmcia/sa11xx_base.c @@ -144,7 +144,7 @@ static int sa1100_pcmcia_show_timing(struct soc_pcmcia_socket *skt, char *buf) { struct soc_pcmcia_timing timing; - unsigned int clock = clk_get_rate(skt->clk); + unsigned int clock = clk_get_rate(skt->clk) / 1000; unsigned long mecr = MECR; char *p = buf; -- cgit v1.1 From cbd5a16820e576d26bf985ad62b8c4cdf792fb45 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 31 Aug 2016 08:49:43 +0100 Subject: pcmcia: sa11xx_base: add units to the timing information Add units to the timing information, so we know that the numbers are nanoseconds. 
The output changes from: I/O : 165 (172) attribute: 300 (316) common : 300 (316) to: I/O : 165ns (172ns) attribute: 300ns (316ns) common : 300ns (316ns) Signed-off-by: Russell King --- drivers/pcmcia/sa11xx_base.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pcmcia/sa11xx_base.c b/drivers/pcmcia/sa11xx_base.c index af37b7e..48140ac 100644 --- a/drivers/pcmcia/sa11xx_base.c +++ b/drivers/pcmcia/sa11xx_base.c @@ -150,13 +150,13 @@ sa1100_pcmcia_show_timing(struct soc_pcmcia_socket *skt, char *buf) soc_common_pcmcia_get_timing(skt, &timing); - p+=sprintf(p, "I/O : %u (%u)\n", timing.io, + p+=sprintf(p, "I/O : %uns (%uns)\n", timing.io, sa1100_pcmcia_cmd_time(clock, MECR_BSIO_GET(mecr, skt->nr))); - p+=sprintf(p, "attribute: %u (%u)\n", timing.attr, + p+=sprintf(p, "attribute: %uns (%uns)\n", timing.attr, sa1100_pcmcia_cmd_time(clock, MECR_BSA_GET(mecr, skt->nr))); - p+=sprintf(p, "common : %u (%u)\n", timing.mem, + p+=sprintf(p, "common : %uns (%uns)\n", timing.mem, sa1100_pcmcia_cmd_time(clock, MECR_BSM_GET(mecr, skt->nr))); return p - buf; -- cgit v1.1 From a466ebd2fc6a793e55f028a008b9f094d7d30fe3 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 31 Aug 2016 08:49:43 +0100 Subject: pcmcia: soc_common: fix SS_STSCHG polarity SS_STSCHG should be set for an IO card when the BVD1 signal is asserted low, not high. Signed-off-by: Russell King --- drivers/pcmcia/soc_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pcmcia/soc_common.c b/drivers/pcmcia/soc_common.c index eed5e9c..d5ca760 100644 --- a/drivers/pcmcia/soc_common.c +++ b/drivers/pcmcia/soc_common.c @@ -235,7 +235,7 @@ static unsigned int soc_common_pcmcia_skt_state(struct soc_pcmcia_socket *skt) stat |= skt->cs_state.Vcc ? SS_POWERON : 0; if (skt->cs_state.flags & SS_IOCARD) - stat |= state.bvd1 ? SS_STSCHG : 0; + stat |= state.bvd1 ? 
0 : SS_STSCHG; else { if (state.bvd1 == 0) stat |= SS_BATDEAD; -- cgit v1.1 From 3f8df892b2312011f2ba73aedc0a192d70b8844e Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 2 Sep 2016 10:14:20 +0100 Subject: pcmcia: sa1111: fix propagation of lowlevel board init return code When testing Lubbock, it was noticed that the sa1111 pcmcia driver bound but was not functional due to no sockets being registered. This is because the return code from the lowlevel board initialisation was not being propagated out of the probe function. Fix this. Tested-by: Robert Jarzmik Signed-off-by: Russell King --- drivers/pcmcia/sa1111_badge4.c | 22 ++++++++-------------- drivers/pcmcia/sa1111_generic.c | 22 +++++++++++++++++----- drivers/pcmcia/sa1111_jornada720.c | 25 ++++++++++--------------- drivers/pcmcia/sa1111_lubbock.c | 32 +++++++++++++------------------- drivers/pcmcia/sa1111_neponset.c | 26 ++++++++++---------------- 5 files changed, 58 insertions(+), 69 deletions(-) diff --git a/drivers/pcmcia/sa1111_badge4.c b/drivers/pcmcia/sa1111_badge4.c index 12f0dd0..2f49093 100644 --- a/drivers/pcmcia/sa1111_badge4.c +++ b/drivers/pcmcia/sa1111_badge4.c @@ -134,20 +134,14 @@ static struct pcmcia_low_level badge4_pcmcia_ops = { int pcmcia_badge4_init(struct sa1111_dev *dev) { - int ret = -ENODEV; - - if (machine_is_badge4()) { - printk(KERN_INFO - "%s: badge4_pcmvcc=%d, badge4_pcmvpp=%d, badge4_cfvcc=%d\n", - __func__, - badge4_pcmvcc, badge4_pcmvpp, badge4_cfvcc); - - sa11xx_drv_pcmcia_ops(&badge4_pcmcia_ops); - ret = sa1111_pcmcia_add(dev, &badge4_pcmcia_ops, - sa11xx_drv_pcmcia_add_one); - } - - return ret; + printk(KERN_INFO + "%s: badge4_pcmvcc=%d, badge4_pcmvpp=%d, badge4_cfvcc=%d\n", + __func__, + badge4_pcmvcc, badge4_pcmvpp, badge4_cfvcc); + + sa11xx_drv_pcmcia_ops(&badge4_pcmcia_ops); + return sa1111_pcmcia_add(dev, &badge4_pcmcia_ops, + sa11xx_drv_pcmcia_add_one); } static int __init pcmv_setup(char *s) diff --git a/drivers/pcmcia/sa1111_generic.c 
b/drivers/pcmcia/sa1111_generic.c index a1531fe..3d95dff 100644 --- a/drivers/pcmcia/sa1111_generic.c +++ b/drivers/pcmcia/sa1111_generic.c @@ -18,6 +18,7 @@ #include #include +#include #include #include "sa1111_generic.h" @@ -203,19 +204,30 @@ static int pcmcia_probe(struct sa1111_dev *dev) sa1111_writel(PCSSR_S0_SLEEP | PCSSR_S1_SLEEP, base + PCSSR); sa1111_writel(PCCR_S0_FLT | PCCR_S1_FLT, base + PCCR); + ret = -ENODEV; #ifdef CONFIG_SA1100_BADGE4 - pcmcia_badge4_init(dev); + if (machine_is_badge4()) + ret = pcmcia_badge4_init(dev); #endif #ifdef CONFIG_SA1100_JORNADA720 - pcmcia_jornada720_init(dev); + if (machine_is_jornada720()) + ret = pcmcia_jornada720_init(dev); #endif #ifdef CONFIG_ARCH_LUBBOCK - pcmcia_lubbock_init(dev); + if (machine_is_lubbock()) + ret = pcmcia_lubbock_init(dev); #endif #ifdef CONFIG_ASSABET_NEPONSET - pcmcia_neponset_init(dev); + if (machine_is_assabet()) + ret = pcmcia_neponset_init(dev); #endif - return 0; + + if (ret) { + release_mem_region(dev->res.start, 512); + sa1111_disable_device(dev); + } + + return ret; } static int pcmcia_remove(struct sa1111_dev *dev) diff --git a/drivers/pcmcia/sa1111_jornada720.c b/drivers/pcmcia/sa1111_jornada720.c index c2c3058..480a3ed 100644 --- a/drivers/pcmcia/sa1111_jornada720.c +++ b/drivers/pcmcia/sa1111_jornada720.c @@ -94,22 +94,17 @@ static struct pcmcia_low_level jornada720_pcmcia_ops = { int pcmcia_jornada720_init(struct sa1111_dev *sadev) { - int ret = -ENODEV; + unsigned int pin = GPIO_A0 | GPIO_A1 | GPIO_A2 | GPIO_A3; - if (machine_is_jornada720()) { - unsigned int pin = GPIO_A0 | GPIO_A1 | GPIO_A2 | GPIO_A3; + /* Fixme: why messing around with SA11x0's GPIO1? 
*/ + GRER |= 0x00000002; - GRER |= 0x00000002; + /* Set GPIO_A<3:1> to be outputs for PCMCIA/CF power controller: */ + sa1111_set_io_dir(sadev, pin, 0, 0); + sa1111_set_io(sadev, pin, 0); + sa1111_set_sleep_io(sadev, pin, 0); - /* Set GPIO_A<3:1> to be outputs for PCMCIA/CF power controller: */ - sa1111_set_io_dir(sadev, pin, 0, 0); - sa1111_set_io(sadev, pin, 0); - sa1111_set_sleep_io(sadev, pin, 0); - - sa11xx_drv_pcmcia_ops(&jornada720_pcmcia_ops); - ret = sa1111_pcmcia_add(sadev, &jornada720_pcmcia_ops, - sa11xx_drv_pcmcia_add_one); - } - - return ret; + sa11xx_drv_pcmcia_ops(&jornada720_pcmcia_ops); + return sa1111_pcmcia_add(sadev, &jornada720_pcmcia_ops, + sa11xx_drv_pcmcia_add_one); } diff --git a/drivers/pcmcia/sa1111_lubbock.c b/drivers/pcmcia/sa1111_lubbock.c index c5caf57..df2b6b2 100644 --- a/drivers/pcmcia/sa1111_lubbock.c +++ b/drivers/pcmcia/sa1111_lubbock.c @@ -210,27 +210,21 @@ static struct pcmcia_low_level lubbock_pcmcia_ops = { int pcmcia_lubbock_init(struct sa1111_dev *sadev) { - int ret = -ENODEV; - - if (machine_is_lubbock()) { - /* - * Set GPIO_A<3:0> to be outputs for the MAX1600, - * and switch to standby mode. - */ - sa1111_set_io_dir(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0, 0); - sa1111_set_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0); - sa1111_set_sleep_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0); - - /* Set CF Socket 1 power to standby mode. */ - lubbock_set_misc_wr((1 << 15) | (1 << 14), 0); + /* + * Set GPIO_A<3:0> to be outputs for the MAX1600, + * and switch to standby mode. + */ + sa1111_set_io_dir(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0, 0); + sa1111_set_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0); + sa1111_set_sleep_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0); - pxa2xx_drv_pcmcia_ops(&lubbock_pcmcia_ops); - pxa2xx_configure_sockets(&sadev->dev); - ret = sa1111_pcmcia_add(sadev, &lubbock_pcmcia_ops, - pxa2xx_drv_pcmcia_add_one); - } + /* Set CF Socket 1 power to standby mode. 
*/ + lubbock_set_misc_wr((1 << 15) | (1 << 14), 0); - return ret; + pxa2xx_drv_pcmcia_ops(&lubbock_pcmcia_ops); + pxa2xx_configure_sockets(&sadev->dev); + return sa1111_pcmcia_add(sadev, &lubbock_pcmcia_ops, + pxa2xx_drv_pcmcia_add_one); } MODULE_LICENSE("GPL"); diff --git a/drivers/pcmcia/sa1111_neponset.c b/drivers/pcmcia/sa1111_neponset.c index 1d78739..019c395 100644 --- a/drivers/pcmcia/sa1111_neponset.c +++ b/drivers/pcmcia/sa1111_neponset.c @@ -110,20 +110,14 @@ static struct pcmcia_low_level neponset_pcmcia_ops = { int pcmcia_neponset_init(struct sa1111_dev *sadev) { - int ret = -ENODEV; - - if (machine_is_assabet()) { - /* - * Set GPIO_A<3:0> to be outputs for the MAX1600, - * and switch to standby mode. - */ - sa1111_set_io_dir(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0, 0); - sa1111_set_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0); - sa1111_set_sleep_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0); - sa11xx_drv_pcmcia_ops(&neponset_pcmcia_ops); - ret = sa1111_pcmcia_add(sadev, &neponset_pcmcia_ops, - sa11xx_drv_pcmcia_add_one); - } - - return ret; + /* + * Set GPIO_A<3:0> to be outputs for the MAX1600, + * and switch to standby mode. + */ + sa1111_set_io_dir(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0, 0); + sa1111_set_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0); + sa1111_set_sleep_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0); + sa11xx_drv_pcmcia_ops(&neponset_pcmcia_ops); + return sa1111_pcmcia_add(sadev, &neponset_pcmcia_ops, + sa11xx_drv_pcmcia_add_one); } -- cgit v1.1 From 817ed5748e40bbc5b5f2aa0c3094c4a7adfb8881 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Thu, 1 Sep 2016 08:31:08 +0200 Subject: pcmcia: lubbock: fix sockets configuration On lubbock board, the probe of the driver crashes by dereferencing very early a platform_data structure which is not set, in pxa2xx_configure_sockets(). 
The stack fixed is : [ 0.244353] SA1111 Microprocessor Companion Chip: silicon revision 1, metal revision 1 [ 0.256321] sa1111 sa1111: Providing IRQ336-390 [ 0.340899] clocksource: Switched to clocksource oscr0 [ 0.472263] Unable to handle kernel NULL pointer dereference at virtual address 00000004 [ 0.480469] pgd = c0004000 [ 0.483432] [00000004] *pgd=00000000 [ 0.487105] Internal error: Oops: f5 [#1] ARM [ 0.491497] Modules linked in: [ 0.494650] CPU: 0 PID: 1 Comm: swapper Not tainted 4.8.0-rc3-00080-g1aaa68426f0c-dirty #2068 [ 0.503229] Hardware name: Intel DBPXA250 Development Platform (aka Lubbock) [ 0.510344] task: c3e42000 task.stack: c3e44000 [ 0.514984] PC is at pxa2xx_configure_sockets+0x4/0x24 (drivers/pcmcia/pxa2xx_base.c:227) [ 0.520193] LR is at pcmcia_lubbock_init+0x1c/0x38 [ 0.525079] pc : [] lr : [] psr: a0000053 [ 0.525079] sp : c3e45e70 ip : 100019ff fp : 00000000 [ 0.536651] r10: c0828900 r9 : c0434838 r8 : 00000000 [ 0.541953] r7 : c0820700 r6 : c0857b30 r5 : c3ec1400 r4 : c0820758 [ 0.548549] r3 : 00000000 r2 : 0000000c r1 : c3c09c40 r0 : c3ec1400 [ 0.555154] Flags: NzCv IRQs on FIQs off Mode SVC_32 ISA ARM Segment none [ 0.562450] Control: 0000397f Table: a0004000 DAC: 00000053 [ 0.568257] Process swapper (pid: 1, stack limit = 0xc3e44190) [ 0.574154] Stack: (0xc3e45e70 to 0xc3e46000) [ 0.578610] 5e60: c4849800 00000000 c3ec1400 c024769c [ 0.586928] 5e80: 00000000 c3ec140c c3c0ee0c c3ec1400 c3ec1434 c020c410 c3ec1400 c3ec1434 [ 0.595244] 5ea0: c0820700 c080b408 c0828900 c020c5f8 00000000 c0820700 c020c578 c020ac5c [ 0.603560] 5ec0: c3e687cc c3e71e10 c0820700 00000000 c3c02de0 c020bae4 c03c62f7 c03c62f7 [ 0.611872] 5ee0: c3e68780 c0820700 c042e034 00000000 c043c440 c020cdec c080b408 00000005 [ 0.620188] 5f00: c042e034 c00096c0 c0034440 c01c730c 20000053 ffffffff 00000000 00000000 [ 0.628502] 5f20: 00000000 c3ffcb87 c3ffcb90 c00346ac c3e66ba0 c03f7914 00000092 00000005 [ 0.636811] 5f40: 00000005 c03f847c 00000091 c03f847c 00000000 00000005 
c0434828 00000005 [ 0.645125] 5f60: c043482c 00000092 c043c440 c0828900 c0434838 c0418d2c 00000005 00000005 [ 0.653430] 5f80: 00000000 c041858c 00000000 c032e9f0 00000000 00000000 00000000 00000000 [ 0.661729] 5fa0: 00000000 c032e9f8 00000000 c000f0f0 00000000 00000000 00000000 00000000 [ 0.670020] 5fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 0.678311] 5fe0: 00000000 00000000 00000000 00000000 00000013 00000000 00000000 00000000 [ 0.686673] (pxa2xx_configure_sockets) from pcmcia_lubbock_init (/drivers/pcmcia/sa1111_lubbock.c:161) [ 0.696026] (pcmcia_lubbock_init) from pcmcia_probe (/drivers/pcmcia/sa1111_generic.c:213) [ 0.704358] (pcmcia_probe) from driver_probe_device (/drivers/base/dd.c:378 /drivers/base/dd.c:499) [ 0.712848] (driver_probe_device) from __driver_attach (/./include/linux/device.h:983 /drivers/base/dd.c:733) [ 0.721414] (__driver_attach) from bus_for_each_dev (/drivers/base/bus.c:313) [ 0.729723] (bus_for_each_dev) from bus_add_driver (/drivers/base/bus.c:708) [ 0.738036] (bus_add_driver) from driver_register (/drivers/base/driver.c:169) [ 0.746185] (driver_register) from do_one_initcall (/init/main.c:778) [ 0.754561] (do_one_initcall) from kernel_init_freeable (/init/main.c:843 /init/main.c:851 /init/main.c:869 /init/main.c:1016) [ 0.763409] (kernel_init_freeable) from kernel_init (/init/main.c:944) [ 0.771660] (kernel_init) from ret_from_fork (/arch/arm/kernel/entry-common.S:119) [ 0.779347] Code: c03c6305 c03c631e c03c632e e5903048 (e993000c) All code ======== 0: c03c6305 eorsgt r6, ip, r5, lsl #6 4: c03c631e eorsgt r6, ip, lr, lsl r3 8: c03c632e eorsgt r6, ip, lr, lsr #6 c: e5903048 ldr r3, [r0, #72] ; 0x48 10:* e993000c ldmib r3, {r2, r3} <-- trapping instruction Signed-off-by: Robert Jarzmik Signed-off-by: Russell King --- drivers/pcmcia/pxa2xx_base.c | 9 +++++---- drivers/pcmcia/pxa2xx_base.h | 2 +- drivers/pcmcia/sa1111_lubbock.c | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git 
a/drivers/pcmcia/pxa2xx_base.c b/drivers/pcmcia/pxa2xx_base.c index 483f919..91b5f57 100644 --- a/drivers/pcmcia/pxa2xx_base.c +++ b/drivers/pcmcia/pxa2xx_base.c @@ -214,9 +214,8 @@ pxa2xx_pcmcia_frequency_change(struct soc_pcmcia_socket *skt, } #endif -void pxa2xx_configure_sockets(struct device *dev) +void pxa2xx_configure_sockets(struct device *dev, struct pcmcia_low_level *ops) { - struct pcmcia_low_level *ops = dev->platform_data; /* * We have at least one socket, so set MECR:CIT * (Card Is There) @@ -322,7 +321,7 @@ static int pxa2xx_drv_pcmcia_probe(struct platform_device *dev) goto err1; } - pxa2xx_configure_sockets(&dev->dev); + pxa2xx_configure_sockets(&dev->dev, ops); dev_set_drvdata(&dev->dev, sinfo); return 0; @@ -348,7 +347,9 @@ static int pxa2xx_drv_pcmcia_remove(struct platform_device *dev) static int pxa2xx_drv_pcmcia_resume(struct device *dev) { - pxa2xx_configure_sockets(dev); + struct pcmcia_low_level *ops = (struct pcmcia_low_level *)dev->platform_data; + + pxa2xx_configure_sockets(dev, ops); return 0; } diff --git a/drivers/pcmcia/pxa2xx_base.h b/drivers/pcmcia/pxa2xx_base.h index b609b45..e58c7a4 100644 --- a/drivers/pcmcia/pxa2xx_base.h +++ b/drivers/pcmcia/pxa2xx_base.h @@ -1,4 +1,4 @@ int pxa2xx_drv_pcmcia_add_one(struct soc_pcmcia_socket *skt); void pxa2xx_drv_pcmcia_ops(struct pcmcia_low_level *ops); -void pxa2xx_configure_sockets(struct device *dev); +void pxa2xx_configure_sockets(struct device *dev, struct pcmcia_low_level *ops); diff --git a/drivers/pcmcia/sa1111_lubbock.c b/drivers/pcmcia/sa1111_lubbock.c index df2b6b2..e741f49 100644 --- a/drivers/pcmcia/sa1111_lubbock.c +++ b/drivers/pcmcia/sa1111_lubbock.c @@ -222,7 +222,7 @@ int pcmcia_lubbock_init(struct sa1111_dev *sadev) lubbock_set_misc_wr((1 << 15) | (1 << 14), 0); pxa2xx_drv_pcmcia_ops(&lubbock_pcmcia_ops); - pxa2xx_configure_sockets(&sadev->dev); + pxa2xx_configure_sockets(&sadev->dev, &lubbock_pcmcia_ops); return sa1111_pcmcia_add(sadev, &lubbock_pcmcia_ops, 
pxa2xx_drv_pcmcia_add_one); } -- cgit v1.1 From cb034407ec3f816540f359300cda1122faabdbbd Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 3 Sep 2016 10:21:51 +0100 Subject: ARM: sa1111: fix error code propagation in sa1111_probe() Ensure that we propagate the platform_get_irq() error code out of the probe function. This allows probe deferrals to work correctly should platform_get_irq() not be able to resolve the interrupt in a DT environment at probe time. Signed-off-by: Russell King --- arch/arm/common/sa1111.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index fb0a0a4..332b923 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -1017,7 +1017,7 @@ static int sa1111_probe(struct platform_device *pdev) return -EINVAL; irq = platform_get_irq(pdev, 0); if (irq < 0) - return -ENXIO; + return irq; return __sa1111_probe(&pdev->dev, mem, irq); } -- cgit v1.1 From 7c0091eceab231b59e51b80bbcf5a2205a0fa905 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 6 Sep 2016 14:21:46 +0100 Subject: ARM: sa1111: fix pcmcia interrupt mask polarity The polarity of the high IRQs was being calculated using SA1111_IRQMASK_HI(), but this assumes a Linux interrupt number, not a hardware interrupt number. Hence, the resulting mask was incorrect. Fix this. Signed-off-by: Russell King --- arch/arm/common/sa1111.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index 332b923..cfa61b8 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -472,8 +472,8 @@ static int sa1111_setup_irq(struct sa1111 *sachip, unsigned irq_base) * specifies that S0ReadyInt and S1ReadyInt should be '1'. 
*/ sa1111_writel(0, irqbase + SA1111_INTPOL0); - sa1111_writel(SA1111_IRQMASK_HI(IRQ_S0_READY_NINT) | - SA1111_IRQMASK_HI(IRQ_S1_READY_NINT), + sa1111_writel(BIT(IRQ_S0_READY_NINT & 31) | + BIT(IRQ_S1_READY_NINT & 31), irqbase + SA1111_INTPOL1); /* clear all IRQs */ -- cgit v1.1 From 06dfe5cc0cc684e735cb0232fdb756d30780b05d Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 6 Sep 2016 14:34:05 +0100 Subject: ARM: sa1111: fix pcmcia suspend/resume SA1111 PCMCIA was broken when PCMCIA switched to using dev_pm_ops for the PCMCIA socket class. PCMCIA used to handle suspend/resume via the socket hosting device, which happened at normal device suspend/resume time. However, the referenced commit changed this: much of the resume now happens much earlier, in the noirq resume handler of dev_pm_ops. However, on SA1111, the PCMCIA device is not accessible as the SA1111 has not been resumed at _noirq time. It's slightly worse than that, because the SA1111 has already been put to sleep at _noirq time, so suspend doesn't work properly. Fix this by converting the core SA1111 code to use dev_pm_ops as well, and performing its own suspend/resume at noirq time. This fixes these errors in the kernel log: pcmcia_socket pcmcia_socket0: time out after reset pcmcia_socket pcmcia_socket1: time out after reset and the resulting lack of PCMCIA cards after a S2RAM cycle. 
Fixes: d7646f7632549 ("pcmcia: use dev_pm_ops for class pcmcia_socket_class") Signed-off-by: Russell King --- arch/arm/common/sa1111.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index cfa61b8..7838659 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -869,9 +869,9 @@ struct sa1111_save_data { #ifdef CONFIG_PM -static int sa1111_suspend(struct platform_device *dev, pm_message_t state) +static int sa1111_suspend_noirq(struct device *dev) { - struct sa1111 *sachip = platform_get_drvdata(dev); + struct sa1111 *sachip = dev_get_drvdata(dev); struct sa1111_save_data *save; unsigned long flags; unsigned int val; @@ -934,9 +934,9 @@ static int sa1111_suspend(struct platform_device *dev, pm_message_t state) * restored by their respective drivers, and must be called * via LDM after this function. */ -static int sa1111_resume(struct platform_device *dev) +static int sa1111_resume_noirq(struct device *dev) { - struct sa1111 *sachip = platform_get_drvdata(dev); + struct sa1111 *sachip = dev_get_drvdata(dev); struct sa1111_save_data *save; unsigned long flags, id; void __iomem *base; @@ -952,7 +952,7 @@ static int sa1111_resume(struct platform_device *dev) id = sa1111_readl(sachip->base + SA1111_SKID); if ((id & SKID_ID_MASK) != SKID_SA1111_ID) { __sa1111_remove(sachip); - platform_set_drvdata(dev, NULL); + dev_set_drvdata(dev, NULL); kfree(save); return 0; } @@ -1003,8 +1003,8 @@ static int sa1111_resume(struct platform_device *dev) } #else -#define sa1111_suspend NULL -#define sa1111_resume NULL +#define sa1111_suspend_noirq NULL +#define sa1111_resume_noirq NULL #endif static int sa1111_probe(struct platform_device *pdev) @@ -1038,6 +1038,11 @@ static int sa1111_remove(struct platform_device *pdev) return 0; } +static struct dev_pm_ops sa1111_pm_ops = { + .suspend_noirq = sa1111_suspend_noirq, + .resume_noirq = sa1111_resume_noirq, +}; + /* * Not sure if 
this should be on the system bus or not yet. * We really want some way to register a system device at @@ -1050,10 +1055,9 @@ static int sa1111_remove(struct platform_device *pdev) static struct platform_driver sa1111_device_driver = { .probe = sa1111_probe, .remove = sa1111_remove, - .suspend = sa1111_suspend, - .resume = sa1111_resume, .driver = { .name = "sa1111", + .pm = &sa1111_pm_ops, }, }; -- cgit v1.1 From 87d5dd62c07f90ed2b0d6718f5c666f69e7d39b0 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 6 Sep 2016 16:09:17 +0100 Subject: ARM: sa1111: fix missing clk_disable() SA1111 forgets to call clk_disable() in the probe error cleanup path. Add the necessary call. Signed-off-by: Russell King --- arch/arm/common/sa1111.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index 7838659..2e076c4 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -754,7 +754,7 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq) if (sachip->irq != NO_IRQ) { ret = sa1111_setup_irq(sachip, pd->irq_base); if (ret) - goto err_unmap; + goto err_clk; } #ifdef CONFIG_ARCH_SA1100 @@ -799,6 +799,8 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq) return 0; + err_clk: + clk_disable(sachip->clk); err_unmap: iounmap(sachip->base); err_clk_unprep: -- cgit v1.1 From 5df20f2141eadb5430caaad20eceac61cfe0f139 Mon Sep 17 00:00:00 2001 From: "Pedersen, Thomas" Date: Tue, 6 Sep 2016 11:59:00 -0700 Subject: mac80211: make mpath path fixing more robust A fixed mpath was not quite being treated as such: 1) if a PERR frame was received, a fixed mpath was deactivated. 2) queued path discovery for fixed mpath was potentially being considered, changing mpath state. 3) other mpath flags were potentially being inherited when fixing the mpath. Just assign PATH_FIXED and SN_VALID. This solves several issues when fixing a mesh path in one direction. 
The reverse direction mpath should probably also be fixed, or root announcements at least be enabled. Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg --- net/mac80211/mesh_hwmp.c | 3 ++- net/mac80211/mesh_pathtbl.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 8f9c3bd..faccef9 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -746,6 +746,7 @@ static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata, sta = next_hop_deref_protected(mpath); if (mpath->flags & MESH_PATH_ACTIVE && ether_addr_equal(ta, sta->sta.addr) && + !(mpath->flags & MESH_PATH_FIXED) && (!(mpath->flags & MESH_PATH_SN_VALID) || SN_GT(target_sn, mpath->sn) || target_sn == 0)) { mpath->flags &= ~MESH_PATH_ACTIVE; @@ -1012,7 +1013,7 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata) goto enddiscovery; spin_lock_bh(&mpath->state_lock); - if (mpath->flags & MESH_PATH_DELETED) { + if (mpath->flags & (MESH_PATH_DELETED | MESH_PATH_FIXED)) { spin_unlock_bh(&mpath->state_lock); goto enddiscovery; } diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 6db2ddf..f0e6175 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -826,7 +826,7 @@ void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop) mpath->metric = 0; mpath->hop_count = 0; mpath->exp_time = 0; - mpath->flags |= MESH_PATH_FIXED; + mpath->flags = MESH_PATH_FIXED | MESH_PATH_SN_VALID; mesh_path_activate(mpath); spin_unlock_bh(&mpath->state_lock); mesh_path_tx_pending(mpath); -- cgit v1.1 From 6b3142b2b852cd5e3216d1aa800a0a49377e6e1c Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Wed, 7 Sep 2016 21:56:09 +0100 Subject: ARM: 8612/1: LPAE: initialize cache policy correctly The cachepolicy variable gets initialized using a masked pmd value. 
So far, the pmd has been masked with flags valid for the 2-page table format, but the 3-page table format requires a different mask. On LPAE, this led to a wrong assumption of what initial cache policy has been used. Later a check forces the cache policy to writealloc and prints the following warning: Forcing write-allocate cache policy for SMP This patch introduces a new definition PMD_SECT_CACHE_MASK for both page table formats which masks in all cache flags in both cases. Signed-off-by: Stefan Agner Signed-off-by: Russell King --- arch/arm/include/asm/pgtable-2level-hwdef.h | 1 + arch/arm/include/asm/pgtable-3level-hwdef.h | 1 + arch/arm/mm/mmu.c | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/pgtable-2level-hwdef.h b/arch/arm/include/asm/pgtable-2level-hwdef.h index d0131ee..3f82e9d 100644 --- a/arch/arm/include/asm/pgtable-2level-hwdef.h +++ b/arch/arm/include/asm/pgtable-2level-hwdef.h @@ -47,6 +47,7 @@ #define PMD_SECT_WB (PMD_SECT_CACHEABLE | PMD_SECT_BUFFERABLE) #define PMD_SECT_MINICACHE (PMD_SECT_TEX(1) | PMD_SECT_CACHEABLE) #define PMD_SECT_WBWA (PMD_SECT_TEX(1) | PMD_SECT_CACHEABLE | PMD_SECT_BUFFERABLE) +#define PMD_SECT_CACHE_MASK (PMD_SECT_TEX(1) | PMD_SECT_CACHEABLE | PMD_SECT_BUFFERABLE) #define PMD_SECT_NONSHARED_DEV (PMD_SECT_TEX(2)) /* diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h index f8f1cff..4cd664a 100644 --- a/arch/arm/include/asm/pgtable-3level-hwdef.h +++ b/arch/arm/include/asm/pgtable-3level-hwdef.h @@ -62,6 +62,7 @@ #define PMD_SECT_WT (_AT(pmdval_t, 2) << 2) /* normal inner write-through */ #define PMD_SECT_WB (_AT(pmdval_t, 3) << 2) /* normal inner write-back */ #define PMD_SECT_WBWA (_AT(pmdval_t, 7) << 2) /* normal inner write-alloc */ +#define PMD_SECT_CACHE_MASK (_AT(pmdval_t, 7) << 2) /* * + Level 3 descriptor (PTE) diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 6344913..30fe03f 100644 --- a/arch/arm/mm/mmu.c +++
b/arch/arm/mm/mmu.c @@ -137,7 +137,7 @@ void __init init_default_cache_policy(unsigned long pmd) initial_pmd_value = pmd; - pmd &= PMD_SECT_TEX(1) | PMD_SECT_BUFFERABLE | PMD_SECT_CACHEABLE; + pmd &= PMD_SECT_CACHE_MASK; for (i = 0; i < ARRAY_SIZE(cache_policies); i++) if (cache_policies[i].pmd == pmd) { -- cgit v1.1 From 07f56e6646228da27122e81d5a5a232fdf3d3a50 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 2 Sep 2016 22:08:45 +0100 Subject: ARM: locomo: fix locomo irq handling Accidentally booting Collie on Assabet reveals that the locomo driver incorrectly overwrites gpio-sa1100's chip data for its parent interrupt, leading to oops in sa1100_gpio_unmask() and sa1100_update_edge_regs() when "gpio: sa1100: convert to use IO accessors" is applied. Fix locomo to use the handler data rather than chip data for its parent interrupt. Signed-off-by: Russell King --- arch/arm/common/locomo.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm/common/locomo.c b/arch/arm/common/locomo.c index 0e97b4b..6c7b06854 100644 --- a/arch/arm/common/locomo.c +++ b/arch/arm/common/locomo.c @@ -140,7 +140,7 @@ static struct locomo_dev_info locomo_devices[] = { static void locomo_handler(struct irq_desc *desc) { - struct locomo *lchip = irq_desc_get_chip_data(desc); + struct locomo *lchip = irq_desc_get_handler_data(desc); int req, i; /* Acknowledge the parent IRQ */ @@ -200,8 +200,7 @@ static void locomo_setup_irq(struct locomo *lchip) * Install handler for IRQ_LOCOMO_HW. 
*/ irq_set_irq_type(lchip->irq, IRQ_TYPE_EDGE_FALLING); - irq_set_chip_data(lchip->irq, lchip); - irq_set_chained_handler(lchip->irq, locomo_handler); + irq_set_chained_handler_and_data(lchip->irq, locomo_handler, lchip); /* Install handlers for IRQ_LOCOMO_* */ for ( ; irq <= lchip->irq_base + 3; irq++) { -- cgit v1.1 From 1a57c286d8ced1e4144c6201a19bbb70827edee6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 4 Sep 2016 21:45:56 +0100 Subject: ARM: pxa/lubbock: add pcmcia clock Add the required PCMCIA clock for the SA1111 "1800" device. This clock is used to compute timing information for the PCMCIA interface in the SoC device, rather than the SA1111. Hence, the provision of this clock is a convenience for the driver and does not reflect the hardware, so this must not be copied into DT. Acked-by: Robert Jarzmik Tested-by: Robert Jarzmik Signed-off-by: Russell King --- arch/arm/mach-pxa/lubbock.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c index 7245f33..d6159f8 100644 --- a/arch/arm/mach-pxa/lubbock.c +++ b/arch/arm/mach-pxa/lubbock.c @@ -137,6 +137,18 @@ static struct pxa2xx_udc_mach_info udc_info __initdata = { // no D+ pullup; lubbock can't connect/disconnect in software }; +static void lubbock_init_pcmcia(void) +{ + struct clk *clk; + + /* Add an alias for the SA1111 PCMCIA clock */ + clk = clk_get_sys("pxa2xx-pcmcia", NULL); + if (!IS_ERR(clk)) { + clkdev_create(clk, NULL, "1800"); + clk_put(clk); + } +} + static struct resource sa1111_resources[] = { [0] = { .start = 0x10000000, @@ -467,6 +479,8 @@ static void __init lubbock_init(void) pxa_set_btuart_info(NULL); pxa_set_stuart_info(NULL); + lubbock_init_pcmcia(); + clk_add_alias("SA1111_CLK", NULL, "GPIO11_CLK", NULL); pxa_set_udc_info(&udc_info); pxa_set_fb_info(NULL, &sharp_lm8v31); -- cgit v1.1 From ecfcdfec7e0cc64215a194044305f02a5a836e6d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 9 Sep 2016 15:38:12 
+0200 Subject: netfilter: nf_nat: handle NF_DROP from nfnetlink_parse_nat_setup() nf_nat_setup_info() returns NF_* verdicts, so convert them to error codes, which is what ctnetlink expects. This has been overlooked without having any impact since this nf_nat_setup_info() has always returned NF_ACCEPT so far. Since 870190a9ec90 ("netfilter: nat: convert nat bysrc hash to rhashtable"), this is a problem. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index de31818..19c081e 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -807,7 +807,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, if (err < 0) return err; - return nf_nat_setup_info(ct, &range, manip); + return nf_nat_setup_info(ct, &range, manip) == NF_DROP ? -ENOMEM : 0; } #else static int -- cgit v1.1 From e89ca58f9c901c8c4cfb09f96d879b186bb01492 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 2 Sep 2016 09:01:54 -0700 Subject: nvme-rdma: add DELETING queue flag When we get a surprise disconnect from the target we queue a periodic reconnect (which is the sane thing to do...). We only move the queues out of CONNECTED when we retry to reconnect (after 10 seconds in the default case) but we stop the blk queues immediately so we are not bothered with traffic from now on. If delete() is kicking off in this period the queues are still in CONNECTED state. Part of the delete sequence is trying to issue ctrl shutdown if the admin queue is CONNECTED (which it is!). This request is issued but stuck in blk-mq waiting for the queues to start again. This might be the one preventing us from forward progress... The patch separates the queue flags to CONNECTED and DELETING. Now we will move out of CONNECTED as soon as error recovery kicks in (before stopping the queues) and DELETING is on when we start the queue deletion.
Signed-off-by: Sagi Grimberg --- drivers/nvme/host/rdma.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index a9d43f0..eeb08b6 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -83,6 +83,7 @@ struct nvme_rdma_request { enum nvme_rdma_queue_flags { NVME_RDMA_Q_CONNECTED = (1 << 0), NVME_RDMA_IB_QUEUE_ALLOCATED = (1 << 1), + NVME_RDMA_Q_DELETING = (1 << 2), }; struct nvme_rdma_queue { @@ -559,6 +560,7 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl, queue = &ctrl->queues[idx]; queue->ctrl = ctrl; + queue->flags = 0; init_completion(&queue->cm_done); if (idx > 0) @@ -616,7 +618,7 @@ static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue) static void nvme_rdma_stop_and_free_queue(struct nvme_rdma_queue *queue) { - if (!test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags)) + if (test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags)) return; nvme_rdma_stop_queue(queue); nvme_rdma_free_queue(queue); @@ -769,8 +771,13 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) { struct nvme_rdma_ctrl *ctrl = container_of(work, struct nvme_rdma_ctrl, err_work); + int i; nvme_stop_keep_alive(&ctrl->ctrl); + + for (i = 0; i < ctrl->queue_count; i++) + clear_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[i].flags); + if (ctrl->queue_count > 1) nvme_stop_queues(&ctrl->ctrl); blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); @@ -1350,7 +1357,7 @@ static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue) cancel_delayed_work_sync(&ctrl->reconnect_work); /* Disable the queue so ctrl delete won't free it */ - if (test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags)) { + if (!test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags)) { /* Free this queue ourselves */ nvme_rdma_stop_queue(queue); nvme_rdma_destroy_queue_ib(queue); -- cgit v1.1 From e87a911fed07e368c6f97e75152e6297a7dfba48 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 2 Sep 
2016 09:01:54 -0700 Subject: nvme-rdma: use ib_client API to detect device removal Change nvme-rdma to use the IB Client API to detect device removal. This has the wonderful benefit of being able to blow away all the ib/rdma_cm resources for the device being removed. No craziness about not destroying the cm_id handling the event. No deadlocks due to broken iw_cm/rdma_cm/iwarp dependencies. And no need to have a bound cm_id around during controller recovery/reconnect to catch device removal events. We don't use the device_add aspect of the ib_client service since we only want to create resources for an IB device if we have a target utilizing that device. Reviewed-by: Christoph Hellwig Signed-off-by: Steve Wise Signed-off-by: Sagi Grimberg --- drivers/nvme/host/rdma.c | 108 ++++++++++++++++++----------------------------- 1 file changed, 40 insertions(+), 68 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index eeb08b6..d6bdf55 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1320,64 +1320,6 @@ out_destroy_queue_ib: return ret; } -/** - * nvme_rdma_device_unplug() - Handle RDMA device unplug - * @queue: Queue that owns the cm_id that caught the event - * - * DEVICE_REMOVAL event notifies us that the RDMA device is about - * to unplug so we should take care of destroying our RDMA resources. - * This event will be generated for each allocated cm_id. - * - * In our case, the RDMA resources are managed per controller and not - * only per queue. So the way we handle this is we trigger an implicit - * controller deletion upon the first DEVICE_REMOVAL event we see, and - * hold the event inflight until the controller deletion is completed. - * - * One exception that we need to handle is the destruction of the cm_id - * that caught the event. Since we hold the callout until the controller - * deletion is completed, we'll deadlock if the controller deletion will - * call rdma_destroy_id on this queue's cm_id. 
Thus, we claim ownership - * of destroying this queue before-hand, destroy the queue resources, - * then queue the controller deletion which won't destroy this queue and - * we destroy the cm_id implicitely by returning a non-zero rc to the callout. - */ -static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue) -{ - struct nvme_rdma_ctrl *ctrl = queue->ctrl; - int ret = 0; - - /* Own the controller deletion */ - if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) - return 0; - - dev_warn(ctrl->ctrl.device, - "Got rdma device removal event, deleting ctrl\n"); - - /* Get rid of reconnect work if its running */ - cancel_delayed_work_sync(&ctrl->reconnect_work); - - /* Disable the queue so ctrl delete won't free it */ - if (!test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags)) { - /* Free this queue ourselves */ - nvme_rdma_stop_queue(queue); - nvme_rdma_destroy_queue_ib(queue); - - /* Return non-zero so the cm_id will destroy implicitly */ - ret = 1; - } - - /* - * Queue controller deletion. 
Keep a reference until all - * work is flushed since delete_work will free the ctrl mem - */ - kref_get(&ctrl->ctrl.kref); - queue_work(nvme_rdma_wq, &ctrl->delete_work); - flush_work(&ctrl->delete_work); - nvme_put_ctrl(&ctrl->ctrl); - - return ret; -} - static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *ev) { @@ -1419,8 +1361,8 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, nvme_rdma_error_recovery(queue->ctrl); break; case RDMA_CM_EVENT_DEVICE_REMOVAL: - /* return 1 means impliciy CM ID destroy */ - return nvme_rdma_device_unplug(queue); + /* device removal is handled via the ib_client API */ + break; default: dev_err(queue->ctrl->ctrl.device, "Unexpected RDMA CM event (%d)\n", ev->event); @@ -2030,27 +1972,57 @@ static struct nvmf_transport_ops nvme_rdma_transport = { .create_ctrl = nvme_rdma_create_ctrl, }; +static void nvme_rdma_add_one(struct ib_device *ib_device) +{ +} + +static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data) +{ + struct nvme_rdma_ctrl *ctrl; + + /* Delete all controllers using this device */ + mutex_lock(&nvme_rdma_ctrl_mutex); + list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) { + if (ctrl->device->dev != ib_device) + continue; + dev_info(ctrl->ctrl.device, + "Removing ctrl: NQN \"%s\", addr %pISp\n", + ctrl->ctrl.opts->subsysnqn, &ctrl->addr); + __nvme_rdma_del_ctrl(ctrl); + } + mutex_unlock(&nvme_rdma_ctrl_mutex); + + flush_workqueue(nvme_rdma_wq); +} + +static struct ib_client nvme_rdma_ib_client = { + .name = "nvme_rdma", + .add = nvme_rdma_add_one, + .remove = nvme_rdma_remove_one +}; + static int __init nvme_rdma_init_module(void) { + int ret; + nvme_rdma_wq = create_workqueue("nvme_rdma_wq"); if (!nvme_rdma_wq) return -ENOMEM; + ret = ib_register_client(&nvme_rdma_ib_client); + if (ret) { + destroy_workqueue(nvme_rdma_wq); + return ret; + } + nvmf_register_transport(&nvme_rdma_transport); return 0; } static void __exit nvme_rdma_cleanup_module(void) { - 
struct nvme_rdma_ctrl *ctrl; - nvmf_unregister_transport(&nvme_rdma_transport); - - mutex_lock(&nvme_rdma_ctrl_mutex); - list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) - __nvme_rdma_del_ctrl(ctrl); - mutex_unlock(&nvme_rdma_ctrl_mutex); - + ib_unregister_client(&nvme_rdma_ib_client); destroy_workqueue(nvme_rdma_wq); } -- cgit v1.1 From 1bda18de8f15a611a61d1a935b679db2e153338a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 5 Sep 2016 16:24:38 +0100 Subject: nvme-rdma: fix null pointer dereference on req->mr If there is an error on req->mr, req->mr is set to null, however the following statement sets req->mr->need_inval causing a null pointer dereference. Fix this by bailing out to label 'out' to immediately return and hence skip over the offending null pointer dereference. Fixes: f5b7b559e1488 ("nvme-rdma: Get rid of duplicate variable") Signed-off-by: Colin Ian King Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/host/rdma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index d6bdf55..c2c2c28 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -293,6 +293,7 @@ static int nvme_rdma_reinit_request(void *data, struct request *rq) if (IS_ERR(req->mr)) { ret = PTR_ERR(req->mr); req->mr = NULL; + goto out; } req->mr->need_inval = false; -- cgit v1.1 From 2cfe199ca5a8816ee80fe15bcf202dd1020aaea0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 6 Sep 2016 14:58:06 +0200 Subject: nvme-rdma: add back dependency on CONFIG_BLOCK A recent change removed the dependency on BLK_DEV_NVME, which implies the dependency on PCI and BLOCK. We don't need CONFIG_PCI, but without CONFIG_BLOCK we get tons of build errors, e.g. 
In file included from drivers/nvme/host/core.c:16:0: linux/blk-mq.h:182:33: error: 'struct gendisk' declared inside parameter list will not be visible outside of this definition or declaration [-Werror] drivers/nvme/host/core.c: In function 'nvme_setup_rw': drivers/nvme/host/core.c:295:21: error: implicit declaration of function 'rq_data_dir' [-Werror=implicit-function-declaration] drivers/nvme/host/nvme.h: In function 'nvme_map_len': drivers/nvme/host/nvme.h:217:6: error: implicit declaration of function 'req_op' [-Werror=implicit-function-declaration] drivers/nvme/host/scsi.c: In function 'nvme_trans_bdev_limits_page': drivers/nvme/host/scsi.c:768:85: error: implicit declaration of function 'queue_max_hw_sectors' [-Werror=implicit-function-declaration] This adds back the specific CONFIG_BLOCK dependency to avoid broken configurations. Signed-off-by: Arnd Bergmann Fixes: aa71987472a9 ("nvme: fabrics drivers don't need the nvme-pci driver") Signed-off-by: Sagi Grimberg --- drivers/nvme/host/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index 0c644f7..4b6cfff 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -31,6 +31,7 @@ config NVME_FABRICS config NVME_RDMA tristate "NVM Express over Fabrics RDMA host driver" depends on INFINIBAND + depends on BLOCK select NVME_CORE select NVME_FABRICS select SG_POOL -- cgit v1.1 From bf2c4b6f9b74c2ee1dd3c050b181e9b9c86fbcdb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 1 Sep 2016 10:50:38 -0400 Subject: svcauth_gss: Revert 64c59a3726f2 ("Remove unnecessary allocation") rsc_lookup steals the passed-in memory to avoid doing an allocation of its own, so we can't just pass in a pointer to memory that someone else is using. If we really want to avoid allocation there then maybe we should preallocate somewhere, or reference count these handles. For now we should revert.
On occasion I see this on my server: kernel: kernel BUG at /home/cel/src/linux/linux-2.6/mm/slub.c:3851! kernel: invalid opcode: 0000 [#1] SMP kernel: Modules linked in: cts rpcsec_gss_krb5 sb_edac edac_core x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd btrfs xor iTCO_wdt iTCO_vendor_support raid6_pq pcspkr i2c_i801 i2c_smbus lpc_ich mfd_core mei_me sg mei shpchp wmi ioatdma ipmi_si ipmi_msghandler acpi_pad acpi_power_meter rpcrdma ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm nfsd nfs_acl lockd grace auth_rpcgss sunrpc ip_tables xfs libcrc32c mlx4_ib mlx4_en ib_core sr_mod cdrom sd_mod ast drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm crc32c_intel igb mlx4_core ahci libahci libata ptp pps_core dca i2c_algo_bit i2c_core dm_mirror dm_region_hash dm_log dm_mod kernel: CPU: 7 PID: 145 Comm: kworker/7:2 Not tainted 4.8.0-rc4-00006-g9d06b0b #15 kernel: Hardware name: Supermicro Super Server/X10SRL-F, BIOS 1.0c 09/09/2015 kernel: Workqueue: events do_cache_clean [sunrpc] kernel: task: ffff8808541d8000 task.stack: ffff880854344000 kernel: RIP: 0010:[] [] kfree+0x155/0x180 kernel: RSP: 0018:ffff880854347d70 EFLAGS: 00010246 kernel: RAX: ffffea0020fe7660 RBX: ffff88083f9db064 RCX: 146ff0f9d5ec5600 kernel: RDX: 000077ff80000000 RSI: ffff880853f01500 RDI: ffff88083f9db064 kernel: RBP: ffff880854347d88 R08: ffff8808594ee000 R09: ffff88087fdd8780 kernel: R10: 0000000000000000 R11: ffffea0020fe76c0 R12: ffff880853f01500 kernel: R13: ffffffffa013cf76 R14: ffffffffa013cff0 R15: ffffffffa04253a0 kernel: FS: 0000000000000000(0000) GS:ffff88087fdc0000(0000) knlGS:0000000000000000 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 kernel: CR2: 00007fed60b020c3 CR3: 0000000001c06000 CR4: 00000000001406e0 kernel: Stack: kernel: ffff8808589f2f00 ffff880853f01500 0000000000000001 ffff880854347da0 kernel: 
ffffffffa013cf76 ffff8808589f2f00 ffff880854347db8 ffffffffa013d006 kernel: ffff8808589f2f20 ffff880854347e00 ffffffffa0406f60 0000000057c7044f kernel: Call Trace: kernel: [] rsc_free+0x16/0x90 [auth_rpcgss] kernel: [] rsc_put+0x16/0x30 [auth_rpcgss] kernel: [] cache_clean+0x2e0/0x300 [sunrpc] kernel: [] do_cache_clean+0xe/0x70 [sunrpc] kernel: [] process_one_work+0x1ff/0x3b0 kernel: [] worker_thread+0x2bc/0x4a0 kernel: [] ? rescuer_thread+0x3a0/0x3a0 kernel: [] kthread+0xe4/0xf0 kernel: [] ret_from_fork+0x1f/0x40 kernel: [] ? kthread_stop+0x110/0x110 kernel: Code: f7 ff ff eb 3b 65 8b 05 da 30 e2 7e 89 c0 48 0f a3 05 a0 38 b8 00 0f 92 c0 84 c0 0f 85 d1 fe ff ff 0f 1f 44 00 00 e9 f5 fe ff ff <0f> 0b 49 8b 03 31 f6 f6 c4 40 0f 85 62 ff ff ff e9 61 ff ff ff kernel: RIP [] kfree+0x155/0x180 kernel: RSP kernel: ---[ end trace 3fdec044969def26 ]--- It seems to be most common after a server reboot where a client has been using a Kerberos mount, and reconnects to continue its workload. Signed-off-by: Chuck Lever Cc: stable@vger.kernel.org Signed-off-by: J. 
Bruce Fields --- net/sunrpc/auth_gss/svcauth_gss.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 1d28181..d858202 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -569,9 +569,10 @@ gss_svc_searchbyctx(struct cache_detail *cd, struct xdr_netobj *handle) struct rsc *found; memset(&rsci, 0, sizeof(rsci)); - rsci.handle.data = handle->data; - rsci.handle.len = handle->len; + if (dup_to_netobj(&rsci.handle, handle->data, handle->len)) + return NULL; found = rsc_lookup(cd, &rsci); + rsc_free(&rsci); if (!found) return NULL; if (cache_check(cd, &found->h, NULL)) -- cgit v1.1 From aa211d2074ec4266b89673a54719421464c943e3 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 12 Sep 2016 10:03:32 -0400 Subject: 3c59x: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- drivers/net/ethernet/3com/3c59x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index 25c55ab..9133e79 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -3089,7 +3089,7 @@ static void set_rx_mode(struct net_device *dev) iowrite16(new_mode, ioaddr + EL3_CMD); } -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_VLAN_8021Q) /* Setup the card so that it can receive frames with an 802.1q VLAN tag. 
Note that this must be done after each RxReset due to some backwards compatibility logic in the Cyclone and Tornado ASICs */ -- cgit v1.1 From 5a5ab1611aa5c17a32b64a4c5069c26e1fd7c960 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 12 Sep 2016 10:03:33 -0400 Subject: starfire: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- drivers/net/ethernet/adaptec/starfire.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c index 1d10696..8af2c88 100644 --- a/drivers/net/ethernet/adaptec/starfire.c +++ b/drivers/net/ethernet/adaptec/starfire.c @@ -66,7 +66,7 @@ */ #define ZEROCOPY -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_VLAN_8021Q) #define VLAN_SUPPORT #endif -- cgit v1.1 From 941992d2944789641470626e9336d663236b1d28 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 12 Sep 2016 10:03:34 -0400 Subject: ethernet: amd: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Reviewed-by: Geert Uytterhoeven Signed-off-by: David S. 
Miller --- drivers/net/ethernet/amd/7990.c | 6 +++--- drivers/net/ethernet/amd/amd8111e.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/amd/7990.c b/drivers/net/ethernet/amd/7990.c index dcf2a1f..dc57f27 100644 --- a/drivers/net/ethernet/amd/7990.c +++ b/drivers/net/ethernet/amd/7990.c @@ -45,14 +45,14 @@ #define WRITERDP(lp, x) out_be16(lp->base + LANCE_RDP, (x)) #define READRDP(lp) in_be16(lp->base + LANCE_RDP) -#if defined(CONFIG_HPLANCE) || defined(CONFIG_HPLANCE_MODULE) +#if IS_ENABLED(CONFIG_HPLANCE) #include "hplance.h" #undef WRITERAP #undef WRITERDP #undef READRDP -#if defined(CONFIG_MVME147_NET) || defined(CONFIG_MVME147_NET_MODULE) +#if IS_ENABLED(CONFIG_MVME147_NET) /* Lossage Factor Nine, Mr Sulu. */ #define WRITERAP(lp, x) (lp->writerap(lp, x)) @@ -86,7 +86,7 @@ static inline __u16 READRDP(struct lance_private *lp) } #endif -#endif /* CONFIG_HPLANCE || CONFIG_HPLANCE_MODULE */ +#endif /* IS_ENABLED(CONFIG_HPLANCE) */ /* debugging output macros, various flavours */ /* #define TEST_HITS */ diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index 94960055..f92cc971 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -89,7 +89,7 @@ Revision History: #include #include -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_VLAN_8021Q) #define AMD8111E_VLAN_TAG_USED 1 #else #define AMD8111E_VLAN_TAG_USED 0 -- cgit v1.1 From da556d6a1a386aaf9ead99ef5574497c535bd26e Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 12 Sep 2016 10:03:35 -0400 Subject: bnx2: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. 
Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 8fc3f3c..ecd357d 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -50,7 +50,7 @@ #include #include -#if defined(CONFIG_CNIC) || defined(CONFIG_CNIC_MODULE) +#if IS_ENABLED(CONFIG_CNIC) #define BCM_CNIC 1 #include "cnic_if.h" #endif -- cgit v1.1 From 067577868b4fdeaf2f93ec5c4e9b29a0eded6528 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 12 Sep 2016 10:03:36 -0400 Subject: sundance: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- drivers/net/ethernet/dlink/sundance.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index 58c6338..79d8009 100644 --- a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -867,7 +867,7 @@ static int netdev_open(struct net_device *dev) /* Initialize other registers. 
*/ __set_mac_addr(dev); -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_VLAN_8021Q) iowrite16(dev->mtu + 18, ioaddr + MaxFrameSize); #else iowrite16(dev->mtu + 14, ioaddr + MaxFrameSize); -- cgit v1.1 From 504e76e5b93d9c6a5fbfaa8ea63ad3f7fe77f601 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 12 Sep 2016 10:03:37 -0400 Subject: net/fsl_pq_mdio: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fsl_pq_mdio.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.c b/drivers/net/ethernet/freescale/fsl_pq_mdio.c index f3c63dc..446c7b3 100644 --- a/drivers/net/ethernet/freescale/fsl_pq_mdio.c +++ b/drivers/net/ethernet/freescale/fsl_pq_mdio.c @@ -195,7 +195,7 @@ static int fsl_pq_mdio_reset(struct mii_bus *bus) return 0; } -#if defined(CONFIG_GIANFAR) || defined(CONFIG_GIANFAR_MODULE) +#if IS_ENABLED(CONFIG_GIANFAR) /* * Return the TBIPA address, starting from the address * of the mapped GFAR MDIO registers (struct gfar) @@ -228,7 +228,7 @@ static uint32_t __iomem *get_etsec_tbipa(void __iomem *p) } #endif -#if defined(CONFIG_UCC_GETH) || defined(CONFIG_UCC_GETH_MODULE) +#if IS_ENABLED(CONFIG_UCC_GETH) /* * Return the TBIPAR address for a QE MDIO node, starting from the address * of the mapped MII registers (struct fsl_pq_mii) @@ -306,7 +306,7 @@ static void ucc_configure(phys_addr_t start, phys_addr_t end) #endif static const struct of_device_id fsl_pq_mdio_match[] = { -#if defined(CONFIG_GIANFAR) || defined(CONFIG_GIANFAR_MODULE) +#if IS_ENABLED(CONFIG_GIANFAR) 
{ .compatible = "fsl,gianfar-tbi", .data = &(struct fsl_pq_mdio_data) { @@ -344,7 +344,7 @@ static const struct of_device_id fsl_pq_mdio_match[] = { }, }, #endif -#if defined(CONFIG_UCC_GETH) || defined(CONFIG_UCC_GETH_MODULE) +#if IS_ENABLED(CONFIG_UCC_GETH) { .compatible = "fsl,ucc-mdio", .data = &(struct fsl_pq_mdio_data) { -- cgit v1.1 From e2eae5b80f66a9a9b6fd5246fffc86d9ec3f49df Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 12 Sep 2016 10:03:38 -0400 Subject: i825xx: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- drivers/net/ethernet/i825xx/82596.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/i825xx/82596.c b/drivers/net/ethernet/i825xx/82596.c index befb4ac..ce235b7 100644 --- a/drivers/net/ethernet/i825xx/82596.c +++ b/drivers/net/ethernet/i825xx/82596.c @@ -89,10 +89,10 @@ static char version[] __initdata = #define DEB(x,y) if (i596_debug & (x)) y -#if defined(CONFIG_MVME16x_NET) || defined(CONFIG_MVME16x_NET_MODULE) +#if IS_ENABLED(CONFIG_MVME16x_NET) #define ENABLE_MVME16x_NET #endif -#if defined(CONFIG_BVME6000_NET) || defined(CONFIG_BVME6000_NET_MODULE) +#if IS_ENABLED(CONFIG_BVME6000_NET) #define ENABLE_BVME6000_NET #endif -- cgit v1.1 From ee58c1149e2bba6043e31f736f6f6136562758ac Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 12 Sep 2016 10:03:39 -0400 Subject: ixgbe: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. 
Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 33c0250..b06e32d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -45,10 +45,10 @@ #include "ixgbe_type.h" #include "ixgbe_common.h" #include "ixgbe_dcb.h" -#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#if IS_ENABLED(CONFIG_FCOE) #define IXGBE_FCOE #include "ixgbe_fcoe.h" -#endif /* CONFIG_FCOE or CONFIG_FCOE_MODULE */ +#endif /* IS_ENABLED(CONFIG_FCOE) */ #ifdef CONFIG_IXGBE_DCA #include #endif -- cgit v1.1 From bb152934015bb1f3633ead0cd3404227355294ea Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 12 Sep 2016 10:03:40 -0400 Subject: net: mvneta: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/mvneta_bm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta_bm.h b/drivers/net/ethernet/marvell/mvneta_bm.h index e74fd44..a32de43 100644 --- a/drivers/net/ethernet/marvell/mvneta_bm.h +++ b/drivers/net/ethernet/marvell/mvneta_bm.h @@ -133,7 +133,7 @@ struct mvneta_bm_pool { void *mvneta_frag_alloc(unsigned int frag_size); void mvneta_frag_free(unsigned int frag_size, void *data); -#if defined(CONFIG_MVNETA_BM) || defined(CONFIG_MVNETA_BM_MODULE) +#if IS_ENABLED(CONFIG_MVNETA_BM) void mvneta_bm_pool_destroy(struct mvneta_bm *priv, struct mvneta_bm_pool *bm_pool, u8 port_map); void mvneta_bm_bufs_free(struct mvneta_bm *priv, struct mvneta_bm_pool *bm_pool, -- cgit v1.1 From cbbe6096de425131e94c95bfe017718dc625330e Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 12 Sep 2016 10:03:41 -0400 Subject: natsemi: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. 
Miller --- drivers/net/ethernet/natsemi/ns83820.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c index eb807b0..569ade6 100644 --- a/drivers/net/ethernet/natsemi/ns83820.c +++ b/drivers/net/ethernet/natsemi/ns83820.c @@ -134,7 +134,7 @@ static int lnksts = 0; /* CFG_LNKSTS bit polarity */ /* tunables */ #define RX_BUF_SIZE 1500 /* 8192 */ -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_VLAN_8021Q) #define NS83820_VLAN_ACCEL_SUPPORT #endif -- cgit v1.1 From 795f02344af4c81ddc30a90f0b11b7bba053abd4 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 12 Sep 2016 10:03:42 -0400 Subject: sfc: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Acked-by: Bert Kenward Signed-off-by: David S. 
Miller --- drivers/net/ethernet/sfc/falcon_boards.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/sfc/falcon_boards.c b/drivers/net/ethernet/sfc/falcon_boards.c index 1736f4b..f6883b2 100644 --- a/drivers/net/ethernet/sfc/falcon_boards.c +++ b/drivers/net/ethernet/sfc/falcon_boards.c @@ -64,7 +64,7 @@ #define LM87_ALARM_TEMP_INT 0x10 #define LM87_ALARM_TEMP_EXT1 0x20 -#if defined(CONFIG_SENSORS_LM87) || defined(CONFIG_SENSORS_LM87_MODULE) +#if IS_ENABLED(CONFIG_SENSORS_LM87) static int efx_poke_lm87(struct i2c_client *client, const u8 *reg_values) { @@ -455,7 +455,7 @@ static int sfe4001_init(struct efx_nic *efx) struct falcon_board *board = falcon_board(efx); int rc; -#if defined(CONFIG_SENSORS_LM90) || defined(CONFIG_SENSORS_LM90_MODULE) +#if IS_ENABLED(CONFIG_SENSORS_LM90) board->hwmon_client = i2c_new_device(&board->i2c_adap, &sfe4001_hwmon_info); #else -- cgit v1.1 From 547e530a5e10fbc8e78bf2573508e46ca1bf571f Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 12 Sep 2016 10:03:43 -0400 Subject: sis900: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Acked-by: Daniele Venzano Signed-off-by: David S. 
Miller --- drivers/net/ethernet/sis/sis900.c | 4 ++-- drivers/net/ethernet/sis/sis900.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index 95001ee4..6f85276 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -1426,7 +1426,7 @@ static void sis900_set_mode(struct sis900_private *sp, int speed, int duplex) rx_flags |= RxATX; } -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_VLAN_8021Q) /* Can accept Jumbo packet */ rx_flags |= RxAJAB; #endif @@ -1750,7 +1750,7 @@ static int sis900_rx(struct net_device *net_dev) data_size = rx_status & DSIZE; rx_size = data_size - CRC_SIZE; -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_VLAN_8021Q) /* ``TOOLONG'' flag means jumbo packet received. */ if ((rx_status & TOOLONG) && data_size <= MAX_FRAME_SIZE) rx_status &= (~ ((unsigned int)TOOLONG)); diff --git a/drivers/net/ethernet/sis/sis900.h b/drivers/net/ethernet/sis/sis900.h index 7d430d3..f0da3dc 100644 --- a/drivers/net/ethernet/sis/sis900.h +++ b/drivers/net/ethernet/sis/sis900.h @@ -310,7 +310,7 @@ enum sis630_revision_id { #define CRC_SIZE 4 #define MAC_HEADER_SIZE 14 -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_VLAN_8021Q) #define MAX_FRAME_SIZE (1518 + 4) #else #define MAX_FRAME_SIZE 1518 -- cgit v1.1 From 12c70f30533ebf31e86c070253555149b9bf6ff6 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 12 Sep 2016 10:03:44 -0400 Subject: stmmac: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. 
Signed-off-by: Javier Martinez Canillas Reviewed-by: Alexandre TORGUE Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 2533b91..d3292c4a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -30,7 +30,7 @@ #include #include #include -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_VLAN_8021Q) #define STMMAC_VLAN_TAG_USED #include #endif -- cgit v1.1 From 5f94bebe72ede10da4b779690894f34b259cfce4 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 12 Sep 2016 10:03:45 -0400 Subject: hamradio: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. 
Miller --- drivers/net/hamradio/bpqether.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index d95a50a..622ab3a 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -484,7 +484,7 @@ static void bpq_setup(struct net_device *dev) dev->flags = 0; dev->features = NETIF_F_LLTX; /* Allow recursion */ -#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) +#if IS_ENABLED(CONFIG_AX25) dev->header_ops = &ax25_header_ops; #endif -- cgit v1.1 From 4c73195edbe3a5d7e14ea549bb261cf35c29f0cc Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 12 Sep 2016 10:03:46 -0400 Subject: iwlegacy: use IS_ENABLED() instead of checking for built-in or module The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either built-in or as a module, use that macro instead of open coding the same. Using the macro makes the code more readable by helping abstract away some of the Kconfig built-in and module enable details. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. 
Miller --- drivers/net/wireless/intel/iwlegacy/common.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlegacy/common.h b/drivers/net/wireless/intel/iwlegacy/common.h index 726ede3..3bba521 100644 --- a/drivers/net/wireless/intel/iwlegacy/common.h +++ b/drivers/net/wireless/intel/iwlegacy/common.h @@ -1320,7 +1320,7 @@ struct il_priv { u64 timestamp; union { -#if defined(CONFIG_IWL3945) || defined(CONFIG_IWL3945_MODULE) +#if IS_ENABLED(CONFIG_IWL3945) struct { void *shared_virt; dma_addr_t shared_phys; @@ -1351,7 +1351,7 @@ struct il_priv { } _3945; #endif -#if defined(CONFIG_IWL4965) || defined(CONFIG_IWL4965_MODULE) +#if IS_ENABLED(CONFIG_IWL4965) struct { struct il_rx_phy_res last_phy_res; bool last_phy_res_valid; -- cgit v1.1 From 8d51dbb8c7fb5412f0935c20f66e27d2c63ef4a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Mon, 12 Sep 2016 15:55:43 +0200 Subject: mac80211: Re-structure aqm debugfs output and keep CoDel stats per txq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently the 'aqm' stats in mac80211 only keeps overlimit drop stats, not CoDel stats. This moves the CoDel stats into the txqi structure to keep them per txq in order to show them in debugfs. In addition, the aqm debugfs output is restructured by splitting it up into three files: One global per phy, one per netdev and one per station, in the appropriate directories. The files are all called aqm, and are only created if the driver supports the wake_tx_queue op (rather than emitting an error on open as previously). 
Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- net/mac80211/debugfs.c | 151 ++++++++---------------------------------- net/mac80211/debugfs_netdev.c | 37 ++++++++++- net/mac80211/debugfs_sta.c | 52 +++++++++++++++ net/mac80211/ieee80211_i.h | 2 +- net/mac80211/tx.c | 4 +- 5 files changed, 117 insertions(+), 129 deletions(-) diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 2906c10..5bbb470 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -71,138 +71,39 @@ DEBUGFS_READONLY_FILE(wep_iv, "%#08x", DEBUGFS_READONLY_FILE(rate_ctrl_alg, "%s", local->rate_ctrl ? local->rate_ctrl->ops->name : "hw/driver"); -struct aqm_info { - struct ieee80211_local *local; - size_t size; - size_t len; - unsigned char buf[0]; -}; - -#define AQM_HDR_LEN 200 -#define AQM_HW_ENTRY_LEN 40 -#define AQM_TXQ_ENTRY_LEN 110 - -static int aqm_open(struct inode *inode, struct file *file) +static ssize_t aqm_read(struct file *file, + char __user *user_buf, + size_t count, + loff_t *ppos) { - struct ieee80211_local *local = inode->i_private; - struct ieee80211_sub_if_data *sdata; - struct sta_info *sta; - struct txq_info *txqi; + struct ieee80211_local *local = file->private_data; struct fq *fq = &local->fq; - struct aqm_info *info = NULL; + char buf[200]; int len = 0; - int i; - - if (!local->ops->wake_tx_queue) - return -EOPNOTSUPP; - - len += AQM_HDR_LEN; - len += 6 * AQM_HW_ENTRY_LEN; - - rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) - len += AQM_TXQ_ENTRY_LEN; - list_for_each_entry_rcu(sta, &local->sta_list, list) - len += AQM_TXQ_ENTRY_LEN * ARRAY_SIZE(sta->sta.txq); - rcu_read_unlock(); - - info = vmalloc(len); - if (!info) - return -ENOMEM; spin_lock_bh(&local->fq.lock); rcu_read_lock(); - file->private_data = info; - info->local = local; - info->size = len; - len = 0; - - len += scnprintf(info->buf + len, info->size - len, - "* hw\n" - "access name value\n" - "R fq_flows_cnt %u\n" - "R fq_backlog %u\n" - 
"R fq_overlimit %u\n" - "R fq_collisions %u\n" - "RW fq_limit %u\n" - "RW fq_quantum %u\n", - fq->flows_cnt, - fq->backlog, - fq->overlimit, - fq->collisions, - fq->limit, - fq->quantum); - - len += scnprintf(info->buf + len, - info->size - len, - "* vif\n" - "ifname addr ac backlog-bytes backlog-packets flows overlimit collisions tx-bytes tx-packets\n"); - - list_for_each_entry_rcu(sdata, &local->interfaces, list) { - txqi = to_txq_info(sdata->vif.txq); - len += scnprintf(info->buf + len, info->size - len, - "%s %pM %u %u %u %u %u %u %u %u\n", - sdata->name, - sdata->vif.addr, - txqi->txq.ac, - txqi->tin.backlog_bytes, - txqi->tin.backlog_packets, - txqi->tin.flows, - txqi->tin.overlimit, - txqi->tin.collisions, - txqi->tin.tx_bytes, - txqi->tin.tx_packets); - } - - len += scnprintf(info->buf + len, - info->size - len, - "* sta\n" - "ifname addr tid ac backlog-bytes backlog-packets flows overlimit collisions tx-bytes tx-packets\n"); - - list_for_each_entry_rcu(sta, &local->sta_list, list) { - sdata = sta->sdata; - for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { - txqi = to_txq_info(sta->sta.txq[i]); - len += scnprintf(info->buf + len, info->size - len, - "%s %pM %d %d %u %u %u %u %u %u %u\n", - sdata->name, - sta->sta.addr, - txqi->txq.tid, - txqi->txq.ac, - txqi->tin.backlog_bytes, - txqi->tin.backlog_packets, - txqi->tin.flows, - txqi->tin.overlimit, - txqi->tin.collisions, - txqi->tin.tx_bytes, - txqi->tin.tx_packets); - } - } - - info->len = len; + len = scnprintf(buf, sizeof(buf), + "access name value\n" + "R fq_flows_cnt %u\n" + "R fq_backlog %u\n" + "R fq_overlimit %u\n" + "R fq_collisions %u\n" + "RW fq_limit %u\n" + "RW fq_quantum %u\n", + fq->flows_cnt, + fq->backlog, + fq->overlimit, + fq->collisions, + fq->limit, + fq->quantum); rcu_read_unlock(); spin_unlock_bh(&local->fq.lock); - return 0; -} - -static int aqm_release(struct inode *inode, struct file *file) -{ - vfree(file->private_data); - return 0; -} - -static ssize_t aqm_read(struct file *file, 
- char __user *user_buf, - size_t count, - loff_t *ppos) -{ - struct aqm_info *info = file->private_data; - return simple_read_from_buffer(user_buf, count, ppos, - info->buf, info->len); + buf, len); } static ssize_t aqm_write(struct file *file, @@ -210,8 +111,7 @@ static ssize_t aqm_write(struct file *file, size_t count, loff_t *ppos) { - struct aqm_info *info = file->private_data; - struct ieee80211_local *local = info->local; + struct ieee80211_local *local = file->private_data; char buf[100]; size_t len; @@ -237,8 +137,7 @@ static ssize_t aqm_write(struct file *file, static const struct file_operations aqm_ops = { .write = aqm_write, .read = aqm_read, - .open = aqm_open, - .release = aqm_release, + .open = simple_open, .llseek = default_llseek, }; @@ -428,7 +327,9 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_ADD(hwflags); DEBUGFS_ADD(user_power); DEBUGFS_ADD(power); - DEBUGFS_ADD_MODE(aqm, 0600); + + if (local->ops->wake_tx_queue) + DEBUGFS_ADD_MODE(aqm, 0600); statsd = debugfs_create_dir("statistics", phyd); diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index a5ba739..5d35c0f 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -30,7 +30,7 @@ static ssize_t ieee80211_if_read( size_t count, loff_t *ppos, ssize_t (*format)(const struct ieee80211_sub_if_data *, char *, int)) { - char buf[70]; + char buf[200]; ssize_t ret = -EINVAL; read_lock(&dev_base_lock); @@ -486,6 +486,38 @@ static ssize_t ieee80211_if_fmt_num_buffered_multicast( } IEEE80211_IF_FILE_R(num_buffered_multicast); +static ssize_t ieee80211_if_fmt_aqm( + const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) +{ + struct ieee80211_local *local = sdata->local; + struct txq_info *txqi = to_txq_info(sdata->vif.txq); + int len; + + spin_lock_bh(&local->fq.lock); + rcu_read_lock(); + + len = scnprintf(buf, + buflen, + "ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets\n" + 
"%u %u %u %u %u %u %u %u %u %u\n", + txqi->txq.ac, + txqi->tin.backlog_bytes, + txqi->tin.backlog_packets, + txqi->tin.flows, + txqi->cstats.drop_count, + txqi->cstats.ecn_mark, + txqi->tin.overlimit, + txqi->tin.collisions, + txqi->tin.tx_bytes, + txqi->tin.tx_packets); + + rcu_read_unlock(); + spin_unlock_bh(&local->fq.lock); + + return len; +} +IEEE80211_IF_FILE_R(aqm); + /* IBSS attributes */ static ssize_t ieee80211_if_fmt_tsf( const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) @@ -618,6 +650,9 @@ static void add_common_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(rc_rateidx_vht_mcs_mask_2ghz); DEBUGFS_ADD(rc_rateidx_vht_mcs_mask_5ghz); DEBUGFS_ADD(hw_queues); + + if (sdata->local->ops->wake_tx_queue) + DEBUGFS_ADD(aqm); } static void add_sta_files(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index fd33413..fb26935 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -133,6 +133,55 @@ static ssize_t sta_last_seq_ctrl_read(struct file *file, char __user *userbuf, } STA_OPS(last_seq_ctrl); +#define AQM_TXQ_ENTRY_LEN 130 + +static ssize_t sta_aqm_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct sta_info *sta = file->private_data; + struct ieee80211_local *local = sta->local; + size_t bufsz = AQM_TXQ_ENTRY_LEN*(IEEE80211_NUM_TIDS+1); + char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf; + struct txq_info *txqi; + ssize_t rv; + int i; + + if (!buf) + return -ENOMEM; + + spin_lock_bh(&local->fq.lock); + rcu_read_lock(); + + p += scnprintf(p, + bufsz+buf-p, + "tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets\n"); + + for (i = 0; i < IEEE80211_NUM_TIDS; i++) { + txqi = to_txq_info(sta->sta.txq[i]); + p += scnprintf(p, bufsz+buf-p, + "%d %d %u %u %u %u %u %u %u %u %u\n", + txqi->txq.tid, + txqi->txq.ac, + txqi->tin.backlog_bytes, + txqi->tin.backlog_packets, + 
txqi->tin.flows, + txqi->cstats.drop_count, + txqi->cstats.ecn_mark, + txqi->tin.overlimit, + txqi->tin.collisions, + txqi->tin.tx_bytes, + txqi->tin.tx_packets); + } + + rcu_read_unlock(); + spin_unlock_bh(&local->fq.lock); + + rv = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); + kfree(buf); + return rv; +} +STA_OPS(aqm); + static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { @@ -478,6 +527,9 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) DEBUGFS_ADD_COUNTER(rx_fragments, rx_stats.fragments); DEBUGFS_ADD_COUNTER(tx_filtered, status_stats.filtered); + if (local->ops->wake_tx_queue) + DEBUGFS_ADD(aqm); + if (sizeof(sta->driver_buffered_tids) == sizeof(u32)) debugfs_create_x32("driver_buffered_tids", 0400, sta->debugfs_dir, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 7576168..c71c735 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -818,6 +818,7 @@ struct txq_info { struct fq_tin tin; struct fq_flow def_flow; struct codel_vars def_cvars; + struct codel_stats cstats; unsigned long flags; /* keep last! 
*/ @@ -1117,7 +1118,6 @@ struct ieee80211_local { struct fq fq; struct codel_vars *cvars; struct codel_params cparams; - struct codel_stats cstats; const struct ieee80211_ops *ops; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index efc38e7..ee9e7d6 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1343,7 +1343,7 @@ static struct sk_buff *fq_tin_dequeue_func(struct fq *fq, local = container_of(fq, struct ieee80211_local, fq); txqi = container_of(tin, struct txq_info, tin); cparams = &local->cparams; - cstats = &local->cstats; + cstats = &txqi->cstats; if (flow == &txqi->def_flow) cvars = &txqi->def_cvars; @@ -1403,6 +1403,7 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata, fq_tin_init(&txqi->tin); fq_flow_init(&txqi->def_flow); codel_vars_init(&txqi->def_cvars); + codel_stats_init(&txqi->cstats); txqi->txq.vif = &sdata->vif; @@ -1441,7 +1442,6 @@ int ieee80211_txq_setup_flows(struct ieee80211_local *local) return ret; codel_params_init(&local->cparams); - codel_stats_init(&local->cstats); local->cparams.interval = MS2TIME(100); local->cparams.target = MS2TIME(20); local->cparams.ecn = true; -- cgit v1.1 From 11d62caf93cf12ce80ff8304849887666ec8880a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 Sep 2016 08:28:22 +0200 Subject: mac80211: simplify TDLS RA lookup smatch pointed out that the second check of "tdls_auth" was pointless since if it was true, we returned from the function already. We can further simplify the code by moving the first check (if it's a TDLS peer at all) into the outer if, to only handle that inside. This simplifies the control flow here. 
Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index ee9e7d6..61d302d 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2263,15 +2263,9 @@ static int ieee80211_lookup_ra_sta(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_STATION: if (sdata->wdev.wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS) { sta = sta_info_get(sdata, skb->data); - if (sta) { - bool tdls_peer, tdls_auth; - - tdls_peer = test_sta_flag(sta, - WLAN_STA_TDLS_PEER); - tdls_auth = test_sta_flag(sta, - WLAN_STA_TDLS_PEER_AUTH); - - if (tdls_peer && tdls_auth) { + if (sta && test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { + if (test_sta_flag(sta, + WLAN_STA_TDLS_PEER_AUTH)) { *sta_out = sta; return 0; } @@ -2283,8 +2277,7 @@ static int ieee80211_lookup_ra_sta(struct ieee80211_sub_if_data *sdata, * after a TDLS sta is removed due to being * unreachable. */ - if (tdls_peer && !tdls_auth && - !ieee80211_is_tdls_setup(skb)) + if (!ieee80211_is_tdls_setup(skb)) return -EINVAL; } -- cgit v1.1 From 4440a2ab3b9f40dddbe006331ef0659c76859296 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Tue, 13 Sep 2016 08:49:18 +0800 Subject: netfilter: synproxy: Check oom when adding synproxy and seqadj ct extensions When memory is exhausted, nfct_seqadj_ext_add may fail to add the synproxy and seqadj extensions. The function nf_ct_seqadj_init doesn't check if get valid seqadj pointer by the nfct_seqadj. Now drop the packet directly when fail to add seqadj extension to avoid dereference NULL pointer in nf_ct_seqadj_init from init_conntrack(). 
Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_synproxy.h | 14 ++++++++++++++ net/netfilter/nf_conntrack_core.c | 6 +++--- net/netfilter/nf_nat_core.c | 3 ++- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_synproxy.h b/include/net/netfilter/nf_conntrack_synproxy.h index 6793614..e693731 100644 --- a/include/net/netfilter/nf_conntrack_synproxy.h +++ b/include/net/netfilter/nf_conntrack_synproxy.h @@ -27,6 +27,20 @@ static inline struct nf_conn_synproxy *nfct_synproxy_ext_add(struct nf_conn *ct) #endif } +static inline bool nf_ct_add_synproxy(struct nf_conn *ct, + const struct nf_conn *tmpl) +{ + if (tmpl && nfct_synproxy(tmpl)) { + if (!nfct_seqadj_ext_add(ct)) + return false; + + if (!nfct_synproxy_ext_add(ct)) + return false; + } + + return true; +} + struct synproxy_stats { unsigned int syn_received; unsigned int cookie_invalid; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index dd2c43a..9934b0c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1035,9 +1035,9 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, if (IS_ERR(ct)) return (struct nf_conntrack_tuple_hash *)ct; - if (tmpl && nfct_synproxy(tmpl)) { - nfct_seqadj_ext_add(ct); - nfct_synproxy_ext_add(ct); + if (!nf_ct_add_synproxy(ct, tmpl)) { + nf_conntrack_free(ct); + return ERR_PTR(-ENOMEM); } timeout_ext = tmpl ? 
nf_ct_timeout_find(tmpl) : NULL; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 19c081e..ecee105 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -441,7 +441,8 @@ nf_nat_setup_info(struct nf_conn *ct, ct->status |= IPS_DST_NAT; if (nfct_help(ct)) - nfct_seqadj_ext_add(ct); + if (!nfct_seqadj_ext_add(ct)) + return NF_DROP; } if (maniptype == NF_NAT_MANIP_SRC) { -- cgit v1.1 From 53a5d5ddccf849dbc27a8c1bba0b43c3a45fb792 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 7 Sep 2016 18:42:08 +0800 Subject: crypto: echainiv - Replace chaining with multiplication The current implementation uses a global per-cpu array to store data which are used to derive the next IV. This is insecure as the attacker may change the stored data. This patch removes all traces of chaining and replaces it with multiplication of the salt and the sequence number. Fixes: a10f554fa7e0 ("crypto: echainiv - Add encrypted chain IV...") Cc: stable@vger.kernel.org Reported-by: Mathias Krause Signed-off-by: Herbert Xu --- crypto/echainiv.c | 115 ++++++++++++------------------------------------------ 1 file changed, 24 insertions(+), 91 deletions(-) diff --git a/crypto/echainiv.c b/crypto/echainiv.c index 1b01fe9..e3d889b 100644 --- a/crypto/echainiv.c +++ b/crypto/echainiv.c @@ -1,8 +1,8 @@ /* * echainiv: Encrypted Chain IV Generator * - * This generator generates an IV based on a sequence number by xoring it - * with a salt and then encrypting it with the same key as used to encrypt + * This generator generates an IV based on a sequence number by multiplying + * it with a salt and then encrypting it with the same key as used to encrypt * the plain text. This algorithm requires that the block size be equal * to the IV size. It is mainly useful for CBC. 
* @@ -24,81 +24,17 @@ #include #include #include -#include #include -#include -#include +#include #include -#define MAX_IV_SIZE 16 - -static DEFINE_PER_CPU(u32 [MAX_IV_SIZE / sizeof(u32)], echainiv_iv); - -/* We don't care if we get preempted and read/write IVs from the next CPU. */ -static void echainiv_read_iv(u8 *dst, unsigned size) -{ - u32 *a = (u32 *)dst; - u32 __percpu *b = echainiv_iv; - - for (; size >= 4; size -= 4) { - *a++ = this_cpu_read(*b); - b++; - } -} - -static void echainiv_write_iv(const u8 *src, unsigned size) -{ - const u32 *a = (const u32 *)src; - u32 __percpu *b = echainiv_iv; - - for (; size >= 4; size -= 4) { - this_cpu_write(*b, *a); - a++; - b++; - } -} - -static void echainiv_encrypt_complete2(struct aead_request *req, int err) -{ - struct aead_request *subreq = aead_request_ctx(req); - struct crypto_aead *geniv; - unsigned int ivsize; - - if (err == -EINPROGRESS) - return; - - if (err) - goto out; - - geniv = crypto_aead_reqtfm(req); - ivsize = crypto_aead_ivsize(geniv); - - echainiv_write_iv(subreq->iv, ivsize); - - if (req->iv != subreq->iv) - memcpy(req->iv, subreq->iv, ivsize); - -out: - if (req->iv != subreq->iv) - kzfree(subreq->iv); -} - -static void echainiv_encrypt_complete(struct crypto_async_request *base, - int err) -{ - struct aead_request *req = base->data; - - echainiv_encrypt_complete2(req, err); - aead_request_complete(req, err); -} - static int echainiv_encrypt(struct aead_request *req) { struct crypto_aead *geniv = crypto_aead_reqtfm(req); struct aead_geniv_ctx *ctx = crypto_aead_ctx(geniv); struct aead_request *subreq = aead_request_ctx(req); - crypto_completion_t compl; - void *data; + __be64 nseqno; + u64 seqno; u8 *info; unsigned int ivsize = crypto_aead_ivsize(geniv); int err; @@ -108,8 +44,6 @@ static int echainiv_encrypt(struct aead_request *req) aead_request_set_tfm(subreq, ctx->child); - compl = echainiv_encrypt_complete; - data = req; info = req->iv; if (req->src != req->dst) { @@ -127,29 +61,30 @@ static 
int echainiv_encrypt(struct aead_request *req) return err; } - if (unlikely(!IS_ALIGNED((unsigned long)info, - crypto_aead_alignmask(geniv) + 1))) { - info = kmalloc(ivsize, req->base.flags & - CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL: - GFP_ATOMIC); - if (!info) - return -ENOMEM; - - memcpy(info, req->iv, ivsize); - } - - aead_request_set_callback(subreq, req->base.flags, compl, data); + aead_request_set_callback(subreq, req->base.flags, + req->base.complete, req->base.data); aead_request_set_crypt(subreq, req->dst, req->dst, req->cryptlen, info); aead_request_set_ad(subreq, req->assoclen); - crypto_xor(info, ctx->salt, ivsize); + memcpy(&nseqno, info + ivsize - 8, 8); + seqno = be64_to_cpu(nseqno); + memset(info, 0, ivsize); + scatterwalk_map_and_copy(info, req->dst, req->assoclen, ivsize, 1); - echainiv_read_iv(info, ivsize); - err = crypto_aead_encrypt(subreq); - echainiv_encrypt_complete2(req, err); - return err; + do { + u64 a; + + memcpy(&a, ctx->salt + ivsize - 8, 8); + + a |= 1; + a *= seqno; + + memcpy(info + ivsize - 8, &a, 8); + } while ((ivsize -= 8)); + + return crypto_aead_encrypt(subreq); } static int echainiv_decrypt(struct aead_request *req) @@ -196,8 +131,7 @@ static int echainiv_aead_create(struct crypto_template *tmpl, alg = crypto_spawn_aead_alg(spawn); err = -EINVAL; - if (inst->alg.ivsize & (sizeof(u32) - 1) || - inst->alg.ivsize > MAX_IV_SIZE) + if (inst->alg.ivsize & (sizeof(u64) - 1) || !inst->alg.ivsize) goto free_inst; inst->alg.encrypt = echainiv_encrypt; @@ -206,7 +140,6 @@ static int echainiv_aead_create(struct crypto_template *tmpl, inst->alg.init = aead_init_geniv; inst->alg.exit = aead_exit_geniv; - inst->alg.base.cra_alignmask |= __alignof__(u32) - 1; inst->alg.base.cra_ctxsize = sizeof(struct aead_geniv_ctx); inst->alg.base.cra_ctxsize += inst->alg.ivsize; -- cgit v1.1 From acdb04d0b36769b3e05990c488dc74d8b7ac8060 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 Sep 2016 14:43:29 +0800 Subject: crypto: skcipher - Fix 
blkcipher walk OOM crash When we need to allocate a temporary blkcipher_walk_next and it fails, the code is supposed to take the slow path of processing the data block by block. However, due to an unrelated change we instead end up dereferencing the NULL pointer. This patch fixes it by moving the unrelated bsize setting out of the way so that we enter the slow path as intended. Fixes: 7607bd8ff03b ("[CRYPTO] blkcipher: Added blkcipher_walk_virt_block") Cc: stable@vger.kernel.org Reported-by: xiakaixu Reported-by: Ard Biesheuvel Signed-off-by: Herbert Xu Tested-by: Ard Biesheuvel --- crypto/blkcipher.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c index 3699995..a832426 100644 --- a/crypto/blkcipher.c +++ b/crypto/blkcipher.c @@ -233,6 +233,8 @@ static int blkcipher_walk_next(struct blkcipher_desc *desc, return blkcipher_walk_done(desc, walk, -EINVAL); } + bsize = min(walk->walk_blocksize, n); + walk->flags &= ~(BLKCIPHER_WALK_SLOW | BLKCIPHER_WALK_COPY | BLKCIPHER_WALK_DIFF); if (!scatterwalk_aligned(&walk->in, walk->alignmask) || @@ -245,7 +247,6 @@ static int blkcipher_walk_next(struct blkcipher_desc *desc, } } - bsize = min(walk->walk_blocksize, n); n = scatterwalk_clamp(&walk->in, n); n = scatterwalk_clamp(&walk->out, n); -- cgit v1.1 From f82e90b28654804ab72881d577d87c3d5c65e2bc Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 13 Sep 2016 09:48:52 +0100 Subject: crypto: arm/aes-ctr - fix NULL dereference in tail processing The AES-CTR glue code avoids calling into the blkcipher API for the tail portion of the walk, by comparing the remainder of walk.nbytes modulo AES_BLOCK_SIZE with the residual nbytes, and jumping straight into the tail processing block if they are equal. This tail processing block checks whether nbytes != 0, and does nothing otherwise. However, in case of an allocation failure in the blkcipher layer, we may enter this code with walk.nbytes == 0, while nbytes > 0.
In this case, we should not dereference the source and destination pointers, since they may be NULL. So instead of checking for nbytes != 0, check for (walk.nbytes % AES_BLOCK_SIZE) != 0, which implies the former in non-error conditions. Fixes: 86464859cc77 ("crypto: arm - AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions") Cc: stable@vger.kernel.org Reported-by: xiakaixu Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/aes-ce-glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c index da3c042..aef022a 100644 --- a/arch/arm/crypto/aes-ce-glue.c +++ b/arch/arm/crypto/aes-ce-glue.c @@ -284,7 +284,7 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } - if (nbytes) { + if (walk.nbytes % AES_BLOCK_SIZE) { u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; u8 __aligned(8) tail[AES_BLOCK_SIZE]; -- cgit v1.1 From 2db34e78f126c6001d79d3b66ab1abb482dc7caa Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 13 Sep 2016 09:48:53 +0100 Subject: crypto: arm64/aes-ctr - fix NULL dereference in tail processing The AES-CTR glue code avoids calling into the blkcipher API for the tail portion of the walk, by comparing the remainder of walk.nbytes modulo AES_BLOCK_SIZE with the residual nbytes, and jumping straight into the tail processing block if they are equal. This tail processing block checks whether nbytes != 0, and does nothing otherwise. However, in case of an allocation failure in the blkcipher layer, we may enter this code with walk.nbytes == 0, while nbytes > 0. In this case, we should not dereference the source and destination pointers, since they may be NULL. 
So instead of checking for nbytes != 0, check for (walk.nbytes % AES_BLOCK_SIZE) != 0, which implies the former in non-error conditions. Fixes: 49788fe2a128 ("arm64/crypto: AES-ECB/CBC/CTR/XTS using ARMv8 NEON and Crypto Extensions") Cc: stable@vger.kernel.org Reported-by: xiakaixu Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 5c88804..6b2aa0f 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -216,7 +216,7 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } - if (nbytes) { + if (walk.nbytes % AES_BLOCK_SIZE) { u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; u8 __aligned(8) tail[AES_BLOCK_SIZE]; -- cgit v1.1 From 5f4761dda2ba3743ceb5eb5b5e7483172927831a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 3 Sep 2016 17:38:19 +0100 Subject: ath10k: fix memory leak on caldata on error exit path caldata is not being free'd on the error exit path, causing a memory leak and data definitely should not be freed. Free caldata instead of data. Thanks to Kalle Valo for spotting that data should not be free'd. 
Signed-off-by: Colin Ian King Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index 0635995..a6d9c06 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -2726,7 +2726,7 @@ static int ath10k_pci_hif_fetch_cal_eeprom(struct ath10k *ar, void **data, return 0; err_free: - kfree(data); + kfree(caldata); return -EINVAL; } -- cgit v1.1 From 214d553944815245897f6ac71f0b2e1905badcd9 Mon Sep 17 00:00:00 2001 From: Chaehyun Lim Date: Mon, 5 Sep 2016 22:38:02 +0900 Subject: ath10k: remove unused variable ar_pci Trivial fix to remove unused variable ar_pci in ath10k_pci_tx_pipe_cleanup when building with W=1: drivers/net/wireless/ath/ath10k/pci.c:1696:21: warning: variable 'ar_pci' set but not used [-Wunused-but-set-variable] Signed-off-by: Chaehyun Lim Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/pci.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index a6d9c06..0457e31 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -1691,14 +1691,12 @@ static void ath10k_pci_rx_pipe_cleanup(struct ath10k_pci_pipe *pci_pipe) static void ath10k_pci_tx_pipe_cleanup(struct ath10k_pci_pipe *pci_pipe) { struct ath10k *ar; - struct ath10k_pci *ar_pci; struct ath10k_ce_pipe *ce_pipe; struct ath10k_ce_ring *ce_ring; struct sk_buff *skb; int i; ar = pci_pipe->hif_ce_state; - ar_pci = ath10k_pci_priv(ar); ce_pipe = pci_pipe->ce_hdl; ce_ring = ce_pipe->src_ring; -- cgit v1.1 From 8c1d7fa53166dd82bcf6be5ffc83bc4066150bf5 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Tue, 6 Sep 2016 12:05:28 -0700 Subject: ath10k: enable peer stats by default IFTYPE_MESH_POINT need to rely on these for accurate path selection metrics.
Other modes will probably also find them useful. Enabling peer stats has the side effect of reducing max number of STAs from 128 to 118. There should be negligible performance impact. If users really need 128 STAs and don't mind losing out on peer stats, they can still disable them: echo 0 > debugfs/ieee80211/phyn/ath10k/peer_stats Signed-off-by: Thomas Pedersen Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index 3abf8d6..e859ca6 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -2145,6 +2145,9 @@ static void ath10k_core_register_work(struct work_struct *work) struct ath10k *ar = container_of(work, struct ath10k, register_work); int status; + /* peer stats are enabled by default */ + set_bit(ATH10K_FLAG_PEER_STATS, &ar->dev_flags); + status = ath10k_core_probe_fw(ar); if (status) { ath10k_err(ar, "could not probe fw (%d)\n", status); -- cgit v1.1 From 3040420158c139f64776935587bfad2584152f4c Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Thu, 18 Aug 2016 18:26:35 -0700 Subject: ath10k: improve logging message Helps to know the sta pointer. Signed-off-by: Ben Greear [kvalo@qca.qualcomm.com: add %pK and remove the colon] Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/mac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index c4d965f..0a44dab 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -6022,8 +6022,8 @@ static int ath10k_sta_state(struct ieee80211_hw *hw, * Existing station deletion. 
*/ ath10k_dbg(ar, ATH10K_DBG_MAC, - "mac vdev %d peer delete %pM (sta gone)\n", - arvif->vdev_id, sta->addr); + "mac vdev %d peer delete %pM sta %pK (sta gone)\n", + arvif->vdev_id, sta->addr, sta); ret = ath10k_peer_delete(ar, arvif->vdev_id, sta->addr); if (ret) -- cgit v1.1 From 43d923e2c192ecef19447dc2b0ca0bab6d8b1f64 Mon Sep 17 00:00:00 2001 From: Vasanthakumar Thiagarajan Date: Fri, 9 Sep 2016 17:25:13 +0300 Subject: ath10k: move ath10k_hw_params definition to hw.h This is to prepare for rx descriptor abstraction where we'll be dereferencing ath10k_hw_params member in hw.h. Moreover hw.h looks more suitable to house ath10k_hw_params definition than core.h Signed-off-by: Vasanthakumar Thiagarajan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.h | 53 +--------------------------------- drivers/net/wireless/ath/ath10k/hw.h | 53 ++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 52 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index c223913..6ec9495 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -736,58 +736,7 @@ struct ath10k { struct ath10k_htc htc; struct ath10k_htt htt; - struct ath10k_hw_params { - u32 id; - u16 dev_id; - const char *name; - u32 patch_load_addr; - int uart_pin; - u32 otp_exe_param; - - /* Type of hw cycle counter wraparound logic, for more info - * refer enum ath10k_hw_cc_wraparound_type. - */ - enum ath10k_hw_cc_wraparound_type cc_wraparound_type; - - /* Some of chip expects fragment descriptor to be continuous - * memory for any TX operation. Set continuous_frag_desc flag - * for the hardware which have such requirement. - */ - bool continuous_frag_desc; - - /* CCK hardware rate table mapping for the newer chipsets - * like QCA99X0, QCA4019 got revised. 
The CCK h/w rate values - * are in a proper order with respect to the rate/preamble - */ - bool cck_rate_map_rev2; - - u32 channel_counters_freq_hz; - - /* Mgmt tx descriptors threshold for limiting probe response - * frames. - */ - u32 max_probe_resp_desc_thres; - - /* The padding bytes's location is different on various chips */ - enum ath10k_hw_4addr_pad hw_4addr_pad; - - u32 tx_chain_mask; - u32 rx_chain_mask; - u32 max_spatial_stream; - u32 cal_data_len; - - struct ath10k_hw_params_fw { - const char *dir; - const char *board; - size_t board_size; - size_t board_ext_size; - } fw; - - /* qca99x0 family chips deliver broadcast/multicast management - * frames encrypted and expect software do decryption. - */ - bool sw_decrypt_mcast_mgmt; - } hw_params; + struct ath10k_hw_params hw_params; /* contains the firmware images used with ATH10K_FIRMWARE_MODE_NORMAL */ struct ath10k_fw_components normal_mode_fw; diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h index e014cd7..af0d5d1 100644 --- a/drivers/net/wireless/ath/ath10k/hw.h +++ b/drivers/net/wireless/ath/ath10k/hw.h @@ -363,6 +363,59 @@ enum ath10k_hw_cc_wraparound_type { ATH10K_HW_CC_WRAP_SHIFTED_EACH = 2, }; +struct ath10k_hw_params { + u32 id; + u16 dev_id; + const char *name; + u32 patch_load_addr; + int uart_pin; + u32 otp_exe_param; + + /* Type of hw cycle counter wraparound logic, for more info + * refer enum ath10k_hw_cc_wraparound_type. + */ + enum ath10k_hw_cc_wraparound_type cc_wraparound_type; + + /* Some of chip expects fragment descriptor to be continuous + * memory for any TX operation. Set continuous_frag_desc flag + * for the hardware which have such requirement. + */ + bool continuous_frag_desc; + + /* CCK hardware rate table mapping for the newer chipsets + * like QCA99X0, QCA4019 got revised. 
The CCK h/w rate values + * are in a proper order with respect to the rate/preamble + */ + bool cck_rate_map_rev2; + + u32 channel_counters_freq_hz; + + /* Mgmt tx descriptors threshold for limiting probe response + * frames. + */ + u32 max_probe_resp_desc_thres; + + /* The padding bytes's location is different on various chips */ + enum ath10k_hw_4addr_pad hw_4addr_pad; + + u32 tx_chain_mask; + u32 rx_chain_mask; + u32 max_spatial_stream; + u32 cal_data_len; + + struct ath10k_hw_params_fw { + const char *dir; + const char *board; + size_t board_size; + size_t board_ext_size; + } fw; + + /* qca99x0 family chips deliver broadcast/multicast management + * frames encrypted and expect software do decryption. + */ + bool sw_decrypt_mcast_mgmt; +}; + /* Target specific defines for MAIN firmware */ #define TARGET_NUM_VDEVS 8 #define TARGET_NUM_PEER_AST 2 -- cgit v1.1 From ae02c8719aab19bf311b6ce2881feb844456297e Mon Sep 17 00:00:00 2001 From: Vasanthakumar Thiagarajan Date: Fri, 9 Sep 2016 17:25:22 +0300 Subject: ath10k: add provision for Rx descriptor abstraction There are slight differences in Rx hw descriptor information among different chips. So far driver does not use those new information for any functionalities, but there is one important information which is available from QCA99X0 onwards to indicate the number of bytes that hw padded at the beginning of the rx payload and this information is needed to undecap the rx packet. Add an abstraction for Rx desc to make use of the new desc information available. The callback that this patch defines to retrieve the padding bytes will be used in follow-up patch.
Signed-off-by: Vasanthakumar Thiagarajan [Rename operations to hw_ops for other purposes] Signed-off-by: Benjamin Berg Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 12 ++++++++++++ drivers/net/wireless/ath/ath10k/hw.c | 13 +++++++++++++ drivers/net/wireless/ath/ath10k/hw.h | 12 ++++++++++++ 3 files changed, 37 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index e859ca6..2d405a6 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -68,6 +68,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA988X_BOARD_DATA_SZ, .board_ext_size = QCA988X_BOARD_EXT_DATA_SZ, }, + .hw_ops = &qca988x_ops, }, { .id = QCA9887_HW_1_0_VERSION, @@ -87,6 +88,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA9887_BOARD_DATA_SZ, .board_ext_size = QCA9887_BOARD_EXT_DATA_SZ, }, + .hw_ops = &qca988x_ops, }, { .id = QCA6174_HW_2_1_VERSION, @@ -104,6 +106,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA6174_BOARD_DATA_SZ, .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, + .hw_ops = &qca988x_ops, }, { .id = QCA6174_HW_2_1_VERSION, @@ -122,6 +125,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA6174_BOARD_DATA_SZ, .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, + .hw_ops = &qca988x_ops, }, { .id = QCA6174_HW_3_0_VERSION, @@ -140,6 +144,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA6174_BOARD_DATA_SZ, .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, + .hw_ops = &qca988x_ops, }, { .id = QCA6174_HW_3_2_VERSION, @@ -159,6 +164,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA6174_BOARD_DATA_SZ, .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, + .hw_ops = &qca988x_ops, }, { .id = QCA99X0_HW_2_0_DEV_VERSION, @@ -183,6 +189,7 @@ static const struct 
ath10k_hw_params ath10k_hw_params_list[] = { .board_ext_size = QCA99X0_BOARD_EXT_DATA_SZ, }, .sw_decrypt_mcast_mgmt = true, + .hw_ops = &qca99x0_ops, }, { .id = QCA9984_HW_1_0_DEV_VERSION, @@ -207,6 +214,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_ext_size = QCA99X0_BOARD_EXT_DATA_SZ, }, .sw_decrypt_mcast_mgmt = true, + .hw_ops = &qca99x0_ops, }, { .id = QCA9888_HW_2_0_DEV_VERSION, @@ -230,6 +238,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_ext_size = QCA99X0_BOARD_EXT_DATA_SZ, }, .sw_decrypt_mcast_mgmt = true, + .hw_ops = &qca99x0_ops, }, { .id = QCA9377_HW_1_0_DEV_VERSION, @@ -247,6 +256,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA9377_BOARD_DATA_SZ, .board_ext_size = QCA9377_BOARD_EXT_DATA_SZ, }, + .hw_ops = &qca988x_ops, }, { .id = QCA9377_HW_1_1_DEV_VERSION, @@ -264,6 +274,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA9377_BOARD_DATA_SZ, .board_ext_size = QCA9377_BOARD_EXT_DATA_SZ, }, + .hw_ops = &qca988x_ops, }, { .id = QCA4019_HW_1_0_DEV_VERSION, @@ -289,6 +300,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_ext_size = QCA4019_BOARD_EXT_DATA_SZ, }, .sw_decrypt_mcast_mgmt = true, + .hw_ops = &qca99x0_ops, }, }; diff --git a/drivers/net/wireless/ath/ath10k/hw.c b/drivers/net/wireless/ath/ath10k/hw.c index f903d46..c2ecb9b 100644 --- a/drivers/net/wireless/ath/ath10k/hw.c +++ b/drivers/net/wireless/ath/ath10k/hw.c @@ -219,3 +219,16 @@ void ath10k_hw_fill_survey_time(struct ath10k *ar, struct survey_info *survey, survey->time = CCNT_TO_MSEC(ar, cc); survey->time_busy = CCNT_TO_MSEC(ar, rcc); } + +const struct ath10k_hw_ops qca988x_ops = { +}; + +static int ath10k_qca99x0_rx_desc_get_l3_pad_bytes(struct htt_rx_desc *rxd) +{ + return MS(__le32_to_cpu(rxd->msdu_end.qca99x0.info1), + RX_MSDU_END_INFO1_L3_HDR_PAD); +} + +const struct ath10k_hw_ops qca99x0_ops = { + .rx_desc_get_l3_pad_bytes = 
ath10k_qca99x0_rx_desc_get_l3_pad_bytes, +}; diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h index af0d5d1..1b5ea31 100644 --- a/drivers/net/wireless/ath/ath10k/hw.h +++ b/drivers/net/wireless/ath/ath10k/hw.h @@ -414,8 +414,20 @@ struct ath10k_hw_params { * frames encrypted and expect software do decryption. */ bool sw_decrypt_mcast_mgmt; + + const struct ath10k_hw_ops *hw_ops; }; +struct htt_rx_desc; + +/* Defines needed for Rx descriptor abstraction */ +struct ath10k_hw_ops { + int (*rx_desc_get_l3_pad_bytes)(struct htt_rx_desc *rxd); +}; + +extern const struct ath10k_hw_ops qca988x_ops; +extern const struct ath10k_hw_ops qca99x0_ops; + /* Target specific defines for MAIN firmware */ #define TARGET_NUM_VDEVS 8 #define TARGET_NUM_PEER_AST 2 -- cgit v1.1 From 9e19e13261423eeb4398177001daa874c2128aa4 Mon Sep 17 00:00:00 2001 From: Vasanthakumar Thiagarajan Date: Fri, 9 Sep 2016 17:25:29 +0300 Subject: ath10k: properly remove padding from the start of rx payload In QCA99X0 (QCA99X0, QCA9984, QCA9888 and QCA4019) family chips, hw adds padding at the beginning of the rx payload to make L3 header 4-byte aligned. In the chips doing this type of padding, the number of bytes padded will be indicated through msdu_end:info1. Define a hw_rx_desc_ops wrapper to retrieve the number of padded bytes and use this while doing undecap. This should fix padding related issues with ethernet decap format with QCA99X0, QCA9984, QCA9888 and QCA4019 hw.
Signed-off-by: Vasanthakumar Thiagarajan [Rename operations to hw_ops for other purposes] Signed-off-by: Benjamin Berg Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/htt_rx.c | 36 +++++++++++++++++++------------- drivers/net/wireless/ath/ath10k/hw.h | 9 ++++++++ 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index 714b0de..a3785a9 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -1051,9 +1051,11 @@ static void ath10k_htt_rx_h_undecap_nwifi(struct ath10k *ar, const u8 first_hdr[64]) { struct ieee80211_hdr *hdr; + struct htt_rx_desc *rxd; size_t hdr_len; u8 da[ETH_ALEN]; u8 sa[ETH_ALEN]; + int l3_pad_bytes; /* Delivered decapped frame: * [nwifi 802.11 header] <-- replaced with 802.11 hdr @@ -1067,19 +1069,12 @@ static void ath10k_htt_rx_h_undecap_nwifi(struct ath10k *ar, */ /* pull decapped header and copy SA & DA */ - if ((ar->hw_params.hw_4addr_pad == ATH10K_HW_4ADDR_PAD_BEFORE) && - ieee80211_has_a4(((struct ieee80211_hdr *)first_hdr)->frame_control)) { - /* The QCA99X0 4 address mode pad 2 bytes at the - * beginning of MSDU - */ - hdr = (struct ieee80211_hdr *)(msdu->data + 2); - /* The skb length need be extended 2 as the 2 bytes at the tail - * be excluded due to the padding - */ - skb_put(msdu, 2); - } else { - hdr = (struct ieee80211_hdr *)(msdu->data); - } + rxd = (void *)msdu->data - sizeof(*rxd); + + l3_pad_bytes = ath10k_rx_desc_get_l3_pad_bytes(&ar->hw_params, rxd); + skb_put(msdu, l3_pad_bytes); + + hdr = (struct ieee80211_hdr *)(msdu->data + l3_pad_bytes); hdr_len = ath10k_htt_rx_nwifi_hdrlen(ar, hdr); ether_addr_copy(da, ieee80211_get_DA(hdr)); @@ -1146,6 +1141,8 @@ static void ath10k_htt_rx_h_undecap_eth(struct ath10k *ar, void *rfc1042; u8 da[ETH_ALEN]; u8 sa[ETH_ALEN]; + int l3_pad_bytes; + struct htt_rx_desc *rxd; /* Delivered decapped frame: * [eth header] <-- replaced with 
802.11 hdr & rfc1042/llc @@ -1156,6 +1153,11 @@ static void ath10k_htt_rx_h_undecap_eth(struct ath10k *ar, if (WARN_ON_ONCE(!rfc1042)) return; + rxd = (void *)msdu->data - sizeof(*rxd); + l3_pad_bytes = ath10k_rx_desc_get_l3_pad_bytes(&ar->hw_params, rxd); + skb_put(msdu, l3_pad_bytes); + skb_pull(msdu, l3_pad_bytes); + /* pull decapped header and copy SA & DA */ eth = (struct ethhdr *)msdu->data; ether_addr_copy(da, eth->h_dest); @@ -1186,6 +1188,8 @@ static void ath10k_htt_rx_h_undecap_snap(struct ath10k *ar, { struct ieee80211_hdr *hdr; size_t hdr_len; + int l3_pad_bytes; + struct htt_rx_desc *rxd; /* Delivered decapped frame: * [amsdu header] <-- replaced with 802.11 hdr @@ -1193,7 +1197,11 @@ static void ath10k_htt_rx_h_undecap_snap(struct ath10k *ar, * [payload] */ - skb_pull(msdu, sizeof(struct amsdu_subframe_hdr)); + rxd = (void *)msdu->data - sizeof(*rxd); + l3_pad_bytes = ath10k_rx_desc_get_l3_pad_bytes(&ar->hw_params, rxd); + + skb_put(msdu, l3_pad_bytes); + skb_pull(msdu, sizeof(struct amsdu_subframe_hdr) + l3_pad_bytes); hdr = (struct ieee80211_hdr *)first_hdr; hdr_len = ieee80211_hdrlen(hdr->frame_control); diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h index 1b5ea31..204f882 100644 --- a/drivers/net/wireless/ath/ath10k/hw.h +++ b/drivers/net/wireless/ath/ath10k/hw.h @@ -428,6 +428,15 @@ struct ath10k_hw_ops { extern const struct ath10k_hw_ops qca988x_ops; extern const struct ath10k_hw_ops qca99x0_ops; +static inline int +ath10k_rx_desc_get_l3_pad_bytes(struct ath10k_hw_params *hw, + struct htt_rx_desc *rxd) +{ + if (hw->hw_ops->rx_desc_get_l3_pad_bytes) + return hw->hw_ops->rx_desc_get_l3_pad_bytes(rxd); + return 0; +} + /* Target specific defines for MAIN firmware */ #define TARGET_NUM_VDEVS 8 #define TARGET_NUM_PEER_AST 2 -- cgit v1.1 From 95b5bf7ccca0d7ba575a7e26a7ed4146b9190071 Mon Sep 17 00:00:00 2001 From: Vasanthakumar Thiagarajan Date: Fri, 9 Sep 2016 17:25:36 +0300 Subject: ath10k: remove 4-addr 
padding related hw_param configuration hw_4addr_pad was added to handle different types of padding in 4-address rx frame. But this padding is not very specific to 4-address, it can happen even with three address + ethernet decap mode. Since the padding information can be obtained through Rx desc for QCA99X0 and newer chips, this hw_param is not needed any more. Signed-off-by: Vasanthakumar Thiagarajan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 9 --------- drivers/net/wireless/ath/ath10k/hw.h | 8 -------- 2 files changed, 17 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index 2d405a6..3a8984b 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -60,7 +60,6 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, - .hw_4addr_pad = ATH10K_HW_4ADDR_PAD_AFTER, .cal_data_len = 2116, .fw = { .dir = QCA988X_HW_2_0_FW_DIR, @@ -80,7 +79,6 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, - .hw_4addr_pad = ATH10K_HW_4ADDR_PAD_AFTER, .cal_data_len = 2116, .fw = { .dir = QCA9887_HW_1_0_FW_DIR, @@ -117,7 +115,6 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, - .hw_4addr_pad = ATH10K_HW_4ADDR_PAD_AFTER, .cal_data_len = 8124, .fw = { .dir = QCA6174_HW_2_1_FW_DIR, @@ -136,7 +133,6 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, - .hw_4addr_pad = ATH10K_HW_4ADDR_PAD_AFTER, .cal_data_len = 8124, .fw = { .dir = QCA6174_HW_3_0_FW_DIR, @@ -155,7 +151,6 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .otp_exe_param = 0, 
.channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, - .hw_4addr_pad = ATH10K_HW_4ADDR_PAD_AFTER, .cal_data_len = 8124, .fw = { /* uses same binaries as hw3.0 */ @@ -177,7 +172,6 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .cck_rate_map_rev2 = true, .channel_counters_freq_hz = 150000, .max_probe_resp_desc_thres = 24, - .hw_4addr_pad = ATH10K_HW_4ADDR_PAD_BEFORE, .tx_chain_mask = 0xf, .rx_chain_mask = 0xf, .max_spatial_stream = 4, @@ -202,7 +196,6 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .cck_rate_map_rev2 = true, .channel_counters_freq_hz = 150000, .max_probe_resp_desc_thres = 24, - .hw_4addr_pad = ATH10K_HW_4ADDR_PAD_BEFORE, .tx_chain_mask = 0xf, .rx_chain_mask = 0xf, .max_spatial_stream = 4, @@ -226,7 +219,6 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .continuous_frag_desc = true, .channel_counters_freq_hz = 150000, .max_probe_resp_desc_thres = 24, - .hw_4addr_pad = ATH10K_HW_4ADDR_PAD_BEFORE, .tx_chain_mask = 3, .rx_chain_mask = 3, .max_spatial_stream = 2, @@ -288,7 +280,6 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .cck_rate_map_rev2 = true, .channel_counters_freq_hz = 125000, .max_probe_resp_desc_thres = 24, - .hw_4addr_pad = ATH10K_HW_4ADDR_PAD_BEFORE, .tx_chain_mask = 0x3, .rx_chain_mask = 0x3, .max_spatial_stream = 2, diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h index 204f882..308e423 100644 --- a/drivers/net/wireless/ath/ath10k/hw.h +++ b/drivers/net/wireless/ath/ath10k/hw.h @@ -338,11 +338,6 @@ enum ath10k_hw_rate_rev2_cck { ATH10K_HW_RATE_REV2_CCK_SP_11M, }; -enum ath10k_hw_4addr_pad { - ATH10K_HW_4ADDR_PAD_AFTER, - ATH10K_HW_4ADDR_PAD_BEFORE, -}; - enum ath10k_hw_cc_wraparound_type { ATH10K_HW_CC_WRAP_DISABLED = 0, @@ -395,9 +390,6 @@ struct ath10k_hw_params { */ u32 max_probe_resp_desc_thres; - /* The padding bytes's location is different on various chips */ - enum ath10k_hw_4addr_pad hw_4addr_pad; 
- u32 tx_chain_mask; u32 rx_chain_mask; u32 max_spatial_stream; -- cgit v1.1 From 4854f175c3182816d906c4bc34be5f30556346a5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 Sep 2016 15:39:29 +0200 Subject: mac80211: remove useless open_count check __ieee80211_suspend() checks early on if there's anything to do by checking open_count, so there's no need to check again later in the function. Remove the useless check. Signed-off-by: Johannes Berg --- net/mac80211/pm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 00a43a7..28a3a09 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -178,8 +178,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) WARN_ON(!list_empty(&local->chanctx_list)); /* stop hardware - this must stop RX */ - if (local->open_count) - ieee80211_stop_device(local); + ieee80211_stop_device(local); suspend: local->suspended = true; -- cgit v1.1 From ebf9ff753c041b296241990aef76163bbb2cc9c8 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 13 Sep 2016 15:58:28 +0200 Subject: genirq: Provide irq_gc_{lock_irqsave,unlock_irqrestore}() helpers Some irqchip drivers need to take the generic chip lock outside of the irq context. Provide the irq_gc_{lock_irqsave,unlock_irqrestore}() helpers to allow one to disable irqs while entering a critical section protected by gc->lock. Note that we do not provide optimized version of these helpers for !SMP, because they are not called from the hot-path. 
[ tglx: Added a comment when these helpers should be [not] used ] Signed-off-by: Boris Brezillon Cc: Jason Cooper Cc: Marc Zyngier Cc: Nicolas Ferre Cc: stable@vger.kernel.org Cc: Alexandre Belloni Link: http://lkml.kernel.org/r/1473775109-4192-1-git-send-email-boris.brezillon@free-electrons.com Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/linux/irq.h b/include/linux/irq.h index b52424e..0ac26c8 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -945,6 +945,16 @@ static inline void irq_gc_lock(struct irq_chip_generic *gc) { } static inline void irq_gc_unlock(struct irq_chip_generic *gc) { } #endif +/* + * The irqsave variants are for usage in non interrupt code. Do not use + * them in irq_chip callbacks. Use irq_gc_lock() instead. + */ +#define irq_gc_lock_irqsave(gc, flags) \ + raw_spin_lock_irqsave(&(gc)->lock, flags) + +#define irq_gc_unlock_irqrestore(gc, flags) \ + raw_spin_unlock_irqrestore(&(gc)->lock, flags) + static inline void irq_reg_writel(struct irq_chip_generic *gc, u32 val, int reg_offset) { -- cgit v1.1 From 5eb0d6eb3fac3daa60d9190eed9fa41cf809c756 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 13 Sep 2016 15:58:29 +0200 Subject: irqchip/atmel-aic: Fix potential deadlock in ->xlate() aic5_irq_domain_xlate() and aic_irq_domain_xlate() take the generic chip lock without disabling interrupts, which can lead to a deadlock if an interrupt occurs while the lock is held in one of these functions. Replace irq_gc_{lock,unlock}() calls by irq_gc_{lock_irqsave,unlock_irqrestore}() ones to prevent this bug from happening. 
Fixes: b1479ebb7720 ("irqchip: atmel-aic: Add atmel AIC/AIC5 drivers") Signed-off-by: Boris Brezillon Acked-by: Marc Zyngier Cc: Jason Cooper Cc: Nicolas Ferre Cc: stable@vger.kernel.org Cc: Alexandre Belloni Link: http://lkml.kernel.org/r/1473775109-4192-2-git-send-email-boris.brezillon@free-electrons.com Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-atmel-aic.c | 5 +++-- drivers/irqchip/irq-atmel-aic5.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-atmel-aic.c b/drivers/irqchip/irq-atmel-aic.c index 112e17c..37f952d 100644 --- a/drivers/irqchip/irq-atmel-aic.c +++ b/drivers/irqchip/irq-atmel-aic.c @@ -176,6 +176,7 @@ static int aic_irq_domain_xlate(struct irq_domain *d, { struct irq_domain_chip_generic *dgc = d->gc; struct irq_chip_generic *gc; + unsigned long flags; unsigned smr; int idx; int ret; @@ -194,11 +195,11 @@ static int aic_irq_domain_xlate(struct irq_domain *d, gc = dgc->gc[idx]; - irq_gc_lock(gc); + irq_gc_lock_irqsave(gc, flags); smr = irq_reg_readl(gc, AT91_AIC_SMR(*out_hwirq)); aic_common_set_priority(intspec[2], &smr); irq_reg_writel(gc, smr, AT91_AIC_SMR(*out_hwirq)); - irq_gc_unlock(gc); + irq_gc_unlock_irqrestore(gc, flags); return ret; } diff --git a/drivers/irqchip/irq-atmel-aic5.c b/drivers/irqchip/irq-atmel-aic5.c index 4f0d068..2a624d8 100644 --- a/drivers/irqchip/irq-atmel-aic5.c +++ b/drivers/irqchip/irq-atmel-aic5.c @@ -258,6 +258,7 @@ static int aic5_irq_domain_xlate(struct irq_domain *d, unsigned int *out_type) { struct irq_chip_generic *bgc = irq_get_domain_generic_chip(d, 0); + unsigned long flags; unsigned smr; int ret; @@ -269,12 +270,12 @@ static int aic5_irq_domain_xlate(struct irq_domain *d, if (ret) return ret; - irq_gc_lock(bgc); + irq_gc_lock_irqsave(bgc, flags); irq_reg_writel(bgc, *out_hwirq, AT91_AIC5_SSR); smr = irq_reg_readl(bgc, AT91_AIC5_SMR); aic_common_set_priority(intspec[2], &smr); irq_reg_writel(bgc, smr, AT91_AIC5_SMR); - irq_gc_unlock(bgc); + 
irq_gc_unlock_irqrestore(bgc, flags); return ret; } -- cgit v1.1 From 308433155a67cb097142292c8943e0aa8d1a1c79 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 8 Sep 2016 12:50:43 -0400 Subject: net: bridge: add helper to call /sbin/bridge-stp If /sbin/bridge-stp is available on the system, bridge tries to execute it instead of the kernel implementation when starting/stopping STP. If anything goes wrong with /sbin/bridge-stp, bridge silently falls back to kernel STP, making it hard to debug userspace STP. This patch adds a br_stp_call_user helper to start/stop userspace STP and debug errors from the program: abnormal exit status is stored in the lower byte and normal exit status is stored in the higher byte. Below is a simple example on a kernel with dynamic debug enabled: # ln -s /bin/false /sbin/bridge-stp # brctl stp br0 on br0: failed to start userspace STP (256) # dmesg br0: /sbin/bridge-stp exited with code 1 br0: failed to start userspace STP (256) br0: using kernel STP Signed-off-by: Vivien Didelot Signed-off-by: David S.
Miller --- net/bridge/br_stp_if.c | 43 +++++++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 341caa0..d8ad73b 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -134,17 +134,36 @@ void br_stp_disable_port(struct net_bridge_port *p) br_become_root_bridge(br); } -static void br_stp_start(struct net_bridge *br) +static int br_stp_call_user(struct net_bridge *br, char *arg) { - int r; - char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL }; + char *argv[] = { BR_STP_PROG, br->dev->name, arg, NULL }; char *envp[] = { NULL }; + int rc; + + /* call userspace STP and report program errors */ + rc = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); + if (rc > 0) { + if (rc & 0xff) + br_debug(br, BR_STP_PROG " received signal %d\n", + rc & 0x7f); + else + br_debug(br, BR_STP_PROG " exited with code %d\n", + (rc >> 8) & 0xff); + } + + return rc; +} + +static void br_stp_start(struct net_bridge *br) +{ struct net_bridge_port *p; + int err = -ENOENT; if (net_eq(dev_net(br->dev), &init_net)) - r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); - else - r = -ENOENT; + err = br_stp_call_user(br, "start"); + + if (err && err != -ENOENT) + br_err(br, "failed to start userspace STP (%d)\n", err); spin_lock_bh(&br->lock); @@ -153,9 +172,10 @@ static void br_stp_start(struct net_bridge *br) else if (br->bridge_forward_delay > BR_MAX_FORWARD_DELAY) __br_set_forward_delay(br, BR_MAX_FORWARD_DELAY); - if (r == 0) { + if (!err) { br->stp_enabled = BR_USER_STP; br_debug(br, "userspace STP started\n"); + /* Stop hello and hold timers */ del_timer(&br->hello_timer); list_for_each_entry(p, &br->port_list, list) @@ -173,14 +193,13 @@ static void br_stp_start(struct net_bridge *br) static void br_stp_stop(struct net_bridge *br) { - int r; - char *argv[] = { BR_STP_PROG, br->dev->name, "stop", NULL }; - char *envp[] = { NULL }; struct 
net_bridge_port *p; + int err; if (br->stp_enabled == BR_USER_STP) { - r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); - br_info(br, "userspace STP stopped, return code %d\n", r); + err = br_stp_call_user(br, "stop"); + if (err) + br_err(br, "failed to stop userspace STP (%d)\n", err); /* To start timers on any ports left in blocking */ mod_timer(&br->hello_timer, jiffies + br->hello_time); -- cgit v1.1 From 3e1be7ad2d38c6bd6aeef96df9bd0a7822f4e51c Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 9 Sep 2016 22:43:12 +0800 Subject: bnx2: Reset device during driver initialization When system enters into kdump kernel because of kernel panic, it won't shutdown devices. On-flight DMA will continue transferring data until device driver initializes. All devices are supposed to reset during driver initialization. And this property is used to fix the kdump failure in system with intel iommu. Other systems with hardware iommu should be similar. Please check commit 091d42e ("iommu/vt-d: Copy translation tables from old kernel") and those commits around. But bnx2 driver doesn't reset device during driver initialization. The device resetting is deferred to net device up stage. This will cause hardware iommu handling failure on bnx2 device. And its resetting relies on firmware. So in this patch move the firmware requesting code to earlier bnx2_init_one(), then next call bnx2_reset_chip to reset device. Signed-off-by: Baoquan He Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnx2.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 8fc3f3c..505ceaf 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -6356,10 +6356,6 @@ bnx2_open(struct net_device *dev) struct bnx2 *bp = netdev_priv(dev); int rc; - rc = bnx2_request_firmware(bp); - if (rc < 0) - goto out; - netif_carrier_off(dev); bnx2_disable_int(bp); @@ -6428,7 +6424,6 @@ open_err: bnx2_free_irq(bp); bnx2_free_mem(bp); bnx2_del_napi(bp); - bnx2_release_firmware(bp); goto out; } @@ -8575,6 +8570,12 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, dev); + rc = bnx2_request_firmware(bp); + if (rc < 0) + goto error; + + + bnx2_reset_chip(bp, BNX2_DRV_MSG_CODE_RESET); memcpy(dev->dev_addr, bp->mac_addr, ETH_ALEN); dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG | @@ -8607,6 +8608,7 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; error: + bnx2_release_firmware(bp); pci_iounmap(pdev, bp->regview); pci_release_regions(pdev); pci_disable_device(pdev); -- cgit v1.1 From c20cb8119337052a84e40cba94af732d870e22e3 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 10 Sep 2016 00:56:55 +0000 Subject: tipc: fix possible memory leak in tipc_udp_enable() 'ub' is malloced in tipc_udp_enable() and should be freed before leaving from the error handling cases, otherwise it will cause memory leak. Fixes: ba5aa84a2d22 ("tipc: split UDP nl address parsing") Signed-off-by: Wei Yongjun Signed-off-by: David S. 
Miller --- net/tipc/udp_media.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index dd27468..d80cd3f 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -665,7 +665,8 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, if (!opts[TIPC_NLA_UDP_LOCAL] || !opts[TIPC_NLA_UDP_REMOTE]) { pr_err("Invalid UDP bearer configuration"); - return -EINVAL; + err = -EINVAL; + goto err; } err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_LOCAL], &local, -- cgit v1.1 From 99c1790e5bbd31fe2b646bff868a55a13b1eeeb2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sat, 10 Sep 2016 19:59:05 +1000 Subject: net: Remove NO_IRQ from powerpc-only network drivers We'd like to eventually remove NO_IRQ on powerpc, so remove usages of it from powerpc-only drivers. Signed-off-by: Michael Ellerman Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fman/fman_mac.h | 2 +- drivers/net/ethernet/freescale/fs_enet/mac-fcc.c | 2 +- drivers/net/ethernet/freescale/fs_enet/mac-fec.c | 2 +- drivers/net/ethernet/freescale/fs_enet/mac-scc.c | 2 +- drivers/net/ethernet/ibm/emac/core.c | 10 +++++----- drivers/net/ethernet/ibm/emac/mal.c | 5 ++--- drivers/net/ethernet/ibm/ibmvnic.c | 4 ++-- drivers/net/ethernet/toshiba/ps3_gelic_net.c | 4 ++-- 8 files changed, 15 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/freescale/fman/fman_mac.h b/drivers/net/ethernet/freescale/fman/fman_mac.h index 8ddeedb..ddf0260 100644 --- a/drivers/net/ethernet/freescale/fman/fman_mac.h +++ b/drivers/net/ethernet/freescale/fman/fman_mac.h @@ -192,7 +192,7 @@ struct fman_mac_params { /* A handle to the FM object this port related to */ void *fm; /* MDIO exceptions interrupt source - not valid for all - * MACs; MUST be set to 'NO_IRQ' for MACs that don't have + * MACs; MUST be set to 0 for MACs that don't have * mdio-irq, or for polling */ void *dev_id; /* device cookie used by the exception cbs */ diff --git 
a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c index 7919896..120c758 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c @@ -90,7 +90,7 @@ static int do_pd_setup(struct fs_enet_private *fep) int ret = -EINVAL; fep->interrupt = irq_of_parse_and_map(ofdev->dev.of_node, 0); - if (fep->interrupt == NO_IRQ) + if (!fep->interrupt) goto out; fep->fcc.fccp = of_iomap(ofdev->dev.of_node, 0); diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c index 21fbaaf..777beff 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c @@ -99,7 +99,7 @@ static int do_pd_setup(struct fs_enet_private *fep) struct platform_device *ofdev = to_platform_device(fep->dev); fep->interrupt = irq_of_parse_and_map(ofdev->dev.of_node, 0); - if (fep->interrupt == NO_IRQ) + if (!fep->interrupt) return -EINVAL; fep->fec.fecp = of_iomap(ofdev->dev.of_node, 0); diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c index 9d52e1e..15abd37 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c @@ -99,7 +99,7 @@ static int do_pd_setup(struct fs_enet_private *fep) struct platform_device *ofdev = to_platform_device(fep->dev); fep->interrupt = irq_of_parse_and_map(ofdev->dev.of_node, 0); - if (fep->interrupt == NO_IRQ) + if (!fep->interrupt) return -EINVAL; fep->scc.sccp = of_iomap(ofdev->dev.of_node, 0); diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 4c9771d..ec4d0f3 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -2750,7 +2750,7 @@ static int emac_probe(struct platform_device *ofdev) /* Get interrupts. 
EMAC irq is mandatory, WOL irq is optional */ dev->emac_irq = irq_of_parse_and_map(np, 0); dev->wol_irq = irq_of_parse_and_map(np, 1); - if (dev->emac_irq == NO_IRQ) { + if (!dev->emac_irq) { printk(KERN_ERR "%s: Can't map main interrupt\n", np->full_name); goto err_free; } @@ -2913,9 +2913,9 @@ static int emac_probe(struct platform_device *ofdev) err_reg_unmap: iounmap(dev->emacp); err_irq_unmap: - if (dev->wol_irq != NO_IRQ) + if (dev->wol_irq) irq_dispose_mapping(dev->wol_irq); - if (dev->emac_irq != NO_IRQ) + if (dev->emac_irq) irq_dispose_mapping(dev->emac_irq); err_free: free_netdev(ndev); @@ -2957,9 +2957,9 @@ static int emac_remove(struct platform_device *ofdev) emac_dbg_unregister(dev); iounmap(dev->emacp); - if (dev->wol_irq != NO_IRQ) + if (dev->wol_irq) irq_dispose_mapping(dev->wol_irq); - if (dev->emac_irq != NO_IRQ) + if (dev->emac_irq) irq_dispose_mapping(dev->emac_irq); free_netdev(dev->ndev); diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c index fdb5cdb..aaf6fec 100644 --- a/drivers/net/ethernet/ibm/emac/mal.c +++ b/drivers/net/ethernet/ibm/emac/mal.c @@ -597,9 +597,8 @@ static int mal_probe(struct platform_device *ofdev) mal->rxde_irq = irq_of_parse_and_map(ofdev->dev.of_node, 4); } - if (mal->txeob_irq == NO_IRQ || mal->rxeob_irq == NO_IRQ || - mal->serr_irq == NO_IRQ || mal->txde_irq == NO_IRQ || - mal->rxde_irq == NO_IRQ) { + if (!mal->txeob_irq || !mal->rxeob_irq || !mal->serr_irq || + !mal->txde_irq || !mal->rxde_irq) { printk(KERN_ERR "mal%d: failed to map interrupts !\n", index); err = -ENODEV; diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 62454d7..bfe17d9 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1424,7 +1424,7 @@ static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter) scrq = adapter->tx_scrq[i]; scrq->irq = irq_create_mapping(NULL, scrq->hw_irq); - if (scrq->irq == NO_IRQ) { + if (!scrq->irq) { rc = 
-EINVAL; dev_err(dev, "Error mapping irq\n"); goto req_tx_irq_failed; @@ -1444,7 +1444,7 @@ static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter) for (i = 0; i < adapter->req_rx_queues; i++) { scrq = adapter->rx_scrq[i]; scrq->irq = irq_create_mapping(NULL, scrq->hw_irq); - if (scrq->irq == NO_IRQ) { + if (!scrq->irq) { rc = -EINVAL; dev_err(dev, "Error mapping irq\n"); goto req_rx_irq_failed; diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c index 79f0ec4..bc258d7 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c @@ -1791,7 +1791,7 @@ fail_alloc_rx: gelic_card_free_chain(card, card->tx_chain.head); fail_alloc_tx: free_irq(card->irq, card); - netdev->irq = NO_IRQ; + netdev->irq = 0; fail_request_irq: ps3_sb_event_receive_port_destroy(dev, card->irq); fail_alloc_irq: @@ -1843,7 +1843,7 @@ static int ps3_gelic_driver_remove(struct ps3_system_bus_device *dev) netdev0 = card->netdev[GELIC_PORT_ETHERNET_0]; /* disconnect event port */ free_irq(card->irq, card); - netdev0->irq = NO_IRQ; + netdev0->irq = 0; ps3_sb_event_receive_port_destroy(card->dev, card->irq); wait_event(card->waitq, -- cgit v1.1 From a7c22bda52e24094c7bc96afcd897ddad2b28bc0 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 10 Sep 2016 11:17:57 +0000 Subject: net: macb: fix missing unlock on error in macb_start_xmit() Fix missing unlock before return from function macb_start_xmit() in the error handling case. Fixes: 007e4ba3ee13 ("net: macb: initialize checksum when using checksum offloading") Signed-off-by: Wei Yongjun Acked-by: Nicolas Ferre Signed-off-by: David S. 
Miller --- drivers/net/ethernet/cadence/macb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 0294b6a..63144bb 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -1398,7 +1398,7 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev) if (macb_clear_csum(skb)) { dev_kfree_skb_any(skb); - return NETDEV_TX_OK; + goto unlock; } /* Map socket buffer for DMA transfer */ -- cgit v1.1 From a5f54fcc8a63f4e93ea48243c3a762aa848299d5 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 10 Sep 2016 12:31:30 +0000 Subject: net: ethernet: dwmac: fix non static symbol warning Fixes the following sparse warning: drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c:172:1: warning: symbol 'stm32_dwmac_pm_ops' was not declared. Should it be static? Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c index 79d8b92..e5a926b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c @@ -169,7 +169,8 @@ static int stm32_dwmac_resume(struct device *dev) } #endif /* CONFIG_PM_SLEEP */ -SIMPLE_DEV_PM_OPS(stm32_dwmac_pm_ops, stm32_dwmac_suspend, stm32_dwmac_resume); +static SIMPLE_DEV_PM_OPS(stm32_dwmac_pm_ops, + stm32_dwmac_suspend, stm32_dwmac_resume); static const struct of_device_id stm32_dwmac_match[] = { { .compatible = "st,stm32-dwmac"}, -- cgit v1.1 From 715f5552b1e90ba3eecf6d1a6d044d0d5226663f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 10 Sep 2016 23:11:23 +0800 Subject: sctp: hold the transport before using it in sctp_hash_cmp Since commit 4f0087812648 ("sctp: apply rhashtable api to send/recv path"), sctp uses transport rhashtable 
with .obj_cmpfn sctp_hash_cmp, in which it compares the members of the transport with the rhashtable args to check if it's the right transport. But sctp uses the transport without holding it in sctp_hash_cmp, it can cause a use-after-free panic. As after it gets transport from hashtable, another CPU may close the sk and free the asoc. In sctp_association_free, it frees all the transports, meanwhile, the assoc's refcnt may be reduced to 0, assoc can be destroyed by sctp_association_destroy. So after that, transport->assoc is actually an unavailable memory address in sctp_hash_cmp. Although sctp_hash_cmp is under rcu_read_lock, it still can not avoid this, as assoc is not freed by RCU. This patch is to hold the transport before checking it's members with sctp_transport_hold, in which it checks the refcnt first, holds it if it's not 0. Fixes: 4f0087812648 ("sctp: apply rhashtable api to send/recv path") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/input.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/net/sctp/input.c b/net/sctp/input.c index 69444d3..1555fb8 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -796,27 +796,34 @@ struct sctp_hash_cmp_arg { static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg, const void *ptr) { + struct sctp_transport *t = (struct sctp_transport *)ptr; const struct sctp_hash_cmp_arg *x = arg->key; - const struct sctp_transport *t = ptr; - struct sctp_association *asoc = t->asoc; - const struct net *net = x->net; + struct sctp_association *asoc; + int err = 1; if (!sctp_cmp_addr_exact(&t->ipaddr, x->paddr)) - return 1; - if (!net_eq(sock_net(asoc->base.sk), net)) - return 1; + return err; + if (!sctp_transport_hold(t)) + return err; + + asoc = t->asoc; + if (!net_eq(sock_net(asoc->base.sk), x->net)) + goto out; if (x->ep) { if (x->ep != asoc->ep) - return 1; + goto out; } else { if (x->laddr->v4.sin_port != 
htons(asoc->base.bind_addr.port)) - return 1; + goto out; if (!sctp_bind_addr_match(&asoc->base.bind_addr, x->laddr, sctp_sk(asoc->base.sk))) - return 1; + goto out; } - return 0; + err = 0; +out: + sctp_transport_put(t); + return err; } static inline u32 sctp_hash_obj(const void *data, u32 len, u32 seed) -- cgit v1.1 From 971d3a44c00dcb27353f929c4c28367956c15527 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sun, 11 Sep 2016 17:54:03 +0200 Subject: net: ethernet: apm: xgene: use phydev from struct net_device The private structure contain a pointer to phydev, but the structure net_device already contain such pointer. So we can remove the pointer phy_dev in the private structure, and update the driver to use the one contained in struct net_device. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- .../net/ethernet/apm/xgene/xgene_enet_ethtool.c | 4 ++-- drivers/net/ethernet/apm/xgene/xgene_enet_hw.c | 24 +++++++++++----------- drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 8 ++++---- drivers/net/ethernet/apm/xgene/xgene_enet_main.h | 1 - 4 files changed, 18 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c b/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c index 22a7b26..e1f44ae 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c @@ -57,7 +57,7 @@ static void xgene_get_drvinfo(struct net_device *ndev, static int xgene_get_settings(struct net_device *ndev, struct ethtool_cmd *cmd) { struct xgene_enet_pdata *pdata = netdev_priv(ndev); - struct phy_device *phydev = pdata->phy_dev; + struct phy_device *phydev = ndev->phydev; if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) { if (phydev == NULL) @@ -96,7 +96,7 @@ static int xgene_get_settings(struct net_device *ndev, struct ethtool_cmd *cmd) static int xgene_set_settings(struct net_device *ndev, struct ethtool_cmd *cmd) { struct xgene_enet_pdata *pdata = netdev_priv(ndev); - 
struct phy_device *phydev = pdata->phy_dev; + struct phy_device *phydev = ndev->phydev; if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) { if (!phydev) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c index da413c8..c481f10 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c @@ -713,7 +713,7 @@ static void xgene_enet_adjust_link(struct net_device *ndev) { struct xgene_enet_pdata *pdata = netdev_priv(ndev); const struct xgene_mac_ops *mac_ops = pdata->mac_ops; - struct phy_device *phydev = pdata->phy_dev; + struct phy_device *phydev = ndev->phydev; if (phydev->link) { if (pdata->phy_speed != phydev->speed) { @@ -773,15 +773,13 @@ int xgene_enet_phy_connect(struct net_device *ndev) netdev_err(ndev, "Could not connect to PHY\n"); return -ENODEV; } - - pdata->phy_dev = phy_dev; } else { #ifdef CONFIG_ACPI struct acpi_device *adev = acpi_phy_find_device(dev); if (adev) - pdata->phy_dev = adev->driver_data; - - phy_dev = pdata->phy_dev; + phy_dev = adev->driver_data; + else + phy_dev = NULL; if (!phy_dev || phy_connect_direct(ndev, phy_dev, &xgene_enet_adjust_link, @@ -849,8 +847,6 @@ static int xgene_mdiobus_register(struct xgene_enet_pdata *pdata, if (!phy) return -EIO; - pdata->phy_dev = phy; - return ret; } @@ -890,14 +886,18 @@ int xgene_enet_mdio_config(struct xgene_enet_pdata *pdata) void xgene_enet_phy_disconnect(struct xgene_enet_pdata *pdata) { - if (pdata->phy_dev) - phy_disconnect(pdata->phy_dev); + struct net_device *ndev = pdata->ndev; + + if (ndev->phydev) + phy_disconnect(ndev->phydev); } void xgene_enet_mdio_remove(struct xgene_enet_pdata *pdata) { - if (pdata->phy_dev) - phy_disconnect(pdata->phy_dev); + struct net_device *ndev = pdata->ndev; + + if (ndev->phydev) + phy_disconnect(ndev->phydev); mdiobus_unregister(pdata->mdio_bus); mdiobus_free(pdata->mdio_bus); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c 
b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index b8b9495..522ba92 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -748,8 +748,8 @@ static int xgene_enet_open(struct net_device *ndev) if (ret) return ret; - if (pdata->phy_dev) { - phy_start(pdata->phy_dev); + if (ndev->phydev) { + phy_start(ndev->phydev); } else { schedule_delayed_work(&pdata->link_work, PHY_POLL_LINK_OFF); netif_carrier_off(ndev); @@ -772,8 +772,8 @@ static int xgene_enet_close(struct net_device *ndev) mac_ops->tx_disable(pdata); mac_ops->rx_disable(pdata); - if (pdata->phy_dev) - phy_stop(pdata->phy_dev); + if (ndev->phydev) + phy_stop(ndev->phydev); else cancel_delayed_work_sync(&pdata->link_work); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h index b339fc1..7735371 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h @@ -174,7 +174,6 @@ struct xgene_cle_ops { struct xgene_enet_pdata { struct net_device *ndev; struct mii_bus *mdio_bus; - struct phy_device *phy_dev; int phy_speed; struct clk *clk; struct platform_device *pdev; -- cgit v1.1 From 36a19b299536746f5c01d7716dac962f831e4d38 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sun, 11 Sep 2016 17:54:04 +0200 Subject: net: ethernet: apm: xgene: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. Signed-off-by: Philippe Reynes Signed-off-by: David S. 
Miller --- .../net/ethernet/apm/xgene/xgene_enet_ethtool.c | 61 +++++++++++++--------- 1 file changed, 37 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c b/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c index e1f44ae..d372d42 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c @@ -54,46 +54,59 @@ static void xgene_get_drvinfo(struct net_device *ndev, sprintf(info->bus_info, "%s", pdev->name); } -static int xgene_get_settings(struct net_device *ndev, struct ethtool_cmd *cmd) +static int xgene_get_link_ksettings(struct net_device *ndev, + struct ethtool_link_ksettings *cmd) { struct xgene_enet_pdata *pdata = netdev_priv(ndev); struct phy_device *phydev = ndev->phydev; + u32 supported; if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) { if (phydev == NULL) return -ENODEV; - return phy_ethtool_gset(phydev, cmd); + return phy_ethtool_ksettings_get(phydev, cmd); } else if (pdata->phy_mode == PHY_INTERFACE_MODE_SGMII) { if (pdata->mdio_driver) { if (!phydev) return -ENODEV; - return phy_ethtool_gset(phydev, cmd); + return phy_ethtool_ksettings_get(phydev, cmd); } - cmd->supported = SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg | - SUPPORTED_MII; - cmd->advertising = cmd->supported; - ethtool_cmd_speed_set(cmd, SPEED_1000); - cmd->duplex = DUPLEX_FULL; - cmd->port = PORT_MII; - cmd->transceiver = XCVR_INTERNAL; - cmd->autoneg = AUTONEG_ENABLE; + supported = SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg | + SUPPORTED_MII; + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.advertising, + supported); + + cmd->base.speed = SPEED_1000; + cmd->base.duplex = DUPLEX_FULL; + cmd->base.port = PORT_MII; + cmd->base.autoneg = AUTONEG_ENABLE; } else { - cmd->supported = SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE; - cmd->advertising = cmd->supported; - 
ethtool_cmd_speed_set(cmd, SPEED_10000); - cmd->duplex = DUPLEX_FULL; - cmd->port = PORT_FIBRE; - cmd->transceiver = XCVR_INTERNAL; - cmd->autoneg = AUTONEG_DISABLE; + supported = SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE; + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.advertising, + supported); + + cmd->base.speed = SPEED_10000; + cmd->base.duplex = DUPLEX_FULL; + cmd->base.port = PORT_FIBRE; + cmd->base.autoneg = AUTONEG_DISABLE; } return 0; } -static int xgene_set_settings(struct net_device *ndev, struct ethtool_cmd *cmd) +static int xgene_set_link_ksettings(struct net_device *ndev, + const struct ethtool_link_ksettings *cmd) { struct xgene_enet_pdata *pdata = netdev_priv(ndev); struct phy_device *phydev = ndev->phydev; @@ -102,7 +115,7 @@ static int xgene_set_settings(struct net_device *ndev, struct ethtool_cmd *cmd) if (!phydev) return -ENODEV; - return phy_ethtool_sset(phydev, cmd); + return phy_ethtool_ksettings_set(phydev, cmd); } if (pdata->phy_mode == PHY_INTERFACE_MODE_SGMII) { @@ -110,7 +123,7 @@ static int xgene_set_settings(struct net_device *ndev, struct ethtool_cmd *cmd) if (!phydev) return -ENODEV; - return phy_ethtool_sset(phydev, cmd); + return phy_ethtool_ksettings_set(phydev, cmd); } } @@ -152,12 +165,12 @@ static void xgene_get_ethtool_stats(struct net_device *ndev, static const struct ethtool_ops xgene_ethtool_ops = { .get_drvinfo = xgene_get_drvinfo, - .get_settings = xgene_get_settings, - .set_settings = xgene_set_settings, .get_link = ethtool_op_get_link, .get_strings = xgene_get_strings, .get_sset_count = xgene_get_sset_count, - .get_ethtool_stats = xgene_get_ethtool_stats + .get_ethtool_stats = xgene_get_ethtool_stats, + .get_link_ksettings = xgene_get_link_ksettings, + .set_link_ksettings = xgene_set_link_ksettings, }; void xgene_enet_set_ethtool_ops(struct net_device *ndev) -- cgit v1.1 From 440f895aa97f81a2bdc02993da5360a1f6da2fb5 Mon 
Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Sun, 11 Sep 2016 21:43:34 +0200 Subject: drivers: net: phy: xgene: Fix 'remove' function If 'IS_ERR(pdata->clk)' is true, then 'clk_disable_unprepare(pdata->clk)' will do nothing. It is likely that 'if (!IS_ERR(pdata->clk))' was expected here. In fact, the test can even be removed because 'clk_disable_unprepare' already handles such cases. Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/net/phy/mdio-xgene.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/mdio-xgene.c b/drivers/net/phy/mdio-xgene.c index 7756748..92af182 100644 --- a/drivers/net/phy/mdio-xgene.c +++ b/drivers/net/phy/mdio-xgene.c @@ -424,10 +424,8 @@ static int xgene_mdio_remove(struct platform_device *pdev) mdiobus_unregister(mdio_bus); mdiobus_free(mdio_bus); - if (dev->of_node) { - if (IS_ERR(pdata->clk)) - clk_disable_unprepare(pdata->clk); - } + if (dev->of_node) + clk_disable_unprepare(pdata->clk); return 0; } -- cgit v1.1 From 0c83f88c02085a762d52ebcd9cc4ca3df39db797 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 12 Sep 2016 13:26:23 +0200 Subject: mlxsw: spectrum: Correctly report autonegotiation Up until now the device always reported autonegotiation to be off although it was on by default. Allow the user to disable / enable autonegotiation and report its status correctly. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 14 ++++++++++++-- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 3 ++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index a7efd2a..cbec5f3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1815,7 +1815,12 @@ static int mlxsw_sp_port_get_settings(struct net_device *dev, mlxsw_sp_from_ptys_supported_link(eth_proto_cap) | SUPPORTED_Pause | SUPPORTED_Asym_Pause | SUPPORTED_Autoneg; - cmd->advertising = mlxsw_sp_from_ptys_advert_link(eth_proto_admin); + if (mlxsw_sp_port->link.autoneg) { + cmd->advertising = + mlxsw_sp_from_ptys_advert_link(eth_proto_admin); + cmd->advertising |= ADVERTISED_Autoneg; + cmd->autoneg = AUTONEG_ENABLE; + } mlxsw_sp_from_ptys_speed_duplex(netif_carrier_ok(dev), eth_proto_oper, cmd); @@ -1873,11 +1878,13 @@ static int mlxsw_sp_port_set_settings(struct net_device *dev, u32 eth_proto_new; u32 eth_proto_cap; u32 eth_proto_admin; + bool autoneg; int err; + autoneg = cmd->autoneg == AUTONEG_ENABLE; speed = ethtool_cmd_speed(cmd); - eth_proto_new = cmd->autoneg == AUTONEG_ENABLE ? + eth_proto_new = autoneg ? 
mlxsw_sp_to_ptys_advert_link(cmd->advertising) : mlxsw_sp_to_ptys_speed(speed); @@ -1907,6 +1914,8 @@ static int mlxsw_sp_port_set_settings(struct net_device *dev, if (!netif_running(dev)) return 0; + mlxsw_sp_port->link.autoneg = autoneg; + err = mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false); if (err) { netdev_err(dev, "Failed to set admin status"); @@ -2082,6 +2091,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, mlxsw_sp_port->mapping.module = module; mlxsw_sp_port->mapping.width = width; mlxsw_sp_port->mapping.lane = lane; + mlxsw_sp_port->link.autoneg = 1; bytes = DIV_ROUND_UP(VLAN_N_VID, BITS_PER_BYTE); mlxsw_sp_port->active_vlans = kzalloc(bytes, GFP_KERNEL); if (!mlxsw_sp_port->active_vlans) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 31a2f3d..969c250 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -341,7 +341,8 @@ struct mlxsw_sp_port { } vport; struct { u8 tx_pause:1, - rx_pause:1; + rx_pause:1, + autoneg:1; } link; struct { struct ieee_ets *ets; -- cgit v1.1 From 4149b97f728edc9247939ece42a784c14b4e212f Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 12 Sep 2016 13:26:24 +0200 Subject: mlxsw: spectrum: Report link partner's advertised speeds If autonegotiation was performed successfully, then we should report the link partner's advertised speeds instead of the operational speed of the port. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 19 +++++++++++++++++++ drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 9 ++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index b83d0a7..43ce27f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -2138,6 +2138,18 @@ MLXSW_ITEM32(reg, ptys, local_port, 0x00, 16, 8); */ MLXSW_ITEM32(reg, ptys, proto_mask, 0x00, 0, 3); +enum { + MLXSW_REG_PTYS_AN_STATUS_NA, + MLXSW_REG_PTYS_AN_STATUS_OK, + MLXSW_REG_PTYS_AN_STATUS_FAIL, +}; + +/* reg_ptys_an_status + * Autonegotiation status. + * Access: RO + */ +MLXSW_ITEM32(reg, ptys, an_status, 0x04, 28, 4); + #define MLXSW_REG_PTYS_ETH_SPEED_SGMII BIT(0) #define MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX BIT(1) #define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 BIT(2) @@ -2184,6 +2196,13 @@ MLXSW_ITEM32(reg, ptys, eth_proto_admin, 0x18, 0, 32); */ MLXSW_ITEM32(reg, ptys, eth_proto_oper, 0x24, 0, 32); +/* reg_ptys_eth_proto_lp_advertise + * The protocols that were advertised by the link partner during + * autonegotiation. 
+ * Access: RO + */ +MLXSW_ITEM32(reg, ptys, eth_proto_lp_advertise, 0x30, 0, 32); + static inline void mlxsw_reg_ptys_pack(char *payload, u8 local_port, u32 proto_admin) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index cbec5f3..07930cc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1800,6 +1800,8 @@ static int mlxsw_sp_port_get_settings(struct net_device *dev, u32 eth_proto_cap; u32 eth_proto_admin; u32 eth_proto_oper; + u8 autoneg_status; + u32 eth_proto_lp; int err; mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, 0); @@ -1810,6 +1812,8 @@ static int mlxsw_sp_port_get_settings(struct net_device *dev, } mlxsw_reg_ptys_unpack(ptys_pl, &eth_proto_cap, &eth_proto_admin, &eth_proto_oper); + eth_proto_lp = mlxsw_reg_ptys_eth_proto_lp_advertise_get(ptys_pl); + autoneg_status = mlxsw_reg_ptys_an_status_get(ptys_pl); cmd->supported = mlxsw_sp_from_ptys_supported_port(eth_proto_cap) | mlxsw_sp_from_ptys_supported_link(eth_proto_cap) | @@ -1826,7 +1830,10 @@ static int mlxsw_sp_port_get_settings(struct net_device *dev, eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; cmd->port = mlxsw_sp_port_connector_port(eth_proto_oper); - cmd->lp_advertising = mlxsw_sp_from_ptys_advert_link(eth_proto_oper); + + if (autoneg_status == MLXSW_REG_PTYS_AN_STATUS_OK && eth_proto_lp) + cmd->lp_advertising = + mlxsw_sp_from_ptys_advert_link(eth_proto_lp); cmd->transceiver = XCVR_INTERNAL; return 0; -- cgit v1.1 From 0213424adad63bfea08ef19cd6c997648ddcf44e Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 12 Sep 2016 13:26:25 +0200 Subject: mlxsw: spectrum: Report port type according to operational speed In case port isn't operational we shouldn't report the port type, but instead return PORT_OTHER. This is consistent with most other drivers that return PORT_OTHER when media type can't be determined.
Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 07930cc..38e4f03 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1828,7 +1828,6 @@ static int mlxsw_sp_port_get_settings(struct net_device *dev, mlxsw_sp_from_ptys_speed_duplex(netif_carrier_ok(dev), eth_proto_oper, cmd); - eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; cmd->port = mlxsw_sp_port_connector_port(eth_proto_oper); if (autoneg_status == MLXSW_REG_PTYS_AN_STATUS_OK && eth_proto_lp) -- cgit v1.1 From 91bdc7a43ac7cfe044caa5ab7e74dca114f15904 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 12 Sep 2016 13:26:26 +0200 Subject: mlxsw: spectrum: Indicate support of multiple port types The device can support multiple port types, so don't return on first match. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 38e4f03..d49f518 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1706,21 +1706,23 @@ static const struct mlxsw_sp_port_link_mode mlxsw_sp_port_link_mode[] = { static u32 mlxsw_sp_from_ptys_supported_port(u32 ptys_eth_proto) { + u32 modes = 0; + if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR | MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR | MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4 | MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4 | MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 | MLXSW_REG_PTYS_ETH_SPEED_SGMII)) - return SUPPORTED_FIBRE; + modes |= SUPPORTED_FIBRE; if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR | MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4 | MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4 | MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 | MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX)) - return SUPPORTED_Backplane; - return 0; + modes |= SUPPORTED_Backplane; + return modes; } static u32 mlxsw_sp_from_ptys_supported_link(u32 ptys_eth_proto) -- cgit v1.1 From b9d66a36aa7737d0f975d99aabc200b7496e26b8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 12 Sep 2016 13:26:27 +0200 Subject: mlxsw: spectrum: Add support for new ethtool API Remove the deprecated {get,set}_settings callbacks and instead add {get,set}_link_ksettings along with support for newly available speeds. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 1 + drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 344 +++++++++++++------------ 2 files changed, 185 insertions(+), 160 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 43ce27f..4e2354c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -2164,6 +2164,7 @@ MLXSW_ITEM32(reg, ptys, an_status, 0x04, 28, 4); #define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_ER_LR BIT(14) #define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4 BIT(15) #define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_LR4_ER4 BIT(16) +#define MLXSW_REG_PTYS_ETH_SPEED_50GBASE_SR2 BIT(18) #define MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR4 BIT(19) #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4 BIT(20) #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 BIT(21) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index d49f518..27bbcaf 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1599,158 +1599,179 @@ static int mlxsw_sp_port_get_sset_count(struct net_device *dev, int sset) } struct mlxsw_sp_port_link_mode { + enum ethtool_link_mode_bit_indices mask_ethtool; u32 mask; - u32 supported; - u32 advertised; u32 speed; }; static const struct mlxsw_sp_port_link_mode mlxsw_sp_port_link_mode[] = { { .mask = MLXSW_REG_PTYS_ETH_SPEED_100BASE_T, - .supported = SUPPORTED_100baseT_Full, - .advertised = ADVERTISED_100baseT_Full, - .speed = 100, - }, - { - .mask = MLXSW_REG_PTYS_ETH_SPEED_100BASE_TX, - .speed = 100, + .mask_ethtool = ETHTOOL_LINK_MODE_100baseT_Full_BIT, + .speed = SPEED_100, }, { .mask = MLXSW_REG_PTYS_ETH_SPEED_SGMII | MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX, - .supported = SUPPORTED_1000baseKX_Full, - .advertised = ADVERTISED_1000baseKX_Full, - .speed = 1000, + .mask_ethtool = ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, + .speed = SPEED_1000, }, { 
.mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_T, - .supported = SUPPORTED_10000baseT_Full, - .advertised = ADVERTISED_10000baseT_Full, - .speed = 10000, + .mask_ethtool = ETHTOOL_LINK_MODE_10000baseT_Full_BIT, + .speed = SPEED_10000, }, { .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 | MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4, - .supported = SUPPORTED_10000baseKX4_Full, - .advertised = ADVERTISED_10000baseKX4_Full, - .speed = 10000, + .mask_ethtool = ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT, + .speed = SPEED_10000, }, { .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR | MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR | MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR | MLXSW_REG_PTYS_ETH_SPEED_10GBASE_ER_LR, - .supported = SUPPORTED_10000baseKR_Full, - .advertised = ADVERTISED_10000baseKR_Full, - .speed = 10000, + .mask_ethtool = ETHTOOL_LINK_MODE_10000baseKR_Full_BIT, + .speed = SPEED_10000, }, { .mask = MLXSW_REG_PTYS_ETH_SPEED_20GBASE_KR2, - .supported = SUPPORTED_20000baseKR2_Full, - .advertised = ADVERTISED_20000baseKR2_Full, - .speed = 20000, + .mask_ethtool = ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT, + .speed = SPEED_20000, }, { .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4, - .supported = SUPPORTED_40000baseCR4_Full, - .advertised = ADVERTISED_40000baseCR4_Full, - .speed = 40000, + .mask_ethtool = ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT, + .speed = SPEED_40000, }, { .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4, - .supported = SUPPORTED_40000baseKR4_Full, - .advertised = ADVERTISED_40000baseKR4_Full, - .speed = 40000, + .mask_ethtool = ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT, + .speed = SPEED_40000, }, { .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4, - .supported = SUPPORTED_40000baseSR4_Full, - .advertised = ADVERTISED_40000baseSR4_Full, - .speed = 40000, + .mask_ethtool = ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT, + .speed = SPEED_40000, }, { .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_LR4_ER4, - .supported = SUPPORTED_40000baseLR4_Full, - .advertised = ADVERTISED_40000baseLR4_Full, - .speed = 40000, + 
.mask_ethtool = ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT, + .speed = SPEED_40000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR, + .mask_ethtool = ETHTOOL_LINK_MODE_25000baseCR_Full_BIT, + .speed = SPEED_25000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR, + .mask_ethtool = ETHTOOL_LINK_MODE_25000baseKR_Full_BIT, + .speed = SPEED_25000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR, + .mask_ethtool = ETHTOOL_LINK_MODE_25000baseSR_Full_BIT, + .speed = SPEED_25000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR, + .mask_ethtool = ETHTOOL_LINK_MODE_25000baseSR_Full_BIT, + .speed = SPEED_25000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_50GBASE_CR2, + .mask_ethtool = ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT, + .speed = SPEED_50000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR2, + .mask_ethtool = ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT, + .speed = SPEED_50000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_50GBASE_SR2, + .mask_ethtool = ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT, + .speed = SPEED_50000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4, + .mask_ethtool = ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT, + .speed = SPEED_56000, }, { - .mask = MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR | - MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR | - MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR, - .speed = 25000, + .mask = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4, + .mask_ethtool = ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT, + .speed = SPEED_56000, }, { - .mask = MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR4 | - MLXSW_REG_PTYS_ETH_SPEED_50GBASE_CR2 | - MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR2, - .speed = 50000, + .mask = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4, + .mask_ethtool = ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT, + .speed = SPEED_56000, }, { .mask = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4, - .supported = SUPPORTED_56000baseKR4_Full, - .advertised = ADVERTISED_56000baseKR4_Full, - .speed = 56000, + .mask_ethtool = ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT, + .speed = SPEED_56000, + 
}, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4, + .mask_ethtool = ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT, + .speed = SPEED_100000, }, { - .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4 | - MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 | - MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 | - MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4, - .speed = 100000, + .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4, + .mask_ethtool = ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT, + .speed = SPEED_100000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4, + .mask_ethtool = ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT, + .speed = SPEED_100000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4, + .mask_ethtool = ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT, + .speed = SPEED_100000, }, }; #define MLXSW_SP_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sp_port_link_mode) -static u32 mlxsw_sp_from_ptys_supported_port(u32 ptys_eth_proto) +static void +mlxsw_sp_from_ptys_supported_port(u32 ptys_eth_proto, + struct ethtool_link_ksettings *cmd) { - u32 modes = 0; - if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR | MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR | MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4 | MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4 | MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 | MLXSW_REG_PTYS_ETH_SPEED_SGMII)) - modes |= SUPPORTED_FIBRE; + ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR | MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4 | MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4 | MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 | MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX)) - modes |= SUPPORTED_Backplane; - return modes; + ethtool_link_ksettings_add_link_mode(cmd, supported, Backplane); } -static u32 mlxsw_sp_from_ptys_supported_link(u32 ptys_eth_proto) +static void mlxsw_sp_from_ptys_link(u32 ptys_eth_proto, unsigned long *mode) { - u32 modes = 0; int i; for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) { if (ptys_eth_proto & mlxsw_sp_port_link_mode[i].mask) - 
modes |= mlxsw_sp_port_link_mode[i].supported; + __set_bit(mlxsw_sp_port_link_mode[i].mask_ethtool, + mode); } - return modes; -} - -static u32 mlxsw_sp_from_ptys_advert_link(u32 ptys_eth_proto) -{ - u32 modes = 0; - int i; - - for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) { - if (ptys_eth_proto & mlxsw_sp_port_link_mode[i].mask) - modes |= mlxsw_sp_port_link_mode[i].advertised; - } - return modes; } static void mlxsw_sp_from_ptys_speed_duplex(bool carrier_ok, u32 ptys_eth_proto, - struct ethtool_cmd *cmd) + struct ethtool_link_ksettings *cmd) { u32 speed = SPEED_UNKNOWN; u8 duplex = DUPLEX_UNKNOWN; @@ -1767,8 +1788,8 @@ static void mlxsw_sp_from_ptys_speed_duplex(bool carrier_ok, u32 ptys_eth_proto, } } out: - ethtool_cmd_speed_set(cmd, speed); - cmd->duplex = duplex; + cmd->base.speed = speed; + cmd->base.duplex = duplex; } static u8 mlxsw_sp_port_connector_port(u32 ptys_eth_proto) @@ -1793,60 +1814,15 @@ static u8 mlxsw_sp_port_connector_port(u32 ptys_eth_proto) return PORT_OTHER; } -static int mlxsw_sp_port_get_settings(struct net_device *dev, - struct ethtool_cmd *cmd) -{ - struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - char ptys_pl[MLXSW_REG_PTYS_LEN]; - u32 eth_proto_cap; - u32 eth_proto_admin; - u32 eth_proto_oper; - u8 autoneg_status; - u32 eth_proto_lp; - int err; - - mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, 0); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); - if (err) { - netdev_err(dev, "Failed to get proto"); - return err; - } - mlxsw_reg_ptys_unpack(ptys_pl, &eth_proto_cap, - &eth_proto_admin, &eth_proto_oper); - eth_proto_lp = mlxsw_reg_ptys_eth_proto_lp_advertise_get(ptys_pl); - autoneg_status = mlxsw_reg_ptys_an_status_get(ptys_pl); - - cmd->supported = mlxsw_sp_from_ptys_supported_port(eth_proto_cap) | - mlxsw_sp_from_ptys_supported_link(eth_proto_cap) | - SUPPORTED_Pause | SUPPORTED_Asym_Pause | - SUPPORTED_Autoneg; - if (mlxsw_sp_port->link.autoneg) {
- cmd->advertising = - mlxsw_sp_from_ptys_advert_link(eth_proto_admin); - cmd->advertising |= ADVERTISED_Autoneg; - cmd->autoneg = AUTONEG_ENABLE; - } - mlxsw_sp_from_ptys_speed_duplex(netif_carrier_ok(dev), - eth_proto_oper, cmd); - - cmd->port = mlxsw_sp_port_connector_port(eth_proto_oper); - - if (autoneg_status == MLXSW_REG_PTYS_AN_STATUS_OK && eth_proto_lp) - cmd->lp_advertising = - mlxsw_sp_from_ptys_advert_link(eth_proto_lp); - - cmd->transceiver = XCVR_INTERNAL; - return 0; -} - -static u32 mlxsw_sp_to_ptys_advert_link(u32 advertising) +static u32 +mlxsw_sp_to_ptys_advert_link(const struct ethtool_link_ksettings *cmd) { u32 ptys_proto = 0; int i; for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) { - if (advertising & mlxsw_sp_port_link_mode[i].advertised) + if (test_bit(mlxsw_sp_port_link_mode[i].mask_ethtool, + cmd->link_modes.advertising)) ptys_proto |= mlxsw_sp_port_link_mode[i].mask; } return ptys_proto; @@ -1876,65 +1852,113 @@ static u32 mlxsw_sp_to_ptys_upper_speed(u32 upper_speed) return ptys_proto; } -static int mlxsw_sp_port_set_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +static void mlxsw_sp_port_get_link_supported(u32 eth_proto_cap, + struct ethtool_link_ksettings *cmd) +{ + ethtool_link_ksettings_add_link_mode(cmd, supported, Asym_Pause); + ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); + + mlxsw_sp_from_ptys_supported_port(eth_proto_cap, cmd); + mlxsw_sp_from_ptys_link(eth_proto_cap, cmd->link_modes.supported); +} + +static void mlxsw_sp_port_get_link_advertise(u32 eth_proto_admin, bool autoneg, + struct ethtool_link_ksettings *cmd) { + if (!autoneg) + return; + + ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg); + mlxsw_sp_from_ptys_link(eth_proto_admin, cmd->link_modes.advertising); +} + +static void +mlxsw_sp_port_get_link_lp_advertise(u32 eth_proto_lp, u8 autoneg_status, + struct ethtool_link_ksettings *cmd) +{ + if 
(autoneg_status != MLXSW_REG_PTYS_AN_STATUS_OK || !eth_proto_lp) + return; + + ethtool_link_ksettings_add_link_mode(cmd, lp_advertising, Autoneg); + mlxsw_sp_from_ptys_link(eth_proto_lp, cmd->link_modes.lp_advertising); +} + +static int mlxsw_sp_port_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) +{ + u32 eth_proto_cap, eth_proto_admin, eth_proto_oper, eth_proto_lp; struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char ptys_pl[MLXSW_REG_PTYS_LEN]; - u32 speed; - u32 eth_proto_new; - u32 eth_proto_cap; - u32 eth_proto_admin; + u8 autoneg_status; bool autoneg; int err; - autoneg = cmd->autoneg == AUTONEG_ENABLE; - speed = ethtool_cmd_speed(cmd); + autoneg = mlxsw_sp_port->link.autoneg; + mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, 0); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); + if (err) + return err; + mlxsw_reg_ptys_unpack(ptys_pl, &eth_proto_cap, &eth_proto_admin, + &eth_proto_oper); + + mlxsw_sp_port_get_link_supported(eth_proto_cap, cmd); - eth_proto_new = autoneg ? - mlxsw_sp_to_ptys_advert_link(cmd->advertising) : - mlxsw_sp_to_ptys_speed(speed); + mlxsw_sp_port_get_link_advertise(eth_proto_admin, autoneg, cmd); + + eth_proto_lp = mlxsw_reg_ptys_eth_proto_lp_advertise_get(ptys_pl); + autoneg_status = mlxsw_reg_ptys_an_status_get(ptys_pl); + mlxsw_sp_port_get_link_lp_advertise(eth_proto_lp, autoneg_status, cmd); + + cmd->base.autoneg = autoneg ?
AUTONEG_ENABLE : AUTONEG_DISABLE; + cmd->base.port = mlxsw_sp_port_connector_port(eth_proto_oper); + mlxsw_sp_from_ptys_speed_duplex(netif_carrier_ok(dev), eth_proto_oper, + cmd); + + return 0; +} + +static int +mlxsw_sp_port_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char ptys_pl[MLXSW_REG_PTYS_LEN]; + u32 eth_proto_cap, eth_proto_new; + bool autoneg; + int err; mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, 0); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); - if (err) { - netdev_err(dev, "Failed to get proto"); + if (err) return err; - } - mlxsw_reg_ptys_unpack(ptys_pl, &eth_proto_cap, &eth_proto_admin, NULL); + mlxsw_reg_ptys_unpack(ptys_pl, &eth_proto_cap, NULL, NULL); + + autoneg = cmd->base.autoneg == AUTONEG_ENABLE; + eth_proto_new = autoneg ? + mlxsw_sp_to_ptys_advert_link(cmd) : + mlxsw_sp_to_ptys_speed(cmd->base.speed); eth_proto_new = eth_proto_new & eth_proto_cap; if (!eth_proto_new) { - netdev_err(dev, "Not supported proto admin requested"); + netdev_err(dev, "No supported speed requested\n"); return -EINVAL; } - if (eth_proto_new == eth_proto_admin) - return 0; mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, eth_proto_new); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); - if (err) { - netdev_err(dev, "Failed to set proto admin"); + if (err) return err; - } if (!netif_running(dev)) return 0; mlxsw_sp_port->link.autoneg = autoneg; - err = mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false); - if (err) { - netdev_err(dev, "Failed to set admin status"); - return err; - } - - err = mlxsw_sp_port_admin_status_set(mlxsw_sp_port, true); - if (err) { - netdev_err(dev, "Failed to set admin status"); - return err; - } + mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false); + mlxsw_sp_port_admin_status_set(mlxsw_sp_port, true); return 0; } @@ -1948,8 +1972,8
@@ static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { .set_phys_id = mlxsw_sp_port_set_phys_id, .get_ethtool_stats = mlxsw_sp_port_get_stats, .get_sset_count = mlxsw_sp_port_get_sset_count, - .get_settings = mlxsw_sp_port_get_settings, - .set_settings = mlxsw_sp_port_set_settings, + .get_link_ksettings = mlxsw_sp_port_get_link_ksettings, + .set_link_ksettings = mlxsw_sp_port_set_link_ksettings, }; static int -- cgit v1.1 From ad5987b47e96a0fb6d13fea250e936aed000093c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 Sep 2016 15:53:55 +0200 Subject: nl80211: validate number of probe response CSA counters Due to an apparent copy/paste bug, the number of counters for the beacon configuration were checked twice, instead of checking the number of probe response counters. Fix this to check the number of probe response counters before parsing those. Cc: stable@vger.kernel.org Fixes: 9a774c78e211 ("cfg80211: Support multiple CSA counters") Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f02653a..4809f4d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6978,7 +6978,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) params.n_counter_offsets_presp = len / sizeof(u16); if (rdev->wiphy.max_num_csa_counters && - (params.n_counter_offsets_beacon > + (params.n_counter_offsets_presp > rdev->wiphy.max_num_csa_counters)) return -EINVAL; -- cgit v1.1 From b6b5555bc89f52e49244104ca4d7764c7b0f11cd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 Sep 2016 16:25:58 +0200 Subject: cfg80211: disallow shared key authentication with key index 4 Key index 4 can only be used for an IGTK, so the range checks for shared key authentication should treat 4 as an error, fix that in the code. 
Signed-off-by: Johannes Berg --- net/wireless/mlme.c | 2 +- net/wireless/nl80211.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index c284d88..d6abb07 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -222,7 +222,7 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, ASSERT_WDEV_LOCK(wdev); if (auth_type == NL80211_AUTHTYPE_SHARED_KEY) - if (!key || !key_len || key_idx < 0 || key_idx > 4) + if (!key || !key_len || key_idx < 0 || key_idx > 3) return -EINVAL; if (wdev->current_bss && diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7ebad35..c11c1ef 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -7388,7 +7388,7 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) (key.p.cipher != WLAN_CIPHER_SUITE_WEP104 || key.p.key_len != WLAN_KEY_LEN_WEP104)) return -EINVAL; - if (key.idx > 4) + if (key.idx > 3) return -EINVAL; } else { key.p.key_len = 0; -- cgit v1.1 From 42ee231cd12c2e1eb015163d04a65950e895d4b7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 Sep 2016 15:51:03 +0200 Subject: nl80211: fix connect keys range check Only key index 0-3 should be accepted, 4/5 are for IGTKs and cannot be used as connect keys. Fix the range checking to not allow such erroneous configurations. 
Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c11c1ef..524f5d2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -866,7 +866,7 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev, err = -EINVAL; if (!parse.p.key) goto error; - if (parse.idx < 0 || parse.idx > 4) + if (parse.idx < 0 || parse.idx > 3) goto error; if (parse.def) { if (def) -- cgit v1.1 From 386b1f273850380a1887044673922843736c6d0a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 Sep 2016 16:10:02 +0200 Subject: nl80211: only allow WEP keys during connect command This was already documented that way in nl80211.h, but the parsing code still accepted other key types. Change it to really only accept WEP keys as documented. Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 524f5d2..6cb33ae 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -881,16 +881,19 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev, parse.idx, false, NULL); if (err) goto error; + if (parse.p.cipher != WLAN_CIPHER_SUITE_WEP40 && + parse.p.cipher != WLAN_CIPHER_SUITE_WEP104) { + err = -EINVAL; + goto error; + } result->params[parse.idx].cipher = parse.p.cipher; result->params[parse.idx].key_len = parse.p.key_len; result->params[parse.idx].key = result->data[parse.idx]; memcpy(result->data[parse.idx], parse.p.key, parse.p.key_len); - if (parse.p.cipher == WLAN_CIPHER_SUITE_WEP40 || - parse.p.cipher == WLAN_CIPHER_SUITE_WEP104) { - if (no_ht) - *no_ht = true; - } + /* must be WEP key if we got here */ + if (no_ht) + *no_ht = true; } return result; -- cgit v1.1 From 9381e267b69acfea96c8429dc99da3e78835cef1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 Sep 2016 16:11:32 +0200 Subject: 
cfg80211: wext: only allow WEP keys to be configured before connected When not connected, anything but WEP keys shouldn't be allowed to be configured for later - only static WEP keys make sense at this point. Change wext to reject anything else just like nl80211 does. Signed-off-by: Johannes Berg --- net/wireless/wext-compat.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 9f27221..e45a764 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -487,6 +487,9 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, err = 0; if (wdev->current_bss) err = rdev_add_key(rdev, dev, idx, pairwise, addr, params); + else if (params->cipher != WLAN_CIPHER_SUITE_WEP40 && + params->cipher != WLAN_CIPHER_SUITE_WEP104) + return -EINVAL; if (err) return err; -- cgit v1.1 From e9c8f8d3a4d54106a30f2b981b53d658c9bc0c8e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 Sep 2016 16:37:40 +0200 Subject: cfg80211: validate key index better Don't accept it if a key_idx < 0 snuck through, reject WEP keys with key index 4 and 5 (which are used for IGTKs) and don't allow IGTKs with key indices other than 4 and 5. This makes the key data match expectations better. 
Signed-off-by: Johannes Berg --- net/wireless/util.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index 0675f51..12e2d3f 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -218,7 +218,7 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, struct key_params *params, int key_idx, bool pairwise, const u8 *mac_addr) { - if (key_idx > 5) + if (key_idx < 0 || key_idx > 5) return -EINVAL; if (!pairwise && mac_addr && !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) @@ -249,7 +249,13 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, /* Disallow BIP (group-only) cipher as pairwise cipher */ if (pairwise) return -EINVAL; + if (key_idx < 4) + return -EINVAL; break; + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: + if (key_idx > 3) + return -EINVAL; default: break; } -- cgit v1.1 From 89b706fb28e431fa7639348536c284fb375eb3c0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 Sep 2016 16:39:38 +0200 Subject: cfg80211: reduce connect key caching struct size After the previous patches, connect keys can only (correctly) be used for storing static WEP keys. Therefore, remove all the data for dealing with key index 4/5 and reduce the size of the key material to the maximum for WEP keys. 
Signed-off-by: Johannes Berg --- net/wireless/core.h | 6 +++--- net/wireless/ibss.c | 6 ++---- net/wireless/nl80211.c | 1 - net/wireless/util.c | 5 +---- net/wireless/wext-compat.c | 6 +++--- net/wireless/wext-sme.c | 3 +-- 6 files changed, 10 insertions(+), 17 deletions(-) diff --git a/net/wireless/core.h b/net/wireless/core.h index eee9144..5555e3c 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -249,9 +249,9 @@ struct cfg80211_event { }; struct cfg80211_cached_keys { - struct key_params params[6]; - u8 data[6][WLAN_MAX_KEY_LEN]; - int def, defmgmt; + struct key_params params[4]; + u8 data[4][WLAN_KEY_LEN_WEP104]; + int def; }; enum cfg80211_chan_mode { diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 4a4dda5..896cbb2 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -284,10 +284,8 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, if (!netif_running(wdev->netdev)) return 0; - if (wdev->wext.keys) { + if (wdev->wext.keys) wdev->wext.keys->def = wdev->wext.default_key; - wdev->wext.keys->defmgmt = wdev->wext.default_mgmt_key; - } wdev->wext.ibss.privacy = wdev->wext.default_key != -1; @@ -295,7 +293,7 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL); if (!ck) return -ENOMEM; - for (i = 0; i < 6; i++) + for (i = 0; i < 4; i++) ck->params[i].key = ck->data[i]; } err = __cfg80211_join_ibss(rdev, wdev->netdev, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 6cb33ae..71af96e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -854,7 +854,6 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev, return ERR_PTR(-ENOMEM); result->def = -1; - result->defmgmt = -1; nla_for_each_nested(key, keys, rem) { memset(&parse, 0, sizeof(parse)); diff --git a/net/wireless/util.c b/net/wireless/util.c index 12e2d3f..9e6e2aa 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -912,7 +912,7 @@ void 
cfg80211_upload_connect_keys(struct wireless_dev *wdev) if (!wdev->connect_keys) return; - for (i = 0; i < 6; i++) { + for (i = 0; i < 4; i++) { if (!wdev->connect_keys->params[i].cipher) continue; if (rdev_add_key(rdev, dev, i, false, NULL, @@ -925,9 +925,6 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev) netdev_err(dev, "failed to set defkey %d\n", i); continue; } - if (wdev->connect_keys->defmgmt == i) - if (rdev_set_default_mgmt_key(rdev, dev, i)) - netdev_err(dev, "failed to set mgtdef %d\n", i); } kzfree(wdev->connect_keys); diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index e45a764..7b97d43 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -408,10 +408,10 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, if (!wdev->wext.keys) { wdev->wext.keys = kzalloc(sizeof(*wdev->wext.keys), - GFP_KERNEL); + GFP_KERNEL); if (!wdev->wext.keys) return -ENOMEM; - for (i = 0; i < 6; i++) + for (i = 0; i < 4; i++) wdev->wext.keys->params[i].key = wdev->wext.keys->data[i]; } @@ -460,7 +460,7 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, if (err == -ENOENT) err = 0; if (!err) { - if (!addr) { + if (!addr && idx < 4) { memset(wdev->wext.keys->data[idx], 0, sizeof(wdev->wext.keys->data[idx])); wdev->wext.keys->params[idx].key_len = 0; diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index a4e8af3..f6523a4 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -35,7 +35,6 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, if (wdev->wext.keys) { wdev->wext.keys->def = wdev->wext.default_key; - wdev->wext.keys->defmgmt = wdev->wext.default_mgmt_key; if (wdev->wext.default_key != -1) wdev->wext.connect.privacy = true; } @@ -47,7 +46,7 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL); if (!ck) return -ENOMEM; - for (i = 0; i < 
6; i++) + for (i = 0; i < 4; i++) ck->params[i].key = ck->data[i]; } -- cgit v1.1 From 035ee288ae7ade4152f1c3cf23a587b04fdc526c Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 6 Sep 2016 06:20:46 +0200 Subject: PCI: Fix bridge_d3 update on device removal Starting with v4.8, we allow a PCIe port to runtime suspend to D3hot if the port itself and its children satisfy a number of conditions. Once a child is removed, we recheck those conditions in case the removed device was blocking the port from suspending. The rechecking needs to happen *after* the device has been removed from the bus it resides on. Otherwise when walking the port's subordinate bus in pci_bridge_d3_update(), the device being removed would erroneously still be taken into account. However the device is removed from the bus_list in pci_destroy_dev() and we currently recheck *before* that. Fix it. Fixes: 9d26d3a8f1b0 ("PCI: Put PCIe ports into D3 during suspend") Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Reviewed-by: Mika Westerberg Acked-by: Rafael J. Wysocki --- drivers/pci/remove.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index d1ef7ac..f9357e0 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -40,6 +40,7 @@ static void pci_destroy_dev(struct pci_dev *dev) list_del(&dev->bus_list); up_write(&pci_bus_sem); + pci_bridge_d3_device_removed(dev); pci_free_resources(dev); put_device(&dev->dev); } @@ -96,8 +97,6 @@ static void pci_remove_bus_device(struct pci_dev *dev) dev->subordinate = NULL; } - pci_bridge_d3_device_removed(dev); - pci_destroy_dev(dev); } -- cgit v1.1 From 08a39685a771b4b1108889ea5e4e0a71b51782ba Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Sep 2016 22:36:21 +0100 Subject: rxrpc: Make sure we initialise the peer hash key Peer records created for incoming connections weren't getting their hash key set. 
This meant that incoming calls wouldn't see more than one DATA packet - which is not a problem for AFS CM calls with small request data blobs. Signed-off-by: David Howells --- net/rxrpc/peer_object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 2efe29a..3e6cd17 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -203,6 +203,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) */ static void rxrpc_init_peer(struct rxrpc_peer *peer, unsigned long hash_key) { + peer->hash_key = hash_key; rxrpc_assess_MTU_size(peer); peer->mtu = peer->if_mtu; @@ -238,7 +239,6 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local, peer = rxrpc_alloc_peer(local, gfp); if (peer) { - peer->hash_key = hash_key; memcpy(&peer->srx, srx, sizeof(*srx)); rxrpc_init_peer(peer, hash_key); } -- cgit v1.1 From bc4abfcf51835420d61440b2b7aa18181bc1f273 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Sep 2016 22:36:21 +0100 Subject: rxrpc: Add missing wakeup on Tx window rotation We need to wake up the sender when Tx window rotation due to an incoming ACK makes space in the buffer otherwise the sender is liable to just hang endlessly. This problem isn't noticeable if the Tx phase transfers no more than will fit in a single window or the Tx window rotates fast enough that it doesn't get full. 
Signed-off-by: David Howells --- net/rxrpc/input.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index afeba98..a707d59 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -59,6 +59,8 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to) spin_unlock(&call->lock); + wake_up(&call->waitq); + while (list) { skb = list; list = skb->next; -- cgit v1.1 From 91c2c7b656a80984362dbcb3d326e4a7274d0607 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Sep 2016 22:36:21 +0100 Subject: rxrpc: The IDLE ACK packet should use rxrpc_idle_ack_delay The IDLE ACK packet should use the rxrpc_idle_ack_delay setting when the timer is set for it. Signed-off-by: David Howells --- net/rxrpc/call_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 2b976e7..6143204 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -95,7 +95,7 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, break; case RXRPC_ACK_IDLE: - if (rxrpc_soft_ack_delay < expiry) + if (rxrpc_idle_ack_delay < expiry) expiry = rxrpc_idle_ack_delay; break; -- cgit v1.1 From 33b603fda815faf12f66156a49b510126fac984b Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Sep 2016 22:36:21 +0100 Subject: rxrpc: Requeue call for recvmsg if more data rxrpc_recvmsg() needs to make sure that the call it has just been processing gets requeued for further attention if the buffer has been filled and there's more data to be consumed. The softirq producer only queues the call and wakes the socket if it fills the first slot in the window, so userspace might end up sleeping forever otherwise, despite there being data available. This is not a problem provided the userspace buffer is big enough or it empties the buffer completely before more data comes in. 
Signed-off-by: David Howells --- net/rxrpc/recvmsg.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 20d0b5c..16ff56f6 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -463,6 +463,10 @@ try_again: flags, &copied); if (ret == -EAGAIN) ret = 0; + + if (after(call->rx_top, call->rx_hard_ack) && + call->rxtx_buffer[(call->rx_hard_ack + 1) & RXRPC_RXTX_BUFF_MASK]) + rxrpc_notify_socket(call); break; default: ret = 0; -- cgit v1.1 From b25de3605339c94a6c27d42efe8f7748ea206a8b Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Sep 2016 22:36:22 +0100 Subject: rxrpc: Add missing unlock in rxrpc_call_accept() Add a missing unlock in rxrpc_call_accept() in the path taken if there's no call to wake up. Signed-off-by: David Howells --- net/rxrpc/call_accept.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index b8acec0..06e328f 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -425,9 +425,11 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, write_lock(&rx->call_lock); - ret = -ENODATA; - if (list_empty(&rx->to_be_accepted)) - goto out; + if (list_empty(&rx->to_be_accepted)) { + write_unlock(&rx->call_lock); + kleave(" = -ENODATA [empty]"); + return ERR_PTR(-ENODATA); + } /* check the user ID isn't already in use */ pp = &rx->calls.rb_node; -- cgit v1.1 From 89a80ed4c09afb2aff6abe32f6dd68605f857a7d Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Sep 2016 22:36:22 +0100 Subject: rxrpc: Use skb->len not skb->data_len skb->len should be used rather than skb->data_len when referring to the amount of data in a packet. This will only cause a malfunction in the following cases: (1) We receive a jumbo packet (validation and splitting both are wrong). (2) We see if there's extra ACK info in an ACK packet (we think it's not there and just ignore it). 
Signed-off-by: David Howells --- net/rxrpc/input.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index a707d59..5958ef8 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -127,7 +127,7 @@ static bool rxrpc_validate_jumbo(struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); unsigned int offset = sp->offset; - unsigned int len = skb->data_len; + unsigned int len = skb->len; int nr_jumbo = 1; u8 flags = sp->hdr.flags; @@ -196,7 +196,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, u8 ack = 0, flags, annotation = 0; _enter("{%u,%u},{%u,%u}", - call->rx_hard_ack, call->rx_top, skb->data_len, seq); + call->rx_hard_ack, call->rx_top, skb->len, seq); _proto("Rx DATA %%%u { #%u f=%02x }", sp->hdr.serial, seq, sp->hdr.flags); @@ -233,7 +233,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, next_subpacket: queued = false; ix = seq & RXRPC_RXTX_BUFF_MASK; - len = skb->data_len; + len = skb->len; if (flags & RXRPC_JUMBO_PACKET) len = RXRPC_JUMBO_DATALEN; @@ -444,7 +444,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, } offset = sp->offset + nr_acks + 3; - if (skb->data_len >= offset + sizeof(buf.info)) { + if (skb->len >= offset + sizeof(buf.info)) { if (skb_copy_bits(skb, offset, &buf.info, sizeof(buf.info)) < 0) return rxrpc_proto_abort("XAI", call, 0); rxrpc_input_ackinfo(call, skb, &buf.info); -- cgit v1.1 From 01fd0742248cfc99b3b0cba1e09e1c0ecb8658fa Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Sep 2016 10:23:01 +0100 Subject: rxrpc: Allow tx_winsize to grow in response to an ACK Allow tx_winsize to grow when the ACK info packet shows a larger receive window at the other end rather than only permitting it to shrink. 
Signed-off-by: David Howells --- net/rxrpc/input.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 5958ef8..8e529af 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -333,14 +333,16 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_peer *peer; unsigned int mtu; + u32 rwind = ntohl(ackinfo->rwind); _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }", sp->hdr.serial, ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU), - ntohl(ackinfo->rwind), ntohl(ackinfo->jumbo_max)); + rwind, ntohl(ackinfo->jumbo_max)); - if (call->tx_winsize > ntohl(ackinfo->rwind)) - call->tx_winsize = ntohl(ackinfo->rwind); + if (rwind > RXRPC_RXTX_BUFF_SIZE - 1) + rwind = RXRPC_RXTX_BUFF_SIZE - 1; + call->tx_winsize = rwind; mtu = min(ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU)); -- cgit v1.1 From cbd00891de9bb4756bac6f6edfa945d5a6468977 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Sep 2016 09:12:34 +0100 Subject: rxrpc: Adjust the call ref tracepoint to show kernel API refs Adjust the call ref tracepoint to show references held on a call by the kernel API separately as much as possible and add an additional trace at the allocation point from the preallocation buffer for an incoming call. Note that this doesn't show the allocation of a client call for the kernel separately at the moment. 
Signed-off-by: David Howells --- net/rxrpc/af_rxrpc.c | 2 +- net/rxrpc/ar-internal.h | 2 ++ net/rxrpc/call_accept.c | 3 ++- net/rxrpc/call_object.c | 2 ++ 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index caa226d..25d00de 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -299,7 +299,7 @@ void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call) { _enter("%d{%d}", call->debug_id, atomic_read(&call->usage)); rxrpc_release_call(rxrpc_sk(sock->sk), call); - rxrpc_put_call(call, rxrpc_call_put); + rxrpc_put_call(call, rxrpc_call_put_kernel); } EXPORT_SYMBOL(rxrpc_kernel_end_call); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index b1cb79e..47c74a5 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -540,8 +540,10 @@ enum rxrpc_call_trace { rxrpc_call_seen, rxrpc_call_got, rxrpc_call_got_userid, + rxrpc_call_got_kernel, rxrpc_call_put, rxrpc_call_put_userid, + rxrpc_call_put_kernel, rxrpc_call_put_noqueue, rxrpc_call__nr_trace }; diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 06e328f..5fd9d2c 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -121,7 +121,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, call->user_call_ID = user_call_ID; call->notify_rx = notify_rx; - rxrpc_get_call(call, rxrpc_call_got); + rxrpc_get_call(call, rxrpc_call_got_kernel); user_attach_call(call, user_call_ID); rxrpc_get_call(call, rxrpc_call_got_userid); rb_link_node(&call->sock_node, parent, pp); @@ -300,6 +300,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, smp_store_release(&b->call_backlog_tail, (call_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); + rxrpc_see_call(call); call->conn = conn; call->peer = rxrpc_get_peer(conn->params.peer); return call; diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 18ab13f..3f94765 100644 --- a/net/rxrpc/call_object.c +++ 
b/net/rxrpc/call_object.c @@ -56,8 +56,10 @@ const char rxrpc_call_traces[rxrpc_call__nr_trace][4] = { [rxrpc_call_seen] = "SEE", [rxrpc_call_got] = "GOT", [rxrpc_call_got_userid] = "Gus", + [rxrpc_call_got_kernel] = "Gke", [rxrpc_call_put] = "PUT", [rxrpc_call_put_userid] = "Pus", + [rxrpc_call_put_kernel] = "Pke", [rxrpc_call_put_noqueue] = "PNQ", }; -- cgit v1.1 From 3432a757b1f889f8c0d33cd9fcabdae172ed812b Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Sep 2016 09:05:14 +0100 Subject: rxrpc: Fix prealloc refcounting The preallocated call buffer holds a ref on the calls within that buffer. The ref was being released in the wrong place - it worked okay for incoming calls to the AFS cache manager service, but doesn't work right for incoming calls to a userspace service. Instead of releasing an extra ref service calls in rxrpc_release_call(), the ref needs to be released during the acceptance/rejectance process. To this end: (1) The prealloc ref is now normally released during rxrpc_new_incoming_call(). (2) For preallocated kernel API calls, the kernel API's ref needs to be released when the call is discarded on socket close. (3) We shouldn't take a second ref in rxrpc_accept_call(). (4) rxrpc_recvmsg_new_call() needs to get a ref of its own when it adds the call to the to_be_accepted socket queue. In doing (4) above, we would prefer not to put the call's refcount down to 0 as that entails doing cleanup in softirq context, but it's unlikely as there are several refs held elsewhere, at least one of which must be put by someone in process context calling rxrpc_release_call(). However, it's not a problem if we do have to do that. 
Signed-off-by: David Howells --- net/rxrpc/call_accept.c | 9 ++++++++- net/rxrpc/call_object.c | 3 --- net/rxrpc/recvmsg.c | 1 + 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 5fd9d2c..26c293e 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -221,6 +221,7 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) if (rx->discard_new_call) { _debug("discard %lx", call->user_call_ID); rx->discard_new_call(call, call->user_call_ID); + rxrpc_put_call(call, rxrpc_call_put_kernel); } rxrpc_call_completed(call); rxrpc_release_call(rx, call); @@ -402,6 +403,13 @@ found_service: if (call->state == RXRPC_CALL_SERVER_ACCEPTING) rxrpc_notify_socket(call); + /* We have to discard the prealloc queue's ref here and rely on a + * combination of the RCU read lock and refs held either by the socket + * (recvmsg queue, to-be-accepted queue or user ID tree) or the kernel + * service to prevent the call from being deallocated too early. 
+ */ + rxrpc_put_call(call, rxrpc_call_put); + _leave(" = %p{%d}", call, call->debug_id); out: spin_unlock(&rx->incoming_lock); @@ -469,7 +477,6 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, } /* formalise the acceptance */ - rxrpc_get_call(call, rxrpc_call_got); call->notify_rx = notify_rx; call->user_call_ID = user_call_ID; rxrpc_get_call(call, rxrpc_call_got_userid); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 3f94765..9aa1c4b 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -464,9 +464,6 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) call->rxtx_buffer[i] = NULL; } - /* We have to release the prealloc backlog ref */ - if (rxrpc_is_service_call(call)) - rxrpc_put_call(call, rxrpc_call_put); _leave(""); } diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 16ff56f6..a284205 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -118,6 +118,7 @@ static int rxrpc_recvmsg_new_call(struct rxrpc_sock *rx, list_del_init(&call->recvmsg_link); write_unlock_bh(&rx->recvmsg_lock); + rxrpc_get_call(call, rxrpc_call_got); write_lock(&rx->call_lock); list_add_tail(&call->accept_link, &rx->to_be_accepted); write_unlock(&rx->call_lock); -- cgit v1.1 From 75e42126399220069ada0ca0e93237993c6afccf Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Sep 2016 22:36:22 +0100 Subject: rxrpc: Correctly initialise, limit and transmit call->rx_winsize call->rx_winsize should be initialised to the sysctl setting and the sysctl setting should be limited to the maximum we want to permit. Further, we need to place this in the ACK info instead of the sysctl setting. Furthermore, discard the idea of accepting the subpackets of a jumbo packet that lie beyond the receive window when the first packet of the jumbo is within the window. Just discard the excess subpackets instead. This allows the receive window to be opened up right to the buffer size less one for the dead slot. 
Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 3 ++- net/rxrpc/call_object.c | 2 +- net/rxrpc/input.c | 23 ++++++++++++++++------- net/rxrpc/misc.c | 5 ++++- net/rxrpc/output.c | 4 ++-- net/rxrpc/sysctl.c | 2 +- 6 files changed, 26 insertions(+), 13 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 47c74a5..e78c40b 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -498,6 +498,7 @@ struct rxrpc_call { */ #define RXRPC_RXTX_BUFF_SIZE 64 #define RXRPC_RXTX_BUFF_MASK (RXRPC_RXTX_BUFF_SIZE - 1) +#define RXRPC_INIT_RX_WINDOW_SIZE 32 struct sk_buff **rxtx_buffer; u8 *rxtx_annotations; #define RXRPC_TX_ANNO_ACK 0 @@ -518,7 +519,7 @@ struct rxrpc_call { rxrpc_seq_t rx_expect_next; /* Expected next packet sequence number */ u8 rx_winsize; /* Size of Rx window */ u8 tx_winsize; /* Maximum size of Tx window */ - u8 nr_jumbo_dup; /* Number of jumbo duplicates */ + u8 nr_jumbo_bad; /* Number of jumbo dups/exceeds-windows */ /* receive-phase ACK management */ u8 ackr_reason; /* reason to ACK */ diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 9aa1c4b..22f9b0d 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -152,7 +152,7 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) memset(&call->sock_node, 0xed, sizeof(call->sock_node)); /* Leave space in the ring to handle a maxed-out jumbo packet */ - call->rx_winsize = RXRPC_RXTX_BUFF_SIZE - 1 - 46; + call->rx_winsize = rxrpc_rx_window_size; call->tx_winsize = 16; call->rx_expect_next = 1; return call; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 8e529af..75af0bd 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -164,7 +164,7 @@ protocol_error: * (that information is encoded in the ACK packet). */ static void rxrpc_input_dup_data(struct rxrpc_call *call, rxrpc_seq_t seq, - u8 annotation, bool *_jumbo_dup) + u8 annotation, bool *_jumbo_bad) { /* Discard normal packets that are duplicates. 
*/ if (annotation == 0) @@ -174,9 +174,9 @@ static void rxrpc_input_dup_data(struct rxrpc_call *call, rxrpc_seq_t seq, * more partially duplicate jumbo packets, we refuse to take any more * jumbos for this call. */ - if (!*_jumbo_dup) { - call->nr_jumbo_dup++; - *_jumbo_dup = true; + if (!*_jumbo_bad) { + call->nr_jumbo_bad++; + *_jumbo_bad = true; } } @@ -191,7 +191,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, unsigned int ix; rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0; rxrpc_seq_t seq = sp->hdr.seq, hard_ack; - bool immediate_ack = false, jumbo_dup = false, queued; + bool immediate_ack = false, jumbo_bad = false, queued; u16 len; u8 ack = 0, flags, annotation = 0; @@ -222,7 +222,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, flags = sp->hdr.flags; if (flags & RXRPC_JUMBO_PACKET) { - if (call->nr_jumbo_dup > 3) { + if (call->nr_jumbo_bad > 3) { ack = RXRPC_ACK_NOSPACE; ack_serial = serial; goto ack; @@ -259,7 +259,7 @@ next_subpacket: } if (call->rxtx_buffer[ix]) { - rxrpc_input_dup_data(call, seq, annotation, &jumbo_dup); + rxrpc_input_dup_data(call, seq, annotation, &jumbo_bad); if (ack != RXRPC_ACK_DUPLICATE) { ack = RXRPC_ACK_DUPLICATE; ack_serial = serial; @@ -304,6 +304,15 @@ skip: annotation++; if (flags & RXRPC_JUMBO_PACKET) annotation |= RXRPC_RX_ANNO_JLAST; + if (after(seq, hard_ack + call->rx_winsize)) { + ack = RXRPC_ACK_EXCEEDS_WINDOW; + ack_serial = serial; + if (!jumbo_bad) { + call->nr_jumbo_bad++; + jumbo_bad = true; + } + goto ack; + } _proto("Rx DATA Jumbo %%%u", serial); goto next_subpacket; diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index fd096f7..8b91078 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -50,7 +50,10 @@ unsigned int rxrpc_idle_ack_delay = 0.5 * HZ; * limit is hit, we should generate an EXCEEDS_WINDOW ACK and discard further * packets. 
*/ -unsigned int rxrpc_rx_window_size = RXRPC_RXTX_BUFF_SIZE - 46; +unsigned int rxrpc_rx_window_size = RXRPC_INIT_RX_WINDOW_SIZE; +#if (RXRPC_RXTX_BUFF_SIZE - 1) < RXRPC_INIT_RX_WINDOW_SIZE +#error Need to reduce RXRPC_INIT_RX_WINDOW_SIZE +#endif /* * Maximum Rx MTU size. This indicates to the sender the size of jumbo packet diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 719a4c2..90c7722 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -71,10 +71,10 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, mtu = call->conn->params.peer->if_mtu; mtu -= call->conn->params.peer->hdrsize; - jmax = (call->nr_jumbo_dup > 3) ? 1 : rxrpc_rx_jumbo_max; + jmax = (call->nr_jumbo_bad > 3) ? 1 : rxrpc_rx_jumbo_max; pkt->ackinfo.rxMTU = htonl(rxrpc_rx_mtu); pkt->ackinfo.maxMTU = htonl(mtu); - pkt->ackinfo.rwind = htonl(rxrpc_rx_window_size); + pkt->ackinfo.rwind = htonl(call->rx_winsize); pkt->ackinfo.jumbo_max = htonl(jmax); *ackp++ = 0; diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index b7ca8cf..a03c61c 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -20,7 +20,7 @@ static const unsigned int one = 1; static const unsigned int four = 4; static const unsigned int thirtytwo = 32; static const unsigned int n_65535 = 65535; -static const unsigned int n_max_acks = RXRPC_MAXACKS; +static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1; /* * RxRPC operating parameters. 
-- cgit v1.1 From 9ad18b75c2f6e4a78ce204e79f37781f8815c0fa Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 17 Aug 2016 23:19:01 -0400 Subject: asm-generic: make get_user() clear the destination on errors both for access_ok() failures and for faults halfway through Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- include/asm-generic/uaccess.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h index 04e21a4..32901d1 100644 --- a/include/asm-generic/uaccess.h +++ b/include/asm-generic/uaccess.h @@ -230,14 +230,18 @@ extern int __put_user_bad(void) __attribute__((noreturn)); might_fault(); \ access_ok(VERIFY_READ, __p, sizeof(*ptr)) ? \ __get_user((x), (__typeof__(*(ptr)) *)__p) : \ - -EFAULT; \ + ((x) = (__typeof__(*(ptr)))0,-EFAULT); \ }) #ifndef __get_user_fn static inline int __get_user_fn(size_t size, const void __user *ptr, void *x) { - size = __copy_from_user(x, ptr, size); - return size ? -EFAULT : size; + size_t n = __copy_from_user(x, ptr, size); + if (unlikely(n)) { + memset(x + (size - n), 0, n); + return -EFAULT; + } + return 0; } #define __get_user_fn(sz, u, k) __get_user_fn(sz, u, k) -- cgit v1.1 From eb47e0293baaa3044022059f1fa9ff474bfe35cb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Aug 2016 19:34:00 -0400 Subject: cris: buggered copy_from_user/copy_to_user/clear_user * copy_from_user() on access_ok() failure ought to zero the destination * none of those primitives should skip the access_ok() check in case of small constant size. 
Cc: stable@vger.kernel.org Acked-by: Jesper Nilsson Signed-off-by: Al Viro --- arch/cris/include/asm/uaccess.h | 71 +++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 39 deletions(-) diff --git a/arch/cris/include/asm/uaccess.h b/arch/cris/include/asm/uaccess.h index e3530d0..56c7d57 100644 --- a/arch/cris/include/asm/uaccess.h +++ b/arch/cris/include/asm/uaccess.h @@ -194,30 +194,6 @@ extern unsigned long __copy_user(void __user *to, const void *from, unsigned lon extern unsigned long __copy_user_zeroing(void *to, const void __user *from, unsigned long n); extern unsigned long __do_clear_user(void __user *to, unsigned long n); -static inline unsigned long -__generic_copy_to_user(void __user *to, const void *from, unsigned long n) -{ - if (access_ok(VERIFY_WRITE, to, n)) - return __copy_user(to, from, n); - return n; -} - -static inline unsigned long -__generic_copy_from_user(void *to, const void __user *from, unsigned long n) -{ - if (access_ok(VERIFY_READ, from, n)) - return __copy_user_zeroing(to, from, n); - return n; -} - -static inline unsigned long -__generic_clear_user(void __user *to, unsigned long n) -{ - if (access_ok(VERIFY_WRITE, to, n)) - return __do_clear_user(to, n); - return n; -} - static inline long __strncpy_from_user(char *dst, const char __user *src, long count) { @@ -282,7 +258,7 @@ __constant_copy_from_user(void *to, const void __user *from, unsigned long n) else if (n == 24) __asm_copy_from_user_24(to, from, ret); else - ret = __generic_copy_from_user(to, from, n); + ret = __copy_user_zeroing(to, from, n); return ret; } @@ -333,7 +309,7 @@ __constant_copy_to_user(void __user *to, const void *from, unsigned long n) else if (n == 24) __asm_copy_to_user_24(to, from, ret); else - ret = __generic_copy_to_user(to, from, n); + ret = __copy_user(to, from, n); return ret; } @@ -366,26 +342,43 @@ __constant_clear_user(void __user *to, unsigned long n) else if (n == 24) __asm_clear_24(to, ret); else - ret = 
__generic_clear_user(to, n); + ret = __do_clear_user(to, n); return ret; } -#define clear_user(to, n) \ - (__builtin_constant_p(n) ? \ - __constant_clear_user(to, n) : \ - __generic_clear_user(to, n)) +static inline size_t clear_user(void __user *to, size_t n) +{ + if (unlikely(!access_ok(VERIFY_WRITE, to, n))) + return n; + if (__builtin_constant_p(n)) + return __constant_clear_user(to, n); + else + return __do_clear_user(to, n); +} -#define copy_from_user(to, from, n) \ - (__builtin_constant_p(n) ? \ - __constant_copy_from_user(to, from, n) : \ - __generic_copy_from_user(to, from, n)) +static inline size_t copy_from_user(void *to, const void __user *from, size_t n) +{ + if (unlikely(!access_ok(VERIFY_READ, from, n))) { + memset(to, 0, n); + return n; + } + if (__builtin_constant_p(n)) + return __constant_copy_from_user(to, from, n); + else + return __copy_user_zeroing(to, from, n); +} -#define copy_to_user(to, from, n) \ - (__builtin_constant_p(n) ? \ - __constant_copy_to_user(to, from, n) : \ - __generic_copy_to_user(to, from, n)) +static inline size_t copy_to_user(void __user *to, const void *from, size_t n) +{ + if (unlikely(!access_ok(VERIFY_WRITE, to, n))) + return n; + if (__builtin_constant_p(n)) + return __constant_copy_to_user(to, from, n); + else + return __copy_user(to, from, n); +} /* We let the __ versions of copy_from/to_user inline, because they're often * used in fast paths and have only a small space overhead. -- cgit v1.1 From 3b8767a8f00cc6538ba6b1cf0f88502e2fd2eb90 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Aug 2016 20:54:02 -0400 Subject: frv: fix clear_user() It should check access_ok(). Otherwise a bunch of places turn into trivially exploitable rootholes. 
Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/frv/include/asm/uaccess.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/frv/include/asm/uaccess.h b/arch/frv/include/asm/uaccess.h index 3ac9a59..87d9e34 100644 --- a/arch/frv/include/asm/uaccess.h +++ b/arch/frv/include/asm/uaccess.h @@ -263,19 +263,25 @@ do { \ extern long __memset_user(void *dst, unsigned long count); extern long __memcpy_user(void *dst, const void *src, unsigned long count); -#define clear_user(dst,count) __memset_user(____force(dst), (count)) +#define __clear_user(dst,count) __memset_user(____force(dst), (count)) #define __copy_from_user_inatomic(to, from, n) __memcpy_user((to), ____force(from), (n)) #define __copy_to_user_inatomic(to, from, n) __memcpy_user(____force(to), (from), (n)) #else -#define clear_user(dst,count) (memset(____force(dst), 0, (count)), 0) +#define __clear_user(dst,count) (memset(____force(dst), 0, (count)), 0) #define __copy_from_user_inatomic(to, from, n) (memcpy((to), ____force(from), (n)), 0) #define __copy_to_user_inatomic(to, from, n) (memcpy(____force(to), (from), (n)), 0) #endif -#define __clear_user clear_user +static inline unsigned long __must_check +clear_user(void __user *to, unsigned long n) +{ + if (likely(__access_ok(to, n))) + n = __clear_user(to, n); + return n; +} static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) -- cgit v1.1 From f35c1e0671728d1c9abc405d05ef548b5fcb2fc4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Aug 2016 21:16:49 -0400 Subject: hexagon: fix strncpy_from_user() error return It's -EFAULT, not -1 (and contrary to the comment in there, __strnlen_user() can return 0 - on faults). 
Cc: stable@vger.kernel.org Acked-by: Richard Kuo Signed-off-by: Al Viro --- arch/hexagon/include/asm/uaccess.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/hexagon/include/asm/uaccess.h b/arch/hexagon/include/asm/uaccess.h index f000a38..f61cfb2 100644 --- a/arch/hexagon/include/asm/uaccess.h +++ b/arch/hexagon/include/asm/uaccess.h @@ -103,7 +103,8 @@ static inline long hexagon_strncpy_from_user(char *dst, const char __user *src, { long res = __strnlen_user(src, n); - /* return from strnlen can't be zero -- that would be rubbish. */ + if (unlikely(!res)) + return -EFAULT; if (res > n) { copy_from_user(dst, src, n); -- cgit v1.1 From a5e541f796f17228793694d64b507f5f57db4cd7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Aug 2016 21:31:41 -0400 Subject: ia64: copy_from_user() should zero the destination on access_ok() failure Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/ia64/include/asm/uaccess.h | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/arch/ia64/include/asm/uaccess.h b/arch/ia64/include/asm/uaccess.h index 465c709..6c2d2c8 100644 --- a/arch/ia64/include/asm/uaccess.h +++ b/arch/ia64/include/asm/uaccess.h @@ -272,20 +272,17 @@ __copy_from_user (void *to, const void __user *from, unsigned long count) __cu_len; \ }) -#define copy_from_user(to, from, n) \ -({ \ - void *__cu_to = (to); \ - const void __user *__cu_from = (from); \ - long __cu_len = (n); \ - \ - __chk_user_ptr(__cu_from); \ - if (__access_ok(__cu_from, __cu_len, get_fs())) { \ - if (!__builtin_constant_p(n)) \ - check_object_size(__cu_to, __cu_len, false); \ - __cu_len = __copy_user((__force void __user *) __cu_to, __cu_from, __cu_len); \ - } \ - __cu_len; \ -}) +static inline unsigned long +copy_from_user(void *to, const void __user *from, unsigned long n) +{ + if (!__builtin_constant_p(n)) + check_object_size(to, n, false); + if (likely(__access_ok(from, n, get_fs()))) + n = __copy_user((__force void 
__user *) to, from, n); + else + memset(to, 0, n); + return n; +} #define __copy_in_user(to, from, size) __copy_user((to), (from), (size)) -- cgit v1.1 From 8ae95ed4ae5fc7c3391ed668b2014c9e2079533b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Aug 2016 22:08:20 -0400 Subject: metag: copy_from_user() should zero the destination on access_ok() failure Cc: stable@vger.kernel.org Acked-by: James Hogan Signed-off-by: Al Viro --- arch/metag/include/asm/uaccess.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/metag/include/asm/uaccess.h b/arch/metag/include/asm/uaccess.h index 8282cbc..273e612 100644 --- a/arch/metag/include/asm/uaccess.h +++ b/arch/metag/include/asm/uaccess.h @@ -204,8 +204,9 @@ extern unsigned long __must_check __copy_user_zeroing(void *to, static inline unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) { - if (access_ok(VERIFY_READ, from, n)) + if (likely(access_ok(VERIFY_READ, from, n))) return __copy_user_zeroing(to, from, n); + memset(to, 0, n); return n; } -- cgit v1.1 From 05d9d0b96e53c52a113fd783c0c97c830c8dc7af Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 19 Aug 2016 12:10:02 -0700 Subject: ARC: uaccess: get_user to zero out dest in case of fault Al reported a potential issue with ARC get_user() as it wasn't clearing out destination pointer in case of fault due to bad address etc.
Verified using following | { | u32 bogus1 = 0xdeadbeef; | u64 bogus2 = 0xdead; | int rc1, rc2; | | pr_info("Orig values %x %llx\n", bogus1, bogus2); | rc1 = get_user(bogus1, (u32 __user *)0x40000000); | rc2 = get_user(bogus2, (u64 __user *)0x50000000); | pr_info("access %d %d, new values %x %llx\n", | rc1, rc2, bogus1, bogus2); | } | [ARCLinux]# insmod /mnt/kernel-module/qtn.ko | Orig values deadbeef dead | access -14 -14, new values 0 0 Reported-by: Al Viro Cc: Linus Torvalds Cc: linux-snps-arc@lists.infradead.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Vineet Gupta Signed-off-by: Al Viro --- arch/arc/include/asm/uaccess.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h index a78d567..41faf17 100644 --- a/arch/arc/include/asm/uaccess.h +++ b/arch/arc/include/asm/uaccess.h @@ -83,7 +83,10 @@ "2: ;nop\n" \ " .section .fixup, \"ax\"\n" \ " .align 4\n" \ - "3: mov %0, %3\n" \ + "3: # return -EFAULT\n" \ + " mov %0, %3\n" \ + " # zero out dst ptr\n" \ + " mov %1, 0\n" \ " j 2b\n" \ " .previous\n" \ " .section __ex_table, \"a\"\n" \ @@ -101,7 +104,11 @@ "2: ;nop\n" \ " .section .fixup, \"ax\"\n" \ " .align 4\n" \ - "3: mov %0, %3\n" \ + "3: # return -EFAULT\n" \ + " mov %0, %3\n" \ + " # zero out dst ptr\n" \ + " mov %1, 0\n" \ + " mov %R1, 0\n" \ " j 2b\n" \ " .previous\n" \ " .section __ex_table, \"a\"\n" \ -- cgit v1.1 From e69d700535ac43a18032b3c399c69bf4639e89a2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 20 Aug 2016 16:18:53 -0400 Subject: mips: copy_from_user() must zero the destination on access_ok() failure Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/mips/include/asm/uaccess.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h index 11b965f..21a2aab 100644 --- a/arch/mips/include/asm/uaccess.h +++ b/arch/mips/include/asm/uaccess.h @@ -14,6 +14,7 
@@ #include #include #include +#include #include /* @@ -1170,6 +1171,8 @@ extern size_t __copy_in_user_eva(void *__to, const void *__from, size_t __n); __cu_len = __invoke_copy_from_user(__cu_to, \ __cu_from, \ __cu_len); \ + } else { \ + memset(__cu_to, 0, __cu_len); \ } \ } \ __cu_len; \ -- cgit v1.1 From 43403eabf558d2800b429cd886e996fd555aa542 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 20 Aug 2016 16:32:02 -0400 Subject: mn10300: failing __get_user() and get_user() should zero Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/mn10300/include/asm/uaccess.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mn10300/include/asm/uaccess.h b/arch/mn10300/include/asm/uaccess.h index 20f7bf6..d012e87 100644 --- a/arch/mn10300/include/asm/uaccess.h +++ b/arch/mn10300/include/asm/uaccess.h @@ -166,6 +166,7 @@ struct __large_struct { unsigned long buf[100]; }; "2:\n" \ " .section .fixup,\"ax\"\n" \ "3:\n\t" \ + " mov 0,%1\n" \ " mov %3,%0\n" \ " jmp 2b\n" \ " .previous\n" \ -- cgit v1.1 From ae7cc577ec2a4a6151c9e928fd1f595d953ecef1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 20 Aug 2016 16:33:10 -0400 Subject: mn10300: copy_from_user() should zero on access_ok() failure... Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/mn10300/lib/usercopy.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/mn10300/lib/usercopy.c b/arch/mn10300/lib/usercopy.c index 7826e6c..ce8899e 100644 --- a/arch/mn10300/lib/usercopy.c +++ b/arch/mn10300/lib/usercopy.c @@ -9,7 +9,7 @@ * as published by the Free Software Foundation; either version * 2 of the Licence, or (at your option) any later version. 
*/ -#include +#include unsigned long __generic_copy_to_user(void *to, const void *from, unsigned long n) @@ -24,6 +24,8 @@ __generic_copy_from_user(void *to, const void *from, unsigned long n) { if (access_ok(VERIFY_READ, from, n)) __copy_user_zeroing(to, from, n); + else + memset(to, 0, n); return n; } -- cgit v1.1 From e33d1f6f72cc82fcfc3d1fb20c9e3ad83b1928fa Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 20 Aug 2016 16:36:36 -0400 Subject: nios2: copy_from_user() should zero the tail of destination Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/nios2/include/asm/uaccess.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/nios2/include/asm/uaccess.h b/arch/nios2/include/asm/uaccess.h index caa51ff..2b4b9e9 100644 --- a/arch/nios2/include/asm/uaccess.h +++ b/arch/nios2/include/asm/uaccess.h @@ -102,9 +102,12 @@ extern long __copy_to_user(void __user *to, const void *from, unsigned long n); static inline long copy_from_user(void *to, const void __user *from, unsigned long n) { - if (!access_ok(VERIFY_READ, from, n)) - return n; - return __copy_from_user(to, from, n); + unsigned long res = n; + if (access_ok(VERIFY_READ, from, n)) + res = __copy_from_user(to, from, n); + if (unlikely(res)) + memset(to + (n - res), 0, res); + return res; } static inline long copy_to_user(void __user *to, const void *from, -- cgit v1.1 From 2e29f50ad5e23db37dde9be71410d95d50241ecd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 20 Aug 2016 16:39:01 -0400 Subject: nios2: fix __get_user() a) should not leave crap on fault b) should _not_ require access_ok() in any cases. 
Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/nios2/include/asm/uaccess.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/nios2/include/asm/uaccess.h b/arch/nios2/include/asm/uaccess.h index 2b4b9e9..0ab8232 100644 --- a/arch/nios2/include/asm/uaccess.h +++ b/arch/nios2/include/asm/uaccess.h @@ -142,7 +142,7 @@ extern long strnlen_user(const char __user *s, long n); #define __get_user_unknown(val, size, ptr, err) do { \ err = 0; \ - if (copy_from_user(&(val), ptr, size)) { \ + if (__copy_from_user(&(val), ptr, size)) { \ err = -EFAULT; \ } \ } while (0) @@ -169,7 +169,7 @@ do { \ ({ \ long __gu_err = -EFAULT; \ const __typeof__(*(ptr)) __user *__gu_ptr = (ptr); \ - unsigned long __gu_val; \ + unsigned long __gu_val = 0; \ __get_user_common(__gu_val, sizeof(*(ptr)), __gu_ptr, __gu_err);\ (x) = (__force __typeof__(x))__gu_val; \ __gu_err; \ -- cgit v1.1 From acb2505d0119033a80c85ac8d02dccae41271667 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 20 Aug 2016 17:05:21 -0400 Subject: openrisc: fix copy_from_user() ... that should zero on faults. Also remove the helpful logics wrt range truncation copied from ppc32. Where it had ever been needed only in case of copy_from_user() *and* had not been merged into the mainline until a month after the need had disappeared. A decade before openrisc went into mainline, I might add... 
Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/openrisc/include/asm/uaccess.h | 35 +++++++++++------------------------ 1 file changed, 11 insertions(+), 24 deletions(-) diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h index a6bd07c..cbad29b 100644 --- a/arch/openrisc/include/asm/uaccess.h +++ b/arch/openrisc/include/asm/uaccess.h @@ -273,28 +273,20 @@ __copy_tofrom_user(void *to, const void *from, unsigned long size); static inline unsigned long copy_from_user(void *to, const void *from, unsigned long n) { - unsigned long over; - - if (access_ok(VERIFY_READ, from, n)) - return __copy_tofrom_user(to, from, n); - if ((unsigned long)from < TASK_SIZE) { - over = (unsigned long)from + n - TASK_SIZE; - return __copy_tofrom_user(to, from, n - over) + over; - } - return n; + unsigned long res = n; + + if (likely(access_ok(VERIFY_READ, from, n))) + n = __copy_tofrom_user(to, from, n); + if (unlikely(res)) + memset(to + (n - res), 0, res); + return res; } static inline unsigned long copy_to_user(void *to, const void *from, unsigned long n) { - unsigned long over; - - if (access_ok(VERIFY_WRITE, to, n)) - return __copy_tofrom_user(to, from, n); - if ((unsigned long)to < TASK_SIZE) { - over = (unsigned long)to + n - TASK_SIZE; - return __copy_tofrom_user(to, from, n - over) + over; - } + if (likely(access_ok(VERIFY_WRITE, to, n))) + n = __copy_tofrom_user(to, from, n); return n; } @@ -303,13 +295,8 @@ extern unsigned long __clear_user(void *addr, unsigned long size); static inline __must_check unsigned long clear_user(void *addr, unsigned long size) { - - if (access_ok(VERIFY_WRITE, addr, size)) - return __clear_user(addr, size); - if ((unsigned long)addr < TASK_SIZE) { - unsigned long over = (unsigned long)addr + size - TASK_SIZE; - return __clear_user(addr, size - over) + over; - } + if (likely(access_ok(VERIFY_WRITE, addr, size))) + size = __clear_user(addr, size); return size; } -- cgit v1.1 From 
aace880feea38875fbc919761b77e5732a3659ef Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 20 Aug 2016 19:03:37 -0400 Subject: parisc: fix copy_from_user() Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/parisc/include/asm/uaccess.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index 0f59fd9..37a1bee 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -10,6 +10,7 @@ #include #include +#include #define VERIFY_READ 0 #define VERIFY_WRITE 1 @@ -221,13 +222,14 @@ static inline unsigned long __must_check copy_from_user(void *to, unsigned long n) { int sz = __compiletime_object_size(to); - int ret = -EFAULT; + unsigned long ret = n; if (likely(sz == -1 || !__builtin_constant_p(n) || sz >= n)) ret = __copy_from_user(to, from, n); else copy_from_user_overflow(); - + if (unlikely(ret)) + memset(to + (n - ret), 0, ret); return ret; } -- cgit v1.1 From 224264657b8b228f949b42346e09ed8c90136a8e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 21 Aug 2016 19:16:26 -0400 Subject: ppc32: fix copy_from_user() should clear on access_ok() failures. Also remove the useless range truncation logics. 
Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/powerpc/include/asm/uaccess.h | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index c1dc6c1..c2ce5dd 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -308,40 +308,23 @@ extern unsigned long __copy_tofrom_user(void __user *to, static inline unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) { - unsigned long over; - - if (access_ok(VERIFY_READ, from, n)) { + if (likely(access_ok(VERIFY_READ, from, n))) { if (!__builtin_constant_p(n)) check_object_size(to, n, false); return __copy_tofrom_user((__force void __user *)to, from, n); } - if ((unsigned long)from < TASK_SIZE) { - over = (unsigned long)from + n - TASK_SIZE; - if (!__builtin_constant_p(n - over)) - check_object_size(to, n - over, false); - return __copy_tofrom_user((__force void __user *)to, from, - n - over) + over; - } + memset(to, 0, n); return n; } static inline unsigned long copy_to_user(void __user *to, const void *from, unsigned long n) { - unsigned long over; - if (access_ok(VERIFY_WRITE, to, n)) { if (!__builtin_constant_p(n)) check_object_size(from, n, true); return __copy_tofrom_user(to, (__force void __user *)from, n); } - if ((unsigned long)to < TASK_SIZE) { - over = (unsigned long)to + n - TASK_SIZE; - if (!__builtin_constant_p(n)) - check_object_size(from, n - over, true); - return __copy_tofrom_user(to, (__force void __user *)from, - n - over) + over; - } return n; } @@ -439,10 +422,6 @@ static inline unsigned long clear_user(void __user *addr, unsigned long size) might_fault(); if (likely(access_ok(VERIFY_WRITE, addr, size))) return __clear_user(addr, size); - if ((unsigned long)addr < TASK_SIZE) { - unsigned long over = (unsigned long)addr + size - TASK_SIZE; - return __clear_user(addr, size - over) + over; - } return size; } -- cgit v1.1 From 
fd2d2b191fe75825c4c7a6f12f3fef35aaed7dd7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 21 Aug 2016 22:00:54 -0400 Subject: s390: get_user() should zero on failure Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/s390/include/asm/uaccess.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 9b49cf1..2c5d292 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -266,28 +266,28 @@ int __put_user_bad(void) __attribute__((noreturn)); __chk_user_ptr(ptr); \ switch (sizeof(*(ptr))) { \ case 1: { \ - unsigned char __x; \ + unsigned char __x = 0; \ __gu_err = __get_user_fn(&__x, ptr, \ sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ break; \ }; \ case 2: { \ - unsigned short __x; \ + unsigned short __x = 0; \ __gu_err = __get_user_fn(&__x, ptr, \ sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ break; \ }; \ case 4: { \ - unsigned int __x; \ + unsigned int __x = 0; \ __gu_err = __get_user_fn(&__x, ptr, \ sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ break; \ }; \ case 8: { \ - unsigned long long __x; \ + unsigned long long __x = 0; \ __gu_err = __get_user_fn(&__x, ptr, \ sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ -- cgit v1.1 From c2f18fa4cbb3ad92e033a24efa27583978ce9600 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 21 Aug 2016 22:13:39 -0400 Subject: score: fix __get_user/get_user * should zero on any failure * __get_user() should use __copy_from_user(), not copy_from_user() Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/score/include/asm/uaccess.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/score/include/asm/uaccess.h b/arch/score/include/asm/uaccess.h index 20a3591..c5d4311 100644 --- a/arch/score/include/asm/uaccess.h +++ b/arch/score/include/asm/uaccess.h @@ -163,7 +163,7 @@ do { \ __get_user_asm(val, "lw", ptr); 
\ break; \ case 8: \ - if ((copy_from_user((void *)&val, ptr, 8)) == 0) \ + if (__copy_from_user((void *)&val, ptr, 8) == 0) \ __gu_err = 0; \ else \ __gu_err = -EFAULT; \ @@ -188,6 +188,8 @@ do { \ \ if (likely(access_ok(VERIFY_READ, __gu_ptr, size))) \ __get_user_common((x), size, __gu_ptr); \ + else \ + (x) = 0; \ \ __gu_err; \ }) @@ -201,6 +203,7 @@ do { \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3:li %0, %4\n" \ + "li %1, 0\n" \ "j 2b\n" \ ".previous\n" \ ".section __ex_table,\"a\"\n" \ -- cgit v1.1 From b615e3c74621e06cd97f86373ca90d43d6d998aa Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 21 Aug 2016 22:30:44 -0400 Subject: score: fix copy_from_user() and friends Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/score/include/asm/uaccess.h | 41 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/arch/score/include/asm/uaccess.h b/arch/score/include/asm/uaccess.h index c5d4311..01aec8c 100644 --- a/arch/score/include/asm/uaccess.h +++ b/arch/score/include/asm/uaccess.h @@ -301,35 +301,34 @@ extern int __copy_tofrom_user(void *to, const void *from, unsigned long len); static inline unsigned long copy_from_user(void *to, const void *from, unsigned long len) { - unsigned long over; + unsigned long res = len; - if (access_ok(VERIFY_READ, from, len)) - return __copy_tofrom_user(to, from, len); + if (likely(access_ok(VERIFY_READ, from, len))) + res = __copy_tofrom_user(to, from, len); - if ((unsigned long)from < TASK_SIZE) { - over = (unsigned long)from + len - TASK_SIZE; - return __copy_tofrom_user(to, from, len - over) + over; - } - return len; + if (unlikely(res)) + memset(to + (len - res), 0, res); + + return res; } static inline unsigned long copy_to_user(void *to, const void *from, unsigned long len) { - unsigned long over; - - if (access_ok(VERIFY_WRITE, to, len)) - return __copy_tofrom_user(to, from, len); + if (likely(access_ok(VERIFY_WRITE, to, len))) + len = __copy_tofrom_user(to, from, len); 
- if ((unsigned long)to < TASK_SIZE) { - over = (unsigned long)to + len - TASK_SIZE; - return __copy_tofrom_user(to, from, len - over) + over; - } return len; } -#define __copy_from_user(to, from, len) \ - __copy_tofrom_user((to), (from), (len)) +static inline unsigned long +__copy_from_user(void *to, const void *from, unsigned long len) +{ + unsigned long left = __copy_tofrom_user(to, from, len); + if (unlikely(left)) + memset(to + (len - left), 0, left); + return left; +} #define __copy_to_user(to, from, len) \ __copy_tofrom_user((to), (from), (len)) @@ -343,17 +342,17 @@ __copy_to_user_inatomic(void *to, const void *from, unsigned long len) static inline unsigned long __copy_from_user_inatomic(void *to, const void *from, unsigned long len) { - return __copy_from_user(to, from, len); + return __copy_tofrom_user(to, from, len); } -#define __copy_in_user(to, from, len) __copy_from_user(to, from, len) +#define __copy_in_user(to, from, len) __copy_tofrom_user(to, from, len) static inline unsigned long copy_in_user(void *to, const void *from, unsigned long len) { if (access_ok(VERIFY_READ, from, len) && access_ok(VERFITY_WRITE, to, len)) - return copy_from_user(to, from, len); + return __copy_tofrom_user(to, from, len); } /* -- cgit v1.1 From c6852389228df9fb3067f94f3b651de2a7921b36 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 21 Aug 2016 23:33:47 -0400 Subject: sh64: failing __get_user() should zero It could be done in exception-handling bits in __get_user_b() et.al., but the surgery involved would take more knowledge of sh64 details than I have or _want_ to have. 
Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/sh/include/asm/uaccess_64.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sh/include/asm/uaccess_64.h b/arch/sh/include/asm/uaccess_64.h index c01376c..ca5073d 100644 --- a/arch/sh/include/asm/uaccess_64.h +++ b/arch/sh/include/asm/uaccess_64.h @@ -24,6 +24,7 @@ #define __get_user_size(x,ptr,size,retval) \ do { \ retval = 0; \ + x = 0; \ switch (size) { \ case 1: \ retval = __get_user_asm_b((void *)&x, \ -- cgit v1.1 From 6e050503a150b2126620c1a1e9b3a368fcd51eac Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 21 Aug 2016 23:39:47 -0400 Subject: sh: fix copy_from_user() Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/sh/include/asm/uaccess.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/sh/include/asm/uaccess.h b/arch/sh/include/asm/uaccess.h index a49635c..92ade79 100644 --- a/arch/sh/include/asm/uaccess.h +++ b/arch/sh/include/asm/uaccess.h @@ -151,7 +151,10 @@ copy_from_user(void *to, const void __user *from, unsigned long n) __kernel_size_t __copy_size = (__kernel_size_t) n; if (__copy_size && __access_ok(__copy_from, __copy_size)) - return __copy_user(to, from, __copy_size); + __copy_size = __copy_user(to, from, __copy_size); + + if (unlikely(__copy_size)) + memset(to + (n - __copy_size), 0, __copy_size); return __copy_size; } -- cgit v1.1 From 917400cecb4b52b5cde5417348322bb9c8272fa6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 22 Aug 2016 00:23:07 -0400 Subject: sparc32: fix copy_from_user() Cc: stable@vger.kernel.org Acked-by: David S. 
Miller Signed-off-by: Al Viro --- arch/sparc/include/asm/uaccess_32.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h index 341a5a1..035c89b 100644 --- a/arch/sparc/include/asm/uaccess_32.h +++ b/arch/sparc/include/asm/uaccess_32.h @@ -269,8 +269,10 @@ static inline unsigned long copy_from_user(void *to, const void __user *from, un if (!__builtin_constant_p(n)) check_object_size(to, n, false); return __copy_user((__force void __user *) to, from, n); - } else + } else { + memset(to, 0, n); return n; + } } static inline unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) -- cgit v1.1 From 8f035983dd826d7e04f67b28acf8e2f08c347e41 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 9 Sep 2016 19:16:58 -0400 Subject: blackfin: fix copy_from_user() Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/blackfin/include/asm/uaccess.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/blackfin/include/asm/uaccess.h b/arch/blackfin/include/asm/uaccess.h index 12f5d68..0a2a700 100644 --- a/arch/blackfin/include/asm/uaccess.h +++ b/arch/blackfin/include/asm/uaccess.h @@ -171,11 +171,12 @@ static inline int bad_user_access_length(void) static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { - if (access_ok(VERIFY_READ, from, n)) + if (likely(access_ok(VERIFY_READ, from, n))) { memcpy(to, (const void __force *)from, n); - else - return n; - return 0; + return 0; + } + memset(to, 0, n); + return n; } static inline unsigned long __must_check -- cgit v1.1 From c90a3bc5061d57e7931a9b7ad14784e1a0ed497d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 9 Sep 2016 19:20:13 -0400 Subject: m32r: fix __get_user() Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/m32r/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/arch/m32r/include/asm/uaccess.h b/arch/m32r/include/asm/uaccess.h index cac7014..6f89821 100644 --- a/arch/m32r/include/asm/uaccess.h +++ b/arch/m32r/include/asm/uaccess.h @@ -219,7 +219,7 @@ extern int fixup_exception(struct pt_regs *regs); #define __get_user_nocheck(x, ptr, size) \ ({ \ long __gu_err = 0; \ - unsigned long __gu_val; \ + unsigned long __gu_val = 0; \ might_fault(); \ __get_user_size(__gu_val, (ptr), (size), __gu_err); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ -- cgit v1.1 From d0cf385160c12abd109746cad1f13e3b3e8b50b8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 9 Sep 2016 19:22:34 -0400 Subject: microblaze: fix copy_from_user() Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/microblaze/include/asm/uaccess.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h index 331b0d3..3a486d3 100644 --- a/arch/microblaze/include/asm/uaccess.h +++ b/arch/microblaze/include/asm/uaccess.h @@ -373,10 +373,13 @@ extern long __user_bad(void); static inline long copy_from_user(void *to, const void __user *from, unsigned long n) { + unsigned long res = n; might_fault(); - if (access_ok(VERIFY_READ, from, n)) - return __copy_from_user(to, from, n); - return n; + if (likely(access_ok(VERIFY_READ, from, n))) + res = __copy_from_user(to, from, n); + if (unlikely(res)) + memset(to + (n - res), 0, res); + return res; } #define __copy_to_user(to, from, n) \ -- cgit v1.1 From e98b9e37ae04562d52c96f46b3cf4c2e80222dc1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 9 Sep 2016 19:23:33 -0400 Subject: microblaze: fix __get_user() Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/microblaze/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h index 3a486d3..8266767 100644 --- a/arch/microblaze/include/asm/uaccess.h +++ 
b/arch/microblaze/include/asm/uaccess.h @@ -227,7 +227,7 @@ extern long __user_bad(void); #define __get_user(x, ptr) \ ({ \ - unsigned long __gu_val; \ + unsigned long __gu_val = 0; \ /*unsigned long __gu_ptr = (unsigned long)(ptr);*/ \ long __gu_err; \ switch (sizeof(*(ptr))) { \ -- cgit v1.1 From 8630c32275bac2de6ffb8aea9d9b11663e7ad28e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 9 Sep 2016 19:28:23 -0400 Subject: avr32: fix copy_from_user() really ugly, but apparently avr32 compilers turns access_ok() into something so bad that they want it in assembler. Left that way, zeroing added in inline wrapper. Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/avr32/include/asm/uaccess.h | 11 ++++++++++- arch/avr32/kernel/avr32_ksyms.c | 2 +- arch/avr32/lib/copy_user.S | 4 ++-- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/avr32/include/asm/uaccess.h b/arch/avr32/include/asm/uaccess.h index 68cf638..b1ec1fa 100644 --- a/arch/avr32/include/asm/uaccess.h +++ b/arch/avr32/include/asm/uaccess.h @@ -74,7 +74,7 @@ extern __kernel_size_t __copy_user(void *to, const void *from, extern __kernel_size_t copy_to_user(void __user *to, const void *from, __kernel_size_t n); -extern __kernel_size_t copy_from_user(void *to, const void __user *from, +extern __kernel_size_t ___copy_from_user(void *to, const void __user *from, __kernel_size_t n); static inline __kernel_size_t __copy_to_user(void __user *to, const void *from, @@ -88,6 +88,15 @@ static inline __kernel_size_t __copy_from_user(void *to, { return __copy_user(to, (const void __force *)from, n); } +static inline __kernel_size_t copy_from_user(void *to, + const void __user *from, + __kernel_size_t n) +{ + size_t res = ___copy_from_user(to, from, n); + if (unlikely(res)) + memset(to + (n - res), 0, res); + return res; +} #define __copy_to_user_inatomic __copy_to_user #define __copy_from_user_inatomic __copy_from_user diff --git a/arch/avr32/kernel/avr32_ksyms.c b/arch/avr32/kernel/avr32_ksyms.c 
index d93ead0..7c6cf14 100644 --- a/arch/avr32/kernel/avr32_ksyms.c +++ b/arch/avr32/kernel/avr32_ksyms.c @@ -36,7 +36,7 @@ EXPORT_SYMBOL(copy_page); /* * Userspace access stuff. */ -EXPORT_SYMBOL(copy_from_user); +EXPORT_SYMBOL(___copy_from_user); EXPORT_SYMBOL(copy_to_user); EXPORT_SYMBOL(__copy_user); EXPORT_SYMBOL(strncpy_from_user); diff --git a/arch/avr32/lib/copy_user.S b/arch/avr32/lib/copy_user.S index ea59c04..96a6de9 100644 --- a/arch/avr32/lib/copy_user.S +++ b/arch/avr32/lib/copy_user.S @@ -25,11 +25,11 @@ .align 1 .global copy_from_user .type copy_from_user, @function -copy_from_user: +___copy_from_user: branch_if_kernel r8, __copy_user ret_if_privileged r8, r11, r10, r10 rjmp __copy_user - .size copy_from_user, . - copy_from_user + .size ___copy_from_user, . - ___copy_from_user .global copy_to_user .type copy_to_user, @function -- cgit v1.1 From cd5892c756f51ed6ff18ff49c837d219bfd9bb5d Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Sep 2016 08:49:05 +0100 Subject: rxrpc: Create an address for sendmsg() to bind unbound socket with Create an address for sendmsg() to bind unbound socket with rather than using a completely blank address otherwise the transport socket creation will fail because it will try to use address family 0. We use the address family specified in the protocol argument when the AF_RXRPC socket was created and SOCK_DGRAM as the default. For anything else, bind() must be used. 
Signed-off-by: David Howells --- net/rxrpc/af_rxrpc.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 25d00de..741b0d8 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -401,6 +401,18 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len) switch (rx->sk.sk_state) { case RXRPC_UNBOUND: + rx->srx.srx_family = AF_RXRPC; + rx->srx.srx_service = 0; + rx->srx.transport_type = SOCK_DGRAM; + rx->srx.transport.family = rx->family; + switch (rx->family) { + case AF_INET: + rx->srx.transport_len = sizeof(struct sockaddr_in); + break; + default: + ret = -EAFNOSUPPORT; + goto error_unlock; + } local = rxrpc_lookup_local(&rx->srx); if (IS_ERR(local)) { ret = PTR_ERR(local); -- cgit v1.1 From aaa31cbc66733386406464ec6c5c0889d9968a95 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Sep 2016 08:49:05 +0100 Subject: rxrpc: Don't specify protocol to when creating transport socket Pass 0 as the protocol argument when creating the transport socket rather than IPPROTO_UDP. 
Signed-off-by: David Howells --- net/rxrpc/local_object.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 782b9ad..8720be2 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -103,8 +103,8 @@ static int rxrpc_open_socket(struct rxrpc_local *local) _enter("%p{%d}", local, local->srx.transport_type); /* create a socket to represent the local endpoint */ - ret = sock_create_kern(&init_net, PF_INET, local->srx.transport_type, - IPPROTO_UDP, &local->socket); + ret = sock_create_kern(&init_net, local->srx.transport.family, + local->srx.transport_type, 0, &local->socket); if (ret < 0) { _leave(" = %d [socket]", ret); return ret; -- cgit v1.1 From 1c2bc7b948a2adee0d3e070f4ce14645efa0a2d2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Sep 2016 08:49:05 +0100 Subject: rxrpc: Use rxrpc_extract_addr_from_skb() rather than doing this manually There are two places that want to transmit a packet in response to one just received and manually pick the address to reply to out of the sk_buff. Make them use rxrpc_extract_addr_from_skb() instead so that IPv6 is handled automatically. 
Signed-off-by: David Howells --- net/rxrpc/local_event.c | 13 +++++-------- net/rxrpc/output.c | 32 ++++++-------------------------- 2 files changed, 11 insertions(+), 34 deletions(-) diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c index cdd58e6..f073e93 100644 --- a/net/rxrpc/local_event.c +++ b/net/rxrpc/local_event.c @@ -15,8 +15,6 @@ #include #include #include -#include -#include #include #include #include @@ -33,7 +31,7 @@ static void rxrpc_send_version_request(struct rxrpc_local *local, { struct rxrpc_wire_header whdr; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct sockaddr_in sin; + struct sockaddr_rxrpc srx; struct msghdr msg; struct kvec iov[2]; size_t len; @@ -41,12 +39,11 @@ static void rxrpc_send_version_request(struct rxrpc_local *local, _enter(""); - sin.sin_family = AF_INET; - sin.sin_port = udp_hdr(skb)->source; - sin.sin_addr.s_addr = ip_hdr(skb)->saddr; + if (rxrpc_extract_addr_from_skb(&srx, skb) < 0) + return; - msg.msg_name = &sin; - msg.msg_namelen = sizeof(sin); + msg.msg_name = &srx.transport; + msg.msg_namelen = srx.transport_len; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 90c7722..ec3621f 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -15,8 +15,6 @@ #include #include #include -#include -#include #include #include #include "ar-internal.h" @@ -272,10 +270,7 @@ send_fragmentable: */ void rxrpc_reject_packets(struct rxrpc_local *local) { - union { - struct sockaddr sa; - struct sockaddr_in sin; - } sa; + struct sockaddr_rxrpc srx; struct rxrpc_skb_priv *sp; struct rxrpc_wire_header whdr; struct sk_buff *skb; @@ -292,32 +287,21 @@ void rxrpc_reject_packets(struct rxrpc_local *local) iov[1].iov_len = sizeof(code); size = sizeof(whdr) + sizeof(code); - msg.msg_name = &sa; + msg.msg_name = &srx.transport; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; - memset(&sa, 0, sizeof(sa)); - sa.sa.sa_family = 
local->srx.transport.family; - switch (sa.sa.sa_family) { - case AF_INET: - msg.msg_namelen = sizeof(sa.sin); - break; - default: - msg.msg_namelen = 0; - break; - } - memset(&whdr, 0, sizeof(whdr)); whdr.type = RXRPC_PACKET_TYPE_ABORT; while ((skb = skb_dequeue(&local->reject_queue))) { rxrpc_see_skb(skb); sp = rxrpc_skb(skb); - switch (sa.sa.sa_family) { - case AF_INET: - sa.sin.sin_port = udp_hdr(skb)->source; - sa.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; + + if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) { + msg.msg_namelen = srx.transport_len; + code = htonl(skb->priority); whdr.epoch = htonl(sp->hdr.epoch); @@ -329,10 +313,6 @@ void rxrpc_reject_packets(struct rxrpc_local *local) whdr.flags &= RXRPC_CLIENT_INITIATED; kernel_sendmsg(local->socket, &msg, iov, 2, size); - break; - - default: - break; } rxrpc_free_skb(skb); -- cgit v1.1 From 75b54cb57ca34cbe7a87c6ac757c55360a624590 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Sep 2016 08:49:05 +0100 Subject: rxrpc: Add IPv6 support Add IPv6 support to AF_RXRPC. With this, AF_RXRPC sockets can be created: service = socket(AF_RXRPC, SOCK_DGRAM, PF_INET6); instead of: service = socket(AF_RXRPC, SOCK_DGRAM, PF_INET); The AFS filesystem doesn't support IPv6 at the moment, though, since that requires upgrades to some of the RPC calls. Note that a good portion of this patch is replacing "%pI4:%u" in print statements with "%pISpc" which is able to handle both protocols and print the port. 
Signed-off-by: David Howells --- net/rxrpc/af_rxrpc.c | 15 +++++-- net/rxrpc/conn_object.c | 8 ++++ net/rxrpc/local_object.c | 35 +++++++-------- net/rxrpc/output.c | 16 +++++++ net/rxrpc/peer_event.c | 24 +++++++++++ net/rxrpc/peer_object.c | 109 +++++++++++++++++++++++++++++------------------ net/rxrpc/proc.c | 30 ++++++------- 7 files changed, 154 insertions(+), 83 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 741b0d8..f61f7b2 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -106,19 +106,23 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx, case AF_INET: if (srx->transport_len < sizeof(struct sockaddr_in)) return -EINVAL; - _debug("INET: %x @ %pI4", - ntohs(srx->transport.sin.sin_port), - &srx->transport.sin.sin_addr); tail = offsetof(struct sockaddr_rxrpc, transport.sin.__pad); break; case AF_INET6: + if (srx->transport_len < sizeof(struct sockaddr_in6)) + return -EINVAL; + tail = offsetof(struct sockaddr_rxrpc, transport) + + sizeof(struct sockaddr_in6); + break; + default: return -EAFNOSUPPORT; } if (tail < len) memset((void *)srx + tail, 0, len - tail); + _debug("INET: %pISp", &srx->transport); return 0; } @@ -409,6 +413,9 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len) case AF_INET: rx->srx.transport_len = sizeof(struct sockaddr_in); break; + case AF_INET6: + rx->srx.transport_len = sizeof(struct sockaddr_in6); + break; default: ret = -EAFNOSUPPORT; goto error_unlock; @@ -563,7 +570,7 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, return -EAFNOSUPPORT; /* we support transport protocol UDP/UDP6 only */ - if (protocol != PF_INET) + if (protocol != PF_INET && protocol != PF_INET6) return -EPROTONOSUPPORT; if (sock->type != SOCK_DGRAM) diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index ffa9add..c0ddba7 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -134,6 +134,14 @@ struct rxrpc_connection 
*rxrpc_find_connection_rcu(struct rxrpc_local *local, srx.transport.sin.sin_addr.s_addr) goto not_found; break; + case AF_INET6: + if (peer->srx.transport.sin6.sin6_port != + srx.transport.sin6.sin6_port || + memcmp(&peer->srx.transport.sin6.sin6_addr, + &srx.transport.sin6.sin6_addr, + sizeof(struct in6_addr)) != 0) + goto not_found; + break; default: BUG(); } diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 8720be2..f5b9bb0 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -58,6 +58,15 @@ static long rxrpc_local_cmp_key(const struct rxrpc_local *local, memcmp(&local->srx.transport.sin.sin_addr, &srx->transport.sin.sin_addr, sizeof(struct in_addr)); + case AF_INET6: + /* If the choice of UDP6 port is left up to the transport, then + * the endpoint record doesn't match. + */ + return ((u16 __force)local->srx.transport.sin6.sin6_port - + (u16 __force)srx->transport.sin6.sin6_port) ?: + memcmp(&local->srx.transport.sin6.sin6_addr, + &srx->transport.sin6.sin6_addr, + sizeof(struct in6_addr)); default: BUG(); } @@ -100,7 +109,8 @@ static int rxrpc_open_socket(struct rxrpc_local *local) struct sock *sock; int ret, opt; - _enter("%p{%d}", local, local->srx.transport_type); + _enter("%p{%d,%d}", + local, local->srx.transport_type, local->srx.transport.family); /* create a socket to represent the local endpoint */ ret = sock_create_kern(&init_net, local->srx.transport.family, @@ -169,18 +179,8 @@ struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *srx) long diff; int ret; - if (srx->transport.family == AF_INET) { - _enter("{%d,%u,%pI4+%hu}", - srx->transport_type, - srx->transport.family, - &srx->transport.sin.sin_addr, - ntohs(srx->transport.sin.sin_port)); - } else { - _enter("{%d,%u}", - srx->transport_type, - srx->transport.family); - return ERR_PTR(-EAFNOSUPPORT); - } + _enter("{%d,%d,%pISp}", + srx->transport_type, srx->transport.family, &srx->transport); mutex_lock(&rxrpc_local_mutex); @@ -233,13 +233,8 @@ 
struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *srx) found: mutex_unlock(&rxrpc_local_mutex); - _net("LOCAL %s %d {%d,%u,%pI4+%hu}", - age, - local->debug_id, - local->srx.transport_type, - local->srx.transport.family, - &local->srx.transport.sin.sin_addr, - ntohs(local->srx.transport.sin.sin_port)); + _net("LOCAL %s %d {%pISp}", + age, local->debug_id, &local->srx.transport); _leave(" = %p", local); return local; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index ec3621f..d7cd87f 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -258,6 +258,22 @@ send_fragmentable: (char *)&opt, sizeof(opt)); } break; + + case AF_INET6: + opt = IPV6_PMTUDISC_DONT; + ret = kernel_setsockopt(conn->params.local->socket, + SOL_IPV6, IPV6_MTU_DISCOVER, + (char *)&opt, sizeof(opt)); + if (ret == 0) { + ret = kernel_sendmsg(conn->params.local->socket, &msg, + iov, 1, iov[0].iov_len); + + opt = IPV6_PMTUDISC_DO; + kernel_setsockopt(conn->params.local->socket, + SOL_IPV6, IPV6_MTU_DISCOVER, + (char *)&opt, sizeof(opt)); + } + break; } up_write(&conn->params.local->defrag_sem); diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index c894893..7421758 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -66,6 +66,30 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, } break; + case AF_INET6: + srx.transport.sin6.sin6_port = serr->port; + srx.transport_len = sizeof(struct sockaddr_in6); + switch (serr->ee.ee_origin) { + case SO_EE_ORIGIN_ICMP6: + _net("Rx ICMP6"); + memcpy(&srx.transport.sin6.sin6_addr, + skb_network_header(skb) + serr->addr_offset, + sizeof(struct in6_addr)); + break; + case SO_EE_ORIGIN_ICMP: + _net("Rx ICMP on v6 sock"); + memcpy(&srx.transport.sin6.sin6_addr.s6_addr + 12, + skb_network_header(skb) + serr->addr_offset, + sizeof(struct in_addr)); + break; + default: + memcpy(&srx.transport.sin6.sin6_addr, + &ipv6_hdr(skb)->saddr, + sizeof(struct in6_addr)); + break; + } + break; + 
default: BUG(); } diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 3e6cd17..dfc07b4 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -16,12 +16,14 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include "ar-internal.h" static DEFINE_HASHTABLE(rxrpc_peer_hash, 10); @@ -50,6 +52,11 @@ static unsigned long rxrpc_peer_hash_key(struct rxrpc_local *local, size = sizeof(srx->transport.sin.sin_addr); p = (u16 *)&srx->transport.sin.sin_addr; break; + case AF_INET6: + hash_key += (u16 __force)srx->transport.sin.sin_port; + size = sizeof(srx->transport.sin6.sin6_addr); + p = (u16 *)&srx->transport.sin6.sin6_addr; + break; default: WARN(1, "AF_RXRPC: Unsupported transport address family\n"); return 0; @@ -93,6 +100,12 @@ static long rxrpc_peer_cmp_key(const struct rxrpc_peer *peer, memcmp(&peer->srx.transport.sin.sin_addr, &srx->transport.sin.sin_addr, sizeof(struct in_addr)); + case AF_INET6: + return ((u16 __force)peer->srx.transport.sin6.sin6_port - + (u16 __force)srx->transport.sin6.sin6_port) ?: + memcmp(&peer->srx.transport.sin6.sin6_addr, + &srx->transport.sin6.sin6_addr, + sizeof(struct in6_addr)); default: BUG(); } @@ -130,17 +143,7 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local, peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key); if (peer) { - switch (srx->transport.family) { - case AF_INET: - _net("PEER %d {%d,%u,%pI4+%hu}", - peer->debug_id, - peer->srx.transport_type, - peer->srx.transport.family, - &peer->srx.transport.sin.sin_addr, - ntohs(peer->srx.transport.sin.sin_port)); - break; - } - + _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport); _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage)); } return peer; @@ -152,22 +155,49 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local, */ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) { + struct dst_entry *dst; struct rtable *rt; - struct flowi4 fl4; + 
struct flowi fl; + struct flowi4 *fl4 = &fl.u.ip4; + struct flowi6 *fl6 = &fl.u.ip6; peer->if_mtu = 1500; - rt = ip_route_output_ports(&init_net, &fl4, NULL, - peer->srx.transport.sin.sin_addr.s_addr, 0, - htons(7000), htons(7001), - IPPROTO_UDP, 0, 0); - if (IS_ERR(rt)) { - _leave(" [route err %ld]", PTR_ERR(rt)); - return; + memset(&fl, 0, sizeof(fl)); + switch (peer->srx.transport.family) { + case AF_INET: + rt = ip_route_output_ports( + &init_net, fl4, NULL, + peer->srx.transport.sin.sin_addr.s_addr, 0, + htons(7000), htons(7001), IPPROTO_UDP, 0, 0); + if (IS_ERR(rt)) { + _leave(" [route err %ld]", PTR_ERR(rt)); + return; + } + dst = &rt->dst; + break; + + case AF_INET6: + fl6->flowi6_iif = LOOPBACK_IFINDEX; + fl6->flowi6_scope = RT_SCOPE_UNIVERSE; + fl6->flowi6_proto = IPPROTO_UDP; + memcpy(&fl6->daddr, &peer->srx.transport.sin6.sin6_addr, + sizeof(struct in6_addr)); + fl6->fl6_dport = htons(7001); + fl6->fl6_sport = htons(7000); + dst = ip6_route_output(&init_net, NULL, fl6); + if (IS_ERR(dst)) { + _leave(" [route err %ld]", PTR_ERR(dst)); + return; + } + break; + + default: + BUG(); } - peer->if_mtu = dst_mtu(&rt->dst); - dst_release(&rt->dst); + peer->if_mtu = dst_mtu(dst); + dst_release(dst); _leave(" [if_mtu %u]", peer->if_mtu); } @@ -207,17 +237,22 @@ static void rxrpc_init_peer(struct rxrpc_peer *peer, unsigned long hash_key) rxrpc_assess_MTU_size(peer); peer->mtu = peer->if_mtu; - if (peer->srx.transport.family == AF_INET) { + switch (peer->srx.transport.family) { + case AF_INET: peer->hdrsize = sizeof(struct iphdr); - switch (peer->srx.transport_type) { - case SOCK_DGRAM: - peer->hdrsize += sizeof(struct udphdr); - break; - default: - BUG(); - break; - } - } else { + break; + case AF_INET6: + peer->hdrsize = sizeof(struct ipv6hdr); + break; + default: + BUG(); + } + + switch (peer->srx.transport_type) { + case SOCK_DGRAM: + peer->hdrsize += sizeof(struct udphdr); + break; + default: BUG(); } @@ -285,11 +320,7 @@ struct rxrpc_peer 
*rxrpc_lookup_peer(struct rxrpc_local *local, struct rxrpc_peer *peer, *candidate; unsigned long hash_key = rxrpc_peer_hash_key(local, srx); - _enter("{%d,%d,%pI4+%hu}", - srx->transport_type, - srx->transport_len, - &srx->transport.sin.sin_addr, - ntohs(srx->transport.sin.sin_port)); + _enter("{%pISp}", &srx->transport); /* search the peer list first */ rcu_read_lock(); @@ -326,11 +357,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, peer = candidate; } - _net("PEER %d {%d,%pI4+%hu}", - peer->debug_id, - peer->srx.transport_type, - &peer->srx.transport.sin.sin_addr, - ntohs(peer->srx.transport.sin.sin_port)); + _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport); _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage)); return peer; diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index d529d1b..65cd980 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -52,11 +52,12 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) struct rxrpc_sock *rx; struct rxrpc_peer *peer; struct rxrpc_call *call; - char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1]; + char lbuff[50], rbuff[50]; if (v == &rxrpc_calls) { seq_puts(seq, - "Proto Local Remote " + "Proto Local " + " Remote " " SvID ConnID CallID End Use State Abort " " UserID\n"); return 0; @@ -68,9 +69,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) if (rx) { local = READ_ONCE(rx->local); if (local) - sprintf(lbuff, "%pI4:%u", - &local->srx.transport.sin.sin_addr, - ntohs(local->srx.transport.sin.sin_port)); + sprintf(lbuff, "%pISpc", &local->srx.transport); else strcpy(lbuff, "no_local"); } else { @@ -79,14 +78,12 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) peer = call->peer; if (peer) - sprintf(rbuff, "%pI4:%u", - &peer->srx.transport.sin.sin_addr, - ntohs(peer->srx.transport.sin.sin_port)); + sprintf(rbuff, "%pISpc", &peer->srx.transport); else strcpy(rbuff, "no_connection"); seq_printf(seq, - "UDP %-22.22s %-22.22s %4x %08x 
%08x %s %3u" + "UDP %-47.47s %-47.47s %4x %08x %08x %s %3u" " %-8.8s %08x %lx\n", lbuff, rbuff, @@ -145,11 +142,12 @@ static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v) static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) { struct rxrpc_connection *conn; - char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1]; + char lbuff[50], rbuff[50]; if (v == &rxrpc_connection_proc_list) { seq_puts(seq, - "Proto Local Remote " + "Proto Local " + " Remote " " SvID ConnID End Use State Key " " Serial ISerial\n" ); @@ -163,16 +161,12 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) goto print; } - sprintf(lbuff, "%pI4:%u", - &conn->params.local->srx.transport.sin.sin_addr, - ntohs(conn->params.local->srx.transport.sin.sin_port)); + sprintf(lbuff, "%pISpc", &conn->params.local->srx.transport); - sprintf(rbuff, "%pI4:%u", - &conn->params.peer->srx.transport.sin.sin_addr, - ntohs(conn->params.peer->srx.transport.sin.sin_port)); + sprintf(rbuff, "%pISpc", &conn->params.peer->srx.transport); print: seq_printf(seq, - "UDP %-22.22s %-22.22s %4x %08x %s %3u" + "UDP %-47.47s %-47.47s %4x %08x %s %3u" " %s %08x %08x %08x\n", lbuff, rbuff, -- cgit v1.1 From 6eaed1665fc6864fbdbffcc6f43a7f5d012f3052 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 13 Sep 2016 16:40:24 +1000 Subject: powerpc/powernv: Fix the state of root PE The PE for root bus (root PE) can be removed because of PCI hot remove in EEH recovery path for fenced PHB error. We need update @phb->root_pe_populated accordingly so that the root PE can be populated again in forthcoming PCI hot add path. Also, the PE shouldn't be destroyed as it's global and reserved resource. 
Fixes: c5f7700bbd2e ("powerpc/powernv: Dynamically release PE") Reported-by: Frederic Barrat Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index c16d790..0c71a2f 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -3426,7 +3426,17 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe) } } - pnv_ioda_free_pe(pe); + /* + * The PE for root bus can be removed because of hotplug in EEH + * recovery for fenced PHB error. We need to mark the PE dead so + * that it can be populated again in PCI hot add path. The PE + * shouldn't be destroyed as it's the global reserved resource. + */ + if (phb->ioda.root_pe_populated && + phb->ioda.root_pe_idx == pe->pe_number) + phb->ioda.root_pe_populated = false; + else + pnv_ioda_free_pe(pe); } static void pnv_pci_release_device(struct pci_dev *pdev) -- cgit v1.1 From 74712339a4fc0f4ddc710e6bca836a6b78b7d8de Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 25 Aug 2016 08:23:14 +0100 Subject: drm/i915: Restore lost "Initialized i915" welcome message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A side effect of removing the midlayer from driver loading was the loss of a useful message announcing to userspace that i915 had successfully started, e.g.: [drm] Initialized i915 1.6.0 20160425 for 0000:00:02.0 on minor 0 Reported-by: Timo Aaltonen Signed-off-by: Chris Wilson Fixes: 8f460e2c78f2 ("drm/i915: Demidlayer driver loading") Cc: Daniel Vetter Cc: Ville Syrjälä Cc: drm-intel-fixes@lists.freedesktop.org Link: http://patchwork.freedesktop.org/patch/msgid/20160825072314.17402-1-chris@chris-wilson.co.uk Reviewed-by: Daniel Vetter (cherry picked from commit bc5ca47c0af4f949ba889e666b7da65569e36093) Signed-off-by: 
Jani Nikula --- drivers/gpu/drm/i915/i915_drv.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 95ddd56..5de36d8 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1281,6 +1281,11 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) intel_runtime_pm_enable(dev_priv); + /* Everything is in place, we can now relax! */ + DRM_INFO("Initialized %s %d.%d.%d %s for %s on minor %d\n", + driver.name, driver.major, driver.minor, driver.patchlevel, + driver.date, pci_name(pdev), dev_priv->drm.primary->index); + intel_runtime_pm_put(dev_priv); return 0; -- cgit v1.1 From 86dfb76cba284114cf586005cd943eeb6e4f328d Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 7 Sep 2016 17:42:31 -0700 Subject: Revert "drm/i915/psr: Make idle_frames sensible again" This reverts commit 1c80c25fb622973dd135878e98d172be20859049 Author: Daniel Vetter Date: Wed May 18 18:47:12 2016 +0200 drm/i915/psr: Make idle_frames sensible again There are panels that need 4 idle frames before entering PSR, but VBT is improperly set. Also lately it was identified that idle frame count calculated at HW can be off by 1, which makes the minimum 2, at least. Without the current vbt+1 we run the risk of having HW calculating 0 idle frames and entering PSR when it shouldn't. Regardless of the lack of link training. [Jani: there is some disagreement on the explanation, but the commit regresses so revert it is.] 
References: http://marc.info/?i=20160904191153.GA2328@light.dominikbrodowski.net Cc: Dominik Brodowski Cc: Jani Nikula Cc: Daniel Vetter Signed-off-by: Rodrigo Vivi Fixes: 1c80c25fb622 ("drm/i915/psr: Make idle_frames sensible again") Cc: drm-intel-fixes@lists.freedesktop.org # v4.8-rc1+ Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1473295351-8766-1-git-send-email-rodrigo.vivi@intel.com (cherry picked from commit 40918e0bb81be02f507a941f8b2741f0dc1771b0) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_psr.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 2b0d1ba..cf171b4 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -255,14 +255,14 @@ static void hsw_psr_enable_source(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = to_i915(dev); uint32_t max_sleep_time = 0x1f; - /* Lately it was identified that depending on panel idle frame count - * calculated at HW can be off by 1. So let's use what came - * from VBT + 1. - * There are also other cases where panel demands at least 4 - * but VBT is not being set. To cover these 2 cases lets use - * at least 5 when VBT isn't set to be on the safest side. + /* + * Let's respect VBT in case VBT asks a higher idle_frame value. + * Let's use 6 as the minimum to cover all known cases including + * the off-by-one issue that HW has in some cases. Also there are + * cases where sink should be able to train + * with the 5 or 6 idle patterns. 
*/ - uint32_t idle_frames = dev_priv->vbt.psr.idle_frames + 1; + uint32_t idle_frames = max(6, dev_priv->vbt.psr.idle_frames); uint32_t val = EDP_PSR_ENABLE; val |= max_sleep_time << EDP_PSR_MAX_SLEEP_TIME_SHIFT; -- cgit v1.1 From ea54ff4008892b46c7a3e6bc8ab8aaec9d198639 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 13 Sep 2016 12:22:19 +0300 Subject: drm/i915: Ignore OpRegion panel type except on select machines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turns out commit a05628195a0d ("drm/i915: Get panel_type from OpRegion panel details") has regressed quite a few machines. So it looks like we can't use the panel type from OpRegion on all systems, and yet we absolutely must use it on some specific systems. Despite trying, I was unable to find any automagic way to determine if the OpRegion panel type is respectable or not. The only glimmer of hope I had was bit 8 in the SCIC response, but that turned out to not work either (it was always 0 on both types of systems). So, to fix the regressions without breaking the machine we know to need the OpRegion panel type, let's just add a quirk for this. Only specific machines known to require the OpRegion panel type will therefore use it. Everyone else will fall back to the VBT panel type. The only known machine so far is a "Conrac GmbH IX45GM2". The PCI subsystem ID on this machine is just a generic 8086:2a42, so of no use. Instead we'll go with a DMI match. I suspect we can now also revert commit aeddda06c1a7 ("drm/i915: Ignore panel type from OpRegion on SKL") but let's leave that to a separate patch. 
v2: Do the DMI match in the opregion code directly, as dev_priv->quirks gets populated too late Cc: Rob Kramer Cc: Martin van Es Cc: Andrea Arcangeli Cc: Dave Airlie Cc: Marco Krüger Cc: Sean Greenslade Cc: Trudy Tective Cc: Robin Müller Cc: Alexander Kobel Cc: Alexey Shumitsky Cc: Emil Andersen Lauridsen Cc: oceans112@gmail.com Cc: James Hogan Cc: James Bottomley Cc: stable@vger.kernel.org References: https://lists.freedesktop.org/archives/intel-gfx/2016-August/105545.html References: https://lists.freedesktop.org/archives/dri-devel/2016-August/116888.html References: https://lists.freedesktop.org/archives/intel-gfx/2016-June/098826.html Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94825 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97060 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97443 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97363 Fixes: a05628195a0d ("drm/i915: Get panel_type from OpRegion panel details") Tested-by: Marco Krüger Tested-by: Alexey Shumitsky Tested-by: Sean Greenslade Tested-by: Emil Andersen Lauridsen Tested-by: Robin Müller Tested-by: oceans112@gmail.com Tested-by: Rob Kramer Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1473758539-21565-1-git-send-email-ville.syrjala@linux.intel.com References: http://patchwork.freedesktop.org/patch/msgid/1473602239-15855-1-git-send-email-adrienverge@gmail.com Acked-by: Jani Nikula (cherry picked from commit c8ebfad7a063fe665417fa0eeb0da7cfe987d8ed) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_opregion.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c index adca262..7acbbbf 100644 --- a/drivers/gpu/drm/i915/intel_opregion.c +++ b/drivers/gpu/drm/i915/intel_opregion.c @@ -1047,6 +1047,23 @@ err_out: return err; } +static int intel_use_opregion_panel_type_callback(const struct dmi_system_id *id) +{ + 
DRM_INFO("Using panel type from OpRegion on %s\n", id->ident); + return 1; +} + +static const struct dmi_system_id intel_use_opregion_panel_type[] = { + { + .callback = intel_use_opregion_panel_type_callback, + .ident = "Conrac GmbH IX45GM2", + .matches = {DMI_MATCH(DMI_SYS_VENDOR, "Conrac GmbH"), + DMI_MATCH(DMI_PRODUCT_NAME, "IX45GM2"), + }, + }, + { } +}; + int intel_opregion_get_panel_type(struct drm_i915_private *dev_priv) { @@ -1073,6 +1090,16 @@ intel_opregion_get_panel_type(struct drm_i915_private *dev_priv) } /* + * So far we know that some machined must use it, others must not use it. + * There doesn't seem to be any way to determine which way to go, except + * via a quirk list :( + */ + if (!dmi_check_system(intel_use_opregion_panel_type)) { + DRM_DEBUG_KMS("Ignoring OpRegion panel type (%d)\n", ret - 1); + return -ENODEV; + } + + /* * FIXME On Dell XPS 13 9350 the OpRegion panel type (0) gives us * low vswing for eDP, whereas the VBT panel type (2) gives us normal * vswing instead. Low vswing results in some display flickers, so -- cgit v1.1 From 0b97a484e52cb423662eb98904aad82dafcc1f10 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Sep 2016 09:41:34 +0200 Subject: mac80211: check skb_linearize() return value The A-MSDU TX code (within TXQs) didn't always check the return value of skb_linearize() properly, resulting in potentially passing a frag- list SKB down to the driver even when it said it can't handle it. Fix that. 
Fixes: 6e0456b545456 ("mac80211: add A-MSDU tx support") Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index cc8e955..18b285e 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1515,8 +1515,12 @@ out: spin_unlock_bh(&fq->lock); if (skb && skb_has_frag_list(skb) && - !ieee80211_hw_check(&local->hw, TX_FRAG_LIST)) - skb_linearize(skb); + !ieee80211_hw_check(&local->hw, TX_FRAG_LIST)) { + if (skb_linearize(skb)) { + ieee80211_free_txskb(&local->hw, skb); + return NULL; + } + } return skb; } -- cgit v1.1 From e5789608766113ca9c30d596d93ca7d5cbd8b461 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 14 Sep 2016 14:22:07 +0300 Subject: mmc: omap_hsmmc: Initialize dma_slave_config to avoid random data It is wrong to use uninitialized dma_slave_config and configure only certain fields as the DMAengine driver might look at non initialized (random data) fields and tries to interpret it. 
Signed-off-by: Peter Ujfalusi Signed-off-by: Ulf Hansson --- drivers/mmc/host/omap_hsmmc.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index 24ebc9a..5f2f24a 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -1409,11 +1409,18 @@ static int omap_hsmmc_pre_dma_transfer(struct omap_hsmmc_host *host, static int omap_hsmmc_setup_dma_transfer(struct omap_hsmmc_host *host, struct mmc_request *req) { - struct dma_slave_config cfg; struct dma_async_tx_descriptor *tx; int ret = 0, i; struct mmc_data *data = req->data; struct dma_chan *chan; + struct dma_slave_config cfg = { + .src_addr = host->mapbase + OMAP_HSMMC_DATA, + .dst_addr = host->mapbase + OMAP_HSMMC_DATA, + .src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES, + .dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES, + .src_maxburst = data->blksz / 4, + .dst_maxburst = data->blksz / 4, + }; /* Sanity check: all the SG entries must be aligned by block size. */ for (i = 0; i < data->sg_len; i++) { @@ -1433,13 +1440,6 @@ static int omap_hsmmc_setup_dma_transfer(struct omap_hsmmc_host *host, chan = omap_hsmmc_get_dma_chan(host, data); - cfg.src_addr = host->mapbase + OMAP_HSMMC_DATA; - cfg.dst_addr = host->mapbase + OMAP_HSMMC_DATA; - cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; - cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; - cfg.src_maxburst = data->blksz / 4; - cfg.dst_maxburst = data->blksz / 4; - ret = dmaengine_slave_config(chan, &cfg); if (ret) return ret; -- cgit v1.1 From df804d5e27490151da1ce9f216031a31352203e6 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 14 Sep 2016 14:21:54 +0300 Subject: mmc: omap: Initialize dma_slave_config to avoid random data in it's fields It is wrong to use uninitialized dma_slave_config and configure only certain fields as the DMAengine driver might look at non initialized (random data) fields and tries to interpret it. 
Signed-off-by: Peter Ujfalusi Signed-off-by: Ulf Hansson --- drivers/mmc/host/omap.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c index f23d65e..be3c49f 100644 --- a/drivers/mmc/host/omap.c +++ b/drivers/mmc/host/omap.c @@ -1016,14 +1016,16 @@ mmc_omap_prepare_data(struct mmc_omap_host *host, struct mmc_request *req) /* Only reconfigure if we have a different burst size */ if (*bp != burst) { - struct dma_slave_config cfg; - - cfg.src_addr = host->phys_base + OMAP_MMC_REG(host, DATA); - cfg.dst_addr = host->phys_base + OMAP_MMC_REG(host, DATA); - cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES; - cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES; - cfg.src_maxburst = burst; - cfg.dst_maxburst = burst; + struct dma_slave_config cfg = { + .src_addr = host->phys_base + + OMAP_MMC_REG(host, DATA), + .dst_addr = host->phys_base + + OMAP_MMC_REG(host, DATA), + .src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES, + .dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES, + .src_maxburst = burst, + .dst_maxburst = burst, + }; if (dmaengine_slave_config(c, &cfg)) goto use_pio; -- cgit v1.1 From 6cfeaf5125d425043d44002d0a1a8a147be582bf Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Wed, 14 Sep 2016 11:00:26 +0100 Subject: cpu/hotplug: Include linux/types.h in linux/cpuhotplug.h The linux/cpuhotplug.h header makes use of the bool type, but wasn't including linux/types.h to ensure that type has been defined. Fix this by including linux/types.h in preparation for including linux/cpuhotplug.h in a file that doesn't do so already. 
Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org Cc: Richard Cochran Cc: Sebastian Andrzej Siewior Cc: Ralf Baechle Cc: Anna-Maria Gleixner Link: http://lkml.kernel.org/r/20160914100027.20945-1-paul.burton@imgtec.com Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 242bf53..34bd805 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -1,6 +1,8 @@ #ifndef __CPUHOTPLUG_H #define __CPUHOTPLUG_H +#include <linux/types.h> + enum cpuhp_state { CPUHP_OFFLINE, CPUHP_CREATE_THREADS, -- cgit v1.1 From de75abbe0121a6c3c9c6b04c75300088e57ad1d5 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 8 Sep 2016 11:48:28 +0200 Subject: arm/xen: fix SMP guests boot Commit 88e957d6e47f ("xen: introduce xen_vcpu_id mapping") broke SMP ARM guests on Xen. When FIFO-based event channels are in use (this is the default), evtchn_fifo_alloc_control_block() is called on CPU_UP_PREPARE event and this happens before we set up xen_vcpu_id mapping in xen_starting_cpu. Temporarily fix the issue by setting direct Linux CPU id <-> Xen vCPU id mapping for all possible CPUs at boot. We don't currently support kexec/kdump on Xen/ARM so these ids always match. In future, we have several ways to solve the issue, e.g.: - Eliminate all hypercalls from CPU_UP_PREPARE, do them from the starting CPU. This can probably be done for both x86 and ARM and, if done, will allow us to get Xen's idea of vCPU id from CPUID/MPIDR on the starting CPU directly, no messing with ACPI/device tree required. - Save vCPU id information from ACPI/device tree on ARM and use it to initialize xen_vcpu_id mapping. This is the same trick we currently do on x86. 
Reported-by: Julien Grall Tested-by: Wei Chen Signed-off-by: Vitaly Kuznetsov Acked-by: Stefano Stabellini Signed-off-by: David Vrabel --- arch/arm/xen/enlighten.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 3d2cef6..f193414 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -170,9 +170,6 @@ static int xen_starting_cpu(unsigned int cpu) pr_info("Xen: initializing cpu%d\n", cpu); vcpup = per_cpu_ptr(xen_vcpu_info, cpu); - /* Direct vCPU id mapping for ARM guests. */ - per_cpu(xen_vcpu_id, cpu) = cpu; - info.mfn = virt_to_gfn(vcpup); info.offset = xen_offset_in_page(vcpup); @@ -330,6 +327,7 @@ static int __init xen_guest_init(void) { struct xen_add_to_physmap xatp; struct shared_info *shared_info_page = NULL; + int cpu; if (!xen_domain()) return 0; @@ -380,7 +378,8 @@ static int __init xen_guest_init(void) return -ENOMEM; /* Direct vCPU id mapping for ARM guests. */ - per_cpu(xen_vcpu_id, 0) = 0; + for_each_possible_cpu(cpu) + per_cpu(xen_vcpu_id, cpu) = cpu; xen_auto_xlat_grant_frames.count = gnttab_max_grant_frames(); if (xen_xlate_map_ballooned_pages(&xen_auto_xlat_grant_frames.pfn, -- cgit v1.1 From 6b03144d93fc7de7ef03334384fea0fab058fa6d Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Fri, 9 Sep 2016 16:27:58 +0530 Subject: mwifiex: handle error if IRQ request fails in mwifiex_sdio_of() When this failure occurs, we will clear card->plt_wake_cfg so that device would initialize without wake up on external interrupt feature. This feature specific code in suspend and resume handlers will be skipped. 
Signed-off-by: Amitkumar Karwar Reviewed-by: Javier Martinez Canillas Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/sdio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c b/drivers/net/wireless/marvell/mwifiex/sdio.c index 6dba409..8718950 100644 --- a/drivers/net/wireless/marvell/mwifiex/sdio.c +++ b/drivers/net/wireless/marvell/mwifiex/sdio.c @@ -122,9 +122,11 @@ static int mwifiex_sdio_probe_of(struct device *dev, struct sdio_mmc_card *card) IRQF_TRIGGER_LOW, "wifi_wake", cfg); if (ret) { - dev_err(dev, + dev_dbg(dev, "Failed to request irq_wifi %d (%d)\n", cfg->irq_wifi, ret); + card->plt_wake_cfg = NULL; + return 0; } disable_irq(cfg->irq_wifi); } -- cgit v1.1 From ae1799a1cb130170c3ba3370793cea5b0d9d2aa8 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Fri, 9 Sep 2016 20:26:19 +0530 Subject: mwifiex: correction in Rx STBC field of htcapinfo Currently Rx STBC in assoc request frame is advertised as 3. It should be 2, as our chipsets support two spatial streams. 
Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index 0a03d3f..c7f2faa 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -2732,7 +2732,7 @@ mwifiex_setup_ht_caps(struct ieee80211_sta_ht_cap *ht_info, ht_info->cap &= ~IEEE80211_HT_CAP_SGI_40; if (adapter->user_dev_mcs_support == HT_STREAM_2X2) - ht_info->cap |= 3 << IEEE80211_HT_CAP_RX_STBC_SHIFT; + ht_info->cap |= 2 << IEEE80211_HT_CAP_RX_STBC_SHIFT; else ht_info->cap |= 1 << IEEE80211_HT_CAP_RX_STBC_SHIFT; -- cgit v1.1 From 54cdf5c727cb3d3124e61433a13e9724a7a4a952 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 9 Sep 2016 14:01:24 -0400 Subject: rtl8xxxu: Reset device on module unload if still attached If the USB dongle is still attached, reset it on module unload to avoid scans failing when reloading the driver. 
Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index c362083..d2611a4 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -6129,6 +6129,11 @@ static void rtl8xxxu_disconnect(struct usb_interface *interface) mutex_destroy(&priv->usb_buf_mutex); mutex_destroy(&priv->h2c_mutex); + if (priv->udev->state != USB_STATE_NOTATTACHED) { + dev_info(&priv->udev->dev, + "Device still attached, trying to reset\n"); + usb_reset_device(priv->udev); + } usb_put_dev(priv->udev); ieee80211_free_hw(hw); } -- cgit v1.1 From 0cd7f70399f71fdd87b34d28670248c36e4db455 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 9 Sep 2016 14:01:25 -0400 Subject: rtl8xxxu: fix spelling mistake "firmare" -> "firmware" Trivial fix to spelling mistakes in dev_dbg message. 
Signed-off-by: Colin Ian King Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index d2611a4..ca92022 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -3921,11 +3921,11 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) rtl8xxxu_write16(priv, REG_TRXFF_BNDY + 2, priv->fops->trxff_boundary); ret = rtl8xxxu_download_firmware(priv); - dev_dbg(dev, "%s: download_fiwmare %i\n", __func__, ret); + dev_dbg(dev, "%s: download_firmware %i\n", __func__, ret); if (ret) goto exit; ret = rtl8xxxu_start_firmware(priv); - dev_dbg(dev, "%s: start_fiwmare %i\n", __func__, ret); + dev_dbg(dev, "%s: start_firmware %i\n", __func__, ret); if (ret) goto exit; -- cgit v1.1 From 8136fd58ad60e25cf8b99d08bf92c09d02b416ef Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 11 Sep 2016 15:05:48 +0200 Subject: ath: constify local structures For structure types defined in the same file or local header files, find top-level static structure declarations that have the following properties: 1. Never reassigned. 2. Address never taken 3. Not passed to a top-level macro call 4. No pointer or array-typed field passed to a function or stored in a variable. Declare structures having all of these properties as const. Done using Coccinelle. Based on a suggestion by Joe Perches . 
Signed-off-by: Julia Lawall Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/dfs_pattern_detector.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/dfs_pattern_detector.c b/drivers/net/wireless/ath/dfs_pattern_detector.c index 2f8136d..4100ffd 100644 --- a/drivers/net/wireless/ath/dfs_pattern_detector.c +++ b/drivers/net/wireless/ath/dfs_pattern_detector.c @@ -338,7 +338,7 @@ static bool dpd_set_domain(struct dfs_pattern_detector *dpd, return true; } -static struct dfs_pattern_detector default_dpd = { +static const struct dfs_pattern_detector default_dpd = { .exit = dpd_exit, .set_dfs_domain = dpd_set_domain, .add_pulse = dpd_add_pulse, -- cgit v1.1 From 1dc80798a8caab8b5788da96ab220c91a03d7d29 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 11 Sep 2016 15:05:50 +0200 Subject: iwlegacy: constify local structures For structure types defined in the same file or local header files, find top-level static structure declarations that have the following properties: 1. Never reassigned. 2. Address never taken 3. Not passed to a top-level macro call 4. No pointer or array-typed field passed to a function or stored in a variable. Declare structures having all of these properties as const. Done using Coccinelle. Based on a suggestion by Joe Perches . 
Signed-off-by: Julia Lawall Acked-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlegacy/3945.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlegacy/3945.c b/drivers/net/wireless/intel/iwlegacy/3945.c index 209dc99..4db327a 100644 --- a/drivers/net/wireless/intel/iwlegacy/3945.c +++ b/drivers/net/wireless/intel/iwlegacy/3945.c @@ -2671,7 +2671,7 @@ const struct il_ops il3945_ops = { .send_led_cmd = il3945_send_led_cmd, }; -static struct il_cfg il3945_bg_cfg = { +static const struct il_cfg il3945_bg_cfg = { .name = "3945BG", .fw_name_pre = IL3945_FW_PRE, .ucode_api_max = IL3945_UCODE_API_MAX, @@ -2700,7 +2700,7 @@ static struct il_cfg il3945_bg_cfg = { }, }; -static struct il_cfg il3945_abg_cfg = { +static const struct il_cfg il3945_abg_cfg = { .name = "3945ABG", .fw_name_pre = IL3945_FW_PRE, .ucode_api_max = IL3945_UCODE_API_MAX, -- cgit v1.1 From d86e64768859fca82c78e52877ceeba04e25d27a Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 11 Sep 2016 15:06:03 +0200 Subject: rtlwifi: rtl818x: constify local structures For structure types defined in the same file or local header files, find top-level static structure declarations that have the following properties: 1. Never reassigned. 2. Address never taken 3. Not passed to a top-level macro call 4. No pointer or array-typed field passed to a function or stored in a variable. Declare structures having all of these properties as const. Done using Coccinelle. Based on a suggestion by Joe Perches . 
Signed-off-by: Julia Lawall Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c | 2 +- drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c | 2 +- drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c | 2 +- drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c | 2 +- drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c | 2 +- drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c | 2 +- drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c | 2 +- drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c index 47e32cb..e7b11b4 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c @@ -280,7 +280,7 @@ static struct rtl_mod_params rtl88ee_mod_params = { .debug = DBG_EMERG, }; -static struct rtl_hal_cfg rtl88ee_hal_cfg = { +static const struct rtl_hal_cfg rtl88ee_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl88e_pci", diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c index 4780bdc..87aa209 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c @@ -258,7 +258,7 @@ static struct rtl_mod_params rtl92ce_mod_params = { .debug = DBG_EMERG, }; -static struct rtl_hal_cfg rtl92ce_hal_cfg = { +static const struct rtl_hal_cfg rtl92ce_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl92c_pci", diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c index c6e09a1..0538a4d 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c @@ -262,7 +262,7 @@ static struct rtl_mod_params rtl92de_mod_params = { .debug = DBG_EMERG, }; -static struct 
rtl_hal_cfg rtl92de_hal_cfg = { +static const struct rtl_hal_cfg rtl92de_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl8192de", diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c index c31c6bf..ac299cb 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c @@ -262,7 +262,7 @@ static struct rtl_mod_params rtl92ee_mod_params = { .debug = DBG_EMERG, }; -static struct rtl_hal_cfg rtl92ee_hal_cfg = { +static const struct rtl_hal_cfg rtl92ee_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl92ee_pci", diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c index 31baca41..5e8e02d 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c @@ -306,7 +306,7 @@ static struct rtl_mod_params rtl92se_mod_params = { /* Because memory R/W bursting will cause system hang/crash * for 92se, so we don't read back after every write action */ -static struct rtl_hal_cfg rtl92se_hal_cfg = { +static const struct rtl_hal_cfg rtl92se_hal_cfg = { .bar_id = 1, .write_readback = false, .name = "rtl92s_pci", diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c index ff49a8c..89c828a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c @@ -276,7 +276,7 @@ static struct rtl_mod_params rtl8723e_mod_params = { .disable_watchdog = false, }; -static struct rtl_hal_cfg rtl8723e_hal_cfg = { +static const struct rtl_hal_cfg rtl8723e_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl8723e_pci", diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c index 2101793..20b53f0 100644 --- 
a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c @@ -276,7 +276,7 @@ static struct rtl_mod_params rtl8723be_mod_params = { .ant_sel = 0, }; -static struct rtl_hal_cfg rtl8723be_hal_cfg = { +static const struct rtl_hal_cfg rtl8723be_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl8723be_pci", diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c index 4159f9b..22f687b1 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c @@ -316,7 +316,7 @@ static struct rtl_mod_params rtl8821ae_mod_params = { .disable_watchdog = 0, }; -static struct rtl_hal_cfg rtl8821ae_hal_cfg = { +static const struct rtl_hal_cfg rtl8821ae_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl8821ae_pci", -- cgit v1.1 From 787764676f94114980d17e627b21937f4245c866 Mon Sep 17 00:00:00 2001 From: Ganapathi Bhat Date: Mon, 12 Sep 2016 18:55:27 +0530 Subject: mwifiex: Command 7 handling for USB chipsets Firmware image for newer USB chipsets starts with a command 7 block (special command). It doesn't contain data length field. This patch adds necessary handling. 
Signed-off-by: Ganapathi Bhat Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/usb.c | 4 ++++ drivers/net/wireless/marvell/mwifiex/usb.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/net/wireless/marvell/mwifiex/usb.c b/drivers/net/wireless/marvell/mwifiex/usb.c index 9213516..8a20620 100644 --- a/drivers/net/wireless/marvell/mwifiex/usb.c +++ b/drivers/net/wireless/marvell/mwifiex/usb.c @@ -1026,6 +1026,10 @@ static int mwifiex_prog_fw_w_helper(struct mwifiex_adapter *adapter, dnld_cmd = le32_to_cpu(fwdata->fw_hdr.dnld_cmd); tlen += sizeof(struct fw_header); + /* Command 7 doesn't have data length field */ + if (dnld_cmd == FW_CMD_7) + dlen = 0; + memcpy(fwdata->data, &firmware[tlen], dlen); fwdata->seq_num = cpu_to_le32(fw_seqnum); diff --git a/drivers/net/wireless/marvell/mwifiex/usb.h b/drivers/net/wireless/marvell/mwifiex/usb.h index b4e9246..1b49c52 100644 --- a/drivers/net/wireless/marvell/mwifiex/usb.h +++ b/drivers/net/wireless/marvell/mwifiex/usb.h @@ -51,6 +51,7 @@ #define FW_DNLD_TX_BUF_SIZE 620 #define FW_DNLD_RX_BUF_SIZE 2048 #define FW_HAS_LAST_BLOCK 0x00000004 +#define FW_CMD_7 0x00000007 #define FW_DATA_XMIT_SIZE \ (sizeof(struct fw_header) + dlen + sizeof(u32)) -- cgit v1.1 From b7450e248d71067e0c1a09614cf3d7571f7e10fa Mon Sep 17 00:00:00 2001 From: Ganapathi Bhat Date: Mon, 12 Sep 2016 18:55:28 +0530 Subject: mwifiex: firmware name correction for usb8997 chipset Similar to pcie8997 chipset, first firmware submitted for usb8997 chipset will be usbusb8997_combo_v4.bin. This patch corrects the name used in driver. 
Signed-off-by: Ganapathi Bhat Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/usb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/usb.h b/drivers/net/wireless/marvell/mwifiex/usb.h index 1b49c52..30e8eb8c 100644 --- a/drivers/net/wireless/marvell/mwifiex/usb.h +++ b/drivers/net/wireless/marvell/mwifiex/usb.h @@ -46,7 +46,7 @@ #define USB8766_DEFAULT_FW_NAME "mrvl/usb8766_uapsta.bin" #define USB8797_DEFAULT_FW_NAME "mrvl/usb8797_uapsta.bin" #define USB8801_DEFAULT_FW_NAME "mrvl/usb8801_uapsta.bin" -#define USB8997_DEFAULT_FW_NAME "mrvl/usb8997_uapsta.bin" +#define USB8997_DEFAULT_FW_NAME "mrvl/usbusb8997_combo_v4.bin" #define FW_DNLD_TX_BUF_SIZE 620 #define FW_DNLD_RX_BUF_SIZE 2048 -- cgit v1.1 From 7ccb8e633cfeb7969eba09bbf53346e746bb7f89 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Mon, 15 Aug 2016 19:34:44 +0200 Subject: ARM: multi_v7_defconfig: update XILINX_VDMA Commit fde57a7c4474 ("dmaengine: xilinx: Rename driver and config") renamed config XILINX_VDMA to config XILINX_DMA Update defconfig accordingly. 
Signed-off-by: Fabian Frederick Signed-off-by: Arnd Bergmann --- arch/arm/configs/multi_v7_defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 2c8665c..ea3566f 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -781,7 +781,7 @@ CONFIG_MXS_DMA=y CONFIG_DMA_BCM2835=y CONFIG_DMA_OMAP=y CONFIG_QCOM_BAM_DMA=y -CONFIG_XILINX_VDMA=y +CONFIG_XILINX_DMA=y CONFIG_DMA_SUN6I=y CONFIG_STAGING=y CONFIG_SENSORS_ISL29018=y -- cgit v1.1 From f2a89d3b2b85b90b05453872aaabfdb412a21a03 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 1 Aug 2016 10:54:16 +0100 Subject: arm64: dts: Fix broken architected timer interrupt trigger The ARM architected timer specification mandates that the interrupt associated with each timer is level triggered (which corresponds to the "counter >= comparator" condition). A number of DTs are being remarkably creative, declaring the interrupt to be edge triggered. A quick look at the TRM for the corresponding ARM CPUs clearly shows that this is wrong, and I've corrected those. For non-ARM designs (and in the absence of a publicly available TRM), I've made them active low as well, which can't be completely wrong as the GIC cannot distinguish between level low and level high. The respective maintainers are of course welcome to prove me wrong. While I was at it, I took the liberty to fix a couple of related issues, such as some spurious affinity bits on ThunderX, and their complete absence on ls1043a (both of which seem to be related to copy-pasting from other DTs). 
Acked-by: Duc Dang Acked-by: Carlo Caione Acked-by: Michal Simek Acked-by: Krzysztof Kozlowski Acked-by: Dinh Nguyen Acked-by: Masahiro Yamada Signed-off-by: Marc Zyngier Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi | 8 ++++---- arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi | 8 ++++---- arch/arm64/boot/dts/apm/apm-storm.dtsi | 8 ++++---- arch/arm64/boot/dts/broadcom/ns2.dtsi | 8 ++++---- arch/arm64/boot/dts/cavium/thunder-88xx.dtsi | 8 ++++---- arch/arm64/boot/dts/exynos/exynos7.dtsi | 8 ++++---- arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi | 8 ++++---- arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi | 8 ++++---- arch/arm64/boot/dts/marvell/armada-ap806.dtsi | 8 ++++---- arch/arm64/boot/dts/socionext/uniphier-ph1-ld20.dtsi | 8 ++++---- arch/arm64/boot/dts/xilinx/zynqmp.dtsi | 8 ++++---- 11 files changed, 44 insertions(+), 44 deletions(-) diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index 445aa67..c2b9bcb 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -255,10 +255,10 @@ /* Local timer */ timer { compatible = "arm,armv8-timer"; - interrupts = <1 13 0xf01>, - <1 14 0xf01>, - <1 11 0xf01>, - <1 10 0xf01>; + interrupts = <1 13 0xf08>, + <1 14 0xf08>, + <1 11 0xf08>, + <1 10 0xf08>; }; timer0: timer0@ffc03000 { diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi index e502c24..bf6c8d0 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi @@ -102,13 +102,13 @@ timer { compatible = "arm,armv8-timer"; interrupts = , + (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_LEVEL_LOW)>, , + (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_LEVEL_LOW)>, , + (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_LEVEL_LOW)>, ; + (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_LEVEL_LOW)>; }; xtal: xtal-clk { diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi 
b/arch/arm64/boot/dts/apm/apm-storm.dtsi index f1c2c71..c29dab9 100644 --- a/arch/arm64/boot/dts/apm/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi @@ -110,10 +110,10 @@ timer { compatible = "arm,armv8-timer"; - interrupts = <1 0 0xff01>, /* Secure Phys IRQ */ - <1 13 0xff01>, /* Non-secure Phys IRQ */ - <1 14 0xff01>, /* Virt IRQ */ - <1 15 0xff01>; /* Hyp IRQ */ + interrupts = <1 0 0xff08>, /* Secure Phys IRQ */ + <1 13 0xff08>, /* Non-secure Phys IRQ */ + <1 14 0xff08>, /* Virt IRQ */ + <1 15 0xff08>; /* Hyp IRQ */ clock-frequency = <50000000>; }; diff --git a/arch/arm64/boot/dts/broadcom/ns2.dtsi b/arch/arm64/boot/dts/broadcom/ns2.dtsi index f53b095..d4a12fa 100644 --- a/arch/arm64/boot/dts/broadcom/ns2.dtsi +++ b/arch/arm64/boot/dts/broadcom/ns2.dtsi @@ -88,13 +88,13 @@ timer { compatible = "arm,armv8-timer"; interrupts = , + IRQ_TYPE_LEVEL_LOW)>, , + IRQ_TYPE_LEVEL_LOW)>, , + IRQ_TYPE_LEVEL_LOW)>, ; + IRQ_TYPE_LEVEL_LOW)>; }; pmu { diff --git a/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi b/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi index 2eb9b22..04dc8a8 100644 --- a/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi +++ b/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi @@ -354,10 +354,10 @@ timer { compatible = "arm,armv8-timer"; - interrupts = <1 13 0xff01>, - <1 14 0xff01>, - <1 11 0xff01>, - <1 10 0xff01>; + interrupts = <1 13 4>, + <1 14 4>, + <1 11 4>, + <1 10 4>; }; pmu { diff --git a/arch/arm64/boot/dts/exynos/exynos7.dtsi b/arch/arm64/boot/dts/exynos/exynos7.dtsi index ca663df..1628315 100644 --- a/arch/arm64/boot/dts/exynos/exynos7.dtsi +++ b/arch/arm64/boot/dts/exynos/exynos7.dtsi @@ -473,10 +473,10 @@ timer { compatible = "arm,armv8-timer"; - interrupts = <1 13 0xff01>, - <1 14 0xff01>, - <1 11 0xff01>, - <1 10 0xff01>; + interrupts = <1 13 0xff08>, + <1 14 0xff08>, + <1 11 0xff08>, + <1 10 0xff08>; }; pmu_system_controller: system-controller@105c0000 { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi 
b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi index e669fbd..a67e210 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi @@ -119,10 +119,10 @@ timer { compatible = "arm,armv8-timer"; - interrupts = <1 13 0x1>, /* Physical Secure PPI */ - <1 14 0x1>, /* Physical Non-Secure PPI */ - <1 11 0x1>, /* Virtual PPI */ - <1 10 0x1>; /* Hypervisor PPI */ + interrupts = <1 13 0xf08>, /* Physical Secure PPI */ + <1 14 0xf08>, /* Physical Non-Secure PPI */ + <1 11 0xf08>, /* Virtual PPI */ + <1 10 0xf08>; /* Hypervisor PPI */ }; pmu { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi index 21023a3..e3b6034 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi @@ -191,10 +191,10 @@ timer { compatible = "arm,armv8-timer"; - interrupts = <1 13 0x8>, /* Physical Secure PPI, active-low */ - <1 14 0x8>, /* Physical Non-Secure PPI, active-low */ - <1 11 0x8>, /* Virtual PPI, active-low */ - <1 10 0x8>; /* Hypervisor PPI, active-low */ + interrupts = <1 13 4>, /* Physical Secure PPI, active-low */ + <1 14 4>, /* Physical Non-Secure PPI, active-low */ + <1 11 4>, /* Virtual PPI, active-low */ + <1 10 4>; /* Hypervisor PPI, active-low */ }; pmu { diff --git a/arch/arm64/boot/dts/marvell/armada-ap806.dtsi b/arch/arm64/boot/dts/marvell/armada-ap806.dtsi index eab1a42..c2a6745 100644 --- a/arch/arm64/boot/dts/marvell/armada-ap806.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-ap806.dtsi @@ -122,10 +122,10 @@ timer { compatible = "arm,armv8-timer"; - interrupts = , - , - , - ; + interrupts = , + , + , + ; }; odmi: odmi@300000 { diff --git a/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20.dtsi b/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20.dtsi index c223915..d73bdc8 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20.dtsi +++ b/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20.dtsi @@ -129,10 +129,10 @@ 
timer { compatible = "arm,armv8-timer"; - interrupts = <1 13 0xf01>, - <1 14 0xf01>, - <1 11 0xf01>, - <1 10 0xf01>; + interrupts = <1 13 4>, + <1 14 4>, + <1 11 4>, + <1 10 4>; }; soc { diff --git a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi index e595f22..3e2e51f 100644 --- a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi +++ b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi @@ -65,10 +65,10 @@ timer { compatible = "arm,armv8-timer"; interrupt-parent = <&gic>; - interrupts = <1 13 0xf01>, - <1 14 0xf01>, - <1 11 0xf01>, - <1 10 0xf01>; + interrupts = <1 13 0xf08>, + <1 14 0xf08>, + <1 11 0xf08>, + <1 10 0xf08>; }; amba_apu { -- cgit v1.1 From 29bf282dec94f6015a675c007614cb29563f1c18 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 6 Sep 2016 16:34:01 +1000 Subject: powerpc/powernv: Detach from PE on releasing PCI device The PCI hotplug can be part of EEH error recovery. The @pdn and the device's PE number aren't removed and added afterwards. The PE number in @pdn should be set to an invalid one. Otherwise, the PE's device count is decreased on removing devices while failing to be increased on adding devices. It leads to unbalanced PE's device count and makes normal PCI hotplug path broken. Fixes: c5f7700bbd2e ("powerpc/powernv: Dynamically release PE") Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 0c71a2f..da5da11 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -3452,7 +3452,17 @@ static void pnv_pci_release_device(struct pci_dev *pdev) if (!pdn || pdn->pe_number == IODA_INVALID_PE) return; + /* + * PCI hotplug can happen as part of EEH error recovery. The @pdn + * isn't removed and added afterwards in this scenario. We should + * set the PE number in @pdn to an invalid one.
Otherwise, the PE's + * device count is decreased on removing devices while failing to + * be increased on adding devices. It leads to unbalanced PE's device + * count and eventually make normal PCI hotplug path broken. + */ pe = &phb->ioda.pe_array[pdn->pe_number]; + pdn->pe_number = IODA_INVALID_PE; + WARN_ON(--pe->device_count < 0); if (pe->device_count == 0) pnv_ioda_release_pe(pe); -- cgit v1.1 From ed7d9a1d7da6fe7b1c7477dc70e95051583fd60c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 15 Sep 2016 17:03:06 +1000 Subject: powerpc/powernv/pci: Fix missed TCE invalidations that should fallback to OPAL In commit f0228c413011 ("powerpc/powernv/pci: Fallback to OPAL for TCE invalidations"), we added logic to fallback to OPAL for doing TCE invalidations if we can't do it in Linux. Ben sent a v2 of the patch, containing these additional call sites, but I had already applied v1 and didn't notice. So fix them now. Fixes: f0228c413011 ("powerpc/powernv/pci: Fallback to OPAL for TCE invalidations") Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index da5da11..bc0c91e 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2217,7 +2217,7 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group, pnv_pci_link_table_and_group(phb->hose->node, num, tbl, &pe->table_group); - pnv_pci_phb3_tce_invalidate_pe(pe); + pnv_pci_ioda2_tce_invalidate_pe(pe); return 0; } @@ -2355,7 +2355,7 @@ static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, if (ret) pe_warn(pe, "Unmapping failed, ret = %ld\n", ret); else - pnv_pci_phb3_tce_invalidate_pe(pe); + pnv_pci_ioda2_tce_invalidate_pe(pe); pnv_pci_unlink_table_and_group(table_group->tables[num], table_group); -- cgit 
v1.1 From 85d5313ed717ad60769491c7c072d23bc0a68e7a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Sep 2016 11:38:31 +0200 Subject: mac80211: reject TSPEC TIDs (TSIDs) for aggregation Since mac80211 doesn't currently support TSIDs 8-15 which can only be used after QoS TSPEC negotiation (and not even after WMM negotiation), reject attempts to set up aggregation sessions for them, which might confuse drivers. In mac80211 we do correctly handle that, but the TSIDs should never get used anyway, and drivers might not be able to handle it. Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- net/mac80211/agg-rx.c | 8 +++++++- net/mac80211/agg-tx.c | 3 +++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index a9aff60..afa9468 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -261,10 +261,16 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, .timeout = timeout, .ssn = start_seq_num, }; - int i, ret = -EOPNOTSUPP; u16 status = WLAN_STATUS_REQUEST_DECLINED; + if (tid >= IEEE80211_FIRST_TSPEC_TSID) { + ht_dbg(sta->sdata, + "STA %pM requests BA session on unsupported tid %d\n", + sta->sta.addr, tid); + goto end_no_lock; + } + if (!sta->sta.ht_cap.ht_supported) { ht_dbg(sta->sdata, "STA %pM erroneously requests BA session on tid %d w/o QoS\n", diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 5650c46..45319cc 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -584,6 +584,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, ieee80211_hw_check(&local->hw, TX_AMPDU_SETUP_IN_HW)) return -EINVAL; + if (WARN_ON(tid >= IEEE80211_FIRST_TSPEC_TSID)) + return -EINVAL; + ht_dbg(sdata, "Open BA session requested for %pM tid %u\n", pubsta->addr, tid); -- cgit v1.1 From cecf62352aee2b4fe114aafd1b8c5f265a4243ce Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 15 Sep 2016 11:22:33 +0300 Subject: perf/x86/intel: Don't disable 
"intel_bts" around "intel" event batching At the moment, intel_bts events get disabled from intel PMU's disable callback, which includes event scheduling transactions of said PMU, which have nothing to do with intel_bts events. We do want to keep intel_bts events off inside the PMI handler to avoid filling up their buffer too soon. This patch moves intel_bts enabling/disabling directly to the PMI handler. Reported-by: Vince Weaver Signed-off-by: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160915082233.11065-1-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 2cbde2f..4c9a79b 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -1730,9 +1730,11 @@ static __initconst const u64 knl_hw_cache_extra_regs * disabled state if called consecutively. * * During consecutive calls, the same disable value will be written to related - * registers, so the PMU state remains unchanged. hw.state in - * intel_bts_disable_local will remain PERF_HES_STOPPED too in consecutive - * calls. + * registers, so the PMU state remains unchanged. + * + * intel_bts events don't coexist with intel PMU's BTS events because of + * x86_add_exclusive(x86_lbr_exclusive_lbr); there's no need to keep them + * disabled around intel PMU's event batching etc, only inside the PMI handler. 
*/ static void __intel_pmu_disable_all(void) { @@ -1742,8 +1744,6 @@ static void __intel_pmu_disable_all(void) if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) intel_pmu_disable_bts(); - else - intel_bts_disable_local(); intel_pmu_pebs_disable_all(); } @@ -1771,8 +1771,7 @@ static void __intel_pmu_enable_all(int added, bool pmi) return; intel_pmu_enable_bts(event->hw.config); - } else - intel_bts_enable_local(); + } } static void intel_pmu_enable_all(int added) @@ -2073,6 +2072,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) */ if (!x86_pmu.late_ack) apic_write(APIC_LVTPC, APIC_DM_NMI); + intel_bts_disable_local(); __intel_pmu_disable_all(); handled = intel_pmu_drain_bts_buffer(); handled += intel_bts_interrupt(); @@ -2172,6 +2172,7 @@ done: /* Only restore PMU state when it's active. See x86_pmu_disable(). */ if (cpuc->enabled) __intel_pmu_enable_all(0, true); + intel_bts_enable_local(); /* * Only unmask the NMI after the overflow counters -- cgit v1.1 From a6805884e263e82d9fb87bd5f39ad4bb38cde246 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 17 Aug 2016 13:44:50 +0300 Subject: ARM: keystone: defconfig: Fix USB configuration Simply enabling CONFIG_KEYSTONE_USB_PHY doesn't work anymore as it depends on CONFIG_NOP_USB_XCEIV. We need to enable that as well. This fixes USB on Keystone boards from v4.8-rc1 onwards. 
Signed-off-by: Roger Quadros Acked-by: Santosh Shilimkar Signed-off-by: Arnd Bergmann --- arch/arm/configs/keystone_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/keystone_defconfig b/arch/arm/configs/keystone_defconfig index 71b42e6..78cd2f1 100644 --- a/arch/arm/configs/keystone_defconfig +++ b/arch/arm/configs/keystone_defconfig @@ -161,6 +161,7 @@ CONFIG_USB_MON=y CONFIG_USB_XHCI_HCD=y CONFIG_USB_STORAGE=y CONFIG_USB_DWC3=y +CONFIG_NOP_USB_XCEIV=y CONFIG_KEYSTONE_USB_PHY=y CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y -- cgit v1.1 From 7892a1f64a447b6f65fe2888688883b7c26d81d3 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 9 Aug 2016 12:36:41 -0300 Subject: [media] rcar-fcp: Make sure rcar_fcp_enable() returns 0 on success When resuming from suspend-to-RAM on r8a7795/salvator-x: dpm_run_callback(): pm_genpd_resume_noirq+0x0/0x90 returns 1 PM: Device fe940000.fdp1 failed to resume noirq: error 1 dpm_run_callback(): pm_genpd_resume_noirq+0x0/0x90 returns 1 PM: Device fe944000.fdp1 failed to resume noirq: error 1 dpm_run_callback(): pm_genpd_resume_noirq+0x0/0x90 returns 1 PM: Device fe948000.fdp1 failed to resume noirq: error 1 According to its documentation, rcar_fcp_enable() returns 0 on success or a negative error code if an error occurs. Hence fdp1_pm_runtime_resume() and vsp1_pm_runtime_resume() forward its return value to their callers. However, rcar_fcp_enable() forwards the return value of pm_runtime_get_sync(), which can actually be 1 on success, leading to the resume failure above. To fix this, consider only negative values returned by pm_runtime_get_sync() to be failures. 
Fixes: 7b49235e83b2347c ("[media] v4l: Add Renesas R-Car FCP driver") Signed-off-by: Geert Uytterhoeven Reviewed-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/rcar-fcp.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/media/platform/rcar-fcp.c b/drivers/media/platform/rcar-fcp.c index 6a7bcc3..bc50c69 100644 --- a/drivers/media/platform/rcar-fcp.c +++ b/drivers/media/platform/rcar-fcp.c @@ -99,10 +99,16 @@ EXPORT_SYMBOL_GPL(rcar_fcp_put); */ int rcar_fcp_enable(struct rcar_fcp_device *fcp) { + int error; + if (!fcp) return 0; - return pm_runtime_get_sync(fcp->dev); + error = pm_runtime_get_sync(fcp->dev); + if (error < 0) + return error; + + return 0; } EXPORT_SYMBOL_GPL(rcar_fcp_enable); -- cgit v1.1 From f1c1f17ac52d22227c0074b3d661d7ed692b707a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 Sep 2016 17:08:23 +0200 Subject: cfg80211: allow connect keys only with default (TX) key There's no point in allowing connect keys when one of them isn't also configured as the TX key, it would just confuse drivers and probably cause them to pick something for TX. Disallow this confusing and erroneous configuration. As wpa_supplicant will always send NL80211_ATTR_KEYS, even when there are no keys inside, allow that and treat it as though the attribute isn't present at all. 
Signed-off-by: Johannes Berg --- net/wireless/ibss.c | 5 ++++- net/wireless/nl80211.c | 14 ++++++++++++++ net/wireless/sme.c | 3 +++ net/wireless/wext-sme.c | 2 +- 4 files changed, 22 insertions(+), 2 deletions(-) diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 896cbb2..eafdfa5 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -114,6 +114,9 @@ static int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, } } + if (WARN_ON(connkeys && connkeys->def < 0)) + return -EINVAL; + if (WARN_ON(wdev->connect_keys)) kzfree(wdev->connect_keys); wdev->connect_keys = connkeys; @@ -289,7 +292,7 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, wdev->wext.ibss.privacy = wdev->wext.default_key != -1; - if (wdev->wext.keys) { + if (wdev->wext.keys && wdev->wext.keys->def != -1) { ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL); if (!ck) return -ENOMEM; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 71af96e..f2a77c3 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -848,6 +848,15 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev, struct nlattr *key; struct cfg80211_cached_keys *result; int rem, err, def = 0; + bool have_key = false; + + nla_for_each_nested(key, keys, rem) { + have_key = true; + break; + } + + if (!have_key) + return NULL; result = kzalloc(sizeof(*result), GFP_KERNEL); if (!result) @@ -895,6 +904,11 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev, *no_ht = true; } + if (result->def < 0) { + err = -EINVAL; + goto error; + } + return result; error: kfree(result); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index add6824..c08a3b5 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -1043,6 +1043,9 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, connect->crypto.ciphers_pairwise[0] = cipher; } } + } else { + if (WARN_ON(connkeys)) + return -EINVAL; } wdev->connect_keys = connkeys; diff --git 
a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index f6523a4..88f1f69 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -42,7 +42,7 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, if (!wdev->wext.connect.ssid_len) return 0; - if (wdev->wext.keys) { + if (wdev->wext.keys && wdev->wext.keys->def != -1) { ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL); if (!ck) return -ENOMEM; -- cgit v1.1 From 93db1d9e6c96050b74bb2793de8db00cd0afe6ab Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Sep 2016 09:23:51 +0200 Subject: mac80211: fix possible out-of-bounds access In the unlikely situation that the supplicant has negotiated admission for the background AC (which it has no reason to as it's not supposed to be requiring admission control to start with, and we'd ignore such a requirement anyway), the loop here may terminate with non_acm_ac == 4, which leads to an array overrun. Check this explicitly just for completeness. Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 8d426f6..7486f2d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1672,11 +1672,15 @@ __ieee80211_sta_handle_tspec_ac_params(struct ieee80211_sub_if_data *sdata) non_acm_ac++) if (!(sdata->wmm_acm & BIT(7 - 2 * non_acm_ac))) break; - /* The loop will result in using BK even if it requires - * admission control, such configuration makes no sense - * and we have to transmit somehow - the AC selection - * does the same thing. + /* Usually the loop will result in using BK even if it + * requires admission control, but such a configuration + * makes no sense and we have to transmit somehow - the + * AC selection does the same thing. + * If we started out trying to downgrade from BK, then + * the extra condition here might be needed. 
*/ + if (non_acm_ac >= IEEE80211_NUM_ACS) + non_acm_ac = IEEE80211_AC_BK; if (drv_conf_tx(local, sdata, ac, &sdata->tx_conf[non_acm_ac])) sdata_err(sdata, -- cgit v1.1 From 58bd7f1158ac7543ccdcddc7f4ecd7db458e6d0b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Sep 2016 09:37:54 +0200 Subject: mac80211: fix scan completed tracing Passing the 'info' pointer where a 'info->aborted' is expected will always lead to tracing to erroneously record that the scan was aborted, fix that by passing the correct info->aborted. The remaining data will be collected in cfg80211, so I haven't duplicated it here. Signed-off-by: Johannes Berg --- net/mac80211/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 070b40f..23d8ac8 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -420,7 +420,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, { struct ieee80211_local *local = hw_to_local(hw); - trace_api_scan_completed(local, info); + trace_api_scan_completed(local, info->aborted); set_bit(SCAN_COMPLETED, &local->scanning); if (info->aborted) -- cgit v1.1 From 76e1fb4b5532a9df9eb14cfe002412c7617c4ad0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Sep 2016 09:55:57 +0200 Subject: nl80211: always check nla_nest_start() return value If the message got full during nla_nest_start(), it can return NULL. None of the cases here seem like that can really happen, but check the return value nonetheless. 
Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f2a77c3..60c8a74 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -8022,6 +8022,8 @@ __cfg80211_alloc_vendor_skb(struct cfg80211_registered_device *rdev, } data = nla_nest_start(skb, attr); + if (!data) + goto nla_put_failure; ((void **)skb->cb)[0] = rdev; ((void **)skb->cb)[1] = hdr; @@ -9458,8 +9460,14 @@ static int nl80211_send_wowlan_nd(struct sk_buff *msg, if (req->n_match_sets) { matches = nla_nest_start(msg, NL80211_ATTR_SCHED_SCAN_MATCH); + if (!matches) + return -ENOBUFS; + for (i = 0; i < req->n_match_sets; i++) { match = nla_nest_start(msg, i); + if (!match) + return -ENOBUFS; + nla_put(msg, NL80211_SCHED_SCAN_MATCH_ATTR_SSID, req->match_sets[i].ssid.ssid_len, req->match_sets[i].ssid.ssid); @@ -9474,6 +9482,9 @@ static int nl80211_send_wowlan_nd(struct sk_buff *msg, for (i = 0; i < req->n_scan_plans; i++) { scan_plan = nla_nest_start(msg, i + 1); + if (!scan_plan) + return -ENOBUFS; + if (!scan_plan || nla_put_u32(msg, NL80211_SCHED_SCAN_PLAN_INTERVAL, req->scan_plans[i].interval) || -- cgit v1.1 From 53b18980fded52e39520661af3528577d36eb279 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Sep 2016 09:59:21 +0200 Subject: nl80211: always check nla_put* return values A few instances were found where we didn't check them, add the missing checks even though they'll probably never trigger as the message should be large enough here. 
Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 60c8a74..887c4c1 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9453,8 +9453,10 @@ static int nl80211_send_wowlan_nd(struct sk_buff *msg, if (!freqs) return -ENOBUFS; - for (i = 0; i < req->n_channels; i++) - nla_put_u32(msg, i, req->channels[i]->center_freq); + for (i = 0; i < req->n_channels; i++) { + if (nla_put_u32(msg, i, req->channels[i]->center_freq)) + return -ENOBUFS; + } nla_nest_end(msg, freqs); @@ -9468,9 +9470,10 @@ static int nl80211_send_wowlan_nd(struct sk_buff *msg, if (!match) return -ENOBUFS; - nla_put(msg, NL80211_SCHED_SCAN_MATCH_ATTR_SSID, - req->match_sets[i].ssid.ssid_len, - req->match_sets[i].ssid.ssid); + if (nla_put(msg, NL80211_SCHED_SCAN_MATCH_ATTR_SSID, + req->match_sets[i].ssid.ssid_len, + req->match_sets[i].ssid.ssid)) + return -ENOBUFS; nla_nest_end(msg, match); } nla_nest_end(msg, matches); -- cgit v1.1 From 5140974dca69f0eace465bccd93891ca242a7e61 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Sep 2016 09:58:31 +0200 Subject: mac80211: remove unused assignment The next line overwrites this assignment, so remove it; there's no real value in using it for the next assignment either. 
Signed-off-by: Johannes Berg --- net/mac80211/util.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index e777c2a..b6865d8 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2555,7 +2555,6 @@ int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, if (need_basic && basic_rates & BIT(i)) basic = 0x80; - rate = sband->bitrates[i].bitrate; rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, 5 * (1 << shift)); *pos++ = basic | (u8) rate; -- cgit v1.1 From 8826fef95bd5f846f7745d9ce1e3009927ec0cb8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Sep 2016 10:00:23 +0200 Subject: mac80211: remove pointless chanctx NULL check If chanctx is derived as container_of() from a non-NULL pointer, it can't ever be NULL. Since we checked conf before, that's true here, so remove the useless NULL check. Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 5d4afea..e29ff57 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2961,10 +2961,6 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, } chanctx = container_of(conf, struct ieee80211_chanctx, conf); - if (!chanctx) { - err = -EBUSY; - goto out; - } ch_switch.timestamp = 0; ch_switch.device_timestamp = 0; -- cgit v1.1 From c7e9dbcf09bddd01568113103d62423d8894eabd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Sep 2016 10:03:00 +0200 Subject: mac80211: remove sta_remove_debugfs driver callback No drivers implement this, relying either on the recursive directory removal to remove their debugfs, or not having any to start with. Remove the dead driver callback. 
Signed-off-by: Johannes Berg --- include/net/mac80211.h | 11 ++--------- net/mac80211/debugfs_sta.c | 4 ---- net/mac80211/driver-ops.h | 15 --------------- 3 files changed, 2 insertions(+), 28 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 08bac23..d9c8ccd 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3101,11 +3101,8 @@ enum ieee80211_reconfig_type { * * @sta_add_debugfs: Drivers can use this callback to add debugfs files * when a station is added to mac80211's station list. This callback - * and @sta_remove_debugfs should be within a CONFIG_MAC80211_DEBUGFS - * conditional. This callback can sleep. - * - * @sta_remove_debugfs: Remove the debugfs files which were added using - * @sta_add_debugfs. This callback can sleep. + * should be within a CONFIG_MAC80211_DEBUGFS conditional. This + * callback can sleep. * * @sta_notify: Notifies low level driver about power state transition of an * associated station, AP, IBSS/WDS/mesh peer etc. 
For a VIF operating @@ -3501,10 +3498,6 @@ struct ieee80211_ops { struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct dentry *dir); - void (*sta_remove_debugfs)(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_sta *sta, - struct dentry *dir); #endif void (*sta_notify)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum sta_notify_cmd, struct ieee80211_sta *sta); diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index fb26935..a2fcdb4 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -544,10 +544,6 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) void ieee80211_sta_debugfs_remove(struct sta_info *sta) { - struct ieee80211_local *local = sta->local; - struct ieee80211_sub_if_data *sdata = sta->sdata; - - drv_sta_remove_debugfs(local, sdata, &sta->sta, sta->debugfs_dir); debugfs_remove_recursive(sta->debugfs_dir); sta->debugfs_dir = NULL; } diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index c39f93b..fe35a1c 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -499,21 +499,6 @@ static inline void drv_sta_add_debugfs(struct ieee80211_local *local, local->ops->sta_add_debugfs(&local->hw, &sdata->vif, sta, dir); } - -static inline void drv_sta_remove_debugfs(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - struct ieee80211_sta *sta, - struct dentry *dir) -{ - might_sleep(); - - sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); - - if (local->ops->sta_remove_debugfs) - local->ops->sta_remove_debugfs(&local->hw, &sdata->vif, - sta, dir); -} #endif static inline void drv_sta_pre_rcu_remove(struct ieee80211_local *local, -- cgit v1.1 From e8a24cd4b87247beedb1addc7b683422092047e5 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Wed, 14 Sep 2016 12:48:32 +0530 Subject: mac80211: allow driver to handle packet-loss mechanism Based on consecutive msdu failures, mac80211 triggers CQM packet-loss mechanism. 
Drivers like ath10k have their own connection monitoring algorithm, offloaded to firmware, for triggering station kickout. In case of station kickout, driver will report low ack status by mac80211 API (ieee80211_report_low_ack). This flag will enable the driver to completely rely on firmware events for station kickout and bypass mac80211 packet loss mechanism. Signed-off-by: Rajkumar Manoharan Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 ++++++ net/mac80211/debugfs.c | 1 + net/mac80211/status.c | 6 ++++++ 3 files changed, 13 insertions(+) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d9c8ccd..5296100 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2018,6 +2018,11 @@ struct ieee80211_txq { * @IEEE80211_HW_TX_FRAG_LIST: Hardware (or driver) supports sending frag_list * skbs, needed for zero-copy software A-MSDU. * + * @IEEE80211_HW_REPORTS_LOW_ACK: The driver (or firmware) reports low ack event + * by ieee80211_report_low_ack() based on its own algorithm. For such + * drivers, mac80211 packet loss mechanism will not be triggered and driver + * is completely depending on firmware event for station kickout.
+ * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2058,6 +2063,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_USES_RSS, IEEE80211_HW_TX_AMSDU, IEEE80211_HW_TX_FRAG_LIST, + IEEE80211_HW_REPORTS_LOW_ACK, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 5bbb470..8ca62b6 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -201,6 +201,7 @@ static const char *hw_flag_names[] = { FLAG(USES_RSS), FLAG(TX_AMSDU), FLAG(TX_FRAG_LIST), + FLAG(REPORTS_LOW_ACK), #undef FLAG }; diff --git a/net/mac80211/status.c b/net/mac80211/status.c index fabd9ff..ea39f8a7 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -557,6 +557,12 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local, static void ieee80211_lost_packet(struct sta_info *sta, struct ieee80211_tx_info *info) { + /* If driver relies on its own algorithm for station kickout, skip + * mac80211 packet loss mechanism. + */ + if (ieee80211_hw_check(&sta->local->hw, REPORTS_LOW_ACK)) + return; + /* This packet was aggregated but doesn't carry status info */ if ((info->flags & IEEE80211_TX_CTL_AMPDU) && !(info->flags & IEEE80211_TX_STAT_AMPDU)) -- cgit v1.1 From ec53c832ee90b86414ca243d0e6fdbb9cf5e413b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 15 Sep 2016 10:57:50 +0200 Subject: cfg80211: remove unnecessary pointer-of For an array, there's no need to use &array, so just use the plain wiphy->addresses[i].addr here to silence smatch. 
Signed-off-by: Johannes Berg --- net/wireless/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index e46469b..0082f4b 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -57,7 +57,7 @@ static ssize_t addresses_show(struct device *dev, return sprintf(buf, "%pM\n", wiphy->perm_addr); for (i = 0; i < wiphy->n_addresses; i++) - buf += sprintf(buf, "%pM\n", &wiphy->addresses[i].addr); + buf += sprintf(buf, "%pM\n", wiphy->addresses[i].addr); return buf - start; } -- cgit v1.1 From 54c5ef2e93ea002dc5dd63349298b2778fe59edb Mon Sep 17 00:00:00 2001 From: Beni Lev Date: Wed, 10 Aug 2016 17:03:43 +0300 Subject: iwlwifi: mvm: update TX queue before making a copy of the skb Off-channel action frames (such as ANQP frames) must be sent either on the AUX queue or on the offchannel queue, otherwise the firmware will cause a SYSASSERT. In the current implementation, the queue to be used is correctly set in the original skb, but this is done after it is copied. Thus the copy remains with the original, incorrect queue. Fix this by setting the queue in the original skb before copying it. Fixes: commit 5c08b0f5026f ("iwlwifi: mvm: don't override the rate with the AMSDU len") Cc: stable@vger.kernel.org # v4.6+ Signed-off-by: Beni Lev Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index c6585ab..b3a87a3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -513,6 +513,15 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) int hdrlen = ieee80211_hdrlen(hdr->frame_control); int queue; + /* IWL_MVM_OFFCHANNEL_QUEUE is used for ROC packets that can be used + * in 2 different types of vifs, P2P & STATION. 
P2P uses the offchannel + * queue. STATION (HS2.0) uses the auxiliary context of the FW, + * and hence needs to be sent on the aux queue + */ + if (IEEE80211_SKB_CB(skb)->hw_queue == IWL_MVM_OFFCHANNEL_QUEUE && + skb_info->control.vif->type == NL80211_IFTYPE_STATION) + IEEE80211_SKB_CB(skb)->hw_queue = mvm->aux_queue; + memcpy(&info, skb->cb, sizeof(info)); if (WARN_ON_ONCE(info.flags & IEEE80211_TX_CTL_AMPDU)) @@ -526,16 +535,6 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) /* This holds the amsdu headers length */ skb_info->driver_data[0] = (void *)(uintptr_t)0; - /* - * IWL_MVM_OFFCHANNEL_QUEUE is used for ROC packets that can be used - * in 2 different types of vifs, P2P & STATION. P2P uses the offchannel - * queue. STATION (HS2.0) uses the auxiliary context of the FW, - * and hence needs to be sent on the aux queue - */ - if (IEEE80211_SKB_CB(skb)->hw_queue == IWL_MVM_OFFCHANNEL_QUEUE && - info.control.vif->type == NL80211_IFTYPE_STATION) - IEEE80211_SKB_CB(skb)->hw_queue = mvm->aux_queue; - queue = info.hw_queue; /* -- cgit v1.1 From b0eaf4506f5f95d15d6731d72c0ddf4a2179eefa Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 14 Sep 2016 23:39:12 +0200 Subject: kvm: x86: correctly reset dest_map->vector when restoring LAPIC state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When userspace sends KVM_SET_LAPIC, KVM schedules a check between the vCPU's IRR and ISR and the IOAPIC redirection table, in order to re-establish the IOAPIC's dest_map (the list of CPUs servicing the real-time clock interrupt with the corresponding vectors). However, __rtc_irq_eoi_tracking_restore_one was forgetting to set dest_map->vectors. Because of this, the IOAPIC did not process the real-time clock interrupt EOI, ioapic->rtc_status.pending_eoi got stuck at a non-zero value, and further RTC interrupts were reported to userspace as coalesced. 
Fixes: 9e4aabe2bb3454c83dac8139cf9974503ee044db Fixes: 4d99ba898dd0c521ca6cdfdde55c9b58aea3cb3d Cc: stable@vger.kernel.org Cc: Joerg Roedel Cc: David Gilbert Reviewed-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/kvm/ioapic.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 5f42d03..c7220ba 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -109,6 +109,7 @@ static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) { bool new_val, old_val; struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; + struct dest_map *dest_map = &ioapic->rtc_status.dest_map; union kvm_ioapic_redirect_entry *e; e = &ioapic->redirtbl[RTC_GSI]; @@ -117,16 +118,17 @@ static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) return; new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector); - old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map); + old_val = test_bit(vcpu->vcpu_id, dest_map->map); if (new_val == old_val) return; if (new_val) { - __set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map); + __set_bit(vcpu->vcpu_id, dest_map->map); + dest_map->vectors[vcpu->vcpu_id] = e->fields.vector; ioapic->rtc_status.pending_eoi++; } else { - __clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map); + __clear_bit(vcpu->vcpu_id, dest_map->map); ioapic->rtc_status.pending_eoi--; rtc_status_pending_eoi_check_valid(ioapic); } -- cgit v1.1 From 1c109fabbd51863475cd12ac206bdd249aee35af Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 15 Sep 2016 02:35:29 +0100 Subject: fix minor infoleak in get_user_ex() get_user_ex(x, ptr) should zero x on failure. It's not a lot of a leak (at most we are leaking uninitialized 64bit value off the kernel stack, and in a fairly constrained situation, at that), but the fix is trivial, so... 
Cc: stable@vger.kernel.org Signed-off-by: Al Viro [ This sat in different branch from the uaccess fixes since mid-August ] Signed-off-by: Linus Torvalds --- arch/x86/include/asm/uaccess.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index e3af86f..2131c4c 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -433,7 +433,11 @@ do { \ #define __get_user_asm_ex(x, addr, itype, rtype, ltype) \ asm volatile("1: mov"itype" %1,%"rtype"0\n" \ "2:\n" \ - _ASM_EXTABLE_EX(1b, 2b) \ + ".section .fixup,\"ax\"\n" \ + "3:xor"itype" %"rtype"0,%"rtype"0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE_EX(1b, 3b) \ : ltype(x) : "m" (__m(addr))) #define __put_user_nocheck(x, ptr, size) \ -- cgit v1.1 From 5297e0f0fe13305a1fc7f01986be0dccd063d57a Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 14 Sep 2016 20:20:00 -0700 Subject: vfs: fix return type of ioctl_file_dedupe_range All the VFS functions in the dedupe ioctl path return int status, so the ioctl handler ought to as well. Found by Coverity, CID 1350952. Signed-off-by: Darrick J. Wong Signed-off-by: Linus Torvalds --- fs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ioctl.c b/fs/ioctl.c index 0f56deb..26aba09 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -568,7 +568,7 @@ static int ioctl_fsthaw(struct file *filp) return thaw_super(sb); } -static long ioctl_file_dedupe_range(struct file *file, void __user *arg) +static int ioctl_file_dedupe_range(struct file *file, void __user *arg) { struct file_dedupe_range __user *argp = arg; struct file_dedupe_range *same = NULL; -- cgit v1.1 From b71dbf1032f546bf3efd60fb5d9d0cefd200a508 Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Wed, 14 Sep 2016 20:20:44 -0700 Subject: vfs: cap dedupe request structure size at PAGE_SIZE Kirill A Shutemov reports that the kernel doesn't try to cap dest_count in any way, and uses the number to allocate kernel memory. This causes high order allocation warnings in the kernel log if someone passes in a big enough value. We should clamp the allocation at PAGE_SIZE to avoid stressing the VM. The two existing users of the dedupe ioctl never send more than 120 requests, so we can safely clamp dest_range at PAGE_SIZE, because with 4k pages we can handle up to 127 dedupe candidates. Given the max extent length of 16MB, we can end up doing 2GB of IO which is plenty. [ Note: the "offsetof()" can't overflow, because 'count' is just a 16-bit integer. That's not obvious in the limited context of the patch, so I'm noting it here because it made me go look. - Linus ] Reported-by: "Kirill A. Shutemov" Signed-off-by: Darrick J. Wong Signed-off-by: Linus Torvalds --- fs/ioctl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ioctl.c b/fs/ioctl.c index 26aba09..c415668 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -582,6 +582,10 @@ static int ioctl_file_dedupe_range(struct file *file, void __user *arg) } size = offsetof(struct file_dedupe_range __user, info[count]); + if (size > PAGE_SIZE) { + ret = -ENOMEM; + goto out; + } same = memdup_user(argp, size); if (IS_ERR(same)) { -- cgit v1.1 From 22f6b4d34fcf039c63a94e7670e0da24f8575a5a Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 16 Sep 2016 00:31:22 +0200 Subject: aio: mark AIO pseudo-fs noexec This ensures that do_mmap() won't implicitly make AIO memory mappings executable if the READ_IMPLIES_EXEC personality flag is set. Such behavior is problematic because the security_mmap_file LSM hook doesn't catch this case, potentially permitting an attacker to bypass a W^X policy enforced by SELinux. I have tested the patch on my machine. 
To test the behavior, compile and run this: #define _GNU_SOURCE #include #include #include #include #include #include #include int main(void) { personality(READ_IMPLIES_EXEC); aio_context_t ctx = 0; if (syscall(__NR_io_setup, 1, &ctx)) err(1, "io_setup"); char cmd[1000]; sprintf(cmd, "cat /proc/%d/maps | grep -F '/[aio]'", (int)getpid()); system(cmd); return 0; } In the output, "rw-s" is good, "rwxs" is bad. Signed-off-by: Jann Horn Signed-off-by: Linus Torvalds --- fs/aio.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/aio.c b/fs/aio.c index fb8e45b..4fe81d1 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -239,7 +239,12 @@ static struct dentry *aio_mount(struct file_system_type *fs_type, static const struct dentry_operations ops = { .d_dname = simple_dname, }; - return mount_pseudo(fs_type, "aio:", NULL, &ops, AIO_RING_MAGIC); + struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, &ops, + AIO_RING_MAGIC); + + if (!IS_ERR(root)) + root->d_sb->s_iflags |= SB_I_NOEXEC; + return root; } /* aio_setup -- cgit v1.1 From d560846e40fefd7b6e5c29d115f1d8f73db7f5e6 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 12 Sep 2016 13:01:50 +0100 Subject: atm: iphase: fix newline escape and minor tweak to source formatting The newline escape is incorrect and needs fixing. Also adjust source formatting / indentation and add { } to trailing else. Signed-off-by: Colin Ian King Signed-off-by: David S. 
Miller --- drivers/atm/iphase.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c index 9d8807e..b275676 100644 --- a/drivers/atm/iphase.c +++ b/drivers/atm/iphase.c @@ -1885,9 +1885,9 @@ static int open_tx(struct atm_vcc *vcc) if ((ret = ia_cbr_setup (iadev, vcc)) < 0) { return ret; } - } - else - printk("iadev: Non UBR, ABR and CBR traffic not supportedn"); + } else { + printk("iadev: Non UBR, ABR and CBR traffic not supported\n"); + } iadev->testTable[vcc->vci]->vc_status |= VC_ACTIVE; IF_EVENT(printk("ia open_tx returning \n");) -- cgit v1.1 From 5c0ca3f566d7a19e9bf9671dfc2108fad1b7b9b2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 12 Sep 2016 13:04:57 +0100 Subject: test_bpf: fix the dummy skb after dissector changes Commit d5709f7ab776 ("flow_dissector: For stripped vlan, get vlan info from skb->vlan_tci") made flow dissector look at vlan_proto when vlan is present. Since test_bpf sets skb->vlan_tci to ~0 (including VLAN_TAG_PRESENT) we have to populate skb->vlan_proto. Fixes false negative on test #24: test_bpf: #24 LD_PAYLOAD_OFF jited:0 175 ret 0 != 42 FAIL (1 times) Signed-off-by: Jakub Kicinski Reviewed-by: Dinan Gunawardena Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- lib/test_bpf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 93f4501..94346b4 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -5485,6 +5485,7 @@ static struct sk_buff *populate_skb(char *buf, int size) skb->hash = SKB_HASH; skb->queue_mapping = SKB_QUEUE_MAP; skb->vlan_tci = SKB_VLAN_TCI; + skb->vlan_proto = htons(ETH_P_IP); skb->dev = &dev; skb->dev->ifindex = SKB_DEV_IFINDEX; skb->dev->type = SKB_DEV_TYPE; -- cgit v1.1 From 07c0f09e23b47815251ed9e5ce245a58c6391974 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Mon, 12 Sep 2016 15:19:21 +0300 Subject: net/sched: act_tunnel_key: Remove rcu_read_lock protection Remove rcu_read_lock protection from tunnel_key_dump and use rtnl_dereference, dump operation is protected by rtnl lock. Also, remove rcu_read_lock from tunnel_key_release and use rcu_dereference_protected. Both operations are running exclusively and a writer couldn't modify t->params while those functions are executed. Fixes: 54d94fd89d90 ('net/sched: Introduce act_tunnel_key') Signed-off-by: Hadar Hen Zion Acked-by: John Fastabend Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/sched/act_tunnel_key.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index dceff74..af47bdf 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -194,15 +194,12 @@ static void tunnel_key_release(struct tc_action *a, int bind) struct tcf_tunnel_key *t = to_tunnel_key(a); struct tcf_tunnel_key_params *params; - rcu_read_lock(); - params = rcu_dereference(t->params); + params = rcu_dereference_protected(t->params, 1); if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) dst_release(&params->tcft_enc_metadata->dst); kfree_rcu(params, rcu); - - rcu_read_unlock(); } static int tunnel_key_dump_addresses(struct sk_buff *skb, @@ -245,10 +242,8 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, .bindcnt = t->tcf_bindcnt - bind, }; struct tcf_t tm; - int ret = -1; - rcu_read_lock(); - params = rcu_dereference(t->params); + params = rtnl_dereference(t->params); opt.t_action = params->tcft_action; opt.action = params->action; @@ -272,15 +267,11 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, &tm, TCA_TUNNEL_KEY_PAD)) goto nla_put_failure; - ret = skb->len; - goto out; + return skb->len; nla_put_failure: nlmsg_trim(skb, b); -out: - rcu_read_unlock(); - - return ret; + return -1; } static int tunnel_key_walker(struct net *net, struct sk_buff *skb, -- cgit v1.1 From 04b3f8de4b6d90758938a40303c0ee9a86bcb8ab Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 12 Sep 2016 23:38:42 +0200 Subject: bpf: drop unnecessary test in cls_bpf_classify and tcf_bpf The skb_mac_header_was_set() test in cls_bpf's and act_bpf's fast-path is actually unnecessary and can be removed altogether. This was added by commit a166151cbe33 ("bpf: fix bpf helpers to use skb->mac_header relative offsets"), which was later on improved by 3431205e0397 ("bpf: make programs see skb->data == L2 for ingress and egress").
We're always guaranteed to have valid mac header at the time we invoke cls_bpf_classify() or tcf_bpf(). Reason is that since 6d1ccff62780 ("net: reset mac header in dev_start_xmit()") we do skb_reset_mac_header() in __dev_queue_xmit() before we could call into sch_handle_egress() or any subsequent enqueue. sch_handle_ingress() always sees a valid mac header as well (things like skb_reset_mac_len() would badly fail otherwise). Thus, drop the unnecessary test in classifier and action case. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/sched/act_bpf.c | 3 --- net/sched/cls_bpf.c | 3 --- 2 files changed, 6 deletions(-) diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index bfa8707..78400de 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -44,9 +44,6 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, int action, filter_res; bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS; - if (unlikely(!skb_mac_header_was_set(skb))) - return TC_ACT_UNSPEC; - tcf_lastuse_update(&prog->tcf_tm); bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb); diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 4742f41..1d92d4d 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -83,9 +83,6 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct cls_bpf_prog *prog; int ret = -1; - if (unlikely(!skb_mac_header_was_set(skb))) - return -1; - /* Needed here for accessing maps. */ rcu_read_lock(); list_for_each_entry_rcu(prog, &head->plist, link) { -- cgit v1.1 From f53d8c7b18faf1bd361abe91f3c4bcbb21d0c985 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 12 Sep 2016 23:38:43 +0200 Subject: bpf: use skb_at_tc_ingress helper in tcf_bpf We have a small skb_at_tc_ingress() helper for testing for ingress, so make use of it. cls_bpf already uses it and so should act_bpf. 
Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/sched/act_bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 78400de..1d39600 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -39,10 +39,10 @@ static struct tc_action_ops act_bpf_ops; static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, struct tcf_result *res) { + bool at_ingress = skb_at_tc_ingress(skb); struct tcf_bpf *prog = to_bpf(act); struct bpf_prog *filter; int action, filter_res; - bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS; tcf_lastuse_update(&prog->tcf_tm); bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb); -- cgit v1.1 From 86da71b57383d40993cb90baafb3735cffe5d800 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 12 Sep 2016 20:13:09 -0400 Subject: net_sched: Introduce skbmod action This action is intended to be an upgrade from a usability perspective from pedit (as well as operational debuggability). Compare this: sudo tc filter add dev $ETH parent 1: protocol ip prio 10 \ u32 match ip protocol 1 0xff flowid 1:2 \ action pedit munge offset -14 u8 set 0x02 \ munge offset -13 u8 set 0x15 \ munge offset -12 u8 set 0x15 \ munge offset -11 u8 set 0x15 \ munge offset -10 u16 set 0x1515 \ pipe to: sudo tc filter add dev $ETH parent 1: protocol ip prio 10 \ u32 match ip protocol 1 0xff flowid 1:2 \ action skbmod dmac 02:15:15:15:15:15 Also try to do a MAC address swap with pedit or worse try to debug a policy with destination mac, source mac and ethertype. Then make a few rules out of those and you'll get my point. In the future common use cases on pedit can be migrated to this action (as an example different fields in ip v4/6, transports like tcp/udp/sctp etc). For this first cut, this allows modifying basic ethernet header. The most important ethernet use case at the moment is when redirecting or mirroring packets to a remote machine.
The dst mac address needs a re-write so that it doesnt get dropped or confuse an interconnecting (learning) switch or dropped by a target machine (which looks at the dst mac). And at times when flipping back the packet a swap of the MAC addresses is needed. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/tc_act/tc_skbmod.h | 30 ++++ include/uapi/linux/tc_act/tc_skbmod.h | 39 +++++ net/sched/Kconfig | 11 ++ net/sched/Makefile | 1 + net/sched/act_skbmod.c | 301 ++++++++++++++++++++++++++++++++++ 5 files changed, 382 insertions(+) create mode 100644 include/net/tc_act/tc_skbmod.h create mode 100644 include/uapi/linux/tc_act/tc_skbmod.h create mode 100644 net/sched/act_skbmod.c diff --git a/include/net/tc_act/tc_skbmod.h b/include/net/tc_act/tc_skbmod.h new file mode 100644 index 0000000..644a211 --- /dev/null +++ b/include/net/tc_act/tc_skbmod.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2016, Jamal Hadi Salim + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+*/ + +#ifndef __NET_TC_SKBMOD_H +#define __NET_TC_SKBMOD_H + +#include +#include + +struct tcf_skbmod_params { + struct rcu_head rcu; + u64 flags; /*up to 64 types of operations; extend if needed */ + u8 eth_dst[ETH_ALEN]; + u16 eth_type; + u8 eth_src[ETH_ALEN]; +}; + +struct tcf_skbmod { + struct tc_action common; + struct tcf_skbmod_params __rcu *skbmod_p; +}; +#define to_skbmod(a) ((struct tcf_skbmod *)a) + +#endif /* __NET_TC_SKBMOD_H */ diff --git a/include/uapi/linux/tc_act/tc_skbmod.h b/include/uapi/linux/tc_act/tc_skbmod.h new file mode 100644 index 0000000..10fc07d --- /dev/null +++ b/include/uapi/linux/tc_act/tc_skbmod.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2016, Jamal Hadi Salim + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. +*/ + +#ifndef __LINUX_TC_SKBMOD_H +#define __LINUX_TC_SKBMOD_H + +#include + +#define TCA_ACT_SKBMOD 15 + +#define SKBMOD_F_DMAC 0x1 +#define SKBMOD_F_SMAC 0x2 +#define SKBMOD_F_ETYPE 0x4 +#define SKBMOD_F_SWAPMAC 0x8 + +struct tc_skbmod { + tc_gen; + __u64 flags; +}; + +enum { + TCA_SKBMOD_UNSPEC, + TCA_SKBMOD_TM, + TCA_SKBMOD_PARMS, + TCA_SKBMOD_DMAC, + TCA_SKBMOD_SMAC, + TCA_SKBMOD_ETYPE, + TCA_SKBMOD_PAD, + __TCA_SKBMOD_MAX +}; +#define TCA_SKBMOD_MAX (__TCA_SKBMOD_MAX - 1) + +#endif diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 72e3426..7795d5a 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -749,6 +749,17 @@ config NET_ACT_CONNMARK To compile this code as a module, choose M here: the module will be called act_connmark. +config NET_ACT_SKBMOD + tristate "skb data modification action" + depends on NET_CLS_ACT + ---help--- + Say Y here to allow modification of skb data + + If unsure, say N. + + To compile this code as a module, choose M here: the + module will be called act_skbmod. 
+ config NET_ACT_IFE tristate "Inter-FE action based on IETF ForCES InterFE LFB" depends on NET_CLS_ACT diff --git a/net/sched/Makefile b/net/sched/Makefile index b9d046b..148ae0d 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -19,6 +19,7 @@ obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o obj-$(CONFIG_NET_ACT_VLAN) += act_vlan.o obj-$(CONFIG_NET_ACT_BPF) += act_bpf.o obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o +obj-$(CONFIG_NET_ACT_SKBMOD) += act_skbmod.o obj-$(CONFIG_NET_ACT_IFE) += act_ife.o obj-$(CONFIG_NET_IFE_SKBMARK) += act_meta_mark.o obj-$(CONFIG_NET_IFE_SKBPRIO) += act_meta_skbprio.o diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c new file mode 100644 index 0000000..e7d9638 --- /dev/null +++ b/net/sched/act_skbmod.c @@ -0,0 +1,301 @@ +/* + * net/sched/act_skbmod.c skb data modifier + * + * Copyright (c) 2016 Jamal Hadi Salim + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+*/ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define SKBMOD_TAB_MASK 15 + +static int skbmod_net_id; +static struct tc_action_ops act_skbmod_ops; + +#define MAX_EDIT_LEN ETH_HLEN +static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_skbmod *d = to_skbmod(a); + int action; + struct tcf_skbmod_params *p; + u64 flags; + int err; + + tcf_lastuse_update(&d->tcf_tm); + bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb); + + /* XXX: if you are going to edit more fields beyond ethernet header + * (example when you add IP header replacement or vlan swap) + * then MAX_EDIT_LEN needs to change appropriately + */ + err = skb_ensure_writable(skb, MAX_EDIT_LEN); + if (unlikely(err)) { /* best policy is to drop on the floor */ + qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats)); + return TC_ACT_SHOT; + } + + rcu_read_lock(); + action = READ_ONCE(d->tcf_action); + if (unlikely(action == TC_ACT_SHOT)) { + qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats)); + rcu_read_unlock(); + return action; + } + + p = rcu_dereference(d->skbmod_p); + flags = p->flags; + if (flags & SKBMOD_F_DMAC) + ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst); + if (flags & SKBMOD_F_SMAC) + ether_addr_copy(eth_hdr(skb)->h_source, p->eth_src); + if (flags & SKBMOD_F_ETYPE) + eth_hdr(skb)->h_proto = p->eth_type; + rcu_read_unlock(); + + if (flags & SKBMOD_F_SWAPMAC) { + u16 tmpaddr[ETH_ALEN / 2]; /* ether_addr_copy() requirement */ + /*XXX: I am sure we can come up with more efficient swapping*/ + ether_addr_copy((u8 *)tmpaddr, eth_hdr(skb)->h_dest); + ether_addr_copy(eth_hdr(skb)->h_dest, eth_hdr(skb)->h_source); + ether_addr_copy(eth_hdr(skb)->h_source, (u8 *)tmpaddr); + } + + return action; +} + +static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = { + [TCA_SKBMOD_PARMS] = { .len = sizeof(struct tc_skbmod) }, + [TCA_SKBMOD_DMAC] = { .len = ETH_ALEN }, + 
[TCA_SKBMOD_SMAC] = { .len = ETH_ALEN }, + [TCA_SKBMOD_ETYPE] = { .type = NLA_U16 }, +}; + +static int tcf_skbmod_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action **a, + int ovr, int bind) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + struct nlattr *tb[TCA_SKBMOD_MAX + 1]; + struct tcf_skbmod_params *p, *p_old; + struct tc_skbmod *parm; + struct tcf_skbmod *d; + bool exists = false; + u8 *daddr = NULL; + u8 *saddr = NULL; + u16 eth_type = 0; + u32 lflags = 0; + int ret = 0, err; + + if (!nla) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_SKBMOD_MAX, nla, skbmod_policy); + if (err < 0) + return err; + + if (!tb[TCA_SKBMOD_PARMS]) + return -EINVAL; + + if (tb[TCA_SKBMOD_DMAC]) { + daddr = nla_data(tb[TCA_SKBMOD_DMAC]); + lflags |= SKBMOD_F_DMAC; + } + + if (tb[TCA_SKBMOD_SMAC]) { + saddr = nla_data(tb[TCA_SKBMOD_SMAC]); + lflags |= SKBMOD_F_SMAC; + } + + if (tb[TCA_SKBMOD_ETYPE]) { + eth_type = nla_get_u16(tb[TCA_SKBMOD_ETYPE]); + lflags |= SKBMOD_F_ETYPE; + } + + parm = nla_data(tb[TCA_SKBMOD_PARMS]); + if (parm->flags & SKBMOD_F_SWAPMAC) + lflags = SKBMOD_F_SWAPMAC; + + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; + + if (!lflags) + return -EINVAL; + + if (!exists) { + ret = tcf_hash_create(tn, parm->index, est, a, + &act_skbmod_ops, bind, true); + if (ret) + return ret; + + ret = ACT_P_CREATED; + } else { + tcf_hash_release(*a, bind); + if (!ovr) + return -EEXIST; + } + + d = to_skbmod(*a); + + ASSERT_RTNL(); + p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL); + if (unlikely(!p)) { + if (ovr) + tcf_hash_release(*a, bind); + return -ENOMEM; + } + + p->flags = lflags; + d->tcf_action = parm->action; + + p_old = rtnl_dereference(d->skbmod_p); + + if (ovr) + spin_lock_bh(&d->tcf_lock); + + if (lflags & SKBMOD_F_DMAC) + ether_addr_copy(p->eth_dst, daddr); + if (lflags & SKBMOD_F_SMAC) + ether_addr_copy(p->eth_src, saddr); + if (lflags & SKBMOD_F_ETYPE) + 
p->eth_type = htons(eth_type); + + rcu_assign_pointer(d->skbmod_p, p); + if (ovr) + spin_unlock_bh(&d->tcf_lock); + + if (p_old) + kfree_rcu(p_old, rcu); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(tn, *a); + return ret; +} + +static void tcf_skbmod_cleanup(struct tc_action *a, int bind) +{ + struct tcf_skbmod *d = to_skbmod(a); + struct tcf_skbmod_params *p; + + p = rcu_dereference_protected(d->skbmod_p, 1); + kfree_rcu(p, rcu); +} + +static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + struct tcf_skbmod *d = to_skbmod(a); + unsigned char *b = skb_tail_pointer(skb); + struct tcf_skbmod_params *p = rtnl_dereference(d->skbmod_p); + struct tc_skbmod opt = { + .index = d->tcf_index, + .refcnt = d->tcf_refcnt - ref, + .bindcnt = d->tcf_bindcnt - bind, + .action = d->tcf_action, + }; + struct tcf_t t; + + opt.flags = p->flags; + if (nla_put(skb, TCA_SKBMOD_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + if ((p->flags & SKBMOD_F_DMAC) && + nla_put(skb, TCA_SKBMOD_DMAC, ETH_ALEN, p->eth_dst)) + goto nla_put_failure; + if ((p->flags & SKBMOD_F_SMAC) && + nla_put(skb, TCA_SKBMOD_SMAC, ETH_ALEN, p->eth_src)) + goto nla_put_failure; + if ((p->flags & SKBMOD_F_ETYPE) && + nla_put_u16(skb, TCA_SKBMOD_ETYPE, ntohs(p->eth_type))) + goto nla_put_failure; + + tcf_tm_dump(&t, &d->tcf_tm); + if (nla_put_64bit(skb, TCA_SKBMOD_TM, sizeof(t), &t, TCA_SKBMOD_PAD)) + goto nla_put_failure; + + return skb->len; +nla_put_failure: + rcu_read_unlock(); + nlmsg_trim(skb, b); + return -1; +} + +static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb, + struct netlink_callback *cb, int type, + const struct tc_action_ops *ops) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + + return tcf_generic_walker(tn, skb, cb, type, ops); +} + +static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + + return tcf_hash_search(tn, a, index); +} + 
+static struct tc_action_ops act_skbmod_ops = { + .kind = "skbmod", + .type = TCA_ACT_SKBMOD, + .owner = THIS_MODULE, + .act = tcf_skbmod_run, + .dump = tcf_skbmod_dump, + .init = tcf_skbmod_init, + .cleanup = tcf_skbmod_cleanup, + .walk = tcf_skbmod_walker, + .lookup = tcf_skbmod_search, + .size = sizeof(struct tcf_skbmod), +}; + +static __net_init int skbmod_init_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + + return tc_action_net_init(tn, &act_skbmod_ops, SKBMOD_TAB_MASK); +} + +static void __net_exit skbmod_exit_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + + tc_action_net_exit(tn); +} + +static struct pernet_operations skbmod_net_ops = { + .init = skbmod_init_net, + .exit = skbmod_exit_net, + .id = &skbmod_net_id, + .size = sizeof(struct tc_action_net), +}; + +MODULE_AUTHOR("Jamal Hadi Salim, "); +MODULE_DESCRIPTION("SKB data mod-ing"); +MODULE_LICENSE("GPL"); + +static int __init skbmod_init_module(void) +{ + return tcf_register_action(&act_skbmod_ops, &skbmod_net_ops); +} + +static void __exit skbmod_cleanup_module(void) +{ + tcf_unregister_action(&act_skbmod_ops, &skbmod_net_ops); +} + +module_init(skbmod_init_module); +module_exit(skbmod_cleanup_module); -- cgit v1.1 From 5400e54add3618759ef607a97d711e356eef6e2a Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Tue, 13 Sep 2016 13:39:24 +0530 Subject: cxgb4vf: don't offload Rx checksums for IPv6 fragments The checksum provided by the device doesn't include the L3 headers, as IPv6 expects Signed-off-by: Hariprasad Shenai Signed-off-by: David S. 
Miller --- drivers/net/ethernet/chelsio/cxgb4vf/sge.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index c8fd4f8..f3ed9ce 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -1648,14 +1648,15 @@ int t4vf_ethrx_handler(struct sge_rspq *rspq, const __be64 *rsp, if (csum_ok && !pkt->err_vec && (be32_to_cpu(pkt->l2info) & (RXF_UDP_F | RXF_TCP_F))) { - if (!pkt->ip_frag) + if (!pkt->ip_frag) { skb->ip_summed = CHECKSUM_UNNECESSARY; - else { + rxq->stats.rx_cso++; + } else if (pkt->l2info & htonl(RXF_IP_F)) { __sum16 c = (__force __sum16)pkt->csum; skb->csum = csum_unfold(c); skb->ip_summed = CHECKSUM_COMPLETE; + rxq->stats.rx_cso++; } - rxq->stats.rx_cso++; } else skb_checksum_none_assert(skb); -- cgit v1.1 From 0ca4e20ba3de1a1077ee36df22176c2055497923 Mon Sep 17 00:00:00 2001 From: Tobias Regnery Date: Tue, 13 Sep 2016 12:06:57 +0200 Subject: alx: fix error handling in __alx_open In commit 9ee7b683ea63 we moved the enablement of msi interrupts earlier in alx_init_intr. If there is an error in alx_alloc_rings, __alx_open returns with an error but msi (or msi-x) interrupts stays enabled. Add a new error label to disable msi (or msi-x) interrupts. Fixes: 9ee7b683ea63 ("alx: refactor msi enablement and disablement") Signed-off-by: Tobias Regnery Signed-off-by: David S. 
Miller --- drivers/net/ethernet/atheros/alx/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index 9887cee..c0f84b7 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -1028,7 +1028,7 @@ static int __alx_open(struct alx_priv *alx, bool resume) err = alx_alloc_rings(alx); if (err) - return err; + goto out_disable_adv_intr; alx_configure(alx); @@ -1049,6 +1049,8 @@ static int __alx_open(struct alx_priv *alx, bool resume) out_free_rings: alx_free_rings(alx); +out_disable_adv_intr: + alx_disable_advanced_intr(alx); return err; } -- cgit v1.1 From aa72d708373dacfa690960b336543b867784b350 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 15 Sep 2016 15:28:22 +0300 Subject: net/sched: cls_flower: Support masking for matching on tcp/udp ports Add the definitions for src/dst udp/tcp port masks and use them when setting && dumping the relevant keys. Signed-off-by: Or Gerlitz Signed-off-by: Paul Blakey Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/uapi/linux/pkt_cls.h | 4 ++++ net/sched/cls_flower.c | 20 ++++++++++++-------- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index f9c287c..60ea2a0 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -442,6 +442,10 @@ enum { TCA_FLOWER_KEY_ENC_IPV6_DST, /* struct in6_addr */ TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,/* struct in6_addr */ + TCA_FLOWER_KEY_TCP_SRC_MASK, /* be16 */ + TCA_FLOWER_KEY_TCP_DST_MASK, /* be16 */ + TCA_FLOWER_KEY_UDP_SRC_MASK, /* be16 */ + TCA_FLOWER_KEY_UDP_DST_MASK, /* be16 */ __TCA_FLOWER_MAX, }; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index b084b2a..027523c 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -335,6 +335,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK] = { .len = sizeof(struct in6_addr) }, [TCA_FLOWER_KEY_ENC_IPV6_DST] = { .len = sizeof(struct in6_addr) }, [TCA_FLOWER_KEY_ENC_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_TCP_SRC_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_TCP_DST_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_UDP_SRC_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_UDP_DST_MASK] = { .type = NLA_U16 }, }; static void fl_set_key_val(struct nlattr **tb, @@ -432,17 +436,17 @@ static int fl_set_key(struct net *net, struct nlattr **tb, if (key->basic.ip_proto == IPPROTO_TCP) { fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC, - &mask->tp.src, TCA_FLOWER_UNSPEC, + &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK, sizeof(key->tp.src)); fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST, - &mask->tp.dst, TCA_FLOWER_UNSPEC, + &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK, sizeof(key->tp.dst)); } else if (key->basic.ip_proto == IPPROTO_UDP) { fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC, - &mask->tp.src, TCA_FLOWER_UNSPEC, + &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK, 
sizeof(key->tp.src)); fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST, - &mask->tp.dst, TCA_FLOWER_UNSPEC, + &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK, sizeof(key->tp.dst)); } @@ -877,18 +881,18 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, if (key->basic.ip_proto == IPPROTO_TCP && (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC, - &mask->tp.src, TCA_FLOWER_UNSPEC, + &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK, sizeof(key->tp.src)) || fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST, - &mask->tp.dst, TCA_FLOWER_UNSPEC, + &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK, sizeof(key->tp.dst)))) goto nla_put_failure; else if (key->basic.ip_proto == IPPROTO_UDP && (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC, - &mask->tp.src, TCA_FLOWER_UNSPEC, + &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK, sizeof(key->tp.src)) || fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST, - &mask->tp.dst, TCA_FLOWER_UNSPEC, + &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK, sizeof(key->tp.dst)))) goto nla_put_failure; -- cgit v1.1 From a53d850a79c39b97a2d954d0db5c481c44e8dc7c Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 15 Sep 2016 15:28:23 +0300 Subject: net/sched: cls_flower: Remove an unused field from the filter key structure Commit c3f8324188fa "net: Add full IPv6 addresses to flow_keys" added an unused instance of struct flow_dissector_key_addrs into struct fl_flow_key, remove it. Signed-off-by: Or Gerlitz Reported-by: Hadar Hen Zion Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- net/sched/cls_flower.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 027523c..a3f4c70 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -33,7 +33,6 @@ struct fl_flow_key { struct flow_dissector_key_basic basic; struct flow_dissector_key_eth_addrs eth; struct flow_dissector_key_vlan vlan; - struct flow_dissector_key_addrs ipaddrs; union { struct flow_dissector_key_ipv4_addrs ipv4; struct flow_dissector_key_ipv6_addrs ipv6; -- cgit v1.1 From 37a6c1512314d2439ef7136d773d5a470e0996b9 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 15 Sep 2016 15:28:24 +0300 Subject: net/sched: cls_flower: Specify vlan attributes format in the UAPI header Specify the format (size and endianess) for the vlan attributes. Signed-off-by: Or Gerlitz Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 60ea2a0..8915b61 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -428,9 +428,9 @@ enum { TCA_FLOWER_KEY_UDP_DST, /* be16 */ TCA_FLOWER_FLAGS, - TCA_FLOWER_KEY_VLAN_ID, - TCA_FLOWER_KEY_VLAN_PRIO, - TCA_FLOWER_KEY_VLAN_ETH_TYPE, + TCA_FLOWER_KEY_VLAN_ID, /* be16 */ + TCA_FLOWER_KEY_VLAN_PRIO, /* u8 */ + TCA_FLOWER_KEY_VLAN_ETH_TYPE, /* be16 */ TCA_FLOWER_KEY_ENC_KEY_ID, /* be32 */ TCA_FLOWER_KEY_ENC_IPV4_SRC, /* be32 */ -- cgit v1.1 From 2679d040412df847d390a3a8f0f224a7c91f7fae Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Tue, 13 Sep 2016 10:08:54 -0400 Subject: openvswitch: avoid deferred execution of recirc actions The ovs kernel data path currently defers the execution of all recirc actions until stack utilization is at a minimum. This is too limiting for some packet forwarding scenarios due to the small size of the deferred action FIFO (10 entries). 
For example, broadcast traffic sent out more than 10 ports with recirculation results in packet drops when the deferred action FIFO becomes full, as reported here: http://openvswitch.org/pipermail/dev/2016-March/067672.html Since the current recursion depth is available (it is already tracked by the exec_actions_level pcpu variable), we can use it to determine whether to execute recirculation actions immediately (safe when recursion depth is low) or defer execution until more stack space is available. With this change, the deferred action fifo size becomes a non-issue for currently failing scenarios because it is no longer used when there are three or fewer recursions through ovs_execute_actions(). Suggested-by: Pravin Shelar Signed-off-by: Lance Richardson Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/actions.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 4fe9032..863e992 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -71,6 +71,8 @@ struct ovs_frag_data { static DEFINE_PER_CPU(struct ovs_frag_data, ovs_frag_data_storage); #define DEFERRED_ACTION_FIFO_SIZE 10 +#define OVS_RECURSION_LIMIT 5 +#define OVS_DEFERRED_ACTION_THRESHOLD (OVS_RECURSION_LIMIT - 2) struct action_fifo { int head; int tail; @@ -78,7 +80,12 @@ struct action_fifo { struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE]; }; +struct recirc_keys { + struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD]; +}; + static struct action_fifo __percpu *action_fifos; +static struct recirc_keys __percpu *recirc_keys; static DEFINE_PER_CPU(int, exec_actions_level); static void action_fifo_init(struct action_fifo *fifo) @@ -1020,6 +1027,7 @@ static int execute_recirc(struct datapath *dp, struct sk_buff *skb, const struct nlattr *a, int rem) { struct deferred_action *da; + int level; if (!is_flow_key_valid(key)) { int err; @@ -1043,6 +1051,18 @@ 
static int execute_recirc(struct datapath *dp, struct sk_buff *skb, return 0; } + level = this_cpu_read(exec_actions_level); + if (level <= OVS_DEFERRED_ACTION_THRESHOLD) { + struct recirc_keys *rks = this_cpu_ptr(recirc_keys); + struct sw_flow_key *recirc_key = &rks->key[level - 1]; + + *recirc_key = *key; + recirc_key->recirc_id = nla_get_u32(a); + ovs_dp_process_packet(skb, recirc_key); + + return 0; + } + da = add_deferred_actions(skb, key, NULL); if (da) { da->pkt_key.recirc_id = nla_get_u32(a); @@ -1209,11 +1229,10 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, const struct sw_flow_actions *acts, struct sw_flow_key *key) { - static const int ovs_recursion_limit = 5; int err, level; level = __this_cpu_inc_return(exec_actions_level); - if (unlikely(level > ovs_recursion_limit)) { + if (unlikely(level > OVS_RECURSION_LIMIT)) { net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n", ovs_dp_name(dp)); kfree_skb(skb); @@ -1238,10 +1257,17 @@ int action_fifos_init(void) if (!action_fifos) return -ENOMEM; + recirc_keys = alloc_percpu(struct recirc_keys); + if (!recirc_keys) { + free_percpu(action_fifos); + return -ENOMEM; + } + return 0; } void action_fifos_exit(void) { free_percpu(action_fifos); + free_percpu(recirc_keys); } -- cgit v1.1 From 85e42b044e8dda0acb60c645d089f4a19e0136d3 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 13 Sep 2016 21:23:56 +0530 Subject: libcxgb,iw_cxgb4,cxgbit: add cxgb_get_4tuple() Add cxgb_get_4tuple() in libcxgb_cm.c to remove it's duplicate definitions from cxgb4/cm.c and cxgbit/cxgbit_cm.c. Signed-off-by: Varun Prakash Signed-off-by: David S. 
Miller --- drivers/infiniband/hw/cxgb4/Kconfig | 1 + drivers/infiniband/hw/cxgb4/Makefile | 1 + drivers/infiniband/hw/cxgb4/cm.c | 41 +------------ drivers/net/ethernet/chelsio/libcxgb/Makefile | 4 +- drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c | 72 +++++++++++++++++++++++ drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h | 42 +++++++++++++ drivers/target/iscsi/cxgbit/cxgbit_cm.c | 41 +------------ 7 files changed, 125 insertions(+), 77 deletions(-) create mode 100644 drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c create mode 100644 drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h diff --git a/drivers/infiniband/hw/cxgb4/Kconfig b/drivers/infiniband/hw/cxgb4/Kconfig index 23f38cf..afe8b28 100644 --- a/drivers/infiniband/hw/cxgb4/Kconfig +++ b/drivers/infiniband/hw/cxgb4/Kconfig @@ -1,6 +1,7 @@ config INFINIBAND_CXGB4 tristate "Chelsio T4/T5 RDMA Driver" depends on CHELSIO_T4 && INET && (IPV6 || IPV6=n) + select CHELSIO_LIB select GENERIC_ALLOCATOR ---help--- This is an iWARP/RDMA driver for the Chelsio T4 and T5 diff --git a/drivers/infiniband/hw/cxgb4/Makefile b/drivers/infiniband/hw/cxgb4/Makefile index e11cf72..fa40b685 100644 --- a/drivers/infiniband/hw/cxgb4/Makefile +++ b/drivers/infiniband/hw/cxgb4/Makefile @@ -1,4 +1,5 @@ ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4 +ccflags-y += -Idrivers/net/ethernet/chelsio/libcxgb obj-$(CONFIG_INFINIBAND_CXGB4) += iw_cxgb4.o diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index b6a953a..e591f61 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -49,6 +49,7 @@ #include +#include #include "iw_cxgb4.h" #include "clip_tbl.h" @@ -2518,42 +2519,6 @@ static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb) return; } -static void get_4tuple(struct cpl_pass_accept_req *req, enum chip_type type, - int *iptype, __u8 *local_ip, __u8 *peer_ip, - __be16 *local_port, __be16 *peer_port) -{ - int eth_len = (CHELSIO_CHIP_VERSION(type) 
<= CHELSIO_T5) ? - ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)) : - T6_ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)); - int ip_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ? - IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)) : - T6_IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)); - struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len); - struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len); - struct tcphdr *tcp = (struct tcphdr *) - ((u8 *)(req + 1) + eth_len + ip_len); - - if (ip->version == 4) { - PDBG("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", __func__, - ntohl(ip->saddr), ntohl(ip->daddr), ntohs(tcp->source), - ntohs(tcp->dest)); - *iptype = 4; - memcpy(peer_ip, &ip->saddr, 4); - memcpy(local_ip, &ip->daddr, 4); - } else { - PDBG("%s saddr %pI6 daddr %pI6 sport %u dport %u\n", __func__, - ip6->saddr.s6_addr, ip6->daddr.s6_addr, ntohs(tcp->source), - ntohs(tcp->dest)); - *iptype = 6; - memcpy(peer_ip, ip6->saddr.s6_addr, 16); - memcpy(local_ip, ip6->daddr.s6_addr, 16); - } - *peer_port = tcp->source; - *local_port = tcp->dest; - - return; -} - static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) { struct c4iw_ep *child_ep = NULL, *parent_ep; @@ -2582,8 +2547,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } - get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type, &iptype, - local_ip, peer_ip, &local_port, &peer_port); + cxgb_get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type, + &iptype, local_ip, peer_ip, &local_port, &peer_port); /* Find output route */ if (iptype == 4) { diff --git a/drivers/net/ethernet/chelsio/libcxgb/Makefile b/drivers/net/ethernet/chelsio/libcxgb/Makefile index 2362230..2534e30 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/Makefile +++ b/drivers/net/ethernet/chelsio/libcxgb/Makefile @@ -1,3 +1,5 @@ +ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4 + obj-$(CONFIG_CHELSIO_LIB) += libcxgb.o -libcxgb-y := libcxgb_ppm.o +libcxgb-y := libcxgb_ppm.o libcxgb_cm.o 
diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c new file mode 100644 index 0000000..d7342bb --- /dev/null +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "libcxgb_cm.h" + +void +cxgb_get_4tuple(struct cpl_pass_accept_req *req, enum chip_type type, + int *iptype, __u8 *local_ip, __u8 *peer_ip, + __be16 *local_port, __be16 *peer_port) +{ + int eth_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ? 
+ ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)) : + T6_ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)); + int ip_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ? + IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)) : + T6_IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)); + struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len); + struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len); + struct tcphdr *tcp = (struct tcphdr *) + ((u8 *)(req + 1) + eth_len + ip_len); + + if (ip->version == 4) { + pr_debug("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", + __func__, ntohl(ip->saddr), ntohl(ip->daddr), + ntohs(tcp->source), ntohs(tcp->dest)); + *iptype = 4; + memcpy(peer_ip, &ip->saddr, 4); + memcpy(local_ip, &ip->daddr, 4); + } else { + pr_debug("%s saddr %pI6 daddr %pI6 sport %u dport %u\n", + __func__, ip6->saddr.s6_addr, ip6->daddr.s6_addr, + ntohs(tcp->source), ntohs(tcp->dest)); + *iptype = 6; + memcpy(peer_ip, ip6->saddr.s6_addr, 16); + memcpy(local_ip, ip6->daddr.s6_addr, 16); + } + *peer_port = tcp->source; + *local_port = tcp->dest; +} +EXPORT_SYMBOL(cxgb_get_4tuple); diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h new file mode 100644 index 0000000..2ab8d9b --- /dev/null +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __LIBCXGB_CM_H__ +#define __LIBCXGB_CM_H__ + +#include +#include + +void +cxgb_get_4tuple(struct cpl_pass_accept_req *, enum chip_type, + int *, __u8 *, __u8 *, __be16 *, __be16 *); +#endif diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index 0ae0b13..8bb5a25 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -24,6 +24,7 @@ #include #include +#include #include "cxgbit.h" #include "clip_tbl.h" @@ -789,42 +790,6 @@ void _cxgbit_free_csk(struct kref *kref) kfree(csk); } -static void -cxgbit_get_tuple_info(struct cpl_pass_accept_req *req, int *iptype, - __u8 *local_ip, __u8 *peer_ip, __be16 *local_port, - __be16 *peer_port) -{ - u32 eth_len = ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)); - u32 ip_len = IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)); - struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len); - struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len); - struct tcphdr *tcp = (struct tcphdr *) - ((u8 *)(req + 1) + eth_len + ip_len); - - if (ip->version == 4) { - pr_debug("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", - __func__, - ntohl(ip->saddr), ntohl(ip->daddr), - ntohs(tcp->source), - ntohs(tcp->dest)); - *iptype = 4; - memcpy(peer_ip, &ip->saddr, 4); - memcpy(local_ip, &ip->daddr, 4); - } else { - pr_debug("%s saddr %pI6 daddr %pI6 sport %u dport %u\n", - __func__, - ip6->saddr.s6_addr, ip6->daddr.s6_addr, - ntohs(tcp->source), - ntohs(tcp->dest)); - *iptype = 6; - memcpy(peer_ip, ip6->saddr.s6_addr, 16); - memcpy(local_ip, ip6->daddr.s6_addr, 16); - } - - *peer_port = tcp->source; - *local_port = tcp->dest; -} - static int cxgbit_our_interface(struct cxgbit_device *cdev, struct net_device *egress_dev) { @@ -1340,8 +1305,8 @@ cxgbit_pass_accept_req(struct cxgbit_device *cdev, struct sk_buff *skb) goto rel_skb; } - cxgbit_get_tuple_info(req, &iptype, local_ip, peer_ip, - &local_port, &peer_port); + cxgb_get_4tuple(req, 
cdev->lldi.adapter_type, &iptype, local_ip, + peer_ip, &local_port, &peer_port); /* Find output route */ if (iptype == 4) { -- cgit v1.1 From 804c2f3e36ef60e6f50e6101ae06b02fbaa14b9a Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 13 Sep 2016 21:23:57 +0530 Subject: libcxgb,iw_cxgb4,cxgbit: add cxgb_find_route() Add cxgb_find_route() in libcxgb_cm.c to remove it's duplicate definitions from cxgb4/cm.c and cxgbit/cxgbit_cm.c. Signed-off-by: Varun Prakash Signed-off-by: David S. Miller --- drivers/infiniband/hw/cxgb4/cm.c | 53 +++++++---------------- drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c | 44 +++++++++++++++++++ drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h | 4 ++ drivers/target/iscsi/cxgbit/cxgbit_cm.c | 36 +++------------ 4 files changed, 69 insertions(+), 68 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index e591f61..02f5e20 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -505,32 +505,6 @@ out: return dst; } -static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip, - __be32 peer_ip, __be16 local_port, - __be16 peer_port, u8 tos) -{ - struct rtable *rt; - struct flowi4 fl4; - struct neighbour *n; - - rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip, - peer_port, local_port, IPPROTO_TCP, - tos, 0); - if (IS_ERR(rt)) - return NULL; - n = dst_neigh_lookup(&rt->dst, &peer_ip); - if (!n) - return NULL; - if (!our_interface(dev, n->dev) && - !(n->dev->flags & IFF_LOOPBACK)) { - neigh_release(n); - dst_release(&rt->dst); - return NULL; - } - neigh_release(n); - return &rt->dst; -} - static void arp_failure_discard(void *handle, struct sk_buff *skb) { pr_err(MOD "ARP failure\n"); @@ -2215,9 +2189,11 @@ static int c4iw_reconnect(struct c4iw_ep *ep) /* find a route */ if (ep->com.cm_id->m_local_addr.ss_family == AF_INET) { - ep->dst = find_route(ep->com.dev, laddr->sin_addr.s_addr, - raddr->sin_addr.s_addr, laddr->sin_port, - 
raddr->sin_port, ep->com.cm_id->tos); + ep->dst = cxgb_find_route(&ep->com.dev->rdev.lldi, get_real_dev, + laddr->sin_addr.s_addr, + raddr->sin_addr.s_addr, + laddr->sin_port, + raddr->sin_port, ep->com.cm_id->tos); iptype = 4; ra = (__u8 *)&raddr->sin_addr; } else { @@ -2556,9 +2532,9 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) , __func__, parent_ep, hwtid, local_ip, peer_ip, ntohs(local_port), ntohs(peer_port), peer_mss); - dst = find_route(dev, *(__be32 *)local_ip, *(__be32 *)peer_ip, - local_port, peer_port, - tos); + dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev, + *(__be32 *)local_ip, *(__be32 *)peer_ip, + local_port, peer_port, tos); } else { PDBG("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n" , __func__, parent_ep, hwtid, @@ -3340,9 +3316,11 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) PDBG("%s saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n", __func__, &laddr->sin_addr, ntohs(laddr->sin_port), ra, ntohs(raddr->sin_port)); - ep->dst = find_route(dev, laddr->sin_addr.s_addr, - raddr->sin_addr.s_addr, laddr->sin_port, - raddr->sin_port, cm_id->tos); + ep->dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev, + laddr->sin_addr.s_addr, + raddr->sin_addr.s_addr, + laddr->sin_port, + raddr->sin_port, cm_id->tos); } else { iptype = 6; ra = (__u8 *)&raddr6->sin6_addr; @@ -4006,8 +3984,9 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr), ntohs(tcph->source), iph->tos); - dst = find_route(dev, iph->daddr, iph->saddr, tcph->dest, tcph->source, - iph->tos); + dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev, + iph->daddr, iph->saddr, tcph->dest, + tcph->source, iph->tos); if (!dst) { pr_err("%s - failed to find dst entry!\n", __func__); diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c index d7342bb..a318412 100644 --- 
a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c @@ -32,6 +32,7 @@ #include #include +#include #include "libcxgb_cm.h" @@ -70,3 +71,46 @@ cxgb_get_4tuple(struct cpl_pass_accept_req *req, enum chip_type type, *local_port = tcp->dest; } EXPORT_SYMBOL(cxgb_get_4tuple); + +static bool +cxgb_our_interface(struct cxgb4_lld_info *lldi, + struct net_device *(*get_real_dev)(struct net_device *), + struct net_device *egress_dev) +{ + int i; + + egress_dev = get_real_dev(egress_dev); + for (i = 0; i < lldi->nports; i++) + if (lldi->ports[i] == egress_dev) + return true; + return false; +} + +struct dst_entry * +cxgb_find_route(struct cxgb4_lld_info *lldi, + struct net_device *(*get_real_dev)(struct net_device *), + __be32 local_ip, __be32 peer_ip, __be16 local_port, + __be16 peer_port, u8 tos) +{ + struct rtable *rt; + struct flowi4 fl4; + struct neighbour *n; + + rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip, + peer_port, local_port, IPPROTO_TCP, + tos, 0); + if (IS_ERR(rt)) + return NULL; + n = dst_neigh_lookup(&rt->dst, &peer_ip); + if (!n) + return NULL; + if (!cxgb_our_interface(lldi, get_real_dev, n->dev) && + !(n->dev->flags & IFF_LOOPBACK)) { + neigh_release(n); + dst_release(&rt->dst); + return NULL; + } + neigh_release(n); + return &rt->dst; +} +EXPORT_SYMBOL(cxgb_find_route); diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h index 2ab8d9b..fe69161 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h @@ -39,4 +39,8 @@ void cxgb_get_4tuple(struct cpl_pass_accept_req *, enum chip_type, int *, __u8 *, __u8 *, __be16 *, __be16 *); +struct dst_entry * +cxgb_find_route(struct cxgb4_lld_info *, + struct net_device *(*)(struct net_device *), + __be32, __be32, __be16, __be16, u8); #endif diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c 
b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index 8bb5a25..49b24b9 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -830,33 +830,6 @@ out: return dst; } -static struct dst_entry * -cxgbit_find_route(struct cxgbit_device *cdev, __be32 local_ip, __be32 peer_ip, - __be16 local_port, __be16 peer_port, u8 tos) -{ - struct rtable *rt; - struct flowi4 fl4; - struct neighbour *n; - - rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, - local_ip, - peer_port, local_port, IPPROTO_TCP, - tos, 0); - if (IS_ERR(rt)) - return NULL; - n = dst_neigh_lookup(&rt->dst, &peer_ip); - if (!n) - return NULL; - if (!cxgbit_our_interface(cdev, n->dev) && - !(n->dev->flags & IFF_LOOPBACK)) { - neigh_release(n); - dst_release(&rt->dst); - return NULL; - } - neigh_release(n); - return &rt->dst; -} - static void cxgbit_set_tcp_window(struct cxgbit_sock *csk, struct port_info *pi) { unsigned int linkspeed; @@ -1315,10 +1288,11 @@ cxgbit_pass_accept_req(struct cxgbit_device *cdev, struct sk_buff *skb) , __func__, cnp, tid, local_ip, peer_ip, ntohs(local_port), ntohs(peer_port), peer_mss); - dst = cxgbit_find_route(cdev, *(__be32 *)local_ip, - *(__be32 *)peer_ip, - local_port, peer_port, - PASS_OPEN_TOS_G(ntohl(req->tos_stid))); + dst = cxgb_find_route(&cdev->lldi, cxgbit_get_real_dev, + *(__be32 *)local_ip, + *(__be32 *)peer_ip, + local_port, peer_port, + PASS_OPEN_TOS_G(ntohl(req->tos_stid))); } else { pr_debug("%s parent sock %p tid %u laddr %pI6 raddr %pI6 " "lport %d rport %d peer_mss %d\n" -- cgit v1.1 From 95554761d1db54f7c058cc0ed389282ce7361999 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 13 Sep 2016 21:23:58 +0530 Subject: libcxgb,iw_cxgb4,cxgbit: add cxgb_find_route6() Add cxgb_find_route6() in libcxgb_cm.c to remove it's duplicate definitions from cxgb4/cm.c and cxgbit/cxgbit_cm.c. Signed-off-by: Varun Prakash Signed-off-by: David S. 
Miller --- drivers/infiniband/hw/cxgb4/cm.c | 70 ++++++----------------- drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c | 33 +++++++++++ drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h | 4 ++ drivers/target/iscsi/cxgbit/cxgbit_cm.c | 51 ++--------------- 4 files changed, 61 insertions(+), 97 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 02f5e20..a08a748 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -465,46 +465,6 @@ static struct net_device *get_real_dev(struct net_device *egress_dev) return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev; } -static int our_interface(struct c4iw_dev *dev, struct net_device *egress_dev) -{ - int i; - - egress_dev = get_real_dev(egress_dev); - for (i = 0; i < dev->rdev.lldi.nports; i++) - if (dev->rdev.lldi.ports[i] == egress_dev) - return 1; - return 0; -} - -static struct dst_entry *find_route6(struct c4iw_dev *dev, __u8 *local_ip, - __u8 *peer_ip, __be16 local_port, - __be16 peer_port, u8 tos, - __u32 sin6_scope_id) -{ - struct dst_entry *dst = NULL; - - if (IS_ENABLED(CONFIG_IPV6)) { - struct flowi6 fl6; - - memset(&fl6, 0, sizeof(fl6)); - memcpy(&fl6.daddr, peer_ip, 16); - memcpy(&fl6.saddr, local_ip, 16); - if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) - fl6.flowi6_oif = sin6_scope_id; - dst = ip6_route_output(&init_net, NULL, &fl6); - if (!dst) - goto out; - if (!our_interface(dev, ip6_dst_idev(dst)->dev) && - !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) { - dst_release(dst); - dst = NULL; - } - } - -out: - return dst; -} - static void arp_failure_discard(void *handle, struct sk_buff *skb) { pr_err(MOD "ARP failure\n"); @@ -2197,10 +2157,13 @@ static int c4iw_reconnect(struct c4iw_ep *ep) iptype = 4; ra = (__u8 *)&raddr->sin_addr; } else { - ep->dst = find_route6(ep->com.dev, laddr6->sin6_addr.s6_addr, - raddr6->sin6_addr.s6_addr, - laddr6->sin6_port, raddr6->sin6_port, 0, - raddr6->sin6_scope_id); + ep->dst = 
cxgb_find_route6(&ep->com.dev->rdev.lldi, + get_real_dev, + laddr6->sin6_addr.s6_addr, + raddr6->sin6_addr.s6_addr, + laddr6->sin6_port, + raddr6->sin6_port, 0, + raddr6->sin6_scope_id); iptype = 6; ra = (__u8 *)&raddr6->sin6_addr; } @@ -2540,10 +2503,11 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) , __func__, parent_ep, hwtid, local_ip, peer_ip, ntohs(local_port), ntohs(peer_port), peer_mss); - dst = find_route6(dev, local_ip, peer_ip, local_port, peer_port, - PASS_OPEN_TOS_G(ntohl(req->tos_stid)), - ((struct sockaddr_in6 *) - &parent_ep->com.local_addr)->sin6_scope_id); + dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev, + local_ip, peer_ip, local_port, peer_port, + PASS_OPEN_TOS_G(ntohl(req->tos_stid)), + ((struct sockaddr_in6 *) + &parent_ep->com.local_addr)->sin6_scope_id); } if (!dst) { printk(KERN_ERR MOD "%s - failed to find dst entry!\n", @@ -3339,10 +3303,12 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) __func__, laddr6->sin6_addr.s6_addr, ntohs(laddr6->sin6_port), raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port)); - ep->dst = find_route6(dev, laddr6->sin6_addr.s6_addr, - raddr6->sin6_addr.s6_addr, - laddr6->sin6_port, raddr6->sin6_port, 0, - raddr6->sin6_scope_id); + ep->dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev, + laddr6->sin6_addr.s6_addr, + raddr6->sin6_addr.s6_addr, + laddr6->sin6_port, + raddr6->sin6_port, 0, + raddr6->sin6_scope_id); } if (!ep->dst) { printk(KERN_ERR MOD "%s - cannot find route.\n", __func__); diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c index a318412..0f0de5b 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "libcxgb_cm.h" @@ -114,3 +115,35 @@ cxgb_find_route(struct cxgb4_lld_info *lldi, return &rt->dst; } EXPORT_SYMBOL(cxgb_find_route); + +struct 
dst_entry * +cxgb_find_route6(struct cxgb4_lld_info *lldi, + struct net_device *(*get_real_dev)(struct net_device *), + __u8 *local_ip, __u8 *peer_ip, __be16 local_port, + __be16 peer_port, u8 tos, __u32 sin6_scope_id) +{ + struct dst_entry *dst = NULL; + + if (IS_ENABLED(CONFIG_IPV6)) { + struct flowi6 fl6; + + memset(&fl6, 0, sizeof(fl6)); + memcpy(&fl6.daddr, peer_ip, 16); + memcpy(&fl6.saddr, local_ip, 16); + if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) + fl6.flowi6_oif = sin6_scope_id; + dst = ip6_route_output(&init_net, NULL, &fl6); + if (!dst) + goto out; + if (!cxgb_our_interface(lldi, get_real_dev, + ip6_dst_idev(dst)->dev) && + !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) { + dst_release(dst); + dst = NULL; + } + } + +out: + return dst; +} +EXPORT_SYMBOL(cxgb_find_route6); diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h index fe69161..c4df04a 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h @@ -43,4 +43,8 @@ struct dst_entry * cxgb_find_route(struct cxgb4_lld_info *, struct net_device *(*)(struct net_device *), __be32, __be32, __be16, __be16, u8); +struct dst_entry * +cxgb_find_route6(struct cxgb4_lld_info *, + struct net_device *(*)(struct net_device *), + __u8 *, __u8 *, __be16, __be16, u8, __u32); #endif diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index 49b24b9..e961ac4 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -790,46 +790,6 @@ void _cxgbit_free_csk(struct kref *kref) kfree(csk); } -static int -cxgbit_our_interface(struct cxgbit_device *cdev, struct net_device *egress_dev) -{ - u8 i; - - egress_dev = cxgbit_get_real_dev(egress_dev); - for (i = 0; i < cdev->lldi.nports; i++) - if (cdev->lldi.ports[i] == egress_dev) - return 1; - return 0; -} - -static struct dst_entry * -cxgbit_find_route6(struct 
cxgbit_device *cdev, __u8 *local_ip, __u8 *peer_ip, - __be16 local_port, __be16 peer_port, u8 tos, - __u32 sin6_scope_id) -{ - struct dst_entry *dst = NULL; - - if (IS_ENABLED(CONFIG_IPV6)) { - struct flowi6 fl6; - - memset(&fl6, 0, sizeof(fl6)); - memcpy(&fl6.daddr, peer_ip, 16); - memcpy(&fl6.saddr, local_ip, 16); - if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) - fl6.flowi6_oif = sin6_scope_id; - dst = ip6_route_output(&init_net, NULL, &fl6); - if (!dst) - goto out; - if (!cxgbit_our_interface(cdev, ip6_dst_idev(dst)->dev) && - !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) { - dst_release(dst); - dst = NULL; - } - } -out: - return dst; -} - static void cxgbit_set_tcp_window(struct cxgbit_sock *csk, struct port_info *pi) { unsigned int linkspeed; @@ -1299,11 +1259,12 @@ cxgbit_pass_accept_req(struct cxgbit_device *cdev, struct sk_buff *skb) , __func__, cnp, tid, local_ip, peer_ip, ntohs(local_port), ntohs(peer_port), peer_mss); - dst = cxgbit_find_route6(cdev, local_ip, peer_ip, - local_port, peer_port, - PASS_OPEN_TOS_G(ntohl(req->tos_stid)), - ((struct sockaddr_in6 *) - &cnp->com.local_addr)->sin6_scope_id); + dst = cxgb_find_route6(&cdev->lldi, cxgbit_get_real_dev, + local_ip, peer_ip, + local_port, peer_port, + PASS_OPEN_TOS_G(ntohl(req->tos_stid)), + ((struct sockaddr_in6 *) + &cnp->com.local_addr)->sin6_scope_id); } if (!dst) { pr_err("%s - failed to find dst entry!\n", -- cgit v1.1 From b65eef0a5b1a635f9b056cf74d3ef778f3794a75 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 13 Sep 2016 21:23:59 +0530 Subject: libcxgb,iw_cxgb4,cxgbit: add cxgb_is_neg_adv() Add cxgb_is_neg_adv() in libcxgb_cm.h to remove it's duplicate definitions from cxgb4/cm.c and cxgbit/cxgbit_cm.c. Signed-off-by: Varun Prakash Signed-off-by: David S. 
Miller --- drivers/infiniband/hw/cxgb4/cm.c | 15 +++------------ drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h | 9 +++++++++ drivers/target/iscsi/cxgbit/cxgbit_cm.c | 11 +---------- 3 files changed, 13 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index a08a748..b35fdc0 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1987,15 +1987,6 @@ static inline int act_open_has_tid(int status) status != CPL_ERR_CONN_EXIST); } -/* Returns whether a CPL status conveys negative advice. - */ -static int is_neg_adv(unsigned int status) -{ - return status == CPL_ERR_RTX_NEG_ADVICE || - status == CPL_ERR_PERSIST_NEG_ADVICE || - status == CPL_ERR_KEEPALV_NEG_ADVICE; -} - static char *neg_adv_str(unsigned int status) { switch (status) { @@ -2235,7 +2226,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid, status, status2errno(status)); - if (is_neg_adv(status)) { + if (cxgb_is_neg_adv(status)) { PDBG("%s Connection problems for atid %u status %u (%s)\n", __func__, atid, status, neg_adv_str(status)); ep->stats.connect_neg_adv++; @@ -2751,7 +2742,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) if (!ep) return 0; - if (is_neg_adv(req->status)) { + if (cxgb_is_neg_adv(req->status)) { PDBG("%s Negative advice on abort- tid %u status %d (%s)\n", __func__, ep->hwtid, req->status, neg_adv_str(req->status)); @@ -4227,7 +4218,7 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) kfree_skb(skb); return 0; } - if (is_neg_adv(req->status)) { + if (cxgb_is_neg_adv(req->status)) { PDBG("%s Negative advice on abort- tid %u status %d (%s)\n", __func__, ep->hwtid, req->status, neg_adv_str(req->status)); diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h index c4df04a..57fcc98 100644 --- 
a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h @@ -47,4 +47,13 @@ struct dst_entry * cxgb_find_route6(struct cxgb4_lld_info *, struct net_device *(*)(struct net_device *), __u8 *, __u8 *, __be16, __be16, u8, __u32); + +/* Returns whether a CPL status conveys negative advice. + */ +static inline bool cxgb_is_neg_adv(unsigned int status) +{ + return status == CPL_ERR_RTX_NEG_ADVICE || + status == CPL_ERR_PERSIST_NEG_ADVICE || + status == CPL_ERR_KEEPALV_NEG_ADVICE; +} #endif diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index e961ac4..c46bdd5 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -73,15 +73,6 @@ out: return wr_waitp->ret; } -/* Returns whether a CPL status conveys negative advice. - */ -static int cxgbit_is_neg_adv(unsigned int status) -{ - return status == CPL_ERR_RTX_NEG_ADVICE || - status == CPL_ERR_PERSIST_NEG_ADVICE || - status == CPL_ERR_KEEPALV_NEG_ADVICE; -} - static int cxgbit_np_hashfn(const struct cxgbit_np *cnp) { return ((unsigned long)cnp >> 10) & (NP_INFO_HASH_SIZE - 1); @@ -1704,7 +1695,7 @@ static void cxgbit_abort_req_rss(struct cxgbit_sock *csk, struct sk_buff *skb) pr_debug("%s: csk %p; tid %u; state %d\n", __func__, csk, tid, csk->com.state); - if (cxgbit_is_neg_adv(hdr->status)) { + if (cxgb_is_neg_adv(hdr->status)) { pr_err("%s: got neg advise %d on tid %u\n", __func__, hdr->status, tid); goto rel_skb; -- cgit v1.1 From 44c6d06992ac663e5163bdbe00844cb845ed5703 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 13 Sep 2016 21:24:00 +0530 Subject: libcxgb,iw_cxgb4,cxgbit: add cxgb_best_mtu() Add cxgb_best_mtu() in libcxgb_cm.h to remove it's duplicate definitions from cxgb4/cm.c and cxgbit/cxgbit_cm.c Signed-off-by: Varun Prakash Signed-off-by: David S. 
Miller --- drivers/infiniband/hw/cxgb4/cm.c | 32 +++++++---------------- drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h | 18 +++++++++++++ drivers/target/iscsi/cxgbit/cxgbit_cm.c | 20 +++----------- 3 files changed, 30 insertions(+), 40 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index b35fdc0..c3c678f 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -677,20 +677,6 @@ static int send_abort(struct c4iw_ep *ep) return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t); } -static void best_mtu(const unsigned short *mtus, unsigned short mtu, - unsigned int *idx, int use_ts, int ipv6) -{ - unsigned short hdr_size = (ipv6 ? - sizeof(struct ipv6hdr) : - sizeof(struct iphdr)) + - sizeof(struct tcphdr) + - (use_ts ? - round_up(TCPOLEN_TIMESTAMP, 4) : 0); - unsigned short data_size = mtu - hdr_size; - - cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx); -} - static int send_connect(struct c4iw_ep *ep) { struct cpl_act_open_req *req = NULL; @@ -750,9 +736,9 @@ static int send_connect(struct c4iw_ep *ep) } set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx); - best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, - enable_tcp_timestamps, - (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1); + cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, + enable_tcp_timestamps, + (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); wscale = compute_wscale(rcv_win); /* @@ -1930,9 +1916,9 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) htons(FW_OFLD_CONNECTION_WR_CPLRXDATAACK_F); req->tcb.tx_max = (__force __be32) jiffies; req->tcb.rcv_adv = htons(1); - best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, - enable_tcp_timestamps, - (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1); + cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, + enable_tcp_timestamps, + (ep->com.remote_addr.ss_family == AF_INET) ? 
0 : 1); wscale = compute_wscale(rcv_win); /* @@ -2374,9 +2360,9 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, ep->hwtid)); - best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, - enable_tcp_timestamps && req->tcpopt.tstamp, - (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1); + cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, + enable_tcp_timestamps && req->tcpopt.tstamp, + (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); wscale = compute_wscale(rcv_win); /* diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h index 57fcc98..7fb4feb 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h @@ -33,6 +33,9 @@ #ifndef __LIBCXGB_CM_H__ #define __LIBCXGB_CM_H__ + +#include + #include #include @@ -56,4 +59,19 @@ static inline bool cxgb_is_neg_adv(unsigned int status) status == CPL_ERR_PERSIST_NEG_ADVICE || status == CPL_ERR_KEEPALV_NEG_ADVICE; } + +static inline void +cxgb_best_mtu(const unsigned short *mtus, unsigned short mtu, + unsigned int *idx, int use_ts, int ipv6) +{ + unsigned short hdr_size = (ipv6 ? + sizeof(struct ipv6hdr) : + sizeof(struct iphdr)) + + sizeof(struct tcphdr) + + (use_ts ? + round_up(TCPOLEN_TIMESTAMP, 4) : 0); + unsigned short data_size = mtu - hdr_size; + + cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx); +} #endif diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index c46bdd5..b09c09b 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -997,20 +997,6 @@ cxgbit_l2t_send(struct cxgbit_device *cdev, struct sk_buff *skb, return ret < 0 ? ret : 0; } -static void -cxgbit_best_mtu(const unsigned short *mtus, unsigned short mtu, - unsigned int *idx, int use_ts, int ipv6) -{ - unsigned short hdr_size = (ipv6 ? 
sizeof(struct ipv6hdr) : - sizeof(struct iphdr)) + - sizeof(struct tcphdr) + - (use_ts ? round_up(TCPOLEN_TIMESTAMP, - 4) : 0); - unsigned short data_size = mtu - hdr_size; - - cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx); -} - static void cxgbit_send_rx_credits(struct cxgbit_sock *csk, struct sk_buff *skb) { if (csk->com.state != CSK_STATE_ESTABLISHED) { @@ -1135,9 +1121,9 @@ cxgbit_pass_accept_rpl(struct cxgbit_sock *csk, struct cpl_pass_accept_req *req) INIT_TP_WR(rpl5, csk->tid); OPCODE_TID(rpl5) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, csk->tid)); - cxgbit_best_mtu(csk->com.cdev->lldi.mtus, csk->mtu, &mtu_idx, - req->tcpopt.tstamp, - (csk->com.remote_addr.ss_family == AF_INET) ? 0 : 1); + cxgb_best_mtu(csk->com.cdev->lldi.mtus, csk->mtu, &mtu_idx, + req->tcpopt.tstamp, + (csk->com.remote_addr.ss_family == AF_INET) ? 0 : 1); wscale = cxgbit_compute_wscale(csk->rcv_win); /* * Specify the largest window that will fit in opt0. The -- cgit v1.1 From cc516700c7edab4197d08998ac023c3043369391 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 13 Sep 2016 21:24:01 +0530 Subject: libcxgb,iw_cxgb4,cxgbit: add cxgb_compute_wscale() Add cxgb_compute_wscale() in libcxgb_cm.h to remove its duplicate definitions from cxgb4/cm.c and cxgbit/cxgbit_cm.c. Signed-off-by: Varun Prakash Signed-off-by: David S.
Miller --- drivers/infiniband/hw/cxgb4/cm.c | 12 ++++++------ drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 9 --------- drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h | 9 +++++++++ drivers/target/iscsi/cxgbit/cxgbit_cm.c | 11 +---------- 4 files changed, 16 insertions(+), 25 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index c3c678f..b9d77df 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -689,7 +689,7 @@ static int send_connect(struct c4iw_ep *ep) u64 opt0; u32 opt2; unsigned int mtu_idx; - int wscale; + u32 wscale; int win, sizev4, sizev6, wrlen; struct sockaddr_in *la = (struct sockaddr_in *) &ep->com.local_addr; @@ -739,7 +739,7 @@ static int send_connect(struct c4iw_ep *ep) cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, enable_tcp_timestamps, (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); - wscale = compute_wscale(rcv_win); + wscale = cxgb_compute_wscale(rcv_win); /* * Specify the largest window that will fit in opt0. The @@ -1891,7 +1891,7 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) struct sk_buff *skb; struct fw_ofld_connection_wr *req; unsigned int mtu_idx; - int wscale; + u32 wscale; struct sockaddr_in *sin; int win; @@ -1919,7 +1919,7 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, enable_tcp_timestamps, (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); - wscale = compute_wscale(rcv_win); + wscale = cxgb_compute_wscale(rcv_win); /* * Specify the largest window that will fit in opt0. 
The @@ -2339,7 +2339,7 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, unsigned int mtu_idx; u64 opt0; u32 opt2; - int wscale; + u32 wscale; struct cpl_t5_pass_accept_rpl *rpl5 = NULL; int win; enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type; @@ -2363,7 +2363,7 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, enable_tcp_timestamps && req->tcpopt.tstamp, (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); - wscale = compute_wscale(rcv_win); + wscale = cxgb_compute_wscale(rcv_win); /* * Specify the largest window that will fit in opt0. The diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index aa47e0a..6a9bef1f 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -881,15 +881,6 @@ static inline struct c4iw_listen_ep *to_listen_ep(struct iw_cm_id *cm_id) return cm_id->provider_data; } -static inline int compute_wscale(int win) -{ - int wscale = 0; - - while (wscale < 14 && (65535<com.cdev->lldi.mtus, csk->mtu, &mtu_idx, req->tcpopt.tstamp, (csk->com.remote_addr.ss_family == AF_INET) ? 0 : 1); - wscale = cxgbit_compute_wscale(csk->rcv_win); + wscale = cxgb_compute_wscale(csk->rcv_win); /* * Specify the largest window that will fit in opt0. The * remainder will be specified in the rx_data_ack. -- cgit v1.1 From a1a234542b7817c28770ad4e80be1bf69e6a4f86 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 13 Sep 2016 21:24:02 +0530 Subject: libcxgb,iw_cxgb4,cxgbit: add cxgb_mk_tid_release() Add cxgb_mk_tid_release() to remove duplicate code to form CPL_TID_RELEASE hardware command. Signed-off-by: Varun Prakash Signed-off-by: David S. 
Miller --- drivers/infiniband/hw/cxgb4/cm.c | 10 ++++------ drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h | 13 +++++++++++++ drivers/target/iscsi/cxgbit/cxgbit_cm.c | 11 ++--------- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index b9d77df..b818bd6 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -240,15 +240,13 @@ int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb) static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb) { - struct cpl_tid_release *req; + u32 len = roundup(sizeof(struct cpl_tid_release), 16); - skb = get_skb(skb, sizeof *req, GFP_KERNEL); + skb = get_skb(skb, len, GFP_KERNEL); if (!skb) return; - req = (struct cpl_tid_release *) skb_put(skb, sizeof(*req)); - INIT_TP_WR(req, hwtid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid)); - set_wr_txq(skb, CPL_PRIORITY_SETUP, 0); + + cxgb_mk_tid_release(skb, len, hwtid, 0); c4iw_ofld_send(rdev, skb); return; } diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h index ecf3baa..fbb973e 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h @@ -83,4 +83,17 @@ static inline u32 cxgb_compute_wscale(u32 win) wscale++; return wscale; } + +static inline void +cxgb_mk_tid_release(struct sk_buff *skb, u32 len, u32 tid, u16 chan) +{ + struct cpl_tid_release *req; + + req = (struct cpl_tid_release *)__skb_put(skb, len); + memset(req, 0, len); + + INIT_TP_WR(req, tid); + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_TID_RELEASE, tid)); + set_wr_txq(skb, CPL_PRIORITY_SETUP, chan); +} #endif diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index cd29c91..994058f 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ 
-961,21 +961,14 @@ int cxgbit_ofld_send(struct cxgbit_device *cdev, struct sk_buff *skb) static void cxgbit_release_tid(struct cxgbit_device *cdev, u32 tid) { - struct cpl_tid_release *req; - unsigned int len = roundup(sizeof(*req), 16); + u32 len = roundup(sizeof(struct cpl_tid_release), 16); struct sk_buff *skb; skb = alloc_skb(len, GFP_ATOMIC); if (!skb) return; - req = (struct cpl_tid_release *)__skb_put(skb, len); - memset(req, 0, len); - - INIT_TP_WR(req, tid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID( - CPL_TID_RELEASE, tid)); - set_wr_txq(skb, CPL_PRIORITY_SETUP, 0); + cxgb_mk_tid_release(skb, len, tid, 0); cxgbit_ofld_send(cdev, skb); } -- cgit v1.1 From 29fb6f42e7282322672eff8b4ad85918b9dcbae3 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 13 Sep 2016 21:24:03 +0530 Subject: libcxgb, iw_cxgb4, cxgbit: add cxgb_mk_close_con_req() Add cxgb_mk_close_con_req() to remove duplicate code to form CPL_CLOSE_CON_REQ hardware command. Signed-off-by: Varun Prakash Signed-off-by: David S. 
Miller --- drivers/infiniband/hw/cxgb4/cm.c | 13 ++++--------- drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h | 16 ++++++++++++++++ drivers/target/iscsi/cxgbit/cxgbit_cm.c | 13 +++---------- 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index b818bd6..22bccd8 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -637,21 +637,16 @@ static int send_flowc(struct c4iw_ep *ep) static int send_halfclose(struct c4iw_ep *ep) { - struct cpl_close_con_req *req; struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list); - int wrlen = roundup(sizeof *req, 16); + u32 wrlen = roundup(sizeof(struct cpl_close_con_req), 16); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); if (WARN_ON(!skb)) return -ENOMEM; - set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); - t4_set_arp_err_handler(skb, NULL, arp_failure_discard); - req = (struct cpl_close_con_req *) skb_put(skb, wrlen); - memset(req, 0, wrlen); - INIT_TP_WR(req, ep->hwtid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, - ep->hwtid)); + cxgb_mk_close_con_req(skb, wrlen, ep->hwtid, ep->txq_idx, + NULL, arp_failure_discard); + return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h index fbb973e..e77661d 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h @@ -38,6 +38,7 @@ #include #include +#include void cxgb_get_4tuple(struct cpl_pass_accept_req *, enum chip_type, @@ -96,4 +97,19 @@ cxgb_mk_tid_release(struct sk_buff *skb, u32 len, u32 tid, u16 chan) OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_TID_RELEASE, tid)); set_wr_txq(skb, CPL_PRIORITY_SETUP, chan); } + +static inline void +cxgb_mk_close_con_req(struct sk_buff *skb, u32 len, u32 tid, u16 chan, + void *handle, arp_err_handler_t handler) +{ + struct 
cpl_close_con_req *req; + + req = (struct cpl_close_con_req *)__skb_put(skb, len); + memset(req, 0, len); + + INIT_TP_WR(req, tid); + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid)); + set_wr_txq(skb, CPL_PRIORITY_DATA, chan); + t4_set_arp_err_handler(skb, handle, handler); +} #endif diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index 994058f..a8f5f36 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -615,21 +615,14 @@ void cxgbit_free_np(struct iscsi_np *np) static void cxgbit_send_halfclose(struct cxgbit_sock *csk) { struct sk_buff *skb; - struct cpl_close_con_req *req; - unsigned int len = roundup(sizeof(struct cpl_close_con_req), 16); + u32 len = roundup(sizeof(struct cpl_close_con_req), 16); skb = alloc_skb(len, GFP_ATOMIC); if (!skb) return; - req = (struct cpl_close_con_req *)__skb_put(skb, len); - memset(req, 0, len); - - set_wr_txq(skb, CPL_PRIORITY_DATA, csk->txq_idx); - INIT_TP_WR(req, csk->tid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, - csk->tid)); - req->rsvd = 0; + cxgb_mk_close_con_req(skb, len, csk->tid, csk->txq_idx, + NULL, NULL); cxgbit_skcb_flags(skb) |= SKCBF_TX_FLAG_COMPL; __skb_queue_tail(&csk->txq, skb); -- cgit v1.1 From a7e1a97f88058ed9b6aa054b38167fbe62f59f50 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 13 Sep 2016 21:24:04 +0530 Subject: libcxgb,iw_cxgb4,cxgbit: add cxgb_mk_abort_req() Add cxgb_mk_abort_req() to remove duplicate code to form CPL_ABORT_REQ hardware command. Signed-off-by: Varun Prakash Signed-off-by: David S. 
Miller --- drivers/infiniband/hw/cxgb4/cm.c | 13 ++++--------- drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h | 16 ++++++++++++++++ drivers/target/iscsi/cxgbit/cxgbit_cm.c | 13 +++---------- 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 22bccd8..484196e 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -652,21 +652,16 @@ static int send_halfclose(struct c4iw_ep *ep) static int send_abort(struct c4iw_ep *ep) { - struct cpl_abort_req *req; - int wrlen = roundup(sizeof *req, 16); + u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16); struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); if (WARN_ON(!req_skb)) return -ENOMEM; - set_wr_txq(req_skb, CPL_PRIORITY_DATA, ep->txq_idx); - t4_set_arp_err_handler(req_skb, ep, abort_arp_failure); - req = (struct cpl_abort_req *)skb_put(req_skb, wrlen); - memset(req, 0, wrlen); - INIT_TP_WR(req, ep->hwtid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid)); - req->cmd = CPL_ABORT_SEND_RST; + cxgb_mk_abort_req(req_skb, wrlen, ep->hwtid, ep->txq_idx, + ep, abort_arp_failure); + return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t); } diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h index e77661d..2d3a3bf 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h @@ -112,4 +112,20 @@ cxgb_mk_close_con_req(struct sk_buff *skb, u32 len, u32 tid, u16 chan, set_wr_txq(skb, CPL_PRIORITY_DATA, chan); t4_set_arp_err_handler(skb, handle, handler); } + +static inline void +cxgb_mk_abort_req(struct sk_buff *skb, u32 len, u32 tid, u16 chan, + void *handle, arp_err_handler_t handler) +{ + struct cpl_abort_req *req; + + req = (struct cpl_abort_req *)__skb_put(skb, len); + memset(req, 0, len); + + 
INIT_TP_WR(req, tid); + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, tid)); + req->cmd = CPL_ABORT_SEND_RST; + set_wr_txq(skb, CPL_PRIORITY_DATA, chan); + t4_set_arp_err_handler(skb, handle, handler); +} #endif diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index a8f5f36..f2b737e 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -647,9 +647,8 @@ static void cxgbit_abort_arp_failure(void *handle, struct sk_buff *skb) static int cxgbit_send_abort_req(struct cxgbit_sock *csk) { - struct cpl_abort_req *req; - unsigned int len = roundup(sizeof(*req), 16); struct sk_buff *skb; + u32 len = roundup(sizeof(struct cpl_abort_req), 16); pr_debug("%s: csk %p tid %u; state %d\n", __func__, csk, csk->tid, csk->com.state); @@ -660,15 +659,9 @@ static int cxgbit_send_abort_req(struct cxgbit_sock *csk) cxgbit_send_tx_flowc_wr(csk); skb = __skb_dequeue(&csk->skbq); - req = (struct cpl_abort_req *)__skb_put(skb, len); - memset(req, 0, len); + cxgb_mk_abort_req(skb, len, csk->tid, csk->txq_idx, + csk->com.cdev, cxgbit_abort_arp_failure); - set_wr_txq(skb, CPL_PRIORITY_DATA, csk->txq_idx); - t4_set_arp_err_handler(skb, csk->com.cdev, cxgbit_abort_arp_failure); - INIT_TP_WR(req, csk->tid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, - csk->tid)); - req->cmd = CPL_ABORT_SEND_RST; return cxgbit_l2t_send(csk->com.cdev, skb, csk->l2t); } -- cgit v1.1 From 052f4731ed1fd6b132a14c56f49435377a246834 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 13 Sep 2016 21:24:05 +0530 Subject: libcxgb,iw_cxgb4,cxgbit: add cxgb_mk_abort_rpl() Add cxgb_mk_abort_rpl() to remove duplicate code to form CPL_ABORT_RPL hardware command. Signed-off-by: Varun Prakash Signed-off-by: David S. 
Miller --- drivers/infiniband/hw/cxgb4/cm.c | 10 ++++------ drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h | 14 ++++++++++++++ drivers/target/iscsi/cxgbit/cxgbit_cm.c | 11 ++--------- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 484196e..a6d5fcb 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2705,12 +2705,12 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) { struct cpl_abort_req_rss *req = cplhdr(skb); struct c4iw_ep *ep; - struct cpl_abort_rpl *rpl; struct sk_buff *rpl_skb; struct c4iw_qp_attributes attrs; int ret; int release = 0; unsigned int tid = GET_TID(req); + u32 len = roundup(sizeof(struct cpl_abort_rpl), 16); ep = get_ep_from_tid(dev, tid); if (!ep) @@ -2809,11 +2809,9 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) release = 1; goto out; } - set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); - rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl)); - INIT_TP_WR(rpl, ep->hwtid); - OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid)); - rpl->cmd = CPL_ABORT_NO_RST; + + cxgb_mk_abort_rpl(rpl_skb, len, ep->hwtid, ep->txq_idx); + c4iw_ofld_send(&ep->com.dev->rdev, rpl_skb); out: if (release) diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h index 2d3a3bf..70999e8 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h @@ -128,4 +128,18 @@ cxgb_mk_abort_req(struct sk_buff *skb, u32 len, u32 tid, u16 chan, set_wr_txq(skb, CPL_PRIORITY_DATA, chan); t4_set_arp_err_handler(skb, handle, handler); } + +static inline void +cxgb_mk_abort_rpl(struct sk_buff *skb, u32 len, u32 tid, u16 chan) +{ + struct cpl_abort_rpl *rpl; + + rpl = (struct cpl_abort_rpl *)__skb_put(skb, len); + memset(rpl, 0, len); + + INIT_TP_WR(rpl, tid); + OPCODE_TID(rpl) = 
cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, tid)); + rpl->cmd = CPL_ABORT_NO_RST; + set_wr_txq(skb, CPL_PRIORITY_DATA, chan); +} #endif diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index f2b737e..9bdbe3b 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -1642,11 +1642,10 @@ static void cxgbit_abort_req_rss(struct cxgbit_sock *csk, struct sk_buff *skb) { struct cpl_abort_req_rss *hdr = cplhdr(skb); unsigned int tid = GET_TID(hdr); - struct cpl_abort_rpl *rpl; struct sk_buff *rpl_skb; bool release = false; bool wakeup_thread = false; - unsigned int len = roundup(sizeof(*rpl), 16); + u32 len = roundup(sizeof(struct cpl_abort_rpl), 16); pr_debug("%s: csk %p; tid %u; state %d\n", __func__, csk, tid, csk->com.state); @@ -1686,14 +1685,8 @@ static void cxgbit_abort_req_rss(struct cxgbit_sock *csk, struct sk_buff *skb) cxgbit_send_tx_flowc_wr(csk); rpl_skb = __skb_dequeue(&csk->skbq); - set_wr_txq(skb, CPL_PRIORITY_DATA, csk->txq_idx); - - rpl = (struct cpl_abort_rpl *)__skb_put(rpl_skb, len); - memset(rpl, 0, len); - INIT_TP_WR(rpl, csk->tid); - OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, tid)); - rpl->cmd = CPL_ABORT_NO_RST; + cxgb_mk_abort_rpl(rpl_skb, len, csk->tid, csk->txq_idx); cxgbit_ofld_send(csk->com.cdev, rpl_skb); if (wakeup_thread) { -- cgit v1.1 From 6e3b6fc201fe16d3944e2b293e7f47a72f4a56c1 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 13 Sep 2016 21:24:06 +0530 Subject: libcxgb,iw_cxgb4,cxgbit: add cxgb_mk_rx_data_ack() Add cxgb_mk_rx_data_ack() to remove duplicate code to form CPL_RX_DATA_ACK hardware command. Signed-off-by: Varun Prakash Signed-off-by: David S. 
Miller --- drivers/infiniband/hw/cxgb4/cm.c | 19 ++++++++----------- drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h | 15 +++++++++++++++ drivers/target/iscsi/cxgbit/cxgbit_cm.c | 16 ++++++---------- 3 files changed, 29 insertions(+), 21 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index a6d5fcb..3cbbfbe 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1354,9 +1354,9 @@ static void established_upcall(struct c4iw_ep *ep) static int update_rx_credits(struct c4iw_ep *ep, u32 credits) { - struct cpl_rx_data_ack *req; struct sk_buff *skb; - int wrlen = roundup(sizeof *req, 16); + u32 wrlen = roundup(sizeof(struct cpl_rx_data_ack), 16); + u32 credit_dack; PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits); skb = get_skb(NULL, wrlen, GFP_KERNEL); @@ -1373,15 +1373,12 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits) if (ep->rcv_win > RCV_BUFSIZ_M * 1024) credits += ep->rcv_win - RCV_BUFSIZ_M * 1024; - req = (struct cpl_rx_data_ack *) skb_put(skb, wrlen); - memset(req, 0, wrlen); - INIT_TP_WR(req, ep->hwtid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK, - ep->hwtid)); - req->credit_dack = cpu_to_be32(credits | RX_FORCE_ACK_F | - RX_DACK_CHANGE_F | - RX_DACK_MODE_V(dack_mode)); - set_wr_txq(skb, CPL_PRIORITY_ACK, ep->ctrlq_idx); + credit_dack = credits | RX_FORCE_ACK_F | RX_DACK_CHANGE_F | + RX_DACK_MODE_V(dack_mode); + + cxgb_mk_rx_data_ack(skb, wrlen, ep->hwtid, ep->ctrlq_idx, + credit_dack); + c4iw_ofld_send(&ep->com.dev->rdev, skb); return credits; } diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h index 70999e8..515b94f 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h @@ -142,4 +142,19 @@ cxgb_mk_abort_rpl(struct sk_buff *skb, u32 len, u32 tid, u16 chan) rpl->cmd = CPL_ABORT_NO_RST; 
set_wr_txq(skb, CPL_PRIORITY_DATA, chan); } + +static inline void +cxgb_mk_rx_data_ack(struct sk_buff *skb, u32 len, u32 tid, u16 chan, + u32 credit_dack) +{ + struct cpl_rx_data_ack *req; + + req = (struct cpl_rx_data_ack *)__skb_put(skb, len); + memset(req, 0, len); + + INIT_TP_WR(req, tid); + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK, tid)); + req->credit_dack = cpu_to_be32(credit_dack); + set_wr_txq(skb, CPL_PRIORITY_ACK, chan); +} #endif diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index 9bdbe3b..2fb1bf1 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -994,22 +994,18 @@ static void cxgbit_send_rx_credits(struct cxgbit_sock *csk, struct sk_buff *skb) int cxgbit_rx_data_ack(struct cxgbit_sock *csk) { struct sk_buff *skb; - struct cpl_rx_data_ack *req; - unsigned int len = roundup(sizeof(*req), 16); + u32 len = roundup(sizeof(struct cpl_rx_data_ack), 16); + u32 credit_dack; skb = alloc_skb(len, GFP_KERNEL); if (!skb) return -1; - req = (struct cpl_rx_data_ack *)__skb_put(skb, len); - memset(req, 0, len); + credit_dack = RX_DACK_CHANGE_F | RX_DACK_MODE_V(1) | + RX_CREDITS_V(csk->rx_credits); - set_wr_txq(skb, CPL_PRIORITY_ACK, csk->ctrlq_idx); - INIT_TP_WR(req, csk->tid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK, - csk->tid)); - req->credit_dack = cpu_to_be32(RX_DACK_CHANGE_F | RX_DACK_MODE_V(1) | - RX_CREDITS_V(csk->rx_credits)); + cxgb_mk_rx_data_ack(skb, len, csk->tid, csk->ctrlq_idx, + credit_dack); csk->rx_credits = 0; -- cgit v1.1 From 778935778c3b88e5152a88765850009006ef2e32 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Tue, 26 Apr 2016 10:42:25 -0700 Subject: PM / runtime: Use _rcuidle for runtime suspend tracepoints Further testing with false negatives suppressed by commit 293e2421fe25 ("rcu: Remove superfluous versions of rcu_read_lock_sched_held()") identified a few more unprotected uses of RCU from the idle loop. Because RCU actively ignores idle-loop code (for energy-efficiency reasons, among other things), using RCU from the idle loop can result in too-short grace periods, in turn resulting in arbitrary misbehavior. The affected function is rpm_suspend(). The resulting lockdep-RCU splat is as follows: ------------------------------------------------------------------------ Warning from omap3 =============================== [ INFO: suspicious RCU usage. ] 4.6.0-rc5-next-20160426+ #1112 Not tainted ------------------------------- include/trace/events/rpm.h:63 suspicious rcu_dereference_check() usage! other info that might help us debug this: RCU used illegally from idle CPU! rcu_scheduler_active = 1, debug_locks = 0 RCU used illegally from extended quiescent state! 
1 lock held by swapper/0/0: #0: (&(&dev->power.lock)->rlock){-.-...}, at: [] __pm_runtime_suspend+0x54/0x84 stack backtrace: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc5-next-20160426+ #1112 Hardware name: Generic OMAP36xx (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0xb0/0xe4) [] (dump_stack) from [] (rpm_suspend+0x604/0x7e4) [] (rpm_suspend) from [] (__pm_runtime_suspend+0x64/0x84) [] (__pm_runtime_suspend) from [] (omap2_gpio_prepare_for_idle+0x5c/0x70) [] (omap2_gpio_prepare_for_idle) from [] (omap_sram_idle+0x140/0x244) [] (omap_sram_idle) from [] (omap3_enter_idle_bm+0xfc/0x1ec) [] (omap3_enter_idle_bm) from [] (cpuidle_enter_state+0x80/0x3d4) [] (cpuidle_enter_state) from [] (cpu_startup_entry+0x198/0x3a0) [] (cpu_startup_entry) from [] (start_kernel+0x354/0x3c8) [] (start_kernel) from [<8000807c>] (0x8000807c) ------------------------------------------------------------------------ Reported-by: Tony Lindgren Signed-off-by: Paul E. McKenney Tested-by: Tony Lindgren Tested-by: Guenter Roeck [ rjw: Subject ] Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/runtime.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 17995fa..82a081e 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -419,7 +419,7 @@ static int rpm_suspend(struct device *dev, int rpmflags) struct device *parent = NULL; int retval; - trace_rpm_suspend(dev, rpmflags); + trace_rpm_suspend_rcuidle(dev, rpmflags); repeat: retval = rpm_check_suspend_allowed(dev); @@ -549,7 +549,7 @@ static int rpm_suspend(struct device *dev, int rpmflags) } out: - trace_rpm_return_int(dev, _THIS_IP_, retval); + trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval); return retval; -- cgit v1.1 From 85574dbf9d12b393448334897acf13f567bac8f9 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 14 Sep 2016 23:13:15 +0800 Subject: net: ethernet: mediatek: refactoring mtk_hw_init to be reused The existing mtk_hw_init mixes hardware and software initialization, which makes it slightly hard to reuse for the reset recovery process. Split it so that mtk_hw_init keeps only the hardware initialization, and move the rest, such as IRQ registration and MDIO initialization, which concern the interface of the core driver, to a more appropriate place, because there is no need to register IRQs or re-initialize those structures again during the reset process. Signed-off-by: Sean Wang Signed-off-by: David S.
Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 62 ++++++++++++++++------------- 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 6e01f1f..6161701 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1415,7 +1415,12 @@ static int mtk_stop(struct net_device *dev) static int __init mtk_hw_init(struct mtk_eth *eth) { - int err, i; + int i; + + clk_prepare_enable(eth->clks[MTK_CLK_ETHIF]); + clk_prepare_enable(eth->clks[MTK_CLK_ESW]); + clk_prepare_enable(eth->clks[MTK_CLK_GP1]); + clk_prepare_enable(eth->clks[MTK_CLK_GP2]); /* reset the frame engine */ reset_control_assert(eth->rstc); @@ -1441,19 +1446,6 @@ static int __init mtk_hw_init(struct mtk_eth *eth) /* Enable RX VLan Offloading */ mtk_w32(eth, 1, MTK_CDMP_EG_CTRL); - err = devm_request_irq(eth->dev, eth->irq[1], mtk_handle_irq_tx, 0, - dev_name(eth->dev), eth); - if (err) - return err; - err = devm_request_irq(eth->dev, eth->irq[2], mtk_handle_irq_rx, 0, - dev_name(eth->dev), eth); - if (err) - return err; - - err = mtk_mdio_init(eth); - if (err) - return err; - /* disable delay and normal interrupt */ mtk_w32(eth, 0, MTK_QDMA_DELAY_INT); mtk_w32(eth, 0, MTK_PDMA_DELAY_INT); @@ -1786,16 +1778,7 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) eth->netdev[id]->features |= MTK_HW_FEATURES; eth->netdev[id]->ethtool_ops = &mtk_ethtool_ops; - err = register_netdev(eth->netdev[id]); - if (err) { - dev_err(eth->dev, "error bringing up device\n"); - goto free_netdev; - } eth->netdev[id]->irq = eth->irq[0]; - netif_info(eth, probe, eth->netdev[id], - "mediatek frame engine at 0x%08lx, irq %d\n", - eth->netdev[id]->base_addr, eth->irq[0]); - return 0; free_netdev: @@ -1865,11 +1848,6 @@ static int mtk_probe(struct platform_device *pdev) } } - clk_prepare_enable(eth->clks[MTK_CLK_ETHIF]); - 
clk_prepare_enable(eth->clks[MTK_CLK_ESW]); - clk_prepare_enable(eth->clks[MTK_CLK_GP1]); - clk_prepare_enable(eth->clks[MTK_CLK_GP2]); - eth->msg_enable = netif_msg_init(mtk_msg_level, MTK_DEFAULT_MSG_ENABLE); INIT_WORK(ð->pending_work, mtk_pending_work); @@ -1890,6 +1868,34 @@ static int mtk_probe(struct platform_device *pdev) goto err_free_dev; } + err = devm_request_irq(eth->dev, eth->irq[1], mtk_handle_irq_tx, 0, + dev_name(eth->dev), eth); + if (err) + goto err_free_dev; + + err = devm_request_irq(eth->dev, eth->irq[2], mtk_handle_irq_rx, 0, + dev_name(eth->dev), eth); + if (err) + goto err_free_dev; + + err = mtk_mdio_init(eth); + if (err) + goto err_free_dev; + + for (i = 0; i < MTK_MAX_DEVS; i++) { + if (!eth->netdev[i]) + continue; + + err = register_netdev(eth->netdev[i]); + if (err) { + dev_err(eth->dev, "error bringing up device\n"); + goto err_free_dev; + } else + netif_info(eth, probe, eth->netdev[i], + "mediatek frame engine at 0x%08lx, irq %d\n", + eth->netdev[i]->base_addr, eth->irq[0]); + } + /* we run 2 devices on the same DMA ring so we need a dummy device * for NAPI to work */ -- cgit v1.1 From bf253fb72221cdd9dc31009056f269a420f7bbd9 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 14 Sep 2016 23:13:16 +0800 Subject: net: ethernet: mediatek: add mtk_hw_deinit call as the opposite to mtk_hw_init call grouping things related to the deinitialization of what mtk_hw_init call does that help to be reused by the reset process and the error path handling. Signed-off-by: Sean Wang Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 6161701..1f756bd 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1477,6 +1477,16 @@ static int __init mtk_hw_init(struct mtk_eth *eth) return 0; } +static int mtk_hw_deinit(struct mtk_eth *eth) +{ + clk_disable_unprepare(eth->clks[MTK_CLK_GP2]); + clk_disable_unprepare(eth->clks[MTK_CLK_GP1]); + clk_disable_unprepare(eth->clks[MTK_CLK_ESW]); + clk_disable_unprepare(eth->clks[MTK_CLK_ETHIF]); + + return 0; +} + static int __init mtk_init(struct net_device *dev) { struct mtk_mac *mac = netdev_priv(dev); @@ -1926,10 +1936,7 @@ static int mtk_remove(struct platform_device *pdev) mtk_stop(eth->netdev[i]); } - clk_disable_unprepare(eth->clks[MTK_CLK_ETHIF]); - clk_disable_unprepare(eth->clks[MTK_CLK_ESW]); - clk_disable_unprepare(eth->clks[MTK_CLK_GP1]); - clk_disable_unprepare(eth->clks[MTK_CLK_GP2]); + mtk_hw_deinit(eth); netif_napi_del(&eth->tx_napi); netif_napi_del(&eth->rx_napi); -- cgit v1.1 From 8a8a9e89f801ce5d3d1d57ac48db678caf072147 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 14 Sep 2016 23:13:17 +0800 Subject: net: ethernet: mediatek: cleanup error path inside mtk_hw_init This cleans up the error path inside the mtk_hw_init call so that it is able to exit appropriately when something fails, and also refactors the mtk_cleanup call to make part of its logic reusable on the error path. Signed-off-by: Sean Wang Signed-off-by: David S.
Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 34 ++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 1f756bd..1272316 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1567,17 +1567,36 @@ static void mtk_pending_work(struct work_struct *work) rtnl_unlock(); } -static int mtk_cleanup(struct mtk_eth *eth) +static int mtk_free_dev(struct mtk_eth *eth) { int i; for (i = 0; i < MTK_MAC_COUNT; i++) { if (!eth->netdev[i]) continue; + free_netdev(eth->netdev[i]); + } + + return 0; +} +static int mtk_unreg_dev(struct mtk_eth *eth) +{ + int i; + + for (i = 0; i < MTK_MAC_COUNT; i++) { + if (!eth->netdev[i]) + continue; unregister_netdev(eth->netdev[i]); - free_netdev(eth->netdev[i]); } + + return 0; +} + +static int mtk_cleanup(struct mtk_eth *eth) +{ + mtk_unreg_dev(eth); + mtk_free_dev(eth); cancel_work_sync(&eth->pending_work); return 0; @@ -1875,7 +1894,7 @@ static int mtk_probe(struct platform_device *pdev) err = mtk_add_mac(eth, mac_np); if (err) - goto err_free_dev; + goto err_deinit_hw; } err = devm_request_irq(eth->dev, eth->irq[1], mtk_handle_irq_tx, 0, @@ -1899,7 +1918,7 @@ static int mtk_probe(struct platform_device *pdev) err = register_netdev(eth->netdev[i]); if (err) { dev_err(eth->dev, "error bringing up device\n"); - goto err_free_dev; + goto err_deinit_mdio; } else netif_info(eth, probe, eth->netdev[i], "mediatek frame engine at 0x%08lx, irq %d\n", @@ -1919,8 +1938,13 @@ static int mtk_probe(struct platform_device *pdev) return 0; +err_deinit_mdio: + mtk_mdio_cleanup(eth); err_free_dev: - mtk_cleanup(eth); + mtk_free_dev(eth); +err_deinit_hw: + mtk_hw_deinit(eth); + return err; } -- cgit v1.1 From 26a2ad8a5418525d21f06083e65b10c932633209 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 14 Sep 2016 23:13:18 +0800 Subject: net: ethernet: mediatek: add
controlling power domain the ethernet belongs to introduce power domain control which the digital circuit of the ethernet belongs to inside the flow of hardware initialization and deinitialization which helps the entire ethernet hardware block could restart cleanly and completely as being back to the initial state when the whole machine reboot. Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 1272316..01f5911 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -1417,6 +1418,9 @@ static int __init mtk_hw_init(struct mtk_eth *eth) { int i; + pm_runtime_enable(eth->dev); + pm_runtime_get_sync(eth->dev); + clk_prepare_enable(eth->clks[MTK_CLK_ETHIF]); clk_prepare_enable(eth->clks[MTK_CLK_ESW]); clk_prepare_enable(eth->clks[MTK_CLK_GP1]); @@ -1484,6 +1488,9 @@ static int mtk_hw_deinit(struct mtk_eth *eth) clk_disable_unprepare(eth->clks[MTK_CLK_ESW]); clk_disable_unprepare(eth->clks[MTK_CLK_ETHIF]); + pm_runtime_put_sync(eth->dev); + pm_runtime_disable(eth->dev); + return 0; } -- cgit v1.1 From 9ea4d311509fc11128a464d86745beeafd575051 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 14 Sep 2016 23:13:19 +0800 Subject: net: ethernet: mediatek: add the whole ethernet reset into the reset process 1) original driver only resets DMA used by descriptor rings which can't guarantee it can recover all various kinds of fatal errors, so the patch tries to reset the underlying hardware resource from scratch on Mediatek SoC required for ethernet running, including power, pin mux control, clock and internal circuits on the ethernet in order to restore into the initial state which the rebooted machine gives. 
2) add state variable inside structure mtk_eth to help distinguish mtk_hw_init is called between the initialization during boot time or re-initialization during the reset process. 3) add ge_mode variable inside structure mtk_mac for restoring the interface mode of the current setup for the target MAC. 4) remove __init attribute from mtk_hw_init definition Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 51 ++++++++++++++++++++++++----- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 8 +++++ 2 files changed, 51 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 01f5911..7b2f5ed 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -231,7 +231,7 @@ static int mtk_phy_connect(struct mtk_mac *mac) { struct mtk_eth *eth = mac->hw; struct device_node *np; - u32 val, ge_mode; + u32 val; np = of_parse_phandle(mac->of_node, "phy-handle", 0); if (!np && of_phy_is_fixed_link(mac->of_node)) @@ -245,18 +245,18 @@ static int mtk_phy_connect(struct mtk_mac *mac) case PHY_INTERFACE_MODE_RGMII_RXID: case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII: - ge_mode = 0; + mac->ge_mode = 0; break; case PHY_INTERFACE_MODE_MII: - ge_mode = 1; + mac->ge_mode = 1; break; case PHY_INTERFACE_MODE_REVMII: - ge_mode = 2; + mac->ge_mode = 2; break; case PHY_INTERFACE_MODE_RMII: if (!mac->id) goto err_phy; - ge_mode = 3; + mac->ge_mode = 3; break; default: goto err_phy; @@ -265,7 +265,7 @@ static int mtk_phy_connect(struct mtk_mac *mac) /* put the gmac into the right mode */ regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val); val &= ~SYSCFG0_GE_MODE(SYSCFG0_GE_MASK, mac->id); - val |= SYSCFG0_GE_MODE(ge_mode, mac->id); + val |= SYSCFG0_GE_MODE(mac->ge_mode, mac->id); regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val); mtk_phy_connect_node(eth, mac, np); @@ -1414,9 +1414,12 @@ static int
mtk_stop(struct net_device *dev) return 0; } -static int __init mtk_hw_init(struct mtk_eth *eth) +static int mtk_hw_init(struct mtk_eth *eth) { - int i; + int i, val; + + if (test_and_set_bit(MTK_HW_INIT, &eth->state)) + return 0; pm_runtime_enable(eth->dev); pm_runtime_get_sync(eth->dev); @@ -1432,6 +1435,15 @@ static int __init mtk_hw_init(struct mtk_eth *eth) reset_control_deassert(eth->rstc); usleep_range(10, 20); + regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val); + for (i = 0; i < MTK_MAC_COUNT; i++) { + if (!eth->mac[i]) + continue; + val &= ~SYSCFG0_GE_MODE(SYSCFG0_GE_MASK, eth->mac[i]->id); + val |= SYSCFG0_GE_MODE(eth->mac[i]->ge_mode, eth->mac[i]->id); + } + regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val); + /* Set GE2 driving and slew rate */ regmap_write(eth->pctl, GPIO_DRV_SEL10, 0xa00); @@ -1483,6 +1495,9 @@ static int __init mtk_hw_init(struct mtk_eth *eth) static int mtk_hw_deinit(struct mtk_eth *eth) { + if (!test_and_clear_bit(MTK_HW_INIT, &eth->state)) + return 0; + clk_disable_unprepare(eth->clks[MTK_CLK_GP2]); clk_disable_unprepare(eth->clks[MTK_CLK_GP1]); clk_disable_unprepare(eth->clks[MTK_CLK_ESW]); @@ -1560,6 +1575,26 @@ static void mtk_pending_work(struct work_struct *work) __set_bit(i, &restart); } + /* restart underlying hardware such as power, clock, pin mux + * and the connected phy + */ + mtk_hw_deinit(eth); + + if (eth->dev->pins) + pinctrl_select_state(eth->dev->pins->p, + eth->dev->pins->default_state); + mtk_hw_init(eth); + + for (i = 0; i < MTK_MAC_COUNT; i++) { + if (!eth->mac[i] || + of_phy_is_fixed_link(eth->mac[i]->of_node)) + continue; + err = phy_init_hw(eth->mac[i]->phy_dev); + if (err) + dev_err(eth->dev, "%s: PHY init failed.\n", + eth->netdev[i]->name); + } + /* restart DMA and enable IRQs */ for (i = 0; i < MTK_MAC_COUNT; i++) { if (!test_bit(i, &restart)) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 0b984dc..388cbe7 100644 ---
a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -330,6 +330,10 @@ enum mtk_clks_map { MTK_CLK_MAX }; +enum mtk_dev_state { + MTK_HW_INIT +}; + /* struct mtk_tx_buf - This struct holds the pointers to the memory pointed at * by the TX descriptor s * @skb: The SKB pointer of the packet being sent @@ -413,6 +417,7 @@ struct mtk_rx_ring { * @clks: clock array for all clocks required * @mii_bus: If there is a bus we need to create an instance for it * @pending_work: The workqueue used to reset the dma ring + * @state Initialization and runtime state of the device. */ struct mtk_eth { @@ -441,11 +446,13 @@ struct mtk_eth { struct mii_bus *mii_bus; struct work_struct pending_work; + unsigned long state; }; /* struct mtk_mac - the structure that holds the info about the MACs of the * SoC * @id: The number of the MAC + * @ge_mode: Interface mode kept for setup restoring * @of_node: Our devicetree node * @hw: Backpointer to our main datastruture * @hw_stats: Packet statistics counter @@ -453,6 +460,7 @@ struct mtk_eth { */ struct mtk_mac { int id; + int ge_mode; struct device_node *of_node; struct mtk_eth *hw; struct mtk_hw_stats *hw_stats; -- cgit v1.1 From 2a8307aab373684e8c1067695310db4438621868 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 14 Sep 2016 23:13:20 +0800 Subject: net: ethernet: mediatek: add more resets for internal ethernet circuit block struct mtk_eth has already contained struct regmap ethsys pointer to the address range of the internal circuit reset, so we reuse it to reset more internal blocks on ethernet hardware such as packet processing engine (PPE) and frame engine (FE) instead of rstc which deals with FE only. Signed-off-by: Sean Wang Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 27 +++++++++++++++------------ drivers/net/ethernet/mediatek/mtk_eth_soc.h | 6 +++++- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 7b2f5ed..4574332 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1414,6 +1414,19 @@ static int mtk_stop(struct net_device *dev) return 0; } +static void ethsys_reset(struct mtk_eth *eth, u32 reset_bits) +{ + regmap_update_bits(eth->ethsys, ETHSYS_RSTCTRL, + reset_bits, + reset_bits); + + usleep_range(1000, 1100); + regmap_update_bits(eth->ethsys, ETHSYS_RSTCTRL, + reset_bits, + ~reset_bits); + mdelay(10); +} + static int mtk_hw_init(struct mtk_eth *eth) { int i, val; @@ -1428,12 +1441,8 @@ static int mtk_hw_init(struct mtk_eth *eth) clk_prepare_enable(eth->clks[MTK_CLK_ESW]); clk_prepare_enable(eth->clks[MTK_CLK_GP1]); clk_prepare_enable(eth->clks[MTK_CLK_GP2]); - - /* reset the frame engine */ - reset_control_assert(eth->rstc); - usleep_range(10, 20); - reset_control_deassert(eth->rstc); - usleep_range(10, 20); + ethsys_reset(eth, RSTCTRL_FE); + ethsys_reset(eth, RSTCTRL_PPE); regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val); for (i = 0; i < MTK_MAC_COUNT; i++) { @@ -1896,12 +1905,6 @@ static int mtk_probe(struct platform_device *pdev) return PTR_ERR(eth->pctl); } - eth->rstc = devm_reset_control_get(&pdev->dev, "eth"); - if (IS_ERR(eth->rstc)) { - dev_err(&pdev->dev, "no eth reset found\n"); - return PTR_ERR(eth->rstc); - } - for (i = 0; i < 3; i++) { eth->irq[i] = platform_get_irq(pdev, i); if (eth->irq[i] < 0) { diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 388cbe7..7efa00f 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -266,6 +266,11 @@ #define SYSCFG0_GE_MASK 0x3 #define 
SYSCFG0_GE_MODE(x, y) (x << (12 + (y * 2))) +/*ethernet reset control register*/ +#define ETHSYS_RSTCTRL 0x34 +#define RSTCTRL_FE BIT(6) +#define RSTCTRL_PPE BIT(31) + struct mtk_rx_dma { unsigned int rxd1; unsigned int rxd2; @@ -423,7 +428,6 @@ struct mtk_rx_ring { struct mtk_eth { struct device *dev; void __iomem *base; - struct reset_control *rstc; spinlock_t page_lock; spinlock_t irq_lock; struct net_device dummy_dev; -- cgit v1.1 From dce6fa42199d493596315cddc0b4e7ac1d57475b Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 14 Sep 2016 23:13:21 +0800 Subject: net: ethernet: mediatek: avoid race condition during the reset process add the protection of the race condition between the reset process and hardware access happening on the related callbacks. Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 36 +++++++++++++++++++++++++++++ drivers/net/ethernet/mediatek/mtk_eth_soc.h | 3 ++- 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 4574332..522fe8d 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -145,6 +145,9 @@ static void mtk_phy_link_adjust(struct net_device *dev) MAC_MCR_RX_EN | MAC_MCR_BACKOFF_EN | MAC_MCR_BACKPR_EN; + if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) + return; + switch (mac->phy_dev->speed) { case SPEED_1000: mcr |= MAC_MCR_SPEED_1000; @@ -370,6 +373,9 @@ static int mtk_set_mac_address(struct net_device *dev, void *p) if (ret) return ret; + if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) + return -EBUSY; + spin_lock_bh(&mac->hw->page_lock); mtk_w32(mac->hw, (macaddr[0] << 8) | macaddr[1], MTK_GDMA_MAC_ADRH(mac->id)); @@ -770,6 +776,9 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev) */ spin_lock(&eth->page_lock); + if (unlikely(test_bit(MTK_RESETTING, &eth->state))) + goto drop; +
tx_num = mtk_cal_txd_req(skb); if (unlikely(atomic_read(&ring->free_count) <= tx_num)) { mtk_stop_queue(eth); @@ -842,6 +851,9 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, netdev = eth->netdev[mac]; + if (unlikely(test_bit(MTK_RESETTING, &eth->state))) + goto release_desc; + /* alloc new buffer */ new_data = napi_alloc_frag(ring->frag_size); if (unlikely(!new_data)) { @@ -1576,6 +1588,12 @@ static void mtk_pending_work(struct work_struct *work) rtnl_lock(); + dev_dbg(eth->dev, "[%s][%d] reset\n", __func__, __LINE__); + + while (test_and_set_bit_lock(MTK_RESETTING, &eth->state)) + cpu_relax(); + + dev_dbg(eth->dev, "[%s][%d] mtk_stop starts\n", __func__, __LINE__); /* stop all devices to make sure that dma is properly shut down */ for (i = 0; i < MTK_MAC_COUNT; i++) { if (!eth->netdev[i]) @@ -1583,6 +1601,7 @@ static void mtk_pending_work(struct work_struct *work) mtk_stop(eth->netdev[i]); __set_bit(i, &restart); } + dev_dbg(eth->dev, "[%s][%d] mtk_stop ends\n", __func__, __LINE__); /* restart underlying hardware such as power, clock, pin mux * and the connected phy @@ -1615,6 +1634,11 @@ static void mtk_pending_work(struct work_struct *work) dev_close(eth->netdev[i]); } } + + dev_dbg(eth->dev, "[%s][%d] reset done\n", __func__, __LINE__); + + clear_bit_unlock(MTK_RESETTING, &eth->state); + rtnl_unlock(); } @@ -1659,6 +1683,9 @@ static int mtk_get_settings(struct net_device *dev, struct mtk_mac *mac = netdev_priv(dev); int err; + if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) + return -EBUSY; + err = phy_read_status(mac->phy_dev); if (err) return -ENODEV; @@ -1709,6 +1736,9 @@ static int mtk_nway_reset(struct net_device *dev) { struct mtk_mac *mac = netdev_priv(dev); + if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) + return -EBUSY; + return genphy_restart_aneg(mac->phy_dev); } @@ -1717,6 +1747,9 @@ static u32 mtk_get_link(struct net_device *dev) struct mtk_mac *mac = netdev_priv(dev); int err; + if (unlikely(test_bit(MTK_RESETTING,
&mac->hw->state))) + return -EBUSY; + err = genphy_update_link(mac->phy_dev); if (err) return ethtool_op_get_link(dev); @@ -1757,6 +1790,9 @@ static void mtk_get_ethtool_stats(struct net_device *dev, unsigned int start; int i; + if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) + return; + if (netif_running(dev) && netif_device_present(dev)) { if (spin_trylock(&hwstats->stats_lock)) { mtk_stats_update_mac(mac); diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 7efa00f..79954b4 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -336,7 +336,8 @@ enum mtk_clks_map { }; enum mtk_dev_state { - MTK_HW_INIT + MTK_HW_INIT, + MTK_RESETTING }; /* struct mtk_tx_buf - This struct holds the pointers to the memory pointed at -- cgit v1.1 From 76f0dcbb5ae1a7c3dbeec13dd98233b8e6b0b32a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 Sep 2016 22:55:05 -0700 Subject: tcp: fix a stale ooo_last_skb after a replace When skb replaces another one in ooo queue, I forgot to also update tp->ooo_last_skb as well, if the replaced skb was the last one in the queue. To fix this, we simply can re-use the code that runs after an insertion, trying to merge skbs at the right of current skb. This not only fixes the bug, but also remove all small skbs that might be a subset of the new one. Example: We receive segments 2001:3001, 4001:5001 Then we receive 2001:8001 : We should replace 2001:3001 with the big skb, but also remove 4001:50001 from the queue to save space. packetdrill test demonstrating the bug 0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 +0 bind(3, ..., ...) = 0 +0 listen(3, 1) = 0 +0 < S 0:0(0) win 32792 +0 > S. 0:0(0) ack 1 +0.100 < . 1:1(0) ack 1 win 1024 +0 accept(3, ..., ...) = 4 +0.01 < . 1001:2001(1000) ack 1 win 1024 +0 > . 1:1(0) ack 1 +0.01 < . 1001:3001(2000) ack 1 win 1024 +0 > . 
1:1(0) ack 1 Fixes: 9f5afeae5152 ("tcp: use an RB tree for ooo receive queue") Signed-off-by: Eric Dumazet Reported-by: Yuchung Cheng Cc: Yaogong Wang Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 70b892d..dad3e7e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4502,7 +4502,7 @@ coalesce_done: NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE); __kfree_skb(skb1); - goto add_sack; + goto merge_right; } } else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { goto coalesce_done; @@ -4514,6 +4514,7 @@ insert: rb_link_node(&skb->rbnode, parent, p); rb_insert_color(&skb->rbnode, &tp->out_of_order_queue); +merge_right: /* Remove other segments covered by skb. */ while ((q = rb_next(&skb->rbnode)) != NULL) { skb1 = rb_entry(q, struct sk_buff, rbnode); -- cgit v1.1 From 2a292822f00f7409fc0bd6b2d09efc5b8e6c9c5d Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 14 Sep 2016 13:09:24 +0200 Subject: net/mlx4_en: fix off by one in error handling If an error occurs in mlx4_init_eq_table the index used in the err_out_unmap label is one too big which results in a panic in mlx4_free_eq. This patch fixes the index in the error path. Signed-off-by: Sebastian Ott Reviewed-by: Tariq Toukan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/eq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index f613977..cf8f8a7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -1305,8 +1305,8 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) return 0; err_out_unmap: - while (i >= 0) - mlx4_free_eq(dev, &priv->eq_table.eq[i--]); + while (i > 0) + mlx4_free_eq(dev, &priv->eq_table.eq[--i]); #ifdef CONFIG_RFS_ACCEL for (i = 1; i <= dev->caps.num_ports; i++) { if (mlx4_priv(dev)->port[i].rmap) { -- cgit v1.1 From 7077dc415b113ac17a6696c432bad2d66574e4fb Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 14 Sep 2016 21:29:34 +0800 Subject: net: ethernet: mediatek: fix module loading automatically based on MODULE_DEVICE_TABLE The device table is required to load modules based on modaliases. After adding MODULE_DEVICE_TABLE, below entries for example will be added to modules.alias: alias of:N*T*Cmediatek,mt7623-ethC* mtk_eth_soc Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index d919915..3743af8 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1923,6 +1923,7 @@ const struct of_device_id of_mtk_match[] = { { .compatible = "mediatek,mt7623-eth" }, {}, }; +MODULE_DEVICE_TABLE(of, of_mtk_match); static struct platform_driver mtk_driver = { .probe = mtk_probe, -- cgit v1.1 From 01afd972a737879c1466a12f696601a2ce91ea84 Mon Sep 17 00:00:00 2001 From: Ivan Mikhaylov Date: Wed, 14 Sep 2016 19:06:44 +0300 Subject: net/ibm/emac: add set mac addr callback add realization for mac address set and remove dummy callback. Signed-off-by: Ivan Mikhaylov Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ibm/emac/core.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 4c9771d..2dfc603 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -980,6 +980,33 @@ static void emac_set_multicast_list(struct net_device *ndev) __emac_set_multicast_list(dev); } +static int emac_set_mac_address(struct net_device *ndev, void *sa) +{ + struct emac_instance *dev = netdev_priv(ndev); + struct sockaddr *addr = sa; + struct emac_regs __iomem *p = dev->emacp; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + mutex_lock(&dev->link_lock); + + memcpy(ndev->dev_addr, addr->sa_data, ndev->addr_len); + + emac_rx_disable(dev); + emac_tx_disable(dev); + out_be32(&p->iahr, (ndev->dev_addr[0] << 8) | ndev->dev_addr[1]); + out_be32(&p->ialr, (ndev->dev_addr[2] << 24) | + (ndev->dev_addr[3] << 16) | (ndev->dev_addr[4] << 8) | + ndev->dev_addr[5]); + emac_tx_enable(dev); + emac_rx_enable(dev); + + mutex_unlock(&dev->link_lock); + + return 0; +} + static int emac_resize_rx_ring(struct emac_instance *dev, int new_mtu) { int rx_sync_size = emac_rx_sync_size(new_mtu); @@ -2686,7 +2713,7 @@ static const struct net_device_ops emac_netdev_ops = { .ndo_do_ioctl = emac_ioctl, .ndo_tx_timeout = emac_tx_timeout, .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = emac_set_mac_address, .ndo_start_xmit = emac_start_xmit, .ndo_change_mtu = eth_change_mtu, }; @@ -2699,7 +2726,7 @@ static const struct net_device_ops emac_gige_netdev_ops = { .ndo_do_ioctl = emac_ioctl, .ndo_tx_timeout = emac_tx_timeout, .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = emac_set_mac_address, .ndo_start_xmit = emac_start_xmit_sg, .ndo_change_mtu = emac_change_mtu, }; -- cgit v1.1 From 
7106a069f45b15e63d14484e72969e64798e641c Mon Sep 17 00:00:00 2001 From: Ivan Mikhaylov Date: Wed, 14 Sep 2016 19:06:45 +0300 Subject: net/ibm/emac: add mutex to 'set multicast list' for preventing race conditions within ioctl calls. Signed-off-by: Ivan Mikhaylov Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/emac/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 2dfc603..7af09cb 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -977,7 +977,10 @@ static void emac_set_multicast_list(struct net_device *ndev) dev->mcast_pending = 1; return; } + + mutex_lock(&dev->link_lock); __emac_set_multicast_list(dev); + mutex_unlock(&dev->link_lock); } static int emac_set_mac_address(struct net_device *ndev, void *sa) -- cgit v1.1 From d6f64d725bac20df66b2eacd847fc41d7a1905e0 Mon Sep 17 00:00:00 2001 From: Mark Tomlinson Date: Thu, 15 Sep 2016 11:40:05 +1200 Subject: net: VRF: Pass original iif to ip_route_input() The function ip_rcv_finish() calls l3mdev_ip_rcv(). On any VRF except the global VRF, this replaces skb->dev with the VRF master interface. When calling ip_route_input_noref() from here, the checks for forwarding look at this master device instead of the initial ingress interface. This will allow packets to be routed which normally would be dropped. For example, an interface that is not assigned an IP address should drop packets, but because the checking is against the master device, the packet will be forwarded. The fix here is to still call l3mdev_ip_rcv(), but remember the initial net_device. This is passed to the other functions within ip_rcv_finish, so they still see the original interface. Signed-off-by: Mark Tomlinson Acked-by: David Ahern Signed-off-by: David S. 
Miller --- net/ipv4/ip_input.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 4b351af..d6feabb 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -312,6 +312,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; + struct net_device *dev = skb->dev; /* if ingress device is enslaved to an L3 master device pass the * skb to its handler for processing @@ -341,7 +342,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) */ if (!skb_valid_dst(skb)) { int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev); + iph->tos, dev); if (unlikely(err)) { if (err == -EXDEV) __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER); @@ -370,7 +371,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) __IP_UPD_PO_STATS(net, IPSTATS_MIB_INBCAST, skb->len); } else if (skb->pkt_type == PACKET_BROADCAST || skb->pkt_type == PACKET_MULTICAST) { - struct in_device *in_dev = __in_dev_get_rcu(skb->dev); + struct in_device *in_dev = __in_dev_get_rcu(dev); /* RFC 1122 3.3.6: * -- cgit v1.1 From 0e26e5bd518f608ee2023b29429ecd4cd8b6969d Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 15 Sep 2016 02:24:13 +0000 Subject: net: dsa: bcm_sf2: Fix non static symbol warning Fixes the following sparse warning: drivers/net/dsa/bcm_sf2.c:963:19: warning: symbol 'bcm_sf2_io_ops' was not declared. Should it be static? Signed-off-by: Wei Yongjun Acked-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/dsa/bcm_sf2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 5bf4f34..e218887 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -960,7 +960,7 @@ static int bcm_sf2_core_write64(struct b53_device *dev, u8 page, u8 reg, return 0; } -struct b53_io_ops bcm_sf2_io_ops = { +static struct b53_io_ops bcm_sf2_io_ops = { .read8 = bcm_sf2_core_read8, .read16 = bcm_sf2_core_read16, .read32 = bcm_sf2_core_read32, -- cgit v1.1 From 46c21e20128ac156c5b1503bab33ba1659283815 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 15 Sep 2016 02:24:37 +0000 Subject: net: dsa: b53: Remove unused including Remove including that don't need it. Signed-off-by: Wei Yongjun Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_priv.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 76672da..f192a67 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -372,7 +372,6 @@ static inline void b53_arl_from_entry(u64 *mac_vid, u32 *fwd_entry, #ifdef CONFIG_BCM47XX -#include #include #include static inline int b53_switch_get_reset_gpio(struct b53_device *dev) -- cgit v1.1 From 7e5eded5c189abaea77556da41af1195af841b0a Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 15 Sep 2016 02:25:52 +0000 Subject: net: emac: remove unnecessary dev_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure. Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Wei Yongjun Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qualcomm/emac/emac.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 56e0a9f..42d2d233 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -722,7 +722,6 @@ static int emac_remove(struct platform_device *pdev) mdiobus_unregister(adpt->mii_bus); free_netdev(netdev); - dev_set_drvdata(&pdev->dev, NULL); return 0; } -- cgit v1.1 From 1d7b47a3c78e0b5391a18246f9637752a4565e5b Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 15 Sep 2016 02:26:10 +0000 Subject: net: emac: remove .owner field for driver Remove .owner field if calls are used which set it automatically. Generated by: scripts/coccinelle/api/platform_no_drv_owner.cocci Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 42d2d233..e47d387 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -730,7 +730,6 @@ static struct platform_driver emac_platform_driver = { .probe = emac_probe, .remove = emac_remove, .driver = { - .owner = THIS_MODULE, .name = "qcom-emac", .of_match_table = emac_dt_match, }, -- cgit v1.1 From bc6c03fa3cacd31b873e36ca16ef9678269deae6 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 15 Sep 2016 03:45:07 +0000 Subject: nfp: fix error return code in nfp_net_netdev_open() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 73725d9dfd99 ("nfp: allocate ring SW structs dynamically") Signed-off-by: Wei Yongjun Acked-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 252e492..39dadfc 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2044,12 +2044,16 @@ static int nfp_net_netdev_open(struct net_device *netdev) nn->rx_rings = kcalloc(nn->num_rx_rings, sizeof(*nn->rx_rings), GFP_KERNEL); - if (!nn->rx_rings) + if (!nn->rx_rings) { + err = -ENOMEM; goto err_free_lsc; + } nn->tx_rings = kcalloc(nn->num_tx_rings, sizeof(*nn->tx_rings), GFP_KERNEL); - if (!nn->tx_rings) + if (!nn->tx_rings) { + err = -ENOMEM; goto err_free_rx_rings; + } for (r = 0; r < nn->num_r_vecs; r++) { err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r); -- cgit v1.1 From e830baa9c3f0023769ba9aab19eb44c892769d87 Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Thu, 15 Sep 2016 14:39:21 +0200 Subject: qeth: restore device features after recovery After device recovery, only a basic set of network device features is enabled on the device. If features like checksum offloading or TSO were enabled by the user before the recovery, this results in a mismatch between the network device features, that the kernel assumes to be enabled on the device, and the features actually enabled on the device. This patch tries to restore previously set features, that require changes on the device, after the recovery of a device. In case of an error, the network device's features are changed to contain only the features that are actually turned on. Signed-off-by: Hans Wippel Signed-off-by: Ursula Braun Signed-off-by: David S. 
Miller --- drivers/s390/net/qeth_core.h | 1 + drivers/s390/net/qeth_core_main.c | 29 +++++++++++++++++++++++++++++ drivers/s390/net/qeth_l2_main.c | 3 +++ drivers/s390/net/qeth_l3_main.c | 1 + 4 files changed, 34 insertions(+) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index bf40063..6d4b68c4 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -999,6 +999,7 @@ struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *, __u16, __u16, enum qeth_prot_versions); int qeth_set_features(struct net_device *, netdev_features_t); +int qeth_recover_features(struct net_device *); netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t); /* exports for OSN */ diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 7dba6c8..6ad5a14 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -6131,6 +6131,35 @@ static int qeth_set_ipa_tso(struct qeth_card *card, int on) return rc; } +/* try to restore device features on a device after recovery */ +int qeth_recover_features(struct net_device *dev) +{ + struct qeth_card *card = dev->ml_priv; + netdev_features_t recover = dev->features; + + if (recover & NETIF_F_IP_CSUM) { + if (qeth_set_ipa_csum(card, 1, IPA_OUTBOUND_CHECKSUM)) + recover ^= NETIF_F_IP_CSUM; + } + if (recover & NETIF_F_RXCSUM) { + if (qeth_set_ipa_csum(card, 1, IPA_INBOUND_CHECKSUM)) + recover ^= NETIF_F_RXCSUM; + } + if (recover & NETIF_F_TSO) { + if (qeth_set_ipa_tso(card, 1)) + recover ^= NETIF_F_TSO; + } + + if (recover == dev->features) + return 0; + + dev_warn(&card->gdev->dev, + "Device recovery failed to restore all offload features\n"); + dev->features = recover; + return -EIO; +} +EXPORT_SYMBOL_GPL(qeth_recover_features); + int qeth_set_features(struct net_device *dev, netdev_features_t features) { struct qeth_card *card = dev->ml_priv; diff --git a/drivers/s390/net/qeth_l2_main.c 
b/drivers/s390/net/qeth_l2_main.c index 7bc20c5..54fd891 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1246,6 +1246,9 @@ contin: } /* this also sets saved unicast addresses */ qeth_l2_set_rx_mode(card->dev); + rtnl_lock(); + qeth_recover_features(card->dev); + rtnl_unlock(); } /* let user_space know that device is online */ kobject_uevent(&gdev->dev.kobj, KOBJ_CHANGE); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 7293466..2f51271 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3269,6 +3269,7 @@ contin: else dev_open(card->dev); qeth_l3_set_multicast_list(card->dev); + qeth_recover_features(card->dev); rtnl_unlock(); } qeth_trace_features(card); -- cgit v1.1 From 016930b88a1d6eb6e6b3287d593e13ca06986acc Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 15 Sep 2016 14:39:22 +0200 Subject: s390/qeth: use ip_lock for hsuid configuration qeth_l3_dev_hsuid_store() changes the ip hash table, which requires the ip_lock. Signed-off-by: Ursula Braun Signed-off-by: David S. 
Miller --- drivers/s390/net/qeth_l3_sys.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c index 65645b1..0e00a5c 100644 --- a/drivers/s390/net/qeth_l3_sys.c +++ b/drivers/s390/net/qeth_l3_sys.c @@ -297,7 +297,9 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev, addr->u.a6.pfxlen = 0; addr->type = QETH_IP_TYPE_NORMAL; + spin_lock_bh(&card->ip_lock); qeth_l3_delete_ip(card, addr); + spin_unlock_bh(&card->ip_lock); kfree(addr); } @@ -329,7 +331,10 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev, addr->type = QETH_IP_TYPE_NORMAL; } else return -ENOMEM; + + spin_lock_bh(&card->ip_lock); qeth_l3_add_ip(card, addr); + spin_unlock_bh(&card->ip_lock); kfree(addr); return count; -- cgit v1.1 From a7531c1cc09855df5e33ceefe4fdfc2d74ccab19 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 15 Sep 2016 14:39:23 +0200 Subject: s390/qeth: allow hsuid configuration in DOWN state The qeth IP address mapping logic has been reworked recently. It now causes problems when the qeth sysfs attribute "hsuid" is specified in DOWN state, which is allowed. Postpone registering or deregistering of IP-addresses in this case. Signed-off-by: Ursula Braun Reviewed-by: Thomas Richter Signed-off-by: David S. 
Miller --- drivers/s390/net/qeth_l3_main.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 2f51271..4ba82e1 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -257,6 +257,11 @@ int qeth_l3_delete_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr) if (addr->in_progress) return -EINPROGRESS; + if (!qeth_card_hw_is_reachable(card)) { + addr->disp_flag = QETH_DISP_ADDR_DELETE; + return 0; + } + rc = qeth_l3_deregister_addr_entry(card, addr); hash_del(&addr->hnode); @@ -296,6 +301,11 @@ int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr) hash_add(card->ip_htable, &addr->hnode, qeth_l3_ipaddr_hash(addr)); + if (!qeth_card_hw_is_reachable(card)) { + addr->disp_flag = QETH_DISP_ADDR_ADD; + return 0; + } + /* qeth_l3_register_addr_entry can go to sleep * if we add a IPV4 addr. It is caused by the reason * that SETIP ipa cmd starts ARP staff for IPV4 addr. 
@@ -390,12 +400,16 @@ static void qeth_l3_recover_ip(struct qeth_card *card) int i; int rc; - QETH_CARD_TEXT(card, 4, "recoverip"); + QETH_CARD_TEXT(card, 4, "recovrip"); spin_lock_bh(&card->ip_lock); hash_for_each_safe(card->ip_htable, i, tmp, addr, hnode) { - if (addr->disp_flag == QETH_DISP_ADDR_ADD) { + if (addr->disp_flag == QETH_DISP_ADDR_DELETE) { + qeth_l3_deregister_addr_entry(card, addr); + hash_del(&addr->hnode); + kfree(addr); + } else if (addr->disp_flag == QETH_DISP_ADDR_ADD) { if (addr->proto == QETH_PROT_IPV4) { addr->in_progress = 1; spin_unlock_bh(&card->ip_lock); @@ -407,10 +421,8 @@ static void qeth_l3_recover_ip(struct qeth_card *card) if (!rc) { addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING; - if (addr->ref_counter < 1) { + if (addr->ref_counter < 1) qeth_l3_delete_ip(card, addr); - kfree(addr); - } } else { hash_del(&addr->hnode); kfree(addr); -- cgit v1.1 From 903e48531e8b5d414c8f1960eacac24c31f60344 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 15 Sep 2016 14:39:24 +0200 Subject: qeth: check not more than 16 SBALEs on the completion queue af_iucv socket programs with HiperSockets as transport make use of the qdio completion queue. 
Running such an af_iucv socket program may result in a crash: [90341.677709] Oops: 0038 ilc:2 [#1] SMP [90341.677743] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.6.0-20160720.0.0e86ec7.5e62689.fc23.s390xperformance #1 [90341.677744] Hardware name: IBM 2964 N96 703 (LPAR) [90341.677746] task: 00000000edb79f00 ti: 00000000edb84000 task.ti: 00000000edb84000 [90341.677748] Krnl PSW : 0704d00180000000 000000000075bc50 (qeth_qdio_input_handler+0x258/0x4e0) [90341.677756] R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:1 PM:0 RI:0 EA:3 Krnl GPRS: 000003d10391e900 0000000000000001 00000000e61e6000 0000000000000005 [90341.677759] 0000000000a9e6ec 5420040001a77400 0000000000000001 000000000000006f [90341.677761] 00000000e0d83f00 0000000000000003 0000000000000010 5420040001a77400 [90341.677784] 000000007ba8b000 0000000000943fd0 000000000075bc4e 00000000ed3b3c10 [90341.677793] Krnl Code: 000000000075bc42: e320cc180004 lg %r2,3096(%r12) 000000000075bc48: c0e5ffffc5cc brasl %r14,7547e0 #000000000075bc4e: 1816 lr %r1,%r6 >000000000075bc50: ba19b008 cs %r1,%r9,8(%r11) 000000000075bc54: ec180041017e cij %r1,1,8,75bcd6 000000000075bc5a: 5810b008 l %r1,8(%r11) 000000000075bc5e: ec16005c027e cij %r1,2,6,75bd16 000000000075bc64: 5090b008 st %r9,8(%r11) [90341.677807] Call Trace: [90341.677810] ([<000000000075bbc0>] qeth_qdio_input_handler+0x1c8/0x4e0) [90341.677812] ([<000000000070efbc>] qdio_kick_handler+0x124/0x2a8) [90341.677814] ([<0000000000713570>] __tiqdio_inbound_processing+0xf0/0xcd0) [90341.677818] ([<0000000000143312>] tasklet_action+0x92/0x120) [90341.677823] ([<00000000008b6e72>] __do_softirq+0x112/0x308) [90341.677824] ([<0000000000142bce>] irq_exit+0xd6/0xf8) [90341.677829] ([<000000000010b1d2>] do_IRQ+0x6a/0x88) [90341.677830] ([<00000000008b6322>] io_int_handler+0x112/0x220) [90341.677832] ([<0000000000102b2e>] enabled_wait+0x56/0xa8) [90341.677833] ([<0000000000000000>] (null)) [90341.677835] ([<0000000000102e32>] arch_cpu_idle+0x32/0x48) [90341.677838] 
([<000000000018a126>] cpu_startup_entry+0x266/0x2b0) [90341.677841] ([<0000000000113b38>] smp_start_secondary+0x100/0x110) [90341.677843] ([<00000000008b68a6>] restart_int_handler+0x62/0x78) [90341.677845] ([<00000000008b6588>] psw_idle+0x3c/0x40) [90341.677846] Last Breaking-Event-Address: [90341.677848] [<00000000007547ec>] qeth_dbf_longtext+0xc/0xc0 [90341.677849] [90341.677850] Kernel panic - not syncing: Fatal exception in interrupt qeth_qdio_cq_handler() analyzes SBALs on this completion queue, but does not observe the limit of 16 SBAL elements per SBAL. This patch adds the additional check to process not more than 16 SBAL elements. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 6ad5a14..20cf296 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -3619,7 +3619,8 @@ static void qeth_qdio_cq_handler(struct qeth_card *card, int e; e = 0; - while (buffer->element[e].addr) { + while ((e < QDIO_MAX_ELEMENTS_PER_BUFFER) && + buffer->element[e].addr) { unsigned long phys_aob_addr; phys_aob_addr = (unsigned long) buffer->element[e].addr; -- cgit v1.1 From 243f750fc6f5d8e4dec984a9a785941c67452b8f Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 15 Sep 2016 14:39:25 +0200 Subject: qeth: do not limit number of gso segments To reduce the need of skb_linearize() calls, gso_max_segs of qeth net_devices had been limited according to the maximum number of qdio SBAL elements. But a gso segment cannot be larger than the mtu-size, while an SBAL element can contain up to 4096 bytes. The gso_max_segs limitation limits the maximum packet size given to the qeth driver. 
Performance measurements with tso-enabled qeth network interfaces and mtu-size 1500 showed that the disadvantage of smaller packets is much more severe than the advantage of fewer skb_linearize() calls. This patch gets rid of the gso_max_segs limitations in the qeth driver. Signed-off-by: Ursula Braun Reviewed-by: Thomas Richter Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l2_main.c | 1 - drivers/s390/net/qeth_l3_main.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 54fd891..2081c18 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1131,7 +1131,6 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) qeth_l2_request_initial_mac(card); card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) * PAGE_SIZE; - card->dev->gso_max_segs = (QETH_MAX_BUFFER_ELEMENTS(card) - 1); SET_NETDEV_DEV(card->dev, &card->gdev->dev); netif_napi_add(card->dev, &card->napi, qeth_l2_poll, QETH_NAPI_WEIGHT); netif_carrier_off(card->dev); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 4ba82e1..0cbbc80 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3148,7 +3148,6 @@ static int qeth_l3_setup_netdev(struct qeth_card *card) netif_keep_dst(card->dev); card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) * PAGE_SIZE; - card->dev->gso_max_segs = (QETH_MAX_BUFFER_ELEMENTS(card) - 1); SET_NETDEV_DEV(card->dev, &card->gdev->dev); netif_napi_add(card->dev, &card->napi, qeth_l3_poll, QETH_NAPI_WEIGHT); -- cgit v1.1 From 5722963a8e83309dad831cf6968c4c805aa342c0 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 15 Sep 2016 14:39:26 +0200 Subject: qeth: do not turn on SG per default According to recent performance measurements, turning on only the net_device feature NETIF_F_SG behaves well, but turning on feature NETIF_F_GSO shows bad results. 
Since the kernel activates NETIF_F_GSO automatically as soon as the driver configures feature NETIF_F_SG, qeth should not activate feature NETIF_F_SG per default, until the qeth problems with NETIF_F_GSO are solved. Signed-off-by: Ursula Braun Reviewed-by: Thomas Richter Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l2_main.c | 2 -- drivers/s390/net/qeth_l3_main.c | 1 - 2 files changed, 3 deletions(-) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 2081c18..bb27058 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1124,8 +1124,6 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) card->dev->hw_features |= NETIF_F_RXCSUM; card->dev->vlan_features |= NETIF_F_RXCSUM; } - /* Turn on SG per default */ - card->dev->features |= NETIF_F_SG; } card->info.broadcast_capable = 1; qeth_l2_request_initial_mac(card); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 0cbbc80..c00f6db 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3120,7 +3120,6 @@ static int qeth_l3_setup_netdev(struct qeth_card *card) card->dev->vlan_features = NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_TSO; - card->dev->features = NETIF_F_SG; } } } else if (card->info.type == QETH_CARD_TYPE_IQD) { -- cgit v1.1 From 732a59cb6e7faed7a40da6665a517945c95fc895 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 15 Sep 2016 14:39:27 +0200 Subject: s390/qeth: fix setting VIPA address commit 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback") restructured the internal address handling. This work broke setting a virtual IP address. The command echo 10.1.1.1 > /sys/bus/ccwgroup/devices//vipa/add4 fails with a "file exists" error even if the IP address has not been set before. It turned out that the search result for the IP address search is handled incorrectly in the VIPA case. This patch fixes the setting of a virtual IP address. 
Signed-off-by: Thomas Richter Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l3_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index c00f6db..272d9e7 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -701,7 +701,7 @@ int qeth_l3_add_vipa(struct qeth_card *card, enum qeth_prot_versions proto, spin_lock_bh(&card->ip_lock); - if (!qeth_l3_ip_from_hash(card, ipaddr)) + if (qeth_l3_ip_from_hash(card, ipaddr)) rc = -EEXIST; else qeth_l3_add_ip(card, ipaddr); @@ -769,7 +769,7 @@ int qeth_l3_add_rxip(struct qeth_card *card, enum qeth_prot_versions proto, spin_lock_bh(&card->ip_lock); - if (!qeth_l3_ip_from_hash(card, ipaddr)) + if (qeth_l3_ip_from_hash(card, ipaddr)) rc = -EEXIST; else qeth_l3_add_ip(card, ipaddr); -- cgit v1.1 From e5dcad290a7c62d1c856269dbd13e470e388b704 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Thu, 15 Sep 2016 16:26:39 +0200 Subject: Documentation: devicetree: add qca8k binding Add device-tree binding for ar8xxx switch families. Cc: devicetree@vger.kernel.org Signed-off-by: John Crispin Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- .../devicetree/bindings/net/dsa/qca8k.txt | 89 ++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/dsa/qca8k.txt diff --git a/Documentation/devicetree/bindings/net/dsa/qca8k.txt b/Documentation/devicetree/bindings/net/dsa/qca8k.txt new file mode 100644 index 0000000..9c67ee4 --- /dev/null +++ b/Documentation/devicetree/bindings/net/dsa/qca8k.txt @@ -0,0 +1,89 @@ +* Qualcomm Atheros QCA8xxx switch family + +Required properties: + +- compatible: should be "qca,qca8337" +- #size-cells: must be 0 +- #address-cells: must be 1 + +Subnodes: + +The integrated switch subnode should be specified according to the binding +described in dsa/dsa.txt. 
As the QCA8K switches do not have a N:N mapping of +port and PHY id, each subnode describing a port needs to have a valid phandle +referencing the internal PHY connected to it. The CPU port of this switch is +always port 0. + +Example: + + + &mdio0 { + phy_port1: phy@0 { + reg = <0>; + }; + + phy_port2: phy@1 { + reg = <1>; + }; + + phy_port3: phy@2 { + reg = <2>; + }; + + phy_port4: phy@3 { + reg = <3>; + }; + + phy_port5: phy@4 { + reg = <4>; + }; + + switch0@0 { + compatible = "qca,qca8337"; + #address-cells = <1>; + #size-cells = <0>; + + reg = <0>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + port@0 { + reg = <0>; + label = "cpu"; + ethernet = <&gmac1>; + phy-mode = "rgmii"; + }; + + port@1 { + reg = <1>; + label = "lan1"; + phy-handle = <&phy_port1>; + }; + + port@2 { + reg = <2>; + label = "lan2"; + phy-handle = <&phy_port2>; + }; + + port@3 { + reg = <3>; + label = "lan3"; + phy-handle = <&phy_port3>; + }; + + port@4 { + reg = <4>; + label = "lan4"; + phy-handle = <&phy_port4>; + }; + + port@5 { + reg = <5>; + label = "wan"; + phy-handle = <&phy_port5>; + }; + }; + }; + }; -- cgit v1.1 From cafdc45c949b9963cbfb8fe3a68d0ab16b0208ce Mon Sep 17 00:00:00 2001 From: John Crispin Date: Thu, 15 Sep 2016 16:26:40 +0200 Subject: net-next: dsa: add Qualcomm tag RX/TX handler Add support for the 2-bytes Qualcomm tag that gigabit switches such as the QCA8337/N might insert when receiving packets, or that we need to insert while targeting specific switch ports. The tag is inserted directly behind the ethernet header. Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: John Crispin Signed-off-by: David S. 
Miller --- include/net/dsa.h | 1 + net/dsa/Kconfig | 3 ++ net/dsa/Makefile | 1 + net/dsa/dsa.c | 3 ++ net/dsa/dsa_priv.h | 2 + net/dsa/tag_qca.c | 138 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 148 insertions(+) create mode 100644 net/dsa/tag_qca.c diff --git a/include/net/dsa.h b/include/net/dsa.h index 9d97c52..7556646 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -26,6 +26,7 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_TRAILER, DSA_TAG_PROTO_EDSA, DSA_TAG_PROTO_BRCM, + DSA_TAG_PROTO_QCA, DSA_TAG_LAST, /* MUST BE LAST */ }; diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index ff7736f..96e47c5 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -38,4 +38,7 @@ config NET_DSA_TAG_EDSA config NET_DSA_TAG_TRAILER bool +config NET_DSA_TAG_QCA + bool + endif diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 8af4ded..a3380ed 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -7,3 +7,4 @@ dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o dsa_core-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o +dsa_core-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index d8d267e..66e31ac 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -54,6 +54,9 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = { #ifdef CONFIG_NET_DSA_TAG_BRCM [DSA_TAG_PROTO_BRCM] = &brcm_netdev_ops, #endif +#ifdef CONFIG_NET_DSA_TAG_QCA + [DSA_TAG_PROTO_QCA] = &qca_netdev_ops, +#endif [DSA_TAG_PROTO_NONE] = &none_ops, }; diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 00077a9..6cfd738 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -81,5 +81,7 @@ extern const struct dsa_device_ops trailer_netdev_ops; /* tag_brcm.c */ extern const struct dsa_device_ops brcm_netdev_ops; +/* tag_qca.c */ +extern const struct dsa_device_ops qca_netdev_ops; #endif diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c new file 
mode 100644 index 0000000..0c90cac --- /dev/null +++ b/net/dsa/tag_qca.c @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include "dsa_priv.h" + +#define QCA_HDR_LEN 2 +#define QCA_HDR_VERSION 0x2 + +#define QCA_HDR_RECV_VERSION_MASK GENMASK(15, 14) +#define QCA_HDR_RECV_VERSION_S 14 +#define QCA_HDR_RECV_PRIORITY_MASK GENMASK(13, 11) +#define QCA_HDR_RECV_PRIORITY_S 11 +#define QCA_HDR_RECV_TYPE_MASK GENMASK(10, 6) +#define QCA_HDR_RECV_TYPE_S 6 +#define QCA_HDR_RECV_FRAME_IS_TAGGED BIT(3) +#define QCA_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0) + +#define QCA_HDR_XMIT_VERSION_MASK GENMASK(15, 14) +#define QCA_HDR_XMIT_VERSION_S 14 +#define QCA_HDR_XMIT_PRIORITY_MASK GENMASK(13, 11) +#define QCA_HDR_XMIT_PRIORITY_S 11 +#define QCA_HDR_XMIT_CONTROL_MASK GENMASK(10, 8) +#define QCA_HDR_XMIT_CONTROL_S 8 +#define QCA_HDR_XMIT_FROM_CPU BIT(7) +#define QCA_HDR_XMIT_DP_BIT_MASK GENMASK(6, 0) + +static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + u16 *phdr, hdr; + + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + + if (skb_cow_head(skb, 0) < 0) + goto out_free; + + skb_push(skb, QCA_HDR_LEN); + + memmove(skb->data, skb->data + QCA_HDR_LEN, 2 * ETH_ALEN); + phdr = (u16 *)(skb->data + 2 * ETH_ALEN); + + /* Set the version field, and set destination port information */ + hdr = QCA_HDR_VERSION << QCA_HDR_XMIT_VERSION_S | + QCA_HDR_XMIT_FROM_CPU | + BIT(p->port); + + *phdr 
= htons(hdr); + + return skb; + +out_free: + kfree_skb(skb); + return NULL; +} + +static int qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_switch *ds; + u8 ver; + int port; + __be16 *phdr, hdr; + + if (unlikely(!dst)) + goto out_drop; + + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) + goto out; + + if (unlikely(!pskb_may_pull(skb, QCA_HDR_LEN))) + goto out_drop; + + /* The QCA header is added by the switch between src addr and Ethertype + * At this point, skb->data points to ethertype so header should be + * right before + */ + phdr = (__be16 *)(skb->data - 2); + hdr = ntohs(*phdr); + + /* Make sure the version is correct */ + ver = (hdr & QCA_HDR_RECV_VERSION_MASK) >> QCA_HDR_RECV_VERSION_S; + if (unlikely(ver != QCA_HDR_VERSION)) + goto out_drop; + + /* Remove QCA tag and recalculate checksum */ + skb_pull_rcsum(skb, QCA_HDR_LEN); + memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - QCA_HDR_LEN, + ETH_HLEN - QCA_HDR_LEN); + + /* This protocol doesn't support cascading multiple switches so it's + * safe to assume the switch is first in the tree + */ + ds = dst->ds[0]; + if (!ds) + goto out_drop; + + /* Get source port information */ + port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK); + if (!ds->ports[port].netdev) + goto out_drop; + + /* Update skb & forward the frame accordingly */ + skb_push(skb, ETH_HLEN); + skb->pkt_type = PACKET_HOST; + skb->dev = ds->ports[port].netdev; + skb->protocol = eth_type_trans(skb, skb->dev); + + skb->dev->stats.rx_packets++; + skb->dev->stats.rx_bytes += skb->len; + + netif_receive_skb(skb); + + return 0; + +out_drop: + kfree_skb(skb); +out: + return 0; +} + +const struct dsa_device_ops qca_netdev_ops = { + .xmit = qca_tag_xmit, + .rcv = qca_tag_rcv, +}; -- cgit v1.1 From 6b93fb46480a9cfa4afb52a6d19b2591804e5f9e Mon Sep 17 00:00:00 2001 From: John Crispin Date: Thu, 15 Sep 2016 16:26:41 +0200 Subject: 
net-next: dsa: add new driver for qca8xxx family This patch contains initial support for the QCA8337 switch. It will detect a QCA8337 switch, if present and declared in the DT. Each port will be represented through a standalone net_device interface, as for other DSA switches. CPU can communicate with any of the ports by setting an IP@ on ethN interface. Most of the extra callbacks of the DSA subsystem are already supported, such as bridge offloading, stp, fdb. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/dsa/Kconfig | 9 + drivers/net/dsa/Makefile | 1 + drivers/net/dsa/qca8k.c | 1060 ++++++++++++++++++++++++++++++++++++++++++++++ drivers/net/dsa/qca8k.h | 185 ++++++++ 4 files changed, 1255 insertions(+) create mode 100644 drivers/net/dsa/qca8k.c create mode 100644 drivers/net/dsa/qca8k.h diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig index de6d044..0659846 100644 --- a/drivers/net/dsa/Kconfig +++ b/drivers/net/dsa/Kconfig @@ -25,4 +25,13 @@ source "drivers/net/dsa/b53/Kconfig" source "drivers/net/dsa/mv88e6xxx/Kconfig" +config NET_DSA_QCA8K + tristate "Qualcomm Atheros QCA8K Ethernet switch family support" + depends on NET_DSA + select NET_DSA_TAG_QCA + select REGMAP + ---help--- + This enables support for the Qualcomm Atheros QCA8K Ethernet + switch chips. + endmenu diff --git a/drivers/net/dsa/Makefile b/drivers/net/dsa/Makefile index ca1e71b..8346e4f 100644 --- a/drivers/net/dsa/Makefile +++ b/drivers/net/dsa/Makefile @@ -1,5 +1,6 @@ obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o obj-$(CONFIG_NET_DSA_BCM_SF2) += bcm_sf2.o +obj-$(CONFIG_NET_DSA_QCA8K) += qca8k.o obj-y += b53/ obj-y += mv88e6xxx/ diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c new file mode 100644 index 0000000..7f3f178 --- /dev/null +++ b/drivers/net/dsa/qca8k.c @@ -0,0 +1,1060 @@ +/* + * Copyright (C) 2009 Felix Fietkau + * Copyright (C) 2011-2012 Gabor Juhos + * Copyright (c) 2015, The Linux Foundation. All rights reserved. 
+ * Copyright (c) 2016 John Crispin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "qca8k.h" + +#define MIB_DESC(_s, _o, _n) \ + { \ + .size = (_s), \ + .offset = (_o), \ + .name = (_n), \ + } + +static const struct qca8k_mib_desc ar8327_mib[] = { + MIB_DESC(1, 0x00, "RxBroad"), + MIB_DESC(1, 0x04, "RxPause"), + MIB_DESC(1, 0x08, "RxMulti"), + MIB_DESC(1, 0x0c, "RxFcsErr"), + MIB_DESC(1, 0x10, "RxAlignErr"), + MIB_DESC(1, 0x14, "RxRunt"), + MIB_DESC(1, 0x18, "RxFragment"), + MIB_DESC(1, 0x1c, "Rx64Byte"), + MIB_DESC(1, 0x20, "Rx128Byte"), + MIB_DESC(1, 0x24, "Rx256Byte"), + MIB_DESC(1, 0x28, "Rx512Byte"), + MIB_DESC(1, 0x2c, "Rx1024Byte"), + MIB_DESC(1, 0x30, "Rx1518Byte"), + MIB_DESC(1, 0x34, "RxMaxByte"), + MIB_DESC(1, 0x38, "RxTooLong"), + MIB_DESC(2, 0x3c, "RxGoodByte"), + MIB_DESC(2, 0x44, "RxBadByte"), + MIB_DESC(1, 0x4c, "RxOverFlow"), + MIB_DESC(1, 0x50, "Filtered"), + MIB_DESC(1, 0x54, "TxBroad"), + MIB_DESC(1, 0x58, "TxPause"), + MIB_DESC(1, 0x5c, "TxMulti"), + MIB_DESC(1, 0x60, "TxUnderRun"), + MIB_DESC(1, 0x64, "Tx64Byte"), + MIB_DESC(1, 0x68, "Tx128Byte"), + MIB_DESC(1, 0x6c, "Tx256Byte"), + MIB_DESC(1, 0x70, "Tx512Byte"), + MIB_DESC(1, 0x74, "Tx1024Byte"), + MIB_DESC(1, 0x78, "Tx1518Byte"), + MIB_DESC(1, 0x7c, "TxMaxByte"), + MIB_DESC(1, 0x80, "TxOverSize"), + MIB_DESC(2, 0x84, "TxByte"), + MIB_DESC(1, 0x8c, "TxCollision"), + MIB_DESC(1, 0x90, "TxAbortCol"), + MIB_DESC(1, 0x94, "TxMultiCol"), + MIB_DESC(1, 0x98, 
"TxSingleCol"), + MIB_DESC(1, 0x9c, "TxExcDefer"), + MIB_DESC(1, 0xa0, "TxDefer"), + MIB_DESC(1, 0xa4, "TxLateCol"), +}; + +/* The 32bit switch registers are accessed indirectly. To achieve this we need + * to set the page of the register. Track the last page that was set to reduce + * mdio writes + */ +static u16 qca8k_current_page = 0xffff; + +static void +qca8k_split_addr(u32 regaddr, u16 *r1, u16 *r2, u16 *page) +{ + regaddr >>= 1; + *r1 = regaddr & 0x1e; + + regaddr >>= 5; + *r2 = regaddr & 0x7; + + regaddr >>= 3; + *page = regaddr & 0x3ff; +} + +static u32 +qca8k_mii_read32(struct mii_bus *bus, int phy_id, u32 regnum) +{ + u32 val; + int ret; + + ret = bus->read(bus, phy_id, regnum); + if (ret >= 0) { + val = ret; + ret = bus->read(bus, phy_id, regnum + 1); + val |= ret << 16; + } + + if (ret < 0) { + dev_err_ratelimited(&bus->dev, + "failed to read qca8k 32bit register\n"); + return ret; + } + + return val; +} + +static void +qca8k_mii_write32(struct mii_bus *bus, int phy_id, u32 regnum, u32 val) +{ + u16 lo, hi; + int ret; + + lo = val & 0xffff; + hi = (u16)(val >> 16); + + ret = bus->write(bus, phy_id, regnum, lo); + if (ret >= 0) + ret = bus->write(bus, phy_id, regnum + 1, hi); + if (ret < 0) + dev_err_ratelimited(&bus->dev, + "failed to write qca8k 32bit register\n"); +} + +static void +qca8k_set_page(struct mii_bus *bus, u16 page) +{ + if (page == qca8k_current_page) + return; + + if (bus->write(bus, 0x18, 0, page) < 0) + dev_err_ratelimited(&bus->dev, + "failed to set qca8k page\n"); + qca8k_current_page = page; +} + +static u32 +qca8k_read(struct qca8k_priv *priv, u32 reg) +{ + u16 r1, r2, page; + u32 val; + + qca8k_split_addr(reg, &r1, &r2, &page); + + mutex_lock_nested(&priv->bus->mdio_lock, MDIO_MUTEX_NESTED); + + qca8k_set_page(priv->bus, page); + val = qca8k_mii_read32(priv->bus, 0x10 | r2, r1); + + mutex_unlock(&priv->bus->mdio_lock); + + return val; +} + +static void +qca8k_write(struct qca8k_priv *priv, u32 reg, u32 val) +{ + u16 r1, r2, page; 
+ + qca8k_split_addr(reg, &r1, &r2, &page); + + mutex_lock_nested(&priv->bus->mdio_lock, MDIO_MUTEX_NESTED); + + qca8k_set_page(priv->bus, page); + qca8k_mii_write32(priv->bus, 0x10 | r2, r1, val); + + mutex_unlock(&priv->bus->mdio_lock); +} + +static u32 +qca8k_rmw(struct qca8k_priv *priv, u32 reg, u32 mask, u32 val) +{ + u16 r1, r2, page; + u32 ret; + + qca8k_split_addr(reg, &r1, &r2, &page); + + mutex_lock_nested(&priv->bus->mdio_lock, MDIO_MUTEX_NESTED); + + qca8k_set_page(priv->bus, page); + ret = qca8k_mii_read32(priv->bus, 0x10 | r2, r1); + ret &= ~mask; + ret |= val; + qca8k_mii_write32(priv->bus, 0x10 | r2, r1, ret); + + mutex_unlock(&priv->bus->mdio_lock); + + return ret; +} + +static void +qca8k_reg_set(struct qca8k_priv *priv, u32 reg, u32 val) +{ + qca8k_rmw(priv, reg, 0, val); +} + +static void +qca8k_reg_clear(struct qca8k_priv *priv, u32 reg, u32 val) +{ + qca8k_rmw(priv, reg, val, 0); +} + +static int +qca8k_regmap_read(void *ctx, uint32_t reg, uint32_t *val) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ctx; + + *val = qca8k_read(priv, reg); + + return 0; +} + +static int +qca8k_regmap_write(void *ctx, uint32_t reg, uint32_t val) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ctx; + + qca8k_write(priv, reg, val); + + return 0; +} + +static const struct regmap_range qca8k_readable_ranges[] = { + regmap_reg_range(0x0000, 0x00e4), /* Global control */ + regmap_reg_range(0x0100, 0x0168), /* EEE control */ + regmap_reg_range(0x0200, 0x0270), /* Parser control */ + regmap_reg_range(0x0400, 0x0454), /* ACL */ + regmap_reg_range(0x0600, 0x0718), /* Lookup */ + regmap_reg_range(0x0800, 0x0b70), /* QM */ + regmap_reg_range(0x0c00, 0x0c80), /* PKT */ + regmap_reg_range(0x0e00, 0x0e98), /* L3 */ + regmap_reg_range(0x1000, 0x10ac), /* MIB - Port0 */ + regmap_reg_range(0x1100, 0x11ac), /* MIB - Port1 */ + regmap_reg_range(0x1200, 0x12ac), /* MIB - Port2 */ + regmap_reg_range(0x1300, 0x13ac), /* MIB - Port3 */ + regmap_reg_range(0x1400, 0x14ac), /* 
MIB - Port4 */ + regmap_reg_range(0x1500, 0x15ac), /* MIB - Port5 */ + regmap_reg_range(0x1600, 0x16ac), /* MIB - Port6 */ + +}; + +static struct regmap_access_table qca8k_readable_table = { + .yes_ranges = qca8k_readable_ranges, + .n_yes_ranges = ARRAY_SIZE(qca8k_readable_ranges), +}; + +struct regmap_config qca8k_regmap_config = { + .reg_bits = 16, + .val_bits = 32, + .reg_stride = 4, + .max_register = 0x16ac, /* end MIB - Port6 range */ + .reg_read = qca8k_regmap_read, + .reg_write = qca8k_regmap_write, + .rd_table = &qca8k_readable_table, +}; + +static int +qca8k_busy_wait(struct qca8k_priv *priv, u32 reg, u32 mask) +{ + unsigned long timeout; + + timeout = jiffies + msecs_to_jiffies(20); + + /* loop until the busy flag has cleared */ + do { + u32 val = qca8k_read(priv, reg); + int busy = val & mask; + + if (!busy) + break; + cond_resched(); + } while (!time_after_eq(jiffies, timeout)); + + return time_after_eq(jiffies, timeout); +} + +static void +qca8k_fdb_read(struct qca8k_priv *priv, struct qca8k_fdb *fdb) +{ + u32 reg[4]; + int i; + + /* load the ARL table into an array */ + for (i = 0; i < 4; i++) + reg[i] = qca8k_read(priv, QCA8K_REG_ATU_DATA0 + (i * 4)); + + /* vid - 83:72 */ + fdb->vid = (reg[2] >> QCA8K_ATU_VID_S) & QCA8K_ATU_VID_M; + /* aging - 67:64 */ + fdb->aging = reg[2] & QCA8K_ATU_STATUS_M; + /* portmask - 54:48 */ + fdb->port_mask = (reg[1] >> QCA8K_ATU_PORT_S) & QCA8K_ATU_PORT_M; + /* mac - 47:0 */ + fdb->mac[0] = (reg[1] >> QCA8K_ATU_ADDR0_S) & 0xff; + fdb->mac[1] = reg[1] & 0xff; + fdb->mac[2] = (reg[0] >> QCA8K_ATU_ADDR2_S) & 0xff; + fdb->mac[3] = (reg[0] >> QCA8K_ATU_ADDR3_S) & 0xff; + fdb->mac[4] = (reg[0] >> QCA8K_ATU_ADDR4_S) & 0xff; + fdb->mac[5] = reg[0] & 0xff; +} + +static void +qca8k_fdb_write(struct qca8k_priv *priv, u16 vid, u8 port_mask, const u8 *mac, + u8 aging) +{ + u32 reg[3] = { 0 }; + int i; + + /* vid - 83:72 */ + reg[2] = (vid & QCA8K_ATU_VID_M) << QCA8K_ATU_VID_S; + /* aging - 67:64 */ + reg[2] |= aging & 
QCA8K_ATU_STATUS_M; + /* portmask - 54:48 */ + reg[1] = (port_mask & QCA8K_ATU_PORT_M) << QCA8K_ATU_PORT_S; + /* mac - 47:0 */ + reg[1] |= mac[0] << QCA8K_ATU_ADDR0_S; + reg[1] |= mac[1]; + reg[0] |= mac[2] << QCA8K_ATU_ADDR2_S; + reg[0] |= mac[3] << QCA8K_ATU_ADDR3_S; + reg[0] |= mac[4] << QCA8K_ATU_ADDR4_S; + reg[0] |= mac[5]; + + /* load the array into the ARL table */ + for (i = 0; i < 3; i++) + qca8k_write(priv, QCA8K_REG_ATU_DATA0 + (i * 4), reg[i]); +} + +static int +qca8k_fdb_access(struct qca8k_priv *priv, enum qca8k_fdb_cmd cmd, int port) +{ + u32 reg; + + /* Set the command and FDB index */ + reg = QCA8K_ATU_FUNC_BUSY; + reg |= cmd; + if (port >= 0) { + reg |= QCA8K_ATU_FUNC_PORT_EN; + reg |= (port & QCA8K_ATU_FUNC_PORT_M) << QCA8K_ATU_FUNC_PORT_S; + } + + /* Write the function register triggering the table access */ + qca8k_write(priv, QCA8K_REG_ATU_FUNC, reg); + + /* wait for completion */ + if (qca8k_busy_wait(priv, QCA8K_REG_ATU_FUNC, QCA8K_ATU_FUNC_BUSY)) + return -1; + + /* Check for table full violation when adding an entry */ + if (cmd == QCA8K_FDB_LOAD) { + reg = qca8k_read(priv, QCA8K_REG_ATU_FUNC); + if (reg & QCA8K_ATU_FUNC_FULL) + return -1; + } + + return 0; +} + +static int +qca8k_fdb_next(struct qca8k_priv *priv, struct qca8k_fdb *fdb, int port) +{ + int ret; + + qca8k_fdb_write(priv, fdb->vid, fdb->port_mask, fdb->mac, fdb->aging); + ret = qca8k_fdb_access(priv, QCA8K_FDB_NEXT, port); + if (ret >= 0) + qca8k_fdb_read(priv, fdb); + + return ret; +} + +static int +qca8k_fdb_add(struct qca8k_priv *priv, const u8 *mac, u16 port_mask, + u16 vid, u8 aging) +{ + int ret; + + mutex_lock(&priv->reg_mutex); + qca8k_fdb_write(priv, vid, port_mask, mac, aging); + ret = qca8k_fdb_access(priv, QCA8K_FDB_LOAD, -1); + mutex_unlock(&priv->reg_mutex); + + return ret; +} + +static int +qca8k_fdb_del(struct qca8k_priv *priv, const u8 *mac, u16 port_mask, u16 vid) +{ + int ret; + + mutex_lock(&priv->reg_mutex); + qca8k_fdb_write(priv, vid, port_mask, mac, 
0); + ret = qca8k_fdb_access(priv, QCA8K_FDB_PURGE, -1); + mutex_unlock(&priv->reg_mutex); + + return ret; +} + +static void +qca8k_fdb_flush(struct qca8k_priv *priv) +{ + mutex_lock(&priv->reg_mutex); + qca8k_fdb_access(priv, QCA8K_FDB_FLUSH, -1); + mutex_unlock(&priv->reg_mutex); +} + +static void +qca8k_mib_init(struct qca8k_priv *priv) +{ + mutex_lock(&priv->reg_mutex); + qca8k_reg_set(priv, QCA8K_REG_MIB, QCA8K_MIB_FLUSH | QCA8K_MIB_BUSY); + qca8k_busy_wait(priv, QCA8K_REG_MIB, QCA8K_MIB_BUSY); + qca8k_reg_set(priv, QCA8K_REG_MIB, QCA8K_MIB_CPU_KEEP); + qca8k_write(priv, QCA8K_REG_MODULE_EN, QCA8K_MODULE_EN_MIB); + mutex_unlock(&priv->reg_mutex); +} + +static int +qca8k_set_pad_ctrl(struct qca8k_priv *priv, int port, int mode) +{ + u32 reg; + + switch (port) { + case 0: + reg = QCA8K_REG_PORT0_PAD_CTRL; + break; + case 6: + reg = QCA8K_REG_PORT6_PAD_CTRL; + break; + default: + pr_err("Can't set PAD_CTRL on port %d\n", port); + return -EINVAL; + } + + /* Configure a port to be directly connected to an external + * PHY or MAC. 
+ */ + switch (mode) { + case PHY_INTERFACE_MODE_RGMII: + qca8k_write(priv, reg, + QCA8K_PORT_PAD_RGMII_EN | + QCA8K_PORT_PAD_RGMII_TX_DELAY(3) | + QCA8K_PORT_PAD_RGMII_RX_DELAY(3)); + + /* According to the datasheet, RGMII delay is enabled through + * PORT5_PAD_CTRL for all ports, rather than individual port + * registers + */ + qca8k_write(priv, QCA8K_REG_PORT5_PAD_CTRL, + QCA8K_PORT_PAD_RGMII_RX_DELAY_EN); + break; + case PHY_INTERFACE_MODE_SGMII: + qca8k_write(priv, reg, QCA8K_PORT_PAD_SGMII_EN); + break; + default: + pr_err("xMII mode %d not supported\n", mode); + return -EINVAL; + } + + return 0; +} + +static void +qca8k_port_set_status(struct qca8k_priv *priv, int port, int enable) +{ + u32 mask = QCA8K_PORT_STATUS_TXMAC; + + /* Port 0 and 6 have no internal PHY */ + if ((port > 0) && (port < 6)) + mask |= QCA8K_PORT_STATUS_LINK_AUTO; + + if (enable) + qca8k_reg_set(priv, QCA8K_REG_PORT_STATUS(port), mask); + else + qca8k_reg_clear(priv, QCA8K_REG_PORT_STATUS(port), mask); +} + +static int +qca8k_setup(struct dsa_switch *ds) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + int ret, i, phy_mode = -1; + + /* Make sure that port 0 is the cpu port */ + if (!dsa_is_cpu_port(ds, 0)) { + pr_err("port 0 is not the CPU port\n"); + return -EINVAL; + } + + mutex_init(&priv->reg_mutex); + + /* Start by setting up the register mapping */ + priv->regmap = devm_regmap_init(ds->dev, NULL, priv, + &qca8k_regmap_config); + if (IS_ERR(priv->regmap)) + pr_warn("regmap initialization failed"); + + /* Initialize CPU port pad mode (xMII type, delays...) 
*/ + phy_mode = of_get_phy_mode(ds->ports[ds->dst->cpu_port].dn); + if (phy_mode < 0) { + pr_err("Can't find phy-mode for master device\n"); + return phy_mode; + } + ret = qca8k_set_pad_ctrl(priv, QCA8K_CPU_PORT, phy_mode); + if (ret < 0) + return ret; + + /* Enable CPU Port */ + qca8k_reg_set(priv, QCA8K_REG_GLOBAL_FW_CTRL0, + QCA8K_GLOBAL_FW_CTRL0_CPU_PORT_EN); + qca8k_port_set_status(priv, QCA8K_CPU_PORT, 1); + priv->port_sts[QCA8K_CPU_PORT].enabled = 1; + + /* Enable MIB counters */ + qca8k_mib_init(priv); + + /* Enable QCA header mode on the cpu port */ + qca8k_write(priv, QCA8K_REG_PORT_HDR_CTRL(QCA8K_CPU_PORT), + QCA8K_PORT_HDR_CTRL_ALL << QCA8K_PORT_HDR_CTRL_TX_S | + QCA8K_PORT_HDR_CTRL_ALL << QCA8K_PORT_HDR_CTRL_RX_S); + + /* Disable forwarding by default on all ports */ + for (i = 0; i < QCA8K_NUM_PORTS; i++) + qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(i), + QCA8K_PORT_LOOKUP_MEMBER, 0); + + /* Disable MAC by default on all user ports */ + for (i = 1; i < QCA8K_NUM_PORTS; i++) + if (ds->enabled_port_mask & BIT(i)) + qca8k_port_set_status(priv, i, 0); + + /* Forward all unknown frames to CPU port for Linux processing */ + qca8k_write(priv, QCA8K_REG_GLOBAL_FW_CTRL1, + BIT(0) << QCA8K_GLOBAL_FW_CTRL1_IGMP_DP_S | + BIT(0) << QCA8K_GLOBAL_FW_CTRL1_BC_DP_S | + BIT(0) << QCA8K_GLOBAL_FW_CTRL1_MC_DP_S | + BIT(0) << QCA8K_GLOBAL_FW_CTRL1_UC_DP_S); + + /* Setup connection between CPU port & user ports */ + for (i = 0; i < DSA_MAX_PORTS; i++) { + /* CPU port gets connected to all user ports of the switch */ + if (dsa_is_cpu_port(ds, i)) { + qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(QCA8K_CPU_PORT), + QCA8K_PORT_LOOKUP_MEMBER, + ds->enabled_port_mask); + } + + /* Invividual user ports get connected to CPU port only */ + if (ds->enabled_port_mask & BIT(i)) { + int shift = 16 * (i % 2); + + qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(i), + QCA8K_PORT_LOOKUP_MEMBER, + BIT(QCA8K_CPU_PORT)); + + /* Enable ARP Auto-learning by default */ + qca8k_reg_set(priv, 
QCA8K_PORT_LOOKUP_CTRL(i), + QCA8K_PORT_LOOKUP_LEARN); + + /* For port based vlans to work we need to set the + * default egress vid + */ + qca8k_rmw(priv, QCA8K_EGRESS_VLAN(i), + 0xffff << shift, 1 << shift); + qca8k_write(priv, QCA8K_REG_PORT_VLAN_CTRL0(i), + QCA8K_PORT_VLAN_CVID(1) | + QCA8K_PORT_VLAN_SVID(1)); + } + } + + /* Flush the FDB table */ + qca8k_fdb_flush(priv); + + return 0; +} + +static int +qca8k_set_addr(struct dsa_switch *ds, u8 *addr) +{ + /* The subsystem always calls this function so add an empty stub */ + return 0; +} + +static int +qca8k_phy_read(struct dsa_switch *ds, int phy, int regnum) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + + return mdiobus_read(priv->bus, phy, regnum); +} + +static int +qca8k_phy_write(struct dsa_switch *ds, int phy, int regnum, u16 val) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + + return mdiobus_write(priv->bus, phy, regnum, val); +} + +static void +qca8k_get_strings(struct dsa_switch *ds, int port, uint8_t *data) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ar8327_mib); i++) + strncpy(data + i * ETH_GSTRING_LEN, ar8327_mib[i].name, + ETH_GSTRING_LEN); +} + +static void +qca8k_get_ethtool_stats(struct dsa_switch *ds, int port, + uint64_t *data) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + const struct qca8k_mib_desc *mib; + u32 reg, i; + u64 hi; + + for (i = 0; i < ARRAY_SIZE(ar8327_mib); i++) { + mib = &ar8327_mib[i]; + reg = QCA8K_PORT_MIB_COUNTER(port) + mib->offset; + + data[i] = qca8k_read(priv, reg); + if (mib->size == 2) { + hi = qca8k_read(priv, reg + 4); + data[i] |= hi << 32; + } + } +} + +static int +qca8k_get_sset_count(struct dsa_switch *ds) +{ + return ARRAY_SIZE(ar8327_mib); +} + +static void +qca8k_eee_enable_set(struct dsa_switch *ds, int port, bool enable) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + u32 lpi_en = QCA8K_REG_EEE_CTRL_LPI_EN(port); + u32 reg; + + mutex_lock(&priv->reg_mutex); + reg = qca8k_read(priv, 
QCA8K_REG_EEE_CTRL); + if (enable) + reg |= lpi_en; + else + reg &= ~lpi_en; + qca8k_write(priv, QCA8K_REG_EEE_CTRL, reg); + mutex_unlock(&priv->reg_mutex); +} + +static int +qca8k_eee_init(struct dsa_switch *ds, int port, + struct phy_device *phy) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + struct ethtool_eee *p = &priv->port_sts[port].eee; + int ret; + + p->supported = (SUPPORTED_1000baseT_Full | SUPPORTED_100baseT_Full); + + ret = phy_init_eee(phy, 0); + if (ret) + return ret; + + qca8k_eee_enable_set(ds, port, true); + + return 0; +} + +static int +qca8k_set_eee(struct dsa_switch *ds, int port, + struct phy_device *phydev, + struct ethtool_eee *e) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + struct ethtool_eee *p = &priv->port_sts[port].eee; + int ret = 0; + + p->eee_enabled = e->eee_enabled; + + if (e->eee_enabled) { + p->eee_enabled = qca8k_eee_init(ds, port, phydev); + if (!p->eee_enabled) + ret = -EOPNOTSUPP; + } + qca8k_eee_enable_set(ds, port, p->eee_enabled); + + return ret; +} + +static int +qca8k_get_eee(struct dsa_switch *ds, int port, + struct ethtool_eee *e) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + struct ethtool_eee *p = &priv->port_sts[port].eee; + struct net_device *netdev = ds->ports[port].netdev; + int ret; + + ret = phy_ethtool_get_eee(netdev->phydev, p); + if (!ret) + e->eee_active = + !!(p->supported & p->advertised & p->lp_advertised); + else + e->eee_active = 0; + + e->eee_enabled = p->eee_enabled; + + return ret; +} + +static void +qca8k_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + u32 stp_state; + + switch (state) { + case BR_STATE_DISABLED: + stp_state = QCA8K_PORT_LOOKUP_STATE_DISABLED; + break; + case BR_STATE_BLOCKING: + stp_state = QCA8K_PORT_LOOKUP_STATE_BLOCKING; + break; + case BR_STATE_LISTENING: + stp_state = QCA8K_PORT_LOOKUP_STATE_LISTENING; + break; + case BR_STATE_LEARNING: + 
stp_state = QCA8K_PORT_LOOKUP_STATE_LEARNING; + break; + case BR_STATE_FORWARDING: + default: + stp_state = QCA8K_PORT_LOOKUP_STATE_FORWARD; + break; + } + + qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port), + QCA8K_PORT_LOOKUP_STATE_MASK, stp_state); +} + +static int +qca8k_port_bridge_join(struct dsa_switch *ds, int port, + struct net_device *bridge) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + int port_mask = BIT(QCA8K_CPU_PORT); + int i; + + priv->port_sts[port].bridge_dev = bridge; + + for (i = 1; i < QCA8K_NUM_PORTS; i++) { + if (priv->port_sts[i].bridge_dev != bridge) + continue; + /* Add this port to the portvlan mask of the other ports + * in the bridge + */ + qca8k_reg_set(priv, + QCA8K_PORT_LOOKUP_CTRL(i), + BIT(port)); + if (i != port) + port_mask |= BIT(i); + } + /* Add all other ports to this ports portvlan mask */ + qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port), + QCA8K_PORT_LOOKUP_MEMBER, port_mask); + + return 0; +} + +static void +qca8k_port_bridge_leave(struct dsa_switch *ds, int port) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + int i; + + for (i = 1; i < QCA8K_NUM_PORTS; i++) { + if (priv->port_sts[i].bridge_dev != + priv->port_sts[port].bridge_dev) + continue; + /* Remove this port to the portvlan mask of the other ports + * in the bridge + */ + qca8k_reg_clear(priv, + QCA8K_PORT_LOOKUP_CTRL(i), + BIT(port)); + } + priv->port_sts[port].bridge_dev = NULL; + /* Set the cpu port to be the only one in the portvlan mask of + * this port + */ + qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port), + QCA8K_PORT_LOOKUP_MEMBER, BIT(QCA8K_CPU_PORT)); +} + +static int +qca8k_port_enable(struct dsa_switch *ds, int port, + struct phy_device *phy) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + + qca8k_port_set_status(priv, port, 1); + priv->port_sts[port].enabled = 1; + + return 0; +} + +static void +qca8k_port_disable(struct dsa_switch *ds, int port, + struct phy_device *phy) +{ + struct qca8k_priv *priv = (struct 
qca8k_priv *)ds->priv; + + qca8k_port_set_status(priv, port, 0); + priv->port_sts[port].enabled = 0; +} + +static int +qca8k_port_fdb_insert(struct qca8k_priv *priv, const u8 *addr, + u16 port_mask, u16 vid) +{ + /* Set the vid to the port vlan id if no vid is set */ + if (!vid) + vid = 1; + + return qca8k_fdb_add(priv, addr, port_mask, vid, + QCA8K_ATU_STATUS_STATIC); +} + +static int +qca8k_port_fdb_prepare(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_fdb *fdb, + struct switchdev_trans *trans) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + + /* The FDB table for static and auto learned entries is the same. We + * need to reserve an entry with no port_mask set to make sure that + * when port_fdb_add is called an entry is still available. Otherwise + * the last free entry might have been used up by auto learning + */ + return qca8k_port_fdb_insert(priv, fdb->addr, 0, fdb->vid); +} + +static void +qca8k_port_fdb_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_fdb *fdb, + struct switchdev_trans *trans) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + u16 port_mask = BIT(port); + + /* Update the FDB entry adding the port_mask */ + qca8k_port_fdb_insert(priv, fdb->addr, port_mask, fdb->vid); +} + +static int +qca8k_port_fdb_del(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_fdb *fdb) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + u16 port_mask = BIT(port); + u16 vid = fdb->vid; + + if (!vid) + vid = 1; + + return qca8k_fdb_del(priv, fdb->addr, port_mask, vid); +} + +static int +qca8k_port_fdb_dump(struct dsa_switch *ds, int port, + struct switchdev_obj_port_fdb *fdb, + int (*cb)(struct switchdev_obj *obj)) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + struct qca8k_fdb _fdb = { 0 }; + int cnt = QCA8K_NUM_FDB_RECORDS; + int ret = 0; + + mutex_lock(&priv->reg_mutex); + while (cnt-- && !qca8k_fdb_next(priv, &_fdb, port)) { + if (!_fdb.aging) + 
break; + + ether_addr_copy(fdb->addr, _fdb.mac); + fdb->vid = _fdb.vid; + if (_fdb.aging == QCA8K_ATU_STATUS_STATIC) + fdb->ndm_state = NUD_NOARP; + else + fdb->ndm_state = NUD_REACHABLE; + + ret = cb(&fdb->obj); + if (ret) + break; + } + mutex_unlock(&priv->reg_mutex); + + return 0; +} + +static enum dsa_tag_protocol +qca8k_get_tag_protocol(struct dsa_switch *ds) +{ + return DSA_TAG_PROTO_QCA; +} + +static struct dsa_switch_ops qca8k_switch_ops = { + .get_tag_protocol = qca8k_get_tag_protocol, + .setup = qca8k_setup, + .set_addr = qca8k_set_addr, + .get_strings = qca8k_get_strings, + .phy_read = qca8k_phy_read, + .phy_write = qca8k_phy_write, + .get_ethtool_stats = qca8k_get_ethtool_stats, + .get_sset_count = qca8k_get_sset_count, + .get_eee = qca8k_get_eee, + .set_eee = qca8k_set_eee, + .port_enable = qca8k_port_enable, + .port_disable = qca8k_port_disable, + .port_stp_state_set = qca8k_port_stp_state_set, + .port_bridge_join = qca8k_port_bridge_join, + .port_bridge_leave = qca8k_port_bridge_leave, + .port_fdb_prepare = qca8k_port_fdb_prepare, + .port_fdb_add = qca8k_port_fdb_add, + .port_fdb_del = qca8k_port_fdb_del, + .port_fdb_dump = qca8k_port_fdb_dump, +}; + +static int +qca8k_sw_probe(struct mdio_device *mdiodev) +{ + struct qca8k_priv *priv; + u32 id; + + /* allocate the private data struct so that we can probe the switches + * ID register + */ + priv = devm_kzalloc(&mdiodev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->bus = mdiodev->bus; + + /* read the switches ID register */ + id = qca8k_read(priv, QCA8K_REG_MASK_CTRL); + id >>= QCA8K_MASK_CTRL_ID_S; + id &= QCA8K_MASK_CTRL_ID_M; + if (id != QCA8K_ID_QCA8337) + return -ENODEV; + + priv->ds = devm_kzalloc(&mdiodev->dev, sizeof(*priv->ds), GFP_KERNEL); + if (!priv->ds) + return -ENOMEM; + + priv->ds->priv = priv; + priv->ds->dev = &mdiodev->dev; + priv->ds->ops = &qca8k_switch_ops; + mutex_init(&priv->reg_mutex); + dev_set_drvdata(&mdiodev->dev, priv); + + return 
dsa_register_switch(priv->ds, priv->ds->dev->of_node); +} + +static void +qca8k_sw_remove(struct mdio_device *mdiodev) +{ + struct qca8k_priv *priv = dev_get_drvdata(&mdiodev->dev); + int i; + + for (i = 0; i < QCA8K_NUM_PORTS; i++) + qca8k_port_set_status(priv, i, 0); + + dsa_unregister_switch(priv->ds); +} + +#ifdef CONFIG_PM_SLEEP +static void +qca8k_set_pm(struct qca8k_priv *priv, int enable) +{ + int i; + + for (i = 0; i < QCA8K_NUM_PORTS; i++) { + if (!priv->port_sts[i].enabled) + continue; + + qca8k_port_set_status(priv, i, enable); + } +} + +static int qca8k_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct qca8k_priv *priv = platform_get_drvdata(pdev); + + qca8k_set_pm(priv, 0); + + return dsa_switch_suspend(priv->ds); +} + +static int qca8k_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct qca8k_priv *priv = platform_get_drvdata(pdev); + + qca8k_set_pm(priv, 1); + + return dsa_switch_resume(priv->ds); +} +#endif /* CONFIG_PM_SLEEP */ + +static SIMPLE_DEV_PM_OPS(qca8k_pm_ops, + qca8k_suspend, qca8k_resume); + +static const struct of_device_id qca8k_of_match[] = { + { .compatible = "qca,qca8337" }, + { /* sentinel */ }, +}; + +static struct mdio_driver qca8kmdio_driver = { + .probe = qca8k_sw_probe, + .remove = qca8k_sw_remove, + .mdiodrv.driver = { + .name = "qca8k", + .of_match_table = qca8k_of_match, + .pm = &qca8k_pm_ops, + }, +}; + +static int __init +qca8kmdio_driver_register(void) +{ + return mdio_driver_register(&qca8kmdio_driver); +} +module_init(qca8kmdio_driver_register); + +static void __exit +qca8kmdio_driver_unregister(void) +{ + mdio_driver_unregister(&qca8kmdio_driver); +} +module_exit(qca8kmdio_driver_unregister); + +MODULE_AUTHOR("Mathieu Olivari, John Crispin "); +MODULE_DESCRIPTION("Driver for QCA8K ethernet switch family"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:qca8k"); diff --git a/drivers/net/dsa/qca8k.h 
b/drivers/net/dsa/qca8k.h new file mode 100644 index 0000000..2014647 --- /dev/null +++ b/drivers/net/dsa/qca8k.h @@ -0,0 +1,185 @@ +/* + * Copyright (C) 2009 Felix Fietkau + * Copyright (C) 2011-2012 Gabor Juhos + * Copyright (c) 2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __QCA8K_H +#define __QCA8K_H + +#include +#include + +#define QCA8K_NUM_PORTS 7 + +#define PHY_ID_QCA8337 0x004dd036 +#define QCA8K_ID_QCA8337 0x13 + +#define QCA8K_NUM_FDB_RECORDS 2048 + +#define QCA8K_CPU_PORT 0 + +/* Global control registers */ +#define QCA8K_REG_MASK_CTRL 0x000 +#define QCA8K_MASK_CTRL_ID_M 0xff +#define QCA8K_MASK_CTRL_ID_S 8 +#define QCA8K_REG_PORT0_PAD_CTRL 0x004 +#define QCA8K_REG_PORT5_PAD_CTRL 0x008 +#define QCA8K_REG_PORT6_PAD_CTRL 0x00c +#define QCA8K_PORT_PAD_RGMII_EN BIT(26) +#define QCA8K_PORT_PAD_RGMII_TX_DELAY(x) \ + ((0x8 + (x & 0x3)) << 22) +#define QCA8K_PORT_PAD_RGMII_RX_DELAY(x) \ + ((0x10 + (x & 0x3)) << 20) +#define QCA8K_PORT_PAD_RGMII_RX_DELAY_EN BIT(24) +#define QCA8K_PORT_PAD_SGMII_EN BIT(7) +#define QCA8K_REG_MODULE_EN 0x030 +#define QCA8K_MODULE_EN_MIB BIT(0) +#define QCA8K_REG_MIB 0x034 +#define QCA8K_MIB_FLUSH BIT(24) +#define QCA8K_MIB_CPU_KEEP BIT(20) +#define QCA8K_MIB_BUSY BIT(17) +#define QCA8K_GOL_MAC_ADDR0 0x60 +#define QCA8K_GOL_MAC_ADDR1 0x64 +#define QCA8K_REG_PORT_STATUS(_i) (0x07c + (_i) * 4) +#define QCA8K_PORT_STATUS_SPEED GENMASK(2, 0) +#define QCA8K_PORT_STATUS_SPEED_S 0 +#define QCA8K_PORT_STATUS_TXMAC BIT(2) +#define QCA8K_PORT_STATUS_RXMAC BIT(3) 
+#define QCA8K_PORT_STATUS_TXFLOW BIT(4) +#define QCA8K_PORT_STATUS_RXFLOW BIT(5) +#define QCA8K_PORT_STATUS_DUPLEX BIT(6) +#define QCA8K_PORT_STATUS_LINK_UP BIT(8) +#define QCA8K_PORT_STATUS_LINK_AUTO BIT(9) +#define QCA8K_PORT_STATUS_LINK_PAUSE BIT(10) +#define QCA8K_REG_PORT_HDR_CTRL(_i) (0x9c + (_i * 4)) +#define QCA8K_PORT_HDR_CTRL_RX_MASK GENMASK(3, 2) +#define QCA8K_PORT_HDR_CTRL_RX_S 2 +#define QCA8K_PORT_HDR_CTRL_TX_MASK GENMASK(1, 0) +#define QCA8K_PORT_HDR_CTRL_TX_S 0 +#define QCA8K_PORT_HDR_CTRL_ALL 2 +#define QCA8K_PORT_HDR_CTRL_MGMT 1 +#define QCA8K_PORT_HDR_CTRL_NONE 0 + +/* EEE control registers */ +#define QCA8K_REG_EEE_CTRL 0x100 +#define QCA8K_REG_EEE_CTRL_LPI_EN(_i) ((_i + 1) * 2) + +/* ACL registers */ +#define QCA8K_REG_PORT_VLAN_CTRL0(_i) (0x420 + (_i * 8)) +#define QCA8K_PORT_VLAN_CVID(x) (x << 16) +#define QCA8K_PORT_VLAN_SVID(x) x +#define QCA8K_REG_PORT_VLAN_CTRL1(_i) (0x424 + (_i * 8)) +#define QCA8K_REG_IPV4_PRI_BASE_ADDR 0x470 +#define QCA8K_REG_IPV4_PRI_ADDR_MASK 0x474 + +/* Lookup registers */ +#define QCA8K_REG_ATU_DATA0 0x600 +#define QCA8K_ATU_ADDR2_S 24 +#define QCA8K_ATU_ADDR3_S 16 +#define QCA8K_ATU_ADDR4_S 8 +#define QCA8K_REG_ATU_DATA1 0x604 +#define QCA8K_ATU_PORT_M 0x7f +#define QCA8K_ATU_PORT_S 16 +#define QCA8K_ATU_ADDR0_S 8 +#define QCA8K_REG_ATU_DATA2 0x608 +#define QCA8K_ATU_VID_M 0xfff +#define QCA8K_ATU_VID_S 8 +#define QCA8K_ATU_STATUS_M 0xf +#define QCA8K_ATU_STATUS_STATIC 0xf +#define QCA8K_REG_ATU_FUNC 0x60c +#define QCA8K_ATU_FUNC_BUSY BIT(31) +#define QCA8K_ATU_FUNC_PORT_EN BIT(14) +#define QCA8K_ATU_FUNC_MULTI_EN BIT(13) +#define QCA8K_ATU_FUNC_FULL BIT(12) +#define QCA8K_ATU_FUNC_PORT_M 0xf +#define QCA8K_ATU_FUNC_PORT_S 8 +#define QCA8K_REG_GLOBAL_FW_CTRL0 0x620 +#define QCA8K_GLOBAL_FW_CTRL0_CPU_PORT_EN BIT(10) +#define QCA8K_REG_GLOBAL_FW_CTRL1 0x624 +#define QCA8K_GLOBAL_FW_CTRL1_IGMP_DP_S 24 +#define QCA8K_GLOBAL_FW_CTRL1_BC_DP_S 16 +#define QCA8K_GLOBAL_FW_CTRL1_MC_DP_S 8 +#define 
QCA8K_GLOBAL_FW_CTRL1_UC_DP_S 0 +#define QCA8K_PORT_LOOKUP_CTRL(_i) (0x660 + (_i) * 0xc) +#define QCA8K_PORT_LOOKUP_MEMBER GENMASK(6, 0) +#define QCA8K_PORT_LOOKUP_STATE_MASK GENMASK(18, 16) +#define QCA8K_PORT_LOOKUP_STATE_DISABLED (0 << 16) +#define QCA8K_PORT_LOOKUP_STATE_BLOCKING (1 << 16) +#define QCA8K_PORT_LOOKUP_STATE_LISTENING (2 << 16) +#define QCA8K_PORT_LOOKUP_STATE_LEARNING (3 << 16) +#define QCA8K_PORT_LOOKUP_STATE_FORWARD (4 << 16) +#define QCA8K_PORT_LOOKUP_STATE GENMASK(18, 16) +#define QCA8K_PORT_LOOKUP_LEARN BIT(20) + +/* Pkt edit registers */ +#define QCA8K_EGRESS_VLAN(x) (0x0c70 + (4 * (x / 2))) + +/* L3 registers */ +#define QCA8K_HROUTER_CONTROL 0xe00 +#define QCA8K_HROUTER_CONTROL_GLB_LOCKTIME_M GENMASK(17, 16) +#define QCA8K_HROUTER_CONTROL_GLB_LOCKTIME_S 16 +#define QCA8K_HROUTER_CONTROL_ARP_AGE_MODE 1 +#define QCA8K_HROUTER_PBASED_CONTROL1 0xe08 +#define QCA8K_HROUTER_PBASED_CONTROL2 0xe0c +#define QCA8K_HNAT_CONTROL 0xe38 + +/* MIB registers */ +#define QCA8K_PORT_MIB_COUNTER(_i) (0x1000 + (_i) * 0x100) + +/* QCA specific MII registers */ +#define MII_ATH_MMD_ADDR 0x0d +#define MII_ATH_MMD_DATA 0x0e + +enum { + QCA8K_PORT_SPEED_10M = 0, + QCA8K_PORT_SPEED_100M = 1, + QCA8K_PORT_SPEED_1000M = 2, + QCA8K_PORT_SPEED_ERR = 3, +}; + +enum qca8k_fdb_cmd { + QCA8K_FDB_FLUSH = 1, + QCA8K_FDB_LOAD = 2, + QCA8K_FDB_PURGE = 3, + QCA8K_FDB_NEXT = 6, + QCA8K_FDB_SEARCH = 7, +}; + +struct ar8xxx_port_status { + struct ethtool_eee eee; + struct net_device *bridge_dev; + int enabled; +}; + +struct qca8k_priv { + struct regmap *regmap; + struct mii_bus *bus; + struct ar8xxx_port_status port_sts[QCA8K_NUM_PORTS]; + struct dsa_switch *ds; + struct mutex reg_mutex; +}; + +struct qca8k_mib_desc { + unsigned int size; + unsigned int offset; + const char *name; +}; + +struct qca8k_fdb { + u16 vid; + u8 port_mask; + u8 aging; + u8 mac[6]; +}; + +#endif /* __QCA8K_H */ -- cgit v1.1 From 95f60084acbcee6c466256cf26eb52191fad9edc Mon Sep 17 00:00:00 2001 From: 
Alexander Shishkin Date: Thu, 15 Sep 2016 18:13:50 +0300 Subject: perf/x86/intel/pt: Fix an off-by-one in address filter configuration PT address filter configuration requires that a range is specified by its first and last address, but at the moment we're obtaining the end of the range by adding user specified size to its start, which is off by one from what it actually needs to be. Fix this and make sure that zero-sized filters don't pass the filter validation. Reported-by: Adrian Hunter Signed-off-by: Alexander Shishkin Acked-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: stable@vger.kernel.org # v4.7 Cc: stable@vger.kernel.org#v4.7 Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160915151352.21306-2-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/pt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 04bb5fb..5ec0100 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -1081,7 +1081,7 @@ static int pt_event_addr_filters_validate(struct list_head *filters) list_for_each_entry(filter, filters, entry) { /* PT doesn't support single address triggers */ - if (!filter->range) + if (!filter->range || !filter->size) return -EOPNOTSUPP; if (!filter->inode && !kernel_ip(filter->offset)) @@ -1111,7 +1111,7 @@ static void pt_event_addr_filters_sync(struct perf_event *event) } else { /* apply the offset */ msr_a = filter->offset + offs[range]; - msr_b = filter->size + msr_a; + msr_b = filter->size + msr_a - 1; } filters->filter[range].msr_a = msr_a; -- cgit v1.1 From ddfdad991e55b65c1cc4ee29502f6dceee04455a Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 15 Sep 2016 18:13:51 +0300 Subject: perf/x86/intel/pt: Fix kernel address filter's offset validation The kernel_ip() 
filter is used mostly by the DS/LBR code to look at the branch addresses, but Intel PT also uses it to validate the address filter offsets for kernel addresses, for which it is not sufficient: supplying something in bits 63:48 that's not a sign extension of the lower address bits (like 0xf00d000000000000) throws a #GP. This patch adds address validation for the user supplied kernel filters. Reported-by: Adrian Hunter Signed-off-by: Alexander Shishkin Acked-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: stable@vger.kernel.org # v4.7 Cc: stable@vger.kernel.org#v4.7 Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160915151352.21306-3-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/pt.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 5ec0100..1f94963 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -1074,6 +1074,11 @@ static void pt_addr_filters_fini(struct perf_event *event) event->hw.addr_filters = NULL; } +static inline bool valid_kernel_ip(unsigned long ip) +{ + return virt_addr_valid(ip) && kernel_ip(ip); +} + static int pt_event_addr_filters_validate(struct list_head *filters) { struct perf_addr_filter *filter; @@ -1084,7 +1089,7 @@ static int pt_event_addr_filters_validate(struct list_head *filters) if (!filter->range || !filter->size) return -EOPNOTSUPP; - if (!filter->inode && !kernel_ip(filter->offset)) + if (!filter->inode && !valid_kernel_ip(filter->offset)) return -EINVAL; if (++range > pt_cap_get(PT_CAP_num_address_ranges)) return -EOPNOTSUPP; -- cgit v1.1 From 1155bafcb79208abc6ae234c6e135ac70607755c Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 15 Sep 2016 18:13:52 +0300 Subject: perf/x86/intel/pt: Do validate the size of a kernel address filter Right now, the
kernel address filters in PT are prone to integer overflow that may happen in adding filter's size to its offset to obtain the end of the range. Such an overflow would also throw a #GP in the PT event configuration path. Fix this by explicitly validating the result of this calculation. Reported-by: Adrian Hunter Signed-off-by: Alexander Shishkin Acked-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: stable@vger.kernel.org # v4.7 Cc: stable@vger.kernel.org#v4.7 Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160915151352.21306-4-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/pt.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 1f94963..861a7d9 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -1089,8 +1089,13 @@ static int pt_event_addr_filters_validate(struct list_head *filters) if (!filter->range || !filter->size) return -EOPNOTSUPP; - if (!filter->inode && !valid_kernel_ip(filter->offset)) - return -EINVAL; + if (!filter->inode) { + if (!valid_kernel_ip(filter->offset)) + return -EINVAL; + + if (!valid_kernel_ip(filter->offset + filter->size)) + return -EINVAL; + } if (++range > pt_cap_get(PT_CAP_num_address_ranges)) return -EOPNOTSUPP; -- cgit v1.1 From c68df2e7be0c1238ea3c281fd744a204ef3b15a0 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 15 Sep 2016 16:30:02 +0300 Subject: mac80211: allow using AP_LINK_PS with mac80211-generated TIM IE In 46fa38e84b65 ("mac80211: allow software PS-Poll/U-APSD with AP_LINK_PS"), Johannes allowed to use mac80211's code for handling stations that go to PS or send PS-Poll / uAPSD trigger frames for devices that enable RSS. 
This means that mac80211 doesn't look at frames anymore but rather relies on a notification that will come from the device when a PS transition occurs or when a PS-Poll / trigger frame is detected by the device. iwlwifi will need this capability but still needs mac80211 to take care of the TIM IE. Today, if a driver sets AP_LINK_PS, mac80211 will not update the TIM IE. Change mac80211 to check existence of the set_tim driver callback rather than using AP_LINK_PS to decide if the driver handles the TIM IE internally or not. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho [reword commit message a bit] Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 5e70fa5..1b1b28f 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -690,7 +690,7 @@ static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending) } /* No need to do anything if the driver does all */ - if (ieee80211_hw_check(&local->hw, AP_LINK_PS)) + if (!local->ops->set_tim) return; if (sta->dead) -- cgit v1.1 From b59abfbed638037f3b51eeb73266892cd2df177f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 15 Sep 2016 16:30:03 +0300 Subject: mac80211_hwsim: statically initialize hwsim_radios list There's no need to initialize at runtime, when the static declaration macro can just be used instead, so do that. 
Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 8c35ac8..431f13b 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -487,7 +487,7 @@ static const struct ieee80211_iface_combination hwsim_if_comb_p2p_dev[] = { }; static spinlock_t hwsim_radio_lock; -static struct list_head hwsim_radios; +static LIST_HEAD(hwsim_radios); static int hwsim_radio_idx; static struct platform_driver mac80211_hwsim_driver = { @@ -3376,7 +3376,6 @@ static int __init init_mac80211_hwsim(void) mac80211_hwsim_unassign_vif_chanctx; spin_lock_init(&hwsim_radio_lock); - INIT_LIST_HEAD(&hwsim_radios); err = register_pernet_device(&hwsim_net_ops); if (err) -- cgit v1.1 From fbd05e4a6e82fd573d3aa79e284e424b8d78c149 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Thu, 15 Sep 2016 18:15:09 +0300 Subject: cfg80211: add helper to find an IE that matches a byte-array There are a few places where an IE that matches not only the EID, but also other bytes inside the element, needs to be found. To simplify that and reduce the amount of similar code, implement a new helper function to match the EID and an extra array of bytes. Additionally, simplify cfg80211_find_vendor_ie() by using the new match function. 
Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 33 +++++++++++++++++++++++++++- net/wireless/scan.c | 58 +++++++++++++++++++++++--------------------------- 2 files changed, 59 insertions(+), 32 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d5e7f69..533cb64 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3947,6 +3947,34 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb, struct cfg80211_qos_map *qos_map); /** + * cfg80211_find_ie_match - match information element and byte array in data + * + * @eid: element ID + * @ies: data consisting of IEs + * @len: length of data + * @match: byte array to match + * @match_len: number of bytes in the match array + * @match_offset: offset in the IE where the byte array should match. + * If match_len is zero, this must also be set to zero. + * Otherwise this must be set to 2 or more, because the first + * byte is the element id, which is already compared to eid, and + * the second byte is the IE length. + * + * Return: %NULL if the element ID could not be found or if + * the element is invalid (claims to be longer than the given + * data) or if the byte array doesn't match, or a pointer to the first + * byte of the requested element, that is the byte containing the + * element ID. + * + * Note: There are no checks on the element length other than + * having to fit into the given data and being large enough for the + * byte array to match. + */ +const u8 *cfg80211_find_ie_match(u8 eid, const u8 *ies, int len, + const u8 *match, int match_len, + int match_offset); + +/** * cfg80211_find_ie - find information element in data * * @eid: element ID @@ -3961,7 +3989,10 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb, * Note: There are no checks on the element length other than * having to fit into the given data. 
*/ -const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len); +static inline const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len) +{ + return cfg80211_find_ie_match(eid, ies, len, NULL, 0, 0); +} /** * cfg80211_find_vendor_ie - find vendor specific information element in data diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 0358e12..b5bd58d 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -352,52 +352,48 @@ void cfg80211_bss_expire(struct cfg80211_registered_device *rdev) __cfg80211_bss_expire(rdev, jiffies - IEEE80211_SCAN_RESULT_EXPIRE); } -const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len) +const u8 *cfg80211_find_ie_match(u8 eid, const u8 *ies, int len, + const u8 *match, int match_len, + int match_offset) { - while (len > 2 && ies[0] != eid) { + /* match_offset can't be smaller than 2, unless match_len is + * zero, in which case match_offset must be zero as well. + */ + if (WARN_ON((match_len && match_offset < 2) || + (!match_len && match_offset))) + return NULL; + + while (len >= 2 && len >= ies[1] + 2) { + if ((ies[0] == eid) && + (ies[1] + 2 >= match_offset + match_len) && + !memcmp(ies + match_offset, match, match_len)) + return ies; + len -= ies[1] + 2; ies += ies[1] + 2; } - if (len < 2) - return NULL; - if (len < 2 + ies[1]) - return NULL; - return ies; + + return NULL; } -EXPORT_SYMBOL(cfg80211_find_ie); +EXPORT_SYMBOL(cfg80211_find_ie_match); const u8 *cfg80211_find_vendor_ie(unsigned int oui, int oui_type, const u8 *ies, int len) { - struct ieee80211_vendor_ie *ie; - const u8 *pos = ies, *end = ies + len; - int ie_oui; + const u8 *ie; + u8 match[] = { oui >> 16, oui >> 8, oui, oui_type }; + int match_len = (oui_type < 0) ? 
3 : sizeof(match); if (WARN_ON(oui_type > 0xff)) return NULL; - while (pos < end) { - pos = cfg80211_find_ie(WLAN_EID_VENDOR_SPECIFIC, pos, - end - pos); - if (!pos) - return NULL; - - ie = (struct ieee80211_vendor_ie *)pos; - - /* make sure we can access ie->len */ - BUILD_BUG_ON(offsetof(struct ieee80211_vendor_ie, len) != 1); + ie = cfg80211_find_ie_match(WLAN_EID_VENDOR_SPECIFIC, ies, len, + match, match_len, 2); - if (ie->len < sizeof(*ie)) - goto cont; + if (ie && (ie[1] < 4)) + return NULL; - ie_oui = ie->oui[0] << 16 | ie->oui[1] << 8 | ie->oui[2]; - if (ie_oui == oui && - (oui_type < 0 || ie->oui_type == oui_type)) - return pos; -cont: - pos += 2 + ie->len; - } - return NULL; + return ie; } EXPORT_SYMBOL(cfg80211_find_vendor_ie); -- cgit v1.1 From 080fe0b790ad438fc1b61621dac37c1964ce7f35 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Wed, 24 Aug 2016 14:12:08 +0100 Subject: perf/x86/amd: Make HW_CACHE_REFERENCES and HW_CACHE_MISSES measure L2 While the Intel PMU monitors the LLC when perf enables the HW_CACHE_REFERENCES and HW_CACHE_MISSES events, these events monitor L1 instruction cache fetches (0x0080) and instruction cache misses (0x0081) on the AMD PMU. This is extremely confusing when monitoring the same workload across Intel and AMD machines, since parameters like, $ perf stat -e cache-references,cache-misses measure completely different things. Instead, make the AMD PMU measure instruction/data cache and TLB fill requests to the L2 and instruction/data cache and TLB misses in the L2 when HW_CACHE_REFERENCES and HW_CACHE_MISSES are enabled, respectively. That way the events measure unified caches on both platforms. 
Signed-off-by: Matt Fleming Acked-by: Peter Zijlstra Cc: Cc: Borislav Petkov Cc: Linus Torvalds Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1472044328-21302-1-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/events/amd/core.c | 4 ++-- arch/x86/kvm/pmu_amd.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index e07a22b..f5f4b3f 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -119,8 +119,8 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, - [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x077d, + [PERF_COUNT_HW_CACHE_MISSES] = 0x077e, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */ diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c index 39b9112..cd94443 100644 --- a/arch/x86/kvm/pmu_amd.c +++ b/arch/x86/kvm/pmu_amd.c @@ -23,8 +23,8 @@ static struct kvm_event_hw_type_mapping amd_event_mapping[] = { [0] = { 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES }, [1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS }, - [2] = { 0x80, 0x00, PERF_COUNT_HW_CACHE_REFERENCES }, - [3] = { 0x81, 0x00, PERF_COUNT_HW_CACHE_MISSES }, + [2] = { 0x7d, 0x07, PERF_COUNT_HW_CACHE_REFERENCES }, + [3] = { 0x7e, 0x07, PERF_COUNT_HW_CACHE_MISSES }, [4] = { 0xc2, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, [5] = { 0xc3, 0x00, PERF_COUNT_HW_BRANCH_MISSES }, [6] = { 0xd0, 0x00, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, -- cgit v1.1 From ed3d6d0ac036bcff6e4649a6f8fb987bb4e34444 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 16 Sep 2016 09:24:44 -0500 Subject: usb: musb: Fix tusb6010 compile error on blackfin We have CONFIG_BLACKFIN ifdef redefining all musb registers in 
musb_regs.h and tusb6010.h is never included causing a build error with blackfin-allmodconfig and COMPILE_TEST. Let's fix the issue by not building tusb6010 if CONFIG_BLACKFIN is selected. Reported-by: kbuild test robot Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/musb/Kconfig b/drivers/usb/musb/Kconfig index 886526b..73cfa13 100644 --- a/drivers/usb/musb/Kconfig +++ b/drivers/usb/musb/Kconfig @@ -87,7 +87,7 @@ config USB_MUSB_DA8XX config USB_MUSB_TUSB6010 tristate "TUSB6010" depends on HAS_IOMEM - depends on ARCH_OMAP2PLUS || COMPILE_TEST + depends on (ARCH_OMAP2PLUS || COMPILE_TEST) && !BLACKFIN depends on NOP_USB_XCEIV = USB_MUSB_HDRC # both built-in or both modules config USB_MUSB_OMAP2PLUS -- cgit v1.1 From 08c5cd37480f59ea39682f4585d92269be6b1424 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 16 Sep 2016 10:24:26 -0400 Subject: USB: change bInterval default to 10 ms Some full-speed mceusb infrared transceivers contain invalid endpoint descriptors for their interrupt endpoints, with bInterval set to 0. In the past they have worked out okay with the mceusb driver, because the driver sets the bInterval field in the descriptor to 1, overwriting whatever value may have been there before. However, this approach was never sanctioned by the USB core, and in fact it does not work with xHCI controllers, because they use the bInterval value that was present when the configuration was installed. Currently usbcore uses 32 ms as the default interval if the value in the endpoint descriptor is invalid. It turns out that these IR transceivers don't work properly unless the interval is set to 10 ms or below. To work around this mceusb problem, this patch changes the endpoint-descriptor parsing routine, making the default interval value be 10 ms rather than 32 ms. 
Signed-off-by: Alan Stern Tested-by: Wade Berrier CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 15ce4ab..a2d90ac 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -240,8 +240,10 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, memcpy(&endpoint->desc, d, n); INIT_LIST_HEAD(&endpoint->urb_list); - /* Fix up bInterval values outside the legal range. Use 32 ms if no - * proper value can be guessed. */ + /* + * Fix up bInterval values outside the legal range. + * Use 10 or 8 ms if no proper value can be guessed. + */ i = 0; /* i = min, j = max, n = default */ j = 255; if (usb_endpoint_xfer_int(d)) { @@ -250,13 +252,15 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, case USB_SPEED_SUPER_PLUS: case USB_SPEED_SUPER: case USB_SPEED_HIGH: - /* Many device manufacturers are using full-speed + /* + * Many device manufacturers are using full-speed * bInterval values in high-speed interrupt endpoint - * descriptors. Try to fix those and fall back to a - * 32 ms default value otherwise. */ + * descriptors. Try to fix those and fall back to an + * 8-ms default value otherwise. + */ n = fls(d->bInterval*8); if (n == 0) - n = 9; /* 32 ms = 2^(9-1) uframes */ + n = 7; /* 8 ms = 2^(7-1) uframes */ j = 16; /* @@ -271,10 +275,12 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, } break; default: /* USB_SPEED_FULL or _LOW */ - /* For low-speed, 10 ms is the official minimum. + /* + * For low-speed, 10 ms is the official minimum. * But some "overclocked" devices might want faster - * polling so we'll allow it. */ - n = 32; + * polling so we'll allow it. 
+ */ + n = 10; break; } } else if (usb_endpoint_xfer_isoc(d)) { @@ -282,10 +288,10 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, j = 16; switch (to_usb_device(ddev)->speed) { case USB_SPEED_HIGH: - n = 9; /* 32 ms = 2^(9-1) uframes */ + n = 7; /* 8 ms = 2^(7-1) uframes */ break; default: /* USB_SPEED_FULL */ - n = 6; /* 32 ms = 2^(6-1) frames */ + n = 4; /* 8 ms = 2^(4-1) frames */ break; } } -- cgit v1.1 From 5e102b3b4fa944815aead89e63eec2a35069a07b Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Fri, 2 Sep 2016 23:46:53 +0300 Subject: IB/rxe: fix GFP_KERNEL in spinlock context There is skb_clone(skb, GFP_KERNEL) in spinlock context in rxe_rcv_mcast_pkt(). Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Acked-by: Moni Shoua Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_recv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 3d464c2..144d2f1 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -312,7 +312,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) * make a copy of the skb to post to the next qp */ skb_copy = (mce->qp_list.next != &mcg->qp_list) ? - skb_clone(skb, GFP_KERNEL) : NULL; + skb_clone(skb, GFP_ATOMIC) : NULL; pkt->qp = qp; rxe_add_ref(qp); -- cgit v1.1 From 344bacca8cd811809fc33a249f2738ab757d327f Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Mon, 12 Sep 2016 09:55:28 +0300 Subject: IB/ipoib: Don't allow MC joins during light MC flush This fix solves a race between light flush and on the fly joins. Light flush doesn't set the device to down and unset IPOIB_OPER_UP flag, this means that if while flushing we have a MC join in progress and the QP was attached to BC MGID we can have a mismatches when re-attaching a QP to the BC MGID. 
The light flush would set the broadcast group to NULL causing an on the fly join to rejoin and reattach to the BC MCG as well as adding the BC MGID to the multicast list. The flush process would later on remove the BC MGID and detach it from the QP. On the next flush the BC MGID is present in the multicast list but not found when trying to detach it because of the previous double attach and single detach. [18332.714265] ------------[ cut here ]------------ [18332.717775] WARNING: CPU: 6 PID: 3767 at drivers/infiniband/core/verbs.c:280 ib_dealloc_pd+0xff/0x120 [ib_core] ... [18332.775198] Hardware name: Red Hat KVM, BIOS Bochs 01/01/2011 [18332.779411] 0000000000000000 ffff8800b50dfbb0 ffffffff813fed47 0000000000000000 [18332.784960] 0000000000000000 ffff8800b50dfbf0 ffffffff8109add1 0000011832f58300 [18332.790547] ffff880226a596c0 ffff880032482000 ffff880032482830 ffff880226a59280 [18332.796199] Call Trace: [18332.798015] [] dump_stack+0x63/0x8c [18332.801831] [] __warn+0xd1/0xf0 [18332.805403] [] warn_slowpath_null+0x1d/0x20 [18332.809706] [] ib_dealloc_pd+0xff/0x120 [ib_core] [18332.814384] [] ipoib_transport_dev_cleanup+0xfc/0x1d0 [ib_ipoib] [18332.820031] [] ipoib_ib_dev_cleanup+0x98/0x110 [ib_ipoib] [18332.825220] [] ipoib_dev_cleanup+0x2d8/0x550 [ib_ipoib] [18332.830290] [] ipoib_uninit+0x2f/0x40 [ib_ipoib] [18332.834911] [] rollback_registered_many+0x1aa/0x2c0 [18332.839741] [] rollback_registered+0x31/0x40 [18332.844091] [] unregister_netdevice_queue+0x48/0x80 [18332.848880] [] ipoib_vlan_delete+0x1fb/0x290 [ib_ipoib] [18332.853848] [] delete_child+0x7d/0xf0 [ib_ipoib] [18332.858474] [] dev_attr_store+0x18/0x30 [18332.862510] [] sysfs_kf_write+0x3a/0x50 [18332.866349] [] kernfs_fop_write+0x120/0x170 [18332.870471] [] __vfs_write+0x28/0xe0 [18332.874152] [] ? 
percpu_down_read+0x1f/0x50 [18332.878274] [] vfs_write+0xa2/0x1a0 [18332.881896] [] SyS_write+0x46/0xa0 [18332.885632] [] do_syscall_64+0x57/0xb0 [18332.889709] [] entry_SYSCALL64_slow_path+0x25/0x25 [18332.894727] ---[ end trace 09ebbe31f831ef17 ]--- Fixes: ee1e2c82c245 ("IPoIB: Refresh paths instead of flushing them on SM change events") Signed-off-by: Alex Vesker Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index dc6d241..be11d5d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -1161,8 +1161,17 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, } if (level == IPOIB_FLUSH_LIGHT) { + int oper_up; ipoib_mark_paths_invalid(dev); + /* Set IPoIB operation as down to prevent races between: + * the flush flow which leaves MCG and on the fly joins + * which can happen during that time. mcast restart task + * should deal with join requests we missed. + */ + oper_up = test_and_clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags); ipoib_mcast_dev_flush(dev); + if (oper_up) + set_bit(IPOIB_FLAG_OPER_UP, &priv->flags); ipoib_flush_ah(dev); } -- cgit v1.1 From e5ac40cd66c2f3cd11bc5edc658f012661b16347 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Mon, 12 Sep 2016 19:16:18 +0300 Subject: IB/mlx4: Fix incorrect MC join state bit-masking on SR-IOV Because of an incorrect bit-masking done on the join state bits, when handling a join request we failed to detect a difference between the group join state and the request join state when joining as send only full member (0x8). This caused the MC join request not to be sent. This issue is relevant only when SRIOV is enabled and SM supports send only full member. This fix separates scope bits and join states bits a nibble each. 
Fixes: b9c5d6a64358 ('IB/mlx4: Add multicast group (MCG) paravirtualization for SR-IOV') Signed-off-by: Alex Vesker Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/mcg.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index 8f7ad07..097bfcc 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -489,7 +489,7 @@ static u8 get_leave_state(struct mcast_group *group) if (!group->members[i]) leave_state |= (1 << i); - return leave_state & (group->rec.scope_join_state & 7); + return leave_state & (group->rec.scope_join_state & 0xf); } static int join_group(struct mcast_group *group, int slave, u8 join_mask) @@ -564,8 +564,8 @@ static void mlx4_ib_mcg_timeout_handler(struct work_struct *work) } else mcg_warn_group(group, "DRIVER BUG\n"); } else if (group->state == MCAST_LEAVE_SENT) { - if (group->rec.scope_join_state & 7) - group->rec.scope_join_state &= 0xf8; + if (group->rec.scope_join_state & 0xf) + group->rec.scope_join_state &= 0xf0; group->state = MCAST_IDLE; mutex_unlock(&group->lock); if (release_group(group, 1)) @@ -605,7 +605,7 @@ static int handle_leave_req(struct mcast_group *group, u8 leave_mask, static int handle_join_req(struct mcast_group *group, u8 join_mask, struct mcast_req *req) { - u8 group_join_state = group->rec.scope_join_state & 7; + u8 group_join_state = group->rec.scope_join_state & 0xf; int ref = 0; u16 status; struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data; @@ -690,8 +690,8 @@ static void mlx4_ib_mcg_work_handler(struct work_struct *work) u8 cur_join_state; resp_join_state = ((struct ib_sa_mcmember_data *) - group->response_sa_mad.data)->scope_join_state & 7; - cur_join_state = group->rec.scope_join_state & 7; + group->response_sa_mad.data)->scope_join_state & 0xf; + cur_join_state = group->rec.scope_join_state & 0xf; if (method == 
IB_MGMT_METHOD_GET_RESP) { /* successfull join */ @@ -710,7 +710,7 @@ process_requests: req = list_first_entry(&group->pending_list, struct mcast_req, group_list); sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data; - req_join_state = sa_data->scope_join_state & 0x7; + req_join_state = sa_data->scope_join_state & 0xf; /* For a leave request, we will immediately answer the VF, and * update our internal counters. The actual leave will be sent -- cgit v1.1 From baa0be7026e2f7d1d40bfd45909044169e9e3c68 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 12 Sep 2016 19:16:19 +0300 Subject: IB/mlx4: Fix code indentation in QP1 MAD flow The indentation in the QP1 GRH flow in procedure build_mlx_header is really confusing. Fix it, in preparation for a commit which touches this code. Fixes: 1ffeb2eb8be9 ("IB/mlx4: SR-IOV IB context objects and proxy/tunnel SQP support") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/qp.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 768085f..e398c04 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -2493,24 +2493,26 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, sqp->ud_header.grh.flow_label = ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff); sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit; - if (is_eth) + if (is_eth) { memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16); - else { - if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { - /* When multi-function is enabled, the ib_core gid - * indexes don't necessarily match the hw ones, so - * we must use our own cache */ - sqp->ud_header.grh.source_gid.global.subnet_prefix = - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. 
- subnet_prefix; - sqp->ud_header.grh.source_gid.global.interface_id = - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. - guid_cache[ah->av.ib.gid_index]; - } else - ib_get_cached_gid(ib_dev, - be32_to_cpu(ah->av.ib.port_pd) >> 24, - ah->av.ib.gid_index, - &sqp->ud_header.grh.source_gid, NULL); + } else { + if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { + /* When multi-function is enabled, the ib_core gid + * indexes don't necessarily match the hw ones, so + * we must use our own cache + */ + sqp->ud_header.grh.source_gid.global.subnet_prefix = + to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. + subnet_prefix; + sqp->ud_header.grh.source_gid.global.interface_id = + to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. + guid_cache[ah->av.ib.gid_index]; + } else { + ib_get_cached_gid(ib_dev, + be32_to_cpu(ah->av.ib.port_pd) >> 24, + ah->av.ib.gid_index, + &sqp->ud_header.grh.source_gid, NULL); + } } memcpy(sqp->ud_header.grh.destination_gid.raw, ah->av.ib.dgid, 16); -- cgit v1.1 From 8ec07bf8a8b57d6c58927a16a0a22c0115cf2855 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 12 Sep 2016 19:16:20 +0300 Subject: IB/mlx4: Use correct subnet-prefix in QP1 mads under SR-IOV When sending QP1 MAD packets which use a GRH, the source GID (which consists of the 64-bit subnet prefix, and the 64 bit port GUID) must be included in the packet GRH. For SR-IOV, a GID cache is used, since the source GID needs to be the slave's source GID, and not the Hypervisor's GID. This cache also included a subnet_prefix. Unfortunately, the subnet_prefix field in the cache was never initialized (to the default subnet prefix 0xfe80::0). As a result, this field remained all zeroes. Therefore, when SR-IOV was active, all QP1 packets which included a GRH had a source GID subnet prefix of all-zeroes. However, the subnet-prefix should initially be 0xfe80::0 (the default subnet prefix). 
In addition, if OpenSM modifies a port's subnet prefix, the new subnet prefix must be used in the GRH when sending QP1 packets. To fix this we now initialize the subnet prefix in the SR-IOV GID cache to the default subnet prefix. We update the cached value if/when OpenSM modifies the port's subnet prefix. We take this cached value when sending QP1 packets when SR-IOV is active. Note that the value is stored as an atomic64. This eliminates any need for locking when the subnet prefix is being updated. Note also that we depend on the FW generating the "port management change" event for tracking subnet-prefix changes performed by OpenSM. If running early FW (before 2.9.4630), subnet prefix changes will not be tracked (but the default subnet prefix still will be stored in the cache; therefore users who do not modify the subnet prefix will not have a problem). IF there is a need for such tracking also for early FW, we will add that capability in a subsequent patch. Fixes: 1ffeb2eb8be9 ("IB/mlx4: SR-IOV IB context objects and proxy/tunnel SQP support") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/mad.c | 23 +++++++++++++++++++++++ drivers/infiniband/hw/mlx4/mlx4_ib.h | 2 +- drivers/infiniband/hw/mlx4/qp.c | 5 +++-- 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 9c2e53d..0f21c3a 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1128,6 +1128,27 @@ void handle_port_mgmt_change_event(struct work_struct *work) /* Generate GUID changed event */ if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) { + if (mlx4_is_master(dev->dev)) { + union ib_gid gid; + int err = 0; + + if (!eqe->event.port_mgmt_change.params.port_info.gid_prefix) + err = __mlx4_ib_query_gid(&dev->ib_dev, port, 0, &gid, 1); + else + gid.global.subnet_prefix = + 
eqe->event.port_mgmt_change.params.port_info.gid_prefix; + if (err) { + pr_warn("Could not change QP1 subnet prefix for port %d: query_gid error (%d)\n", + port, err); + } else { + pr_debug("Changing QP1 subnet prefix for port %d. old=0x%llx. new=0x%llx\n", + port, + (u64)atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix), + be64_to_cpu(gid.global.subnet_prefix)); + atomic64_set(&dev->sriov.demux[port - 1].subnet_prefix, + be64_to_cpu(gid.global.subnet_prefix)); + } + } mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); /*if master, notify all slaves*/ if (mlx4_is_master(dev->dev)) @@ -2202,6 +2223,8 @@ int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev) if (err) goto demux_err; dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id; + atomic64_set(&dev->sriov.demux[i].subnet_prefix, + be64_to_cpu(gid.global.subnet_prefix)); err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1, &dev->sriov.sqps[i]); if (err) diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 7c5832e..686ab48 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -448,7 +448,7 @@ struct mlx4_ib_demux_ctx { struct workqueue_struct *wq; struct workqueue_struct *ud_wq; spinlock_t ud_lock; - __be64 subnet_prefix; + atomic64_t subnet_prefix; __be64 guid_cache[128]; struct mlx4_ib_dev *dev; /* the following lock protects both mcg_table and mcg_mgid0_list */ diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index e398c04..7fb9629 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -2502,8 +2502,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, * we must use our own cache */ sqp->ud_header.grh.source_gid.global.subnet_prefix = - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. - subnet_prefix; + cpu_to_be64(atomic64_read(&(to_mdev(ib_dev)->sriov. + demux[sqp->qp.port - 1]. 
+ subnet_prefix))); sqp->ud_header.grh.source_gid.global.interface_id = to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. guid_cache[ah->av.ib.gid_index]; -- cgit v1.1 From 69d269d38910e697e461ec5677368f57d2046cbe Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Mon, 12 Sep 2016 19:16:22 +0300 Subject: IB/mlx4: Diagnostic HW counters are not supported in slave mode Modify the mlx4_ib_diag_counters() to avoid the following error in the hypervisor when the slave tries to query the hardware counters in SR-IOV mode. mlx4_core 0000:81:00.0: Unknown command:0x30 accepted from slave:1 Fixes: 3f85f2aaabf7 ("IB/mlx4: Add diagnostic hardware counters") Signed-off-by: Kamal Heib Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 2af44c2..87ba9bc 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2202,6 +2202,9 @@ static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev) bool per_port = !!(ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT); + if (mlx4_is_slave(ibdev->dev)) + return 0; + for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) { /* i == 1 means we are building port counters */ if (i && !per_port) -- cgit v1.1 From 7fae6655a0c897875bd34501ec092232b526d3e4 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Mon, 12 Sep 2016 19:16:23 +0300 Subject: IB/mlx5: Enable MAD_IFC commands for IB ports only MAD_IFC command is supported only for physical functions (PF) and when physical port is IB. The proposed fix enforces it. 
Fixes: d603c809ef91 ("IB/mlx5: Fix decision on using MAD_IFC") Reported-by: David Chang Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 8150ea3..0480b64 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -288,7 +288,9 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev) { - return !MLX5_CAP_GEN(dev->mdev, ib_virt); + if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB) + return !MLX5_CAP_GEN(dev->mdev, ib_virt); + return 0; } enum { -- cgit v1.1 From ee3da804ad1b1dd4c766199a6e8443542b0aaaef Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 12 Sep 2016 19:16:24 +0300 Subject: IB/mlx5: Set source mac address in FTE Set the source mac address in the FTE when L2 specification is provided. 
Fixes: 038d2ef87572 ('IB/mlx5: Add flow steering support') Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 0480b64..e19537c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1430,6 +1430,13 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, dmac_47_16), ib_spec->eth.val.dst_mac); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + smac_47_16), + ib_spec->eth.mask.src_mac); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + smac_47_16), + ib_spec->eth.val.src_mac); + if (ib_spec->eth.mask.vlan_tag) { MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, vlan_tag, 1); -- cgit v1.1 From dfdd6158ca2c8838313470c5ab79243d04da70b2 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Wed, 7 Sep 2016 14:04:04 +0300 Subject: IB/rxe: Fix kernel panic in udp_setup_tunnel Disable creation of a UDP socket for ipv6 when CONFIG_IPV6 is not enabled.
Since udp_sock_create6() returns 0 when CONFIG_IPV6 is not set [ 46.888632] IP: [] setup_udp_tunnel_sock+0x6/0x4f [ 46.891355] *pdpt = 0000000000000000 *pde = f000ff53f000ff53 [ 46.893918] Oops: 0002 [#1] PREEMPT [ 46.896014] CPU: 0 PID: 1 Comm: swapper Not tainted 4.7.0-rc4-00001-g8700e3e #1 [ 46.900280] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Debian-1.8.2-1 04/01/2014 [ 46.904905] task: cf06c040 ti: cf05e000 task.ti: cf05e000 [ 46.907854] EIP: 0060:[] EFLAGS: 00210246 CPU: 0 [ 46.911137] EIP is at setup_udp_tunnel_sock+0x6/0x4f [ 46.914070] EAX: 00000044 EBX: 00000001 ECX: cf05fef0 EDX: ca8142e0 [ 46.917236] ESI: c2c4505b EDI: cf05fef0 EBP: cf05fed0 ESP: cf05fed0 [ 46.919836] DS: 007b ES: 007b FS: 0000 GS: 00e0 SS: 0068 [ 46.922046] CR0: 80050033 CR2: 000001fc CR3: 02cec000 CR4: 000006b0 [ 46.924550] Stack: [ 46.926014] cf05ff10 c1fd4657 ca8142e0 0000000a 00000000 00000000 0000b712 00000008 [ 46.931274] 00000000 6bb5bd01 c1fd48de 00000000 00000000 cf05ff1c 00000000 00000000 [ 46.936122] cf05ff1c c1fd4bdf 00000000 cf05ff28 c2c4507b ffffffff cf05ff88 c2bf1c74 [ 46.942350] Call Trace: [ 46.944403] [] rxe_setup_udp_tunnel+0x8f/0x99 [ 46.947689] [] ? net_to_rxe+0x4e/0x4e [ 46.950567] [] rxe_net_init+0xe/0xa4 [ 46.953147] [] rxe_module_init+0x20/0x4c [ 46.955448] [] do_one_initcall+0x89/0x113 [ 46.957797] [] ? set_debug_rodata+0xf/0xf [ 46.959966] [] ? kernel_init_freeable+0xbe/0x15b [ 46.962262] [] kernel_init_freeable+0xde/0x15b [ 46.964418] [] kernel_init+0x8/0xd0 [ 46.966618] [] ret_from_kernel_thread+0xe/0x24 [ 46.969592] [] ? 
rest_init+0x6f/0x6f Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe.c | 25 +++++++++++++++-- drivers/infiniband/sw/rxe/rxe_net.c | 55 +++++++++++++++++-------------------- drivers/infiniband/sw/rxe/rxe_net.h | 5 +++- 3 files changed, 51 insertions(+), 34 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 55f0e8f..ddd5927 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -362,15 +362,34 @@ static int __init rxe_module_init(void) return err; } - err = rxe_net_init(); + err = rxe_net_ipv4_init(); if (err) { - pr_err("rxe: unable to init\n"); + pr_err("rxe: unable to init ipv4 tunnel\n"); rxe_cache_exit(); - return err; + goto exit; + } + + err = rxe_net_ipv6_init(); + if (err) { + pr_err("rxe: unable to init ipv6 tunnel\n"); + rxe_cache_exit(); + goto exit; } + + err = register_netdevice_notifier(&rxe_net_notifier); + if (err) { + pr_err("rxe: Failed to rigister netdev notifier\n"); + goto exit; + } + pr_info("rxe: loaded\n"); return 0; + +exit: + rxe_release_udp_tunnel(recv_sockets.sk4); + rxe_release_udp_tunnel(recv_sockets.sk6); + return err; } static void __exit rxe_module_exit(void) diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 0b8d2ea..eedf2f1 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -275,9 +275,10 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port, return sock; } -static void rxe_release_udp_tunnel(struct socket *sk) +void rxe_release_udp_tunnel(struct socket *sk) { - udp_tunnel_sock_release(sk); + if (sk) + udp_tunnel_sock_release(sk); } static void prepare_udp_hdr(struct sk_buff *skb, __be16 src_port, @@ -658,51 +659,45 @@ out: return NOTIFY_OK; } -static struct notifier_block rxe_net_notifier = { +struct notifier_block rxe_net_notifier = { 
.notifier_call = rxe_notify, }; -int rxe_net_init(void) +int rxe_net_ipv4_init(void) { - int err; - spin_lock_init(&dev_list_lock); - recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net, - htons(ROCE_V2_UDP_DPORT), true); - if (IS_ERR(recv_sockets.sk6)) { - recv_sockets.sk6 = NULL; - pr_err("rxe: Failed to create IPv6 UDP tunnel\n"); - return -1; - } - recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net, - htons(ROCE_V2_UDP_DPORT), false); + htons(ROCE_V2_UDP_DPORT), false); if (IS_ERR(recv_sockets.sk4)) { - rxe_release_udp_tunnel(recv_sockets.sk6); recv_sockets.sk4 = NULL; - recv_sockets.sk6 = NULL; pr_err("rxe: Failed to create IPv4 UDP tunnel\n"); return -1; } - err = register_netdevice_notifier(&rxe_net_notifier); - if (err) { - rxe_release_udp_tunnel(recv_sockets.sk6); - rxe_release_udp_tunnel(recv_sockets.sk4); - pr_err("rxe: Failed to rigister netdev notifier\n"); - } - - return err; + return 0; } -void rxe_net_exit(void) +int rxe_net_ipv6_init(void) { - if (recv_sockets.sk6) - rxe_release_udp_tunnel(recv_sockets.sk6); +#if IS_ENABLED(CONFIG_IPV6) - if (recv_sockets.sk4) - rxe_release_udp_tunnel(recv_sockets.sk4); + spin_lock_init(&dev_list_lock); + recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net, + htons(ROCE_V2_UDP_DPORT), true); + if (IS_ERR(recv_sockets.sk6)) { + recv_sockets.sk6 = NULL; + pr_err("rxe: Failed to create IPv6 UDP tunnel\n"); + return -1; + } +#endif + return 0; +} + +void rxe_net_exit(void) +{ + rxe_release_udp_tunnel(recv_sockets.sk6); + rxe_release_udp_tunnel(recv_sockets.sk4); unregister_netdevice_notifier(&rxe_net_notifier); } diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h index 7b06f76..0daf7f0 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.h +++ b/drivers/infiniband/sw/rxe/rxe_net.h @@ -44,10 +44,13 @@ struct rxe_recv_sockets { }; extern struct rxe_recv_sockets recv_sockets; +extern struct notifier_block rxe_net_notifier; +void rxe_release_udp_tunnel(struct socket *sk); struct rxe_dev 
*rxe_net_add(struct net_device *ndev); -int rxe_net_init(void); +int rxe_net_ipv4_init(void); +int rxe_net_ipv6_init(void); void rxe_net_exit(void); #endif /* RXE_NET_H */ -- cgit v1.1 From 908948877bbbb240f67d7eb27d3a392beb394a07 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Wed, 7 Sep 2016 14:04:05 +0300 Subject: IB/rxe: Fix duplicate atomic request handling When handling ack for atomic opcodes like "fetch&add" or "cmp&swp", the method send_atomic_ack() saves the ack before sending it, in case it gets lost and never reaches the requester. In that case the method duplicate_request() will need to find it using the duplicated request.psn. But send_atomic_ack() used a wrong psn value and thus the above ack was never found. This fix uses the ack.psn to locate the ack in case it's needed. This fix also copies the ack packet to the skb's control buffer since duplicate_request() will need it when calling rxe_xmit_packet(). Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_resp.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index ebb03b4..3e0f0f2 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -972,11 +972,13 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, free_rd_atomic_resource(qp, res); rxe_advance_resp_resource(qp); + memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(skb->cb)); + res->type = RXE_ATOMIC_MASK; res->atomic.skb = skb; - res->first_psn = qp->resp.psn; - res->last_psn = qp->resp.psn; - res->cur_psn = qp->resp.psn; + res->first_psn = ack_pkt.psn; + res->last_psn = ack_pkt.psn; + res->cur_psn = ack_pkt.psn; rc = rxe_xmit_packet(rxe, qp, &ack_pkt, skb_copy); if (rc) { @@ -1116,8 +1118,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp, rc = RESPST_CLEANUP; goto
out; } - bth_set_psn(SKB_TO_PKT(skb_copy), - qp->resp.psn - 1); + /* Resend the result. */ rc = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, pkt, skb_copy); -- cgit v1.1 From 3050b99850247695cb07a5c15265afcc08bcf400 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Wed, 7 Sep 2016 14:04:06 +0300 Subject: IB/rxe: Fix race condition between requester and completer rxe_requester() is sending a pkt with rxe_xmit_packet() and then calls rxe_update() to update the wqe and qp's psn values. But sometimes the response is received before the requester had time to update the wqe, in which case the completer acts on erroneous wqe values. This fix updates the wqe and qp before actually sending the request and rolls back when xmit fails. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_req.c | 57 ++++++++++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 33b2d9d..13a848a 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -511,24 +511,21 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe, } static void update_wqe_state(struct rxe_qp *qp, - struct rxe_send_wqe *wqe, - struct rxe_pkt_info *pkt, - enum wqe_state *prev_state) + struct rxe_send_wqe *wqe, + struct rxe_pkt_info *pkt) { - enum wqe_state prev_state_ = wqe->state; - if (pkt->mask & RXE_END_MASK) { if (qp_type(qp) == IB_QPT_RC) wqe->state = wqe_state_pending; } else { wqe->state = wqe_state_processing; } - - *prev_state = prev_state_; } -static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, - struct rxe_pkt_info *pkt, int payload) +static void update_wqe_psn(struct rxe_qp *qp, + struct rxe_send_wqe *wqe, + struct rxe_pkt_info *pkt, + int payload) { /* number of packets left to send including current one */ int
num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu; @@ -546,9 +543,34 @@ static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, qp->req.psn = (wqe->first_psn + num_pkt) & BTH_PSN_MASK; else qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK; +} - qp->req.opcode = pkt->opcode; +static void save_state(struct rxe_send_wqe *wqe, + struct rxe_qp *qp, + struct rxe_send_wqe *rollback_wqe, + struct rxe_qp *rollback_qp) +{ + rollback_wqe->state = wqe->state; + rollback_wqe->first_psn = wqe->first_psn; + rollback_wqe->last_psn = wqe->last_psn; + rollback_qp->req.psn = qp->req.psn; +} +static void rollback_state(struct rxe_send_wqe *wqe, + struct rxe_qp *qp, + struct rxe_send_wqe *rollback_wqe, + struct rxe_qp *rollback_qp) +{ + wqe->state = rollback_wqe->state; + wqe->first_psn = rollback_wqe->first_psn; + wqe->last_psn = rollback_wqe->last_psn; + qp->req.psn = rollback_qp->req.psn; +} + +static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, + struct rxe_pkt_info *pkt, int payload) +{ + qp->req.opcode = pkt->opcode; if (pkt->mask & RXE_END_MASK) qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index); @@ -571,7 +593,8 @@ int rxe_requester(void *arg) int mtu; int opcode; int ret; - enum wqe_state prev_state; + struct rxe_qp rollback_qp; + struct rxe_send_wqe rollback_wqe; next_wqe: if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR)) @@ -688,13 +711,21 @@ next_wqe: goto err; } - update_wqe_state(qp, wqe, &pkt, &prev_state); + /* + * To prevent a race on wqe access between requester and completer, + * wqe members state and psn need to be set before calling + * rxe_xmit_packet(). + * Otherwise, completer might initiate an unjustified retry flow. 
+ */ + save_state(wqe, qp, &rollback_wqe, &rollback_qp); + update_wqe_state(qp, wqe, &pkt); + update_wqe_psn(qp, wqe, &pkt, payload); ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb); if (ret) { qp->need_req_skb = 1; kfree_skb(skb); - wqe->state = prev_state; + rollback_state(wqe, qp, &rollback_wqe, &rollback_qp); if (ret == -EAGAIN) { rxe_run_task(&qp->req.task, 1); -- cgit v1.1 From c1cc72cb6ff95cbe02ed747866576dccab4a5911 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Wed, 7 Sep 2016 14:04:07 +0300 Subject: IB/rxe: Fix kmem_cache leak Decrement qp reference when handling error path in completer to prevent kmem_cache leak. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_comp.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 36f67de..1c59ef2 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -689,7 +689,14 @@ int rxe_completer(void *arg) qp->req.need_retry = 1; rxe_run_task(&qp->req.task, 1); } + + if (pkt) { + rxe_drop_ref(pkt->qp); + kfree_skb(skb); + } + goto exit; + } else { wqe->status = IB_WC_RETRY_EXC_ERR; state = COMPST_ERROR; @@ -716,6 +723,12 @@ int rxe_completer(void *arg) case COMPST_ERROR: do_complete(qp, wqe); rxe_qp_error(qp); + + if (pkt) { + rxe_drop_ref(pkt->qp); + kfree_skb(skb); + } + goto exit; } } -- cgit v1.1 From e4618d40eb3dc1a6d1f55f7150ea25bb23ab410a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 9 Sep 2016 08:15:37 +0100 Subject: IB/rdmavt: Don't vfree a kzalloc'ed memory region The userspace memory region 'mr' is allocated with kzalloc in __rvt_alloc_mr however it is incorrectly being freed with vfree in __rvt_free_mr. Fix this by using kfree to free it. 
Signed-off-by: Colin Ian King Reviewed-by: Leon Romanovsky Acked-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 80c4b6b..46b6497 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -294,7 +294,7 @@ static void __rvt_free_mr(struct rvt_mr *mr) { rvt_deinit_mregion(&mr->mr); rvt_free_lkey(&mr->mr); - vfree(mr); + kfree(mr); } /** -- cgit v1.1 From d19127473a575c629c70974cee0bb8acb6374f08 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 07:26:01 +0100 Subject: rxrpc: Make IPv6 support conditional on CONFIG_IPV6 Add CONFIG_AF_RXRPC_IPV6 and make the IPv6 support code conditional on it. This is then made conditional on CONFIG_IPV6. Without this, the following can be seen: net/built-in.o: In function `rxrpc_init_peer': >> peer_object.c:(.text+0x18c3c8): undefined reference to `ip6_route_output_flags' Reported-by: kbuild test robot Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/Kconfig | 7 +++++++ net/rxrpc/af_rxrpc.c | 7 ++++++- net/rxrpc/conn_object.c | 2 ++ net/rxrpc/local_object.c | 2 ++ net/rxrpc/output.c | 2 ++ net/rxrpc/peer_event.c | 4 +++- net/rxrpc/peer_object.c | 10 ++++++++++ net/rxrpc/utils.c | 2 ++ 8 files changed, 34 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig index 784c531..13396c7 100644 --- a/net/rxrpc/Kconfig +++ b/net/rxrpc/Kconfig @@ -19,6 +19,13 @@ config AF_RXRPC See Documentation/networking/rxrpc.txt. +config AF_RXRPC_IPV6 + bool "IPv6 support for RxRPC" + depends on (IPV6 = m && AF_RXRPC = m) || (IPV6 = y && AF_RXRPC) + help + Say Y here to allow AF_RXRPC to use IPV6 UDP as well as IPV4 UDP as + its network transport. 
+ config AF_RXRPC_DEBUG bool "RxRPC dynamic debugging" diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index f61f7b2..09f81bef 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -109,12 +109,14 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx, tail = offsetof(struct sockaddr_rxrpc, transport.sin.__pad); break; +#ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: if (srx->transport_len < sizeof(struct sockaddr_in6)) return -EINVAL; tail = offsetof(struct sockaddr_rxrpc, transport) + sizeof(struct sockaddr_in6); break; +#endif default: return -EAFNOSUPPORT; @@ -413,9 +415,11 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len) case AF_INET: rx->srx.transport_len = sizeof(struct sockaddr_in); break; +#ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: rx->srx.transport_len = sizeof(struct sockaddr_in6); break; +#endif default: ret = -EAFNOSUPPORT; goto error_unlock; @@ -570,7 +574,8 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, return -EAFNOSUPPORT; /* we support transport protocol UDP/UDP6 only */ - if (protocol != PF_INET && protocol != PF_INET6) + if (protocol != PF_INET && + IS_ENABLED(CONFIG_AF_RXRPC_IPV6) && protocol != PF_INET6) return -EPROTONOSUPPORT; if (sock->type != SOCK_DGRAM) diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index c0ddba7..bb1f292 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -134,6 +134,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, srx.transport.sin.sin_addr.s_addr) goto not_found; break; +#ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: if (peer->srx.transport.sin6.sin6_port != srx.transport.sin6.sin6_port || @@ -142,6 +143,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, sizeof(struct in6_addr)) != 0) goto not_found; break; +#endif default: BUG(); } diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index f5b9bb0..e3fad80 100644 --- 
a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -58,6 +58,7 @@ static long rxrpc_local_cmp_key(const struct rxrpc_local *local, memcmp(&local->srx.transport.sin.sin_addr, &srx->transport.sin.sin_addr, sizeof(struct in_addr)); +#ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: /* If the choice of UDP6 port is left up to the transport, then * the endpoint record doesn't match. @@ -67,6 +68,7 @@ static long rxrpc_local_cmp_key(const struct rxrpc_local *local, memcmp(&local->srx.transport.sin6.sin6_addr, &srx->transport.sin6.sin6_addr, sizeof(struct in6_addr)); +#endif default: BUG(); } diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index d7cd87f..06a9aca 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -259,6 +259,7 @@ send_fragmentable: } break; +#ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: opt = IPV6_PMTUDISC_DONT; ret = kernel_setsockopt(conn->params.local->socket, @@ -274,6 +275,7 @@ send_fragmentable: (char *)&opt, sizeof(opt)); } break; +#endif } up_write(&conn->params.local->defrag_sem); diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 7421758..9e0725f 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -66,6 +66,7 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, } break; +#ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: srx.transport.sin6.sin6_port = serr->port; srx.transport_len = sizeof(struct sockaddr_in6); @@ -78,7 +79,7 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, break; case SO_EE_ORIGIN_ICMP: _net("Rx ICMP on v6 sock"); - memcpy(&srx.transport.sin6.sin6_addr.s6_addr + 12, + memcpy(srx.transport.sin6.sin6_addr.s6_addr + 12, skb_network_header(skb) + serr->addr_offset, sizeof(struct in_addr)); break; @@ -89,6 +90,7 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, break; } break; +#endif default: BUG(); diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index dfc07b4..f3e5766 100644 --- 
a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -52,11 +52,13 @@ static unsigned long rxrpc_peer_hash_key(struct rxrpc_local *local, size = sizeof(srx->transport.sin.sin_addr); p = (u16 *)&srx->transport.sin.sin_addr; break; +#ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: hash_key += (u16 __force)srx->transport.sin.sin_port; size = sizeof(srx->transport.sin6.sin6_addr); p = (u16 *)&srx->transport.sin6.sin6_addr; break; +#endif default: WARN(1, "AF_RXRPC: Unsupported transport address family\n"); return 0; @@ -100,12 +102,14 @@ static long rxrpc_peer_cmp_key(const struct rxrpc_peer *peer, memcmp(&peer->srx.transport.sin.sin_addr, &srx->transport.sin.sin_addr, sizeof(struct in_addr)); +#ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: return ((u16 __force)peer->srx.transport.sin6.sin6_port - (u16 __force)srx->transport.sin6.sin6_port) ?: memcmp(&peer->srx.transport.sin6.sin6_addr, &srx->transport.sin6.sin6_addr, sizeof(struct in6_addr)); +#endif default: BUG(); } @@ -159,7 +163,9 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) struct rtable *rt; struct flowi fl; struct flowi4 *fl4 = &fl.u.ip4; +#ifdef CONFIG_AF_RXRPC_IPV6 struct flowi6 *fl6 = &fl.u.ip6; +#endif peer->if_mtu = 1500; @@ -177,6 +183,7 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) dst = &rt->dst; break; +#ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: fl6->flowi6_iif = LOOPBACK_IFINDEX; fl6->flowi6_scope = RT_SCOPE_UNIVERSE; @@ -191,6 +198,7 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) return; } break; +#endif default: BUG(); @@ -241,9 +249,11 @@ static void rxrpc_init_peer(struct rxrpc_peer *peer, unsigned long hash_key) case AF_INET: peer->hdrsize = sizeof(struct iphdr); break; +#ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: peer->hdrsize = sizeof(struct ipv6hdr); break; +#endif default: BUG(); } diff --git a/net/rxrpc/utils.c b/net/rxrpc/utils.c index b88914d..ff7af71 100644 --- a/net/rxrpc/utils.c +++ b/net/rxrpc/utils.c @@ -30,6 +30,7 @@ int 
rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *srx, struct sk_buff *skb) srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; return 0; +#ifdef CONFIG_AF_RXRPC_IPV6 case ETH_P_IPV6: srx->transport_type = SOCK_DGRAM; srx->transport_len = sizeof(srx->transport.sin6); @@ -37,6 +38,7 @@ int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *srx, struct sk_buff *skb) srx->transport.sin6.sin6_port = udp_hdr(skb)->source; srx->transport.sin6.sin6_addr = ipv6_hdr(skb)->saddr; return 0; +#endif default: pr_warn_ratelimited("AF_RXRPC: Unknown eth protocol %u\n", -- cgit v1.1 From fabf9201806255d70386d8bc9f6a2942c0940da2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:11 +0100 Subject: rxrpc: Remove some whitespace. Remove a tab that's on a line that should otherwise be blank. Signed-off-by: David Howells --- net/rxrpc/call_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 6143204..9367c3b 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -31,7 +31,7 @@ static void rxrpc_set_timer(struct rxrpc_call *call) _enter("{%ld,%ld,%ld:%ld}", call->ack_at - now, call->resend_at - now, call->expire_at - now, call->timer.expires - now); - + read_lock_bh(&call->state_lock); if (call->state < RXRPC_CALL_COMPLETE) { -- cgit v1.1 From 4b22457c06a3a950e14938c486283ad0f308c13d Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:11 +0100 Subject: rxrpc: Move the check of rx_pkt_offset from rxrpc_locate_data() to caller Move the check of rx_pkt_offset from rxrpc_locate_data() to the caller, rxrpc_recvmsg_data(), so that it's more clear what's going on there. 
Signed-off-by: David Howells --- net/rxrpc/recvmsg.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index a284205..0d085f5 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -240,9 +240,6 @@ static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb, int ret; u8 annotation = *_annotation; - if (offset > 0) - return 0; - /* Locate the subpacket */ offset = sp->offset; len = skb->len - sp->offset; @@ -303,8 +300,10 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, if (msg) sock_recv_timestamp(msg, sock->sk, skb); - ret = rxrpc_locate_data(call, skb, &call->rxtx_annotations[ix], - &rx_pkt_offset, &rx_pkt_len); + if (rx_pkt_offset == 0) + ret = rxrpc_locate_data(call, skb, + &call->rxtx_annotations[ix], + &rx_pkt_offset, &rx_pkt_len); _debug("recvmsg %x DATA #%u { %d, %d }", sp->hdr.callNumber, seq, rx_pkt_offset, rx_pkt_len); -- cgit v1.1 From 2e2ea51dec2ab6a81950d4b436eb66ebf45dd507 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:11 +0100 Subject: rxrpc: Check the return value of rxrpc_locate_data() Check the return value of rxrpc_locate_data() in rxrpc_recvmsg_data(). 
Signed-off-by: David Howells --- net/rxrpc/recvmsg.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 0d085f5..1edf2cf 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -300,10 +300,13 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, if (msg) sock_recv_timestamp(msg, sock->sk, skb); - if (rx_pkt_offset == 0) + if (rx_pkt_offset == 0) { ret = rxrpc_locate_data(call, skb, &call->rxtx_annotations[ix], &rx_pkt_offset, &rx_pkt_len); + if (ret < 0) + goto out; + } _debug("recvmsg %x DATA #%u { %d, %d }", sp->hdr.callNumber, seq, rx_pkt_offset, rx_pkt_len); -- cgit v1.1 From 816c9fce12f3745abc959c0fca8ace1c2c51421c Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:11 +0100 Subject: rxrpc: Fix handling of the last packet in rxrpc_recvmsg_data() The code for determining the last packet in rxrpc_recvmsg_data() has been using the RXRPC_CALL_RX_LAST flag to determine if the rx_top pointer points to the last packet or not. This isn't a good idea, however, as the input code may be running simultaneously on another CPU and that sets the flag *before* updating the top pointer. Fix this by the following means: (1) Restrict the use of RXRPC_CALL_RX_LAST to the input routines only. There's otherwise a synchronisation problem between detecting the flag and checking tx_top. This could probably be dealt with by appropriate application of memory barriers, but there's a simpler way. (2) Set RXRPC_CALL_RX_LAST after setting rx_top. (3) Make rxrpc_rotate_rx_window() consult the flags header field of the DATA packet it's about to discard to see if that was the last packet. Use this as the basis for ending the Rx phase. This shouldn't be a problem because the recvmsg side of things is guaranteed to see the packets in order. 
(4) Make rxrpc_recvmsg_data() return 1 to indicate the end of the data if: (a) the packet it has just processed is marked as RXRPC_LAST_PACKET (b) the call's Rx phase has been ended. Signed-off-by: David Howells --- net/rxrpc/input.c | 4 +++- net/rxrpc/recvmsg.c | 49 +++++++++++++++++++++++++++++++++---------------- 2 files changed, 36 insertions(+), 17 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 75af0bd..f0d9115 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -238,7 +238,7 @@ next_subpacket: len = RXRPC_JUMBO_DATALEN; if (flags & RXRPC_LAST_PACKET) { - if (test_and_set_bit(RXRPC_CALL_RX_LAST, &call->flags) && + if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && seq != call->rx_top) return rxrpc_proto_abort("LSN", call, seq); } else { @@ -282,6 +282,8 @@ next_subpacket: call->rxtx_buffer[ix] = skb; if (after(seq, call->rx_top)) smp_store_release(&call->rx_top, seq); + if (flags & RXRPC_LAST_PACKET) + set_bit(RXRPC_CALL_RX_LAST, &call->flags); queued = true; if (after_eq(seq, call->rx_expect_next)) { diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 1edf2cf..8b8d7e1 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -134,6 +134,8 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call) { _enter("%d,%s", call->debug_id, rxrpc_call_states[call->state]); + ASSERTCMP(call->rx_hard_ack, ==, call->rx_top); + if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, true, false); rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); @@ -163,8 +165,10 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call) */ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) { + struct rxrpc_skb_priv *sp; struct sk_buff *skb; rxrpc_seq_t hard_ack, top; + u8 flags; int ix; _enter("%d", call->debug_id); @@ -177,6 +181,8 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) ix = hard_ack & RXRPC_RXTX_BUFF_MASK; skb = call->rxtx_buffer[ix]; rxrpc_see_skb(skb); + sp = 
rxrpc_skb(skb); + flags = sp->hdr.flags; call->rxtx_buffer[ix] = NULL; call->rxtx_annotations[ix] = 0; /* Barrier against rxrpc_input_data(). */ @@ -184,8 +190,8 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) rxrpc_free_skb(skb); - _debug("%u,%u,%lx", hard_ack, top, call->flags); - if (hard_ack == top && test_bit(RXRPC_CALL_RX_LAST, &call->flags)) + _debug("%u,%u,%02x", hard_ack, top, flags); + if (flags & RXRPC_LAST_PACKET) rxrpc_end_rx_phase(call); } @@ -278,13 +284,19 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, size_t remain; bool last; unsigned int rx_pkt_offset, rx_pkt_len; - int ix, copy, ret = 0; + int ix, copy, ret = -EAGAIN, ret2; _enter(""); rx_pkt_offset = call->rx_pkt_offset; rx_pkt_len = call->rx_pkt_len; + if (call->state >= RXRPC_CALL_SERVER_ACK_REQUEST) { + seq = call->rx_hard_ack; + ret = 1; + goto done; + } + /* Barriers against rxrpc_input_data(). */ hard_ack = call->rx_hard_ack; top = smp_load_acquire(&call->rx_top); @@ -301,11 +313,13 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, sock_recv_timestamp(msg, sock->sk, skb); if (rx_pkt_offset == 0) { - ret = rxrpc_locate_data(call, skb, - &call->rxtx_annotations[ix], - &rx_pkt_offset, &rx_pkt_len); - if (ret < 0) + ret2 = rxrpc_locate_data(call, skb, + &call->rxtx_annotations[ix], + &rx_pkt_offset, &rx_pkt_len); + if (ret2 < 0) { + ret = ret2; goto out; + } } _debug("recvmsg %x DATA #%u { %d, %d }", sp->hdr.callNumber, seq, rx_pkt_offset, rx_pkt_len); @@ -316,10 +330,12 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, if (copy > remain) copy = remain; if (copy > 0) { - ret = skb_copy_datagram_iter(skb, rx_pkt_offset, iter, - copy); - if (ret < 0) + ret2 = skb_copy_datagram_iter(skb, rx_pkt_offset, iter, + copy); + if (ret2 < 0) { + ret = ret2; goto out; + } /* handle piecemeal consumption of data packets */ _debug("copied %d @%zu", copy, *_offset); @@ -332,6 +348,7 @@ static int 
rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, if (rx_pkt_len > 0) { _debug("buffer full"); ASSERTCMP(*_offset, ==, len); + ret = 0; break; } @@ -342,19 +359,19 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, rx_pkt_offset = 0; rx_pkt_len = 0; - ASSERTIFCMP(last, seq, ==, top); - } - - if (after(seq, top)) { - ret = -EAGAIN; - if (test_bit(RXRPC_CALL_RX_LAST, &call->flags)) + if (last) { + ASSERTCMP(seq, ==, READ_ONCE(call->rx_top)); ret = 1; + goto out; + } } + out: if (!(flags & MSG_PEEK)) { call->rx_pkt_offset = rx_pkt_offset; call->rx_pkt_len = rx_pkt_len; } +done: _leave(" = %d [%u/%u]", ret, seq, top); return ret; } -- cgit v1.1 From e6f3afb3fc058e17b407b6f7cac08058b19e641c Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:11 +0100 Subject: rxrpc: Record calls that need to be accepted Record calls that need to be accepted using sk_acceptq_added() otherwise the backlog counter goes negative because sk_acceptq_removed() is called. This causes the preallocator to malfunction. Calls that are preaccepted by AFS within the kernel aren't affected by this. Signed-off-by: David Howells --- net/rxrpc/call_accept.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 26c293e..323b8da 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -369,6 +369,8 @@ found_service: if (rx->notify_new_call) rx->notify_new_call(&rx->sk, call, call->user_call_ID); + else + sk_acceptq_added(&rx->sk); spin_lock(&conn->state_lock); switch (conn->state) { -- cgit v1.1 From 0360da6db7d6390e7bd2f6c93b01af29bcd36ad5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:11 +0100 Subject: rxrpc: Purge the to_be_accepted queue on socket release Purge the queue of to_be_accepted calls on socket release. Note that purging sock_calls doesn't release the ref owned by to_be_accepted. 
Probably the sock_calls list is redundant given purges of the recvmsg_q, the to_be_accepted queue and the calls tree. Signed-off-by: David Howells --- net/rxrpc/call_object.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 22f9b0d..b0ffbd9 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -476,6 +476,16 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) _enter("%p", rx); + while (!list_empty(&rx->to_be_accepted)) { + call = list_entry(rx->to_be_accepted.next, + struct rxrpc_call, accept_link); + list_del(&call->accept_link); + rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, ECONNRESET); + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + rxrpc_release_call(rx, call); + rxrpc_put_call(call, rxrpc_call_put); + } + while (!list_empty(&rx->sock_calls)) { call = list_entry(rx->sock_calls.next, struct rxrpc_call, sock_link); -- cgit v1.1 From 66d58af7f4af53e8318e852efa31a7cb0e31bfb6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:12 +0100 Subject: rxrpc: Fix the putting of client connections In rxrpc_put_one_client_conn(), if a connection has RXRPC_CONN_COUNTED set on it, then it's accounted for in rxrpc_nr_client_conns and may be on various lists - and this is cleaned up correctly. However, if the connection doesn't have RXRPC_CONN_COUNTED set on it, then the put routine returns rather than just skipping the extra bit of cleanup. Fix this by making the extra bit of cleanup conditional instead and always killing off the connection. This manifests itself as connections with a zero usage count hanging around in /proc/net/rxrpc_conns because the connection was allocated, but then discarded, due to a race with another process that set up a parallel connection, which was then shared instead.
Signed-off-by: David Howells --- net/rxrpc/conn_client.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 9344a84..5a675c4 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -818,7 +818,7 @@ idle_connection: static struct rxrpc_connection * rxrpc_put_one_client_conn(struct rxrpc_connection *conn) { - struct rxrpc_connection *next; + struct rxrpc_connection *next = NULL; struct rxrpc_local *local = conn->params.local; unsigned int nr_conns; @@ -834,24 +834,22 @@ rxrpc_put_one_client_conn(struct rxrpc_connection *conn) ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_INACTIVE); - if (!test_bit(RXRPC_CONN_COUNTED, &conn->flags)) - return NULL; - - spin_lock(&rxrpc_client_conn_cache_lock); - nr_conns = --rxrpc_nr_client_conns; + if (test_bit(RXRPC_CONN_COUNTED, &conn->flags)) { + spin_lock(&rxrpc_client_conn_cache_lock); + nr_conns = --rxrpc_nr_client_conns; + + if (nr_conns < rxrpc_max_client_connections && + !list_empty(&rxrpc_waiting_client_conns)) { + next = list_entry(rxrpc_waiting_client_conns.next, + struct rxrpc_connection, cache_link); + rxrpc_get_connection(next); + rxrpc_activate_conn(next); + } - next = NULL; - if (nr_conns < rxrpc_max_client_connections && - !list_empty(&rxrpc_waiting_client_conns)) { - next = list_entry(rxrpc_waiting_client_conns.next, - struct rxrpc_connection, cache_link); - rxrpc_get_connection(next); - rxrpc_activate_conn(next); + spin_unlock(&rxrpc_client_conn_cache_lock); } - spin_unlock(&rxrpc_client_conn_cache_lock); rxrpc_kill_connection(conn); - if (next) rxrpc_activate_channels(next); -- cgit v1.1 From 357f5ef64628c2d6c532e7a6bfc0bc3830b4c221 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:12 +0100 Subject: rxrpc: Call rxrpc_release_call() on error in rxrpc_new_client_call() Call rxrpc_release_call() on getting an error in rxrpc_new_client_call() rather than trying to do the 
cleanup ourselves. This isn't a problem, provided we set RXRPC_CALL_HAS_USERID only if we actually add the call to the calls tree as cleanup code fragments that would otherwise cause problems are conditional. Without this, we miss some of the cleanup. Signed-off-by: David Howells --- net/rxrpc/call_object.c | 36 ++++++++++++------------------------ 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index b0ffbd9..23f5a5f 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -226,9 +226,6 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, (const void *)user_call_ID); /* Publish the call, even though it is incompletely set up as yet */ - call->user_call_ID = user_call_ID; - __set_bit(RXRPC_CALL_HAS_USERID, &call->flags); - write_lock(&rx->call_lock); pp = &rx->calls.rb_node; @@ -242,10 +239,12 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, else if (user_call_ID > xcall->user_call_ID) pp = &(*pp)->rb_right; else - goto found_user_ID_now_present; + goto error_dup_user_ID; } rcu_assign_pointer(call->socket, rx); + call->user_call_ID = user_call_ID; + __set_bit(RXRPC_CALL_HAS_USERID, &call->flags); rxrpc_get_call(call, rxrpc_call_got_userid); rb_link_node(&call->sock_node, parent, pp); rb_insert_color(&call->sock_node, &rx->calls); @@ -276,33 +275,22 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, _leave(" = %p [new]", call); return call; -error: - write_lock(&rx->call_lock); - rb_erase(&call->sock_node, &rx->calls); - write_unlock(&rx->call_lock); - rxrpc_put_call(call, rxrpc_call_put_userid); - - write_lock(&rxrpc_call_lock); - list_del_init(&call->link); - write_unlock(&rxrpc_call_lock); - -error_out: - __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, - RX_CALL_DEAD, ret); - set_bit(RXRPC_CALL_RELEASED, &call->flags); - rxrpc_put_call(call, rxrpc_call_put); - _leave(" = %d", ret); - return ERR_PTR(ret); - /* We unexpectedly 
found the user ID in the list after taking * the call_lock. This shouldn't happen unless the user races * with itself and tries to add the same user ID twice at the * same time in different threads. */ -found_user_ID_now_present: +error_dup_user_ID: write_unlock(&rx->call_lock); ret = -EEXIST; - goto error_out; + +error: + __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, + RX_CALL_DEAD, ret); + rxrpc_release_call(rx, call); + rxrpc_put_call(call, rxrpc_call_put); + _leave(" = %d", ret); + return ERR_PTR(ret); } /* -- cgit v1.1 From 78883793f8ac4bb3f97d48db7a8c71d8476bcf98 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:12 +0100 Subject: rxrpc: Fix unexposed client conn release If the last call on a client connection is released after the connection has had a bunch of calls allocated but before any DATA packets are sent (so that it's not yet marked RXRPC_CONN_EXPOSED), an assertion will fail in rxrpc_disconnect_client_call(). af_rxrpc: Assertion failed - 1(0x1) >= 2(0x2) is false ------------[ cut here ]------------ kernel BUG at ../net/rxrpc/conn_client.c:753! This is because it's expecting the conn to have been exposed and to have 2 or more refs - but this isn't necessarily the case. Simply remove the assertion. This allows the conn to be moved into the inactive state and deleted if it isn't resurrected before the final put is called. Signed-off-by: David Howells --- net/rxrpc/conn_client.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 5a675c4..226bc91 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -721,7 +721,6 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) } ASSERTCMP(rcu_access_pointer(chan->call), ==, call); - ASSERTCMP(atomic_read(&conn->usage), >=, 2); /* If a client call was exposed to the world, we save the result for * retransmission.
-- cgit v1.1 From d01dc4c3c1209e865368d5f8d3b5e08f97326ca9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:12 +0100 Subject: rxrpc: Fix the parsing of soft-ACKs The soft-ACK parser doesn't increment the pointer into the soft-ACK list, resulting in the first ACK/NACK value being applied to all the relevant packets in the Tx queue. This has the potential to miss retransmissions and cause excessive retransmissions. Fix this by incrementing the pointer. Signed-off-by: David Howells --- net/rxrpc/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index f0d9115..c1f83d2 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -384,7 +384,7 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, for (; nr_acks > 0; nr_acks--, seq++) { ix = seq & RXRPC_RXTX_BUFF_MASK; - switch (*acks) { + switch (*acks++) { case RXRPC_ACK_TYPE_ACK: call->rxtx_annotations[ix] = RXRPC_TX_ANNO_ACK; break; -- cgit v1.1 From dfa7d9204054b091949d87270e55e0fd5800c3ae Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:12 +0100 Subject: rxrpc: Fix retransmission algorithm Make the retransmission algorithm use for-loops instead of do-loops and move the counter increments into the for-statement increment slots. Though the do-loops are slightly more efficient since there will be at least one pass through each loop, the counter increments are harder to get right as the continue-statements skip them. Without this, if there are any positive acks within the loop, the do-loop will cycle forever because the counter increment is never done. 
Signed-off-by: David Howells --- net/rxrpc/call_event.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 9367c3b..f0cabc4 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -163,8 +163,7 @@ static void rxrpc_resend(struct rxrpc_call *call) */ now = jiffies; resend_at = now + rxrpc_resend_timeout; - seq = cursor + 1; - do { + for (seq = cursor + 1; before_eq(seq, top); seq++) { ix = seq & RXRPC_RXTX_BUFF_MASK; annotation = call->rxtx_annotations[ix]; if (annotation == RXRPC_TX_ANNO_ACK) @@ -184,8 +183,7 @@ static void rxrpc_resend(struct rxrpc_call *call) /* Okay, we need to retransmit a packet. */ call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS; - seq++; - } while (before_eq(seq, top)); + } call->resend_at = resend_at; @@ -194,8 +192,7 @@ static void rxrpc_resend(struct rxrpc_call *call) * lock is dropped, it may clear some of the retransmission markers for * packets that it soft-ACKs. */ - seq = cursor + 1; - do { + for (seq = cursor + 1; before_eq(seq, top); seq++) { ix = seq & RXRPC_RXTX_BUFF_MASK; annotation = call->rxtx_annotations[ix]; if (annotation != RXRPC_TX_ANNO_RETRANS) @@ -237,8 +234,7 @@ static void rxrpc_resend(struct rxrpc_call *call) if (after(call->tx_hard_ack, seq)) seq = call->tx_hard_ack; - seq++; - } while (before_eq(seq, top)); + } out_unlock: spin_unlock_bh(&call->lock); -- cgit v1.1 From 27d0fc431c6b4847231c1490fa541bc3f5a7a351 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:13 +0100 Subject: rxrpc: Don't transmit an ACK if there's no reason set Don't transmit an ACK if call->ackr_reason is unset. There's the possibility of a race between recvmsg() sending an ACK and the background processing thread trying to send the same one. 
Signed-off-by: David Howells --- net/rxrpc/output.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 06a9aca..aa05072 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -137,6 +137,11 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) switch (type) { case RXRPC_PACKET_TYPE_ACK: spin_lock_bh(&call->lock); + if (!call->ackr_reason) { + spin_unlock_bh(&call->lock); + ret = 0; + goto out; + } n = rxrpc_fill_out_ack(call, pkt); call->ackr_reason = 0; -- cgit v1.1 From 2311e327cda015a24a201efc7655a9a983679e55 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:12 +0100 Subject: rxrpc: Be consistent about switch value in rxrpc_send_call_packet() rxrpc_send_call_packet() should use type in both its switch-statements rather than using pkt->whdr.type. This might give the compiler an easier job of uninitialised variable checking. Signed-off-by: David Howells --- net/rxrpc/output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index aa05072..0b21ed8 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -182,7 +182,7 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) &msg, iov, ioc, len); if (ret < 0 && call->state < RXRPC_CALL_COMPLETE) { - switch (pkt->whdr.type) { + switch (type) { case RXRPC_PACKET_TYPE_ACK: rxrpc_propose_ACK(call, pkt->ack.reason, ntohs(pkt->ack.maxSkew), -- cgit v1.1 From 182f50562490e5861afaa7a2e42dcc0dd9dcfcca Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:12 +0100 Subject: rxrpc: Fix the basic transmit DATA packet content size at 1412 bytes Fix the basic transmit DATA packet content size at 1412 bytes so that they can be arbitrarily assembled into jumbo packets. In the future, I'm thinking of moving to keeping a jumbo packet header at the beginning of each packet in the Tx queue and creating the packet header on the spot when kernel_sendmsg() is invoked. 
That way, jumbo packets can be assembled on the spur of the moment for (re-)transmission. Signed-off-by: David Howells --- net/rxrpc/sendmsg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index cba2365..8bfddf4 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -214,7 +214,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, goto maybe_error; } - max = call->conn->params.peer->maxdata; + max = RXRPC_JUMBO_DATALEN; max -= call->conn->security_size; max &= ~(call->conn->size_align - 1UL); -- cgit v1.1 From a3868bfc8d5b0f36c784deab644ee1d2b0e6974b Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:13 +0100 Subject: rxrpc: Print the packet type name in the Rx packet trace Print a symbolic packet type name for each valid received packet in the trace output, not just a number. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 5 +++-- net/rxrpc/ar-internal.h | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index ea3b10e..0a30c67 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -93,11 +93,12 @@ TRACE_EVENT(rxrpc_rx_packet, memcpy(&__entry->hdr, &sp->hdr, sizeof(__entry->hdr)); ), - TP_printk("%08x:%08x:%08x:%04x %08x %08x %02x %02x", + TP_printk("%08x:%08x:%08x:%04x %08x %08x %02x %02x %s", __entry->hdr.epoch, __entry->hdr.cid, __entry->hdr.callNumber, __entry->hdr.serviceId, __entry->hdr.serial, __entry->hdr.seq, - __entry->hdr.type, __entry->hdr.flags) + __entry->hdr.type, __entry->hdr.flags, + __entry->hdr.type <= 15 ? 
rxrpc_pkts[__entry->hdr.type] : "?UNK") ); TRACE_EVENT(rxrpc_rx_done, diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index e78c40b..0f6fafa 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -551,6 +551,9 @@ enum rxrpc_call_trace { extern const char rxrpc_call_traces[rxrpc_call__nr_trace][4]; +extern const char *const rxrpc_pkts[]; +extern const char *rxrpc_acks(u8 reason); + #include /* @@ -851,11 +854,8 @@ extern unsigned int rxrpc_rx_mtu; extern unsigned int rxrpc_rx_jumbo_max; extern unsigned int rxrpc_resend_timeout; -extern const char *const rxrpc_pkts[]; extern const s8 rxrpc_ack_priority[]; -extern const char *rxrpc_acks(u8 reason); - /* * output.c */ -- cgit v1.1 From a84a46d73050f70fd8820c74840e2815c78d8690 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:14 +0100 Subject: rxrpc: Add some additional call tracing Add additional call tracepoint points for noting call-connected, call-released and connection-failed events. Also fix one tracepoint that was using an integer instead of the corresponding enum value as the point type. 
Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 3 +++ net/rxrpc/call_object.c | 18 ++++++++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 0f6fafa..4a73c20 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -539,6 +539,8 @@ enum rxrpc_call_trace { rxrpc_call_queued, rxrpc_call_queued_ref, rxrpc_call_seen, + rxrpc_call_connected, + rxrpc_call_release, rxrpc_call_got, rxrpc_call_got_userid, rxrpc_call_got_kernel, @@ -546,6 +548,7 @@ enum rxrpc_call_trace { rxrpc_call_put_userid, rxrpc_call_put_kernel, rxrpc_call_put_noqueue, + rxrpc_call_error, rxrpc_call__nr_trace }; diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 23f5a5f..0df9d1a 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -53,6 +53,8 @@ const char rxrpc_call_traces[rxrpc_call__nr_trace][4] = { [rxrpc_call_new_service] = "NWs", [rxrpc_call_queued] = "QUE", [rxrpc_call_queued_ref] = "QUR", + [rxrpc_call_connected] = "CON", + [rxrpc_call_release] = "RLS", [rxrpc_call_seen] = "SEE", [rxrpc_call_got] = "GOT", [rxrpc_call_got_userid] = "Gus", @@ -61,6 +63,7 @@ const char rxrpc_call_traces[rxrpc_call__nr_trace][4] = { [rxrpc_call_put_userid] = "Pus", [rxrpc_call_put_kernel] = "Pke", [rxrpc_call_put_noqueue] = "PNQ", + [rxrpc_call_error] = "*E*", }; struct kmem_cache *rxrpc_call_jar; @@ -222,8 +225,8 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, return call; } - trace_rxrpc_call(call, 0, atomic_read(&call->usage), here, - (const void *)user_call_ID); + trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage), + here, (const void *)user_call_ID); /* Publish the call, even though it is incompletely set up as yet */ write_lock(&rx->call_lock); @@ -263,6 +266,9 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, if (ret < 0) goto error; + trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage), + here, 
ERR_PTR(ret)); + spin_lock_bh(&call->conn->params.peer->lock); hlist_add_head(&call->error_link, &call->conn->params.peer->error_targets); @@ -287,6 +293,8 @@ error_dup_user_ID: error: __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, RX_CALL_DEAD, ret); + trace_rxrpc_call(call, rxrpc_call_error, atomic_read(&call->usage), + here, ERR_PTR(ret)); rxrpc_release_call(rx, call); rxrpc_put_call(call, rxrpc_call_put); _leave(" = %d", ret); @@ -396,15 +404,17 @@ void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) */ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) { + const void *here = __builtin_return_address(0); struct rxrpc_connection *conn = call->conn; bool put = false; int i; _enter("{%d,%d}", call->debug_id, atomic_read(&call->usage)); - ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); + trace_rxrpc_call(call, rxrpc_call_release, atomic_read(&call->usage), + here, (const void *)call->flags); - rxrpc_see_call(call); + ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); spin_lock_bh(&call->lock); if (test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags)) -- cgit v1.1 From 363deeab6d0f308d33d011323661ae9cf5f9f8d6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:14 +0100 Subject: rxrpc: Add connection tracepoint and client conn state tracepoint Add a pair of tracepoints, one to track rxrpc_connection struct ref counting and the other to track the client connection cache state. 
Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 60 ++++++++++++++++++++++++++++++++ net/rxrpc/ar-internal.h | 76 +++++++++++++++++++++++++--------------- net/rxrpc/call_accept.c | 4 +++ net/rxrpc/call_object.c | 2 -- net/rxrpc/conn_client.c | 82 ++++++++++++++++++++++++++++++-------------- net/rxrpc/conn_event.c | 2 +- net/rxrpc/conn_object.c | 72 ++++++++++++++++++++++++++++++++++++-- net/rxrpc/conn_service.c | 4 +++ net/rxrpc/misc.c | 31 +++++++++++++++++ 9 files changed, 274 insertions(+), 59 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 0a30c67..c0c496c 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -16,6 +16,66 @@ #include +TRACE_EVENT(rxrpc_conn, + TP_PROTO(struct rxrpc_connection *conn, enum rxrpc_conn_trace op, + int usage, const void *where), + + TP_ARGS(conn, op, usage, where), + + TP_STRUCT__entry( + __field(struct rxrpc_connection *, conn ) + __field(int, op ) + __field(int, usage ) + __field(const void *, where ) + ), + + TP_fast_assign( + __entry->conn = conn; + __entry->op = op; + __entry->usage = usage; + __entry->where = where; + ), + + TP_printk("C=%p %s u=%d sp=%pSR", + __entry->conn, + rxrpc_conn_traces[__entry->op], + __entry->usage, + __entry->where) + ); + +TRACE_EVENT(rxrpc_client, + TP_PROTO(struct rxrpc_connection *conn, int channel, + enum rxrpc_client_trace op), + + TP_ARGS(conn, channel, op), + + TP_STRUCT__entry( + __field(struct rxrpc_connection *, conn ) + __field(u32, cid ) + __field(int, channel ) + __field(int, usage ) + __field(enum rxrpc_client_trace, op ) + __field(enum rxrpc_conn_cache_state, cs ) + ), + + TP_fast_assign( + __entry->conn = conn; + __entry->channel = channel; + __entry->usage = atomic_read(&conn->usage); + __entry->op = op; + __entry->cid = conn->proto.cid; + __entry->cs = conn->cache_state; + ), + + TP_printk("C=%p h=%2d %s %s i=%08x u=%d", + __entry->conn, + __entry->channel, + rxrpc_client_traces[__entry->op], + 
rxrpc_conn_cache_states[__entry->cs], + __entry->cid, + __entry->usage) + ); + TRACE_EVENT(rxrpc_call, TP_PROTO(struct rxrpc_call *call, enum rxrpc_call_trace op, int usage, const void *where, const void *aux), diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 4a73c20..6ca40ee 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -314,6 +314,7 @@ enum rxrpc_conn_cache_state { RXRPC_CONN_CLIENT_ACTIVE, /* Conn is on active list, doing calls */ RXRPC_CONN_CLIENT_CULLED, /* Conn is culled and delisted, doing calls */ RXRPC_CONN_CLIENT_IDLE, /* Conn is on idle list, doing mostly nothing */ + RXRPC_CONN__NR_CACHE_STATES }; /* @@ -533,6 +534,44 @@ struct rxrpc_call { rxrpc_serial_t acks_latest; /* serial number of latest ACK received */ }; +enum rxrpc_conn_trace { + rxrpc_conn_new_client, + rxrpc_conn_new_service, + rxrpc_conn_queued, + rxrpc_conn_seen, + rxrpc_conn_got, + rxrpc_conn_put_client, + rxrpc_conn_put_service, + rxrpc_conn__nr_trace +}; + +extern const char rxrpc_conn_traces[rxrpc_conn__nr_trace][4]; + +enum rxrpc_client_trace { + rxrpc_client_activate_chans, + rxrpc_client_alloc, + rxrpc_client_chan_activate, + rxrpc_client_chan_disconnect, + rxrpc_client_chan_pass, + rxrpc_client_chan_unstarted, + rxrpc_client_cleanup, + rxrpc_client_count, + rxrpc_client_discard, + rxrpc_client_duplicate, + rxrpc_client_exposed, + rxrpc_client_replace, + rxrpc_client_to_active, + rxrpc_client_to_culled, + rxrpc_client_to_idle, + rxrpc_client_to_inactive, + rxrpc_client_to_waiting, + rxrpc_client_uncount, + rxrpc_client__nr_trace +}; + +extern const char rxrpc_client_traces[rxrpc_client__nr_trace][7]; +extern const char rxrpc_conn_cache_states[RXRPC_CONN__NR_CACHE_STATES][5]; + enum rxrpc_call_trace { rxrpc_call_new_client, rxrpc_call_new_service, @@ -734,7 +773,11 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *, void __rxrpc_disconnect_call(struct rxrpc_connection *, struct rxrpc_call *); void 
rxrpc_disconnect_call(struct rxrpc_call *); void rxrpc_kill_connection(struct rxrpc_connection *); -void __rxrpc_put_connection(struct rxrpc_connection *); +bool rxrpc_queue_conn(struct rxrpc_connection *); +void rxrpc_see_connection(struct rxrpc_connection *); +void rxrpc_get_connection(struct rxrpc_connection *); +struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *); +void rxrpc_put_service_conn(struct rxrpc_connection *); void __exit rxrpc_destroy_all_connections(void); static inline bool rxrpc_conn_is_client(const struct rxrpc_connection *conn) @@ -747,38 +790,15 @@ static inline bool rxrpc_conn_is_service(const struct rxrpc_connection *conn) return !rxrpc_conn_is_client(conn); } -static inline void rxrpc_get_connection(struct rxrpc_connection *conn) -{ - atomic_inc(&conn->usage); -} - -static inline -struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *conn) -{ - return atomic_inc_not_zero(&conn->usage) ? conn : NULL; -} - static inline void rxrpc_put_connection(struct rxrpc_connection *conn) { if (!conn) return; - if (rxrpc_conn_is_client(conn)) { - if (atomic_dec_and_test(&conn->usage)) - rxrpc_put_client_conn(conn); - } else { - if (atomic_dec_return(&conn->usage) == 1) - __rxrpc_put_connection(conn); - } -} - -static inline bool rxrpc_queue_conn(struct rxrpc_connection *conn) -{ - if (!rxrpc_get_connection_maybe(conn)) - return false; - if (!rxrpc_queue_work(&conn->processor)) - rxrpc_put_connection(conn); - return true; + if (rxrpc_conn_is_client(conn)) + rxrpc_put_client_conn(conn); + else + rxrpc_put_service_conn(conn); } /* diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 323b8da..3e47450 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -85,6 +85,9 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, b->conn_backlog[head] = conn; smp_store_release(&b->conn_backlog_head, (head + 1) & (size - 1)); + + trace_rxrpc_conn(conn, rxrpc_conn_new_service, + 
atomic_read(&conn->usage), here); } /* Now it gets complicated, because calls get registered with the @@ -290,6 +293,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, rxrpc_get_local(local); conn->params.local = local; conn->params.peer = peer; + rxrpc_see_connection(conn); rxrpc_new_incoming_connection(conn, skb); } else { rxrpc_get_connection(conn); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 0df9d1a..54f3048 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -479,8 +479,6 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) struct rxrpc_call, accept_link); list_del(&call->accept_link); rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, ECONNRESET); - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); - rxrpc_release_call(rx, call); rxrpc_put_call(call, rxrpc_call_put); } diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 226bc91..c76a125 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -105,6 +105,14 @@ static void rxrpc_discard_expired_client_conns(struct work_struct *); static DECLARE_DELAYED_WORK(rxrpc_client_conn_reap, rxrpc_discard_expired_client_conns); +const char rxrpc_conn_cache_states[RXRPC_CONN__NR_CACHE_STATES][5] = { + [RXRPC_CONN_CLIENT_INACTIVE] = "Inac", + [RXRPC_CONN_CLIENT_WAITING] = "Wait", + [RXRPC_CONN_CLIENT_ACTIVE] = "Actv", + [RXRPC_CONN_CLIENT_CULLED] = "Cull", + [RXRPC_CONN_CLIENT_IDLE] = "Idle", +}; + /* * Get a connection ID and epoch for a client connection from the global pool. * The connection struct pointer is then recorded in the idr radix tree. 
The @@ -220,6 +228,9 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) rxrpc_get_local(conn->params.local); key_get(conn->params.key); + trace_rxrpc_conn(conn, rxrpc_conn_new_client, atomic_read(&conn->usage), + __builtin_return_address(0)); + trace_rxrpc_client(conn, -1, rxrpc_client_alloc); _leave(" = %p", conn); return conn; @@ -385,6 +396,7 @@ static int rxrpc_get_client_conn(struct rxrpc_call *call, rb_replace_node(&conn->client_node, &candidate->client_node, &local->client_conns); + trace_rxrpc_client(conn, -1, rxrpc_client_replace); goto candidate_published; } } @@ -409,8 +421,11 @@ found_extant_conn: _debug("found conn"); spin_unlock(&local->client_conns_lock); - rxrpc_put_connection(candidate); - candidate = NULL; + if (candidate) { + trace_rxrpc_client(candidate, -1, rxrpc_client_duplicate); + rxrpc_put_connection(candidate); + candidate = NULL; + } spin_lock(&conn->channel_lock); call->conn = conn; @@ -433,6 +448,7 @@ error: */ static void rxrpc_activate_conn(struct rxrpc_connection *conn) { + trace_rxrpc_client(conn, -1, rxrpc_client_to_active); conn->cache_state = RXRPC_CONN_CLIENT_ACTIVE; rxrpc_nr_active_client_conns++; list_move_tail(&conn->cache_link, &rxrpc_active_client_conns); @@ -462,8 +478,10 @@ static void rxrpc_animate_client_conn(struct rxrpc_connection *conn) spin_lock(&rxrpc_client_conn_cache_lock); nr_conns = rxrpc_nr_client_conns; - if (!test_and_set_bit(RXRPC_CONN_COUNTED, &conn->flags)) + if (!test_and_set_bit(RXRPC_CONN_COUNTED, &conn->flags)) { + trace_rxrpc_client(conn, -1, rxrpc_client_count); rxrpc_nr_client_conns = nr_conns + 1; + } switch (conn->cache_state) { case RXRPC_CONN_CLIENT_ACTIVE: @@ -494,6 +512,7 @@ activate_conn: wait_for_capacity: _debug("wait"); + trace_rxrpc_client(conn, -1, rxrpc_client_to_waiting); conn->cache_state = RXRPC_CONN_CLIENT_WAITING; list_move_tail(&conn->cache_link, &rxrpc_waiting_client_conns); goto out_unlock; @@ -524,6 +543,8 @@ static void 
rxrpc_activate_one_channel(struct rxrpc_connection *conn, struct rxrpc_call, chan_wait_link); u32 call_id = chan->call_counter + 1; + trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate); + write_lock_bh(&call->state_lock); call->state = RXRPC_CALL_CLIENT_SEND_REQUEST; write_unlock_bh(&call->state_lock); @@ -563,6 +584,8 @@ static void rxrpc_activate_channels(struct rxrpc_connection *conn) _enter("%d", conn->debug_id); + trace_rxrpc_client(conn, -1, rxrpc_client_activate_chans); + if (conn->cache_state != RXRPC_CONN_CLIENT_ACTIVE || conn->active_chans == RXRPC_ACTIVE_CHANS_MASK) return; @@ -657,10 +680,13 @@ int rxrpc_connect_call(struct rxrpc_call *call, * had a chance at re-use (the per-connection security negotiation is * expensive). */ -static void rxrpc_expose_client_conn(struct rxrpc_connection *conn) +static void rxrpc_expose_client_conn(struct rxrpc_connection *conn, + unsigned int channel) { - if (!test_and_set_bit(RXRPC_CONN_EXPOSED, &conn->flags)) + if (!test_and_set_bit(RXRPC_CONN_EXPOSED, &conn->flags)) { + trace_rxrpc_client(conn, channel, rxrpc_client_exposed); rxrpc_get_connection(conn); + } } /* @@ -669,9 +695,9 @@ static void rxrpc_expose_client_conn(struct rxrpc_connection *conn) */ void rxrpc_expose_client_call(struct rxrpc_call *call) { + unsigned int channel = call->cid & RXRPC_CHANNELMASK; struct rxrpc_connection *conn = call->conn; - struct rxrpc_channel *chan = - &conn->channels[call->cid & RXRPC_CHANNELMASK]; + struct rxrpc_channel *chan = &conn->channels[channel]; if (!test_and_set_bit(RXRPC_CALL_EXPOSED, &call->flags)) { /* Mark the call ID as being used. 
If the callNumber counter @@ -682,7 +708,7 @@ void rxrpc_expose_client_call(struct rxrpc_call *call) chan->call_counter++; if (chan->call_counter >= INT_MAX) set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); - rxrpc_expose_client_conn(conn); + rxrpc_expose_client_conn(conn, channel); } } @@ -695,6 +721,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) struct rxrpc_connection *conn = call->conn; struct rxrpc_channel *chan = &conn->channels[channel]; + trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect); call->conn = NULL; spin_lock(&conn->channel_lock); @@ -709,6 +736,8 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) ASSERT(!test_bit(RXRPC_CALL_EXPOSED, &call->flags)); list_del_init(&call->chan_wait_link); + trace_rxrpc_client(conn, channel, rxrpc_client_chan_unstarted); + /* We must deactivate or idle the connection if it's now * waiting for nothing. */ @@ -739,7 +768,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) /* See if we can pass the channel directly to another call. */ if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE && !list_empty(&conn->waiting_calls)) { - _debug("pass chan"); + trace_rxrpc_client(conn, channel, rxrpc_client_chan_pass); rxrpc_activate_one_channel(conn, channel); goto out_2; } @@ -762,7 +791,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) goto out; } - _debug("pass chan 2"); + trace_rxrpc_client(conn, channel, rxrpc_client_chan_pass); rxrpc_activate_one_channel(conn, channel); goto out; @@ -794,7 +823,7 @@ idle_connection: * immediately or moved to the idle list for a short while. 
*/ if (test_bit(RXRPC_CONN_EXPOSED, &conn->flags)) { - _debug("make idle"); + trace_rxrpc_client(conn, channel, rxrpc_client_to_idle); conn->idle_timestamp = jiffies; conn->cache_state = RXRPC_CONN_CLIENT_IDLE; list_move_tail(&conn->cache_link, &rxrpc_idle_client_conns); @@ -804,7 +833,7 @@ idle_connection: &rxrpc_client_conn_reap, rxrpc_conn_idle_client_expiry); } else { - _debug("make inactive"); + trace_rxrpc_client(conn, channel, rxrpc_client_to_inactive); conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE; list_del_init(&conn->cache_link); } @@ -821,6 +850,8 @@ rxrpc_put_one_client_conn(struct rxrpc_connection *conn) struct rxrpc_local *local = conn->params.local; unsigned int nr_conns; + trace_rxrpc_client(conn, -1, rxrpc_client_cleanup); + if (test_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags)) { spin_lock(&local->client_conns_lock); if (test_and_clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, @@ -834,6 +865,7 @@ rxrpc_put_one_client_conn(struct rxrpc_connection *conn) ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_INACTIVE); if (test_bit(RXRPC_CONN_COUNTED, &conn->flags)) { + trace_rxrpc_client(conn, -1, rxrpc_client_uncount); spin_lock(&rxrpc_client_conn_cache_lock); nr_conns = --rxrpc_nr_client_conns; @@ -863,20 +895,18 @@ rxrpc_put_one_client_conn(struct rxrpc_connection *conn) */ void rxrpc_put_client_conn(struct rxrpc_connection *conn) { - struct rxrpc_connection *next; + const void *here = __builtin_return_address(0); + int n; do { - _enter("%p{u=%d,d=%d}", - conn, atomic_read(&conn->usage), conn->debug_id); - - next = rxrpc_put_one_client_conn(conn); - - if (!next) - break; - conn = next; - } while (atomic_dec_and_test(&conn->usage)); - - _leave(""); + n = atomic_dec_return(&conn->usage); + trace_rxrpc_conn(conn, rxrpc_conn_put_client, n, here); + if (n > 0) + return; + ASSERTCMP(n, >=, 0); + + conn = rxrpc_put_one_client_conn(conn); + } while (conn); } /* @@ -907,9 +937,11 @@ static void rxrpc_cull_active_client_conns(void) ASSERTCMP(conn->cache_state, ==, 
RXRPC_CONN_CLIENT_ACTIVE); if (list_empty(&conn->waiting_calls)) { + trace_rxrpc_client(conn, -1, rxrpc_client_to_culled); conn->cache_state = RXRPC_CONN_CLIENT_CULLED; list_del_init(&conn->cache_link); } else { + trace_rxrpc_client(conn, -1, rxrpc_client_to_waiting); conn->cache_state = RXRPC_CONN_CLIENT_WAITING; list_move_tail(&conn->cache_link, &rxrpc_waiting_client_conns); @@ -983,7 +1015,7 @@ next: goto not_yet_expired; } - _debug("discard conn %d", conn->debug_id); + trace_rxrpc_client(conn, -1, rxrpc_client_discard); if (!test_and_clear_bit(RXRPC_CONN_EXPOSED, &conn->flags)) BUG(); conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE; diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 0691007..a43f4c9 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -377,7 +377,7 @@ void rxrpc_process_connection(struct work_struct *work) u32 abort_code = RX_PROTOCOL_ERROR; int ret; - _enter("{%d}", conn->debug_id); + rxrpc_see_connection(conn); if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events)) rxrpc_secure_connection(conn); diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index bb1f292..3b55aee 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -246,11 +246,77 @@ void rxrpc_kill_connection(struct rxrpc_connection *conn) } /* - * release a virtual connection + * Queue a connection's work processor, getting a ref to pass to the work + * queue. */ -void __rxrpc_put_connection(struct rxrpc_connection *conn) +bool rxrpc_queue_conn(struct rxrpc_connection *conn) { - rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0); + const void *here = __builtin_return_address(0); + int n = __atomic_add_unless(&conn->usage, 1, 0); + if (n == 0) + return false; + if (rxrpc_queue_work(&conn->processor)) + trace_rxrpc_conn(conn, rxrpc_conn_queued, n + 1, here); + else + rxrpc_put_connection(conn); + return true; +} + +/* + * Note the re-emergence of a connection. 
+ */ +void rxrpc_see_connection(struct rxrpc_connection *conn) +{ + const void *here = __builtin_return_address(0); + if (conn) { + int n = atomic_read(&conn->usage); + + trace_rxrpc_conn(conn, rxrpc_conn_seen, n, here); + } +} + +/* + * Get a ref on a connection. + */ +void rxrpc_get_connection(struct rxrpc_connection *conn) +{ + const void *here = __builtin_return_address(0); + int n = atomic_inc_return(&conn->usage); + + trace_rxrpc_conn(conn, rxrpc_conn_got, n, here); +} + +/* + * Try to get a ref on a connection. + */ +struct rxrpc_connection * +rxrpc_get_connection_maybe(struct rxrpc_connection *conn) +{ + const void *here = __builtin_return_address(0); + + if (conn) { + int n = __atomic_add_unless(&conn->usage, 1, 0); + if (n > 0) + trace_rxrpc_conn(conn, rxrpc_conn_got, n + 1, here); + else + conn = NULL; + } + return conn; +} + +/* + * Release a service connection + */ +void rxrpc_put_service_conn(struct rxrpc_connection *conn) +{ + const void *here = __builtin_return_address(0); + int n; + + n = atomic_dec_return(&conn->usage); + trace_rxrpc_conn(conn, rxrpc_conn_put_service, n, here); + ASSERTCMP(n, >=, 0); + if (n == 0) + rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0); } /* diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index 83d54da..eef551f 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -136,6 +136,10 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t gfp) list_add_tail(&conn->link, &rxrpc_connections); list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list); write_unlock(&rxrpc_connection_lock); + + trace_rxrpc_conn(conn, rxrpc_conn_new_service, + atomic_read(&conn->usage), + __builtin_return_address(0)); } return conn; diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 8b91078..598064d 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -101,3 +101,34 @@ const char *rxrpc_acks(u8 reason) reason = ARRAY_SIZE(str) - 1; return str[reason]; } + +const char 
rxrpc_conn_traces[rxrpc_conn__nr_trace][4] = { + [rxrpc_conn_new_client] = "NWc", + [rxrpc_conn_new_service] = "NWs", + [rxrpc_conn_queued] = "QUE", + [rxrpc_conn_seen] = "SEE", + [rxrpc_conn_got] = "GOT", + [rxrpc_conn_put_client] = "PTc", + [rxrpc_conn_put_service] = "PTs", +}; + +const char rxrpc_client_traces[rxrpc_client__nr_trace][7] = { + [rxrpc_client_activate_chans] = "Activa", + [rxrpc_client_alloc] = "Alloc ", + [rxrpc_client_chan_activate] = "ChActv", + [rxrpc_client_chan_disconnect] = "ChDisc", + [rxrpc_client_chan_pass] = "ChPass", + [rxrpc_client_chan_unstarted] = "ChUnst", + [rxrpc_client_cleanup] = "Clean ", + [rxrpc_client_count] = "Count ", + [rxrpc_client_discard] = "Discar", + [rxrpc_client_duplicate] = "Duplic", + [rxrpc_client_exposed] = "Expose", + [rxrpc_client_replace] = "Replac", + [rxrpc_client_to_active] = "->Actv", + [rxrpc_client_to_culled] = "->Cull", + [rxrpc_client_to_idle] = "->Idle", + [rxrpc_client_to_inactive] = "->Inac", + [rxrpc_client_to_waiting] = "->Wait", + [rxrpc_client_uncount] = "Uncoun", +}; -- cgit v1.1 From a124fe3ee5d82f2c9a9b8818ed5cb9f61685f1d3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:13 +0100 Subject: rxrpc: Add a tracepoint to follow the life of a packet in the Tx buffer Add a tracepoint to follow the insertion of a packet into the transmit buffer, its transmission and its rotation out of the buffer. 
Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 26 ++++++++++++++++++++++++++ net/rxrpc/ar-internal.h | 12 ++++++++++++ net/rxrpc/input.c | 2 ++ net/rxrpc/misc.c | 9 +++++++++ net/rxrpc/sendmsg.c | 9 ++++++++- 5 files changed, 57 insertions(+), 1 deletion(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index c0c496c..ffc74b3 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -208,6 +208,32 @@ TRACE_EVENT(rxrpc_abort, __entry->abort_code, __entry->error, __entry->why) ); +TRACE_EVENT(rxrpc_transmit, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_transmit_trace why), + + TP_ARGS(call, why), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_transmit_trace, why ) + __field(rxrpc_seq_t, tx_hard_ack ) + __field(rxrpc_seq_t, tx_top ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->why = why; + __entry->tx_hard_ack = call->tx_hard_ack; + __entry->tx_top = call->tx_top; + ), + + TP_printk("c=%p %s f=%08x n=%u", + __entry->call, + rxrpc_transmit_traces[__entry->why], + __entry->tx_hard_ack + 1, + __entry->tx_top - __entry->tx_hard_ack) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 6ca40ee..afa5dcc 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -593,6 +593,18 @@ enum rxrpc_call_trace { extern const char rxrpc_call_traces[rxrpc_call__nr_trace][4]; +enum rxrpc_transmit_trace { + rxrpc_transmit_wait, + rxrpc_transmit_queue, + rxrpc_transmit_queue_reqack, + rxrpc_transmit_queue_last, + rxrpc_transmit_rotate, + rxrpc_transmit_end, + rxrpc_transmit__nr_trace +}; + +extern const char rxrpc_transmit_traces[rxrpc_transmit__nr_trace][4]; + extern const char *const rxrpc_pkts[]; extern const char *rxrpc_acks(u8 reason); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index c1f83d2..c7eb510 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ 
-59,6 +59,7 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to) spin_unlock(&call->lock); + trace_rxrpc_transmit(call, rxrpc_transmit_rotate); wake_up(&call->waitq); while (list) { @@ -107,6 +108,7 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, const char *abort_why) } write_unlock(&call->state_lock); + trace_rxrpc_transmit(call, rxrpc_transmit_end); _leave(" = ok"); return true; } diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 598064d..dca8999 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -132,3 +132,12 @@ const char rxrpc_client_traces[rxrpc_client__nr_trace][7] = { [rxrpc_client_to_waiting] = "->Wait", [rxrpc_client_uncount] = "Uncoun", }; + +const char rxrpc_transmit_traces[rxrpc_transmit__nr_trace][4] = { + [rxrpc_transmit_wait] = "WAI", + [rxrpc_transmit_queue] = "QUE", + [rxrpc_transmit_queue_reqack] = "QRA", + [rxrpc_transmit_queue_last] = "QLS", + [rxrpc_transmit_rotate] = "ROT", + [rxrpc_transmit_end] = "END", +}; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 8bfddf4..28d8f73 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -56,6 +56,7 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, break; } + trace_rxrpc_transmit(call, rxrpc_transmit_wait); release_sock(&rx->sk); *timeo = schedule_timeout(*timeo); lock_sock(&rx->sk); @@ -104,8 +105,14 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, smp_wmb(); call->rxtx_buffer[ix] = skb; call->tx_top = seq; - if (last) + if (last) { set_bit(RXRPC_CALL_TX_LAST, &call->flags); + trace_rxrpc_transmit(call, rxrpc_transmit_queue_last); + } else if (sp->hdr.flags & RXRPC_REQUEST_ACK) { + trace_rxrpc_transmit(call, rxrpc_transmit_queue_reqack); + } else { + trace_rxrpc_transmit(call, rxrpc_transmit_queue); + } if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) { _debug("________awaiting reply/ACK__________"); -- cgit v1.1 From ec71eb9ada34f8d1a58b7c35d906c59411295445 Mon Sep 17 00:00:00 2001 
From: David Howells Date: Sat, 17 Sep 2016 10:49:13 +0100 Subject: rxrpc: Add a tracepoint to log received ACK packets Add a tracepoint to log information from received ACK packets. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 26 ++++++++++++++++++++++++++ net/rxrpc/input.c | 2 ++ 2 files changed, 28 insertions(+) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index ffc74b3..2b19f3f 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -234,6 +234,32 @@ TRACE_EVENT(rxrpc_transmit, __entry->tx_top - __entry->tx_hard_ack) ); +TRACE_EVENT(rxrpc_rx_ack, + TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t first, u8 reason, u8 n_acks), + + TP_ARGS(call, first, reason, n_acks), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(rxrpc_seq_t, first ) + __field(u8, reason ) + __field(u8, n_acks ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->first = first; + __entry->reason = reason; + __entry->n_acks = n_acks; + ), + + TP_printk("c=%p %s f=%08x n=%u", + __entry->call, + rxrpc_acks(__entry->reason), + __entry->first, + __entry->n_acks) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index c7eb510..7b18ca1 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -440,6 +440,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, hard_ack = first_soft_ack - 1; nr_acks = buf.ack.nAcks; + trace_rxrpc_rx_ack(call, first_soft_ack, buf.ack.reason, nr_acks); + _proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", sp->hdr.serial, ntohs(buf.ack.maxSkew), -- cgit v1.1 From f3639df2d90bc919328c459b3c7c49ed5667a52f Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:13 +0100 Subject: rxrpc: Add a tracepoint to log ACK transmission Add a tracepoint to log information about ACK transmission. 
Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 30 ++++++++++++++++++++++++++++++ net/rxrpc/conn_event.c | 3 +++ net/rxrpc/output.c | 7 ++++++- 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 2b19f3f..d545d69 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -260,6 +260,36 @@ TRACE_EVENT(rxrpc_rx_ack, __entry->n_acks) ); +TRACE_EVENT(rxrpc_tx_ack, + TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t first, + rxrpc_serial_t serial, u8 reason, u8 n_acks), + + TP_ARGS(call, first, serial, reason, n_acks), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(rxrpc_seq_t, first ) + __field(rxrpc_serial_t, serial ) + __field(u8, reason ) + __field(u8, n_acks ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->first = first; + __entry->serial = serial; + __entry->reason = reason; + __entry->n_acks = n_acks; + ), + + TP_printk("c=%p %s f=%08x r=%08x n=%u", + __entry->call, + rxrpc_acks(__entry->reason), + __entry->first, + __entry->serial, + __entry->n_acks) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index a43f4c9..9b19c51 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -98,6 +98,9 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, pkt.info.rwind = htonl(rxrpc_rx_window_size); pkt.info.jumbo_max = htonl(rxrpc_rx_jumbo_max); len += sizeof(pkt.ack) + sizeof(pkt.info); + + trace_rxrpc_tx_ack(NULL, chan->last_seq, 0, + RXRPC_ACK_DUPLICATE, 0); break; } diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 0b21ed8..2c9daea 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -38,12 +38,14 @@ struct rxrpc_pkt_buffer { static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, struct rxrpc_pkt_buffer *pkt) { + rxrpc_serial_t serial; rxrpc_seq_t hard_ack, top, seq; int ix; u32 mtu, jmax; u8 
*ackp = pkt->acks; /* Barrier against rxrpc_input_data(). */ + serial = call->ackr_serial; hard_ack = READ_ONCE(call->rx_hard_ack); top = smp_load_acquire(&call->rx_top); @@ -51,7 +53,7 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, pkt->ack.maxSkew = htons(call->ackr_skew); pkt->ack.firstPacket = htonl(hard_ack + 1); pkt->ack.previousPacket = htonl(call->ackr_prev_seq); - pkt->ack.serial = htonl(call->ackr_serial); + pkt->ack.serial = htonl(serial); pkt->ack.reason = call->ackr_reason; pkt->ack.nAcks = top - hard_ack; @@ -75,6 +77,9 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, pkt->ackinfo.rwind = htonl(call->rx_winsize); pkt->ackinfo.jumbo_max = htonl(jmax); + trace_rxrpc_tx_ack(call, hard_ack + 1, serial, call->ackr_reason, + top - hard_ack); + *ackp++ = 0; *ackp++ = 0; *ackp++ = 0; -- cgit v1.1 From 58dc63c998ea3c5a27e2bf9251eddbf0977056a6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:13 +0100 Subject: rxrpc: Add a tracepoint to follow packets in the Rx buffer Add a tracepoint to follow the life of packets that get added to a call's receive buffer. 
Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 33 +++++++++++++++++++++++++++++++++ net/rxrpc/ar-internal.h | 12 ++++++++++++ net/rxrpc/call_accept.c | 3 +++ net/rxrpc/input.c | 6 +++++- net/rxrpc/misc.c | 9 +++++++++ net/rxrpc/recvmsg.c | 11 +++++++++++ 6 files changed, 73 insertions(+), 1 deletion(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index d545d69..7dd5f01 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -290,6 +290,39 @@ TRACE_EVENT(rxrpc_tx_ack, __entry->n_acks) ); +TRACE_EVENT(rxrpc_receive, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_receive_trace why, + rxrpc_serial_t serial, rxrpc_seq_t seq), + + TP_ARGS(call, why, serial, seq), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_receive_trace, why ) + __field(rxrpc_serial_t, serial ) + __field(rxrpc_seq_t, seq ) + __field(rxrpc_seq_t, hard_ack ) + __field(rxrpc_seq_t, top ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->why = why; + __entry->serial = serial; + __entry->seq = seq; + __entry->hard_ack = call->rx_hard_ack; + __entry->top = call->rx_top; + ), + + TP_printk("c=%p %s r=%08x q=%08x w=%08x-%08x", + __entry->call, + rxrpc_receive_traces[__entry->why], + __entry->serial, + __entry->seq, + __entry->hard_ack, + __entry->top) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index afa5dcc..e5d2f2f 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -605,6 +605,18 @@ enum rxrpc_transmit_trace { extern const char rxrpc_transmit_traces[rxrpc_transmit__nr_trace][4]; +enum rxrpc_receive_trace { + rxrpc_receive_incoming, + rxrpc_receive_queue, + rxrpc_receive_queue_last, + rxrpc_receive_front, + rxrpc_receive_rotate, + rxrpc_receive_end, + rxrpc_receive__nr_trace +}; + +extern const char rxrpc_receive_traces[rxrpc_receive__nr_trace][4]; + extern const char *const 
rxrpc_pkts[]; extern const char *rxrpc_acks(u8 reason); diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 3e47450..a8d39d7 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -367,6 +367,9 @@ found_service: goto out; } + trace_rxrpc_receive(call, rxrpc_receive_incoming, + sp->hdr.serial, sp->hdr.seq); + /* Make the call live. */ rxrpc_incoming_call(rx, call, skb); conn = call->conn; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 7b18ca1..b690220 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -284,8 +284,12 @@ next_subpacket: call->rxtx_buffer[ix] = skb; if (after(seq, call->rx_top)) smp_store_release(&call->rx_top, seq); - if (flags & RXRPC_LAST_PACKET) + if (flags & RXRPC_LAST_PACKET) { set_bit(RXRPC_CALL_RX_LAST, &call->flags); + trace_rxrpc_receive(call, rxrpc_receive_queue_last, serial, seq); + } else { + trace_rxrpc_receive(call, rxrpc_receive_queue, serial, seq); + } queued = true; if (after_eq(seq, call->rx_expect_next)) { diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index dca8999..db5f1d5 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -141,3 +141,12 @@ const char rxrpc_transmit_traces[rxrpc_transmit__nr_trace][4] = { [rxrpc_transmit_rotate] = "ROT", [rxrpc_transmit_end] = "END", }; + +const char rxrpc_receive_traces[rxrpc_receive__nr_trace][4] = { + [rxrpc_receive_incoming] = "INC", + [rxrpc_receive_queue] = "QUE", + [rxrpc_receive_queue_last] = "QLS", + [rxrpc_receive_front] = "FRN", + [rxrpc_receive_rotate] = "ROT", + [rxrpc_receive_end] = "END", +}; diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 8b8d7e1..22d5108 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -134,6 +134,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call) { _enter("%d,%s", call->debug_id, rxrpc_call_states[call->state]); + trace_rxrpc_receive(call, rxrpc_receive_end, 0, call->rx_top); ASSERTCMP(call->rx_hard_ack, ==, call->rx_top); if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { 
@@ -167,6 +168,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) { struct rxrpc_skb_priv *sp; struct sk_buff *skb; + rxrpc_serial_t serial; rxrpc_seq_t hard_ack, top; u8 flags; int ix; @@ -183,6 +185,10 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) rxrpc_see_skb(skb); sp = rxrpc_skb(skb); flags = sp->hdr.flags; + serial = sp->hdr.serial; + if (call->rxtx_annotations[ix] & RXRPC_RX_ANNO_JUMBO) + serial += (call->rxtx_annotations[ix] & RXRPC_RX_ANNO_JUMBO) - 1; + call->rxtx_buffer[ix] = NULL; call->rxtx_annotations[ix] = 0; /* Barrier against rxrpc_input_data(). */ @@ -191,6 +197,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) rxrpc_free_skb(skb); _debug("%u,%u,%02x", hard_ack, top, flags); + trace_rxrpc_receive(call, rxrpc_receive_rotate, serial, hard_ack); if (flags & RXRPC_LAST_PACKET) rxrpc_end_rx_phase(call); } @@ -309,6 +316,10 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, rxrpc_see_skb(skb); sp = rxrpc_skb(skb); + if (!(flags & MSG_PEEK)) + trace_rxrpc_receive(call, rxrpc_receive_front, + sp->hdr.serial, seq); + if (msg) sock_recv_timestamp(msg, sock->sk, skb); -- cgit v1.1 From 849979051cbc9352857d8bb31895ae55afe19d96 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 11:13:31 +0100 Subject: rxrpc: Add a tracepoint to follow what recvmsg does Add a tracepoint to follow what recvmsg does within AF_RXRPC. 
Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 34 ++++++++++++++++++++++++++++++++++ net/rxrpc/ar-internal.h | 17 +++++++++++++++++ net/rxrpc/misc.c | 14 ++++++++++++++ net/rxrpc/recvmsg.c | 34 ++++++++++++++++++++++++++-------- 4 files changed, 91 insertions(+), 8 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 7dd5f01..5873220 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -323,6 +323,40 @@ TRACE_EVENT(rxrpc_receive, __entry->top) ); +TRACE_EVENT(rxrpc_recvmsg, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_recvmsg_trace why, + rxrpc_seq_t seq, unsigned int offset, unsigned int len, + int ret), + + TP_ARGS(call, why, seq, offset, len, ret), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_recvmsg_trace, why ) + __field(rxrpc_seq_t, seq ) + __field(unsigned int, offset ) + __field(unsigned int, len ) + __field(int, ret ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->why = why; + __entry->seq = seq; + __entry->offset = offset; + __entry->len = len; + __entry->ret = ret; + ), + + TP_printk("c=%p %s q=%08x o=%u l=%u ret=%d", + __entry->call, + rxrpc_recvmsg_traces[__entry->why], + __entry->seq, + __entry->offset, + __entry->len, + __entry->ret) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index e5d2f2f..a17341d 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -617,6 +617,23 @@ enum rxrpc_receive_trace { extern const char rxrpc_receive_traces[rxrpc_receive__nr_trace][4]; +enum rxrpc_recvmsg_trace { + rxrpc_recvmsg_enter, + rxrpc_recvmsg_wait, + rxrpc_recvmsg_dequeue, + rxrpc_recvmsg_hole, + rxrpc_recvmsg_next, + rxrpc_recvmsg_cont, + rxrpc_recvmsg_full, + rxrpc_recvmsg_data_return, + rxrpc_recvmsg_terminal, + rxrpc_recvmsg_to_be_accepted, + rxrpc_recvmsg_return, + rxrpc_recvmsg__nr_trace +}; + +extern const char 
rxrpc_recvmsg_traces[rxrpc_recvmsg__nr_trace][5]; + extern const char *const rxrpc_pkts[]; extern const char *rxrpc_acks(u8 reason); diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index db5f1d5..c7065d8 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -150,3 +150,17 @@ const char rxrpc_receive_traces[rxrpc_receive__nr_trace][4] = { [rxrpc_receive_rotate] = "ROT", [rxrpc_receive_end] = "END", }; + +const char rxrpc_recvmsg_traces[rxrpc_recvmsg__nr_trace][5] = { + [rxrpc_recvmsg_enter] = "ENTR", + [rxrpc_recvmsg_wait] = "WAIT", + [rxrpc_recvmsg_dequeue] = "DEQU", + [rxrpc_recvmsg_hole] = "HOLE", + [rxrpc_recvmsg_next] = "NEXT", + [rxrpc_recvmsg_cont] = "CONT", + [rxrpc_recvmsg_full] = "FULL", + [rxrpc_recvmsg_data_return] = "DATA", + [rxrpc_recvmsg_terminal] = "TERM", + [rxrpc_recvmsg_to_be_accepted] = "TBAC", + [rxrpc_recvmsg_return] = "RETN", +}; diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 22d5108..b62a081 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -94,6 +94,8 @@ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) break; } + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_terminal, call->rx_hard_ack, + call->rx_pkt_offset, call->rx_pkt_len, ret); return ret; } @@ -124,6 +126,7 @@ static int rxrpc_recvmsg_new_call(struct rxrpc_sock *rx, write_unlock(&rx->call_lock); } + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_to_be_accepted, 1, 0, 0, ret); return ret; } @@ -310,8 +313,11 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, for (seq = hard_ack + 1; before_eq(seq, top); seq++) { ix = seq & RXRPC_RXTX_BUFF_MASK; skb = call->rxtx_buffer[ix]; - if (!skb) + if (!skb) { + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_hole, seq, + rx_pkt_offset, rx_pkt_len, 0); break; + } smp_rmb(); rxrpc_see_skb(skb); sp = rxrpc_skb(skb); @@ -327,10 +333,15 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, ret2 = rxrpc_locate_data(call, skb, &call->rxtx_annotations[ix], 
&rx_pkt_offset, &rx_pkt_len); + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_next, seq, + rx_pkt_offset, rx_pkt_len, ret2); if (ret2 < 0) { ret = ret2; goto out; } + } else { + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_cont, seq, + rx_pkt_offset, rx_pkt_len, 0); } _debug("recvmsg %x DATA #%u { %d, %d }", sp->hdr.callNumber, seq, rx_pkt_offset, rx_pkt_len); @@ -357,6 +368,8 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, } if (rx_pkt_len > 0) { + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_full, seq, + rx_pkt_offset, rx_pkt_len, 0); _debug("buffer full"); ASSERTCMP(*_offset, ==, len); ret = 0; @@ -383,6 +396,8 @@ out: call->rx_pkt_len = rx_pkt_len; } done: + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_data_return, seq, + rx_pkt_offset, rx_pkt_len, ret); _leave(" = %d [%u/%u]", ret, seq, top); return ret; } @@ -404,7 +419,7 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, DEFINE_WAIT(wait); - _enter(",,,%zu,%d", len, flags); + trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_enter, 0, 0, 0, 0); if (flags & (MSG_OOB | MSG_TRUNC)) return -EOPNOTSUPP; @@ -424,8 +439,10 @@ try_again: if (list_empty(&rx->recvmsg_q)) { ret = -EWOULDBLOCK; - if (timeo == 0) + if (timeo == 0) { + call = NULL; goto error_no_call; + } release_sock(&rx->sk); @@ -439,6 +456,8 @@ try_again: if (list_empty(&rx->recvmsg_q)) { if (signal_pending(current)) goto wait_interrupted; + trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_wait, + 0, 0, 0, 0); timeo = schedule_timeout(timeo); } finish_wait(sk_sleep(&rx->sk), &wait); @@ -457,7 +476,7 @@ try_again: rxrpc_get_call(call, rxrpc_call_got); write_unlock_bh(&rx->recvmsg_lock); - _debug("recvmsg call %p", call); + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_dequeue, 0, 0, 0, 0); if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) BUG(); @@ -527,16 +546,15 @@ error: rxrpc_put_call(call, rxrpc_call_put); error_no_call: release_sock(&rx->sk); - _leave(" = %d", ret); + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret); return 
ret; wait_interrupted: ret = sock_intr_errno(timeo); wait_error: finish_wait(sk_sleep(&rx->sk), &wait); - release_sock(&rx->sk); - _leave(" = %d [wait]", ret); - return ret; + call = NULL; + goto error_no_call; } /** -- cgit v1.1 From ba39f3a0ed756ccd882adf4a77916ec863db3ce4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:14 +0100 Subject: rxrpc: Remove printks from rxrpc_recvmsg_data() to fix uninit var Remove _enter/_debug/_leave calls from rxrpc_recvmsg_data() of which one uses an uninitialised variable. Signed-off-by: David Howells --- net/rxrpc/recvmsg.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index b62a081..79e6566 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -296,8 +296,6 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, unsigned int rx_pkt_offset, rx_pkt_len; int ix, copy, ret = -EAGAIN, ret2; - _enter(""); - rx_pkt_offset = call->rx_pkt_offset; rx_pkt_len = call->rx_pkt_len; @@ -343,8 +341,6 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, trace_rxrpc_recvmsg(call, rxrpc_recvmsg_cont, seq, rx_pkt_offset, rx_pkt_len, 0); } - _debug("recvmsg %x DATA #%u { %d, %d }", - sp->hdr.callNumber, seq, rx_pkt_offset, rx_pkt_len); /* We have to handle short, empty and used-up DATA packets. 
*/ remain = len - *_offset; @@ -360,8 +356,6 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, } /* handle piecemeal consumption of data packets */ - _debug("copied %d @%zu", copy, *_offset); - rx_pkt_offset += copy; rx_pkt_len -= copy; *_offset += copy; @@ -370,7 +364,6 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, if (rx_pkt_len > 0) { trace_rxrpc_recvmsg(call, rxrpc_recvmsg_full, seq, rx_pkt_offset, rx_pkt_len, 0); - _debug("buffer full"); ASSERTCMP(*_offset, ==, len); ret = 0; break; @@ -398,7 +391,6 @@ out: done: trace_rxrpc_recvmsg(call, rxrpc_recvmsg_data_return, seq, rx_pkt_offset, rx_pkt_len, ret); - _leave(" = %d [%u/%u]", ret, seq, top); return ret; } -- cgit v1.1 From 71f3ca408fd43b586c02480768a503af075b247e Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:14 +0100 Subject: rxrpc: Improve skb tracing Improve sk_buff tracing within AF_RXRPC by the following means: (1) Use an enum to note the event type rather than plain integers and use an array of event names rather than a big multi ?: list. (2) Distinguish Rx from Tx packets and account them separately. This requires the call phase to be tracked so that we know what we might find in rxtx_buffer[]. (3) Add a parameter to rxrpc_{new,see,get,free}_skb() to indicate the event type. (4) A pair of 'rotate' events are added to indicate packets that are about to be rotated out of the Rx and Tx windows. (5) A pair of 'lost' events are added, along with rxrpc_lose_skb() for packet loss injection recording. 
Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 12 ++++------ net/rxrpc/af_rxrpc.c | 5 +++-- net/rxrpc/ar-internal.h | 33 ++++++++++++++++++++++----- net/rxrpc/call_event.c | 8 +++---- net/rxrpc/call_object.c | 11 ++++++--- net/rxrpc/conn_event.c | 6 ++--- net/rxrpc/input.c | 13 ++++++----- net/rxrpc/local_event.c | 4 ++-- net/rxrpc/misc.c | 18 +++++++++++++++ net/rxrpc/output.c | 4 ++-- net/rxrpc/peer_event.c | 10 ++++----- net/rxrpc/recvmsg.c | 7 +++--- net/rxrpc/sendmsg.c | 10 ++++----- net/rxrpc/skbuff.c | 53 +++++++++++++++++++++++++++++++------------- 14 files changed, 131 insertions(+), 63 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 5873220..75a5d8b 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -107,14 +107,14 @@ TRACE_EVENT(rxrpc_call, ); TRACE_EVENT(rxrpc_skb, - TP_PROTO(struct sk_buff *skb, int op, int usage, int mod_count, - const void *where), + TP_PROTO(struct sk_buff *skb, enum rxrpc_skb_trace op, + int usage, int mod_count, const void *where), TP_ARGS(skb, op, usage, mod_count, where), TP_STRUCT__entry( __field(struct sk_buff *, skb ) - __field(int, op ) + __field(enum rxrpc_skb_trace, op ) __field(int, usage ) __field(int, mod_count ) __field(const void *, where ) @@ -130,11 +130,7 @@ TRACE_EVENT(rxrpc_skb, TP_printk("s=%p %s u=%d m=%d p=%pSR", __entry->skb, - (__entry->op == 0 ? "NEW" : - __entry->op == 1 ? "SEE" : - __entry->op == 2 ? "GET" : - __entry->op == 3 ? 
"FRE" : - "PUR"), + rxrpc_skb_traces[__entry->op], __entry->usage, __entry->mod_count, __entry->where) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 09f81bef..8dbf7be 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -45,7 +45,7 @@ u32 rxrpc_epoch; atomic_t rxrpc_debug_id; /* count of skbs currently in use */ -atomic_t rxrpc_n_skbs; +atomic_t rxrpc_n_tx_skbs, rxrpc_n_rx_skbs; struct workqueue_struct *rxrpc_workqueue; @@ -867,7 +867,8 @@ static void __exit af_rxrpc_exit(void) proto_unregister(&rxrpc_proto); rxrpc_destroy_all_calls(); rxrpc_destroy_all_connections(); - ASSERTCMP(atomic_read(&rxrpc_n_skbs), ==, 0); + ASSERTCMP(atomic_read(&rxrpc_n_tx_skbs), ==, 0); + ASSERTCMP(atomic_read(&rxrpc_n_rx_skbs), ==, 0); rxrpc_destroy_all_locals(); remove_proc_entry("rxrpc_conns", init_net.proc_net); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index a17341d..034f525 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -520,6 +520,7 @@ struct rxrpc_call { rxrpc_seq_t rx_expect_next; /* Expected next packet sequence number */ u8 rx_winsize; /* Size of Rx window */ u8 tx_winsize; /* Maximum size of Tx window */ + bool tx_phase; /* T if transmission phase, F if receive phase */ u8 nr_jumbo_bad; /* Number of jumbo dups/exceeds-windows */ /* receive-phase ACK management */ @@ -534,6 +535,27 @@ struct rxrpc_call { rxrpc_serial_t acks_latest; /* serial number of latest ACK received */ }; +enum rxrpc_skb_trace { + rxrpc_skb_rx_cleaned, + rxrpc_skb_rx_freed, + rxrpc_skb_rx_got, + rxrpc_skb_rx_lost, + rxrpc_skb_rx_received, + rxrpc_skb_rx_rotated, + rxrpc_skb_rx_purged, + rxrpc_skb_rx_seen, + rxrpc_skb_tx_cleaned, + rxrpc_skb_tx_freed, + rxrpc_skb_tx_got, + rxrpc_skb_tx_lost, + rxrpc_skb_tx_new, + rxrpc_skb_tx_rotated, + rxrpc_skb_tx_seen, + rxrpc_skb__nr_trace +}; + +extern const char rxrpc_skb_traces[rxrpc_skb__nr_trace][7]; + enum rxrpc_conn_trace { rxrpc_conn_new_client, rxrpc_conn_new_service, @@ -642,7 +664,7 @@ 
extern const char *rxrpc_acks(u8 reason); /* * af_rxrpc.c */ -extern atomic_t rxrpc_n_skbs; +extern atomic_t rxrpc_n_tx_skbs, rxrpc_n_rx_skbs; extern u32 rxrpc_epoch; extern atomic_t rxrpc_debug_id; extern struct workqueue_struct *rxrpc_workqueue; @@ -1000,10 +1022,11 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t); */ void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *); void rxrpc_packet_destructor(struct sk_buff *); -void rxrpc_new_skb(struct sk_buff *); -void rxrpc_see_skb(struct sk_buff *); -void rxrpc_get_skb(struct sk_buff *); -void rxrpc_free_skb(struct sk_buff *); +void rxrpc_new_skb(struct sk_buff *, enum rxrpc_skb_trace); +void rxrpc_see_skb(struct sk_buff *, enum rxrpc_skb_trace); +void rxrpc_get_skb(struct sk_buff *, enum rxrpc_skb_trace); +void rxrpc_free_skb(struct sk_buff *, enum rxrpc_skb_trace); +void rxrpc_lose_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_purge_queue(struct sk_buff_head *); /* diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index f0cabc4..7d1b998 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -170,7 +170,7 @@ static void rxrpc_resend(struct rxrpc_call *call) continue; skb = call->rxtx_buffer[ix]; - rxrpc_see_skb(skb); + rxrpc_see_skb(skb, rxrpc_skb_tx_seen); sp = rxrpc_skb(skb); if (annotation == RXRPC_TX_ANNO_UNACK) { @@ -199,7 +199,7 @@ static void rxrpc_resend(struct rxrpc_call *call) continue; skb = call->rxtx_buffer[ix]; - rxrpc_get_skb(skb); + rxrpc_get_skb(skb, rxrpc_skb_tx_got); spin_unlock_bh(&call->lock); sp = rxrpc_skb(skb); @@ -211,7 +211,7 @@ static void rxrpc_resend(struct rxrpc_call *call) if (rxrpc_send_data_packet(call->conn, skb) < 0) { call->resend_at = now + 2; - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_tx_freed); return; } @@ -219,7 +219,7 @@ static void rxrpc_resend(struct rxrpc_call *call) rxrpc_expose_client_call(call); sp->resend_at = now + rxrpc_resend_timeout; - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, 
rxrpc_skb_tx_freed); spin_lock_bh(&call->lock); /* We need to clear the retransmit state, but there are two diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 54f3048..f50a609 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -182,6 +182,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx, return ERR_PTR(-ENOMEM); call->state = RXRPC_CALL_CLIENT_AWAIT_CONN; call->service_id = srx->srx_service; + call->tx_phase = true; _leave(" = %p", call); return call; @@ -458,7 +459,9 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) rxrpc_disconnect_call(call); for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) { - rxrpc_free_skb(call->rxtx_buffer[i]); + rxrpc_free_skb(call->rxtx_buffer[i], + (call->tx_phase ? rxrpc_skb_tx_cleaned : + rxrpc_skb_rx_cleaned)); call->rxtx_buffer[i] = NULL; } @@ -552,9 +555,11 @@ void rxrpc_cleanup_call(struct rxrpc_call *call) /* Clean up the Rx/Tx buffer */ for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) - rxrpc_free_skb(call->rxtx_buffer[i]); + rxrpc_free_skb(call->rxtx_buffer[i], + (call->tx_phase ? 
rxrpc_skb_tx_cleaned : + rxrpc_skb_rx_cleaned)); - rxrpc_free_skb(call->tx_pending); + rxrpc_free_skb(call->tx_pending, rxrpc_skb_tx_cleaned); call_rcu(&call->rcu, rxrpc_rcu_destroy_call); } diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 9b19c51..75a15a4 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -388,7 +388,7 @@ void rxrpc_process_connection(struct work_struct *work) /* go through the conn-level event packets, releasing the ref on this * connection that each one has when we've finished with it */ while ((skb = skb_dequeue(&conn->rx_queue))) { - rxrpc_see_skb(skb); + rxrpc_see_skb(skb, rxrpc_skb_rx_seen); ret = rxrpc_process_event(conn, skb, &abort_code); switch (ret) { case -EPROTO: @@ -399,7 +399,7 @@ void rxrpc_process_connection(struct work_struct *work) goto requeue_and_leave; case -ECONNABORTED: default: - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); break; } } @@ -416,7 +416,7 @@ requeue_and_leave: protocol_error: if (rxrpc_abort_connection(conn, -ret, abort_code) < 0) goto requeue_and_leave; - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); _leave(" [EPROTO]"); goto out; } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index b690220..84bb16d 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -50,7 +50,7 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to) call->tx_hard_ack++; ix = call->tx_hard_ack & RXRPC_RXTX_BUFF_MASK; skb = call->rxtx_buffer[ix]; - rxrpc_see_skb(skb); + rxrpc_see_skb(skb, rxrpc_skb_tx_rotated); call->rxtx_buffer[ix] = NULL; call->rxtx_annotations[ix] = 0; skb->next = list; @@ -66,7 +66,7 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to) skb = list; list = skb->next; skb->next = NULL; - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_tx_freed); } } @@ -99,6 +99,7 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, const char *abort_why) default: break; case 
RXRPC_CALL_CLIENT_AWAIT_REPLY: + call->tx_phase = false; call->state = RXRPC_CALL_CLIENT_RECV_REPLY; break; case RXRPC_CALL_SERVER_AWAIT_ACK: @@ -278,7 +279,7 @@ next_subpacket: * Barriers against rxrpc_recvmsg_data() and rxrpc_rotate_rx_window() * and also rxrpc_fill_out_ack(). */ - rxrpc_get_skb(skb); + rxrpc_get_skb(skb, rxrpc_skb_rx_got); call->rxtx_annotations[ix] = annotation; smp_wmb(); call->rxtx_buffer[ix] = skb; @@ -691,13 +692,13 @@ void rxrpc_data_ready(struct sock *udp_sk) return; } - rxrpc_new_skb(skb); + rxrpc_new_skb(skb, rxrpc_skb_rx_received); _net("recv skb %p", skb); /* we'll probably need to checksum it (didn't call sock_recvmsg) */ if (skb_checksum_complete(skb)) { - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); __UDP_INC_STATS(&init_net, UDP_MIB_INERRORS, 0); _leave(" [CSUM failed]"); return; @@ -821,7 +822,7 @@ void rxrpc_data_ready(struct sock *udp_sk) discard_unlock: rcu_read_unlock(); discard: - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); out: trace_rxrpc_rx_done(0, 0); return; diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c index f073e93..190f68b 100644 --- a/net/rxrpc/local_event.c +++ b/net/rxrpc/local_event.c @@ -90,7 +90,7 @@ void rxrpc_process_local_events(struct rxrpc_local *local) if (skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - rxrpc_see_skb(skb); + rxrpc_see_skb(skb, rxrpc_skb_rx_seen); _debug("{%d},{%u}", local->debug_id, sp->hdr.type); switch (sp->hdr.type) { @@ -107,7 +107,7 @@ void rxrpc_process_local_events(struct rxrpc_local *local) break; } - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); } _leave(""); diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index c7065d8..026e1f2 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -102,6 +102,24 @@ const char *rxrpc_acks(u8 reason) return str[reason]; } +const char rxrpc_skb_traces[rxrpc_skb__nr_trace][7] = { + [rxrpc_skb_rx_cleaned] = "Rx CLN", + [rxrpc_skb_rx_freed] = "Rx FRE", + 
[rxrpc_skb_rx_got] = "Rx GOT", + [rxrpc_skb_rx_lost] = "Rx *L*", + [rxrpc_skb_rx_received] = "Rx RCV", + [rxrpc_skb_rx_purged] = "Rx PUR", + [rxrpc_skb_rx_rotated] = "Rx ROT", + [rxrpc_skb_rx_seen] = "Rx SEE", + [rxrpc_skb_tx_cleaned] = "Tx CLN", + [rxrpc_skb_tx_freed] = "Tx FRE", + [rxrpc_skb_tx_got] = "Tx GOT", + [rxrpc_skb_tx_lost] = "Tx *L*", + [rxrpc_skb_tx_new] = "Tx NEW", + [rxrpc_skb_tx_rotated] = "Tx ROT", + [rxrpc_skb_tx_seen] = "Tx SEE", +}; + const char rxrpc_conn_traces[rxrpc_conn__nr_trace][4] = { [rxrpc_conn_new_client] = "NWc", [rxrpc_conn_new_service] = "NWs", diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 2c9daea..a2cad5c 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -324,7 +324,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local) whdr.type = RXRPC_PACKET_TYPE_ABORT; while ((skb = skb_dequeue(&local->reject_queue))) { - rxrpc_see_skb(skb); + rxrpc_see_skb(skb, rxrpc_skb_rx_seen); sp = rxrpc_skb(skb); if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) { @@ -343,7 +343,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local) kernel_sendmsg(local->socket, &msg, iov, 2, size); } - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); } _leave(""); diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 9e0725f..18276e7 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -155,11 +155,11 @@ void rxrpc_error_report(struct sock *sk) _leave("UDP socket errqueue empty"); return; } - rxrpc_new_skb(skb); + rxrpc_new_skb(skb, rxrpc_skb_rx_received); serr = SKB_EXT_ERR(skb); if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) { _leave("UDP empty message"); - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); return; } @@ -169,7 +169,7 @@ void rxrpc_error_report(struct sock *sk) peer = NULL; if (!peer) { rcu_read_unlock(); - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); _leave(" [no peer]"); return; } @@ -179,7 +179,7 @@ void rxrpc_error_report(struct 
sock *sk) serr->ee.ee_code == ICMP_FRAG_NEEDED)) { rxrpc_adjust_mtu(peer, serr); rcu_read_unlock(); - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); rxrpc_put_peer(peer); _leave(" [MTU update]"); return; @@ -187,7 +187,7 @@ void rxrpc_error_report(struct sock *sk) rxrpc_store_error(peer, serr); rcu_read_unlock(); - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); /* The ref we obtained is passed off to the work item */ rxrpc_queue_work(&peer->error_distributor); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 79e6566..6ba4af5 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -155,6 +155,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call) break; case RXRPC_CALL_SERVER_RECV_REQUEST: + call->tx_phase = true; call->state = RXRPC_CALL_SERVER_ACK_REQUEST; break; default: @@ -185,7 +186,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) hard_ack++; ix = hard_ack & RXRPC_RXTX_BUFF_MASK; skb = call->rxtx_buffer[ix]; - rxrpc_see_skb(skb); + rxrpc_see_skb(skb, rxrpc_skb_rx_rotated); sp = rxrpc_skb(skb); flags = sp->hdr.flags; serial = sp->hdr.serial; @@ -197,7 +198,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) /* Barrier against rxrpc_input_data(). 
*/ smp_store_release(&call->rx_hard_ack, hard_ack); - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); _debug("%u,%u,%02x", hard_ack, top, flags); trace_rxrpc_receive(call, rxrpc_receive_rotate, serial, hard_ack); @@ -317,7 +318,7 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, break; } smp_rmb(); - rxrpc_see_skb(skb); + rxrpc_see_skb(skb, rxrpc_skb_rx_seen); sp = rxrpc_skb(skb); if (!(flags & MSG_PEEK)) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 28d8f73..6a39ee9 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -100,7 +100,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, ASSERTCMP(seq, ==, call->tx_top + 1); ix = seq & RXRPC_RXTX_BUFF_MASK; - rxrpc_get_skb(skb); + rxrpc_get_skb(skb, rxrpc_skb_tx_got); call->rxtx_annotations[ix] = RXRPC_TX_ANNO_UNACK; smp_wmb(); call->rxtx_buffer[ix] = skb; @@ -146,7 +146,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, rxrpc_instant_resend(call, ix); } - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_tx_freed); _leave(""); } @@ -201,7 +201,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, skb = call->tx_pending; call->tx_pending = NULL; - rxrpc_see_skb(skb); + rxrpc_see_skb(skb, rxrpc_skb_tx_seen); copied = 0; do { @@ -242,7 +242,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, if (!skb) goto maybe_error; - rxrpc_new_skb(skb); + rxrpc_new_skb(skb, rxrpc_skb_tx_new); _debug("ALLOC SEND %p", skb); @@ -352,7 +352,7 @@ out: return ret; call_terminated: - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_tx_freed); _leave(" = %d", -call->error); return -call->error; diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 620d9cc..5154cbf 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -18,55 +18,77 @@ #include #include "ar-internal.h" +#define select_skb_count(op) (op >= rxrpc_skb_tx_cleaned ? 
&rxrpc_n_tx_skbs : &rxrpc_n_rx_skbs) + /* - * Note the existence of a new-to-us socket buffer (allocated or dequeued). + * Note the allocation or reception of a socket buffer. */ -void rxrpc_new_skb(struct sk_buff *skb) +void rxrpc_new_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) { const void *here = __builtin_return_address(0); - int n = atomic_inc_return(&rxrpc_n_skbs); - trace_rxrpc_skb(skb, 0, atomic_read(&skb->users), n, here); + int n = atomic_inc_return(select_skb_count(op)); + trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); } /* * Note the re-emergence of a socket buffer from a queue or buffer. */ -void rxrpc_see_skb(struct sk_buff *skb) +void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) { const void *here = __builtin_return_address(0); if (skb) { - int n = atomic_read(&rxrpc_n_skbs); - trace_rxrpc_skb(skb, 1, atomic_read(&skb->users), n, here); + int n = atomic_read(select_skb_count(op)); + trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); } } /* * Note the addition of a ref on a socket buffer. */ -void rxrpc_get_skb(struct sk_buff *skb) +void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) { const void *here = __builtin_return_address(0); - int n = atomic_inc_return(&rxrpc_n_skbs); - trace_rxrpc_skb(skb, 2, atomic_read(&skb->users), n, here); + int n = atomic_inc_return(select_skb_count(op)); + trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); skb_get(skb); } /* * Note the destruction of a socket buffer. */ -void rxrpc_free_skb(struct sk_buff *skb) +void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) { const void *here = __builtin_return_address(0); if (skb) { int n; CHECK_SLAB_OKAY(&skb->users); - n = atomic_dec_return(&rxrpc_n_skbs); - trace_rxrpc_skb(skb, 3, atomic_read(&skb->users), n, here); + n = atomic_dec_return(select_skb_count(op)); + trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); kfree_skb(skb); } } /* + * Note the injected loss of a socket buffer. 
+ */ +void rxrpc_lose_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) +{ + const void *here = __builtin_return_address(0); + if (skb) { + int n; + CHECK_SLAB_OKAY(&skb->users); + if (op == rxrpc_skb_tx_lost) { + n = atomic_read(select_skb_count(op)); + trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); + } else { + n = atomic_dec_return(select_skb_count(op)); + trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); + kfree_skb(skb); + } + } +} + +/* * Clear a queue of socket buffers. */ void rxrpc_purge_queue(struct sk_buff_head *list) @@ -74,8 +96,9 @@ void rxrpc_purge_queue(struct sk_buff_head *list) const void *here = __builtin_return_address(0); struct sk_buff *skb; while ((skb = skb_dequeue((list))) != NULL) { - int n = atomic_dec_return(&rxrpc_n_skbs); - trace_rxrpc_skb(skb, 4, atomic_read(&skb->users), n, here); + int n = atomic_dec_return(select_skb_count(rxrpc_skb_rx_purged)); + trace_rxrpc_skb(skb, rxrpc_skb_rx_purged, + atomic_read(&skb->users), n, here); kfree_skb(skb); } } -- cgit v1.1 From 8a681c360559f75a80b37e6a6a9590457361ccb0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 17 Sep 2016 10:49:15 +0100 Subject: rxrpc: Add config to inject packet loss Add a configuration option to inject packet loss by discarding approximately every 8th packet received and approximately every 8th DATA packet transmitted. Note that no locking is used, but it shouldn't really matter. Signed-off-by: David Howells --- net/rxrpc/Kconfig | 7 +++++++ net/rxrpc/input.c | 8 ++++++++ net/rxrpc/output.c | 9 +++++++++ 3 files changed, 24 insertions(+) diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig index 13396c7..86f8853 100644 --- a/net/rxrpc/Kconfig +++ b/net/rxrpc/Kconfig @@ -26,6 +26,13 @@ config AF_RXRPC_IPV6 Say Y here to allow AF_RXRPC to use IPV6 UDP as well as IPV4 UDP as its network transport. 
+config AF_RXRPC_INJECT_LOSS + bool "Inject packet loss into RxRPC packet stream" + depends on AF_RXRPC + help + Say Y here to inject packet loss by discarding some received and some + transmitted packets. + config AF_RXRPC_DEBUG bool "RxRPC dynamic debugging" diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 84bb16d..7ac1edf 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -712,6 +712,14 @@ void rxrpc_data_ready(struct sock *udp_sk) skb_orphan(skb); sp = rxrpc_skb(skb); + if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { + static int lose; + if ((lose++ & 7) == 7) { + rxrpc_lose_skb(skb, rxrpc_skb_rx_lost); + return; + } + } + _net("Rx UDP packet from %08x:%04hu", ntohl(ip_hdr(skb)->saddr), ntohs(udp_hdr(skb)->source)); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index a2cad5c..16e18a9 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -225,6 +225,15 @@ int rxrpc_send_data_packet(struct rxrpc_connection *conn, struct sk_buff *skb) msg.msg_controllen = 0; msg.msg_flags = 0; + if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { + static int lose; + if ((lose++ & 7) == 7) { + rxrpc_lose_skb(skb, rxrpc_skb_tx_lost); + _leave(" = 0 [lose]"); + return 0; + } + } + /* send the packet with the don't fragment bit set if we currently * think it's small enough */ if (skb->len - sizeof(struct rxrpc_wire_header) < conn->params.peer->maxdata) { -- cgit v1.1 From 7e426671704d2266757dff9c4254b788561aa11e Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 15 Sep 2016 16:08:36 +0300 Subject: net/mlx5e: Single flow order-0 pages for Striding RQ To improve the memory consumption scheme, we omit the flow that demands and splits high-order pages in Striding RQ, and stay with a single Striding RQ flow that uses order-0 pages. Moving to fragmented memory allows the use of larger MPWQEs, which reduces the number of UMR posts and filler CQEs. 
Moving to a single flow allows several optimizations that improve performance, especially in production servers where we would fall back to order-0 allocations anyway: - inline functions that were called via function pointers. - improve the UMR post process. This patch alone is expected to give a slight performance reduction. However, the new memory scheme makes it possible to use a page-cache of a fair size that doesn't inflate the memory footprint, which will make up for the reduction and even give a performance gain. Performance tests: The following results were measured on a freshly booted system, giving optimal baseline performance, as high-order pages are yet to be fragmented and depleted. We ran pktgen single-stream benchmarks, with iptables-raw-drop: Single stride, 64 bytes: * 4,739,057 - baseline * 4,749,550 - this patch no reduction Larger packets, no page cross, 1024 bytes: * 3,982,361 - baseline * 3,845,682 - this patch 3.5% reduction Larger packets, every 3rd packet crosses a page, 1500 bytes: * 3,731,189 - baseline * 3,579,414 - this patch 4% reduction Fixes: 461017cb006a ("net/mlx5e: Support RX multi-packet WQE (Striding RQ)") Fixes: bc77b240b3c5 ("net/mlx5e: Add fragmented memory support for RX multi packet WQE") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 54 ++-- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 136 ++++++++-- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 292 ++++----------------- drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 4 - drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 2 +- 5 files changed, 184 insertions(+), 304 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index a9358cf..401b2f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -62,12 +62,12 @@ #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xd #define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x1 -#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW 0x4 +#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW 0x3 #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW 0x6 #define MLX5_MPWRQ_LOG_STRIDE_SIZE 6 /* >= 6, HW restriction */ #define MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS 8 /* >= 6, HW restriction */ -#define MLX5_MPWRQ_LOG_WQE_SZ 17 +#define MLX5_MPWRQ_LOG_WQE_SZ 18 #define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? 
\ MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0) #define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER) @@ -293,8 +293,8 @@ struct mlx5e_rq { u32 wqe_sz; struct sk_buff **skb; struct mlx5e_mpw_info *wqe_info; + void *mtt_no_align; __be32 mkey_be; - __be32 umr_mkey_be; struct device *pdev; struct net_device *netdev; @@ -323,32 +323,15 @@ struct mlx5e_rq { struct mlx5e_umr_dma_info { __be64 *mtt; - __be64 *mtt_no_align; dma_addr_t mtt_addr; - struct mlx5e_dma_info *dma_info; + struct mlx5e_dma_info dma_info[MLX5_MPWRQ_PAGES_PER_WQE]; + struct mlx5e_umr_wqe wqe; }; struct mlx5e_mpw_info { - union { - struct mlx5e_dma_info dma_info; - struct mlx5e_umr_dma_info umr; - }; + struct mlx5e_umr_dma_info umr; u16 consumed_strides; u16 skbs_frags[MLX5_MPWRQ_PAGES_PER_WQE]; - - void (*dma_pre_sync)(struct device *pdev, - struct mlx5e_mpw_info *wi, - u32 wqe_offset, u32 len); - void (*add_skb_frag)(struct mlx5e_rq *rq, - struct sk_buff *skb, - struct mlx5e_mpw_info *wi, - u32 page_idx, u32 frag_offset, u32 len); - void (*copy_skb_header)(struct device *pdev, - struct sk_buff *skb, - struct mlx5e_mpw_info *wi, - u32 page_idx, u32 offset, - u32 headlen); - void (*free_wqe)(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi); }; struct mlx5e_tx_wqe_info { @@ -672,24 +655,11 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); -int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); +int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix); void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix); -void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq); -void mlx5e_complete_rx_linear_mpwqe(struct mlx5e_rq *rq, - struct mlx5_cqe64 *cqe, - u16 byte_cnt, - struct 
mlx5e_mpw_info *wi, - struct sk_buff *skb); -void mlx5e_complete_rx_fragmented_mpwqe(struct mlx5e_rq *rq, - struct mlx5_cqe64 *cqe, - u16 byte_cnt, - struct mlx5e_mpw_info *wi, - struct sk_buff *skb); -void mlx5e_free_rx_linear_mpwqe(struct mlx5e_rq *rq, - struct mlx5e_mpw_info *wi); -void mlx5e_free_rx_fragmented_mpwqe(struct mlx5e_rq *rq, - struct mlx5e_mpw_info *wi); +void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq); +void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi); struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq); void mlx5e_rx_am(struct mlx5e_rq *rq); @@ -776,6 +746,12 @@ static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, NULL, cq->wq.cc); } +static inline u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix) +{ + return rq->mpwqe_mtt_offset + + wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8); +} + static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) { return min_t(int, mdev->priv.eq_table.num_comp_vectors, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index af4c61e..136554b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -138,7 +138,6 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; s->rx_wqe_err += rq_stats->wqe_err; s->rx_mpwqe_filler += rq_stats->mpwqe_filler; - s->rx_mpwqe_frag += rq_stats->mpwqe_frag; s->rx_buff_alloc_err += rq_stats->buff_alloc_err; s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks; s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts; @@ -295,6 +294,107 @@ static void mlx5e_disable_async_events(struct mlx5e_priv *priv) #define MLX5E_HW2SW_MTU(hwmtu) (hwmtu - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)) #define MLX5E_SW2HW_MTU(swmtu) (swmtu + (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)) +static inline int 
mlx5e_get_wqe_mtt_sz(void) +{ + /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes. + * To avoid copying garbage after the mtt array, we allocate + * a little more. + */ + return ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(__be64), + MLX5_UMR_MTT_ALIGNMENT); +} + +static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, struct mlx5e_sq *sq, + struct mlx5e_umr_wqe *wqe, u16 ix) +{ + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; + struct mlx5_wqe_data_seg *dseg = &wqe->data; + struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + u8 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS); + u32 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq, ix); + + cseg->qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | + ds_cnt); + cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + cseg->imm = rq->mkey_be; + + ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN; + ucseg->klm_octowords = + cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE)); + ucseg->bsf_octowords = + cpu_to_be16(MLX5_MTT_OCTW(umr_wqe_mtt_offset)); + ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); + + dseg->lkey = sq->mkey_be; + dseg->addr = cpu_to_be64(wi->umr.mtt_addr); +} + +static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, + struct mlx5e_channel *c) +{ + int wq_sz = mlx5_wq_ll_get_size(&rq->wq); + int mtt_sz = mlx5e_get_wqe_mtt_sz(); + int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1; + int i; + + rq->wqe_info = kzalloc_node(wq_sz * sizeof(*rq->wqe_info), + GFP_KERNEL, cpu_to_node(c->cpu)); + if (!rq->wqe_info) + goto err_out; + + /* We allocate more than mtt_sz as we will align the pointer */ + rq->mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL, + cpu_to_node(c->cpu)); + if (unlikely(!rq->mtt_no_align)) + goto err_free_wqe_info; + + for (i = 0; i < wq_sz; i++) { + struct mlx5e_mpw_info *wi = &rq->wqe_info[i]; + + wi->umr.mtt = PTR_ALIGN(rq->mtt_no_align + i * mtt_alloc, + MLX5_UMR_ALIGN); + wi->umr.mtt_addr = dma_map_single(c->pdev, 
wi->umr.mtt, mtt_sz, + PCI_DMA_TODEVICE); + if (unlikely(dma_mapping_error(c->pdev, wi->umr.mtt_addr))) + goto err_unmap_mtts; + + mlx5e_build_umr_wqe(rq, &c->icosq, &wi->umr.wqe, i); + } + + return 0; + +err_unmap_mtts: + while (--i >= 0) { + struct mlx5e_mpw_info *wi = &rq->wqe_info[i]; + + dma_unmap_single(c->pdev, wi->umr.mtt_addr, mtt_sz, + PCI_DMA_TODEVICE); + } + kfree(rq->mtt_no_align); +err_free_wqe_info: + kfree(rq->wqe_info); + +err_out: + return -ENOMEM; +} + +static void mlx5e_rq_free_mpwqe_info(struct mlx5e_rq *rq) +{ + int wq_sz = mlx5_wq_ll_get_size(&rq->wq); + int mtt_sz = mlx5e_get_wqe_mtt_sz(); + int i; + + for (i = 0; i < wq_sz; i++) { + struct mlx5e_mpw_info *wi = &rq->wqe_info[i]; + + dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, + PCI_DMA_TODEVICE); + } + kfree(rq->mtt_no_align); + kfree(rq->wqe_info); +} + static int mlx5e_create_rq(struct mlx5e_channel *c, struct mlx5e_rq_param *param, struct mlx5e_rq *rq) @@ -319,14 +419,16 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, wq_sz = mlx5_wq_ll_get_size(&rq->wq); + rq->wq_type = priv->params.rq_wq_type; + rq->pdev = c->pdev; + rq->netdev = c->netdev; + rq->tstamp = &priv->tstamp; + rq->channel = c; + rq->ix = c->ix; + rq->priv = c->priv; + switch (priv->params.rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - rq->wqe_info = kzalloc_node(wq_sz * sizeof(*rq->wqe_info), - GFP_KERNEL, cpu_to_node(c->cpu)); - if (!rq->wqe_info) { - err = -ENOMEM; - goto err_rq_wq_destroy; - } rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq; rq->alloc_wqe = mlx5e_alloc_rx_mpwqe; rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; @@ -338,6 +440,10 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides); rq->wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides; byte_count = rq->wqe_sz; + rq->mkey_be = cpu_to_be32(c->priv->umr_mkey.key); + err = mlx5e_rq_alloc_mpwqe_info(rq, c); + if (err) + goto err_rq_wq_destroy; break; default: /* 
MLX5_WQ_TYPE_LINKED_LIST */ rq->skb = kzalloc_node(wq_sz * sizeof(*rq->skb), GFP_KERNEL, @@ -356,27 +462,19 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->wqe_sz = SKB_DATA_ALIGN(rq->wqe_sz); byte_count = rq->wqe_sz; byte_count |= MLX5_HW_START_PADDING; + rq->mkey_be = c->mkey_be; } for (i = 0; i < wq_sz; i++) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); wqe->data.byte_count = cpu_to_be32(byte_count); + wqe->data.lkey = rq->mkey_be; } INIT_WORK(&rq->am.work, mlx5e_rx_am_work); rq->am.mode = priv->params.rx_cq_period_mode; - rq->wq_type = priv->params.rq_wq_type; - rq->pdev = c->pdev; - rq->netdev = c->netdev; - rq->tstamp = &priv->tstamp; - rq->channel = c; - rq->ix = c->ix; - rq->priv = c->priv; - rq->mkey_be = c->mkey_be; - rq->umr_mkey_be = cpu_to_be32(c->priv->umr_mkey.key); - return 0; err_rq_wq_destroy: @@ -389,7 +487,7 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq) { switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - kfree(rq->wqe_info); + mlx5e_rq_free_mpwqe_info(rq); break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ kfree(rq->skb); @@ -528,7 +626,7 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) /* UMR WQE (if in progress) is always at wq->head */ if (test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) - mlx5e_free_rx_fragmented_mpwqe(rq, &rq->wqe_info[wq->head]); + mlx5e_free_rx_mpwqe(rq, &rq->wqe_info[wq->head]); while (!mlx5_wq_ll_is_empty(wq)) { wqe_ix_be = *wq->tail_next; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index e7c969d..5d1b7b5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -200,7 +200,6 @@ int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) *((dma_addr_t *)skb->cb) = dma_addr; wqe->data.addr = cpu_to_be64(dma_addr); - wqe->data.lkey = rq->mkey_be; rq->skb[ix] = skb; @@ -231,44 +230,11 @@ static inline int 
mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq) return rq->mpwqe_num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER; } -static inline void -mlx5e_dma_pre_sync_linear_mpwqe(struct device *pdev, - struct mlx5e_mpw_info *wi, - u32 wqe_offset, u32 len) -{ - dma_sync_single_for_cpu(pdev, wi->dma_info.addr + wqe_offset, - len, DMA_FROM_DEVICE); -} - -static inline void -mlx5e_dma_pre_sync_fragmented_mpwqe(struct device *pdev, - struct mlx5e_mpw_info *wi, - u32 wqe_offset, u32 len) -{ - /* No dma pre sync for fragmented MPWQE */ -} - -static inline void -mlx5e_add_skb_frag_linear_mpwqe(struct mlx5e_rq *rq, - struct sk_buff *skb, - struct mlx5e_mpw_info *wi, - u32 page_idx, u32 frag_offset, - u32 len) -{ - unsigned int truesize = ALIGN(len, rq->mpwqe_stride_sz); - - wi->skbs_frags[page_idx]++; - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, - &wi->dma_info.page[page_idx], frag_offset, - len, truesize); -} - -static inline void -mlx5e_add_skb_frag_fragmented_mpwqe(struct mlx5e_rq *rq, - struct sk_buff *skb, - struct mlx5e_mpw_info *wi, - u32 page_idx, u32 frag_offset, - u32 len) +static inline void mlx5e_add_skb_frag_mpwqe(struct mlx5e_rq *rq, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 page_idx, u32 frag_offset, + u32 len) { unsigned int truesize = ALIGN(len, rq->mpwqe_stride_sz); @@ -282,24 +248,11 @@ mlx5e_add_skb_frag_fragmented_mpwqe(struct mlx5e_rq *rq, } static inline void -mlx5e_copy_skb_header_linear_mpwqe(struct device *pdev, - struct sk_buff *skb, - struct mlx5e_mpw_info *wi, - u32 page_idx, u32 offset, - u32 headlen) -{ - struct page *page = &wi->dma_info.page[page_idx]; - - skb_copy_to_linear_data(skb, page_address(page) + offset, - ALIGN(headlen, sizeof(long))); -} - -static inline void -mlx5e_copy_skb_header_fragmented_mpwqe(struct device *pdev, - struct sk_buff *skb, - struct mlx5e_mpw_info *wi, - u32 page_idx, u32 offset, - u32 headlen) +mlx5e_copy_skb_header_mpwqe(struct device *pdev, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 
page_idx, u32 offset, + u32 headlen) { u16 headlen_pg = min_t(u32, headlen, PAGE_SIZE - offset); struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[page_idx]; @@ -324,46 +277,9 @@ mlx5e_copy_skb_header_fragmented_mpwqe(struct device *pdev, } } -static u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix) -{ - return rq->mpwqe_mtt_offset + - wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8); -} - -static void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, - struct mlx5e_sq *sq, - struct mlx5e_umr_wqe *wqe, - u16 ix) +static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) { - struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; - struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; - struct mlx5_wqe_data_seg *dseg = &wqe->data; struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; - u8 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS); - u32 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq, ix); - - memset(wqe, 0, sizeof(*wqe)); - cseg->opmod_idx_opcode = - cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | - MLX5_OPCODE_UMR); - cseg->qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | - ds_cnt); - cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; - cseg->imm = rq->umr_mkey_be; - - ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN; - ucseg->klm_octowords = - cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE)); - ucseg->bsf_octowords = - cpu_to_be16(MLX5_MTT_OCTW(umr_wqe_mtt_offset)); - ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); - - dseg->lkey = sq->mkey_be; - dseg->addr = cpu_to_be64(wi->umr.mtt_addr); -} - -static void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) -{ struct mlx5e_sq *sq = &rq->channel->icosq; struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5e_umr_wqe *wqe; @@ -378,30 +294,22 @@ static void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) } wqe = mlx5_wq_cyc_get_wqe(wq, pi); - mlx5e_build_umr_wqe(rq, sq, wqe, ix); + memcpy(wqe, &wi->umr.wqe, sizeof(*wqe)); + wqe->ctrl.opmod_idx_opcode = + cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | + 
MLX5_OPCODE_UMR); + sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_UMR; sq->ico_wqe_info[pi].num_wqebbs = num_wqebbs; sq->pc += num_wqebbs; mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); } -static inline int mlx5e_get_wqe_mtt_sz(void) -{ - /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes. - * To avoid copying garbage after the mtt array, we allocate - * a little more. - */ - return ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(__be64), - MLX5_UMR_MTT_ALIGNMENT); -} - -static int mlx5e_alloc_and_map_page(struct mlx5e_rq *rq, - struct mlx5e_mpw_info *wi, - int i) +static inline int mlx5e_alloc_and_map_page(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi, + int i) { - struct page *page; - - page = dev_alloc_page(); + struct page *page = dev_alloc_page(); if (unlikely(!page)) return -ENOMEM; @@ -417,47 +325,25 @@ static int mlx5e_alloc_and_map_page(struct mlx5e_rq *rq, return 0; } -static int mlx5e_alloc_rx_fragmented_mpwqe(struct mlx5e_rq *rq, - struct mlx5e_rx_wqe *wqe, - u16 ix) +static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq, + struct mlx5e_rx_wqe *wqe, + u16 ix) { struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; - int mtt_sz = mlx5e_get_wqe_mtt_sz(); u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, ix) << PAGE_SHIFT; + int pg_strides = mlx5e_mpwqe_strides_per_page(rq); + int err; int i; - wi->umr.dma_info = kmalloc(sizeof(*wi->umr.dma_info) * - MLX5_MPWRQ_PAGES_PER_WQE, - GFP_ATOMIC); - if (unlikely(!wi->umr.dma_info)) - goto err_out; - - /* We allocate more than mtt_sz as we will align the pointer */ - wi->umr.mtt_no_align = kzalloc(mtt_sz + MLX5_UMR_ALIGN - 1, - GFP_ATOMIC); - if (unlikely(!wi->umr.mtt_no_align)) - goto err_free_umr; - - wi->umr.mtt = PTR_ALIGN(wi->umr.mtt_no_align, MLX5_UMR_ALIGN); - wi->umr.mtt_addr = dma_map_single(rq->pdev, wi->umr.mtt, mtt_sz, - PCI_DMA_TODEVICE); - if (unlikely(dma_mapping_error(rq->pdev, wi->umr.mtt_addr))) - goto err_free_mtt; - for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { - if 
(unlikely(mlx5e_alloc_and_map_page(rq, wi, i))) + err = mlx5e_alloc_and_map_page(rq, wi, i); + if (unlikely(err)) goto err_unmap; - page_ref_add(wi->umr.dma_info[i].page, - mlx5e_mpwqe_strides_per_page(rq)); + page_ref_add(wi->umr.dma_info[i].page, pg_strides); wi->skbs_frags[i] = 0; } wi->consumed_strides = 0; - wi->dma_pre_sync = mlx5e_dma_pre_sync_fragmented_mpwqe; - wi->add_skb_frag = mlx5e_add_skb_frag_fragmented_mpwqe; - wi->copy_skb_header = mlx5e_copy_skb_header_fragmented_mpwqe; - wi->free_wqe = mlx5e_free_rx_fragmented_mpwqe; - wqe->data.lkey = rq->umr_mkey_be; wqe->data.addr = cpu_to_be64(dma_offset); return 0; @@ -466,41 +352,28 @@ err_unmap: while (--i >= 0) { dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE, PCI_DMA_FROMDEVICE); - page_ref_sub(wi->umr.dma_info[i].page, - mlx5e_mpwqe_strides_per_page(rq)); + page_ref_sub(wi->umr.dma_info[i].page, pg_strides); put_page(wi->umr.dma_info[i].page); } - dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE); - -err_free_mtt: - kfree(wi->umr.mtt_no_align); - -err_free_umr: - kfree(wi->umr.dma_info); -err_out: - return -ENOMEM; + return err; } -void mlx5e_free_rx_fragmented_mpwqe(struct mlx5e_rq *rq, - struct mlx5e_mpw_info *wi) +void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi) { - int mtt_sz = mlx5e_get_wqe_mtt_sz(); + int pg_strides = mlx5e_mpwqe_strides_per_page(rq); int i; for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE, PCI_DMA_FROMDEVICE); page_ref_sub(wi->umr.dma_info[i].page, - mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i]); + pg_strides - wi->skbs_frags[i]); put_page(wi->umr.dma_info[i].page); } - dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE); - kfree(wi->umr.mtt_no_align); - kfree(wi->umr.dma_info); } -void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq) +void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) { struct mlx5_wq_ll *wq = &rq->wq; struct 
mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); @@ -508,12 +381,11 @@ void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq) clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state))) { - mlx5e_free_rx_fragmented_mpwqe(rq, &rq->wqe_info[wq->head]); + mlx5e_free_rx_mpwqe(rq, &rq->wqe_info[wq->head]); return; } mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index)); - rq->stats.mpwqe_frag++; /* ensure wqes are visible to device before updating doorbell record */ dma_wmb(); @@ -521,84 +393,23 @@ void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq) mlx5_wq_ll_update_db_record(wq); } -static int mlx5e_alloc_rx_linear_mpwqe(struct mlx5e_rq *rq, - struct mlx5e_rx_wqe *wqe, - u16 ix) -{ - struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; - gfp_t gfp_mask; - int i; - - gfp_mask = GFP_ATOMIC | __GFP_COLD | __GFP_MEMALLOC; - wi->dma_info.page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, - MLX5_MPWRQ_WQE_PAGE_ORDER); - if (unlikely(!wi->dma_info.page)) - return -ENOMEM; - - wi->dma_info.addr = dma_map_page(rq->pdev, wi->dma_info.page, 0, - rq->wqe_sz, PCI_DMA_FROMDEVICE); - if (unlikely(dma_mapping_error(rq->pdev, wi->dma_info.addr))) { - put_page(wi->dma_info.page); - return -ENOMEM; - } - - /* We split the high-order page into order-0 ones and manage their - * reference counter to minimize the memory held by small skb fragments - */ - split_page(wi->dma_info.page, MLX5_MPWRQ_WQE_PAGE_ORDER); - for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { - page_ref_add(&wi->dma_info.page[i], - mlx5e_mpwqe_strides_per_page(rq)); - wi->skbs_frags[i] = 0; - } - - wi->consumed_strides = 0; - wi->dma_pre_sync = mlx5e_dma_pre_sync_linear_mpwqe; - wi->add_skb_frag = mlx5e_add_skb_frag_linear_mpwqe; - wi->copy_skb_header = mlx5e_copy_skb_header_linear_mpwqe; - wi->free_wqe = mlx5e_free_rx_linear_mpwqe; - wqe->data.lkey = rq->mkey_be; - wqe->data.addr = cpu_to_be64(wi->dma_info.addr); - - return 0; -} - -void 
mlx5e_free_rx_linear_mpwqe(struct mlx5e_rq *rq, - struct mlx5e_mpw_info *wi) -{ - int i; - - dma_unmap_page(rq->pdev, wi->dma_info.addr, rq->wqe_sz, - PCI_DMA_FROMDEVICE); - for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { - page_ref_sub(&wi->dma_info.page[i], - mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i]); - put_page(&wi->dma_info.page[i]); - } -} - -int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) +int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) { int err; - err = mlx5e_alloc_rx_linear_mpwqe(rq, wqe, ix); - if (unlikely(err)) { - err = mlx5e_alloc_rx_fragmented_mpwqe(rq, wqe, ix); - if (unlikely(err)) - return err; - set_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); - mlx5e_post_umr_wqe(rq, ix); - return -EBUSY; - } - - return 0; + err = mlx5e_alloc_rx_umr_mpwqe(rq, wqe, ix); + if (unlikely(err)) + return err; + set_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); + mlx5e_post_umr_wqe(rq, ix); + return -EBUSY; } void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) { struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; - wi->free_wqe(rq, wi); + mlx5e_free_rx_mpwqe(rq, wi); } #define RQ_CANNOT_POST(rq) \ @@ -617,9 +428,10 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) int err; err = rq->alloc_wqe(rq, wqe, wq->head); + if (err == -EBUSY) + return true; if (unlikely(err)) { - if (err != -EBUSY) - rq->stats.buff_alloc_err++; + rq->stats.buff_alloc_err++; break; } @@ -831,7 +643,6 @@ static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq, u32 cqe_bcnt, struct sk_buff *skb) { - u32 consumed_bytes = ALIGN(cqe_bcnt, rq->mpwqe_stride_sz); u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); u32 wqe_offset = stride_ix * rq->mpwqe_stride_sz; u32 head_offset = wqe_offset & (PAGE_SIZE - 1); @@ -845,21 +656,20 @@ static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq, page_idx++; frag_offset -= PAGE_SIZE; } - wi->dma_pre_sync(rq->pdev, wi, wqe_offset, consumed_bytes); while (byte_cnt) 
{ u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - frag_offset, byte_cnt); - wi->add_skb_frag(rq, skb, wi, page_idx, frag_offset, - pg_consumed_bytes); + mlx5e_add_skb_frag_mpwqe(rq, skb, wi, page_idx, frag_offset, + pg_consumed_bytes); byte_cnt -= pg_consumed_bytes; frag_offset = 0; page_idx++; } /* copy header */ - wi->copy_skb_header(rq->pdev, skb, wi, head_page_idx, head_offset, - headlen); + mlx5e_copy_skb_header_mpwqe(rq->pdev, skb, wi, head_page_idx, + head_offset, headlen); /* skb linear part was allocated with headlen and aligned to long */ skb->tail += headlen; skb->len += headlen; @@ -904,7 +714,7 @@ mpwrq_cqe_out: if (likely(wi->consumed_strides < rq->mpwqe_num_strides)) return; - wi->free_wqe(rq, wi); + mlx5e_free_rx_mpwqe(rq, wi); mlx5_wq_ll_pop(&rq->wq, cqe->wqe_id, &wqe->next.next_wqe_index); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 499487c..1f56543 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -73,7 +73,6 @@ struct mlx5e_sw_stats { u64 tx_xmit_more; u64 rx_wqe_err; u64 rx_mpwqe_filler; - u64 rx_mpwqe_frag; u64 rx_buff_alloc_err; u64 rx_cqe_compress_blks; u64 rx_cqe_compress_pkts; @@ -105,7 +104,6 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_frag) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) }, @@ -274,7 +272,6 @@ struct mlx5e_rq_stats { u64 lro_bytes; u64 wqe_err; u64 mpwqe_filler; - u64 mpwqe_frag; u64 buff_alloc_err; u64 cqe_compress_blks; u64 cqe_compress_pkts; @@ -290,7 +287,6 @@ static 
const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_frag) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 9bf33bb..08d8b0c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -87,7 +87,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) case MLX5_OPCODE_NOP: break; case MLX5_OPCODE_UMR: - mlx5e_post_rx_fragmented_mpwqe(&sq->channel->rq); + mlx5e_post_rx_mpwqe(&sq->channel->rq); break; default: WARN_ONCE(true, -- cgit v1.1 From a5a0c590166e39fa399940775e7bfd8e1a9356da Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 15 Sep 2016 16:08:37 +0300 Subject: net/mlx5e: Introduce API for RX mapped pages Manage the allocation and deallocation of mapped RX pages only through dedicated API functions. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 46 +++++++++++++++---------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 5d1b7b5..0c34daa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -305,26 +305,32 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); } -static inline int mlx5e_alloc_and_map_page(struct mlx5e_rq *rq, - struct mlx5e_mpw_info *wi, - int i) +static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) { struct page *page = dev_alloc_page(); + if (unlikely(!page)) return -ENOMEM; - wi->umr.dma_info[i].page = page; - wi->umr.dma_info[i].addr = dma_map_page(rq->pdev, page, 0, PAGE_SIZE, - PCI_DMA_FROMDEVICE); - if (unlikely(dma_mapping_error(rq->pdev, wi->umr.dma_info[i].addr))) { + dma_info->page = page; + dma_info->addr = dma_map_page(rq->pdev, page, 0, PAGE_SIZE, + DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) { put_page(page); return -ENOMEM; } - wi->umr.mtt[i] = cpu_to_be64(wi->umr.dma_info[i].addr | MLX5_EN_WR); return 0; } +static inline void mlx5e_page_release(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, DMA_FROM_DEVICE); + put_page(dma_info->page); +} + static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) @@ -336,10 +342,13 @@ static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq, int i; for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { - err = mlx5e_alloc_and_map_page(rq, wi, i); + struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i]; + + err = mlx5e_page_alloc_mapped(rq, dma_info); if (unlikely(err)) goto err_unmap; - page_ref_add(wi->umr.dma_info[i].page, pg_strides); + wi->umr.mtt[i] = 
cpu_to_be64(dma_info->addr | MLX5_EN_WR); + page_ref_add(dma_info->page, pg_strides); wi->skbs_frags[i] = 0; } @@ -350,10 +359,10 @@ static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq, err_unmap: while (--i >= 0) { - dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE, - PCI_DMA_FROMDEVICE); - page_ref_sub(wi->umr.dma_info[i].page, pg_strides); - put_page(wi->umr.dma_info[i].page); + struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i]; + + page_ref_sub(dma_info->page, pg_strides); + mlx5e_page_release(rq, dma_info); } return err; @@ -365,11 +374,10 @@ void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi) int i; for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { - dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE, - PCI_DMA_FROMDEVICE); - page_ref_sub(wi->umr.dma_info[i].page, - pg_strides - wi->skbs_frags[i]); - put_page(wi->umr.dma_info[i].page); + struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i]; + + page_ref_sub(dma_info->page, pg_strides - wi->skbs_frags[i]); + mlx5e_page_release(rq, dma_info); } } -- cgit v1.1 From 4415a0319f92ea0d624fe11c917faf9114f89187 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 15 Sep 2016 16:08:38 +0300 Subject: net/mlx5e: Implement RX mapped page cache for page recycle Instead of reallocating and mapping pages for RX data-path, recycle already used pages in a per ring cache. Performance tests: The following results were measured on a freshly booted system, giving optimal baseline performance, as high-order pages are yet to be fragmented and depleted. 
We ran pktgen single-stream benchmarks, with iptables-raw-drop: Single stride, 64 bytes: * 4,739,057 - baseline * 4,749,550 - order0 no cache * 4,786,899 - order0 with cache 1% gain Larger packets, no page cross, 1024 bytes: * 3,982,361 - baseline * 3,845,682 - order0 no cache * 4,127,852 - order0 with cache 3.7% gain Larger packets, every 3rd packet crosses a page, 1500 bytes: * 3,731,189 - baseline * 3,579,414 - order0 no cache * 3,931,708 - order0 with cache 5.4% gain Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 16 ++++++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 15 ++++++ drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 57 ++++++++++++++++++++-- drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 16 ++++++ 4 files changed, 99 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 401b2f7..7dd4763 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -287,6 +287,18 @@ struct mlx5e_rx_am { /* Adaptive Moderation */ u8 tired; }; +/* a single cache unit is capable to serve one napi call (for non-striding rq) + * or a MPWQE (for striding rq). + */ +#define MLX5E_CACHE_UNIT (MLX5_MPWRQ_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? 
\ + MLX5_MPWRQ_PAGES_PER_WQE : NAPI_POLL_WEIGHT) +#define MLX5E_CACHE_SIZE (2 * roundup_pow_of_two(MLX5E_CACHE_UNIT)) +struct mlx5e_page_cache { + u32 head; + u32 tail; + struct mlx5e_dma_info page_cache[MLX5E_CACHE_SIZE]; +}; + struct mlx5e_rq { /* data path */ struct mlx5_wq_ll wq; @@ -301,6 +313,8 @@ struct mlx5e_rq { struct mlx5e_tstamp *tstamp; struct mlx5e_rq_stats stats; struct mlx5e_cq cq; + struct mlx5e_page_cache page_cache; + mlx5e_fp_handle_rx_cqe handle_rx_cqe; mlx5e_fp_alloc_wqe alloc_wqe; mlx5e_fp_dealloc_wqe dealloc_wqe; @@ -651,6 +665,8 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); void mlx5e_free_tx_descs(struct mlx5e_sq *sq); +void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, + bool recycle); void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 136554b..8595b50 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -141,6 +141,10 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->rx_buff_alloc_err += rq_stats->buff_alloc_err; s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks; s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts; + s->rx_cache_reuse += rq_stats->cache_reuse; + s->rx_cache_full += rq_stats->cache_full; + s->rx_cache_empty += rq_stats->cache_empty; + s->rx_cache_busy += rq_stats->cache_busy; for (j = 0; j < priv->params.num_tc; j++) { sq_stats = &priv->channel[i]->sq[j].stats; @@ -475,6 +479,9 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, INIT_WORK(&rq->am.work, mlx5e_rx_am_work); rq->am.mode = priv->params.rx_cq_period_mode; + rq->page_cache.head = 0; + rq->page_cache.tail = 0; 
+ return 0; err_rq_wq_destroy: @@ -485,6 +492,8 @@ err_rq_wq_destroy: static void mlx5e_destroy_rq(struct mlx5e_rq *rq) { + int i; + switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: mlx5e_rq_free_mpwqe_info(rq); @@ -493,6 +502,12 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq) kfree(rq->skb); } + for (i = rq->page_cache.head; i != rq->page_cache.tail; + i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) { + struct mlx5e_dma_info *dma_info = &rq->page_cache.page_cache[i]; + + mlx5e_page_release(rq, dma_info, false); + } mlx5_wq_destroy(&rq->wq_ctrl); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 0c34daa..dc86779 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -305,11 +305,55 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); } +static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + struct mlx5e_page_cache *cache = &rq->page_cache; + u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1); + + if (tail_next == cache->head) { + rq->stats.cache_full++; + return false; + } + + cache->page_cache[cache->tail] = *dma_info; + cache->tail = tail_next; + return true; +} + +static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + struct mlx5e_page_cache *cache = &rq->page_cache; + + if (unlikely(cache->head == cache->tail)) { + rq->stats.cache_empty++; + return false; + } + + if (page_ref_count(cache->page_cache[cache->head].page) != 1) { + rq->stats.cache_busy++; + return false; + } + + *dma_info = cache->page_cache[cache->head]; + cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1); + rq->stats.cache_reuse++; + + dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE, + DMA_FROM_DEVICE); + return true; +} + static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, 
struct mlx5e_dma_info *dma_info) { - struct page *page = dev_alloc_page(); + struct page *page; + + if (mlx5e_rx_cache_get(rq, dma_info)) + return 0; + page = dev_alloc_page(); if (unlikely(!page)) return -ENOMEM; @@ -324,9 +368,12 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, return 0; } -static inline void mlx5e_page_release(struct mlx5e_rq *rq, - struct mlx5e_dma_info *dma_info) +void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, + bool recycle) { + if (likely(recycle) && mlx5e_rx_cache_put(rq, dma_info)) + return; + dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, DMA_FROM_DEVICE); put_page(dma_info->page); } @@ -362,7 +409,7 @@ err_unmap: struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i]; page_ref_sub(dma_info->page, pg_strides); - mlx5e_page_release(rq, dma_info); + mlx5e_page_release(rq, dma_info, true); } return err; @@ -377,7 +424,7 @@ void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi) struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i]; page_ref_sub(dma_info->page, pg_strides - wi->skbs_frags[i]); - mlx5e_page_release(rq, dma_info); + mlx5e_page_release(rq, dma_info, true); } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 1f56543..6af8d79 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -76,6 +76,10 @@ struct mlx5e_sw_stats { u64 rx_buff_alloc_err; u64 rx_cqe_compress_blks; u64 rx_cqe_compress_pkts; + u64 rx_cache_reuse; + u64 rx_cache_full; + u64 rx_cache_empty; + u64 rx_cache_busy; /* Special handling counters */ u64 link_down_events_phy; @@ -107,6 +111,10 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) }, + { 
MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_reuse) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events_phy) }, }; @@ -275,6 +283,10 @@ struct mlx5e_rq_stats { u64 buff_alloc_err; u64 cqe_compress_blks; u64 cqe_compress_pkts; + u64 cache_reuse; + u64 cache_full; + u64 cache_empty; + u64 cache_busy; }; static const struct counter_desc rq_stats_desc[] = { @@ -290,6 +302,10 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_reuse) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) }, }; struct mlx5e_sq_stats { -- cgit v1.1 From cce94483e47e8e3d74cf4475dea33f9fd4b6ad9f Mon Sep 17 00:00:00 2001 From: Filipe Manco Date: Thu, 15 Sep 2016 17:10:46 +0200 Subject: xen-netback: fix error handling on netback_probe() In case of error during netback_probe() (e.g. an entry missing on the xenstore) netback_remove() is called on the new device, which will set the device backend state to XenbusStateClosed by calling set_backend_state(). However, the backend state wasn't initialized by netback_probe() at this point, which will cause an invalid transition and set_backend_state() to BUG(). Initialize the backend state at the beginning of netback_probe() to XenbusStateInitialising, and create two new valid state transitions on set_backend_state(), from XenbusStateInitialising to XenbusStateClosed, and from XenbusStateInitialising to XenbusStateInitWait. 
Signed-off-by: Filipe Manco Acked-by: Wei Liu Signed-off-by: David S. Miller --- drivers/net/xen-netback/xenbus.c | 46 ++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 6a31f26..daf4c78 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -271,6 +271,11 @@ static int netback_probe(struct xenbus_device *dev, be->dev = dev; dev_set_drvdata(&dev->dev, be); + be->state = XenbusStateInitialising; + err = xenbus_switch_state(dev, XenbusStateInitialising); + if (err) + goto fail; + sg = 1; do { @@ -383,11 +388,6 @@ static int netback_probe(struct xenbus_device *dev, be->hotplug_script = script; - err = xenbus_switch_state(dev, XenbusStateInitWait); - if (err) - goto fail; - - be->state = XenbusStateInitWait; /* This kicks hotplug scripts, so do it immediately. */ err = backend_create_xenvif(be); @@ -492,20 +492,20 @@ static inline void backend_switch_state(struct backend_info *be, /* Handle backend state transitions: * - * The backend state starts in InitWait and the following transitions are + * The backend state starts in Initialising and the following transitions are * allowed. * - * InitWait -> Connected - * - * ^ \ | - * | \ | - * | \ | - * | \ | - * | \ | - * | \ | - * | V V + * Initialising -> InitWait -> Connected + * \ + * \ ^ \ | + * \ | \ | + * \ | \ | + * \ | \ | + * \ | \ | + * \ | \ | + * V | V V * - * Closed <-> Closing + * Closed <-> Closing * * The state argument specifies the eventual state of the backend and the * function transitions to that state via the shortest path. 
@@ -515,6 +515,20 @@ static void set_backend_state(struct backend_info *be, { while (be->state != state) { switch (be->state) { + case XenbusStateInitialising: + switch (state) { + case XenbusStateInitWait: + case XenbusStateConnected: + case XenbusStateClosing: + backend_switch_state(be, XenbusStateInitWait); + break; + case XenbusStateClosed: + backend_switch_state(be, XenbusStateClosed); + break; + default: + BUG(); + } + break; case XenbusStateClosed: switch (state) { case XenbusStateInitWait: -- cgit v1.1 From ffb4d6c8508657824bcef68a36b2a0f9d8c09d10 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Sep 2016 08:12:33 -0700 Subject: tcp: fix overflow in __tcp_retransmit_skb() If a TCP socket gets a large write queue, an overflow can happen in a test in __tcp_retransmit_skb() preventing all retransmits. The flow then stalls and resets after timeouts. Tested: sysctl -w net.core.wmem_max=1000000000 netperf -H dest -- -s 1000000000 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bdaef7f..f53d0cc 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2605,7 +2605,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) * copying overhead: fragmentation, tunneling, mangling etc. */ if (atomic_read(&sk->sk_wmem_alloc) > - min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf)) + min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), + sk->sk_sndbuf)) return -EAGAIN; if (skb_still_in_host_queue(sk, skb)) -- cgit v1.1 From 20c64d5cd5a2bdcdc8982a06cb05e5e1bd851a3d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Sep 2016 08:48:46 -0700 Subject: net: avoid sk_forward_alloc overflows A malicious TCP receiver, sending SACK, can force the sender to split skbs in write queue and increase its memory usage. 
Then, when socket is closed and its write queue purged, we might overflow sk_forward_alloc (It becomes negative) sk_mem_reclaim() does nothing in this case, and more than 2GB are leaked from TCP perspective (tcp_memory_allocated is not changed) Then warnings trigger from inet_sock_destruct() and sk_stream_kill_queues() seeing a not zero sk_forward_alloc All TCP stack can be stuck because TCP is under memory pressure. A simple fix is to preemptively reclaim from sk_mem_uncharge(). This makes sure a socket wont have more than 2 MB forward allocated, after burst and idle period. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/net/sock.h b/include/net/sock.h index ff5be7e..8741988 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1332,6 +1332,16 @@ static inline void sk_mem_uncharge(struct sock *sk, int size) if (!sk_has_account(sk)) return; sk->sk_forward_alloc += size; + + /* Avoid a possible overflow. + * TCP send queues can make this happen, if sk_mem_reclaim() + * is not called and more than 2 GBytes are released at once. + * + * If we reach 2 MBytes, reclaim 1 MBytes right now, there is + * no need to hold that much forward allocation anyway. + */ + if (unlikely(sk->sk_forward_alloc >= 1 << 21)) + __sk_mem_reclaim(sk, 1 << 20); } static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) -- cgit v1.1 From 8ab86c00e349cef9fb14719093a7f198bcc72629 Mon Sep 17 00:00:00 2001 From: "phil.turnbull@oracle.com" Date: Thu, 15 Sep 2016 12:41:44 -0400 Subject: irda: Free skb on irda_accept error path. skb is not freed if newsk is NULL. Rework the error path so free_skb is unconditionally called on function exit. Fixes: c3ea9fa27413 ("[IrDA] af_irda: IRDA_ASSERT cleanups") Signed-off-by: Phil Turnbull Signed-off-by: David S. 
Miller --- net/irda/af_irda.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 8d2f7c9..ccc2444 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -832,7 +832,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) struct sock *sk = sock->sk; struct irda_sock *new, *self = irda_sk(sk); struct sock *newsk; - struct sk_buff *skb; + struct sk_buff *skb = NULL; int err; err = irda_create(sock_net(sk), newsock, sk->sk_protocol, 0); @@ -900,7 +900,6 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) err = -EPERM; /* value does not seem to make sense. -arnd */ if (!new->tsap) { pr_debug("%s(), dup failed!\n", __func__); - kfree_skb(skb); goto out; } @@ -919,7 +918,6 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) /* Clean up the original one to keep it in listen state */ irttp_listen(self->tsap); - kfree_skb(skb); sk->sk_ack_backlog--; newsock->state = SS_CONNECTED; @@ -927,6 +925,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) irda_connect_response(new); err = 0; out: + kfree_skb(skb); release_sock(sk); return err; } -- cgit v1.1 From 4496195ddd75c4ad57b783739414e69b7d79843e Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 15 Sep 2016 15:02:38 -0300 Subject: sctp: fix SSN comparision This function actually operates on u32 yet its parameters were declared as u16, causing integer truncation upon calling. Note in patch context that ADDIP_SERIAL_SIGN_BIT is already 32 bits. Signed-off-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. 
Miller --- include/net/sctp/sm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index efc0174..bafe2a0 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -382,7 +382,7 @@ enum { ADDIP_SERIAL_SIGN_BIT = (1<<31) }; -static inline int ADDIP_SERIAL_gte(__u16 s, __u16 t) +static inline int ADDIP_SERIAL_gte(__u32 s, __u32 t) { return ((s) == (t)) || (((t) - (s)) & ADDIP_SERIAL_SIGN_BIT); } -- cgit v1.1 From 3613b3dbd1ade9a6a626dae1f608c57638eb5e8a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Sep 2016 09:33:02 -0700 Subject: tcp: prepare skbs for better sack shifting With large BDP TCP flows and lossy networks, it is very important to keep a low number of skbs in the write queue. RACK and SACK processing can perform a linear scan of it. We should avoid putting any payload in skb->head, so that SACK shifting can be done if needed. With this patch, we allow to pack ~0.5 MB per skb instead of the 64KB initially cooked at tcp_sendmsg() time. This gives a reduction of number of skbs in write queue by eight. tcp_rack_detect_loss() likes this. We still allow payload in skb->head for first skb put in the queue, to not impact RPC workloads. Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a13fcb3..7dae800 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1020,17 +1020,31 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, } EXPORT_SYMBOL(tcp_sendpage); -static inline int select_size(const struct sock *sk, bool sg) +/* Do not bother using a page frag for very small frames. + * But use this heuristic only for the first skb in write queue. 
+ * + * Having no payload in skb->head allows better SACK shifting + * in tcp_shift_skb_data(), reducing sack/rack overhead, because + * write queue has less skbs. + * Each skb can hold up to MAX_SKB_FRAGS * 32Kbytes, or ~0.5 MB. + * This also speeds up tso_fragment(), since it wont fallback + * to tcp_fragment(). + */ +static int linear_payload_sz(bool first_skb) +{ + if (first_skb) + return SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER); + return 0; +} + +static int select_size(const struct sock *sk, bool sg, bool first_skb) { const struct tcp_sock *tp = tcp_sk(sk); int tmp = tp->mss_cache; if (sg) { if (sk_can_gso(sk)) { - /* Small frames wont use a full page: - * Payload will immediately follow tcp header. - */ - tmp = SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER); + tmp = linear_payload_sz(first_skb); } else { int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER); @@ -1161,6 +1175,8 @@ restart: } if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) { + bool first_skb; + new_segment: /* Allocate new segment. If the interface is SG, * allocate skb fitting to single page. @@ -1172,10 +1188,11 @@ new_segment: process_backlog = false; goto restart; } + first_skb = skb_queue_empty(&sk->sk_write_queue); skb = sk_stream_alloc_skb(sk, - select_size(sk, sg), + select_size(sk, sg, first_skb), sk->sk_allocation, - skb_queue_empty(&sk->sk_write_queue)); + first_skb); if (!skb) goto wait_for_memory; -- cgit v1.1 From e1fb9d0389e5386151de32b64624896e2b621e1a Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 15 Sep 2016 10:13:47 -0700 Subject: net: vrf: Remove RT_FL_TOS No longer used after d66f6c0a8f3c0 ("net: ipv4: Remove l3mdev_get_saddr") Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- drivers/net/vrf.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 55674b0..85c271c 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -37,9 +37,6 @@ #include #include -#define RT_FL_TOS(oldflp4) \ - ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)) - #define DRV_NAME "vrf" #define DRV_VERSION "1.0" -- cgit v1.1 From 19664c6a000956290cce84c6924b13488ab794d6 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 15 Sep 2016 10:18:45 -0700 Subject: net: l3mdev: Remove netif_index_is_l3_master No longer used after e0d56fdd73422 ("net: l3mdev: remove redundant calls") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/l3mdev.h | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 3832099..b220dab 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -114,25 +114,6 @@ static inline u32 l3mdev_fib_table(const struct net_device *dev) return tb_id; } -static inline bool netif_index_is_l3_master(struct net *net, int ifindex) -{ - struct net_device *dev; - bool rc = false; - - if (ifindex == 0) - return false; - - rcu_read_lock(); - - dev = dev_get_by_index_rcu(net, ifindex); - if (dev) - rc = netif_is_l3_master(dev); - - rcu_read_unlock(); - - return rc; -} - struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6); static inline @@ -226,11 +207,6 @@ static inline u32 l3mdev_fib_table_by_index(struct net *net, int ifindex) return 0; } -static inline bool netif_index_is_l3_master(struct net *net, int ifindex) -{ - return false; -} - static inline struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6) { -- cgit v1.1 From 5ff904d55da821fd194ff493f2928d134ce5b67a Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 15 Sep 2016 18:51:25 +0100 Subject: llc: switch type to bool as the timeout is only tested versus 0 (As asked by Dave in February) Signed-off-by: 
Alan Cox Signed-off-by: David S. Miller --- net/llc/af_llc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 8ae3ed9..db916cf 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -38,7 +38,7 @@ static u16 llc_ui_sap_link_no_max[256]; static struct sockaddr_llc llc_ui_addrnull; static const struct proto_ops llc_ui_ops; -static long llc_ui_wait_for_conn(struct sock *sk, long timeout); +static bool llc_ui_wait_for_conn(struct sock *sk, long timeout); static int llc_ui_wait_for_disc(struct sock *sk, long timeout); static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout); @@ -551,7 +551,7 @@ static int llc_ui_wait_for_disc(struct sock *sk, long timeout) return rc; } -static long llc_ui_wait_for_conn(struct sock *sk, long timeout) +static bool llc_ui_wait_for_conn(struct sock *sk, long timeout) { DEFINE_WAIT(wait); -- cgit v1.1 From 66f58ec486389e8d3f7ebe8cfc3883a72e808eb9 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 15 Sep 2016 22:23:24 +0200 Subject: hisilicon: constify net_device_ops structures Check for net_device_ops structures that are only stored in the netdev_ops field of a net_device structure. This field is declared const, so net_device_ops structures that have this property can be declared as const also. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @r disable optional_qualifier@ identifier i; position p; @@ static struct net_device_ops i@p = { ... }; @ok@ identifier r.i; struct net_device e; position p; @@ e.netdev_ops = &i@p; @bad@ position p != {r.p,ok.p}; identifier r.i; struct net_device_ops e; @@ e@i@p @depends on !bad disable optional_qualifier@ identifier r.i; @@ static +const struct net_device_ops i = { ... 
}; // The result of size on this file before the change is: text data bss dec hex filename 7995 848 8 8851 2293 drivers/net/ethernet/hisilicon/hip04_eth.o and after the change it is: text data bss dec hex filename 8571 256 8 8835 2283 drivers/net/ethernet/hisilicon/hip04_eth.o Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hip04_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index a90ab40..415ffa1 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -761,7 +761,7 @@ static const struct ethtool_ops hip04_ethtool_ops = { .get_drvinfo = hip04_get_drvinfo, }; -static struct net_device_ops hip04_netdev_ops = { +static const struct net_device_ops hip04_netdev_ops = { .ndo_open = hip04_mac_open, .ndo_stop = hip04_mac_stop, .ndo_get_stats = hip04_get_stats, -- cgit v1.1 From 373075049c7be28bed875494488c185ef5eeb938 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 15 Sep 2016 22:23:25 +0200 Subject: dwc_eth_qos: constify net_device_ops structures Check for net_device_ops structures that are only stored in the netdev_ops field of a net_device structure. This field is declared const, so net_device_ops structures that have this property can be declared as const also. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @r disable optional_qualifier@ identifier i; position p; @@ static struct net_device_ops i@p = { ... }; @ok@ identifier r.i; struct net_device e; position p; @@ e.netdev_ops = &i@p; @bad@ position p != {r.p,ok.p}; identifier r.i; struct net_device_ops e; @@ e@i@p @depends on !bad disable optional_qualifier@ identifier r.i; @@ static +const struct net_device_ops i = { ... 
}; // The result of size on this file before the change is: text data bss dec hex filename 21623 1316 40 22979 59c3 drivers/net/ethernet/synopsys/dwc_eth_qos.o and after the change it is: text data bss dec hex filename 22199 724 40 22963 59b3 drivers/net/ethernet/synopsys/dwc_eth_qos.o Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- drivers/net/ethernet/synopsys/dwc_eth_qos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/synopsys/dwc_eth_qos.c b/drivers/net/ethernet/synopsys/dwc_eth_qos.c index ef26f58..0d00531 100644 --- a/drivers/net/ethernet/synopsys/dwc_eth_qos.c +++ b/drivers/net/ethernet/synopsys/dwc_eth_qos.c @@ -2761,7 +2761,7 @@ static const struct ethtool_ops dwceqos_ethtool_ops = { .set_link_ksettings = phy_ethtool_set_link_ksettings, }; -static struct net_device_ops netdev_ops = { +static const struct net_device_ops netdev_ops = { .ndo_open = dwceqos_open, .ndo_stop = dwceqos_stop, .ndo_start_xmit = dwceqos_start_xmit, -- cgit v1.1 From eb94737d711913a23e466b99c0d9ffdf15651290 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 15 Sep 2016 22:23:26 +0200 Subject: l2tp: constify net_device_ops structures Check for net_device_ops structures that are only stored in the netdev_ops field of a net_device structure. This field is declared const, so net_device_ops structures that have this property can be declared as const also. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @r disable optional_qualifier@ identifier i; position p; @@ static struct net_device_ops i@p = { ... }; @ok@ identifier r.i; struct net_device e; position p; @@ e.netdev_ops = &i@p; @bad@ position p != {r.p,ok.p}; identifier r.i; struct net_device_ops e; @@ e@i@p @depends on !bad disable optional_qualifier@ identifier r.i; @@ static +const struct net_device_ops i = { ... 
}; // The result of size on this file before the change is: text data bss dec hex filename 3401 931 44 4376 1118 net/l2tp/l2tp_eth.o and after the change it is: text data bss dec hex filename 3993 347 44 4384 1120 net/l2tp/l2tp_eth.o Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- net/l2tp/l2tp_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index ef2cd30..965f7e3 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -121,7 +121,7 @@ static struct rtnl_link_stats64 *l2tp_eth_get_stats64(struct net_device *dev, } -static struct net_device_ops l2tp_eth_netdev_ops = { +static const struct net_device_ops l2tp_eth_netdev_ops = { .ndo_init = l2tp_eth_dev_init, .ndo_uninit = l2tp_eth_dev_uninit, .ndo_start_xmit = l2tp_eth_dev_xmit, -- cgit v1.1 From cfc7381b3002756b1dcada32979e942aa3126e31 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 15 Sep 2016 13:00:29 -0700 Subject: ip_tunnel: add collect_md mode to IPIP tunnel Similar to gre, vxlan, geneve tunnels allow IPIP tunnels to operate in 'collect metadata' mode. bpf_skb_[gs]et_tunnel_key() helpers can make use of it right away. ovs can use it as well in the future (once appropriate ovs-vport abstractions and user apis are added). Note that just like in other tunnels we cannot cache the dst, since tunnel_info metadata can be different for every packet. Signed-off-by: Alexei Starovoitov Acked-by: Thomas Graf Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- include/net/ip_tunnels.h | 2 ++ include/uapi/linux/if_tunnel.h | 1 + net/ipv4/ip_tunnel.c | 76 ++++++++++++++++++++++++++++++++++++++++++ net/ipv4/ipip.c | 35 +++++++++++++++---- 4 files changed, 108 insertions(+), 6 deletions(-) diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index e598c63..59557c0 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -255,6 +255,8 @@ void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops); void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, const u8 protocol); +void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, + const u8 proto); int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 9865c8c..18d5dc1 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -73,6 +73,7 @@ enum { IFLA_IPTUN_ENCAP_FLAGS, IFLA_IPTUN_ENCAP_SPORT, IFLA_IPTUN_ENCAP_DPORT, + IFLA_IPTUN_COLLECT_METADATA, __IFLA_IPTUN_MAX, }; #define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 95649eb..5719d6b 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -55,6 +55,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_IPV6) #include @@ -546,6 +547,81 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, return 0; } +void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + u32 headroom = sizeof(struct iphdr); + struct ip_tunnel_info *tun_info; + const struct ip_tunnel_key *key; + const struct iphdr *inner_iph; + struct rtable *rt; + struct flowi4 fl4; + __be16 df = 0; + u8 tos, ttl; + + tun_info = 
skb_tunnel_info(skb); + if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || + ip_tunnel_info_af(tun_info) != AF_INET)) + goto tx_error; + key = &tun_info->key; + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + inner_iph = (const struct iphdr *)skb_inner_network_header(skb); + tos = key->tos; + if (tos == 1) { + if (skb->protocol == htons(ETH_P_IP)) + tos = inner_iph->tos; + else if (skb->protocol == htons(ETH_P_IPV6)) + tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); + } + init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0, + RT_TOS(tos), tunnel->parms.link); + if (tunnel->encap.type != TUNNEL_ENCAP_NONE) + goto tx_error; + rt = ip_route_output_key(tunnel->net, &fl4); + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error; + } + if (rt->dst.dev == dev) { + ip_rt_put(rt); + dev->stats.collisions++; + goto tx_error; + } + tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); + ttl = key->ttl; + if (ttl == 0) { + if (skb->protocol == htons(ETH_P_IP)) + ttl = inner_iph->ttl; + else if (skb->protocol == htons(ETH_P_IPV6)) + ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit; + else + ttl = ip4_dst_hoplimit(&rt->dst); + } + if (key->tun_flags & TUNNEL_DONT_FRAGMENT) + df = htons(IP_DF); + else if (skb->protocol == htons(ETH_P_IP)) + df = inner_iph->frag_off & htons(IP_DF); + headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len; + if (headroom > dev->needed_headroom) + dev->needed_headroom = headroom; + + if (skb_cow_head(skb, dev->needed_headroom)) { + ip_rt_put(rt); + goto tx_dropped; + } + iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, key->tos, + key->ttl, df, !net_eq(tunnel->net, dev_net(dev))); + return; +tx_error: + dev->stats.tx_errors++; + goto kfree; +tx_dropped: + dev->stats.tx_dropped++; +kfree: + kfree_skb(skb); +} +EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit); + void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, u8 protocol) { diff --git 
a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 4ae3f8e..c939258 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -115,6 +115,7 @@ #include #include #include +#include static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); @@ -193,6 +194,7 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto) { struct net *net = dev_net(skb->dev); struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); + struct metadata_dst *tun_dst = NULL; struct ip_tunnel *tunnel; const struct iphdr *iph; @@ -216,7 +218,12 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto) tpi = &ipip_tpi; if (iptunnel_pull_header(skb, 0, tpi->proto, false)) goto drop; - return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error); + if (tunnel->collect_md) { + tun_dst = ip_tun_rx_dst(skb, 0, 0, 0); + if (!tun_dst) + return 0; + } + return ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); } return -1; @@ -270,7 +277,10 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, skb_set_inner_ipproto(skb, ipproto); - ip_tunnel_xmit(skb, dev, tiph, ipproto); + if (tunnel->collect_md) + ip_md_tunnel_xmit(skb, dev, ipproto); + else + ip_tunnel_xmit(skb, dev, tiph, ipproto); return NETDEV_TX_OK; tx_error: @@ -380,13 +390,14 @@ static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) } static void ipip_netlink_parms(struct nlattr *data[], - struct ip_tunnel_parm *parms) + struct ip_tunnel_parm *parms, bool *collect_md) { memset(parms, 0, sizeof(*parms)); parms->iph.version = 4; parms->iph.protocol = IPPROTO_IPIP; parms->iph.ihl = 5; + *collect_md = false; if (!data) return; @@ -414,6 +425,9 @@ static void ipip_netlink_parms(struct nlattr *data[], if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC])) parms->iph.frag_off = htons(IP_DF); + + if (data[IFLA_IPTUN_COLLECT_METADATA]) + *collect_md = true; } /* This function returns true when ENCAP attributes are present in the nl msg */ @@ -453,18 +467,18 @@ static bool 
ipip_netlink_encap_parms(struct nlattr *data[], static int ipip_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { + struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm p; struct ip_tunnel_encap ipencap; if (ipip_netlink_encap_parms(data, &ipencap)) { - struct ip_tunnel *t = netdev_priv(dev); int err = ip_tunnel_encap_setup(t, &ipencap); if (err < 0) return err; } - ipip_netlink_parms(data, &p); + ipip_netlink_parms(data, &p, &t->collect_md); return ip_tunnel_newlink(dev, tb, &p); } @@ -473,6 +487,7 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], { struct ip_tunnel_parm p; struct ip_tunnel_encap ipencap; + bool collect_md; if (ipip_netlink_encap_parms(data, &ipencap)) { struct ip_tunnel *t = netdev_priv(dev); @@ -482,7 +497,9 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], return err; } - ipip_netlink_parms(data, &p); + ipip_netlink_parms(data, &p, &collect_md); + if (collect_md) + return -EINVAL; if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) || (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr)) @@ -516,6 +533,8 @@ static size_t ipip_get_size(const struct net_device *dev) nla_total_size(2) + /* IFLA_IPTUN_ENCAP_DPORT */ nla_total_size(2) + + /* IFLA_IPTUN_COLLECT_METADATA */ + nla_total_size(0) + 0; } @@ -544,6 +563,9 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev) tunnel->encap.flags)) goto nla_put_failure; + if (tunnel->collect_md) + if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA)) + goto nla_put_failure; return 0; nla_put_failure: @@ -562,6 +584,7 @@ static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 }, + [IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG }, }; static struct rtnl_link_ops ipip_link_ops __read_mostly = { -- cgit v1.1 From 
8d79266bc48c6ab6477d04e159cabf1e7809cb72 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 15 Sep 2016 13:00:30 -0700 Subject: ip6_tunnel: add collect_md mode to IPv6 tunnels Similar to gre, vxlan, geneve tunnels allow IPIP6 and IP6IP6 tunnels to operate in 'collect metadata' mode. Unlike ipv4 code here it's possible to reuse ip6_tnl_xmit() function for both collect_md and traditional tunnels. bpf_skb_[gs]et_tunnel_key() helpers and ovs (in the future) are the users. Signed-off-by: Alexei Starovoitov Acked-by: Thomas Graf Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/ip6_tunnel.h | 1 + net/ipv6/ip6_tunnel.c | 178 +++++++++++++++++++++++++++++++++++------------ 2 files changed, 134 insertions(+), 45 deletions(-) diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 43a5a0e..20ed969 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -23,6 +23,7 @@ struct __ip6_tnl_parm { __u8 proto; /* tunnel protocol */ __u8 encap_limit; /* encapsulation limit for tunnel */ __u8 hop_limit; /* hop limit for tunnel */ + bool collect_md; __be32 flowinfo; /* traffic class and flowlabel for tunnel */ __u32 flags; /* tunnel flags */ struct in6_addr laddr; /* local tunnel end-point address */ diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 5c57797..6a66adb 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -57,6 +57,7 @@ #include #include #include +#include MODULE_AUTHOR("Ville Nuorvala"); MODULE_DESCRIPTION("IPv6 tunneling device"); @@ -90,6 +91,7 @@ struct ip6_tnl_net { struct ip6_tnl __rcu *tnls_r_l[IP6_TUNNEL_HASH_SIZE]; struct ip6_tnl __rcu *tnls_wc[1]; struct ip6_tnl __rcu **tnls[2]; + struct ip6_tnl __rcu *collect_md_tun; }; static struct net_device_stats *ip6_get_stats(struct net_device *dev) @@ -166,6 +168,10 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_ return t; } + t = rcu_dereference(ip6n->collect_md_tun); + if (t) + return t; + t = 
rcu_dereference(ip6n->tnls_wc[0]); if (t && (t->dev->flags & IFF_UP)) return t; @@ -209,6 +215,8 @@ ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) { struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms); + if (t->parms.collect_md) + rcu_assign_pointer(ip6n->collect_md_tun, t); rcu_assign_pointer(t->next , rtnl_dereference(*tp)); rcu_assign_pointer(*tp, t); } @@ -224,6 +232,9 @@ ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) struct ip6_tnl __rcu **tp; struct ip6_tnl *iter; + if (t->parms.collect_md) + rcu_assign_pointer(ip6n->collect_md_tun, NULL); + for (tp = ip6_tnl_bucket(ip6n, &t->parms); (iter = rtnl_dereference(*tp)) != NULL; tp = &iter->next) { @@ -829,6 +840,9 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev))); + if (tun_dst) + skb_dst_set(skb, (struct dst_entry *)tun_dst); + gro_cells_receive(&tunnel->gro_cells, skb); return 0; @@ -865,6 +879,7 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto, { struct ip6_tnl *t; const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct metadata_dst *tun_dst = NULL; int ret = -1; rcu_read_lock(); @@ -881,7 +896,12 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto, goto drop; if (iptunnel_pull_header(skb, 0, tpi->proto, false)) goto drop; - ret = __ip6_tnl_rcv(t, skb, tpi, NULL, dscp_ecn_decapsulate, + if (t->parms.collect_md) { + tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0); + if (!tun_dst) + return 0; + } + ret = __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate, log_ecn_error); } @@ -1012,8 +1032,16 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, int mtu; unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen; unsigned int max_headroom = psh_hlen; + u8 hop_limit; int err = -1; + if (t->parms.collect_md) { + hop_limit = skb_tunnel_info(skb)->key.ttl; + goto route_lookup; + } else { + hop_limit = t->parms.hop_limit; + } + /* NBMA tunnel */ if 
(ipv6_addr_any(&t->parms.raddr)) { struct in6_addr *addr6; @@ -1043,6 +1071,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, goto tx_err_link_failure; if (!dst) { +route_lookup: dst = ip6_route_output(net, NULL, fl6); if (dst->error) @@ -1053,6 +1082,10 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, dst = NULL; goto tx_err_link_failure; } + if (t->parms.collect_md && + ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev, + &fl6->daddr, 0, &fl6->saddr)) + goto tx_err_link_failure; ndst = dst; } @@ -1071,7 +1104,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, } if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - if (skb_dst(skb)) + if (skb_dst(skb) && !t->parms.collect_md) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if (skb->len > mtu && !skb_is_gso(skb)) { *pmtu = mtu; @@ -1111,8 +1144,13 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, skb = new_skb; } - if (!fl6->flowi6_mark && ndst) - dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr); + if (t->parms.collect_md) { + if (t->encap.type != TUNNEL_ENCAP_NONE) + goto tx_err_dst_release; + } else { + if (!fl6->flowi6_mark && ndst) + dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr); + } skb_dst_set(skb, dst); if (encap_limit >= 0) { @@ -1137,7 +1175,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, ipv6h = ipv6_hdr(skb); ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6)); - ipv6h->hop_limit = t->parms.hop_limit; + ipv6h->hop_limit = hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; ipv6h->daddr = fl6->daddr; @@ -1170,19 +1208,34 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (tproto != IPPROTO_IPIP && tproto != 0) return -1; - if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) - encap_limit = t->parms.encap_limit; + dsfield = ipv4_get_dsfield(iph); - memcpy(&fl6, 
&t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPIP; + if (t->parms.collect_md) { + struct ip_tunnel_info *tun_info; + const struct ip_tunnel_key *key; - dsfield = ipv4_get_dsfield(iph); + tun_info = skb_tunnel_info(skb); + if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || + ip_tunnel_info_af(tun_info) != AF_INET6)) + return -1; + key = &tun_info->key; + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_IPIP; + fl6.daddr = key->u.ipv6.dst; + fl6.flowlabel = key->label; + } else { + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + encap_limit = t->parms.encap_limit; - if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) - & IPV6_TCLASS_MASK; - if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) - fl6.flowi6_mark = skb->mark; + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_IPIP; + + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) + & IPV6_TCLASS_MASK; + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + fl6.flowi6_mark = skb->mark; + } if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; @@ -1220,29 +1273,47 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) ip6_tnl_addr_conflict(t, ipv6h)) return -1; - offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); - if (offset > 0) { - struct ipv6_tlv_tnl_enc_lim *tel; - tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; - if (tel->encap_limit == 0) { - icmpv6_send(skb, ICMPV6_PARAMPROB, - ICMPV6_HDR_FIELD, offset + 2); + dsfield = ipv6_get_dsfield(ipv6h); + + if (t->parms.collect_md) { + struct ip_tunnel_info *tun_info; + const struct ip_tunnel_key *key; + + tun_info = skb_tunnel_info(skb); + if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || + ip_tunnel_info_af(tun_info) != AF_INET6)) return -1; + key = &tun_info->key; + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = 
IPPROTO_IPV6; + fl6.daddr = key->u.ipv6.dst; + fl6.flowlabel = key->label; + } else { + offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); + if (offset > 0) { + struct ipv6_tlv_tnl_enc_lim *tel; + + tel = (void *)&skb_network_header(skb)[offset]; + if (tel->encap_limit == 0) { + icmpv6_send(skb, ICMPV6_PARAMPROB, + ICMPV6_HDR_FIELD, offset + 2); + return -1; + } + encap_limit = tel->encap_limit - 1; + } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) { + encap_limit = t->parms.encap_limit; } - encap_limit = tel->encap_limit - 1; - } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) - encap_limit = t->parms.encap_limit; - memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPV6; + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_IPV6; - dsfield = ipv6_get_dsfield(ipv6h); - if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); - if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) - fl6.flowlabel |= ip6_flowlabel(ipv6h); - if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) - fl6.flowi6_mark = skb->mark; + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + fl6.flowlabel |= (*(__be32 *)ipv6h & IPV6_TCLASS_MASK); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) + fl6.flowlabel |= ip6_flowlabel(ipv6h); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + fl6.flowi6_mark = skb->mark; + } if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; @@ -1741,6 +1812,10 @@ static int ip6_tnl_dev_init(struct net_device *dev) if (err) return err; ip6_tnl_link_config(t); + if (t->parms.collect_md) { + dev->features |= NETIF_F_NETNS_LOCAL; + netif_keep_dst(dev); + } return 0; } @@ -1811,6 +1886,9 @@ static void ip6_tnl_netlink_parms(struct nlattr *data[], if (data[IFLA_IPTUN_PROTO]) parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); + + if (data[IFLA_IPTUN_COLLECT_METADATA]) + parms->collect_md = true; } static bool ip6_tnl_netlink_encap_parms(struct nlattr 
*data[], @@ -1850,6 +1928,7 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct net *net = dev_net(dev); + struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); struct ip6_tnl *nt, *t; struct ip_tunnel_encap ipencap; @@ -1864,9 +1943,14 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev, ip6_tnl_netlink_parms(data, &nt->parms); - t = ip6_tnl_locate(net, &nt->parms, 0); - if (!IS_ERR(t)) - return -EEXIST; + if (nt->parms.collect_md) { + if (rtnl_dereference(ip6n->collect_md_tun)) + return -EEXIST; + } else { + t = ip6_tnl_locate(net, &nt->parms, 0); + if (!IS_ERR(t)) + return -EEXIST; + } return ip6_tnl_create2(dev); } @@ -1890,6 +1974,8 @@ static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[], return err; } ip6_tnl_netlink_parms(data, &p); + if (p.collect_md) + return -EINVAL; t = ip6_tnl_locate(net, &p, 0); if (!IS_ERR(t)) { @@ -1937,6 +2023,8 @@ static size_t ip6_tnl_get_size(const struct net_device *dev) nla_total_size(2) + /* IFLA_IPTUN_ENCAP_DPORT */ nla_total_size(2) + + /* IFLA_IPTUN_COLLECT_METADATA */ + nla_total_size(0) + 0; } @@ -1955,16 +2043,15 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto)) goto nla_put_failure; - if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, - tunnel->encap.type) || - nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, - tunnel->encap.sport) || - nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, - tunnel->encap.dport) || - nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, - tunnel->encap.flags)) + if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) || + nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, tunnel->encap.sport) || + nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, tunnel->encap.dport) || + nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, tunnel->encap.flags)) goto nla_put_failure; + if (parm->collect_md) + if (nla_put_flag(skb, 
IFLA_IPTUN_COLLECT_METADATA)) + goto nla_put_failure; return 0; nla_put_failure: @@ -1992,6 +2079,7 @@ static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 }, + [IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG }, }; static struct rtnl_link_ops ip6_link_ops __read_mostly = { -- cgit v1.1 From a1c82704d13fd0d0ab0eb10d33a9bb7af83c90e3 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 15 Sep 2016 13:00:31 -0700 Subject: samples/bpf: extend test_tunnel_bpf.sh with IPIP test extend existing tests for vxlan, geneve, gre to include IPIP tunnel. It tests both traditional tunnel configuration and dynamic via bpf helpers. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/tcbpf2_kern.c | 58 ++++++++++++++++++++++++++++++++++++++++++ samples/bpf/test_tunnel_bpf.sh | 56 ++++++++++++++++++++++++++++++++++------ 2 files changed, 106 insertions(+), 8 deletions(-) diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c index 7a15289..c1917d9 100644 --- a/samples/bpf/tcbpf2_kern.c +++ b/samples/bpf/tcbpf2_kern.c @@ -1,4 +1,5 @@ /* Copyright (c) 2016 VMware + * Copyright (c) 2016 Facebook * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -188,4 +189,61 @@ int _geneve_get_tunnel(struct __sk_buff *skb) return TC_ACT_OK; } +SEC("ipip_set_tunnel") +int _ipip_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key = {}; + void *data = (void *)(long)skb->data; + struct iphdr *iph = data; + struct tcphdr *tcp = data + sizeof(*iph); + void *data_end = (void *)(long)skb->data_end; + int ret; + + /* single length check */ + if (data + sizeof(*iph) + sizeof(*tcp) > data_end) { + ERROR(1); + return TC_ACT_SHOT; + } + + key.tunnel_ttl = 64; + if (iph->protocol == IPPROTO_ICMP) { + 
key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ + } else { + if (iph->protocol != IPPROTO_TCP || iph->ihl != 5) + return TC_ACT_SHOT; + + if (tcp->dest == htons(5200)) + key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ + else if (tcp->dest == htons(5201)) + key.remote_ipv4 = 0xac100165; /* 172.16.1.101 */ + else + return TC_ACT_SHOT; + } + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("ipip_get_tunnel") +int _ipip_get_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + char fmt[] = "remote ip 0x%x\n"; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), key.remote_ipv4); + return TC_ACT_OK; +} + char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/test_tunnel_bpf.sh b/samples/bpf/test_tunnel_bpf.sh index 4956589..1ff634f 100755 --- a/samples/bpf/test_tunnel_bpf.sh +++ b/samples/bpf/test_tunnel_bpf.sh @@ -9,15 +9,13 @@ # local 172.16.1.200 remote 172.16.1.100 # veth1 IP: 172.16.1.200, tunnel dev 11 -set -e - function config_device { ip netns add at_ns0 ip link add veth0 type veth peer name veth1 ip link set veth0 netns at_ns0 ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0 ip netns exec at_ns0 ip link set dev veth0 up - ip link set dev veth1 up + ip link set dev veth1 up mtu 1500 ip addr add dev veth1 172.16.1.200/24 } @@ -67,6 +65,19 @@ function add_geneve_tunnel { ip addr add dev $DEV 10.1.1.200/24 } +function add_ipip_tunnel { + # in namespace + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE local 172.16.1.100 remote 172.16.1.200 + ip netns exec at_ns0 ip link set dev $DEV_NS up + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + + # out of namespace + ip link add dev $DEV type $TYPE external + ip link set dev $DEV up + ip addr add dev $DEV 10.1.1.200/24 +} + function attach_bpf { DEV=$1 
SET_TUNNEL=$2 @@ -85,6 +96,7 @@ function test_gre { attach_bpf $DEV gre_set_tunnel gre_get_tunnel ping -c 1 10.1.1.100 ip netns exec at_ns0 ping -c 1 10.1.1.200 + cleanup } function test_vxlan { @@ -96,6 +108,7 @@ function test_vxlan { attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel ping -c 1 10.1.1.100 ip netns exec at_ns0 ping -c 1 10.1.1.200 + cleanup } function test_geneve { @@ -107,21 +120,48 @@ function test_geneve { attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel ping -c 1 10.1.1.100 ip netns exec at_ns0 ping -c 1 10.1.1.200 + cleanup +} + +function test_ipip { + TYPE=ipip + DEV_NS=ipip00 + DEV=ipip11 + config_device + tcpdump -nei veth1 & + cat /sys/kernel/debug/tracing/trace_pipe & + add_ipip_tunnel + ethtool -K veth1 gso off gro off rx off tx off + ip link set dev veth1 mtu 1500 + attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel + ping -c 1 10.1.1.100 + ip netns exec at_ns0 ping -c 1 10.1.1.200 + ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null + sleep 0.2 + iperf -c 10.1.1.100 -n 5k -p 5200 + cleanup } function cleanup { + set +ex + pkill iperf ip netns delete at_ns0 ip link del veth1 - ip link del $DEV + ip link del ipip11 + ip link del gretap11 + ip link del geneve11 + pkill tcpdump + pkill cat + set -ex } +cleanup echo "Testing GRE tunnel..." test_gre -cleanup echo "Testing VXLAN tunnel..." test_vxlan -cleanup echo "Testing GENEVE tunnel..." test_geneve -cleanup -echo "Success" +echo "Testing IPIP tunnel..." +test_ipip +echo "*** PASS ***" -- cgit v1.1 From 173ca26e9b5136faa82dee37c77cbfb36974d079 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 15 Sep 2016 13:00:32 -0700 Subject: samples/bpf: add comprehensive ipip, ipip6, ip6ip6 test the test creates 3 namespaces with veth connected via bridge. First two namespaces simulate two different hosts with the same IPv4 and IPv6 addresses configured on the tunnel interface and they communicate with outside world via standard tunnels. 
Third namespace creates collect_md tunnel that is driven by BPF program which selects different remote host (either first or second namespace) based on tcp dest port number while tcp dst ip is the same. This scenario is rough approximation of load balancer use case. The tests check both traditional tunnel configuration and collect_md mode. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/tcbpf2_kern.c | 132 ++++++++++++++++++++++++++++++++++ samples/bpf/test_ipip.sh | 178 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 310 insertions(+) create mode 100755 samples/bpf/test_ipip.sh diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c index c1917d9..3303bb8 100644 --- a/samples/bpf/tcbpf2_kern.c +++ b/samples/bpf/tcbpf2_kern.c @@ -9,12 +9,15 @@ #include #include #include +#include #include #include #include #include +#include #include "bpf_helpers.h" +#define _htonl __builtin_bswap32 #define ERROR(ret) do {\ char fmt[] = "ERROR line:%d ret:%d\n";\ bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \ @@ -246,4 +249,133 @@ int _ipip_get_tunnel(struct __sk_buff *skb) return TC_ACT_OK; } +SEC("ipip6_set_tunnel") +int _ipip6_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key = {}; + void *data = (void *)(long)skb->data; + struct iphdr *iph = data; + struct tcphdr *tcp = data + sizeof(*iph); + void *data_end = (void *)(long)skb->data_end; + int ret; + + /* single length check */ + if (data + sizeof(*iph) + sizeof(*tcp) > data_end) { + ERROR(1); + return TC_ACT_SHOT; + } + + key.remote_ipv6[0] = _htonl(0x2401db00); + key.tunnel_ttl = 64; + + if (iph->protocol == IPPROTO_ICMP) { + key.remote_ipv6[3] = _htonl(1); + } else { + if (iph->protocol != IPPROTO_TCP || iph->ihl != 5) { + ERROR(iph->protocol); + return TC_ACT_SHOT; + } + + if (tcp->dest == htons(5200)) { + key.remote_ipv6[3] = _htonl(1); + } else if (tcp->dest == htons(5201)) { + key.remote_ipv6[3] = _htonl(2); + } else { + ERROR(tcp->dest); + 
return TC_ACT_SHOT; + } + } + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("ipip6_get_tunnel") +int _ipip6_get_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + char fmt[] = "remote ip6 %x::%x\n"; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), _htonl(key.remote_ipv6[0]), + _htonl(key.remote_ipv6[3])); + return TC_ACT_OK; +} + +SEC("ip6ip6_set_tunnel") +int _ip6ip6_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key = {}; + void *data = (void *)(long)skb->data; + struct ipv6hdr *iph = data; + struct tcphdr *tcp = data + sizeof(*iph); + void *data_end = (void *)(long)skb->data_end; + int ret; + + /* single length check */ + if (data + sizeof(*iph) + sizeof(*tcp) > data_end) { + ERROR(1); + return TC_ACT_SHOT; + } + + key.remote_ipv6[0] = _htonl(0x2401db00); + key.tunnel_ttl = 64; + + if (iph->nexthdr == NEXTHDR_ICMP) { + key.remote_ipv6[3] = _htonl(1); + } else { + if (iph->nexthdr != NEXTHDR_TCP) { + ERROR(iph->nexthdr); + return TC_ACT_SHOT; + } + + if (tcp->dest == htons(5200)) { + key.remote_ipv6[3] = _htonl(1); + } else if (tcp->dest == htons(5201)) { + key.remote_ipv6[3] = _htonl(2); + } else { + ERROR(tcp->dest); + return TC_ACT_SHOT; + } + } + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("ip6ip6_get_tunnel") +int _ip6ip6_get_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + char fmt[] = "remote ip6 %x::%x\n"; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), _htonl(key.remote_ipv6[0]), + _htonl(key.remote_ipv6[3])); 
+ return TC_ACT_OK; +} + + char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/test_ipip.sh b/samples/bpf/test_ipip.sh new file mode 100755 index 0000000..1969254 --- /dev/null +++ b/samples/bpf/test_ipip.sh @@ -0,0 +1,178 @@ +#!/bin/bash + +function config_device { + ip netns add at_ns0 + ip netns add at_ns1 + ip netns add at_ns2 + ip link add veth0 type veth peer name veth0b + ip link add veth1 type veth peer name veth1b + ip link add veth2 type veth peer name veth2b + ip link set veth0b up + ip link set veth1b up + ip link set veth2b up + ip link set dev veth0b mtu 1500 + ip link set dev veth1b mtu 1500 + ip link set dev veth2b mtu 1500 + ip link set veth0 netns at_ns0 + ip link set veth1 netns at_ns1 + ip link set veth2 netns at_ns2 + ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0 + ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad + ip netns exec at_ns0 ip link set dev veth0 up + ip netns exec at_ns1 ip addr add 172.16.1.101/24 dev veth1 + ip netns exec at_ns1 ip addr add 2401:db00::2/64 dev veth1 nodad + ip netns exec at_ns1 ip link set dev veth1 up + ip netns exec at_ns2 ip addr add 172.16.1.200/24 dev veth2 + ip netns exec at_ns2 ip addr add 2401:db00::3/64 dev veth2 nodad + ip netns exec at_ns2 ip link set dev veth2 up + ip link add br0 type bridge + ip link set br0 up + ip link set dev br0 mtu 1500 + ip link set veth0b master br0 + ip link set veth1b master br0 + ip link set veth2b master br0 +} + +function add_ipip_tunnel { + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type ipip local 172.16.1.100 remote 172.16.1.200 + ip netns exec at_ns0 ip link set dev $DEV_NS up + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + ip netns exec at_ns1 \ + ip link add dev $DEV_NS type ipip local 172.16.1.101 remote 172.16.1.200 + ip netns exec at_ns1 ip link set dev $DEV_NS up + # same inner IP address in at_ns0 and at_ns1 + ip netns exec at_ns1 ip addr add dev $DEV_NS 10.1.1.100/24 + + ip netns exec at_ns2 ip 
link add dev $DEV type ipip external + ip netns exec at_ns2 ip link set dev $DEV up + ip netns exec at_ns2 ip addr add dev $DEV 10.1.1.200/24 +} + +function add_ipip6_tunnel { + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type ip6tnl mode ipip6 local 2401:db00::1/64 remote 2401:db00::3/64 + ip netns exec at_ns0 ip link set dev $DEV_NS up + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + ip netns exec at_ns1 \ + ip link add dev $DEV_NS type ip6tnl mode ipip6 local 2401:db00::2/64 remote 2401:db00::3/64 + ip netns exec at_ns1 ip link set dev $DEV_NS up + # same inner IP address in at_ns0 and at_ns1 + ip netns exec at_ns1 ip addr add dev $DEV_NS 10.1.1.100/24 + + ip netns exec at_ns2 ip link add dev $DEV type ip6tnl mode ipip6 external + ip netns exec at_ns2 ip link set dev $DEV up + ip netns exec at_ns2 ip addr add dev $DEV 10.1.1.200/24 +} + +function add_ip6ip6_tunnel { + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type ip6tnl mode ip6ip6 local 2401:db00::1/64 remote 2401:db00::3/64 + ip netns exec at_ns0 ip link set dev $DEV_NS up + ip netns exec at_ns0 ip addr add dev $DEV_NS 2601:646::1/64 + ip netns exec at_ns1 \ + ip link add dev $DEV_NS type ip6tnl mode ip6ip6 local 2401:db00::2/64 remote 2401:db00::3/64 + ip netns exec at_ns1 ip link set dev $DEV_NS up + # same inner IP address in at_ns0 and at_ns1 + ip netns exec at_ns1 ip addr add dev $DEV_NS 2601:646::1/64 + + ip netns exec at_ns2 ip link add dev $DEV type ip6tnl mode ip6ip6 external + ip netns exec at_ns2 ip link set dev $DEV up + ip netns exec at_ns2 ip addr add dev $DEV 2601:646::2/64 +} + +function attach_bpf { + DEV=$1 + SET_TUNNEL=$2 + GET_TUNNEL=$3 + ip netns exec at_ns2 tc qdisc add dev $DEV clsact + ip netns exec at_ns2 tc filter add dev $DEV egress bpf da obj tcbpf2_kern.o sec $SET_TUNNEL + ip netns exec at_ns2 tc filter add dev $DEV ingress bpf da obj tcbpf2_kern.o sec $GET_TUNNEL +} + +function test_ipip { + DEV_NS=ipip_std + DEV=ipip_bpf + config_device +# tcpdump -nei 
br0 & + cat /sys/kernel/debug/tracing/trace_pipe & + + add_ipip_tunnel + attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel + + ip netns exec at_ns0 ping -c 1 10.1.1.200 + ip netns exec at_ns2 ping -c 1 10.1.1.100 + ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null + ip netns exec at_ns1 iperf -sD -p 5201 > /dev/null + sleep 0.2 + # tcp check _same_ IP over different tunnels + ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5200 + ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5201 + cleanup +} + +# IPv4 over IPv6 tunnel +function test_ipip6 { + DEV_NS=ipip_std + DEV=ipip_bpf + config_device +# tcpdump -nei br0 & + cat /sys/kernel/debug/tracing/trace_pipe & + + add_ipip6_tunnel + attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel + + ip netns exec at_ns0 ping -c 1 10.1.1.200 + ip netns exec at_ns2 ping -c 1 10.1.1.100 + ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null + ip netns exec at_ns1 iperf -sD -p 5201 > /dev/null + sleep 0.2 + # tcp check _same_ IP over different tunnels + ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5200 + ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5201 + cleanup +} + +# IPv6 over IPv6 tunnel +function test_ip6ip6 { + DEV_NS=ipip_std + DEV=ipip_bpf + config_device +# tcpdump -nei br0 & + cat /sys/kernel/debug/tracing/trace_pipe & + + add_ip6ip6_tunnel + attach_bpf $DEV ip6ip6_set_tunnel ip6ip6_get_tunnel + + ip netns exec at_ns0 ping -6 -c 1 2601:646::2 + ip netns exec at_ns2 ping -6 -c 1 2601:646::1 + ip netns exec at_ns0 iperf -6sD -p 5200 > /dev/null + ip netns exec at_ns1 iperf -6sD -p 5201 > /dev/null + sleep 0.2 + # tcp check _same_ IP over different tunnels + ip netns exec at_ns2 iperf -6c 2601:646::1 -n 5k -p 5200 + ip netns exec at_ns2 iperf -6c 2601:646::1 -n 5k -p 5201 + cleanup +} + +function cleanup { + set +ex + pkill iperf + ip netns delete at_ns0 + ip netns delete at_ns1 + ip netns delete at_ns2 + ip link del veth0 + ip link del veth1 + ip link del veth2 + ip link del br0 + pkill tcpdump + pkill cat + set -ex 
+} + +cleanup +echo "Testing IP tunnels..." +test_ipip +test_ipip6 +test_ip6ip6 +echo "*** PASS ***" -- cgit v1.1 From 2835d2d9e366a2985b24051d228333bfba82f3a7 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 15 Sep 2016 22:47:51 +0200 Subject: bna: add missing per queue ethtool stat Commit ba5ca784 "bna: check for dma mapping errors" added besides other things a statistic that counts number of DMA buffer mapping failures per each Rx queue. This counter is not included in ethtool stats output. Fixes: ba5ca784 "bna: check for dma mapping errors" Signed-off-by: Ivan Vecera Signed-off-by: David S. Miller --- drivers/net/ethernet/brocade/bna/bnad_ethtool.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c index 0e4fdc3..5671353 100644 --- a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c +++ b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c @@ -31,7 +31,7 @@ #define BNAD_NUM_TXF_COUNTERS 12 #define BNAD_NUM_RXF_COUNTERS 10 #define BNAD_NUM_CQ_COUNTERS (3 + 5) -#define BNAD_NUM_RXQ_COUNTERS 6 +#define BNAD_NUM_RXQ_COUNTERS 7 #define BNAD_NUM_TXQ_COUNTERS 5 #define BNAD_ETHTOOL_STATS_NUM \ @@ -658,6 +658,8 @@ bnad_get_strings(struct net_device *netdev, u32 stringset, u8 *string) string += ETH_GSTRING_LEN; sprintf(string, "rxq%d_allocbuf_failed", q_num); string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_mapbuf_failed", q_num); + string += ETH_GSTRING_LEN; sprintf(string, "rxq%d_producer_index", q_num); string += ETH_GSTRING_LEN; sprintf(string, "rxq%d_consumer_index", q_num); @@ -678,6 +680,9 @@ bnad_get_strings(struct net_device *netdev, u32 stringset, u8 *string) sprintf(string, "rxq%d_allocbuf_failed", q_num); string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_mapbuf_failed", + q_num); + string += ETH_GSTRING_LEN; sprintf(string, "rxq%d_producer_index", q_num); string += ETH_GSTRING_LEN; -- cgit v1.1 From 
37dd348270c1a48f0234354a06c0ce052b6c85b1 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 15 Sep 2016 22:47:52 +0200 Subject: bna: fix crash in bnad_get_strings() Commit 6e7333d "net: add rx_nohandler stat counter" added the new entry rx_nohandler into struct rtnl_link_stats64. Unfortunately the bna driver foolishly depends on the structure. It uses part of it for ethtool statistics and it's not bad but the driver assumes its size is constant as it defines a string for each existing entry. The problem occurs when the structure is extended because you need to modify bna driver as well. If you do not, any attempt to retrieve ethtool statistics results in a crash in bnad_get_strings(). The patch changes BNAD_ETHTOOL_STATS_NUM so it counts real number of strings in the array and also removes rtnl_link_stats64 entries that are not used in output and are always zero. Fixes: 6e7333d "net: add rx_nohandler stat counter" Signed-off-by: Ivan Vecera Signed-off-by: David S. Miller --- drivers/net/ethernet/brocade/bna/bnad_ethtool.c | 50 ++++++++++++------------- 1 file changed, 23 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c index 5671353..31f61a7 100644 --- a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c +++ b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c @@ -34,12 +34,7 @@ #define BNAD_NUM_RXQ_COUNTERS 7 #define BNAD_NUM_TXQ_COUNTERS 5 -#define BNAD_ETHTOOL_STATS_NUM \ - (sizeof(struct rtnl_link_stats64) / sizeof(u64) + \ - sizeof(struct bnad_drv_stats) / sizeof(u64) + \ - offsetof(struct bfi_enet_stats, rxf_stats[0]) / sizeof(u64)) - -static const char *bnad_net_stats_strings[BNAD_ETHTOOL_STATS_NUM] = { +static const char *bnad_net_stats_strings[] = { "rx_packets", "tx_packets", "rx_bytes", @@ -50,22 +45,10 @@ static const char *bnad_net_stats_strings[BNAD_ETHTOOL_STATS_NUM] = { "tx_dropped", "multicast", "collisions", - "rx_length_errors", - "rx_over_errors", "rx_crc_errors",
"rx_frame_errors", - "rx_fifo_errors", - "rx_missed_errors", - - "tx_aborted_errors", - "tx_carrier_errors", "tx_fifo_errors", - "tx_heartbeat_errors", - "tx_window_errors", - - "rx_compressed", - "tx_compressed", "netif_queue_stop", "netif_queue_wakeup", @@ -254,6 +237,8 @@ static const char *bnad_net_stats_strings[BNAD_ETHTOOL_STATS_NUM] = { "fc_tx_fid_parity_errors", }; +#define BNAD_ETHTOOL_STATS_NUM ARRAY_SIZE(bnad_net_stats_strings) + static int bnad_get_settings(struct net_device *netdev, struct ethtool_cmd *cmd) { @@ -859,9 +844,9 @@ bnad_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats, u64 *buf) { struct bnad *bnad = netdev_priv(netdev); - int i, j, bi; + int i, j, bi = 0; unsigned long flags; - struct rtnl_link_stats64 *net_stats64; + struct rtnl_link_stats64 net_stats64; u64 *stats64; u32 bmap; @@ -876,14 +861,25 @@ bnad_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats, * under the same lock */ spin_lock_irqsave(&bnad->bna_lock, flags); - bi = 0; - memset(buf, 0, stats->n_stats * sizeof(u64)); - - net_stats64 = (struct rtnl_link_stats64 *)buf; - bnad_netdev_qstats_fill(bnad, net_stats64); - bnad_netdev_hwstats_fill(bnad, net_stats64); - bi = sizeof(*net_stats64) / sizeof(u64); + memset(&net_stats64, 0, sizeof(net_stats64)); + bnad_netdev_qstats_fill(bnad, &net_stats64); + bnad_netdev_hwstats_fill(bnad, &net_stats64); + + buf[bi++] = net_stats64.rx_packets; + buf[bi++] = net_stats64.tx_packets; + buf[bi++] = net_stats64.rx_bytes; + buf[bi++] = net_stats64.tx_bytes; + buf[bi++] = net_stats64.rx_errors; + buf[bi++] = net_stats64.tx_errors; + buf[bi++] = net_stats64.rx_dropped; + buf[bi++] = net_stats64.tx_dropped; + buf[bi++] = net_stats64.multicast; + buf[bi++] = net_stats64.collisions; + buf[bi++] = net_stats64.rx_length_errors; + buf[bi++] = net_stats64.rx_crc_errors; + buf[bi++] = net_stats64.rx_frame_errors; + buf[bi++] = net_stats64.tx_fifo_errors; /* Get netif_queue_stopped from stack */ 
bnad->stats.drv_stats.netif_queue_stopped = netif_queue_stopped(netdev); -- cgit v1.1 From d4690f1e1cdabb4d61207b6787b1605a0dc0aeab Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 16 Sep 2016 00:11:45 +0100 Subject: fix iov_iter_fault_in_readable() ... by turning it into what used to be multipages counterpart Cc: stable@vger.kernel.org Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/linux/uio.h | 2 +- lib/iov_iter.c | 24 ++---------------------- 2 files changed, 3 insertions(+), 23 deletions(-) diff --git a/include/linux/uio.h b/include/linux/uio.h index 1b5d1cd..75b4aaf 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -76,7 +76,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, struct iov_iter *i, unsigned long offset, size_t bytes); void iov_iter_advance(struct iov_iter *i, size_t bytes); int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); -int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes); +#define iov_iter_fault_in_multipages_readable iov_iter_fault_in_readable size_t iov_iter_single_seg_count(const struct iov_iter *i); size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 9e8c738..7e3138c 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -291,33 +291,13 @@ done: } /* - * Fault in the first iovec of the given iov_iter, to a maximum length - * of bytes. Returns 0 on success, or non-zero if the memory could not be - * accessed (ie. because it is an invalid address). - * - * writev-intensive code may want this to prefault several iovecs -- that - * would be possible (callers must not rely on the fact that _only_ the - * first iovec will be faulted with the current implementation). 
- */ -int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes) -{ - if (!(i->type & (ITER_BVEC|ITER_KVEC))) { - char __user *buf = i->iov->iov_base + i->iov_offset; - bytes = min(bytes, i->iov->iov_len - i->iov_offset); - return fault_in_pages_readable(buf, bytes); - } - return 0; -} -EXPORT_SYMBOL(iov_iter_fault_in_readable); - -/* * Fault in one or more iovecs of the given iov_iter, to a maximum length of * bytes. For each iovec, fault in each page that constitutes the iovec. * * Return 0 on success, or non-zero if the memory could not be accessed (i.e. * because it is an invalid address). */ -int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes) +int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes) { size_t skip = i->iov_offset; const struct iovec *iov; @@ -334,7 +314,7 @@ int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes) } return 0; } -EXPORT_SYMBOL(iov_iter_fault_in_multipages_readable); +EXPORT_SYMBOL(iov_iter_fault_in_readable); void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov, unsigned long nr_segs, -- cgit v1.1 From 6244bd651236d86f59387d43c531b5f942a92b38 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Mon, 8 Aug 2016 17:48:20 -0600 Subject: exynos-drm: Fix unsupported GEM memory type error message to be clear Fix unsupported GEM memory type error message to include the memory type information. Signed-off-by: Shuah Khan Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_fb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c index e016640..40ce841 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fb.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c @@ -55,11 +55,11 @@ static int check_fb_gem_memory_type(struct drm_device *drm_dev, flags = exynos_gem->flags; /* - * without iommu support, not support physically non-continuous memory - * for framebuffer. 
+ * Physically non-contiguous memory type for framebuffer is not + * supported without IOMMU. */ if (IS_NONCONTIG_BUFFER(flags)) { - DRM_ERROR("cannot use this gem memory type for fb.\n"); + DRM_ERROR("Non-contiguous GEM memory is not supported.\n"); return -EINVAL; } -- cgit v1.1 From 479f12545460809cfc9093d90d6ed82d76388e97 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 31 Aug 2016 14:55:54 +0200 Subject: drm/exynos: fimc: fix system and runtime pm integration Use generic helpers instead of open-coding usage of runtime pm for system sleep pm, which was potentially broken for some corner cases. Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_fimc.c | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c index 0525c56..147ef0d 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c @@ -1753,32 +1753,6 @@ static int fimc_clk_ctrl(struct fimc_context *ctx, bool enable) return 0; } -#ifdef CONFIG_PM_SLEEP -static int fimc_suspend(struct device *dev) -{ - struct fimc_context *ctx = get_fimc_context(dev); - - DRM_DEBUG_KMS("id[%d]\n", ctx->id); - - if (pm_runtime_suspended(dev)) - return 0; - - return fimc_clk_ctrl(ctx, false); -} - -static int fimc_resume(struct device *dev) -{ - struct fimc_context *ctx = get_fimc_context(dev); - - DRM_DEBUG_KMS("id[%d]\n", ctx->id); - - if (!pm_runtime_suspended(dev)) - return fimc_clk_ctrl(ctx, true); - - return 0; -} -#endif - static int fimc_runtime_suspend(struct device *dev) { struct fimc_context *ctx = get_fimc_context(dev); @@ -1799,7 +1773,8 @@ static int fimc_runtime_resume(struct device *dev) #endif static const struct dev_pm_ops fimc_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(fimc_suspend, fimc_resume) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) 
SET_RUNTIME_PM_OPS(fimc_runtime_suspend, fimc_runtime_resume, NULL) }; -- cgit v1.1 From 83bd7b20aaf499030bf857ef64de3c19309b107d Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 31 Aug 2016 14:55:55 +0200 Subject: drm/exynos: gsc: fix system and runtime pm integration Use generic helpers instead of open-coding usage of runtime pm for system sleep pm, which was potentially broken for some corner cases. Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_gsc.c | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index 5d20da8..b1894aa9 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -1760,32 +1760,6 @@ static int gsc_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int gsc_suspend(struct device *dev) -{ - struct gsc_context *ctx = get_gsc_context(dev); - - DRM_DEBUG_KMS("id[%d]\n", ctx->id); - - if (pm_runtime_suspended(dev)) - return 0; - - return gsc_clk_ctrl(ctx, false); -} - -static int gsc_resume(struct device *dev) -{ - struct gsc_context *ctx = get_gsc_context(dev); - - DRM_DEBUG_KMS("id[%d]\n", ctx->id); - - if (!pm_runtime_suspended(dev)) - return gsc_clk_ctrl(ctx, true); - - return 0; -} -#endif - #ifdef CONFIG_PM static int gsc_runtime_suspend(struct device *dev) { @@ -1807,7 +1781,8 @@ static int gsc_runtime_resume(struct device *dev) #endif static const struct dev_pm_ops gsc_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(gsc_suspend, gsc_resume) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) SET_RUNTIME_PM_OPS(gsc_runtime_suspend, gsc_runtime_resume, NULL) }; -- cgit v1.1 From 5b67723e6096f5470f361656cd108430d3b12c67 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 31 Aug 2016 14:55:56 +0200 Subject: drm/exynos: rotator: fix system and runtime pm integration Use 
generic helpers instead of open-coding usage of runtime pm for system sleep pm, which was potentially broken for some corner cases. Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_rotator.c | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c index 404367a..6591e40 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c +++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c @@ -794,29 +794,6 @@ static int rotator_clk_crtl(struct rot_context *rot, bool enable) return 0; } - -#ifdef CONFIG_PM_SLEEP -static int rotator_suspend(struct device *dev) -{ - struct rot_context *rot = dev_get_drvdata(dev); - - if (pm_runtime_suspended(dev)) - return 0; - - return rotator_clk_crtl(rot, false); -} - -static int rotator_resume(struct device *dev) -{ - struct rot_context *rot = dev_get_drvdata(dev); - - if (!pm_runtime_suspended(dev)) - return rotator_clk_crtl(rot, true); - - return 0; -} -#endif - static int rotator_runtime_suspend(struct device *dev) { struct rot_context *rot = dev_get_drvdata(dev); @@ -833,7 +810,8 @@ static int rotator_runtime_resume(struct device *dev) #endif static const struct dev_pm_ops rotator_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(rotator_suspend, rotator_resume) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) SET_RUNTIME_PM_OPS(rotator_runtime_suspend, rotator_runtime_resume, NULL) }; -- cgit v1.1 From b05984e21a7e000bf5074ace00d7a574944b2c16 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 31 Aug 2016 14:55:57 +0200 Subject: drm/exynos: g2d: fix system and runtime pm integration Move code from system sleep pm to runtime pm callbacks to ensure proper driver state preservation when device is under power domain. Then, use generic helpers for using runtime pm for system sleep pm. 
Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_g2d.c | 29 +++++++---------------------- 1 file changed, 7 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index 4bf00f5..6eca8bb 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -1475,8 +1475,8 @@ static int g2d_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int g2d_suspend(struct device *dev) +#ifdef CONFIG_PM +static int g2d_runtime_suspend(struct device *dev) { struct g2d_data *g2d = dev_get_drvdata(dev); @@ -1490,25 +1490,6 @@ static int g2d_suspend(struct device *dev) flush_work(&g2d->runqueue_work); - return 0; -} - -static int g2d_resume(struct device *dev) -{ - struct g2d_data *g2d = dev_get_drvdata(dev); - - g2d->suspended = false; - g2d_exec_runqueue(g2d); - - return 0; -} -#endif - -#ifdef CONFIG_PM -static int g2d_runtime_suspend(struct device *dev) -{ - struct g2d_data *g2d = dev_get_drvdata(dev); - clk_disable_unprepare(g2d->gate_clk); return 0; @@ -1523,12 +1504,16 @@ static int g2d_runtime_resume(struct device *dev) if (ret < 0) dev_warn(dev, "failed to enable clock.\n"); + g2d->suspended = false; + g2d_exec_runqueue(g2d); + return ret; } #endif static const struct dev_pm_ops g2d_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(g2d_suspend, g2d_resume) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) SET_RUNTIME_PM_OPS(g2d_runtime_suspend, g2d_runtime_resume, NULL) }; -- cgit v1.1 From 65c0044ca8d7c7bbccae37f0ff2972f0210e9f41 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 17 Sep 2016 07:52:49 -0700 Subject: avr32: fix 'undefined reference to `___copy_from_user' avr32 builds fail with: arch/avr32/kernel/built-in.o: In function `arch_ptrace': (.text+0x650): undefined reference to `___copy_from_user' arch/avr32/kernel/built-in.o:(___ksymtab+___copy_from_user+0x0): 
undefined reference to `___copy_from_user' kernel/built-in.o: In function `proc_doulongvec_ms_jiffies_minmax': (.text+0x5dd8): undefined reference to `___copy_from_user' kernel/built-in.o: In function `proc_dointvec_minmax_sysadmin': sysctl.c:(.text+0x6174): undefined reference to `___copy_from_user' kernel/built-in.o: In function `ptrace_has_cap': ptrace.c:(.text+0x69c0): undefined reference to `___copy_from_user' kernel/built-in.o:ptrace.c:(.text+0x6b90): more undefined references to `___copy_from_user' follow Fixes: 8630c32275ba ("avr32: fix copy_from_user()") Cc: Al Viro Acked-by: Havard Skinnemoen Acked-by: Hans-Christian Noren Egtvedt Signed-off-by: Guenter Roeck --- arch/avr32/lib/copy_user.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/avr32/lib/copy_user.S b/arch/avr32/lib/copy_user.S index 96a6de9..0753734 100644 --- a/arch/avr32/lib/copy_user.S +++ b/arch/avr32/lib/copy_user.S @@ -23,8 +23,8 @@ */ .text .align 1 - .global copy_from_user - .type copy_from_user, @function + .global ___copy_from_user + .type ___copy_from_user, @function ___copy_from_user: branch_if_kernel r8, __copy_user ret_if_privileged r8, r11, r10, r10 -- cgit v1.1 From 8e4b72054f554967827e18be1de0e8122e6efc04 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 17 Sep 2016 12:57:24 -0700 Subject: openrisc: fix the fix of copy_from_user() Since commit acb2505d0119 ("openrisc: fix copy_from_user()"), copy_from_user() returns the number of bytes requested, not the number of bytes not copied. 
Cc: Al Viro Fixes: acb2505d0119 ("openrisc: fix copy_from_user()") Signed-off-by: Guenter Roeck --- arch/openrisc/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h index cbad29b..5cc6b4f 100644 --- a/arch/openrisc/include/asm/uaccess.h +++ b/arch/openrisc/include/asm/uaccess.h @@ -276,7 +276,7 @@ copy_from_user(void *to, const void *from, unsigned long n) unsigned long res = n; if (likely(access_ok(VERIFY_READ, from, n))) - n = __copy_tofrom_user(to, from, n); + res = __copy_tofrom_user(to, from, n); if (unlikely(res)) memset(to + (n - res), 0, res); return res; -- cgit v1.1 From 3be7988674ab33565700a37b210f502563d932e6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 18 Sep 2016 17:27:41 -0700 Subject: Linux 4.8-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1a8c8dd..74e22c2 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 8 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Psychotic Stoned Sheep # *DOCUMENTATION* -- cgit v1.1 From 4158dbe1be9b420e1fdd9ec5c033647a605ca485 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 18 Sep 2016 22:51:38 +0900 Subject: Subject: [PATCH, RESEND] drm: exynos: avoid unused function warning When CONFIG_PM is not set, we get a warning about an unused function: drivers/gpu/drm/exynos/exynos_drm_gsc.c:1219:12: error: 'gsc_clk_ctrl' defined but not used [-Werror=unused-function] static int gsc_clk_ctrl(struct gsc_context *ctx, bool enable) ^~~~~~~~~~~~ This removes the two #ifdef checks in this file and instead marks the functions as __maybe_unused, which is a more reliable way of doing the same, allowing better build coverage and avoiding the warning above. 
Signed-off-by: Arnd Bergmann Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_gsc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index b1894aa9..52a9d26 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -1760,8 +1760,7 @@ static int gsc_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM -static int gsc_runtime_suspend(struct device *dev) +static int __maybe_unused gsc_runtime_suspend(struct device *dev) { struct gsc_context *ctx = get_gsc_context(dev); @@ -1770,7 +1769,7 @@ static int gsc_runtime_suspend(struct device *dev) return gsc_clk_ctrl(ctx, false); } -static int gsc_runtime_resume(struct device *dev) +static int __maybe_unused gsc_runtime_resume(struct device *dev) { struct gsc_context *ctx = get_gsc_context(dev); @@ -1778,7 +1777,6 @@ static int gsc_runtime_resume(struct device *dev) return gsc_clk_ctrl(ctx, true); } -#endif static const struct dev_pm_ops gsc_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, -- cgit v1.1 From 2c89791eeb6f3873349c240345c1879ef6a16f63 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 14 Sep 2016 02:04:18 +0800 Subject: sctp: remove the unnecessary state check in sctp_outq_tail Data Chunks are only sent by sctp_primitive_SEND, in which sctp checks the asoc's state through statetable before calling sctp_outq_tail. So there's no need to check the asoc's state again in sctp_outq_tail. Besides, sctp_do_sm is protected by lock_sock, even if sending msg is interrupted by timer events, the event's processes still need to acquire lock_sock first. It means no others CMDs can be enqueue into side effect list before CMD_SEND_MSG to change asoc->state, so it's safe to remove it. This patch is to remove redundant asoc->state check from sctp_outq_tail. Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- net/sctp/outqueue.c | 53 ++++++++++++++--------------------------------------- 1 file changed, 14 insertions(+), 39 deletions(-) diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 72e54a4..da2418b 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -299,50 +299,25 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp) * immediately. */ if (sctp_chunk_is_data(chunk)) { - /* Is it OK to queue data chunks? */ - /* From 9. Termination of Association - * - * When either endpoint performs a shutdown, the - * association on each peer will stop accepting new - * data from its user and only deliver data in queue - * at the time of sending or receiving the SHUTDOWN - * chunk. - */ - switch (q->asoc->state) { - case SCTP_STATE_CLOSED: - case SCTP_STATE_SHUTDOWN_PENDING: - case SCTP_STATE_SHUTDOWN_SENT: - case SCTP_STATE_SHUTDOWN_RECEIVED: - case SCTP_STATE_SHUTDOWN_ACK_SENT: - /* Cannot send after transport endpoint shutdown */ - error = -ESHUTDOWN; - break; - - default: - pr_debug("%s: outqueueing: outq:%p, chunk:%p[%s])\n", - __func__, q, chunk, chunk && chunk->chunk_hdr ? - sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) : - "illegal chunk"); - - sctp_chunk_hold(chunk); - sctp_outq_tail_data(q, chunk); - if (chunk->asoc->prsctp_enable && - SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags)) - chunk->asoc->sent_cnt_removable++; - if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) - SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS); - else - SCTP_INC_STATS(net, SCTP_MIB_OUTORDERCHUNKS); - break; - } + pr_debug("%s: outqueueing: outq:%p, chunk:%p[%s])\n", + __func__, q, chunk, chunk && chunk->chunk_hdr ? 
+ sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) : + "illegal chunk"); + + sctp_chunk_hold(chunk); + sctp_outq_tail_data(q, chunk); + if (chunk->asoc->prsctp_enable && + SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags)) + chunk->asoc->sent_cnt_removable++; + if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) + SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS); + else + SCTP_INC_STATS(net, SCTP_MIB_OUTORDERCHUNKS); } else { list_add_tail(&chunk->list, &q->control_chunk_list); SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); } - if (error < 0) - return error; - if (!q->cork) error = sctp_outq_flush(q, 0, gfp); -- cgit v1.1 From 66388f2c08dfa38071f9eceae7bb29060d9be9aa Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 14 Sep 2016 02:04:19 +0800 Subject: sctp: do not return the transmit err back to sctp_sendmsg Once a chunk is enqueued successfully, sctp queues can take care of it. Even if it fails to transmit (like because of nomem), it should be put into retransmit queue. If sctp reports this error to users, it confuses them: they may resend that msg, although the in-kernel sctp stack is already in charge of retransmitting it. Besides, this error probably is not from the failure of transmitting current msg, but transmitting or retransmitting another msg's chunks, as sctp_outq_flush just tries to send out all transports' chunks. This patch is to make sctp_cmd_send_msg return void, and not return the transmit err back to sctp_sendmsg Fixes: 8b570dc9f7b6 ("sctp: only drop the reference on the datamsg after sending a msg") Signed-off-by: Xin Long Signed-off-by: David S.
Miller --- net/sctp/sm_sideeffect.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 12d4519..cf6e4f0 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1020,19 +1020,13 @@ static void sctp_cmd_t1_timer_update(struct sctp_association *asoc, * This way the whole message is queued up and bundling if * encouraged for small fragments. */ -static int sctp_cmd_send_msg(struct sctp_association *asoc, - struct sctp_datamsg *msg, gfp_t gfp) +static void sctp_cmd_send_msg(struct sctp_association *asoc, + struct sctp_datamsg *msg, gfp_t gfp) { struct sctp_chunk *chunk; - int error = 0; - - list_for_each_entry(chunk, &msg->chunks, frag_list) { - error = sctp_outq_tail(&asoc->outqueue, chunk, gfp); - if (error) - break; - } - return error; + list_for_each_entry(chunk, &msg->chunks, frag_list) + sctp_outq_tail(&asoc->outqueue, chunk, gfp); } @@ -1709,7 +1703,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, sctp_outq_cork(&asoc->outqueue); local_cork = 1; } - error = sctp_cmd_send_msg(asoc, cmd->obj.msg, gfp); + sctp_cmd_send_msg(asoc, cmd->obj.msg, gfp); break; case SCTP_CMD_SEND_NEXT_ASCONF: sctp_cmd_send_asconf(asoc); -- cgit v1.1 From b61c654f9b3f1a271217e46c893f80565b1f754d Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 14 Sep 2016 02:04:20 +0800 Subject: sctp: free msg->chunks when sctp_primitive_SEND return err Last patch "sctp: do not return the transmit err back to sctp_sendmsg" made sctp_primitive_SEND return err only when asoc state is unavailable. In this case, chunks are not enqueued, they have no chance to be freed if we don't take care of them later. 
This Patch is actually to revert commit 1cd4d5c4326a ("sctp: remove the unused sctp_datamsg_free()"), commit 69b5777f2e57 ("sctp: hold the chunks only after the chunk is enqueued in outq") and commit 8b570dc9f7b6 ("sctp: only drop the reference on the datamsg after sending a msg"), to use sctp_datamsg_free to free the chunks of current msg. Fixes: 8b570dc9f7b6 ("sctp: only drop the reference on the datamsg after sending a msg") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 1 + net/sctp/chunk.c | 13 +++++++++++++ net/sctp/outqueue.c | 1 - net/sctp/socket.c | 8 ++++++-- 4 files changed, 20 insertions(+), 3 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index ce93c4b..f61fb7c 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -537,6 +537,7 @@ struct sctp_datamsg { struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *, struct sctp_sndrcvinfo *, struct iov_iter *); +void sctp_datamsg_free(struct sctp_datamsg *); void sctp_datamsg_put(struct sctp_datamsg *); void sctp_chunk_fail(struct sctp_chunk *, int error); int sctp_chunk_abandoned(struct sctp_chunk *); diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index a55e547..af9cc80 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -70,6 +70,19 @@ static struct sctp_datamsg *sctp_datamsg_new(gfp_t gfp) return msg; } +void sctp_datamsg_free(struct sctp_datamsg *msg) +{ + struct sctp_chunk *chunk; + + /* This doesn't have to be a _safe vairant because + * sctp_chunk_free() only drops the refs. + */ + list_for_each_entry(chunk, &msg->chunks, frag_list) + sctp_chunk_free(chunk); + + sctp_datamsg_put(msg); +} + /* Final destructruction of datamsg memory. 
*/ static void sctp_datamsg_destroy(struct sctp_datamsg *msg) { diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index da2418b..6c109b0 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -304,7 +304,6 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp) sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) : "illegal chunk"); - sctp_chunk_hold(chunk); sctp_outq_tail_data(q, chunk); if (chunk->asoc->prsctp_enable && SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags)) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9fc417a..6cdc61c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1958,6 +1958,8 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) /* Now send the (possibly) fragmented message. */ list_for_each_entry(chunk, &datamsg->chunks, frag_list) { + sctp_chunk_hold(chunk); + /* Do accounting for the write space. */ sctp_set_owner_w(chunk); @@ -1970,13 +1972,15 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) * breaks. */ err = sctp_primitive_SEND(net, asoc, datamsg); - sctp_datamsg_put(datamsg); /* Did the lower layer accept the chunk? */ - if (err) + if (err) { + sctp_datamsg_free(datamsg); goto out_free; + } pr_debug("%s: we sent primitively\n", __func__); + sctp_datamsg_put(datamsg); err = msg_len; if (unlikely(wait_connect)) { -- cgit v1.1 From 645194409b0634a43890ec27c491c368b3bffc07 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 14 Sep 2016 02:04:21 +0800 Subject: sctp: save transmit error to sk_err in sctp_outq_flush Every time when sctp calls sctp_outq_flush, it sends out the chunks of control queue, retransmit queue and data queue. Even if some chunks fail to transmit, it still has to flush all the transports, as it's the only chance to clean that transmit_list. So the latest transmit error here should be returned back. This transmit error is an internal error of sctp stack.
I checked all the places where it uses the transmit error (the return value of sctp_outq_flush), most of them are actually just save it to sk_err. Except for sctp_assoc/endpoint_bh_rcv, they will drop the chunk if it's failed to send a REPLY, which is actually incorrect, as we can't be sure the error that sctp_outq_flush returns is from sending that REPLY. So it's meaningless for sctp_outq_flush to return error back. This patch is to save transmit error to sk_err in sctp_outq_flush, the new error can update the old value. Eventually, sctp_wait_for_* would check for it. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/output.c | 3 ++- net/sctp/outqueue.c | 21 ++++++++++++--------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/net/sctp/output.c b/net/sctp/output.c index 31b7bc3..f2597a9 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -180,7 +180,6 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet, int one_packet, gfp_t gfp) { sctp_xmit_t retval; - int error = 0; pr_debug("%s: packet:%p size:%Zu chunk:%p size:%d\n", __func__, packet, packet->size, chunk, chunk->skb ? chunk->skb->len : -1); @@ -188,6 +187,8 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet, switch ((retval = (sctp_packet_append_chunk(packet, chunk)))) { case SCTP_XMIT_PMTU_FULL: if (!packet->has_cookie_echo) { + int error = 0; + error = sctp_packet_transmit(packet, gfp); if (error < 0) chunk->skb->sk->sk_err = -error; diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 6c109b0..052a479 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -533,7 +533,6 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, sctp_retransmit_reason_t reason) { struct net *net = sock_net(q->asoc->base.sk); - int error = 0; switch (reason) { case SCTP_RTXR_T3_RTX: @@ -577,10 +576,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, * will be flushed at the end. 
*/ if (reason != SCTP_RTXR_FAST_RTX) - error = sctp_outq_flush(q, /* rtx_timeout */ 1, GFP_ATOMIC); - - if (error) - q->asoc->base.sk->sk_err = -error; + sctp_outq_flush(q, /* rtx_timeout */ 1, GFP_ATOMIC); } /* @@ -893,8 +889,10 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) sctp_packet_config(&singleton, vtag, 0); sctp_packet_append_chunk(&singleton, chunk); error = sctp_packet_transmit(&singleton, gfp); - if (error < 0) - return error; + if (error < 0) { + asoc->base.sk->sk_err = -error; + return 0; + } break; case SCTP_CID_ABORT: @@ -992,6 +990,8 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) retran: error = sctp_outq_flush_rtx(q, packet, rtx_timeout, &start_timer); + if (error < 0) + asoc->base.sk->sk_err = -error; if (start_timer) { sctp_transport_reset_t3_rtx(transport); @@ -1166,14 +1166,17 @@ sctp_flush_out: struct sctp_transport, send_ready); packet = &t->packet; - if (!sctp_packet_empty(packet)) + if (!sctp_packet_empty(packet)) { error = sctp_packet_transmit(packet, gfp); + if (error < 0) + asoc->base.sk->sk_err = -error; + } /* Clear the burst limited state, if any */ sctp_transport_burst_reset(t); } - return error; + return 0; } /* Update unack_data based on the incoming SACK chunk */ -- cgit v1.1 From 83dbc3d4a38411ef38f680d7045c8478cc9c5a56 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 14 Sep 2016 02:04:22 +0800 Subject: sctp: make sctp_outq_flush/tail/uncork return void sctp_outq_flush return value is meaningless now, this patch is to make sctp_outq_flush return void, as well as sctp_outq_tail and sctp_outq_uncork. Signed-off-by: Xin Long Signed-off-by: David S.
Miller --- include/net/sctp/structs.h | 4 ++-- net/sctp/outqueue.c | 19 +++++++------------ net/sctp/sm_sideeffect.c | 9 ++++----- 3 files changed, 13 insertions(+), 19 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index f61fb7c..8693dc4 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1077,7 +1077,7 @@ struct sctp_outq { void sctp_outq_init(struct sctp_association *, struct sctp_outq *); void sctp_outq_teardown(struct sctp_outq *); void sctp_outq_free(struct sctp_outq*); -int sctp_outq_tail(struct sctp_outq *, struct sctp_chunk *chunk, gfp_t); +void sctp_outq_tail(struct sctp_outq *, struct sctp_chunk *chunk, gfp_t); int sctp_outq_sack(struct sctp_outq *, struct sctp_chunk *); int sctp_outq_is_empty(const struct sctp_outq *); void sctp_outq_restart(struct sctp_outq *); @@ -1085,7 +1085,7 @@ void sctp_outq_restart(struct sctp_outq *); void sctp_retransmit(struct sctp_outq *, struct sctp_transport *, sctp_retransmit_reason_t); void sctp_retransmit_mark(struct sctp_outq *, struct sctp_transport *, __u8); -int sctp_outq_uncork(struct sctp_outq *, gfp_t gfp); +void sctp_outq_uncork(struct sctp_outq *, gfp_t gfp); void sctp_prsctp_prune(struct sctp_association *asoc, struct sctp_sndrcvinfo *sinfo, int msg_len); /* Uncork and flush an outqueue. */ diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 052a479..8c3f446 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -68,7 +68,7 @@ static void sctp_mark_missing(struct sctp_outq *q, static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 sack_ctsn); -static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp); +static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp); /* Add data to the front of the queue. */ static inline void sctp_outq_head_data(struct sctp_outq *q, @@ -285,10 +285,9 @@ void sctp_outq_free(struct sctp_outq *q) } /* Put a new chunk in an sctp_outq. 
*/ -int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp) +void sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp) { struct net *net = sock_net(q->asoc->base.sk); - int error = 0; pr_debug("%s: outq:%p, chunk:%p[%s]\n", __func__, q, chunk, chunk && chunk->chunk_hdr ? @@ -318,9 +317,7 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp) } if (!q->cork) - error = sctp_outq_flush(q, 0, gfp); - - return error; + sctp_outq_flush(q, 0, gfp); } /* Insert a chunk into the sorted list based on the TSNs. The retransmit list @@ -748,12 +745,12 @@ redo: } /* Cork the outqueue so queued chunks are really queued. */ -int sctp_outq_uncork(struct sctp_outq *q, gfp_t gfp) +void sctp_outq_uncork(struct sctp_outq *q, gfp_t gfp) { if (q->cork) q->cork = 0; - return sctp_outq_flush(q, 0, gfp); + sctp_outq_flush(q, 0, gfp); } @@ -766,7 +763,7 @@ int sctp_outq_uncork(struct sctp_outq *q, gfp_t gfp) * locking concerns must be made. Today we use the sock lock to protect * this function. */ -static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) +static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) { struct sctp_packet *packet; struct sctp_packet singleton; @@ -891,7 +888,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) error = sctp_packet_transmit(&singleton, gfp); if (error < 0) { asoc->base.sk->sk_err = -error; - return 0; + return; } break; @@ -1175,8 +1172,6 @@ sctp_flush_out: /* Clear the burst limited state, if any */ sctp_transport_burst_reset(t); } - - return 0; } /* Update unack_data based on the incoming SACK chunk */ diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index cf6e4f0..c345bf1 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1421,8 +1421,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, local_cork = 1; } /* Send a chunk to our peer. 
*/ - error = sctp_outq_tail(&asoc->outqueue, cmd->obj.chunk, - gfp); + sctp_outq_tail(&asoc->outqueue, cmd->obj.chunk, gfp); break; case SCTP_CMD_SEND_PKT: @@ -1676,7 +1675,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_FORCE_PRIM_RETRAN: t = asoc->peer.retran_path; asoc->peer.retran_path = asoc->peer.primary_path; - error = sctp_outq_uncork(&asoc->outqueue, gfp); + sctp_outq_uncork(&asoc->outqueue, gfp); local_cork = 0; asoc->peer.retran_path = t; break; @@ -1733,9 +1732,9 @@ out: */ if (asoc && SCTP_EVENT_T_CHUNK == event_type && chunk) { if (chunk->end_of_packet || chunk->singleton) - error = sctp_outq_uncork(&asoc->outqueue, gfp); + sctp_outq_uncork(&asoc->outqueue, gfp); } else if (local_cork) - error = sctp_outq_uncork(&asoc->outqueue, gfp); + sctp_outq_uncork(&asoc->outqueue, gfp); if (sp->data_ready_signalled) sp->data_ready_signalled = 0; -- cgit v1.1 From 41001faf95faaff7c4f4f93c6bb544ee227ad0cc Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 14 Sep 2016 02:04:23 +0800 Subject: sctp: not return ENOMEM err back in sctp_packet_transmit As David and Marcelo's suggestion, ENOMEM err shouldn't return back to user in transmit path. Instead, sctp's retransmit would take care of the chunks that fail to send because of ENOMEM. This patch is only to do some release job when alloc_skb fails, not to return ENOMEM back any more. Besides, it also cleans up sctp_packet_transmit's err path, and fixes some issues in err path: - It didn't free the head skb in nomem: path. - No need to check nskb in no_route: path. - It should goto err: path if alloc_skb fails for head. - Not all the NOMEMs should free nskb. Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- net/sctp/output.c | 47 ++++++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/net/sctp/output.c b/net/sctp/output.c index f2597a9..0c605ec 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -442,14 +442,14 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) * time. Application may notice this error. */ pr_err_once("Trying to GSO but underlying device doesn't support it."); - goto nomem; + goto err; } } else { pkt_size = packet->size; } head = alloc_skb(pkt_size + MAX_HEADER, gfp); if (!head) - goto nomem; + goto err; if (gso) { NAPI_GRO_CB(head)->last = head; skb_shinfo(head)->gso_type = sk->sk_gso_type; @@ -470,8 +470,12 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) } } dst = dst_clone(tp->dst); - if (!dst) - goto no_route; + if (!dst) { + if (asoc) + IP_INC_STATS(sock_net(asoc->base.sk), + IPSTATS_MIB_OUTNOROUTES); + goto nodst; + } skb_dst_set(head, dst); /* Build the SCTP header. */ @@ -622,8 +626,10 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) if (!gso) break; - if (skb_gro_receive(&head, nskb)) + if (skb_gro_receive(&head, nskb)) { + kfree_skb(nskb); goto nomem; + } nskb = NULL; if (WARN_ON_ONCE(skb_shinfo(head)->gso_segs >= sk->sk_gso_max_segs)) @@ -717,18 +723,13 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) } head->ignore_df = packet->ipfragok; tp->af_specific->sctp_xmit(head, tp); + goto out; -out: - sctp_packet_reset(packet); - return err; -no_route: - kfree_skb(head); - if (nskb != head) - kfree_skb(nskb); - - if (asoc) - IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES); +nomem: + if (packet->auth && list_empty(&packet->auth->list)) + sctp_chunk_free(packet->auth); +nodst: /* FIXME: Returning the 'err' will effect all the associations * associated with a socket, although only one of the paths of the * association is unreachable. @@ -737,22 +738,18 @@ no_route: * required. 
*/ /* err = -EHOSTUNREACH; */ -err: - /* Control chunks are unreliable so just drop them. DATA chunks - * will get resent or dropped later. - */ + kfree_skb(head); +err: list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { list_del_init(&chunk->list); if (!sctp_chunk_is_data(chunk)) sctp_chunk_free(chunk); } - goto out; -nomem: - if (packet->auth && list_empty(&packet->auth->list)) - sctp_chunk_free(packet->auth); - err = -ENOMEM; - goto err; + +out: + sctp_packet_reset(packet); + return err; } /******************************************************************** -- cgit v1.1 From 40773966ccf1985a1b2bb570a03cbeaf1cbd4e00 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Thu, 15 Sep 2016 19:11:52 -0300 Subject: openvswitch: fix flow stats accounting when node 0 is not possible On a system with only node 1 as possible, all statistics is going to be accounted on node 0 as it will have a single writer. However, when getting and clearing the statistics, node 0 is not going to be considered, as it's not a possible node. Tested that statistics are not zero on a system with only node 1 possible. Also compile-tested with CONFIG_NUMA off. Signed-off-by: Thadeu Lima de Souza Cascardo Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- net/openvswitch/flow.c | 6 ++++-- net/openvswitch/flow_table.c | 5 +++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 1240ae3..5b80612 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -142,7 +142,8 @@ void ovs_flow_stats_get(const struct sw_flow *flow, *tcp_flags = 0; memset(ovs_stats, 0, sizeof(*ovs_stats)); - for_each_node(node) { + /* We open code this to make sure node 0 is always considered */ + for (node = 0; node < MAX_NUMNODES; node = next_node(node, node_possible_map)) { struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[node]); if (stats) { @@ -165,7 +166,8 @@ void ovs_flow_stats_clear(struct sw_flow *flow) { int node; - for_each_node(node) { + /* We open code this to make sure node 0 is always considered */ + for (node = 0; node < MAX_NUMNODES; node = next_node(node, node_possible_map)) { struct flow_stats *stats = ovsl_dereference(flow->stats[node]); if (stats) { diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index d073fff..957a3c3 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -148,8 +148,9 @@ static void flow_free(struct sw_flow *flow) kfree(flow->id.unmasked_key); if (flow->sf_acts) ovs_nla_free_flow_actions((struct sw_flow_actions __force *)flow->sf_acts); - for_each_node(node) - if (flow->stats[node]) + /* We open code this to make sure node 0 is always considered */ + for (node = 0; node < MAX_NUMNODES; node = next_node(node, node_possible_map)) + if (node != 0 && flow->stats[node]) kmem_cache_free(flow_stats_cache, (struct flow_stats __force *)flow->stats[node]); kmem_cache_free(flow_cache, flow); -- cgit v1.1 From db74a3335e0f645e3139c80bcfc90feb01d8e304 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Thu, 15 Sep 2016 19:11:53 -0300 Subject: openvswitch: use percpu flow stats Instead of using flow stats per NUMA node, use it per CPU. 
When using megaflows, the stats lock can be a bottleneck in scalability. On an E5-2690 12-core system, usual throughput went from ~4Mpps to ~15Mpps when forwarding between two 40GbE ports with a single flow configured on the datapath. This has been tested on a system with possible CPUs 0-7,16-23. After module removal, there was no corruption on the slab cache. Signed-off-by: Thadeu Lima de Souza Cascardo Cc: pravin shelar Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/flow.c | 42 ++++++++++++++++++++++-------------------- net/openvswitch/flow.h | 4 ++-- net/openvswitch/flow_table.c | 26 +++++++++----------------- 3 files changed, 33 insertions(+), 39 deletions(-) diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 5b80612..0fa45439 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -72,32 +73,33 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, { struct flow_stats *stats; int node = numa_node_id(); + int cpu = smp_processor_id(); int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); - stats = rcu_dereference(flow->stats[node]); + stats = rcu_dereference(flow->stats[cpu]); - /* Check if already have node-specific stats. */ + /* Check if already have CPU-specific stats. */ if (likely(stats)) { spin_lock(&stats->lock); /* Mark if we write on the pre-allocated stats. */ - if (node == 0 && unlikely(flow->stats_last_writer != node)) - flow->stats_last_writer = node; + if (cpu == 0 && unlikely(flow->stats_last_writer != cpu)) + flow->stats_last_writer = cpu; } else { stats = rcu_dereference(flow->stats[0]); /* Pre-allocated. */ spin_lock(&stats->lock); - /* If the current NUMA-node is the only writer on the + /* If the current CPU is the only writer on the * pre-allocated stats keep using them.
*/ - if (unlikely(flow->stats_last_writer != node)) { + if (unlikely(flow->stats_last_writer != cpu)) { /* A previous locker may have already allocated the - * stats, so we need to check again. If node-specific + * stats, so we need to check again. If CPU-specific * stats were already allocated, we update the pre- * allocated stats as we have already locked them. */ - if (likely(flow->stats_last_writer != NUMA_NO_NODE) - && likely(!rcu_access_pointer(flow->stats[node]))) { - /* Try to allocate node-specific stats. */ + if (likely(flow->stats_last_writer != -1) && + likely(!rcu_access_pointer(flow->stats[cpu]))) { + /* Try to allocate CPU-specific stats. */ struct flow_stats *new_stats; new_stats = @@ -114,12 +116,12 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, new_stats->tcp_flags = tcp_flags; spin_lock_init(&new_stats->lock); - rcu_assign_pointer(flow->stats[node], + rcu_assign_pointer(flow->stats[cpu], new_stats); goto unlock; } } - flow->stats_last_writer = node; + flow->stats_last_writer = cpu; } } @@ -136,15 +138,15 @@ void ovs_flow_stats_get(const struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, unsigned long *used, __be16 *tcp_flags) { - int node; + int cpu; *used = 0; *tcp_flags = 0; memset(ovs_stats, 0, sizeof(*ovs_stats)); - /* We open code this to make sure node 0 is always considered */ - for (node = 0; node < MAX_NUMNODES; node = next_node(node, node_possible_map)) { - struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[node]); + /* We open code this to make sure cpu 0 is always considered */ + for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) { + struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]); if (stats) { /* Local CPU may write on non-local stats, so we must @@ -164,11 +166,11 @@ void ovs_flow_stats_get(const struct sw_flow *flow, /* Called with ovs_mutex. 
*/ void ovs_flow_stats_clear(struct sw_flow *flow) { - int node; + int cpu; - /* We open code this to make sure node 0 is always considered */ - for (node = 0; node < MAX_NUMNODES; node = next_node(node, node_possible_map)) { - struct flow_stats *stats = ovsl_dereference(flow->stats[node]); + /* We open code this to make sure cpu 0 is always considered */ + for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) { + struct flow_stats *stats = ovsl_dereference(flow->stats[cpu]); if (stats) { spin_lock_bh(&stats->lock); diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 156a302..ae783f5 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -178,14 +178,14 @@ struct sw_flow { struct hlist_node node[2]; u32 hash; } flow_table, ufid_table; - int stats_last_writer; /* NUMA-node id of the last writer on + int stats_last_writer; /* CPU id of the last writer on * 'stats[0]'. */ struct sw_flow_key key; struct sw_flow_id id; struct sw_flow_mask *mask; struct sw_flow_actions __rcu *sf_acts; - struct flow_stats __rcu *stats[]; /* One for each NUMA node. First one + struct flow_stats __rcu *stats[]; /* One for each CPU. First one * is allocated at flow creation time, * the rest are allocated on demand * while holding the 'stats[0].lock'. 
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 957a3c3..ea7a807 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -79,17 +80,12 @@ struct sw_flow *ovs_flow_alloc(void) { struct sw_flow *flow; struct flow_stats *stats; - int node; - flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); + flow = kmem_cache_zalloc(flow_cache, GFP_KERNEL); if (!flow) return ERR_PTR(-ENOMEM); - flow->sf_acts = NULL; - flow->mask = NULL; - flow->id.unmasked_key = NULL; - flow->id.ufid_len = 0; - flow->stats_last_writer = NUMA_NO_NODE; + flow->stats_last_writer = -1; /* Initialize the default stat node. */ stats = kmem_cache_alloc_node(flow_stats_cache, @@ -102,10 +98,6 @@ struct sw_flow *ovs_flow_alloc(void) RCU_INIT_POINTER(flow->stats[0], stats); - for_each_node(node) - if (node != 0) - RCU_INIT_POINTER(flow->stats[node], NULL); - return flow; err: kmem_cache_free(flow_cache, flow); @@ -142,17 +134,17 @@ static struct flex_array *alloc_buckets(unsigned int n_buckets) static void flow_free(struct sw_flow *flow) { - int node; + int cpu; if (ovs_identifier_is_key(&flow->id)) kfree(flow->id.unmasked_key); if (flow->sf_acts) ovs_nla_free_flow_actions((struct sw_flow_actions __force *)flow->sf_acts); - /* We open code this to make sure node 0 is always considered */ - for (node = 0; node < MAX_NUMNODES; node = next_node(node, node_possible_map)) - if (node != 0 && flow->stats[node]) + /* We open code this to make sure cpu 0 is always considered */ + for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) + if (flow->stats[cpu]) kmem_cache_free(flow_stats_cache, - (struct flow_stats __force *)flow->stats[node]); + (struct flow_stats __force *)flow->stats[cpu]); kmem_cache_free(flow_cache, flow); } @@ -757,7 +749,7 @@ int ovs_flow_init(void) BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long)); flow_cache = 
kmem_cache_create("sw_flow", sizeof(struct sw_flow) - + (nr_node_ids + + (nr_cpu_ids * sizeof(struct flow_stats *)), 0, 0, NULL); if (flow_cache == NULL) -- cgit v1.1 From 695b4ec0f0a9cf29deabd3ac075911d58b31f42b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Sep 2016 16:20:01 -0700 Subject: pkt_sched: fq: use proper locking in fq_dump_stats() When fq is used on 32bit kernels, we need to lock the qdisc before copying 64bit fields. Otherwise "tc -s qdisc ..." might report bogus values. Fixes: afe4fd062416 ("pkt_sched: fq: Fair Queue packet scheduler") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_fq.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index e5458b9..dc52cc1 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -823,20 +823,24 @@ nla_put_failure: static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { struct fq_sched_data *q = qdisc_priv(sch); - u64 now = ktime_get_ns(); - struct tc_fq_qd_stats st = { - .gc_flows = q->stat_gc_flows, - .highprio_packets = q->stat_internal_packets, - .tcp_retrans = q->stat_tcp_retrans, - .throttled = q->stat_throttled, - .flows_plimit = q->stat_flows_plimit, - .pkts_too_long = q->stat_pkts_too_long, - .allocation_errors = q->stat_allocation_errors, - .flows = q->flows, - .inactive_flows = q->inactive_flows, - .throttled_flows = q->throttled_flows, - .time_next_delayed_flow = q->time_next_delayed_flow - now, - }; + struct tc_fq_qd_stats st; + + sch_tree_lock(sch); + + st.gc_flows = q->stat_gc_flows; + st.highprio_packets = q->stat_internal_packets; + st.tcp_retrans = q->stat_tcp_retrans; + st.throttled = q->stat_throttled; + st.flows_plimit = q->stat_flows_plimit; + st.pkts_too_long = q->stat_pkts_too_long; + st.allocation_errors = q->stat_allocation_errors; + st.time_next_delayed_flow = q->time_next_delayed_flow - ktime_get_ns(); + st.flows = q->flows; + st.inactive_flows 
= q->inactive_flows; + st.throttled_flows = q->throttled_flows; + st.pad = 0; + + sch_tree_unlock(sch); return gnet_stats_copy_app(d, &st, sizeof(st)); } -- cgit v1.1 From 19cd120319ef5390404a5d9c829c3a7962f184a8 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Fri, 16 Sep 2016 10:50:13 +0200 Subject: stmmac: fix PWRDWN into the PMT register for global unicast. MAC devices use the RWKPKTEN and MGKPKTEN bits of the PMT Control/Status register to generate power management events. So this patch is to properly set the RWKPKTEN [BIT(2)] inside the PMT register (needed in case of global unicast). Reported-by: Aditi SHARMA Signed-off-by: Giuseppe Cavallaro Cc: Alexandre TORGUE Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index cbefe9e..885a5e6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -261,7 +261,7 @@ static void dwmac1000_pmt(struct mac_device_info *hw, unsigned long mode) } if (mode & WAKE_UCAST) { pr_debug("GMAC: WOL on global unicast\n"); - pmt |= global_unicast; + pmt |= power_down | global_unicast | wake_up_frame_en; } writel(pmt, ioaddr + GMAC_PMT); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index df5580d..51019b7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -102,7 +102,7 @@ static void dwmac4_pmt(struct mac_device_info *hw, unsigned long mode) } if (mode & WAKE_UCAST) { pr_debug("GMAC: WOL on global unicast\n"); - pmt |= global_unicast; + pmt |= power_down | global_unicast | wake_up_frame_en; } writel(pmt, ioaddr + GMAC_PMT); -- cgit v1.1 From 
22da73492541736eff5f6a6634c732e36c52a133 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 16 Sep 2016 10:43:38 +0100 Subject: net: r6040: add in missing white space in error message text A couple of dev_err messages span two lines and the literal string is missing a white space between words. Add the white space and join the two lines into one. Signed-off-by: Colin Ian King Acked-by: FLorian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/rdc/r6040.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c index cb29ee2..5ef5d72 100644 --- a/drivers/net/ethernet/rdc/r6040.c +++ b/drivers/net/ethernet/rdc/r6040.c @@ -1062,14 +1062,12 @@ static int r6040_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* this should always be supported */ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { - dev_err(&pdev->dev, "32-bit PCI DMA addresses" - "not supported by the card\n"); + dev_err(&pdev->dev, "32-bit PCI DMA addresses not supported by the card\n"); goto err_out_disable_dev; } err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { - dev_err(&pdev->dev, "32-bit PCI DMA addresses" - "not supported by the card\n"); + dev_err(&pdev->dev, "32-bit PCI DMA addresses not supported by the card\n"); goto err_out_disable_dev; } -- cgit v1.1 From 2c9d85d4d82d9e0a62aad08bf50650804e68ed30 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Fri, 16 Sep 2016 15:05:36 +0200 Subject: netdevice: Add offload statistics ndo Add a new ndo to return statistics for offloaded operation. Since there can be many different offloaded operation with many stats types, the ndo gets an attribute id by which it knows which stats are wanted. The ndo also gets a void pointer to be cast according to the attribute id. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2095b6a..a10d8d1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -924,6 +924,14 @@ struct netdev_xdp { * 3. Update dev->stats asynchronously and atomically, and define * neither operation. * + * bool (*ndo_has_offload_stats)(int attr_id) + * Return true if this device supports offload stats of this attr_id. + * + * int (*ndo_get_offload_stats)(int attr_id, const struct net_device *dev, + * void *attr_data) + * Get statistics for offload operations by attr_id. Write it into the + * attr_data pointer. + * * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, __be16 proto, u16 vid); * If device supports VLAN filtering this function is called when a * VLAN id is registered. @@ -1155,6 +1163,10 @@ struct net_device_ops { struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev, struct rtnl_link_stats64 *storage); + bool (*ndo_has_offload_stats)(int attr_id); + int (*ndo_get_offload_stats)(int attr_id, + const struct net_device *dev, + void *attr_data); struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); int (*ndo_vlan_rx_add_vid)(struct net_device *dev, -- cgit v1.1 From 69ae6ad2ff37911903a90256e216d7e7ae460002 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Fri, 16 Sep 2016 15:05:37 +0200 Subject: net: core: Add offload stats to if_stats_msg Add a nested attribute of offload stats to if_stats_msg named IFLA_STATS_LINK_OFFLOAD_XSTATS. Under it, add SW stats, meaning stats only per packets that went via slowpath to the cpu, named IFLA_OFFLOAD_XSTATS_CPU_HIT. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Acked-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/uapi/linux/if_link.h | 9 ++++ net/core/rtnetlink.c | 111 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 116 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 9bf3aec..2351776a 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -826,6 +826,7 @@ enum { IFLA_STATS_LINK_64, IFLA_STATS_LINK_XSTATS, IFLA_STATS_LINK_XSTATS_SLAVE, + IFLA_STATS_LINK_OFFLOAD_XSTATS, __IFLA_STATS_MAX, }; @@ -845,6 +846,14 @@ enum { }; #define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1) +/* These are stats embedded into IFLA_STATS_LINK_OFFLOAD_XSTATS */ +enum { + IFLA_OFFLOAD_XSTATS_UNSPEC, + IFLA_OFFLOAD_XSTATS_CPU_HIT, /* struct rtnl_link_stats64 */ + __IFLA_OFFLOAD_XSTATS_MAX +}; +#define IFLA_OFFLOAD_XSTATS_MAX (__IFLA_OFFLOAD_XSTATS_MAX - 1) + /* XDP section */ enum { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 937e459..0dbae42 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3577,6 +3577,91 @@ static bool stats_attr_valid(unsigned int mask, int attrid, int idxattr) (!idxattr || idxattr == attrid); } +#define IFLA_OFFLOAD_XSTATS_FIRST (IFLA_OFFLOAD_XSTATS_UNSPEC + 1) +static int rtnl_get_offload_stats_attr_size(int attr_id) +{ + switch (attr_id) { + case IFLA_OFFLOAD_XSTATS_CPU_HIT: + return sizeof(struct rtnl_link_stats64); + } + + return 0; +} + +static int rtnl_get_offload_stats(struct sk_buff *skb, struct net_device *dev, + int *prividx) +{ + struct nlattr *attr = NULL; + int attr_id, size; + void *attr_data; + int err; + + if (!(dev->netdev_ops && dev->netdev_ops->ndo_has_offload_stats && + dev->netdev_ops->ndo_get_offload_stats)) + return -ENODATA; + + for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST; + attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) { + if (attr_id < *prividx) + continue; + + size = rtnl_get_offload_stats_attr_size(attr_id); + if (!size) + continue; + + if (!dev->netdev_ops->ndo_has_offload_stats(attr_id)) + 
continue; + + attr = nla_reserve_64bit(skb, attr_id, size, + IFLA_OFFLOAD_XSTATS_UNSPEC); + if (!attr) + goto nla_put_failure; + + attr_data = nla_data(attr); + memset(attr_data, 0, size); + err = dev->netdev_ops->ndo_get_offload_stats(attr_id, dev, + attr_data); + if (err) + goto get_offload_stats_failure; + } + + if (!attr) + return -ENODATA; + + *prividx = 0; + return 0; + +nla_put_failure: + err = -EMSGSIZE; +get_offload_stats_failure: + *prividx = attr_id; + return err; +} + +static int rtnl_get_offload_stats_size(const struct net_device *dev) +{ + int nla_size = 0; + int attr_id; + int size; + + if (!(dev->netdev_ops && dev->netdev_ops->ndo_has_offload_stats && + dev->netdev_ops->ndo_get_offload_stats)) + return 0; + + for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST; + attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) { + if (!dev->netdev_ops->ndo_has_offload_stats(attr_id)) + continue; + size = rtnl_get_offload_stats_attr_size(attr_id); + nla_size += nla_total_size_64bit(size); + } + + if (nla_size != 0) + nla_size += nla_total_size(0); + + return nla_size; +} + static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, unsigned int flags, unsigned int filter_mask, @@ -3586,6 +3671,7 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, struct nlmsghdr *nlh; struct nlattr *attr; int s_prividx = *prividx; + int err; ASSERT_RTNL(); @@ -3614,8 +3700,6 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, const struct rtnl_link_ops *ops = dev->rtnl_link_ops; if (ops && ops->fill_linkxstats) { - int err; - *idxattr = IFLA_STATS_LINK_XSTATS; attr = nla_nest_start(skb, IFLA_STATS_LINK_XSTATS); @@ -3639,8 +3723,6 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, if (master) ops = master->rtnl_link_ops; if (ops && ops->fill_linkxstats) { - int err; - *idxattr = IFLA_STATS_LINK_XSTATS_SLAVE; attr = nla_nest_start(skb, 
IFLA_STATS_LINK_XSTATS_SLAVE); @@ -3655,6 +3737,24 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, } } + if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, + *idxattr)) { + *idxattr = IFLA_STATS_LINK_OFFLOAD_XSTATS; + attr = nla_nest_start(skb, IFLA_STATS_LINK_OFFLOAD_XSTATS); + if (!attr) + goto nla_put_failure; + + err = rtnl_get_offload_stats(skb, dev, prividx); + if (err == -ENODATA) + nla_nest_cancel(skb, attr); + else + nla_nest_end(skb, attr); + + if (err && err != -ENODATA) + goto nla_put_failure; + *idxattr = 0; + } + nlmsg_end(skb, nlh); return 0; @@ -3708,6 +3808,9 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev, } } + if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, 0)) + size += rtnl_get_offload_stats_size(dev); + return size; } -- cgit v1.1 From fc1bbb0f1831cc22326c86fb21d88cca44999b3e Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Fri, 16 Sep 2016 15:05:38 +0200 Subject: mlxsw: spectrum: Implement offload stats ndo and expose HW stats by default Change the default statistics ndo to return HW statistics (like the one returned by ethtool_ops). The HW stats are collected to a cache by delayed work every 1 sec. Implement the offload stat ndo. Add a function to get SW statistics, to be called from this function. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 129 +++++++++++++++++++++++-- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 5 + 2 files changed, 127 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 27bbcaf..171f8dd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -819,9 +819,9 @@ err_span_port_mtu_update: return err; } -static struct rtnl_link_stats64 * -mlxsw_sp_port_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats) +int +mlxsw_sp_port_get_sw_stats64(const struct net_device *dev, + struct rtnl_link_stats64 *stats) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp_port_pcpu_stats *p; @@ -848,6 +848,107 @@ mlxsw_sp_port_get_stats64(struct net_device *dev, tx_dropped += p->tx_dropped; } stats->tx_dropped = tx_dropped; + return 0; +} + +bool mlxsw_sp_port_has_offload_stats(int attr_id) +{ + switch (attr_id) { + case IFLA_OFFLOAD_XSTATS_CPU_HIT: + return true; + } + + return false; +} + +int mlxsw_sp_port_get_offload_stats(int attr_id, const struct net_device *dev, + void *sp) +{ + switch (attr_id) { + case IFLA_OFFLOAD_XSTATS_CPU_HIT: + return mlxsw_sp_port_get_sw_stats64(dev, sp); + } + + return -EINVAL; +} + +static int mlxsw_sp_port_get_stats_raw(struct net_device *dev, int grp, + int prio, char *ppcnt_pl) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + + mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sp_port->local_port, grp, prio); + return mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ppcnt), ppcnt_pl); +} + +static int mlxsw_sp_port_get_hw_stats(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; + int err; + + err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_IEEE_8023_CNT, + 0, ppcnt_pl); + if (err) + goto out; + + stats->tx_packets 
= + mlxsw_reg_ppcnt_a_frames_transmitted_ok_get(ppcnt_pl); + stats->rx_packets = + mlxsw_reg_ppcnt_a_frames_received_ok_get(ppcnt_pl); + stats->tx_bytes = + mlxsw_reg_ppcnt_a_octets_transmitted_ok_get(ppcnt_pl); + stats->rx_bytes = + mlxsw_reg_ppcnt_a_octets_received_ok_get(ppcnt_pl); + stats->multicast = + mlxsw_reg_ppcnt_a_multicast_frames_received_ok_get(ppcnt_pl); + + stats->rx_crc_errors = + mlxsw_reg_ppcnt_a_frame_check_sequence_errors_get(ppcnt_pl); + stats->rx_frame_errors = + mlxsw_reg_ppcnt_a_alignment_errors_get(ppcnt_pl); + + stats->rx_length_errors = ( + mlxsw_reg_ppcnt_a_in_range_length_errors_get(ppcnt_pl) + + mlxsw_reg_ppcnt_a_out_of_range_length_field_get(ppcnt_pl) + + mlxsw_reg_ppcnt_a_frame_too_long_errors_get(ppcnt_pl)); + + stats->rx_errors = (stats->rx_crc_errors + + stats->rx_frame_errors + stats->rx_length_errors); + +out: + return err; +} + +static void update_stats_cache(struct work_struct *work) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + container_of(work, struct mlxsw_sp_port, + hw_stats.update_dw.work); + + if (!netif_carrier_ok(mlxsw_sp_port->dev)) + goto out; + + mlxsw_sp_port_get_hw_stats(mlxsw_sp_port->dev, + mlxsw_sp_port->hw_stats.cache); + +out: + mlxsw_core_schedule_dw(&mlxsw_sp_port->hw_stats.update_dw, + MLXSW_HW_STATS_UPDATE_TIME); +} + +/* Return the stats from a cache that is updated periodically, + * as this function might get called in an atomic context. 
+ */ +static struct rtnl_link_stats64 * +mlxsw_sp_port_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + + memcpy(stats, mlxsw_sp_port->hw_stats.cache, sizeof(*stats)); + return stats; } @@ -1209,6 +1310,8 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_set_mac_address = mlxsw_sp_port_set_mac_address, .ndo_change_mtu = mlxsw_sp_port_change_mtu, .ndo_get_stats64 = mlxsw_sp_port_get_stats64, + .ndo_has_offload_stats = mlxsw_sp_port_has_offload_stats, + .ndo_get_offload_stats = mlxsw_sp_port_get_offload_stats, .ndo_vlan_rx_add_vid = mlxsw_sp_port_add_vid, .ndo_vlan_rx_kill_vid = mlxsw_sp_port_kill_vid, .ndo_neigh_construct = mlxsw_sp_router_neigh_construct, @@ -1547,8 +1650,6 @@ static void __mlxsw_sp_port_get_stats(struct net_device *dev, enum mlxsw_reg_ppcnt_grp grp, int prio, u64 *data, int data_index) { - struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_port_hw_stats *hw_stats; char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; int i, len; @@ -1557,8 +1658,7 @@ static void __mlxsw_sp_port_get_stats(struct net_device *dev, err = mlxsw_sp_get_hw_stats_by_group(&hw_stats, &len, grp); if (err) return; - mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sp_port->local_port, grp, prio); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ppcnt), ppcnt_pl); + mlxsw_sp_port_get_stats_raw(dev, grp, prio, ppcnt_pl); for (i = 0; i < len; i++) data[data_index + i] = !err ? 
hw_stats[i].getter(ppcnt_pl) : 0; } @@ -2145,6 +2245,16 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_alloc_stats; } + mlxsw_sp_port->hw_stats.cache = + kzalloc(sizeof(*mlxsw_sp_port->hw_stats.cache), GFP_KERNEL); + + if (!mlxsw_sp_port->hw_stats.cache) { + err = -ENOMEM; + goto err_alloc_hw_stats; + } + INIT_DELAYED_WORK(&mlxsw_sp_port->hw_stats.update_dw, + &update_stats_cache); + dev->netdev_ops = &mlxsw_sp_port_netdev_ops; dev->ethtool_ops = &mlxsw_sp_port_ethtool_ops; @@ -2245,6 +2355,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_core_port_init; } + mlxsw_core_schedule_dw(&mlxsw_sp_port->hw_stats.update_dw, 0); return 0; err_core_port_init: @@ -2265,6 +2376,8 @@ err_port_system_port_mapping_set: err_dev_addr_init: mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT); err_port_swid_set: + kfree(mlxsw_sp_port->hw_stats.cache); +err_alloc_hw_stats: free_percpu(mlxsw_sp_port->pcpu_stats); err_alloc_stats: kfree(mlxsw_sp_port->untagged_vlans); @@ -2281,6 +2394,7 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) if (!mlxsw_sp_port) return; + cancel_delayed_work_sync(&mlxsw_sp_port->hw_stats.update_dw); mlxsw_core_port_fini(&mlxsw_sp_port->core_port); unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */ mlxsw_sp->ports[local_port] = NULL; @@ -2290,6 +2404,7 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT); mlxsw_sp_port_module_unmap(mlxsw_sp, mlxsw_sp_port->local_port); free_percpu(mlxsw_sp_port->pcpu_stats); + kfree(mlxsw_sp_port->hw_stats.cache); kfree(mlxsw_sp_port->untagged_vlans); kfree(mlxsw_sp_port->active_vlans); WARN_ON_ONCE(!list_empty(&mlxsw_sp_port->vports_list)); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 969c250..49f4cafc 100644 --- 
a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -361,6 +361,11 @@ struct mlxsw_sp_port { struct list_head vports_list; /* TC handles */ struct list_head mall_tc_list; + struct { + #define MLXSW_HW_STATS_UPDATE_TIME HZ + struct rtnl_link_stats64 *cache; + struct delayed_work update_dw; + } hw_stats; }; struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev); -- cgit v1.1 From d409b84768037ad03d1d73538d99fb902adf7365 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Fri, 16 Sep 2016 12:59:08 -0700 Subject: ipv6: Export ip6_route_input_lookup symbol Make ip6_route_input_lookup available outside of the ipv6 module similar to ip_route_input_noref in the IPv4 world. Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller --- include/net/ip6_route.h | 3 +++ net/ipv6/route.c | 7 ++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index d97305d..e0cd318 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -64,6 +64,9 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr) } void ip6_route_input(struct sk_buff *skb); +struct dst_entry *ip6_route_input_lookup(struct net *net, + struct net_device *dev, + struct flowi6 *fl6, int flags); struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, struct flowi6 *fl6, int flags); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ad4a7ff..4dab585 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1147,15 +1147,16 @@ static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table * return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags); } -static struct dst_entry *ip6_route_input_lookup(struct net *net, - struct net_device *dev, - struct flowi6 *fl6, int flags) +struct dst_entry *ip6_route_input_lookup(struct net *net, + struct net_device *dev, + struct flowi6 *fl6, int flags) { if 
(rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG) flags |= RT6_LOOKUP_F_IFACE; return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input); } +EXPORT_SYMBOL_GPL(ip6_route_input_lookup); void ip6_route_input(struct sk_buff *skb) { -- cgit v1.1 From e8bffe0cf964f0330595bb376b74921cccdaac88 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Fri, 16 Sep 2016 12:59:13 -0700 Subject: net: Add _nf_(un)register_hooks symbols Add _nf_register_hooks() and _nf_unregister_hooks() calls which allow caller to hold RTNL mutex. Signed-off-by: Mahesh Bandewar CC: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/linux/netfilter.h | 2 ++ net/netfilter/core.c | 51 ++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 9230f9a..e82b767 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -133,6 +133,8 @@ int nf_register_hook(struct nf_hook_ops *reg); void nf_unregister_hook(struct nf_hook_ops *reg); int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n); void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n); +int _nf_register_hooks(struct nf_hook_ops *reg, unsigned int n); +void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n); /* Functions to register get/setsockopt ranges (non-inclusive). You need to check permissions yourself! 
*/ diff --git a/net/netfilter/core.c b/net/netfilter/core.c index f39276d..2c5327e 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -188,19 +188,17 @@ EXPORT_SYMBOL(nf_unregister_net_hooks); static LIST_HEAD(nf_hook_list); -int nf_register_hook(struct nf_hook_ops *reg) +static int _nf_register_hook(struct nf_hook_ops *reg) { struct net *net, *last; int ret; - rtnl_lock(); for_each_net(net) { ret = nf_register_net_hook(net, reg); if (ret && ret != -ENOENT) goto rollback; } list_add_tail(&reg->list, &nf_hook_list); - rtnl_unlock(); return 0; rollback: @@ -210,19 +208,34 @@ rollback: break; nf_unregister_net_hook(net, reg); } + return ret; +} + +int nf_register_hook(struct nf_hook_ops *reg) +{ + int ret; + + rtnl_lock(); + ret = _nf_register_hook(reg); rtnl_unlock(); + return ret; } EXPORT_SYMBOL(nf_register_hook); -void nf_unregister_hook(struct nf_hook_ops *reg) +static void _nf_unregister_hook(struct nf_hook_ops *reg) { struct net *net; - rtnl_lock(); list_del(&reg->list); for_each_net(net) nf_unregister_net_hook(net, reg); +} + +void nf_unregister_hook(struct nf_hook_ops *reg) +{ + rtnl_lock(); + _nf_unregister_hook(reg); rtnl_unlock(); } EXPORT_SYMBOL(nf_unregister_hook); @@ -246,6 +259,26 @@ err: } EXPORT_SYMBOL(nf_register_hooks); +/* Caller MUST take rtnl_lock() */ +int _nf_register_hooks(struct nf_hook_ops *reg, unsigned int n) +{ + unsigned int i; + int err = 0; + + for (i = 0; i < n; i++) { + err = _nf_register_hook(&reg[i]); + if (err) + goto err; + } + return err; + +err: + if (i > 0) + _nf_unregister_hooks(reg, i); + return err; +} +EXPORT_SYMBOL(_nf_register_hooks); + void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n) { while (n-- > 0) @@ -253,6 +286,14 @@ void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n) } EXPORT_SYMBOL(nf_unregister_hooks); +/* Caller MUST take rtnl_lock */ +void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n) +{ + while (n-- > 0) + _nf_unregister_hook(&reg[n]); +} 
+EXPORT_SYMBOL(_nf_unregister_hooks); + unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb, struct nf_hook_state *state, -- cgit v1.1 From 4fbae7d83c98c30efcf0a2a2ac55fbb75ef5a1a5 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Fri, 16 Sep 2016 12:59:19 -0700 Subject: ipvlan: Introduce l3s mode In a typical IPvlan L3 setup where master is in default-ns and each slave is into different (slave) ns. In this setup egress packet processing for traffic originating from slave-ns will hit all NF_HOOKs in slave-ns as well as default-ns. However same is not true for ingress processing. All these NF_HOOKs are hit only in the slave-ns skipping them in the default-ns. IPvlan in L3 mode is restrictive and if admins want to deploy iptables rules in default-ns, this asymmetric data path makes it impossible to do so. This patch makes use of the l3_rcv() (added as part of l3mdev enhancements) to perform input route lookup on RX packets without changing the skb->dev and then uses nf_hook at NF_INET_LOCAL_IN to change the skb->dev just before handing over skb to L4. Signed-off-by: Mahesh Bandewar CC: David Ahern Reviewed-by: David Ahern Signed-off-by: David S. Miller --- Documentation/networking/ipvlan.txt | 7 ++- drivers/net/Kconfig | 1 + drivers/net/ipvlan/ipvlan.h | 6 +++ drivers/net/ipvlan/ipvlan_core.c | 94 +++++++++++++++++++++++++++++++++++++ drivers/net/ipvlan/ipvlan_main.c | 87 +++++++++++++++++++++++++++++++--- include/uapi/linux/if_link.h | 1 + 6 files changed, 188 insertions(+), 8 deletions(-) diff --git a/Documentation/networking/ipvlan.txt b/Documentation/networking/ipvlan.txt index 14422f8..24196ce 100644 --- a/Documentation/networking/ipvlan.txt +++ b/Documentation/networking/ipvlan.txt @@ -22,7 +22,7 @@ The driver can be built into the kernel (CONFIG_IPVLAN=y) or as a module There are no module parameters for this driver and it can be configured using IProute2/ip utility. 
- ip link add link type ipvlan mode { l2 | L3 } + ip link add link type ipvlan mode { l2 | l3 | l3s } e.g. ip link add link ipvl0 eth0 type ipvlan mode l2 @@ -48,6 +48,11 @@ master device for the L2 processing and routing from that instance will be used before packets are queued on the outbound device. In this mode the slaves will not receive nor can send multicast / broadcast traffic. +4.3 L3S mode: + This is very similar to the L3 mode except that iptables (conn-tracking) +works in this mode and hence it is L3-symmetric (L3s). This will have slightly less +performance but that shouldn't matter since you are choosing this mode over plain-L3 +mode to make conn-tracking work. 5. What to choose (macvlan vs. ipvlan)? These two devices are very similar in many regards and the specific use diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 0c5415b..8768a62 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -149,6 +149,7 @@ config IPVLAN tristate "IP-VLAN support" depends on INET depends on IPV6 + depends on NET_L3_MASTER_DEV ---help--- This allows one to create virtual devices off of a main interface and packets will be delivered based on the dest L3 (IPv6/IPv4 addr) diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index 695a5dc..7e0732f 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -23,11 +23,13 @@ #include #include #include +#include #include #include #include #include #include +#include #define IPVLAN_DRV "ipvlan" #define IPV_DRV_VER "0.1" @@ -124,4 +126,8 @@ struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan, const void *iaddr, bool is_v6); bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6); void ipvlan_ht_addr_del(struct ipvl_addr *addr); +struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb, + u16 proto); +unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state); #endif /* __IPVLAN_H */ diff 
--git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index b5f9511..b4e9907 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -560,6 +560,7 @@ int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) case IPVLAN_MODE_L2: return ipvlan_xmit_mode_l2(skb, dev); case IPVLAN_MODE_L3: + case IPVLAN_MODE_L3S: return ipvlan_xmit_mode_l3(skb, dev); } @@ -664,6 +665,8 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb) return ipvlan_handle_mode_l2(pskb, port); case IPVLAN_MODE_L3: return ipvlan_handle_mode_l3(pskb, port); + case IPVLAN_MODE_L3S: + return RX_HANDLER_PASS; } /* Should not reach here */ @@ -672,3 +675,94 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb) kfree_skb(skb); return RX_HANDLER_CONSUMED; } + +static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb, + struct net_device *dev) +{ + struct ipvl_addr *addr = NULL; + struct ipvl_port *port; + void *lyr3h; + int addr_type; + + if (!dev || !netif_is_ipvlan_port(dev)) + goto out; + + port = ipvlan_port_get_rcu(dev); + if (!port || port->mode != IPVLAN_MODE_L3S) + goto out; + + lyr3h = ipvlan_get_L3_hdr(skb, &addr_type); + if (!lyr3h) + goto out; + + addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true); +out: + return addr; +} + +struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb, + u16 proto) +{ + struct ipvl_addr *addr; + struct net_device *sdev; + + addr = ipvlan_skb_to_addr(skb, dev); + if (!addr) + goto out; + + sdev = addr->master->dev; + switch (proto) { + case AF_INET: + { + int err; + struct iphdr *ip4h = ip_hdr(skb); + + err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr, + ip4h->tos, sdev); + if (unlikely(err)) + goto out; + break; + } + case AF_INET6: + { + struct dst_entry *dst; + struct ipv6hdr *ip6h = ipv6_hdr(skb); + int flags = RT6_LOOKUP_F_HAS_SADDR; + struct flowi6 fl6 = { + .flowi6_iif = sdev->ifindex, + .daddr = ip6h->daddr, + .saddr = ip6h->saddr, + 
.flowlabel = ip6_flowinfo(ip6h), + .flowi6_mark = skb->mark, + .flowi6_proto = ip6h->nexthdr, + }; + + skb_dst_drop(skb); + dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, flags); + skb_dst_set(skb, dst); + break; + } + default: + break; + } + +out: + return skb; +} + +unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct ipvl_addr *addr; + unsigned int len; + + addr = ipvlan_skb_to_addr(skb, skb->dev); + if (!addr) + goto out; + + skb->dev = addr->master->dev; + len = skb->len + ETH_HLEN; + ipvlan_count_rx(addr->master, len, true, false); +out: + return NF_ACCEPT; +} diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 18b4e8c..f442eb3 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -9,24 +9,87 @@ #include "ipvlan.h" +static u32 ipvl_nf_hook_refcnt = 0; + +static struct nf_hook_ops ipvl_nfops[] __read_mostly = { + { + .hook = ipvlan_nf_input, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_IN, + .priority = INT_MAX, + }, + { + .hook = ipvlan_nf_input, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_LOCAL_IN, + .priority = INT_MAX, + }, +}; + +static struct l3mdev_ops ipvl_l3mdev_ops __read_mostly = { + .l3mdev_l3_rcv = ipvlan_l3_rcv, +}; + static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev) { ipvlan->dev->mtu = dev->mtu - ipvlan->mtu_adj; } -static void ipvlan_set_port_mode(struct ipvl_port *port, u16 nval) +static int ipvlan_register_nf_hook(void) +{ + int err = 0; + + if (!ipvl_nf_hook_refcnt) { + err = _nf_register_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops)); + if (!err) + ipvl_nf_hook_refcnt = 1; + } else { + ipvl_nf_hook_refcnt++; + } + + return err; +} + +static void ipvlan_unregister_nf_hook(void) +{ + WARN_ON(!ipvl_nf_hook_refcnt); + + ipvl_nf_hook_refcnt--; + if (!ipvl_nf_hook_refcnt) + _nf_unregister_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops)); +} + +static int ipvlan_set_port_mode(struct ipvl_port 
*port, u16 nval) { struct ipvl_dev *ipvlan; + struct net_device *mdev = port->dev; + int err = 0; + ASSERT_RTNL(); if (port->mode != nval) { + if (nval == IPVLAN_MODE_L3S) { + /* New mode is L3S */ + err = ipvlan_register_nf_hook(); + if (!err) { + mdev->l3mdev_ops = &ipvl_l3mdev_ops; + mdev->priv_flags |= IFF_L3MDEV_MASTER; + } else + return err; + } else if (port->mode == IPVLAN_MODE_L3S) { + /* Old mode was L3S */ + mdev->priv_flags &= ~IFF_L3MDEV_MASTER; + ipvlan_unregister_nf_hook(); + mdev->l3mdev_ops = NULL; + } list_for_each_entry(ipvlan, &port->ipvlans, pnode) { - if (nval == IPVLAN_MODE_L3) + if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S) ipvlan->dev->flags |= IFF_NOARP; else ipvlan->dev->flags &= ~IFF_NOARP; } port->mode = nval; } + return err; } static int ipvlan_port_create(struct net_device *dev) @@ -74,6 +137,11 @@ static void ipvlan_port_destroy(struct net_device *dev) struct ipvl_port *port = ipvlan_port_get_rtnl(dev); dev->priv_flags &= ~IFF_IPVLAN_MASTER; + if (port->mode == IPVLAN_MODE_L3S) { + dev->priv_flags &= ~IFF_L3MDEV_MASTER; + ipvlan_unregister_nf_hook(); + dev->l3mdev_ops = NULL; + } netdev_rx_handler_unregister(dev); cancel_work_sync(&port->wq); __skb_queue_purge(&port->backlog); @@ -132,7 +200,8 @@ static int ipvlan_open(struct net_device *dev) struct net_device *phy_dev = ipvlan->phy_dev; struct ipvl_addr *addr; - if (ipvlan->port->mode == IPVLAN_MODE_L3) + if (ipvlan->port->mode == IPVLAN_MODE_L3 || + ipvlan->port->mode == IPVLAN_MODE_L3S) dev->flags |= IFF_NOARP; else dev->flags &= ~IFF_NOARP; @@ -372,13 +441,14 @@ static int ipvlan_nl_changelink(struct net_device *dev, { struct ipvl_dev *ipvlan = netdev_priv(dev); struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev); + int err = 0; if (data && data[IFLA_IPVLAN_MODE]) { u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]); - ipvlan_set_port_mode(port, nmode); + err = ipvlan_set_port_mode(port, nmode); } - return 0; + return err; } static size_t ipvlan_nl_getsize(const 
struct net_device *dev) @@ -473,10 +543,13 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev, unregister_netdevice(dev); return err; } + err = ipvlan_set_port_mode(port, mode); + if (err) { + unregister_netdevice(dev); + return err; + } list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans); - ipvlan_set_port_mode(port, mode); - netif_stacked_transfer_operstate(phy_dev, dev); return 0; } diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 2351776a..7ec9e99 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -464,6 +464,7 @@ enum { enum ipvlan_mode { IPVLAN_MODE_L2 = 0, IPVLAN_MODE_L3, + IPVLAN_MODE_L3S, IPVLAN_MODE_MAX }; -- cgit v1.1 From 95357907ae73a8039c2106897ee2694f26ac3caf Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 16 Sep 2016 22:36:12 +0200 Subject: mlx4: fix XDP_TX is acting like XDP_PASS on TX ring full The XDP_TX action can fail transmitting the frame in case the TX ring is full or port is down. In case of TX failure it should drop the frame, and not as now call 'break' which is the same as XDP_PASS. Fixes: 9ecc2d86171a ("net/mlx4_en: add xdp forwarding and data write support") Signed-off-by: Jesper Dangaard Brouer Reviewed-by: Brenden Blanco Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 6758292..c80073e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -906,7 +906,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud length, tx_index, &doorbell_pending)) goto consumed; - break; + goto next; /* Drop on xmit failure */ default: bpf_warn_invalid_xdp_action(act); case XDP_ABORTED: -- cgit v1.1 From e8bc8f9a670e26e91562e724a2114243898bd616 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Fri, 16 Sep 2016 23:05:35 +0200 Subject: sctp: Remove some redundant code In commit 311b21774f13 ("sctp: simplify sk_receive_queue locking"), a call to 'skb_queue_splice_tail_init()' has been made explicit. Previously it was hidden in 'sctp_skb_list_tail()' Now, the code around it looks redundant. The '_init()' part of 'skb_queue_splice_tail_init()' should already do the same. Signed-off-by: Christophe JAILLET Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. 
Miller --- net/sctp/ulpqueue.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 877e550..84d0fda 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -140,11 +140,8 @@ int sctp_clear_pd(struct sock *sk, struct sctp_association *asoc) * we can go ahead and clear out the lobby in one shot */ if (!skb_queue_empty(&sp->pd_lobby)) { - struct list_head *list; skb_queue_splice_tail_init(&sp->pd_lobby, &sk->sk_receive_queue); - list = (struct list_head *)&sctp_sk(sk)->pd_lobby; - INIT_LIST_HEAD(list); return 1; } } else { -- cgit v1.1 From 0fbc81b3ad513fecaaf62b48f42b89fcd57f7682 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Sat, 17 Sep 2016 08:12:39 +0530 Subject: chcr/cxgb4i/cxgbit/RDMA/cxgb4: Allocate resources dynamically for all cxgb4 ULD's Allocate resources dynamically to cxgb4's Upper layer driver's(ULD) like cxgbit, iw_cxgb4 and cxgb4i. Allocate resources when they register with cxgb4 driver and free them while unregistering. All the queues and the interrupts for them will be allocated during ULD probe only and freed during remove. Signed-off-by: Hariprasad Shenai Signed-off-by: David S. 
Miller --- drivers/crypto/chelsio/chcr_core.c | 10 +- drivers/infiniband/hw/cxgb4/device.c | 4 + drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 47 +- drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c | 127 +---- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 613 +++++---------------- drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c | 223 ++++++-- drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 31 +- drivers/net/ethernet/chelsio/cxgb4/sge.c | 18 +- drivers/scsi/cxgbi/cxgb4i/cxgb4i.c | 3 + drivers/target/iscsi/cxgbit/cxgbit_main.c | 3 + 10 files changed, 385 insertions(+), 694 deletions(-) diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c index 2f6156b..fb5f9bb 100644 --- a/drivers/crypto/chelsio/chcr_core.c +++ b/drivers/crypto/chelsio/chcr_core.c @@ -39,12 +39,10 @@ static chcr_handler_func work_handlers[NUM_CPL_CMDS] = { [CPL_FW6_PLD] = cpl_fw6_pld_handler, }; -static struct cxgb4_pci_uld_info chcr_uld_info = { +static struct cxgb4_uld_info chcr_uld_info = { .name = DRV_MODULE_NAME, - .nrxq = 4, + .nrxq = MAX_ULD_QSETS, .rxq_size = 1024, - .nciq = 0, - .ciq_size = 0, .add = chcr_uld_add, .state_change = chcr_uld_state_change, .rx_handler = chcr_uld_rx_handler, @@ -205,7 +203,7 @@ static int chcr_uld_state_change(void *handle, enum cxgb4_state state) static int __init chcr_crypto_init(void) { - if (cxgb4_register_pci_uld(CXGB4_PCI_ULD1, &chcr_uld_info)) { + if (cxgb4_register_uld(CXGB4_ULD_CRYPTO, &chcr_uld_info)) { pr_err("ULD register fail: No chcr crypto support in cxgb4"); return -1; } @@ -228,7 +226,7 @@ static void __exit chcr_crypto_exit(void) kfree(u_ctx); } mutex_unlock(&dev_mutex); - cxgb4_unregister_pci_uld(CXGB4_PCI_ULD1); + cxgb4_unregister_uld(CXGB4_ULD_CRYPTO); } module_init(chcr_crypto_init); diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 071d733..f170b63 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -1475,6 +1475,10 
@@ static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...) static struct cxgb4_uld_info c4iw_uld_info = { .name = DRV_NAME, + .nrxq = MAX_ULD_QSETS, + .rxq_size = 511, + .ciq = true, + .lro = false, .add = c4iw_uld_add, .rx_handler = c4iw_uld_rx_handler, .state_change = c4iw_uld_state_change, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 4595569..1f9867d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -437,11 +437,6 @@ enum { MAX_ETH_QSETS = 32, /* # of Ethernet Tx/Rx queue sets */ MAX_OFLD_QSETS = 16, /* # of offload Tx, iscsi Rx queue sets */ MAX_CTRL_QUEUES = NCHAN, /* # of control Tx queues */ - MAX_RDMA_QUEUES = NCHAN, /* # of streaming RDMA Rx queues */ - MAX_RDMA_CIQS = 32, /* # of RDMA concentrator IQs */ - - /* # of streaming iSCSIT Rx queues */ - MAX_ISCSIT_QUEUES = MAX_OFLD_QSETS, }; enum { @@ -458,8 +453,7 @@ enum { enum { INGQ_EXTRAS = 2, /* firmware event queue and */ /* forwarded interrupts */ - MAX_INGQ = MAX_ETH_QSETS + MAX_OFLD_QSETS + MAX_RDMA_QUEUES + - MAX_RDMA_CIQS + MAX_ISCSIT_QUEUES + INGQ_EXTRAS, + MAX_INGQ = MAX_ETH_QSETS + INGQ_EXTRAS, }; struct adapter; @@ -704,10 +698,6 @@ struct sge { struct sge_ctrl_txq ctrlq[MAX_CTRL_QUEUES]; struct sge_eth_rxq ethrxq[MAX_ETH_QSETS]; - struct sge_ofld_rxq iscsirxq[MAX_OFLD_QSETS]; - struct sge_ofld_rxq iscsitrxq[MAX_ISCSIT_QUEUES]; - struct sge_ofld_rxq rdmarxq[MAX_RDMA_QUEUES]; - struct sge_ofld_rxq rdmaciq[MAX_RDMA_CIQS]; struct sge_rspq fw_evtq ____cacheline_aligned_in_smp; struct sge_uld_rxq_info **uld_rxq_info; @@ -717,15 +707,8 @@ struct sge { u16 max_ethqsets; /* # of available Ethernet queue sets */ u16 ethqsets; /* # of active Ethernet queue sets */ u16 ethtxq_rover; /* Tx queue to clean up next */ - u16 iscsiqsets; /* # of active iSCSI queue sets */ - u16 niscsitq; /* # of available iSCST Rx queues */ - u16 rdmaqs; /* # of available RDMA Rx queues */ - 
u16 rdmaciqs; /* # of available RDMA concentrator IQs */ + u16 ofldqsets; /* # of active ofld queue sets */ u16 nqs_per_uld; /* # of Rx queues per ULD */ - u16 iscsi_rxq[MAX_OFLD_QSETS]; - u16 iscsit_rxq[MAX_ISCSIT_QUEUES]; - u16 rdma_rxq[MAX_RDMA_QUEUES]; - u16 rdma_ciq[MAX_RDMA_CIQS]; u16 timer_val[SGE_NTIMERS]; u8 counter_val[SGE_NCOUNTERS]; u32 fl_pg_order; /* large page allocation size */ @@ -749,10 +732,7 @@ struct sge { }; #define for_each_ethrxq(sge, i) for (i = 0; i < (sge)->ethqsets; i++) -#define for_each_iscsirxq(sge, i) for (i = 0; i < (sge)->iscsiqsets; i++) -#define for_each_iscsitrxq(sge, i) for (i = 0; i < (sge)->niscsitq; i++) -#define for_each_rdmarxq(sge, i) for (i = 0; i < (sge)->rdmaqs; i++) -#define for_each_rdmaciq(sge, i) for (i = 0; i < (sge)->rdmaciqs; i++) +#define for_each_ofldtxq(sge, i) for (i = 0; i < (sge)->ofldqsets; i++) struct l2t_data; @@ -786,6 +766,7 @@ struct uld_msix_bmap { struct uld_msix_info { unsigned short vec; char desc[IFNAMSIZ + 10]; + unsigned int idx; }; struct vf_info { @@ -818,7 +799,7 @@ struct adapter { } msix_info[MAX_INGQ + 1]; struct uld_msix_info *msix_info_ulds; /* msix info for uld's */ struct uld_msix_bmap msix_bmap_ulds; /* msix bitmap for all uld */ - unsigned int msi_idx; + int msi_idx; struct doorbell_stats db_stats; struct sge sge; @@ -836,9 +817,10 @@ struct adapter { unsigned int clipt_start; unsigned int clipt_end; struct clip_tbl *clipt; - struct cxgb4_pci_uld_info *uld; + struct cxgb4_uld_info *uld; void *uld_handle[CXGB4_ULD_MAX]; unsigned int num_uld; + unsigned int num_ofld_uld; struct list_head list_node; struct list_head rcu_node; struct list_head mac_hlist; /* list of MAC addresses in MPS Hash */ @@ -858,6 +840,8 @@ struct adapter { #define T4_OS_LOG_MBOX_CMDS 256 struct mbox_cmd_log *mbox_log; + struct mutex uld_mutex; + struct dentry *debugfs_root; bool use_bd; /* Use SGE Back Door intfc for reading SGE Contexts */ bool trace_rss; /* 1 implies that different RSS flit per filter is @@ 
-1051,6 +1035,11 @@ static inline int is_pci_uld(const struct adapter *adap) return adap->params.crypto; } +static inline int is_uld(const struct adapter *adap) +{ + return (adap->params.offload || adap->params.crypto); +} + static inline u32 t4_read_reg(struct adapter *adap, u32 reg_addr) { return readl(adap->regs + reg_addr); @@ -1277,6 +1266,8 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq, int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq, struct net_device *dev, unsigned int iqid, unsigned int cmplqid); +int t4_sge_mod_ctrl_txq(struct adapter *adap, unsigned int eqid, + unsigned int cmplqid); int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq, struct net_device *dev, unsigned int iqid); irqreturn_t t4_sge_intr_msix(int irq, void *cookie); @@ -1635,7 +1626,9 @@ void t4_idma_monitor(struct adapter *adapter, int hz, int ticks); int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf, unsigned int naddr, u8 *addr); -void uld_mem_free(struct adapter *adap); -int uld_mem_alloc(struct adapter *adap); +void t4_uld_mem_free(struct adapter *adap); +int t4_uld_mem_alloc(struct adapter *adap); +void t4_uld_clean_up(struct adapter *adap); +void t4_register_netevent_notifier(void); void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, struct sge_fl *fl); #endif /* __CXGB4_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 91fb508..52be9a4 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -2432,17 +2432,11 @@ static int sge_qinfo_show(struct seq_file *seq, void *v) { struct adapter *adap = seq->private; int eth_entries = DIV_ROUND_UP(adap->sge.ethqsets, 4); - int iscsi_entries = DIV_ROUND_UP(adap->sge.iscsiqsets, 4); - int iscsit_entries = DIV_ROUND_UP(adap->sge.niscsitq, 4); - int rdma_entries = DIV_ROUND_UP(adap->sge.rdmaqs, 4); - int 
ciq_entries = DIV_ROUND_UP(adap->sge.rdmaciqs, 4); + int ofld_entries = DIV_ROUND_UP(adap->sge.ofldqsets, 4); int ctrl_entries = DIV_ROUND_UP(MAX_CTRL_QUEUES, 4); int i, r = (uintptr_t)v - 1; - int iscsi_idx = r - eth_entries; - int iscsit_idx = iscsi_idx - iscsi_entries; - int rdma_idx = iscsit_idx - iscsit_entries; - int ciq_idx = rdma_idx - rdma_entries; - int ctrl_idx = ciq_idx - ciq_entries; + int ofld_idx = r - eth_entries; + int ctrl_idx = ofld_idx - ofld_entries; int fq_idx = ctrl_idx - ctrl_entries; if (r) @@ -2518,119 +2512,17 @@ do { \ RL("FLLow:", fl.low); RL("FLStarving:", fl.starving); - } else if (iscsi_idx < iscsi_entries) { - const struct sge_ofld_rxq *rx = - &adap->sge.iscsirxq[iscsi_idx * 4]; + } else if (ofld_idx < ofld_entries) { const struct sge_ofld_txq *tx = - &adap->sge.ofldtxq[iscsi_idx * 4]; - int n = min(4, adap->sge.iscsiqsets - 4 * iscsi_idx); + &adap->sge.ofldtxq[ofld_idx * 4]; + int n = min(4, adap->sge.ofldqsets - 4 * ofld_idx); - S("QType:", "iSCSI"); + S("QType:", "OFLD-Txq"); T("TxQ ID:", q.cntxt_id); T("TxQ size:", q.size); T("TxQ inuse:", q.in_use); T("TxQ CIDX:", q.cidx); T("TxQ PIDX:", q.pidx); - R("RspQ ID:", rspq.abs_id); - R("RspQ size:", rspq.size); - R("RspQE size:", rspq.iqe_len); - R("RspQ CIDX:", rspq.cidx); - R("RspQ Gen:", rspq.gen); - S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq)); - S3("u", "Intr pktcnt:", - adap->sge.counter_val[rx[i].rspq.pktcnt_idx]); - R("FL ID:", fl.cntxt_id); - R("FL size:", fl.size - 8); - R("FL pend:", fl.pend_cred); - R("FL avail:", fl.avail); - R("FL PIDX:", fl.pidx); - R("FL CIDX:", fl.cidx); - RL("RxPackets:", stats.pkts); - RL("RxImmPkts:", stats.imm); - RL("RxNoMem:", stats.nomem); - RL("FLAllocErr:", fl.alloc_failed); - RL("FLLrgAlcErr:", fl.large_alloc_failed); - RL("FLMapErr:", fl.mapping_err); - RL("FLLow:", fl.low); - RL("FLStarving:", fl.starving); - - } else if (iscsit_idx < iscsit_entries) { - const struct sge_ofld_rxq *rx = - &adap->sge.iscsitrxq[iscsit_idx * 4]; - 
int n = min(4, adap->sge.niscsitq - 4 * iscsit_idx); - - S("QType:", "iSCSIT"); - R("RspQ ID:", rspq.abs_id); - R("RspQ size:", rspq.size); - R("RspQE size:", rspq.iqe_len); - R("RspQ CIDX:", rspq.cidx); - R("RspQ Gen:", rspq.gen); - S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq)); - S3("u", "Intr pktcnt:", - adap->sge.counter_val[rx[i].rspq.pktcnt_idx]); - R("FL ID:", fl.cntxt_id); - R("FL size:", fl.size - 8); - R("FL pend:", fl.pend_cred); - R("FL avail:", fl.avail); - R("FL PIDX:", fl.pidx); - R("FL CIDX:", fl.cidx); - RL("RxPackets:", stats.pkts); - RL("RxImmPkts:", stats.imm); - RL("RxNoMem:", stats.nomem); - RL("FLAllocErr:", fl.alloc_failed); - RL("FLLrgAlcErr:", fl.large_alloc_failed); - RL("FLMapErr:", fl.mapping_err); - RL("FLLow:", fl.low); - RL("FLStarving:", fl.starving); - - } else if (rdma_idx < rdma_entries) { - const struct sge_ofld_rxq *rx = - &adap->sge.rdmarxq[rdma_idx * 4]; - int n = min(4, adap->sge.rdmaqs - 4 * rdma_idx); - - S("QType:", "RDMA-CPL"); - S("Interface:", - rx[i].rspq.netdev ? 
rx[i].rspq.netdev->name : "N/A"); - R("RspQ ID:", rspq.abs_id); - R("RspQ size:", rspq.size); - R("RspQE size:", rspq.iqe_len); - R("RspQ CIDX:", rspq.cidx); - R("RspQ Gen:", rspq.gen); - S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq)); - S3("u", "Intr pktcnt:", - adap->sge.counter_val[rx[i].rspq.pktcnt_idx]); - R("FL ID:", fl.cntxt_id); - R("FL size:", fl.size - 8); - R("FL pend:", fl.pend_cred); - R("FL avail:", fl.avail); - R("FL PIDX:", fl.pidx); - R("FL CIDX:", fl.cidx); - RL("RxPackets:", stats.pkts); - RL("RxImmPkts:", stats.imm); - RL("RxNoMem:", stats.nomem); - RL("FLAllocErr:", fl.alloc_failed); - RL("FLLrgAlcErr:", fl.large_alloc_failed); - RL("FLMapErr:", fl.mapping_err); - RL("FLLow:", fl.low); - RL("FLStarving:", fl.starving); - - } else if (ciq_idx < ciq_entries) { - const struct sge_ofld_rxq *rx = &adap->sge.rdmaciq[ciq_idx * 4]; - int n = min(4, adap->sge.rdmaciqs - 4 * ciq_idx); - - S("QType:", "RDMA-CIQ"); - S("Interface:", - rx[i].rspq.netdev ? rx[i].rspq.netdev->name : "N/A"); - R("RspQ ID:", rspq.abs_id); - R("RspQ size:", rspq.size); - R("RspQE size:", rspq.iqe_len); - R("RspQ CIDX:", rspq.cidx); - R("RspQ Gen:", rspq.gen); - S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq)); - S3("u", "Intr pktcnt:", - adap->sge.counter_val[rx[i].rspq.pktcnt_idx]); - RL("RxAN:", stats.an); - RL("RxNoMem:", stats.nomem); } else if (ctrl_idx < ctrl_entries) { const struct sge_ctrl_txq *tx = &adap->sge.ctrlq[ctrl_idx * 4]; @@ -2672,10 +2564,7 @@ do { \ static int sge_queue_entries(const struct adapter *adap) { return DIV_ROUND_UP(adap->sge.ethqsets, 4) + - DIV_ROUND_UP(adap->sge.iscsiqsets, 4) + - DIV_ROUND_UP(adap->sge.niscsitq, 4) + - DIV_ROUND_UP(adap->sge.rdmaqs, 4) + - DIV_ROUND_UP(adap->sge.rdmaciqs, 4) + + DIV_ROUND_UP(adap->sge.ofldqsets, 4) + DIV_ROUND_UP(MAX_CTRL_QUEUES, 4) + 1; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 44cc976..d1ebb84 100644 --- 
a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -226,11 +226,6 @@ static struct dentry *cxgb4_debugfs_root; LIST_HEAD(adapter_list); DEFINE_MUTEX(uld_mutex); -/* Adapter list to be accessed from atomic context */ -static LIST_HEAD(adap_rcu_list); -static DEFINE_SPINLOCK(adap_rcu_lock); -static struct cxgb4_uld_info ulds[CXGB4_ULD_MAX]; -static const char *const uld_str[] = { "RDMA", "iSCSI", "iSCSIT" }; static void link_report(struct net_device *dev) { @@ -678,56 +673,6 @@ out: return 0; } -/* Flush the aggregated lro sessions */ -static void uldrx_flush_handler(struct sge_rspq *q) -{ - if (ulds[q->uld].lro_flush) - ulds[q->uld].lro_flush(&q->lro_mgr); -} - -/** - * uldrx_handler - response queue handler for ULD queues - * @q: the response queue that received the packet - * @rsp: the response queue descriptor holding the offload message - * @gl: the gather list of packet fragments - * - * Deliver an ingress offload packet to a ULD. All processing is done by - * the ULD, we just maintain statistics. - */ -static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp, - const struct pkt_gl *gl) -{ - struct sge_ofld_rxq *rxq = container_of(q, struct sge_ofld_rxq, rspq); - int ret; - - /* FW can send CPLs encapsulated in a CPL_FW4_MSG. 
- */ - if (((const struct rss_header *)rsp)->opcode == CPL_FW4_MSG && - ((const struct cpl_fw4_msg *)(rsp + 1))->type == FW_TYPE_RSSCPL) - rsp += 2; - - if (q->flush_handler) - ret = ulds[q->uld].lro_rx_handler(q->adap->uld_handle[q->uld], - rsp, gl, &q->lro_mgr, - &q->napi); - else - ret = ulds[q->uld].rx_handler(q->adap->uld_handle[q->uld], - rsp, gl); - - if (ret) { - rxq->stats.nomem++; - return -1; - } - - if (gl == NULL) - rxq->stats.imm++; - else if (gl == CXGB4_MSG_AN) - rxq->stats.an++; - else - rxq->stats.pkts++; - return 0; -} - static void disable_msi(struct adapter *adapter) { if (adapter->flags & USING_MSIX) { @@ -779,30 +724,12 @@ static void name_msix_vecs(struct adapter *adap) snprintf(adap->msix_info[msi_idx].desc, n, "%s-Rx%d", d->name, i); } - - /* offload queues */ - for_each_iscsirxq(&adap->sge, i) - snprintf(adap->msix_info[msi_idx++].desc, n, "%s-iscsi%d", - adap->port[0]->name, i); - - for_each_iscsitrxq(&adap->sge, i) - snprintf(adap->msix_info[msi_idx++].desc, n, "%s-iSCSIT%d", - adap->port[0]->name, i); - - for_each_rdmarxq(&adap->sge, i) - snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma%d", - adap->port[0]->name, i); - - for_each_rdmaciq(&adap->sge, i) - snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma-ciq%d", - adap->port[0]->name, i); } static int request_msix_queue_irqs(struct adapter *adap) { struct sge *s = &adap->sge; - int err, ethqidx, iscsiqidx = 0, rdmaqidx = 0, rdmaciqqidx = 0; - int iscsitqidx = 0; + int err, ethqidx; int msi_index = 2; err = request_irq(adap->msix_info[1].vec, t4_sge_intr_msix, 0, @@ -819,57 +746,9 @@ static int request_msix_queue_irqs(struct adapter *adap) goto unwind; msi_index++; } - for_each_iscsirxq(s, iscsiqidx) { - err = request_irq(adap->msix_info[msi_index].vec, - t4_sge_intr_msix, 0, - adap->msix_info[msi_index].desc, - &s->iscsirxq[iscsiqidx].rspq); - if (err) - goto unwind; - msi_index++; - } - for_each_iscsitrxq(s, iscsitqidx) { - err = request_irq(adap->msix_info[msi_index].vec, - 
t4_sge_intr_msix, 0, - adap->msix_info[msi_index].desc, - &s->iscsitrxq[iscsitqidx].rspq); - if (err) - goto unwind; - msi_index++; - } - for_each_rdmarxq(s, rdmaqidx) { - err = request_irq(adap->msix_info[msi_index].vec, - t4_sge_intr_msix, 0, - adap->msix_info[msi_index].desc, - &s->rdmarxq[rdmaqidx].rspq); - if (err) - goto unwind; - msi_index++; - } - for_each_rdmaciq(s, rdmaciqqidx) { - err = request_irq(adap->msix_info[msi_index].vec, - t4_sge_intr_msix, 0, - adap->msix_info[msi_index].desc, - &s->rdmaciq[rdmaciqqidx].rspq); - if (err) - goto unwind; - msi_index++; - } return 0; unwind: - while (--rdmaciqqidx >= 0) - free_irq(adap->msix_info[--msi_index].vec, - &s->rdmaciq[rdmaciqqidx].rspq); - while (--rdmaqidx >= 0) - free_irq(adap->msix_info[--msi_index].vec, - &s->rdmarxq[rdmaqidx].rspq); - while (--iscsitqidx >= 0) - free_irq(adap->msix_info[--msi_index].vec, - &s->iscsitrxq[iscsitqidx].rspq); - while (--iscsiqidx >= 0) - free_irq(adap->msix_info[--msi_index].vec, - &s->iscsirxq[iscsiqidx].rspq); while (--ethqidx >= 0) free_irq(adap->msix_info[--msi_index].vec, &s->ethrxq[ethqidx].rspq); @@ -885,16 +764,6 @@ static void free_msix_queue_irqs(struct adapter *adap) free_irq(adap->msix_info[1].vec, &s->fw_evtq); for_each_ethrxq(s, i) free_irq(adap->msix_info[msi_index++].vec, &s->ethrxq[i].rspq); - for_each_iscsirxq(s, i) - free_irq(adap->msix_info[msi_index++].vec, - &s->iscsirxq[i].rspq); - for_each_iscsitrxq(s, i) - free_irq(adap->msix_info[msi_index++].vec, - &s->iscsitrxq[i].rspq); - for_each_rdmarxq(s, i) - free_irq(adap->msix_info[msi_index++].vec, &s->rdmarxq[i].rspq); - for_each_rdmaciq(s, i) - free_irq(adap->msix_info[msi_index++].vec, &s->rdmaciq[i].rspq); } /** @@ -1033,42 +902,11 @@ static void enable_rx(struct adapter *adap) } } -static int alloc_ofld_rxqs(struct adapter *adap, struct sge_ofld_rxq *q, - unsigned int nq, unsigned int per_chan, int msi_idx, - u16 *ids, bool lro) -{ - int i, err; - - for (i = 0; i < nq; i++, q++) { - if (msi_idx > 
0) - msi_idx++; - err = t4_sge_alloc_rxq(adap, &q->rspq, false, - adap->port[i / per_chan], - msi_idx, q->fl.size ? &q->fl : NULL, - uldrx_handler, - lro ? uldrx_flush_handler : NULL, - 0); - if (err) - return err; - memset(&q->stats, 0, sizeof(q->stats)); - if (ids) - ids[i] = q->rspq.abs_id; - } - return 0; -} -/** - * setup_sge_queues - configure SGE Tx/Rx/response queues - * @adap: the adapter - * - * Determines how many sets of SGE queues to use and initializes them. - * We support multiple queue sets per port if we have MSI-X, otherwise - * just one queue set per port. - */ -static int setup_sge_queues(struct adapter *adap) +static int setup_fw_sge_queues(struct adapter *adap) { - int err, i, j; struct sge *s = &adap->sge; + int err = 0; bitmap_zero(s->starving_fl, s->egr_sz); bitmap_zero(s->txq_maperr, s->egr_sz); @@ -1083,25 +921,27 @@ static int setup_sge_queues(struct adapter *adap) adap->msi_idx = -((int)s->intrq.abs_id + 1); } - /* NOTE: If you add/delete any Ingress/Egress Queue allocations in here, - * don't forget to update the following which need to be - * synchronized to and changes here. - * - * 1. The calculations of MAX_INGQ in cxgb4.h. - * - * 2. Update enable_msix/name_msix_vecs/request_msix_queue_irqs - * to accommodate any new/deleted Ingress Queues - * which need MSI-X Vectors. - * - * 3. Update sge_qinfo_show() to include information on the - * new/deleted queues. - */ err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0], adap->msi_idx, NULL, fwevtq_handler, NULL, -1); - if (err) { -freeout: t4_free_sge_resources(adap); - return err; - } + if (err) + t4_free_sge_resources(adap); + return err; +} + +/** + * setup_sge_queues - configure SGE Tx/Rx/response queues + * @adap: the adapter + * + * Determines how many sets of SGE queues to use and initializes them. + * We support multiple queue sets per port if we have MSI-X, otherwise + * just one queue set per port. 
+ */ +static int setup_sge_queues(struct adapter *adap) +{ + int err, i, j; + struct sge *s = &adap->sge; + struct sge_uld_rxq_info *rxq_info = s->uld_rxq_info[CXGB4_ULD_RDMA]; + unsigned int cmplqid = 0; for_each_port(adap, i) { struct net_device *dev = adap->port[i]; @@ -1132,8 +972,8 @@ freeout: t4_free_sge_resources(adap); } } - j = s->iscsiqsets / adap->params.nports; /* iscsi queues per channel */ - for_each_iscsirxq(s, i) { + j = s->ofldqsets / adap->params.nports; /* iscsi queues per channel */ + for_each_ofldtxq(s, i) { err = t4_sge_alloc_ofld_txq(adap, &s->ofldtxq[i], adap->port[i / j], s->fw_evtq.cntxt_id); @@ -1141,30 +981,15 @@ freeout: t4_free_sge_resources(adap); goto freeout; } -#define ALLOC_OFLD_RXQS(firstq, nq, per_chan, ids, lro) do { \ - err = alloc_ofld_rxqs(adap, firstq, nq, per_chan, adap->msi_idx, ids, lro); \ - if (err) \ - goto freeout; \ - if (adap->msi_idx > 0) \ - adap->msi_idx += nq; \ -} while (0) - - ALLOC_OFLD_RXQS(s->iscsirxq, s->iscsiqsets, j, s->iscsi_rxq, false); - ALLOC_OFLD_RXQS(s->iscsitrxq, s->niscsitq, j, s->iscsit_rxq, true); - ALLOC_OFLD_RXQS(s->rdmarxq, s->rdmaqs, 1, s->rdma_rxq, false); - j = s->rdmaciqs / adap->params.nports; /* rdmaq queues per channel */ - ALLOC_OFLD_RXQS(s->rdmaciq, s->rdmaciqs, j, s->rdma_ciq, false); - -#undef ALLOC_OFLD_RXQS - for_each_port(adap, i) { - /* - * Note that ->rdmarxq[i].rspq.cntxt_id below is 0 if we don't + /* Note that cmplqid below is 0 if we don't * have RDMA queues, and that's the right value. 
*/ + if (rxq_info) + cmplqid = rxq_info->uldrxq[i].rspq.cntxt_id; + err = t4_sge_alloc_ctrl_txq(adap, &s->ctrlq[i], adap->port[i], - s->fw_evtq.cntxt_id, - s->rdmarxq[i].rspq.cntxt_id); + s->fw_evtq.cntxt_id, cmplqid); if (err) goto freeout; } @@ -1175,6 +1000,9 @@ freeout: t4_free_sge_resources(adap); RSSCONTROL_V(netdev2pinfo(adap->port[0])->tx_chan) | QUEUENUMBER_V(s->ethrxq[0].rspq.abs_id)); return 0; +freeout: + t4_free_sge_resources(adap); + return err; } /* @@ -2317,7 +2145,7 @@ static void disable_dbs(struct adapter *adap) for_each_ethrxq(&adap->sge, i) disable_txq_db(&adap->sge.ethtxq[i].q); - for_each_iscsirxq(&adap->sge, i) + for_each_ofldtxq(&adap->sge, i) disable_txq_db(&adap->sge.ofldtxq[i].q); for_each_port(adap, i) disable_txq_db(&adap->sge.ctrlq[i].q); @@ -2329,7 +2157,7 @@ static void enable_dbs(struct adapter *adap) for_each_ethrxq(&adap->sge, i) enable_txq_db(adap, &adap->sge.ethtxq[i].q); - for_each_iscsirxq(&adap->sge, i) + for_each_ofldtxq(&adap->sge, i) enable_txq_db(adap, &adap->sge.ofldtxq[i].q); for_each_port(adap, i) enable_txq_db(adap, &adap->sge.ctrlq[i].q); @@ -2337,9 +2165,10 @@ static void enable_dbs(struct adapter *adap) static void notify_rdma_uld(struct adapter *adap, enum cxgb4_control cmd) { - if (adap->uld_handle[CXGB4_ULD_RDMA]) - ulds[CXGB4_ULD_RDMA].control(adap->uld_handle[CXGB4_ULD_RDMA], - cmd); + enum cxgb4_uld type = CXGB4_ULD_RDMA; + + if (adap->uld && adap->uld[type].handle) + adap->uld[type].control(adap->uld[type].handle, cmd); } static void process_db_full(struct work_struct *work) @@ -2393,13 +2222,14 @@ out: if (ret) CH_WARN(adap, "DB drop recovery failed.\n"); } + static void recover_all_queues(struct adapter *adap) { int i; for_each_ethrxq(&adap->sge, i) sync_txq_pidx(adap, &adap->sge.ethtxq[i].q); - for_each_iscsirxq(&adap->sge, i) + for_each_ofldtxq(&adap->sge, i) sync_txq_pidx(adap, &adap->sge.ofldtxq[i].q); for_each_port(adap, i) sync_txq_pidx(adap, &adap->sge.ctrlq[i].q); @@ -2464,94 +2294,12 @@ void 
t4_db_dropped(struct adapter *adap) queue_work(adap->workq, &adap->db_drop_task); } -static void uld_attach(struct adapter *adap, unsigned int uld) -{ - void *handle; - struct cxgb4_lld_info lli; - unsigned short i; - - lli.pdev = adap->pdev; - lli.pf = adap->pf; - lli.l2t = adap->l2t; - lli.tids = &adap->tids; - lli.ports = adap->port; - lli.vr = &adap->vres; - lli.mtus = adap->params.mtus; - if (uld == CXGB4_ULD_RDMA) { - lli.rxq_ids = adap->sge.rdma_rxq; - lli.ciq_ids = adap->sge.rdma_ciq; - lli.nrxq = adap->sge.rdmaqs; - lli.nciq = adap->sge.rdmaciqs; - } else if (uld == CXGB4_ULD_ISCSI) { - lli.rxq_ids = adap->sge.iscsi_rxq; - lli.nrxq = adap->sge.iscsiqsets; - } else if (uld == CXGB4_ULD_ISCSIT) { - lli.rxq_ids = adap->sge.iscsit_rxq; - lli.nrxq = adap->sge.niscsitq; - } - lli.ntxq = adap->sge.iscsiqsets; - lli.nchan = adap->params.nports; - lli.nports = adap->params.nports; - lli.wr_cred = adap->params.ofldq_wr_cred; - lli.adapter_type = adap->params.chip; - lli.iscsi_iolen = MAXRXDATA_G(t4_read_reg(adap, TP_PARA_REG2_A)); - lli.iscsi_tagmask = t4_read_reg(adap, ULP_RX_ISCSI_TAGMASK_A); - lli.iscsi_pgsz_order = t4_read_reg(adap, ULP_RX_ISCSI_PSZ_A); - lli.iscsi_llimit = t4_read_reg(adap, ULP_RX_ISCSI_LLIMIT_A); - lli.iscsi_ppm = &adap->iscsi_ppm; - lli.cclk_ps = 1000000000 / adap->params.vpd.cclk; - lli.udb_density = 1 << adap->params.sge.eq_qpp; - lli.ucq_density = 1 << adap->params.sge.iq_qpp; - lli.filt_mode = adap->params.tp.vlan_pri_map; - /* MODQ_REQ_MAP sets queues 0-3 to chan 0-3 */ - for (i = 0; i < NCHAN; i++) - lli.tx_modq[i] = i; - lli.gts_reg = adap->regs + MYPF_REG(SGE_PF_GTS_A); - lli.db_reg = adap->regs + MYPF_REG(SGE_PF_KDOORBELL_A); - lli.fw_vers = adap->params.fw_vers; - lli.dbfifo_int_thresh = dbfifo_int_thresh; - lli.sge_ingpadboundary = adap->sge.fl_align; - lli.sge_egrstatuspagesize = adap->sge.stat_len; - lli.sge_pktshift = adap->sge.pktshift; - lli.enable_fw_ofld_conn = adap->flags & FW_OFLD_CONN; - lli.max_ordird_qp = 
adap->params.max_ordird_qp; - lli.max_ird_adapter = adap->params.max_ird_adapter; - lli.ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl; - lli.nodeid = dev_to_node(adap->pdev_dev); - - handle = ulds[uld].add(&lli); - if (IS_ERR(handle)) { - dev_warn(adap->pdev_dev, - "could not attach to the %s driver, error %ld\n", - uld_str[uld], PTR_ERR(handle)); - return; - } - - adap->uld_handle[uld] = handle; - +void t4_register_netevent_notifier(void) +{ if (!netevent_registered) { register_netevent_notifier(&cxgb4_netevent_nb); netevent_registered = true; } - - if (adap->flags & FULL_INIT_DONE) - ulds[uld].state_change(handle, CXGB4_STATE_UP); -} - -static void attach_ulds(struct adapter *adap) -{ - unsigned int i; - - spin_lock(&adap_rcu_lock); - list_add_tail_rcu(&adap->rcu_node, &adap_rcu_list); - spin_unlock(&adap_rcu_lock); - - mutex_lock(&uld_mutex); - list_add_tail(&adap->list_node, &adapter_list); - for (i = 0; i < CXGB4_ULD_MAX; i++) - if (ulds[i].add) - uld_attach(adap, i); - mutex_unlock(&uld_mutex); } static void detach_ulds(struct adapter *adap) @@ -2561,12 +2309,6 @@ static void detach_ulds(struct adapter *adap) mutex_lock(&uld_mutex); list_del(&adap->list_node); for (i = 0; i < CXGB4_ULD_MAX; i++) - if (adap->uld_handle[i]) { - ulds[i].state_change(adap->uld_handle[i], - CXGB4_STATE_DETACH); - adap->uld_handle[i] = NULL; - } - for (i = 0; i < CXGB4_PCI_ULD_MAX; i++) if (adap->uld && adap->uld[i].handle) { adap->uld[i].state_change(adap->uld[i].handle, CXGB4_STATE_DETACH); @@ -2577,10 +2319,6 @@ static void detach_ulds(struct adapter *adap) netevent_registered = false; } mutex_unlock(&uld_mutex); - - spin_lock(&adap_rcu_lock); - list_del_rcu(&adap->rcu_node); - spin_unlock(&adap_rcu_lock); } static void notify_ulds(struct adapter *adap, enum cxgb4_state new_state) @@ -2589,65 +2327,12 @@ static void notify_ulds(struct adapter *adap, enum cxgb4_state new_state) mutex_lock(&uld_mutex); for (i = 0; i < CXGB4_ULD_MAX; i++) - if (adap->uld_handle[i]) - 
ulds[i].state_change(adap->uld_handle[i], new_state); - for (i = 0; i < CXGB4_PCI_ULD_MAX; i++) if (adap->uld && adap->uld[i].handle) adap->uld[i].state_change(adap->uld[i].handle, new_state); mutex_unlock(&uld_mutex); } -/** - * cxgb4_register_uld - register an upper-layer driver - * @type: the ULD type - * @p: the ULD methods - * - * Registers an upper-layer driver with this driver and notifies the ULD - * about any presently available devices that support its type. Returns - * %-EBUSY if a ULD of the same type is already registered. - */ -int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p) -{ - int ret = 0; - struct adapter *adap; - - if (type >= CXGB4_ULD_MAX) - return -EINVAL; - mutex_lock(&uld_mutex); - if (ulds[type].add) { - ret = -EBUSY; - goto out; - } - ulds[type] = *p; - list_for_each_entry(adap, &adapter_list, list_node) - uld_attach(adap, type); -out: mutex_unlock(&uld_mutex); - return ret; -} -EXPORT_SYMBOL(cxgb4_register_uld); - -/** - * cxgb4_unregister_uld - unregister an upper-layer driver - * @type: the ULD type - * - * Unregisters an existing upper-layer driver. 
- */ -int cxgb4_unregister_uld(enum cxgb4_uld type) -{ - struct adapter *adap; - - if (type >= CXGB4_ULD_MAX) - return -EINVAL; - mutex_lock(&uld_mutex); - list_for_each_entry(adap, &adapter_list, list_node) - adap->uld_handle[type] = NULL; - ulds[type].add = NULL; - mutex_unlock(&uld_mutex); - return 0; -} -EXPORT_SYMBOL(cxgb4_unregister_uld); - #if IS_ENABLED(CONFIG_IPV6) static int cxgb4_inet6addr_handler(struct notifier_block *this, unsigned long event, void *data) @@ -2752,7 +2437,6 @@ static int cxgb_up(struct adapter *adap) adap->msix_info[0].desc, adap); if (err) goto irq_err; - err = request_msix_queue_irqs(adap); if (err) { free_irq(adap->msix_info[0].vec, adap); @@ -4262,6 +3946,7 @@ static int adap_init0(struct adapter *adap) adap->params.ofldq_wr_cred = val[5]; adap->params.offload = 1; + adap->num_ofld_uld += 1; } if (caps_cmd.rdmacaps) { params[0] = FW_PARAM_PFVF(STAG_START); @@ -4314,6 +3999,7 @@ static int adap_init0(struct adapter *adap) "max_ordird_qp %d max_ird_adapter %d\n", adap->params.max_ordird_qp, adap->params.max_ird_adapter); + adap->num_ofld_uld += 2; } if (caps_cmd.iscsicaps) { params[0] = FW_PARAM_PFVF(ISCSI_START); @@ -4324,6 +4010,8 @@ static int adap_init0(struct adapter *adap) goto bye; adap->vres.iscsi.start = val[0]; adap->vres.iscsi.size = val[1] - val[0] + 1; + /* LIO target and cxgb4i initiator */ + adap->num_ofld_uld += 2; } if (caps_cmd.cryptocaps) { /* Should query params here...TODO */ @@ -4523,14 +4211,14 @@ static void cfg_queues(struct adapter *adap) #ifndef CONFIG_CHELSIO_T4_DCB int q10g = 0; #endif - int ciq_size; /* Reduce memory usage in kdump environment, disable all offload. 
*/ if (is_kdump_kernel()) { adap->params.offload = 0; adap->params.crypto = 0; - } else if (adap->num_uld && uld_mem_alloc(adap)) { + } else if (is_uld(adap) && t4_uld_mem_alloc(adap)) { + adap->params.offload = 0; adap->params.crypto = 0; } @@ -4576,33 +4264,18 @@ static void cfg_queues(struct adapter *adap) s->ethqsets = qidx; s->max_ethqsets = qidx; /* MSI-X may lower it later */ - if (is_offload(adap)) { + if (is_uld(adap)) { /* * For offload we use 1 queue/channel if all ports are up to 1G, * otherwise we divide all available queues amongst the channels * capped by the number of available cores. */ if (n10g) { - i = min_t(int, ARRAY_SIZE(s->iscsirxq), - num_online_cpus()); - s->iscsiqsets = roundup(i, adap->params.nports); - } else - s->iscsiqsets = adap->params.nports; - /* For RDMA one Rx queue per channel suffices */ - s->rdmaqs = adap->params.nports; - /* Try and allow at least 1 CIQ per cpu rounding down - * to the number of ports, with a minimum of 1 per port. - * A 2 port card in a 6 cpu system: 6 CIQs, 3 / port. - * A 4 port card in a 6 cpu system: 4 CIQs, 1 / port. - * A 4 port card in a 2 cpu system: 4 CIQs, 1 / port. 
- */ - s->rdmaciqs = min_t(int, MAX_RDMA_CIQS, num_online_cpus()); - s->rdmaciqs = (s->rdmaciqs / adap->params.nports) * - adap->params.nports; - s->rdmaciqs = max_t(int, s->rdmaciqs, adap->params.nports); - - if (!is_t4(adap->params.chip)) - s->niscsitq = s->iscsiqsets; + i = num_online_cpus(); + s->ofldqsets = roundup(i, adap->params.nports); + } else { + s->ofldqsets = adap->params.nports; + } } for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) { @@ -4621,47 +4294,8 @@ static void cfg_queues(struct adapter *adap) for (i = 0; i < ARRAY_SIZE(s->ofldtxq); i++) s->ofldtxq[i].q.size = 1024; - for (i = 0; i < ARRAY_SIZE(s->iscsirxq); i++) { - struct sge_ofld_rxq *r = &s->iscsirxq[i]; - - init_rspq(adap, &r->rspq, 5, 1, 1024, 64); - r->rspq.uld = CXGB4_ULD_ISCSI; - r->fl.size = 72; - } - - if (!is_t4(adap->params.chip)) { - for (i = 0; i < ARRAY_SIZE(s->iscsitrxq); i++) { - struct sge_ofld_rxq *r = &s->iscsitrxq[i]; - - init_rspq(adap, &r->rspq, 5, 1, 1024, 64); - r->rspq.uld = CXGB4_ULD_ISCSIT; - r->fl.size = 72; - } - } - - for (i = 0; i < ARRAY_SIZE(s->rdmarxq); i++) { - struct sge_ofld_rxq *r = &s->rdmarxq[i]; - - init_rspq(adap, &r->rspq, 5, 1, 511, 64); - r->rspq.uld = CXGB4_ULD_RDMA; - r->fl.size = 72; - } - - ciq_size = 64 + adap->vres.cq.size + adap->tids.nftids; - if (ciq_size > SGE_MAX_IQ_SIZE) { - CH_WARN(adap, "CIQ size too small for available IQs\n"); - ciq_size = SGE_MAX_IQ_SIZE; - } - - for (i = 0; i < ARRAY_SIZE(s->rdmaciq); i++) { - struct sge_ofld_rxq *r = &s->rdmaciq[i]; - - init_rspq(adap, &r->rspq, 5, 1, ciq_size, 64); - r->rspq.uld = CXGB4_ULD_RDMA; - } - init_rspq(adap, &s->fw_evtq, 0, 1, 1024, 64); - init_rspq(adap, &s->intrq, 0, 1, 2 * MAX_INGQ, 64); + init_rspq(adap, &s->intrq, 0, 1, 512, 64); } /* @@ -4695,7 +4329,15 @@ static void reduce_ethqs(struct adapter *adap, int n) static int get_msix_info(struct adapter *adap) { struct uld_msix_info *msix_info; - int max_ingq = (MAX_OFLD_QSETS * adap->num_uld); + unsigned int max_ingq = 0; + + if 
(is_offload(adap)) + max_ingq += MAX_OFLD_QSETS * adap->num_ofld_uld; + if (is_pci_uld(adap)) + max_ingq += MAX_OFLD_QSETS * adap->num_uld; + + if (!max_ingq) + goto out; msix_info = kcalloc(max_ingq, sizeof(*msix_info), GFP_KERNEL); if (!msix_info) @@ -4709,12 +4351,13 @@ static int get_msix_info(struct adapter *adap) } spin_lock_init(&adap->msix_bmap_ulds.lock); adap->msix_info_ulds = msix_info; +out: return 0; } static void free_msix_info(struct adapter *adap) { - if (!adap->num_uld) + if (!(adap->num_uld && adap->num_ofld_uld)) return; kfree(adap->msix_info_ulds); @@ -4733,32 +4376,32 @@ static int enable_msix(struct adapter *adap) struct msix_entry *entries; int max_ingq = MAX_INGQ; - max_ingq += (MAX_OFLD_QSETS * adap->num_uld); + if (is_pci_uld(adap)) + max_ingq += (MAX_OFLD_QSETS * adap->num_uld); + if (is_offload(adap)) + max_ingq += (MAX_OFLD_QSETS * adap->num_ofld_uld); entries = kmalloc(sizeof(*entries) * (max_ingq + 1), GFP_KERNEL); if (!entries) return -ENOMEM; /* map for msix */ - if (is_pci_uld(adap) && get_msix_info(adap)) + if (get_msix_info(adap)) { + adap->params.offload = 0; adap->params.crypto = 0; + } for (i = 0; i < max_ingq + 1; ++i) entries[i].entry = i; want = s->max_ethqsets + EXTRA_VECS; if (is_offload(adap)) { - want += s->rdmaqs + s->rdmaciqs + s->iscsiqsets + - s->niscsitq; - /* need nchan for each possible ULD */ - if (is_t4(adap->params.chip)) - ofld_need = 3 * nchan; - else - ofld_need = 4 * nchan; + want += adap->num_ofld_uld * s->ofldqsets; + ofld_need = adap->num_ofld_uld * nchan; } if (is_pci_uld(adap)) { - want += netif_get_num_default_rss_queues() * nchan; - uld_need = nchan; + want += adap->num_uld * s->ofldqsets; + uld_need = adap->num_uld * nchan; } #ifdef CONFIG_CHELSIO_T4_DCB /* For Data Center Bridging we need 8 Ethernet TX Priority Queues for @@ -4786,43 +4429,25 @@ static int enable_msix(struct adapter *adap) if (i < s->ethqsets) reduce_ethqs(adap, i); } - if (is_pci_uld(adap)) { + if (is_uld(adap)) { if (allocated < 
want) s->nqs_per_uld = nchan; else - s->nqs_per_uld = netif_get_num_default_rss_queues() * - nchan; + s->nqs_per_uld = s->ofldqsets; } - if (is_offload(adap)) { - if (allocated < want) { - s->rdmaqs = nchan; - s->rdmaciqs = nchan; - - if (!is_t4(adap->params.chip)) - s->niscsitq = nchan; - } - - /* leftovers go to OFLD */ - i = allocated - EXTRA_VECS - s->max_ethqsets - - s->rdmaqs - s->rdmaciqs - s->niscsitq; - if (is_pci_uld(adap)) - i -= s->nqs_per_uld * adap->num_uld; - s->iscsiqsets = (i / nchan) * nchan; /* round down */ - - } - - for (i = 0; i < (allocated - (s->nqs_per_uld * adap->num_uld)); ++i) + for (i = 0; i < (s->max_ethqsets + EXTRA_VECS); ++i) adap->msix_info[i].vec = entries[i].vector; - if (is_pci_uld(adap)) { - for (j = 0 ; i < allocated; ++i, j++) + if (is_uld(adap)) { + for (j = 0 ; i < allocated; ++i, j++) { adap->msix_info_ulds[j].vec = entries[i].vector; + adap->msix_info_ulds[j].idx = i; + } adap->msix_bmap_ulds.mapsize = j; } dev_info(adap->pdev_dev, "%d MSI-X vectors allocated, " - "nic %d iscsi %d rdma cpl %d rdma ciq %d uld %d\n", - allocated, s->max_ethqsets, s->iscsiqsets, s->rdmaqs, - s->rdmaciqs, s->nqs_per_uld); + "nic %d per uld %d\n", + allocated, s->max_ethqsets, s->nqs_per_uld); kfree(entries); return 0; @@ -5535,10 +5160,14 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* PCIe EEH recovery on powerpc platforms needs fundamental reset */ pdev->needs_freset = 1; - if (is_offload(adapter)) - attach_ulds(adapter); + if (is_uld(adapter)) { + mutex_lock(&uld_mutex); + list_add_tail(&adapter->list_node, &adapter_list); + mutex_unlock(&uld_mutex); + } print_adapter_info(adapter); + setup_fw_sge_queues(adapter); return 0; sriov: @@ -5593,8 +5222,8 @@ sriov: free_some_resources(adapter); if (adapter->flags & USING_MSIX) free_msix_info(adapter); - if (adapter->num_uld) - uld_mem_free(adapter); + if (adapter->num_uld || adapter->num_ofld_uld) + t4_uld_mem_free(adapter); out_unmap_bar: if 
(!is_t4(adapter->params.chip)) iounmap(adapter->bar2); @@ -5631,7 +5260,7 @@ static void remove_one(struct pci_dev *pdev) */ destroy_workqueue(adapter->workq); - if (is_offload(adapter)) + if (is_uld(adapter)) detach_ulds(adapter); disable_interrupts(adapter); @@ -5658,8 +5287,8 @@ static void remove_one(struct pci_dev *pdev) if (adapter->flags & USING_MSIX) free_msix_info(adapter); - if (adapter->num_uld) - uld_mem_free(adapter); + if (adapter->num_uld || adapter->num_ofld_uld) + t4_uld_mem_free(adapter); free_some_resources(adapter); #if IS_ENABLED(CONFIG_IPV6) t4_cleanup_clip_tbl(adapter); @@ -5690,12 +5319,58 @@ static void remove_one(struct pci_dev *pdev) #endif } +/* "Shutdown" quiesces the device, stopping Ingress Packet and Interrupt + * delivery. This is essentially a stripped down version of the PCI remove() + * function where we do the minimal amount of work necessary to shutdown any + * further activity. + */ +static void shutdown_one(struct pci_dev *pdev) +{ + struct adapter *adapter = pci_get_drvdata(pdev); + + /* As with remove_one() above (see extended comment), we only want do + * do cleanup on PCI Devices which went all the way through init_one() + * ... 
+ */ + if (!adapter) { + pci_release_regions(pdev); + return; + } + + if (adapter->pf == 4) { + int i; + + for_each_port(adapter, i) + if (adapter->port[i]->reg_state == NETREG_REGISTERED) + cxgb_close(adapter->port[i]); + + t4_uld_clean_up(adapter); + disable_interrupts(adapter); + disable_msi(adapter); + + t4_sge_stop(adapter); + if (adapter->flags & FW_OK) + t4_fw_bye(adapter, adapter->mbox); + } +#ifdef CONFIG_PCI_IOV + else { + if (adapter->port[0]) + unregister_netdev(adapter->port[0]); + iounmap(adapter->regs); + kfree(adapter->vfinfo); + kfree(adapter); + pci_disable_sriov(pdev); + pci_release_regions(pdev); + } +#endif +} + static struct pci_driver cxgb4_driver = { .name = KBUILD_MODNAME, .id_table = cxgb4_pci_tbl, .probe = init_one, .remove = remove_one, - .shutdown = remove_one, + .shutdown = shutdown_one, #ifdef CONFIG_PCI_IOV .sriov_configure = cxgb4_iov_configure, #endif diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index 5d402ba..fc04e3b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -82,6 +82,24 @@ static void free_msix_idx_in_bmap(struct adapter *adap, unsigned int msix_idx) spin_unlock_irqrestore(&bmap->lock, flags); } +/* Flush the aggregated lro sessions */ +static void uldrx_flush_handler(struct sge_rspq *q) +{ + struct adapter *adap = q->adap; + + if (adap->uld[q->uld].lro_flush) + adap->uld[q->uld].lro_flush(&q->lro_mgr); +} + +/** + * uldrx_handler - response queue handler for ULD queues + * @q: the response queue that received the packet + * @rsp: the response queue descriptor holding the offload message + * @gl: the gather list of packet fragments + * + * Deliver an ingress offload packet to a ULD. All processing is done by + * the ULD, we just maintain statistics. 
+ */ static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp, const struct pkt_gl *gl) { @@ -124,8 +142,8 @@ static int alloc_uld_rxqs(struct adapter *adap, struct sge_ofld_rxq *q = rxq_info->uldrxq + offset; unsigned short *ids = rxq_info->rspq_id + offset; unsigned int per_chan = nq / adap->params.nports; - unsigned int msi_idx, bmap_idx; - int i, err; + unsigned int bmap_idx = 0; + int i, err, msi_idx; if (adap->flags & USING_MSIX) msi_idx = 1; @@ -135,14 +153,14 @@ static int alloc_uld_rxqs(struct adapter *adap, for (i = 0; i < nq; i++, q++) { if (msi_idx >= 0) { bmap_idx = get_msix_idx_from_bmap(adap); - adap->msi_idx++; + msi_idx = adap->msix_info_ulds[bmap_idx].idx; } err = t4_sge_alloc_rxq(adap, &q->rspq, false, adap->port[i / per_chan], - adap->msi_idx, + msi_idx, q->fl.size ? &q->fl : NULL, uldrx_handler, - NULL, + lro ? uldrx_flush_handler : NULL, 0); if (err) goto freeout; @@ -159,7 +177,6 @@ freeout: if (q->rspq.desc) free_rspq_fl(adap, &q->rspq, q->fl.size ? &q->fl : NULL); - adap->msi_idx--; } /* We need to free rxq also in case of ciq allocation failure */ @@ -169,7 +186,6 @@ freeout: if (q->rspq.desc) free_rspq_fl(adap, &q->rspq, q->fl.size ? 
&q->fl : NULL); - adap->msi_idx--; } } return err; @@ -178,17 +194,38 @@ freeout: int setup_sge_queues_uld(struct adapter *adap, unsigned int uld_type, bool lro) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; + int i, ret = 0; if (adap->flags & USING_MSIX) { - rxq_info->msix_tbl = kzalloc(rxq_info->nrxq + rxq_info->nciq, + rxq_info->msix_tbl = kcalloc((rxq_info->nrxq + rxq_info->nciq), + sizeof(unsigned short), GFP_KERNEL); if (!rxq_info->msix_tbl) return -ENOMEM; } - return !(!alloc_uld_rxqs(adap, rxq_info, rxq_info->nrxq, 0, lro) && + ret = !(!alloc_uld_rxqs(adap, rxq_info, rxq_info->nrxq, 0, lro) && !alloc_uld_rxqs(adap, rxq_info, rxq_info->nciq, rxq_info->nrxq, lro)); + + /* Tell uP to route control queue completions to rdma rspq */ + if (adap->flags & FULL_INIT_DONE && + !ret && uld_type == CXGB4_ULD_RDMA) { + struct sge *s = &adap->sge; + unsigned int cmplqid; + u32 param, cmdop; + + cmdop = FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL; + for_each_port(adap, i) { + cmplqid = rxq_info->uldrxq[i].rspq.cntxt_id; + param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) | + FW_PARAMS_PARAM_X_V(cmdop) | + FW_PARAMS_PARAM_YZ_V(s->ctrlq[i].q.cntxt_id)); + ret = t4_set_params(adap, adap->mbox, adap->pf, + 0, 1, &param, &cmplqid); + } + } + return ret; } static void t4_free_uld_rxqs(struct adapter *adap, int n, @@ -198,7 +235,6 @@ static void t4_free_uld_rxqs(struct adapter *adap, int n, if (q->rspq.desc) free_rspq_fl(adap, &q->rspq, q->fl.size ? 
&q->fl : NULL); - adap->msi_idx--; } } @@ -206,6 +242,21 @@ void free_sge_queues_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; + if (adap->flags & FULL_INIT_DONE && uld_type == CXGB4_ULD_RDMA) { + struct sge *s = &adap->sge; + u32 param, cmdop, cmplqid = 0; + int i; + + cmdop = FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL; + for_each_port(adap, i) { + param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) | + FW_PARAMS_PARAM_X_V(cmdop) | + FW_PARAMS_PARAM_YZ_V(s->ctrlq[i].q.cntxt_id)); + t4_set_params(adap, adap->mbox, adap->pf, + 0, 1, &param, &cmplqid); + } + } + if (rxq_info->nciq) t4_free_uld_rxqs(adap, rxq_info->nciq, rxq_info->uldrxq + rxq_info->nrxq); @@ -215,26 +266,38 @@ void free_sge_queues_uld(struct adapter *adap, unsigned int uld_type) } int cfg_queues_uld(struct adapter *adap, unsigned int uld_type, - const struct cxgb4_pci_uld_info *uld_info) + const struct cxgb4_uld_info *uld_info) { struct sge *s = &adap->sge; struct sge_uld_rxq_info *rxq_info; - int i, nrxq; + int i, nrxq, ciq_size; rxq_info = kzalloc(sizeof(*rxq_info), GFP_KERNEL); if (!rxq_info) return -ENOMEM; - if (uld_info->nrxq > s->nqs_per_uld) - rxq_info->nrxq = s->nqs_per_uld; - else - rxq_info->nrxq = uld_info->nrxq; - if (!uld_info->nciq) + if (adap->flags & USING_MSIX && uld_info->nrxq > s->nqs_per_uld) { + i = s->nqs_per_uld; + rxq_info->nrxq = roundup(i, adap->params.nports); + } else { + i = min_t(int, uld_info->nrxq, + num_online_cpus()); + rxq_info->nrxq = roundup(i, adap->params.nports); + } + if (!uld_info->ciq) { rxq_info->nciq = 0; - else if (uld_info->nciq && uld_info->nciq > s->nqs_per_uld) - rxq_info->nciq = s->nqs_per_uld; - else - rxq_info->nciq = uld_info->nciq; + } else { + if (adap->flags & USING_MSIX) + rxq_info->nciq = min_t(int, s->nqs_per_uld, + num_online_cpus()); + else + rxq_info->nciq = min_t(int, MAX_OFLD_QSETS, + num_online_cpus()); + rxq_info->nciq = ((rxq_info->nciq / adap->params.nports) * + 
adap->params.nports); + rxq_info->nciq = max_t(int, rxq_info->nciq, + adap->params.nports); + } nrxq = rxq_info->nrxq + rxq_info->nciq; /* total rxq's */ rxq_info->uldrxq = kcalloc(nrxq, sizeof(struct sge_ofld_rxq), @@ -259,12 +322,17 @@ int cfg_queues_uld(struct adapter *adap, unsigned int uld_type, r->fl.size = 72; } + ciq_size = 64 + adap->vres.cq.size + adap->tids.nftids; + if (ciq_size > SGE_MAX_IQ_SIZE) { + dev_warn(adap->pdev_dev, "CIQ size too small for available IQs\n"); + ciq_size = SGE_MAX_IQ_SIZE; + } + for (i = rxq_info->nrxq; i < nrxq; i++) { struct sge_ofld_rxq *r = &rxq_info->uldrxq[i]; - init_rspq(adap, &r->rspq, 5, 1, uld_info->ciq_size, 64); + init_rspq(adap, &r->rspq, 5, 1, ciq_size, 64); r->rspq.uld = uld_type; - r->fl.size = 72; } memcpy(rxq_info->name, uld_info->name, IFNAMSIZ); @@ -285,7 +353,8 @@ void free_queues_uld(struct adapter *adap, unsigned int uld_type) int request_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; - int idx, bmap_idx, err = 0; + int err = 0; + unsigned int idx, bmap_idx; for_each_uldrxq(rxq_info, idx) { bmap_idx = rxq_info->msix_tbl[idx]; @@ -310,10 +379,10 @@ unwind: void free_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; - int idx; + unsigned int idx, bmap_idx; for_each_uldrxq(rxq_info, idx) { - unsigned int bmap_idx = rxq_info->msix_tbl[idx]; + bmap_idx = rxq_info->msix_tbl[idx]; free_msix_idx_in_bmap(adap, bmap_idx); free_irq(adap->msix_info_ulds[bmap_idx].vec, @@ -325,10 +394,10 @@ void name_msix_vecs_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; int n = sizeof(adap->msix_info_ulds[0].desc); - int idx; + unsigned int idx, bmap_idx; for_each_uldrxq(rxq_info, idx) { - unsigned int bmap_idx = rxq_info->msix_tbl[idx]; + bmap_idx = rxq_info->msix_tbl[idx]; 
snprintf(adap->msix_info_ulds[bmap_idx].desc, n, "%s-%s%d", adap->port[0]->name, rxq_info->name, idx); @@ -390,15 +459,15 @@ static void uld_queue_init(struct adapter *adap, unsigned int uld_type, lli->nciq = rxq_info->nciq; } -int uld_mem_alloc(struct adapter *adap) +int t4_uld_mem_alloc(struct adapter *adap) { struct sge *s = &adap->sge; - adap->uld = kcalloc(adap->num_uld, sizeof(*adap->uld), GFP_KERNEL); + adap->uld = kcalloc(CXGB4_ULD_MAX, sizeof(*adap->uld), GFP_KERNEL); if (!adap->uld) return -ENOMEM; - s->uld_rxq_info = kzalloc(adap->num_uld * + s->uld_rxq_info = kzalloc(CXGB4_ULD_MAX * sizeof(struct sge_uld_rxq_info *), GFP_KERNEL); if (!s->uld_rxq_info) @@ -410,7 +479,7 @@ err_uld: return -ENOMEM; } -void uld_mem_free(struct adapter *adap) +void t4_uld_mem_free(struct adapter *adap) { struct sge *s = &adap->sge; @@ -418,6 +487,26 @@ void uld_mem_free(struct adapter *adap) kfree(adap->uld); } +void t4_uld_clean_up(struct adapter *adap) +{ + struct sge_uld_rxq_info *rxq_info; + unsigned int i; + + if (!adap->uld) + return; + for (i = 0; i < CXGB4_ULD_MAX; i++) { + if (!adap->uld[i].handle) + continue; + rxq_info = adap->sge.uld_rxq_info[i]; + if (adap->flags & FULL_INIT_DONE) + quiesce_rx_uld(adap, i); + if (adap->flags & USING_MSIX) + free_msix_queue_irqs_uld(adap, i); + free_sge_queues_uld(adap, i); + free_queues_uld(adap, i); + } +} + static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld) { int i; @@ -429,10 +518,15 @@ static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld) lld->ports = adap->port; lld->vr = &adap->vres; lld->mtus = adap->params.mtus; - lld->ntxq = adap->sge.iscsiqsets; + lld->ntxq = adap->sge.ofldqsets; lld->nchan = adap->params.nports; lld->nports = adap->params.nports; lld->wr_cred = adap->params.ofldq_wr_cred; + lld->iscsi_iolen = MAXRXDATA_G(t4_read_reg(adap, TP_PARA_REG2_A)); + lld->iscsi_tagmask = t4_read_reg(adap, ULP_RX_ISCSI_TAGMASK_A); + lld->iscsi_pgsz_order = t4_read_reg(adap, 
ULP_RX_ISCSI_PSZ_A); + lld->iscsi_llimit = t4_read_reg(adap, ULP_RX_ISCSI_LLIMIT_A); + lld->iscsi_ppm = &adap->iscsi_ppm; lld->adapter_type = adap->params.chip; lld->cclk_ps = 1000000000 / adap->params.vpd.cclk; lld->udb_density = 1 << adap->params.sge.eq_qpp; @@ -472,23 +566,37 @@ static void uld_attach(struct adapter *adap, unsigned int uld) } adap->uld[uld].handle = handle; + t4_register_netevent_notifier(); if (adap->flags & FULL_INIT_DONE) adap->uld[uld].state_change(handle, CXGB4_STATE_UP); } -int cxgb4_register_pci_uld(enum cxgb4_pci_uld type, - struct cxgb4_pci_uld_info *p) +/** + * cxgb4_register_uld - register an upper-layer driver + * @type: the ULD type + * @p: the ULD methods + * + * Registers an upper-layer driver with this driver and notifies the ULD + * about any presently available devices that support its type. Returns + * %-EBUSY if a ULD of the same type is already registered. + */ +int cxgb4_register_uld(enum cxgb4_uld type, + const struct cxgb4_uld_info *p) { int ret = 0; + unsigned int adap_idx = 0; struct adapter *adap; - if (type >= CXGB4_PCI_ULD_MAX) + if (type >= CXGB4_ULD_MAX) return -EINVAL; mutex_lock(&uld_mutex); list_for_each_entry(adap, &adapter_list, list_node) { - if (!is_pci_uld(adap)) + if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) || + (type != CXGB4_ULD_CRYPTO && !is_offload(adap))) + continue; + if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip)) continue; ret = cfg_queues_uld(adap, type, p); if (ret) @@ -510,11 +618,14 @@ int cxgb4_register_pci_uld(enum cxgb4_pci_uld type, } adap->uld[type] = *p; uld_attach(adap, type); + adap_idx++; } mutex_unlock(&uld_mutex); return 0; free_irq: + if (adap->flags & FULL_INIT_DONE) + quiesce_rx_uld(adap, type); if (adap->flags & USING_MSIX) free_msix_queue_irqs_uld(adap, type); free_rxq: @@ -522,21 +633,49 @@ free_rxq: free_queues: free_queues_uld(adap, type); out: + + list_for_each_entry(adap, &adapter_list, list_node) { + if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) || 
+ (type != CXGB4_ULD_CRYPTO && !is_offload(adap))) + continue; + if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip)) + continue; + if (!adap_idx) + break; + adap->uld[type].handle = NULL; + adap->uld[type].add = NULL; + if (adap->flags & FULL_INIT_DONE) + quiesce_rx_uld(adap, type); + if (adap->flags & USING_MSIX) + free_msix_queue_irqs_uld(adap, type); + free_sge_queues_uld(adap, type); + free_queues_uld(adap, type); + adap_idx--; + } mutex_unlock(&uld_mutex); return ret; } -EXPORT_SYMBOL(cxgb4_register_pci_uld); +EXPORT_SYMBOL(cxgb4_register_uld); -int cxgb4_unregister_pci_uld(enum cxgb4_pci_uld type) +/** + * cxgb4_unregister_uld - unregister an upper-layer driver + * @type: the ULD type + * + * Unregisters an existing upper-layer driver. + */ +int cxgb4_unregister_uld(enum cxgb4_uld type) { struct adapter *adap; - if (type >= CXGB4_PCI_ULD_MAX) + if (type >= CXGB4_ULD_MAX) return -EINVAL; mutex_lock(&uld_mutex); list_for_each_entry(adap, &adapter_list, list_node) { - if (!is_pci_uld(adap)) + if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) || + (type != CXGB4_ULD_CRYPTO && !is_offload(adap))) + continue; + if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip)) continue; adap->uld[type].handle = NULL; adap->uld[type].add = NULL; @@ -551,4 +690,4 @@ int cxgb4_unregister_pci_uld(enum cxgb4_pci_uld type) return 0; } -EXPORT_SYMBOL(cxgb4_unregister_pci_uld); +EXPORT_SYMBOL(cxgb4_unregister_uld); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index ab40372..b3544f6 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -42,6 +42,8 @@ #include #include "cxgb4.h" +#define MAX_ULD_QSETS 16 + /* CPL message priority levels */ enum { CPL_PRIORITY_DATA = 0, /* data messages */ @@ -189,9 +191,11 @@ static inline void set_wr_txq(struct sk_buff *skb, int prio, int queue) } enum cxgb4_uld { + CXGB4_ULD_INIT, CXGB4_ULD_RDMA, CXGB4_ULD_ISCSI, 
CXGB4_ULD_ISCSIT, + CXGB4_ULD_CRYPTO, CXGB4_ULD_MAX }; @@ -284,31 +288,11 @@ struct cxgb4_lld_info { struct cxgb4_uld_info { const char *name; - void *(*add)(const struct cxgb4_lld_info *p); - int (*rx_handler)(void *handle, const __be64 *rsp, - const struct pkt_gl *gl); - int (*state_change)(void *handle, enum cxgb4_state new_state); - int (*control)(void *handle, enum cxgb4_control control, ...); - int (*lro_rx_handler)(void *handle, const __be64 *rsp, - const struct pkt_gl *gl, - struct t4_lro_mgr *lro_mgr, - struct napi_struct *napi); - void (*lro_flush)(struct t4_lro_mgr *); -}; - -enum cxgb4_pci_uld { - CXGB4_PCI_ULD1, - CXGB4_PCI_ULD_MAX -}; - -struct cxgb4_pci_uld_info { - const char *name; - bool lro; void *handle; unsigned int nrxq; - unsigned int nciq; unsigned int rxq_size; - unsigned int ciq_size; + bool ciq; + bool lro; void *(*add)(const struct cxgb4_lld_info *p); int (*rx_handler)(void *handle, const __be64 *rsp, const struct pkt_gl *gl); @@ -323,9 +307,6 @@ struct cxgb4_pci_uld_info { int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p); int cxgb4_unregister_uld(enum cxgb4_uld type); -int cxgb4_register_pci_uld(enum cxgb4_pci_uld type, - struct cxgb4_pci_uld_info *p); -int cxgb4_unregister_pci_uld(enum cxgb4_pci_uld type); int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb); unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo); unsigned int cxgb4_port_chan(const struct net_device *dev); diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 9a607db..1e74fd6 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2860,6 +2860,18 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq, return 0; } +int t4_sge_mod_ctrl_txq(struct adapter *adap, unsigned int eqid, + unsigned int cmplqid) +{ + u32 param, val; + + param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) | + 
FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL) | + FW_PARAMS_PARAM_YZ_V(eqid)); + val = cmplqid; + return t4_set_params(adap, adap->mbox, adap->pf, 0, 1, &param, &val); +} + int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq, struct net_device *dev, unsigned int iqid) { @@ -3014,12 +3026,6 @@ void t4_free_sge_resources(struct adapter *adap) } } - /* clean up RDMA and iSCSI Rx queues */ - t4_free_ofld_rxqs(adap, adap->sge.iscsiqsets, adap->sge.iscsirxq); - t4_free_ofld_rxqs(adap, adap->sge.niscsitq, adap->sge.iscsitrxq); - t4_free_ofld_rxqs(adap, adap->sge.rdmaqs, adap->sge.rdmarxq); - t4_free_ofld_rxqs(adap, adap->sge.rdmaciqs, adap->sge.rdmaciq); - /* clean up offload Tx queues */ for (i = 0; i < ARRAY_SIZE(adap->sge.ofldtxq); i++) { struct sge_ofld_txq *q = &adap->sge.ofldtxq[i]; diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c index e4ba2d2..7c0d7af 100644 --- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c +++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c @@ -84,6 +84,9 @@ static inline int send_tx_flowc_wr(struct cxgbi_sock *); static const struct cxgb4_uld_info cxgb4i_uld_info = { .name = DRV_MODULE_NAME, + .nrxq = MAX_ULD_QSETS, + .rxq_size = 1024, + .lro = false, .add = t4_uld_add, .rx_handler = t4_uld_rx_handler, .state_change = t4_uld_state_change, diff --git a/drivers/target/iscsi/cxgbit/cxgbit_main.c b/drivers/target/iscsi/cxgbit/cxgbit_main.c index 27dd11a..ad26b93 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_main.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_main.c @@ -652,6 +652,9 @@ static struct iscsit_transport cxgbit_transport = { static struct cxgb4_uld_info cxgbit_uld_info = { .name = DRV_NAME, + .nrxq = MAX_ULD_QSETS, + .rxq_size = 1024, + .lro = true, .add = cxgbit_uld_add, .state_change = cxgbit_uld_state_change, .lro_rx_handler = cxgbit_uld_lro_rx_handler, -- cgit v1.1 From ee40681037c0e5fa0058447d7603a4fb77308bce Mon Sep 17 00:00:00 2001 From: Nelson Chang Date: Sat, 17 Sep 2016 23:50:55 +0800 
Subject: net: ethernet: mediatek: add HW LRO functions of PDMA RX rings This patch adds hardware large receive offload (LRO) functions, as follows: 1) PDMA has a total of four RX rings: one is the normal ring, and the others can be configured as LRO rings. 2) Only TCP/IP RX flows can be offloaded. The hardware can be set with at most four IP addresses; if the destination IP of an RX flow matches one of them, the flow has the chance to be offloaded. 3) At most three RX flows can be offloaded, and each flow is mapped to one RX ring. 4) If there are more than three candidate RX flows, the hardware chooses three of them based on throughput comparison results. Signed-off-by: Nelson Chang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 215 +++++++++++++++++++++++++--- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 75 +++++++++- 2 files changed, 265 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 522fe8d..18600cb 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -820,11 +820,51 @@ drop: return NETDEV_TX_OK; } +static struct mtk_rx_ring *mtk_get_rx_ring(struct mtk_eth *eth) +{ + int i; + struct mtk_rx_ring *ring; + int idx; + + if (!eth->hwlro) + return &eth->rx_ring[0]; + + for (i = 0; i < MTK_MAX_RX_RING_NUM; i++) { + ring = &eth->rx_ring[i]; + idx = NEXT_RX_DESP_IDX(ring->calc_idx, ring->dma_size); + if (ring->dma[idx].rxd2 & RX_DMA_DONE) { + ring->calc_idx_update = true; + return ring; + } + } + + return NULL; +} + +static void mtk_update_rx_cpu_idx(struct mtk_eth *eth) +{ + struct mtk_rx_ring *ring; + int i; + + if (!eth->hwlro) { + ring = &eth->rx_ring[0]; + mtk_w32(eth, ring->calc_idx, ring->crx_idx_reg); + } else { + for (i = 0; i < MTK_MAX_RX_RING_NUM; i++) { + ring = &eth->rx_ring[i]; + if (ring->calc_idx_update) { + ring->calc_idx_update = false; + mtk_w32(eth, ring->calc_idx, ring->crx_idx_reg); + } + } + 
} +} + static int mtk_poll_rx(struct napi_struct *napi, int budget, struct mtk_eth *eth) { - struct mtk_rx_ring *ring = &eth->rx_ring; - int idx = ring->calc_idx; + struct mtk_rx_ring *ring; + int idx; struct sk_buff *skb; u8 *data, *new_data; struct mtk_rx_dma *rxd, trxd; @@ -836,7 +876,11 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, dma_addr_t dma_addr; int mac = 0; - idx = NEXT_RX_DESP_IDX(idx); + ring = mtk_get_rx_ring(eth); + if (unlikely(!ring)) + goto rx_done; + + idx = NEXT_RX_DESP_IDX(ring->calc_idx, ring->dma_size); rxd = &ring->dma[idx]; data = ring->data[idx]; @@ -907,12 +951,13 @@ release_desc: done++; } +rx_done: if (done) { /* make sure that all changes to the dma ring are flushed before * we continue */ wmb(); - mtk_w32(eth, ring->calc_idx, MTK_PRX_CRX_IDX0); + mtk_update_rx_cpu_idx(eth); } return done; @@ -1135,32 +1180,41 @@ static void mtk_tx_clean(struct mtk_eth *eth) } } -static int mtk_rx_alloc(struct mtk_eth *eth) +static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag) { - struct mtk_rx_ring *ring = &eth->rx_ring; + struct mtk_rx_ring *ring = &eth->rx_ring[ring_no]; + int rx_data_len, rx_dma_size; int i; - ring->frag_size = mtk_max_frag_size(ETH_DATA_LEN); + if (rx_flag == MTK_RX_FLAGS_HWLRO) { + rx_data_len = MTK_MAX_LRO_RX_LENGTH; + rx_dma_size = MTK_HW_LRO_DMA_SIZE; + } else { + rx_data_len = ETH_DATA_LEN; + rx_dma_size = MTK_DMA_SIZE; + } + + ring->frag_size = mtk_max_frag_size(rx_data_len); ring->buf_size = mtk_max_buf_size(ring->frag_size); - ring->data = kcalloc(MTK_DMA_SIZE, sizeof(*ring->data), + ring->data = kcalloc(rx_dma_size, sizeof(*ring->data), GFP_KERNEL); if (!ring->data) return -ENOMEM; - for (i = 0; i < MTK_DMA_SIZE; i++) { + for (i = 0; i < rx_dma_size; i++) { ring->data[i] = netdev_alloc_frag(ring->frag_size); if (!ring->data[i]) return -ENOMEM; } ring->dma = dma_alloc_coherent(eth->dev, - MTK_DMA_SIZE * sizeof(*ring->dma), + rx_dma_size * sizeof(*ring->dma), &ring->phys, GFP_ATOMIC | 
__GFP_ZERO); if (!ring->dma) return -ENOMEM; - for (i = 0; i < MTK_DMA_SIZE; i++) { + for (i = 0; i < rx_dma_size; i++) { dma_addr_t dma_addr = dma_map_single(eth->dev, ring->data[i] + NET_SKB_PAD, ring->buf_size, @@ -1171,27 +1225,30 @@ static int mtk_rx_alloc(struct mtk_eth *eth) ring->dma[i].rxd2 = RX_DMA_PLEN0(ring->buf_size); } - ring->calc_idx = MTK_DMA_SIZE - 1; + ring->dma_size = rx_dma_size; + ring->calc_idx_update = false; + ring->calc_idx = rx_dma_size - 1; + ring->crx_idx_reg = MTK_PRX_CRX_IDX_CFG(ring_no); /* make sure that all changes to the dma ring are flushed before we * continue */ wmb(); - mtk_w32(eth, eth->rx_ring.phys, MTK_PRX_BASE_PTR0); - mtk_w32(eth, MTK_DMA_SIZE, MTK_PRX_MAX_CNT0); - mtk_w32(eth, eth->rx_ring.calc_idx, MTK_PRX_CRX_IDX0); - mtk_w32(eth, MTK_PST_DRX_IDX0, MTK_PDMA_RST_IDX); + mtk_w32(eth, ring->phys, MTK_PRX_BASE_PTR_CFG(ring_no)); + mtk_w32(eth, rx_dma_size, MTK_PRX_MAX_CNT_CFG(ring_no)); + mtk_w32(eth, ring->calc_idx, ring->crx_idx_reg); + mtk_w32(eth, MTK_PST_DRX_IDX_CFG(ring_no), MTK_PDMA_RST_IDX); return 0; } -static void mtk_rx_clean(struct mtk_eth *eth) +static void mtk_rx_clean(struct mtk_eth *eth, int ring_no) { - struct mtk_rx_ring *ring = &eth->rx_ring; + struct mtk_rx_ring *ring = &eth->rx_ring[ring_no]; int i; if (ring->data && ring->dma) { - for (i = 0; i < MTK_DMA_SIZE; i++) { + for (i = 0; i < ring->dma_size; i++) { if (!ring->data[i]) continue; if (!ring->dma[i].rxd1) @@ -1208,13 +1265,98 @@ static void mtk_rx_clean(struct mtk_eth *eth) if (ring->dma) { dma_free_coherent(eth->dev, - MTK_DMA_SIZE * sizeof(*ring->dma), + ring->dma_size * sizeof(*ring->dma), ring->dma, ring->phys); ring->dma = NULL; } } +static int mtk_hwlro_rx_init(struct mtk_eth *eth) +{ + int i; + u32 ring_ctrl_dw1 = 0, ring_ctrl_dw2 = 0, ring_ctrl_dw3 = 0; + u32 lro_ctrl_dw0 = 0, lro_ctrl_dw3 = 0; + + /* set LRO rings to auto-learn modes */ + ring_ctrl_dw2 |= MTK_RING_AUTO_LERAN_MODE; + + /* validate LRO ring */ + ring_ctrl_dw2 |= MTK_RING_VLD; + 
+ /* set AGE timer (unit: 20us) */ + ring_ctrl_dw2 |= MTK_RING_AGE_TIME_H; + ring_ctrl_dw1 |= MTK_RING_AGE_TIME_L; + + /* set max AGG timer (unit: 20us) */ + ring_ctrl_dw2 |= MTK_RING_MAX_AGG_TIME; + + /* set max LRO AGG count */ + ring_ctrl_dw2 |= MTK_RING_MAX_AGG_CNT_L; + ring_ctrl_dw3 |= MTK_RING_MAX_AGG_CNT_H; + + for (i = 1; i < MTK_MAX_RX_RING_NUM; i++) { + mtk_w32(eth, ring_ctrl_dw1, MTK_LRO_CTRL_DW1_CFG(i)); + mtk_w32(eth, ring_ctrl_dw2, MTK_LRO_CTRL_DW2_CFG(i)); + mtk_w32(eth, ring_ctrl_dw3, MTK_LRO_CTRL_DW3_CFG(i)); + } + + /* IPv4 checksum update enable */ + lro_ctrl_dw0 |= MTK_L3_CKS_UPD_EN; + + /* switch priority comparison to packet count mode */ + lro_ctrl_dw0 |= MTK_LRO_ALT_PKT_CNT_MODE; + + /* bandwidth threshold setting */ + mtk_w32(eth, MTK_HW_LRO_BW_THRE, MTK_PDMA_LRO_CTRL_DW2); + + /* auto-learn score delta setting */ + mtk_w32(eth, MTK_HW_LRO_REPLACE_DELTA, MTK_PDMA_LRO_ALT_SCORE_DELTA); + + /* set refresh timer for altering flows to 1 sec. (unit: 20us) */ + mtk_w32(eth, (MTK_HW_LRO_TIMER_UNIT << 16) | MTK_HW_LRO_REFRESH_TIME, + MTK_PDMA_LRO_ALT_REFRESH_TIMER); + + /* set HW LRO mode & the max aggregation count for rx packets */ + lro_ctrl_dw3 |= MTK_ADMA_MODE | (MTK_HW_LRO_MAX_AGG_CNT & 0xff); + + /* the minimal remaining room of SDL0 in RXD for lro aggregation */ + lro_ctrl_dw3 |= MTK_LRO_MIN_RXD_SDL; + + /* enable HW LRO */ + lro_ctrl_dw0 |= MTK_LRO_EN; + + mtk_w32(eth, lro_ctrl_dw3, MTK_PDMA_LRO_CTRL_DW3); + mtk_w32(eth, lro_ctrl_dw0, MTK_PDMA_LRO_CTRL_DW0); + + return 0; +} + +static void mtk_hwlro_rx_uninit(struct mtk_eth *eth) +{ + int i; + u32 val; + + /* relinquish lro rings, flush aggregated packets */ + mtk_w32(eth, MTK_LRO_RING_RELINQUISH_REQ, MTK_PDMA_LRO_CTRL_DW0); + + /* wait for relinquishments done */ + for (i = 0; i < 10; i++) { + val = mtk_r32(eth, MTK_PDMA_LRO_CTRL_DW0); + if (val & MTK_LRO_RING_RELINQUISH_DONE) { + msleep(20); + continue; + } + } + + /* invalidate lro rings */ + for (i = 1; i < MTK_MAX_RX_RING_NUM; i++) + 
mtk_w32(eth, 0, MTK_LRO_CTRL_DW2_CFG(i)); + + /* disable HW LRO */ + mtk_w32(eth, 0, MTK_PDMA_LRO_CTRL_DW0); +} + /* wait for DMA to finish whatever it is doing before we start using it again */ static int mtk_dma_busy_wait(struct mtk_eth *eth) { @@ -1235,6 +1377,7 @@ static int mtk_dma_busy_wait(struct mtk_eth *eth) static int mtk_dma_init(struct mtk_eth *eth) { int err; + u32 i; if (mtk_dma_busy_wait(eth)) return -EBUSY; @@ -1250,10 +1393,21 @@ static int mtk_dma_init(struct mtk_eth *eth) if (err) return err; - err = mtk_rx_alloc(eth); + err = mtk_rx_alloc(eth, 0, MTK_RX_FLAGS_NORMAL); if (err) return err; + if (eth->hwlro) { + for (i = 1; i < MTK_MAX_RX_RING_NUM; i++) { + err = mtk_rx_alloc(eth, i, MTK_RX_FLAGS_HWLRO); + if (err) + return err; + } + err = mtk_hwlro_rx_init(eth); + if (err) + return err; + } + /* Enable random early drop and set drop threshold automatically */ mtk_w32(eth, FC_THRES_DROP_MODE | FC_THRES_DROP_EN | FC_THRES_MIN, MTK_QDMA_FC_THRES); @@ -1278,7 +1432,14 @@ static void mtk_dma_free(struct mtk_eth *eth) eth->phy_scratch_ring = 0; } mtk_tx_clean(eth); - mtk_rx_clean(eth); + mtk_rx_clean(eth, 0); + + if (eth->hwlro) { + mtk_hwlro_rx_uninit(eth); + for (i = 1; i < MTK_MAX_RX_RING_NUM; i++) + mtk_rx_clean(eth, i); + } + kfree(eth->scratch_head); } @@ -1873,6 +2034,9 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) mac->hw = eth; mac->of_node = np; + memset(mac->hwlro_ip, 0, sizeof(mac->hwlro_ip)); + mac->hwlro_ip_cnt = 0; + mac->hw_stats = devm_kzalloc(eth->dev, sizeof(*mac->hw_stats), GFP_KERNEL); @@ -1889,6 +2053,11 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) eth->netdev[id]->watchdog_timeo = 5 * HZ; eth->netdev[id]->netdev_ops = &mtk_netdev_ops; eth->netdev[id]->base_addr = (unsigned long)eth->base; + + eth->netdev[id]->hw_features = MTK_HW_FEATURES; + if (eth->hwlro) + eth->netdev[id]->hw_features |= NETIF_F_LRO; + eth->netdev[id]->vlan_features = MTK_HW_FEATURES & 
~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX); eth->netdev[id]->features |= MTK_HW_FEATURES; @@ -1941,6 +2110,8 @@ static int mtk_probe(struct platform_device *pdev) return PTR_ERR(eth->pctl); } + eth->hwlro = of_property_read_bool(pdev->dev.of_node, "mediatek,hwlro"); + for (i = 0; i < 3; i++) { eth->irq[i] = platform_get_irq(pdev, i); if (eth->irq[i] < 0) { diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 79954b4..7c5e534 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -39,7 +39,21 @@ NETIF_F_SG | NETIF_F_TSO | \ NETIF_F_TSO6 | \ NETIF_F_IPV6_CSUM) -#define NEXT_RX_DESP_IDX(X) (((X) + 1) & (MTK_DMA_SIZE - 1)) +#define NEXT_RX_DESP_IDX(X, Y) (((X) + 1) & ((Y) - 1)) + +#define MTK_MAX_RX_RING_NUM 4 +#define MTK_HW_LRO_DMA_SIZE 8 + +#define MTK_MAX_LRO_RX_LENGTH (4096 * 3) +#define MTK_MAX_LRO_IP_CNT 2 +#define MTK_HW_LRO_TIMER_UNIT 1 /* 20 us */ +#define MTK_HW_LRO_REFRESH_TIME 50000 /* 1 sec. 
*/ +#define MTK_HW_LRO_AGG_TIME 10 /* 200us */ +#define MTK_HW_LRO_AGE_TIME 50 /* 1ms */ +#define MTK_HW_LRO_MAX_AGG_CNT 64 +#define MTK_HW_LRO_BW_THRE 3000 +#define MTK_HW_LRO_REPLACE_DELTA 1000 +#define MTK_HW_LRO_SDL_REMAIN_ROOM 1522 /* Frame Engine Global Reset Register */ #define MTK_RST_GL 0x04 @@ -50,6 +64,9 @@ #define MTK_GDM1_AF BIT(28) #define MTK_GDM2_AF BIT(29) +/* PDMA HW LRO Alter Flow Timer Register */ +#define MTK_PDMA_LRO_ALT_REFRESH_TIMER 0x1c + /* Frame Engine Interrupt Grouping Register */ #define MTK_FE_INT_GRP 0x20 @@ -70,12 +87,29 @@ /* PDMA RX Base Pointer Register */ #define MTK_PRX_BASE_PTR0 0x900 +#define MTK_PRX_BASE_PTR_CFG(x) (MTK_PRX_BASE_PTR0 + (x * 0x10)) /* PDMA RX Maximum Count Register */ #define MTK_PRX_MAX_CNT0 0x904 +#define MTK_PRX_MAX_CNT_CFG(x) (MTK_PRX_MAX_CNT0 + (x * 0x10)) /* PDMA RX CPU Pointer Register */ #define MTK_PRX_CRX_IDX0 0x908 +#define MTK_PRX_CRX_IDX_CFG(x) (MTK_PRX_CRX_IDX0 + (x * 0x10)) + +/* PDMA HW LRO Control Registers */ +#define MTK_PDMA_LRO_CTRL_DW0 0x980 +#define MTK_LRO_EN BIT(0) +#define MTK_L3_CKS_UPD_EN BIT(7) +#define MTK_LRO_ALT_PKT_CNT_MODE BIT(21) +#define MTK_LRO_RING_RELINQUISH_REQ (0x3 << 26) +#define MTK_LRO_RING_RELINQUISH_DONE (0x3 << 29) + +#define MTK_PDMA_LRO_CTRL_DW1 0x984 +#define MTK_PDMA_LRO_CTRL_DW2 0x988 +#define MTK_PDMA_LRO_CTRL_DW3 0x98c +#define MTK_ADMA_MODE BIT(15) +#define MTK_LRO_MIN_RXD_SDL (MTK_HW_LRO_SDL_REMAIN_ROOM << 16) /* PDMA Global Configuration Register */ #define MTK_PDMA_GLO_CFG 0xa04 @@ -84,6 +118,7 @@ /* PDMA Reset Index Register */ #define MTK_PDMA_RST_IDX 0xa08 #define MTK_PST_DRX_IDX0 BIT(16) +#define MTK_PST_DRX_IDX_CFG(x) (MTK_PST_DRX_IDX0 << (x)) /* PDMA Delay Interrupt Register */ #define MTK_PDMA_DELAY_INT 0xa0c @@ -94,10 +129,33 @@ /* PDMA Interrupt Mask Register */ #define MTK_PDMA_INT_MASK 0xa28 +/* PDMA HW LRO Alter Flow Delta Register */ +#define MTK_PDMA_LRO_ALT_SCORE_DELTA 0xa4c + /* PDMA Interrupt grouping registers */ #define 
MTK_PDMA_INT_GRP1 0xa50 #define MTK_PDMA_INT_GRP2 0xa54 +/* PDMA HW LRO IP Setting Registers */ +#define MTK_LRO_RX_RING0_DIP_DW0 0xb04 +#define MTK_LRO_DIP_DW0_CFG(x) (MTK_LRO_RX_RING0_DIP_DW0 + (x * 0x40)) +#define MTK_RING_MYIP_VLD BIT(9) + +/* PDMA HW LRO Ring Control Registers */ +#define MTK_LRO_RX_RING0_CTRL_DW1 0xb28 +#define MTK_LRO_RX_RING0_CTRL_DW2 0xb2c +#define MTK_LRO_RX_RING0_CTRL_DW3 0xb30 +#define MTK_LRO_CTRL_DW1_CFG(x) (MTK_LRO_RX_RING0_CTRL_DW1 + (x * 0x40)) +#define MTK_LRO_CTRL_DW2_CFG(x) (MTK_LRO_RX_RING0_CTRL_DW2 + (x * 0x40)) +#define MTK_LRO_CTRL_DW3_CFG(x) (MTK_LRO_RX_RING0_CTRL_DW3 + (x * 0x40)) +#define MTK_RING_AGE_TIME_L ((MTK_HW_LRO_AGE_TIME & 0x3ff) << 22) +#define MTK_RING_AGE_TIME_H ((MTK_HW_LRO_AGE_TIME >> 10) & 0x3f) +#define MTK_RING_AUTO_LERAN_MODE (3 << 6) +#define MTK_RING_VLD BIT(8) +#define MTK_RING_MAX_AGG_TIME ((MTK_HW_LRO_AGG_TIME & 0xffff) << 10) +#define MTK_RING_MAX_AGG_CNT_L ((MTK_HW_LRO_MAX_AGG_CNT & 0x3f) << 26) +#define MTK_RING_MAX_AGG_CNT_H ((MTK_HW_LRO_MAX_AGG_CNT >> 6) & 0x3) + /* QDMA TX Queue Configuration Registers */ #define MTK_QTX_CFG(x) (0x1800 + (x * 0x10)) #define QDMA_RES_THRES 4 @@ -132,7 +190,6 @@ /* QDMA Reset Index Register */ #define MTK_QDMA_RST_IDX 0x1A08 -#define MTK_PST_DRX_IDX0 BIT(16) /* QDMA Delay Interrupt Register */ #define MTK_QDMA_DELAY_INT 0x1A0C @@ -377,6 +434,12 @@ struct mtk_tx_ring { atomic_t free_count; }; +/* PDMA rx ring mode */ +enum mtk_rx_flags { + MTK_RX_FLAGS_NORMAL = 0, + MTK_RX_FLAGS_HWLRO, +}; + /* struct mtk_rx_ring - This struct holds info describing a RX ring * @dma: The descriptor ring * @data: The memory pointed at by the ring @@ -391,7 +454,10 @@ struct mtk_rx_ring { dma_addr_t phys; u16 frag_size; u16 buf_size; + u16 dma_size; + bool calc_idx_update; u16 calc_idx; + u32 crx_idx_reg; }; /* currently no SoC has more than 2 macs */ @@ -439,9 +505,10 @@ struct mtk_eth { unsigned long sysclk; struct regmap *ethsys; struct regmap *pctl; + bool hwlro; atomic_t 
dma_refcnt; struct mtk_tx_ring tx_ring; - struct mtk_rx_ring rx_ring; + struct mtk_rx_ring rx_ring[MTK_MAX_RX_RING_NUM]; struct napi_struct tx_napi; struct napi_struct rx_napi; struct mtk_tx_dma *scratch_ring; @@ -470,6 +537,8 @@ struct mtk_mac { struct mtk_eth *hw; struct mtk_hw_stats *hw_stats; struct phy_device *phy_dev; + __be32 hwlro_ip[MTK_MAX_LRO_IP_CNT]; + int hwlro_ip_cnt; }; /* the struct describing the SoC. these are declared in the soc_xyz.c files */ -- cgit v1.1 From 7aab747e5563ecbc9f3cb64ddea13fe7b9fee2bd Mon Sep 17 00:00:00 2001 From: Nelson Chang Date: Sat, 17 Sep 2016 23:50:56 +0800 Subject: net: ethernet: mediatek: add ethtool functions to configure RX flows of HW LRO This patch adds ethtool functions to set RX flows for HW LRO. Because the HW LRO hardware can only recognize the destination IP of TCP/IP RX flows, the ethtool command to add a HW LRO flow is as below: ethtool -N [devname] flow-type tcp4 dst-ip [ip_addr] loc [0~1] Also, because the hardware supports four destination IPs in total, each GMAC (GMAC1/GMAC2) can set at most two IPs. Signed-off-by: Nelson Chang Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 236 ++++++++++++++++++++++++++++ 1 file changed, 236 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 18600cb..481f360 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1357,6 +1357,182 @@ static void mtk_hwlro_rx_uninit(struct mtk_eth *eth) mtk_w32(eth, 0, MTK_PDMA_LRO_CTRL_DW0); } +static void mtk_hwlro_val_ipaddr(struct mtk_eth *eth, int idx, __be32 ip) +{ + u32 reg_val; + + reg_val = mtk_r32(eth, MTK_LRO_CTRL_DW2_CFG(idx)); + + /* invalidate the IP setting */ + mtk_w32(eth, (reg_val & ~MTK_RING_MYIP_VLD), MTK_LRO_CTRL_DW2_CFG(idx)); + + mtk_w32(eth, ip, MTK_LRO_DIP_DW0_CFG(idx)); + + /* validate the IP setting */ + mtk_w32(eth, (reg_val | MTK_RING_MYIP_VLD), MTK_LRO_CTRL_DW2_CFG(idx)); +} + +static void mtk_hwlro_inval_ipaddr(struct mtk_eth *eth, int idx) +{ + u32 reg_val; + + reg_val = mtk_r32(eth, MTK_LRO_CTRL_DW2_CFG(idx)); + + /* invalidate the IP setting */ + mtk_w32(eth, (reg_val & ~MTK_RING_MYIP_VLD), MTK_LRO_CTRL_DW2_CFG(idx)); + + mtk_w32(eth, 0, MTK_LRO_DIP_DW0_CFG(idx)); +} + +static int mtk_hwlro_get_ip_cnt(struct mtk_mac *mac) +{ + int cnt = 0; + int i; + + for (i = 0; i < MTK_MAX_LRO_IP_CNT; i++) { + if (mac->hwlro_ip[i]) + cnt++; + } + + return cnt; +} + +static int mtk_hwlro_add_ipaddr(struct net_device *dev, + struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fsp = + (struct ethtool_rx_flow_spec *)&cmd->fs; + struct mtk_mac *mac = netdev_priv(dev); + struct mtk_eth *eth = mac->hw; + int hwlro_idx; + + if ((fsp->flow_type != TCP_V4_FLOW) || + (!fsp->h_u.tcp_ip4_spec.ip4dst) || + (fsp->location > 1)) + return -EINVAL; + + mac->hwlro_ip[fsp->location] = htonl(fsp->h_u.tcp_ip4_spec.ip4dst); + hwlro_idx = (mac->id * MTK_MAX_LRO_IP_CNT) + fsp->location; + + mac->hwlro_ip_cnt = mtk_hwlro_get_ip_cnt(mac); + + mtk_hwlro_val_ipaddr(eth, hwlro_idx, 
mac->hwlro_ip[fsp->location]); + + return 0; +} + +static int mtk_hwlro_del_ipaddr(struct net_device *dev, + struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fsp = + (struct ethtool_rx_flow_spec *)&cmd->fs; + struct mtk_mac *mac = netdev_priv(dev); + struct mtk_eth *eth = mac->hw; + int hwlro_idx; + + if (fsp->location > 1) + return -EINVAL; + + mac->hwlro_ip[fsp->location] = 0; + hwlro_idx = (mac->id * MTK_MAX_LRO_IP_CNT) + fsp->location; + + mac->hwlro_ip_cnt = mtk_hwlro_get_ip_cnt(mac); + + mtk_hwlro_inval_ipaddr(eth, hwlro_idx); + + return 0; +} + +static void mtk_hwlro_netdev_disable(struct net_device *dev) +{ + struct mtk_mac *mac = netdev_priv(dev); + struct mtk_eth *eth = mac->hw; + int i, hwlro_idx; + + for (i = 0; i < MTK_MAX_LRO_IP_CNT; i++) { + mac->hwlro_ip[i] = 0; + hwlro_idx = (mac->id * MTK_MAX_LRO_IP_CNT) + i; + + mtk_hwlro_inval_ipaddr(eth, hwlro_idx); + } + + mac->hwlro_ip_cnt = 0; +} + +static int mtk_hwlro_get_fdir_entry(struct net_device *dev, + struct ethtool_rxnfc *cmd) +{ + struct mtk_mac *mac = netdev_priv(dev); + struct ethtool_rx_flow_spec *fsp = + (struct ethtool_rx_flow_spec *)&cmd->fs; + + /* only tcp dst ipv4 is meaningful, others are meaningless */ + fsp->flow_type = TCP_V4_FLOW; + fsp->h_u.tcp_ip4_spec.ip4dst = ntohl(mac->hwlro_ip[fsp->location]); + fsp->m_u.tcp_ip4_spec.ip4dst = 0; + + fsp->h_u.tcp_ip4_spec.ip4src = 0; + fsp->m_u.tcp_ip4_spec.ip4src = 0xffffffff; + fsp->h_u.tcp_ip4_spec.psrc = 0; + fsp->m_u.tcp_ip4_spec.psrc = 0xffff; + fsp->h_u.tcp_ip4_spec.pdst = 0; + fsp->m_u.tcp_ip4_spec.pdst = 0xffff; + fsp->h_u.tcp_ip4_spec.tos = 0; + fsp->m_u.tcp_ip4_spec.tos = 0xff; + + return 0; +} + +static int mtk_hwlro_get_fdir_all(struct net_device *dev, + struct ethtool_rxnfc *cmd, + u32 *rule_locs) +{ + struct mtk_mac *mac = netdev_priv(dev); + int cnt = 0; + int i; + + for (i = 0; i < MTK_MAX_LRO_IP_CNT; i++) { + if (mac->hwlro_ip[i]) { + rule_locs[cnt] = i; + cnt++; + } + } + + cmd->rule_cnt = cnt; + + return 0; +} + 
+static netdev_features_t mtk_fix_features(struct net_device *dev, + netdev_features_t features) +{ + if (!(features & NETIF_F_LRO)) { + struct mtk_mac *mac = netdev_priv(dev); + int ip_cnt = mtk_hwlro_get_ip_cnt(mac); + + if (ip_cnt) { + netdev_info(dev, "RX flow is programmed, LRO should keep on\n"); + + features |= NETIF_F_LRO; + } + } + + return features; +} + +static int mtk_set_features(struct net_device *dev, netdev_features_t features) +{ + int err = 0; + + if (!((dev->features ^ features) & NETIF_F_LRO)) + return 0; + + if (!(features & NETIF_F_LRO)) + mtk_hwlro_netdev_disable(dev); + + return err; +} + /* wait for DMA to finish whatever it is doing before we start using it again */ static int mtk_dma_busy_wait(struct mtk_eth *eth) { @@ -1971,6 +2147,62 @@ static void mtk_get_ethtool_stats(struct net_device *dev, } while (u64_stats_fetch_retry_irq(&hwstats->syncp, start)); } +static int mtk_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, + u32 *rule_locs) +{ + int ret = -EOPNOTSUPP; + + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + if (dev->features & NETIF_F_LRO) { + cmd->data = MTK_MAX_RX_RING_NUM; + ret = 0; + } + break; + case ETHTOOL_GRXCLSRLCNT: + if (dev->features & NETIF_F_LRO) { + struct mtk_mac *mac = netdev_priv(dev); + + cmd->rule_cnt = mac->hwlro_ip_cnt; + ret = 0; + } + break; + case ETHTOOL_GRXCLSRULE: + if (dev->features & NETIF_F_LRO) + ret = mtk_hwlro_get_fdir_entry(dev, cmd); + break; + case ETHTOOL_GRXCLSRLALL: + if (dev->features & NETIF_F_LRO) + ret = mtk_hwlro_get_fdir_all(dev, cmd, + rule_locs); + break; + default: + break; + } + + return ret; +} + +static int mtk_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +{ + int ret = -EOPNOTSUPP; + + switch (cmd->cmd) { + case ETHTOOL_SRXCLSRLINS: + if (dev->features & NETIF_F_LRO) + ret = mtk_hwlro_add_ipaddr(dev, cmd); + break; + case ETHTOOL_SRXCLSRLDEL: + if (dev->features & NETIF_F_LRO) + ret = mtk_hwlro_del_ipaddr(dev, cmd); + break; + default: + break; + } 
+ + return ret; +} + static const struct ethtool_ops mtk_ethtool_ops = { .get_settings = mtk_get_settings, .set_settings = mtk_set_settings, @@ -1982,6 +2214,8 @@ static const struct ethtool_ops mtk_ethtool_ops = { .get_strings = mtk_get_strings, .get_sset_count = mtk_get_sset_count, .get_ethtool_stats = mtk_get_ethtool_stats, + .get_rxnfc = mtk_get_rxnfc, + .set_rxnfc = mtk_set_rxnfc, }; static const struct net_device_ops mtk_netdev_ops = { @@ -1996,6 +2230,8 @@ static const struct net_device_ops mtk_netdev_ops = { .ndo_change_mtu = eth_change_mtu, .ndo_tx_timeout = mtk_tx_timeout, .ndo_get_stats64 = mtk_get_stats64, + .ndo_fix_features = mtk_fix_features, + .ndo_set_features = mtk_set_features, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = mtk_poll_controller, #endif -- cgit v1.1 From 004e6cc6c181aa109427594fca35eb55d89d780e Mon Sep 17 00:00:00 2001 From: Nelson Chang Date: Sat, 17 Sep 2016 23:50:57 +0800 Subject: net: ethernet: mediatek: add the dts property to set if the HW supports LRO Add the dts property for the capability if the hardware supports LRO. Signed-off-by: Nelson Chang Signed-off-by: David S. 
Miller --- Documentation/devicetree/bindings/net/mediatek-net.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/mediatek-net.txt b/Documentation/devicetree/bindings/net/mediatek-net.txt index 32eaaca..6103e55 100644 --- a/Documentation/devicetree/bindings/net/mediatek-net.txt +++ b/Documentation/devicetree/bindings/net/mediatek-net.txt @@ -24,7 +24,7 @@ Required properties: Optional properties: - interrupt-parent: Should be the phandle for the interrupt controller that services interrupts for this device - +- mediatek,hwlro: the capability if the hardware supports LRO functions * Ethernet MAC node @@ -51,6 +51,7 @@ eth: ethernet@1b100000 { reset-names = "eth"; mediatek,ethsys = <&ethsys>; mediatek,pctl = <&syscfg_pctl_a>; + mediatek,hwlro; #address-cells = <1>; #size-cells = <0>; -- cgit v1.1 From 106323b905a6bcd21ff83dd4e19566282fd5eb52 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 17 Sep 2016 15:52:17 +0000 Subject: cxgb4: Fix return value check in cfg_queues_uld() Fix the return value check, which was testing the wrong variable, in cfg_queues_uld(). Fixes: 94cdb8bb993a ("cxgb4: Add support for dynamic allocation of resources for ULD") Signed-off-by: Wei Yongjun Signed-off-by: David S. 
Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index fc04e3b..d12a73e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -308,7 +308,7 @@ int cfg_queues_uld(struct adapter *adap, unsigned int uld_type, } rxq_info->rspq_id = kcalloc(nrxq, sizeof(unsigned short), GFP_KERNEL); - if (!rxq_info->uldrxq) { + if (!rxq_info->rspq_id) { kfree(rxq_info->uldrxq); kfree(rxq_info); return -ENOMEM; -- cgit v1.1 From 1486587b2fcda08dee7eab23784d504eed772c45 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 18 Sep 2016 00:57:30 +0200 Subject: pie: use qdisc_dequeue_head wrapper Doesn't change generated code. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/sched/sch_pie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index a570b0b..d976d74 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -511,7 +511,7 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) static struct sk_buff *pie_qdisc_dequeue(struct Qdisc *sch) { struct sk_buff *skb; - skb = __qdisc_dequeue_head(sch, &sch->q); + skb = qdisc_dequeue_head(sch); if (!skb) return NULL; -- cgit v1.1 From 97d0678f913369af0dc8b510a682a641654ab743 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 18 Sep 2016 00:57:31 +0200 Subject: sched: don't use skb queue helpers A followup change will replace the sk_buff_head in the qdisc struct with a slightly different list. Use of the sk_buff_head helpers will thus cause compiler warnings. Open-code these accesses in an extra change to ease review. Signed-off-by: Florian Westphal Signed-off-by: David S. 
Miller --- net/sched/sch_fifo.c | 4 ++-- net/sched/sch_generic.c | 2 +- net/sched/sch_netem.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index baeed6a..1e37247 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -31,7 +31,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { - if (likely(skb_queue_len(&sch->q) < sch->limit)) + if (likely(sch->q.qlen < sch->limit)) return qdisc_enqueue_tail(skb, sch); return qdisc_drop(skb, sch, to_free); @@ -42,7 +42,7 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch, { unsigned int prev_backlog; - if (likely(skb_queue_len(&sch->q) < sch->limit)) + if (likely(sch->q.qlen < sch->limit)) return qdisc_enqueue_tail(skb, sch); prev_backlog = sch->qstats.backlog; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 0d21b56..5e63bf6 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -486,7 +486,7 @@ static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv, static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, struct sk_buff **to_free) { - if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) { + if (qdisc->q.qlen < qdisc_dev(qdisc)->tx_queue_len) { int band = prio2band[skb->priority & TC_PRIO_MAX]; struct pfifo_fast_priv *priv = qdisc_priv(qdisc); struct sk_buff_head *list = band2list(priv, band); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index aaaf021..1832d77 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -502,7 +502,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, 1<<(prandom_u32() % 8); } - if (unlikely(skb_queue_len(&sch->q) >= sch->limit)) + if (unlikely(sch->q.qlen >= sch->limit)) return qdisc_drop(skb, sch, to_free); qdisc_qstats_backlog_inc(sch, skb); @@ -522,7 +522,7 @@ static int 
netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (q->rate) { struct sk_buff *last; - if (!skb_queue_empty(&sch->q)) + if (sch->q.qlen) last = skb_peek_tail(&sch->q); else last = netem_rb_to_skb(rb_last(&q->t_root)); -- cgit v1.1 From ec323368793b8570c02e723127611a8d906a9b3f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 18 Sep 2016 00:57:32 +0200 Subject: sched: remove qdisc arg from __qdisc_dequeue_head Moves qdisc stat accounting to qdisc_dequeue_head. The only direct caller of the __qdisc_dequeue_head version open-codes this now. This allows us to later use __qdisc_dequeue_head as a replacement of __skb_dequeue() (which operates on sk_buff_head list). Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/sch_generic.h | 15 ++++++++------- net/sched/sch_generic.c | 7 ++++++- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 52a2015..0741ed4 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -614,11 +614,17 @@ static inline int qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch) return __qdisc_enqueue_tail(skb, sch, &sch->q); } -static inline struct sk_buff *__qdisc_dequeue_head(struct Qdisc *sch, - struct sk_buff_head *list) +static inline struct sk_buff *__qdisc_dequeue_head(struct sk_buff_head *list) { struct sk_buff *skb = __skb_dequeue(list); + return skb; +} + +static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) +{ + struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); + if (likely(skb != NULL)) { qdisc_qstats_backlog_dec(sch, skb); qdisc_bstats_update(sch, skb); @@ -627,11 +633,6 @@ static inline struct sk_buff *__qdisc_dequeue_head(struct Qdisc *sch, return skb; } -static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) -{ - return __qdisc_dequeue_head(sch, &sch->q); -} - /* Instead of calling kfree_skb() while root qdisc lock is held, * queue the skb for future freeing at end of 
__dev_xmit_skb() */ diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5e63bf6..73877d9 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -506,7 +506,12 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) if (likely(band >= 0)) { struct sk_buff_head *list = band2list(priv, band); - struct sk_buff *skb = __qdisc_dequeue_head(qdisc, list); + struct sk_buff *skb = __qdisc_dequeue_head(list); + + if (likely(skb != NULL)) { + qdisc_qstats_backlog_dec(qdisc, skb); + qdisc_bstats_update(qdisc, skb); + } qdisc->q.qlen--; if (skb_queue_empty(list)) -- cgit v1.1 From ed760cb8aae7c2b84c193d4a7637b0c9e752f07e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 18 Sep 2016 00:57:33 +0200 Subject: sched: replace __skb_dequeue with __qdisc_dequeue_head After previous patch these functions are identical. Replace __skb_dequeue in qdiscs with __qdisc_dequeue_head. Next patch will then make __qdisc_dequeue_head handle single-linked list instead of struct sk_buff_head argument. Doesn't change generated code. Signed-off-by: Florian Westphal Signed-off-by: David S. 
Miller --- net/sched/sch_codel.c | 4 ++-- net/sched/sch_netem.c | 2 +- net/sched/sch_pie.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index 4002df3..5bfa79e 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -69,7 +69,7 @@ struct codel_sched_data { static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx) { struct Qdisc *sch = ctx; - struct sk_buff *skb = __skb_dequeue(&sch->q); + struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); if (skb) sch->qstats.backlog -= qdisc_pkt_len(skb); @@ -172,7 +172,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt) qlen = sch->q.qlen; while (sch->q.qlen > sch->limit) { - struct sk_buff *skb = __skb_dequeue(&sch->q); + struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); dropped += qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 1832d77..0a964b3 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -587,7 +587,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) struct rb_node *p; tfifo_dequeue: - skb = __skb_dequeue(&sch->q); + skb = __qdisc_dequeue_head(&sch->q); if (skb) { qdisc_qstats_backlog_dec(sch, skb); deliver: diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index d976d74..5c3a99d 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -231,7 +231,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt) /* Drop excess packets if new limit is lower */ qlen = sch->q.qlen; while (sch->q.qlen > sch->limit) { - struct sk_buff *skb = __skb_dequeue(&sch->q); + struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); dropped += qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); -- cgit v1.1 From 48da34b7a74201f15315cb1fc40bb9a7bd2b4940 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 18 Sep 2016 00:57:34 +0200 Subject: sched: add and use qdisc_skb_head helpers This change replaces 
sk_buff_head struct in Qdiscs with new qdisc_skb_head. Its similar to the skb_buff_head api, but does not use skb->prev pointers. Qdiscs will commonly enqueue at the tail of a list and dequeue at head. While skb_buff_head works fine for this, enqueue/dequeue needs to also adjust the prev pointer of next element. The ->prev pointer is not required for qdiscs so we can just leave it undefined and avoid one cacheline write access for en/dequeue. Suggested-by: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/sch_generic.h | 63 ++++++++++++++++++++++++++++++++++++++--------- net/sched/sch_generic.c | 21 ++++++++-------- net/sched/sch_htb.c | 24 +++++++++++++++--- net/sched/sch_netem.c | 14 +++++++++-- 4 files changed, 94 insertions(+), 28 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 0741ed4..e6aa0a2 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -36,6 +36,14 @@ struct qdisc_size_table { u16 data[]; }; +/* similar to sk_buff_head, but skb->prev pointer is undefined. 
*/ +struct qdisc_skb_head { + struct sk_buff *head; + struct sk_buff *tail; + __u32 qlen; + spinlock_t lock; +}; + struct Qdisc { int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, @@ -76,7 +84,7 @@ struct Qdisc { * For performance sake on SMP, we put highly modified fields at the end */ struct sk_buff *gso_skb ____cacheline_aligned_in_smp; - struct sk_buff_head q; + struct qdisc_skb_head q; struct gnet_stats_basic_packed bstats; seqcount_t running; struct gnet_stats_queue qstats; @@ -600,10 +608,27 @@ static inline void qdisc_qstats_overlimit(struct Qdisc *sch) sch->qstats.overlimits++; } +static inline void qdisc_skb_head_init(struct qdisc_skb_head *qh) +{ + qh->head = NULL; + qh->tail = NULL; + qh->qlen = 0; +} + static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch, - struct sk_buff_head *list) + struct qdisc_skb_head *qh) { - __skb_queue_tail(list, skb); + struct sk_buff *last = qh->tail; + + if (last) { + skb->next = NULL; + last->next = skb; + qh->tail = skb; + } else { + qh->tail = skb; + qh->head = skb; + } + qh->qlen++; qdisc_qstats_backlog_inc(sch, skb); return NET_XMIT_SUCCESS; @@ -614,9 +639,17 @@ static inline int qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch) return __qdisc_enqueue_tail(skb, sch, &sch->q); } -static inline struct sk_buff *__qdisc_dequeue_head(struct sk_buff_head *list) +static inline struct sk_buff *__qdisc_dequeue_head(struct qdisc_skb_head *qh) { - struct sk_buff *skb = __skb_dequeue(list); + struct sk_buff *skb = qh->head; + + if (likely(skb != NULL)) { + qh->head = skb->next; + qh->qlen--; + if (qh->head == NULL) + qh->tail = NULL; + skb->next = NULL; + } return skb; } @@ -643,10 +676,10 @@ static inline void __qdisc_drop(struct sk_buff *skb, struct sk_buff **to_free) } static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch, - struct sk_buff_head *list, + struct qdisc_skb_head *qh, struct sk_buff **to_free) { - struct sk_buff *skb = __skb_dequeue(list); + struct sk_buff *skb = 
__qdisc_dequeue_head(qh); if (likely(skb != NULL)) { unsigned int len = qdisc_pkt_len(skb); @@ -667,7 +700,9 @@ static inline unsigned int qdisc_queue_drop_head(struct Qdisc *sch, static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch) { - return skb_peek(&sch->q); + const struct qdisc_skb_head *qh = &sch->q; + + return qh->head; } /* generic pseudo peek method for non-work-conserving qdisc */ @@ -702,15 +737,19 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) return skb; } -static inline void __qdisc_reset_queue(struct sk_buff_head *list) +static inline void __qdisc_reset_queue(struct qdisc_skb_head *qh) { /* * We do not know the backlog in bytes of this list, it * is up to the caller to correct it */ - if (!skb_queue_empty(list)) { - rtnl_kfree_skbs(list->next, list->prev); - __skb_queue_head_init(list); + ASSERT_RTNL(); + if (qh->qlen) { + rtnl_kfree_skbs(qh->head, qh->tail); + + qh->head = NULL; + qh->tail = NULL; + qh->qlen = 0; } } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 73877d9..6cfb6e9 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -466,7 +466,7 @@ static const u8 prio2band[TC_PRIO_MAX + 1] = { */ struct pfifo_fast_priv { u32 bitmap; - struct sk_buff_head q[PFIFO_FAST_BANDS]; + struct qdisc_skb_head q[PFIFO_FAST_BANDS]; }; /* @@ -477,7 +477,7 @@ struct pfifo_fast_priv { */ static const int bitmap2band[] = {-1, 0, 1, 0, 2, 0, 1, 0}; -static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv, +static inline struct qdisc_skb_head *band2list(struct pfifo_fast_priv *priv, int band) { return priv->q + band; @@ -489,7 +489,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, if (qdisc->q.qlen < qdisc_dev(qdisc)->tx_queue_len) { int band = prio2band[skb->priority & TC_PRIO_MAX]; struct pfifo_fast_priv *priv = qdisc_priv(qdisc); - struct sk_buff_head *list = band2list(priv, band); + struct qdisc_skb_head *list = band2list(priv, band); 
priv->bitmap |= (1 << band); qdisc->q.qlen++; @@ -505,8 +505,8 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) int band = bitmap2band[priv->bitmap]; if (likely(band >= 0)) { - struct sk_buff_head *list = band2list(priv, band); - struct sk_buff *skb = __qdisc_dequeue_head(list); + struct qdisc_skb_head *qh = band2list(priv, band); + struct sk_buff *skb = __qdisc_dequeue_head(qh); if (likely(skb != NULL)) { qdisc_qstats_backlog_dec(qdisc, skb); @@ -514,7 +514,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) } qdisc->q.qlen--; - if (skb_queue_empty(list)) + if (qh->qlen == 0) priv->bitmap &= ~(1 << band); return skb; @@ -529,9 +529,9 @@ static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc) int band = bitmap2band[priv->bitmap]; if (band >= 0) { - struct sk_buff_head *list = band2list(priv, band); + struct qdisc_skb_head *qh = band2list(priv, band); - return skb_peek(list); + return qh->head; } return NULL; @@ -569,7 +569,7 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt) struct pfifo_fast_priv *priv = qdisc_priv(qdisc); for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) - __skb_queue_head_init(band2list(priv, prio)); + qdisc_skb_head_init(band2list(priv, prio)); /* Can by-pass the queue discipline */ qdisc->flags |= TCQ_F_CAN_BYPASS; @@ -617,7 +617,8 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p); sch->padded = (char *) sch - (char *) p; } - skb_queue_head_init(&sch->q); + qdisc_skb_head_init(&sch->q); + spin_lock_init(&sch->q.lock); spin_lock_init(&sch->busylock); lockdep_set_class(&sch->busylock, diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 53dbfa1..c798d0d 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -162,7 +162,7 @@ struct htb_sched { struct work_struct work; /* non shaped skbs; let them go directly thru */ - struct sk_buff_head direct_queue; + struct qdisc_skb_head direct_queue; long direct_pkts; struct 
qdisc_watchdog watchdog; @@ -570,6 +570,22 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) list_del_init(&cl->un.leaf.drop_list); } +static void htb_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch, + struct qdisc_skb_head *qh) +{ + struct sk_buff *last = qh->tail; + + if (last) { + skb->next = NULL; + last->next = skb; + qh->tail = skb; + } else { + qh->tail = skb; + qh->head = skb; + } + qh->qlen++; +} + static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { @@ -580,7 +596,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (cl == HTB_DIRECT) { /* enqueue to helper queue */ if (q->direct_queue.qlen < q->direct_qlen) { - __skb_queue_tail(&q->direct_queue, skb); + htb_enqueue_tail(skb, sch, &q->direct_queue); q->direct_pkts++; } else { return qdisc_drop(skb, sch, to_free); @@ -888,7 +904,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) unsigned long start_at; /* try to dequeue direct packets as high prio (!) to minimize cpu work */ - skb = __skb_dequeue(&q->direct_queue); + skb = __qdisc_dequeue_head(&q->direct_queue); if (skb != NULL) { ok: qdisc_bstats_update(sch, skb); @@ -1019,7 +1035,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) qdisc_watchdog_init(&q->watchdog, sch); INIT_WORK(&q->work, htb_work_func); - __skb_queue_head_init(&q->direct_queue); + qdisc_skb_head_init(&q->direct_queue); if (tb[TCA_HTB_DIRECT_QLEN]) q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 0a964b3..9f7b380 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -413,6 +413,16 @@ static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch, return segs; } +static void netem_enqueue_skb_head(struct qdisc_skb_head *qh, struct sk_buff *skb) +{ + skb->next = qh->head; + + if (!qh->head) + qh->tail = skb; + qh->head = skb; + qh->qlen++; +} + /* * Insert one skb into qdisc. 
* Note: parent depends on return value to account for queue length. @@ -523,7 +533,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff *last; if (sch->q.qlen) - last = skb_peek_tail(&sch->q); + last = sch->q.tail; else last = netem_rb_to_skb(rb_last(&q->t_root)); if (last) { @@ -552,7 +562,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, cb->time_to_send = psched_get_time(); q->counter = 0; - __skb_queue_head(&sch->q, skb); + netem_enqueue_skb_head(&sch->q, skb); sch->qstats.requeues++; } -- cgit v1.1 From 47a66e45d7a7613322549c2475ea9d809baaf514 Mon Sep 17 00:00:00 2001 From: "Kristian H. Kristensen" Date: Tue, 13 Sep 2016 14:20:45 -0700 Subject: drm: Only use compat ioctl for addfb2 on X86/IA64 Similar to struct drm_update_draw, struct drm_mode_fb_cmd2 has an unaligned 64 bit field (modifier). This get packed differently between 32 bit and 64 bit modes on architectures that can handle unaligned 64 bit access (X86 and IA64). Other architectures pack the structs the same and don't need the compat wrapper. Use the same condition for drm_mode_fb_cmd2 as we use for drm_update_draw. Note that only the modifier will be packed differently between compat and non-compat versions. Reviewed-by: Rob Clark Signed-off-by: Kristian H. 
Kristensen [seanpaul added note at bottom of commit msg re: modifier] Signed-off-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/1473801645-116011-1-git-send-email-hoegsberg@chromium.org Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_ioc32.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c index 57676f8..a628975 100644 --- a/drivers/gpu/drm/drm_ioc32.c +++ b/drivers/gpu/drm/drm_ioc32.c @@ -1015,6 +1015,7 @@ static int compat_drm_wait_vblank(struct file *file, unsigned int cmd, return 0; } +#if defined(CONFIG_X86) || defined(CONFIG_IA64) typedef struct drm_mode_fb_cmd232 { u32 fb_id; u32 width; @@ -1071,6 +1072,7 @@ static int compat_drm_mode_addfb2(struct file *file, unsigned int cmd, return 0; } +#endif static drm_ioctl_compat_t *drm_compat_ioctls[] = { [DRM_IOCTL_NR(DRM_IOCTL_VERSION32)] = compat_drm_version, @@ -1104,7 +1106,9 @@ static drm_ioctl_compat_t *drm_compat_ioctls[] = { [DRM_IOCTL_NR(DRM_IOCTL_UPDATE_DRAW32)] = compat_drm_update_draw, #endif [DRM_IOCTL_NR(DRM_IOCTL_WAIT_VBLANK32)] = compat_drm_wait_vblank, +#if defined(CONFIG_X86) || defined(CONFIG_IA64) [DRM_IOCTL_NR(DRM_IOCTL_MODE_ADDFB232)] = compat_drm_mode_addfb2, +#endif }; /** -- cgit v1.1 From b588479358ce26f32138e0f0a7ab0678f8e3e601 Mon Sep 17 00:00:00 2001 From: Ilan Tayari Date: Sun, 18 Sep 2016 07:42:53 +0000 Subject: xfrm: Fix memory leak of aead algorithm name commit 1a6509d99122 ("[IPSEC]: Add support for combined mode algorithms") introduced aead. The function attach_aead kmemdup()s the algorithm name during xfrm_state_construct(). However this memory is never freed. Implementation has since been slightly modified in commit ee5c23176fcc ("xfrm: Clone states properly on migration") without resolving this leak. This patch adds a kfree() call for the aead algorithm name. 
Fixes: 1a6509d99122 ("[IPSEC]: Add support for combined mode algorithms") Signed-off-by: Ilan Tayari Acked-by: Rami Rosen Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 9895a8c..a30f898d 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -332,6 +332,7 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) { tasklet_hrtimer_cancel(&x->mtimer); del_timer_sync(&x->rtimer); + kfree(x->aead); kfree(x->aalg); kfree(x->ealg); kfree(x->calg); -- cgit v1.1 From 4de349e786a3a2d51bd02d56f3de151bbc3c3df9 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 17 Aug 2016 12:41:08 -0300 Subject: can: flexcan: fix resume function On a imx6ul-pico board the following error is seen during system suspend: dpm_run_callback(): platform_pm_resume+0x0/0x54 returns -110 PM: Device 2090000.flexcan failed to resume: error -110 The reason for this suspend error is because when the CAN interface is not active the clocks are disabled and then flexcan_chip_enable() will always fail due to a timeout error. In order to fix this issue, only call flexcan_chip_enable/disable() when the CAN interface is active. Based on a patch from Dong Aisheng in the NXP kernel. 
Signed-off-by: Fabio Estevam Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 41c0fc9..16f7cad 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -1268,11 +1268,10 @@ static int __maybe_unused flexcan_suspend(struct device *device) struct flexcan_priv *priv = netdev_priv(dev); int err; - err = flexcan_chip_disable(priv); - if (err) - return err; - if (netif_running(dev)) { + err = flexcan_chip_disable(priv); + if (err) + return err; netif_stop_queue(dev); netif_device_detach(dev); } @@ -1285,13 +1284,17 @@ static int __maybe_unused flexcan_resume(struct device *device) { struct net_device *dev = dev_get_drvdata(device); struct flexcan_priv *priv = netdev_priv(dev); + int err; priv->can.state = CAN_STATE_ERROR_ACTIVE; if (netif_running(dev)) { netif_device_attach(dev); netif_start_queue(dev); + err = flexcan_chip_enable(priv); + if (err) + return err; } - return flexcan_chip_enable(priv); + return 0; } static SIMPLE_DEV_PM_OPS(flexcan_pm_ops, flexcan_suspend, flexcan_resume); -- cgit v1.1 From d8feef9bd447381952a33e6284241006f394c080 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sun, 18 Sep 2016 11:24:50 -0300 Subject: [media] cx23885/saa7134: assign q->dev to the PCI device Fix a regression caused by commit 2bc46b3ad3c1 ("[media] media/pci: convert drivers to use the new vb2_queue dev field"). Three places where q->dev should be set were missed, causing a WARN. Fixes: 2bc46b3ad3c1 ("[media] media/pci: convert drivers to use the new vb2_queue dev field"). 
Signed-off-by: Hans Verkuil Reported-by: Marton Balint Signed-off-by: Mauro Carvalho Chehab --- drivers/media/pci/cx23885/cx23885-417.c | 1 + drivers/media/pci/saa7134/saa7134-dvb.c | 1 + drivers/media/pci/saa7134/saa7134-empress.c | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/media/pci/cx23885/cx23885-417.c b/drivers/media/pci/cx23885/cx23885-417.c index efec2d1..4d080da 100644 --- a/drivers/media/pci/cx23885/cx23885-417.c +++ b/drivers/media/pci/cx23885/cx23885-417.c @@ -1552,6 +1552,7 @@ int cx23885_417_register(struct cx23885_dev *dev) q->mem_ops = &vb2_dma_sg_memops; q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->lock = &dev->lock; + q->dev = &dev->pci->dev; err = vb2_queue_init(q); if (err < 0) diff --git a/drivers/media/pci/saa7134/saa7134-dvb.c b/drivers/media/pci/saa7134/saa7134-dvb.c index db987e5..59a4b5f 100644 --- a/drivers/media/pci/saa7134/saa7134-dvb.c +++ b/drivers/media/pci/saa7134/saa7134-dvb.c @@ -1238,6 +1238,7 @@ static int dvb_init(struct saa7134_dev *dev) q->buf_struct_size = sizeof(struct saa7134_buf); q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->lock = &dev->lock; + q->dev = &dev->pci->dev; ret = vb2_queue_init(q); if (ret) { vb2_dvb_dealloc_frontends(&dev->frontends); diff --git a/drivers/media/pci/saa7134/saa7134-empress.c b/drivers/media/pci/saa7134/saa7134-empress.c index ca417a4..791a516 100644 --- a/drivers/media/pci/saa7134/saa7134-empress.c +++ b/drivers/media/pci/saa7134/saa7134-empress.c @@ -295,6 +295,7 @@ static int empress_init(struct saa7134_dev *dev) q->buf_struct_size = sizeof(struct saa7134_buf); q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->lock = &dev->lock; + q->dev = &dev->pci->dev; err = vb2_queue_init(q); if (err) return err; -- cgit v1.1 From 53f863a66904542b03204f2b115d050b04c11ba5 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 21 Jul 2016 14:12:40 +0200 Subject: Bluetooth: Put led_trigger field behind CONFIG_BT_LEDS The led_trigger field in hci_dev 
should be conditional based on if CONFIG_BT_LEDS is set or not. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index ee7fc47..b8d43bd 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -399,7 +399,9 @@ struct hci_dev { struct delayed_work rpa_expired; bdaddr_t rpa; +#if IS_ENABLED(CONFIG_BT_LEDS) struct led_trigger *power_led; +#endif int (*open)(struct hci_dev *hdev); int (*close)(struct hci_dev *hdev); -- cgit v1.1 From e64c97b53bc6727aa4385535166aaa047281e02d Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 21 Jul 2016 14:12:41 +0200 Subject: Bluetooth: Add combined LED trigger for controller power Instead of just having a LED trigger for power on a specific controller, this adds the LED trigger "bluetooth-power" that combines the power states of all controllers into a single trigger. This simplifies the trigger selection and also supports multiple controllers per host system via a single LED. 
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/af_bluetooth.c | 5 +++++ net/bluetooth/leds.c | 27 +++++++++++++++++++++++++++ net/bluetooth/leds.h | 10 ++++++++++ 3 files changed, 42 insertions(+) diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 0b5f729..1d96ff3 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -31,6 +31,7 @@ #include #include +#include "leds.h" #include "selftest.h" /* Bluetooth sockets */ @@ -726,6 +727,8 @@ static int __init bt_init(void) bt_debugfs = debugfs_create_dir("bluetooth", NULL); + bt_leds_init(); + err = bt_sysfs_init(); if (err < 0) return err; @@ -785,6 +788,8 @@ static void __exit bt_exit(void) bt_sysfs_cleanup(); + bt_leds_cleanup(); + debugfs_remove_recursive(bt_debugfs); } diff --git a/net/bluetooth/leds.c b/net/bluetooth/leds.c index 8319c84..cb670b5 100644 --- a/net/bluetooth/leds.c +++ b/net/bluetooth/leds.c @@ -11,6 +11,8 @@ #include "leds.h" +DEFINE_LED_TRIGGER(bt_power_led_trigger); + struct hci_basic_led_trigger { struct led_trigger led_trigger; struct hci_dev *hdev; @@ -24,6 +26,21 @@ void hci_leds_update_powered(struct hci_dev *hdev, bool enabled) if (hdev->power_led) led_trigger_event(hdev->power_led, enabled ? LED_FULL : LED_OFF); + + if (!enabled) { + struct hci_dev *d; + + read_lock(&hci_dev_list_lock); + + list_for_each_entry(d, &hci_dev_list, list) { + if (test_bit(HCI_UP, &d->flags)) + enabled = true; + } + + read_unlock(&hci_dev_list_lock); + } + + led_trigger_event(bt_power_led_trigger, enabled ? 
LED_FULL : LED_OFF); } static void power_activate(struct led_classdev *led_cdev) @@ -72,3 +89,13 @@ void hci_leds_init(struct hci_dev *hdev) /* initialize power_led */ hdev->power_led = led_allocate_basic(hdev, power_activate, "power"); } + +void bt_leds_init(void) +{ + led_trigger_register_simple("bluetooth-power", &bt_power_led_trigger); +} + +void bt_leds_cleanup(void) +{ + led_trigger_unregister_simple(bt_power_led_trigger); +} diff --git a/net/bluetooth/leds.h b/net/bluetooth/leds.h index a9c4d6e..08725a2 100644 --- a/net/bluetooth/leds.h +++ b/net/bluetooth/leds.h @@ -7,10 +7,20 @@ */ #if IS_ENABLED(CONFIG_BT_LEDS) + void hci_leds_update_powered(struct hci_dev *hdev, bool enabled); void hci_leds_init(struct hci_dev *hdev); + +void bt_leds_init(void); +void bt_leds_cleanup(void); + #else + static inline void hci_leds_update_powered(struct hci_dev *hdev, bool enabled) {} static inline void hci_leds_init(struct hci_dev *hdev) {} + +static inline void bt_leds_init(void) {} +static inline void bt_leds_cleanup(void) {} + #endif -- cgit v1.1 From abbcc341adb16f68915cae7ef9a10e0d7b57e3c0 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 24 Jul 2016 16:12:24 +0200 Subject: mac802154: set phy net namespace for new ifaces This patch sets the net namespace when creating SoftMAC interfaces. This is important if the namespace at phy layer was switched before. Currently we are losing interfaces in some namespace and it's not possible to recover that. 
Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/mac802154/iface.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 7079cd3..06019db 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -663,6 +663,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name, /* TODO check this */ SET_NETDEV_DEV(ndev, &local->phy->dev); + dev_net_set(ndev, wpan_phy_net(local->hw.phy)); sdata = netdev_priv(ndev); ndev->ieee802154_ptr = &sdata->wpan_dev; memcpy(sdata->name, ndev->name, IFNAMSIZ); -- cgit v1.1 From 5ddedce3b7331959a6da217ed3189d020090873c Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 24 Jul 2016 16:12:25 +0200 Subject: 6lowpan: ndisc: no overreact if no short address is available This patch removes handling to remove short address for a neighbour entry if RS/RA/NS/NA doesn't contain a short address. If these messages don't have any short address option, the existing short address from ndisc cache will be used. The current behaviour will set that the neighbour doesn't have a short address anymore. 
Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/6lowpan/ndisc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/6lowpan/ndisc.c b/net/6lowpan/ndisc.c index 86450b7..941df2f 100644 --- a/net/6lowpan/ndisc.c +++ b/net/6lowpan/ndisc.c @@ -101,8 +101,6 @@ static void lowpan_ndisc_802154_update(struct neighbour *n, u32 flags, ieee802154_be16_to_le16(&neigh->short_addr, lladdr_short); if (!lowpan_802154_is_valid_src_short_addr(neigh->short_addr)) neigh->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC); - } else { - neigh->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC); } write_unlock_bh(&n->lock); } -- cgit v1.1 From ca1de81aa262dcf48354a7c55f2558205517d06e Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Mon, 25 Jul 2016 11:46:40 -0400 Subject: mac802154: don't warn on unsupported frames Just because we don't support certain types of frames yet doesn't mean we have to flood the message log with warnings about "invalid" frames. Signed-off-by: Aristeu Rozanski Acked-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/mac802154/rx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c index 446e130..b978da0 100644 --- a/net/mac802154/rx.c +++ b/net/mac802154/rx.c @@ -101,6 +101,11 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata, sdata->dev->stats.rx_bytes += skb->len; switch (mac_cb(skb)->type) { + case IEEE802154_FC_TYPE_BEACON: + case IEEE802154_FC_TYPE_ACK: + case IEEE802154_FC_TYPE_MAC_CMD: + goto fail; + case IEEE802154_FC_TYPE_DATA: return ieee802154_deliver_skb(skb); default: -- cgit v1.1 From bd89bb6daaca3e4a7c509bdacb53a610f432fa2c Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Mon, 25 Jul 2016 11:46:41 -0400 Subject: mac802154: use rate limited warnings for malformed frames Signed-off-by: Aristeu Rozanski Acked-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/mac802154/rx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff 
--git a/net/mac802154/rx.c b/net/mac802154/rx.c index b978da0..4dcf6e1 100644 --- a/net/mac802154/rx.c +++ b/net/mac802154/rx.c @@ -109,8 +109,8 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata, case IEEE802154_FC_TYPE_DATA: return ieee802154_deliver_skb(skb); default: - pr_warn("ieee802154: bad frame received (type = %d)\n", - mac_cb(skb)->type); + pr_warn_ratelimited("ieee802154: bad frame received " + "(type = %d)\n", mac_cb(skb)->type); goto fail; } -- cgit v1.1 From 65010e68efbeda4275845240869138c0c4587422 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 12 Aug 2016 17:01:27 -0700 Subject: Bluetooth: Add HCI device identifier for Qualcomm SMD This patch assigns the next free HCI device identifier to Bluetooth devices based on the Qualcomm Shared Memory channels. Signed-off-by: Bjorn Andersson Signed-off-by: Bjorn Andersson Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 003b252..0aac123 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -63,6 +63,7 @@ #define HCI_SDIO 6 #define HCI_SPI 7 #define HCI_I2C 8 +#define HCI_SMD 9 /* HCI controller types */ #define HCI_PRIMARY 0x00 -- cgit v1.1 From 1511cc750c3d9a1c402d71e3522c9cf1fad0ad9c Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 12 Aug 2016 17:01:28 -0700 Subject: Bluetooth: Introduce Qualcomm WCNSS SMD based HCI driver The Qualcomm WCNSS chip provides two SMD channels to the BT core; one for command and one for event packets. This driver exposes the two channels as a hci device. 
Signed-off-by: Bjorn Andersson Signed-off-by: Bjorn Andersson Signed-off-by: Marcel Holtmann --- drivers/bluetooth/Kconfig | 12 +++ drivers/bluetooth/Makefile | 1 + drivers/bluetooth/btqcomsmd.c | 182 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 195 insertions(+) create mode 100644 drivers/bluetooth/btqcomsmd.c diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig index cf50fd2..2c48191 100644 --- a/drivers/bluetooth/Kconfig +++ b/drivers/bluetooth/Kconfig @@ -331,4 +331,16 @@ config BT_WILINK Say Y here to compile support for Texas Instrument's WiLink7 driver into the kernel or say M to compile it as module (btwilink). +config BT_QCOMSMD + tristate "Qualcomm SMD based HCI support" + depends on QCOM_SMD + select BT_QCA + help + Qualcomm SMD based HCI driver. + This driver is used to bridge HCI data onto the shared memory + channels to the WCNSS core. + + Say Y here to compile support for HCI over Qualcomm SMD into the + kernel or say M to compile as a module. + endmenu diff --git a/drivers/bluetooth/Makefile b/drivers/bluetooth/Makefile index 9c18939..3e92cfeb 100644 --- a/drivers/bluetooth/Makefile +++ b/drivers/bluetooth/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_BT_ATH3K) += ath3k.o obj-$(CONFIG_BT_MRVL) += btmrvl.o obj-$(CONFIG_BT_MRVL_SDIO) += btmrvl_sdio.o obj-$(CONFIG_BT_WILINK) += btwilink.o +obj-$(CONFIG_BT_QCOMSMD) += btqcomsmd.o obj-$(CONFIG_BT_BCM) += btbcm.o obj-$(CONFIG_BT_RTL) += btrtl.o obj-$(CONFIG_BT_QCA) += btqca.o diff --git a/drivers/bluetooth/btqcomsmd.c b/drivers/bluetooth/btqcomsmd.c new file mode 100644 index 0000000..08c2c93 --- /dev/null +++ b/drivers/bluetooth/btqcomsmd.c @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2016, Linaro Ltd. + * Copyright (c) 2015, Sony Mobile Communications Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include + +#include +#include + +#include "btqca.h" + +struct btqcomsmd { + struct hci_dev *hdev; + + struct qcom_smd_channel *acl_channel; + struct qcom_smd_channel *cmd_channel; +}; + +static int btqcomsmd_recv(struct hci_dev *hdev, unsigned int type, + const void *data, size_t count) +{ + struct sk_buff *skb; + + /* Use GFP_ATOMIC as we're in IRQ context */ + skb = bt_skb_alloc(count, GFP_ATOMIC); + if (!skb) { + hdev->stat.err_rx++; + return -ENOMEM; + } + + hci_skb_pkt_type(skb) = type; + memcpy(skb_put(skb, count), data, count); + + return hci_recv_frame(hdev, skb); +} + +static int btqcomsmd_acl_callback(struct qcom_smd_channel *channel, + const void *data, size_t count) +{ + struct btqcomsmd *btq = qcom_smd_get_drvdata(channel); + + btq->hdev->stat.byte_rx += count; + return btqcomsmd_recv(btq->hdev, HCI_ACLDATA_PKT, data, count); +} + +static int btqcomsmd_cmd_callback(struct qcom_smd_channel *channel, + const void *data, size_t count) +{ + struct btqcomsmd *btq = qcom_smd_get_drvdata(channel); + + return btqcomsmd_recv(btq->hdev, HCI_EVENT_PKT, data, count); +} + +static int btqcomsmd_send(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct btqcomsmd *btq = hci_get_drvdata(hdev); + int ret; + + switch (hci_skb_pkt_type(skb)) { + case HCI_ACLDATA_PKT: + ret = qcom_smd_send(btq->acl_channel, skb->data, skb->len); + hdev->stat.acl_tx++; + hdev->stat.byte_tx += skb->len; + break; + case HCI_COMMAND_PKT: + ret = qcom_smd_send(btq->cmd_channel, skb->data, skb->len); + hdev->stat.cmd_tx++; + break; + default: + ret = -EILSEQ; + break; + } + + kfree_skb(skb); + + return ret; +} + +static int btqcomsmd_open(struct hci_dev *hdev) +{ + return 0; +} + +static int 
btqcomsmd_close(struct hci_dev *hdev) +{ + return 0; +} + +static int btqcomsmd_probe(struct platform_device *pdev) +{ + struct btqcomsmd *btq; + struct hci_dev *hdev; + void *wcnss; + int ret; + + btq = devm_kzalloc(&pdev->dev, sizeof(*btq), GFP_KERNEL); + if (!btq) + return -ENOMEM; + + wcnss = dev_get_drvdata(pdev->dev.parent); + + btq->acl_channel = qcom_wcnss_open_channel(wcnss, "APPS_RIVA_BT_ACL", + btqcomsmd_acl_callback); + if (IS_ERR(btq->acl_channel)) + return PTR_ERR(btq->acl_channel); + + btq->cmd_channel = qcom_wcnss_open_channel(wcnss, "APPS_RIVA_BT_CMD", + btqcomsmd_cmd_callback); + if (IS_ERR(btq->cmd_channel)) + return PTR_ERR(btq->cmd_channel); + + qcom_smd_set_drvdata(btq->acl_channel, btq); + qcom_smd_set_drvdata(btq->cmd_channel, btq); + + hdev = hci_alloc_dev(); + if (!hdev) + return -ENOMEM; + + hci_set_drvdata(hdev, btq); + btq->hdev = hdev; + SET_HCIDEV_DEV(hdev, &pdev->dev); + + hdev->bus = HCI_SMD; + hdev->open = btqcomsmd_open; + hdev->close = btqcomsmd_close; + hdev->send = btqcomsmd_send; + hdev->set_bdaddr = qca_set_bdaddr_rome; + + ret = hci_register_dev(hdev); + if (ret < 0) { + hci_free_dev(hdev); + return ret; + } + + platform_set_drvdata(pdev, btq); + + return 0; +} + +static int btqcomsmd_remove(struct platform_device *pdev) +{ + struct btqcomsmd *btq = platform_get_drvdata(pdev); + + hci_unregister_dev(btq->hdev); + hci_free_dev(btq->hdev); + + return 0; +} + +static const struct of_device_id btqcomsmd_of_match[] = { + { .compatible = "qcom,wcnss-bt", }, + { }, +}; + +static struct platform_driver btqcomsmd_driver = { + .probe = btqcomsmd_probe, + .remove = btqcomsmd_remove, + .driver = { + .name = "btqcomsmd", + .of_match_table = btqcomsmd_of_match, + }, +}; + +module_platform_driver(btqcomsmd_driver); + +MODULE_AUTHOR("Bjorn Andersson "); +MODULE_DESCRIPTION("Qualcomm SMD HCI driver"); +MODULE_LICENSE("GPL v2"); -- cgit v1.1 From f0a70a04ca10d07a383a89edea142e3cbab1f2ca Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: 
Thu, 11 Aug 2016 16:02:44 -0700 Subject: Bluetooth: btusb, hci_intel: Fix wait_on_bit_timeout() return value checks wait_on_bit_timeout() returns one of the following three values: * 0 to indicate success. * -EINTR to indicate that a signal has been received; * -EAGAIN to indicate timeout; Make the wait_on_bit_timeout() callers check for these values. Signed-off-by: Bart Van Assche Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 5 ++--- drivers/bluetooth/hci_intel.c | 6 +++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 811f9b9..c58a00c 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -2221,9 +2221,8 @@ static int btusb_setup_intel_new(struct hci_dev *hdev) err = wait_on_bit_timeout(&data->flags, BTUSB_DOWNLOADING, TASK_INTERRUPTIBLE, msecs_to_jiffies(5000)); - if (err == 1) { + if (err == -EINTR) { BT_ERR("%s: Firmware loading interrupted", hdev->name); - err = -EINTR; goto done; } @@ -2275,7 +2274,7 @@ done: TASK_INTERRUPTIBLE, msecs_to_jiffies(1000)); - if (err == 1) { + if (err == -EINTR) { BT_ERR("%s: Device boot interrupted", hdev->name); return -EINTR; } diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c index ed0a420..9e27128 100644 --- a/drivers/bluetooth/hci_intel.c +++ b/drivers/bluetooth/hci_intel.c @@ -128,7 +128,7 @@ static int intel_wait_booting(struct hci_uart *hu) TASK_INTERRUPTIBLE, msecs_to_jiffies(1000)); - if (err == 1) { + if (err == -EINTR) { bt_dev_err(hu->hdev, "Device boot interrupted"); return -EINTR; } @@ -151,7 +151,7 @@ static int intel_wait_lpm_transaction(struct hci_uart *hu) TASK_INTERRUPTIBLE, msecs_to_jiffies(1000)); - if (err == 1) { + if (err == -EINTR) { bt_dev_err(hu->hdev, "LPM transaction interrupted"); return -EINTR; } @@ -813,7 +813,7 @@ static int intel_setup(struct hci_uart *hu) err = wait_on_bit_timeout(&intel->flags, STATE_DOWNLOADING, 
TASK_INTERRUPTIBLE, msecs_to_jiffies(5000)); - if (err == 1) { + if (err == -EINTR) { bt_dev_err(hdev, "Firmware loading interrupted"); err = -EINTR; goto done; -- cgit v1.1 From 1aabbbcefe8e62fbffaaa01ca8bdd4cd6ed1625b Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Fri, 29 Jul 2016 13:28:25 +0200 Subject: Bluetooth: add printf format attribute to hci_set_[fh]w_info() Commit 5177a83827cd ("Bluetooth: Add debugfs fields for hardware and firmware info") introduced hci_set_hw_info() and hci_set_fw_info(). These functions use kvasprintf_const() but are not marked with a __printf attribute. Adding such an attribute helps detecting issues related to printf-formatting at build time. Signed-off-by: Nicolas Iooss Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index b8d43bd..cc349f6 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1028,8 +1028,8 @@ int hci_resume_dev(struct hci_dev *hdev); int hci_reset_dev(struct hci_dev *hdev); int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb); int hci_recv_diag(struct hci_dev *hdev, struct sk_buff *skb); -void hci_set_hw_info(struct hci_dev *hdev, const char *fmt, ...); -void hci_set_fw_info(struct hci_dev *hdev, const char *fmt, ...); +__printf(2, 3) void hci_set_hw_info(struct hci_dev *hdev, const char *fmt, ...); +__printf(2, 3) void hci_set_fw_info(struct hci_dev *hdev, const char *fmt, ...); int hci_dev_open(__u16 dev); int hci_dev_close(__u16 dev); int hci_dev_do_close(struct hci_dev *hdev); -- cgit v1.1 From 935199348048902124d0b288788c3a45e78b69ab Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 16 Aug 2016 12:50:06 +0800 Subject: Bluetooth: btusb: Add support for 0cf3:e009 Device 0cf3:e009 is one of the QCA ROME family. 
T: Bus=01 Lev=01 Prnt=01 Port=07 Cnt=04 Dev#= 4 Spd=12 MxCh= 0 D: Ver= 2.01 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0cf3 ProdID=e009 Rev=00.01 C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb Signed-off-by: Kai-Heng Feng Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index c58a00c..80ae854 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -248,6 +248,7 @@ static const struct usb_device_id blacklist_table[] = { /* QCA ROME chipset */ { USB_DEVICE(0x0cf3, 0xe007), .driver_info = BTUSB_QCA_ROME }, + { USB_DEVICE(0x0cf3, 0xe009), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe300), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe360), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0489, 0xe092), .driver_info = BTUSB_QCA_ROME }, -- cgit v1.1 From 7e8524591ffffe3536bd363827ff4477a5672c65 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 11 Aug 2016 23:00:31 +0200 Subject: Bluetooth: bcm203x: don't print error when allocating urb fails kmalloc will print enough information in case of failure. 
Signed-off-by: Wolfram Sang Signed-off-by: Marcel Holtmann --- drivers/bluetooth/bcm203x.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/bluetooth/bcm203x.c b/drivers/bluetooth/bcm203x.c index 5b0ef7b..5ce6d41 100644 --- a/drivers/bluetooth/bcm203x.c +++ b/drivers/bluetooth/bcm203x.c @@ -185,10 +185,8 @@ static int bcm203x_probe(struct usb_interface *intf, const struct usb_device_id data->state = BCM203X_LOAD_MINIDRV; data->urb = usb_alloc_urb(0, GFP_KERNEL); - if (!data->urb) { - BT_ERR("Can't allocate URB"); + if (!data->urb) return -ENOMEM; - } if (request_firmware(&firmware, "BCM2033-MD.hex", &udev->dev) < 0) { BT_ERR("Mini driver request failed"); -- cgit v1.1 From 47b0f573f2fa7634860e16ea31f2bc3057a1022a Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 27 Aug 2016 20:23:37 +0200 Subject: Bluetooth: Check SOL_HCI for raw socket options The SOL_HCI level should be enforced when using socket options on the HCI raw socket interface. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 96f04b7..99dd150 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1440,6 +1440,9 @@ static int hci_sock_setsockopt(struct socket *sock, int level, int optname, BT_DBG("sk %p, opt %d", sk, optname); + if (level != SOL_HCI) + return -ENOPROTOOPT; + lock_sock(sk); if (hci_pi(sk)->channel != HCI_CHANNEL_RAW) { @@ -1523,6 +1526,9 @@ static int hci_sock_getsockopt(struct socket *sock, int level, int optname, BT_DBG("sk %p, opt %d", sk, optname); + if (level != SOL_HCI) + return -ENOPROTOOPT; + if (get_user(len, optlen)) return -EFAULT; -- cgit v1.1 From 70ecce91e3a2d7e332fe56fd065c67d404b8fccf Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 27 Aug 2016 20:23:38 +0200 Subject: Bluetooth: Store control socket cookie and comm information To further allow unique 
identification and tracking of control socket, store cookie and comm information when binding the socket. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/bluetooth.h | 1 + net/bluetooth/hci_sock.c | 31 ++++++++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index bfd1590..69b5174 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -371,6 +371,7 @@ void hci_sock_set_flag(struct sock *sk, int nr); void hci_sock_clear_flag(struct sock *sk, int nr); int hci_sock_test_flag(struct sock *sk, int nr); unsigned short hci_sock_get_channel(struct sock *sk); +u32 hci_sock_get_cookie(struct sock *sk); int hci_sock_init(void); void hci_sock_cleanup(void); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 99dd150..4dce6df 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -38,6 +39,8 @@ static LIST_HEAD(mgmt_chan_list); static DEFINE_MUTEX(mgmt_chan_list_lock); +static DEFINE_IDA(sock_cookie_ida); + static atomic_t monitor_promisc = ATOMIC_INIT(0); /* ----- HCI socket interface ----- */ @@ -52,6 +55,8 @@ struct hci_pinfo { __u32 cmsg_mask; unsigned short channel; unsigned long flags; + __u32 cookie; + char comm[TASK_COMM_LEN]; }; void hci_sock_set_flag(struct sock *sk, int nr) @@ -74,6 +79,11 @@ unsigned short hci_sock_get_channel(struct sock *sk) return hci_pi(sk)->channel; } +u32 hci_sock_get_cookie(struct sock *sk) +{ + return hci_pi(sk)->cookie; +} + static inline int hci_test_bit(int nr, const void *addr) { return *((const __u32 *) addr + (nr >> 5)) & ((__u32) 1 << (nr & 31)); @@ -585,6 +595,7 @@ static int hci_sock_release(struct socket *sock) { struct sock *sk = sock->sk; struct hci_dev *hdev; + int id; BT_DBG("sock %p sk %p", sock, sk); @@ -593,8 +604,17 @@ static int 
hci_sock_release(struct socket *sock) hdev = hci_pi(sk)->hdev; - if (hci_pi(sk)->channel == HCI_CHANNEL_MONITOR) + switch (hci_pi(sk)->channel) { + case HCI_CHANNEL_MONITOR: atomic_dec(&monitor_promisc); + break; + case HCI_CHANNEL_CONTROL: + id = hci_pi(sk)->cookie; + + hci_pi(sk)->cookie = 0xffffffff; + ida_simple_remove(&sock_cookie_ida, id); + break; + } bt_sock_unlink(&hci_sk_list, sk); @@ -957,6 +977,15 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, * are changes to settings, class of device, name etc. */ if (haddr.hci_channel == HCI_CHANNEL_CONTROL) { + int id; + + id = ida_simple_get(&sock_cookie_ida, 1, 0, GFP_KERNEL); + if (id < 0) + id = 0xffffffff; + + hci_pi(sk)->cookie = id; + get_task_comm(hci_pi(sk)->comm, current); + hci_sock_set_flag(sk, HCI_MGMT_INDEX_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_UNCONF_INDEX_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_GENERIC_EVENTS); -- cgit v1.1 From 03c979c4717c7fa0c058fafe76ac4d6acdd1fb0d Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 27 Aug 2016 20:23:39 +0200 Subject: Bluetooth: Introduce helper to pack mgmt version information The mgmt version information will also be needed for the control channel tracing feature. This provides a helper to pack them. 
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/mgmt.c | 11 +++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index cc349f6..9f181b5 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1451,6 +1451,7 @@ void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c); #define DISCOV_BREDR_INQUIRY_LEN 0x08 #define DISCOV_LE_RESTART_DELAY msecs_to_jiffies(200) /* msec */ +void mgmt_fill_version_info(void *ver); int mgmt_new_settings(struct hci_dev *hdev); void mgmt_index_added(struct hci_dev *hdev); void mgmt_index_removed(struct hci_dev *hdev); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 7639290..9071886 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -278,6 +278,14 @@ static u8 le_addr_type(u8 mgmt_addr_type) return ADDR_LE_DEV_RANDOM; } +void mgmt_fill_version_info(void *ver) +{ + struct mgmt_rp_read_version *rp = ver; + + rp->version = MGMT_VERSION; + rp->revision = cpu_to_le16(MGMT_REVISION); +} + static int read_version(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { @@ -285,8 +293,7 @@ static int read_version(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("sock %p", sk); - rp.version = MGMT_VERSION; - rp.revision = cpu_to_le16(MGMT_REVISION); + mgmt_fill_version_info(&rp); return mgmt_cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_VERSION, 0, &rp, sizeof(rp)); -- cgit v1.1 From 249fa1699f8642c73eb43e61b321969f0549ab2c Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 27 Aug 2016 20:23:40 +0200 Subject: Bluetooth: Add support for sending MGMT open and close to monitor This sends new notifications to the monitor support whenever a management channel has been opened or closed. This allows tracing of control channels really easily. 
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_mon.h | 2 + net/bluetooth/hci_sock.c | 95 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+) diff --git a/include/net/bluetooth/hci_mon.h b/include/net/bluetooth/hci_mon.h index 587d013..9640790 100644 --- a/include/net/bluetooth/hci_mon.h +++ b/include/net/bluetooth/hci_mon.h @@ -45,6 +45,8 @@ struct hci_mon_hdr { #define HCI_MON_VENDOR_DIAG 11 #define HCI_MON_SYSTEM_NOTE 12 #define HCI_MON_USER_LOGGING 13 +#define HCI_MON_CTRL_OPEN 14 +#define HCI_MON_CTRL_CLOSE 15 struct hci_mon_new_index { __u8 type; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 4dce6df..2d87250 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -394,6 +394,59 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) return skb; } +static struct sk_buff *create_monitor_ctrl_open(struct sock *sk) +{ + struct hci_mon_hdr *hdr; + struct sk_buff *skb; + u16 format = 0x0002; + u8 ver[3]; + u32 flags; + + skb = bt_skb_alloc(14 + TASK_COMM_LEN , GFP_ATOMIC); + if (!skb) + return NULL; + + mgmt_fill_version_info(ver); + flags = hci_sock_test_flag(sk, HCI_SOCK_TRUSTED) ? 
0x1 : 0x0; + + put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); + put_unaligned_le16(format, skb_put(skb, 2)); + memcpy(skb_put(skb, sizeof(ver)), ver, sizeof(ver)); + put_unaligned_le32(flags, skb_put(skb, 4)); + *skb_put(skb, 1) = TASK_COMM_LEN; + memcpy(skb_put(skb, TASK_COMM_LEN), hci_pi(sk)->comm, TASK_COMM_LEN); + + __net_timestamp(skb); + + hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr->opcode = cpu_to_le16(HCI_MON_CTRL_OPEN); + hdr->index = cpu_to_le16(HCI_DEV_NONE); + hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); + + return skb; +} + +static struct sk_buff *create_monitor_ctrl_close(struct sock *sk) +{ + struct hci_mon_hdr *hdr; + struct sk_buff *skb; + + skb = bt_skb_alloc(4, GFP_ATOMIC); + if (!skb) + return NULL; + + put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); + + __net_timestamp(skb); + + hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr->opcode = cpu_to_le16(HCI_MON_CTRL_CLOSE); + hdr->index = cpu_to_le16(HCI_DEV_NONE); + hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); + + return skb; +} + static void __printf(2, 3) send_monitor_note(struct sock *sk, const char *fmt, ...) 
{ @@ -468,6 +521,29 @@ static void send_monitor_replay(struct sock *sk) read_unlock(&hci_dev_list_lock); } +static void send_monitor_control_replay(struct sock *mon_sk) +{ + struct sock *sk; + + read_lock(&hci_sk_list.lock); + + sk_for_each(sk, &hci_sk_list.head) { + struct sk_buff *skb; + + if (hci_pi(sk)->channel != HCI_CHANNEL_CONTROL) + continue; + + skb = create_monitor_ctrl_open(sk); + if (!skb) + continue; + + if (sock_queue_rcv_skb(mon_sk, skb)) + kfree_skb(skb); + } + + read_unlock(&hci_sk_list.lock); +} + /* Generate internal stack event */ static void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data) { @@ -595,6 +671,7 @@ static int hci_sock_release(struct socket *sock) { struct sock *sk = sock->sk; struct hci_dev *hdev; + struct sk_buff *skb; int id; BT_DBG("sock %p sk %p", sock, sk); @@ -611,6 +688,14 @@ static int hci_sock_release(struct socket *sock) case HCI_CHANNEL_CONTROL: id = hci_pi(sk)->cookie; + /* Send event to monitor */ + skb = create_monitor_ctrl_close(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + hci_pi(sk)->cookie = 0xffffffff; ida_simple_remove(&sock_cookie_ida, id); break; @@ -931,6 +1016,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, send_monitor_note(sk, "Bluetooth subsystem version %s", BT_SUBSYS_VERSION); send_monitor_replay(sk); + send_monitor_control_replay(sk); atomic_inc(&monitor_promisc); break; @@ -977,6 +1063,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, * are changes to settings, class of device, name etc. 
*/ if (haddr.hci_channel == HCI_CHANNEL_CONTROL) { + struct sk_buff *skb; int id; id = ida_simple_get(&sock_cookie_ida, 1, 0, GFP_KERNEL); @@ -986,6 +1073,14 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, hci_pi(sk)->cookie = id; get_task_comm(hci_pi(sk)->comm, current); + /* Send event to monitor */ + skb = create_monitor_ctrl_open(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + hci_sock_set_flag(sk, HCI_MGMT_INDEX_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_UNCONF_INDEX_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_GENERIC_EVENTS); -- cgit v1.1 From 38ceaa00d02dceb22c6bdd5268f5a44d5c00e123 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 27 Aug 2016 20:23:41 +0200 Subject: Bluetooth: Add support for sending MGMT commands and events to monitor This adds support for tracing all management commands and events via the monitor interface. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 3 ++ include/net/bluetooth/hci_mon.h | 2 + net/bluetooth/hci_sock.c | 94 ++++++++++++++++++++++++++++++++++++++++ net/bluetooth/mgmt_util.c | 66 ++++++++++++++++++++++++++-- 4 files changed, 162 insertions(+), 3 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 9f181b5..a48f71d 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1406,6 +1406,9 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb); void hci_send_to_channel(unsigned short channel, struct sk_buff *skb, int flag, struct sock *skip_sk); void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb); +void hci_send_monitor_ctrl_event(struct hci_dev *hdev, u16 event, + void *data, u16 data_len, ktime_t tstamp, + int flag, struct sock *skip_sk); void hci_sock_dev_event(struct hci_dev *hdev, int event); diff --git a/include/net/bluetooth/hci_mon.h b/include/net/bluetooth/hci_mon.h 
index 9640790..240786b 100644 --- a/include/net/bluetooth/hci_mon.h +++ b/include/net/bluetooth/hci_mon.h @@ -47,6 +47,8 @@ struct hci_mon_hdr { #define HCI_MON_USER_LOGGING 13 #define HCI_MON_CTRL_OPEN 14 #define HCI_MON_CTRL_CLOSE 15 +#define HCI_MON_CTRL_COMMAND 16 +#define HCI_MON_CTRL_EVENT 17 struct hci_mon_new_index { __u8 type; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 2d87250..576ea48 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -315,6 +315,60 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) kfree_skb(skb_copy); } +void hci_send_monitor_ctrl_event(struct hci_dev *hdev, u16 event, + void *data, u16 data_len, ktime_t tstamp, + int flag, struct sock *skip_sk) +{ + struct sock *sk; + __le16 index; + + if (hdev) + index = cpu_to_le16(hdev->id); + else + index = cpu_to_le16(MGMT_INDEX_NONE); + + read_lock(&hci_sk_list.lock); + + sk_for_each(sk, &hci_sk_list.head) { + struct hci_mon_hdr *hdr; + struct sk_buff *skb; + + if (hci_pi(sk)->channel != HCI_CHANNEL_CONTROL) + continue; + + /* Ignore socket without the flag set */ + if (!hci_sock_test_flag(sk, flag)) + continue; + + /* Skip the original socket */ + if (sk == skip_sk) + continue; + + skb = bt_skb_alloc(6 + data_len, GFP_ATOMIC); + if (!skb) + continue; + + put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); + put_unaligned_le16(event, skb_put(skb, 2)); + + if (data) + memcpy(skb_put(skb, data_len), data, data_len); + + skb->tstamp = tstamp; + + hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr->opcode = cpu_to_le16(HCI_MON_CTRL_EVENT); + hdr->index = index; + hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); + + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + + read_unlock(&hci_sk_list.lock); +} + static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) { struct hci_mon_hdr *hdr; @@ -447,6 +501,33 @@ static struct sk_buff 
*create_monitor_ctrl_close(struct sock *sk) return skb; } +static struct sk_buff *create_monitor_ctrl_command(struct sock *sk, u16 index, + u16 opcode, u16 len, + const void *buf) +{ + struct hci_mon_hdr *hdr; + struct sk_buff *skb; + + skb = bt_skb_alloc(6 + len, GFP_ATOMIC); + if (!skb) + return NULL; + + put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); + put_unaligned_le16(opcode, skb_put(skb, 2)); + + if (buf) + memcpy(skb_put(skb, len), buf, len); + + __net_timestamp(skb); + + hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr->opcode = cpu_to_le16(HCI_MON_CTRL_COMMAND); + hdr->index = cpu_to_le16(index); + hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); + + return skb; +} + static void __printf(2, 3) send_monitor_note(struct sock *sk, const char *fmt, ...) { @@ -1257,6 +1338,19 @@ static int hci_mgmt_cmd(struct hci_mgmt_chan *chan, struct sock *sk, goto done; } + if (chan->channel == HCI_CHANNEL_CONTROL) { + struct sk_buff *skb; + + /* Send event to monitor */ + skb = create_monitor_ctrl_command(sk, index, opcode, len, + buf + sizeof(*hdr)); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + } + if (opcode >= chan->handler_count || chan->handlers[opcode].func == NULL) { BT_DBG("Unknown op %u", opcode); diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c index 8c30c7e..c933bd0 100644 --- a/net/bluetooth/mgmt_util.c +++ b/net/bluetooth/mgmt_util.c @@ -21,12 +21,41 @@ SOFTWARE IS DISCLAIMED. 
*/ +#include + #include #include +#include #include #include "mgmt_util.h" +static struct sk_buff *create_monitor_ctrl_event(__le16 index, u32 cookie, + u16 opcode, u16 len, void *buf) +{ + struct hci_mon_hdr *hdr; + struct sk_buff *skb; + + skb = bt_skb_alloc(6 + len, GFP_ATOMIC); + if (!skb) + return NULL; + + put_unaligned_le32(cookie, skb_put(skb, 4)); + put_unaligned_le16(opcode, skb_put(skb, 2)); + + if (buf) + memcpy(skb_put(skb, len), buf, len); + + __net_timestamp(skb); + + hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr->opcode = cpu_to_le16(HCI_MON_CTRL_EVENT); + hdr->index = index; + hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); + + return skb; +} + int mgmt_send_event(u16 event, struct hci_dev *hdev, unsigned short channel, void *data, u16 data_len, int flag, struct sock *skip_sk) { @@ -52,14 +81,18 @@ int mgmt_send_event(u16 event, struct hci_dev *hdev, unsigned short channel, __net_timestamp(skb); hci_send_to_channel(channel, skb, flag, skip_sk); - kfree_skb(skb); + if (channel == HCI_CHANNEL_CONTROL) + hci_send_monitor_ctrl_event(hdev, event, data, data_len, + skb_get_ktime(skb), flag, skip_sk); + + kfree_skb(skb); return 0; } int mgmt_cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status) { - struct sk_buff *skb; + struct sk_buff *skb, *mskb; struct mgmt_hdr *hdr; struct mgmt_ev_cmd_status *ev; int err; @@ -80,17 +113,30 @@ int mgmt_cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status) ev->status = status; ev->opcode = cpu_to_le16(cmd); + mskb = create_monitor_ctrl_event(hdr->index, hci_sock_get_cookie(sk), + MGMT_EV_CMD_STATUS, sizeof(*ev), ev); + if (mskb) + skb->tstamp = mskb->tstamp; + else + __net_timestamp(skb); + err = sock_queue_rcv_skb(sk, skb); if (err < 0) kfree_skb(skb); + if (mskb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, mskb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(mskb); + } + return err; } int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status, void *rp, size_t rp_len) { - struct sk_buff *skb; 
+ struct sk_buff *skb, *mskb; struct mgmt_hdr *hdr; struct mgmt_ev_cmd_complete *ev; int err; @@ -114,10 +160,24 @@ int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status, if (rp) memcpy(ev->data, rp, rp_len); + mskb = create_monitor_ctrl_event(hdr->index, hci_sock_get_cookie(sk), + MGMT_EV_CMD_COMPLETE, + sizeof(*ev) + rp_len, ev); + if (mskb) + skb->tstamp = mskb->tstamp; + else + __net_timestamp(skb); + err = sock_queue_rcv_skb(sk, skb); if (err < 0) kfree_skb(skb); + if (mskb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, mskb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(mskb); + } + return err; } -- cgit v1.1 From 37d3a1fab50fa07ac706787646e61c60e7c520e0 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sun, 28 Aug 2016 20:53:34 +0300 Subject: Bluetooth: mgmt: Fix sending redundant event for Advertising Instance When an Advertising Instance is removed, the Advertising Removed event shouldn't be sent to the same socket that issued the Remove Advertising command (it gets a command complete event instead). The mgmt_advertising_removed() function already has a parameter for skipping a specific socket, but there was no code to propagate the right value to this parameter. This patch fixes the issue by making sure the intermediate hci_req_clear_adv_instance() function gets the socket pointer. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 11 ++++++----- net/bluetooth/hci_request.h | 5 +++-- net/bluetooth/mgmt.c | 6 +++--- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index b0e23df..9968b1c 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1194,7 +1194,7 @@ static void adv_timeout_expire(struct work_struct *work) hci_req_init(&req, hdev); - hci_req_clear_adv_instance(hdev, &req, instance, false); + hci_req_clear_adv_instance(hdev, NULL, &req, instance, false); if (list_empty(&hdev->adv_instances)) __hci_req_disable_advertising(&req); @@ -1284,8 +1284,9 @@ static void cancel_adv_timeout(struct hci_dev *hdev) * setting. * - force == false: Only instances that have a timeout will be removed. */ -void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, - u8 instance, bool force) +void hci_req_clear_adv_instance(struct hci_dev *hdev, struct sock *sk, + struct hci_request *req, u8 instance, + bool force) { struct adv_info *adv_instance, *n, *next_instance = NULL; int err; @@ -1311,7 +1312,7 @@ void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, rem_inst = adv_instance->instance; err = hci_remove_adv_instance(hdev, rem_inst); if (!err) - mgmt_advertising_removed(NULL, hdev, rem_inst); + mgmt_advertising_removed(sk, hdev, rem_inst); } } else { adv_instance = hci_find_adv_instance(hdev, instance); @@ -1325,7 +1326,7 @@ void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, err = hci_remove_adv_instance(hdev, instance); if (!err) - mgmt_advertising_removed(NULL, hdev, instance); + mgmt_advertising_removed(sk, hdev, instance); } } diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index b2d044b..ac1e110 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -73,8 +73,9 @@ void 
__hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance); int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance, bool force); -void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, - u8 instance, bool force); +void hci_req_clear_adv_instance(struct hci_dev *hdev, struct sock *sk, + struct hci_request *req, u8 instance, + bool force); void __hci_req_update_class(struct hci_request *req); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 9071886..f9af5f7 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -929,7 +929,7 @@ static int clean_up_hci_state(struct hci_dev *hdev) hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); } - hci_req_clear_adv_instance(hdev, NULL, 0x00, false); + hci_req_clear_adv_instance(hdev, NULL, NULL, 0x00, false); if (hci_dev_test_flag(hdev, HCI_LE_ADV)) __hci_req_disable_advertising(&req); @@ -1697,7 +1697,7 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) enabled = lmp_host_le_capable(hdev); if (!val) - hci_req_clear_adv_instance(hdev, NULL, 0x00, true); + hci_req_clear_adv_instance(hdev, NULL, NULL, 0x00, true); if (!hdev_is_powered(hdev) || val == enabled) { bool changed = false; @@ -6182,7 +6182,7 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev, hci_req_init(&req, hdev); - hci_req_clear_adv_instance(hdev, &req, cp->instance, true); + hci_req_clear_adv_instance(hdev, sk, &req, cp->instance, true); if (list_empty(&hdev->adv_instances)) __hci_req_disable_advertising(&req); -- cgit v1.1 From 5504c3a31061704512707bb23bd7835e8a5281e4 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 29 Aug 2016 06:19:46 +0200 Subject: Bluetooth: Use individual flags for certain management events Instead of hiding everything behind a general management events flag, introduce individual flags that allow fine control over which events are sent to a given management channel. 
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci.h | 5 ++++- net/bluetooth/hci_sock.c | 5 ++++- net/bluetooth/mgmt.c | 32 +++++++++++++------------------- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 0aac123..ddb9acc 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -208,7 +208,10 @@ enum { HCI_MGMT_INDEX_EVENTS, HCI_MGMT_UNCONF_INDEX_EVENTS, HCI_MGMT_EXT_INDEX_EVENTS, - HCI_MGMT_GENERIC_EVENTS, + HCI_MGMT_OPTION_EVENTS, + HCI_MGMT_SETTING_EVENTS, + HCI_MGMT_DEV_CLASS_EVENTS, + HCI_MGMT_LOCAL_NAME_EVENTS, HCI_MGMT_OOB_DATA_EVENTS, }; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 576ea48..d37c224 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1164,7 +1164,10 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, hci_sock_set_flag(sk, HCI_MGMT_INDEX_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_UNCONF_INDEX_EVENTS); - hci_sock_set_flag(sk, HCI_MGMT_GENERIC_EVENTS); + hci_sock_set_flag(sk, HCI_MGMT_OPTION_EVENTS); + hci_sock_set_flag(sk, HCI_MGMT_SETTING_EVENTS); + hci_sock_set_flag(sk, HCI_MGMT_DEV_CLASS_EVENTS); + hci_sock_set_flag(sk, HCI_MGMT_LOCAL_NAME_EVENTS); } break; } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f9af5f7..469f5cc 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -256,13 +256,6 @@ static int mgmt_limited_event(u16 event, struct hci_dev *hdev, void *data, flag, skip_sk); } -static int mgmt_generic_event(u16 event, struct hci_dev *hdev, void *data, - u16 len, struct sock *skip_sk) -{ - return mgmt_send_event(event, hdev, HCI_CHANNEL_CONTROL, data, len, - HCI_MGMT_GENERIC_EVENTS, skip_sk); -} - static int mgmt_event(u16 event, struct hci_dev *hdev, void *data, u16 len, struct sock *skip_sk) { @@ -579,8 +572,8 @@ static int new_options(struct hci_dev *hdev, struct sock *skip) { __le32 options = 
get_missing_options(hdev); - return mgmt_generic_event(MGMT_EV_NEW_CONFIG_OPTIONS, hdev, &options, - sizeof(options), skip); + return mgmt_limited_event(MGMT_EV_NEW_CONFIG_OPTIONS, hdev, &options, + sizeof(options), HCI_MGMT_OPTION_EVENTS, skip); } static int send_options_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev) @@ -1007,8 +1000,8 @@ static int new_settings(struct hci_dev *hdev, struct sock *skip) { __le32 ev = cpu_to_le32(get_current_settings(hdev)); - return mgmt_generic_event(MGMT_EV_NEW_SETTINGS, hdev, &ev, - sizeof(ev), skip); + return mgmt_limited_event(MGMT_EV_NEW_SETTINGS, hdev, &ev, + sizeof(ev), HCI_MGMT_SETTING_EVENTS, skip); } int mgmt_new_settings(struct hci_dev *hdev) @@ -3000,8 +2993,8 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, if (err < 0) goto failed; - err = mgmt_generic_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, - data, len, sk); + err = mgmt_limited_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, data, + len, HCI_MGMT_LOCAL_NAME_EVENTS, sk); goto failed; } @@ -6502,8 +6495,9 @@ void __mgmt_power_off(struct hci_dev *hdev) mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status); if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) - mgmt_generic_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, - zero_cod, sizeof(zero_cod), NULL); + mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, + zero_cod, sizeof(zero_cod), + HCI_MGMT_DEV_CLASS_EVENTS, NULL); new_settings(hdev, match.sk); @@ -7100,8 +7094,8 @@ void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match); if (!status) - mgmt_generic_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, - dev_class, 3, NULL); + mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, dev_class, + 3, HCI_MGMT_DEV_CLASS_EVENTS, NULL); if (match.sk) sock_put(match.sk); @@ -7130,8 +7124,8 @@ void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status) return; } - 
mgmt_generic_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, sizeof(ev), - cmd ? cmd->sk : NULL); + mgmt_limited_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, sizeof(ev), + HCI_MGMT_LOCAL_NAME_EVENTS, cmd ? cmd->sk : NULL); } static inline bool has_uuid(u8 *uuid, u16 uuid_count, u8 (*uuids)[16]) -- cgit v1.1 From 56f787c5024de7829f8cccce7569feb520829baf Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 29 Aug 2016 06:19:47 +0200 Subject: Bluetooth: Fix wrong Get Clock Information return parameters The address information of the Get Clock Information return parameters is copied from the wrong memory location. It uses &cmd->param while it actually needs to be cmd->param. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/mgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 469f5cc..0c83dd3 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -4869,7 +4869,7 @@ static int clock_info_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status) int err; memset(&rp, 0, sizeof(rp)); - memcpy(&rp.addr, &cmd->param, sizeof(rp.addr)); + memcpy(&rp.addr, cmd->param, sizeof(rp.addr)); if (status) goto complete; -- cgit v1.1 From 9db5c62951871c33e4443fe433e234419cf574d2 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 29 Aug 2016 06:31:57 +0200 Subject: Bluetooth: Use command status event for Set IO Capability errors In case of failure, the Set IO Capability command is supposed to return command status and not command complete. 
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/mgmt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 0c83dd3..47efdb4 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2513,8 +2513,8 @@ static int set_io_capability(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG(""); if (cp->io_capability > SMP_IO_KEYBOARD_DISPLAY) - return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_IO_CAPABILITY, - MGMT_STATUS_INVALID_PARAMS, NULL, 0); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_IO_CAPABILITY, + MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); -- cgit v1.1 From 3d4e2fb64111ffb5dc737daf25f5434bf39bee5f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 29 Aug 2016 14:36:18 +0200 Subject: Bluetooth: add WCNSS dependency for HCI driver The newly added bluetooth driver is based on the soc-specific support, but lacks the obvious compile-time dependency on that: drivers/bluetooth/btqcomsmd.o: In function `btqcomsmd_probe': btqcomsmd.c:(.text.btqcomsmd_probe+0x40): undefined reference to `qcom_wcnss_open_channel' btqcomsmd.c:(.text.btqcomsmd_probe+0x5c): undefined reference to `qcom_wcnss_open_channel' Makefile:969: recipe for target 'vmlinux' failed Fixes: 90c107dc8b2c ("Bluetooth: Introduce Qualcomm WCNSS SMD based HCI driver") Signed-off-by: Arnd Bergmann Signed-off-by: Marcel Holtmann --- drivers/bluetooth/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig index 2c48191..43e9f93 100644 --- a/drivers/bluetooth/Kconfig +++ b/drivers/bluetooth/Kconfig @@ -333,7 +333,7 @@ config BT_WILINK config BT_QCOMSMD tristate "Qualcomm SMD based HCI support" - depends on QCOM_SMD + depends on QCOM_SMD && QCOM_WCNSS_CTRL select BT_QCA help Qualcomm SMD based HCI driver. 
-- cgit v1.1 From df1cb87af9f24527a8932e4d195d49ffab1168d2 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 30 Aug 2016 05:00:34 +0200 Subject: Bluetooth: Introduce helper functions for socket cookie handling Instead of manually allocating cookie information each time, use helper functions for generating and releasing cookies. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index d37c224..804208d 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -84,6 +84,33 @@ u32 hci_sock_get_cookie(struct sock *sk) return hci_pi(sk)->cookie; } +static bool hci_sock_gen_cookie(struct sock *sk) +{ + int id = hci_pi(sk)->cookie; + + if (!id) { + id = ida_simple_get(&sock_cookie_ida, 1, 0, GFP_KERNEL); + if (id < 0) + id = 0xffffffff; + + hci_pi(sk)->cookie = id; + get_task_comm(hci_pi(sk)->comm, current); + return true; + } + + return false; +} + +static void hci_sock_free_cookie(struct sock *sk) +{ + int id = hci_pi(sk)->cookie; + + if (id) { + hci_pi(sk)->cookie = 0xffffffff; + ida_simple_remove(&sock_cookie_ida, id); + } +} + static inline int hci_test_bit(int nr, const void *addr) { return *((const __u32 *) addr + (nr >> 5)) & ((__u32) 1 << (nr & 31)); @@ -753,7 +780,6 @@ static int hci_sock_release(struct socket *sock) struct sock *sk = sock->sk; struct hci_dev *hdev; struct sk_buff *skb; - int id; BT_DBG("sock %p sk %p", sock, sk); @@ -767,8 +793,6 @@ static int hci_sock_release(struct socket *sock) atomic_dec(&monitor_promisc); break; case HCI_CHANNEL_CONTROL: - id = hci_pi(sk)->cookie; - /* Send event to monitor */ skb = create_monitor_ctrl_close(sk); if (skb) { @@ -777,8 +801,7 @@ static int hci_sock_release(struct socket *sock) kfree_skb(skb); } - hci_pi(sk)->cookie = 0xffffffff; - ida_simple_remove(&sock_cookie_ida, id); + 
hci_sock_free_cookie(sk); break; } @@ -1145,14 +1168,8 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, */ if (haddr.hci_channel == HCI_CHANNEL_CONTROL) { struct sk_buff *skb; - int id; - - id = ida_simple_get(&sock_cookie_ida, 1, 0, GFP_KERNEL); - if (id < 0) - id = 0xffffffff; - hci_pi(sk)->cookie = id; - get_task_comm(hci_pi(sk)->comm, current); + hci_sock_gen_cookie(sk); /* Send event to monitor */ skb = create_monitor_ctrl_open(sk); -- cgit v1.1 From 9e8305b39bfa23a83b932007654097f4676c2ba2 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 30 Aug 2016 05:00:35 +0200 Subject: Bluetooth: Use numbers for subsystem version string Instead of keeping a version string around, use version and revision numbers and then stringify them for use as module parameter. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/bluetooth.h | 3 ++- net/bluetooth/af_bluetooth.c | 10 +++++++--- net/bluetooth/hci_sock.c | 4 ++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 69b5174..d705bcf 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -29,7 +29,8 @@ #include #include -#define BT_SUBSYS_VERSION "2.21" +#define BT_SUBSYS_VERSION 2 +#define BT_SUBSYS_REVISION 21 #ifndef AF_BLUETOOTH #define AF_BLUETOOTH 31 diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 1d96ff3..1aff2da 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -713,13 +714,16 @@ static struct net_proto_family bt_sock_family_ops = { struct dentry *bt_debugfs; EXPORT_SYMBOL_GPL(bt_debugfs); +#define VERSION __stringify(BT_SUBSYS_VERSION) "." 
\ + __stringify(BT_SUBSYS_REVISION) + static int __init bt_init(void) { int err; sock_skb_cb_check_size(sizeof(struct bt_skb_cb)); - BT_INFO("Core ver %s", BT_SUBSYS_VERSION); + BT_INFO("Core ver %s", VERSION); err = bt_selftest(); if (err < 0) @@ -797,7 +801,7 @@ subsys_initcall(bt_init); module_exit(bt_exit); MODULE_AUTHOR("Marcel Holtmann "); -MODULE_DESCRIPTION("Bluetooth Core ver " BT_SUBSYS_VERSION); -MODULE_VERSION(BT_SUBSYS_VERSION); +MODULE_DESCRIPTION("Bluetooth Core ver " VERSION); +MODULE_VERSION(VERSION); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_BLUETOOTH); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 804208d..a4227c7 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1117,8 +1117,8 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, send_monitor_note(sk, "Linux version %s (%s)", init_utsname()->release, init_utsname()->machine); - send_monitor_note(sk, "Bluetooth subsystem version %s", - BT_SUBSYS_VERSION); + send_monitor_note(sk, "Bluetooth subsystem version %u.%u", + BT_SUBSYS_VERSION, BT_SUBSYS_REVISION); send_monitor_replay(sk); send_monitor_control_replay(sk); -- cgit v1.1 From 0ef2c42f8c4e372bad16f67dc0f4b15b9be910f6 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 30 Aug 2016 05:00:36 +0200 Subject: Bluetooth: Send control open and close only when cookie is present Only when the cookie has been assigned, then send the open and close monitor messages. Also if the socket is bound to a device, then include the index into the message. 
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index a4227c7..0deca75 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -483,6 +483,10 @@ static struct sk_buff *create_monitor_ctrl_open(struct sock *sk) u8 ver[3]; u32 flags; + /* No message needed when cookie is not present */ + if (!hci_pi(sk)->cookie) + return NULL; + skb = bt_skb_alloc(14 + TASK_COMM_LEN , GFP_ATOMIC); if (!skb) return NULL; @@ -501,7 +505,10 @@ static struct sk_buff *create_monitor_ctrl_open(struct sock *sk) hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = cpu_to_le16(HCI_MON_CTRL_OPEN); - hdr->index = cpu_to_le16(HCI_DEV_NONE); + if (hci_pi(sk)->hdev) + hdr->index = cpu_to_le16(hci_pi(sk)->hdev->id); + else + hdr->index = cpu_to_le16(HCI_DEV_NONE); hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); return skb; @@ -512,6 +519,10 @@ static struct sk_buff *create_monitor_ctrl_close(struct sock *sk) struct hci_mon_hdr *hdr; struct sk_buff *skb; + /* No message needed when cookie is not present */ + if (!hci_pi(sk)->cookie) + return NULL; + skb = bt_skb_alloc(4, GFP_ATOMIC); if (!skb) return NULL; @@ -522,7 +533,10 @@ static struct sk_buff *create_monitor_ctrl_close(struct sock *sk) hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = cpu_to_le16(HCI_MON_CTRL_CLOSE); - hdr->index = cpu_to_le16(HCI_DEV_NONE); + if (hci_pi(sk)->hdev) + hdr->index = cpu_to_le16(hci_pi(sk)->hdev->id); + else + hdr->index = cpu_to_le16(HCI_DEV_NONE); hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); return skb; -- cgit v1.1 From 5a6d2cf5f18b5afbae0b1b450070bbba50f1e3e0 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 30 Aug 2016 05:00:37 +0200 Subject: Bluetooth: Assign the channel early when binding HCI sockets Assignment of the hci_pi(sk)->channel should be done early when binding the 
HCI socket. This avoids confusion with the RAW channel that is used for legacy access. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 0deca75..ca13fac 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1045,6 +1045,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, atomic_inc(&hdev->promisc); } + hci_pi(sk)->channel = haddr.hci_channel; hci_pi(sk)->hdev = hdev; break; @@ -1107,9 +1108,10 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, } } - atomic_inc(&hdev->promisc); - + hci_pi(sk)->channel = haddr.hci_channel; hci_pi(sk)->hdev = hdev; + + atomic_inc(&hdev->promisc); break; case HCI_CHANNEL_MONITOR: @@ -1123,6 +1125,8 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, goto done; } + hci_pi(sk)->channel = haddr.hci_channel; + /* The monitor interface is restricted to CAP_NET_RAW * capabilities and with that implicitly trusted. */ @@ -1149,6 +1153,8 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, err = -EPERM; goto done; } + + hci_pi(sk)->channel = haddr.hci_channel; break; default: @@ -1170,6 +1176,8 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, if (capable(CAP_NET_ADMIN)) hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); + hci_pi(sk)->channel = haddr.hci_channel; + /* At the moment the index and unconfigured index events * are enabled unconditionally. Setting them on each * socket when binding keeps this functionality. They @@ -1180,7 +1188,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, * received by untrusted users. Example for such events * are changes to settings, class of device, name etc. 
*/ - if (haddr.hci_channel == HCI_CHANNEL_CONTROL) { + if (hci_pi(sk)->channel == HCI_CHANNEL_CONTROL) { struct sk_buff *skb; hci_sock_gen_cookie(sk); @@ -1203,8 +1211,6 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, break; } - - hci_pi(sk)->channel = haddr.hci_channel; sk->sk_state = BT_BOUND; done: -- cgit v1.1 From d0bef1d26fb6fdad818f3d15a178d51e2a8478ae Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 30 Aug 2016 05:00:38 +0200 Subject: Bluetooth: Add extra channel checks for control open/close messages The control open and close monitoring events require special channel checks to ensure messages are only send when the right events happen. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index ca13fac..b22efe2 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -479,7 +479,7 @@ static struct sk_buff *create_monitor_ctrl_open(struct sock *sk) { struct hci_mon_hdr *hdr; struct sk_buff *skb; - u16 format = 0x0002; + u16 format; u8 ver[3]; u32 flags; @@ -487,11 +487,20 @@ static struct sk_buff *create_monitor_ctrl_open(struct sock *sk) if (!hci_pi(sk)->cookie) return NULL; + switch (hci_pi(sk)->channel) { + case HCI_CHANNEL_CONTROL: + format = 0x0002; + mgmt_fill_version_info(ver); + break; + default: + /* No message for unsupported format */ + return NULL; + } + skb = bt_skb_alloc(14 + TASK_COMM_LEN , GFP_ATOMIC); if (!skb) return NULL; - mgmt_fill_version_info(ver); flags = hci_sock_test_flag(sk, HCI_SOCK_TRUSTED) ? 
0x1 : 0x0; put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); @@ -523,6 +532,14 @@ static struct sk_buff *create_monitor_ctrl_close(struct sock *sk) if (!hci_pi(sk)->cookie) return NULL; + switch (hci_pi(sk)->channel) { + case HCI_CHANNEL_CONTROL: + break; + default: + /* No message for unsupported format */ + return NULL; + } + skb = bt_skb_alloc(4, GFP_ATOMIC); if (!skb) return NULL; @@ -652,9 +669,6 @@ static void send_monitor_control_replay(struct sock *mon_sk) sk_for_each(sk, &hci_sk_list.head) { struct sk_buff *skb; - if (hci_pi(sk)->channel != HCI_CHANNEL_CONTROL) - continue; - skb = create_monitor_ctrl_open(sk); if (!skb) continue; -- cgit v1.1 From f81f5b2db8692ff1d2d5f4db1fde58e67aa976a3 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 30 Aug 2016 05:00:39 +0200 Subject: Bluetooth: Send control open and close messages for HCI raw sockets When opening and closing HCI raw sockets their main usage is for legacy userspace. To track interaction with the modern mgmt interface, send open and close monitoring messages for these action. The HCI raw sockets is special since it supports unbound ioctl operation and for that special case delay the notification message until at least one ioctl has been executed. The difference between a bound and unbound socket will be detailed by the fact the HCI index is present or not. 
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index b22efe2..c777243 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -488,6 +488,11 @@ static struct sk_buff *create_monitor_ctrl_open(struct sock *sk) return NULL; switch (hci_pi(sk)->channel) { + case HCI_CHANNEL_RAW: + format = 0x0000; + ver[0] = BT_SUBSYS_VERSION; + put_unaligned_le16(BT_SUBSYS_REVISION, ver + 1); + break; case HCI_CHANNEL_CONTROL: format = 0x0002; mgmt_fill_version_info(ver); @@ -533,6 +538,7 @@ static struct sk_buff *create_monitor_ctrl_close(struct sock *sk) return NULL; switch (hci_pi(sk)->channel) { + case HCI_CHANNEL_RAW: case HCI_CHANNEL_CONTROL: break; default: @@ -820,6 +826,7 @@ static int hci_sock_release(struct socket *sock) case HCI_CHANNEL_MONITOR: atomic_dec(&monitor_promisc); break; + case HCI_CHANNEL_RAW: case HCI_CHANNEL_CONTROL: /* Send event to monitor */ skb = create_monitor_ctrl_close(sk); @@ -958,6 +965,27 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd, goto done; } + /* When calling an ioctl on an unbound raw socket, then ensure + * that the monitor gets informed. Ensure that the resulting event + * is only send once by checking if the cookie exists or not. The + * socket cookie will be only ever generated once for the lifetime + * of a given socket. 
+ */ + if (hci_sock_gen_cookie(sk)) { + struct sk_buff *skb; + + if (capable(CAP_NET_ADMIN)) + hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); + + /* Send event to monitor */ + skb = create_monitor_ctrl_open(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + } + release_sock(sk); switch (cmd) { @@ -1061,6 +1089,26 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, hci_pi(sk)->channel = haddr.hci_channel; hci_pi(sk)->hdev = hdev; + + /* Only send the event to monitor when a new cookie has + * been generated. An existing cookie means that an unbound + * socket has seen an ioctl and that triggered the cookie + * generation and sending of the monitor event. + */ + if (hci_sock_gen_cookie(sk)) { + struct sk_buff *skb; + + if (capable(CAP_NET_ADMIN)) + hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); + + /* Send event to monitor */ + skb = create_monitor_ctrl_open(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + } break; case HCI_CHANNEL_USER: -- cgit v1.1 From f4cdbb3f25c15c17a952deae1f2e0db6df8f1948 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 30 Aug 2016 05:00:40 +0200 Subject: Bluetooth: Handle HCI raw socket transition from unbound to bound In case an unbound HCI raw socket is later on bound, ensure that the monitor notification messages indicate a close and re-open. None of the userspace tools use the socket this, but it is actually possible to use an ioctl on an unbound socket and then later bind it. 
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 53 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index c777243..83e9fdb 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1049,6 +1049,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, struct sockaddr_hci haddr; struct sock *sk = sock->sk; struct hci_dev *hdev = NULL; + struct sk_buff *skb; int len, err = 0; BT_DBG("sock %p sk %p", sock, sk); @@ -1088,27 +1089,34 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, } hci_pi(sk)->channel = haddr.hci_channel; - hci_pi(sk)->hdev = hdev; - - /* Only send the event to monitor when a new cookie has - * been generated. An existing cookie means that an unbound - * socket has seen an ioctl and that triggered the cookie - * generation and sending of the monitor event. - */ - if (hci_sock_gen_cookie(sk)) { - struct sk_buff *skb; - - if (capable(CAP_NET_ADMIN)) - hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); - /* Send event to monitor */ - skb = create_monitor_ctrl_open(sk); + if (!hci_sock_gen_cookie(sk)) { + /* In the case when a cookie has already been assigned, + * then there has been already an ioctl issued against + * an unbound socket and with that triggerd an open + * notification. Send a close notification first to + * allow the state transition to bounded. 
+ */ + skb = create_monitor_ctrl_close(sk); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } } + + if (capable(CAP_NET_ADMIN)) + hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); + + hci_pi(sk)->hdev = hdev; + + /* Send event to monitor */ + skb = create_monitor_ctrl_open(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } break; case HCI_CHANNEL_USER: @@ -1251,9 +1259,20 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, * are changes to settings, class of device, name etc. */ if (hci_pi(sk)->channel == HCI_CHANNEL_CONTROL) { - struct sk_buff *skb; - - hci_sock_gen_cookie(sk); + if (!hci_sock_gen_cookie(sk)) { + /* In the case when a cookie has already been + * assigned, this socket will transtion from + * a raw socket into a control socket. To + * allow for a clean transtion, send the + * close notification first. + */ + skb = create_monitor_ctrl_close(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + } /* Send event to monitor */ skb = create_monitor_ctrl_open(sk); -- cgit v1.1 From fac9a6021b4e8dc8b2112a8e133936c0daf7ff94 Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Tue, 30 Aug 2016 22:42:53 +0530 Subject: Bluetooth: Remove deprecated create_singlethread_workqueue The workqueue "workqueue" queues multiple work items viz &qca->ws_awake_rx &qca->ws_rx_vote_off, &qca->ws_awake_device, &qca->ws_tx_vote_off which require strict execution ordering. Hence, an ordered dedicated workqueue has been used to replace the deprecated create_singlethread_workqueue instance. WQ_MEM_RECLAIM has not been set since the driver is not being used on a memory reclaim path. 
Signed-off-by: Bhaktipriya Shridhar Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_qca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 683c2b6..6c867fb 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -397,7 +397,7 @@ static int qca_open(struct hci_uart *hu) skb_queue_head_init(&qca->txq); skb_queue_head_init(&qca->tx_wait_q); spin_lock_init(&qca->hci_ibs_lock); - qca->workqueue = create_singlethread_workqueue("qca_wq"); + qca->workqueue = alloc_ordered_workqueue("qca_wq", 0); if (!qca->workqueue) { BT_ERR("QCA Workqueue not initialized properly"); kfree(qca); -- cgit v1.1 From 418678b01aca849b4f86224e609610ce87a9bdc4 Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Thu, 1 Sep 2016 17:22:37 +0200 Subject: Bluetooth: btusb: Mark CW6622 devices to have broken link key commands Conwise CW6622 seems to have a problem with the stored link key commands so just mark it as broken. < HCI Command: Read Local Supported Features (0x04|0x0003) plen 0 > HCI Event: Command Complete (0x0e) plen 12 Read Local Supported Features (0x04|0x0003) ncmd 1 status 0x00 Features: 0xff 0x3e 0x85 0x38 0x18 0x18 0x00 0x00 < HCI Command: Read Local Version Information (0x04|0x0001) plen 0 > HCI Event: Command Complete (0x0e) plen 12 Read Local Version Information (0x04|0x0001) ncmd 1 status 0x00 HCI Version: 2.0 (0x3) HCI Revision: 0x1f4 LMP Version: 2.0 (0x3) LMP Subversion: 0x1f4 Manufacturer: CONWISE Technology Corporation Ltd (66) ... < HCI Command: Read Local Supported Commands (0x04|0x0002) plen 0 > HCI Event: Command Complete (0x0e) plen 68 Read Local Supported Commands (0x04|0x0002) ncmd 1 status 0x00 Commands: 7fffef03cedfffffffffff1ff20ff8ff3f ... 
< HCI Command: Read Stored Link Key (0x03|0x000d) plen 7 bdaddr 00:00:00:00:00:00 all 1 > HCI Event: Command Complete (0x0e) plen 8 Read Stored Link Key (0x03|0x000d) ncmd 1 status 0x11 max 0 num 0 Error: Unsupported Feature or Parameter Value Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 80ae854..9ebd73d 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -62,6 +62,7 @@ static struct usb_driver btusb_driver; #define BTUSB_REALTEK 0x20000 #define BTUSB_BCM2045 0x40000 #define BTUSB_IFNUM_2 0x80000 +#define BTUSB_CW6622 0x100000 static const struct usb_device_id btusb_table[] = { /* Generic Bluetooth USB device */ @@ -291,7 +292,8 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0400, 0x080a), .driver_info = BTUSB_BROKEN_ISOC }, /* CONWISE Technology based adapters with buggy SCO support */ - { USB_DEVICE(0x0e5e, 0x6622), .driver_info = BTUSB_BROKEN_ISOC }, + { USB_DEVICE(0x0e5e, 0x6622), + .driver_info = BTUSB_BROKEN_ISOC | BTUSB_CW6622}, /* Roper Class 1 Bluetooth Dongle (Silicon Wave based) */ { USB_DEVICE(0x1310, 0x0001), .driver_info = BTUSB_SWAVE }, @@ -2845,6 +2847,9 @@ static int btusb_probe(struct usb_interface *intf, hdev->send = btusb_send_frame; hdev->notify = btusb_notify; + if (id->driver_info & BTUSB_CW6622) + set_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks); + if (id->driver_info & BTUSB_BCM2045) set_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks); -- cgit v1.1 From 321c6feed2519a2691f65e41c4d62332d6ee3d52 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 1 Sep 2016 16:46:23 +0200 Subject: Bluetooth: Add framework for Extended Controller Information MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This command is used to retrieve the current state and basic information of 
a controller. It is typically used right after getting the response to the Read Controller Index List command or an Index Added event (or its extended counterparts). When any of the values in the EIR_Data field changes, the event Extended Controller Information Changed will be used to inform clients about the updated information. Signed-off-by: Marcel Holtmann Signed-off-by: Michał Narajowski --- include/net/bluetooth/hci.h | 1 + include/net/bluetooth/mgmt.h | 18 +++++++++++++ net/bluetooth/mgmt.c | 62 ++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 79 insertions(+), 2 deletions(-) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index ddb9acc..99aa5e5 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -208,6 +208,7 @@ enum { HCI_MGMT_INDEX_EVENTS, HCI_MGMT_UNCONF_INDEX_EVENTS, HCI_MGMT_EXT_INDEX_EVENTS, + HCI_MGMT_EXT_INFO_EVENTS, HCI_MGMT_OPTION_EVENTS, HCI_MGMT_SETTING_EVENTS, HCI_MGMT_DEV_CLASS_EVENTS, diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 7647964..611b243 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -586,6 +586,18 @@ struct mgmt_rp_get_adv_size_info { #define MGMT_OP_START_LIMITED_DISCOVERY 0x0041 +#define MGMT_OP_READ_EXT_INFO 0x0042 +#define MGMT_READ_EXT_INFO_SIZE 0 +struct mgmt_rp_read_ext_info { + bdaddr_t bdaddr; + __u8 version; + __le16 manufacturer; + __le32 supported_settings; + __le32 current_settings; + __le16 eir_len; + __u8 eir[0]; +} __packed; + #define MGMT_EV_CMD_COMPLETE 0x0001 struct mgmt_ev_cmd_complete { __le16 opcode; @@ -800,3 +812,9 @@ struct mgmt_ev_advertising_added { struct mgmt_ev_advertising_removed { __u8 instance; } __packed; + +#define MGMT_EV_EXT_INFO_CHANGED 0x0025 +struct mgmt_ev_ext_info_changed { + __le16 eir_len; + __u8 eir[0]; +} __packed; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 47efdb4..69001f4 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ 
-104,6 +104,7 @@ static const u16 mgmt_commands[] = { MGMT_OP_REMOVE_ADVERTISING, MGMT_OP_GET_ADV_SIZE_INFO, MGMT_OP_START_LIMITED_DISCOVERY, + MGMT_OP_READ_EXT_INFO, }; static const u16 mgmt_events[] = { @@ -141,6 +142,7 @@ static const u16 mgmt_events[] = { MGMT_EV_LOCAL_OOB_DATA_UPDATED, MGMT_EV_ADVERTISING_ADDED, MGMT_EV_ADVERTISING_REMOVED, + MGMT_EV_EXT_INFO_CHANGED, }; static const u16 mgmt_untrusted_commands[] = { @@ -149,6 +151,7 @@ static const u16 mgmt_untrusted_commands[] = { MGMT_OP_READ_UNCONF_INDEX_LIST, MGMT_OP_READ_CONFIG_INFO, MGMT_OP_READ_EXT_INDEX_LIST, + MGMT_OP_READ_EXT_INFO, }; static const u16 mgmt_untrusted_events[] = { @@ -162,6 +165,7 @@ static const u16 mgmt_untrusted_events[] = { MGMT_EV_NEW_CONFIG_OPTIONS, MGMT_EV_EXT_INDEX_ADDED, MGMT_EV_EXT_INDEX_REMOVED, + MGMT_EV_EXT_INFO_CHANGED, }; #define CACHE_TIMEOUT msecs_to_jiffies(2 * 1000) @@ -862,6 +866,52 @@ static int read_controller_info(struct sock *sk, struct hci_dev *hdev, sizeof(rp)); } +static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev, + void *data, u16 data_len) +{ + struct mgmt_rp_read_ext_info rp; + + BT_DBG("sock %p %s", sk, hdev->name); + + hci_dev_lock(hdev); + + memset(&rp, 0, sizeof(rp)); + + bacpy(&rp.bdaddr, &hdev->bdaddr); + + rp.version = hdev->hci_ver; + rp.manufacturer = cpu_to_le16(hdev->manufacturer); + + rp.supported_settings = cpu_to_le32(get_supported_settings(hdev)); + rp.current_settings = cpu_to_le32(get_current_settings(hdev)); + + rp.eir_len = cpu_to_le16(0); + + hci_dev_unlock(hdev); + + /* If this command is called at least once, then the events + * for class of device and local name changes are disabled + * and only the new extended controller information event + * is used. 
+ */ + hci_sock_set_flag(sk, HCI_MGMT_EXT_INFO_EVENTS); + hci_sock_clear_flag(sk, HCI_MGMT_DEV_CLASS_EVENTS); + hci_sock_clear_flag(sk, HCI_MGMT_LOCAL_NAME_EVENTS); + + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_EXT_INFO, 0, &rp, + sizeof(rp)); +} + +static int ext_info_changed(struct hci_dev *hdev, struct sock *skip) +{ + struct mgmt_ev_ext_info_changed ev; + + ev.eir_len = cpu_to_le16(0); + + return mgmt_limited_event(MGMT_EV_EXT_INFO_CHANGED, hdev, &ev, + sizeof(ev), HCI_MGMT_EXT_INFO_EVENTS, skip); +} + static int send_settings_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev) { __le32 settings = cpu_to_le32(get_current_settings(hdev)); @@ -2995,6 +3045,7 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, err = mgmt_limited_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, data, len, HCI_MGMT_LOCAL_NAME_EVENTS, sk); + ext_info_changed(hdev, sk); goto failed; } @@ -6356,6 +6407,8 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { { remove_advertising, MGMT_REMOVE_ADVERTISING_SIZE }, { get_adv_size_info, MGMT_GET_ADV_SIZE_INFO_SIZE }, { start_limited_discovery, MGMT_START_DISCOVERY_SIZE }, + { read_ext_controller_info,MGMT_READ_EXT_INFO_SIZE, + HCI_MGMT_UNTRUSTED }, }; void mgmt_index_added(struct hci_dev *hdev) @@ -6494,10 +6547,12 @@ void __mgmt_power_off(struct hci_dev *hdev) mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status); - if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) + if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) { mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, zero_cod, sizeof(zero_cod), HCI_MGMT_DEV_CLASS_EVENTS, NULL); + ext_info_changed(hdev, NULL); + } new_settings(hdev, match.sk); @@ -7093,9 +7148,11 @@ void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, sk_lookup, &match); mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match); - if (!status) + if (!status) { 
mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, dev_class, 3, HCI_MGMT_DEV_CLASS_EVENTS, NULL); + ext_info_changed(hdev, NULL); + } if (match.sk) sock_put(match.sk); @@ -7126,6 +7183,7 @@ void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status) mgmt_limited_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, sizeof(ev), HCI_MGMT_LOCAL_NAME_EVENTS, cmd ? cmd->sk : NULL); + ext_info_changed(hdev, cmd ? cmd->sk : NULL); } static inline bool has_uuid(u8 *uuid, u16 uuid_count, u8 (*uuids)[16]) -- cgit v1.1 From 8a0c9f49090fe8ae122fd1bbf7260c8492289386 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Narajowski?= Date: Thu, 1 Sep 2016 16:46:24 +0200 Subject: Bluetooth: Append local name and CoD to Extended Controller Info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds device class, complete local name and short local name to EIR data in Extended Controller Info as specified in docs. Signed-off-by: Michał Narajowski Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 63 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 42 insertions(+), 21 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 69001f4..74179b9 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -866,26 +866,58 @@ static int read_controller_info(struct sock *sk, struct hci_dev *hdev, sizeof(rp)); } +static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, + u8 data_len) +{ + eir[eir_len++] = sizeof(type) + data_len; + eir[eir_len++] = type; + memcpy(&eir[eir_len], data, data_len); + eir_len += data_len; + + return eir_len; +} + static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { - struct mgmt_rp_read_ext_info rp; + struct mgmt_rp_read_ext_info *rp; + char buff[512]; + u16 eir_len = 0; + u8 name_len; BT_DBG("sock %p %s", sk, hdev->name); hci_dev_lock(hdev); - memset(&rp, 0, sizeof(rp)); + if 
(hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + eir_len = eir_append_data(buff, eir_len, + EIR_CLASS_OF_DEV, + hdev->dev_class, 3); - bacpy(&rp.bdaddr, &hdev->bdaddr); + name_len = strlen(hdev->dev_name); + eir_len = eir_append_data(buff, eir_len, EIR_NAME_COMPLETE, + hdev->dev_name, name_len); - rp.version = hdev->hci_ver; - rp.manufacturer = cpu_to_le16(hdev->manufacturer); + name_len = strlen(hdev->short_name); + eir_len = eir_append_data(buff, eir_len, EIR_NAME_SHORT, + hdev->short_name, name_len); - rp.supported_settings = cpu_to_le32(get_supported_settings(hdev)); - rp.current_settings = cpu_to_le32(get_current_settings(hdev)); + rp = kmalloc(sizeof(*rp) + eir_len, GFP_KERNEL); + if (!rp) + return -ENOMEM; + + memset(rp, 0, sizeof(*rp) + eir_len); + + rp->eir_len = cpu_to_le16(eir_len); + memcpy(rp->eir, buff, eir_len); - rp.eir_len = cpu_to_le16(0); + bacpy(&rp->bdaddr, &hdev->bdaddr); + + rp->version = hdev->hci_ver; + rp->manufacturer = cpu_to_le16(hdev->manufacturer); + + rp->supported_settings = cpu_to_le32(get_supported_settings(hdev)); + rp->current_settings = cpu_to_le32(get_current_settings(hdev)); hci_dev_unlock(hdev); @@ -898,8 +930,8 @@ static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev, hci_sock_clear_flag(sk, HCI_MGMT_DEV_CLASS_EVENTS); hci_sock_clear_flag(sk, HCI_MGMT_LOCAL_NAME_EVENTS); - return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_EXT_INFO, 0, &rp, - sizeof(rp)); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_EXT_INFO, 0, rp, + sizeof(*rp) + eir_len); } static int ext_info_changed(struct hci_dev *hdev, struct sock *skip) @@ -5552,17 +5584,6 @@ unlock: return err; } -static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, - u8 data_len) -{ - eir[eir_len++] = sizeof(type) + data_len; - eir[eir_len++] = type; - memcpy(&eir[eir_len], data, data_len); - eir_len += data_len; - - return eir_len; -} - static void read_local_oob_ext_data_complete(struct hci_dev *hdev, u8 status, u16 opcode, 
struct sk_buff *skb) { -- cgit v1.1 From bdca1fd9a6df745857e23c6056494b7fe062b4e6 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Thu, 1 Sep 2016 11:24:57 +0200 Subject: fakelb: fix schedule while atomic This patch changes the spinlock to mutex for the available fakelb phy list. When holding the spinlock the ieee802154_unregister_hw is called which holding the rtnl_mutex, in that case we get a "BUG: sleeping function called from invalid context" error. We simple change the spinlock to mutex which allows to hold the rtnl lock there. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- drivers/net/ieee802154/fakelb.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ieee802154/fakelb.c b/drivers/net/ieee802154/fakelb.c index 0becf0a..ec387ef 100644 --- a/drivers/net/ieee802154/fakelb.c +++ b/drivers/net/ieee802154/fakelb.c @@ -30,7 +30,7 @@ static int numlbs = 2; static LIST_HEAD(fakelb_phys); -static DEFINE_SPINLOCK(fakelb_phys_lock); +static DEFINE_MUTEX(fakelb_phys_lock); static LIST_HEAD(fakelb_ifup_phys); static DEFINE_RWLOCK(fakelb_ifup_phys_lock); @@ -188,9 +188,9 @@ static int fakelb_add_one(struct device *dev) if (err) goto err_reg; - spin_lock(&fakelb_phys_lock); + mutex_lock(&fakelb_phys_lock); list_add_tail(&phy->list, &fakelb_phys); - spin_unlock(&fakelb_phys_lock); + mutex_unlock(&fakelb_phys_lock); return 0; @@ -222,10 +222,10 @@ static int fakelb_probe(struct platform_device *pdev) return 0; err_slave: - spin_lock(&fakelb_phys_lock); + mutex_lock(&fakelb_phys_lock); list_for_each_entry_safe(phy, tmp, &fakelb_phys, list) fakelb_del(phy); - spin_unlock(&fakelb_phys_lock); + mutex_unlock(&fakelb_phys_lock); return err; } @@ -233,10 +233,10 @@ static int fakelb_remove(struct platform_device *pdev) { struct fakelb_phy *phy, *tmp; - spin_lock(&fakelb_phys_lock); + mutex_lock(&fakelb_phys_lock); list_for_each_entry_safe(phy, tmp, &fakelb_phys, list) fakelb_del(phy); - 
spin_unlock(&fakelb_phys_lock); + mutex_unlock(&fakelb_phys_lock); return 0; } -- cgit v1.1 From aa1638dde75d00e4f549902017d0df48b77e86ff Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 1 Sep 2016 19:48:28 +0200 Subject: Bluetooth: Send control open and close messages for HCI user channels When opening and closing HCI user channel, send monitoring messages to be able to trace its behavior. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 83e9fdb..48f9471 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -493,6 +493,11 @@ static struct sk_buff *create_monitor_ctrl_open(struct sock *sk) ver[0] = BT_SUBSYS_VERSION; put_unaligned_le16(BT_SUBSYS_REVISION, ver + 1); break; + case HCI_CHANNEL_USER: + format = 0x0001; + ver[0] = BT_SUBSYS_VERSION; + put_unaligned_le16(BT_SUBSYS_REVISION, ver + 1); + break; case HCI_CHANNEL_CONTROL: format = 0x0002; mgmt_fill_version_info(ver); @@ -539,6 +544,7 @@ static struct sk_buff *create_monitor_ctrl_close(struct sock *sk) switch (hci_pi(sk)->channel) { case HCI_CHANNEL_RAW: + case HCI_CHANNEL_USER: case HCI_CHANNEL_CONTROL: break; default: @@ -827,6 +833,7 @@ static int hci_sock_release(struct socket *sock) atomic_dec(&monitor_promisc); break; case HCI_CHANNEL_RAW: + case HCI_CHANNEL_USER: case HCI_CHANNEL_CONTROL: /* Send event to monitor */ skb = create_monitor_ctrl_close(sk); @@ -1179,8 +1186,36 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, } hci_pi(sk)->channel = haddr.hci_channel; + + if (!hci_sock_gen_cookie(sk)) { + /* In the case when a cookie has already been assigned, + * this socket will transition from a raw socket into + * an user channel socket. For a clean transition, send + * the close notification first. 
+ */ + skb = create_monitor_ctrl_close(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + } + + /* The user channel is restricted to CAP_NET_ADMIN + * capabilities and with that implicitly trusted. + */ + hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); + hci_pi(sk)->hdev = hdev; + /* Send event to monitor */ + skb = create_monitor_ctrl_open(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + atomic_inc(&hdev->promisc); break; -- cgit v1.1 From baab793225c9badf46309f56982eb1012dbaac80 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 4 Sep 2016 05:13:46 +0200 Subject: Bluetooth: Fix wrong New Settings event when closing HCI User Channel When closing HCI User Channel, the New Settings event was sent out to inform about changed settings. However such event is wrong since the exclusive HCI User Channel access is active until the Index Added event has been sent. @ USER Close: test @ MGMT Event: New Settings (0x0006) plen 4 Current settings: 0x00000ad0 Bondable Secure Simple Pairing BR/EDR Low Energy Secure Connections = Close Index: 00:14:EF:22:04:12 @ MGMT Event: Index Added (0x0004) plen 0 Calling __mgmt_power_off from hci_dev_do_close requires an extra check for an active HCI User Channel. 
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ddf8432..3ac89e9 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1562,6 +1562,7 @@ int hci_dev_do_close(struct hci_dev *hdev) auto_off = hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF); if (!auto_off && hdev->dev_type == HCI_PRIMARY && + !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && hci_dev_test_flag(hdev, HCI_MGMT)) __mgmt_power_off(hdev); -- cgit v1.1 From 0676cab47ed18c9cfa884f055a3ba0f9e6fb8e96 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 6 Sep 2016 13:15:49 +0100 Subject: Bluetooth: btqca: remove null checks on edl->data as it is an array edl->data is an array of __u8 so the null check is unnecessary, so remove it. Signed-off-by: Colin Ian King Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btqca.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 4a62081..28afd5d 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -55,8 +55,8 @@ static int rome_patch_ver_req(struct hci_dev *hdev, u32 *rome_version) } edl = (struct edl_event_hdr *)(skb->data); - if (!edl || !edl->data) { - BT_ERR("%s: TLV with no header or no data", hdev->name); + if (!edl) { + BT_ERR("%s: TLV with no header", hdev->name); err = -EILSEQ; goto out; } @@ -224,8 +224,8 @@ static int rome_tlv_send_segment(struct hci_dev *hdev, int idx, int seg_size, } edl = (struct edl_event_hdr *)(skb->data); - if (!edl || !edl->data) { - BT_ERR("%s: TLV with no header or no data", hdev->name); + if (!edl) { + BT_ERR("%s: TLV with no header", hdev->name); err = -EILSEQ; goto out; } -- cgit v1.1 From 3c0975a7a1087add3bf873601f0270aa695d7616 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Dalleau?= Date: Thu, 8 Sep 2016 12:00:11 +0200 Subject: Bluetooth: Fix 
reason code used for rejecting SCO connections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A comment in the code states that SCO connection should be rejected with the proper error value between 0xd-0xf. The code uses HCI_ERROR_REMOTE_LOW_RESOURCES which is 0x14. This led to following error: < HCI Command: Reject Synchronous Co.. (0x01|0x002a) plen 7 Address: 34:51:C9:EF:02:CA (Apple, Inc.) Reason: Remote Device Terminated due to Low Resources (0x14) > HCI Event: Command Status (0x0f) plen 4 Reject Synchronous Connection Request (0x01|0x002a) ncmd 1 Status: Invalid HCI Command Parameters (0x12) Instead make use of HCI_ERROR_REJ_LIMITED_RESOURCES which is 0xd. Signed-off-by: Frédéric Dalleau Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 9968b1c..9566ff8 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1717,7 +1717,7 @@ void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, * function. To be safe hard-code one of the * values that's suitable for SCO. */ - rej.reason = HCI_ERROR_REMOTE_LOW_RESOURCES; + rej.reason = HCI_ERROR_REJ_LIMITED_RESOURCES; hci_req_add(req, HCI_OP_REJECT_SYNC_CONN_REQ, sizeof(rej), &rej); -- cgit v1.1 From 4037a7747d7b5a3e5bb4d10fb9ea6e2fd8a23c3b Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 8 Sep 2016 18:07:07 +0200 Subject: Bluetooth: Increase the subsystem minor version number While the subsystem version information is purely informational, increase the minor number due to the addition of user channel and management control monitoring support. It is helpful for debugging purposes to see the version numbers change. 
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/bluetooth.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index d705bcf..0a1e21d 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -30,7 +30,7 @@ #include #define BT_SUBSYS_VERSION 2 -#define BT_SUBSYS_REVISION 21 +#define BT_SUBSYS_REVISION 22 #ifndef AF_BLUETOOTH #define AF_BLUETOOTH 31 -- cgit v1.1 From 3e36ca483a642f441b8e29b4e98091f2c62bfb38 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 10 Sep 2016 12:21:22 +0000 Subject: Bluetooth: Use kzalloc instead of kmalloc/memset Use kzalloc rather than kmalloc followed by memset with 0. Generated by: scripts/coccinelle/api/alloc/kzalloc-simple.cocci Signed-off-by: Wei Yongjun Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 74179b9..0ac881c 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -902,12 +902,10 @@ static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev, eir_len = eir_append_data(buff, eir_len, EIR_NAME_SHORT, hdev->short_name, name_len); - rp = kmalloc(sizeof(*rp) + eir_len, GFP_KERNEL); + rp = kzalloc(sizeof(*rp) + eir_len, GFP_KERNEL); if (!rp) return -ENOMEM; - memset(rp, 0, sizeof(*rp) + eir_len); - rp->eir_len = cpu_to_le16(eir_len); memcpy(rp->eir, buff, eir_len); -- cgit v1.1 From 83ebb9ec734e9e768a9fae469e4a7ed1762ef43a Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Fri, 9 Sep 2016 20:24:40 +0200 Subject: Bluetooth: Fix not registering BR/EDR SMP channel with force_bredr flag If force_bredr is set SMP BR/EDR channel should also be for non-SC capable controllers. Since hcidev flag is persistent wrt power toggle it can be already set when calling smp_register(). 
This resulted in SMP BR/EDR channel not being registered even if HCI_FORCE_BREDR_SMP flag was set. This also fixes a NULL pointer dereference when trying to disable force_bredr after power cycle. BUG: unable to handle kernel NULL pointer dereference at 0000000000000388 IP: [] smp_del_chan+0x18/0x80 [bluetooth] Call Trace: [] force_bredr_smp_write+0xba/0x100 [bluetooth] [] full_proxy_write+0x54/0x90 [] __vfs_write+0x37/0x160 [] ? selinux_file_permission+0xd7/0x110 [] ? security_file_permission+0x3d/0xc0 [] ? percpu_down_read+0x12/0x50 [] vfs_write+0xb5/0x1a0 [] SyS_write+0x55/0xc0 [] entry_SYSCALL_64_fastpath+0x1a/0xa4 Code: 48 8b 45 f0 eb c1 0f 1f 00 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 f6 05 c6 3b 02 00 04 55 48 89 e5 41 54 53 49 89 fc 75 4b <49> 8b 9c 24 88 03 00 00 48 85 db 74 31 49 c7 84 24 88 03 00 00 RIP [] smp_del_chan+0x18/0x80 [bluetooth] RSP CR2: 0000000000000388 Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/smp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 4c1a16a..43faf2a 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -3387,7 +3387,10 @@ int smp_register(struct hci_dev *hdev) if (!lmp_sc_capable(hdev)) { debugfs_create_file("force_bredr_smp", 0644, hdev->debugfs, hdev, &force_bredr_smp_fops); - return 0; + + /* Flag can be already set here (due to power toggle) */ + if (!hci_dev_test_flag(hdev, HCI_FORCE_BREDR_SMP)) + return 0; } if (WARN_ON(hdev->smp_bredr_data)) { -- cgit v1.1 From 1110a2dbe69831abdcf119c3a9a4c4ef2d0905f8 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Fri, 9 Sep 2016 10:02:05 -0500 Subject: Bluetooth: btrtl: Add RTL8822BE Bluetooth device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The RTL8822BE is a new Realtek wifi and BT device. Support for the BT part is hereby added. As this device is similar to most of the other Realtek BT devices, the changes are minimal.
The main difference is that the 8822BE needs a configuration file for enabling and disabling features. Thus code is added to select and load this configuration file. Although not needed at the moment, hooks are added for the other devices that might need such configuration files. One additional change is to the routine that tests that the project ID contained in the firmware matches the hardware. As the project IDs are not sequential, continuing to use the position in the array as the expected value of the ID would require adding extra unused entries in the table, and any subsequent rearrangement of the array would break the code. To fix these problems, the array elements now contain both the hardware ID and the expected value for the project ID. Signed-off-by: 陆朱伟 Signed-off-by: Larry Finger Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btrtl.c | 107 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 95 insertions(+), 12 deletions(-) diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index 8428893..fc9b257 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -33,6 +33,7 @@ #define RTL_ROM_LMP_8723B 0x8723 #define RTL_ROM_LMP_8821A 0x8821 #define RTL_ROM_LMP_8761A 0x8761 +#define RTL_ROM_LMP_8822B 0x8822 static int rtl_read_rom_version(struct hci_dev *hdev, u8 *version) { @@ -78,11 +79,15 @@ static int rtl8723b_parse_firmware(struct hci_dev *hdev, u16 lmp_subver, const unsigned char *patch_length_base, *patch_offset_base; u32 patch_offset = 0; u16 patch_length, num_patches; - const u16 project_id_to_lmp_subver[] = { - RTL_ROM_LMP_8723A, - RTL_ROM_LMP_8723B, - RTL_ROM_LMP_8821A, - RTL_ROM_LMP_8761A + static const struct { + __u16 lmp_subver; + __u8 id; + } project_id_to_lmp_subver[] = { + { RTL_ROM_LMP_8723A, 0 }, + { RTL_ROM_LMP_8723B, 1 }, + { RTL_ROM_LMP_8821A, 2 }, + { RTL_ROM_LMP_8761A, 3 }, + { RTL_ROM_LMP_8822B, 8 }, }; ret = rtl_read_rom_version(hdev, &rom_version); @@ -134,14 +139,20 @@ static int
rtl8723b_parse_firmware(struct hci_dev *hdev, u16 lmp_subver, return -EINVAL; } - if (project_id >= ARRAY_SIZE(project_id_to_lmp_subver)) { + /* Find project_id in table */ + for (i = 0; i < ARRAY_SIZE(project_id_to_lmp_subver); i++) { + if (project_id == project_id_to_lmp_subver[i].id) + break; + } + + if (i >= ARRAY_SIZE(project_id_to_lmp_subver)) { BT_ERR("%s: unknown project id %d", hdev->name, project_id); return -EINVAL; } - if (lmp_subver != project_id_to_lmp_subver[project_id]) { + if (lmp_subver != project_id_to_lmp_subver[i].lmp_subver) { BT_ERR("%s: firmware is for %x but this is a %x", hdev->name, - project_id_to_lmp_subver[project_id], lmp_subver); + project_id_to_lmp_subver[i].lmp_subver, lmp_subver); return -EINVAL; } @@ -257,6 +268,26 @@ out: return ret; } +static int rtl_load_config(struct hci_dev *hdev, const char *name, u8 **buff) +{ + const struct firmware *fw; + int ret; + + BT_INFO("%s: rtl: loading %s", hdev->name, name); + ret = request_firmware(&fw, name, &hdev->dev); + if (ret < 0) { + BT_ERR("%s: Failed to load %s", hdev->name, name); + return ret; + } + + ret = fw->size; + *buff = kmemdup(fw->data, ret, GFP_KERNEL); + + release_firmware(fw); + + return ret; +} + static int btrtl_setup_rtl8723a(struct hci_dev *hdev) { const struct firmware *fw; @@ -296,25 +327,74 @@ static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 lmp_subver, unsigned char *fw_data = NULL; const struct firmware *fw; int ret; + int cfg_sz; + u8 *cfg_buff = NULL; + u8 *tbuff; + char *cfg_name = NULL; + + switch (lmp_subver) { + case RTL_ROM_LMP_8723B: + cfg_name = "rtl_bt/rtl8723b_config.bin"; + break; + case RTL_ROM_LMP_8821A: + cfg_name = "rtl_bt/rtl8821a_config.bin"; + break; + case RTL_ROM_LMP_8761A: + cfg_name = "rtl_bt/rtl8761a_config.bin"; + break; + case RTL_ROM_LMP_8822B: + cfg_name = "rtl_bt/rtl8822b_config.bin"; + break; + default: + BT_ERR("%s: rtl: no config according to lmp_subver %04x", + hdev->name, lmp_subver); + break; + } + + if (cfg_name) { + 
cfg_sz = rtl_load_config(hdev, cfg_name, &cfg_buff); + if (cfg_sz < 0) + cfg_sz = 0; + } else + cfg_sz = 0; BT_INFO("%s: rtl: loading %s", hdev->name, fw_name); ret = request_firmware(&fw, fw_name, &hdev->dev); if (ret < 0) { BT_ERR("%s: Failed to load %s", hdev->name, fw_name); - return ret; + goto err_req_fw; } ret = rtl8723b_parse_firmware(hdev, lmp_subver, fw, &fw_data); if (ret < 0) goto out; + if (cfg_sz) { + tbuff = kzalloc(ret + cfg_sz, GFP_KERNEL); + if (!tbuff) { + ret = -ENOMEM; + goto out; + } + + memcpy(tbuff, fw_data, ret); + kfree(fw_data); + + memcpy(tbuff + ret, cfg_buff, cfg_sz); + ret += cfg_sz; + + fw_data = tbuff; + } + + BT_INFO("cfg_sz %d, total size %d", cfg_sz, ret); + ret = rtl_download_firmware(hdev, fw_data, ret); - kfree(fw_data); - if (ret < 0) - goto out; out: release_firmware(fw); + kfree(fw_data); +err_req_fw: + if (cfg_sz) + kfree(cfg_buff); return ret; } @@ -377,6 +457,9 @@ int btrtl_setup_realtek(struct hci_dev *hdev) case RTL_ROM_LMP_8761A: return btrtl_setup_rtl8723b(hdev, lmp_subver, "rtl_bt/rtl8761a_fw.bin"); + case RTL_ROM_LMP_8822B: + return btrtl_setup_rtl8723b(hdev, lmp_subver, + "rtl_bt/rtl8822b_fw.bin"); default: BT_INFO("rtl: assuming no firmware upload needed."); return 0; -- cgit v1.1 From 7c295c4801b2de24fc25687eb0cb73cf0c99d114 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Narajowski?= Date: Sun, 18 Sep 2016 12:50:02 +0200 Subject: Bluetooth: Add support for local name in scan rsp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch enables appending local name to scan response data. If currently advertised instance has name flag set it is expired immediately. 
Signed-off-by: Michał Narajowski Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 28 +++++++++++++++++++-------- net/bluetooth/mgmt.c | 46 +++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 64 insertions(+), 10 deletions(-) diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 9566ff8..0ce6cdd 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -971,14 +971,14 @@ void __hci_req_enable_advertising(struct hci_request *req) hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); } -static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) +static u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len) { - u8 ad_len = 0; size_t name_len; + int max_len; + max_len = HCI_MAX_AD_LENGTH - ad_len - 2; name_len = strlen(hdev->dev_name); - if (name_len > 0) { - size_t max_len = HCI_MAX_AD_LENGTH - ad_len - 2; + if (name_len > 0 && max_len > 0) { if (name_len > max_len) { name_len = max_len; @@ -997,22 +997,34 @@ static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) return ad_len; } +static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) +{ + return append_local_name(hdev, ptr, 0); +} + static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance, u8 *ptr) { struct adv_info *adv_instance; + u32 instance_flags; + u8 scan_rsp_len = 0; adv_instance = hci_find_adv_instance(hdev, instance); if (!adv_instance) return 0; - /* TODO: Set the appropriate entries based on advertising instance flags - * here once flags other than 0 are supported. 
- */ + instance_flags = adv_instance->flags; + memcpy(ptr, adv_instance->scan_rsp_data, adv_instance->scan_rsp_len); - return adv_instance->scan_rsp_len; + scan_rsp_len += adv_instance->scan_rsp_len; + ptr += adv_instance->scan_rsp_len; + + if (instance_flags & MGMT_ADV_FLAG_LOCAL_NAME) + scan_rsp_len = append_local_name(hdev, ptr, scan_rsp_len); + + return scan_rsp_len; } void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 0ac881c..89954bb 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3012,6 +3012,35 @@ static int user_passkey_neg_reply(struct sock *sk, struct hci_dev *hdev, HCI_OP_USER_PASSKEY_NEG_REPLY, 0); } +static void adv_expire(struct hci_dev *hdev, u32 flags) +{ + struct adv_info *adv_instance; + struct hci_request req; + int err; + + adv_instance = hci_find_adv_instance(hdev, hdev->cur_adv_instance); + if (!adv_instance) + return; + + /* stop if current instance doesn't need to be changed */ + if (!(adv_instance->flags & flags)) + return; + + cancel_adv_timeout(hdev); + + adv_instance = hci_get_next_instance(hdev, adv_instance->instance); + if (!adv_instance) + return; + + hci_req_init(&req, hdev); + err = __hci_req_schedule_adv_instance(&req, adv_instance->instance, + true); + if (err) + return; + + hci_req_run(&req, NULL); +} + static void set_name_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct mgmt_cp_set_local_name *cp; @@ -3027,13 +3056,17 @@ static void set_name_complete(struct hci_dev *hdev, u8 status, u16 opcode) cp = cmd->param; - if (status) + if (status) { mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, mgmt_status(status)); - else + } else { mgmt_cmd_complete(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, cp, sizeof(*cp)); + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) + adv_expire(hdev, MGMT_ADV_FLAG_LOCAL_NAME); + } + mgmt_pending_remove(cmd); unlock: @@ -5885,6 +5918,7 @@ static u32 get_supported_adv_flags(struct 
hci_dev *hdev) flags |= MGMT_ADV_FLAG_DISCOV; flags |= MGMT_ADV_FLAG_LIMITED_DISCOV; flags |= MGMT_ADV_FLAG_MANAGED_FLAGS; + flags |= MGMT_ADV_FLAG_LOCAL_NAME; if (hdev->adv_tx_power != HCI_TX_POWER_INVALID) flags |= MGMT_ADV_FLAG_TX_POWER; @@ -5961,6 +5995,10 @@ static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data, tx_power_managed = true; max_len -= 3; } + } else { + /* at least 1 byte of name should fit in */ + if (adv_flags & MGMT_ADV_FLAG_LOCAL_NAME) + max_len -= 3; } if (len > max_len) @@ -6293,6 +6331,10 @@ static u8 tlv_data_max_len(u32 adv_flags, bool is_adv_data) if (adv_flags & MGMT_ADV_FLAG_TX_POWER) max_len -= 3; + } else { + /* at least 1 byte of name should fit in */ + if (adv_flags & MGMT_ADV_FLAG_LOCAL_NAME) + max_len -= 3; } return max_len; -- cgit v1.1 From c4960ecf2b09210930964ef2c05ce2590802ccf4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Narajowski?= Date: Sun, 18 Sep 2016 12:50:03 +0200 Subject: Bluetooth: Add support for appearance in scan rsp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch enables prepending appearance value to scan response data. It also adds support for setting appearance value through mgmt command. If currently advertised instance has appearance flag set it is expired immediately.
Signed-off-by: Michał Narajowski Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 + include/net/bluetooth/mgmt.h | 6 ++++++ net/bluetooth/hci_request.c | 8 ++++++++ net/bluetooth/mgmt.c | 37 +++++++++++++++++++++++++++++++++++++ 4 files changed, 52 insertions(+) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a48f71d..f00bf66 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -211,6 +211,7 @@ struct hci_dev { __u8 dev_name[HCI_MAX_NAME_LENGTH]; __u8 short_name[HCI_MAX_SHORT_NAME_LENGTH]; __u8 eir[HCI_MAX_EIR_LENGTH]; + __u16 appearance; __u8 dev_class[3]; __u8 major_class; __u8 minor_class; diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 611b243..72a456b 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -598,6 +598,12 @@ struct mgmt_rp_read_ext_info { __u8 eir[0]; } __packed; +#define MGMT_OP_SET_APPEARANCE 0x0043 +struct mgmt_cp_set_appearance { + __u16 appearance; +} __packed; +#define MGMT_SET_APPEARANCE_SIZE 2 + #define MGMT_EV_CMD_COMPLETE 0x0001 struct mgmt_ev_cmd_complete { __le16 opcode; diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 0ce6cdd..c813568 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1015,6 +1015,14 @@ static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance, instance_flags = adv_instance->flags; + if ((instance_flags & MGMT_ADV_FLAG_APPEARANCE) && hdev->appearance) { + ptr[0] = 3; + ptr[1] = EIR_APPEARANCE; + put_unaligned_le16(hdev->appearance, ptr + 2); + scan_rsp_len += 4; + ptr += 4; + } + memcpy(ptr, adv_instance->scan_rsp_data, adv_instance->scan_rsp_len); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 89954bb..78d7088 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -105,6 +105,7 @@ static const u16 mgmt_commands[] = { MGMT_OP_GET_ADV_SIZE_INFO, 
MGMT_OP_START_LIMITED_DISCOVERY, MGMT_OP_READ_EXT_INFO, + MGMT_OP_SET_APPEARANCE, }; static const u16 mgmt_events[] = { @@ -3143,6 +3144,34 @@ failed: return err; } +static int set_appearance(struct sock *sk, struct hci_dev *hdev, void *data, + u16 len) +{ + struct mgmt_cp_set_appearance *cp = data; + u16 apperance; + int err; + + BT_DBG(""); + + apperance = le16_to_cpu(cp->appearance); + + hci_dev_lock(hdev); + + if (hdev->appearance != apperance) { + hdev->appearance = apperance; + + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) + adv_expire(hdev, MGMT_ADV_FLAG_APPEARANCE); + } + + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_APPEARANCE, 0, NULL, + 0); + + hci_dev_unlock(hdev); + + return err; +} + static void read_local_oob_data_complete(struct hci_dev *hdev, u8 status, u16 opcode, struct sk_buff *skb) { @@ -5918,6 +5947,7 @@ static u32 get_supported_adv_flags(struct hci_dev *hdev) flags |= MGMT_ADV_FLAG_DISCOV; flags |= MGMT_ADV_FLAG_LIMITED_DISCOV; flags |= MGMT_ADV_FLAG_MANAGED_FLAGS; + flags |= MGMT_ADV_FLAG_APPEARANCE; flags |= MGMT_ADV_FLAG_LOCAL_NAME; if (hdev->adv_tx_power != HCI_TX_POWER_INVALID) @@ -5999,6 +6029,9 @@ static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data, /* at least 1 byte of name should fit in */ if (adv_flags & MGMT_ADV_FLAG_LOCAL_NAME) max_len -= 3; + + if (adv_flags & MGMT_ADV_FLAG_APPEARANCE) + max_len -= 4; } if (len > max_len) @@ -6335,6 +6368,9 @@ static u8 tlv_data_max_len(u32 adv_flags, bool is_adv_data) /* at least 1 byte of name should fit in */ if (adv_flags & MGMT_ADV_FLAG_LOCAL_NAME) max_len -= 3; + + if (adv_flags & (MGMT_ADV_FLAG_APPEARANCE)) + max_len -= 4; } return max_len; @@ -6470,6 +6506,7 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { { start_limited_discovery, MGMT_START_DISCOVERY_SIZE }, { read_ext_controller_info,MGMT_READ_EXT_INFO_SIZE, HCI_MGMT_UNTRUSTED }, + { set_appearance, MGMT_SET_APPEARANCE_SIZE }, }; void mgmt_index_added(struct hci_dev *hdev) -- cgit v1.1 From 
5e2c59e84b633e4f7719fdc6a2930f2a311da83a Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Sun, 18 Sep 2016 12:50:04 +0200 Subject: Bluetooth: Remove unused parameter from tlv_data_is_valid function hdev parameter is not used in function. Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 78d7088..97f70b7 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6005,8 +6005,7 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev, return err; } -static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data, - u8 len, bool is_adv_data) +static bool tlv_data_is_valid(u32 adv_flags, u8 *data, u8 len, bool is_adv_data) { u8 max_len = HCI_MAX_AD_LENGTH; int i, cur_len; @@ -6168,8 +6167,8 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, goto unlock; } - if (!tlv_data_is_valid(hdev, flags, cp->data, cp->adv_data_len, true) || - !tlv_data_is_valid(hdev, flags, cp->data + cp->adv_data_len, + if (!tlv_data_is_valid(flags, cp->data, cp->adv_data_len, true) || + !tlv_data_is_valid(flags, cp->data + cp->adv_data_len, cp->scan_rsp_len, false)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, MGMT_STATUS_INVALID_PARAMS); -- cgit v1.1 From 2bb36870e8cb29949ef9acec37129cd8e70f1857 Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Sun, 18 Sep 2016 12:50:05 +0200 Subject: Bluetooth: Unify advertising instance flags check This unifies max length and TLV validity checks. 
Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 85 +++++++++++++++++++++++++++++----------------------- 1 file changed, 48 insertions(+), 37 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 97f70b7..c96b0ad 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6005,34 +6005,59 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev, return err; } -static bool tlv_data_is_valid(u32 adv_flags, u8 *data, u8 len, bool is_adv_data) +static u8 tlv_data_max_len(u32 adv_flags, bool is_adv_data) { u8 max_len = HCI_MAX_AD_LENGTH; - int i, cur_len; - bool flags_managed = false; - bool tx_power_managed = false; if (is_adv_data) { if (adv_flags & (MGMT_ADV_FLAG_DISCOV | MGMT_ADV_FLAG_LIMITED_DISCOV | - MGMT_ADV_FLAG_MANAGED_FLAGS)) { - flags_managed = true; + MGMT_ADV_FLAG_MANAGED_FLAGS)) max_len -= 3; - } - if (adv_flags & MGMT_ADV_FLAG_TX_POWER) { - tx_power_managed = true; + if (adv_flags & MGMT_ADV_FLAG_TX_POWER) max_len -= 3; - } } else { /* at least 1 byte of name should fit in */ if (adv_flags & MGMT_ADV_FLAG_LOCAL_NAME) max_len -= 3; - if (adv_flags & MGMT_ADV_FLAG_APPEARANCE) + if (adv_flags & (MGMT_ADV_FLAG_APPEARANCE)) max_len -= 4; } + return max_len; +} + +static bool flags_managed(u32 adv_flags) +{ + return adv_flags & (MGMT_ADV_FLAG_DISCOV | + MGMT_ADV_FLAG_LIMITED_DISCOV | + MGMT_ADV_FLAG_MANAGED_FLAGS); +} + +static bool tx_power_managed(u32 adv_flags) +{ + return adv_flags & MGMT_ADV_FLAG_TX_POWER; +} + +static bool name_managed(u32 adv_flags) +{ + return adv_flags & MGMT_ADV_FLAG_LOCAL_NAME; +} + +static bool appearance_managed(u32 adv_flags) +{ + return adv_flags & MGMT_ADV_FLAG_APPEARANCE; +} + +static bool tlv_data_is_valid(u32 adv_flags, u8 *data, u8 len, bool is_adv_data) +{ + int i, cur_len; + u8 max_len; + + max_len = tlv_data_max_len(adv_flags, is_adv_data); + if (len > max_len) return false; @@ -6040,10 +6065,20 @@ static bool tlv_data_is_valid(u32 adv_flags, u8 
*data, u8 len, bool is_adv_data) for (i = 0, cur_len = 0; i < len; i += (cur_len + 1)) { cur_len = data[i]; - if (flags_managed && data[i + 1] == EIR_FLAGS) + if (data[i + 1] == EIR_FLAGS && flags_managed(adv_flags)) + return false; + + if (data[i + 1] == EIR_TX_POWER && tx_power_managed(adv_flags)) + return false; + + if (data[i + 1] == EIR_NAME_COMPLETE && name_managed(adv_flags)) + return false; + + if (data[i + 1] == EIR_NAME_SHORT && name_managed(adv_flags)) return false; - if (tx_power_managed && data[i + 1] == EIR_TX_POWER) + if (data[i + 1] == EIR_APPEARANCE && + appearance_managed(adv_flags)) return false; /* If the current field length would exceed the total data @@ -6351,30 +6386,6 @@ unlock: return err; } -static u8 tlv_data_max_len(u32 adv_flags, bool is_adv_data) -{ - u8 max_len = HCI_MAX_AD_LENGTH; - - if (is_adv_data) { - if (adv_flags & (MGMT_ADV_FLAG_DISCOV | - MGMT_ADV_FLAG_LIMITED_DISCOV | - MGMT_ADV_FLAG_MANAGED_FLAGS)) - max_len -= 3; - - if (adv_flags & MGMT_ADV_FLAG_TX_POWER) - max_len -= 3; - } else { - /* at least 1 byte of name should fit in */ - if (adv_flags & MGMT_ADV_FLAG_LOCAL_NAME) - max_len -= 3; - - if (adv_flags & (MGMT_ADV_FLAG_APPEARANCE)) - max_len -= 4; - } - - return max_len; -} - static int get_adv_size_info(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { -- cgit v1.1 From 9c9db78dc0fbbd95177fefdad008e46ffaa777f2 Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Sun, 18 Sep 2016 12:50:06 +0200 Subject: Bluetooth: Fix advertising instance validity check for flags Flags are not allowed in Scan Response. 
Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index c96b0ad..2758c6a 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6065,7 +6065,8 @@ static bool tlv_data_is_valid(u32 adv_flags, u8 *data, u8 len, bool is_adv_data) for (i = 0, cur_len = 0; i < len; i += (cur_len + 1)) { cur_len = data[i]; - if (data[i + 1] == EIR_FLAGS && flags_managed(adv_flags)) + if (data[i + 1] == EIR_FLAGS && + (!is_adv_data || flags_managed(adv_flags))) return false; if (data[i + 1] == EIR_TX_POWER && tx_power_managed(adv_flags)) -- cgit v1.1 From 3310230c5dddfafe3d1ef87f1257812011681aca Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Sun, 18 Sep 2016 12:50:07 +0200 Subject: Bluetooth: Increment management interface revision Increment the mgmt revision due to the recently added Read Extended Controller Information and Set Appearance commands. Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 2758c6a..54dd218 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -38,7 +38,7 @@ #include "mgmt_util.h" #define MGMT_VERSION 1 -#define MGMT_REVISION 13 +#define MGMT_REVISION 14 static const u16 mgmt_commands[] = { MGMT_OP_READ_INDEX_LIST, -- cgit v1.1 From 143f0a28ff7ebcc74144ed29bc66da6fbcce0dc7 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Mon, 19 Sep 2016 12:05:12 +0200 Subject: Bluetooth: hci_bcm: Change protocol name Use full name instead of abbreviation. 
Signed-off-by: Loic Poulain Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_bcm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index 1c97eda..5ccb90e 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -798,7 +798,7 @@ static int bcm_remove(struct platform_device *pdev) static const struct hci_uart_proto bcm_proto = { .id = HCI_UART_BCM, - .name = "BCM", + .name = "Broadcom", .manufacturer = 15, .init_speed = 115200, .oper_speed = 4000000, -- cgit v1.1 From 9e69130c4efc61ce0a8fb3b9eea0188f8d41f779 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Mon, 19 Sep 2016 11:32:35 +0200 Subject: Bluetooth: hci_uart: Add Nokia Protocol identifier Will be used by hci_nokia extra protocol. Signed-off-by: Loic Poulain Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_uart.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h index 839bad1..22b7c58 100644 --- a/drivers/bluetooth/hci_uart.h +++ b/drivers/bluetooth/hci_uart.h @@ -35,7 +35,7 @@ #define HCIUARTGETFLAGS _IOR('U', 204, int) /* UART protocols */ -#define HCI_UART_MAX_PROTO 10 +#define HCI_UART_MAX_PROTO 11 #define HCI_UART_H4 0 #define HCI_UART_BCSP 1 @@ -47,6 +47,7 @@ #define HCI_UART_BCM 7 #define HCI_UART_QCA 8 #define HCI_UART_AG6XX 9 +#define HCI_UART_NOKIA 10 #define HCI_UART_RAW_DEVICE 0 #define HCI_UART_RESET_ON_INIT 1 -- cgit v1.1 From 162f812f23bab583f5d514ca0e4df67797ac9cdf Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Mon, 19 Sep 2016 16:29:27 +0200 Subject: Bluetooth: hci_uart: Add Marvell support This patch introduces support for Marvell Bluetooth controller over UART (8897 for now). In order to send the final firmware at full speed, a helper firmware is firstly sent. Firmware download is driven by the controller which sends request firmware packets (including expected size). 
This driver is a global rework of the one proposed by Amitkumar Karwar . Signed-off-by: Loic Poulain Signed-off-by: Marcel Holtmann --- drivers/bluetooth/Kconfig | 11 ++ drivers/bluetooth/Makefile | 1 + drivers/bluetooth/hci_ldisc.c | 6 + drivers/bluetooth/hci_mrvl.c | 387 ++++++++++++++++++++++++++++++++++++++++++ drivers/bluetooth/hci_uart.h | 8 +- 5 files changed, 412 insertions(+), 1 deletion(-) create mode 100644 drivers/bluetooth/hci_mrvl.c diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig index 43e9f93..3cc9bff 100644 --- a/drivers/bluetooth/Kconfig +++ b/drivers/bluetooth/Kconfig @@ -180,6 +180,17 @@ config BT_HCIUART_AG6XX Say Y here to compile support for Intel AG6XX protocol. +config BT_HCIUART_MRVL + bool "Marvell protocol support" + depends on BT_HCIUART + select BT_HCIUART_H4 + help + Marvell is serial protocol for communication between Bluetooth + device and host. This protocol is required for most Marvell Bluetooth + devices with UART interface. + + Say Y here to compile support for HCI MRVL protocol. 
+ config BT_HCIBCM203X tristate "HCI BCM203x USB driver" depends on USB diff --git a/drivers/bluetooth/Makefile b/drivers/bluetooth/Makefile index 3e92cfeb..b1fc29a 100644 --- a/drivers/bluetooth/Makefile +++ b/drivers/bluetooth/Makefile @@ -38,6 +38,7 @@ hci_uart-$(CONFIG_BT_HCIUART_INTEL) += hci_intel.o hci_uart-$(CONFIG_BT_HCIUART_BCM) += hci_bcm.o hci_uart-$(CONFIG_BT_HCIUART_QCA) += hci_qca.o hci_uart-$(CONFIG_BT_HCIUART_AG6XX) += hci_ag6xx.o +hci_uart-$(CONFIG_BT_HCIUART_MRVL) += hci_mrvl.o hci_uart-objs := $(hci_uart-y) ccflags-y += -D__CHECK_ENDIAN__ diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index dda9739..9a3aab6 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -810,6 +810,9 @@ static int __init hci_uart_init(void) #ifdef CONFIG_BT_HCIUART_AG6XX ag6xx_init(); #endif +#ifdef CONFIG_BT_HCIUART_MRVL + mrvl_init(); +#endif return 0; } @@ -845,6 +848,9 @@ static void __exit hci_uart_exit(void) #ifdef CONFIG_BT_HCIUART_AG6XX ag6xx_deinit(); #endif +#ifdef CONFIG_BT_HCIUART_MRVL + mrvl_deinit(); +#endif /* Release tty registration of line discipline */ err = tty_unregister_ldisc(N_HCI); diff --git a/drivers/bluetooth/hci_mrvl.c b/drivers/bluetooth/hci_mrvl.c new file mode 100644 index 0000000..bbc4b39 --- /dev/null +++ b/drivers/bluetooth/hci_mrvl.c @@ -0,0 +1,387 @@ +/* + * + * Bluetooth HCI UART driver for marvell devices + * + * Copyright (C) 2016 Marvell International Ltd. + * Copyright (C) 2016 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "hci_uart.h" + +#define HCI_FW_REQ_PKT 0xA5 +#define HCI_CHIP_VER_PKT 0xAA + +#define MRVL_ACK 0x5A +#define MRVL_NAK 0xBF +#define MRVL_RAW_DATA 0x1F + +enum { + STATE_CHIP_VER_PENDING, + STATE_FW_REQ_PENDING, +}; + +struct mrvl_data { + struct sk_buff *rx_skb; + struct sk_buff_head txq; + struct sk_buff_head rawq; + unsigned long flags; + unsigned int tx_len; + u8 id, rev; +}; + +struct hci_mrvl_pkt { + __le16 lhs; + __le16 rhs; +} __packed; +#define HCI_MRVL_PKT_SIZE 4 + +static int mrvl_open(struct hci_uart *hu) +{ + struct mrvl_data *mrvl; + + BT_DBG("hu %p", hu); + + mrvl = kzalloc(sizeof(*mrvl), GFP_KERNEL); + if (!mrvl) + return -ENOMEM; + + skb_queue_head_init(&mrvl->txq); + skb_queue_head_init(&mrvl->rawq); + + set_bit(STATE_CHIP_VER_PENDING, &mrvl->flags); + + hu->priv = mrvl; + return 0; +} + +static int mrvl_close(struct hci_uart *hu) +{ + struct mrvl_data *mrvl = hu->priv; + + BT_DBG("hu %p", hu); + + skb_queue_purge(&mrvl->txq); + skb_queue_purge(&mrvl->rawq); + kfree_skb(mrvl->rx_skb); + kfree(mrvl); + + hu->priv = NULL; + return 0; +} + +static int mrvl_flush(struct hci_uart *hu) +{ + struct mrvl_data *mrvl = hu->priv; + + BT_DBG("hu %p", hu); + + skb_queue_purge(&mrvl->txq); + skb_queue_purge(&mrvl->rawq); + + return 0; +} + +static struct sk_buff *mrvl_dequeue(struct hci_uart *hu) +{ + struct mrvl_data *mrvl = hu->priv; + struct sk_buff *skb; + + skb = skb_dequeue(&mrvl->txq); + if (!skb) { + /* Any raw data ? 
*/ + skb = skb_dequeue(&mrvl->rawq); + } else { + /* Prepend skb with frame type */ + memcpy(skb_push(skb, 1), &bt_cb(skb)->pkt_type, 1); + } + + return skb; +} + +static int mrvl_enqueue(struct hci_uart *hu, struct sk_buff *skb) +{ + struct mrvl_data *mrvl = hu->priv; + + skb_queue_tail(&mrvl->txq, skb); + return 0; +} + +static void mrvl_send_ack(struct hci_uart *hu, unsigned char type) +{ + struct mrvl_data *mrvl = hu->priv; + struct sk_buff *skb; + + /* No H4 payload, only 1 byte header */ + skb = bt_skb_alloc(0, GFP_ATOMIC); + if (!skb) { + bt_dev_err(hu->hdev, "Unable to alloc ack/nak packet"); + return; + } + hci_skb_pkt_type(skb) = type; + + skb_queue_tail(&mrvl->txq, skb); + hci_uart_tx_wakeup(hu); +} + +static int mrvl_recv_fw_req(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_mrvl_pkt *pkt = (void *)skb->data; + struct hci_uart *hu = hci_get_drvdata(hdev); + struct mrvl_data *mrvl = hu->priv; + int ret = 0; + + if ((pkt->lhs ^ pkt->rhs) != 0xffff) { + bt_dev_err(hdev, "Corrupted mrvl header"); + mrvl_send_ack(hu, MRVL_NAK); + ret = -EINVAL; + goto done; + } + mrvl_send_ack(hu, MRVL_ACK); + + if (!test_bit(STATE_FW_REQ_PENDING, &mrvl->flags)) { + bt_dev_err(hdev, "Received unexpected firmware request"); + ret = -EINVAL; + goto done; + } + + mrvl->tx_len = le16_to_cpu(pkt->lhs); + + clear_bit(STATE_FW_REQ_PENDING, &mrvl->flags); + smp_mb__after_atomic(); + wake_up_bit(&mrvl->flags, STATE_FW_REQ_PENDING); + +done: + kfree_skb(skb); + return ret; +} + +static int mrvl_recv_chip_ver(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_mrvl_pkt *pkt = (void *)skb->data; + struct hci_uart *hu = hci_get_drvdata(hdev); + struct mrvl_data *mrvl = hu->priv; + u16 version = le16_to_cpu(pkt->lhs); + int ret = 0; + + if ((pkt->lhs ^ pkt->rhs) != 0xffff) { + bt_dev_err(hdev, "Corrupted mrvl header"); + mrvl_send_ack(hu, MRVL_NAK); + ret = -EINVAL; + goto done; + } + mrvl_send_ack(hu, MRVL_ACK); + + if (!test_bit(STATE_CHIP_VER_PENDING, &mrvl->flags)) 
{ + bt_dev_err(hdev, "Received unexpected chip version"); + goto done; + } + + mrvl->id = version; + mrvl->rev = version >> 8; + + bt_dev_info(hdev, "Controller id = %x, rev = %x", mrvl->id, mrvl->rev); + + clear_bit(STATE_CHIP_VER_PENDING, &mrvl->flags); + smp_mb__after_atomic(); + wake_up_bit(&mrvl->flags, STATE_CHIP_VER_PENDING); + +done: + kfree_skb(skb); + return ret; +} + +#define HCI_RECV_CHIP_VER \ + .type = HCI_CHIP_VER_PKT, \ + .hlen = HCI_MRVL_PKT_SIZE, \ + .loff = 0, \ + .lsize = 0, \ + .maxlen = HCI_MRVL_PKT_SIZE + +#define HCI_RECV_FW_REQ \ + .type = HCI_FW_REQ_PKT, \ + .hlen = HCI_MRVL_PKT_SIZE, \ + .loff = 0, \ + .lsize = 0, \ + .maxlen = HCI_MRVL_PKT_SIZE + +static const struct h4_recv_pkt mrvl_recv_pkts[] = { + { H4_RECV_ACL, .recv = hci_recv_frame }, + { H4_RECV_SCO, .recv = hci_recv_frame }, + { H4_RECV_EVENT, .recv = hci_recv_frame }, + { HCI_RECV_FW_REQ, .recv = mrvl_recv_fw_req }, + { HCI_RECV_CHIP_VER, .recv = mrvl_recv_chip_ver }, +}; + +static int mrvl_recv(struct hci_uart *hu, const void *data, int count) +{ + struct mrvl_data *mrvl = hu->priv; + + if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) + return -EUNATCH; + + mrvl->rx_skb = h4_recv_buf(hu->hdev, mrvl->rx_skb, data, count, + mrvl_recv_pkts, + ARRAY_SIZE(mrvl_recv_pkts)); + if (IS_ERR(mrvl->rx_skb)) { + int err = PTR_ERR(mrvl->rx_skb); + bt_dev_err(hu->hdev, "Frame reassembly failed (%d)", err); + mrvl->rx_skb = NULL; + return err; + } + + return count; +} + +static int mrvl_load_firmware(struct hci_dev *hdev, const char *name) +{ + struct hci_uart *hu = hci_get_drvdata(hdev); + struct mrvl_data *mrvl = hu->priv; + const struct firmware *fw = NULL; + const u8 *fw_ptr, *fw_max; + int err; + + err = request_firmware(&fw, name, &hdev->dev); + if (err < 0) { + bt_dev_err(hdev, "Failed to load firmware file %s", name); + return err; + } + + fw_ptr = fw->data; + fw_max = fw->data + fw->size; + + bt_dev_info(hdev, "Loading %s", name); + + set_bit(STATE_FW_REQ_PENDING, &mrvl->flags); + + 
while (fw_ptr <= fw_max) { + struct sk_buff *skb; + + /* Controller drives the firmware load by sending firmware + * request packets containing the expected fragment size. + */ + err = wait_on_bit_timeout(&mrvl->flags, STATE_FW_REQ_PENDING, + TASK_INTERRUPTIBLE, + msecs_to_jiffies(2000)); + if (err == 1) { + bt_dev_err(hdev, "Firmware load interrupted"); + err = -EINTR; + break; + } else if (err) { + bt_dev_err(hdev, "Firmware request timeout"); + err = -ETIMEDOUT; + break; + } + + bt_dev_dbg(hdev, "Firmware request, expecting %d bytes", + mrvl->tx_len); + + if (fw_ptr == fw_max) { + /* Controller requests a null size once firmware is + * fully loaded. If controller expects more data, there + * is an issue. + */ + if (!mrvl->tx_len) { + bt_dev_info(hdev, "Firmware loading complete"); + } else { + bt_dev_err(hdev, "Firmware loading failure"); + err = -EINVAL; + } + break; + } + + if (fw_ptr + mrvl->tx_len > fw_max) { + mrvl->tx_len = fw_max - fw_ptr; + bt_dev_dbg(hdev, "Adjusting tx_len to %d", + mrvl->tx_len); + } + + skb = bt_skb_alloc(mrvl->tx_len, GFP_KERNEL); + if (!skb) { + bt_dev_err(hdev, "Failed to alloc mem for FW packet"); + err = -ENOMEM; + break; + } + bt_cb(skb)->pkt_type = MRVL_RAW_DATA; + + memcpy(skb_put(skb, mrvl->tx_len), fw_ptr, mrvl->tx_len); + fw_ptr += mrvl->tx_len; + + set_bit(STATE_FW_REQ_PENDING, &mrvl->flags); + + skb_queue_tail(&mrvl->rawq, skb); + hci_uart_tx_wakeup(hu); + } + + release_firmware(fw); + return err; +} + +static int mrvl_setup(struct hci_uart *hu) +{ + int err; + + hci_uart_set_flow_control(hu, true); + + err = mrvl_load_firmware(hu->hdev, "mrvl/helper_uart_3000000.bin"); + if (err) { + bt_dev_err(hu->hdev, "Unable to download firmware helper"); + return -EINVAL; + } + + hci_uart_set_baudrate(hu, 3000000); + hci_uart_set_flow_control(hu, false); + + err = mrvl_load_firmware(hu->hdev, "mrvl/uart8897_bt.bin"); + if (err) + return err; + + return 0; +} + +static const struct hci_uart_proto mrvl_proto = { + .id = 
HCI_UART_MRVL, + .name = "Marvell", + .init_speed = 115200, + .open = mrvl_open, + .close = mrvl_close, + .flush = mrvl_flush, + .setup = mrvl_setup, + .recv = mrvl_recv, + .enqueue = mrvl_enqueue, + .dequeue = mrvl_dequeue, +}; + +int __init mrvl_init(void) +{ + return hci_uart_register_proto(&mrvl_proto); +} + +int __exit mrvl_deinit(void) +{ + return hci_uart_unregister_proto(&mrvl_proto); +} diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h index 22b7c58..0701395 100644 --- a/drivers/bluetooth/hci_uart.h +++ b/drivers/bluetooth/hci_uart.h @@ -35,7 +35,7 @@ #define HCIUARTGETFLAGS _IOR('U', 204, int) /* UART protocols */ -#define HCI_UART_MAX_PROTO 11 +#define HCI_UART_MAX_PROTO 12 #define HCI_UART_H4 0 #define HCI_UART_BCSP 1 @@ -48,6 +48,7 @@ #define HCI_UART_QCA 8 #define HCI_UART_AG6XX 9 #define HCI_UART_NOKIA 10 +#define HCI_UART_MRVL 11 #define HCI_UART_RAW_DEVICE 0 #define HCI_UART_RESET_ON_INIT 1 @@ -190,3 +191,8 @@ int qca_deinit(void); int ag6xx_init(void); int ag6xx_deinit(void); #endif + +#ifdef CONFIG_BT_HCIUART_MRVL +int mrvl_init(void); +int mrvl_deinit(void); +#endif -- cgit v1.1 From 7d5c11da1ff6389511c42448f59456373edfc103 Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Mon, 19 Sep 2016 20:25:52 +0200 Subject: Bluetooth: Refactor read_ext_controller_info handler There is no need to allocate heap for reply only to copy stack data to it. This also fixes the rp memory leak and the missing hdev unlock if kmalloc failed.
Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 54dd218..604c481 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -881,42 +881,38 @@ static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { - struct mgmt_rp_read_ext_info *rp; - char buff[512]; + char buf[512]; + struct mgmt_rp_read_ext_info *rp = (void *)buf; u16 eir_len = 0; - u8 name_len; + size_t name_len; BT_DBG("sock %p %s", sk, hdev->name); + memset(&buf, 0, sizeof(buf)); + hci_dev_lock(hdev); + bacpy(&rp->bdaddr, &hdev->bdaddr); + + rp->version = hdev->hci_ver; + rp->manufacturer = cpu_to_le16(hdev->manufacturer); + + rp->supported_settings = cpu_to_le32(get_supported_settings(hdev)); + rp->current_settings = cpu_to_le32(get_current_settings(hdev)); + if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) - eir_len = eir_append_data(buff, eir_len, - EIR_CLASS_OF_DEV, + eir_len = eir_append_data(rp->eir, eir_len, EIR_CLASS_OF_DEV, hdev->dev_class, 3); name_len = strlen(hdev->dev_name); - eir_len = eir_append_data(buff, eir_len, EIR_NAME_COMPLETE, + eir_len = eir_append_data(rp->eir, eir_len, EIR_NAME_COMPLETE, hdev->dev_name, name_len); name_len = strlen(hdev->short_name); - eir_len = eir_append_data(buff, eir_len, EIR_NAME_SHORT, + eir_len = eir_append_data(rp->eir, eir_len, EIR_NAME_SHORT, hdev->short_name, name_len); - rp = kzalloc(sizeof(*rp) + eir_len, GFP_KERNEL); - if (!rp) - return -ENOMEM; - rp->eir_len = cpu_to_le16(eir_len); - memcpy(rp->eir, buff, eir_len); - - bacpy(&rp->bdaddr, &hdev->bdaddr); - - rp->version = hdev->hci_ver; - rp->manufacturer = cpu_to_le16(hdev->manufacturer); - - rp->supported_settings = cpu_to_le32(get_supported_settings(hdev)); - rp->current_settings = 
cpu_to_le32(get_current_settings(hdev)); hci_dev_unlock(hdev); -- cgit v1.1 From cde7a863d36a4a629c111f37edc2297d6b822a82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Narajowski?= Date: Mon, 19 Sep 2016 20:25:53 +0200 Subject: Bluetooth: Factor appending EIR to separate helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will also be used for Extended Information Event handling. Signed-off-by: Michał Narajowski Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 604c481..2b6fe10 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -878,13 +878,32 @@ static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, return eir_len; } +static u16 append_eir_data_to_buf(struct hci_dev *hdev, u8 *eir) +{ + u16 eir_len = 0; + size_t name_len; + + if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + eir_len = eir_append_data(eir, eir_len, EIR_CLASS_OF_DEV, + hdev->dev_class, 3); + + name_len = strlen(hdev->dev_name); + eir_len = eir_append_data(eir, eir_len, EIR_NAME_COMPLETE, + hdev->dev_name, name_len); + + name_len = strlen(hdev->short_name); + eir_len = eir_append_data(eir, eir_len, EIR_NAME_SHORT, + hdev->short_name, name_len); + + return eir_len; +} + static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { char buf[512]; struct mgmt_rp_read_ext_info *rp = (void *)buf; - u16 eir_len = 0; - size_t name_len; + u16 eir_len; BT_DBG("sock %p %s", sk, hdev->name); @@ -900,18 +919,8 @@ static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev, rp->supported_settings = cpu_to_le32(get_supported_settings(hdev)); rp->current_settings = cpu_to_le32(get_current_settings(hdev)); - if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) - eir_len = 
eir_append_data(rp->eir, eir_len, EIR_CLASS_OF_DEV, - hdev->dev_class, 3); - - name_len = strlen(hdev->dev_name); - eir_len = eir_append_data(rp->eir, eir_len, EIR_NAME_COMPLETE, - hdev->dev_name, name_len); - - name_len = strlen(hdev->short_name); - eir_len = eir_append_data(rp->eir, eir_len, EIR_NAME_SHORT, - hdev->short_name, name_len); + eir_len = append_eir_data_to_buf(hdev, rp->eir); rp->eir_len = cpu_to_le16(eir_len); hci_dev_unlock(hdev); -- cgit v1.1 From 6a9e90bff9cfb33d5939c29e5bf2674c9176365d Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Mon, 19 Sep 2016 20:25:54 +0200 Subject: Bluetooth: Add appearance to Read Ext Controller Info command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If LE is enabled appearance is added to EIR data. Signed-off-by: Michał Narajowski Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 2b6fe10..d3837e0 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -878,6 +878,16 @@ static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, return eir_len; } +static inline u16 eir_append_le16(u8 *eir, u16 eir_len, u8 type, u16 data) +{ + eir[eir_len++] = sizeof(type) + sizeof(data); + eir[eir_len++] = type; + put_unaligned_le16(data, &eir[eir_len]); + eir_len += sizeof(data); + + return eir_len; +} + static u16 append_eir_data_to_buf(struct hci_dev *hdev, u8 *eir) { u16 eir_len = 0; @@ -887,6 +897,10 @@ static u16 append_eir_data_to_buf(struct hci_dev *hdev, u8 *eir) eir_len = eir_append_data(eir, eir_len, EIR_CLASS_OF_DEV, hdev->dev_class, 3); + if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) + eir_len = eir_append_le16(eir, eir_len, EIR_APPEARANCE, + hdev->appearance); + name_len = strlen(hdev->dev_name); eir_len = eir_append_data(eir, eir_len, EIR_NAME_COMPLETE, hdev->dev_name, name_len); -- cgit v1.1 From 
5e9fae48f800b973e45887ce0b8d717d54c0bb11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Narajowski?= Date: Mon, 19 Sep 2016 20:25:55 +0200 Subject: Bluetooth: Add supported data types to ext info changed event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds EIR data to extended info changed event. Signed-off-by: Michał Narajowski Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index d3837e0..29e5ce9 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -954,12 +954,18 @@ static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev, static int ext_info_changed(struct hci_dev *hdev, struct sock *skip) { - struct mgmt_ev_ext_info_changed ev; + char buf[512]; + struct mgmt_ev_ext_info_changed *ev = (void *)buf; + u16 eir_len; - ev.eir_len = cpu_to_le16(0); + memset(buf, 0, sizeof(buf)); + + eir_len = append_eir_data_to_buf(hdev, ev->eir); + ev->eir_len = cpu_to_le16(eir_len); - return mgmt_limited_event(MGMT_EV_EXT_INFO_CHANGED, hdev, &ev, - sizeof(ev), HCI_MGMT_EXT_INFO_EVENTS, skip); + return mgmt_limited_event(MGMT_EV_EXT_INFO_CHANGED, hdev, ev, + sizeof(*ev) + eir_len, + HCI_MGMT_EXT_INFO_EVENTS, skip); } static int send_settings_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev) -- cgit v1.1 From e74317f43f5ce2d13cddaab867c59d42934d9585 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Narajowski?= Date: Mon, 19 Sep 2016 20:25:56 +0200 Subject: Bluetooth: Fix missing ext info event when setting appearance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds missing event when setting appearance, just like in the set local name command. 
Signed-off-by: Michał Narajowski Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 29e5ce9..cd9f345 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3187,6 +3187,8 @@ static int set_appearance(struct sock *sk, struct hci_dev *hdev, void *data, if (hci_dev_test_flag(hdev, HCI_LE_ADV)) adv_expire(hdev, MGMT_ADV_FLAG_APPEARANCE); + + ext_info_changed(hdev, sk); } err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_APPEARANCE, 0, NULL, -- cgit v1.1 From af4168c5a925dc3b11b0246c2b91124327919f47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Narajowski?= Date: Mon, 19 Sep 2016 14:33:33 +0200 Subject: Bluetooth: Set appearance only for LE capable controllers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Setting appearance on controllers without LE support will result in Not Supported error. Signed-off-by: Michał Narajowski Signed-off-by: Johan Hedberg --- net/bluetooth/mgmt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index cd9f345..7b2bac4 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3178,6 +3178,10 @@ static int set_appearance(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG(""); + if (!lmp_le_capable(hdev)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_APPEARANCE, + MGMT_STATUS_NOT_SUPPORTED); + apperance = le16_to_cpu(cp->appearance); hci_dev_lock(hdev); -- cgit v1.1 From 67326666e2d45ebea7db3ed8e3e735f15e60dd91 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 19 Sep 2016 10:52:14 -0500 Subject: scripts: add script for translating stack dump function offsets addr2line doesn't work with KASLR addresses. Add a basic addr2line wrapper script which takes the 'func+offset/size' format as input.
Signed-off-by: Josh Poimboeuf Signed-off-by: Linus Torvalds --- scripts/faddr2line | 177 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100755 scripts/faddr2line diff --git a/scripts/faddr2line b/scripts/faddr2line new file mode 100755 index 0000000..4fbfe83 --- /dev/null +++ b/scripts/faddr2line @@ -0,0 +1,177 @@ +#!/bin/bash +# +# Translate stack dump function offsets. +# +# addr2line doesn't work with KASLR addresses. This works similarly to +# addr2line, but instead takes the 'func+0x123' format as input: +# +# $ ./scripts/faddr2line ~/k/vmlinux meminfo_proc_show+0x5/0x568 +# meminfo_proc_show+0x5/0x568: +# meminfo_proc_show at fs/proc/meminfo.c:27 +# +# If the address is part of an inlined function, the full inline call chain is +# printed: +# +# $ ./scripts/faddr2line ~/k/vmlinux native_write_msr+0x6/0x27 +# native_write_msr+0x6/0x27: +# arch_static_branch at arch/x86/include/asm/msr.h:121 +# (inlined by) static_key_false at include/linux/jump_label.h:125 +# (inlined by) native_write_msr at arch/x86/include/asm/msr.h:125 +# +# The function size after the '/' in the input is optional, but recommended. +# It's used to help disambiguate any duplicate symbol names, which can occur +# rarely. 
If the size is omitted for a duplicate symbol then it's possible for +# multiple code sites to be printed: +# +# $ ./scripts/faddr2line ~/k/vmlinux raw_ioctl+0x5 +# raw_ioctl+0x5/0x20: +# raw_ioctl at drivers/char/raw.c:122 +# +# raw_ioctl+0x5/0xb1: +# raw_ioctl at net/ipv4/raw.c:876 +# +# Multiple addresses can be specified on a single command line: +# +# $ ./scripts/faddr2line ~/k/vmlinux type_show+0x10/45 free_reserved_area+0x90 +# type_show+0x10/0x2d: +# type_show at drivers/video/backlight/backlight.c:213 +# +# free_reserved_area+0x90/0x123: +# free_reserved_area at mm/page_alloc.c:6429 (discriminator 2) + + +set -o errexit +set -o nounset + +command -v awk >/dev/null 2>&1 || die "awk isn't installed" +command -v readelf >/dev/null 2>&1 || die "readelf isn't installed" +command -v addr2line >/dev/null 2>&1 || die "addr2line isn't installed" + +usage() { + echo "usage: faddr2line ..." >&2 + exit 1 +} + +warn() { + echo "$1" >&2 +} + +die() { + echo "ERROR: $1" >&2 + exit 1 +} + +# Try to figure out the source directory prefix so we can remove it from the +# addr2line output. HACK ALERT: This assumes that start_kernel() is in +# init/main.c! This only works for vmlinux. Otherwise it falls back to +# printing the absolute path.
+find_dir_prefix() { + local objfile=$1 + + local start_kernel_addr=$(readelf -sW $objfile | awk '$8 == "start_kernel" {printf "0x%s", $2}') + [[ -z $start_kernel_addr ]] && return + + local file_line=$(addr2line -e $objfile $start_kernel_addr) + [[ -z $file_line ]] && return + + local prefix=${file_line%init/main.c:*} + if [[ -z $prefix ]] || [[ $prefix = $file_line ]]; then + return + fi + + DIR_PREFIX=$prefix + return 0 +} + +__faddr2line() { + local objfile=$1 + local func_addr=$2 + local dir_prefix=$3 + local print_warnings=$4 + + local func=${func_addr%+*} + local offset=${func_addr#*+} + offset=${offset%/*} + local size= + [[ $func_addr =~ "/" ]] && size=${func_addr#*/} + + if [[ -z $func ]] || [[ -z $offset ]] || [[ $func = $func_addr ]]; then + warn "bad func+offset $func_addr" + DONE=1 + return + fi + + # Go through each of the object's symbols which match the func name. + # In rare cases there might be duplicates. + while read symbol; do + local fields=($symbol) + local sym_base=0x${fields[1]} + local sym_size=${fields[2]} + local sym_type=${fields[3]} + + # calculate the address + local addr=$(($sym_base + $offset)) + if [[ -z $addr ]] || [[ $addr = 0 ]]; then + warn "bad address: $sym_base + $offset" + DONE=1 + return + fi + local hexaddr=0x$(printf %x $addr) + + # weed out non-function symbols + if [[ $sym_type != "FUNC" ]]; then + [[ $print_warnings = 1 ]] && + echo "skipping $func address at $hexaddr due to non-function symbol" + continue + fi + + # if the user provided a size, make sure it matches the symbol's size + if [[ -n $size ]] && [[ $size -ne $sym_size ]]; then + [[ $print_warnings = 1 ]] && + echo "skipping $func address at $hexaddr due to size mismatch ($size != $sym_size)" + continue; + fi + + # make sure the provided offset is within the symbol's range + if [[ $offset -gt $sym_size ]]; then + [[ $print_warnings = 1 ]] && + echo "skipping $func address at $hexaddr due to size mismatch ($offset > $sym_size)" + continue + fi + + # separate 
multiple entries with a blank line + [[ $FIRST = 0 ]] && echo + FIRST=0 + + local hexsize=0x$(printf %x $sym_size) + echo "$func+$offset/$hexsize:" + addr2line -fpie $objfile $hexaddr | sed "s;$dir_prefix;;" + DONE=1 + + done < <(readelf -sW $objfile | awk -v f=$func '$8 == f {print}') +} + +[[ $# -lt 2 ]] && usage + +objfile=$1 +[[ ! -f $objfile ]] && die "can't find objfile $objfile" +shift + +DIR_PREFIX=supercalifragilisticexpialidocious +find_dir_prefix $objfile + +FIRST=1 +while [[ $# -gt 0 ]]; do + func_addr=$1 + shift + + # print any matches found + DONE=0 + __faddr2line $objfile $func_addr $DIR_PREFIX 0 + + # if no match was found, print warnings + if [[ $DONE = 0 ]]; then + __faddr2line $objfile $func_addr $DIR_PREFIX 1 + warn "no match for $func_addr" + fi +done -- cgit v1.1 From 7fadce0d60d09427e0027d3d468781b08ca0b3d1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 19 Sep 2016 14:49:08 -0700 Subject: scripts/faddr2line: improve on base path filtering a bit Due to our compiler include directives, the build pathnames for header files often end up being of the form "$srcdir/./include/linux/xyz.h", which ends up having that extra "." path component after the build base in it. Teach faddr2line to skip that too, to make code generated in inline functions in header files match the filename for the regular C files. Rabin Vincent pointed out that I can't make a stricter regexp match by using the " at " prefix for the pathname, because that ends up being locale-dependent. 
But this does require that the path match be preceded by a space, to make it a bit more strict (that matters mainly if we didn't find any base_dir at all, and we only end up with the "./" part of the match) Acked-by: Josh Poimboeuf Cc: Rabin Vincent Signed-off-by: Linus Torvalds --- scripts/faddr2line | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/faddr2line b/scripts/faddr2line index 4fbfe83..450b332 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line @@ -145,7 +145,7 @@ __faddr2line() { local hexsize=0x$(printf %x $sym_size) echo "$func+$offset/$hexsize:" - addr2line -fpie $objfile $hexaddr | sed "s;$dir_prefix;;" + addr2line -fpie $objfile $hexaddr | sed "s; $dir_prefix\(\./\)*; ;" DONE=1 done < <(readelf -sW $objfile | awk -v f=$func '$8 == f {print}') -- cgit v1.1 From 9bb627be47a574b764e162e8513d5db78d49e7f5 Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Mon, 19 Sep 2016 14:43:52 -0700 Subject: mem-hotplug: don't clear the only node in new_node_page() Commit 394e31d2ceb4 ("mem-hotplug: alloc new page from a nearest neighbor node when mem-offline") introduced new_node_page() for memory hotplug. In new_node_page(), the nid is cleared before calling __alloc_pages_nodemask(). But if it is the only node of the system, and the first round allocation fails, it will not be able to get memory from an empty nodemask, and will trigger oom. The patch checks whether it is the last node on the system, and if it is, then don't clear the nid in the nodemask. 
Fixes: 394e31d2ceb4 ("mem-hotplug: alloc new page from a nearest neighbor node when mem-offline") Link: http://lkml.kernel.org/r/1473044391.4250.19.camel@TP420 Signed-off-by: Li Zhong Reported-by: John Allen Acked-by: Vlastimil Babka Cc: Xishi Qiu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 41266dc..b58906b 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1567,7 +1567,9 @@ static struct page *new_node_page(struct page *page, unsigned long private, return alloc_huge_page_node(page_hstate(compound_head(page)), next_node_in(nid, nmask)); - node_clear(nid, nmask); + if (nid != next_node_in(nid, nmask)) + node_clear(nid, nmask); + if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE)) gfp_mask |= __GFP_HIGHMEM; -- cgit v1.1 From e6f0c6e6170fec175fe676495f29029aecdf486c Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Mon, 19 Sep 2016 14:43:55 -0700 Subject: ocfs2/dlm: fix race between convert and migration Commit ac7cf246dfdb ("ocfs2/dlm: fix race between convert and recovery") checks if lockres master has changed to identify whether new master has finished recovery or not. This introduces a race: right after the old master does umount (which means the master will change), a new convert request comes in. In this case, it will reset lockres state to DLM_RECOVERING and then retry convert, and then fail with lockres->l_action being set to OCFS2_AST_INVALID, which will cause inconsistent lock level between ocfs2 and dlm, and then finally BUG. Since dlm recovery will clear lock->convert_pending in dlm_move_lockres_to_recovery_list, we can use it to correctly identify the race case between convert and recovery. So fix it.
Fixes: ac7cf246dfdb ("ocfs2/dlm: fix race between convert and recovery") Link: http://lkml.kernel.org/r/57CE1569.8010704@huawei.com Signed-off-by: Joseph Qi Signed-off-by: Jun Piao Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/dlm/dlmconvert.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c index cdeafb4..0bb1286 100644 --- a/fs/ocfs2/dlm/dlmconvert.c +++ b/fs/ocfs2/dlm/dlmconvert.c @@ -268,7 +268,6 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm, struct dlm_lock *lock, int flags, int type) { enum dlm_status status; - u8 old_owner = res->owner; mlog(0, "type=%d, convert_type=%d, busy=%d\n", lock->ml.type, lock->ml.convert_type, res->state & DLM_LOCK_RES_IN_PROGRESS); @@ -335,7 +334,6 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm, spin_lock(&res->spinlock); res->state &= ~DLM_LOCK_RES_IN_PROGRESS; - lock->convert_pending = 0; /* if it failed, move it back to granted queue. 
* if master returns DLM_NORMAL and then down before sending ast, * it may have already been moved to granted queue, reset to @@ -344,12 +342,14 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm, if (status != DLM_NOTQUEUED) dlm_error(status); dlm_revert_pending_convert(res, lock); - } else if ((res->state & DLM_LOCK_RES_RECOVERING) || - (old_owner != res->owner)) { - mlog(0, "res %.*s is in recovering or has been recovered.\n", - res->lockname.len, res->lockname.name); + } else if (!lock->convert_pending) { + mlog(0, "%s: res %.*s, owner died and lock has been moved back " + "to granted list, retry convert.\n", + dlm->name, res->lockname.len, res->lockname.name); status = DLM_RECOVERING; } + + lock->convert_pending = 0; bail: spin_unlock(&res->spinlock); -- cgit v1.1 From d8e3875431956c1f78e142d531f490f76c760ce3 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Mon, 19 Sep 2016 14:43:58 -0700 Subject: MAINTAINERS: Maik has moved Maik is no longer using the plusserver.de email, update with his current email. Link: http://lkml.kernel.org/r/1473007794-27960-1-git-send-email-sudipm.mukherjee@gmail.com Signed-off-by: Sudip Mukherjee Cc: Maik Broemme Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 644ff65..2551f6e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6103,7 +6103,7 @@ S: Supported F: drivers/cpufreq/intel_pstate.c INTEL FRAMEBUFFER DRIVER (excluding 810 and 815) -M: Maik Broemme +M: Maik Broemme L: linux-fbdev@vger.kernel.org S: Maintained F: Documentation/fb/intelfb.txt -- cgit v1.1 From c131f751ab1a852d4dd4b490b3a7fbba7d738de5 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 19 Sep 2016 14:44:01 -0700 Subject: khugepaged: fix use-after-free in collapse_huge_page() hugepage_vma_revalidate() tries to re-check if we still should try to collapse small pages into huge one after the re-acquiring mmap_sem. 
The problem Dmitry Vyukov reported[1] is that the vma found by hugepage_vma_revalidate() can be suitable for huge pages, but not the same vma we had before dropping mmap_sem. And dereferencing original vma can lead to fun results.. Let's use vma hugepage_vma_revalidate() found instead of assuming it's the same as what we had before the lock was dropped. [1] http://lkml.kernel.org/r/CACT4Y+Z3gigBvhca9kRJFcjX0G70V_nRhbwKBU+yGoESBDKi9Q@mail.gmail.com Link: http://lkml.kernel.org/r/20160907122559.GA6542@black.fi.intel.com Signed-off-by: Kirill A. Shutemov Reported-by: Dmitry Vyukov Reviewed-by: Andrea Arcangeli Cc: Ebru Akagunduz Cc: Vlastimil Babka Cc: Mel Gorman Cc: Johannes Weiner Cc: Vegard Nossum Cc: Sasha Levin Cc: Konstantin Khlebnikov Cc: Andrey Ryabinin Cc: Greg Thelen Cc: Suleiman Souhlal Cc: Hugh Dickins Cc: David Rientjes Cc: syzkaller Cc: Kostya Serebryany Cc: Alexander Potapenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/khugepaged.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 79c52d0..62339bf 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -838,7 +838,8 @@ static bool hugepage_vma_check(struct vm_area_struct *vma) * value (scan code). 
*/ -static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address) +static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address, + struct vm_area_struct **vmap) { struct vm_area_struct *vma; unsigned long hstart, hend; @@ -846,7 +847,7 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address) if (unlikely(khugepaged_test_exit(mm))) return SCAN_ANY_PROCESS; - vma = find_vma(mm, address); + *vmap = vma = find_vma(mm, address); if (!vma) return SCAN_VMA_NULL; @@ -898,7 +899,7 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm, /* do_swap_page returns VM_FAULT_RETRY with released mmap_sem */ if (ret & VM_FAULT_RETRY) { down_read(&mm->mmap_sem); - if (hugepage_vma_revalidate(mm, address)) { + if (hugepage_vma_revalidate(mm, address, &fe.vma)) { /* vma is no longer available, don't continue to swapin */ trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0); return false; @@ -923,7 +924,6 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm, static void collapse_huge_page(struct mm_struct *mm, unsigned long address, struct page **hpage, - struct vm_area_struct *vma, int node, int referenced) { pmd_t *pmd, _pmd; @@ -933,6 +933,7 @@ static void collapse_huge_page(struct mm_struct *mm, spinlock_t *pmd_ptl, *pte_ptl; int isolated = 0, result = 0; struct mem_cgroup *memcg; + struct vm_area_struct *vma; unsigned long mmun_start; /* For mmu_notifiers */ unsigned long mmun_end; /* For mmu_notifiers */ gfp_t gfp; @@ -961,7 +962,7 @@ static void collapse_huge_page(struct mm_struct *mm, } down_read(&mm->mmap_sem); - result = hugepage_vma_revalidate(mm, address); + result = hugepage_vma_revalidate(mm, address, &vma); if (result) { mem_cgroup_cancel_charge(new_page, memcg, true); up_read(&mm->mmap_sem); @@ -994,7 +995,7 @@ static void collapse_huge_page(struct mm_struct *mm, * handled by the anon_vma lock + PG_lock. 
*/ down_write(&mm->mmap_sem); - result = hugepage_vma_revalidate(mm, address); + result = hugepage_vma_revalidate(mm, address, &vma); if (result) goto out; /* check if the pmd is still valid */ @@ -1202,7 +1203,7 @@ out_unmap: if (ret) { node = khugepaged_find_target_node(); /* collapse_huge_page will return with the mmap_sem released */ - collapse_huge_page(mm, address, hpage, vma, node, referenced); + collapse_huge_page(mm, address, hpage, node, referenced); } out: trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced, -- cgit v1.1 From 982785c6b05a82c01e90687b7e25ee87c8970b2e Mon Sep 17 00:00:00 2001 From: Ebru Akagunduz Date: Mon, 19 Sep 2016 14:44:04 -0700 Subject: mm, thp: fix leaking mapped pte in __collapse_huge_page_swapin() Currently, khugepaged does not permit swapin if there are not enough young pages in a THP. The problem is that when a THP does not have enough young pages, khugepaged leaks mapped ptes. This patch prohibits leaking mapped ptes. Link: http://lkml.kernel.org/r/1472820276-7831-1-git-send-email-ebru.akagunduz@gmail.com Signed-off-by: Ebru Akagunduz Suggested-by: Andrea Arcangeli Reviewed-by: Andrea Arcangeli Reviewed-by: Rik van Riel Cc: Vlastimil Babka Cc: Mel Gorman Cc: Kirill A.
Shutemov Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/khugepaged.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 62339bf..728d779 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -882,6 +882,11 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm, .pmd = pmd, }; + /* we only decide to swapin, if there is enough young ptes */ + if (referenced < HPAGE_PMD_NR/2) { + trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0); + return false; + } fe.pte = pte_offset_map(pmd, address); for (; fe.address < address + HPAGE_PMD_NR*PAGE_SIZE; fe.pte++, fe.address += PAGE_SIZE) { @@ -889,11 +894,6 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm, if (!is_swap_pte(pteval)) continue; swapped_in++; - /* we only decide to swapin, if there is enough young ptes */ - if (referenced < HPAGE_PMD_NR/2) { - trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0); - return false; - } ret = do_swap_page(&fe, pteval); /* do_swap_page returns VM_FAULT_RETRY with released mmap_sem */ -- cgit v1.1 From 4d35427ad7641cba08ea0deffae1a78147ad41c0 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 19 Sep 2016 14:44:07 -0700 Subject: mm: avoid endless recursion in dump_page() dump_page() uses page_mapcount() to get mapcount of the page. page_mapcount() has VM_BUG_ON_PAGE(PageSlab(page)) as mapcount doesn't make sense for slab pages and the field in struct page used for other information. It leads to recursion if dump_page() called for slub page and DEBUG_VM is enabled: dump_page() -> page_mapcount() -> VM_BUG_ON_PAGE() -> dump_page -> ... Let's avoid calling page_mapcount() for slab pages in dump_page(). Link: http://lkml.kernel.org/r/20160908082137.131076-1-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. 
Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/debug.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/debug.c b/mm/debug.c index 8865bfb..74c7cae 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -42,9 +42,11 @@ const struct trace_print_flags vmaflag_names[] = { void __dump_page(struct page *page, const char *reason) { + int mapcount = PageSlab(page) ? 0 : page_mapcount(page); + pr_emerg("page:%p count:%d mapcount:%d mapping:%p index:%#lx", - page, page_ref_count(page), page_mapcount(page), - page->mapping, page->index); + page, page_ref_count(page), mapcount, + page->mapping, page_to_pgoff(page)); if (PageCompound(page)) pr_cont(" compound_mapcount: %d", compound_mapcount(page)); pr_cont("\n"); -- cgit v1.1 From 08eeb3061e44661afb4cb9eb08780e2fff8bfbc5 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 19 Sep 2016 14:44:09 -0700 Subject: MAINTAINERS: update email for VLYNQ bus entry Link: http://lkml.kernel.org/r/1473218738-21836-1-git-send-email-f.fainelli@gmail.com Signed-off-by: Florian Fainelli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2551f6e..a0ce40f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12569,7 +12569,7 @@ F: include/linux/if_*vlan.h F: net/8021q/ VLYNQ BUS -M: Florian Fainelli +M: Florian Fainelli L: openwrt-devel@lists.openwrt.org (subscribers-only) S: Maintained F: drivers/vlynq/vlynq.c -- cgit v1.1 From 7cbdb4a286a60c5d519cb9223fe2134d26870d39 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 19 Sep 2016 14:44:12 -0700 Subject: autofs: use dentry flags to block walks during expire Somewhere along the way the autofs expire operation has changed to hold a spin lock over expired dentry selection. The autofs indirect mount expired dentry selection is complicated and quite lengthy so it isn't appropriate to hold a spin lock over the operation. 
Commit 47be61845c77 ("fs/dcache.c: avoid soft-lockup in dput()") added a might_sleep() to dput() causing a WARN_ONCE() about this usage to be issued. But the spin lock doesn't need to be held over this check; the autofs dentry info flags are enough to block walks into dentries during the expire. I've left the direct mount expire as it is (for now) because it is much simpler and quicker than the indirect mount expire and adding spin lock releases and re-acquires would do nothing more than add overhead. Fixes: 47be61845c77 ("fs/dcache.c: avoid soft-lockup in dput()") Link: http://lkml.kernel.org/r/20160912014017.1773.73060.stgit@pluto.themaw.net Signed-off-by: Ian Kent Reported-by: Takashi Iwai Tested-by: Takashi Iwai Cc: Takashi Iwai Cc: NeilBrown Cc: Al Viro Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/expire.c | 55 ++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 13 deletions(-) diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index b493909..d8e6d42 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -417,6 +417,7 @@ static struct dentry *should_expire(struct dentry *dentry, } return NULL; } + /* * Find an eligible tree to time-out * A tree is eligible if :- @@ -432,6 +433,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, struct dentry *root = sb->s_root; struct dentry *dentry; struct dentry *expired; + struct dentry *found; struct autofs_info *ino; if (!root) @@ -442,31 +444,46 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, dentry = NULL; while ((dentry = get_next_positive_subdir(dentry, root))) { + int flags = how; + spin_lock(&sbi->fs_lock); ino = autofs4_dentry_ino(dentry); - if (ino->flags & AUTOFS_INF_WANT_EXPIRE) - expired = NULL; - else - expired = should_expire(dentry, mnt, timeout, how); - if (!expired) { + if (ino->flags & AUTOFS_INF_WANT_EXPIRE) { spin_unlock(&sbi->fs_lock); continue; } + spin_unlock(&sbi->fs_lock); + + expired =
should_expire(dentry, mnt, timeout, flags); + if (!expired) + continue; + + spin_lock(&sbi->fs_lock); ino = autofs4_dentry_ino(expired); ino->flags |= AUTOFS_INF_WANT_EXPIRE; spin_unlock(&sbi->fs_lock); synchronize_rcu(); - spin_lock(&sbi->fs_lock); - if (should_expire(expired, mnt, timeout, how)) { - if (expired != dentry) - dput(dentry); - goto found; - } + /* Make sure a reference is not taken on found if + * things have changed. + */ + flags &= ~AUTOFS_EXP_LEAVES; + found = should_expire(expired, mnt, timeout, how); + if (!found || found != expired) + /* Something has changed, continue */ + goto next; + + if (expired != dentry) + dput(dentry); + + spin_lock(&sbi->fs_lock); + goto found; +next: + spin_lock(&sbi->fs_lock); ino->flags &= ~AUTOFS_INF_WANT_EXPIRE; + spin_unlock(&sbi->fs_lock); if (expired != dentry) dput(expired); - spin_unlock(&sbi->fs_lock); } return NULL; @@ -483,6 +500,7 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk) struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); struct autofs_info *ino = autofs4_dentry_ino(dentry); int status; + int state; /* Block on any pending expire */ if (!(ino->flags & AUTOFS_INF_WANT_EXPIRE)) @@ -490,8 +508,19 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk) if (rcu_walk) return -ECHILD; +retry: spin_lock(&sbi->fs_lock); - if (ino->flags & AUTOFS_INF_EXPIRING) { + state = ino->flags & (AUTOFS_INF_WANT_EXPIRE | AUTOFS_INF_EXPIRING); + if (state == AUTOFS_INF_WANT_EXPIRE) { + spin_unlock(&sbi->fs_lock); + /* + * Possibly being selected for expire, wait until + * it's selected or not. 
+ */ + schedule_timeout_uninterruptible(HZ/10); + goto retry; + } + if (state & AUTOFS_INF_EXPIRING) { spin_unlock(&sbi->fs_lock); pr_debug("waiting for expire %p name=%pd\n", dentry, dentry); -- cgit v1.1 From c8de641b1e9c5489aa6ca57b7836acd68e7563f1 Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Mon, 19 Sep 2016 14:44:15 -0700 Subject: mm: fix the page_swap_info() BUG_ON check Commit 62c230bc1790 ("mm: add support for a filesystem to activate swap files and use direct_IO for writing swap pages") replaced the swap_aops dirty hook from __set_page_dirty_no_writeback() with swap_set_page_dirty(). For normal cases without these special SWP flags code path falls back to __set_page_dirty_no_writeback() so the behaviour is expected to be the same as before. But swap_set_page_dirty() makes use of the page_swap_info() helper to get the swap_info_struct to check for the flags like SWP_FILE, SWP_BLKDEV etc as desired for those features. This helper has BUG_ON(!PageSwapCache(page)) which is racy and safe only for the set_page_dirty_lock() path. For the set_page_dirty() path which is often needed for cases to be called from irq context, kswapd() can toggle the flag behind the back while the call is getting executed when system is low on memory and heavy swapping is ongoing. This ends up with undesired kernel panic. This patch just moves the check outside the helper to its users appropriately to fix kernel panic for the described path. Couple of users of helpers already take care of SwapCache condition so I skipped them. Link: http://lkml.kernel.org/r/1473460718-31013-1-git-send-email-santosh.shilimkar@oracle.com Signed-off-by: Santosh Shilimkar Cc: Mel Gorman Cc: Joe Perches Cc: Peter Zijlstra Cc: Rik van Riel Cc: David S. 
Miller Cc: Jens Axboe Cc: Michal Hocko Cc: Hugh Dickins Cc: Al Viro Cc: [4.7.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_io.c | 3 +++ mm/swapfile.c | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/page_io.c b/mm/page_io.c index 16bd82fa..eafe5dd 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -264,6 +264,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, int ret; struct swap_info_struct *sis = page_swap_info(page); + BUG_ON(!PageSwapCache(page)); if (sis->flags & SWP_FILE) { struct kiocb kiocb; struct file *swap_file = sis->swap_file; @@ -337,6 +338,7 @@ int swap_readpage(struct page *page) int ret = 0; struct swap_info_struct *sis = page_swap_info(page); + BUG_ON(!PageSwapCache(page)); VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageUptodate(page), page); if (frontswap_load(page) == 0) { @@ -386,6 +388,7 @@ int swap_set_page_dirty(struct page *page) if (sis->flags & SWP_FILE) { struct address_space *mapping = sis->swap_file->f_mapping; + BUG_ON(!PageSwapCache(page)); return mapping->a_ops->set_page_dirty(page); } else { return __set_page_dirty_no_writeback(page); diff --git a/mm/swapfile.c b/mm/swapfile.c index 78cfa29..2657acc 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2724,7 +2724,6 @@ int swapcache_prepare(swp_entry_t entry) struct swap_info_struct *page_swap_info(struct page *page) { swp_entry_t swap = { .val = page_private(page) }; - BUG_ON(!PageSwapCache(page)); return swap_info[swp_type(swap)]; } -- cgit v1.1 From 31b4beb473e3bdee1bf79db849502dcb24b5c202 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 19 Sep 2016 14:44:18 -0700 Subject: ipc/shm: fix crash if CONFIG_SHMEM is not set Commit c01d5b300774 ("shmem: get_unmapped_area align huge page") makes use of shm_get_unmapped_area() in shm_file_operations() unconditional to CONFIG_MMU. As Tony Battersby pointed this can lead NULL-pointer dereference on machine with CONFIG_MMU=y and CONFIG_SHMEM=n. 
In this case ipc/shm is backed by ramfs which doesn't provide f_op->get_unmapped_area for configurations with MMU. The solution is to provide a dummy f_op->get_unmapped_area for ramfs when CONFIG_MMU=y, which just calls current->mm->get_unmapped_area(). Fixes: c01d5b300774 ("shmem: get_unmapped_area align huge page") Link: http://lkml.kernel.org/r/20160912102704.140442-1-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Reported-by: Tony Battersby Tested-by: Tony Battersby Cc: Hugh Dickins Cc: [4.7.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ramfs/file-mmu.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c index 183a212..12af049 100644 --- a/fs/ramfs/file-mmu.c +++ b/fs/ramfs/file-mmu.c @@ -27,9 +27,17 @@ #include #include #include +#include #include "internal.h" +static unsigned long ramfs_mmu_get_unmapped_area(struct file *file, + unsigned long addr, unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + return current->mm->get_unmapped_area(file, addr, len, pgoff, flags); +} + const struct file_operations ramfs_file_operations = { .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, @@ -38,6 +46,7 @@ const struct file_operations ramfs_file_operations = { .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .llseek = generic_file_llseek, + .get_unmapped_area = ramfs_mmu_get_unmapped_area, }; const struct inode_operations ramfs_file_inode_operations = { -- cgit v1.1 From 2b0ad0085aa47ace4756aa501274a7de0325c09c Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Mon, 19 Sep 2016 14:44:21 -0700 Subject: ocfs2: fix trans extend while flush truncate log Every time, ocfs2_extend_trans() included a credit for truncate log inode, but as that inode had been managed by jbd2 running transaction first time, it will not consume that credit until jbd2_journal_restart().
Since total credits to extend always included the un-consumed ones, there will be more and more un-consumed credits; at last jbd2_journal_restart() will fail due to the credit number being over half of the max transaction credits. The following error was caught when unlinking a large file with many extents: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 13626 at fs/jbd2/transaction.c:269 start_this_handle+0x4c3/0x510 [jbd2]() Modules linked in: ocfs2 nfsd lockd grace nfs_acl auth_rpcgss sunrpc autofs4 ocfs2_dlmfs ocfs2_stack_o2cb ocfs2_dlm ocfs2_nodemanager ocfs2_stackglue configfs sd_mod sg ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables be2iscsi iscsi_boot_sysfs bnx2i cnic uio cxgb4i cxgb4 cxgb3i libcxgbi cxgb3 mdio ib_iser rdma_cm ib_cm iw_cm ib_sa ib_mad ib_core ib_addr ipv6 iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ppdev xen_kbdfront xen_netfront fb_sys_fops sysimgblt sysfillrect syscopyarea parport_pc parport pcspkr i2c_piix4 i2c_core acpi_cpufreq ext4 jbd2 mbcache xen_blkfront floppy pata_acpi ata_generic ata_piix dm_mirror dm_region_hash dm_log dm_mod CPU: 0 PID: 13626 Comm: unlink Tainted: G W 4.1.12-37.6.3.el6uek.x86_64 #2 Hardware name: Xen HVM domU, BIOS 4.4.4OVM 02/11/2016 Call Trace: dump_stack+0x48/0x5c warn_slowpath_common+0x95/0xe0 warn_slowpath_null+0x1a/0x20 start_this_handle+0x4c3/0x510 [jbd2] jbd2__journal_restart+0x161/0x1b0 [jbd2] jbd2_journal_restart+0x13/0x20 [jbd2] ocfs2_extend_trans+0x74/0x220 [ocfs2] ocfs2_replay_truncate_records+0x93/0x360 [ocfs2] __ocfs2_flush_truncate_log+0x13e/0x3a0 [ocfs2] ocfs2_remove_btree_range+0x458/0x7f0 [ocfs2] ocfs2_commit_truncate+0x1b3/0x6f0 [ocfs2] ocfs2_truncate_for_delete+0xbd/0x380 [ocfs2] ocfs2_wipe_inode+0x136/0x6a0 [ocfs2] ocfs2_delete_inode+0x2a2/0x3e0 [ocfs2] ocfs2_evict_inode+0x28/0x60 [ocfs2] evict+0xab/0x1a0 iput_final+0xf6/0x190 iput+0xc8/0xe0 do_unlinkat+0x1b7/0x310 SyS_unlink+0x16/0x20 system_call_fastpath+0x12/0x71 ---[
end trace 28aa7410e69369cf ]--- JBD2: unlink wants too many credits (251 > 128) Link: http://lkml.kernel.org/r/1473674623-11810-1-git-send-email-junxiao.bi@oracle.com Signed-off-by: Junxiao Bi Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/alloc.c | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 7dabbc3..5112878 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -5922,7 +5922,6 @@ bail: } static int ocfs2_replay_truncate_records(struct ocfs2_super *osb, - handle_t *handle, struct inode *data_alloc_inode, struct buffer_head *data_alloc_bh) { @@ -5935,11 +5934,19 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb, struct ocfs2_truncate_log *tl; struct inode *tl_inode = osb->osb_tl_inode; struct buffer_head *tl_bh = osb->osb_tl_bh; + handle_t *handle; di = (struct ocfs2_dinode *) tl_bh->b_data; tl = &di->id2.i_dealloc; i = le16_to_cpu(tl->tl_used) - 1; while (i >= 0) { + handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + mlog_errno(status); + goto bail; + } + /* Caller has given us at least enough credits to * update the truncate log dinode */ status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh, @@ -5974,12 +5981,7 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb, } } - status = ocfs2_extend_trans(handle, - OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC); - if (status < 0) { - mlog_errno(status); - goto bail; - } + ocfs2_commit_trans(osb, handle); i--; } @@ -5994,7 +5996,6 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) { int status; unsigned int num_to_flush; - handle_t *handle; struct inode *tl_inode = osb->osb_tl_inode; struct inode *data_alloc_inode = NULL; struct buffer_head *tl_bh = osb->osb_tl_bh; @@ -6038,21 +6039,11 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super 
*osb) goto out_mutex; } - handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC); - if (IS_ERR(handle)) { - status = PTR_ERR(handle); - mlog_errno(status); - goto out_unlock; - } - - status = ocfs2_replay_truncate_records(osb, handle, data_alloc_inode, + status = ocfs2_replay_truncate_records(osb, data_alloc_inode, data_alloc_bh); if (status < 0) mlog_errno(status); - ocfs2_commit_trans(osb, handle); - -out_unlock: brelse(data_alloc_bh); ocfs2_inode_unlock(data_alloc_inode, 1); -- cgit v1.1 From d5bf141893880f7283fe97e1812c58ff22c8f9a5 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Mon, 19 Sep 2016 14:44:24 -0700 Subject: ocfs2: fix trans extend while free cached blocks The root cause of this issue is the same with the one fixed by the last patch, but this time credits for allocator inode and group descriptor may not be consumed before trans extend. The following error was caught: WARNING: CPU: 0 PID: 2037 at fs/jbd2/transaction.c:269 start_this_handle+0x4c3/0x510 [jbd2]() Modules linked in: ocfs2 nfsd lockd grace nfs_acl auth_rpcgss sunrpc autofs4 ocfs2_dlmfs ocfs2_stack_o2cb ocfs2_dlm ocfs2_nodemanager ocfs2_stackglue configfs sd_mod sg ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables be2iscsi iscsi_boot_sysfs bnx2i cnic uio cxgb4i cxgb4 cxgb3i libcxgbi cxgb3 mdio ib_iser rdma_cm ib_cm iw_cm ib_sa ib_mad ib_core ib_addr ipv6 iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ppdev xen_kbdfront fb_sys_fops sysimgblt sysfillrect syscopyarea xen_netfront parport_pc parport pcspkr i2c_piix4 i2c_core acpi_cpufreq ext4 jbd2 mbcache xen_blkfront floppy pata_acpi ata_generic ata_piix dm_mirror dm_region_hash dm_log dm_mod CPU: 0 PID: 2037 Comm: rm Tainted: G W 4.1.12-37.6.3.el6uek.bug24573128v2.x86_64 #2 Hardware name: Xen HVM domU, BIOS 4.4.4OVM 02/11/2016 Call Trace: dump_stack+0x48/0x5c warn_slowpath_common+0x95/0xe0 warn_slowpath_null+0x1a/0x20 start_this_handle+0x4c3/0x510 [jbd2] 
jbd2__journal_restart+0x161/0x1b0 [jbd2] jbd2_journal_restart+0x13/0x20 [jbd2] ocfs2_extend_trans+0x74/0x220 [ocfs2] ocfs2_free_cached_blocks+0x16b/0x4e0 [ocfs2] ocfs2_run_deallocs+0x70/0x270 [ocfs2] ocfs2_commit_truncate+0x474/0x6f0 [ocfs2] ocfs2_truncate_for_delete+0xbd/0x380 [ocfs2] ocfs2_wipe_inode+0x136/0x6a0 [ocfs2] ocfs2_delete_inode+0x2a2/0x3e0 [ocfs2] ocfs2_evict_inode+0x28/0x60 [ocfs2] evict+0xab/0x1a0 iput_final+0xf6/0x190 iput+0xc8/0xe0 do_unlinkat+0x1b7/0x310 SyS_unlinkat+0x22/0x40 system_call_fastpath+0x12/0x71 ---[ end trace a62437cb060baa71 ]--- JBD2: rm wants too many credits (149 > 128) Link: http://lkml.kernel.org/r/1473674623-11810-2-git-send-email-junxiao.bi@oracle.com Signed-off-by: Junxiao Bi Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/alloc.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 5112878..f165f86 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -6404,43 +6404,34 @@ static int ocfs2_free_cached_blocks(struct ocfs2_super *osb, goto out_mutex; } - handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - mlog_errno(ret); - goto out_unlock; - } - while (head) { if (head->free_bg) bg_blkno = head->free_bg; else bg_blkno = ocfs2_which_suballoc_group(head->free_blk, head->free_bit); + handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + mlog_errno(ret); + goto out_unlock; + } + trace_ocfs2_free_cached_blocks( (unsigned long long)head->free_blk, head->free_bit); ret = ocfs2_free_suballoc_bits(handle, inode, di_bh, head->free_bit, bg_blkno, 1); - if (ret) { + if (ret) mlog_errno(ret); - goto out_journal; - } - ret = ocfs2_extend_trans(handle, OCFS2_SUBALLOC_FREE); - if (ret) { - mlog_errno(ret); - goto out_journal; - } + ocfs2_commit_trans(osb, handle); tmp = head; 
head = head->free_next; kfree(tmp); } -out_journal: - ocfs2_commit_trans(osb, handle); - out_unlock: ocfs2_inode_unlock(inode, 1); brelse(di_bh); -- cgit v1.1 From 12703dbfeb15402260e7554d32a34ac40c233990 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 19 Sep 2016 14:44:27 -0700 Subject: fsnotify: add a way to stop queueing events on group shutdown Implement a function that can be called when a group is being shutdown to stop queueing new events to the group. Fanotify will use this. Fixes: 5838d4442bd5 ("fanotify: fix double free of pending permission events") Link: http://lkml.kernel.org/r/1473797711-14111-2-git-send-email-jack@suse.cz Signed-off-by: Jan Kara Reviewed-by: Miklos Szeredi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/notify/group.c | 19 +++++++++++++++++++ fs/notify/notification.c | 8 +++++++- include/linux/fsnotify_backend.h | 3 +++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/fs/notify/group.c b/fs/notify/group.c index 3e2dd85..b47f7cf 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -40,6 +40,17 @@ static void fsnotify_final_destroy_group(struct fsnotify_group *group) } /* + * Stop queueing new events for this group. Once this function returns + * fsnotify_add_event() will not add any new events to the group's queue. + */ +void fsnotify_group_stop_queueing(struct fsnotify_group *group) +{ + mutex_lock(&group->notification_mutex); + group->shutdown = true; + mutex_unlock(&group->notification_mutex); +} + +/* * Trying to get rid of a group. Remove all marks, flush all events and release * the group reference. * Note that another thread calling fsnotify_clear_marks_by_group() may still @@ -47,6 +58,14 @@ static void fsnotify_final_destroy_group(struct fsnotify_group *group) */ void fsnotify_destroy_group(struct fsnotify_group *group) { + /* + * Stop queueing new events. 
The code below is careful enough to not + * require this but fanotify needs to stop queuing events even before + * fsnotify_destroy_group() is called and this makes the other callers + * of fsnotify_destroy_group() to see the same behavior. + */ + fsnotify_group_stop_queueing(group); + /* clear all inode marks for this group, attach them to destroy_list */ fsnotify_detach_group_marks(group); diff --git a/fs/notify/notification.c b/fs/notify/notification.c index a95d8e0..3d76e65 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -82,7 +82,8 @@ void fsnotify_destroy_event(struct fsnotify_group *group, * Add an event to the group notification queue. The group can later pull this * event off the queue to deal with. The function returns 0 if the event was * added to the queue, 1 if the event was merged with some other queued event, - * 2 if the queue of events has overflown. + * 2 if the event was not queued - either the queue of events has overflown + * or the group is shutting down. 
*/ int fsnotify_add_event(struct fsnotify_group *group, struct fsnotify_event *event, @@ -96,6 +97,11 @@ int fsnotify_add_event(struct fsnotify_group *group, mutex_lock(&group->notification_mutex); + if (group->shutdown) { + mutex_unlock(&group->notification_mutex); + return 2; + } + if (group->q_len >= group->max_events) { ret = 2; /* Queue overflow event only if it isn't already queued */ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 58205f3..40a9e99 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -148,6 +148,7 @@ struct fsnotify_group { #define FS_PRIO_1 1 /* fanotify content based access control */ #define FS_PRIO_2 2 /* fanotify pre-content access */ unsigned int priority; + bool shutdown; /* group is being shut down, don't queue more events */ /* stores all fastpath marks assoc with this group so they can be cleaned on unregister */ struct mutex mark_mutex; /* protect marks_list */ @@ -292,6 +293,8 @@ extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *op extern void fsnotify_get_group(struct fsnotify_group *group); /* drop reference on a group from fsnotify_alloc_group */ extern void fsnotify_put_group(struct fsnotify_group *group); +/* group destruction begins, stop queuing new events */ +extern void fsnotify_group_stop_queueing(struct fsnotify_group *group); /* destroy group */ extern void fsnotify_destroy_group(struct fsnotify_group *group); /* fasync handler function */ -- cgit v1.1 From 96d41019e3ac55f6f0115b0ce97e4f24a3d636d2 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 19 Sep 2016 14:44:30 -0700 Subject: fanotify: fix list corruption in fanotify_get_response() fanotify_get_response() calls fsnotify_remove_event() when it finds that group is being released from fanotify_release() (bypass_perm is set). 
However the event it removes need not be only in the group's notification queue but it can have already moved to access_list (userspace read the event before closing the fanotify instance fd) which is protected by a different lock. Thus when fsnotify_remove_event() races with fanotify_release() operating on access_list, the list can get corrupted. Fix the problem by moving all the logic removing permission events from the lists to one place - fanotify_release(). Fixes: 5838d4442bd5 ("fanotify: fix double free of pending permission events") Link: http://lkml.kernel.org/r/1473797711-14111-3-git-send-email-jack@suse.cz Signed-off-by: Jan Kara Reported-by: Miklos Szeredi Tested-by: Miklos Szeredi Reviewed-by: Miklos Szeredi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/notify/fanotify/fanotify.c | 13 +------------ fs/notify/fanotify/fanotify_user.c | 36 ++++++++++++++++++++++++------------ fs/notify/notification.c | 15 --------------- include/linux/fsnotify_backend.h | 3 --- 4 files changed, 25 insertions(+), 42 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index d2f97ec..e0e5f7c 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -67,18 +67,7 @@ static int fanotify_get_response(struct fsnotify_group *group, pr_debug("%s: group=%p event=%p\n", __func__, group, event); - wait_event(group->fanotify_data.access_waitq, event->response || - atomic_read(&group->fanotify_data.bypass_perm)); - - if (!event->response) { /* bypass_perm set */ - /* - * Event was canceled because group is being destroyed. Remove - * it from group's event list because we are responsible for - * freeing the permission event. 
- */ - fsnotify_remove_event(group, &event->fae.fse); - return 0; - } + wait_event(group->fanotify_data.access_waitq, event->response); /* userspace responded, convert to something usable */ switch (event->response) { diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 8e8e6bc..a643138 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -358,16 +358,20 @@ static int fanotify_release(struct inode *ignored, struct file *file) #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS struct fanotify_perm_event_info *event, *next; + struct fsnotify_event *fsn_event; /* - * There may be still new events arriving in the notification queue - * but since userspace cannot use fanotify fd anymore, no event can - * enter or leave access_list by now. + * Stop new events from arriving in the notification queue. since + * userspace cannot use fanotify fd anymore, no event can enter or + * leave access_list by now either. */ - spin_lock(&group->fanotify_data.access_lock); - - atomic_inc(&group->fanotify_data.bypass_perm); + fsnotify_group_stop_queueing(group); + /* + * Process all permission events on access_list and notification queue + * and simulate reply from userspace. + */ + spin_lock(&group->fanotify_data.access_lock); list_for_each_entry_safe(event, next, &group->fanotify_data.access_list, fae.fse.list) { pr_debug("%s: found group=%p event=%p\n", __func__, group, @@ -379,12 +383,21 @@ static int fanotify_release(struct inode *ignored, struct file *file) spin_unlock(&group->fanotify_data.access_lock); /* - * Since bypass_perm is set, newly queued events will not wait for - * access response. Wake up the already sleeping ones now. - * synchronize_srcu() in fsnotify_destroy_group() will wait for all - * processes sleeping in fanotify_handle_event() waiting for access - * response and thus also for all permission events to be freed. + * Destroy all non-permission events. 
For permission events just + * dequeue them and set the response. They will be freed once the + * response is consumed and fanotify_get_response() returns. */ + mutex_lock(&group->notification_mutex); + while (!fsnotify_notify_queue_is_empty(group)) { + fsn_event = fsnotify_remove_first_event(group); + if (!(fsn_event->mask & FAN_ALL_PERM_EVENTS)) + fsnotify_destroy_event(group, fsn_event); + else + FANOTIFY_PE(fsn_event)->response = FAN_ALLOW; + } + mutex_unlock(&group->notification_mutex); + + /* Response for all permission events it set, wakeup waiters */ wake_up(&group->fanotify_data.access_waitq); #endif @@ -755,7 +768,6 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) spin_lock_init(&group->fanotify_data.access_lock); init_waitqueue_head(&group->fanotify_data.access_waitq); INIT_LIST_HEAD(&group->fanotify_data.access_list); - atomic_set(&group->fanotify_data.bypass_perm, 0); #endif switch (flags & FAN_ALL_CLASS_BITS) { case FAN_CLASS_NOTIF: diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 3d76e65..e455e83 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -132,21 +132,6 @@ queue: } /* - * Remove @event from group's notification queue. It is the responsibility of - * the caller to destroy the event. - */ -void fsnotify_remove_event(struct fsnotify_group *group, - struct fsnotify_event *event) -{ - mutex_lock(&group->notification_mutex); - if (!list_empty(&event->list)) { - list_del_init(&event->list); - group->q_len--; - } - mutex_unlock(&group->notification_mutex); -} - -/* * Remove and return the first event from the notification list. 
It is the * responsibility of the caller to destroy the obtained event */ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 40a9e99..7268ed0 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -180,7 +180,6 @@ struct fsnotify_group { spinlock_t access_lock; struct list_head access_list; wait_queue_head_t access_waitq; - atomic_t bypass_perm; #endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */ int f_flags; unsigned int max_marks; @@ -307,8 +306,6 @@ extern int fsnotify_add_event(struct fsnotify_group *group, struct fsnotify_event *event, int (*merge)(struct list_head *, struct fsnotify_event *)); -/* Remove passed event from groups notification queue */ -extern void fsnotify_remove_event(struct fsnotify_group *group, struct fsnotify_event *event); /* true if the group notification queue is empty */ extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group); /* return, but do not dequeue the first event on the notification queue */ -- cgit v1.1 From 3bb8b653c86f6b1d2cc05aa1744fed4b18f99485 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Mon, 19 Sep 2016 14:44:33 -0700 Subject: ocfs2: fix double unlock in case retry after free truncate log If ocfs2_reserve_cluster_bitmap_bits() fails with ENOSPC, it will try to free truncate log and then retry. Since ocfs2_try_to_free_truncate_log will lock/unlock global bitmap inode, we have to unlock it before calling this function. But when retry reserve and it fails with no global bitmap inode lock taken, it will unlock again in error handling branch and BUG. This issue also exists if no need retry and then ocfs2_inode_lock fails. So fix it. 
Fixes: 2070ad1aebff ("ocfs2: retry on ENOSPC if sufficient space in truncate log") Link: http://lkml.kernel.org/r/57D91939.6030809@huawei.com Signed-off-by: Joseph Qi Signed-off-by: Jiufei Xue Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/suballoc.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index ea47120..6ad3533 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -1199,14 +1199,24 @@ retry: inode_unlock((*ac)->ac_inode); ret = ocfs2_try_to_free_truncate_log(osb, bits_wanted); - if (ret == 1) + if (ret == 1) { + iput((*ac)->ac_inode); + (*ac)->ac_inode = NULL; goto retry; + } if (ret < 0) mlog_errno(ret); inode_lock((*ac)->ac_inode); - ocfs2_inode_lock((*ac)->ac_inode, NULL, 1); + ret = ocfs2_inode_lock((*ac)->ac_inode, NULL, 1); + if (ret < 0) { + mlog_errno(ret); + inode_unlock((*ac)->ac_inode); + iput((*ac)->ac_inode); + (*ac)->ac_inode = NULL; + goto bail; + } } if (status < 0) { if (status != -ENOSPC) -- cgit v1.1 From db2ba40c277dc545bab531671c3f45ac0afea6f8 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 19 Sep 2016 14:44:36 -0700 Subject: mm: memcontrol: make per-cpu charge cache IRQ-safe for socket accounting During cgroup2 rollout into production, we started encountering css refcount underflows and css access crashes in the memory controller. Splitting the heavily shared css reference counter into logical users narrowed the imbalance down to the cgroup2 socket memory accounting. The problem turns out to be the per-cpu charge cache. Cgroup1 had a separate socket counter, but the new cgroup2 socket accounting goes through the common charge path that uses a shared per-cpu cache for all memory that is being tracked. Those caches are safe against scheduling preemption, but not against interrupts - such as the newly added packet receive path. 
When cache draining is interrupted by network RX taking pages out of the cache, the resuming drain operation will put references of in-use pages, thus causing the imbalance. Disable IRQs during all per-cpu charge cache operations. Fixes: f7e1cb6ec51b ("mm: memcontrol: account socket memory in unified hierarchy memory controller") Link: http://lkml.kernel.org/r/20160914194846.11153-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Tejun Heo Cc: "David S. Miller" Cc: Michal Hocko Cc: Vladimir Davydov Cc: [4.5+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9a6a51a..4be518d 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1740,17 +1740,22 @@ static DEFINE_MUTEX(percpu_charge_mutex); static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages) { struct memcg_stock_pcp *stock; + unsigned long flags; bool ret = false; if (nr_pages > CHARGE_BATCH) return ret; - stock = &get_cpu_var(memcg_stock); + local_irq_save(flags); + + stock = this_cpu_ptr(&memcg_stock); if (memcg == stock->cached && stock->nr_pages >= nr_pages) { stock->nr_pages -= nr_pages; ret = true; } - put_cpu_var(memcg_stock); + + local_irq_restore(flags); + return ret; } @@ -1771,15 +1776,18 @@ static void drain_stock(struct memcg_stock_pcp *stock) stock->cached = NULL; } -/* - * This must be called under preempt disabled or must be called by - * a thread which is pinned to local cpu. 
- */ static void drain_local_stock(struct work_struct *dummy) { - struct memcg_stock_pcp *stock = this_cpu_ptr(&memcg_stock); + struct memcg_stock_pcp *stock; + unsigned long flags; + + local_irq_save(flags); + + stock = this_cpu_ptr(&memcg_stock); drain_stock(stock); clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags); + + local_irq_restore(flags); } /* @@ -1788,14 +1796,19 @@ static void drain_local_stock(struct work_struct *dummy) */ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) { - struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock); + struct memcg_stock_pcp *stock; + unsigned long flags; + + local_irq_save(flags); + stock = this_cpu_ptr(&memcg_stock); if (stock->cached != memcg) { /* reset if necessary */ drain_stock(stock); stock->cached = memcg; } stock->nr_pages += nr_pages; - put_cpu_var(memcg_stock); + + local_irq_restore(flags); } /* -- cgit v1.1 From d979a39d7242e0601bf9b60e89628fb8ac577179 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 19 Sep 2016 14:44:38 -0700 Subject: cgroup: duplicate cgroup reference when cloning sockets When a socket is cloned, the associated sock_cgroup_data is duplicated but not its reference on the cgroup. As a result, the cgroup reference count will underflow when both sockets are destroyed later on. 
Fixes: bd1060a1d671 ("sock, cgroup: add sock->sk_cgroup") Link: http://lkml.kernel.org/r/20160914194846.11153-2-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Tejun Heo Cc: Michal Hocko Cc: Vladimir Davydov Cc: [4.5+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup.c | 6 ++++++ net/core/sock.c | 5 ++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d1c51b7..5e8dab5 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -6270,6 +6270,12 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd) if (cgroup_sk_alloc_disabled) return; + /* Socket clone path */ + if (skcd->val) { + cgroup_get(sock_cgroup_ptr(skcd)); + return; + } + rcu_read_lock(); while (true) { diff --git a/net/core/sock.c b/net/core/sock.c index 25dab8b..fd7b41e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1362,7 +1362,6 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, if (!try_module_get(prot->owner)) goto out_free_sec; sk_tx_queue_clear(sk); - cgroup_sk_alloc(&sk->sk_cgrp_data); } return sk; @@ -1422,6 +1421,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, sock_net_set(sk, net); atomic_set(&sk->sk_wmem_alloc, 1); + cgroup_sk_alloc(&sk->sk_cgrp_data); sock_update_classid(&sk->sk_cgrp_data); sock_update_netprioidx(&sk->sk_cgrp_data); } @@ -1566,6 +1566,9 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_priority = 0; newsk->sk_incoming_cpu = raw_smp_processor_id(); atomic64_set(&newsk->sk_cookie, 0); + + cgroup_sk_alloc(&newsk->sk_cgrp_data); + /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) -- cgit v1.1 From d21c353d5e99c56cdd5b5c1183ffbcaf23b8b960 Mon Sep 17 00:00:00 2001 From: Ashish Samant Date: Mon, 19 Sep 2016 14:44:42 -0700 Subject: ocfs2: fix start offset to ocfs2_zero_range_for_truncate() If we punch a hole on a reflink such that 
following conditions are met: 1. start offset is on a cluster boundary 2. end offset is not on a cluster boundary 3. (end offset is somewhere in another extent) or (hole range > MAX_CONTIG_BYTES(1MB)), we don't COW the first cluster starting at the start offset. But in this case, we were wrongly passing this cluster to ocfs2_zero_range_for_truncate() to zero out. This will modify the cluster in place and zero it in the source too. Fix this by skipping this cluster in such a scenario. To reproduce: 1. Create a random file of say 10 MB xfs_io -c 'pwrite -b 4k 0 10M' -f 10MBfile 2. Reflink it reflink -f 10MBfile reflnktest 3. Punch a hole starting at a cluster boundary with a range greater than 1MB. You can also use a range that will put the end offset in another extent. fallocate -p -o 0 -l 1048615 reflnktest 4. sync 5. Check the first cluster in the source file. (It will be zeroed out). dd if=10MBfile iflag=direct bs=<cluster size> count=1 | hexdump -C Link: http://lkml.kernel.org/r/1470957147-14185-1-git-send-email-ashish.samant@oracle.com Signed-off-by: Ashish Samant Reported-by: Saar Maoz Reviewed-by: Srinivas Eeda Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Eric Ren Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/file.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 4e7b0dc..0b055bf 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1506,7 +1506,8 @@ static int ocfs2_zero_partial_clusters(struct inode *inode, u64 start, u64 len) { int ret = 0; - u64 tmpend, end = start + len; + u64 tmpend = 0; + u64 end = start + len; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); unsigned int csize = osb->s_clustersize; handle_t *handle; @@ -1538,18 +1539,31 @@ static int ocfs2_zero_partial_clusters(struct inode *inode, } /* - * We want to get the byte offset of the end of the 1st cluster.
+ * If start is on a cluster boundary and end is somewhere in another + * cluster, we have not COWed the cluster starting at start, unless + * end is also within the same cluster. So, in this case, we skip this + * first call to ocfs2_zero_range_for_truncate() truncate and move on + * to the next one. */ - tmpend = (u64)osb->s_clustersize + (start & ~(osb->s_clustersize - 1)); - if (tmpend > end) - tmpend = end; + if ((start & (csize - 1)) != 0) { + /* + * We want to get the byte offset of the end of the 1st + * cluster. + */ + tmpend = (u64)osb->s_clustersize + + (start & ~(osb->s_clustersize - 1)); + if (tmpend > end) + tmpend = end; - trace_ocfs2_zero_partial_clusters_range1((unsigned long long)start, - (unsigned long long)tmpend); + trace_ocfs2_zero_partial_clusters_range1( + (unsigned long long)start, + (unsigned long long)tmpend); - ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend); - if (ret) - mlog_errno(ret); + ret = ocfs2_zero_range_for_truncate(inode, handle, start, + tmpend); + if (ret) + mlog_errno(ret); + } if (tmpend < end) { /* -- cgit v1.1 From 63b52c4936a2e679639c38ef51a50aa8ca1c5c07 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Mon, 19 Sep 2016 14:44:44 -0700 Subject: Revert "ocfs2: bump up o2cb network protocol version" This reverts commit 38b52efd218b ("ocfs2: bump up o2cb network protocol version"). This commit made rolling upgrade fail. When one node is upgraded to new version with this commit, the remaining nodes will fail to establish connections to it, then the application like VMs on the remaining nodes can't be live migrated to the upgraded one. This will cause an outage. Since negotiate hb timeout behavior didn't change without this commit, so revert it. 
Fixes: 38b52efd218bf ("ocfs2: bump up o2cb network protocol version") Link: http://lkml.kernel.org/r/1471396924-10375-1-git-send-email-junxiao.bi@oracle.com Signed-off-by: Junxiao Bi Cc: Mark Fasheh Cc: Joel Becker Cc: Joseph Qi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/cluster/tcp_internal.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index 94b1836..b95e7df 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h @@ -44,9 +44,6 @@ * version here in tcp_internal.h should not need to be bumped for * filesystem locking changes. * - * New in version 12 - * - Negotiate hb timeout when storage is down. - * * New in version 11 * - Negotiation of filesystem locking in the dlm join. * @@ -78,7 +75,7 @@ * - full 64 bit i_size in the metadata lock lvbs * - introduction of "rw" lock and pushing meta/data locking down */ -#define O2NET_PROTOCOL_VERSION 12ULL +#define O2NET_PROTOCOL_VERSION 11ULL struct o2net_handshake { __be64 protocol_version; __be64 connector_id; -- cgit v1.1 From b92ae139c308c5223521ed6ec022148b81312809 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Mon, 19 Sep 2016 14:44:47 -0700 Subject: rapidio/rio_cm: avoid GFP_KERNEL in atomic context As reported by Alexey Khoroshilov (https://lkml.org/lkml/2016/9/9/737): riocm_send_close() is called from rio_cm_shutdown() under spin_lock_bh(idr_lock), but riocm_send_close() uses a GFP_KERNEL allocation. Fix by taking riocm_send_close() outside of spinlock protected code. 
[akpm@linux-foundation.org: remove unneeded `if (!list_empty())'] Link: http://lkml.kernel.org/r/20160915175402.10122-1-alexandre.bounine@idt.com Signed-off-by: Alexandre Bounine Reported-by: Alexey Khoroshilov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio_cm.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/rapidio/rio_cm.c b/drivers/rapidio/rio_cm.c index 3fa17ac..cebc296 100644 --- a/drivers/rapidio/rio_cm.c +++ b/drivers/rapidio/rio_cm.c @@ -2247,17 +2247,30 @@ static int rio_cm_shutdown(struct notifier_block *nb, unsigned long code, { struct rio_channel *ch; unsigned int i; + LIST_HEAD(list); riocm_debug(EXIT, "."); + /* + * If there are any channels left in connected state send + * close notification to the connection partner. + * First build a list of channels that require a closing + * notification because function riocm_send_close() should + * be called outside of spinlock protected code. + */ spin_lock_bh(&idr_lock); idr_for_each_entry(&ch_idr, ch, i) { - riocm_debug(EXIT, "close ch %d", ch->id); - if (ch->state == RIO_CM_CONNECTED) - riocm_send_close(ch); + if (ch->state == RIO_CM_CONNECTED) { + riocm_debug(EXIT, "close ch %d", ch->id); + idr_remove(&ch_idr, ch->id); + list_add(&ch->ch_node, &list); + } } spin_unlock_bh(&idr_lock); + list_for_each_entry(ch, &list, ch_node) + riocm_send_close(ch); + return NOTIFY_DONE; } -- cgit v1.1 From 07b26c9454a2a19fff86d6fcf2aba6bc801eb8d8 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 19 Sep 2016 12:58:47 +0200 Subject: gso: Support partial splitting at the frag_list pointer Since commit 8a29111c7 ("net: gro: allow to build full sized skb") gro may build buffers with a frag_list. This can hurt forwarding because most NICs can't offload such packets, they need to be segmented in software. This patch splits buffers with a frag_list at the frag_list pointer into buffers that can be TSO offloaded. 
Signed-off-by: Steffen Klassert Acked-by: Alexander Duyck Signed-off-by: David S. Miller --- net/core/skbuff.c | 51 +++++++++++++++++++++++++++++++++++++++----------- net/ipv4/af_inet.c | 14 ++++++++++---- net/ipv4/gre_offload.c | 6 ++++-- net/ipv4/tcp_offload.c | 13 +++++++------ net/ipv4/udp_offload.c | 6 ++++-- net/ipv6/ip6_offload.c | 5 ++++- 6 files changed, 69 insertions(+), 26 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1e329d4..7bf82a2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3097,11 +3097,31 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, sg = !!(features & NETIF_F_SG); csum = !!can_checksum_protocol(features, proto); - /* GSO partial only requires that we trim off any excess that - * doesn't fit into an MSS sized block, so take care of that - * now. - */ - if (sg && csum && (features & NETIF_F_GSO_PARTIAL)) { + if (sg && csum && (mss != GSO_BY_FRAGS)) { + if (!(features & NETIF_F_GSO_PARTIAL)) { + struct sk_buff *iter; + + if (!list_skb || + !net_gso_ok(features, skb_shinfo(head_skb)->gso_type)) + goto normal; + + /* Split the buffer at the frag_list pointer. + * This is based on the assumption that all + * buffers in the chain excluding the last + * containing the same amount of data. + */ + skb_walk_frags(head_skb, iter) { + if (skb_headlen(iter)) + goto normal; + + len -= iter->len; + } + } + + /* GSO partial only requires that we trim off any excess that + * doesn't fit into an MSS sized block, so take care of that + * now. + */ partial_segs = len / mss; if (partial_segs > 1) mss *= partial_segs; @@ -3109,6 +3129,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, partial_segs = 0; } +normal: headroom = skb_headroom(head_skb); pos = skb_headlen(head_skb); @@ -3300,21 +3321,29 @@ perform_csum_check: */ segs->prev = tail; - /* Update GSO info on first skb in partial sequence. 
*/ if (partial_segs) { + struct sk_buff *iter; int type = skb_shinfo(head_skb)->gso_type; + unsigned short gso_size = skb_shinfo(head_skb)->gso_size; /* Update type to add partial and then remove dodgy if set */ - type |= SKB_GSO_PARTIAL; + type |= (features & NETIF_F_GSO_PARTIAL) / NETIF_F_GSO_PARTIAL * SKB_GSO_PARTIAL; type &= ~SKB_GSO_DODGY; /* Update GSO info and prepare to start updating headers on * our way back down the stack of protocols. */ - skb_shinfo(segs)->gso_size = skb_shinfo(head_skb)->gso_size; - skb_shinfo(segs)->gso_segs = partial_segs; - skb_shinfo(segs)->gso_type = type; - SKB_GSO_CB(segs)->data_offset = skb_headroom(segs) + doffset; + for (iter = segs; iter; iter = iter->next) { + skb_shinfo(iter)->gso_size = gso_size; + skb_shinfo(iter)->gso_segs = partial_segs; + skb_shinfo(iter)->gso_type = type; + SKB_GSO_CB(iter)->data_offset = skb_headroom(iter) + doffset; + } + + if (tail->len - doffset <= gso_size) + skb_shinfo(tail)->gso_size = 0; + else if (tail != segs) + skb_shinfo(tail)->gso_segs = DIV_ROUND_UP(tail->len - doffset, gso_size); } /* Following permits correct backpressure, for protocols diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e94b47b..1effc98 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1192,7 +1192,7 @@ EXPORT_SYMBOL(inet_sk_rebuild_header); struct sk_buff *inet_gso_segment(struct sk_buff *skb, netdev_features_t features) { - bool udpfrag = false, fixedid = false, encap; + bool udpfrag = false, fixedid = false, gso_partial, encap; struct sk_buff *segs = ERR_PTR(-EINVAL); const struct net_offload *ops; unsigned int offset = 0; @@ -1245,6 +1245,8 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, if (IS_ERR_OR_NULL(segs)) goto out; + gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL); + skb = segs; do { iph = (struct iphdr *)(skb_mac_header(skb) + nhoff); @@ -1259,9 +1261,13 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, iph->id = htons(id); id += 
skb_shinfo(skb)->gso_segs; } - tot_len = skb_shinfo(skb)->gso_size + - SKB_GSO_CB(skb)->data_offset + - skb->head - (unsigned char *)iph; + + if (gso_partial) + tot_len = skb_shinfo(skb)->gso_size + + SKB_GSO_CB(skb)->data_offset + + skb->head - (unsigned char *)iph; + else + tot_len = skb->len - nhoff; } else { if (!fixedid) iph->id = htons(id++); diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index ecd1e09..96e0efe 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -24,7 +24,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, __be16 protocol = skb->protocol; u16 mac_len = skb->mac_len; int gre_offset, outer_hlen; - bool need_csum, ufo; + bool need_csum, ufo, gso_partial; if (!skb->encapsulation) goto out; @@ -69,6 +69,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, goto out; } + gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL); + outer_hlen = skb_tnl_header_len(skb); gre_offset = outer_hlen - tnl_hlen; skb = segs; @@ -96,7 +98,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, greh = (struct gre_base_hdr *)skb_transport_header(skb); pcsum = (__sum16 *)(greh + 1); - if (skb_is_gso(skb)) { + if (gso_partial) { unsigned int partial_adj; /* Adjust checksum to account for the fact that diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 5c59649..bc68da3 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -90,12 +90,6 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, goto out; } - /* GSO partial only requires splitting the frame into an MSS - * multiple and possibly a remainder. So update the mss now. 
- */ - if (features & NETIF_F_GSO_PARTIAL) - mss = skb->len - (skb->len % mss); - copy_destructor = gso_skb->destructor == tcp_wfree; ooo_okay = gso_skb->ooo_okay; /* All segments but the first should have ooo_okay cleared */ @@ -108,6 +102,13 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, /* Only first segment might have ooo_okay set */ segs->ooo_okay = ooo_okay; + /* GSO partial and frag_list segmentation only requires splitting + * the frame into an MSS multiple and possibly a remainder, both + * cases return a GSO skb. So update the mss now. + */ + if (skb_is_gso(segs)) + mss *= skb_shinfo(segs)->gso_segs; + delta = htonl(oldlen + (thlen + mss)); skb = segs; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 81f253b..f9333c9 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -21,7 +21,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, __be16 new_protocol, bool is_ipv6) { int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); - bool remcsum, need_csum, offload_csum, ufo; + bool remcsum, need_csum, offload_csum, ufo, gso_partial; struct sk_buff *segs = ERR_PTR(-EINVAL); struct udphdr *uh = udp_hdr(skb); u16 mac_offset = skb->mac_header; @@ -88,6 +88,8 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, goto out; } + gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL); + outer_hlen = skb_tnl_header_len(skb); udp_offset = outer_hlen - tnl_hlen; skb = segs; @@ -117,7 +119,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, * will be using a length value equal to only one MSS sized * segment instead of the entire frame. 
*/ - if (skb_is_gso(skb)) { + if (gso_partial) { uh->len = htons(skb_shinfo(skb)->gso_size + SKB_GSO_CB(skb)->data_offset + skb->head - (unsigned char *)uh); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 22e90e5..e7bfd55 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -69,6 +69,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int offset = 0; bool encap, udpfrag; int nhoff; + bool gso_partial; skb_reset_network_header(skb); nhoff = skb_network_header(skb) - skb_mac_header(skb); @@ -101,9 +102,11 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, if (IS_ERR(segs)) goto out; + gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL); + for (skb = segs; skb; skb = skb->next) { ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff); - if (skb_is_gso(skb)) + if (gso_partial) payload_len = skb_shinfo(skb)->gso_size + SKB_GSO_CB(skb)->data_offset + skb->head - (unsigned char *)(ipv6h + 1); -- cgit v1.1 From 8d6be8b627389c6dc7e0ea2455a7542c8a2a16a7 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 19 Sep 2016 03:58:00 -0400 Subject: bnxt_en: Use RSS flags defined in the bnxt_hsi.h file. And remove redundant definitions of the same flags. Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++++---- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 5 ----- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 228c964..cee0e8d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3419,10 +3419,10 @@ static int bnxt_hwrm_vnic_set_rss(struct bnxt *bp, u16 vnic_id, bool set_rss) bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_RSS_CFG, -1, -1); if (set_rss) { - vnic->hash_type = BNXT_RSS_HASH_TYPE_FLAG_IPV4 | - BNXT_RSS_HASH_TYPE_FLAG_TCP_IPV4 | - BNXT_RSS_HASH_TYPE_FLAG_IPV6 | - BNXT_RSS_HASH_TYPE_FLAG_TCP_IPV6; + vnic->hash_type = VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4 | + VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4 | + VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6 | + VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6; req.hash_type = cpu_to_le32(vnic->hash_type); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 23e04a6..db4814e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -389,11 +389,6 @@ struct rx_tpa_end_cmp_ext { #define INVALID_HW_RING_ID ((u16)-1) -#define BNXT_RSS_HASH_TYPE_FLAG_IPV4 0x01 -#define BNXT_RSS_HASH_TYPE_FLAG_TCP_IPV4 0x02 -#define BNXT_RSS_HASH_TYPE_FLAG_IPV6 0x04 -#define BNXT_RSS_HASH_TYPE_FLAG_TCP_IPV6 0x08 - /* The hardware supports certain page sizes. Use the supported page sizes * to allocate the rings. */ -- cgit v1.1 From adbc830545003c4b7494c903654bea22e5a66bb4 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 19 Sep 2016 03:58:01 -0400 Subject: bnxt_en: Simplify PCI device names and add additinal PCI IDs. Remove "Single-port/Dual-port" from the device names. Dual-port devices will appear as 2 separate devices, so no need to call each a dual-port device. Use a more generic name for VF devices belonging to the same chip family.
Add some remaining NPAR device IDs. Signed-off-by: David Christensen Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 68 ++++++++++++++++--------------- 1 file changed, 36 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index cee0e8d..d9b4cd1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -93,50 +93,49 @@ enum board_idx { BCM57404_NPAR, BCM57406_NPAR, BCM57407_SFP, + BCM57407_NPAR, BCM57414_NPAR, BCM57416_NPAR, - BCM57304_VF, - BCM57404_VF, - BCM57414_VF, - BCM57314_VF, + NETXTREME_E_VF, + NETXTREME_C_VF, }; /* indexed by enum above */ static const struct { char *name; } board_info[] = { - { "Broadcom BCM57301 NetXtreme-C Single-port 10Gb Ethernet" }, - { "Broadcom BCM57302 NetXtreme-C Dual-port 10Gb/25Gb Ethernet" }, - { "Broadcom BCM57304 NetXtreme-C Dual-port 10Gb/25Gb/40Gb/50Gb Ethernet" }, + { "Broadcom BCM57301 NetXtreme-C 10Gb Ethernet" }, + { "Broadcom BCM57302 NetXtreme-C 10Gb/25Gb Ethernet" }, + { "Broadcom BCM57304 NetXtreme-C 10Gb/25Gb/40Gb/50Gb Ethernet" }, { "Broadcom BCM57417 NetXtreme-E Ethernet Partition" }, - { "Broadcom BCM58700 Nitro 4-port 1Gb/2.5Gb/10Gb Ethernet" }, - { "Broadcom BCM57311 NetXtreme-C Single-port 10Gb Ethernet" }, - { "Broadcom BCM57312 NetXtreme-C Dual-port 10Gb/25Gb Ethernet" }, - { "Broadcom BCM57402 NetXtreme-E Dual-port 10Gb Ethernet" }, - { "Broadcom BCM57404 NetXtreme-E Dual-port 10Gb/25Gb Ethernet" }, - { "Broadcom BCM57406 NetXtreme-E Dual-port 10GBase-T Ethernet" }, + { "Broadcom BCM58700 Nitro 1Gb/2.5Gb/10Gb Ethernet" }, + { "Broadcom BCM57311 NetXtreme-C 10Gb Ethernet" }, + { "Broadcom BCM57312 NetXtreme-C 10Gb/25Gb Ethernet" }, + { "Broadcom BCM57402 NetXtreme-E 10Gb Ethernet" }, + { "Broadcom BCM57404 NetXtreme-E 10Gb/25Gb Ethernet" }, + { "Broadcom BCM57406 NetXtreme-E 10GBase-T Ethernet" }, { "Broadcom BCM57402 
NetXtreme-E Ethernet Partition" }, - { "Broadcom BCM57407 NetXtreme-E Dual-port 10GBase-T Ethernet" }, - { "Broadcom BCM57412 NetXtreme-E Dual-port 10Gb Ethernet" }, - { "Broadcom BCM57414 NetXtreme-E Dual-port 10Gb/25Gb Ethernet" }, - { "Broadcom BCM57416 NetXtreme-E Dual-port 10GBase-T Ethernet" }, - { "Broadcom BCM57417 NetXtreme-E Dual-port 10GBase-T Ethernet" }, + { "Broadcom BCM57407 NetXtreme-E 10GBase-T Ethernet" }, + { "Broadcom BCM57412 NetXtreme-E 10Gb Ethernet" }, + { "Broadcom BCM57414 NetXtreme-E 10Gb/25Gb Ethernet" }, + { "Broadcom BCM57416 NetXtreme-E 10GBase-T Ethernet" }, + { "Broadcom BCM57417 NetXtreme-E 10GBase-T Ethernet" }, { "Broadcom BCM57412 NetXtreme-E Ethernet Partition" }, - { "Broadcom BCM57314 NetXtreme-C Dual-port 10Gb/25Gb/40Gb/50Gb Ethernet" }, - { "Broadcom BCM57417 NetXtreme-E Dual-port 10Gb/25Gb Ethernet" }, - { "Broadcom BCM57416 NetXtreme-E Dual-port 10Gb Ethernet" }, + { "Broadcom BCM57314 NetXtreme-C 10Gb/25Gb/40Gb/50Gb Ethernet" }, + { "Broadcom BCM57417 NetXtreme-E 10Gb/25Gb Ethernet" }, + { "Broadcom BCM57416 NetXtreme-E 10Gb Ethernet" }, { "Broadcom BCM57404 NetXtreme-E Ethernet Partition" }, { "Broadcom BCM57406 NetXtreme-E Ethernet Partition" }, - { "Broadcom BCM57407 NetXtreme-E Dual-port 25Gb Ethernet" }, + { "Broadcom BCM57407 NetXtreme-E 25Gb Ethernet" }, + { "Broadcom BCM57407 NetXtreme-E Ethernet Partition" }, { "Broadcom BCM57414 NetXtreme-E Ethernet Partition" }, { "Broadcom BCM57416 NetXtreme-E Ethernet Partition" }, - { "Broadcom BCM57304 NetXtreme-C Ethernet Virtual Function" }, - { "Broadcom BCM57404 NetXtreme-E Ethernet Virtual Function" }, - { "Broadcom BCM57414 NetXtreme-E Ethernet Virtual Function" }, - { "Broadcom BCM57314 NetXtreme-E Ethernet Virtual Function" }, + { "Broadcom NetXtreme-E Ethernet Virtual Function" }, + { "Broadcom NetXtreme-C Ethernet Virtual Function" }, }; static const struct pci_device_id bnxt_pci_tbl[] = { + { PCI_VDEVICE(BROADCOM, 0x16c0), .driver_data = BCM57417_NPAR }, { 
PCI_VDEVICE(BROADCOM, 0x16c8), .driver_data = BCM57301 }, { PCI_VDEVICE(BROADCOM, 0x16c9), .driver_data = BCM57302 }, { PCI_VDEVICE(BROADCOM, 0x16ca), .driver_data = BCM57304 }, @@ -160,13 +159,19 @@ static const struct pci_device_id bnxt_pci_tbl[] = { { PCI_VDEVICE(BROADCOM, 0x16e7), .driver_data = BCM57404_NPAR }, { PCI_VDEVICE(BROADCOM, 0x16e8), .driver_data = BCM57406_NPAR }, { PCI_VDEVICE(BROADCOM, 0x16e9), .driver_data = BCM57407_SFP }, + { PCI_VDEVICE(BROADCOM, 0x16ea), .driver_data = BCM57407_NPAR }, + { PCI_VDEVICE(BROADCOM, 0x16eb), .driver_data = BCM57412_NPAR }, { PCI_VDEVICE(BROADCOM, 0x16ec), .driver_data = BCM57414_NPAR }, + { PCI_VDEVICE(BROADCOM, 0x16ed), .driver_data = BCM57414_NPAR }, { PCI_VDEVICE(BROADCOM, 0x16ee), .driver_data = BCM57416_NPAR }, + { PCI_VDEVICE(BROADCOM, 0x16ef), .driver_data = BCM57416_NPAR }, #ifdef CONFIG_BNXT_SRIOV - { PCI_VDEVICE(BROADCOM, 0x16cb), .driver_data = BCM57304_VF }, - { PCI_VDEVICE(BROADCOM, 0x16d3), .driver_data = BCM57404_VF }, - { PCI_VDEVICE(BROADCOM, 0x16dc), .driver_data = BCM57414_VF }, - { PCI_VDEVICE(BROADCOM, 0x16e1), .driver_data = BCM57314_VF }, + { PCI_VDEVICE(BROADCOM, 0x16c1), .driver_data = NETXTREME_E_VF }, + { PCI_VDEVICE(BROADCOM, 0x16cb), .driver_data = NETXTREME_C_VF }, + { PCI_VDEVICE(BROADCOM, 0x16d3), .driver_data = NETXTREME_E_VF }, + { PCI_VDEVICE(BROADCOM, 0x16dc), .driver_data = NETXTREME_E_VF }, + { PCI_VDEVICE(BROADCOM, 0x16e1), .driver_data = NETXTREME_C_VF }, + { PCI_VDEVICE(BROADCOM, 0x16e5), .driver_data = NETXTREME_C_VF }, #endif { 0 } }; @@ -189,8 +194,7 @@ static const u16 bnxt_async_events_arr[] = { static bool bnxt_vf_pciid(enum board_idx idx) { - return (idx == BCM57304_VF || idx == BCM57404_VF || - idx == BCM57314_VF || idx == BCM57414_VF); + return (idx == NETXTREME_C_VF || idx == NETXTREME_E_VF); } #define DB_CP_REARM_FLAGS (DB_KEY_CP | DB_IDX_VALID) -- cgit v1.1 From 441cabbbf1bd0b99e283c9116fe430e53ee67a4a Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 19 
Sep 2016 03:58:02 -0400 Subject: bnxt_en: Update to firmware interface spec 1.5.1. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 + drivers/net/ethernet/broadcom/bnxt/bnxt.h | 14 +- drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h | 1251 +++++++++++++++---------- 3 files changed, 760 insertions(+), 508 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index d9b4cd1..f6b4f34 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4253,6 +4253,9 @@ static int bnxt_hwrm_queue_qportcfg(struct bnxt *bp) if (bp->max_tc > BNXT_MAX_QUEUE) bp->max_tc = BNXT_MAX_QUEUE; + if (resp->queue_cfg_info & QUEUE_QPORTCFG_RESP_QUEUE_CFG_INFO_ASYM_CFG) + bp->max_tc = 1; + qptr = &resp->queue_id0; for (i = 0; i < bp->max_tc; i++) { bp->q_info[i].queue_id = *qptr++; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index db4814e..012cc51 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -11,10 +11,10 @@ #define BNXT_H #define DRV_MODULE_NAME "bnxt_en" -#define DRV_MODULE_VERSION "1.3.0" +#define DRV_MODULE_VERSION "1.5.0" #define DRV_VER_MAJ 1 -#define DRV_VER_MIN 3 +#define DRV_VER_MIN 5 #define DRV_VER_UPD 0 struct tx_bd { @@ -106,11 +106,11 @@ struct tx_cmp { #define CMP_TYPE_REMOTE_DRIVER_REQ 34 #define CMP_TYPE_REMOTE_DRIVER_RESP 36 #define CMP_TYPE_ERROR_STATUS 48 - #define CMPL_BASE_TYPE_STAT_EJECT (0x1aUL << 0) - #define CMPL_BASE_TYPE_HWRM_DONE (0x20UL << 0) - #define CMPL_BASE_TYPE_HWRM_FWD_REQ (0x22UL << 0) - #define CMPL_BASE_TYPE_HWRM_FWD_RESP (0x24UL << 0) - #define CMPL_BASE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define CMPL_BASE_TYPE_STAT_EJECT 0x1aUL + #define CMPL_BASE_TYPE_HWRM_DONE 0x20UL + #define CMPL_BASE_TYPE_HWRM_FWD_REQ 0x22UL + #define CMPL_BASE_TYPE_HWRM_FWD_RESP 0x24UL + #define 
CMPL_BASE_TYPE_HWRM_ASYNC_EVENT 0x2eUL #define TX_CMP_FLAGS_ERROR (1 << 6) #define TX_CMP_FLAGS_PUSH (1 << 7) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h index 517567f..04a96cc 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h @@ -39,7 +39,7 @@ struct eject_cmpl { __le16 type; #define EJECT_CMPL_TYPE_MASK 0x3fUL #define EJECT_CMPL_TYPE_SFT 0 - #define EJECT_CMPL_TYPE_STAT_EJECT (0x1aUL << 0) + #define EJECT_CMPL_TYPE_STAT_EJECT 0x1aUL __le16 len; __le32 opaque; __le32 v; @@ -52,7 +52,7 @@ struct hwrm_cmpl { __le16 type; #define HWRM_CMPL_TYPE_MASK 0x3fUL #define HWRM_CMPL_TYPE_SFT 0 - #define HWRM_CMPL_TYPE_HWRM_DONE (0x20UL << 0) + #define HWRM_CMPL_TYPE_HWRM_DONE 0x20UL __le16 sequence_id; __le32 unused_1; __le32 v; @@ -65,7 +65,7 @@ struct hwrm_fwd_req_cmpl { __le16 req_len_type; #define HWRM_FWD_REQ_CMPL_TYPE_MASK 0x3fUL #define HWRM_FWD_REQ_CMPL_TYPE_SFT 0 - #define HWRM_FWD_REQ_CMPL_TYPE_HWRM_FWD_REQ (0x22UL << 0) + #define HWRM_FWD_REQ_CMPL_TYPE_HWRM_FWD_REQ 0x22UL #define HWRM_FWD_REQ_CMPL_REQ_LEN_MASK 0xffc0UL #define HWRM_FWD_REQ_CMPL_REQ_LEN_SFT 6 __le16 source_id; @@ -81,7 +81,7 @@ struct hwrm_fwd_resp_cmpl { __le16 type; #define HWRM_FWD_RESP_CMPL_TYPE_MASK 0x3fUL #define HWRM_FWD_RESP_CMPL_TYPE_SFT 0 - #define HWRM_FWD_RESP_CMPL_TYPE_HWRM_FWD_RESP (0x24UL << 0) + #define HWRM_FWD_RESP_CMPL_TYPE_HWRM_FWD_RESP 0x24UL __le16 source_id; __le16 resp_len; __le16 unused_1; @@ -96,25 +96,26 @@ struct hwrm_async_event_cmpl { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE (0x0UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_MTU_CHANGE (0x1UL << 0) - #define 
HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CHANGE (0x2UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE (0x3UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED (0x4UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED (0x5UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE (0x6UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_PHY_CFG_CHANGE (0x7UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_UNLOAD (0x10UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_LOAD (0x11UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD (0x20UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_LOAD (0x21UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_FLR (0x30UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_MAC_ADDR_CHANGE (0x31UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_VF_COMM_STATUS_CHANGE (0x32UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_CFG_CHANGE (0x33UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR (0xffUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE 0x0UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_MTU_CHANGE 0x1UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CHANGE 0x2UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE 0x3UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED 0x4UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED 0x5UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE 0x6UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_PHY_CFG_CHANGE 0x7UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_UNLOAD 0x10UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_LOAD 0x11UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_FLR_PROC_CMPLT 0x12UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD 0x20UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_LOAD 0x21UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_FLR 0x30UL + #define 
HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_MAC_ADDR_CHANGE 0x31UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_VF_COMM_STATUS_CHANGE 0x32UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_CFG_CHANGE 0x33UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR 0xffUL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_V 0x1UL @@ -130,9 +131,9 @@ struct hwrm_async_event_cmpl_link_status_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_ID_LINK_STATUS_CHANGE (0x0UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_ID_LINK_STATUS_CHANGE 0x0UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_V 0x1UL @@ -156,9 +157,9 @@ struct hwrm_async_event_cmpl_link_mtu_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_EVENT_ID_LINK_MTU_CHANGE (0x1UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_EVENT_ID_LINK_MTU_CHANGE 0x1UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_V 0x1UL @@ -176,9 +177,9 @@ struct hwrm_async_event_cmpl_link_speed_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; 
- #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_ID_LINK_SPEED_CHANGE (0x2UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_ID_LINK_SPEED_CHANGE 0x2UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_V 0x1UL @@ -200,8 +201,7 @@ struct hwrm_async_event_cmpl_link_speed_change { #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_40GB (0x190UL << 1) #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_50GB (0x1f4UL << 1) #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_100GB (0x3e8UL << 1) - #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_10MB (0xffffUL << 1) - #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_LAST HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_10MB + #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_LAST HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_100GB #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_PORT_ID_MASK 0xffff0000UL #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_PORT_ID_SFT 16 }; @@ -211,9 +211,9 @@ struct hwrm_async_event_cmpl_dcb_config_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_ID_DCB_CONFIG_CHANGE (0x3UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_ID_DCB_CONFIG_CHANGE 0x3UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_V 0x1UL @@ -231,9 +231,9 @@ struct hwrm_async_event_cmpl_port_conn_not_allowed { __le16 type; #define 
HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_ID_PORT_CONN_NOT_ALLOWED (0x4UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_ID_PORT_CONN_NOT_ALLOWED 0x4UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_V 0x1UL @@ -258,9 +258,9 @@ struct hwrm_async_event_cmpl_link_speed_cfg_not_allowed { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED (0x5UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED 0x5UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_V 0x1UL @@ -278,9 +278,9 @@ struct hwrm_async_event_cmpl_link_speed_cfg_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_EVENT_ID_LINK_SPEED_CFG_CHANGE (0x6UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_EVENT_ID_LINK_SPEED_CFG_CHANGE 0x6UL __le32 event_data2; u8 opaque_v; #define 
HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_V 0x1UL @@ -300,9 +300,9 @@ struct hwrm_async_event_cmpl_func_drvr_unload { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_EVENT_ID_FUNC_DRVR_UNLOAD (0x10UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_EVENT_ID_FUNC_DRVR_UNLOAD 0x10UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_V 0x1UL @@ -320,9 +320,9 @@ struct hwrm_async_event_cmpl_func_drvr_load { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_EVENT_ID_FUNC_DRVR_LOAD (0x11UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_EVENT_ID_FUNC_DRVR_LOAD 0x11UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_V 0x1UL @@ -340,9 +340,9 @@ struct hwrm_async_event_cmpl_pf_drvr_unload { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_EVENT_ID_PF_DRVR_UNLOAD (0x20UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_EVENT_ID_PF_DRVR_UNLOAD 0x20UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_V 0x1UL @@ -362,9 +362,9 @@ struct hwrm_async_event_cmpl_pf_drvr_load 
{ __le16 type; #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_EVENT_ID_PF_DRVR_LOAD (0x21UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_EVENT_ID_PF_DRVR_LOAD 0x21UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_V 0x1UL @@ -384,9 +384,9 @@ struct hwrm_async_event_cmpl_vf_flr { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_VF_FLR_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_VF_FLR_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_VF_FLR_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_VF_FLR_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_VF_FLR_EVENT_ID_VF_FLR (0x30UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_VF_FLR_EVENT_ID_VF_FLR 0x30UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_VF_FLR_V 0x1UL @@ -404,9 +404,9 @@ struct hwrm_async_event_cmpl_vf_mac_addr_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_EVENT_ID_VF_MAC_ADDR_CHANGE (0x31UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_EVENT_ID_VF_MAC_ADDR_CHANGE 0x31UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_V 0x1UL @@ -424,9 +424,9 @@ struct hwrm_async_event_cmpl_pf_vf_comm_status_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_TYPE_SFT 0 - #define 
HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_EVENT_ID_PF_VF_COMM_STATUS_CHANGE (0x32UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_EVENT_ID_PF_VF_COMM_STATUS_CHANGE 0x32UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_V 0x1UL @@ -443,9 +443,9 @@ struct hwrm_async_event_cmpl_vf_cfg_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_ID_VF_CFG_CHANGE (0x33UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_ID_VF_CFG_CHANGE 0x33UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_V 0x1UL @@ -465,15 +465,15 @@ struct hwrm_async_event_cmpl_hwrm_error { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_ID_HWRM_ERROR (0xffUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_ID_HWRM_ERROR 0xffUL __le32 event_data2; #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_MASK 0xffUL #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_WARNING (0x0UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_NONFATAL (0x1UL << 0) - #define 
HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_FATAL (0x2UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_WARNING 0x0UL + #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_NONFATAL 0x1UL + #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_FATAL 0x2UL #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_LAST HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_FATAL u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_V 0x1UL @@ -485,12 +485,12 @@ struct hwrm_async_event_cmpl_hwrm_error { #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA1_TIMESTAMP 0x1UL }; -/* HW Resource Manager Specification 1.3.0 */ +/* HW Resource Manager Specification 1.5.1 */ #define HWRM_VERSION_MAJOR 1 -#define HWRM_VERSION_MINOR 3 -#define HWRM_VERSION_UPDATE 0 +#define HWRM_VERSION_MINOR 5 +#define HWRM_VERSION_UPDATE 1 -#define HWRM_VERSION_STR "1.3.0" +#define HWRM_VERSION_STR "1.5.1" /* * Following is the signature for HWRM message field that indicates not * applicable (All F's). Need to cast it the size of the field if needed. 
@@ -556,8 +556,8 @@ struct cmd_nums { #define HWRM_QUEUE_QPORTCFG (0x30UL) #define HWRM_QUEUE_QCFG (0x31UL) #define HWRM_QUEUE_CFG (0x32UL) - #define HWRM_QUEUE_BUFFERS_QCFG (0x33UL) - #define HWRM_QUEUE_BUFFERS_CFG (0x34UL) + #define RESERVED2 (0x33UL) + #define RESERVED3 (0x34UL) #define HWRM_QUEUE_PFCENABLE_QCFG (0x35UL) #define HWRM_QUEUE_PFCENABLE_CFG (0x36UL) #define HWRM_QUEUE_PRI2COS_QCFG (0x37UL) @@ -574,6 +574,7 @@ struct cmd_nums { #define HWRM_VNIC_RSS_QCFG (0x47UL) #define HWRM_VNIC_PLCMODES_CFG (0x48UL) #define HWRM_VNIC_PLCMODES_QCFG (0x49UL) + #define HWRM_VNIC_QCAPS (0x4aUL) #define HWRM_RING_ALLOC (0x50UL) #define HWRM_RING_FREE (0x51UL) #define HWRM_RING_CMPL_RING_QAGGINT_PARAMS (0x52UL) @@ -581,13 +582,15 @@ struct cmd_nums { #define HWRM_RING_RESET (0x5eUL) #define HWRM_RING_GRP_ALLOC (0x60UL) #define HWRM_RING_GRP_FREE (0x61UL) + #define RESERVED5 (0x64UL) + #define RESERVED6 (0x65UL) #define HWRM_VNIC_RSS_COS_LB_CTX_ALLOC (0x70UL) #define HWRM_VNIC_RSS_COS_LB_CTX_FREE (0x71UL) #define HWRM_CFA_L2_FILTER_ALLOC (0x90UL) #define HWRM_CFA_L2_FILTER_FREE (0x91UL) #define HWRM_CFA_L2_FILTER_CFG (0x92UL) #define HWRM_CFA_L2_SET_RX_MASK (0x93UL) - #define RESERVED3 (0x94UL) + #define RESERVED4 (0x94UL) #define HWRM_CFA_TUNNEL_FILTER_ALLOC (0x95UL) #define HWRM_CFA_TUNNEL_FILTER_FREE (0x96UL) #define HWRM_CFA_ENCAP_RECORD_ALLOC (0x97UL) @@ -607,6 +610,8 @@ struct cmd_nums { #define HWRM_STAT_CTX_CLR_STATS (0xb3UL) #define HWRM_FW_RESET (0xc0UL) #define HWRM_FW_QSTATUS (0xc1UL) + #define HWRM_FW_SET_TIME (0xc8UL) + #define HWRM_FW_GET_TIME (0xc9UL) #define HWRM_EXEC_FWD_RESP (0xd0UL) #define HWRM_REJECT_FWD_RESP (0xd1UL) #define HWRM_FWD_RESP (0xd2UL) @@ -615,11 +620,13 @@ struct cmd_nums { #define HWRM_WOL_FILTER_ALLOC (0xf0UL) #define HWRM_WOL_FILTER_FREE (0xf1UL) #define HWRM_WOL_FILTER_QCFG (0xf2UL) + #define HWRM_WOL_REASON_QCFG (0xf3UL) #define HWRM_DBG_READ_DIRECT (0xff10UL) #define HWRM_DBG_READ_INDIRECT (0xff11UL) #define HWRM_DBG_WRITE_DIRECT 
(0xff12UL) #define HWRM_DBG_WRITE_INDIRECT (0xff13UL) #define HWRM_DBG_DUMP (0xff14UL) + #define HWRM_NVM_INSTALL_UPDATE (0xfff3UL) #define HWRM_NVM_MODIFY (0xfff4UL) #define HWRM_NVM_VERIFY_UPDATE (0xfff5UL) #define HWRM_NVM_GET_DEV_INFO (0xfff6UL) @@ -824,7 +831,9 @@ struct hwrm_ver_get_output { u8 netctrl_fw_min; u8 netctrl_fw_bld; u8 netctrl_fw_rsvd; - __le32 reserved1; + __le32 dev_caps_cfg; + #define VER_GET_RESP_DEV_CAPS_CFG_SECURE_FW_UPD_SUPPORTED 0x1UL + #define VER_GET_RESP_DEV_CAPS_CFG_FW_DCBX_AGENT_SUPPORTED 0x2UL u8 roce_fw_maj; u8 roce_fw_min; u8 roce_fw_bld; @@ -839,9 +848,9 @@ struct hwrm_ver_get_output { u8 chip_metal; u8 chip_bond_id; u8 chip_platform_type; - #define VER_GET_RESP_CHIP_PLATFORM_TYPE_ASIC (0x0UL << 0) - #define VER_GET_RESP_CHIP_PLATFORM_TYPE_FPGA (0x1UL << 0) - #define VER_GET_RESP_CHIP_PLATFORM_TYPE_PALLADIUM (0x2UL << 0) + #define VER_GET_RESP_CHIP_PLATFORM_TYPE_ASIC 0x0UL + #define VER_GET_RESP_CHIP_PLATFORM_TYPE_FPGA 0x1UL + #define VER_GET_RESP_CHIP_PLATFORM_TYPE_PALLADIUM 0x2UL __le16 max_req_win_len; __le16 max_resp_len; __le16 def_req_timeout; @@ -863,10 +872,10 @@ struct hwrm_func_reset_input { #define FUNC_RESET_REQ_ENABLES_VF_ID_VALID 0x1UL __le16 vf_id; u8 func_reset_level; - #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETALL (0x0UL << 0) - #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETME (0x1UL << 0) - #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETCHILDREN (0x2UL << 0) - #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETVF (0x3UL << 0) + #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETALL 0x0UL + #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETME 0x1UL + #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETCHILDREN 0x2UL + #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETVF 0x3UL u8 unused_0; }; @@ -1028,6 +1037,10 @@ struct hwrm_func_qcaps_output { #define FUNC_QCAPS_RESP_FLAGS_ROCE_V2_SUPPORTED 0x10UL #define FUNC_QCAPS_RESP_FLAGS_WOL_MAGICPKT_SUPPORTED 0x20UL #define FUNC_QCAPS_RESP_FLAGS_WOL_BMP_SUPPORTED 0x40UL + #define 
FUNC_QCAPS_RESP_FLAGS_TX_RING_RL_SUPPORTED 0x80UL + #define FUNC_QCAPS_RESP_FLAGS_TX_BW_CFG_SUPPORTED 0x100UL + #define FUNC_QCAPS_RESP_FLAGS_VF_TX_RING_RL_SUPPORTED 0x200UL + #define FUNC_QCAPS_RESP_FLAGS_VF_BW_CFG_SUPPORTED 0x400UL u8 mac_address[6]; __le16 max_rsscos_ctx; __le16 max_cmpl_rings; @@ -1047,9 +1060,8 @@ struct hwrm_func_qcaps_output { __le32 max_mcast_filters; __le32 max_flow_id; __le32 max_hw_ring_grps; + __le16 max_sp_tx_rings; u8 unused_0; - u8 unused_1; - u8 unused_2; u8 valid; }; @@ -1077,6 +1089,7 @@ struct hwrm_func_qcfg_output { __le16 flags; #define FUNC_QCFG_RESP_FLAGS_OOB_WOL_MAGICPKT_ENABLED 0x1UL #define FUNC_QCFG_RESP_FLAGS_OOB_WOL_BMP_ENABLED 0x2UL + #define FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED 0x4UL u8 mac_address[6]; __le16 pci_id; __le16 alloc_rsscos_ctx; @@ -1089,29 +1102,46 @@ struct hwrm_func_qcfg_output { __le16 mru; __le16 stat_ctx_id; u8 port_partition_type; - #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_SPF (0x0UL << 0) - #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_MPFS (0x1UL << 0) - #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_0 (0x2UL << 0) - #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_5 (0x3UL << 0) - #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR2_0 (0x4UL << 0) - #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_UNKNOWN (0xffUL << 0) + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_SPF 0x0UL + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_MPFS 0x1UL + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_0 0x2UL + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_5 0x3UL + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR2_0 0x4UL + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_UNKNOWN 0xffUL u8 unused_0; __le16 dflt_vnic_id; u8 unused_1; u8 unused_2; __le32 min_bw; + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_SFT 0 + #define FUNC_QCFG_RESP_MIN_BW_RSVD 0x10000000UL + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_SFT 
29 + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_LAST FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_INVALID __le32 max_bw; + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_SFT 0 + #define FUNC_QCFG_RESP_MAX_BW_RSVD 0x10000000UL + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_LAST FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_INVALID u8 evb_mode; - #define FUNC_QCFG_RESP_EVB_MODE_NO_EVB (0x0UL << 0) - #define FUNC_QCFG_RESP_EVB_MODE_VEB (0x1UL << 0) - #define FUNC_QCFG_RESP_EVB_MODE_VEPA (0x2UL << 0) + #define FUNC_QCFG_RESP_EVB_MODE_NO_EVB 0x0UL + #define FUNC_QCFG_RESP_EVB_MODE_VEB 0x1UL + #define FUNC_QCFG_RESP_EVB_MODE_VEPA 0x2UL u8 unused_3; - __le16 unused_4; + __le16 alloc_vfs; __le32 alloc_mcast_filters; __le32 alloc_hw_ring_grps; - u8 unused_5; - u8 unused_6; - u8 unused_7; + __le16 alloc_sp_tx_rings; + u8 unused_4; u8 valid; }; @@ -1171,18 +1201,36 @@ struct hwrm_func_cfg_input { __le16 dflt_vlan; __be32 dflt_ip_addr[4]; __le32 min_bw; + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_SFT 0 + #define FUNC_CFG_REQ_MIN_BW_RSVD 0x10000000UL + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define 
FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_LAST FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_INVALID __le32 max_bw; + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_SFT 0 + #define FUNC_CFG_REQ_MAX_BW_RSVD 0x10000000UL + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_LAST FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_INVALID __le16 async_event_cr; u8 vlan_antispoof_mode; - #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_NOCHECK (0x0UL << 0) - #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_VALIDATE_VLAN (0x1UL << 0) - #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_INSERT_IF_VLANDNE (0x2UL << 0) - #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_INSERT_OR_OVERRIDE_VLAN (0x3UL << 0) + #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_NOCHECK 0x0UL + #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_VALIDATE_VLAN 0x1UL + #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_INSERT_IF_VLANDNE 0x2UL + #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_INSERT_OR_OVERRIDE_VLAN 0x3UL u8 allowed_vlan_pris; u8 evb_mode; - #define FUNC_CFG_REQ_EVB_MODE_NO_EVB (0x0UL << 0) - #define FUNC_CFG_REQ_EVB_MODE_VEB (0x1UL << 0) - #define FUNC_CFG_REQ_EVB_MODE_VEPA (0x2UL << 0) + #define FUNC_CFG_REQ_EVB_MODE_NO_EVB 0x0UL + #define FUNC_CFG_REQ_EVB_MODE_VEB 0x1UL + #define FUNC_CFG_REQ_EVB_MODE_VEPA 0x2UL u8 unused_2; __le16 num_mcast_filters; }; @@ -1341,16 +1389,16 @@ struct hwrm_func_drv_rgtr_input { #define FUNC_DRV_RGTR_REQ_ENABLES_VF_REQ_FWD 0x8UL #define FUNC_DRV_RGTR_REQ_ENABLES_ASYNC_EVENT_FWD 0x10UL __le16 os_type; - #define FUNC_DRV_RGTR_REQ_OS_TYPE_UNKNOWN (0x0UL << 0) - #define FUNC_DRV_RGTR_REQ_OS_TYPE_OTHER (0x1UL << 0) - #define FUNC_DRV_RGTR_REQ_OS_TYPE_MSDOS (0xeUL << 0) - #define FUNC_DRV_RGTR_REQ_OS_TYPE_WINDOWS 
(0x12UL << 0) - #define FUNC_DRV_RGTR_REQ_OS_TYPE_SOLARIS (0x1dUL << 0) - #define FUNC_DRV_RGTR_REQ_OS_TYPE_LINUX (0x24UL << 0) - #define FUNC_DRV_RGTR_REQ_OS_TYPE_FREEBSD (0x2aUL << 0) - #define FUNC_DRV_RGTR_REQ_OS_TYPE_ESXI (0x68UL << 0) - #define FUNC_DRV_RGTR_REQ_OS_TYPE_WIN864 (0x73UL << 0) - #define FUNC_DRV_RGTR_REQ_OS_TYPE_WIN2012R2 (0x74UL << 0) + #define FUNC_DRV_RGTR_REQ_OS_TYPE_UNKNOWN 0x0UL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_OTHER 0x1UL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_MSDOS 0xeUL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_WINDOWS 0x12UL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_SOLARIS 0x1dUL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_LINUX 0x24UL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_FREEBSD 0x2aUL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_ESXI 0x68UL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_WIN864 0x73UL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_WIN2012R2 0x74UL u8 ver_maj; u8 ver_min; u8 ver_upd; @@ -1415,13 +1463,13 @@ struct hwrm_func_buf_rgtr_input { __le16 vf_id; __le16 req_buf_num_pages; __le16 req_buf_page_size; - #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_16B (0x4UL << 0) - #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_4K (0xcUL << 0) - #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_8K (0xdUL << 0) - #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_64K (0x10UL << 0) - #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_2M (0x15UL << 0) - #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_4M (0x16UL << 0) - #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_1G (0x1eUL << 0) + #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_16B 0x4UL + #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_4K 0xcUL + #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_8K 0xdUL + #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_64K 0x10UL + #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_2M 0x15UL + #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_4M 0x16UL + #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_1G 0x1eUL __le16 req_buf_len; __le16 resp_buf_len; u8 unused_0; @@ -1473,16 +1521,16 @@ struct hwrm_func_drv_qver_output { __le16 seq_id; __le16 resp_len; __le16 
os_type; - #define FUNC_DRV_QVER_RESP_OS_TYPE_UNKNOWN (0x0UL << 0) - #define FUNC_DRV_QVER_RESP_OS_TYPE_OTHER (0x1UL << 0) - #define FUNC_DRV_QVER_RESP_OS_TYPE_MSDOS (0xeUL << 0) - #define FUNC_DRV_QVER_RESP_OS_TYPE_WINDOWS (0x12UL << 0) - #define FUNC_DRV_QVER_RESP_OS_TYPE_SOLARIS (0x1dUL << 0) - #define FUNC_DRV_QVER_RESP_OS_TYPE_LINUX (0x24UL << 0) - #define FUNC_DRV_QVER_RESP_OS_TYPE_FREEBSD (0x2aUL << 0) - #define FUNC_DRV_QVER_RESP_OS_TYPE_ESXI (0x68UL << 0) - #define FUNC_DRV_QVER_RESP_OS_TYPE_WIN864 (0x73UL << 0) - #define FUNC_DRV_QVER_RESP_OS_TYPE_WIN2012R2 (0x74UL << 0) + #define FUNC_DRV_QVER_RESP_OS_TYPE_UNKNOWN 0x0UL + #define FUNC_DRV_QVER_RESP_OS_TYPE_OTHER 0x1UL + #define FUNC_DRV_QVER_RESP_OS_TYPE_MSDOS 0xeUL + #define FUNC_DRV_QVER_RESP_OS_TYPE_WINDOWS 0x12UL + #define FUNC_DRV_QVER_RESP_OS_TYPE_SOLARIS 0x1dUL + #define FUNC_DRV_QVER_RESP_OS_TYPE_LINUX 0x24UL + #define FUNC_DRV_QVER_RESP_OS_TYPE_FREEBSD 0x2aUL + #define FUNC_DRV_QVER_RESP_OS_TYPE_ESXI 0x68UL + #define FUNC_DRV_QVER_RESP_OS_TYPE_WIN864 0x73UL + #define FUNC_DRV_QVER_RESP_OS_TYPE_WIN2012R2 0x74UL u8 ver_maj; u8 ver_min; u8 ver_upd; @@ -1528,44 +1576,44 @@ struct hwrm_port_phy_cfg_input { #define PORT_PHY_CFG_REQ_ENABLES_TX_LPI_TIMER 0x400UL __le16 port_id; __le16 force_link_speed; - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_100MB (0x1UL << 0) - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_1GB (0xaUL << 0) - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_2GB (0x14UL << 0) - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_2_5GB (0x19UL << 0) - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_10GB (0x64UL << 0) - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_20GB (0xc8UL << 0) - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_25GB (0xfaUL << 0) - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_40GB (0x190UL << 0) - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_50GB (0x1f4UL << 0) - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_100GB (0x3e8UL << 0) - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_10MB (0xffffUL << 0) + #define 
PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_100MB 0x1UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_1GB 0xaUL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_2GB 0x14UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_2_5GB 0x19UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_10GB 0x64UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_20GB 0xc8UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_25GB 0xfaUL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_40GB 0x190UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_50GB 0x1f4UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_100GB 0x3e8UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_10MB 0xffffUL u8 auto_mode; - #define PORT_PHY_CFG_REQ_AUTO_MODE_NONE (0x0UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_MODE_ALL_SPEEDS (0x1UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_MODE_ONE_SPEED (0x2UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_MODE_ONE_OR_BELOW (0x3UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_MODE_SPEED_MASK (0x4UL << 0) + #define PORT_PHY_CFG_REQ_AUTO_MODE_NONE 0x0UL + #define PORT_PHY_CFG_REQ_AUTO_MODE_ALL_SPEEDS 0x1UL + #define PORT_PHY_CFG_REQ_AUTO_MODE_ONE_SPEED 0x2UL + #define PORT_PHY_CFG_REQ_AUTO_MODE_ONE_OR_BELOW 0x3UL + #define PORT_PHY_CFG_REQ_AUTO_MODE_SPEED_MASK 0x4UL u8 auto_duplex; - #define PORT_PHY_CFG_REQ_AUTO_DUPLEX_HALF (0x0UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_DUPLEX_FULL (0x1UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_DUPLEX_BOTH (0x2UL << 0) + #define PORT_PHY_CFG_REQ_AUTO_DUPLEX_HALF 0x0UL + #define PORT_PHY_CFG_REQ_AUTO_DUPLEX_FULL 0x1UL + #define PORT_PHY_CFG_REQ_AUTO_DUPLEX_BOTH 0x2UL u8 auto_pause; #define PORT_PHY_CFG_REQ_AUTO_PAUSE_TX 0x1UL #define PORT_PHY_CFG_REQ_AUTO_PAUSE_RX 0x2UL #define PORT_PHY_CFG_REQ_AUTO_PAUSE_AUTONEG_PAUSE 0x4UL u8 unused_0; __le16 auto_link_speed; - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_100MB (0x1UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_1GB (0xaUL << 0) - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_2GB (0x14UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_2_5GB (0x19UL << 0) - #define 
PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_10GB (0x64UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_20GB (0xc8UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_25GB (0xfaUL << 0) - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_40GB (0x190UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_50GB (0x1f4UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_100GB (0x3e8UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_10MB (0xffffUL << 0) + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_100MB 0x1UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_1GB 0xaUL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_2GB 0x14UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_2_5GB 0x19UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_10GB 0x64UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_20GB 0xc8UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_25GB 0xfaUL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_40GB 0x190UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_50GB 0x1f4UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_100GB 0x3e8UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_10MB 0xffffUL __le16 auto_link_speed_mask; #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_100MBHD 0x1UL #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_100MB 0x2UL @@ -1582,12 +1630,12 @@ struct hwrm_port_phy_cfg_input { #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_10MBHD 0x1000UL #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_10MB 0x2000UL u8 wirespeed; - #define PORT_PHY_CFG_REQ_WIRESPEED_OFF (0x0UL << 0) - #define PORT_PHY_CFG_REQ_WIRESPEED_ON (0x1UL << 0) + #define PORT_PHY_CFG_REQ_WIRESPEED_OFF 0x0UL + #define PORT_PHY_CFG_REQ_WIRESPEED_ON 0x1UL u8 lpbk; - #define PORT_PHY_CFG_REQ_LPBK_NONE (0x0UL << 0) - #define PORT_PHY_CFG_REQ_LPBK_LOCAL (0x1UL << 0) - #define PORT_PHY_CFG_REQ_LPBK_REMOTE (0x2UL << 0) + #define PORT_PHY_CFG_REQ_LPBK_NONE 0x0UL + #define PORT_PHY_CFG_REQ_LPBK_LOCAL 0x1UL + #define PORT_PHY_CFG_REQ_LPBK_REMOTE 0x2UL u8 force_pause; #define PORT_PHY_CFG_REQ_FORCE_PAUSE_TX 0x1UL #define PORT_PHY_CFG_REQ_FORCE_PAUSE_RX 0x2UL @@ -1641,25 
+1689,25 @@ struct hwrm_port_phy_qcfg_output { __le16 seq_id; __le16 resp_len; u8 link; - #define PORT_PHY_QCFG_RESP_LINK_NO_LINK (0x0UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SIGNAL (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_LINK (0x2UL << 0) + #define PORT_PHY_QCFG_RESP_LINK_NO_LINK 0x0UL + #define PORT_PHY_QCFG_RESP_LINK_SIGNAL 0x1UL + #define PORT_PHY_QCFG_RESP_LINK_LINK 0x2UL u8 unused_0; __le16 link_speed; - #define PORT_PHY_QCFG_RESP_LINK_SPEED_100MB (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SPEED_1GB (0xaUL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SPEED_2GB (0x14UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SPEED_2_5GB (0x19UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SPEED_10GB (0x64UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SPEED_20GB (0xc8UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SPEED_25GB (0xfaUL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SPEED_40GB (0x190UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SPEED_50GB (0x1f4UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SPEED_100GB (0x3e8UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SPEED_10MB (0xffffUL << 0) + #define PORT_PHY_QCFG_RESP_LINK_SPEED_100MB 0x1UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_1GB 0xaUL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_2GB 0x14UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_2_5GB 0x19UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_10GB 0x64UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_20GB 0xc8UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_25GB 0xfaUL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_40GB 0x190UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_50GB 0x1f4UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_100GB 0x3e8UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_10MB 0xffffUL u8 duplex; - #define PORT_PHY_QCFG_RESP_DUPLEX_HALF (0x0UL << 0) - #define PORT_PHY_QCFG_RESP_DUPLEX_FULL (0x1UL << 0) + #define PORT_PHY_QCFG_RESP_DUPLEX_HALF 0x0UL + #define PORT_PHY_QCFG_RESP_DUPLEX_FULL 0x1UL u8 pause; #define PORT_PHY_QCFG_RESP_PAUSE_TX 0x1UL #define PORT_PHY_QCFG_RESP_PAUSE_RX 0x2UL @@ -1679,39 +1727,39 @@ struct 
hwrm_port_phy_qcfg_output { #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_10MBHD 0x1000UL #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_10MB 0x2000UL __le16 force_link_speed; - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_100MB (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_1GB (0xaUL << 0) - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_2GB (0x14UL << 0) - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_2_5GB (0x19UL << 0) - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_10GB (0x64UL << 0) - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_20GB (0xc8UL << 0) - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_25GB (0xfaUL << 0) - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_40GB (0x190UL << 0) - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_50GB (0x1f4UL << 0) - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_100GB (0x3e8UL << 0) - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_10MB (0xffffUL << 0) + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_100MB 0x1UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_1GB 0xaUL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_2GB 0x14UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_2_5GB 0x19UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_10GB 0x64UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_20GB 0xc8UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_25GB 0xfaUL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_40GB 0x190UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_50GB 0x1f4UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_100GB 0x3e8UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_10MB 0xffffUL u8 auto_mode; - #define PORT_PHY_QCFG_RESP_AUTO_MODE_NONE (0x0UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_MODE_ALL_SPEEDS (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_MODE_ONE_SPEED (0x2UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_MODE_ONE_OR_BELOW (0x3UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_MODE_SPEED_MASK (0x4UL << 0) + #define PORT_PHY_QCFG_RESP_AUTO_MODE_NONE 0x0UL + #define PORT_PHY_QCFG_RESP_AUTO_MODE_ALL_SPEEDS 0x1UL + #define 
PORT_PHY_QCFG_RESP_AUTO_MODE_ONE_SPEED 0x2UL + #define PORT_PHY_QCFG_RESP_AUTO_MODE_ONE_OR_BELOW 0x3UL + #define PORT_PHY_QCFG_RESP_AUTO_MODE_SPEED_MASK 0x4UL u8 auto_pause; #define PORT_PHY_QCFG_RESP_AUTO_PAUSE_TX 0x1UL #define PORT_PHY_QCFG_RESP_AUTO_PAUSE_RX 0x2UL #define PORT_PHY_QCFG_RESP_AUTO_PAUSE_AUTONEG_PAUSE 0x4UL __le16 auto_link_speed; - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_100MB (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_1GB (0xaUL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_2GB (0x14UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_2_5GB (0x19UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_10GB (0x64UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_20GB (0xc8UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_25GB (0xfaUL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_40GB (0x190UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_50GB (0x1f4UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_100GB (0x3e8UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_10MB (0xffffUL << 0) + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_100MB 0x1UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_1GB 0xaUL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_2GB 0x14UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_2_5GB 0x19UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_10GB 0x64UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_20GB 0xc8UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_25GB 0xfaUL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_40GB 0x190UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_50GB 0x1f4UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_100GB 0x3e8UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_10MB 0xffffUL __le16 auto_link_speed_mask; #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_100MBHD 0x1UL #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_100MB 0x2UL @@ -1728,46 +1776,46 @@ struct hwrm_port_phy_qcfg_output { #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_10MBHD 0x1000UL #define 
PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_10MB 0x2000UL u8 wirespeed; - #define PORT_PHY_QCFG_RESP_WIRESPEED_OFF (0x0UL << 0) - #define PORT_PHY_QCFG_RESP_WIRESPEED_ON (0x1UL << 0) + #define PORT_PHY_QCFG_RESP_WIRESPEED_OFF 0x0UL + #define PORT_PHY_QCFG_RESP_WIRESPEED_ON 0x1UL u8 lpbk; - #define PORT_PHY_QCFG_RESP_LPBK_NONE (0x0UL << 0) - #define PORT_PHY_QCFG_RESP_LPBK_LOCAL (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_LPBK_REMOTE (0x2UL << 0) + #define PORT_PHY_QCFG_RESP_LPBK_NONE 0x0UL + #define PORT_PHY_QCFG_RESP_LPBK_LOCAL 0x1UL + #define PORT_PHY_QCFG_RESP_LPBK_REMOTE 0x2UL u8 force_pause; #define PORT_PHY_QCFG_RESP_FORCE_PAUSE_TX 0x1UL #define PORT_PHY_QCFG_RESP_FORCE_PAUSE_RX 0x2UL u8 module_status; - #define PORT_PHY_QCFG_RESP_MODULE_STATUS_NONE (0x0UL << 0) - #define PORT_PHY_QCFG_RESP_MODULE_STATUS_DISABLETX (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_MODULE_STATUS_WARNINGMSG (0x2UL << 0) - #define PORT_PHY_QCFG_RESP_MODULE_STATUS_PWRDOWN (0x3UL << 0) - #define PORT_PHY_QCFG_RESP_MODULE_STATUS_NOTINSERTED (0x4UL << 0) - #define PORT_PHY_QCFG_RESP_MODULE_STATUS_NOTAPPLICABLE (0xffUL << 0) + #define PORT_PHY_QCFG_RESP_MODULE_STATUS_NONE 0x0UL + #define PORT_PHY_QCFG_RESP_MODULE_STATUS_DISABLETX 0x1UL + #define PORT_PHY_QCFG_RESP_MODULE_STATUS_WARNINGMSG 0x2UL + #define PORT_PHY_QCFG_RESP_MODULE_STATUS_PWRDOWN 0x3UL + #define PORT_PHY_QCFG_RESP_MODULE_STATUS_NOTINSERTED 0x4UL + #define PORT_PHY_QCFG_RESP_MODULE_STATUS_NOTAPPLICABLE 0xffUL __le32 preemphasis; u8 phy_maj; u8 phy_min; u8 phy_bld; u8 phy_type; - #define PORT_PHY_QCFG_RESP_PHY_TYPE_UNKNOWN (0x0UL << 0) - #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASECR (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR4 (0x2UL << 0) - #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASELR (0x3UL << 0) - #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASESR (0x4UL << 0) - #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR2 (0x5UL << 0) - #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKX (0x6UL << 0) - #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR (0x7UL << 0) 
- #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASET (0x8UL << 0) - #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASETE (0x9UL << 0) - #define PORT_PHY_QCFG_RESP_PHY_TYPE_SGMIIEXTPHY (0xaUL << 0) + #define PORT_PHY_QCFG_RESP_PHY_TYPE_UNKNOWN 0x0UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASECR 0x1UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR4 0x2UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASELR 0x3UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASESR 0x4UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR2 0x5UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKX 0x6UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR 0x7UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASET 0x8UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASETE 0x9UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_SGMIIEXTPHY 0xaUL u8 media_type; - #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_UNKNOWN (0x0UL << 0) - #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_TP (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_DAC (0x2UL << 0) - #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_FIBRE (0x3UL << 0) + #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_UNKNOWN 0x0UL + #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_TP 0x1UL + #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_DAC 0x2UL + #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_FIBRE 0x3UL u8 xcvr_pkg_type; - #define PORT_PHY_QCFG_RESP_XCVR_PKG_TYPE_XCVR_INTERNAL (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_XCVR_PKG_TYPE_XCVR_EXTERNAL (0x2UL << 0) + #define PORT_PHY_QCFG_RESP_XCVR_PKG_TYPE_XCVR_INTERNAL 0x1UL + #define PORT_PHY_QCFG_RESP_XCVR_PKG_TYPE_XCVR_EXTERNAL 0x2UL u8 eee_config_phy_addr; #define PORT_PHY_QCFG_RESP_PHY_ADDR_MASK 0x1fUL #define PORT_PHY_QCFG_RESP_PHY_ADDR_SFT 0 @@ -1796,11 +1844,11 @@ struct hwrm_port_phy_qcfg_output { #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_SPEEDS_10MBHD 0x1000UL #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_SPEEDS_10MB 0x2000UL u8 link_partner_adv_auto_mode; - #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_NONE (0x0UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ALL_SPEEDS (0x1UL << 0) - #define 
PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ONE_SPEED (0x2UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ONE_OR_BELOW (0x3UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_SPEED_MASK (0x4UL << 0) + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_NONE 0x0UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ALL_SPEEDS 0x1UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ONE_SPEED 0x2UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ONE_OR_BELOW 0x3UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_SPEED_MASK 0x4UL u8 link_partner_adv_pause; #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_PAUSE_TX 0x1UL #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_PAUSE_RX 0x2UL @@ -1859,7 +1907,7 @@ struct hwrm_port_mac_cfg_input { __le64 resp_addr; __le32 flags; #define PORT_MAC_CFG_REQ_FLAGS_MATCH_LINK 0x1UL - #define PORT_MAC_CFG_REQ_FLAGS_COS_ASSIGNMENT_ENABLE 0x2UL + #define PORT_MAC_CFG_REQ_FLAGS_VLAN_PRI2COS_ENABLE 0x2UL #define PORT_MAC_CFG_REQ_FLAGS_TUNNEL_PRI2COS_ENABLE 0x4UL #define PORT_MAC_CFG_REQ_FLAGS_IP_DSCP2COS_ENABLE 0x8UL #define PORT_MAC_CFG_REQ_FLAGS_PTP_RX_TS_CAPTURE_ENABLE 0x10UL @@ -1868,28 +1916,50 @@ struct hwrm_port_mac_cfg_input { #define PORT_MAC_CFG_REQ_FLAGS_PTP_TX_TS_CAPTURE_DISABLE 0x80UL #define PORT_MAC_CFG_REQ_FLAGS_OOB_WOL_ENABLE 0x100UL #define PORT_MAC_CFG_REQ_FLAGS_OOB_WOL_DISABLE 0x200UL + #define PORT_MAC_CFG_REQ_FLAGS_VLAN_PRI2COS_DISABLE 0x400UL + #define PORT_MAC_CFG_REQ_FLAGS_TUNNEL_PRI2COS_DISABLE 0x800UL + #define PORT_MAC_CFG_REQ_FLAGS_IP_DSCP2COS_DISABLE 0x1000UL __le32 enables; #define PORT_MAC_CFG_REQ_ENABLES_IPG 0x1UL #define PORT_MAC_CFG_REQ_ENABLES_LPBK 0x2UL - #define PORT_MAC_CFG_REQ_ENABLES_IVLAN_PRI2COS_MAP_PRI 0x4UL - #define PORT_MAC_CFG_REQ_ENABLES_LCOS_MAP_PRI 0x8UL + #define PORT_MAC_CFG_REQ_ENABLES_VLAN_PRI2COS_MAP_PRI 0x4UL + #define PORT_MAC_CFG_REQ_ENABLES_RESERVED1 0x8UL #define PORT_MAC_CFG_REQ_ENABLES_TUNNEL_PRI2COS_MAP_PRI 0x10UL #define 
PORT_MAC_CFG_REQ_ENABLES_DSCP2COS_MAP_PRI 0x20UL #define PORT_MAC_CFG_REQ_ENABLES_RX_TS_CAPTURE_PTP_MSG_TYPE 0x40UL #define PORT_MAC_CFG_REQ_ENABLES_TX_TS_CAPTURE_PTP_MSG_TYPE 0x80UL + #define PORT_MAC_CFG_REQ_ENABLES_COS_FIELD_CFG 0x100UL __le16 port_id; u8 ipg; u8 lpbk; - #define PORT_MAC_CFG_REQ_LPBK_NONE (0x0UL << 0) - #define PORT_MAC_CFG_REQ_LPBK_LOCAL (0x1UL << 0) - #define PORT_MAC_CFG_REQ_LPBK_REMOTE (0x2UL << 0) - u8 ivlan_pri2cos_map_pri; - u8 lcos_map_pri; + #define PORT_MAC_CFG_REQ_LPBK_NONE 0x0UL + #define PORT_MAC_CFG_REQ_LPBK_LOCAL 0x1UL + #define PORT_MAC_CFG_REQ_LPBK_REMOTE 0x2UL + u8 vlan_pri2cos_map_pri; + u8 reserved1; u8 tunnel_pri2cos_map_pri; u8 dscp2pri_map_pri; __le16 rx_ts_capture_ptp_msg_type; __le16 tx_ts_capture_ptp_msg_type; - __le32 unused_0; + u8 cos_field_cfg; + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_RSVD1 0x1UL + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_MASK 0x6UL + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_SFT 1 + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_INNERMOST (0x0UL << 1) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_OUTER (0x1UL << 1) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_OUTERMOST (0x2UL << 1) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_UNSPECIFIED (0x3UL << 1) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_LAST PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_UNSPECIFIED + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_MASK 0x18UL + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_SFT 3 + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_INNERMOST (0x0UL << 3) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_OUTER (0x1UL << 3) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_OUTERMOST (0x2UL << 3) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_UNSPECIFIED (0x3UL << 3) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_LAST PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_UNSPECIFIED + #define 
PORT_MAC_CFG_REQ_COS_FIELD_CFG_DEFAULT_COS_MASK 0xe0UL + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_DEFAULT_COS_SFT 5 + u8 unused_0[3]; }; /* Output (16 bytes) */ @@ -1902,9 +1972,9 @@ struct hwrm_port_mac_cfg_output { __le16 mtu; u8 ipg; u8 lpbk; - #define PORT_MAC_CFG_RESP_LPBK_NONE (0x0UL << 0) - #define PORT_MAC_CFG_RESP_LPBK_LOCAL (0x1UL << 0) - #define PORT_MAC_CFG_RESP_LPBK_REMOTE (0x2UL << 0) + #define PORT_MAC_CFG_RESP_LPBK_NONE 0x0UL + #define PORT_MAC_CFG_RESP_LPBK_LOCAL 0x1UL + #define PORT_MAC_CFG_RESP_LPBK_REMOTE 0x2UL u8 unused_0; u8 valid; }; @@ -2163,8 +2233,8 @@ struct hwrm_queue_qportcfg_input { __le64 resp_addr; __le32 flags; #define QUEUE_QPORTCFG_REQ_FLAGS_PATH 0x1UL - #define QUEUE_QPORTCFG_REQ_FLAGS_PATH_TX (0x0UL << 0) - #define QUEUE_QPORTCFG_REQ_FLAGS_PATH_RX (0x1UL << 0) + #define QUEUE_QPORTCFG_REQ_FLAGS_PATH_TX 0x0UL + #define QUEUE_QPORTCFG_REQ_FLAGS_PATH_RX 0x1UL #define QUEUE_QPORTCFG_REQ_FLAGS_PATH_LAST QUEUE_QPORTCFG_REQ_FLAGS_PATH_RX __le16 port_id; __le16 unused_0; @@ -2179,50 +2249,51 @@ struct hwrm_queue_qportcfg_output { u8 max_configurable_queues; u8 max_configurable_lossless_queues; u8 queue_cfg_allowed; - u8 queue_buffers_cfg_allowed; + u8 queue_cfg_info; + #define QUEUE_QPORTCFG_RESP_QUEUE_CFG_INFO_ASYM_CFG 0x1UL u8 queue_pfcenable_cfg_allowed; u8 queue_pri2cos_cfg_allowed; u8 queue_cos2bw_cfg_allowed; u8 queue_id0; u8 queue_id0_service_profile; - #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSY (0x0UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSLESS (0x1UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_UNKNOWN (0xffUL << 0) + #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_UNKNOWN 0xffUL u8 queue_id1; u8 queue_id1_service_profile; - #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LOSSY (0x0UL << 0) - #define 
QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LOSSLESS (0x1UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_UNKNOWN (0xffUL << 0) + #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_UNKNOWN 0xffUL u8 queue_id2; u8 queue_id2_service_profile; - #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LOSSY (0x0UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LOSSLESS (0x1UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_UNKNOWN (0xffUL << 0) + #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_UNKNOWN 0xffUL u8 queue_id3; u8 queue_id3_service_profile; - #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LOSSY (0x0UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LOSSLESS (0x1UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_UNKNOWN (0xffUL << 0) + #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_UNKNOWN 0xffUL u8 queue_id4; u8 queue_id4_service_profile; - #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LOSSY (0x0UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LOSSLESS (0x1UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_UNKNOWN (0xffUL << 0) + #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_UNKNOWN 0xffUL u8 queue_id5; u8 queue_id5_service_profile; - #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LOSSY (0x0UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LOSSLESS 
(0x1UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_UNKNOWN (0xffUL << 0) + #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_UNKNOWN 0xffUL u8 queue_id6; u8 queue_id6_service_profile; - #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LOSSY (0x0UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LOSSLESS (0x1UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_UNKNOWN (0xffUL << 0) + #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_UNKNOWN 0xffUL u8 queue_id7; u8 queue_id7_service_profile; - #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSY (0x0UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSLESS (0x1UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_UNKNOWN (0xffUL << 0) + #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_UNKNOWN 0xffUL u8 valid; }; @@ -2235,19 +2306,21 @@ struct hwrm_queue_cfg_input { __le16 target_id; __le64 resp_addr; __le32 flags; - #define QUEUE_CFG_REQ_FLAGS_PATH 0x1UL - #define QUEUE_CFG_REQ_FLAGS_PATH_TX (0x0UL << 0) - #define QUEUE_CFG_REQ_FLAGS_PATH_RX (0x1UL << 0) - #define QUEUE_CFG_REQ_FLAGS_PATH_LAST QUEUE_CFG_REQ_FLAGS_PATH_RX + #define QUEUE_CFG_REQ_FLAGS_PATH_MASK 0x3UL + #define QUEUE_CFG_REQ_FLAGS_PATH_SFT 0 + #define QUEUE_CFG_REQ_FLAGS_PATH_TX 0x0UL + #define QUEUE_CFG_REQ_FLAGS_PATH_RX 0x1UL + #define QUEUE_CFG_REQ_FLAGS_PATH_BIDIR 0x2UL + #define QUEUE_CFG_REQ_FLAGS_PATH_LAST QUEUE_CFG_REQ_FLAGS_PATH_BIDIR __le32 enables; #define QUEUE_CFG_REQ_ENABLES_DFLT_LEN 0x1UL #define QUEUE_CFG_REQ_ENABLES_SERVICE_PROFILE 
0x2UL __le32 queue_id; __le32 dflt_len; u8 service_profile; - #define QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSY (0x0UL << 0) - #define QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSLESS (0x1UL << 0) - #define QUEUE_CFG_REQ_SERVICE_PROFILE_UNKNOWN (0xffUL << 0) + #define QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_CFG_REQ_SERVICE_PROFILE_UNKNOWN 0xffUL u8 unused_0[7]; }; @@ -2264,50 +2337,6 @@ struct hwrm_queue_cfg_output { u8 valid; }; -/* hwrm_queue_buffers_cfg */ -/* Input (56 bytes) */ -struct hwrm_queue_buffers_cfg_input { - __le16 req_type; - __le16 cmpl_ring; - __le16 seq_id; - __le16 target_id; - __le64 resp_addr; - __le32 flags; - #define QUEUE_BUFFERS_CFG_REQ_FLAGS_PATH 0x1UL - #define QUEUE_BUFFERS_CFG_REQ_FLAGS_PATH_TX (0x0UL << 0) - #define QUEUE_BUFFERS_CFG_REQ_FLAGS_PATH_RX (0x1UL << 0) - #define QUEUE_BUFFERS_CFG_REQ_FLAGS_PATH_LAST QUEUE_BUFFERS_CFG_REQ_FLAGS_PATH_RX - __le32 enables; - #define QUEUE_BUFFERS_CFG_REQ_ENABLES_RESERVED 0x1UL - #define QUEUE_BUFFERS_CFG_REQ_ENABLES_SHARED 0x2UL - #define QUEUE_BUFFERS_CFG_REQ_ENABLES_XOFF 0x4UL - #define QUEUE_BUFFERS_CFG_REQ_ENABLES_XON 0x8UL - #define QUEUE_BUFFERS_CFG_REQ_ENABLES_FULL 0x10UL - #define QUEUE_BUFFERS_CFG_REQ_ENABLES_NOTFULL 0x20UL - #define QUEUE_BUFFERS_CFG_REQ_ENABLES_MAX 0x40UL - __le32 queue_id; - __le32 reserved; - __le32 shared; - __le32 xoff; - __le32 xon; - __le32 full; - __le32 notfull; - __le32 max; -}; - -/* Output (16 bytes) */ -struct hwrm_queue_buffers_cfg_output { - __le16 error_code; - __le16 req_type; - __le16 seq_id; - __le16 resp_len; - __le32 unused_0; - u8 unused_1; - u8 unused_2; - u8 unused_3; - u8 valid; -}; - /* hwrm_queue_pfcenable_cfg */ /* Input (24 bytes) */ struct hwrm_queue_pfcenable_cfg_input { @@ -2351,12 +2380,22 @@ struct hwrm_queue_pri2cos_cfg_input { __le16 target_id; __le64 resp_addr; __le32 flags; - #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH 0x1UL + #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_MASK 
0x3UL + #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_SFT 0 #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_TX (0x0UL << 0) #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_RX (0x1UL << 0) - #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_LAST QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_RX - #define QUEUE_PRI2COS_CFG_REQ_FLAGS_IVLAN 0x2UL + #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_BIDIR (0x2UL << 0) + #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_LAST QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_BIDIR + #define QUEUE_PRI2COS_CFG_REQ_FLAGS_IVLAN 0x4UL __le32 enables; + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI0_COS_QUEUE_ID 0x1UL + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI1_COS_QUEUE_ID 0x2UL + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI2_COS_QUEUE_ID 0x4UL + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI3_COS_QUEUE_ID 0x8UL + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI4_COS_QUEUE_ID 0x10UL + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI5_COS_QUEUE_ID 0x20UL + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI6_COS_QUEUE_ID 0x40UL + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI7_COS_QUEUE_ID 0x80UL u8 port_id; u8 pri0_cos_queue_id; u8 pri1_cos_queue_id; @@ -2404,82 +2443,226 @@ struct hwrm_queue_cos2bw_cfg_input { u8 queue_id0; u8 unused_0; __le32 queue_id0_min_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_INVALID __le32 queue_id0_max_bw; + #define 
QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_INVALID u8 queue_id0_tsa_assign; - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_SP (0x0UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_ETS (0x1UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_SP 0x0UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_ETS 0x1UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_RESERVED_FIRST 0x2UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_RESERVED_LAST 0xffUL u8 queue_id0_pri_lvl; u8 queue_id0_bw_weight; u8 queue_id1; __le32 queue_id1_min_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL 
<< 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_INVALID __le32 queue_id1_max_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_INVALID u8 queue_id1_tsa_assign; - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_SP (0x0UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_ETS (0x1UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_SP 0x0UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_ETS 0x1UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_RESERVED_FIRST 0x2UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_RESERVED_LAST 0xffUL u8 queue_id1_pri_lvl; u8 queue_id1_bw_weight; u8 queue_id2; __le32 queue_id2_min_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 
29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_INVALID __le32 queue_id2_max_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_INVALID u8 queue_id2_tsa_assign; - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_SP (0x0UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_ETS (0x1UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_SP 0x0UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_ETS 0x1UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_RESERVED_FIRST 0x2UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_RESERVED_LAST 0xffUL u8 queue_id2_pri_lvl; u8 queue_id2_bw_weight; u8 queue_id3; __le32 queue_id3_min_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_RSVD 0x10000000UL + #define 
QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_INVALID __le32 queue_id3_max_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_INVALID u8 queue_id3_tsa_assign; - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_SP (0x0UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_ETS (0x1UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_SP 0x0UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_ETS 0x1UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_RESERVED_FIRST 0x2UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_RESERVED_LAST 0xffUL u8 queue_id3_pri_lvl; u8 queue_id3_bw_weight; u8 queue_id4; __le32 queue_id4_min_bw; + #define 
QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_INVALID __le32 queue_id4_max_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_INVALID u8 queue_id4_tsa_assign; - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_SP (0x0UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_ETS (0x1UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_SP 0x0UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_ETS 0x1UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_RESERVED_FIRST 0x2UL 
+ #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_RESERVED_LAST 0xffUL u8 queue_id4_pri_lvl; u8 queue_id4_bw_weight; u8 queue_id5; __le32 queue_id5_min_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_INVALID __le32 queue_id5_max_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_INVALID u8 queue_id5_tsa_assign; - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_SP (0x0UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_ETS (0x1UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) + #define 
QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_SP 0x0UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_ETS 0x1UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_RESERVED_FIRST 0x2UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_RESERVED_LAST 0xffUL u8 queue_id5_pri_lvl; u8 queue_id5_bw_weight; u8 queue_id6; __le32 queue_id6_min_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_INVALID __le32 queue_id6_max_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_INVALID u8 queue_id6_tsa_assign; - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_SP (0x0UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_ETS (0x1UL << 0) - 
#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_SP 0x0UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_ETS 0x1UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_RESERVED_FIRST 0x2UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_RESERVED_LAST 0xffUL u8 queue_id6_pri_lvl; u8 queue_id6_bw_weight; u8 queue_id7; __le32 queue_id7_min_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_INVALID __le32 queue_id7_max_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_LAST 
QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_INVALID u8 queue_id7_tsa_assign; - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_SP (0x0UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_ETS (0x1UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_SP 0x0UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_ETS 0x1UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_RESERVED_FIRST 0x2UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_RESERVED_LAST 0xffUL u8 queue_id7_pri_lvl; u8 queue_id7_bw_weight; u8 unused_1[5]; @@ -2563,6 +2746,7 @@ struct hwrm_vnic_cfg_input { #define VNIC_CFG_REQ_FLAGS_BD_STALL_MODE 0x4UL #define VNIC_CFG_REQ_FLAGS_ROCE_DUAL_VNIC_MODE 0x8UL #define VNIC_CFG_REQ_FLAGS_ROCE_ONLY_VNIC_MODE 0x10UL + #define VNIC_CFG_REQ_FLAGS_RSS_DFLT_CR_MODE 0x20UL __le32 enables; #define VNIC_CFG_REQ_ENABLES_DFLT_RING_GRP 0x1UL #define VNIC_CFG_REQ_ENABLES_RSS_RULE 0x2UL @@ -2615,18 +2799,18 @@ struct hwrm_vnic_tpa_cfg_input { #define VNIC_TPA_CFG_REQ_ENABLES_MIN_AGG_LEN 0x8UL __le16 vnic_id; __le16 max_agg_segs; - #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_1 (0x0UL << 0) - #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_2 (0x1UL << 0) - #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_4 (0x2UL << 0) - #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_8 (0x3UL << 0) - #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_MAX (0x1fUL << 0) + #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_1 0x0UL + #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_2 0x1UL + #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_4 0x2UL + #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_8 0x3UL + #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_MAX 0x1fUL __le16 max_aggs; - #define VNIC_TPA_CFG_REQ_MAX_AGGS_1 (0x0UL << 0) - #define VNIC_TPA_CFG_REQ_MAX_AGGS_2 (0x1UL << 0) - #define VNIC_TPA_CFG_REQ_MAX_AGGS_4 (0x2UL << 0) - #define VNIC_TPA_CFG_REQ_MAX_AGGS_8 (0x3UL << 0) - #define 
VNIC_TPA_CFG_REQ_MAX_AGGS_16 (0x4UL << 0) - #define VNIC_TPA_CFG_REQ_MAX_AGGS_MAX (0x7UL << 0) + #define VNIC_TPA_CFG_REQ_MAX_AGGS_1 0x0UL + #define VNIC_TPA_CFG_REQ_MAX_AGGS_2 0x1UL + #define VNIC_TPA_CFG_REQ_MAX_AGGS_4 0x2UL + #define VNIC_TPA_CFG_REQ_MAX_AGGS_8 0x3UL + #define VNIC_TPA_CFG_REQ_MAX_AGGS_16 0x4UL + #define VNIC_TPA_CFG_REQ_MAX_AGGS_MAX 0x7UL u8 unused_0; u8 unused_1; __le32 max_agg_timer; @@ -2780,15 +2964,15 @@ struct hwrm_ring_alloc_input { __le64 resp_addr; __le32 enables; #define RING_ALLOC_REQ_ENABLES_RESERVED1 0x1UL - #define RING_ALLOC_REQ_ENABLES_RESERVED2 0x2UL + #define RING_ALLOC_REQ_ENABLES_RING_ARB_CFG 0x2UL #define RING_ALLOC_REQ_ENABLES_RESERVED3 0x4UL #define RING_ALLOC_REQ_ENABLES_STAT_CTX_ID_VALID 0x8UL #define RING_ALLOC_REQ_ENABLES_RESERVED4 0x10UL #define RING_ALLOC_REQ_ENABLES_MAX_BW_VALID 0x20UL u8 ring_type; - #define RING_ALLOC_REQ_RING_TYPE_CMPL (0x0UL << 0) - #define RING_ALLOC_REQ_RING_TYPE_TX (0x1UL << 0) - #define RING_ALLOC_REQ_RING_TYPE_RX (0x2UL << 0) + #define RING_ALLOC_REQ_RING_TYPE_CMPL 0x0UL + #define RING_ALLOC_REQ_RING_TYPE_TX 0x1UL + #define RING_ALLOC_REQ_RING_TYPE_RX 0x2UL u8 unused_0; __le16 unused_1; __le64 page_tbl_addr; @@ -2804,18 +2988,36 @@ struct hwrm_ring_alloc_input { u8 unused_4; u8 unused_5; __le32 reserved1; - __le16 reserved2; + __le16 ring_arb_cfg; + #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_MASK 0xfUL + #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_SFT 0 + #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_SP (0x1UL << 0) + #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_WFQ (0x2UL << 0) + #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_LAST RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_WFQ + #define RING_ALLOC_REQ_RING_ARB_CFG_RSVD_MASK 0xf0UL + #define RING_ALLOC_REQ_RING_ARB_CFG_RSVD_SFT 4 + #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_PARAM_MASK 0xff00UL + #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_PARAM_SFT 8 u8 unused_6; u8 unused_7; __le32 reserved3; __le32 stat_ctx_id; __le32 
reserved4; __le32 max_bw; + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_SFT 0 + #define RING_ALLOC_REQ_MAX_BW_RSVD 0x10000000UL + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_LAST RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_INVALID u8 int_mode; - #define RING_ALLOC_REQ_INT_MODE_LEGACY (0x0UL << 0) - #define RING_ALLOC_REQ_INT_MODE_RSVD (0x1UL << 0) - #define RING_ALLOC_REQ_INT_MODE_MSIX (0x2UL << 0) - #define RING_ALLOC_REQ_INT_MODE_POLL (0x3UL << 0) + #define RING_ALLOC_REQ_INT_MODE_LEGACY 0x0UL + #define RING_ALLOC_REQ_INT_MODE_RSVD 0x1UL + #define RING_ALLOC_REQ_INT_MODE_MSIX 0x2UL + #define RING_ALLOC_REQ_INT_MODE_POLL 0x3UL u8 unused_8[3]; }; @@ -2842,9 +3044,9 @@ struct hwrm_ring_free_input { __le16 target_id; __le64 resp_addr; u8 ring_type; - #define RING_FREE_REQ_RING_TYPE_CMPL (0x0UL << 0) - #define RING_FREE_REQ_RING_TYPE_TX (0x1UL << 0) - #define RING_FREE_REQ_RING_TYPE_RX (0x2UL << 0) + #define RING_FREE_REQ_RING_TYPE_CMPL 0x0UL + #define RING_FREE_REQ_RING_TYPE_TX 0x1UL + #define RING_FREE_REQ_RING_TYPE_RX 0x2UL u8 unused_0; __le16 ring_id; __le32 unused_1; @@ -2942,9 +3144,9 @@ struct hwrm_ring_reset_input { __le16 target_id; __le64 resp_addr; u8 ring_type; - #define RING_RESET_REQ_RING_TYPE_CMPL (0x0UL << 0) - #define RING_RESET_REQ_RING_TYPE_TX (0x1UL << 0) - #define RING_RESET_REQ_RING_TYPE_RX (0x2UL << 0) + #define RING_RESET_REQ_RING_TYPE_CMPL 0x0UL + #define RING_RESET_REQ_RING_TYPE_TX 0x1UL + #define RING_RESET_REQ_RING_TYPE_RX 0x2UL u8 unused_0; __le16 ring_id; __le32 unused_1; @@ -3068,36 +3270,36 @@ struct hwrm_cfa_l2_filter_alloc_input { __le16 t_l2_ivlan; __le16 t_l2_ivlan_mask; 
u8 src_type; - #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_NPORT (0x0UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_PF (0x1UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_VF (0x2UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_VNIC (0x3UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_KONG (0x4UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_APE (0x5UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_BONO (0x6UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_TANG (0x7UL << 0) + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_NPORT 0x0UL + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_PF 0x1UL + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_VF 0x2UL + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_VNIC 0x3UL + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_KONG 0x4UL + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_APE 0x5UL + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_BONO 0x6UL + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_TANG 0x7UL u8 unused_6; __le32 src_id; u8 tunnel_type; - #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL (0x0UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN (0x1UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE (0x2UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE (0x3UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP (0x4UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE (0x5UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS (0x6UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT (0x7UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE (0x8UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL (0xffUL << 0) + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL 0x0UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE 0x2UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE 0x3UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP 0x4UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL + #define 
CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS 0x6UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT 0x7UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE 0x8UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL u8 unused_7; __le16 dst_id; __le16 mirror_vnic_id; u8 pri_hint; - #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_NO_PREFER (0x0UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_ABOVE_FILTER (0x1UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_BELOW_FILTER (0x2UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_MAX (0x3UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_MIN (0x4UL << 0) + #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_NO_PREFER 0x0UL + #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_ABOVE_FILTER 0x1UL + #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_BELOW_FILTER 0x2UL + #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_MAX 0x3UL + #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_MIN 0x4UL u8 unused_8; __le32 unused_9; __le64 l2_filter_id_hint; @@ -3246,16 +3448,16 @@ struct hwrm_cfa_tunnel_filter_alloc_input { u8 l3_addr_type; u8 t_l3_addr_type; u8 tunnel_type; - #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL (0x0UL << 0) - #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN (0x1UL << 0) - #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE (0x2UL << 0) - #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE (0x3UL << 0) - #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP (0x4UL << 0) - #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE (0x5UL << 0) - #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS (0x6UL << 0) - #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT (0x7UL << 0) - #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE (0x8UL << 0) - #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL (0xffUL << 0) + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL 0x0UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE 0x2UL + #define 
CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE 0x3UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP 0x4UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS 0x6UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT 0x7UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE 0x8UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL u8 unused_0; __le32 vni; __le32 dst_vnic_id; @@ -3311,14 +3513,14 @@ struct hwrm_cfa_encap_record_alloc_input { __le32 flags; #define CFA_ENCAP_RECORD_ALLOC_REQ_FLAGS_LOOPBACK 0x1UL u8 encap_type; - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN (0x1UL << 0) - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_NVGRE (0x2UL << 0) - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_L2GRE (0x3UL << 0) - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPIP (0x4UL << 0) - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_GENEVE (0x5UL << 0) - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_MPLS (0x6UL << 0) - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VLAN (0x7UL << 0) - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPGRE (0x8UL << 0) + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN 0x1UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_NVGRE 0x2UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_L2GRE 0x3UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPIP 0x4UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_GENEVE 0x5UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_MPLS 0x6UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VLAN 0x7UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPGRE 0x8UL u8 unused_0; __le16 unused_1; __le32 encap_data[16]; @@ -3397,32 +3599,32 @@ struct hwrm_cfa_ntuple_filter_alloc_input { u8 src_macaddr[6]; __be16 ethertype; u8 ip_addr_type; - #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_UNKNOWN (0x0UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4 (0x4UL << 0) - #define 
CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV6 (0x6UL << 0) + #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_UNKNOWN 0x0UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4 0x4UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV6 0x6UL u8 ip_protocol; - #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_UNKNOWN (0x0UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP (0x6UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_TCP (0x11UL << 0) + #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_UNKNOWN 0x0UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP 0x6UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_TCP 0x11UL __le16 dst_id; __le16 mirror_vnic_id; u8 tunnel_type; - #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL (0x0UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN (0x1UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE (0x2UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE (0x3UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP (0x4UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE (0x5UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS (0x6UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT (0x7UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE (0x8UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL (0xffUL << 0) + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL 0x0UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE 0x2UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE 0x3UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP 0x4UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS 0x6UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT 0x7UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE 0x8UL + #define 
CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL u8 pri_hint; - #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_NO_PREFER (0x0UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_ABOVE (0x1UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_BELOW (0x2UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_HIGHEST (0x3UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_LOWEST (0x4UL << 0) + #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_NO_PREFER 0x0UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_ABOVE 0x1UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_BELOW 0x2UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_HIGHEST 0x3UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_LOWEST 0x4UL __be32 src_ipaddr[4]; __be32 src_ipaddr_mask[4]; __be32 dst_ipaddr[4]; @@ -3511,8 +3713,8 @@ struct hwrm_tunnel_dst_port_query_input { __le16 target_id; __le64 resp_addr; u8 tunnel_type; - #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN (0x1UL << 0) - #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_GENEVE (0x5UL << 0) + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_GENEVE 0x5UL u8 unused_0[7]; }; @@ -3539,8 +3741,8 @@ struct hwrm_tunnel_dst_port_alloc_input { __le16 target_id; __le64 resp_addr; u8 tunnel_type; - #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN (0x1UL << 0) - #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GENEVE (0x5UL << 0) + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL u8 unused_0; __be16 tunnel_dst_port_val; __le32 unused_1; @@ -3570,8 +3772,8 @@ struct hwrm_tunnel_dst_port_free_input { __le16 target_id; __le64 resp_addr; u8 tunnel_type; - #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN (0x1UL << 0) - #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE (0x5UL << 0) + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE 0x5UL u8 unused_0; __le16 
tunnel_dst_port_id; __le32 unused_1; @@ -3720,15 +3922,15 @@ struct hwrm_fw_reset_input { __le16 target_id; __le64 resp_addr; u8 embedded_proc_type; - #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_BOOT (0x0UL << 0) - #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_MGMT (0x1UL << 0) - #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_NETCTRL (0x2UL << 0) - #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_ROCE (0x3UL << 0) - #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_RSVD (0x4UL << 0) + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_BOOT 0x0UL + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_MGMT 0x1UL + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_NETCTRL 0x2UL + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_ROCE 0x3UL + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_RSVD 0x4UL u8 selfrst_status; - #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTNONE (0x0UL << 0) - #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP (0x1UL << 0) - #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTPCIERST (0x2UL << 0) + #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTNONE 0x0UL + #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP 0x1UL + #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTPCIERST 0x2UL __le16 unused_0[3]; }; @@ -3739,9 +3941,9 @@ struct hwrm_fw_reset_output { __le16 seq_id; __le16 resp_len; u8 selfrst_status; - #define FW_RESET_RESP_SELFRST_STATUS_SELFRSTNONE (0x0UL << 0) - #define FW_RESET_RESP_SELFRST_STATUS_SELFRSTASAP (0x1UL << 0) - #define FW_RESET_RESP_SELFRST_STATUS_SELFRSTPCIERST (0x2UL << 0) + #define FW_RESET_RESP_SELFRST_STATUS_SELFRSTNONE 0x0UL + #define FW_RESET_RESP_SELFRST_STATUS_SELFRSTASAP 0x1UL + #define FW_RESET_RESP_SELFRST_STATUS_SELFRSTPCIERST 0x2UL u8 unused_0; __le16 unused_1; u8 unused_2; @@ -3759,11 +3961,11 @@ struct hwrm_fw_qstatus_input { __le16 target_id; __le64 resp_addr; u8 embedded_proc_type; - #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_BOOT (0x0UL << 0) - #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_MGMT (0x1UL << 0) - #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_NETCTRL (0x2UL << 0) - #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_ROCE (0x3UL 
<< 0) - #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_RSVD (0x4UL << 0) + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_BOOT 0x0UL + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_MGMT 0x1UL + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_NETCTRL 0x2UL + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_ROCE 0x3UL + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_RSVD 0x4UL u8 unused_0[7]; }; @@ -3774,9 +3976,9 @@ struct hwrm_fw_qstatus_output { __le16 seq_id; __le16 resp_len; u8 selfrst_status; - #define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTNONE (0x0UL << 0) - #define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTASAP (0x1UL << 0) - #define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTPCIERST (0x2UL << 0) + #define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTNONE 0x0UL + #define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTASAP 0x1UL + #define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTPCIERST 0x2UL u8 unused_0; __le16 unused_1; u8 unused_2; @@ -3785,6 +3987,42 @@ struct hwrm_fw_qstatus_output { u8 valid; }; +/* hwrm_fw_set_time */ +/* Input (32 bytes) */ +struct hwrm_fw_set_time_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 year; + #define FW_SET_TIME_REQ_YEAR_UNKNOWN 0x0UL + u8 month; + u8 day; + u8 hour; + u8 minute; + u8 second; + u8 unused_0; + __le16 millisecond; + __le16 zone; + #define FW_SET_TIME_REQ_ZONE_UTC 0x0UL + #define FW_SET_TIME_REQ_ZONE_UNKNOWN 0xffffUL + __le32 unused_1; +}; + +/* Output (16 bytes) */ +struct hwrm_fw_set_time_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 unused_0; + u8 unused_1; + u8 unused_2; + u8 unused_3; + u8 valid; +}; + /* hwrm_exec_fwd_resp */ /* Input (128 bytes) */ struct hwrm_exec_fwd_resp_input { @@ -3921,32 +4159,6 @@ struct hwrm_temp_monitor_query_output { u8 valid; }; -/* hwrm_nvm_raw_write_blk */ -/* Input (32 bytes) */ -struct hwrm_nvm_raw_write_blk_input { - __le16 req_type; - __le16 cmpl_ring; - __le16 seq_id; - __le16 target_id; - __le64 resp_addr; - __le64 
host_src_addr; - __le32 dest_addr; - __le32 len; -}; - -/* Output (16 bytes) */ -struct hwrm_nvm_raw_write_blk_output { - __le16 error_code; - __le16 req_type; - __le16 seq_id; - __le16 resp_len; - __le32 unused_0; - u8 unused_1; - u8 unused_2; - u8 unused_3; - u8 valid; -}; - /* hwrm_nvm_read */ /* Input (40 bytes) */ struct hwrm_nvm_read_input { @@ -4132,9 +4344,9 @@ struct hwrm_nvm_find_dir_entry_input { u8 opt_ordinal; #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_MASK 0x3UL #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_SFT 0 - #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_EQ (0x0UL << 0) - #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_GE (0x1UL << 0) - #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_GT (0x2UL << 0) + #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_EQ 0x0UL + #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_GE 0x1UL + #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_GT 0x2UL u8 unused_1[3]; }; @@ -4266,4 +4478,41 @@ struct hwrm_nvm_verify_update_output { u8 valid; }; +/* hwrm_nvm_install_update */ +/* Input (24 bytes) */ +struct hwrm_nvm_install_update_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 install_type; + #define NVM_INSTALL_UPDATE_REQ_INSTALL_TYPE_NORMAL 0x0UL + #define NVM_INSTALL_UPDATE_REQ_INSTALL_TYPE_ALL 0xffffffffUL + __le32 unused_0; +}; + +/* Output (24 bytes) */ +struct hwrm_nvm_install_update_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le64 installed_items; + u8 result; + #define NVM_INSTALL_UPDATE_RESP_RESULT_SUCCESS 0x0UL + u8 problem_item; + #define NVM_INSTALL_UPDATE_RESP_PROBLEM_ITEM_NONE 0x0UL + #define NVM_INSTALL_UPDATE_RESP_PROBLEM_ITEM_PACKAGE 0xffUL + u8 reset_required; + #define NVM_INSTALL_UPDATE_RESP_RESET_REQUIRED_NONE 0x0UL + #define NVM_INSTALL_UPDATE_RESP_RESET_REQUIRED_PCI 0x1UL + #define NVM_INSTALL_UPDATE_RESP_RESET_REQUIRED_POWER 0x2UL + u8 unused_0; + u8 unused_1; + u8 unused_2; + u8 unused_3; + u8 valid; +}; + #endif -- cgit 
v1.1 From 5ac67d8bc753b122175e682274599338b3ee7d42 Mon Sep 17 00:00:00 2001 From: Rob Swindell Date: Mon, 19 Sep 2016 03:58:03 -0400 Subject: bnxt_en: Added support for Secure Firmware Update Using Ethtool flashdev command, entire NVM package (*.pkg) files may now be staged into the "update" area of the NVM and subsequently verified and installed by the firmware using the newly introduced command: NVM_INSTALL_UPDATE. We also introduce use of the new firmware command FW_SET_TIME so that the NVM-resident package installation log contains valid time-stamps. Signed-off-by: Rob Swindell Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 24 ++++ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 155 ++++++++++++++++++++-- drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h | 16 ++- 4 files changed, 182 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index f6b4f34..f0a9d23 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -4314,6 +4315,27 @@ hwrm_ver_get_exit: return rc; } +int bnxt_hwrm_fw_set_time(struct bnxt *bp) +{ + struct hwrm_fw_set_time_input req = {0}; + struct rtc_time tm; + struct timeval tv; + + if (bp->hwrm_spec_code < 0x10400) + return -EOPNOTSUPP; + + do_gettimeofday(&tv); + rtc_time_to_tm(tv.tv_sec, &tm); + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FW_SET_TIME, -1, -1); + req.year = cpu_to_le16(1900 + tm.tm_year); + req.month = 1 + tm.tm_mon; + req.day = tm.tm_mday; + req.hour = tm.tm_hour; + req.minute = tm.tm_min; + req.second = tm.tm_sec; + return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); +} + static int bnxt_hwrm_port_qstats(struct bnxt *bp) { int rc; @@ -6811,6 +6833,8 @@ static int bnxt_init_one(struct pci_dev *pdev, 
const struct pci_device_id *ent) if (rc) goto init_err; + bnxt_hwrm_fw_set_time(bp); + dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE | diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 012cc51..41033d0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1220,6 +1220,7 @@ int bnxt_hwrm_set_coal(struct bnxt *); int bnxt_hwrm_func_qcaps(struct bnxt *); int bnxt_hwrm_set_pause(struct bnxt *); int bnxt_hwrm_set_link_setting(struct bnxt *, bool, bool); +int bnxt_hwrm_fw_set_time(struct bnxt *); int bnxt_open_nic(struct bnxt *, bool, bool); int bnxt_close_nic(struct bnxt *, bool, bool); int bnxt_get_max_rings(struct bnxt *, int *, int *, bool); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index b83e174..4a430b6 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -21,6 +21,8 @@ #include "bnxt_nvm_defs.h" /* NVRAM content constant and structure defs */ #include "bnxt_fw_hdr.h" /* Firmware hdr constant and structure defs */ #define FLASH_NVRAM_TIMEOUT ((HWRM_CMD_TIMEOUT) * 100) +#define FLASH_PACKAGE_TIMEOUT ((HWRM_CMD_TIMEOUT) * 200) +#define INSTALL_PACKAGE_TIMEOUT ((HWRM_CMD_TIMEOUT) * 200) static char *bnxt_get_pkgver(struct net_device *dev, char *buf, size_t buflen); @@ -1028,6 +1030,10 @@ static u32 bnxt_get_link(struct net_device *dev) return bp->link_info.link_up; } +static int bnxt_find_nvram_item(struct net_device *dev, u16 type, u16 ordinal, + u16 ext, u16 *index, u32 *item_length, + u32 *data_length); + static int bnxt_flash_nvram(struct net_device *dev, u16 dir_type, u16 dir_ordinal, @@ -1179,7 +1185,6 @@ static int bnxt_flash_firmware(struct net_device *dev, (unsigned long)calculated_crc); return -EINVAL; } - /* TODO: 
Validate digital signature (RSA-encrypted SHA-256 hash) here */ rc = bnxt_flash_nvram(dev, dir_type, BNX_DIR_ORDINAL_FIRST, 0, 0, fw_data, fw_size); if (rc == 0) /* Firmware update successful */ @@ -1188,6 +1193,57 @@ static int bnxt_flash_firmware(struct net_device *dev, return rc; } +static int bnxt_flash_microcode(struct net_device *dev, + u16 dir_type, + const u8 *fw_data, + size_t fw_size) +{ + struct bnxt_ucode_trailer *trailer; + u32 calculated_crc; + u32 stored_crc; + int rc = 0; + + if (fw_size < sizeof(struct bnxt_ucode_trailer)) { + netdev_err(dev, "Invalid microcode file size: %u\n", + (unsigned int)fw_size); + return -EINVAL; + } + trailer = (struct bnxt_ucode_trailer *)(fw_data + (fw_size - + sizeof(*trailer))); + if (trailer->sig != cpu_to_le32(BNXT_UCODE_TRAILER_SIGNATURE)) { + netdev_err(dev, "Invalid microcode trailer signature: %08X\n", + le32_to_cpu(trailer->sig)); + return -EINVAL; + } + if (le16_to_cpu(trailer->dir_type) != dir_type) { + netdev_err(dev, "Expected microcode type: %d, read: %d\n", + dir_type, le16_to_cpu(trailer->dir_type)); + return -EINVAL; + } + if (le16_to_cpu(trailer->trailer_length) < + sizeof(struct bnxt_ucode_trailer)) { + netdev_err(dev, "Invalid microcode trailer length: %d\n", + le16_to_cpu(trailer->trailer_length)); + return -EINVAL; + } + + /* Confirm the CRC32 checksum of the file: */ + stored_crc = le32_to_cpu(*(__le32 *)(fw_data + fw_size - + sizeof(stored_crc))); + calculated_crc = ~crc32(~0, fw_data, fw_size - sizeof(stored_crc)); + if (calculated_crc != stored_crc) { + netdev_err(dev, + "CRC32 (%08lX) does not match calculated: %08lX\n", + (unsigned long)stored_crc, + (unsigned long)calculated_crc); + return -EINVAL; + } + rc = bnxt_flash_nvram(dev, dir_type, BNX_DIR_ORDINAL_FIRST, + 0, 0, fw_data, fw_size); + + return rc; +} + static bool bnxt_dir_type_is_ape_bin_format(u16 dir_type) { switch (dir_type) { @@ -1206,7 +1262,7 @@ static bool bnxt_dir_type_is_ape_bin_format(u16 dir_type) return false; } -static 
bool bnxt_dir_type_is_unprotected_exec_format(u16 dir_type) +static bool bnxt_dir_type_is_other_exec_format(u16 dir_type) { switch (dir_type) { case BNX_DIR_TYPE_AVS: @@ -1227,7 +1283,7 @@ static bool bnxt_dir_type_is_unprotected_exec_format(u16 dir_type) static bool bnxt_dir_type_is_executable(u16 dir_type) { return bnxt_dir_type_is_ape_bin_format(dir_type) || - bnxt_dir_type_is_unprotected_exec_format(dir_type); + bnxt_dir_type_is_other_exec_format(dir_type); } static int bnxt_flash_firmware_from_file(struct net_device *dev, @@ -1237,10 +1293,6 @@ static int bnxt_flash_firmware_from_file(struct net_device *dev, const struct firmware *fw; int rc; - if (dir_type != BNX_DIR_TYPE_UPDATE && - bnxt_dir_type_is_executable(dir_type) == false) - return -EINVAL; - rc = request_firmware(&fw, filename, &dev->dev); if (rc != 0) { netdev_err(dev, "Error %d requesting firmware file: %s\n", @@ -1249,6 +1301,8 @@ static int bnxt_flash_firmware_from_file(struct net_device *dev, } if (bnxt_dir_type_is_ape_bin_format(dir_type) == true) rc = bnxt_flash_firmware(dev, dir_type, fw->data, fw->size); + else if (bnxt_dir_type_is_other_exec_format(dir_type) == true) + rc = bnxt_flash_microcode(dev, dir_type, fw->data, fw->size); else rc = bnxt_flash_nvram(dev, dir_type, BNX_DIR_ORDINAL_FIRST, 0, 0, fw->data, fw->size); @@ -1257,10 +1311,83 @@ static int bnxt_flash_firmware_from_file(struct net_device *dev, } static int bnxt_flash_package_from_file(struct net_device *dev, - char *filename) + char *filename, u32 install_type) { - netdev_err(dev, "packages are not yet supported\n"); - return -EINVAL; + struct bnxt *bp = netdev_priv(dev); + struct hwrm_nvm_install_update_output *resp = bp->hwrm_cmd_resp_addr; + struct hwrm_nvm_install_update_input install = {0}; + const struct firmware *fw; + u32 item_len; + u16 index; + int rc; + + bnxt_hwrm_fw_set_time(bp); + + if (bnxt_find_nvram_item(dev, BNX_DIR_TYPE_UPDATE, + BNX_DIR_ORDINAL_FIRST, BNX_DIR_EXT_NONE, + &index, &item_len, NULL) != 0) { + 
netdev_err(dev, "PKG update area not created in nvram\n"); + return -ENOBUFS; + } + + rc = request_firmware(&fw, filename, &dev->dev); + if (rc != 0) { + netdev_err(dev, "PKG error %d requesting file: %s\n", + rc, filename); + return rc; + } + + if (fw->size > item_len) { + netdev_err(dev, "PKG insufficient update area in nvram: %lu", + (unsigned long)fw->size); + rc = -EFBIG; + } else { + dma_addr_t dma_handle; + u8 *kmem; + struct hwrm_nvm_modify_input modify = {0}; + + bnxt_hwrm_cmd_hdr_init(bp, &modify, HWRM_NVM_MODIFY, -1, -1); + + modify.dir_idx = cpu_to_le16(index); + modify.len = cpu_to_le32(fw->size); + + kmem = dma_alloc_coherent(&bp->pdev->dev, fw->size, + &dma_handle, GFP_KERNEL); + if (!kmem) { + netdev_err(dev, + "dma_alloc_coherent failure, length = %u\n", + (unsigned int)fw->size); + rc = -ENOMEM; + } else { + memcpy(kmem, fw->data, fw->size); + modify.host_src_addr = cpu_to_le64(dma_handle); + + rc = hwrm_send_message(bp, &modify, sizeof(modify), + FLASH_PACKAGE_TIMEOUT); + dma_free_coherent(&bp->pdev->dev, fw->size, kmem, + dma_handle); + } + } + release_firmware(fw); + if (rc) + return rc; + + if ((install_type & 0xffff) == 0) + install_type >>= 16; + bnxt_hwrm_cmd_hdr_init(bp, &install, HWRM_NVM_INSTALL_UPDATE, -1, -1); + install.install_type = cpu_to_le32(install_type); + + rc = hwrm_send_message(bp, &install, sizeof(install), + INSTALL_PACKAGE_TIMEOUT); + if (rc) + return -EOPNOTSUPP; + + if (resp->result) { + netdev_err(dev, "PKG install error = %d, problem_item = %d\n", + (s8)resp->result, (int)resp->problem_item); + return -ENOPKG; + } + return 0; } static int bnxt_flash_device(struct net_device *dev, @@ -1271,8 +1398,10 @@ static int bnxt_flash_device(struct net_device *dev, return -EINVAL; } - if (flash->region == ETHTOOL_FLASH_ALL_REGIONS) - return bnxt_flash_package_from_file(dev, flash->data); + if (flash->region == ETHTOOL_FLASH_ALL_REGIONS || + flash->region > 0xffff) + return bnxt_flash_package_from_file(dev, flash->data, + 
flash->region); return bnxt_flash_firmware_from_file(dev, flash->region, flash->data); } @@ -1516,7 +1645,7 @@ static int bnxt_set_eeprom(struct net_device *dev, /* Create or re-write an NVM item: */ if (bnxt_dir_type_is_executable(type) == true) - return -EINVAL; + return -EOPNOTSUPP; ext = eeprom->magic & 0xffff; ordinal = eeprom->offset >> 16; attr = eeprom->offset & 0xffff; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h index 82bf44a..cad30dd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h @@ -11,6 +11,7 @@ #define __BNXT_FW_HDR_H__ #define BNXT_FIRMWARE_BIN_SIGNATURE 0x1a4d4342 /* "BCM"+0x1a */ +#define BNXT_UCODE_TRAILER_SIGNATURE 0x726c7254 /* "Trlr" */ enum SUPPORTED_FAMILY { DEVICE_5702_3_4_FAMILY, /* 0 - Denali, Vinson, K2 */ @@ -85,7 +86,7 @@ enum SUPPORTED_MEDIA { struct bnxt_fw_header { __le32 signature; /* constains the constant value of - * BNXT_Firmware_Bin_Signatures + * BNXT_FIRMWARE_BIN_SIGNATURE */ u8 flags; /* reserved for ChiMP use */ u8 code_type; /* enum SUPPORTED_CODE */ @@ -102,4 +103,17 @@ struct bnxt_fw_header { u8 major_ver; }; +/* Microcode and pre-boot software/firmware trailer: */ +struct bnxt_ucode_trailer { + u8 rsa_sig[256]; + __le16 flags; + u8 version_format; + u8 version_length; + u8 version[16]; + __le16 dir_type; + __le16 trailer_length; + __le32 sig; /* BNXT_UCODE_TRAILER_SIGNATURE */ + __le32 chksum; /* CRC-32 */ +}; + #endif -- cgit v1.1 From 47f8e8b9bbbbe00740786bd1da0d5097d45ba46b Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 19 Sep 2016 03:58:04 -0400 Subject: bnxt_en: Fix ethtool -l|-L inconsistent channel counts. The existing code is inconsistent in reporting and accepting the combined channel count. bnxt_get_channels() reports maximum combined as the maximum rx count. bnxt_set_channels() accepts combined count that cannot be bigger than max rx or max tx. 
For example, if max rx = 2 and max tx = 1, we report max supported combined to be 2. But if the user tries to set combined to 2, it will fail because 2 is bigger than max tx which is 1. Fix the code to be consistent. Max allowed combined = max(max_rx, max_tx). We will accept a combined channel count <= max(max_rx, max_tx). Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 4a430b6..c74ce69 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -348,7 +348,7 @@ static void bnxt_get_channels(struct net_device *dev, int max_rx_rings, max_tx_rings, tcs; bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings, true); - channel->max_combined = max_rx_rings; + channel->max_combined = max_t(int, max_rx_rings, max_tx_rings); if (bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings, false)) { max_rx_rings = 0; @@ -406,8 +406,8 @@ static int bnxt_set_channels(struct net_device *dev, if (tcs > 1) max_tx_rings /= tcs; - if (sh && (channel->combined_count > max_rx_rings || - channel->combined_count > max_tx_rings)) + if (sh && + channel->combined_count > max_t(int, max_rx_rings, max_tx_rings)) return -ENOMEM; if (!sh && (channel->rx_count > max_rx_rings || @@ -430,8 +430,10 @@ static int bnxt_set_channels(struct net_device *dev, if (sh) { bp->flags |= BNXT_FLAG_SHARED_RINGS; - bp->rx_nr_rings = channel->combined_count; - bp->tx_nr_rings_per_tc = channel->combined_count; + bp->rx_nr_rings = min_t(int, channel->combined_count, + max_rx_rings); + bp->tx_nr_rings_per_tc = min_t(int, channel->combined_count, + max_tx_rings); } else { bp->flags &= ~BNXT_FLAG_SHARED_RINGS; bp->rx_nr_rings = channel->rx_count; -- cgit v1.1 From 7cc5a20e38fcaf395ac59e7ed6c3decb575a0dc7 Mon Sep 17 
00:00:00 2001 From: Michael Chan Date: Mon, 19 Sep 2016 03:58:05 -0400 Subject: bnxt_en: Re-arrange bnxt_hwrm_func_qcaps(). Re-arrange the code so that the generation of the random MAC address for the VF is at the end of the function. The next patch will add one more step to call bnxt_approve_mac() to get the firmware to approve the random MAC address. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index f0a9d23..b1dcece 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4161,6 +4161,11 @@ int bnxt_hwrm_func_qcaps(struct bnxt *bp) if (rc) goto hwrm_func_qcaps_exit; + bp->tx_push_thresh = 0; + if (resp->flags & + cpu_to_le32(FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED)) + bp->tx_push_thresh = BNXT_TX_PUSH_THRESH; + if (BNXT_PF(bp)) { struct bnxt_pf_info *pf = &bp->pf; @@ -4192,12 +4197,6 @@ int bnxt_hwrm_func_qcaps(struct bnxt *bp) struct bnxt_vf_info *vf = &bp->vf; vf->fw_fid = le16_to_cpu(resp->fid); - memcpy(vf->mac_addr, resp->mac_address, ETH_ALEN); - if (is_valid_ether_addr(vf->mac_addr)) - /* overwrite netdev dev_adr with admin VF MAC */ - memcpy(bp->dev->dev_addr, vf->mac_addr, ETH_ALEN); - else - random_ether_addr(bp->dev->dev_addr); vf->max_rsscos_ctxs = le16_to_cpu(resp->max_rsscos_ctx); vf->max_cp_rings = le16_to_cpu(resp->max_cmpl_rings); @@ -4209,14 +4208,16 @@ int bnxt_hwrm_func_qcaps(struct bnxt *bp) vf->max_l2_ctxs = le16_to_cpu(resp->max_l2_ctxs); vf->max_vnics = le16_to_cpu(resp->max_vnics); vf->max_stat_ctxs = le16_to_cpu(resp->max_stat_ctx); + + memcpy(vf->mac_addr, resp->mac_address, ETH_ALEN); + if (is_valid_ether_addr(vf->mac_addr)) + /* overwrite netdev dev_adr with admin VF MAC */ + memcpy(bp->dev->dev_addr, vf->mac_addr, ETH_ALEN); + else + 
random_ether_addr(bp->dev->dev_addr); #endif } - bp->tx_push_thresh = 0; - if (resp->flags & - cpu_to_le32(FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED)) - bp->tx_push_thresh = BNXT_TX_PUSH_THRESH; - hwrm_func_qcaps_exit: mutex_unlock(&bp->hwrm_cmd_lock); return rc; -- cgit v1.1 From 001154eb242b5a6667b74e5cf20873fb75f1b9d3 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 19 Sep 2016 03:58:06 -0400 Subject: bnxt_en: Call firmware to approve the random VF MAC address. After generating the random MAC address for VF, call the firmware to approve it. This step serves 2 purposes. Some hypervisor (e.g. ESX) wants to approve the MAC address. 2nd, the call will setup the proper forwarding database in the internal switch. We need to unlock the hwrm_cmd_lock mutex before calling bnxt_approve_mac(). We can do that because we are at the end of the function and all the previous firmware response data has been copied. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b1dcece..cbc0b8a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4210,11 +4210,16 @@ int bnxt_hwrm_func_qcaps(struct bnxt *bp) vf->max_stat_ctxs = le16_to_cpu(resp->max_stat_ctx); memcpy(vf->mac_addr, resp->mac_address, ETH_ALEN); - if (is_valid_ether_addr(vf->mac_addr)) + mutex_unlock(&bp->hwrm_cmd_lock); + + if (is_valid_ether_addr(vf->mac_addr)) { /* overwrite netdev dev_adr with admin VF MAC */ memcpy(bp->dev->dev_addr, vf->mac_addr, ETH_ALEN); - else + } else { random_ether_addr(bp->dev->dev_addr); + rc = bnxt_approve_mac(bp, bp->dev->dev_addr); + } + return rc; #endif } -- cgit v1.1 From 4ffcd582301bd020b1f9d00c55473af305ec19b5 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 19 Sep 2016 03:58:07 -0400 Subject: bnxt_en: Pad 
TX packets below 52 bytes. The hardware has a limitation that it won't pass host to BMC loopback packets below 52-bytes. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 41033d0..51b164a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -413,7 +413,7 @@ struct rx_tpa_end_cmp_ext { #define BNXT_RX_PAGE_SIZE (1 << BNXT_RX_PAGE_SHIFT) -#define BNXT_MIN_PKT_SIZE 45 +#define BNXT_MIN_PKT_SIZE 52 #define BNXT_NUM_TESTS(bp) 0 -- cgit v1.1 From ae8e98a6fa7a73917196c507e43414ea96b6a0fc Mon Sep 17 00:00:00 2001 From: Deepak Khungar Date: Mon, 19 Sep 2016 03:58:08 -0400 Subject: bnxt_en: Support for "ethtool -r" command Restart autoneg if autoneg is enabled. Signed-off-by: Deepak Khungar Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index c74ce69..a7e04ff 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1849,6 +1849,25 @@ static int bnxt_get_module_eeprom(struct net_device *dev, return rc; } +static int bnxt_nway_reset(struct net_device *dev) +{ + int rc = 0; + + struct bnxt *bp = netdev_priv(dev); + struct bnxt_link_info *link_info = &bp->link_info; + + if (!BNXT_SINGLE_PF(bp)) + return -EOPNOTSUPP; + + if (!(link_info->autoneg & BNXT_AUTONEG_SPEED)) + return -EINVAL; + + if (netif_running(dev)) + rc = bnxt_hwrm_set_link_setting(bp, true, false); + + return rc; +} + const struct ethtool_ops bnxt_ethtool_ops = { .get_link_ksettings = bnxt_get_link_ksettings, .set_link_ksettings = bnxt_set_link_ksettings, @@ -1881,4 +1900,5 @@ const struct ethtool_ops bnxt_ethtool_ops = { .set_eee = bnxt_set_eee, .get_module_info = bnxt_get_module_info, .get_module_eeprom = bnxt_get_module_eeprom, + .nway_reset = bnxt_nway_reset }; -- cgit v1.1 From 350a714960eb8a980c913c9be5a96bb18b2fe9da Mon Sep 17 00:00:00 2001 From: Eddie Wai Date: Mon, 19 Sep 2016 03:58:09 -0400 Subject: bnxt_en: Fixed the VF link status after a link state change The VF link state can be changed via the 'ip link set' cmd. Currently, the new link state does not take effect immediately. The fix is for the PF to send a link change async event to the designated VF after a VF link state change. This async event will trigger the VF to update the link status. Signed-off-by: Eddie Wai Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c | 84 ++++++++++++------------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index 50d2007..8be7185 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -19,6 +19,45 @@ #include "bnxt_ethtool.h" #ifdef CONFIG_BNXT_SRIOV +static int bnxt_hwrm_fwd_async_event_cmpl(struct bnxt *bp, + struct bnxt_vf_info *vf, u16 event_id) +{ + struct hwrm_fwd_async_event_cmpl_output *resp = bp->hwrm_cmd_resp_addr; + struct hwrm_fwd_async_event_cmpl_input req = {0}; + struct hwrm_async_event_cmpl *async_cmpl; + int rc = 0; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FWD_ASYNC_EVENT_CMPL, -1, -1); + if (vf) + req.encap_async_event_target_id = cpu_to_le16(vf->fw_fid); + else + /* broadcast this async event to all VFs */ + req.encap_async_event_target_id = cpu_to_le16(0xffff); + async_cmpl = (struct hwrm_async_event_cmpl *)req.encap_async_event_cmpl; + async_cmpl->type = + cpu_to_le16(HWRM_ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT); + async_cmpl->event_id = cpu_to_le16(event_id); + + mutex_lock(&bp->hwrm_cmd_lock); + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + + if (rc) { + netdev_err(bp->dev, "hwrm_fwd_async_event_cmpl failed. 
rc:%d\n", + rc); + goto fwd_async_event_cmpl_exit; + } + + if (resp->error_code) { + netdev_err(bp->dev, "hwrm_fwd_async_event_cmpl error %d\n", + resp->error_code); + rc = -1; + } + +fwd_async_event_cmpl_exit: + mutex_unlock(&bp->hwrm_cmd_lock); + return rc; +} + static int bnxt_vf_ndo_prep(struct bnxt *bp, int vf_id) { if (!test_bit(BNXT_STATE_OPEN, &bp->state)) { @@ -243,8 +282,9 @@ int bnxt_set_vf_link_state(struct net_device *dev, int vf_id, int link) rc = -EINVAL; break; } - /* CHIMP TODO: send msg to VF to update new link state */ - + if (vf->flags & (BNXT_VF_LINK_UP | BNXT_VF_LINK_FORCED)) + rc = bnxt_hwrm_fwd_async_event_cmpl(bp, vf, + HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE); return rc; } @@ -525,46 +565,6 @@ err_out1: return rc; } -static int bnxt_hwrm_fwd_async_event_cmpl(struct bnxt *bp, - struct bnxt_vf_info *vf, - u16 event_id) -{ - int rc = 0; - struct hwrm_fwd_async_event_cmpl_input req = {0}; - struct hwrm_fwd_async_event_cmpl_output *resp = bp->hwrm_cmd_resp_addr; - struct hwrm_async_event_cmpl *async_cmpl; - - bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FWD_ASYNC_EVENT_CMPL, -1, -1); - if (vf) - req.encap_async_event_target_id = cpu_to_le16(vf->fw_fid); - else - /* broadcast this async event to all VFs */ - req.encap_async_event_target_id = cpu_to_le16(0xffff); - async_cmpl = (struct hwrm_async_event_cmpl *)req.encap_async_event_cmpl; - async_cmpl->type = - cpu_to_le16(HWRM_ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT); - async_cmpl->event_id = cpu_to_le16(event_id); - - mutex_lock(&bp->hwrm_cmd_lock); - rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); - - if (rc) { - netdev_err(bp->dev, "hwrm_fwd_async_event_cmpl failed. 
rc:%d\n", - rc); - goto fwd_async_event_cmpl_exit; - } - - if (resp->error_code) { - netdev_err(bp->dev, "hwrm_fwd_async_event_cmpl error %d\n", - resp->error_code); - rc = -1; - } - -fwd_async_event_cmpl_exit: - mutex_unlock(&bp->hwrm_cmd_lock); - return rc; -} - void bnxt_sriov_disable(struct bnxt *bp) { u16 num_vfs = pci_num_vf(bp->pdev); -- cgit v1.1 From 51f141bec15aecb2ee5f0db77761dbf219333b93 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sun, 18 Sep 2016 00:11:34 +0200 Subject: net: ethernet: broadcom: b44: use phydev from struct net_device The private structure contain a pointer to phydev, but the structure net_device already contain such pointer. So we can remove the pointer phydev in the private structure, and update the driver to use the one contained in struct net_device. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/b44.c | 22 +++++++++++----------- drivers/net/ethernet/broadcom/b44.h | 1 - 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 74f0a37..936f06f 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -1486,7 +1486,7 @@ static int b44_open(struct net_device *dev) b44_enable_ints(bp); if (bp->flags & B44_FLAG_EXTERNAL_PHY) - phy_start(bp->phydev); + phy_start(dev->phydev); netif_start_queue(dev); out: @@ -1651,7 +1651,7 @@ static int b44_close(struct net_device *dev) netif_stop_queue(dev); if (bp->flags & B44_FLAG_EXTERNAL_PHY) - phy_stop(bp->phydev); + phy_stop(dev->phydev); napi_disable(&bp->napi); @@ -1837,8 +1837,8 @@ static int b44_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) struct b44 *bp = netdev_priv(dev); if (bp->flags & B44_FLAG_EXTERNAL_PHY) { - BUG_ON(!bp->phydev); - return phy_ethtool_gset(bp->phydev, cmd); + BUG_ON(!dev->phydev); + return phy_ethtool_gset(dev->phydev, cmd); } cmd->supported = (SUPPORTED_Autoneg); @@ -1886,12 
+1886,12 @@ static int b44_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) int ret; if (bp->flags & B44_FLAG_EXTERNAL_PHY) { - BUG_ON(!bp->phydev); + BUG_ON(!dev->phydev); spin_lock_irq(&bp->lock); if (netif_running(dev)) b44_setup_phy(bp); - ret = phy_ethtool_sset(bp->phydev, cmd); + ret = phy_ethtool_sset(dev->phydev, cmd); spin_unlock_irq(&bp->lock); @@ -2137,8 +2137,8 @@ static int b44_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) spin_lock_irq(&bp->lock); if (bp->flags & B44_FLAG_EXTERNAL_PHY) { - BUG_ON(!bp->phydev); - err = phy_mii_ioctl(bp->phydev, ifr, cmd); + BUG_ON(!dev->phydev); + err = phy_mii_ioctl(dev->phydev, ifr, cmd); } else { err = generic_mii_ioctl(&bp->mii_if, if_mii(ifr), cmd, NULL); } @@ -2206,7 +2206,7 @@ static const struct net_device_ops b44_netdev_ops = { static void b44_adjust_link(struct net_device *dev) { struct b44 *bp = netdev_priv(dev); - struct phy_device *phydev = bp->phydev; + struct phy_device *phydev = dev->phydev; bool status_changed = 0; BUG_ON(!phydev); @@ -2303,7 +2303,6 @@ static int b44_register_phy_one(struct b44 *bp) SUPPORTED_MII); phydev->advertising = phydev->supported; - bp->phydev = phydev; bp->old_link = 0; bp->phy_addr = phydev->mdio.addr; @@ -2323,9 +2322,10 @@ err_out: static void b44_unregister_phy_one(struct b44 *bp) { + struct net_device *dev = bp->dev; struct mii_bus *mii_bus = bp->mii_bus; - phy_disconnect(bp->phydev); + phy_disconnect(dev->phydev); mdiobus_unregister(mii_bus); mdiobus_free(mii_bus); } diff --git a/drivers/net/ethernet/broadcom/b44.h b/drivers/net/ethernet/broadcom/b44.h index 65d88d7..89d2cf3 100644 --- a/drivers/net/ethernet/broadcom/b44.h +++ b/drivers/net/ethernet/broadcom/b44.h @@ -404,7 +404,6 @@ struct b44 { u32 tx_pending; u8 phy_addr; u8 force_copybreak; - struct phy_device *phydev; struct mii_bus *mii_bus; int old_link; struct mii_if_info mii_if; -- cgit v1.1 From 2406e5d4c4c48c9402c0629748c001178ea4c149 Mon Sep 17 00:00:00 2001 From: Philippe Reynes 
Date: Sun, 18 Sep 2016 00:11:35 +0200 Subject: net: ethernet: broadcom: b44: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/b44.c | 98 ++++++++++++++++++++----------------- 1 file changed, 54 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 936f06f..17aa33c 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -1832,58 +1832,65 @@ static int b44_nway_reset(struct net_device *dev) return r; } -static int b44_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int b44_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct b44 *bp = netdev_priv(dev); + u32 supported, advertising; if (bp->flags & B44_FLAG_EXTERNAL_PHY) { BUG_ON(!dev->phydev); - return phy_ethtool_gset(dev->phydev, cmd); + return phy_ethtool_ksettings_get(dev->phydev, cmd); } - cmd->supported = (SUPPORTED_Autoneg); - cmd->supported |= (SUPPORTED_100baseT_Half | - SUPPORTED_100baseT_Full | - SUPPORTED_10baseT_Half | - SUPPORTED_10baseT_Full | - SUPPORTED_MII); + supported = (SUPPORTED_Autoneg); + supported |= (SUPPORTED_100baseT_Half | + SUPPORTED_100baseT_Full | + SUPPORTED_10baseT_Half | + SUPPORTED_10baseT_Full | + SUPPORTED_MII); - cmd->advertising = 0; + advertising = 0; if (bp->flags & B44_FLAG_ADV_10HALF) - cmd->advertising |= ADVERTISED_10baseT_Half; + advertising |= ADVERTISED_10baseT_Half; if (bp->flags & B44_FLAG_ADV_10FULL) - cmd->advertising |= ADVERTISED_10baseT_Full; + advertising |= ADVERTISED_10baseT_Full; if (bp->flags & B44_FLAG_ADV_100HALF) - cmd->advertising |= ADVERTISED_100baseT_Half; + advertising |= ADVERTISED_100baseT_Half; if (bp->flags & B44_FLAG_ADV_100FULL) - cmd->advertising |= ADVERTISED_100baseT_Full; - 
cmd->advertising |= ADVERTISED_Pause | ADVERTISED_Asym_Pause; - ethtool_cmd_speed_set(cmd, ((bp->flags & B44_FLAG_100_BASE_T) ? - SPEED_100 : SPEED_10)); - cmd->duplex = (bp->flags & B44_FLAG_FULL_DUPLEX) ? + advertising |= ADVERTISED_100baseT_Full; + advertising |= ADVERTISED_Pause | ADVERTISED_Asym_Pause; + cmd->base.speed = (bp->flags & B44_FLAG_100_BASE_T) ? + SPEED_100 : SPEED_10; + cmd->base.duplex = (bp->flags & B44_FLAG_FULL_DUPLEX) ? DUPLEX_FULL : DUPLEX_HALF; - cmd->port = 0; - cmd->phy_address = bp->phy_addr; - cmd->transceiver = (bp->flags & B44_FLAG_EXTERNAL_PHY) ? - XCVR_EXTERNAL : XCVR_INTERNAL; - cmd->autoneg = (bp->flags & B44_FLAG_FORCE_LINK) ? + cmd->base.port = 0; + cmd->base.phy_address = bp->phy_addr; + cmd->base.autoneg = (bp->flags & B44_FLAG_FORCE_LINK) ? AUTONEG_DISABLE : AUTONEG_ENABLE; - if (cmd->autoneg == AUTONEG_ENABLE) - cmd->advertising |= ADVERTISED_Autoneg; + if (cmd->base.autoneg == AUTONEG_ENABLE) + advertising |= ADVERTISED_Autoneg; + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); + if (!netif_running(dev)){ - ethtool_cmd_speed_set(cmd, 0); - cmd->duplex = 0xff; + cmd->base.speed = 0; + cmd->base.duplex = 0xff; } - cmd->maxtxpkt = 0; - cmd->maxrxpkt = 0; + return 0; } -static int b44_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int b44_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct b44 *bp = netdev_priv(dev); u32 speed; int ret; + u32 advertising; if (bp->flags & B44_FLAG_EXTERNAL_PHY) { BUG_ON(!dev->phydev); @@ -1891,31 +1898,34 @@ static int b44_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) if (netif_running(dev)) b44_setup_phy(bp); - ret = phy_ethtool_sset(dev->phydev, cmd); + ret = phy_ethtool_ksettings_set(dev->phydev, cmd); spin_unlock_irq(&bp->lock); return ret; } - speed = ethtool_cmd_speed(cmd); + speed = 
cmd->base.speed; + + ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); /* We do not support gigabit. */ - if (cmd->autoneg == AUTONEG_ENABLE) { - if (cmd->advertising & + if (cmd->base.autoneg == AUTONEG_ENABLE) { + if (advertising & (ADVERTISED_1000baseT_Half | ADVERTISED_1000baseT_Full)) return -EINVAL; } else if ((speed != SPEED_100 && speed != SPEED_10) || - (cmd->duplex != DUPLEX_HALF && - cmd->duplex != DUPLEX_FULL)) { + (cmd->base.duplex != DUPLEX_HALF && + cmd->base.duplex != DUPLEX_FULL)) { return -EINVAL; } spin_lock_irq(&bp->lock); - if (cmd->autoneg == AUTONEG_ENABLE) { + if (cmd->base.autoneg == AUTONEG_ENABLE) { bp->flags &= ~(B44_FLAG_FORCE_LINK | B44_FLAG_100_BASE_T | B44_FLAG_FULL_DUPLEX | @@ -1923,19 +1933,19 @@ static int b44_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) B44_FLAG_ADV_10FULL | B44_FLAG_ADV_100HALF | B44_FLAG_ADV_100FULL); - if (cmd->advertising == 0) { + if (advertising == 0) { bp->flags |= (B44_FLAG_ADV_10HALF | B44_FLAG_ADV_10FULL | B44_FLAG_ADV_100HALF | B44_FLAG_ADV_100FULL); } else { - if (cmd->advertising & ADVERTISED_10baseT_Half) + if (advertising & ADVERTISED_10baseT_Half) bp->flags |= B44_FLAG_ADV_10HALF; - if (cmd->advertising & ADVERTISED_10baseT_Full) + if (advertising & ADVERTISED_10baseT_Full) bp->flags |= B44_FLAG_ADV_10FULL; - if (cmd->advertising & ADVERTISED_100baseT_Half) + if (advertising & ADVERTISED_100baseT_Half) bp->flags |= B44_FLAG_ADV_100HALF; - if (cmd->advertising & ADVERTISED_100baseT_Full) + if (advertising & ADVERTISED_100baseT_Full) bp->flags |= B44_FLAG_ADV_100FULL; } } else { @@ -1943,7 +1953,7 @@ static int b44_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) bp->flags &= ~(B44_FLAG_100_BASE_T | B44_FLAG_FULL_DUPLEX); if (speed == SPEED_100) bp->flags |= B44_FLAG_100_BASE_T; - if (cmd->duplex == DUPLEX_FULL) + if (cmd->base.duplex == DUPLEX_FULL) bp->flags |= B44_FLAG_FULL_DUPLEX; } @@ -2110,8 +2120,6 @@ static int 
b44_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) static const struct ethtool_ops b44_ethtool_ops = { .get_drvinfo = b44_get_drvinfo, - .get_settings = b44_get_settings, - .set_settings = b44_set_settings, .nway_reset = b44_nway_reset, .get_link = ethtool_op_get_link, .get_wol = b44_get_wol, @@ -2125,6 +2133,8 @@ static const struct ethtool_ops b44_ethtool_ops = { .get_strings = b44_get_strings, .get_sset_count = b44_get_sset_count, .get_ethtool_stats = b44_get_ethtool_stats, + .get_link_ksettings = b44_get_link_ksettings, + .set_link_ksettings = b44_set_link_ksettings, }; static int b44_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) -- cgit v1.1 From 625eb8667d6fcb22e474502133986b2d2838917d Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sun, 18 Sep 2016 16:59:06 +0200 Subject: net: ethernet: broadcom: bcm63xx: use phydev from struct net_device The private structure contain a pointer to phydev, but the structure net_device already contain such pointer. So we can remove the pointer phydev in the private structure, and update the driver to use the one contained in struct net_device. Signed-off-by: Philippe Reynes Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 31 +++++++++++++--------------- drivers/net/ethernet/broadcom/bcm63xx_enet.h | 1 - 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 6c8bc5f..082f3f0 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -791,7 +791,7 @@ static void bcm_enet_adjust_phy_link(struct net_device *dev) int status_changed; priv = netdev_priv(dev); - phydev = priv->phydev; + phydev = dev->phydev; status_changed = 0; if (priv->old_link != phydev->link) { @@ -913,7 +913,6 @@ static int bcm_enet_open(struct net_device *dev) priv->old_link = 0; priv->old_duplex = -1; priv->old_pause = -1; - priv->phydev = phydev; } /* mask all interrupts and request them */ @@ -1085,7 +1084,7 @@ static int bcm_enet_open(struct net_device *dev) ENETDMAC_IRMASK, priv->tx_chan); if (priv->has_phy) - phy_start(priv->phydev); + phy_start(phydev); else bcm_enet_adjust_link(dev); @@ -1127,7 +1126,7 @@ out_freeirq: free_irq(dev->irq, dev); out_phy_disconnect: - phy_disconnect(priv->phydev); + phy_disconnect(phydev); return ret; } @@ -1190,7 +1189,7 @@ static int bcm_enet_stop(struct net_device *dev) netif_stop_queue(dev); napi_disable(&priv->napi); if (priv->has_phy) - phy_stop(priv->phydev); + phy_stop(dev->phydev); del_timer_sync(&priv->rx_timeout); /* mask all interrupts */ @@ -1234,10 +1233,8 @@ static int bcm_enet_stop(struct net_device *dev) free_irq(dev->irq, dev); /* release phy */ - if (priv->has_phy) { - phy_disconnect(priv->phydev); - priv->phydev = NULL; - } + if (priv->has_phy) + phy_disconnect(dev->phydev); return 0; } @@ -1437,9 +1434,9 @@ static int bcm_enet_nway_reset(struct net_device *dev) priv = netdev_priv(dev); if (priv->has_phy) { - if (!priv->phydev) + if (!dev->phydev) return -ENODEV; - return genphy_restart_aneg(priv->phydev); + return 
genphy_restart_aneg(dev->phydev); } return -EOPNOTSUPP; @@ -1456,9 +1453,9 @@ static int bcm_enet_get_settings(struct net_device *dev, cmd->maxtxpkt = 0; if (priv->has_phy) { - if (!priv->phydev) + if (!dev->phydev) return -ENODEV; - return phy_ethtool_gset(priv->phydev, cmd); + return phy_ethtool_gset(dev->phydev, cmd); } else { cmd->autoneg = 0; ethtool_cmd_speed_set(cmd, ((priv->force_speed_100) @@ -1483,9 +1480,9 @@ static int bcm_enet_set_settings(struct net_device *dev, priv = netdev_priv(dev); if (priv->has_phy) { - if (!priv->phydev) + if (!dev->phydev) return -ENODEV; - return phy_ethtool_sset(priv->phydev, cmd); + return phy_ethtool_sset(dev->phydev, cmd); } else { if (cmd->autoneg || @@ -1604,9 +1601,9 @@ static int bcm_enet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) priv = netdev_priv(dev); if (priv->has_phy) { - if (!priv->phydev) + if (!dev->phydev) return -ENODEV; - return phy_mii_ioctl(priv->phydev, rq, cmd); + return phy_mii_ioctl(dev->phydev, rq, cmd); } else { struct mii_if_info mii; diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.h b/drivers/net/ethernet/broadcom/bcm63xx_enet.h index f55af43..0a1b7b2 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.h +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.h @@ -290,7 +290,6 @@ struct bcm_enet_priv { /* used when a phy is connected (phylib used) */ struct mii_bus *mii_bus; - struct phy_device *phydev; int old_link; int old_duplex; int old_pause; -- cgit v1.1 From 639cfa9e8cdaca5276c9786e22195653a0d4391b Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sun, 18 Sep 2016 16:59:07 +0200 Subject: net: ethernet: broadcom: bcm63xx: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. Signed-off-by: Philippe Reynes Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 52 +++++++++++++++------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 082f3f0..ae364c7 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1442,39 +1442,40 @@ static int bcm_enet_nway_reset(struct net_device *dev) return -EOPNOTSUPP; } -static int bcm_enet_get_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +static int bcm_enet_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct bcm_enet_priv *priv; + u32 supported, advertising; priv = netdev_priv(dev); - cmd->maxrxpkt = 0; - cmd->maxtxpkt = 0; - if (priv->has_phy) { if (!dev->phydev) return -ENODEV; - return phy_ethtool_gset(dev->phydev, cmd); + return phy_ethtool_ksettings_get(dev->phydev, cmd); } else { - cmd->autoneg = 0; - ethtool_cmd_speed_set(cmd, ((priv->force_speed_100) - ? SPEED_100 : SPEED_10)); - cmd->duplex = (priv->force_duplex_full) ? + cmd->base.autoneg = 0; + cmd->base.speed = (priv->force_speed_100) ? + SPEED_100 : SPEED_10; + cmd->base.duplex = (priv->force_duplex_full) ? 
DUPLEX_FULL : DUPLEX_HALF; - cmd->supported = ADVERTISED_10baseT_Half | + supported = ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full | ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full; - cmd->advertising = 0; - cmd->port = PORT_MII; - cmd->transceiver = XCVR_EXTERNAL; + advertising = 0; + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.supported, supported); + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.advertising, advertising); + cmd->base.port = PORT_MII; } return 0; } -static int bcm_enet_set_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +static int bcm_enet_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct bcm_enet_priv *priv; @@ -1482,16 +1483,19 @@ static int bcm_enet_set_settings(struct net_device *dev, if (priv->has_phy) { if (!dev->phydev) return -ENODEV; - return phy_ethtool_sset(dev->phydev, cmd); + return phy_ethtool_ksettings_set(dev->phydev, cmd); } else { - if (cmd->autoneg || - (cmd->speed != SPEED_100 && cmd->speed != SPEED_10) || - cmd->port != PORT_MII) + if (cmd->base.autoneg || + (cmd->base.speed != SPEED_100 && + cmd->base.speed != SPEED_10) || + cmd->base.port != PORT_MII) return -EINVAL; - priv->force_speed_100 = (cmd->speed == SPEED_100) ? 1 : 0; - priv->force_duplex_full = (cmd->duplex == DUPLEX_FULL) ? 1 : 0; + priv->force_speed_100 = + (cmd->base.speed == SPEED_100) ? 1 : 0; + priv->force_duplex_full = + (cmd->base.duplex == DUPLEX_FULL) ? 
1 : 0; if (netif_running(dev)) bcm_enet_adjust_link(dev); @@ -1585,14 +1589,14 @@ static const struct ethtool_ops bcm_enet_ethtool_ops = { .get_sset_count = bcm_enet_get_sset_count, .get_ethtool_stats = bcm_enet_get_ethtool_stats, .nway_reset = bcm_enet_nway_reset, - .get_settings = bcm_enet_get_settings, - .set_settings = bcm_enet_set_settings, .get_drvinfo = bcm_enet_get_drvinfo, .get_link = ethtool_op_get_link, .get_ringparam = bcm_enet_get_ringparam, .set_ringparam = bcm_enet_set_ringparam, .get_pauseparam = bcm_enet_get_pauseparam, .set_pauseparam = bcm_enet_set_pauseparam, + .get_link_ksettings = bcm_enet_get_link_ksettings, + .set_link_ksettings = bcm_enet_set_link_ksettings, }; static int bcm_enet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) -- cgit v1.1 From 6b352ebccbcf68866fa5e2ec98cce5e6b7cdf92e Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sun, 18 Sep 2016 17:16:45 +0200 Subject: net: ethernet: broadcom: bcmgenet: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. Signed-off-by: Philippe Reynes Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 46f9043..2013474 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -450,8 +450,8 @@ static inline void bcmgenet_rdma_ring_writel(struct bcmgenet_priv *priv, genet_dma_ring_regs[r]); } -static int bcmgenet_get_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +static int bcmgenet_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { if (!netif_running(dev)) return -EINVAL; @@ -459,11 +459,11 @@ static int bcmgenet_get_settings(struct net_device *dev, if (!dev->phydev) return -ENODEV; - return phy_ethtool_gset(dev->phydev, cmd); + return phy_ethtool_ksettings_get(dev->phydev, cmd); } -static int bcmgenet_set_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +static int bcmgenet_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { if (!netif_running(dev)) return -EINVAL; @@ -471,7 +471,7 @@ static int bcmgenet_set_settings(struct net_device *dev, if (!dev->phydev) return -ENODEV; - return phy_ethtool_sset(dev->phydev, cmd); + return phy_ethtool_ksettings_set(dev->phydev, cmd); } static int bcmgenet_set_rx_csum(struct net_device *dev, @@ -977,8 +977,6 @@ static const struct ethtool_ops bcmgenet_ethtool_ops = { .get_strings = bcmgenet_get_strings, .get_sset_count = bcmgenet_get_sset_count, .get_ethtool_stats = bcmgenet_get_ethtool_stats, - .get_settings = bcmgenet_get_settings, - .set_settings = bcmgenet_set_settings, .get_drvinfo = bcmgenet_get_drvinfo, .get_link = ethtool_op_get_link, .get_msglevel = bcmgenet_get_msglevel, @@ -990,6 +988,8 @@ static const struct ethtool_ops bcmgenet_ethtool_ops = { .nway_reset = bcmgenet_nway_reset, .get_coalesce = bcmgenet_get_coalesce, 
.set_coalesce = bcmgenet_set_coalesce, + .get_link_ksettings = bcmgenet_get_link_ksettings, + .set_link_ksettings = bcmgenet_set_link_ksettings, }; /* Power down the unimac, based on mode. */ -- cgit v1.1 From 9940803065e3e15df8fd0d6a9e29f7b0617d8935 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Sun, 18 Sep 2016 16:26:34 +0800 Subject: phy: mark lan88xx_suspend() static We get 1 warning when building kernel with W=1: drivers/net/phy/microchip.c:58:5: warning: no previous prototype for 'lan88xx_suspend' [-Wmissing-prototypes] In fact, this function is only used in the file in which it is declared and doesn't need a declaration, but can be made static. So this patch marks this function with 'static'. Signed-off-by: Baoyou Xie Signed-off-by: David S. Miller --- drivers/net/phy/microchip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c index 15f8206..7c00e50 100644 --- a/drivers/net/phy/microchip.c +++ b/drivers/net/phy/microchip.c @@ -55,7 +55,7 @@ static int lan88xx_phy_ack_interrupt(struct phy_device *phydev) return rc < 0 ? 
rc : 0; } -int lan88xx_suspend(struct phy_device *phydev) +static int lan88xx_suspend(struct phy_device *phydev) { struct lan88xx_priv *priv = phydev->priv; -- cgit v1.1 From d766e7e6b68d681d46d74e228ad0ba133e730e36 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Sun, 18 Sep 2016 16:35:29 +0800 Subject: be2net: mark symbols static where possible We get 4 warnings when building kernel with W=1: drivers/net/ethernet/emulex/benet/be_main.c:4368:6: warning: no previous prototype for 'be_calculate_pf_pool_rss_tables' [-Wmissing-prototypes] drivers/net/ethernet/emulex/benet/be_cmds.c:4385:5: warning: no previous prototype for 'be_get_nic_pf_num_list' [-Wmissing-prototypes] drivers/net/ethernet/emulex/benet/be_cmds.c:4537:6: warning: no previous prototype for 'be_reset_nic_desc' [-Wmissing-prototypes] drivers/net/ethernet/emulex/benet/be_cmds.c:4910:5: warning: no previous prototype for '__be_cmd_set_logical_link_config' [-Wmissing-prototypes] In fact, these functions are only used in the file in which they are declared and don't need a declaration, but can be made static. So this patch marks these functions with 'static'. Signed-off-by: Baoyou Xie Signed-off-by: David S. 
Miller --- drivers/net/ethernet/emulex/benet/be_cmds.c | 9 +++++---- drivers/net/ethernet/emulex/benet/be_main.c | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 15d02da..9cffe48 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -4382,7 +4382,7 @@ err: } /* This routine returns a list of all the NIC PF_nums in the adapter */ -u16 be_get_nic_pf_num_list(u8 *buf, u32 desc_count, u16 *nic_pf_nums) +static u16 be_get_nic_pf_num_list(u8 *buf, u32 desc_count, u16 *nic_pf_nums) { struct be_res_desc_hdr *hdr = (struct be_res_desc_hdr *)buf; struct be_pcie_res_desc *pcie = NULL; @@ -4534,7 +4534,7 @@ static int be_cmd_set_profile_config(struct be_adapter *adapter, void *desc, } /* Mark all fields invalid */ -void be_reset_nic_desc(struct be_nic_res_desc *nic) +static void be_reset_nic_desc(struct be_nic_res_desc *nic) { memset(nic, 0, sizeof(*nic)); nic->unicast_mac_count = 0xFFFF; @@ -4907,8 +4907,9 @@ err: return status; } -int __be_cmd_set_logical_link_config(struct be_adapter *adapter, - int link_state, int version, u8 domain) +static int +__be_cmd_set_logical_link_config(struct be_adapter *adapter, + int link_state, int version, u8 domain) { struct be_mcc_wrb *wrb; struct be_cmd_req_set_ll_link *req; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 34f63ef..9a94840 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4365,7 +4365,7 @@ static void be_setup_init(struct be_adapter *adapter) * for distribution between the VFs. This self-imposed limit will determine the * no: of VFs for which RSS can be enabled. 
*/ -void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter) +static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter) { struct be_port_resources port_res = {0}; u8 rss_tables_on_port; -- cgit v1.1 From 766a0e978fc2ba98d4865b466f8b572402317189 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Sun, 18 Sep 2016 16:44:22 +0800 Subject: net/mlx5: clean function declarations in eswitch.c up We get 2 warnings when building kernel with W=1: drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c:463:5: warning: no previous prototype for 'esw_offloads_init' [-Wmissing-prototypes] drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c:521:6: warning: no previous prototype for 'esw_offloads_cleanup' [-Wmissing-prototypes] In fact, both functions are declared in drivers/net/ethernet/mellanox/mlx5/core/eswitch.c, but should be declared in a header file so that they can be recognized in other files. So this patch moves the declarations into drivers/net/ethernet/mellanox/mlx5/core/eswitch.h Signed-off-by: Baoyou Xie Acked-by: Leon Romanovsky Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 3 --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 654b76f..4927494 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -81,9 +81,6 @@ enum { MC_ADDR_CHANGE | \ PROMISC_CHANGE) -int esw_offloads_init(struct mlx5_eswitch *esw, int nvports); -void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports); - static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, u32 events_mask) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 6855783..b96e8c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -209,6 +209,9 @@ struct mlx5_eswitch { int mode; }; +void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports); +int esw_offloads_init(struct mlx5_eswitch *esw, int nvports); + /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); -- cgit v1.1 From 6a5d58b67e205f2ffc62d0a9ee4ef7d237e9a7fb Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 18 Sep 2016 07:31:42 -0400 Subject: net sched ife action: add 16 bit helpers encoder and checker for 16 bits metadata Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- include/net/tc_act/tc_ife.h | 2 ++ net/sched/act_ife.c | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h index 5164bd7..9fd2bea0 100644 --- a/include/net/tc_act/tc_ife.h +++ b/include/net/tc_act/tc_ife.h @@ -50,9 +50,11 @@ int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval, gfp_t gfp); int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval, gfp_t gfp); int ife_check_meta_u32(u32 metaval, struct tcf_meta_info *mi); +int ife_check_meta_u16(u16 metaval, struct tcf_meta_info *mi); int ife_encode_meta_u32(u32 metaval, void *skbdata, struct tcf_meta_info *mi); int ife_validate_meta_u32(void *val, int len); int ife_validate_meta_u16(void *val, int len); +int ife_encode_meta_u16(u16 metaval, void *skbdata, struct tcf_meta_info *mi); void ife_release_meta_gen(struct tcf_meta_info *mi); int register_ife_op(struct tcf_meta_ops *mops); int unregister_ife_op(struct tcf_meta_ops *mops); diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index e87cd81..ccf7b4b 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -63,6 +63,23 @@ int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval) } EXPORT_SYMBOL_GPL(ife_tlv_meta_encode); +int ife_encode_meta_u16(u16 metaval, void *skbdata, struct tcf_meta_info *mi) +{ + u16 edata = 0; + + if (mi->metaval) + edata = *(u16 *)mi->metaval; + else if (metaval) + edata = metaval; + + if (!edata) /* will not encode */ + return 0; + + edata = htons(edata); + return ife_tlv_meta_encode(skbdata, mi->metaid, 2, &edata); +} +EXPORT_SYMBOL_GPL(ife_encode_meta_u16); + int ife_get_meta_u32(struct sk_buff *skb, struct tcf_meta_info *mi) { if (mi->metaval) @@ -81,6 +98,15 @@ int ife_check_meta_u32(u32 metaval, struct tcf_meta_info *mi) } EXPORT_SYMBOL_GPL(ife_check_meta_u32); +int ife_check_meta_u16(u16 metaval, struct tcf_meta_info *mi) +{ + if 
(metaval || mi->metaval) + return 8; /* T+L+(V) == 2+2+(2+2bytepad) */ + + return 0; +} +EXPORT_SYMBOL_GPL(ife_check_meta_u16); + int ife_encode_meta_u32(u32 metaval, void *skbdata, struct tcf_meta_info *mi) { u32 edata = metaval; -- cgit v1.1 From 408fbc22ef1efb00dd896acd00e9f7d9b641e047 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 18 Sep 2016 07:31:43 -0400 Subject: net sched ife action: Introduce skb tcindex metadata encap decap Sample use case of how this is encoded: user space via tuntap (or a connected VM/Machine/container) encodes the tcindex TLV. Sample use case of decoding: IFE action decodes it and the skb->tc_index is then used to classify. So something like this for encoded ICMP packets: .. first decode then reclassify... skb->tcindex will be set sudo $TC filter add dev $ETH parent ffff: prio 2 protocol 0xbeef \ u32 match u32 0 0 flowid 1:1 \ action ife decode reclassify ...next match the decode icmp packet... sudo $TC filter add dev $ETH parent ffff: prio 4 protocol ip \ u32 match ip protocol 1 0xff flowid 1:1 \ action continue ... last classify it using the tcindex classifier and do someaction.. sudo $TC filter add dev $ETH parent ffff: prio 5 protocol ip \ handle 0x11 tcindex classid 1:1 \ action blah.. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- include/uapi/linux/tc_act/tc_ife.h | 3 +- net/sched/Kconfig | 5 +++ net/sched/Makefile | 1 + net/sched/act_meta_skbtcindex.c | 79 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 net/sched/act_meta_skbtcindex.c diff --git a/include/uapi/linux/tc_act/tc_ife.h b/include/uapi/linux/tc_act/tc_ife.h index 4ece02a..cd18360 100644 --- a/include/uapi/linux/tc_act/tc_ife.h +++ b/include/uapi/linux/tc_act/tc_ife.h @@ -32,8 +32,9 @@ enum { #define IFE_META_HASHID 2 #define IFE_META_PRIO 3 #define IFE_META_QMAP 4 +#define IFE_META_TCINDEX 5 /*Can be overridden at runtime by module option*/ -#define __IFE_META_MAX 5 +#define __IFE_META_MAX 6 #define IFE_META_MAX (__IFE_META_MAX - 1) #endif diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 7795d5a..87956a7 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -793,6 +793,11 @@ config NET_IFE_SKBPRIO depends on NET_ACT_IFE ---help--- +config NET_IFE_SKBTCINDEX + tristate "Support to encoding decoding skb tcindex on IFE action" + depends on NET_ACT_IFE + ---help--- + config NET_CLS_IND bool "Incoming device classification" depends on NET_CLS_U32 || NET_CLS_FW diff --git a/net/sched/Makefile b/net/sched/Makefile index 148ae0d..4bdda36 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -23,6 +23,7 @@ obj-$(CONFIG_NET_ACT_SKBMOD) += act_skbmod.o obj-$(CONFIG_NET_ACT_IFE) += act_ife.o obj-$(CONFIG_NET_IFE_SKBMARK) += act_meta_mark.o obj-$(CONFIG_NET_IFE_SKBPRIO) += act_meta_skbprio.o +obj-$(CONFIG_NET_IFE_SKBTCINDEX) += act_meta_skbtcindex.o obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o diff --git a/net/sched/act_meta_skbtcindex.c b/net/sched/act_meta_skbtcindex.c new file mode 100644 index 0000000..3b35774 --- /dev/null +++ b/net/sched/act_meta_skbtcindex.c @@ -0,0 +1,79 @@ +/* + * net/sched/act_meta_tc_index.c IFE skb->tc_index metadata module + * + * This 
program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * copyright Jamal Hadi Salim (2016) + * +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int skbtcindex_encode(struct sk_buff *skb, void *skbdata, + struct tcf_meta_info *e) +{ + u32 ifetc_index = skb->tc_index; + + return ife_encode_meta_u16(ifetc_index, skbdata, e); +} + +static int skbtcindex_decode(struct sk_buff *skb, void *data, u16 len) +{ + u16 ifetc_index = *(u16 *)data; + + skb->tc_index = ntohs(ifetc_index); + return 0; +} + +static int skbtcindex_check(struct sk_buff *skb, struct tcf_meta_info *e) +{ + return ife_check_meta_u16(skb->tc_index, e); +} + +static struct tcf_meta_ops ife_skbtcindex_ops = { + .metaid = IFE_META_TCINDEX, + .metatype = NLA_U16, + .name = "tc_index", + .synopsis = "skb tc_index 16 bit metadata", + .check_presence = skbtcindex_check, + .encode = skbtcindex_encode, + .decode = skbtcindex_decode, + .get = ife_get_meta_u16, + .alloc = ife_alloc_meta_u16, + .release = ife_release_meta_gen, + .validate = ife_validate_meta_u16, + .owner = THIS_MODULE, +}; + +static int __init ifetc_index_init_module(void) +{ + return register_ife_op(&ife_skbtcindex_ops); +} + +static void __exit ifetc_index_cleanup_module(void) +{ + unregister_ife_op(&ife_skbtcindex_ops); +} + +module_init(ifetc_index_init_module); +module_exit(ifetc_index_cleanup_module); + +MODULE_AUTHOR("Jamal Hadi Salim(2016)"); +MODULE_DESCRIPTION("Inter-FE skb tc_index metadata module"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_IFE_META(IFE_META_SKBTCINDEX); -- cgit v1.1 From f71b109f1730902b73f70d78764d8a41265080dd Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Sun, 18 Sep 2016 07:53:08 -0400 Subject: net sched actions police: peg drop 
stats for conforming traffic Setting conforming action to drop is a valid policy. When it is set, we need to at least see the stats indicating it for debugging. Signed-off-by: Roman Mashak Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_police.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 8a3be1d..ba7074b 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -249,6 +249,8 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a, police->tcfp_t_c = now; police->tcfp_toks = toks; police->tcfp_ptoks = ptoks; + if (police->tcfp_result == TC_ACT_SHOT) + police->tcf_qstats.drops++; spin_unlock(&police->tcf_lock); return police->tcfp_result; } -- cgit v1.1 From 5a7a5555a362f60350668cd124df9a396f546c61 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 18 Sep 2016 08:45:33 -0400 Subject: net sched: stylistic cleanups Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- net/sched/act_api.c | 16 ++++++---------- net/sched/act_csum.c | 36 ++++++++++++++++++------------------ net/sched/act_gact.c | 3 ++- net/sched/act_mirred.c | 3 ++- net/sched/act_police.c | 10 ++++------ net/sched/cls_api.c | 18 ++++++++++-------- net/sched/cls_bpf.c | 6 ++++-- net/sched/cls_flow.c | 21 ++++++++++++++------- net/sched/cls_flower.c | 3 ++- net/sched/cls_fw.c | 10 +++++----- net/sched/cls_route.c | 9 +++------ net/sched/cls_tcindex.c | 12 ++++++------ net/sched/cls_u32.c | 30 ++++++++++++------------------ net/sched/sch_api.c | 41 ++++++++++++++++++++++++++--------------- 14 files changed, 114 insertions(+), 104 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index d09d068..d0aceb1 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -592,9 +592,8 @@ err_out: return ERR_PTR(err); } -int tcf_action_init(struct net *net, struct nlattr *nla, - struct nlattr *est, char *name, int ovr, - int bind, struct list_head *actions) +int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est, + char *name, int ovr, int bind, struct list_head *actions) { struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct tc_action *act; @@ -923,9 +922,8 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, return err; } -static int -tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, - u32 portid, int ovr) +static int tcf_action_add(struct net *net, struct nlattr *nla, + struct nlmsghdr *n, u32 portid, int ovr) { int ret = 0; LIST_HEAD(actions); @@ -988,8 +986,7 @@ replay: return ret; } -static struct nlattr * -find_dump_kind(const struct nlmsghdr *n) +static struct nlattr *find_dump_kind(const struct nlmsghdr *n) { struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1]; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; @@ -1016,8 +1013,7 @@ find_dump_kind(const struct nlmsghdr *n) return kind; } -static int -tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) +static int 
tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); struct nlmsghdr *nlh; diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index b5dbf63..e0defce 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -116,8 +116,8 @@ static void *tcf_csum_skb_nextlayer(struct sk_buff *skb, return (void *)(skb_network_header(skb) + ihl); } -static int tcf_csum_ipv4_icmp(struct sk_buff *skb, - unsigned int ihl, unsigned int ipl) +static int tcf_csum_ipv4_icmp(struct sk_buff *skb, unsigned int ihl, + unsigned int ipl) { struct icmphdr *icmph; @@ -152,8 +152,8 @@ static int tcf_csum_ipv4_igmp(struct sk_buff *skb, return 1; } -static int tcf_csum_ipv6_icmp(struct sk_buff *skb, - unsigned int ihl, unsigned int ipl) +static int tcf_csum_ipv6_icmp(struct sk_buff *skb, unsigned int ihl, + unsigned int ipl) { struct icmp6hdr *icmp6h; const struct ipv6hdr *ip6h; @@ -174,8 +174,8 @@ static int tcf_csum_ipv6_icmp(struct sk_buff *skb, return 1; } -static int tcf_csum_ipv4_tcp(struct sk_buff *skb, - unsigned int ihl, unsigned int ipl) +static int tcf_csum_ipv4_tcp(struct sk_buff *skb, unsigned int ihl, + unsigned int ipl) { struct tcphdr *tcph; const struct iphdr *iph; @@ -195,8 +195,8 @@ static int tcf_csum_ipv4_tcp(struct sk_buff *skb, return 1; } -static int tcf_csum_ipv6_tcp(struct sk_buff *skb, - unsigned int ihl, unsigned int ipl) +static int tcf_csum_ipv6_tcp(struct sk_buff *skb, unsigned int ihl, + unsigned int ipl) { struct tcphdr *tcph; const struct ipv6hdr *ip6h; @@ -217,8 +217,8 @@ static int tcf_csum_ipv6_tcp(struct sk_buff *skb, return 1; } -static int tcf_csum_ipv4_udp(struct sk_buff *skb, - unsigned int ihl, unsigned int ipl, int udplite) +static int tcf_csum_ipv4_udp(struct sk_buff *skb, unsigned int ihl, + unsigned int ipl, int udplite) { struct udphdr *udph; const struct iphdr *iph; @@ -270,8 +270,8 @@ ignore_obscure_skb: return 1; } -static int tcf_csum_ipv6_udp(struct sk_buff *skb, - unsigned int ihl, 
unsigned int ipl, int udplite) +static int tcf_csum_ipv6_udp(struct sk_buff *skb, unsigned int ihl, + unsigned int ipl, int udplite) { struct udphdr *udph; const struct ipv6hdr *ip6h; @@ -380,8 +380,8 @@ fail: return 0; } -static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh, - unsigned int ixhl, unsigned int *pl) +static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh, unsigned int ixhl, + unsigned int *pl) { int off, len, optlen; unsigned char *xh = (void *)ip6xh; @@ -494,8 +494,8 @@ fail: return 0; } -static int tcf_csum(struct sk_buff *skb, - const struct tc_action *a, struct tcf_result *res) +static int tcf_csum(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) { struct tcf_csum *p = to_tcf_csum(a); int action; @@ -531,8 +531,8 @@ drop: return TC_ACT_SHOT; } -static int tcf_csum_dump(struct sk_buff *skb, - struct tc_action *a, int bind, int ref) +static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind, + int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_csum *p = to_tcf_csum(a); diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index e24a409..e0aa30f 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -156,7 +156,8 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets, int action = READ_ONCE(gact->tcf_action); struct tcf_t *tm = &gact->tcf_tm; - _bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), bytes, packets); + _bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), bytes, + packets); if (action == TC_ACT_SHOT) this_cpu_ptr(gact->common.cpu_qstats)->drops += packets; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 6038c85..1c76387 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -204,7 +204,8 @@ out: return retval; } -static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) +static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, + int ref) { 
unsigned char *b = skb_tail_pointer(skb); struct tcf_mirred *m = to_mirred(a); diff --git a/net/sched/act_police.c b/net/sched/act_police.c index ba7074b..d1bd248 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -263,8 +263,8 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a, return police->tcf_action; } -static int -tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) +static int tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_police *police = to_police(a); @@ -349,14 +349,12 @@ static struct pernet_operations police_net_ops = { .size = sizeof(struct tc_action_net), }; -static int __init -police_init_module(void) +static int __init police_init_module(void) { return tcf_register_action(&act_police_ops, &police_net_ops); } -static void __exit -police_cleanup_module(void) +static void __exit police_cleanup_module(void) { tcf_unregister_action(&act_police_ops, &police_net_ops); } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index a7c5645..11da7da 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -344,13 +344,15 @@ replay: if (err == 0) { struct tcf_proto *next = rtnl_dereference(tp->next); - tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER); + tfilter_notify(net, skb, n, tp, fh, + RTM_DELTFILTER); if (tcf_destroy(tp, false)) RCU_INIT_POINTER(*back, next); } goto errout; case RTM_GETTFILTER: - err = tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER); + err = tfilter_notify(net, skb, n, tp, fh, + RTM_NEWTFILTER); goto errout; default: err = -EINVAL; @@ -448,7 +450,8 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, struct net *net = sock_net(a->skb->sk); return tcf_fill_node(net, a->skb, tp, n, NETLINK_CB(a->cb->skb).portid, - a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER); + a->cb->nlh->nlmsg_seq, NLM_F_MULTI, + RTM_NEWTFILTER); } /* called with RTNL */ @@ -552,7 
+555,7 @@ void tcf_exts_destroy(struct tcf_exts *exts) EXPORT_SYMBOL(tcf_exts_destroy); int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, - struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr) + struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr) { #ifdef CONFIG_NET_CLS_ACT { @@ -560,8 +563,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, if (exts->police && tb[exts->police]) { act = tcf_action_init_1(net, tb[exts->police], rate_tlv, - "police", ovr, - TCA_ACT_BIND); + "police", ovr, TCA_ACT_BIND); if (IS_ERR(act)) return PTR_ERR(act); @@ -573,8 +575,8 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, int err, i = 0; err = tcf_action_init(net, tb[exts->action], rate_tlv, - NULL, ovr, - TCA_ACT_BIND, &actions); + NULL, ovr, TCA_ACT_BIND, + &actions); if (err) return err; list_for_each_entry(act, &actions, list) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 1d92d4d..c6f7a47 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -55,7 +55,8 @@ static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { [TCA_BPF_CLASSID] = { .type = NLA_U32 }, [TCA_BPF_FLAGS] = { .type = NLA_U32 }, [TCA_BPF_FD] = { .type = NLA_U32 }, - [TCA_BPF_NAME] = { .type = NLA_NUL_STRING, .len = CLS_BPF_NAME_LEN }, + [TCA_BPF_NAME] = { .type = NLA_NUL_STRING, + .len = CLS_BPF_NAME_LEN }, [TCA_BPF_OPS_LEN] = { .type = NLA_U16 }, [TCA_BPF_OPS] = { .type = NLA_BINARY, .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, @@ -409,7 +410,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, goto errout; } - ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE], ovr); + ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE], + ovr); if (ret < 0) goto errout; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index a379bae..e396723 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -87,12 +87,14 @@ static 
u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow) return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); } -static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_proto(const struct sk_buff *skb, + const struct flow_keys *flow) { return flow->basic.ip_proto; } -static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_proto_src(const struct sk_buff *skb, + const struct flow_keys *flow) { if (flow->ports.ports) return ntohs(flow->ports.src); @@ -100,7 +102,8 @@ static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys return addr_fold(skb->sk); } -static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_proto_dst(const struct sk_buff *skb, + const struct flow_keys *flow) { if (flow->ports.ports) return ntohs(flow->ports.dst); @@ -149,7 +152,8 @@ static u32 flow_get_nfct(const struct sk_buff *skb) }) #endif -static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_nfct_src(const struct sk_buff *skb, + const struct flow_keys *flow) { switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): @@ -161,7 +165,8 @@ fallback: return flow_get_src(skb, flow); } -static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_nfct_dst(const struct sk_buff *skb, + const struct flow_keys *flow) { switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): @@ -173,14 +178,16 @@ fallback: return flow_get_dst(skb, flow); } -static u32 flow_get_nfct_proto_src(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_nfct_proto_src(const struct sk_buff *skb, + const struct flow_keys *flow) { return ntohs(CTTUPLE(skb, src.u.all)); fallback: return flow_get_proto_src(skb, flow); } -static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow) 
+static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb, + const struct flow_keys *flow) { return ntohs(CTTUPLE(skb, dst.u.all)); fallback: diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index a3f4c70..2af09c8 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -241,7 +241,8 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, tc.type = TC_SETUP_CLSFLOWER; tc.cls_flower = &offload; - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc); + err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, + &tc); if (tc_skip_sw(flags)) return err; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index cc0bda9..9dc63d5 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -57,7 +57,7 @@ static u32 fw_hash(u32 handle) } static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res) + struct tcf_result *res) { struct fw_head *head = rcu_dereference_bh(tp->root); struct fw_filter *f; @@ -188,7 +188,8 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = { static int fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, - struct nlattr **tb, struct nlattr **tca, unsigned long base, bool ovr) + struct nlattr **tb, struct nlattr **tca, unsigned long base, + bool ovr) { struct fw_head *head = rtnl_dereference(tp->root); struct tcf_exts e; @@ -237,9 +238,8 @@ errout: static int fw_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, - u32 handle, - struct nlattr **tca, - unsigned long *arg, bool ovr) + u32 handle, struct nlattr **tca, unsigned long *arg, + bool ovr) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f = (struct fw_filter *) *arg; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index c91e65d..a4ce39b 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -268,8 +268,7 @@ static int route4_init(struct tcf_proto *tp) return 0; } -static void 
-route4_delete_filter(struct rcu_head *head) +static void route4_delete_filter(struct rcu_head *head) { struct route4_filter *f = container_of(head, struct route4_filter, rcu); @@ -474,10 +473,8 @@ errout: } static int route4_change(struct net *net, struct sk_buff *in_skb, - struct tcf_proto *tp, unsigned long base, - u32 handle, - struct nlattr **tca, - unsigned long *arg, bool ovr) + struct tcf_proto *tp, unsigned long base, u32 handle, + struct nlattr **tca, unsigned long *arg, bool ovr) { struct route4_head *head = rtnl_dereference(tp->root); struct route4_filter __rcu **fp; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index d950070..96144bd 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -50,14 +50,13 @@ struct tcindex_data { struct rcu_head rcu; }; -static inline int -tcindex_filter_is_set(struct tcindex_filter_result *r) +static inline int tcindex_filter_is_set(struct tcindex_filter_result *r) { return tcf_exts_is_predicative(&r->exts) || r->res.classid; } -static struct tcindex_filter_result * -tcindex_lookup(struct tcindex_data *p, u16 key) +static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p, + u16 key) { if (p->perfect) { struct tcindex_filter_result *f = p->perfect + key; @@ -144,7 +143,8 @@ static void tcindex_destroy_rexts(struct rcu_head *head) static void tcindex_destroy_fexts(struct rcu_head *head) { - struct tcindex_filter *f = container_of(head, struct tcindex_filter, rcu); + struct tcindex_filter *f = container_of(head, struct tcindex_filter, + rcu); tcf_exts_destroy(&f->result.exts); kfree(f); @@ -550,7 +550,7 @@ static bool tcindex_destroy(struct tcf_proto *tp, bool force) static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t) { struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh; diff --git 
a/net/sched/cls_u32.c b/net/sched/cls_u32.c index a29263a..ae83c3ae 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -104,7 +104,8 @@ static inline unsigned int u32_hash_fold(__be32 key, return h; } -static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) +static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res) { struct { struct tc_u_knode *knode; @@ -256,8 +257,7 @@ deadloop: return -1; } -static struct tc_u_hnode * -u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) +static struct tc_u_hnode *u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) { struct tc_u_hnode *ht; @@ -270,8 +270,7 @@ u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) return ht; } -static struct tc_u_knode * -u32_lookup_key(struct tc_u_hnode *ht, u32 handle) +static struct tc_u_knode *u32_lookup_key(struct tc_u_hnode *ht, u32 handle) { unsigned int sel; struct tc_u_knode *n = NULL; @@ -360,8 +359,7 @@ static int u32_init(struct tcf_proto *tp) return 0; } -static int u32_destroy_key(struct tcf_proto *tp, - struct tc_u_knode *n, +static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n, bool free_pf) { tcf_exts_destroy(&n->exts); @@ -448,9 +446,8 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) } } -static int u32_replace_hw_hnode(struct tcf_proto *tp, - struct tc_u_hnode *h, - u32 flags) +static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, + u32 flags) { struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_u32_offload u32_offload = {0}; @@ -496,9 +493,8 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) } } -static int u32_replace_hw_knode(struct tcf_proto *tp, - struct tc_u_knode *n, - u32 flags) +static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, + u32 flags) { struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_u32_offload u32_offload = {0}; @@ -763,8 +759,7 @@ errout: 
return err; } -static void u32_replace_knode(struct tcf_proto *tp, - struct tc_u_common *tp_c, +static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c, struct tc_u_knode *n) { struct tc_u_knode __rcu **ins; @@ -845,8 +840,7 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, static int u32_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, - unsigned long *arg, bool ovr) + struct nlattr **tca, unsigned long *arg, bool ovr) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *ht; @@ -1088,7 +1082,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) } static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t) { struct tc_u_knode *n = (struct tc_u_knode *)fh; struct tc_u_hnode *ht_up, *ht_down; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index d677b34..206dc24 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -389,7 +389,8 @@ static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab) static struct qdisc_rate_table *qdisc_rtab_list; -struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab) +struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, + struct nlattr *tab) { struct qdisc_rate_table *rtab; @@ -541,7 +542,8 @@ nla_put_failure: return -1; } -void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab) +void __qdisc_calculate_pkt_len(struct sk_buff *skb, + const struct qdisc_size_table *stab) { int pkt_len, slot; @@ -888,10 +890,10 @@ static struct lock_class_key qdisc_rx_lock; Parameters are passed via opt. 
*/ -static struct Qdisc * -qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, - struct Qdisc *p, u32 parent, u32 handle, - struct nlattr **tca, int *errp) +static struct Qdisc *qdisc_create(struct net_device *dev, + struct netdev_queue *dev_queue, + struct Qdisc *p, u32 parent, u32 handle, + struct nlattr **tca, int *errp) { int err; struct nlattr *kind = tca[TCA_KIND]; @@ -1073,7 +1075,8 @@ struct check_loop_arg { int depth; }; -static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w); +static int check_loop_fn(struct Qdisc *q, unsigned long cl, + struct qdisc_walker *w); static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth) { @@ -1450,7 +1453,8 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, } else { if (!tc_qdisc_dump_ignore(q) && tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) + cb->nlh->nlmsg_seq, NLM_F_MULTI, + RTM_NEWQDISC) <= 0) goto done; q_idx++; } @@ -1471,7 +1475,8 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, } if (!tc_qdisc_dump_ignore(q) && tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) + cb->nlh->nlmsg_seq, NLM_F_MULTI, + RTM_NEWQDISC) <= 0) goto done; q_idx++; } @@ -1505,7 +1510,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) s_q_idx = 0; q_idx = 0; - if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx, true) < 0) + if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx, + true) < 0) goto done; dev_queue = dev_ingress_queue(dev); @@ -1640,7 +1646,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n) if (cops->delete) err = cops->delete(q, cl); if (err == 0) - tclass_notify(net, skb, n, q, cl, RTM_DELTCLASS); + tclass_notify(net, skb, n, q, cl, + RTM_DELTCLASS); goto out; case RTM_GETTCLASS: err = tclass_notify(net, skb, n, q, cl, 
RTM_NEWTCLASS); @@ -1738,12 +1745,14 @@ struct qdisc_dump_args { struct netlink_callback *cb; }; -static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg) +static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, + struct qdisc_walker *arg) { struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg; return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid, - a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS); + a->cb->nlh->nlmsg_seq, NLM_F_MULTI, + RTM_NEWTCLASS); } static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb, @@ -1976,10 +1985,12 @@ static int __init pktsched_init(void) rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL); rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, NULL); + rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, + NULL); rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL); rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, NULL); + rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, + NULL); return 0; } -- cgit v1.1 From babd6134a54d70efe875fa5661a20eaecb63f278 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 18 Sep 2016 18:20:27 +0300 Subject: net/mlx5: Fix flow counter bulk command out mailbox allocation The FW command output length should be only the length of struct mlx5_cmd_fc_bulk out field. Failing to do so will cause the memcpy call which is invoked later in the driver to write over wrong memory address and corrupt kernel memory which results in random crashes. This bug was found using the kernel address sanitizer (kasan). Fixes: a351a1b03bf1 ('net/mlx5: Introduce bulk reading of flow counters') Signed-off-by: Roi Dayan Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 9134010..287ade1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -425,11 +425,11 @@ struct mlx5_cmd_fc_bulk * mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num) { struct mlx5_cmd_fc_bulk *b; - int outlen = sizeof(*b) + + int outlen = MLX5_ST_SZ_BYTES(query_flow_counter_out) + MLX5_ST_SZ_BYTES(traffic_counter) * num; - b = kzalloc(outlen, GFP_KERNEL); + b = kzalloc(sizeof(*b) + outlen, GFP_KERNEL); if (!b) return NULL; -- cgit v1.1 From 4eea37d7b92076fdeac2a21e5f4dbd92d286719d Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 18 Sep 2016 18:20:28 +0300 Subject: net/mlx5: E-Switch, Fix error flow in the SRIOV e-switch init code When enablement of the SRIOV e-switch in certain mode (switchdev or legacy) fails, we must set the mode to none. Otherwise, we'll run into double free based crashes when further attempting to deal with the e-switch (such as when disabling sriov or unloading the driver). Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 8b78f15..b247949 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1554,6 +1554,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) abort: esw_enable_vport(esw, 0, UC_ADDR_CHANGE); + esw->mode = SRIOV_NONE; return err; } -- cgit v1.1 From 6c419ba8e2580ab17c164db6e918e163d3537ec1 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 18 Sep 2016 18:20:29 +0300 Subject: net/mlx5: E-Switch, Handle mode change failures E-switch mode changes involve creating HW tables, potentially allocating netdevices, etc, and things can fail. Add an attempt to rollback to the existing mode when changing to the new mode fails. Only if rollback fails, getting proper SRIOV functionality requires module unload or sriov disablement/enablement. Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 3dc83a9..7de40e6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -446,7 +446,7 @@ out: static int esw_offloads_start(struct mlx5_eswitch *esw) { - int err, num_vfs = esw->dev->priv.sriov.num_vfs; + int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; if (esw->mode != SRIOV_LEGACY) { esw_warn(esw->dev, "Can't set offloads mode, SRIOV legacy not enabled\n"); @@ -455,8 +455,12 @@ static int esw_offloads_start(struct mlx5_eswitch *esw) mlx5_eswitch_disable_sriov(esw); err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS); - if (err) - esw_warn(esw->dev, "Failed set eswitch to offloads, err %d\n", err); + if (err) { + esw_warn(esw->dev, "Failed setting eswitch to offloads, err %d\n", err); + err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); + if (err1) + esw_warn(esw->dev, "Failed setting eswitch back to legacy, err %d\n", err1); + } return err; } @@ -508,12 +512,16 @@ create_ft_err: static int esw_offloads_stop(struct mlx5_eswitch *esw) { - int err, num_vfs = esw->dev->priv.sriov.num_vfs; + int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; mlx5_eswitch_disable_sriov(esw); err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); - if (err) - esw_warn(esw->dev, "Failed set eswitch legacy mode. 
err %d\n", err); + if (err) { + esw_warn(esw->dev, "Failed setting eswitch to legacy, err %d\n", err); + err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS); + if (err1) + esw_warn(esw->dev, "Failed setting eswitch back to offloads, err %d\n", err1); + } return err; } -- cgit v1.1 From a435a07f9164dda7c0c26e8ad758881f4bafc127 Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Sun, 18 Sep 2016 17:46:07 +0200 Subject: net: ipv6: fallback to full lookup if table lookup is unsuitable Commit 8c14586fc320 ("net: ipv6: Use passed in table for nexthop lookups") introduced a regression: insertion of an IPv6 route in a table not containing the appropriate connected route for the gateway but which contained a non-connected route (like a default gateway) fails while it was previously working: $ ip link add eth0 type dummy $ ip link set up dev eth0 $ ip addr add 2001:db8::1/64 dev eth0 $ ip route add ::/0 via 2001:db8::5 dev eth0 table 20 $ ip route add 2001:db8:cafe::1/128 via 2001:db8::6 dev eth0 table 20 RTNETLINK answers: No route to host $ ip -6 route show table 20 default via 2001:db8::5 dev eth0 metric 1024 pref medium After this patch, we get: $ ip route add 2001:db8:cafe::1/128 via 2001:db8::6 dev eth0 table 20 $ ip -6 route show table 20 2001:db8:cafe::1 via 2001:db8::6 dev eth0 metric 1024 pref medium default via 2001:db8::5 dev eth0 metric 1024 pref medium Fixes: 8c14586fc320 ("net: ipv6: Use passed in table for nexthop lookups") Signed-off-by: Vincent Bernat Acked-by: David Ahern Tested-by: David Ahern Signed-off-by: David S. 
Miller --- net/ipv6/route.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4981755..e3a224b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1986,9 +1986,18 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) if (!(gwa_type & IPV6_ADDR_UNICAST)) goto out; - if (cfg->fc_table) + if (cfg->fc_table) { grt = ip6_nh_lookup_table(net, cfg, gw_addr); + if (grt) { + if (grt->rt6i_flags & RTF_GATEWAY || + (dev && dev != grt->dst.dev)) { + ip6_rt_put(grt); + grt = NULL; + } + } + } + if (!grt) grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); -- cgit v1.1 From 878786d95e07ce2f5fb6e3cd8a6c2ed320339196 Mon Sep 17 00:00:00 2001 From: Rob Swindell Date: Tue, 20 Sep 2016 03:36:33 -0400 Subject: bnxt_en: Fix build error for kernels without RTC_LIB bnxt_hwrm_fw_set_time() now returns -EOPNOTSUPP when built for a kernel without RTC_LIB. Setting the firmware time is not critical to the successful completion of the firmware update process. Signed-off-by: Rob Swindell Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index cbc0b8a..a9f9f37 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4323,6 +4323,7 @@ hwrm_ver_get_exit: int bnxt_hwrm_fw_set_time(struct bnxt *bp) { +#if IS_ENABLED(CONFIG_RTC_LIB) struct hwrm_fw_set_time_input req = {0}; struct rtc_time tm; struct timeval tv; @@ -4340,6 +4341,9 @@ int bnxt_hwrm_fw_set_time(struct bnxt *bp) req.minute = tm.tm_min; req.second = tm.tm_sec; return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); +#else + return -EOPNOTSUPP; +#endif } static int bnxt_hwrm_port_qstats(struct bnxt *bp) -- cgit v1.1 From 3ed6e498b91a4dc5d0e8b6270a6c144061db2455 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 18 Sep 2016 21:17:19 +0200 Subject: MAINTAINERS: Add an entry for the core network DSA code The core distributed switch architecture code currently does not have a MAINTAINERS entry, which results in some contributions not landing in the right people's inboxes. Signed-off-by: Andrew Lunn Acked-by: Florian Fainelli Acked-by: Vivien Didelot Signed-off-by: David S. Miller --- MAINTAINERS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index a5e1270..247b418 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8160,6 +8160,15 @@ S: Maintained W: https://fedorahosted.org/dropwatch/ F: net/core/drop_monitor.c +NETWORKING [DSA] +M: Andrew Lunn +M: Vivien Didelot +M: Florian Fainelli +S: Maintained +F: net/dsa/ +F: include/net/dsa.h +F: drivers/net/dsa/ + NETWORKING [GENERAL] M: "David S. 
Miller" L: netdev@vger.kernel.org -- cgit v1.1 From 18c2d2c113eb330d260277350d09aae454e80177 Mon Sep 17 00:00:00 2001 From: Elad Raz Date: Mon, 19 Sep 2016 08:28:24 +0200 Subject: mlxsw: Change the RX LAG hash function from XOR to CRC Change the RX hash function from XOR to CRC in order to have better distribution of the traffic. Signed-off-by: Elad Raz Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 4e2354c..6460c72 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -1392,7 +1392,7 @@ static inline void mlxsw_reg_slcr_pack(char *payload, u16 lag_hash) { MLXSW_REG_ZERO(slcr, payload); mlxsw_reg_slcr_pp_set(payload, MLXSW_REG_SLCR_PP_GLOBAL); - mlxsw_reg_slcr_type_set(payload, MLXSW_REG_SLCR_TYPE_XOR); + mlxsw_reg_slcr_type_set(payload, MLXSW_REG_SLCR_TYPE_CRC); mlxsw_reg_slcr_lag_hash_set(payload, lag_hash); } -- cgit v1.1 From 1a9234e66eddb0f18447532ab3f12bd136473ed6 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 19 Sep 2016 08:29:26 +0200 Subject: mlxsw: spectrum: Fix sparse warnings drivers/net/ethernet/mellanox/mlxsw//spectrum.c:251:28: warning: symbol 'mlxsw_sp_span_entry_find' was not declared. Should it be static? drivers/net/ethernet/mellanox/mlxsw//spectrum.c:265:28: warning: symbol 'mlxsw_sp_span_entry_get' was not declared. Should it be static? drivers/net/ethernet/mellanox/mlxsw//spectrum.c:367:56: warning: mixing different enum types drivers/net/ethernet/mellanox/mlxsw//spectrum.c:367:56: int enum mlxsw_sp_span_type versus drivers/net/ethernet/mellanox/mlxsw//spectrum.c:367:56: int enum mlxsw_reg_mpar_i_e ... 
drivers/net/ethernet/mellanox/mlxsw//spectrum_buffers.c:598:32: warning: mixing different enum types drivers/net/ethernet/mellanox/mlxsw//spectrum_buffers.c:598:32: int enum mlxsw_reg_sbxx_dir versus drivers/net/ethernet/mellanox/mlxsw//spectrum_buffers.c:598:32: int enum devlink_sb_pool_type drivers/net/ethernet/mellanox/mlxsw//spectrum_buffers.c:600:39: warning: mixing different enum types drivers/net/ethernet/mellanox/mlxsw//spectrum_buffers.c:600:39: int enum mlxsw_reg_sbpr_mode versus drivers/net/ethernet/mellanox/mlxsw//spectrum_buffers.c:600:39: int enum devlink_sb_threshold_type ... drivers/net/ethernet/mellanox/mlxsw//spectrum_router.c:255:54: warning: mixing different enum types drivers/net/ethernet/mellanox/mlxsw//spectrum_router.c:255:54: int enum mlxsw_sp_l3proto versus drivers/net/ethernet/mellanox/mlxsw//spectrum_router.c:255:54: int enum mlxsw_reg_ralxx_protocol ... drivers/net/ethernet/mellanox/mlxsw//spectrum_router.c:1749:6: warning: symbol 'mlxsw_sp_fib_entry_put' was not declared. Should it be static? Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 12 ++++--- .../net/ethernet/mellanox/mlxsw/spectrum_buffers.c | 15 ++++---- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 41 ++++++++++++++-------- 3 files changed, 42 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 171f8dd..fa31261 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -248,7 +248,8 @@ static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp, span_entry->used = false; } -struct mlxsw_sp_span_entry *mlxsw_sp_span_entry_find(struct mlxsw_sp_port *port) +static struct mlxsw_sp_span_entry * +mlxsw_sp_span_entry_find(struct mlxsw_sp_port *port) { struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; int i; @@ -262,7 +263,8 @@ struct mlxsw_sp_span_entry *mlxsw_sp_span_entry_find(struct mlxsw_sp_port *port) return NULL; } -struct mlxsw_sp_span_entry *mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port) +static struct mlxsw_sp_span_entry +*mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port) { struct mlxsw_sp_span_entry *span_entry; @@ -364,7 +366,8 @@ mlxsw_sp_span_inspected_port_bind(struct mlxsw_sp_port *port, } /* bind the port to the SPAN entry */ - mlxsw_reg_mpar_pack(mpar_pl, port->local_port, type, true, pa_id); + mlxsw_reg_mpar_pack(mpar_pl, port->local_port, + (enum mlxsw_reg_mpar_i_e) type, true, pa_id); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl); if (err) goto err_mpar_reg_write; @@ -405,7 +408,8 @@ mlxsw_sp_span_inspected_port_unbind(struct mlxsw_sp_port *port, return; /* remove the inspected port */ - mlxsw_reg_mpar_pack(mpar_pl, port->local_port, type, false, pa_id); + mlxsw_reg_mpar_pack(mpar_pl, port->local_port, + (enum mlxsw_reg_mpar_i_e) type, false, pa_id); mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl); /* remove the SBIB buffer if it was egress SPAN */ diff --git 
a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 953b214..bcaed8a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -595,9 +595,9 @@ int mlxsw_sp_sb_pool_get(struct mlxsw_core *mlxsw_core, enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir); - pool_info->pool_type = dir; + pool_info->pool_type = (enum devlink_sb_pool_type) dir; pool_info->size = MLXSW_SP_CELLS_TO_BYTES(pr->size); - pool_info->threshold_type = pr->mode; + pool_info->threshold_type = (enum devlink_sb_threshold_type) pr->mode; return 0; } @@ -608,9 +608,10 @@ int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core, struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); u8 pool = pool_get(pool_index); enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); - enum mlxsw_reg_sbpr_mode mode = threshold_type; u32 pool_size = MLXSW_SP_BYTES_TO_CELLS(size); + enum mlxsw_reg_sbpr_mode mode; + mode = (enum mlxsw_reg_sbpr_mode) threshold_type; return mlxsw_sp_sb_pr_write(mlxsw_sp, pool, dir, mode, pool_size); } @@ -696,13 +697,13 @@ int mlxsw_sp_sb_tc_pool_bind_get(struct mlxsw_core_port *mlxsw_core_port, struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; u8 local_port = mlxsw_sp_port->local_port; u8 pg_buff = tc_index; - enum mlxsw_reg_sbxx_dir dir = pool_type; + enum mlxsw_reg_sbxx_dir dir = (enum mlxsw_reg_sbxx_dir) pool_type; struct mlxsw_sp_sb_cm *cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, pg_buff, dir); *p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, cm->pool, dir, cm->max_buff); - *p_pool_index = pool_index_get(cm->pool, pool_type); + *p_pool_index = pool_index_get(cm->pool, dir); return 0; } @@ -716,7 +717,7 @@ int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port, struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; u8 local_port = 
mlxsw_sp_port->local_port; u8 pg_buff = tc_index; - enum mlxsw_reg_sbxx_dir dir = pool_type; + enum mlxsw_reg_sbxx_dir dir = (enum mlxsw_reg_sbxx_dir) pool_type; u8 pool = pool_get(pool_index); u32 max_buff; int err; @@ -943,7 +944,7 @@ int mlxsw_sp_sb_occ_tc_port_bind_get(struct mlxsw_core_port *mlxsw_core_port, struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; u8 local_port = mlxsw_sp_port->local_port; u8 pg_buff = tc_index; - enum mlxsw_reg_sbxx_dir dir = pool_type; + enum mlxsw_reg_sbxx_dir dir = (enum mlxsw_reg_sbxx_dir) pool_type; struct mlxsw_sp_sb_cm *cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, pg_buff, dir); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 3f5c51d..4afb498 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -252,7 +252,9 @@ static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp, { char ralta_pl[MLXSW_REG_RALTA_LEN]; - mlxsw_reg_ralta_pack(ralta_pl, true, lpm_tree->proto, lpm_tree->id); + mlxsw_reg_ralta_pack(ralta_pl, true, + (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto, + lpm_tree->id); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); } @@ -261,7 +263,9 @@ static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp, { char ralta_pl[MLXSW_REG_RALTA_LEN]; - mlxsw_reg_ralta_pack(ralta_pl, false, lpm_tree->proto, lpm_tree->id); + mlxsw_reg_ralta_pack(ralta_pl, false, + (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto, + lpm_tree->id); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); } @@ -384,7 +388,9 @@ static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp, { char raltb_pl[MLXSW_REG_RALTB_LEN]; - mlxsw_reg_raltb_pack(raltb_pl, vr->id, vr->proto, vr->lpm_tree->id); + mlxsw_reg_raltb_pack(raltb_pl, vr->id, + (enum mlxsw_reg_ralxx_protocol) vr->proto, + vr->lpm_tree->id); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), 
raltb_pl); } @@ -394,7 +400,8 @@ static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp, char raltb_pl[MLXSW_REG_RALTB_LEN]; /* Bind to tree 0 which is default */ - mlxsw_reg_raltb_pack(raltb_pl, vr->id, vr->proto, 0); + mlxsw_reg_raltb_pack(raltb_pl, vr->id, + (enum mlxsw_reg_ralxx_protocol) vr->proto, 0); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); } @@ -1081,9 +1088,10 @@ static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp, { char raleu_pl[MLXSW_REG_RALEU_LEN]; - mlxsw_reg_raleu_pack(raleu_pl, vr->proto, vr->id, - adj_index, ecmp_size, - new_adj_index, new_ecmp_size); + mlxsw_reg_raleu_pack(raleu_pl, + (enum mlxsw_reg_ralxx_protocol) vr->proto, vr->id, + adj_index, ecmp_size, new_adj_index, + new_ecmp_size); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl); } @@ -1558,8 +1566,9 @@ static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp, trap_id = MLXSW_TRAP_ID_RTR_INGRESS0; } - mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id, - fib_entry->key.prefix_len, *p_dip); + mlxsw_reg_ralue_pack4(ralue_pl, + (enum mlxsw_reg_ralxx_protocol) vr->proto, op, + vr->id, fib_entry->key.prefix_len, *p_dip); mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id, adjacency_index, ecmp_size); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); @@ -1573,8 +1582,9 @@ static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp, u32 *p_dip = (u32 *) fib_entry->key.addr; struct mlxsw_sp_vr *vr = fib_entry->vr; - mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id, - fib_entry->key.prefix_len, *p_dip); + mlxsw_reg_ralue_pack4(ralue_pl, + (enum mlxsw_reg_ralxx_protocol) vr->proto, op, + vr->id, fib_entry->key.prefix_len, *p_dip); mlxsw_reg_ralue_act_local_pack(ralue_pl, MLXSW_REG_RALUE_TRAP_ACTION_NOP, 0, fib_entry->rif); @@ -1589,8 +1599,9 @@ static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp, u32 *p_dip = (u32 *) fib_entry->key.addr; struct mlxsw_sp_vr 
*vr = fib_entry->vr; - mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id, - fib_entry->key.prefix_len, *p_dip); + mlxsw_reg_ralue_pack4(ralue_pl, + (enum mlxsw_reg_ralxx_protocol) vr->proto, op, + vr->id, fib_entry->key.prefix_len, *p_dip); mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } @@ -1753,8 +1764,8 @@ mlxsw_sp_fib_entry_find(struct mlxsw_sp *mlxsw_sp, fib4->fi->fib_dev); } -void mlxsw_sp_fib_entry_put(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry) +static void mlxsw_sp_fib_entry_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_vr *vr = fib_entry->vr; -- cgit v1.1 From 1a21101d21d7ef056dfda1d7b843289e05ecd034 Mon Sep 17 00:00:00 2001 From: Raju Lakkaraju Date: Mon, 19 Sep 2016 15:33:54 +0530 Subject: net: phy: Add MAC-IF driver for Microsemi PHYs. All the review comments have been addressed; resending for review. This is the MAC interface feature. Microsemi PHYs can support an RGMII, RMII or GMII/MII interface between MAC and PHY. The MAC-IF function programs the right value based on the Device tree configuration. Tested on Beaglebone Black with VSC 8531 PHY. Signed-off-by: Raju Lakkaraju Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/mscc.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c index c09cc4a..d350debd 100644 --- a/drivers/net/phy/mscc.c +++ b/drivers/net/phy/mscc.c @@ -23,6 +23,16 @@ enum rgmii_rx_clock_delay { RGMII_RX_CLK_DELAY_3_4_NS = 7 }; +/* Microsemi VSC85xx PHY registers */ +/* IEEE 802.
Std Registers */ +#define MSCC_PHY_EXT_PHY_CNTL_1 23 +#define MAC_IF_SELECTION_MASK 0x1800 +#define MAC_IF_SELECTION_GMII 0 +#define MAC_IF_SELECTION_RMII 1 +#define MAC_IF_SELECTION_RGMII 2 +#define MAC_IF_SELECTION_POS 11 +#define FAR_END_LOOPBACK_MODE_MASK 0x0008 + #define MII_VSC85XX_INT_MASK 25 #define MII_VSC85XX_INT_MASK_MASK 0xa000 #define MII_VSC85XX_INT_STATUS 26 @@ -48,6 +58,42 @@ static int vsc85xx_phy_page_set(struct phy_device *phydev, u8 page) return rc; } +static int vsc85xx_mac_if_set(struct phy_device *phydev, + phy_interface_t interface) +{ + int rc; + u16 reg_val; + + mutex_lock(&phydev->lock); + reg_val = phy_read(phydev, MSCC_PHY_EXT_PHY_CNTL_1); + reg_val &= ~(MAC_IF_SELECTION_MASK); + switch (interface) { + case PHY_INTERFACE_MODE_RGMII: + reg_val |= (MAC_IF_SELECTION_RGMII << MAC_IF_SELECTION_POS); + break; + case PHY_INTERFACE_MODE_RMII: + reg_val |= (MAC_IF_SELECTION_RMII << MAC_IF_SELECTION_POS); + break; + case PHY_INTERFACE_MODE_MII: + case PHY_INTERFACE_MODE_GMII: + reg_val |= (MAC_IF_SELECTION_GMII << MAC_IF_SELECTION_POS); + break; + default: + rc = -EINVAL; + goto out_unlock; + } + rc = phy_write(phydev, MSCC_PHY_EXT_PHY_CNTL_1, reg_val); + if (rc != 0) + goto out_unlock; + + rc = genphy_soft_reset(phydev); + +out_unlock: + mutex_unlock(&phydev->lock); + + return rc; +} + static int vsc85xx_default_config(struct phy_device *phydev) { int rc; @@ -77,6 +123,11 @@ static int vsc85xx_config_init(struct phy_device *phydev) rc = vsc85xx_default_config(phydev); if (rc) return rc; + + rc = vsc85xx_mac_if_set(phydev, phydev->interface); + if (rc) + return rc; + rc = genphy_config_init(phydev); return rc; -- cgit v1.1 From fd07160bb7180cdd0afeb089d8cdfd66002f17e6 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 19 Sep 2016 12:53:40 +0200 Subject: xen-netfront: avoid packet loss when ethernet header crosses page boundary Small packet loss is reported on complex multi host network configurations including tunnels, NAT, ... 
My investigation led me to the following check in netback which drops packets: if (unlikely(txreq.size < ETH_HLEN)) { netdev_err(queue->vif->dev, "Bad packet size: %d\n", txreq.size); xenvif_tx_err(queue, &txreq, extra_count, idx); break; } But this check itself is legitimate. SKBs consist of a linear part (which has to have the ethernet header) and (optionally) a number of frags. Netfront transmits the head of the linear part up to the page boundary as the first request and all the rest becomes frags so when we're reconstructing the SKB in netback we can't distinguish between original frags and the 'tail' of the linear part. The first SKB needs to be at least ETH_HLEN size. So in case we have an SKB with its linear part starting too close to the page boundary the packet is lost. I see two ways to fix the issue: - Change the 'wire' protocol between netfront and netback to start keeping the original SKB structure. We'll have to add a flag indicating the fact that the particular request is a part of the original linear part and not a frag. We'll need to know the length of the linear part to pre-allocate memory. - Avoid transmitting SKBs with linear parts starting too close to the page boundary. That seems preferable short-term and shouldn't bring significant performance degradation as such packets are rare. That's what this patch is trying to achieve with skb_copy(). Signed-off-by: Vitaly Kuznetsov Acked-by: David Vrabel Signed-off-by: David S. 
Miller --- drivers/net/xen-netfront.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 96ccd4e..e17879d 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -565,6 +565,7 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) struct netfront_queue *queue = NULL; unsigned int num_queues = dev->real_num_tx_queues; u16 queue_index; + struct sk_buff *nskb; /* Drop the packet if no queues are set up */ if (num_queues < 1) @@ -593,6 +594,20 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) page = virt_to_page(skb->data); offset = offset_in_page(skb->data); + + /* The first req should be at least ETH_HLEN size or the packet will be + * dropped by netback. + */ + if (unlikely(PAGE_SIZE - offset < ETH_HLEN)) { + nskb = skb_copy(skb, GFP_ATOMIC); + if (!nskb) + goto drop; + dev_kfree_skb_any(skb); + skb = nskb; + page = virt_to_page(skb->data); + offset = offset_in_page(skb->data); + } + len = skb_headlen(skb); spin_lock_irqsave(&queue->tx_lock, flags); -- cgit v1.1 From ca26893f05e86497a86732768ec53cd38c0819ca Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 19 Sep 2016 19:00:09 +0800 Subject: rhashtable: Add rhlist interface The insecure_elasticity setting is an ugly wart brought out by users who need to insert duplicate objects (that is, distinct objects with identical keys) into the same table. In fact, those users have a much bigger problem. Once those duplicate objects are inserted, they don't have an interface to find them (unless you count the walker interface which walks over the entire table). Some users have resorted to doing a manual walk over the hash table which is of course broken because they don't handle the potential existence of multiple hash tables. The result is that they will break sporadically when they encounter a hash table resize/rehash. 
This patch provides a way out for those users, at the expense of an extra pointer per object. Essentially each object is now a list of objects carrying the same key. The hash table will only see the lists so nothing changes as far as rhashtable is concerned. To use this new interface, you need to insert a struct rhlist_head into your objects instead of struct rhash_head. While the hash table is unchanged, for type-safety you'll need to use struct rhltable instead of struct rhashtable. All the existing interfaces have been duplicated for rhlist, including the hash table walker. One missing feature is nulls marking because AFAIK the only potential user of it does not need duplicate objects. Should anyone need this it shouldn't be too hard to add. Signed-off-by: Herbert Xu Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 491 ++++++++++++++++++++++++++++++++++----------- lib/rhashtable.c | 258 +++++++++++++++++++----- 2 files changed, 583 insertions(+), 166 deletions(-) diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index fd82584..5c132d3 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -1,7 +1,7 @@ /* * Resizable, Scalable, Concurrent Hash Table * - * Copyright (c) 2015 Herbert Xu + * Copyright (c) 2015-2016 Herbert Xu * Copyright (c) 2014-2015 Thomas Graf * Copyright (c) 2008-2014 Patrick McHardy * @@ -53,6 +53,11 @@ struct rhash_head { struct rhash_head __rcu *next; }; +struct rhlist_head { + struct rhash_head rhead; + struct rhlist_head __rcu *next; +}; + /** * struct bucket_table - Table of hash buckets * @size: Number of hash buckets @@ -137,6 +142,7 @@ struct rhashtable_params { * @key_len: Key length for hashfn * @elasticity: Maximum chain length before rehash * @p: Configuration parameters + * @rhlist: True if this is an rhltable * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping * @lock: Spin lock to protect 
walker list @@ -147,12 +153,21 @@ struct rhashtable { unsigned int key_len; unsigned int elasticity; struct rhashtable_params p; + bool rhlist; struct work_struct run_work; struct mutex mutex; spinlock_t lock; }; /** + * struct rhltable - Hash table with duplicate objects in a list + * @ht: Underlying rhtable + */ +struct rhltable { + struct rhashtable ht; +}; + +/** * struct rhashtable_walker - Hash table walker * @list: List entry on list of walkers * @tbl: The table that we were walking over @@ -163,9 +178,10 @@ struct rhashtable_walker { }; /** - * struct rhashtable_iter - Hash table iterator, fits into netlink cb + * struct rhashtable_iter - Hash table iterator * @ht: Table to iterate through * @p: Current pointer + * @list: Current hash list pointer * @walker: Associated rhashtable walker * @slot: Current slot * @skip: Number of entries to skip in slot @@ -173,6 +189,7 @@ struct rhashtable_walker { struct rhashtable_iter { struct rhashtable *ht; struct rhash_head *p; + struct rhlist_head *list; struct rhashtable_walker walker; unsigned int slot; unsigned int skip; @@ -339,13 +356,11 @@ static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, int rhashtable_init(struct rhashtable *ht, const struct rhashtable_params *params); +int rhltable_init(struct rhltable *hlt, + const struct rhashtable_params *params); -struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, - const void *key, - struct rhash_head *obj, - struct bucket_table *old_tbl, - void **data); -int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl); +void *rhashtable_insert_slow(struct rhashtable *ht, const void *key, + struct rhash_head *obj); void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter); @@ -507,6 +522,31 @@ void rhashtable_destroy(struct rhashtable *ht); rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\ tbl, hash, member) +/** + * rhl_for_each_rcu - iterate over rcu hash table list + * @pos: 
the &struct rhlist_head to use as a loop cursor. + * @list: the head of the list + * + * This hash chain list-traversal primitive should be used on the + * list returned by rhltable_lookup. + */ +#define rhl_for_each_rcu(pos, list) \ + for (pos = list; pos; pos = rcu_dereference_raw(pos->next)) + +/** + * rhl_for_each_entry_rcu - iterate over rcu hash table list of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct rhlist_head to use as a loop cursor. + * @list: the head of the list + * @member: name of the &struct rhlist_head within the hashable struct. + * + * This hash chain list-traversal primitive should be used on the + * list returned by rhltable_lookup. + */ +#define rhl_for_each_entry_rcu(tpos, pos, list, member) \ + for (pos = list; pos && rht_entry(tpos, pos, member); \ + pos = rcu_dereference_raw(pos->next)) + static inline int rhashtable_compare(struct rhashtable_compare_arg *arg, const void *obj) { @@ -516,18 +556,8 @@ static inline int rhashtable_compare(struct rhashtable_compare_arg *arg, return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len); } -/** - * rhashtable_lookup_fast - search hash table, inlined version - * @ht: hash table - * @key: the pointer to the key - * @params: hash table parameters - * - * Computes the hash value for the key and traverses the bucket chain looking - * for a entry with an identical key. The first matching entry is returned. - * - * Returns the first entry on which the compare function returned true. - */ -static inline void *rhashtable_lookup_fast( +/* Internal function, do not use.
*/ +static inline struct rhash_head *__rhashtable_lookup( struct rhashtable *ht, const void *key, const struct rhashtable_params params) { @@ -539,8 +569,6 @@ static inline void *rhashtable_lookup_fast( struct rhash_head *he; unsigned int hash; - rcu_read_lock(); - tbl = rht_dereference_rcu(ht->tbl, ht); restart: hash = rht_key_hashfn(ht, tbl, key, params); @@ -549,8 +577,7 @@ restart: params.obj_cmpfn(&arg, rht_obj(ht, he)) : rhashtable_compare(&arg, rht_obj(ht, he))) continue; - rcu_read_unlock(); - return rht_obj(ht, he); + return he; } /* Ensure we see any new tables. */ @@ -559,96 +586,165 @@ restart: tbl = rht_dereference_rcu(tbl->future_tbl, ht); if (unlikely(tbl)) goto restart; - rcu_read_unlock(); return NULL; } +/** + * rhashtable_lookup - search hash table + * @ht: hash table + * @key: the pointer to the key + * @params: hash table parameters + * + * Computes the hash value for the key and traverses the bucket chain looking + * for a entry with an identical key. The first matching entry is returned. + * + * This must only be called under the RCU read lock. + * + * Returns the first entry on which the compare function returned true. + */ +static inline void *rhashtable_lookup( + struct rhashtable *ht, const void *key, + const struct rhashtable_params params) +{ + struct rhash_head *he = __rhashtable_lookup(ht, key, params); + + return he ? rht_obj(ht, he) : NULL; +} + +/** + * rhashtable_lookup_fast - search hash table, without RCU read lock + * @ht: hash table + * @key: the pointer to the key + * @params: hash table parameters + * + * Computes the hash value for the key and traverses the bucket chain looking + * for a entry with an identical key. The first matching entry is returned. + * + * Only use this function when you have other mechanisms guaranteeing + * that the object won't go away after the RCU read lock is released. + * + * Returns the first entry on which the compare function returned true. 
+ */ +static inline void *rhashtable_lookup_fast( + struct rhashtable *ht, const void *key, + const struct rhashtable_params params) +{ + void *obj; + + rcu_read_lock(); + obj = rhashtable_lookup(ht, key, params); + rcu_read_unlock(); + + return obj; +} + +/** + * rhltable_lookup - search hash list table + * @hlt: hash table + * @key: the pointer to the key + * @params: hash table parameters + * + * Computes the hash value for the key and traverses the bucket chain looking + * for a entry with an identical key. All matching entries are returned + * in a list. + * + * This must only be called under the RCU read lock. + * + * Returns the list of entries that match the given key. + */ +static inline struct rhlist_head *rhltable_lookup( + struct rhltable *hlt, const void *key, + const struct rhashtable_params params) +{ + struct rhash_head *he = __rhashtable_lookup(&hlt->ht, key, params); + + return he ? container_of(he, struct rhlist_head, rhead) : NULL; +} + /* Internal function, please use rhashtable_insert_fast() instead. This * function returns the existing element already in hashes in there is a clash, * otherwise it returns an error via ERR_PTR(). */ static inline void *__rhashtable_insert_fast( struct rhashtable *ht, const void *key, struct rhash_head *obj, - const struct rhashtable_params params) + const struct rhashtable_params params, bool rhlist) { struct rhashtable_compare_arg arg = { .ht = ht, .key = key, }; - struct bucket_table *tbl, *new_tbl; + struct rhash_head __rcu **pprev; + struct bucket_table *tbl; struct rhash_head *head; spinlock_t *lock; - unsigned int elasticity; unsigned int hash; - void *data = NULL; - int err; + int elasticity; + void *data; -restart: rcu_read_lock(); tbl = rht_dereference_rcu(ht->tbl, ht); + hash = rht_head_hashfn(ht, tbl, obj, params); + lock = rht_bucket_lock(tbl, hash); + spin_lock_bh(lock); - /* All insertions must grab the oldest table containing - * the hashed bucket that is yet to be rehashed. 
- */ - for (;;) { - hash = rht_head_hashfn(ht, tbl, obj, params); - lock = rht_bucket_lock(tbl, hash); - spin_lock_bh(lock); - - if (tbl->rehash <= hash) - break; - + if (unlikely(rht_dereference_bucket(tbl->future_tbl, tbl, hash))) { +slow_path: spin_unlock_bh(lock); - tbl = rht_dereference_rcu(tbl->future_tbl, ht); + rcu_read_unlock(); + return rhashtable_insert_slow(ht, key, obj); } - new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); - if (unlikely(new_tbl)) { - tbl = rhashtable_insert_slow(ht, key, obj, new_tbl, &data); - if (!IS_ERR_OR_NULL(tbl)) - goto slow_path; + elasticity = ht->elasticity; + pprev = &tbl->buckets[hash]; + rht_for_each(head, tbl, hash) { + struct rhlist_head *plist; + struct rhlist_head *list; + + elasticity--; + if (!key || + (params.obj_cmpfn ? + params.obj_cmpfn(&arg, rht_obj(ht, head)) : + rhashtable_compare(&arg, rht_obj(ht, head)))) + continue; + + data = rht_obj(ht, head); - err = PTR_ERR(tbl); - if (err == -EEXIST) - err = 0; + if (!rhlist) + goto out; - goto out; - } - err = -E2BIG; - if (unlikely(rht_grow_above_max(ht, tbl))) - goto out; + list = container_of(obj, struct rhlist_head, rhead); + plist = container_of(head, struct rhlist_head, rhead); - if (unlikely(rht_grow_above_100(ht, tbl))) { -slow_path: - spin_unlock_bh(lock); - err = rhashtable_insert_rehash(ht, tbl); - rcu_read_unlock(); - if (err) - return ERR_PTR(err); + RCU_INIT_POINTER(list->next, plist); + head = rht_dereference_bucket(head->next, tbl, hash); + RCU_INIT_POINTER(list->rhead.next, head); + rcu_assign_pointer(*pprev, obj); - goto restart; + goto good; } - err = 0; - elasticity = ht->elasticity; - rht_for_each(head, tbl, hash) { - if (key && - unlikely(!(params.obj_cmpfn ? 
- params.obj_cmpfn(&arg, rht_obj(ht, head)) : - rhashtable_compare(&arg, rht_obj(ht, head))))) { - data = rht_obj(ht, head); - goto out; - } - if (!--elasticity) - goto slow_path; - } + if (elasticity <= 0) + goto slow_path; + + data = ERR_PTR(-E2BIG); + if (unlikely(rht_grow_above_max(ht, tbl))) + goto out; + + if (unlikely(rht_grow_above_100(ht, tbl))) + goto slow_path; head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); RCU_INIT_POINTER(obj->next, head); + if (rhlist) { + struct rhlist_head *list; + + list = container_of(obj, struct rhlist_head, rhead); + RCU_INIT_POINTER(list->next, NULL); + } rcu_assign_pointer(tbl->buckets[hash], obj); @@ -656,11 +752,14 @@ slow_path: if (rht_grow_above_75(ht, tbl)) schedule_work(&ht->run_work); +good: + data = NULL; + out: spin_unlock_bh(lock); rcu_read_unlock(); - return err ? ERR_PTR(err) : data; + return data; } /** @@ -685,7 +784,7 @@ static inline int rhashtable_insert_fast( { void *ret; - ret = __rhashtable_insert_fast(ht, NULL, obj, params); + ret = __rhashtable_insert_fast(ht, NULL, obj, params, false); if (IS_ERR(ret)) return PTR_ERR(ret); @@ -693,6 +792,58 @@ static inline int rhashtable_insert_fast( } /** + * rhltable_insert_key - insert object into hash list table + * @hlt: hash list table + * @key: the pointer to the key + * @list: pointer to hash list head inside object + * @params: hash table parameters + * + * Will take a per bucket spinlock to protect against mutual mutations + * on the same bucket. Multiple insertions may occur in parallel unless + * they map to the same bucket lock. + * + * It is safe to call this function from atomic context. + * + * Will trigger an automatic deferred table resizing if the size grows + * beyond the watermark indicated by grow_decision() which can be passed + * to rhashtable_init(). 
+ */ +static inline int rhltable_insert_key( + struct rhltable *hlt, const void *key, struct rhlist_head *list, + const struct rhashtable_params params) +{ + return PTR_ERR(__rhashtable_insert_fast(&hlt->ht, key, &list->rhead, + params, true)); +} + +/** + * rhltable_insert - insert object into hash list table + * @hlt: hash list table + * @list: pointer to hash list head inside object + * @params: hash table parameters + * + * Will take a per bucket spinlock to protect against mutual mutations + * on the same bucket. Multiple insertions may occur in parallel unless + * they map to the same bucket lock. + * + * It is safe to call this function from atomic context. + * + * Will trigger an automatic deferred table resizing if the size grows + * beyond the watermark indicated by grow_decision() which can be passed + * to rhashtable_init(). + */ +static inline int rhltable_insert( + struct rhltable *hlt, struct rhlist_head *list, + const struct rhashtable_params params) +{ + const char *key = rht_obj(&hlt->ht, &list->rhead); + + key += params.key_offset; + + return rhltable_insert_key(hlt, key, list, params); +} + +/** * rhashtable_lookup_insert_fast - lookup and insert object into hash table * @ht: hash table * @obj: pointer to hash head inside object @@ -722,7 +873,8 @@ static inline int rhashtable_lookup_insert_fast( BUG_ON(ht->p.obj_hashfn); - ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params); + ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params, + false); if (IS_ERR(ret)) return PTR_ERR(ret); @@ -759,7 +911,7 @@ static inline int rhashtable_lookup_insert_key( BUG_ON(!ht->p.obj_hashfn || !key); - ret = __rhashtable_insert_fast(ht, key, obj, params); + ret = __rhashtable_insert_fast(ht, key, obj, params, false); if (IS_ERR(ret)) return PTR_ERR(ret); @@ -783,13 +935,14 @@ static inline void *rhashtable_lookup_get_insert_key( { BUG_ON(!ht->p.obj_hashfn || !key); - return __rhashtable_insert_fast(ht, key, obj, params); + return 
__rhashtable_insert_fast(ht, key, obj, params, false); } /* Internal function, please use rhashtable_remove_fast() instead */ -static inline int __rhashtable_remove_fast( +static inline int __rhashtable_remove_fast_one( struct rhashtable *ht, struct bucket_table *tbl, - struct rhash_head *obj, const struct rhashtable_params params) + struct rhash_head *obj, const struct rhashtable_params params, + bool rhlist) { struct rhash_head __rcu **pprev; struct rhash_head *he; @@ -804,39 +957,66 @@ static inline int __rhashtable_remove_fast( pprev = &tbl->buckets[hash]; rht_for_each(he, tbl, hash) { + struct rhlist_head *list; + + list = container_of(he, struct rhlist_head, rhead); + if (he != obj) { + struct rhlist_head __rcu **lpprev; + pprev = &he->next; - continue; + + if (!rhlist) + continue; + + do { + lpprev = &list->next; + list = rht_dereference_bucket(list->next, + tbl, hash); + } while (list && obj != &list->rhead); + + if (!list) + continue; + + list = rht_dereference_bucket(list->next, tbl, hash); + RCU_INIT_POINTER(*lpprev, list); + err = 0; + break; } - rcu_assign_pointer(*pprev, obj->next); - err = 0; + obj = rht_dereference_bucket(obj->next, tbl, hash); + err = 1; + + if (rhlist) { + list = rht_dereference_bucket(list->next, tbl, hash); + if (list) { + RCU_INIT_POINTER(list->rhead.next, obj); + obj = &list->rhead; + err = 0; + } + } + + rcu_assign_pointer(*pprev, obj); break; } spin_unlock_bh(lock); + if (err > 0) { + atomic_dec(&ht->nelems); + if (unlikely(ht->p.automatic_shrinking && + rht_shrink_below_30(ht, tbl))) + schedule_work(&ht->run_work); + err = 0; + } + return err; } -/** - * rhashtable_remove_fast - remove object from hash table - * @ht: hash table - * @obj: pointer to hash head inside object - * @params: hash table parameters - * - * Since the hash chain is single linked, the removal operation needs to - * walk the bucket chain upon removal. The removal operation is thus - * considerable slow if the hash table is not correctly sized. 
- * - * Will automatically shrink the table via rhashtable_expand() if the - * shrink_decision function specified at rhashtable_init() returns true. - * - * Returns zero on success, -ENOENT if the entry could not be found. - */ -static inline int rhashtable_remove_fast( +/* Internal function, please use rhashtable_remove_fast() instead */ +static inline int __rhashtable_remove_fast( struct rhashtable *ht, struct rhash_head *obj, - const struct rhashtable_params params) + const struct rhashtable_params params, bool rhlist) { struct bucket_table *tbl; int err; @@ -850,24 +1030,60 @@ static inline int rhashtable_remove_fast( * visible then that guarantees the entry to still be in * the old tbl if it exists. */ - while ((err = __rhashtable_remove_fast(ht, tbl, obj, params)) && + while ((err = __rhashtable_remove_fast_one(ht, tbl, obj, params, + rhlist)) && (tbl = rht_dereference_rcu(tbl->future_tbl, ht))) ; - if (err) - goto out; - - atomic_dec(&ht->nelems); - if (unlikely(ht->p.automatic_shrinking && - rht_shrink_below_30(ht, tbl))) - schedule_work(&ht->run_work); - -out: rcu_read_unlock(); return err; } +/** + * rhashtable_remove_fast - remove object from hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * @params: hash table parameters + * + * Since the hash chain is single linked, the removal operation needs to + * walk the bucket chain upon removal. The removal operation is thus + * considerable slow if the hash table is not correctly sized. + * + * Will automatically shrink the table via rhashtable_expand() if the + * shrink_decision function specified at rhashtable_init() returns true. + * + * Returns zero on success, -ENOENT if the entry could not be found. 
+ */ +static inline int rhashtable_remove_fast( + struct rhashtable *ht, struct rhash_head *obj, + const struct rhashtable_params params) +{ + return __rhashtable_remove_fast(ht, obj, params, false); +} + +/** + * rhltable_remove - remove object from hash list table + * @hlt: hash list table + * @list: pointer to hash list head inside object + * @params: hash table parameters + * + * Since the hash chain is singly linked, the removal operation needs to + * walk the bucket chain upon removal. The removal operation is thus + * considerably slow if the hash table is not correctly sized. + * + * Will automatically shrink the table via rhashtable_expand() if the + * shrink_decision function specified at rhashtable_init() returns true. + * + * Returns zero on success, -ENOENT if the entry could not be found. + */ +static inline int rhltable_remove( + struct rhltable *hlt, struct rhlist_head *list, + const struct rhashtable_params params) +{ + return __rhashtable_remove_fast(&hlt->ht, &list->rhead, params, true); +} + /* Internal function, please use rhashtable_replace_fast() instead */ static inline int __rhashtable_replace_fast( struct rhashtable *ht, struct bucket_table *tbl, @@ -958,4 +1174,51 @@ static inline int rhashtable_walk_init(struct rhashtable *ht, return 0; } +/** + * rhltable_walk_enter - Initialise an iterator + * @hlt: Table to walk over + * @iter: Hash table Iterator + * + * This function prepares a hash table walk. + * + * Note that if you restart a walk after rhashtable_walk_stop you + * may see the same object twice. Also, you may miss objects if + * there are removals in between rhashtable_walk_stop and the next + * call to rhashtable_walk_start. + * + * For a completely stable walk you should construct your own data + * structure outside the hash table. + * + * This function may sleep so you must not call it from interrupt + * context or with spin locks held. + * + * You must call rhashtable_walk_exit after this function returns.
+ */ +static inline void rhltable_walk_enter(struct rhltable *hlt, + struct rhashtable_iter *iter) +{ + return rhashtable_walk_enter(&hlt->ht, iter); +} + +/** + * rhltable_free_and_destroy - free elements and destroy hash list table + * @hlt: the hash list table to destroy + * @free_fn: callback to release resources of element + * @arg: pointer passed to free_fn + * + * See documentation for rhashtable_free_and_destroy. + */ +static inline void rhltable_free_and_destroy(struct rhltable *hlt, + void (*free_fn)(void *ptr, + void *arg), + void *arg) +{ + return rhashtable_free_and_destroy(&hlt->ht, free_fn, arg); +} + +static inline void rhltable_destroy(struct rhltable *hlt) +{ + return rhltable_free_and_destroy(hlt, NULL, NULL); +} + #endif /* _LINUX_RHASHTABLE_H */ diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 06c2872..32d0ad0 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -378,22 +378,8 @@ static void rht_deferred_worker(struct work_struct *work) schedule_work(&ht->run_work); } -static bool rhashtable_check_elasticity(struct rhashtable *ht, - struct bucket_table *tbl, - unsigned int hash) -{ - unsigned int elasticity = ht->elasticity; - struct rhash_head *head; - - rht_for_each(head, tbl, hash) - if (!--elasticity) - return true; - - return false; -} - -int rhashtable_insert_rehash(struct rhashtable *ht, - struct bucket_table *tbl) +static int rhashtable_insert_rehash(struct rhashtable *ht, + struct bucket_table *tbl) { struct bucket_table *old_tbl; struct bucket_table *new_tbl; @@ -439,57 +425,165 @@ fail: return err; } -EXPORT_SYMBOL_GPL(rhashtable_insert_rehash); -struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, - const void *key, - struct rhash_head *obj, - struct bucket_table *tbl, - void **data) +static void *rhashtable_lookup_one(struct rhashtable *ht, + struct bucket_table *tbl, unsigned int hash, + const void *key, struct rhash_head *obj) { + struct rhashtable_compare_arg arg = { + .ht = ht, + .key = key, + }; + struct 
rhash_head __rcu **pprev; struct rhash_head *head; - unsigned int hash; - int err; + int elasticity; - tbl = rhashtable_last_table(ht, tbl); - hash = head_hashfn(ht, tbl, obj); - spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING); - - err = -EEXIST; - if (key) { - *data = rhashtable_lookup_fast(ht, key, ht->p); - if (*data) - goto exit; + elasticity = ht->elasticity; + pprev = &tbl->buckets[hash]; + rht_for_each(head, tbl, hash) { + struct rhlist_head *list; + struct rhlist_head *plist; + + elasticity--; + if (!key || + (ht->p.obj_cmpfn ? + ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) : + rhashtable_compare(&arg, rht_obj(ht, head)))) + continue; + + if (!ht->rhlist) + return rht_obj(ht, head); + + list = container_of(obj, struct rhlist_head, rhead); + plist = container_of(head, struct rhlist_head, rhead); + + RCU_INIT_POINTER(list->next, plist); + head = rht_dereference_bucket(head->next, tbl, hash); + RCU_INIT_POINTER(list->rhead.next, head); + rcu_assign_pointer(*pprev, obj); + + return NULL; } - err = -E2BIG; - if (unlikely(rht_grow_above_max(ht, tbl))) - goto exit; + if (elasticity <= 0) + return ERR_PTR(-EAGAIN); + + return ERR_PTR(-ENOENT); +} + +static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, + struct bucket_table *tbl, + unsigned int hash, + struct rhash_head *obj, + void *data) +{ + struct bucket_table *new_tbl; + struct rhash_head *head; + + if (!IS_ERR_OR_NULL(data)) + return ERR_PTR(-EEXIST); - err = -EAGAIN; - if (rhashtable_check_elasticity(ht, tbl, hash) || - rht_grow_above_100(ht, tbl)) - goto exit; + if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT) + return ERR_CAST(data); - err = 0; + new_tbl = rcu_dereference(tbl->future_tbl); + if (new_tbl) + return new_tbl; + + if (PTR_ERR(data) != -ENOENT) + return ERR_CAST(data); + + if (unlikely(rht_grow_above_max(ht, tbl))) + return ERR_PTR(-E2BIG); + + if (unlikely(rht_grow_above_100(ht, tbl))) + return ERR_PTR(-EAGAIN); head = 
rht_dereference_bucket(tbl->buckets[hash], tbl, hash); RCU_INIT_POINTER(obj->next, head); + if (ht->rhlist) { + struct rhlist_head *list; + + list = container_of(obj, struct rhlist_head, rhead); + RCU_INIT_POINTER(list->next, NULL); + } rcu_assign_pointer(tbl->buckets[hash], obj); atomic_inc(&ht->nelems); + if (rht_grow_above_75(ht, tbl)) + schedule_work(&ht->run_work); -exit: - spin_unlock(rht_bucket_lock(tbl, hash)); + return NULL; +} - if (err == 0) - return NULL; - else if (err == -EAGAIN) - return tbl; - else - return ERR_PTR(err); +static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, + struct rhash_head *obj) +{ + struct bucket_table *new_tbl; + struct bucket_table *tbl; + unsigned int hash; + spinlock_t *lock; + void *data; + + tbl = rcu_dereference(ht->tbl); + + /* All insertions must grab the oldest table containing + * the hashed bucket that is yet to be rehashed. + */ + for (;;) { + hash = rht_head_hashfn(ht, tbl, obj, ht->p); + lock = rht_bucket_lock(tbl, hash); + spin_lock_bh(lock); + + if (tbl->rehash <= hash) + break; + + spin_unlock_bh(lock); + tbl = rcu_dereference(tbl->future_tbl); + } + + data = rhashtable_lookup_one(ht, tbl, hash, key, obj); + new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data); + if (PTR_ERR(new_tbl) != -EEXIST) + data = ERR_CAST(new_tbl); + + while (!IS_ERR_OR_NULL(new_tbl)) { + tbl = new_tbl; + hash = rht_head_hashfn(ht, tbl, obj, ht->p); + spin_lock_nested(rht_bucket_lock(tbl, hash), + SINGLE_DEPTH_NESTING); + + data = rhashtable_lookup_one(ht, tbl, hash, key, obj); + new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data); + if (PTR_ERR(new_tbl) != -EEXIST) + data = ERR_CAST(new_tbl); + + spin_unlock(rht_bucket_lock(tbl, hash)); + } + + spin_unlock_bh(lock); + + if (PTR_ERR(data) == -EAGAIN) + data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?: + -EAGAIN); + + return data; +} + +void *rhashtable_insert_slow(struct rhashtable *ht, const void *key, + struct rhash_head *obj) +{ + void *data; + + 
do { + rcu_read_lock(); + data = rhashtable_try_insert(ht, key, obj); + rcu_read_unlock(); + } while (PTR_ERR(data) == -EAGAIN); + + return data; } EXPORT_SYMBOL_GPL(rhashtable_insert_slow); @@ -593,11 +687,16 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_start); void *rhashtable_walk_next(struct rhashtable_iter *iter) { struct bucket_table *tbl = iter->walker.tbl; + struct rhlist_head *list = iter->list; struct rhashtable *ht = iter->ht; struct rhash_head *p = iter->p; + bool rhlist = ht->rhlist; if (p) { - p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot); + if (!rhlist || !(list = rcu_dereference(list->next))) { + p = rcu_dereference(p->next); + list = container_of(p, struct rhlist_head, rhead); + } goto next; } @@ -605,6 +704,18 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter) int skip = iter->skip; rht_for_each_rcu(p, tbl, iter->slot) { + if (rhlist) { + list = container_of(p, struct rhlist_head, + rhead); + do { + if (!skip) + goto next; + skip--; + list = rcu_dereference(list->next); + } while (list); + + continue; + } if (!skip) break; skip--; @@ -614,7 +725,8 @@ next: if (!rht_is_a_nulls(p)) { iter->skip++; iter->p = p; - return rht_obj(ht, p); + iter->list = list; + return rht_obj(ht, rhlist ? &list->rhead : p); } iter->skip = 0; @@ -803,6 +915,48 @@ int rhashtable_init(struct rhashtable *ht, EXPORT_SYMBOL_GPL(rhashtable_init); /** + * rhltable_init - initialize a new hash list table + * @hlt: hash list table to be initialized + * @params: configuration parameters + * + * Initializes a new hash list table. + * + * See documentation for rhashtable_init. + */ +int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params) +{ + int err; + + /* No rhlist NULLs marking for now. 
*/ + if (params->nulls_base) + return -EINVAL; + + err = rhashtable_init(&hlt->ht, params); + hlt->ht.rhlist = true; + return err; +} +EXPORT_SYMBOL_GPL(rhltable_init); + +static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj, + void (*free_fn)(void *ptr, void *arg), + void *arg) +{ + struct rhlist_head *list; + + if (!ht->rhlist) { + free_fn(rht_obj(ht, obj), arg); + return; + } + + list = container_of(obj, struct rhlist_head, rhead); + do { + obj = &list->rhead; + list = rht_dereference(list->next, ht); + free_fn(rht_obj(ht, obj), arg); + } while (list); +} + +/** * rhashtable_free_and_destroy - free elements and destroy hash table * @ht: the hash table to destroy * @free_fn: callback to release resources of element @@ -839,7 +993,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, pos = next, next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL) - free_fn(rht_obj(ht, pos), arg); + rhashtable_free_one(ht, pos, free_fn, arg); } } -- cgit v1.1 From 83e7e4ce9e93c3b020497144f4354b62aed5d894 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 19 Sep 2016 19:00:10 +0800 Subject: mac80211: Use rhltable instead of rhashtable mac80211 currently uses rhashtable with insecure_elasticity set to true. The latter is because of duplicate objects. What's more, mac80211 walks the rhashtable chains by hand which is broken as rhashtable may contain multiple tables due to resizing or rehashing. This patch fixes it by converting it to the newly added rhltable interface which is designed for use with duplicate objects. With rhltable a lookup returns a list of objects instead of a single one. This is then fed into the existing for_each_sta_info macro. This patch also deletes the sta_addr_hash function since rhashtable defaults to jhash. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/rx.c | 7 ++----- net/mac80211/sta_info.c | 52 +++++++++++++++++++--------------------------- net/mac80211/sta_info.h | 19 +++++++---------- net/mac80211/status.c | 7 ++----- 5 files changed, 33 insertions(+), 54 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index c71c735..e496dee 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1213,7 +1213,7 @@ struct ieee80211_local { spinlock_t tim_lock; unsigned long num_sta; struct list_head sta_list; - struct rhashtable sta_hash; + struct rhltable sta_hash; struct timer_list sta_cleanup; int sta_generation; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index e796060..f7cf342 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4003,7 +4003,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, __le16 fc; struct ieee80211_rx_data rx; struct ieee80211_sub_if_data *prev; - struct rhash_head *tmp; + struct rhlist_head *tmp; int err = 0; fc = ((struct ieee80211_hdr *)skb->data)->frame_control; @@ -4046,13 +4046,10 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, goto out; } else if (ieee80211_is_data(fc)) { struct sta_info *sta, *prev_sta; - const struct bucket_table *tbl; prev_sta = NULL; - tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash); - - for_each_sta_info(local, tbl, hdr->addr2, sta, tmp) { + for_each_sta_info(local, hdr->addr2, sta, tmp) { if (!prev_sta) { prev_sta = sta; continue; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 1b1b28f..c803e2c 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -67,12 +67,10 @@ static const struct rhashtable_params sta_rht_params = { .nelem_hint = 3, /* start small */ - .insecure_elasticity = true, /* Disable chain-length checks. 
*/ .automatic_shrinking = true, .head_offset = offsetof(struct sta_info, hash_node), .key_offset = offsetof(struct sta_info, addr), .key_len = ETH_ALEN, - .hashfn = sta_addr_hash, .max_size = CONFIG_MAC80211_STA_HASH_MAX_SIZE, }; @@ -80,8 +78,8 @@ static const struct rhashtable_params sta_rht_params = { static int sta_info_hash_del(struct ieee80211_local *local, struct sta_info *sta) { - return rhashtable_remove_fast(&local->sta_hash, &sta->hash_node, - sta_rht_params); + return rhltable_remove(&local->sta_hash, &sta->hash_node, + sta_rht_params); } static void __cleanup_single_sta(struct sta_info *sta) @@ -157,19 +155,22 @@ static void cleanup_single_sta(struct sta_info *sta) sta_info_free(local, sta); } +struct rhlist_head *sta_info_hash_lookup(struct ieee80211_local *local, + const u8 *addr) +{ + return rhltable_lookup(&local->sta_hash, addr, sta_rht_params); +} + /* protected by RCU */ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct ieee80211_local *local = sdata->local; + struct rhlist_head *tmp; struct sta_info *sta; - struct rhash_head *tmp; - const struct bucket_table *tbl; rcu_read_lock(); - tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash); - - for_each_sta_info(local, tbl, addr, sta, tmp) { + for_each_sta_info(local, addr, sta, tmp) { if (sta->sdata == sdata) { rcu_read_unlock(); /* this is safe as the caller must already hold @@ -190,14 +191,11 @@ struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct ieee80211_local *local = sdata->local; + struct rhlist_head *tmp; struct sta_info *sta; - struct rhash_head *tmp; - const struct bucket_table *tbl; rcu_read_lock(); - tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash); - - for_each_sta_info(local, tbl, addr, sta, tmp) { + for_each_sta_info(local, addr, sta, tmp) { if (sta->sdata == sdata || (sta->sdata->bss && sta->sdata->bss == sdata->bss)) { rcu_read_unlock(); @@ -263,8 +261,8 @@ void 
sta_info_free(struct ieee80211_local *local, struct sta_info *sta) static int sta_info_hash_add(struct ieee80211_local *local, struct sta_info *sta) { - return rhashtable_insert_fast(&local->sta_hash, &sta->hash_node, - sta_rht_params); + return rhltable_insert(&local->sta_hash, &sta->hash_node, + sta_rht_params); } static void sta_deliver_ps_frames(struct work_struct *wk) @@ -453,9 +451,9 @@ static int sta_info_insert_check(struct sta_info *sta) is_multicast_ether_addr(sta->sta.addr))) return -EINVAL; - /* Strictly speaking this isn't necessary as we hold the mutex, but - * the rhashtable code can't really deal with that distinction. We - * do require the mutex for correctness though. + /* The RCU read lock is required by rhashtable due to + * asynchronous resize/rehash. We also require the mutex + * for correctness. */ rcu_read_lock(); lockdep_assert_held(&sdata->local->sta_mtx); @@ -1043,16 +1041,11 @@ static void sta_info_cleanup(unsigned long data) round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL)); } -u32 sta_addr_hash(const void *key, u32 length, u32 seed) -{ - return jhash(key, ETH_ALEN, seed); -} - int sta_info_init(struct ieee80211_local *local) { int err; - err = rhashtable_init(&local->sta_hash, &sta_rht_params); + err = rhltable_init(&local->sta_hash, &sta_rht_params); if (err) return err; @@ -1068,7 +1061,7 @@ int sta_info_init(struct ieee80211_local *local) void sta_info_stop(struct ieee80211_local *local) { del_timer_sync(&local->sta_cleanup); - rhashtable_destroy(&local->sta_hash); + rhltable_destroy(&local->sta_hash); } @@ -1138,17 +1131,14 @@ struct ieee80211_sta *ieee80211_find_sta_by_ifaddr(struct ieee80211_hw *hw, const u8 *localaddr) { struct ieee80211_local *local = hw_to_local(hw); + struct rhlist_head *tmp; struct sta_info *sta; - struct rhash_head *tmp; - const struct bucket_table *tbl; - - tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash); /* * Just return a random station if localaddr is NULL * ... first in list. 
*/ - for_each_sta_info(local, tbl, addr, sta, tmp) { + for_each_sta_info(local, addr, sta, tmp) { if (localaddr && !ether_addr_equal(sta->sdata->vif.addr, localaddr)) continue; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 530231b..ed5fcb9 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -455,7 +455,7 @@ struct sta_info { /* General information, mostly static */ struct list_head list, free_list; struct rcu_head rcu_head; - struct rhash_head hash_node; + struct rhlist_head hash_node; u8 addr[ETH_ALEN]; struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; @@ -638,6 +638,9 @@ rcu_dereference_protected_tid_tx(struct sta_info *sta, int tid) */ #define STA_INFO_CLEANUP_INTERVAL (10 * HZ) +struct rhlist_head *sta_info_hash_lookup(struct ieee80211_local *local, + const u8 *addr); + /* * Get a STA info, must be under RCU read lock. */ @@ -647,17 +650,9 @@ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr); -u32 sta_addr_hash(const void *key, u32 length, u32 seed); - -#define _sta_bucket_idx(_tbl, _a) \ - rht_bucket_index(_tbl, sta_addr_hash(_a, ETH_ALEN, (_tbl)->hash_rnd)) - -#define for_each_sta_info(local, tbl, _addr, _sta, _tmp) \ - rht_for_each_entry_rcu(_sta, _tmp, tbl, \ - _sta_bucket_idx(tbl, _addr), \ - hash_node) \ - /* compare address and run code only if it matches */ \ - if (ether_addr_equal(_sta->addr, (_addr))) +#define for_each_sta_info(local, _addr, _sta, _tmp) \ + rhl_for_each_entry_rcu(_sta, _tmp, \ + sta_info_hash_lookup(local, _addr), hash_node) /* * Get STA info by index, BROKEN! 
diff --git a/net/mac80211/status.c b/net/mac80211/status.c index ea39f8a7..ddf71c6 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -746,8 +746,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); __le16 fc; struct ieee80211_supported_band *sband; + struct rhlist_head *tmp; struct sta_info *sta; - struct rhash_head *tmp; int retry_count; int rates_idx; bool send_to_cooked; @@ -755,7 +755,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) struct ieee80211_bar *bar; int shift = 0; int tid = IEEE80211_NUM_TIDS; - const struct bucket_table *tbl; rates_idx = ieee80211_tx_get_rates(hw, info, &retry_count); @@ -764,9 +763,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) sband = local->hw.wiphy->bands[info->band]; fc = hdr->frame_control; - tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash); - - for_each_sta_info(local, tbl, hdr->addr1, sta, tmp) { + for_each_sta_info(local, hdr->addr1, sta, tmp) { /* skip wrong virtual interface */ if (!ether_addr_equal(hdr->addr2, sta->sdata->vif.addr)) continue; -- cgit v1.1 From 06f8ec9041f02d44bb0b75d47668e2fe00d5e0c3 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Mon, 19 Sep 2016 15:28:00 +0200 Subject: net-next: dsa: fix duplicate invocation of set_addr() commit 83c0afaec7b730b ("net: dsa: Add new binding implementation") has a duplicate invocation of the set_addr() operation callback. Remove one of them. Signed-off-by: John Crispin Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- net/dsa/dsa2.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 8278385..cffc19e 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -308,10 +308,6 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds) if (err < 0) return err; - err = ds->ops->set_addr(ds, dst->master_netdev->dev_addr); - if (err < 0) - return err; - if (!ds->slave_mii_bus && ds->ops->phy_read) { ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev); if (!ds->slave_mii_bus) -- cgit v1.1 From 092183df0fa1f4b49baad3a980c55d55de07dfb7 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Mon, 19 Sep 2016 15:28:01 +0200 Subject: net-next: dsa: make the set_addr() operation optional Only 1 of the 3 drivers currently has a set_addr() operation. Make the set_addr() callback optional to reduce the amount of empty stubs inside the drivers. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- net/dsa/dsa.c | 8 +++++--- net/dsa/dsa2.c | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 66e31ac..a6902c1 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -378,9 +378,11 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) if (ret < 0) goto out; - ret = ops->set_addr(ds, dst->master_netdev->dev_addr); - if (ret < 0) - goto out; + if (ops->set_addr) { + ret = ops->set_addr(ds, dst->master_netdev->dev_addr); + if (ret < 0) + goto out; + } if (!ds->slave_mii_bus && ops->phy_read) { ds->slave_mii_bus = devm_mdiobus_alloc(parent); diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index cffc19e..f8a7d9a 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -304,9 +304,11 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds) if (err < 0) return err; - err = ds->ops->set_addr(ds, dst->master_netdev->dev_addr); - if (err < 0) - return err; + if (ds->ops->set_addr) { + err = ds->ops->set_addr(ds, dst->master_netdev->dev_addr); + if (err < 0) + 
return err; + } if (!ds->slave_mii_bus && ds->ops->phy_read) { ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev); -- cgit v1.1 From 1f449736525addd6fcce674d654bd1471748484e Mon Sep 17 00:00:00 2001 From: John Crispin Date: Mon, 19 Sep 2016 15:28:02 +0200 Subject: net-next: dsa: b53: remove empty set_addr() stub The set_addr() callback is now optional. Remove the empty stub that b53 has. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 0afc2e5..1a492c0 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -764,11 +764,6 @@ static int b53_get_sset_count(struct dsa_switch *ds) return b53_get_mib_size(dev); } -static int b53_set_addr(struct dsa_switch *ds, u8 *addr) -{ - return 0; -} - static int b53_setup(struct dsa_switch *ds) { struct b53_device *dev = ds->priv; @@ -1466,7 +1461,6 @@ static enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds) static struct dsa_switch_ops b53_switch_ops = { .get_tag_protocol = b53_get_tag_protocol, .setup = b53_setup, - .set_addr = b53_set_addr, .get_strings = b53_get_strings, .get_ethtool_stats = b53_get_ethtool_stats, .get_sset_count = b53_get_sset_count, -- cgit v1.1 From 8941ee36e3266a5efca52e32c4dc214f202c751a Mon Sep 17 00:00:00 2001 From: John Crispin Date: Mon, 19 Sep 2016 15:28:03 +0200 Subject: net-next: dsa: qca8k: remove empty set_addr() stub The set_addr() callback is now optional. Remove the empty stub that qca8k has. Signed-off-by: John Crispin Signed-off-by: David S. 
Miller --- drivers/net/dsa/qca8k.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 7f3f178..4788a89 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -586,13 +586,6 @@ qca8k_setup(struct dsa_switch *ds) } static int -qca8k_set_addr(struct dsa_switch *ds, u8 *addr) -{ - /* The subsystem always calls this function so add an empty stub */ - return 0; -} - -static int qca8k_phy_read(struct dsa_switch *ds, int phy, int regnum) { struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; @@ -921,7 +914,6 @@ qca8k_get_tag_protocol(struct dsa_switch *ds) static struct dsa_switch_ops qca8k_switch_ops = { .get_tag_protocol = qca8k_get_tag_protocol, .setup = qca8k_setup, - .set_addr = qca8k_set_addr, .get_strings = qca8k_get_strings, .phy_read = qca8k_phy_read, .phy_write = qca8k_phy_write, -- cgit v1.1 From 67a99b7061c07b190ac6c39f136afedbb7aa86e9 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 19 Sep 2016 17:47:41 +0300 Subject: qed: Fix stack corruption on probe Commit fe56b9e6a8d95 ("qed: Add module with basic common support") introduced a stack corruption during probe: a local struct holding data to be sent to the management firmware is filled incorrectly; the data is written outside of the struct and corrupts the stack. Changes from v1: ---------------- - Correct the value written [Caught by David Laight] Fixes: fe56b9e6a8d95 ("qed: Add module with basic common support") Signed-off-by: Yuval Mintz Signed-off-by: David S.
Miller --- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index a240f26..f776a77 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -1153,8 +1153,8 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn, p_drv_version = &union_data.drv_version; p_drv_version->version = p_ver->version; - for (i = 0; i < MCP_DRV_VER_STR_SIZE - 1; i += 4) { - val = cpu_to_be32(p_ver->name[i]); + for (i = 0; i < (MCP_DRV_VER_STR_SIZE - 4) / sizeof(u32); i++) { + val = cpu_to_be32(*((u32 *)&p_ver->name[i * sizeof(u32)])); *(__be32 *)&p_drv_version->name[i * sizeof(u32)] = val; } -- cgit v1.1 From 5737f6c92681939e417579b421f81f035e57c582 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 19 Sep 2016 17:46:38 +0200 Subject: mlx4: add missed recycle opportunity for XDP_TX on TX failure Correct drop handling for XDP_TX on TX failure, which was recently added in commit 95357907ae73 ("mlx4: fix XDP_TX is acting like XDP_PASS on TX ring full"). The change missed an opportunity for recycling the RX page, instead of going through the page allocator, like the regular XDP_DROP action does. This patch seizes the opportunity, by going through the XDP_DROP case. Fixes: 95357907ae73 ("mlx4: fix XDP_TX is acting like XDP_PASS on TX ring full") Signed-off-by: Jesper Dangaard Brouer Reviewed-by: Tariq Toukan Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index c80073e..c46355b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -906,11 +906,12 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud length, tx_index, &doorbell_pending)) goto consumed; - goto next; /* Drop on xmit failure */ + goto xdp_drop; /* Drop on xmit failure */ default: bpf_warn_invalid_xdp_action(act); case XDP_ABORTED: case XDP_DROP: +xdp_drop: if (mlx4_en_rx_recycle(ring, frags)) goto consumed; goto next; -- cgit v1.1 From f5beeb1851ea6f8cfcf2657f26cb24c0582b4945 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 8 Sep 2016 09:57:07 +0200 Subject: fs/proc/kcore.c: Make bounce buffer global for read Next patch adds bounce buffer for ktext area, so it's convenient to have single bounce buffer for both vmalloc/module and ktext cases. 
Suggested-by: Linus Torvalds Signed-off-by: Jiri Olsa Acked-by: Kees Cook Signed-off-by: Linus Torvalds --- fs/proc/kcore.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index a939f5e..bd3ac9d 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -430,6 +430,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff) static ssize_t read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) { + char *buf = file->private_data; ssize_t acc = 0; size_t size, tsz; size_t elf_buflen; @@ -500,18 +501,10 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) if (clear_user(buffer, tsz)) return -EFAULT; } else if (is_vmalloc_or_module_addr((void *)start)) { - char * elf_buf; - - elf_buf = kzalloc(tsz, GFP_KERNEL); - if (!elf_buf) - return -ENOMEM; - vread(elf_buf, (char *)start, tsz); + vread(buf, (char *)start, tsz); /* we have to zero-fill user buffer even if no read */ - if (copy_to_user(buffer, elf_buf, tsz)) { - kfree(elf_buf); + if (copy_to_user(buffer, buf, tsz)) return -EFAULT; - } - kfree(elf_buf); } else { if (kern_addr_valid(start)) { unsigned long n; @@ -549,6 +542,11 @@ static int open_kcore(struct inode *inode, struct file *filp) { if (!capable(CAP_SYS_RAWIO)) return -EPERM; + + filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!filp->private_data) + return -ENOMEM; + if (kcore_need_update) kcore_update_ram(); if (i_size_read(inode) != proc_root_kcore->size) { @@ -559,10 +557,16 @@ static int open_kcore(struct inode *inode, struct file *filp) return 0; } +static int release_kcore(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} static const struct file_operations proc_kcore_operations = { .read = read_kcore, .open = open_kcore, + .release = release_kcore, .llseek = default_llseek, }; -- cgit v1.1 From df04abfd181acc276ba6762c8206891ae10ae00d Mon Sep 17 00:00:00 2001 From: 
Jiri Olsa Date: Thu, 8 Sep 2016 09:57:08 +0200 Subject: fs/proc/kcore.c: Add bounce buffer for ktext data We hit hardened usercopy feature check for kernel text access by reading kcore file: usercopy: kernel memory exposure attempt detected from ffffffff8179a01f () (4065 bytes) kernel BUG at mm/usercopy.c:75! Bypassing this check for kcore by adding bounce buffer for ktext data. Reported-by: Steve Best Fixes: f5509cc18daa ("mm: Hardened usercopy") Suggested-by: Kees Cook Signed-off-by: Jiri Olsa Acked-by: Kees Cook Signed-off-by: Linus Torvalds --- fs/proc/kcore.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index bd3ac9d..5c89a07 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -509,7 +509,12 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) if (kern_addr_valid(start)) { unsigned long n; - n = copy_to_user(buffer, (char *)start, tsz); + /* + * Using bounce buffer to bypass the + * hardened user copy kernel text checks. + */ + memcpy(buf, (char *) start, tsz); + n = copy_to_user(buffer, buf, tsz); /* * We cannot distinguish between fault on source * and fault on destination. When this happens -- cgit v1.1 From aa4f0601115319a52c80f468c8f007e5aa9277cb Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 20 Sep 2016 08:56:36 -0700 Subject: mm: usercopy: Check for module addresses While running a compile on arm64, I hit a memory exposure usercopy: kernel memory exposure attempt detected from fffffc0000f3b1a8 (buffer_head) (1 bytes) ------------[ cut here ]------------ kernel BUG at mm/usercopy.c:75! 
Internal error: Oops - BUG: 0 [#1] SMP Modules linked in: ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 xt_conntrack ip_set nfnetlink ebtable_broute bridge stp llc ebtable_nat ip6table_security ip6table_raw ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle iptable_security iptable_raw iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle ebtable_filter ebtables ip6table_filter ip6_tables vfat fat xgene_edac xgene_enet edac_core i2c_xgene_slimpro i2c_core at803x realtek xgene_dma mdio_xgene gpio_dwapb gpio_xgene_sb xgene_rng mailbox_xgene_slimpro nfsd auth_rpcgss nfs_acl lockd grace sunrpc xfs libcrc32c sdhci_of_arasan sdhci_pltfm sdhci mmc_core xhci_plat_hcd gpio_keys CPU: 0 PID: 19744 Comm: updatedb Tainted: G W 4.8.0-rc3-threadinfo+ #1 Hardware name: AppliedMicro X-Gene Mustang Board/X-Gene Mustang Board, BIOS 3.06.12 Aug 12 2016 task: fffffe03df944c00 task.stack: fffffe00d128c000 PC is at __check_object_size+0x70/0x3f0 LR is at __check_object_size+0x70/0x3f0 ... [] __check_object_size+0x70/0x3f0 [] filldir64+0x158/0x1a0 [] __fat_readdir+0x4a0/0x558 [fat] [] fat_readdir+0x34/0x40 [fat] [] iterate_dir+0x190/0x1e0 [] SyS_getdents64+0x88/0x120 [] el0_svc_naked+0x24/0x28 fffffc0000f3b1a8 is a module address. Modules may have compiled in strings which could get copied to userspace. In this instance, it looks like "." which matches with a size of 1 byte. Extend the is_vmalloc_addr check to be is_vmalloc_or_module_addr to cover all possible cases. Signed-off-by: Laura Abbott Signed-off-by: Kees Cook --- mm/usercopy.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/usercopy.c b/mm/usercopy.c index 089328f..3c8da0a 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -207,8 +207,11 @@ static inline const char *check_heap_object(const void *ptr, unsigned long n, * Some architectures (arm64) return true for virt_addr_valid() on * vmalloced addresses. 
Work around this by checking for vmalloc * first. + * + * We also need to check for module addresses explicitly since we + * may copy static data from modules to userspace */ - if (is_vmalloc_addr(ptr)) + if (is_vmalloc_or_module_addr(ptr)) return NULL; if (!virt_addr_valid(ptr)) -- cgit v1.1 From e23d4159b109167126e5bcd7f3775c95de7fee47 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 20 Sep 2016 20:07:42 +0100 Subject: fix fault_in_multipages_...() on architectures with no-op access_ok() Switching iov_iter fault-in to multipages variants has exposed an old bug in underlying fault_in_multipages_...(); they break if the range passed to them wraps around. Normally access_ok() done by callers will prevent such (and it's a guaranteed EFAULT - ERR_PTR() values fall into such a range and they should not point to any valid objects). However, on architectures where userland and kernel live in different MMU contexts (e.g. s390) access_ok() is a no-op and on those a range with a wraparound can reach fault_in_multipages_...(). Since any wraparound means EFAULT there, the fix is trivial - turn those while (uaddr <= end) ... into if (unlikely(uaddr > end)) return -EFAULT; do ... 
while (uaddr <= end); Reported-by: Jan Stancek Tested-by: Jan Stancek Cc: stable@vger.kernel.org # v3.5+ Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 66a1260..7e3d537 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -571,56 +571,56 @@ static inline int fault_in_pages_readable(const char __user *uaddr, int size) */ static inline int fault_in_multipages_writeable(char __user *uaddr, int size) { - int ret = 0; char __user *end = uaddr + size - 1; if (unlikely(size == 0)) - return ret; + return 0; + if (unlikely(uaddr > end)) + return -EFAULT; /* * Writing zeroes into userspace here is OK, because we know that if * the zero gets there, we'll be overwriting it. */ - while (uaddr <= end) { - ret = __put_user(0, uaddr); - if (ret != 0) - return ret; + do { + if (unlikely(__put_user(0, uaddr) != 0)) + return -EFAULT; uaddr += PAGE_SIZE; - } + } while (uaddr <= end); /* Check whether the range spilled into the next page. */ if (((unsigned long)uaddr & PAGE_MASK) == ((unsigned long)end & PAGE_MASK)) - ret = __put_user(0, end); + return __put_user(0, end); - return ret; + return 0; } static inline int fault_in_multipages_readable(const char __user *uaddr, int size) { volatile char c; - int ret = 0; const char __user *end = uaddr + size - 1; if (unlikely(size == 0)) - return ret; + return 0; - while (uaddr <= end) { - ret = __get_user(c, uaddr); - if (ret != 0) - return ret; + if (unlikely(uaddr > end)) + return -EFAULT; + + do { + if (unlikely(__get_user(c, uaddr) != 0)) + return -EFAULT; uaddr += PAGE_SIZE; - } + } while (uaddr <= end); /* Check whether the range spilled into the next page. 
*/ if (((unsigned long)uaddr & PAGE_MASK) == ((unsigned long)end & PAGE_MASK)) { - ret = __get_user(c, end); - (void)c; + return __get_user(c, end); } - return ret; + return 0; } int add_to_page_cache_locked(struct page *page, struct address_space *mapping, -- cgit v1.1 From ad9798967dd67f080bf0e8d611b382a5d292aae2 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Mon, 19 Sep 2016 20:15:24 +0100 Subject: 6pack: fix buffer length mishandling Dmitry Vyukov wrote: > different runs). Looking at code, the following looks suspicious -- we > limit copy by 512 bytes, but use the original count which can be > larger than 512: > > static void sixpack_receive_buf(struct tty_struct *tty, > const unsigned char *cp, char *fp, int count) > { > unsigned char buf[512]; > .... > memcpy(buf, cp, count < sizeof(buf) ? count : sizeof(buf)); > .... > sixpack_decode(sp, buf, count1); With the sane tty locking we now have I believe the following is safe as we consume the bytes and move them into the decoded buffer before returning. Signed-off-by: Alan Cox Signed-off-by: David S. Miller --- drivers/net/hamradio/6pack.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 5a1e985..470b3dc 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -127,7 +127,7 @@ struct sixpack { #define AX25_6PACK_HEADER_LEN 0 -static void sixpack_decode(struct sixpack *, unsigned char[], int); +static void sixpack_decode(struct sixpack *, const unsigned char[], int); static int encode_sixpack(unsigned char *, unsigned char *, int, unsigned char); /* @@ -428,7 +428,7 @@ out: /* * Handle the 'receiver data ready' interrupt. - * This function is called by the 'tty_io' module in the kernel when + * This function is called by the tty module in the kernel when * a block of 6pack data has been received, which can now be decapsulated * and sent on to some IP layer for further processing. 
*/ @@ -436,7 +436,6 @@ static void sixpack_receive_buf(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) { struct sixpack *sp; - unsigned char buf[512]; int count1; if (!count) @@ -446,10 +445,7 @@ static void sixpack_receive_buf(struct tty_struct *tty, if (!sp) return; - memcpy(buf, cp, count < sizeof(buf) ? count : sizeof(buf)); - /* Read the characters out of the buffer */ - count1 = count; while (count) { count--; @@ -459,7 +455,7 @@ static void sixpack_receive_buf(struct tty_struct *tty, continue; } } - sixpack_decode(sp, buf, count1); + sixpack_decode(sp, cp, count1); sp_put(sp); tty_unthrottle(tty); @@ -992,7 +988,7 @@ static void decode_std_command(struct sixpack *sp, unsigned char cmd) /* decode a 6pack packet */ static void -sixpack_decode(struct sixpack *sp, unsigned char *pre_rbuff, int count) +sixpack_decode(struct sixpack *sp, const unsigned char *pre_rbuff, int count) { unsigned char inbyte; int count1; -- cgit v1.1 From 190aa3e77880a05332ea1ccb382a51285d57adb5 Mon Sep 17 00:00:00 2001 From: pravin shelar Date: Mon, 19 Sep 2016 13:50:59 -0700 Subject: openvswitch: Fix Frame-size larger than 1024 bytes warning. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no need to declare separate key on stack, we can just use sw_flow->key to store the key directly. This commit fixes following warning: net/openvswitch/datapath.c: In function ‘ovs_flow_cmd_new’: net/openvswitch/datapath.c:1080:1: warning: the frame size of 1040 bytes is larger than 1024 bytes [-Wframe-larger-than=] Signed-off-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- net/openvswitch/datapath.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 0536ab3..474e7a6 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -928,7 +928,6 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) struct sw_flow_mask mask; struct sk_buff *reply; struct datapath *dp; - struct sw_flow_key key; struct sw_flow_actions *acts; struct sw_flow_match match; u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); @@ -956,20 +955,24 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) } /* Extract key. */ - ovs_match_init(&match, &key, &mask); + ovs_match_init(&match, &new_flow->key, &mask); error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); if (error) goto err_kfree_flow; - ovs_flow_mask_key(&new_flow->key, &key, true, &mask); - /* Extract flow identifier. */ error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID], - &key, log); + &new_flow->key, log); if (error) goto err_kfree_flow; + /* unmasked key is needed to match when ufid is not used. */ + if (ovs_identifier_is_key(&new_flow->id)) + match.key = new_flow->id.unmasked_key; + + ovs_flow_mask_key(&new_flow->key, &new_flow->key, true, &mask); + /* Validate actions. 
*/ error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, &acts, log); @@ -996,7 +999,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) if (ovs_identifier_is_ufid(&new_flow->id)) flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id); if (!flow) - flow = ovs_flow_tbl_lookup(&dp->table, &key); + flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->key); if (likely(!flow)) { rcu_assign_pointer(new_flow->sf_acts, acts); -- cgit v1.1 From 2279994d07ab67ff7a1d09bfbd65588332dfb6d8 Mon Sep 17 00:00:00 2001 From: pravin shelar Date: Mon, 19 Sep 2016 13:51:00 -0700 Subject: openvswitch: avoid resetting flow key while installing new flow. Since commit db74a3335e0f6 ("openvswitch: use percpu flow stats") flow alloc resets flow-key. So there is no need to reset the flow-key again if OVS is using newly allocated flow-key. Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 8 ++++---- net/openvswitch/flow.c | 2 -- net/openvswitch/flow_netlink.c | 6 ++++-- net/openvswitch/flow_netlink.h | 3 ++- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 474e7a6..4d67ea8 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -955,7 +955,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) } /* Extract key.
*/ - ovs_match_init(&match, &new_flow->key, &mask); + ovs_match_init(&match, &new_flow->key, false, &mask); error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); if (error) @@ -1124,7 +1124,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log); if (a[OVS_FLOW_ATTR_KEY]) { - ovs_match_init(&match, &key, &mask); + ovs_match_init(&match, &key, true, &mask); error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); } else if (!ufid_present) { @@ -1241,7 +1241,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log); if (a[OVS_FLOW_ATTR_KEY]) { - ovs_match_init(&match, &key, NULL); + ovs_match_init(&match, &key, true, NULL); err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL, log); } else if (!ufid_present) { @@ -1300,7 +1300,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log); if (a[OVS_FLOW_ATTR_KEY]) { - ovs_match_init(&match, &key, NULL); + ovs_match_init(&match, &key, true, NULL); err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL, log); if (unlikely(err)) diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 0fa45439..634cc10 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -767,8 +767,6 @@ int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr, { int err; - memset(key, 0, OVS_SW_FLOW_KEY_METADATA_SIZE); - /* Extract metadata from netlink attributes. 
*/ err = ovs_nla_get_flow_metadata(net, attr, key, log); if (err) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 8efa718..ae25ded 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -1996,13 +1996,15 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, void ovs_match_init(struct sw_flow_match *match, struct sw_flow_key *key, + bool reset_key, struct sw_flow_mask *mask) { memset(match, 0, sizeof(*match)); match->key = key; match->mask = mask; - memset(key, 0, sizeof(*key)); + if (reset_key) + memset(key, 0, sizeof(*key)); if (mask) { memset(&mask->key, 0, sizeof(mask->key)); @@ -2049,7 +2051,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, struct nlattr *a; int err = 0, start, opts_type; - ovs_match_init(&match, &key, NULL); + ovs_match_init(&match, &key, true, NULL); opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log); if (opts_type < 0) return opts_type; diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 47dd142..45f9769 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -41,7 +41,8 @@ size_t ovs_tun_key_attr_size(void); size_t ovs_key_attr_size(void); void ovs_match_init(struct sw_flow_match *match, - struct sw_flow_key *key, struct sw_flow_mask *mask); + struct sw_flow_key *key, bool reset_key, + struct sw_flow_mask *mask); int ovs_nla_put_key(const struct sw_flow_key *, const struct sw_flow_key *, int attr, bool is_mask, struct sk_buff *); -- cgit v1.1 From cf714ac147e08bc13cd6bc79f2b090da905398ef Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Mon, 19 Sep 2016 13:56:29 -0700 Subject: ipvlan: Fix dependency issue kbuild-build-bot reported that if NETFILTER is not selected, the build fails pointing to netfilter symbols. Fixes: 4fbae7d83c98 ("ipvlan: Introduce l3s mode") Signed-off-by: Mahesh Bandewar Signed-off-by: David S. 
Miller --- drivers/net/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 8768a62..95c32f2 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -149,6 +149,7 @@ config IPVLAN tristate "IP-VLAN support" depends on INET depends on IPV6 + depends on NETFILTER depends on NET_L3_MASTER_DEV ---help--- This allows one to create virtual devices off of a main interface -- cgit v1.1 From b399cf64e318ac8c5f10d36bb911e61c746b8788 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 20 Sep 2016 00:26:12 +0200 Subject: bpf, verifier: enforce larger zero range for pkt on overloading stack buffs Current contract for the following two helper argument types is: * ARG_CONST_STACK_SIZE: passed argument pair must be (ptr, >0). * ARG_CONST_STACK_SIZE_OR_ZERO: passed argument pair can be either (NULL, 0) or (ptr, >0). With 6841de8b0d03 ("bpf: allow helpers access the packet directly"), we can pass also raw packet data to helpers, so depending on the argument type being PTR_TO_PACKET, we now either assert memory via check_packet_access() or check_stack_boundary(). As a result, the tests in check_packet_access() currently allow more than intended with regards to reg->imm. Back in 969bf05eb3ce ("bpf: direct packet access"), check_packet_access() was fine to ignore size argument since in check_mem_access() size was bpf_size_to_bytes() derived and prior to the call to check_packet_access() guaranteed to be larger than zero. However, for the above two argument types, it currently means, we can have a <= 0 size and thus breaking current guarantees for helpers. Enforce a check for size <= 0 and bail out if so. check_stack_boundary() doesn't have such an issue since it already tests for access_size <= 0 and bails out, resp. access_size == 0 in case of NULL pointer passed when allowed. 
Fixes: 6841de8b0d03 ("bpf: allow helpers access the packet directly") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 90493a6..bc138f3 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -671,7 +671,7 @@ static int check_packet_access(struct verifier_env *env, u32 regno, int off, struct reg_state *reg = &regs[regno]; off += reg->off; - if (off < 0 || off + size > reg->range) { + if (off < 0 || size <= 0 || off + size > reg->range) { verbose("invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n", off, size, regno, reg->id, reg->off, reg->range); return -EACCES; -- cgit v1.1 From 36bbef52c7eb646ed6247055a2acd3851e317857 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 20 Sep 2016 00:26:13 +0200 Subject: bpf: direct packet write and access for helpers for clsact progs This work implements direct packet access for helpers and direct packet write in a similar fashion as already available for XDP types via commits 4acf6c0b84c9 ("bpf: enable direct packet data write for xdp progs") and 6841de8b0d03 ("bpf: allow helpers access the packet directly"), and as a complementary feature to the already available direct packet read for tc (cls/act) programs. For enabling this, we need to introduce two helpers, bpf_skb_pull_data() and bpf_csum_update(). The first is generally needed for both, read and write, because they would otherwise only be limited to the current linear skb head. Usually, when the data_end test fails, programs just bail out, or, in the direct read case, use bpf_skb_load_bytes() as an alternative to overcome this limitation. If such data sits in non-linear parts, we can just pull them in once with the new helper, retest and eventually access them.
At the same time, this also makes sure the skb is uncloned, which is, of course, a necessary condition for direct write. As this needs to be an invariant for the write part only, the verifier detects writes and adds a prologue that is calling bpf_skb_pull_data() to effectively unclone the skb from the very beginning in case it is indeed cloned. The heuristic makes use of a similar trick that was done in 233577a22089 ("net: filter: constify detection of pkt_type_offset"). This comes at zero cost for other programs that do not use the direct write feature. Should a program use this feature only sparsely and has read access for the most parts with, for example, drop return codes, then such write action can be delegated to a tail called program for mitigating this cost of potential uncloning to a late point in time where it would have been paid similarly with the bpf_skb_store_bytes() as well. Advantage of direct write is that the writes are inlined whereas the helper cannot make any length assumptions and thus needs to generate a call to memcpy() also for small sizes, as well as cost of helper call itself with sanity checks are avoided. Plus, when direct read is already used, we don't need to cache or perform rechecks on the data boundaries (due to verifier invalidating previous checks for helpers that change skb->data), so more complex programs using rewrites can benefit from switching to direct read plus write. For direct packet access to helpers, we save the otherwise needed copy into a temp struct sitting on stack memory when use-case allows. Both facilities are enabled via may_access_direct_pkt_data() in verifier. For now, we limit this to map helpers and csum_diff, and can successively enable other helpers where we find it makes sense. 
Helpers that definitely cannot be allowed for this are those part of bpf_helper_changes_skb_data() since they can change underlying data, and those that write into memory as this could happen for packet typed args when still cloned. bpf_csum_update() helper accommodates for the fact that we need to fixup checksum_complete when using direct write instead of bpf_skb_store_bytes(), meaning the programs can use available helpers like bpf_csum_diff(), and implement csum_add(), csum_sub(), csum_block_add(), csum_block_sub() equivalents in eBPF together with the new helper. A usage example will be provided for iproute2's examples/bpf/ directory. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 4 +- include/linux/skbuff.h | 14 ++++- include/uapi/linux/bpf.h | 21 ++++++++ kernel/bpf/helpers.c | 3 ++ kernel/bpf/verifier.c | 54 ++++++++++++++----- net/core/filter.c | 134 +++++++++++++++++++++++++++++++++++++++++------ 6 files changed, 196 insertions(+), 34 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9a904f6..5691fdc 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -96,6 +96,7 @@ enum bpf_return_type { struct bpf_func_proto { u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); bool gpl_only; + bool pkt_access; enum bpf_return_type ret_type; enum bpf_arg_type arg1_type; enum bpf_arg_type arg2_type; @@ -151,7 +152,8 @@ struct bpf_verifier_ops { */ bool (*is_valid_access)(int off, int size, enum bpf_access_type type, enum bpf_reg_type *reg_type); - + int (*gen_prologue)(struct bpf_insn *insn, bool direct_write, + const struct bpf_prog *prog); u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg, int src_reg, int ctx_off, struct bpf_insn *insn, struct bpf_prog *prog); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4c5662f..c6dab3f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -676,13 +676,23 @@ struct sk_buff { */ 
kmemcheck_bitfield_begin(flags1); __u16 queue_mapping; + +/* if you move cloned around you also must adapt those constants */ +#ifdef __BIG_ENDIAN_BITFIELD +#define CLONED_MASK (1 << 7) +#else +#define CLONED_MASK 1 +#endif +#define CLONED_OFFSET() offsetof(struct sk_buff, __cloned_offset) + + __u8 __cloned_offset[0]; __u8 cloned:1, nohdr:1, fclone:2, peeked:1, head_frag:1, - xmit_more:1; - /* one bit hole */ + xmit_more:1, + __unused:1; /* one bit hole */ kmemcheck_bitfield_end(flags1); /* fields enclosed in headers_start/headers_end are copied diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f896dfa..e07432b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -398,6 +398,27 @@ enum bpf_func_id { */ BPF_FUNC_skb_change_tail, + /** + * bpf_skb_pull_data(skb, len) + * The helper will pull in non-linear data in case the + * skb is non-linear and not all of len are part of the + * linear section. Only needed for read/write with direct + * packet access. + * @skb: pointer to skb + * @len: len to make read/writeable + * Return: 0 on success or negative error + */ + BPF_FUNC_skb_pull_data, + + /** + * bpf_csum_update(skb, csum) + * Adds csum into skb->csum in case of CHECKSUM_COMPLETE. 
+ * @skb: pointer to skb + * @csum: csum to add + * Return: csum on success or negative error + */ + BPF_FUNC_csum_update, + __BPF_FUNC_MAX_ID, }; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index a5b8bf8..3991840 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -36,6 +36,7 @@ BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key) const struct bpf_func_proto bpf_map_lookup_elem_proto = { .func = bpf_map_lookup_elem, .gpl_only = false, + .pkt_access = true, .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_MAP_KEY, @@ -51,6 +52,7 @@ BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key, const struct bpf_func_proto bpf_map_update_elem_proto = { .func = bpf_map_update_elem, .gpl_only = false, + .pkt_access = true, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_MAP_KEY, @@ -67,6 +69,7 @@ BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key) const struct bpf_func_proto bpf_map_delete_elem_proto = { .func = bpf_map_delete_elem, .gpl_only = false, + .pkt_access = true, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_MAP_KEY, diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bc138f3..3a75ee3 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -196,6 +196,7 @@ struct verifier_env { u32 used_map_cnt; /* number of used maps */ u32 id_gen; /* used to generate unique reg IDs */ bool allow_ptr_leaks; + bool seen_direct_write; }; #define BPF_COMPLEXITY_LIMIT_INSNS 65536 @@ -204,6 +205,7 @@ struct verifier_env { struct bpf_call_arg_meta { struct bpf_map *map_ptr; bool raw_mode; + bool pkt_access; int regno; int access_size; }; @@ -654,10 +656,17 @@ static int check_map_access(struct verifier_env *env, u32 regno, int off, #define MAX_PACKET_OFF 0xffff -static bool may_write_pkt_data(enum bpf_prog_type type) +static bool may_access_direct_pkt_data(struct verifier_env *env, + 
const struct bpf_call_arg_meta *meta) { - switch (type) { + switch (env->prog->type) { + case BPF_PROG_TYPE_SCHED_CLS: + case BPF_PROG_TYPE_SCHED_ACT: case BPF_PROG_TYPE_XDP: + if (meta) + return meta->pkt_access; + + env->seen_direct_write = true; return true; default: return false; @@ -817,7 +826,7 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off, err = check_stack_read(state, off, size, value_regno); } } else if (state->regs[regno].type == PTR_TO_PACKET) { - if (t == BPF_WRITE && !may_write_pkt_data(env->prog->type)) { + if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL)) { verbose("cannot write into packet\n"); return -EACCES; } @@ -950,8 +959,8 @@ static int check_func_arg(struct verifier_env *env, u32 regno, return 0; } - if (type == PTR_TO_PACKET && !may_write_pkt_data(env->prog->type)) { - verbose("helper access to the packet is not allowed for clsact\n"); + if (type == PTR_TO_PACKET && !may_access_direct_pkt_data(env, meta)) { + verbose("helper access to the packet is not allowed\n"); return -EACCES; } @@ -1191,6 +1200,7 @@ static int check_call(struct verifier_env *env, int func_id) changes_data = bpf_helper_changes_skb_data(fn->func); memset(&meta, 0, sizeof(meta)); + meta.pkt_access = fn->pkt_access; /* We only support one arg being in raw mode at the moment, which * is sufficient for the helper functions we have right now. 
@@ -2675,18 +2685,35 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env) */ static int convert_ctx_accesses(struct verifier_env *env) { - struct bpf_insn *insn = env->prog->insnsi; - int insn_cnt = env->prog->len; - struct bpf_insn insn_buf[16]; + const struct bpf_verifier_ops *ops = env->prog->aux->ops; + struct bpf_insn insn_buf[16], *insn; struct bpf_prog *new_prog; enum bpf_access_type type; - int i; + int i, insn_cnt, cnt; - if (!env->prog->aux->ops->convert_ctx_access) + if (ops->gen_prologue) { + cnt = ops->gen_prologue(insn_buf, env->seen_direct_write, + env->prog); + if (cnt >= ARRAY_SIZE(insn_buf)) { + verbose("bpf verifier is misconfigured\n"); + return -EINVAL; + } else if (cnt) { + new_prog = bpf_patch_insn_single(env->prog, 0, + insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + env->prog = new_prog; + } + } + + if (!ops->convert_ctx_access) return 0; + insn_cnt = env->prog->len; + insn = env->prog->insnsi; + for (i = 0; i < insn_cnt; i++, insn++) { - u32 insn_delta, cnt; + u32 insn_delta; if (insn->code == (BPF_LDX | BPF_MEM | BPF_W) || insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) @@ -2703,9 +2730,8 @@ static int convert_ctx_accesses(struct verifier_env *env) continue; } - cnt = env->prog->aux->ops-> - convert_ctx_access(type, insn->dst_reg, insn->src_reg, - insn->off, insn_buf, env->prog); + cnt = ops->convert_ctx_access(type, insn->dst_reg, insn->src_reg, + insn->off, insn_buf, env->prog); if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { verbose("bpf verifier is misconfigured\n"); return -EINVAL; diff --git a/net/core/filter.c b/net/core/filter.c index 298b146..0920c2a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1362,6 +1362,11 @@ static inline int bpf_try_make_writable(struct sk_buff *skb, return err; } +static int bpf_try_make_head_writable(struct sk_buff *skb) +{ + return bpf_try_make_writable(skb, skb_headlen(skb)); +} + static inline void bpf_push_mac_rcsum(struct sk_buff *skb) { if (skb_at_tc_ingress(skb)) @@ 
-1441,6 +1446,28 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = { .arg4_type = ARG_CONST_STACK_SIZE, }; +BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len) +{ + /* Idea is the following: should the needed direct read/write + * test fail during runtime, we can pull in more data and redo + * again, since implicitly, we invalidate previous checks here. + * + * Or, since we know how much we need to make read/writeable, + * this can be done once at the program beginning for direct + * access case. By this we overcome limitations of only current + * headroom being accessible. + */ + return bpf_try_make_writable(skb, len ? : skb_headlen(skb)); +} + +static const struct bpf_func_proto bpf_skb_pull_data_proto = { + .func = bpf_skb_pull_data, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset, u64, from, u64, to, u64, flags) { @@ -1567,6 +1594,7 @@ BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size, static const struct bpf_func_proto bpf_csum_diff_proto = { .func = bpf_csum_diff, .gpl_only = false, + .pkt_access = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_STACK, .arg2_type = ARG_CONST_STACK_SIZE_OR_ZERO, @@ -1575,6 +1603,26 @@ static const struct bpf_func_proto bpf_csum_diff_proto = { .arg5_type = ARG_ANYTHING, }; +BPF_CALL_2(bpf_csum_update, struct sk_buff *, skb, __wsum, csum) +{ + /* The interface is to be used in combination with bpf_csum_diff() + * for direct packet writes. csum rotation for alignment as well + * as emulating csum_sub() can be done from the eBPF program. 
+ */ + if (skb->ip_summed == CHECKSUM_COMPLETE) + return (skb->csum = csum_add(skb->csum, csum)); + + return -ENOTSUPP; +} + +static const struct bpf_func_proto bpf_csum_update_proto = { + .func = bpf_csum_update, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb) { return dev_forward_skb(dev, skb); @@ -1602,6 +1650,8 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) { struct net_device *dev; + struct sk_buff *clone; + int ret; if (unlikely(flags & ~(BPF_F_INGRESS))) return -EINVAL; @@ -1610,14 +1660,25 @@ BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) if (unlikely(!dev)) return -EINVAL; - skb = skb_clone(skb, GFP_ATOMIC); - if (unlikely(!skb)) + clone = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!clone)) return -ENOMEM; - bpf_push_mac_rcsum(skb); + /* For direct write, we need to keep the invariant that the skbs + * we're dealing with need to be uncloned. Should uncloning fail + * here, we need to free the just generated clone to unclone once + * again. + */ + ret = bpf_try_make_head_writable(skb); + if (unlikely(ret)) { + kfree_skb(clone); + return -ENOMEM; + } + + bpf_push_mac_rcsum(clone); return flags & BPF_F_INGRESS ? 
- __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb); + __bpf_rx_skb(dev, clone) : __bpf_tx_skb(dev, clone); } static const struct bpf_func_proto bpf_clone_redirect_proto = { @@ -2063,19 +2124,14 @@ static const struct bpf_func_proto bpf_skb_change_tail_proto = { bool bpf_helper_changes_skb_data(void *func) { - if (func == bpf_skb_vlan_push) - return true; - if (func == bpf_skb_vlan_pop) - return true; - if (func == bpf_skb_store_bytes) - return true; - if (func == bpf_skb_change_proto) - return true; - if (func == bpf_skb_change_tail) - return true; - if (func == bpf_l3_csum_replace) - return true; - if (func == bpf_l4_csum_replace) + if (func == bpf_skb_vlan_push || + func == bpf_skb_vlan_pop || + func == bpf_skb_store_bytes || + func == bpf_skb_change_proto || + func == bpf_skb_change_tail || + func == bpf_skb_pull_data || + func == bpf_l3_csum_replace || + func == bpf_l4_csum_replace) return true; return false; @@ -2440,8 +2496,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_skb_store_bytes_proto; case BPF_FUNC_skb_load_bytes: return &bpf_skb_load_bytes_proto; + case BPF_FUNC_skb_pull_data: + return &bpf_skb_pull_data_proto; case BPF_FUNC_csum_diff: return &bpf_csum_diff_proto; + case BPF_FUNC_csum_update: + return &bpf_csum_update_proto; case BPF_FUNC_l3_csum_replace: return &bpf_l3_csum_replace_proto; case BPF_FUNC_l4_csum_replace: @@ -2533,6 +2593,45 @@ static bool sk_filter_is_valid_access(int off, int size, return __is_valid_access(off, size, type); } +static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write, + const struct bpf_prog *prog) +{ + struct bpf_insn *insn = insn_buf; + + if (!direct_write) + return 0; + + /* if (!skb->cloned) + * goto start; + * + * (Fast-path, otherwise approximation that we might be + * a clone, do the rest in helper.) 
+ */ + *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_6, BPF_REG_1, CLONED_OFFSET()); + *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_6, CLONED_MASK); + *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 7); + + /* ret = bpf_skb_pull_data(skb, 0); */ + *insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1); + *insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_2, BPF_REG_2); + *insn++ = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_pull_data); + /* if (!ret) + * goto restore; + * return TC_ACT_SHOT; + */ + *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2); + *insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, TC_ACT_SHOT); + *insn++ = BPF_EXIT_INSN(); + + /* restore: */ + *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6); + /* start: */ + *insn++ = prog->insnsi[0]; + + return insn - insn_buf; +} + static bool tc_cls_act_is_valid_access(int off, int size, enum bpf_access_type type, enum bpf_reg_type *reg_type) @@ -2810,6 +2909,7 @@ static const struct bpf_verifier_ops tc_cls_act_ops = { .get_func_proto = tc_cls_act_func_proto, .is_valid_access = tc_cls_act_is_valid_access, .convert_ctx_access = tc_cls_act_convert_ctx_access, + .gen_prologue = tc_cls_act_prologue, }; static const struct bpf_verifier_ops xdp_ops = { -- cgit v1.1 From 7d95b0ab5bbe2dc9bf3fd99c27e80ced5bfa8acf Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 20 Sep 2016 00:26:14 +0200 Subject: bpf: add test cases for direct packet access Add couple of test cases for direct write and the negative size issue, and also adjust the direct packet access test4 since it asserts that writes are not possible, but since we've just added support for writes, we need to invert the verdict to ACCEPT, of course. Summary: 133 PASSED, 0 FAILED. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- samples/bpf/test_verifier.c | 433 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 430 insertions(+), 3 deletions(-) diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c index 1f6cc9b..ac590d4 100644 --- a/samples/bpf/test_verifier.c +++ b/samples/bpf/test_verifier.c @@ -291,6 +291,29 @@ static struct bpf_test tests[] = { .result = REJECT, }, { + "invalid argument register", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid), + BPF_EXIT_INSN(), + }, + .errstr = "R1 !read_ok", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "non-invalid argument register", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid), + BPF_ALU64_REG(BPF_MOV, BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { "check valid spill/fill", .insns = { /* spill R1(ctx) into stack */ @@ -1210,6 +1233,54 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { + "raw_stack: skb_load_bytes, negative len", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack type R3", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "raw_stack: skb_load_bytes, negative len 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 
~0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack type R3", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "raw_stack: skb_load_bytes, zero len", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack type R3", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { "raw_stack: skb_load_bytes, no init", .insns = { BPF_MOV64_IMM(BPF_REG_2, 4), @@ -1511,7 +1582,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, }, { - "direct packet access: test4", + "direct packet access: test4 (write)", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct __sk_buff, data)), @@ -1524,8 +1595,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "cannot write", - .result = REJECT, + .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { @@ -1631,6 +1701,26 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { + "direct packet access: test10 (write invalid)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid access to packet", + .result = REJECT, + .prog_type = 
BPF_PROG_TYPE_SCHED_CLS, + }, + { "helper access to packet: test1, valid packet_ptr range", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, @@ -1736,6 +1826,343 @@ static struct bpf_test tests[] = { .errstr = "invalid access to packet", .prog_type = BPF_PROG_TYPE_XDP, }, + { + "helper access to packet: test6, cls valid packet_ptr range", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 5), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup = {5}, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test7, cls unchecked packet_ptr", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup = {1}, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test8, cls variable add", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 10), + BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_5), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 8), + BPF_JMP_REG(BPF_JGT, 
BPF_REG_5, BPF_REG_3, 4), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup = {11}, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test9, cls packet_ptr with bad range", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup = {7}, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test10, cls packet_ptr with too short range", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup = {6}, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test11, cls unsuitable helper 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + 
BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_7, 4), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_4, 42), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_store_bytes), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "helper access to the packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test12, cls unsuitable helper 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 3), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_4, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "helper access to the packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test13, cls helper ok", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test14, cls helper fail sub", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + 
offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 4), + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "type=inv expected=fp", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test15, cls helper fail range 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test16, cls helper fail range 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, -9), + BPF_MOV64_IMM(BPF_REG_3, 0), + 
BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test17, cls helper fail range 3", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, ~0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test18, cls helper fail range zero", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test19, pkt end as input", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, 
BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R1 type=pkt_end expected=fp", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test20, wrong reg", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, }; static int probe_filter_length(struct bpf_insn *fp) -- cgit v1.1 From aecc5cefc389735b5327d234e11d1fe505e1c280 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 19 Sep 2016 19:02:51 -0400 Subject: net sched actions: fix GETing actions With the batch changes that translated transient actions into a temporary list lost in the translation was the fact that tcf_action_destroy() will eventually delete the action from the permanent location if the refcount is zero. 
Example of what broke: ...add a gact action to drop sudo $TC actions add action drop index 10 ...now retrieve it, looks good sudo $TC actions get action gact index 10 ...retrieve it again and find it is gone! sudo $TC actions get action gact index 10 Fixes: 22dc13c837c3 ("net_sched: convert tcf_exts from list to pointer array"), Fixes: 824a7e8863b3 ("net_sched: remove an unnecessary list_del()") Fixes: f07fed82ad79 ("net_sched: remove the leftover cleanup_a()") Acked-by: Cong Wang Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_api.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index d0aceb1..c910217 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -592,6 +592,17 @@ err_out: return ERR_PTR(err); } +static void cleanup_a(struct list_head *actions, int ovr) +{ + struct tc_action *a; + + if (!ovr) + return; + + list_for_each_entry(a, actions, list) + a->tcfa_refcnt--; +} + int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, struct list_head *actions) { @@ -611,8 +622,15 @@ int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est, goto err; } act->order = i; + if (ovr) + act->tcfa_refcnt++; list_add_tail(&act->list, actions); } + + /* Remove the temp refcnt which was necessary to protect against + * destroying an existing action which was being replaced + */ + cleanup_a(actions, ovr); return 0; err: @@ -882,6 +900,8 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, goto err; } act->order = i; + if (event == RTM_GETACTION) + act->tcfa_refcnt++; list_add_tail(&act->list, &actions); } -- cgit v1.1 From 8847293992606677d5e446d1e712bd128ea7977f Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 19 Sep 2016 19:56:11 -0400 Subject: net: dsa: mv88e6xxx: handle multiple ports in ATU An address can be loaded in the ATU with multiple ports, for instance when 
adding multiple ports to a Multicast group with "bridge mdb". The current code doesn't allow that. Add a helper to get a single entry from the ATU, then set or clear the requested port, before loading the entry back in the ATU. Note that the required _mv88e6xxx_atu_getnext function is defined below mv88e6xxx_port_db_load_purge, so forward-declare it for the moment. The ATU code will be isolated in future patches. Fixes: 83dabd1fa84c ("net: dsa: mv88e6xxx: make switchdev DB ops generic") Signed-off-by: Vivien Didelot Reviewed-by: Andrew Lunn Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 56 +++++++++++++++++++++++++++++++++++----- 1 file changed, 49 insertions(+), 7 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 70a812d..1d71802 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2091,12 +2091,48 @@ static int _mv88e6xxx_atu_load(struct mv88e6xxx_chip *chip, return _mv88e6xxx_atu_cmd(chip, entry->fid, GLOBAL_ATU_OP_LOAD_DB); } +static int _mv88e6xxx_atu_getnext(struct mv88e6xxx_chip *chip, u16 fid, + struct mv88e6xxx_atu_entry *entry); + +static int mv88e6xxx_atu_get(struct mv88e6xxx_chip *chip, int fid, + const u8 *addr, struct mv88e6xxx_atu_entry *entry) +{ + struct mv88e6xxx_atu_entry next; + int err; + + eth_broadcast_addr(next.mac); + + err = _mv88e6xxx_atu_mac_write(chip, next.mac); + if (err) + return err; + + do { + err = _mv88e6xxx_atu_getnext(chip, fid, &next); + if (err) + return err; + + if (next.state == GLOBAL_ATU_DATA_STATE_UNUSED) + break; + + if (ether_addr_equal(next.mac, addr)) { + *entry = next; + return 0; + } + } while (!is_broadcast_ether_addr(next.mac)); + + memset(entry, 0, sizeof(*entry)); + entry->fid = fid; + ether_addr_copy(entry->mac, addr); + + return 0; +} + static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, const unsigned char *addr, u16 vid, u8 state) { - struct
mv88e6xxx_atu_entry entry = { 0 }; struct mv88e6xxx_vtu_stu_entry vlan; + struct mv88e6xxx_atu_entry entry; int err; /* Null VLAN ID corresponds to the port private database */ @@ -2107,12 +2143,18 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, if (err) return err; - entry.fid = vlan.fid; - entry.state = state; - ether_addr_copy(entry.mac, addr); - if (state != GLOBAL_ATU_DATA_STATE_UNUSED) { - entry.trunk = false; - entry.portv_trunkid = BIT(port); + err = mv88e6xxx_atu_get(chip, vlan.fid, addr, &entry); + if (err) + return err; + + /* Purge the ATU entry only if no port is using it anymore */ + if (state == GLOBAL_ATU_DATA_STATE_UNUSED) { + entry.portv_trunkid &= ~BIT(port); + if (!entry.portv_trunkid) + entry.state = GLOBAL_ATU_DATA_STATE_UNUSED; + } else { + entry.portv_trunkid |= BIT(port); + entry.state = state; } return _mv88e6xxx_atu_load(chip, &entry); -- cgit v1.1 From 94d308d060cd3ee65152b8ebd7a1c24fa86eee82 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Tue, 20 Sep 2016 11:26:48 +0800 Subject: net: ethernet: mediatek: enhance with avoiding superfluous assignment inside mtk_get_ethtool_stats data_src is unchanged inside the loop, so this patch moves the assignment to outside the loop to avoid an unnecessary assignment Signed-off-by: Sean Wang Signed-off-by: David S.
Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 481f360..ca6b501 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2137,8 +2137,9 @@ static void mtk_get_ethtool_stats(struct net_device *dev, } } + data_src = (u64 *)hwstats; + do { - data_src = (u64 *)hwstats; data_dst = data; start = u64_stats_fetch_begin_irq(&hwstats->syncp); -- cgit v1.1 From f78e73e27fdeab6f9317667f7e9676b59c1ec1fb Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Mon, 19 Sep 2016 23:39:08 -0400 Subject: tcp: cdg: rename struct minmax in tcp_cdg.c to avoid a naming conflict The upcoming change "lib/win_minmax: windowed min or max estimator" introduces a struct called minmax, which is then included in include/linux/tcp.h in the upcoming change "tcp: use windowed min filter library for TCP min_rtt estimation". This would create a compilation error for tcp_cdg.c, which defines its own minmax struct. To avoid this naming conflict (and potentially others in the future), this commit renames the version used in tcp_cdg.c to cdg_minmax. Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Cc: Kenneth Klette Jonassen Acked-by: Kenneth Klette Jonassen Signed-off-by: David S. 
Miller --- net/ipv4/tcp_cdg.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c index 03725b2..35b2803 100644 --- a/net/ipv4/tcp_cdg.c +++ b/net/ipv4/tcp_cdg.c @@ -56,7 +56,7 @@ MODULE_PARM_DESC(use_shadow, "use shadow window heuristic"); module_param(use_tolerance, bool, 0644); MODULE_PARM_DESC(use_tolerance, "use loss tolerance heuristic"); -struct minmax { +struct cdg_minmax { union { struct { s32 min; @@ -74,10 +74,10 @@ enum cdg_state { }; struct cdg { - struct minmax rtt; - struct minmax rtt_prev; - struct minmax *gradients; - struct minmax gsum; + struct cdg_minmax rtt; + struct cdg_minmax rtt_prev; + struct cdg_minmax *gradients; + struct cdg_minmax gsum; bool gfilled; u8 tail; u8 state; @@ -353,7 +353,7 @@ static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev) { struct cdg *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); - struct minmax *gradients; + struct cdg_minmax *gradients; switch (ev) { case CA_EVENT_CWND_RESTART: -- cgit v1.1 From a4f1f9ac8153e22869b6408832b5a9bb9c762bf6 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Sep 2016 23:39:09 -0400 Subject: lib/win_minmax: windowed min or max estimator This commit introduces a generic library to estimate either the min or max value of a time-varying variable over a recent time window. This is code originally from Kathleen Nichols. The current form of the code is from Van Jacobson. A single struct minmax_sample will track the estimated windowed-max value of the series if you call minmax_running_max() or the estimated windowed-min value of the series if you call minmax_running_min(). Nearly equivalent code is already in place for minimum RTT estimation in the TCP stack. This commit extracts that code and generalizes it to handle both min and max. 
Moving the code here reduces the footprint and complexity of the TCP code base and makes the filter generally available for other parts of the codebase, including an upcoming TCP congestion control module. This library works well for time series where the measurements are smoothly increasing or decreasing. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/linux/win_minmax.h | 37 +++++++++++++++++ lib/Makefile | 2 +- lib/win_minmax.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 136 insertions(+), 1 deletion(-) create mode 100644 include/linux/win_minmax.h create mode 100644 lib/win_minmax.c diff --git a/include/linux/win_minmax.h b/include/linux/win_minmax.h new file mode 100644 index 0000000..5656960 --- /dev/null +++ b/include/linux/win_minmax.h @@ -0,0 +1,37 @@ +/** + * lib/minmax.c: windowed min/max tracker by Kathleen Nichols. 
+ * + */ +#ifndef MINMAX_H +#define MINMAX_H + +#include <linux/types.h> + +/* A single data point for our parameterized min-max tracker */ +struct minmax_sample { + u32 t; /* time measurement was taken */ + u32 v; /* value measured */ +}; + +/* State for the parameterized min-max tracker */ +struct minmax { + struct minmax_sample s[3]; +}; + +static inline u32 minmax_get(const struct minmax *m) +{ + return m->s[0].v; +} + +static inline u32 minmax_reset(struct minmax *m, u32 t, u32 meas) +{ + struct minmax_sample val = { .t = t, .v = meas }; + + m->s[2] = m->s[1] = m->s[0] = val; + return m->s[0].v; +} + +u32 minmax_running_max(struct minmax *m, u32 win, u32 t, u32 meas); +u32 minmax_running_min(struct minmax *m, u32 win, u32 t, u32 meas); + +#endif diff --git a/lib/Makefile b/lib/Makefile index 5dc77a8..df747e5 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -22,7 +22,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ sha1.o chacha20.o md5.o irq_regs.o argv_split.o \ flex_proportions.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ - earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o + earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o win_minmax.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o diff --git a/lib/win_minmax.c b/lib/win_minmax.c new file mode 100644 index 0000000..c8420d4 --- /dev/null +++ b/lib/win_minmax.c @@ -0,0 +1,98 @@ +/** + * lib/minmax.c: windowed min/max tracker + * + * Kathleen Nichols' algorithm for tracking the minimum (or maximum) + * value of a data stream over some fixed time interval. (E.g., + * the minimum RTT over the past five minutes.) It uses constant + * space and constant time per update yet almost always delivers + * the same minimum as an implementation that has to keep all the + * data in the window. + * + * The algorithm keeps track of the best, 2nd best & 3rd best min + * values, maintaining an invariant that the measurement time of + * the n'th best >= n-1'th best.
It also makes sure that the three + * values are widely separated in the time window since that bounds + * the worse case error when that data is monotonically increasing + * over the window. + * + * Upon getting a new min, we can forget everything earlier because + * it has no value - the new min is <= everything else in the window + * by definition and it's the most recent. So we restart fresh on + * every new min and overwrites 2nd & 3rd choices. The same property + * holds for 2nd & 3rd best. + */ +#include <linux/module.h> +#include <linux/win_minmax.h> + +/* As time advances, update the 1st, 2nd, and 3rd choices. */ +static u32 minmax_subwin_update(struct minmax *m, u32 win, + const struct minmax_sample *val) +{ + u32 dt = val->t - m->s[0].t; + + if (unlikely(dt > win)) { + /* + * Passed entire window without a new val so make 2nd + * choice the new val & 3rd choice the new 2nd choice. + * we may have to iterate this since our 2nd choice + * may also be outside the window (we checked on entry + * that the third choice was in the window). + */ + m->s[0] = m->s[1]; + m->s[1] = m->s[2]; + m->s[2] = *val; + if (unlikely(val->t - m->s[0].t > win)) { + m->s[0] = m->s[1]; + m->s[1] = m->s[2]; + m->s[2] = *val; + } + } else if (unlikely(m->s[1].t == m->s[0].t) && dt > win/4) { + /* + * We've passed a quarter of the window without a new val + * so take a 2nd choice from the 2nd quarter of the window. + */ + m->s[2] = m->s[1] = *val; + } else if (unlikely(m->s[2].t == m->s[1].t) && dt > win/2) { + /* + * We've passed half the window without finding a new val + * so take a 3rd choice from the last half of the window + */ + m->s[2] = *val; + } + return m->s[0].v; +} + +/* Check if new measurement updates the 1st, 2nd or 3rd choice max. */ +u32 minmax_running_max(struct minmax *m, u32 win, u32 t, u32 meas) +{ + struct minmax_sample val = { .t = t, .v = meas }; + + if (unlikely(val.v >= m->s[0].v) || /* found new max? */ + unlikely(val.t - m->s[2].t > win)) /* nothing left in window?
*/ + return minmax_reset(m, t, meas); /* forget earlier samples */ + + if (unlikely(val.v >= m->s[1].v)) + m->s[2] = m->s[1] = val; + else if (unlikely(val.v >= m->s[2].v)) + m->s[2] = val; + + return minmax_subwin_update(m, win, &val); +} +EXPORT_SYMBOL(minmax_running_max); + +/* Check if new measurement updates the 1st, 2nd or 3rd choice min. */ +u32 minmax_running_min(struct minmax *m, u32 win, u32 t, u32 meas) +{ + struct minmax_sample val = { .t = t, .v = meas }; + + if (unlikely(val.v <= m->s[0].v) || /* found new min? */ + unlikely(val.t - m->s[2].t > win)) /* nothing left in window? */ + return minmax_reset(m, t, meas); /* forget earlier samples */ + + if (unlikely(val.v <= m->s[1].v)) + m->s[2] = m->s[1] = val; + else if (unlikely(val.v <= m->s[2].v)) + m->s[2] = val; + + return minmax_subwin_update(m, win, &val); +} -- cgit v1.1 From 6403389211e1f4d40ed963fe47a96fce1a3ba7a9 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Sep 2016 23:39:10 -0400 Subject: tcp: use windowed min filter library for TCP min_rtt estimation Refactor the TCP min_rtt code to reuse the new win_minmax library in lib/win_minmax.c to simplify the TCP code. This is a pure refactor: the functionality is exactly the same. We just moved the windowed min code to make TCP easier to read and maintain, and to allow other parts of the kernel to use the windowed min/max filter code. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. 
Miller --- include/linux/tcp.h | 5 ++-- include/net/tcp.h | 2 +- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_input.c | 64 ++++-------------------------------------------- net/ipv4/tcp_minisocks.c | 2 +- 5 files changed, 10 insertions(+), 65 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index c723a46..6433cc8 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -19,6 +19,7 @@ #include <linux/skbuff.h> +#include <linux/win_minmax.h> #include <net/sock.h> #include <net/inet_connection_sock.h> #include <net/inet_timewait_sock.h> @@ -234,9 +235,7 @@ struct tcp_sock { u32 mdev_max_us; /* maximal mdev for the last rtt period */ u32 rttvar_us; /* smoothed mdev_max */ u32 rtt_seq; /* sequence number to update rttvar */ - struct rtt_meas { - u32 rtt, ts; /* RTT in usec and sampling time in jiffies. */ - } rtt_min[3]; + struct minmax rtt_min; u32 packets_out; /* Packets which are "in flight" */ u32 retrans_out; /* Retransmitted packets out */ diff --git a/include/net/tcp.h b/include/net/tcp.h index fdfbedd..2f1648a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -671,7 +671,7 @@ static inline bool tcp_ca_dst_locked(const struct dst_entry *dst) /* Minimum RTT in usec. ~0 means not available. */ static inline u32 tcp_min_rtt(const struct tcp_sock *tp) { - return tp->rtt_min[0].rtt; + return minmax_get(&tp->rtt_min); } /* Compute the actual receive window we are currently advertising.
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7dae800..e79ed17 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -387,7 +387,7 @@ void tcp_init_sock(struct sock *sk) icsk->icsk_rto = TCP_TIMEOUT_INIT; tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); - tp->rtt_min[0].rtt = ~0U; + minmax_reset(&tp->rtt_min, tcp_time_stamp, ~0U); /* So many TCP implementations out there (incorrectly) count the * initial SYN frame in their delayed-ACK and congestion control diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index dad3e7e..6886f38 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2879,67 +2879,13 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, *rexmit = REXMIT_LOST; } -/* Kathleen Nichols' algorithm for tracking the minimum value of - * a data stream over some fixed time interval. (E.g., the minimum - * RTT over the past five minutes.) It uses constant space and constant - * time per update yet almost always delivers the same minimum as an - * implementation that has to keep all the data in the window. - * - * The algorithm keeps track of the best, 2nd best & 3rd best min - * values, maintaining an invariant that the measurement time of the - * n'th best >= n-1'th best. It also makes sure that the three values - * are widely separated in the time window since that bounds the worse - * case error when that data is monotonically increasing over the window. - * - * Upon getting a new min, we can forget everything earlier because it - * has no value - the new min is <= everything else in the window by - * definition and it's the most recent. So we restart fresh on every new min - * and overwrites 2nd & 3rd choices. The same property holds for 2nd & 3rd - * best. - */ static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us) { - const u32 now = tcp_time_stamp, wlen = sysctl_tcp_min_rtt_wlen * HZ; - struct rtt_meas *m = tcp_sk(sk)->rtt_min; - struct rtt_meas rttm = { - .rtt = likely(rtt_us) ? 
rtt_us : jiffies_to_usecs(1), - .ts = now, - }; - u32 elapsed; - - /* Check if the new measurement updates the 1st, 2nd, or 3rd choices */ - if (unlikely(rttm.rtt <= m[0].rtt)) - m[0] = m[1] = m[2] = rttm; - else if (rttm.rtt <= m[1].rtt) - m[1] = m[2] = rttm; - else if (rttm.rtt <= m[2].rtt) - m[2] = rttm; - - elapsed = now - m[0].ts; - if (unlikely(elapsed > wlen)) { - /* Passed entire window without a new min so make 2nd choice - * the new min & 3rd choice the new 2nd. So forth and so on. - */ - m[0] = m[1]; - m[1] = m[2]; - m[2] = rttm; - if (now - m[0].ts > wlen) { - m[0] = m[1]; - m[1] = rttm; - if (now - m[0].ts > wlen) - m[0] = rttm; - } - } else if (m[1].ts == m[0].ts && elapsed > wlen / 4) { - /* Passed a quarter of the window without a new min so - * take 2nd choice from the 2nd quarter of the window. - */ - m[2] = m[1] = rttm; - } else if (m[2].ts == m[1].ts && elapsed > wlen / 2) { - /* Passed half the window without a new min so take the 3rd - * choice from the last half of the window. - */ - m[2] = rttm; - } + struct tcp_sock *tp = tcp_sk(sk); + u32 wlen = sysctl_tcp_min_rtt_wlen * HZ; + + minmax_running_min(&tp->rtt_min, wlen, tcp_time_stamp, + rtt_us ? 
: jiffies_to_usecs(1)); } static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f63c73d..5689471 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -464,7 +464,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->srtt_us = 0; newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); - newtp->rtt_min[0].rtt = ~0U; + minmax_reset(&newtp->rtt_min, tcp_time_stamp, ~0U); newicsk->icsk_rto = TCP_TIMEOUT_INIT; newtp->packets_out = 0; -- cgit v1.1 From 77879147a3481babffd7e368d977ab682545a6bd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 19 Sep 2016 23:39:11 -0400 Subject: net_sched: sch_fq: add low_rate_threshold parameter This commit adds to the fq module a low_rate_threshold parameter to insert a delay after all packets if the socket requests a pacing rate below the threshold. This helps achieve more precise control of the sending rate with low-rate paths, especially policers. The basic issue is that if a congestion control module detects a policer at a certain rate, it may want fq to be able to shape to that policed rate. That way the sender can avoid policer drops by having the packets arrive at the policer at or just under the policed rate. The default threshold of 550Kbps was chosen analytically so that for policers or links at 500Kbps or 512Kbps fq would very likely invoke this mechanism, even if the pacing rate was briefly slightly above the available bandwidth. This value was then empirically validated with two years of production testing on YouTube video servers. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. 
Miller --- include/uapi/linux/pkt_sched.h | 2 ++ net/sched/sch_fq.c | 22 +++++++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 2382eed..f8e39db 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -792,6 +792,8 @@ enum { TCA_FQ_ORPHAN_MASK, /* mask applied to orphaned skb hashes */ + TCA_FQ_LOW_RATE_THRESHOLD, /* per packet delay under this rate */ + __TCA_FQ_MAX }; diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index dc52cc1..5dd929c 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -94,6 +94,7 @@ struct fq_sched_data { u32 flow_max_rate; /* optional max rate per flow */ u32 flow_plimit; /* max packets per flow */ u32 orphan_mask; /* mask for orphaned skb */ + u32 low_rate_threshold; struct rb_root *fq_root; u8 rate_enable; u8 fq_trees_log; @@ -433,7 +434,7 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch) struct fq_flow_head *head; struct sk_buff *skb; struct fq_flow *f; - u32 rate; + u32 rate, plen; skb = fq_dequeue_head(sch, &q->internal); if (skb) @@ -482,7 +483,7 @@ begin: prefetch(&skb->end); f->credit -= qdisc_pkt_len(skb); - if (f->credit > 0 || !q->rate_enable) + if (!q->rate_enable) goto out; /* Do not pace locally generated ack packets */ @@ -493,8 +494,15 @@ begin: if (skb->sk) rate = min(skb->sk->sk_pacing_rate, rate); + if (rate <= q->low_rate_threshold) { + f->credit = 0; + plen = qdisc_pkt_len(skb); + } else { + plen = max(qdisc_pkt_len(skb), q->quantum); + if (f->credit > 0) + goto out; + } if (rate != ~0U) { - u32 plen = max(qdisc_pkt_len(skb), q->quantum); u64 len = (u64)plen * NSEC_PER_SEC; if (likely(rate)) @@ -662,6 +670,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = { [TCA_FQ_FLOW_MAX_RATE] = { .type = NLA_U32 }, [TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 }, [TCA_FQ_FLOW_REFILL_DELAY] = { .type = NLA_U32 }, + [TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 }, }; static int 
fq_change(struct Qdisc *sch, struct nlattr *opt) @@ -716,6 +725,10 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_FQ_FLOW_MAX_RATE]) q->flow_max_rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]); + if (tb[TCA_FQ_LOW_RATE_THRESHOLD]) + q->low_rate_threshold = + nla_get_u32(tb[TCA_FQ_LOW_RATE_THRESHOLD]); + if (tb[TCA_FQ_RATE_ENABLE]) { u32 enable = nla_get_u32(tb[TCA_FQ_RATE_ENABLE]); @@ -781,6 +794,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt) q->fq_root = NULL; q->fq_trees_log = ilog2(1024); q->orphan_mask = 1024 - 1; + q->low_rate_threshold = 550000 / 8; qdisc_watchdog_init(&q->watchdog, sch); if (opt) @@ -811,6 +825,8 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY, jiffies_to_usecs(q->flow_refill_delay)) || nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) || + nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD, + q->low_rate_threshold) || nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log)) goto nla_put_failure; -- cgit v1.1 From b2d3ea4a730f812b9c0f67a67b6762ce66ddb17c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 19 Sep 2016 23:39:12 -0400 Subject: tcp: switch back to proper tcp_skb_cb size check in tcp_init() Revert to the tcp_skb_cb size check that tcp_init() had before commit b4772ef879a8 ("net: use common macro for assering skb->cb[] available size in protocol families"). As related commit 744d5a3e9fe2 ("net: move skb->dropcount to skb->cb[]") explains, the sock_skb_cb_check_size() mechanism was added to ensure that there is space for dropcount, "for protocol families using it". But TCP is not a protocol using dropcount, so tcp_init() doesn't need to provision space for dropcount in the skb->cb[], and thus we can revert to the older form of the tcp_skb_cb size check. Doing so allows TCP to use 4 more bytes of the skb->cb[] space. 
Fixes: b4772ef879a8 ("net: use common macro for assering skb->cb[] available size in protocol families") Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e79ed17..de02fb4 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3261,11 +3261,12 @@ static void __init tcp_init_mem(void) void __init tcp_init(void) { - unsigned long limit; int max_rshare, max_wshare, cnt; + unsigned long limit; unsigned int i; - sock_skb_cb_check_size(sizeof(struct tcp_skb_cb)); + BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > + FIELD_SIZEOF(struct sk_buff, cb)); percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL); -- cgit v1.1 From 0682e6902a52aca7caf6ad42551b16ea0f87bc31 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Sep 2016 23:39:13 -0400 Subject: tcp: count packets marked lost for a TCP connection Count the number of packets that a TCP connection marks lost. Congestion control modules can use this loss rate information for more intelligent decisions about how fast to send. Specifically, this is used in TCP BBR policer detection. BBR uses a high packet loss rate as one signal in its policer detection and policer bandwidth estimation algorithm. The BBR policer detection algorithm cannot simply track retransmits, because a retransmit can be (and often is) an indicator of packets lost long, long ago. This is particularly true in a long CA_Loss period that repairs the initial massive losses when a policer kicks in. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. 
Miller --- include/linux/tcp.h | 1 + net/ipv4/tcp_input.c | 25 ++++++++++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 6433cc8..38590fb 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -267,6 +267,7 @@ struct tcp_sock { * receiver in Recovery. */ u32 prr_out; /* Total number of pkts sent during Recovery. */ u32 delivered; /* Total data packets delivered incl. rexmits */ + u32 lost; /* Total data packets lost incl. rexmits */ u32 rcv_wnd; /* Current receiver window */ u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6886f38..9413288 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -899,12 +899,29 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; } +/* Sum the number of packets on the wire we have marked as lost. + * There are two cases we care about here: + * a) Packet hasn't been marked lost (nor retransmitted), + * and this is the first loss. + * b) Packet has been marked both lost and retransmitted, + * and this means we think it was lost again. 
+ */ +static void tcp_sum_lost(struct tcp_sock *tp, struct sk_buff *skb) +{ + __u8 sacked = TCP_SKB_CB(skb)->sacked; + + if (!(sacked & TCPCB_LOST) || + ((sacked & TCPCB_LOST) && (sacked & TCPCB_SACKED_RETRANS))) + tp->lost += tcp_skb_pcount(skb); +} + static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb) { if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { tcp_verify_retransmit_hint(tp, skb); tp->lost_out += tcp_skb_pcount(skb); + tcp_sum_lost(tp, skb); TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; } } @@ -913,6 +930,7 @@ void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb) { tcp_verify_retransmit_hint(tp, skb); + tcp_sum_lost(tp, skb); if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { tp->lost_out += tcp_skb_pcount(skb); TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; @@ -1890,6 +1908,7 @@ void tcp_enter_loss(struct sock *sk) struct sk_buff *skb; bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery; bool is_reneg; /* is receiver reneging on SACKs? */ + bool mark_lost; /* Reduce ssthresh if it has not yet been made inside this window. */ if (icsk->icsk_ca_state <= TCP_CA_Disorder || @@ -1923,8 +1942,12 @@ void tcp_enter_loss(struct sock *sk) if (skb == tcp_send_head(sk)) break; + mark_lost = (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) || + is_reneg); + if (mark_lost) + tcp_sum_lost(tp, skb); TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED; - if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || is_reneg) { + if (mark_lost) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); -- cgit v1.1 From b9f64820fb226a4e8ab10591f46cecd91ca56b30 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 19 Sep 2016 23:39:14 -0400 Subject: tcp: track data delivery rate for a TCP connection This patch generates data delivery rate (throughput) samples on a per-ACK basis. 
These rate samples can be used by congestion control modules, and specifically will be used by TCP BBR in later patches in this series. Key state: tp->delivered: Tracks the total number of data packets (original or not) delivered so far. This is an already-existing field. tp->delivered_mstamp: the last time tp->delivered was updated. Algorithm: A rate sample is calculated as (d1 - d0)/(t1 - t0) on a per-ACK basis: d1: the current tp->delivered after processing the ACK t1: the current time after processing the ACK d0: the prior tp->delivered when the acked skb was transmitted t0: the prior tp->delivered_mstamp when the acked skb was transmitted When an skb is transmitted, we snapshot d0 and t0 in its control block in tcp_rate_skb_sent(). When an ACK arrives, it may SACK and ACK some skbs. For each SACKed or ACKed skb, tcp_rate_skb_delivered() updates the rate_sample struct to reflect the latest (d0, t0). Finally, tcp_rate_gen() generates a rate sample by storing (d1 - d0) in rs->delivered and (t1 - t0) in rs->interval_us. One caveat: if an skb was sent with no packets in flight, then tp->delivered_mstamp may be either invalid (if the connection is starting) or outdated (if the connection was idle). In that case, we'll re-stamp tp->delivered_mstamp. At first glance it seems t0 should always be the time when an skb was transmitted, but actually this could over-estimate the rate due to phase mismatch between transmit and ACK events. To track the delivery rate, we ensure that if packets are in flight then t0 and t1 are times at which packets were marked delivered. If the initial and final RTTs are different then one may be corrupted by some sort of noise. The noise we see most often is sending gaps caused by delayed, compressed, or stretched acks. This either affects both RTTs equally or artificially reduces the final RTT. 
We approach this by recording the info we need to compute the initial RTT (duration of the "send phase" of the window) when we recorded the associated inflight. Then, for a filter to avoid bandwidth overestimates, we generalize the per-sample bandwidth computation from: bw = delivered / ack_phase_rtt to the following: bw = delivered / max(send_phase_rtt, ack_phase_rtt) In large-scale experiments, this filtering approach incorporating send_phase_rtt is effective at avoiding bandwidth overestimates due to ACK compression or stretched ACKs. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 + include/net/tcp.h | 35 +++++++++++- net/ipv4/Makefile | 2 +- net/ipv4/tcp_input.c | 46 +++++++++++----- net/ipv4/tcp_output.c | 4 ++ net/ipv4/tcp_rate.c | 149 ++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 222 insertions(+), 16 deletions(-) create mode 100644 net/ipv4/tcp_rate.c diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 38590fb..c50e6ae 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -268,6 +268,8 @@ struct tcp_sock { u32 prr_out; /* Total number of pkts sent during Recovery. */ u32 delivered; /* Total data packets delivered incl. rexmits */ u32 lost; /* Total data packets lost incl. 
rexmits */ + struct skb_mstamp first_tx_mstamp; /* start of window send phase */ + struct skb_mstamp delivered_mstamp; /* time we reached "delivered" */ u32 rcv_wnd; /* Current receiver window */ u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 2f1648a..b261c89 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -763,8 +763,14 @@ struct tcp_skb_cb { __u32 ack_seq; /* Sequence number ACK'd */ union { struct { - /* There is space for up to 20 bytes */ + /* There is space for up to 24 bytes */ __u32 in_flight;/* Bytes in flight when packet sent */ + /* pkts S/ACKed so far upon tx of skb, incl retrans: */ + __u32 delivered; + /* start of send pipeline phase */ + struct skb_mstamp first_tx_mstamp; + /* when we reached the "delivered" count */ + struct skb_mstamp delivered_mstamp; } tx; /* only used for outgoing skbs */ union { struct inet_skb_parm h4; @@ -860,6 +866,26 @@ struct ack_sample { u32 in_flight; }; +/* A rate sample measures the number of (original/retransmitted) data + * packets delivered "delivered" over an interval of time "interval_us". + * The tcp_rate.c code fills in the rate sample, and congestion + * control modules that define a cong_control function to run at the end + * of ACK processing can optionally chose to consult this sample when + * setting cwnd and pacing rate. + * A sample is invalid if "delivered" or "interval_us" is negative. 
+ */ +struct rate_sample { + struct skb_mstamp prior_mstamp; /* starting timestamp for interval */ + u32 prior_delivered; /* tp->delivered at "prior_mstamp" */ + s32 delivered; /* number of packets delivered over interval */ + long interval_us; /* time for tp->delivered to incr "delivered" */ + long rtt_us; /* RTT of last (S)ACKed packet (or -1) */ + int losses; /* number of packets marked lost upon ACK */ + u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */ + u32 prior_in_flight; /* in flight before this ACK */ + bool is_retrans; /* is sample from retransmission? */ +}; + struct tcp_congestion_ops { struct list_head list; u32 key; @@ -946,6 +972,13 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) icsk->icsk_ca_ops->cwnd_event(sk, event); } +/* From tcp_rate.c */ +void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb); +void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, + struct rate_sample *rs); +void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, + struct skb_mstamp *now, struct rate_sample *rs); + /* These functions determine how the current flow behaves in respect of SACK * handling. SACK is negotiated with the peer, and therefore it can vary * between different flows. 
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 24629b6..9cfff1a 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -8,7 +8,7 @@ obj-y := route.o inetpeer.o protocol.o \ inet_timewait_sock.o inet_connection_sock.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \ - tcp_recovery.o \ + tcp_rate.o tcp_recovery.o \ tcp_offload.o datagram.o raw.o udp.o udplite.o \ udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o \ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9413288..d9ed4bb 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1112,6 +1112,7 @@ struct tcp_sacktag_state { */ struct skb_mstamp first_sackt; struct skb_mstamp last_sackt; + struct rate_sample *rate; int flag; }; @@ -1279,6 +1280,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, start_seq, end_seq, dup_sack, pcount, &skb->skb_mstamp); + tcp_rate_skb_delivered(sk, skb, state->rate); if (skb == tp->lost_skb_hint) tp->lost_cnt_hint += pcount; @@ -1329,6 +1331,9 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, tcp_advance_highest_sack(sk, skb); tcp_skb_collapse_tstamp(prev, skb); + if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp.v64)) + TCP_SKB_CB(prev)->tx.delivered_mstamp.v64 = 0; + tcp_unlink_write_queue(skb, sk); sk_wmem_free_skb(sk, skb); @@ -1558,6 +1563,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, dup_sack, tcp_skb_pcount(skb), &skb->skb_mstamp); + tcp_rate_skb_delivered(sk, skb, state->rate); if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) @@ -1640,8 +1646,10 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire, num_sacks, prior_snd_una); - if (found_dup_sack) + if (found_dup_sack) { state->flag |= FLAG_DSACKING_ACK; + 
tp->delivered++; /* A spurious retransmission is delivered */ + } /* Eliminate too old ACKs, but take into * account more or less fresh ones, they can @@ -3071,10 +3079,11 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, */ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, u32 prior_snd_una, int *acked, - struct tcp_sacktag_state *sack) + struct tcp_sacktag_state *sack, + struct skb_mstamp *now) { const struct inet_connection_sock *icsk = inet_csk(sk); - struct skb_mstamp first_ackt, last_ackt, now; + struct skb_mstamp first_ackt, last_ackt; struct tcp_sock *tp = tcp_sk(sk); u32 prior_sacked = tp->sacked_out; u32 reord = tp->packets_out; @@ -3106,7 +3115,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, acked_pcount = tcp_tso_acked(sk, skb); if (!acked_pcount) break; - fully_acked = false; } else { /* Speedup tcp_unlink_write_queue() and next loop */ @@ -3142,6 +3150,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->packets_out -= acked_pcount; pkts_acked += acked_pcount; + tcp_rate_skb_delivered(sk, skb, sack->rate); /* Initial outgoing SYN's get put onto the write_queue * just like anything else we transmit. 
It is not @@ -3174,16 +3183,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) flag |= FLAG_SACK_RENEGING; - skb_mstamp_get(&now); if (likely(first_ackt.v64) && !(flag & FLAG_RETRANS_DATA_ACKED)) { - seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt); - ca_rtt_us = skb_mstamp_us_delta(&now, &last_ackt); + seq_rtt_us = skb_mstamp_us_delta(now, &first_ackt); + ca_rtt_us = skb_mstamp_us_delta(now, &last_ackt); } if (sack->first_sackt.v64) { - sack_rtt_us = skb_mstamp_us_delta(&now, &sack->first_sackt); - ca_rtt_us = skb_mstamp_us_delta(&now, &sack->last_sackt); + sack_rtt_us = skb_mstamp_us_delta(now, &sack->first_sackt); + ca_rtt_us = skb_mstamp_us_delta(now, &sack->last_sackt); } - + sack->rate->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet, or -1 */ rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us, ca_rtt_us); @@ -3211,7 +3219,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->fackets_out -= min(pkts_acked, tp->fackets_out); } else if (skb && rtt_update && sack_rtt_us >= 0 && - sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) { + sack_rtt_us > skb_mstamp_us_delta(now, &skb->skb_mstamp)) { /* Do not re-arm RTO if the sack RTT is measured from data sent * after when the head was last (re)transmitted. Otherwise the * timeout may continue to extend in loss recovery. 
@@ -3548,17 +3556,21 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct tcp_sacktag_state sack_state; + struct rate_sample rs = { .prior_delivered = 0 }; u32 prior_snd_una = tp->snd_una; u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; bool is_dupack = false; u32 prior_fackets; int prior_packets = tp->packets_out; - u32 prior_delivered = tp->delivered; + u32 delivered = tp->delivered; + u32 lost = tp->lost; int acked = 0; /* Number of packets newly acked */ int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */ + struct skb_mstamp now; sack_state.first_sackt.v64 = 0; + sack_state.rate = &rs; /* We very likely will need to access write queue head. */ prefetchw(sk->sk_write_queue.next); @@ -3581,6 +3593,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (after(ack, tp->snd_nxt)) goto invalid_ack; + skb_mstamp_get(&now); + if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) tcp_rearm_rto(sk); @@ -3591,6 +3605,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) } prior_fackets = tp->fackets_out; + rs.prior_in_flight = tcp_packets_in_flight(tp); /* ts_recent update must be made after we are sure that the packet * is in window. @@ -3646,7 +3661,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) /* See if we can take anything off of the retransmit queue. 
*/ flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked, - &sack_state); + &sack_state, &now); if (tcp_ack_is_dubious(sk, flag)) { is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); @@ -3663,7 +3678,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (icsk->icsk_pending == ICSK_TIME_RETRANS) tcp_schedule_loss_probe(sk); - tcp_cong_control(sk, ack, tp->delivered - prior_delivered, flag); + delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ + lost = tp->lost - lost; /* freshly marked lost */ + tcp_rate_gen(sk, delivered, lost, &now, &rs); + tcp_cong_control(sk, ack, delivered, flag); tcp_xmit_recovery(sk, rexmit); return 1; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8b45794..e02c8eb 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -918,6 +918,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb_mstamp_get(&skb->skb_mstamp); TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq - tp->snd_una; + tcp_rate_skb_sent(sk, skb); if (unlikely(skb_cloned(skb))) skb = pskb_copy(skb, gfp_mask); @@ -1213,6 +1214,9 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, tcp_set_skb_tso_segs(skb, mss_now); tcp_set_skb_tso_segs(buff, mss_now); + /* Update delivered info for the new segment */ + TCP_SKB_CB(buff)->tx = TCP_SKB_CB(skb)->tx; + /* If this packet has been sent out already, we must * adjust the various packet counters. */ diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c new file mode 100644 index 0000000..1daed6a --- /dev/null +++ b/net/ipv4/tcp_rate.c @@ -0,0 +1,149 @@ +#include + +/* The bandwidth estimator estimates the rate at which the network + * can currently deliver outbound data packets for this flow. At a high + * level, it operates by taking a delivery rate sample for each ACK. 
+ * + * A rate sample records the rate at which the network delivered packets + * for this flow, calculated over the time interval between the transmission + * of a data packet and the acknowledgment of that packet. + * + * Specifically, over the interval between each transmit and corresponding ACK, + * the estimator generates a delivery rate sample. Typically it uses the rate + * at which packets were acknowledged. However, the approach of using only the + * acknowledgment rate faces a challenge under the prevalent ACK decimation or + * compression: packets can temporarily appear to be delivered much quicker + * than the bottleneck rate. Since it is physically impossible to do that in a + * sustained fashion, when the estimator notices that the ACK rate is faster + * than the transmit rate, it uses the latter: + * + * send_rate = #pkts_delivered/(last_snd_time - first_snd_time) + * ack_rate = #pkts_delivered/(last_ack_time - first_ack_time) + * bw = min(send_rate, ack_rate) + * + * Notice the estimator essentially estimates the goodput, not always the + * network bottleneck link rate when the sending or receiving is limited by + * other factors like applications or receiver window limits. The estimator + * deliberately avoids using the inter-packet spacing approach because that + * approach requires a large number of samples and sophisticated filtering. + */ + + +/* Snapshot the current delivery information in the skb, to generate + * a rate sample later when the skb is (s)acked in tcp_rate_skb_delivered(). + */ +void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + + /* In general we need to start delivery rate samples from the + * time we received the most recent ACK, to ensure we include + * the full time the network needs to deliver all in-flight + * packets. 
If there are no packets in flight yet, then we + * know that any ACKs after now indicate that the network was + * able to deliver those packets completely in the sampling + * interval between now and the next ACK. + * + * Note that we use packets_out instead of tcp_packets_in_flight(tp) + * because the latter is a guess based on RTO and loss-marking + * heuristics. We don't want spurious RTOs or loss markings to cause + * a spuriously small time interval, causing a spuriously high + * bandwidth estimate. + */ + if (!tp->packets_out) { + tp->first_tx_mstamp = skb->skb_mstamp; + tp->delivered_mstamp = skb->skb_mstamp; + } + + TCP_SKB_CB(skb)->tx.first_tx_mstamp = tp->first_tx_mstamp; + TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp; + TCP_SKB_CB(skb)->tx.delivered = tp->delivered; +} + +/* When an skb is sacked or acked, we fill in the rate sample with the (prior) + * delivery information when the skb was last transmitted. + * + * If an ACK (s)acks multiple skbs (e.g., stretched-acks), this function is + * called multiple times. We favor the information from the most recently + * sent skb, i.e., the skb with the highest prior_delivered count. + */ +void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, + struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_skb_cb *scb = TCP_SKB_CB(skb); + + if (!scb->tx.delivered_mstamp.v64) + return; + + if (!rs->prior_delivered || + after(scb->tx.delivered, rs->prior_delivered)) { + rs->prior_delivered = scb->tx.delivered; + rs->prior_mstamp = scb->tx.delivered_mstamp; + rs->is_retrans = scb->sacked & TCPCB_RETRANS; + + /* Find the duration of the "send phase" of this window: */ + rs->interval_us = skb_mstamp_us_delta( + &skb->skb_mstamp, + &scb->tx.first_tx_mstamp); + + /* Record send time of most recently ACKed packet: */ + tp->first_tx_mstamp = skb->skb_mstamp; + } + /* Mark off the skb delivered once it's sacked to avoid being + * used again when it's cumulatively acked. 
For acked packets + * we don't need to reset since it'll be freed soon. + */ + if (scb->sacked & TCPCB_SACKED_ACKED) + scb->tx.delivered_mstamp.v64 = 0; +} + +/* Update the connection delivery information and generate a rate sample. */ +void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, + struct skb_mstamp *now, struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + u32 snd_us, ack_us; + + /* TODO: there are multiple places throughout tcp_ack() to get + * current time. Refactor the code using a new "tcp_acktag_state" + * to carry current time, flags, stats like "tcp_sacktag_state". + */ + if (delivered) + tp->delivered_mstamp = *now; + + rs->acked_sacked = delivered; /* freshly ACKed or SACKed */ + rs->losses = lost; /* freshly marked lost */ + /* Return an invalid sample if no timing information is available. */ + if (!rs->prior_mstamp.v64) { + rs->delivered = -1; + rs->interval_us = -1; + return; + } + rs->delivered = tp->delivered - rs->prior_delivered; + + /* Model sending data and receiving ACKs as separate pipeline phases + * for a window. Usually the ACK phase is longer, but with ACK + * compression the send phase can be longer. To be safe we use the + * longer phase. + */ + snd_us = rs->interval_us; /* send phase */ + ack_us = skb_mstamp_us_delta(now, &rs->prior_mstamp); /* ack phase */ + rs->interval_us = max(snd_us, ack_us); + + /* Normally we expect interval_us >= min-rtt. + * Note that rate may still be over-estimated when a spuriously + * retransmistted skb was first (s)acked because "interval_us" + * is under-estimated (up to an RTT). However continuously + * measuring the delivery rate during loss recovery is crucial + * for connections suffer heavy or prolonged losses. 
+ */ + if (unlikely(rs->interval_us < tcp_min_rtt(tp))) { + rs->interval_us = -1; + if (!rs->is_retrans) + pr_debug("tcp rate: %ld %d %u %u %u\n", + rs->interval_us, rs->delivered, + inet_csk(sk)->icsk_ca_state, + tp->rx_opt.sack_ok, tcp_min_rtt(tp)); + } +} -- cgit v1.1 From d7722e8570fc0f1e003cee7cf37694041828918b Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Mon, 19 Sep 2016 23:39:15 -0400 Subject: tcp: track application-limited rate samples This commit adds code to track whether the delivery rate represented by each rate_sample was limited by the application. Upon each transmit, we store in the is_app_limited field in the skb a boolean bit indicating whether there is a known "bubble in the pipe": a point in the rate sample interval where the sender was application-limited, and did not transmit even though the cwnd and pacing rate allowed it. This logic marks the flow app-limited on a write if *all* of the following are true: 1) There is less than 1 MSS of unsent data in the write queue available to transmit. 2) There is no packet in the sender's queues (e.g. in fq or the NIC tx queue). 3) The connection is not limited by cwnd. 4) There are no lost packets to retransmit. The tcp_rate_check_app_limited() code in tcp_rate.c determines whether the connection is application-limited at the moment. If the flow is application-limited, it sets the tp->app_limited field. If the flow is application-limited then that means there is effectively a "bubble" of silence in the pipe now, and this silence will be reflected in a lower bandwidth sample for any rate samples from now until we get an ACK indicating this bubble has exited the pipe: specifically, until we get an ACK for the next packet we transmit. When we send every skb we record in scb->tx.is_app_limited whether the resulting rate sample will be application-limited. The code in tcp_rate_gen() checks to see when it is safe to mark all known application-limited bubbles of silence as having exited the pipe. 
It does this by checking to see when the delivered count moves past the tp->app_limited marker. At this point it zeroes the tp->app_limited marker, as all known bubbles are out of the pipe. We make room for the tx.is_app_limited bit in the skb by borrowing a bit from the in_flight field used by NV to record the number of bytes in flight. The receive window in the TCP header is 16 bits, and the max receive window scaling shift factor is 14 (RFC 1323). So the max receive window offered by the TCP protocol is 2^(16+14) = 2^30. So we only need 30 bits for the tx.in_flight used by NV. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + include/net/tcp.h | 6 +++++- net/ipv4/tcp.c | 8 ++++++++ net/ipv4/tcp_minisocks.c | 3 +++ net/ipv4/tcp_rate.c | 29 ++++++++++++++++++++++++++++- 5 files changed, 45 insertions(+), 2 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index c50e6ae..fdcd00f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -268,6 +268,7 @@ struct tcp_sock { u32 prr_out; /* Total number of pkts sent during Recovery. */ u32 delivered; /* Total data packets delivered incl. rexmits */ u32 lost; /* Total data packets lost incl. rexmits */ + u32 app_limited; /* limited until "delivered" reaches this val */ struct skb_mstamp first_tx_mstamp; /* start of window send phase */ struct skb_mstamp delivered_mstamp; /* time we reached "delivered" */ diff --git a/include/net/tcp.h b/include/net/tcp.h index b261c89..a69ed7f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -764,7 +764,9 @@ struct tcp_skb_cb { union { struct { /* There is space for up to 24 bytes */ - __u32 in_flight;/* Bytes in flight when packet sent */ + __u32 in_flight:30,/* Bytes in flight at transmit */ + is_app_limited:1, /* cwnd not fully used? 
*/ + unused:1; /* pkts S/ACKed so far upon tx of skb, incl retrans: */ __u32 delivered; /* start of send pipeline phase */ @@ -883,6 +885,7 @@ struct rate_sample { int losses; /* number of packets marked lost upon ACK */ u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */ u32 prior_in_flight; /* in flight before this ACK */ + bool is_app_limited; /* is sample from packet with bubble in pipe? */ bool is_retrans; /* is sample from retransmission? */ }; @@ -978,6 +981,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, struct rate_sample *rs); void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, struct skb_mstamp *now, struct rate_sample *rs); +void tcp_rate_check_app_limited(struct sock *sk); /* These functions determine how the current flow behaves in respect of SACK * handling. SACK is negotiated with the peer, and therefore it can vary diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index de02fb4..2250f89 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -396,6 +396,9 @@ void tcp_init_sock(struct sock *sk) */ tp->snd_cwnd = TCP_INIT_CWND; + /* There's a bubble in the pipe until at least the first ACK. */ + tp->app_limited = ~0U; + /* See draft-stevens-tcpca-spec-01 for discussion of the * initialization of these values. */ @@ -1014,6 +1017,9 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, flags); lock_sock(sk); + + tcp_rate_check_app_limited(sk); /* is sending application-limited? */ + res = do_tcp_sendpages(sk, page, offset, size, flags); release_sock(sk); return res; @@ -1115,6 +1121,8 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + tcp_rate_check_app_limited(sk); /* is sending application-limited? */ + /* Wait for a connection to finish. One exception is TCP Fast Open * (passive side) where data is allowed to be sent before a connection * is fully established. 
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 5689471..6234eba 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -487,6 +487,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->snd_cwnd = TCP_INIT_CWND; newtp->snd_cwnd_cnt = 0; + /* There's a bubble in the pipe until at least the first ACK. */ + newtp->app_limited = ~0U; + tcp_init_xmit_timers(newsk); newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index 1daed6a..52ff84b 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -26,9 +26,13 @@ * other factors like applications or receiver window limits. The estimator * deliberately avoids using the inter-packet spacing approach because that * approach requires a large number of samples and sophisticated filtering. + * + * TCP flows can often be application-limited in request/response workloads. + * The estimator marks a bandwidth sample as application-limited if there + * was some moment during the sampled window of packets when there was no data + * ready to send in the write queue. */ - /* Snapshot the current delivery information in the skb, to generate * a rate sample later when the skb is (s)acked in tcp_rate_skb_delivered(). */ @@ -58,6 +62,7 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb)->tx.first_tx_mstamp = tp->first_tx_mstamp; TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp; TCP_SKB_CB(skb)->tx.delivered = tp->delivered; + TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 
1 : 0; } /* When an skb is sacked or acked, we fill in the rate sample with the (prior) @@ -80,6 +85,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, after(scb->tx.delivered, rs->prior_delivered)) { rs->prior_delivered = scb->tx.delivered; rs->prior_mstamp = scb->tx.delivered_mstamp; + rs->is_app_limited = scb->tx.is_app_limited; rs->is_retrans = scb->sacked & TCPCB_RETRANS; /* Find the duration of the "send phase" of this window: */ @@ -105,6 +111,10 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, struct tcp_sock *tp = tcp_sk(sk); u32 snd_us, ack_us; + /* Clear app limited if bubble is acked and gone. */ + if (tp->app_limited && after(tp->delivered, tp->app_limited)) + tp->app_limited = 0; + /* TODO: there are multiple places throughout tcp_ack() to get * current time. Refactor the code using a new "tcp_acktag_state" * to carry current time, flags, stats like "tcp_sacktag_state". @@ -147,3 +157,20 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, tp->rx_opt.sack_ok, tcp_min_rtt(tp)); } } + +/* If a gap is detected between sends, mark the socket application-limited. */ +void tcp_rate_check_app_limited(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (/* We have less than one packet to send. */ + tp->write_seq - tp->snd_nxt < tp->mss_cache && + /* Nothing in sending host's qdisc queues or NIC tx queue. */ + sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1) && + /* We are not limited by CWND. */ + tcp_packets_in_flight(tp) < tp->snd_cwnd && + /* All lost packets have been retransmitted. */ + tp->lost_out <= tp->retrans_out) + tp->app_limited = + (tp->delivered + tcp_packets_in_flight(tp)) ? : 1; +} -- cgit v1.1 From eb8329e0a04db0061f714f033b4454326ba147f4 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 19 Sep 2016 23:39:16 -0400 Subject: tcp: export data delivery rate This commit export two new fields in struct tcp_info: tcpi_delivery_rate: The most recent goodput, as measured by tcp_rate_gen(). 
If the socket is limited by the sending application (e.g., no data to send), it reports the highest measurement instead of the most recent. The unit is bytes per second (like other rate fields in tcp_info). tcpi_delivery_rate_app_limited: A boolean indicating if the goodput was measured when the socket's throughput was limited by the sending application. This delivery rate information can be useful for applications that want to know the current throughput the TCP connection is seeing, e.g. adaptive bitrate video streaming. It can also be very useful for debugging or troubleshooting. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/linux/tcp.h | 5 ++++- include/uapi/linux/tcp.h | 3 +++ net/ipv4/tcp.c | 11 ++++++++++- net/ipv4/tcp_rate.c | 12 +++++++++++- 4 files changed, 28 insertions(+), 3 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index fdcd00f..a17ae7b 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -213,7 +213,8 @@ struct tcp_sock { u8 reord; /* reordering detected */ } rack; u16 advmss; /* Advertised MSS */ - u8 unused; + u8 rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ + unused:7; u8 nonagle : 4,/* Disable Nagle algorithm? 
*/ thin_lto : 1,/* Use linear timeouts for thin streams */ thin_dupack : 1,/* Fast retransmit on first dupack */ @@ -271,6 +272,8 @@ struct tcp_sock { u32 app_limited; /* limited until "delivered" reaches this val */ struct skb_mstamp first_tx_mstamp; /* start of window send phase */ struct skb_mstamp delivered_mstamp; /* time we reached "delivered" */ + u32 rate_delivered; /* saved rate sample: packets delivered */ + u32 rate_interval_us; /* saved rate sample: time elapsed */ u32 rcv_wnd; /* Current receiver window */ u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 482898f..73ac0db 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -167,6 +167,7 @@ struct tcp_info { __u8 tcpi_backoff; __u8 tcpi_options; __u8 tcpi_snd_wscale : 4, tcpi_rcv_wscale : 4; + __u8 tcpi_delivery_rate_app_limited:1; __u32 tcpi_rto; __u32 tcpi_ato; @@ -211,6 +212,8 @@ struct tcp_info { __u32 tcpi_min_rtt; __u32 tcpi_data_segs_in; /* RFC4898 tcpEStatsDataSegsIn */ __u32 tcpi_data_segs_out; /* RFC4898 tcpEStatsDataSegsOut */ + + __u64 tcpi_delivery_rate; }; /* for TCP_MD5SIG socket option */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2250f89..f253e50 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2712,7 +2712,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) { const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */ const struct inet_connection_sock *icsk = inet_csk(sk); - u32 now = tcp_time_stamp; + u32 now = tcp_time_stamp, intv; unsigned int start; int notsent_bytes; u64 rate64; @@ -2802,6 +2802,15 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_min_rtt = tcp_min_rtt(tp); info->tcpi_data_segs_in = tp->data_segs_in; info->tcpi_data_segs_out = tp->data_segs_out; + + info->tcpi_delivery_rate_app_limited = tp->rate_app_limited ? 
1 : 0; + rate = READ_ONCE(tp->rate_delivered); + intv = READ_ONCE(tp->rate_interval_us); + if (rate && intv) { + rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC; + do_div(rate64, intv); + put_unaligned(rate64, &info->tcpi_delivery_rate); + } } EXPORT_SYMBOL_GPL(tcp_get_info); diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index 52ff84b..9be1581 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -149,12 +149,22 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, * for connections suffer heavy or prolonged losses. */ if (unlikely(rs->interval_us < tcp_min_rtt(tp))) { - rs->interval_us = -1; if (!rs->is_retrans) pr_debug("tcp rate: %ld %d %u %u %u\n", rs->interval_us, rs->delivered, inet_csk(sk)->icsk_ca_state, tp->rx_opt.sack_ok, tcp_min_rtt(tp)); + rs->interval_us = -1; + return; + } + + /* Record the last non-app-limited or the highest app-limited bw */ + if (!rs->is_app_limited || + ((u64)rs->delivered * tp->rate_interval_us >= + (u64)tp->rate_delivered * rs->interval_us)) { + tp->rate_delivered = rs->delivered; + tp->rate_interval_us = rs->interval_us; + tp->rate_app_limited = rs->is_app_limited; } } -- cgit v1.1 From ed6e7268b930e0a9a65d895d368eac79a438d992 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Sep 2016 23:39:17 -0400 Subject: tcp: allow congestion control module to request TSO skb segment count Add the tso_segs_goal() function in tcp_congestion_ops to allow the congestion control module to specify the number of segments that should be in a TSO skb sent by tcp_write_xmit() and tcp_xmit_retransmit_queue(). The congestion control module can either request a particular number of segments in TSO skb that we transmit, or return 0 if it doesn't care. This allows the upcoming BBR congestion control module to select small TSO skb sizes if the module detects that the bottleneck bandwidth is very low, or that the connection is policed to a low rate. 
Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ net/ipv4/tcp_output.c | 15 +++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index a69ed7f..f8f581f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -913,6 +913,8 @@ struct tcp_congestion_ops { u32 (*undo_cwnd)(struct sock *sk); /* hook for packet ack accounting (optional) */ void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); + /* suggest number of segments for each skb to transmit (optional) */ + u32 (*tso_segs_goal)(struct sock *sk); /* get info for inet_diag (optional) */ size_t (*get_info)(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e02c8eb..0137956 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1566,6 +1566,17 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now) return min_t(u32, segs, sk->sk_gso_max_segs); } +/* Return the number of segments we want in the skb we are transmitting. + * See if congestion control module wants to decide; otherwise, autosize. + */ +static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) +{ + const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; + u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0; + + return tso_segs ? 
: tcp_tso_autosize(sk, mss_now); +} + /* Returns the portion of skb which can be sent right away */ static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb, @@ -2061,7 +2072,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, } } - max_segs = tcp_tso_autosize(sk, mss_now); + max_segs = tcp_tso_segs(sk, mss_now); while ((skb = tcp_send_head(sk))) { unsigned int limit; @@ -2778,7 +2789,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) last_lost = tp->snd_una; } - max_segs = tcp_tso_autosize(sk, tcp_current_mss(sk)); + max_segs = tcp_tso_segs(sk, tcp_current_mss(sk)); tcp_for_write_queue_from(skb, sk) { __u8 sacked; int segs; -- cgit v1.1 From 1b3878ca1551f3baab2c408d1e703b5ef785a1b2 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Sep 2016 23:39:18 -0400 Subject: tcp: export tcp_tso_autosize() and parameterize minimum number of TSO segments To allow congestion control modules to use the default TSO auto-sizing algorithm as one of the ingredients in their own decision about TSO sizing: 1) Export tcp_tso_autosize() so that CC modules can use it. 2) Change tcp_tso_autosize() to allow callers to specify a minimum number of segments per TSO skb, in case the congestion control module has a different notion of the best floor for TSO skbs for the connection right now. For very low-rate paths or policed connections it can be appropriate to use smaller TSO skbs. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. 
Miller --- include/net/tcp.h | 2 ++ net/ipv4/tcp_output.c | 9 ++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index f8f581f..3492041 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -533,6 +533,8 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss); #endif /* tcp_output.c */ +u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, + int min_tso_segs); void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, int nonagle); bool tcp_may_send_now(struct sock *sk); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 0137956..0bf3d48 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1549,7 +1549,8 @@ static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp, /* Return how many segs we'd like on a TSO packet, * to send one TSO packet per ms */ -static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now) +u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, + int min_tso_segs) { u32 bytes, segs; @@ -1561,10 +1562,11 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now) * This preserves ACK clocking and is consistent * with tcp_tso_should_defer() heuristic. */ - segs = max_t(u32, bytes / mss_now, sysctl_tcp_min_tso_segs); + segs = max_t(u32, bytes / mss_now, min_tso_segs); return min_t(u32, segs, sk->sk_gso_max_segs); } +EXPORT_SYMBOL(tcp_tso_autosize); /* Return the number of segments we want in the skb we are transmitting. * See if congestion control module wants to decide; otherwise, autosize. @@ -1574,7 +1576,8 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0; - return tso_segs ? : tcp_tso_autosize(sk, mss_now); + return tso_segs ? 
: + tcp_tso_autosize(sk, mss_now, sysctl_tcp_min_tso_segs); } /* Returns the portion of skb which can be sent right away */ -- cgit v1.1 From 556c6b46d194cc0dbb6a5b22f1d2bbc699c86d8e Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Sep 2016 23:39:19 -0400 Subject: tcp: export tcp_mss_to_mtu() for congestion control modules Export tcp_mss_to_mtu(), so that congestion control modules can use this to help calculate a pacing rate. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 0bf3d48..7d025a7 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1362,6 +1362,7 @@ int tcp_mss_to_mtu(struct sock *sk, int mss) } return mtu; } +EXPORT_SYMBOL(tcp_mss_to_mtu); /* MTU probing init per socket */ void tcp_mtup_init(struct sock *sk) -- cgit v1.1 From 77bfc174c38e558a3425d3b069aa2762b2fedfdd Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 19 Sep 2016 23:39:20 -0400 Subject: tcp: allow congestion control to expand send buffer differently Currently the TCP send buffer expands to twice cwnd, in order to allow limited transmits in the CA_Recovery state. This assumes that cwnd does not increase in the CA_Recovery. For some congestion control algorithms, like the upcoming BBR module, if the losses in recovery do not indicate congestion then we may continue to raise cwnd multiplicatively in recovery. In such cases the current multiplier will falsely limit the sending rate, much as if it were limited by the application. This commit adds an optional congestion control callback to use a different multiplier to expand the TCP send buffer. For congestion control modules that do not specify this callback, TCP continues to use the previous default of 2.
Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ net/ipv4/tcp_input.c | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 3492041..1aa9628 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -917,6 +917,8 @@ struct tcp_congestion_ops { void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); /* suggest number of segments for each skb to transmit (optional) */ u32 (*tso_segs_goal)(struct sock *sk); + /* returns the multiplier used in tcp_sndbuf_expand (optional) */ + u32 (*sndbuf_expand)(struct sock *sk); /* get info for inet_diag (optional) */ size_t (*get_info)(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d9ed4bb..13a2e70 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -289,6 +289,7 @@ static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr static void tcp_sndbuf_expand(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; int sndmem, per_mss; u32 nr_segs; @@ -309,7 +310,8 @@ static void tcp_sndbuf_expand(struct sock *sk) * Cubic needs 1.7 factor, rounded to 2 to include * extra cushion (application might react slowly to POLLOUT) */ - sndmem = 2 * nr_segs * per_mss; + sndmem = ca_ops->sndbuf_expand ? 
ca_ops->sndbuf_expand(sk) : 2; + sndmem *= nr_segs * per_mss; if (sk->sk_sndbuf < sndmem) sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]); -- cgit v1.1 From c0402760f565ae066621ebf8720a32fba074d538 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 19 Sep 2016 23:39:21 -0400 Subject: tcp: new CC hook to set sending rate with rate_sample in any CA state This commit introduces an optional new "omnipotent" hook, cong_control(), for congestion control modules. The cong_control() function is called at the end of processing an ACK (i.e., after updating sequence numbers, the SACK scoreboard, and loss detection). At that moment we have precise delivery rate information the congestion control module can use to control the sending behavior (using cwnd, TSO skb size, and pacing rate) in any CA state. This function can also be used by a congestion control that prefers not to use the default cwnd reduction approach (i.e., the PRR algorithm) during CA_Recovery to control the cwnd and sending rate during loss recovery. We take advantage of the fact that recent changes defer the retransmission or transmission of new data (e.g. by F-RTO) in recovery until the new tcp_cong_control() function is run. With this commit, we only run tcp_update_pacing_rate() if the congestion control is not using this new API. New congestion controls which use the new API do not want the TCP stack to run the default pacing rate calculation and overwrite whatever pacing rate they have chosen at initialization time. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. 
Miller --- include/net/tcp.h | 4 ++++ net/ipv4/tcp_cong.c | 2 +- net/ipv4/tcp_input.c | 17 ++++++++++++++--- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 1aa9628..f83b7f2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -919,6 +919,10 @@ struct tcp_congestion_ops { u32 (*tso_segs_goal)(struct sock *sk); /* returns the multiplier used in tcp_sndbuf_expand (optional) */ u32 (*sndbuf_expand)(struct sock *sk); + /* call when packets are delivered to update cwnd and pacing rate, + * after all the ca_state processing. (optional) + */ + void (*cong_control)(struct sock *sk, const struct rate_sample *rs); /* get info for inet_diag (optional) */ size_t (*get_info)(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 882caa4..1294af4 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -69,7 +69,7 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca) int ret = 0; /* all algorithms must implement ssthresh and cong_avoid ops */ - if (!ca->ssthresh || !ca->cong_avoid) { + if (!ca->ssthresh || !(ca->cong_avoid || ca->cong_control)) { pr_err("%s does not implement required ops\n", ca->name); return -EINVAL; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 13a2e70..980a83e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2536,6 +2536,9 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + if (inet_csk(sk)->icsk_ca_ops->cong_control) + return; + /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */ if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) { @@ -3312,8 +3315,15 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag) * information. All transmission or retransmission are delayed afterwards. 
*/ static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked, - int flag) + int flag, const struct rate_sample *rs) { + const struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ca_ops->cong_control) { + icsk->icsk_ca_ops->cong_control(sk, rs); + return; + } + if (tcp_in_cwnd_reduction(sk)) { /* Reduce cwnd if state mandates */ tcp_cwnd_reduction(sk, acked_sacked, flag); @@ -3683,7 +3693,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ lost = tp->lost - lost; /* freshly marked lost */ tcp_rate_gen(sk, delivered, lost, &now, &rs); - tcp_cong_control(sk, ack, delivered, flag); + tcp_cong_control(sk, ack, delivered, flag, &rs); tcp_xmit_recovery(sk, rexmit); return 1; @@ -5982,7 +5992,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) } else tcp_init_metrics(sk); - tcp_update_pacing_rate(sk); + if (!inet_csk(sk)->icsk_ca_ops->cong_control) + tcp_update_pacing_rate(sk); /* Prevent spurious tcp_cwnd_restart() on first data packet */ tp->lsndtime = tcp_time_stamp; -- cgit v1.1 From 7e744171386ae6da1248d3d27d10b6dbdc54f0ff Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Sep 2016 23:39:22 -0400 Subject: tcp: increase ICSK_CA_PRIV_SIZE from 64 bytes to 88 The TCP CUBIC module already uses 64 bytes. The upcoming TCP BBR module uses 88 bytes. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. 
Miller --- include/net/inet_connection_sock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 49dcad4..197a30d 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -134,8 +134,8 @@ struct inet_connection_sock { } icsk_mtup; u32 icsk_user_timeout; - u64 icsk_ca_priv[64 / sizeof(u64)]; -#define ICSK_CA_PRIV_SIZE (8 * sizeof(u64)) + u64 icsk_ca_priv[88 / sizeof(u64)]; +#define ICSK_CA_PRIV_SIZE (11 * sizeof(u64)) }; #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ -- cgit v1.1 From 0f8782ea14974ce992618b55f0c041ef43ed0b78 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Sep 2016 23:39:23 -0400 Subject: tcp_bbr: add BBR congestion control This commit implements a new TCP congestion control algorithm: BBR (Bottleneck Bandwidth and RTT). A detailed description of BBR will be published in ACM Queue, Vol. 14 No. 5, September-October 2016, as "BBR: Congestion-Based Congestion Control". BBR has significantly increased throughput and reduced latency for connections on Google's internal backbone networks and google.com and YouTube Web servers. BBR requires only changes on the sender side, not in the network or the receiver side. Thus it can be incrementally deployed on today's Internet, or in datacenters. The Internet has predominantly used loss-based congestion control (largely Reno or CUBIC) since the 1980s, relying on packet loss as the signal to slow down. While this worked well for many years, loss-based congestion control is unfortunately out-dated in today's networks. On today's Internet, loss-based congestion control causes the infamous bufferbloat problem, often causing seconds of needless queuing delay, since it fills the bloated buffers in many last-mile links. 
On today's high-speed long-haul links using commodity switches with shallow buffers, loss-based congestion control has abysmal throughput because it over-reacts to losses caused by transient traffic bursts. In 1981 Kleinrock and Gale showed that the optimal operating point for a network maximizes delivered bandwidth while minimizing delay and loss, not only for single connections but for the network as a whole. Finding that optimal operating point has been elusive, since any single network measurement is ambiguous: network measurements are the result of both bandwidth and propagation delay, and those two cannot be measured simultaneously. While it is impossible to disambiguate any single bandwidth or RTT measurement, a connection's behavior over time tells a clearer story. BBR uses a measurement strategy designed to resolve this ambiguity. It combines these measurements with a robust servo loop using recent control systems advances to implement a distributed congestion control algorithm that reacts to actual congestion, not packet loss or transient queue delay, and is designed to converge with high probability to a point near the optimal operating point. In a nutshell, BBR creates an explicit model of the network pipe by sequentially probing the bottleneck bandwidth and RTT. On the arrival of each ACK, BBR derives the current delivery rate of the last round trip, and feeds it through a windowed max-filter to estimate the bottleneck bandwidth. Conversely it uses a windowed min-filter to estimate the round trip propagation delay. The max-filtered bandwidth and min-filtered RTT estimates form BBR's model of the network pipe. Using its model, BBR sets control parameters to govern sending behavior. The primary control is the pacing rate: BBR applies a gain multiplier to transmit faster or slower than the observed bottleneck bandwidth. 
The conventional congestion window (cwnd) is now the secondary control; the cwnd is set to a small multiple of the estimated BDP (bandwidth-delay product) in order to allow full utilization and bandwidth probing while bounding the potential amount of queue at the bottleneck. When a BBR connection starts, it enters STARTUP mode and applies a high gain to perform an exponential search to quickly probe the bottleneck bandwidth (doubling its sending rate each round trip, like slow start). However, instead of continuing until it fills up the buffer (i.e. a loss), or until delay or ACK spacing reaches some threshold (like Hystart), it uses its model of the pipe to estimate when that pipe is full: it estimates the pipe is full when it notices the estimated bandwidth has stopped growing. At that point it exits STARTUP and enters DRAIN mode, where it reduces its pacing rate to drain the queue it estimates it has created. Then BBR enters steady state. In steady state, PROBE_BW mode cycles between first pacing faster to probe for more bandwidth, then pacing slower to drain any queue that was created if no more bandwidth was available, and then cruising at the estimated bandwidth to utilize the pipe without creating excess queue. Occasionally, on an as-needed basis, it sends significantly slower to probe for RTT (PROBE_RTT mode). BBR has been fully deployed on Google's wide-area backbone networks and we're experimenting with BBR on Google.com and YouTube on a global scale. Replacing CUBIC with BBR has resulted in significant improvements in network latency and application (RPC, browser, and video) metrics. For more details please refer to our upcoming ACM Queue publication. Example performance results, to illustrate the difference between BBR and CUBIC: Resilience to random loss (e.g. from shallow buffers): Consider a netperf TCP_STREAM test lasting 30 secs on an emulated path with a 10Gbps bottleneck, 100ms RTT, and 1% packet loss rate.
CUBIC gets 3.27 Mbps, and BBR gets 9150 Mbps (2798x higher). Low latency with the bloated buffers common in today's last-mile links: Consider a netperf TCP_STREAM test lasting 120 secs on an emulated path with a 10Mbps bottleneck, 40ms RTT, and 1000-packet bottleneck buffer. Both fully utilize the bottleneck bandwidth, but BBR achieves this with a median RTT 25x lower (43 ms instead of 1.09 secs). Our long-term goal is to improve the congestion control algorithms used on the Internet. We are hopeful that BBR can help advance the efforts toward this goal, and motivate the community to do further research. Test results, performance evaluations, feedback, and BBR-related discussions are very welcome in the public e-mail list for BBR: https://groups.google.com/forum/#!forum/bbr-dev NOTE: BBR *must* be used with the fq qdisc ("man tc-fq") with pacing enabled, since pacing is integral to the BBR design and implementation. BBR without pacing would not function properly, and may incur unnecessarily high packet loss rates. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. 
Miller --- include/uapi/linux/inet_diag.h | 13 + net/ipv4/Kconfig | 18 + net/ipv4/Makefile | 1 + net/ipv4/tcp_bbr.c | 896 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 928 insertions(+) create mode 100644 net/ipv4/tcp_bbr.c diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index b5c366f..509cd96 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -124,6 +124,7 @@ enum { INET_DIAG_PEERS, INET_DIAG_PAD, INET_DIAG_MARK, + INET_DIAG_BBRINFO, __INET_DIAG_MAX, }; @@ -157,8 +158,20 @@ struct tcp_dctcp_info { __u32 dctcp_ab_tot; }; +/* INET_DIAG_BBRINFO */ + +struct tcp_bbr_info { + /* u64 bw: max-filtered BW (app throughput) estimate in Byte per sec: */ + __u32 bbr_bw_lo; /* lower 32 bits of bw */ + __u32 bbr_bw_hi; /* upper 32 bits of bw */ + __u32 bbr_min_rtt; /* min-filtered RTT in uSec */ + __u32 bbr_pacing_gain; /* pacing gain shifted left 8 bits */ + __u32 bbr_cwnd_gain; /* cwnd gain shifted left 8 bits */ +}; + union tcp_cc_info { struct tcpvegas_info vegas; struct tcp_dctcp_info dctcp; + struct tcp_bbr_info bbr; }; #endif /* _UAPI_INET_DIAG_H_ */ diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 50d6a9b..300b068 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -640,6 +640,21 @@ config TCP_CONG_CDG D.A. Hayes and G. Armitage. "Revisiting TCP congestion control using delay gradients." In Networking 2011. Preprint: http://goo.gl/No3vdg +config TCP_CONG_BBR + tristate "BBR TCP" + default n + ---help--- + + BBR (Bottleneck Bandwidth and RTT) TCP congestion control aims to + maximize network utilization and minimize queues. It builds an explicit + model of the bottleneck delivery rate and path round-trip + propagation delay. It tolerates packet loss and delay unrelated to + congestion. It can operate over LAN, WAN, cellular, wifi, or cable + modem links. 
It can coexist with flows that use loss-based congestion + control, and can operate with shallow buffers, deep buffers, + bufferbloat, policers, or AQM schemes that do not provide a delay + signal. It requires the fq ("Fair Queue") pacing packet scheduler. + choice prompt "Default TCP congestion control" default DEFAULT_CUBIC @@ -674,6 +689,9 @@ choice config DEFAULT_CDG bool "CDG" if TCP_CONG_CDG=y + config DEFAULT_BBR + bool "BBR" if TCP_CONG_BBR=y + config DEFAULT_RENO bool "Reno" endchoice diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 9cfff1a..bc6a6c8 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_INET_DIAG) += inet_diag.o obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o +obj-$(CONFIG_TCP_CONG_BBR) += tcp_bbr.o obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o obj-$(CONFIG_TCP_CONG_CDG) += tcp_cdg.o obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c new file mode 100644 index 0000000..0ea66c2 --- /dev/null +++ b/net/ipv4/tcp_bbr.c @@ -0,0 +1,896 @@ +/* Bottleneck Bandwidth and RTT (BBR) congestion control + * + * BBR congestion control computes the sending rate based on the delivery + * rate (throughput) estimated from ACKs. In a nutshell: + * + * On each ACK, update our model of the network path: + * bottleneck_bandwidth = windowed_max(delivered / elapsed, 10 round trips) + * min_rtt = windowed_min(rtt, 10 seconds) + * pacing_rate = pacing_gain * bottleneck_bandwidth + * cwnd = max(cwnd_gain * bottleneck_bandwidth * min_rtt, 4) + * + * The core algorithm does not react directly to packet losses or delays, + * although BBR may adjust the size of next send per ACK when loss is + * observed, or adjust the sending rate if it estimates there is a + * traffic policer, in order to keep the drop rate reasonable. 
+ * + * BBR is described in detail in: + * "BBR: Congestion-Based Congestion Control", + * Neal Cardwell, Yuchung Cheng, C. Stephen Gunn, Soheil Hassas Yeganeh, + * Van Jacobson. ACM Queue, Vol. 14 No. 5, September-October 2016. + * + * There is a public e-mail list for discussing BBR development and testing: + * https://groups.google.com/forum/#!forum/bbr-dev + * + * NOTE: BBR *must* be used with the fq qdisc ("man tc-fq") with pacing enabled, + * since pacing is integral to the BBR design and implementation. + * BBR without pacing would not function properly, and may incur unnecessary + * high packet loss rates. + */ +#include +#include +#include +#include +#include +#include + +/* Scale factor for rate in pkt/uSec unit to avoid truncation in bandwidth + * estimation. The rate unit ~= (1500 bytes / 1 usec / 2^24) ~= 715 bps. + * This handles bandwidths from 0.06pps (715bps) to 256Mpps (3Tbps) in a u32. + * Since the minimum window is >=4 packets, the lower bound isn't + * an issue. The upper bound isn't an issue with existing technologies. + */ +#define BW_SCALE 24 +#define BW_UNIT (1 << BW_SCALE) + +#define BBR_SCALE 8 /* scaling factor for fractions in BBR (e.g. 
gains) */ +#define BBR_UNIT (1 << BBR_SCALE) + +/* BBR has the following modes for deciding how fast to send: */ +enum bbr_mode { + BBR_STARTUP, /* ramp up sending rate rapidly to fill pipe */ + BBR_DRAIN, /* drain any queue created during startup */ + BBR_PROBE_BW, /* discover, share bw: pace around estimated bw */ + BBR_PROBE_RTT, /* cut cwnd to min to probe min_rtt */ +}; + +/* BBR congestion control block */ +struct bbr { + u32 min_rtt_us; /* min RTT in min_rtt_win_sec window */ + u32 min_rtt_stamp; /* timestamp of min_rtt_us */ + u32 probe_rtt_done_stamp; /* end time for BBR_PROBE_RTT mode */ + struct minmax bw; /* Max recent delivery rate in pkts/uS << 24 */ + u32 rtt_cnt; /* count of packet-timed rounds elapsed */ + u32 next_rtt_delivered; /* scb->tx.delivered at end of round */ + struct skb_mstamp cycle_mstamp; /* time of this cycle phase start */ + u32 mode:3, /* current bbr_mode in state machine */ + prev_ca_state:3, /* CA state on previous ACK */ + packet_conservation:1, /* use packet conservation? */ + restore_cwnd:1, /* decided to revert cwnd to old value */ + round_start:1, /* start of packet-timed tx->ack round? */ + tso_segs_goal:7, /* segments we want in each skb we send */ + idle_restart:1, /* restarting after idle? */ + probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */ + unused:5, + lt_is_sampling:1, /* taking long-term ("LT") samples now? */ + lt_rtt_cnt:7, /* round trips in long-term interval */ + lt_use_bw:1; /* use lt_bw as our bw estimate? 
*/ + u32 lt_bw; /* LT est delivery rate in pkts/uS << 24 */ + u32 lt_last_delivered; /* LT intvl start: tp->delivered */ + u32 lt_last_stamp; /* LT intvl start: tp->delivered_mstamp */ + u32 lt_last_lost; /* LT intvl start: tp->lost */ + u32 pacing_gain:10, /* current gain for setting pacing rate */ + cwnd_gain:10, /* current gain for setting cwnd */ + full_bw_cnt:3, /* number of rounds without large bw gains */ + cycle_idx:3, /* current index in pacing_gain cycle array */ + unused_b:6; + u32 prior_cwnd; /* prior cwnd upon entering loss recovery */ + u32 full_bw; /* recent bw, to estimate if pipe is full */ +}; + +#define CYCLE_LEN 8 /* number of phases in a pacing gain cycle */ + +/* Window length of bw filter (in rounds): */ +static const int bbr_bw_rtts = CYCLE_LEN + 2; +/* Window length of min_rtt filter (in sec): */ +static const u32 bbr_min_rtt_win_sec = 10; +/* Minimum time (in ms) spent at bbr_cwnd_min_target in BBR_PROBE_RTT mode: */ +static const u32 bbr_probe_rtt_mode_ms = 200; +/* Skip TSO below the following bandwidth (bits/sec): */ +static const int bbr_min_tso_rate = 1200000; + +/* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain + * that will allow a smoothly increasing pacing rate that will double each RTT + * and send the same number of packets per RTT that an un-paced, slow-starting + * Reno or CUBIC flow would: + */ +static const int bbr_high_gain = BBR_UNIT * 2885 / 1000 + 1; +/* The pacing gain of 1/high_gain in BBR_DRAIN is calculated to typically drain + * the queue created in BBR_STARTUP in a single round: + */ +static const int bbr_drain_gain = BBR_UNIT * 1000 / 2885; +/* The gain for deriving steady-state cwnd tolerates delayed/stretched ACKs: */ +static const int bbr_cwnd_gain = BBR_UNIT * 2; +/* The pacing_gain values for the PROBE_BW gain cycle, to discover/share bw: */ +static const int bbr_pacing_gain[] = { + BBR_UNIT * 5 / 4, /* probe for more available bw */ + BBR_UNIT * 3 / 4, /* drain queue and/or yield 
bw to other flows */ + BBR_UNIT, BBR_UNIT, BBR_UNIT, /* cruise at 1.0*bw to utilize pipe, */ + BBR_UNIT, BBR_UNIT, BBR_UNIT /* without creating excess queue... */ +}; +/* Randomize the starting gain cycling phase over N phases: */ +static const u32 bbr_cycle_rand = 7; + +/* Try to keep at least this many packets in flight, if things go smoothly. For + * smooth functioning, a sliding window protocol ACKing every other packet + * needs at least 4 packets in flight: + */ +static const u32 bbr_cwnd_min_target = 4; + +/* To estimate if BBR_STARTUP mode (i.e. high_gain) has filled pipe... */ +/* If bw has increased significantly (1.25x), there may be more bw available: */ +static const u32 bbr_full_bw_thresh = BBR_UNIT * 5 / 4; +/* But after 3 rounds w/o significant bw growth, estimate pipe is full: */ +static const u32 bbr_full_bw_cnt = 3; + +/* "long-term" ("LT") bandwidth estimator parameters... */ +/* The minimum number of rounds in an LT bw sampling interval: */ +static const u32 bbr_lt_intvl_min_rtts = 4; +/* If lost/delivered ratio > 20%, interval is "lossy" and we may be policed: */ +static const u32 bbr_lt_loss_thresh = 50; +/* If 2 intervals have a bw ratio <= 1/8, their bw is "consistent": */ +static const u32 bbr_lt_bw_ratio = BBR_UNIT / 8; +/* If 2 intervals have a bw diff <= 4 Kbit/sec their bw is "consistent": */ +static const u32 bbr_lt_bw_diff = 4000 / 8; +/* If we estimate we're policed, use lt_bw for this many round trips: */ +static const u32 bbr_lt_bw_max_rtts = 48; + +/* Do we estimate that STARTUP filled the pipe? */ +static bool bbr_full_bw_reached(const struct sock *sk) +{ + const struct bbr *bbr = inet_csk_ca(sk); + + return bbr->full_bw_cnt >= bbr_full_bw_cnt; +} + +/* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */ +static u32 bbr_max_bw(const struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + return minmax_get(&bbr->bw); +} + +/* Return the estimated bandwidth of the path, in pkts/uS << BW_SCALE. 
*/ +static u32 bbr_bw(const struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + return bbr->lt_use_bw ? bbr->lt_bw : bbr_max_bw(sk); +} + +/* Return rate in bytes per second, optionally with a gain. + * The order here is chosen carefully to avoid overflow of u64. This should + * work for input rates of up to 2.9Tbit/sec and gain of 2.89x. + */ +static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain) +{ + rate *= tcp_mss_to_mtu(sk, tcp_sk(sk)->mss_cache); + rate *= gain; + rate >>= BBR_SCALE; + rate *= USEC_PER_SEC; + return rate >> BW_SCALE; +} + +/* Pace using current bw estimate and a gain factor. In order to help drive the + * network toward lower queues while maintaining high utilization and low + * latency, the average pacing rate aims to be slightly (~1%) lower than the + * estimated bandwidth. This is an important aspect of the design. In this + * implementation this slightly lower pacing rate is achieved implicitly by not + * including link-layer headers in the packet size used for the pacing rate. + */ +static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) +{ + struct bbr *bbr = inet_csk_ca(sk); + u64 rate = bw; + + rate = bbr_rate_bytes_per_sec(sk, rate, gain); + rate = min_t(u64, rate, sk->sk_max_pacing_rate); + if (bbr->mode != BBR_STARTUP || rate > sk->sk_pacing_rate) + sk->sk_pacing_rate = rate; +} + +/* Return count of segments we want in the skbs we send, or 0 for default. */ +static u32 bbr_tso_segs_goal(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + return bbr->tso_segs_goal; +} + +static void bbr_set_tso_segs_goal(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u32 min_segs; + + min_segs = sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 
1 : 2; + bbr->tso_segs_goal = min(tcp_tso_autosize(sk, tp->mss_cache, min_segs), + 0x7FU); +} + +/* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */ +static void bbr_save_cwnd(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr->prev_ca_state < TCP_CA_Recovery && bbr->mode != BBR_PROBE_RTT) + bbr->prior_cwnd = tp->snd_cwnd; /* this cwnd is good enough */ + else /* loss recovery or BBR_PROBE_RTT have temporarily cut cwnd */ + bbr->prior_cwnd = max(bbr->prior_cwnd, tp->snd_cwnd); +} + +static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + if (event == CA_EVENT_TX_START && tp->app_limited) { + bbr->idle_restart = 1; + /* Avoid pointless buffer overflows: pace at est. bw if we don't + * need more speed (we're restarting from idle and app-limited). + */ + if (bbr->mode == BBR_PROBE_BW) + bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT); + } +} + +/* Find target cwnd. Right-size the cwnd based on min RTT and the + * estimated bottleneck bandwidth: + * + * cwnd = bw * min_rtt * gain = BDP * gain + * + * The key factor, gain, controls the amount of queue. While a small gain + * builds a smaller queue, it becomes more vulnerable to noise in RTT + * measurements (e.g., delayed ACKs or other ACK compression effects). This + * noise may cause BBR to under-estimate the rate. + * + * To achieve full performance in high-speed paths, we budget enough cwnd to + * fit full-sized skbs in-flight on both end hosts to fully utilize the path: + * - one skb in sending host Qdisc, + * - one skb in sending host TSO/GSO engine + * - one skb being received by receiver host LRO/GRO/delayed-ACK engine + * Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because + * in such cases tso_segs_goal is 1. 
The minimum cwnd is 4 packets, + * which allows 2 outstanding 2-packet sequences, to try to keep pipe + * full even with ACK-every-other-packet delayed ACKs. + */ +static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 cwnd; + u64 w; + + /* If we've never had a valid RTT sample, cap cwnd at the initial + * default. This should only happen when the connection is not using TCP + * timestamps and has retransmitted all of the SYN/SYNACK/data packets + * ACKed so far. In this case, an RTO can cut cwnd to 1, in which + * case we need to slow-start up toward something safe: TCP_INIT_CWND. + */ + if (unlikely(bbr->min_rtt_us == ~0U)) /* no valid RTT samples yet? */ + return TCP_INIT_CWND; /* be safe: cap at default initial cwnd*/ + + w = (u64)bw * bbr->min_rtt_us; + + /* Apply a gain to the given value, then remove the BW_SCALE shift. */ + cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT; + + /* Allow enough full-sized skbs in flight to utilize end systems. */ + cwnd += 3 * bbr->tso_segs_goal; + + /* Reduce delayed ACKs by rounding up cwnd to the next even number. */ + cwnd = (cwnd + 1) & ~1U; + + return cwnd; +} + +/* An optimization in BBR to reduce losses: On the first round of recovery, we + * follow the packet conservation principle: send P packets per P packets acked. + * After that, we slow-start and send at most 2*P packets per P packets acked. + * After recovery finishes, or upon undo, we restore the cwnd we had when + * recovery started (capped by the target cwnd based on estimated BDP). + * + * TODO(ycheng/ncardwell): implement a rate-based approach. 
+ */ +static bool bbr_set_cwnd_to_recover_or_restore( + struct sock *sk, const struct rate_sample *rs, u32 acked, u32 *new_cwnd) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u8 prev_state = bbr->prev_ca_state, state = inet_csk(sk)->icsk_ca_state; + u32 cwnd = tp->snd_cwnd; + + /* An ACK for P pkts should release at most 2*P packets. We do this + * in two steps. First, here we deduct the number of lost packets. + * Then, in bbr_set_cwnd() we slow start up toward the target cwnd. + */ + if (rs->losses > 0) + cwnd = max_t(s32, cwnd - rs->losses, 1); + + if (state == TCP_CA_Recovery && prev_state != TCP_CA_Recovery) { + /* Starting 1st round of Recovery, so do packet conservation. */ + bbr->packet_conservation = 1; + bbr->next_rtt_delivered = tp->delivered; /* start round now */ + /* Cut unused cwnd from app behavior, TSQ, or TSO deferral: */ + cwnd = tcp_packets_in_flight(tp) + acked; + } else if (prev_state >= TCP_CA_Recovery && state < TCP_CA_Recovery) { + /* Exiting loss recovery; restore cwnd saved before recovery. */ + bbr->restore_cwnd = 1; + bbr->packet_conservation = 0; + } + bbr->prev_ca_state = state; + + if (bbr->restore_cwnd) { + /* Restore cwnd after exiting loss recovery or PROBE_RTT. */ + cwnd = max(cwnd, bbr->prior_cwnd); + bbr->restore_cwnd = 0; + } + + if (bbr->packet_conservation) { + *new_cwnd = max(cwnd, tcp_packets_in_flight(tp) + acked); + return true; /* yes, using packet conservation */ + } + *new_cwnd = cwnd; + return false; +} + +/* Slow-start up toward target cwnd (if bw estimate is growing, or packet loss + * has drawn us down below target), or snap down to target if we're above it. 
+ */ +static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs, + u32 acked, u32 bw, int gain) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u32 cwnd = 0, target_cwnd = 0; + + if (!acked) + return; + + if (bbr_set_cwnd_to_recover_or_restore(sk, rs, acked, &cwnd)) + goto done; + + /* If we're below target cwnd, slow start cwnd toward target cwnd. */ + target_cwnd = bbr_target_cwnd(sk, bw, gain); + if (bbr_full_bw_reached(sk)) /* only cut cwnd if we filled the pipe */ + cwnd = min(cwnd + acked, target_cwnd); + else if (cwnd < target_cwnd || tp->delivered < TCP_INIT_CWND) + cwnd = cwnd + acked; + cwnd = max(cwnd, bbr_cwnd_min_target); + +done: + tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); /* apply global cap */ + if (bbr->mode == BBR_PROBE_RTT) /* drain queue, refresh min_rtt */ + tp->snd_cwnd = min(tp->snd_cwnd, bbr_cwnd_min_target); +} + +/* End cycle phase if it's time and/or we hit the phase's in-flight target. */ +static bool bbr_is_next_cycle_phase(struct sock *sk, + const struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + bool is_full_length = + skb_mstamp_us_delta(&tp->delivered_mstamp, &bbr->cycle_mstamp) > + bbr->min_rtt_us; + u32 inflight, bw; + + /* The pacing_gain of 1.0 paces at the estimated bw to try to fully + * use the pipe without increasing the queue. + */ + if (bbr->pacing_gain == BBR_UNIT) + return is_full_length; /* just use wall clock time */ + + inflight = rs->prior_in_flight; /* what was in-flight before ACK? */ + bw = bbr_max_bw(sk); + + /* A pacing_gain > 1.0 probes for bw by trying to raise inflight to at + * least pacing_gain*BDP; this may take more than min_rtt if min_rtt is + * small (e.g. on a LAN). We do not persist if packets are lost, since + * a path with small buffers may not hold that much. 
+ */ + if (bbr->pacing_gain > BBR_UNIT) + return is_full_length && + (rs->losses || /* perhaps pacing_gain*BDP won't fit */ + inflight >= bbr_target_cwnd(sk, bw, bbr->pacing_gain)); + + /* A pacing_gain < 1.0 tries to drain extra queue we added if bw + * probing didn't find more bw. If inflight falls to match BDP then we + * estimate queue is drained; persisting would underutilize the pipe. + */ + return is_full_length || + inflight <= bbr_target_cwnd(sk, bw, BBR_UNIT); +} + +static void bbr_advance_cycle_phase(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1); + bbr->cycle_mstamp = tp->delivered_mstamp; + bbr->pacing_gain = bbr_pacing_gain[bbr->cycle_idx]; +} + +/* Gain cycling: cycle pacing gain to converge to fair share of available bw. */ +static void bbr_update_cycle_phase(struct sock *sk, + const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if ((bbr->mode == BBR_PROBE_BW) && !bbr->lt_use_bw && + bbr_is_next_cycle_phase(sk, rs)) + bbr_advance_cycle_phase(sk); +} + +static void bbr_reset_startup_mode(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->mode = BBR_STARTUP; + bbr->pacing_gain = bbr_high_gain; + bbr->cwnd_gain = bbr_high_gain; +} + +static void bbr_reset_probe_bw_mode(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->mode = BBR_PROBE_BW; + bbr->pacing_gain = BBR_UNIT; + bbr->cwnd_gain = bbr_cwnd_gain; + bbr->cycle_idx = CYCLE_LEN - 1 - prandom_u32_max(bbr_cycle_rand); + bbr_advance_cycle_phase(sk); /* flip to next phase of gain cycle */ +} + +static void bbr_reset_mode(struct sock *sk) +{ + if (!bbr_full_bw_reached(sk)) + bbr_reset_startup_mode(sk); + else + bbr_reset_probe_bw_mode(sk); +} + +/* Start a new long-term sampling interval. 
*/ +static void bbr_reset_lt_bw_sampling_interval(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr->lt_last_stamp = tp->delivered_mstamp.stamp_jiffies; + bbr->lt_last_delivered = tp->delivered; + bbr->lt_last_lost = tp->lost; + bbr->lt_rtt_cnt = 0; +} + +/* Completely reset long-term bandwidth sampling. */ +static void bbr_reset_lt_bw_sampling(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->lt_bw = 0; + bbr->lt_use_bw = 0; + bbr->lt_is_sampling = false; + bbr_reset_lt_bw_sampling_interval(sk); +} + +/* Long-term bw sampling interval is done. Estimate whether we're policed. */ +static void bbr_lt_bw_interval_done(struct sock *sk, u32 bw) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 diff; + + if (bbr->lt_bw) { /* do we have bw from a previous interval? */ + /* Is new bw close to the lt_bw from the previous interval? */ + diff = abs(bw - bbr->lt_bw); + if ((diff * BBR_UNIT <= bbr_lt_bw_ratio * bbr->lt_bw) || + (bbr_rate_bytes_per_sec(sk, diff, BBR_UNIT) <= + bbr_lt_bw_diff)) { + /* All criteria are met; estimate we're policed. */ + bbr->lt_bw = (bw + bbr->lt_bw) >> 1; /* avg 2 intvls */ + bbr->lt_use_bw = 1; + bbr->pacing_gain = BBR_UNIT; /* try to avoid drops */ + bbr->lt_rtt_cnt = 0; + return; + } + } + bbr->lt_bw = bw; + bbr_reset_lt_bw_sampling_interval(sk); +} + +/* Token-bucket traffic policers are common (see "An Internet-Wide Analysis of + * Traffic Policing", SIGCOMM 2016). BBR detects token-bucket policers and + * explicitly models their policed rate, to reduce unnecessary losses. We + * estimate that we're policed if we see 2 consecutive sampling intervals with + * consistent throughput and high packet loss. If we think we're being policed, + * set lt_bw to the "long-term" average delivery rate from those 2 intervals. 
+ */ +static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u32 lost, delivered; + u64 bw; + s32 t; + + if (bbr->lt_use_bw) { /* already using long-term rate, lt_bw? */ + if (bbr->mode == BBR_PROBE_BW && bbr->round_start && + ++bbr->lt_rtt_cnt >= bbr_lt_bw_max_rtts) { + bbr_reset_lt_bw_sampling(sk); /* stop using lt_bw */ + bbr_reset_probe_bw_mode(sk); /* restart gain cycling */ + } + return; + } + + /* Wait for the first loss before sampling, to let the policer exhaust + * its tokens and estimate the steady-state rate allowed by the policer. + * Starting samples earlier includes bursts that over-estimate the bw. + */ + if (!bbr->lt_is_sampling) { + if (!rs->losses) + return; + bbr_reset_lt_bw_sampling_interval(sk); + bbr->lt_is_sampling = true; + } + + /* To avoid underestimates, reset sampling if we run out of data. */ + if (rs->is_app_limited) { + bbr_reset_lt_bw_sampling(sk); + return; + } + + if (bbr->round_start) + bbr->lt_rtt_cnt++; /* count round trips in this interval */ + if (bbr->lt_rtt_cnt < bbr_lt_intvl_min_rtts) + return; /* sampling interval needs to be longer */ + if (bbr->lt_rtt_cnt > 4 * bbr_lt_intvl_min_rtts) { + bbr_reset_lt_bw_sampling(sk); /* interval is too long */ + return; + } + + /* End sampling interval when a packet is lost, so we estimate the + * policer tokens were exhausted. Stopping the sampling before the + * tokens are exhausted under-estimates the policed rate. + */ + if (!rs->losses) + return; + + /* Calculate packets lost and delivered in sampling interval. */ + lost = tp->lost - bbr->lt_last_lost; + delivered = tp->delivered - bbr->lt_last_delivered; + /* Is loss rate (lost/delivered) >= lt_loss_thresh? If not, wait. */ + if (!delivered || (lost << BBR_SCALE) < bbr_lt_loss_thresh * delivered) + return; + + /* Find average delivery rate in this sampling interval. 
*/ + t = (s32)(tp->delivered_mstamp.stamp_jiffies - bbr->lt_last_stamp); + if (t < 1) + return; /* interval is less than one jiffy, so wait */ + t = jiffies_to_usecs(t); + /* Interval long enough for jiffies_to_usecs() to return a bogus 0? */ + if (t < 1) { + bbr_reset_lt_bw_sampling(sk); /* interval too long; reset */ + return; + } + bw = (u64)delivered * BW_UNIT; + do_div(bw, t); + bbr_lt_bw_interval_done(sk, bw); +} + +/* Estimate the bandwidth based on how fast packets are delivered */ +static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u64 bw; + + bbr->round_start = 0; + if (rs->delivered < 0 || rs->interval_us <= 0) + return; /* Not a valid observation */ + + /* See if we've reached the next RTT */ + if (!before(rs->prior_delivered, bbr->next_rtt_delivered)) { + bbr->next_rtt_delivered = tp->delivered; + bbr->rtt_cnt++; + bbr->round_start = 1; + bbr->packet_conservation = 0; + } + + bbr_lt_bw_sampling(sk, rs); + + /* Divide delivered by the interval to find a (lower bound) bottleneck + * bandwidth sample. Delivered is in packets and interval_us in uS and + * ratio will be <<1 for most connections. So delivered is first scaled. + */ + bw = (u64)rs->delivered * BW_UNIT; + do_div(bw, rs->interval_us); + + /* If this sample is application-limited, it is likely to have a very + * low delivered count that represents application behavior rather than + * the available network rate. Such a sample could drag down estimated + * bw, causing needless slow-down. Thus, to continue to send at the + * last measured network rate, we filter out app-limited samples unless + * they describe the path bw at least as well as our bw model. + * + * So the goal during app-limited phase is to proceed with the best + * network rate no matter how long. 
We automatically leave this + * phase when app writes faster than the network can deliver :) + */ + if (!rs->is_app_limited || bw >= bbr_max_bw(sk)) { + /* Incorporate new sample into our max bw filter. */ + minmax_running_max(&bbr->bw, bbr_bw_rtts, bbr->rtt_cnt, bw); + } +} + +/* Estimate when the pipe is full, using the change in delivery rate: BBR + * estimates that STARTUP filled the pipe if the estimated bw hasn't changed by + * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited + * rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the + * higher rwin, 3: we get higher delivery rate samples. Or transient + * cross-traffic or radio noise can go away. CUBIC Hystart shares a similar + * design goal, but uses delay and inter-ACK spacing instead of bandwidth. + */ +static void bbr_check_full_bw_reached(struct sock *sk, + const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 bw_thresh; + + if (bbr_full_bw_reached(sk) || !bbr->round_start || rs->is_app_limited) + return; + + bw_thresh = (u64)bbr->full_bw * bbr_full_bw_thresh >> BBR_SCALE; + if (bbr_max_bw(sk) >= bw_thresh) { + bbr->full_bw = bbr_max_bw(sk); + bbr->full_bw_cnt = 0; + return; + } + ++bbr->full_bw_cnt; +} + +/* If pipe is probably full, drain the queue and then enter steady-state. 
*/ +static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) { + bbr->mode = BBR_DRAIN; /* drain queue we created */ + bbr->pacing_gain = bbr_drain_gain; /* pace slow to drain */ + bbr->cwnd_gain = bbr_high_gain; /* maintain cwnd */ + } /* fall through to check if in-flight is already small: */ + if (bbr->mode == BBR_DRAIN && + tcp_packets_in_flight(tcp_sk(sk)) <= + bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT)) + bbr_reset_probe_bw_mode(sk); /* we estimate queue is drained */ +} + +/* The goal of PROBE_RTT mode is to have BBR flows cooperatively and + * periodically drain the bottleneck queue, to converge to measure the true + * min_rtt (unloaded propagation delay). This allows the flows to keep queues + * small (reducing queuing delay and packet loss) and achieve fairness among + * BBR flows. + * + * The min_rtt filter window is 10 seconds. When the min_rtt estimate expires, + * we enter PROBE_RTT mode and cap the cwnd at bbr_cwnd_min_target=4 packets. + * After at least bbr_probe_rtt_mode_ms=200ms and at least one packet-timed + * round trip elapsed with that flight size <= 4, we leave PROBE_RTT mode and + * re-enter the previous mode. BBR uses 200ms to approximately bound the + * performance penalty of PROBE_RTT's cwnd capping to roughly 2% (200ms/10s). + * + * Note that flows need only pay 2% if they are busy sending over the last 10 + * seconds. Interactive applications (e.g., Web, RPCs, video chunks) often have + * natural silences or low-rate periods within 10 seconds where the rate is low + * enough for long enough to drain its queue in the bottleneck. We pick up + * these min RTT measurements opportunistically with our min_rtt filter. 
:-) + */ +static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + bool filter_expired; + + /* Track min RTT seen in the min_rtt_win_sec filter window: */ + filter_expired = after(tcp_time_stamp, + bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ); + if (rs->rtt_us >= 0 && + (rs->rtt_us <= bbr->min_rtt_us || filter_expired)) { + bbr->min_rtt_us = rs->rtt_us; + bbr->min_rtt_stamp = tcp_time_stamp; + } + + if (bbr_probe_rtt_mode_ms > 0 && filter_expired && + !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) { + bbr->mode = BBR_PROBE_RTT; /* dip, drain queue */ + bbr->pacing_gain = BBR_UNIT; + bbr->cwnd_gain = BBR_UNIT; + bbr_save_cwnd(sk); /* note cwnd so we can restore it */ + bbr->probe_rtt_done_stamp = 0; + } + + if (bbr->mode == BBR_PROBE_RTT) { + /* Ignore low rate samples during this mode. */ + tp->app_limited = + (tp->delivered + tcp_packets_in_flight(tp)) ? : 1; + /* Maintain min packets in flight for max(200 ms, 1 round). 
*/ + if (!bbr->probe_rtt_done_stamp && + tcp_packets_in_flight(tp) <= bbr_cwnd_min_target) { + bbr->probe_rtt_done_stamp = tcp_time_stamp + + msecs_to_jiffies(bbr_probe_rtt_mode_ms); + bbr->probe_rtt_round_done = 0; + bbr->next_rtt_delivered = tp->delivered; + } else if (bbr->probe_rtt_done_stamp) { + if (bbr->round_start) + bbr->probe_rtt_round_done = 1; + if (bbr->probe_rtt_round_done && + after(tcp_time_stamp, bbr->probe_rtt_done_stamp)) { + bbr->min_rtt_stamp = tcp_time_stamp; + bbr->restore_cwnd = 1; /* snap to prior_cwnd */ + bbr_reset_mode(sk); + } + } + } + bbr->idle_restart = 0; +} + +static void bbr_update_model(struct sock *sk, const struct rate_sample *rs) +{ + bbr_update_bw(sk, rs); + bbr_update_cycle_phase(sk, rs); + bbr_check_full_bw_reached(sk, rs); + bbr_check_drain(sk, rs); + bbr_update_min_rtt(sk, rs); +} + +static void bbr_main(struct sock *sk, const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 bw; + + bbr_update_model(sk, rs); + + bw = bbr_bw(sk); + bbr_set_pacing_rate(sk, bw, bbr->pacing_gain); + bbr_set_tso_segs_goal(sk); + bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain); +} + +static void bbr_init(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u64 bw; + + bbr->prior_cwnd = 0; + bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */ + bbr->rtt_cnt = 0; + bbr->next_rtt_delivered = 0; + bbr->prev_ca_state = TCP_CA_Open; + bbr->packet_conservation = 0; + + bbr->probe_rtt_done_stamp = 0; + bbr->probe_rtt_round_done = 0; + bbr->min_rtt_us = tcp_min_rtt(tp); + bbr->min_rtt_stamp = tcp_time_stamp; + + minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */ + + /* Initialize pacing rate to: high_gain * init_cwnd / RTT. */ + bw = (u64)tp->snd_cwnd * BW_UNIT; + do_div(bw, (tp->srtt_us >> 3) ? 
: USEC_PER_MSEC); + sk->sk_pacing_rate = 0; /* force an update of sk_pacing_rate */ + bbr_set_pacing_rate(sk, bw, bbr_high_gain); + + bbr->restore_cwnd = 0; + bbr->round_start = 0; + bbr->idle_restart = 0; + bbr->full_bw = 0; + bbr->full_bw_cnt = 0; + bbr->cycle_mstamp.v64 = 0; + bbr->cycle_idx = 0; + bbr_reset_lt_bw_sampling(sk); + bbr_reset_startup_mode(sk); +} + +static u32 bbr_sndbuf_expand(struct sock *sk) +{ + /* Provision 3 * cwnd since BBR may slow-start even during recovery. */ + return 3; +} + +/* In theory BBR does not need to undo the cwnd since it does not + * always reduce cwnd on losses (see bbr_main()). Keep it for now. + */ +static u32 bbr_undo_cwnd(struct sock *sk) +{ + return tcp_sk(sk)->snd_cwnd; +} + +/* Entering loss recovery, so save cwnd for when we exit or undo recovery. */ +static u32 bbr_ssthresh(struct sock *sk) +{ + bbr_save_cwnd(sk); + return TCP_INFINITE_SSTHRESH; /* BBR does not use ssthresh */ +} + +static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr, + union tcp_cc_info *info) +{ + if (ext & (1 << (INET_DIAG_BBRINFO - 1)) || + ext & (1 << (INET_DIAG_VEGASINFO - 1))) { + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u64 bw = bbr_bw(sk); + + bw = bw * tp->mss_cache * USEC_PER_SEC >> BW_SCALE; + memset(&info->bbr, 0, sizeof(info->bbr)); + info->bbr.bbr_bw_lo = (u32)bw; + info->bbr.bbr_bw_hi = (u32)(bw >> 32); + info->bbr.bbr_min_rtt = bbr->min_rtt_us; + info->bbr.bbr_pacing_gain = bbr->pacing_gain; + info->bbr.bbr_cwnd_gain = bbr->cwnd_gain; + *attr = INET_DIAG_BBRINFO; + return sizeof(info->bbr); + } + return 0; +} + +static void bbr_set_state(struct sock *sk, u8 new_state) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (new_state == TCP_CA_Loss) { + struct rate_sample rs = { .losses = 1 }; + + bbr->prev_ca_state = TCP_CA_Loss; + bbr->full_bw = 0; + bbr->round_start = 1; /* treat RTO like end of a round */ + bbr_lt_bw_sampling(sk, &rs); + } +} + +static struct tcp_congestion_ops tcp_bbr_cong_ops 
__read_mostly = { + .flags = TCP_CONG_NON_RESTRICTED, + .name = "bbr", + .owner = THIS_MODULE, + .init = bbr_init, + .cong_control = bbr_main, + .sndbuf_expand = bbr_sndbuf_expand, + .undo_cwnd = bbr_undo_cwnd, + .cwnd_event = bbr_cwnd_event, + .ssthresh = bbr_ssthresh, + .tso_segs_goal = bbr_tso_segs_goal, + .get_info = bbr_get_info, + .set_state = bbr_set_state, +}; + +static int __init bbr_register(void) +{ + BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE); + return tcp_register_congestion_control(&tcp_bbr_cong_ops); +} + +static void __exit bbr_unregister(void) +{ + tcp_unregister_congestion_control(&tcp_bbr_cong_ops); +} + +module_init(bbr_register); +module_exit(bbr_unregister); + +MODULE_AUTHOR("Van Jacobson "); +MODULE_AUTHOR("Neal Cardwell "); +MODULE_AUTHOR("Yuchung Cheng "); +MODULE_AUTHOR("Soheil Hassas Yeganeh "); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("TCP BBR (Bottleneck Bandwidth and RTT)"); -- cgit v1.1 From 4bdcc6ca2158f43b1770e020f9b71ab8a808594f Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 20 Sep 2016 08:14:08 +0300 Subject: mlxsw: spectrum: Make offloads stats functions static The offloads stats functions are local to this file, make them static. Fixes: fc1bbb0f1831 ('mlxsw: spectrum: Implement offload stats ndo [..]') Signed-off-by: Or Gerlitz Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index fa31261..43d5081 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -823,7 +823,7 @@ err_span_port_mtu_update: return err; } -int +static int mlxsw_sp_port_get_sw_stats64(const struct net_device *dev, struct rtnl_link_stats64 *stats) { @@ -855,7 +855,7 @@ mlxsw_sp_port_get_sw_stats64(const struct net_device *dev, return 0; } -bool mlxsw_sp_port_has_offload_stats(int attr_id) +static bool mlxsw_sp_port_has_offload_stats(int attr_id) { switch (attr_id) { case IFLA_OFFLOAD_XSTATS_CPU_HIT: @@ -865,8 +865,8 @@ bool mlxsw_sp_port_has_offload_stats(int attr_id) return false; } -int mlxsw_sp_port_get_offload_stats(int attr_id, const struct net_device *dev, - void *sp) +static int mlxsw_sp_port_get_offload_stats(int attr_id, const struct net_device *dev, + void *sp) { switch (attr_id) { case IFLA_OFFLOAD_XSTATS_CPU_HIT: -- cgit v1.1 From 9b86a8d19bd6406a10de5f924bf2a003a502d427 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Tue, 20 Sep 2016 12:00:52 +0530 Subject: cxgb4/cxgb4vf: Allocate more queues for 25G and 100G adapter We were missing check for 25G and 100G while checking port speed, which lead to less number of queues getting allocated for 25G & 100G adapters and leading to low throughput. Adding the missing check for both NIC and vNIC driver. Also fixes port advertisement for 25G and 100G in ethtool output. Signed-off-by: Hariprasad Shenai Signed-off-by: David S. 
Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 4 ++-- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 15 +++++++++++++-- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 7 ++++++- drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h | 6 ++++++ drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h | 15 +++++++++++---- drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c | 9 +++++++-- 6 files changed, 45 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 2e2aa9f..edd2338 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -419,8 +419,8 @@ struct link_config { unsigned short supported; /* link capabilities */ unsigned short advertising; /* advertised capabilities */ unsigned short lp_advertising; /* peer advertised capabilities */ - unsigned short requested_speed; /* speed user has requested */ - unsigned short speed; /* actual link speed */ + unsigned int requested_speed; /* speed user has requested */ + unsigned int speed; /* actual link speed */ unsigned char requested_fc; /* flow control user has requested */ unsigned char fc; /* actual link flow control */ unsigned char autoneg; /* autonegotiating? */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index c762a8c..3ceafb55 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -4305,10 +4305,17 @@ static const struct pci_error_handlers cxgb4_eeh = { .resume = eeh_resume, }; +/* Return true if the Link Configuration supports "High Speeds" (those greater + * than 1Gb/s). 
+ */ static inline bool is_x_10g_port(const struct link_config *lc) { - return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0 || - (lc->supported & FW_PORT_CAP_SPEED_40G) != 0; + unsigned int speeds, high_speeds; + + speeds = FW_PORT_CAP_SPEED_V(FW_PORT_CAP_SPEED_G(lc->supported)); + high_speeds = speeds & ~(FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G); + + return high_speeds != 0; } static inline void init_rspq(struct adapter *adap, struct sge_rspq *q, @@ -4756,8 +4763,12 @@ static void print_port_info(const struct net_device *dev) bufp += sprintf(bufp, "1000/"); if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G) bufp += sprintf(bufp, "10G/"); + if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_25G) + bufp += sprintf(bufp, "25G/"); if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_40G) bufp += sprintf(bufp, "40G/"); + if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_100G) + bufp += sprintf(bufp, "100G/"); if (bufp != buf) --bufp; sprintf(bufp, "BASE-%s", t4_get_port_type_description(pi->port_type)); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index dc92c80..660204b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -3627,7 +3627,8 @@ void t4_ulprx_read_la(struct adapter *adap, u32 *la_buf) } #define ADVERT_MASK (FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G |\ - FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_40G | \ + FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_25G | \ + FW_PORT_CAP_SPEED_40G | FW_PORT_CAP_SPEED_100G | \ FW_PORT_CAP_ANEG) /** @@ -7196,8 +7197,12 @@ void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl) speed = 1000; else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_10G)) speed = 10000; + else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_25G)) + speed = 25000; else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_40G)) speed = 40000; + else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100G)) + speed = 100000; lc = 
&pi->link_cfg; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index a89b307..30507d4 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -2265,6 +2265,12 @@ enum fw_port_cap { FW_PORT_CAP_802_3_ASM_DIR = 0x8000, }; +#define FW_PORT_CAP_SPEED_S 0 +#define FW_PORT_CAP_SPEED_M 0x3f +#define FW_PORT_CAP_SPEED_V(x) ((x) << FW_PORT_CAP_SPEED_S) +#define FW_PORT_CAP_SPEED_G(x) \ + (((x) >> FW_PORT_CAP_SPEED_S) & FW_PORT_CAP_SPEED_M) + enum fw_port_mdi { FW_PORT_CAP_MDI_UNCHANGED, FW_PORT_CAP_MDI_AUTO, diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h index 8ee5414..17a2bbc 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h @@ -108,8 +108,8 @@ struct link_config { unsigned int supported; /* link capabilities */ unsigned int advertising; /* advertised capabilities */ unsigned short lp_advertising; /* peer advertised capabilities */ - unsigned short requested_speed; /* speed user has requested */ - unsigned short speed; /* actual link speed */ + unsigned int requested_speed; /* speed user has requested */ + unsigned int speed; /* actual link speed */ unsigned char requested_fc; /* flow control user has requested */ unsigned char fc; /* actual link flow control */ unsigned char autoneg; /* autonegotiating? */ @@ -271,10 +271,17 @@ static inline bool is_10g_port(const struct link_config *lc) return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0; } +/* Return true if the Link Configuration supports "High Speeds" (those greater + * than 1Gb/s). 
+ */ static inline bool is_x_10g_port(const struct link_config *lc) { - return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0 || - (lc->supported & FW_PORT_CAP_SPEED_40G) != 0; + unsigned int speeds, high_speeds; + + speeds = FW_PORT_CAP_SPEED_V(FW_PORT_CAP_SPEED_G(lc->supported)); + high_speeds = speeds & ~(FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G); + + return high_speeds != 0; } static inline unsigned int core_ticks_per_usec(const struct adapter *adapter) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c index 427bfa7..b5622b1 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c @@ -314,8 +314,9 @@ int t4vf_wr_mbox_core(struct adapter *adapter, const void *cmd, int size, } #define ADVERT_MASK (FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G |\ - FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_40G | \ - FW_PORT_CAP_SPEED_100G | FW_PORT_CAP_ANEG) + FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_25G | \ + FW_PORT_CAP_SPEED_40G | FW_PORT_CAP_SPEED_100G | \ + FW_PORT_CAP_ANEG) /** * init_link_config - initialize a link's SW state @@ -1712,8 +1713,12 @@ int t4vf_handle_fw_rpl(struct adapter *adapter, const __be64 *rpl) speed = 1000; else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_10G)) speed = 10000; + else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_25G)) + speed = 25000; else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_40G)) speed = 40000; + else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100G)) + speed = 100000; /* * Scan all of our "ports" (Virtual Interfaces) looking for -- cgit v1.1 From e6449539828ac3b7c74b648793291640bcca8259 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 20 Sep 2016 16:22:05 +0800 Subject: r8152: move some functions Move the following functions forward. 
r8152_mmd_indirect() r8152_mmd_read() r8152_mmd_write() r8152_eee_en() r8152b_enable_eee() r8153_eee_en() r8153_enable_eee() r8152b_enable_fc() r8153_aldps_en() Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 224 ++++++++++++++++++++++++------------------------ 1 file changed, 112 insertions(+), 112 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index f41a8ad..ae7db46 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2552,6 +2552,77 @@ static void r8152_aldps_en(struct r8152 *tp, bool enable) } } +static inline void r8152_mmd_indirect(struct r8152 *tp, u16 dev, u16 reg) +{ + ocp_reg_write(tp, OCP_EEE_AR, FUN_ADDR | dev); + ocp_reg_write(tp, OCP_EEE_DATA, reg); + ocp_reg_write(tp, OCP_EEE_AR, FUN_DATA | dev); +} + +static u16 r8152_mmd_read(struct r8152 *tp, u16 dev, u16 reg) +{ + u16 data; + + r8152_mmd_indirect(tp, dev, reg); + data = ocp_reg_read(tp, OCP_EEE_DATA); + ocp_reg_write(tp, OCP_EEE_AR, 0x0000); + + return data; +} + +static void r8152_mmd_write(struct r8152 *tp, u16 dev, u16 reg, u16 data) +{ + r8152_mmd_indirect(tp, dev, reg); + ocp_reg_write(tp, OCP_EEE_DATA, data); + ocp_reg_write(tp, OCP_EEE_AR, 0x0000); +} + +static void r8152_eee_en(struct r8152 *tp, bool enable) +{ + u16 config1, config2, config3; + u32 ocp_data; + + ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR); + config1 = ocp_reg_read(tp, OCP_EEE_CONFIG1) & ~sd_rise_time_mask; + config2 = ocp_reg_read(tp, OCP_EEE_CONFIG2); + config3 = ocp_reg_read(tp, OCP_EEE_CONFIG3) & ~fast_snr_mask; + + if (enable) { + ocp_data |= EEE_RX_EN | EEE_TX_EN; + config1 |= EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | RX_QUIET_EN; + config1 |= sd_rise_time(1); + config2 |= RG_DACQUIET_EN | RG_LDVQUIET_EN; + config3 |= fast_snr(42); + } else { + ocp_data &= ~(EEE_RX_EN | EEE_TX_EN); + config1 &= ~(EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | + RX_QUIET_EN); + config1 |= sd_rise_time(7); + config2 &= ~(RG_DACQUIET_EN | 
RG_LDVQUIET_EN); + config3 |= fast_snr(511); + } + + ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data); + ocp_reg_write(tp, OCP_EEE_CONFIG1, config1); + ocp_reg_write(tp, OCP_EEE_CONFIG2, config2); + ocp_reg_write(tp, OCP_EEE_CONFIG3, config3); +} + +static void r8152b_enable_eee(struct r8152 *tp) +{ + r8152_eee_en(tp, true); + r8152_mmd_write(tp, MDIO_MMD_AN, MDIO_AN_EEE_ADV, MDIO_EEE_100TX); +} + +static void r8152b_enable_fc(struct r8152 *tp) +{ + u16 anar; + + anar = r8152_mdio_read(tp, MII_ADVERTISE); + anar |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM; + r8152_mdio_write(tp, MII_ADVERTISE, anar); +} + static void rtl8152_disable(struct r8152 *tp) { r8152_aldps_en(tp, false); @@ -2701,6 +2772,47 @@ static void r8152b_enter_oob(struct r8152 *tp) ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data); } +static void r8153_aldps_en(struct r8152 *tp, bool enable) +{ + u16 data; + + data = ocp_reg_read(tp, OCP_POWER_CFG); + if (enable) { + data |= EN_ALDPS; + ocp_reg_write(tp, OCP_POWER_CFG, data); + } else { + data &= ~EN_ALDPS; + ocp_reg_write(tp, OCP_POWER_CFG, data); + msleep(20); + } +} + +static void r8153_eee_en(struct r8152 *tp, bool enable) +{ + u32 ocp_data; + u16 config; + + ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR); + config = ocp_reg_read(tp, OCP_EEE_CFG); + + if (enable) { + ocp_data |= EEE_RX_EN | EEE_TX_EN; + config |= EEE10_EN; + } else { + ocp_data &= ~(EEE_RX_EN | EEE_TX_EN); + config &= ~EEE10_EN; + } + + ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data); + ocp_reg_write(tp, OCP_EEE_CFG, config); +} + +static void r8153_enable_eee(struct r8152 *tp) +{ + r8153_eee_en(tp, true); + ocp_reg_write(tp, OCP_EEE_ADV, MDIO_EEE_1000T | MDIO_EEE_100TX); +} + static void r8153_hw_phy_cfg(struct r8152 *tp) { u32 ocp_data; @@ -2866,21 +2978,6 @@ static void r8153_enter_oob(struct r8152 *tp) ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data); } -static void r8153_aldps_en(struct r8152 *tp, bool enable) -{ - u16 data; - - data = 
ocp_reg_read(tp, OCP_POWER_CFG); - if (enable) { - data |= EN_ALDPS; - ocp_reg_write(tp, OCP_POWER_CFG, data); - } else { - data &= ~EN_ALDPS; - ocp_reg_write(tp, OCP_POWER_CFG, data); - msleep(20); - } -} - static void rtl8153_disable(struct r8152 *tp) { r8153_aldps_en(tp, false); @@ -3246,103 +3343,6 @@ static int rtl8152_close(struct net_device *netdev) return res; } -static inline void r8152_mmd_indirect(struct r8152 *tp, u16 dev, u16 reg) -{ - ocp_reg_write(tp, OCP_EEE_AR, FUN_ADDR | dev); - ocp_reg_write(tp, OCP_EEE_DATA, reg); - ocp_reg_write(tp, OCP_EEE_AR, FUN_DATA | dev); -} - -static u16 r8152_mmd_read(struct r8152 *tp, u16 dev, u16 reg) -{ - u16 data; - - r8152_mmd_indirect(tp, dev, reg); - data = ocp_reg_read(tp, OCP_EEE_DATA); - ocp_reg_write(tp, OCP_EEE_AR, 0x0000); - - return data; -} - -static void r8152_mmd_write(struct r8152 *tp, u16 dev, u16 reg, u16 data) -{ - r8152_mmd_indirect(tp, dev, reg); - ocp_reg_write(tp, OCP_EEE_DATA, data); - ocp_reg_write(tp, OCP_EEE_AR, 0x0000); -} - -static void r8152_eee_en(struct r8152 *tp, bool enable) -{ - u16 config1, config2, config3; - u32 ocp_data; - - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR); - config1 = ocp_reg_read(tp, OCP_EEE_CONFIG1) & ~sd_rise_time_mask; - config2 = ocp_reg_read(tp, OCP_EEE_CONFIG2); - config3 = ocp_reg_read(tp, OCP_EEE_CONFIG3) & ~fast_snr_mask; - - if (enable) { - ocp_data |= EEE_RX_EN | EEE_TX_EN; - config1 |= EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | RX_QUIET_EN; - config1 |= sd_rise_time(1); - config2 |= RG_DACQUIET_EN | RG_LDVQUIET_EN; - config3 |= fast_snr(42); - } else { - ocp_data &= ~(EEE_RX_EN | EEE_TX_EN); - config1 &= ~(EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | - RX_QUIET_EN); - config1 |= sd_rise_time(7); - config2 &= ~(RG_DACQUIET_EN | RG_LDVQUIET_EN); - config3 |= fast_snr(511); - } - - ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data); - ocp_reg_write(tp, OCP_EEE_CONFIG1, config1); - ocp_reg_write(tp, OCP_EEE_CONFIG2, config2); - ocp_reg_write(tp, 
OCP_EEE_CONFIG3, config3); -} - -static void r8152b_enable_eee(struct r8152 *tp) -{ - r8152_eee_en(tp, true); - r8152_mmd_write(tp, MDIO_MMD_AN, MDIO_AN_EEE_ADV, MDIO_EEE_100TX); -} - -static void r8153_eee_en(struct r8152 *tp, bool enable) -{ - u32 ocp_data; - u16 config; - - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR); - config = ocp_reg_read(tp, OCP_EEE_CFG); - - if (enable) { - ocp_data |= EEE_RX_EN | EEE_TX_EN; - config |= EEE10_EN; - } else { - ocp_data &= ~(EEE_RX_EN | EEE_TX_EN); - config &= ~EEE10_EN; - } - - ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data); - ocp_reg_write(tp, OCP_EEE_CFG, config); -} - -static void r8153_enable_eee(struct r8152 *tp) -{ - r8153_eee_en(tp, true); - ocp_reg_write(tp, OCP_EEE_ADV, MDIO_EEE_1000T | MDIO_EEE_100TX); -} - -static void r8152b_enable_fc(struct r8152 *tp) -{ - u16 anar; - - anar = r8152_mdio_read(tp, MII_ADVERTISE); - anar |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM; - r8152_mdio_write(tp, MII_ADVERTISE, anar); -} - static void rtl_tally_reset(struct r8152 *tp) { u32 ocp_data; -- cgit v1.1 From 2dd436daac7848dbf3fe799cf59c1408871a14e3 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 20 Sep 2016 16:22:06 +0800 Subject: r8152: move enabling PHY Move enabling PHY to init(), otherwise some other settings may fail. Signed-off-by: Hayes Wang Signed-off-by: David S. 
Miller --- drivers/net/usb/r8152.c | 43 +++++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index ae7db46..dbf11ba 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2632,14 +2632,6 @@ static void rtl8152_disable(struct r8152 *tp) static void r8152b_hw_phy_cfg(struct r8152 *tp) { - u16 data; - - data = r8152_mdio_read(tp, MII_BMCR); - if (data & BMCR_PDOWN) { - data &= ~BMCR_PDOWN; - r8152_mdio_write(tp, MII_BMCR, data); - } - set_bit(PHY_RESET, &tp->flags); } @@ -2818,16 +2810,6 @@ static void r8153_hw_phy_cfg(struct r8152 *tp) u32 ocp_data; u16 data; - if (tp->version == RTL_VER_03 || tp->version == RTL_VER_04 || - tp->version == RTL_VER_05) - ocp_reg_write(tp, OCP_ADC_CFG, CKADSEL_L | ADC_EN | EN_EMI_L); - - data = r8152_mdio_read(tp, MII_BMCR); - if (data & BMCR_PDOWN) { - data &= ~BMCR_PDOWN; - r8152_mdio_write(tp, MII_BMCR, data); - } - if (tp->version == RTL_VER_03) { data = ocp_reg_read(tp, OCP_EEE_CFG); data &= ~CTAP_SHORT_EN; @@ -3355,10 +3337,17 @@ static void rtl_tally_reset(struct r8152 *tp) static void r8152b_init(struct r8152 *tp) { u32 ocp_data; + u16 data; if (test_bit(RTL8152_UNPLUG, &tp->flags)) return; + data = r8152_mdio_read(tp, MII_BMCR); + if (data & BMCR_PDOWN) { + data &= ~BMCR_PDOWN; + r8152_mdio_write(tp, MII_BMCR, data); + } + r8152_aldps_en(tp, false); if (tp->version == RTL_VER_01) { @@ -3394,6 +3383,7 @@ static void r8152b_init(struct r8152 *tp) static void r8153_init(struct r8152 *tp) { u32 ocp_data; + u16 data; int i; if (test_bit(RTL8152_UNPLUG, &tp->flags)) @@ -3416,6 +3406,23 @@ static void r8153_init(struct r8152 *tp) msleep(20); } + if (tp->version == RTL_VER_03 || tp->version == RTL_VER_04 || + tp->version == RTL_VER_05) + ocp_reg_write(tp, OCP_ADC_CFG, CKADSEL_L | ADC_EN | EN_EMI_L); + + data = r8152_mdio_read(tp, MII_BMCR); + if (data & BMCR_PDOWN) { + data &= ~BMCR_PDOWN; + r8152_mdio_write(tp, 
MII_BMCR, data); + } + + for (i = 0; i < 500; i++) { + ocp_data = ocp_reg_read(tp, OCP_PHY_STATUS) & PHY_STAT_MASK; + if (ocp_data == PHY_STAT_LAN_ON) + break; + msleep(20); + } + usb_disable_lpm(tp->udev); r8153_u2p3en(tp, false); -- cgit v1.1 From ef39df8eaba48c0de779440f41a648b17a560953 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 20 Sep 2016 16:22:07 +0800 Subject: r8152: move PHY settings to hw_phy_cfg Move the PHY relative settings together to hw_phy_cfg(). Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index dbf11ba..9ce5bd5 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2632,6 +2632,10 @@ static void rtl8152_disable(struct r8152 *tp) static void r8152b_hw_phy_cfg(struct r8152 *tp) { + r8152b_enable_eee(tp); + r8152_aldps_en(tp, true); + r8152b_enable_fc(tp); + set_bit(PHY_RESET, &tp->flags); } @@ -2839,6 +2843,10 @@ static void r8153_hw_phy_cfg(struct r8152 *tp) sram_write(tp, SRAM_10M_AMP1, 0x00af); sram_write(tp, SRAM_10M_AMP2, 0x0208); + r8153_enable_eee(tp); + r8153_aldps_en(tp, true); + r8152b_enable_fc(tp); + set_bit(PHY_RESET, &tp->flags); } @@ -3369,9 +3377,6 @@ static void r8152b_init(struct r8152 *tp) SPDWN_RXDV_MSK | SPDWN_LINKCHG_MSK; ocp_write_word(tp, MCU_TYPE_PLA, PLA_GPHY_INTR_IMR, ocp_data); - r8152b_enable_eee(tp); - r8152_aldps_en(tp, true); - r8152b_enable_fc(tp); rtl_tally_reset(tp); /* enable rx aggregation */ @@ -3490,9 +3495,6 @@ static void r8153_init(struct r8152 *tp) ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, 0); ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, 0); - r8153_enable_eee(tp); - r8153_aldps_en(tp, true); - r8152b_enable_fc(tp); rtl_tally_reset(tp); r8153_u2p3en(tp, true); } -- cgit v1.1 From af0287ec10c62c84cc5cd1bad4fd37644a1ac41d Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 20 Sep 2016 16:22:08 +0800 
Subject: r8152: remove r8153_enable_eee Remove r8153_enable_eee(). Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 9ce5bd5..e7a05dd 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2803,12 +2803,6 @@ static void r8153_eee_en(struct r8152 *tp, bool enable) ocp_reg_write(tp, OCP_EEE_CFG, config); } -static void r8153_enable_eee(struct r8152 *tp) -{ - r8153_eee_en(tp, true); - ocp_reg_write(tp, OCP_EEE_ADV, MDIO_EEE_1000T | MDIO_EEE_100TX); -} - static void r8153_hw_phy_cfg(struct r8152 *tp) { u32 ocp_data; @@ -2843,7 +2837,9 @@ static void r8153_hw_phy_cfg(struct r8152 *tp) sram_write(tp, SRAM_10M_AMP1, 0x00af); sram_write(tp, SRAM_10M_AMP2, 0x0208); - r8153_enable_eee(tp); + r8153_eee_en(tp, true); + ocp_reg_write(tp, OCP_EEE_ADV, MDIO_EEE_1000T | MDIO_EEE_100TX); + r8153_aldps_en(tp, true); r8152b_enable_fc(tp); -- cgit v1.1 From d768c61bc353a0e0de3f839e1de99eee7d4eca10 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 20 Sep 2016 16:22:09 +0800 Subject: r8152: disable ALDPS and EEE before setting PHY Disable ALDPS and EEE to avoid the possible failure when setting the PHY. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index e7a05dd..c254248 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -32,7 +32,7 @@ #define NETNEXT_VERSION "08" /* Information for net */ -#define NET_VERSION "5" +#define NET_VERSION "6" #define DRIVER_VERSION "v1." NETNEXT_VERSION "." 
NET_VERSION #define DRIVER_AUTHOR "Realtek linux nic maintainers " @@ -2808,6 +2808,13 @@ static void r8153_hw_phy_cfg(struct r8152 *tp) u32 ocp_data; u16 data; + /* disable ALDPS before updating the PHY parameters */ + r8153_aldps_en(tp, false); + + /* disable EEE before updating the PHY parameters */ + r8153_eee_en(tp, false); + ocp_reg_write(tp, OCP_EEE_ADV, 0); + if (tp->version == RTL_VER_03) { data = ocp_reg_read(tp, OCP_EEE_CFG); data &= ~CTAP_SHORT_EN; @@ -3390,7 +3397,6 @@ static void r8153_init(struct r8152 *tp) if (test_bit(RTL8152_UNPLUG, &tp->flags)) return; - r8153_aldps_en(tp, false); r8153_u1u2en(tp, false); for (i = 0; i < 500; i++) { -- cgit v1.1 From b5036cd4ed3173ab8cdbc85e2ba74acf46bafb51 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 20 Sep 2016 16:17:22 +0200 Subject: ipmr, ip6mr: return lastuse relative to now When I introduced the lastuse member I made a subtle error because it was returned as an absolute value but that is meaningless to user-space as it doesn't allow to see how old exactly an entry is. Let's make it similar to how the bridge returns such values and make it relative to "now" (jiffies). This allows us to show the actual age of the entries and is much more useful (e.g. user-space daemons can age out entries, iproute2 can display the lastuse properly). Fixes: 43b9e1274060 ("net: ipmr/ip6mr: add support for keeping an entry age") Reported-by: Satish Ashok Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- net/ipv4/ipmr.c | 7 +++++-- net/ipv6/ip6mr.c | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 2625332..a87bcd2 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2076,6 +2076,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct rta_mfc_stats mfcs; struct nlattr *mp_attr; struct rtnexthop *nhp; + unsigned long lastuse; int ct; /* If cache is unresolved, don't try to parse IIF and OIF */ @@ -2105,12 +2106,14 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, nla_nest_end(skb, mp_attr); + lastuse = READ_ONCE(c->mfc_un.res.lastuse); + lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0; + mfcs.mfcs_packets = c->mfc_un.res.pkt; mfcs.mfcs_bytes = c->mfc_un.res.bytes; mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if; if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) || - nla_put_u64_64bit(skb, RTA_EXPIRES, - jiffies_to_clock_t(c->mfc_un.res.lastuse), + nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse), RTA_PAD)) return -EMSGSIZE; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 6122f9c..fccb5dd 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2239,6 +2239,7 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, struct rta_mfc_stats mfcs; struct nlattr *mp_attr; struct rtnexthop *nhp; + unsigned long lastuse; int ct; /* If cache is unresolved, don't try to parse IIF and OIF */ @@ -2269,12 +2270,14 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, nla_nest_end(skb, mp_attr); + lastuse = READ_ONCE(c->mfc_un.res.lastuse); + lastuse = time_after_eq(jiffies, lastuse) ? 
jiffies - lastuse : 0; + mfcs.mfcs_packets = c->mfc_un.res.pkt; mfcs.mfcs_bytes = c->mfc_un.res.bytes; mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if; if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) || - nla_put_u64_64bit(skb, RTA_EXPIRES, - jiffies_to_clock_t(c->mfc_un.res.lastuse), + nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse), RTA_PAD)) return -EMSGSIZE; -- cgit v1.1 From 9f7f797c1df40ee6c0329e9e53ea9ca0d224f55d Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Tue, 20 Sep 2016 11:16:49 +0200 Subject: mlxsw: pci: Add lag related resources to resources query Add max lag and max ports in lag resources to resources query. Signed-off-by: Nogah Frankel Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.h | 6 +++++- drivers/net/ethernet/mellanox/mlxsw/pci.c | 10 ++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index d2e3297..51f27a3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -269,8 +269,12 @@ struct mlxsw_driver { }; struct mlxsw_resources { - u8 max_span_valid:1; + u8 max_span_valid:1, + max_lag_valid:1, + max_ports_in_lag_valid:1; u8 max_span; + u8 max_lag; + u8 max_ports_in_lag; }; struct mlxsw_resources *mlxsw_core_resources_get(struct mlxsw_core *mlxsw_core); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 1d1360c..cb284ea 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1156,6 +1156,8 @@ mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci, #define MLXSW_RESOURCES_TABLE_END_ID 0xffff #define MLXSW_MAX_SPAN_ID 0x2420 +#define MLXSW_MAX_LAG_ID 0x2520 +#define MLXSW_MAX_PORTS_IN_LAG_ID 0x2521 #define MLXSW_RESOURCES_QUERY_MAX_QUERIES 100 #define 
MLXSW_RESOURCES_PER_QUERY 32 @@ -1167,6 +1169,14 @@ static void mlxsw_pci_resources_query_parse(int id, u64 val, resources->max_span = val; resources->max_span_valid = 1; break; + case MLXSW_MAX_LAG_ID: + resources->max_lag = val; + resources->max_lag_valid = 1; + break; + case MLXSW_MAX_PORTS_IN_LAG_ID: + resources->max_ports_in_lag = val; + resources->max_ports_in_lag_valid = 1; + break; default: break; } -- cgit v1.1 From ce0bd2b0c57a2d97ea89f87f61b9f5758139bcb8 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Tue, 20 Sep 2016 11:16:50 +0200 Subject: mlxsw: spectrum: lag resources- use resources data instead of consts Use max lag and max ports in lag resources as the result of resource query instead of using const to save them. Signed-off-by: Nogah Frankel Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.c | 26 +++++++------- drivers/net/ethernet/mellanox/mlxsw/core.h | 4 --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 12 ------- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 41 +++++++++++++++++----- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 5 +-- .../ethernet/mellanox/mlxsw/spectrum_switchdev.c | 4 ++- drivers/net/ethernet/mellanox/mlxsw/switchx2.c | 4 --- 7 files changed, 50 insertions(+), 46 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 068ee65a..aa33d58 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1100,10 +1100,15 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, goto err_alloc_stats; } - if (mlxsw_driver->profile->used_max_lag && - mlxsw_driver->profile->used_max_port_per_lag) { - alloc_size = sizeof(u8) * mlxsw_driver->profile->max_lag * - mlxsw_driver->profile->max_port_per_lag; + err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile, + &mlxsw_core->resources); + if (err) + goto 
err_bus_init; + + if (mlxsw_core->resources.max_lag_valid && + mlxsw_core->resources.max_ports_in_lag_valid) { + alloc_size = sizeof(u8) * mlxsw_core->resources.max_lag * + mlxsw_core->resources.max_ports_in_lag; mlxsw_core->lag.mapping = kzalloc(alloc_size, GFP_KERNEL); if (!mlxsw_core->lag.mapping) { err = -ENOMEM; @@ -1111,11 +1116,6 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, } } - err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile, - &mlxsw_core->resources); - if (err) - goto err_bus_init; - err = mlxsw_emad_init(mlxsw_core); if (err) goto err_emad_init; @@ -1146,10 +1146,10 @@ err_hwmon_init: err_devlink_register: mlxsw_emad_fini(mlxsw_core); err_emad_init: - mlxsw_bus->fini(bus_priv); -err_bus_init: kfree(mlxsw_core->lag.mapping); err_alloc_lag_mapping: + mlxsw_bus->fini(bus_priv); +err_bus_init: free_percpu(mlxsw_core->pcpu_stats); err_alloc_stats: devlink_free(devlink); @@ -1615,7 +1615,7 @@ EXPORT_SYMBOL(mlxsw_core_skb_receive); static int mlxsw_core_lag_mapping_index(struct mlxsw_core *mlxsw_core, u16 lag_id, u8 port_index) { - return mlxsw_core->driver->profile->max_port_per_lag * lag_id + + return mlxsw_core->resources.max_ports_in_lag * lag_id + port_index; } @@ -1644,7 +1644,7 @@ void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core, { int i; - for (i = 0; i < mlxsw_core->driver->profile->max_port_per_lag; i++) { + for (i = 0; i < mlxsw_core->resources.max_ports_in_lag; i++) { int index = mlxsw_core_lag_mapping_index(mlxsw_core, lag_id, i); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 51f27a3..558d1ce 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -179,8 +179,6 @@ struct mlxsw_swid_config { struct mlxsw_config_profile { u16 used_max_vepa_channels:1, - used_max_lag:1, - used_max_port_per_lag:1, used_max_mid:1, used_max_pgt:1, used_max_system_port:1, @@ -194,8 +192,6 
@@ struct mlxsw_config_profile { used_adaptive_routing_group_cap:1, used_kvd_sizes:1; u8 max_vepa_channels; - u16 max_lag; - u16 max_port_per_lag; u16 max_mid; u16 max_pgt; u16 max_system_port; diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index cb284ea..57c2d34 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1232,18 +1232,6 @@ static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox, mlxsw_cmd_mbox_config_profile_max_vepa_channels_set( mbox, profile->max_vepa_channels); } - if (profile->used_max_lag) { - mlxsw_cmd_mbox_config_profile_set_max_lag_set( - mbox, 1); - mlxsw_cmd_mbox_config_profile_max_lag_set( - mbox, profile->max_lag); - } - if (profile->used_max_port_per_lag) { - mlxsw_cmd_mbox_config_profile_set_max_port_per_lag_set( - mbox, 1); - mlxsw_cmd_mbox_config_profile_max_port_per_lag_set( - mbox, profile->max_port_per_lag); - } if (profile->used_max_mid) { mlxsw_cmd_mbox_config_profile_set_max_mid_set( mbox, 1); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 43d5081..0f96d11 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2887,7 +2887,9 @@ static int mlxsw_sp_flood_init(struct mlxsw_sp *mlxsw_sp) static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_resources *resources; char slcr_pl[MLXSW_REG_SLCR_LEN]; + int err; mlxsw_reg_slcr_pack(slcr_pl, MLXSW_REG_SLCR_LAG_HASH_SMAC | MLXSW_REG_SLCR_LAG_HASH_DMAC | @@ -2898,7 +2900,26 @@ static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp) MLXSW_REG_SLCR_LAG_HASH_SPORT | MLXSW_REG_SLCR_LAG_HASH_DPORT | MLXSW_REG_SLCR_LAG_HASH_IPPROTO); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcr), slcr_pl); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcr), slcr_pl); + if (err) + return err; + + resources = 
mlxsw_core_resources_get(mlxsw_sp->core); + if (!(resources->max_lag_valid && resources->max_ports_in_lag_valid)) + return -EIO; + + mlxsw_sp->lags = kcalloc(resources->max_lag, + sizeof(struct mlxsw_sp_upper), + GFP_KERNEL); + if (!mlxsw_sp->lags) + return -ENOMEM; + + return 0; +} + +static void mlxsw_sp_lag_fini(struct mlxsw_sp *mlxsw_sp) +{ + kfree(mlxsw_sp->lags); } static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, @@ -2982,6 +3003,7 @@ err_span_init: err_router_init: mlxsw_sp_switchdev_fini(mlxsw_sp); err_switchdev_init: + mlxsw_sp_lag_fini(mlxsw_sp); err_lag_init: mlxsw_sp_buffers_fini(mlxsw_sp); err_buffers_init: @@ -3001,6 +3023,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_span_fini(mlxsw_sp); mlxsw_sp_router_fini(mlxsw_sp); mlxsw_sp_switchdev_fini(mlxsw_sp); + mlxsw_sp_lag_fini(mlxsw_sp); mlxsw_sp_buffers_fini(mlxsw_sp); mlxsw_sp_traps_fini(mlxsw_sp); mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE); @@ -3013,10 +3036,6 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) static struct mlxsw_config_profile mlxsw_sp_config_profile = { .used_max_vepa_channels = 1, .max_vepa_channels = 0, - .used_max_lag = 1, - .max_lag = MLXSW_SP_LAG_MAX, - .used_max_port_per_lag = 1, - .max_port_per_lag = MLXSW_SP_PORT_PER_LAG_MAX, .used_max_mid = 1, .max_mid = MLXSW_SP_MID_MAX, .used_max_pgt = 1, @@ -3683,12 +3702,14 @@ static bool mlxsw_sp_port_fdb_should_flush(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; u8 local_port = mlxsw_sp_port->local_port; u16 lag_id = mlxsw_sp_port->lag_id; + struct mlxsw_resources *resources; int i, count = 0; if (!mlxsw_sp_port->lagged) return true; - for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) { + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_ports_in_lag; i++) { struct mlxsw_sp_port *lag_port; lag_port = mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i); @@ -3894,11 +3915,13 @@ static int 
mlxsw_sp_lag_index_get(struct mlxsw_sp *mlxsw_sp, struct net_device *lag_dev, u16 *p_lag_id) { + struct mlxsw_resources *resources; struct mlxsw_sp_upper *lag; int free_lag_id = -1; int i; - for (i = 0; i < MLXSW_SP_LAG_MAX; i++) { + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_lag; i++) { lag = mlxsw_sp_lag_get(mlxsw_sp, i); if (lag->ref_count) { if (lag->dev == lag_dev) { @@ -3932,9 +3955,11 @@ mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_port_lag_index_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id, u8 *p_port_index) { + struct mlxsw_resources *resources; int i; - for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) { + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_ports_in_lag; i++) { if (!mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i)) { *p_port_index = i; return 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 49f4cafc..a056aaa 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -56,9 +56,6 @@ #define MLXSW_SP_RFID_BASE 15360 #define MLXSW_SP_RIF_MAX 800 -#define MLXSW_SP_LAG_MAX 64 -#define MLXSW_SP_PORT_PER_LAG_MAX 16 - #define MLXSW_SP_MID_MAX 7000 #define MLXSW_SP_PORTS_PER_CLUSTER_MAX 4 @@ -290,7 +287,7 @@ struct mlxsw_sp { #define MLXSW_SP_DEFAULT_AGEING_TIME 300 u32 ageing_time; struct mlxsw_sp_upper master_bridge; - struct mlxsw_sp_upper lags[MLXSW_SP_LAG_MAX]; + struct mlxsw_sp_upper *lags; u8 port_to_module[MLXSW_PORT_MAX_PORTS]; struct mlxsw_sp_sb sb; struct mlxsw_sp_router router; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 7186c48..2b04b76 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1205,9 +1205,11 @@ static struct mlxsw_sp_port 
*mlxsw_sp_lag_rep_port(struct mlxsw_sp *mlxsw_sp, u16 lag_id) { struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_resources *resources; int i; - for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) { + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_ports_in_lag; i++) { mlxsw_sp_port = mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i); if (mlxsw_sp_port) return mlxsw_sp_port; diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 377daa4..8b15bf0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -1512,10 +1512,6 @@ static void mlxsw_sx_fini(struct mlxsw_core *mlxsw_core) static struct mlxsw_config_profile mlxsw_sx_config_profile = { .used_max_vepa_channels = 1, .max_vepa_channels = 0, - .used_max_lag = 1, - .max_lag = 64, - .used_max_port_per_lag = 1, - .max_port_per_lag = 16, .used_max_mid = 1, .max_mid = 7000, .used_max_pgt = 1, -- cgit v1.1 From 2acd10c51bd2ce3a39c75fa3ff113e32e2413c6f Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Tue, 20 Sep 2016 11:16:51 +0200 Subject: mlxsw: pci: Add KVD size related resources Add the KVD size and the minimum sizes of the single and double sections as resources to the resources query. Signed-off-by: Nogah Frankel Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlxsw/core.h | 8 +++++++- drivers/net/ethernet/mellanox/mlxsw/pci.c | 15 +++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 558d1ce..76ad566 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -267,10 +267,16 @@ struct mlxsw_driver { struct mlxsw_resources { u8 max_span_valid:1, max_lag_valid:1, - max_ports_in_lag_valid:1; + max_ports_in_lag_valid:1, + kvd_size_valid:1, + kvd_single_min_size_valid:1, + kvd_double_min_size_valid:1; u8 max_span; u8 max_lag; u8 max_ports_in_lag; + u32 kvd_size; + u32 kvd_single_min_size; + u32 kvd_double_min_size; }; struct mlxsw_resources *mlxsw_core_resources_get(struct mlxsw_core *mlxsw_core); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 57c2d34..7b2ab1e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1158,6 +1158,9 @@ mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci, #define MLXSW_MAX_SPAN_ID 0x2420 #define MLXSW_MAX_LAG_ID 0x2520 #define MLXSW_MAX_PORTS_IN_LAG_ID 0x2521 +#define MLXSW_KVD_SIZE_ID 0x1001 +#define MLXSW_KVD_SINGLE_MIN_SIZE_ID 0x1002 +#define MLXSW_KVD_DOUBLE_MIN_SIZE_ID 0x1003 #define MLXSW_RESOURCES_QUERY_MAX_QUERIES 100 #define MLXSW_RESOURCES_PER_QUERY 32 @@ -1177,6 +1180,18 @@ static void mlxsw_pci_resources_query_parse(int id, u64 val, resources->max_ports_in_lag = val; resources->max_ports_in_lag_valid = 1; break; + case MLXSW_KVD_SIZE_ID: + resources->kvd_size = val; + resources->kvd_size_valid = 1; + break; + case MLXSW_KVD_SINGLE_MIN_SIZE_ID: + resources->kvd_single_min_size = val; + resources->kvd_single_min_size_valid = 1; + break; + case MLXSW_KVD_DOUBLE_MIN_SIZE_ID: + resources->kvd_double_min_size = val; + resources->kvd_double_min_size_valid = 1; + 
break; default: break; } -- cgit v1.1 From 403547d38d0b99f589a0d87f3a5f352895e54aae Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Tue, 20 Sep 2016 11:16:52 +0200 Subject: mlxsw: profile: Add KVD resources to profile config Use resources from resource query to determine values for the profile configuration. Add KVD determined section sizes to the resources struct. Change the profile struct and value to match these changes. Signed-off-by: Nogah Frankel Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.h | 15 ++++-- drivers/net/ethernet/mellanox/mlxsw/pci.c | 73 +++++++++++++++++++++----- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 7 +-- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 3 +- 4 files changed, 76 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 76ad566..1193bdc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -190,7 +190,8 @@ struct mlxsw_config_profile { used_max_pkey:1, used_ar_sec:1, used_adaptive_routing_group_cap:1, - used_kvd_sizes:1; + used_kvd_split_data:1; /* indicate for the kvd's values */ + u8 max_vepa_channels; u16 max_mid; u16 max_pgt; @@ -210,8 +211,9 @@ struct mlxsw_config_profile { u16 adaptive_routing_group_cap; u8 arn; u32 kvd_linear_size; - u32 kvd_hash_single_size; - u32 kvd_hash_double_size; + u16 kvd_hash_granularity; + u8 kvd_hash_single_parts; + u8 kvd_hash_double_parts; u8 resource_query_enable; struct mlxsw_swid_config swid_config[MLXSW_CONFIG_PROFILE_SWID_COUNT]; }; @@ -277,6 +279,13 @@ struct mlxsw_resources { u32 kvd_size; u32 kvd_single_min_size; u32 kvd_double_min_size; + + /* Internal resources. + * Determined by the SW, not queried from the HW.
+ */ + u32 kvd_single_size; + u32 kvd_double_size; + u32 kvd_linear_size; }; struct mlxsw_resources *mlxsw_core_resources_get(struct mlxsw_core *mlxsw_core); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 7b2ab1e..c2d2fa1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1234,10 +1234,52 @@ static int mlxsw_pci_resources_query(struct mlxsw_pci *mlxsw_pci, char *mbox, return -EIO; } +static int mlxsw_pci_profile_get_kvd_sizes(const struct mlxsw_config_profile *profile, + struct mlxsw_resources *resources) +{ + u32 singles_size, doubles_size, linear_size; + + if (!resources->kvd_single_min_size_valid || + !resources->kvd_double_min_size_valid || + !profile->used_kvd_split_data) + return -EIO; + + linear_size = profile->kvd_linear_size; + + /* The hash part is what left of the kvd without the + * linear part. It is split to the single size and + * double size by the parts ratio from the profile. + * Both sizes must be a multiplications of the + * granularity from the profile. + */ + doubles_size = (resources->kvd_size - linear_size); + doubles_size *= profile->kvd_hash_double_parts; + doubles_size /= (profile->kvd_hash_double_parts + + profile->kvd_hash_single_parts); + doubles_size /= profile->kvd_hash_granularity; + doubles_size *= profile->kvd_hash_granularity; + singles_size = resources->kvd_size - doubles_size - + linear_size; + + /* Check results are legal. 
*/ + if (singles_size < resources->kvd_single_min_size || + doubles_size < resources->kvd_double_min_size || + resources->kvd_size < linear_size) + return -EIO; + + resources->kvd_single_size = singles_size; + resources->kvd_double_size = doubles_size; + resources->kvd_linear_size = linear_size; + + return 0; +} + static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox, - const struct mlxsw_config_profile *profile) + const struct mlxsw_config_profile *profile, + struct mlxsw_resources *resources) { int i; + int err; mlxsw_cmd_mbox_zero(mbox); @@ -1323,19 +1365,22 @@ static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox, mlxsw_cmd_mbox_config_profile_adaptive_routing_group_cap_set( mbox, profile->adaptive_routing_group_cap); } - if (profile->used_kvd_sizes) { - mlxsw_cmd_mbox_config_profile_set_kvd_linear_size_set( - mbox, 1); - mlxsw_cmd_mbox_config_profile_kvd_linear_size_set( - mbox, profile->kvd_linear_size); - mlxsw_cmd_mbox_config_profile_set_kvd_hash_single_size_set( - mbox, 1); - mlxsw_cmd_mbox_config_profile_kvd_hash_single_size_set( - mbox, profile->kvd_hash_single_size); + if (resources->kvd_size_valid) { + err = mlxsw_pci_profile_get_kvd_sizes(profile, resources); + if (err) + return err; + + mlxsw_cmd_mbox_config_profile_set_kvd_linear_size_set(mbox, 1); + mlxsw_cmd_mbox_config_profile_kvd_linear_size_set(mbox, + resources->kvd_linear_size); + mlxsw_cmd_mbox_config_profile_set_kvd_hash_single_size_set(mbox, + 1); + mlxsw_cmd_mbox_config_profile_kvd_hash_single_size_set(mbox, + resources->kvd_single_size); mlxsw_cmd_mbox_config_profile_set_kvd_hash_double_size_set( - mbox, 1); - mlxsw_cmd_mbox_config_profile_kvd_hash_double_size_set( - mbox, profile->kvd_hash_double_size); + mbox, 1); + mlxsw_cmd_mbox_config_profile_kvd_hash_double_size_set(mbox, + resources->kvd_double_size); } for (i = 0; i < MLXSW_CONFIG_PROFILE_SWID_COUNT; i++) @@ -1537,7 +1582,7 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core 
*mlxsw_core, if (err) goto err_query_resources; - err = mlxsw_pci_config_profile(mlxsw_pci, mbox, profile); + err = mlxsw_pci_config_profile(mlxsw_pci, mbox, profile, resources); if (err) goto err_config_profile; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 0f96d11..d8e3da2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3057,10 +3057,11 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = { .max_ib_mc = 0, .used_max_pkey = 1, .max_pkey = 0, - .used_kvd_sizes = 1, + .used_kvd_split_data = 1, + .kvd_hash_granularity = MLXSW_SP_KVD_GRANULARITY, + .kvd_hash_single_parts = 2, + .kvd_hash_double_parts = 1, .kvd_linear_size = MLXSW_SP_KVD_LINEAR_SIZE, - .kvd_hash_single_size = MLXSW_SP_KVD_HASH_SINGLE_SIZE, - .kvd_hash_double_size = MLXSW_SP_KVD_HASH_DOUBLE_SIZE, .swid_config = { { .used_type = 1, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index a056aaa..208dfee 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -74,8 +74,7 @@ #define MLXSW_SP_CELLS_TO_BYTES(c) (c * MLXSW_SP_BYTES_PER_CELL) #define MLXSW_SP_KVD_LINEAR_SIZE 65536 /* entries */ -#define MLXSW_SP_KVD_HASH_SINGLE_SIZE 163840 /* entries */ -#define MLXSW_SP_KVD_HASH_DOUBLE_SIZE 32768 /* entries */ +#define MLXSW_SP_KVD_GRANULARITY 128 /* Maximum delay buffer needed in case of PAUSE frames, in cells. * Assumes 100m cable and maximum MTU. -- cgit v1.1 From b8a09f0a0938acee81c386e3c27e7aca78b087eb Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Tue, 20 Sep 2016 11:16:53 +0200 Subject: mlxsw: pci: Add max virtual routers resource Add the max number of virtual routers to resource query. Signed-off-by: Nogah Frankel Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/core.h | 4 +++- drivers/net/ethernet/mellanox/mlxsw/pci.c | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 1193bdc..e936dc9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -272,13 +272,15 @@ struct mlxsw_resources { max_ports_in_lag_valid:1, kvd_size_valid:1, kvd_single_min_size_valid:1, - kvd_double_min_size_valid:1; + kvd_double_min_size_valid:1, + max_virtual_routers_valid:1; u8 max_span; u8 max_lag; u8 max_ports_in_lag; u32 kvd_size; u32 kvd_single_min_size; u32 kvd_double_min_size; + u16 max_virtual_routers; /* Internal resources. * Determined by the SW, not queried from the HW. diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index c2d2fa1..cb95f9a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1161,6 +1161,7 @@ mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci, #define MLXSW_KVD_SIZE_ID 0x1001 #define MLXSW_KVD_SINGLE_MIN_SIZE_ID 0x1002 #define MLXSW_KVD_DOUBLE_MIN_SIZE_ID 0x1003 +#define MLXSW_MAX_VIRTUAL_ROUTERS_ID 0x2C01 #define MLXSW_RESOURCES_QUERY_MAX_QUERIES 100 #define MLXSW_RESOURCES_PER_QUERY 32 @@ -1192,6 +1193,10 @@ static void mlxsw_pci_resources_query_parse(int id, u64 val, resources->kvd_double_min_size = val; resources->kvd_double_min_size_valid = 1; break; + case MLXSW_MAX_VIRTUAL_ROUTERS_ID: + resources->max_virtual_routers = val; + resources->max_virtual_routers_valid = 1; + break; default: break; } -- cgit v1.1 From 9497c042bfa9012cdefaddd8588b031bfae59a7c Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Tue, 20 Sep 2016 11:16:54 +0200 Subject: mlxsw: spectrum: Implement max virtual routers resource Replace max virtual routers const with the result from the resource query. 
Signed-off-by: Nogah Frankel Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 4 +- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 43 +++++++++++++++++++--- 2 files changed, 38 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 208dfee..352079e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -64,8 +64,6 @@ #define MLXSW_SP_LPM_TREE_MAX 22 #define MLXSW_SP_LPM_TREE_COUNT (MLXSW_SP_LPM_TREE_MAX - MLXSW_SP_LPM_TREE_MIN) -#define MLXSW_SP_VIRTUAL_ROUTER_MAX 256 - #define MLXSW_SP_PORT_BASE_SPEED 25000 /* Mb/s */ #define MLXSW_SP_BYTES_PER_CELL 96 @@ -249,7 +247,7 @@ struct mlxsw_sp_port_mall_tc_entry { struct mlxsw_sp_router { struct mlxsw_sp_lpm_tree lpm_trees[MLXSW_SP_LPM_TREE_COUNT]; - struct mlxsw_sp_vr vrs[MLXSW_SP_VIRTUAL_ROUTER_MAX]; + struct mlxsw_sp_vr *vrs; struct rhashtable neigh_ht; struct { struct delayed_work dw; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 4afb498..ded19f0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -372,10 +372,12 @@ static void mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp) static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_resources *resources; struct mlxsw_sp_vr *vr; int i; - for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) { + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_virtual_routers; i++) { vr = &mlxsw_sp->router.vrs[i]; if (!vr->used) return vr; @@ -417,11 +419,14 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id, enum mlxsw_sp_l3proto proto) { + struct mlxsw_resources *resources; struct 
mlxsw_sp_vr *vr; int i; tb_id = mlxsw_sp_fix_tb_id(tb_id); - for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) { + + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_virtual_routers; i++) { vr = &mlxsw_sp->router.vrs[i]; if (vr->used && vr->proto == proto && vr->tb_id == tb_id) return vr; @@ -555,15 +560,33 @@ static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr) &vr->fib->prefix_usage); } -static void mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp) +static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_resources *resources; struct mlxsw_sp_vr *vr; int i; - for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) { + resources = mlxsw_core_resources_get(mlxsw_sp->core); + if (!resources->max_virtual_routers_valid) + return -EIO; + + mlxsw_sp->router.vrs = kcalloc(resources->max_virtual_routers, + sizeof(struct mlxsw_sp_vr), + GFP_KERNEL); + if (!mlxsw_sp->router.vrs) + return -ENOMEM; + + for (i = 0; i < resources->max_virtual_routers; i++) { vr = &mlxsw_sp->router.vrs[i]; vr->id = i; } + + return 0; +} + +static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp) +{ + kfree(mlxsw_sp->router.vrs); } struct mlxsw_sp_neigh_key { @@ -1523,14 +1546,21 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) err = __mlxsw_sp_router_init(mlxsw_sp); if (err) return err; + mlxsw_sp_lpm_init(mlxsw_sp); - mlxsw_sp_vrs_init(mlxsw_sp); - err = mlxsw_sp_neigh_init(mlxsw_sp); + err = mlxsw_sp_vrs_init(mlxsw_sp); + if (err) + goto err_vrs_init; + + err = mlxsw_sp_neigh_init(mlxsw_sp); if (err) goto err_neigh_init; + return 0; err_neigh_init: + mlxsw_sp_vrs_fini(mlxsw_sp); +err_vrs_init: __mlxsw_sp_router_fini(mlxsw_sp); return err; } @@ -1538,6 +1568,7 @@ err_neigh_init: void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) { mlxsw_sp_neigh_fini(mlxsw_sp); + mlxsw_sp_vrs_fini(mlxsw_sp); __mlxsw_sp_router_fini(mlxsw_sp); } -- cgit v1.1 From e44d49cbbc5fb1a310b71212acc4e1f378a8fd91 Mon Sep 17 00:00:00 2001 From: 
Nogah Frankel Date: Tue, 20 Sep 2016 11:16:55 +0200 Subject: mlxsw: pci: Add some miscellaneous resources Add max system ports, max regions and max vlan groups to resource query. Signed-off-by: Nogah Frankel Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.h | 10 ++++++++-- drivers/net/ethernet/mellanox/mlxsw/pci.c | 15 +++++++++++++++ drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 6 ------ 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index e936dc9..097e560 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -267,13 +267,16 @@ struct mlxsw_driver { }; struct mlxsw_resources { - u8 max_span_valid:1, + u32 max_span_valid:1, max_lag_valid:1, max_ports_in_lag_valid:1, kvd_size_valid:1, kvd_single_min_size_valid:1, kvd_double_min_size_valid:1, - max_virtual_routers_valid:1; + max_virtual_routers_valid:1, + max_system_ports_valid:1, + max_vlan_groups_valid:1, + max_regions_valid:1; u8 max_span; u8 max_lag; u8 max_ports_in_lag; @@ -281,6 +284,9 @@ struct mlxsw_resources { u32 kvd_single_min_size; u32 kvd_double_min_size; u16 max_virtual_routers; + u16 max_system_ports; + u16 max_vlan_groups; + u16 max_regions; /* Internal resources. * Determined by the SW, not queried from the HW. 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index cb95f9a..826b502 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1162,6 +1162,9 @@ mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci, #define MLXSW_KVD_SINGLE_MIN_SIZE_ID 0x1002 #define MLXSW_KVD_DOUBLE_MIN_SIZE_ID 0x1003 #define MLXSW_MAX_VIRTUAL_ROUTERS_ID 0x2C01 +#define MLXSW_MAX_SYSTEM_PORT_ID 0x2502 +#define MLXSW_MAX_VLAN_GROUPS_ID 0x2906 +#define MLXSW_MAX_REGIONS_ID 0x2901 #define MLXSW_RESOURCES_QUERY_MAX_QUERIES 100 #define MLXSW_RESOURCES_PER_QUERY 32 @@ -1197,6 +1200,18 @@ static void mlxsw_pci_resources_query_parse(int id, u64 val, resources->max_virtual_routers = val; resources->max_virtual_routers_valid = 1; break; + case MLXSW_MAX_SYSTEM_PORT_ID: + resources->max_system_ports = val; + resources->max_system_ports_valid = 1; + break; + case MLXSW_MAX_VLAN_GROUPS_ID: + resources->max_vlan_groups = val; + resources->max_vlan_groups_valid = 1; + break; + case MLXSW_MAX_REGIONS_ID: + resources->max_regions = val; + resources->max_regions_valid = 1; + break; default: break; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index d8e3da2..9acc3e0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3040,12 +3040,6 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = { .max_mid = MLXSW_SP_MID_MAX, .used_max_pgt = 1, .max_pgt = 0, - .used_max_system_port = 1, - .max_system_port = 64, - .used_max_vlan_groups = 1, - .max_vlan_groups = 127, - .used_max_regions = 1, - .max_regions = 400, .used_flood_tables = 1, .used_flood_mode = 1, .flood_mode = 3, -- cgit v1.1 From 274df7fb77fffd243336c6affa2f9469e8f11122 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Tue, 20 Sep 2016 11:16:56 +0200 Subject: mlxsw: pci: Add max router interface resource Add the 
max number of rif (router interfaces) to resource query. Signed-off-by: Nogah Frankel Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.h | 4 +++- drivers/net/ethernet/mellanox/mlxsw/pci.c | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 097e560..c4f550b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -276,7 +276,8 @@ struct mlxsw_resources { max_virtual_routers_valid:1, max_system_ports_valid:1, max_vlan_groups_valid:1, - max_regions_valid:1; + max_regions_valid:1, + max_rif_valid:1; u8 max_span; u8 max_lag; u8 max_ports_in_lag; @@ -287,6 +288,7 @@ struct mlxsw_resources { u16 max_system_ports; u16 max_vlan_groups; u16 max_regions; + u16 max_rif; /* Internal resources. * Determined by the SW, not queried from the HW. diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 826b502..e742bd4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1165,6 +1165,7 @@ mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci, #define MLXSW_MAX_SYSTEM_PORT_ID 0x2502 #define MLXSW_MAX_VLAN_GROUPS_ID 0x2906 #define MLXSW_MAX_REGIONS_ID 0x2901 +#define MLXSW_MAX_RIF_ID 0x2C02 #define MLXSW_RESOURCES_QUERY_MAX_QUERIES 100 #define MLXSW_RESOURCES_PER_QUERY 32 @@ -1212,6 +1213,10 @@ static void mlxsw_pci_resources_query_parse(int id, u64 val, resources->max_regions = val; resources->max_regions_valid = 1; break; + case MLXSW_MAX_RIF_ID: + resources->max_rif = val; + resources->max_rif_valid = 1; + break; default: break; } -- cgit v1.1 From 8f8a62d462492a043349a08abc51e2ad65b1f49a Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Tue, 20 Sep 2016 11:16:57 +0200 Subject: mlxsw: spectrum: Implement max rif resource Replace 
max rif const with using the result from resource query. Signed-off-by: Nogah Frankel Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 13 +++++---- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 9 ++++--- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 31 ++++++++++++++++++++-- 3 files changed, 41 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 9acc3e0..80f27b5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3017,7 +3017,6 @@ err_rx_listener_register: static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); - int i; mlxsw_sp_ports_remove(mlxsw_sp); mlxsw_sp_span_fini(mlxsw_sp); @@ -3029,8 +3028,6 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE); WARN_ON(!list_empty(&mlxsw_sp->vfids.list)); WARN_ON(!list_empty(&mlxsw_sp->fids)); - for (i = 0; i < MLXSW_SP_RIF_MAX; i++) - WARN_ON_ONCE(mlxsw_sp->rifs[i]); } static struct mlxsw_config_profile mlxsw_sp_config_profile = { @@ -3172,13 +3169,15 @@ static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *r, static int mlxsw_sp_avail_rif_get(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_resources *resources; int i; - for (i = 0; i < MLXSW_SP_RIF_MAX; i++) + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_rif; i++) if (!mlxsw_sp->rifs[i]) return i; - return MLXSW_SP_RIF_MAX; + return MLXSW_SP_INVALID_RIF; } static void mlxsw_sp_vport_rif_sp_attr_get(struct mlxsw_sp_port *mlxsw_sp_vport, @@ -3258,7 +3257,7 @@ mlxsw_sp_vport_rif_sp_create(struct mlxsw_sp_port *mlxsw_sp_vport, int err; rif = mlxsw_sp_avail_rif_get(mlxsw_sp); - if (rif == MLXSW_SP_RIF_MAX) + if (rif == MLXSW_SP_INVALID_RIF) return 
ERR_PTR(-ERANGE); err = mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, l3_dev, rif, true); @@ -3490,7 +3489,7 @@ static int mlxsw_sp_rif_bridge_create(struct mlxsw_sp *mlxsw_sp, int err; rif = mlxsw_sp_avail_rif_get(mlxsw_sp); - if (rif == MLXSW_SP_RIF_MAX) + if (rif == MLXSW_SP_INVALID_RIF) return -ERANGE; err = mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, true); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 352079e..73cae21 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -54,7 +54,7 @@ #define MLXSW_SP_VFID_MAX 6656 /* Bridged VLAN interfaces */ #define MLXSW_SP_RFID_BASE 15360 -#define MLXSW_SP_RIF_MAX 800 +#define MLXSW_SP_INVALID_RIF 0xffff #define MLXSW_SP_MID_MAX 7000 @@ -269,7 +269,7 @@ struct mlxsw_sp { DECLARE_BITMAP(mapped, MLXSW_SP_MID_MAX); } br_mids; struct list_head fids; /* VLAN-aware bridge FIDs */ - struct mlxsw_sp_rif *rifs[MLXSW_SP_RIF_MAX]; + struct mlxsw_sp_rif **rifs; struct mlxsw_sp_port **ports; struct mlxsw_core *core; const struct mlxsw_bus_info *bus_info; @@ -477,9 +477,12 @@ static inline struct mlxsw_sp_rif * mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, const struct net_device *dev) { + struct mlxsw_resources *resources; int i; - for (i = 0; i < MLXSW_SP_RIF_MAX; i++) + resources = mlxsw_core_resources_get(mlxsw_sp->core); + + for (i = 0; i < resources->max_rif; i++) if (mlxsw_sp->rifs[i] && mlxsw_sp->rifs[i]->dev == dev) return mlxsw_sp->rifs[i]; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index ded19f0..cc653ac 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1522,19 +1522,46 @@ static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp, static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_resources 
*resources; char rgcr_pl[MLXSW_REG_RGCR_LEN]; + int err; + + resources = mlxsw_core_resources_get(mlxsw_sp->core); + if (!resources->max_rif_valid) + return -EIO; + + mlxsw_sp->rifs = kcalloc(resources->max_rif, + sizeof(struct mlxsw_sp_rif *), GFP_KERNEL); + if (!mlxsw_sp->rifs) + return -ENOMEM; mlxsw_reg_rgcr_pack(rgcr_pl, true); - mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, MLXSW_SP_RIF_MAX); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); + mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, resources->max_rif); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); + if (err) + goto err_rgcr_fail; + + return 0; + +err_rgcr_fail: + kfree(mlxsw_sp->rifs); + return err; } static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_resources *resources; char rgcr_pl[MLXSW_REG_RGCR_LEN]; + int i; mlxsw_reg_rgcr_pack(rgcr_pl, false); mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); + + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_rif; i++) + WARN_ON_ONCE(mlxsw_sp->rifs[i]); + + kfree(mlxsw_sp->rifs); } int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) -- cgit v1.1 From 63c43787d35e45562a6b5927e2edc8f4783d95b8 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 19 Sep 2016 16:17:57 +0200 Subject: vti6: fix input path Since commit 1625f4529957, vti6 is broken, all input packets are dropped (LINUX_MIB_XFRMINNOSTATES is incremented). XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 is set by vti6_rcv() before calling xfrm6_rcv()/xfrm6_rcv_spi(), thus we cannot set to NULL that value in xfrm6_rcv_spi(). A new function xfrm6_rcv_tnl() that enables to pass a value to xfrm6_rcv_spi() is added, so that xfrm6_rcv() is not touched (this function is used in several handlers). 
CC: Alexey Kodanev Fixes: 1625f4529957 ("net/xfrm_input: fix possible NULL deref of tunnel.ip6->parms.i_key") Signed-off-by: Nicolas Dichtel Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 4 +++- net/ipv6/ip6_vti.c | 4 +--- net/ipv6/xfrm6_input.c | 16 +++++++++++----- net/ipv6/xfrm6_tunnel.c | 2 +- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index adfebd6..1793431 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1540,8 +1540,10 @@ int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family); void xfrm4_local_error(struct sk_buff *skb, u32 mtu); int xfrm6_extract_header(struct sk_buff *skb); int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb); -int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi); +int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi, + struct ip6_tnl *t); int xfrm6_transport_finish(struct sk_buff *skb, int async); +int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t); int xfrm6_rcv(struct sk_buff *skb); int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 52a2f73..5bd3afd 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -321,11 +321,9 @@ static int vti6_rcv(struct sk_buff *skb) goto discard; } - XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t; - rcu_read_unlock(); - return xfrm6_rcv(skb); + return xfrm6_rcv_tnl(skb, t); } rcu_read_unlock(); return -EINVAL; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 00a2d40..b578956 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -21,9 +21,10 @@ int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb) return xfrm6_extract_header(skb); } -int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) +int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi, + struct ip6_tnl *t) { - 
XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t; XFRM_SPI_SKB_CB(skb)->family = AF_INET6; XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); return xfrm_input(skb, nexthdr, spi, 0); @@ -49,13 +50,18 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) return -1; } -int xfrm6_rcv(struct sk_buff *skb) +int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t) { return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff], - 0); + 0, t); } -EXPORT_SYMBOL(xfrm6_rcv); +EXPORT_SYMBOL(xfrm6_rcv_tnl); +int xfrm6_rcv(struct sk_buff *skb) +{ + return xfrm6_rcv_tnl(skb, NULL); +} +EXPORT_SYMBOL(xfrm6_rcv); int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto) { diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 5743044..e1c0bbe 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -236,7 +236,7 @@ static int xfrm6_tunnel_rcv(struct sk_buff *skb) __be32 spi; spi = xfrm6_tunnel_spi_lookup(net, (const xfrm_address_t *)&iph->saddr); - return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi); + return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi, NULL); } static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, -- cgit v1.1 From 8d58790b832e13d6006d842037732304af357c3c Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 19 Sep 2016 21:34:01 +0200 Subject: net: can: ifi: Configure transmitter delay Configure the transmitter delay register at +0x1c to correctly handle the CAN FD bitrate switch (BRS). This moves the SSP (secondary sample point) to a proper offset, so that the TDC mechanism works and won't generate error frames on the CAN link. 
Signed-off-by: Marek Vasut Cc: Marc Kleine-Budde Cc: Mark Rutland Cc: Oliver Hartkopp Cc: Wolfgang Grandegger Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/ifi_canfd/ifi_canfd.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/ifi_canfd/ifi_canfd.c b/drivers/net/can/ifi_canfd/ifi_canfd.c index 2d1d22e..368bb07 100644 --- a/drivers/net/can/ifi_canfd/ifi_canfd.c +++ b/drivers/net/can/ifi_canfd/ifi_canfd.c @@ -81,6 +81,10 @@ #define IFI_CANFD_TIME_SET_TIMEA_4_12_6_6 BIT(15) #define IFI_CANFD_TDELAY 0x1c +#define IFI_CANFD_TDELAY_DEFAULT 0xb +#define IFI_CANFD_TDELAY_MASK 0x3fff +#define IFI_CANFD_TDELAY_ABS BIT(14) +#define IFI_CANFD_TDELAY_EN BIT(15) #define IFI_CANFD_ERROR 0x20 #define IFI_CANFD_ERROR_TX_OFFSET 0 @@ -641,7 +645,7 @@ static void ifi_canfd_set_bittiming(struct net_device *ndev) struct ifi_canfd_priv *priv = netdev_priv(ndev); const struct can_bittiming *bt = &priv->can.bittiming; const struct can_bittiming *dbt = &priv->can.data_bittiming; - u16 brp, sjw, tseg1, tseg2; + u16 brp, sjw, tseg1, tseg2, tdc; /* Configure bit timing */ brp = bt->brp - 2; @@ -664,6 +668,11 @@ static void ifi_canfd_set_bittiming(struct net_device *ndev) (brp << IFI_CANFD_TIME_PRESCALE_OFF) | (sjw << IFI_CANFD_TIME_SJW_OFF_7_9_8_8), priv->base + IFI_CANFD_FTIME); + + /* Configure transmitter delay */ + tdc = (dbt->brp * (dbt->phase_seg1 + 1)) & IFI_CANFD_TDELAY_MASK; + writel(IFI_CANFD_TDELAY_EN | IFI_CANFD_TDELAY_ABS | tdc, + priv->base + IFI_CANFD_TDELAY); } static void ifi_canfd_set_filter(struct net_device *ndev, const u32 id, -- cgit v1.1 From c2f672fc94642bae96821a393f342edcfa9794a6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 20 Sep 2016 15:45:26 +0200 Subject: xfrm: state lookup can be lockless This is called from the packet input path, we get lock contention if many cpus handle ipsec in parallel. After recent rcu conversion it is safe to call __xfrm_state_lookup without the spinlock. 
Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index ba8bf51..a38fdea 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1431,9 +1431,9 @@ xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 { struct xfrm_state *x; - spin_lock_bh(&net->xfrm.xfrm_state_lock); + rcu_read_lock(); x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family); - spin_unlock_bh(&net->xfrm.xfrm_state_lock); + rcu_read_unlock(); return x; } EXPORT_SYMBOL(xfrm_state_lookup); -- cgit v1.1 From 332ae8e2f6ecda5e50c5c62ed62894963e3a83f5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:43:53 +0100 Subject: net: cls_bpf: add hardware offload This patch adds hardware offload capability to cls_bpf classifier, similar to what have been done with U32 and flower. Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 2 ++ include/net/pkt_cls.h | 14 ++++++++++ net/sched/cls_bpf.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a10d8d1..69f242c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -789,6 +789,7 @@ enum { TC_SETUP_CLSU32, TC_SETUP_CLSFLOWER, TC_SETUP_MATCHALL, + TC_SETUP_CLSBPF, }; struct tc_cls_u32_offload; @@ -800,6 +801,7 @@ struct tc_to_netdev { struct tc_cls_u32_offload *cls_u32; struct tc_cls_flower_offload *cls_flower; struct tc_cls_matchall_offload *cls_mall; + struct tc_cls_bpf_offload *cls_bpf; }; }; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index a459be5..41e8071 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -486,4 +486,18 @@ struct tc_cls_matchall_offload { unsigned long cookie; }; +enum tc_clsbpf_command { + TC_CLSBPF_ADD, + TC_CLSBPF_REPLACE, + TC_CLSBPF_DESTROY, +}; + +struct tc_cls_bpf_offload { + enum tc_clsbpf_command command; + struct tcf_exts *exts; + struct bpf_prog *prog; + const char *name; + bool exts_integrated; +}; + #endif diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index c6f7a47..6523c5b 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -39,6 +39,7 @@ struct cls_bpf_prog { struct list_head link; struct tcf_result res; bool exts_integrated; + bool offloaded; struct tcf_exts exts; u32 handle; union { @@ -138,6 +139,71 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog) return !prog->bpf_ops; } +static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, + enum tc_clsbpf_command cmd) +{ + struct net_device *dev = tp->q->dev_queue->dev; + struct tc_cls_bpf_offload bpf_offload = {}; + struct tc_to_netdev offload; + + offload.type = TC_SETUP_CLSBPF; + offload.cls_bpf = &bpf_offload; + + bpf_offload.command = cmd; + bpf_offload.exts = &prog->exts; + bpf_offload.prog = prog->filter; + 
bpf_offload.name = prog->bpf_name; + bpf_offload.exts_integrated = prog->exts_integrated; + + return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); +} + +static void cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, + struct cls_bpf_prog *oldprog) +{ + struct net_device *dev = tp->q->dev_queue->dev; + struct cls_bpf_prog *obj = prog; + enum tc_clsbpf_command cmd; + + if (oldprog && oldprog->offloaded) { + if (tc_should_offload(dev, tp, 0)) { + cmd = TC_CLSBPF_REPLACE; + } else { + obj = oldprog; + cmd = TC_CLSBPF_DESTROY; + } + } else { + if (!tc_should_offload(dev, tp, 0)) + return; + cmd = TC_CLSBPF_ADD; + } + + if (cls_bpf_offload_cmd(tp, obj, cmd)) + return; + + obj->offloaded = true; + if (oldprog) + oldprog->offloaded = false; +} + +static void cls_bpf_stop_offload(struct tcf_proto *tp, + struct cls_bpf_prog *prog) +{ + int err; + + if (!prog->offloaded) + return; + + err = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY); + if (err) { + pr_err("Stopping hardware offload failed: %d\n", err); + return; + } + + prog->offloaded = false; +} + static int cls_bpf_init(struct tcf_proto *tp) { struct cls_bpf_head *head; @@ -177,6 +243,7 @@ static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg) { struct cls_bpf_prog *prog = (struct cls_bpf_prog *) arg; + cls_bpf_stop_offload(tp, prog); list_del_rcu(&prog->link); tcf_unbind_filter(tp, &prog->res); call_rcu(&prog->rcu, __cls_bpf_delete_prog); @@ -193,6 +260,7 @@ static bool cls_bpf_destroy(struct tcf_proto *tp, bool force) return false; list_for_each_entry_safe(prog, tmp, &head->plist, link) { + cls_bpf_stop_offload(tp, prog); list_del_rcu(&prog->link); tcf_unbind_filter(tp, &prog->res); call_rcu(&prog->rcu, __cls_bpf_delete_prog); @@ -415,6 +483,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, if (ret < 0) goto errout; + cls_bpf_offload(tp, prog, oldprog); + if (oldprog) { list_replace_rcu(&oldprog->link, &prog->link); tcf_unbind_filter(tp, 
&oldprog->res); -- cgit v1.1 From 0d01d45f1b251448590c710baa32f722e43c62c7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:43:54 +0100 Subject: net: cls_bpf: limit hardware offload by software-only flag Add cls_bpf support for the TCA_CLS_FLAGS_SKIP_HW flag. Unlike U32 and flower cls_bpf already has some netlink flags defined. Create a new attribute to be able to use the same flag values as the above. Unlike U32 and flower reject unknown flags. Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 1 + include/uapi/linux/pkt_cls.h | 1 + net/sched/cls_bpf.c | 22 ++++++++++++++++++++-- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 41e8071..57af9f3 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -498,6 +498,7 @@ struct tc_cls_bpf_offload { struct bpf_prog *prog; const char *name; bool exts_integrated; + u32 gen_flags; }; #endif diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 8915b61..8fd715f 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -396,6 +396,7 @@ enum { TCA_BPF_FD, TCA_BPF_NAME, TCA_BPF_FLAGS, + TCA_BPF_FLAGS_GEN, __TCA_BPF_MAX, }; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 6523c5b..ebf01f7 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -27,6 +27,8 @@ MODULE_AUTHOR("Daniel Borkmann "); MODULE_DESCRIPTION("TC BPF based classifier"); #define CLS_BPF_NAME_LEN 256 +#define CLS_BPF_SUPPORTED_GEN_FLAGS \ + TCA_CLS_FLAGS_SKIP_HW struct cls_bpf_head { struct list_head plist; @@ -40,6 +42,7 @@ struct cls_bpf_prog { struct tcf_result res; bool exts_integrated; bool offloaded; + u32 gen_flags; struct tcf_exts exts; u32 handle; union { @@ -55,6 +58,7 @@ struct cls_bpf_prog { static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { [TCA_BPF_CLASSID] = { .type = NLA_U32 }, [TCA_BPF_FLAGS] = { .type = 
NLA_U32 }, + [TCA_BPF_FLAGS_GEN] = { .type = NLA_U32 }, [TCA_BPF_FD] = { .type = NLA_U32 }, [TCA_BPF_NAME] = { .type = NLA_NUL_STRING, .len = CLS_BPF_NAME_LEN }, @@ -154,6 +158,7 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, bpf_offload.prog = prog->filter; bpf_offload.name = prog->bpf_name; bpf_offload.exts_integrated = prog->exts_integrated; + bpf_offload.gen_flags = prog->gen_flags; return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &offload); @@ -167,14 +172,14 @@ static void cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, enum tc_clsbpf_command cmd; if (oldprog && oldprog->offloaded) { - if (tc_should_offload(dev, tp, 0)) { + if (tc_should_offload(dev, tp, prog->gen_flags)) { cmd = TC_CLSBPF_REPLACE; } else { obj = oldprog; cmd = TC_CLSBPF_DESTROY; } } else { - if (!tc_should_offload(dev, tp, 0)) + if (!tc_should_offload(dev, tp, prog->gen_flags)) return; cmd = TC_CLSBPF_ADD; } @@ -370,6 +375,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, { bool is_bpf, is_ebpf, have_exts = false; struct tcf_exts exts; + u32 gen_flags = 0; int ret; is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS]; @@ -394,8 +400,17 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT; } + if (tb[TCA_BPF_FLAGS_GEN]) { + gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]); + if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS || + !tc_flags_valid(gen_flags)) { + ret = -EINVAL; + goto errout; + } + } prog->exts_integrated = have_exts; + prog->gen_flags = gen_flags; ret = is_bpf ? 
cls_bpf_prog_from_ops(tb, prog) : cls_bpf_prog_from_efd(tb, prog, tp); @@ -568,6 +583,9 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, bpf_flags |= TCA_BPF_FLAG_ACT_DIRECT; if (bpf_flags && nla_put_u32(skb, TCA_BPF_FLAGS, bpf_flags)) goto nla_put_failure; + if (prog->gen_flags && + nla_put_u32(skb, TCA_BPF_FLAGS_GEN, prog->gen_flags)) + goto nla_put_failure; nla_nest_end(skb, nest); -- cgit v1.1 From eadb41489fd2249e71fd14b36fb488ed7217ca4b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:43:55 +0100 Subject: net: cls_bpf: add support for marking filters as hardware-only Add cls_bpf support for the TCA_CLS_FLAGS_SKIP_SW flag. Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/sched/cls_bpf.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index ebf01f7..1becc2f 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -28,7 +28,7 @@ MODULE_DESCRIPTION("TC BPF based classifier"); #define CLS_BPF_NAME_LEN 256 #define CLS_BPF_SUPPORTED_GEN_FLAGS \ - TCA_CLS_FLAGS_SKIP_HW + (TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW) struct cls_bpf_head { struct list_head plist; @@ -96,7 +96,9 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, qdisc_skb_cb(skb)->tc_classid = prog->res.classid; - if (at_ingress) { + if (tc_skip_sw(prog->gen_flags)) { + filter_res = prog->exts_integrated ? 
TC_ACT_UNSPEC : 0; + } else if (at_ingress) { /* It is safe to push/pull even if skb_shared() */ __skb_push(skb, skb->mac_len); bpf_compute_data_end(skb); @@ -164,32 +166,42 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, tp->protocol, &offload); } -static void cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, - struct cls_bpf_prog *oldprog) +static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, + struct cls_bpf_prog *oldprog) { struct net_device *dev = tp->q->dev_queue->dev; struct cls_bpf_prog *obj = prog; enum tc_clsbpf_command cmd; + bool skip_sw; + int ret; + + skip_sw = tc_skip_sw(prog->gen_flags) || + (oldprog && tc_skip_sw(oldprog->gen_flags)); if (oldprog && oldprog->offloaded) { if (tc_should_offload(dev, tp, prog->gen_flags)) { cmd = TC_CLSBPF_REPLACE; - } else { + } else if (!tc_skip_sw(prog->gen_flags)) { obj = oldprog; cmd = TC_CLSBPF_DESTROY; + } else { + return -EINVAL; } } else { if (!tc_should_offload(dev, tp, prog->gen_flags)) - return; + return skip_sw ? -EINVAL : 0; cmd = TC_CLSBPF_ADD; } - if (cls_bpf_offload_cmd(tp, obj, cmd)) - return; + ret = cls_bpf_offload_cmd(tp, obj, cmd); + if (ret) + return skip_sw ? 
ret : 0; obj->offloaded = true; if (oldprog) oldprog->offloaded = false; + + return 0; } static void cls_bpf_stop_offload(struct tcf_proto *tp, @@ -498,7 +510,11 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, if (ret < 0) goto errout; - cls_bpf_offload(tp, prog, oldprog); + ret = cls_bpf_offload(tp, prog, oldprog); + if (ret) { + cls_bpf_delete_prog(tp, prog); + return ret; + } if (oldprog) { list_replace_rcu(&oldprog->link, &prog->link); -- cgit v1.1 From 3df126f35f88dc76eea33769f85a3c3bb8ce6c6b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:43:56 +0100 Subject: bpf: don't (ab)use instructions to store state Storing state in reserved fields of instructions makes it impossible to run verifier on programs already marked as read-only. Allocate and use an array of per-instruction state instead. While touching the error path rename and move existing jump target. Suggested-by: Alexei Starovoitov Signed-off-by: Jakub Kicinski Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- kernel/bpf/verifier.c | 70 +++++++++++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 30 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3a75ee3..a9542d8 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -181,6 +181,10 @@ struct verifier_stack_elem { struct verifier_stack_elem *next; }; +struct bpf_insn_aux_data { + enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ +}; + #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ /* single container for all structs @@ -197,6 +201,7 @@ struct verifier_env { u32 id_gen; /* used to generate unique reg IDs */ bool allow_ptr_leaks; bool seen_direct_write; + struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ }; #define BPF_COMPLEXITY_LIMIT_INSNS 65536 @@ -2344,7 +2349,7 @@ static int do_check(struct verifier_env *env) return err; } else if (class == BPF_LDX) { - enum bpf_reg_type src_reg_type; + enum bpf_reg_type *prev_src_type, src_reg_type; /* check for reserved fields is already done */ @@ -2374,16 +2379,18 @@ static int do_check(struct verifier_env *env) continue; } - if (insn->imm == 0) { + prev_src_type = &env->insn_aux_data[insn_idx].ptr_type; + + if (*prev_src_type == NOT_INIT) { /* saw a valid insn * dst_reg = *(u32 *)(src_reg + off) - * use reserved 'imm' field to mark this insn + * save type to validate intersecting paths */ - insn->imm = src_reg_type; + *prev_src_type = src_reg_type; - } else if (src_reg_type != insn->imm && + } else if (src_reg_type != *prev_src_type && (src_reg_type == PTR_TO_CTX || - insn->imm == PTR_TO_CTX)) { + *prev_src_type == PTR_TO_CTX)) { /* ABuser program is trying to use the same insn * dst_reg = *(u32*) (src_reg + off) * with different pointer types: @@ -2396,7 +2403,7 @@ static int do_check(struct verifier_env *env) } } else if (class == BPF_STX) { - enum bpf_reg_type dst_reg_type; + enum bpf_reg_type *prev_dst_type, dst_reg_type; if 
(BPF_MODE(insn->code) == BPF_XADD) { err = check_xadd(env, insn); @@ -2424,11 +2431,13 @@ static int do_check(struct verifier_env *env) if (err) return err; - if (insn->imm == 0) { - insn->imm = dst_reg_type; - } else if (dst_reg_type != insn->imm && + prev_dst_type = &env->insn_aux_data[insn_idx].ptr_type; + + if (*prev_dst_type == NOT_INIT) { + *prev_dst_type = dst_reg_type; + } else if (dst_reg_type != *prev_dst_type && (dst_reg_type == PTR_TO_CTX || - insn->imm == PTR_TO_CTX)) { + *prev_dst_type == PTR_TO_CTX)) { verbose("same insn cannot be used with different pointers\n"); return -EINVAL; } @@ -2686,10 +2695,11 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env) static int convert_ctx_accesses(struct verifier_env *env) { const struct bpf_verifier_ops *ops = env->prog->aux->ops; + const int insn_cnt = env->prog->len; struct bpf_insn insn_buf[16], *insn; struct bpf_prog *new_prog; enum bpf_access_type type; - int i, insn_cnt, cnt; + int i, cnt, delta = 0; if (ops->gen_prologue) { cnt = ops->gen_prologue(insn_buf, env->seen_direct_write, @@ -2703,18 +2713,16 @@ static int convert_ctx_accesses(struct verifier_env *env) if (!new_prog) return -ENOMEM; env->prog = new_prog; + delta += cnt - 1; } } if (!ops->convert_ctx_access) return 0; - insn_cnt = env->prog->len; - insn = env->prog->insnsi; + insn = env->prog->insnsi + delta; for (i = 0; i < insn_cnt; i++, insn++) { - u32 insn_delta; - if (insn->code == (BPF_LDX | BPF_MEM | BPF_W) || insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) type = BPF_READ; @@ -2724,11 +2732,8 @@ static int convert_ctx_accesses(struct verifier_env *env) else continue; - if (insn->imm != PTR_TO_CTX) { - /* clear internal mark */ - insn->imm = 0; + if (env->insn_aux_data[i].ptr_type != PTR_TO_CTX) continue; - } cnt = ops->convert_ctx_access(type, insn->dst_reg, insn->src_reg, insn->off, insn_buf, env->prog); @@ -2737,18 +2742,16 @@ static int convert_ctx_accesses(struct verifier_env *env) return -EINVAL; } - new_prog = 
bpf_patch_insn_single(env->prog, i, insn_buf, cnt); + new_prog = bpf_patch_insn_single(env->prog, i + delta, insn_buf, + cnt); if (!new_prog) return -ENOMEM; - insn_delta = cnt - 1; + delta += cnt - 1; /* keep walking new program and skip insns we just inserted */ env->prog = new_prog; - insn = new_prog->insnsi + i + insn_delta; - - insn_cnt += insn_delta; - i += insn_delta; + insn = new_prog->insnsi + i + delta; } return 0; @@ -2792,6 +2795,11 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) if (!env) return -ENOMEM; + env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) * + (*prog)->len); + ret = -ENOMEM; + if (!env->insn_aux_data) + goto err_free_env; env->prog = *prog; /* grab the mutex to protect few globals used by verifier */ @@ -2810,12 +2818,12 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) /* log_* values have to be sane */ if (log_size < 128 || log_size > UINT_MAX >> 8 || log_level == 0 || log_ubuf == NULL) - goto free_env; + goto err_unlock; ret = -ENOMEM; log_buf = vmalloc(log_size); if (!log_buf) - goto free_env; + goto err_unlock; } else { log_level = 0; } @@ -2884,14 +2892,16 @@ skip_full_check: free_log_buf: if (log_level) vfree(log_buf); -free_env: if (!env->prog->aux->used_maps) /* if we didn't copy map pointers into bpf_prog_info, release * them now. Otherwise free_bpf_prog_info() will release them. */ release_maps(env); *prog = env->prog; - kfree(env); +err_unlock: mutex_unlock(&bpf_verifier_lock); + vfree(env->insn_aux_data); +err_free_env: + kfree(env); return ret; } -- cgit v1.1 From 58e2af8b3a6b587e4ac8414343581da4349d3c0f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:43:57 +0100 Subject: bpf: expose internal verfier structures Move verifier's internal structures to a header file and prefix their names with bpf_ to avoid potential namespace conflicts. Those structures will soon be used by external analyzers. 
Signed-off-by: Jakub Kicinski Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf_verifier.h | 79 +++++++++++++ kernel/bpf/verifier.c | 266 +++++++++++++++++-------------------------- 2 files changed, 182 insertions(+), 163 deletions(-) create mode 100644 include/linux/bpf_verifier.h diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h new file mode 100644 index 0000000..9457a22 --- /dev/null +++ b/include/linux/bpf_verifier.h @@ -0,0 +1,79 @@ +/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#ifndef _LINUX_BPF_VERIFIER_H +#define _LINUX_BPF_VERIFIER_H 1 + +#include /* for enum bpf_reg_type */ +#include /* for MAX_BPF_STACK */ + +struct bpf_reg_state { + enum bpf_reg_type type; + union { + /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */ + s64 imm; + + /* valid when type == PTR_TO_PACKET* */ + struct { + u32 id; + u16 off; + u16 range; + }; + + /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE | + * PTR_TO_MAP_VALUE_OR_NULL + */ + struct bpf_map *map_ptr; + }; +}; + +enum bpf_stack_slot_type { + STACK_INVALID, /* nothing was stored in this stack slot */ + STACK_SPILL, /* register spilled into stack */ + STACK_MISC /* BPF program wrote some data into this slot */ +}; + +#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ + +/* state of the program: + * type of all registers and stack info + */ +struct bpf_verifier_state { + struct bpf_reg_state regs[MAX_BPF_REG]; + u8 stack_slot_type[MAX_BPF_STACK]; + struct bpf_reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE]; +}; + +/* linked list of verifier states used to prune search */ +struct bpf_verifier_state_list { + struct bpf_verifier_state state; + struct bpf_verifier_state_list *next; +}; + +struct 
bpf_insn_aux_data { + enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ +}; + +#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ + +/* single container for all structs + * one verifier_env per bpf_check() call + */ +struct bpf_verifier_env { + struct bpf_prog *prog; /* eBPF program being verified */ + struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */ + int stack_size; /* number of states to be processed */ + struct bpf_verifier_state cur_state; /* current verifier state */ + struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ + struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ + u32 used_map_cnt; /* number of used maps */ + u32 id_gen; /* used to generate unique reg IDs */ + bool allow_ptr_leaks; + bool seen_direct_write; + struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ +}; + +#endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a9542d8..dca2b9b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -126,82 +127,16 @@ * are set to NOT_INIT to indicate that they are no longer readable. 
*/ -struct reg_state { - enum bpf_reg_type type; - union { - /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */ - s64 imm; - - /* valid when type == PTR_TO_PACKET* */ - struct { - u32 id; - u16 off; - u16 range; - }; - - /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE | - * PTR_TO_MAP_VALUE_OR_NULL - */ - struct bpf_map *map_ptr; - }; -}; - -enum bpf_stack_slot_type { - STACK_INVALID, /* nothing was stored in this stack slot */ - STACK_SPILL, /* register spilled into stack */ - STACK_MISC /* BPF program wrote some data into this slot */ -}; - -#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ - -/* state of the program: - * type of all registers and stack info - */ -struct verifier_state { - struct reg_state regs[MAX_BPF_REG]; - u8 stack_slot_type[MAX_BPF_STACK]; - struct reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE]; -}; - -/* linked list of verifier states used to prune search */ -struct verifier_state_list { - struct verifier_state state; - struct verifier_state_list *next; -}; - /* verifier_state + insn_idx are pushed to stack when branch is encountered */ -struct verifier_stack_elem { +struct bpf_verifier_stack_elem { /* verifer state is 'st' * before processing instruction 'insn_idx' * and after processing instruction 'prev_insn_idx' */ - struct verifier_state st; + struct bpf_verifier_state st; int insn_idx; int prev_insn_idx; - struct verifier_stack_elem *next; -}; - -struct bpf_insn_aux_data { - enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ -}; - -#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ - -/* single container for all structs - * one verifier_env per bpf_check() call - */ -struct verifier_env { - struct bpf_prog *prog; /* eBPF program being verified */ - struct verifier_stack_elem *head; /* stack of verifier states to be processed */ - int stack_size; /* number of states to be processed */ - struct verifier_state cur_state; /* current verifier state */ - 
struct verifier_state_list **explored_states; /* search pruning optimization */ - struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ - u32 used_map_cnt; /* number of used maps */ - u32 id_gen; /* used to generate unique reg IDs */ - bool allow_ptr_leaks; - bool seen_direct_write; - struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ + struct bpf_verifier_stack_elem *next; }; #define BPF_COMPLEXITY_LIMIT_INSNS 65536 @@ -254,9 +189,9 @@ static const char * const reg_type_str[] = { [PTR_TO_PACKET_END] = "pkt_end", }; -static void print_verifier_state(struct verifier_state *state) +static void print_verifier_state(struct bpf_verifier_state *state) { - struct reg_state *reg; + struct bpf_reg_state *reg; enum bpf_reg_type t; int i; @@ -432,9 +367,9 @@ static void print_bpf_insn(struct bpf_insn *insn) } } -static int pop_stack(struct verifier_env *env, int *prev_insn_idx) +static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx) { - struct verifier_stack_elem *elem; + struct bpf_verifier_stack_elem *elem; int insn_idx; if (env->head == NULL) @@ -451,12 +386,12 @@ static int pop_stack(struct verifier_env *env, int *prev_insn_idx) return insn_idx; } -static struct verifier_state *push_stack(struct verifier_env *env, int insn_idx, - int prev_insn_idx) +static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, + int insn_idx, int prev_insn_idx) { - struct verifier_stack_elem *elem; + struct bpf_verifier_stack_elem *elem; - elem = kmalloc(sizeof(struct verifier_stack_elem), GFP_KERNEL); + elem = kmalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL); if (!elem) goto err; @@ -482,7 +417,7 @@ static const int caller_saved[CALLER_SAVED_REGS] = { BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5 }; -static void init_reg_state(struct reg_state *regs) +static void init_reg_state(struct bpf_reg_state *regs) { int i; @@ -498,7 +433,7 @@ static void init_reg_state(struct reg_state 
*regs) regs[BPF_REG_1].type = PTR_TO_CTX; } -static void mark_reg_unknown_value(struct reg_state *regs, u32 regno) +static void mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno) { BUG_ON(regno >= MAX_BPF_REG); regs[regno].type = UNKNOWN_VALUE; @@ -511,7 +446,7 @@ enum reg_arg_type { DST_OP_NO_MARK /* same as above, check only, don't mark */ }; -static int check_reg_arg(struct reg_state *regs, u32 regno, +static int check_reg_arg(struct bpf_reg_state *regs, u32 regno, enum reg_arg_type t) { if (regno >= MAX_BPF_REG) { @@ -571,8 +506,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type) /* check_stack_read/write functions track spill/fill of registers, * stack boundary and alignment are checked in check_mem_access() */ -static int check_stack_write(struct verifier_state *state, int off, int size, - int value_regno) +static int check_stack_write(struct bpf_verifier_state *state, int off, + int size, int value_regno) { int i; /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, @@ -597,7 +532,7 @@ static int check_stack_write(struct verifier_state *state, int off, int size, } else { /* regular write of data into stack */ state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] = - (struct reg_state) {}; + (struct bpf_reg_state) {}; for (i = 0; i < size; i++) state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC; @@ -605,7 +540,7 @@ static int check_stack_write(struct verifier_state *state, int off, int size, return 0; } -static int check_stack_read(struct verifier_state *state, int off, int size, +static int check_stack_read(struct bpf_verifier_state *state, int off, int size, int value_regno) { u8 *slot_type; @@ -646,7 +581,7 @@ static int check_stack_read(struct verifier_state *state, int off, int size, } /* check read/write into map element returned by bpf_map_lookup_elem() */ -static int check_map_access(struct verifier_env *env, u32 regno, int off, +static int check_map_access(struct bpf_verifier_env *env, u32 regno, 
int off, int size) { struct bpf_map *map = env->cur_state.regs[regno].map_ptr; @@ -661,7 +596,7 @@ static int check_map_access(struct verifier_env *env, u32 regno, int off, #define MAX_PACKET_OFF 0xffff -static bool may_access_direct_pkt_data(struct verifier_env *env, +static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, const struct bpf_call_arg_meta *meta) { switch (env->prog->type) { @@ -678,11 +613,11 @@ static bool may_access_direct_pkt_data(struct verifier_env *env, } } -static int check_packet_access(struct verifier_env *env, u32 regno, int off, +static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, int size) { - struct reg_state *regs = env->cur_state.regs; - struct reg_state *reg = &regs[regno]; + struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *reg = &regs[regno]; off += reg->off; if (off < 0 || size <= 0 || off + size > reg->range) { @@ -694,7 +629,7 @@ static int check_packet_access(struct verifier_env *env, u32 regno, int off, } /* check access to 'struct bpf_context' fields */ -static int check_ctx_access(struct verifier_env *env, int off, int size, +static int check_ctx_access(struct bpf_verifier_env *env, int off, int size, enum bpf_access_type t, enum bpf_reg_type *reg_type) { if (env->prog->aux->ops->is_valid_access && @@ -709,7 +644,7 @@ static int check_ctx_access(struct verifier_env *env, int off, int size, return -EACCES; } -static bool is_pointer_value(struct verifier_env *env, int regno) +static bool is_pointer_value(struct bpf_verifier_env *env, int regno) { if (env->allow_ptr_leaks) return false; @@ -723,12 +658,13 @@ static bool is_pointer_value(struct verifier_env *env, int regno) } } -static int check_ptr_alignment(struct verifier_env *env, struct reg_state *reg, - int off, int size) +static int check_ptr_alignment(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, int off, int size) { if (reg->type != PTR_TO_PACKET) { if (off % size != 0) { - verbose("misaligned
access off %d size %d\n", off, size); + verbose("misaligned access off %d size %d\n", + off, size); return -EACCES; } else { return 0; @@ -769,12 +705,12 @@ static int check_ptr_alignment(struct verifier_env *env, struct reg_state *reg, * if t==write && value_regno==-1, some unknown value is stored into memory * if t==read && value_regno==-1, don't care what we read from memory */ -static int check_mem_access(struct verifier_env *env, u32 regno, int off, +static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, int bpf_size, enum bpf_access_type t, int value_regno) { - struct verifier_state *state = &env->cur_state; - struct reg_state *reg = &state->regs[regno]; + struct bpf_verifier_state *state = &env->cur_state; + struct bpf_reg_state *reg = &state->regs[regno]; int size, err = 0; if (reg->type == PTR_TO_STACK) @@ -860,9 +796,9 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off, return err; } -static int check_xadd(struct verifier_env *env, struct bpf_insn *insn) +static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *regs = env->cur_state.regs; int err; if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) || @@ -896,12 +832,12 @@ static int check_xadd(struct verifier_env *env, struct bpf_insn *insn) * bytes from that pointer, make sure that it's within stack boundary * and all elements of stack are initialized */ -static int check_stack_boundary(struct verifier_env *env, int regno, +static int check_stack_boundary(struct bpf_verifier_env *env, int regno, int access_size, bool zero_size_allowed, struct bpf_call_arg_meta *meta) { - struct verifier_state *state = &env->cur_state; - struct reg_state *regs = state->regs; + struct bpf_verifier_state *state = &env->cur_state; + struct bpf_reg_state *regs = state->regs; int off, i; if (regs[regno].type != PTR_TO_STACK) { @@ -940,11 +876,11 @@ static int 
check_stack_boundary(struct verifier_env *env, int regno, return 0; } -static int check_func_arg(struct verifier_env *env, u32 regno, +static int check_func_arg(struct bpf_verifier_env *env, u32 regno, enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta) { - struct reg_state *regs = env->cur_state.regs, *reg = &regs[regno]; + struct bpf_reg_state *regs = env->cur_state.regs, *reg = &regs[regno]; enum bpf_reg_type expected_type, type = reg->type; int err = 0; @@ -1149,10 +1085,10 @@ static int check_raw_mode(const struct bpf_func_proto *fn) return count > 1 ? -EINVAL : 0; } -static void clear_all_pkt_pointers(struct verifier_env *env) +static void clear_all_pkt_pointers(struct bpf_verifier_env *env) { - struct verifier_state *state = &env->cur_state; - struct reg_state *regs = state->regs, *reg; + struct bpf_verifier_state *state = &env->cur_state; + struct bpf_reg_state *regs = state->regs, *reg; int i; for (i = 0; i < MAX_BPF_REG; i++) @@ -1172,12 +1108,12 @@ static void clear_all_pkt_pointers(struct verifier_env *env) } } -static int check_call(struct verifier_env *env, int func_id) +static int check_call(struct bpf_verifier_env *env, int func_id) { - struct verifier_state *state = &env->cur_state; + struct bpf_verifier_state *state = &env->cur_state; const struct bpf_func_proto *fn = NULL; - struct reg_state *regs = state->regs; - struct reg_state *reg; + struct bpf_reg_state *regs = state->regs; + struct bpf_reg_state *reg; struct bpf_call_arg_meta meta; bool changes_data; int i, err; @@ -1280,12 +1216,13 @@ static int check_call(struct verifier_env *env, int func_id) return 0; } -static int check_packet_ptr_add(struct verifier_env *env, struct bpf_insn *insn) +static int check_packet_ptr_add(struct bpf_verifier_env *env, + struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs; - struct reg_state *dst_reg = &regs[insn->dst_reg]; - struct reg_state *src_reg = &regs[insn->src_reg]; - struct reg_state tmp_reg; + struct bpf_reg_state *regs = 
env->cur_state.regs; + struct bpf_reg_state *dst_reg = &regs[insn->dst_reg]; + struct bpf_reg_state *src_reg = &regs[insn->src_reg]; + struct bpf_reg_state tmp_reg; s32 imm; if (BPF_SRC(insn->code) == BPF_K) { @@ -1353,10 +1290,10 @@ add_imm: return 0; } -static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn) +static int evaluate_reg_alu(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs; - struct reg_state *dst_reg = &regs[insn->dst_reg]; + struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *dst_reg = &regs[insn->dst_reg]; u8 opcode = BPF_OP(insn->code); s64 imm_log2; @@ -1366,7 +1303,7 @@ static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn) */ if (BPF_SRC(insn->code) == BPF_X) { - struct reg_state *src_reg = &regs[insn->src_reg]; + struct bpf_reg_state *src_reg = &regs[insn->src_reg]; if (src_reg->type == UNKNOWN_VALUE && src_reg->imm > 0 && dst_reg->imm && opcode == BPF_ADD) { @@ -1455,11 +1392,12 @@ static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn) return 0; } -static int evaluate_reg_imm_alu(struct verifier_env *env, struct bpf_insn *insn) +static int evaluate_reg_imm_alu(struct bpf_verifier_env *env, + struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs; - struct reg_state *dst_reg = &regs[insn->dst_reg]; - struct reg_state *src_reg = &regs[insn->src_reg]; + struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *dst_reg = &regs[insn->dst_reg]; + struct bpf_reg_state *src_reg = &regs[insn->src_reg]; u8 opcode = BPF_OP(insn->code); /* dst_reg->type == CONST_IMM here, simulate execution of 'add' insn. 
@@ -1476,9 +1414,9 @@ static int evaluate_reg_imm_alu(struct verifier_env *env, struct bpf_insn *insn) } /* check validity of 32-bit and 64-bit arithmetic operations */ -static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn) +static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs, *dst_reg; + struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg; u8 opcode = BPF_OP(insn->code); int err; @@ -1652,10 +1590,10 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn) return 0; } -static void find_good_pkt_pointers(struct verifier_state *state, - const struct reg_state *dst_reg) +static void find_good_pkt_pointers(struct bpf_verifier_state *state, + struct bpf_reg_state *dst_reg) { - struct reg_state *regs = state->regs, *reg; + struct bpf_reg_state *regs = state->regs, *reg; int i; /* LLVM can generate two kind of checks: @@ -1701,11 +1639,11 @@ static void find_good_pkt_pointers(struct verifier_state *state, } } -static int check_cond_jmp_op(struct verifier_env *env, +static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx) { - struct verifier_state *other_branch, *this_branch = &env->cur_state; - struct reg_state *regs = this_branch->regs, *dst_reg; + struct bpf_verifier_state *other_branch, *this_branch = &env->cur_state; + struct bpf_reg_state *regs = this_branch->regs, *dst_reg; u8 opcode = BPF_OP(insn->code); int err; @@ -1767,7 +1705,7 @@ static int check_cond_jmp_op(struct verifier_env *env, if (!other_branch) return -EFAULT; - /* detect if R == 0 where R is returned value from bpf_map_lookup_elem() */ + /* detect if R == 0 where R is returned from bpf_map_lookup_elem() */ if (BPF_SRC(insn->code) == BPF_K && insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) && dst_reg->type == PTR_TO_MAP_VALUE_OR_NULL) { @@ -1809,9 +1747,9 @@ static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn) } /* verify 
BPF_LD_IMM64 instruction */ -static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn) +static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *regs = env->cur_state.regs; int err; if (BPF_SIZE(insn->code) != BPF_DW) { @@ -1866,11 +1804,11 @@ static bool may_access_skb(enum bpf_prog_type type) * Output: * R0 - 8/16/32-bit skb data converted to cpu endianness */ -static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn) +static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *regs = env->cur_state.regs; u8 mode = BPF_MODE(insn->code); - struct reg_state *reg; + struct bpf_reg_state *reg; int i, err; if (!may_access_skb(env->prog->type)) { @@ -1956,7 +1894,7 @@ enum { BRANCH = 2, }; -#define STATE_LIST_MARK ((struct verifier_state_list *) -1L) +#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L) static int *insn_stack; /* stack of insns to process */ static int cur_stack; /* current stack index */ @@ -1967,7 +1905,7 @@ static int *insn_state; * w - next instruction * e - edge */ -static int push_insn(int t, int w, int e, struct verifier_env *env) +static int push_insn(int t, int w, int e, struct bpf_verifier_env *env) { if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH)) return 0; @@ -2008,7 +1946,7 @@ static int push_insn(int t, int w, int e, struct verifier_env *env) /* non-recursive depth-first-search to detect loops in BPF program * loop == back-edge in directed graph */ -static int check_cfg(struct verifier_env *env) +static int check_cfg(struct bpf_verifier_env *env) { struct bpf_insn *insns = env->prog->insnsi; int insn_cnt = env->prog->len; @@ -2117,7 +2055,8 @@ err_free: /* the following conditions reduce the number of explored insns * from ~140k to ~80k for ultra large programs that use a lot of ptr_to_packet */ 
-static bool compare_ptrs_to_packet(struct reg_state *old, struct reg_state *cur) +static bool compare_ptrs_to_packet(struct bpf_reg_state *old, + struct bpf_reg_state *cur) { if (old->id != cur->id) return false; @@ -2192,9 +2131,10 @@ static bool compare_ptrs_to_packet(struct reg_state *old, struct reg_state *cur) * whereas register type in current state is meaningful, it means that * the current state will reach 'bpf_exit' instruction safely */ -static bool states_equal(struct verifier_state *old, struct verifier_state *cur) +static bool states_equal(struct bpf_verifier_state *old, + struct bpf_verifier_state *cur) { - struct reg_state *rold, *rcur; + struct bpf_reg_state *rold, *rcur; int i; for (i = 0; i < MAX_BPF_REG; i++) { @@ -2234,9 +2174,9 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur) * the same, check that stored pointers types * are the same as well. * Ex: explored safe path could have stored - * (struct reg_state) {.type = PTR_TO_STACK, .imm = -8} + * (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -8} * but current path has stored: - * (struct reg_state) {.type = PTR_TO_STACK, .imm = -16} + * (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -16} * such verifier states are not equivalent. * return false to continue verification of this path */ @@ -2247,10 +2187,10 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur) return true; } -static int is_state_visited(struct verifier_env *env, int insn_idx) +static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) { - struct verifier_state_list *new_sl; - struct verifier_state_list *sl; + struct bpf_verifier_state_list *new_sl; + struct bpf_verifier_state_list *sl; sl = env->explored_states[insn_idx]; if (!sl) @@ -2274,7 +2214,7 @@ static int is_state_visited(struct verifier_env *env, int insn_idx) * it will be rejected. 
Since there are no loops, we won't be * seeing this 'insn_idx' instruction again on the way to bpf_exit */ - new_sl = kmalloc(sizeof(struct verifier_state_list), GFP_USER); + new_sl = kmalloc(sizeof(struct bpf_verifier_state_list), GFP_USER); if (!new_sl) return -ENOMEM; @@ -2285,11 +2225,11 @@ static int is_state_visited(struct verifier_env *env, int insn_idx) return 0; } -static int do_check(struct verifier_env *env) +static int do_check(struct bpf_verifier_env *env) { - struct verifier_state *state = &env->cur_state; + struct bpf_verifier_state *state = &env->cur_state; struct bpf_insn *insns = env->prog->insnsi; - struct reg_state *regs = state->regs; + struct bpf_reg_state *regs = state->regs; int insn_cnt = env->prog->len; int insn_idx, prev_insn_idx = 0; int insn_processed = 0; @@ -2572,7 +2512,7 @@ static int check_map_prog_compatibility(struct bpf_map *map, /* look for pseudo eBPF instructions that access map FDs and * replace them with actual map pointers */ -static int replace_map_fd_with_map_ptr(struct verifier_env *env) +static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env) { struct bpf_insn *insn = env->prog->insnsi; int insn_cnt = env->prog->len; @@ -2669,7 +2609,7 @@ next_insn: } /* drop refcnt of maps used by the rejected program */ -static void release_maps(struct verifier_env *env) +static void release_maps(struct bpf_verifier_env *env) { int i; @@ -2678,7 +2618,7 @@ static void release_maps(struct verifier_env *env) } /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */ -static void convert_pseudo_ld_imm64(struct verifier_env *env) +static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env) { struct bpf_insn *insn = env->prog->insnsi; int insn_cnt = env->prog->len; @@ -2692,7 +2632,7 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env) /* convert load instructions that access fields of 'struct __sk_buff' * into sequence of instructions that access fields of 'struct sk_buff' */ -static int 
convert_ctx_accesses(struct verifier_env *env) +static int convert_ctx_accesses(struct bpf_verifier_env *env) { const struct bpf_verifier_ops *ops = env->prog->aux->ops; const int insn_cnt = env->prog->len; @@ -2757,9 +2697,9 @@ static int convert_ctx_accesses(struct verifier_env *env) return 0; } -static void free_states(struct verifier_env *env) +static void free_states(struct bpf_verifier_env *env) { - struct verifier_state_list *sl, *sln; + struct bpf_verifier_state_list *sl, *sln; int i; if (!env->explored_states) @@ -2782,16 +2722,16 @@ static void free_states(struct verifier_env *env) int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) { char __user *log_ubuf = NULL; - struct verifier_env *env; + struct bpf_verifier_env *env; int ret = -EINVAL; if ((*prog)->len <= 0 || (*prog)->len > BPF_MAXINSNS) return -E2BIG; - /* 'struct verifier_env' can be global, but since it's not small, + /* 'struct bpf_verifier_env' can be global, but since it's not small, * allocate/free it every time bpf_check() is called */ - env = kzalloc(sizeof(struct verifier_env), GFP_KERNEL); + env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL); if (!env) return -ENOMEM; @@ -2833,7 +2773,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) goto skip_full_check; env->explored_states = kcalloc(env->prog->len, - sizeof(struct verifier_state_list *), + sizeof(struct bpf_verifier_state_list *), GFP_USER); ret = -ENOMEM; if (!env->explored_states) -- cgit v1.1 From 13a27dfc669724564aafa2699976ee756029fed2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:43:58 +0100 Subject: bpf: enable non-core use of the verfier Advanced JIT compilers and translators may want to use eBPF verifier as a base for parsers or to perform custom checks and validations. Add ability for external users to invoke the verifier and provide callbacks to be invoked for every instruction checked. 
For now only the most basic callback, for per-instruction pre-interpretation checks, is added. More advanced users may also like to have a per-instruction post callback and a state comparison callback. Signed-off-by: Jakub Kicinski Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf_verifier.h | 11 +++++++ kernel/bpf/verifier.c | 68 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 9457a22..c5cb661 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -59,6 +59,12 @@ struct bpf_insn_aux_data { #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ +struct bpf_verifier_env; +struct bpf_ext_analyzer_ops { + int (*insn_hook)(struct bpf_verifier_env *env, + int insn_idx, int prev_insn_idx); +}; + /* single container for all structs * one verifier_env per bpf_check() call */ @@ -68,6 +74,8 @@ struct bpf_verifier_env { int stack_size; /* number of states to be processed */ struct bpf_verifier_state cur_state; /* current verifier state */ struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ + const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */ + void *analyzer_priv; /* pointer to external analyzer's private data */ struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ u32 used_map_cnt; /* number of used maps */ u32 id_gen; /* used to generate unique reg IDs */ @@ -76,4 +84,7 @@ struct bpf_verifier_env { struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ }; +int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, + void *priv); + #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index dca2b9b..ee86a77 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -632,6 +632,10 @@ static int check_packet_access(struct 
bpf_verifier_env *env, u32 regno, int off, static int check_ctx_access(struct bpf_verifier_env *env, int off, int size, enum bpf_access_type t, enum bpf_reg_type *reg_type) { + /* for analyzer ctx accesses are already validated and converted */ + if (env->analyzer_ops) + return 0; + if (env->prog->aux->ops->is_valid_access && env->prog->aux->ops->is_valid_access(off, size, t, reg_type)) { /* remember the offset of last byte accessed in ctx */ @@ -2225,6 +2229,15 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) return 0; } +static int ext_analyzer_insn_hook(struct bpf_verifier_env *env, + int insn_idx, int prev_insn_idx) +{ + if (!env->analyzer_ops || !env->analyzer_ops->insn_hook) + return 0; + + return env->analyzer_ops->insn_hook(env, insn_idx, prev_insn_idx); +} + static int do_check(struct bpf_verifier_env *env) { struct bpf_verifier_state *state = &env->cur_state; @@ -2283,6 +2296,10 @@ static int do_check(struct bpf_verifier_env *env) print_bpf_insn(insn); } + err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx); + if (err) + return err; + if (class == BPF_ALU || class == BPF_ALU64) { err = check_alu_op(env, insn); if (err) @@ -2845,3 +2862,54 @@ err_free_env: kfree(env); return ret; } + +int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, + void *priv) +{ + struct bpf_verifier_env *env; + int ret; + + env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL); + if (!env) + return -ENOMEM; + + env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) * + prog->len); + ret = -ENOMEM; + if (!env->insn_aux_data) + goto err_free_env; + env->prog = prog; + env->analyzer_ops = ops; + env->analyzer_priv = priv; + + /* grab the mutex to protect few globals used by verifier */ + mutex_lock(&bpf_verifier_lock); + + log_level = 0; + + env->explored_states = kcalloc(env->prog->len, + sizeof(struct bpf_verifier_state_list *), + GFP_KERNEL); + ret = -ENOMEM; + if (!env->explored_states) + goto 
skip_full_check; + + ret = check_cfg(env); + if (ret < 0) + goto skip_full_check; + + env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); + + ret = do_check(env); + +skip_full_check: + while (pop_stack(env, NULL) >= 0); + free_states(env); + + mutex_unlock(&bpf_verifier_lock); + vfree(env->insn_aux_data); +err_free_env: + kfree(env); + return ret; +} +EXPORT_SYMBOL_GPL(bpf_analyzer); -- cgit v1.1 From 6b17387307bafc71624b9890b9239b6a438e2e89 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:43:59 +0100 Subject: bpf: recognize 64bit immediate loads as consts When running as parser interpret BPF_LD | BPF_IMM | BPF_DW instructions as loading CONST_IMM with the value stored in imm. The verifier will continue not recognizing those due to concerns about search space/program complexity increase. Signed-off-by: Jakub Kicinski Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ee86a77..8c3f794 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1769,9 +1769,19 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) if (err) return err; - if (insn->src_reg == 0) - /* generic move 64-bit immediate into a register */ + if (insn->src_reg == 0) { + /* generic move 64-bit immediate into a register, + * only analyzer needs to collect the ld_imm value. 
+ */ + u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm; + + if (!env->analyzer_ops) + return 0; + + regs[insn->dst_reg].type = CONST_IMM; + regs[insn->dst_reg].imm = imm; return 0; + } /* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */ BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD); -- cgit v1.1 From cd7df56ed3e60d046ddb3acd987778c00aa9ee33 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:44:00 +0100 Subject: nfp: add BPF to NFP code translator Add translator for JITing eBPF to operations which can be executed on NFP's programmable engines. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/Makefile | 6 + drivers/net/ethernet/netronome/nfp/nfp_asm.h | 233 +++ drivers/net/ethernet/netronome/nfp/nfp_bpf.h | 208 +++ drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c | 1724 ++++++++++++++++++++ .../net/ethernet/netronome/nfp/nfp_bpf_verifier.c | 162 ++ 5 files changed, 2333 insertions(+) create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_asm.h create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_bpf.h create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile index 6817881..5f12689 100644 --- a/drivers/net/ethernet/netronome/nfp/Makefile +++ b/drivers/net/ethernet/netronome/nfp/Makefile @@ -5,4 +5,10 @@ nfp_netvf-objs := \ nfp_net_ethtool.o \ nfp_netvf_main.o +ifeq ($(CONFIG_BPF_SYSCALL),y) +nfp_netvf-objs += \ + nfp_bpf_verifier.o \ + nfp_bpf_jit.o +endif + nfp_netvf-$(CONFIG_NFP_NET_DEBUG) += nfp_net_debugfs.o diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h new file mode 100644 index 0000000..22484b6 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -0,0 +1,233 @@ +/* + * Copyright (C) 2016 Netronome Systems, 
Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __NFP_ASM_H__ +#define __NFP_ASM_H__ 1 + +#include "nfp_bpf.h" + +#define REG_NONE 0 + +#define RE_REG_NO_DST 0x020 +#define RE_REG_IMM 0x020 +#define RE_REG_IMM_encode(x) \ + (RE_REG_IMM | ((x) & 0x1f) | (((x) & 0x60) << 1)) +#define RE_REG_IMM_MAX 0x07fULL +#define RE_REG_XFR 0x080 + +#define UR_REG_XFR 0x180 +#define UR_REG_NN 0x280 +#define UR_REG_NO_DST 0x300 +#define UR_REG_IMM UR_REG_NO_DST +#define UR_REG_IMM_encode(x) (UR_REG_IMM | (x)) +#define UR_REG_IMM_MAX 0x0ffULL + +#define OP_BR_BASE 0x0d800000020ULL +#define OP_BR_BASE_MASK 0x0f8000c3ce0ULL +#define OP_BR_MASK 0x0000000001fULL +#define OP_BR_EV_PIP 0x00000000300ULL +#define OP_BR_CSS 0x0000003c000ULL +#define OP_BR_DEFBR 0x00000300000ULL +#define OP_BR_ADDR_LO 0x007ffc00000ULL +#define OP_BR_ADDR_HI 0x10000000000ULL + +#define nfp_is_br(_insn) \ + (((_insn) & OP_BR_BASE_MASK) == OP_BR_BASE) + +enum br_mask { + BR_BEQ = 0x00, + BR_BNE = 0x01, + BR_BHS = 0x04, + BR_BLO = 0x05, + BR_BGE = 0x08, + BR_UNC = 0x18, +}; + +enum br_ev_pip { + BR_EV_PIP_UNCOND = 0, + BR_EV_PIP_COND = 1, +}; + +enum br_ctx_signal_state { + BR_CSS_NONE = 2, +}; + +#define OP_BBYTE_BASE 0x0c800000000ULL +#define OP_BB_A_SRC 0x000000000ffULL +#define OP_BB_BYTE 0x00000000300ULL +#define OP_BB_B_SRC 0x0000003fc00ULL +#define OP_BB_I8 0x00000040000ULL +#define OP_BB_EQ 0x00000080000ULL +#define OP_BB_DEFBR 0x00000300000ULL +#define OP_BB_ADDR_LO 0x007ffc00000ULL +#define OP_BB_ADDR_HI 0x10000000000ULL + +#define OP_BALU_BASE 0x0e800000000ULL +#define OP_BA_A_SRC 0x000000003ffULL +#define OP_BA_B_SRC 0x000000ffc00ULL +#define OP_BA_DEFBR 0x00000300000ULL +#define OP_BA_ADDR_HI 0x0007fc00000ULL + +#define OP_IMMED_A_SRC 0x000000003ffULL +#define OP_IMMED_B_SRC 0x000000ffc00ULL +#define OP_IMMED_IMM 0x0000ff00000ULL +#define OP_IMMED_WIDTH 0x00060000000ULL +#define OP_IMMED_INV 0x00080000000ULL +#define OP_IMMED_SHIFT 0x00600000000ULL +#define OP_IMMED_BASE 0x0f000000000ULL +#define OP_IMMED_WR_AB 0x20000000000ULL + 
+enum immed_width { + IMMED_WIDTH_ALL = 0, + IMMED_WIDTH_BYTE = 1, + IMMED_WIDTH_WORD = 2, +}; + +enum immed_shift { + IMMED_SHIFT_0B = 0, + IMMED_SHIFT_1B = 1, + IMMED_SHIFT_2B = 2, +}; + +#define OP_SHF_BASE 0x08000000000ULL +#define OP_SHF_A_SRC 0x000000000ffULL +#define OP_SHF_SC 0x00000000300ULL +#define OP_SHF_B_SRC 0x0000003fc00ULL +#define OP_SHF_I8 0x00000040000ULL +#define OP_SHF_SW 0x00000080000ULL +#define OP_SHF_DST 0x0000ff00000ULL +#define OP_SHF_SHIFT 0x001f0000000ULL +#define OP_SHF_OP 0x00e00000000ULL +#define OP_SHF_DST_AB 0x01000000000ULL +#define OP_SHF_WR_AB 0x20000000000ULL + +enum shf_op { + SHF_OP_NONE = 0, + SHF_OP_AND = 2, + SHF_OP_OR = 5, +}; + +enum shf_sc { + SHF_SC_R_ROT = 0, + SHF_SC_R_SHF = 1, + SHF_SC_L_SHF = 2, + SHF_SC_R_DSHF = 3, +}; + +#define OP_ALU_A_SRC 0x000000003ffULL +#define OP_ALU_B_SRC 0x000000ffc00ULL +#define OP_ALU_DST 0x0003ff00000ULL +#define OP_ALU_SW 0x00040000000ULL +#define OP_ALU_OP 0x00f80000000ULL +#define OP_ALU_DST_AB 0x01000000000ULL +#define OP_ALU_BASE 0x0a000000000ULL +#define OP_ALU_WR_AB 0x20000000000ULL + +enum alu_op { + ALU_OP_NONE = 0x00, + ALU_OP_ADD = 0x01, + ALU_OP_NEG = 0x04, + ALU_OP_AND = 0x08, + ALU_OP_SUB_C = 0x0d, + ALU_OP_ADD_C = 0x11, + ALU_OP_OR = 0x14, + ALU_OP_SUB = 0x15, + ALU_OP_XOR = 0x18, +}; + +enum alu_dst_ab { + ALU_DST_A = 0, + ALU_DST_B = 1, +}; + +#define OP_LDF_BASE 0x0c000000000ULL +#define OP_LDF_A_SRC 0x000000000ffULL +#define OP_LDF_SC 0x00000000300ULL +#define OP_LDF_B_SRC 0x0000003fc00ULL +#define OP_LDF_I8 0x00000040000ULL +#define OP_LDF_SW 0x00000080000ULL +#define OP_LDF_ZF 0x00000100000ULL +#define OP_LDF_BMASK 0x0000f000000ULL +#define OP_LDF_SHF 0x001f0000000ULL +#define OP_LDF_WR_AB 0x20000000000ULL + +#define OP_CMD_A_SRC 0x000000000ffULL +#define OP_CMD_CTX 0x00000000300ULL +#define OP_CMD_B_SRC 0x0000003fc00ULL +#define OP_CMD_TOKEN 0x000000c0000ULL +#define OP_CMD_XFER 0x00001f00000ULL +#define OP_CMD_CNT 0x0000e000000ULL +#define OP_CMD_SIG 
0x000f0000000ULL +#define OP_CMD_TGT_CMD 0x07f00000000ULL +#define OP_CMD_MODE 0x1c0000000000ULL + +struct cmd_tgt_act { + u8 token; + u8 tgt_cmd; +}; + +enum cmd_tgt_map { + CMD_TGT_READ8, + CMD_TGT_WRITE8, + CMD_TGT_READ_LE, + CMD_TGT_READ_SWAP_LE, + __CMD_TGT_MAP_SIZE, +}; + +enum cmd_mode { + CMD_MODE_40b_AB = 0, + CMD_MODE_40b_BA = 1, + CMD_MODE_32b = 4, +}; + +enum cmd_ctx_swap { + CMD_CTX_SWAP = 0, + CMD_CTX_NO_SWAP = 3, +}; + +#define OP_LCSR_BASE 0x0fc00000000ULL +#define OP_LCSR_A_SRC 0x000000003ffULL +#define OP_LCSR_B_SRC 0x000000ffc00ULL +#define OP_LCSR_WRITE 0x00000200000ULL +#define OP_LCSR_ADDR 0x001ffc00000ULL + +enum lcsr_wr_src { + LCSR_WR_AREG, + LCSR_WR_BREG, + LCSR_WR_IMM, +}; + +#define OP_CARB_BASE 0x0e000000000ULL +#define OP_CARB_OR 0x00000010000ULL + +#endif diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h new file mode 100644 index 0000000..3726421 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h @@ -0,0 +1,208 @@ +/* + * Copyright (C) 2016 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NFP_BPF_H__ +#define __NFP_BPF_H__ 1 + +#include <linux/bitfield.h> +#include <linux/bpf.h> +#include <linux/list.h> +#include <linux/types.h> + +#define FIELD_FIT(mask, val) (!((((u64)(val)) << __bf_shf(mask)) & ~(mask))) + +/* For branch fixup logic use up-most byte of branch instruction as scratch + * area. Remember to clear this before sending instructions to HW! + */ +#define OP_BR_SPECIAL 0xff00000000000000ULL + +enum br_special { + OP_BR_NORMAL = 0, + OP_BR_GO_OUT, + OP_BR_GO_ABORT, +}; + +enum static_regs { + STATIC_REG_PKT = 1, +#define REG_PKT_BANK ALU_DST_A + STATIC_REG_IMM = 2, /* Bank AB */ +}; + +enum nfp_bpf_action_type { + NN_ACT_TC_DROP, +}; + +/* Software register representation, hardware encoding in asm.h */ +#define NN_REG_TYPE GENMASK(31, 24) +#define NN_REG_VAL GENMASK(7, 0) + +enum nfp_bpf_reg_type { + NN_REG_GPR_A = BIT(0), + NN_REG_GPR_B = BIT(1), + NN_REG_NNR = BIT(2), + NN_REG_XFER = BIT(3), + NN_REG_IMM = BIT(4), + NN_REG_NONE = BIT(5), +}; + +#define NN_REG_GPR_BOTH (NN_REG_GPR_A | NN_REG_GPR_B) + +#define reg_both(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_BOTH)) +#define reg_a(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_A)) +#define reg_b(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_B)) +#define reg_nnr(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_NNR)) +#define reg_xfer(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_XFER)) +#define reg_imm(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_IMM)) +#define reg_none() (FIELD_PREP(NN_REG_TYPE, NN_REG_NONE)) + +#define pkt_reg(np) reg_a((np)->regs_per_thread - 
STATIC_REG_PKT) +#define imm_a(np) reg_a((np)->regs_per_thread - STATIC_REG_IMM) +#define imm_b(np) reg_b((np)->regs_per_thread - STATIC_REG_IMM) +#define imm_both(np) reg_both((np)->regs_per_thread - STATIC_REG_IMM) + +#define NFP_BPF_ABI_FLAGS reg_nnr(0) +#define NFP_BPF_ABI_PKT reg_nnr(2) +#define NFP_BPF_ABI_LEN reg_nnr(3) + +struct nfp_prog; +struct nfp_insn_meta; +typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *); + +#define nfp_prog_first_meta(nfp_prog) \ + list_first_entry(&(nfp_prog)->insns, struct nfp_insn_meta, l) +#define nfp_prog_last_meta(nfp_prog) \ + list_last_entry(&(nfp_prog)->insns, struct nfp_insn_meta, l) +#define nfp_meta_next(meta) list_next_entry(meta, l) +#define nfp_meta_prev(meta) list_prev_entry(meta, l) + +/** + * struct nfp_insn_meta - BPF instruction wrapper + * @insn: BPF instruction + * @off: index of first generated machine instruction (in nfp_prog.prog) + * @n: eBPF instruction number + * @skip: skip this instruction (optimized out) + * @double_cb: callback for second part of the instruction + * @l: link on nfp_prog->insns list + */ +struct nfp_insn_meta { + struct bpf_insn insn; + unsigned int off; + unsigned short n; + bool skip; + instr_cb_t double_cb; + + struct list_head l; +}; + +#define BPF_SIZE_MASK 0x18 + +static inline u8 mbpf_class(const struct nfp_insn_meta *meta) +{ + return BPF_CLASS(meta->insn.code); +} + +static inline u8 mbpf_src(const struct nfp_insn_meta *meta) +{ + return BPF_SRC(meta->insn.code); +} + +static inline u8 mbpf_op(const struct nfp_insn_meta *meta) +{ + return BPF_OP(meta->insn.code); +} + +static inline u8 mbpf_mode(const struct nfp_insn_meta *meta) +{ + return BPF_MODE(meta->insn.code); +} + +/** + * struct nfp_prog - nfp BPF program + * @prog: machine code + * @prog_len: number of valid instructions in @prog array + * @__prog_alloc_len: alloc size of @prog array + * @act: BPF program/action type (TC DA, TC with action, XDP etc.) 
+ * @num_regs: number of registers used by this program + * @regs_per_thread: number of basic registers allocated per thread + * @start_off: address of the first instruction in the memory + * @tgt_out: jump target for normal exit + * @tgt_abort: jump target for abort (e.g. access outside of packet buffer) + * @tgt_done: jump target to get the next packet + * @n_translated: number of successfully translated instructions (for errors) + * @error: error code if something went wrong + * @insns: list of BPF instruction wrappers (struct nfp_insn_meta) + */ +struct nfp_prog { + u64 *prog; + unsigned int prog_len; + unsigned int __prog_alloc_len; + + enum nfp_bpf_action_type act; + + unsigned int num_regs; + unsigned int regs_per_thread; + + unsigned int start_off; + unsigned int tgt_out; + unsigned int tgt_abort; + unsigned int tgt_done; + + unsigned int n_translated; + int error; + + struct list_head insns; +}; + +struct nfp_bpf_result { + unsigned int n_instr; + bool dense_mode; +}; + +#ifdef CONFIG_BPF_SYSCALL +int +nfp_bpf_jit(struct bpf_prog *filter, void *prog, enum nfp_bpf_action_type act, + unsigned int prog_start, unsigned int prog_done, + unsigned int prog_sz, struct nfp_bpf_result *res); +#else +int +nfp_bpf_jit(struct bpf_prog *filter, void *prog, enum nfp_bpf_action_type act, + unsigned int prog_start, unsigned int prog_done, + unsigned int prog_sz, struct nfp_bpf_result *res) +{ + return -ENOTSUPP; +} +#endif + +int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog); + +#endif diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c new file mode 100644 index 0000000..cfbf536 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c @@ -0,0 +1,1724 @@ +/* + * Copyright (C) 2016 Netronome Systems, Inc. 
+ * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define pr_fmt(fmt) "NFP net bpf: " fmt + +#include +#include +#include +#include +#include + +#include "nfp_asm.h" +#include "nfp_bpf.h" + +/* --- NFP prog --- */ +/* Foreach "multiple" entries macros provide pos and next pointers. + * It's safe to modify the next pointers (but not pos). 
+ */ +#define nfp_for_each_insn_walk2(nfp_prog, pos, next) \ + for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \ + next = list_next_entry(pos, l); \ + &(nfp_prog)->insns != &pos->l && \ + &(nfp_prog)->insns != &next->l; \ + pos = nfp_meta_next(pos), \ + next = nfp_meta_next(pos)) + +#define nfp_for_each_insn_walk3(nfp_prog, pos, next, next2) \ + for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \ + next = list_next_entry(pos, l), \ + next2 = list_next_entry(next, l); \ + &(nfp_prog)->insns != &pos->l && \ + &(nfp_prog)->insns != &next->l && \ + &(nfp_prog)->insns != &next2->l; \ + pos = nfp_meta_next(pos), \ + next = nfp_meta_next(pos), \ + next2 = nfp_meta_next(next)) + +static bool +nfp_meta_has_next(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return meta->l.next != &nfp_prog->insns; +} + +static bool +nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return meta->l.prev != &nfp_prog->insns; +} + +static void nfp_prog_free(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta, *tmp; + + list_for_each_entry_safe(meta, tmp, &nfp_prog->insns, l) { + list_del(&meta->l); + kfree(meta); + } + kfree(nfp_prog); +} + +static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn) +{ + if (nfp_prog->__prog_alloc_len == nfp_prog->prog_len) { + nfp_prog->error = -ENOSPC; + return; + } + + nfp_prog->prog[nfp_prog->prog_len] = insn; + nfp_prog->prog_len++; +} + +static unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog) +{ + return nfp_prog->start_off + nfp_prog->prog_len; +} + +static unsigned int +nfp_prog_offset_to_index(struct nfp_prog *nfp_prog, unsigned int offset) +{ + return offset - nfp_prog->start_off; +} + +/* --- SW reg --- */ +struct nfp_insn_ur_regs { + enum alu_dst_ab dst_ab; + u16 dst; + u16 areg, breg; + bool swap; + bool wr_both; +}; + +struct nfp_insn_re_regs { + enum alu_dst_ab dst_ab; + u8 dst; + u8 areg, breg; + bool swap; + bool wr_both; + bool i8; +}; + 
+static u16 nfp_swreg_to_unreg(u32 swreg, bool is_dst) +{ + u16 val = FIELD_GET(NN_REG_VAL, swreg); + + switch (FIELD_GET(NN_REG_TYPE, swreg)) { + case NN_REG_GPR_A: + case NN_REG_GPR_B: + case NN_REG_GPR_BOTH: + return val; + case NN_REG_NNR: + return UR_REG_NN | val; + case NN_REG_XFER: + return UR_REG_XFR | val; + case NN_REG_IMM: + if (val & ~0xff) { + pr_err("immediate too large\n"); + return 0; + } + return UR_REG_IMM_encode(val); + case NN_REG_NONE: + return is_dst ? UR_REG_NO_DST : REG_NONE; + default: + pr_err("unrecognized reg encoding %08x\n", swreg); + return 0; + } +} + +static int +swreg_to_unrestricted(u32 dst, u32 lreg, u32 rreg, struct nfp_insn_ur_regs *reg) +{ + memset(reg, 0, sizeof(*reg)); + + /* Decode destination */ + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM) + return -EFAULT; + + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_B) + reg->dst_ab = ALU_DST_B; + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_BOTH) + reg->wr_both = true; + reg->dst = nfp_swreg_to_unreg(dst, true); + + /* Decode source operands */ + if (FIELD_GET(NN_REG_TYPE, lreg) == FIELD_GET(NN_REG_TYPE, rreg)) + return -EFAULT; + + if (FIELD_GET(NN_REG_TYPE, lreg) == NN_REG_GPR_B || + FIELD_GET(NN_REG_TYPE, rreg) == NN_REG_GPR_A) { + reg->areg = nfp_swreg_to_unreg(rreg, false); + reg->breg = nfp_swreg_to_unreg(lreg, false); + reg->swap = true; + } else { + reg->areg = nfp_swreg_to_unreg(lreg, false); + reg->breg = nfp_swreg_to_unreg(rreg, false); + } + + return 0; +} + +static u16 nfp_swreg_to_rereg(u32 swreg, bool is_dst, bool has_imm8, bool *i8) +{ + u16 val = FIELD_GET(NN_REG_VAL, swreg); + + switch (FIELD_GET(NN_REG_TYPE, swreg)) { + case NN_REG_GPR_A: + case NN_REG_GPR_B: + case NN_REG_GPR_BOTH: + return val; + case NN_REG_XFER: + return RE_REG_XFR | val; + case NN_REG_IMM: + if (val & ~(0x7f | has_imm8 << 7)) { + pr_err("immediate too large\n"); + return 0; + } + *i8 = val & 0x80; + return RE_REG_IMM_encode(val & 0x7f); + case NN_REG_NONE: + return is_dst ? 
RE_REG_NO_DST : REG_NONE; + default: + pr_err("unrecognized reg encoding\n"); + return 0; + } +} + +static int +swreg_to_restricted(u32 dst, u32 lreg, u32 rreg, struct nfp_insn_re_regs *reg, + bool has_imm8) +{ + memset(reg, 0, sizeof(*reg)); + + /* Decode destination */ + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM) + return -EFAULT; + + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_B) + reg->dst_ab = ALU_DST_B; + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_BOTH) + reg->wr_both = true; + reg->dst = nfp_swreg_to_rereg(dst, true, false, NULL); + + /* Decode source operands */ + if (FIELD_GET(NN_REG_TYPE, lreg) == FIELD_GET(NN_REG_TYPE, rreg)) + return -EFAULT; + + if (FIELD_GET(NN_REG_TYPE, lreg) == NN_REG_GPR_B || + FIELD_GET(NN_REG_TYPE, rreg) == NN_REG_GPR_A) { + reg->areg = nfp_swreg_to_rereg(rreg, false, has_imm8, &reg->i8); + reg->breg = nfp_swreg_to_rereg(lreg, false, has_imm8, &reg->i8); + reg->swap = true; + } else { + reg->areg = nfp_swreg_to_rereg(lreg, false, has_imm8, &reg->i8); + reg->breg = nfp_swreg_to_rereg(rreg, false, has_imm8, &reg->i8); + } + + return 0; +} + +/* --- Emitters --- */ +static const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = { + [CMD_TGT_WRITE8] = { 0x00, 0x42 }, + [CMD_TGT_READ8] = { 0x01, 0x43 }, + [CMD_TGT_READ_LE] = { 0x01, 0x40 }, + [CMD_TGT_READ_SWAP_LE] = { 0x03, 0x40 }, +}; + +static void +__emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, + u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, bool sync) +{ + enum cmd_ctx_swap ctx; + u64 insn; + + if (sync) + ctx = CMD_CTX_SWAP; + else + ctx = CMD_CTX_NO_SWAP; + + insn = FIELD_PREP(OP_CMD_A_SRC, areg) | + FIELD_PREP(OP_CMD_CTX, ctx) | + FIELD_PREP(OP_CMD_B_SRC, breg) | + FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) | + FIELD_PREP(OP_CMD_XFER, xfer) | + FIELD_PREP(OP_CMD_CNT, size) | + FIELD_PREP(OP_CMD_SIG, sync) | + FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) | + FIELD_PREP(OP_CMD_MODE, mode); + + nfp_prog_push(nfp_prog, insn); +} + +static void 
+emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, + u8 mode, u8 xfer, u32 lreg, u32 rreg, u8 size, bool sync) +{ + struct nfp_insn_re_regs reg; + int err; + + err = swreg_to_restricted(reg_none(), lreg, rreg, ®, false); + if (err) { + nfp_prog->error = err; + return; + } + if (reg.swap) { + pr_err("cmd can't swap arguments\n"); + nfp_prog->error = -EFAULT; + return; + } + + __emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, sync); +} + +static void +__emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip, + enum br_ctx_signal_state css, u16 addr, u8 defer) +{ + u16 addr_lo, addr_hi; + u64 insn; + + addr_lo = addr & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO)); + addr_hi = addr != addr_lo; + + insn = OP_BR_BASE | + FIELD_PREP(OP_BR_MASK, mask) | + FIELD_PREP(OP_BR_EV_PIP, ev_pip) | + FIELD_PREP(OP_BR_CSS, css) | + FIELD_PREP(OP_BR_DEFBR, defer) | + FIELD_PREP(OP_BR_ADDR_LO, addr_lo) | + FIELD_PREP(OP_BR_ADDR_HI, addr_hi); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer) +{ + __emit_br(nfp_prog, mask, + mask != BR_UNC ? 
BR_EV_PIP_COND : BR_EV_PIP_UNCOND, + BR_CSS_NONE, addr, defer); +} + +static void +__emit_br_byte(struct nfp_prog *nfp_prog, u8 areg, u8 breg, bool imm8, + u8 byte, bool equal, u16 addr, u8 defer) +{ + u16 addr_lo, addr_hi; + u64 insn; + + addr_lo = addr & (OP_BB_ADDR_LO >> __bf_shf(OP_BB_ADDR_LO)); + addr_hi = addr != addr_lo; + + insn = OP_BBYTE_BASE | + FIELD_PREP(OP_BB_A_SRC, areg) | + FIELD_PREP(OP_BB_BYTE, byte) | + FIELD_PREP(OP_BB_B_SRC, breg) | + FIELD_PREP(OP_BB_I8, imm8) | + FIELD_PREP(OP_BB_EQ, equal) | + FIELD_PREP(OP_BB_DEFBR, defer) | + FIELD_PREP(OP_BB_ADDR_LO, addr_lo) | + FIELD_PREP(OP_BB_ADDR_HI, addr_hi); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_br_byte_neq(struct nfp_prog *nfp_prog, + u32 dst, u8 imm, u8 byte, u16 addr, u8 defer) +{ + struct nfp_insn_re_regs reg; + int err; + + err = swreg_to_restricted(reg_none(), dst, reg_imm(imm), ®, true); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_br_byte(nfp_prog, reg.areg, reg.breg, reg.i8, byte, false, addr, + defer); +} + +static void +__emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi, + enum immed_width width, bool invert, + enum immed_shift shift, bool wr_both) +{ + u64 insn; + + insn = OP_IMMED_BASE | + FIELD_PREP(OP_IMMED_A_SRC, areg) | + FIELD_PREP(OP_IMMED_B_SRC, breg) | + FIELD_PREP(OP_IMMED_IMM, imm_hi) | + FIELD_PREP(OP_IMMED_WIDTH, width) | + FIELD_PREP(OP_IMMED_INV, invert) | + FIELD_PREP(OP_IMMED_SHIFT, shift) | + FIELD_PREP(OP_IMMED_WR_AB, wr_both); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_immed(struct nfp_prog *nfp_prog, u32 dst, u16 imm, + enum immed_width width, bool invert, enum immed_shift shift) +{ + struct nfp_insn_ur_regs reg; + int err; + + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM) { + nfp_prog->error = -EFAULT; + return; + } + + err = swreg_to_unrestricted(dst, dst, reg_imm(imm & 0xff), ®); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_immed(nfp_prog, reg.areg, reg.breg, imm >> 8, 
width, + invert, shift, reg.wr_both); +} + +static void +__emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, + enum shf_sc sc, u8 shift, + u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both) +{ + u64 insn; + + if (!FIELD_FIT(OP_SHF_SHIFT, shift)) { + nfp_prog->error = -EFAULT; + return; + } + + if (sc == SHF_SC_L_SHF) + shift = 32 - shift; + + insn = OP_SHF_BASE | + FIELD_PREP(OP_SHF_A_SRC, areg) | + FIELD_PREP(OP_SHF_SC, sc) | + FIELD_PREP(OP_SHF_B_SRC, breg) | + FIELD_PREP(OP_SHF_I8, i8) | + FIELD_PREP(OP_SHF_SW, sw) | + FIELD_PREP(OP_SHF_DST, dst) | + FIELD_PREP(OP_SHF_SHIFT, shift) | + FIELD_PREP(OP_SHF_OP, op) | + FIELD_PREP(OP_SHF_DST_AB, dst_ab) | + FIELD_PREP(OP_SHF_WR_AB, wr_both); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_shf(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum shf_op op, u32 rreg, + enum shf_sc sc, u8 shift) +{ + struct nfp_insn_re_regs reg; + int err; + + err = swreg_to_restricted(dst, lreg, rreg, ®, true); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift, + reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both); +} + +static void +__emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, + u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both) +{ + u64 insn; + + insn = OP_ALU_BASE | + FIELD_PREP(OP_ALU_A_SRC, areg) | + FIELD_PREP(OP_ALU_B_SRC, breg) | + FIELD_PREP(OP_ALU_DST, dst) | + FIELD_PREP(OP_ALU_SW, swap) | + FIELD_PREP(OP_ALU_OP, op) | + FIELD_PREP(OP_ALU_DST_AB, dst_ab) | + FIELD_PREP(OP_ALU_WR_AB, wr_both); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_alu(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum alu_op op, u32 rreg) +{ + struct nfp_insn_ur_regs reg; + int err; + + err = swreg_to_unrestricted(dst, lreg, rreg, ®); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_alu(nfp_prog, reg.dst, reg.dst_ab, + reg.areg, op, reg.breg, reg.swap, reg.wr_both); +} + +static void 
+__emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc, + u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8, + bool zero, bool swap, bool wr_both) +{ + u64 insn; + + insn = OP_LDF_BASE | + FIELD_PREP(OP_LDF_A_SRC, areg) | + FIELD_PREP(OP_LDF_SC, sc) | + FIELD_PREP(OP_LDF_B_SRC, breg) | + FIELD_PREP(OP_LDF_I8, imm8) | + FIELD_PREP(OP_LDF_SW, swap) | + FIELD_PREP(OP_LDF_ZF, zero) | + FIELD_PREP(OP_LDF_BMASK, bmask) | + FIELD_PREP(OP_LDF_SHF, shift) | + FIELD_PREP(OP_LDF_WR_AB, wr_both); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_ld_field_any(struct nfp_prog *nfp_prog, enum shf_sc sc, u8 shift, + u32 dst, u8 bmask, u32 src, bool zero) +{ + struct nfp_insn_re_regs reg; + int err; + + err = swreg_to_restricted(reg_none(), dst, src, ®, true); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift, + reg.i8, zero, reg.swap, reg.wr_both); +} + +static void +emit_ld_field(struct nfp_prog *nfp_prog, u32 dst, u8 bmask, u32 src, + enum shf_sc sc, u8 shift) +{ + emit_ld_field_any(nfp_prog, sc, shift, dst, bmask, src, false); +} + +/* --- Wrappers --- */ +static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift) +{ + if (!(imm & 0xffff0000)) { + *val = imm; + *shift = IMMED_SHIFT_0B; + } else if (!(imm & 0xff0000ff)) { + *val = imm >> 8; + *shift = IMMED_SHIFT_1B; + } else if (!(imm & 0x0000ffff)) { + *val = imm >> 16; + *shift = IMMED_SHIFT_2B; + } else { + return false; + } + + return true; +} + +static void wrp_immed(struct nfp_prog *nfp_prog, u32 dst, u32 imm) +{ + enum immed_shift shift; + u16 val; + + if (pack_immed(imm, &val, &shift)) { + emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, false, shift); + } else if (pack_immed(~imm, &val, &shift)) { + emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, true, shift); + } else { + emit_immed(nfp_prog, dst, imm & 0xffff, IMMED_WIDTH_ALL, + false, IMMED_SHIFT_0B); + emit_immed(nfp_prog, dst, imm >> 16, IMMED_WIDTH_WORD, + false, 
IMMED_SHIFT_2B); + } +} + +/* ur_load_imm_any() - encode immediate or use tmp register (unrestricted) + * If the @imm is small enough encode it directly in operand and return + * otherwise load @imm to a spare register and return its encoding. + */ +static u32 ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg) +{ + if (FIELD_FIT(UR_REG_IMM_MAX, imm)) + return reg_imm(imm); + + wrp_immed(nfp_prog, tmp_reg, imm); + return tmp_reg; +} + +/* re_load_imm_any() - encode immediate or use tmp register (restricted) + * If the @imm is small enough encode it directly in operand and return + * otherwise load @imm to a spare register and return its encoding. + */ +static u32 re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg) +{ + if (FIELD_FIT(RE_REG_IMM_MAX, imm)) + return reg_imm(imm); + + wrp_immed(nfp_prog, tmp_reg, imm); + return tmp_reg; +} + +static void +wrp_br_special(struct nfp_prog *nfp_prog, enum br_mask mask, + enum br_special special) +{ + emit_br(nfp_prog, mask, 0, 0); + + nfp_prog->prog[nfp_prog->prog_len - 1] |= + FIELD_PREP(OP_BR_SPECIAL, special); +} + +static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src) +{ + emit_alu(nfp_prog, reg_both(dst), reg_none(), ALU_OP_NONE, reg_b(src)); +} + +static int +construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, + u16 src, bool src_valid, u8 size) +{ + unsigned int i; + u16 shift, sz; + u32 tmp_reg; + + /* We load the value from the address indicated in @offset and then + * shift out the data we don't need. Note: this is big endian! + */ + sz = size < 4 ? 4 : size; + shift = size < 4 ? 
4 - size : 0; + + if (src_valid) { + /* Calculate the true offset (src_reg + imm) */ + tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); + emit_alu(nfp_prog, imm_both(nfp_prog), + reg_a(src), ALU_OP_ADD, tmp_reg); + /* Check packet length (size guaranteed to fit b/c it's u8) */ + emit_alu(nfp_prog, imm_a(nfp_prog), + imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size)); + emit_alu(nfp_prog, reg_none(), + NFP_BPF_ABI_LEN, ALU_OP_SUB, imm_a(nfp_prog)); + wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT); + /* Load data */ + emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0, + pkt_reg(nfp_prog), imm_b(nfp_prog), sz - 1, true); + } else { + /* Check packet length */ + tmp_reg = ur_load_imm_any(nfp_prog, offset + size, + imm_a(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + NFP_BPF_ABI_LEN, ALU_OP_SUB, tmp_reg); + wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT); + /* Load data */ + tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); + emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0, + pkt_reg(nfp_prog), tmp_reg, sz - 1, true); + } + + i = 0; + if (shift) + emit_shf(nfp_prog, reg_both(0), reg_none(), SHF_OP_NONE, + reg_xfer(0), SHF_SC_R_SHF, shift * 8); + else + for (; i * 4 < size; i++) + emit_alu(nfp_prog, reg_both(i), + reg_none(), ALU_OP_NONE, reg_xfer(i)); + + if (i < 2) + wrp_immed(nfp_prog, reg_both(1), 0); + + return 0; +} + +static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size) +{ + return construct_data_ind_ld(nfp_prog, offset, 0, false, size); +} + +static void +wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm) +{ + u32 tmp_reg; + + if (alu_op == ALU_OP_AND) { + if (!imm) + wrp_immed(nfp_prog, reg_both(dst), 0); + if (!imm || !~imm) + return; + } + if (alu_op == ALU_OP_OR) { + if (!~imm) + wrp_immed(nfp_prog, reg_both(dst), ~0U); + if (!imm || !~imm) + return; + } + if (alu_op == ALU_OP_XOR) { + if (!~imm) + emit_alu(nfp_prog, reg_both(dst), reg_none(), + ALU_OP_NEG, reg_b(dst)); + if (!imm || 
!~imm) + return; + } + + tmp_reg = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, tmp_reg); +} + +static int +wrp_alu64_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op, bool skip) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + + if (skip) { + meta->skip = true; + return 0; + } + + wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, imm & ~0U); + wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, alu_op, imm >> 32); + + return 0; +} + +static int +wrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op) +{ + u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2; + + emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src)); + emit_alu(nfp_prog, reg_both(dst + 1), + reg_a(dst + 1), alu_op, reg_b(src + 1)); + + return 0; +} + +static int +wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op, bool skip) +{ + const struct bpf_insn *insn = &meta->insn; + + if (skip) { + meta->skip = true; + return 0; + } + + wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + + return 0; +} + +static int +wrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op) +{ + u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2; + + emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src)); + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); + + return 0; +} + +static void +wrp_test_reg_one(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u8 src, + enum br_mask br_mask, u16 off) +{ + emit_alu(nfp_prog, reg_none(), reg_a(dst), alu_op, reg_b(src)); + emit_br(nfp_prog, br_mask, off, 0); +} + +static int +wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op, enum br_mask br_mask) +{ + const struct bpf_insn *insn = 
&meta->insn; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op, + insn->src_reg * 2, br_mask, insn->off); + wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op, + insn->src_reg * 2 + 1, br_mask, insn->off); + + return 0; +} + +static int +wrp_cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum br_mask br_mask, bool swap) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + u8 reg = insn->dst_reg * 2; + u32 tmp_reg; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); + if (!swap) + emit_alu(nfp_prog, reg_none(), reg_a(reg), ALU_OP_SUB, tmp_reg); + else + emit_alu(nfp_prog, reg_none(), tmp_reg, ALU_OP_SUB, reg_a(reg)); + + tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); + if (!swap) + emit_alu(nfp_prog, reg_none(), + reg_a(reg + 1), ALU_OP_SUB_C, tmp_reg); + else + emit_alu(nfp_prog, reg_none(), + tmp_reg, ALU_OP_SUB_C, reg_a(reg + 1)); + + emit_br(nfp_prog, br_mask, insn->off, 0); + + return 0; +} + +static int +wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum br_mask br_mask, bool swap) +{ + const struct bpf_insn *insn = &meta->insn; + u8 areg = insn->src_reg * 2, breg = insn->dst_reg * 2; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + if (swap) { + areg ^= breg; + breg ^= areg; + areg ^= breg; + } + + emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg)); + emit_alu(nfp_prog, reg_none(), + reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1)); + emit_br(nfp_prog, br_mask, insn->off, 0); + + return 0; +} + +/* --- Callbacks --- */ +static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2); + wrp_reg_mov(nfp_prog, insn->dst_reg * 2 + 1, insn->src_reg * 2 + 1); + + return 0; +} + +static 
int mov_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + u64 imm = meta->insn.imm; /* sign extend */ + + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2), imm & ~0U); + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), imm >> 32); + + return 0; +} + +static int xor_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_reg(nfp_prog, meta, ALU_OP_XOR); +} + +static int xor_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm); +} + +static int and_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_reg(nfp_prog, meta, ALU_OP_AND); +} + +static int and_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm); +} + +static int or_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_reg(nfp_prog, meta, ALU_OP_OR); +} + +static int or_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm); +} + +static int add_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), + reg_a(insn->dst_reg * 2), ALU_OP_ADD, + reg_b(insn->src_reg * 2)); + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_ADD_C, + reg_b(insn->src_reg * 2 + 1)); + + return 0; +} + +static int add_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + + wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_ADD, imm & ~0U); + wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_ADD_C, imm >> 32); + + return 0; +} + +static int sub_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; 
+ + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), + reg_a(insn->dst_reg * 2), ALU_OP_SUB, + reg_b(insn->src_reg * 2)); + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_SUB_C, + reg_b(insn->src_reg * 2 + 1)); + + return 0; +} + +static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + + wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_SUB, imm & ~0U); + wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_SUB_C, imm >> 32); + + return 0; +} + +static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + if (insn->imm != 32) + return 1; /* TODO */ + + wrp_reg_mov(nfp_prog, insn->dst_reg * 2 + 1, insn->dst_reg * 2); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), 0); + + return 0; +} + +static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + if (insn->imm != 32) + return 1; /* TODO */ + + wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->dst_reg * 2 + 1); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + + return 0; +} + +static int mov_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + + return 0; +} + +static int mov_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + + return 0; +} + +static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_XOR); +} + +static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return 
wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR, !~meta->insn.imm); +} + +static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_AND); +} + +static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm); +} + +static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_OR); +} + +static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm); +} + +static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_ADD); +} + +static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD, !meta->insn.imm); +} + +static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_SUB); +} + +static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm); +} + +static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + if (!insn->imm) + return 1; /* TODO: zero shift means indirect */ + + emit_shf(nfp_prog, reg_both(insn->dst_reg * 2), + reg_none(), SHF_OP_NONE, reg_b(insn->dst_reg * 2), + SHF_SC_L_SHF, insn->imm); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + + return 0; +} + +static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + wrp_immed(nfp_prog, reg_both(nfp_meta_prev(meta)->insn.dst_reg * 2 + 1), + meta->insn.imm); + + return 0; +} + +static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + meta->double_cb = imm_ld8_part2; + wrp_immed(nfp_prog, 
reg_both(insn->dst_reg * 2), insn->imm); + + return 0; +} + +static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ld(nfp_prog, meta->insn.imm, 1); +} + +static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ld(nfp_prog, meta->insn.imm, 2); +} + +static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ld(nfp_prog, meta->insn.imm, 4); +} + +static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ind_ld(nfp_prog, meta->insn.imm, + meta->insn.src_reg * 2, true, 1); +} + +static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ind_ld(nfp_prog, meta->insn.imm, + meta->insn.src_reg * 2, true, 2); +} + +static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ind_ld(nfp_prog, meta->insn.imm, + meta->insn.src_reg * 2, true, 4); +} + +static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + if (meta->insn.off == offsetof(struct sk_buff, len)) + emit_alu(nfp_prog, reg_both(meta->insn.dst_reg * 2), + reg_none(), ALU_OP_NONE, NFP_BPF_ABI_LEN); + else + return -ENOTSUPP; + + return 0; +} + +static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + if (meta->insn.off < 0) /* TODO */ + return -ENOTSUPP; + emit_br(nfp_prog, BR_UNC, meta->insn.off, 0); + + return 0; +} + +static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + u32 or1 = reg_a(insn->dst_reg * 2), or2 = reg_b(insn->dst_reg * 2 + 1); + u32 tmp_reg; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + if (imm & ~0U) { + tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); + emit_alu(nfp_prog, imm_a(nfp_prog), + reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg); + or1 = 
imm_a(nfp_prog); + } + + if (imm >> 32) { + tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); + emit_alu(nfp_prog, imm_b(nfp_prog), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg); + or2 = imm_b(nfp_prog); + } + + emit_alu(nfp_prog, reg_none(), or1, ALU_OP_OR, or2); + emit_br(nfp_prog, BR_BEQ, insn->off, 0); + + return 0; +} + +static int jgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_imm(nfp_prog, meta, BR_BLO, false); +} + +static int jge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true); +} + +static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + u32 tmp_reg; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + if (!imm) { + meta->skip = true; + return 0; + } + + if (imm & ~0U) { + tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + reg_a(insn->dst_reg * 2), ALU_OP_AND, tmp_reg); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + } + + if (imm >> 32) { + tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_AND, tmp_reg); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + } + + return 0; +} + +static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + u32 tmp_reg; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + if (!imm) { + emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2), + ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1)); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + } + + tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + + tmp_reg = ur_load_imm_any(nfp_prog, 
imm >> 32, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + + return 0; +} + +static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2), + ALU_OP_XOR, reg_b(insn->src_reg * 2)); + emit_alu(nfp_prog, imm_b(nfp_prog), reg_a(insn->dst_reg * 2 + 1), + ALU_OP_XOR, reg_b(insn->src_reg * 2 + 1)); + emit_alu(nfp_prog, reg_none(), + imm_a(nfp_prog), ALU_OP_OR, imm_b(nfp_prog)); + emit_br(nfp_prog, BR_BEQ, insn->off, 0); + + return 0; +} + +static int jgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_reg(nfp_prog, meta, BR_BLO, false); +} + +static int jge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true); +} + +static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE); +} + +static int jne_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE); +} + +static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + wrp_br_special(nfp_prog, BR_UNC, OP_BR_GO_OUT); + + return 0; +} + +static const instr_cb_t instr_cb[256] = { + [BPF_ALU64 | BPF_MOV | BPF_X] = mov_reg64, + [BPF_ALU64 | BPF_MOV | BPF_K] = mov_imm64, + [BPF_ALU64 | BPF_XOR | BPF_X] = xor_reg64, + [BPF_ALU64 | BPF_XOR | BPF_K] = xor_imm64, + [BPF_ALU64 | BPF_AND | BPF_X] = and_reg64, + [BPF_ALU64 | BPF_AND | BPF_K] = and_imm64, + [BPF_ALU64 | BPF_OR | BPF_X] = or_reg64, + [BPF_ALU64 | BPF_OR | BPF_K] = or_imm64, + [BPF_ALU64 | BPF_ADD | BPF_X] = add_reg64, + [BPF_ALU64 | BPF_ADD | BPF_K] = add_imm64, + [BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64, + [BPF_ALU64 | BPF_SUB | BPF_K] = 
sub_imm64, + [BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64, + [BPF_ALU64 | BPF_RSH | BPF_K] = shr_imm64, + [BPF_ALU | BPF_MOV | BPF_X] = mov_reg, + [BPF_ALU | BPF_MOV | BPF_K] = mov_imm, + [BPF_ALU | BPF_XOR | BPF_X] = xor_reg, + [BPF_ALU | BPF_XOR | BPF_K] = xor_imm, + [BPF_ALU | BPF_AND | BPF_X] = and_reg, + [BPF_ALU | BPF_AND | BPF_K] = and_imm, + [BPF_ALU | BPF_OR | BPF_X] = or_reg, + [BPF_ALU | BPF_OR | BPF_K] = or_imm, + [BPF_ALU | BPF_ADD | BPF_X] = add_reg, + [BPF_ALU | BPF_ADD | BPF_K] = add_imm, + [BPF_ALU | BPF_SUB | BPF_X] = sub_reg, + [BPF_ALU | BPF_SUB | BPF_K] = sub_imm, + [BPF_ALU | BPF_LSH | BPF_K] = shl_imm, + [BPF_LD | BPF_IMM | BPF_DW] = imm_ld8, + [BPF_LD | BPF_ABS | BPF_B] = data_ld1, + [BPF_LD | BPF_ABS | BPF_H] = data_ld2, + [BPF_LD | BPF_ABS | BPF_W] = data_ld4, + [BPF_LD | BPF_IND | BPF_B] = data_ind_ld1, + [BPF_LD | BPF_IND | BPF_H] = data_ind_ld2, + [BPF_LD | BPF_IND | BPF_W] = data_ind_ld4, + [BPF_LDX | BPF_MEM | BPF_W] = mem_ldx4, + [BPF_JMP | BPF_JA | BPF_K] = jump, + [BPF_JMP | BPF_JEQ | BPF_K] = jeq_imm, + [BPF_JMP | BPF_JGT | BPF_K] = jgt_imm, + [BPF_JMP | BPF_JGE | BPF_K] = jge_imm, + [BPF_JMP | BPF_JSET | BPF_K] = jset_imm, + [BPF_JMP | BPF_JNE | BPF_K] = jne_imm, + [BPF_JMP | BPF_JEQ | BPF_X] = jeq_reg, + [BPF_JMP | BPF_JGT | BPF_X] = jgt_reg, + [BPF_JMP | BPF_JGE | BPF_X] = jge_reg, + [BPF_JMP | BPF_JSET | BPF_X] = jset_reg, + [BPF_JMP | BPF_JNE | BPF_X] = jne_reg, + [BPF_JMP | BPF_EXIT] = goto_out, +}; + +/* --- Misc code --- */ +static void br_set_offset(u64 *instr, u16 offset) +{ + u16 addr_lo, addr_hi; + + addr_lo = offset & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO)); + addr_hi = offset != addr_lo; + *instr &= ~(OP_BR_ADDR_HI | OP_BR_ADDR_LO); + *instr |= FIELD_PREP(OP_BR_ADDR_HI, addr_hi); + *instr |= FIELD_PREP(OP_BR_ADDR_LO, addr_lo); +} + +/* --- Assembler logic --- */ +static int nfp_fixup_branches(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta, *next; + u32 off, br_idx; + u32 idx; + + 
nfp_for_each_insn_walk2(nfp_prog, meta, next) { + if (meta->skip) + continue; + if (BPF_CLASS(meta->insn.code) != BPF_JMP) + continue; + + br_idx = nfp_prog_offset_to_index(nfp_prog, next->off) - 1; + if (!nfp_is_br(nfp_prog->prog[br_idx])) { + pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n", + br_idx, meta->insn.code, nfp_prog->prog[br_idx]); + return -ELOOP; + } + /* Leave special branches for later */ + if (FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx])) + continue; + + /* Find the target offset in assembler realm */ + off = meta->insn.off; + if (!off) { + pr_err("Fixup found zero offset!!\n"); + return -ELOOP; + } + + while (off && nfp_meta_has_next(nfp_prog, next)) { + next = nfp_meta_next(next); + off--; + } + if (off) { + pr_err("Fixup found too large jump!! %d\n", off); + return -ELOOP; + } + + if (next->skip) { + pr_err("Branch landing on removed instruction!!\n"); + return -ELOOP; + } + + for (idx = nfp_prog_offset_to_index(nfp_prog, meta->off); + idx <= br_idx; idx++) { + if (!nfp_is_br(nfp_prog->prog[idx])) + continue; + br_set_offset(&nfp_prog->prog[idx], next->off); + } + } + + /* Fixup 'goto out's separately, they can be scattered around */ + for (br_idx = 0; br_idx < nfp_prog->prog_len; br_idx++) { + enum br_special special; + + if ((nfp_prog->prog[br_idx] & OP_BR_BASE_MASK) != OP_BR_BASE) + continue; + + special = FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx]); + switch (special) { + case OP_BR_NORMAL: + break; + case OP_BR_GO_OUT: + br_set_offset(&nfp_prog->prog[br_idx], + nfp_prog->tgt_out); + break; + case OP_BR_GO_ABORT: + br_set_offset(&nfp_prog->prog[br_idx], + nfp_prog->tgt_abort); + break; + } + + nfp_prog->prog[br_idx] &= ~OP_BR_SPECIAL; + } + + return 0; +} + +static void nfp_intro(struct nfp_prog *nfp_prog) +{ + emit_alu(nfp_prog, pkt_reg(nfp_prog), + reg_none(), ALU_OP_NONE, NFP_BPF_ABI_PKT); +} + +static void nfp_outro_tc_legacy(struct nfp_prog *nfp_prog) +{ + const u8 act2code[] = { + [NN_ACT_TC_DROP] = 0x22, + 
}; + /* Target for aborts */ + nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); + wrp_immed(nfp_prog, reg_both(0), 0); + + /* Target for normal exits */ + nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog); + /* Legacy TC mode: + * 0 0x11 -> pass, count as stat0 + * -1 drop 0x22 -> drop, count as stat1 + * redir 0x24 -> redir, count as stat1 + * ife mark 0x21 -> pass, count as stat1 + * ife + tx 0x24 -> redir, count as stat1 + */ + emit_br_byte_neq(nfp_prog, reg_b(0), 0xff, 0, nfp_prog->tgt_done, 2); + emit_alu(nfp_prog, reg_a(0), + reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16); + + emit_br(nfp_prog, BR_UNC, nfp_prog->tgt_done, 1); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(act2code[nfp_prog->act]), + SHF_SC_L_SHF, 16); +} + +static void nfp_outro(struct nfp_prog *nfp_prog) +{ + switch (nfp_prog->act) { + case NN_ACT_TC_DROP: + nfp_outro_tc_legacy(nfp_prog); + break; + } +} + +static int nfp_translate(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta; + int err; + + nfp_intro(nfp_prog); + if (nfp_prog->error) + return nfp_prog->error; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + instr_cb_t cb = instr_cb[meta->insn.code]; + + meta->off = nfp_prog_current_offset(nfp_prog); + + if (meta->skip) { + nfp_prog->n_translated++; + continue; + } + + if (nfp_meta_has_prev(nfp_prog, meta) && + nfp_meta_prev(meta)->double_cb) + cb = nfp_meta_prev(meta)->double_cb; + if (!cb) + return -ENOENT; + err = cb(nfp_prog, meta); + if (err) + return err; + + nfp_prog->n_translated++; + } + + nfp_outro(nfp_prog); + if (nfp_prog->error) + return nfp_prog->error; + + return nfp_fixup_branches(nfp_prog); +} + +static int +nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, + unsigned int cnt) +{ + unsigned int i; + + for (i = 0; i < cnt; i++) { + struct nfp_insn_meta *meta; + + meta = kzalloc(sizeof(*meta), GFP_KERNEL); + if (!meta) + return -ENOMEM; + + 
meta->insn = prog[i]; + meta->n = i; + + list_add_tail(&meta->l, &nfp_prog->insns); + } + + return 0; +} + +/* --- Optimizations --- */ +static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + struct bpf_insn insn = meta->insn; + + /* Programs converted from cBPF start with register xoring */ + if (insn.code == (BPF_ALU64 | BPF_XOR | BPF_X) && + insn.src_reg == insn.dst_reg) + continue; + + /* Programs start with R6 = R1 but we ignore the skb pointer */ + if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) && + insn.src_reg == 1 && insn.dst_reg == 6) + meta->skip = true; + + /* Return as soon as something doesn't match */ + if (!meta->skip) + return; + } +} + +/* Try to rename registers so that program uses only low ones */ +static int nfp_bpf_opt_reg_rename(struct nfp_prog *nfp_prog) +{ + bool reg_used[MAX_BPF_REG] = {}; + u8 tgt_reg[MAX_BPF_REG] = {}; + struct nfp_insn_meta *meta; + unsigned int i, j; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + if (meta->skip) + continue; + + reg_used[meta->insn.src_reg] = true; + reg_used[meta->insn.dst_reg] = true; + } + + for (i = 0, j = 0; i < ARRAY_SIZE(tgt_reg); i++) { + if (!reg_used[i]) + continue; + + tgt_reg[i] = j++; + } + nfp_prog->num_regs = j; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + meta->insn.src_reg = tgt_reg[meta->insn.src_reg]; + meta->insn.dst_reg = tgt_reg[meta->insn.dst_reg]; + } + + return 0; +} + +/* Remove masking after load since our load guarantees this is not needed */ +static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta1, *meta2; + const s32 exp_mask[] = { + [BPF_B] = 0x000000ffU, + [BPF_H] = 0x0000ffffU, + [BPF_W] = 0xffffffffU, + }; + + nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) { + struct bpf_insn insn, next; + + insn = meta1->insn; + next = meta2->insn; + + if (BPF_CLASS(insn.code) != BPF_LD) + continue; + if (BPF_MODE(insn.code) != BPF_ABS && 
+ BPF_MODE(insn.code) != BPF_IND) + continue; + + if (next.code != (BPF_ALU64 | BPF_AND | BPF_K)) + continue; + + if (!exp_mask[BPF_SIZE(insn.code)]) + continue; + if (exp_mask[BPF_SIZE(insn.code)] != next.imm) + continue; + + if (next.src_reg || next.dst_reg) + continue; + + meta2->skip = true; + } +} + +static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta1, *meta2, *meta3; + + nfp_for_each_insn_walk3(nfp_prog, meta1, meta2, meta3) { + struct bpf_insn insn, next1, next2; + + insn = meta1->insn; + next1 = meta2->insn; + next2 = meta3->insn; + + if (BPF_CLASS(insn.code) != BPF_LD) + continue; + if (BPF_MODE(insn.code) != BPF_ABS && + BPF_MODE(insn.code) != BPF_IND) + continue; + if (BPF_SIZE(insn.code) != BPF_W) + continue; + + if (!(next1.code == (BPF_LSH | BPF_K | BPF_ALU64) && + next2.code == (BPF_RSH | BPF_K | BPF_ALU64)) && + !(next1.code == (BPF_RSH | BPF_K | BPF_ALU64) && + next2.code == (BPF_LSH | BPF_K | BPF_ALU64))) + continue; + + if (next1.src_reg || next1.dst_reg || + next2.src_reg || next2.dst_reg) + continue; + + if (next1.imm != 0x20 || next2.imm != 0x20) + continue; + + meta2->skip = true; + meta3->skip = true; + } +} + +static int nfp_bpf_optimize(struct nfp_prog *nfp_prog) +{ + int ret; + + nfp_bpf_opt_reg_init(nfp_prog); + + ret = nfp_bpf_opt_reg_rename(nfp_prog); + if (ret) + return ret; + + nfp_bpf_opt_ld_mask(nfp_prog); + nfp_bpf_opt_ld_shift(nfp_prog); + + return 0; +} + +/** + * nfp_bpf_jit() - translate BPF code into NFP assembly + * @filter: kernel BPF filter struct + * @prog_mem: memory to store assembler instructions + * @act: action attached to this eBPF program + * @prog_start: offset of the first instruction when loaded + * @prog_done: where to jump on exit + * @prog_sz: size of @prog_mem in instructions + * @res: achieved parameters of translation results + */ +int +nfp_bpf_jit(struct bpf_prog *filter, void *prog_mem, + enum nfp_bpf_action_type act, + unsigned int prog_start, unsigned int 
prog_done, + unsigned int prog_sz, struct nfp_bpf_result *res) +{ + struct nfp_prog *nfp_prog; + int ret; + + nfp_prog = kzalloc(sizeof(*nfp_prog), GFP_KERNEL); + if (!nfp_prog) + return -ENOMEM; + + INIT_LIST_HEAD(&nfp_prog->insns); + nfp_prog->act = act; + nfp_prog->start_off = prog_start; + nfp_prog->tgt_done = prog_done; + + ret = nfp_prog_prepare(nfp_prog, filter->insnsi, filter->len); + if (ret) + goto out; + + ret = nfp_prog_verify(nfp_prog, filter); + if (ret) + goto out; + + ret = nfp_bpf_optimize(nfp_prog); + if (ret) + goto out; + + if (nfp_prog->num_regs <= 7) + nfp_prog->regs_per_thread = 16; + else + nfp_prog->regs_per_thread = 32; + + nfp_prog->prog = prog_mem; + nfp_prog->__prog_alloc_len = prog_sz; + + ret = nfp_translate(nfp_prog); + if (ret) { + pr_err("Translation failed with error %d (translated: %u)\n", + ret, nfp_prog->n_translated); + ret = -EINVAL; + } + + res->n_instr = nfp_prog->prog_len; + res->dense_mode = nfp_prog->num_regs <= 7; +out: + nfp_prog_free(nfp_prog); + + return ret; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c new file mode 100644 index 0000000..ef6775b --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c @@ -0,0 +1,162 @@ +/* + * Copyright (C) 2016 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define pr_fmt(fmt) "NFP net bpf: " fmt + +#include +#include +#include +#include + +#include "nfp_bpf.h" + +/* Analyzer/verifier definitions */ +struct nfp_bpf_analyzer_priv { + struct nfp_prog *prog; + struct nfp_insn_meta *meta; +}; + +static struct nfp_insn_meta * +nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int insn_idx, unsigned int n_insns) +{ + unsigned int forward, backward, i; + + backward = meta->n - insn_idx; + forward = insn_idx - meta->n; + + if (min(forward, backward) > n_insns - insn_idx - 1) { + backward = n_insns - insn_idx - 1; + meta = nfp_prog_last_meta(nfp_prog); + } + if (min(forward, backward) > insn_idx && backward > insn_idx) { + forward = insn_idx; + meta = nfp_prog_first_meta(nfp_prog); + } + + if (forward < backward) + for (i = 0; i < forward; i++) + meta = nfp_meta_next(meta); + else + for (i = 0; i < backward; i++) + meta = nfp_meta_prev(meta); + + return meta; +} + +static int +nfp_bpf_check_exit(struct nfp_prog *nfp_prog, + const struct bpf_verifier_env *env) +{ + const struct bpf_reg_state *reg0 = &env->cur_state.regs[0]; + + if (reg0->type != CONST_IMM) { + pr_info("unsupported exit state: %d, imm: %llx\n", + reg0->type, reg0->imm); + return -EINVAL; + } + + if (reg0->imm != 0 && 
(reg0->imm & ~0U) != ~0U) { + pr_info("unsupported exit state: %d, imm: %llx\n", + reg0->type, reg0->imm); + return -EINVAL; + } + + return 0; +} + +static int +nfp_bpf_check_ctx_ptr(struct nfp_prog *nfp_prog, + const struct bpf_verifier_env *env, u8 reg) +{ + if (env->cur_state.regs[reg].type != PTR_TO_CTX) + return -EINVAL; + + return 0; +} + +static int +nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) +{ + struct nfp_bpf_analyzer_priv *priv = env->analyzer_priv; + struct nfp_insn_meta *meta = priv->meta; + + meta = nfp_bpf_goto_meta(priv->prog, meta, insn_idx, env->prog->len); + priv->meta = meta; + + if (meta->insn.src_reg == BPF_REG_10 || + meta->insn.dst_reg == BPF_REG_10) { + pr_err("stack not yet supported\n"); + return -EINVAL; + } + if (meta->insn.src_reg >= MAX_BPF_REG || + meta->insn.dst_reg >= MAX_BPF_REG) { + pr_err("program uses extended registers - jit hardening?\n"); + return -EINVAL; + } + + if (meta->insn.code == (BPF_JMP | BPF_EXIT)) + return nfp_bpf_check_exit(priv->prog, env); + + if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM)) + return nfp_bpf_check_ctx_ptr(priv->prog, env, + meta->insn.src_reg); + if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM)) + return nfp_bpf_check_ctx_ptr(priv->prog, env, + meta->insn.dst_reg); + + return 0; +} + +static const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops = { + .insn_hook = nfp_verify_insn, +}; + +int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog) +{ + struct nfp_bpf_analyzer_priv *priv; + int ret; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->prog = nfp_prog; + priv->meta = nfp_prog_first_meta(nfp_prog); + + ret = bpf_analyzer(prog, &nfp_bpf_analyzer_ops, priv); + + kfree(priv); + + return ret; +} -- cgit v1.1 From 7533fdc0f77f207fcc370b10965f4bcee82dfedf Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:44:01 +0100 Subject: nfp: bpf: add hardware bpf offload 
Add hardware bpf offload on our smart NICs. Detect if capable firmware is loaded and use it to load the code JITed with just added translator onto programmable engines. This commit only supports offloading cls_bpf in legacy mode (non-direct action). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/Makefile | 1 + drivers/net/ethernet/netronome/nfp/nfp_net.h | 26 ++- .../net/ethernet/netronome/nfp/nfp_net_common.c | 40 +++- drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h | 44 ++++- .../net/ethernet/netronome/nfp/nfp_net_offload.c | 220 +++++++++++++++++++++ 5 files changed, 324 insertions(+), 7 deletions(-) create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_net_offload.c diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile index 5f12689..0efb2ba 100644 --- a/drivers/net/ethernet/netronome/nfp/Makefile +++ b/drivers/net/ethernet/netronome/nfp/Makefile @@ -3,6 +3,7 @@ obj-$(CONFIG_NFP_NETVF) += nfp_netvf.o nfp_netvf-objs := \ nfp_net_common.o \ nfp_net_ethtool.o \ + nfp_net_offload.o \ nfp_netvf_main.o ifeq ($(CONFIG_BPF_SYSCALL),y) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 6906356..ea6f5e6 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -220,7 +220,7 @@ struct nfp_net_tx_ring { #define PCIE_DESC_RX_I_TCP_CSUM_OK cpu_to_le16(BIT(11)) #define PCIE_DESC_RX_I_UDP_CSUM cpu_to_le16(BIT(10)) #define PCIE_DESC_RX_I_UDP_CSUM_OK cpu_to_le16(BIT(9)) -#define PCIE_DESC_RX_SPARE cpu_to_le16(BIT(8)) +#define PCIE_DESC_RX_BPF cpu_to_le16(BIT(8)) #define PCIE_DESC_RX_EOP cpu_to_le16(BIT(7)) #define PCIE_DESC_RX_IP4_CSUM cpu_to_le16(BIT(6)) #define PCIE_DESC_RX_IP4_CSUM_OK cpu_to_le16(BIT(5)) @@ -413,6 +413,7 @@ static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver, * @is_vf: Is the driver attached to a VF? 
* @is_nfp3200: Is the driver for a NFP-3200 card? * @fw_loaded: Is the firmware loaded? + * @bpf_offload_skip_sw: Offloaded BPF program will not be rerun by cls_bpf * @ctrl: Local copy of the control register/word. * @fl_bufsz: Currently configured size of the freelist buffers * @rx_offset: Offset in the RX buffers where packet data starts @@ -473,6 +474,7 @@ struct nfp_net { unsigned is_vf:1; unsigned is_nfp3200:1; unsigned fw_loaded:1; + unsigned bpf_offload_skip_sw:1; u32 ctrl; u32 fl_bufsz; @@ -561,12 +563,28 @@ struct nfp_net { /* Functions to read/write from/to a BAR * Performs any endian conversion necessary. */ +static inline u16 nn_readb(struct nfp_net *nn, int off) +{ + return readb(nn->ctrl_bar + off); +} + static inline void nn_writeb(struct nfp_net *nn, int off, u8 val) { writeb(val, nn->ctrl_bar + off); } -/* NFP-3200 can't handle 16-bit accesses too well - hence no readw/writew */ +/* NFP-3200 can't handle 16-bit accesses too well */ +static inline u16 nn_readw(struct nfp_net *nn, int off) +{ + WARN_ON_ONCE(nn->is_nfp3200); + return readw(nn->ctrl_bar + off); +} + +static inline void nn_writew(struct nfp_net *nn, int off, u16 val) +{ + WARN_ON_ONCE(nn->is_nfp3200); + writew(val, nn->ctrl_bar + off); +} static inline u32 nn_readl(struct nfp_net *nn, int off) { @@ -757,4 +775,8 @@ static inline void nfp_net_debugfs_adapter_del(struct nfp_net *nn) } #endif /* CONFIG_NFP_NET_DEBUG */ +int +nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto, + struct tc_cls_bpf_offload *cls_bpf); + #endif /* _NFP_NET_H_ */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 252e492..51978df 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -60,6 +60,7 @@ #include +#include #include #include "nfp_net_ctrl.h" @@ -2382,6 +2383,31 @@ static struct rtnl_link_stats64 *nfp_net_stat64(struct net_device *netdev, return 
stats; } +static bool nfp_net_ebpf_capable(struct nfp_net *nn) +{ + if (nn->cap & NFP_NET_CFG_CTRL_BPF && + nn_readb(nn, NFP_NET_CFG_BPF_ABI) == NFP_NET_BPF_ABI) + return true; + return false; +} + +static int +nfp_net_setup_tc(struct net_device *netdev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) +{ + struct nfp_net *nn = netdev_priv(netdev); + + if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS)) + return -ENOTSUPP; + if (proto != htons(ETH_P_ALL)) + return -ENOTSUPP; + + if (tc->type == TC_SETUP_CLSBPF && nfp_net_ebpf_capable(nn)) + return nfp_net_bpf_offload(nn, handle, proto, tc->cls_bpf); + + return -EINVAL; +} + static int nfp_net_set_features(struct net_device *netdev, netdev_features_t features) { @@ -2436,6 +2462,11 @@ static int nfp_net_set_features(struct net_device *netdev, new_ctrl &= ~NFP_NET_CFG_CTRL_GATHER; } + if (changed & NETIF_F_HW_TC && nn->ctrl & NFP_NET_CFG_CTRL_BPF) { + nn_err(nn, "Cannot disable HW TC offload while in use\n"); + return -EBUSY; + } + nn_dbg(nn, "Feature change 0x%llx -> 0x%llx (changed=0x%llx)\n", netdev->features, features, changed); @@ -2585,6 +2616,7 @@ static const struct net_device_ops nfp_net_netdev_ops = { .ndo_stop = nfp_net_netdev_close, .ndo_start_xmit = nfp_net_tx, .ndo_get_stats64 = nfp_net_stat64, + .ndo_setup_tc = nfp_net_setup_tc, .ndo_tx_timeout = nfp_net_tx_timeout, .ndo_set_rx_mode = nfp_net_set_rx_mode, .ndo_change_mtu = nfp_net_change_mtu, @@ -2610,7 +2642,7 @@ void nfp_net_info(struct nfp_net *nn) nn->fw_ver.resv, nn->fw_ver.class, nn->fw_ver.major, nn->fw_ver.minor, nn->max_mtu); - nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", nn->cap, nn->cap & NFP_NET_CFG_CTRL_PROMISC ? "PROMISC " : "", nn->cap & NFP_NET_CFG_CTRL_L2BC ? "L2BCFILT " : "", @@ -2627,7 +2659,8 @@ void nfp_net_info(struct nfp_net *nn) nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "", nn->cap & NFP_NET_CFG_CTRL_IRQMOD ? 
"IRQMOD " : "", nn->cap & NFP_NET_CFG_CTRL_VXLAN ? "VXLAN " : "", - nn->cap & NFP_NET_CFG_CTRL_NVGRE ? "NVGRE " : ""); + nn->cap & NFP_NET_CFG_CTRL_NVGRE ? "NVGRE " : "", + nfp_net_ebpf_capable(nn) ? "BPF " : ""); } /** @@ -2795,6 +2828,9 @@ int nfp_net_netdev_init(struct net_device *netdev) netdev->features = netdev->hw_features; + if (nfp_net_ebpf_capable(nn)) + netdev->hw_features |= NETIF_F_HW_TC; + /* Advertise but disable TSO by default. */ netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index ad6c4e3..7aa11f3 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -123,6 +123,7 @@ #define NFP_NET_CFG_CTRL_L2SWITCH_LOCAL (0x1 << 23) /* Switch to local */ #define NFP_NET_CFG_CTRL_VXLAN (0x1 << 24) /* VXLAN tunnel support */ #define NFP_NET_CFG_CTRL_NVGRE (0x1 << 25) /* NVGRE tunnel support */ +#define NFP_NET_CFG_CTRL_BPF (0x1 << 27) /* BPF offload capable */ #define NFP_NET_CFG_UPDATE 0x0004 #define NFP_NET_CFG_UPDATE_GEN (0x1 << 0) /* General update */ #define NFP_NET_CFG_UPDATE_RING (0x1 << 1) /* Ring config change */ @@ -134,6 +135,7 @@ #define NFP_NET_CFG_UPDATE_RESET (0x1 << 7) /* Update due to FLR */ #define NFP_NET_CFG_UPDATE_IRQMOD (0x1 << 8) /* IRQ mod change */ #define NFP_NET_CFG_UPDATE_VXLAN (0x1 << 9) /* VXLAN port change */ +#define NFP_NET_CFG_UPDATE_BPF (0x1 << 10) /* BPF program load */ #define NFP_NET_CFG_UPDATE_ERR (0x1 << 31) /* A error occurred */ #define NFP_NET_CFG_TXRS_ENABLE 0x0008 #define NFP_NET_CFG_RXRS_ENABLE 0x0010 @@ -196,10 +198,37 @@ #define NFP_NET_CFG_VXLAN_SZ 0x0008 /** - * 64B reserved for future use (0x0080 - 0x00c0) + * NFP6000 - BPF section + * @NFP_NET_CFG_BPF_ABI: BPF ABI version + * @NFP_NET_CFG_BPF_CAP: BPF capabilities + * @NFP_NET_CFG_BPF_MAX_LEN: Maximum size of JITed BPF code in bytes + * @NFP_NET_CFG_BPF_START: Offset at which BPF 
will be loaded + * @NFP_NET_CFG_BPF_DONE: Offset to jump to on exit + * @NFP_NET_CFG_BPF_STACK_SZ: Total size of stack area in 64B chunks + * @NFP_NET_CFG_BPF_INL_MTU: Packet data split offset in 64B chunks + * @NFP_NET_CFG_BPF_SIZE: Size of the JITed BPF code in instructions + * @NFP_NET_CFG_BPF_ADDR: DMA address of the buffer with JITed BPF code */ -#define NFP_NET_CFG_RESERVED 0x0080 -#define NFP_NET_CFG_RESERVED_SZ 0x0040 +#define NFP_NET_CFG_BPF_ABI 0x0080 +#define NFP_NET_BPF_ABI 1 +#define NFP_NET_CFG_BPF_CAP 0x0081 +#define NFP_NET_BPF_CAP_RELO (1 << 0) /* seamless reload */ +#define NFP_NET_CFG_BPF_MAX_LEN 0x0082 +#define NFP_NET_CFG_BPF_START 0x0084 +#define NFP_NET_CFG_BPF_DONE 0x0086 +#define NFP_NET_CFG_BPF_STACK_SZ 0x0088 +#define NFP_NET_CFG_BPF_INL_MTU 0x0089 +#define NFP_NET_CFG_BPF_SIZE 0x008e +#define NFP_NET_CFG_BPF_ADDR 0x0090 +#define NFP_NET_CFG_BPF_CFG_8CTX (1 << 0) /* 8ctx mode */ +#define NFP_NET_CFG_BPF_CFG_MASK 7ULL +#define NFP_NET_CFG_BPF_ADDR_MASK (~NFP_NET_CFG_BPF_CFG_MASK) + +/** + * 40B reserved for future use (0x0098 - 0x00c0) + */ +#define NFP_NET_CFG_RESERVED 0x0098 +#define NFP_NET_CFG_RESERVED_SZ 0x0028 /** * RSS configuration (0x0100 - 0x01ac): @@ -303,6 +332,15 @@ #define NFP_NET_CFG_STATS_TX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x80) #define NFP_NET_CFG_STATS_TX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x88) +#define NFP_NET_CFG_STATS_APP0_FRAMES (NFP_NET_CFG_STATS_BASE + 0x90) +#define NFP_NET_CFG_STATS_APP0_BYTES (NFP_NET_CFG_STATS_BASE + 0x98) +#define NFP_NET_CFG_STATS_APP1_FRAMES (NFP_NET_CFG_STATS_BASE + 0xa0) +#define NFP_NET_CFG_STATS_APP1_BYTES (NFP_NET_CFG_STATS_BASE + 0xa8) +#define NFP_NET_CFG_STATS_APP2_FRAMES (NFP_NET_CFG_STATS_BASE + 0xb0) +#define NFP_NET_CFG_STATS_APP2_BYTES (NFP_NET_CFG_STATS_BASE + 0xb8) +#define NFP_NET_CFG_STATS_APP3_FRAMES (NFP_NET_CFG_STATS_BASE + 0xc0) +#define NFP_NET_CFG_STATS_APP3_BYTES (NFP_NET_CFG_STATS_BASE + 0xc8) + /** * Per ring stats (0x1000 - 0x1800) * options, 64bit per entry 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c new file mode 100644 index 0000000..313988c --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c @@ -0,0 +1,220 @@ +/* + * Copyright (C) 2016 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +/* + * nfp_net_offload.c + * Netronome network device driver: TC offload functions for PF and VF + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "nfp_bpf.h" +#include "nfp_net_ctrl.h" +#include "nfp_net.h" + +static int +nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) +{ + const struct tc_action *a; + LIST_HEAD(actions); + + /* TC direct action */ + if (cls_bpf->exts_integrated) + return -ENOTSUPP; + + /* TC legacy mode */ + if (!tc_single_action(cls_bpf->exts)) + return -ENOTSUPP; + + tcf_exts_to_list(cls_bpf->exts, &actions); + list_for_each_entry(a, &actions, list) { + if (is_tcf_gact_shot(a)) + return NN_ACT_TC_DROP; + } + + return -ENOTSUPP; +} + +static int +nfp_net_bpf_offload_prepare(struct nfp_net *nn, + struct tc_cls_bpf_offload *cls_bpf, + struct nfp_bpf_result *res, + void **code, dma_addr_t *dma_addr, u16 max_instr) +{ + unsigned int code_sz = max_instr * sizeof(u64); + enum nfp_bpf_action_type act; + u16 start_off, done_off; + unsigned int max_mtu; + int ret; + + ret = nfp_net_bpf_get_act(nn, cls_bpf); + if (ret < 0) + return ret; + act = ret; + + max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; + if (max_mtu < nn->netdev->mtu) { + nn_info(nn, "BPF offload not supported with MTU larger than HW packet split boundary\n"); + return -ENOTSUPP; + } + + start_off = nn_readw(nn, NFP_NET_CFG_BPF_START); + done_off = nn_readw(nn, NFP_NET_CFG_BPF_DONE); + + *code = dma_zalloc_coherent(&nn->pdev->dev, code_sz, dma_addr, + GFP_KERNEL); + if (!*code) + return -ENOMEM; + + ret = nfp_bpf_jit(cls_bpf->prog, *code, act, start_off, done_off, + max_instr, res); + if (ret) + goto out; + + return 0; + +out: + dma_free_coherent(&nn->pdev->dev, code_sz, *code, *dma_addr); + return ret; +} + +static void +nfp_net_bpf_load_and_start(struct nfp_net *nn, u32 tc_flags, + void *code, dma_addr_t dma_addr, + unsigned int code_sz, unsigned int n_instr, + bool dense_mode) +{ + 
u64 bpf_addr = dma_addr; + int err; + + nn->bpf_offload_skip_sw = !!(tc_flags & TCA_CLS_FLAGS_SKIP_SW); + + if (dense_mode) + bpf_addr |= NFP_NET_CFG_BPF_CFG_8CTX; + + nn_writew(nn, NFP_NET_CFG_BPF_SIZE, n_instr); + nn_writeq(nn, NFP_NET_CFG_BPF_ADDR, bpf_addr); + + /* Load up the JITed code */ + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_BPF); + if (err) + nn_err(nn, "FW command error while loading BPF: %d\n", err); + + /* Enable passing packets through BPF function */ + nn->ctrl |= NFP_NET_CFG_CTRL_BPF; + nn_writel(nn, NFP_NET_CFG_CTRL, nn->ctrl); + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); + if (err) + nn_err(nn, "FW command error while enabling BPF: %d\n", err); + + dma_free_coherent(&nn->pdev->dev, code_sz, code, dma_addr); +} + +static int nfp_net_bpf_stop(struct nfp_net *nn) +{ + if (!(nn->ctrl & NFP_NET_CFG_CTRL_BPF)) + return 0; + + nn->ctrl &= ~NFP_NET_CFG_CTRL_BPF; + nn_writel(nn, NFP_NET_CFG_CTRL, nn->ctrl); + + nn->bpf_offload_skip_sw = 0; + + return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); +} + +int +nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto, + struct tc_cls_bpf_offload *cls_bpf) +{ + struct nfp_bpf_result res; + dma_addr_t dma_addr; + u16 max_instr; + void *code; + int err; + + max_instr = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN); + + switch (cls_bpf->command) { + case TC_CLSBPF_REPLACE: + /* There is nothing stopping us from implementing seamless + * replace but the simple method of loading I adopted in + * the firmware does not handle atomic replace (i.e. we have to + * stop the BPF offload and re-enable it). Leaking-in a few + * frames which didn't have BPF applied in the hardware should + * be fine if software fallback is available, though. 
+ */ + if (nn->bpf_offload_skip_sw) + return -EBUSY; + + err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code, + &dma_addr, max_instr); + if (err) + return err; + + nfp_net_bpf_stop(nn); + nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code, + dma_addr, max_instr * sizeof(u64), + res.n_instr, res.dense_mode); + return 0; + + case TC_CLSBPF_ADD: + if (nn->ctrl & NFP_NET_CFG_CTRL_BPF) + return -EBUSY; + + err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code, + &dma_addr, max_instr); + if (err) + return err; + + nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code, + dma_addr, max_instr * sizeof(u64), + res.n_instr, res.dense_mode); + return 0; + + case TC_CLSBPF_DESTROY: + return nfp_net_bpf_stop(nn); + + default: + return -ENOTSUPP; + } +} -- cgit v1.1 From 68d640630d4ef2a4bf3f68b5073dec5e4c4f878b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:44:02 +0100 Subject: net: cls_bpf: allow offloaded filters to update stats Call into offloaded filters to update stats. Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- include/net/pkt_cls.h | 1 + net/sched/cls_bpf.c | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 57af9f3..5ccaa4b 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -490,6 +490,7 @@ enum tc_clsbpf_command { TC_CLSBPF_ADD, TC_CLSBPF_REPLACE, TC_CLSBPF_DESTROY, + TC_CLSBPF_STATS, }; struct tc_cls_bpf_offload { diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 1becc2f..bb1d5a4 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -221,6 +221,15 @@ static void cls_bpf_stop_offload(struct tcf_proto *tp, prog->offloaded = false; } +static void cls_bpf_offload_update_stats(struct tcf_proto *tp, + struct cls_bpf_prog *prog) +{ + if (!prog->offloaded) + return; + + cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS); +} + static int cls_bpf_init(struct tcf_proto *tp) { struct cls_bpf_head *head; @@ -577,6 +586,8 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, tm->tcm_handle = prog->handle; + cls_bpf_offload_update_stats(tp, prog); + nest = nla_nest_start(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; -- cgit v1.1 From 66860beb7ed5df11433528cb535d5e9f7dad2302 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:44:03 +0100 Subject: nfp: bpf: allow offloaded filters to update stats Periodically poll stats and call into offloaded actions to update them. Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/nfp_net.h | 19 +++++++ .../net/ethernet/netronome/nfp/nfp_net_common.c | 3 ++ .../net/ethernet/netronome/nfp/nfp_net_ethtool.c | 12 +++++ .../net/ethernet/netronome/nfp/nfp_net_offload.c | 63 ++++++++++++++++++++++ 4 files changed, 97 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index ea6f5e6..13c6a90 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -62,6 +62,9 @@ /* Max time to wait for NFP to respond on updates (in seconds) */ #define NFP_NET_POLL_TIMEOUT 5 +/* Interval for reading offloaded filter stats */ +#define NFP_NET_STAT_POLL_IVL msecs_to_jiffies(100) + /* Bar allocation */ #define NFP_NET_CTRL_BAR 0 #define NFP_NET_Q0_BAR 2 @@ -405,6 +408,11 @@ static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver, fw_ver->minor == minor; } +struct nfp_stat_pair { + u64 pkts; + u64 bytes; +}; + /** * struct nfp_net - NFP network device structure * @pdev: Backpointer to PCI device @@ -428,6 +436,11 @@ static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver, * @rss_cfg: RSS configuration * @rss_key: RSS secret key * @rss_itbl: RSS indirection table + * @rx_filter: Filter offload statistics - dropped packets/bytes + * @rx_filter_prev: Filter offload statistics - values from previous update + * @rx_filter_change: Jiffies when statistics last changed + * @rx_filter_stats_timer: Timer for polling filter offload statistics + * @rx_filter_lock: Lock protecting timer state changes (teardown) * @max_tx_rings: Maximum number of TX rings supported by the Firmware * @max_rx_rings: Maximum number of RX rings supported by the Firmware * @num_tx_rings: Currently configured number of TX rings @@ -504,6 +517,11 @@ struct nfp_net { u8 rss_key[NFP_NET_CFG_RSS_KEY_SZ]; u8 rss_itbl[NFP_NET_CFG_RSS_ITBL_SZ]; + struct nfp_stat_pair rx_filter, rx_filter_prev; + unsigned long 
rx_filter_change; + struct timer_list rx_filter_stats_timer; + spinlock_t rx_filter_lock; + int max_tx_rings; int max_rx_rings; @@ -775,6 +793,7 @@ static inline void nfp_net_debugfs_adapter_del(struct nfp_net *nn) } #endif /* CONFIG_NFP_NET_DEBUG */ +void nfp_net_filter_stats_timer(unsigned long data); int nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto, struct tc_cls_bpf_offload *cls_bpf); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 51978df..f091eb7 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2703,10 +2703,13 @@ struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev, nn->rxd_cnt = NFP_NET_RX_DESCS_DEFAULT; spin_lock_init(&nn->reconfig_lock); + spin_lock_init(&nn->rx_filter_lock); spin_lock_init(&nn->link_status_lock); setup_timer(&nn->reconfig_timer, nfp_net_reconfig_timer, (unsigned long)nn); + setup_timer(&nn->rx_filter_stats_timer, + nfp_net_filter_stats_timer, (unsigned long)nn); return nn; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 4c98972..3418f22 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -106,6 +106,18 @@ static const struct _nfp_net_et_stats nfp_net_et_stats[] = { {"dev_tx_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_FRAMES)}, {"dev_tx_mc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_MC_FRAMES)}, {"dev_tx_bc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_BC_FRAMES)}, + + {"bpf_pass_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP0_FRAMES)}, + {"bpf_pass_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP0_BYTES)}, + /* see comments in outro functions in nfp_bpf_jit.c to find out + * how different BPF modes use app-specific counters + */ + {"bpf_app1_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP1_FRAMES)}, + {"bpf_app1_bytes", 
NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP1_BYTES)}, + {"bpf_app2_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP2_FRAMES)}, + {"bpf_app2_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP2_BYTES)}, + {"bpf_app3_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP3_FRAMES)}, + {"bpf_app3_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP3_BYTES)}, }; #define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c index 313988c..0537a53 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c @@ -51,6 +51,60 @@ #include "nfp_net_ctrl.h" #include "nfp_net.h" +void nfp_net_filter_stats_timer(unsigned long data) +{ + struct nfp_net *nn = (void *)data; + struct nfp_stat_pair latest; + + spin_lock_bh(&nn->rx_filter_lock); + + if (nn->ctrl & NFP_NET_CFG_CTRL_BPF) + mod_timer(&nn->rx_filter_stats_timer, + jiffies + NFP_NET_STAT_POLL_IVL); + + spin_unlock_bh(&nn->rx_filter_lock); + + latest.pkts = nn_readq(nn, NFP_NET_CFG_STATS_APP1_FRAMES); + latest.bytes = nn_readq(nn, NFP_NET_CFG_STATS_APP1_BYTES); + + if (latest.pkts != nn->rx_filter.pkts) + nn->rx_filter_change = jiffies; + + nn->rx_filter = latest; +} + +static void nfp_net_bpf_stats_reset(struct nfp_net *nn) +{ + nn->rx_filter.pkts = nn_readq(nn, NFP_NET_CFG_STATS_APP1_FRAMES); + nn->rx_filter.bytes = nn_readq(nn, NFP_NET_CFG_STATS_APP1_BYTES); + nn->rx_filter_prev = nn->rx_filter; + nn->rx_filter_change = jiffies; +} + +static int +nfp_net_bpf_stats_update(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) +{ + struct tc_action *a; + LIST_HEAD(actions); + u64 bytes, pkts; + + pkts = nn->rx_filter.pkts - nn->rx_filter_prev.pkts; + bytes = nn->rx_filter.bytes - nn->rx_filter_prev.bytes; + bytes -= pkts * ETH_HLEN; + + nn->rx_filter_prev = nn->rx_filter; + + preempt_disable(); + + tcf_exts_to_list(cls_bpf->exts, &actions); + list_for_each_entry(a, &actions, list) 
+ tcf_action_stats_update(a, bytes, pkts, nn->rx_filter_change); + + preempt_enable(); + + return 0; +} + static int nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) { @@ -147,6 +201,9 @@ nfp_net_bpf_load_and_start(struct nfp_net *nn, u32 tc_flags, nn_err(nn, "FW command error while enabling BPF: %d\n", err); dma_free_coherent(&nn->pdev->dev, code_sz, code, dma_addr); + + nfp_net_bpf_stats_reset(nn); + mod_timer(&nn->rx_filter_stats_timer, jiffies + NFP_NET_STAT_POLL_IVL); } static int nfp_net_bpf_stop(struct nfp_net *nn) @@ -154,9 +211,12 @@ static int nfp_net_bpf_stop(struct nfp_net *nn) if (!(nn->ctrl & NFP_NET_CFG_CTRL_BPF)) return 0; + spin_lock_bh(&nn->rx_filter_lock); nn->ctrl &= ~NFP_NET_CFG_CTRL_BPF; + spin_unlock_bh(&nn->rx_filter_lock); nn_writel(nn, NFP_NET_CFG_CTRL, nn->ctrl); + del_timer_sync(&nn->rx_filter_stats_timer); nn->bpf_offload_skip_sw = 0; return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); @@ -214,6 +274,9 @@ nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto, case TC_CLSBPF_DESTROY: return nfp_net_bpf_stop(nn); + case TC_CLSBPF_STATS: + return nfp_net_bpf_stats_update(nn, cls_bpf); + default: return -ENOTSUPP; } -- cgit v1.1 From 19d0f54edab6e77b6b73277ac33717be1f858fa8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:44:04 +0100 Subject: nfp: bpf: add packet marking support Add missing ABI defines and eBPF instructions to allow mark to be passed on and extend prepend parsing on the RX path to pick it up from packet metadata. Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/nfp_bpf.h | 2 + drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c | 19 +++++ drivers/net/ethernet/netronome/nfp/nfp_net.h | 2 + .../net/ethernet/netronome/nfp/nfp_net_common.c | 91 +++++++++++++++++----- drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h | 7 ++ .../net/ethernet/netronome/nfp/nfp_netvf_main.c | 2 +- 6 files changed, 101 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h index 3726421..2adb1d8 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h @@ -91,6 +91,8 @@ enum nfp_bpf_reg_type { #define imm_both(np) reg_both((np)->regs_per_thread - STATIC_REG_IMM) #define NFP_BPF_ABI_FLAGS reg_nnr(0) +#define NFP_BPF_ABI_FLAG_MARK 1 +#define NFP_BPF_ABI_MARK reg_nnr(1) #define NFP_BPF_ABI_PKT reg_nnr(2) #define NFP_BPF_ABI_LEN reg_nnr(3) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c index cfbf536..368381f 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c @@ -674,6 +674,16 @@ static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size) return construct_data_ind_ld(nfp_prog, offset, 0, false, size); } +static int wrp_set_mark(struct nfp_prog *nfp_prog, u8 src) +{ + emit_alu(nfp_prog, NFP_BPF_ABI_MARK, + reg_none(), ALU_OP_NONE, reg_b(src)); + emit_alu(nfp_prog, NFP_BPF_ABI_FLAGS, + NFP_BPF_ABI_FLAGS, ALU_OP_OR, reg_imm(NFP_BPF_ABI_FLAG_MARK)); + + return 0; +} + static void wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm) { @@ -1117,6 +1127,14 @@ static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return 0; } +static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + if (meta->insn.off == offsetof(struct sk_buff, mark)) + return wrp_set_mark(nfp_prog, 
meta->insn.src_reg * 2); + + return -ENOTSUPP; +} + static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { if (meta->insn.off < 0) /* TODO */ @@ -1306,6 +1324,7 @@ static const instr_cb_t instr_cb[256] = { [BPF_LD | BPF_IND | BPF_H] = data_ind_ld2, [BPF_LD | BPF_IND | BPF_W] = data_ind_ld4, [BPF_LDX | BPF_MEM | BPF_W] = mem_ldx4, + [BPF_STX | BPF_MEM | BPF_W] = mem_stx4, [BPF_JMP | BPF_JA | BPF_K] = jump, [BPF_JMP | BPF_JEQ | BPF_K] = jeq_imm, [BPF_JMP | BPF_JGT | BPF_K] = jgt_imm, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 13c6a90..ed824e1 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -269,6 +269,8 @@ struct nfp_net_rx_desc { }; }; +#define NFP_NET_META_FIELD_MASK GENMASK(NFP_NET_META_FIELD_SIZE - 1, 0) + struct nfp_net_rx_hash { __be32 hash_type; __be32 hash; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index f091eb7..415691e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1293,38 +1293,72 @@ static void nfp_net_rx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, } } -/** - * nfp_net_set_hash() - Set SKB hash data - * @netdev: adapter's net_device structure - * @skb: SKB to set the hash data on - * @rxd: RX descriptor - * - * The RSS hash and hash-type are pre-pended to the packet data. - * Extract and decode it and set the skb fields. 
- */ static void nfp_net_set_hash(struct net_device *netdev, struct sk_buff *skb, - struct nfp_net_rx_desc *rxd) + unsigned int type, __be32 *hash) { - struct nfp_net_rx_hash *rx_hash; - - if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS) || - !(netdev->features & NETIF_F_RXHASH)) + if (!(netdev->features & NETIF_F_RXHASH)) return; - rx_hash = (struct nfp_net_rx_hash *)(skb->data - sizeof(*rx_hash)); - - switch (be32_to_cpu(rx_hash->hash_type)) { + switch (type) { case NFP_NET_RSS_IPV4: case NFP_NET_RSS_IPV6: case NFP_NET_RSS_IPV6_EX: - skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L3); + skb_set_hash(skb, get_unaligned_be32(hash), PKT_HASH_TYPE_L3); break; default: - skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L4); + skb_set_hash(skb, get_unaligned_be32(hash), PKT_HASH_TYPE_L4); break; } } +static void +nfp_net_set_hash_desc(struct net_device *netdev, struct sk_buff *skb, + struct nfp_net_rx_desc *rxd) +{ + struct nfp_net_rx_hash *rx_hash; + + if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS)) + return; + + rx_hash = (struct nfp_net_rx_hash *)(skb->data - sizeof(*rx_hash)); + + nfp_net_set_hash(netdev, skb, get_unaligned_be32(&rx_hash->hash_type), + &rx_hash->hash); +} + +static void * +nfp_net_parse_meta(struct net_device *netdev, struct sk_buff *skb, + int meta_len) +{ + u8 *data = skb->data - meta_len; + u32 meta_info; + + meta_info = get_unaligned_be32(data); + data += 4; + + while (meta_info) { + switch (meta_info & NFP_NET_META_FIELD_MASK) { + case NFP_NET_META_HASH: + meta_info >>= NFP_NET_META_FIELD_SIZE; + nfp_net_set_hash(netdev, skb, + meta_info & NFP_NET_META_FIELD_MASK, + (__be32 *)data); + data += 4; + break; + case NFP_NET_META_MARK: + skb->mark = get_unaligned_be32(data); + data += 4; + break; + default: + return NULL; + } + + meta_info >>= NFP_NET_META_FIELD_SIZE; + } + + return data; +} + /** * nfp_net_rx() - receive up to @budget packets on @rx_ring * @rx_ring: RX ring to receive from @@ -1439,14 +1473,29 @@ static int 
nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) skb_reserve(skb, nn->rx_offset); skb_put(skb, data_len - meta_len); - nfp_net_set_hash(nn->netdev, skb, rxd); - /* Stats update */ u64_stats_update_begin(&r_vec->rx_sync); r_vec->rx_pkts++; r_vec->rx_bytes += skb->len; u64_stats_update_end(&r_vec->rx_sync); + if (nn->fw_ver.major <= 3) { + nfp_net_set_hash_desc(nn->netdev, skb, rxd); + } else if (meta_len) { + void *end; + + end = nfp_net_parse_meta(nn->netdev, skb, meta_len); + if (unlikely(end != skb->data)) { + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_drops++; + u64_stats_update_end(&r_vec->rx_sync); + + dev_kfree_skb_any(skb); + nn_warn_ratelimit(nn, "invalid RX packet metadata\n"); + continue; + } + } + skb_record_rx_queue(skb, rx_ring->idx); skb->protocol = eth_type_trans(skb, nn->netdev); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index 7aa11f3..93b10b4 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -66,6 +66,13 @@ #define NFP_NET_LSO_MAX_HDR_SZ 255 /** + * Prepend field types + */ +#define NFP_NET_META_FIELD_SIZE 4 +#define NFP_NET_META_HASH 1 /* next field carries hash type */ +#define NFP_NET_META_MARK 2 + +/** * Hash type pre-pended when a RSS hash was computed */ #define NFP_NET_RSS_NONE 0 diff --git a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c index f7062cb..2800bbf 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c @@ -148,7 +148,7 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev, dev_warn(&pdev->dev, "OBSOLETE Firmware detected - VF isolation not available\n"); } else { switch (fw_ver.major) { - case 1 ... 3: + case 1 ... 
4: if (is_nfp3200) { stride = 2; tx_bar_no = NFP_NET_Q0_BAR; -- cgit v1.1 From 9798e6fe4f9b6a2847a40e24b75e68afdc7a01b3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:44:05 +0100 Subject: net: act_mirred: allow statistic updates from offloaded actions Implement .stats_update() callback. The implementation is generic and can be reused by other simple actions if needed. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/sched/act_mirred.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 1c76387..667dc38 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -204,6 +204,13 @@ out: return retval; } +static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets, + u64 lastuse) +{ + tcf_lastuse_update(&a->tcfa_tm); + _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); +} + static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { @@ -281,6 +288,7 @@ static struct tc_action_ops act_mirred_ops = { .type = TCA_ACT_MIRRED, .owner = THIS_MODULE, .act = tcf_mirred, + .stats_update = tcf_stats_update, .dump = tcf_mirred_dump, .cleanup = tcf_mirred_release, .init = tcf_mirred_init, -- cgit v1.1 From 2d18421debc29a338e6783c06fb75ab7b16fc9ba Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:44:06 +0100 Subject: nfp: bpf: add support for legacy redirect action Data path has redirect support so expressing redirect to the port frame came from is a trivial matter of setting the right result code. Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/nfp_bpf.h | 1 + drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c | 2 ++ drivers/net/ethernet/netronome/nfp/nfp_net_offload.c | 4 ++++ 3 files changed, 7 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h index 2adb1d8..adbe023 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h @@ -60,6 +60,7 @@ enum static_regs { enum nfp_bpf_action_type { NN_ACT_TC_DROP, + NN_ACT_TC_REDIR, }; /* Software register representation, hardware encoding in asm.h */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c index 368381f..434bef9 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c @@ -1440,6 +1440,7 @@ static void nfp_outro_tc_legacy(struct nfp_prog *nfp_prog) { const u8 act2code[] = { [NN_ACT_TC_DROP] = 0x22, + [NN_ACT_TC_REDIR] = 0x24 }; /* Target for aborts */ nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); @@ -1468,6 +1469,7 @@ static void nfp_outro(struct nfp_prog *nfp_prog) { switch (nfp_prog->act) { case NN_ACT_TC_DROP: + case NN_ACT_TC_REDIR: nfp_outro_tc_legacy(nfp_prog); break; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c index 0537a53..1ec8e5b 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c @@ -123,6 +123,10 @@ nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) list_for_each_entry(a, &actions, list) { if (is_tcf_gact_shot(a)) return NN_ACT_TC_DROP; + + if (is_tcf_mirred_redirect(a) && + tcf_mirred_ifindex(a) == nn->netdev->ifindex) + return NN_ACT_TC_REDIR; } return -ENOTSUPP; -- cgit v1.1 From e3b8baf0ca2a69f88846b5446234e5647ecd17eb Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 
21 Sep 2016 11:44:07 +0100 Subject: nfp: bpf: add offload of TC direct action mode Add offload of TC in direct action mode. We just need to provide appropriate checks in the verifier and a new outro block to translate the exit codes to what data path expects Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_bpf.h | 1 + drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c | 66 ++++++++++++++++++++++ .../net/ethernet/netronome/nfp/nfp_bpf_verifier.c | 11 +++- .../net/ethernet/netronome/nfp/nfp_net_offload.c | 6 +- 4 files changed, 82 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h index adbe023..fc220cd 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h @@ -61,6 +61,7 @@ enum static_regs { enum nfp_bpf_action_type { NN_ACT_TC_DROP, NN_ACT_TC_REDIR, + NN_ACT_DIRECT, }; /* Software register representation, hardware encoding in asm.h */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c index 434bef9..3de819a 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c @@ -321,6 +321,16 @@ __emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip, nfp_prog_push(nfp_prog, insn); } +static void emit_br_def(struct nfp_prog *nfp_prog, u16 addr, u8 defer) +{ + if (defer > 2) { + pr_err("BUG: branch defer out of bounds %d\n", defer); + nfp_prog->error = -EFAULT; + return; + } + __emit_br(nfp_prog, BR_UNC, BR_EV_PIP_UNCOND, BR_CSS_NONE, addr, defer); +} + static void emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer) { @@ -1465,9 +1475,65 @@ static void nfp_outro_tc_legacy(struct nfp_prog *nfp_prog) SHF_SC_L_SHF, 16); } +static void nfp_outro_tc_da(struct nfp_prog *nfp_prog) +{ + /* TC direct-action mode: + * 0,1 ok NOT 
SUPPORTED[1] + * 2 drop 0x22 -> drop, count as stat1 + * 4,5 nuke 0x02 -> drop + * 7 redir 0x44 -> redir, count as stat2 + * * unspec 0x11 -> pass, count as stat0 + * + * [1] We can't support OK and RECLASSIFY because we can't tell TC + * the exact decision made. We are forced to support UNSPEC + * to handle aborts so that's the only one we handle for passing + * packets up the stack. + */ + /* Target for aborts */ + nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); + + emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); + + emit_alu(nfp_prog, reg_a(0), + reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16); + + /* Target for normal exits */ + nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog); + + /* if R0 > 7 jump to abort */ + emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0)); + emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0); + emit_alu(nfp_prog, reg_a(0), + reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS); + + wrp_immed(nfp_prog, reg_b(2), 0x41221211); + wrp_immed(nfp_prog, reg_b(3), 0x41001211); + + emit_shf(nfp_prog, reg_a(1), + reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 2); + + emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0)); + emit_shf(nfp_prog, reg_a(2), + reg_imm(0xf), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0); + + emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0)); + emit_shf(nfp_prog, reg_b(2), + reg_imm(0xf), SHF_OP_AND, reg_b(3), SHF_SC_R_SHF, 0); + + emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); + + emit_shf(nfp_prog, reg_b(2), + reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); +} + static void nfp_outro(struct nfp_prog *nfp_prog) { switch (nfp_prog->act) { + case NN_ACT_DIRECT: + nfp_outro_tc_da(nfp_prog); + break; case NN_ACT_TC_DROP: case NN_ACT_TC_REDIR: nfp_outro_tc_legacy(nfp_prog); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c 
b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c index ef6775b..144cae8 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c @@ -86,7 +86,16 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog, return -EINVAL; } - if (reg0->imm != 0 && (reg0->imm & ~0U) != ~0U) { + if (nfp_prog->act != NN_ACT_DIRECT && + reg0->imm != 0 && (reg0->imm & ~0U) != ~0U) { + pr_info("unsupported exit state: %d, imm: %llx\n", + reg0->type, reg0->imm); + return -EINVAL; + } + + if (nfp_prog->act == NN_ACT_DIRECT && reg0->imm <= TC_ACT_REDIRECT && + reg0->imm != TC_ACT_SHOT && reg0->imm != TC_ACT_STOLEN && + reg0->imm != TC_ACT_QUEUED) { pr_info("unsupported exit state: %d, imm: %llx\n", reg0->type, reg0->imm); return -EINVAL; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c index 1ec8e5b..43f42f8 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c @@ -112,8 +112,12 @@ nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) LIST_HEAD(actions); /* TC direct action */ - if (cls_bpf->exts_integrated) + if (cls_bpf->exts_integrated) { + if (tc_no_actions(cls_bpf->exts)) + return NN_ACT_DIRECT; + return -ENOTSUPP; + } /* TC legacy mode */ if (!tc_single_action(cls_bpf->exts)) -- cgit v1.1 From 5a924b8951f835b5ff8a3d9f434f3b230fc9905f Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:29:31 +0100 Subject: rxrpc: Don't store the rxrpc header in the Tx queue sk_buffs Don't store the rxrpc protocol header in sk_buffs on the transmit queue, but rather generate it on the fly and pass it to kernel_sendmsg() as a separate iov. This reduces the amount of storage required. Note that the security header is still stored in the sk_buff as it may get encrypted along with the data (and doesn't change with each transmission). 
Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 5 ++- net/rxrpc/call_event.c | 11 ++----- net/rxrpc/conn_object.c | 1 - net/rxrpc/output.c | 83 +++++++++++++++++++++++++++++++++---------------- net/rxrpc/rxkad.c | 8 ++--- net/rxrpc/sendmsg.c | 51 +++++------------------------- 6 files changed, 71 insertions(+), 88 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 034f525..f021df4 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -385,10 +385,9 @@ struct rxrpc_connection { int debug_id; /* debug ID for printks */ atomic_t serial; /* packet serial number counter */ unsigned int hi_serial; /* highest serial number received */ + u32 security_nonce; /* response re-use preventer */ u8 size_align; /* data size alignment (for security) */ - u8 header_size; /* rxrpc + security header size */ u8 security_size; /* security header size */ - u32 security_nonce; /* response re-use preventer */ u8 security_ix; /* security type */ u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */ }; @@ -946,7 +945,7 @@ extern const s8 rxrpc_ack_priority[]; * output.c */ int rxrpc_send_call_packet(struct rxrpc_call *, u8); -int rxrpc_send_data_packet(struct rxrpc_connection *, struct sk_buff *); +int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *); void rxrpc_reject_packets(struct rxrpc_local *); /* diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 7d1b998..6247ce2 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -139,7 +139,6 @@ void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, */ static void rxrpc_resend(struct rxrpc_call *call) { - struct rxrpc_wire_header *whdr; struct rxrpc_skb_priv *sp; struct sk_buff *skb; rxrpc_seq_t cursor, seq, top; @@ -201,15 +200,8 @@ static void rxrpc_resend(struct rxrpc_call *call) skb = call->rxtx_buffer[ix]; rxrpc_get_skb(skb, rxrpc_skb_tx_got); spin_unlock_bh(&call->lock); - sp = rxrpc_skb(skb); - - /* Each Tx packet needs a 
new serial number */ - sp->hdr.serial = atomic_inc_return(&call->conn->serial); - whdr = (struct rxrpc_wire_header *)skb->head; - whdr->serial = htonl(sp->hdr.serial); - - if (rxrpc_send_data_packet(call->conn, skb) < 0) { + if (rxrpc_send_data_packet(call, skb) < 0) { call->resend_at = now + 2; rxrpc_free_skb(skb, rxrpc_skb_tx_freed); return; @@ -217,6 +209,7 @@ static void rxrpc_resend(struct rxrpc_call *call) if (rxrpc_is_client_call(call)) rxrpc_expose_client_call(call); + sp = rxrpc_skb(skb); sp->resend_at = now + rxrpc_resend_timeout; rxrpc_free_skb(skb, rxrpc_skb_tx_freed); diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 3b55aee..e1e83af 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -53,7 +53,6 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp) spin_lock_init(&conn->state_lock); conn->debug_id = atomic_inc_return(&rxrpc_debug_id); conn->size_align = 4; - conn->header_size = sizeof(struct rxrpc_wire_header); conn->idle_timestamp = jiffies; } diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 16e18a9..817fb0e 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -208,19 +208,42 @@ out: /* * send a packet through the transport endpoint */ -int rxrpc_send_data_packet(struct rxrpc_connection *conn, struct sk_buff *skb) +int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) { - struct kvec iov[1]; + struct rxrpc_connection *conn = call->conn; + struct rxrpc_wire_header whdr; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct msghdr msg; + struct kvec iov[2]; + rxrpc_serial_t serial; + size_t len; int ret, opt; _enter(",{%d}", skb->len); - iov[0].iov_base = skb->head; - iov[0].iov_len = skb->len; + /* Each transmission of a Tx packet needs a new serial number */ + serial = atomic_inc_return(&conn->serial); + + whdr.epoch = htonl(conn->proto.epoch); + whdr.cid = htonl(call->cid); + whdr.callNumber = htonl(call->call_id); + whdr.seq = htonl(sp->hdr.seq); + whdr.serial = 
htonl(serial); + whdr.type = RXRPC_PACKET_TYPE_DATA; + whdr.flags = sp->hdr.flags; + whdr.userStatus = 0; + whdr.securityIndex = call->security_ix; + whdr._rsvd = htons(sp->hdr._rsvd); + whdr.serviceId = htons(call->service_id); + + iov[0].iov_base = &whdr; + iov[0].iov_len = sizeof(whdr); + iov[1].iov_base = skb->head; + iov[1].iov_len = skb->len; + len = iov[0].iov_len + iov[1].iov_len; - msg.msg_name = &conn->params.peer->srx.transport; - msg.msg_namelen = conn->params.peer->srx.transport_len; + msg.msg_name = &call->peer->srx.transport; + msg.msg_namelen = call->peer->srx.transport_len; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; @@ -234,26 +257,33 @@ int rxrpc_send_data_packet(struct rxrpc_connection *conn, struct sk_buff *skb) } } + _proto("Tx DATA %%%u { #%u }", serial, sp->hdr.seq); + /* send the packet with the don't fragment bit set if we currently * think it's small enough */ - if (skb->len - sizeof(struct rxrpc_wire_header) < conn->params.peer->maxdata) { - down_read(&conn->params.local->defrag_sem); - /* send the packet by UDP - * - returns -EMSGSIZE if UDP would have to fragment the packet - * to go out of the interface - * - in which case, we'll have processed the ICMP error - * message and update the peer record - */ - ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 1, - iov[0].iov_len); - - up_read(&conn->params.local->defrag_sem); - if (ret == -EMSGSIZE) - goto send_fragmentable; - - _leave(" = %d [%u]", ret, conn->params.peer->maxdata); - return ret; + if (iov[1].iov_len >= call->peer->maxdata) + goto send_fragmentable; + + down_read(&conn->params.local->defrag_sem); + /* send the packet by UDP + * - returns -EMSGSIZE if UDP would have to fragment the packet + * to go out of the interface + * - in which case, we'll have processed the ICMP error + * message and update the peer record + */ + ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); + + up_read(&conn->params.local->defrag_sem); + if (ret 
== -EMSGSIZE) + goto send_fragmentable; + +done: + if (ret == 0) { + sp->resend_at = jiffies + rxrpc_resend_timeout; + sp->hdr.serial = serial; } + _leave(" = %d [%u]", ret, call->peer->maxdata); + return ret; send_fragmentable: /* attempt to send this message with fragmentation enabled */ @@ -268,8 +298,8 @@ send_fragmentable: SOL_IP, IP_MTU_DISCOVER, (char *)&opt, sizeof(opt)); if (ret == 0) { - ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 1, - iov[0].iov_len); + ret = kernel_sendmsg(conn->params.local->socket, &msg, + iov, 2, len); opt = IP_PMTUDISC_DO; kernel_setsockopt(conn->params.local->socket, SOL_IP, @@ -298,8 +328,7 @@ send_fragmentable: } up_write(&conn->params.local->defrag_sem); - _leave(" = %d [frag %u]", ret, conn->params.peer->maxdata); - return ret; + goto done; } /* diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index ae39255..88d080a 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -80,12 +80,10 @@ static int rxkad_init_connection_security(struct rxrpc_connection *conn) case RXRPC_SECURITY_AUTH: conn->size_align = 8; conn->security_size = sizeof(struct rxkad_level1_hdr); - conn->header_size += sizeof(struct rxkad_level1_hdr); break; case RXRPC_SECURITY_ENCRYPT: conn->size_align = 8; conn->security_size = sizeof(struct rxkad_level2_hdr); - conn->header_size += sizeof(struct rxkad_level2_hdr); break; default: ret = -EKEYREJECTED; @@ -161,7 +159,7 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, _enter(""); - check = sp->hdr.seq ^ sp->hdr.callNumber; + check = sp->hdr.seq ^ call->call_id; data_size |= (u32)check << 16; hdr.data_size = htonl(data_size); @@ -205,7 +203,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, _enter(""); - check = sp->hdr.seq ^ sp->hdr.callNumber; + check = sp->hdr.seq ^ call->call_id; rxkhdr.data_size = htonl(data_size | (u32)check << 16); rxkhdr.checksum = 0; @@ -277,7 +275,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call, /* calculate the 
security checksum */ x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT); x |= sp->hdr.seq & 0x3fffffff; - call->crypto_buf[0] = htonl(sp->hdr.callNumber); + call->crypto_buf[0] = htonl(call->call_id); call->crypto_buf[1] = htonl(x); sg_init_one(&sg, call->crypto_buf, 8); diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 6a39ee9..814b17f 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -134,13 +134,11 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, write_unlock_bh(&call->state_lock); } - _proto("Tx DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq); - if (seq == 1 && rxrpc_is_client_call(call)) rxrpc_expose_client_call(call); sp->resend_at = jiffies + rxrpc_resend_timeout; - ret = rxrpc_send_data_packet(call->conn, skb); + ret = rxrpc_send_data_packet(call, skb); if (ret < 0) { _debug("need instant resend %d", ret); rxrpc_instant_resend(call, ix); @@ -151,29 +149,6 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, } /* - * Convert a host-endian header into a network-endian header. 
- */ -static void rxrpc_insert_header(struct sk_buff *skb) -{ - struct rxrpc_wire_header whdr; - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - - whdr.epoch = htonl(sp->hdr.epoch); - whdr.cid = htonl(sp->hdr.cid); - whdr.callNumber = htonl(sp->hdr.callNumber); - whdr.seq = htonl(sp->hdr.seq); - whdr.serial = htonl(sp->hdr.serial); - whdr.type = sp->hdr.type; - whdr.flags = sp->hdr.flags; - whdr.userStatus = sp->hdr.userStatus; - whdr.securityIndex = sp->hdr.securityIndex; - whdr._rsvd = htons(sp->hdr._rsvd); - whdr.serviceId = htons(sp->hdr.serviceId); - - memcpy(skb->head, &whdr, sizeof(whdr)); -} - -/* * send data through a socket * - must be called in process context * - caller holds the socket locked @@ -232,7 +207,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, space = chunk + call->conn->size_align; space &= ~(call->conn->size_align - 1UL); - size = space + call->conn->header_size; + size = space + call->conn->security_size; _debug("SIZE: %zu/%zu/%zu", chunk, space, size); @@ -248,9 +223,9 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, ASSERTCMP(skb->mark, ==, 0); - _debug("HS: %u", call->conn->header_size); - skb_reserve(skb, call->conn->header_size); - skb->len += call->conn->header_size; + _debug("HS: %u", call->conn->security_size); + skb_reserve(skb, call->conn->security_size); + skb->len += call->conn->security_size; sp = rxrpc_skb(skb); sp->remain = chunk; @@ -312,33 +287,23 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, seq = call->tx_top + 1; - sp->hdr.epoch = conn->proto.epoch; - sp->hdr.cid = call->cid; - sp->hdr.callNumber = call->call_id; sp->hdr.seq = seq; - sp->hdr.serial = atomic_inc_return(&conn->serial); - sp->hdr.type = RXRPC_PACKET_TYPE_DATA; - sp->hdr.userStatus = 0; - sp->hdr.securityIndex = call->security_ix; sp->hdr._rsvd = 0; - sp->hdr.serviceId = call->service_id; + sp->hdr.flags = conn->out_clientflag; - sp->hdr.flags = conn->out_clientflag; if (msg_data_left(msg) == 0 && !more) sp->hdr.flags |= RXRPC_LAST_PACKET; 
else if (call->tx_top - call->tx_hard_ack < call->tx_winsize) sp->hdr.flags |= RXRPC_MORE_PACKETS; - if (more && seq & 1) + if (seq & 1) sp->hdr.flags |= RXRPC_REQUEST_ACK; ret = conn->security->secure_packet( - call, skb, skb->mark, - skb->head + sizeof(struct rxrpc_wire_header)); + call, skb, skb->mark, skb->head); if (ret < 0) goto out; - rxrpc_insert_header(skb); rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more); skb = NULL; } -- cgit v1.1 From f07373ead455a396e15a431bc08d8ce1dac6f1cf Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:29:32 +0100 Subject: rxrpc: Add re-sent Tx annotation Add a Tx-phase annotation for packet buffers to indicate that a buffer has already been retransmitted. This will be used by future congestion management. Re-retransmissions of a packet don't affect the congestion window management in the same way as initial retransmissions. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 2 ++ net/rxrpc/call_event.c | 28 +++++++++++++++++++--------- net/rxrpc/input.c | 14 +++++++++++--- 3 files changed, 32 insertions(+), 12 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index f021df4..dcf54e3 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -505,6 +505,8 @@ struct rxrpc_call { #define RXRPC_TX_ANNO_UNACK 1 #define RXRPC_TX_ANNO_NAK 2 #define RXRPC_TX_ANNO_RETRANS 3 +#define RXRPC_TX_ANNO_MASK 0x03 +#define RXRPC_TX_ANNO_RESENT 0x04 #define RXRPC_RX_ANNO_JUMBO 0x3f /* Jumbo subpacket number + 1 if not zero */ #define RXRPC_RX_ANNO_JLAST 0x40 /* Set if last element of a jumbo packet */ #define RXRPC_RX_ANNO_VERIFIED 0x80 /* Set if verified and decrypted */ diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 6247ce2..34ad967 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -144,7 +144,7 @@ static void rxrpc_resend(struct rxrpc_call *call) rxrpc_seq_t cursor, seq, top; unsigned long resend_at, now; int ix; - u8 annotation; + u8
annotation, anno_type; _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); @@ -165,14 +165,16 @@ static void rxrpc_resend(struct rxrpc_call *call) for (seq = cursor + 1; before_eq(seq, top); seq++) { ix = seq & RXRPC_RXTX_BUFF_MASK; annotation = call->rxtx_annotations[ix]; - if (annotation == RXRPC_TX_ANNO_ACK) + anno_type = annotation & RXRPC_TX_ANNO_MASK; + annotation &= ~RXRPC_TX_ANNO_MASK; + if (anno_type == RXRPC_TX_ANNO_ACK) continue; skb = call->rxtx_buffer[ix]; rxrpc_see_skb(skb, rxrpc_skb_tx_seen); sp = rxrpc_skb(skb); - if (annotation == RXRPC_TX_ANNO_UNACK) { + if (anno_type == RXRPC_TX_ANNO_UNACK) { if (time_after(sp->resend_at, now)) { if (time_before(sp->resend_at, resend_at)) resend_at = sp->resend_at; @@ -181,7 +183,7 @@ static void rxrpc_resend(struct rxrpc_call *call) } /* Okay, we need to retransmit a packet. */ - call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS; + call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS | annotation; } call->resend_at = resend_at; @@ -194,7 +196,8 @@ static void rxrpc_resend(struct rxrpc_call *call) for (seq = cursor + 1; before_eq(seq, top); seq++) { ix = seq & RXRPC_RXTX_BUFF_MASK; annotation = call->rxtx_annotations[ix]; - if (annotation != RXRPC_TX_ANNO_RETRANS) + anno_type = annotation & RXRPC_TX_ANNO_MASK; + if (anno_type != RXRPC_TX_ANNO_RETRANS) continue; skb = call->rxtx_buffer[ix]; @@ -220,10 +223,17 @@ static void rxrpc_resend(struct rxrpc_call *call) * received and the packet might have been hard-ACK'd (in which * case it will no longer be in the buffer). 
*/ - if (after(seq, call->tx_hard_ack) && - (call->rxtx_annotations[ix] == RXRPC_TX_ANNO_RETRANS || - call->rxtx_annotations[ix] == RXRPC_TX_ANNO_NAK)) - call->rxtx_annotations[ix] = RXRPC_TX_ANNO_UNACK; + if (after(seq, call->tx_hard_ack)) { + annotation = call->rxtx_annotations[ix]; + anno_type = annotation & RXRPC_TX_ANNO_MASK; + if (anno_type == RXRPC_TX_ANNO_RETRANS || + anno_type == RXRPC_TX_ANNO_NAK) { + annotation &= ~RXRPC_TX_ANNO_MASK; + annotation |= RXRPC_TX_ANNO_UNACK; + } + annotation |= RXRPC_TX_ANNO_RESENT; + call->rxtx_annotations[ix] = annotation; + } if (after(call->tx_hard_ack, seq)) seq = call->tx_hard_ack; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 7ac1edf..aa261df 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -388,17 +388,25 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, { bool resend = false; int ix; + u8 annotation, anno_type; for (; nr_acks > 0; nr_acks--, seq++) { ix = seq & RXRPC_RXTX_BUFF_MASK; + annotation = call->rxtx_annotations[ix]; + anno_type = annotation & RXRPC_TX_ANNO_MASK; + annotation &= ~RXRPC_TX_ANNO_MASK; switch (*acks++) { case RXRPC_ACK_TYPE_ACK: - call->rxtx_annotations[ix] = RXRPC_TX_ANNO_ACK; + if (anno_type == RXRPC_TX_ANNO_ACK) + continue; + call->rxtx_annotations[ix] = + RXRPC_TX_ANNO_ACK | annotation; break; case RXRPC_ACK_TYPE_NACK: - if (call->rxtx_annotations[ix] == RXRPC_TX_ANNO_NAK) + if (anno_type == RXRPC_TX_ANNO_NAK) continue; - call->rxtx_annotations[ix] = RXRPC_TX_ANNO_NAK; + call->rxtx_annotations[ix] = + RXRPC_TX_ANNO_NAK | annotation; resend = true; break; default: -- cgit v1.1 From cf1a6474f80735ff4a5d99f3dd68a94dbec8455f Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:41:53 +0100 Subject: rxrpc: Add per-peer RTT tracker Add a function to track the average RTT for a peer. Sources of RTT data will be added in subsequent patches. 
The RTT data will be useful in the future for determining resend timeouts and for handling the slow-start part of the Rx protocol. Also add a pair of tracepoints, one to log transmissions to elicit a response for RTT purposes and one to log responses that contribute RTT data. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 61 ++++++++++++++++++++++++++++++++++++++++++++ net/rxrpc/ar-internal.h | 25 +++++++++++++++--- net/rxrpc/misc.c | 8 ++++++ net/rxrpc/peer_event.c | 41 +++++++++++++++++++++++++++++ 4 files changed, 131 insertions(+), 4 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 75a5d8b..e8f2afb 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -353,6 +353,67 @@ TRACE_EVENT(rxrpc_recvmsg, __entry->ret) ); +TRACE_EVENT(rxrpc_rtt_tx, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_rtt_tx_trace why, + rxrpc_serial_t send_serial), + + TP_ARGS(call, why, send_serial), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_rtt_tx_trace, why ) + __field(rxrpc_serial_t, send_serial ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->why = why; + __entry->send_serial = send_serial; + ), + + TP_printk("c=%p %s sr=%08x", + __entry->call, + rxrpc_rtt_tx_traces[__entry->why], + __entry->send_serial) + ); + +TRACE_EVENT(rxrpc_rtt_rx, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, + rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, + s64 rtt, u8 nr, s64 avg), + + TP_ARGS(call, why, send_serial, resp_serial, rtt, nr, avg), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_rtt_rx_trace, why ) + __field(u8, nr ) + __field(rxrpc_serial_t, send_serial ) + __field(rxrpc_serial_t, resp_serial ) + __field(s64, rtt ) + __field(u64, avg ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->why = why; + __entry->send_serial = send_serial; + __entry->resp_serial = resp_serial; + __entry->rtt = rtt; + 
__entry->nr = nr; + __entry->avg = avg; + ), + + TP_printk("c=%p %s sr=%08x rr=%08x rtt=%lld nr=%u avg=%lld", + __entry->call, + rxrpc_rtt_rx_traces[__entry->why], + __entry->send_serial, + __entry->resp_serial, + __entry->rtt, + __entry->nr, + __entry->avg) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index dcf54e3..79c671e 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -258,10 +258,11 @@ struct rxrpc_peer { /* calculated RTT cache */ #define RXRPC_RTT_CACHE_SIZE 32 - suseconds_t rtt; /* current RTT estimate (in uS) */ - unsigned int rtt_point; /* next entry at which to insert */ - unsigned int rtt_usage; /* amount of cache actually used */ - suseconds_t rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* calculated RTT cache */ + u64 rtt; /* Current RTT estimate (in nS) */ + u64 rtt_sum; /* Sum of cache contents */ + u64 rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* Determined RTT cache */ + u8 rtt_cursor; /* next entry at which to insert */ + u8 rtt_usage; /* amount of cache actually used */ }; /* @@ -657,6 +658,20 @@ enum rxrpc_recvmsg_trace { extern const char rxrpc_recvmsg_traces[rxrpc_recvmsg__nr_trace][5]; +enum rxrpc_rtt_tx_trace { + rxrpc_rtt_tx_ping, + rxrpc_rtt_tx__nr_trace +}; + +extern const char rxrpc_rtt_tx_traces[rxrpc_rtt_tx__nr_trace][5]; + +enum rxrpc_rtt_rx_trace { + rxrpc_rtt_rx_ping_response, + rxrpc_rtt_rx__nr_trace +}; + +extern const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5]; + extern const char *const rxrpc_pkts[]; extern const char *rxrpc_acks(u8 reason); @@ -955,6 +970,8 @@ void rxrpc_reject_packets(struct rxrpc_local *); */ void rxrpc_error_report(struct sock *); void rxrpc_peer_error_distributor(struct work_struct *); +void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace, + rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t); /* * peer_object.c diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 026e1f2..6321c23 
100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -182,3 +182,11 @@ const char rxrpc_recvmsg_traces[rxrpc_recvmsg__nr_trace][5] = { [rxrpc_recvmsg_to_be_accepted] = "TBAC", [rxrpc_recvmsg_return] = "RETN", }; + +const char rxrpc_rtt_tx_traces[rxrpc_rtt_tx__nr_trace][5] = { + [rxrpc_rtt_tx_ping] = "PING", +}; + +const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5] = { + [rxrpc_rtt_rx_ping_response] = "PONG", +}; diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 18276e7..bf13b84 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -305,3 +305,44 @@ void rxrpc_peer_error_distributor(struct work_struct *work) rxrpc_put_peer(peer); _leave(""); } + +/* + * Add RTT information to cache. This is called in softirq mode and has + * exclusive access to the peer RTT data. + */ +void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, + rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, + ktime_t send_time, ktime_t resp_time) +{ + struct rxrpc_peer *peer = call->peer; + s64 rtt; + u64 sum = peer->rtt_sum, avg; + u8 cursor = peer->rtt_cursor, usage = peer->rtt_usage; + + rtt = ktime_to_ns(ktime_sub(resp_time, send_time)); + if (rtt < 0) + return; + + /* Replace the oldest datum in the RTT buffer */ + sum -= peer->rtt_cache[cursor]; + sum += rtt; + peer->rtt_cache[cursor] = rtt; + peer->rtt_cursor = (cursor + 1) & (RXRPC_RTT_CACHE_SIZE - 1); + peer->rtt_sum = sum; + if (usage < RXRPC_RTT_CACHE_SIZE) { + usage++; + peer->rtt_usage = usage; + } + + /* Now recalculate the average */ + if (usage == RXRPC_RTT_CACHE_SIZE) { + avg = sum / RXRPC_RTT_CACHE_SIZE; + } else { + avg = sum; + do_div(avg, usage); + } + + peer->rtt = avg; + trace_rxrpc_rtt_rx(call, why, send_serial, resp_serial, rtt, + usage, avg); +} -- cgit v1.1 From de3d6fa81e684af5817dc379ffc394235a9666cc Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Tue, 20 Sep 2016 14:39:39 +0300 Subject: net/mlx4_en: Add branch prediction hints in RX data-path Add 
likely/unlikely hints to improve branch predictions in the RX data-path. Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index c46355b..6e474af 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -72,7 +72,7 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv, } dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order, frag_info->dma_dir); - if (dma_mapping_error(priv->ddev, dma)) { + if (unlikely(dma_mapping_error(priv->ddev, dma))) { put_page(page); return -ENOMEM; } @@ -108,7 +108,8 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv, ring_alloc[i].page_size) continue; - if (mlx4_alloc_pages(priv, &page_alloc[i], frag_info, gfp)) + if (unlikely(mlx4_alloc_pages(priv, &page_alloc[i], + frag_info, gfp))) goto out; } @@ -585,7 +586,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, frag_info = &priv->frag_info[nr]; if (length <= frag_info->frag_prefix_size) break; - if (!frags[nr].page) + if (unlikely(!frags[nr].page)) goto fail; dma = be64_to_cpu(rx_desc->data[nr].addr); @@ -625,7 +626,7 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, dma_addr_t dma; skb = netdev_alloc_skb(priv->dev, SMALL_PACKET_SIZE + NET_IP_ALIGN); - if (!skb) { + if (unlikely(!skb)) { en_dbg(RX_ERR, priv, "Failed allocating skb\n"); return NULL; } @@ -736,7 +737,8 @@ static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb, { __wsum csum_pseudo_hdr = 0; - if (ipv6h->nexthdr == IPPROTO_FRAGMENT || ipv6h->nexthdr == IPPROTO_HOPOPTS) + if (unlikely(ipv6h->nexthdr == IPPROTO_FRAGMENT || + ipv6h->nexthdr == IPPROTO_HOPOPTS)) return -1; hw_checksum = csum_add(hw_checksum, (__force __wsum)htons(ipv6h->nexthdr)); @@ -769,7 +771,7 @@ static int 
check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, get_fixed_ipv4_csum(hw_checksum, skb, hdr); #if IS_ENABLED(CONFIG_IPV6) else if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV6)) - if (get_fixed_ipv6_csum(hw_checksum, skb, hdr)) + if (unlikely(get_fixed_ipv6_csum(hw_checksum, skb, hdr))) return -1; #endif return 0; @@ -796,10 +798,10 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud u64 timestamp; bool l2_tunnel; - if (!priv->port_up) + if (unlikely(!priv->port_up)) return 0; - if (budget <= 0) + if (unlikely(budget <= 0)) return polled; /* Protect accesses to: ring->xdp_prog, priv->mac_hash list */ @@ -902,9 +904,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud case XDP_PASS: break; case XDP_TX: - if (!mlx4_en_xmit_frame(frags, dev, + if (likely(!mlx4_en_xmit_frame(frags, dev, length, tx_index, - &doorbell_pending)) + &doorbell_pending))) goto consumed; goto xdp_drop; /* Drop on xmit failure */ default: @@ -912,7 +914,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud case XDP_ABORTED: case XDP_DROP: xdp_drop: - if (mlx4_en_rx_recycle(ring, frags)) + if (likely(mlx4_en_rx_recycle(ring, frags))) goto consumed; goto next; } @@ -1016,7 +1018,7 @@ xdp_drop: /* GRO not possible, complete processing here */ skb = mlx4_en_rx_skb(priv, rx_desc, frags, length); - if (!skb) { + if (unlikely(!skb)) { ring->dropped++; goto next; } -- cgit v1.1 From 57c970c2e8d8772237294bb8a6a25a205448fd96 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 20 Sep 2016 14:39:40 +0300 Subject: net/mlx4_en: Fix wrong indentation Use tabs instead of spaces before if statement, no functional change. Fixes: e7c1c2c46201 ("mlx4_en: Added self diagnostics test implementation") Signed-off-by: Kamal Heib Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 6e474af..f2e8bed 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -1023,7 +1023,7 @@ xdp_drop: goto next; } - if (unlikely(priv->validate_loopback)) { + if (unlikely(priv->validate_loopback)) { validate_loopback(priv, skb); goto next; } -- cgit v1.1 From 30353bfc43a1602c020f31d95cf27182ffd23824 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 20 Sep 2016 14:39:41 +0300 Subject: net/mlx4_core: Use RCU to perform radix tree lookup for SRQ Radix tree lookup can be performed without locking. Fixes: 225c7b1feef1 ("IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters") Signed-off-by: Leon Romanovsky Suggested-by: Sagi Grimberg Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/srq.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c index 6714662..f44d089 100644 --- a/drivers/net/ethernet/mellanox/mlx4/srq.c +++ b/drivers/net/ethernet/mellanox/mlx4/srq.c @@ -45,15 +45,12 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type) struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; struct mlx4_srq *srq; - spin_lock(&srq_table->lock); - + rcu_read_lock(); srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1)); + rcu_read_unlock(); if (srq) atomic_inc(&srq->refcount); - - spin_unlock(&srq_table->lock); - - if (!srq) { + else { mlx4_warn(dev, "Async event for bogus SRQ %08x\n", srqn); return; } @@ -301,12 +298,11 @@ struct mlx4_srq *mlx4_srq_lookup(struct mlx4_dev *dev, u32 srqn) { struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; struct mlx4_srq *srq; - unsigned long flags; - 
spin_lock_irqsave(&srq_table->lock, flags); + rcu_read_lock(); srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1)); - spin_unlock_irqrestore(&srq_table->lock, flags); + rcu_read_unlock(); return srq; } -- cgit v1.1 From a7e1f04905e5b2b90251974dddde781301b6be37 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 20 Sep 2016 14:39:42 +0300 Subject: net/mlx4_core: Fix deadlock when switching between polling and event fw commands When switching from polling-based fw commands to event-based fw commands, there is a race condition which could cause a fw command in another task to hang: that task will keep waiting for the polling semaphore, but may never be able to acquire it. This is due to mlx4_cmd_use_events, which "down"s the semaphore back to 0. During driver initialization, this is not a problem, since no other tasks which invoke FW commands are active. However, there is a problem if the driver switches to polling mode and then back to event mode during normal operation. The "test_interrupts" feature does exactly that. Running "ethtool -t offline" causes the PF driver to temporarily switch to polling mode, and then back to event mode. (Note that for VF drivers, such switching is not performed). Fix this by adding a read-write semaphore for protection when switching between modes. Fixes: 225c7b1feef1 ("IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters") Signed-off-by: Jack Morgenstein Signed-off-by: Matan Barak Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 23 +++++++++++++++++------ drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 ++ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index f04a423..a58d96c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -785,17 +785,23 @@ int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, return mlx4_cmd_reset_flow(dev, op, op_modifier, -EIO); if (!mlx4_is_mfunc(dev) || (native && mlx4_is_master(dev))) { + int ret; + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) return mlx4_internal_err_ret_value(dev, op, op_modifier); + down_read(&mlx4_priv(dev)->cmd.switch_sem); if (mlx4_priv(dev)->cmd.use_events) - return mlx4_cmd_wait(dev, in_param, out_param, - out_is_imm, in_modifier, - op_modifier, op, timeout); + ret = mlx4_cmd_wait(dev, in_param, out_param, + out_is_imm, in_modifier, + op_modifier, op, timeout); else - return mlx4_cmd_poll(dev, in_param, out_param, - out_is_imm, in_modifier, - op_modifier, op, timeout); + ret = mlx4_cmd_poll(dev, in_param, out_param, + out_is_imm, in_modifier, + op_modifier, op, timeout); + + up_read(&mlx4_priv(dev)->cmd.switch_sem); + return ret; } return mlx4_slave_cmd(dev, in_param, out_param, out_is_imm, in_modifier, op_modifier, op, timeout); @@ -2454,6 +2460,7 @@ int mlx4_cmd_init(struct mlx4_dev *dev) int flags = 0; if (!priv->cmd.initialized) { + init_rwsem(&priv->cmd.switch_sem); mutex_init(&priv->cmd.slave_cmd_mutex); sema_init(&priv->cmd.poll_sem, 1); priv->cmd.use_events = 0; @@ -2583,6 +2590,7 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev) if (!priv->cmd.context) return -ENOMEM; + down_write(&priv->cmd.switch_sem); for (i = 0; i < priv->cmd.max_cmds; ++i) { priv->cmd.context[i].token = i; priv->cmd.context[i].next = i + 1; @@ -2606,6 +2614,7 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev) 
down(&priv->cmd.poll_sem); priv->cmd.use_events = 1; + up_write(&priv->cmd.switch_sem); return err; } @@ -2618,6 +2627,7 @@ void mlx4_cmd_use_polling(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); int i; + down_write(&priv->cmd.switch_sem); priv->cmd.use_events = 0; for (i = 0; i < priv->cmd.max_cmds; ++i) @@ -2626,6 +2636,7 @@ void mlx4_cmd_use_polling(struct mlx4_dev *dev) kfree(priv->cmd.context); up(&priv->cmd.poll_sem); + up_write(&priv->cmd.switch_sem); } struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index c9d7fc51..c128ba3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -627,6 +628,7 @@ struct mlx4_cmd { struct mutex slave_cmd_mutex; struct semaphore poll_sem; struct semaphore event_sem; + struct rw_semaphore switch_sem; int max_cmds; spinlock_t context_lock; int free_head; -- cgit v1.1 From bfca4c520f7ea78138ddccea2de18dc062b0fefd Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Mon, 19 Sep 2016 19:11:09 +0300 Subject: net: skbuff: Export __skb_vlan_pop This exports the functionality of extracting the tag from the payload, without moving next vlan tag into hw accel tag. Signed-off-by: Shmulik Ladkani Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 1 + net/core/skbuff.c | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c6dab3f..9bf60b5 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3085,6 +3085,7 @@ bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu); struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); struct sk_buff *skb_vlan_untag(struct sk_buff *skb); int skb_ensure_writable(struct sk_buff *skb, int write_len); +int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci); int skb_vlan_pop(struct sk_buff *skb); int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); struct sk_buff *pskb_extract(struct sk_buff *skb, int off, int to_copy, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 7bf82a2..6c22351 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4522,8 +4522,10 @@ int skb_ensure_writable(struct sk_buff *skb, int write_len) } EXPORT_SYMBOL(skb_ensure_writable); -/* remove VLAN header from packet and update csum accordingly. */ -static int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci) +/* remove VLAN header from packet and update csum accordingly. + * expects a non skb_vlan_tag_present skb with a vlan tag payload + */ +int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci) { struct vlan_hdr *vhdr; unsigned int offset = skb->data - skb_mac_header(skb); @@ -4554,6 +4556,7 @@ pull: return err; } +EXPORT_SYMBOL(__skb_vlan_pop); int skb_vlan_pop(struct sk_buff *skb) { -- cgit v1.1 From 45a497f2d149a4a8061c61518a79d59f1f3034b2 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Mon, 19 Sep 2016 19:11:10 +0300 Subject: net/sched: act_vlan: Introduce TCA_VLAN_ACT_MODIFY vlan action TCA_VLAN_ACT_MODIFY allows one to change an existing tag. It accepts same attributes as TCA_VLAN_ACT_PUSH (protocol, id, priority). 
If packet is vlan tagged, then the tag gets overwritten according to user specified attributes. For example, this allows user to replace a tag's vid while preserving its priority bits (as opposed to "action vlan pop pipe action vlan push"). Signed-off-by: Shmulik Ladkani Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/uapi/linux/tc_act/tc_vlan.h | 1 + net/sched/act_vlan.c | 29 ++++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/tc_act/tc_vlan.h b/include/uapi/linux/tc_act/tc_vlan.h index be72b6e..bddb272 100644 --- a/include/uapi/linux/tc_act/tc_vlan.h +++ b/include/uapi/linux/tc_act/tc_vlan.h @@ -16,6 +16,7 @@ #define TCA_VLAN_ACT_POP 1 #define TCA_VLAN_ACT_PUSH 2 +#define TCA_VLAN_ACT_MODIFY 3 struct tc_vlan { tc_gen; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 59a8d31..a95c00b 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -30,6 +30,7 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, struct tcf_vlan *v = to_vlan(a); int action; int err; + u16 tci; spin_lock(&v->tcf_lock); tcf_lastuse_update(&v->tcf_tm); @@ -48,6 +49,30 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, if (err) goto drop; break; + case TCA_VLAN_ACT_MODIFY: + /* No-op if no vlan tag (either hw-accel or in-payload) */ + if (!skb_vlan_tagged(skb)) + goto unlock; + /* extract existing tag (and guarantee no hw-accel tag) */ + if (skb_vlan_tag_present(skb)) { + tci = skb_vlan_tag_get(skb); + skb->vlan_tci = 0; + } else { + /* in-payload vlan tag, pop it */ + err = __skb_vlan_pop(skb, &tci); + if (err) + goto drop; + } + /* replace the vid */ + tci = (tci & ~VLAN_VID_MASK) | v->tcfv_push_vid; + /* replace prio bits, if tcfv_push_prio specified */ + if (v->tcfv_push_prio) { + tci &= ~VLAN_PRIO_MASK; + tci |= v->tcfv_push_prio << VLAN_PRIO_SHIFT; + } + /* put updated tci as hwaccel tag */ + __vlan_hwaccel_put_tag(skb, v->tcfv_push_proto, tci); + 
break; default: BUG(); } @@ -102,6 +127,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, case TCA_VLAN_ACT_POP: break; case TCA_VLAN_ACT_PUSH: + case TCA_VLAN_ACT_MODIFY: if (!tb[TCA_VLAN_PUSH_VLAN_ID]) { if (exists) tcf_hash_release(*a, bind); @@ -185,7 +211,8 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a, if (nla_put(skb, TCA_VLAN_PARMS, sizeof(opt), &opt)) goto nla_put_failure; - if (v->tcfv_action == TCA_VLAN_ACT_PUSH && + if ((v->tcfv_action == TCA_VLAN_ACT_PUSH || + v->tcfv_action == TCA_VLAN_ACT_MODIFY) && (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, v->tcfv_push_vid) || nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL, v->tcfv_push_proto) || -- cgit v1.1 From 636c2628086e40c86dac7ddc84a1c4b4fcccc6e3 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Tue, 20 Sep 2016 12:48:36 +0300 Subject: net: skbuff: Remove erroneous length validation in skb_vlan_pop() In 93515d53b1 "net: move vlan pop/push functions into common code" skb_vlan_pop was moved from its private location in openvswitch to skbuff common code. In case skb has non hw-accel vlan tag, the original 'pop_vlan()' assured that skb->len is sufficient (if skb->len < VLAN_ETH_HLEN then pop was considered a no-op). This validation was moved as is into the new common 'skb_vlan_pop'. Alas, in its original location (openvswitch), there was a guarantee that 'data' points to the mac_header, therefore the 'skb->len < VLAN_ETH_HLEN' condition made sense. However there's no such guarantee in the generic 'skb_vlan_pop'. For short packets received in rx path going through 'skb_vlan_pop', this causes 'skb_vlan_pop' to fail pop-ing a valid vlan hdr (in the non hw-accel case) or to fail moving next tag into hw-accel tag. Remove the 'skb->len < VLAN_ETH_HLEN' condition entirely: It is superfluous since inner '__skb_vlan_pop' already verifies there are VLAN_ETH_HLEN writable bytes at the mac_header. 
Note this presents a slight change to skb_vlan_pop() users: In case total length is smaller than VLAN_ETH_HLEN, skb_vlan_pop() now returns an error, as opposed to previous "no-op" behavior. Existing callers (e.g. tc act vlan, ovs) usually drop the packet if 'skb_vlan_pop' fails. Fixes: 93515d53b1 ("net: move vlan pop/push functions into common code") Signed-off-by: Shmulik Ladkani Cc: Pravin Shelar Reviewed-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/core/skbuff.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 6c22351..b2a51bf 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4567,9 +4567,8 @@ int skb_vlan_pop(struct sk_buff *skb) if (likely(skb_vlan_tag_present(skb))) { skb->vlan_tci = 0; } else { - if (unlikely((skb->protocol != htons(ETH_P_8021Q) && - skb->protocol != htons(ETH_P_8021AD)) || - skb->len < VLAN_ETH_HLEN)) + if (unlikely(skb->protocol != htons(ETH_P_8021Q) && + skb->protocol != htons(ETH_P_8021AD))) return 0; err = __skb_vlan_pop(skb, &vlan_tci); @@ -4577,9 +4576,8 @@ int skb_vlan_pop(struct sk_buff *skb) return err; } /* move next vlan tag to hw accel tag */ - if (likely((skb->protocol != htons(ETH_P_8021Q) && - skb->protocol != htons(ETH_P_8021AD)) || - skb->len < VLAN_ETH_HLEN)) + if (likely(skb->protocol != htons(ETH_P_8021Q) && + skb->protocol != htons(ETH_P_8021AD))) return 0; vlan_proto = skb->protocol; -- cgit v1.1 From ecf4ee41d25832a6ec52f8b54dfaa46c08b949d5 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Tue, 20 Sep 2016 12:48:37 +0300 Subject: net: skbuff: Coding: Use eth_type_vlan() instead of open coding it Fix 'skb_vlan_pop' to use eth_type_vlan instead of directly comparing skb->protocol to ETH_P_8021Q or ETH_P_8021AD. Signed-off-by: Shmulik Ladkani Reviewed-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- net/core/skbuff.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b2a51bf..d36c754 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4567,8 +4567,7 @@ int skb_vlan_pop(struct sk_buff *skb) if (likely(skb_vlan_tag_present(skb))) { skb->vlan_tci = 0; } else { - if (unlikely(skb->protocol != htons(ETH_P_8021Q) && - skb->protocol != htons(ETH_P_8021AD))) + if (unlikely(!eth_type_vlan(skb->protocol))) return 0; err = __skb_vlan_pop(skb, &vlan_tci); @@ -4576,8 +4575,7 @@ int skb_vlan_pop(struct sk_buff *skb) return err; } /* move next vlan tag to hw accel tag */ - if (likely(skb->protocol != htons(ETH_P_8021Q) && - skb->protocol != htons(ETH_P_8021AD))) + if (likely(!eth_type_vlan(skb->protocol))) return 0; vlan_proto = skb->protocol; -- cgit v1.1 From d57fd6cafbad29d0648ed769f6df07b02f10d613 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Tue, 20 Sep 2016 17:13:06 +0530 Subject: cxgb4: move common filter code to separate file Move common filter code to separate files. Also fix the following checkpatch checks. CHECK: Comparison to NULL could be written "!f->l2t" + if (f->l2t == NULL) { CHECK: spaces preferred around that '/' (ctx:VxV) + fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr)/16)); Signed-off-by: Rahul Lakkireddy Signed-off-by: Hariprasad Shenai Signed-off-by: David S. 
Miller --- drivers/net/ethernet/chelsio/cxgb4/Makefile | 2 +- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 23 ++ drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 274 ++++++++++++++++++++++ drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h | 47 ++++ drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 264 +-------------------- 5 files changed, 346 insertions(+), 264 deletions(-) create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile index 2461296..da88981 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/Makefile +++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_CHELSIO_T4) += cxgb4.o -cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o cxgb4_uld.o sched.o +cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o cxgb4_uld.o sched.o cxgb4_filter.o cxgb4-$(CONFIG_CHELSIO_T4_DCB) += cxgb4_dcb.o cxgb4-$(CONFIG_CHELSIO_T4_FCOE) += cxgb4_fcoe.o cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 1f9867d..a844fd2 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -1025,6 +1025,29 @@ enum { VLAN_REWRITE }; +/* Host shadow copy of ingress filter entry. This is in host native format + * and doesn't match the ordering or bit order, etc. of the hardware of the + * firmware command. The use of bit-field structure elements is purely to + * remind ourselves of the field size limitations and save memory in the case + * where the filter table is large. + */ +struct filter_entry { + /* Administrative fields for filter. 
*/ + u32 valid:1; /* filter allocated and valid */ + u32 locked:1; /* filter is administratively locked */ + + u32 pending:1; /* filter action is pending firmware reply */ + u32 smtidx:8; /* Source MAC Table index for smac */ + struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ + + /* The filter itself. Most of this is a straight copy of information + * provided by the extended ioctl(). Some fields are translated to + * internal forms -- for instance the Ingress Queue ID passed in from + * the ioctl() is translated into the Absolute Ingress Queue ID. + */ + struct ch_filter_specification fs; +}; + static inline int is_offload(const struct adapter *adap) { return adap->params.offload; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c new file mode 100644 index 0000000..8a26a54 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -0,0 +1,274 @@ +/* + * This file is part of the Chelsio T4 Ethernet driver for Linux. + * + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "cxgb4.h" +#include "l2t.h" +#include "t4fw_api.h" +#include "cxgb4_filter.h" + +/* Delete the filter at a specified index. */ +static int del_filter_wr(struct adapter *adapter, int fidx) +{ + struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; + struct fw_filter_wr *fwr; + unsigned int len, ftid; + struct sk_buff *skb; + + len = sizeof(*fwr); + ftid = adapter->tids.ftid_base + fidx; + + skb = alloc_skb(len, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + fwr = (struct fw_filter_wr *)__skb_put(skb, len); + t4_mk_filtdelwr(ftid, fwr, adapter->sge.fw_evtq.abs_id); + + /* Mark the filter as "pending" and ship off the Filter Work Request. + * When we get the Work Request Reply we'll clear the pending status. + */ + f->pending = 1; + t4_mgmt_tx(adapter, skb); + return 0; +} + +/* Send a Work Request to write the filter at a specified index. We construct + * a Firmware Filter Work Request to have the work done and put the indicated + * filter into "pending" mode which will prevent any further actions against + * it till we get a reply from the firmware on the completion status of the + * request. 
+ */ +int set_filter_wr(struct adapter *adapter, int fidx) +{ + struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; + struct fw_filter_wr *fwr; + struct sk_buff *skb; + unsigned int ftid; + + skb = alloc_skb(sizeof(*fwr), GFP_KERNEL); + if (!skb) + return -ENOMEM; + + /* If the new filter requires loopback Destination MAC and/or VLAN + * rewriting then we need to allocate a Layer 2 Table (L2T) entry for + * the filter. + */ + if (f->fs.newdmac || f->fs.newvlan) { + /* allocate L2T entry for new filter */ + f->l2t = t4_l2t_alloc_switching(adapter, f->fs.vlan, + f->fs.eport, f->fs.dmac); + if (!f->l2t) { + kfree_skb(skb); + return -ENOMEM; + } + } + + ftid = adapter->tids.ftid_base + fidx; + + fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr)); + memset(fwr, 0, sizeof(*fwr)); + + /* It would be nice to put most of the following in t4_hw.c but most + * of the work is translating the cxgbtool ch_filter_specification + * into the Work Request and the definition of that structure is + * currently in cxgbtool.h which isn't appropriate to pull into the + * common code. We may eventually try to come up with a more neutral + * filter specification structure but for now it's easiest to simply + * put this fairly direct code in line ... 
+ */ + fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR)); + fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr) / 16)); + fwr->tid_to_iq = + htonl(FW_FILTER_WR_TID_V(ftid) | + FW_FILTER_WR_RQTYPE_V(f->fs.type) | + FW_FILTER_WR_NOREPLY_V(0) | + FW_FILTER_WR_IQ_V(f->fs.iq)); + fwr->del_filter_to_l2tix = + htonl(FW_FILTER_WR_RPTTID_V(f->fs.rpttid) | + FW_FILTER_WR_DROP_V(f->fs.action == FILTER_DROP) | + FW_FILTER_WR_DIRSTEER_V(f->fs.dirsteer) | + FW_FILTER_WR_MASKHASH_V(f->fs.maskhash) | + FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) | + FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) | + FW_FILTER_WR_DMAC_V(f->fs.newdmac) | + FW_FILTER_WR_SMAC_V(f->fs.newsmac) | + FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT || + f->fs.newvlan == VLAN_REWRITE) | + FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE || + f->fs.newvlan == VLAN_REWRITE) | + FW_FILTER_WR_HITCNTS_V(f->fs.hitcnts) | + FW_FILTER_WR_TXCHAN_V(f->fs.eport) | + FW_FILTER_WR_PRIO_V(f->fs.prio) | + FW_FILTER_WR_L2TIX_V(f->l2t ? f->l2t->idx : 0)); + fwr->ethtype = htons(f->fs.val.ethtype); + fwr->ethtypem = htons(f->fs.mask.ethtype); + fwr->frag_to_ovlan_vldm = + (FW_FILTER_WR_FRAG_V(f->fs.val.frag) | + FW_FILTER_WR_FRAGM_V(f->fs.mask.frag) | + FW_FILTER_WR_IVLAN_VLD_V(f->fs.val.ivlan_vld) | + FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) | + FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) | + FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld)); + fwr->smac_sel = 0; + fwr->rx_chan_rx_rpl_iq = + htons(FW_FILTER_WR_RX_CHAN_V(0) | + FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id)); + fwr->maci_to_matchtypem = + htonl(FW_FILTER_WR_MACI_V(f->fs.val.macidx) | + FW_FILTER_WR_MACIM_V(f->fs.mask.macidx) | + FW_FILTER_WR_FCOE_V(f->fs.val.fcoe) | + FW_FILTER_WR_FCOEM_V(f->fs.mask.fcoe) | + FW_FILTER_WR_PORT_V(f->fs.val.iport) | + FW_FILTER_WR_PORTM_V(f->fs.mask.iport) | + FW_FILTER_WR_MATCHTYPE_V(f->fs.val.matchtype) | + FW_FILTER_WR_MATCHTYPEM_V(f->fs.mask.matchtype)); + fwr->ptcl = f->fs.val.proto; + fwr->ptclm = 
f->fs.mask.proto; + fwr->ttyp = f->fs.val.tos; + fwr->ttypm = f->fs.mask.tos; + fwr->ivlan = htons(f->fs.val.ivlan); + fwr->ivlanm = htons(f->fs.mask.ivlan); + fwr->ovlan = htons(f->fs.val.ovlan); + fwr->ovlanm = htons(f->fs.mask.ovlan); + memcpy(fwr->lip, f->fs.val.lip, sizeof(fwr->lip)); + memcpy(fwr->lipm, f->fs.mask.lip, sizeof(fwr->lipm)); + memcpy(fwr->fip, f->fs.val.fip, sizeof(fwr->fip)); + memcpy(fwr->fipm, f->fs.mask.fip, sizeof(fwr->fipm)); + fwr->lp = htons(f->fs.val.lport); + fwr->lpm = htons(f->fs.mask.lport); + fwr->fp = htons(f->fs.val.fport); + fwr->fpm = htons(f->fs.mask.fport); + if (f->fs.newsmac) + memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma)); + + /* Mark the filter as "pending" and ship off the Filter Work Request. + * When we get the Work Request Reply we'll clear the pending status. + */ + f->pending = 1; + set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3); + t4_ofld_send(adapter, skb); + return 0; +} + +/* Return an error number if the indicated filter isn't writable ... */ +int writable_filter(struct filter_entry *f) +{ + if (f->locked) + return -EPERM; + if (f->pending) + return -EBUSY; + + return 0; +} + +/* Delete the filter at the specified index (if valid). The checks for all + * the common problems with doing this like the filter being locked, currently + * pending in another operation, etc. + */ +int delete_filter(struct adapter *adapter, unsigned int fidx) +{ + struct filter_entry *f; + int ret; + + if (fidx >= adapter->tids.nftids + adapter->tids.nsftids) + return -EINVAL; + + f = &adapter->tids.ftid_tab[fidx]; + ret = writable_filter(f); + if (ret) + return ret; + if (f->valid) + return del_filter_wr(adapter, fidx); + + return 0; +} + +/* Clear a filter and release any of its resources that we own. This also + * clears the filter's "pending" status. 
+ */ +void clear_filter(struct adapter *adap, struct filter_entry *f) +{ + /* If the new or old filter have loopback rewriteing rules then we'll + * need to free any existing Layer Two Table (L2T) entries of the old + * filter rule. The firmware will handle freeing up any Source MAC + * Table (SMT) entries used for rewriting Source MAC Addresses in + * loopback rules. + */ + if (f->l2t) + cxgb4_l2t_release(f->l2t); + + /* The zeroing of the filter rule below clears the filter valid, + * pending, locked flags, l2t pointer, etc. so it's all we need for + * this operation. + */ + memset(f, 0, sizeof(*f)); +} + +/* Handle a filter write/deletion reply. */ +void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) +{ + unsigned int idx = GET_TID(rpl); + unsigned int nidx = idx - adap->tids.ftid_base; + struct filter_entry *f; + unsigned int ret; + + if (idx >= adap->tids.ftid_base && nidx < + (adap->tids.nftids + adap->tids.nsftids)) { + idx = nidx; + ret = TCB_COOKIE_G(rpl->cookie); + f = &adap->tids.ftid_tab[idx]; + + if (ret == FW_FILTER_WR_FLT_DELETED) { + /* Clear the filter when we get confirmation from the + * hardware that the filter has been deleted. + */ + clear_filter(adap, f); + } else if (ret == FW_FILTER_WR_SMT_TBL_FULL) { + dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n", + idx); + clear_filter(adap, f); + } else if (ret == FW_FILTER_WR_FLT_ADDED) { + f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff; + f->pending = 0; /* asynchronous setup completed */ + f->valid = 1; + } else { + /* Something went wrong. Issue a warning about the + * problem and clear everything out. 
+ */ + dev_err(adap->pdev_dev, "filter %u setup failed with error %u\n", + idx, ret); + clear_filter(adap, f); + } + } +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h new file mode 100644 index 0000000..f6bd0bf --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h @@ -0,0 +1,47 @@ +/* + * This file is part of the Chelsio T4 Ethernet driver for Linux. + * + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __CXGB4_FILTER_H +#define __CXGB4_FILTER_H + +#include "t4_msg.h" + +void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl); +void clear_filter(struct adapter *adap, struct filter_entry *f); + +int set_filter_wr(struct adapter *adapter, int fidx); +int delete_filter(struct adapter *adapter, unsigned int fidx); + +int writable_filter(struct filter_entry *f); +#endif /* __CXGB4_FILTER_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index d1ebb84..5cdcfe8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -67,6 +67,7 @@ #include #include "cxgb4.h" +#include "cxgb4_filter.h" #include "t4_regs.h" #include "t4_values.h" #include "t4_msg.h" @@ -87,30 +88,6 @@ char cxgb4_driver_name[] = KBUILD_MODNAME; const char cxgb4_driver_version[] = DRV_VERSION; #define DRV_DESC "Chelsio T4/T5/T6 Network Driver" -/* Host shadow copy of ingress filter entry. This is in host native format - * and doesn't match the ordering or bit order, etc. of the hardware of the - * firmware command. The use of bit-field structure elements is purely to - * remind ourselves of the field size limitations and save memory in the case - * where the filter table is large. - */ -struct filter_entry { - /* Administrative fields for filter. - */ - u32 valid:1; /* filter allocated and valid */ - u32 locked:1; /* filter is administratively locked */ - - u32 pending:1; /* filter action is pending firmware reply */ - u32 smtidx:8; /* Source MAC Table index for smac */ - struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ - - /* The filter itself. Most of this is a straight copy of information - * provided by the extended ioctl(). Some fields are translated to - * internal forms -- for instance the Ingress Queue ID passed in from - * the ioctl() is translated into the Absolute Ingress Queue ID. 
- */ - struct ch_filter_specification fs; -}; - #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \ NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\ NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR) @@ -527,66 +504,6 @@ static void dcb_rpl(struct adapter *adap, const struct fw_port_cmd *pcmd) } #endif /* CONFIG_CHELSIO_T4_DCB */ -/* Clear a filter and release any of its resources that we own. This also - * clears the filter's "pending" status. - */ -static void clear_filter(struct adapter *adap, struct filter_entry *f) -{ - /* If the new or old filter have loopback rewriteing rules then we'll - * need to free any existing Layer Two Table (L2T) entries of the old - * filter rule. The firmware will handle freeing up any Source MAC - * Table (SMT) entries used for rewriting Source MAC Addresses in - * loopback rules. - */ - if (f->l2t) - cxgb4_l2t_release(f->l2t); - - /* The zeroing of the filter rule below clears the filter valid, - * pending, locked flags, l2t pointer, etc. so it's all we need for - * this operation. - */ - memset(f, 0, sizeof(*f)); -} - -/* Handle a filter write/deletion reply. - */ -static void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) -{ - unsigned int idx = GET_TID(rpl); - unsigned int nidx = idx - adap->tids.ftid_base; - unsigned int ret; - struct filter_entry *f; - - if (idx >= adap->tids.ftid_base && nidx < - (adap->tids.nftids + adap->tids.nsftids)) { - idx = nidx; - ret = TCB_COOKIE_G(rpl->cookie); - f = &adap->tids.ftid_tab[idx]; - - if (ret == FW_FILTER_WR_FLT_DELETED) { - /* Clear the filter when we get confirmation from the - * hardware that the filter has been deleted. 
- */ - clear_filter(adap, f); - } else if (ret == FW_FILTER_WR_SMT_TBL_FULL) { - dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n", - idx); - clear_filter(adap, f); - } else if (ret == FW_FILTER_WR_FLT_ADDED) { - f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff; - f->pending = 0; /* asynchronous setup completed */ - f->valid = 1; - } else { - /* Something went wrong. Issue a warning about the - * problem and clear everything out. - */ - dev_err(adap->pdev_dev, "filter %u setup failed with error %u\n", - idx, ret); - clear_filter(adap, f); - } - } -} - /* Response queue handler for the FW event queue. */ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp, @@ -1026,151 +943,6 @@ void t4_free_mem(void *addr) kvfree(addr); } -/* Send a Work Request to write the filter at a specified index. We construct - * a Firmware Filter Work Request to have the work done and put the indicated - * filter into "pending" mode which will prevent any further actions against - * it till we get a reply from the firmware on the completion status of the - * request. - */ -static int set_filter_wr(struct adapter *adapter, int fidx) -{ - struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; - struct sk_buff *skb; - struct fw_filter_wr *fwr; - unsigned int ftid; - - skb = alloc_skb(sizeof(*fwr), GFP_KERNEL); - if (!skb) - return -ENOMEM; - - /* If the new filter requires loopback Destination MAC and/or VLAN - * rewriting then we need to allocate a Layer 2 Table (L2T) entry for - * the filter. 
- */ - if (f->fs.newdmac || f->fs.newvlan) { - /* allocate L2T entry for new filter */ - f->l2t = t4_l2t_alloc_switching(adapter, f->fs.vlan, - f->fs.eport, f->fs.dmac); - if (f->l2t == NULL) { - kfree_skb(skb); - return -ENOMEM; - } - } - - ftid = adapter->tids.ftid_base + fidx; - - fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr)); - memset(fwr, 0, sizeof(*fwr)); - - /* It would be nice to put most of the following in t4_hw.c but most - * of the work is translating the cxgbtool ch_filter_specification - * into the Work Request and the definition of that structure is - * currently in cxgbtool.h which isn't appropriate to pull into the - * common code. We may eventually try to come up with a more neutral - * filter specification structure but for now it's easiest to simply - * put this fairly direct code in line ... - */ - fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR)); - fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr)/16)); - fwr->tid_to_iq = - htonl(FW_FILTER_WR_TID_V(ftid) | - FW_FILTER_WR_RQTYPE_V(f->fs.type) | - FW_FILTER_WR_NOREPLY_V(0) | - FW_FILTER_WR_IQ_V(f->fs.iq)); - fwr->del_filter_to_l2tix = - htonl(FW_FILTER_WR_RPTTID_V(f->fs.rpttid) | - FW_FILTER_WR_DROP_V(f->fs.action == FILTER_DROP) | - FW_FILTER_WR_DIRSTEER_V(f->fs.dirsteer) | - FW_FILTER_WR_MASKHASH_V(f->fs.maskhash) | - FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) | - FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) | - FW_FILTER_WR_DMAC_V(f->fs.newdmac) | - FW_FILTER_WR_SMAC_V(f->fs.newsmac) | - FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT || - f->fs.newvlan == VLAN_REWRITE) | - FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE || - f->fs.newvlan == VLAN_REWRITE) | - FW_FILTER_WR_HITCNTS_V(f->fs.hitcnts) | - FW_FILTER_WR_TXCHAN_V(f->fs.eport) | - FW_FILTER_WR_PRIO_V(f->fs.prio) | - FW_FILTER_WR_L2TIX_V(f->l2t ? 
f->l2t->idx : 0)); - fwr->ethtype = htons(f->fs.val.ethtype); - fwr->ethtypem = htons(f->fs.mask.ethtype); - fwr->frag_to_ovlan_vldm = - (FW_FILTER_WR_FRAG_V(f->fs.val.frag) | - FW_FILTER_WR_FRAGM_V(f->fs.mask.frag) | - FW_FILTER_WR_IVLAN_VLD_V(f->fs.val.ivlan_vld) | - FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) | - FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) | - FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld)); - fwr->smac_sel = 0; - fwr->rx_chan_rx_rpl_iq = - htons(FW_FILTER_WR_RX_CHAN_V(0) | - FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id)); - fwr->maci_to_matchtypem = - htonl(FW_FILTER_WR_MACI_V(f->fs.val.macidx) | - FW_FILTER_WR_MACIM_V(f->fs.mask.macidx) | - FW_FILTER_WR_FCOE_V(f->fs.val.fcoe) | - FW_FILTER_WR_FCOEM_V(f->fs.mask.fcoe) | - FW_FILTER_WR_PORT_V(f->fs.val.iport) | - FW_FILTER_WR_PORTM_V(f->fs.mask.iport) | - FW_FILTER_WR_MATCHTYPE_V(f->fs.val.matchtype) | - FW_FILTER_WR_MATCHTYPEM_V(f->fs.mask.matchtype)); - fwr->ptcl = f->fs.val.proto; - fwr->ptclm = f->fs.mask.proto; - fwr->ttyp = f->fs.val.tos; - fwr->ttypm = f->fs.mask.tos; - fwr->ivlan = htons(f->fs.val.ivlan); - fwr->ivlanm = htons(f->fs.mask.ivlan); - fwr->ovlan = htons(f->fs.val.ovlan); - fwr->ovlanm = htons(f->fs.mask.ovlan); - memcpy(fwr->lip, f->fs.val.lip, sizeof(fwr->lip)); - memcpy(fwr->lipm, f->fs.mask.lip, sizeof(fwr->lipm)); - memcpy(fwr->fip, f->fs.val.fip, sizeof(fwr->fip)); - memcpy(fwr->fipm, f->fs.mask.fip, sizeof(fwr->fipm)); - fwr->lp = htons(f->fs.val.lport); - fwr->lpm = htons(f->fs.mask.lport); - fwr->fp = htons(f->fs.val.fport); - fwr->fpm = htons(f->fs.mask.fport); - if (f->fs.newsmac) - memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma)); - - /* Mark the filter as "pending" and ship off the Filter Work Request. - * When we get the Work Request Reply we'll clear the pending status. - */ - f->pending = 1; - set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3); - t4_ofld_send(adapter, skb); - return 0; -} - -/* Delete the filter at a specified index. 
- */ -static int del_filter_wr(struct adapter *adapter, int fidx) -{ - struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; - struct sk_buff *skb; - struct fw_filter_wr *fwr; - unsigned int len, ftid; - - len = sizeof(*fwr); - ftid = adapter->tids.ftid_base + fidx; - - skb = alloc_skb(len, GFP_KERNEL); - if (!skb) - return -ENOMEM; - - fwr = (struct fw_filter_wr *)__skb_put(skb, len); - t4_mk_filtdelwr(ftid, fwr, adapter->sge.fw_evtq.abs_id); - - /* Mark the filter as "pending" and ship off the Filter Work Request. - * When we get the Work Request Reply we'll clear the pending status. - */ - f->pending = 1; - t4_mgmt_tx(adapter, skb); - return 0; -} - static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb, void *accel_priv, select_queue_fallback_t fallback) { @@ -2514,40 +2286,6 @@ static int cxgb_close(struct net_device *dev) return t4_enable_vi(adapter, adapter->pf, pi->viid, false, false); } -/* Return an error number if the indicated filter isn't writable ... - */ -static int writable_filter(struct filter_entry *f) -{ - if (f->locked) - return -EPERM; - if (f->pending) - return -EBUSY; - - return 0; -} - -/* Delete the filter at the specified index (if valid). The checks for all - * the common problems with doing this like the filter being locked, currently - * pending in another operation, etc. 
- */ -static int delete_filter(struct adapter *adapter, unsigned int fidx) -{ - struct filter_entry *f; - int ret; - - if (fidx >= adapter->tids.nftids + adapter->tids.nsftids) - return -EINVAL; - - f = &adapter->tids.ftid_tab[fidx]; - ret = writable_filter(f); - if (ret) - return ret; - if (f->valid) - return del_filter_wr(adapter, fidx); - - return 0; -} - int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid, __be32 sip, __be16 sport, __be16 vlan, unsigned int queue, unsigned char port, unsigned char mask) -- cgit v1.1 From 578b46b9383c3619cc0a6002ff867e732b08b67a Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Tue, 20 Sep 2016 17:13:07 +0530 Subject: cxgb4: add common api support for configuring filters Enable filters for non-offload configuration and add common api support for setting and deleting filters in LE-TCAM region of the hardware. IPv4 filters occupy one slot. IPv6 filters occupy 4 slots and must be on a 4-slot boundary. IPv4 filters can not occupy a slot belonging to IPv6 and the vice-versa is also true. Filters are set and deleted asynchronously. Use completion to wait for reply from firmware in order to allow for synchronization if needed. Signed-off-by: Rahul Lakkireddy Signed-off-by: Hariprasad Shenai Signed-off-by: David S. 
Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 3 + drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 477 +++++++++++++++++++++- drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h | 1 + drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 39 +- drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 26 +- 5 files changed, 512 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index a844fd2..51edb12 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -1038,7 +1038,10 @@ struct filter_entry { u32 pending:1; /* filter action is pending firmware reply */ u32 smtidx:8; /* Source MAC Table index for smac */ + struct filter_ctx *ctx; /* Caller's completion hook */ struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ + struct net_device *dev; /* Associated net device */ + u32 tid; /* This will store the actual tid */ /* The filter itself. Most of this is a straight copy of information * provided by the extended ioctl(). Some fields are translated to diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 8a26a54..2a61617 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -33,27 +33,165 @@ */ #include "cxgb4.h" +#include "t4_regs.h" #include "l2t.h" #include "t4fw_api.h" #include "cxgb4_filter.h" +static inline bool is_field_set(u32 val, u32 mask) +{ + return val || mask; +} + +static inline bool unsupported(u32 conf, u32 conf_mask, u32 val, u32 mask) +{ + return !(conf & conf_mask) && is_field_set(val, mask); +} + +/* Validate filter spec against configuration done on the card. */ +static int validate_filter(struct net_device *dev, + struct ch_filter_specification *fs) +{ + struct adapter *adapter = netdev2adap(dev); + u32 fconf, iconf; + + /* Check for unconfigured fields being used. 
*/ + fconf = adapter->params.tp.vlan_pri_map; + iconf = adapter->params.tp.ingress_config; + + if (unsupported(fconf, FCOE_F, fs->val.fcoe, fs->mask.fcoe) || + unsupported(fconf, PORT_F, fs->val.iport, fs->mask.iport) || + unsupported(fconf, TOS_F, fs->val.tos, fs->mask.tos) || + unsupported(fconf, ETHERTYPE_F, fs->val.ethtype, + fs->mask.ethtype) || + unsupported(fconf, MACMATCH_F, fs->val.macidx, fs->mask.macidx) || + unsupported(fconf, MPSHITTYPE_F, fs->val.matchtype, + fs->mask.matchtype) || + unsupported(fconf, FRAGMENTATION_F, fs->val.frag, fs->mask.frag) || + unsupported(fconf, PROTOCOL_F, fs->val.proto, fs->mask.proto) || + unsupported(fconf, VNIC_ID_F, fs->val.pfvf_vld, + fs->mask.pfvf_vld) || + unsupported(fconf, VNIC_ID_F, fs->val.ovlan_vld, + fs->mask.ovlan_vld) || + unsupported(fconf, VLAN_F, fs->val.ivlan_vld, fs->mask.ivlan_vld)) + return -EOPNOTSUPP; + + /* T4 inconveniently uses the same FT_VNIC_ID_W bits for both the Outer + * VLAN Tag and PF/VF/VFvld fields based on VNIC_F being set + * in TP_INGRESS_CONFIG. Hense the somewhat crazy checks + * below. Additionally, since the T4 firmware interface also + * carries that overlap, we need to translate any PF/VF + * specification into that internal format below. + */ + if (is_field_set(fs->val.pfvf_vld, fs->mask.pfvf_vld) && + is_field_set(fs->val.ovlan_vld, fs->mask.ovlan_vld)) + return -EOPNOTSUPP; + if (unsupported(iconf, VNIC_F, fs->val.pfvf_vld, fs->mask.pfvf_vld) || + (is_field_set(fs->val.ovlan_vld, fs->mask.ovlan_vld) && + (iconf & VNIC_F))) + return -EOPNOTSUPP; + if (fs->val.pf > 0x7 || fs->val.vf > 0x7f) + return -ERANGE; + fs->mask.pf &= 0x7; + fs->mask.vf &= 0x7f; + + /* If the user is requesting that the filter action loop + * matching packets back out one of our ports, make sure that + * the egress port is in range. + */ + if (fs->action == FILTER_SWITCH && + fs->eport >= adapter->params.nports) + return -ERANGE; + + /* Don't allow various trivially obvious bogus out-of-range values... 
*/ + if (fs->val.iport >= adapter->params.nports) + return -ERANGE; + + /* T4 doesn't support removing VLAN Tags for loop back filters. */ + if (is_t4(adapter->params.chip) && + fs->action == FILTER_SWITCH && + (fs->newvlan == VLAN_REMOVE || + fs->newvlan == VLAN_REWRITE)) + return -EOPNOTSUPP; + + return 0; +} + +static unsigned int get_filter_steerq(struct net_device *dev, + struct ch_filter_specification *fs) +{ + struct adapter *adapter = netdev2adap(dev); + unsigned int iq; + + /* If the user has requested steering matching Ingress Packets + * to a specific Queue Set, we need to make sure it's in range + * for the port and map that into the Absolute Queue ID of the + * Queue Set's Response Queue. + */ + if (!fs->dirsteer) { + if (fs->iq) + return -EINVAL; + iq = 0; + } else { + struct port_info *pi = netdev_priv(dev); + + /* If the iq id is greater than the number of qsets, + * then assume it is an absolute qid. + */ + if (fs->iq < pi->nqsets) + iq = adapter->sge.ethrxq[pi->first_qset + + fs->iq].rspq.abs_id; + else + iq = fs->iq; + } + + return iq; +} + +static int cxgb4_set_ftid(struct tid_info *t, int fidx, int family) +{ + spin_lock_bh(&t->ftid_lock); + + if (test_bit(fidx, t->ftid_bmap)) { + spin_unlock_bh(&t->ftid_lock); + return -EBUSY; + } + + if (family == PF_INET) + __set_bit(fidx, t->ftid_bmap); + else + bitmap_allocate_region(t->ftid_bmap, fidx, 2); + + spin_unlock_bh(&t->ftid_lock); + return 0; +} + +static void cxgb4_clear_ftid(struct tid_info *t, int fidx, int family) +{ + spin_lock_bh(&t->ftid_lock); + if (family == PF_INET) + __clear_bit(fidx, t->ftid_bmap); + else + bitmap_release_region(t->ftid_bmap, fidx, 2); + spin_unlock_bh(&t->ftid_lock); +} + /* Delete the filter at a specified index. 
*/ static int del_filter_wr(struct adapter *adapter, int fidx) { struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; struct fw_filter_wr *fwr; - unsigned int len, ftid; struct sk_buff *skb; + unsigned int len; len = sizeof(*fwr); - ftid = adapter->tids.ftid_base + fidx; skb = alloc_skb(len, GFP_KERNEL); if (!skb) return -ENOMEM; fwr = (struct fw_filter_wr *)__skb_put(skb, len); - t4_mk_filtdelwr(ftid, fwr, adapter->sge.fw_evtq.abs_id); + t4_mk_filtdelwr(f->tid, fwr, adapter->sge.fw_evtq.abs_id); /* Mark the filter as "pending" and ship off the Filter Work Request. * When we get the Work Request Reply we'll clear the pending status. @@ -74,7 +212,6 @@ int set_filter_wr(struct adapter *adapter, int fidx) struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; struct fw_filter_wr *fwr; struct sk_buff *skb; - unsigned int ftid; skb = alloc_skb(sizeof(*fwr), GFP_KERNEL); if (!skb) @@ -94,8 +231,6 @@ int set_filter_wr(struct adapter *adapter, int fidx) } } - ftid = adapter->tids.ftid_base + fidx; - fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr)); memset(fwr, 0, sizeof(*fwr)); @@ -110,7 +245,7 @@ int set_filter_wr(struct adapter *adapter, int fidx) fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR)); fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr) / 16)); fwr->tid_to_iq = - htonl(FW_FILTER_WR_TID_V(ftid) | + htonl(FW_FILTER_WR_TID_V(f->tid) | FW_FILTER_WR_RQTYPE_V(f->fs.type) | FW_FILTER_WR_NOREPLY_V(0) | FW_FILTER_WR_IQ_V(f->fs.iq)); @@ -235,33 +370,341 @@ void clear_filter(struct adapter *adap, struct filter_entry *f) memset(f, 0, sizeof(*f)); } +void clear_all_filters(struct adapter *adapter) +{ + unsigned int i; + + if (adapter->tids.ftid_tab) { + struct filter_entry *f = &adapter->tids.ftid_tab[0]; + unsigned int max_ftid = adapter->tids.nftids + + adapter->tids.nsftids; + + for (i = 0; i < max_ftid; i++, f++) + if (f->valid || f->pending) + clear_filter(adapter, f); + } +} + +/* Fill up default masks for set match fields. 
*/ +static void fill_default_mask(struct ch_filter_specification *fs) +{ + unsigned int lip = 0, lip_mask = 0; + unsigned int fip = 0, fip_mask = 0; + unsigned int i; + + if (fs->val.iport && !fs->mask.iport) + fs->mask.iport |= ~0; + if (fs->val.fcoe && !fs->mask.fcoe) + fs->mask.fcoe |= ~0; + if (fs->val.matchtype && !fs->mask.matchtype) + fs->mask.matchtype |= ~0; + if (fs->val.macidx && !fs->mask.macidx) + fs->mask.macidx |= ~0; + if (fs->val.ethtype && !fs->mask.ethtype) + fs->mask.ethtype |= ~0; + if (fs->val.ivlan && !fs->mask.ivlan) + fs->mask.ivlan |= ~0; + if (fs->val.ovlan && !fs->mask.ovlan) + fs->mask.ovlan |= ~0; + if (fs->val.frag && !fs->mask.frag) + fs->mask.frag |= ~0; + if (fs->val.tos && !fs->mask.tos) + fs->mask.tos |= ~0; + if (fs->val.proto && !fs->mask.proto) + fs->mask.proto |= ~0; + + for (i = 0; i < ARRAY_SIZE(fs->val.lip); i++) { + lip |= fs->val.lip[i]; + lip_mask |= fs->mask.lip[i]; + fip |= fs->val.fip[i]; + fip_mask |= fs->mask.fip[i]; + } + + if (lip && !lip_mask) + memset(fs->mask.lip, ~0, sizeof(fs->mask.lip)); + + if (fip && !fip_mask) + memset(fs->mask.fip, ~0, sizeof(fs->mask.lip)); + + if (fs->val.lport && !fs->mask.lport) + fs->mask.lport = ~0; + if (fs->val.fport && !fs->mask.fport) + fs->mask.fport = ~0; +} + +/* Check a Chelsio Filter Request for validity, convert it into our internal + * format and send it to the hardware. Return 0 on success, an error number + * otherwise. We attach any provided filter operation context to the internal + * filter specification in order to facilitate signaling completion of the + * operation. 
+ */ +int __cxgb4_set_filter(struct net_device *dev, int filter_id, + struct ch_filter_specification *fs, + struct filter_ctx *ctx) +{ + struct adapter *adapter = netdev2adap(dev); + unsigned int max_fidx, fidx, iq; + struct filter_entry *f; + u32 iconf; + int ret; + + max_fidx = adapter->tids.nftids; + if (filter_id != (max_fidx + adapter->tids.nsftids - 1) && + filter_id >= max_fidx) + return -E2BIG; + + fill_default_mask(fs); + + ret = validate_filter(dev, fs); + if (ret) + return ret; + + iq = get_filter_steerq(dev, fs); + if (iq < 0) + return iq; + + /* IPv6 filters occupy four slots and must be aligned on + * four-slot boundaries. IPv4 filters only occupy a single + * slot and have no alignment requirements but writing a new + * IPv4 filter into the middle of an existing IPv6 filter + * requires clearing the old IPv6 filter and hence we prevent + * insertion. + */ + if (fs->type == 0) { /* IPv4 */ + /* If our IPv4 filter isn't being written to a + * multiple of four filter index and there's an IPv6 + * filter at the multiple of 4 base slot, then we + * prevent insertion. + */ + fidx = filter_id & ~0x3; + if (fidx != filter_id && + adapter->tids.ftid_tab[fidx].fs.type) { + f = &adapter->tids.ftid_tab[fidx]; + if (f->valid) { + dev_err(adapter->pdev_dev, + "Invalid location. IPv6 requires 4 slots and is occupying slots %u to %u\n", + fidx, fidx + 3); + return -EINVAL; + } + } + } else { /* IPv6 */ + /* Ensure that the IPv6 filter is aligned on a + * multiple of 4 boundary. + */ + if (filter_id & 0x3) { + dev_err(adapter->pdev_dev, + "Invalid location. IPv6 must be aligned on a 4-slot boundary\n"); + return -EINVAL; + } + + /* Check all except the base overlapping IPv4 filter slots. */ + for (fidx = filter_id + 1; fidx < filter_id + 4; fidx++) { + f = &adapter->tids.ftid_tab[fidx]; + if (f->valid) { + dev_err(adapter->pdev_dev, + "Invalid location. 
IPv6 requires 4 slots and an IPv4 filter exists at %u\n", + fidx); + return -EINVAL; + } + } + } + + /* Check to make sure that provided filter index is not + * already in use by someone else + */ + f = &adapter->tids.ftid_tab[filter_id]; + if (f->valid) + return -EBUSY; + + fidx = filter_id + adapter->tids.ftid_base; + ret = cxgb4_set_ftid(&adapter->tids, filter_id, + fs->type ? PF_INET6 : PF_INET); + if (ret) + return ret; + + /* Check to make sure the filter requested is writable ... */ + ret = writable_filter(f); + if (ret) { + /* Clear the bits we have set above */ + cxgb4_clear_ftid(&adapter->tids, filter_id, + fs->type ? PF_INET6 : PF_INET); + return ret; + } + + /* Clear out any old resources being used by the filter before + * we start constructing the new filter. + */ + if (f->valid) + clear_filter(adapter, f); + + /* Convert the filter specification into our internal format. + * We copy the PF/VF specification into the Outer VLAN field + * here so the rest of the code -- including the interface to + * the firmware -- doesn't have to constantly do these checks. + */ + f->fs = *fs; + f->fs.iq = iq; + f->dev = dev; + + iconf = adapter->params.tp.ingress_config; + if (iconf & VNIC_F) { + f->fs.val.ovlan = (fs->val.pf << 13) | fs->val.vf; + f->fs.mask.ovlan = (fs->mask.pf << 13) | fs->mask.vf; + f->fs.val.ovlan_vld = fs->val.pfvf_vld; + f->fs.mask.ovlan_vld = fs->mask.pfvf_vld; + } + + /* Attempt to set the filter. If we don't succeed, we clear + * it and return the failure. + */ + f->ctx = ctx; + f->tid = fidx; /* Save the actual tid */ + ret = set_filter_wr(adapter, filter_id); + if (ret) { + cxgb4_clear_ftid(&adapter->tids, filter_id, + fs->type ? PF_INET6 : PF_INET); + clear_filter(adapter, f); + } + + return ret; +} + +/* Check a delete filter request for validity and send it to the hardware. + * Return 0 on success, an error number otherwise. 
We attach any provided + * filter operation context to the internal filter specification in order to + * facilitate signaling completion of the operation. + */ +int __cxgb4_del_filter(struct net_device *dev, int filter_id, + struct filter_ctx *ctx) +{ + struct adapter *adapter = netdev2adap(dev); + struct filter_entry *f; + unsigned int max_fidx; + int ret; + + max_fidx = adapter->tids.nftids; + if (filter_id != (max_fidx + adapter->tids.nsftids - 1) && + filter_id >= max_fidx) + return -E2BIG; + + f = &adapter->tids.ftid_tab[filter_id]; + ret = writable_filter(f); + if (ret) + return ret; + + if (f->valid) { + f->ctx = ctx; + cxgb4_clear_ftid(&adapter->tids, filter_id, + f->fs.type ? PF_INET6 : PF_INET); + return del_filter_wr(adapter, filter_id); + } + + /* If the caller has passed in a Completion Context then we need to + * mark it as a successful completion so they don't stall waiting + * for it. + */ + if (ctx) { + ctx->result = 0; + complete(&ctx->completion); + } + return ret; +} + +int cxgb4_set_filter(struct net_device *dev, int filter_id, + struct ch_filter_specification *fs) +{ + struct filter_ctx ctx; + int ret; + + init_completion(&ctx.completion); + + ret = __cxgb4_set_filter(dev, filter_id, fs, &ctx); + if (ret) + goto out; + + /* Wait for reply */ + ret = wait_for_completion_timeout(&ctx.completion, 10 * HZ); + if (!ret) + return -ETIMEDOUT; + + ret = ctx.result; +out: + return ret; +} + +int cxgb4_del_filter(struct net_device *dev, int filter_id) +{ + struct filter_ctx ctx; + int ret; + + init_completion(&ctx.completion); + + ret = __cxgb4_del_filter(dev, filter_id, &ctx); + if (ret) + goto out; + + /* Wait for reply */ + ret = wait_for_completion_timeout(&ctx.completion, 10 * HZ); + if (!ret) + return -ETIMEDOUT; + + ret = ctx.result; +out: + return ret; +} + /* Handle a filter write/deletion reply. 
*/ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) { - unsigned int idx = GET_TID(rpl); - unsigned int nidx = idx - adap->tids.ftid_base; - struct filter_entry *f; - unsigned int ret; + unsigned int tid = GET_TID(rpl); + struct filter_entry *f = NULL; + unsigned int max_fidx; + int idx; - if (idx >= adap->tids.ftid_base && nidx < - (adap->tids.nftids + adap->tids.nsftids)) { - idx = nidx; - ret = TCB_COOKIE_G(rpl->cookie); + max_fidx = adap->tids.nftids + adap->tids.nsftids; + /* Get the corresponding filter entry for this tid */ + if (adap->tids.ftid_tab) { + /* Check this in normal filter region */ + idx = tid - adap->tids.ftid_base; + if (idx >= max_fidx) + return; f = &adap->tids.ftid_tab[idx]; + if (f->tid != tid) + return; + } + + /* We found the filter entry for this tid */ + if (f) { + unsigned int ret = TCB_COOKIE_G(rpl->cookie); + struct filter_ctx *ctx; + + /* Pull off any filter operation context attached to the + * filter. + */ + ctx = f->ctx; + f->ctx = NULL; if (ret == FW_FILTER_WR_FLT_DELETED) { /* Clear the filter when we get confirmation from the * hardware that the filter has been deleted. */ clear_filter(adap, f); + if (ctx) + ctx->result = 0; } else if (ret == FW_FILTER_WR_SMT_TBL_FULL) { dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n", idx); clear_filter(adap, f); + if (ctx) + ctx->result = -ENOMEM; } else if (ret == FW_FILTER_WR_FLT_ADDED) { f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff; f->pending = 0; /* asynchronous setup completed */ f->valid = 1; + if (ctx) { + ctx->result = 0; + ctx->tid = idx; + } } else { /* Something went wrong. Issue a warning about the * problem and clear everything out. 
@@ -269,6 +712,10 @@ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) dev_err(adap->pdev_dev, "filter %u setup failed with error %u\n", idx, ret); clear_filter(adap, f); + if (ctx) + ctx->result = -EINVAL; } + if (ctx) + complete(&ctx->completion); } } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h index f6bd0bf..23742cb 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h @@ -44,4 +44,5 @@ int set_filter_wr(struct adapter *adapter, int fidx); int delete_filter(struct adapter *adapter, unsigned int fidx); int writable_filter(struct filter_entry *f); +void clear_all_filters(struct adapter *adapter); #endif /* __CXGB4_FILTER_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 5cdcfe8..e97daa0 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -1324,19 +1324,22 @@ EXPORT_SYMBOL(cxgb4_remove_tid); */ static int tid_init(struct tid_info *t) { - size_t size; - unsigned int stid_bmap_size; - unsigned int natids = t->natids; struct adapter *adap = container_of(t, struct adapter, tids); + unsigned int max_ftids = t->nftids + t->nsftids; + unsigned int natids = t->natids; + unsigned int stid_bmap_size; + unsigned int ftid_bmap_size; + size_t size; stid_bmap_size = BITS_TO_LONGS(t->nstids + t->nsftids); + ftid_bmap_size = BITS_TO_LONGS(t->nftids); size = t->ntids * sizeof(*t->tid_tab) + natids * sizeof(*t->atid_tab) + t->nstids * sizeof(*t->stid_tab) + t->nsftids * sizeof(*t->stid_tab) + stid_bmap_size * sizeof(long) + - t->nftids * sizeof(*t->ftid_tab) + - t->nsftids * sizeof(*t->ftid_tab); + max_ftids * sizeof(*t->ftid_tab) + + ftid_bmap_size * sizeof(long); t->tid_tab = t4_alloc_mem(size); if (!t->tid_tab) @@ -1346,8 +1349,10 @@ static int tid_init(struct tid_info *t) t->stid_tab = 
(struct serv_entry *)&t->atid_tab[natids]; t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids + t->nsftids]; t->ftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size]; + t->ftid_bmap = (unsigned long *)&t->ftid_tab[max_ftids]; spin_lock_init(&t->stid_lock); spin_lock_init(&t->atid_lock); + spin_lock_init(&t->ftid_lock); t->stids_in_use = 0; t->sftids_in_use = 0; @@ -1362,12 +1367,16 @@ static int tid_init(struct tid_info *t) t->atid_tab[natids - 1].next = &t->atid_tab[natids]; t->afree = t->atid_tab; } - bitmap_zero(t->stid_bmap, t->nstids + t->nsftids); - /* Reserve stid 0 for T4/T5 adapters */ - if (!t->stid_base && - (CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5)) - __set_bit(0, t->stid_bmap); + if (is_offload(adap)) { + bitmap_zero(t->stid_bmap, t->nstids + t->nsftids); + /* Reserve stid 0 for T4/T5 adapters */ + if (!t->stid_base && + CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5) + __set_bit(0, t->stid_bmap); + } + + bitmap_zero(t->ftid_bmap, t->nftids); return 0; } @@ -4825,7 +4834,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) i); } - if (is_offload(adapter) && tid_init(&adapter->tids) < 0) { + if (tid_init(&adapter->tids) < 0) { dev_warn(&pdev->dev, "could not allocate TID table, " "continuing\n"); adapter->params.offload = 0; @@ -5012,13 +5021,7 @@ static void remove_one(struct pci_dev *pdev) /* If we allocated filters, free up state associated with any * valid filters ... 
*/ - if (adapter->tids.ftid_tab) { - struct filter_entry *f = &adapter->tids.ftid_tab[0]; - for (i = 0; i < (adapter->tids.nftids + - adapter->tids.nsftids); i++, f++) - if (f->valid) - clear_filter(adapter, f); - } + clear_all_filters(adapter); if (adapter->flags & FULL_INIT_DONE) cxgb_down(adapter); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index b3544f6..47bd14f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -1,7 +1,7 @@ /* * This file is part of the Chelsio T4 Ethernet driver for Linux. * - * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved. + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -106,6 +106,7 @@ struct tid_info { unsigned int atid_base; struct filter_entry *ftid_tab; + unsigned long *ftid_bmap; unsigned int nftids; unsigned int ftid_base; unsigned int aftid_base; @@ -126,6 +127,8 @@ struct tid_info { atomic_t tids_in_use; /* TIDs in the HASH */ atomic_t hash_tids_in_use; + /* lock for setting/clearing filter bitmap */ + spinlock_t ftid_lock; }; static inline void *lookup_tid(const struct tid_info *t, unsigned int tid) @@ -185,6 +188,27 @@ int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid, int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid, unsigned int queue, bool ipv6); +/* Filter operation context to allow callers of cxgb4_set_filter() and + * cxgb4_del_filter() to wait for an asynchronous completion. 
+ */ +struct filter_ctx { + struct completion completion; /* completion rendezvous */ + void *closure; /* caller's opaque information */ + int result; /* result of operation */ + u32 tid; /* to store tid */ +}; + +struct ch_filter_specification; + +int __cxgb4_set_filter(struct net_device *dev, int filter_id, + struct ch_filter_specification *fs, + struct filter_ctx *ctx); +int __cxgb4_del_filter(struct net_device *dev, int filter_id, + struct filter_ctx *ctx); +int cxgb4_set_filter(struct net_device *dev, int filter_id, + struct ch_filter_specification *fs); +int cxgb4_del_filter(struct net_device *dev, int filter_id); + static inline void set_wr_txq(struct sk_buff *skb, int prio, int queue) { skb_set_queue_mapping(skb, (queue << 1) | prio); -- cgit v1.1 From 2e8aad7bf20323c6ef0beec859a77c94a082c55d Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Tue, 20 Sep 2016 17:13:08 +0530 Subject: cxgb4: add parser to translate u32 filters to internal spec Parse information sent by u32 into internal filter specification. Add support for parsing several fields in IPv4, IPv6, TCP, and UDP. Signed-off-by: Rahul Lakkireddy Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- .../ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h | 282 +++++++++++++++++++++ 1 file changed, 282 insertions(+) create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h new file mode 100644 index 0000000..65c20ca --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h @@ -0,0 +1,282 @@ +/* + * This file is part of the Chelsio T4 Ethernet driver for Linux. + * + * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __CXGB4_TC_U32_PARSE_H +#define __CXGB4_TC_U32_PARSE_H + +struct cxgb4_match_field { + int off; /* Offset from the beginning of the header to match */ + /* Fill the value/mask pair in the spec if matched */ + int (*val)(struct ch_filter_specification *f, u32 val, u32 mask); +}; + +/* IPv4 match fields */ +static inline int cxgb4_fill_ipv4_tos(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + f->val.tos = (ntohl(val) >> 16) & 0x000000FF; + f->mask.tos = (ntohl(mask) >> 16) & 0x000000FF; + + return 0; +} + +static inline int cxgb4_fill_ipv4_frag(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + u32 mask_val; + u8 frag_val; + + frag_val = (ntohl(val) >> 13) & 0x00000007; + mask_val = ntohl(mask) & 0x0000FFFF; + + if (frag_val == 0x1 && mask_val != 0x3FFF) { /* MF set */ + f->val.frag = 1; + f->mask.frag = 1; + } else if (frag_val == 0x2 && mask_val != 0x3FFF) { /* DF set */ + f->val.frag = 0; + f->mask.frag = 1; + } else { + return -EINVAL; + } + + return 0; +} + +static inline int cxgb4_fill_ipv4_proto(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + f->val.proto = (ntohl(val) >> 16) & 0x000000FF; + f->mask.proto = (ntohl(mask) >> 16) & 0x000000FF; + + return 0; +} + +static inline int cxgb4_fill_ipv4_src_ip(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.fip[0], &val, sizeof(u32)); + memcpy(&f->mask.fip[0], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv4_dst_ip(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.lip[0], &val, sizeof(u32)); + memcpy(&f->mask.lip[0], &mask, sizeof(u32)); + + return 0; +} + +static const struct cxgb4_match_field cxgb4_ipv4_fields[] = { + { .off = 0, .val = cxgb4_fill_ipv4_tos }, + { .off = 4, .val = cxgb4_fill_ipv4_frag }, + { .off = 8, .val = cxgb4_fill_ipv4_proto }, + { .off = 12, .val = cxgb4_fill_ipv4_src_ip }, + { .off = 16, .val = cxgb4_fill_ipv4_dst_ip }, + { .val = NULL } +}; + +/* IPv6 match fields 
*/ +static inline int cxgb4_fill_ipv6_tos(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + f->val.tos = (ntohl(val) >> 20) & 0x000000FF; + f->mask.tos = (ntohl(mask) >> 20) & 0x000000FF; + + return 0; +} + +static inline int cxgb4_fill_ipv6_proto(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + f->val.proto = (ntohl(val) >> 8) & 0x000000FF; + f->mask.proto = (ntohl(mask) >> 8) & 0x000000FF; + + return 0; +} + +static inline int cxgb4_fill_ipv6_src_ip0(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.fip[0], &val, sizeof(u32)); + memcpy(&f->mask.fip[0], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_src_ip1(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.fip[4], &val, sizeof(u32)); + memcpy(&f->mask.fip[4], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_src_ip2(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.fip[8], &val, sizeof(u32)); + memcpy(&f->mask.fip[8], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_src_ip3(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.fip[12], &val, sizeof(u32)); + memcpy(&f->mask.fip[12], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_dst_ip0(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.lip[0], &val, sizeof(u32)); + memcpy(&f->mask.lip[0], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_dst_ip1(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.lip[4], &val, sizeof(u32)); + memcpy(&f->mask.lip[4], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_dst_ip2(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.lip[8], &val, sizeof(u32)); + memcpy(&f->mask.lip[8], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_dst_ip3(struct 
ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.lip[12], &val, sizeof(u32)); + memcpy(&f->mask.lip[12], &mask, sizeof(u32)); + + return 0; +} + +static const struct cxgb4_match_field cxgb4_ipv6_fields[] = { + { .off = 0, .val = cxgb4_fill_ipv6_tos }, + { .off = 4, .val = cxgb4_fill_ipv6_proto }, + { .off = 8, .val = cxgb4_fill_ipv6_src_ip0 }, + { .off = 12, .val = cxgb4_fill_ipv6_src_ip1 }, + { .off = 16, .val = cxgb4_fill_ipv6_src_ip2 }, + { .off = 20, .val = cxgb4_fill_ipv6_src_ip3 }, + { .off = 24, .val = cxgb4_fill_ipv6_dst_ip0 }, + { .off = 28, .val = cxgb4_fill_ipv6_dst_ip1 }, + { .off = 32, .val = cxgb4_fill_ipv6_dst_ip2 }, + { .off = 36, .val = cxgb4_fill_ipv6_dst_ip3 }, + { .val = NULL } +}; + +/* TCP/UDP match */ +static inline int cxgb4_fill_l4_ports(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + f->val.fport = ntohl(val) >> 16; + f->mask.fport = ntohl(mask) >> 16; + f->val.lport = ntohl(val) & 0x0000FFFF; + f->mask.lport = ntohl(mask) & 0x0000FFFF; + + return 0; +}; + +static const struct cxgb4_match_field cxgb4_tcp_fields[] = { + { .off = 0, .val = cxgb4_fill_l4_ports }, + { .val = NULL } +}; + +static const struct cxgb4_match_field cxgb4_udp_fields[] = { + { .off = 0, .val = cxgb4_fill_l4_ports }, + { .val = NULL } +}; + +struct cxgb4_next_header { + unsigned int offset; /* Offset to next header */ + /* offset, shift, and mask added to offset above + * to get to next header. Useful when using a header + * field's value to jump to next header such as IHL field + * in IPv4 header. + */ + unsigned int offoff; + u32 shift; + u32 mask; + /* match criteria to make this jump */ + unsigned int match_off; + u32 match_val; + u32 match_mask; + /* location of jump to make */ + const struct cxgb4_match_field *jump; +}; + +/* Accept a rule with a jump to transport layer header based on IHL field in + * IPv4 header. 
+ */ +static const struct cxgb4_next_header cxgb4_ipv4_jumps[] = { + { .offset = 0, .offoff = 0, .shift = 6, .mask = 0xF, + .match_off = 8, .match_val = 0x600, .match_mask = 0xFF00, + .jump = cxgb4_tcp_fields }, + { .offset = 0, .offoff = 0, .shift = 6, .mask = 0xF, + .match_off = 8, .match_val = 0x1100, .match_mask = 0xFF00, + .jump = cxgb4_udp_fields }, + { .jump = NULL } +}; + +/* Accept a rule with a jump directly past the 40 Bytes of IPv6 fixed header + * to get to transport layer header. + */ +static const struct cxgb4_next_header cxgb4_ipv6_jumps[] = { + { .offset = 0x28, .offoff = 0, .shift = 0, .mask = 0, + .match_off = 4, .match_val = 0x60000, .match_mask = 0xFF0000, + .jump = cxgb4_tcp_fields }, + { .offset = 0x28, .offoff = 0, .shift = 0, .mask = 0, + .match_off = 4, .match_val = 0x110000, .match_mask = 0xFF0000, + .jump = cxgb4_udp_fields }, + { .jump = NULL } +}; +#endif /* __CXGB4_TC_U32_PARSE_H */ -- cgit v1.1 From d8931847488d250e27d8f18ca6b7373e9f981d7a Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Tue, 20 Sep 2016 17:13:09 +0530 Subject: cxgb4: add support for offloading u32 filters Add support for offloading u32 filter onto hardware. Links are stored in a jump table to perform necessary jumps to match TCP/UDP header. When inserting rules in the linked bucket, the TCP/UDP match fields in the corresponding entry of the jump table are appended to the filter rule before insertion. If a link is deleted, then all corresponding filters associated with the link are also deleted. Also enable hardware tc offload as a supported feature. Signed-off-by: Rahul Lakkireddy Signed-off-by: Hariprasad Shenai Signed-off-by: David S. 
Miller --- drivers/net/ethernet/chelsio/cxgb4/Makefile | 2 +- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 3 + drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 41 +- drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c | 412 +++++++++++++++++++++ drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h | 57 +++ .../ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h | 12 + 6 files changed, 525 insertions(+), 2 deletions(-) create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile index da88981..c6b71f6 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/Makefile +++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_CHELSIO_T4) += cxgb4.o -cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o cxgb4_uld.o sched.o cxgb4_filter.o +cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o cxgb4_uld.o sched.o cxgb4_filter.o cxgb4_tc_u32.o cxgb4-$(CONFIG_CHELSIO_T4_DCB) += cxgb4_dcb.o cxgb4-$(CONFIG_CHELSIO_T4_FCOE) += cxgb4_fcoe.o cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 51edb12..ea0d1f1 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -851,6 +851,9 @@ struct adapter { spinlock_t stats_lock; spinlock_t win0_lock ____cacheline_aligned_in_smp; + + /* TC u32 offload */ + struct cxgb4_tc_u32_table *tc_u32; }; /* Support for "sched-class" command to allow a TX Scheduling Class to be diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index e97daa0..1be4d23 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -78,6 +78,7 @@ #include "clip_tbl.h" #include "l2t.h" #include 
"sched.h" +#include "cxgb4_tc_u32.h" char cxgb4_driver_name[] = KBUILD_MODNAME; @@ -2711,6 +2712,35 @@ static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate) return err; } +int cxgb_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) +{ + struct port_info *pi = netdev2pinfo(dev); + struct adapter *adap = netdev2adap(dev); + + if (!(adap->flags & FULL_INIT_DONE)) { + dev_err(adap->pdev_dev, + "Failed to setup tc on port %d. Link Down?\n", + pi->port_id); + return -EINVAL; + } + + if (TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS) && + tc->type == TC_SETUP_CLSU32) { + switch (tc->cls_u32->command) { + case TC_CLSU32_NEW_KNODE: + case TC_CLSU32_REPLACE_KNODE: + return cxgb4_config_knode(dev, proto, tc->cls_u32); + case TC_CLSU32_DELETE_KNODE: + return cxgb4_delete_knode(dev, proto, tc->cls_u32); + default: + return -EOPNOTSUPP; + } + } + + return -EOPNOTSUPP; +} + static const struct net_device_ops cxgb4_netdev_ops = { .ndo_open = cxgb_open, .ndo_stop = cxgb_close, @@ -2734,6 +2764,7 @@ static const struct net_device_ops cxgb4_netdev_ops = { .ndo_busy_poll = cxgb_busy_poll, #endif .ndo_set_tx_maxrate = cxgb_set_tx_maxrate, + .ndo_setup_tc = cxgb_setup_tc, }; #ifdef CONFIG_PCI_IOV @@ -4406,6 +4437,7 @@ static void free_some_resources(struct adapter *adapter) t4_free_mem(adapter->l2t); t4_cleanup_sched(adapter); t4_free_mem(adapter->tids.tid_tab); + cxgb4_cleanup_tc_u32(adapter); kfree(adapter->sge.egr_map); kfree(adapter->sge.ingr_map); kfree(adapter->sge.starving_fl); @@ -4750,7 +4782,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->hw_features = NETIF_F_SG | TSO_FLAGS | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | NETIF_F_RXHASH | - NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_TC; if (highdma) netdev->hw_features |= NETIF_F_HIGHDMA; netdev->features |= netdev->hw_features; @@ -4838,6 +4871,12 @@ 
static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev_warn(&pdev->dev, "could not allocate TID table, " "continuing\n"); adapter->params.offload = 0; + } else { + adapter->tc_u32 = cxgb4_init_tc_u32(adapter, + CXGB4_MAX_LINK_HANDLE); + if (!adapter->tc_u32) + dev_warn(&pdev->dev, + "could not offload tc u32, continuing\n"); } if (is_offload(adapter)) { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c new file mode 100644 index 0000000..d63b895 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c @@ -0,0 +1,412 @@ +/* + * This file is part of the Chelsio T4 Ethernet driver for Linux. + * + * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "cxgb4.h" +#include "cxgb4_tc_u32_parse.h" +#include "cxgb4_tc_u32.h" + +/* Fill ch_filter_specification with parsed match value/mask pair. */ +static int fill_match_fields(struct adapter *adap, + struct ch_filter_specification *fs, + struct tc_cls_u32_offload *cls, + const struct cxgb4_match_field *entry, + bool next_header) +{ + unsigned int i, j; + u32 val, mask; + int off, err; + bool found; + + for (i = 0; i < cls->knode.sel->nkeys; i++) { + off = cls->knode.sel->keys[i].off; + val = cls->knode.sel->keys[i].val; + mask = cls->knode.sel->keys[i].mask; + + if (next_header) { + /* For next headers, parse only keys with offmask */ + if (!cls->knode.sel->keys[i].offmask) + continue; + } else { + /* For the remaining, parse only keys without offmask */ + if (cls->knode.sel->keys[i].offmask) + continue; + } + + found = false; + + for (j = 0; entry[j].val; j++) { + if (off == entry[j].off) { + found = true; + err = entry[j].val(fs, val, mask); + if (err) + return err; + break; + } + } + + if (!found) + return -EINVAL; + } + + return 0; +} + +int cxgb4_config_knode(struct net_device *dev, __be16 protocol, + struct tc_cls_u32_offload *cls) +{ + const struct cxgb4_match_field *start, *link_start = NULL; + struct adapter *adapter = netdev2adap(dev); + struct ch_filter_specification fs; + struct cxgb4_tc_u32_table *t; + struct cxgb4_link *link; + unsigned int filter_id; + u32 uhtid, link_uhtid; + bool is_ipv6 = false; + int ret; + + if (!can_tc_u32_offload(dev)) + return -EOPNOTSUPP; + + if (protocol != htons(ETH_P_IP) && protocol != htons(ETH_P_IPV6)) + return -EOPNOTSUPP; + + /* Fetch the location to insert the filter. 
*/ + filter_id = cls->knode.handle & 0xFFFFF; + + if (filter_id > adapter->tids.nftids) { + dev_err(adapter->pdev_dev, + "Location %d out of range for insertion. Max: %d\n", + filter_id, adapter->tids.nftids); + return -ERANGE; + } + + t = adapter->tc_u32; + uhtid = TC_U32_USERHTID(cls->knode.handle); + link_uhtid = TC_U32_USERHTID(cls->knode.link_handle); + + /* Ensure that uhtid is either root u32 (i.e. 0x800) + * or a a valid linked bucket. + */ + if (uhtid != 0x800 && uhtid >= t->size) + return -EINVAL; + + /* Ensure link handle uhtid is sane, if specified. */ + if (link_uhtid >= t->size) + return -EINVAL; + + memset(&fs, 0, sizeof(fs)); + + if (protocol == htons(ETH_P_IPV6)) { + start = cxgb4_ipv6_fields; + is_ipv6 = true; + } else { + start = cxgb4_ipv4_fields; + is_ipv6 = false; + } + + if (uhtid != 0x800) { + /* Link must exist from root node before insertion. */ + if (!t->table[uhtid - 1].link_handle) + return -EINVAL; + + /* Link must have a valid supported next header. */ + link_start = t->table[uhtid - 1].match_field; + if (!link_start) + return -EINVAL; + } + + /* Parse links and record them for subsequent jumps to valid + * next headers. + */ + if (link_uhtid) { + const struct cxgb4_next_header *next; + bool found = false; + unsigned int i, j; + u32 val, mask; + int off; + + if (t->table[link_uhtid - 1].link_handle) { + dev_err(adapter->pdev_dev, + "Link handle exists for: 0x%x\n", + link_uhtid); + return -EINVAL; + } + + next = is_ipv6 ? cxgb4_ipv6_jumps : cxgb4_ipv4_jumps; + + /* Try to find matches that allow jumps to next header. */ + for (i = 0; next[i].jump; i++) { + if (next[i].offoff != cls->knode.sel->offoff || + next[i].shift != cls->knode.sel->offshift || + next[i].mask != cls->knode.sel->offmask || + next[i].offset != cls->knode.sel->off) + continue; + + /* Found a possible candidate. Find a key that + * matches the corresponding offset, value, and + * mask to jump to next header. 
+ */ + for (j = 0; j < cls->knode.sel->nkeys; j++) { + off = cls->knode.sel->keys[j].off; + val = cls->knode.sel->keys[j].val; + mask = cls->knode.sel->keys[j].mask; + + if (next[i].match_off == off && + next[i].match_val == val && + next[i].match_mask == mask) { + found = true; + break; + } + } + + if (!found) + continue; /* Try next candidate. */ + + /* Candidate to jump to next header found. + * Translate all keys to internal specification + * and store them in jump table. This spec is copied + * later to set the actual filters. + */ + ret = fill_match_fields(adapter, &fs, cls, + start, false); + if (ret) + goto out; + + link = &t->table[link_uhtid - 1]; + link->match_field = next[i].jump; + link->link_handle = cls->knode.handle; + memcpy(&link->fs, &fs, sizeof(fs)); + break; + } + + /* No candidate found to jump to next header. */ + if (!found) + return -EINVAL; + + return 0; + } + + /* Fill ch_filter_specification match fields to be shipped to hardware. + * Copy the linked spec (if any) first. And then update the spec as + * needed. + */ + if (uhtid != 0x800 && t->table[uhtid - 1].link_handle) { + /* Copy linked ch_filter_specification */ + memcpy(&fs, &t->table[uhtid - 1].fs, sizeof(fs)); + ret = fill_match_fields(adapter, &fs, cls, + link_start, true); + if (ret) + goto out; + } + + ret = fill_match_fields(adapter, &fs, cls, start, false); + if (ret) + goto out; + + /* The filter spec has been completely built from the info + * provided from u32. We now set some default fields in the + * spec for sanity. + */ + + /* Match only packets coming from the ingress port where this + * filter will be created. + */ + fs.val.iport = netdev2pinfo(dev)->port_id; + fs.mask.iport = ~0; + + /* Enable filter hit counts. */ + fs.hitcnts = 1; + + /* Set type of filter - IPv6 or IPv4 */ + fs.type = is_ipv6 ? 
1 : 0; + + /* Set the filter */ + ret = cxgb4_set_filter(dev, filter_id, &fs); + if (ret) + goto out; + + /* If this is a linked bucket, then set the corresponding + * entry in the bitmap to mark it as belonging to this linked + * bucket. + */ + if (uhtid != 0x800 && t->table[uhtid - 1].link_handle) + set_bit(filter_id, t->table[uhtid - 1].tid_map); + +out: + return ret; +} + +int cxgb4_delete_knode(struct net_device *dev, __be16 protocol, + struct tc_cls_u32_offload *cls) +{ + struct adapter *adapter = netdev2adap(dev); + unsigned int filter_id, max_tids, i, j; + struct cxgb4_link *link = NULL; + struct cxgb4_tc_u32_table *t; + u32 handle, uhtid; + int ret; + + if (!can_tc_u32_offload(dev)) + return -EOPNOTSUPP; + + /* Fetch the location to delete the filter. */ + filter_id = cls->knode.handle & 0xFFFFF; + + if (filter_id > adapter->tids.nftids) { + dev_err(adapter->pdev_dev, + "Location %d out of range for deletion. Max: %d\n", + filter_id, adapter->tids.nftids); + return -ERANGE; + } + + t = adapter->tc_u32; + handle = cls->knode.handle; + uhtid = TC_U32_USERHTID(cls->knode.handle); + + /* Ensure that uhtid is either root u32 (i.e. 0x800) + * or a a valid linked bucket. + */ + if (uhtid != 0x800 && uhtid >= t->size) + return -EINVAL; + + /* Delete the specified filter */ + if (uhtid != 0x800) { + link = &t->table[uhtid - 1]; + if (!link->link_handle) + return -EINVAL; + + if (!test_bit(filter_id, link->tid_map)) + return -EINVAL; + } + + ret = cxgb4_del_filter(dev, filter_id); + if (ret) + goto out; + + if (link) + clear_bit(filter_id, link->tid_map); + + /* If a link is being deleted, then delete all filters + * associated with the link. 
+ */ + max_tids = adapter->tids.nftids; + for (i = 0; i < t->size; i++) { + link = &t->table[i]; + + if (link->link_handle == handle) { + for (j = 0; j < max_tids; j++) { + if (!test_bit(j, link->tid_map)) + continue; + + ret = __cxgb4_del_filter(dev, j, NULL); + if (ret) + goto out; + + clear_bit(j, link->tid_map); + } + + /* Clear the link state */ + link->match_field = NULL; + link->link_handle = 0; + memset(&link->fs, 0, sizeof(link->fs)); + break; + } + } + +out: + return ret; +} + +void cxgb4_cleanup_tc_u32(struct adapter *adap) +{ + struct cxgb4_tc_u32_table *t; + unsigned int i; + + if (!adap->tc_u32) + return; + + /* Free up all allocated memory. */ + t = adap->tc_u32; + for (i = 0; i < t->size; i++) { + struct cxgb4_link *link = &t->table[i]; + + t4_free_mem(link->tid_map); + } + t4_free_mem(adap->tc_u32); +} + +struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap, + unsigned int size) +{ + struct cxgb4_tc_u32_table *t; + unsigned int i; + + if (!size) + return NULL; + + t = t4_alloc_mem(sizeof(*t) + + (size * sizeof(struct cxgb4_link))); + if (!t) + return NULL; + + t->size = size; + + for (i = 0; i < t->size; i++) { + struct cxgb4_link *link = &t->table[i]; + unsigned int bmap_size; + unsigned int max_tids; + + max_tids = adap->tids.nftids; + bmap_size = BITS_TO_LONGS(max_tids); + link->tid_map = t4_alloc_mem(sizeof(unsigned long) * bmap_size); + if (!link->tid_map) + goto out_no_mem; + bitmap_zero(link->tid_map, max_tids); + } + + return t; + +out_no_mem: + for (i = 0; i < t->size; i++) { + struct cxgb4_link *link = &t->table[i]; + + if (link->tid_map) + t4_free_mem(link->tid_map); + } + + if (t) + t4_free_mem(t); + + return NULL; +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h new file mode 100644 index 0000000..6bdc885 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h @@ -0,0 +1,57 @@ +/* + * This file is part of the Chelsio T4 Ethernet driver for 
Linux. + * + * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __CXGB4_TC_U32_H +#define __CXGB4_TC_U32_H + +#include <net/pkt_cls.h> + +#define CXGB4_MAX_LINK_HANDLE 32 + +static inline bool can_tc_u32_offload(struct net_device *dev) +{ + struct adapter *adap = netdev2adap(dev); + + return (dev->features & NETIF_F_HW_TC) && adap->tc_u32 ?
true : false; +} + +int cxgb4_config_knode(struct net_device *dev, __be16 protocol, + struct tc_cls_u32_offload *cls); +int cxgb4_delete_knode(struct net_device *dev, __be16 protocol, + struct tc_cls_u32_offload *cls); + +void cxgb4_cleanup_tc_u32(struct adapter *adapter); +struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap, + unsigned int size); +#endif /* __CXGB4_TC_U32_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h index 65c20ca..a4b99ed 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h @@ -279,4 +279,16 @@ static const struct cxgb4_next_header cxgb4_ipv6_jumps[] = { .jump = cxgb4_udp_fields }, { .jump = NULL } }; + +struct cxgb4_link { + const struct cxgb4_match_field *match_field; /* Next header */ + struct ch_filter_specification fs; /* Match spec associated with link */ + u32 link_handle; /* Knode handle associated with the link */ + unsigned long *tid_map; /* Bitmap for filter tids */ +}; + +struct cxgb4_tc_u32_table { + unsigned int size; /* number of entries in table */ + struct cxgb4_link table[0]; /* Jump table */ +}; #endif /* __CXGB4_TC_U32_PARSE_H */ -- cgit v1.1 From b20ff726fa8360a0508d2d79ecdee5a45d854e99 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Tue, 20 Sep 2016 17:13:10 +0530 Subject: cxgb4: add support for drop and redirect actions Add support for dropping matched packets in hardware. Also add support for re-directing matched packets to a specified port in hardware. Signed-off-by: Rahul Lakkireddy Signed-off-by: Hariprasad Shenai Signed-off-by: David S. 
Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c | 71 +++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c index d63b895..49d2deb 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c @@ -32,6 +32,9 @@ * SOFTWARE. */ +#include <net/tc_act/tc_gact.h> +#include <net/tc_act/tc_mirred.h> + #include "cxgb4.h" #include "cxgb4_tc_u32_parse.h" #include "cxgb4_tc_u32.h" @@ -82,6 +85,67 @@ static int fill_match_fields(struct adapter *adap, return 0; } +/* Fill ch_filter_specification with parsed action. */ +static int fill_action_fields(struct adapter *adap, + struct ch_filter_specification *fs, + struct tc_cls_u32_offload *cls) +{ + unsigned int num_actions = 0; + const struct tc_action *a; + struct tcf_exts *exts; + LIST_HEAD(actions); + + exts = cls->knode.exts; + if (tc_no_actions(exts)) + return -EINVAL; + + tcf_exts_to_list(exts, &actions); + list_for_each_entry(a, &actions, list) { + /* Don't allow more than one action per rule. */ + if (num_actions) + return -EINVAL; + + /* Drop in hardware. */ + if (is_tcf_gact_shot(a)) { + fs->action = FILTER_DROP; + num_actions++; + continue; + } + + /* Re-direct to specified port in hardware. */ + if (is_tcf_mirred_redirect(a)) { + struct net_device *n_dev; + unsigned int i, index; + bool found = false; + + index = tcf_mirred_ifindex(a); + for_each_port(adap, i) { + n_dev = adap->port[i]; + if (index == n_dev->ifindex) { + fs->action = FILTER_SWITCH; + fs->eport = i; + found = true; + break; + } + } + + /* Interface doesn't belong to any port of + * the underlying hardware. + */ + if (!found) + return -EINVAL; + + num_actions++; + continue; + } + + /* Un-supported action.
*/ + return -EINVAL; + } + + return 0; +} + int cxgb4_config_knode(struct net_device *dev, __be16 protocol, struct tc_cls_u32_offload *cls) { @@ -234,6 +298,13 @@ int cxgb4_config_knode(struct net_device *dev, __be16 protocol, if (ret) goto out; + /* Fill ch_filter_specification action fields to be shipped to + * hardware. + */ + ret = fill_action_fields(adapter, &fs, cls); + if (ret) + goto out; + /* The filter spec has been completely built from the info * provided from u32. We now set some default fields in the * spec for sanity. -- cgit v1.1 From fba1296624bf95fc07057da1e26beee8a733180c Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 20 Sep 2016 14:55:31 +0300 Subject: net/mlx4_core: Fix to clean devlink resources This patch cleans devlink resources by calling devlink_port_unregister() to avoid the following issues: - Kernel panic when triggering reset flow. - Memory leak due to unfreed resources in mlx4_init_port_info(). Fixes: 09d4d087cd48 ("mlx4: Implement devlink interface") Signed-off-by: Kamal Heib Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 75dd2e3..7183ac4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2970,6 +2970,7 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port) mlx4_err(dev, "Failed to create mtu file for port %d\n", port); device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr); + devlink_port_unregister(&info->devlink_port); info->port = -1; } @@ -2984,6 +2985,8 @@ static void mlx4_cleanup_port_info(struct mlx4_port_info *info) device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr); device_remove_file(&info->dev->persist->pdev->dev, &info->port_mtu_attr); + devlink_port_unregister(&info->devlink_port); + #ifdef CONFIG_RFS_ACCEL free_irq_cpu_rmap(info->rmap); info->rmap = NULL; -- cgit v1.1 From e82f71489ffb325a9b7bca367b06a39315452dfe Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Tue, 20 Sep 2016 23:53:24 +0800 Subject: net: ethernet: mediatek: fix missing changes merged for conflicts overlapping commits add the missing commits about 1) Commit d3bd1ce4db8e843dce421e2f8f123e5251a9c7d3 ("remove redundant free_irq for devm_request_ir allocated irq") 2) Commit 7c6b0d76fa02213393815e3b6d5e4a415bf3f0e2 ("fix logic unbalance between probe and remove") during merge for conflicts overlapping commits by Commit b20b378d49926b82c0a131492fa8842156e0e8a9 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net") Signed-off-by: Sean Wang Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index ca6b501..2909372 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1894,11 +1894,8 @@ static void mtk_uninit(struct net_device *dev) struct mtk_eth *eth = mac->hw; phy_disconnect(mac->phy_dev); - mtk_mdio_cleanup(eth); mtk_irq_disable(eth, MTK_QDMA_INT_MASK, ~0); mtk_irq_disable(eth, MTK_PDMA_INT_MASK, ~0); - free_irq(eth->irq[1], dev); - free_irq(eth->irq[2], dev); } static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) @@ -2454,6 +2451,7 @@ static int mtk_remove(struct platform_device *pdev) netif_napi_del(&eth->tx_napi); netif_napi_del(&eth->rx_napi); mtk_cleanup(eth); + mtk_mdio_cleanup(eth); return 0; } -- cgit v1.1 From 262b38cdb3e47d402f4fdf76fcf3e8c4c8380a52 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Tue, 20 Sep 2016 22:30:11 +0200 Subject: net: ethernet: hisilicon: hns: use phydev from struct net_device The private structure contain a pointer to phydev, but the structure net_device already contain such pointer. So we can remove the pointer phydev in the private structure, and update the driver to use the one contained in struct net_device. Signed-off-by: Philippe Reynes Signed-off-by: David S.
Miller --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 23 +++++++---------- drivers/net/ethernet/hisilicon/hns/hns_enet.h | 1 - drivers/net/ethernet/hisilicon/hns/hns_ethtool.c | 33 +++++++++++------------- 3 files changed, 24 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index d7e1f8c..059aaed 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -994,10 +994,10 @@ static void hns_nic_adjust_link(struct net_device *ndev) struct hnae_handle *h = priv->ae_handle; int state = 1; - if (priv->phy) { + if (ndev->phydev) { h->dev->ops->adjust_link(h, ndev->phydev->speed, ndev->phydev->duplex); - state = priv->phy->link; + state = ndev->phydev->link; } state = state && h->dev->ops->get_status(h); @@ -1022,7 +1022,6 @@ static void hns_nic_adjust_link(struct net_device *ndev) */ int hns_nic_init_phy(struct net_device *ndev, struct hnae_handle *h) { - struct hns_nic_priv *priv = netdev_priv(ndev); struct phy_device *phy_dev = h->phy_dev; int ret; @@ -1046,8 +1045,6 @@ int hns_nic_init_phy(struct net_device *ndev, struct hnae_handle *h) if (h->phy_if == PHY_INTERFACE_MODE_XGMII) phy_dev->autoneg = false; - priv->phy = phy_dev; - return 0; } @@ -1224,8 +1221,8 @@ static int hns_nic_net_up(struct net_device *ndev) if (ret) goto out_start_err; - if (priv->phy) - phy_start(priv->phy); + if (ndev->phydev) + phy_start(ndev->phydev); clear_bit(NIC_STATE_DOWN, &priv->state); (void)mod_timer(&priv->service_timer, jiffies + SERVICE_TIMER_HZ); @@ -1259,8 +1256,8 @@ static void hns_nic_net_down(struct net_device *ndev) netif_tx_disable(ndev); priv->link = 0; - if (priv->phy) - phy_stop(priv->phy); + if (ndev->phydev) + phy_stop(ndev->phydev); ops = priv->ae_handle->dev->ops; @@ -1359,8 +1356,7 @@ static void hns_nic_net_timeout(struct net_device *ndev) static int hns_nic_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) 
{ - struct hns_nic_priv *priv = netdev_priv(netdev); - struct phy_device *phy_dev = priv->phy; + struct phy_device *phy_dev = netdev->phydev; if (!netif_running(netdev)) return -EINVAL; @@ -2017,9 +2013,8 @@ static int hns_nic_dev_remove(struct platform_device *pdev) hns_nic_uninit_ring_data(priv); priv->ring_data = NULL; - if (priv->phy) - phy_disconnect(priv->phy); - priv->phy = NULL; + if (ndev->phydev) + phy_disconnect(ndev->phydev); if (!IS_ERR_OR_NULL(priv->ae_handle)) hnae_put_handle(priv->ae_handle); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.h b/drivers/net/ethernet/hisilicon/hns/hns_enet.h index 44bb301..5b412de 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.h @@ -59,7 +59,6 @@ struct hns_nic_priv { u32 port_id; int phy_mode; int phy_led_val; - struct phy_device *phy; struct net_device *netdev; struct device *dev; struct hnae_handle *ae_handle; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 5eb3245..0e2c174 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -48,9 +48,9 @@ static u32 hns_nic_get_link(struct net_device *net_dev) h = priv->ae_handle; - if (priv->phy) { - if (!genphy_read_status(priv->phy)) - link_stat = priv->phy->link; + if (net_dev->phydev) { + if (!genphy_read_status(net_dev->phydev)) + link_stat = net_dev->phydev->link; else link_stat = 0; } @@ -67,8 +67,7 @@ static void hns_get_mdix_mode(struct net_device *net_dev, struct ethtool_cmd *cmd) { int mdix_ctrl, mdix, retval, is_resolved; - struct hns_nic_priv *priv = netdev_priv(net_dev); - struct phy_device *phy_dev = priv->phy; + struct phy_device *phy_dev = net_dev->phydev; if (!phy_dev || !phy_dev->mdio.bus) { cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; @@ -144,8 +143,8 @@ static int hns_nic_get_settings(struct net_device *net_dev, ethtool_cmd_speed_set(cmd, speed); 
cmd->duplex = duplex; - if (priv->phy) - (void)phy_ethtool_gset(priv->phy, cmd); + if (net_dev->phydev) + (void)phy_ethtool_gset(net_dev->phydev, cmd); link_stat = hns_nic_get_link(net_dev); if (!link_stat) { @@ -215,13 +214,13 @@ static int hns_nic_set_settings(struct net_device *net_dev, cmd->duplex != DUPLEX_FULL) return -EINVAL; } else if (h->phy_if == PHY_INTERFACE_MODE_SGMII) { - if (!priv->phy && cmd->autoneg == AUTONEG_ENABLE) + if (!net_dev->phydev && cmd->autoneg == AUTONEG_ENABLE) return -EINVAL; if (speed == SPEED_1000 && cmd->duplex == DUPLEX_HALF) return -EINVAL; - if (priv->phy) - return phy_ethtool_sset(priv->phy, cmd); + if (net_dev->phydev) + return phy_ethtool_sset(net_dev->phydev, cmd); if ((speed != SPEED_10 && speed != SPEED_100 && speed != SPEED_1000) || (cmd->duplex != DUPLEX_HALF && @@ -305,7 +304,7 @@ static int __lb_setup(struct net_device *ndev, { int ret = 0; struct hns_nic_priv *priv = netdev_priv(ndev); - struct phy_device *phy_dev = priv->phy; + struct phy_device *phy_dev = ndev->phydev; struct hnae_handle *h = priv->ae_handle; switch (loop) { @@ -910,7 +909,7 @@ void hns_get_strings(struct net_device *netdev, u32 stringset, u8 *data) memcpy(buff, hns_nic_test_strs[MAC_INTERNALLOOP_SERDES], ETH_GSTRING_LEN); buff += ETH_GSTRING_LEN; - if ((priv->phy) && (!priv->phy->is_c45)) + if ((netdev->phydev) && (!netdev->phydev->is_c45)) memcpy(buff, hns_nic_test_strs[MAC_INTERNALLOOP_PHY], ETH_GSTRING_LEN); @@ -996,7 +995,7 @@ int hns_get_sset_count(struct net_device *netdev, int stringset) if (priv->ae_handle->phy_if == PHY_INTERFACE_MODE_XGMII) cnt--; - if ((!priv->phy) || (priv->phy->is_c45)) + if ((!netdev->phydev) || (netdev->phydev->is_c45)) cnt--; return cnt; @@ -1015,8 +1014,7 @@ int hns_get_sset_count(struct net_device *netdev, int stringset) int hns_phy_led_set(struct net_device *netdev, int value) { int retval; - struct hns_nic_priv *priv = netdev_priv(netdev); - struct phy_device *phy_dev = priv->phy; + struct phy_device *phy_dev = 
netdev->phydev; retval = phy_write(phy_dev, HNS_PHY_PAGE_REG, HNS_PHY_PAGE_LED); retval |= phy_write(phy_dev, HNS_LED_FC_REG, value); @@ -1039,7 +1037,7 @@ int hns_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state) { struct hns_nic_priv *priv = netdev_priv(netdev); struct hnae_handle *h = priv->ae_handle; - struct phy_device *phy_dev = priv->phy; + struct phy_device *phy_dev = netdev->phydev; int ret; if (phy_dev) @@ -1159,8 +1157,7 @@ static int hns_get_regs_len(struct net_device *net_dev) static int hns_nic_nway_reset(struct net_device *netdev) { int ret = 0; - struct hns_nic_priv *priv = netdev_priv(netdev); - struct phy_device *phy = priv->phy; + struct phy_device *phy = netdev->phydev; if (netif_running(netdev)) { if (phy) -- cgit v1.1 From d270f76c2d6ee3d96cfb1affb78a3d536e0b8fd6 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Tue, 20 Sep 2016 22:30:12 +0200 Subject: net: ethernet: hisilicon: hns: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. Signed-off-by: Philippe Reynes Signed-off-by: David S. 
Miller --- drivers/net/ethernet/hisilicon/hns/hns_ethtool.c | 105 +++++++++++++---------- 1 file changed, 58 insertions(+), 47 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 0e2c174..47e59bb 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -64,14 +64,14 @@ static u32 hns_nic_get_link(struct net_device *net_dev) } static void hns_get_mdix_mode(struct net_device *net_dev, - struct ethtool_cmd *cmd) + struct ethtool_link_ksettings *cmd) { int mdix_ctrl, mdix, retval, is_resolved; struct phy_device *phy_dev = net_dev->phydev; if (!phy_dev || !phy_dev->mdio.bus) { - cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; - cmd->eth_tp_mdix = ETH_TP_MDI_INVALID; + cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; + cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID; return; } @@ -88,35 +88,35 @@ static void hns_get_mdix_mode(struct net_device *net_dev, switch (mdix_ctrl) { case 0x0: - cmd->eth_tp_mdix_ctrl = ETH_TP_MDI; + cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI; break; case 0x1: - cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_X; + cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_X; break; case 0x3: - cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO; + cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO; break; default: - cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; + cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; break; } if (!is_resolved) - cmd->eth_tp_mdix = ETH_TP_MDI_INVALID; + cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID; else if (mdix) - cmd->eth_tp_mdix = ETH_TP_MDI_X; + cmd->base.eth_tp_mdix = ETH_TP_MDI_X; else - cmd->eth_tp_mdix = ETH_TP_MDI; + cmd->base.eth_tp_mdix = ETH_TP_MDI; } /** - *hns_nic_get_settings - implement ethtool get settings + *hns_nic_get_link_ksettings - implement ethtool get link ksettings *@net_dev: net_device - *@cmd: ethtool_cmd + *@cmd: ethtool_link_ksettings *retuen 0 - success , negative --fail */ -static int hns_nic_get_settings(struct 
net_device *net_dev, - struct ethtool_cmd *cmd) +static int hns_nic_get_link_ksettings(struct net_device *net_dev, + struct ethtool_link_ksettings *cmd) { struct hns_nic_priv *priv = netdev_priv(net_dev); struct hnae_handle *h; @@ -124,6 +124,7 @@ static int hns_nic_get_settings(struct net_device *net_dev, int ret; u8 duplex; u16 speed; + u32 supported, advertising; if (!priv || !priv->ae_handle) return -ESRCH; @@ -138,38 +139,43 @@ static int hns_nic_get_settings(struct net_device *net_dev, return -EINVAL; } + ethtool_convert_link_mode_to_legacy_u32(&supported, + cmd->link_modes.supported); + ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); + /* When there is no phy, autoneg is off. */ - cmd->autoneg = false; - ethtool_cmd_speed_set(cmd, speed); - cmd->duplex = duplex; + cmd->base.autoneg = false; + cmd->base.cmd = speed; + cmd->base.duplex = duplex; if (net_dev->phydev) - (void)phy_ethtool_gset(net_dev->phydev, cmd); + (void)phy_ethtool_ksettings_get(net_dev->phydev, cmd); link_stat = hns_nic_get_link(net_dev); if (!link_stat) { - ethtool_cmd_speed_set(cmd, (u32)SPEED_UNKNOWN); - cmd->duplex = DUPLEX_UNKNOWN; + cmd->base.speed = (u32)SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; } - if (cmd->autoneg) - cmd->advertising |= ADVERTISED_Autoneg; + if (cmd->base.autoneg) + advertising |= ADVERTISED_Autoneg; - cmd->supported |= h->if_support; + supported |= h->if_support; if (h->phy_if == PHY_INTERFACE_MODE_SGMII) { - cmd->supported |= SUPPORTED_TP; - cmd->advertising |= ADVERTISED_1000baseT_Full; + supported |= SUPPORTED_TP; + advertising |= ADVERTISED_1000baseT_Full; } else if (h->phy_if == PHY_INTERFACE_MODE_XGMII) { - cmd->supported |= SUPPORTED_FIBRE; - cmd->advertising |= ADVERTISED_10000baseKR_Full; + supported |= SUPPORTED_FIBRE; + advertising |= ADVERTISED_10000baseKR_Full; } switch (h->media_type) { case HNAE_MEDIA_TYPE_FIBER: - cmd->port = PORT_FIBRE; + cmd->base.port = PORT_FIBRE; break; case 
HNAE_MEDIA_TYPE_COPPER: - cmd->port = PORT_TP; + cmd->base.port = PORT_TP; break; case HNAE_MEDIA_TYPE_UNKNOWN: default: @@ -177,23 +183,27 @@ static int hns_nic_get_settings(struct net_device *net_dev, } if (!(AE_IS_VER1(priv->enet_ver) && h->port_type == HNAE_PORT_DEBUG)) - cmd->supported |= SUPPORTED_Pause; + supported |= SUPPORTED_Pause; + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); - cmd->transceiver = XCVR_EXTERNAL; - cmd->mdio_support = (ETH_MDIO_SUPPORTS_C45 | ETH_MDIO_SUPPORTS_C22); + cmd->base.mdio_support = ETH_MDIO_SUPPORTS_C45 | ETH_MDIO_SUPPORTS_C22; hns_get_mdix_mode(net_dev, cmd); return 0; } /** - *hns_nic_set_settings - implement ethtool set settings + *hns_nic_set_link_settings - implement ethtool set link ksettings *@net_dev: net_device - *@cmd: ethtool_cmd + *@cmd: ethtool_link_ksettings *retuen 0 - success , negative --fail */ -static int hns_nic_set_settings(struct net_device *net_dev, - struct ethtool_cmd *cmd) +static int hns_nic_set_link_ksettings(struct net_device *net_dev, + const struct ethtool_link_ksettings *cmd) { struct hns_nic_priv *priv = netdev_priv(net_dev); struct hnae_handle *h; @@ -207,24 +217,25 @@ static int hns_nic_set_settings(struct net_device *net_dev, return -ENODEV; h = priv->ae_handle; - speed = ethtool_cmd_speed(cmd); + speed = cmd->base.speed; if (h->phy_if == PHY_INTERFACE_MODE_XGMII) { - if (cmd->autoneg == AUTONEG_ENABLE || speed != SPEED_10000 || - cmd->duplex != DUPLEX_FULL) + if (cmd->base.autoneg == AUTONEG_ENABLE || + speed != SPEED_10000 || + cmd->base.duplex != DUPLEX_FULL) return -EINVAL; } else if (h->phy_if == PHY_INTERFACE_MODE_SGMII) { - if (!net_dev->phydev && cmd->autoneg == AUTONEG_ENABLE) + if (!net_dev->phydev && cmd->base.autoneg == AUTONEG_ENABLE) return -EINVAL; - if (speed == SPEED_1000 && cmd->duplex == DUPLEX_HALF) + if (speed == SPEED_1000 && cmd->base.duplex == 
DUPLEX_HALF) return -EINVAL; if (net_dev->phydev) - return phy_ethtool_sset(net_dev->phydev, cmd); + return phy_ethtool_ksettings_set(net_dev->phydev, cmd); if ((speed != SPEED_10 && speed != SPEED_100 && - speed != SPEED_1000) || (cmd->duplex != DUPLEX_HALF && - cmd->duplex != DUPLEX_FULL)) + speed != SPEED_1000) || (cmd->base.duplex != DUPLEX_HALF && + cmd->base.duplex != DUPLEX_FULL)) return -EINVAL; } else { netdev_err(net_dev, "Not supported!"); @@ -232,7 +243,7 @@ static int hns_nic_set_settings(struct net_device *net_dev, } if (h->dev->ops->adjust_link) { - h->dev->ops->adjust_link(h, (int)speed, cmd->duplex); + h->dev->ops->adjust_link(h, (int)speed, cmd->base.duplex); return 0; } @@ -1264,8 +1275,6 @@ static int hns_get_rxnfc(struct net_device *netdev, static const struct ethtool_ops hns_ethtool_ops = { .get_drvinfo = hns_nic_get_drvinfo, .get_link = hns_nic_get_link, - .get_settings = hns_nic_get_settings, - .set_settings = hns_nic_set_settings, .get_ringparam = hns_get_ringparam, .get_pauseparam = hns_get_pauseparam, .set_pauseparam = hns_set_pauseparam, @@ -1285,6 +1294,8 @@ static const struct ethtool_ops hns_ethtool_ops = { .get_rxfh = hns_get_rss, .set_rxfh = hns_set_rss, .get_rxnfc = hns_get_rxnfc, + .get_link_ksettings = hns_nic_get_link_ksettings, + .set_link_ksettings = hns_nic_set_link_ksettings, }; void hns_ethtool_set_ops(struct net_device *ndev) -- cgit v1.1 From efee95f42b5dddedcaff0a0eaa44e170fc7522e8 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 20 Sep 2016 19:25:58 -0400 Subject: ptp_clock: future-proofing drivers against PTP subsystem becoming optional Drivers must be ready to accept NULL from ptp_clock_register() if the PTP clock subsystem is configured out. This patch documents that and ensures that all drivers cope well with a NULL return. Signed-off-by: Nicolas Pitre Reviewed-by: Eugenia Emantayev Acked-by: Richard Cochran Acked-by: Edward Cree Signed-off-by: David S. 
Miller --- drivers/net/ethernet/intel/e1000e/ptp.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_ptp.c | 2 +- drivers/net/ethernet/intel/igb/igb_ptp.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_clock.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 2 +- drivers/net/ethernet/sfc/ptp.c | 14 +++++++------- drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c | 2 +- include/linux/ptp_clock_kernel.h | 5 +++++ 9 files changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c index 2e1b17a..ad03763 100644 --- a/drivers/net/ethernet/intel/e1000e/ptp.c +++ b/drivers/net/ethernet/intel/e1000e/ptp.c @@ -334,7 +334,7 @@ void e1000e_ptp_init(struct e1000_adapter *adapter) if (IS_ERR(adapter->ptp_clock)) { adapter->ptp_clock = NULL; e_err("ptp_clock_register failed\n"); - } else { + } else if (adapter->ptp_clock) { e_info("registered PHC clock\n"); } } diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index ed39cba..f1fecea 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -669,7 +669,7 @@ void i40e_ptp_init(struct i40e_pf *pf) pf->ptp_clock = NULL; dev_err(&pf->pdev->dev, "%s: ptp_clock_register failed\n", __func__); - } else { + } else if (pf->ptp_clock) { struct timespec64 ts; u32 regval; diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 66dfa20..1dd14e1 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -1159,7 +1159,7 @@ void igb_ptp_init(struct igb_adapter *adapter) if (IS_ERR(adapter->ptp_clock)) { adapter->ptp_clock = NULL; dev_err(&adapter->pdev->dev, "ptp_clock_register failed\n"); - } else { + } else if (adapter->ptp_clock) { dev_info(&adapter->pdev->dev, "added PHC on %s\n", adapter->netdev->name); adapter->ptp_flags 
|= IGB_PTP_ENABLED; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index e5431bf..a922776 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -1254,7 +1254,7 @@ static long ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter) adapter->ptp_clock = NULL; e_dev_err("ptp_clock_register failed\n"); return err; - } else + } else if (adapter->ptp_clock) e_dev_info("registered PHC device on %s\n", netdev->name); /* set default timestamp mode to disabled here. We do this in diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c index 1494997..08fc5fc 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -298,7 +298,7 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev) if (IS_ERR(mdev->ptp_clock)) { mdev->ptp_clock = NULL; mlx4_err(mdev, "ptp_clock_register failed\n"); - } else { + } else if (mdev->ptp_clock) { mlx4_info(mdev, "registered PHC clock\n"); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c index 847a8f3..13dc388 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c @@ -273,7 +273,7 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv) tstamp->ptp = ptp_clock_register(&tstamp->ptp_info, &priv->mdev->pdev->dev); - if (IS_ERR_OR_NULL(tstamp->ptp)) { + if (IS_ERR(tstamp->ptp)) { mlx5_core_warn(priv->mdev, "ptp_clock_register failed %ld\n", PTR_ERR(tstamp->ptp)); tstamp->ptp = NULL; diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index dd204d9..77a5364 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -1269,13 +1269,13 @@ int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel) if (IS_ERR(ptp->phc_clock)) { rc = 
PTR_ERR(ptp->phc_clock); goto fail3; - } - - INIT_WORK(&ptp->pps_work, efx_ptp_pps_worker); - ptp->pps_workwq = create_singlethread_workqueue("sfc_pps"); - if (!ptp->pps_workwq) { - rc = -ENOMEM; - goto fail4; + } else if (ptp->phc_clock) { + INIT_WORK(&ptp->pps_work, efx_ptp_pps_worker); + ptp->pps_workwq = create_singlethread_workqueue("sfc_pps"); + if (!ptp->pps_workwq) { + rc = -ENOMEM; + goto fail4; + } } } ptp->nic_ts_enabled = false; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 170a18b..6e3b829 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -187,7 +187,7 @@ int stmmac_ptp_register(struct stmmac_priv *priv) if (IS_ERR(priv->ptp_clock)) { priv->ptp_clock = NULL; pr_err("ptp_clock_register() failed on %s\n", priv->dev->name); - } else + } else if (priv->ptp_clock) pr_debug("Added PTP HW clock successfully on %s\n", priv->dev->name); diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 6b15e16..5ad54fc 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -127,6 +127,11 @@ struct ptp_clock; * * @info: Structure describing the new clock. * @parent: Pointer to the parent device of the new clock. + * + * Returns a valid pointer on success or PTR_ERR on failure. If PHC + * support is missing at the configuration level, this function + * returns NULL, and drivers are expected to gracefully handle that + * case separately. */ extern struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, -- cgit v1.1 From 0e7b99257be4b596d9fdd435698c0bfdb0b38d91 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 21 Sep 2016 01:40:31 +0200 Subject: net: dsa: mv88e6xxx: Add helper for accessing port registers There is a device coming soon which places its port registers somewhere different to all other Marvell switches supported so far. 
Add helper functions for reading/writing port registers, making it easier to handle this new device. Signed-off-by: Andrew Lunn Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 370 +++++++++++++++++----------------- drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 1 - 2 files changed, 182 insertions(+), 189 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 1d71802..25bd3fa 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -216,6 +216,22 @@ int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val) return 0; } +int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg, + u16 *val) +{ + int addr = chip->info->port_base_addr + port; + + return mv88e6xxx_read(chip, addr, reg, val); +} + +int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg, + u16 val) +{ + int addr = chip->info->port_base_addr + port; + + return mv88e6xxx_write(chip, addr, reg, val); +} + static int mv88e6xxx_phy_read(struct mv88e6xxx_chip *chip, int phy, int reg, u16 *val) { @@ -585,23 +601,23 @@ static void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port, struct phy_device *phydev) { struct mv88e6xxx_chip *chip = ds->priv; - u32 reg; - int ret; + u16 reg; + int err; if (!phy_is_pseudo_fixed_link(phydev)) return; mutex_lock(&chip->reg_lock); - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_PCS_CTRL); - if (ret < 0) + err = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, &reg); + if (err) goto out; - reg = ret & ~(PORT_PCS_CTRL_LINK_UP | - PORT_PCS_CTRL_FORCE_LINK | - PORT_PCS_CTRL_DUPLEX_FULL | - PORT_PCS_CTRL_FORCE_DUPLEX | - PORT_PCS_CTRL_UNFORCED); + reg &= ~(PORT_PCS_CTRL_LINK_UP | + PORT_PCS_CTRL_FORCE_LINK | + PORT_PCS_CTRL_DUPLEX_FULL | + PORT_PCS_CTRL_FORCE_DUPLEX | + PORT_PCS_CTRL_UNFORCED); reg |= PORT_PCS_CTRL_FORCE_LINK; if (phydev->link) @@ -639,7 +655,7 @@ static void mv88e6xxx_adjust_link(struct 
dsa_switch *ds, int port, reg |= (PORT_PCS_CTRL_RGMII_DELAY_RXCLK | PORT_PCS_CTRL_RGMII_DELAY_TXCLK); } - _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_PCS_CTRL, reg); + mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, reg); out: mutex_unlock(&chip->reg_lock); @@ -799,22 +815,22 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip, { u32 low; u32 high = 0; - int ret; + int err; + u16 reg; u64 value; switch (s->type) { case PORT: - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), s->reg); - if (ret < 0) + err = mv88e6xxx_port_read(chip, port, s->reg, &reg); + if (err) return UINT64_MAX; - low = ret; + low = reg; if (s->sizeof_stat == 4) { - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), - s->reg + 1); - if (ret < 0) + err = mv88e6xxx_port_read(chip, port, s->reg + 1, &reg); + if (err) return UINT64_MAX; - high = ret; + high = reg; } break; case BANK0: @@ -893,6 +909,8 @@ static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port, struct ethtool_regs *regs, void *_p) { struct mv88e6xxx_chip *chip = ds->priv; + int err; + u16 reg; u16 *p = _p; int i; @@ -903,11 +921,10 @@ static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port, mutex_lock(&chip->reg_lock); for (i = 0; i < 32; i++) { - int ret; - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), i); - if (ret >= 0) - p[i] = ret; + err = mv88e6xxx_port_read(chip, port, i, &reg); + if (!err) + p[i] = reg; } mutex_unlock(&chip->reg_lock); @@ -938,7 +955,7 @@ static int mv88e6xxx_get_eee(struct dsa_switch *ds, int port, e->eee_enabled = !!(reg & 0x0200); e->tx_lpi_enabled = !!(reg & 0x0100); - err = mv88e6xxx_read(chip, REG_PORT(port), PORT_STATUS, &reg); + err = mv88e6xxx_port_read(chip, port, PORT_STATUS, &reg); if (err) goto out; @@ -1106,12 +1123,13 @@ static int _mv88e6xxx_port_state(struct mv88e6xxx_chip *chip, int port, u8 state) { struct dsa_switch *ds = chip->ds; - int reg, ret = 0; + u16 reg; + int err; u8 oldstate; - reg = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_CONTROL); - if (reg < 0) - 
return reg; + err = mv88e6xxx_port_read(chip, port, PORT_CONTROL, &reg); + if (err) + return err; oldstate = reg & PORT_CONTROL_STATE_MASK; @@ -1124,23 +1142,22 @@ static int _mv88e6xxx_port_state(struct mv88e6xxx_chip *chip, int port, oldstate == PORT_CONTROL_STATE_FORWARDING) && (state == PORT_CONTROL_STATE_DISABLED || state == PORT_CONTROL_STATE_BLOCKING)) { - ret = _mv88e6xxx_atu_remove(chip, 0, port, false); - if (ret) - return ret; + err = _mv88e6xxx_atu_remove(chip, 0, port, false); + if (err) + return err; } reg = (reg & ~PORT_CONTROL_STATE_MASK) | state; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL, - reg); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg); + if (err) + return err; netdev_dbg(ds->ports[port].netdev, "PortState %s (was %s)\n", mv88e6xxx_port_state_names[state], mv88e6xxx_port_state_names[oldstate]); } - return ret; + return err; } static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port) @@ -1149,7 +1166,8 @@ static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port) const u16 mask = (1 << chip->info->num_ports) - 1; struct dsa_switch *ds = chip->ds; u16 output_ports = 0; - int reg; + u16 reg; + int err; int i; /* allow CPU port or DSA link(s) to send frames to every port */ @@ -1170,14 +1188,14 @@ static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port) /* prevent frames from going back out of the port they came in on */ output_ports &= ~BIT(port); - reg = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_BASE_VLAN); - if (reg < 0) - return reg; + err = mv88e6xxx_port_read(chip, port, PORT_BASE_VLAN, &reg); + if (err) + return err; reg &= ~mask; reg |= output_ports & mask; - return _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_BASE_VLAN, reg); + return mv88e6xxx_port_write(chip, port, PORT_BASE_VLAN, reg); } static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, @@ -1218,23 +1236,22 @@ static int 
_mv88e6xxx_port_pvid(struct mv88e6xxx_chip *chip, int port, u16 *new, u16 *old) { struct dsa_switch *ds = chip->ds; - u16 pvid; - int ret; + u16 pvid, reg; + int err; - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_DEFAULT_VLAN); - if (ret < 0) - return ret; + err = mv88e6xxx_port_read(chip, port, PORT_DEFAULT_VLAN, &reg); + if (err) + return err; - pvid = ret & PORT_DEFAULT_VLAN_MASK; + pvid = reg & PORT_DEFAULT_VLAN_MASK; if (new) { - ret &= ~PORT_DEFAULT_VLAN_MASK; - ret |= *new & PORT_DEFAULT_VLAN_MASK; + reg &= ~PORT_DEFAULT_VLAN_MASK; + reg |= *new & PORT_DEFAULT_VLAN_MASK; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_DEFAULT_VLAN, ret); - if (ret < 0) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_DEFAULT_VLAN, reg); + if (err) + return err; netdev_dbg(ds->ports[port].netdev, "DefaultVID %d (was %d)\n", *new, pvid); @@ -1613,7 +1630,8 @@ static int _mv88e6xxx_port_fid(struct mv88e6xxx_chip *chip, int port, struct dsa_switch *ds = chip->ds; u16 upper_mask; u16 fid; - int ret; + u16 reg; + int err; if (mv88e6xxx_num_databases(chip) == 4096) upper_mask = 0xff; @@ -1623,37 +1641,35 @@ static int _mv88e6xxx_port_fid(struct mv88e6xxx_chip *chip, int port, return -EOPNOTSUPP; /* Port's default FID bits 3:0 are located in reg 0x06, offset 12 */ - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_BASE_VLAN); - if (ret < 0) - return ret; + err = mv88e6xxx_port_read(chip, port, PORT_BASE_VLAN, &reg); + if (err) + return err; - fid = (ret & PORT_BASE_VLAN_FID_3_0_MASK) >> 12; + fid = (reg & PORT_BASE_VLAN_FID_3_0_MASK) >> 12; if (new) { - ret &= ~PORT_BASE_VLAN_FID_3_0_MASK; - ret |= (*new << 12) & PORT_BASE_VLAN_FID_3_0_MASK; + reg &= ~PORT_BASE_VLAN_FID_3_0_MASK; + reg |= (*new << 12) & PORT_BASE_VLAN_FID_3_0_MASK; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_BASE_VLAN, - ret); - if (ret < 0) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_BASE_VLAN, reg); + if (err) + return err; } /* Port's default FID bits 11:4 
are located in reg 0x05, offset 0 */ - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_CONTROL_1); - if (ret < 0) - return ret; + err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_1, &reg); + if (err) + return err; - fid |= (ret & upper_mask) << 4; + fid |= (reg & upper_mask) << 4; if (new) { - ret &= ~upper_mask; - ret |= (*new >> 4) & upper_mask; + reg &= ~upper_mask; + reg |= (*new >> 4) & upper_mask; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL_1, - ret); - if (ret < 0) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_1, reg); + if (err) + return err; netdev_dbg(ds->ports[port].netdev, "FID %d (was %d)\n", *new, fid); @@ -1865,26 +1881,26 @@ static int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port, struct mv88e6xxx_chip *chip = ds->priv; u16 old, new = vlan_filtering ? PORT_CONTROL_2_8021Q_SECURE : PORT_CONTROL_2_8021Q_DISABLED; - int ret; + u16 reg; + int err; if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_VTU)) return -EOPNOTSUPP; mutex_lock(&chip->reg_lock); - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_CONTROL_2); - if (ret < 0) + err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_2, &reg); + if (err) goto unlock; - old = ret & PORT_CONTROL_2_8021Q_MASK; + old = reg & PORT_CONTROL_2_8021Q_MASK; if (new != old) { - ret &= ~PORT_CONTROL_2_8021Q_MASK; - ret |= new & PORT_CONTROL_2_8021Q_MASK; + reg &= ~PORT_CONTROL_2_8021Q_MASK; + reg |= new & PORT_CONTROL_2_8021Q_MASK; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL_2, - ret); - if (ret < 0) + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_2, reg); + if (err) goto unlock; netdev_dbg(ds->ports[port].netdev, "802.1Q Mode %s (was %s)\n", @@ -1892,11 +1908,11 @@ static int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port, mv88e6xxx_port_8021q_mode_names[old]); } - ret = 0; + err = 0; unlock: mutex_unlock(&chip->reg_lock); - return ret; + return err; } static int @@ -2406,19 +2422,20 @@ static int 
mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip) u16 is_reset = (ppu_active ? 0x8800 : 0xc800); struct gpio_desc *gpiod = chip->reset; unsigned long timeout; - int ret; + int err, ret; + u16 reg; int i; /* Set all ports to the disabled state. */ for (i = 0; i < chip->info->num_ports; i++) { - ret = _mv88e6xxx_reg_read(chip, REG_PORT(i), PORT_CONTROL); - if (ret < 0) - return ret; + err = mv88e6xxx_port_read(chip, i, PORT_CONTROL, &reg); + if (err) + return err; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(i), PORT_CONTROL, - ret & 0xfffc); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, i, PORT_CONTROL, + reg & 0xfffc); + if (err) + return err; } /* Wait for transmit queues to drain. */ @@ -2437,11 +2454,11 @@ static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip) * through global registers 0x18 and 0x19. */ if (ppu_active) - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc000); + err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc000); else - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc400); - if (ret) - return ret; + err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc400); + if (err) + return err; /* Wait up to one second for reset to complete. 
*/ timeout = jiffies + 1 * HZ; @@ -2455,11 +2472,11 @@ static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip) usleep_range(1000, 2000); } if (time_after(jiffies, timeout)) - ret = -ETIMEDOUT; + err = -ETIMEDOUT; else - ret = 0; + err = 0; - return ret; + return err; } static int mv88e6xxx_serdes_power_on(struct mv88e6xxx_chip *chip) @@ -2480,21 +2497,10 @@ static int mv88e6xxx_serdes_power_on(struct mv88e6xxx_chip *chip) return err; } -static int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, - int reg, u16 *val) -{ - int addr = chip->info->port_base_addr + port; - - if (port >= chip->info->num_ports) - return -EINVAL; - - return mv88e6xxx_read(chip, addr, reg, val); -} - static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) { struct dsa_switch *ds = chip->ds; - int ret; + int err; u16 reg; if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) || @@ -2507,7 +2513,7 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) * and all DSA ports to their maximum bandwidth and * full duplex. 
*/ - reg = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_PCS_CTRL); + err = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, &reg); if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) { reg &= ~PORT_PCS_CTRL_UNFORCED; reg |= PORT_PCS_CTRL_FORCE_LINK | @@ -2522,10 +2528,9 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) reg |= PORT_PCS_CTRL_UNFORCED; } - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_PCS_CTRL, reg); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, reg); + if (err) + return err; } /* Port Control: disable Drop-on-Unlock, disable Drop-on-Lock, @@ -2576,26 +2581,25 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) PORT_CONTROL_FORWARD_UNKNOWN_MC; } if (reg) { - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_CONTROL, reg); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg); + if (err) + return err; } /* If this port is connected to a SerDes, make sure the SerDes is not * powered down. 
*/ if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_SERDES)) { - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_STATUS); - if (ret < 0) - return ret; - ret &= PORT_STATUS_CMODE_MASK; - if ((ret == PORT_STATUS_CMODE_100BASE_X) || - (ret == PORT_STATUS_CMODE_1000BASE_X) || - (ret == PORT_STATUS_CMODE_SGMII)) { - ret = mv88e6xxx_serdes_power_on(chip); - if (ret < 0) - return ret; + err = mv88e6xxx_port_read(chip, port, PORT_STATUS, &reg); + if (err) + return err; + reg &= PORT_STATUS_CMODE_MASK; + if ((reg == PORT_STATUS_CMODE_100BASE_X) || + (reg == PORT_STATUS_CMODE_1000BASE_X) || + (reg == PORT_STATUS_CMODE_SGMII)) { + err = mv88e6xxx_serdes_power_on(chip); + if (err < 0) + return err; } } @@ -2629,10 +2633,9 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) reg |= PORT_CONTROL_2_8021Q_DISABLED; if (reg) { - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_CONTROL_2, reg); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_2, reg); + if (err) + return err; } /* Port Association Vector: when learning source addresses @@ -2645,16 +2648,14 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) if (dsa_is_cpu_port(ds, port)) reg = 0; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_ASSOC_VECTOR, - reg); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_ASSOC_VECTOR, reg); + if (err) + return err; /* Egress rate control 2: disable egress rate control. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_RATE_CONTROL_2, - 0x0000); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_RATE_CONTROL_2, 0x0000); + if (err) + return err; if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) || mv88e6xxx_6165_family(chip) || mv88e6xxx_6097_family(chip) || @@ -2663,96 +2664,89 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) * be paused for by the remote end or the period of * time that this port can pause the remote end. 
*/ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_PAUSE_CTRL, 0x0000); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_PAUSE_CTRL, 0x0000); + if (err) + return err; /* Port ATU control: disable limiting the number of * address database entries that this port is allowed * to use. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_ATU_CONTROL, 0x0000); + err = mv88e6xxx_port_write(chip, port, PORT_ATU_CONTROL, + 0x0000); /* Priority Override: disable DA, SA and VTU priority * override. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_PRI_OVERRIDE, 0x0000); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_PRI_OVERRIDE, + 0x0000); + if (err) + return err; /* Port Ethertype: use the Ethertype DSA Ethertype * value. */ if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_EDSA)) { - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_ETH_TYPE, ETH_P_EDSA); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_ETH_TYPE, + ETH_P_EDSA); + if (err) + return err; } /* Tag Remap: use an identity 802.1p prio -> switch * prio mapping. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_TAG_REGMAP_0123, 0x3210); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_TAG_REGMAP_0123, + 0x3210); + if (err) + return err; /* Tag Remap 2: use an identity 802.1p prio -> switch * prio mapping. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_TAG_REGMAP_4567, 0x7654); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_TAG_REGMAP_4567, + 0x7654); + if (err) + return err; } /* Rate Control: disable ingress rate limiting. 
*/ if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) || mv88e6xxx_6165_family(chip) || mv88e6xxx_6097_family(chip) || mv88e6xxx_6320_family(chip)) { - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_RATE_CONTROL, 0x0001); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_RATE_CONTROL, + 0x0001); + if (err) + return err; } else if (mv88e6xxx_6185_family(chip) || mv88e6xxx_6095_family(chip)) { - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_RATE_CONTROL, 0x0000); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_RATE_CONTROL, + 0x0000); + if (err) + return err; } /* Port Control 1: disable trunking, disable sending * learning messages to this port. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL_1, - 0x0000); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_1, 0x0000); + if (err) + return err; /* Port based VLAN map: give each port the same default address * database, and allow bidirectional communication between the * CPU and DSA port(s), and the other ports. */ - ret = _mv88e6xxx_port_fid_set(chip, port, 0); - if (ret) - return ret; + err = _mv88e6xxx_port_fid_set(chip, port, 0); + if (err) + return err; - ret = _mv88e6xxx_port_based_vlan_map(chip, port); - if (ret) - return ret; + err = _mv88e6xxx_port_based_vlan_map(chip, port); + if (err) + return err; /* Default VLAN ID and priority: don't set a default VLAN * ID, and set the default packet priority to zero. 
*/ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_DEFAULT_VLAN, - 0x0000); - if (ret) - return ret; - - return 0; + return mv88e6xxx_port_write(chip, port, PORT_DEFAULT_VLAN, 0x0000); } static int mv88e6xxx_g1_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr) diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index 52f3f52..e349d0d 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -37,7 +37,6 @@ #define ADDR_SERDES 0x0f #define SERDES_PAGE_FIBER 0x01 -#define REG_PORT(p) (0x10 + (p)) #define PORT_STATUS 0x00 #define PORT_STATUS_PAUSE_EN BIT(15) #define PORT_STATUS_MY_PAUSE BIT(14) -- cgit v1.1 From d6b1023a83e85943d1f2fa3baafbb4d5cfee7179 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 21 Sep 2016 01:40:32 +0200 Subject: net: dsa: mv88e6xxx: Convert flag bits to unsigned long long We are soon going to run out of flag bits on 32bit systems. Convert to unsigned long long. Signed-off-by: Andrew Lunn Reviewed-by: Vivien Didelot Signed-off-by: David S. 
Miller --- drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 62 +++++++++++++++++------------------ 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index e349d0d..8279883 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -452,36 +452,36 @@ enum mv88e6xxx_cap { }; /* Bitmask of capabilities */ -#define MV88E6XXX_FLAG_EDSA BIT(MV88E6XXX_CAP_EDSA) -#define MV88E6XXX_FLAG_EEE BIT(MV88E6XXX_CAP_EEE) - -#define MV88E6XXX_FLAG_SMI_CMD BIT(MV88E6XXX_CAP_SMI_CMD) -#define MV88E6XXX_FLAG_SMI_DATA BIT(MV88E6XXX_CAP_SMI_DATA) - -#define MV88E6XXX_FLAG_PHY_PAGE BIT(MV88E6XXX_CAP_PHY_PAGE) - -#define MV88E6XXX_FLAG_SERDES BIT(MV88E6XXX_CAP_SERDES) - -#define MV88E6XXX_FLAG_GLOBAL2 BIT(MV88E6XXX_CAP_GLOBAL2) -#define MV88E6XXX_FLAG_G2_MGMT_EN_2X BIT(MV88E6XXX_CAP_G2_MGMT_EN_2X) -#define MV88E6XXX_FLAG_G2_MGMT_EN_0X BIT(MV88E6XXX_CAP_G2_MGMT_EN_0X) -#define MV88E6XXX_FLAG_G2_IRL_CMD BIT(MV88E6XXX_CAP_G2_IRL_CMD) -#define MV88E6XXX_FLAG_G2_IRL_DATA BIT(MV88E6XXX_CAP_G2_IRL_DATA) -#define MV88E6XXX_FLAG_G2_PVT_ADDR BIT(MV88E6XXX_CAP_G2_PVT_ADDR) -#define MV88E6XXX_FLAG_G2_PVT_DATA BIT(MV88E6XXX_CAP_G2_PVT_DATA) -#define MV88E6XXX_FLAG_G2_SWITCH_MAC BIT(MV88E6XXX_CAP_G2_SWITCH_MAC) -#define MV88E6XXX_FLAG_G2_POT BIT(MV88E6XXX_CAP_G2_POT) -#define MV88E6XXX_FLAG_G2_EEPROM_CMD BIT(MV88E6XXX_CAP_G2_EEPROM_CMD) -#define MV88E6XXX_FLAG_G2_EEPROM_DATA BIT(MV88E6XXX_CAP_G2_EEPROM_DATA) -#define MV88E6XXX_FLAG_G2_SMI_PHY_CMD BIT(MV88E6XXX_CAP_G2_SMI_PHY_CMD) -#define MV88E6XXX_FLAG_G2_SMI_PHY_DATA BIT(MV88E6XXX_CAP_G2_SMI_PHY_DATA) - -#define MV88E6XXX_FLAG_PPU BIT(MV88E6XXX_CAP_PPU) -#define MV88E6XXX_FLAG_PPU_ACTIVE BIT(MV88E6XXX_CAP_PPU_ACTIVE) -#define MV88E6XXX_FLAG_STU BIT(MV88E6XXX_CAP_STU) -#define MV88E6XXX_FLAG_TEMP BIT(MV88E6XXX_CAP_TEMP) -#define MV88E6XXX_FLAG_TEMP_LIMIT BIT(MV88E6XXX_CAP_TEMP_LIMIT) -#define MV88E6XXX_FLAG_VTU BIT(MV88E6XXX_CAP_VTU) 
+#define MV88E6XXX_FLAG_EDSA BIT_ULL(MV88E6XXX_CAP_EDSA) +#define MV88E6XXX_FLAG_EEE BIT_ULL(MV88E6XXX_CAP_EEE) + +#define MV88E6XXX_FLAG_SMI_CMD BIT_ULL(MV88E6XXX_CAP_SMI_CMD) +#define MV88E6XXX_FLAG_SMI_DATA BIT_ULL(MV88E6XXX_CAP_SMI_DATA) + +#define MV88E6XXX_FLAG_PHY_PAGE BIT_ULL(MV88E6XXX_CAP_PHY_PAGE) + +#define MV88E6XXX_FLAG_SERDES BIT_ULL(MV88E6XXX_CAP_SERDES) + +#define MV88E6XXX_FLAG_GLOBAL2 BIT_ULL(MV88E6XXX_CAP_GLOBAL2) +#define MV88E6XXX_FLAG_G2_MGMT_EN_2X BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_2X) +#define MV88E6XXX_FLAG_G2_MGMT_EN_0X BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_0X) +#define MV88E6XXX_FLAG_G2_IRL_CMD BIT_ULL(MV88E6XXX_CAP_G2_IRL_CMD) +#define MV88E6XXX_FLAG_G2_IRL_DATA BIT_ULL(MV88E6XXX_CAP_G2_IRL_DATA) +#define MV88E6XXX_FLAG_G2_PVT_ADDR BIT_ULL(MV88E6XXX_CAP_G2_PVT_ADDR) +#define MV88E6XXX_FLAG_G2_PVT_DATA BIT_ULL(MV88E6XXX_CAP_G2_PVT_DATA) +#define MV88E6XXX_FLAG_G2_SWITCH_MAC BIT_ULL(MV88E6XXX_CAP_G2_SWITCH_MAC) +#define MV88E6XXX_FLAG_G2_POT BIT_ULL(MV88E6XXX_CAP_G2_POT) +#define MV88E6XXX_FLAG_G2_EEPROM_CMD BIT_ULL(MV88E6XXX_CAP_G2_EEPROM_CMD) +#define MV88E6XXX_FLAG_G2_EEPROM_DATA BIT_ULL(MV88E6XXX_CAP_G2_EEPROM_DATA) +#define MV88E6XXX_FLAG_G2_SMI_PHY_CMD BIT_ULL(MV88E6XXX_CAP_G2_SMI_PHY_CMD) +#define MV88E6XXX_FLAG_G2_SMI_PHY_DATA BIT_ULL(MV88E6XXX_CAP_G2_SMI_PHY_DATA) + +#define MV88E6XXX_FLAG_PPU BIT_ULL(MV88E6XXX_CAP_PPU) +#define MV88E6XXX_FLAG_PPU_ACTIVE BIT_ULL(MV88E6XXX_CAP_PPU_ACTIVE) +#define MV88E6XXX_FLAG_STU BIT_ULL(MV88E6XXX_CAP_STU) +#define MV88E6XXX_FLAG_TEMP BIT_ULL(MV88E6XXX_CAP_TEMP) +#define MV88E6XXX_FLAG_TEMP_LIMIT BIT_ULL(MV88E6XXX_CAP_TEMP_LIMIT) +#define MV88E6XXX_FLAG_VTU BIT_ULL(MV88E6XXX_CAP_VTU) /* EEPROM Programming via Global2 with 16-bit data */ #define MV88E6XXX_FLAGS_EEPROM16 \ @@ -614,7 +614,7 @@ struct mv88e6xxx_info { unsigned int num_ports; unsigned int port_base_addr; unsigned int age_time_coeff; - unsigned long flags; + unsigned long long flags; }; struct mv88e6xxx_atu_entry { -- cgit v1.1 From 
adb03115f4590baa280ddc440a8eff08a6be0cb7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Sep 2016 18:06:17 -0700 Subject: net: get rid of an signed integer overflow in ip_idents_reserve() Jiri Pirko reported an UBSAN warning happening in ip_idents_reserve() [] UBSAN: Undefined behaviour in ./arch/x86/include/asm/atomic.h:156:11 [] signed integer overflow: [] -2117905507 + -695755206 cannot be represented in type 'int' Since we do not have uatomic_add_return() yet, use atomic_cmpxchg() so that the arithmetics can be done using unsigned int. Fixes: 04ca6973f7c1 ("ip: make IP identifiers less predictable") Signed-off-by: Eric Dumazet Reported-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv4/route.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a1f2830..b5b47a2 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -476,12 +476,18 @@ u32 ip_idents_reserve(u32 hash, int segs) atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ; u32 old = ACCESS_ONCE(*p_tstamp); u32 now = (u32)jiffies; - u32 delta = 0; + u32 new, delta = 0; if (old != now && cmpxchg(p_tstamp, old, now) == old) delta = prandom_u32_max(now - old); - return atomic_add_return(segs + delta, p_id) - segs; + /* Do not use atomic_add_return() as it makes UBSAN unhappy */ + do { + old = (u32)atomic_read(p_id); + new = old + delta + segs; + } while (atomic_cmpxchg(p_id, old, new) != old); + + return new - segs; } EXPORT_SYMBOL(ip_idents_reserve); -- cgit v1.1 From 75c9510b8f745f75280029a8a9f96567f55f401e Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 20 Sep 2016 23:33:15 -0400 Subject: MAINTAINERS: Update b44 maintainer. Taking over as maintainer since Gary Zambrano is no longer working for Broadcom. Signed-off-by: Michael Chan Acked-by: Florian Fainelli Signed-off-by: David S. 
Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 247b418..3df4be3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2500,7 +2500,7 @@ S: Supported F: kernel/bpf/ BROADCOM B44 10/100 ETHERNET DRIVER -M: Gary Zambrano +M: Michael Chan L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/broadcom/b44.* -- cgit v1.1 From f9616c35a0d786bc64fff4bf819d1e4984873367 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Sep 2016 22:45:58 -0700 Subject: tcp: implement TSQ for retransmits We saw sch_fq drops caused by the per flow limit of 100 packets and TCP when dealing with large cwnd and bursts of retransmits. Even after increasing the limit to 1000, and even after commit 10d3be569243 ("tcp-tso: do not split TSO packets at retransmit time"), we can still have these drops. Under certain conditions, TCP can spend a considerable amount of time queuing thousands of skbs in a single tcp_xmit_retransmit_queue() invocation, incurring latency spikes and stalls of other softirq handlers. This patch implements TSQ for retransmits, limiting number of packets and giving more chance for scheduling packets in both ways. Signed-off-by: Eric Dumazet Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. 
Miller --- net/ipv4/tcp_output.c | 72 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 25 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7d025a7..478dfc5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -734,9 +734,16 @@ static void tcp_tsq_handler(struct sock *sk) { if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING | - TCPF_CLOSE_WAIT | TCPF_LAST_ACK)) - tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle, + TCPF_CLOSE_WAIT | TCPF_LAST_ACK)) { + struct tcp_sock *tp = tcp_sk(sk); + + if (tp->lost_out > tp->retrans_out && + tp->snd_cwnd > tcp_packets_in_flight(tp)) + tcp_xmit_retransmit_queue(sk); + + tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle, 0, GFP_ATOMIC); + } } /* * One tasklet per cpu tries to send more skbs. @@ -2039,6 +2046,39 @@ static int tcp_mtu_probe(struct sock *sk) return -1; } +/* TCP Small Queues : + * Control number of packets in qdisc/devices to two packets / or ~1 ms. + * (These limits are doubled for retransmits) + * This allows for : + * - better RTT estimation and ACK scheduling + * - faster recovery + * - high rates + * Alas, some drivers / subsystems require a fair amount + * of queued bytes to ensure line rate. + * One example is wifi aggregation (802.11 AMPDU) + */ +static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, + unsigned int factor) +{ + unsigned int limit; + + limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10); + limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes); + limit <<= factor; + + if (atomic_read(&sk->sk_wmem_alloc) > limit) { + set_bit(TSQ_THROTTLED, &tcp_sk(sk)->tsq_flags); + /* It is possible TX completion already happened + * before we set TSQ_THROTTLED, so we must + * test again the condition. + */ + smp_mb__after_atomic(); + if (atomic_read(&sk->sk_wmem_alloc) > limit) + return true; + } + return false; +} + /* This routine writes packets to the network. 
It advances the * send_head. This happens as incoming acks open up the remote * window for us. @@ -2125,29 +2165,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) break; - /* TCP Small Queues : - * Control number of packets in qdisc/devices to two packets / or ~1 ms. - * This allows for : - * - better RTT estimation and ACK scheduling - * - faster recovery - * - high rates - * Alas, some drivers / subsystems require a fair amount - * of queued bytes to ensure line rate. - * One example is wifi aggregation (802.11 AMPDU) - */ - limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10); - limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes); - - if (atomic_read(&sk->sk_wmem_alloc) > limit) { - set_bit(TSQ_THROTTLED, &tp->tsq_flags); - /* It is possible TX completion already happened - * before we set TSQ_THROTTLED, so we must - * test again the condition. - */ - smp_mb__after_atomic(); - if (atomic_read(&sk->sk_wmem_alloc) > limit) - break; - } + if (tcp_small_queue_check(sk, skb, 0)) + break; if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp))) break; @@ -2847,6 +2866,9 @@ begin_fwd: if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS)) continue; + if (tcp_small_queue_check(sk, skb, 1)) + return; + if (tcp_retransmit_skb(sk, skb, segs)) return; -- cgit v1.1 From 1bfecfca565c0505d04dbf5fdd3d2fbb951827c0 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 21 Sep 2016 12:19:42 +0300 Subject: net/mlx5e: Build RX SKB on demand For non-striding RQ configuration before this patch we had a ring with pre-allocated SKBs and mapped the SKB->data buffers for device. For robustness and better RX data buffers management, we allocate a page per packet and build_skb around it. This patch (which is a prerequisite for XDP) will actually reduce performance for normal stack usage, because we are now hitting a bottleneck in the page allocator. 
We use the page-cache to restore or even improve performance in comparison to the old RX scheme. Packet rate performance testing was done with pktgen 64B packets on xmit side and TC ingress dropping action on RX side. CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz Comparison is done between: 1.Baseline, before 'net/mlx5e: Build RX SKB on demand' 2.Build SKB with RX page cache (This patch) RX Cores Baseline Build SKB+page-cache Improvement ----------------------------------------------------------- 1 4.16Mpps 5.33Mpps 28% 2 7.16Mpps 10.24Mpps 43% 4 13.61Mpps 20.51Mpps 51% 8 25.32Mpps 32.00Mpps 26% All respective cores were 100% utilized. Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 10 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 31 +++- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 215 +++++++++++----------- 3 files changed, 133 insertions(+), 123 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 7dd4763..4d06c1b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -65,6 +65,8 @@ #define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW 0x3 #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW 0x6 +#define MLX5_RX_HEADROOM NET_SKB_PAD + #define MLX5_MPWRQ_LOG_STRIDE_SIZE 6 /* >= 6, HW restriction */ #define MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS 8 /* >= 6, HW restriction */ #define MLX5_MPWRQ_LOG_WQE_SZ 18 @@ -302,10 +304,14 @@ struct mlx5e_page_cache { struct mlx5e_rq { /* data path */ struct mlx5_wq_ll wq; - u32 wqe_sz; - struct sk_buff **skb; + + struct mlx5e_dma_info *dma_info; struct mlx5e_mpw_info *wqe_info; void *mtt_no_align; + struct { + u8 page_order; + u32 wqe_sz; /* wqe data buffer size */ + } buff; __be32 mkey_be; struct device *pdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 8595b50..d09588b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -408,6 +408,8 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, void *rqc = param->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); u32 byte_count; + u32 frag_sz; + int npages; int wq_sz; int err; int i; @@ -442,29 +444,40 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz); rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides); - rq->wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides; - byte_count = rq->wqe_sz; + + rq->buff.wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides; + byte_count = rq->buff.wqe_sz; rq->mkey_be = cpu_to_be32(c->priv->umr_mkey.key); err = mlx5e_rq_alloc_mpwqe_info(rq, c); if (err) goto err_rq_wq_destroy; break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ - rq->skb = kzalloc_node(wq_sz * sizeof(*rq->skb), GFP_KERNEL, - cpu_to_node(c->cpu)); - if (!rq->skb) { + rq->dma_info = kzalloc_node(wq_sz * sizeof(*rq->dma_info), + GFP_KERNEL, cpu_to_node(c->cpu)); + if (!rq->dma_info) { err = -ENOMEM; goto err_rq_wq_destroy; } + rq->handle_rx_cqe = mlx5e_handle_rx_cqe; rq->alloc_wqe = mlx5e_alloc_rx_wqe; rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; - rq->wqe_sz = (priv->params.lro_en) ? + rq->buff.wqe_sz = (priv->params.lro_en) ? 
priv->params.lro_wqe_sz : MLX5E_SW2HW_MTU(priv->netdev->mtu); - rq->wqe_sz = SKB_DATA_ALIGN(rq->wqe_sz); - byte_count = rq->wqe_sz; + byte_count = rq->buff.wqe_sz; + + /* calc the required page order */ + frag_sz = MLX5_RX_HEADROOM + + byte_count /* packet data */ + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + frag_sz = SKB_DATA_ALIGN(frag_sz); + + npages = DIV_ROUND_UP(frag_sz, PAGE_SIZE); + rq->buff.page_order = order_base_2(npages); + byte_count |= MLX5_HW_START_PADDING; rq->mkey_be = c->mkey_be; } @@ -499,7 +512,7 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq) mlx5e_rq_free_mpwqe_info(rq); break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ - kfree(rq->skb); + kfree(rq->dma_info); } for (i = rq->page_cache.head; i != rq->page_cache.tail; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index dc86779..d017829 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -179,50 +179,99 @@ unlock: mutex_unlock(&priv->state_lock); } -int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) +#define RQ_PAGE_SIZE(rq) ((1 << rq->buff.page_order) << PAGE_SHIFT) + +static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) { - struct sk_buff *skb; - dma_addr_t dma_addr; + struct mlx5e_page_cache *cache = &rq->page_cache; + u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1); - skb = napi_alloc_skb(rq->cq.napi, rq->wqe_sz); - if (unlikely(!skb)) - return -ENOMEM; + if (tail_next == cache->head) { + rq->stats.cache_full++; + return false; + } + + cache->page_cache[cache->tail] = *dma_info; + cache->tail = tail_next; + return true; +} + +static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + struct mlx5e_page_cache *cache = &rq->page_cache; + + if (unlikely(cache->head == cache->tail)) { + rq->stats.cache_empty++; + return false; + } + + if 
(page_ref_count(cache->page_cache[cache->head].page) != 1) { + rq->stats.cache_busy++; + return false; + } + + *dma_info = cache->page_cache[cache->head]; + cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1); + rq->stats.cache_reuse++; + + dma_sync_single_for_device(rq->pdev, dma_info->addr, + RQ_PAGE_SIZE(rq), + DMA_FROM_DEVICE); + return true; +} - dma_addr = dma_map_single(rq->pdev, - /* hw start padding */ - skb->data, - /* hw end padding */ - rq->wqe_sz, - DMA_FROM_DEVICE); +static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + struct page *page; - if (unlikely(dma_mapping_error(rq->pdev, dma_addr))) - goto err_free_skb; + if (mlx5e_rx_cache_get(rq, dma_info)) + return 0; - *((dma_addr_t *)skb->cb) = dma_addr; - wqe->data.addr = cpu_to_be64(dma_addr); + page = dev_alloc_pages(rq->buff.page_order); + if (unlikely(!page)) + return -ENOMEM; - rq->skb[ix] = skb; + dma_info->page = page; + dma_info->addr = dma_map_page(rq->pdev, page, 0, + RQ_PAGE_SIZE(rq), DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) { + put_page(page); + return -ENOMEM; + } return 0; +} -err_free_skb: - dev_kfree_skb(skb); +void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, + bool recycle) +{ + if (likely(recycle) && mlx5e_rx_cache_put(rq, dma_info)) + return; + + dma_unmap_page(rq->pdev, dma_info->addr, RQ_PAGE_SIZE(rq), + DMA_FROM_DEVICE); + put_page(dma_info->page); +} + +int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) +{ + struct mlx5e_dma_info *di = &rq->dma_info[ix]; - return -ENOMEM; + if (unlikely(mlx5e_page_alloc_mapped(rq, di))) + return -ENOMEM; + + wqe->data.addr = cpu_to_be64(di->addr + MLX5_RX_HEADROOM); + return 0; } void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix) { - struct sk_buff *skb = rq->skb[ix]; + struct mlx5e_dma_info *di = &rq->dma_info[ix]; - if (skb) { - rq->skb[ix] = NULL; - dma_unmap_single(rq->pdev, - *((dma_addr_t 
*)skb->cb), - rq->wqe_sz, - DMA_FROM_DEVICE); - dev_kfree_skb(skb); - } + mlx5e_page_release(rq, di, true); } static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq) @@ -305,79 +354,6 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); } -static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, - struct mlx5e_dma_info *dma_info) -{ - struct mlx5e_page_cache *cache = &rq->page_cache; - u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1); - - if (tail_next == cache->head) { - rq->stats.cache_full++; - return false; - } - - cache->page_cache[cache->tail] = *dma_info; - cache->tail = tail_next; - return true; -} - -static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, - struct mlx5e_dma_info *dma_info) -{ - struct mlx5e_page_cache *cache = &rq->page_cache; - - if (unlikely(cache->head == cache->tail)) { - rq->stats.cache_empty++; - return false; - } - - if (page_ref_count(cache->page_cache[cache->head].page) != 1) { - rq->stats.cache_busy++; - return false; - } - - *dma_info = cache->page_cache[cache->head]; - cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1); - rq->stats.cache_reuse++; - - dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE, - DMA_FROM_DEVICE); - return true; -} - -static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, - struct mlx5e_dma_info *dma_info) -{ - struct page *page; - - if (mlx5e_rx_cache_get(rq, dma_info)) - return 0; - - page = dev_alloc_page(); - if (unlikely(!page)) - return -ENOMEM; - - dma_info->page = page; - dma_info->addr = dma_map_page(rq->pdev, page, 0, PAGE_SIZE, - DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) { - put_page(page); - return -ENOMEM; - } - - return 0; -} - -void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, - bool recycle) -{ - if (likely(recycle) && mlx5e_rx_cache_put(rq, dma_info)) - return; - - dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, 
DMA_FROM_DEVICE); - put_page(dma_info->page); -} - static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) @@ -448,7 +424,7 @@ void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) mlx5_wq_ll_update_db_record(wq); } -int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) +int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) { int err; @@ -658,31 +634,46 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { + struct mlx5e_dma_info *di; struct mlx5e_rx_wqe *wqe; - struct sk_buff *skb; __be16 wqe_counter_be; + struct sk_buff *skb; u16 wqe_counter; u32 cqe_bcnt; + void *va; wqe_counter_be = cqe->wqe_counter; wqe_counter = be16_to_cpu(wqe_counter_be); wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); - skb = rq->skb[wqe_counter]; - prefetch(skb->data); - rq->skb[wqe_counter] = NULL; + di = &rq->dma_info[wqe_counter]; + va = page_address(di->page); - dma_unmap_single(rq->pdev, - *((dma_addr_t *)skb->cb), - rq->wqe_sz, - DMA_FROM_DEVICE); + dma_sync_single_range_for_cpu(rq->pdev, + di->addr, + MLX5_RX_HEADROOM, + rq->buff.wqe_sz, + DMA_FROM_DEVICE); + prefetch(va + MLX5_RX_HEADROOM); if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { rq->stats.wqe_err++; - dev_kfree_skb(skb); + mlx5e_page_release(rq, di, true); goto wq_ll_pop; } + skb = build_skb(va, RQ_PAGE_SIZE(rq)); + if (unlikely(!skb)) { + rq->stats.buff_alloc_err++; + mlx5e_page_release(rq, di, true); + goto wq_ll_pop; + } + + /* queue up for recycling ..*/ + page_ref_inc(di->page); + mlx5e_page_release(rq, di, true); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + skb_reserve(skb, MLX5_RX_HEADROOM); skb_put(skb, cqe_bcnt); mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); -- cgit v1.1 From 21c59685dd176dd6b2c4fc5e18dc65730cfd546a Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 21 Sep 2016 12:19:43 +0300 Subject: net/mlx5e: Union RQ RX info per RQ 
type We have two types of RX RQs, and they use two separate sets of info arrays and structures in RX data path function. Today those structures are mutually exclusive per RQ type, hence one kind is allocated on RQ creation according to the RQ type. For better cache locality and to minimalize the sizeof(struct mlx5e_rq), in this patch we define them as a union. Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 14 ++++++---- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 32 +++++++++++------------ drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 10 +++---- 3 files changed, 30 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 4d06c1b..e333123 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -305,9 +305,14 @@ struct mlx5e_rq { /* data path */ struct mlx5_wq_ll wq; - struct mlx5e_dma_info *dma_info; - struct mlx5e_mpw_info *wqe_info; - void *mtt_no_align; + union { + struct mlx5e_dma_info *dma_info; + struct { + struct mlx5e_mpw_info *info; + void *mtt_no_align; + u32 mtt_offset; + } mpwqe; + }; struct { u8 page_order; u32 wqe_sz; /* wqe data buffer size */ @@ -327,7 +332,6 @@ struct mlx5e_rq { unsigned long state; int ix; - u32 mpwqe_mtt_offset; struct mlx5e_rx_am am; /* Adaptive Moderation */ @@ -770,7 +774,7 @@ static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) static inline u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix) { - return rq->mpwqe_mtt_offset + + return rq->mpwqe.mtt_offset + wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d09588b..f2efa53 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -314,7 +314,7 @@ 
static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, struct mlx5e_sq *sq, struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; struct mlx5_wqe_data_seg *dseg = &wqe->data; - struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; u8 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS); u32 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq, ix); @@ -342,21 +342,21 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1; int i; - rq->wqe_info = kzalloc_node(wq_sz * sizeof(*rq->wqe_info), - GFP_KERNEL, cpu_to_node(c->cpu)); - if (!rq->wqe_info) + rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info), + GFP_KERNEL, cpu_to_node(c->cpu)); + if (!rq->mpwqe.info) goto err_out; /* We allocate more than mtt_sz as we will align the pointer */ - rq->mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL, + rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL, cpu_to_node(c->cpu)); - if (unlikely(!rq->mtt_no_align)) + if (unlikely(!rq->mpwqe.mtt_no_align)) goto err_free_wqe_info; for (i = 0; i < wq_sz; i++) { - struct mlx5e_mpw_info *wi = &rq->wqe_info[i]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i]; - wi->umr.mtt = PTR_ALIGN(rq->mtt_no_align + i * mtt_alloc, + wi->umr.mtt = PTR_ALIGN(rq->mpwqe.mtt_no_align + i * mtt_alloc, MLX5_UMR_ALIGN); wi->umr.mtt_addr = dma_map_single(c->pdev, wi->umr.mtt, mtt_sz, PCI_DMA_TODEVICE); @@ -370,14 +370,14 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, err_unmap_mtts: while (--i >= 0) { - struct mlx5e_mpw_info *wi = &rq->wqe_info[i]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i]; dma_unmap_single(c->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE); } - kfree(rq->mtt_no_align); + kfree(rq->mpwqe.mtt_no_align); err_free_wqe_info: - kfree(rq->wqe_info); + kfree(rq->mpwqe.info); err_out: return -ENOMEM; @@ -390,13 +390,13 @@ static void 
mlx5e_rq_free_mpwqe_info(struct mlx5e_rq *rq) int i; for (i = 0; i < wq_sz; i++) { - struct mlx5e_mpw_info *wi = &rq->wqe_info[i]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i]; dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE); } - kfree(rq->mtt_no_align); - kfree(rq->wqe_info); + kfree(rq->mpwqe.mtt_no_align); + kfree(rq->mpwqe.info); } static int mlx5e_create_rq(struct mlx5e_channel *c, @@ -439,7 +439,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->alloc_wqe = mlx5e_alloc_rx_mpwqe; rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; - rq->mpwqe_mtt_offset = c->ix * + rq->mpwqe.mtt_offset = c->ix * MLX5E_REQUIRED_MTTS(1, BIT(priv->params.log_rq_size)); rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz); @@ -654,7 +654,7 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) /* UMR WQE (if in progress) is always at wq->head */ if (test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) - mlx5e_free_rx_mpwqe(rq, &rq->wqe_info[wq->head]); + mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]); while (!mlx5_wq_ll_is_empty(wq)) { wqe_ix_be = *wq->tail_next; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index d017829..a403a79 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -328,7 +328,7 @@ mlx5e_copy_skb_header_mpwqe(struct device *pdev, static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) { - struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; struct mlx5e_sq *sq = &rq->channel->icosq; struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5e_umr_wqe *wqe; @@ -358,7 +358,7 @@ static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) { - struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, ix) << PAGE_SHIFT; int pg_strides = 
mlx5e_mpwqe_strides_per_page(rq); int err; @@ -412,7 +412,7 @@ void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state))) { - mlx5e_free_rx_mpwqe(rq, &rq->wqe_info[wq->head]); + mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]); return; } @@ -438,7 +438,7 @@ int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) { - struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; mlx5e_free_rx_mpwqe(rq, wi); } @@ -725,7 +725,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); u16 wqe_id = be16_to_cpu(cqe->wqe_id); - struct mlx5e_mpw_info *wi = &rq->wqe_info[wqe_id]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[wqe_id]; struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_id); struct sk_buff *skb; u16 cqe_bcnt; -- cgit v1.1 From e4b85508072b32682ba84df32cac5cf6a4f6178e Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 21 Sep 2016 12:19:44 +0300 Subject: net/mlx5e: Slightly reduce hardware LRO size Before this patch LRO size was 64K, now with build_skb requires extra room, headroom + sizeof(skb_shared_info) added to the data buffer will make wqe size or page_frag_size slightly larger than 64K which will demand order 5 page instead of order 4 in 4K page systems. We take those extra bytes from hardware LRO data size in order to not increase the required page order for when hardware LRO is enabled. Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f2efa53..8734240 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3187,8 +3187,11 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, mlx5e_build_default_indir_rqt(mdev, priv->params.indirection_rqt, MLX5E_INDIR_RQT_SIZE, profile->max_nch(mdev)); - priv->params.lro_wqe_sz = - MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; + priv->params.lro_wqe_sz = + MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - + /* Extra room needed for build_skb */ + MLX5_RX_HEADROOM - + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); /* Initialize pflags */ MLX5E_SET_PRIV_FLAG(priv, MLX5E_PFLAG_RX_CQE_BASED_MODER, -- cgit v1.1 From 2fc4bfb7250d79ee4e58c1d5bca257687e9f5e53 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 21 Sep 2016 12:19:45 +0300 Subject: net/mlx5e: Dynamic RQ type infrastructure Add two helper functions to allow dynamic changes of RQ type. mlx5e_set_rq_priv_params and mlx5e_set_rq_type_params will be used on netdev creation to determine the default RQ type. This will be needed later for downstream patches of XDP support. When enabling XDP we will dynamically move from striding RQ to linked list RQ type. Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 92 ++++++++++++----------- 1 file changed, 50 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 8734240..ff520ad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -69,6 +69,47 @@ struct mlx5e_channel_param { struct mlx5e_cq_param icosq_cq; }; +static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) +{ + return MLX5_CAP_GEN(mdev, striding_rq) && + MLX5_CAP_GEN(mdev, umr_ptr_rlky) && + MLX5_CAP_ETH(mdev, reg_umr_sq); +} + +static void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type) +{ + priv->params.rq_wq_type = rq_type; + switch (priv->params.rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; + priv->params.mpwqe_log_stride_sz = priv->params.rx_cqe_compress ? + MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS : + MLX5_MPWRQ_LOG_STRIDE_SIZE; + priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - + priv->params.mpwqe_log_stride_sz; + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; + } + priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, + BIT(priv->params.log_rq_size)); + + mlx5_core_info(priv->mdev, + "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", + priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, + BIT(priv->params.log_rq_size), + BIT(priv->params.mpwqe_log_stride_sz), + priv->params.rx_cqe_compress_admin); +} + +static void mlx5e_set_rq_priv_params(struct mlx5e_priv *priv) +{ + u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(priv->mdev) ? 
+ MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : + MLX5_WQ_TYPE_LINKED_LIST; + mlx5e_set_rq_type_params(priv, rq_type); +} + static void mlx5e_update_carrier(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; @@ -3038,13 +3079,6 @@ void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev, indirection_rqt[i] = i % num_channels; } -static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) -{ - return MLX5_CAP_GEN(mdev, striding_rq) && - MLX5_CAP_GEN(mdev, umr_ptr_rlky) && - MLX5_CAP_ETH(mdev, reg_umr_sq); -} - static int mlx5e_get_pci_bw(struct mlx5_core_dev *mdev, u32 *pci_bw) { enum pcie_link_width width; @@ -3124,11 +3158,13 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; - priv->params.log_sq_size = - MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; - priv->params.rq_wq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) ? - MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : - MLX5_WQ_TYPE_LINKED_LIST; + priv->mdev = mdev; + priv->netdev = netdev; + priv->params.num_channels = profile->max_nch(mdev); + priv->profile = profile; + priv->ppriv = ppriv; + + priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; /* set CQE compression */ priv->params.rx_cqe_compress_admin = false; @@ -3141,33 +3177,11 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, priv->params.rx_cqe_compress_admin = cqe_compress_heuristic(link_speed, pci_bw); } - priv->params.rx_cqe_compress = priv->params.rx_cqe_compress_admin; - switch (priv->params.rq_wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; - priv->params.mpwqe_log_stride_sz = - priv->params.rx_cqe_compress ? 
- MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS : - MLX5_MPWRQ_LOG_STRIDE_SIZE; - priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - - priv->params.mpwqe_log_stride_sz; + mlx5e_set_rq_priv_params(priv); + if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) priv->params.lro_en = true; - break; - default: /* MLX5_WQ_TYPE_LINKED_LIST */ - priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; - } - - mlx5_core_info(mdev, - "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", - priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, - BIT(priv->params.log_rq_size), - BIT(priv->params.mpwqe_log_stride_sz), - priv->params.rx_cqe_compress_admin); - - priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, - BIT(priv->params.log_rq_size)); priv->params.rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation); mlx5e_set_rx_cq_mode_params(&priv->params, cq_period_mode); @@ -3197,12 +3211,6 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, MLX5E_SET_PRIV_FLAG(priv, MLX5E_PFLAG_RX_CQE_BASED_MODER, priv->params.rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE); - priv->mdev = mdev; - priv->netdev = netdev; - priv->params.num_channels = profile->max_nch(mdev); - priv->profile = profile; - priv->ppriv = ppriv; - #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_ets_init(priv); #endif -- cgit v1.1 From 86994156c736978d113e7927455d4eeeb2128b9f Mon Sep 17 00:00:00 2001 From: Rana Shahout Date: Wed, 21 Sep 2016 12:19:46 +0300 Subject: net/mlx5e: XDP fast RX drop bpf programs support Add support for the BPF_PROG_TYPE_PHYS_DEV hook in mlx5e driver. When XDP is on we make sure to change channels RQs type to MLX5_WQ_TYPE_LINKED_LIST rather than "striding RQ" type to ensure "page per packet". On XDP set, we fail if HW LRO is set and request from user to turn it off. Since on ConnectX4-LX HW LRO is always on by default, this will be annoying, but we prefer not to enforce LRO off from XDP set function. 
Full channels reset (close/open) is required only when setting XDP on/off. When XDP set is called just to exchange programs, we will update each RQ xdp program on the fly and for synchronization with current data path RX activity of that RQ, we temporarily disable that RQ and ensure RX path is not running, quickly update and re-enable that RQ, for that we do: - rq.state = disabled - napi_synchronize - xchg(rq->xdp_prog) - rq.state = enabled - napi_schedule // Just in case we've missed an IRQ Packet rate performance testing was done with pktgen 64B packets on the TX side, and TC drop action on the RX side compared to XDP fast drop. CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz Comparison is done between: 1. Baseline, before this patch with TC drop action 2. This patch with TC drop action 3. This patch with XDP RX fast drop RX Cores Baseline(TC drop) TC drop XDP fast Drop -------------------------------------------------------------- 1 5.3Mpps 5.3Mpps 16.5Mpps 2 10.2Mpps 10.2Mpps 31.3Mpps 4 20.5Mpps 19.9Mpps 36.3Mpps* *My xmitter was limited to 36.3Mpps, so it is the bottleneck. It seems that the receive side can handle more. Signed-off-by: Rana Shahout Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 100 ++++++++++++++++++++- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 26 +++++- drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 4 + 4 files changed, 130 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index e333123..5e8e669 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -334,6 +334,7 @@ struct mlx5e_rq { int ix; struct mlx5e_rx_am am; /* Adaptive Moderation */ + struct bpf_prog *xdp_prog; /* control */ struct mlx5_wq_ctrl wq_ctrl; @@ -627,6 +628,7 @@ struct mlx5e_priv { /* priv data path fields - start */ struct mlx5e_sq **txq_to_sq_map; int channeltc_to_txq_map[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC]; + struct bpf_prog *xdp_prog; /* priv data path fields - end */ unsigned long state; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index ff520ad..6e95a16 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "en.h" #include "en_tc.h" #include "eswitch.h" @@ -104,7 +105,8 @@ static void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type) static void mlx5e_set_rq_priv_params(struct mlx5e_priv *priv) { - u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(priv->mdev) ? + u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(priv->mdev) && + !priv->xdp_prog ? 
MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : MLX5_WQ_TYPE_LINKED_LIST; mlx5e_set_rq_type_params(priv, rq_type); @@ -177,6 +179,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->rx_csum_none += rq_stats->csum_none; s->rx_csum_complete += rq_stats->csum_complete; s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; + s->rx_xdp_drop += rq_stats->xdp_drop; s->rx_wqe_err += rq_stats->wqe_err; s->rx_mpwqe_filler += rq_stats->mpwqe_filler; s->rx_buff_alloc_err += rq_stats->buff_alloc_err; @@ -473,6 +476,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->channel = c; rq->ix = c->ix; rq->priv = c->priv; + rq->xdp_prog = priv->xdp_prog; switch (priv->params.rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: @@ -536,6 +540,9 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->page_cache.head = 0; rq->page_cache.tail = 0; + if (rq->xdp_prog) + bpf_prog_add(rq->xdp_prog, 1); + return 0; err_rq_wq_destroy: @@ -548,6 +555,9 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq) { int i; + if (rq->xdp_prog) + bpf_prog_put(rq->xdp_prog); + switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: mlx5e_rq_free_mpwqe_info(rq); @@ -2955,6 +2965,92 @@ static void mlx5e_tx_timeout(struct net_device *dev) schedule_work(&priv->tx_timeout_work); } +static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct bpf_prog *old_prog; + int err = 0; + bool reset, was_opened; + int i; + + mutex_lock(&priv->state_lock); + + if ((netdev->features & NETIF_F_LRO) && prog) { + netdev_warn(netdev, "can't set XDP while LRO is on, disable LRO first\n"); + err = -EINVAL; + goto unlock; + } + + was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + /* no need for full reset when exchanging programs */ + reset = (!priv->xdp_prog || !prog); + + if (was_opened && reset) + mlx5e_close_locked(netdev); + + /* exchange programs */ + old_prog = xchg(&priv->xdp_prog, prog); + if 
(prog) + bpf_prog_add(prog, 1); + if (old_prog) + bpf_prog_put(old_prog); + + if (reset) /* change RQ type according to priv->xdp_prog */ + mlx5e_set_rq_priv_params(priv); + + if (was_opened && reset) + mlx5e_open_locked(netdev); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset) + goto unlock; + + /* exchanging programs w/o reset, we update ref counts on behalf + * of the channels RQs here. + */ + bpf_prog_add(prog, priv->params.num_channels); + for (i = 0; i < priv->params.num_channels; i++) { + struct mlx5e_channel *c = priv->channel[i]; + + set_bit(MLX5E_RQ_STATE_FLUSH, &c->rq.state); + napi_synchronize(&c->napi); + /* prevent mlx5e_poll_rx_cq from accessing rq->xdp_prog */ + + old_prog = xchg(&c->rq.xdp_prog, prog); + + clear_bit(MLX5E_RQ_STATE_FLUSH, &c->rq.state); + /* napi_schedule in case we have missed anything */ + set_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags); + napi_schedule(&c->napi); + + if (old_prog) + bpf_prog_put(old_prog); + } + +unlock: + mutex_unlock(&priv->state_lock); + return err; +} + +static bool mlx5e_xdp_attached(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return !!priv->xdp_prog; +} + +static int mlx5e_xdp(struct net_device *dev, struct netdev_xdp *xdp) +{ + switch (xdp->command) { + case XDP_SETUP_PROG: + return mlx5e_xdp_set(dev, xdp->prog); + case XDP_QUERY_PROG: + xdp->prog_attached = mlx5e_xdp_attached(dev); + return 0; + default: + return -EINVAL; + } +} + static const struct net_device_ops mlx5e_netdev_ops_basic = { .ndo_open = mlx5e_open, .ndo_stop = mlx5e_close, @@ -2974,6 +3070,7 @@ static const struct net_device_ops mlx5e_netdev_ops_basic = { .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif .ndo_tx_timeout = mlx5e_tx_timeout, + .ndo_xdp = mlx5e_xdp, }; static const struct net_device_ops mlx5e_netdev_ops_sriov = { @@ -3005,6 +3102,7 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = { .ndo_set_vf_link_state = mlx5e_set_vf_link_state, .ndo_get_vf_stats = mlx5e_get_vf_stats, 
.ndo_tx_timeout = mlx5e_tx_timeout, + .ndo_xdp = mlx5e_xdp, }; static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index a403a79..96f6317 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -632,8 +632,20 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, napi_gro_receive(rq->cq.napi, skb); } +static inline enum xdp_action mlx5e_xdp_handle(struct mlx5e_rq *rq, + const struct bpf_prog *prog, + void *data, u32 len) +{ + struct xdp_buff xdp; + + xdp.data = data; + xdp.data_end = xdp.data + len; + return bpf_prog_run_xdp(prog, &xdp); +} + void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { + struct bpf_prog *xdp_prog = READ_ONCE(rq->xdp_prog); struct mlx5e_dma_info *di; struct mlx5e_rx_wqe *wqe; __be16 wqe_counter_be; @@ -654,6 +666,7 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) rq->buff.wqe_sz, DMA_FROM_DEVICE); prefetch(va + MLX5_RX_HEADROOM); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { rq->stats.wqe_err++; @@ -661,6 +674,18 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) goto wq_ll_pop; } + if (xdp_prog) { + enum xdp_action act = + mlx5e_xdp_handle(rq, xdp_prog, va + MLX5_RX_HEADROOM, + cqe_bcnt); + + if (act != XDP_PASS) { + rq->stats.xdp_drop++; + mlx5e_page_release(rq, di, true); + goto wq_ll_pop; + } + } + skb = build_skb(va, RQ_PAGE_SIZE(rq)); if (unlikely(!skb)) { rq->stats.buff_alloc_err++; @@ -672,7 +697,6 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) page_ref_inc(di->page); mlx5e_page_release(rq, di, true); - cqe_bcnt = be32_to_cpu(cqe->byte_cnt); skb_reserve(skb, MLX5_RX_HEADROOM); skb_put(skb, cqe_bcnt); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 6af8d79..084d6c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -65,6 +65,7 @@ struct mlx5e_sw_stats { u64 rx_csum_none; u64 rx_csum_complete; u64 rx_csum_unnecessary_inner; + u64 rx_xdp_drop; u64 tx_csum_partial; u64 tx_csum_partial_inner; u64 tx_queue_stopped; @@ -100,6 +101,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) }, @@ -278,6 +280,7 @@ struct mlx5e_rq_stats { u64 csum_none; u64 lro_packets; u64 lro_bytes; + u64 xdp_drop; u64 wqe_err; u64 mpwqe_filler; u64 buff_alloc_err; @@ -295,6 +298,7 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_drop) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) }, -- cgit v1.1 From f10b7cc7707f7d598e3ddacd848080b18ba4cbff Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 21 Sep 2016 12:19:47 +0300 Subject: net/mlx5e: Have a clear separation between different SQ types Make a clear separate between Regular SQ (TXQ) and ICO SQ creation, destruction and union their mutual information structures. 
Don't allocate redundant TXQ skb/wqe_info/dma_fifo arrays for ICO SQ. And have a different SQ edge for ICO SQ than TXQ SQ, to be more accurate. In preparation for XDP TX support. Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 23 +++- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 121 ++++++++++++++-------- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 8 +- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 30 +++--- drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 2 +- 5 files changed, 120 insertions(+), 64 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 5e8e669..5917f5e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -101,6 +101,9 @@ #define MLX5E_UPDATE_STATS_INTERVAL 200 /* msecs */ #define MLX5E_SQ_BF_BUDGET 16 +#define MLX5E_ICOSQ_MAX_WQEBBS \ + (DIV_ROUND_UP(sizeof(struct mlx5e_umr_wqe), MLX5_SEND_WQE_BB)) + #define MLX5E_NUM_MAIN_GROUPS 9 static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size) @@ -386,6 +389,11 @@ struct mlx5e_ico_wqe_info { u8 num_wqebbs; }; +enum mlx5e_sq_type { + MLX5E_SQ_TXQ, + MLX5E_SQ_ICO +}; + struct mlx5e_sq { /* data path */ @@ -403,10 +411,15 @@ struct mlx5e_sq { struct mlx5e_cq cq; - /* pointers to per packet info: write@xmit, read@completion */ - struct sk_buff **skb; - struct mlx5e_sq_dma *dma_fifo; - struct mlx5e_tx_wqe_info *wqe_info; + /* pointers to per tx element info: write@xmit, read@completion */ + union { + struct { + struct sk_buff **skb; + struct mlx5e_sq_dma *dma_fifo; + struct mlx5e_tx_wqe_info *wqe_info; + } txq; + struct mlx5e_ico_wqe_info *ico_wqe; + } db; /* read only */ struct mlx5_wq_cyc wq; @@ -428,8 +441,8 @@ struct mlx5e_sq { struct mlx5_uar uar; struct mlx5e_channel *channel; int tc; - struct mlx5e_ico_wqe_info *ico_wqe_info; u32 rate_limit; + u8 type; } 
____cacheline_aligned_in_smp; static inline bool mlx5e_sq_has_room_for(struct mlx5e_sq *sq, u16 n) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 6e95a16..632de09 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -51,7 +51,7 @@ struct mlx5e_sq_param { struct mlx5_wq_param wq; u16 max_inline; u8 min_inline_mode; - bool icosq; + enum mlx5e_sq_type type; }; struct mlx5e_cq_param { @@ -740,8 +740,8 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, if (param->am_enabled) set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); - sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_NOP; - sq->ico_wqe_info[pi].num_wqebbs = 1; + sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; + sq->db.ico_wqe[pi].num_wqebbs = 1; mlx5e_send_nop(sq, true); /* trigger mlx5e_post_rx_wqes() */ return 0; @@ -765,26 +765,43 @@ static void mlx5e_close_rq(struct mlx5e_rq *rq) mlx5e_destroy_rq(rq); } -static void mlx5e_free_sq_db(struct mlx5e_sq *sq) +static void mlx5e_free_sq_ico_db(struct mlx5e_sq *sq) { - kfree(sq->wqe_info); - kfree(sq->dma_fifo); - kfree(sq->skb); + kfree(sq->db.ico_wqe); } -static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa) +static int mlx5e_alloc_sq_ico_db(struct mlx5e_sq *sq, int numa) +{ + u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + + sq->db.ico_wqe = kzalloc_node(sizeof(*sq->db.ico_wqe) * wq_sz, + GFP_KERNEL, numa); + if (!sq->db.ico_wqe) + return -ENOMEM; + + return 0; +} + +static void mlx5e_free_sq_txq_db(struct mlx5e_sq *sq) +{ + kfree(sq->db.txq.wqe_info); + kfree(sq->db.txq.dma_fifo); + kfree(sq->db.txq.skb); +} + +static int mlx5e_alloc_sq_txq_db(struct mlx5e_sq *sq, int numa) { int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS; - sq->skb = kzalloc_node(wq_sz * sizeof(*sq->skb), GFP_KERNEL, numa); - sq->dma_fifo = kzalloc_node(df_sz * sizeof(*sq->dma_fifo), GFP_KERNEL, - numa); - sq->wqe_info = 
kzalloc_node(wq_sz * sizeof(*sq->wqe_info), GFP_KERNEL, - numa); - - if (!sq->skb || !sq->dma_fifo || !sq->wqe_info) { - mlx5e_free_sq_db(sq); + sq->db.txq.skb = kzalloc_node(wq_sz * sizeof(*sq->db.txq.skb), + GFP_KERNEL, numa); + sq->db.txq.dma_fifo = kzalloc_node(df_sz * sizeof(*sq->db.txq.dma_fifo), + GFP_KERNEL, numa); + sq->db.txq.wqe_info = kzalloc_node(wq_sz * sizeof(*sq->db.txq.wqe_info), + GFP_KERNEL, numa); + if (!sq->db.txq.skb || !sq->db.txq.dma_fifo || !sq->db.txq.wqe_info) { + mlx5e_free_sq_txq_db(sq); return -ENOMEM; } @@ -793,6 +810,30 @@ static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa) return 0; } +static void mlx5e_free_sq_db(struct mlx5e_sq *sq) +{ + switch (sq->type) { + case MLX5E_SQ_TXQ: + mlx5e_free_sq_txq_db(sq); + break; + case MLX5E_SQ_ICO: + mlx5e_free_sq_ico_db(sq); + break; + } +} + +static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa) +{ + switch (sq->type) { + case MLX5E_SQ_TXQ: + return mlx5e_alloc_sq_txq_db(sq, numa); + case MLX5E_SQ_ICO: + return mlx5e_alloc_sq_ico_db(sq, numa); + } + + return 0; +} + static int mlx5e_create_sq(struct mlx5e_channel *c, int tc, struct mlx5e_sq_param *param, @@ -803,8 +844,16 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, void *sqc = param->sqc; void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); + u16 sq_max_wqebbs; int err; + sq->type = param->type; + sq->pdev = c->pdev; + sq->tstamp = &priv->tstamp; + sq->mkey_be = c->mkey_be; + sq->channel = c; + sq->tc = tc; + err = mlx5_alloc_map_uar(mdev, &sq->uar, !!MLX5_CAP_GEN(mdev, bf)); if (err) return err; @@ -833,18 +882,8 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, if (err) goto err_sq_wq_destroy; - if (param->icosq) { - u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq); - - sq->ico_wqe_info = kzalloc_node(sizeof(*sq->ico_wqe_info) * - wq_sz, - GFP_KERNEL, - cpu_to_node(c->cpu)); - if (!sq->ico_wqe_info) { - err = -ENOMEM; - goto err_free_sq_db; - } - } else { + sq_max_wqebbs = MLX5_SEND_WQE_MAX_WQEBBS; + if (sq->type == 
MLX5E_SQ_TXQ) { int txq_ix; txq_ix = c->ix + tc * priv->params.num_channels; @@ -852,19 +891,14 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, priv->txq_to_sq_map[txq_ix] = sq; } - sq->pdev = c->pdev; - sq->tstamp = &priv->tstamp; - sq->mkey_be = c->mkey_be; - sq->channel = c; - sq->tc = tc; - sq->edge = (sq->wq.sz_m1 + 1) - MLX5_SEND_WQE_MAX_WQEBBS; + if (sq->type == MLX5E_SQ_ICO) + sq_max_wqebbs = MLX5E_ICOSQ_MAX_WQEBBS; + + sq->edge = (sq->wq.sz_m1 + 1) - sq_max_wqebbs; sq->bf_budget = MLX5E_SQ_BF_BUDGET; return 0; -err_free_sq_db: - mlx5e_free_sq_db(sq); - err_sq_wq_destroy: mlx5_wq_destroy(&sq->wq_ctrl); @@ -879,7 +913,6 @@ static void mlx5e_destroy_sq(struct mlx5e_sq *sq) struct mlx5e_channel *c = sq->channel; struct mlx5e_priv *priv = c->priv; - kfree(sq->ico_wqe_info); mlx5e_free_sq_db(sq); mlx5_wq_destroy(&sq->wq_ctrl); mlx5_unmap_free_uar(priv->mdev, &sq->uar); @@ -908,11 +941,12 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) memcpy(sqc, param->sqc, sizeof(param->sqc)); - MLX5_SET(sqc, sqc, tis_num_0, param->icosq ? 0 : priv->tisn[sq->tc]); + MLX5_SET(sqc, sqc, tis_num_0, param->type == MLX5E_SQ_ICO ? + 0 : priv->tisn[sq->tc]); MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn); MLX5_SET(sqc, sqc, min_wqe_inline_mode, sq->min_inline_mode); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); - MLX5_SET(sqc, sqc, tis_lst_sz, param->icosq ? 0 : 1); + MLX5_SET(sqc, sqc, tis_lst_sz, param->type == MLX5E_SQ_ICO ? 0 : 1); MLX5_SET(sqc, sqc, flush_in_error_en, 1); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); @@ -1027,8 +1061,10 @@ static void mlx5e_close_sq(struct mlx5e_sq *sq) netif_tx_disable_queue(sq->txq); /* last doorbell out, godspeed .. 
*/ - if (mlx5e_sq_has_room_for(sq, 1)) + if (mlx5e_sq_has_room_for(sq, 1)) { + sq->db.txq.skb[(sq->pc & sq->wq.sz_m1)] = NULL; mlx5e_send_nop(sq, true); + } } mlx5e_disable_sq(sq); @@ -1505,6 +1541,7 @@ static void mlx5e_build_sq_param(struct mlx5e_priv *priv, param->max_inline = priv->params.tx_max_inline; param->min_inline_mode = priv->params.tx_min_inline_mode; + param->type = MLX5E_SQ_TXQ; } static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, @@ -1578,7 +1615,7 @@ static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, MLX5_SET(wq, wq, log_wq_sz, log_wq_size); MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq)); - param->icosq = true; + param->type = MLX5E_SQ_ICO; } static void mlx5e_build_channel_param(struct mlx5e_priv *priv, struct mlx5e_channel_param *cparam) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 96f6317..941e531 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -337,8 +337,8 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) /* fill sq edge with nops to avoid wqe wrap around */ while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { - sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_NOP; - sq->ico_wqe_info[pi].num_wqebbs = 1; + sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; + sq->db.ico_wqe[pi].num_wqebbs = 1; mlx5e_send_nop(sq, true); } @@ -348,8 +348,8 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR); - sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_UMR; - sq->ico_wqe_info[pi].num_wqebbs = num_wqebbs; + sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR; + sq->db.ico_wqe[pi].num_wqebbs = num_wqebbs; sq->pc += num_wqebbs; mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index eb0e725..f02f24c 
100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -52,7 +52,6 @@ void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw) cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | 0x01); - sq->skb[pi] = NULL; sq->pc++; sq->stats.nop++; @@ -82,15 +81,17 @@ static inline void mlx5e_dma_push(struct mlx5e_sq *sq, u32 size, enum mlx5e_dma_map_type map_type) { - sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].addr = addr; - sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].size = size; - sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].type = map_type; + u32 i = sq->dma_fifo_pc & sq->dma_fifo_mask; + + sq->db.txq.dma_fifo[i].addr = addr; + sq->db.txq.dma_fifo[i].size = size; + sq->db.txq.dma_fifo[i].type = map_type; sq->dma_fifo_pc++; } static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_sq *sq, u32 i) { - return &sq->dma_fifo[i & sq->dma_fifo_mask]; + return &sq->db.txq.dma_fifo[i & sq->dma_fifo_mask]; } static void mlx5e_dma_unmap_wqe_err(struct mlx5e_sq *sq, u8 num_dma) @@ -221,7 +222,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) u16 pi = sq->pc & wq->sz_m1; struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); - struct mlx5e_tx_wqe_info *wi = &sq->wqe_info[pi]; + struct mlx5e_tx_wqe_info *wi = &sq->db.txq.wqe_info[pi]; struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; struct mlx5_wqe_eth_seg *eseg = &wqe->eth; @@ -341,7 +342,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - sq->skb[pi] = skb; + sq->db.txq.skb[pi] = skb; wi->num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); sq->pc += wi->num_wqebbs; @@ -368,8 +369,10 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) } /* fill sq edge with nops to avoid wqe wrap around */ 
- while ((sq->pc & wq->sz_m1) > sq->edge) + while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { + sq->db.txq.skb[pi] = NULL; mlx5e_send_nop(sq, false); + } if (bf) sq->bf_budget--; @@ -442,8 +445,8 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) last_wqe = (sqcc == wqe_counter); ci = sqcc & sq->wq.sz_m1; - skb = sq->skb[ci]; - wi = &sq->wqe_info[ci]; + skb = sq->db.txq.skb[ci]; + wi = &sq->db.txq.wqe_info[ci]; if (unlikely(!skb)) { /* nop */ sqcc++; @@ -499,10 +502,13 @@ void mlx5e_free_tx_descs(struct mlx5e_sq *sq) u16 ci; int i; + if (sq->type != MLX5E_SQ_TXQ) + return; + while (sq->cc != sq->pc) { ci = sq->cc & sq->wq.sz_m1; - skb = sq->skb[ci]; - wi = &sq->wqe_info[ci]; + skb = sq->db.txq.skb[ci]; + wi = &sq->db.txq.wqe_info[ci]; if (!skb) { /* nop */ sq->cc++; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 08d8b0c..47cd561 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -72,7 +72,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) do { u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; - struct mlx5e_ico_wqe_info *icowi = &sq->ico_wqe_info[ci]; + struct mlx5e_ico_wqe_info *icowi = &sq->db.ico_wqe[ci]; mlx5_cqwq_pop(&cq->wq); sqcc += icowi->num_wqebbs; -- cgit v1.1 From b5503b994ed5ed8dbfe821317e7b5b38acb065c5 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 21 Sep 2016 12:19:48 +0300 Subject: net/mlx5e: XDP TX forwarding support Adding support for XDP_TX forwarding from xdp program. Using XDP, now user can loop packets out of the same port. We create a dedicated TX SQ for each channel that will serve XDP programs that return XDP_TX action to loop packets back to the wire directly from the channel RQ RX path. For that RX pages will now need to be mapped bi-directionally, and on XDP_TX action we will sync the page back to device then queue it into SQ for transmission. 
The XDP xmit frame function will report back to the RX path if the page was consumed (transmitted), if so, RX path will forget about that page as if it were released to the stack. Later on, on XDP TX completion, the page will be released back to the page cache. For simplicity this patch will hit a doorbell on every XDP TX packet. Next patch will introduce an xmit-more-like mechanism that will queue up more than one packet into SQ w/o notifying the hardware, once RX napi loop is done we will hit doorbell once for all XDP TX packets from the previous loop. This should drastically improve XDP TX performance. Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 25 ++++- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 93 +++++++++++++++-- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 110 +++++++++++++++++---- drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 8 ++ drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 39 +++++++- drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 65 +++++++++++- 6 files changed, 304 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 5917f5e..82eeded 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -104,6 +104,15 @@ #define MLX5E_ICOSQ_MAX_WQEBBS \ (DIV_ROUND_UP(sizeof(struct mlx5e_umr_wqe), MLX5_SEND_WQE_BB)) +#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN) +#define MLX5E_XDP_IHS_DS_COUNT \ + DIV_ROUND_UP(MLX5E_XDP_MIN_INLINE - 2, MLX5_SEND_WQE_DS) +#define MLX5E_XDP_TX_DS_COUNT \ + (MLX5E_XDP_IHS_DS_COUNT + \ + (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */) +#define MLX5E_XDP_TX_WQEBBS \ + DIV_ROUND_UP(MLX5E_XDP_TX_DS_COUNT, MLX5_SEND_WQEBB_NUM_DS) + #define MLX5E_NUM_MAIN_GROUPS 9 static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size) @@ -319,6 +328,7 @@ struct
mlx5e_rq { struct { u8 page_order; u32 wqe_sz; /* wqe data buffer size */ + u8 map_dir; /* dma map direction */ } buff; __be32 mkey_be; @@ -384,14 +394,15 @@ enum { MLX5E_SQ_STATE_BF_ENABLE, }; -struct mlx5e_ico_wqe_info { +struct mlx5e_sq_wqe_info { u8 opcode; u8 num_wqebbs; }; enum mlx5e_sq_type { MLX5E_SQ_TXQ, - MLX5E_SQ_ICO + MLX5E_SQ_ICO, + MLX5E_SQ_XDP }; struct mlx5e_sq { @@ -418,7 +429,11 @@ struct mlx5e_sq { struct mlx5e_sq_dma *dma_fifo; struct mlx5e_tx_wqe_info *wqe_info; } txq; - struct mlx5e_ico_wqe_info *ico_wqe; + struct mlx5e_sq_wqe_info *ico_wqe; + struct { + struct mlx5e_sq_wqe_info *wqe_info; + struct mlx5e_dma_info *di; + } xdp; } db; /* read only */ @@ -458,8 +473,10 @@ enum channel_flags { struct mlx5e_channel { /* data path */ struct mlx5e_rq rq; + struct mlx5e_sq xdp_sq; struct mlx5e_sq sq[MLX5E_MAX_NUM_TC]; struct mlx5e_sq icosq; /* internal control operations */ + bool xdp; struct napi_struct napi; struct device *pdev; struct net_device *netdev; @@ -688,7 +705,7 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); int mlx5e_napi_poll(struct napi_struct *napi, int budget); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); -void mlx5e_free_tx_descs(struct mlx5e_sq *sq); +void mlx5e_free_sq_descs(struct mlx5e_sq *sq); void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, bool recycle); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 632de09..a9fc9d4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -64,6 +64,7 @@ struct mlx5e_cq_param { struct mlx5e_channel_param { struct mlx5e_rq_param rq; struct mlx5e_sq_param sq; + struct mlx5e_sq_param xdp_sq; struct mlx5e_sq_param icosq; struct mlx5e_cq_param rx_cq; struct mlx5e_cq_param tx_cq; @@ -180,6 +181,8 @@ static void 
mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->rx_csum_complete += rq_stats->csum_complete; s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; s->rx_xdp_drop += rq_stats->xdp_drop; + s->rx_xdp_tx += rq_stats->xdp_tx; + s->rx_xdp_tx_full += rq_stats->xdp_tx_full; s->rx_wqe_err += rq_stats->wqe_err; s->rx_mpwqe_filler += rq_stats->mpwqe_filler; s->rx_buff_alloc_err += rq_stats->buff_alloc_err; @@ -478,6 +481,10 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->priv = c->priv; rq->xdp_prog = priv->xdp_prog; + rq->buff.map_dir = DMA_FROM_DEVICE; + if (rq->xdp_prog) + rq->buff.map_dir = DMA_BIDIRECTIONAL; + switch (priv->params.rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq; @@ -765,6 +772,28 @@ static void mlx5e_close_rq(struct mlx5e_rq *rq) mlx5e_destroy_rq(rq); } +static void mlx5e_free_sq_xdp_db(struct mlx5e_sq *sq) +{ + kfree(sq->db.xdp.di); + kfree(sq->db.xdp.wqe_info); +} + +static int mlx5e_alloc_sq_xdp_db(struct mlx5e_sq *sq, int numa) +{ + int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + + sq->db.xdp.di = kzalloc_node(sizeof(*sq->db.xdp.di) * wq_sz, + GFP_KERNEL, numa); + sq->db.xdp.wqe_info = kzalloc_node(sizeof(*sq->db.xdp.wqe_info) * wq_sz, + GFP_KERNEL, numa); + if (!sq->db.xdp.di || !sq->db.xdp.wqe_info) { + mlx5e_free_sq_xdp_db(sq); + return -ENOMEM; + } + + return 0; +} + static void mlx5e_free_sq_ico_db(struct mlx5e_sq *sq) { kfree(sq->db.ico_wqe); @@ -819,6 +848,9 @@ static void mlx5e_free_sq_db(struct mlx5e_sq *sq) case MLX5E_SQ_ICO: mlx5e_free_sq_ico_db(sq); break; + case MLX5E_SQ_XDP: + mlx5e_free_sq_xdp_db(sq); + break; } } @@ -829,11 +861,24 @@ static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa) return mlx5e_alloc_sq_txq_db(sq, numa); case MLX5E_SQ_ICO: return mlx5e_alloc_sq_ico_db(sq, numa); + case MLX5E_SQ_XDP: + return mlx5e_alloc_sq_xdp_db(sq, numa); } return 0; } +static int mlx5e_sq_get_max_wqebbs(u8 sq_type) +{ + switch (sq_type) { + case 
MLX5E_SQ_ICO: + return MLX5E_ICOSQ_MAX_WQEBBS; + case MLX5E_SQ_XDP: + return MLX5E_XDP_TX_WQEBBS; + } + return MLX5_SEND_WQE_MAX_WQEBBS; +} + static int mlx5e_create_sq(struct mlx5e_channel *c, int tc, struct mlx5e_sq_param *param, @@ -844,7 +889,6 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, void *sqc = param->sqc; void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); - u16 sq_max_wqebbs; int err; sq->type = param->type; @@ -882,7 +926,6 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, if (err) goto err_sq_wq_destroy; - sq_max_wqebbs = MLX5_SEND_WQE_MAX_WQEBBS; if (sq->type == MLX5E_SQ_TXQ) { int txq_ix; @@ -891,10 +934,7 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, priv->txq_to_sq_map[txq_ix] = sq; } - if (sq->type == MLX5E_SQ_ICO) - sq_max_wqebbs = MLX5E_ICOSQ_MAX_WQEBBS; - - sq->edge = (sq->wq.sz_m1 + 1) - sq_max_wqebbs; + sq->edge = (sq->wq.sz_m1 + 1) - mlx5e_sq_get_max_wqebbs(sq->type); sq->bf_budget = MLX5E_SQ_BF_BUDGET; return 0; @@ -1068,7 +1108,7 @@ static void mlx5e_close_sq(struct mlx5e_sq *sq) } mlx5e_disable_sq(sq); - mlx5e_free_tx_descs(sq); + mlx5e_free_sq_descs(sq); mlx5e_destroy_sq(sq); } @@ -1429,14 +1469,31 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, } } + if (priv->xdp_prog) { + /* XDP SQ CQ params are same as normal TXQ sq CQ params */ + err = mlx5e_open_cq(c, &cparam->tx_cq, &c->xdp_sq.cq, + priv->params.tx_cq_moderation); + if (err) + goto err_close_sqs; + + err = mlx5e_open_sq(c, 0, &cparam->xdp_sq, &c->xdp_sq); + if (err) { + mlx5e_close_cq(&c->xdp_sq.cq); + goto err_close_sqs; + } + } + + c->xdp = !!priv->xdp_prog; err = mlx5e_open_rq(c, &cparam->rq, &c->rq); if (err) - goto err_close_sqs; + goto err_close_xdp_sq; netif_set_xps_queue(netdev, get_cpu_mask(c->cpu), ix); *cp = c; return 0; +err_close_xdp_sq: + mlx5e_close_sq(&c->xdp_sq); err_close_sqs: mlx5e_close_sqs(c); @@ -1465,9 +1522,13 @@ err_napi_del: static void mlx5e_close_channel(struct mlx5e_channel *c) { mlx5e_close_rq(&c->rq); + if (c->xdp) + 
mlx5e_close_sq(&c->xdp_sq); mlx5e_close_sqs(c); mlx5e_close_sq(&c->icosq); napi_disable(&c->napi); + if (c->xdp) + mlx5e_close_cq(&c->xdp_sq.cq); mlx5e_close_cq(&c->rq.cq); mlx5e_close_tx_cqs(c); mlx5e_close_cq(&c->icosq.cq); @@ -1618,12 +1679,28 @@ static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, param->type = MLX5E_SQ_ICO; } +static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(priv, param); + MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size); + + param->max_inline = priv->params.tx_max_inline; + /* FOR XDP SQs will support only L2 inline mode */ + param->min_inline_mode = MLX5_INLINE_MODE_NONE; + param->type = MLX5E_SQ_XDP; +} + static void mlx5e_build_channel_param(struct mlx5e_priv *priv, struct mlx5e_channel_param *cparam) { u8 icosq_log_wq_sz = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; mlx5e_build_rq_param(priv, &cparam->rq); mlx5e_build_sq_param(priv, &cparam->sq); + mlx5e_build_xdpsq_param(priv, &cparam->xdp_sq); mlx5e_build_icosq_param(priv, &cparam->icosq, icosq_log_wq_sz); mlx5e_build_rx_cq_param(priv, &cparam->rx_cq); mlx5e_build_tx_cq_param(priv, &cparam->tx_cq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 941e531..57d4951 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -236,7 +236,7 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, dma_info->page = page; dma_info->addr = dma_map_page(rq->pdev, page, 0, - RQ_PAGE_SIZE(rq), DMA_FROM_DEVICE); + RQ_PAGE_SIZE(rq), rq->buff.map_dir); if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) { put_page(page); return -ENOMEM; @@ -252,7 +252,7 @@ void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, return; dma_unmap_page(rq->pdev, dma_info->addr, RQ_PAGE_SIZE(rq), - 
DMA_FROM_DEVICE); + rq->buff.map_dir); put_page(dma_info->page); } @@ -632,15 +632,95 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, napi_gro_receive(rq->cq.napi, skb); } -static inline enum xdp_action mlx5e_xdp_handle(struct mlx5e_rq *rq, - const struct bpf_prog *prog, - void *data, u32 len) +static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, + struct mlx5e_dma_info *di, + unsigned int data_offset, + int len) +{ + struct mlx5e_sq *sq = &rq->channel->xdp_sq; + struct mlx5_wq_cyc *wq = &sq->wq; + u16 pi = sq->pc & wq->sz_m1; + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + struct mlx5e_sq_wqe_info *wi = &sq->db.xdp.wqe_info[pi]; + + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_eth_seg *eseg = &wqe->eth; + struct mlx5_wqe_data_seg *dseg; + + dma_addr_t dma_addr = di->addr + data_offset + MLX5E_XDP_MIN_INLINE; + unsigned int dma_len = len - MLX5E_XDP_MIN_INLINE; + void *data = page_address(di->page) + data_offset; + + if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_XDP_TX_WQEBBS))) { + rq->stats.xdp_tx_full++; + mlx5e_page_release(rq, di, true); + return; + } + + dma_sync_single_for_device(sq->pdev, dma_addr, dma_len, + PCI_DMA_TODEVICE); + + memset(wqe, 0, sizeof(*wqe)); + + /* copy the inline part */ + memcpy(eseg->inline_hdr_start, data, MLX5E_XDP_MIN_INLINE); + eseg->inline_hdr_sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE); + + dseg = (struct mlx5_wqe_data_seg *)cseg + (MLX5E_XDP_TX_DS_COUNT - 1); + + /* write the dma part */ + dseg->addr = cpu_to_be64(dma_addr); + dseg->byte_count = cpu_to_be32(dma_len); + dseg->lkey = sq->mkey_be; + + cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | MLX5E_XDP_TX_DS_COUNT); + + sq->db.xdp.di[pi] = *di; + wi->opcode = MLX5_OPCODE_SEND; + wi->num_wqebbs = MLX5E_XDP_TX_WQEBBS; + sq->pc += MLX5E_XDP_TX_WQEBBS; + + wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); + + /* fill sq edge 
with nops to avoid wqe wrap around */ + while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { + sq->db.xdp.wqe_info[pi].opcode = MLX5_OPCODE_NOP; + mlx5e_send_nop(sq, false); + } + rq->stats.xdp_tx++; +} + +/* returns true if packet was consumed by xdp */ +static inline bool mlx5e_xdp_handle(struct mlx5e_rq *rq, + const struct bpf_prog *prog, + struct mlx5e_dma_info *di, + void *data, u16 len) { struct xdp_buff xdp; + u32 act; + + if (!prog) + return false; xdp.data = data; xdp.data_end = xdp.data + len; - return bpf_prog_run_xdp(prog, &xdp); + act = bpf_prog_run_xdp(prog, &xdp); + switch (act) { + case XDP_PASS: + return false; + case XDP_TX: + mlx5e_xmit_xdp_frame(rq, di, MLX5_RX_HEADROOM, len); + return true; + default: + bpf_warn_invalid_xdp_action(act); + case XDP_ABORTED: + case XDP_DROP: + rq->stats.xdp_drop++; + mlx5e_page_release(rq, di, true); + return true; + } } void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) @@ -651,21 +731,22 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) __be16 wqe_counter_be; struct sk_buff *skb; u16 wqe_counter; + void *va, *data; u32 cqe_bcnt; - void *va; wqe_counter_be = cqe->wqe_counter; wqe_counter = be16_to_cpu(wqe_counter_be); wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); di = &rq->dma_info[wqe_counter]; va = page_address(di->page); + data = va + MLX5_RX_HEADROOM; dma_sync_single_range_for_cpu(rq->pdev, di->addr, MLX5_RX_HEADROOM, rq->buff.wqe_sz, DMA_FROM_DEVICE); - prefetch(va + MLX5_RX_HEADROOM); + prefetch(data); cqe_bcnt = be32_to_cpu(cqe->byte_cnt); if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { @@ -674,17 +755,8 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) goto wq_ll_pop; } - if (xdp_prog) { - enum xdp_action act = - mlx5e_xdp_handle(rq, xdp_prog, va + MLX5_RX_HEADROOM, - cqe_bcnt); - - if (act != XDP_PASS) { - rq->stats.xdp_drop++; - mlx5e_page_release(rq, di, true); - goto wq_ll_pop; - } - } + if (mlx5e_xdp_handle(rq, xdp_prog, di, 
data, cqe_bcnt)) + goto wq_ll_pop; /* page/packet was consumed by XDP */ skb = build_skb(va, RQ_PAGE_SIZE(rq)); if (unlikely(!skb)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 084d6c8..57452fd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -66,6 +66,8 @@ struct mlx5e_sw_stats { u64 rx_csum_complete; u64 rx_csum_unnecessary_inner; u64 rx_xdp_drop; + u64 rx_xdp_tx; + u64 rx_xdp_tx_full; u64 tx_csum_partial; u64 tx_csum_partial_inner; u64 tx_queue_stopped; @@ -102,6 +104,8 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) }, @@ -281,6 +285,8 @@ struct mlx5e_rq_stats { u64 lro_packets; u64 lro_bytes; u64 xdp_drop; + u64 xdp_tx; + u64 xdp_tx_full; u64 wqe_err; u64 mpwqe_filler; u64 buff_alloc_err; @@ -299,6 +305,8 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_drop) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx_full) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) }, diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index f02f24c..70a7173 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -495,16 +495,13 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) return (i == MLX5E_TX_CQ_POLL_BUDGET); } -void mlx5e_free_tx_descs(struct mlx5e_sq *sq) +static void mlx5e_free_txq_sq_descs(struct mlx5e_sq *sq) { struct mlx5e_tx_wqe_info *wi; struct sk_buff *skb; u16 ci; int i; - if (sq->type != MLX5E_SQ_TXQ) - return; - while (sq->cc != sq->pc) { ci = sq->cc & sq->wq.sz_m1; skb = sq->db.txq.skb[ci]; @@ -526,3 +523,37 @@ void mlx5e_free_tx_descs(struct mlx5e_sq *sq) sq->cc += wi->num_wqebbs; } } + +static void mlx5e_free_xdp_sq_descs(struct mlx5e_sq *sq) +{ + struct mlx5e_sq_wqe_info *wi; + struct mlx5e_dma_info *di; + u16 ci; + + while (sq->cc != sq->pc) { + ci = sq->cc & sq->wq.sz_m1; + di = &sq->db.xdp.di[ci]; + wi = &sq->db.xdp.wqe_info[ci]; + + if (wi->opcode == MLX5_OPCODE_NOP) { + sq->cc++; + continue; + } + + sq->cc += wi->num_wqebbs; + + mlx5e_page_release(&sq->channel->rq, di, false); + } +} + +void mlx5e_free_sq_descs(struct mlx5e_sq *sq) +{ + switch (sq->type) { + case MLX5E_SQ_TXQ: + mlx5e_free_txq_sq_descs(sq); + break; + case MLX5E_SQ_XDP: + mlx5e_free_xdp_sq_descs(sq); + break; + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 47cd561..5703f19 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -72,7 +72,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) do { u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; - struct mlx5e_ico_wqe_info *icowi = &sq->db.ico_wqe[ci]; + struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci]; mlx5_cqwq_pop(&cq->wq); sqcc += icowi->num_wqebbs; @@ -105,6 +105,66 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) 
sq->cc = sqcc; } +static inline bool mlx5e_poll_xdp_tx_cq(struct mlx5e_cq *cq) +{ + struct mlx5e_sq *sq; + u16 sqcc; + int i; + + sq = container_of(cq, struct mlx5e_sq, cq); + + if (unlikely(test_bit(MLX5E_SQ_STATE_FLUSH, &sq->state))) + return false; + + /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), + * otherwise a cq overrun may occur + */ + sqcc = sq->cc; + + for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) { + struct mlx5_cqe64 *cqe; + u16 wqe_counter; + bool last_wqe; + + cqe = mlx5e_get_cqe(cq); + if (!cqe) + break; + + mlx5_cqwq_pop(&cq->wq); + + wqe_counter = be16_to_cpu(cqe->wqe_counter); + + do { + struct mlx5e_sq_wqe_info *wi; + struct mlx5e_dma_info *di; + u16 ci; + + last_wqe = (sqcc == wqe_counter); + + ci = sqcc & sq->wq.sz_m1; + di = &sq->db.xdp.di[ci]; + wi = &sq->db.xdp.wqe_info[ci]; + + if (unlikely(wi->opcode == MLX5_OPCODE_NOP)) { + sqcc++; + continue; + } + + sqcc += wi->num_wqebbs; + /* Recycle RX page */ + mlx5e_page_release(&sq->channel->rq, di, true); + } while (!last_wqe); + } + + mlx5_cqwq_update_db_record(&cq->wq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + sq->cc = sqcc; + return (i == MLX5E_TX_CQ_POLL_BUDGET); +} + int mlx5e_napi_poll(struct napi_struct *napi, int budget) { struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, @@ -121,6 +181,9 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget); busy |= work_done == budget; + if (c->xdp) + busy |= mlx5e_poll_xdp_tx_cq(&c->xdp_sq.cq); + mlx5e_poll_ico_cq(&c->icosq.cq); busy |= mlx5e_post_rx_wqes(&c->rq); -- cgit v1.1 From 35b510e257f7516546a0a3f725f71dfbccc3f733 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 21 Sep 2016 12:19:49 +0300 Subject: net/mlx5e: XDP TX xmit more Previously we rang XDP SQ doorbell on every forwarded XDP packet. 
Here we introduce a xmit more like mechanism that will queue up more than one packet into SQ (up to RX napi budget) w/o notifying the hardware. Once RX napi budget is consumed and we exit napi RX loop, we will flush (doorbell) all XDP looped packets in case there are such. XDP forward packet rate: Comparing XDP with and w/o xmit more (bulk transmit): RX Cores XDP TX XDP TX (xmit more) --------------------------------------------------- 1 6.5Mpps 12.4Mpps 2 13.2Mpps 24.2Mpps 4 25.2Mpps 36.3Mpps* 8 36.3Mpps* 36.3Mpps* *My xmitter was limited to 36.3Mpps, so it is the bottleneck. It seems that receive side can handle more. Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 32 ++++++++++++++++++------- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 82eeded..3460154 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -433,6 +433,7 @@ struct mlx5e_sq { struct { struct mlx5e_sq_wqe_info *wqe_info; struct mlx5e_dma_info *di; + bool doorbell; } xdp; } db; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 57d4951..0a81bd3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -632,6 +632,18 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, napi_gro_receive(rq->cq.napi, skb); } +static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_sq *sq) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5e_tx_wqe *wqe; + u16 pi = (sq->pc - MLX5E_XDP_TX_WQEBBS) & wq->sz_m1; /* last pi */ + + wqe = mlx5_wq_cyc_get_wqe(wq, pi); + + wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); +} + static inline 
void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, unsigned int data_offset, @@ -652,6 +664,11 @@ static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, void *data = page_address(di->page) + data_offset; if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_XDP_TX_WQEBBS))) { + if (sq->db.xdp.doorbell) { + /* SQ is full, ring doorbell */ + mlx5e_xmit_xdp_doorbell(sq); + sq->db.xdp.doorbell = false; + } rq->stats.xdp_tx_full++; mlx5e_page_release(rq, di, true); return; @@ -681,14 +698,7 @@ static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, wi->num_wqebbs = MLX5E_XDP_TX_WQEBBS; sq->pc += MLX5E_XDP_TX_WQEBBS; - wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; - mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); - - /* fill sq edge with nops to avoid wqe wrap around */ - while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { - sq->db.xdp.wqe_info[pi].opcode = MLX5_OPCODE_NOP; - mlx5e_send_nop(sq, false); - } + sq->db.xdp.doorbell = true; rq->stats.xdp_tx++; } @@ -863,6 +873,7 @@ mpwrq_cqe_out: int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) { struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); + struct mlx5e_sq *xdp_sq = &rq->channel->xdp_sq; int work_done = 0; if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state))) @@ -889,6 +900,11 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) rq->handle_rx_cqe(rq, cqe); } + if (xdp_sq->db.xdp.doorbell) { + mlx5e_xmit_xdp_doorbell(xdp_sq); + xdp_sq->db.xdp.doorbell = false; + } + mlx5_cqwq_update_db_record(&cq->wq); /* ensure cq space is freed before enabling more cqes */ -- cgit v1.1 From e2f036a97271cf5811ee754bf321a29a814577f9 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 21 Sep 2016 08:45:55 -0300 Subject: sctp: rename WORD_TRUNC/ROUND macros To something more meaningful these days, specially because this is working on packet headers or lengths and which are not tied to any CPU arch but to the protocol itself. 
So, WORD_TRUNC becomes SCTP_TRUNC4 and WORD_ROUND becomes SCTP_PAD4. Reported-by: David Laight Reported-by: David Miller Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 10 +++++----- net/netfilter/xt_sctp.c | 2 +- net/sctp/associola.c | 2 +- net/sctp/chunk.c | 6 +++--- net/sctp/input.c | 8 ++++---- net/sctp/inqueue.c | 2 +- net/sctp/output.c | 12 ++++++------ net/sctp/sm_make_chunk.c | 28 ++++++++++++++-------------- net/sctp/sm_statefuns.c | 6 +++--- net/sctp/transport.c | 4 ++-- net/sctp/ulpevent.c | 4 ++-- 11 files changed, 42 insertions(+), 42 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 632e205..87a7f42 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -83,9 +83,9 @@ #endif /* Round an int up to the next multiple of 4. */ -#define WORD_ROUND(s) (((s)+3)&~3) +#define SCTP_PAD4(s) (((s)+3)&~3) /* Truncate to the previous multiple of 4. */ -#define WORD_TRUNC(s) ((s)&~3) +#define SCTP_TRUNC4(s) ((s)&~3) /* * Function declarations. 
@@ -433,7 +433,7 @@ static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu) if (asoc->user_frag) frag = min_t(int, frag, asoc->user_frag); - frag = WORD_TRUNC(min_t(int, frag, SCTP_MAX_CHUNK_LEN)); + frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN)); return frag; } @@ -462,7 +462,7 @@ _sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member) for (pos.v = chunk->member;\ pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\ ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\ - pos.v += WORD_ROUND(ntohs(pos.p->length))) + pos.v += SCTP_PAD4(ntohs(pos.p->length))) #define sctp_walk_errors(err, chunk_hdr)\ _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length)) @@ -472,7 +472,7 @@ for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \ sizeof(sctp_chunkhdr_t));\ (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\ ntohs(err->length) >= sizeof(sctp_errhdr_t); \ - err = (sctp_errhdr_t *)((void *)err + WORD_ROUND(ntohs(err->length)))) + err = (sctp_errhdr_t *)((void *)err + SCTP_PAD4(ntohs(err->length)))) #define sctp_walk_fwdtsn(pos, chunk)\ _sctp_walk_fwdtsn((pos), (chunk), ntohs((chunk)->chunk_hdr->length) - sizeof(struct sctp_fwdtsn_chunk)) diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index ef36a56..4dedb96 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -68,7 +68,7 @@ match_packet(const struct sk_buff *skb, ++i, offset, sch->type, htons(sch->length), sch->flags); #endif - offset += WORD_ROUND(ntohs(sch->length)); + offset += SCTP_PAD4(ntohs(sch->length)); pr_debug("skb->len: %d\toffset: %d\n", skb->len, offset); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 1c23060..f10d339 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1408,7 +1408,7 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc) transports) { if (t->pmtu_pending && t->dst) { sctp_transport_update_pmtu(sk, t, - WORD_TRUNC(dst_mtu(t->dst))); + 
SCTP_TRUNC4(dst_mtu(t->dst))); t->pmtu_pending = 0; } if (!pmtu || (t->pathmtu < pmtu)) diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index af9cc80..76eae82 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -208,8 +208,8 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, struct sctp_hmac *hmac_desc = sctp_auth_asoc_get_hmac(asoc); if (hmac_desc) - max_data -= WORD_ROUND(sizeof(sctp_auth_chunk_t) + - hmac_desc->hmac_len); + max_data -= SCTP_PAD4(sizeof(sctp_auth_chunk_t) + + hmac_desc->hmac_len); } /* Now, check if we need to reduce our max */ @@ -229,7 +229,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, asoc->outqueue.out_qlen == 0 && list_empty(&asoc->outqueue.retransmit) && msg_len > max) - max_data -= WORD_ROUND(sizeof(sctp_sack_chunk_t)); + max_data -= SCTP_PAD4(sizeof(sctp_sack_chunk_t)); /* Encourage Cookie-ECHO bundling. */ if (asoc->state < SCTP_STATE_COOKIE_ECHOED) diff --git a/net/sctp/input.c b/net/sctp/input.c index 69444d3..a1d8506 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -605,7 +605,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) /* PMTU discovery (RFC1191) */ if (ICMP_FRAG_NEEDED == code) { sctp_icmp_frag_needed(sk, asoc, transport, - WORD_TRUNC(info)); + SCTP_TRUNC4(info)); goto out_unlock; } else { if (ICMP_PROT_UNREACH == code) { @@ -673,7 +673,7 @@ static int sctp_rcv_ootb(struct sk_buff *skb) if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t)) break; - ch_end = offset + WORD_ROUND(ntohs(ch->length)); + ch_end = offset + SCTP_PAD4(ntohs(ch->length)); if (ch_end > skb->len) break; @@ -1121,7 +1121,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net, if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t)) break; - ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length)); + ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length)); if (ch_end > skb_tail_pointer(skb)) break; @@ -1190,7 +1190,7 @@ static struct sctp_association 
*__sctp_rcv_lookup_harder(struct net *net, * that the chunk length doesn't cause overflow. Otherwise, we'll * walk off the end. */ - if (WORD_ROUND(ntohs(ch->length)) > skb->len) + if (SCTP_PAD4(ntohs(ch->length)) > skb->len) return NULL; /* If this is INIT/INIT-ACK look inside the chunk too. */ diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 6437aa9..f731de3e8 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -213,7 +213,7 @@ new_skb: } chunk->chunk_hdr = ch; - chunk->chunk_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length)); + chunk->chunk_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length)); skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t)); chunk->subh.v = NULL; /* Subheader is no longer valid. */ diff --git a/net/sctp/output.c b/net/sctp/output.c index 0c605ec..2a5c189 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -297,7 +297,7 @@ static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet, struct sctp_chunk *chunk) { sctp_xmit_t retval = SCTP_XMIT_OK; - __u16 chunk_len = WORD_ROUND(ntohs(chunk->chunk_hdr->length)); + __u16 chunk_len = SCTP_PAD4(ntohs(chunk->chunk_hdr->length)); /* Check to see if this chunk will fit into the packet */ retval = sctp_packet_will_fit(packet, chunk, chunk_len); @@ -508,7 +508,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) if (gso) { pkt_size = packet->overhead; list_for_each_entry(chunk, &packet->chunk_list, list) { - int padded = WORD_ROUND(chunk->skb->len); + int padded = SCTP_PAD4(chunk->skb->len); if (pkt_size + padded > tp->pathmtu) break; @@ -538,7 +538,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) * included in the chunk length field. The sender should * never pad with more than 3 bytes. * - * [This whole comment explains WORD_ROUND() below.] + * [This whole comment explains SCTP_PAD4() below.] 
*/ pkt_size -= packet->overhead; @@ -560,7 +560,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) has_data = 1; } - padding = WORD_ROUND(chunk->skb->len) - chunk->skb->len; + padding = SCTP_PAD4(chunk->skb->len) - chunk->skb->len; if (padding) memset(skb_put(chunk->skb, padding), 0, padding); @@ -587,7 +587,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) * acknowledged or have failed. * Re-queue auth chunks if needed. */ - pkt_size -= WORD_ROUND(chunk->skb->len); + pkt_size -= SCTP_PAD4(chunk->skb->len); if (!sctp_chunk_is_data(chunk) && chunk != packet->auth) sctp_chunk_free(chunk); @@ -911,7 +911,7 @@ static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet, */ maxsize = pmtu - packet->overhead; if (packet->auth) - maxsize -= WORD_ROUND(packet->auth->skb->len); + maxsize -= SCTP_PAD4(packet->auth->skb->len); if (chunk_len > maxsize) retval = SCTP_XMIT_PMTU_FULL; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 8c77b87..79dd660 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -253,7 +253,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, num_types = sp->pf->supported_addrs(sp, types); chunksize = sizeof(init) + addrs_len; - chunksize += WORD_ROUND(SCTP_SAT_LEN(num_types)); + chunksize += SCTP_PAD4(SCTP_SAT_LEN(num_types)); chunksize += sizeof(ecap_param); if (asoc->prsctp_enable) @@ -283,14 +283,14 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, /* Add HMACS parameter length if any were defined */ auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs; if (auth_hmacs->length) - chunksize += WORD_ROUND(ntohs(auth_hmacs->length)); + chunksize += SCTP_PAD4(ntohs(auth_hmacs->length)); else auth_hmacs = NULL; /* Add CHUNKS parameter length */ auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks; if (auth_chunks->length) - chunksize += WORD_ROUND(ntohs(auth_chunks->length)); + chunksize += SCTP_PAD4(ntohs(auth_chunks->length)); 
else auth_chunks = NULL; @@ -300,8 +300,8 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, /* If we have any extensions to report, account for that */ if (num_ext) - chunksize += WORD_ROUND(sizeof(sctp_supported_ext_param_t) + - num_ext); + chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) + + num_ext); /* RFC 2960 3.3.2 Initiation (INIT) (1) * @@ -443,13 +443,13 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs; if (auth_hmacs->length) - chunksize += WORD_ROUND(ntohs(auth_hmacs->length)); + chunksize += SCTP_PAD4(ntohs(auth_hmacs->length)); else auth_hmacs = NULL; auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks; if (auth_chunks->length) - chunksize += WORD_ROUND(ntohs(auth_chunks->length)); + chunksize += SCTP_PAD4(ntohs(auth_chunks->length)); else auth_chunks = NULL; @@ -458,8 +458,8 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, } if (num_ext) - chunksize += WORD_ROUND(sizeof(sctp_supported_ext_param_t) + - num_ext); + chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) + + num_ext); /* Now allocate and fill out the chunk. */ retval = sctp_make_control(asoc, SCTP_CID_INIT_ACK, 0, chunksize, gfp); @@ -1390,7 +1390,7 @@ static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc, struct sock *sk; /* No need to allocate LL here, as this is only a chunk. 
*/ - skb = alloc_skb(WORD_ROUND(sizeof(sctp_chunkhdr_t) + paylen), gfp); + skb = alloc_skb(SCTP_PAD4(sizeof(sctp_chunkhdr_t) + paylen), gfp); if (!skb) goto nodata; @@ -1482,7 +1482,7 @@ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data) void *target; void *padding; int chunklen = ntohs(chunk->chunk_hdr->length); - int padlen = WORD_ROUND(chunklen) - chunklen; + int padlen = SCTP_PAD4(chunklen) - chunklen; padding = skb_put(chunk->skb, padlen); target = skb_put(chunk->skb, len); @@ -1900,7 +1900,7 @@ static int sctp_process_missing_param(const struct sctp_association *asoc, struct __sctp_missing report; __u16 len; - len = WORD_ROUND(sizeof(report)); + len = SCTP_PAD4(sizeof(report)); /* Make an ERROR chunk, preparing enough room for * returning multiple unknown parameters. @@ -2098,9 +2098,9 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc, if (*errp) { if (!sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM, - WORD_ROUND(ntohs(param.p->length)))) + SCTP_PAD4(ntohs(param.p->length)))) sctp_addto_chunk_fixed(*errp, - WORD_ROUND(ntohs(param.p->length)), + SCTP_PAD4(ntohs(param.p->length)), param.v); } else { /* If there is no memory for generating the ERROR diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index d88bb2b..026e3bc 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3454,7 +3454,7 @@ sctp_disposition_t sctp_sf_ootb(struct net *net, } /* Report violation if chunk len overflows */ - ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length)); + ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length)); if (ch_end > skb_tail_pointer(skb)) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -4185,7 +4185,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net, hdr = unk_chunk->chunk_hdr; err_chunk = sctp_make_op_error(asoc, unk_chunk, SCTP_ERROR_UNKNOWN_CHUNK, hdr, - WORD_ROUND(ntohs(hdr->length)), + SCTP_PAD4(ntohs(hdr->length)), 0); if (err_chunk) { 
sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, @@ -4203,7 +4203,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net, hdr = unk_chunk->chunk_hdr; err_chunk = sctp_make_op_error(asoc, unk_chunk, SCTP_ERROR_UNKNOWN_CHUNK, hdr, - WORD_ROUND(ntohs(hdr->length)), + SCTP_PAD4(ntohs(hdr->length)), 0); if (err_chunk) { sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 81b8667..ce54dce 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -233,7 +233,7 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) } if (transport->dst) { - transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst)); + transport->pathmtu = SCTP_TRUNC4(dst_mtu(transport->dst)); } else transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } @@ -287,7 +287,7 @@ void sctp_transport_route(struct sctp_transport *transport, return; } if (transport->dst) { - transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst)); + transport->pathmtu = SCTP_TRUNC4(dst_mtu(transport->dst)); /* Initialize sk->sk_rcv_saddr, if the transport is the * association's active path for getsockname(). diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index d85b803..bea0005 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -383,7 +383,7 @@ sctp_ulpevent_make_remote_error(const struct sctp_association *asoc, ch = (sctp_errhdr_t *)(chunk->skb->data); cause = ch->cause; - elen = WORD_ROUND(ntohs(ch->length)) - sizeof(sctp_errhdr_t); + elen = SCTP_PAD4(ntohs(ch->length)) - sizeof(sctp_errhdr_t); /* Pull off the ERROR header. */ skb_pull(chunk->skb, sizeof(sctp_errhdr_t)); @@ -688,7 +688,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, * MUST ignore the padding bytes. */ len = ntohs(chunk->chunk_hdr->length); - padding = WORD_ROUND(len) - len; + padding = SCTP_PAD4(len) - len; /* Fixup cloned skb with just this chunks data. 
*/ skb_trim(skb, chunk->chunk_end - padding - skb->data); -- cgit v1.1 From 4a225ce3950879a5426c56f306f5d1c9d6330292 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 21 Sep 2016 08:45:56 -0300 Subject: sctp: make use of SCTP_TRUNC4 macro And avoid the usage of '&~3'. This is the last place still not using the macro. Also break the line to make it easier to read. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/chunk.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 76eae82..8afe2e9 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -195,9 +195,10 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, /* This is the biggest possible DATA chunk that can fit into * the packet */ - max_data = (asoc->pathmtu - - sctp_sk(asoc->base.sk)->pf->af->net_header_len - - sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk)) & ~3; + max_data = asoc->pathmtu - + sctp_sk(asoc->base.sk)->pf->af->net_header_len - + sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk); + max_data = SCTP_TRUNC4(max_data); max = asoc->frag_point; /* If the the peer requested that we authenticate DATA chunks -- cgit v1.1 From fcfbfd68b36b2bf4e0133dffa316831a5e180199 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 21 Sep 2016 15:04:43 +0000 Subject: net: dsa: qca8k: fix non static symbol warning Fixes the following sparse warning: drivers/net/dsa/qca8k.c:259:22: warning: symbol 'qca8k_regmap_config' was not declared. Should it be static? Signed-off-by: Wei Yongjun Signed-off-by: David S. 
Miller --- drivers/net/dsa/qca8k.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 4788a89..6fc379c 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -256,7 +256,7 @@ static struct regmap_access_table qca8k_readable_table = { .n_yes_ranges = ARRAY_SIZE(qca8k_readable_ranges), }; -struct regmap_config qca8k_regmap_config = { +static struct regmap_config qca8k_regmap_config = { .reg_bits = 16, .val_bits = 32, .reg_stride = 4, -- cgit v1.1 From a084ab33543cfa07033f91161978b626a9d9bd57 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 21 Sep 2016 15:05:05 +0000 Subject: net: dsa: qca8k: use mdio_module_driver to simplify the code mdio_module_driver() makes the code simpler by eliminating boilerplate code. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/dsa/qca8k.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 6fc379c..b3df70d 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -1032,19 +1032,7 @@ static struct mdio_driver qca8kmdio_driver = { }, }; -static int __init -qca8kmdio_driver_register(void) -{ - return mdio_driver_register(&qca8kmdio_driver); -} -module_init(qca8kmdio_driver_register); - -static void __exit -qca8kmdio_driver_unregister(void) -{ - mdio_driver_unregister(&qca8kmdio_driver); -} -module_exit(qca8kmdio_driver_unregister); +mdio_module_driver(qca8kmdio_driver); MODULE_AUTHOR("Mathieu Olivari, John Crispin "); MODULE_DESCRIPTION("Driver for QCA8K ethernet switch family"); -- cgit v1.1 From 524605e5ba5a897fb0a8c29398ed049100fe80aa Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 21 Sep 2016 15:09:16 +0000 Subject: cxgb4: Convert to use simple_open() Remove an open coded simple_open() function and replace file operations references to the function with simple_open() instead. 
Generated by: scripts/coccinelle/api/simple_open.cocci Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 52be9a4..20455d0 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -2748,12 +2748,6 @@ static void add_debugfs_mem(struct adapter *adap, const char *name, size_mb << 20); } -static int blocked_fl_open(struct inode *inode, struct file *file) -{ - file->private_data = inode->i_private; - return 0; -} - static ssize_t blocked_fl_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos) { @@ -2797,7 +2791,7 @@ static ssize_t blocked_fl_write(struct file *filp, const char __user *ubuf, static const struct file_operations blocked_fl_fops = { .owner = THIS_MODULE, - .open = blocked_fl_open, + .open = simple_open, .read = blocked_fl_read, .write = blocked_fl_write, .llseek = generic_file_llseek, -- cgit v1.1 From 8e83134db4ecb77a1dc3390b60ddeea840a5afbc Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:29:31 +0100 Subject: rxrpc: Send pings to get RTT data Send a PING ACK packet to the peer when we get a new incoming call from a peer we don't have a record for. The PING RESPONSE ACK packet will tell us the following about the peer: (1) its receive window size (2) its MTU sizes (3) its support for jumbo DATA packets (4) if it supports slow start (similar to RFC 5681) (5) an estimate of the RTT This is necessary because the peer won't normally send us an ACK until it gets to the Rx phase and we send it a packet, but we would like to know some of this information before we start sending packets. A pair of tracepoints are added so that RTT determination can be observed. 
Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 7 +++++-- net/rxrpc/input.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++- net/rxrpc/misc.c | 11 ++++++----- net/rxrpc/output.c | 22 ++++++++++++++++++++++ 4 files changed, 80 insertions(+), 8 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 79c671e..8b47f46 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -403,6 +403,7 @@ enum rxrpc_call_flag { RXRPC_CALL_EXPOSED, /* The call was exposed to the world */ RXRPC_CALL_RX_LAST, /* Received the last packet (at rxtx_top) */ RXRPC_CALL_TX_LAST, /* Last packet in Tx buffer (at rxtx_top) */ + RXRPC_CALL_PINGING, /* Ping in process */ }; /* @@ -487,6 +488,8 @@ struct rxrpc_call { u32 call_id; /* call ID on connection */ u32 cid; /* connection ID plus channel index */ int debug_id; /* debug ID for printks */ + unsigned short rx_pkt_offset; /* Current recvmsg packet offset */ + unsigned short rx_pkt_len; /* Current recvmsg packet len */ /* Rx/Tx circular buffer, depending on phase. * @@ -530,8 +533,8 @@ struct rxrpc_call { u16 ackr_skew; /* skew on packet being ACK'd */ rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ - unsigned short rx_pkt_offset; /* Current recvmsg packet offset */ - unsigned short rx_pkt_len; /* Current recvmsg packet len */ + rxrpc_serial_t ackr_ping; /* Last ping sent */ + ktime_t ackr_ping_time; /* Time last ping sent */ /* transmission-phase ACK management */ rxrpc_serial_t acks_latest; /* serial number of latest ACK received */ diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index aa261df..a0a5bd1 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -37,6 +37,19 @@ static void rxrpc_proto_abort(const char *why, } /* + * Ping the other end to fill our RTT cache and to retrieve the rwind + * and MTU parameters. 
+ */ +static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb, + int skew) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, + true, true); +} + +/* * Apply a hard ACK by advancing the Tx window. */ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to) @@ -343,6 +356,32 @@ ack: } /* + * Process a ping response. + */ +static void rxrpc_input_ping_response(struct rxrpc_call *call, + ktime_t resp_time, + rxrpc_serial_t orig_serial, + rxrpc_serial_t ack_serial) +{ + rxrpc_serial_t ping_serial; + ktime_t ping_time; + + ping_time = call->ackr_ping_time; + smp_rmb(); + ping_serial = call->ackr_ping; + + if (!test_bit(RXRPC_CALL_PINGING, &call->flags) || + before(orig_serial, ping_serial)) + return; + clear_bit(RXRPC_CALL_PINGING, &call->flags); + if (after(orig_serial, ping_serial)) + return; + + rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_ping_response, + orig_serial, ack_serial, ping_time, resp_time); +} + +/* * Process the extra information that may be appended to an ACK packet */ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, @@ -438,6 +477,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_ackinfo info; u8 acks[RXRPC_MAXACKS]; } buf; + rxrpc_serial_t acked_serial; rxrpc_seq_t first_soft_ack, hard_ack; int nr_acks, offset; @@ -449,6 +489,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, } sp->offset += sizeof(buf.ack); + acked_serial = ntohl(buf.ack.serial); first_soft_ack = ntohl(buf.ack.firstPacket); hard_ack = first_soft_ack - 1; nr_acks = buf.ack.nAcks; @@ -460,10 +501,14 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, ntohs(buf.ack.maxSkew), first_soft_ack, ntohl(buf.ack.previousPacket), - ntohl(buf.ack.serial), + acked_serial, rxrpc_acks(buf.ack.reason), buf.ack.nAcks); + if (buf.ack.reason == RXRPC_ACK_PING_RESPONSE) + 
rxrpc_input_ping_response(call, skb->tstamp, acked_serial, + sp->hdr.serial); + if (buf.ack.reason == RXRPC_ACK_PING) { _proto("Rx ACK %%%u PING Request", sp->hdr.serial); rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE, @@ -830,6 +875,7 @@ void rxrpc_data_ready(struct sock *udp_sk) rcu_read_unlock(); goto reject_packet; } + rxrpc_send_ping(call, skb, skew); } rxrpc_input_call_packet(call, skb, skew); diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 6321c23..56e6683 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -83,11 +83,12 @@ const s8 rxrpc_ack_priority[] = { [RXRPC_ACK_DELAY] = 1, [RXRPC_ACK_REQUESTED] = 2, [RXRPC_ACK_IDLE] = 3, - [RXRPC_ACK_PING_RESPONSE] = 4, - [RXRPC_ACK_DUPLICATE] = 5, - [RXRPC_ACK_OUT_OF_SEQUENCE] = 6, - [RXRPC_ACK_EXCEEDS_WINDOW] = 7, - [RXRPC_ACK_NOSPACE] = 8, + [RXRPC_ACK_DUPLICATE] = 4, + [RXRPC_ACK_OUT_OF_SEQUENCE] = 5, + [RXRPC_ACK_EXCEEDS_WINDOW] = 6, + [RXRPC_ACK_NOSPACE] = 7, + [RXRPC_ACK_PING_RESPONSE] = 8, + [RXRPC_ACK_PING] = 9, }; const char *rxrpc_acks(u8 reason) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 817fb0e..0d89cd3 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -57,6 +57,9 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, pkt->ack.reason = call->ackr_reason; pkt->ack.nAcks = top - hard_ack; + if (pkt->ack.reason == RXRPC_ACK_PING) + pkt->whdr.flags |= RXRPC_REQUEST_ACK; + if (after(top, hard_ack)) { seq = hard_ack + 1; do { @@ -97,6 +100,7 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) struct kvec iov[2]; rxrpc_serial_t serial; size_t len, n; + bool ping = false; int ioc, ret; u32 abort_code; @@ -147,6 +151,7 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) ret = 0; goto out; } + ping = (call->ackr_reason == RXRPC_ACK_PING); n = rxrpc_fill_out_ack(call, pkt); call->ackr_reason = 0; @@ -183,12 +188,29 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) goto out; } + if (ping) { + call->ackr_ping = serial; + smp_wmb(); + /* 
We need to stick a time in before we send the packet in case + * the reply gets back before kernel_sendmsg() completes - but + * asking UDP to send the packet can take a relatively long + * time, so we update the time after, on the assumption that + * the packet transmission is more likely to happen towards the + * end of the kernel_sendmsg() call. + */ + call->ackr_ping_time = ktime_get_real(); + set_bit(RXRPC_CALL_PINGING, &call->flags); + trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_ping, serial); + } ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len); + if (ping) + call->ackr_ping_time = ktime_get_real(); if (ret < 0 && call->state < RXRPC_CALL_COMPLETE) { switch (type) { case RXRPC_PACKET_TYPE_ACK: + clear_bit(RXRPC_CALL_PINGING, &call->flags); rxrpc_propose_ACK(call, pkt->ack.reason, ntohs(pkt->ack.maxSkew), ntohl(pkt->ack.serial), -- cgit v1.1 From 7aa51da7c88d42cc0bb85ab7d01429fbd4e51282 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:29:31 +0100 Subject: rxrpc: Expedite ping response transmission Expedite the transmission of a response to a PING ACK by sending it from sendmsg if one is pending. We're most likely to see a PING ACK during the client call Tx phase as the other side may use it to determine a number of parameters, such as the client's receive window size, the RTT and whether the client is doing slow start (similar to RFC5681). If we don't expedite it, it's left to the background processing thread to transmit. Signed-off-by: David Howells --- net/rxrpc/sendmsg.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 814b17f..3c969de 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -180,6 +180,10 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, copied = 0; do { + /* Check to see if there's a ping ACK to reply to. 
*/ + if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE) + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + if (!skb) { size_t size, chunk, max, space; -- cgit v1.1 From 77f2efcbdd7133466060198e02c6e8a170c3cd14 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:29:01 +0100 Subject: rxrpc: Add ktime_sub_ms() Add a ktime_sub_ms() to go with ktime_add_ms() and co. for use in AF_RXRPC RTT determination. Signed-off-by: David Howells --- include/linux/ktime.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 2b6a204..aa118ba 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -231,6 +231,11 @@ static inline ktime_t ktime_sub_us(const ktime_t kt, const u64 usec) return ktime_sub_ns(kt, usec * NSEC_PER_USEC); } +static inline ktime_t ktime_sub_ms(const ktime_t kt, const u64 msec) +{ + return ktime_sub_ns(kt, msec * NSEC_PER_MSEC); +} + extern ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs); /** -- cgit v1.1 From 50235c4b5a2fb9a9690f02cd1dea6ca047d7f79e Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:29:31 +0100 Subject: rxrpc: Obtain RTT data by requesting ACKs on DATA packets In addition to sending a PING ACK to gain RTT data, we can set the RXRPC_REQUEST_ACK flag on a DATA packet and get a REQUESTED-ACK ACK. The ACK packet contains the serial number of the packet it is in response to, so we can look through the Tx buffer for a matching DATA packet. This requires that the data packets be stamped with the time of transmission as a ktime rather than having the resend_at time in jiffies. This further requires the resend code to do the resend determination in ktimes and convert to jiffies to set the timer. 
Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 7 +++---- net/rxrpc/call_event.c | 19 +++++++++---------- net/rxrpc/input.c | 35 +++++++++++++++++++++++++++++++++++ net/rxrpc/misc.c | 6 ++++-- net/rxrpc/output.c | 7 +++++-- net/rxrpc/sendmsg.c | 1 - net/rxrpc/sysctl.c | 2 +- 7 files changed, 57 insertions(+), 20 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 8b47f46..1c4597b 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -142,10 +142,7 @@ struct rxrpc_host_header { */ struct rxrpc_skb_priv { union { - unsigned long resend_at; /* time in jiffies at which to resend */ - struct { - u8 nr_jumbo; /* Number of jumbo subpackets */ - }; + u8 nr_jumbo; /* Number of jumbo subpackets */ }; union { unsigned int offset; /* offset into buffer of next read */ @@ -663,6 +660,7 @@ extern const char rxrpc_recvmsg_traces[rxrpc_recvmsg__nr_trace][5]; enum rxrpc_rtt_tx_trace { rxrpc_rtt_tx_ping, + rxrpc_rtt_tx_data, rxrpc_rtt_tx__nr_trace }; @@ -670,6 +668,7 @@ extern const char rxrpc_rtt_tx_traces[rxrpc_rtt_tx__nr_trace][5]; enum rxrpc_rtt_rx_trace { rxrpc_rtt_rx_ping_response, + rxrpc_rtt_rx_requested_ack, rxrpc_rtt_rx__nr_trace }; diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 34ad967..adb2ec6 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -142,12 +142,14 @@ static void rxrpc_resend(struct rxrpc_call *call) struct rxrpc_skb_priv *sp; struct sk_buff *skb; rxrpc_seq_t cursor, seq, top; - unsigned long resend_at, now; + ktime_t now = ktime_get_real(), max_age, oldest, resend_at; int ix; u8 annotation, anno_type; _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); + max_age = ktime_sub_ms(now, rxrpc_resend_timeout); + spin_lock_bh(&call->lock); cursor = call->tx_hard_ack; @@ -160,8 +162,7 @@ static void rxrpc_resend(struct rxrpc_call *call) * the packets in the Tx buffer we're going to resend and what the new * resend timeout will be. 
*/ - now = jiffies; - resend_at = now + rxrpc_resend_timeout; + oldest = now; for (seq = cursor + 1; before_eq(seq, top); seq++) { ix = seq & RXRPC_RXTX_BUFF_MASK; annotation = call->rxtx_annotations[ix]; @@ -175,9 +176,9 @@ static void rxrpc_resend(struct rxrpc_call *call) sp = rxrpc_skb(skb); if (anno_type == RXRPC_TX_ANNO_UNACK) { - if (time_after(sp->resend_at, now)) { - if (time_before(sp->resend_at, resend_at)) - resend_at = sp->resend_at; + if (ktime_after(skb->tstamp, max_age)) { + if (ktime_before(skb->tstamp, oldest)) + oldest = skb->tstamp; continue; } } @@ -186,7 +187,8 @@ static void rxrpc_resend(struct rxrpc_call *call) call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS | annotation; } - call->resend_at = resend_at; + resend_at = ktime_sub(ktime_add_ns(oldest, rxrpc_resend_timeout), now); + call->resend_at = jiffies + nsecs_to_jiffies(ktime_to_ns(resend_at)); /* Now go through the Tx window and perform the retransmissions. We * have to drop the lock for each send. If an ACK comes in whilst the @@ -205,15 +207,12 @@ static void rxrpc_resend(struct rxrpc_call *call) spin_unlock_bh(&call->lock); if (rxrpc_send_data_packet(call, skb) < 0) { - call->resend_at = now + 2; rxrpc_free_skb(skb, rxrpc_skb_tx_freed); return; } if (rxrpc_is_client_call(call)) rxrpc_expose_client_call(call); - sp = rxrpc_skb(skb); - sp->resend_at = now + rxrpc_resend_timeout; rxrpc_free_skb(skb, rxrpc_skb_tx_freed); spin_lock_bh(&call->lock); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index a0a5bd1..c121949 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -356,6 +356,38 @@ ack: } /* + * Process a requested ACK. 
+ */ +static void rxrpc_input_requested_ack(struct rxrpc_call *call, + ktime_t resp_time, + rxrpc_serial_t orig_serial, + rxrpc_serial_t ack_serial) +{ + struct rxrpc_skb_priv *sp; + struct sk_buff *skb; + ktime_t sent_at; + int ix; + + for (ix = 0; ix < RXRPC_RXTX_BUFF_SIZE; ix++) { + skb = call->rxtx_buffer[ix]; + if (!skb) + continue; + + sp = rxrpc_skb(skb); + if (sp->hdr.serial != orig_serial) + continue; + smp_rmb(); + sent_at = skb->tstamp; + goto found; + } + return; + +found: + rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_requested_ack, + orig_serial, ack_serial, sent_at, resp_time); +} + +/* * Process a ping response. */ static void rxrpc_input_ping_response(struct rxrpc_call *call, @@ -508,6 +540,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, if (buf.ack.reason == RXRPC_ACK_PING_RESPONSE) rxrpc_input_ping_response(call, skb->tstamp, acked_serial, sp->hdr.serial); + if (buf.ack.reason == RXRPC_ACK_REQUESTED) + rxrpc_input_requested_ack(call, skb->tstamp, acked_serial, + sp->hdr.serial); if (buf.ack.reason == RXRPC_ACK_PING) { _proto("Rx ACK %%%u PING Request", sp->hdr.serial); diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 56e6683..0d425e7 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -68,9 +68,9 @@ unsigned int rxrpc_rx_mtu = 5692; unsigned int rxrpc_rx_jumbo_max = 4; /* - * Time till packet resend (in jiffies). + * Time till packet resend (in milliseconds). 
*/ -unsigned int rxrpc_resend_timeout = 4 * HZ; +unsigned int rxrpc_resend_timeout = 4 * 1000; const char *const rxrpc_pkts[] = { "?00", @@ -186,8 +186,10 @@ const char rxrpc_recvmsg_traces[rxrpc_recvmsg__nr_trace][5] = { const char rxrpc_rtt_tx_traces[rxrpc_rtt_tx__nr_trace][5] = { [rxrpc_rtt_tx_ping] = "PING", + [rxrpc_rtt_tx_data] = "DATA", }; const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5] = { [rxrpc_rtt_rx_ping_response] = "PONG", + [rxrpc_rtt_rx_requested_ack] = "RACK", }; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 0d89cd3..db01fbb 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -300,9 +300,12 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) goto send_fragmentable; done: - if (ret == 0) { - sp->resend_at = jiffies + rxrpc_resend_timeout; + if (ret >= 0) { + skb->tstamp = ktime_get_real(); + smp_wmb(); sp->hdr.serial = serial; + if (whdr.flags & RXRPC_REQUEST_ACK) + trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial); } _leave(" = %d [%u]", ret, call->peer->maxdata); return ret; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 3c969de..607223f 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -137,7 +137,6 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, if (seq == 1 && rxrpc_is_client_call(call)) rxrpc_expose_client_call(call); - sp->resend_at = jiffies + rxrpc_resend_timeout; ret = rxrpc_send_data_packet(call, skb); if (ret < 0) { _debug("need instant resend %d", ret); diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index a03c61c..13d1df03 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -59,7 +59,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .data = &rxrpc_resend_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, + .proc_handler = proc_dointvec, .extra1 = (void *)&one, }, { -- cgit v1.1 From ada66b54c44add8702612940a08d077b4d6ecd0e Mon Sep 17 00:00:00 2001 From: Andrew 
Jeffery Date: Thu, 22 Sep 2016 08:34:58 +0930 Subject: net/faraday: Separate rx page storage from rxdesc The ftgmac100 hardware revision in e.g. the Aspeed AST2500 no longer reserves all bits in RXDES#2 but instead uses the bottom 16 bits to store MAC frame metadata. Avoid corruption by shifting struct page pointers out to their own member in struct ftgmac100. Signed-off-by: Andrew Jeffery Signed-off-by: Joel Stanley Signed-off-by: David S. Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 36361f8..4062256 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -60,6 +60,8 @@ struct ftgmac100 { struct ftgmac100_descs *descs; dma_addr_t descs_dma_addr; + struct page *rx_pages[RX_QUEUE_ENTRIES]; + unsigned int rx_pointer; unsigned int tx_clean_pointer; unsigned int tx_pointer; @@ -341,18 +343,27 @@ static bool ftgmac100_rxdes_ipcs_err(struct ftgmac100_rxdes *rxdes) return rxdes->rxdes1 & cpu_to_le32(FTGMAC100_RXDES1_IP_CHKSUM_ERR); } +static inline struct page **ftgmac100_rxdes_page_slot(struct ftgmac100 *priv, + struct ftgmac100_rxdes *rxdes) +{ + return &priv->rx_pages[rxdes - priv->descs->rxdes]; +} + /* * rxdes2 is not used by hardware. We use it to keep track of page. * Since hardware does not touch it, we can skip cpu_to_le32()/le32_to_cpu(). 
*/ -static void ftgmac100_rxdes_set_page(struct ftgmac100_rxdes *rxdes, struct page *page) +static void ftgmac100_rxdes_set_page(struct ftgmac100 *priv, + struct ftgmac100_rxdes *rxdes, + struct page *page) { - rxdes->rxdes2 = (unsigned int)page; + *ftgmac100_rxdes_page_slot(priv, rxdes) = page; } -static struct page *ftgmac100_rxdes_get_page(struct ftgmac100_rxdes *rxdes) +static struct page *ftgmac100_rxdes_get_page(struct ftgmac100 *priv, + struct ftgmac100_rxdes *rxdes) { - return (struct page *)rxdes->rxdes2; + return *ftgmac100_rxdes_page_slot(priv, rxdes); } /****************************************************************************** @@ -501,7 +512,7 @@ static bool ftgmac100_rx_packet(struct ftgmac100 *priv, int *processed) do { dma_addr_t map = ftgmac100_rxdes_get_dma_addr(rxdes); - struct page *page = ftgmac100_rxdes_get_page(rxdes); + struct page *page = ftgmac100_rxdes_get_page(priv, rxdes); unsigned int size; dma_unmap_page(priv->dev, map, RX_BUF_SIZE, DMA_FROM_DEVICE); @@ -779,7 +790,7 @@ static int ftgmac100_alloc_rx_page(struct ftgmac100 *priv, return -ENOMEM; } - ftgmac100_rxdes_set_page(rxdes, page); + ftgmac100_rxdes_set_page(priv, rxdes, page); ftgmac100_rxdes_set_dma_addr(rxdes, map); ftgmac100_rxdes_set_dma_own(rxdes); return 0; @@ -791,7 +802,7 @@ static void ftgmac100_free_buffers(struct ftgmac100 *priv) for (i = 0; i < RX_QUEUE_ENTRIES; i++) { struct ftgmac100_rxdes *rxdes = &priv->descs->rxdes[i]; - struct page *page = ftgmac100_rxdes_get_page(rxdes); + struct page *page = ftgmac100_rxdes_get_page(priv, rxdes); dma_addr_t map = ftgmac100_rxdes_get_dma_addr(rxdes); if (!page) -- cgit v1.1 From 7906a4da0ef845d01e76f2187c23cc71ae00fa1d Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Thu, 22 Sep 2016 08:34:59 +0930 Subject: net/faraday: Make EDO{R,T}R bits configurable These bits are #defined at a fixed location. 
In order to support future hardware that has chosen to move these bits around move the bits into a member of the struct ftgmac100. Signed-off-by: Andrew Jeffery Signed-off-by: Joel Stanley Signed-off-by: David S. Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 40 +++++++++++++++++++++----------- drivers/net/ethernet/faraday/ftgmac100.h | 2 -- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 4062256..62a88d1 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -79,6 +79,9 @@ struct ftgmac100 { int int_mask_all; bool use_ncsi; bool enabled; + + u32 rxdes0_edorr_mask; + u32 txdes0_edotr_mask; }; static int ftgmac100_alloc_rx_page(struct ftgmac100 *priv, @@ -259,10 +262,11 @@ static bool ftgmac100_rxdes_packet_ready(struct ftgmac100_rxdes *rxdes) return rxdes->rxdes0 & cpu_to_le32(FTGMAC100_RXDES0_RXPKT_RDY); } -static void ftgmac100_rxdes_set_dma_own(struct ftgmac100_rxdes *rxdes) +static void ftgmac100_rxdes_set_dma_own(const struct ftgmac100 *priv, + struct ftgmac100_rxdes *rxdes) { /* clear status bits */ - rxdes->rxdes0 &= cpu_to_le32(FTGMAC100_RXDES0_EDORR); + rxdes->rxdes0 &= cpu_to_le32(priv->rxdes0_edorr_mask); } static bool ftgmac100_rxdes_rx_error(struct ftgmac100_rxdes *rxdes) @@ -300,9 +304,10 @@ static bool ftgmac100_rxdes_multicast(struct ftgmac100_rxdes *rxdes) return rxdes->rxdes0 & cpu_to_le32(FTGMAC100_RXDES0_MULTICAST); } -static void ftgmac100_rxdes_set_end_of_ring(struct ftgmac100_rxdes *rxdes) +static void ftgmac100_rxdes_set_end_of_ring(const struct ftgmac100 *priv, + struct ftgmac100_rxdes *rxdes) { - rxdes->rxdes0 |= cpu_to_le32(FTGMAC100_RXDES0_EDORR); + rxdes->rxdes0 |= cpu_to_le32(priv->rxdes0_edorr_mask); } static void ftgmac100_rxdes_set_dma_addr(struct ftgmac100_rxdes *rxdes, @@ -393,7 +398,7 @@ ftgmac100_rx_locate_first_segment(struct ftgmac100 *priv) if 
(ftgmac100_rxdes_first_segment(rxdes)) return rxdes; - ftgmac100_rxdes_set_dma_own(rxdes); + ftgmac100_rxdes_set_dma_own(priv, rxdes); ftgmac100_rx_pointer_advance(priv); rxdes = ftgmac100_current_rxdes(priv); } @@ -464,7 +469,7 @@ static void ftgmac100_rx_drop_packet(struct ftgmac100 *priv) if (ftgmac100_rxdes_last_segment(rxdes)) done = true; - ftgmac100_rxdes_set_dma_own(rxdes); + ftgmac100_rxdes_set_dma_own(priv, rxdes); ftgmac100_rx_pointer_advance(priv); rxdes = ftgmac100_current_rxdes(priv); } while (!done && ftgmac100_rxdes_packet_ready(rxdes)); @@ -556,10 +561,11 @@ static bool ftgmac100_rx_packet(struct ftgmac100 *priv, int *processed) /****************************************************************************** * internal functions (transmit descriptor) *****************************************************************************/ -static void ftgmac100_txdes_reset(struct ftgmac100_txdes *txdes) +static void ftgmac100_txdes_reset(const struct ftgmac100 *priv, + struct ftgmac100_txdes *txdes) { /* clear all except end of ring bit */ - txdes->txdes0 &= cpu_to_le32(FTGMAC100_TXDES0_EDOTR); + txdes->txdes0 &= cpu_to_le32(priv->txdes0_edotr_mask); txdes->txdes1 = 0; txdes->txdes2 = 0; txdes->txdes3 = 0; @@ -580,9 +586,10 @@ static void ftgmac100_txdes_set_dma_own(struct ftgmac100_txdes *txdes) txdes->txdes0 |= cpu_to_le32(FTGMAC100_TXDES0_TXDMA_OWN); } -static void ftgmac100_txdes_set_end_of_ring(struct ftgmac100_txdes *txdes) +static void ftgmac100_txdes_set_end_of_ring(const struct ftgmac100 *priv, + struct ftgmac100_txdes *txdes) { - txdes->txdes0 |= cpu_to_le32(FTGMAC100_TXDES0_EDOTR); + txdes->txdes0 |= cpu_to_le32(priv->txdes0_edotr_mask); } static void ftgmac100_txdes_set_first_segment(struct ftgmac100_txdes *txdes) @@ -701,7 +708,7 @@ static bool ftgmac100_tx_complete_packet(struct ftgmac100 *priv) dev_kfree_skb(skb); - ftgmac100_txdes_reset(txdes); + ftgmac100_txdes_reset(priv, txdes); ftgmac100_tx_clean_pointer_advance(priv); @@ -792,7 +799,7 @@ 
static int ftgmac100_alloc_rx_page(struct ftgmac100 *priv, ftgmac100_rxdes_set_page(priv, rxdes, page); ftgmac100_rxdes_set_dma_addr(rxdes, map); - ftgmac100_rxdes_set_dma_own(rxdes); + ftgmac100_rxdes_set_dma_own(priv, rxdes); return 0; } @@ -839,7 +846,8 @@ static int ftgmac100_alloc_buffers(struct ftgmac100 *priv) return -ENOMEM; /* initialize RX ring */ - ftgmac100_rxdes_set_end_of_ring(&priv->descs->rxdes[RX_QUEUE_ENTRIES - 1]); + ftgmac100_rxdes_set_end_of_ring(priv, + &priv->descs->rxdes[RX_QUEUE_ENTRIES - 1]); for (i = 0; i < RX_QUEUE_ENTRIES; i++) { struct ftgmac100_rxdes *rxdes = &priv->descs->rxdes[i]; @@ -849,7 +857,8 @@ static int ftgmac100_alloc_buffers(struct ftgmac100 *priv) } /* initialize TX ring */ - ftgmac100_txdes_set_end_of_ring(&priv->descs->txdes[TX_QUEUE_ENTRIES - 1]); + ftgmac100_txdes_set_end_of_ring(priv, + &priv->descs->txdes[TX_QUEUE_ENTRIES - 1]); return 0; err: @@ -1336,6 +1345,9 @@ static int ftgmac100_probe(struct platform_device *pdev) priv->netdev = netdev; priv->dev = &pdev->dev; + priv->rxdes0_edorr_mask = BIT(15); + priv->txdes0_edotr_mask = BIT(15); + spin_lock_init(&priv->tx_lock); /* initialize NAPI */ diff --git a/drivers/net/ethernet/faraday/ftgmac100.h b/drivers/net/ethernet/faraday/ftgmac100.h index 13408d4..c258586 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.h +++ b/drivers/net/ethernet/faraday/ftgmac100.h @@ -189,7 +189,6 @@ struct ftgmac100_txdes { } __attribute__ ((aligned(16))); #define FTGMAC100_TXDES0_TXBUF_SIZE(x) ((x) & 0x3fff) -#define FTGMAC100_TXDES0_EDOTR (1 << 15) #define FTGMAC100_TXDES0_CRC_ERR (1 << 19) #define FTGMAC100_TXDES0_LTS (1 << 28) #define FTGMAC100_TXDES0_FTS (1 << 29) @@ -215,7 +214,6 @@ struct ftgmac100_rxdes { } __attribute__ ((aligned(16))); #define FTGMAC100_RXDES0_VDBC 0x3fff -#define FTGMAC100_RXDES0_EDORR (1 << 15) #define FTGMAC100_RXDES0_MULTICAST (1 << 16) #define FTGMAC100_RXDES0_BROADCAST (1 << 17) #define FTGMAC100_RXDES0_RX_ERR (1 << 18) -- cgit v1.1 From 
2a0ab8ebbec634127987fc8dbbd09a7fd7274e3d Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 22 Sep 2016 08:35:00 +0930 Subject: net/faraday: Adapt for Aspeed SoCs The RXDES and TXDES register bits in the ftgmac100 indicate EDO{R,T}R at bit position 15 for the Faraday Tech IP. However, the version of this IP present in the Aspeed SoCs has these bits at position 30 in the registers. It appears that ast2400 SoCs support both positions, with the 15th bit marked as reserved but still functional. In the ast2500 this bit is reused for another function, so we need a work around. This was confirmed with engineers from Aspeed that using bit 30 is correct for both the ast2400 and ast2500 SoCs. Signed-off-by: Joel Stanley Signed-off-by: David S. Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 62a88d1..47f5122 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1345,9 +1345,6 @@ static int ftgmac100_probe(struct platform_device *pdev) priv->netdev = netdev; priv->dev = &pdev->dev; - priv->rxdes0_edorr_mask = BIT(15); - priv->txdes0_edotr_mask = BIT(15); - spin_lock_init(&priv->tx_lock); /* initialize NAPI */ @@ -1381,6 +1378,16 @@ static int ftgmac100_probe(struct platform_device *pdev) FTGMAC100_INT_PHYSTS_CHG | FTGMAC100_INT_RPKT_BUF | FTGMAC100_INT_NO_RXBUF); + + if (of_machine_is_compatible("aspeed,ast2400") || + of_machine_is_compatible("aspeed,ast2500")) { + priv->rxdes0_edorr_mask = BIT(30); + priv->txdes0_edotr_mask = BIT(30); + } else { + priv->rxdes0_edorr_mask = BIT(15); + priv->txdes0_edotr_mask = BIT(15); + } + if (pdev->dev.of_node && of_get_property(pdev->dev.of_node, "use-ncsi", NULL)) { if (!IS_ENABLED(CONFIG_NET_NCSI)) { -- cgit v1.1 From 08c9c126004e999f0c05b369d1e0cc757e6040cc Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu,
22 Sep 2016 08:35:01 +0930 Subject: net/faraday: Clear stale interrupts There is a stale interrupt (PHYSTS_CHG in ISR, bit#6 in 0x0) from the bootloader (uboot) when enabling the MAC. The stale interrupts aren't part of the kernel and should be cleared. This clears the stale interrupts in ISR (0x0) when enabling the MAC. Signed-off-by: Gavin Shan Signed-off-by: Joel Stanley Signed-off-by: David S. Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 47f5122..1893737 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1112,6 +1112,7 @@ static int ftgmac100_poll(struct napi_struct *napi, int budget) static int ftgmac100_open(struct net_device *netdev) { struct ftgmac100 *priv = netdev_priv(netdev); + unsigned int status; int err; err = ftgmac100_alloc_buffers(priv); @@ -1137,6 +1138,11 @@ static int ftgmac100_open(struct net_device *netdev) ftgmac100_init_hw(priv); ftgmac100_start_hw(priv, priv->use_ncsi ? 100 : 10); + + /* Clear stale interrupts */ + status = ioread32(priv->base + FTGMAC100_OFFSET_ISR); + iowrite32(status, priv->base + FTGMAC100_OFFSET_ISR); + if (netdev->phydev) phy_start(netdev->phydev); else if (priv->use_ncsi) -- cgit v1.1 From e07dc63ba22df2d8bc931821b22938d4ce37a934 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 22 Sep 2016 08:35:02 +0930 Subject: net/faraday: Configure old MDIO interface on Aspeed SoCs The Aspeed SoCs have a new MDIO interface as an option in the G4 and G5 SoCs. The old one is still available, so select it in order to remain compatible with the ftgmac100 driver. Signed-off-by: Joel Stanley Signed-off-by: David S.
Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 9 +++++++++ drivers/net/ethernet/faraday/ftgmac100.h | 5 +++++ 2 files changed, 14 insertions(+) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 1893737..e3653b1 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1252,12 +1252,21 @@ static int ftgmac100_setup_mdio(struct net_device *netdev) struct ftgmac100 *priv = netdev_priv(netdev); struct platform_device *pdev = to_platform_device(priv->dev); int i, err = 0; + u32 reg; /* initialize mdio bus */ priv->mii_bus = mdiobus_alloc(); if (!priv->mii_bus) return -EIO; + if (of_machine_is_compatible("aspeed,ast2400") || + of_machine_is_compatible("aspeed,ast2500")) { + /* This driver supports the old MDIO interface */ + reg = ioread32(priv->base + FTGMAC100_OFFSET_REVR); + reg &= ~FTGMAC100_REVR_NEW_MDIO_INTERFACE; + iowrite32(reg, priv->base + FTGMAC100_OFFSET_REVR); + }; + priv->mii_bus->name = "ftgmac100_mdio"; snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "%s-%d", pdev->name, pdev->id); diff --git a/drivers/net/ethernet/faraday/ftgmac100.h b/drivers/net/ethernet/faraday/ftgmac100.h index c258586..8a377ab 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.h +++ b/drivers/net/ethernet/faraday/ftgmac100.h @@ -134,6 +134,11 @@ #define FTGMAC100_DMAFIFOS_TXDMA_REQ (1 << 31) /* + * Feature Register + */ +#define FTGMAC100_REVR_NEW_MDIO_INTERFACE BIT(31) + +/* * Receive buffer size register */ #define FTGMAC100_RBSR_SIZE(x) ((x) & 0x3fff) -- cgit v1.1 From edcd692fe4816ba6cb240b7a1d8b984ee7082763 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 22 Sep 2016 08:35:03 +0930 Subject: net/faraday: Mask out PHYSTS_CHG interrupt The PHYSTS_CHG (the ftgmac100's PHY IRQ) is telling the system to go look at the PHY registers for a link status change. 
The interrupt was causing issues on Aspeed SoCs where some board designs had an active high configuration, some active low, and in some cases repurposed for other functions. When misconfigured, Linux would chew 100% of CPU cycles servicing interrupts: [ 20.280000] ftgmac100 1e660000.ethernet eth0: [ISR] = 0x200: PHYSTS_CHG [ 20.280000] ftgmac100 1e660000.ethernet eth0: [ISR] = 0x200: PHYSTS_CHG [ 20.280000] ftgmac100 1e660000.ethernet eth0: [ISR] = 0x200: PHYSTS_CHG [ 20.300000] ftgmac100 1e660000.ethernet eth0: [ISR] = 0x200: PHYSTS_CHG While the ftgmac100 IP can be configured for high, low and edge sensitivity the current driver always polls the PHY, so we chose to mask out the interrupt. See https://patchwork.ozlabs.org/patch/672099/ for more discussion. Signed-off-by: Joel Stanley Signed-off-by: David S. Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 10 +++------- drivers/net/ethernet/faraday/ftgmac100.h | 1 + 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index e3653b1..90f9c54 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1075,14 +1075,12 @@ static int ftgmac100_poll(struct napi_struct *napi, int budget) } if (status & priv->int_mask_all & (FTGMAC100_INT_NO_RXBUF | - FTGMAC100_INT_RPKT_LOST | FTGMAC100_INT_AHB_ERR | - FTGMAC100_INT_PHYSTS_CHG)) { + FTGMAC100_INT_RPKT_LOST | FTGMAC100_INT_AHB_ERR)) { if (net_ratelimit()) - netdev_info(netdev, "[ISR] = 0x%x: %s%s%s%s\n", status, + netdev_info(netdev, "[ISR] = 0x%x: %s%s%s\n", status, status & FTGMAC100_INT_NO_RXBUF ? "NO_RXBUF " : "", status & FTGMAC100_INT_RPKT_LOST ? "RPKT_LOST " : "", - status & FTGMAC100_INT_AHB_ERR ? "AHB_ERR " : "", - status & FTGMAC100_INT_PHYSTS_CHG ? "PHYSTS_CHG" : ""); + status & FTGMAC100_INT_AHB_ERR ?
"AHB_ERR " : ""); if (status & FTGMAC100_INT_NO_RXBUF) { /* RX buffer unavailable */ @@ -1390,7 +1388,6 @@ static int ftgmac100_probe(struct platform_device *pdev) FTGMAC100_INT_XPKT_ETH | FTGMAC100_INT_XPKT_LOST | FTGMAC100_INT_AHB_ERR | - FTGMAC100_INT_PHYSTS_CHG | FTGMAC100_INT_RPKT_BUF | FTGMAC100_INT_NO_RXBUF); @@ -1412,7 +1409,6 @@ static int ftgmac100_probe(struct platform_device *pdev) dev_info(&pdev->dev, "Using NCSI interface\n"); priv->use_ncsi = true; - priv->int_mask_all &= ~FTGMAC100_INT_PHYSTS_CHG; priv->ndev = ncsi_register_dev(netdev, ftgmac100_ncsi_handler); if (!priv->ndev) goto err_ncsi_dev; diff --git a/drivers/net/ethernet/faraday/ftgmac100.h b/drivers/net/ethernet/faraday/ftgmac100.h index 8a377ab..a7ce0ac 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.h +++ b/drivers/net/ethernet/faraday/ftgmac100.h @@ -157,6 +157,7 @@ #define FTGMAC100_MACCR_FULLDUP (1 << 8) #define FTGMAC100_MACCR_GIGA_MODE (1 << 9) #define FTGMAC100_MACCR_CRC_APD (1 << 10) +#define FTGMAC100_MACCR_PHY_LINK_LEVEL (1 << 11) #define FTGMAC100_MACCR_RX_RUNT (1 << 12) #define FTGMAC100_MACCR_JUMBO_LF (1 << 13) #define FTGMAC100_MACCR_RX_ALL (1 << 14) -- cgit v1.1 From de1d657816c6fbb70f07b01d50ec669dff0d4e60 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 21 Sep 2016 16:16:14 -0700 Subject: tcp: fix under-accounting retransmit SNMP counters This patch fixes these under-accounting SNMP rtx stats LINUX_MIB_TCPFORWARDRETRANS LINUX_MIB_TCPFASTRETRANS LINUX_MIB_TCPSLOWSTARTRETRANS when retransmitting TSO packets Fixes: 10d3be569243 ("tcp-tso: do not split TSO packets at retransmit time") Signed-off-by: Yuchung Cheng Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f53d0cc..e15ec82 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2831,7 +2831,7 @@ begin_fwd: if (tcp_retransmit_skb(sk, skb, segs)) return; - NET_INC_STATS(sock_net(sk), mib_idx); + NET_ADD_STATS(sock_net(sk), mib_idx, tcp_skb_pcount(skb)); if (tcp_in_cwnd_reduction(sk)) tp->prr_out += tcp_skb_pcount(skb); -- cgit v1.1 From 7e32b44361abc77fbc01f2b97b045c405b2583e5 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 21 Sep 2016 16:16:15 -0700 Subject: tcp: properly account Fast Open SYN-ACK retrans Since the TFO socket is accepted right off SYN-data, the socket owner can call getsockopt(TCP_INFO) to collect ongoing SYN-ACK retransmission or timeout stats (i.e., tcpi_total_retrans, tcpi_retransmits). Currently those stats are only updated upon handshake completes. This patch fixes it. Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_output.c | 2 ++ net/ipv4/tcp_timer.c | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3ebf45b..08323bd 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5885,7 +5885,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) * so release it. */ if (req) { - tp->total_retrans = req->num_retrans; + inet_csk(sk)->icsk_retransmits = 0; reqsk_fastopen_remove(sk, req, false); } else { /* Make sure socket is routed, for correct metrics. 
*/ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e15ec82..5288cec 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3568,6 +3568,8 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) if (!res) { __TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); + if (unlikely(tcp_passive_fastopen(sk))) + tcp_sk(sk)->total_retrans++; } return res; } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index d84930b..f712b41 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -384,6 +384,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk) */ inet_rtx_syn_ack(sk, req); req->num_timeout++; + icsk->icsk_retransmits++; inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX); } -- cgit v1.1 From 0d4b103c008ac9f6f438d2618c155f6e868e5a67 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:29:31 +0100 Subject: rxrpc: Reduce the number of ACK-Requests sent Reduce the number of ACK-Requests we set on DATA packets that we're sending to reduce network traffic. We set the flag on odd-numbered DATA packets to start off the RTT cache until we have at least three entries in it and then probe once per second thereafter to keep it topped up. This could be made tunable in future. Note that from this point, the RXRPC_REQUEST_ACK flag is set on DATA packets as we transmit them and not stored statically in the sk_buff. 
Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/output.c | 13 +++++++++++-- net/rxrpc/peer_object.c | 1 + net/rxrpc/sendmsg.c | 2 -- 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 1c4597b..b13754a 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -255,6 +255,7 @@ struct rxrpc_peer { /* calculated RTT cache */ #define RXRPC_RTT_CACHE_SIZE 32 + ktime_t rtt_last_req; /* Time of last RTT request */ u64 rtt; /* Current RTT estimate (in nS) */ u64 rtt_sum; /* Sum of cache contents */ u64 rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* Determined RTT cache */ diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index db01fbb..282cb1e 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -270,6 +270,12 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) msg.msg_controllen = 0; msg.msg_flags = 0; + /* If our RTT cache needs working on, request an ACK. */ + if ((call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || + ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), + ktime_get_real())) + whdr.flags |= RXRPC_REQUEST_ACK; + if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { static int lose; if ((lose++ & 7) == 7) { @@ -301,11 +307,14 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) done: if (ret >= 0) { - skb->tstamp = ktime_get_real(); + ktime_t now = ktime_get_real(); + skb->tstamp = now; smp_wmb(); sp->hdr.serial = serial; - if (whdr.flags & RXRPC_REQUEST_ACK) + if (whdr.flags & RXRPC_REQUEST_ACK) { + call->peer->rtt_last_req = now; trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial); + } } _leave(" = %d [%u]", ret, call->peer->maxdata); return ret; diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index f3e5766..941b724 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -244,6 +244,7 @@ static void rxrpc_init_peer(struct rxrpc_peer *peer, unsigned long hash_key) peer->hash_key = 
hash_key; rxrpc_assess_MTU_size(peer); peer->mtu = peer->if_mtu; + peer->rtt_last_req = ktime_get_real(); switch (peer->srx.transport.family) { case AF_INET: diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 607223f..ca7c3be 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -299,8 +299,6 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, else if (call->tx_top - call->tx_hard_ack < call->tx_winsize) sp->hdr.flags |= RXRPC_MORE_PACKETS; - if (seq & 1) - sp->hdr.flags |= RXRPC_REQUEST_ACK; ret = conn->security->secure_packet( call, skb, skb->mark, skb->head); -- cgit v1.1 From fc943f67773487bb85131273f39b5f183caafe95 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:29:32 +0100 Subject: rxrpc: Reduce the number of PING ACKs sent We don't want to send a PING ACK for every new incoming call as that just adds to the network traffic. Instead, we send a PING ACK to the first three that we receive and then once per second thereafter. This could probably be made adjustable in future. 
Signed-off-by: David Howells --- net/rxrpc/call_event.c | 2 +- net/rxrpc/input.c | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index adb2ec6..6e2ea8f 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -142,7 +142,7 @@ static void rxrpc_resend(struct rxrpc_call *call) struct rxrpc_skb_priv *sp; struct sk_buff *skb; rxrpc_seq_t cursor, seq, top; - ktime_t now = ktime_get_real(), max_age, oldest, resend_at; + ktime_t now = ktime_get_real(), max_age, oldest, resend_at; int ix; u8 annotation, anno_type; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index c121949..cbb5d53 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -44,9 +44,12 @@ static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb, int skew) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + ktime_t now = skb->tstamp; - rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, - true, true); + if (call->peer->rtt_usage < 3 || + ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now)) + rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, + true, true); } /* -- cgit v1.1 From 572de608e36279f249c9a6350f142e69f23dacab Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 22 Sep 2016 10:33:54 +0800 Subject: net: ethernet: mediatek: add extension of phy-mode for TRGMII adds PHY-mode "trgmii" as an extension for the operation mode of the PHY interface for PHY_INTERFACE_MODE_TRGMII. and adds a variable trgmii inside mtk_mac as the indication to make the difference between the MAC connected to internal switch or connected to external PHY by the given configuration on the board and then to perform the corresponding setup on TRGMII hardware module. Signed-off-by: Sean Wang Cc: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 ++ drivers/net/ethernet/mediatek/mtk_eth_soc.h | 3 +++ include/linux/phy.h | 3 +++ 3 files changed, 8 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 2909372..e873e21 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -244,6 +244,8 @@ static int mtk_phy_connect(struct mtk_mac *mac) return -ENODEV; switch (of_get_phy_mode(np)) { + case PHY_INTERFACE_MODE_TRGMII: + mac->trgmii = true; case PHY_INTERFACE_MODE_RGMII_TXID: case PHY_INTERFACE_MODE_RGMII_RXID: case PHY_INTERFACE_MODE_RGMII_ID: diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 7c5e534..e3b9525 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -529,6 +529,8 @@ struct mtk_eth { * @hw: Backpointer to our main datastruture * @hw_stats: Packet statistics counter * @phy_dev: The attached PHY if available + * @trgmii Indicate if the MAC uses TRGMII connected to internal + switch */ struct mtk_mac { int id; @@ -539,6 +541,7 @@ struct mtk_mac { struct phy_device *phy_dev; __be32 hwlro_ip[MTK_MAX_LRO_IP_CNT]; int hwlro_ip_cnt; + bool trgmii; }; /* the struct describing the SoC. 
these are declared in the soc_xyz.c files */ diff --git a/include/linux/phy.h b/include/linux/phy.h index 2d24b28..e25f183 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -80,6 +80,7 @@ typedef enum { PHY_INTERFACE_MODE_XGMII, PHY_INTERFACE_MODE_MOCA, PHY_INTERFACE_MODE_QSGMII, + PHY_INTERFACE_MODE_TRGMII, PHY_INTERFACE_MODE_MAX, } phy_interface_t; @@ -123,6 +124,8 @@ static inline const char *phy_modes(phy_interface_t interface) return "moca"; case PHY_INTERFACE_MODE_QSGMII: return "qsgmii"; + case PHY_INTERFACE_MODE_TRGMII: + return "trgmii"; default: return "unknown"; } -- cgit v1.1 From f430dea7c150dab2c103d28fa32efb59b5ae80b4 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 22 Sep 2016 10:33:55 +0800 Subject: net: ethernet: mediatek: add support for GMAC0 connecting with external PHY through TRGMII Changing dynamically source clock, TX/RX delay and interface mode used by TRGMII hardware module inside PHY capability polling routine for adapting to the various speed of RGMII used by external PHY for GMAC0. Signed-off-by: Sean Wang Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 32 ++++++++++++++++++++++++++++- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 31 +++++++++++++++++++++++++++- 2 files changed, 61 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index e873e21..ec60794 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -52,7 +52,7 @@ static const struct mtk_ethtool_stats { }; static const char * const mtk_clks_source_name[] = { - "ethif", "esw", "gp1", "gp2" + "ethif", "esw", "gp1", "gp2", "trgpll" }; void mtk_w32(struct mtk_eth *eth, u32 val, unsigned reg) @@ -135,6 +135,33 @@ static int mtk_mdio_read(struct mii_bus *bus, int phy_addr, int phy_reg) return _mtk_mdio_read(eth, phy_addr, phy_reg); } +static void mtk_gmac0_rgmii_adjust(struct mtk_eth *eth, int speed) +{ + u32 val; + int ret; + + val = (speed == SPEED_1000) ? + INTF_MODE_RGMII_1000 : INTF_MODE_RGMII_10_100; + mtk_w32(eth, val, INTF_MODE); + + regmap_update_bits(eth->ethsys, ETHSYS_CLKCFG0, + ETHSYS_TRGMII_CLK_SEL362_5, + ETHSYS_TRGMII_CLK_SEL362_5); + + val = (speed == SPEED_1000) ? 250000000 : 500000000; + ret = clk_set_rate(eth->clks[MTK_CLK_TRGPLL], val); + if (ret) + dev_err(eth->dev, "Failed to set trgmii pll: %d\n", ret); + + val = (speed == SPEED_1000) ? + RCK_CTRL_RGMII_1000 : RCK_CTRL_RGMII_10_100; + mtk_w32(eth, val, TRGMII_RCK_CTRL); + + val = (speed == SPEED_1000) ? 
+ TCK_CTRL_RGMII_1000 : TCK_CTRL_RGMII_10_100; + mtk_w32(eth, val, TRGMII_TCK_CTRL); +} + static void mtk_phy_link_adjust(struct net_device *dev) { struct mtk_mac *mac = netdev_priv(dev); @@ -157,6 +184,9 @@ static void mtk_phy_link_adjust(struct net_device *dev) break; }; + if (mac->id == 0 && !mac->trgmii) + mtk_gmac0_rgmii_adjust(mac->hw, mac->phy_dev->speed); + if (mac->phy_dev->link) mcr |= MAC_MCR_FORCE_LINK; diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index e3b9525..e521156 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -313,6 +313,30 @@ MAC_MCR_FORCE_TX_FC | MAC_MCR_SPEED_1000 | \ MAC_MCR_FORCE_DPX | MAC_MCR_FORCE_LINK) +/* TRGMII RXC control register */ +#define TRGMII_RCK_CTRL 0x10300 +#define DQSI0(x) ((x << 0) & GENMASK(6, 0)) +#define DQSI1(x) ((x << 8) & GENMASK(14, 8)) +#define RXCTL_DMWTLAT(x) ((x << 16) & GENMASK(18, 16)) +#define RXC_DQSISEL BIT(30) +#define RCK_CTRL_RGMII_1000 (RXC_DQSISEL | RXCTL_DMWTLAT(2) | DQSI1(16)) +#define RCK_CTRL_RGMII_10_100 RXCTL_DMWTLAT(2) + +/* TRGMII RXC control register */ +#define TRGMII_TCK_CTRL 0x10340 +#define TXCTL_DMWTLAT(x) ((x << 16) & GENMASK(18, 16)) +#define TXC_INV BIT(30) +#define TCK_CTRL_RGMII_1000 TXCTL_DMWTLAT(2) +#define TCK_CTRL_RGMII_10_100 (TXC_INV | TXCTL_DMWTLAT(2)) + +/* TRGMII Interface mode register */ +#define INTF_MODE 0x10390 +#define TRGMII_INTF_DIS BIT(0) +#define TRGMII_MODE BIT(1) +#define TRGMII_CENTRAL_ALIGNED BIT(2) +#define INTF_MODE_RGMII_1000 (TRGMII_MODE | TRGMII_CENTRAL_ALIGNED) +#define INTF_MODE_RGMII_10_100 0 + /* GPIO port control registers for GMAC 2*/ #define GPIO_OD33_CTRL8 0x4c0 #define GPIO_BIAS_CTRL 0xed0 @@ -323,7 +347,11 @@ #define SYSCFG0_GE_MASK 0x3 #define SYSCFG0_GE_MODE(x, y) (x << (12 + (y * 2))) -/*ethernet reset control register*/ +/* ethernet subsystem clock register */ +#define ETHSYS_CLKCFG0 0x2c +#define ETHSYS_TRGMII_CLK_SEL362_5 
BIT(11) + +/* ethernet reset control register */ #define ETHSYS_RSTCTRL 0x34 #define RSTCTRL_FE BIT(6) #define RSTCTRL_PPE BIT(31) @@ -389,6 +417,7 @@ enum mtk_clks_map { MTK_CLK_ESW, MTK_CLK_GP1, MTK_CLK_GP2, + MTK_CLK_TRGPLL, MTK_CLK_MAX }; -- cgit v1.1 From b88539658a597e649627c4cc2d446456fc01e104 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 22 Sep 2016 10:33:56 +0800 Subject: net: ethernet: mediatek: add the dts property to set if TRGMII supported on GMAC0 Add the dts property for the capability if TRGMII supported on GMAC0 Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/mediatek-net.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/mediatek-net.txt b/Documentation/devicetree/bindings/net/mediatek-net.txt index 6103e55..7111278 100644 --- a/Documentation/devicetree/bindings/net/mediatek-net.txt +++ b/Documentation/devicetree/bindings/net/mediatek-net.txt @@ -31,7 +31,10 @@ Optional properties: Required properties: - compatible: Should be "mediatek,eth-mac" - reg: The number of the MAC -- phy-handle: see ethernet.txt file in the same directory. +- phy-handle: see ethernet.txt file in the same directory and + the phy-mode "trgmii" required being provided when reg + is equal to 0 and the MAC uses fixed-link to connect + with internal switch such as MT7530. Example: -- cgit v1.1 From 2364c5c5ec14e936826eb10af56a337ccec01ffa Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 22 Sep 2016 16:33:35 +0800 Subject: net: ethernet: mediatek: use phydev from struct net_device reuse phydev already in struct net_device instead of creating another new one in private structure. Signed-off-by: Sean Wang Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 73 ++++++++++++++--------------- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 2 - 2 files changed, 36 insertions(+), 39 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index ec60794..6b7acf4 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -175,7 +175,7 @@ static void mtk_phy_link_adjust(struct net_device *dev) if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) return; - switch (mac->phy_dev->speed) { + switch (dev->phydev->speed) { case SPEED_1000: mcr |= MAC_MCR_SPEED_1000; break; @@ -185,22 +185,22 @@ static void mtk_phy_link_adjust(struct net_device *dev) }; if (mac->id == 0 && !mac->trgmii) - mtk_gmac0_rgmii_adjust(mac->hw, mac->phy_dev->speed); + mtk_gmac0_rgmii_adjust(mac->hw, dev->phydev->speed); - if (mac->phy_dev->link) + if (dev->phydev->link) mcr |= MAC_MCR_FORCE_LINK; - if (mac->phy_dev->duplex) { + if (dev->phydev->duplex) { mcr |= MAC_MCR_FORCE_DPX; - if (mac->phy_dev->pause) + if (dev->phydev->pause) rmt_adv = LPA_PAUSE_CAP; - if (mac->phy_dev->asym_pause) + if (dev->phydev->asym_pause) rmt_adv |= LPA_PAUSE_ASYM; - if (mac->phy_dev->advertising & ADVERTISED_Pause) + if (dev->phydev->advertising & ADVERTISED_Pause) lcl_adv |= ADVERTISE_PAUSE_CAP; - if (mac->phy_dev->advertising & ADVERTISED_Asym_Pause) + if (dev->phydev->advertising & ADVERTISED_Asym_Pause) lcl_adv |= ADVERTISE_PAUSE_ASYM; flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv); @@ -217,7 +217,7 @@ static void mtk_phy_link_adjust(struct net_device *dev) mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id)); - if (mac->phy_dev->link) + if (dev->phydev->link) netif_carrier_on(dev); else netif_carrier_off(dev); @@ -255,17 +255,17 @@ static int mtk_phy_connect_node(struct mtk_eth *eth, struct mtk_mac *mac, mac->id, phydev_name(phydev), phydev->phy_id, phydev->drv->name); - mac->phy_dev = phydev; - return 0; } 
-static int mtk_phy_connect(struct mtk_mac *mac) +static int mtk_phy_connect(struct net_device *dev) { - struct mtk_eth *eth = mac->hw; + struct mtk_mac *mac = netdev_priv(dev); + struct mtk_eth *eth; struct device_node *np; u32 val; + eth = mac->hw; np = of_parse_phandle(mac->of_node, "phy-handle", 0); if (!np && of_phy_is_fixed_link(mac->of_node)) if (!of_phy_register_fixed_link(mac->of_node)) @@ -303,20 +303,21 @@ static int mtk_phy_connect(struct mtk_mac *mac) val |= SYSCFG0_GE_MODE(mac->ge_mode, mac->id); regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val); + /* couple phydev to net_device */ mtk_phy_connect_node(eth, mac, np); - mac->phy_dev->autoneg = AUTONEG_ENABLE; - mac->phy_dev->speed = 0; - mac->phy_dev->duplex = 0; + dev->phydev->autoneg = AUTONEG_ENABLE; + dev->phydev->speed = 0; + dev->phydev->duplex = 0; if (of_phy_is_fixed_link(mac->of_node)) - mac->phy_dev->supported |= + dev->phydev->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; - mac->phy_dev->supported &= PHY_GBIT_FEATURES | SUPPORTED_Pause | + dev->phydev->supported &= PHY_GBIT_FEATURES | SUPPORTED_Pause | SUPPORTED_Asym_Pause; - mac->phy_dev->advertising = mac->phy_dev->supported | + dev->phydev->advertising = dev->phydev->supported | ADVERTISED_Autoneg; - phy_start_aneg(mac->phy_dev); + phy_start_aneg(dev->phydev); of_node_put(np); @@ -1742,7 +1743,7 @@ static int mtk_open(struct net_device *dev) } atomic_inc(&eth->dma_refcnt); - phy_start(mac->phy_dev); + phy_start(dev->phydev); netif_start_queue(dev); return 0; @@ -1777,7 +1778,7 @@ static int mtk_stop(struct net_device *dev) struct mtk_eth *eth = mac->hw; netif_tx_disable(dev); - phy_stop(mac->phy_dev); + phy_stop(dev->phydev); /* only shutdown DMA if this is the last user */ if (!atomic_dec_and_test(&eth->dma_refcnt)) @@ -1917,7 +1918,7 @@ static int __init mtk_init(struct net_device *dev) dev->addr_assign_type = NET_ADDR_RANDOM; } - return mtk_phy_connect(mac); + return mtk_phy_connect(dev); } static void mtk_uninit(struct net_device *dev) 
@@ -1925,20 +1926,18 @@ static void mtk_uninit(struct net_device *dev) struct mtk_mac *mac = netdev_priv(dev); struct mtk_eth *eth = mac->hw; - phy_disconnect(mac->phy_dev); + phy_disconnect(dev->phydev); mtk_irq_disable(eth, MTK_QDMA_INT_MASK, ~0); mtk_irq_disable(eth, MTK_PDMA_INT_MASK, ~0); } static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { - struct mtk_mac *mac = netdev_priv(dev); - switch (cmd) { case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: - return phy_mii_ioctl(mac->phy_dev, ifr, cmd); + return phy_mii_ioctl(dev->phydev, ifr, cmd); default: break; } @@ -1983,7 +1982,7 @@ static void mtk_pending_work(struct work_struct *work) if (!eth->mac[i] || of_phy_is_fixed_link(eth->mac[i]->of_node)) continue; - err = phy_init_hw(eth->mac[i]->phy_dev); + err = phy_init_hw(eth->netdev[i]->phydev); if (err) dev_err(eth->dev, "%s: PHY init failed.\n", eth->netdev[i]->name); @@ -2052,11 +2051,11 @@ static int mtk_get_settings(struct net_device *dev, if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) return -EBUSY; - err = phy_read_status(mac->phy_dev); + err = phy_read_status(dev->phydev); if (err) return -ENODEV; - return phy_ethtool_gset(mac->phy_dev, cmd); + return phy_ethtool_gset(dev->phydev, cmd); } static int mtk_set_settings(struct net_device *dev, @@ -2064,14 +2063,14 @@ static int mtk_set_settings(struct net_device *dev, { struct mtk_mac *mac = netdev_priv(dev); - if (cmd->phy_address != mac->phy_dev->mdio.addr) { - mac->phy_dev = mdiobus_get_phy(mac->hw->mii_bus, + if (cmd->phy_address != dev->phydev->mdio.addr) { + dev->phydev = mdiobus_get_phy(mac->hw->mii_bus, cmd->phy_address); - if (!mac->phy_dev) + if (!dev->phydev) return -ENODEV; } - return phy_ethtool_sset(mac->phy_dev, cmd); + return phy_ethtool_sset(dev->phydev, cmd); } static void mtk_get_drvinfo(struct net_device *dev, @@ -2105,7 +2104,7 @@ static int mtk_nway_reset(struct net_device *dev) if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) return -EBUSY; 
- return genphy_restart_aneg(mac->phy_dev); + return genphy_restart_aneg(dev->phydev); } static u32 mtk_get_link(struct net_device *dev) @@ -2116,11 +2115,11 @@ static u32 mtk_get_link(struct net_device *dev) if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) return -EBUSY; - err = genphy_update_link(mac->phy_dev); + err = genphy_update_link(dev->phydev); if (err) return ethtool_op_get_link(dev); - return mac->phy_dev->link; + return dev->phydev->link; } static void mtk_get_strings(struct net_device *dev, u32 stringset, u8 *data) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index e521156..7e194f7 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -557,7 +557,6 @@ struct mtk_eth { * @of_node: Our devicetree node * @hw: Backpointer to our main datastruture * @hw_stats: Packet statistics counter - * @phy_dev: The attached PHY if available * @trgmii Indicate if the MAC uses TRGMII connected to internal switch */ @@ -567,7 +566,6 @@ struct mtk_mac { struct device_node *of_node; struct mtk_eth *hw; struct mtk_hw_stats *hw_stats; - struct phy_device *phy_dev; __be32 hwlro_ip[MTK_MAX_LRO_IP_CNT]; int hwlro_ip_cnt; bool trgmii; -- cgit v1.1 From a2b2a19f0fbc674478a6806ea9e4f6aff06763f8 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 22 Sep 2016 16:36:15 +0800 Subject: net: ethernet: mediatek: remove superfluous local variable for phy address remove the unused variable for parsing PHY address and the related logic for sanity test which would be all already handled done when of_mdiobus_register was called Reported-by: Nelson Chang Signed-off-by: Sean Wang Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 6b7acf4..1918c39 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -226,17 +226,9 @@ static void mtk_phy_link_adjust(struct net_device *dev) static int mtk_phy_connect_node(struct mtk_eth *eth, struct mtk_mac *mac, struct device_node *phy_node) { - const __be32 *_addr = NULL; struct phy_device *phydev; - int phy_mode, addr; + int phy_mode; - _addr = of_get_property(phy_node, "reg", NULL); - - if (!_addr || (be32_to_cpu(*_addr) >= 0x20)) { - pr_err("%s: invalid phy address\n", phy_node->name); - return -EINVAL; - } - addr = be32_to_cpu(*_addr); phy_mode = of_get_phy_mode(phy_node); if (phy_mode < 0) { dev_err(eth->dev, "incorrect phy-mode %d\n", phy_mode); -- cgit v1.1 From 3e60b748fd2f50bbdb6a61fdd48222cd492c77e3 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 22 Sep 2016 16:42:03 +0800 Subject: net: ethernet: mediatek: use [get|set]_link_ksettings 1) use new api [get|set]_link_ksettings instead of [get|set]_settings old ones. 2) dev->phydev is sure being ready before calling these callbacks, so removing all the sanity check if it is existing. Signed-off-by: Sean Wang Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 33 +++++++++++------------------ 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 1918c39..a65801a 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2034,35 +2034,26 @@ static int mtk_cleanup(struct mtk_eth *eth) return 0; } -static int mtk_get_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +int mtk_get_link_ksettings(struct net_device *ndev, + struct ethtool_link_ksettings *cmd) { - struct mtk_mac *mac = netdev_priv(dev); - int err; + struct mtk_mac *mac = netdev_priv(ndev); if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) return -EBUSY; - err = phy_read_status(dev->phydev); - if (err) - return -ENODEV; - - return phy_ethtool_gset(dev->phydev, cmd); + return phy_ethtool_ksettings_get(ndev->phydev, cmd); } -static int mtk_set_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +int mtk_set_link_ksettings(struct net_device *ndev, + const struct ethtool_link_ksettings *cmd) { - struct mtk_mac *mac = netdev_priv(dev); + struct mtk_mac *mac = netdev_priv(ndev); - if (cmd->phy_address != dev->phydev->mdio.addr) { - dev->phydev = mdiobus_get_phy(mac->hw->mii_bus, - cmd->phy_address); - if (!dev->phydev) - return -ENODEV; - } + if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) + return -EBUSY; - return phy_ethtool_sset(dev->phydev, cmd); + return phy_ethtool_ksettings_set(ndev->phydev, cmd); } static void mtk_get_drvinfo(struct net_device *dev, @@ -2225,8 +2216,8 @@ static int mtk_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) } static const struct ethtool_ops mtk_ethtool_ops = { - .get_settings = mtk_get_settings, - .set_settings = mtk_set_settings, + .get_link_ksettings = mtk_get_link_ksettings, + .set_link_ksettings = mtk_set_link_ksettings, .get_drvinfo = mtk_get_drvinfo, .get_msglevel = 
mtk_get_msglevel, .set_msglevel = mtk_set_msglevel, -- cgit v1.1 From f6f7d9c03f5daae04449ad19de5e2f0c20c5eaac Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 22 Sep 2016 16:44:16 +0800 Subject: net: ethernet: mediatek: get out of potential invalid pointer access Potential dangerous invalid pointer might be accessed if the error happens when couple phy_device to net_device so cleanup the error path. Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index a65801a..3d7e0cb 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -296,7 +296,9 @@ static int mtk_phy_connect(struct net_device *dev) regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val); /* couple phydev to net_device */ - mtk_phy_connect_node(eth, mac, np); + if (mtk_phy_connect_node(eth, mac, np)) + goto err_phy; + dev->phydev->autoneg = AUTONEG_ENABLE; dev->phydev->speed = 0; dev->phydev->duplex = 0; @@ -317,7 +319,7 @@ static int mtk_phy_connect(struct net_device *dev) err_phy: of_node_put(np); - dev_err(eth->dev, "invalid phy_mode\n"); + dev_err(eth->dev, "%s: invalid phy\n", __func__); return -EINVAL; } -- cgit v1.1 From 0364a8824c020f12e2d5e9fad963685b58f7574e Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 22 Sep 2016 11:06:25 +0200 Subject: xen-netback: switch to threaded irq for control ring Instead of open coding it use the threaded irq mechanism in xen-netback. Signed-off-by: Juergen Gross Acked-by: Wei Liu Signed-off-by: David S. 
Miller --- drivers/net/xen-netback/common.h | 4 +--- drivers/net/xen-netback/interface.c | 38 ++++++------------------------------- drivers/net/xen-netback/netback.c | 18 ++++-------------- 3 files changed, 11 insertions(+), 49 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 3a56268..ff94c51 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -292,8 +292,6 @@ struct xenvif { #endif struct xen_netif_ctrl_back_ring ctrl; - struct task_struct *ctrl_task; - wait_queue_head_t ctrl_wq; unsigned int ctrl_irq; /* Miscellaneous private stuff. */ @@ -359,7 +357,7 @@ void xenvif_kick_thread(struct xenvif_queue *queue); int xenvif_dealloc_kthread(void *data); -int xenvif_ctrl_kthread(void *data); +irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data); void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 83deeeb..fb50c6d 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -128,15 +128,6 @@ irqreturn_t xenvif_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -irqreturn_t xenvif_ctrl_interrupt(int irq, void *dev_id) -{ - struct xenvif *vif = dev_id; - - wake_up(&vif->ctrl_wq); - - return IRQ_HANDLED; -} - int xenvif_queue_stopped(struct xenvif_queue *queue) { struct net_device *dev = queue->vif->dev; @@ -570,8 +561,7 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref, struct net_device *dev = vif->dev; void *addr; struct xen_netif_ctrl_sring *shared; - struct task_struct *task; - int err = -ENOMEM; + int err; err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif), &ring_ref, 1, &addr); @@ -581,11 +571,7 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref, shared = (struct xen_netif_ctrl_sring *)addr; BACK_RING_INIT(&vif->ctrl, shared, XEN_PAGE_SIZE); - init_waitqueue_head(&vif->ctrl_wq); - - err = 
bind_interdomain_evtchn_to_irqhandler(vif->domid, evtchn, - xenvif_ctrl_interrupt, - 0, dev->name, vif); + err = bind_interdomain_evtchn_to_irq(vif->domid, evtchn); if (err < 0) goto err_unmap; @@ -593,19 +579,13 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref, xenvif_init_hash(vif); - task = kthread_create(xenvif_ctrl_kthread, (void *)vif, - "%s-control", dev->name); - if (IS_ERR(task)) { - pr_warn("Could not allocate kthread for %s\n", dev->name); - err = PTR_ERR(task); + err = request_threaded_irq(vif->ctrl_irq, NULL, xenvif_ctrl_irq_fn, + IRQF_ONESHOT, "xen-netback-ctrl", vif); + if (err) { + pr_warn("Could not setup irq handler for %s\n", dev->name); goto err_deinit; } - get_task_struct(task); - vif->ctrl_task = task; - - wake_up_process(vif->ctrl_task); - return 0; err_deinit: @@ -774,12 +754,6 @@ void xenvif_disconnect_data(struct xenvif *vif) void xenvif_disconnect_ctrl(struct xenvif *vif) { - if (vif->ctrl_task) { - kthread_stop(vif->ctrl_task); - put_task_struct(vif->ctrl_task); - vif->ctrl_task = NULL; - } - if (vif->ctrl_irq) { xenvif_deinit_hash(vif); unbind_from_irqhandler(vif->ctrl_irq, vif); diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index edbae0b..3d0c989 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -2359,24 +2359,14 @@ static bool xenvif_ctrl_work_todo(struct xenvif *vif) return 0; } -int xenvif_ctrl_kthread(void *data) +irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data) { struct xenvif *vif = data; - for (;;) { - wait_event_interruptible(vif->ctrl_wq, - xenvif_ctrl_work_todo(vif) || - kthread_should_stop()); - if (kthread_should_stop()) - break; - - while (xenvif_ctrl_work_todo(vif)) - xenvif_ctrl_action(vif); + while (xenvif_ctrl_work_todo(vif)) + xenvif_ctrl_action(vif); - cond_resched(); - } - - return 0; + return IRQ_HANDLED; } static int __init netback_init(void) -- cgit v1.1 From f15823510246444052b35f148c7ae627842b0e05 Mon Sep 17 
00:00:00 2001 From: Alan Brady Date: Wed, 24 Aug 2016 11:33:46 -0700 Subject: i40e: fix setting user defined RSS hash key Previously, when using ethtool to change the RSS hash key, ethtool would report back saying the old key was still being used and no error was reported. It was unclear whether it was being reported incorrectly or being set incorrectly. Debugging revealed 'i40e_set_rxfh()' returned zero immediately instead of setting the key because a user defined indirection table is not supplied when changing the hash key. This fix instead changes it such that if an indirection table is not supplied, then a default one is created and the hash key is now correctly set. Change-ID: Iddb621897ecf208650272b7ee46702cad7b69a71 Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 2 ++ drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 12 +++++++----- drivers/net/ethernet/intel/i40e/i40e_main.c | 6 ++---- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 19103a6..30aaee4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -701,6 +701,8 @@ void i40e_do_reset_safe(struct i40e_pf *pf, u32 reset_flags); void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags); int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); +void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut, + u16 rss_table_size, u16 rss_size); struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id); void i40e_update_stats(struct i40e_vsi *vsi); void i40e_update_eth_stats(struct i40e_vsi *vsi); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 1835186..af28a8c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ 
b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2922,15 +2922,13 @@ static int i40e_set_rxfh(struct net_device *netdev, const u32 *indir, { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; + struct i40e_pf *pf = vsi->back; u8 *seed = NULL; u16 i; if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; - if (!indir) - return 0; - if (key) { if (!vsi->rss_hkey_user) { vsi->rss_hkey_user = kzalloc(I40E_HKEY_ARRAY_SIZE, @@ -2948,8 +2946,12 @@ static int i40e_set_rxfh(struct net_device *netdev, const u32 *indir, } /* Each 32 bits pointed by 'indir' is stored with a lut entry */ - for (i = 0; i < I40E_HLUT_ARRAY_SIZE; i++) - vsi->rss_lut_user[i] = (u8)(indir[i]); + if (indir) + for (i = 0; i < I40E_HLUT_ARRAY_SIZE; i++) + vsi->rss_lut_user[i] = (u8)(indir[i]); + else + i40e_fill_rss_lut(pf, vsi->rss_lut_user, I40E_HLUT_ARRAY_SIZE, + vsi->rss_size); return i40e_config_rss(vsi, seed, vsi->rss_lut_user, I40E_HLUT_ARRAY_SIZE); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 61b0fc4..69b9e30 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -57,8 +57,6 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit); static int i40e_setup_misc_vector(struct i40e_pf *pf); static void i40e_determine_queue_usage(struct i40e_pf *pf); static int i40e_setup_pf_filter_control(struct i40e_pf *pf); -static void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut, - u16 rss_table_size, u16 rss_size); static void i40e_fdir_sb_setup(struct i40e_pf *pf); static int i40e_veb_get_bw_info(struct i40e_veb *veb); @@ -8244,8 +8242,8 @@ int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) * @rss_table_size: Lookup table size * @rss_size: Range of queue number for hashing */ -static void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut, - u16 rss_table_size, u16 rss_size) +void 
i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut, + u16 rss_table_size, u16 rss_size) { u16 i; -- cgit v1.1 From a01c7f6709925919e2e3c6c190a92692f63f74e4 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Wed, 24 Aug 2016 11:33:47 -0700 Subject: i40e: fix "dump port" command when NPAR enabled When using the debugfs to issue the "dump port" command with NPAR enabled, the firmware reports back with invalid argument. The issue occurs because the pf->mac_seid was used to perform the query. This is fine when NPAR is disabled because the switch ID == pf->mac_seid, however this is not the case when NPAR is enabled. This fix instead goes through the VSI to determine the correct ID to use in either case. Change-ID: I0cd67913a7f2c4a2962e06d39e32e7447cc55b6a Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_debugfs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 05cf9a7..8555f04 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -1054,6 +1054,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp, struct i40e_dcbx_config *r_cfg = &pf->hw.remote_dcbx_config; int i, ret; + u32 switch_id; bw_data = kzalloc(sizeof( struct i40e_aqc_query_port_ets_config_resp), @@ -1063,8 +1064,12 @@ static ssize_t i40e_dbg_command_write(struct file *filp, goto command_write_done; } + vsi = pf->vsi[pf->lan_vsi]; + switch_id = + vsi->info.switch_id & I40E_AQ_VSI_SW_ID_MASK; + ret = i40e_aq_query_port_ets_config(&pf->hw, - pf->mac_seid, + switch_id, bw_data, NULL); if (ret) { dev_info(&pf->pdev->dev, -- cgit v1.1 From 8d9d927f4ab8d87fee91d9aa8bdcdf19a1787ce0 Mon Sep 17 00:00:00 2001 From: Mitch Williams Date: Wed, 24 Aug 2016 11:33:49 -0700 Subject: i40e: return correct opcode to VF This conditional is backward, so the driver responds back to 
the VF with the wrong opcode. Do the old switcheroo to fix this. Change-ID: I384035b0fef8a3881c176de4b4672009b3400b25 Signed-off-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index da34235..611fc87 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2217,8 +2217,8 @@ static int i40e_vc_iwarp_qvmap_msg(struct i40e_vf *vf, u8 *msg, u16 msglen, error_param: /* send the response to the VF */ return i40e_vc_send_resp_to_vf(vf, - config ? I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP : - I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP, + config ? I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP : + I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP, aq_ret); } -- cgit v1.1 From d4a0658d813ec72965a52f04f07258a4018ccb17 Mon Sep 17 00:00:00 2001 From: Carolyn Wyborny Date: Wed, 24 Aug 2016 11:33:50 -0700 Subject: i40e: Fix to check for NULL This patch fixes an issue in the virt channel code, where a return from i40e_find_vsi_from_id was not checked for NULL when applicable. Without this patch, there is a risk for panic and static analysis tools complain. This patch fixes the problem by adding the check and adding an additional input check for similar reasons. 
Change-ID: I7e9be88eb7a3addb50eadc451c8336d9e06f5394 Signed-off-by: Carolyn Wyborny Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 611fc87..2ab5355 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -502,8 +502,16 @@ static int i40e_config_vsi_tx_queue(struct i40e_vf *vf, u16 vsi_id, u32 qtx_ctl; int ret = 0; + if (!i40e_vc_isvalid_vsi_id(vf, info->vsi_id)) { + ret = -ENOENT; + goto error_context; + } pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id); vsi = i40e_find_vsi_from_id(pf, vsi_id); + if (!vsi) { + ret = -ENOENT; + goto error_context; + } /* clear the context structure first */ memset(&tx_ctx, 0, sizeof(struct i40e_hmc_obj_txq)); @@ -1476,7 +1484,8 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf, vsi = i40e_find_vsi_from_id(pf, info->vsi_id); if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) || - !i40e_vc_isvalid_vsi_id(vf, info->vsi_id)) { + !i40e_vc_isvalid_vsi_id(vf, info->vsi_id) || + !vsi) { aq_ret = I40E_ERR_PARAM; goto error_param; } -- cgit v1.1 From b3f5c7bc88bab134e9649e42d30be15e3775f00d Mon Sep 17 00:00:00 2001 From: Carolyn Wyborny Date: Wed, 24 Aug 2016 11:33:51 -0700 Subject: i40e: Fix for extra byte swap in tunnel setup This patch fixes an issue where we were byte swapping the port parameter, then byte swapping it again in function execution. Obviously, that's unnecessary, so take it out of the function calls. Without this patch, the udp based tunnel configuration would not be correct. 
Change-ID: I788d83c5bd5732170f1a81dbfa0b1ac3ca8ea5b7 Signed-off-by: Carolyn Wyborny Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 69b9e30..53cde5b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7154,9 +7154,9 @@ static void i40e_sync_udp_filters_subtask(struct i40e_pf *pf) pf->pending_udp_bitmap &= ~BIT_ULL(i); port = pf->udp_ports[i].index; if (port) - ret = i40e_aq_add_udp_tunnel(hw, ntohs(port), - pf->udp_ports[i].type, - NULL, NULL); + ret = i40e_aq_add_udp_tunnel(hw, port, + pf->udp_ports[i].type, + NULL, NULL); else ret = i40e_aq_del_udp_tunnel(hw, i, NULL); -- cgit v1.1 From ff918912e1b8ba4e743d1f0b06ced1d01969e17c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 28 Aug 2016 18:41:01 +0100 Subject: i40e: avoid potential null pointer dereference when assigning len There is a sanity check for desc being null in the first line of function i40evf_debug_aq. However, before that, aq_desc is cast from desc, and aq_desc is being dereferenced on the assignment of len, so this could be a potential null pointer dereference. Fix this by moving the initialization of len to the code block where len is being used and hence at this point we know it is OK to dereference aq_desc. 
Signed-off-by: Colin Ian King Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40evf/i40e_common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c index 4db0c03..7953c13 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_common.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c @@ -302,7 +302,6 @@ void i40evf_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, void *desc, void *buffer, u16 buf_len) { struct i40e_aq_desc *aq_desc = (struct i40e_aq_desc *)desc; - u16 len = le16_to_cpu(aq_desc->datalen); u8 *buf = (u8 *)buffer; u16 i = 0; @@ -326,6 +325,8 @@ void i40evf_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, void *desc, le32_to_cpu(aq_desc->params.external.addr_low)); if ((buffer != NULL) && (aq_desc->datalen != 0)) { + u16 len = le16_to_cpu(aq_desc->datalen); + i40e_debug(hw, mask, "AQ CMD Buffer:\n"); if (buf_len < len) len = buf_len; -- cgit v1.1 From 3f341acc1c65b800ced567174c683cda12dfb17d Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Thu, 1 Sep 2016 22:27:27 +0200 Subject: i40evf: Fix link state event handling Currently disabling the link state from PF via ip link set enp5s0f0 vf 0 state disable doesn't disable the CARRIER on the VF. This patch updates the carrier and starts/stops the tx queues based on the link state notification from PF. 
PF: enp5s0f0, VF: enp5s2 #modprobe i40e #echo 2 > /sys/class/net/enp5s0f0/device/sriov_numvfs #ip link set enp5s2 up #ip -d link show enp5s2 175: enp5s2: mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000 link/ether ea:4d:60:bc:6f:85 brd ff:ff:ff:ff:ff:ff promiscuity 0 addrgenmode eui64 #ip link set enp5s0f0 vf 0 state disable #ip -d link show enp5s0f0 171: enp5s0f0: mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/ether 68:05:ca:2e:72:68 brd ff:ff:ff:ff:ff:ff promiscuity 0 addrgenmode eui64 numtxqueues 72 numrxqueues 72 portid 6805ca2e7268 vf 0 MAC 00:00:00:00:00:00, spoof checking on, link-state disable, trust off vf 1 MAC 00:00:00:00:00:00, spoof checking on, link-state auto, trust off #ip -d link show enp5s2 175: enp5s2: mtu 1500 qdisc mq state DOWN mode DEFAULT group default qlen 1000 link/ether ea:4d:60:bc:6f:85 brd ff:ff:ff:ff:ff:ff promiscuity 0 addrgenmode eui64 numtxqueues 16 numrxqueues 16 Signed-off-by: Sridhar Samudrala Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 4 ++++ drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c | 10 +++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index f751f7b..e0a8cd8 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -1037,6 +1037,7 @@ void i40evf_down(struct i40evf_adapter *adapter) netif_carrier_off(netdev); netif_tx_disable(netdev); + adapter->link_up = false; i40evf_napi_disable_all(adapter); i40evf_irq_disable(adapter); @@ -1731,6 +1732,7 @@ static void i40evf_reset_task(struct work_struct *work) set_bit(__I40E_DOWN, &adapter->vsi.state); netif_carrier_off(netdev); netif_tx_disable(netdev); + adapter->link_up = false; i40evf_napi_disable_all(adapter); i40evf_irq_disable(adapter); i40evf_free_traffic_irqs(adapter); @@ -1769,6 +1771,7 
@@ continue_reset: if (netif_running(adapter->netdev)) { netif_carrier_off(netdev); netif_tx_stop_all_queues(netdev); + adapter->link_up = false; i40evf_napi_disable_all(adapter); } i40evf_irq_disable(adapter); @@ -2457,6 +2460,7 @@ static void i40evf_init_task(struct work_struct *work) goto err_sw_init; netif_carrier_off(netdev); + adapter->link_up = false; if (!adapter->netdev_registered) { err = register_netdev(netdev); diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c index cc6cb30..ddf478d 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c @@ -898,8 +898,14 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, vpe->event_data.link_event.link_status) { adapter->link_up = vpe->event_data.link_event.link_status; + if (adapter->link_up) { + netif_tx_start_all_queues(netdev); + netif_carrier_on(netdev); + } else { + netif_tx_stop_all_queues(netdev); + netif_carrier_off(netdev); + } i40evf_print_link_message(adapter); - netif_tx_stop_all_queues(netdev); } break; case I40E_VIRTCHNL_EVENT_RESET_IMPENDING: @@ -974,8 +980,6 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, case I40E_VIRTCHNL_OP_ENABLE_QUEUES: /* enable transmits */ i40evf_irq_enable(adapter, true); - netif_tx_start_all_queues(adapter->netdev); - netif_carrier_on(adapter->netdev); break; case I40E_VIRTCHNL_OP_DISABLE_QUEUES: i40evf_free_all_tx_resources(adapter); -- cgit v1.1 From cb130a0b41d2a825fa48d7dfc964f08da9ccbb96 Mon Sep 17 00:00:00 2001 From: Bimmy Pujari Date: Tue, 6 Sep 2016 18:05:03 -0700 Subject: i40evf: remove unnecessary error checking against i40evf_up_complete Function i40evf_up_complete() always returns success. Changed this to a void type and removed the code that checks the return status and prints an error message. 
Change-ID: I8c400f174786b9c855f679e470f35af292fb50ad Signed-off-by: Bimmy Pujari Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index e0a8cd8..9906775 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -1007,7 +1007,7 @@ static void i40evf_configure(struct i40evf_adapter *adapter) * i40evf_up_complete - Finish the last steps of bringing up a connection * @adapter: board private structure **/ -static int i40evf_up_complete(struct i40evf_adapter *adapter) +static void i40evf_up_complete(struct i40evf_adapter *adapter) { adapter->state = __I40EVF_RUNNING; clear_bit(__I40E_DOWN, &adapter->vsi.state); @@ -1016,7 +1016,6 @@ static int i40evf_up_complete(struct i40evf_adapter *adapter) adapter->aq_required |= I40EVF_FLAG_AQ_ENABLE_QUEUES; mod_timer_pending(&adapter->watchdog_timer, jiffies + 1); - return 0; } /** @@ -1827,9 +1826,7 @@ continue_reset: i40evf_configure(adapter); - err = i40evf_up_complete(adapter); - if (err) - goto reset_err; + i40evf_up_complete(adapter); i40evf_irq_enable(adapter, true); } else { @@ -2059,9 +2056,7 @@ static int i40evf_open(struct net_device *netdev) i40evf_add_filter(adapter, adapter->hw.mac.addr); i40evf_configure(adapter); - err = i40evf_up_complete(adapter); - if (err) - goto err_req_irq; + i40evf_up_complete(adapter); i40evf_irq_enable(adapter, true); -- cgit v1.1 From 841493a3f64395b60554afbcaa17f4350f90e764 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 6 Sep 2016 18:05:04 -0700 Subject: i40e: Limit TX descriptor count in cases where frag size is greater than 16K The i40e driver was incorrectly assuming that we would always be pulling no more than 1 descriptor from each fragment. 
It is in fact possible for us to end up with the case where 2 descriptors worth of data may be pulled when a frame is larger than one of the pieces generated when aligning the payload to either 4K or pieces smaller than 16K. To adjust for this we just need to make certain to test all the way to the end of the fragments as it is possible for us to span 2 descriptors in the block before us so we need to guarantee that even the last 6 descriptors have enough data to fill a full frame. Change-ID: Ic2ecb4d6b745f447d334e66c14002152f50e2f99 Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 7 ++----- drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 7 ++----- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index f8d6623..bf7bb7c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2621,9 +2621,7 @@ bool __i40e_chk_linearize(struct sk_buff *skb) return false; /* We need to walk through the list and validate that each group - * of 6 fragments totals at least gso_size. However we don't need - * to perform such validation on the last 6 since the last 6 cannot - * inherit any data from a descriptor after them. + * of 6 fragments totals at least gso_size. 
*/ nr_frags -= I40E_MAX_BUFFER_TXD - 2; frag = &skb_shinfo(skb)->frags[0]; @@ -2654,8 +2652,7 @@ bool __i40e_chk_linearize(struct sk_buff *skb) if (sum < 0) return true; - /* use pre-decrement to avoid processing last fragment */ - if (!--nr_frags) + if (!nr_frags--) break; sum -= skb_frag_size(stale++); diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 0130458..e3427eb 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -1832,9 +1832,7 @@ bool __i40evf_chk_linearize(struct sk_buff *skb) return false; /* We need to walk through the list and validate that each group - * of 6 fragments totals at least gso_size. However we don't need - * to perform such validation on the last 6 since the last 6 cannot - * inherit any data from a descriptor after them. + * of 6 fragments totals at least gso_size. */ nr_frags -= I40E_MAX_BUFFER_TXD - 2; frag = &skb_shinfo(skb)->frags[0]; @@ -1865,8 +1863,7 @@ bool __i40evf_chk_linearize(struct sk_buff *skb) if (sum < 0) return true; - /* use pre-decrement to avoid processing last fragment */ - if (!--nr_frags) + if (!nr_frags--) break; sum -= skb_frag_size(stale++); -- cgit v1.1 From 903e68323bb62cc8ca30c5a7a41d962f92c27b97 Mon Sep 17 00:00:00 2001 From: Lihong Yang Date: Tue, 6 Sep 2016 18:05:05 -0700 Subject: i40evf: remove unnecessary error checking against i40e_shutdown_adminq The i40e_shutdown_adminq function never returns failure. There is no need to check the non-0 return value. Clean up the unnecessary error checking and warning against it. 
Change-ID: Ibb616f09cfb93bd1a872ebf3241a15fb8354b31b Signed-off-by: Lihong Yang Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 9906775..99833f3 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -1785,8 +1785,7 @@ continue_reset: i40evf_free_all_tx_resources(adapter); /* kill and reinit the admin queue */ - if (i40evf_shutdown_adminq(hw)) - dev_warn(&adapter->pdev->dev, "Failed to shut down adminq\n"); + i40evf_shutdown_adminq(hw); adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN; err = i40evf_init_adminq(hw); if (err) -- cgit v1.1 From 691e412132f07bd566934b7cbc430e87c5656de1 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Mon, 22 Aug 2016 16:17:46 -0700 Subject: ixgbe: simplify the logic for setting VLAN filtering Simplify the logic for setting VLNCTRL.VFE by checking the VMDQ flag and 82598 MAC instead of having to maintain a list of MAC types. 
Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 39 ++++++++------------------- 1 file changed, 11 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index d76bc1a..1c88858 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -4105,23 +4105,20 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter) vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); - switch (hw->mac.type) { - case ixgbe_mac_82599EB: - case ixgbe_mac_X540: - case ixgbe_mac_X550: - case ixgbe_mac_X550EM_x: - case ixgbe_mac_x550em_a: - default: - if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) - break; - /* fall through */ - case ixgbe_mac_82598EB: - /* legacy case, we can just disable VLAN filtering */ + if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) { + /* For VMDq and SR-IOV we must leave VLAN filtering enabled */ + vlnctrl |= IXGBE_VLNCTRL_VFE; + IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); + } else { vlnctrl &= ~IXGBE_VLNCTRL_VFE; IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); return; } + /* Nothing to do for 82598 */ + if (hw->mac.type == ixgbe_mac_82598EB) + return; + /* We are already in VLAN promisc, nothing to do */ if (adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC) return; @@ -4129,10 +4126,6 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter) /* Set flag so we don't redo unnecessary work */ adapter->flags2 |= IXGBE_FLAG2_VLAN_PROMISC; - /* For VMDq and SR-IOV we must leave VLAN filtering enabled */ - vlnctrl |= IXGBE_VLNCTRL_VFE; - IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); - /* Add PF to all active pools */ for (i = IXGBE_VLVF_ENTRIES; --i;) { u32 reg_offset = IXGBE_VLVFB(i * 2 + VMDQ_P(0) / 32); @@ -4204,19 +4197,9 @@ static void ixgbe_vlan_promisc_disable(struct ixgbe_adapter *adapter) vlnctrl |= IXGBE_VLNCTRL_VFE; IXGBE_WRITE_REG(hw, 
IXGBE_VLNCTRL, vlnctrl); - switch (hw->mac.type) { - case ixgbe_mac_82599EB: - case ixgbe_mac_X540: - case ixgbe_mac_X550: - case ixgbe_mac_X550EM_x: - case ixgbe_mac_x550em_a: - default: - if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) - break; - /* fall through */ - case ixgbe_mac_82598EB: + if (!(adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) || + hw->mac.type == ixgbe_mac_82598EB) return; - } /* We are not in VLAN promisc, nothing to do */ if (!(adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC)) -- cgit v1.1 From d2d43e5b9fce2c30182dd9b6c63f436ea923a4d9 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Mon, 22 Aug 2016 16:28:34 -0700 Subject: ixgbe: make ixgbe_led_on/off_t_x550em static These functions are only used in ixgbe_x550.c. Fixes a warning when compiling with -Wmissing-prototypes Reported-by: Krishneil Singh Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index e092a89..dec8b11 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -2125,7 +2125,7 @@ static s32 ixgbe_reset_phy_t_X550em(struct ixgbe_hw *hw) * @hw: pointer to hardware structure * @led_idx: led number to turn on **/ -s32 ixgbe_led_on_t_x550em(struct ixgbe_hw *hw, u32 led_idx) +static s32 ixgbe_led_on_t_x550em(struct ixgbe_hw *hw, u32 led_idx) { u16 phy_data; @@ -2147,7 +2147,7 @@ s32 ixgbe_led_on_t_x550em(struct ixgbe_hw *hw, u32 led_idx) * @hw: pointer to hardware structure * @led_idx: led number to turn off **/ -s32 ixgbe_led_off_t_x550em(struct ixgbe_hw *hw, u32 led_idx) +static s32 ixgbe_led_off_t_x550em(struct ixgbe_hw *hw, u32 led_idx) { u16 phy_data; -- cgit v1.1 From 8fe293aaaa7abd192633cf612065b355a66ed6ad Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Fri, 26 Aug 2016 14:48:28 -0700 
Subject: ixgbe: Resolve NULL reference by setting {read, write}_reg_mdi Set the read_reg_mdi and write_reg_mdi method pointers for X550EM_A_10G_T devices to resolve jumping to NULL. Signed-off-by: Mark Rustad Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index dec8b11..cd22efb 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -3049,6 +3049,8 @@ static const struct ixgbe_phy_operations phy_ops_x550em_a = { .identify = &ixgbe_identify_phy_x550em, .read_reg = &ixgbe_read_phy_reg_x550a, .write_reg = &ixgbe_write_phy_reg_x550a, + .read_reg_mdi = &ixgbe_read_phy_reg_mdi, + .write_reg_mdi = &ixgbe_write_phy_reg_mdi, }; static const u32 ixgbe_mvals_X550[IXGBE_MVALS_IDX_LIMIT] = { -- cgit v1.1 From ade3ccf9dc75c94c1557108572d445f0300adead Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Fri, 26 Aug 2016 14:48:33 -0700 Subject: ixgbe: Indicate support for pause frames in all cases All the MACs supported by ixgbe support pause frames, so indicate that support in ethtool. Also set advertising according to requested mode. 
Signed-off-by: Mark Rustad Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 9547191..730a99f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -313,6 +313,25 @@ static int ixgbe_get_settings(struct net_device *netdev, break; } + /* Indicate pause support */ + ecmd->supported |= SUPPORTED_Pause; + + switch (hw->fc.requested_mode) { + case ixgbe_fc_full: + ecmd->advertising |= ADVERTISED_Pause; + break; + case ixgbe_fc_rx_pause: + ecmd->advertising |= ADVERTISED_Pause | + ADVERTISED_Asym_Pause; + break; + case ixgbe_fc_tx_pause: + ecmd->advertising |= ADVERTISED_Asym_Pause; + break; + default: + ecmd->advertising &= ~(ADVERTISED_Pause | + ADVERTISED_Asym_Pause); + } + if (netif_carrier_ok(netdev)) { switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: -- cgit v1.1 From 14b22cd9827ad6765a00ca0b267c3cb0353d9c10 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Mon, 29 Aug 2016 16:39:28 -0700 Subject: ixgbevf: add spinlocks for MTU change calls Protect set_rlpml with mailbox lock to make sure the MTU configuration is handled properly. 
This change resolves an issue where set_rlpml can fail when the VF interface is brought up: ixgbevf 0000:03:1d.6: Failed to set MTU at 1500 Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 4044608..7eaac32 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1810,8 +1810,10 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter) if (hw->mac.type >= ixgbe_mac_X550_vf) ixgbevf_setup_vfmrqc(adapter); + spin_lock_bh(&adapter->mbx_lock); /* notify the PF of our intent to use this size of frame */ ret = hw->mac.ops.set_rlpml(hw, netdev->mtu + ETH_HLEN + ETH_FCS_LEN); + spin_unlock_bh(&adapter->mbx_lock); if (ret) dev_err(&adapter->pdev->dev, "Failed to set MTU at %d\n", netdev->mtu); @@ -3758,8 +3760,10 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu) if ((new_mtu < 68) || (max_frame > max_possible_frame)) return -EINVAL; + spin_lock_bh(&adapter->mbx_lock); /* notify the PF of our intent to use this size of frame */ ret = hw->mac.ops.set_rlpml(hw, max_frame); + spin_unlock_bh(&adapter->mbx_lock); if (ret) return -EINVAL; -- cgit v1.1 From 7564a8880a3cf831078a67bffb05c51f34d133eb Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Thu, 1 Sep 2016 13:58:51 -0700 Subject: ixgbe: Use MDIO_PRTAD_NONE consistently The value MDIO_PRTAD_NONE should be used to indicate no PHY address. Not 0, not 0xFFFF. Use the MDIO_PRTAD_NONE value consistently. 
Signed-off-by: Mark Rustad Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c | 4 ++-- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index db0731e..021ab9b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -346,8 +346,8 @@ s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw) return 0; } } - /* clear value if nothing found */ - hw->phy.mdio.prtad = 0; + /* indicate no PHY found */ + hw->phy.mdio.prtad = MDIO_PRTAD_NONE; return IXGBE_ERR_PHY_ADDR_INVALID; } return 0; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index cd22efb..7e6b926 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -1459,7 +1459,7 @@ ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, ixgbe_link_speed speed, /* Configure internal PHY for KR/KX. */ ixgbe_setup_kr_speed_x550em(hw, speed); - if (!hw->phy.mdio.prtad || hw->phy.mdio.prtad == 0xFFFF) + if (hw->phy.mdio.prtad == MDIO_PRTAD_NONE) return IXGBE_ERR_PHY_ADDR_INVALID; /* Get external PHY device id */ -- cgit v1.1 From 3b00da03ae303a3bdfa3bdfbb078e0eadb749375 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 7 Sep 2016 20:28:11 -0700 Subject: ixgbe: Allow setting multiple queues when SR-IOV is enabled The maximum queue count reported was 1, however support for multiple queues with SR-IOV was added some time ago so we should report support for it to the user so that they can select multiple queues if they so desire. 
Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 730a99f..2d872be 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -3060,8 +3060,8 @@ static unsigned int ixgbe_max_channels(struct ixgbe_adapter *adapter) /* We only support one q_vector without MSI-X */ max_combined = 1; } else if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) { - /* SR-IOV currently only allows one queue on the PF */ - max_combined = 1; + /* Limit value based on the queue mask */ + max_combined = adapter->ring_feature[RING_F_RSS].mask + 1; } else if (tcs > 1) { /* For DCB report channels per traffic class */ if (adapter->hw.mac.type == ixgbe_mac_82598EB) { -- cgit v1.1 From fa81da7e5b261cf8010f65253661522d3ff71714 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 7 Sep 2016 20:28:17 -0700 Subject: ixgbe: Limit reporting of redirection table if SR-IOV is enabled The hardware redirection table can support more queues than the PF currently has when SR-IOV is enabled. In order to account for this use the RSS mask to trim off the bits that are not used. 
Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 2d872be..f49f803 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -2947,9 +2947,13 @@ static u32 ixgbe_rss_indir_size(struct net_device *netdev) static void ixgbe_get_reta(struct ixgbe_adapter *adapter, u32 *indir) { int i, reta_size = ixgbe_rss_indir_tbl_entries(adapter); + u16 rss_m = adapter->ring_feature[RING_F_RSS].mask; + + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) + rss_m = adapter->ring_feature[RING_F_RSS].indices - 1; for (i = 0; i < reta_size; i++) - indir[i] = adapter->rss_indir_tbl[i]; + indir[i] = adapter->rss_indir_tbl[i] & rss_m; } static int ixgbe_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, -- cgit v1.1 From e24fcf28959298e07cae9ee19eb9a4b2b399b4fb Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 7 Sep 2016 20:28:24 -0700 Subject: ixgbe: Support 4 queue RSS on VFs with 1 or 2 queue RSS on PF Instead of limiting the VFs if we don't use 4 queues for RSS in the PF we can instead just limit the RSS queues used to a power of 2. By doing this we can support use cases where VFs are using more queues than the PF is currently using and can support RSS if so desired. The only limitation on this is that we cannot support 3 queues of RSS in the PF or VF. In either of these cases we should fall back to 2 queues in order to be able to use the power of 2 masking provided by the psrtype register. 
Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c | 7 ++++--- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 12 +++++++----- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index bcdc884..15ab337 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -515,15 +515,16 @@ static bool ixgbe_set_sriov_queues(struct ixgbe_adapter *adapter) vmdq_i = min_t(u16, IXGBE_MAX_VMDQ_INDICES, vmdq_i); /* 64 pool mode with 2 queues per pool */ - if ((vmdq_i > 32) || (rss_i < 4) || (vmdq_i > 16 && pools)) { + if ((vmdq_i > 32) || (vmdq_i > 16 && pools)) { vmdq_m = IXGBE_82599_VMDQ_2Q_MASK; rss_m = IXGBE_RSS_2Q_MASK; rss_i = min_t(u16, rss_i, 2); - /* 32 pool mode with 4 queues per pool */ + /* 32 pool mode with up to 4 queues per pool */ } else { vmdq_m = IXGBE_82599_VMDQ_4Q_MASK; rss_m = IXGBE_RSS_4Q_MASK; - rss_i = 4; + /* We can support 4, 2, or 1 queues */ + rss_i = (rss_i > 3) ? 4 : (rss_i > 1) ? 
2 : 1; } #ifdef IXGBE_FCOE diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 1c88858..a244d9a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -3248,7 +3248,8 @@ static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter) mtqc |= IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ; else if (tcs > 1) mtqc |= IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ; - else if (adapter->ring_feature[RING_F_RSS].indices == 4) + else if (adapter->ring_feature[RING_F_VMDQ].mask == + IXGBE_82599_VMDQ_4Q_MASK) mtqc |= IXGBE_MTQC_32VF; else mtqc |= IXGBE_MTQC_64VF; @@ -3475,12 +3476,12 @@ static void ixgbe_setup_reta(struct ixgbe_adapter *adapter) u32 reta_entries = ixgbe_rss_indir_tbl_entries(adapter); u16 rss_i = adapter->ring_feature[RING_F_RSS].indices; - /* Program table for at least 2 queues w/ SR-IOV so that VFs can + /* Program table for at least 4 queues w/ SR-IOV so that VFs can * make full use of any rings they may have. We will use the * PSRTYPE register to control how many rings we use within the PF. 
*/ - if ((adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) && (rss_i < 2)) - rss_i = 2; + if ((adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) && (rss_i < 4)) + rss_i = 4; /* Fill out hash function seeds */ for (i = 0; i < 10; i++) @@ -3544,7 +3545,8 @@ static void ixgbe_setup_mrqc(struct ixgbe_adapter *adapter) mrqc = IXGBE_MRQC_VMDQRT8TCEN; /* 8 TCs */ else if (tcs > 1) mrqc = IXGBE_MRQC_VMDQRT4TCEN; /* 4 TCs */ - else if (adapter->ring_feature[RING_F_RSS].indices == 4) + else if (adapter->ring_feature[RING_F_VMDQ].mask == + IXGBE_82599_VMDQ_4Q_MASK) mrqc = IXGBE_MRQC_VMDQRSS32EN; else mrqc = IXGBE_MRQC_VMDQRSS64EN; -- cgit v1.1 From 0c339bf9ac2eed861d34a9dd40aee2a2d490ec36 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Fri, 9 Sep 2016 12:59:10 -0700 Subject: ixgbe: reset before SRIOV init to avoid mailbox issues Enabling SRIOV while the ixgbevf driver is loaded will result in all mailbox requests from ixgbevf_open() being rejected by ixgbe because adapter->clear_to_send is set to false on reset. Call ixgbe_sriov_reinit() before pci_enable_sriov() to make sure that mailbox requests are handled from the time ixgbevf is loaded. 
Reported-by: Andrew Bowers Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 8618599..343a182 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -329,13 +329,15 @@ static int ixgbe_pci_sriov_enable(struct pci_dev *dev, int num_vfs) for (i = 0; i < adapter->num_vfs; i++) ixgbe_vf_configuration(dev, (i | 0x10000000)); + /* reset before enabling SRIOV to avoid mailbox issues */ + ixgbe_sriov_reinit(adapter); + err = pci_enable_sriov(dev, num_vfs); if (err) { e_dev_warn("Failed to enable PCI sriov: %d\n", err); return err; } ixgbe_get_vfs(adapter); - ixgbe_sriov_reinit(adapter); return num_vfs; #else -- cgit v1.1 From 21641c2e1ffd0b504610a33beaeab8fcc5140677 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sun, 18 Sep 2016 15:52:20 -0700 Subject: net_sched: check NULL on error path in route4_change() On error path in route4_change(), 'f' could be NULL, so we should check NULL before calling tcf_exts_destroy(). Fixes: b9a24bb76bf6 ("net_sched: properly handle failure case of tcf_exts_init()") Reported-by: kbuild test robot Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- net/sched/cls_route.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index a4ce39b..455fc8f 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -559,7 +559,8 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, return 0; errout: - tcf_exts_destroy(&f->exts); + if (f) + tcf_exts_destroy(&f->exts); kfree(f); return err; } -- cgit v1.1 From a3007446e53af07c53bdb4cabad7b3ea60859da4 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 20 Sep 2016 18:19:13 -0300 Subject: sctp: fix the handling of SACK Gap Ack blocks sctp_acked() is using 32bit arithmetics on 16bits vars, via TSN_lte() macros, which is weird and confusing. Once the offset to ctsn is calculated, all wrapping is already handled and thus to verify the Gap Ack blocks we can just use pure less/big-or-equal than checks. Also, rename gap variable to tsn_offset, so it's more meaningful, as it doesn't point to any gap at all. Even so, I don't think this discrepancy resulted in any practical bug. This patch is a preparation for the next one, which will introduce typecheck() for TSN_lte() macros and would cause a compile error here. Suggested-by: David Laight Reported-by: David Laight Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. 
Miller --- net/sctp/outqueue.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 8c3f446..3ec6da8b 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1719,7 +1719,7 @@ static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn) { int i; sctp_sack_variable_t *frags; - __u16 gap; + __u16 tsn_offset, blocks; __u32 ctsn = ntohl(sack->cum_tsn_ack); if (TSN_lte(tsn, ctsn)) @@ -1738,10 +1738,11 @@ static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn) */ frags = sack->variable; - gap = tsn - ctsn; - for (i = 0; i < ntohs(sack->num_gap_ack_blocks); ++i) { - if (TSN_lte(ntohs(frags[i].gab.start), gap) && - TSN_lte(gap, ntohs(frags[i].gab.end))) + blocks = ntohs(sack->num_gap_ack_blocks); + tsn_offset = tsn - ctsn; + for (i = 0; i < blocks; ++i) { + if (tsn_offset >= ntohs(frags[i].gab.start) && + tsn_offset <= ntohs(frags[i].gab.end)) goto pass; } -- cgit v1.1 From 182691d0998400f35ad304718024e60feaa864aa Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 20 Sep 2016 18:19:14 -0300 Subject: sctp: improve how SSN, TSN and ASCONF serial are compared Make it similar to time_before() macros: - easier to understand - make use of typecheck() to avoid working on unexpected variable types (made the issue on previous patch visible) - for _[lg]te versions, slightly faster, as the compiler used to generate a sequence of cmp/je/cmp/js instructions and now it's sub/test/jle (for _lte): Before, for sctp_outq_sack: if (primary->cacc.changeover_active) { 1f01: 80 b9 84 02 00 00 00 cmpb $0x0,0x284(%rcx) 1f08: 74 6e je 1f78 u8 clear_cycling = 0; if (TSN_lte(primary->cacc.next_tsn_at_change, sack_ctsn)) { 1f0a: 8b 81 80 02 00 00 mov 0x280(%rcx),%eax return ((s) - (t)) & TSN_SIGN_BIT; } static inline int TSN_lte(__u32 s, __u32 t) { return ((s) == (t)) || (((s) - (t)) & TSN_SIGN_BIT); 1f10: 8b 7d bc mov -0x44(%rbp),%edi 1f13: 39 c7 cmp %eax,%edi 1f15: 74 25 je 1f3c 1f17: 39 f8 cmp 
%edi,%eax 1f19: 78 21 js 1f3c primary->cacc.changeover_active = 0; After: if (primary->cacc.changeover_active) { 1ee7: 80 b9 84 02 00 00 00 cmpb $0x0,0x284(%rcx) 1eee: 74 73 je 1f63 u8 clear_cycling = 0; if (TSN_lte(primary->cacc.next_tsn_at_change, sack_ctsn)) { 1ef0: 8b 81 80 02 00 00 mov 0x280(%rcx),%eax 1ef6: 2b 45 b4 sub -0x4c(%rbp),%eax 1ef9: 85 c0 test %eax,%eax 1efb: 7e 26 jle 1f23 primary->cacc.changeover_active = 0; *_lt() generated pretty much the same code. Tested with gcc (GCC) 6.1.1 20160621. This patch also removes SSN_lte as it is not used and cleanups some comments. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/sm.h | 94 ++++++++++----------------------------------------- 1 file changed, 18 insertions(+), 76 deletions(-) diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index bafe2a0..ca6c971 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -307,85 +307,27 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk) } /* Compare two TSNs */ +#define TSN_lt(a,b) \ + (typecheck(__u32, a) && \ + typecheck(__u32, b) && \ + ((__s32)((a) - (b)) < 0)) -/* RFC 1982 - Serial Number Arithmetic - * - * 2. Comparison - * Then, s1 is said to be equal to s2 if and only if i1 is equal to i2, - * in all other cases, s1 is not equal to s2. - * - * s1 is said to be less than s2 if, and only if, s1 is not equal to s2, - * and - * - * (i1 < i2 and i2 - i1 < 2^(SERIAL_BITS - 1)) or - * (i1 > i2 and i1 - i2 > 2^(SERIAL_BITS - 1)) - * - * s1 is said to be greater than s2 if, and only if, s1 is not equal to - * s2, and - * - * (i1 < i2 and i2 - i1 > 2^(SERIAL_BITS - 1)) or - * (i1 > i2 and i1 - i2 < 2^(SERIAL_BITS - 1)) - */ - -/* - * RFC 2960 - * 1.6 Serial Number Arithmetic - * - * Comparisons and arithmetic on TSNs in this document SHOULD use Serial - * Number Arithmetic as defined in [RFC1982] where SERIAL_BITS = 32. 
- */ - -enum { - TSN_SIGN_BIT = (1<<31) -}; - -static inline int TSN_lt(__u32 s, __u32 t) -{ - return ((s) - (t)) & TSN_SIGN_BIT; -} - -static inline int TSN_lte(__u32 s, __u32 t) -{ - return ((s) == (t)) || (((s) - (t)) & TSN_SIGN_BIT); -} +#define TSN_lte(a,b) \ + (typecheck(__u32, a) && \ + typecheck(__u32, b) && \ + ((__s32)((a) - (b)) <= 0)) /* Compare two SSNs */ - -/* - * RFC 2960 - * 1.6 Serial Number Arithmetic - * - * Comparisons and arithmetic on Stream Sequence Numbers in this document - * SHOULD use Serial Number Arithmetic as defined in [RFC1982] where - * SERIAL_BITS = 16. - */ -enum { - SSN_SIGN_BIT = (1<<15) -}; - -static inline int SSN_lt(__u16 s, __u16 t) -{ - return ((s) - (t)) & SSN_SIGN_BIT; -} - -static inline int SSN_lte(__u16 s, __u16 t) -{ - return ((s) == (t)) || (((s) - (t)) & SSN_SIGN_BIT); -} - -/* - * ADDIP 3.1.1 - * The valid range of Serial Number is from 0 to 4294967295 (2**32 - 1). Serial - * Numbers wrap back to 0 after reaching 4294967295. - */ -enum { - ADDIP_SERIAL_SIGN_BIT = (1<<31) -}; - -static inline int ADDIP_SERIAL_gte(__u32 s, __u32 t) -{ - return ((s) == (t)) || (((t) - (s)) & ADDIP_SERIAL_SIGN_BIT); -} +#define SSN_lt(a,b) \ + (typecheck(__u16, a) && \ + typecheck(__u16, b) && \ + ((__s16)((a) - (b)) < 0)) + +/* ADDIP 3.1.1 */ +#define ADDIP_SERIAL_gte(a,b) \ + (typecheck(__u32, a) && \ + typecheck(__u32, b) && \ + ((__s32)((b) - (a)) <= 0)) /* Check VTAG of the packet matches the sender's own tag. */ static inline int -- cgit v1.1 From 429baa6f0e1b9237a3667c3a5e8ca76051e6d0b7 Mon Sep 17 00:00:00 2001 From: Bert Kenward Date: Thu, 22 Sep 2016 15:47:45 +0100 Subject: sfc: check async completer is !NULL before calling Add a NULL check before calling asynchronous MCDI completion functions during device removal. Fixes: 7014d7f6 ("sfc: allow asynchronous MCDI without completion function") Signed-off-by: Bert Kenward Signed-off-by: David S. 
Miller --- drivers/net/ethernet/sfc/mcdi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index 9fbc12a..2415209 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c @@ -1156,7 +1156,8 @@ void efx_mcdi_flush_async(struct efx_nic *efx) * acquired locks in the wrong order. */ list_for_each_entry_safe(async, next, &mcdi->async_list, list) { - async->complete(efx, async->cookie, -ENETDOWN, NULL, 0); + if (async->complete) + async->complete(efx, async->cookie, -ENETDOWN, NULL, 0); list_del(&async->list); kfree(async); } -- cgit v1.1 From fefa569a9d4bc4b7758c0fddd75bb0382c95da77 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Sep 2016 08:58:55 -0700 Subject: net_sched: sch_fq: account for schedule/timers drifts It looks like the following patch can make FQ very precise, even in VM or stressed hosts. It matters at high pacing rates. We take into account the difference between the time that was programmed when last packet was sent, and current time (a drift of tens of usecs is often observed) Add an EWMA of the unthrottle latency to help diagnostics. This latency is the difference between current time and oldest packet in delayed RB-tree. This accounts for the high resolution timer latency, but can be different under stress, as fq_check_throttled() can opportunistically be called from a dequeue() called after an enqueue() for a different flow. 
Tested: // Start a 10Gbit flow $ netperf --google-pacing-rate 1250000000 -H lpaa24 -l 10000 -- -K bbr & Before patch : $ sar -n DEV 10 5 | grep eth0 | grep Average Average: eth0 17106.04 756876.84 1102.75 1119049.02 0.00 0.00 0.52 After patch : $ sar -n DEV 10 5 | grep eth0 | grep Average Average: eth0 17867.00 800245.90 1151.77 1183172.12 0.00 0.00 0.52 A new iproute2 tc can output the 'unthrottle latency' : $ tc -s qd sh dev eth0 | grep latency 0 gc, 0 highprio, 32490767 throttled, 2382 ns latency Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 2 +- net/sched/sch_fq.c | 21 ++++++++++++++++++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index f8e39db..df7451d 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -811,7 +811,7 @@ struct tc_fq_qd_stats { __u32 flows; __u32 inactive_flows; __u32 throttled_flows; - __u32 pad; + __u32 unthrottle_latency_ns; }; /* Heavy-Hitter Filter */ diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 5dd929c..18e7524 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -86,6 +86,7 @@ struct fq_sched_data { struct rb_root delayed; /* for rate limited flows */ u64 time_next_delayed_flow; + unsigned long unthrottle_latency_ns; struct fq_flow internal; /* for non classified or high prio packets */ u32 quantum; @@ -408,11 +409,19 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, static void fq_check_throttled(struct fq_sched_data *q, u64 now) { + unsigned long sample; struct rb_node *p; if (q->time_next_delayed_flow > now) return; + /* Update unthrottle latency EWMA. + * This is cheap and can help diagnosing timer/latency problems. 
+ */ + sample = (unsigned long)(now - q->time_next_delayed_flow); + q->unthrottle_latency_ns -= q->unthrottle_latency_ns >> 3; + q->unthrottle_latency_ns += sample >> 3; + q->time_next_delayed_flow = ~0ULL; while ((p = rb_first(&q->delayed)) != NULL) { struct fq_flow *f = container_of(p, struct fq_flow, rate_node); @@ -515,7 +524,12 @@ begin: len = NSEC_PER_SEC; q->stat_pkts_too_long++; } - + /* Account for schedule/timers drifts. + * f->time_next_packet was set when prior packet was sent, + * and current time (@now) can be too late by tens of us. + */ + if (f->time_next_packet) + len -= min(len/2, now - f->time_next_packet); f->time_next_packet = now + len; } out: @@ -787,6 +801,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt) q->initial_quantum = 10 * psched_mtu(qdisc_dev(sch)); q->flow_refill_delay = msecs_to_jiffies(40); q->flow_max_rate = ~0U; + q->time_next_delayed_flow = ~0ULL; q->rate_enable = 1; q->new_flows.first = NULL; q->old_flows.first = NULL; @@ -854,8 +869,8 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) st.flows = q->flows; st.inactive_flows = q->inactive_flows; st.throttled_flows = q->throttled_flows; - st.pad = 0; - + st.unthrottle_latency_ns = min_t(unsigned long, + q->unthrottle_latency_ns, ~0U); sch_tree_unlock(sch); return gnet_stats_copy_app(d, &st, sizeof(st)); -- cgit v1.1 From 53e89941ba2a969c483aa29b907de9a823179297 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 22 Sep 2016 20:01:41 +0300 Subject: net_sched: act_vlan: add helper inlines to access tcf_vlan info Needed e.g for offloading drivers to pick the relevant attributes. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- include/net/tc_act/tc_vlan.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h index 6b83588..48cca32 100644 --- a/include/net/tc_act/tc_vlan.h +++ b/include/net/tc_act/tc_vlan.h @@ -11,6 +11,7 @@ #define __NET_TC_VLAN_H #include +#include #define VLAN_F_POP 0x1 #define VLAN_F_PUSH 0x2 @@ -24,4 +25,28 @@ struct tcf_vlan { }; #define to_vlan(a) ((struct tcf_vlan *)a) +static inline bool is_tcf_vlan(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + if (a->ops && a->ops->type == TCA_ACT_VLAN) + return true; +#endif + return false; +} + +static inline u32 tcf_vlan_action(const struct tc_action *a) +{ + return to_vlan(a)->tcfv_action; +} + +static inline u16 tcf_vlan_push_vid(const struct tc_action *a) +{ + return to_vlan(a)->tcfv_push_vid; +} + +static inline __be16 tcf_vlan_push_proto(const struct tc_action *a) +{ + return to_vlan(a)->tcfv_push_proto; +} + #endif /* __NET_TC_VLAN_H */ -- cgit v1.1 From 9deb2241f19f26800e3b4c6bf49c4db992192bf0 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 22 Sep 2016 20:01:42 +0300 Subject: net/mlx5: E-Switch, Set the vport when registering the uplink rep Set the vport value in the PF entry to be that of the uplink so we can use it blindly over the tc / eswitch offload code without translating it each time we deal with the uplink representor. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 ++--- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 10 ++------ drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 3 ++- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 27 +++++++++++----------- 4 files changed, 20 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a9fc9d4..b309e7c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3726,9 +3726,9 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5_query_nic_vport_mac_address(mdev, 0, rep.hw_id); rep.load = mlx5e_nic_rep_load; rep.unload = mlx5e_nic_rep_unload; - rep.vport = 0; + rep.vport = FDB_UPLINK_VPORT; rep.priv_data = priv; - mlx5_eswitch_register_vport_rep(esw, &rep); + mlx5_eswitch_register_vport_rep(esw, 0, &rep); } } @@ -3867,7 +3867,7 @@ static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev) rep.unload = mlx5e_vport_rep_unload; rep.vport = vport; ether_addr_copy(rep.hw_id, mac); - mlx5_eswitch_register_vport_rep(esw, &rep); + mlx5_eswitch_register_vport_rep(esw, vport, &rep); } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 22cfc4a..783e122 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -120,10 +120,7 @@ static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5_eswitch_rep *rep = priv->ppriv; u32 src_vport; - if (rep->vport) /* set source vport for the flow */ - src_vport = rep->vport; - else - src_vport = FDB_UPLINK_VPORT; + src_vport = rep->vport; return mlx5_eswitch_add_offloaded_rule(esw, spec, action, src_vport, dst_vport); } @@ -399,10 +396,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, out_priv = netdev_priv(out_dev); out_rep 
= out_priv->ppriv; - if (out_rep->vport == 0) - *dest_vport = FDB_UPLINK_VPORT; - else - *dest_vport = out_rep->vport; + *dest_vport = out_rep->vport; *action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; continue; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index b96e8c9..6d8c5a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -254,9 +254,10 @@ void mlx5_eswitch_sqs2vport_stop(struct mlx5_eswitch *esw, int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode); int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode); void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, + int vport_index, struct mlx5_eswitch_rep *rep); void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, - int vport); + int vport_index); #define MLX5_DEBUG_ESWITCH_MASK BIT(3) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 7de40e6..516ac99 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -144,16 +144,12 @@ int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, { struct mlx5_flow_rule *flow_rule; struct mlx5_esw_sq *esw_sq; - int vport; int err; int i; if (esw->mode != SRIOV_OFFLOADS) return 0; - vport = rep->vport == 0 ? 
- FDB_UPLINK_VPORT : rep->vport; - for (i = 0; i < sqns_num; i++) { esw_sq = kzalloc(sizeof(*esw_sq), GFP_KERNEL); if (!esw_sq) { @@ -163,7 +159,7 @@ int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, /* Add re-inject rule to the PF/representor sqs */ flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, - vport, + rep->vport, sqns_array[i]); if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); @@ -620,27 +616,30 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) } void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep) + int vport_index, + struct mlx5_eswitch_rep *__rep) { struct mlx5_esw_offload *offloads = &esw->offloads; + struct mlx5_eswitch_rep *rep; + + rep = &offloads->vport_reps[vport_index]; - memcpy(&offloads->vport_reps[rep->vport], rep, - sizeof(struct mlx5_eswitch_rep)); + memcpy(rep, __rep, sizeof(struct mlx5_eswitch_rep)); - INIT_LIST_HEAD(&offloads->vport_reps[rep->vport].vport_sqs_list); - offloads->vport_reps[rep->vport].valid = true; + INIT_LIST_HEAD(&rep->vport_sqs_list); + rep->valid = true; } void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, - int vport) + int vport_index) { struct mlx5_esw_offload *offloads = &esw->offloads; struct mlx5_eswitch_rep *rep; - rep = &offloads->vport_reps[vport]; + rep = &offloads->vport_reps[vport_index]; - if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport].enabled) + if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport_index].enabled) rep->unload(esw, rep); - offloads->vport_reps[vport].valid = false; + rep->valid = false; } -- cgit v1.1 From bac9b6aa1df7d584d72558cdd12df186e91245b3 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 22 Sep 2016 20:01:43 +0300 Subject: net/mlx5: E-Switch, Set vport representor fields explicitly on registration The structure we use for the eswitch vport representor (mlx5_eswitch_rep) has some fields which are set from upper layers in the driver when they register the rep. 
Use explicit setting on registration time for them and avoid global memcpy. This patch doesn't add new functionality. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 5 +++-- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 8 +++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 6d8c5a2..ebfcde0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -178,11 +178,12 @@ struct mlx5_eswitch_rep { void (*unload)(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep); u16 vport; - struct mlx5_flow_rule *vport_rx_rule; + u8 hw_id[ETH_ALEN]; void *priv_data; + + struct mlx5_flow_rule *vport_rx_rule; struct list_head vport_sqs_list; bool valid; - u8 hw_id[ETH_ALEN]; }; struct mlx5_esw_offload { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 516ac99..80c6f4f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -624,7 +624,13 @@ void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, rep = &offloads->vport_reps[vport_index]; - memcpy(rep, __rep, sizeof(struct mlx5_eswitch_rep)); + memset(rep, 0, sizeof(*rep)); + + rep->load = __rep->load; + rep->unload = __rep->unload; + rep->vport = __rep->vport; + rep->priv_data = __rep->priv_data; + ether_addr_copy(rep->hw_id, __rep->hw_id); INIT_LIST_HEAD(&rep->vport_sqs_list); rep->valid = true; -- cgit v1.1 From e33dfe316cf3b408e63bf0c21be0842412eb7981 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 22 Sep 2016 20:01:44 +0300 Subject: net/mlx5: E-Switch, Allow fine tuning of eswitch vport push/pop vlan The HW can be programmed to push vlan, pop vlan or 
both. A factorization step towards using the push/pop capabilities in the eswitch offloads mode. This patch doesn't add new functionality. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 33 +++++++++++++++-------- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 5 ++++ 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index f75f864..abbf2c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -127,7 +127,7 @@ static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, } static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, - u16 vlan, u8 qos, bool set) + u16 vlan, u8 qos, u8 set_flags) { u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {0}; @@ -135,14 +135,18 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist)) return -ENOTSUPP; - esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%d\n", - vport, vlan, qos, set); - if (set) { + esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%x\n", + vport, vlan, qos, set_flags); + + if (set_flags & SET_VLAN_STRIP) MLX5_SET(modify_esw_vport_context_in, in, esw_vport_context.vport_cvlan_strip, 1); + + if (set_flags & SET_VLAN_INSERT) { /* insert only if no vlan in packet */ MLX5_SET(modify_esw_vport_context_in, in, esw_vport_context.vport_cvlan_insert, 1); + MLX5_SET(modify_esw_vport_context_in, in, esw_vport_context.cvlan_pcp, qos); MLX5_SET(modify_esw_vport_context_in, in, @@ -1778,25 +1782,21 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, return 0; } -int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, - int vport, u16 vlan, u8 qos) +int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + int vport, u16 vlan, u8 
qos, u8 set_flags) { struct mlx5_vport *evport; int err = 0; - int set = 0; if (!ESW_ALLOWED(esw)) return -EPERM; if (!LEGAL_VPORT(esw, vport) || (vlan > 4095) || (qos > 7)) return -EINVAL; - if (vlan || qos) - set = 1; - mutex_lock(&esw->state_lock); evport = &esw->vports[vport]; - err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set); + err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags); if (err) goto unlock; @@ -1814,6 +1814,17 @@ unlock: return err; } +int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + int vport, u16 vlan, u8 qos) +{ + u8 set_flags = 0; + + if (vlan || qos) + set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT; + + return __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags); +} + int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, int vport, bool spoofchk) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index ebfcde0..4f5391a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -246,6 +246,11 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_rule * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn); +enum { + SET_VLAN_STRIP = BIT(0), + SET_VLAN_INSERT = BIT(1) +}; + int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, u16 *sqns_array, int sqns_num); -- cgit v1.1 From 776b12b674db53012a7ce8c379a0bbdec0a5ffa5 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 22 Sep 2016 20:01:45 +0300 Subject: net/mlx5: Put elements related to offloaded TC rule in one struct Put the representors related to the source and dest vports and the action in struct mlx5_esw_flow_attr which is used while setting the FDB rule. This patch doesn't change any functionality. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 51 ++++++++++++---------- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 10 ++++- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 9 ++-- 3 files changed, 44 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 783e122..3eb319b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "en.h" #include "en_tc.h" #include "eswitch.h" @@ -47,6 +48,7 @@ struct mlx5e_tc_flow { struct rhash_head node; u64 cookie; struct mlx5_flow_rule *rule; + struct mlx5_esw_flow_attr *attr; }; #define MLX5E_TC_TABLE_NUM_ENTRIES 1024 @@ -114,15 +116,11 @@ err_create_ft: static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, - u32 action, u32 dst_vport) + struct mlx5_esw_flow_attr *attr) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_eswitch_rep *rep = priv->ppriv; - u32 src_vport; - src_vport = rep->vport; - - return mlx5_eswitch_add_offloaded_rule(esw, spec, action, src_vport, dst_vport); + return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); } static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, @@ -358,7 +356,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, } static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, - u32 *action, u32 *dest_vport) + struct mlx5_esw_flow_attr *attr) { const struct tc_action *a; LIST_HEAD(actions); @@ -366,17 +364,18 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, if (tc_no_actions(exts)) return -EINVAL; - *action = 0; + memset(attr, 0, sizeof(*attr)); + attr->in_rep = priv->ppriv; tcf_exts_to_list(exts, &actions); list_for_each_entry(a, &actions, list) { /* Only support a single action per rule */ - if 
(*action) + if (attr->action) return -EINVAL; if (is_tcf_gact_shot(a)) { - *action = MLX5_FLOW_CONTEXT_ACTION_DROP | - MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->action = MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_COUNT; continue; } @@ -384,7 +383,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, int ifindex = tcf_mirred_ifindex(a); struct net_device *out_dev; struct mlx5e_priv *out_priv; - struct mlx5_eswitch_rep *out_rep; out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex); @@ -394,10 +392,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return -EINVAL; } + attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; out_priv = netdev_priv(out_dev); - out_rep = out_priv->ppriv; - *dest_vport = out_rep->vport; - *action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->out_rep = out_priv->ppriv; continue; } @@ -411,18 +408,27 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, { struct mlx5e_tc_table *tc = &priv->fs.tc; int err = 0; - u32 flow_tag, action, dest_vport = 0; + bool fdb_flow = false; + u32 flow_tag, action; struct mlx5e_tc_flow *flow; struct mlx5_flow_spec *spec; struct mlx5_flow_rule *old = NULL; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + if (esw && esw->mode == SRIOV_OFFLOADS) + fdb_flow = true; + flow = rhashtable_lookup_fast(&tc->ht, &f->cookie, tc->ht_params); - if (flow) + if (flow) { old = flow->rule; - else - flow = kzalloc(sizeof(*flow), GFP_KERNEL); + } else { + if (fdb_flow) + flow = kzalloc(sizeof(*flow) + sizeof(struct mlx5_esw_flow_attr), + GFP_KERNEL); + else + flow = kzalloc(sizeof(*flow), GFP_KERNEL); + } spec = mlx5_vzalloc(sizeof(*spec)); if (!spec || !flow) { @@ -436,11 +442,12 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, if (err < 0) goto err_free; - if (esw && esw->mode == SRIOV_OFFLOADS) { - err = parse_tc_fdb_actions(priv, f->exts, &action, &dest_vport); + if (fdb_flow) { + flow->attr = (struct 
mlx5_esw_flow_attr *)(flow + 1); + err = parse_tc_fdb_actions(priv, f->exts, flow->attr); if (err < 0) goto err_free; - flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, action, dest_vport); + flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, flow->attr); } else { err = parse_tc_nic_actions(priv, f->exts, &action, &flow_tag); if (err < 0) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 4f5391a..eeeeadc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -238,11 +238,12 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, struct ifla_vf_stats *vf_stats); struct mlx5_flow_spec; +struct mlx5_esw_flow_attr; struct mlx5_flow_rule * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, - u32 action, u32 src_vport, u32 dst_vport); + struct mlx5_esw_flow_attr *attr); struct mlx5_flow_rule * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn); @@ -251,6 +252,13 @@ enum { SET_VLAN_INSERT = BIT(1) }; +struct mlx5_esw_flow_attr { + struct mlx5_eswitch_rep *in_rep; + struct mlx5_eswitch_rep *out_rep; + + int action; +}; + int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, u16 *sqns_array, int sqns_num); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 80c6f4f..781debb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -46,19 +46,22 @@ enum { struct mlx5_flow_rule * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, - u32 action, u32 src_vport, u32 dst_vport) + struct mlx5_esw_flow_attr *attr) { struct mlx5_flow_destination dest = { 0 }; struct mlx5_fc *counter = NULL; struct mlx5_flow_rule *rule; void *misc; + int action; if 
(esw->mode != SRIOV_OFFLOADS) return ERR_PTR(-EOPNOTSUPP); + action = attr->action; + if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest.vport_num = dst_vport; + dest.vport_num = attr->out_rep->vport; action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(esw->dev, true); @@ -69,7 +72,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, } misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - MLX5_SET(fte_match_set_misc, misc, source_port, src_vport); + MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport); misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); -- cgit v1.1 From 8515c581dfa574420559d8cef24c2ba24e8eb8dd Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 22 Sep 2016 20:01:46 +0300 Subject: net/mlx5e: Refactor retrival of skb from rx completion element (cqe) Factor the relevant code into a static inline helper (skb_from_cqe) doing that. Move the call to napi_gro_receive to be carried out just after mlx5e_complete_rx_cqe returns. Both changes are to be used for the VF representor as well in the next commit. This patch doesn't change any functionality. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 41 +++++++++++++++++-------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 0a81bd3..e836e47 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -629,7 +629,6 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, rq->stats.packets++; rq->stats.bytes += cqe_bcnt; mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb); - napi_gro_receive(rq->cq.napi, skb); } static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_sq *sq) @@ -733,20 +732,15 @@ static inline bool mlx5e_xdp_handle(struct mlx5e_rq *rq, } } -void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +static inline +struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, + u16 wqe_counter, u32 cqe_bcnt) { struct bpf_prog *xdp_prog = READ_ONCE(rq->xdp_prog); struct mlx5e_dma_info *di; - struct mlx5e_rx_wqe *wqe; - __be16 wqe_counter_be; struct sk_buff *skb; - u16 wqe_counter; void *va, *data; - u32 cqe_bcnt; - wqe_counter_be = cqe->wqe_counter; - wqe_counter = be16_to_cpu(wqe_counter_be); - wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); di = &rq->dma_info[wqe_counter]; va = page_address(di->page); data = va + MLX5_RX_HEADROOM; @@ -757,22 +751,21 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) rq->buff.wqe_sz, DMA_FROM_DEVICE); prefetch(data); - cqe_bcnt = be32_to_cpu(cqe->byte_cnt); if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { rq->stats.wqe_err++; mlx5e_page_release(rq, di, true); - goto wq_ll_pop; + return NULL; } if (mlx5e_xdp_handle(rq, xdp_prog, di, data, cqe_bcnt)) - goto wq_ll_pop; /* page/packet was consumed by XDP */ + return NULL; /* page/packet was consumed by XDP */ skb = build_skb(va, RQ_PAGE_SIZE(rq)); if (unlikely(!skb)) { rq->stats.buff_alloc_err++; mlx5e_page_release(rq, di, true); - 
goto wq_ll_pop; + return NULL; } /* queue up for recycling ..*/ @@ -782,7 +775,28 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) skb_reserve(skb, MLX5_RX_HEADROOM); skb_put(skb, cqe_bcnt); + return skb; +} + +void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct mlx5e_rx_wqe *wqe; + __be16 wqe_counter_be; + struct sk_buff *skb; + u16 wqe_counter; + u32 cqe_bcnt; + + wqe_counter_be = cqe->wqe_counter; + wqe_counter = be16_to_cpu(wqe_counter_be); + wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + + skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt); + if (!skb) + goto wq_ll_pop; + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + napi_gro_receive(rq->cq.napi, skb); wq_ll_pop: mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, @@ -861,6 +875,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) mlx5e_mpwqe_fill_rx_skb(rq, cqe, wi, cqe_bcnt, skb); mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + napi_gro_receive(rq->cq.napi, skb); mpwrq_cqe_out: if (likely(wi->consumed_strides < rq->mpwqe_num_strides)) -- cgit v1.1 From f5f82476090fd2c6fc4fde03ba61aef984900009 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 22 Sep 2016 20:01:47 +0300 Subject: net/mlx5: E-Switch, Support VLAN actions in the offloads mode Many virtualization systems use a policy under which a vlan tag is pushed to packets sent by guests, and popped before the packet is forwarded to the VM. The current generation of the mlx5 HW doesn't fully support that on a per flow level. As such, we are addressing the above common use case with the SRIOV e-Switch abilities to push vlan into packets sent by VFs and pop vlan from packets forwarded to VFs. The HW can match on the correct vlan being present in packets forwarded to VFs (eSwitch steering is done before stripping the tag), so this part is offloaded as is. 
A common practice for vlans is to avoid both push vlan and pop vlan for inter-host VM/VM (east-west) communication because in this case, push on egress cancels out with pop on ingress. For supporting that, we use a global eswitch vlan pop policy, hence allowing guest A to communicate with both remote VM B and local VM C. This works since the HW pops the vlan only if it exists (e.g. for C --> A packets but not for B --> A packets). On the slow path, when a VF vport has an offloaded flow which involves pushing vlans, whereas another flow is not currently offloaded, the packets from the 2nd flow seen by the VF representor on the host have vlan. The VF rep driver removes such vlan before calling into the host networking stack. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 21 ++- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 33 ++++ drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 15 ++ .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 180 +++++++++++++++++++++ 5 files changed, 249 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 3460154..460363b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -869,6 +869,7 @@ void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw, int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv); int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr); +void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); int mlx5e_create_direct_rqts(struct mlx5e_priv *priv); void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 
b309e7c..c127923 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -446,6 +446,16 @@ static void mlx5e_rq_free_mpwqe_info(struct mlx5e_rq *rq) kfree(rq->mpwqe.info); } +static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch_rep *rep = (struct mlx5_eswitch_rep *)priv->ppriv; + + if (rep && rep->vport != FDB_UPLINK_VPORT) + return true; + + return false; +} + static int mlx5e_create_rq(struct mlx5e_channel *c, struct mlx5e_rq_param *param, struct mlx5e_rq *rq) @@ -487,6 +497,11 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, switch (priv->params.rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + if (mlx5e_is_vf_vport_rep(priv)) { + err = -EINVAL; + goto err_rq_wq_destroy; + } + rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq; rq->alloc_wqe = mlx5e_alloc_rx_mpwqe; rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; @@ -512,7 +527,11 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, goto err_rq_wq_destroy; } - rq->handle_rx_cqe = mlx5e_handle_rx_cqe; + if (mlx5e_is_vf_vport_rep(priv)) + rq->handle_rx_cqe = mlx5e_handle_rx_cqe_rep; + else + rq->handle_rx_cqe = mlx5e_handle_rx_cqe; + rq->alloc_wqe = mlx5e_alloc_rx_wqe; rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index e836e47..c6de6fb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -36,6 +36,7 @@ #include #include "en.h" #include "en_tc.h" +#include "eswitch.h" static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp) { @@ -803,6 +804,38 @@ wq_ll_pop: &wqe->next.next_wqe_index); } +void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct net_device *netdev = rq->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rx_wqe *wqe; + struct 
sk_buff *skb; + __be16 wqe_counter_be; + u16 wqe_counter; + u32 cqe_bcnt; + + wqe_counter_be = cqe->wqe_counter; + wqe_counter = be16_to_cpu(wqe_counter_be); + wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + + skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt); + if (!skb) + goto wq_ll_pop; + + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + + if (rep->vlan && skb_vlan_tag_present(skb)) + skb_vlan_pop(skb); + + napi_gro_receive(rq->cq.napi, skb); + +wq_ll_pop: + mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, + &wqe->next.next_wqe_index); +} + static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, struct mlx5e_mpw_info *wi, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index eeeeadc..2e2938e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -157,6 +157,7 @@ struct mlx5_eswitch_fdb { struct mlx5_flow_group *send_to_vport_grp; struct mlx5_flow_group *miss_grp; struct mlx5_flow_rule *miss_rule; + int vlan_push_pop_refcount; } offloads; }; }; @@ -183,6 +184,8 @@ struct mlx5_eswitch_rep { struct mlx5_flow_rule *vport_rx_rule; struct list_head vport_sqs_list; + u16 vlan; + u32 vlan_refcount; bool valid; }; @@ -252,11 +255,16 @@ enum { SET_VLAN_INSERT = BIT(1) }; +#define MLX5_FLOW_CONTEXT_ACTION_VLAN_POP 0x40 +#define MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH 0x80 + struct mlx5_esw_flow_attr { struct mlx5_eswitch_rep *in_rep; struct mlx5_eswitch_rep *out_rep; int action; + u16 vlan; + bool vlan_handled; }; int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, @@ -273,6 +281,13 @@ void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, int vport_index); +int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr); +int mlx5_eswitch_del_vlan_action(struct 
mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr); +int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + int vport, u16 vlan, u8 qos, u8 set_flags); + #define MLX5_DEBUG_ESWITCH_MASK BIT(3) #define esw_info(dev, format, ...) \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 781debb..c55ad8d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -89,6 +89,186 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, return rule; } +static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) +{ + struct mlx5_eswitch_rep *rep; + int vf_vport, err = 0; + + esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none"); + for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) { + rep = &esw->offloads.vport_reps[vf_vport]; + if (!rep->valid) + continue; + + err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val); + if (err) + goto out; + } + +out: + return err; +} + +static struct mlx5_eswitch_rep * +esw_vlan_action_get_vport(struct mlx5_esw_flow_attr *attr, bool push, bool pop) +{ + struct mlx5_eswitch_rep *in_rep, *out_rep, *vport = NULL; + + in_rep = attr->in_rep; + out_rep = attr->out_rep; + + if (push) + vport = in_rep; + else if (pop) + vport = out_rep; + else + vport = in_rep; + + return vport; +} + +static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr, + bool push, bool pop, bool fwd) +{ + struct mlx5_eswitch_rep *in_rep, *out_rep; + + if ((push || pop) && !fwd) + goto out_notsupp; + + in_rep = attr->in_rep; + out_rep = attr->out_rep; + + if (push && in_rep->vport == FDB_UPLINK_VPORT) + goto out_notsupp; + + if (pop && out_rep->vport == FDB_UPLINK_VPORT) + goto out_notsupp; + + /* vport has vlan push configured, can't offload VF --> wire rules w.o it */ + if (!push && !pop && fwd) + if (in_rep->vlan && 
out_rep->vport == FDB_UPLINK_VPORT) + goto out_notsupp; + + /* protects against (1) setting rules with different vlans to push and + * (2) setting rules w.o vlans (attr->vlan = 0) && w. vlans to push (!= 0) + */ + if (push && in_rep->vlan_refcount && (in_rep->vlan != attr->vlan)) + goto out_notsupp; + + return 0; + +out_notsupp: + return -ENOTSUPP; +} + +int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr) +{ + struct offloads_fdb *offloads = &esw->fdb_table.offloads; + struct mlx5_eswitch_rep *vport = NULL; + bool push, pop, fwd; + int err = 0; + + push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH); + pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); + fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); + + err = esw_add_vlan_action_check(attr, push, pop, fwd); + if (err) + return err; + + attr->vlan_handled = false; + + vport = esw_vlan_action_get_vport(attr, push, pop); + + if (!push && !pop && fwd) { + /* tracks VF --> wire rules without vlan push action */ + if (attr->out_rep->vport == FDB_UPLINK_VPORT) { + vport->vlan_refcount++; + attr->vlan_handled = true; + } + + return 0; + } + + if (!push && !pop) + return 0; + + if (!(offloads->vlan_push_pop_refcount)) { + /* it's the 1st vlan rule, apply global vlan pop policy */ + err = esw_set_global_vlan_pop(esw, SET_VLAN_STRIP); + if (err) + goto out; + } + offloads->vlan_push_pop_refcount++; + + if (push) { + if (vport->vlan_refcount) + goto skip_set_push; + + err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, attr->vlan, 0, + SET_VLAN_INSERT | SET_VLAN_STRIP); + if (err) + goto out; + vport->vlan = attr->vlan; +skip_set_push: + vport->vlan_refcount++; + } +out: + if (!err) + attr->vlan_handled = true; + return err; +} + +int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr) +{ + struct offloads_fdb *offloads = &esw->fdb_table.offloads; + struct mlx5_eswitch_rep *vport = NULL; + bool push, pop, fwd; + 
int err = 0; + + if (!attr->vlan_handled) + return 0; + + push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH); + pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); + fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); + + vport = esw_vlan_action_get_vport(attr, push, pop); + + if (!push && !pop && fwd) { + /* tracks VF --> wire rules without vlan push action */ + if (attr->out_rep->vport == FDB_UPLINK_VPORT) + vport->vlan_refcount--; + + return 0; + } + + if (push) { + vport->vlan_refcount--; + if (vport->vlan_refcount) + goto skip_unset_push; + + vport->vlan = 0; + err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, + 0, 0, SET_VLAN_STRIP); + if (err) + goto out; + } + +skip_unset_push: + offloads->vlan_push_pop_refcount--; + if (offloads->vlan_push_pop_refcount) + return 0; + + /* no more vlan rules, stop global vlan pop policy */ + err = esw_set_global_vlan_pop(esw, 0); + +out: + return err; +} + static struct mlx5_flow_rule * mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn) { -- cgit v1.1 From 8b32580df1cb4dc9cccb2d369d20317f7f74d9ce Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 22 Sep 2016 20:01:48 +0300 Subject: net/mlx5e: Add TC vlan action for SRIOV offloads Parse TC vlan actions and set the required elements to allow offloading. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 43 ++++++++++++++++++------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 3eb319b..e61bd52 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -119,17 +119,27 @@ static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5_esw_flow_attr *attr) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + int err; + + err = mlx5_eswitch_add_vlan_action(esw, attr); + if (err) + return ERR_PTR(err); return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); } static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, - struct mlx5_flow_rule *rule) + struct mlx5_flow_rule *rule, + struct mlx5_esw_flow_attr *attr) { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_fc *counter = NULL; counter = mlx5_flow_rule_counter(rule); + if (esw && esw->mode == SRIOV_OFFLOADS) + mlx5_eswitch_del_vlan_action(esw, attr); + mlx5_del_flow_rule(rule); mlx5_fc_destroy(priv->mdev, counter); @@ -369,13 +379,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, tcf_exts_to_list(exts, &actions); list_for_each_entry(a, &actions, list) { - /* Only support a single action per rule */ - if (attr->action) - return -EINVAL; - if (is_tcf_gact_shot(a)) { - attr->action = MLX5_FLOW_CONTEXT_ACTION_DROP | - MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_COUNT; continue; } @@ -392,12 +398,25 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return -EINVAL; } - attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; out_priv = netdev_priv(out_dev); attr->out_rep = out_priv->ppriv; continue; } + if (is_tcf_vlan(a)) { + if (tcf_vlan_action(a) == VLAN_F_POP) { 
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; + } else if (tcf_vlan_action(a) == VLAN_F_PUSH) { + if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q)) + return -EOPNOTSUPP; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + attr->vlan = tcf_vlan_push_vid(a); + } + continue; + } + return -EINVAL; } return 0; @@ -413,6 +432,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, struct mlx5e_tc_flow *flow; struct mlx5_flow_spec *spec; struct mlx5_flow_rule *old = NULL; + struct mlx5_esw_flow_attr *old_attr; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; if (esw && esw->mode == SRIOV_OFFLOADS) @@ -422,6 +442,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, tc->ht_params); if (flow) { old = flow->rule; + old_attr = flow->attr; } else { if (fdb_flow) flow = kzalloc(sizeof(*flow) + sizeof(struct mlx5_esw_flow_attr), @@ -466,7 +487,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, goto err_del_rule; if (old) - mlx5e_tc_del_flow(priv, old); + mlx5e_tc_del_flow(priv, old, old_attr); goto out; @@ -494,7 +515,7 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv, rhashtable_remove_fast(&tc->ht, &flow->node, tc->ht_params); - mlx5e_tc_del_flow(priv, flow->rule); + mlx5e_tc_del_flow(priv, flow->rule, flow->attr); kfree(flow); @@ -551,7 +572,7 @@ static void _mlx5e_tc_del_flow(void *ptr, void *arg) struct mlx5e_tc_flow *flow = ptr; struct mlx5e_priv *priv = arg; - mlx5e_tc_del_flow(priv, flow->rule); + mlx5e_tc_del_flow(priv, flow->rule, flow->attr); kfree(flow); } -- cgit v1.1 From 095b6cfd69cedc8050b69535af8bf718ce0e9aad Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 22 Sep 2016 20:01:49 +0300 Subject: net/mlx5e: Add TC vlan match parsing Enhance the parsing of offloaded TC rules matches to handle vlans. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index e61bd52..a350b71 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -164,6 +164,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | BIT(FLOW_DISSECTOR_KEY_BASIC) | BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_VLAN) | BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | BIT(FLOW_DISSECTOR_KEY_PORTS))) { @@ -227,6 +228,24 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec key->src); } + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_dissector_key_vlan *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_VLAN, + f->key); + struct flow_dissector_key_vlan *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_VLAN, + f->mask); + if (mask->vlan_id) { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, vlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, vlan_tag, 1); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id); + } + } + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { struct flow_dissector_key_ipv4_addrs *key = skb_flow_dissector_target(f->dissector, -- cgit v1.1 From e12934d9806e61d2727069cd56757987f3da76aa Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 22 Sep 2016 18:48:58 +0100 Subject: cxgb4: fix signed wrap around when decrementing index idx Change predecrement compare to post decrement compare to avoid an unsigned integer wrap-around comparison when decrementing idx in the while loop. 
For example, when idx is zero, the current situation will predecrement idx in the while loop, wrapping idx to the maximum signed integer and cause out of bounds reads on rxq_info->msix_tbl[idx]. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index d12a73e..f13b593 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -367,7 +367,7 @@ int request_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type) } return 0; unwind: - while (--idx >= 0) { + while (idx-- > 0) { bmap_idx = rxq_info->msix_tbl[idx]; free_msix_idx_in_bmap(adap, bmap_idx); free_irq(adap->msix_info_ulds[bmap_idx].vec, -- cgit v1.1 From b24d2891cfb0a7975b0039743439c98fe7b7dea7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 13:17:33 +0100 Subject: rxrpc: Preset timestamp on Tx sk_buffs Set the timestamp on sk_buffs holding packets to be transmitted before queueing them because the moment the packet is on the queue it can be seen by the retransmission algorithm - which may see a completely random timestamp. If the retransmission algorithm sees such a timestamp, it may retransmit the packet and, in future, tell the congestion management algorithm that the retransmit timer expired. Signed-off-by: David Howells --- net/rxrpc/sendmsg.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index ca7c3be..ca3811b 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -99,6 +99,11 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, ASSERTCMP(seq, ==, call->tx_top + 1); + /* We have to set the timestamp before queueing as the retransmit + * algorithm can see the packet as soon as we queue it. 
+ */ + skb->tstamp = ktime_get_real(); + ix = seq & RXRPC_RXTX_BUFF_MASK; rxrpc_get_skb(skb, rxrpc_skb_tx_got); call->rxtx_annotations[ix] = RXRPC_TX_ANNO_UNACK; -- cgit v1.1 From 9aff212bd677829189fae2e2e408cefc196ae5ae Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 12:39:23 +0100 Subject: rxrpc: Don't send an ACK at the end of service call response transmission Don't send an IDLE ACK at the end of the transmission of the response to a service call. The service end resends DATA packets until the client sends an ACK that hard-acks all the send data. At that point, the call is complete. Signed-off-by: David Howells --- net/rxrpc/recvmsg.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 6ba4af5..99e4c0a 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -143,8 +143,6 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call) if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, true, false); rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); - } else { - rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, false, false); } write_lock_bh(&call->state_lock); -- cgit v1.1 From c0d058c21c69b3685c3f1bb008aa11f1a5eaee7e Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 12:39:23 +0100 Subject: rxrpc: Make sure sendmsg() is woken on call completion Make sure that sendmsg() gets woken up if the call it is waiting for completes abnormally. 
Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index b13754a..808ab75 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -758,6 +758,7 @@ static inline bool __rxrpc_set_call_completion(struct rxrpc_call *call, call->error = error; call->completion = compl, call->state = RXRPC_CALL_COMPLETE; + wake_up(&call->waitq); return true; } return false; -- cgit v1.1 From 90bd684ded900673d86f64f4b4197704a38f04bc Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 12:39:23 +0100 Subject: rxrpc: Should be using ktime_add_ms() not ktime_add_ns() ktime_add_ms() should be used to add the resend time (in ms) rather than ktime_add_ns(). Signed-off-by: David Howells --- net/rxrpc/call_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 6e2ea8f..a2909da 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -187,7 +187,7 @@ static void rxrpc_resend(struct rxrpc_call *call) call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS | annotation; } - resend_at = ktime_sub(ktime_add_ns(oldest, rxrpc_resend_timeout), now); + resend_at = ktime_sub(ktime_add_ms(oldest, rxrpc_resend_timeout), now); call->resend_at = jiffies + nsecs_to_jiffies(ktime_to_ns(resend_at)); /* Now go through the Tx window and perform the retransmissions. We -- cgit v1.1 From e3978673f514fa4999f04dfad9bbd5bb70d0edc6 Mon Sep 17 00:00:00 2001 From: Iyappan Subramanian Date: Thu, 22 Sep 2016 15:47:33 -0700 Subject: drivers: net: xgene: Fix MSS programming The current driver programs a static value of MSS in the hardware register for the TSO offload engine to segment the TCP payload, regardless of the MSS value provided by the network stack. This patch fixes this by programming the hardware registers with the stack-provided MSS value. 
Since the hardware has the limitation of having only 4 MSS registers, this patch uses reference count of mss values being used. Signed-off-by: Iyappan Subramanian Signed-off-by: Toan Le Signed-off-by: David S. Miller --- drivers/net/ethernet/apm/xgene/xgene_enet_hw.h | 7 ++ drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 90 ++++++++++++++++++----- drivers/net/ethernet/apm/xgene/xgene_enet_main.h | 8 +- drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c | 18 ++++- 4 files changed, 100 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h index 8a8d055..8456337 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h @@ -237,6 +237,8 @@ enum xgene_enet_rm { #define TCPHDR_LEN 6 #define IPHDR_POS 6 #define IPHDR_LEN 6 +#define MSS_POS 20 +#define MSS_LEN 2 #define EC_POS 22 /* Enable checksum */ #define EC_LEN 1 #define ET_POS 23 /* Enable TSO */ @@ -253,6 +255,11 @@ enum xgene_enet_rm { #define LAST_BUFFER (0x7800ULL << BUFDATALEN_POS) +#define TSO_MSS0_POS 0 +#define TSO_MSS0_LEN 14 +#define TSO_MSS1_POS 16 +#define TSO_MSS1_LEN 14 + struct xgene_enet_raw_desc { __le64 m0; __le64 m1; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 522ba92..429f18f 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -137,6 +137,7 @@ static irqreturn_t xgene_enet_rx_irq(const int irq, void *data) static int xgene_enet_tx_completion(struct xgene_enet_desc_ring *cp_ring, struct xgene_enet_raw_desc *raw_desc) { + struct xgene_enet_pdata *pdata = netdev_priv(cp_ring->ndev); struct sk_buff *skb; struct device *dev; skb_frag_t *frag; @@ -144,6 +145,7 @@ static int xgene_enet_tx_completion(struct xgene_enet_desc_ring *cp_ring, u16 skb_index; u8 status; int i, ret = 0; + u8 mss_index; skb_index = GET_VAL(USERINFO, 
le64_to_cpu(raw_desc->m0)); skb = cp_ring->cp_skb[skb_index]; @@ -160,6 +162,13 @@ static int xgene_enet_tx_completion(struct xgene_enet_desc_ring *cp_ring, DMA_TO_DEVICE); } + if (GET_BIT(ET, le64_to_cpu(raw_desc->m3))) { + mss_index = GET_VAL(MSS, le64_to_cpu(raw_desc->m3)); + spin_lock(&pdata->mss_lock); + pdata->mss_refcnt[mss_index]--; + spin_unlock(&pdata->mss_lock); + } + /* Checking for error */ status = GET_VAL(LERR, le64_to_cpu(raw_desc->m0)); if (unlikely(status > 2)) { @@ -178,15 +187,53 @@ static int xgene_enet_tx_completion(struct xgene_enet_desc_ring *cp_ring, return ret; } -static u64 xgene_enet_work_msg(struct sk_buff *skb) +static int xgene_enet_setup_mss(struct net_device *ndev, u32 mss) +{ + struct xgene_enet_pdata *pdata = netdev_priv(ndev); + bool mss_index_found = false; + int mss_index; + int i; + + spin_lock(&pdata->mss_lock); + + /* Reuse the slot if MSS matches */ + for (i = 0; !mss_index_found && i < NUM_MSS_REG; i++) { + if (pdata->mss[i] == mss) { + pdata->mss_refcnt[i]++; + mss_index = i; + mss_index_found = true; + } + } + + /* Overwrite the slot with ref_count = 0 */ + for (i = 0; !mss_index_found && i < NUM_MSS_REG; i++) { + if (!pdata->mss_refcnt[i]) { + pdata->mss_refcnt[i]++; + pdata->mac_ops->set_mss(pdata, mss, i); + pdata->mss[i] = mss; + mss_index = i; + mss_index_found = true; + } + } + + spin_unlock(&pdata->mss_lock); + + /* No slots with ref_count = 0 available, return busy */ + if (!mss_index_found) + return -EBUSY; + + return mss_index; +} + +static int xgene_enet_work_msg(struct sk_buff *skb, u64 *hopinfo) { struct net_device *ndev = skb->dev; struct iphdr *iph; u8 l3hlen = 0, l4hlen = 0; u8 ethhdr, proto = 0, csum_enable = 0; - u64 hopinfo = 0; u32 hdr_len, mss = 0; u32 i, len, nr_frags; + int mss_index; ethhdr = xgene_enet_hdr_len(skb->data); @@ -226,7 +273,11 @@ static u64 xgene_enet_work_msg(struct sk_buff *skb) if (!mss || ((skb->len - hdr_len) <= mss)) goto out; - hopinfo |= SET_BIT(ET); + mss_index = 
xgene_enet_setup_mss(ndev, mss); + if (unlikely(mss_index < 0)) + return -EBUSY; + + *hopinfo |= SET_BIT(ET) | SET_VAL(MSS, mss_index); } } else if (iph->protocol == IPPROTO_UDP) { l4hlen = UDP_HDR_SIZE; @@ -234,15 +285,15 @@ static u64 xgene_enet_work_msg(struct sk_buff *skb) } out: l3hlen = ip_hdrlen(skb) >> 2; - hopinfo |= SET_VAL(TCPHDR, l4hlen) | - SET_VAL(IPHDR, l3hlen) | - SET_VAL(ETHHDR, ethhdr) | - SET_VAL(EC, csum_enable) | - SET_VAL(IS, proto) | - SET_BIT(IC) | - SET_BIT(TYPE_ETH_WORK_MESSAGE); - - return hopinfo; + *hopinfo |= SET_VAL(TCPHDR, l4hlen) | + SET_VAL(IPHDR, l3hlen) | + SET_VAL(ETHHDR, ethhdr) | + SET_VAL(EC, csum_enable) | + SET_VAL(IS, proto) | + SET_BIT(IC) | + SET_BIT(TYPE_ETH_WORK_MESSAGE); + + return 0; } static u16 xgene_enet_encode_len(u16 len) @@ -282,20 +333,22 @@ static int xgene_enet_setup_tx_desc(struct xgene_enet_desc_ring *tx_ring, dma_addr_t dma_addr, pbuf_addr, *frag_dma_addr; skb_frag_t *frag; u16 tail = tx_ring->tail; - u64 hopinfo; + u64 hopinfo = 0; u32 len, hw_len; u8 ll = 0, nv = 0, idx = 0; bool split = false; u32 size, offset, ell_bytes = 0; u32 i, fidx, nr_frags, count = 1; + int ret; raw_desc = &tx_ring->raw_desc[tail]; tail = (tail + 1) & (tx_ring->slots - 1); memset(raw_desc, 0, sizeof(struct xgene_enet_raw_desc)); - hopinfo = xgene_enet_work_msg(skb); - if (!hopinfo) - return -EINVAL; + ret = xgene_enet_work_msg(skb, &hopinfo); + if (ret) + return ret; + raw_desc->m3 = cpu_to_le64(SET_VAL(HENQNUM, tx_ring->dst_ring_num) | hopinfo); @@ -435,6 +488,9 @@ static netdev_tx_t xgene_enet_start_xmit(struct sk_buff *skb, return NETDEV_TX_OK; count = xgene_enet_setup_tx_desc(tx_ring, skb); + if (count == -EBUSY) + return NETDEV_TX_BUSY; + if (count <= 0) { dev_kfree_skb_any(skb); return NETDEV_TX_OK; @@ -1669,7 +1725,7 @@ static int xgene_enet_probe(struct platform_device *pdev) if (pdata->phy_mode == PHY_INTERFACE_MODE_XGMII) { ndev->features |= NETIF_F_TSO; - pdata->mss = XGENE_ENET_MSS; + 
spin_lock_init(&pdata->mss_lock); } ndev->hw_features = ndev->features; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h index 7735371..0cda58f 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h @@ -47,7 +47,7 @@ #define NUM_PKT_BUF 64 #define NUM_BUFPOOL 32 #define MAX_EXP_BUFFS 256 -#define XGENE_ENET_MSS 1448 +#define NUM_MSS_REG 4 #define XGENE_MIN_ENET_FRAME_SIZE 60 #define XGENE_MAX_ENET_IRQ 16 @@ -143,7 +143,7 @@ struct xgene_mac_ops { void (*rx_disable)(struct xgene_enet_pdata *pdata); void (*set_speed)(struct xgene_enet_pdata *pdata); void (*set_mac_addr)(struct xgene_enet_pdata *pdata); - void (*set_mss)(struct xgene_enet_pdata *pdata); + void (*set_mss)(struct xgene_enet_pdata *pdata, u16 mss, u8 index); void (*link_state)(struct work_struct *work); }; @@ -212,7 +212,9 @@ struct xgene_enet_pdata { u8 eth_bufnum; u8 bp_bufnum; u16 ring_num; - u32 mss; + u32 mss[NUM_MSS_REG]; + u32 mss_refcnt[NUM_MSS_REG]; + spinlock_t mss_lock; /* mss lock */ u8 tx_delay; u8 rx_delay; bool mdio_driver; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c index 279ee27..6475f38 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c @@ -232,9 +232,22 @@ static void xgene_xgmac_set_mac_addr(struct xgene_enet_pdata *pdata) xgene_enet_wr_mac(pdata, HSTMACADR_MSW_ADDR, addr1); } -static void xgene_xgmac_set_mss(struct xgene_enet_pdata *pdata) +static void xgene_xgmac_set_mss(struct xgene_enet_pdata *pdata, + u16 mss, u8 index) { - xgene_enet_wr_csr(pdata, XG_TSIF_MSS_REG0_ADDR, pdata->mss); + u8 offset; + u32 data; + + offset = (index < 2) ? 
0 : 4; + xgene_enet_rd_csr(pdata, XG_TSIF_MSS_REG0_ADDR + offset, &data); + + if (!(index & 0x1)) + data = SET_VAL(TSO_MSS1, data >> TSO_MSS1_POS) | + SET_VAL(TSO_MSS0, mss); + else + data = SET_VAL(TSO_MSS1, mss) | SET_VAL(TSO_MSS0, data); + + xgene_enet_wr_csr(pdata, XG_TSIF_MSS_REG0_ADDR + offset, data); } static u32 xgene_enet_link_status(struct xgene_enet_pdata *pdata) @@ -258,7 +271,6 @@ static void xgene_xgmac_init(struct xgene_enet_pdata *pdata) xgene_enet_wr_mac(pdata, AXGMAC_CONFIG_1, data); xgene_xgmac_set_mac_addr(pdata); - xgene_xgmac_set_mss(pdata); xgene_enet_rd_csr(pdata, XG_RSIF_CONFIG_REG_ADDR, &data); data |= CFG_RSIF_FPBUFF_TIMEOUT_EN; -- cgit v1.1 From 4acfee8143b33efa8bda6f03fe1462d545ff8170 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 22 Sep 2016 16:49:21 -0400 Subject: net: dsa: add port STP state helper Add a void helper to set the STP state of a port, checking first if the required routine is provided by the driver. Signed-off-by: Vivien Didelot Signed-off-by: David S. 
Miller --- net/dsa/slave.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 9ecbe78..fd78d4c 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -69,6 +69,12 @@ static inline bool dsa_port_is_bridged(struct dsa_slave_priv *p) return !!p->bridge_dev; } +static void dsa_port_set_stp_state(struct dsa_switch *ds, int port, u8 state) +{ + if (ds->ops->port_stp_state_set) + ds->ops->port_stp_state_set(ds, port, state); +} + static int dsa_slave_open(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); @@ -104,8 +110,7 @@ static int dsa_slave_open(struct net_device *dev) goto clear_promisc; } - if (ds->ops->port_stp_state_set) - ds->ops->port_stp_state_set(ds, p->port, stp_state); + dsa_port_set_stp_state(ds, p->port, stp_state); if (p->phy) phy_start(p->phy); @@ -147,8 +152,7 @@ static int dsa_slave_close(struct net_device *dev) if (ds->ops->port_disable) ds->ops->port_disable(ds, p->port, p->phy); - if (ds->ops->port_stp_state_set) - ds->ops->port_stp_state_set(ds, p->port, BR_STATE_DISABLED); + dsa_port_set_stp_state(ds, p->port, BR_STATE_DISABLED); return 0; } @@ -354,7 +358,7 @@ static int dsa_slave_stp_state_set(struct net_device *dev, if (switchdev_trans_ph_prepare(trans)) return ds->ops->port_stp_state_set ? 
0 : -EOPNOTSUPP; - ds->ops->port_stp_state_set(ds, p->port, attr->u.stp_state); + dsa_port_set_stp_state(ds, p->port, attr->u.stp_state); return 0; } @@ -556,8 +560,7 @@ static void dsa_slave_bridge_port_leave(struct net_device *dev) /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer, * so allow it to be in BR_STATE_FORWARDING to be kept functional */ - if (ds->ops->port_stp_state_set) - ds->ops->port_stp_state_set(ds, p->port, BR_STATE_FORWARDING); + dsa_port_set_stp_state(ds, p->port, BR_STATE_FORWARDING); } static int dsa_slave_port_attr_get(struct net_device *dev, -- cgit v1.1 From 732f794c1baf58e1eb2be4431635829c3da655bd Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 22 Sep 2016 16:49:22 -0400 Subject: net: dsa: add port fast ageing Today the DSA drivers are in charge of flushing the MAC addresses associated to a port when its STP state changes from Learning or Forwarding, to Disabled or Blocking or Listening. This makes the drivers more complex and hides the generic switch logic. Introduce a new optional port_fast_age operation to dsa_switch_ops, to move this logic to the DSA layer and keep drivers simple. Signed-off-by: Vivien Didelot Signed-off-by: David S. 
Miller --- include/net/dsa.h | 2 ++ net/dsa/slave.c | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/include/net/dsa.h b/include/net/dsa.h index 7556646..b122196 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -143,6 +143,7 @@ struct dsa_port { struct net_device *netdev; struct device_node *dn; unsigned int ageing_time; + u8 stp_state; }; struct dsa_switch { @@ -339,6 +340,7 @@ struct dsa_switch_ops { void (*port_bridge_leave)(struct dsa_switch *ds, int port); void (*port_stp_state_set)(struct dsa_switch *ds, int port, u8 state); + void (*port_fast_age)(struct dsa_switch *ds, int port); /* * VLAN support diff --git a/net/dsa/slave.c b/net/dsa/slave.c index fd78d4c..6b1282c 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -71,8 +71,26 @@ static inline bool dsa_port_is_bridged(struct dsa_slave_priv *p) static void dsa_port_set_stp_state(struct dsa_switch *ds, int port, u8 state) { + struct dsa_port *dp = &ds->ports[port]; + if (ds->ops->port_stp_state_set) ds->ops->port_stp_state_set(ds, port, state); + + if (ds->ops->port_fast_age) { + /* Fast age FDB entries or flush appropriate forwarding database + * for the given port, if we are moving it from Learning or + * Forwarding state, to Disabled or Blocking or Listening state. + */ + + if ((dp->stp_state == BR_STATE_LEARNING || + dp->stp_state == BR_STATE_FORWARDING) && + (state == BR_STATE_DISABLED || + state == BR_STATE_BLOCKING || + state == BR_STATE_LISTENING)) + ds->ops->port_fast_age(ds, port); + } + + dp->stp_state = state; } static int dsa_slave_open(struct net_device *dev) -- cgit v1.1 From 597698f1e00d37d40f83770ea166f3ca0dc1d68c Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 22 Sep 2016 16:49:23 -0400 Subject: net: dsa: b53: implement DSA port fast ageing Remove the fast ageing logic from b53_br_set_stp_state and implement the new DSA switch port_fast_age operation instead. Signed-off-by: Vivien Didelot Signed-off-by: David S. 
Miller --- drivers/net/dsa/b53/b53_common.c | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 1a492c0..7717b19 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1402,16 +1402,12 @@ static void b53_br_leave(struct dsa_switch *ds, int port) } } -static void b53_br_set_stp_state(struct dsa_switch *ds, int port, - u8 state) +static void b53_br_set_stp_state(struct dsa_switch *ds, int port, u8 state) { struct b53_device *dev = ds->priv; - u8 hw_state, cur_hw_state; + u8 hw_state; u8 reg; - b53_read8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), &reg); - cur_hw_state = reg & PORT_CTRL_STP_STATE_MASK; - switch (state) { case BR_STATE_DISABLED: hw_state = PORT_CTRL_DIS_STATE; @@ -1433,26 +1429,20 @@ static void b53_br_set_stp_state(struct dsa_switch *ds, int port, return; } - /* Fast-age ARL entries if we are moving a port from Learning or - * Forwarding (cur_hw_state) state to Disabled, Blocking or Listening - * state (hw_state) - */ - if (cur_hw_state != hw_state) { - if (cur_hw_state >= PORT_CTRL_LEARN_STATE && - hw_state <= PORT_CTRL_LISTEN_STATE) { - if (b53_fast_age_port(dev, port)) { - dev_err(ds->dev, "fast ageing failed\n"); - return; - } - } - } - b53_read8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), &reg); reg &= ~PORT_CTRL_STP_STATE_MASK; reg |= hw_state; b53_write8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), reg); } +static void b53_br_fast_age(struct dsa_switch *ds, int port) +{ + struct b53_device *dev = ds->priv; + + if (b53_fast_age_port(dev, port)) + dev_err(ds->dev, "fast ageing failed\n"); +} + static enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds) { return DSA_TAG_PROTO_NONE; @@ -1472,6 +1462,7 @@ static struct dsa_switch_ops b53_switch_ops = { .port_bridge_join = b53_br_join, .port_bridge_leave = b53_br_leave, .port_stp_state_set = b53_br_set_stp_state, + .port_fast_age = b53_br_fast_age, 
.port_vlan_filtering = b53_vlan_filtering, .port_vlan_prepare = b53_vlan_prepare, .port_vlan_add = b53_vlan_add, -- cgit v1.1 From 749efcb8140e608dc2a63b6d61063b7cd3e556a5 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 22 Sep 2016 16:49:24 -0400 Subject: net: dsa: mv88e6xxx: implement DSA port fast ageing Now that the DSA layer handles port fast ageing on correct STP change, simplify _mv88e6xxx_port_state and implement mv88e6xxx_port_fast_age. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 45 ++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 25bd3fa..122876c 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1133,31 +1133,18 @@ static int _mv88e6xxx_port_state(struct mv88e6xxx_chip *chip, int port, oldstate = reg & PORT_CONTROL_STATE_MASK; - if (oldstate != state) { - /* Flush forwarding database if we're moving a port - * from Learning or Forwarding state to Disabled or - * Blocking or Listening state. 
- */ - if ((oldstate == PORT_CONTROL_STATE_LEARNING || - oldstate == PORT_CONTROL_STATE_FORWARDING) && - (state == PORT_CONTROL_STATE_DISABLED || - state == PORT_CONTROL_STATE_BLOCKING)) { - err = _mv88e6xxx_atu_remove(chip, 0, port, false); - if (err) - return err; - } + reg &= ~PORT_CONTROL_STATE_MASK; + reg |= state; - reg = (reg & ~PORT_CONTROL_STATE_MASK) | state; - err = mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg); - if (err) - return err; + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg); + if (err) + return err; - netdev_dbg(ds->ports[port].netdev, "PortState %s (was %s)\n", - mv88e6xxx_port_state_names[state], - mv88e6xxx_port_state_names[oldstate]); - } + netdev_dbg(ds->ports[port].netdev, "PortState %s (was %s)\n", + mv88e6xxx_port_state_names[state], + mv88e6xxx_port_state_names[oldstate]); - return err; + return 0; } static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port) @@ -1232,6 +1219,19 @@ static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, mv88e6xxx_port_state_names[stp_state]); } +static void mv88e6xxx_port_fast_age(struct dsa_switch *ds, int port) +{ + struct mv88e6xxx_chip *chip = ds->priv; + int err; + + mutex_lock(&chip->reg_lock); + err = _mv88e6xxx_atu_remove(chip, 0, port, false); + mutex_unlock(&chip->reg_lock); + + if (err) + netdev_err(ds->ports[port].netdev, "failed to flush ATU\n"); +} + static int _mv88e6xxx_port_pvid(struct mv88e6xxx_chip *chip, int port, u16 *new, u16 *old) { @@ -3684,6 +3684,7 @@ static struct dsa_switch_ops mv88e6xxx_switch_ops = { .port_bridge_join = mv88e6xxx_port_bridge_join, .port_bridge_leave = mv88e6xxx_port_bridge_leave, .port_stp_state_set = mv88e6xxx_port_stp_state_set, + .port_fast_age = mv88e6xxx_port_fast_age, .port_vlan_filtering = mv88e6xxx_port_vlan_filtering, .port_vlan_prepare = mv88e6xxx_port_vlan_prepare, .port_vlan_add = mv88e6xxx_port_vlan_add, -- cgit v1.1 From 17db4bcef3c3c45b95b3b3d8577f725df1b2c0a0 Mon Sep 17 00:00:00 2001 
From: Stephen Hemminger Date: Thu, 22 Sep 2016 16:56:29 -0700 Subject: hv_netvsc: use consume_skb Packets that are transmitted in normal path should use consume_skb instead of kfree_skb. This allows for better tracing of packet drops. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index ff05b9b..720b5fa 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -635,7 +635,7 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device, q_idx = nvsc_packet->q_idx; channel = incoming_channel; - dev_kfree_skb_any(skb); + dev_consume_skb_any(skb); } num_outstanding_sends = @@ -944,7 +944,7 @@ int netvsc_send(struct hv_device *device, } if (msdp->skb) - dev_kfree_skb_any(msdp->skb); + dev_consume_skb_any(msdp->skb); if (xmit_more && !packet->cp_partial) { msdp->skb = skb; -- cgit v1.1 From 07d0f0008c783d2a2fce8497000938db15fd7aa1 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 22 Sep 2016 16:56:30 -0700 Subject: hv_netvsc: dev hold/put reference to VF The netvsc driver holds a pointer to the virtual function network device if managing SR-IOV association. In order to ensure that the VF network device does not disappear, it should be using dev_hold/dev_put to get a reference count. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 2360e70..e74dbcc 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1262,6 +1262,8 @@ static int netvsc_register_vf(struct net_device *vf_netdev) * Take a reference on the module. 
*/ try_module_get(THIS_MODULE); + + dev_hold(vf_netdev); net_device_ctx->vf_netdev = vf_netdev; return NOTIFY_OK; } @@ -1376,6 +1378,7 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name); netvsc_inject_disable(net_device_ctx); net_device_ctx->vf_netdev = NULL; + dev_put(vf_netdev); module_put(THIS_MODULE); return NOTIFY_OK; } -- cgit v1.1 From ee837a137304290a1ae26980c73a367f7afef54f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 22 Sep 2016 16:56:31 -0700 Subject: hv_netvsc: simplify callback event code The callback handler for netlink events can be simplified: * Consolidate check for netlink callback events about this driver itself. * Ignore non-Ethernet devices. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index e74dbcc..849b566 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1238,10 +1238,6 @@ static int netvsc_register_vf(struct net_device *vf_netdev) struct net_device *ndev; struct net_device_context *net_device_ctx; struct netvsc_device *netvsc_dev; - const struct ethtool_ops *eth_ops = vf_netdev->ethtool_ops; - - if (eth_ops == NULL || eth_ops == &ethtool_ops) - return NOTIFY_DONE; /* * We will use the MAC address to locate the synthetic interface to @@ -1286,12 +1282,8 @@ static int netvsc_vf_up(struct net_device *vf_netdev) { struct net_device *ndev; struct netvsc_device *netvsc_dev; - const struct ethtool_ops *eth_ops = vf_netdev->ethtool_ops; struct net_device_context *net_device_ctx; - if (eth_ops == &ethtool_ops) - return NOTIFY_DONE; - ndev = get_netvsc_net_device(vf_netdev->dev_addr); if (!ndev) return NOTIFY_DONE; @@ -1329,10 +1321,6 @@ static int netvsc_vf_down(struct net_device *vf_netdev) struct net_device *ndev;
struct netvsc_device *netvsc_dev; struct net_device_context *net_device_ctx; - const struct ethtool_ops *eth_ops = vf_netdev->ethtool_ops; - - if (eth_ops == &ethtool_ops) - return NOTIFY_DONE; ndev = get_netvsc_net_device(vf_netdev->dev_addr); if (!ndev) @@ -1361,12 +1349,8 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) { struct net_device *ndev; struct netvsc_device *netvsc_dev; - const struct ethtool_ops *eth_ops = vf_netdev->ethtool_ops; struct net_device_context *net_device_ctx; - if (eth_ops == &ethtool_ops) - return NOTIFY_DONE; - ndev = get_netvsc_net_device(vf_netdev->dev_addr); if (!ndev) return NOTIFY_DONE; @@ -1542,13 +1526,21 @@ static int netvsc_netdev_event(struct notifier_block *this, { struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); + /* Skip our own events */ + if (event_dev->netdev_ops == &device_ops) + return NOTIFY_DONE; + + /* Avoid non-Ethernet type devices */ + if (event_dev->type != ARPHRD_ETHER) + return NOTIFY_DONE; + /* Avoid Vlan dev with same MAC registering as VF */ if (event_dev->priv_flags & IFF_802_1Q_VLAN) return NOTIFY_DONE; /* Avoid Bonding master dev with same MAC registering as VF */ - if (event_dev->priv_flags & IFF_BONDING && - event_dev->flags & IFF_MASTER) + if ((event_dev->priv_flags & IFF_BONDING) && + (event_dev->flags & IFF_MASTER)) return NOTIFY_DONE; switch (event) { -- cgit v1.1 From e8ff40d4bff1f3b6a588e29ed1fbdfd943642856 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 22 Sep 2016 16:56:32 -0700 Subject: hv_netvsc: improve VF device matching The code to associate netvsc and VF devices can be made less error prone by using a better matching algorithms. On registration, use the permanent address which avoids any possible issues caused by device MAC address being changed. For all other callbacks, search by the netdevice pointer value to ensure getting the correct network device. Signed-off-by: Stephen Hemminger Signed-off-by: David S.
Miller --- drivers/net/hyperv/netvsc_drv.c | 60 ++++++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 849b566..8768219 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1215,22 +1215,44 @@ static void netvsc_free_netdev(struct net_device *netdev) free_netdev(netdev); } -static struct net_device *get_netvsc_net_device(char *mac) +static struct net_device *get_netvsc_bymac(const u8 *mac) { - struct net_device *dev, *found = NULL; + struct net_device *dev; ASSERT_RTNL(); for_each_netdev(&init_net, dev) { - if (memcmp(dev->dev_addr, mac, ETH_ALEN) == 0) { - if (dev->netdev_ops != &device_ops) - continue; - found = dev; - break; - } + if (dev->netdev_ops != &device_ops) + continue; /* not a netvsc device */ + + if (ether_addr_equal(mac, dev->perm_addr)) + return dev; + } + + return NULL; +} + +static struct net_device *get_netvsc_byref(const struct net_device *vf_netdev) +{ + struct net_device *dev; + + ASSERT_RTNL(); + + for_each_netdev(&init_net, dev) { + struct net_device_context *net_device_ctx; + + if (dev->netdev_ops != &device_ops) + continue; /* not a netvsc device */ + + net_device_ctx = netdev_priv(dev); + if (net_device_ctx->nvdev == NULL) + continue; /* device is removed */ + + if (net_device_ctx->vf_netdev == vf_netdev) + return dev; /* a match */ } - return found; + return NULL; } static int netvsc_register_vf(struct net_device *vf_netdev) @@ -1239,12 +1261,15 @@ static int netvsc_register_vf(struct net_device *vf_netdev) struct net_device_context *net_device_ctx; struct netvsc_device *netvsc_dev; + if (vf_netdev->addr_len != ETH_ALEN) + return NOTIFY_DONE; + /* * We will use the MAC address to locate the synthetic interface to * associate with the VF interface. If we don't find a matching * synthetic interface, move on. 
*/ - ndev = get_netvsc_net_device(vf_netdev->dev_addr); + ndev = get_netvsc_bymac(vf_netdev->perm_addr); if (!ndev) return NOTIFY_DONE; @@ -1284,16 +1309,13 @@ static int netvsc_vf_up(struct net_device *vf_netdev) struct netvsc_device *netvsc_dev; struct net_device_context *net_device_ctx; - ndev = get_netvsc_net_device(vf_netdev->dev_addr); + ndev = get_netvsc_byref(vf_netdev); if (!ndev) return NOTIFY_DONE; net_device_ctx = netdev_priv(ndev); netvsc_dev = net_device_ctx->nvdev; - if (!netvsc_dev || !net_device_ctx->vf_netdev) - return NOTIFY_DONE; - netdev_info(ndev, "VF up: %s\n", vf_netdev->name); netvsc_inject_enable(net_device_ctx); @@ -1322,16 +1344,13 @@ static int netvsc_vf_down(struct net_device *vf_netdev) struct netvsc_device *netvsc_dev; struct net_device_context *net_device_ctx; - ndev = get_netvsc_net_device(vf_netdev->dev_addr); + ndev = get_netvsc_byref(vf_netdev); if (!ndev) return NOTIFY_DONE; net_device_ctx = netdev_priv(ndev); netvsc_dev = net_device_ctx->nvdev; - if (!netvsc_dev || !net_device_ctx->vf_netdev) - return NOTIFY_DONE; - netdev_info(ndev, "VF down: %s\n", vf_netdev->name); netvsc_inject_disable(net_device_ctx); netvsc_switch_datapath(ndev, false); @@ -1351,14 +1370,13 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) struct netvsc_device *netvsc_dev; struct net_device_context *net_device_ctx; - ndev = get_netvsc_net_device(vf_netdev->dev_addr); + ndev = get_netvsc_byref(vf_netdev); if (!ndev) return NOTIFY_DONE; net_device_ctx = netdev_priv(ndev); netvsc_dev = net_device_ctx->nvdev; - if (!netvsc_dev || !net_device_ctx->vf_netdev) - return NOTIFY_DONE; + netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name); netvsc_inject_disable(net_device_ctx); net_device_ctx->vf_netdev = NULL; -- cgit v1.1 From f207c10d982388fa42710922ad1c0c9d3ba9a87b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 22 Sep 2016 16:56:33 -0700 Subject: hv_netvsc: use RCU to protect vf_netdev The vf_netdev pointer in the netvsc 
device context can simply be protected by RCU because network device destruction is already RCU synchronized. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 2 +- drivers/net/hyperv/netvsc_drv.c | 29 +++++++++++++++-------------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 284b97b..6b79487 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -695,7 +695,7 @@ struct net_device_context { bool start_remove; /* State to manage the associated VF interface. */ - struct net_device *vf_netdev; + struct net_device __rcu *vf_netdev; bool vf_inject; atomic_t vf_use_cnt; /* 1: allocated, serial number is valid. 0: not allocated */ diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 8768219..dde17c0 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -667,8 +667,8 @@ int netvsc_recv_callback(struct hv_device *device_obj, { struct net_device *net = hv_get_drvdata(device_obj); struct net_device_context *net_device_ctx = netdev_priv(net); + struct net_device *vf_netdev; struct sk_buff *skb; - struct sk_buff *vf_skb; struct netvsc_stats *rx_stats; u32 bytes_recvd = packet->total_data_buflen; int ret = 0; @@ -676,9 +676,12 @@ int netvsc_recv_callback(struct hv_device *device_obj, if (!net || net->reg_state != NETREG_REGISTERED) return NVSP_STAT_FAIL; - if (READ_ONCE(net_device_ctx->vf_inject)) { + vf_netdev = rcu_dereference(net_device_ctx->vf_netdev); + if (vf_netdev) { + struct sk_buff *vf_skb; + atomic_inc(&net_device_ctx->vf_use_cnt); - if (!READ_ONCE(net_device_ctx->vf_inject)) { + if (!net_device_ctx->vf_inject) { /* * We raced; just move on. */ @@ -694,13 +697,12 @@ int netvsc_recv_callback(struct hv_device *device_obj, * the host). Deliver these via the VF interface * in the guest. 
*/ - vf_skb = netvsc_alloc_recv_skb(net_device_ctx->vf_netdev, + vf_skb = netvsc_alloc_recv_skb(vf_netdev, packet, csum_info, *data, vlan_tci); if (vf_skb != NULL) { - ++net_device_ctx->vf_netdev->stats.rx_packets; - net_device_ctx->vf_netdev->stats.rx_bytes += - bytes_recvd; + ++vf_netdev->stats.rx_packets; + vf_netdev->stats.rx_bytes += bytes_recvd; netif_receive_skb(vf_skb); } else { ++net->stats.rx_dropped; @@ -1232,7 +1234,7 @@ static struct net_device *get_netvsc_bymac(const u8 *mac) return NULL; } -static struct net_device *get_netvsc_byref(const struct net_device *vf_netdev) +static struct net_device *get_netvsc_byref(struct net_device *vf_netdev) { struct net_device *dev; @@ -1248,7 +1250,7 @@ static struct net_device *get_netvsc_byref(const struct net_device *vf_netdev) if (net_device_ctx->nvdev == NULL) continue; /* device is removed */ - if (net_device_ctx->vf_netdev == vf_netdev) + if (rtnl_dereference(net_device_ctx->vf_netdev) == vf_netdev) return dev; /* a match */ } @@ -1275,7 +1277,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev) net_device_ctx = netdev_priv(ndev); netvsc_dev = net_device_ctx->nvdev; - if (!netvsc_dev || net_device_ctx->vf_netdev) + if (!netvsc_dev || rtnl_dereference(net_device_ctx->vf_netdev)) return NOTIFY_DONE; netdev_info(ndev, "VF registering: %s\n", vf_netdev->name); @@ -1285,7 +1287,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev) try_module_get(THIS_MODULE); dev_hold(vf_netdev); - net_device_ctx->vf_netdev = vf_netdev; + rcu_assign_pointer(net_device_ctx->vf_netdev, vf_netdev); return NOTIFY_OK; } @@ -1379,7 +1381,8 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name); netvsc_inject_disable(net_device_ctx); - net_device_ctx->vf_netdev = NULL; + + RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL); dev_put(vf_netdev); module_put(THIS_MODULE); return NOTIFY_OK; @@ -1433,8 +1436,6 @@ static int netvsc_probe(struct 
hv_device *dev, INIT_LIST_HEAD(&net_device_ctx->reconfig_events); atomic_set(&net_device_ctx->vf_use_cnt, 0); - net_device_ctx->vf_netdev = NULL; - net_device_ctx->vf_inject = false; net->netdev_ops = &device_ops; -- cgit v1.1 From 9cbcc4280645f0e7e19e6a0da443ec7e69cecf40 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 22 Sep 2016 16:56:34 -0700 Subject: hv_netvsc: remove VF in flight counters Since VF reference is now protected by RCU, no longer need the VF usage counter and can use device flags to see whether to inject or not. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 3 +- drivers/net/hyperv/netvsc_drv.c | 81 ++++++++++------------------------------- 2 files changed, 21 insertions(+), 63 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 6b79487..1d49740 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -696,8 +696,7 @@ struct net_device_context { /* State to manage the associated VF interface. */ struct net_device __rcu *vf_netdev; - bool vf_inject; - atomic_t vf_use_cnt; + /* 1: allocated, serial number is valid. 0: not allocated */ u32 vf_alloc; /* Serial number of the VF to team with */ diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index dde17c0..9375d82 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -670,50 +670,20 @@ int netvsc_recv_callback(struct hv_device *device_obj, struct net_device *vf_netdev; struct sk_buff *skb; struct netvsc_stats *rx_stats; - u32 bytes_recvd = packet->total_data_buflen; - int ret = 0; - if (!net || net->reg_state != NETREG_REGISTERED) + if (net->reg_state != NETREG_REGISTERED) return NVSP_STAT_FAIL; + /* + * If necessary, inject this packet into the VF interface. 
+ * On Hyper-V, multicast and broadcast packets are only delivered + * to the synthetic interface (after subjecting these to + * policy filters on the host). Deliver these via the VF + * interface in the guest. + */ vf_netdev = rcu_dereference(net_device_ctx->vf_netdev); - if (vf_netdev) { - struct sk_buff *vf_skb; - - atomic_inc(&net_device_ctx->vf_use_cnt); - if (!net_device_ctx->vf_inject) { - /* - * We raced; just move on. - */ - atomic_dec(&net_device_ctx->vf_use_cnt); - goto vf_injection_done; - } - - /* - * Inject this packet into the VF inerface. - * On Hyper-V, multicast and brodcast packets - * are only delivered on the synthetic interface - * (after subjecting these to policy filters on - * the host). Deliver these via the VF interface - * in the guest. - */ - vf_skb = netvsc_alloc_recv_skb(vf_netdev, - packet, csum_info, *data, - vlan_tci); - if (vf_skb != NULL) { - ++vf_netdev->stats.rx_packets; - vf_netdev->stats.rx_bytes += bytes_recvd; - netif_receive_skb(vf_skb); - } else { - ++net->stats.rx_dropped; - ret = NVSP_STAT_FAIL; - } - atomic_dec(&net_device_ctx->vf_use_cnt); - return ret; - } - -vf_injection_done: - rx_stats = this_cpu_ptr(net_device_ctx->rx_stats); + if (vf_netdev && (vf_netdev->flags & IFF_UP)) + net = vf_netdev; /* Allocate a skb - TODO direct I/O to pages? */ skb = netvsc_alloc_recv_skb(net, packet, csum_info, *data, vlan_tci); @@ -721,9 +691,17 @@ vf_injection_done: ++net->stats.rx_dropped; return NVSP_STAT_FAIL; } - skb_record_rx_queue(skb, channel-> - offermsg.offer.sub_channel_index); + if (net != vf_netdev) + skb_record_rx_queue(skb, + channel->offermsg.offer.sub_channel_index); + + /* + * Even if injecting the packet, record the statistics + * on the synthetic device because modifying the VF device + * statistics will not work correctly.
+ */ + rx_stats = this_cpu_ptr(net_device_ctx->rx_stats); u64_stats_update_begin(&rx_stats->syncp); rx_stats->packets++; rx_stats->bytes += packet->total_data_buflen; @@ -1291,20 +1269,6 @@ static int netvsc_register_vf(struct net_device *vf_netdev) return NOTIFY_OK; } -static void netvsc_inject_enable(struct net_device_context *net_device_ctx) -{ - net_device_ctx->vf_inject = true; -} - -static void netvsc_inject_disable(struct net_device_context *net_device_ctx) -{ - net_device_ctx->vf_inject = false; - - /* Wait for currently active users to drain out. */ - while (atomic_read(&net_device_ctx->vf_use_cnt) != 0) - udelay(50); -} - static int netvsc_vf_up(struct net_device *vf_netdev) { struct net_device *ndev; @@ -1319,7 +1283,6 @@ static int netvsc_vf_up(struct net_device *vf_netdev) netvsc_dev = net_device_ctx->nvdev; netdev_info(ndev, "VF up: %s\n", vf_netdev->name); - netvsc_inject_enable(net_device_ctx); /* * Open the device before switching data path. @@ -1354,7 +1317,6 @@ static int netvsc_vf_down(struct net_device *vf_netdev) netvsc_dev = net_device_ctx->nvdev; netdev_info(ndev, "VF down: %s\n", vf_netdev->name); - netvsc_inject_disable(net_device_ctx); netvsc_switch_datapath(ndev, false); netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name); rndis_filter_close(netvsc_dev); @@ -1380,7 +1342,6 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) netvsc_dev = net_device_ctx->nvdev; netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name); - netvsc_inject_disable(net_device_ctx); RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL); dev_put(vf_netdev); @@ -1435,8 +1396,6 @@ static int netvsc_probe(struct hv_device *dev, spin_lock_init(&net_device_ctx->lock); INIT_LIST_HEAD(&net_device_ctx->reconfig_events); - atomic_set(&net_device_ctx->vf_use_cnt, 0); - net->netdev_ops = &device_ops; net->hw_features = NETVSC_HW_FEATURES; -- cgit v1.1 From f7ad75b753f386454f50044fd69edad767b69ce8 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger 
Date: Thu, 22 Sep 2016 16:56:35 -0700 Subject: hv_netvsc: count multicast packets received Useful for debugging issues with multicast and SR-IOV to keep track of number of received multicast packets. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 2 ++ drivers/net/hyperv/netvsc_drv.c | 9 ++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 1d49740..7130bf9 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -649,6 +649,8 @@ struct multi_recv_comp { struct netvsc_stats { u64 packets; u64 bytes; + u64 broadcast; + u64 multicast; struct u64_stats_sync syncp; }; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 9375d82..52eeb2f 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -705,6 +705,11 @@ int netvsc_recv_callback(struct hv_device *device_obj, u64_stats_update_begin(&rx_stats->syncp); rx_stats->packets++; rx_stats->bytes += packet->total_data_buflen; + + if (skb->pkt_type == PACKET_BROADCAST) + ++rx_stats->broadcast; + else if (skb->pkt_type == PACKET_MULTICAST) + ++rx_stats->multicast; u64_stats_update_end(&rx_stats->syncp); /* @@ -947,7 +952,7 @@ static struct rtnl_link_stats64 *netvsc_get_stats64(struct net_device *net, cpu); struct netvsc_stats *rx_stats = per_cpu_ptr(ndev_ctx->rx_stats, cpu); - u64 tx_packets, tx_bytes, rx_packets, rx_bytes; + u64 tx_packets, tx_bytes, rx_packets, rx_bytes, rx_multicast; unsigned int start; do { @@ -960,12 +965,14 @@ static struct rtnl_link_stats64 *netvsc_get_stats64(struct net_device *net, start = u64_stats_fetch_begin_irq(&rx_stats->syncp); rx_packets = rx_stats->packets; rx_bytes = rx_stats->bytes; + rx_multicast = rx_stats->multicast + rx_stats->broadcast; } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start)); t->tx_bytes += tx_bytes; t->tx_packets += tx_packets; 
t->rx_bytes += rx_bytes; t->rx_packets += rx_packets; + t->multicast += rx_multicast; } t->tx_dropped = net->stats.tx_dropped; -- cgit v1.1 From 2d48c5f9335e48ddac7a52db10bf3bfd01986b9c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 23 Sep 2016 01:28:35 +0200 Subject: bpf: use skb_to_full_sk helper in bpf_skb_under_cgroup We need to use skb_to_full_sk() helper introduced in commit bd5eb35f16a9 ("xfrm: take care of request sockets") as otherwise we miss tcp synack messages, since ownership is on request socket and therefore it would miss the sk_fullsock() check. Use skb_to_full_sk() as also done similarly in the bpf_get_cgroup_classid() helper via 2309236c13fe ("cls_cgroup: get sk_classid only from full sockets") fix to not let this fall through. Fixes: 4a482f34afcc ("cgroup: bpf: Add bpf_skb_in_cgroup_proto") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index 0920c2a..e5d9977 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2408,7 +2408,7 @@ BPF_CALL_3(bpf_skb_under_cgroup, struct sk_buff *, skb, struct bpf_map *, map, struct cgroup *cgrp; struct sock *sk; - sk = skb->sk; + sk = skb_to_full_sk(skb); if (!sk || !sk_fullsock(sk)) return -ENOENT; if (unlikely(idx >= array->map.max_entries)) -- cgit v1.1 From 669dc4d76d0ecc2d795df735839f43cfddf9f617 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 23 Sep 2016 01:28:36 +0200 Subject: bpf: use bpf_get_smp_processor_id_proto instead of raw one Same motivation as in commit 80b48c445797 ("bpf: don't use raw processor id in generic helper"), but this time for XDP typed programs. Thus, allow for preemption checks when we have DEBUG_PREEMPT enabled, and otherwise use the raw variant. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- net/core/filter.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/filter.c b/net/core/filter.c index e5d9977..acf84fb 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2551,6 +2551,8 @@ xdp_func_proto(enum bpf_func_id func_id) switch (func_id) { case BPF_FUNC_perf_event_output: return &bpf_xdp_event_output_proto; + case BPF_FUNC_get_smp_processor_id: + return &bpf_get_smp_processor_id_proto; default: return sk_filter_func_proto(func_id); } -- cgit v1.1 From 7a4b28c6cc9ffac50f791b99cc7e46106436e5d8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 23 Sep 2016 01:28:37 +0200 Subject: bpf: add helper to invalidate hash Add a small helper that complements 36bbef52c7eb ("bpf: direct packet write and access for helpers for clsact progs") for invalidating the current skb->hash after mangling on headers via direct packet write. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 7 +++++++ net/core/filter.c | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e07432b..f09c70b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -419,6 +419,13 @@ enum bpf_func_id { */ BPF_FUNC_csum_update, + /** + * bpf_set_hash_invalid(skb) + * Invalidate current skb->hash. + * @skb: pointer to skb + */ + BPF_FUNC_set_hash_invalid, + __BPF_FUNC_MAX_ID, }; diff --git a/net/core/filter.c b/net/core/filter.c index acf84fb..00351cd 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1777,6 +1777,22 @@ static const struct bpf_func_proto bpf_get_hash_recalc_proto = { .arg1_type = ARG_PTR_TO_CTX, }; +BPF_CALL_1(bpf_set_hash_invalid, struct sk_buff *, skb) +{ + /* After all direct packet write, this can be used once for + * triggering a lazy recalc on next skb_get_hash() invocation.
+ */ + skb_clear_hash(skb); + return 0; +} + +static const struct bpf_func_proto bpf_set_hash_invalid_proto = { + .func = bpf_set_hash_invalid, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto, u16, vlan_tci) { @@ -2534,6 +2550,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_get_route_realm_proto; case BPF_FUNC_get_hash_recalc: return &bpf_get_hash_recalc_proto; + case BPF_FUNC_set_hash_invalid: + return &bpf_set_hash_invalid_proto; case BPF_FUNC_perf_event_output: return &bpf_skb_event_output_proto; case BPF_FUNC_get_smp_processor_id: -- cgit v1.1 From 98dafac5697fbe1fb4bef9e3204baf9051641b00 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 14:04:38 +0100 Subject: rxrpc: Use before_eq() and friends to compare serial numbers before_eq() and friends should be used to compare serial numbers (when not checking for (non)equality) rather than casting to int, subtracting and checking the result. Signed-off-by: David Howells --- net/rxrpc/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index cbb5d53..06027b6 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -578,7 +578,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, } /* Discard any out-of-order or duplicate ACKs. */ - if ((int)sp->hdr.serial - (int)call->acks_latest <= 0) { + if (before_eq(sp->hdr.serial, call->acks_latest)) { _debug("discard ACK %d <= %d", sp->hdr.serial, call->acks_latest); return; -- cgit v1.1 From dfc3da4404ad1ec42a0a649a4ffa2b0f37e80352 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 12:39:23 +0100 Subject: rxrpc: Need to start the resend timer on initial transmission When a DATA packet has its initial transmission, we may need to start or adjust the resend timer. 
Without this we end up relying on being sent a NACK to initiate the resend. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/call_event.c | 2 +- net/rxrpc/sendmsg.c | 9 +++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 808ab75..9e3ba4d 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -704,6 +704,7 @@ int rxrpc_reject_call(struct rxrpc_sock *); /* * call_event.c */ +void rxrpc_set_timer(struct rxrpc_call *); void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool); void rxrpc_process_call(struct work_struct *); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index a2909da..3a7f90a 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -24,7 +24,7 @@ /* * Set the timer */ -static void rxrpc_set_timer(struct rxrpc_call *call) +void rxrpc_set_timer(struct rxrpc_call *call) { unsigned long t, now = jiffies; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index ca3811b..7cb34b2 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -146,6 +146,15 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, if (ret < 0) { _debug("need instant resend %d", ret); rxrpc_instant_resend(call, ix); + } else { + unsigned long resend_at; + + resend_at = jiffies + msecs_to_jiffies(rxrpc_resend_timeout); + + if (time_before(resend_at, call->resend_at)) { + call->resend_at = resend_at; + rxrpc_set_timer(call); + } } rxrpc_free_skb(skb, rxrpc_skb_tx_freed); -- cgit v1.1 From fb2a3d5c7c85cb6e8bc88192be919b4ef8d6e630 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 23 Sep 2016 09:09:31 -0400 Subject: Revert "xen-netback: create a debugfs node for hash information" This reverts commit c0c64c152389ad73306b9b0796357210ec6d32ee. There is no vif->ctrl_task member, so this change broke the build. Signed-off-by: David S. 
Miller --- drivers/net/xen-netback/common.h | 4 --- drivers/net/xen-netback/hash.c | 68 ---------------------------------------- drivers/net/xen-netback/xenbus.c | 37 ++-------------------- 3 files changed, 2 insertions(+), 107 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index ff94c51..b38fb2c 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -410,8 +410,4 @@ u32 xenvif_set_hash_mapping(struct xenvif *vif, u32 gref, u32 len, void xenvif_set_skb_hash(struct xenvif *vif, struct sk_buff *skb); -#ifdef CONFIG_DEBUG_FS -void xenvif_dump_hash_info(struct xenvif *vif, struct seq_file *m); -#endif - #endif /* __XEN_NETBACK__COMMON_H__ */ diff --git a/drivers/net/xen-netback/hash.c b/drivers/net/xen-netback/hash.c index e8c5ddd..613bac0 100644 --- a/drivers/net/xen-netback/hash.c +++ b/drivers/net/xen-netback/hash.c @@ -360,74 +360,6 @@ u32 xenvif_set_hash_mapping(struct xenvif *vif, u32 gref, u32 len, return XEN_NETIF_CTRL_STATUS_SUCCESS; } -#ifdef CONFIG_DEBUG_FS -void xenvif_dump_hash_info(struct xenvif *vif, struct seq_file *m) -{ - unsigned int i; - - switch (vif->hash.alg) { - case XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ: - seq_puts(m, "Hash Algorithm: TOEPLITZ\n"); - break; - - case XEN_NETIF_CTRL_HASH_ALGORITHM_NONE: - seq_puts(m, "Hash Algorithm: NONE\n"); - /* FALLTHRU */ - default: - return; - } - - if (vif->hash.flags) { - seq_puts(m, "\nHash Flags:\n"); - - if (vif->hash.flags & XEN_NETIF_CTRL_HASH_TYPE_IPV4) - seq_puts(m, "- IPv4\n"); - if (vif->hash.flags & XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP) - seq_puts(m, "- IPv4 + TCP\n"); - if (vif->hash.flags & XEN_NETIF_CTRL_HASH_TYPE_IPV6) - seq_puts(m, "- IPv6\n"); - if (vif->hash.flags & XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP) - seq_puts(m, "- IPv6 + TCP\n"); - } - - seq_puts(m, "\nHash Key:\n"); - - for (i = 0; i < XEN_NETBK_MAX_HASH_KEY_SIZE; ) { - unsigned int j, n; - - n = 8; - if (i + n >= XEN_NETBK_MAX_HASH_KEY_SIZE) - n = 
XEN_NETBK_MAX_HASH_KEY_SIZE - i; - - seq_printf(m, "[%2u - %2u]: ", i, i + n - 1); - - for (j = 0; j < n; j++, i++) - seq_printf(m, "%02x ", vif->hash.key[i]); - - seq_puts(m, "\n"); - } - - if (vif->hash.size != 0) { - seq_puts(m, "\nHash Mapping:\n"); - - for (i = 0; i < vif->hash.size; ) { - unsigned int j, n; - - n = 8; - if (i + n >= vif->hash.size) - n = vif->hash.size - i; - - seq_printf(m, "[%4u - %4u]: ", i, i + n - 1); - - for (j = 0; j < n; j++, i++) - seq_printf(m, "%4u ", vif->hash.mapping[i]); - - seq_puts(m, "\n"); - } - } -} -#endif /* CONFIG_DEBUG_FS */ - void xenvif_init_hash(struct xenvif *vif) { if (xenvif_hash_cache_size == 0) diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 9911b4e..daf4c78 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -165,7 +165,7 @@ xenvif_write_io_ring(struct file *filp, const char __user *buf, size_t count, return count; } -static int xenvif_io_ring_open(struct inode *inode, struct file *filp) +static int xenvif_dump_open(struct inode *inode, struct file *filp) { int ret; void *queue = NULL; @@ -179,35 +179,13 @@ static int xenvif_io_ring_open(struct inode *inode, struct file *filp) static const struct file_operations xenvif_dbg_io_ring_ops_fops = { .owner = THIS_MODULE, - .open = xenvif_io_ring_open, + .open = xenvif_dump_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, .write = xenvif_write_io_ring, }; -static int xenvif_read_ctrl(struct seq_file *m, void *v) -{ - struct xenvif *vif = m->private; - - xenvif_dump_hash_info(vif, m); - - return 0; -} - -static int xenvif_ctrl_open(struct inode *inode, struct file *filp) -{ - return single_open(filp, xenvif_read_ctrl, inode->i_private); -} - -static const struct file_operations xenvif_dbg_ctrl_ops_fops = { - .owner = THIS_MODULE, - .open = xenvif_ctrl_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static void xenvif_debugfs_addif(struct 
xenvif *vif) { struct dentry *pfile; @@ -232,17 +210,6 @@ static void xenvif_debugfs_addif(struct xenvif *vif) pr_warn("Creation of io_ring file returned %ld!\n", PTR_ERR(pfile)); } - - if (vif->ctrl_task) { - pfile = debugfs_create_file("ctrl", - S_IRUSR, - vif->xenvif_dbg_root, - vif, - &xenvif_dbg_ctrl_ops_fops); - if (IS_ERR_OR_NULL(pfile)) - pr_warn("Creation of ctrl file returned %ld!\n", - PTR_ERR(pfile)); - } } else netdev_warn(vif->dev, "Creation of vif debugfs dir returned %ld!\n", -- cgit v1.1 From be8aa3380678183821bd7d7b5dec845f10d776ce Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 12:39:23 +0100 Subject: rxrpc: Fix accidental cancellation of scheduled resend by ACK parser When rxrpc_input_soft_acks() is parsing the soft-ACKs from an ACK packet, it updates the Tx packet annotations in the annotation buffer. If a soft-ACK is an ACK, then we overwrite unack'd, nak'd or to-be-retransmitted states and that is fine; but if the soft-ACK is an NACK, we overwrite the to-be-retransmitted with a nak - which isn't. Instead, we need to let any scheduled retransmission stand if the packet was NAK'd. Note that we don't reissue a resend if the annotation is in the to-be-retransmitted state because someone else must've scheduled the resend already. 
Signed-off-by: David Howells --- net/rxrpc/input.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 06027b6..d3d69ab 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -479,6 +479,8 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, case RXRPC_ACK_TYPE_NACK: if (anno_type == RXRPC_TX_ANNO_NAK) continue; + if (anno_type == RXRPC_TX_ANNO_RETRANS) + continue; call->rxtx_annotations[ix] = RXRPC_TX_ANNO_NAK | annotation; resend = true; -- cgit v1.1 From 01a88f7f6bd4514de9551c3fc9a6fd9e65cbf79d Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 12:39:22 +0100 Subject: rxrpc: Fix call timer Fix the call timer in the following ways: (1) If call->resend_at or call->ack_at are before or equal to the current time, then ignore that timeout. (2) If call->expire_at is before or equal to the current time, then don't set the timer at all (possibly we should queue the call). (3) Don't skip modifying the timer if timer_pending() is true. This indicates that the timer is working, not that it has expired and is running/waiting to run its expiry handler. Also call rxrpc_set_timer() to start the call timer going rather than calling add_timer(). 
Signed-off-by: David Howells --- net/rxrpc/call_event.c | 25 ++++++++++++++----------- net/rxrpc/call_object.c | 4 ++-- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 3a7f90a..8bc5c8e 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -28,24 +28,27 @@ void rxrpc_set_timer(struct rxrpc_call *call) { unsigned long t, now = jiffies; - _enter("{%ld,%ld,%ld:%ld}", - call->ack_at - now, call->resend_at - now, call->expire_at - now, - call->timer.expires - now); - read_lock_bh(&call->state_lock); if (call->state < RXRPC_CALL_COMPLETE) { - t = call->ack_at; - if (time_before(call->resend_at, t)) + t = call->expire_at; + if (time_before_eq(t, now)) + goto out; + + if (time_after(call->resend_at, now) && + time_before(call->resend_at, t)) t = call->resend_at; - if (time_before(call->expire_at, t)) - t = call->expire_at; - if (!timer_pending(&call->timer) || - time_before(t, call->timer.expires)) { - _debug("set timer %ld", t - now); + + if (time_after(call->ack_at, now) && + time_before(call->ack_at, t)) + t = call->ack_at; + + if (call->timer.expires != t || !timer_pending(&call->timer)) { mod_timer(&call->timer, t); } } + +out: read_unlock_bh(&call->state_lock); } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index f50a609..f2fadf6 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -199,8 +199,8 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call) call->expire_at = expire_at; call->ack_at = expire_at; call->resend_at = expire_at; - call->timer.expires = expire_at; - add_timer(&call->timer); + call->timer.expires = expire_at + 1; + rxrpc_set_timer(call); } /* -- cgit v1.1 From 70790dbe3f6651fb66ad38da0a1e24368778bc16 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 12:39:22 +0100 Subject: rxrpc: Pass the last Tx packet marker in the annotation buffer When the last packet of data to be transmitted on a call is queued, tx_top 
is set and then the RXRPC_CALL_TX_LAST flag is set. Unfortunately, this leaves a race in the ACK processing side of things because the flag affects the interpretation of tx_top and also allows us to start receiving reply data before we've finished transmitting. To fix this, make the following changes: (1) rxrpc_queue_packet() now sets a marker in the annotation buffer instead of setting the RXRPC_CALL_TX_LAST flag. (2) rxrpc_rotate_tx_window() detects the marker and sets the flag in the same context as the routines that use it. (3) rxrpc_end_tx_phase() is simplified to just shift the call state. The Tx window must have been rotated before calling to discard the last packet. (4) rxrpc_receiving_reply() is added to handle the arrival of the first DATA packet of a reply to a client call (which is an implicit ACK of the Tx phase). (5) The last part of rxrpc_input_ack() is reordered to perform Tx rotation, then soft-ACK application and then to end the phase if we've rotated the last packet. In the event of a terminal ACK, the soft-ACK application will be skipped as nAcks should be 0. (6) rxrpc_input_ackall() now has to rotate as well as ending the phase. In addition: (7) Alter the transmit tracepoint to log the rotation of the last packet. (8) Remove the no-longer relevant queue_reqack tracepoint note. The ACK-REQUESTED packet header flag is now set as needed when we actually transmit the packet and may vary by retransmission. 
Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 7 +++- net/rxrpc/input.c | 102 +++++++++++++++++++++++++++++++----------------- net/rxrpc/misc.c | 3 +- net/rxrpc/sendmsg.c | 14 +++---- 4 files changed, 81 insertions(+), 45 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 9e3ba4d..a494d56 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -508,7 +508,9 @@ struct rxrpc_call { #define RXRPC_TX_ANNO_NAK 2 #define RXRPC_TX_ANNO_RETRANS 3 #define RXRPC_TX_ANNO_MASK 0x03 -#define RXRPC_TX_ANNO_RESENT 0x04 +#define RXRPC_TX_ANNO_LAST 0x04 +#define RXRPC_TX_ANNO_RESENT 0x08 + #define RXRPC_RX_ANNO_JUMBO 0x3f /* Jumbo subpacket number + 1 if not zero */ #define RXRPC_RX_ANNO_JLAST 0x40 /* Set if last element of a jumbo packet */ #define RXRPC_RX_ANNO_VERIFIED 0x80 /* Set if verified and decrypted */ @@ -621,9 +623,10 @@ extern const char rxrpc_call_traces[rxrpc_call__nr_trace][4]; enum rxrpc_transmit_trace { rxrpc_transmit_wait, rxrpc_transmit_queue, - rxrpc_transmit_queue_reqack, rxrpc_transmit_queue_last, rxrpc_transmit_rotate, + rxrpc_transmit_rotate_last, + rxrpc_transmit_await_reply, rxrpc_transmit_end, rxrpc_transmit__nr_trace }; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index d3d69ab..fb3e2f6 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -59,6 +59,7 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to) { struct sk_buff *skb, *list = NULL; int ix; + u8 annotation; spin_lock(&call->lock); @@ -66,16 +67,22 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to) call->tx_hard_ack++; ix = call->tx_hard_ack & RXRPC_RXTX_BUFF_MASK; skb = call->rxtx_buffer[ix]; + annotation = call->rxtx_annotations[ix]; rxrpc_see_skb(skb, rxrpc_skb_tx_rotated); call->rxtx_buffer[ix] = NULL; call->rxtx_annotations[ix] = 0; skb->next = list; list = skb; + + if (annotation & RXRPC_TX_ANNO_LAST) + set_bit(RXRPC_CALL_TX_LAST, &call->flags); } spin_unlock(&call->lock); 
- trace_rxrpc_transmit(call, rxrpc_transmit_rotate); + trace_rxrpc_transmit(call, (test_bit(RXRPC_CALL_TX_LAST, &call->flags) ? + rxrpc_transmit_rotate_last : + rxrpc_transmit_rotate)); wake_up(&call->waitq); while (list) { @@ -92,42 +99,65 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to) * This occurs when we get an ACKALL packet, the first DATA packet of a reply, * or a final ACK packet. */ -static bool rxrpc_end_tx_phase(struct rxrpc_call *call, const char *abort_why) +static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, + const char *abort_why) { - _enter(""); - - switch (call->state) { - case RXRPC_CALL_CLIENT_RECV_REPLY: - return true; - case RXRPC_CALL_CLIENT_AWAIT_REPLY: - case RXRPC_CALL_SERVER_AWAIT_ACK: - break; - default: - rxrpc_proto_abort(abort_why, call, call->tx_top); - return false; - } - rxrpc_rotate_tx_window(call, call->tx_top); + ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags)); write_lock(&call->state_lock); switch (call->state) { - default: - break; + case RXRPC_CALL_CLIENT_SEND_REQUEST: case RXRPC_CALL_CLIENT_AWAIT_REPLY: - call->tx_phase = false; - call->state = RXRPC_CALL_CLIENT_RECV_REPLY; + if (reply_begun) + call->state = RXRPC_CALL_CLIENT_RECV_REPLY; + else + call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY; break; + case RXRPC_CALL_SERVER_AWAIT_ACK: __rxrpc_call_completed(call); rxrpc_notify_socket(call); break; + + default: + goto bad_state; } write_unlock(&call->state_lock); - trace_rxrpc_transmit(call, rxrpc_transmit_end); + if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) { + trace_rxrpc_transmit(call, rxrpc_transmit_await_reply); + } else { + trace_rxrpc_transmit(call, rxrpc_transmit_end); + } _leave(" = ok"); return true; + +bad_state: + write_unlock(&call->state_lock); + kdebug("end_tx %s", rxrpc_call_states[call->state]); + rxrpc_proto_abort(abort_why, call, call->tx_top); + return false; +} + +/* + * Begin the reply reception phase of a call. 
+ */ +static bool rxrpc_receiving_reply(struct rxrpc_call *call) +{ + rxrpc_seq_t top = READ_ONCE(call->tx_top); + + if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) + rxrpc_rotate_tx_window(call, top); + if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { + rxrpc_proto_abort("TXL", call, top); + return false; + } + if (!rxrpc_end_tx_phase(call, true, "ETD")) + return false; + call->tx_phase = false; + return true; } /* @@ -226,8 +256,9 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, /* Received data implicitly ACKs all of the request packets we sent * when we're acting as a client. */ - if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY && - !rxrpc_end_tx_phase(call, "ETD")) + if ((call->state == RXRPC_CALL_CLIENT_SEND_REQUEST || + call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) && + !rxrpc_receiving_reply(call)) return; call->ackr_prev_seq = seq; @@ -587,27 +618,26 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, } call->acks_latest = sp->hdr.serial; - if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) && - hard_ack == call->tx_top) { - rxrpc_end_tx_phase(call, "ETA"); - return; - } - if (before(hard_ack, call->tx_hard_ack) || after(hard_ack, call->tx_top)) return rxrpc_proto_abort("AKW", call, 0); + if (nr_acks > call->tx_top - hard_ack) + return rxrpc_proto_abort("AKN", call, 0); if (after(hard_ack, call->tx_hard_ack)) rxrpc_rotate_tx_window(call, hard_ack); - if (after(first_soft_ack, call->tx_top)) + if (nr_acks > 0) { + if (skb_copy_bits(skb, sp->offset, buf.acks, nr_acks) < 0) + return rxrpc_proto_abort("XSA", call, 0); + rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks); + } + + if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { + rxrpc_end_tx_phase(call, false, "ETA"); return; + } - if (nr_acks > call->tx_top - first_soft_ack + 1) - nr_acks = first_soft_ack - call->tx_top + 1; - if (skb_copy_bits(skb, sp->offset, buf.acks, nr_acks) < 0) - return rxrpc_proto_abort("XSA", call, 0); - 
rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks); } /* @@ -619,7 +649,9 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) _proto("Rx ACKALL %%%u", sp->hdr.serial); - rxrpc_end_tx_phase(call, "ETL"); + rxrpc_rotate_tx_window(call, call->tx_top); + if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) + rxrpc_end_tx_phase(call, false, "ETL"); } /* diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 0d425e7..fe64871 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -155,9 +155,10 @@ const char rxrpc_client_traces[rxrpc_client__nr_trace][7] = { const char rxrpc_transmit_traces[rxrpc_transmit__nr_trace][4] = { [rxrpc_transmit_wait] = "WAI", [rxrpc_transmit_queue] = "QUE", - [rxrpc_transmit_queue_reqack] = "QRA", [rxrpc_transmit_queue_last] = "QLS", [rxrpc_transmit_rotate] = "ROT", + [rxrpc_transmit_rotate_last] = "RLS", + [rxrpc_transmit_await_reply] = "AWR", [rxrpc_transmit_end] = "END", }; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 7cb34b2..93e6584 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -94,11 +94,15 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); rxrpc_seq_t seq = sp->hdr.seq; int ret, ix; + u8 annotation = RXRPC_TX_ANNO_UNACK; _net("queue skb %p [%d]", skb, seq); ASSERTCMP(seq, ==, call->tx_top + 1); + if (last) + annotation |= RXRPC_TX_ANNO_LAST; + /* We have to set the timestamp before queueing as the retransmit * algorithm can see the packet as soon as we queue it. 
*/ @@ -106,18 +110,14 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, ix = seq & RXRPC_RXTX_BUFF_MASK; rxrpc_get_skb(skb, rxrpc_skb_tx_got); - call->rxtx_annotations[ix] = RXRPC_TX_ANNO_UNACK; + call->rxtx_annotations[ix] = annotation; smp_wmb(); call->rxtx_buffer[ix] = skb; call->tx_top = seq; - if (last) { - set_bit(RXRPC_CALL_TX_LAST, &call->flags); + if (last) trace_rxrpc_transmit(call, rxrpc_transmit_queue_last); - } else if (sp->hdr.flags & RXRPC_REQUEST_ACK) { - trace_rxrpc_transmit(call, rxrpc_transmit_queue_reqack); - } else { + else trace_rxrpc_transmit(call, rxrpc_transmit_queue); - } if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) { _debug("________awaiting reply/ACK__________"); -- cgit v1.1 From b86e218e0d422488e0febb07620fa97ae9713779 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 15:08:48 +0100 Subject: rxrpc: Don't call the tx_ack tracepoint if don't generate an ACK rxrpc_send_call_packet() is invoking the tx_ack tracepoint before it checks whether there's an ACK to transmit (another thread may jump in and transmit it). Fix this by only invoking the tracepoint if we get a valid ACK to transmit. Further, only allocate a serial number if we're going to actually transmit something. 
Signed-off-by: David Howells --- net/rxrpc/output.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 282cb1e..5c1e008 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -80,9 +80,6 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, pkt->ackinfo.rwind = htonl(call->rx_winsize); pkt->ackinfo.jumbo_max = htonl(jmax); - trace_rxrpc_tx_ack(call, hard_ack + 1, serial, call->ackr_reason, - top - hard_ack); - *ackp++ = 0; *ackp++ = 0; *ackp++ = 0; @@ -119,8 +116,6 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) return -ENOMEM; } - serial = atomic_inc_return(&conn->serial); - msg.msg_name = &call->peer->srx.transport; msg.msg_namelen = call->peer->srx.transport_len; msg.msg_control = NULL; @@ -131,7 +126,6 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) pkt->whdr.cid = htonl(call->cid); pkt->whdr.callNumber = htonl(call->call_id); pkt->whdr.seq = 0; - pkt->whdr.serial = htonl(serial); pkt->whdr.type = type; pkt->whdr.flags = conn->out_clientflag; pkt->whdr.userStatus = 0; @@ -157,14 +151,6 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) spin_unlock_bh(&call->lock); - _proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", - serial, - ntohs(pkt->ack.maxSkew), - ntohl(pkt->ack.firstPacket), - ntohl(pkt->ack.previousPacket), - ntohl(pkt->ack.serial), - rxrpc_acks(pkt->ack.reason), - pkt->ack.nAcks); iov[0].iov_len += sizeof(pkt->ack) + n; iov[1].iov_base = &pkt->ackinfo; @@ -176,7 +162,6 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) case RXRPC_PACKET_TYPE_ABORT: abort_code = call->abort_code; pkt->abort_code = htonl(abort_code); - _proto("Tx ABORT %%%u { %d }", serial, abort_code); iov[0].iov_len += sizeof(pkt->abort_code); len += sizeof(pkt->abort_code); ioc = 1; @@ -188,6 +173,17 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) goto out; } + serial = 
atomic_inc_return(&conn->serial); + pkt->whdr.serial = htonl(serial); + switch (type) { + case RXRPC_PACKET_TYPE_ACK: + trace_rxrpc_tx_ack(call, + ntohl(pkt->ack.firstPacket), + ntohl(pkt->ack.serial), + pkt->ack.reason, pkt->ack.nAcks); + break; + } + if (ping) { call->ackr_ping = serial; smp_wmb(); -- cgit v1.1 From fc7ab6d29a3af0b7f6df7c095509378c8caf85b5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 15:22:36 +0100 Subject: rxrpc: Add a tracepoint for the call timer Add a tracepoint to log call timer initiation, setting and expiry. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 36 ++++++++++++++++++++++++++++++++++++ net/rxrpc/ar-internal.h | 13 ++++++++++++- net/rxrpc/call_event.c | 7 ++++--- net/rxrpc/call_object.c | 6 ++++-- net/rxrpc/misc.c | 8 ++++++++ net/rxrpc/sendmsg.c | 2 +- 6 files changed, 65 insertions(+), 7 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index e8f2afb..5732289 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -414,6 +414,42 @@ TRACE_EVENT(rxrpc_rtt_rx, __entry->avg) ); +TRACE_EVENT(rxrpc_timer, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_timer_trace why, + unsigned long now), + + TP_ARGS(call, why, now), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_timer_trace, why ) + __field(unsigned long, now ) + __field(unsigned long, expire_at ) + __field(unsigned long, ack_at ) + __field(unsigned long, resend_at ) + __field(unsigned long, timer ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->why = why; + __entry->now = now; + __entry->expire_at = call->expire_at; + __entry->ack_at = call->ack_at; + __entry->resend_at = call->resend_at; + __entry->timer = call->timer.expires; + ), + + TP_printk("c=%p %s now=%lx x=%ld a=%ld r=%ld t=%ld", + __entry->call, + rxrpc_timer_traces[__entry->why], + __entry->now, + __entry->expire_at - __entry->now, + __entry->ack_at - __entry->now, + 
__entry->resend_at - __entry->now, + __entry->timer - __entry->now) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index a494d56..e564eca 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -678,6 +678,17 @@ enum rxrpc_rtt_rx_trace { extern const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5]; +enum rxrpc_timer_trace { + rxrpc_timer_begin, + rxrpc_timer_expired, + rxrpc_timer_set_for_ack, + rxrpc_timer_set_for_resend, + rxrpc_timer_set_for_send, + rxrpc_timer__nr_trace +}; + +extern const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8]; + extern const char *const rxrpc_pkts[]; extern const char *rxrpc_acks(u8 reason); @@ -707,7 +718,7 @@ int rxrpc_reject_call(struct rxrpc_sock *); /* * call_event.c */ -void rxrpc_set_timer(struct rxrpc_call *); +void rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace); void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool); void rxrpc_process_call(struct work_struct *); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 8bc5c8e..90e970b 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -24,7 +24,7 @@ /* * Set the timer */ -void rxrpc_set_timer(struct rxrpc_call *call) +void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why) { unsigned long t, now = jiffies; @@ -45,6 +45,7 @@ void rxrpc_set_timer(struct rxrpc_call *call) if (call->timer.expires != t || !timer_pending(&call->timer)) { mod_timer(&call->timer, t); + trace_rxrpc_timer(call, why, now); } } @@ -120,7 +121,7 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, _debug("deferred ACK %ld < %ld", expiry, call->ack_at - now); if (time_before(ack_at, call->ack_at)) { call->ack_at = ack_at; - rxrpc_set_timer(call); + rxrpc_set_timer(call, rxrpc_timer_set_for_ack); } } } @@ -293,7 +294,7 @@ recheck_state: goto recheck_state; } - rxrpc_set_timer(call); + 
rxrpc_set_timer(call, rxrpc_timer_set_for_resend); /* other events may have been raised since we started checking */ if (call->events && call->state < RXRPC_CALL_COMPLETE) { diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index f2fadf6..a53f4c2 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -76,8 +76,10 @@ static void rxrpc_call_timer_expired(unsigned long _call) _enter("%d", call->debug_id); - if (call->state < RXRPC_CALL_COMPLETE) + if (call->state < RXRPC_CALL_COMPLETE) { + trace_rxrpc_timer(call, rxrpc_timer_expired, jiffies); rxrpc_queue_call(call); + } } /* @@ -200,7 +202,7 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call) call->ack_at = expire_at; call->resend_at = expire_at; call->timer.expires = expire_at + 1; - rxrpc_set_timer(call); + rxrpc_set_timer(call, rxrpc_timer_begin); } /* diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index fe64871..fa9942f 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -194,3 +194,11 @@ const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5] = { [rxrpc_rtt_rx_ping_response] = "PONG", [rxrpc_rtt_rx_requested_ack] = "RACK", }; + +const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8] = { + [rxrpc_timer_begin] = "Begin ", + [rxrpc_timer_expired] = "*EXPR*", + [rxrpc_timer_set_for_ack] = "SetAck", + [rxrpc_timer_set_for_send] = "SetTx ", + [rxrpc_timer_set_for_resend] = "SetRTx", +}; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 93e6584..9993937 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -153,7 +153,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, if (time_before(resend_at, call->resend_at)) { call->resend_at = resend_at; - rxrpc_set_timer(call); + rxrpc_set_timer(call, rxrpc_timer_set_for_send); } } -- cgit v1.1 From be832aecc5ba811728e15a10f675f4a2187f25dd Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 12:39:22 +0100 Subject: rxrpc: Add data Tx tracepoint and adjust Tx ACK 
tracepoint Add a tracepoint to log transmission of DATA packets (including loss injection). Adjust the ACK transmission tracepoint to include the packet serial number and to line this up with the DATA transmission display. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 50 +++++++++++++++++++++++++++++++++++++------- net/rxrpc/conn_event.c | 5 ++--- net/rxrpc/output.c | 5 ++++- 3 files changed, 48 insertions(+), 12 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 5732289..6001bf9 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -256,33 +256,67 @@ TRACE_EVENT(rxrpc_rx_ack, __entry->n_acks) ); +TRACE_EVENT(rxrpc_tx_data, + TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, + rxrpc_serial_t serial, u8 flags, bool lose), + + TP_ARGS(call, seq, serial, flags, lose), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(rxrpc_seq_t, seq ) + __field(rxrpc_serial_t, serial ) + __field(u8, flags ) + __field(bool, lose ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->seq = seq; + __entry->serial = serial; + __entry->flags = flags; + __entry->lose = lose; + ), + + TP_printk("c=%p DATA %08x q=%08x fl=%02x%s", + __entry->call, + __entry->serial, + __entry->seq, + __entry->flags, + __entry->lose ? 
" *LOSE*" : "") + ); + TRACE_EVENT(rxrpc_tx_ack, - TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t first, - rxrpc_serial_t serial, u8 reason, u8 n_acks), + TP_PROTO(struct rxrpc_call *call, rxrpc_serial_t serial, + rxrpc_seq_t ack_first, rxrpc_serial_t ack_serial, + u8 reason, u8 n_acks), - TP_ARGS(call, first, serial, reason, n_acks), + TP_ARGS(call, serial, ack_first, ack_serial, reason, n_acks), TP_STRUCT__entry( __field(struct rxrpc_call *, call ) - __field(rxrpc_seq_t, first ) __field(rxrpc_serial_t, serial ) + __field(rxrpc_seq_t, ack_first ) + __field(rxrpc_serial_t, ack_serial ) __field(u8, reason ) __field(u8, n_acks ) ), TP_fast_assign( __entry->call = call; - __entry->first = first; __entry->serial = serial; + __entry->ack_first = ack_first; + __entry->ack_serial = ack_serial; __entry->reason = reason; __entry->n_acks = n_acks; ), - TP_printk("c=%p %s f=%08x r=%08x n=%u", + TP_printk(" c=%p ACK %08x %s f=%08x r=%08x n=%u", __entry->call, - rxrpc_acks(__entry->reason), - __entry->first, __entry->serial, + rxrpc_acks(__entry->reason), + __entry->ack_first, + __entry->ack_serial, __entry->n_acks) ); diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 75a15a4..a1cf1ec 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -98,9 +98,6 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, pkt.info.rwind = htonl(rxrpc_rx_window_size); pkt.info.jumbo_max = htonl(rxrpc_rx_jumbo_max); len += sizeof(pkt.ack) + sizeof(pkt.info); - - trace_rxrpc_tx_ack(NULL, chan->last_seq, 0, - RXRPC_ACK_DUPLICATE, 0); break; } @@ -122,6 +119,8 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, _proto("Tx ABORT %%%u { %d } [re]", serial, conn->local_abort); break; case RXRPC_PACKET_TYPE_ACK: + trace_rxrpc_tx_ack(NULL, serial, chan->last_seq, 0, + RXRPC_ACK_DUPLICATE, 0); _proto("Tx ACK %%%u [re]", serial); break; } diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 5c1e008..e47fbd1 100644 --- 
a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -177,7 +177,7 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) pkt->whdr.serial = htonl(serial); switch (type) { case RXRPC_PACKET_TYPE_ACK: - trace_rxrpc_tx_ack(call, + trace_rxrpc_tx_ack(call, serial, ntohl(pkt->ack.firstPacket), ntohl(pkt->ack.serial), pkt->ack.reason, pkt->ack.nAcks); @@ -275,6 +275,8 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { static int lose; if ((lose++ & 7) == 7) { + trace_rxrpc_tx_data(call, sp->hdr.seq, serial, + whdr.flags, true); rxrpc_lose_skb(skb, rxrpc_skb_tx_lost); _leave(" = 0 [lose]"); return 0; @@ -302,6 +304,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) goto send_fragmentable; done: + trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, false); if (ret >= 0) { ktime_t now = ktime_get_real(); skb->tstamp = now; -- cgit v1.1 From 89b475abdb107a74f57497b65becaf837a0e5b6b Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 12:39:22 +0100 Subject: rxrpc: Add a tracepoint to log injected Rx packet loss Add a tracepoint to log received packets that get discarded due to Rx packet loss. 
Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 21 +++++++++++++++++++++ net/rxrpc/input.c | 11 +++++------ 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 6001bf9..9413b17 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -484,6 +484,27 @@ TRACE_EVENT(rxrpc_timer, __entry->timer - __entry->now) ); +TRACE_EVENT(rxrpc_rx_lose, + TP_PROTO(struct rxrpc_skb_priv *sp), + + TP_ARGS(sp), + + TP_STRUCT__entry( + __field_struct(struct rxrpc_host_header, hdr ) + ), + + TP_fast_assign( + memcpy(&__entry->hdr, &sp->hdr, sizeof(__entry->hdr)); + ), + + TP_printk("%08x:%08x:%08x:%04x %08x %08x %02x %02x %s *LOSE*", + __entry->hdr.epoch, __entry->hdr.cid, + __entry->hdr.callNumber, __entry->hdr.serviceId, + __entry->hdr.serial, __entry->hdr.seq, + __entry->hdr.type, __entry->hdr.flags, + __entry->hdr.type <= 15 ? rxrpc_pkts[__entry->hdr.type] : "?UNK") + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index fb3e2f6..19b1e18 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -837,20 +837,19 @@ void rxrpc_data_ready(struct sock *udp_sk) skb_orphan(skb); sp = rxrpc_skb(skb); + /* dig out the RxRPC connection details */ + if (rxrpc_extract_header(sp, skb) < 0) + goto bad_message; + if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { static int lose; if ((lose++ & 7) == 7) { + trace_rxrpc_rx_lose(sp); rxrpc_lose_skb(skb, rxrpc_skb_rx_lost); return; } } - _net("Rx UDP packet from %08x:%04hu", - ntohl(ip_hdr(skb)->saddr), ntohs(udp_hdr(skb)->source)); - - /* dig out the RxRPC connection details */ - if (rxrpc_extract_header(sp, skb) < 0) - goto bad_message; trace_rxrpc_rx_packet(sp); _net("Rx RxRPC %s ep=%x call=%x:%x", -- cgit v1.1 From 9c7ad434441da6b5d4ac878cac368fbdaec99b56 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 13:50:40 +0100 Subject: rxrpc: Add 
tracepoint for ACK proposal Add a tracepoint to log proposed ACKs, including whether the proposal is used to update a pending ACK or is discarded in favour of an earlier, higher priority ACK. Whilst we're at it, get rid of the rxrpc_acks() function and access the name array directly. We do, however, need to validate the ACK reason number given to trace_rxrpc_rx_ack() to make sure we don't overrun the array. Signed-off-by: David Howells --- include/rxrpc/packet.h | 1 + include/trace/events/rxrpc.h | 42 ++++++++++++++++++++++++++++++++++++++++-- net/rxrpc/ar-internal.h | 25 +++++++++++++++++++++++-- net/rxrpc/call_event.c | 21 ++++++++++++++------- net/rxrpc/input.c | 19 +++++++++++++------ net/rxrpc/misc.c | 30 +++++++++++++++++++----------- net/rxrpc/output.c | 3 ++- net/rxrpc/recvmsg.c | 3 ++- 8 files changed, 114 insertions(+), 30 deletions(-) diff --git a/include/rxrpc/packet.h b/include/rxrpc/packet.h index fd6eb3a..703a64b 100644 --- a/include/rxrpc/packet.h +++ b/include/rxrpc/packet.h @@ -123,6 +123,7 @@ struct rxrpc_ackpacket { #define RXRPC_ACK_PING_RESPONSE 7 /* response to RXRPC_ACK_PING */ #define RXRPC_ACK_DELAY 8 /* nothing happened since received packet */ #define RXRPC_ACK_IDLE 9 /* ACK due to fully received ACK window */ +#define RXRPC_ACK__INVALID 10 /* Representation of invalid ACK reason */ uint8_t nAcks; /* number of ACKs */ #define RXRPC_MAXACKS 255 diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 9413b17..d67a8c6 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -251,7 +251,7 @@ TRACE_EVENT(rxrpc_rx_ack, TP_printk("c=%p %s f=%08x n=%u", __entry->call, - rxrpc_acks(__entry->reason), + rxrpc_ack_names[__entry->reason], __entry->first, __entry->n_acks) ); @@ -314,7 +314,7 @@ TRACE_EVENT(rxrpc_tx_ack, TP_printk(" c=%p ACK %08x %s f=%08x r=%08x n=%u", __entry->call, __entry->serial, - rxrpc_acks(__entry->reason), + rxrpc_ack_names[__entry->reason], __entry->ack_first, __entry->ack_serial, 
__entry->n_acks) @@ -505,6 +505,44 @@ TRACE_EVENT(rxrpc_rx_lose, __entry->hdr.type <= 15 ? rxrpc_pkts[__entry->hdr.type] : "?UNK") ); +TRACE_EVENT(rxrpc_propose_ack, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_propose_ack_trace why, + u8 ack_reason, rxrpc_serial_t serial, bool immediate, + bool background, enum rxrpc_propose_ack_outcome outcome), + + TP_ARGS(call, why, ack_reason, serial, immediate, background, + outcome), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_propose_ack_trace, why ) + __field(rxrpc_serial_t, serial ) + __field(u8, ack_reason ) + __field(bool, immediate ) + __field(bool, background ) + __field(enum rxrpc_propose_ack_outcome, outcome ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->why = why; + __entry->serial = serial; + __entry->ack_reason = ack_reason; + __entry->immediate = immediate; + __entry->background = background; + __entry->outcome = outcome; + ), + + TP_printk("c=%p %s %s r=%08x i=%u b=%u%s", + __entry->call, + rxrpc_propose_ack_traces[__entry->why], + rxrpc_ack_names[__entry->ack_reason], + __entry->serial, + __entry->immediate, + __entry->background, + rxrpc_propose_ack_outcomes[__entry->outcome]) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index e564eca..042dbcc 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -689,8 +689,28 @@ enum rxrpc_timer_trace { extern const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8]; +enum rxrpc_propose_ack_trace { + rxrpc_propose_ack_input_data, + rxrpc_propose_ack_ping_for_params, + rxrpc_propose_ack_respond_to_ack, + rxrpc_propose_ack_respond_to_ping, + rxrpc_propose_ack_retry_tx, + rxrpc_propose_ack_terminal_ack, + rxrpc_propose_ack__nr_trace +}; + +enum rxrpc_propose_ack_outcome { + rxrpc_propose_ack_use, + rxrpc_propose_ack_update, + rxrpc_propose_ack_subsume, + rxrpc_propose_ack__nr_outcomes +}; + +extern const char 
rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8]; +extern const char *const rxrpc_propose_ack_outcomes[rxrpc_propose_ack__nr_outcomes]; + extern const char *const rxrpc_pkts[]; -extern const char *rxrpc_acks(u8 reason); +extern const char const rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4]; #include @@ -719,7 +739,8 @@ int rxrpc_reject_call(struct rxrpc_sock *); * call_event.c */ void rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace); -void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool); +void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool, + enum rxrpc_propose_ack_trace); void rxrpc_process_call(struct work_struct *); /* diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 90e970b..fd5b113 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -58,14 +58,13 @@ out: */ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, u16 skew, u32 serial, bool immediate, - bool background) + bool background, + enum rxrpc_propose_ack_trace why) { + enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use; unsigned long now, ack_at, expiry = rxrpc_soft_ack_delay; s8 prior = rxrpc_ack_priority[ack_reason]; - _enter("{%d},%s,%%%x,%u", - call->debug_id, rxrpc_acks(ack_reason), serial, immediate); - /* Update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial * numbers, but we don't alter the timeout. 
*/ @@ -74,15 +73,18 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, call->ackr_reason, rxrpc_ack_priority[call->ackr_reason]); if (ack_reason == call->ackr_reason) { if (RXRPC_ACK_UPDATEABLE & (1 << ack_reason)) { + outcome = rxrpc_propose_ack_update; call->ackr_serial = serial; call->ackr_skew = skew; } if (!immediate) - return; + goto trace; } else if (prior > rxrpc_ack_priority[call->ackr_reason]) { call->ackr_reason = ack_reason; call->ackr_serial = serial; call->ackr_skew = skew; + } else { + outcome = rxrpc_propose_ack_subsume; } switch (ack_reason) { @@ -124,17 +126,22 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, rxrpc_set_timer(call, rxrpc_timer_set_for_ack); } } + +trace: + trace_rxrpc_propose_ack(call, why, ack_reason, serial, immediate, + background, outcome); } /* * propose an ACK be sent, locking the call structure */ void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, - u16 skew, u32 serial, bool immediate, bool background) + u16 skew, u32 serial, bool immediate, bool background, + enum rxrpc_propose_ack_trace why) { spin_lock_bh(&call->lock); __rxrpc_propose_ACK(call, ack_reason, skew, serial, - immediate, background); + immediate, background, why); spin_unlock_bh(&call->lock); } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 19b1e18..349698d 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -49,7 +49,8 @@ static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb, if (call->peer->rtt_usage < 3 || ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now)) rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, - true, true); + true, true, + rxrpc_propose_ack_ping_for_params); } /* @@ -382,7 +383,8 @@ skip: ack: if (ack) rxrpc_propose_ACK(call, ack, skew, ack_serial, - immediate_ack, true); + immediate_ack, true, + rxrpc_propose_ack_input_data); if (sp->hdr.seq == READ_ONCE(call->rx_hard_ack) + 1) rxrpc_notify_socket(call); @@ -539,6 +541,7 
@@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, u16 skew) { + u8 ack_reason; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); union { struct rxrpc_ackpacket ack; @@ -561,8 +564,10 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, first_soft_ack = ntohl(buf.ack.firstPacket); hard_ack = first_soft_ack - 1; nr_acks = buf.ack.nAcks; + ack_reason = (buf.ack.reason < RXRPC_ACK__INVALID ? + buf.ack.reason : RXRPC_ACK__INVALID); - trace_rxrpc_rx_ack(call, first_soft_ack, buf.ack.reason, nr_acks); + trace_rxrpc_rx_ack(call, first_soft_ack, ack_reason, nr_acks); _proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", sp->hdr.serial, @@ -570,7 +575,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, first_soft_ack, ntohl(buf.ack.previousPacket), acked_serial, - rxrpc_acks(buf.ack.reason), + rxrpc_ack_names[ack_reason], buf.ack.nAcks); if (buf.ack.reason == RXRPC_ACK_PING_RESPONSE) @@ -583,10 +588,12 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, if (buf.ack.reason == RXRPC_ACK_PING) { _proto("Rx ACK %%%u PING Request", sp->hdr.serial); rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE, - skew, sp->hdr.serial, true, true); + skew, sp->hdr.serial, true, true, + rxrpc_propose_ack_respond_to_ping); } else if (sp->hdr.flags & RXRPC_REQUEST_ACK) { rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, - skew, sp->hdr.serial, true, true); + skew, sp->hdr.serial, true, true, + rxrpc_propose_ack_respond_to_ack); } offset = sp->offset + nr_acks + 3; diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index fa9942f..1ca1483 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -91,17 +91,10 @@ const s8 rxrpc_ack_priority[] = { [RXRPC_ACK_PING] = 9, }; -const char *rxrpc_acks(u8 reason) -{ - static const char *const str[] = { - "---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY", - "IDL", "-?-" - }; - - if 
(reason >= ARRAY_SIZE(str)) - reason = ARRAY_SIZE(str) - 1; - return str[reason]; -} +const char const rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4] = { + "---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY", + "IDL", "-?-" +}; const char rxrpc_skb_traces[rxrpc_skb__nr_trace][7] = { [rxrpc_skb_rx_cleaned] = "Rx CLN", @@ -202,3 +195,18 @@ const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8] = { [rxrpc_timer_set_for_send] = "SetTx ", [rxrpc_timer_set_for_resend] = "SetRTx", }; + +const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8] = { + [rxrpc_propose_ack_input_data] = "DataIn ", + [rxrpc_propose_ack_ping_for_params] = "Params ", + [rxrpc_propose_ack_respond_to_ack] = "Rsp2Ack", + [rxrpc_propose_ack_respond_to_ping] = "Rsp2Png", + [rxrpc_propose_ack_retry_tx] = "RetryTx", + [rxrpc_propose_ack_terminal_ack] = "ClTerm ", +}; + +const char *const rxrpc_propose_ack_outcomes[rxrpc_propose_ack__nr_outcomes] = { + [rxrpc_propose_ack_use] = "", + [rxrpc_propose_ack_update] = " Update", + [rxrpc_propose_ack_subsume] = " Subsume", +}; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index e47fbd1..0c563e3 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -210,7 +210,8 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) rxrpc_propose_ACK(call, pkt->ack.reason, ntohs(pkt->ack.maxSkew), ntohl(pkt->ack.serial), - true, true); + true, true, + rxrpc_propose_ack_retry_tx); break; case RXRPC_PACKET_TYPE_ABORT: break; diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 99e4c0a..8c7f3de 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -141,7 +141,8 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call) ASSERTCMP(call->rx_hard_ack, ==, call->rx_top); if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { - rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, true, false); + rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, true, false, + rxrpc_propose_ack_terminal_ack); rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); } -- 
cgit v1.1 From c6672e3fe4a641bf302d6309ab4d5ee55648e758 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Sep 2016 13:58:55 +0100 Subject: rxrpc: Add a tracepoint to log which packets will be retransmitted Add a tracepoint to log in rxrpc_resend() which packets will be retransmitted. Note that if a positive ACK comes in whilst we have dropped the lock to retransmit another packet, the actual retransmission may not happen, though some of the effects will (such as altering the congestion management). Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 27 +++++++++++++++++++++++++++ net/rxrpc/call_event.c | 2 ++ 2 files changed, 29 insertions(+) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index d67a8c6..5647549 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -543,6 +543,33 @@ TRACE_EVENT(rxrpc_propose_ack, rxrpc_propose_ack_outcomes[__entry->outcome]) ); +TRACE_EVENT(rxrpc_retransmit, + TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, u8 annotation, + s64 expiry), + + TP_ARGS(call, seq, annotation, expiry), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(rxrpc_seq_t, seq ) + __field(u8, annotation ) + __field(s64, expiry ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->seq = seq; + __entry->annotation = annotation; + __entry->expiry = expiry; + ), + + TP_printk("c=%p q=%x a=%02x xp=%lld", + __entry->call, + __entry->seq, + __entry->annotation, + __entry->expiry) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index fd5b113..a78a92f 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -196,6 +196,8 @@ static void rxrpc_resend(struct rxrpc_call *call) /* Okay, we need to retransmit a packet. 
*/ call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS | annotation; + trace_rxrpc_retransmit(call, seq, annotation | anno_type, + ktime_to_ns(ktime_sub(skb->tstamp, max_age))); } resend_at = ktime_sub(ktime_add_ms(oldest, rxrpc_resend_timeout), now); -- cgit v1.1 From c9cc599a96a6822c52cd72ed31dd7f813d792b4f Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 22 Sep 2016 12:11:12 +0300 Subject: net/mlx4_core: Fix QUERY FUNC CAP flags Separate QUERY_FUNC_CAP flags0 from QUERY_FUNC_CAP flags, as 'flags' is already used for another set of flags in FUNC CAP, while phv bit should be part of a different set of flags. Remove QUERY_FUNC_CAP port_flags field, as it is not in use. Fixes: 77fc29c4bbbb ('net/mlx4_core: Preparations for 802.1ad VLAN support') Fixes: 5cc914f10851 ('mlx4_core: Added FW commands and their wrappers for supporting SRIOV') Signed-off-by: Moshe Shemesh Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/fw.c | 5 ++--- drivers/net/ethernet/mellanox/mlx4/fw.h | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index d728704..c752330 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -612,8 +612,7 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port, MLX4_GET(func_cap->phys_port_id, outbox, QUERY_FUNC_CAP_PHYS_PORT_ID); - MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET); - func_cap->flags |= (field & QUERY_FUNC_CAP_PHV_BIT); + MLX4_GET(func_cap->flags0, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET); /* All other resources are allocated by the master, but we still report * 'num' and 'reserved' capabilities as follows: @@ -2914,7 +2913,7 @@ int get_phv_bit(struct mlx4_dev *dev, u8 port, int *phv) memset(&func_cap, 0, sizeof(func_cap)); err = mlx4_QUERY_FUNC_CAP(dev, port, &func_cap); if (!err) - *phv = func_cap.flags & QUERY_FUNC_CAP_PHV_BIT; + *phv = 
func_cap.flags0 & QUERY_FUNC_CAP_PHV_BIT; return err; } EXPORT_SYMBOL(get_phv_bit); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index cdbd76f..f11614f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -152,7 +152,7 @@ struct mlx4_func_cap { u32 qp1_proxy_qpn; u32 reserved_lkey; u8 physical_port; - u8 port_flags; + u8 flags0; u8 flags1; u64 phys_port_id; u32 extra_flags; -- cgit v1.1 From 7c3d21c8153c6bfb5690e35e086b0522c42442d9 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 22 Sep 2016 12:11:13 +0300 Subject: net/mlx4_core: Preparation for VF vlan protocol 802.1ad Check device capability to support VF vlan protocol 802.1ad mode. Add vport attribute vlan protocol. Init vport vlan protocol by default to 802.1Q. Add update QP support for VF vlan protocol 802.1ad. Add func capability vlan_offload_disable to disable all vlan HW acceleration on VF while the VF is set to VF vlan protocol 802.1ad mode. No change in VF vlan protocol 802.1Q (VST) mode. Signed-off-by: Moshe Shemesh Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 7 ++++ drivers/net/ethernet/mellanox/mlx4/fw.c | 37 ++++++++++++++++---- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 ++ .../net/ethernet/mellanox/mlx4/resource_tracker.c | 40 ++++++++++++++++++---- include/linux/mlx4/device.h | 3 ++ include/linux/mlx4/qp.h | 2 ++ 6 files changed, 78 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index a58d96c..09c9694 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1851,6 +1851,7 @@ static int mlx4_master_immediate_activate_vlan_qos(struct mlx4_priv *priv, if (vp_oper->state.default_vlan == vp_admin->default_vlan && vp_oper->state.default_qos == vp_admin->default_qos && + vp_oper->state.vlan_proto == vp_admin->vlan_proto && vp_oper->state.link_state == vp_admin->link_state && vp_oper->state.qos_vport == vp_admin->qos_vport) return 0; @@ -1909,6 +1910,7 @@ static int mlx4_master_immediate_activate_vlan_qos(struct mlx4_priv *priv, vp_oper->state.default_vlan = vp_admin->default_vlan; vp_oper->state.default_qos = vp_admin->default_qos; + vp_oper->state.vlan_proto = vp_admin->vlan_proto; vp_oper->state.link_state = vp_admin->link_state; vp_oper->state.qos_vport = vp_admin->qos_vport; @@ -1922,6 +1924,7 @@ static int mlx4_master_immediate_activate_vlan_qos(struct mlx4_priv *priv, work->qos_vport = vp_oper->state.qos_vport; work->vlan_id = vp_oper->state.default_vlan; work->vlan_ix = vp_oper->vlan_idx; + work->vlan_proto = vp_oper->state.vlan_proto; work->priv = priv; INIT_WORK(&work->work, mlx4_vf_immed_vlan_work_handler); queue_work(priv->mfunc.master.comm_wq, &work->work); @@ -2012,6 +2015,8 @@ static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave) vp_admin->default_vlan, &(vp_oper->vlan_idx)); if (err) { vp_oper->vlan_idx = NO_INDX; + vp_oper->state.default_vlan = MLX4_VGT; + vp_oper->state.vlan_proto = 
htons(ETH_P_8021Q); mlx4_warn(&priv->dev, "No vlan resources slave %d, port %d\n", slave, port); @@ -2388,6 +2393,8 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) admin_vport->qos_vport = MLX4_VPP_DEFAULT_VPORT; oper_vport->qos_vport = MLX4_VPP_DEFAULT_VPORT; + admin_vport->vlan_proto = htons(ETH_P_8021Q); + oper_vport->vlan_proto = htons(ETH_P_8021Q); vf_oper->vport[port].vlan_idx = NO_INDX; vf_oper->vport[port].mac_idx = NO_INDX; mlx4_set_random_admin_guid(dev, i, port); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index c752330..7dc9d38 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -158,7 +158,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [31] = "Modifying loopback source checks using UPDATE_QP support", [32] = "Loopback source checks support", [33] = "RoCEv2 support", - [34] = "DMFS Sniffer support (UC & MC)" + [34] = "DMFS Sniffer support (UC & MC)", + [35] = "QinQ VST mode support", }; int i; @@ -313,12 +314,15 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID 0x80 #define QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS (1 << 31) #define QUERY_FUNC_CAP_PHV_BIT 0x40 +#define QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE 0x20 if (vhcr->op_modifier == 1) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); int converted_port = mlx4_slave_convert_port( dev, slave, vhcr->in_modifier); + struct mlx4_vport_oper_state *vp_oper = + &priv->mfunc.master.vf_oper[slave].vport[vhcr->in_modifier]; if (converted_port < 0) return -EINVAL; @@ -357,11 +361,12 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, MLX4_PUT(outbox->buf, dev->caps.phys_port_id[vhcr->in_modifier], QUERY_FUNC_CAP_PHYS_PORT_ID); - if (dev->caps.phv_bit[port]) { - field = QUERY_FUNC_CAP_PHV_BIT; - MLX4_PUT(outbox->buf, field, - QUERY_FUNC_CAP_FLAGS0_OFFSET); - } + field = 0; + if 
(dev->caps.phv_bit[port]) + field |= QUERY_FUNC_CAP_PHV_BIT; + if (vp_oper->state.vlan_proto == htons(ETH_P_8021AD)) + field |= QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE; + MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS0_OFFSET); } else if (vhcr->op_modifier == 0) { struct mlx4_active_ports actv_ports = @@ -689,6 +694,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET 0x52 #define QUERY_DEV_CAP_MAX_SG_RQ_OFFSET 0x55 #define QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET 0x56 +#define QUERY_DEV_CAP_SVLAN_BY_QP_OFFSET 0x5D #define QUERY_DEV_CAP_MAX_QP_MCG_OFFSET 0x61 #define QUERY_DEV_CAP_RSVD_MCG_OFFSET 0x62 #define QUERY_DEV_CAP_MAX_MCG_OFFSET 0x63 @@ -856,6 +862,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET); dev_cap->max_sq_desc_sz = size; + MLX4_GET(field, outbox, QUERY_DEV_CAP_SVLAN_BY_QP_OFFSET); + if (field & 0x1) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_MCG_OFFSET); dev_cap->max_qp_per_mcg = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MCG_OFFSET); @@ -2937,6 +2946,22 @@ int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val) } EXPORT_SYMBOL(set_phv_bit); +int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port, + bool *vlan_offload_disabled) +{ + struct mlx4_func_cap func_cap; + int err; + + memset(&func_cap, 0, sizeof(func_cap)); + err = mlx4_QUERY_FUNC_CAP(dev, port, &func_cap); + if (!err) + *vlan_offload_disabled = + !!(func_cap.flags0 & + QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE); + return err; +} +EXPORT_SYMBOL(mlx4_get_is_vlan_offload_disabled); + void mlx4_replace_zero_macs(struct mlx4_dev *dev) { int i; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index c128ba3..fdfe1ac 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ 
b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -508,6 +508,7 @@ struct mlx4_vport_state { u64 mac; u16 default_vlan; u8 default_qos; + __be16 vlan_proto; u32 tx_rate; bool spoofchk; u32 link_state; @@ -657,6 +658,7 @@ struct mlx4_vf_immed_vlan_work { u8 qos_vport; u16 vlan_id; u16 orig_vlan_id; + __be16 vlan_proto; }; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 8b81114..84d7857 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -790,10 +790,22 @@ static int update_vport_qp_param(struct mlx4_dev *dev, MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED; } else if (0 != vp_oper->state.default_vlan) { - qpc->pri_path.vlan_control |= - MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | - MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | - MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; + if (vp_oper->state.vlan_proto == htons(ETH_P_8021AD)) { + /* vst QinQ should block untagged on TX, + * but cvlan is in payload and phv is set so + * hw see it as untagged. Block tagged instead. 
+ */ + qpc->pri_path.vlan_control |= + MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; + } else { /* vst 802.1Q */ + qpc->pri_path.vlan_control |= + MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; + } } else { /* priority tagged */ qpc->pri_path.vlan_control |= MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | @@ -802,7 +814,11 @@ static int update_vport_qp_param(struct mlx4_dev *dev, qpc->pri_path.fvl_rx |= MLX4_FVL_RX_FORCE_ETH_VLAN; qpc->pri_path.vlan_index = vp_oper->vlan_idx; - qpc->pri_path.fl |= MLX4_FL_CV | MLX4_FL_ETH_HIDE_CQE_VLAN; + qpc->pri_path.fl |= MLX4_FL_ETH_HIDE_CQE_VLAN; + if (vp_oper->state.vlan_proto == htons(ETH_P_8021AD)) + qpc->pri_path.fl |= MLX4_FL_SV; + else + qpc->pri_path.fl |= MLX4_FL_CV; qpc->pri_path.feup |= MLX4_FEUP_FORCE_ETH_UP | MLX4_FVL_FORCE_ETH_VLAN; qpc->pri_path.sched_queue &= 0xC7; qpc->pri_path.sched_queue |= (vp_oper->state.default_qos) << 3; @@ -5238,6 +5254,7 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work) u64 qp_path_mask = ((1ULL << MLX4_UPD_QP_PATH_MASK_VLAN_INDEX) | (1ULL << MLX4_UPD_QP_PATH_MASK_FVL) | (1ULL << MLX4_UPD_QP_PATH_MASK_CV) | + (1ULL << MLX4_UPD_QP_PATH_MASK_SV) | (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_HIDE_CQE_VLAN) | (1ULL << MLX4_UPD_QP_PATH_MASK_FEUP) | (1ULL << MLX4_UPD_QP_PATH_MASK_FVL_RX) | @@ -5266,7 +5283,12 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work) else if (!work->vlan_id) vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED; - else + else if (work->vlan_proto == htons(ETH_P_8021AD)) + vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; + else /* vst 802.1Q */ vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | 
MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; @@ -5311,7 +5333,11 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work) upd_context->qp_context.pri_path.fvl_rx = qp->fvl_rx | MLX4_FVL_RX_FORCE_ETH_VLAN; upd_context->qp_context.pri_path.fl = - qp->pri_path_fl | MLX4_FL_CV | MLX4_FL_ETH_HIDE_CQE_VLAN; + qp->pri_path_fl | MLX4_FL_ETH_HIDE_CQE_VLAN; + if (work->vlan_proto == htons(ETH_P_8021AD)) + upd_context->qp_context.pri_path.fl |= MLX4_FL_SV; + else + upd_context->qp_context.pri_path.fl |= MLX4_FL_CV; upd_context->qp_context.pri_path.feup = qp->feup | MLX4_FEUP_FORCE_ETH_UP | MLX4_FVL_FORCE_ETH_VLAN; upd_context->qp_context.pri_path.sched_queue = diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 42da355..59b50d3 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -221,6 +221,7 @@ enum { MLX4_DEV_CAP_FLAG2_ROCE_V1_V2 = 1ULL << 33, MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER = 1ULL << 34, MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT = 1ULL << 35, + MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP = 1ULL << 36, }; enum { @@ -1371,6 +1372,8 @@ int mlx4_SET_PORT_fcs_check(struct mlx4_dev *dev, u8 port, int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering, int enable); int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val); int get_phv_bit(struct mlx4_dev *dev, u8 port, int *phv); +int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port, + bool *vlan_offload_disabled); int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index deaa221..b4ee8f6 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -160,6 +160,7 @@ struct mlx4_qp_path { enum { /* fl */ MLX4_FL_CV = 1 << 6, + MLX4_FL_SV = 1 << 5, MLX4_FL_ETH_HIDE_CQE_VLAN = 1 << 
2, MLX4_FL_ETH_SRC_CHECK_MC_LB = 1 << 1, MLX4_FL_ETH_SRC_CHECK_UC_LB = 1 << 0, @@ -267,6 +268,7 @@ enum { MLX4_UPD_QP_PATH_MASK_FVL_RX = 16 + 32, MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_UC_LB = 18 + 32, MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB = 19 + 32, + MLX4_UPD_QP_PATH_MASK_SV = 22 + 32, }; enum { /* param3 */ -- cgit v1.1 From 0815fe3a86a01cdf81361459c465761be7138665 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 22 Sep 2016 12:11:14 +0300 Subject: net/mlx4_en: Disable vlan HW acceleration when in VF vlan protocol 802.1ad mode In Ethernet VF, disable vlan HW acceleration on VF while it is set to VF vlan protocol 802.1ad mode. Signed-off-by: Moshe Shemesh Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 62516f8..a94f8a3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -3224,6 +3224,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, } if (mlx4_is_slave(mdev->dev)) { + bool vlan_offload_disabled; int phv; err = get_phv_bit(mdev->dev, port, &phv); @@ -3231,6 +3232,18 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, dev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; priv->pflags |= MLX4_EN_PRIV_FLAGS_PHV; } + err = mlx4_get_is_vlan_offload_disabled(mdev->dev, port, + &vlan_offload_disabled); + if (!err && vlan_offload_disabled) { + dev->hw_features &= ~(NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_STAG_TX | + NETIF_F_HW_VLAN_STAG_RX); + dev->features &= ~(NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_STAG_TX | + NETIF_F_HW_VLAN_STAG_RX); + } } else { if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN && !(mdev->dev->caps.flags2 & -- cgit v1.1 From 79aab093a0b5370d7fc4e99df75996f4744dc03f Mon 
Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 22 Sep 2016 12:11:15 +0300 Subject: net: Update API for VF vlan protocol 802.1ad support Introduce new rtnl UAPI that exposes a list of vlans per VF, giving the ability for user-space application to specify it for the VF, as an option to support 802.1ad. We adjusted IP Link tool to support this option. For future use cases, the new UAPI supports multiple vlans. For now we limit the list size to a single vlan in kernel. Add IFLA_VF_VLAN_LIST in addition to IFLA_VF_VLAN to keep backward compatibility with older versions of IP Link tool. Add a vlan protocol parameter to the ndo_set_vf_vlan callback. We kept 802.1Q as the drivers' default vlan protocol. Suitable ip link tool command examples: Set vf vlan protocol 802.1ad: ip link set eth0 vf 1 vlan 100 proto 802.1ad Set vf to VST (802.1Q) mode: ip link set eth0 vf 1 vlan 100 proto 802.1Q Or by omitting the new parameter ip link set eth0 vf 1 vlan 100 Signed-off-by: Moshe Shemesh Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h | 3 +- drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 9 ++- drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c | 6 +- drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h | 2 +- drivers/net/ethernet/emulex/benet/be_main.c | 6 +- drivers/net/ethernet/intel/fm10k/fm10k.h | 2 +- drivers/net/ethernet/intel/fm10k/fm10k_iov.c | 6 +- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 11 ++- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h | 4 +- drivers/net/ethernet/intel/igb/igb_main.c | 9 ++- drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 5 +- drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h | 2 +- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 6 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 +- drivers/net/ethernet/qlogic/qede/qede_main.c | 6 +- drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h | 2 +- .../net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c | 5 +- drivers/net/ethernet/sfc/sriov.c | 5 +- drivers/net/ethernet/sfc/sriov.h | 2 +- include/linux/if_link.h | 1 + include/linux/netdevice.h | 6 +- include/uapi/linux/if_link.h | 19 ++++- net/core/rtnetlink.c | 80 ++++++++++++++++++---- 23 files changed, 161 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index 0e68fad..243cb97 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -492,7 +492,8 @@ int __bnx2x_setup_tc(struct net_device *dev, u32 handle, __be16 proto, int bnx2x_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi); int bnx2x_set_vf_mac(struct net_device *dev, int queue, u8 *mac); -int bnx2x_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos); +int bnx2x_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto); /* select_queue callback */ u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb, diff --git 
a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 6c586b0..3f77d08 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -2521,7 +2521,8 @@ void bnx2x_pf_set_vfs_vlan(struct bnx2x *bp) for_each_vf(bp, vfidx) { bulletin = BP_VF_BULLETIN(bp, vfidx); if (bulletin->valid_bitmap & (1 << VLAN_VALID)) - bnx2x_set_vf_vlan(bp->dev, vfidx, bulletin->vlan, 0); + bnx2x_set_vf_vlan(bp->dev, vfidx, bulletin->vlan, 0, + htons(ETH_P_8021Q)); } } @@ -2781,7 +2782,8 @@ static int bnx2x_set_vf_vlan_filter(struct bnx2x *bp, struct bnx2x_virtf *vf, return 0; } -int bnx2x_set_vf_vlan(struct net_device *dev, int vfidx, u16 vlan, u8 qos) +int bnx2x_set_vf_vlan(struct net_device *dev, int vfidx, u16 vlan, u8 qos, + __be16 vlan_proto) { struct pf_vf_bulletin_content *bulletin = NULL; struct bnx2x *bp = netdev_priv(dev); @@ -2796,6 +2798,9 @@ int bnx2x_set_vf_vlan(struct net_device *dev, int vfidx, u16 vlan, u8 qos) return -EINVAL; } + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + DP(BNX2X_MSG_IOV, "configuring VF %d with VLAN %d qos %d\n", vfidx, vlan, 0); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index 8be7185..ec6cd18 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -174,7 +174,8 @@ int bnxt_set_vf_mac(struct net_device *dev, int vf_id, u8 *mac) return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); } -int bnxt_set_vf_vlan(struct net_device *dev, int vf_id, u16 vlan_id, u8 qos) +int bnxt_set_vf_vlan(struct net_device *dev, int vf_id, u16 vlan_id, u8 qos, + __be16 vlan_proto) { struct hwrm_func_cfg_input req = {0}; struct bnxt *bp = netdev_priv(dev); @@ -185,6 +186,9 @@ int bnxt_set_vf_vlan(struct net_device *dev, int vf_id, u16 vlan_id, u8 qos) if (bp->hwrm_spec_code < 0x10201) return -ENOTSUPP; 
+ if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + rc = bnxt_vf_ndo_prep(bp, vf_id); if (rc) return rc; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h index 0392670..1ab72e4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h @@ -12,7 +12,7 @@ int bnxt_get_vf_config(struct net_device *, int, struct ifla_vf_info *); int bnxt_set_vf_mac(struct net_device *, int, u8 *); -int bnxt_set_vf_vlan(struct net_device *, int, u16, u8); +int bnxt_set_vf_vlan(struct net_device *, int, u16, u8, __be16); int bnxt_set_vf_bw(struct net_device *, int, int, int); int bnxt_set_vf_link_state(struct net_device *, int, int); int bnxt_set_vf_spoofchk(struct net_device *, int, bool); diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 9a94840..ac513e6 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1895,7 +1895,8 @@ static int be_clear_vf_tvt(struct be_adapter *adapter, int vf) return 0; } -static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos) +static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto) { struct be_adapter *adapter = netdev_priv(netdev); struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf]; @@ -1907,6 +1908,9 @@ static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos) if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7) return -EINVAL; + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + if (vlan || qos) { vlan |= qos << VLAN_PRIO_SHIFT; status = be_set_vf_tvt(adapter, vf, vlan); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h index 67ff01a..4d19e46 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h @@ -507,7 +507,7 @@ 
int fm10k_iov_configure(struct pci_dev *pdev, int num_vfs); s32 fm10k_iov_update_pvid(struct fm10k_intfc *interface, u16 glort, u16 pvid); int fm10k_ndo_set_vf_mac(struct net_device *netdev, int vf_idx, u8 *mac); int fm10k_ndo_set_vf_vlan(struct net_device *netdev, - int vf_idx, u16 vid, u8 qos); + int vf_idx, u16 vid, u8 qos, __be16 vlan_proto); int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, int rate, int unused); int fm10k_ndo_get_vf_config(struct net_device *netdev, diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c index d9dec81..5f4dac0 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c @@ -445,7 +445,7 @@ int fm10k_ndo_set_vf_mac(struct net_device *netdev, int vf_idx, u8 *mac) } int fm10k_ndo_set_vf_vlan(struct net_device *netdev, int vf_idx, u16 vid, - u8 qos) + u8 qos, __be16 vlan_proto) { struct fm10k_intfc *interface = netdev_priv(netdev); struct fm10k_iov_data *iov_data = interface->iov_data; @@ -460,6 +460,10 @@ int fm10k_ndo_set_vf_vlan(struct net_device *netdev, int vf_idx, u16 vid, if (qos || (vid > (VLAN_VID_MASK - 1))) return -EINVAL; + /* VF VLAN Protocol part to default is unsupported */ + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + vf_info = &iov_data->vf_info[vf_idx]; /* exit if there is nothing to do */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index da34235..724d874 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2747,11 +2747,12 @@ error_param: * @vf_id: VF identifier * @vlan_id: mac address * @qos: priority setting + * @vlan_proto: vlan protocol * * program VF vlan id and/or qos **/ -int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, - int vf_id, u16 vlan_id, u8 qos) +int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, + 
u16 vlan_id, u8 qos, __be16 vlan_proto) { u16 vlanprio = vlan_id | (qos << I40E_VLAN_PRIORITY_SHIFT); struct i40e_netdev_priv *np = netdev_priv(netdev); @@ -2774,6 +2775,12 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, goto error_pvid; } + if (vlan_proto != htons(ETH_P_8021Q)) { + dev_err(&pf->pdev->dev, "VF VLAN protocol is not supported\n"); + ret = -EPROTONOSUPPORT; + goto error_pvid; + } + vf = &(pf->vf[vf_id]); vsi = pf->vsi[vf->lan_vsi_idx]; if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states)) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index 8751741..4012d06 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -129,8 +129,8 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf); /* VF configuration related iplink handlers */ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac); -int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, - int vf_id, u16 vlan_id, u8 qos); +int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, + u16 vlan_id, u8 qos, __be16 vlan_proto); int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, int max_tx_rate); int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting); diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index af75eac..a83aa13 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -169,7 +169,7 @@ static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *); static void igb_restore_vf_multicasts(struct igb_adapter *adapter); static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac); static int igb_ndo_set_vf_vlan(struct net_device *netdev, - int vf, u16 vlan, u8 qos); + int vf, u16 vlan, u8 qos, __be16 vlan_proto); static int igb_ndo_set_vf_bw(struct net_device *, int, 
int, int); static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting); @@ -6222,14 +6222,17 @@ static int igb_disable_port_vlan(struct igb_adapter *adapter, int vf) return 0; } -static int igb_ndo_set_vf_vlan(struct net_device *netdev, - int vf, u16 vlan, u8 qos) +static int igb_ndo_set_vf_vlan(struct net_device *netdev, int vf, + u16 vlan, u8 qos, __be16 vlan_proto) { struct igb_adapter *adapter = netdev_priv(netdev); if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7)) return -EINVAL; + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + return (vlan || qos) ? igb_enable_port_vlan(adapter, vf, vlan, qos) : igb_disable_port_vlan(adapter, vf); } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 8618599..b18590a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -1354,13 +1354,16 @@ static int ixgbe_disable_port_vlan(struct ixgbe_adapter *adapter, int vf) return err; } -int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos) +int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, + u8 qos, __be16 vlan_proto) { int err = 0; struct ixgbe_adapter *adapter = netdev_priv(netdev); if ((vf >= adapter->num_vfs) || (vlan > 4095) || (qos > 7)) return -EINVAL; + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; if (vlan || qos) { /* Check if there is already a port VLAN set, if so * we have to delete the old one first before we diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h index 47e65e2..0c7977d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h @@ -43,7 +43,7 @@ void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter); void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter); int ixgbe_ndo_set_vf_mac(struct net_device 
*netdev, int queue, u8 *mac); int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int queue, u16 vlan, - u8 qos); + u8 qos, __be16 vlan_proto); int ixgbe_link_mbps(struct ixgbe_adapter *adapter); int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate, int max_tx_rate); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index a94f8a3..132eeea 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2400,11 +2400,15 @@ static int mlx4_en_set_vf_mac(struct net_device *dev, int queue, u8 *mac) return mlx4_set_vf_mac(mdev->dev, en_priv->port, queue, mac_u64); } -static int mlx4_en_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos) +static int mlx4_en_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto) { struct mlx4_en_priv *en_priv = netdev_priv(dev); struct mlx4_en_dev *mdev = en_priv->mdev; + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + return mlx4_set_vf_vlan(mdev->dev, en_priv->port, vf, vlan, qos); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c127923..b58cfe3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2917,11 +2917,15 @@ static int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac) return mlx5_eswitch_set_vport_mac(mdev->priv.eswitch, vf + 1, mac); } -static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos) +static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + return mlx5_eswitch_set_vport_vlan(mdev->priv.eswitch, vf + 1, vlan, qos); } diff --git 
a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index cd23a29..0e198fe 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -100,7 +100,8 @@ static int qede_alloc_rx_buffer(struct qede_dev *edev, static void qede_link_update(void *dev, struct qed_link_output *link); #ifdef CONFIG_QED_SRIOV -static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos) +static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto) { struct qede_dev *edev = netdev_priv(ndev); @@ -109,6 +110,9 @@ static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos) return -EINVAL; } + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + DP_VERBOSE(edev, QED_MSG_IOV, "Setting Vlan 0x%04x to VF [%d]\n", vlan, vf); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h index 24061b9..5f32765 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h @@ -238,7 +238,7 @@ int qlcnic_sriov_set_vf_mac(struct net_device *, int, u8 *); int qlcnic_sriov_set_vf_tx_rate(struct net_device *, int, int, int); int qlcnic_sriov_get_vf_config(struct net_device *, int , struct ifla_vf_info *); -int qlcnic_sriov_set_vf_vlan(struct net_device *, int, u16, u8); +int qlcnic_sriov_set_vf_vlan(struct net_device *, int, u16, u8, __be16); int qlcnic_sriov_set_vf_spoofchk(struct net_device *, int, bool); #else static inline void qlcnic_sriov_pf_disable(struct qlcnic_adapter *adapter) {} diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c index afd687e..50eaafa 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c @@ -1915,7 +1915,7 @@ int qlcnic_sriov_set_vf_tx_rate(struct 
net_device *netdev, int vf, } int qlcnic_sriov_set_vf_vlan(struct net_device *netdev, int vf, - u16 vlan, u8 qos) + u16 vlan, u8 qos, __be16 vlan_proto) { struct qlcnic_adapter *adapter = netdev_priv(netdev); struct qlcnic_sriov *sriov = adapter->ahw->sriov; @@ -1928,6 +1928,9 @@ int qlcnic_sriov_set_vf_vlan(struct net_device *netdev, int vf, if (vf >= sriov->num_vfs || qos > 7) return -EINVAL; + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + if (vlan > MAX_VLAN_ID) { netdev_err(netdev, "Invalid VLAN ID, allowed range is [0 - %d]\n", diff --git a/drivers/net/ethernet/sfc/sriov.c b/drivers/net/ethernet/sfc/sriov.c index 816c446..9abcf4a 100644 --- a/drivers/net/ethernet/sfc/sriov.c +++ b/drivers/net/ethernet/sfc/sriov.c @@ -22,7 +22,7 @@ int efx_sriov_set_vf_mac(struct net_device *net_dev, int vf_i, u8 *mac) } int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, u16 vlan, - u8 qos) + u8 qos, __be16 vlan_proto) { struct efx_nic *efx = netdev_priv(net_dev); @@ -31,6 +31,9 @@ int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, u16 vlan, (qos & ~(VLAN_PRIO_MASK >> VLAN_PRIO_SHIFT))) return -EINVAL; + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + return efx->type->sriov_set_vf_vlan(efx, vf_i, vlan, qos); } else { return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/sfc/sriov.h b/drivers/net/ethernet/sfc/sriov.h index 400df52..ba1762e 100644 --- a/drivers/net/ethernet/sfc/sriov.h +++ b/drivers/net/ethernet/sfc/sriov.h @@ -16,7 +16,7 @@ int efx_sriov_set_vf_mac(struct net_device *net_dev, int vf_i, u8 *mac); int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, u16 vlan, - u8 qos); + u8 qos, __be16 vlan_proto); int efx_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf_i, bool spoofchk); int efx_sriov_get_vf_config(struct net_device *net_dev, int vf_i, diff --git a/include/linux/if_link.h b/include/linux/if_link.h index f923d15..0b17c58 100644 --- a/include/linux/if_link.h +++ 
b/include/linux/if_link.h @@ -25,5 +25,6 @@ struct ifla_vf_info { __u32 max_tx_rate; __u32 rss_query_en; __u32 trusted; + __be16 vlan_proto; }; #endif /* _LINUX_IF_LINK_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 69f242c..1e8a5c7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -946,7 +946,8 @@ struct netdev_xdp { * * SR-IOV management functions. * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac); - * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos); + * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, + * u8 qos, __be16 proto); * int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate, * int max_tx_rate); * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting); @@ -1187,7 +1188,8 @@ struct net_device_ops { int (*ndo_set_vf_mac)(struct net_device *dev, int queue, u8 *mac); int (*ndo_set_vf_vlan)(struct net_device *dev, - int queue, u16 vlan, u8 qos); + int queue, u16 vlan, + u8 qos, __be16 proto); int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate, int max_tx_rate); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 7ec9e99..b4fba66 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -619,7 +619,7 @@ enum { enum { IFLA_VF_UNSPEC, IFLA_VF_MAC, /* Hardware queue specific attributes */ - IFLA_VF_VLAN, + IFLA_VF_VLAN, /* VLAN ID and QoS */ IFLA_VF_TX_RATE, /* Max TX Bandwidth Allocation */ IFLA_VF_SPOOFCHK, /* Spoof Checking on/off switch */ IFLA_VF_LINK_STATE, /* link state enable/disable/auto switch */ @@ -631,6 +631,7 @@ enum { IFLA_VF_TRUST, /* Trust VF */ IFLA_VF_IB_NODE_GUID, /* VF Infiniband node GUID */ IFLA_VF_IB_PORT_GUID, /* VF Infiniband port GUID */ + IFLA_VF_VLAN_LIST, /* nested list of vlans, option for QinQ */ __IFLA_VF_MAX, }; @@ -647,6 +648,22 @@ struct ifla_vf_vlan { __u32 qos; }; +enum { + IFLA_VF_VLAN_INFO_UNSPEC, + 
IFLA_VF_VLAN_INFO, /* VLAN ID, QoS and VLAN protocol */ + __IFLA_VF_VLAN_INFO_MAX, +}; + +#define IFLA_VF_VLAN_INFO_MAX (__IFLA_VF_VLAN_INFO_MAX - 1) +#define MAX_VLAN_LIST_LEN 1 + +struct ifla_vf_vlan_info { + __u32 vf; + __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */ + __u32 qos; + __be16 vlan_proto; /* VLAN protocol either 802.1Q or 802.1ad */ +}; + struct ifla_vf_tx_rate { __u32 vf; __u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 0dbae42..3ac8946 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -843,7 +843,10 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, size += nla_total_size(num_vfs * sizeof(struct nlattr)); size += num_vfs * (nla_total_size(sizeof(struct ifla_vf_mac)) + - nla_total_size(sizeof(struct ifla_vf_vlan)) + + nla_total_size(MAX_VLAN_LIST_LEN * + sizeof(struct nlattr)) + + nla_total_size(MAX_VLAN_LIST_LEN * + sizeof(struct ifla_vf_vlan_info)) + nla_total_size(sizeof(struct ifla_vf_spoofchk)) + nla_total_size(sizeof(struct ifla_vf_rate)) + nla_total_size(sizeof(struct ifla_vf_link_state)) + @@ -1111,14 +1114,15 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, struct nlattr *vfinfo) { struct ifla_vf_rss_query_en vf_rss_query_en; + struct nlattr *vf, *vfstats, *vfvlanlist; struct ifla_vf_link_state vf_linkstate; + struct ifla_vf_vlan_info vf_vlan_info; struct ifla_vf_spoofchk vf_spoofchk; struct ifla_vf_tx_rate vf_tx_rate; struct ifla_vf_stats vf_stats; struct ifla_vf_trust vf_trust; struct ifla_vf_vlan vf_vlan; struct ifla_vf_rate vf_rate; - struct nlattr *vf, *vfstats; struct ifla_vf_mac vf_mac; struct ifla_vf_info ivi; @@ -1135,11 +1139,14 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, * IFLA_VF_LINK_STATE_AUTO which equals zero */ ivi.linkstate = 0; + /* VLAN Protocol by default is 802.1Q */ + ivi.vlan_proto = htons(ETH_P_8021Q); if (dev->netdev_ops->ndo_get_vf_config(dev, vfs_num, 
&ivi)) return 0; vf_mac.vf = vf_vlan.vf = + vf_vlan_info.vf = vf_rate.vf = vf_tx_rate.vf = vf_spoofchk.vf = @@ -1150,6 +1157,9 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); vf_vlan.vlan = ivi.vlan; vf_vlan.qos = ivi.qos; + vf_vlan_info.vlan = ivi.vlan; + vf_vlan_info.qos = ivi.qos; + vf_vlan_info.vlan_proto = ivi.vlan_proto; vf_tx_rate.rate = ivi.max_tx_rate; vf_rate.min_tx_rate = ivi.min_tx_rate; vf_rate.max_tx_rate = ivi.max_tx_rate; @@ -1158,10 +1168,8 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, vf_rss_query_en.setting = ivi.rss_query_en; vf_trust.setting = ivi.trusted; vf = nla_nest_start(skb, IFLA_VF_INFO); - if (!vf) { - nla_nest_cancel(skb, vfinfo); - return -EMSGSIZE; - } + if (!vf) + goto nla_put_vfinfo_failure; if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) || nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) || nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate), @@ -1177,17 +1185,23 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, &vf_rss_query_en) || nla_put(skb, IFLA_VF_TRUST, sizeof(vf_trust), &vf_trust)) - return -EMSGSIZE; + goto nla_put_vf_failure; + vfvlanlist = nla_nest_start(skb, IFLA_VF_VLAN_LIST); + if (!vfvlanlist) + goto nla_put_vf_failure; + if (nla_put(skb, IFLA_VF_VLAN_INFO, sizeof(vf_vlan_info), + &vf_vlan_info)) { + nla_nest_cancel(skb, vfvlanlist); + goto nla_put_vf_failure; + } + nla_nest_end(skb, vfvlanlist); memset(&vf_stats, 0, sizeof(vf_stats)); if (dev->netdev_ops->ndo_get_vf_stats) dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num, &vf_stats); vfstats = nla_nest_start(skb, IFLA_VF_STATS); - if (!vfstats) { - nla_nest_cancel(skb, vf); - nla_nest_cancel(skb, vfinfo); - return -EMSGSIZE; - } + if (!vfstats) + goto nla_put_vf_failure; if (nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_PACKETS, vf_stats.rx_packets, IFLA_VF_STATS_PAD) || nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_PACKETS, @@ -1199,11 +1213,19 @@ static 
noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST, vf_stats.broadcast, IFLA_VF_STATS_PAD) || nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST, - vf_stats.multicast, IFLA_VF_STATS_PAD)) - return -EMSGSIZE; + vf_stats.multicast, IFLA_VF_STATS_PAD)) { + nla_nest_cancel(skb, vfstats); + goto nla_put_vf_failure; + } nla_nest_end(skb, vfstats); nla_nest_end(skb, vf); return 0; + +nla_put_vf_failure: + nla_nest_cancel(skb, vf); +nla_put_vfinfo_failure: + nla_nest_cancel(skb, vfinfo); + return -EMSGSIZE; } static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) @@ -1448,6 +1470,7 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { [IFLA_VF_MAC] = { .len = sizeof(struct ifla_vf_mac) }, [IFLA_VF_VLAN] = { .len = sizeof(struct ifla_vf_vlan) }, + [IFLA_VF_VLAN_LIST] = { .type = NLA_NESTED }, [IFLA_VF_TX_RATE] = { .len = sizeof(struct ifla_vf_tx_rate) }, [IFLA_VF_SPOOFCHK] = { .len = sizeof(struct ifla_vf_spoofchk) }, [IFLA_VF_RATE] = { .len = sizeof(struct ifla_vf_rate) }, @@ -1704,7 +1727,34 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) err = -EOPNOTSUPP; if (ops->ndo_set_vf_vlan) err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan, - ivv->qos); + ivv->qos, + htons(ETH_P_8021Q)); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_VLAN_LIST]) { + struct ifla_vf_vlan_info *ivvl[MAX_VLAN_LIST_LEN]; + struct nlattr *attr; + int rem, len = 0; + + err = -EOPNOTSUPP; + if (!ops->ndo_set_vf_vlan) + return err; + + nla_for_each_nested(attr, tb[IFLA_VF_VLAN_LIST], rem) { + if (nla_type(attr) != IFLA_VF_VLAN_INFO || + nla_len(attr) < NLA_HDRLEN) { + return -EINVAL; + } + if (len >= MAX_VLAN_LIST_LEN) + return -EOPNOTSUPP; + ivvl[len] = nla_data(attr); + + len++; + } + err = ops->ndo_set_vf_vlan(dev, ivvl[0]->vf, ivvl[0]->vlan, + ivvl[0]->qos, ivvl[0]->vlan_proto); if (err < 0) return err; } -- cgit v1.1 
From b42959dc35a533a531dd698b581193a65a5da831 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 22 Sep 2016 12:11:16 +0300 Subject: net/mlx4: Add VF vlan protocol 802.1ad support Move the vf to VST 802.1ad mode (mlx4 VST QinQ mode) by setting the vf vlan protocol to 802.1ad. VST 802.1ad mode in mlx4 is used for STAG strip/insertion by the PF, while the CTAG is set by the VF. Read the current vlan protocol as part of the vf configuration state. Upon setting the vf vlan protocol to 802.1ad, we use a handshake mechanism to verify that both the vf and the pf driver versions support it. The handshake uses the command QUERY_FUNC_CAP: - The vf sets a pre-defined support bit in the input modifier. - A pf that supports the feature sends the request to the vf through a pre-defined field in the output mailbox. - In case the vf does not support the feature, the pf will fail the control command (in this case, the ip link tool command to set the vf vlan protocol to 802.1ad). No change in VST 802.1Q mode. Signed-off-by: Moshe Shemesh Signed-off-by: Tariq Toukan Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 51 ++++++++++++++- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 6 +- drivers/net/ethernet/mellanox/mlx4/fw.c | 89 ++++++++++++++++++++++++-- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 1 + include/linux/mlx4/cmd.h | 3 +- 5 files changed, 138 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 09c9694..b1cef7a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1995,6 +1995,8 @@ static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave) int port, err; struct mlx4_vport_state *vp_admin; struct mlx4_vport_oper_state *vp_oper; + struct mlx4_slave_state *slave_state = + &priv->mfunc.master.slave_state[slave]; struct mlx4_active_ports actv_ports = mlx4_get_active_ports( &priv->dev, slave); int min_port = find_first_bit(actv_ports.ports, @@ -2009,7 +2011,19 @@ static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave) priv->mfunc.master.vf_admin[slave].enable_smi[port]; vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; - vp_oper->state = *vp_admin; + if (vp_admin->vlan_proto != htons(ETH_P_8021AD) || + slave_state->vst_qinq_supported) { + vp_oper->state.vlan_proto = vp_admin->vlan_proto; + vp_oper->state.default_vlan = vp_admin->default_vlan; + vp_oper->state.default_qos = vp_admin->default_qos; + } + vp_oper->state.link_state = vp_admin->link_state; + vp_oper->state.mac = vp_admin->mac; + vp_oper->state.spoofchk = vp_admin->spoofchk; + vp_oper->state.tx_rate = vp_admin->tx_rate; + vp_oper->state.qos_vport = vp_admin->qos_vport; + vp_oper->state.guid = vp_admin->guid; + if (MLX4_VGT != vp_admin->default_vlan) { err = __mlx4_register_vlan(&priv->dev, port, vp_admin->default_vlan, &(vp_oper->vlan_idx)); @@ -2097,6 +2111,7 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, 
int slave, u8 cmd, mlx4_warn(dev, "Received reset from slave:%d\n", slave); slave_state[slave].active = false; slave_state[slave].old_vlan_api = false; + slave_state[slave].vst_qinq_supported = false; mlx4_master_deactivate_admin_state(priv, slave); for (i = 0; i < MLX4_EVENT_TYPES_NUM; ++i) { slave_state[slave].event_eq[i].eqn = -1; @@ -2364,6 +2379,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) vf_oper = &priv->mfunc.master.vf_oper[i]; s_state = &priv->mfunc.master.slave_state[i]; s_state->last_cmd = MLX4_COMM_CMD_RESET; + s_state->vst_qinq_supported = false; mutex_init(&priv->mfunc.master.gen_eqe_mutex[i]); for (j = 0; j < MLX4_EVENT_TYPES_NUM; ++j) s_state->event_eq[j].eqn = -1; @@ -2955,10 +2971,13 @@ int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac) EXPORT_SYMBOL_GPL(mlx4_set_vf_mac); -int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos) +int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos, + __be16 proto) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_vport_state *vf_admin; + struct mlx4_slave_state *slave_state; + struct mlx4_vport_oper_state *vf_oper; int slave; if ((!mlx4_is_master(dev)) || @@ -2968,12 +2987,31 @@ int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos) if ((vlan > 4095) || (qos > 7)) return -EINVAL; + if (proto == htons(ETH_P_8021AD) && + !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP)) + return -EPROTONOSUPPORT; + + if (proto != htons(ETH_P_8021Q) && + proto != htons(ETH_P_8021AD)) + return -EINVAL; + + if ((proto == htons(ETH_P_8021AD)) && + ((vlan == 0) || (vlan == MLX4_VGT))) + return -EINVAL; + slave = mlx4_get_slave_indx(dev, vf); if (slave < 0) return -EINVAL; + slave_state = &priv->mfunc.master.slave_state[slave]; + if ((proto == htons(ETH_P_8021AD)) && (slave_state->active) && + (!slave_state->vst_qinq_supported)) { + mlx4_err(dev, "vf %d does not support VST QinQ mode\n", vf); + return -EPROTONOSUPPORT; + } port = 
mlx4_slaves_closest_port(dev, slave, port); vf_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; + vf_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; if (!mlx4_valid_vf_state_change(dev, port, vf_admin, vlan, qos)) return -EPERM; @@ -2983,6 +3021,7 @@ int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos) else vf_admin->default_vlan = vlan; vf_admin->default_qos = qos; + vf_admin->vlan_proto = proto; /* If rate was configured prior to VST, we saved the configured rate * in vf_admin->rate and now, if priority supported we enforce the QoS @@ -2991,7 +3030,12 @@ int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos) vf_admin->tx_rate) vf_admin->qos_vport = slave; - if (mlx4_master_immediate_activate_vlan_qos(priv, slave, port)) + /* Try to activate new vf state without restart, + * this option is not supported while moving to VST QinQ mode. + */ + if ((proto == htons(ETH_P_8021AD) && + vf_oper->state.vlan_proto != proto) || + mlx4_master_immediate_activate_vlan_qos(priv, slave, port)) mlx4_info(dev, "updating vf %d port %d config will take effect on next VF restart\n", vf, port); @@ -3135,6 +3179,7 @@ int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_in ivf->vlan = s_info->default_vlan; ivf->qos = s_info->default_qos; + ivf->vlan_proto = s_info->vlan_proto; if (mlx4_is_vf_vst_and_prio_qos(dev, port, s_info)) ivf->max_tx_rate = s_info->tx_rate; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 132eeea..7e703be 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2406,10 +2406,8 @@ static int mlx4_en_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, struct mlx4_en_priv *en_priv = netdev_priv(dev); struct mlx4_en_dev *mdev = en_priv->mdev; - if (vlan_proto != htons(ETH_P_8021Q)) - return -EPROTONOSUPPORT; - - return mlx4_set_vf_vlan(mdev->dev, 
en_priv->port, vf, vlan, qos); + return mlx4_set_vf_vlan(mdev->dev, en_priv->port, vf, vlan, qos, + vlan_proto); } static int mlx4_en_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 7dc9d38..090bf81 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -249,6 +249,72 @@ out: return err; } +static int mlx4_activate_vst_qinq(struct mlx4_priv *priv, int slave, int port) +{ + struct mlx4_vport_oper_state *vp_oper; + struct mlx4_vport_state *vp_admin; + int err; + + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; + + if (vp_admin->default_vlan != vp_oper->state.default_vlan) { + err = __mlx4_register_vlan(&priv->dev, port, + vp_admin->default_vlan, + &vp_oper->vlan_idx); + if (err) { + vp_oper->vlan_idx = NO_INDX; + mlx4_warn(&priv->dev, + "No vlan resources slave %d, port %d\n", + slave, port); + return err; + } + mlx4_dbg(&priv->dev, "alloc vlan %d idx %d slave %d port %d\n", + (int)(vp_oper->state.default_vlan), + vp_oper->vlan_idx, slave, port); + } + vp_oper->state.vlan_proto = vp_admin->vlan_proto; + vp_oper->state.default_vlan = vp_admin->default_vlan; + vp_oper->state.default_qos = vp_admin->default_qos; + + return 0; +} + +static int mlx4_handle_vst_qinq(struct mlx4_priv *priv, int slave, int port) +{ + struct mlx4_vport_oper_state *vp_oper; + struct mlx4_slave_state *slave_state; + struct mlx4_vport_state *vp_admin; + int err; + + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; + slave_state = &priv->mfunc.master.slave_state[slave]; + + if ((vp_admin->vlan_proto != htons(ETH_P_8021AD)) || + (!slave_state->active)) + return 0; + + if (vp_oper->state.vlan_proto == vp_admin->vlan_proto && + vp_oper->state.default_vlan == vp_admin->default_vlan && + 
vp_oper->state.default_qos == vp_admin->default_qos) + return 0; + + if (!slave_state->vst_qinq_supported) { + /* Warn and revert the request to set vst QinQ mode */ + vp_admin->vlan_proto = vp_oper->state.vlan_proto; + vp_admin->default_vlan = vp_oper->state.default_vlan; + vp_admin->default_qos = vp_oper->state.default_qos; + + mlx4_warn(&priv->dev, + "Slave %d does not support VST QinQ mode\n", slave); + return 0; + } + + err = mlx4_activate_vst_qinq(priv, slave, port); + return err; +} + int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -312,17 +378,18 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_VF_ENABLE_QP0 0x08 #define QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID 0x80 -#define QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS (1 << 31) #define QUERY_FUNC_CAP_PHV_BIT 0x40 #define QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE 0x20 +#define QUERY_FUNC_CAP_SUPPORTS_VST_QINQ BIT(30) +#define QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS BIT(31) + if (vhcr->op_modifier == 1) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); int converted_port = mlx4_slave_convert_port( dev, slave, vhcr->in_modifier); - struct mlx4_vport_oper_state *vp_oper = - &priv->mfunc.master.vf_oper[slave].vport[vhcr->in_modifier]; + struct mlx4_vport_oper_state *vp_oper; if (converted_port < 0) return -EINVAL; @@ -361,6 +428,11 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, MLX4_PUT(outbox->buf, dev->caps.phys_port_id[vhcr->in_modifier], QUERY_FUNC_CAP_PHYS_PORT_ID); + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + err = mlx4_handle_vst_qinq(priv, slave, port); + if (err) + return err; + field = 0; if (dev->caps.phv_bit[port]) field |= QUERY_FUNC_CAP_PHV_BIT; @@ -371,6 +443,9 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, } else if (vhcr->op_modifier == 0) { struct mlx4_active_ports actv_ports = 
mlx4_get_active_ports(dev, slave); + struct mlx4_slave_state *slave_state = + &priv->mfunc.master.slave_state[slave]; + /* enable rdma and ethernet interfaces, new quota locations, * and reserved lkey */ @@ -444,6 +519,10 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, size = dev->caps.reserved_lkey + ((slave << 8) & 0xFF00); MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_RESD_LKEY_OFFSET); + + if (vhcr->in_modifier & QUERY_FUNC_CAP_SUPPORTS_VST_QINQ) + slave_state->vst_qinq_supported = true; + } else err = -EINVAL; @@ -459,10 +538,12 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port, u32 size, qkey; int err = 0, quotas = 0; u32 in_modifier; + u32 slave_caps; op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */ - in_modifier = op_modifier ? gen_or_port : + slave_caps = QUERY_FUNC_CAP_SUPPORTS_VST_QINQ | QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS; + in_modifier = op_modifier ? gen_or_port : slave_caps; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index fdfe1ac..e4878f3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -483,6 +483,7 @@ struct mlx4_slave_state { u8 init_port_mask; bool active; bool old_vlan_api; + bool vst_qinq_supported; u8 function; dma_addr_t vhcr_dma; u16 mtu[MLX4_MAX_PORTS + 1]; diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 116b284..1f35686 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -309,7 +309,8 @@ int mlx4_get_vf_stats(struct mlx4_dev *dev, int port, int vf_idx, struct ifla_vf_stats *vf_stats); u32 mlx4_comm_get_version(void); int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac); -int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos); +int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, + u8 qos, __be16 proto); int 
mlx4_set_vf_rate(struct mlx4_dev *dev, int port, int vf, int min_tx_rate, int max_tx_rate); int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting); -- cgit v1.1 From 4ce4862a815e3cee8040c9d91e2148aecbbf056e Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 23 Sep 2016 14:04:09 +0800 Subject: Documentation: devicetree: revise ethernet device-tree binding about TRGMII add phy-mode "trgmii" to Documentation/devicetree/bindings/net/ethernet.txt Cc: devicetree@vger.kernel.org Reported-by: Sergei Shtylyov Signed-off-by: Sean Wang Acked-by: Rob Herring Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/ethernet.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/ethernet.txt b/Documentation/devicetree/bindings/net/ethernet.txt index 5d88f37..e1d7681 100644 --- a/Documentation/devicetree/bindings/net/ethernet.txt +++ b/Documentation/devicetree/bindings/net/ethernet.txt @@ -11,8 +11,8 @@ The following properties are common to the Ethernet controllers: the maximum frame size (there's contradiction in ePAPR). 
- phy-mode: string, operation mode of the PHY interface; supported values are "mii", "gmii", "sgmii", "qsgmii", "tbi", "rev-mii", "rmii", "rgmii", "rgmii-id", - "rgmii-rxid", "rgmii-txid", "rtbi", "smii", "xgmii"; this is now a de-facto - standard property; + "rgmii-rxid", "rgmii-txid", "rtbi", "smii", "xgmii", "trgmii"; this is now a + de-facto standard property; - phy-connection-type: the same as "phy-mode" property but described in ePAPR; - phy-handle: phandle, specifies a reference to a node representing a PHY device; this property is described in ePAPR and so preferred; -- cgit v1.1 From 7f8c2865a94a73308386627cd7556c17f03efb63 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 23 Sep 2016 14:09:32 +0800 Subject: Documentation: devicetree: fix typo in MediaTek ethernet device-tree binding fix typo in Documentation/devicetree/bindings/net/mediatek-net.txt Cc: devicetree@vger.kernel.org Reported-by: Sergei Shtylyov Signed-off-by: Sean Wang Acked-by: Rob Herring Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/mediatek-net.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/mediatek-net.txt b/Documentation/devicetree/bindings/net/mediatek-net.txt index 7111278..f095257 100644 --- a/Documentation/devicetree/bindings/net/mediatek-net.txt +++ b/Documentation/devicetree/bindings/net/mediatek-net.txt @@ -34,7 +34,7 @@ Required properties: - phy-handle: see ethernet.txt file in the same directory and the phy-mode "trgmii" required being provided when reg is equal to 0 and the MAC uses fixed-link to connect - with inernal switch such as MT7530. + with internal switch such as MT7530. 
Example: -- cgit v1.1 From faac0ff0a544eed6b8c9375c1104d692e4979540 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 23 Sep 2016 12:02:45 +0100 Subject: mlxsw: spectrum: remove redundant check if err is zero There is an earlier check and return if err is non-zero, so the check to see if it is zero is redundant in every iteration of the loop and hence the check can be removed. Signed-off-by: Colin Ian King Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 80f27b5..fd74d10 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1664,7 +1664,7 @@ static void __mlxsw_sp_port_get_stats(struct net_device *dev, return; mlxsw_sp_port_get_stats_raw(dev, grp, prio, ppcnt_pl); for (i = 0; i < len; i++) - data[data_index + i] = !err ? hw_stats[i].getter(ppcnt_pl) : 0; + data[data_index + i] = hw_stats[i].getter(ppcnt_pl); } static void mlxsw_sp_port_get_stats(struct net_device *dev, -- cgit v1.1 From b4e28c1fc9c7f3b7508b9a27d7c59a0da7b1f824 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Fri, 23 Sep 2016 14:42:27 +0530 Subject: net: thunderx: Fix issue with IRQ naming This patch fixes a regression caused by previous commit when irq name exceeds 20 byte array if interface's name size is large. Fixes: e412621394fa ("net: thunderx: Use netdev's name for naming VF's interrupts") Signed-off-by: Sunil Goutham Signed-off-by: David S.
Miller --- drivers/net/ethernet/cavium/thunder/nic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h index 18d12d3..3042610 100644 --- a/drivers/net/ethernet/cavium/thunder/nic.h +++ b/drivers/net/ethernet/cavium/thunder/nic.h @@ -305,7 +305,7 @@ struct nicvf { bool msix_enabled; u8 num_vec; struct msix_entry msix_entries[NIC_VF_MSIX_VECTORS]; - char irq_name[NIC_VF_MSIX_VECTORS][20]; + char irq_name[NIC_VF_MSIX_VECTORS][IFNAMSIZ + 15]; bool irq_allocated[NIC_VF_MSIX_VECTORS]; cpumask_var_t affinity_mask[NIC_VF_MSIX_VECTORS]; -- cgit v1.1 From 2c204c2b9fca36aa24f7abe2e8bfd83fe3a8db8d Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Fri, 23 Sep 2016 14:42:28 +0530 Subject: net: thunderx: Support for byte queue limits This patch adds support for byte queue limits Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 14 ++++++++-- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 30 ++++++++++++++-------- 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 7d00162..45a13f7 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -516,7 +516,8 @@ static int nicvf_init_resources(struct nicvf *nic) static void nicvf_snd_pkt_handler(struct net_device *netdev, struct cmp_queue *cq, struct cqe_send_t *cqe_tx, - int cqe_type, int budget) + int cqe_type, int budget, + unsigned int *tx_pkts, unsigned int *tx_bytes) { struct sk_buff *skb = NULL; struct nicvf *nic = netdev_priv(netdev); @@ -547,6 +548,8 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev, } nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1); prefetch(skb); + (*tx_pkts)++; + *tx_bytes += skb->len; napi_consume_skb(skb, budget); sq->skbuff[cqe_tx->sqe_ptr] = 
(u64)NULL; } else { @@ -662,6 +665,7 @@ static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx, struct cmp_queue *cq = &qs->cq[cq_idx]; struct cqe_rx_t *cq_desc; struct netdev_queue *txq; + unsigned int tx_pkts = 0, tx_bytes = 0; spin_lock_bh(&cq->lock); loop: @@ -701,7 +705,7 @@ loop: case CQE_TYPE_SEND: nicvf_snd_pkt_handler(netdev, cq, (void *)cq_desc, CQE_TYPE_SEND, - budget); + budget, &tx_pkts, &tx_bytes); tx_done++; break; case CQE_TYPE_INVALID: @@ -730,6 +734,9 @@ done: netdev = nic->pnicvf->netdev; txq = netdev_get_tx_queue(netdev, nicvf_netdev_qidx(nic, cq_idx)); + if (tx_pkts) + netdev_tx_completed_queue(txq, tx_pkts, tx_bytes); + nic = nic->pnicvf; if (netif_tx_queue_stopped(txq) && netif_carrier_ok(netdev)) { netif_tx_start_queue(txq); @@ -1160,6 +1167,9 @@ int nicvf_stop(struct net_device *netdev) netif_tx_disable(netdev); + for (qidx = 0; qidx < netdev->num_tx_queues; qidx++) + netdev_tx_reset_queue(netdev_get_tx_queue(netdev, qidx)); + /* Free resources */ nicvf_config_data_transfer(nic, false); diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index 178c5c7..a4fc501 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -1082,6 +1082,24 @@ static inline void nicvf_sq_add_cqe_subdesc(struct snd_queue *sq, int qentry, imm->len = 1; } +static inline void nicvf_sq_doorbell(struct nicvf *nic, struct sk_buff *skb, + int sq_num, int desc_cnt) +{ + struct netdev_queue *txq; + + txq = netdev_get_tx_queue(nic->pnicvf->netdev, + skb_get_queue_mapping(skb)); + + netdev_tx_sent_queue(txq, skb->len); + + /* make sure all memory stores are done before ringing doorbell */ + smp_wmb(); + + /* Inform HW to xmit all TSO segments */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, + sq_num, desc_cnt); +} + /* Segment a TSO packet into 'gso_size' segments and append * them to SQ for transfer */ @@ -1141,12 +1159,8 @@ 
static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq, /* Save SKB in the last segment for freeing */ sq->skbuff[hdr_qentry] = (u64)skb; - /* make sure all memory stores are done before ringing doorbell */ - smp_wmb(); + nicvf_sq_doorbell(nic, skb, sq_num, desc_cnt); - /* Inform HW to xmit all TSO segments */ - nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, - sq_num, desc_cnt); nic->drv_stats.tx_tso++; return 1; } @@ -1219,12 +1233,8 @@ doorbell: nicvf_sq_add_cqe_subdesc(sq, qentry, tso_sqe, skb); } - /* make sure all memory stores are done before ringing doorbell */ - smp_wmb(); + nicvf_sq_doorbell(nic, skb, sq_num, subdesc_cnt); - /* Inform HW to xmit new packet */ - nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, - sq_num, subdesc_cnt); return 1; append_fail: -- cgit v1.1 From c6a77ff82fb849534748719f37f3f9086d78ed39 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 23 Sep 2016 17:08:17 -0700 Subject: hv_netvsc: fix comments Typos and spelling errors. Also remove old comment from staging era. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 7130bf9..f4fbcb5 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -433,7 +433,7 @@ struct nvsp_1_message_revoke_send_buffer { */ struct nvsp_1_message_send_rndis_packet { /* - * This field is specified by RNIDS. They assume there's two different + * This field is specified by RNDIS. They assume there's two different * channels of communication. However, the Network VSP only has one. * Therefore, the channel travels with the RNDIS packet. */ @@ -578,7 +578,7 @@ struct nvsp_5_send_indirect_table { /* The number of entries in the send indirection table */ u32 count; - /* The offset of the send indireciton table from top of this struct.
+ /* The offset of the send indirection table from top of this struct. * The send indirection table tells which channel to put the send * traffic on. Each entry is a channel number. */ @@ -734,7 +734,6 @@ struct netvsc_device { struct nvsp_message channel_init_pkt; struct nvsp_message revoke_packet; - /* unsigned char HwMacAddr[HW_MACADDR_LEN]; */ struct vmbus_channel *chn_table[VRSS_CHANNEL_MAX]; u32 send_table[VRSS_SEND_TAB_SIZE]; @@ -1239,7 +1238,7 @@ struct rndis_message { u32 ndis_msg_type; /* Total length of this message, from the beginning */ - /* of the sruct rndis_message, in bytes. */ + /* of the struct rndis_message, in bytes. */ u32 msg_len; /* Actual message */ -- cgit v1.1 From 805b21b929e29192fb5de16154f616bfc1116e3e Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 24 Sep 2016 18:05:26 +0100 Subject: rxrpc: Send an ACK after every few DATA packets we receive Send an ACK if we haven't sent one for the last two packets we've received. This keeps the other end apprised of where we've got to - which is important if they're doing slow-start. We do this in recvmsg so that we can dispatch a packet directly without the need to wake up the background thread. This should possibly be made configurable in future. 
Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 3 +++ net/rxrpc/misc.c | 1 + net/rxrpc/output.c | 25 +++++++++++++++++-------- net/rxrpc/recvmsg.c | 13 ++++++++++++- 4 files changed, 33 insertions(+), 9 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 042dbcc..e3bf9c0 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -533,6 +533,8 @@ struct rxrpc_call { u16 ackr_skew; /* skew on packet being ACK'd */ rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ + rxrpc_seq_t ackr_consumed; /* Highest packet shown consumed */ + rxrpc_seq_t ackr_seen; /* Highest packet shown seen */ rxrpc_serial_t ackr_ping; /* Last ping sent */ ktime_t ackr_ping_time; /* Time last ping sent */ @@ -695,6 +697,7 @@ enum rxrpc_propose_ack_trace { rxrpc_propose_ack_respond_to_ack, rxrpc_propose_ack_respond_to_ping, rxrpc_propose_ack_retry_tx, + rxrpc_propose_ack_rotate_rx, rxrpc_propose_ack_terminal_ack, rxrpc_propose_ack__nr_trace }; diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 1ca1483..a473fd7 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -202,6 +202,7 @@ const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8] = { [rxrpc_propose_ack_respond_to_ack] = "Rsp2Ack", [rxrpc_propose_ack_respond_to_ping] = "Rsp2Png", [rxrpc_propose_ack_retry_tx] = "RetryTx", + [rxrpc_propose_ack_rotate_rx] = "RxAck ", [rxrpc_propose_ack_terminal_ack] = "ClTerm ", }; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 0c563e3..3eb0144 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -36,7 +36,9 @@ struct rxrpc_pkt_buffer { * Fill out an ACK packet. 
*/ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, - struct rxrpc_pkt_buffer *pkt) + struct rxrpc_pkt_buffer *pkt, + rxrpc_seq_t *_hard_ack, + rxrpc_seq_t *_top) { rxrpc_serial_t serial; rxrpc_seq_t hard_ack, top, seq; @@ -48,6 +50,8 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, serial = call->ackr_serial; hard_ack = READ_ONCE(call->rx_hard_ack); top = smp_load_acquire(&call->rx_top); + *_hard_ack = hard_ack; + *_top = top; pkt->ack.bufferSpace = htons(8); pkt->ack.maxSkew = htons(call->ackr_skew); @@ -96,6 +100,7 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) struct msghdr msg; struct kvec iov[2]; rxrpc_serial_t serial; + rxrpc_seq_t hard_ack, top; size_t len, n; bool ping = false; int ioc, ret; @@ -146,7 +151,7 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) goto out; } ping = (call->ackr_reason == RXRPC_ACK_PING); - n = rxrpc_fill_out_ack(call, pkt); + n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top); call->ackr_reason = 0; spin_unlock_bh(&call->lock); @@ -203,18 +208,22 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) if (ping) call->ackr_ping_time = ktime_get_real(); - if (ret < 0 && call->state < RXRPC_CALL_COMPLETE) { - switch (type) { - case RXRPC_PACKET_TYPE_ACK: + if (type == RXRPC_PACKET_TYPE_ACK && + call->state < RXRPC_CALL_COMPLETE) { + if (ret < 0) { clear_bit(RXRPC_CALL_PINGING, &call->flags); rxrpc_propose_ACK(call, pkt->ack.reason, ntohs(pkt->ack.maxSkew), ntohl(pkt->ack.serial), true, true, rxrpc_propose_ack_retry_tx); - break; - case RXRPC_PACKET_TYPE_ABORT: - break; + } else { + spin_lock_bh(&call->lock); + if (after(hard_ack, call->ackr_consumed)) + call->ackr_consumed = hard_ack; + if (after(top, call->ackr_seen)) + call->ackr_seen = top; + spin_unlock_bh(&call->lock); } } diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 8c7f3de..a7458c3 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -201,8 +201,19 @@ static void rxrpc_rotate_rx_window(struct 
rxrpc_call *call) _debug("%u,%u,%02x", hard_ack, top, flags); trace_rxrpc_receive(call, rxrpc_receive_rotate, serial, hard_ack); - if (flags & RXRPC_LAST_PACKET) + if (flags & RXRPC_LAST_PACKET) { rxrpc_end_rx_phase(call); + } else { + /* Check to see if there's an ACK that needs sending. */ + if (after_eq(hard_ack, call->ackr_consumed + 2) || + after_eq(top, call->ackr_seen + 2) || + (hard_ack == top && after(hard_ack, call->ackr_consumed))) + rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial, + true, false, + rxrpc_propose_ack_rotate_rx); + if (call->ackr_reason) + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + } } /* -- cgit v1.1 From a7056c5ba67ee6a956b42cf9ff9ba3a6a0bd9794 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 24 Sep 2016 18:05:27 +0100 Subject: rxrpc: Send an immediate ACK if we fill in a hole Send an immediate ACK if we fill in a hole in the buffer left by an out-of-sequence packet. This may allow the congestion management in the peer to avoid a retransmission if packets got reordered on the wire. 
Signed-off-by: David Howells --- net/rxrpc/input.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 349698d..757c16f 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -331,8 +331,16 @@ next_subpacket: call->rxtx_annotations[ix] = annotation; smp_wmb(); call->rxtx_buffer[ix] = skb; - if (after(seq, call->rx_top)) + if (after(seq, call->rx_top)) { smp_store_release(&call->rx_top, seq); + } else if (before(seq, call->rx_top)) { + /* Send an immediate ACK if we fill in a hole */ + if (!ack) { + ack = RXRPC_ACK_DELAY; + ack_serial = serial; + } + immediate_ack = true; + } if (flags & RXRPC_LAST_PACKET) { set_bit(RXRPC_CALL_RX_LAST, &call->flags); trace_rxrpc_receive(call, rxrpc_receive_queue_last, serial, seq); -- cgit v1.1 From b69d94d7991f83928d3ea18fe12ab011fa852bb0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 24 Sep 2016 18:05:27 +0100 Subject: rxrpc: Include the last reply DATA serial number in the final ACK In a client call, include the serial number of the last DATA packet of the reply in the final ACK. Signed-off-by: David Howells --- net/rxrpc/recvmsg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index a7458c3..038ae62 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -133,7 +133,7 @@ static int rxrpc_recvmsg_new_call(struct rxrpc_sock *rx, /* * End the packet reception phase. 
*/ -static void rxrpc_end_rx_phase(struct rxrpc_call *call) +static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) { _enter("%d,%s", call->debug_id, rxrpc_call_states[call->state]); @@ -141,7 +141,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call) ASSERTCMP(call->rx_hard_ack, ==, call->rx_top); if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { - rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, true, false, + rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, true, false, rxrpc_propose_ack_terminal_ack); rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); } @@ -202,7 +202,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) _debug("%u,%u,%02x", hard_ack, top, flags); trace_rxrpc_receive(call, rxrpc_receive_rotate, serial, hard_ack); if (flags & RXRPC_LAST_PACKET) { - rxrpc_end_rx_phase(call); + rxrpc_end_rx_phase(call, serial); } else { /* Check to see if there's an ACK that needs sending. */ if (after_eq(hard_ack, call->ackr_consumed + 2) || -- cgit v1.1 From dd7c1ee59a90ca8a75bce72c721851d5550f3c59 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 24 Sep 2016 18:05:27 +0100 Subject: rxrpc: Reinitialise the call ACK and timer state for client reply phase Clear the ACK reason, ACK timer and resend timer when entering the client reply phase when the first DATA packet is received. New ACKs will be proposed once the data is queued. The resend timer is no longer relevant and we need to cancel ACKs scheduled to probe for a lost reply. 
Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/input.c | 9 +++++++++ net/rxrpc/misc.c | 1 + 3 files changed, 11 insertions(+) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index e3bf9c0..cdd35e2 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -682,6 +682,7 @@ extern const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5]; enum rxrpc_timer_trace { rxrpc_timer_begin, + rxrpc_timer_init_for_reply, rxrpc_timer_expired, rxrpc_timer_set_for_ack, rxrpc_timer_set_for_resend, diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 757c16f..bda11eb2 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -149,6 +149,15 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) { rxrpc_seq_t top = READ_ONCE(call->tx_top); + if (call->ackr_reason) { + spin_lock_bh(&call->lock); + call->ackr_reason = 0; + call->resend_at = call->expire_at; + call->ack_at = call->expire_at; + spin_unlock_bh(&call->lock); + rxrpc_set_timer(call, rxrpc_timer_init_for_reply); + } + if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) rxrpc_rotate_tx_window(call, top); if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index a473fd7..901c012 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -191,6 +191,7 @@ const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5] = { const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8] = { [rxrpc_timer_begin] = "Begin ", [rxrpc_timer_expired] = "*EXPR*", + [rxrpc_timer_init_for_reply] = "IniRpl", [rxrpc_timer_set_for_ack] = "SetAck", [rxrpc_timer_set_for_send] = "SetTx ", [rxrpc_timer_set_for_resend] = "SetRTx", -- cgit v1.1 From df0562a72dba13ab49c7dd7cb15170697b9848ee Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 24 Sep 2016 23:00:54 +0100 Subject: rxrpc: Delay the resend timer to allow for nsec->jiffies conv error When determining the resend timer value, we have a value in nsec but the timer is in jiffies which may be a million 
or more times more coarse. nsecs_to_jiffies() rounds down - which means that the resend timeout expressed as jiffies is very likely earlier than the one expressed as nanoseconds from which it was derived. The problem is that rxrpc_resend() gets triggered by the timer, but can't then find anything to resend yet. It sets the timer again - but gets kicked off immediately again and again until the nanosecond-based expiry time is reached and we actually retransmit. Fix this by adding 1 to the jiffies-based resend_at value to counteract the rounding and make sure that the timer happens after the nanosecond-based expiry is passed. Alternatives would be to adjust the timestamp on the packets to align with the jiffie scale or to switch back to using jiffie-timestamps. Signed-off-by: David Howells --- net/rxrpc/call_event.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index a78a92f..d5bf9ce 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -200,8 +200,14 @@ static void rxrpc_resend(struct rxrpc_call *call) ktime_to_ns(ktime_sub(skb->tstamp, max_age))); } - resend_at = ktime_sub(ktime_add_ms(oldest, rxrpc_resend_timeout), now); - call->resend_at = jiffies + nsecs_to_jiffies(ktime_to_ns(resend_at)); + resend_at = ktime_add_ms(oldest, rxrpc_resend_timeout); + call->resend_at = jiffies + + nsecs_to_jiffies(ktime_to_ns(ktime_sub(resend_at, now))) + + 1; /* We have to make sure that the calculated jiffies value + * falls at or after the nsec value, or we shall loop + * ceaselessly because the timer times out, but we haven't + * reached the nsec timeout yet. + */ /* Now go through the Tx window and perform the retransmissions. We * have to drop the lock for each send. 
If an ACK comes in whilst the -- cgit v1.1 From 31a1b989508ce64e8ead504884ced01e61870852 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 24 Sep 2016 18:05:26 +0100 Subject: rxrpc: Generate a summary of the ACK state for later use Generate a summary of the Tx buffer packet state when an ACK is received for use in a later patch that does congestion management. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 14 ++++++++++++++ net/rxrpc/input.c | 45 ++++++++++++++++++++++++++++++++++----------- 2 files changed, 48 insertions(+), 11 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index cdd35e2..1a700b6 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -540,6 +540,20 @@ struct rxrpc_call { /* transmission-phase ACK management */ rxrpc_serial_t acks_latest; /* serial number of latest ACK received */ + rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */ +}; + +/* + * Summary of a new ACK and the changes it made. + */ +struct rxrpc_ack_summary { + u8 ack_reason; + u8 nr_acks; /* Number of ACKs in packet */ + u8 nr_nacks; /* Number of NACKs in packet */ + u8 nr_new_acks; /* Number of new ACKs in packet */ + u8 nr_new_nacks; /* Number of new NACKs in packet */ + u8 nr_rot_new_acks; /* Number of rotated new ACKs */ + bool new_low_nack; /* T if new low NACK found */ }; enum rxrpc_skb_trace { diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index bda11eb2..dd69966 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -56,12 +56,20 @@ static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb, /* * Apply a hard ACK by advancing the Tx window. 
*/ -static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to) +static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, + struct rxrpc_ack_summary *summary) { struct sk_buff *skb, *list = NULL; int ix; u8 annotation; + if (call->acks_lowest_nak == call->tx_hard_ack) { + call->acks_lowest_nak = to; + } else if (before_eq(call->acks_lowest_nak, to)) { + summary->new_low_nack = true; + call->acks_lowest_nak = to; + } + spin_lock(&call->lock); while (before(call->tx_hard_ack, to)) { @@ -77,6 +85,8 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to) if (annotation & RXRPC_TX_ANNO_LAST) set_bit(RXRPC_CALL_TX_LAST, &call->flags); + if ((annotation & RXRPC_TX_ANNO_MASK) != RXRPC_TX_ANNO_ACK) + summary->nr_rot_new_acks++; } spin_unlock(&call->lock); @@ -147,6 +157,7 @@ bad_state: */ static bool rxrpc_receiving_reply(struct rxrpc_call *call) { + struct rxrpc_ack_summary summary = { 0 }; rxrpc_seq_t top = READ_ONCE(call->tx_top); if (call->ackr_reason) { @@ -159,7 +170,7 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) } if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) - rxrpc_rotate_tx_window(call, top); + rxrpc_rotate_tx_window(call, top, &summary); if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { rxrpc_proto_abort("TXL", call, top); return false; @@ -508,7 +519,8 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, * the time the ACK was sent. 
*/ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, - rxrpc_seq_t seq, int nr_acks) + rxrpc_seq_t seq, int nr_acks, + struct rxrpc_ack_summary *summary) { bool resend = false; int ix; @@ -521,14 +533,23 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, annotation &= ~RXRPC_TX_ANNO_MASK; switch (*acks++) { case RXRPC_ACK_TYPE_ACK: + summary->nr_acks++; if (anno_type == RXRPC_TX_ANNO_ACK) continue; + summary->nr_new_acks++; call->rxtx_annotations[ix] = RXRPC_TX_ANNO_ACK | annotation; break; case RXRPC_ACK_TYPE_NACK: + if (!summary->nr_nacks && + call->acks_lowest_nak != seq) { + call->acks_lowest_nak = seq; + summary->new_low_nack = true; + } + summary->nr_nacks++; if (anno_type == RXRPC_TX_ANNO_NAK) continue; + summary->nr_new_nacks++; if (anno_type == RXRPC_TX_ANNO_RETRANS) continue; call->rxtx_annotations[ix] = @@ -558,7 +579,7 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, u16 skew) { - u8 ack_reason; + struct rxrpc_ack_summary summary = { 0 }; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); union { struct rxrpc_ackpacket ack; @@ -581,10 +602,10 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, first_soft_ack = ntohl(buf.ack.firstPacket); hard_ack = first_soft_ack - 1; nr_acks = buf.ack.nAcks; - ack_reason = (buf.ack.reason < RXRPC_ACK__INVALID ? - buf.ack.reason : RXRPC_ACK__INVALID); + summary.ack_reason = (buf.ack.reason < RXRPC_ACK__INVALID ? 
+ buf.ack.reason : RXRPC_ACK__INVALID); - trace_rxrpc_rx_ack(call, first_soft_ack, ack_reason, nr_acks); + trace_rxrpc_rx_ack(call, first_soft_ack, summary.ack_reason, nr_acks); _proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", sp->hdr.serial, @@ -592,7 +613,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, first_soft_ack, ntohl(buf.ack.previousPacket), acked_serial, - rxrpc_ack_names[ack_reason], + rxrpc_ack_names[summary.ack_reason], buf.ack.nAcks); if (buf.ack.reason == RXRPC_ACK_PING_RESPONSE) @@ -649,12 +670,13 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, return rxrpc_proto_abort("AKN", call, 0); if (after(hard_ack, call->tx_hard_ack)) - rxrpc_rotate_tx_window(call, hard_ack); + rxrpc_rotate_tx_window(call, hard_ack, &summary); if (nr_acks > 0) { if (skb_copy_bits(skb, sp->offset, buf.acks, nr_acks) < 0) return rxrpc_proto_abort("XSA", call, 0); - rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks); + rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks, + &summary); } if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { @@ -669,11 +691,12 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, */ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) { + struct rxrpc_ack_summary summary = { 0 }; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); _proto("Rx ACKALL %%%u", sp->hdr.serial); - rxrpc_rotate_tx_window(call, call->tx_top); + rxrpc_rotate_tx_window(call, call->tx_top, &summary); if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) rxrpc_end_tx_phase(call, false, "ETL"); } -- cgit v1.1 From 0d967960d39ee89f9e0289692e9f7232f490e55c Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 24 Sep 2016 18:05:27 +0100 Subject: rxrpc: Schedule an ACK if the reply to a client call appears overdue If we've sent all the request data in a client call but haven't seen any sign of the reply data yet, schedule an ACK to be sent to the server to 
find out if the reply data got lost. If the server hasn't yet hard-ACK'd the request data, we send a PING ACK to demand a response to find out whether we need to retransmit. If the server says it has received all of the data, we send an IDLE ACK to tell the server that we haven't received anything in the receive phase as yet. To make this work, a non-immediate PING ACK must carry a delay. I've chosen the same as the IDLE ACK for the moment. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 2 ++ net/rxrpc/call_event.c | 1 + net/rxrpc/input.c | 8 ++++++++ net/rxrpc/misc.c | 2 ++ 4 files changed, 13 insertions(+) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 1a700b6..b1e697f 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -707,7 +707,9 @@ enum rxrpc_timer_trace { extern const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8]; enum rxrpc_propose_ack_trace { + rxrpc_propose_ack_client_tx_end, rxrpc_propose_ack_input_data, + rxrpc_propose_ack_ping_for_lost_reply, rxrpc_propose_ack_ping_for_params, rxrpc_propose_ack_respond_to_ack, rxrpc_propose_ack_respond_to_ping, diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index d5bf9ce..05b94d1 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -100,6 +100,7 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, expiry = rxrpc_soft_ack_delay; break; + case RXRPC_ACK_PING: case RXRPC_ACK_IDLE: if (rxrpc_idle_ack_delay < expiry) expiry = rxrpc_idle_ack_delay; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index dd69966..0344f44 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -138,6 +138,8 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, write_unlock(&call->state_lock); if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) { + rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, false, true, + rxrpc_propose_ack_client_tx_end); trace_rxrpc_transmit(call, rxrpc_transmit_await_reply); } else { 
trace_rxrpc_transmit(call, rxrpc_transmit_end); @@ -684,6 +686,12 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, return; } + if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] & + RXRPC_TX_ANNO_LAST && + summary.nr_acks == call->tx_top - hard_ack) + rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, + false, true, + rxrpc_propose_ack_ping_for_lost_reply); } /* diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 901c012..a608769 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -198,7 +198,9 @@ const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8] = { }; const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8] = { + [rxrpc_propose_ack_client_tx_end] = "ClTxEnd", [rxrpc_propose_ack_input_data] = "DataIn ", + [rxrpc_propose_ack_ping_for_lost_reply] = "LostRpl", [rxrpc_propose_ack_ping_for_params] = "Params ", [rxrpc_propose_ack_respond_to_ack] = "Rsp2Ack", [rxrpc_propose_ack_respond_to_ping] = "Rsp2Png", -- cgit v1.1 From 57494343cb5d66962bb197878fb1cc576177db31 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 24 Sep 2016 18:05:27 +0100 Subject: rxrpc: Implement slow-start Implement RxRPC slow-start, which is similar to RFC 5681 for TCP. A tracepoint is added to log the state of the congestion management algorithm and the decisions it makes. Notes: (1) Since we send fixed-size DATA packets (apart from the final packet in each phase), counters and calculations are in terms of packets rather than bytes. (2) The ACK packet carries the equivalent of TCP SACK. (3) The FLIGHT_SIZE calculation in RFC 5681 doesn't seem particularly suited to SACK of a small number of packets. It seems that, almost inevitably, by the time three 'duplicate' ACKs have been seen, we have narrowed the loss down to one or two missing packets, and the FLIGHT_SIZE calculation ends up as 2. 
(4) In rxrpc_resend(), if there was no data that apparently needed retransmission, we transmit a PING ACK to ask the peer to tell us what its Rx window state is. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 45 ++++++++++++ net/rxrpc/ar-internal.h | 53 +++++++++++++- net/rxrpc/call_event.c | 36 ++++++++- net/rxrpc/call_object.c | 13 ++++ net/rxrpc/conn_event.c | 1 + net/rxrpc/input.c | 169 +++++++++++++++++++++++++++++++++++++++++-- net/rxrpc/misc.c | 19 +++++ net/rxrpc/output.c | 9 ++- net/rxrpc/sendmsg.c | 7 +- 9 files changed, 339 insertions(+), 13 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 5647549..ada12d0 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -570,6 +570,51 @@ TRACE_EVENT(rxrpc_retransmit, __entry->expiry) ); +TRACE_EVENT(rxrpc_congest, + TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary, + rxrpc_serial_t ack_serial, enum rxrpc_congest_change change), + + TP_ARGS(call, summary, ack_serial, change), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_congest_change, change ) + __field(rxrpc_seq_t, hard_ack ) + __field(rxrpc_seq_t, top ) + __field(rxrpc_seq_t, lowest_nak ) + __field(rxrpc_serial_t, ack_serial ) + __field_struct(struct rxrpc_ack_summary, sum ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->change = change; + __entry->hard_ack = call->tx_hard_ack; + __entry->top = call->tx_top; + __entry->lowest_nak = call->acks_lowest_nak; + __entry->ack_serial = ack_serial; + memcpy(&__entry->sum, summary, sizeof(__entry->sum)); + ), + + TP_printk("c=%p %08x %s %08x %s cw=%u ss=%u nr=%u,%u nw=%u,%u r=%u b=%u u=%u d=%u l=%x%s%s%s", + __entry->call, + __entry->ack_serial, + rxrpc_ack_names[__entry->sum.ack_reason], + __entry->hard_ack, + rxrpc_congest_modes[__entry->sum.mode], + __entry->sum.cwnd, + __entry->sum.ssthresh, + __entry->sum.nr_acks, __entry->sum.nr_nacks, + __entry->sum.nr_new_acks, 
__entry->sum.nr_new_nacks, + __entry->sum.nr_rot_new_acks, + __entry->top - __entry->hard_ack, + __entry->sum.cumulative_acks, + __entry->sum.dup_acks, + __entry->lowest_nak, __entry->sum.new_low_nack ? "!" : "", + rxrpc_congest_changes[__entry->change], + __entry->sum.retrans_timeo ? " rTxTo" : "") + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index b1e697f..ca96e54 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -402,6 +402,7 @@ enum rxrpc_call_flag { RXRPC_CALL_RX_LAST, /* Received the last packet (at rxtx_top) */ RXRPC_CALL_TX_LAST, /* Last packet in Tx buffer (at rxtx_top) */ RXRPC_CALL_PINGING, /* Ping in process */ + RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */ }; /* @@ -447,6 +448,17 @@ enum rxrpc_call_completion { }; /* + * Call Tx congestion management modes. + */ +enum rxrpc_congest_mode { + RXRPC_CALL_SLOW_START, + RXRPC_CALL_CONGEST_AVOIDANCE, + RXRPC_CALL_PACKET_LOSS, + RXRPC_CALL_FAST_RETRANSMIT, + NR__RXRPC_CONGEST_MODES +}; + +/* * RxRPC call definition * - matched by { connection, call_id } */ @@ -518,6 +530,20 @@ struct rxrpc_call { * not hard-ACK'd packet follows this. */ rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */ + + /* TCP-style slow-start congestion control [RFC5681]. Since the SMSS + * is fixed, we keep these numbers in terms of segments (ie. DATA + * packets) rather than bytes. 
+ */ +#define RXRPC_TX_SMSS RXRPC_JUMBO_DATALEN + u8 cong_cwnd; /* Congestion window size */ + u8 cong_extra; /* Extra to send for congestion management */ + u8 cong_ssthresh; /* Slow-start threshold */ + enum rxrpc_congest_mode cong_mode:8; /* Congestion management mode */ + u8 cong_dup_acks; /* Count of ACKs showing missing packets */ + u8 cong_cumul_acks; /* Cumulative ACK count */ + ktime_t cong_tstamp; /* Last time cwnd was changed */ + rxrpc_seq_t rx_hard_ack; /* Dead slot in buffer; the first received but not * consumed packet follows this. */ @@ -539,12 +565,13 @@ struct rxrpc_call { ktime_t ackr_ping_time; /* Time last ping sent */ /* transmission-phase ACK management */ + ktime_t acks_latest_ts; /* Timestamp of latest ACK received */ rxrpc_serial_t acks_latest; /* serial number of latest ACK received */ rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */ }; /* - * Summary of a new ACK and the changes it made. + * Summary of a new ACK and the changes it made to the Tx buffer packet states. 
*/ struct rxrpc_ack_summary { u8 ack_reason; @@ -554,6 +581,14 @@ struct rxrpc_ack_summary { u8 nr_new_nacks; /* Number of new NACKs in packet */ u8 nr_rot_new_acks; /* Number of rotated new ACKs */ bool new_low_nack; /* T if new low NACK found */ + bool retrans_timeo; /* T if reTx due to timeout happened */ + u8 flight_size; /* Number of unreceived transmissions */ + /* Place to stash values for tracing */ + enum rxrpc_congest_mode mode:8; + u8 cwnd; + u8 ssthresh; + u8 dup_acks; + u8 cumulative_acks; }; enum rxrpc_skb_trace { @@ -709,6 +744,7 @@ extern const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8]; enum rxrpc_propose_ack_trace { rxrpc_propose_ack_client_tx_end, rxrpc_propose_ack_input_data, + rxrpc_propose_ack_ping_for_lost_ack, rxrpc_propose_ack_ping_for_lost_reply, rxrpc_propose_ack_ping_for_params, rxrpc_propose_ack_respond_to_ack, @@ -729,6 +765,21 @@ enum rxrpc_propose_ack_outcome { extern const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8]; extern const char *const rxrpc_propose_ack_outcomes[rxrpc_propose_ack__nr_outcomes]; +enum rxrpc_congest_change { + rxrpc_cong_begin_retransmission, + rxrpc_cong_cleared_nacks, + rxrpc_cong_new_low_nack, + rxrpc_cong_no_change, + rxrpc_cong_progress, + rxrpc_cong_retransmit_again, + rxrpc_cong_rtt_window_end, + rxrpc_cong_saw_nack, + rxrpc_congest__nr_change +}; + +extern const char rxrpc_congest_modes[NR__RXRPC_CONGEST_MODES][10]; +extern const char rxrpc_congest_changes[rxrpc_congest__nr_change][9]; + extern const char *const rxrpc_pkts[]; extern const char const rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4]; diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 05b94d1..0e84780 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -147,6 +147,14 @@ void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, } /* + * Handle congestion being detected by the retransmit timeout. 
+ */ +static void rxrpc_congestion_timeout(struct rxrpc_call *call) +{ + set_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags); +} + +/* * Perform retransmission of NAK'd and unack'd packets. */ static void rxrpc_resend(struct rxrpc_call *call) @@ -154,9 +162,9 @@ static void rxrpc_resend(struct rxrpc_call *call) struct rxrpc_skb_priv *sp; struct sk_buff *skb; rxrpc_seq_t cursor, seq, top; - ktime_t now = ktime_get_real(), max_age, oldest, resend_at; + ktime_t now = ktime_get_real(), max_age, oldest, resend_at, ack_ts; int ix; - u8 annotation, anno_type; + u8 annotation, anno_type, retrans = 0, unacked = 0; _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); @@ -193,10 +201,13 @@ static void rxrpc_resend(struct rxrpc_call *call) oldest = skb->tstamp; continue; } + if (!(annotation & RXRPC_TX_ANNO_RESENT)) + unacked++; } /* Okay, we need to retransmit a packet. */ call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS | annotation; + retrans++; trace_rxrpc_retransmit(call, seq, annotation | anno_type, ktime_to_ns(ktime_sub(skb->tstamp, max_age))); } @@ -210,6 +221,25 @@ static void rxrpc_resend(struct rxrpc_call *call) * reached the nsec timeout yet. */ + if (unacked) + rxrpc_congestion_timeout(call); + + /* If there was nothing that needed retransmission then it's likely + * that an ACK got lost somewhere. Send a ping to find out instead of + * retransmitting data. + */ + if (!retrans) { + rxrpc_set_timer(call, rxrpc_timer_set_for_resend); + spin_unlock_bh(&call->lock); + ack_ts = ktime_sub(now, call->acks_latest_ts); + if (ktime_to_ns(ack_ts) < call->peer->rtt) + goto out; + rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false, + rxrpc_propose_ack_ping_for_lost_ack); + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + goto out; + } + /* Now go through the Tx window and perform the retransmissions. We * have to drop the lock for each send. 
If an ACK comes in whilst the * lock is dropped, it may clear some of the retransmission markers for @@ -260,6 +290,7 @@ static void rxrpc_resend(struct rxrpc_call *call) out_unlock: spin_unlock_bh(&call->lock); +out: _leave(""); } @@ -293,6 +324,7 @@ recheck_state: if (time_after_eq(now, call->expire_at)) { rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, ETIME); set_bit(RXRPC_CALL_EV_ABORT, &call->events); + goto recheck_state; } if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events) || diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index a53f4c2..d4b3293 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -160,6 +160,14 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) call->rx_winsize = rxrpc_rx_window_size; call->tx_winsize = 16; call->rx_expect_next = 1; + + if (RXRPC_TX_SMSS > 2190) + call->cong_cwnd = 2; + else if (RXRPC_TX_SMSS > 1095) + call->cong_cwnd = 3; + else + call->cong_cwnd = 4; + call->cong_ssthresh = RXRPC_RXTX_BUFF_SIZE - 1; return call; nomem_2: @@ -176,6 +184,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx, gfp_t gfp) { struct rxrpc_call *call; + ktime_t now; _enter(""); @@ -185,6 +194,9 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx, call->state = RXRPC_CALL_CLIENT_AWAIT_CONN; call->service_id = srx->srx_service; call->tx_phase = true; + now = ktime_get_real(); + call->acks_latest_ts = now; + call->cong_tstamp = now; _leave(" = %p", call); return call; @@ -325,6 +337,7 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, call->state = RXRPC_CALL_SERVER_ACCEPTING; if (sp->hdr.securityIndex > 0) call->state = RXRPC_CALL_SERVER_SECURING; + call->cong_tstamp = skb->tstamp; /* Set the channel for this call. 
We don't get channel_lock as we're * only defending against the data_ready handler (which we're called diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index a1cf1ec..37609ce 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -97,6 +97,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, pkt.info.maxMTU = htonl(mtu); pkt.info.rwind = htonl(rxrpc_rx_window_size); pkt.info.jumbo_max = htonl(rxrpc_rx_jumbo_max); + pkt.whdr.flags |= RXRPC_SLOW_START_OK; len += sizeof(pkt.ack) + sizeof(pkt.info); break; } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 0344f44..094720d 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -37,6 +37,166 @@ static void rxrpc_proto_abort(const char *why, } /* + * Do TCP-style congestion management [RFC 5681]. + */ +static void rxrpc_congestion_management(struct rxrpc_call *call, + struct sk_buff *skb, + struct rxrpc_ack_summary *summary) +{ + enum rxrpc_congest_change change = rxrpc_cong_no_change; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + unsigned int cumulative_acks = call->cong_cumul_acks; + unsigned int cwnd = call->cong_cwnd; + bool resend = false; + + summary->flight_size = + (call->tx_top - call->tx_hard_ack) - summary->nr_acks; + + if (test_and_clear_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags)) { + summary->retrans_timeo = true; + call->cong_ssthresh = max_t(unsigned int, + summary->flight_size / 2, 2); + cwnd = 1; + if (cwnd > call->cong_ssthresh && + call->cong_mode == RXRPC_CALL_SLOW_START) { + call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; + call->cong_tstamp = skb->tstamp; + cumulative_acks = 0; + } + } + + cumulative_acks += summary->nr_new_acks; + cumulative_acks += summary->nr_rot_new_acks; + if (cumulative_acks > 255) + cumulative_acks = 255; + + summary->mode = call->cong_mode; + summary->cwnd = call->cong_cwnd; + summary->ssthresh = call->cong_ssthresh; + summary->cumulative_acks = cumulative_acks; + summary->dup_acks = call->cong_dup_acks; + + 
switch (call->cong_mode) { + case RXRPC_CALL_SLOW_START: + if (summary->nr_nacks > 0) + goto packet_loss_detected; + if (summary->cumulative_acks > 0) + cwnd += 1; + if (cwnd > call->cong_ssthresh) { + call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; + call->cong_tstamp = skb->tstamp; + } + goto out; + + case RXRPC_CALL_CONGEST_AVOIDANCE: + if (summary->nr_nacks > 0) + goto packet_loss_detected; + + /* We analyse the number of packets that get ACK'd per RTT + * period and increase the window if we managed to fill it. + */ + if (call->peer->rtt_usage == 0) + goto out; + if (ktime_before(skb->tstamp, + ktime_add_ns(call->cong_tstamp, + call->peer->rtt))) + goto out_no_clear_ca; + change = rxrpc_cong_rtt_window_end; + call->cong_tstamp = skb->tstamp; + if (cumulative_acks >= cwnd) + cwnd++; + goto out; + + case RXRPC_CALL_PACKET_LOSS: + if (summary->nr_nacks == 0) + goto resume_normality; + + if (summary->new_low_nack) { + change = rxrpc_cong_new_low_nack; + call->cong_dup_acks = 1; + if (call->cong_extra > 1) + call->cong_extra = 1; + goto send_extra_data; + } + + call->cong_dup_acks++; + if (call->cong_dup_acks < 3) + goto send_extra_data; + + change = rxrpc_cong_begin_retransmission; + call->cong_mode = RXRPC_CALL_FAST_RETRANSMIT; + call->cong_ssthresh = max_t(unsigned int, + summary->flight_size / 2, 2); + cwnd = call->cong_ssthresh + 3; + call->cong_extra = 0; + call->cong_dup_acks = 0; + resend = true; + goto out; + + case RXRPC_CALL_FAST_RETRANSMIT: + if (!summary->new_low_nack) { + if (summary->nr_new_acks == 0) + cwnd += 1; + call->cong_dup_acks++; + if (call->cong_dup_acks == 2) { + change = rxrpc_cong_retransmit_again; + call->cong_dup_acks = 0; + resend = true; + } + } else { + change = rxrpc_cong_progress; + cwnd = call->cong_ssthresh; + if (summary->nr_nacks == 0) + goto resume_normality; + } + goto out; + + default: + BUG(); + goto out; + } + +resume_normality: + change = rxrpc_cong_cleared_nacks; + call->cong_dup_acks = 0; + call->cong_extra = 0; + 
call->cong_tstamp = skb->tstamp; + if (cwnd <= call->cong_ssthresh) + call->cong_mode = RXRPC_CALL_SLOW_START; + else + call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; +out: + cumulative_acks = 0; +out_no_clear_ca: + if (cwnd >= RXRPC_RXTX_BUFF_SIZE - 1) + cwnd = RXRPC_RXTX_BUFF_SIZE - 1; + call->cong_cwnd = cwnd; + call->cong_cumul_acks = cumulative_acks; + trace_rxrpc_congest(call, summary, sp->hdr.serial, change); + if (resend && !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) + rxrpc_queue_call(call); + return; + +packet_loss_detected: + change = rxrpc_cong_saw_nack; + call->cong_mode = RXRPC_CALL_PACKET_LOSS; + call->cong_dup_acks = 0; + goto send_extra_data; + +send_extra_data: + /* Send some previously unsent DATA if we have some to advance the ACK + * state. + */ + if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] & + RXRPC_TX_ANNO_LAST || + summary->nr_acks != call->tx_top - call->tx_hard_ack) { + call->cong_extra++; + wake_up(&call->waitq); + } + goto out_no_clear_ca; +} + +/* * Ping the other end to fill our RTT cache and to retrieve the rwind * and MTU parameters. 
*/ @@ -524,7 +684,6 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, rxrpc_seq_t seq, int nr_acks, struct rxrpc_ack_summary *summary) { - bool resend = false; int ix; u8 annotation, anno_type; @@ -556,16 +715,11 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, continue; call->rxtx_annotations[ix] = RXRPC_TX_ANNO_NAK | annotation; - resend = true; break; default: return rxrpc_proto_abort("SFT", call, 0); } } - - if (resend && - !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) - rxrpc_queue_call(call); } /* @@ -663,6 +817,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, sp->hdr.serial, call->acks_latest); return; } + call->acks_latest_ts = skb->tstamp; call->acks_latest = sp->hdr.serial; if (before(hard_ack, call->tx_hard_ack) || @@ -692,6 +847,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, false, true, rxrpc_propose_ack_ping_for_lost_reply); + + return rxrpc_congestion_management(call, skb, &summary); } /* diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index a608769..aedb897 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -200,6 +200,7 @@ const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8] = { const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8] = { [rxrpc_propose_ack_client_tx_end] = "ClTxEnd", [rxrpc_propose_ack_input_data] = "DataIn ", + [rxrpc_propose_ack_ping_for_lost_ack] = "LostAck", [rxrpc_propose_ack_ping_for_lost_reply] = "LostRpl", [rxrpc_propose_ack_ping_for_params] = "Params ", [rxrpc_propose_ack_respond_to_ack] = "Rsp2Ack", @@ -214,3 +215,21 @@ const char *const rxrpc_propose_ack_outcomes[rxrpc_propose_ack__nr_outcomes] = { [rxrpc_propose_ack_update] = " Update", [rxrpc_propose_ack_subsume] = " Subsume", }; + +const char rxrpc_congest_modes[NR__RXRPC_CONGEST_MODES][10] = { + [RXRPC_CALL_SLOW_START] = "SlowStart", + [RXRPC_CALL_CONGEST_AVOIDANCE] = 
"CongAvoid", + [RXRPC_CALL_PACKET_LOSS] = "PktLoss ", + [RXRPC_CALL_FAST_RETRANSMIT] = "FastReTx ", +}; + +const char rxrpc_congest_changes[rxrpc_congest__nr_change][9] = { + [rxrpc_cong_begin_retransmission] = " Retrans", + [rxrpc_cong_cleared_nacks] = " Cleared", + [rxrpc_cong_new_low_nack] = " NewLowN", + [rxrpc_cong_no_change] = "", + [rxrpc_cong_progress] = " Progres", + [rxrpc_cong_retransmit_again] = " ReTxAgn", + [rxrpc_cong_rtt_window_end] = " RttWinE", + [rxrpc_cong_saw_nack] = " SawNack", +}; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 3eb0144..cf43a71 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -157,6 +157,8 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) spin_unlock_bh(&call->lock); + pkt->whdr.flags |= RXRPC_SLOW_START_OK; + iov[0].iov_len += sizeof(pkt->ack) + n; iov[1].iov_base = &pkt->ackinfo; iov[1].iov_len = sizeof(pkt->ackinfo); @@ -276,8 +278,11 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) msg.msg_controllen = 0; msg.msg_flags = 0; - /* If our RTT cache needs working on, request an ACK. */ - if ((call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || + /* If our RTT cache needs working on, request an ACK. Also request + * ACKs if a DATA packet appears to have been lost. 
+ */ + if (call->cong_mode == RXRPC_CALL_FAST_RETRANSMIT || + (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), ktime_get_real())) whdr.flags |= RXRPC_REQUEST_ACK; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 9993937..1f8040d 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -45,7 +45,9 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, for (;;) { set_current_state(TASK_INTERRUPTIBLE); ret = 0; - if (call->tx_top - call->tx_hard_ack < call->tx_winsize) + if (call->tx_top - call->tx_hard_ack < + min_t(unsigned int, call->tx_winsize, + call->cong_cwnd + call->cong_extra)) break; if (call->state >= RXRPC_CALL_COMPLETE) { ret = -call->error; @@ -203,7 +205,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, _debug("alloc"); if (call->tx_top - call->tx_hard_ack >= - call->tx_winsize) { + min_t(unsigned int, call->tx_winsize, + call->cong_cwnd + call->cong_extra)) { ret = -EAGAIN; if (msg->msg_flags & MSG_DONTWAIT) goto maybe_error; -- cgit v1.1 From c2675de447f8238e7e2e7eced78fa671d42a9a7e Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Sat, 24 Sep 2016 14:01:04 -0400 Subject: gre: use nla_get_be32() to extract flowinfo Eliminate a sparse endianness mismatch warning, use nla_get_be32() to extract a __be32 value instead of nla_get_u32(). Signed-off-by: Lance Richardson Signed-off-by: David S. 
Miller --- net/ipv6/ip6_gre.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 397e1ed..4ce74f8 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1239,7 +1239,7 @@ static void ip6gre_netlink_parms(struct nlattr *data[], parms->encap_limit = nla_get_u8(data[IFLA_GRE_ENCAP_LIMIT]); if (data[IFLA_GRE_FLOWINFO]) - parms->flowinfo = nla_get_u32(data[IFLA_GRE_FLOWINFO]); + parms->flowinfo = nla_get_be32(data[IFLA_GRE_FLOWINFO]); if (data[IFLA_GRE_FLAGS]) parms->flags = nla_get_u32(data[IFLA_GRE_FLAGS]); -- cgit v1.1 From f2c7c1d09832ef0d6499a9e1a958c3ddc686f723 Mon Sep 17 00:00:00 2001 From: Harshitha Ramamurthy Date: Tue, 6 Sep 2016 18:05:06 -0700 Subject: i40e: Remove 100 Mbps SGMII support for X722 This patch fixes the problem where the driver shows 100 Mbps as a supported speed, and allows it to be configured for advertising on X722 devices. This patch fixes the problem by not setting the 100 Mbps SGMII flag for X722 devices. Without this patch, the user incorrectly thinks that 100 Mbps is supported and hence might try to advertise it on X722 devices when it is actually not a supported speed.
Change-ID: I8c3d7c4251a9402d98994ed29749b7b895a0f205 Signed-off-by: Harshitha Ramamurthy Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 53cde5b..60f082e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -8608,7 +8608,6 @@ static int i40e_sw_init(struct i40e_pf *pf) I40E_FLAG_WB_ON_ITR_CAPABLE | I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE | I40E_FLAG_NO_PCI_LINK_CHECK | - I40E_FLAG_100M_SGMII_CAPABLE | I40E_FLAG_USE_SET_LLDP_MIB | I40E_FLAG_GENEVE_OFFLOAD_CAPABLE; } else if ((pf->hw.aq.api_maj_ver > 1) || -- cgit v1.1 From a6cb91464b5b09c0ff749e4b01048f93ce450275 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Tue, 6 Sep 2016 18:05:07 -0700 Subject: i40e: fix deleting mac filters There exists a bug in which deleting a mac filter does not actually occur. The driver reports that the filter has been deleted with no error. The problem occurs because the wrong cmd_flag is passed to the firmware when deleting the filter. The firmware reports an error back to the driver but it is expressly ignored. This fixes the bug by using the correct flag when deleting a filter. Without this patch, deleted filters remain in firmware and function as if they had not been deleted. 
Change-ID: I5f22b874f3b83f457702f18f0d5602ca21ac40c3 Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 60f082e..0841379 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1315,7 +1315,7 @@ static void i40e_rm_default_mac_filter(struct i40e_vsi *vsi, u8 *macaddr) element.vlan_tag = 0; /* ...and some firmware does it this way. */ element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH | - I40E_AQC_MACVLAN_ADD_IGNORE_VLAN; + I40E_AQC_MACVLAN_DEL_IGNORE_VLAN; i40e_aq_remove_macvlan(&pf->hw, vsi->seid, &element, 1, NULL); } @@ -1908,7 +1908,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) ether_addr_copy(del_list[num_del].mac_addr, f->macaddr); if (f->vlan == I40E_VLAN_ANY) { del_list[num_del].vlan_tag = 0; - cmd_flags |= I40E_AQC_MACVLAN_ADD_IGNORE_VLAN; + cmd_flags |= I40E_AQC_MACVLAN_DEL_IGNORE_VLAN; } else { del_list[num_del].vlan_tag = cpu_to_le16((u16)(f->vlan)); -- cgit v1.1 From 2199254cb50a650934ef2d1c531ec6fdc0a826c5 Mon Sep 17 00:00:00 2001 From: Preethi Banala Date: Tue, 6 Sep 2016 18:05:08 -0700 Subject: i40e: add encap csum VF offload flag Add ENCAP_CSUM offload negotiation flag. Currently VF assumes checksum offload for encapsulated packets is supported by default. Going forward, this feature needs to be negotiated with PF before advertising to the stack. Hence, we need a flag to control it. This is in regards to prepping up for VF base mode functionality support. 
Change-ID: Iaab1f25cc0abda5f2fbe3309092640f0e77d163e Signed-off-by: Preethi Banala Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl.h | 1 + drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h index c92a3bd..f861d31 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h @@ -163,6 +163,7 @@ struct i40e_virtchnl_vsi_resource { #define I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING 0x00020000 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 0x00040000 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF 0X00080000 +#define I40E_VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM 0X00100000 struct i40e_virtchnl_vf_resource { u16 num_vsis; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h index f04ce6c..bd691ad 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h @@ -160,6 +160,7 @@ struct i40e_virtchnl_vsi_resource { #define I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING 0x00020000 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 0x00040000 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF 0X00080000 +#define I40E_VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM 0X00100000 struct i40e_virtchnl_vf_resource { u16 num_vsis; -- cgit v1.1 From 234dc4e67611c11bb3990abced26cb75b8ef262a Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 6 Sep 2016 18:05:09 -0700 Subject: i40e: cleanup ATR auto_disable_flags use Some locations that disable ATR accidentally used the "full" disable by disabling the flag in the standard flags field. This incorrectly forces ATR off permanently instead of temporarily disabling it. In addition, some code locations accidentally set the ATR flag enabled when they only meant to clear the auto_disable_flags. 
This results in ignoring the user's ethtool private flag settings. Additionally, when disabling ATR via ethtool, we did not perform a flush of the FD table. This results in the previously assigned ATR rules still functioning which was not expected. Cleanup all these areas so that automatic disable uses only the auto_disable_flag. Fix the flush code so that we can trigger a flush even when we've disabled ATR and SB support, as otherwise the flush doesn't work. Fix ethtool setting to actually request a flush. Fix NETIF_F_NTUPLE flag to only clear the auto_disable setting and not enable the full feature. Change-ID: Ib2486111f8031bd16943e9308757b276305c03b5 Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 3 +++ drivers/net/ethernet/intel/i40e/i40e_main.c | 20 +++++++------------- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 14 +++++++------- 3 files changed, 17 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index af28a8c..5cad80f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -3021,6 +3021,9 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) } else { pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED; pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED; + + /* flush current ATR settings */ + set_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state); } if ((flags & I40E_PRIV_FLAGS_VEB_STATS) && diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 0841379..e1a2c9a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5242,7 +5242,7 @@ static int i40e_up_complete(struct i40e_vsi *vsi) /* reset fd counters */ pf->fd_add_err = pf->fd_atr_cnt = 0; if (pf->fd_tcp_rule > 0) { - pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED; + 
pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED; if (I40E_DEBUG_FD & pf->hw.debug_mask) dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 exist\n"); pf->fd_tcp_rule = 0; @@ -5976,9 +5976,6 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf) int fd_room; int reg; - if (!(pf->flags & (I40E_FLAG_FD_SB_ENABLED | I40E_FLAG_FD_ATR_ENABLED))) - return; - if (!time_after(jiffies, pf->fd_flush_timestamp + (I40E_MIN_FD_FLUSH_INTERVAL * HZ))) return; @@ -5998,7 +5995,7 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf) } pf->fd_flush_timestamp = jiffies; - pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED; + pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED; /* flush all filters */ wr32(&pf->hw, I40E_PFQF_CTL_1, I40E_PFQF_CTL_1_CLEARFDTABLE_MASK); @@ -6018,7 +6015,7 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf) /* replay sideband filters */ i40e_fdir_filter_restore(pf->vsi[pf->lan_vsi]); if (!disable_atr) - pf->flags |= I40E_FLAG_FD_ATR_ENABLED; + pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED; clear_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state); if (I40E_DEBUG_FD & pf->hw.debug_mask) dev_info(&pf->pdev->dev, "FD Filter table flushed and FD-SB replayed.\n"); @@ -6052,9 +6049,6 @@ static void i40e_fdir_reinit_subtask(struct i40e_pf *pf) if (test_bit(__I40E_DOWN, &pf->state)) return; - if (!(pf->flags & (I40E_FLAG_FD_SB_ENABLED | I40E_FLAG_FD_ATR_ENABLED))) - return; - if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state)) i40e_fdir_flush_and_replay(pf); @@ -8682,13 +8676,13 @@ bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features) /* reset fd counters */ pf->fd_add_err = pf->fd_atr_cnt = pf->fd_tcp_rule = 0; pf->fdir_pf_active_filters = 0; - pf->flags |= I40E_FLAG_FD_ATR_ENABLED; - if (I40E_DEBUG_FD & pf->hw.debug_mask) - dev_info(&pf->pdev->dev, "ATR re-enabled.\n"); /* if ATR was auto disabled it can be re-enabled. 
*/ if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && - (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED)) + (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED)) { pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED; + if (I40E_DEBUG_FD & pf->hw.debug_mask) + dev_info(&pf->pdev->dev, "ATR re-enabled.\n"); + } } return need_reset; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index bf7bb7c..7ada05e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -282,18 +282,18 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi, if (add) { pf->fd_tcp_rule++; - if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) { - if (I40E_DEBUG_FD & pf->hw.debug_mask) - dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n"); - pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED; - } + if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && + I40E_DEBUG_FD & pf->hw.debug_mask) + dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n"); + pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED; } else { pf->fd_tcp_rule = (pf->fd_tcp_rule > 0) ? (pf->fd_tcp_rule - 1) : 0; if (pf->fd_tcp_rule == 0) { - pf->flags |= I40E_FLAG_FD_ATR_ENABLED; - if (I40E_DEBUG_FD & pf->hw.debug_mask) + if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && + I40E_DEBUG_FD & pf->hw.debug_mask) dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n"); + pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED; } } -- cgit v1.1 From a3417d287fb02e7bd24c6b1068fe6f9b52a259a6 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 6 Sep 2016 18:05:10 -0700 Subject: i40e: check conflicting ntuple/sideband rules when re-enabling ATR In i40e_fdir_check_and_reenable(), the driver performs some checks to determine whether it is safe to re-enable FD Sideband and FD ATR support. 
The current check will only determine if there is available space in the flow director table. However, this ignores the fact that ATR should be disabled when there are TCP/IPv4 sideband rules in effect. Add the missing check, and update the info message printed when I40E_DEBUG_FD is enabled. Change-ID: Ibb9c63e5be95d63c53a498fdd5dbf69f54a00e08 Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index e1a2c9a..89b0418 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5939,13 +5939,17 @@ void i40e_fdir_check_and_reenable(struct i40e_pf *pf) dev_info(&pf->pdev->dev, "FD Sideband/ntuple is being enabled since we have space in the table now\n"); } } - /* Wait for some more space to be available to turn on ATR */ + + /* Wait for some more space to be available to turn on ATR. We also + * must check that no existing ntuple rules for TCP are in effect + */ if (fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM * 2)) { if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && - (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED)) { + (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED) && + (pf->fd_tcp_rule == 0)) { pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED; if (I40E_DEBUG_FD & pf->hw.debug_mask) - dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table now\n"); + dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table and there are no conflicting ntuple rules\n"); } } -- cgit v1.1 From ac9c5c6d8c17fa105878442ac663f0e9abe3cff5 Mon Sep 17 00:00:00 2001 From: Henry Tieman Date: Tue, 6 Sep 2016 18:05:11 -0700 Subject: i40e: removing unreachable code The return value from i40e_shutdown_adminq() is always 0 (I40E_SUCCESS). 
So, the test for non-0 will never be true. Cleanup by removing the test and debug print statement. Change-ID: Ie51e8e37515c3e3a6a9ff26fa951d0e5e24343c1 Signed-off-by: Henry Tieman Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 89b0418..e626761b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -11333,11 +11333,7 @@ static void i40e_remove(struct pci_dev *pdev) } /* shutdown the adminq */ - ret_code = i40e_shutdown_adminq(hw); - if (ret_code) - dev_warn(&pdev->dev, - "Failed to destroy the Admin Queue resources: %d\n", - ret_code); + i40e_shutdown_adminq(hw); /* destroy the locks only once, here */ mutex_destroy(&hw->aq.arq_mutex); -- cgit v1.1 From fa90efa59dabbaac24f1ad2e6535e6daa2845257 Mon Sep 17 00:00:00 2001 From: Bimmy Pujari Date: Tue, 6 Sep 2016 18:05:12 -0700 Subject: i40e/i40evf: Changed version to 1.6.16 Signed-off-by: Bimmy Pujari Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index e626761b..b434d07 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -41,7 +41,7 @@ static const char i40e_driver_string[] = #define DRV_VERSION_MAJOR 1 #define DRV_VERSION_MINOR 6 -#define DRV_VERSION_BUILD 12 +#define DRV_VERSION_BUILD 16 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." 
\ __stringify(DRV_VERSION_BUILD) DRV_KERN diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 99833f3..064419e 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -38,7 +38,7 @@ static const char i40evf_driver_string[] = #define DRV_VERSION_MAJOR 1 #define DRV_VERSION_MINOR 6 -#define DRV_VERSION_BUILD 12 +#define DRV_VERSION_BUILD 16 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) \ -- cgit v1.1 From 7ac4b5c6fd351be8f849f687e290ca9724acfd33 Mon Sep 17 00:00:00 2001 From: Akeem Abodunrin Date: Mon, 12 Sep 2016 14:18:37 -0700 Subject: i40e: Increase minimum number of allocated VSI This patch increases minimum number of allocated VSIs, so as to resolve failure adding VSI for VF when 64-VFs assigned to a PF. The driver supports up to 128 VFs per device, users can decide to enable up to 64-VFs on a single PF, especially 2 X 40 devices. In that scenario, with VMDq co-existence, there would be starvation of VSIs - with this patch, supported features would have enough VSIs for configuration now. 
Change-ID: If084f4cd823667af8fe7fdc11489c705b32039d5 Signed-off-by: Akeem Abodunrin Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 30aaee4..6b22df6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -74,7 +74,7 @@ #define I40E_MIN_NUM_DESCRIPTORS 64 #define I40E_MIN_MSIX 2 #define I40E_DEFAULT_NUM_VMDQ_VSI 8 /* max 256 VSIs */ -#define I40E_MIN_VSI_ALLOC 51 /* LAN, ATR, FCOE, 32 VF, 16 VMDQ */ +#define I40E_MIN_VSI_ALLOC 83 /* LAN, ATR, FCOE, 64 VF */ /* max 16 qps */ #define i40e_default_queues_per_vmdq(pf) \ (((pf)->flags & I40E_FLAG_RSS_AQ_CAPABLE) ? 4 : 1) -- cgit v1.1 From f19a973f46e85d4394cadb90fa7717f7ec98197a Mon Sep 17 00:00:00 2001 From: Mitch Williams Date: Mon, 12 Sep 2016 14:18:38 -0700 Subject: i40evf: enable adaptive interrupt throttling All of the code to support adaptive interrupt throttling is already in the interrupt handler, it just needs to be enabled. Fill out the data structures properly to make it happen. Single-flow traffic tests may show slightly lower throughput, but interrupts per second will drop by about 75%. 
Change-ID: I9cd7d42c025b906bf1bb85c6aeb6112684aa6471 Signed-off-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 064419e..302c974 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -370,6 +370,8 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx) { struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx]; struct i40e_ring *rx_ring = &adapter->rx_rings[r_idx]; + struct i40e_vsi *vsi = &adapter->vsi; + struct i40e_hw *hw = &adapter->hw; rx_ring->q_vector = q_vector; rx_ring->next = q_vector->rx.ring; @@ -377,7 +379,10 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx) q_vector->rx.ring = rx_ring; q_vector->rx.count++; q_vector->rx.latency_range = I40E_LOW_LATENCY; + q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting); + q_vector->ring_mask |= BIT(r_idx); q_vector->itr_countdown = ITR_COUNTDOWN_START; + wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, v_idx - 1), q_vector->rx.itr); } /** @@ -391,6 +396,8 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx) { struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx]; struct i40e_ring *tx_ring = &adapter->tx_rings[t_idx]; + struct i40e_vsi *vsi = &adapter->vsi; + struct i40e_hw *hw = &adapter->hw; tx_ring->q_vector = q_vector; tx_ring->next = q_vector->tx.ring; @@ -398,9 +405,10 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx) q_vector->tx.ring = tx_ring; q_vector->tx.count++; q_vector->tx.latency_range = I40E_LOW_LATENCY; + q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting); q_vector->itr_countdown = ITR_COUNTDOWN_START; q_vector->num_ringpairs++; - q_vector->ring_mask |= 
BIT(t_idx); + wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, v_idx - 1), q_vector->tx.itr); } /** @@ -2269,10 +2277,8 @@ int i40evf_process_config(struct i40evf_adapter *adapter) adapter->vsi.back = adapter; adapter->vsi.base_vector = 1; adapter->vsi.work_limit = I40E_DEFAULT_IRQ_WORK; - adapter->vsi.rx_itr_setting = (I40E_ITR_DYNAMIC | - ITR_REG_TO_USEC(I40E_ITR_RX_DEF)); - adapter->vsi.tx_itr_setting = (I40E_ITR_DYNAMIC | - ITR_REG_TO_USEC(I40E_ITR_TX_DEF)); + adapter->vsi.rx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_RX_DEF); + adapter->vsi.tx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_TX_DEF); vsi->netdev = adapter->netdev; vsi->qs_handle = adapter->vsi_res->qset_handle; if (vfres->vf_offload_flags & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF) { -- cgit v1.1 From 64bfd68eaecdce7b86e179fe39662340c8aed20d Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 12 Sep 2016 14:18:39 -0700 Subject: i40e: Fix Flow Director raw_buf cleanup The Tx cleanup flow was incorrectly assuming it could check for the flow director bits after it had unmapped the buffer. However in this case it results in us trying to free a raw_buf as though it is an sk_buff. To fix this I am moving up the flag test for the FD_SB bit so that when we find a non-NULL skb or raw_buf value we then check the flag and use the appropriate call to free the buffer. 
Change-ID: I6284034ba1ea87c9922e56f6eb3181f7f09bddde Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 8 ++++---- drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 7ada05e..a2077be 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -532,7 +532,10 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, struct i40e_tx_buffer *tx_buffer) { if (tx_buffer->skb) { - dev_kfree_skb_any(tx_buffer->skb); + if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) + kfree(tx_buffer->raw_buf); + else + dev_kfree_skb_any(tx_buffer->skb); if (dma_unmap_len(tx_buffer, len)) dma_unmap_single(ring->dev, dma_unmap_addr(tx_buffer, dma), @@ -545,9 +548,6 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, DMA_TO_DEVICE); } - if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) - kfree(tx_buffer->raw_buf); - tx_buffer->next_to_watch = NULL; tx_buffer->skb = NULL; dma_unmap_len_set(tx_buffer, len, 0); diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index e3427eb..cb6b130 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -51,7 +51,10 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, struct i40e_tx_buffer *tx_buffer) { if (tx_buffer->skb) { - dev_kfree_skb_any(tx_buffer->skb); + if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) + kfree(tx_buffer->raw_buf); + else + dev_kfree_skb_any(tx_buffer->skb); if (dma_unmap_len(tx_buffer, len)) dma_unmap_single(ring->dev, dma_unmap_addr(tx_buffer, dma), @@ -64,9 +67,6 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, DMA_TO_DEVICE); } - if (tx_buffer->tx_flags & 
I40E_TX_FLAGS_FD_SB) - kfree(tx_buffer->raw_buf); - tx_buffer->next_to_watch = NULL; tx_buffer->skb = NULL; dma_unmap_len_set(tx_buffer, len, 0); -- cgit v1.1 From e486bdfd7c491e997f29fcdf6a4216861ab1d06a Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 12 Sep 2016 14:18:40 -0700 Subject: i40e/i40evf: Add txring_txq function to match fm10k and ixgbe This patch adds a txring_txq function which allows us to convert a i40e_ring/i40evf_ring to a netdev_tx_queue structure. This way we can avoid having to make a multi-line function call for all the spots that need access to this. Change-ID: Ic063b71d8b92ea406d2c32e798c8e2b02809d65b Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 17 ++++++----------- drivers/net/ethernet/intel/i40e/i40e_txrx.h | 9 +++++++++ drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 17 ++++++----------- drivers/net/ethernet/intel/i40evf/i40e_txrx.h | 9 +++++++++ 4 files changed, 30 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index a2077be..6e0a7ac 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -584,8 +584,7 @@ void i40e_clean_tx_ring(struct i40e_ring *tx_ring) return; /* cleanup Tx queue statistics */ - netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index)); + netdev_tx_reset_queue(txring_txq(tx_ring)); } /** @@ -754,8 +753,8 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, tx_ring->arm_wb = true; } - netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index), + /* notify netdev of completed buffers */ + netdev_tx_completed_queue(txring_txq(tx_ring), total_packets, total_bytes); #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) @@ -2784,9 +2783,7 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_ring->next_to_use = 
i; - netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index), - first->bytecount); + netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); i40e_maybe_stop_tx(tx_ring, DESC_NEEDED); /* Algorithm to optimize tail and RS bit setting: @@ -2811,13 +2808,11 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, * trigger a force WB. */ if (skb->xmit_more && - !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index))) { + !netif_xmit_stopped(txring_txq(tx_ring))) { tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET; tail_bump = false; } else if (!skb->xmit_more && - !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index)) && + !netif_xmit_stopped(txring_txq(tx_ring)) && (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) && (tx_ring->packet_stride < WB_STRIDE) && (desc_count < WB_STRIDE)) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index b78c810..5088405 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -463,4 +463,13 @@ static inline bool i40e_rx_is_fcoe(u16 ptype) return (ptype >= I40E_RX_PTYPE_L2_FCOE_PAY3) && (ptype <= I40E_RX_PTYPE_L2_FCOE_VFT_FCOTHER); } + +/** + * txring_txq - Find the netdev Tx ring based on the i40e Tx ring + * @ring: Tx ring to find the netdev equivalent of + **/ +static inline struct netdev_queue *txring_txq(const struct i40e_ring *ring) +{ + return netdev_get_tx_queue(ring->netdev, ring->queue_index); +} #endif /* _I40E_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index cb6b130..d4c6a76 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -103,8 +103,7 @@ void i40evf_clean_tx_ring(struct i40e_ring *tx_ring) return; /* cleanup Tx queue statistics */ - 
netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index)); + netdev_tx_reset_queue(txring_txq(tx_ring)); } /** @@ -273,8 +272,8 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, tx_ring->arm_wb = true; } - netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index), + /* notify netdev of completed buffers */ + netdev_tx_completed_queue(txring_txq(tx_ring), total_packets, total_bytes); #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) @@ -2012,9 +2011,7 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_ring->next_to_use = i; - netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index), - first->bytecount); + netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); i40e_maybe_stop_tx(tx_ring, DESC_NEEDED); /* Algorithm to optimize tail and RS bit setting: @@ -2039,13 +2036,11 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, * trigger a force WB. 
*/ if (skb->xmit_more && - !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index))) { + !netif_xmit_stopped(txring_txq(tx_ring))) { tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET; tail_bump = false; } else if (!skb->xmit_more && - !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index)) && + !netif_xmit_stopped(txring_txq(tx_ring)) && (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) && (tx_ring->packet_stride < WB_STRIDE) && (desc_count < WB_STRIDE)) { diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index 0112277..84e561c 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -445,4 +445,13 @@ static inline bool i40e_rx_is_fcoe(u16 ptype) return (ptype >= I40E_RX_PTYPE_L2_FCOE_PAY3) && (ptype <= I40E_RX_PTYPE_L2_FCOE_VFT_FCOTHER); } + +/** + * txring_txq - Find the netdev Tx ring based on the i40e Tx ring + * @ring: Tx ring to find the netdev equivalent of + **/ +static inline struct netdev_queue *txring_txq(const struct i40e_ring *ring) +{ + return netdev_get_tx_queue(ring->netdev, ring->queue_index); +} #endif /* _I40E_TXRX_H_ */ -- cgit v1.1 From 5e02f2837349b399e48fd2a5e5149c9ee9c27cdd Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 12 Sep 2016 14:18:41 -0700 Subject: i40e: Split Flow Director descriptor config into separate function In an effort to improve code readability I am splitting the Flow Director filter configuration out into a separate function like we have done for the standard xmit path. The general idea is to provide a single block of code that translates the flow specification into a proper Flow Director descriptor. 
Change-ID: Id355ad8030c4e6c72c57504fa09de60c976a8ffe Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 115 ++++++++++++++++------------ 1 file changed, 64 insertions(+), 51 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 6e0a7ac..ef9b8d7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -40,6 +40,69 @@ static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size, } #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS) +/** + * i40e_fdir - Generate a Flow Director descriptor based on fdata + * @tx_ring: Tx ring to send buffer on + * @fdata: Flow director filter data + * @add: Indicate if we are adding a rule or deleting one + * + **/ +static void i40e_fdir(struct i40e_ring *tx_ring, + struct i40e_fdir_filter *fdata, bool add) +{ + struct i40e_filter_program_desc *fdir_desc; + struct i40e_pf *pf = tx_ring->vsi->back; + u32 flex_ptype, dtype_cmd; + u16 i; + + /* grab the next descriptor */ + i = tx_ring->next_to_use; + fdir_desc = I40E_TX_FDIRDESC(tx_ring, i); + + i++; + tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; + + flex_ptype = I40E_TXD_FLTR_QW0_QINDEX_MASK & + (fdata->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT); + + flex_ptype |= I40E_TXD_FLTR_QW0_FLEXOFF_MASK & + (fdata->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT); + + flex_ptype |= I40E_TXD_FLTR_QW0_PCTYPE_MASK & + (fdata->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT); + + /* Use LAN VSI Id if not programmed by user */ + flex_ptype |= I40E_TXD_FLTR_QW0_DEST_VSI_MASK & + ((u32)(fdata->dest_vsi ? : pf->vsi[pf->lan_vsi]->id) << + I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT); + + dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG; + + dtype_cmd |= add ? 
+ I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE << + I40E_TXD_FLTR_QW1_PCMD_SHIFT : + I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE << + I40E_TXD_FLTR_QW1_PCMD_SHIFT; + + dtype_cmd |= I40E_TXD_FLTR_QW1_DEST_MASK & + (fdata->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT); + + dtype_cmd |= I40E_TXD_FLTR_QW1_FD_STATUS_MASK & + (fdata->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT); + + if (fdata->cnt_index) { + dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK; + dtype_cmd |= I40E_TXD_FLTR_QW1_CNTINDEX_MASK & + ((u32)fdata->cnt_index << + I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT); + } + + fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype); + fdir_desc->rsvd = cpu_to_le32(0); + fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd); + fdir_desc->fd_id = cpu_to_le32(fdata->fd_id); +} + #define I40E_FD_CLEAN_DELAY 10 /** * i40e_program_fdir_filter - Program a Flow Director filter @@ -51,11 +114,9 @@ static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size, int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet, struct i40e_pf *pf, bool add) { - struct i40e_filter_program_desc *fdir_desc; struct i40e_tx_buffer *tx_buf, *first; struct i40e_tx_desc *tx_desc; struct i40e_ring *tx_ring; - unsigned int fpt, dcc; struct i40e_vsi *vsi; struct device *dev; dma_addr_t dma; @@ -92,56 +153,8 @@ int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet, /* grab the next descriptor */ i = tx_ring->next_to_use; - fdir_desc = I40E_TX_FDIRDESC(tx_ring, i); first = &tx_ring->tx_bi[i]; - memset(first, 0, sizeof(struct i40e_tx_buffer)); - - tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? 
i + 1 : 0; - - fpt = (fdir_data->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) & - I40E_TXD_FLTR_QW0_QINDEX_MASK; - - fpt |= (fdir_data->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) & - I40E_TXD_FLTR_QW0_FLEXOFF_MASK; - - fpt |= (fdir_data->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) & - I40E_TXD_FLTR_QW0_PCTYPE_MASK; - - /* Use LAN VSI Id if not programmed by user */ - if (fdir_data->dest_vsi == 0) - fpt |= (pf->vsi[pf->lan_vsi]->id) << - I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT; - else - fpt |= ((u32)fdir_data->dest_vsi << - I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) & - I40E_TXD_FLTR_QW0_DEST_VSI_MASK; - - dcc = I40E_TX_DESC_DTYPE_FILTER_PROG; - - if (add) - dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE << - I40E_TXD_FLTR_QW1_PCMD_SHIFT; - else - dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE << - I40E_TXD_FLTR_QW1_PCMD_SHIFT; - - dcc |= (fdir_data->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT) & - I40E_TXD_FLTR_QW1_DEST_MASK; - - dcc |= (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) & - I40E_TXD_FLTR_QW1_FD_STATUS_MASK; - - if (fdir_data->cnt_index != 0) { - dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK; - dcc |= ((u32)fdir_data->cnt_index << - I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) & - I40E_TXD_FLTR_QW1_CNTINDEX_MASK; - } - - fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(fpt); - fdir_desc->rsvd = cpu_to_le32(0); - fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc); - fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id); + i40e_fdir(tx_ring, fdir_data, add); /* Now program a dummy descriptor */ i = tx_ring->next_to_use; -- cgit v1.1 From 1eb846ac90b956e52f4269d80f13cfbe1df6850b Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 12 Sep 2016 14:18:42 -0700 Subject: i40e: Strip out debugfs hook for Flow Director filter programming This interface was only ever meant for debugging. Since it is not supposed to be here, we are removing it. 
Change-ID: Id771a1e5e7d3e2b4b7f56591b61fb48c921e1d04 Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 2 - drivers/net/ethernet/intel/i40e/i40e_debugfs.c | 80 -------------------------- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 5 +- 3 files changed, 3 insertions(+), 84 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 6b22df6..2030d7c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -710,8 +710,6 @@ struct rtnl_link_stats64 *i40e_get_vsi_stats_struct(struct i40e_vsi *vsi); int i40e_fetch_switch_configuration(struct i40e_pf *pf, bool printconfig); -int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet, - struct i40e_pf *pf, bool add); int i40e_add_del_fdir(struct i40e_vsi *vsi, struct i40e_fdir_filter *input, bool add); void i40e_fdir_check_and_reenable(struct i40e_pf *pf); diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 8555f04..0c1875b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -1430,84 +1430,6 @@ static ssize_t i40e_dbg_command_write(struct file *filp, buff = NULL; kfree(desc); desc = NULL; - } else if ((strncmp(cmd_buf, "add fd_filter", 13) == 0) || - (strncmp(cmd_buf, "rem fd_filter", 13) == 0)) { - struct i40e_fdir_filter fd_data; - u16 packet_len, i, j = 0; - char *asc_packet; - u8 *raw_packet; - bool add = false; - int ret; - - if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED)) - goto command_write_done; - - if (strncmp(cmd_buf, "add", 3) == 0) - add = true; - - if (add && (pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED)) - goto command_write_done; - - asc_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, - GFP_KERNEL); - if (!asc_packet) - goto command_write_done; - - raw_packet = 
kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, - GFP_KERNEL); - - if (!raw_packet) { - kfree(asc_packet); - asc_packet = NULL; - goto command_write_done; - } - - cnt = sscanf(&cmd_buf[13], - "%hx %2hhx %2hhx %hx %2hhx %2hhx %hx %x %hd %511s", - &fd_data.q_index, - &fd_data.flex_off, &fd_data.pctype, - &fd_data.dest_vsi, &fd_data.dest_ctl, - &fd_data.fd_status, &fd_data.cnt_index, - &fd_data.fd_id, &packet_len, asc_packet); - if (cnt != 10) { - dev_info(&pf->pdev->dev, - "program fd_filter: bad command string, cnt=%d\n", - cnt); - kfree(asc_packet); - asc_packet = NULL; - kfree(raw_packet); - goto command_write_done; - } - - /* fix packet length if user entered 0 */ - if (packet_len == 0) - packet_len = I40E_FDIR_MAX_RAW_PACKET_SIZE; - - /* make sure to check the max as well */ - packet_len = min_t(u16, - packet_len, I40E_FDIR_MAX_RAW_PACKET_SIZE); - - for (i = 0; i < packet_len; i++) { - cnt = sscanf(&asc_packet[j], "%2hhx ", &raw_packet[i]); - if (!cnt) - break; - j += 3; - } - dev_info(&pf->pdev->dev, "FD raw packet dump\n"); - print_hex_dump(KERN_INFO, "FD raw packet: ", - DUMP_PREFIX_OFFSET, 16, 1, - raw_packet, packet_len, true); - ret = i40e_program_fdir_filter(&fd_data, raw_packet, pf, add); - if (!ret) { - dev_info(&pf->pdev->dev, "Filter command send Status : Success\n"); - } else { - dev_info(&pf->pdev->dev, - "Filter command send failed %d\n", ret); - } - kfree(raw_packet); - raw_packet = NULL; - kfree(asc_packet); - asc_packet = NULL; } else if (strncmp(cmd_buf, "fd current cnt", 14) == 0) { dev_info(&pf->pdev->dev, "FD current total filter count for this interface: %d\n", i40e_get_current_fd_count(pf)); @@ -1732,8 +1654,6 @@ static ssize_t i40e_dbg_command_write(struct file *filp, dev_info(&pf->pdev->dev, " globr\n"); dev_info(&pf->pdev->dev, " send aq_cmd \n"); dev_info(&pf->pdev->dev, " send indirect aq_cmd \n"); - dev_info(&pf->pdev->dev, " add fd_filter \n"); - dev_info(&pf->pdev->dev, " rem fd_filter \n"); dev_info(&pf->pdev->dev, " fd current cnt"); 
dev_info(&pf->pdev->dev, " lldp start\n"); dev_info(&pf->pdev->dev, " lldp stop\n"); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index ef9b8d7..5237c49 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -111,8 +111,9 @@ static void i40e_fdir(struct i40e_ring *tx_ring, * @pf: The PF pointer * @add: True for add/update, False for remove **/ -int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet, - struct i40e_pf *pf, bool add) +static int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, + u8 *raw_packet, struct i40e_pf *pf, + bool add) { struct i40e_tx_buffer *tx_buf, *first; struct i40e_tx_desc *tx_desc; -- cgit v1.1 From a4fa59cc5bb028ebb8048e8dcb6f92b2a1ea07f6 Mon Sep 17 00:00:00 2001 From: Mitch Williams Date: Mon, 12 Sep 2016 14:18:43 -0700 Subject: i40e: don't configure zero-size RSS table In some rare cases, we might get a VSI with no queues. In this case, we cannot configure RSS on this VSI as it will try to divide by zero when configuring the lookup table. 
Change-ID: I6ae173a7dd3481a081e079eb10eb80275de2adb0 Signed-off-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index b434d07..8176596 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -8282,6 +8282,8 @@ static int i40e_pf_config_rss(struct i40e_pf *pf) if (!vsi->rss_size) vsi->rss_size = min_t(int, pf->alloc_rss_size, vsi->num_queue_pairs); + if (!vsi->rss_size) + return -EINVAL; lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); if (!lut) -- cgit v1.1 From 65e87c0398f542d5bd51cfd8a29b9dfd246b6a1c Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 12 Sep 2016 14:18:44 -0700 Subject: i40evf: support queue-specific settings for interrupt moderation In commit a75e8005d506f3 ("i40e: queue-specific settings for interrupt moderation") the i40e driver gained support for setting interrupt moderation values per queue. This patch adds support for this feature to the i40evf driver as well. In addition, a few changes are made to the i40e implementation to add function header documentation comments, as well. This behaves in a similar fashion to the implementation in i40e. Thus, requesting the moderation value when no queue is provided will report queue 0 value, while setting the value without a queue will set all queues at once. 
Change-ID: I1f310a57c8e6c84a8524c178d44d1b7a6d3a848e Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 72 ++++++- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 21 ++- drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 26 ++- drivers/net/ethernet/intel/i40evf/i40e_txrx.h | 8 + drivers/net/ethernet/intel/i40evf/i40evf.h | 7 - drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c | 206 ++++++++++++++++----- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 10 +- 7 files changed, 280 insertions(+), 70 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 5cad80f..92bc884 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -1970,11 +1970,22 @@ static int i40e_set_phys_id(struct net_device *netdev, * 125us (8000 interrupts per second) == ITR(62) */ +/** + * __i40e_get_coalesce - get per-queue coalesce settings + * @netdev: the netdev to check + * @ec: ethtool coalesce data structure + * @queue: which queue to pick + * + * Gets the per-queue settings for coalescence. Specifically Rx and Tx usecs + * are per queue. If queue is <0 then we default to queue 0 as the + * representative value. 
+ **/ static int __i40e_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, int queue) { struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_ring *rx_ring, *tx_ring; struct i40e_vsi *vsi = np->vsi; ec->tx_max_coalesced_frames_irq = vsi->work_limit; @@ -1989,14 +2000,18 @@ static int __i40e_get_coalesce(struct net_device *netdev, return -EINVAL; } - if (ITR_IS_DYNAMIC(vsi->rx_rings[queue]->rx_itr_setting)) + rx_ring = vsi->rx_rings[queue]; + tx_ring = vsi->tx_rings[queue]; + + if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting)) ec->use_adaptive_rx_coalesce = 1; - if (ITR_IS_DYNAMIC(vsi->tx_rings[queue]->tx_itr_setting)) + if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting)) ec->use_adaptive_tx_coalesce = 1; - ec->rx_coalesce_usecs = vsi->rx_rings[queue]->rx_itr_setting & ~I40E_ITR_DYNAMIC; - ec->tx_coalesce_usecs = vsi->tx_rings[queue]->tx_itr_setting & ~I40E_ITR_DYNAMIC; + ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC; + ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC; + /* we use the _usecs_high to store/set the interrupt rate limit * that the hardware supports, that almost but not quite @@ -2010,18 +2025,44 @@ static int __i40e_get_coalesce(struct net_device *netdev, return 0; } +/** + * i40e_get_coalesce - get a netdev's coalesce settings + * @netdev: the netdev to check + * @ec: ethtool coalesce data structure + * + * Gets the coalesce settings for a particular netdev. Note that if user has + * modified per-queue settings, this only guarantees to represent queue 0. See + * __i40e_get_coalesce for more details. 
+ **/ static int i40e_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec) { return __i40e_get_coalesce(netdev, ec, -1); } +/** + * i40e_get_per_queue_coalesce - gets coalesce settings for particular queue + * @netdev: netdev structure + * @ec: ethtool's coalesce settings + * @queue: the particular queue to read + * + * Will read a specific queue's coalesce settings + **/ static int i40e_get_per_queue_coalesce(struct net_device *netdev, u32 queue, struct ethtool_coalesce *ec) { return __i40e_get_coalesce(netdev, ec, queue); } +/** + * i40e_set_itr_per_queue - set ITR values for specific queue + * @vsi: the VSI to set values for + * @ec: coalesce settings from ethtool + * @queue: the queue to modify + * + * Change the ITR settings for a specific queue. + **/ + static void i40e_set_itr_per_queue(struct i40e_vsi *vsi, struct ethtool_coalesce *ec, int queue) @@ -2060,6 +2101,14 @@ static void i40e_set_itr_per_queue(struct i40e_vsi *vsi, i40e_flush(hw); } +/** + * __i40e_set_coalesce - set coalesce settings for particular queue + * @netdev: the netdev to change + * @ec: ethtool coalesce settings + * @queue: the queue to change + * + * Sets the coalesce settings for a particular queue. + **/ static int __i40e_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, int queue) @@ -2120,12 +2169,27 @@ static int __i40e_set_coalesce(struct net_device *netdev, return 0; } +/** + * i40e_set_coalesce - set coalesce settings for every queue on the netdev + * @netdev: the netdev to change + * @ec: ethtool coalesce settings + * + * This will set each queue to the same coalesce settings. 
+ **/ static int i40e_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec) { return __i40e_set_coalesce(netdev, ec, -1); } +/** + * i40e_set_per_queue_coalesce - set specific queue's coalesce settings + * @netdev: the netdev to change + * @ec: ethtool's coalesce settings + * @queue: the queue to change + * + * Sets the specified queue's coalesce settings. + **/ static int i40e_set_per_queue_coalesce(struct net_device *netdev, u32 queue, struct ethtool_coalesce *ec) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 5237c49..6287bf6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1877,6 +1877,15 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr) /* a small macro to shorten up some long lines */ #define INTREG I40E_PFINT_DYN_CTLN +static inline int get_rx_itr_enabled(struct i40e_vsi *vsi, int idx) +{ + return !!(vsi->rx_rings[idx]->rx_itr_setting); +} + +static inline int get_tx_itr_enabled(struct i40e_vsi *vsi, int idx) +{ + return !!(vsi->tx_rings[idx]->tx_itr_setting); +} /** * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt @@ -1892,6 +1901,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, u32 rxval, txval; int vector; int idx = q_vector->v_idx; + int rx_itr_setting, tx_itr_setting; vector = (q_vector->v_idx + vsi->base_vector); @@ -1900,18 +1910,21 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, */ rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0); + rx_itr_setting = get_rx_itr_enabled(vsi, idx); + tx_itr_setting = get_tx_itr_enabled(vsi, idx); + if (q_vector->itr_countdown > 0 || - (!ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting) && - !ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting))) { + (!ITR_IS_DYNAMIC(rx_itr_setting) && + !ITR_IS_DYNAMIC(tx_itr_setting))) { goto enable_int; } - if (ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting)) { + if 
(ITR_IS_DYNAMIC(rx_itr_setting)) { rx = i40e_set_new_dynamic_itr(&q_vector->rx); rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); } - if (ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting)) { + if (ITR_IS_DYNAMIC(tx_itr_setting)) { tx = i40e_set_new_dynamic_itr(&q_vector->tx); txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr); } diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index d4c6a76..75f2a2c 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -1311,6 +1311,19 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr) /* a small macro to shorten up some long lines */ #define INTREG I40E_VFINT_DYN_CTLN1 +static inline int get_rx_itr_enabled(struct i40e_vsi *vsi, int idx) +{ + struct i40evf_adapter *adapter = vsi->back; + + return !!(adapter->rx_rings[idx].rx_itr_setting); +} + +static inline int get_tx_itr_enabled(struct i40e_vsi *vsi, int idx) +{ + struct i40evf_adapter *adapter = vsi->back; + + return !!(adapter->tx_rings[idx].tx_itr_setting); +} /** * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt @@ -1325,6 +1338,8 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, bool rx = false, tx = false; u32 rxval, txval; int vector; + int idx = q_vector->v_idx; + int rx_itr_setting, tx_itr_setting; vector = (q_vector->v_idx + vsi->base_vector); @@ -1333,18 +1348,21 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, */ rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0); + rx_itr_setting = get_rx_itr_enabled(vsi, idx); + tx_itr_setting = get_tx_itr_enabled(vsi, idx); + if (q_vector->itr_countdown > 0 || - (!ITR_IS_DYNAMIC(vsi->rx_itr_setting) && - !ITR_IS_DYNAMIC(vsi->tx_itr_setting))) { + (!ITR_IS_DYNAMIC(rx_itr_setting) && + !ITR_IS_DYNAMIC(tx_itr_setting))) { goto enable_int; } - if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) { + if (ITR_IS_DYNAMIC(rx_itr_setting)) { rx = 
i40e_set_new_dynamic_itr(&q_vector->rx); rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); } - if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) { + if (ITR_IS_DYNAMIC(tx_itr_setting)) { tx = i40e_set_new_dynamic_itr(&q_vector->tx); txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr); } diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index 84e561c..abcdeca 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -287,6 +287,14 @@ struct i40e_ring { u8 dcb_tc; /* Traffic class of ring */ u8 __iomem *tail; + /* high bit set means dynamic, use accessors routines to read/write. + * hardware only supports 2us resolution for the ITR registers. + * these values always store the USER setting, and must be converted + * before programming to a register. + */ + u16 rx_itr_setting; + u16 tx_itr_setting; + u16 count; /* Number of descriptors */ u16 reg_idx; /* HW register index of the ring */ u16 rx_buf_len; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index dc00aaf..c5fd724 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -59,13 +59,6 @@ struct i40e_vsi { unsigned long state; int base_vector; u16 work_limit; - /* high bit set means dynamic, use accessor routines to read/write. - * hardware only supports 2us resolution for the ITR registers. - * these values always store the USER setting, and must be converted - * before programming to a register. 
- */ - u16 rx_itr_setting; - u16 tx_itr_setting; u16 qs_handle; }; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c index e17a154..a994015 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c @@ -296,93 +296,207 @@ static int i40evf_set_ringparam(struct net_device *netdev, } /** - * i40evf_get_coalesce - Get interrupt coalescing settings - * @netdev: network interface device structure - * @ec: ethtool coalesce structure + * __i40evf_get_coalesce - get per-queue coalesce settings + * @netdev: the netdev to check + * @ec: ethtool coalesce data structure + * @queue: which queue to pick * - * Returns current coalescing settings. This is referred to elsewhere in the - * driver as Interrupt Throttle Rate, as this is how the hardware describes - * this functionality. + * Gets the per-queue settings for coalescence. Specifically Rx and Tx usecs + * are per queue. If queue is <0 then we default to queue 0 as the + * representative value. **/ -static int i40evf_get_coalesce(struct net_device *netdev, - struct ethtool_coalesce *ec) +static int __i40evf_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + int queue) { struct i40evf_adapter *adapter = netdev_priv(netdev); struct i40e_vsi *vsi = &adapter->vsi; + struct i40e_ring *rx_ring, *tx_ring; ec->tx_max_coalesced_frames = vsi->work_limit; ec->rx_max_coalesced_frames = vsi->work_limit; - if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) + /* Rx and Tx usecs per queue value. If user doesn't specify the + * queue, return queue 0's value to represent. 
+ */ + if (queue < 0) + queue = 0; + else if (queue >= adapter->num_active_queues) + return -EINVAL; + + rx_ring = &adapter->rx_rings[queue]; + tx_ring = &adapter->tx_rings[queue]; + + if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting)) ec->use_adaptive_rx_coalesce = 1; - if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) + if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting)) ec->use_adaptive_tx_coalesce = 1; - ec->rx_coalesce_usecs = vsi->rx_itr_setting & ~I40E_ITR_DYNAMIC; - ec->tx_coalesce_usecs = vsi->tx_itr_setting & ~I40E_ITR_DYNAMIC; + ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC; + ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC; return 0; } /** - * i40evf_set_coalesce - Set interrupt coalescing settings + * i40evf_get_coalesce - Get interrupt coalescing settings * @netdev: network interface device structure * @ec: ethtool coalesce structure * - * Change current coalescing settings. + * Returns current coalescing settings. This is referred to elsewhere in the + * driver as Interrupt Throttle Rate, as this is how the hardware describes + * this functionality. Note that if per-queue settings have been modified this + * only represents the settings of queue 0. **/ -static int i40evf_set_coalesce(struct net_device *netdev, +static int i40evf_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec) { - struct i40evf_adapter *adapter = netdev_priv(netdev); - struct i40e_hw *hw = &adapter->hw; + return __i40evf_get_coalesce(netdev, ec, -1); +} + +/** + * i40evf_get_per_queue_coalesce - get coalesce values for specific queue + * @netdev: netdev to read + * @ec: coalesce settings from ethtool + * @queue: the queue to read + * + * Read specific queue's coalesce settings. 
+ **/ +static int i40evf_get_per_queue_coalesce(struct net_device *netdev, + u32 queue, + struct ethtool_coalesce *ec) +{ + return __i40evf_get_coalesce(netdev, ec, queue); +} + +/** + * i40evf_set_itr_per_queue - set ITR values for specific queue + * @vsi: the VSI to set values for + * @ec: coalesce settings from ethtool + * @queue: the queue to modify + * + * Change the ITR settings for a specific queue. + **/ +static void i40evf_set_itr_per_queue(struct i40evf_adapter *adapter, + struct ethtool_coalesce *ec, + int queue) +{ struct i40e_vsi *vsi = &adapter->vsi; + struct i40e_hw *hw = &adapter->hw; struct i40e_q_vector *q_vector; + u16 vector; + + adapter->rx_rings[queue].rx_itr_setting = ec->rx_coalesce_usecs; + adapter->tx_rings[queue].tx_itr_setting = ec->tx_coalesce_usecs; + + if (ec->use_adaptive_rx_coalesce) + adapter->rx_rings[queue].rx_itr_setting |= I40E_ITR_DYNAMIC; + else + adapter->rx_rings[queue].rx_itr_setting &= ~I40E_ITR_DYNAMIC; + + if (ec->use_adaptive_tx_coalesce) + adapter->tx_rings[queue].tx_itr_setting |= I40E_ITR_DYNAMIC; + else + adapter->tx_rings[queue].tx_itr_setting &= ~I40E_ITR_DYNAMIC; + + q_vector = adapter->rx_rings[queue].q_vector; + q_vector->rx.itr = ITR_TO_REG(adapter->rx_rings[queue].rx_itr_setting); + vector = vsi->base_vector + q_vector->v_idx; + wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, vector - 1), q_vector->rx.itr); + + q_vector = adapter->tx_rings[queue].q_vector; + q_vector->tx.itr = ITR_TO_REG(adapter->tx_rings[queue].tx_itr_setting); + vector = vsi->base_vector + q_vector->v_idx; + wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, vector - 1), q_vector->tx.itr); + + i40e_flush(hw); +} + +/** + * __i40evf_set_coalesce - set coalesce settings for particular queue + * @netdev: the netdev to change + * @ec: ethtool coalesce settings + * @queue: the queue to change + * + * Sets the coalesce settings for a particular queue. 
+ **/ +static int __i40evf_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + int queue) +{ + struct i40evf_adapter *adapter = netdev_priv(netdev); + struct i40e_vsi *vsi = &adapter->vsi; int i; if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq) vsi->work_limit = ec->tx_max_coalesced_frames_irq; - if ((ec->rx_coalesce_usecs >= (I40E_MIN_ITR << 1)) && - (ec->rx_coalesce_usecs <= (I40E_MAX_ITR << 1))) - vsi->rx_itr_setting = ec->rx_coalesce_usecs; - - else + if (ec->rx_coalesce_usecs == 0) { + if (ec->use_adaptive_rx_coalesce) + netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n"); + } else if ((ec->rx_coalesce_usecs < (I40E_MIN_ITR << 1)) || + (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1))) { + netif_info(adapter, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n"); return -EINVAL; + } - if ((ec->tx_coalesce_usecs >= (I40E_MIN_ITR << 1)) && - (ec->tx_coalesce_usecs <= (I40E_MAX_ITR << 1))) - vsi->tx_itr_setting = ec->tx_coalesce_usecs; - else if (ec->use_adaptive_tx_coalesce) - vsi->tx_itr_setting = (I40E_ITR_DYNAMIC | - ITR_REG_TO_USEC(I40E_ITR_RX_DEF)); else + if (ec->tx_coalesce_usecs == 0) { + if (ec->use_adaptive_tx_coalesce) + netif_info(adapter, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n"); + } else if ((ec->tx_coalesce_usecs < (I40E_MIN_ITR << 1)) || + (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1))) { + netif_info(adapter, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n"); return -EINVAL; + } - if (ec->use_adaptive_rx_coalesce) - vsi->rx_itr_setting |= I40E_ITR_DYNAMIC; - else - vsi->rx_itr_setting &= ~I40E_ITR_DYNAMIC; - - if (ec->use_adaptive_tx_coalesce) - vsi->tx_itr_setting |= I40E_ITR_DYNAMIC; - else - vsi->tx_itr_setting &= ~I40E_ITR_DYNAMIC; - - for (i = 0; i < adapter->num_msix_vectors - NONQ_VECS; i++) { - q_vector = &adapter->q_vectors[i]; - q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting); - wr32(hw, 
I40E_VFINT_ITRN1(0, i), q_vector->rx.itr); - q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting); - wr32(hw, I40E_VFINT_ITRN1(1, i), q_vector->tx.itr); - i40e_flush(hw); + /* Rx and Tx usecs has per queue value. If user doesn't specify the + * queue, apply to all queues. + */ + if (queue < 0) { + for (i = 0; i < adapter->num_active_queues; i++) + i40evf_set_itr_per_queue(adapter, ec, i); + } else if (queue < adapter->num_active_queues) { + i40evf_set_itr_per_queue(adapter, ec, queue); + } else { + netif_info(adapter, drv, netdev, "Invalid queue value, queue range is 0 - %d\n", + adapter->num_active_queues - 1); + return -EINVAL; } return 0; } /** + * i40evf_set_coalesce - Set interrupt coalescing settings + * @netdev: network interface device structure + * @ec: ethtool coalesce structure + * + * Change current coalescing settings for every queue. + **/ +static int i40evf_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ + return __i40evf_set_coalesce(netdev, ec, -1); +} + +/** + * i40evf_set_per_queue_coalesce - set specific queue's coalesce settings + * @netdev: the netdev to change + * @ec: ethtool's coalesce settings + * @queue: the queue to modify + * + * Modifies a specific queue's coalesce settings. 
+ */ +static int i40evf_set_per_queue_coalesce(struct net_device *netdev, + u32 queue, + struct ethtool_coalesce *ec) +{ + return __i40evf_set_coalesce(netdev, ec, queue); +} + +/** * i40evf_get_rxnfc - command to get RX flow classification rules * @netdev: network interface device structure * @cmd: ethtool rxnfc command @@ -533,6 +647,8 @@ static const struct ethtool_ops i40evf_ethtool_ops = { .set_msglevel = i40evf_set_msglevel, .get_coalesce = i40evf_get_coalesce, .set_coalesce = i40evf_set_coalesce, + .get_per_queue_coalesce = i40evf_get_per_queue_coalesce, + .set_per_queue_coalesce = i40evf_set_per_queue_coalesce, .get_rxnfc = i40evf_get_rxnfc, .get_rxfh_indir_size = i40evf_get_rxfh_indir_size, .get_rxfh = i40evf_get_rxfh, diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 302c974..1437281 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -370,7 +370,6 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx) { struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx]; struct i40e_ring *rx_ring = &adapter->rx_rings[r_idx]; - struct i40e_vsi *vsi = &adapter->vsi; struct i40e_hw *hw = &adapter->hw; rx_ring->q_vector = q_vector; @@ -379,7 +378,7 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx) q_vector->rx.ring = rx_ring; q_vector->rx.count++; q_vector->rx.latency_range = I40E_LOW_LATENCY; - q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting); + q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting); q_vector->ring_mask |= BIT(r_idx); q_vector->itr_countdown = ITR_COUNTDOWN_START; wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, v_idx - 1), q_vector->rx.itr); @@ -396,7 +395,6 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx) { struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx]; struct i40e_ring *tx_ring = &adapter->tx_rings[t_idx]; - struct 
i40e_vsi *vsi = &adapter->vsi; struct i40e_hw *hw = &adapter->hw; tx_ring->q_vector = q_vector; @@ -405,7 +403,7 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx) q_vector->tx.ring = tx_ring; q_vector->tx.count++; q_vector->tx.latency_range = I40E_LOW_LATENCY; - q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting); + q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting); q_vector->itr_countdown = ITR_COUNTDOWN_START; q_vector->num_ringpairs++; wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, v_idx - 1), q_vector->tx.itr); @@ -1162,6 +1160,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) tx_ring->netdev = adapter->netdev; tx_ring->dev = &adapter->pdev->dev; tx_ring->count = adapter->tx_desc_count; + tx_ring->tx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_TX_DEF); if (adapter->flags & I40E_FLAG_WB_ON_ITR_CAPABLE) tx_ring->flags |= I40E_TXR_FLAGS_WB_ON_ITR; @@ -1170,6 +1169,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) rx_ring->netdev = adapter->netdev; rx_ring->dev = &adapter->pdev->dev; rx_ring->count = adapter->rx_desc_count; + rx_ring->rx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_RX_DEF); } return 0; @@ -2277,8 +2277,6 @@ int i40evf_process_config(struct i40evf_adapter *adapter) adapter->vsi.back = adapter; adapter->vsi.base_vector = 1; adapter->vsi.work_limit = I40E_DEFAULT_IRQ_WORK; - adapter->vsi.rx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_RX_DEF); - adapter->vsi.tx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_TX_DEF); vsi->netdev = adapter->netdev; vsi->qs_handle = adapter->vsi_res->qset_handle; if (vfres->vf_offload_flags & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF) { -- cgit v1.1