-rw-r--r--   Documentation/driver-model/devres.txt    |    5
-rw-r--r--   Documentation/networking/filter.txt      |  158
-rw-r--r--   arch/arm/boot/dts/am33xx.dtsi            |    2
-rw-r--r--   arch/arm/boot/dts/am4372.dtsi            |    2
-rw-r--r--   drivers/clk/ti/clk-43xx.c                |   16
-rw-r--r--   drivers/net/ethernet/ti/cpsw.c           |   56
-rw-r--r--   drivers/net/ethernet/ti/cpts.c           |   11
-rw-r--r--   drivers/net/ethernet/ti/davinci_mdio.c   |   48
-rw-r--r--   drivers/net/ethernet/via/via-rhine.c     |   77
-rw-r--r--   drivers/net/phy/mdio_bus.c               |   67
-rw-r--r--   include/linux/filter.h                   |   47
-rw-r--r--   include/linux/phy.h                      |    7
-rw-r--r--   include/linux/tcp.h                      |    1
-rw-r--r--   include/net/tcp.h                        |   24
-rw-r--r--   net/core/dev.c                           |    8
-rw-r--r--   net/core/filter.c                        |  538
-rw-r--r--   net/ipv4/tcp_bic.c                       |    5
-rw-r--r--   net/ipv4/tcp_cong.c                      |   24
-rw-r--r--   net/ipv4/tcp_cubic.c                     |    5
-rw-r--r--   net/ipv4/tcp_highspeed.c                 |    4
-rw-r--r--   net/ipv4/tcp_htcp.c                      |    4
-rw-r--r--   net/ipv4/tcp_hybla.c                     |    7
-rw-r--r--   net/ipv4/tcp_illinois.c                  |    5
-rw-r--r--   net/ipv4/tcp_input.c                     |    9
-rw-r--r--   net/ipv4/tcp_lp.c                        |    5
-rw-r--r--   net/ipv4/tcp_output.c                    |   21
-rw-r--r--   net/ipv4/tcp_scalable.c                  |    5
-rw-r--r--   net/ipv4/tcp_vegas.c                     |    7
-rw-r--r--   net/ipv4/tcp_veno.c                      |    9
-rw-r--r--   net/ipv4/tcp_yeah.c                      |    5
-rw-r--r--   net/sched/cls_api.c                      |    3
-rw-r--r--   net/sched/sch_api.c                      |    8
32 files changed, 737 insertions(+), 456 deletions(-)
diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index 4f7897e..c74e044 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -308,3 +308,8 @@ SLAVE DMA ENGINE
SPI
devm_spi_register_master()
+
+MDIO
+ devm_mdiobus_alloc()
+ devm_mdiobus_alloc_size()
+ devm_mdiobus_free()
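
For context, a minimal sketch of how a driver's probe() might use the managed MDIO
helpers listed above (the foo_* names and register accessors are hypothetical and not
part of this series; includes and the read/write implementations are omitted):

    /* Hypothetical driver probe using the new devres MDIO helpers */
    static int foo_mdio_probe(struct platform_device *pdev)
    {
            struct mii_bus *bus;
            int ret;

            bus = devm_mdiobus_alloc(&pdev->dev);  /* freed automatically on detach */
            if (!bus)
                    return -ENOMEM;

            bus->name = "foo-mdio";
            snprintf(bus->id, MII_BUS_ID_SIZE, "%s", dev_name(&pdev->dev));
            bus->parent = &pdev->dev;
            bus->read = foo_mdio_read;             /* hypothetical accessors */
            bus->write = foo_mdio_write;

            ret = mdiobus_register(bus);
            if (ret)
                    return ret;     /* no explicit mdiobus_free() needed on error */

            platform_set_drvdata(pdev, bus);
            return 0;
    }

The matching remove() then only needs mdiobus_unregister(), mirroring the
davinci_mdio conversion later in this series.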
diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index 82e1cb0..748fd38 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -613,7 +613,7 @@ Some core changes of the new internal format:
Therefore, BPF calling convention is defined as:
- * R0 - return value from in-kernel function
+ * R0 - return value from in-kernel function, and exit value for BPF program
* R1 - R5 - arguments from BPF program to in-kernel function
* R6 - R9 - callee saved registers that in-kernel function will preserve
* R10 - read-only frame pointer to access stack
@@ -659,9 +659,140 @@ Some core changes of the new internal format:
- Introduces bpf_call insn and register passing convention for zero overhead
calls from/to other kernel functions:
- After a kernel function call, R1 - R5 are reset to unreadable and R0 has a
- return type of the function. Since R6 - R9 are callee saved, their state is
- preserved across the call.
+ Before an in-kernel function call, the internal BPF program needs to
+ place function arguments into the R1 to R5 registers to satisfy the calling
+ convention; the interpreter will then take them from the registers and pass
+ them to the in-kernel function. If R1 - R5 are mapped to the CPU registers
+ that are used for argument passing on the given architecture, the JIT compiler
+ doesn't need to emit extra moves. Function arguments will be in the correct
+ registers and the BPF_CALL instruction will be JITed as a single 'call' HW
+ instruction. This calling convention was picked to cover common call
+ situations without a performance penalty.
+
+ After an in-kernel function call, R1 - R5 are reset to unreadable and R0 has
+ a return value of the function. Since R6 - R9 are callee saved, their state
+ is preserved across the call.
+
+ For example, consider three C functions:
+
+ u64 f1() { return (*_f2)(1); }
+ u64 f2(u64 a) { return f3(a + 1, a); }
+ u64 f3(u64 a, u64 b) { return a - b; }
+
+ GCC can compile f1, f3 into x86_64:
+
+ f1:
+ movl $1, %edi
+ movq _f2(%rip), %rax
+ jmp *%rax
+ f3:
+ movq %rdi, %rax
+ subq %rsi, %rax
+ ret
+
+ Function f2 in BPF may look like:
+
+ f2:
+ bpf_mov R2, R1
+ bpf_add R1, 1
+ bpf_call f3
+ bpf_exit
+
+ If f2 is JITed and the pointer stored to '_f2', the calls f1 -> f2 -> f3 and
+ returns will be seamless. Without JIT, the __sk_run_filter() interpreter needs
+ to be used to call into f2.
+
+ For practical reasons all BPF programs have only one argument 'ctx' which is
+ already placed into R1 (e.g. on __sk_run_filter() startup) and the programs
+ can call kernel functions with up to 5 arguments. Calls with 6 or more arguments
+ are currently not supported, but this restriction can be lifted if necessary
+ in the future.
+
+ On 64-bit architectures all registers map to HW registers one to one. For
+ example, the x86_64 JIT compiler can map them as ...
+
+ R0 - rax
+ R1 - rdi
+ R2 - rsi
+ R3 - rdx
+ R4 - rcx
+ R5 - r8
+ R6 - rbx
+ R7 - r13
+ R8 - r14
+ R9 - r15
+ R10 - rbp
+
+ ... since x86_64 ABI mandates rdi, rsi, rdx, rcx, r8, r9 for argument passing
+ and rbx, r12 - r15 are callee saved.
+
+ Then the following internal BPF pseudo-program:
+
+ bpf_mov R6, R1 /* save ctx */
+ bpf_mov R2, 2
+ bpf_mov R3, 3
+ bpf_mov R4, 4
+ bpf_mov R5, 5
+ bpf_call foo
+ bpf_mov R7, R0 /* save foo() return value */
+ bpf_mov R1, R6 /* restore ctx for next call */
+ bpf_mov R2, 6
+ bpf_mov R3, 7
+ bpf_mov R4, 8
+ bpf_mov R5, 9
+ bpf_call bar
+ bpf_add R0, R7
+ bpf_exit
+
+ After JIT to x86_64, the result may look like:
+
+ push %rbp
+ mov %rsp,%rbp
+ sub $0x228,%rsp
+ mov %rbx,-0x228(%rbp)
+ mov %r13,-0x220(%rbp)
+ mov %rdi,%rbx
+ mov $0x2,%esi
+ mov $0x3,%edx
+ mov $0x4,%ecx
+ mov $0x5,%r8d
+ callq foo
+ mov %rax,%r13
+ mov %rbx,%rdi
+ mov $0x6,%esi
+ mov $0x7,%edx
+ mov $0x8,%ecx
+ mov $0x9,%r8d
+ callq bar
+ add %r13,%rax
+ mov -0x228(%rbp),%rbx
+ mov -0x220(%rbp),%r13
+ leaveq
+ retq
+
+ In this example it is equivalent to the following C:
+
+ u64 bpf_filter(u64 ctx)
+ {
+ return foo(ctx, 2, 3, 4, 5) + bar(ctx, 6, 7, 8, 9);
+ }
+
+ In-kernel functions foo() and bar() with the prototype u64 (*)(u64 arg1, u64
+ arg2, u64 arg3, u64 arg4, u64 arg5) will receive arguments in the proper
+ registers and place their return value into '%rax', which is R0 in BPF.
+ The prologue and epilogue are emitted by the JIT and are implicit in the
+ interpreter. R0 - R5 are scratch registers, so a BPF program needs to save
+ their values itself if it wants them preserved across a call, as defined by
+ the calling convention.
+
+ For example the following program is invalid:
+
+ bpf_mov R1, 1
+ bpf_call foo
+ bpf_mov R0, R1
+ bpf_exit
+
+ After the call the registers R1-R5 contain junk values and cannot be read.
+ In the future a BPF verifier can be used to validate internal BPF programs.
Also in the new design, BPF is limited to 4096 insns, which means that any
program will terminate quickly and will only call a fixed number of kernel
@@ -676,6 +807,25 @@ A program, that is translated internally consists of the following elements:
op:16, jt:8, jf:8, k:32 ==> op:8, a_reg:4, x_reg:4, off:16, imm:32
+So far, 87 internal BPF instructions have been implemented. The 8-bit 'op' opcode
+field has room for new instructions, some of which may use a 16/24/32 byte
+encoding. New instructions must be a multiple of 8 bytes to preserve backward
+compatibility.
+
+Internal BPF is a general purpose RISC instruction set. Not every register and
+every instruction is used during translation from the original BPF to the new
+format. For example, socket filters do not use the 'exclusive add' instruction,
+but tracing filters may use it to maintain event counters. Register R9 is not
+used by socket filters either, but more complex filters may run out of
+registers and would have to resort to spill/fill to the stack.
+
+Internal BPF can be used as a generic assembler for last-step performance
+optimizations; socket filters and seccomp already use it as an assembler.
+Tracing filters may use it as an assembler to generate code from within the
+kernel. In-kernel usage may not be bound by security considerations, since the
+generated internal BPF code may only optimize an internal code path and never
+be exposed to user space. Safety of internal BPF can come from a verifier
+(TBD). In such use cases as described, it may be used as a safe instruction
+set.
+
Just like the original BPF, the new format runs within a controlled environment,
is deterministic and the kernel can easily prove that. The safety of the program
can be determined in two steps: first step does depth-first-search to disallow
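
For reference, a sketch of how the 'op:8, a_reg:4, x_reg:4, off:16, imm:32' encoding
above maps onto the sock_filter_int structure used throughout this series (the field
comments are illustrative):

    struct sock_filter_int {
            __u8    code;           /* opcode (op:8) */
            __u8    a_reg:4;        /* dest register (a_reg:4) */
            __u8    x_reg:4;        /* source register (x_reg:4) */
            __s16   off;            /* signed offset (off:16) */
            __s32   imm;            /* signed immediate constant (imm:32) */
    };

This is one 8-byte instruction word, which is why any future multi-word instructions
must stay a multiple of 8 bytes.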
diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi
index 9770e35..d1e2b36 100644
--- a/arch/arm/boot/dts/am33xx.dtsi
+++ b/arch/arm/boot/dts/am33xx.dtsi
@@ -665,6 +665,8 @@
mac: ethernet@4a100000 {
compatible = "ti,cpsw";
ti,hwmods = "cpgmac0";
+ clocks = <&cpsw_125mhz_gclk>, <&cpsw_cpts_rft_clk>;
+ clock-names = "fck", "cpts";
cpdma_channels = <8>;
ale_entries = <1024>;
bd_ram_size = <0x2000>;
diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi
index 36d523a..c2779f6 100644
--- a/arch/arm/boot/dts/am4372.dtsi
+++ b/arch/arm/boot/dts/am4372.dtsi
@@ -489,6 +489,8 @@
#address-cells = <1>;
#size-cells = <1>;
ti,hwmods = "cpgmac0";
+ clocks = <&cpsw_125mhz_gclk>, <&cpsw_cpts_rft_clk>;
+ clock-names = "fck", "cpts";
status = "disabled";
cpdma_channels = <8>;
ale_entries = <1024>;
diff --git a/drivers/clk/ti/clk-43xx.c b/drivers/clk/ti/clk-43xx.c
index 67c8de5..b4877e0 100644
--- a/drivers/clk/ti/clk-43xx.c
+++ b/drivers/clk/ti/clk-43xx.c
@@ -110,9 +110,25 @@ static struct ti_dt_clk am43xx_clks[] = {
int __init am43xx_dt_clk_init(void)
{
+ struct clk *clk1, *clk2;
+
ti_dt_clocks_register(am43xx_clks);
omap2_clk_disable_autoidle_all();
+ /*
+ * cpsw_cpts_rft_clk has a choice of 3 clocksources:
+ * dpll_core_m4_ck, dpll_core_m5_ck and dpll_disp_m2_ck.
+ * By default dpll_core_m4_ck is selected; with this as the clock
+ * source the CPTS does not work properly. It gives clockcheck errors
+ * while running PTP:
+ * clockcheck: clock jumped backward or running slower than expected!
+ * Selecting dpll_core_m5_ck as the clocksource fixes this issue.
+ * On AM335x dpll_core_m5_ck is the default clocksource.
+ */
+ clk1 = clk_get_sys(NULL, "cpsw_cpts_rft_clk");
+ clk2 = clk_get_sys(NULL, "dpll_core_m5_ck");
+ clk_set_parent(clk1, clk2);
+
return 0;
}
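
A slightly more defensive variant of the reparenting above would check the clk API
return values (a sketch only, not what the patch does):

    clk1 = clk_get_sys(NULL, "cpsw_cpts_rft_clk");
    clk2 = clk_get_sys(NULL, "dpll_core_m5_ck");
    if (IS_ERR(clk1) || IS_ERR(clk2))
            pr_warn("am43xx: failed to look up CPTS clocks\n");
    else if (clk_set_parent(clk1, clk2))
            pr_warn("am43xx: failed to reparent cpsw_cpts_rft_clk\n");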
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 36aa109..d14c8da 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -248,20 +248,31 @@ struct cpsw_ss_regs {
#define TS_131 (1<<11) /* Time Sync Dest IP Addr 131 enable */
#define TS_130 (1<<10) /* Time Sync Dest IP Addr 130 enable */
#define TS_129 (1<<9) /* Time Sync Dest IP Addr 129 enable */
-#define TS_BIT8 (1<<8) /* ts_ttl_nonzero? */
+#define TS_TTL_NONZERO (1<<8) /* Time Sync Time To Live Non-zero enable */
+#define TS_ANNEX_F_EN (1<<6) /* Time Sync Annex F enable */
#define TS_ANNEX_D_EN (1<<4) /* Time Sync Annex D enable */
#define TS_LTYPE2_EN (1<<3) /* Time Sync LTYPE 2 enable */
#define TS_LTYPE1_EN (1<<2) /* Time Sync LTYPE 1 enable */
#define TS_TX_EN (1<<1) /* Time Sync Transmit Enable */
#define TS_RX_EN (1<<0) /* Time Sync Receive Enable */
-#define CTRL_TS_BITS \
- (TS_320 | TS_319 | TS_132 | TS_131 | TS_130 | TS_129 | TS_BIT8 | \
- TS_ANNEX_D_EN | TS_LTYPE1_EN)
+#define CTRL_V2_TS_BITS \
+ (TS_320 | TS_319 | TS_132 | TS_131 | TS_130 | TS_129 |\
+ TS_TTL_NONZERO | TS_ANNEX_D_EN | TS_LTYPE1_EN)
-#define CTRL_ALL_TS_MASK (CTRL_TS_BITS | TS_TX_EN | TS_RX_EN)
-#define CTRL_TX_TS_BITS (CTRL_TS_BITS | TS_TX_EN)
-#define CTRL_RX_TS_BITS (CTRL_TS_BITS | TS_RX_EN)
+#define CTRL_V2_ALL_TS_MASK (CTRL_V2_TS_BITS | TS_TX_EN | TS_RX_EN)
+#define CTRL_V2_TX_TS_BITS (CTRL_V2_TS_BITS | TS_TX_EN)
+#define CTRL_V2_RX_TS_BITS (CTRL_V2_TS_BITS | TS_RX_EN)
+
+
+#define CTRL_V3_TS_BITS \
+ (TS_320 | TS_319 | TS_132 | TS_131 | TS_130 | TS_129 |\
+ TS_TTL_NONZERO | TS_ANNEX_F_EN | TS_ANNEX_D_EN |\
+ TS_LTYPE1_EN)
+
+#define CTRL_V3_ALL_TS_MASK (CTRL_V3_TS_BITS | TS_TX_EN | TS_RX_EN)
+#define CTRL_V3_TX_TS_BITS (CTRL_V3_TS_BITS | TS_TX_EN)
+#define CTRL_V3_RX_TS_BITS (CTRL_V3_TS_BITS | TS_RX_EN)
/* Bit definitions for the CPSW2_TS_SEQ_MTYPE register */
#define TS_SEQ_ID_OFFSET_SHIFT (16) /* Time Sync Sequence ID Offset */
@@ -1376,13 +1387,27 @@ static void cpsw_hwtstamp_v2(struct cpsw_priv *priv)
slave = &priv->slaves[priv->data.active_slave];
ctrl = slave_read(slave, CPSW2_CONTROL);
- ctrl &= ~CTRL_ALL_TS_MASK;
+ switch (priv->version) {
+ case CPSW_VERSION_2:
+ ctrl &= ~CTRL_V2_ALL_TS_MASK;
- if (priv->cpts->tx_enable)
- ctrl |= CTRL_TX_TS_BITS;
+ if (priv->cpts->tx_enable)
+ ctrl |= CTRL_V2_TX_TS_BITS;
- if (priv->cpts->rx_enable)
- ctrl |= CTRL_RX_TS_BITS;
+ if (priv->cpts->rx_enable)
+ ctrl |= CTRL_V2_RX_TS_BITS;
+ break;
+ case CPSW_VERSION_3:
+ default:
+ ctrl &= ~CTRL_V3_ALL_TS_MASK;
+
+ if (priv->cpts->tx_enable)
+ ctrl |= CTRL_V3_TX_TS_BITS;
+
+ if (priv->cpts->rx_enable)
+ ctrl |= CTRL_V3_RX_TS_BITS;
+ break;
+ }
mtype = (30 << TS_SEQ_ID_OFFSET_SHIFT) | EVENT_MSG_BITS;
@@ -1398,7 +1423,8 @@ static int cpsw_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
struct hwtstamp_config cfg;
if (priv->version != CPSW_VERSION_1 &&
- priv->version != CPSW_VERSION_2)
+ priv->version != CPSW_VERSION_2 &&
+ priv->version != CPSW_VERSION_3)
return -EOPNOTSUPP;
if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
@@ -1443,6 +1469,7 @@ static int cpsw_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
cpsw_hwtstamp_v1(priv);
break;
case CPSW_VERSION_2:
+ case CPSW_VERSION_3:
cpsw_hwtstamp_v2(priv);
break;
default:
@@ -1459,7 +1486,8 @@ static int cpsw_hwtstamp_get(struct net_device *dev, struct ifreq *ifr)
struct hwtstamp_config cfg;
if (priv->version != CPSW_VERSION_1 &&
- priv->version != CPSW_VERSION_2)
+ priv->version != CPSW_VERSION_2 &&
+ priv->version != CPSW_VERSION_3)
return -EOPNOTSUPP;
cfg.flags = 0;
diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 2435139..6b56f85 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -236,13 +236,11 @@ static void cpts_overflow_check(struct work_struct *work)
schedule_delayed_work(&cpts->overflow_work, CPTS_OVERFLOW_PERIOD);
}
-#define CPTS_REF_CLOCK_NAME "cpsw_cpts_rft_clk"
-
-static void cpts_clk_init(struct cpts *cpts)
+static void cpts_clk_init(struct device *dev, struct cpts *cpts)
{
- cpts->refclk = clk_get(NULL, CPTS_REF_CLOCK_NAME);
+ cpts->refclk = devm_clk_get(dev, "cpts");
if (IS_ERR(cpts->refclk)) {
- pr_err("Failed to clk_get %s\n", CPTS_REF_CLOCK_NAME);
+ dev_err(dev, "Failed to get cpts refclk\n");
cpts->refclk = NULL;
return;
}
@@ -252,7 +250,6 @@ static void cpts_clk_init(struct cpts *cpts)
static void cpts_clk_release(struct cpts *cpts)
{
clk_disable(cpts->refclk);
- clk_put(cpts->refclk);
}
static int cpts_match(struct sk_buff *skb, unsigned int ptp_class,
@@ -390,7 +387,7 @@ int cpts_register(struct device *dev, struct cpts *cpts,
for (i = 0; i < CPTS_MAX_EVENTS; i++)
list_add(&cpts->pool_data[i].list, &cpts->pool);
- cpts_clk_init(cpts);
+ cpts_clk_init(dev, cpts);
cpts_write32(cpts, CPTS_EN, control);
cpts_write32(cpts, TS_PEND_EN, int_enable);
diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c
index 0cca9de..34e97ec 100644
--- a/drivers/net/ethernet/ti/davinci_mdio.c
+++ b/drivers/net/ethernet/ti/davinci_mdio.c
@@ -321,15 +321,14 @@ static int davinci_mdio_probe(struct platform_device *pdev)
struct phy_device *phy;
int ret, addr;
- data = kzalloc(sizeof(*data), GFP_KERNEL);
+ data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
- data->bus = mdiobus_alloc();
+ data->bus = devm_mdiobus_alloc(dev);
if (!data->bus) {
dev_err(dev, "failed to alloc mii bus\n");
- ret = -ENOMEM;
- goto bail_out;
+ return -ENOMEM;
}
if (dev->of_node) {
@@ -349,12 +348,9 @@ static int davinci_mdio_probe(struct platform_device *pdev)
data->bus->parent = dev;
data->bus->priv = data;
- /* Select default pin state */
- pinctrl_pm_select_default_state(&pdev->dev);
-
pm_runtime_enable(&pdev->dev);
pm_runtime_get_sync(&pdev->dev);
- data->clk = clk_get(&pdev->dev, "fck");
+ data->clk = devm_clk_get(dev, "fck");
if (IS_ERR(data->clk)) {
dev_err(dev, "failed to get device clock\n");
ret = PTR_ERR(data->clk);
@@ -367,24 +363,9 @@ static int davinci_mdio_probe(struct platform_device *pdev)
spin_lock_init(&data->lock);
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!res) {
- dev_err(dev, "could not find register map resource\n");
- ret = -ENOENT;
- goto bail_out;
- }
-
- res = devm_request_mem_region(dev, res->start, resource_size(res),
- dev_name(dev));
- if (!res) {
- dev_err(dev, "could not allocate register map resource\n");
- ret = -ENXIO;
- goto bail_out;
- }
-
- data->regs = devm_ioremap_nocache(dev, res->start, resource_size(res));
- if (!data->regs) {
- dev_err(dev, "could not map mdio registers\n");
- ret = -ENOMEM;
+ data->regs = devm_ioremap_resource(dev, res);
+ if (IS_ERR(data->regs)) {
+ ret = PTR_ERR(data->regs);
goto bail_out;
}
@@ -406,16 +387,9 @@ static int davinci_mdio_probe(struct platform_device *pdev)
return 0;
bail_out:
- if (data->bus)
- mdiobus_free(data->bus);
-
- if (data->clk)
- clk_put(data->clk);
pm_runtime_put_sync(&pdev->dev);
pm_runtime_disable(&pdev->dev);
- kfree(data);
-
return ret;
}
@@ -423,18 +397,12 @@ static int davinci_mdio_remove(struct platform_device *pdev)
{
struct davinci_mdio_data *data = platform_get_drvdata(pdev);
- if (data->bus) {
+ if (data->bus)
mdiobus_unregister(data->bus);
- mdiobus_free(data->bus);
- }
- if (data->clk)
- clk_put(data->clk);
pm_runtime_put_sync(&pdev->dev);
pm_runtime_disable(&pdev->dev);
- kfree(data);
-
return 0;
}
diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c
index 4fa9201..13cfcce 100644
--- a/drivers/net/ethernet/via/via-rhine.c
+++ b/drivers/net/ethernet/via/via-rhine.c
@@ -264,6 +264,8 @@ enum rhine_quirks {
rq6patterns = 0x0040, /* 6 instead of 4 patterns for WOL */
rqStatusWBRace = 0x0080, /* Tx Status Writeback Error possible */
rqRhineI = 0x0100, /* See comment below */
+ rqIntPHY = 0x0200, /* Integrated PHY */
+ rqMgmt = 0x0400, /* Management adapter */
};
/*
* rqRhineI: VT86C100A (aka Rhine-I) uses different bits to enable
@@ -284,11 +286,11 @@ static DEFINE_PCI_DEVICE_TABLE(rhine_pci_tbl) = {
MODULE_DEVICE_TABLE(pci, rhine_pci_tbl);
/* OpenFirmware identifiers for platform-bus devices
- * The .data field is currently only used to store chip revision
- * (for quirks etc.)
+ * The .data field is currently only used to store quirks
*/
+static u32 vt8500_quirks = rqWOL | rqForceReset | rq6patterns;
static struct of_device_id rhine_of_tbl[] = {
- { .compatible = "via,vt8500-rhine", .data = (void *)0x84 },
+ { .compatible = "via,vt8500-rhine", .data = &vt8500_quirks },
{ } /* terminate list */
};
MODULE_DEVICE_TABLE(of, rhine_of_tbl);
@@ -459,7 +461,6 @@ struct rhine_private {
unsigned char *tx_bufs;
dma_addr_t tx_bufs_dma;
- int revision;
int irq;
long pioaddr;
struct net_device *dev;
@@ -882,7 +883,7 @@ static const struct net_device_ops rhine_netdev_ops = {
#endif
};
-static int rhine_init_one_common(struct device *hwdev, int revision,
+static int rhine_init_one_common(struct device *hwdev, u32 quirks,
long pioaddr, void __iomem *ioaddr, int irq)
{
struct net_device *dev;
@@ -906,31 +907,13 @@ static int rhine_init_one_common(struct device *hwdev, int revision,
rp = netdev_priv(dev);
rp->dev = dev;
- rp->revision = revision;
+ rp->quirks = quirks;
rp->pioaddr = pioaddr;
rp->base = ioaddr;
rp->irq = irq;
rp->msg_enable = netif_msg_init(debug, RHINE_MSG_DEFAULT);
- phy_id = 0;
- name = "Rhine";
- if (revision < VTunknown0) {
- rp->quirks = rqRhineI;
- } else if (revision >= VT6102) {
- rp->quirks = rqWOL | rqForceReset;
- if (revision < VT6105) {
- name = "Rhine II";
- rp->quirks |= rqStatusWBRace; /* Rhine-II exclusive */
- } else {
- phy_id = 1; /* Integrated PHY, phy_id fixed to 1 */
- if (revision >= VT6105_B0)
- rp->quirks |= rq6patterns;
- if (revision < VT6105M)
- name = "Rhine III";
- else
- name = "Rhine III (Management Adapter)";
- }
- }
+ phy_id = rp->quirks & rqIntPHY ? 1 : 0;
u64_stats_init(&rp->tx_stats.syncp);
u64_stats_init(&rp->rx_stats.syncp);
@@ -975,7 +958,7 @@ static int rhine_init_one_common(struct device *hwdev, int revision,
if (rp->quirks & rqRhineI)
dev->features |= NETIF_F_SG|NETIF_F_HW_CSUM;
- if (rp->revision >= VT6105M)
+ if (rp->quirks & rqMgmt)
dev->features |= NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_CTAG_RX |
NETIF_F_HW_VLAN_CTAG_FILTER;
@@ -985,6 +968,15 @@ static int rhine_init_one_common(struct device *hwdev, int revision,
if (rc)
goto err_out_free_netdev;
+ if (rp->quirks & rqRhineI)
+ name = "Rhine";
+ else if (rp->quirks & rqStatusWBRace)
+ name = "Rhine II";
+ else if (rp->quirks & rqMgmt)
+ name = "Rhine III (Management Adapter)";
+ else
+ name = "Rhine III";
+
netdev_info(dev, "VIA %s at 0x%lx, %pM, IRQ %d\n",
name, (long)ioaddr, dev->dev_addr, rp->irq);
@@ -1031,7 +1023,7 @@ static int rhine_init_one_pci(struct pci_dev *pdev,
long pioaddr, memaddr;
void __iomem *ioaddr;
int io_size = pdev->revision < VTunknown0 ? 128 : 256;
- u32 quirks = pdev->revision < VTunknown0 ? rqRhineI : 0;
+ u32 quirks;
#ifdef USE_MMIO
int bar = 1;
#else
@@ -1047,6 +1039,21 @@ static int rhine_init_one_pci(struct pci_dev *pdev,
if (rc)
goto err_out;
+ if (pdev->revision < VTunknown0) {
+ quirks = rqRhineI;
+ } else if (pdev->revision >= VT6102) {
+ quirks = rqWOL | rqForceReset;
+ if (pdev->revision < VT6105) {
+ quirks |= rqStatusWBRace;
+ } else {
+ quirks |= rqIntPHY;
+ if (pdev->revision >= VT6105_B0)
+ quirks |= rq6patterns;
+ if (pdev->revision >= VT6105M)
+ quirks |= rqMgmt;
+ }
+ }
+
/* sanity check */
if ((pci_resource_len(pdev, 0) < io_size) ||
(pci_resource_len(pdev, 1) < io_size)) {
@@ -1093,7 +1100,7 @@ static int rhine_init_one_pci(struct pci_dev *pdev,
}
#endif /* USE_MMIO */
- rc = rhine_init_one_common(&pdev->dev, pdev->revision,
+ rc = rhine_init_one_common(&pdev->dev, quirks,
pioaddr, ioaddr, pdev->irq);
if (!rc)
return 0;
@@ -1111,7 +1118,7 @@ err_out:
static int rhine_init_one_platform(struct platform_device *pdev)
{
const struct of_device_id *match;
- u32 revision;
+ const u32 *quirks;
int irq;
struct resource *res;
void __iomem *ioaddr;
@@ -1129,11 +1136,11 @@ static int rhine_init_one_platform(struct platform_device *pdev)
if (!irq)
return -EINVAL;
- revision = (u32)match->data;
- if (!revision)
+ quirks = match->data;
+ if (!quirks)
return -EINVAL;
- return rhine_init_one_common(&pdev->dev, revision,
+ return rhine_init_one_common(&pdev->dev, *quirks,
(long)ioaddr, ioaddr, irq);
}
@@ -1523,7 +1530,7 @@ static void init_registers(struct net_device *dev)
rhine_set_rx_mode(dev);
- if (rp->revision >= VT6105M)
+ if (rp->quirks & rqMgmt)
rhine_init_cam_filter(dev);
napi_enable(&rp->napi);
@@ -2160,7 +2167,7 @@ static void rhine_set_rx_mode(struct net_device *dev)
/* Too many to match, or accept all multicasts. */
iowrite32(0xffffffff, ioaddr + MulticastFilter0);
iowrite32(0xffffffff, ioaddr + MulticastFilter1);
- } else if (rp->revision >= VT6105M) {
+ } else if (rp->quirks & rqMgmt) {
int i = 0;
u32 mCAMmask = 0; /* 32 mCAMs (6105M and better) */
netdev_for_each_mc_addr(ha, dev) {
@@ -2182,7 +2189,7 @@ static void rhine_set_rx_mode(struct net_device *dev)
iowrite32(mc_filter[1], ioaddr + MulticastFilter1);
}
/* enable/disable VLAN receive filtering */
- if (rp->revision >= VT6105M) {
+ if (rp->quirks & rqMgmt) {
if (dev->flags & IFF_PROMISC)
BYTE_REG_BITS_OFF(BCR1_VIDFR, ioaddr + PCIBusConfig1);
else
diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 76f54b3..68a9a38 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -69,6 +69,73 @@ struct mii_bus *mdiobus_alloc_size(size_t size)
}
EXPORT_SYMBOL(mdiobus_alloc_size);
+static void _devm_mdiobus_free(struct device *dev, void *res)
+{
+ mdiobus_free(*(struct mii_bus **)res);
+}
+
+static int devm_mdiobus_match(struct device *dev, void *res, void *data)
+{
+ struct mii_bus **r = res;
+
+ if (WARN_ON(!r || !*r))
+ return 0;
+
+ return *r == data;
+}
+
+/**
+ * devm_mdiobus_alloc_size - Resource-managed mdiobus_alloc_size()
+ * @dev: Device to allocate mii_bus for
+ * @sizeof_priv: Space to allocate for private structure.
+ *
+ * Managed mdiobus_alloc_size. mii_bus allocated with this function is
+ * automatically freed on driver detach.
+ *
+ * If an mii_bus allocated with this function needs to be freed separately,
+ * devm_mdiobus_free() must be used.
+ *
+ * RETURNS:
+ * Pointer to allocated mii_bus on success, NULL on failure.
+ */
+struct mii_bus *devm_mdiobus_alloc_size(struct device *dev, int sizeof_priv)
+{
+ struct mii_bus **ptr, *bus;
+
+ ptr = devres_alloc(_devm_mdiobus_free, sizeof(*ptr), GFP_KERNEL);
+ if (!ptr)
+ return NULL;
+
+ /* use raw alloc_dr for kmalloc caller tracing */
+ bus = mdiobus_alloc_size(sizeof_priv);
+ if (bus) {
+ *ptr = bus;
+ devres_add(dev, ptr);
+ } else {
+ devres_free(ptr);
+ }
+
+ return bus;
+}
+EXPORT_SYMBOL_GPL(devm_mdiobus_alloc_size);
+
+/**
+ * devm_mdiobus_free - Resource-managed mdiobus_free()
+ * @dev: Device this mii_bus belongs to
+ * @bus: the mii_bus associated with the device
+ *
+ * Free mii_bus allocated with devm_mdiobus_alloc_size().
+ */
+void devm_mdiobus_free(struct device *dev, struct mii_bus *bus)
+{
+ int rc;
+
+ rc = devres_release(dev, _devm_mdiobus_free,
+ devm_mdiobus_match, bus);
+ WARN_ON(rc);
+}
+EXPORT_SYMBOL_GPL(devm_mdiobus_free);
+
/**
* mdiobus_release - mii_bus device release callback
* @d: the target struct device that contains the mii_bus
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 759abf7..ed1efab 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -37,16 +37,50 @@
#define BPF_CALL 0x80 /* function call */
#define BPF_EXIT 0x90 /* function return */
+/* Placeholder/dummy for 0 */
+#define BPF_0 0
+
+/* Register numbers */
+enum {
+ BPF_REG_0 = 0,
+ BPF_REG_1,
+ BPF_REG_2,
+ BPF_REG_3,
+ BPF_REG_4,
+ BPF_REG_5,
+ BPF_REG_6,
+ BPF_REG_7,
+ BPF_REG_8,
+ BPF_REG_9,
+ BPF_REG_10,
+ __MAX_BPF_REG,
+};
+
/* BPF has 10 general purpose 64-bit registers and stack frame. */
-#define MAX_BPF_REG 11
+#define MAX_BPF_REG __MAX_BPF_REG
+
+/* ArgX, context and stack frame pointer register positions. Note,
+ * Arg1, Arg2, Arg3, etc are used as argument mappings of function
+ * calls in BPF_CALL instruction.
+ */
+#define BPF_REG_ARG1 BPF_REG_1
+#define BPF_REG_ARG2 BPF_REG_2
+#define BPF_REG_ARG3 BPF_REG_3
+#define BPF_REG_ARG4 BPF_REG_4
+#define BPF_REG_ARG5 BPF_REG_5
+#define BPF_REG_CTX BPF_REG_6
+#define BPF_REG_FP BPF_REG_10
+
+/* Additional register mappings for converted user programs. */
+#define BPF_REG_A BPF_REG_0
+#define BPF_REG_X BPF_REG_7
+#define BPF_REG_TMP BPF_REG_8
/* BPF program can access up to 512 bytes of stack space. */
#define MAX_BPF_STACK 512
-/* Arg1, context and stack frame pointer register positions. */
-#define ARG1_REG 1
-#define CTX_REG 6
-#define FP_REG 10
+/* Macro to invoke filter function. */
+#define SK_RUN_FILTER(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi)
struct sock_filter_int {
__u8 code; /* opcode */
@@ -97,9 +131,6 @@ static inline unsigned int sk_filter_size(unsigned int proglen)
#define sk_filter_proglen(fprog) \
(fprog->len * sizeof(fprog->filter[0]))
-#define SK_RUN_FILTER(filter, ctx) \
- (*filter->bpf_func)(ctx, filter->insnsi)
-
int sk_filter(struct sock *sk, struct sk_buff *skb);
u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx,
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 51d15f6..864ddaf 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -198,6 +198,13 @@ static inline struct mii_bus *mdiobus_alloc(void)
int mdiobus_register(struct mii_bus *bus);
void mdiobus_unregister(struct mii_bus *bus);
void mdiobus_free(struct mii_bus *bus);
+struct mii_bus *devm_mdiobus_alloc_size(struct device *dev, int sizeof_priv);
+static inline struct mii_bus *devm_mdiobus_alloc(struct device *dev)
+{
+ return devm_mdiobus_alloc_size(dev, 0);
+}
+
+void devm_mdiobus_free(struct device *dev, struct mii_bus *bus);
struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr);
int mdiobus_read(struct mii_bus *bus, int addr, u32 regnum);
int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val);
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 2399468..4e37c71 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -230,6 +230,7 @@ struct tcp_sock {
u32 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
u32 snd_cwnd_used;
u32 snd_cwnd_stamp;
+ u32 lsnd_pending; /* packets inflight or unsent since last xmit */
u32 prior_cwnd; /* Congestion window at start of Recovery. */
u32 prr_delivered; /* Number of newly delivered packets to
* receiver in Recovery. */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 163d2b4..3c94184 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -796,7 +796,7 @@ struct tcp_congestion_ops {
/* return slow start threshold (required) */
u32 (*ssthresh)(struct sock *sk);
/* do new cwnd calculation (required) */
- void (*cong_avoid)(struct sock *sk, u32 ack, u32 acked, u32 in_flight);
+ void (*cong_avoid)(struct sock *sk, u32 ack, u32 acked);
/* call before changing ca_state (optional) */
void (*set_state)(struct sock *sk, u8 new_state);
/* call when cwnd event occurs (optional) */
@@ -828,7 +828,7 @@ void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w);
extern struct tcp_congestion_ops tcp_init_congestion_ops;
u32 tcp_reno_ssthresh(struct sock *sk);
-void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight);
+void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);
extern struct tcp_congestion_ops tcp_reno;
static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
@@ -974,7 +974,25 @@ static inline u32 tcp_wnd_end(const struct tcp_sock *tp)
{
return tp->snd_una + tp->snd_wnd;
}
-bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight);
+
+/* We follow the spirit of RFC2861 to validate cwnd but implement a more
+ * flexible approach. The RFC suggests cwnd should not be raised unless
+ * it was fully used previously. But we allow cwnd to grow as long as the
+ * application has used half the cwnd.
+ * Example :
+ * cwnd is 10 (IW10), but application sends 9 frames.
+ * We allow cwnd to reach 18 when all frames are ACKed.
+ * This check is safe because it's as aggressive as slow start which already
+ * risks 100% overshoot. The advantage is that we discourage applications from
+ * sending filler packets or extra data just to artificially blow up cwnd
+ * usage, and we allow an application-limited process to probe bandwidth more
+ * aggressively.
+ */
+static inline bool tcp_is_cwnd_limited(const struct sock *sk)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+
+ return tp->snd_cwnd < 2 * tp->lsnd_pending;
+}
static inline void tcp_check_probe_timer(struct sock *sk)
{
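
Plugging the comment's example into the new helper (a worked illustration, not
kernel code):

    /* cwnd is 10 (IW10), application has sent 9 frames since the last xmit */
    u32 snd_cwnd = 10, lsnd_pending = 9;

    /* 10 < 2 * 9 -> true: still considered cwnd-limited, cwnd may keep growing */
    bool limited = snd_cwnd < 2 * lsnd_pending;

    /* once snd_cwnd reaches 18, 18 < 2 * 9 is false and growth stops */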
diff --git a/net/core/dev.c b/net/core/dev.c
index 11d70e3..fe0b9cd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5606,10 +5606,6 @@ static void rollback_registered_many(struct list_head *head)
*/
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
- if (!dev->rtnl_link_ops ||
- dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
- rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
-
/*
* Flush the unicast and multicast chains
*/
@@ -5619,6 +5615,10 @@ static void rollback_registered_many(struct list_head *head)
if (dev->netdev_ops->ndo_uninit)
dev->netdev_ops->ndo_uninit(dev);
+ if (!dev->rtnl_link_ops ||
+ dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
+ rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
+
/* Notifier chain MUST detach us all upper devices. */
WARN_ON(netdev_has_any_upper_dev(dev));
diff --git a/net/core/filter.c b/net/core/filter.c
index 7c4db3d..eb020a7 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -45,6 +45,27 @@
#include <linux/seccomp.h>
#include <linux/if_vlan.h>
+/* Registers */
+#define R0 regs[BPF_REG_0]
+#define R1 regs[BPF_REG_1]
+#define R2 regs[BPF_REG_2]
+#define R3 regs[BPF_REG_3]
+#define R4 regs[BPF_REG_4]
+#define R5 regs[BPF_REG_5]
+#define R6 regs[BPF_REG_6]
+#define R7 regs[BPF_REG_7]
+#define R8 regs[BPF_REG_8]
+#define R9 regs[BPF_REG_9]
+#define R10 regs[BPF_REG_10]
+
+/* Named registers */
+#define A regs[insn->a_reg]
+#define X regs[insn->x_reg]
+#define FP regs[BPF_REG_FP]
+#define ARG1 regs[BPF_REG_ARG1]
+#define CTX regs[BPF_REG_CTX]
+#define K insn->imm
+
/* No hurry in this branch
*
* Exported for the bpf jit load helper.
@@ -57,9 +78,9 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns
ptr = skb_network_header(skb) + k - SKF_NET_OFF;
else if (k >= SKF_LL_OFF)
ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
-
if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
return ptr;
+
return NULL;
}
@@ -68,6 +89,7 @@ static inline void *load_pointer(const struct sk_buff *skb, int k,
{
if (k >= 0)
return skb_header_pointer(skb, k, size, buffer);
+
return bpf_internal_load_pointer_neg_helper(skb, k, size);
}
@@ -122,13 +144,6 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
return 0;
}
-/* Register mappings for user programs. */
-#define A_REG 0
-#define X_REG 7
-#define TMP_REG 8
-#define ARG2_REG 2
-#define ARG3_REG 3
-
/**
* __sk_run_filter - run a filter on a given context
* @ctx: buffer to run the filter on
@@ -142,208 +157,204 @@ unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn)
{
u64 stack[MAX_BPF_STACK / sizeof(u64)];
u64 regs[MAX_BPF_REG], tmp;
- void *ptr;
- int off;
-
-#define K insn->imm
-#define A regs[insn->a_reg]
-#define X regs[insn->x_reg]
-#define R0 regs[0]
-
-#define CONT ({insn++; goto select_insn; })
-#define CONT_JMP ({insn++; goto select_insn; })
-
static const void *jumptable[256] = {
[0 ... 255] = &&default_label,
/* Now overwrite non-defaults ... */
-#define DL(A, B, C) [A|B|C] = &&A##_##B##_##C
- DL(BPF_ALU, BPF_ADD, BPF_X),
- DL(BPF_ALU, BPF_ADD, BPF_K),
- DL(BPF_ALU, BPF_SUB, BPF_X),
- DL(BPF_ALU, BPF_SUB, BPF_K),
- DL(BPF_ALU, BPF_AND, BPF_X),
- DL(BPF_ALU, BPF_AND, BPF_K),
- DL(BPF_ALU, BPF_OR, BPF_X),
- DL(BPF_ALU, BPF_OR, BPF_K),
- DL(BPF_ALU, BPF_LSH, BPF_X),
- DL(BPF_ALU, BPF_LSH, BPF_K),
- DL(BPF_ALU, BPF_RSH, BPF_X),
- DL(BPF_ALU, BPF_RSH, BPF_K),
- DL(BPF_ALU, BPF_XOR, BPF_X),
- DL(BPF_ALU, BPF_XOR, BPF_K),
- DL(BPF_ALU, BPF_MUL, BPF_X),
- DL(BPF_ALU, BPF_MUL, BPF_K),
- DL(BPF_ALU, BPF_MOV, BPF_X),
- DL(BPF_ALU, BPF_MOV, BPF_K),
- DL(BPF_ALU, BPF_DIV, BPF_X),
- DL(BPF_ALU, BPF_DIV, BPF_K),
- DL(BPF_ALU, BPF_MOD, BPF_X),
- DL(BPF_ALU, BPF_MOD, BPF_K),
- DL(BPF_ALU, BPF_NEG, 0),
- DL(BPF_ALU, BPF_END, BPF_TO_BE),
- DL(BPF_ALU, BPF_END, BPF_TO_LE),
- DL(BPF_ALU64, BPF_ADD, BPF_X),
- DL(BPF_ALU64, BPF_ADD, BPF_K),
- DL(BPF_ALU64, BPF_SUB, BPF_X),
- DL(BPF_ALU64, BPF_SUB, BPF_K),
- DL(BPF_ALU64, BPF_AND, BPF_X),
- DL(BPF_ALU64, BPF_AND, BPF_K),
- DL(BPF_ALU64, BPF_OR, BPF_X),
- DL(BPF_ALU64, BPF_OR, BPF_K),
- DL(BPF_ALU64, BPF_LSH, BPF_X),
- DL(BPF_ALU64, BPF_LSH, BPF_K),
- DL(BPF_ALU64, BPF_RSH, BPF_X),
- DL(BPF_ALU64, BPF_RSH, BPF_K),
- DL(BPF_ALU64, BPF_XOR, BPF_X),
- DL(BPF_ALU64, BPF_XOR, BPF_K),
- DL(BPF_ALU64, BPF_MUL, BPF_X),
- DL(BPF_ALU64, BPF_MUL, BPF_K),
- DL(BPF_ALU64, BPF_MOV, BPF_X),
- DL(BPF_ALU64, BPF_MOV, BPF_K),
- DL(BPF_ALU64, BPF_ARSH, BPF_X),
- DL(BPF_ALU64, BPF_ARSH, BPF_K),
- DL(BPF_ALU64, BPF_DIV, BPF_X),
- DL(BPF_ALU64, BPF_DIV, BPF_K),
- DL(BPF_ALU64, BPF_MOD, BPF_X),
- DL(BPF_ALU64, BPF_MOD, BPF_K),
- DL(BPF_ALU64, BPF_NEG, 0),
- DL(BPF_JMP, BPF_CALL, 0),
- DL(BPF_JMP, BPF_JA, 0),
- DL(BPF_JMP, BPF_JEQ, BPF_X),
- DL(BPF_JMP, BPF_JEQ, BPF_K),
- DL(BPF_JMP, BPF_JNE, BPF_X),
- DL(BPF_JMP, BPF_JNE, BPF_K),
- DL(BPF_JMP, BPF_JGT, BPF_X),
- DL(BPF_JMP, BPF_JGT, BPF_K),
- DL(BPF_JMP, BPF_JGE, BPF_X),
- DL(BPF_JMP, BPF_JGE, BPF_K),
- DL(BPF_JMP, BPF_JSGT, BPF_X),
- DL(BPF_JMP, BPF_JSGT, BPF_K),
- DL(BPF_JMP, BPF_JSGE, BPF_X),
- DL(BPF_JMP, BPF_JSGE, BPF_K),
- DL(BPF_JMP, BPF_JSET, BPF_X),
- DL(BPF_JMP, BPF_JSET, BPF_K),
- DL(BPF_JMP, BPF_EXIT, 0),
- DL(BPF_STX, BPF_MEM, BPF_B),
- DL(BPF_STX, BPF_MEM, BPF_H),
- DL(BPF_STX, BPF_MEM, BPF_W),
- DL(BPF_STX, BPF_MEM, BPF_DW),
- DL(BPF_STX, BPF_XADD, BPF_W),
- DL(BPF_STX, BPF_XADD, BPF_DW),
- DL(BPF_ST, BPF_MEM, BPF_B),
- DL(BPF_ST, BPF_MEM, BPF_H),
- DL(BPF_ST, BPF_MEM, BPF_W),
- DL(BPF_ST, BPF_MEM, BPF_DW),
- DL(BPF_LDX, BPF_MEM, BPF_B),
- DL(BPF_LDX, BPF_MEM, BPF_H),
- DL(BPF_LDX, BPF_MEM, BPF_W),
- DL(BPF_LDX, BPF_MEM, BPF_DW),
- DL(BPF_LD, BPF_ABS, BPF_W),
- DL(BPF_LD, BPF_ABS, BPF_H),
- DL(BPF_LD, BPF_ABS, BPF_B),
- DL(BPF_LD, BPF_IND, BPF_W),
- DL(BPF_LD, BPF_IND, BPF_H),
- DL(BPF_LD, BPF_IND, BPF_B),
+#define DL(A, B, C) [BPF_##A|BPF_##B|BPF_##C] = &&A##_##B##_##C
+ DL(ALU, ADD, X),
+ DL(ALU, ADD, K),
+ DL(ALU, SUB, X),
+ DL(ALU, SUB, K),
+ DL(ALU, AND, X),
+ DL(ALU, AND, K),
+ DL(ALU, OR, X),
+ DL(ALU, OR, K),
+ DL(ALU, LSH, X),
+ DL(ALU, LSH, K),
+ DL(ALU, RSH, X),
+ DL(ALU, RSH, K),
+ DL(ALU, XOR, X),
+ DL(ALU, XOR, K),
+ DL(ALU, MUL, X),
+ DL(ALU, MUL, K),
+ DL(ALU, MOV, X),
+ DL(ALU, MOV, K),
+ DL(ALU, DIV, X),
+ DL(ALU, DIV, K),
+ DL(ALU, MOD, X),
+ DL(ALU, MOD, K),
+ DL(ALU, NEG, 0),
+ DL(ALU, END, TO_BE),
+ DL(ALU, END, TO_LE),
+ DL(ALU64, ADD, X),
+ DL(ALU64, ADD, K),
+ DL(ALU64, SUB, X),
+ DL(ALU64, SUB, K),
+ DL(ALU64, AND, X),
+ DL(ALU64, AND, K),
+ DL(ALU64, OR, X),
+ DL(ALU64, OR, K),
+ DL(ALU64, LSH, X),
+ DL(ALU64, LSH, K),
+ DL(ALU64, RSH, X),
+ DL(ALU64, RSH, K),
+ DL(ALU64, XOR, X),
+ DL(ALU64, XOR, K),
+ DL(ALU64, MUL, X),
+ DL(ALU64, MUL, K),
+ DL(ALU64, MOV, X),
+ DL(ALU64, MOV, K),
+ DL(ALU64, ARSH, X),
+ DL(ALU64, ARSH, K),
+ DL(ALU64, DIV, X),
+ DL(ALU64, DIV, K),
+ DL(ALU64, MOD, X),
+ DL(ALU64, MOD, K),
+ DL(ALU64, NEG, 0),
+ DL(JMP, CALL, 0),
+ DL(JMP, JA, 0),
+ DL(JMP, JEQ, X),
+ DL(JMP, JEQ, K),
+ DL(JMP, JNE, X),
+ DL(JMP, JNE, K),
+ DL(JMP, JGT, X),
+ DL(JMP, JGT, K),
+ DL(JMP, JGE, X),
+ DL(JMP, JGE, K),
+ DL(JMP, JSGT, X),
+ DL(JMP, JSGT, K),
+ DL(JMP, JSGE, X),
+ DL(JMP, JSGE, K),
+ DL(JMP, JSET, X),
+ DL(JMP, JSET, K),
+ DL(JMP, EXIT, 0),
+ DL(STX, MEM, B),
+ DL(STX, MEM, H),
+ DL(STX, MEM, W),
+ DL(STX, MEM, DW),
+ DL(STX, XADD, W),
+ DL(STX, XADD, DW),
+ DL(ST, MEM, B),
+ DL(ST, MEM, H),
+ DL(ST, MEM, W),
+ DL(ST, MEM, DW),
+ DL(LDX, MEM, B),
+ DL(LDX, MEM, H),
+ DL(LDX, MEM, W),
+ DL(LDX, MEM, DW),
+ DL(LD, ABS, W),
+ DL(LD, ABS, H),
+ DL(LD, ABS, B),
+ DL(LD, IND, W),
+ DL(LD, IND, H),
+ DL(LD, IND, B),
#undef DL
};
+ void *ptr;
+ int off;
- regs[FP_REG] = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
- regs[ARG1_REG] = (u64) (unsigned long) ctx;
- regs[A_REG] = 0;
- regs[X_REG] = 0;
+#define CONT ({ insn++; goto select_insn; })
+#define CONT_JMP ({ insn++; goto select_insn; })
+
+ FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
+ ARG1 = (u64) (unsigned long) ctx;
+
+ /* Register for user BPF programs need to be reset first. */
+ regs[BPF_REG_A] = 0;
+ regs[BPF_REG_X] = 0;
select_insn:
goto *jumptable[insn->code];
/* ALU */
#define ALU(OPCODE, OP) \
- BPF_ALU64_##OPCODE##_BPF_X: \
+ ALU64_##OPCODE##_X: \
A = A OP X; \
CONT; \
- BPF_ALU_##OPCODE##_BPF_X: \
+ ALU_##OPCODE##_X: \
A = (u32) A OP (u32) X; \
CONT; \
- BPF_ALU64_##OPCODE##_BPF_K: \
+ ALU64_##OPCODE##_K: \
A = A OP K; \
CONT; \
- BPF_ALU_##OPCODE##_BPF_K: \
+ ALU_##OPCODE##_K: \
A = (u32) A OP (u32) K; \
CONT;
- ALU(BPF_ADD, +)
- ALU(BPF_SUB, -)
- ALU(BPF_AND, &)
- ALU(BPF_OR, |)
- ALU(BPF_LSH, <<)
- ALU(BPF_RSH, >>)
- ALU(BPF_XOR, ^)
- ALU(BPF_MUL, *)
+ ALU(ADD, +)
+ ALU(SUB, -)
+ ALU(AND, &)
+ ALU(OR, |)
+ ALU(LSH, <<)
+ ALU(RSH, >>)
+ ALU(XOR, ^)
+ ALU(MUL, *)
#undef ALU
- BPF_ALU_BPF_NEG_0:
+ ALU_NEG_0:
A = (u32) -A;
CONT;
- BPF_ALU64_BPF_NEG_0:
+ ALU64_NEG_0:
A = -A;
CONT;
- BPF_ALU_BPF_MOV_BPF_X:
+ ALU_MOV_X:
A = (u32) X;
CONT;
- BPF_ALU_BPF_MOV_BPF_K:
+ ALU_MOV_K:
A = (u32) K;
CONT;
- BPF_ALU64_BPF_MOV_BPF_X:
+ ALU64_MOV_X:
A = X;
CONT;
- BPF_ALU64_BPF_MOV_BPF_K:
+ ALU64_MOV_K:
A = K;
CONT;
- BPF_ALU64_BPF_ARSH_BPF_X:
+ ALU64_ARSH_X:
(*(s64 *) &A) >>= X;
CONT;
- BPF_ALU64_BPF_ARSH_BPF_K:
+ ALU64_ARSH_K:
(*(s64 *) &A) >>= K;
CONT;
- BPF_ALU64_BPF_MOD_BPF_X:
+ ALU64_MOD_X:
if (unlikely(X == 0))
return 0;
tmp = A;
A = do_div(tmp, X);
CONT;
- BPF_ALU_BPF_MOD_BPF_X:
+ ALU_MOD_X:
if (unlikely(X == 0))
return 0;
tmp = (u32) A;
A = do_div(tmp, (u32) X);
CONT;
- BPF_ALU64_BPF_MOD_BPF_K:
+ ALU64_MOD_K:
tmp = A;
A = do_div(tmp, K);
CONT;
- BPF_ALU_BPF_MOD_BPF_K:
+ ALU_MOD_K:
tmp = (u32) A;
A = do_div(tmp, (u32) K);
CONT;
- BPF_ALU64_BPF_DIV_BPF_X:
+ ALU64_DIV_X:
if (unlikely(X == 0))
return 0;
do_div(A, X);
CONT;
- BPF_ALU_BPF_DIV_BPF_X:
+ ALU_DIV_X:
if (unlikely(X == 0))
return 0;
tmp = (u32) A;
do_div(tmp, (u32) X);
A = (u32) tmp;
CONT;
- BPF_ALU64_BPF_DIV_BPF_K:
+ ALU64_DIV_K:
do_div(A, K);
CONT;
- BPF_ALU_BPF_DIV_BPF_K:
+ ALU_DIV_K:
tmp = (u32) A;
do_div(tmp, (u32) K);
A = (u32) tmp;
CONT;
- BPF_ALU_BPF_END_BPF_TO_BE:
+ ALU_END_TO_BE:
switch (K) {
case 16:
A = (__force u16) cpu_to_be16(A);
@@ -356,7 +367,7 @@ select_insn:
break;
}
CONT;
- BPF_ALU_BPF_END_BPF_TO_LE:
+ ALU_END_TO_LE:
switch (K) {
case 16:
A = (__force u16) cpu_to_le16(A);
@@ -371,136 +382,135 @@ select_insn:
CONT;
/* CALL */
- BPF_JMP_BPF_CALL_0:
+ JMP_CALL_0:
/* Function call scratches R1-R5 registers, preserves R6-R9,
* and stores return value into R0.
*/
- R0 = (__bpf_call_base + insn->imm)(regs[1], regs[2], regs[3],
- regs[4], regs[5]);
+ R0 = (__bpf_call_base + insn->imm)(R1, R2, R3, R4, R5);
CONT;
/* JMP */
- BPF_JMP_BPF_JA_0:
+ JMP_JA_0:
insn += insn->off;
CONT;
- BPF_JMP_BPF_JEQ_BPF_X:
+ JMP_JEQ_X:
if (A == X) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JEQ_BPF_K:
+ JMP_JEQ_K:
if (A == K) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JNE_BPF_X:
+ JMP_JNE_X:
if (A != X) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JNE_BPF_K:
+ JMP_JNE_K:
if (A != K) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JGT_BPF_X:
+ JMP_JGT_X:
if (A > X) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JGT_BPF_K:
+ JMP_JGT_K:
if (A > K) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JGE_BPF_X:
+ JMP_JGE_X:
if (A >= X) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JGE_BPF_K:
+ JMP_JGE_K:
if (A >= K) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JSGT_BPF_X:
- if (((s64)A) > ((s64)X)) {
+ JMP_JSGT_X:
+ if (((s64) A) > ((s64) X)) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JSGT_BPF_K:
- if (((s64)A) > ((s64)K)) {
+ JMP_JSGT_K:
+ if (((s64) A) > ((s64) K)) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JSGE_BPF_X:
- if (((s64)A) >= ((s64)X)) {
+ JMP_JSGE_X:
+ if (((s64) A) >= ((s64) X)) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JSGE_BPF_K:
- if (((s64)A) >= ((s64)K)) {
+ JMP_JSGE_K:
+ if (((s64) A) >= ((s64) K)) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JSET_BPF_X:
+ JMP_JSET_X:
if (A & X) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JSET_BPF_K:
+ JMP_JSET_K:
if (A & K) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_EXIT_0:
+ JMP_EXIT_0:
return R0;
/* STX and ST and LDX */
#define LDST(SIZEOP, SIZE) \
- BPF_STX_BPF_MEM_##SIZEOP: \
+ STX_MEM_##SIZEOP: \
*(SIZE *)(unsigned long) (A + insn->off) = X; \
CONT; \
- BPF_ST_BPF_MEM_##SIZEOP: \
+ ST_MEM_##SIZEOP: \
*(SIZE *)(unsigned long) (A + insn->off) = K; \
CONT; \
- BPF_LDX_BPF_MEM_##SIZEOP: \
+ LDX_MEM_##SIZEOP: \
A = *(SIZE *)(unsigned long) (X + insn->off); \
CONT;
- LDST(BPF_B, u8)
- LDST(BPF_H, u16)
- LDST(BPF_W, u32)
- LDST(BPF_DW, u64)
+ LDST(B, u8)
+ LDST(H, u16)
+ LDST(W, u32)
+ LDST(DW, u64)
#undef LDST
- BPF_STX_BPF_XADD_BPF_W: /* lock xadd *(u32 *)(A + insn->off) += X */
+ STX_XADD_W: /* lock xadd *(u32 *)(A + insn->off) += X */
atomic_add((u32) X, (atomic_t *)(unsigned long)
(A + insn->off));
CONT;
- BPF_STX_BPF_XADD_BPF_DW: /* lock xadd *(u64 *)(A + insn->off) += X */
+ STX_XADD_DW: /* lock xadd *(u64 *)(A + insn->off) += X */
atomic64_add((u64) X, (atomic64_t *)(unsigned long)
(A + insn->off));
CONT;
- BPF_LD_BPF_ABS_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + K)) */
+ LD_ABS_W: /* R0 = ntohl(*(u32 *) (skb->data + K)) */
off = K;
load_word:
/* BPF_LD + BPF_ABS and BPF_LD + BPF_IND insns are only
* appearing in the programs where ctx == skb. All programs
- * keep 'ctx' in regs[CTX_REG] == R6, sk_convert_filter()
+ * keep 'ctx' in regs[BPF_REG_CTX] == R6, sk_convert_filter()
* saves it in R6, internal BPF verifier will check that
* R6 == ctx.
*
@@ -524,7 +534,7 @@ load_word:
CONT;
}
return 0;
- BPF_LD_BPF_ABS_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + K)) */
+ LD_ABS_H: /* R0 = ntohs(*(u16 *) (skb->data + K)) */
off = K;
load_half:
ptr = load_pointer((struct sk_buff *) ctx, off, 2, &tmp);
@@ -533,7 +543,7 @@ load_half:
CONT;
}
return 0;
- BPF_LD_BPF_ABS_BPF_B: /* R0 = *(u8 *) (ctx + K) */
+ LD_ABS_B: /* R0 = *(u8 *) (ctx + K) */
off = K;
load_byte:
ptr = load_pointer((struct sk_buff *) ctx, off, 1, &tmp);
@@ -542,13 +552,13 @@ load_byte:
CONT;
}
return 0;
- BPF_LD_BPF_IND_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + X + K)) */
+ LD_IND_W: /* R0 = ntohl(*(u32 *) (skb->data + X + K)) */
off = K + X;
goto load_word;
- BPF_LD_BPF_IND_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + X + K)) */
+ LD_IND_H: /* R0 = ntohs(*(u16 *) (skb->data + X + K)) */
off = K + X;
goto load_half;
- BPF_LD_BPF_IND_BPF_B: /* R0 = *(u8 *) (skb->data + X + K) */
+ LD_IND_B: /* R0 = *(u8 *) (skb->data + X + K) */
off = K + X;
goto load_byte;
@@ -556,13 +566,6 @@ load_byte:
/* If we ever reach this, we have a bug somewhere. */
WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code);
return 0;
-#undef CONT_JMP
-#undef CONT
-
-#undef R0
-#undef X
-#undef A
-#undef K
}
u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx,
@@ -594,16 +597,14 @@ static unsigned int pkt_type_offset(void)
return -1;
}
-static u64 __skb_get_pay_offset(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
+static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
{
- struct sk_buff *skb = (struct sk_buff *)(long) ctx;
-
- return __skb_get_poff(skb);
+ return __skb_get_poff((struct sk_buff *)(unsigned long) ctx);
}
-static u64 __skb_get_nlattr(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
+static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
{
- struct sk_buff *skb = (struct sk_buff *)(long) ctx;
+ struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx;
struct nlattr *nla;
if (skb_is_nonlinear(skb))
@@ -612,19 +613,19 @@ static u64 __skb_get_nlattr(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
if (skb->len < sizeof(struct nlattr))
return 0;
- if (A > skb->len - sizeof(struct nlattr))
+ if (a > skb->len - sizeof(struct nlattr))
return 0;
- nla = nla_find((struct nlattr *) &skb->data[A], skb->len - A, X);
+ nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x);
if (nla)
return (void *) nla - (void *) skb->data;
return 0;
}
-static u64 __skb_get_nlattr_nest(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
+static u64 __skb_get_nlattr_nest(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
{
- struct sk_buff *skb = (struct sk_buff *)(long) ctx;
+ struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx;
struct nlattr *nla;
if (skb_is_nonlinear(skb))
@@ -633,29 +634,29 @@ static u64 __skb_get_nlattr_nest(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
if (skb->len < sizeof(struct nlattr))
return 0;
- if (A > skb->len - sizeof(struct nlattr))
+ if (a > skb->len - sizeof(struct nlattr))
return 0;
- nla = (struct nlattr *) &skb->data[A];
- if (nla->nla_len > skb->len - A)
+ nla = (struct nlattr *) &skb->data[a];
+ if (nla->nla_len > skb->len - a)
return 0;
- nla = nla_find_nested(nla, X);
+ nla = nla_find_nested(nla, x);
if (nla)
return (void *) nla - (void *) skb->data;
return 0;
}
-static u64 __get_raw_cpu_id(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
+static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
{
return raw_smp_processor_id();
}
/* note that this only generates 32-bit random numbers */
-static u64 __get_random_u32(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
+static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
{
- return (u64)prandom_u32();
+ return prandom_u32();
}
static bool convert_bpf_extensions(struct sock_filter *fp,
@@ -668,28 +669,28 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
insn->code = BPF_LDX | BPF_MEM | BPF_H;
- insn->a_reg = A_REG;
- insn->x_reg = CTX_REG;
+ insn->a_reg = BPF_REG_A;
+ insn->x_reg = BPF_REG_CTX;
insn->off = offsetof(struct sk_buff, protocol);
insn++;
/* A = ntohs(A) [emitting a nop or swap16] */
insn->code = BPF_ALU | BPF_END | BPF_FROM_BE;
- insn->a_reg = A_REG;
+ insn->a_reg = BPF_REG_A;
insn->imm = 16;
break;
case SKF_AD_OFF + SKF_AD_PKTTYPE:
insn->code = BPF_LDX | BPF_MEM | BPF_B;
- insn->a_reg = A_REG;
- insn->x_reg = CTX_REG;
+ insn->a_reg = BPF_REG_A;
+ insn->x_reg = BPF_REG_CTX;
insn->off = pkt_type_offset();
if (insn->off < 0)
return false;
insn++;
insn->code = BPF_ALU | BPF_AND | BPF_K;
- insn->a_reg = A_REG;
+ insn->a_reg = BPF_REG_A;
insn->imm = PKT_TYPE_MAX;
break;
@@ -699,13 +700,13 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
insn->code = BPF_LDX | BPF_MEM | BPF_DW;
else
insn->code = BPF_LDX | BPF_MEM | BPF_W;
- insn->a_reg = TMP_REG;
- insn->x_reg = CTX_REG;
+ insn->a_reg = BPF_REG_TMP;
+ insn->x_reg = BPF_REG_CTX;
insn->off = offsetof(struct sk_buff, dev);
insn++;
insn->code = BPF_JMP | BPF_JNE | BPF_K;
- insn->a_reg = TMP_REG;
+ insn->a_reg = BPF_REG_TMP;
insn->imm = 0;
insn->off = 1;
insn++;
@@ -716,8 +717,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
- insn->a_reg = A_REG;
- insn->x_reg = TMP_REG;
+ insn->a_reg = BPF_REG_A;
+ insn->x_reg = BPF_REG_TMP;
if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX) {
insn->code = BPF_LDX | BPF_MEM | BPF_W;
@@ -732,8 +733,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
insn->code = BPF_LDX | BPF_MEM | BPF_W;
- insn->a_reg = A_REG;
- insn->x_reg = CTX_REG;
+ insn->a_reg = BPF_REG_A;
+ insn->x_reg = BPF_REG_CTX;
insn->off = offsetof(struct sk_buff, mark);
break;
@@ -741,8 +742,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
insn->code = BPF_LDX | BPF_MEM | BPF_W;
- insn->a_reg = A_REG;
- insn->x_reg = CTX_REG;
+ insn->a_reg = BPF_REG_A;
+ insn->x_reg = BPF_REG_CTX;
insn->off = offsetof(struct sk_buff, hash);
break;
@@ -750,8 +751,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
insn->code = BPF_LDX | BPF_MEM | BPF_H;
- insn->a_reg = A_REG;
- insn->x_reg = CTX_REG;
+ insn->a_reg = BPF_REG_A;
+ insn->x_reg = BPF_REG_CTX;
insn->off = offsetof(struct sk_buff, queue_mapping);
break;
@@ -760,8 +761,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
insn->code = BPF_LDX | BPF_MEM | BPF_H;
- insn->a_reg = A_REG;
- insn->x_reg = CTX_REG;
+ insn->a_reg = BPF_REG_A;
+ insn->x_reg = BPF_REG_CTX;
insn->off = offsetof(struct sk_buff, vlan_tci);
insn++;
@@ -769,16 +770,16 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) {
insn->code = BPF_ALU | BPF_AND | BPF_K;
- insn->a_reg = A_REG;
+ insn->a_reg = BPF_REG_A;
insn->imm = ~VLAN_TAG_PRESENT;
} else {
insn->code = BPF_ALU | BPF_RSH | BPF_K;
- insn->a_reg = A_REG;
+ insn->a_reg = BPF_REG_A;
insn->imm = 12;
insn++;
insn->code = BPF_ALU | BPF_AND | BPF_K;
- insn->a_reg = A_REG;
+ insn->a_reg = BPF_REG_A;
insn->imm = 1;
}
break;
@@ -790,20 +791,20 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
case SKF_AD_OFF + SKF_AD_RANDOM:
/* arg1 = ctx */
insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
- insn->a_reg = ARG1_REG;
- insn->x_reg = CTX_REG;
+ insn->a_reg = BPF_REG_ARG1;
+ insn->x_reg = BPF_REG_CTX;
insn++;
/* arg2 = A */
insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
- insn->a_reg = ARG2_REG;
- insn->x_reg = A_REG;
+ insn->a_reg = BPF_REG_ARG2;
+ insn->x_reg = BPF_REG_A;
insn++;
/* arg3 = X */
insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
- insn->a_reg = ARG3_REG;
- insn->x_reg = X_REG;
+ insn->a_reg = BPF_REG_ARG3;
+ insn->x_reg = BPF_REG_X;
insn++;
/* Emit call(ctx, arg2=A, arg3=X) */
@@ -829,8 +830,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
insn->code = BPF_ALU | BPF_XOR | BPF_X;
- insn->a_reg = A_REG;
- insn->x_reg = X_REG;
+ insn->a_reg = BPF_REG_A;
+ insn->x_reg = BPF_REG_X;
break;
default:
@@ -880,7 +881,7 @@ int sk_convert_filter(struct sock_filter *prog, int len,
u8 bpf_src;
BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
- BUILD_BUG_ON(FP_REG + 1 != MAX_BPF_REG);
+ BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
if (len <= 0 || len >= BPF_MAXINSNS)
return -EINVAL;
@@ -897,8 +898,8 @@ do_pass:
if (new_insn) {
new_insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
- new_insn->a_reg = CTX_REG;
- new_insn->x_reg = ARG1_REG;
+ new_insn->a_reg = BPF_REG_CTX;
+ new_insn->x_reg = BPF_REG_ARG1;
}
new_insn++;
@@ -948,8 +949,8 @@ do_pass:
break;
insn->code = fp->code;
- insn->a_reg = A_REG;
- insn->x_reg = X_REG;
+ insn->a_reg = BPF_REG_A;
+ insn->x_reg = BPF_REG_X;
insn->imm = fp->k;
break;
@@ -983,16 +984,16 @@ do_pass:
* in compare insn.
*/
insn->code = BPF_ALU | BPF_MOV | BPF_K;
- insn->a_reg = TMP_REG;
+ insn->a_reg = BPF_REG_TMP;
insn->imm = fp->k;
insn++;
- insn->a_reg = A_REG;
- insn->x_reg = TMP_REG;
+ insn->a_reg = BPF_REG_A;
+ insn->x_reg = BPF_REG_TMP;
bpf_src = BPF_X;
} else {
- insn->a_reg = A_REG;
- insn->x_reg = X_REG;
+ insn->a_reg = BPF_REG_A;
+ insn->x_reg = BPF_REG_X;
insn->imm = fp->k;
bpf_src = BPF_SRC(fp->code);
}
@@ -1027,33 +1028,33 @@ do_pass:
/* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */
case BPF_LDX | BPF_MSH | BPF_B:
insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
- insn->a_reg = TMP_REG;
- insn->x_reg = A_REG;
+ insn->a_reg = BPF_REG_TMP;
+ insn->x_reg = BPF_REG_A;
insn++;
insn->code = BPF_LD | BPF_ABS | BPF_B;
- insn->a_reg = A_REG;
+ insn->a_reg = BPF_REG_A;
insn->imm = fp->k;
insn++;
insn->code = BPF_ALU | BPF_AND | BPF_K;
- insn->a_reg = A_REG;
+ insn->a_reg = BPF_REG_A;
insn->imm = 0xf;
insn++;
insn->code = BPF_ALU | BPF_LSH | BPF_K;
- insn->a_reg = A_REG;
+ insn->a_reg = BPF_REG_A;
insn->imm = 2;
insn++;
insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
- insn->a_reg = X_REG;
- insn->x_reg = A_REG;
+ insn->a_reg = BPF_REG_X;
+ insn->x_reg = BPF_REG_A;
insn++;
insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
- insn->a_reg = A_REG;
- insn->x_reg = TMP_REG;
+ insn->a_reg = BPF_REG_A;
+ insn->x_reg = BPF_REG_TMP;
break;
/* RET_K, RET_A are remapped into 2 insns. */
@@ -1063,7 +1064,7 @@ do_pass:
(BPF_RVAL(fp->code) == BPF_K ?
BPF_K : BPF_X);
insn->a_reg = 0;
- insn->x_reg = A_REG;
+ insn->x_reg = BPF_REG_A;
insn->imm = fp->k;
insn++;
@@ -1074,8 +1075,9 @@ do_pass:
case BPF_ST:
case BPF_STX:
insn->code = BPF_STX | BPF_MEM | BPF_W;
- insn->a_reg = FP_REG;
- insn->x_reg = fp->code == BPF_ST ? A_REG : X_REG;
+ insn->a_reg = BPF_REG_FP;
+ insn->x_reg = fp->code == BPF_ST ?
+ BPF_REG_A : BPF_REG_X;
insn->off = -(BPF_MEMWORDS - fp->k) * 4;
break;
@@ -1084,8 +1086,8 @@ do_pass:
case BPF_LDX | BPF_MEM:
insn->code = BPF_LDX | BPF_MEM | BPF_W;
insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
- A_REG : X_REG;
- insn->x_reg = FP_REG;
+ BPF_REG_A : BPF_REG_X;
+ insn->x_reg = BPF_REG_FP;
insn->off = -(BPF_MEMWORDS - fp->k) * 4;
break;
@@ -1094,22 +1096,22 @@ do_pass:
case BPF_LDX | BPF_IMM:
insn->code = BPF_ALU | BPF_MOV | BPF_K;
insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
- A_REG : X_REG;
+ BPF_REG_A : BPF_REG_X;
insn->imm = fp->k;
break;
/* X = A */
case BPF_MISC | BPF_TAX:
insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
- insn->a_reg = X_REG;
- insn->x_reg = A_REG;
+ insn->a_reg = BPF_REG_X;
+ insn->x_reg = BPF_REG_A;
break;
/* A = X */
case BPF_MISC | BPF_TXA:
insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
- insn->a_reg = A_REG;
- insn->x_reg = X_REG;
+ insn->a_reg = BPF_REG_A;
+ insn->x_reg = BPF_REG_X;
break;
/* A = skb->len or X = skb->len */
@@ -1117,16 +1119,16 @@ do_pass:
case BPF_LDX | BPF_W | BPF_LEN:
insn->code = BPF_LDX | BPF_MEM | BPF_W;
insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
- A_REG : X_REG;
- insn->x_reg = CTX_REG;
+ BPF_REG_A : BPF_REG_X;
+ insn->x_reg = BPF_REG_CTX;
insn->off = offsetof(struct sk_buff, len);
break;
/* access seccomp_data fields */
case BPF_LDX | BPF_ABS | BPF_W:
insn->code = BPF_LDX | BPF_MEM | BPF_W;
- insn->a_reg = A_REG;
- insn->x_reg = CTX_REG;
+ insn->a_reg = BPF_REG_A;
+ insn->x_reg = BPF_REG_CTX;
insn->off = fp->k;
break;
@@ -1472,7 +1474,7 @@ static struct sk_filter *__sk_migrate_realloc(struct sk_filter *fp,
fp_new = sock_kmalloc(sk, len, GFP_KERNEL);
if (fp_new) {
- memcpy(fp_new, fp, sizeof(struct sk_filter));
+ *fp_new = *fp;
/* As we're keeping orig_prog in fp_new along,
* we need to make sure we're not evicting it
* from the old fp.
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 821846f..d5de69b 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -140,13 +140,12 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
ca->cnt = 1;
}
-static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked,
- u32 in_flight)
+static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
- if (!tcp_is_cwnd_limited(sk, in_flight))
+ if (!tcp_is_cwnd_limited(sk))
return;
if (tp->snd_cwnd <= tp->snd_ssthresh)
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 2b9464c..7b09d8b 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -276,26 +276,6 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
return err;
}
-/* RFC2861 Check whether we are limited by application or congestion window
- * This is the inverse of cwnd check in tcp_tso_should_defer
- */
-bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
-{
- const struct tcp_sock *tp = tcp_sk(sk);
- u32 left;
-
- if (in_flight >= tp->snd_cwnd)
- return true;
-
- left = tp->snd_cwnd - in_flight;
- if (sk_can_gso(sk) &&
- left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
- left < tp->xmit_size_goal_segs)
- return true;
- return left <= tcp_max_tso_deferred_mss(tp);
-}
-EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited);
-
/* Slow start is used when congestion window is no greater than the slow start
* threshold. We base on RFC2581 and also handle stretch ACKs properly.
* We do not implement RFC3465 Appropriate Byte Counting (ABC) per se but
@@ -337,11 +317,11 @@ EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai);
/* This is Jacobson's slow start and congestion avoidance.
* SIGCOMM '88, p. 328.
*/
-void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
+void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (!tcp_is_cwnd_limited(sk, in_flight))
+ if (!tcp_is_cwnd_limited(sk))
return;
/* In "safe" area, increase. */
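
Every congestion-control module in the rest of this series gets the same mechanical treatment: the in_flight argument disappears from the cong_avoid() hook, and tcp_is_cwnd_limited() is called with the socket alone. A hypothetical module written against the new prototype would look roughly as follows (example names only; the tcp_slow_start()/tcp_cong_avoid_ai() signatures current at the time of this series are assumed):

/* Hypothetical Reno-style module using the new cong_avoid() prototype. */
static void example_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (!tcp_is_cwnd_limited(sk))	/* no in_flight argument any more */
		return;

	if (tp->snd_cwnd <= tp->snd_ssthresh)
		tcp_slow_start(tp, acked);
	else
		tcp_cong_avoid_ai(tp, tp->snd_cwnd);
}

static struct tcp_congestion_ops example_cc __read_mostly = {
	.ssthresh	= tcp_reno_ssthresh,
	.cong_avoid	= example_cong_avoid,
	.name		= "example",
	.owner		= THIS_MODULE,
};
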
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 8bf2245..ba2a4f3 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -304,13 +304,12 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
ca->cnt = 1;
}
-static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked,
- u32 in_flight)
+static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
- if (!tcp_is_cwnd_limited(sk, in_flight))
+ if (!tcp_is_cwnd_limited(sk))
return;
if (tp->snd_cwnd <= tp->snd_ssthresh) {
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 8b9e7ba..1c49082 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -109,12 +109,12 @@ static void hstcp_init(struct sock *sk)
tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128);
}
-static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
+static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct hstcp *ca = inet_csk_ca(sk);
- if (!tcp_is_cwnd_limited(sk, in_flight))
+ if (!tcp_is_cwnd_limited(sk))
return;
if (tp->snd_cwnd <= tp->snd_ssthresh)
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 4a194ac..0313613 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -227,12 +227,12 @@ static u32 htcp_recalc_ssthresh(struct sock *sk)
return max((tp->snd_cwnd * ca->beta) >> 7, 2U);
}
-static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
+static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct htcp *ca = inet_csk_ca(sk);
- if (!tcp_is_cwnd_limited(sk, in_flight))
+ if (!tcp_is_cwnd_limited(sk))
return;
if (tp->snd_cwnd <= tp->snd_ssthresh)
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index a15a799..d8f8f05 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -87,8 +87,7 @@ static inline u32 hybla_fraction(u32 odds)
* o Give cwnd a new value based on the model proposed
* o remember increments <1
*/
-static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked,
- u32 in_flight)
+static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct hybla *ca = inet_csk_ca(sk);
@@ -101,11 +100,11 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked,
ca->minrtt_us = tp->srtt_us;
}
- if (!tcp_is_cwnd_limited(sk, in_flight))
+ if (!tcp_is_cwnd_limited(sk))
return;
if (!ca->hybla_en) {
- tcp_reno_cong_avoid(sk, ack, acked, in_flight);
+ tcp_reno_cong_avoid(sk, ack, acked);
return;
}
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 863d105..5999b39 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -255,8 +255,7 @@ static void tcp_illinois_state(struct sock *sk, u8 new_state)
/*
* Increase window in response to successful acknowledgment.
*/
-static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked,
- u32 in_flight)
+static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct illinois *ca = inet_csk_ca(sk);
@@ -265,7 +264,7 @@ static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked,
update_params(sk);
/* RFC2861 only increase cwnd if fully utilized */
- if (!tcp_is_cwnd_limited(sk, in_flight))
+ if (!tcp_is_cwnd_limited(sk))
return;
/* In slow start */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6efed13..350b207 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2938,10 +2938,11 @@ static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp)
tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt_us, -1L);
}
-static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
+static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
- icsk->icsk_ca_ops->cong_avoid(sk, ack, acked, in_flight);
+
+ icsk->icsk_ca_ops->cong_avoid(sk, ack, acked);
tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
}
@@ -3364,7 +3365,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
bool is_dupack = false;
- u32 prior_in_flight;
u32 prior_fackets;
int prior_packets = tp->packets_out;
const int prior_unsacked = tp->packets_out - tp->sacked_out;
@@ -3397,7 +3397,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
flag |= FLAG_SND_UNA_ADVANCED;
prior_fackets = tp->fackets_out;
- prior_in_flight = tcp_packets_in_flight(tp);
/* ts_recent update must be made after we are sure that the packet
* is in window.
@@ -3452,7 +3451,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
/* Advance cwnd if state allows */
if (tcp_may_raise_cwnd(sk, flag))
- tcp_cong_avoid(sk, ack, acked, prior_in_flight);
+ tcp_cong_avoid(sk, ack, acked);
if (tcp_ack_is_dubious(sk, flag)) {
is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index c9aecae..1e70fa8 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -115,13 +115,12 @@ static void tcp_lp_init(struct sock *sk)
* Will only call newReno CA when away from inference.
* From TCP-LP's paper, this will be handled in additive increasement.
*/
-static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 acked,
- u32 in_flight)
+static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct lp *lp = inet_csk_ca(sk);
if (!(lp->flag & LP_WITHIN_INF))
- tcp_reno_cong_avoid(sk, ack, acked, in_flight);
+ tcp_reno_cong_avoid(sk, ack, acked);
}
/**
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 20847de..89277a3 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1402,12 +1402,13 @@ static void tcp_cwnd_application_limited(struct sock *sk)
tp->snd_cwnd_stamp = tcp_time_stamp;
}
-/* Congestion window validation. (RFC2861) */
-static void tcp_cwnd_validate(struct sock *sk)
+static void tcp_cwnd_validate(struct sock *sk, u32 unsent_segs)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (tp->packets_out >= tp->snd_cwnd) {
+ tp->lsnd_pending = tp->packets_out + unsent_segs;
+
+ if (tcp_is_cwnd_limited(sk)) {
/* Network is feed fully. */
tp->snd_cwnd_used = 0;
tp->snd_cwnd_stamp = tcp_time_stamp;
@@ -1880,7 +1881,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
- unsigned int tso_segs, sent_pkts;
+ unsigned int tso_segs, sent_pkts, unsent_segs = 0;
int cwnd_quota;
int result;
@@ -1924,7 +1925,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
break;
} else {
if (!push_one && tcp_tso_should_defer(sk, skb))
- break;
+ goto compute_unsent_segs;
}
/* TCP Small Queues :
@@ -1949,8 +1950,14 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
* there is no smp_mb__after_set_bit() yet
*/
smp_mb__after_clear_bit();
- if (atomic_read(&sk->sk_wmem_alloc) > limit)
+ if (atomic_read(&sk->sk_wmem_alloc) > limit) {
+ u32 unsent_bytes;
+
+compute_unsent_segs:
+ unsent_bytes = tp->write_seq - tp->snd_nxt;
+ unsent_segs = DIV_ROUND_UP(unsent_bytes, mss_now);
break;
+ }
}
limit = mss_now;
@@ -1990,7 +1997,7 @@ repair:
/* Send one loss probe per tail loss episode. */
if (push_one != 2)
tcp_schedule_loss_probe(sk);
- tcp_cwnd_validate(sk);
+ tcp_cwnd_validate(sk, unsent_segs);
return false;
}
return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk));
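
With the tcp_output.c change, tcp_cwnd_validate() records how much the sender wanted to have in flight at the last write attempt (packets_out plus the segments it could not send) in tp->lsnd_pending, and the generic cwnd-limited test is rebuilt on top of that instead of a per-ACK in_flight snapshot. A minimal sketch of the idea follows; the real helper lives in include/net/tcp.h and may additionally keep an RFC 2861 / TSO-deferral allowance that is omitted here:

/* Simplified sketch, not the exact include/net/tcp.h helper: the socket
 * is cwnd-limited when what it wanted to send at the last write attempt
 * already filled the congestion window.
 */
static inline bool tcp_is_cwnd_limited(const struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	return tp->lsnd_pending >= tp->snd_cwnd;
}
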
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index 0ac5083..8250949 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -15,12 +15,11 @@
#define TCP_SCALABLE_AI_CNT 50U
#define TCP_SCALABLE_MD_SCALE 3
-static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked,
- u32 in_flight)
+static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (!tcp_is_cwnd_limited(sk, in_flight))
+ if (!tcp_is_cwnd_limited(sk))
return;
if (tp->snd_cwnd <= tp->snd_ssthresh)
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 48539ff..9a5e05f 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -163,14 +163,13 @@ static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp)
return min(tp->snd_ssthresh, tp->snd_cwnd-1);
}
-static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked,
- u32 in_flight)
+static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct vegas *vegas = inet_csk_ca(sk);
if (!vegas->doing_vegas_now) {
- tcp_reno_cong_avoid(sk, ack, acked, in_flight);
+ tcp_reno_cong_avoid(sk, ack, acked);
return;
}
@@ -195,7 +194,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked,
/* We don't have enough RTT samples to do the Vegas
* calculation, so we'll behave like Reno.
*/
- tcp_reno_cong_avoid(sk, ack, acked, in_flight);
+ tcp_reno_cong_avoid(sk, ack, acked);
} else {
u32 rtt, diff;
u64 target_cwnd;
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 1b8e28f..27b9825 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -114,19 +114,18 @@ static void tcp_veno_cwnd_event(struct sock *sk, enum tcp_ca_event event)
tcp_veno_init(sk);
}
-static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked,
- u32 in_flight)
+static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct veno *veno = inet_csk_ca(sk);
if (!veno->doing_veno_now) {
- tcp_reno_cong_avoid(sk, ack, acked, in_flight);
+ tcp_reno_cong_avoid(sk, ack, acked);
return;
}
/* limited by applications */
- if (!tcp_is_cwnd_limited(sk, in_flight))
+ if (!tcp_is_cwnd_limited(sk))
return;
/* We do the Veno calculations only if we got enough rtt samples */
@@ -134,7 +133,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked,
/* We don't have enough rtt samples to do the Veno
* calculation, so we'll behave like Reno.
*/
- tcp_reno_cong_avoid(sk, ack, acked, in_flight);
+ tcp_reno_cong_avoid(sk, ack, acked);
} else {
u64 target_cwnd;
u32 rtt;
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 5ede0e7..599b79b 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -69,13 +69,12 @@ static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, s32 rtt_us)
tcp_vegas_pkts_acked(sk, pkts_acked, rtt_us);
}
-static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked,
- u32 in_flight)
+static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct yeah *yeah = inet_csk_ca(sk);
- if (!tcp_is_cwnd_limited(sk, in_flight))
+ if (!tcp_is_cwnd_limited(sk))
return;
if (tp->snd_cwnd <= tp->snd_ssthresh)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index a481bbe..1a4a202 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -134,7 +134,8 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n)
int err;
int tp_created = 0;
- if ((n->nlmsg_type != RTM_GETTFILTER) && !capable(CAP_NET_ADMIN))
+ if ((n->nlmsg_type != RTM_GETTFILTER) &&
+ !ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
replay:
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index a0b84e0..86f8edf 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1084,7 +1084,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
struct Qdisc *p = NULL;
int err;
- if ((n->nlmsg_type != RTM_GETQDISC) && !capable(CAP_NET_ADMIN))
+ if ((n->nlmsg_type != RTM_GETQDISC) &&
+ !ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1151,7 +1152,7 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
struct Qdisc *q, *p;
int err;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
replay:
@@ -1490,7 +1491,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n)
u32 qid;
int err;
- if ((n->nlmsg_type != RTM_GETTCLASS) && !capable(CAP_NET_ADMIN))
+ if ((n->nlmsg_type != RTM_GETTCLASS) &&
+ !ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
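
The net/sched changes all follow one pattern: read-only RTM_GET* requests stay unprivileged, while operations that modify qdisc, class, or filter state now check CAP_NET_ADMIN against the user namespace owning the socket's network namespace rather than against the initial user namespace. Schematically (hypothetical handler, mirroring the hunks above):

/* Hypothetical rtnetlink doit handler showing the capable() ->
 * ns_capable() pattern applied throughout cls_api.c and sch_api.c.
 */
static int example_tc_doit(struct sk_buff *skb, struct nlmsghdr *n)
{
	struct net *net = sock_net(skb->sk);

	if (n->nlmsg_type != RTM_GETQDISC &&
	    !ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	/* ... parse attributes and modify qdisc state ... */
	return 0;
}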