From ab28c12a8e6fea875b6757052e211772f62fa771 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 6 Dec 2010 22:53:15 +0000 Subject: sfc: Reorder struct efx_nic to separate fields by volatility Place the regularly updated fields (locks, MAC stats, etc.) on a separate cache-line from fields which are mostly constant. This should reduce cache misses for access to the latter on the data path. Signed-off-by: Ben Hutchings --- drivers/net/sfc/net_driver.h | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) (limited to 'drivers/net/sfc') diff --git a/drivers/net/sfc/net_driver.h b/drivers/net/sfc/net_driver.h index 0d19fbf..60d6371 100644 --- a/drivers/net/sfc/net_driver.h +++ b/drivers/net/sfc/net_driver.h @@ -625,10 +625,8 @@ struct efx_filter_state; * Work items do not hold and must not acquire RTNL. * @workqueue_name: Name of workqueue * @reset_work: Scheduled reset workitem - * @monitor_work: Hardware monitor workitem * @membase_phys: Memory BAR value as physical address * @membase: Memory BAR value - * @biu_lock: BIU (bus interface unit) lock * @interrupt_mode: Interrupt mode * @irq_rx_adaptive: Adaptive IRQ moderation enabled for RX event queues * @irq_rx_moderation: IRQ moderation time for RX event queues @@ -652,14 +650,9 @@ struct efx_filter_state; * @int_error_count: Number of internal errors seen recently * @int_error_expire: Time at which error count will be expired * @irq_status: Interrupt status buffer - * @last_irq_cpu: Last CPU to handle interrupt. - * This register is written with the SMP processor ID whenever an - * interrupt is handled. It is used by efx_nic_test_interrupt() - * to verify that an interrupt has occurred. * @irq_zero_count: Number of legacy IRQs seen with queue flags == 0 * @fatal_irq_level: IRQ level (bit number) used for serious errors * @mtd_list: List of MTDs attached to the NIC - * @n_rx_nodesc_drop_cnt: RX no descriptor drop count * @nic_data: Hardware dependant state * @mac_lock: MAC access lock. Protects @port_enabled, @phy_mode, * @port_inhibited, efx_monitor() and efx_reconfigure_port() @@ -672,11 +665,7 @@ struct efx_filter_state; * @port_initialized: Port initialized? * @net_dev: Operating system network device. Consider holding the rtnl lock * @rx_checksum_enabled: RX checksumming enabled - * @mac_stats: MAC statistics. These include all statistics the MACs - * can provide. Generic code converts these into a standard - * &struct net_device_stats. * @stats_buffer: DMA buffer for statistics - * @stats_lock: Statistics update lock. Serialises statistics fetches * @mac_op: MAC interface * @phy_type: PHY type * @phy_op: PHY interface @@ -694,10 +683,23 @@ struct efx_filter_state; * @loopback_mode: Loopback status * @loopback_modes: Supported loopback mode bitmask * @loopback_selftest: Offline self-test private state + * @monitor_work: Hardware monitor workitem + * @biu_lock: BIU (bus interface unit) lock + * @last_irq_cpu: Last CPU to handle interrupt. + * This register is written with the SMP processor ID whenever an + * interrupt is handled. It is used by efx_nic_test_interrupt() + * to verify that an interrupt has occurred. + * @n_rx_nodesc_drop_cnt: RX no descriptor drop count + * @mac_stats: MAC statistics. These include all statistics the MACs + * can provide. Generic code converts these into a standard + * &struct net_device_stats. + * @stats_lock: Statistics update lock. Serialises statistics fetches * * This is stored in the private area of the &struct net_device. 
*/ struct efx_nic { + /* The following fields should be written very rarely */ + char name[IFNAMSIZ]; struct pci_dev *pci_dev; const struct efx_nic_type *type; @@ -705,10 +707,9 @@ struct efx_nic { struct workqueue_struct *workqueue; char workqueue_name[16]; struct work_struct reset_work; - struct delayed_work monitor_work; resource_size_t membase_phys; void __iomem *membase; - spinlock_t biu_lock; + enum efx_int_mode interrupt_mode; bool irq_rx_adaptive; unsigned int irq_rx_moderation; @@ -735,7 +736,6 @@ struct efx_nic { unsigned long int_error_expire; struct efx_buffer irq_status; - volatile signed int last_irq_cpu; unsigned irq_zero_count; unsigned fatal_irq_level; @@ -743,8 +743,6 @@ struct efx_nic { struct list_head mtd_list; #endif - unsigned n_rx_nodesc_drop_cnt; - void *nic_data; struct mutex mac_lock; @@ -756,9 +754,7 @@ struct efx_nic { struct net_device *net_dev; bool rx_checksum_enabled; - struct efx_mac_stats mac_stats; struct efx_buffer stats_buffer; - spinlock_t stats_lock; struct efx_mac_operations *mac_op; @@ -784,6 +780,15 @@ struct efx_nic { void *loopback_selftest; struct efx_filter_state *filter_state; + + /* The following fields may be written more often */ + + struct delayed_work monitor_work ____cacheline_aligned_in_smp; + spinlock_t biu_lock; + volatile signed int last_irq_cpu; + unsigned n_rx_nodesc_drop_cnt; + struct efx_mac_stats mac_stats; + spinlock_t stats_lock; }; static inline int efx_dev_registered(struct efx_nic *efx) -- cgit v1.1 From 51c56f40ef41ca780ff001d59727eda03fa39374 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 10 Nov 2010 18:46:40 +0000 Subject: sfc: Use ACCESS_ONCE when copying efx_tx_queue::read_count Signed-off-by: Ben Hutchings --- drivers/net/sfc/tx.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/net/sfc') diff --git a/drivers/net/sfc/tx.c b/drivers/net/sfc/tx.c index 03194f7..fef2235 100644 --- a/drivers/net/sfc/tx.c +++ b/drivers/net/sfc/tx.c @@ -240,8 +240,7 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) * of read_count. */ smp_mb(); tx_queue->old_read_count = - *(volatile unsigned *) - &tx_queue->read_count; + ACCESS_ONCE(tx_queue->read_count); fill_level = (tx_queue->insert_count - tx_queue->old_read_count); q_space = efx->txq_entries - 1 - fill_level; @@ -764,7 +763,7 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue, * stopped from the access of read_count. */ smp_mb(); tx_queue->old_read_count = - *(volatile unsigned *)&tx_queue->read_count; + ACCESS_ONCE(tx_queue->read_count); fill_level = (tx_queue->insert_count - tx_queue->old_read_count); q_space = efx->txq_entries - 1 - fill_level; -- cgit v1.1 From 9f2f6cd07a09bc0af1f2950189e426569561d1e6 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 6 Dec 2010 22:55:00 +0000 Subject: sfc: Expand/correct comments on collector behaviour and function usage Document exactly which registers and functions have special behaviour, and why races on writes to descriptor pointers are safe. Signed-off-by: Ben Hutchings --- drivers/net/sfc/io.h | 98 ++++++++++++++++++++++++++-------------------------- 1 file changed, 49 insertions(+), 49 deletions(-) (limited to 'drivers/net/sfc') diff --git a/drivers/net/sfc/io.h b/drivers/net/sfc/io.h index 85a99fe..0764e84 100644 --- a/drivers/net/sfc/io.h +++ b/drivers/net/sfc/io.h @@ -22,28 +22,39 @@ * * Notes on locking strategy: * - * Most NIC registers require 16-byte (or 8-byte, for SRAM) atomic writes - * which necessitates locking. 
- * Under normal operation few writes to NIC registers are made and these - * registers (EVQ_RPTR_REG, RX_DESC_UPD_REG and TX_DESC_UPD_REG) are special - * cased to allow 4-byte (hence lockless) accesses. + * Most CSRs are 128-bit (oword) and therefore cannot be read or + * written atomically. Access from the host is buffered by the Bus + * Interface Unit (BIU). Whenever the host reads from the lowest + * address of such a register, or from the address of a different such + * register, the BIU latches the register's value. Subsequent reads + * from higher addresses of the same register will read the latched + * value. Whenever the host writes part of such a register, the BIU + * collects the written value and does not write to the underlying + * register until all 4 dwords have been written. A similar buffering + * scheme applies to host access to the NIC's 64-bit SRAM. * - * It *is* safe to write to these 4-byte registers in the middle of an - * access to an 8-byte or 16-byte register. We therefore use a - * spinlock to protect accesses to the larger registers, but no locks - * for the 4-byte registers. + * Access to different CSRs and 64-bit SRAM words must be serialised, + * since interleaved access can result in lost writes or lost + * information from read-to-clear fields. We use efx_nic::biu_lock + * for this. (We could use separate locks for read and write, but + * this is not normally a performance bottleneck.) * - * A write barrier is needed to ensure that DW3 is written after DW0/1/2 - * due to the way the 16byte registers are "collected" in the BIU. + * The DMA descriptor pointers (RX_DESC_UPD and TX_DESC_UPD) are + * 128-bit but are special-cased in the BIU to avoid the need for + * locking in the host: * - * We also lock when carrying out reads, to ensure consistency of the - * data (made possible since the BIU reads all 128 bits into a cache). - * Reads are very rare, so this isn't a significant performance - * impact. (Most data transferred from NIC to host is DMAed directly - * into host memory). - * - * I/O BAR access uses locks for both reads and writes (but is only provided - * for testing purposes). + * - They are write-only. + * - The semantics of writing to these registers are such that + * replacing the low 96 bits with zero does not affect functionality. + * - If the host writes to the last dword address of such a register + * (i.e. the high 32 bits) the underlying register will always be + * written. If the collector does not hold values for the low 96 + * bits of the register, they will be written as zero. Writing to + * the last qword does not have this effect and must not be done. + * - If the host writes to the address of any other part of such a + * register while the collector already holds values for some other + * register, the write is discarded and the collector maintains its + * current state. */ #if BITS_PER_LONG == 64 @@ -72,7 +83,7 @@ static inline __le32 _efx_readd(struct efx_nic *efx, unsigned int reg) return (__force __le32)__raw_readl(efx->membase + reg); } -/* Writes to a normal 16-byte Efx register, locking as appropriate. */ +/* Write a normal 128-bit CSR, locking as appropriate. */ static inline void efx_writeo(struct efx_nic *efx, efx_oword_t *value, unsigned int reg) { @@ -98,8 +109,7 @@ static inline void efx_writeo(struct efx_nic *efx, efx_oword_t *value, spin_unlock_irqrestore(&efx->biu_lock, flags); } -/* Write an 8-byte NIC SRAM entry through the supplied mapping, - * locking as appropriate. 
*/ +/* Write 64-bit SRAM through the supplied mapping, locking as appropriate. */ static inline void efx_sram_writeq(struct efx_nic *efx, void __iomem *membase, efx_qword_t *value, unsigned int index) { @@ -122,29 +132,19 @@ static inline void efx_sram_writeq(struct efx_nic *efx, void __iomem *membase, spin_unlock_irqrestore(&efx->biu_lock, flags); } -/* Write dword to NIC register that allows partial writes - * - * Some registers (EVQ_RPTR_REG, RX_DESC_UPD_REG and - * TX_DESC_UPD_REG) can be written to as a single dword. This allows - * for lockless writes. - */ +/* Write a 32-bit CSR or the last dword of a special 128-bit CSR */ static inline void efx_writed(struct efx_nic *efx, efx_dword_t *value, unsigned int reg) { netif_vdbg(efx, hw, efx->net_dev, - "writing partial register %x with "EFX_DWORD_FMT"\n", + "writing register %x with "EFX_DWORD_FMT"\n", reg, EFX_DWORD_VAL(*value)); /* No lock required */ _efx_writed(efx, value->u32[0], reg); } -/* Read from a NIC register - * - * This reads an entire 16-byte register in one go, locking as - * appropriate. It is essential to read the first dword first, as this - * prompts the NIC to load the current value into the shadow register. - */ +/* Read a 128-bit CSR, locking as appropriate. */ static inline void efx_reado(struct efx_nic *efx, efx_oword_t *value, unsigned int reg) { @@ -163,8 +163,7 @@ static inline void efx_reado(struct efx_nic *efx, efx_oword_t *value, EFX_OWORD_VAL(*value)); } -/* Read an 8-byte SRAM entry through supplied mapping, - * locking as appropriate. */ +/* Read 64-bit SRAM through the supplied mapping, locking as appropriate. */ static inline void efx_sram_readq(struct efx_nic *efx, void __iomem *membase, efx_qword_t *value, unsigned int index) { @@ -186,7 +185,7 @@ static inline void efx_sram_readq(struct efx_nic *efx, void __iomem *membase, addr, EFX_QWORD_VAL(*value)); } -/* Read dword from register that allows partial writes (sic) */ +/* Read a 32-bit CSR or SRAM */ static inline void efx_readd(struct efx_nic *efx, efx_dword_t *value, unsigned int reg) { @@ -196,28 +195,28 @@ static inline void efx_readd(struct efx_nic *efx, efx_dword_t *value, reg, EFX_DWORD_VAL(*value)); } -/* Write to a register forming part of a table */ +/* Write a 128-bit CSR forming part of a table */ static inline void efx_writeo_table(struct efx_nic *efx, efx_oword_t *value, unsigned int reg, unsigned int index) { efx_writeo(efx, value, reg + index * sizeof(efx_oword_t)); } -/* Read to a register forming part of a table */ +/* Read a 128-bit CSR forming part of a table */ static inline void efx_reado_table(struct efx_nic *efx, efx_oword_t *value, unsigned int reg, unsigned int index) { efx_reado(efx, value, reg + index * sizeof(efx_oword_t)); } -/* Write to a dword register forming part of a table */ +/* Write a 32-bit CSR forming part of a table, or 32-bit SRAM */ static inline void efx_writed_table(struct efx_nic *efx, efx_dword_t *value, unsigned int reg, unsigned int index) { efx_writed(efx, value, reg + index * sizeof(efx_oword_t)); } -/* Read from a dword register forming part of a table */ +/* Read a 32-bit CSR forming part of a table, or 32-bit SRAM */ static inline void efx_readd_table(struct efx_nic *efx, efx_dword_t *value, unsigned int reg, unsigned int index) { @@ -231,25 +230,26 @@ static inline void efx_readd_table(struct efx_nic *efx, efx_dword_t *value, #define EFX_PAGED_REG(page, reg) \ ((page) * EFX_PAGE_BLOCK_SIZE + (reg)) -/* As for efx_writeo(), but for a page-mapped register. 
*/ +/* Write the whole of RX_DESC_UPD or TX_DESC_UPD */ static inline void efx_writeo_page(struct efx_nic *efx, efx_oword_t *value, unsigned int reg, unsigned int page) { efx_writeo(efx, value, EFX_PAGED_REG(page, reg)); } -/* As for efx_writed(), but for a page-mapped register. */ +/* Write a page-mapped 32-bit CSR (EVQ_RPTR or the high bits of + * RX_DESC_UPD or TX_DESC_UPD) + */ static inline void efx_writed_page(struct efx_nic *efx, efx_dword_t *value, unsigned int reg, unsigned int page) { efx_writed(efx, value, EFX_PAGED_REG(page, reg)); } -/* Write dword to page-mapped register with an extra lock. - * - * As for efx_writed_page(), but for a register that suffers from - * SFC bug 3181. Take out a lock so the BIU collector cannot be - * confused. */ +/* Write TIMER_COMMAND. This is a page-mapped 32-bit CSR, but a bug + * in the BIU means that writes to TIMER_COMMAND[0] invalidate the + * collector register. + */ static inline void efx_writed_page_locked(struct efx_nic *efx, efx_dword_t *value, unsigned int reg, -- cgit v1.1 From 494bdf1b0fd58688d055f1b66c34b0844dcfc1fa Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 6 Dec 2010 22:55:18 +0000 Subject: sfc: Remove redundant memory barriers between MMIOs Signed-off-by: Ben Hutchings --- drivers/net/sfc/io.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/net/sfc') diff --git a/drivers/net/sfc/io.h b/drivers/net/sfc/io.h index 0764e84..896b31e 100644 --- a/drivers/net/sfc/io.h +++ b/drivers/net/sfc/io.h @@ -96,13 +96,11 @@ static inline void efx_writeo(struct efx_nic *efx, efx_oword_t *value, spin_lock_irqsave(&efx->biu_lock, flags); #ifdef EFX_USE_QWORD_IO _efx_writeq(efx, value->u64[0], reg + 0); - wmb(); _efx_writeq(efx, value->u64[1], reg + 8); #else _efx_writed(efx, value->u32[0], reg + 0); _efx_writed(efx, value->u32[1], reg + 4); _efx_writed(efx, value->u32[2], reg + 8); - wmb(); _efx_writed(efx, value->u32[3], reg + 12); #endif mmiowb(); @@ -125,7 +123,6 @@ static inline void efx_sram_writeq(struct efx_nic *efx, void __iomem *membase, __raw_writeq((__force u64)value->u64[0], membase + addr); #else __raw_writel((__force u32)value->u32[0], membase + addr); - wmb(); __raw_writel((__force u32)value->u32[1], membase + addr + 4); #endif mmiowb(); @@ -152,7 +149,6 @@ static inline void efx_reado(struct efx_nic *efx, efx_oword_t *value, spin_lock_irqsave(&efx->biu_lock, flags); value->u32[0] = _efx_readd(efx, reg + 0); - rmb(); value->u32[1] = _efx_readd(efx, reg + 4); value->u32[2] = _efx_readd(efx, reg + 8); value->u32[3] = _efx_readd(efx, reg + 12); @@ -175,7 +171,6 @@ static inline void efx_sram_readq(struct efx_nic *efx, void __iomem *membase, value->u64[0] = (__force __le64)__raw_readq(membase + addr); #else value->u32[0] = (__force __le32)__raw_readl(membase + addr); - rmb(); value->u32[1] = (__force __le32)__raw_readl(membase + addr + 4); #endif spin_unlock_irqrestore(&efx->biu_lock, flags); -- cgit v1.1 From 1a29cc40115c011895143c5f8278dee49423d5df Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 6 Dec 2010 22:55:33 +0000 Subject: sfc: Add compile-time checks for correctness of paged register writes Signed-off-by: Ben Hutchings --- drivers/net/sfc/io.h | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) (limited to 'drivers/net/sfc') diff --git a/drivers/net/sfc/io.h b/drivers/net/sfc/io.h index 896b31e..7f3adf2 100644 --- a/drivers/net/sfc/io.h +++ b/drivers/net/sfc/io.h @@ -226,29 +226,40 @@ static inline void efx_readd_table(struct efx_nic *efx, efx_dword_t *value, 
((page) * EFX_PAGE_BLOCK_SIZE + (reg)) /* Write the whole of RX_DESC_UPD or TX_DESC_UPD */ -static inline void efx_writeo_page(struct efx_nic *efx, efx_oword_t *value, - unsigned int reg, unsigned int page) +static inline void _efx_writeo_page(struct efx_nic *efx, efx_oword_t *value, + unsigned int reg, unsigned int page) { efx_writeo(efx, value, EFX_PAGED_REG(page, reg)); } +#define efx_writeo_page(efx, value, reg, page) \ + _efx_writeo_page(efx, value, \ + reg + \ + BUILD_BUG_ON_ZERO((reg) != 0x830 && (reg) != 0xa10), \ + page) /* Write a page-mapped 32-bit CSR (EVQ_RPTR or the high bits of * RX_DESC_UPD or TX_DESC_UPD) */ -static inline void efx_writed_page(struct efx_nic *efx, efx_dword_t *value, - unsigned int reg, unsigned int page) +static inline void _efx_writed_page(struct efx_nic *efx, efx_dword_t *value, + unsigned int reg, unsigned int page) { efx_writed(efx, value, EFX_PAGED_REG(page, reg)); } +#define efx_writed_page(efx, value, reg, page) \ + _efx_writed_page(efx, value, \ + reg + \ + BUILD_BUG_ON_ZERO((reg) != 0x400 && (reg) != 0x83c \ + && (reg) != 0xa1c), \ + page) /* Write TIMER_COMMAND. This is a page-mapped 32-bit CSR, but a bug * in the BIU means that writes to TIMER_COMMAND[0] invalidate the * collector register. */ -static inline void efx_writed_page_locked(struct efx_nic *efx, - efx_dword_t *value, - unsigned int reg, - unsigned int page) +static inline void _efx_writed_page_locked(struct efx_nic *efx, + efx_dword_t *value, + unsigned int reg, + unsigned int page) { unsigned long flags __attribute__ ((unused)); @@ -260,5 +271,9 @@ static inline void efx_writed_page_locked(struct efx_nic *efx, efx_writed(efx, value, EFX_PAGED_REG(page, reg)); } } +#define efx_writed_page_locked(efx, value, reg, page) \ + _efx_writed_page_locked(efx, value, \ + reg + BUILD_BUG_ON_ZERO((reg) != 0x420), \ + page) #endif /* EFX_IO_H */ -- cgit v1.1 From e506147271229d6c53b42c6a9897db67b5cfdb6d Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 6 Dec 2010 22:58:41 +0000 Subject: sfc: Remove locking from implementation of efx_writeo_paged() It is not necessary to serialise writes to the paged 128-bit registers. However, if we don't then we must always write the last dword separately, not as part of a qword write. 
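This is because the BIU only guarantees to write out the underlying register when the last dword address is written; writing the last qword does not have this effect (see the BIU notes added to io.h earlier in this series).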
Signed-off-by: Ben Hutchings --- drivers/net/sfc/io.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'drivers/net/sfc') diff --git a/drivers/net/sfc/io.h b/drivers/net/sfc/io.h index 7f3adf2..6da4ae2 100644 --- a/drivers/net/sfc/io.h +++ b/drivers/net/sfc/io.h @@ -229,7 +229,20 @@ static inline void efx_readd_table(struct efx_nic *efx, efx_dword_t *value, static inline void _efx_writeo_page(struct efx_nic *efx, efx_oword_t *value, unsigned int reg, unsigned int page) { - efx_writeo(efx, value, EFX_PAGED_REG(page, reg)); + reg = EFX_PAGED_REG(page, reg); + + netif_vdbg(efx, hw, efx->net_dev, + "writing register %x with " EFX_OWORD_FMT "\n", reg, + EFX_OWORD_VAL(*value)); + +#ifdef EFX_USE_QWORD_IO + _efx_writeq(efx, value->u64[0], reg + 0); +#else + _efx_writed(efx, value->u32[0], reg + 0); + _efx_writed(efx, value->u32[1], reg + 4); +#endif + _efx_writed(efx, value->u32[2], reg + 8); + _efx_writed(efx, value->u32[3], reg + 12); } #define efx_writeo_page(efx, value, reg, page) \ _efx_writeo_page(efx, value, \ -- cgit v1.1 From cd38557d78554fd4318fe448f728a8d7ff1cbabb Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 15 Nov 2010 23:53:11 +0000 Subject: sfc: Use TX push whenever adding descriptors to an empty queue Whenever we add DMA descriptors to a TX ring and update the ring pointer, the TX DMA engine must first read the new DMA descriptors and then start reading packet data. However, all released Solarflare 10G controllers have a 'TX push' feature that allows us to reduce latency by writing the first new DMA descriptor along with the pointer update. This is only useful when the queue is empty. The hardware should ignore the pushed descriptor if the queue is not empty, but this check is buggy, so we must do it in software. In order to tell whether a TX queue is empty, we need to compare the previous transmission count (write_count) and completion count (read_count). However, if we do that every time we update the ring pointer then read_count may ping-pong between the caches of two CPUs running the transmission and completion paths for the queue. Therefore, we split the check for an empty queue between the completion path and the transmission path: - Add an empty_read_count field representing a point at which the completion path saw the TX queue as empty. - Add an old_write_count field for use on the completion path. - On the completion path, whenever read_count reaches or passes old_write_count the TX queue may be empty. We then read write_count, set empty_read_count if read_count == write_count, and update old_write_count. - On the transmission path, we read empty_read_count. If it's set, we compare it with the value of write_count before the current set of descriptors was added. If they match, the queue really is empty and we can use TX push. Signed-off-by: Ben Hutchings --- drivers/net/sfc/net_driver.h | 16 ++++++++++++++++ drivers/net/sfc/nic.c | 42 ++++++++++++++++++++++++++++++++++++++++-- drivers/net/sfc/tx.c | 12 ++++++++++++ 3 files changed, 68 insertions(+), 2 deletions(-) (limited to 'drivers/net/sfc') diff --git a/drivers/net/sfc/net_driver.h b/drivers/net/sfc/net_driver.h index 60d6371..270e217 100644 --- a/drivers/net/sfc/net_driver.h +++ b/drivers/net/sfc/net_driver.h @@ -142,6 +142,12 @@ struct efx_tx_buffer { * @flushed: Used when handling queue flushing * @read_count: Current read pointer. * This is the number of buffers that have been removed from both rings. + * @old_write_count: The value of @write_count when last checked. 
+ * This is here for performance reasons. The xmit path will + * only get the up-to-date value of @write_count if this + * variable indicates that the queue is empty. This is to + * avoid cache-line ping-pong between the xmit path and the + * completion path. * @stopped: Stopped count. * Set if this TX queue is currently stopping its port. * @insert_count: Current insert pointer @@ -163,6 +169,10 @@ struct efx_tx_buffer { * @tso_long_headers: Number of packets with headers too long for standard * blocks * @tso_packets: Number of packets via the TSO xmit path + * @pushes: Number of times the TX push feature has been used + * @empty_read_count: If the completion path has seen the queue as empty + * and the transmission path has not yet checked this, the value of + * @read_count bitwise-added to %EFX_EMPTY_COUNT_VALID; otherwise 0. */ struct efx_tx_queue { /* Members which don't change on the fast path */ @@ -177,6 +187,7 @@ struct efx_tx_queue { /* Members used mainly on the completion path */ unsigned int read_count ____cacheline_aligned_in_smp; + unsigned int old_write_count; int stopped; /* Members used only on the xmit path */ @@ -187,6 +198,11 @@ struct efx_tx_queue { unsigned int tso_bursts; unsigned int tso_long_headers; unsigned int tso_packets; + unsigned int pushes; + + /* Members shared between paths and sometimes updated */ + unsigned int empty_read_count ____cacheline_aligned_in_smp; +#define EFX_EMPTY_COUNT_VALID 0x80000000 }; /** diff --git a/drivers/net/sfc/nic.c b/drivers/net/sfc/nic.c index 9743cff..bda6b1b 100644 --- a/drivers/net/sfc/nic.c +++ b/drivers/net/sfc/nic.c @@ -362,6 +362,35 @@ static inline void efx_notify_tx_desc(struct efx_tx_queue *tx_queue) FR_AZ_TX_DESC_UPD_DWORD_P0, tx_queue->queue); } +/* Write pointer and first descriptor for TX descriptor ring */ +static inline void efx_push_tx_desc(struct efx_tx_queue *tx_queue, + const efx_qword_t *txd) +{ + unsigned write_ptr; + efx_oword_t reg; + + BUILD_BUG_ON(FRF_AZ_TX_DESC_LBN != 0); + BUILD_BUG_ON(FR_AA_TX_DESC_UPD_KER != FR_BZ_TX_DESC_UPD_P0); + + write_ptr = tx_queue->write_count & tx_queue->ptr_mask; + EFX_POPULATE_OWORD_2(reg, FRF_AZ_TX_DESC_PUSH_CMD, true, + FRF_AZ_TX_DESC_WPTR, write_ptr); + reg.qword[0] = *txd; + efx_writeo_page(tx_queue->efx, ®, + FR_BZ_TX_DESC_UPD_P0, tx_queue->queue); +} + +static inline bool +efx_may_push_tx_desc(struct efx_tx_queue *tx_queue, unsigned int write_count) +{ + unsigned empty_read_count = ACCESS_ONCE(tx_queue->empty_read_count); + + if (empty_read_count == 0) + return false; + + tx_queue->empty_read_count = 0; + return ((empty_read_count ^ write_count) & ~EFX_EMPTY_COUNT_VALID) == 0; +} /* For each entry inserted into the software descriptor ring, create a * descriptor in the hardware TX descriptor ring (in host memory), and @@ -373,6 +402,7 @@ void efx_nic_push_buffers(struct efx_tx_queue *tx_queue) struct efx_tx_buffer *buffer; efx_qword_t *txd; unsigned write_ptr; + unsigned old_write_count = tx_queue->write_count; BUG_ON(tx_queue->write_count == tx_queue->insert_count); @@ -391,7 +421,15 @@ void efx_nic_push_buffers(struct efx_tx_queue *tx_queue) } while (tx_queue->write_count != tx_queue->insert_count); wmb(); /* Ensure descriptors are written before they are fetched */ - efx_notify_tx_desc(tx_queue); + + if (efx_may_push_tx_desc(tx_queue, old_write_count)) { + txd = efx_tx_desc(tx_queue, + old_write_count & tx_queue->ptr_mask); + efx_push_tx_desc(tx_queue, txd); + ++tx_queue->pushes; + } else { + efx_notify_tx_desc(tx_queue); + } } /* Allocate hardware resources 
for a TX queue */ @@ -1626,7 +1664,7 @@ void efx_nic_init_common(struct efx_nic *efx) EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_RX_SPACER, 0xfe); EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_RX_SPACER_EN, 1); EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_ONE_PKT_PER_Q, 1); - EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_PUSH_EN, 0); + EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_PUSH_EN, 1); EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_DIS_NON_IP_EV, 1); /* Enable SW_EV to inherit in char driver - assume harmless here */ EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_SOFT_EVT_EN, 1); diff --git a/drivers/net/sfc/tx.c b/drivers/net/sfc/tx.c index fef2235..bdb92b4 100644 --- a/drivers/net/sfc/tx.c +++ b/drivers/net/sfc/tx.c @@ -428,6 +428,16 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) __netif_tx_unlock(queue); } } + + /* Check whether the hardware queue is now empty */ + if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) { + tx_queue->old_write_count = ACCESS_ONCE(tx_queue->write_count); + if (tx_queue->read_count == tx_queue->old_write_count) { + smp_mb(); + tx_queue->empty_read_count = + tx_queue->read_count | EFX_EMPTY_COUNT_VALID; + } + } } int efx_probe_tx_queue(struct efx_tx_queue *tx_queue) @@ -473,8 +483,10 @@ void efx_init_tx_queue(struct efx_tx_queue *tx_queue) tx_queue->insert_count = 0; tx_queue->write_count = 0; + tx_queue->old_write_count = 0; tx_queue->read_count = 0; tx_queue->old_read_count = 0; + tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID; BUG_ON(tx_queue->stopped); /* Set up TX descriptor ring */ -- cgit v1.1 From ac33ac610dc613b2b1c938f8b61eef651ab72563 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 7 Dec 2010 18:29:52 +0000 Subject: sfc: Log start and end of ethtool self-test at INFO level Add message at start of self-test and increase log level of message at end of self-test, so that any other messages produced during the test are clearly associated with it. Signed-off-by: Ben Hutchings --- drivers/net/sfc/ethtool.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/net/sfc') diff --git a/drivers/net/sfc/ethtool.c b/drivers/net/sfc/ethtool.c index aae756b..d51a6b1 100644 --- a/drivers/net/sfc/ethtool.c +++ b/drivers/net/sfc/ethtool.c @@ -582,6 +582,9 @@ static void efx_ethtool_self_test(struct net_device *net_dev, goto fail1; } + netif_info(efx, drv, efx->net_dev, "starting %sline testing\n", + (test->flags & ETH_TEST_FL_OFFLINE) ? "off" : "on"); + /* We need rx buffers and interrupts. */ already_up = (efx->net_dev->flags & IFF_UP); if (!already_up) { @@ -600,9 +603,9 @@ static void efx_ethtool_self_test(struct net_device *net_dev, if (!already_up) dev_close(efx->net_dev); - netif_dbg(efx, drv, efx->net_dev, "%s %sline self-tests\n", - rc == 0 ? "passed" : "failed", - (test->flags & ETH_TEST_FL_OFFLINE) ? "off" : "on"); + netif_info(efx, drv, efx->net_dev, "%s %sline self-tests\n", + rc == 0 ? "passed" : "failed", + (test->flags & ETH_TEST_FL_OFFLINE) ? "off" : "on"); fail2: fail1: -- cgit v1.1 From 8891681af928f1da795cd4bd59043e5e0fadd6c8 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 7 Dec 2010 19:02:27 +0000 Subject: sfc: Remove filter table IDs from filter functions The separation between filter tables is largely an internal detail and it may be removed in future hardware. 
To prepare for that: - Merge table ID with filter index to make an opaque filter ID - Wrap efx_filter_table_clear() with a function that clears filters from both RX tables, which is all that the current caller requires Signed-off-by: Ben Hutchings --- drivers/net/sfc/efx.h | 5 ++--- drivers/net/sfc/ethtool.c | 8 ++------ drivers/net/sfc/filter.c | 40 ++++++++++++++++++++++++++++++---------- drivers/net/sfc/filter.h | 6 ------ 4 files changed, 34 insertions(+), 25 deletions(-) (limited to 'drivers/net/sfc') diff --git a/drivers/net/sfc/efx.h b/drivers/net/sfc/efx.h index 10a1bf4..003fdb3 100644 --- a/drivers/net/sfc/efx.h +++ b/drivers/net/sfc/efx.h @@ -74,9 +74,8 @@ extern int efx_filter_insert_filter(struct efx_nic *efx, bool replace); extern int efx_filter_remove_filter(struct efx_nic *efx, struct efx_filter_spec *spec); -extern void efx_filter_table_clear(struct efx_nic *efx, - enum efx_filter_table_id table_id, - enum efx_filter_priority priority); +extern void efx_filter_clear_rx(struct efx_nic *efx, + enum efx_filter_priority priority); /* Channels */ extern void efx_process_channel_now(struct efx_channel *channel); diff --git a/drivers/net/sfc/ethtool.c b/drivers/net/sfc/ethtool.c index d51a6b1..0f46c1a 100644 --- a/drivers/net/sfc/ethtool.c +++ b/drivers/net/sfc/ethtool.c @@ -558,12 +558,8 @@ static int efx_ethtool_set_flags(struct net_device *net_dev, u32 data) if (rc) return rc; - if (!(data & ETH_FLAG_NTUPLE)) { - efx_filter_table_clear(efx, EFX_FILTER_TABLE_RX_IP, - EFX_FILTER_PRI_MANUAL); - efx_filter_table_clear(efx, EFX_FILTER_TABLE_RX_MAC, - EFX_FILTER_PRI_MANUAL); - } + if (!(data & ETH_FLAG_NTUPLE)) + efx_filter_clear_rx(efx, EFX_FILTER_PRI_MANUAL); return 0; } diff --git a/drivers/net/sfc/filter.c b/drivers/net/sfc/filter.c index 44500b5..e96e6e8 100644 --- a/drivers/net/sfc/filter.c +++ b/drivers/net/sfc/filter.c @@ -26,6 +26,12 @@ */ #define FILTER_CTL_SRCH_MAX 200 +enum efx_filter_table_id { + EFX_FILTER_TABLE_RX_IP = 0, + EFX_FILTER_TABLE_RX_MAC, + EFX_FILTER_TABLE_COUNT, +}; + struct efx_filter_table { u32 offset; /* address of table relative to BAR */ unsigned size; /* number of entries */ @@ -206,6 +212,14 @@ found: return filter_idx; } +/* Construct/deconstruct external filter IDs */ + +static inline int +efx_filter_make_id(enum efx_filter_table_id table_id, unsigned index) +{ + return table_id << 16 | index; +} + /** * efx_filter_insert_filter - add or replace a filter * @efx: NIC in which to insert the filter @@ -213,7 +227,7 @@ found: * @replace: Flag for whether the specified filter may replace a filter * with an identical match expression and equal or lower priority * - * On success, return the filter index within its table. + * On success, return the filter ID. * On failure, return a negative error code. 
*/ int efx_filter_insert_filter(struct efx_nic *efx, struct efx_filter_spec *spec, @@ -273,6 +287,7 @@ int efx_filter_insert_filter(struct efx_nic *efx, struct efx_filter_spec *spec, netif_vdbg(efx, hw, efx->net_dev, "%s: filter type %d index %d rxq %u set", __func__, spec->type, filter_idx, spec->dmaq_id); + rc = efx_filter_make_id(table_id, filter_idx); out: spin_unlock_bh(&state->lock); @@ -340,15 +355,9 @@ out: return rc; } -/** - * efx_filter_table_clear - remove filters from a table by priority - * @efx: NIC from which to remove the filters - * @table_id: Table from which to remove the filters - * @priority: Maximum priority to remove - */ -void efx_filter_table_clear(struct efx_nic *efx, - enum efx_filter_table_id table_id, - enum efx_filter_priority priority) +static void efx_filter_table_clear(struct efx_nic *efx, + enum efx_filter_table_id table_id, + enum efx_filter_priority priority) { struct efx_filter_state *state = efx->filter_state; struct efx_filter_table *table = &state->table[table_id]; @@ -365,6 +374,17 @@ void efx_filter_table_clear(struct efx_nic *efx, spin_unlock_bh(&state->lock); } +/** + * efx_filter_clear_rx - remove RX filters by priority + * @efx: NIC from which to remove the filters + * @priority: Maximum priority to remove + */ +void efx_filter_clear_rx(struct efx_nic *efx, enum efx_filter_priority priority) +{ + efx_filter_table_clear(efx, EFX_FILTER_TABLE_RX_IP, priority); + efx_filter_table_clear(efx, EFX_FILTER_TABLE_RX_MAC, priority); +} + /* Restore filter stater after reset */ void efx_restore_filters(struct efx_nic *efx) { diff --git a/drivers/net/sfc/filter.h b/drivers/net/sfc/filter.h index a53319d..d11e4aa 100644 --- a/drivers/net/sfc/filter.h +++ b/drivers/net/sfc/filter.h @@ -12,12 +12,6 @@ #include -enum efx_filter_table_id { - EFX_FILTER_TABLE_RX_IP = 0, - EFX_FILTER_TABLE_RX_MAC, - EFX_FILTER_TABLE_COUNT, -}; - /** * enum efx_filter_type - type of hardware filter * @EFX_FILTER_RX_TCP_FULL: RX, matching TCP/IPv4 4-tuple -- cgit v1.1 From c39d35ebffeea5996a6f8fd8430fae9acfb8aeaf Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 7 Dec 2010 19:11:26 +0000 Subject: sfc: Generalise filter spec initialisation Move search_depth arrays into per-table state. Define initialisation function efx_filter_init_rx() which sets everything apart from the match fields. Define efx_filter_set_{ipv4_local,ipv4_full,eth_local}() to set the match fields. This allows some simplification of callers and later support for additional protocols and more flexible matching using multiple calls to these functions. 
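For example, a caller inserting a manual RX filter for a local TCP port would now build the spec in two steps. A minimal sketch follows; efx and rxq_id stand for the caller's NIC and RX queue, and the address/port literals are arbitrary illustrative values (the new helpers take them in network byte order):

	struct efx_filter_spec spec;
	int rc;

	/* Set priority, flags and destination queue */
	efx_filter_init_rx(&spec, EFX_FILTER_PRI_MANUAL, 0, rxq_id);
	/* Then set the match fields: local TCP 192.168.0.1:80 */
	rc = efx_filter_set_ipv4_local(&spec, IPPROTO_TCP,
				       htonl(0xc0a80001), htons(80));
	if (rc == 0)
		rc = efx_filter_insert_filter(efx, &spec, true);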
Signed-off-by: Ben Hutchings --- drivers/net/sfc/ethtool.c | 82 ++++++++---------- drivers/net/sfc/filter.c | 214 ++++++++++++++++++++++++++++++++++++++-------- drivers/net/sfc/filter.h | 143 ++++++++----------------------- 3 files changed, 248 insertions(+), 191 deletions(-) (limited to 'drivers/net/sfc') diff --git a/drivers/net/sfc/ethtool.c b/drivers/net/sfc/ethtool.c index 0f46c1a..5e50e57 100644 --- a/drivers/net/sfc/ethtool.c +++ b/drivers/net/sfc/ethtool.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "net_driver.h" #include "workarounds.h" #include "selftest.h" @@ -920,6 +921,7 @@ static int efx_ethtool_set_rx_ntuple(struct net_device *net_dev, struct ethhdr *mac_entry = &ntuple->fs.h_u.ether_spec; struct ethhdr *mac_mask = &ntuple->fs.m_u.ether_spec; struct efx_filter_spec filter; + int rc; /* Range-check action */ if (ntuple->fs.action < ETHTOOL_RXNTUPLE_ACTION_CLEAR || @@ -929,9 +931,16 @@ static int efx_ethtool_set_rx_ntuple(struct net_device *net_dev, if (~ntuple->fs.data_mask) return -EINVAL; + efx_filter_init_rx(&filter, EFX_FILTER_PRI_MANUAL, 0, + (ntuple->fs.action == ETHTOOL_RXNTUPLE_ACTION_DROP) ? + 0xfff : ntuple->fs.action); + switch (ntuple->fs.flow_type) { case TCP_V4_FLOW: - case UDP_V4_FLOW: + case UDP_V4_FLOW: { + u8 proto = (ntuple->fs.flow_type == TCP_V4_FLOW ? + IPPROTO_TCP : IPPROTO_UDP); + /* Must match all of destination, */ if (ip_mask->ip4dst | ip_mask->pdst) return -EINVAL; @@ -943,7 +952,22 @@ static int efx_ethtool_set_rx_ntuple(struct net_device *net_dev, /* and nothing else */ if ((u8)~ip_mask->tos | (u16)~ntuple->fs.vlan_tag_mask) return -EINVAL; + + if (!ip_mask->ip4src) + rc = efx_filter_set_ipv4_full(&filter, proto, + ip_entry->ip4dst, + ip_entry->pdst, + ip_entry->ip4src, + ip_entry->psrc); + else + rc = efx_filter_set_ipv4_local(&filter, proto, + ip_entry->ip4dst, + ip_entry->pdst); + if (rc) + return rc; break; + } + case ETHER_FLOW: /* Must match all of destination, */ if (!is_zero_ether_addr(mac_mask->h_dest)) @@ -956,58 +980,24 @@ static int efx_ethtool_set_rx_ntuple(struct net_device *net_dev, if (!is_broadcast_ether_addr(mac_mask->h_source) || mac_mask->h_proto != htons(0xffff)) return -EINVAL; + + rc = efx_filter_set_eth_local( + &filter, + (ntuple->fs.vlan_tag_mask == 0xf000) ? 
+ ntuple->fs.vlan_tag : EFX_FILTER_VID_UNSPEC, + mac_entry->h_dest); + if (rc) + return rc; break; + default: return -EINVAL; } - filter.priority = EFX_FILTER_PRI_MANUAL; - filter.flags = 0; - - switch (ntuple->fs.flow_type) { - case TCP_V4_FLOW: - if (!ip_mask->ip4src) - efx_filter_set_rx_tcp_full(&filter, - htonl(ip_entry->ip4src), - htons(ip_entry->psrc), - htonl(ip_entry->ip4dst), - htons(ip_entry->pdst)); - else - efx_filter_set_rx_tcp_wild(&filter, - htonl(ip_entry->ip4dst), - htons(ip_entry->pdst)); - break; - case UDP_V4_FLOW: - if (!ip_mask->ip4src) - efx_filter_set_rx_udp_full(&filter, - htonl(ip_entry->ip4src), - htons(ip_entry->psrc), - htonl(ip_entry->ip4dst), - htons(ip_entry->pdst)); - else - efx_filter_set_rx_udp_wild(&filter, - htonl(ip_entry->ip4dst), - htons(ip_entry->pdst)); - break; - case ETHER_FLOW: - if (ntuple->fs.vlan_tag_mask == 0xf000) - efx_filter_set_rx_mac_full(&filter, - ntuple->fs.vlan_tag & 0xfff, - mac_entry->h_dest); - else - efx_filter_set_rx_mac_wild(&filter, mac_entry->h_dest); - break; - } - - if (ntuple->fs.action == ETHTOOL_RXNTUPLE_ACTION_CLEAR) { + if (ntuple->fs.action == ETHTOOL_RXNTUPLE_ACTION_CLEAR) return efx_filter_remove_filter(efx, &filter); - } else { - if (ntuple->fs.action == ETHTOOL_RXNTUPLE_ACTION_DROP) - filter.dmaq_id = 0xfff; - else - filter.dmaq_id = ntuple->fs.action; + else return efx_filter_insert_filter(efx, &filter, true); - } } static int efx_ethtool_get_rxfh_indir(struct net_device *net_dev, diff --git a/drivers/net/sfc/filter.c b/drivers/net/sfc/filter.c index e96e6e8..d4722c4 100644 --- a/drivers/net/sfc/filter.c +++ b/drivers/net/sfc/filter.c @@ -7,6 +7,7 @@ * by the Free Software Foundation, incorporated herein by reference. */ +#include #include "efx.h" #include "filter.h" #include "io.h" @@ -33,18 +34,19 @@ enum efx_filter_table_id { }; struct efx_filter_table { + enum efx_filter_table_id id; u32 offset; /* address of table relative to BAR */ unsigned size; /* number of entries */ unsigned step; /* step between entries */ unsigned used; /* number currently used */ unsigned long *used_bitmap; struct efx_filter_spec *spec; + unsigned search_depth[EFX_FILTER_TYPE_COUNT]; }; struct efx_filter_state { spinlock_t lock; struct efx_filter_table table[EFX_FILTER_TABLE_COUNT]; - unsigned search_depth[EFX_FILTER_TYPE_COUNT]; }; /* The filter hash function is LFSR polynomial x^16 + x^3 + 1 of a 32-bit @@ -71,68 +73,203 @@ static u16 efx_filter_increment(u32 key) } static enum efx_filter_table_id -efx_filter_type_table_id(enum efx_filter_type type) +efx_filter_spec_table_id(const struct efx_filter_spec *spec) +{ + BUILD_BUG_ON(EFX_FILTER_TABLE_RX_IP != (EFX_FILTER_TCP_FULL >> 2)); + BUILD_BUG_ON(EFX_FILTER_TABLE_RX_IP != (EFX_FILTER_TCP_WILD >> 2)); + BUILD_BUG_ON(EFX_FILTER_TABLE_RX_IP != (EFX_FILTER_UDP_FULL >> 2)); + BUILD_BUG_ON(EFX_FILTER_TABLE_RX_IP != (EFX_FILTER_UDP_WILD >> 2)); + BUILD_BUG_ON(EFX_FILTER_TABLE_RX_MAC != (EFX_FILTER_MAC_FULL >> 2)); + BUILD_BUG_ON(EFX_FILTER_TABLE_RX_MAC != (EFX_FILTER_MAC_WILD >> 2)); + EFX_BUG_ON_PARANOID(spec->type == EFX_FILTER_UNSPEC); + return spec->type >> 2; +} + +static struct efx_filter_table * +efx_filter_spec_table(struct efx_filter_state *state, + const struct efx_filter_spec *spec) { - BUILD_BUG_ON(EFX_FILTER_TABLE_RX_IP != (EFX_FILTER_RX_TCP_FULL >> 2)); - BUILD_BUG_ON(EFX_FILTER_TABLE_RX_IP != (EFX_FILTER_RX_TCP_WILD >> 2)); - BUILD_BUG_ON(EFX_FILTER_TABLE_RX_IP != (EFX_FILTER_RX_UDP_FULL >> 2)); - BUILD_BUG_ON(EFX_FILTER_TABLE_RX_IP != (EFX_FILTER_RX_UDP_WILD >> 2)); - 
BUILD_BUG_ON(EFX_FILTER_TABLE_RX_MAC != (EFX_FILTER_RX_MAC_FULL >> 2)); - BUILD_BUG_ON(EFX_FILTER_TABLE_RX_MAC != (EFX_FILTER_RX_MAC_WILD >> 2)); - return type >> 2; + if (spec->type == EFX_FILTER_UNSPEC) + return NULL; + else + return &state->table[efx_filter_spec_table_id(spec)]; } -static void -efx_filter_table_reset_search_depth(struct efx_filter_state *state, - enum efx_filter_table_id table_id) +static void efx_filter_table_reset_search_depth(struct efx_filter_table *table) { - memset(state->search_depth + (table_id << 2), 0, - sizeof(state->search_depth[0]) << 2); + memset(table->search_depth, 0, sizeof(table->search_depth)); } static void efx_filter_push_rx_limits(struct efx_nic *efx) { struct efx_filter_state *state = efx->filter_state; + struct efx_filter_table *table; efx_oword_t filter_ctl; efx_reado(efx, &filter_ctl, FR_BZ_RX_FILTER_CTL); + table = &state->table[EFX_FILTER_TABLE_RX_IP]; EFX_SET_OWORD_FIELD(filter_ctl, FRF_BZ_TCP_FULL_SRCH_LIMIT, - state->search_depth[EFX_FILTER_RX_TCP_FULL] + + table->search_depth[EFX_FILTER_TCP_FULL] + FILTER_CTL_SRCH_FUDGE_FULL); EFX_SET_OWORD_FIELD(filter_ctl, FRF_BZ_TCP_WILD_SRCH_LIMIT, - state->search_depth[EFX_FILTER_RX_TCP_WILD] + + table->search_depth[EFX_FILTER_TCP_WILD] + FILTER_CTL_SRCH_FUDGE_WILD); EFX_SET_OWORD_FIELD(filter_ctl, FRF_BZ_UDP_FULL_SRCH_LIMIT, - state->search_depth[EFX_FILTER_RX_UDP_FULL] + + table->search_depth[EFX_FILTER_UDP_FULL] + FILTER_CTL_SRCH_FUDGE_FULL); EFX_SET_OWORD_FIELD(filter_ctl, FRF_BZ_UDP_WILD_SRCH_LIMIT, - state->search_depth[EFX_FILTER_RX_UDP_WILD] + + table->search_depth[EFX_FILTER_UDP_WILD] + FILTER_CTL_SRCH_FUDGE_WILD); - if (state->table[EFX_FILTER_TABLE_RX_MAC].size) { + table = &state->table[EFX_FILTER_TABLE_RX_MAC]; + if (table->size) { EFX_SET_OWORD_FIELD( filter_ctl, FRF_CZ_ETHERNET_FULL_SEARCH_LIMIT, - state->search_depth[EFX_FILTER_RX_MAC_FULL] + + table->search_depth[EFX_FILTER_MAC_FULL] + FILTER_CTL_SRCH_FUDGE_FULL); EFX_SET_OWORD_FIELD( filter_ctl, FRF_CZ_ETHERNET_WILDCARD_SEARCH_LIMIT, - state->search_depth[EFX_FILTER_RX_MAC_WILD] + + table->search_depth[EFX_FILTER_MAC_WILD] + FILTER_CTL_SRCH_FUDGE_WILD); } efx_writeo(efx, &filter_ctl, FR_BZ_RX_FILTER_CTL); } +static inline void __efx_filter_set_ipv4(struct efx_filter_spec *spec, + __be32 host1, __be16 port1, + __be32 host2, __be16 port2) +{ + spec->data[0] = ntohl(host1) << 16 | ntohs(port1); + spec->data[1] = ntohs(port2) << 16 | ntohl(host1) >> 16; + spec->data[2] = ntohl(host2); +} + +/** + * efx_filter_set_ipv4_local - specify IPv4 host, transport protocol and port + * @spec: Specification to initialise + * @proto: Transport layer protocol number + * @host: Local host address (network byte order) + * @port: Local port (network byte order) + */ +int efx_filter_set_ipv4_local(struct efx_filter_spec *spec, u8 proto, + __be32 host, __be16 port) +{ + __be32 host1; + __be16 port1; + + EFX_BUG_ON_PARANOID(!(spec->flags & EFX_FILTER_FLAG_RX)); + + /* This cannot currently be combined with other filtering */ + if (spec->type != EFX_FILTER_UNSPEC) + return -EPROTONOSUPPORT; + + if (port == 0) + return -EINVAL; + + switch (proto) { + case IPPROTO_TCP: + spec->type = EFX_FILTER_TCP_WILD; + break; + case IPPROTO_UDP: + spec->type = EFX_FILTER_UDP_WILD; + break; + default: + return -EPROTONOSUPPORT; + } + + /* Filter is constructed in terms of source and destination, + * with the odd wrinkle that the ports are swapped in a UDP + * wildcard filter. We need to convert from local and remote + * (= zero for wildcard) addresses. 
+ */ + host1 = 0; + if (proto != IPPROTO_UDP) { + port1 = 0; + } else { + port1 = port; + port = 0; + } + + __efx_filter_set_ipv4(spec, host1, port1, host, port); + return 0; +} + +/** + * efx_filter_set_ipv4_full - specify IPv4 hosts, transport protocol and ports + * @spec: Specification to initialise + * @proto: Transport layer protocol number + * @host: Local host address (network byte order) + * @port: Local port (network byte order) + * @rhost: Remote host address (network byte order) + * @rport: Remote port (network byte order) + */ +int efx_filter_set_ipv4_full(struct efx_filter_spec *spec, u8 proto, + __be32 host, __be16 port, + __be32 rhost, __be16 rport) +{ + EFX_BUG_ON_PARANOID(!(spec->flags & EFX_FILTER_FLAG_RX)); + + /* This cannot currently be combined with other filtering */ + if (spec->type != EFX_FILTER_UNSPEC) + return -EPROTONOSUPPORT; + + if (port == 0 || rport == 0) + return -EINVAL; + + switch (proto) { + case IPPROTO_TCP: + spec->type = EFX_FILTER_TCP_FULL; + break; + case IPPROTO_UDP: + spec->type = EFX_FILTER_UDP_FULL; + break; + default: + return -EPROTONOSUPPORT; + } + + __efx_filter_set_ipv4(spec, rhost, rport, host, port); + return 0; +} + +/** + * efx_filter_set_eth_local - specify local Ethernet address and optional VID + * @spec: Specification to initialise + * @vid: VLAN ID to match, or %EFX_FILTER_VID_UNSPEC + * @addr: Local Ethernet MAC address + */ +int efx_filter_set_eth_local(struct efx_filter_spec *spec, + u16 vid, const u8 *addr) +{ + EFX_BUG_ON_PARANOID(!(spec->flags & EFX_FILTER_FLAG_RX)); + + /* This cannot currently be combined with other filtering */ + if (spec->type != EFX_FILTER_UNSPEC) + return -EPROTONOSUPPORT; + + if (vid == EFX_FILTER_VID_UNSPEC) { + spec->type = EFX_FILTER_MAC_WILD; + spec->data[0] = 0; + } else { + spec->type = EFX_FILTER_MAC_FULL; + spec->data[0] = vid; + } + + spec->data[1] = addr[2] << 24 | addr[3] << 16 | addr[4] << 8 | addr[5]; + spec->data[2] = addr[0] << 8 | addr[1]; + return 0; +} + /* Build a filter entry and return its n-tuple key. 
*/ static u32 efx_filter_build(efx_oword_t *filter, struct efx_filter_spec *spec) { u32 data3; - switch (efx_filter_type_table_id(spec->type)) { + switch (efx_filter_spec_table_id(spec)) { case EFX_FILTER_TABLE_RX_IP: { - bool is_udp = (spec->type == EFX_FILTER_RX_UDP_FULL || - spec->type == EFX_FILTER_RX_UDP_WILD); + bool is_udp = (spec->type == EFX_FILTER_UDP_FULL || + spec->type == EFX_FILTER_UDP_WILD); EFX_POPULATE_OWORD_7( *filter, FRF_BZ_RSS_EN, @@ -149,7 +286,7 @@ static u32 efx_filter_build(efx_oword_t *filter, struct efx_filter_spec *spec) } case EFX_FILTER_TABLE_RX_MAC: { - bool is_wild = spec->type == EFX_FILTER_RX_MAC_WILD; + bool is_wild = spec->type == EFX_FILTER_MAC_WILD; EFX_POPULATE_OWORD_8( *filter, FRF_CZ_RMFT_RSS_EN, @@ -234,23 +371,21 @@ int efx_filter_insert_filter(struct efx_nic *efx, struct efx_filter_spec *spec, bool replace) { struct efx_filter_state *state = efx->filter_state; - enum efx_filter_table_id table_id = - efx_filter_type_table_id(spec->type); - struct efx_filter_table *table = &state->table[table_id]; + struct efx_filter_table *table = efx_filter_spec_table(state, spec); struct efx_filter_spec *saved_spec; efx_oword_t filter; int filter_idx, depth; u32 key; int rc; - if (table->size == 0) + if (!table || table->size == 0) return -EINVAL; key = efx_filter_build(&filter, spec); netif_vdbg(efx, hw, efx->net_dev, "%s: type %d search_depth=%d", __func__, spec->type, - state->search_depth[spec->type]); + table->search_depth[spec->type]); spin_lock_bh(&state->lock); @@ -277,8 +412,8 @@ int efx_filter_insert_filter(struct efx_nic *efx, struct efx_filter_spec *spec, } *saved_spec = *spec; - if (state->search_depth[spec->type] < depth) { - state->search_depth[spec->type] = depth; + if (table->search_depth[spec->type] < depth) { + table->search_depth[spec->type] = depth; efx_filter_push_rx_limits(efx); } @@ -287,7 +422,7 @@ int efx_filter_insert_filter(struct efx_nic *efx, struct efx_filter_spec *spec, netif_vdbg(efx, hw, efx->net_dev, "%s: filter type %d index %d rxq %u set", __func__, spec->type, filter_idx, spec->dmaq_id); - rc = efx_filter_make_id(table_id, filter_idx); + rc = efx_filter_make_id(table->id, filter_idx); out: spin_unlock_bh(&state->lock); @@ -321,15 +456,16 @@ static void efx_filter_table_clear_entry(struct efx_nic *efx, int efx_filter_remove_filter(struct efx_nic *efx, struct efx_filter_spec *spec) { struct efx_filter_state *state = efx->filter_state; - enum efx_filter_table_id table_id = - efx_filter_type_table_id(spec->type); - struct efx_filter_table *table = &state->table[table_id]; + struct efx_filter_table *table = efx_filter_spec_table(state, spec); struct efx_filter_spec *saved_spec; efx_oword_t filter; int filter_idx, depth; u32 key; int rc; + if (!table) + return -EINVAL; + key = efx_filter_build(&filter, spec); spin_lock_bh(&state->lock); @@ -347,7 +483,7 @@ int efx_filter_remove_filter(struct efx_nic *efx, struct efx_filter_spec *spec) efx_filter_table_clear_entry(efx, table, filter_idx); if (table->used == 0) - efx_filter_table_reset_search_depth(state, table_id); + efx_filter_table_reset_search_depth(table); rc = 0; out: @@ -369,7 +505,7 @@ static void efx_filter_table_clear(struct efx_nic *efx, if (table->spec[filter_idx].priority <= priority) efx_filter_table_clear_entry(efx, table, filter_idx); if (table->used == 0) - efx_filter_table_reset_search_depth(state, table_id); + efx_filter_table_reset_search_depth(table); spin_unlock_bh(&state->lock); } @@ -427,6 +563,7 @@ int efx_probe_filters(struct efx_nic *efx) if 
(efx_nic_rev(efx) >= EFX_REV_FALCON_B0) { table = &state->table[EFX_FILTER_TABLE_RX_IP]; + table->id = EFX_FILTER_TABLE_RX_IP; table->offset = FR_BZ_RX_FILTER_TBL0; table->size = FR_BZ_RX_FILTER_TBL0_ROWS; table->step = FR_BZ_RX_FILTER_TBL0_STEP; @@ -434,6 +571,7 @@ int efx_probe_filters(struct efx_nic *efx) if (efx_nic_rev(efx) >= EFX_REV_SIENA_A0) { table = &state->table[EFX_FILTER_TABLE_RX_MAC]; + table->id = EFX_FILTER_TABLE_RX_MAC; table->offset = FR_CZ_RX_MAC_FILTER_TBL0; table->size = FR_CZ_RX_MAC_FILTER_TBL0_ROWS; table->step = FR_CZ_RX_MAC_FILTER_TBL0_STEP; diff --git a/drivers/net/sfc/filter.h b/drivers/net/sfc/filter.h index d11e4aa..872f213 100644 --- a/drivers/net/sfc/filter.h +++ b/drivers/net/sfc/filter.h @@ -14,23 +14,25 @@ /** * enum efx_filter_type - type of hardware filter - * @EFX_FILTER_RX_TCP_FULL: RX, matching TCP/IPv4 4-tuple - * @EFX_FILTER_RX_TCP_WILD: RX, matching TCP/IPv4 destination (host, port) - * @EFX_FILTER_RX_UDP_FULL: RX, matching UDP/IPv4 4-tuple - * @EFX_FILTER_RX_UDP_WILD: RX, matching UDP/IPv4 destination (host, port) - * @EFX_FILTER_RX_MAC_FULL: RX, matching Ethernet destination MAC address, VID - * @EFX_FILTER_RX_MAC_WILD: RX, matching Ethernet destination MAC address + * @EFX_FILTER_TCP_FULL: Matching TCP/IPv4 4-tuple + * @EFX_FILTER_TCP_WILD: Matching TCP/IPv4 destination (host, port) + * @EFX_FILTER_UDP_FULL: Matching UDP/IPv4 4-tuple + * @EFX_FILTER_UDP_WILD: Matching UDP/IPv4 destination (host, port) + * @EFX_FILTER_MAC_FULL: Matching Ethernet destination MAC address, VID + * @EFX_FILTER_MAC_WILD: Matching Ethernet destination MAC address + * @EFX_FILTER_UNSPEC: Match type is unspecified * - * Falcon NICs only support the RX TCP/IPv4 and UDP/IPv4 filter types. + * Falcon NICs only support the TCP/IPv4 and UDP/IPv4 filter types. */ enum efx_filter_type { - EFX_FILTER_RX_TCP_FULL = 0, - EFX_FILTER_RX_TCP_WILD, - EFX_FILTER_RX_UDP_FULL, - EFX_FILTER_RX_UDP_WILD, - EFX_FILTER_RX_MAC_FULL = 4, - EFX_FILTER_RX_MAC_WILD, - EFX_FILTER_TYPE_COUNT, + EFX_FILTER_TCP_FULL = 0, + EFX_FILTER_TCP_WILD, + EFX_FILTER_UDP_FULL, + EFX_FILTER_UDP_WILD, + EFX_FILTER_MAC_FULL = 4, + EFX_FILTER_MAC_WILD, + EFX_FILTER_TYPE_COUNT, /* number of specific types */ + EFX_FILTER_UNSPEC = 0xf, }; /** @@ -57,13 +59,13 @@ enum efx_filter_priority { * @EFX_FILTER_FLAG_RX_OVERRIDE_IP: Enables a MAC filter to override * any IP filter that matches the same packet. By default, IP * filters take precedence. - * - * Currently, no flags are defined for TX filters. 
+ * @EFX_FILTER_FLAG_RX: Filter is for RX */ enum efx_filter_flags { EFX_FILTER_FLAG_RX_RSS = 0x01, EFX_FILTER_FLAG_RX_SCATTER = 0x02, EFX_FILTER_FLAG_RX_OVERRIDE_IP = 0x04, + EFX_FILTER_FLAG_RX = 0x08, }; /** @@ -85,99 +87,26 @@ struct efx_filter_spec { u32 data[3]; }; -/** - * efx_filter_set_rx_tcp_full - specify RX filter with TCP/IPv4 full match - * @spec: Specification to initialise - * @shost: Source host address (host byte order) - * @sport: Source port (host byte order) - * @dhost: Destination host address (host byte order) - * @dport: Destination port (host byte order) - */ -static inline void -efx_filter_set_rx_tcp_full(struct efx_filter_spec *spec, - u32 shost, u16 sport, u32 dhost, u16 dport) -{ - spec->type = EFX_FILTER_RX_TCP_FULL; - spec->data[0] = sport | shost << 16; - spec->data[1] = dport << 16 | shost >> 16; - spec->data[2] = dhost; -} - -/** - * efx_filter_set_rx_tcp_wild - specify RX filter with TCP/IPv4 wildcard match - * @spec: Specification to initialise - * @dhost: Destination host address (host byte order) - * @dport: Destination port (host byte order) - */ -static inline void -efx_filter_set_rx_tcp_wild(struct efx_filter_spec *spec, u32 dhost, u16 dport) -{ - spec->type = EFX_FILTER_RX_TCP_WILD; - spec->data[0] = 0; - spec->data[1] = dport << 16; - spec->data[2] = dhost; -} - -/** - * efx_filter_set_rx_udp_full - specify RX filter with UDP/IPv4 full match - * @spec: Specification to initialise - * @shost: Source host address (host byte order) - * @sport: Source port (host byte order) - * @dhost: Destination host address (host byte order) - * @dport: Destination port (host byte order) - */ -static inline void -efx_filter_set_rx_udp_full(struct efx_filter_spec *spec, - u32 shost, u16 sport, u32 dhost, u16 dport) -{ - spec->type = EFX_FILTER_RX_UDP_FULL; - spec->data[0] = sport | shost << 16; - spec->data[1] = dport << 16 | shost >> 16; - spec->data[2] = dhost; -} - -/** - * efx_filter_set_rx_udp_wild - specify RX filter with UDP/IPv4 wildcard match - * @spec: Specification to initialise - * @dhost: Destination host address (host byte order) - * @dport: Destination port (host byte order) - */ -static inline void -efx_filter_set_rx_udp_wild(struct efx_filter_spec *spec, u32 dhost, u16 dport) -{ - spec->type = EFX_FILTER_RX_UDP_WILD; - spec->data[0] = dport; - spec->data[1] = 0; - spec->data[2] = dhost; -} - -/** - * efx_filter_set_rx_mac_full - specify RX filter with MAC full match - * @spec: Specification to initialise - * @vid: VLAN ID - * @addr: Destination MAC address - */ -static inline void efx_filter_set_rx_mac_full(struct efx_filter_spec *spec, - u16 vid, const u8 *addr) +static inline void efx_filter_init_rx(struct efx_filter_spec *spec, + enum efx_filter_priority priority, + enum efx_filter_flags flags, + unsigned rxq_id) { - spec->type = EFX_FILTER_RX_MAC_FULL; - spec->data[0] = vid; - spec->data[1] = addr[2] << 24 | addr[3] << 16 | addr[4] << 8 | addr[5]; - spec->data[2] = addr[0] << 8 | addr[1]; + spec->type = EFX_FILTER_UNSPEC; + spec->priority = priority; + spec->flags = EFX_FILTER_FLAG_RX | flags; + spec->dmaq_id = rxq_id; } -/** - * efx_filter_set_rx_mac_full - specify RX filter with MAC wildcard match - * @spec: Specification to initialise - * @addr: Destination MAC address - */ -static inline void efx_filter_set_rx_mac_wild(struct efx_filter_spec *spec, - const u8 *addr) -{ - spec->type = EFX_FILTER_RX_MAC_WILD; - spec->data[0] = 0; - spec->data[1] = addr[2] << 24 | addr[3] << 16 | addr[4] << 8 | addr[5]; - spec->data[2] = addr[0] << 8 | addr[1]; 
-} +extern int efx_filter_set_ipv4_local(struct efx_filter_spec *spec, u8 proto, + __be32 host, __be16 port); +extern int efx_filter_set_ipv4_full(struct efx_filter_spec *spec, u8 proto, + __be32 host, __be16 port, + __be32 rhost, __be16 rport); +extern int efx_filter_set_eth_local(struct efx_filter_spec *spec, + u16 vid, const u8 *addr); +enum { + EFX_FILTER_VID_UNSPEC = 0xffff, +}; #endif /* EFX_FILTER_H */ -- cgit v1.1