summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2017-03-24 13:45:07 -0700
committerDavid S. Miller <davem@davemloft.net>2017-03-24 13:45:07 -0700
commitba82427d4a0fdd92b01cb2a1c4c24990f37406a9 (patch)
treef482723e5448c72b606a85b998307eb203a6bf1b
parent6a18c312320766b6d13a12c681f77df04894f1a5 (diff)
parent584a88709bf4880ba5f8fed72da50512fbd9bdbb (diff)
downloadop-kernel-dev-ba82427d4a0fdd92b01cb2a1c4c24990f37406a9.zip
op-kernel-dev-ba82427d4a0fdd92b01cb2a1c4c24990f37406a9.tar.gz
Merge branch '40GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue
Jeff Kirsher says: ==================== 40GbE Intel Wired LAN Driver Updates 2017-03-23 This series contains updates to i40e and i40e.txt documentation. Jake provides all the changes in the series which are centered around ntuple filter fixes and additional support. Fixed the current implementation of .set_rxnfc, where we were not reading the mask field for filter entries which was resulting in filters not behaving as expected and not working correctly. When cleaning up after disabling flow director support, ensure that the default input set is correctly reprogrammed. Since the hardware only supports a single input set for all flows of that type, the driver shall only allow the input set to change if there are no other configured filters for that flow type, so add support to detect when we can update the input set for each flow type. Align the driver to other drivers to partition the ring_cookie value into 8bits of VF index, along with 32bits of queue number instead of using the user-def field. Added support to parse the user-def field into a data structure format to allow future extensions of the user-def field by keeping all the code that read/writes the field into a single location. Added support for flexible payloads passed via ethtool user-def field. We support a single flexible word (2byte) value per protocol type, and we handle the FLX_PIT register using a list of flexible entries so that each flow type may be configured separately. Enabled flow director filters for SCTPv4 packets using the ethtool ntuple interface to enable filters. Updated the documentation on the i40e driver to include the newly added support to ntuple filters. Reduced complexity of an if-continue-else-break section of code by taking advantage of using hlist_for_each_entry_continue() instead. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--Documentation/networking/i40e.txt72
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e.h130
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c977
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c52
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c107
5 files changed, 1294 insertions, 44 deletions
diff --git a/Documentation/networking/i40e.txt b/Documentation/networking/i40e.txt
index a251bf4..57e616e 100644
--- a/Documentation/networking/i40e.txt
+++ b/Documentation/networking/i40e.txt
@@ -63,6 +63,78 @@ Additional Configurations
The latest release of ethtool can be found from
https://www.kernel.org/pub/software/network/ethtool
+
+ Flow Director n-tuple traffic filters (FDir)
+ --------------------------------------------
+ The driver utilizes the ethtool interface for configuring ntuple filters,
+ via "ethtool -N <device> <filter>".
+
+ The sctp4, ip4, udp4, and tcp4 flow types are supported with the standard
+ fields including src-ip, dst-ip, src-port and dst-port. The driver only
+ supports fully enabling or fully masking the fields, so use of the mask
+ fields for partial matches is not supported.
+
+ Additionally, the driver supports using the action to specify filters for a
+ Virtual Function. You can specify the action as a 64-bit value, where the
+ lower 32 bits represent the queue number, while the next 8 bits represent
+ which VF. Note that 0 is the PF, so the VF identifier is offset by 1. For
+ example:
+
+ ... action 0x800000002 ...
+
+ Would indicate to direct traffic for Virtual Function 7 (8 minus 1) on queue
+ 2 of that VF.
+
+ The driver also supports using the user-defined field to specify 2 bytes of
+ arbitrary data to match within the packet payload in addition to the regular
+ fields. The data is specified in the lower 32bits of the user-def field in
+ the following way:
+
+ +----------------------------+---------------------------+
+ | 31 28 24 20 16 | 15 12 8 4 0|
+ +----------------------------+---------------------------+
+ | offset into packet payload | 2 bytes of flexible data |
+ +----------------------------+---------------------------+
+
+ As an example,
+
+ ... user-def 0x4FFFF ....
+
+ means to match the value 0xFFFF 4 bytes into the packet payload. Note that
+ the offset is based on the beginning of the payload, and not the beginning
+ of the packet. Thus
+
+ flow-type tcp4 ... user-def 0x8BEAF ....
+
+ would match TCP/IPv4 packets which have the value 0xBEAF 8 bytes into the
+ TCP/IPv4 payload.
+
+ For ICMP, the hardware parses the ICMP header as 4 bytes of header and 4
+ bytes of payload, so if you want to match an ICMP frame's payload you may need
+ to add 4 to the offset in order to match the data.
+
+ Furthermore, the offset can only be up to a value of 64, as the hardware
+ will only read up to 64 bytes of data from the payload. It must also be even
+ as the flexible data is 2 bytes long and must be aligned to byte 0 of the
+ packet payload.
+
+ When programming filters, the hardware is limited to using a single input
+ set for each flow type. This means that it is an error to program two
+ different filters with the same type that don't match on the same fields.
+ Thus the second of the following two commands will fail:
+
+ ethtool -N <device> flow-type tcp4 src-ip 192.168.0.7 action 5
+ ethtool -N <device> flow-type tcp4 dst-ip 192.168.15.18 action 1
+
+ This is because the first filter will be accepted and reprogram the input
+ set for TCPv4 filters, but the second filter will be unable to reprogram the
+ input set until all the conflicting TCPv4 filters are first removed.
+
+ Note that the user-defined flexible offset is also considered part of the
+ input set and cannot be programmed separately for multiple filters of the
+ same type. However, the flexible data is not part of the input set and
+ multiple filters may use the same offset but match against different data.
+
Data Center Bridging (DCB)
--------------------------
DCB configuration is not currently supported.
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index c0f2286..3133a1a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -202,6 +202,15 @@ enum i40e_fd_stat_idx {
#define I40E_FD_ATR_TUNNEL_STAT_IDX(pf_id) \
(I40E_FD_STAT_PF_IDX(pf_id) + I40E_FD_STAT_ATR_TUNNEL)
+/* The following structure contains the data parsed from the user-defined
+ * field of the ethtool_rx_flow_spec structure.
+ */
+struct i40e_rx_flow_userdef {
+ bool flex_filter; /* set when the low 32 mask bits of user-def are all ones */
+ u16 flex_word; /* bits 15:0 of the user-def value */
+ u16 flex_offset; /* bits 31:16 of the user-def value */
+};
+
struct i40e_fdir_filter {
struct hlist_node fdir_node;
/* filter ipnut set */
@@ -213,6 +222,12 @@ struct i40e_fdir_filter {
__be16 src_port;
__be16 dst_port;
__be32 sctp_v_tag;
+
+ /* Flexible data to match within the packet payload */
+ __be16 flex_word;
+ u16 flex_offset;
+ bool flex_filter;
+
/* filter control */
u16 q_index;
u8 flex_off;
@@ -249,6 +264,75 @@ struct i40e_udp_port_config {
u8 type;
};
+/* macros related to FLX_PIT: each SET helper shifts a value into its register
+ * field and masks it, and PREP_VAL ORs the three fields into one register
+ * value. The GET helpers below perform the reverse extraction.
+ */
+#define I40E_FLEX_SET_FSIZE(fsize) (((fsize) << \
+ I40E_PRTQF_FLX_PIT_FSIZE_SHIFT) & \
+ I40E_PRTQF_FLX_PIT_FSIZE_MASK)
+#define I40E_FLEX_SET_DST_WORD(dst) (((dst) << \
+ I40E_PRTQF_FLX_PIT_DEST_OFF_SHIFT) & \
+ I40E_PRTQF_FLX_PIT_DEST_OFF_MASK)
+#define I40E_FLEX_SET_SRC_WORD(src) (((src) << \
+ I40E_PRTQF_FLX_PIT_SOURCE_OFF_SHIFT) & \
+ I40E_PRTQF_FLX_PIT_SOURCE_OFF_MASK)
+#define I40E_FLEX_PREP_VAL(dst, fsize, src) (I40E_FLEX_SET_DST_WORD(dst) | \
+ I40E_FLEX_SET_FSIZE(fsize) | \
+ I40E_FLEX_SET_SRC_WORD(src))
+
+#define I40E_FLEX_PIT_GET_SRC(flex) (((flex) & \
+ I40E_PRTQF_FLX_PIT_SOURCE_OFF_MASK) >> \
+ I40E_PRTQF_FLX_PIT_SOURCE_OFF_SHIFT)
+#define I40E_FLEX_PIT_GET_DST(flex) (((flex) & \
+ I40E_PRTQF_FLX_PIT_DEST_OFF_MASK) >> \
+ I40E_PRTQF_FLX_PIT_DEST_OFF_SHIFT)
+#define I40E_FLEX_PIT_GET_FSIZE(flex) (((flex) & \
+ I40E_PRTQF_FLX_PIT_FSIZE_MASK) >> \
+ I40E_PRTQF_FLX_PIT_FSIZE_SHIFT)
+
+#define I40E_MAX_FLEX_SRC_OFFSET 0x1F /* largest SRC_OFFSET treated as valid */
+
+/* macros related to GLQF_ORT */
+#define I40E_ORT_SET_IDX(idx) (((idx) << \
+ I40E_GLQF_ORT_PIT_INDX_SHIFT) & \
+ I40E_GLQF_ORT_PIT_INDX_MASK)
+
+#define I40E_ORT_SET_COUNT(count) (((count) << \
+ I40E_GLQF_ORT_FIELD_CNT_SHIFT) & \
+ I40E_GLQF_ORT_FIELD_CNT_MASK)
+
+#define I40E_ORT_SET_PAYLOAD(payload) (((payload) << \
+ I40E_GLQF_ORT_FLX_PAYLOAD_SHIFT) & \
+ I40E_GLQF_ORT_FLX_PAYLOAD_MASK)
+
+#define I40E_ORT_PREP_VAL(idx, count, payload) (I40E_ORT_SET_IDX(idx) | \
+ I40E_ORT_SET_COUNT(count) | \
+ I40E_ORT_SET_PAYLOAD(payload))
+
+#define I40E_L3_GLQF_ORT_IDX 34 /* GLQF_ORT register index used for L3 */
+#define I40E_L4_GLQF_ORT_IDX 35 /* GLQF_ORT register index used for L4 */
+
+/* Flex PIT register index: first FLX_PIT entry of each layer's section */
+#define I40E_FLEX_PIT_IDX_START_L2 0
+#define I40E_FLEX_PIT_IDX_START_L3 3
+#define I40E_FLEX_PIT_IDX_START_L4 6
+
+#define I40E_FLEX_PIT_TABLE_SIZE 3 /* max entries in one flex PIT list */
+
+#define I40E_FLEX_DEST_UNUSED 63 /* destination written for unused entries */
+
+#define I40E_FLEX_INDEX_ENTRIES 8 /* number of FLEX index masks (50..57) */
+
+/* Flex MASK to disable all flexible entries */
+#define I40E_FLEX_INPUT_MASK (I40E_FLEX_50_MASK | I40E_FLEX_51_MASK | \
+ I40E_FLEX_52_MASK | I40E_FLEX_53_MASK | \
+ I40E_FLEX_54_MASK | I40E_FLEX_55_MASK | \
+ I40E_FLEX_56_MASK | I40E_FLEX_57_MASK)
+
+struct i40e_flex_pit {
+ struct list_head list; /* link in the L3 or L4 flex PIT list */
+ u16 src_offset; /* source offset; the lists are kept sorted by this */
+ u8 pit_index; /* PIT index; programmed as destination pit_index + 50 */
+};
+
/* struct that defines the Ethernet device */
struct i40e_pf {
struct pci_dev *pdev;
@@ -293,8 +377,17 @@ struct i40e_pf {
*/
u16 fd_tcp4_filter_cnt;
u16 fd_udp4_filter_cnt;
+ u16 fd_sctp4_filter_cnt;
u16 fd_ip4_filter_cnt;
+ /* Flexible filter table values that need to be programmed into
+ * hardware, which expects L3 and L4 to be programmed separately. We
+ * need to ensure that the values are in ascending order and don't have
+ * duplicates, so we track the L3 and L4 values in separate lists.
+ */
+ struct list_head l3_flex_pit_list;
+ struct list_head l4_flex_pit_list;
+
struct i40e_udp_port_config udp_ports[I40E_MAX_PF_UDP_OFFLOAD_PORTS];
u16 pending_udp_bitmap;
@@ -734,6 +827,43 @@ static inline int i40e_get_fd_cnt_all(struct i40e_pf *pf)
return pf->hw.fdir_shared_filter_count + pf->fdir_pf_filter_count;
}
+/**
+ * i40e_read_fd_input_set - read the 64-bit flow director input set
+ * @pf: pointer to the PF struct
+ * @addr: register addr (specific to the flow type)
+ *
+ * The input set is split across two 32-bit registers. Register 1 supplies
+ * the upper half and register 0 the lower half of the returned value.
+ **/
+static inline u64 i40e_read_fd_input_set(struct i40e_pf *pf, u16 addr)
+{
+	u64 upper = i40e_read_rx_ctl(&pf->hw, I40E_PRTQF_FD_INSET(addr, 1));
+	u64 lower = i40e_read_rx_ctl(&pf->hw, I40E_PRTQF_FD_INSET(addr, 0));
+
+	return (upper << 32) + lower;
+}
+
+/**
+ * i40e_write_fd_input_set - write the 64-bit flow director input set
+ * @pf: pointer to the PF struct
+ * @addr: register addr (specific to the flow type)
+ * @val: value to be written
+ *
+ * The value is split across two 32-bit registers. The upper half is written
+ * to register 1 first, followed by the lower half to register 0.
+ **/
+static inline void i40e_write_fd_input_set(struct i40e_pf *pf,
+					   u16 addr, u64 val)
+{
+	u32 upper = (u32)(val >> 32);
+	u32 lower = (u32)(val & 0xFFFFFFFFULL);
+
+	i40e_write_rx_ctl(&pf->hw, I40E_PRTQF_FD_INSET(addr, 1), upper);
+	i40e_write_rx_ctl(&pf->hw, I40E_PRTQF_FD_INSET(addr, 0), lower);
+}
+
/* needed by i40e_ethtool.c */
int i40e_up(struct i40e_vsi *vsi);
void i40e_down(struct i40e_vsi *vsi);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 1c3805b4..8fac124 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -2332,6 +2332,102 @@ static int i40e_get_rss_hash_opts(struct i40e_pf *pf, struct ethtool_rxnfc *cmd)
}
/**
+ * i40e_check_mask - Classify how much of a field a mask covers
+ * @mask: the full mask value
+ * @field: mask of the field to check
+ *
+ * Returns a positive value when every bit of @field is set in @mask, zero
+ * when none of them are set, and a negative value when the field is only
+ * partially covered.
+ **/
+static int i40e_check_mask(u64 mask, u64 field)
+{
+	u64 covered = field & mask;
+
+	/* Test for full coverage first so an empty @field counts as set. */
+	if (covered == field)
+		return 1;
+
+	return covered ? -1 : 0;
+}
+
+/**
+ * i40e_parse_rx_flow_user_data - Deconstruct user-defined data
+ * @fsp: pointer to rx flow specification
+ * @data: pointer to userdef data structure for storage
+ *
+ * Decode the user-defined field of a flow specification into @data. No other
+ * code should read the user-defined data, so that every place interprets the
+ * value consistently.
+ *
+ * The user-defined field is a 64bit Big Endian format value. Single bit flags
+ * shall be defined starting from the highest bits, while small bit field
+ * values shall be defined starting from the lowest bits. Currently bits 15:0
+ * carry the flexible word and bits 31:16 carry the flexible offset; they are
+ * accepted only when all of the low 32 mask bits are set.
+ *
+ * @data is always zeroed first, even if FLOW_EXT is not set.
+ *
+ * Returns 0 if the data is valid, and -EINVAL if the low 32 bits are only
+ * partially masked, in which case the filter should be rejected.
+ **/
+static int i40e_parse_rx_flow_user_data(struct ethtool_rx_flow_spec *fsp,
+					struct i40e_rx_flow_userdef *data)
+{
+	u64 userdef_val, userdef_mask;
+	int covered;
+
+	/* Start from a known state regardless of what follows. */
+	memset(data, 0, sizeof(*data));
+
+	/* Without FLOW_EXT there is no user-defined data to decode. */
+	if (!(fsp->flow_type & FLOW_EXT))
+		return 0;
+
+	userdef_val = be64_to_cpu(*((__be64 *)fsp->h_ext.data));
+	userdef_mask = be64_to_cpu(*((__be64 *)fsp->m_ext.data));
+
+#define I40E_USERDEF_FLEX_WORD GENMASK_ULL(15, 0)
+#define I40E_USERDEF_FLEX_OFFSET GENMASK_ULL(31, 16)
+#define I40E_USERDEF_FLEX_FILTER GENMASK_ULL(31, 0)
+
+	covered = i40e_check_mask(userdef_mask, I40E_USERDEF_FLEX_FILTER);
+	if (covered < 0)
+		return -EINVAL;
+
+	/* Fully unmasked: no flexible filter was requested. */
+	if (!covered)
+		return 0;
+
+	data->flex_filter = true;
+	data->flex_word = userdef_val & I40E_USERDEF_FLEX_WORD;
+	data->flex_offset = (userdef_val & I40E_USERDEF_FLEX_OFFSET) >> 16;
+
+	return 0;
+}
+
+/**
+ * i40e_fill_rx_flow_user_data - Fill in user-defined data field
+ * @fsp: pointer to rx_flow specification
+ * @data: pointer to the userdef data structure to encode
+ *
+ * Encodes the userdef data structure into the user defined value and mask of
+ * the rx_flow_spec. Both are always written; FLOW_EXT is set in the flow type
+ * only when a flexible filter is present.
+ **/
+static void i40e_fill_rx_flow_user_data(struct ethtool_rx_flow_spec *fsp,
+					struct i40e_rx_flow_userdef *data)
+{
+	u64 userdef_val = 0, userdef_mask = 0;
+
+	if (data->flex_filter) {
+		userdef_val = ((u64)data->flex_offset << 16) | data->flex_word;
+		userdef_mask = I40E_USERDEF_FLEX_FILTER;
+
+		/* The mask is non-zero here, so the extension is in use. */
+		fsp->flow_type |= FLOW_EXT;
+	}
+
+	*((__be64 *)fsp->h_ext.data) = cpu_to_be64(userdef_val);
+	*((__be64 *)fsp->m_ext.data) = cpu_to_be64(userdef_mask);
+}
+
+/**
* i40e_get_ethtool_fdir_all - Populates the rule count of a command
* @pf: Pointer to the physical function struct
* @cmd: The command to get or set Rx flow classification rules
@@ -2382,8 +2478,11 @@ static int i40e_get_ethtool_fdir_entry(struct i40e_pf *pf,
{
struct ethtool_rx_flow_spec *fsp =
(struct ethtool_rx_flow_spec *)&cmd->fs;
+ struct i40e_rx_flow_userdef userdef = {0};
struct i40e_fdir_filter *rule = NULL;
struct hlist_node *node2;
+ u64 input_set;
+ u16 index;
hlist_for_each_entry_safe(rule, node2,
&pf->fdir_filter_list, fdir_node) {
@@ -2409,6 +2508,46 @@ static int i40e_get_ethtool_fdir_entry(struct i40e_pf *pf,
fsp->h_u.tcp_ip4_spec.ip4src = rule->dst_ip;
fsp->h_u.tcp_ip4_spec.ip4dst = rule->src_ip;
+ switch (rule->flow_type) {
+ case SCTP_V4_FLOW:
+ index = I40E_FILTER_PCTYPE_NONF_IPV4_SCTP;
+ break;
+ case TCP_V4_FLOW:
+ index = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
+ break;
+ case UDP_V4_FLOW:
+ index = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
+ break;
+ case IP_USER_FLOW:
+ index = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
+ break;
+ default:
+ /* If we have stored a filter with a flow type not listed here
+ * it is almost certainly a driver bug. WARN(), and then
+ * assign the input_set as if all fields are enabled to avoid
+ * reading unassigned memory.
+ */
+ WARN(1, "Missing input set index for flow_type %d\n",
+ rule->flow_type);
+ input_set = 0xFFFFFFFFFFFFFFFFULL;
+ goto no_input_set;
+ }
+
+ input_set = i40e_read_fd_input_set(pf, index);
+
+no_input_set:
+	/* Report a full mask for every field enabled in the input set. The
+	 * IPv4 address masks are 32 bits wide and the port masks are 16 bits
+	 * wide, so each constant must match the width of its field; a 16 bit
+	 * constant in an address mask would be reported as a partial match.
+	 */
+	if (input_set & I40E_L3_SRC_MASK)
+		fsp->m_u.tcp_ip4_spec.ip4src = htonl(0xFFFFFFFF);
+
+	if (input_set & I40E_L3_DST_MASK)
+		fsp->m_u.tcp_ip4_spec.ip4dst = htonl(0xFFFFFFFF);
+
+	if (input_set & I40E_L4_SRC_MASK)
+		fsp->m_u.tcp_ip4_spec.psrc = htons(0xFFFF);
+
+	if (input_set & I40E_L4_DST_MASK)
+		fsp->m_u.tcp_ip4_spec.pdst = htons(0xFFFF);
+
if (rule->dest_ctl == I40E_FILTER_PROGRAM_DESC_DEST_DROP_PACKET)
fsp->ring_cookie = RX_CLS_FLOW_DISC;
else
@@ -2419,11 +2558,24 @@ static int i40e_get_ethtool_fdir_entry(struct i40e_pf *pf,
vsi = i40e_find_vsi_from_id(pf, rule->dest_vsi);
if (vsi && vsi->type == I40E_VSI_SRIOV) {
- fsp->h_ext.data[1] = htonl(vsi->vf_id);
- fsp->m_ext.data[1] = htonl(0x1);
+ /* VFs are zero-indexed by the driver, but ethtool
+ * expects them to be one-indexed, so add one here
+ */
+ u64 ring_vf = vsi->vf_id + 1;
+
+ ring_vf <<= ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF;
+ fsp->ring_cookie |= ring_vf;
}
}
+ if (rule->flex_filter) {
+ userdef.flex_filter = true;
+ userdef.flex_word = be16_to_cpu(rule->flex_word);
+ userdef.flex_offset = rule->flex_offset;
+ }
+
+ i40e_fill_rx_flow_user_data(fsp, &userdef);
+
return 0;
}
@@ -2687,6 +2839,69 @@ static int i40e_update_ethtool_fdir_entry(struct i40e_vsi *vsi,
}
/**
+ * i40e_prune_one_flex_pit_list - Drop unreferenced entries from one PIT list
+ * @pf: pointer to PF structure
+ * @flex_pit_list: the L3 or L4 flex PIT list to prune
+ * @l3: true to consider only IP_USER_FLOW filters, false to consider only
+ *      the remaining filters
+ *
+ * Frees every entry of @flex_pit_list whose src_offset is not used as the
+ * flexible offset of any matching filter in the PF filter list.
+ **/
+static void i40e_prune_one_flex_pit_list(struct i40e_pf *pf,
+					 struct list_head *flex_pit_list,
+					 bool l3)
+{
+	struct i40e_flex_pit *pit, *next;
+	struct i40e_fdir_filter *filter;
+
+	list_for_each_entry_safe(pit, next, flex_pit_list, list) {
+		bool in_use = false;
+
+		hlist_for_each_entry(filter, &pf->fdir_filter_list,
+				     fdir_node) {
+			bool filter_is_l3 = filter->flow_type == IP_USER_FLOW;
+
+			/* Only filters of the requested layer count */
+			if (filter_is_l3 != l3)
+				continue;
+			if (!filter->flex_filter)
+				continue;
+			if (filter->flex_offset != pit->src_offset)
+				continue;
+
+			in_use = true;
+			break;
+		}
+
+		if (in_use)
+			continue;
+
+		/* No filter references this entry any more, so prune it */
+		list_del(&pit->list);
+		kfree(pit);
+	}
+}
+
+/**
+ * i40e_prune_flex_pit_list - Cleanup unused entries in FLX_PIT table
+ * @pf: pointer to PF structure
+ *
+ * This function searches the list of filters and determines which FLX_PIT
+ * entries are still required. It will prune any entries which are no longer
+ * in use after the deletion. The L3 list is checked against IP_USER_FLOW
+ * filters and the L4 list against all other filters.
+ **/
+static void i40e_prune_flex_pit_list(struct i40e_pf *pf)
+{
+	/* First the L3 table, followed by the L4 table */
+	i40e_prune_one_flex_pit_list(pf, &pf->l3_flex_pit_list, true);
+	i40e_prune_one_flex_pit_list(pf, &pf->l4_flex_pit_list, false);
+}
+
+/**
* i40e_del_fdir_entry - Deletes a Flow Director filter entry
* @vsi: Pointer to the targeted VSI
* @cmd: The command to get or set Rx flow classification rules
@@ -2713,11 +2928,691 @@ static int i40e_del_fdir_entry(struct i40e_vsi *vsi,
ret = i40e_update_ethtool_fdir_entry(vsi, NULL, fsp->location, cmd);
+ i40e_prune_flex_pit_list(pf);
+
i40e_fdir_check_and_reenable(pf);
return ret;
}
/**
+ * i40e_unused_pit_index - Find an unused PIT index
+ * @pf: the PF data structure
+ *
+ * Returns the lowest flexible PIT index that is referenced by neither the L3
+ * nor the L4 flexible PIT list. Both lists are consulted so that
+ * IP_USER_FLOW filters can program L3 and L4 with the same index. A bit
+ * field with one bit per index tracks which indexes are still free.
+ **/
+static u8 i40e_unused_pit_index(struct i40e_pf *pf)
+{
+	unsigned long free_indexes = 0xFF;
+	struct i40e_flex_pit *pit;
+
+	/* Knock out every index already claimed by an L3 entry ... */
+	list_for_each_entry(pit, &pf->l3_flex_pit_list, list)
+		clear_bit(pit->pit_index, &free_indexes);
+
+	/* ... and every index already claimed by an L4 entry. */
+	list_for_each_entry(pit, &pf->l4_flex_pit_list, list)
+		clear_bit(pit->pit_index, &free_indexes);
+
+	return find_first_bit(&free_indexes, I40E_FLEX_INDEX_ENTRIES);
+}
+
+/**
+ * i40e_find_flex_offset - Find an existing flex src_offset
+ * @flex_pit_list: L3 or L4 flex PIT list
+ * @src_offset: new src_offset to find
+ *
+ * Searches the flex_pit_list for an entry with the given offset and returns
+ * it if found. If the offset is not present, returns NULL when the list
+ * still has room for another entry and ERR_PTR(-ENOSPC) when it is full.
+ **/
+static
+struct i40e_flex_pit *i40e_find_flex_offset(struct list_head *flex_pit_list,
+					    u16 src_offset)
+{
+	struct i40e_flex_pit *pit;
+	int in_use = 0;
+
+	/* An already programmed offset can simply be re-used. */
+	list_for_each_entry(pit, flex_pit_list, list) {
+		if (pit->src_offset == src_offset)
+			return pit;
+		in_use++;
+	}
+
+	/* The offset is not programmed yet. It is programmed later on, so
+	 * only report here whether the table has space left for it.
+	 */
+	return in_use >= I40E_FLEX_PIT_TABLE_SIZE ? ERR_PTR(-ENOSPC) : NULL;
+}
+
+/**
+ * i40e_add_flex_offset - Add src_offset to flex PIT table list
+ * @flex_pit_list: L3 or L4 flex PIT list
+ * @src_offset: new src_offset to add
+ * @pit_index: the PIT index to program
+ *
+ * This function adds the new src_offset to the list, keeping the list sorted
+ * by src_offset in ascending order. It is expected that
+ * i40e_find_flex_offset has already been tried and returned NULL, indicating
+ * that this offset is not programmed, and that the list has enough space to
+ * store another offset.
+ *
+ * If an entry with the same src_offset is found anyway, nothing is added:
+ * the call succeeds when the PIT index matches and fails otherwise.
+ *
+ * Returns 0 on success, -EINVAL if the offset is already present with a
+ * different PIT index, and -ENOMEM if the new entry cannot be allocated.
+ **/
+static int i40e_add_flex_offset(struct list_head *flex_pit_list,
+				u16 src_offset,
+				u8 pit_index)
+{
+	/* Insert before this node; the list head means "append at the end" */
+	struct list_head *insert_before = flex_pit_list;
+	struct i40e_flex_pit *new_pit, *entry;
+
+	/* Locate the insertion point before allocating, so that a duplicate
+	 * offset never costs an allocation.
+	 */
+	list_for_each_entry(entry, flex_pit_list, list) {
+		if (src_offset == entry->src_offset) {
+			/* If the PIT index is not the same we can't re-use
+			 * the entry, so we must report an error.
+			 */
+			return pit_index == entry->pit_index ? 0 : -EINVAL;
+		}
+
+		if (src_offset < entry->src_offset) {
+			insert_before = &entry->list;
+			break;
+		}
+	}
+
+	new_pit = kzalloc(sizeof(*new_pit), GFP_KERNEL);
+	if (!new_pit)
+		return -ENOMEM;
+
+	new_pit->src_offset = src_offset;
+	new_pit->pit_index = pit_index;
+
+	list_add_tail(&new_pit->list, insert_before);
+	return 0;
+}
+
+/**
+ * __i40e_reprogram_flex_pit - Re-program specific FLX_PIT table
+ * @pf: Pointer to the PF structure
+ * @flex_pit_list: list of flexible src offsets in use
+ * @flex_pit_start: index to first entry for this section of the table
+ *
+ * In order to handle flexible data, the hardware uses a table of values
+ * called the FLX_PIT table. This table is used to indicate which sections of
+ * the input correspond to what PIT index values. Unfortunately, hardware is
+ * very restrictive about programming this table. Entries must be ordered by
+ * src_offset in ascending order, without duplicates. Additionally, unused
+ * entries must be set to the unused index value, and must have valid size and
+ * length according to the src_offset ordering.
+ *
+ * This function will reprogram the FLX_PIT register from a book-keeping
+ * structure that we guarantee is already ordered correctly, and has no more
+ * than 3 entries.
+ *
+ * To make things easier, we only support flexible values of one word length,
+ * rather than allowing variable length flexible values.
+ **/
+static void __i40e_reprogram_flex_pit(struct i40e_pf *pf,
+ struct list_head *flex_pit_list,
+ int flex_pit_start)
+{
+ struct i40e_flex_pit *entry = NULL;
+ u16 last_offset = 0;
+ int i = 0, j = 0;
+
+ /* First, loop over the list of flex PIT entries, and reprogram the
+ * registers.
+ */
+ list_for_each_entry(entry, flex_pit_list, list) {
+ /* We have to be careful when programming values for the
+ * largest SRC_OFFSET value. It is possible that adding
+ * additional empty values at the end would overflow the space
+ * for the SRC_OFFSET in the FLX_PIT register. To avoid this,
+ * we check here and add the empty values prior to adding the
+ * largest value.
+ *
+ * To determine this, we will use a loop from i+1 to 3, which
+ * will determine whether the unused entries would have valid
+ * SRC_OFFSET. Note that there cannot be extra entries past
+ * this value, because the only valid values would have been
+ * larger than I40E_MAX_FLEX_SRC_OFFSET, and thus would not
+ * have been added to the list in the first place.
+ */
+ for (j = i + 1; j < 3; j++) {
+ u16 offset = entry->src_offset + j;
+ int index = flex_pit_start + i;
+ u32 value = I40E_FLEX_PREP_VAL(I40E_FLEX_DEST_UNUSED,
+ 1,
+ offset - 3);
+
+ if (offset > I40E_MAX_FLEX_SRC_OFFSET) {
+ i40e_write_rx_ctl(&pf->hw,
+ I40E_PRTQF_FLX_PIT(index),
+ value);
+ i++;
+ }
+ }
+
+ /* Now, we can program the actual value into the table */
+ i40e_write_rx_ctl(&pf->hw,
+ I40E_PRTQF_FLX_PIT(flex_pit_start + i),
+ I40E_FLEX_PREP_VAL(entry->pit_index + 50,
+ 1,
+ entry->src_offset));
+ i++;
+ }
+
+ /* In order to program the last entries in the table, we need to
+ * determine the valid offset. If the list is empty, we'll just start
+ * with 0. Otherwise, we'll start with the last item offset and add 1.
+ * This ensures that all entries have valid sizes. If we don't do this
+ * correctly, the hardware will disable flexible field parsing.
+ *
+ * The loop above ran to completion, so the entry cursor is past the
+ * final element and list_prev_entry() steps back onto the last item.
+ */
+ if (!list_empty(flex_pit_list))
+ last_offset = list_prev_entry(entry, list)->src_offset + 1;
+
+ for (; i < 3; i++, last_offset++) {
+ i40e_write_rx_ctl(&pf->hw,
+ I40E_PRTQF_FLX_PIT(flex_pit_start + i),
+ I40E_FLEX_PREP_VAL(I40E_FLEX_DEST_UNUSED,
+ 1,
+ last_offset));
+ }
+}
+
+/**
+ * i40e_reprogram_flex_pit - Reprogram all FLX_PIT tables after input set change
+ * @pf: pointer to the PF structure
+ *
+ * Reprograms the L3 FLX_PIT section, then the L4 section, and finally the
+ * L3 and L4 GLQF ORT registers. See the internal helper function for the
+ * details of how a single section is programmed.
+ **/
+static void i40e_reprogram_flex_pit(struct i40e_pf *pf)
+{
+	/* Each ORT value names the first PIT index of its section */
+	u32 l3_ort = I40E_ORT_PREP_VAL(I40E_FLEX_PIT_IDX_START_L3, 3, 1);
+	u32 l4_ort = I40E_ORT_PREP_VAL(I40E_FLEX_PIT_IDX_START_L4, 3, 1);
+
+	__i40e_reprogram_flex_pit(pf, &pf->l3_flex_pit_list,
+				  I40E_FLEX_PIT_IDX_START_L3);
+	__i40e_reprogram_flex_pit(pf, &pf->l4_flex_pit_list,
+				  I40E_FLEX_PIT_IDX_START_L4);
+
+	/* We also need to program the L3 and L4 GLQF ORT register */
+	i40e_write_rx_ctl(&pf->hw, I40E_GLQF_ORT(I40E_L3_GLQF_ORT_IDX),
+			  l3_ort);
+	i40e_write_rx_ctl(&pf->hw, I40E_GLQF_ORT(I40E_L4_GLQF_ORT_IDX),
+			  l4_ort);
+}
+
+/**
+ * i40e_flow_str - Converts a flow_type into a human readable string
+ * @fsp: the flow specification whose flow type is described
+ *
+ * The FLOW_EXT flag is ignored. Currently only flow types we support are
+ * included here, and the string value attempts to match what ethtool would
+ * use to configure this flow type. Any other type yields "unknown".
+ **/
+static const char *i40e_flow_str(struct ethtool_rx_flow_spec *fsp)
+{
+	const char *name;
+
+	switch (fsp->flow_type & ~FLOW_EXT) {
+	case TCP_V4_FLOW:
+		name = "tcp4";
+		break;
+	case UDP_V4_FLOW:
+		name = "udp4";
+		break;
+	case SCTP_V4_FLOW:
+		name = "sctp4";
+		break;
+	case IP_USER_FLOW:
+		name = "ip4";
+		break;
+	default:
+		name = "unknown";
+		break;
+	}
+
+	return name;
+}
+
+/**
+ * i40e_pit_index_to_mask - Return the FLEX mask for a given PIT index
+ * @pit_index: PIT index to convert
+ *
+ * Returns the mask for a given PIT index, where index 0 maps to the FLEX 50
+ * mask and index 7 to the FLEX 57 mask. Will return 0 if the pit_index is
+ * out of range.
+ **/
+static u64 i40e_pit_index_to_mask(int pit_index)
+{
+	const u64 flex_masks[I40E_FLEX_INDEX_ENTRIES] = {
+		I40E_FLEX_50_MASK,
+		I40E_FLEX_51_MASK,
+		I40E_FLEX_52_MASK,
+		I40E_FLEX_53_MASK,
+		I40E_FLEX_54_MASK,
+		I40E_FLEX_55_MASK,
+		I40E_FLEX_56_MASK,
+		I40E_FLEX_57_MASK,
+	};
+
+	if (pit_index < 0 || pit_index >= I40E_FLEX_INDEX_ENTRIES)
+		return 0;
+
+	return flex_masks[pit_index];
+}
+
+/**
+ * i40e_print_input_set - Show changes between two input sets
+ * @vsi: the vsi being configured
+ * @old: the old input set
+ * @new: the new input set
+ *
+ * Print the difference between old and new input sets by showing which series
+ * of words are toggled on or off. Only displays the bits we actually support
+ * changing: the L3 source and destination address, the L4 source and
+ * destination port, the SCTP verification tag and the flexible index entries.
+ * A line is printed for a field only when its state differs between the two
+ * sets. Both raw input set values are always printed at the end.
+ **/
+static void i40e_print_input_set(struct i40e_vsi *vsi, u64 old, u64 new)
+{
+ struct i40e_pf *pf = vsi->back;
+ bool old_value, new_value;
+ int i;
+
+ old_value = !!(old & I40E_L3_SRC_MASK);
+ new_value = !!(new & I40E_L3_SRC_MASK);
+ if (old_value != new_value)
+ netif_info(pf, drv, vsi->netdev, "L3 source address: %s -> %s\n",
+ old_value ? "ON" : "OFF",
+ new_value ? "ON" : "OFF");
+
+ old_value = !!(old & I40E_L3_DST_MASK);
+ new_value = !!(new & I40E_L3_DST_MASK);
+ if (old_value != new_value)
+ netif_info(pf, drv, vsi->netdev, "L3 destination address: %s -> %s\n",
+ old_value ? "ON" : "OFF",
+ new_value ? "ON" : "OFF");
+
+ old_value = !!(old & I40E_L4_SRC_MASK);
+ new_value = !!(new & I40E_L4_SRC_MASK);
+ if (old_value != new_value)
+ netif_info(pf, drv, vsi->netdev, "L4 source port: %s -> %s\n",
+ old_value ? "ON" : "OFF",
+ new_value ? "ON" : "OFF");
+
+ old_value = !!(old & I40E_L4_DST_MASK);
+ new_value = !!(new & I40E_L4_DST_MASK);
+ if (old_value != new_value)
+ netif_info(pf, drv, vsi->netdev, "L4 destination port: %s -> %s\n",
+ old_value ? "ON" : "OFF",
+ new_value ? "ON" : "OFF");
+
+ old_value = !!(old & I40E_VERIFY_TAG_MASK);
+ new_value = !!(new & I40E_VERIFY_TAG_MASK);
+ if (old_value != new_value)
+ netif_info(pf, drv, vsi->netdev, "SCTP verification tag: %s -> %s\n",
+ old_value ? "ON" : "OFF",
+ new_value ? "ON" : "OFF");
+
+ /* Show change of flexible filter entries, one line per PIT index whose
+ * FLEX mask bit differs between the two input sets.
+ */
+ for (i = 0; i < I40E_FLEX_INDEX_ENTRIES; i++) {
+ u64 flex_mask = i40e_pit_index_to_mask(i);
+
+ old_value = !!(old & flex_mask);
+ new_value = !!(new & flex_mask);
+ if (old_value != new_value)
+ netif_info(pf, drv, vsi->netdev, "FLEX index %d: %s -> %s\n",
+ i,
+ old_value ? "ON" : "OFF",
+ new_value ? "ON" : "OFF");
+ }
+
+ netif_info(pf, drv, vsi->netdev, " Current input set: %0llx\n",
+ old);
+ netif_info(pf, drv, vsi->netdev, "Requested input set: %0llx\n",
+ new);
+}
+
+/**
+ * i40e_check_fdir_input_set - Check that a given rx_flow_spec mask is valid
+ * @vsi: pointer to the targeted VSI
+ * @fsp: pointer to Rx flow specification
+ * @userdef: userdefined data from flow specification
+ *
+ * Ensures that a given ethtool_rx_flow_spec has a valid mask. Some support
+ * for partial matches exists with a few limitations. First, hardware only
+ * supports masking by word boundary (2 bytes) and not per individual bit.
+ * Second, hardware is limited to using one mask for a flow type and cannot
+ * use a separate mask for each filter.
+ *
+ * To support these limitations, if we already have a configured filter for
+ * the specified type, this function enforces that new filters of the type
+ * match the configured input set. Otherwise, if we do not have a filter of
+ * the specified type, we allow the input set to be updated to match the
+ * desired filter.
+ *
+ * To help ensure that administrators understand why a filter was not
+ * accepted, we print a diagnostic message displaying how the input set
+ * would change and warning to delete the preexisting filters if required.
+ *
+ * Returns 0 on successful input set match, and a negative return code on
+ * failure: -EOPNOTSUPP for an unsupported flow type, a partial field mask,
+ * a Type of Service mask, MFP mode, or preexisting filters that pin the
+ * current input set; -EINVAL for an IP version or L4 protocol mask, a
+ * misaligned or out of range flexible offset, or mismatched L3/L4 flexible
+ * indexes. Errors reported by i40e_find_flex_offset() and
+ * i40e_add_flex_offset() are passed through unchanged.
+ **/
+static int i40e_check_fdir_input_set(struct i40e_vsi *vsi,
+				     struct ethtool_rx_flow_spec *fsp,
+				     struct i40e_rx_flow_userdef *userdef)
+{
+	struct i40e_pf *pf = vsi->back;
+	struct ethtool_tcpip4_spec *tcp_ip4_spec;
+	struct ethtool_usrip4_spec *usr_ip4_spec;
+	u64 current_mask, new_mask;
+	bool new_flex_offset = false;
+	bool flex_l3 = false;
+	u16 *fdir_filter_count;
+	u16 index, src_offset = 0;
+	u8 pit_index = 0;
+	int err;
+
+	/* Map the flow type to its packet classifier type and to the counter
+	 * of filters already installed for that type.
+	 */
+	switch (fsp->flow_type & ~FLOW_EXT) {
+	case SCTP_V4_FLOW:
+		index = I40E_FILTER_PCTYPE_NONF_IPV4_SCTP;
+		fdir_filter_count = &pf->fd_sctp4_filter_cnt;
+		break;
+	case TCP_V4_FLOW:
+		index = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
+		fdir_filter_count = &pf->fd_tcp4_filter_cnt;
+		break;
+	case UDP_V4_FLOW:
+		index = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
+		fdir_filter_count = &pf->fd_udp4_filter_cnt;
+		break;
+	case IP_USER_FLOW:
+		index = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
+		fdir_filter_count = &pf->fd_ip4_filter_cnt;
+		flex_l3 = true;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	/* Read the current input set from register memory. */
+	current_mask = i40e_read_fd_input_set(pf, index);
+	new_mask = current_mask;
+
+	/* Determine, if any, the required changes to the input set in order
+	 * to support the provided mask.
+	 *
+	 * Hardware only supports masking at word (2 byte) granularity and does
+	 * not support full bitwise masking. This implementation simplifies
+	 * even further and only supports fully enabled or fully disabled
+	 * masks for each field, even though we could split the ip4src and
+	 * ip4dst fields.
+	 */
+	switch (fsp->flow_type & ~FLOW_EXT) {
+	case SCTP_V4_FLOW:
+		new_mask &= ~I40E_VERIFY_TAG_MASK;
+		/* Fall through */
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW:
+		tcp_ip4_spec = &fsp->m_u.tcp_ip4_spec;
+
+		/* IPv4 source address */
+		if (tcp_ip4_spec->ip4src == htonl(0xFFFFFFFF))
+			new_mask |= I40E_L3_SRC_MASK;
+		else if (!tcp_ip4_spec->ip4src)
+			new_mask &= ~I40E_L3_SRC_MASK;
+		else
+			return -EOPNOTSUPP;
+
+		/* IPv4 destination address */
+		if (tcp_ip4_spec->ip4dst == htonl(0xFFFFFFFF))
+			new_mask |= I40E_L3_DST_MASK;
+		else if (!tcp_ip4_spec->ip4dst)
+			new_mask &= ~I40E_L3_DST_MASK;
+		else
+			return -EOPNOTSUPP;
+
+		/* L4 source port */
+		if (tcp_ip4_spec->psrc == htons(0xFFFF))
+			new_mask |= I40E_L4_SRC_MASK;
+		else if (!tcp_ip4_spec->psrc)
+			new_mask &= ~I40E_L4_SRC_MASK;
+		else
+			return -EOPNOTSUPP;
+
+		/* L4 destination port */
+		if (tcp_ip4_spec->pdst == htons(0xFFFF))
+			new_mask |= I40E_L4_DST_MASK;
+		else if (!tcp_ip4_spec->pdst)
+			new_mask &= ~I40E_L4_DST_MASK;
+		else
+			return -EOPNOTSUPP;
+
+		/* Filtering on Type of Service is not supported. */
+		if (tcp_ip4_spec->tos)
+			return -EOPNOTSUPP;
+
+		break;
+	case IP_USER_FLOW:
+		usr_ip4_spec = &fsp->m_u.usr_ip4_spec;
+
+		/* IPv4 source address */
+		if (usr_ip4_spec->ip4src == htonl(0xFFFFFFFF))
+			new_mask |= I40E_L3_SRC_MASK;
+		else if (!usr_ip4_spec->ip4src)
+			new_mask &= ~I40E_L3_SRC_MASK;
+		else
+			return -EOPNOTSUPP;
+
+		/* IPv4 destination address */
+		if (usr_ip4_spec->ip4dst == htonl(0xFFFFFFFF))
+			new_mask |= I40E_L3_DST_MASK;
+		else if (!usr_ip4_spec->ip4dst)
+			new_mask &= ~I40E_L3_DST_MASK;
+		else
+			return -EOPNOTSUPP;
+
+		/* First 4 bytes of L4 header */
+		if (usr_ip4_spec->l4_4_bytes == htonl(0xFFFFFFFF))
+			new_mask |= I40E_L4_SRC_MASK | I40E_L4_DST_MASK;
+		else if (!usr_ip4_spec->l4_4_bytes)
+			new_mask &= ~(I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
+		else
+			return -EOPNOTSUPP;
+
+		/* Filtering on Type of Service is not supported. */
+		if (usr_ip4_spec->tos)
+			return -EOPNOTSUPP;
+
+		/* Filtering on IP version is not supported */
+		if (usr_ip4_spec->ip_ver)
+			return -EINVAL;
+
+		/* Filtering on L4 protocol is not supported */
+		if (usr_ip4_spec->proto)
+			return -EINVAL;
+
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	/* First, clear all flexible filter entries */
+	new_mask &= ~I40E_FLEX_INPUT_MASK;
+
+	/* If we have a flexible filter, try to add this offset to the correct
+	 * flexible filter PIT list. Once finished, we can update the mask.
+	 * If the src_offset changed, we will get a new mask value which will
+	 * trigger an input set change.
+	 */
+	if (userdef->flex_filter) {
+		struct i40e_flex_pit *l3_flex_pit = NULL, *flex_pit = NULL;
+
+		/* Flexible offset must be even, since the flexible payload
+		 * must be aligned on 2-byte boundary.
+		 */
+		if (userdef->flex_offset & 0x1) {
+			dev_warn(&pf->pdev->dev,
+				 "Flexible data offset must be 2-byte aligned\n");
+			return -EINVAL;
+		}
+
+		/* The byte offset is converted to a count of 2-byte words */
+		src_offset = userdef->flex_offset >> 1;
+
+		/* FLX_PIT source offset value is only so large */
+		if (src_offset > I40E_MAX_FLEX_SRC_OFFSET) {
+			dev_warn(&pf->pdev->dev,
+				 "Flexible data must reside within first 64 bytes of the packet payload\n");
+			return -EINVAL;
+		}
+
+		/* See if this offset has already been programmed. If we get
+		 * an ERR_PTR, then the filter is not safe to add. Otherwise,
+		 * if we get a NULL pointer, this means we will need to add
+		 * the offset.
+		 */
+		flex_pit = i40e_find_flex_offset(&pf->l4_flex_pit_list,
+						 src_offset);
+		if (IS_ERR(flex_pit))
+			return PTR_ERR(flex_pit);
+
+		/* IP_USER_FLOW filters match both L4 (ICMP) and L3 (unknown)
+		 * packet types, and thus we need to program both L3 and L4
+		 * flexible values. These must have identical flexible index,
+		 * as otherwise we can't correctly program the input set. So
+		 * we'll find both an L3 and L4 index and make sure they are
+		 * the same.
+		 */
+		if (flex_l3) {
+			l3_flex_pit =
+				i40e_find_flex_offset(&pf->l3_flex_pit_list,
+						      src_offset);
+			if (IS_ERR(l3_flex_pit))
+				return PTR_ERR(l3_flex_pit);
+
+			if (flex_pit) {
+				/* If we already had a matching L4 entry, we
+				 * need to make sure that the L3 entry we
+				 * obtained uses the same index.
+				 */
+				if (l3_flex_pit) {
+					if (l3_flex_pit->pit_index !=
+					    flex_pit->pit_index) {
+						return -EINVAL;
+					}
+				} else {
+					new_flex_offset = true;
+				}
+			} else {
+				flex_pit = l3_flex_pit;
+			}
+		}
+
+		/* If we didn't find an existing flex offset, we need to
+		 * program a new one. However, we don't immediately program it
+		 * here because we will wait to program until after we check
+		 * that it is safe to change the input set.
+		 */
+		if (!flex_pit) {
+			new_flex_offset = true;
+			pit_index = i40e_unused_pit_index(pf);
+		} else {
+			pit_index = flex_pit->pit_index;
+		}
+
+		/* Update the mask with the new offset */
+		new_mask |= i40e_pit_index_to_mask(pit_index);
+	}
+
+	/* If the mask and flexible filter offsets for this filter match the
+	 * currently programmed values we don't need any input set change, so
+	 * this filter is safe to install.
+	 */
+	if (new_mask == current_mask && !new_flex_offset)
+		return 0;
+
+	netif_info(pf, drv, vsi->netdev, "Input set change requested for %s flows:\n",
+		   i40e_flow_str(fsp));
+	i40e_print_input_set(vsi, current_mask, new_mask);
+	if (new_flex_offset) {
+		/* Terminate the message with a newline like every other
+		 * diagnostic printed by this function.
+		 */
+		netif_info(pf, drv, vsi->netdev, "FLEX index %d: Offset -> %d\n",
+			   pit_index, src_offset);
+	}
+
+	/* Hardware input sets are global across multiple ports, so even the
+	 * main port cannot change them when in MFP mode as this would impact
+	 * any filters on the other ports.
+	 */
+	if (pf->flags & I40E_FLAG_MFP_ENABLED) {
+		netif_err(pf, drv, vsi->netdev, "Cannot change Flow Director input sets while MFP is enabled\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* This filter requires us to update the input set. However, hardware
+	 * only supports one input set per flow type, and does not support
+	 * separate masks for each filter. This means that we can only support
+	 * a single mask for all filters of a specific type.
+	 *
+	 * If we have preexisting filters, they obviously depend on the
+	 * current programmed input set. Display a diagnostic message in this
+	 * case explaining why the filter could not be accepted.
+	 */
+	if (*fdir_filter_count) {
+		netif_err(pf, drv, vsi->netdev, "Cannot change input set for %s flows until %d preexisting filters are removed\n",
+			  i40e_flow_str(fsp),
+			  *fdir_filter_count);
+		return -EOPNOTSUPP;
+	}
+
+	/* NOTE(review): the input set is written before the flexible offsets
+	 * are recorded, so a failure in i40e_add_flex_offset() below returns
+	 * an error with the new input set already programmed - confirm that
+	 * this ordering is intended.
+	 */
+	i40e_write_fd_input_set(pf, index, new_mask);
+
+	/* Add the new offset and update table, if necessary */
+	if (new_flex_offset) {
+		err = i40e_add_flex_offset(&pf->l4_flex_pit_list, src_offset,
+					   pit_index);
+		if (err)
+			return err;
+
+		if (flex_l3) {
+			err = i40e_add_flex_offset(&pf->l3_flex_pit_list,
+						   src_offset,
+						   pit_index);
+			if (err)
+				return err;
+		}
+
+		i40e_reprogram_flex_pit(pf);
+	}
+
+	return 0;
+}
+
+/**
* i40e_add_fdir_ethtool - Add/Remove Flow Director filters
* @vsi: pointer to the targeted VSI
* @cmd: command to get or set RX flow classification rules
@@ -2728,11 +3623,13 @@ static int i40e_del_fdir_entry(struct i40e_vsi *vsi,
static int i40e_add_fdir_ethtool(struct i40e_vsi *vsi,
struct ethtool_rxnfc *cmd)
{
+ struct i40e_rx_flow_userdef userdef;
struct ethtool_rx_flow_spec *fsp;
struct i40e_fdir_filter *input;
+ u16 dest_vsi = 0, q_index = 0;
struct i40e_pf *pf;
int ret = -EINVAL;
- u16 vf_id;
+ u8 dest_ctl;
if (!vsi)
return -EINVAL;
@@ -2753,18 +3650,49 @@ static int i40e_add_fdir_ethtool(struct i40e_vsi *vsi,
fsp = (struct ethtool_rx_flow_spec *)&cmd->fs;
+ /* Parse the user-defined field */
+ if (i40e_parse_rx_flow_user_data(fsp, &userdef))
+ return -EINVAL;
+
/* Extended MAC field is not supported */
if (fsp->flow_type & FLOW_MAC_EXT)
return -EINVAL;
+ ret = i40e_check_fdir_input_set(vsi, fsp, &userdef);
+ if (ret)
+ return ret;
+
if (fsp->location >= (pf->hw.func_caps.fd_filters_best_effort +
pf->hw.func_caps.fd_filters_guaranteed)) {
return -EINVAL;
}
- if ((fsp->ring_cookie != RX_CLS_FLOW_DISC) &&
- (fsp->ring_cookie >= vsi->num_queue_pairs))
- return -EINVAL;
+ /* ring_cookie is either the drop index, or is a mask of the queue
+ * index and VF id we wish to target.
+ */
+ if (fsp->ring_cookie == RX_CLS_FLOW_DISC) {
+ dest_ctl = I40E_FILTER_PROGRAM_DESC_DEST_DROP_PACKET;
+ } else {
+ u32 ring = ethtool_get_flow_spec_ring(fsp->ring_cookie);
+ u8 vf = ethtool_get_flow_spec_ring_vf(fsp->ring_cookie);
+
+ if (!vf) {
+ if (ring >= vsi->num_queue_pairs)
+ return -EINVAL;
+ dest_vsi = vsi->id;
+ } else {
+ /* VFs are zero-indexed, so we subtract one here */
+ vf--;
+
+ if (vf >= pf->num_alloc_vfs)
+ return -EINVAL;
+ if (ring >= pf->vf[vf].num_queue_pairs)
+ return -EINVAL;
+ dest_vsi = pf->vf[vf].lan_vsi_id;
+ }
+ dest_ctl = I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX;
+ q_index = ring;
+ }
input = kzalloc(sizeof(*input), GFP_KERNEL);
@@ -2772,20 +3700,14 @@ static int i40e_add_fdir_ethtool(struct i40e_vsi *vsi,
return -ENOMEM;
input->fd_id = fsp->location;
-
- if (fsp->ring_cookie == RX_CLS_FLOW_DISC)
- input->dest_ctl = I40E_FILTER_PROGRAM_DESC_DEST_DROP_PACKET;
- else
- input->dest_ctl =
- I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX;
-
- input->q_index = fsp->ring_cookie;
- input->flex_off = 0;
- input->pctype = 0;
- input->dest_vsi = vsi->id;
+ input->q_index = q_index;
+ input->dest_vsi = dest_vsi;
+ input->dest_ctl = dest_ctl;
input->fd_status = I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID;
input->cnt_index = I40E_FD_SB_STAT_IDX(pf->hw.pf_id);
- input->flow_type = fsp->flow_type;
+ input->dst_ip = fsp->h_u.tcp_ip4_spec.ip4src;
+ input->src_ip = fsp->h_u.tcp_ip4_spec.ip4dst;
+ input->flow_type = fsp->flow_type & ~FLOW_EXT;
input->ip4_proto = fsp->h_u.usr_ip4_spec.proto;
/* Reverse the src and dest notion, since the HW expects them to be from
@@ -2796,21 +3718,10 @@ static int i40e_add_fdir_ethtool(struct i40e_vsi *vsi,
input->dst_ip = fsp->h_u.tcp_ip4_spec.ip4src;
input->src_ip = fsp->h_u.tcp_ip4_spec.ip4dst;
- if (ntohl(fsp->m_ext.data[1])) {
- vf_id = ntohl(fsp->h_ext.data[1]);
- if (vf_id >= pf->num_alloc_vfs) {
- netif_info(pf, drv, vsi->netdev,
- "Invalid VF id %d\n", vf_id);
- goto free_input;
- }
- /* Find vsi id from vf id and override dest vsi */
- input->dest_vsi = pf->vf[vf_id].lan_vsi_id;
- if (input->q_index >= pf->vf[vf_id].num_queue_pairs) {
- netif_info(pf, drv, vsi->netdev,
- "Invalid queue id %d for VF %d\n",
- input->q_index, vf_id);
- goto free_input;
- }
+ if (userdef.flex_filter) {
+ input->flex_filter = true;
+ input->flex_word = cpu_to_be16(userdef.flex_word);
+ input->flex_offset = userdef.flex_offset;
}
ret = i40e_add_del_fdir(vsi, input, true);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index caccb8e..1d8febd 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -1883,19 +1883,12 @@ static void i40e_undo_add_filter_entries(struct i40e_vsi *vsi,
static
struct i40e_new_mac_filter *i40e_next_filter(struct i40e_new_mac_filter *next)
{
- while (next) {
- next = hlist_entry(next->hlist.next,
- typeof(struct i40e_new_mac_filter),
- hlist);
-
- /* keep going if we found a broadcast filter */
- if (next && is_broadcast_ether_addr(next->f->macaddr))
- continue;
-
- break;
+ hlist_for_each_entry_continue(next, hlist) {
+ if (!is_broadcast_ether_addr(next->f->macaddr))
+ return next;
}
- return next;
+ return NULL;
}
/**
@@ -3286,6 +3279,7 @@ static void i40e_fdir_filter_restore(struct i40e_vsi *vsi)
/* Reset FDir counters as we're replaying all existing filters */
pf->fd_tcp4_filter_cnt = 0;
pf->fd_udp4_filter_cnt = 0;
+ pf->fd_sctp4_filter_cnt = 0;
pf->fd_ip4_filter_cnt = 0;
hlist_for_each_entry_safe(filter, node,
@@ -5747,6 +5741,7 @@ err_setup_tx:
static void i40e_fdir_filter_exit(struct i40e_pf *pf)
{
struct i40e_fdir_filter *filter;
+ struct i40e_flex_pit *pit_entry, *tmp;
struct hlist_node *node2;
hlist_for_each_entry_safe(filter, node2,
@@ -5755,10 +5750,42 @@ static void i40e_fdir_filter_exit(struct i40e_pf *pf)
kfree(filter);
}
+ list_for_each_entry_safe(pit_entry, tmp, &pf->l3_flex_pit_list, list) {
+ list_del(&pit_entry->list);
+ kfree(pit_entry);
+ }
+ INIT_LIST_HEAD(&pf->l3_flex_pit_list);
+
+ list_for_each_entry_safe(pit_entry, tmp, &pf->l4_flex_pit_list, list) {
+ list_del(&pit_entry->list);
+ kfree(pit_entry);
+ }
+ INIT_LIST_HEAD(&pf->l4_flex_pit_list);
+
pf->fdir_pf_active_filters = 0;
pf->fd_tcp4_filter_cnt = 0;
pf->fd_udp4_filter_cnt = 0;
+ pf->fd_sctp4_filter_cnt = 0;
pf->fd_ip4_filter_cnt = 0;
+
+ /* Reprogram the default input set for TCP/IPv4 */
+ i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_TCP,
+ I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
+ I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
+
+ /* Reprogram the default input set for UDP/IPv4 */
+ i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_UDP,
+ I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
+ I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
+
+ /* Reprogram the default input set for SCTP/IPv4 */
+ i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_SCTP,
+ I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
+ I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
+
+ /* Reprogram the default input set for Other/IPv4 */
+ i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_OTHER,
+ I40E_L3_SRC_MASK | I40E_L3_DST_MASK);
}
/**
@@ -11125,6 +11152,9 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
hw->bus.bus_id = pdev->bus->number;
pf->instance = pfs_found;
+ INIT_LIST_HEAD(&pf->l3_flex_pit_list);
+ INIT_LIST_HEAD(&pf->l4_flex_pit_list);
+
/* set up the locks for the AQ, do this only once in probe
* and destroy them only once in remove
*/
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 3880e41..0ca307a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -71,6 +71,9 @@ static void i40e_fdir(struct i40e_ring *tx_ring,
flex_ptype |= I40E_TXD_FLTR_QW0_PCTYPE_MASK &
(fdata->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);
+ flex_ptype |= I40E_TXD_FLTR_QW0_PCTYPE_MASK &
+ (fdata->flex_offset << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT);
+
/* Use LAN VSI Id if not programmed by user */
flex_ptype |= I40E_TXD_FLTR_QW0_DEST_VSI_MASK &
((u32)(fdata->dest_vsi ? : pf->vsi[pf->lan_vsi]->id) <<
@@ -223,6 +226,14 @@ static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
ip->saddr = fd_data->src_ip;
udp->source = fd_data->src_port;
+ if (fd_data->flex_filter) {
+ u8 *payload = raw_packet + I40E_UDPIP_DUMMY_PACKET_LEN;
+ __be16 pattern = fd_data->flex_word;
+ u16 off = fd_data->flex_offset;
+
+ *((__force __be16 *)(payload + off)) = pattern;
+ }
+
fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
if (ret) {
@@ -289,6 +300,14 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
ip->saddr = fd_data->src_ip;
tcp->source = fd_data->src_port;
+ if (fd_data->flex_filter) {
+ u8 *payload = raw_packet + I40E_TCPIP_DUMMY_PACKET_LEN;
+ __be16 pattern = fd_data->flex_word;
+ u16 off = fd_data->flex_offset;
+
+ *((__force __be16 *)(payload + off)) = pattern;
+ }
+
fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
if (ret) {
@@ -327,6 +346,80 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
return 0;
}
+#define I40E_SCTPIP_DUMMY_PACKET_LEN 46
+/**
+ * i40e_add_del_fdir_sctpv4 - Program or remove an SCTPv4 Flow Director filter
+ * @vsi: pointer to the targeted VSI
+ * @fd_data: the flow director data required for the FDir descriptor
+ * @add: true adds a filter, false removes it
+ *
+ * Copies a static dummy frame into a freshly allocated buffer, fills in the
+ * IPv4 addresses, the SCTP ports and (when requested) the flexible word from
+ * @fd_data, then passes the buffer to i40e_program_fdir_filter(). On success
+ * the per-PF SCTPv4 filter counter is adjusted to reflect the change.
+ *
+ * Returns 0 if the filter was successfully added or removed, -ENOMEM if the
+ * packet buffer could not be allocated, and -EOPNOTSUPP if programming the
+ * filter failed.
+ **/
+static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
+				    struct i40e_fdir_filter *fd_data,
+				    bool add)
+{
+	/* Dummy frame template; bytes 12-13 carry 0x0800 and byte 23 carries
+	 * 0x84 (IP protocol number 132, SCTP).
+	 */
+	static char dummy_frame[] = {
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
+		0x45, 0, 0, 0x20, 0, 0, 0x40, 0, 0x40, 0x84,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	};
+	struct i40e_pf *pf = vsi->back;
+	struct sctphdr *sctph;
+	struct iphdr *iph;
+	u8 *frame;
+	int err;
+
+	frame = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
+	if (!frame)
+		return -ENOMEM;
+	memcpy(frame, dummy_frame, I40E_SCTPIP_DUMMY_PACKET_LEN);
+
+	/* The SCTP header sits directly behind the fixed-size IPv4 header */
+	iph = (struct iphdr *)(frame + IP_HEADER_OFFSET);
+	sctph = (struct sctphdr *)(iph + 1);
+
+	iph->saddr = fd_data->src_ip;
+	iph->daddr = fd_data->dst_ip;
+	sctph->source = fd_data->src_port;
+	sctph->dest = fd_data->dst_port;
+
+	if (fd_data->flex_filter) {
+		/* Place the flexible word flex_offset bytes past the end of
+		 * the dummy headers.
+		 */
+		u16 flex_off = fd_data->flex_offset;
+		__be16 flex_val = fd_data->flex_word;
+		u8 *flex_base = frame + I40E_SCTPIP_DUMMY_PACKET_LEN;
+
+		*((__force __be16 *)(flex_base + flex_off)) = flex_val;
+	}
+
+	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_SCTP;
+	err = i40e_program_fdir_filter(fd_data, frame, pf, add);
+	if (err) {
+		dev_info(&pf->pdev->dev,
+			 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
+			 fd_data->pctype, fd_data->fd_id, err);
+		/* Free the packet buffer since it wasn't added to the ring */
+		kfree(frame);
+		return -EOPNOTSUPP;
+	}
+
+	/* Only report the outcome when Flow Director debugging is enabled */
+	if (pf->hw.debug_mask & I40E_DEBUG_FD) {
+		if (!add)
+			dev_info(&pf->pdev->dev,
+				 "Filter deleted for PCTYPE %d loc = %d\n",
+				 fd_data->pctype, fd_data->fd_id);
+		else
+			dev_info(&pf->pdev->dev,
+				 "Filter OK for PCTYPE %d loc = %d\n",
+				 fd_data->pctype, fd_data->fd_id);
+	}
+
+	/* Keep the count of installed SCTPv4 filters in step */
+	if (!add)
+		pf->fd_sctp4_filter_cnt--;
+	else
+		pf->fd_sctp4_filter_cnt++;
+
+	return 0;
+}
+
#define I40E_IP_DUMMY_PACKET_LEN 34
/**
* i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for
@@ -362,6 +455,14 @@ static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
ip->daddr = fd_data->dst_ip;
ip->protocol = 0;
+ if (fd_data->flex_filter) {
+ u8 *payload = raw_packet + I40E_IP_DUMMY_PACKET_LEN;
+ __be16 pattern = fd_data->flex_word;
+ u16 off = fd_data->flex_offset;
+
+ *((__force __be16 *)(payload + off)) = pattern;
+ }
+
fd_data->pctype = i;
ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
if (ret) {
@@ -413,6 +514,9 @@ int i40e_add_del_fdir(struct i40e_vsi *vsi,
case UDP_V4_FLOW:
ret = i40e_add_del_fdir_udpv4(vsi, input, add);
break;
+ case SCTP_V4_FLOW:
+ ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
+ break;
case IP_USER_FLOW:
switch (input->ip4_proto) {
case IPPROTO_TCP:
@@ -421,6 +525,9 @@ int i40e_add_del_fdir(struct i40e_vsi *vsi,
case IPPROTO_UDP:
ret = i40e_add_del_fdir_udpv4(vsi, input, add);
break;
+ case IPPROTO_SCTP:
+ ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
+ break;
case IPPROTO_IP:
ret = i40e_add_del_fdir_ipv4(vsi, input, add);
break;
OpenPOWER on IntegriCloud