diff options
175 files changed, 6672 insertions, 2915 deletions
@@ -102,6 +102,8 @@ Koushik <raghavendra.koushik@neterion.com> Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski@samsung.com> Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski.k@gmail.com> Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> +Leon Romanovsky <leon@kernel.org> <leon@leon.nu> +Leon Romanovsky <leon@kernel.org> <leonro@mellanox.com> Leonid I Ananiev <leonid.i.ananiev@intel.com> Linas Vepstas <linas@austin.ibm.com> Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@web.de> diff --git a/MAINTAINERS b/MAINTAINERS index f1be26e..0407846 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7214,6 +7214,7 @@ M: Shiraz Saleem <shiraz.saleem@intel.com> L: linux-rdma@vger.kernel.org S: Supported F: drivers/infiniband/hw/i40iw/ +F: include/uapi/rdma/i40iw-abi.h INTEL SHA MULTIBUFFER DRIVER M: Megha Dey <megha.dey@linux.intel.com> diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 8517d6e..ee270e0 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -35,14 +35,13 @@ config INFINIBAND_USER_ACCESS libibverbs, libibcm and a hardware driver library from rdma-core <https://github.com/linux-rdma/rdma-core>. -config INFINIBAND_EXP_USER_ACCESS - bool "Enable the full uverbs ioctl interface (EXPERIMENTAL)" +config INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI + bool "Allow experimental legacy verbs in new ioctl uAPI (EXPERIMENTAL)" depends on INFINIBAND_USER_ACCESS ---help--- - IOCTL based ABI support for Infiniband. This allows userspace - to invoke the experimental IOCTL based ABI. - These commands are parsed via per-device parsing tree and - enables per-device features. + IOCTL based uAPI support for Infiniband is enabled by default for + new verbs only. This allows userspace to invoke the IOCTL based uAPI + for current legacy verbs too. config INFINIBAND_USER_MEM bool diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index f69833d..dda9e85 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -34,4 +34,6 @@ ib_ucm-y := ucm.o ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \ rdma_core.o uverbs_std_types.o uverbs_ioctl.o \ - uverbs_ioctl_merge.o + uverbs_ioctl_merge.o uverbs_std_types_cq.o \ + uverbs_std_types_flow_action.o uverbs_std_types_dm.o \ + uverbs_std_types_mr.o diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index cb1d2ab..88a7542 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -329,7 +329,8 @@ static void queue_req(struct addr_req *req) mutex_unlock(&lock); } -static int ib_nl_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, +static int ib_nl_fetch_ha(const struct dst_entry *dst, + struct rdma_dev_addr *dev_addr, const void *daddr, u32 seq, u16 family) { if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) @@ -340,7 +341,8 @@ static int ib_nl_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, return ib_nl_ip_send_msg(dev_addr, daddr, seq, family); } -static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, +static int dst_fetch_ha(const struct dst_entry *dst, + struct rdma_dev_addr *dev_addr, const void *daddr) { struct neighbour *n; @@ -364,7 +366,7 @@ static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, return ret; } -static bool has_gateway(struct dst_entry *dst, sa_family_t family) +static bool has_gateway(const struct dst_entry *dst, sa_family_t family) { struct rtable *rt; struct rt6_info *rt6; @@ -378,7 +380,7 @@ static bool has_gateway(struct dst_entry *dst, sa_family_t family) return rt6->rt6i_flags & RTF_GATEWAY; } -static int fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, +static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr, const struct sockaddr *dst_in, u32 seq) { const struct sockaddr_in *dst_in4 = @@ -482,7 +484,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, } #endif -static int addr_resolve_neigh(struct dst_entry *dst, +static int addr_resolve_neigh(const struct dst_entry *dst, const struct sockaddr *dst_in, struct rdma_dev_addr *addr, u32 seq) @@ -736,7 +738,6 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr, return addr_resolve(src_in, dst_addr, addr, false, 0); } -EXPORT_SYMBOL(rdma_resolve_ip_route); void rdma_addr_cancel(struct rdma_dev_addr *addr) { diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index e9a409d..e337b08 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -59,8 +59,6 @@ struct ib_update_work { union ib_gid zgid; EXPORT_SYMBOL(zgid); -static const struct ib_gid_attr zattr; - enum gid_attr_find_mask { GID_ATTR_FIND_MASK_GID = 1UL << 0, GID_ATTR_FIND_MASK_NETDEV = 1UL << 1, @@ -73,15 +71,6 @@ enum gid_table_entry_props { GID_TABLE_ENTRY_DEFAULT = 1UL << 1, }; -enum gid_table_write_action { - GID_TABLE_WRITE_ACTION_ADD, - GID_TABLE_WRITE_ACTION_DEL, - /* MODIFY only updates the GID table. Currently only used by - * ib_cache_update. - */ - GID_TABLE_WRITE_ACTION_MODIFY -}; - struct ib_gid_table_entry { unsigned long props; union ib_gid gid; @@ -100,31 +89,26 @@ struct ib_gid_table { * (a) Find the GID * (b) Delete it. * - * Add/delete should be carried out atomically. - * This is done by locking this mutex from multiple - * writers. We don't need this lock for IB, as the MAD - * layer replaces all entries. All data_vec entries - * are locked by this lock. **/ - struct mutex lock; - /* This lock protects the table entries from being - * read and written simultaneously. + /* Any writer to data_vec must hold this lock and the write side of + * rwlock. readers must hold only rwlock. All writers must be in a + * sleepable context. */ + struct mutex lock; + /* rwlock protects data_vec[ix]->props. */ rwlock_t rwlock; struct ib_gid_table_entry *data_vec; }; static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port) { - if (rdma_cap_roce_gid_table(ib_dev, port)) { - struct ib_event event; + struct ib_event event; - event.device = ib_dev; - event.element.port_num = port; - event.event = IB_EVENT_GID_CHANGE; + event.device = ib_dev; + event.element.port_num = port; + event.event = IB_EVENT_GID_CHANGE; - ib_dispatch_event(&event); - } + ib_dispatch_event(&event); } static const char * const gid_type_str[] = { @@ -165,94 +149,127 @@ int ib_cache_gid_parse_type_str(const char *buf) } EXPORT_SYMBOL(ib_cache_gid_parse_type_str); -/* This function expects that rwlock will be write locked in all - * scenarios and that lock will be locked in sleep-able (RoCE) - * scenarios. - */ -static int write_gid(struct ib_device *ib_dev, u8 port, - struct ib_gid_table *table, int ix, - const union ib_gid *gid, - const struct ib_gid_attr *attr, - enum gid_table_write_action action, - bool default_gid) - __releases(&table->rwlock) __acquires(&table->rwlock) +static void del_roce_gid(struct ib_device *device, u8 port_num, + struct ib_gid_table *table, int ix) { - int ret = 0; - struct net_device *old_net_dev; - enum ib_gid_type old_gid_type; + pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, + device->name, port_num, ix, + table->data_vec[ix].gid.raw); + + if (rdma_cap_roce_gid_table(device, port_num)) + device->del_gid(&table->data_vec[ix].attr, + &table->data_vec[ix].context); + dev_put(table->data_vec[ix].attr.ndev); +} - /* in rdma_cap_roce_gid_table, this funciton should be protected by a - * sleep-able lock. - */ +static int add_roce_gid(struct ib_gid_table *table, + const union ib_gid *gid, + const struct ib_gid_attr *attr) +{ + struct ib_gid_table_entry *entry; + int ix = attr->index; + int ret = 0; - if (rdma_cap_roce_gid_table(ib_dev, port)) { - table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID; - write_unlock_irq(&table->rwlock); - /* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by - * RoCE providers and thus only updates the cache. - */ - if (action == GID_TABLE_WRITE_ACTION_ADD) - ret = ib_dev->add_gid(ib_dev, port, ix, gid, attr, - &table->data_vec[ix].context); - else if (action == GID_TABLE_WRITE_ACTION_DEL) - ret = ib_dev->del_gid(ib_dev, port, ix, - &table->data_vec[ix].context); - write_lock_irq(&table->rwlock); + if (!attr->ndev) { + pr_err("%s NULL netdev device=%s port=%d index=%d\n", + __func__, attr->device->name, attr->port_num, + attr->index); + return -EINVAL; } - old_net_dev = table->data_vec[ix].attr.ndev; - old_gid_type = table->data_vec[ix].attr.gid_type; - if (old_net_dev && old_net_dev != attr->ndev) - dev_put(old_net_dev); - /* if modify_gid failed, just delete the old gid */ - if (ret || action == GID_TABLE_WRITE_ACTION_DEL) { - gid = &zgid; - attr = &zattr; - table->data_vec[ix].context = NULL; + entry = &table->data_vec[ix]; + if ((entry->props & GID_TABLE_ENTRY_INVALID) == 0) { + WARN(1, "GID table corruption device=%s port=%d index=%d\n", + attr->device->name, attr->port_num, + attr->index); + return -EINVAL; } - memcpy(&table->data_vec[ix].gid, gid, sizeof(*gid)); - memcpy(&table->data_vec[ix].attr, attr, sizeof(*attr)); - if (default_gid) { - table->data_vec[ix].props |= GID_TABLE_ENTRY_DEFAULT; - if (action == GID_TABLE_WRITE_ACTION_DEL) - table->data_vec[ix].attr.gid_type = old_gid_type; + if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) { + ret = attr->device->add_gid(gid, attr, &entry->context); + if (ret) { + pr_err("%s GID add failed device=%s port=%d index=%d\n", + __func__, attr->device->name, attr->port_num, + attr->index); + goto add_err; + } } - if (table->data_vec[ix].attr.ndev && - table->data_vec[ix].attr.ndev != old_net_dev) - dev_hold(table->data_vec[ix].attr.ndev); - - table->data_vec[ix].props &= ~GID_TABLE_ENTRY_INVALID; + dev_hold(attr->ndev); +add_err: + if (!ret) + pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, + attr->device->name, attr->port_num, ix, gid->raw); return ret; } -static int add_gid(struct ib_device *ib_dev, u8 port, - struct ib_gid_table *table, int ix, - const union ib_gid *gid, - const struct ib_gid_attr *attr, - bool default_gid) { - return write_gid(ib_dev, port, table, ix, gid, attr, - GID_TABLE_WRITE_ACTION_ADD, default_gid); -} +/** + * add_modify_gid - Add or modify GID table entry + * + * @table: GID table in which GID to be added or modified + * @gid: GID content + * @attr: Attributes of the GID + * + * Returns 0 on success or appropriate error code. It accepts zero + * GID addition for non RoCE ports for HCA's who report them as valid + * GID. However such zero GIDs are not added to the cache. + */ +static int add_modify_gid(struct ib_gid_table *table, + const union ib_gid *gid, + const struct ib_gid_attr *attr) +{ + int ret; + + if (rdma_protocol_roce(attr->device, attr->port_num)) { + ret = add_roce_gid(table, gid, attr); + if (ret) + return ret; + } else { + /* + * Some HCA's report multiple GID entries with only one + * valid GID, but remaining as zero GID. + * So ignore such behavior for IB link layer and don't + * fail the call, but don't add such entry to GID cache. + */ + if (!memcmp(gid, &zgid, sizeof(*gid))) + return 0; + } + + lockdep_assert_held(&table->lock); + memcpy(&table->data_vec[attr->index].gid, gid, sizeof(*gid)); + memcpy(&table->data_vec[attr->index].attr, attr, sizeof(*attr)); -static int modify_gid(struct ib_device *ib_dev, u8 port, - struct ib_gid_table *table, int ix, - const union ib_gid *gid, - const struct ib_gid_attr *attr, - bool default_gid) { - return write_gid(ib_dev, port, table, ix, gid, attr, - GID_TABLE_WRITE_ACTION_MODIFY, default_gid); + write_lock_irq(&table->rwlock); + table->data_vec[attr->index].props &= ~GID_TABLE_ENTRY_INVALID; + write_unlock_irq(&table->rwlock); + return 0; } -static int del_gid(struct ib_device *ib_dev, u8 port, - struct ib_gid_table *table, int ix, - bool default_gid) { - return write_gid(ib_dev, port, table, ix, &zgid, &zattr, - GID_TABLE_WRITE_ACTION_DEL, default_gid); +/** + * del_gid - Delete GID table entry + * + * @ib_dev: IB device whose GID entry to be deleted + * @port: Port number of the IB device + * @table: GID table of the IB device for a port + * @ix: GID entry index to delete + * + */ +static void del_gid(struct ib_device *ib_dev, u8 port, + struct ib_gid_table *table, int ix) +{ + lockdep_assert_held(&table->lock); + write_lock_irq(&table->rwlock); + table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID; + write_unlock_irq(&table->rwlock); + + if (rdma_protocol_roce(ib_dev, port)) + del_roce_gid(ib_dev, port, table, ix); + memcpy(&table->data_vec[ix].gid, &zgid, sizeof(zgid)); + memset(&table->data_vec[ix].attr, 0, sizeof(table->data_vec[ix].attr)); + table->data_vec[ix].context = NULL; } -/* rwlock should be read locked */ +/* rwlock should be read locked, or lock should be held */ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, const struct ib_gid_attr *val, bool default_gid, unsigned long mask, int *pempty) @@ -268,15 +285,32 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, i++; + /* find_gid() is used during GID addition where it is expected + * to return a free entry slot which is not duplicate. + * Free entry slot is requested and returned if pempty is set, + * so lookup free slot only if requested. + */ + if (pempty && empty < 0) { + if (data->props & GID_TABLE_ENTRY_INVALID) { + /* Found an invalid (free) entry; allocate it */ + if (data->props & GID_TABLE_ENTRY_DEFAULT) { + if (default_gid) + empty = curr_index; + } else { + empty = curr_index; + } + } + } + + /* + * Additionally find_gid() is used to find valid entry during + * lookup operation, where validity needs to be checked. So + * find the empty entry first to continue to search for a free + * slot and ignore its INVALID flag. + */ if (data->props & GID_TABLE_ENTRY_INVALID) continue; - if (empty < 0) - if (!memcmp(&data->gid, &zgid, sizeof(*gid)) && - !memcmp(attr, &zattr, sizeof(*attr)) && - !data->props) - empty = curr_index; - if (found >= 0) continue; @@ -312,20 +346,56 @@ static void make_default_gid(struct net_device *dev, union ib_gid *gid) addrconf_ifid_eui48(&gid->raw[8], dev); } -int ib_cache_gid_add(struct ib_device *ib_dev, u8 port, - union ib_gid *gid, struct ib_gid_attr *attr) +static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port, + union ib_gid *gid, struct ib_gid_attr *attr, + unsigned long mask, bool default_gid) { struct ib_gid_table *table; - int ix; int ret = 0; - struct net_device *idev; int empty; + int ix; - table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid; - + /* Do not allow adding zero GID in support of + * IB spec version 1.3 section 4.1.1 point (6) and + * section 12.7.10 and section 12.7.20 + */ if (!memcmp(gid, &zgid, sizeof(*gid))) return -EINVAL; + table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid; + + mutex_lock(&table->lock); + + ix = find_gid(table, gid, attr, default_gid, mask, &empty); + if (ix >= 0) + goto out_unlock; + + if (empty < 0) { + ret = -ENOSPC; + goto out_unlock; + } + attr->device = ib_dev; + attr->index = empty; + attr->port_num = port; + ret = add_modify_gid(table, gid, attr); + if (!ret) + dispatch_gid_change_event(ib_dev, port); + +out_unlock: + mutex_unlock(&table->lock); + if (ret) + pr_warn("%s: unable to add gid %pI6 error=%d\n", + __func__, gid->raw, ret); + return ret; +} + +int ib_cache_gid_add(struct ib_device *ib_dev, u8 port, + union ib_gid *gid, struct ib_gid_attr *attr) +{ + struct net_device *idev; + unsigned long mask; + int ret; + if (ib_dev->get_netdev) { idev = ib_dev->get_netdev(ib_dev, port); if (idev && attr->ndev != idev) { @@ -342,27 +412,11 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port, dev_put(idev); } - mutex_lock(&table->lock); - write_lock_irq(&table->rwlock); - - ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID | - GID_ATTR_FIND_MASK_GID_TYPE | - GID_ATTR_FIND_MASK_NETDEV, &empty); - if (ix >= 0) - goto out_unlock; - - if (empty < 0) { - ret = -ENOSPC; - goto out_unlock; - } - - ret = add_gid(ib_dev, port, table, empty, gid, attr, false); - if (!ret) - dispatch_gid_change_event(ib_dev, port); + mask = GID_ATTR_FIND_MASK_GID | + GID_ATTR_FIND_MASK_GID_TYPE | + GID_ATTR_FIND_MASK_NETDEV; -out_unlock: - write_unlock_irq(&table->rwlock); - mutex_unlock(&table->lock); + ret = __ib_cache_gid_add(ib_dev, port, gid, attr, mask, false); return ret; } @@ -370,29 +424,32 @@ int ib_cache_gid_del(struct ib_device *ib_dev, u8 port, union ib_gid *gid, struct ib_gid_attr *attr) { struct ib_gid_table *table; + int ret = 0; int ix; table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid; mutex_lock(&table->lock); - write_lock_irq(&table->rwlock); ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID | GID_ATTR_FIND_MASK_GID_TYPE | - GID_ATTR_FIND_MASK_NETDEV | - GID_ATTR_FIND_MASK_DEFAULT, + GID_ATTR_FIND_MASK_NETDEV, NULL); - if (ix < 0) + if (ix < 0) { + ret = -EINVAL; goto out_unlock; + } - if (!del_gid(ib_dev, port, table, ix, false)) - dispatch_gid_change_event(ib_dev, port); + del_gid(ib_dev, port, table, ix); + dispatch_gid_change_event(ib_dev, port); out_unlock: - write_unlock_irq(&table->rwlock); mutex_unlock(&table->lock); - return 0; + if (ret) + pr_debug("%s: can't delete gid %pI6 error=%d\n", + __func__, gid->raw, ret); + return ret; } int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, @@ -405,16 +462,14 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid; mutex_lock(&table->lock); - write_lock_irq(&table->rwlock); - for (ix = 0; ix < table->sz; ix++) - if (table->data_vec[ix].attr.ndev == ndev) - if (!del_gid(ib_dev, port, table, ix, - !!(table->data_vec[ix].props & - GID_TABLE_ENTRY_DEFAULT))) - deleted = true; + for (ix = 0; ix < table->sz; ix++) { + if (table->data_vec[ix].attr.ndev == ndev) { + del_gid(ib_dev, port, table, ix); + deleted = true; + } + } - write_unlock_irq(&table->rwlock); mutex_unlock(&table->lock); if (deleted) @@ -492,6 +547,19 @@ static int ib_cache_gid_find(struct ib_device *ib_dev, mask, port, index); } +/** + * ib_find_cached_gid_by_port - Returns the GID table index where a specified + * GID value occurs. It searches for the specified GID value in the local + * software cache. + * @device: The device to query. + * @gid: The GID value to search for. + * @gid_type: The GID type to search for. + * @port_num: The port number of the device where the GID value should be + * searched. + * @ndev: In RoCE, the net device of the device. Null means ignore. + * @index: The index into the cached GID table where the GID was found. This + * parameter may be NULL. + */ int ib_find_cached_gid_by_port(struct ib_device *ib_dev, const union ib_gid *gid, enum ib_gid_type gid_type, @@ -528,7 +596,7 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev, EXPORT_SYMBOL(ib_find_cached_gid_by_port); /** - * ib_find_gid_by_filter - Returns the GID table index where a specified + * ib_cache_gid_find_by_filter - Returns the GID table index where a specified * GID value occurs * @device: The device to query. * @gid: The GID value to search for. @@ -539,7 +607,7 @@ EXPORT_SYMBOL(ib_find_cached_gid_by_port); * otherwise, we continue searching the GID table. It's guaranteed that * while filter is executed, ndev field is valid and the structure won't * change. filter is executed in an atomic context. filter must not be NULL. - * @index: The index into the cached GID table where the GID was found. This + * @index: The index into the cached GID table where the GID was found. This * parameter may be NULL. * * ib_cache_gid_find_by_filter() searches for the specified GID value @@ -598,6 +666,7 @@ static struct ib_gid_table *alloc_gid_table(int sz) { struct ib_gid_table *table = kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL); + int i; if (!table) return NULL; @@ -611,6 +680,11 @@ static struct ib_gid_table *alloc_gid_table(int sz) table->sz = sz; rwlock_init(&table->rwlock); + /* Mark all entries as invalid so that allocator can allocate + * one of the invalid (free) entry. + */ + for (i = 0; i < sz; i++) + table->data_vec[i].props |= GID_TABLE_ENTRY_INVALID; return table; err_free_table: @@ -635,16 +709,15 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port, if (!table) return; - write_lock_irq(&table->rwlock); + mutex_lock(&table->lock); for (i = 0; i < table->sz; ++i) { if (memcmp(&table->data_vec[i].gid, &zgid, - sizeof(table->data_vec[i].gid))) - if (!del_gid(ib_dev, port, table, i, - table->data_vec[i].props & - GID_ATTR_FIND_MASK_DEFAULT)) - deleted = true; + sizeof(table->data_vec[i].gid))) { + del_gid(ib_dev, port, table, i); + deleted = true; + } } - write_unlock_irq(&table->rwlock); + mutex_unlock(&table->lock); if (deleted) dispatch_gid_change_event(ib_dev, port); @@ -657,9 +730,9 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port, { union ib_gid gid; struct ib_gid_attr gid_attr; - struct ib_gid_attr zattr_type = zattr; struct ib_gid_table *table; unsigned int gid_type; + unsigned long mask; table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid; @@ -668,60 +741,19 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port, gid_attr.ndev = ndev; for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) { - int ix; - union ib_gid current_gid; - struct ib_gid_attr current_gid_attr = {}; - if (1UL << gid_type & ~gid_type_mask) continue; gid_attr.gid_type = gid_type; - mutex_lock(&table->lock); - write_lock_irq(&table->rwlock); - ix = find_gid(table, NULL, &gid_attr, true, - GID_ATTR_FIND_MASK_GID_TYPE | - GID_ATTR_FIND_MASK_DEFAULT, - NULL); - - /* Coudn't find default GID location */ - if (WARN_ON(ix < 0)) - goto release; - - zattr_type.gid_type = gid_type; - - if (!__ib_cache_gid_get(ib_dev, port, ix, - ¤t_gid, ¤t_gid_attr) && - mode == IB_CACHE_GID_DEFAULT_MODE_SET && - !memcmp(&gid, ¤t_gid, sizeof(gid)) && - !memcmp(&gid_attr, ¤t_gid_attr, sizeof(gid_attr))) - goto release; - - if (memcmp(¤t_gid, &zgid, sizeof(current_gid)) || - memcmp(¤t_gid_attr, &zattr_type, - sizeof(current_gid_attr))) { - if (del_gid(ib_dev, port, table, ix, true)) { - pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n", - ix, gid.raw); - goto release; - } else { - dispatch_gid_change_event(ib_dev, port); - } - } - if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) { - if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true)) - pr_warn("ib_cache_gid: unable to add default gid %pI6\n", - gid.raw); - else - dispatch_gid_change_event(ib_dev, port); + mask = GID_ATTR_FIND_MASK_GID_TYPE | + GID_ATTR_FIND_MASK_DEFAULT; + __ib_cache_gid_add(ib_dev, port, &gid, + &gid_attr, mask, true); + } else if (mode == IB_CACHE_GID_DEFAULT_MODE_DELETE) { + ib_cache_gid_del(ib_dev, port, &gid, &gid_attr); } - -release: - if (current_gid_attr.ndev) - dev_put(current_gid_attr.ndev); - write_unlock_irq(&table->rwlock); - mutex_unlock(&table->lock); } } @@ -848,6 +880,20 @@ int ib_get_cached_gid(struct ib_device *device, } EXPORT_SYMBOL(ib_get_cached_gid); +/** + * ib_find_cached_gid - Returns the port number and GID table index where + * a specified GID value occurs. + * @device: The device to query. + * @gid: The GID value to search for. + * @gid_type: The GID type to search for. + * @ndev: In RoCE, the net device of the device. NULL means ignore. + * @port_num: The port number of the device where the GID value was found. + * @index: The index into the cached GID table where the GID was found. This + * parameter may be NULL. + * + * ib_find_cached_gid() searches for the specified GID value in + * the local software cache. + */ int ib_find_cached_gid(struct ib_device *device, const union ib_gid *gid, enum ib_gid_type gid_type, @@ -868,7 +914,7 @@ int ib_find_gid_by_filter(struct ib_device *device, void *context, u16 *index) { /* Only RoCE GID table supports filter function */ - if (!rdma_cap_roce_gid_table(device, port_num) && filter) + if (!rdma_protocol_roce(device, port_num) && filter) return -EPROTONOSUPPORT; return ib_cache_gid_find_by_filter(device, gid, @@ -910,8 +956,7 @@ int ib_get_cached_subnet_prefix(struct ib_device *device, unsigned long flags; int p; - if (port_num < rdma_start_port(device) || - port_num > rdma_end_port(device)) + if (!rdma_is_port_valid(device, port_num)) return -EINVAL; p = port_num - rdma_start_port(device); @@ -1021,7 +1066,7 @@ int ib_get_cached_port_state(struct ib_device *device, unsigned long flags; int ret = 0; - if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) + if (!rdma_is_port_valid(device, port_num)) return -EINVAL; read_lock_irqsave(&device->cache.lock, flags); @@ -1033,21 +1078,46 @@ int ib_get_cached_port_state(struct ib_device *device, } EXPORT_SYMBOL(ib_get_cached_port_state); +static int config_non_roce_gid_cache(struct ib_device *device, + u8 port, int gid_tbl_len) +{ + struct ib_gid_attr gid_attr = {}; + struct ib_gid_table *table; + union ib_gid gid; + int ret = 0; + int i; + + gid_attr.device = device; + gid_attr.port_num = port; + table = device->cache.ports[port - rdma_start_port(device)].gid; + + mutex_lock(&table->lock); + for (i = 0; i < gid_tbl_len; ++i) { + if (!device->query_gid) + continue; + ret = device->query_gid(device, port, i, &gid); + if (ret) { + pr_warn("query_gid failed (%d) for %s (index %d)\n", + ret, device->name, i); + goto err; + } + gid_attr.index = i; + add_modify_gid(table, &gid, &gid_attr); + } +err: + mutex_unlock(&table->lock); + return ret; +} + static void ib_cache_update(struct ib_device *device, u8 port, bool enforce_security) { struct ib_port_attr *tprops = NULL; struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache; - struct ib_gid_cache { - int table_len; - union ib_gid table[0]; - } *gid_cache = NULL; int i; int ret; struct ib_gid_table *table; - bool use_roce_gid_table = - rdma_cap_roce_gid_table(device, port); if (!rdma_is_port_valid(device, port)) return; @@ -1065,6 +1135,13 @@ static void ib_cache_update(struct ib_device *device, goto err; } + if (!rdma_protocol_roce(device, port)) { + ret = config_non_roce_gid_cache(device, port, + tprops->gid_tbl_len); + if (ret) + goto err; + } + pkey_cache = kmalloc(sizeof *pkey_cache + tprops->pkey_tbl_len * sizeof *pkey_cache->table, GFP_KERNEL); if (!pkey_cache) @@ -1072,15 +1149,6 @@ static void ib_cache_update(struct ib_device *device, pkey_cache->table_len = tprops->pkey_tbl_len; - if (!use_roce_gid_table) { - gid_cache = kmalloc(sizeof(*gid_cache) + tprops->gid_tbl_len * - sizeof(*gid_cache->table), GFP_KERNEL); - if (!gid_cache) - goto err; - - gid_cache->table_len = tprops->gid_tbl_len; - } - for (i = 0; i < pkey_cache->table_len; ++i) { ret = ib_query_pkey(device, port, i, pkey_cache->table + i); if (ret) { @@ -1090,33 +1158,12 @@ static void ib_cache_update(struct ib_device *device, } } - if (!use_roce_gid_table) { - for (i = 0; i < gid_cache->table_len; ++i) { - ret = ib_query_gid(device, port, i, - gid_cache->table + i, NULL); - if (ret) { - pr_warn("ib_query_gid failed (%d) for %s (index %d)\n", - ret, device->name, i); - goto err; - } - } - } - write_lock_irq(&device->cache.lock); old_pkey_cache = device->cache.ports[port - rdma_start_port(device)].pkey; device->cache.ports[port - rdma_start_port(device)].pkey = pkey_cache; - if (!use_roce_gid_table) { - write_lock(&table->rwlock); - for (i = 0; i < gid_cache->table_len; i++) { - modify_gid(device, port, table, i, gid_cache->table + i, - &zattr, false); - } - write_unlock(&table->rwlock); - } - device->cache.ports[port - rdma_start_port(device)].lmc = tprops->lmc; device->cache.ports[port - rdma_start_port(device)].port_state = tprops->state; @@ -1130,14 +1177,12 @@ static void ib_cache_update(struct ib_device *device, port, tprops->subnet_prefix); - kfree(gid_cache); kfree(old_pkey_cache); kfree(tprops); return; err: kfree(pkey_cache); - kfree(gid_cache); kfree(tprops); } diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index e674915..a92e1a5 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -462,13 +462,31 @@ static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc, grh, &av->ah_attr); } -static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av, - struct cm_id_private *cm_id_priv) +static int add_cm_id_to_port_list(struct cm_id_private *cm_id_priv, + struct cm_av *av, + struct cm_port *port) +{ + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&cm.lock, flags); + + if (&cm_id_priv->av == av) + list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list); + else if (&cm_id_priv->alt_av == av) + list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list); + else + ret = -EINVAL; + + spin_unlock_irqrestore(&cm.lock, flags); + return ret; +} + +static struct cm_port *get_cm_port_from_path(struct sa_path_rec *path) { struct cm_device *cm_dev; struct cm_port *port = NULL; unsigned long flags; - int ret; u8 p; struct net_device *ndev = ib_get_ndev_from_path(path); @@ -477,7 +495,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av, if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid, sa_conv_pathrec_to_gid_type(path), ndev, &p, NULL)) { - port = cm_dev->port[p-1]; + port = cm_dev->port[p - 1]; break; } } @@ -485,9 +503,20 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av, if (ndev) dev_put(ndev); + return port; +} +static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av, + struct cm_id_private *cm_id_priv) +{ + struct cm_device *cm_dev; + struct cm_port *port; + int ret; + + port = get_cm_port_from_path(path); if (!port) return -EINVAL; + cm_dev = port->cm_dev; ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num, be16_to_cpu(path->pkey), &av->pkey_index); @@ -502,16 +531,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av, av->timeout = path->packet_life_time + 1; - spin_lock_irqsave(&cm.lock, flags); - if (&cm_id_priv->av == av) - list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list); - else if (&cm_id_priv->alt_av == av) - list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list); - else - ret = -EINVAL; - - spin_unlock_irqrestore(&cm.lock, flags); - + ret = add_cm_id_to_port_list(cm_id_priv, av, port); return ret; } @@ -1523,6 +1543,8 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg, cm_req_get_primary_local_ack_timeout(req_msg); primary_path->packet_life_time -= (primary_path->packet_life_time > 0); primary_path->service_id = req_msg->service_id; + if (sa_path_is_roce(primary_path)) + primary_path->roce.route_resolved = false; if (cm_req_has_alt_path(req_msg)) { alt_path->dgid = req_msg->alt_local_gid; @@ -1542,6 +1564,9 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg, cm_req_get_alt_local_ack_timeout(req_msg); alt_path->packet_life_time -= (alt_path->packet_life_time > 0); alt_path->service_id = req_msg->service_id; + + if (sa_path_is_roce(alt_path)) + alt_path->roce.route_resolved = false; } cm_format_path_lid_from_req(req_msg, primary_path, alt_path); } @@ -3150,6 +3175,13 @@ static int cm_lap_handler(struct cm_work *work) struct ib_mad_send_buf *msg = NULL; int ret; + /* Currently Alternate path messages are not supported for + * RoCE link layer. + */ + if (rdma_protocol_roce(work->port->cm_dev->ib_device, + work->port->port_num)) + return -EINVAL; + /* todo: verify LAP request and send reject APR if invalid. */ lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id, @@ -3299,6 +3331,13 @@ static int cm_apr_handler(struct cm_work *work) struct cm_apr_msg *apr_msg; int ret; + /* Currently Alternate path messages are not supported for + * RoCE link layer. + */ + if (rdma_protocol_roce(work->port->cm_dev->ib_device, + work->port->port_num)) + return -EINVAL; + apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id, apr_msg->local_comm_id); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 6ab1059..51a6410 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -62,6 +62,7 @@ #include <rdma/iw_cm.h> #include "core_priv.h" +#include "cma_priv.h" MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("Generic RDMA CM Agent"); @@ -174,7 +175,7 @@ static struct cma_pernet *cma_pernet(struct net *net) return net_generic(net, cma_pernet_id); } -static struct idr *cma_pernet_idr(struct net *net, enum rdma_port_space ps) +static struct idr *cma_pernet_idr(struct net *net, enum rdma_ucm_port_space ps) { struct cma_pernet *pernet = cma_pernet(net); @@ -203,7 +204,7 @@ struct cma_device { }; struct rdma_bind_list { - enum rdma_port_space ps; + enum rdma_ucm_port_space ps; struct hlist_head owners; unsigned short port; }; @@ -216,7 +217,7 @@ struct class_port_info_context { u8 port_num; }; -static int cma_ps_alloc(struct net *net, enum rdma_port_space ps, +static int cma_ps_alloc(struct net *net, enum rdma_ucm_port_space ps, struct rdma_bind_list *bind_list, int snum) { struct idr *idr = cma_pernet_idr(net, ps); @@ -225,14 +226,15 @@ static int cma_ps_alloc(struct net *net, enum rdma_port_space ps, } static struct rdma_bind_list *cma_ps_find(struct net *net, - enum rdma_port_space ps, int snum) + enum rdma_ucm_port_space ps, int snum) { struct idr *idr = cma_pernet_idr(net, ps); return idr_find(idr, snum); } -static void cma_ps_remove(struct net *net, enum rdma_port_space ps, int snum) +static void cma_ps_remove(struct net *net, enum rdma_ucm_port_space ps, + int snum) { struct idr *idr = cma_pernet_idr(net, ps); @@ -327,46 +329,6 @@ struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev) * We do this by disabling removal notification while a callback is in process, * and reporting it after the callback completes. */ -struct rdma_id_private { - struct rdma_cm_id id; - - struct rdma_bind_list *bind_list; - struct hlist_node node; - struct list_head list; /* listen_any_list or cma_device.list */ - struct list_head listen_list; /* per device listens */ - struct cma_device *cma_dev; - struct list_head mc_list; - - int internal_id; - enum rdma_cm_state state; - spinlock_t lock; - struct mutex qp_mutex; - - struct completion comp; - atomic_t refcount; - struct mutex handler_mutex; - - int backlog; - int timeout_ms; - struct ib_sa_query *query; - int query_id; - union { - struct ib_cm_id *ib; - struct iw_cm_id *iw; - } cm_id; - - u32 seq_num; - u32 qkey; - u32 qp_num; - pid_t owner; - u32 options; - u8 srq; - u8 tos; - bool tos_set; - u8 reuseaddr; - u8 afonly; - enum ib_gid_type gid_type; -}; struct cma_multicast { struct rdma_id_private *id_priv; @@ -505,6 +467,8 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv, id_priv->id.route.addr.dev_addr.transport = rdma_node_get_transport(cma_dev->device->node_type); list_add_tail(&id_priv->list, &cma_dev->id_list); + id_priv->res.type = RDMA_RESTRACK_CM_ID; + rdma_restrack_add(&id_priv->res); } static void cma_attach_to_dev(struct rdma_id_private *id_priv, @@ -777,10 +741,10 @@ static void cma_deref_id(struct rdma_id_private *id_priv) complete(&id_priv->comp); } -struct rdma_cm_id *rdma_create_id(struct net *net, - rdma_cm_event_handler event_handler, - void *context, enum rdma_port_space ps, - enum ib_qp_type qp_type) +struct rdma_cm_id *__rdma_create_id(struct net *net, + rdma_cm_event_handler event_handler, + void *context, enum rdma_ucm_port_space ps, + enum ib_qp_type qp_type, const char *caller) { struct rdma_id_private *id_priv; @@ -788,7 +752,10 @@ struct rdma_cm_id *rdma_create_id(struct net *net, if (!id_priv) return ERR_PTR(-ENOMEM); - id_priv->owner = task_pid_nr(current); + if (caller) + id_priv->res.kern_name = caller; + else + rdma_restrack_set_task(&id_priv->res, current); id_priv->state = RDMA_CM_IDLE; id_priv->id.context = context; id_priv->id.event_handler = event_handler; @@ -808,7 +775,7 @@ struct rdma_cm_id *rdma_create_id(struct net *net, return &id_priv->id; } -EXPORT_SYMBOL(rdma_create_id); +EXPORT_SYMBOL(__rdma_create_id); static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) { @@ -1400,7 +1367,7 @@ static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event, return net_dev; } -static enum rdma_port_space rdma_ps_from_service_id(__be64 service_id) +static enum rdma_ucm_port_space rdma_ps_from_service_id(__be64 service_id) { return (be64_to_cpu(service_id) >> 16) & 0xffff; } @@ -1441,21 +1408,12 @@ static bool cma_match_private_data(struct rdma_id_private *id_priv, return true; } -static bool cma_protocol_roce_dev_port(struct ib_device *device, int port_num) -{ - enum rdma_link_layer ll = rdma_port_get_link_layer(device, port_num); - enum rdma_transport_type transport = - rdma_node_get_transport(device->node_type); - - return ll == IB_LINK_LAYER_ETHERNET && transport == RDMA_TRANSPORT_IB; -} - static bool cma_protocol_roce(const struct rdma_cm_id *id) { struct ib_device *device = id->device; const int port_num = id->port_num ?: rdma_start_port(device); - return cma_protocol_roce_dev_port(device, port_num); + return rdma_protocol_roce(device, port_num); } static bool cma_match_net_dev(const struct rdma_cm_id *id, @@ -1468,7 +1426,7 @@ static bool cma_match_net_dev(const struct rdma_cm_id *id, /* This request is an AF_IB request or a RoCE request */ return (!id->port_num || id->port_num == port_num) && (addr->src_addr.ss_family == AF_IB || - cma_protocol_roce_dev_port(id->device, port_num)); + rdma_protocol_roce(id->device, port_num)); return !addr->dev_addr.bound_dev_if || (net_eq(dev_net(net_dev), addr->dev_addr.net) && @@ -1523,7 +1481,7 @@ static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id, if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) { /* Assuming the protocol is AF_IB */ *net_dev = NULL; - } else if (cma_protocol_roce_dev_port(req.device, req.port)) { + } else if (rdma_protocol_roce(req.device, req.port)) { /* TODO find the net dev matching the request parameters * through the RoCE GID table */ *net_dev = NULL; @@ -1668,6 +1626,7 @@ void rdma_destroy_id(struct rdma_cm_id *id) mutex_unlock(&id_priv->handler_mutex); if (id_priv->cma_dev) { + rdma_restrack_del(&id_priv->res); if (rdma_cap_ib_cm(id_priv->id.device, 1)) { if (id_priv->cm_id.ib) ib_destroy_cm_id(id_priv->cm_id.ib); @@ -1817,6 +1776,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, struct ib_cm_event *ib_event, struct net_device *net_dev) { + struct rdma_id_private *listen_id_priv; struct rdma_id_private *id_priv; struct rdma_cm_id *id; struct rdma_route *rt; @@ -1826,9 +1786,11 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, ib_event->param.req_rcvd.primary_path->service_id; int ret; - id = rdma_create_id(listen_id->route.addr.dev_addr.net, + listen_id_priv = container_of(listen_id, struct rdma_id_private, id); + id = __rdma_create_id(listen_id->route.addr.dev_addr.net, listen_id->event_handler, listen_id->context, - listen_id->ps, ib_event->param.req_rcvd.qp_type); + listen_id->ps, ib_event->param.req_rcvd.qp_type, + listen_id_priv->res.kern_name); if (IS_ERR(id)) return NULL; @@ -1877,14 +1839,17 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, struct ib_cm_event *ib_event, struct net_device *net_dev) { + struct rdma_id_private *listen_id_priv; struct rdma_id_private *id_priv; struct rdma_cm_id *id; const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; struct net *net = listen_id->route.addr.dev_addr.net; int ret; - id = rdma_create_id(net, listen_id->event_handler, listen_id->context, - listen_id->ps, IB_QPT_UD); + listen_id_priv = container_of(listen_id, struct rdma_id_private, id); + id = __rdma_create_id(net, listen_id->event_handler, listen_id->context, + listen_id->ps, IB_QPT_UD, + listen_id_priv->res.kern_name); if (IS_ERR(id)) return NULL; @@ -2150,10 +2115,11 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, goto out; /* Create a new RDMA id for the new IW CM ID */ - new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net, - listen_id->id.event_handler, - listen_id->id.context, - RDMA_PS_TCP, IB_QPT_RC); + new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net, + listen_id->id.event_handler, + listen_id->id.context, + RDMA_PS_TCP, IB_QPT_RC, + listen_id->res.kern_name); if (IS_ERR(new_cm_id)) { ret = -ENOMEM; goto out; @@ -2278,8 +2244,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) return; - id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps, - id_priv->id.qp_type); + id = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps, + id_priv->id.qp_type, id_priv->res.kern_name); if (IS_ERR(id)) return; @@ -2541,6 +2507,7 @@ cma_iboe_set_path_rec_l2_fields(struct rdma_id_private *id_priv) gid_type = ib_network_to_gid_type(addr->dev_addr.network); route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type); + route->path_rec->roce.route_resolved = true; sa_path_set_ndev(route->path_rec, addr->dev_addr.net); sa_path_set_ifindex(route->path_rec, ndev->ifindex); sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr); @@ -3028,7 +2995,7 @@ static void cma_bind_port(struct rdma_bind_list *bind_list, hlist_add_head(&id_priv->node, &bind_list->owners); } -static int cma_alloc_port(enum rdma_port_space ps, +static int cma_alloc_port(enum rdma_ucm_port_space ps, struct rdma_id_private *id_priv, unsigned short snum) { struct rdma_bind_list *bind_list; @@ -3091,7 +3058,7 @@ static int cma_port_is_unique(struct rdma_bind_list *bind_list, return 0; } -static int cma_alloc_any_port(enum rdma_port_space ps, +static int cma_alloc_any_port(enum rdma_ucm_port_space ps, struct rdma_id_private *id_priv) { static unsigned int last_used_port; @@ -3169,7 +3136,7 @@ static int cma_check_port(struct rdma_bind_list *bind_list, return 0; } -static int cma_use_port(enum rdma_port_space ps, +static int cma_use_port(enum rdma_ucm_port_space ps, struct rdma_id_private *id_priv) { struct rdma_bind_list *bind_list; @@ -3203,8 +3170,8 @@ static int cma_bind_listen(struct rdma_id_private *id_priv) return ret; } -static enum rdma_port_space cma_select_inet_ps( - struct rdma_id_private *id_priv) +static enum rdma_ucm_port_space +cma_select_inet_ps(struct rdma_id_private *id_priv) { switch (id_priv->id.ps) { case RDMA_PS_TCP: @@ -3218,9 +3185,10 @@ static enum rdma_port_space cma_select_inet_ps( } } -static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv) +static enum rdma_ucm_port_space +cma_select_ib_ps(struct rdma_id_private *id_priv) { - enum rdma_port_space ps = 0; + enum rdma_ucm_port_space ps = 0; struct sockaddr_ib *sib; u64 sid_ps, mask, sid; @@ -3251,7 +3219,7 @@ static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv) static int cma_get_port(struct rdma_id_private *id_priv) { - enum rdma_port_space ps; + enum rdma_ucm_port_space ps; int ret; if (cma_family(id_priv) != AF_IB) @@ -3389,8 +3357,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) return 0; err2: - if (id_priv->cma_dev) + if (id_priv->cma_dev) { + rdma_restrack_del(&id_priv->res); cma_release_dev(id_priv); + } err1: cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE); return ret; @@ -3773,14 +3743,18 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv, return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep); } -int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) +int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, + const char *caller) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); - id_priv->owner = task_pid_nr(current); + if (caller) + id_priv->res.kern_name = caller; + else + rdma_restrack_set_task(&id_priv->res, current); if (!cma_comp(id_priv, RDMA_CM_CONNECT)) return -EINVAL; @@ -3820,7 +3794,7 @@ reject: rdma_reject(id, NULL, 0); return ret; } -EXPORT_SYMBOL(rdma_accept); +EXPORT_SYMBOL(__rdma_accept); int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) { @@ -3938,10 +3912,14 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) rdma_start_port(id_priv->cma_dev->device)]; event.event = RDMA_CM_EVENT_MULTICAST_JOIN; - ib_init_ah_from_mcmember(id_priv->id.device, - id_priv->id.port_num, &multicast->rec, - ndev, gid_type, - &event.param.ud.ah_attr); + ret = ib_init_ah_from_mcmember(id_priv->id.device, + id_priv->id.port_num, + &multicast->rec, + ndev, gid_type, + &event.param.ud.ah_attr); + if (ret) + event.event = RDMA_CM_EVENT_MULTICAST_ERROR; + event.param.ud.qp_num = 0xFFFFFF; event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey); if (ndev) @@ -4501,7 +4479,7 @@ static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb) RDMA_NL_RDMA_CM_ATTR_DST_ADDR)) goto out; - id_stats->pid = id_priv->owner; + id_stats->pid = task_pid_vnr(id_priv->res.task); id_stats->port_space = id->ps; id_stats->cm_state = id_priv->state; id_stats->qp_num = id_priv->qp_num; diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h new file mode 100644 index 0000000..194cfe7 --- /dev/null +++ b/drivers/infiniband/core/cma_priv.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2005 Voltaire Inc. All rights reserved. + * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. + * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. + * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _CMA_PRIV_H +#define _CMA_PRIV_H + +enum rdma_cm_state { + RDMA_CM_IDLE, + RDMA_CM_ADDR_QUERY, + RDMA_CM_ADDR_RESOLVED, + RDMA_CM_ROUTE_QUERY, + RDMA_CM_ROUTE_RESOLVED, + RDMA_CM_CONNECT, + RDMA_CM_DISCONNECT, + RDMA_CM_ADDR_BOUND, + RDMA_CM_LISTEN, + RDMA_CM_DEVICE_REMOVAL, + RDMA_CM_DESTROYING +}; + +struct rdma_id_private { + struct rdma_cm_id id; + + struct rdma_bind_list *bind_list; + struct hlist_node node; + struct list_head list; /* listen_any_list or cma_device.list */ + struct list_head listen_list; /* per device listens */ + struct cma_device *cma_dev; + struct list_head mc_list; + + int internal_id; + enum rdma_cm_state state; + spinlock_t lock; + struct mutex qp_mutex; + + struct completion comp; + atomic_t refcount; + struct mutex handler_mutex; + + int backlog; + int timeout_ms; + struct ib_sa_query *query; + int query_id; + union { + struct ib_cm_id *ib; + struct iw_cm_id *iw; + } cm_id; + + u32 seq_num; + u32 qkey; + u32 qp_num; + u32 options; + u8 srq; + u8 tos; + bool tos_set; + u8 reuseaddr; + u8 afonly; + enum ib_gid_type gid_type; + + /* + * Internal to RDMA/core, don't use in the drivers + */ + struct rdma_restrack_entry res; +}; +#endif /* _CMA_PRIV_H */ diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 25bb178..54163a6 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -333,4 +333,15 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, return qp; } + +struct rdma_dev_addr; +int rdma_resolve_ip_route(struct sockaddr *src_addr, + const struct sockaddr *dst_addr, + struct rdma_dev_addr *addr); + +int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, + const union ib_gid *dgid, + u8 *dmac, const struct net_device *ndev, + int *hoplimit); + #endif /* _CORE_PRIV_H */ diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index b7459cf..ea9fbcf 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -103,7 +103,6 @@ static int ib_device_check_mandatory(struct ib_device *device) IB_MANDATORY_FUNC(query_device), IB_MANDATORY_FUNC(query_port), IB_MANDATORY_FUNC(query_pkey), - IB_MANDATORY_FUNC(query_gid), IB_MANDATORY_FUNC(alloc_pd), IB_MANDATORY_FUNC(dealloc_pd), IB_MANDATORY_FUNC(create_ah), @@ -853,7 +852,7 @@ int ib_query_port(struct ib_device *device, if (rdma_port_get_link_layer(device, port_num) != IB_LINK_LAYER_INFINIBAND) return 0; - err = ib_query_gid(device, port_num, 0, &gid, NULL); + err = device->query_gid(device, port_num, 0, &gid); if (err) return err; @@ -871,19 +870,13 @@ EXPORT_SYMBOL(ib_query_port); * @attr: Returned GID attributes related to this GID index (only in RoCE). * NULL means ignore. * - * ib_query_gid() fetches the specified GID table entry. + * ib_query_gid() fetches the specified GID table entry from the cache. */ int ib_query_gid(struct ib_device *device, u8 port_num, int index, union ib_gid *gid, struct ib_gid_attr *attr) { - if (rdma_cap_roce_gid_table(device, port_num)) - return ib_get_cached_gid(device, port_num, index, gid, attr); - - if (attr) - return -EINVAL; - - return device->query_gid(device, port_num, index, gid); + return ib_get_cached_gid(device, port_num, index, gid, attr); } EXPORT_SYMBOL(ib_query_gid); @@ -1049,19 +1042,18 @@ EXPORT_SYMBOL(ib_modify_port); * a specified GID value occurs. Its searches only for IB link layer. * @device: The device to query. * @gid: The GID value to search for. - * @ndev: The ndev related to the GID to search for. * @port_num: The port number of the device where the GID value was found. * @index: The index into the GID table where the GID was found. This * parameter may be NULL. */ int ib_find_gid(struct ib_device *device, union ib_gid *gid, - struct net_device *ndev, u8 *port_num, u16 *index) + u8 *port_num, u16 *index) { union ib_gid tmp_gid; int ret, port, i; for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) { - if (rdma_cap_roce_gid_table(device, port)) + if (!rdma_protocol_ib(device, port)) continue; for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) { diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index 81528f6..9821ae9 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -439,10 +439,9 @@ struct sk_buff *iwpm_create_nlmsg(u32 nl_op, struct nlmsghdr **nlh, struct sk_buff *skb = NULL; skb = dev_alloc_skb(IWPM_MSG_SIZE); - if (!skb) { - pr_err("%s Unable to allocate skb\n", __func__); + if (!skb) goto create_nlmsg_exit; - } + if (!(ibnl_put_msg(skb, nlh, 0, 0, nl_client, nl_op, NLM_F_REQUEST))) { pr_warn("%s: Unable to put the nlmsg header\n", __func__); diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 45f2f09..4eb72ff 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -724,21 +724,19 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, { int ret; u16 gid_index; - u8 p; - - if (rdma_protocol_roce(device, port_num)) { - ret = ib_find_cached_gid_by_port(device, &rec->port_gid, - gid_type, port_num, - ndev, - &gid_index); - } else if (rdma_protocol_ib(device, port_num)) { - ret = ib_find_cached_gid(device, &rec->port_gid, - IB_GID_TYPE_IB, NULL, &p, - &gid_index); - } else { - ret = -EINVAL; - } + /* GID table is not based on the netdevice for IB link layer, + * so ignore ndev during search. + */ + if (rdma_protocol_ib(device, port_num)) + ndev = NULL; + else if (!rdma_protocol_roce(device, port_num)) + return -EINVAL; + + ret = ib_find_cached_gid_by_port(device, &rec->port_gid, + gid_type, port_num, + ndev, + &gid_index); if (ret) return ret; diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 5326a68..eb56776 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -34,9 +34,11 @@ #include <linux/pid.h> #include <linux/pid_namespace.h> #include <net/netlink.h> +#include <rdma/rdma_cm.h> #include <rdma/rdma_netlink.h> #include "core_priv.h" +#include "cma_priv.h" static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 }, @@ -71,6 +73,31 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_RES_PID] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_KERN_NAME] = { .type = NLA_NUL_STRING, .len = TASK_COMM_LEN }, + [RDMA_NLDEV_ATTR_RES_CM_ID] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_SRC_ADDR] = { + .len = sizeof(struct __kernel_sockaddr_storage) }, + [RDMA_NLDEV_ATTR_RES_DST_ADDR] = { + .len = sizeof(struct __kernel_sockaddr_storage) }, + [RDMA_NLDEV_ATTR_RES_CQ] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_RES_CQ_ENTRY] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_RES_CQE] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_USECNT] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_RES_POLL_CTX] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_RES_MR] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_RES_MR_ENTRY] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_LKEY] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_IOVA] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_RES_MRLEN] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_RES_PD] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_RES_PD_ENTRY] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_NDEV_INDEX] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_NDEV_NAME] = { .type = NLA_NUL_STRING, + .len = IFNAMSIZ }, }; static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device) @@ -99,7 +126,7 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) return -EMSGSIZE; ib_get_device_fw_str(device, fw); - /* Device without FW has strlen(fw) */ + /* Device without FW has strlen(fw) = 0 */ if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw)) return -EMSGSIZE; @@ -115,8 +142,10 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) } static int fill_port_info(struct sk_buff *msg, - struct ib_device *device, u32 port) + struct ib_device *device, u32 port, + const struct net *net) { + struct net_device *netdev = NULL; struct ib_port_attr attr; int ret; @@ -150,7 +179,23 @@ static int fill_port_info(struct sk_buff *msg, return -EMSGSIZE; if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state)) return -EMSGSIZE; - return 0; + + if (device->get_netdev) + netdev = device->get_netdev(device, port); + + if (netdev && net_eq(dev_net(netdev), net)) { + ret = nla_put_u32(msg, + RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex); + if (ret) + goto out; + ret = nla_put_string(msg, + RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name); + } + +out: + if (netdev) + dev_put(netdev); + return ret; } static int fill_res_info_entry(struct sk_buff *msg, @@ -182,6 +227,8 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device) [RDMA_RESTRACK_PD] = "pd", [RDMA_RESTRACK_CQ] = "cq", [RDMA_RESTRACK_QP] = "qp", + [RDMA_RESTRACK_CM_ID] = "cm_id", + [RDMA_RESTRACK_MR] = "mr", }; struct rdma_restrack_root *res = &device->res; @@ -212,10 +259,29 @@ err: return ret; } -static int fill_res_qp_entry(struct sk_buff *msg, - struct ib_qp *qp, uint32_t port) +static int fill_res_name_pid(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + /* + * For user resources, user is should read /proc/PID/comm to get the + * name of the task file. + */ + if (rdma_is_kernel_res(res)) { + if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME, + res->kern_name)) + return -EMSGSIZE; + } else { + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, + task_pid_vnr(res->task))) + return -EMSGSIZE; + } + return 0; +} + +static int fill_res_qp_entry(struct sk_buff *msg, struct netlink_callback *cb, + struct rdma_restrack_entry *res, uint32_t port) { - struct rdma_restrack_entry *res = &qp->res; + struct ib_qp *qp = container_of(res, struct ib_qp, res); struct ib_qp_init_attr qp_init_attr; struct nlattr *entry_attr; struct ib_qp_attr qp_attr; @@ -262,19 +328,172 @@ static int fill_res_qp_entry(struct sk_buff *msg, if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state)) goto err; - /* - * Existence of task means that it is user QP and netlink - * user is invited to go and read /proc/PID/comm to get name - * of the task file and res->task_com should be NULL. - */ - if (rdma_is_kernel_res(res)) { - if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME, res->kern_name)) + if (fill_res_name_pid(msg, res)) + goto err; + + nla_nest_end(msg, entry_attr); + return 0; + +err: + nla_nest_cancel(msg, entry_attr); +out: + return -EMSGSIZE; +} + +static int fill_res_cm_id_entry(struct sk_buff *msg, + struct netlink_callback *cb, + struct rdma_restrack_entry *res, uint32_t port) +{ + struct rdma_id_private *id_priv = + container_of(res, struct rdma_id_private, res); + struct rdma_cm_id *cm_id = &id_priv->id; + struct nlattr *entry_attr; + + if (port && port != cm_id->port_num) + return 0; + + entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY); + if (!entry_attr) + goto out; + + if (cm_id->port_num && + nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num)) + goto err; + + if (id_priv->qp_num) { + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num)) goto err; - } else { - if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, task_pid_vnr(res->task))) + if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type)) goto err; } + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps)) + goto err; + + if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state)) + goto err; + + if (cm_id->route.addr.src_addr.ss_family && + nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR, + sizeof(cm_id->route.addr.src_addr), + &cm_id->route.addr.src_addr)) + goto err; + if (cm_id->route.addr.dst_addr.ss_family && + nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR, + sizeof(cm_id->route.addr.dst_addr), + &cm_id->route.addr.dst_addr)) + goto err; + + if (fill_res_name_pid(msg, res)) + goto err; + + nla_nest_end(msg, entry_attr); + return 0; + +err: + nla_nest_cancel(msg, entry_attr); +out: + return -EMSGSIZE; +} + +static int fill_res_cq_entry(struct sk_buff *msg, struct netlink_callback *cb, + struct rdma_restrack_entry *res, uint32_t port) +{ + struct ib_cq *cq = container_of(res, struct ib_cq, res); + struct nlattr *entry_attr; + + entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CQ_ENTRY); + if (!entry_attr) + goto out; + + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe)) + goto err; + if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT, + atomic_read(&cq->usecnt), 0)) + goto err; + + /* Poll context is only valid for kernel CQs */ + if (rdma_is_kernel_res(res) && + nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx)) + goto err; + + if (fill_res_name_pid(msg, res)) + goto err; + + nla_nest_end(msg, entry_attr); + return 0; + +err: + nla_nest_cancel(msg, entry_attr); +out: + return -EMSGSIZE; +} + +static int fill_res_mr_entry(struct sk_buff *msg, struct netlink_callback *cb, + struct rdma_restrack_entry *res, uint32_t port) +{ + struct ib_mr *mr = container_of(res, struct ib_mr, res); + struct nlattr *entry_attr; + + entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_MR_ENTRY); + if (!entry_attr) + goto out; + + if (netlink_capable(cb->skb, CAP_NET_ADMIN)) { + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey)) + goto err; + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey)) + goto err; + if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_IOVA, + mr->iova, 0)) + goto err; + } + + if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length, 0)) + goto err; + + if (fill_res_name_pid(msg, res)) + goto err; + + nla_nest_end(msg, entry_attr); + return 0; + +err: + nla_nest_cancel(msg, entry_attr); +out: + return -EMSGSIZE; +} + +static int fill_res_pd_entry(struct sk_buff *msg, struct netlink_callback *cb, + struct rdma_restrack_entry *res, uint32_t port) +{ + struct ib_pd *pd = container_of(res, struct ib_pd, res); + struct nlattr *entry_attr; + + entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_PD_ENTRY); + if (!entry_attr) + goto out; + + if (netlink_capable(cb->skb, CAP_NET_ADMIN)) { + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY, + pd->local_dma_lkey)) + goto err; + if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) && + nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY, + pd->unsafe_global_rkey)) + goto err; + } + if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT, + atomic_read(&pd->usecnt), 0)) + goto err; + if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) && + nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY, + pd->unsafe_global_rkey)) + goto err; + + if (fill_res_name_pid(msg, res)) + goto err; + nla_nest_end(msg, entry_attr); return 0; @@ -405,7 +624,7 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), 0, 0); - err = fill_port_info(msg, device, port); + err = fill_port_info(msg, device, port, sock_net(skb->sk)); if (err) goto err_free; @@ -465,7 +684,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb, RDMA_NLDEV_CMD_PORT_GET), 0, NLM_F_MULTI); - if (fill_port_info(skb, device, p)) { + if (fill_port_info(skb, device, p, sock_net(skb->sk))) { nlmsg_cancel(skb, nlh); goto out; } @@ -558,23 +777,60 @@ static int nldev_res_get_dumpit(struct sk_buff *skb, return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb); } -static int nldev_res_get_qp_dumpit(struct sk_buff *skb, - struct netlink_callback *cb) +struct nldev_fill_res_entry { + int (*fill_res_func)(struct sk_buff *msg, struct netlink_callback *cb, + struct rdma_restrack_entry *res, u32 port); + enum rdma_nldev_attr nldev_attr; + enum rdma_nldev_command nldev_cmd; +}; + +static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { + [RDMA_RESTRACK_QP] = { + .fill_res_func = fill_res_qp_entry, + .nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET, + .nldev_attr = RDMA_NLDEV_ATTR_RES_QP, + }, + [RDMA_RESTRACK_CM_ID] = { + .fill_res_func = fill_res_cm_id_entry, + .nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET, + .nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID, + }, + [RDMA_RESTRACK_CQ] = { + .fill_res_func = fill_res_cq_entry, + .nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET, + .nldev_attr = RDMA_NLDEV_ATTR_RES_CQ, + }, + [RDMA_RESTRACK_MR] = { + .fill_res_func = fill_res_mr_entry, + .nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET, + .nldev_attr = RDMA_NLDEV_ATTR_RES_MR, + }, + [RDMA_RESTRACK_PD] = { + .fill_res_func = fill_res_pd_entry, + .nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET, + .nldev_attr = RDMA_NLDEV_ATTR_RES_PD, + }, +}; + +static int res_get_common_dumpit(struct sk_buff *skb, + struct netlink_callback *cb, + enum rdma_restrack_type res_type) { + const struct nldev_fill_res_entry *fe = &fill_entries[res_type]; struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; struct rdma_restrack_entry *res; int err, ret = 0, idx = 0; struct nlattr *table_attr; struct ib_device *device; int start = cb->args[0]; - struct ib_qp *qp = NULL; struct nlmsghdr *nlh; u32 index, port = 0; + bool filled = false; err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, NULL); /* - * Right now, we are expecting the device index to get QP information, + * Right now, we are expecting the device index to get res information, * but it is possible to extend this code to return all devices in * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX. * if it doesn't exist, we will iterate over all devices. @@ -601,7 +857,7 @@ static int nldev_res_get_qp_dumpit(struct sk_buff *skb, } nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_QP_GET), + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd), 0, NLM_F_MULTI); if (fill_nldev_handle(skb, device)) { @@ -609,24 +865,26 @@ static int nldev_res_get_qp_dumpit(struct sk_buff *skb, goto err; } - table_attr = nla_nest_start(skb, RDMA_NLDEV_ATTR_RES_QP); + table_attr = nla_nest_start(skb, fe->nldev_attr); if (!table_attr) { ret = -EMSGSIZE; goto err; } down_read(&device->res.rwsem); - hash_for_each_possible(device->res.hash, res, node, RDMA_RESTRACK_QP) { + hash_for_each_possible(device->res.hash, res, node, res_type) { if (idx < start) goto next; if ((rdma_is_kernel_res(res) && task_active_pid_ns(current) != &init_pid_ns) || - (!rdma_is_kernel_res(res) && - task_active_pid_ns(current) != task_active_pid_ns(res->task))) + (!rdma_is_kernel_res(res) && task_active_pid_ns(current) != + task_active_pid_ns(res->task))) /* - * 1. Kernel QPs should be visible in init namspace only - * 2. Present only QPs visible in the current namespace + * 1. Kern resources should be visible in init + * namspace only + * 2. Present only resources visible in the current + * namespace */ goto next; @@ -638,10 +896,10 @@ static int nldev_res_get_qp_dumpit(struct sk_buff *skb, */ goto next; - qp = container_of(res, struct ib_qp, res); + filled = true; up_read(&device->res.rwsem); - ret = fill_res_qp_entry(skb, qp, port); + ret = fe->fill_res_func(skb, cb, res, port); down_read(&device->res.rwsem); /* * Return resource back, but it won't be released till @@ -667,10 +925,10 @@ next: idx++; cb->args[0] = idx; /* - * No more QPs to fill, cancel the message and + * No more entries to fill, cancel the message and * return 0 to mark end of dumpit. */ - if (!qp) + if (!filled) goto err; put_device(&device->dev); @@ -688,6 +946,36 @@ err_index: return ret; } +static int nldev_res_get_qp_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) +{ + return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_QP); +} + +static int nldev_res_get_cm_id_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) +{ + return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_CM_ID); +} + +static int nldev_res_get_cq_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) +{ + return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_CQ); +} + +static int nldev_res_get_mr_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) +{ + return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR); +} + +static int nldev_res_get_pd_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) +{ + return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_PD); +} + static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { [RDMA_NLDEV_CMD_GET] = { .doit = nldev_get_doit, @@ -714,6 +1002,18 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { * too. */ }, + [RDMA_NLDEV_CMD_RES_CM_ID_GET] = { + .dump = nldev_res_get_cm_id_dumpit, + }, + [RDMA_NLDEV_CMD_RES_CQ_GET] = { + .dump = nldev_res_get_cq_dumpit, + }, + [RDMA_NLDEV_CMD_RES_MR_GET] = { + .dump = nldev_res_get_mr_dumpit, + }, + [RDMA_NLDEV_CMD_RES_PD_GET] = { + .dump = nldev_res_get_pd_dumpit, + }, }; void __init nldev_init(void) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index d8eead5..a6e9049 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -350,13 +350,6 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type, return type->type_class->alloc_begin(type, ucontext); } -static void uverbs_uobject_add(struct ib_uobject *uobject) -{ - mutex_lock(&uobject->context->uobjects_lock); - list_add(&uobject->list, &uobject->context->uobjects); - mutex_unlock(&uobject->context->uobjects_lock); -} - static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj, enum rdma_remove_reason why) { @@ -502,7 +495,6 @@ out: static void alloc_commit_idr_uobject(struct ib_uobject *uobj) { - uverbs_uobject_add(uobj); spin_lock(&uobj->context->ufile->idr_lock); /* * We already allocated this IDR with a NULL object, so @@ -518,7 +510,6 @@ static void alloc_commit_fd_uobject(struct ib_uobject *uobj) struct ib_uobject_file *uobj_file = container_of(uobj, struct ib_uobject_file, uobj); - uverbs_uobject_add(&uobj_file->uobj); fd_install(uobj_file->uobj.id, uobj->object); /* This shouldn't be used anymore. Use the file object instead */ uobj_file->uobj.id = 0; @@ -545,6 +536,10 @@ int rdma_alloc_commit_uobject(struct ib_uobject *uobj) assert_uverbs_usecnt(uobj, true); atomic_set(&uobj->usecnt, 0); + mutex_lock(&uobj->context->uobjects_lock); + list_add(&uobj->list, &uobj->context->uobjects); + mutex_unlock(&uobj->context->uobjects_lock); + uobj->type->type_class->alloc_commit(uobj); up_read(&uobj->context->cleanup_rwsem); diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index 3dbc4e4..efddd13 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -3,20 +3,66 @@ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved. */ +#include <rdma/rdma_cm.h> #include <rdma/ib_verbs.h> #include <rdma/restrack.h> #include <linux/mutex.h> #include <linux/sched/task.h> #include <linux/pid_namespace.h> +#include "cma_priv.h" + void rdma_restrack_init(struct rdma_restrack_root *res) { init_rwsem(&res->rwsem); } +static const char *type2str(enum rdma_restrack_type type) +{ + static const char * const names[RDMA_RESTRACK_MAX] = { + [RDMA_RESTRACK_PD] = "PD", + [RDMA_RESTRACK_CQ] = "CQ", + [RDMA_RESTRACK_QP] = "QP", + [RDMA_RESTRACK_CM_ID] = "CM_ID", + [RDMA_RESTRACK_MR] = "MR", + }; + + return names[type]; +}; + void rdma_restrack_clean(struct rdma_restrack_root *res) { - WARN_ON_ONCE(!hash_empty(res->hash)); + struct rdma_restrack_entry *e; + char buf[TASK_COMM_LEN]; + struct ib_device *dev; + const char *owner; + int bkt; + + if (hash_empty(res->hash)) + return; + + dev = container_of(res, struct ib_device, res); + pr_err("restrack: %s", CUT_HERE); + pr_err("restrack: BUG: RESTRACK detected leak of resources on %s\n", + dev->name); + hash_for_each(res->hash, bkt, e, node) { + if (rdma_is_kernel_res(e)) { + owner = e->kern_name; + } else { + /* + * There is no need to call get_task_struct here, + * because we can be here only if there are more + * get_task_struct() call than put_task_struct(). + */ + get_task_comm(buf, e->task); + owner = buf; + } + + pr_err("restrack: %s %s object allocated by %s is not freed\n", + rdma_is_kernel_res(e) ? "Kernel" : "User", + type2str(e->type), owner); + } + pr_err("restrack: %s", CUT_HERE); } int rdma_restrack_count(struct rdma_restrack_root *res, @@ -40,51 +86,48 @@ EXPORT_SYMBOL(rdma_restrack_count); static void set_kern_name(struct rdma_restrack_entry *res) { - enum rdma_restrack_type type = res->type; - struct ib_qp *qp; - - if (type != RDMA_RESTRACK_QP) - /* PD and CQ types already have this name embedded in */ - return; + struct ib_pd *pd; - qp = container_of(res, struct ib_qp, res); - if (!qp->pd) { - WARN_ONCE(true, "XRC QPs are not supported\n"); - /* Survive, despite the programmer's error */ - res->kern_name = " "; - return; + switch (res->type) { + case RDMA_RESTRACK_QP: + pd = container_of(res, struct ib_qp, res)->pd; + if (!pd) { + WARN_ONCE(true, "XRC QPs are not supported\n"); + /* Survive, despite the programmer's error */ + res->kern_name = " "; + } + break; + case RDMA_RESTRACK_MR: + pd = container_of(res, struct ib_mr, res)->pd; + break; + default: + /* Other types set kern_name directly */ + pd = NULL; + break; } - res->kern_name = qp->pd->res.kern_name; + if (pd) + res->kern_name = pd->res.kern_name; } static struct ib_device *res_to_dev(struct rdma_restrack_entry *res) { - enum rdma_restrack_type type = res->type; - struct ib_device *dev; - struct ib_pd *pd; - struct ib_cq *cq; - struct ib_qp *qp; - - switch (type) { + switch (res->type) { case RDMA_RESTRACK_PD: - pd = container_of(res, struct ib_pd, res); - dev = pd->device; - break; + return container_of(res, struct ib_pd, res)->device; case RDMA_RESTRACK_CQ: - cq = container_of(res, struct ib_cq, res); - dev = cq->device; - break; + return container_of(res, struct ib_cq, res)->device; case RDMA_RESTRACK_QP: - qp = container_of(res, struct ib_qp, res); - dev = qp->device; - break; + return container_of(res, struct ib_qp, res)->device; + case RDMA_RESTRACK_CM_ID: + return container_of(res, struct rdma_id_private, + res)->id.device; + case RDMA_RESTRACK_MR: + return container_of(res, struct ib_mr, res)->device; default: - WARN_ONCE(true, "Wrong resource tracking type %u\n", type); + WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type); return NULL; } - - return dev; } static bool res_is_user(struct rdma_restrack_entry *res) @@ -96,6 +139,10 @@ static bool res_is_user(struct rdma_restrack_entry *res) return container_of(res, struct ib_cq, res)->uobject; case RDMA_RESTRACK_QP: return container_of(res, struct ib_qp, res)->uobject; + case RDMA_RESTRACK_CM_ID: + return !res->kern_name; + case RDMA_RESTRACK_MR: + return container_of(res, struct ib_mr, res)->pd->uobject; default: WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type); return false; @@ -109,13 +156,15 @@ void rdma_restrack_add(struct rdma_restrack_entry *res) if (!dev) return; + if (res->type != RDMA_RESTRACK_CM_ID || !res_is_user(res)) + res->task = NULL; + if (res_is_user(res)) { - get_task_struct(current); - res->task = current; + if (!res->task) + rdma_restrack_set_task(res, current); res->kern_name = NULL; } else { set_kern_name(res); - res->task = NULL; } kref_init(&res->kref); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 9f029a1..a61ec7e 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1227,118 +1227,130 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num) return src_path_mask; } -int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num, - struct sa_path_rec *rec, - struct rdma_ah_attr *ah_attr) +static int +roce_resolve_route_from_path(struct ib_device *device, u8 port_num, + struct sa_path_rec *rec) { + struct net_device *resolved_dev; + struct net_device *ndev; + struct net_device *idev; + struct rdma_dev_addr dev_addr = { + .bound_dev_if = ((sa_path_get_ifindex(rec) >= 0) ? + sa_path_get_ifindex(rec) : 0), + .net = sa_path_get_ndev(rec) ? + sa_path_get_ndev(rec) : + &init_net + }; + union { + struct sockaddr _sockaddr; + struct sockaddr_in _sockaddr_in; + struct sockaddr_in6 _sockaddr_in6; + } sgid_addr, dgid_addr; int ret; - u16 gid_index; - int use_roce; - struct net_device *ndev = NULL; - memset(ah_attr, 0, sizeof *ah_attr); - ah_attr->type = rdma_ah_find_type(device, port_num); + if (rec->roce.route_resolved) + return 0; - rdma_ah_set_dlid(ah_attr, be32_to_cpu(sa_path_get_dlid(rec))); + if (!device->get_netdev) + return -EOPNOTSUPP; - if ((ah_attr->type == RDMA_AH_ATTR_TYPE_OPA) && - (rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE))) - rdma_ah_set_make_grd(ah_attr, true); + rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid); + rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid); - rdma_ah_set_sl(ah_attr, rec->sl); - rdma_ah_set_path_bits(ah_attr, be32_to_cpu(sa_path_get_slid(rec)) & - get_src_path_mask(device, port_num)); - rdma_ah_set_port_num(ah_attr, port_num); - rdma_ah_set_static_rate(ah_attr, rec->rate); - use_roce = rdma_cap_eth_ah(device, port_num); - - if (use_roce) { - struct net_device *idev; - struct net_device *resolved_dev; - struct rdma_dev_addr dev_addr = { - .bound_dev_if = ((sa_path_get_ifindex(rec) >= 0) ? - sa_path_get_ifindex(rec) : 0), - .net = sa_path_get_ndev(rec) ? - sa_path_get_ndev(rec) : - &init_net - }; - union { - struct sockaddr _sockaddr; - struct sockaddr_in _sockaddr_in; - struct sockaddr_in6 _sockaddr_in6; - } sgid_addr, dgid_addr; - - if (!device->get_netdev) - return -EOPNOTSUPP; - - rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid); - rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid); - - /* validate the route */ - ret = rdma_resolve_ip_route(&sgid_addr._sockaddr, - &dgid_addr._sockaddr, &dev_addr); - if (ret) - return ret; + /* validate the route */ + ret = rdma_resolve_ip_route(&sgid_addr._sockaddr, + &dgid_addr._sockaddr, &dev_addr); + if (ret) + return ret; - if ((dev_addr.network == RDMA_NETWORK_IPV4 || - dev_addr.network == RDMA_NETWORK_IPV6) && - rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2) - return -EINVAL; + if ((dev_addr.network == RDMA_NETWORK_IPV4 || + dev_addr.network == RDMA_NETWORK_IPV6) && + rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2) + return -EINVAL; - idev = device->get_netdev(device, port_num); - if (!idev) - return -ENODEV; + idev = device->get_netdev(device, port_num); + if (!idev) + return -ENODEV; - resolved_dev = dev_get_by_index(dev_addr.net, - dev_addr.bound_dev_if); - if (!resolved_dev) { - dev_put(idev); - return -ENODEV; - } - ndev = ib_get_ndev_from_path(rec); - rcu_read_lock(); - if ((ndev && ndev != resolved_dev) || - (resolved_dev != idev && - !rdma_is_upper_dev_rcu(idev, resolved_dev))) - ret = -EHOSTUNREACH; - rcu_read_unlock(); - dev_put(idev); - dev_put(resolved_dev); - if (ret) { - if (ndev) - dev_put(ndev); - return ret; - } + resolved_dev = dev_get_by_index(dev_addr.net, + dev_addr.bound_dev_if); + if (!resolved_dev) { + ret = -ENODEV; + goto done; } + ndev = ib_get_ndev_from_path(rec); + rcu_read_lock(); + if ((ndev && ndev != resolved_dev) || + (resolved_dev != idev && + !rdma_is_upper_dev_rcu(idev, resolved_dev))) + ret = -EHOSTUNREACH; + rcu_read_unlock(); + dev_put(resolved_dev); + if (ndev) + dev_put(ndev); +done: + dev_put(idev); + if (!ret) + rec->roce.route_resolved = true; + return ret; +} - if (rec->hop_limit > 0 || use_roce) { - enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec); +static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num, + struct sa_path_rec *rec, + struct rdma_ah_attr *ah_attr) +{ + enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec); + struct net_device *ndev; + u16 gid_index; + int ret; - ret = ib_find_cached_gid_by_port(device, &rec->sgid, type, - port_num, ndev, &gid_index); - if (ret) { - if (ndev) - dev_put(ndev); - return ret; - } + ndev = ib_get_ndev_from_path(rec); + ret = ib_find_cached_gid_by_port(device, &rec->sgid, type, + port_num, ndev, &gid_index); + if (ndev) + dev_put(ndev); + if (ret) + return ret; - rdma_ah_set_grh(ah_attr, &rec->dgid, - be32_to_cpu(rec->flow_label), - gid_index, rec->hop_limit, - rec->traffic_class); - if (ndev) - dev_put(ndev); - } + rdma_ah_set_grh(ah_attr, &rec->dgid, + be32_to_cpu(rec->flow_label), + gid_index, rec->hop_limit, + rec->traffic_class); + return 0; +} - if (use_roce) { - u8 *dmac = sa_path_get_dmac(rec); +int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num, + struct sa_path_rec *rec, + struct rdma_ah_attr *ah_attr) +{ + int ret = 0; + + memset(ah_attr, 0, sizeof(*ah_attr)); + ah_attr->type = rdma_ah_find_type(device, port_num); + rdma_ah_set_sl(ah_attr, rec->sl); + rdma_ah_set_port_num(ah_attr, port_num); + rdma_ah_set_static_rate(ah_attr, rec->rate); - if (!dmac) - return -EINVAL; - memcpy(ah_attr->roce.dmac, dmac, ETH_ALEN); + if (sa_path_is_roce(rec)) { + ret = roce_resolve_route_from_path(device, port_num, rec); + if (ret) + return ret; + + memcpy(ah_attr->roce.dmac, sa_path_get_dmac(rec), ETH_ALEN); + } else { + rdma_ah_set_dlid(ah_attr, be32_to_cpu(sa_path_get_dlid(rec))); + if (sa_path_is_opa(rec) && + rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE)) + rdma_ah_set_make_grd(ah_attr, true); + + rdma_ah_set_path_bits(ah_attr, + be32_to_cpu(sa_path_get_slid(rec)) & + get_src_path_mask(device, port_num)); } - return 0; + if (rec->hop_limit > 0 || sa_path_is_roce(rec)) + ret = init_ah_attr_grh_fields(device, port_num, rec, ah_attr); + return ret; } EXPORT_SYMBOL(ib_init_ah_attr_from_path); diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 8ae1308e..31c7efa 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -273,6 +273,7 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused, break; case IB_SPEED_SDR: default: /* default to SDR for invalid rates */ + speed = " SDR"; rate = 25; break; } @@ -388,14 +389,26 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr, { struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); + union ib_gid *pgid; union ib_gid gid; ssize_t ret; ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid, NULL); - if (ret) - return ret; - return sprintf(buf, "%pI6\n", gid.raw); + /* If reading GID fails, it is likely due to GID entry being empty + * (invalid) or reserved GID in the table. + * User space expects to read GID table entries as long as it given + * index is within GID table size. + * Administrative/debugging tool fails to query rest of the GID entries + * if it hits error while querying a GID of the given index. + * To avoid user space throwing such error on fail to read gid, return + * zero GID as before. This maintains backward compatibility. + */ + if (ret) + pgid = &zgid; + else + pgid = &gid; + return sprintf(buf, "%pI6\n", pgid->raw); } static ssize_t show_port_gid_attr_ndev(struct ib_port *p, @@ -810,10 +823,15 @@ static ssize_t show_hw_stats(struct kobject *kobj, struct attribute *attr, dev = port->ibdev; stats = port->hw_stats; } + mutex_lock(&stats->lock); ret = update_hw_stats(dev, stats, hsa->port_num, hsa->index); if (ret) - return ret; - return print_hw_stat(stats, hsa->index, buf); + goto unlock; + ret = print_hw_stat(stats, hsa->index, buf); +unlock: + mutex_unlock(&stats->lock); + + return ret; } static ssize_t show_stats_lifespan(struct kobject *kobj, @@ -821,17 +839,25 @@ static ssize_t show_stats_lifespan(struct kobject *kobj, char *buf) { struct hw_stats_attribute *hsa; + struct rdma_hw_stats *stats; int msecs; hsa = container_of(attr, struct hw_stats_attribute, attr); if (!hsa->port_num) { struct ib_device *dev = container_of((struct device *)kobj, struct ib_device, dev); - msecs = jiffies_to_msecs(dev->hw_stats->lifespan); + + stats = dev->hw_stats; } else { struct ib_port *p = container_of(kobj, struct ib_port, kobj); - msecs = jiffies_to_msecs(p->hw_stats->lifespan); + + stats = p->hw_stats; } + + mutex_lock(&stats->lock); + msecs = jiffies_to_msecs(stats->lifespan); + mutex_unlock(&stats->lock); + return sprintf(buf, "%d\n", msecs); } @@ -840,6 +866,7 @@ static ssize_t set_stats_lifespan(struct kobject *kobj, const char *buf, size_t count) { struct hw_stats_attribute *hsa; + struct rdma_hw_stats *stats; int msecs; int jiffies; int ret; @@ -854,11 +881,18 @@ static ssize_t set_stats_lifespan(struct kobject *kobj, if (!hsa->port_num) { struct ib_device *dev = container_of((struct device *)kobj, struct ib_device, dev); - dev->hw_stats->lifespan = jiffies; + + stats = dev->hw_stats; } else { struct ib_port *p = container_of(kobj, struct ib_port, kobj); - p->hw_stats->lifespan = jiffies; + + stats = p->hw_stats; } + + mutex_lock(&stats->lock); + stats->lifespan = jiffies; + mutex_unlock(&stats->lock); + return count; } @@ -951,6 +985,7 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port, sysfs_attr_init(hsag->attrs[i]); } + mutex_init(&stats->lock); /* treat an error here as non-fatal */ hsag->attrs[i] = alloc_hsa_lifespan("lifespan", port_num); if (hsag->attrs[i]) diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 0170226..9eef96d 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -430,7 +430,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file, uevent->resp.id = ctx->id; } - if (copy_to_user((void __user *)(unsigned long)cmd.response, + if (copy_to_user(u64_to_user_ptr(cmd.response), &uevent->resp, sizeof(uevent->resp))) { result = -EFAULT; goto done; @@ -441,7 +441,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file, result = -ENOMEM; goto done; } - if (copy_to_user((void __user *)(unsigned long)cmd.data, + if (copy_to_user(u64_to_user_ptr(cmd.data), uevent->data, uevent->data_len)) { result = -EFAULT; goto done; @@ -453,7 +453,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file, result = -ENOMEM; goto done; } - if (copy_to_user((void __user *)(unsigned long)cmd.info, + if (copy_to_user(u64_to_user_ptr(cmd.info), uevent->info, uevent->info_len)) { result = -EFAULT; goto done; @@ -502,7 +502,7 @@ static ssize_t ib_ucm_create_id(struct ib_ucm_file *file, } resp.id = ctx->id; - if (copy_to_user((void __user *)(unsigned long)cmd.response, + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) { result = -EFAULT; goto err2; @@ -556,7 +556,7 @@ static ssize_t ib_ucm_destroy_id(struct ib_ucm_file *file, ib_ucm_cleanup_events(ctx); resp.events_reported = ctx->events_reported; - if (copy_to_user((void __user *)(unsigned long)cmd.response, + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) result = -EFAULT; @@ -588,7 +588,7 @@ static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file, resp.local_id = ctx->cm_id->local_id; resp.remote_id = ctx->cm_id->remote_id; - if (copy_to_user((void __user *)(unsigned long)cmd.response, + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) result = -EFAULT; @@ -625,7 +625,7 @@ static ssize_t ib_ucm_init_qp_attr(struct ib_ucm_file *file, ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr); - if (copy_to_user((void __user *)(unsigned long)cmd.response, + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) result = -EFAULT; @@ -699,7 +699,7 @@ static int ib_ucm_alloc_data(const void **dest, u64 src, u32 len) if (!len) return 0; - data = memdup_user((void __user *)(unsigned long)src, len); + data = memdup_user(u64_to_user_ptr(src), len); if (IS_ERR(data)) return PTR_ERR(data); @@ -721,7 +721,7 @@ static int ib_ucm_path_get(struct sa_path_rec **path, u64 src) if (!sa_path) return -ENOMEM; - if (copy_from_user(&upath, (void __user *)(unsigned long)src, + if (copy_from_user(&upath, u64_to_user_ptr(src), sizeof(upath))) { kfree(sa_path); diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index d933336..7432948 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -382,7 +382,11 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf, struct ucma_event *uevent; int ret = 0; - if (out_len < sizeof uevent->resp) + /* + * Old 32 bit user space does not send the 4 byte padding in the + * reserved field. We don't care, allow it to keep working. + */ + if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) @@ -416,8 +420,9 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf, uevent->resp.id = ctx->id; } - if (copy_to_user((void __user *)(unsigned long)cmd.response, - &uevent->resp, sizeof uevent->resp)) { + if (copy_to_user(u64_to_user_ptr(cmd.response), + &uevent->resp, + min_t(size_t, out_len, sizeof(uevent->resp)))) { ret = -EFAULT; goto done; } @@ -477,15 +482,15 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, return -ENOMEM; ctx->uid = cmd.uid; - cm_id = rdma_create_id(current->nsproxy->net_ns, - ucma_event_handler, ctx, cmd.ps, qp_type); + cm_id = __rdma_create_id(current->nsproxy->net_ns, + ucma_event_handler, ctx, cmd.ps, qp_type, NULL); if (IS_ERR(cm_id)) { ret = PTR_ERR(cm_id); goto err1; } resp.id = ctx->id; - if (copy_to_user((void __user *)(unsigned long)cmd.response, + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) { ret = -EFAULT; goto err2; @@ -615,7 +620,7 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf, } resp.events_reported = ucma_free_ctx(ctx); - if (copy_to_user((void __user *)(unsigned long)cmd.response, + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) ret = -EFAULT; @@ -845,7 +850,7 @@ static ssize_t ucma_query_route(struct ucma_file *file, ucma_copy_iw_route(&resp, &ctx->cm_id->route); out: - if (copy_to_user((void __user *)(unsigned long)cmd.response, + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) ret = -EFAULT; @@ -991,7 +996,7 @@ static ssize_t ucma_query(struct ucma_file *file, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - response = (void __user *)(unsigned long) cmd.response; + response = u64_to_user_ptr(cmd.response); ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); @@ -1094,12 +1099,12 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf, if (cmd.conn_param.valid) { ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); mutex_lock(&file->mut); - ret = rdma_accept(ctx->cm_id, &conn_param); + ret = __rdma_accept(ctx->cm_id, &conn_param, NULL); if (!ret) ctx->uid = cmd.uid; mutex_unlock(&file->mut); } else - ret = rdma_accept(ctx->cm_id, NULL); + ret = __rdma_accept(ctx->cm_id, NULL, NULL); ucma_put_ctx(ctx); return ret; @@ -1179,7 +1184,7 @@ static ssize_t ucma_init_qp_attr(struct ucma_file *file, goto out; ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr); - if (copy_to_user((void __user *)(unsigned long)cmd.response, + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) ret = -EFAULT; @@ -1241,6 +1246,9 @@ static int ucma_set_ib_path(struct ucma_context *ctx, if (!optlen) return -EINVAL; + if (!ctx->cm_id->device) + return -EINVAL; + memset(&sa_path, 0, sizeof(sa_path)); sa_path.rec_type = SA_PATH_REC_TYPE_IB; @@ -1315,7 +1323,7 @@ static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf, if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE)) return -EINVAL; - optval = memdup_user((void __user *) (unsigned long) cmd.optval, + optval = memdup_user(u64_to_user_ptr(cmd.optval), cmd.optlen); if (IS_ERR(optval)) { ret = PTR_ERR(optval); @@ -1395,7 +1403,7 @@ static ssize_t ucma_process_join(struct ucma_file *file, goto err2; resp.id = mc->id; - if (copy_to_user((void __user *)(unsigned long) cmd->response, + if (copy_to_user(u64_to_user_ptr(cmd->response), &resp, sizeof(resp))) { ret = -EFAULT; goto err3; @@ -1500,7 +1508,7 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file, resp.events_reported = mc->events_reported; kfree(mc); - if (copy_to_user((void __user *)(unsigned long)cmd.response, + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) ret = -EFAULT; out: @@ -1587,7 +1595,7 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file, ucma_unlock_files(cur_file, new_file); response: - if (copy_to_user((void __user *)(unsigned long)cmd.response, + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) ret = -EFAULT; diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index deccefb..cfb5161 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -46,6 +46,10 @@ #include <rdma/ib_verbs.h> #include <rdma/ib_umem.h> #include <rdma/ib_user_verbs.h> +#include <rdma/uverbs_std_types.h> + +#define UVERBS_MODULE_NAME ib_uverbs +#include <rdma/uverbs_named_ioctl.h> static inline void ib_uverbs_init_udata(struct ib_udata *udata, @@ -199,11 +203,18 @@ struct ib_ucq_object { u32 async_events_reported; }; +struct ib_uflow_resources; +struct ib_uflow_object { + struct ib_uobject uobject; + struct ib_uflow_resources *resources; +}; + extern const struct file_operations uverbs_event_fops; void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue); struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file, struct ib_device *ib_dev); void ib_uverbs_free_async_event_file(struct ib_uverbs_file *uverbs_file); +void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res); void ib_uverbs_release_ucq(struct ib_uverbs_file *file, struct ib_uverbs_completion_event_file *ev_file, @@ -226,7 +237,13 @@ int uverbs_dealloc_mw(struct ib_mw *mw); void ib_uverbs_detach_umcast(struct ib_qp *qp, struct ib_uqp_object *uobj); +void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata); +extern const struct uverbs_attr_def uverbs_uhw_compat_in; +extern const struct uverbs_attr_def uverbs_uhw_compat_out; long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); +int uverbs_destroy_def_handler(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs); struct ib_uverbs_flow_spec { union { @@ -240,13 +257,37 @@ struct ib_uverbs_flow_spec { }; struct ib_uverbs_flow_spec_eth eth; struct ib_uverbs_flow_spec_ipv4 ipv4; + struct ib_uverbs_flow_spec_esp esp; struct ib_uverbs_flow_spec_tcp_udp tcp_udp; struct ib_uverbs_flow_spec_ipv6 ipv6; struct ib_uverbs_flow_spec_action_tag flow_tag; struct ib_uverbs_flow_spec_action_drop drop; + struct ib_uverbs_flow_spec_action_handle action; }; }; +int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type, + const void *kern_spec_mask, + const void *kern_spec_val, + size_t kern_filter_sz, + union ib_flow_spec *ib_spec); + +extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DEVICE); +extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_PD); +extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_MR); +extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL); +extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_CQ); +extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_QP); +extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_AH); +extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_MW); +extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_SRQ); +extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW); +extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_WQ); +extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL); +extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_XRCD); +extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION); +extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DM); + #define IB_UVERBS_DECLARE_CMD(name) \ ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ struct ib_device *ib_dev, \ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index a148de3..13cb5e4 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -50,7 +50,7 @@ static struct ib_uverbs_completion_event_file * ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context) { - struct ib_uobject *uobj = uobj_get_read(uobj_get_type(comp_channel), + struct ib_uobject *uobj = uobj_get_read(UVERBS_OBJECT_COMP_CHANNEL, fd, context); struct ib_uobject_file *uobj_file; @@ -322,7 +322,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); - uobj = uobj_alloc(uobj_get_type(pd), file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_PD, file->ucontext); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -372,7 +372,7 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(uobj_get_type(pd), cmd.pd_handle, + uobj = uobj_get_write(UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -517,7 +517,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, } } - obj = (struct ib_uxrcd_object *)uobj_alloc(uobj_get_type(xrcd), + obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, file->ucontext); if (IS_ERR(obj)) { ret = PTR_ERR(obj); @@ -602,7 +602,7 @@ ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(uobj_get_type(xrcd), cmd.xrcd_handle, + uobj = uobj_get_write(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, file->ucontext); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -663,11 +663,11 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, if (ret) return ret; - uobj = uobj_alloc(uobj_get_type(mr), file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_MR, file->ucontext); if (IS_ERR(uobj)) return PTR_ERR(uobj); - pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext); if (!pd) { ret = -EINVAL; goto err_free; @@ -693,6 +693,8 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, mr->pd = pd; mr->uobject = uobj; atomic_inc(&pd->usecnt); + mr->res.type = RDMA_RESTRACK_MR; + rdma_restrack_add(&mr->res); uobj->object = mr; @@ -756,7 +758,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))) return -EINVAL; - uobj = uobj_get_write(uobj_get_type(mr), cmd.mr_handle, + uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, file->ucontext); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -770,7 +772,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, } if (cmd.flags & IB_MR_REREG_PD) { - pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext); if (!pd) { ret = -EINVAL; goto put_uobjs; @@ -822,7 +824,7 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(uobj_get_type(mr), cmd.mr_handle, + uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, file->ucontext); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -851,11 +853,11 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof(cmd))) return -EFAULT; - uobj = uobj_alloc(uobj_get_type(mw), file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_MW, file->ucontext); if (IS_ERR(uobj)) return PTR_ERR(uobj); - pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext); if (!pd) { ret = -EINVAL; goto err_free; @@ -914,7 +916,7 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof(cmd))) return -EFAULT; - uobj = uobj_get_write(uobj_get_type(mw), cmd.mw_handle, + uobj = uobj_get_write(UVERBS_OBJECT_MW, cmd.mw_handle, file->ucontext); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -939,7 +941,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_alloc(uobj_get_type(comp_channel), file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file->ucontext); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -984,7 +986,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, if (cmd->comp_vector >= file->device->num_comp_vectors) return ERR_PTR(-EINVAL); - obj = (struct ib_ucq_object *)uobj_alloc(uobj_get_type(cq), + obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, file->ucontext); if (IS_ERR(obj)) return obj; @@ -1173,7 +1175,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); - cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext); if (!cq) return -EINVAL; @@ -1238,7 +1240,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext); if (!cq) return -EINVAL; @@ -1285,7 +1287,7 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext); if (!cq) return -EINVAL; @@ -1312,7 +1314,7 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(uobj_get_type(cq), cmd.cq_handle, + uobj = uobj_get_write(UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -1371,7 +1373,7 @@ static int create_qp(struct ib_uverbs_file *file, if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) return -EPERM; - obj = (struct ib_uqp_object *)uobj_alloc(uobj_get_type(qp), + obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file->ucontext); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -1382,7 +1384,7 @@ static int create_qp(struct ib_uverbs_file *file, if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) + sizeof(cmd->rwq_ind_tbl_handle) && (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) { - ind_tbl = uobj_get_obj_read(rwq_ind_table, + ind_tbl = uobj_get_obj_read(rwq_ind_table, UVERBS_OBJECT_RWQ_IND_TBL, cmd->rwq_ind_tbl_handle, file->ucontext); if (!ind_tbl) { @@ -1409,7 +1411,7 @@ static int create_qp(struct ib_uverbs_file *file, has_sq = false; if (cmd->qp_type == IB_QPT_XRC_TGT) { - xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd->pd_handle, + xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->pd_handle, file->ucontext); if (IS_ERR(xrcd_uobj)) { @@ -1429,7 +1431,7 @@ static int create_qp(struct ib_uverbs_file *file, cmd->max_recv_sge = 0; } else { if (cmd->is_srq) { - srq = uobj_get_obj_read(srq, cmd->srq_handle, + srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd->srq_handle, file->ucontext); if (!srq || srq->srq_type == IB_SRQT_XRC) { ret = -EINVAL; @@ -1439,7 +1441,7 @@ static int create_qp(struct ib_uverbs_file *file, if (!ind_tbl) { if (cmd->recv_cq_handle != cmd->send_cq_handle) { - rcq = uobj_get_obj_read(cq, cmd->recv_cq_handle, + rcq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->recv_cq_handle, file->ucontext); if (!rcq) { ret = -EINVAL; @@ -1450,11 +1452,11 @@ static int create_qp(struct ib_uverbs_file *file, } if (has_sq) - scq = uobj_get_obj_read(cq, cmd->send_cq_handle, + scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->send_cq_handle, file->ucontext); if (!ind_tbl) rcq = rcq ?: scq; - pd = uobj_get_obj_read(pd, cmd->pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file->ucontext); if (!pd || (!scq && has_sq)) { ret = -EINVAL; goto err_put; @@ -1751,12 +1753,12 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); - obj = (struct ib_uqp_object *)uobj_alloc(uobj_get_type(qp), + obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file->ucontext); if (IS_ERR(obj)) return PTR_ERR(obj); - xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd.pd_handle, + xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle, file->ucontext); if (IS_ERR(xrcd_uobj)) { ret = -EINVAL; @@ -1859,7 +1861,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, goto out; } - qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); if (!qp) { ret = -EINVAL; goto out; @@ -1964,7 +1966,7 @@ static int modify_qp(struct ib_uverbs_file *file, if (!attr) return -ENOMEM; - qp = uobj_get_obj_read(qp, cmd->base.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, file->ucontext); if (!qp) { ret = -EINVAL; goto out; @@ -1989,6 +1991,13 @@ static int modify_qp(struct ib_uverbs_file *file, goto release_qp; } + if ((cmd->base.attr_mask & IB_QP_CUR_STATE && + cmd->base.cur_qp_state > IB_QPS_ERR) || + cmd->base.qp_state > IB_QPS_ERR) { + ret = -EINVAL; + goto release_qp; + } + attr->qp_state = cmd->base.qp_state; attr->cur_qp_state = cmd->base.cur_qp_state; attr->path_mtu = cmd->base.path_mtu; @@ -2112,7 +2121,7 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, memset(&resp, 0, sizeof resp); - uobj = uobj_get_write(uobj_get_type(qp), cmd.qp_handle, + uobj = uobj_get_write(UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -2178,7 +2187,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, if (!user_wr) return -ENOMEM; - qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); if (!qp) goto out; @@ -2214,7 +2223,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, goto out_put; } - ud->ah = uobj_get_obj_read(ah, user_wr->wr.ud.ah, + ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH, user_wr->wr.ud.ah, file->ucontext); if (!ud->ah) { kfree(ud); @@ -2449,7 +2458,7 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, if (IS_ERR(wr)) return PTR_ERR(wr); - qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); if (!qp) goto out; @@ -2498,7 +2507,7 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, if (IS_ERR(wr)) return PTR_ERR(wr); - srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext); + srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext); if (!srq) goto out; @@ -2555,11 +2564,11 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); - uobj = uobj_alloc(uobj_get_type(ah), file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_AH, file->ucontext); if (IS_ERR(uobj)) return PTR_ERR(uobj); - pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext); if (!pd) { ret = -EINVAL; goto err; @@ -2627,7 +2636,7 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(uobj_get_type(ah), cmd.ah_handle, + uobj = uobj_get_write(UVERBS_OBJECT_AH, cmd.ah_handle, file->ucontext); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -2650,7 +2659,7 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); if (!qp) return -EINVAL; @@ -2701,7 +2710,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); if (!qp) return -EINVAL; @@ -2730,8 +2739,52 @@ out_put: return ret ? ret : in_len; } -static int kern_spec_to_ib_spec_action(struct ib_uverbs_flow_spec *kern_spec, - union ib_flow_spec *ib_spec) +struct ib_uflow_resources { + size_t max; + size_t num; + struct ib_flow_action *collection[0]; +}; + +static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs) +{ + struct ib_uflow_resources *resources; + + resources = + kmalloc(sizeof(*resources) + + num_specs * sizeof(*resources->collection), GFP_KERNEL); + + if (!resources) + return NULL; + + resources->num = 0; + resources->max = num_specs; + + return resources; +} + +void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res) +{ + unsigned int i; + + for (i = 0; i < uflow_res->num; i++) + atomic_dec(&uflow_res->collection[i]->usecnt); + + kfree(uflow_res); +} + +static void flow_resources_add(struct ib_uflow_resources *uflow_res, + struct ib_flow_action *action) +{ + WARN_ON(uflow_res->num >= uflow_res->max); + + atomic_inc(&action->usecnt); + uflow_res->collection[uflow_res->num++] = action; +} + +static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext, + struct ib_uverbs_flow_spec *kern_spec, + union ib_flow_spec *ib_spec, + struct ib_uflow_resources *uflow_res) { ib_spec->type = kern_spec->type; switch (ib_spec->type) { @@ -2750,19 +2803,34 @@ static int kern_spec_to_ib_spec_action(struct ib_uverbs_flow_spec *kern_spec, ib_spec->drop.size = sizeof(struct ib_flow_spec_action_drop); break; + case IB_FLOW_SPEC_ACTION_HANDLE: + if (kern_spec->action.size != + sizeof(struct ib_uverbs_flow_spec_action_handle)) + return -EOPNOTSUPP; + ib_spec->action.act = uobj_get_obj_read(flow_action, + UVERBS_OBJECT_FLOW_ACTION, + kern_spec->action.handle, + ucontext); + if (!ib_spec->action.act) + return -EINVAL; + ib_spec->action.size = + sizeof(struct ib_flow_spec_action_handle); + flow_resources_add(uflow_res, ib_spec->action.act); + uobj_put_obj_read(ib_spec->action.act); + break; default: return -EINVAL; } return 0; } -static size_t kern_spec_filter_sz(struct ib_uverbs_flow_spec_hdr *spec) +static size_t kern_spec_filter_sz(const struct ib_uverbs_flow_spec_hdr *spec) { /* Returns user space filter size, includes padding */ return (spec->size - sizeof(struct ib_uverbs_flow_spec_hdr)) / 2; } -static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size, +static ssize_t spec_filter_size(const void *kern_spec_filter, u16 kern_filter_size, u16 ib_real_filter_sz) { /* @@ -2780,28 +2848,21 @@ static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size, return kern_filter_size; } -static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec, - union ib_flow_spec *ib_spec) +int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type, + const void *kern_spec_mask, + const void *kern_spec_val, + size_t kern_filter_sz, + union ib_flow_spec *ib_spec) { ssize_t actual_filter_sz; - ssize_t kern_filter_sz; ssize_t ib_filter_sz; - void *kern_spec_mask; - void *kern_spec_val; - if (kern_spec->reserved) - return -EINVAL; - - ib_spec->type = kern_spec->type; - - kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr); /* User flow spec size must be aligned to 4 bytes */ if (kern_filter_sz != ALIGN(kern_filter_sz, 4)) return -EINVAL; - kern_spec_val = (void *)kern_spec + - sizeof(struct ib_uverbs_flow_spec_hdr); - kern_spec_mask = kern_spec_val + kern_filter_sz; + ib_spec->type = type; + if (ib_spec->type == (IB_FLOW_SPEC_INNER | IB_FLOW_SPEC_VXLAN_TUNNEL)) return -EINVAL; @@ -2870,20 +2931,56 @@ static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec, (ntohl(ib_spec->tunnel.val.tunnel_id)) >= BIT(24)) return -EINVAL; break; + case IB_FLOW_SPEC_ESP: + ib_filter_sz = offsetof(struct ib_flow_esp_filter, real_sz); + actual_filter_sz = spec_filter_size(kern_spec_mask, + kern_filter_sz, + ib_filter_sz); + if (actual_filter_sz <= 0) + return -EINVAL; + ib_spec->esp.size = sizeof(struct ib_flow_spec_esp); + memcpy(&ib_spec->esp.val, kern_spec_val, actual_filter_sz); + memcpy(&ib_spec->esp.mask, kern_spec_mask, actual_filter_sz); + break; default: return -EINVAL; } return 0; } -static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, - union ib_flow_spec *ib_spec) +static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec, + union ib_flow_spec *ib_spec) +{ + ssize_t kern_filter_sz; + void *kern_spec_mask; + void *kern_spec_val; + + if (kern_spec->reserved) + return -EINVAL; + + kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr); + + kern_spec_val = (void *)kern_spec + + sizeof(struct ib_uverbs_flow_spec_hdr); + kern_spec_mask = kern_spec_val + kern_filter_sz; + + return ib_uverbs_kern_spec_to_ib_spec_filter(kern_spec->type, + kern_spec_mask, + kern_spec_val, + kern_filter_sz, ib_spec); +} + +static int kern_spec_to_ib_spec(struct ib_ucontext *ucontext, + struct ib_uverbs_flow_spec *kern_spec, + union ib_flow_spec *ib_spec, + struct ib_uflow_resources *uflow_res) { if (kern_spec->reserved) return -EINVAL; if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG) - return kern_spec_to_ib_spec_action(kern_spec, ib_spec); + return kern_spec_to_ib_spec_action(ucontext, kern_spec, ib_spec, + uflow_res); else return kern_spec_to_ib_spec_filter(kern_spec, ib_spec); } @@ -2925,18 +3022,18 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, if (cmd.comp_mask) return -EOPNOTSUPP; - obj = (struct ib_uwq_object *)uobj_alloc(uobj_get_type(wq), + obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, file->ucontext); if (IS_ERR(obj)) return PTR_ERR(obj); - pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext); if (!pd) { err = -EINVAL; goto err_uobj; } - cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext); if (!cq) { err = -EINVAL; goto err_put_pd; @@ -3040,7 +3137,7 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file, return -EOPNOTSUPP; resp.response_length = required_resp_len; - uobj = uobj_get_write(uobj_get_type(wq), cmd.wq_handle, + uobj = uobj_get_write(UVERBS_OBJECT_WQ, cmd.wq_handle, file->ucontext); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -3091,7 +3188,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS)) return -EINVAL; - wq = uobj_get_obj_read(wq, cmd.wq_handle, file->ucontext); + wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, file->ucontext); if (!wq) return -EINVAL; @@ -3185,7 +3282,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, for (num_read_wqs = 0; num_read_wqs < num_wq_handles; num_read_wqs++) { - wq = uobj_get_obj_read(wq, wqs_handles[num_read_wqs], + wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, wqs_handles[num_read_wqs], file->ucontext); if (!wq) { err = -EINVAL; @@ -3195,7 +3292,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, wqs[num_read_wqs] = wq; } - uobj = uobj_alloc(uobj_get_type(rwq_ind_table), file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file->ucontext); if (IS_ERR(uobj)) { err = PTR_ERR(uobj); goto put_wqs; @@ -3282,7 +3379,7 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file, if (cmd.comp_mask) return -EOPNOTSUPP; - uobj = uobj_get_write(uobj_get_type(rwq_ind_table), cmd.ind_tbl_handle, + uobj = uobj_get_write(UVERBS_OBJECT_RWQ_IND_TBL, cmd.ind_tbl_handle, file->ucontext); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -3298,10 +3395,12 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, struct ib_uverbs_create_flow cmd; struct ib_uverbs_create_flow_resp resp; struct ib_uobject *uobj; + struct ib_uflow_object *uflow; struct ib_flow *flow_id; struct ib_uverbs_flow_attr *kern_flow_attr; struct ib_flow_attr *flow_attr; struct ib_qp *qp; + struct ib_uflow_resources *uflow_res; int err = 0; void *kern_spec; void *ib_spec; @@ -3361,13 +3460,13 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, kern_flow_attr = &cmd.flow_attr; } - uobj = uobj_alloc(uobj_get_type(flow), file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_FLOW, file->ucontext); if (IS_ERR(uobj)) { err = PTR_ERR(uobj); goto err_free_attr; } - qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); if (!qp) { err = -EINVAL; goto err_uobj; @@ -3379,6 +3478,11 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, err = -ENOMEM; goto err_put; } + uflow_res = flow_resources_alloc(cmd.flow_attr.num_of_specs); + if (!uflow_res) { + err = -ENOMEM; + goto err_free_flow_attr; + } flow_attr->type = kern_flow_attr->type; flow_attr->priority = kern_flow_attr->priority; @@ -3393,7 +3497,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) && cmd.flow_attr.size >= ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) { - err = kern_spec_to_ib_spec(kern_spec, ib_spec); + err = kern_spec_to_ib_spec(file->ucontext, kern_spec, ib_spec, + uflow_res); if (err) goto err_free; flow_attr->size += @@ -3415,6 +3520,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, } flow_id->uobject = uobj; uobj->object = flow_id; + uflow = container_of(uobj, typeof(*uflow), uobject); + uflow->resources = uflow_res; memset(&resp, 0, sizeof(resp)); resp.flow_handle = uobj->id; @@ -3433,6 +3540,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, err_copy: ib_destroy_flow(flow_id); err_free: + ib_uverbs_flow_resources_free(uflow_res); +err_free_flow_attr: kfree(flow_attr); err_put: uobj_put_obj_read(qp); @@ -3463,7 +3572,7 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, if (cmd.comp_mask) return -EINVAL; - uobj = uobj_get_write(uobj_get_type(flow), cmd.flow_handle, + uobj = uobj_get_write(UVERBS_OBJECT_FLOW, cmd.flow_handle, file->ucontext); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -3485,7 +3594,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, struct ib_srq_init_attr attr; int ret; - obj = (struct ib_usrq_object *)uobj_alloc(uobj_get_type(srq), + obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, file->ucontext); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -3494,7 +3603,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, attr.ext.tag_matching.max_num_tags = cmd->max_num_tags; if (cmd->srq_type == IB_SRQT_XRC) { - xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd->xrcd_handle, + xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->xrcd_handle, file->ucontext); if (IS_ERR(xrcd_uobj)) { ret = -EINVAL; @@ -3512,7 +3621,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, } if (ib_srq_has_cq(cmd->srq_type)) { - attr.ext.cq = uobj_get_obj_read(cq, cmd->cq_handle, + attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->cq_handle, file->ucontext); if (!attr.ext.cq) { ret = -EINVAL; @@ -3520,7 +3629,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, } } - pd = uobj_get_obj_read(pd, cmd->pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file->ucontext); if (!pd) { ret = -EINVAL; goto err_put_cq; @@ -3572,7 +3681,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, if (cmd->srq_type == IB_SRQT_XRC) resp.srqn = srq->ext.xrc.srq_num; - if (copy_to_user((void __user *) (unsigned long) cmd->response, + if (copy_to_user(u64_to_user_ptr(cmd->response), &resp, sizeof resp)) { ret = -EFAULT; goto err_copy; @@ -3692,7 +3801,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, ib_uverbs_init_udata(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, out_len); - srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext); + srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext); if (!srq) return -EINVAL; @@ -3723,7 +3832,7 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext); + srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext); if (!srq) return -EINVAL; @@ -3760,7 +3869,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(uobj_get_type(srq), cmd.srq_handle, + uobj = uobj_get_write(UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -3897,6 +4006,12 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, resp.cq_moderation_caps.max_cq_moderation_period = attr.cq_caps.max_cq_moderation_period; resp.response_length += sizeof(resp.cq_moderation_caps); + + if (ucore->outlen < resp.response_length + sizeof(resp.max_dm_size)) + goto end; + + resp.max_dm_size = attr.max_dm_size; + resp.response_length += sizeof(resp.max_dm_size); end: err = ib_copy_to_udata(ucore, &resp, resp.response_length); return err; @@ -3933,7 +4048,7 @@ int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file, if (cmd.attr_mask > IB_CQ_MODERATE) return -EOPNOTSUPP; - cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext); if (!cq) return -EINVAL; diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 339b851..8c93970 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -35,6 +35,17 @@ #include "rdma_core.h" #include "uverbs.h" +static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr, + u16 len) +{ + if (uattr->len > sizeof(((struct ib_uverbs_attr *)0)->data)) + return ib_is_buffer_cleared(u64_to_user_ptr(uattr->data) + len, + uattr->len - len); + + return !memchr_inv((const void *)&uattr->data + len, + 0, uattr->len - len); +} + static int uverbs_process_attr(struct ib_device *ibdev, struct ib_ucontext *ucontext, const struct ib_uverbs_attr *uattr, @@ -44,14 +55,12 @@ static int uverbs_process_attr(struct ib_device *ibdev, struct ib_uverbs_attr __user *uattr_ptr) { const struct uverbs_attr_spec *spec; + const struct uverbs_attr_spec *val_spec; struct uverbs_attr *e; const struct uverbs_object_spec *object; struct uverbs_obj_attr *o_attr; struct uverbs_attr *elements = attr_bundle_h->attrs; - if (uattr->reserved) - return -EINVAL; - if (attr_id >= attr_spec_bucket->num_attrs) { if (uattr->flags & UVERBS_ATTR_F_MANDATORY) return -EINVAL; @@ -63,15 +72,46 @@ static int uverbs_process_attr(struct ib_device *ibdev, return -EINVAL; spec = &attr_spec_bucket->attrs[attr_id]; + val_spec = spec; e = &elements[attr_id]; e->uattr = uattr_ptr; switch (spec->type) { + case UVERBS_ATTR_TYPE_ENUM_IN: + if (uattr->attr_data.enum_data.elem_id >= spec->enum_def.num_elems) + return -EOPNOTSUPP; + + if (uattr->attr_data.enum_data.reserved) + return -EINVAL; + + val_spec = &spec->enum_def.ids[uattr->attr_data.enum_data.elem_id]; + + /* Currently we only support PTR_IN based enums */ + if (val_spec->type != UVERBS_ATTR_TYPE_PTR_IN) + return -EOPNOTSUPP; + + e->ptr_attr.enum_id = uattr->attr_data.enum_data.elem_id; + /* fall through */ case UVERBS_ATTR_TYPE_PTR_IN: + /* Ensure that any data provided by userspace beyond the known + * struct is zero. Userspace that knows how to use some future + * longer struct will fail here if used with an old kernel and + * non-zero content, making ABI compat/discovery simpler. + */ + if (uattr->len > val_spec->ptr.len && + val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO && + !uverbs_is_attr_cleared(uattr, val_spec->ptr.len)) + return -EOPNOTSUPP; + + /* fall through */ case UVERBS_ATTR_TYPE_PTR_OUT: - if (uattr->len < spec->len || - (!(spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ) && - uattr->len > spec->len)) + if (uattr->len < val_spec->ptr.min_len || + (!(val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO) && + uattr->len > val_spec->ptr.len)) + return -EINVAL; + + if (spec->type != UVERBS_ATTR_TYPE_ENUM_IN && + uattr->attr_data.reserved) return -EINVAL; e->ptr_attr.data = uattr->data; @@ -84,6 +124,9 @@ static int uverbs_process_attr(struct ib_device *ibdev, return -EINVAL; /* fall through */ case UVERBS_ATTR_TYPE_FD: + if (uattr->attr_data.reserved) + return -EINVAL; + if (uattr->len != 0 || !ucontext || uattr->data > INT_MAX) return -EINVAL; @@ -246,6 +289,9 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev, size_t ctx_size; uintptr_t data[UVERBS_OPTIMIZE_USING_STACK_SZ / sizeof(uintptr_t)]; + if (hdr->driver_id != ib_dev->driver_id) + return -EINVAL; + object_spec = uverbs_get_object(ib_dev, hdr->object_id); if (!object_spec) return -EPROTONOSUPPORT; @@ -350,7 +396,7 @@ long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) goto out; } - if (hdr.reserved) { + if (hdr.reserved1 || hdr.reserved2) { err = -EPROTONOSUPPORT; goto out; } diff --git a/drivers/infiniband/core/uverbs_ioctl_merge.c b/drivers/infiniband/core/uverbs_ioctl_merge.c index 62e1eb1..0f88a19 100644 --- a/drivers/infiniband/core/uverbs_ioctl_merge.c +++ b/drivers/infiniband/core/uverbs_ioctl_merge.c @@ -379,7 +379,7 @@ static struct uverbs_method_spec *build_method_with_attrs(const struct uverbs_me "ib_uverbs: Tried to merge attr (%d) but it's an object with new/destroy access but isn't mandatory\n", min_id) || WARN(IS_ATTR_OBJECT(attr) && - attr->flags & UVERBS_ATTR_SPEC_F_MIN_SZ, + attr->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO, "ib_uverbs: Tried to merge attr (%d) but it's an object with min_sz flag\n", min_id)) { res = -EINVAL; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index b1ca223..4445d8e 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -468,7 +468,7 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) return; } - entry = kmalloc(sizeof *entry, GFP_ATOMIC); + entry = kmalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) { spin_unlock_irqrestore(&ev_queue->lock, flags); return; @@ -501,7 +501,7 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file, return; } - entry = kmalloc(sizeof *entry, GFP_ATOMIC); + entry = kmalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) { spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags); return; @@ -635,39 +635,87 @@ err_put_refs: return filp; } -static int verify_command_mask(struct ib_device *ib_dev, __u32 command) +static bool verify_command_mask(struct ib_device *ib_dev, + u32 command, bool extended) { - u64 mask; + if (!extended) + return ib_dev->uverbs_cmd_mask & BIT_ULL(command); - if (command <= IB_USER_VERBS_CMD_OPEN_QP) - mask = ib_dev->uverbs_cmd_mask; - else - mask = ib_dev->uverbs_ex_cmd_mask; - - if (mask & ((u64)1 << command)) - return 0; - - return -1; + return ib_dev->uverbs_ex_cmd_mask & BIT_ULL(command); } static bool verify_command_idx(u32 command, bool extended) { if (extended) - return command < ARRAY_SIZE(uverbs_ex_cmd_table); + return command < ARRAY_SIZE(uverbs_ex_cmd_table) && + uverbs_ex_cmd_table[command]; + + return command < ARRAY_SIZE(uverbs_cmd_table) && + uverbs_cmd_table[command]; +} + +static ssize_t process_hdr(struct ib_uverbs_cmd_hdr *hdr, + u32 *command, bool *extended) +{ + if (hdr->command & ~(u32)(IB_USER_VERBS_CMD_FLAG_EXTENDED | + IB_USER_VERBS_CMD_COMMAND_MASK)) + return -EINVAL; + + *command = hdr->command & IB_USER_VERBS_CMD_COMMAND_MASK; + *extended = hdr->command & IB_USER_VERBS_CMD_FLAG_EXTENDED; + + if (!verify_command_idx(*command, *extended)) + return -EOPNOTSUPP; + + return 0; +} + +static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr, + struct ib_uverbs_ex_cmd_hdr *ex_hdr, + size_t count, bool extended) +{ + if (extended) { + count -= sizeof(*hdr) + sizeof(*ex_hdr); + + if ((hdr->in_words + ex_hdr->provider_in_words) * 8 != count) + return -EINVAL; + + if (ex_hdr->cmd_hdr_reserved) + return -EINVAL; - return command < ARRAY_SIZE(uverbs_cmd_table); + if (ex_hdr->response) { + if (!hdr->out_words && !ex_hdr->provider_out_words) + return -EINVAL; + + if (!access_ok(VERIFY_WRITE, + u64_to_user_ptr(ex_hdr->response), + (hdr->out_words + ex_hdr->provider_out_words) * 8)) + return -EFAULT; + } else { + if (hdr->out_words || ex_hdr->provider_out_words) + return -EINVAL; + } + + return 0; + } + + /* not extended command */ + if (hdr->in_words * 4 != count) + return -EINVAL; + + return 0; } static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct ib_uverbs_file *file = filp->private_data; + struct ib_uverbs_ex_cmd_hdr ex_hdr; struct ib_device *ib_dev; struct ib_uverbs_cmd_hdr hdr; - bool extended_command; - __u32 command; - __u32 flags; + bool extended; int srcu_key; + u32 command; ssize_t ret; if (!ib_safe_file_access(filp)) { @@ -676,12 +724,31 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, return -EACCES; } - if (count < sizeof hdr) + if (count < sizeof(hdr)) return -EINVAL; - if (copy_from_user(&hdr, buf, sizeof hdr)) + if (copy_from_user(&hdr, buf, sizeof(hdr))) return -EFAULT; + ret = process_hdr(&hdr, &command, &extended); + if (ret) + return ret; + + if (!file->ucontext && + (command != IB_USER_VERBS_CMD_GET_CONTEXT || extended)) + return -EINVAL; + + if (extended) { + if (count < (sizeof(hdr) + sizeof(ex_hdr))) + return -EINVAL; + if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) + return -EFAULT; + } + + ret = verify_hdr(&hdr, &ex_hdr, count, extended); + if (ret) + return ret; + srcu_key = srcu_read_lock(&file->device->disassociate_srcu); ib_dev = srcu_dereference(file->device->ib_dev, &file->device->disassociate_srcu); @@ -690,106 +757,22 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, goto out; } - if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK | - IB_USER_VERBS_CMD_COMMAND_MASK)) { - ret = -EINVAL; - goto out; - } - - command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK; - flags = (hdr.command & - IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT; - - extended_command = flags & IB_USER_VERBS_CMD_FLAG_EXTENDED; - if (!verify_command_idx(command, extended_command)) { - ret = -EINVAL; - goto out; - } - - if (verify_command_mask(ib_dev, command)) { + if (!verify_command_mask(ib_dev, command, extended)) { ret = -EOPNOTSUPP; goto out; } - if (!file->ucontext && - command != IB_USER_VERBS_CMD_GET_CONTEXT) { - ret = -EINVAL; - goto out; - } - - if (!flags) { - if (!uverbs_cmd_table[command]) { - ret = -EINVAL; - goto out; - } - - if (hdr.in_words * 4 != count) { - ret = -EINVAL; - goto out; - } + buf += sizeof(hdr); - ret = uverbs_cmd_table[command](file, ib_dev, - buf + sizeof(hdr), - hdr.in_words * 4, - hdr.out_words * 4); - - } else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) { - struct ib_uverbs_ex_cmd_hdr ex_hdr; + if (!extended) { + ret = uverbs_cmd_table[command](file, ib_dev, buf, + hdr.in_words * 4, + hdr.out_words * 4); + } else { struct ib_udata ucore; struct ib_udata uhw; - size_t written_count = count; - if (!uverbs_ex_cmd_table[command]) { - ret = -ENOSYS; - goto out; - } - - if (!file->ucontext) { - ret = -EINVAL; - goto out; - } - - if (count < (sizeof(hdr) + sizeof(ex_hdr))) { - ret = -EINVAL; - goto out; - } - - if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) { - ret = -EFAULT; - goto out; - } - - count -= sizeof(hdr) + sizeof(ex_hdr); - buf += sizeof(hdr) + sizeof(ex_hdr); - - if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count) { - ret = -EINVAL; - goto out; - } - - if (ex_hdr.cmd_hdr_reserved) { - ret = -EINVAL; - goto out; - } - - if (ex_hdr.response) { - if (!hdr.out_words && !ex_hdr.provider_out_words) { - ret = -EINVAL; - goto out; - } - - if (!access_ok(VERIFY_WRITE, - u64_to_user_ptr(ex_hdr.response), - (hdr.out_words + ex_hdr.provider_out_words) * 8)) { - ret = -EFAULT; - goto out; - } - } else { - if (hdr.out_words || ex_hdr.provider_out_words) { - ret = -EINVAL; - goto out; - } - } + buf += sizeof(ex_hdr); ib_uverbs_init_udata_buf_or_null(&ucore, buf, u64_to_user_ptr(ex_hdr.response), @@ -802,10 +785,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, ex_hdr.provider_out_words * 8); ret = uverbs_ex_cmd_table[command](file, ib_dev, &ucore, &uhw); - if (!ret) - ret = written_count; - } else { - ret = -ENOSYS; + ret = (ret) ? : count; } out: @@ -953,10 +933,8 @@ static const struct file_operations uverbs_fops = { .open = ib_uverbs_open, .release = ib_uverbs_close, .llseek = no_llseek, -#if IS_ENABLED(CONFIG_INFINIBAND_EXP_USER_ACCESS) .unlocked_ioctl = ib_uverbs_ioctl, .compat_ioctl = ib_uverbs_ioctl, -#endif }; static const struct file_operations uverbs_mmap_fops = { @@ -966,10 +944,8 @@ static const struct file_operations uverbs_mmap_fops = { .open = ib_uverbs_open, .release = ib_uverbs_close, .llseek = no_llseek, -#if IS_ENABLED(CONFIG_INFINIBAND_EXP_USER_ACCESS) .unlocked_ioctl = ib_uverbs_ioctl, .compat_ioctl = ib_uverbs_ioctl, -#endif }; static struct ib_client uverbs_client = { @@ -1032,7 +1008,7 @@ static void ib_uverbs_add_one(struct ib_device *device) if (!device->alloc_ucontext) return; - uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL); + uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL); if (!uverbs_dev) return; diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index df1360e..569f48bd 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -48,7 +48,16 @@ static int uverbs_free_ah(struct ib_uobject *uobject, static int uverbs_free_flow(struct ib_uobject *uobject, enum rdma_remove_reason why) { - return ib_destroy_flow((struct ib_flow *)uobject->object); + int ret; + struct ib_flow *flow = (struct ib_flow *)uobject->object; + struct ib_uflow_object *uflow = + container_of(uobject, struct ib_uflow_object, uobject); + + ret = ib_destroy_flow(flow); + if (!ret) + ib_uverbs_flow_resources_free(uflow->resources); + + return ret; } static int uverbs_free_mw(struct ib_uobject *uobject, @@ -135,31 +144,6 @@ static int uverbs_free_srq(struct ib_uobject *uobject, return ret; } -static int uverbs_free_cq(struct ib_uobject *uobject, - enum rdma_remove_reason why) -{ - struct ib_cq *cq = uobject->object; - struct ib_uverbs_event_queue *ev_queue = cq->cq_context; - struct ib_ucq_object *ucq = - container_of(uobject, struct ib_ucq_object, uobject); - int ret; - - ret = ib_destroy_cq(cq); - if (!ret || why != RDMA_REMOVE_DESTROY) - ib_uverbs_release_ucq(uobject->context->ufile, ev_queue ? - container_of(ev_queue, - struct ib_uverbs_completion_event_file, - ev_queue) : NULL, - ucq); - return ret; -} - -static int uverbs_free_mr(struct ib_uobject *uobject, - enum rdma_remove_reason why) -{ - return ib_dereg_mr((struct ib_mr *)uobject->object); -} - static int uverbs_free_xrcd(struct ib_uobject *uobject, enum rdma_remove_reason why) { @@ -210,18 +194,26 @@ static int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_ return 0; }; +int uverbs_destroy_def_handler(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + return 0; +} + /* * This spec is used in order to pass information to the hardware driver in a * legacy way. Every verb that could get driver specific data should get this * spec. */ -static const struct uverbs_attr_def uverbs_uhw_compat_in = - UVERBS_ATTR_PTR_IN_SZ(UVERBS_UHW_IN, 0, UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ)); -static const struct uverbs_attr_def uverbs_uhw_compat_out = - UVERBS_ATTR_PTR_OUT_SZ(UVERBS_UHW_OUT, 0, UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ)); - -static void create_udata(struct uverbs_attr_bundle *ctx, - struct ib_udata *udata) +const struct uverbs_attr_def uverbs_uhw_compat_in = + UVERBS_ATTR_PTR_IN_SZ(UVERBS_ATTR_UHW_IN, UVERBS_ATTR_SIZE(0, USHRT_MAX), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)); +const struct uverbs_attr_def uverbs_uhw_compat_out = + UVERBS_ATTR_PTR_OUT_SZ(UVERBS_ATTR_UHW_OUT, UVERBS_ATTR_SIZE(0, USHRT_MAX), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)); + +void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata) { /* * This is for ease of conversion. The purpose is to convert all drivers @@ -229,9 +221,9 @@ static void create_udata(struct uverbs_attr_bundle *ctx, * Assume attr == 0 is input and attr == 1 is output. */ const struct uverbs_attr *uhw_in = - uverbs_attr_get(ctx, UVERBS_UHW_IN); + uverbs_attr_get(ctx, UVERBS_ATTR_UHW_IN); const struct uverbs_attr *uhw_out = - uverbs_attr_get(ctx, UVERBS_UHW_OUT); + uverbs_attr_get(ctx, UVERBS_ATTR_UHW_OUT); if (!IS_ERR(uhw_in)) { udata->inlen = uhw_in->ptr_attr.len; @@ -253,207 +245,67 @@ static void create_udata(struct uverbs_attr_bundle *ctx, } } -static int uverbs_create_cq_handler(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COMP_CHANNEL, + &UVERBS_TYPE_ALLOC_FD(0, + sizeof(struct ib_uverbs_completion_event_file), + uverbs_hot_unplug_completion_event_file, + &uverbs_event_fops, + "[infinibandevent]", O_RDONLY)); + +DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_QP, + &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0, + uverbs_free_qp)); + +DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MW, + &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_mw)); + +DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_SRQ, + &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0, + uverbs_free_srq)); + +DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_AH, + &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_ah)); + +DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW, + &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object), + 0, uverbs_free_flow)); + +DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_WQ, + &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0, + uverbs_free_wq)); + +DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL, + &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_rwq_ind_tbl)); + +DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_XRCD, + &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0, + uverbs_free_xrcd)); + +DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_PD, + /* 2 is used in order to free the PD after MRs */ + &UVERBS_TYPE_ALLOC_IDR(2, uverbs_free_pd)); + +DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DEVICE, NULL); + +static DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects, + &UVERBS_OBJECT(UVERBS_OBJECT_DEVICE), + &UVERBS_OBJECT(UVERBS_OBJECT_PD), + &UVERBS_OBJECT(UVERBS_OBJECT_MR), + &UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL), + &UVERBS_OBJECT(UVERBS_OBJECT_CQ), + &UVERBS_OBJECT(UVERBS_OBJECT_QP), + &UVERBS_OBJECT(UVERBS_OBJECT_AH), + &UVERBS_OBJECT(UVERBS_OBJECT_MW), + &UVERBS_OBJECT(UVERBS_OBJECT_SRQ), + &UVERBS_OBJECT(UVERBS_OBJECT_FLOW), + &UVERBS_OBJECT(UVERBS_OBJECT_WQ), + &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL), + &UVERBS_OBJECT(UVERBS_OBJECT_XRCD), + &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION), + &UVERBS_OBJECT(UVERBS_OBJECT_DM)); + +const struct uverbs_object_tree_def *uverbs_default_get_objects(void) { - struct ib_ucontext *ucontext = file->ucontext; - struct ib_ucq_object *obj; - struct ib_udata uhw; - int ret; - u64 user_handle; - struct ib_cq_init_attr attr = {}; - struct ib_cq *cq; - struct ib_uverbs_completion_event_file *ev_file = NULL; - const struct uverbs_attr *ev_file_attr; - struct ib_uobject *ev_file_uobj; - - if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_CREATE_CQ)) - return -EOPNOTSUPP; - - ret = uverbs_copy_from(&attr.comp_vector, attrs, CREATE_CQ_COMP_VECTOR); - if (!ret) - ret = uverbs_copy_from(&attr.cqe, attrs, CREATE_CQ_CQE); - if (!ret) - ret = uverbs_copy_from(&user_handle, attrs, CREATE_CQ_USER_HANDLE); - if (ret) - return ret; - - /* Optional param, if it doesn't exist, we get -ENOENT and skip it */ - if (uverbs_copy_from(&attr.flags, attrs, CREATE_CQ_FLAGS) == -EFAULT) - return -EFAULT; - - ev_file_attr = uverbs_attr_get(attrs, CREATE_CQ_COMP_CHANNEL); - if (!IS_ERR(ev_file_attr)) { - ev_file_uobj = ev_file_attr->obj_attr.uobject; - - ev_file = container_of(ev_file_uobj, - struct ib_uverbs_completion_event_file, - uobj_file.uobj); - uverbs_uobject_get(ev_file_uobj); - } - - if (attr.comp_vector >= ucontext->ufile->device->num_comp_vectors) { - ret = -EINVAL; - goto err_event_file; - } - - obj = container_of(uverbs_attr_get(attrs, CREATE_CQ_HANDLE)->obj_attr.uobject, - typeof(*obj), uobject); - obj->uverbs_file = ucontext->ufile; - obj->comp_events_reported = 0; - obj->async_events_reported = 0; - INIT_LIST_HEAD(&obj->comp_list); - INIT_LIST_HEAD(&obj->async_list); - - /* Temporary, only until drivers get the new uverbs_attr_bundle */ - create_udata(attrs, &uhw); - - cq = ib_dev->create_cq(ib_dev, &attr, ucontext, &uhw); - if (IS_ERR(cq)) { - ret = PTR_ERR(cq); - goto err_event_file; - } - - cq->device = ib_dev; - cq->uobject = &obj->uobject; - cq->comp_handler = ib_uverbs_comp_handler; - cq->event_handler = ib_uverbs_cq_event_handler; - cq->cq_context = ev_file ? &ev_file->ev_queue : NULL; - obj->uobject.object = cq; - obj->uobject.user_handle = user_handle; - atomic_set(&cq->usecnt, 0); - cq->res.type = RDMA_RESTRACK_CQ; - rdma_restrack_add(&cq->res); - - ret = uverbs_copy_to(attrs, CREATE_CQ_RESP_CQE, &cq->cqe, - sizeof(cq->cqe)); - if (ret) - goto err_cq; - - return 0; -err_cq: - ib_destroy_cq(cq); - -err_event_file: - if (ev_file) - uverbs_uobject_put(ev_file_uobj); - return ret; -}; - -static DECLARE_UVERBS_METHOD( - uverbs_method_cq_create, UVERBS_CQ_CREATE, uverbs_create_cq_handler, - &UVERBS_ATTR_IDR(CREATE_CQ_HANDLE, UVERBS_OBJECT_CQ, UVERBS_ACCESS_NEW, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(CREATE_CQ_CQE, u32, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(CREATE_CQ_USER_HANDLE, u64, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_FD(CREATE_CQ_COMP_CHANNEL, UVERBS_OBJECT_COMP_CHANNEL, - UVERBS_ACCESS_READ), - &UVERBS_ATTR_PTR_IN(CREATE_CQ_COMP_VECTOR, u32, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(CREATE_CQ_FLAGS, u32), - &UVERBS_ATTR_PTR_OUT(CREATE_CQ_RESP_CQE, u32, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &uverbs_uhw_compat_in, &uverbs_uhw_compat_out); - -static int uverbs_destroy_cq_handler(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) -{ - struct ib_uverbs_destroy_cq_resp resp; - struct ib_uobject *uobj = - uverbs_attr_get(attrs, DESTROY_CQ_HANDLE)->obj_attr.uobject; - struct ib_ucq_object *obj = container_of(uobj, struct ib_ucq_object, - uobject); - int ret; - - if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_DESTROY_CQ)) - return -EOPNOTSUPP; - - ret = rdma_explicit_destroy(uobj); - if (ret) - return ret; - - resp.comp_events_reported = obj->comp_events_reported; - resp.async_events_reported = obj->async_events_reported; - - return uverbs_copy_to(attrs, DESTROY_CQ_RESP, &resp, sizeof(resp)); + return &uverbs_default_objects; } - -static DECLARE_UVERBS_METHOD( - uverbs_method_cq_destroy, UVERBS_CQ_DESTROY, uverbs_destroy_cq_handler, - &UVERBS_ATTR_IDR(DESTROY_CQ_HANDLE, UVERBS_OBJECT_CQ, - UVERBS_ACCESS_DESTROY, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_OUT(DESTROY_CQ_RESP, struct ib_uverbs_destroy_cq_resp, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -DECLARE_UVERBS_OBJECT(uverbs_object_comp_channel, - UVERBS_OBJECT_COMP_CHANNEL, - &UVERBS_TYPE_ALLOC_FD(0, - sizeof(struct ib_uverbs_completion_event_file), - uverbs_hot_unplug_completion_event_file, - &uverbs_event_fops, - "[infinibandevent]", O_RDONLY)); - -DECLARE_UVERBS_OBJECT(uverbs_object_cq, UVERBS_OBJECT_CQ, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0, - uverbs_free_cq), - &uverbs_method_cq_create, - &uverbs_method_cq_destroy); - -DECLARE_UVERBS_OBJECT(uverbs_object_qp, UVERBS_OBJECT_QP, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0, - uverbs_free_qp)); - -DECLARE_UVERBS_OBJECT(uverbs_object_mw, UVERBS_OBJECT_MW, - &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_mw)); - -DECLARE_UVERBS_OBJECT(uverbs_object_mr, UVERBS_OBJECT_MR, - /* 1 is used in order to free the MR after all the MWs */ - &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_mr)); - -DECLARE_UVERBS_OBJECT(uverbs_object_srq, UVERBS_OBJECT_SRQ, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0, - uverbs_free_srq)); - -DECLARE_UVERBS_OBJECT(uverbs_object_ah, UVERBS_OBJECT_AH, - &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_ah)); - -DECLARE_UVERBS_OBJECT(uverbs_object_flow, UVERBS_OBJECT_FLOW, - &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_flow)); - -DECLARE_UVERBS_OBJECT(uverbs_object_wq, UVERBS_OBJECT_WQ, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0, - uverbs_free_wq)); - -DECLARE_UVERBS_OBJECT(uverbs_object_rwq_ind_table, - UVERBS_OBJECT_RWQ_IND_TBL, - &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_rwq_ind_tbl)); - -DECLARE_UVERBS_OBJECT(uverbs_object_xrcd, UVERBS_OBJECT_XRCD, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0, - uverbs_free_xrcd)); - -DECLARE_UVERBS_OBJECT(uverbs_object_pd, UVERBS_OBJECT_PD, - /* 2 is used in order to free the PD after MRs */ - &UVERBS_TYPE_ALLOC_IDR(2, uverbs_free_pd)); - -DECLARE_UVERBS_OBJECT(uverbs_object_device, UVERBS_OBJECT_DEVICE, NULL); - -DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects, - &uverbs_object_device, - &uverbs_object_pd, - &uverbs_object_mr, - &uverbs_object_comp_channel, - &uverbs_object_cq, - &uverbs_object_qp, - &uverbs_object_ah, - &uverbs_object_mw, - &uverbs_object_srq, - &uverbs_object_flow, - &uverbs_object_wq, - &uverbs_object_rwq_ind_table, - &uverbs_object_xrcd); +EXPORT_SYMBOL_GPL(uverbs_default_get_objects); diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c new file mode 100644 index 0000000..b0dbae9 --- /dev/null +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <rdma/uverbs_std_types.h> +#include "rdma_core.h" +#include "uverbs.h" + +static int uverbs_free_cq(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + struct ib_cq *cq = uobject->object; + struct ib_uverbs_event_queue *ev_queue = cq->cq_context; + struct ib_ucq_object *ucq = + container_of(uobject, struct ib_ucq_object, uobject); + int ret; + + ret = ib_destroy_cq(cq); + if (!ret || why != RDMA_REMOVE_DESTROY) + ib_uverbs_release_ucq(uobject->context->ufile, ev_queue ? + container_of(ev_queue, + struct ib_uverbs_completion_event_file, + ev_queue) : NULL, + ucq); + return ret; +} + +static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct ib_ucontext *ucontext = file->ucontext; + struct ib_ucq_object *obj; + struct ib_udata uhw; + int ret; + u64 user_handle; + struct ib_cq_init_attr attr = {}; + struct ib_cq *cq; + struct ib_uverbs_completion_event_file *ev_file = NULL; + const struct uverbs_attr *ev_file_attr; + struct ib_uobject *ev_file_uobj; + + if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_CREATE_CQ)) + return -EOPNOTSUPP; + + ret = uverbs_copy_from(&attr.comp_vector, attrs, + UVERBS_ATTR_CREATE_CQ_COMP_VECTOR); + if (!ret) + ret = uverbs_copy_from(&attr.cqe, attrs, + UVERBS_ATTR_CREATE_CQ_CQE); + if (!ret) + ret = uverbs_copy_from(&user_handle, attrs, + UVERBS_ATTR_CREATE_CQ_USER_HANDLE); + if (ret) + return ret; + + /* Optional param, if it doesn't exist, we get -ENOENT and skip it */ + if (IS_UVERBS_COPY_ERR(uverbs_copy_from(&attr.flags, attrs, + UVERBS_ATTR_CREATE_CQ_FLAGS))) + return -EFAULT; + + ev_file_attr = uverbs_attr_get(attrs, UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL); + if (!IS_ERR(ev_file_attr)) { + ev_file_uobj = ev_file_attr->obj_attr.uobject; + + ev_file = container_of(ev_file_uobj, + struct ib_uverbs_completion_event_file, + uobj_file.uobj); + uverbs_uobject_get(ev_file_uobj); + } + + if (attr.comp_vector >= ucontext->ufile->device->num_comp_vectors) { + ret = -EINVAL; + goto err_event_file; + } + + obj = container_of(uverbs_attr_get(attrs, + UVERBS_ATTR_CREATE_CQ_HANDLE)->obj_attr.uobject, + typeof(*obj), uobject); + obj->uverbs_file = ucontext->ufile; + obj->comp_events_reported = 0; + obj->async_events_reported = 0; + INIT_LIST_HEAD(&obj->comp_list); + INIT_LIST_HEAD(&obj->async_list); + + /* Temporary, only until drivers get the new uverbs_attr_bundle */ + create_udata(attrs, &uhw); + + cq = ib_dev->create_cq(ib_dev, &attr, ucontext, &uhw); + if (IS_ERR(cq)) { + ret = PTR_ERR(cq); + goto err_event_file; + } + + cq->device = ib_dev; + cq->uobject = &obj->uobject; + cq->comp_handler = ib_uverbs_comp_handler; + cq->event_handler = ib_uverbs_cq_event_handler; + cq->cq_context = ev_file ? &ev_file->ev_queue : NULL; + obj->uobject.object = cq; + obj->uobject.user_handle = user_handle; + atomic_set(&cq->usecnt, 0); + cq->res.type = RDMA_RESTRACK_CQ; + rdma_restrack_add(&cq->res); + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_CQ_RESP_CQE, &cq->cqe, + sizeof(cq->cqe)); + if (ret) + goto err_cq; + + return 0; +err_cq: + ib_destroy_cq(cq); + +err_event_file: + if (ev_file) + uverbs_uobject_put(ev_file_uobj); + return ret; +}; + +static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_CREATE, + &UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_CQ_HANDLE, UVERBS_OBJECT_CQ, + UVERBS_ACCESS_NEW, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_CQE, + UVERBS_ATTR_TYPE(u32), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_USER_HANDLE, + UVERBS_ATTR_TYPE(u64), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL, + UVERBS_OBJECT_COMP_CHANNEL, + UVERBS_ACCESS_READ), + &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_COMP_VECTOR, UVERBS_ATTR_TYPE(u32), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_FLAGS, UVERBS_ATTR_TYPE(u32)), + &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE, UVERBS_ATTR_TYPE(u32), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &uverbs_uhw_compat_in, &uverbs_uhw_compat_out); + +static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct ib_uverbs_destroy_cq_resp resp; + struct ib_uobject *uobj = + uverbs_attr_get(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE)->obj_attr.uobject; + struct ib_ucq_object *obj = container_of(uobj, struct ib_ucq_object, + uobject); + int ret; + + if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_DESTROY_CQ)) + return -EOPNOTSUPP; + + ret = rdma_explicit_destroy(uobj); + if (ret) + return ret; + + resp.comp_events_reported = obj->comp_events_reported; + resp.async_events_reported = obj->async_events_reported; + + return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_CQ_RESP, &resp, + sizeof(resp)); +} + +static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_DESTROY, + &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_CQ_HANDLE, UVERBS_OBJECT_CQ, + UVERBS_ACCESS_DESTROY, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_CQ_RESP, + UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_cq_resp), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_CQ, + &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0, + uverbs_free_cq), +#if IS_ENABLED(CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI) + &UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE), + &UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY) +#endif + ); + diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c new file mode 100644 index 0000000..8b68157 --- /dev/null +++ b/drivers/infiniband/core/uverbs_std_types_dm.c @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "uverbs.h" +#include <rdma/uverbs_std_types.h> + +static int uverbs_free_dm(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + struct ib_dm *dm = uobject->object; + + if (why == RDMA_REMOVE_DESTROY && atomic_read(&dm->usecnt)) + return -EBUSY; + + return dm->device->dealloc_dm(dm); +} + +static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct ib_ucontext *ucontext = file->ucontext; + struct ib_dm_alloc_attr attr = {}; + struct ib_uobject *uobj; + struct ib_dm *dm; + int ret; + + if (!ib_dev->alloc_dm) + return -EOPNOTSUPP; + + ret = uverbs_copy_from(&attr.length, attrs, + UVERBS_ATTR_ALLOC_DM_LENGTH); + if (ret) + return ret; + + ret = uverbs_copy_from(&attr.alignment, attrs, + UVERBS_ATTR_ALLOC_DM_ALIGNMENT); + if (ret) + return ret; + + uobj = uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DM_HANDLE)->obj_attr.uobject; + + dm = ib_dev->alloc_dm(ib_dev, ucontext, &attr, attrs); + if (IS_ERR(dm)) + return PTR_ERR(dm); + + dm->device = ib_dev; + dm->length = attr.length; + dm->uobject = uobj; + atomic_set(&dm->usecnt, 0); + + uobj->object = dm; + + return 0; +} + +static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_ALLOC, + &UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DM_HANDLE, UVERBS_OBJECT_DM, + UVERBS_ACCESS_NEW, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_LENGTH, + UVERBS_ATTR_TYPE(u64), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_ALIGNMENT, + UVERBS_ATTR_TYPE(u32), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_DM_FREE, + uverbs_destroy_def_handler, + &UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DM_HANDLE, + UVERBS_OBJECT_DM, + UVERBS_ACCESS_DESTROY, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DM, + /* 1 is used in order to free the DM after MRs */ + &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_dm), + &UVERBS_METHOD(UVERBS_METHOD_DM_ALLOC), + &UVERBS_METHOD(UVERBS_METHOD_DM_FREE)); diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c new file mode 100644 index 0000000..cbcec3da --- /dev/null +++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c @@ -0,0 +1,435 @@ +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "uverbs.h" +#include <rdma/uverbs_std_types.h> + +static int uverbs_free_flow_action(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + struct ib_flow_action *action = uobject->object; + + if (why == RDMA_REMOVE_DESTROY && + atomic_read(&action->usecnt)) + return -EBUSY; + + return action->device->destroy_flow_action(action); +} + +static u64 esp_flags_uverbs_to_verbs(struct uverbs_attr_bundle *attrs, + u32 flags, bool is_modify) +{ + u64 verbs_flags = flags; + + if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ESN)) + verbs_flags |= IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED; + + if (is_modify && uverbs_attr_is_valid(attrs, + UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS)) + verbs_flags |= IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS; + + return verbs_flags; +}; + +static int validate_flow_action_esp_keymat_aes_gcm(struct ib_flow_action_attrs_esp_keymats *keymat) +{ + struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm = + &keymat->keymat.aes_gcm; + + if (aes_gcm->iv_algo > IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ) + return -EOPNOTSUPP; + + if (aes_gcm->key_len != 32 && + aes_gcm->key_len != 24 && + aes_gcm->key_len != 16) + return -EINVAL; + + if (aes_gcm->icv_len != 16 && + aes_gcm->icv_len != 8 && + aes_gcm->icv_len != 12) + return -EINVAL; + + return 0; +} + +static int (* const flow_action_esp_keymat_validate[])(struct ib_flow_action_attrs_esp_keymats *keymat) = { + [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = validate_flow_action_esp_keymat_aes_gcm, +}; + +static int flow_action_esp_replay_none(struct ib_flow_action_attrs_esp_replays *replay, + bool is_modify) +{ + /* This is used in order to modify an esp flow action with an enabled + * replay protection to a disabled one. This is only supported via + * modify, as in create verb we can simply drop the REPLAY attribute and + * achieve the same thing. + */ + return is_modify ? 0 : -EINVAL; +} + +static int flow_action_esp_replay_def_ok(struct ib_flow_action_attrs_esp_replays *replay, + bool is_modify) +{ + /* Some replay protections could always be enabled without validating + * anything. + */ + return 0; +} + +static int (* const flow_action_esp_replay_validate[])(struct ib_flow_action_attrs_esp_replays *replay, + bool is_modify) = { + [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = flow_action_esp_replay_none, + [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = flow_action_esp_replay_def_ok, +}; + +static int parse_esp_ip(enum ib_flow_spec_type proto, + const void __user *val_ptr, + size_t len, union ib_flow_spec *out) +{ + int ret; + const struct ib_uverbs_flow_ipv4_filter ipv4 = { + .src_ip = cpu_to_be32(0xffffffffUL), + .dst_ip = cpu_to_be32(0xffffffffUL), + .proto = 0xff, + .tos = 0xff, + .ttl = 0xff, + .flags = 0xff, + }; + const struct ib_uverbs_flow_ipv6_filter ipv6 = { + .src_ip = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + .dst_ip = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + .flow_label = cpu_to_be32(0xffffffffUL), + .next_hdr = 0xff, + .traffic_class = 0xff, + .hop_limit = 0xff, + }; + union { + struct ib_uverbs_flow_ipv4_filter ipv4; + struct ib_uverbs_flow_ipv6_filter ipv6; + } user_val = {}; + const void *user_pmask; + size_t val_len; + + /* If the flow IPv4/IPv6 flow specifications are extended, the mask + * should be changed as well. + */ + BUILD_BUG_ON(offsetof(struct ib_uverbs_flow_ipv4_filter, flags) + + sizeof(ipv4.flags) != sizeof(ipv4)); + BUILD_BUG_ON(offsetof(struct ib_uverbs_flow_ipv6_filter, reserved) + + sizeof(ipv6.reserved) != sizeof(ipv6)); + + switch (proto) { + case IB_FLOW_SPEC_IPV4: + if (len > sizeof(user_val.ipv4) && + !ib_is_buffer_cleared(val_ptr + sizeof(user_val.ipv4), + len - sizeof(user_val.ipv4))) + return -EOPNOTSUPP; + + val_len = min_t(size_t, len, sizeof(user_val.ipv4)); + ret = copy_from_user(&user_val.ipv4, val_ptr, + val_len); + if (ret) + return -EFAULT; + + user_pmask = &ipv4; + break; + case IB_FLOW_SPEC_IPV6: + if (len > sizeof(user_val.ipv6) && + !ib_is_buffer_cleared(val_ptr + sizeof(user_val.ipv6), + len - sizeof(user_val.ipv6))) + return -EOPNOTSUPP; + + val_len = min_t(size_t, len, sizeof(user_val.ipv6)); + ret = copy_from_user(&user_val.ipv6, val_ptr, + val_len); + if (ret) + return -EFAULT; + + user_pmask = &ipv6; + break; + default: + return -EOPNOTSUPP; + } + + return ib_uverbs_kern_spec_to_ib_spec_filter(proto, user_pmask, + &user_val, + val_len, out); +} + +static int flow_action_esp_get_encap(struct ib_flow_spec_list *out, + struct uverbs_attr_bundle *attrs) +{ + struct ib_uverbs_flow_action_esp_encap uverbs_encap; + int ret; + + ret = uverbs_copy_from(&uverbs_encap, attrs, + UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP); + if (ret) + return ret; + + /* We currently support only one encap */ + if (uverbs_encap.next_ptr) + return -EOPNOTSUPP; + + if (uverbs_encap.type != IB_FLOW_SPEC_IPV4 && + uverbs_encap.type != IB_FLOW_SPEC_IPV6) + return -EOPNOTSUPP; + + return parse_esp_ip(uverbs_encap.type, + u64_to_user_ptr(uverbs_encap.val_ptr), + uverbs_encap.len, + &out->spec); +} + +struct ib_flow_action_esp_attr { + struct ib_flow_action_attrs_esp hdr; + struct ib_flow_action_attrs_esp_keymats keymat; + struct ib_flow_action_attrs_esp_replays replay; + /* We currently support only one spec */ + struct ib_flow_spec_list encap; +}; + +#define ESP_LAST_SUPPORTED_FLAG IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW +static int parse_flow_action_esp(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs, + struct ib_flow_action_esp_attr *esp_attr, + bool is_modify) +{ + struct ib_uverbs_flow_action_esp uverbs_esp = {}; + int ret; + + /* Optional param, if it doesn't exist, we get -ENOENT and skip it */ + ret = uverbs_copy_from(&esp_attr->hdr.esn, attrs, + UVERBS_ATTR_FLOW_ACTION_ESP_ESN); + if (IS_UVERBS_COPY_ERR(ret)) + return ret; + + /* This can be called from FLOW_ACTION_ESP_MODIFY where + * UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS is optional + */ + if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS)) { + ret = uverbs_copy_from_or_zero(&uverbs_esp, attrs, + UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS); + if (ret) + return ret; + + if (uverbs_esp.flags & ~((ESP_LAST_SUPPORTED_FLAG << 1) - 1)) + return -EOPNOTSUPP; + + esp_attr->hdr.spi = uverbs_esp.spi; + esp_attr->hdr.seq = uverbs_esp.seq; + esp_attr->hdr.tfc_pad = uverbs_esp.tfc_pad; + esp_attr->hdr.hard_limit_pkts = uverbs_esp.hard_limit_pkts; + } + esp_attr->hdr.flags = esp_flags_uverbs_to_verbs(attrs, uverbs_esp.flags, + is_modify); + + if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT)) { + esp_attr->keymat.protocol = + uverbs_attr_get_enum_id(attrs, + UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT); + ret = uverbs_copy_from_or_zero(&esp_attr->keymat.keymat, + attrs, + UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT); + if (ret) + return ret; + + ret = flow_action_esp_keymat_validate[esp_attr->keymat.protocol](&esp_attr->keymat); + if (ret) + return ret; + + esp_attr->hdr.keymat = &esp_attr->keymat; + } + + if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY)) { + esp_attr->replay.protocol = + uverbs_attr_get_enum_id(attrs, + UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY); + + ret = uverbs_copy_from_or_zero(&esp_attr->replay.replay, + attrs, + UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY); + if (ret) + return ret; + + ret = flow_action_esp_replay_validate[esp_attr->replay.protocol](&esp_attr->replay, + is_modify); + if (ret) + return ret; + + esp_attr->hdr.replay = &esp_attr->replay; + } + + if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP)) { + ret = flow_action_esp_get_encap(&esp_attr->encap, attrs); + if (ret) + return ret; + + esp_attr->hdr.encap = &esp_attr->encap; + } + + return 0; +} + +static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + int ret; + struct ib_uobject *uobj; + struct ib_flow_action *action; + struct ib_flow_action_esp_attr esp_attr = {}; + + if (!ib_dev->create_flow_action_esp) + return -EOPNOTSUPP; + + ret = parse_flow_action_esp(ib_dev, file, attrs, &esp_attr, false); + if (ret) + return ret; + + /* No need to check as this attribute is marked as MANDATORY */ + uobj = uverbs_attr_get(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE)->obj_attr.uobject; + action = ib_dev->create_flow_action_esp(ib_dev, &esp_attr.hdr, attrs); + if (IS_ERR(action)) + return PTR_ERR(action); + + atomic_set(&action->usecnt, 0); + action->device = ib_dev; + action->type = IB_FLOW_ACTION_ESP; + action->uobject = uobj; + uobj->object = action; + + return 0; +} + +static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + int ret; + struct ib_uobject *uobj; + struct ib_flow_action *action; + struct ib_flow_action_esp_attr esp_attr = {}; + + if (!ib_dev->modify_flow_action_esp) + return -EOPNOTSUPP; + + ret = parse_flow_action_esp(ib_dev, file, attrs, &esp_attr, true); + if (ret) + return ret; + + uobj = uverbs_attr_get(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE)->obj_attr.uobject; + action = uobj->object; + + if (action->type != IB_FLOW_ACTION_ESP) + return -EINVAL; + + return ib_dev->modify_flow_action_esp(action, + &esp_attr.hdr, + attrs); +} + +static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = { + [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = { + .ptr = { + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_keymat_aes_gcm), + .flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO, + }, + }, +}; + +static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = { + [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = { + .ptr = { + .type = UVERBS_ATTR_TYPE_PTR_IN, + /* No need to specify any data */ + .len = 0, + } + }, + [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = { + .ptr = { + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, size), + .flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO, + } + }, +}; + +static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, + &UVERBS_ATTR_IDR(UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_NEW, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, + UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | + UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)), + &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_TYPE(__u32)), + &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, + uverbs_flow_action_esp_keymat, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, + uverbs_flow_action_esp_replay), + &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, + UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type))); + +static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY, + &UVERBS_ATTR_IDR(UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_WRITE, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, + UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)), + &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_TYPE(__u32)), + &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, + uverbs_flow_action_esp_keymat), + &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, + uverbs_flow_action_esp_replay), + &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, + UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type))); + +static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_FLOW_ACTION_DESTROY, + uverbs_destroy_def_handler, + &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_DESTROY, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW_ACTION, + &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_flow_action), + &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE), + &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY), + &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)); + diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c new file mode 100644 index 0000000..68f7cad --- /dev/null +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "uverbs.h" +#include <rdma/uverbs_std_types.h> + +static int uverbs_free_mr(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + return ib_dereg_mr((struct ib_mr *)uobject->object); +} + +static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct ib_dm_mr_attr attr = {}; + struct ib_uobject *uobj; + struct ib_dm *dm; + struct ib_pd *pd; + struct ib_mr *mr; + int ret; + + if (!ib_dev->reg_dm_mr) + return -EOPNOTSUPP; + + ret = uverbs_copy_from(&attr.offset, attrs, UVERBS_ATTR_REG_DM_MR_OFFSET); + if (ret) + return ret; + + ret = uverbs_copy_from(&attr.length, attrs, + UVERBS_ATTR_REG_DM_MR_LENGTH); + if (ret) + return ret; + + ret = uverbs_copy_from(&attr.access_flags, attrs, + UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS); + if (ret) + return ret; + + if (!(attr.access_flags & IB_ZERO_BASED)) + return -EINVAL; + + ret = ib_check_mr_access(attr.access_flags); + if (ret) + return ret; + + pd = uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_PD_HANDLE); + + dm = uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_DM_HANDLE); + + uobj = uverbs_attr_get(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE)->obj_attr.uobject; + + if (attr.offset > dm->length || attr.length > dm->length || + attr.length > dm->length - attr.offset) + return -EINVAL; + + mr = pd->device->reg_dm_mr(pd, dm, &attr, attrs); + if (IS_ERR(mr)) + return PTR_ERR(mr); + + mr->device = pd->device; + mr->pd = pd; + mr->dm = dm; + mr->uobject = uobj; + atomic_inc(&pd->usecnt); + atomic_inc(&dm->usecnt); + + uobj->object = mr; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DM_MR_RESP_LKEY, &mr->lkey, + sizeof(mr->lkey)); + if (ret) + goto err_dereg; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DM_MR_RESP_RKEY, + &mr->rkey, sizeof(mr->rkey)); + if (ret) + goto err_dereg; + + return 0; + +err_dereg: + ib_dereg_mr(mr); + + return ret; +} + +static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_MR_REG, + &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE, UVERBS_OBJECT_MR, + UVERBS_ACCESS_NEW, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_OFFSET, + UVERBS_ATTR_TYPE(u64), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_LENGTH, + UVERBS_ATTR_TYPE(u64), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_PD_HANDLE, UVERBS_OBJECT_PD, + UVERBS_ACCESS_READ, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS, + UVERBS_ATTR_TYPE(u32), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_DM_HANDLE, UVERBS_OBJECT_DM, + UVERBS_ACCESS_READ, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_LKEY, + UVERBS_ATTR_TYPE(u32), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_RKEY, + UVERBS_ATTR_TYPE(u32), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MR, + /* 1 is used in order to free the MR after all the MWs */ + &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_mr), + &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG)); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 93025d2..7eff3ae 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -655,7 +655,7 @@ int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr) return ah->device->modify_ah ? ah->device->modify_ah(ah, ah_attr) : - -ENOSYS; + -EOPNOTSUPP; } EXPORT_SYMBOL(rdma_modify_ah); @@ -663,7 +663,7 @@ int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr) { return ah->device->query_ah ? ah->device->query_ah(ah, ah_attr) : - -ENOSYS; + -EOPNOTSUPP; } EXPORT_SYMBOL(rdma_query_ah); @@ -689,7 +689,7 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd, struct ib_srq *srq; if (!pd->device->create_srq) - return ERR_PTR(-ENOSYS); + return ERR_PTR(-EOPNOTSUPP); srq = pd->device->create_srq(pd, srq_init_attr, NULL); @@ -722,7 +722,7 @@ int ib_modify_srq(struct ib_srq *srq, { return srq->device->modify_srq ? srq->device->modify_srq(srq, srq_attr, srq_attr_mask, NULL) : - -ENOSYS; + -EOPNOTSUPP; } EXPORT_SYMBOL(ib_modify_srq); @@ -730,7 +730,7 @@ int ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr) { return srq->device->query_srq ? - srq->device->query_srq(srq, srq_attr) : -ENOSYS; + srq->device->query_srq(srq, srq_attr) : -EOPNOTSUPP; } EXPORT_SYMBOL(ib_query_srq); @@ -1263,34 +1263,30 @@ static const struct { } }; -int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, - enum ib_qp_type type, enum ib_qp_attr_mask mask, - enum rdma_link_layer ll) +bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, + enum ib_qp_type type, enum ib_qp_attr_mask mask, + enum rdma_link_layer ll) { enum ib_qp_attr_mask req_param, opt_param; - if (cur_state < 0 || cur_state > IB_QPS_ERR || - next_state < 0 || next_state > IB_QPS_ERR) - return 0; - if (mask & IB_QP_CUR_STATE && cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS && cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE) - return 0; + return false; if (!qp_state_table[cur_state][next_state].valid) - return 0; + return false; req_param = qp_state_table[cur_state][next_state].req_param[type]; opt_param = qp_state_table[cur_state][next_state].opt_param[type]; if ((mask & req_param) != req_param) - return 0; + return false; if (mask & ~(req_param | opt_param | IB_QP_STATE)) - return 0; + return false; - return 1; + return true; } EXPORT_SYMBOL(ib_modify_qp_is_ok); @@ -1457,7 +1453,7 @@ int ib_query_qp(struct ib_qp *qp, { return qp->device->query_qp ? qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) : - -ENOSYS; + -EOPNOTSUPP; } EXPORT_SYMBOL(ib_query_qp); @@ -1594,7 +1590,7 @@ EXPORT_SYMBOL(ib_create_cq); int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period) { return cq->device->modify_cq ? - cq->device->modify_cq(cq, cq_count, cq_period) : -ENOSYS; + cq->device->modify_cq(cq, cq_count, cq_period) : -EOPNOTSUPP; } EXPORT_SYMBOL(rdma_set_cq_moderation); @@ -1611,7 +1607,7 @@ EXPORT_SYMBOL(ib_destroy_cq); int ib_resize_cq(struct ib_cq *cq, int cqe) { return cq->device->resize_cq ? - cq->device->resize_cq(cq, cqe, NULL) : -ENOSYS; + cq->device->resize_cq(cq, cqe, NULL) : -EOPNOTSUPP; } EXPORT_SYMBOL(ib_resize_cq); @@ -1620,11 +1616,16 @@ EXPORT_SYMBOL(ib_resize_cq); int ib_dereg_mr(struct ib_mr *mr) { struct ib_pd *pd = mr->pd; + struct ib_dm *dm = mr->dm; int ret; + rdma_restrack_del(&mr->res); ret = mr->device->dereg_mr(mr); - if (!ret) + if (!ret) { atomic_dec(&pd->usecnt); + if (dm) + atomic_dec(&dm->usecnt); + } return ret; } @@ -1649,7 +1650,7 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd, struct ib_mr *mr; if (!pd->device->alloc_mr) - return ERR_PTR(-ENOSYS); + return ERR_PTR(-EOPNOTSUPP); mr = pd->device->alloc_mr(pd, mr_type, max_num_sg); if (!IS_ERR(mr)) { @@ -1658,6 +1659,8 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd, mr->uobject = NULL; atomic_inc(&pd->usecnt); mr->need_inval = false; + mr->res.type = RDMA_RESTRACK_MR; + rdma_restrack_add(&mr->res); } return mr; @@ -1673,7 +1676,7 @@ struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd, struct ib_fmr *fmr; if (!pd->device->alloc_fmr) - return ERR_PTR(-ENOSYS); + return ERR_PTR(-EOPNOTSUPP); fmr = pd->device->alloc_fmr(pd, mr_access_flags, fmr_attr); if (!IS_ERR(fmr)) { @@ -1757,7 +1760,7 @@ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) int ret; if (!qp->device->attach_mcast) - return -ENOSYS; + return -EOPNOTSUPP; if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) || qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid)) @@ -1775,7 +1778,7 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) int ret; if (!qp->device->detach_mcast) - return -ENOSYS; + return -EOPNOTSUPP; if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) || qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid)) @@ -1793,7 +1796,7 @@ struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller) struct ib_xrcd *xrcd; if (!device->alloc_xrcd) - return ERR_PTR(-ENOSYS); + return ERR_PTR(-EOPNOTSUPP); xrcd = device->alloc_xrcd(device, NULL, NULL); if (!IS_ERR(xrcd)) { @@ -1847,7 +1850,7 @@ struct ib_wq *ib_create_wq(struct ib_pd *pd, struct ib_wq *wq; if (!pd->device->create_wq) - return ERR_PTR(-ENOSYS); + return ERR_PTR(-EOPNOTSUPP); wq = pd->device->create_wq(pd, wq_attr, NULL); if (!IS_ERR(wq)) { @@ -1902,7 +1905,7 @@ int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, int err; if (!wq->device->modify_wq) - return -ENOSYS; + return -EOPNOTSUPP; err = wq->device->modify_wq(wq, wq_attr, wq_attr_mask, NULL); return err; @@ -1927,7 +1930,7 @@ struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device, u32 table_size; if (!device->create_rwq_ind_table) - return ERR_PTR(-ENOSYS); + return ERR_PTR(-EOPNOTSUPP); table_size = (1 << init_attr->log_ind_tbl_size); rwq_ind_table = device->create_rwq_ind_table(device, @@ -1977,7 +1980,7 @@ struct ib_flow *ib_create_flow(struct ib_qp *qp, { struct ib_flow *flow_id; if (!qp->device->create_flow) - return ERR_PTR(-ENOSYS); + return ERR_PTR(-EOPNOTSUPP); flow_id = qp->device->create_flow(qp, flow_attr, domain); if (!IS_ERR(flow_id)) { @@ -2004,7 +2007,7 @@ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status) { return mr->device->check_mr_status ? - mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS; + mr->device->check_mr_status(mr, check_mask, mr_status) : -EOPNOTSUPP; } EXPORT_SYMBOL(ib_check_mr_status); @@ -2012,7 +2015,7 @@ int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port, int state) { if (!device->set_vf_link_state) - return -ENOSYS; + return -EOPNOTSUPP; return device->set_vf_link_state(device, vf, port, state); } @@ -2022,7 +2025,7 @@ int ib_get_vf_config(struct ib_device *device, int vf, u8 port, struct ifla_vf_info *info) { if (!device->get_vf_config) - return -ENOSYS; + return -EOPNOTSUPP; return device->get_vf_config(device, vf, port, info); } @@ -2032,7 +2035,7 @@ int ib_get_vf_stats(struct ib_device *device, int vf, u8 port, struct ifla_vf_stats *stats) { if (!device->get_vf_stats) - return -ENOSYS; + return -EOPNOTSUPP; return device->get_vf_stats(device, vf, port, stats); } @@ -2042,7 +2045,7 @@ int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, int type) { if (!device->set_vf_guid) - return -ENOSYS; + return -EOPNOTSUPP; return device->set_vf_guid(device, vf, port, guid, type); } @@ -2077,7 +2080,7 @@ int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset, unsigned int page_size) { if (unlikely(!mr->device->map_mr_sg)) - return -ENOSYS; + return -EOPNOTSUPP; mr->page_size = page_size; @@ -2194,7 +2197,14 @@ static void __ib_drain_sq(struct ib_qp *qp) struct ib_cq *cq = qp->send_cq; struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; struct ib_drain_cqe sdrain; - struct ib_send_wr swr = {}, *bad_swr; + struct ib_send_wr *bad_swr; + struct ib_rdma_wr swr = { + .wr = { + .next = NULL, + { .wr_cqe = &sdrain.cqe, }, + .opcode = IB_WR_RDMA_WRITE, + }, + }; int ret; ret = ib_modify_qp(qp, &attr, IB_QP_STATE); @@ -2203,11 +2213,10 @@ static void __ib_drain_sq(struct ib_qp *qp) return; } - swr.wr_cqe = &sdrain.cqe; sdrain.cqe.done = ib_drain_qp_done; init_completion(&sdrain.done); - ret = ib_post_send(qp, &swr, &bad_swr); + ret = ib_post_send(qp, &swr.wr, &bad_swr); if (ret) { WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); return; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 8301d7e..a76e206 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -314,12 +314,11 @@ int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num, return rc; } -int bnxt_re_del_gid(struct ib_device *ibdev, u8 port_num, - unsigned int index, void **context) +int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context) { int rc = 0; struct bnxt_re_gid_ctx *ctx, **ctx_tbl; - struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); + struct bnxt_re_dev *rdev = to_bnxt_re_dev(attr->device, ibdev); struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl; struct bnxt_qplib_gid *gid_to_del; @@ -365,15 +364,14 @@ int bnxt_re_del_gid(struct ib_device *ibdev, u8 port_num, return rc; } -int bnxt_re_add_gid(struct ib_device *ibdev, u8 port_num, - unsigned int index, const union ib_gid *gid, +int bnxt_re_add_gid(const union ib_gid *gid, const struct ib_gid_attr *attr, void **context) { int rc; u32 tbl_idx = 0; u16 vlan_id = 0xFFFF; struct bnxt_re_gid_ctx *ctx, **ctx_tbl; - struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); + struct bnxt_re_dev *rdev = to_bnxt_re_dev(attr->device, ibdev); struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl; if ((attr->ndev) && is_vlan_dev(attr->ndev)) @@ -718,8 +716,7 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, grh->sgid_index); goto fail; } - if (sgid_attr.ndev) - dev_put(sgid_attr.ndev); + dev_put(sgid_attr.ndev); /* Get network header type for this GID */ nw_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); switch (nw_type) { @@ -1540,14 +1537,13 @@ int bnxt_re_post_srq_recv(struct ib_srq *ib_srq, struct ib_recv_wr *wr, ib_srq); struct bnxt_qplib_swqe wqe; unsigned long flags; - int rc = 0, payload_sz = 0; + int rc = 0; spin_lock_irqsave(&srq->lock, flags); while (wr) { /* Transcribe each ib_recv_wr to qplib_swqe */ wqe.num_sge = wr->num_sge; - payload_sz = bnxt_re_build_sgl(wr->sg_list, wqe.sg_list, - wr->num_sge); + bnxt_re_build_sgl(wr->sg_list, wqe.sg_list, wr->num_sge); wqe.wr_id = wr->wr_id; wqe.type = BNXT_QPLIB_SWQE_TYPE_RECV; @@ -1698,7 +1694,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, status = ib_get_cached_gid(&rdev->ibdev, 1, grh->sgid_index, &sgid, &sgid_attr); - if (!status && sgid_attr.ndev) { + if (!status) { memcpy(qp->qplib_qp.smac, sgid_attr.ndev->dev_addr, ETH_ALEN); dev_put(sgid_attr.ndev); diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index e62b7c2..5c6414c 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -157,10 +157,8 @@ int bnxt_re_get_port_immutable(struct ib_device *ibdev, u8 port_num, void bnxt_re_query_fw_str(struct ib_device *ibdev, char *str); int bnxt_re_query_pkey(struct ib_device *ibdev, u8 port_num, u16 index, u16 *pkey); -int bnxt_re_del_gid(struct ib_device *ibdev, u8 port_num, - unsigned int index, void **context); -int bnxt_re_add_gid(struct ib_device *ibdev, u8 port_num, - unsigned int index, const union ib_gid *gid, +int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context); +int bnxt_re_add_gid(const union ib_gid *gid, const struct ib_gid_attr *attr, void **context); int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num, int index, union ib_gid *gid); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index f6e3617..f6c739e 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -574,7 +574,6 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) ibdev->get_port_immutable = bnxt_re_get_port_immutable; ibdev->get_dev_fw_str = bnxt_re_query_fw_str; ibdev->query_pkey = bnxt_re_query_pkey; - ibdev->query_gid = bnxt_re_query_gid; ibdev->get_netdev = bnxt_re_get_netdev; ibdev->add_gid = bnxt_re_add_gid; ibdev->del_gid = bnxt_re_del_gid; @@ -619,6 +618,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) ibdev->get_hw_stats = bnxt_re_ib_get_hw_stats; ibdev->alloc_hw_stats = bnxt_re_ib_alloc_hw_stats; + ibdev->driver_id = RDMA_DRIVER_BNXT_RE; return ib_register_device(ibdev, NULL); } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index ee98e5e..2f3f32ea 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -154,7 +154,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, attr->tqm_alloc_reqs[i * 4 + 3] = *(++tqm_alloc); } - attr->is_atomic = 0; + attr->is_atomic = false; bail: bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf); return rc; diff --git a/drivers/infiniband/hw/cxgb3/Kconfig b/drivers/infiniband/hw/cxgb3/Kconfig index 431be73..a7b77cb 100644 --- a/drivers/infiniband/hw/cxgb3/Kconfig +++ b/drivers/infiniband/hw/cxgb3/Kconfig @@ -16,12 +16,3 @@ config INFINIBAND_CXGB3 To compile this driver as a module, choose M here: the module will be called iw_cxgb3. - -config INFINIBAND_CXGB3_DEBUG - bool "Verbose debugging output" - depends on INFINIBAND_CXGB3 - default n - ---help--- - This option causes the Chelsio RDMA driver to produce copious - amounts of debug messages. Select this if you are developing - the driver or trying to diagnose a problem. diff --git a/drivers/infiniband/hw/cxgb3/Makefile b/drivers/infiniband/hw/cxgb3/Makefile index 2c66d35..66fe091 100644 --- a/drivers/infiniband/hw/cxgb3/Makefile +++ b/drivers/infiniband/hw/cxgb3/Makefile @@ -5,5 +5,3 @@ obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o iw_cxgb3-y := iwch_cm.o iwch_ev.o iwch_cq.o iwch_qp.o iwch_mem.o \ iwch_provider.o iwch.o cxio_hal.o cxio_resource.o - -ccflags-$(CONFIG_INFINIBAND_CXGB3_DEBUG) += -DDEBUG diff --git a/drivers/infiniband/hw/cxgb3/cxio_dbg.c b/drivers/infiniband/hw/cxgb3/cxio_dbg.c deleted file mode 100644 index 97dbe72..0000000 --- a/drivers/infiniband/hw/cxgb3/cxio_dbg.c +++ /dev/null @@ -1,206 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifdef DEBUG -#include <linux/types.h> -#include <linux/slab.h> -#include "common.h" -#include "cxgb3_ioctl.h" -#include "cxio_hal.h" -#include "cxio_wr.h" - -void cxio_dump_tpt(struct cxio_rdev *rdev, u32 stag) -{ - struct ch_mem_range *m; - u64 *data; - int rc; - int size = 32; - - m = kmalloc(sizeof(*m) + size, GFP_ATOMIC); - if (!m) - return; - - m->mem_id = MEM_PMRX; - m->addr = (stag>>8) * 32 + rdev->rnic_info.tpt_base; - m->len = size; - pr_debug("%s TPT addr 0x%x len %d\n", __func__, m->addr, m->len); - rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m); - if (rc) { - pr_debug("%s toectl returned error %d\n", __func__, rc); - kfree(m); - return; - } - - data = (u64 *)m->buf; - while (size > 0) { - pr_debug("TPT %08x: %016llx\n", - m->addr, (unsigned long long)*data); - size -= 8; - data++; - m->addr += 8; - } - kfree(m); -} - -void cxio_dump_pbl(struct cxio_rdev *rdev, u32 pbl_addr, uint len, u8 shift) -{ - struct ch_mem_range *m; - u64 *data; - int rc; - int size, npages; - - shift += 12; - npages = (len + (1ULL << shift) - 1) >> shift; - size = npages * sizeof(u64); - - m = kmalloc(sizeof(*m) + size, GFP_ATOMIC); - if (!m) - return; - - m->mem_id = MEM_PMRX; - m->addr = pbl_addr; - m->len = size; - pr_debug("%s PBL addr 0x%x len %d depth %d\n", - __func__, m->addr, m->len, npages); - rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m); - if (rc) { - pr_debug("%s toectl returned error %d\n", __func__, rc); - kfree(m); - return; - } - - data = (u64 *)m->buf; - while (size > 0) { - pr_debug("PBL %08x: %016llx\n", - m->addr, (unsigned long long)*data); - size -= 8; - data++; - m->addr += 8; - } - kfree(m); -} - -void cxio_dump_wqe(union t3_wr *wqe) -{ - __be64 *data = (__be64 *)wqe; - uint size = (uint)(be64_to_cpu(*data) & 0xff); - - if (size == 0) - size = 8; - while (size > 0) { - pr_debug("WQE %p: %016llx\n", - data, (unsigned long long)be64_to_cpu(*data)); - size--; - data++; - } -} - -void cxio_dump_wce(struct t3_cqe *wce) -{ - __be64 *data = (__be64 *)wce; - int size = sizeof(*wce); - - while (size > 0) { - pr_debug("WCE %p: %016llx\n", - data, (unsigned long long)be64_to_cpu(*data)); - size -= 8; - data++; - } -} - -void cxio_dump_rqt(struct cxio_rdev *rdev, u32 hwtid, int nents) -{ - struct ch_mem_range *m; - int size = nents * 64; - u64 *data; - int rc; - - m = kmalloc(sizeof(*m) + size, GFP_ATOMIC); - if (!m) - return; - - m->mem_id = MEM_PMRX; - m->addr = ((hwtid)<<10) + rdev->rnic_info.rqt_base; - m->len = size; - pr_debug("%s RQT addr 0x%x len %d\n", __func__, m->addr, m->len); - rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m); - if (rc) { - pr_debug("%s toectl returned error %d\n", __func__, rc); - kfree(m); - return; - } - - data = (u64 *)m->buf; - while (size > 0) { - pr_debug("RQT %08x: %016llx\n", - m->addr, (unsigned long long)*data); - size -= 8; - data++; - m->addr += 8; - } - kfree(m); -} - -void cxio_dump_tcb(struct cxio_rdev *rdev, u32 hwtid) -{ - struct ch_mem_range *m; - int size = TCB_SIZE; - u32 *data; - int rc; - - m = kmalloc(sizeof(*m) + size, GFP_ATOMIC); - if (!m) - return; - - m->mem_id = MEM_CM; - m->addr = hwtid * size; - m->len = size; - pr_debug("%s TCB %d len %d\n", __func__, m->addr, m->len); - rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m); - if (rc) { - pr_debug("%s toectl returned error %d\n", __func__, rc); - kfree(m); - return; - } - - data = (u32 *)m->buf; - while (size > 0) { - printk("%2u: %08x %08x %08x %08x %08x %08x %08x %08x\n", - m->addr, - *(data+2), *(data+3), *(data),*(data+1), - *(data+6), *(data+7), *(data+4), *(data+5)); - size -= 32; - data += 8; - m->addr += 32; - } - kfree(m); -} -#endif diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h index 7e70c54..c64e50b 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.h +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h @@ -202,13 +202,4 @@ int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb); #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#ifdef DEBUG -void cxio_dump_tpt(struct cxio_rdev *rev, u32 stag); -void cxio_dump_pbl(struct cxio_rdev *rev, u32 pbl_addr, uint len, u8 shift); -void cxio_dump_wqe(union t3_wr *wqe); -void cxio_dump_wce(struct t3_cqe *wce); -void cxio_dump_rqt(struct cxio_rdev *rdev, u32 hwtid, int nents); -void cxio_dump_tcb(struct cxio_rdev *rdev, u32 hwtid); -#endif - #endif diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c index dd5348e..0a8542c 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cq.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c @@ -200,9 +200,6 @@ int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) spin_lock_irqsave(&chp->lock, flags); for (npolled = 0; npolled < num_entries; ++npolled) { -#ifdef DEBUG - int i=0; -#endif /* * Because T3 can post CQEs that are _not_ associated @@ -211,9 +208,6 @@ int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) */ do { err = iwch_poll_cq_one(rhp, chp, wc + npolled); -#ifdef DEBUG - BUG_ON(++i > 1000); -#endif } while (err == -EAGAIN); if (err <= 0) break; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index a578ca5..be097c6 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -440,7 +440,9 @@ static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev, php->pdid = pdid; php->rhp = rhp; if (context) { - if (ib_copy_to_udata(udata, &php->pdid, sizeof (__u32))) { + struct iwch_alloc_pd_resp resp = {.pdid = php->pdid}; + + if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { iwch_deallocate_pd(&php->ibpd); return ERR_PTR(-EFAULT); } @@ -1439,6 +1441,7 @@ int iwch_register_device(struct iwch_dev *dev) memcpy(dev->ibdev.iwcm->ifname, dev->rdev.t3cdev_p->lldev->name, sizeof(dev->ibdev.iwcm->ifname)); + dev->ibdev.driver_id = RDMA_DRIVER_CXGB3; ret = ib_register_device(&dev->ibdev, NULL); if (ret) goto bail1; diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index e96771d..feeb8ee 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -220,14 +220,14 @@ static void set_ep_sin_addrs(struct c4iw_ep *ep, { struct iw_cm_id *id = ep->com.cm_id; - *lsin = (struct sockaddr_in *)&ep->com.local_addr; - *rsin = (struct sockaddr_in *)&ep->com.remote_addr; + *m_lsin = (struct sockaddr_in *)&ep->com.local_addr; + *m_rsin = (struct sockaddr_in *)&ep->com.remote_addr; if (id) { - *m_lsin = (struct sockaddr_in *)&id->m_local_addr; - *m_rsin = (struct sockaddr_in *)&id->m_remote_addr; + *lsin = (struct sockaddr_in *)&id->local_addr; + *rsin = (struct sockaddr_in *)&id->remote_addr; } else { - *m_lsin = &zero_sin; - *m_rsin = &zero_sin; + *lsin = &zero_sin; + *rsin = &zero_sin; } } @@ -239,14 +239,14 @@ static void set_ep_sin6_addrs(struct c4iw_ep *ep, { struct iw_cm_id *id = ep->com.cm_id; - *lsin6 = (struct sockaddr_in6 *)&ep->com.local_addr; - *rsin6 = (struct sockaddr_in6 *)&ep->com.remote_addr; + *m_lsin6 = (struct sockaddr_in6 *)&ep->com.local_addr; + *m_rsin6 = (struct sockaddr_in6 *)&ep->com.remote_addr; if (id) { - *m_lsin6 = (struct sockaddr_in6 *)&id->m_local_addr; - *m_rsin6 = (struct sockaddr_in6 *)&id->m_remote_addr; + *lsin6 = (struct sockaddr_in6 *)&id->local_addr; + *rsin6 = (struct sockaddr_in6 *)&id->remote_addr; } else { - *m_lsin6 = &zero_sin6; - *m_rsin6 = &zero_sin6; + *lsin6 = &zero_sin6; + *rsin6 = &zero_sin6; } } diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 7e0eb20..e90f2fd 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -391,6 +391,9 @@ static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag) mhp->attr.stag = stag; mmid = stag >> 8; mhp->ibmr.rkey = mhp->ibmr.lkey = stag; + mhp->ibmr.length = mhp->attr.len; + mhp->ibmr.iova = mhp->attr.va_fbo; + mhp->ibmr.page_size = 1U << (mhp->attr.page_size + 12); pr_debug("mmid 0x%x mhp %p\n", mmid, mhp); return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid); } diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 1b5c6cd..0b9cc73 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -281,7 +281,9 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev, php->pdid = pdid; php->rhp = rhp; if (context) { - if (ib_copy_to_udata(udata, &php->pdid, sizeof(u32))) { + struct c4iw_alloc_pd_resp uresp = {.pdid = php->pdid}; + + if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { c4iw_deallocate_pd(&php->ibpd); return ERR_PTR(-EFAULT); } @@ -531,6 +533,24 @@ static void get_dev_fw_str(struct ib_device *dev, char *str) FW_HDR_FW_VER_BUILD_G(c4iw_dev->rdev.lldi.fw_vers)); } +static struct net_device *get_netdev(struct ib_device *dev, u8 port) +{ + struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, ibdev); + struct c4iw_rdev *rdev = &c4iw_dev->rdev; + struct net_device *ndev; + + if (!port || port > rdev->lldi.nports) + return NULL; + + rcu_read_lock(); + ndev = rdev->lldi.ports[port - 1]; + if (ndev) + dev_hold(ndev); + rcu_read_unlock(); + + return ndev; +} + void c4iw_register_device(struct work_struct *work) { int ret; @@ -609,6 +629,7 @@ void c4iw_register_device(struct work_struct *work) dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; dev->ibdev.get_port_immutable = c4iw_port_immutable; dev->ibdev.get_dev_fw_str = get_dev_fw_str; + dev->ibdev.get_netdev = get_netdev; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) { @@ -627,6 +648,7 @@ void c4iw_register_device(struct work_struct *work) memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name, sizeof(dev->ibdev.iwcm->ifname)); + dev->ibdev.driver_id = RDMA_DRIVER_CXGB4; ret = ib_register_device(&dev->ibdev, NULL); if (ret) goto err_kfree_iwcm; diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index addc68e..46d1475 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -390,6 +390,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, svc_type = IB_CC_SVCTYPE_UC; break; default: + rcu_read_unlock(); goto drop; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 90bc8c7..32c4826 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -70,7 +70,6 @@ #include <linux/rhashtable.h> #include <linux/netdevice.h> #include <rdma/rdma_vt.h> -#include <rdma/opa_addr.h> #include "chip_registers.h" #include "common.h" diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index d30dd1a..1697d96 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -481,7 +481,6 @@ static void iowait_sdma_drained(struct iowait *wait) } /** - * * qp_to_sdma_engine - map a qp to a send engine * @qp: the QP * @sc5: the 5 bit sc diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index c1c596a..0d5330b 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -473,7 +473,7 @@ nomem: tinfo->tidcnt = tididx; tinfo->length = mapped_pages * PAGE_SIZE; - if (copy_to_user((void __user *)(unsigned long)tinfo->tidlist, + if (copy_to_user(u64_to_user_ptr(tinfo->tidlist), tidlist, sizeof(tidlist[0]) * tididx)) { /* * On failure to copy to the user level, we need to undo @@ -513,7 +513,7 @@ int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd, if (unlikely(tinfo->tidcnt > fd->tid_used)) return -EINVAL; - tidinfo = memdup_user((void __user *)(unsigned long)tinfo->tidlist, + tidinfo = memdup_user(u64_to_user_ptr(tinfo->tidlist), sizeof(tidinfo[0]) * tinfo->tidcnt); if (IS_ERR(tidinfo)) return PTR_ERR(tidinfo); diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 471d55c..c8cf4d4 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1960,7 +1960,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) i, ppd->pkeys); - ret = rvt_register_device(&dd->verbs_dev.rdi); + ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_HFI1); if (ret) goto err_verbs_txreq; diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile index 97bf2cd..cf03404 100644 --- a/drivers/infiniband/hw/hns/Makefile +++ b/drivers/infiniband/hw/hns/Makefile @@ -7,7 +7,7 @@ ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3 obj-$(CONFIG_INFINIBAND_HNS) += hns-roce.o hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \ hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \ - hns_roce_cq.o hns_roce_alloc.o + hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o obj-$(CONFIG_INFINIBAND_HNS_HIP06) += hns-roce-hw-v1.o hns-roce-hw-v1-objs := hns_roce_hw_v1.o obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 7dd6a66..d749286 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -68,11 +68,9 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, return ERR_PTR(ret); } - if (gid_attr.ndev) { - if (is_vlan_dev(gid_attr.ndev)) - vlan_tag = vlan_dev_vlan_id(gid_attr.ndev); - dev_put(gid_attr.ndev); - } + if (is_vlan_dev(gid_attr.ndev)) + vlan_tag = vlan_dev_vlan_id(gid_attr.ndev); + dev_put(gid_attr.ndev); if (vlan_tag < 0x1000) vlan_tag |= (rdma_ah_get_sl(ah_attr) & diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index bccc9b5..14734d0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -315,6 +315,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); struct device *dev = hr_dev->dev; struct hns_roce_ib_create_cq ucmd; + struct hns_roce_ib_create_cq_resp resp = {}; struct hns_roce_cq *hr_cq = NULL; struct hns_roce_uar *uar = NULL; int vector = attr->comp_vector; @@ -354,15 +355,36 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, goto err_cq; } + if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && + (udata->outlen >= sizeof(resp))) { + ret = hns_roce_db_map_user(to_hr_ucontext(context), + ucmd.db_addr, &hr_cq->db); + if (ret) { + dev_err(dev, "cq record doorbell map failed!\n"); + goto err_mtt; + } + hr_cq->db_en = 1; + resp.cap_flags |= HNS_ROCE_SUPPORT_CQ_RECORD_DB; + } + /* Get user space parameters */ uar = &to_hr_ucontext(context)->uar; } else { + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) { + ret = hns_roce_alloc_db(hr_dev, &hr_cq->db, 1); + if (ret) + goto err_cq; + + hr_cq->set_ci_db = hr_cq->db.db_record; + *hr_cq->set_ci_db = 0; + } + /* Init mmt table and write buff address to mtt table */ ret = hns_roce_ib_alloc_cq_buf(hr_dev, &hr_cq->hr_buf, cq_entries); if (ret) { dev_err(dev, "Failed to alloc_cq_buf.\n"); - goto err_cq; + goto err_db; } uar = &hr_dev->priv_uar; @@ -375,7 +397,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, hr_cq, vector); if (ret) { dev_err(dev, "Creat CQ .Failed to cq_alloc.\n"); - goto err_mtt; + goto err_dbmap; } /* @@ -393,10 +415,10 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, hr_cq->cq_depth = cq_entries; if (context) { - if (ib_copy_to_udata(udata, &hr_cq->cqn, sizeof(u64))) { - ret = -EFAULT; + resp.cqn = hr_cq->cqn; + ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); + if (ret) goto err_cqc; - } } return &hr_cq->ib_cq; @@ -404,6 +426,12 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, err_cqc: hns_roce_free_cq(hr_dev, hr_cq); +err_dbmap: + if (context && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && + (udata->outlen >= sizeof(resp))) + hns_roce_db_unmap_user(to_hr_ucontext(context), + &hr_cq->db); + err_mtt: hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); if (context) @@ -412,6 +440,10 @@ err_mtt: hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, hr_cq->ib_cq.cqe); +err_db: + if (!context && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)) + hns_roce_free_db(hr_dev, &hr_cq->db); + err_cq: kfree(hr_cq); return ERR_PTR(ret); @@ -430,12 +462,20 @@ int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq) hns_roce_free_cq(hr_dev, hr_cq); hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); - if (ib_cq->uobject) + if (ib_cq->uobject) { ib_umem_release(hr_cq->umem); - else + + if (hr_cq->db_en == 1) + hns_roce_db_unmap_user( + to_hr_ucontext(ib_cq->uobject->context), + &hr_cq->db); + } else { /* Free the buff of stored cq */ hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, ib_cq->cqe); + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) + hns_roce_free_db(hr_dev, &hr_cq->db); + } kfree(hr_cq); } diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c new file mode 100644 index 0000000..ebee278 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_db.c @@ -0,0 +1,180 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */ +/* + * Copyright (c) 2017 Hisilicon Limited. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + */ + +#include <linux/platform_device.h> +#include <rdma/ib_umem.h> +#include "hns_roce_device.h" + +int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt, + struct hns_roce_db *db) +{ + struct hns_roce_user_db_page *page; + int ret = 0; + + mutex_lock(&context->page_mutex); + + list_for_each_entry(page, &context->page_list, list) + if (page->user_virt == (virt & PAGE_MASK)) + goto found; + + page = kmalloc(sizeof(*page), GFP_KERNEL); + if (!page) { + ret = -ENOMEM; + goto out; + } + + refcount_set(&page->refcount, 1); + page->user_virt = (virt & PAGE_MASK); + page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK, + PAGE_SIZE, 0, 0); + if (IS_ERR(page->umem)) { + ret = PTR_ERR(page->umem); + kfree(page); + goto out; + } + + list_add(&page->list, &context->page_list); + +found: + db->dma = sg_dma_address(page->umem->sg_head.sgl) + + (virt & ~PAGE_MASK); + db->u.user_page = page; + refcount_inc(&page->refcount); + +out: + mutex_unlock(&context->page_mutex); + + return ret; +} +EXPORT_SYMBOL(hns_roce_db_map_user); + +void hns_roce_db_unmap_user(struct hns_roce_ucontext *context, + struct hns_roce_db *db) +{ + mutex_lock(&context->page_mutex); + + refcount_dec(&db->u.user_page->refcount); + if (refcount_dec_if_one(&db->u.user_page->refcount)) { + list_del(&db->u.user_page->list); + ib_umem_release(db->u.user_page->umem); + kfree(db->u.user_page); + } + + mutex_unlock(&context->page_mutex); +} +EXPORT_SYMBOL(hns_roce_db_unmap_user); + +static struct hns_roce_db_pgdir *hns_roce_alloc_db_pgdir( + struct device *dma_device) +{ + struct hns_roce_db_pgdir *pgdir; + + pgdir = kzalloc(sizeof(*pgdir), GFP_KERNEL); + if (!pgdir) + return NULL; + + bitmap_fill(pgdir->order1, HNS_ROCE_DB_PER_PAGE / 2); + pgdir->bits[0] = pgdir->order0; + pgdir->bits[1] = pgdir->order1; + pgdir->page = dma_alloc_coherent(dma_device, PAGE_SIZE, + &pgdir->db_dma, GFP_KERNEL); + if (!pgdir->page) { + kfree(pgdir); + return NULL; + } + + return pgdir; +} + +static int hns_roce_alloc_db_from_pgdir(struct hns_roce_db_pgdir *pgdir, + struct hns_roce_db *db, int order) +{ + int o; + int i; + + for (o = order; o <= 1; ++o) { + i = find_first_bit(pgdir->bits[o], HNS_ROCE_DB_PER_PAGE >> o); + if (i < HNS_ROCE_DB_PER_PAGE >> o) + goto found; + } + + return -ENOMEM; + +found: + clear_bit(i, pgdir->bits[o]); + + i <<= o; + + if (o > order) + set_bit(i ^ 1, pgdir->bits[order]); + + db->u.pgdir = pgdir; + db->index = i; + db->db_record = pgdir->page + db->index; + db->dma = pgdir->db_dma + db->index * 4; + db->order = order; + + return 0; +} + +int hns_roce_alloc_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db, + int order) +{ + struct hns_roce_db_pgdir *pgdir; + int ret = 0; + + mutex_lock(&hr_dev->pgdir_mutex); + + list_for_each_entry(pgdir, &hr_dev->pgdir_list, list) + if (!hns_roce_alloc_db_from_pgdir(pgdir, db, order)) + goto out; + + pgdir = hns_roce_alloc_db_pgdir(hr_dev->dev); + if (!pgdir) { + ret = -ENOMEM; + goto out; + } + + list_add(&pgdir->list, &hr_dev->pgdir_list); + + /* This should never fail -- we just allocated an empty page: */ + WARN_ON(hns_roce_alloc_db_from_pgdir(pgdir, db, order)); + +out: + mutex_unlock(&hr_dev->pgdir_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(hns_roce_alloc_db); + +void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db) +{ + int o; + int i; + + mutex_lock(&hr_dev->pgdir_mutex); + + o = db->order; + i = db->index; + + if (db->order == 0 && test_bit(i ^ 1, db->u.pgdir->order0)) { + clear_bit(i ^ 1, db->u.pgdir->order0); + ++o; + } + + i >>= o; + set_bit(i, db->u.pgdir->bits[o]); + + if (bitmap_full(db->u.pgdir->order1, HNS_ROCE_DB_PER_PAGE / 2)) { + dma_free_coherent(hr_dev->dev, PAGE_SIZE, db->u.pgdir->page, + db->u.pgdir->db_dma); + list_del(&db->u.pgdir->list); + kfree(db->u.pgdir); + } + + mutex_unlock(&hr_dev->pgdir_mutex); +} +EXPORT_SYMBOL_GPL(hns_roce_free_db); diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 165a09b3..fb305b7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -105,6 +105,14 @@ #define PAGES_SHIFT_24 24 #define PAGES_SHIFT_32 32 +enum { + HNS_ROCE_SUPPORT_RQ_RECORD_DB = 1 << 0, +}; + +enum { + HNS_ROCE_SUPPORT_CQ_RECORD_DB = 1 << 0, +}; + enum hns_roce_qp_state { HNS_ROCE_QP_STATE_RST, HNS_ROCE_QP_STATE_INIT, @@ -178,7 +186,8 @@ enum { enum { HNS_ROCE_CAP_FLAG_REREG_MR = BIT(0), HNS_ROCE_CAP_FLAG_ROCE_V1_V2 = BIT(1), - HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2) + HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2), + HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3) }; enum hns_roce_mtt_type { @@ -186,6 +195,10 @@ enum hns_roce_mtt_type { MTT_TYPE_CQE, }; +enum { + HNS_ROCE_DB_PER_PAGE = PAGE_SIZE / 4 +}; + #define HNS_ROCE_CMD_SUCCESS 1 #define HNS_ROCE_PORT_DOWN 0 @@ -203,6 +216,8 @@ struct hns_roce_uar { struct hns_roce_ucontext { struct ib_ucontext ibucontext; struct hns_roce_uar uar; + struct list_head page_list; + struct mutex page_mutex; }; struct hns_roce_pd { @@ -335,6 +350,33 @@ struct hns_roce_buf { int page_shift; }; +struct hns_roce_db_pgdir { + struct list_head list; + DECLARE_BITMAP(order0, HNS_ROCE_DB_PER_PAGE); + DECLARE_BITMAP(order1, HNS_ROCE_DB_PER_PAGE / 2); + unsigned long *bits[2]; + u32 *page; + dma_addr_t db_dma; +}; + +struct hns_roce_user_db_page { + struct list_head list; + struct ib_umem *umem; + unsigned long user_virt; + refcount_t refcount; +}; + +struct hns_roce_db { + u32 *db_record; + union { + struct hns_roce_db_pgdir *pgdir; + struct hns_roce_user_db_page *user_page; + } u; + dma_addr_t dma; + int index; + int order; +}; + struct hns_roce_cq_buf { struct hns_roce_buf hr_buf; struct hns_roce_mtt hr_mtt; @@ -343,6 +385,8 @@ struct hns_roce_cq_buf { struct hns_roce_cq { struct ib_cq ib_cq; struct hns_roce_cq_buf hr_buf; + struct hns_roce_db db; + u8 db_en; spinlock_t lock; struct ib_umem *umem; void (*comp)(struct hns_roce_cq *cq); @@ -351,6 +395,7 @@ struct hns_roce_cq { struct hns_roce_uar *uar; u32 cq_depth; u32 cons_index; + u32 *set_ci_db; void __iomem *cq_db_l; u16 *tptr_addr; int arm_sn; @@ -466,6 +511,8 @@ struct hns_roce_qp { struct ib_qp ibqp; struct hns_roce_buf hr_buf; struct hns_roce_wq rq; + struct hns_roce_db rdb; + u8 rdb_en; u32 doorbell_qpn; __le32 sq_signal_bits; u32 sq_next_wqe; @@ -725,6 +772,8 @@ struct hns_roce_dev { spinlock_t bt_cmd_lock; struct hns_roce_ib_iboe iboe; + struct list_head pgdir_list; + struct mutex pgdir_mutex; int irq[HNS_ROCE_MAX_IRQ_NUM]; u8 __iomem *reg_base; struct hns_roce_caps caps; @@ -930,6 +979,14 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq); void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq); +int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt, + struct hns_roce_db *db); +void hns_roce_db_unmap_user(struct hns_roce_ucontext *context, + struct hns_roce_db *db); +int hns_roce_alloc_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db, + int order); +void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db); + void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn); void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type); void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index da13bd7..47e1b6a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -1687,13 +1687,13 @@ static int hns_roce_v1_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, roce_set_field(val, ROCEE_MB6_ROCEE_MB_TOKEN_M, ROCEE_MB6_ROCEE_MB_TOKEN_S, token); - __raw_writeq(cpu_to_le64(in_param), hcr + 0); - __raw_writeq(cpu_to_le64(out_param), hcr + 2); - __raw_writel(cpu_to_le32(in_modifier), hcr + 4); + writeq(in_param, hcr + 0); + writeq(out_param, hcr + 2); + writel(in_modifier, hcr + 4); /* Memory barrier */ wmb(); - __raw_writel(cpu_to_le32(val), hcr + 5); + writel(val, hcr + 5); mmiowb(); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index ec63877..8b84ab7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -498,7 +498,6 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct hns_roce_v2_wqe_data_seg *dseg; struct hns_roce_rinl_sge *sge_list; struct device *dev = hr_dev->dev; - struct hns_roce_v2_db rq_db; unsigned long flags; void *wqe = NULL; int ret = 0; @@ -509,7 +508,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, spin_lock_irqsave(&hr_qp->rq.lock, flags); ind = hr_qp->rq.head & (hr_qp->rq.wqe_cnt - 1); - if (hr_qp->state == IB_QPS_RESET || hr_qp->state == IB_QPS_ERR) { + if (hr_qp->state == IB_QPS_RESET) { spin_unlock_irqrestore(&hr_qp->rq.lock, flags); *bad_wr = wr; return -EINVAL; @@ -564,17 +563,7 @@ out: /* Memory barrier */ wmb(); - rq_db.byte_4 = 0; - rq_db.parameter = 0; - - roce_set_field(rq_db.byte_4, V2_DB_BYTE_4_TAG_M, - V2_DB_BYTE_4_TAG_S, hr_qp->qpn); - roce_set_field(rq_db.byte_4, V2_DB_BYTE_4_CMD_M, - V2_DB_BYTE_4_CMD_S, HNS_ROCE_V2_RQ_DB); - roce_set_field(rq_db.parameter, V2_DB_PARAMETER_CONS_IDX_M, - V2_DB_PARAMETER_CONS_IDX_S, hr_qp->rq.head); - - hns_roce_write64_k((__le32 *)&rq_db, hr_qp->rq.db_reg_l); + *hr_qp->rdb.db_record = hr_qp->rq.head & 0xffff; } spin_unlock_irqrestore(&hr_qp->rq.lock, flags); @@ -1168,7 +1157,8 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->flags = HNS_ROCE_CAP_FLAG_REREG_MR | HNS_ROCE_CAP_FLAG_ROCE_V1_V2 | - HNS_ROCE_CAP_FLAG_RQ_INLINE; + HNS_ROCE_CAP_FLAG_RQ_INLINE | + HNS_ROCE_CAP_FLAG_RECORD_DB; caps->pkey_table_len[0] = 1; caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM; caps->ceqe_depth = HNS_ROCE_V2_COMP_EQE_NUM; @@ -1228,14 +1218,14 @@ static int hns_roce_v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, roce_set_field(val1, HNS_ROCE_VF_MB5_TOKEN_MASK, HNS_ROCE_VF_MB5_TOKEN_SHIFT, token); - __raw_writeq(cpu_to_le64(in_param), hcr + 0); - __raw_writeq(cpu_to_le64(out_param), hcr + 2); + writeq(in_param, hcr + 0); + writeq(out_param, hcr + 2); /* Memory barrier */ wmb(); - __raw_writel(cpu_to_le32(val0), hcr + 4); - __raw_writel(cpu_to_le32(val1), hcr + 5); + writel(val0, hcr + 4); + writel(val1, hcr + 5); mmiowb(); @@ -1507,24 +1497,7 @@ static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *hr_cq) static void hns_roce_v2_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index) { - struct hns_roce_v2_cq_db cq_db; - - cq_db.byte_4 = 0; - cq_db.parameter = 0; - - roce_set_field(cq_db.byte_4, V2_CQ_DB_BYTE_4_TAG_M, - V2_CQ_DB_BYTE_4_TAG_S, hr_cq->cqn); - roce_set_field(cq_db.byte_4, V2_CQ_DB_BYTE_4_CMD_M, - V2_CQ_DB_BYTE_4_CMD_S, HNS_ROCE_V2_CQ_DB_PTR); - - roce_set_field(cq_db.parameter, V2_CQ_DB_PARAMETER_CONS_IDX_M, - V2_CQ_DB_PARAMETER_CONS_IDX_S, - cons_index & ((hr_cq->cq_depth << 1) - 1)); - roce_set_field(cq_db.parameter, V2_CQ_DB_PARAMETER_CMD_SN_M, - V2_CQ_DB_PARAMETER_CMD_SN_S, 1); - - hns_roce_write64_k((__be32 *)&cq_db, hr_cq->cq_db_l); - + *hr_cq->set_ci_db = cons_index & 0xffffff; } static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, @@ -1637,6 +1610,16 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, roce_set_field(cq_context->byte_40_cqe_ba, V2_CQC_BYTE_40_CQE_BA_M, V2_CQC_BYTE_40_CQE_BA_S, (dma_handle >> (32 + 3))); + if (hr_cq->db_en) + roce_set_bit(cq_context->byte_44_db_record, + V2_CQC_BYTE_44_DB_RECORD_EN_S, 1); + + roce_set_field(cq_context->byte_44_db_record, + V2_CQC_BYTE_44_DB_RECORD_ADDR_M, + V2_CQC_BYTE_44_DB_RECORD_ADDR_S, + ((u32)hr_cq->db.dma) >> 1); + cq_context->db_record_addr = hr_cq->db.dma >> 32; + roce_set_field(cq_context->byte_56_cqe_period_maxcnt, V2_CQC_BYTE_56_CQ_MAX_CNT_M, V2_CQC_BYTE_56_CQ_MAX_CNT_S, @@ -2274,6 +2257,23 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, hr_qp->qkey = attr->qkey; } + if (hr_qp->rdb_en) { + roce_set_bit(context->byte_68_rq_db, + V2_QPC_BYTE_68_RQ_RECORD_EN_S, 1); + roce_set_bit(qpc_mask->byte_68_rq_db, + V2_QPC_BYTE_68_RQ_RECORD_EN_S, 0); + } + + roce_set_field(context->byte_68_rq_db, + V2_QPC_BYTE_68_RQ_DB_RECORD_ADDR_M, + V2_QPC_BYTE_68_RQ_DB_RECORD_ADDR_S, + ((u32)hr_qp->rdb.dma) >> 1); + roce_set_field(qpc_mask->byte_68_rq_db, + V2_QPC_BYTE_68_RQ_DB_RECORD_ADDR_M, + V2_QPC_BYTE_68_RQ_DB_RECORD_ADDR_S, 0); + context->rq_db_record_addr = hr_qp->rdb.dma >> 32; + qpc_mask->rq_db_record_addr = 0; + roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 1); roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 0); @@ -3211,6 +3211,8 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, hr_qp->sq.tail = 0; hr_qp->sq_next_wqe = 0; hr_qp->next_sge = 0; + if (hr_qp->rq.wqe_cnt) + *hr_qp->rdb.db_record = 0; } out: @@ -3437,11 +3439,17 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt); if (is_user) { + if (hr_qp->rq.wqe_cnt && (hr_qp->rdb_en == 1)) + hns_roce_db_unmap_user( + to_hr_ucontext(hr_qp->ibqp.uobject->context), + &hr_qp->rdb); ib_umem_release(hr_qp->umem); } else { kfree(hr_qp->sq.wrid); kfree(hr_qp->rq.wrid); hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); + if (hr_qp->rq.wqe_cnt) + hns_roce_free_db(hr_dev, &hr_qp->rdb); } if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 2bf8a47..182b672 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -299,6 +299,9 @@ struct hns_roce_v2_cq_context { #define V2_CQC_BYTE_44_DB_RECORD_EN_S 0 +#define V2_CQC_BYTE_44_DB_RECORD_ADDR_S 1 +#define V2_CQC_BYTE_44_DB_RECORD_ADDR_M GENMASK(31, 1) + #define V2_CQC_BYTE_52_CQE_CNT_S 0 #define V2_CQC_BYTE_52_CQE_CNT_M GENMASK(23, 0) diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index eb9a69f..9d48bc0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -74,12 +74,11 @@ static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr) return hr_dev->hw->set_mac(hr_dev, phy_port, addr); } -static int hns_roce_add_gid(struct ib_device *device, u8 port_num, - unsigned int index, const union ib_gid *gid, +static int hns_roce_add_gid(const union ib_gid *gid, const struct ib_gid_attr *attr, void **context) { - struct hns_roce_dev *hr_dev = to_hr_dev(device); - u8 port = port_num - 1; + struct hns_roce_dev *hr_dev = to_hr_dev(attr->device); + u8 port = attr->port_num - 1; unsigned long flags; int ret; @@ -88,20 +87,20 @@ static int hns_roce_add_gid(struct ib_device *device, u8 port_num, spin_lock_irqsave(&hr_dev->iboe.lock, flags); - ret = hr_dev->hw->set_gid(hr_dev, port, index, (union ib_gid *)gid, - attr); + ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, + (union ib_gid *)gid, attr); spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); return ret; } -static int hns_roce_del_gid(struct ib_device *device, u8 port_num, - unsigned int index, void **context) +static int hns_roce_del_gid(const struct ib_gid_attr *attr, void **context) { - struct hns_roce_dev *hr_dev = to_hr_dev(device); + struct hns_roce_dev *hr_dev = to_hr_dev(attr->device); + struct ib_gid_attr zattr = { }; union ib_gid zgid = { {0} }; - u8 port = port_num - 1; + u8 port = attr->port_num - 1; unsigned long flags; int ret; @@ -110,7 +109,7 @@ static int hns_roce_del_gid(struct ib_device *device, u8 port_num, spin_lock_irqsave(&hr_dev->iboe.lock, flags); - ret = hr_dev->hw->set_gid(hr_dev, port, index, &zgid, NULL); + ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, &zgid, &zattr); spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); @@ -295,12 +294,6 @@ static enum rdma_link_layer hns_roce_get_link_layer(struct ib_device *device, return IB_LINK_LAYER_ETHERNET; } -static int hns_roce_query_gid(struct ib_device *ib_dev, u8 port_num, int index, - union ib_gid *gid) -{ - return 0; -} - static int hns_roce_query_pkey(struct ib_device *ib_dev, u8 port, u16 index, u16 *pkey) { @@ -337,7 +330,7 @@ static struct ib_ucontext *hns_roce_alloc_ucontext(struct ib_device *ib_dev, { int ret = 0; struct hns_roce_ucontext *context; - struct hns_roce_ib_alloc_ucontext_resp resp; + struct hns_roce_ib_alloc_ucontext_resp resp = {}; struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); resp.qp_tab_size = hr_dev->caps.num_qps; @@ -350,6 +343,11 @@ static struct ib_ucontext *hns_roce_alloc_ucontext(struct ib_device *ib_dev, if (ret) goto error_fail_uar_alloc; + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) { + INIT_LIST_HEAD(&context->page_list); + mutex_init(&context->page_mutex); + } + ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); if (ret) goto error_fail_copy_to_udata; @@ -476,7 +474,6 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) ib_dev->modify_port = hns_roce_modify_port; ib_dev->get_link_layer = hns_roce_get_link_layer; ib_dev->get_netdev = hns_roce_get_netdev; - ib_dev->query_gid = hns_roce_query_gid; ib_dev->add_gid = hns_roce_add_gid; ib_dev->del_gid = hns_roce_del_gid; ib_dev->query_pkey = hns_roce_query_pkey; @@ -520,6 +517,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) /* OTHERS */ ib_dev->get_port_immutable = hns_roce_port_immutable; + ib_dev->driver_id = RDMA_DRIVER_HNS; ret = ib_register_device(ib_dev, NULL); if (ret) { dev_err(dev, "ib_register_device failed!\n"); @@ -659,6 +657,11 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) spin_lock_init(&hr_dev->sm_lock); spin_lock_init(&hr_dev->bt_cmd_lock); + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) { + INIT_LIST_HEAD(&hr_dev->pgdir_list); + mutex_init(&hr_dev->pgdir_mutex); + } + ret = hns_roce_init_uar_table(hr_dev); if (ret) { dev_err(dev, "Failed to initialize uar table. aborting\n"); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index da86a811..f7256d8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -933,7 +933,7 @@ int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages); out: - free_page((unsigned long) pages); + free_pages((unsigned long) pages, order); return ret; } diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index bdab218..4b41e04 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -32,6 +32,7 @@ #include <linux/platform_device.h> #include <linux/pci.h> +#include <uapi/rdma/hns-abi.h> #include "hns_roce_device.h" static int hns_roce_pd_alloc(struct hns_roce_dev *hr_dev, unsigned long *pdn) @@ -77,7 +78,9 @@ struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev, } if (context) { - if (ib_copy_to_udata(udata, &pd->pdn, sizeof(u64))) { + struct hns_roce_ib_alloc_pd_resp uresp = {.pdn = pd->pdn}; + + if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { hns_roce_pd_free(to_hr_dev(ib_dev), pd->pdn); dev_err(dev, "[alloc_pd]ib_copy_to_udata failed!\n"); kfree(pd); diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 088973a..e289a92 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -489,6 +489,15 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, return 0; } +static int hns_roce_qp_has_rq(struct ib_qp_init_attr *attr) +{ + if (attr->qp_type == IB_QPT_XRC_INI || + attr->qp_type == IB_QPT_XRC_TGT || attr->srq) + return 0; + + return 1; +} + static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, struct ib_pd *ib_pd, struct ib_qp_init_attr *init_attr, @@ -497,6 +506,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, { struct device *dev = hr_dev->dev; struct hns_roce_ib_create_qp ucmd; + struct hns_roce_ib_create_qp_resp resp = {}; unsigned long qpn = 0; int ret = 0; u32 page_shift; @@ -602,6 +612,18 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, dev_err(dev, "hns_roce_ib_umem_write_mtt error for create qp\n"); goto err_mtt; } + + if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && + (udata->outlen >= sizeof(resp)) && + hns_roce_qp_has_rq(init_attr)) { + ret = hns_roce_db_map_user( + to_hr_ucontext(ib_pd->uobject->context), + ucmd.db_addr, &hr_qp->rdb); + if (ret) { + dev_err(dev, "rp record doorbell map failed!\n"); + goto err_mtt; + } + } } else { if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { @@ -630,6 +652,16 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, hr_qp->rq.db_reg_l = hr_dev->reg_base + hr_dev->odb_offset + DB_REG_OFFSET * hr_dev->priv_uar.index; + if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && + hns_roce_qp_has_rq(init_attr)) { + ret = hns_roce_alloc_db(hr_dev, &hr_qp->rdb, 0); + if (ret) { + dev_err(dev, "rq record doorbell alloc failed!\n"); + goto err_rq_sge_list; + } + *hr_qp->rdb.db_record = 0; + } + /* Allocate QP buf */ page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz; if (hns_roce_buf_alloc(hr_dev, hr_qp->buff_size, @@ -637,7 +669,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, &hr_qp->hr_buf, page_shift)) { dev_err(dev, "hns_roce_buf_alloc error!\n"); ret = -ENOMEM; - goto err_rq_sge_list; + goto err_db; } hr_qp->mtt.mtt_type = MTT_TYPE_WQE; @@ -698,17 +730,44 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, else hr_qp->doorbell_qpn = cpu_to_le64(hr_qp->qpn); + if (ib_pd->uobject && (udata->outlen >= sizeof(resp)) && + (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)) { + + /* indicate kernel supports record db */ + resp.cap_flags |= HNS_ROCE_SUPPORT_RQ_RECORD_DB; + ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); + if (ret) + goto err_qp; + + hr_qp->rdb_en = 1; + } hr_qp->event = hns_roce_ib_qp_event; return 0; +err_qp: + if (init_attr->qp_type == IB_QPT_GSI && + hr_dev->hw_rev == HNS_ROCE_HW_VER1) + hns_roce_qp_remove(hr_dev, hr_qp); + else + hns_roce_qp_free(hr_dev, hr_qp); + err_qpn: if (!sqpn) hns_roce_release_range_qp(hr_dev, qpn, 1); err_wrid: - kfree(hr_qp->sq.wrid); - kfree(hr_qp->rq.wrid); + if (ib_pd->uobject) { + if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && + (udata->outlen >= sizeof(resp)) && + hns_roce_qp_has_rq(init_attr)) + hns_roce_db_unmap_user( + to_hr_ucontext(ib_pd->uobject->context), + &hr_qp->rdb); + } else { + kfree(hr_qp->sq.wrid); + kfree(hr_qp->rq.wrid); + } err_mtt: hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt); @@ -719,6 +778,11 @@ err_buf: else hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); +err_db: + if (!ib_pd->uobject && hns_roce_qp_has_rq(init_attr) && + (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)) + hns_roce_free_db(hr_dev, &hr_qp->rdb); + err_rq_sge_list: if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list); diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index bcddd70..d5d8c1b 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -60,7 +60,7 @@ #include <i40e_client.h> #include "i40iw_type.h" #include "i40iw_p.h" -#include "i40iw_ucontext.h" +#include <rdma/i40iw-abi.h> #include "i40iw_pble.h" #include "i40iw_verbs.h" #include "i40iw_cm.h" @@ -559,18 +559,25 @@ void i40iw_next_iw_state(struct i40iw_qp *iwqp, u8 state, u8 del_hash, u8 term, u8 term_len); int i40iw_send_syn(struct i40iw_cm_node *cm_node, u32 sendack); +int i40iw_send_reset(struct i40iw_cm_node *cm_node); struct i40iw_cm_node *i40iw_find_node(struct i40iw_cm_core *cm_core, u16 rem_port, u32 *rem_addr, u16 loc_port, u32 *loc_addr, - bool add_refcnt); + bool add_refcnt, + bool accelerated_list); enum i40iw_status_code i40iw_hw_flush_wqes(struct i40iw_device *iwdev, struct i40iw_sc_qp *qp, struct i40iw_qp_flush_info *info, bool wait); +void i40iw_gen_ae(struct i40iw_device *iwdev, + struct i40iw_sc_qp *qp, + struct i40iw_gen_ae_info *info, + bool wait); + void i40iw_copy_ip_ntohl(u32 *dst, __be32 *src); struct ib_mr *i40iw_reg_phys_mr(struct ib_pd *ib_pd, u64 addr, diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index abf4cd8..4cfa8f4 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -539,7 +539,7 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node, * i40iw_send_reset - Send RST packet * @cm_node: connection's node */ -static int i40iw_send_reset(struct i40iw_cm_node *cm_node) +int i40iw_send_reset(struct i40iw_cm_node *cm_node) { struct i40iw_puda_buf *sqbuf; int flags = SET_RST | SET_ACK; @@ -1183,6 +1183,26 @@ static void i40iw_handle_close_entry(struct i40iw_cm_node *cm_node, u32 rem_node } /** + * i40iw_build_timer_list - Add cm_nodes to timer list + * @timer_list: ptr to timer list + * @hte: ptr to accelerated or non-accelerated list + */ +static void i40iw_build_timer_list(struct list_head *timer_list, + struct list_head *hte) +{ + struct i40iw_cm_node *cm_node; + struct list_head *list_core_temp, *list_node; + + list_for_each_safe(list_node, list_core_temp, hte) { + cm_node = container_of(list_node, struct i40iw_cm_node, list); + if (cm_node->close_entry || cm_node->send_entry) { + atomic_inc(&cm_node->ref_count); + list_add(&cm_node->timer_entry, timer_list); + } + } +} + +/** * i40iw_cm_timer_tick - system's timer expired callback * @pass: Pointing to cm_core */ @@ -1202,15 +1222,10 @@ static void i40iw_cm_timer_tick(struct timer_list *t) struct list_head timer_list; INIT_LIST_HEAD(&timer_list); - spin_lock_irqsave(&cm_core->ht_lock, flags); - list_for_each_safe(list_node, list_core_temp, &cm_core->connected_nodes) { - cm_node = container_of(list_node, struct i40iw_cm_node, list); - if (cm_node->close_entry || cm_node->send_entry) { - atomic_inc(&cm_node->ref_count); - list_add(&cm_node->timer_entry, &timer_list); - } - } + spin_lock_irqsave(&cm_core->ht_lock, flags); + i40iw_build_timer_list(&timer_list, &cm_core->non_accelerated_list); + i40iw_build_timer_list(&timer_list, &cm_core->accelerated_list); spin_unlock_irqrestore(&cm_core->ht_lock, flags); list_for_each_safe(list_node, list_core_temp, &timer_list) { @@ -1406,19 +1421,22 @@ static int i40iw_send_fin(struct i40iw_cm_node *cm_node) * @loc_port: local tcp port num * @loc_addr: loc ip addr * @add_refcnt: flag to increment refcount of cm_node + * @accelerated_list: flag for accelerated vs non-accelerated list to search */ struct i40iw_cm_node *i40iw_find_node(struct i40iw_cm_core *cm_core, u16 rem_port, u32 *rem_addr, u16 loc_port, u32 *loc_addr, - bool add_refcnt) + bool add_refcnt, + bool accelerated_list) { struct list_head *hte; struct i40iw_cm_node *cm_node; unsigned long flags; - hte = &cm_core->connected_nodes; + hte = accelerated_list ? + &cm_core->accelerated_list : &cm_core->non_accelerated_list; /* walk list and find cm_node associated with this session ID */ spin_lock_irqsave(&cm_core->ht_lock, flags); @@ -1487,22 +1505,40 @@ static struct i40iw_cm_listener *i40iw_find_listener( static void i40iw_add_hte_node(struct i40iw_cm_core *cm_core, struct i40iw_cm_node *cm_node) { - struct list_head *hte; unsigned long flags; if (!cm_node || !cm_core) { i40iw_pr_err("cm_node or cm_core == NULL\n"); return; } - spin_lock_irqsave(&cm_core->ht_lock, flags); - /* get a handle on the hash table element (list head for this slot) */ - hte = &cm_core->connected_nodes; - list_add_tail(&cm_node->list, hte); + spin_lock_irqsave(&cm_core->ht_lock, flags); + list_add_tail(&cm_node->list, &cm_core->non_accelerated_list); spin_unlock_irqrestore(&cm_core->ht_lock, flags); } /** + * i40iw_find_port - find port that matches reference port + * @port: port number + * @accelerated_list: flag for accelerated vs non-accelerated list + */ +static bool i40iw_find_port(struct i40iw_cm_core *cm_core, u16 port, + bool accelerated_list) +{ + struct list_head *hte; + struct i40iw_cm_node *cm_node; + + hte = accelerated_list ? + &cm_core->accelerated_list : &cm_core->non_accelerated_list; + + list_for_each_entry(cm_node, hte, list) { + if (cm_node->loc_port == port) + return true; + } + return false; +} + +/** * i40iw_port_in_use - determine if port is in use * @port: port number * @active_side: flag for listener side vs active side @@ -1510,19 +1546,14 @@ static void i40iw_add_hte_node(struct i40iw_cm_core *cm_core, static bool i40iw_port_in_use(struct i40iw_cm_core *cm_core, u16 port, bool active_side) { struct i40iw_cm_listener *listen_node; - struct i40iw_cm_node *cm_node; unsigned long flags; bool ret = false; if (active_side) { - /* search connected node list */ spin_lock_irqsave(&cm_core->ht_lock, flags); - list_for_each_entry(cm_node, &cm_core->connected_nodes, list) { - if (cm_node->loc_port == port) { - ret = true; - break; - } - } + ret = i40iw_find_port(cm_core, port, true); + if (!ret) + ret = i40iw_find_port(cm_core, port, false); if (!ret) clear_bit(port, cm_core->active_side_ports); spin_unlock_irqrestore(&cm_core->ht_lock, flags); @@ -1829,9 +1860,11 @@ static int i40iw_dec_refcnt_listen(struct i40iw_cm_core *cm_core, INIT_LIST_HEAD(&reset_list); if (free_hanging_nodes) { spin_lock_irqsave(&cm_core->ht_lock, flags); - list_for_each_safe(list_pos, list_temp, &cm_core->connected_nodes) { + list_for_each_safe(list_pos, + list_temp, &cm_core->non_accelerated_list) { cm_node = container_of(list_pos, struct i40iw_cm_node, list); - if ((cm_node->listener == listener) && !cm_node->accelerated) { + if ((cm_node->listener == listener) && + !cm_node->accelerated) { atomic_inc(&cm_node->ref_count); list_add(&cm_node->reset_entry, &reset_list); } @@ -3144,7 +3177,8 @@ void i40iw_receive_ilq(struct i40iw_sc_vsi *vsi, struct i40iw_puda_buf *rbuf) cm_info.rem_addr, cm_info.loc_port, cm_info.loc_addr, - true); + true, + false); if (!cm_node) { /* Only type of packet accepted are for */ @@ -3202,7 +3236,8 @@ void i40iw_setup_cm_core(struct i40iw_device *iwdev) cm_core->iwdev = iwdev; cm_core->dev = &iwdev->sc_dev; - INIT_LIST_HEAD(&cm_core->connected_nodes); + INIT_LIST_HEAD(&cm_core->accelerated_list); + INIT_LIST_HEAD(&cm_core->non_accelerated_list); INIT_LIST_HEAD(&cm_core->listen_nodes); timer_setup(&cm_core->tcp_timer, i40iw_cm_timer_tick, 0); @@ -3585,6 +3620,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct i40iw_qp *iwqp; struct i40iw_device *iwdev; struct i40iw_sc_dev *dev; + struct i40iw_cm_core *cm_core; struct i40iw_cm_node *cm_node; struct ib_qp_attr attr; int passive_state; @@ -3594,6 +3630,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct i40iw_kmem_info accept; enum i40iw_status_code status; u64 tagged_offset; + unsigned long flags; memset(&attr, 0, sizeof(attr)); ibqp = i40iw_get_qp(cm_id->device, conn_param->qpn); @@ -3603,6 +3640,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) iwqp = to_iwqp(ibqp); iwdev = iwqp->iwdev; dev = &iwdev->sc_dev; + cm_core = &iwdev->cm_core; cm_node = (struct i40iw_cm_node *)cm_id->provider_data; if (((struct sockaddr_in *)&cm_id->local_addr)->sin_family == AF_INET) { @@ -3697,6 +3735,10 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL); cm_node->accelerated = true; + spin_lock_irqsave(&cm_core->ht_lock, flags); + list_move_tail(&cm_node->list, &cm_core->accelerated_list); + spin_unlock_irqrestore(&cm_core->ht_lock, flags); + status = i40iw_send_cm_event(cm_node, cm_id, IW_CM_EVENT_ESTABLISHED, 0); if (status) @@ -4026,10 +4068,12 @@ static void i40iw_cm_event_connected(struct i40iw_cm_event *event) { struct i40iw_qp *iwqp; struct i40iw_device *iwdev; + struct i40iw_cm_core *cm_core; struct i40iw_cm_node *cm_node; struct i40iw_sc_dev *dev; struct ib_qp_attr attr; struct iw_cm_id *cm_id; + unsigned long flags; int status; bool read0; @@ -4038,6 +4082,7 @@ static void i40iw_cm_event_connected(struct i40iw_cm_event *event) iwqp = (struct i40iw_qp *)cm_id->provider_data; iwdev = to_iwdev(iwqp->ibqp.device); dev = &iwdev->sc_dev; + cm_core = &iwdev->cm_core; if (iwqp->destroyed) { status = -ETIMEDOUT; @@ -4057,6 +4102,9 @@ static void i40iw_cm_event_connected(struct i40iw_cm_event *event) i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL); cm_node->accelerated = true; + spin_lock_irqsave(&cm_core->ht_lock, flags); + list_move_tail(&cm_node->list, &cm_core->accelerated_list); + spin_unlock_irqrestore(&cm_core->ht_lock, flags); status = i40iw_send_cm_event(cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY, 0); if (status) @@ -4256,25 +4304,38 @@ void i40iw_cm_teardown_connections(struct i40iw_device *iwdev, u32 *ipaddr, struct list_head *list_node; struct i40iw_cm_node *cm_node; unsigned long flags; - struct list_head connected_list; + struct list_head teardown_list; struct ib_qp_attr attr; - INIT_LIST_HEAD(&connected_list); + INIT_LIST_HEAD(&teardown_list); spin_lock_irqsave(&cm_core->ht_lock, flags); - list_for_each_safe(list_node, list_core_temp, &cm_core->connected_nodes) { + list_for_each_safe(list_node, list_core_temp, + &cm_core->accelerated_list) { + cm_node = container_of(list_node, struct i40iw_cm_node, list); + if (disconnect_all || + (nfo->vlan_id == cm_node->vlan_id && + (!memcmp(cm_node->loc_addr, ipaddr, nfo->ipv4 ? 4 : 16) || + !memcmp(cm_node->rem_addr, ipaddr, nfo->ipv4 ? 4 : 16)))) { + atomic_inc(&cm_node->ref_count); + list_add(&cm_node->teardown_entry, &teardown_list); + } + } + list_for_each_safe(list_node, list_core_temp, + &cm_core->non_accelerated_list) { cm_node = container_of(list_node, struct i40iw_cm_node, list); if (disconnect_all || (nfo->vlan_id == cm_node->vlan_id && (!memcmp(cm_node->loc_addr, ipaddr, nfo->ipv4 ? 4 : 16) || !memcmp(cm_node->rem_addr, ipaddr, nfo->ipv4 ? 4 : 16)))) { atomic_inc(&cm_node->ref_count); - list_add(&cm_node->connected_entry, &connected_list); + list_add(&cm_node->teardown_entry, &teardown_list); } } spin_unlock_irqrestore(&cm_core->ht_lock, flags); - list_for_each_safe(list_node, list_core_temp, &connected_list) { - cm_node = container_of(list_node, struct i40iw_cm_node, connected_entry); + list_for_each_safe(list_node, list_core_temp, &teardown_list) { + cm_node = container_of(list_node, struct i40iw_cm_node, + teardown_entry); attr.qp_state = IB_QPS_ERR; i40iw_modify_qp(&cm_node->iwqp->ibqp, &attr, IB_QP_STATE, NULL); if (iwdev->reset) diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.h b/drivers/infiniband/hw/i40iw/i40iw_cm.h index cf60c45..78ba36a 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.h +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.h @@ -341,7 +341,7 @@ struct i40iw_cm_node { int accept_pend; struct list_head timer_entry; struct list_head reset_entry; - struct list_head connected_entry; + struct list_head teardown_entry; atomic_t passive_state; bool qhash_set; u8 user_pri; @@ -403,7 +403,8 @@ struct i40iw_cm_core { struct i40iw_sc_dev *dev; struct list_head listen_nodes; - struct list_head connected_nodes; + struct list_head accelerated_list; + struct list_head non_accelerated_list; struct timer_list tcp_timer; diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index c74fd33..4d841a3 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -2614,10 +2614,8 @@ static enum i40iw_status_code i40iw_sc_qp_flush_wqes( qp->flush_sq |= flush_sq; qp->flush_rq |= flush_rq; - if (!flush_sq && !flush_rq) { - if (info->ae_code != I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR) - return 0; - } + if (!flush_sq && !flush_rq) + return 0; cqp = qp->pd->dev->cqp; wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch); @@ -2659,6 +2657,49 @@ static enum i40iw_status_code i40iw_sc_qp_flush_wqes( } /** + * i40iw_sc_gen_ae - generate AE, currently uses flush WQE CQP OP + * @qp: sc qp + * @info: gen ae information + * @scratch: u64 saved to be used during cqp completion + * @post_sq: flag for cqp db to ring + */ +static enum i40iw_status_code i40iw_sc_gen_ae( + struct i40iw_sc_qp *qp, + struct i40iw_gen_ae_info *info, + u64 scratch, + bool post_sq) +{ + u64 temp; + u64 *wqe; + struct i40iw_sc_cqp *cqp; + u64 header; + + cqp = qp->pd->dev->cqp; + wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch); + if (!wqe) + return I40IW_ERR_RING_FULL; + + temp = info->ae_code | + LS_64(info->ae_source, I40IW_CQPSQ_FWQE_AESOURCE); + + set_64bit_val(wqe, 8, temp); + + header = qp->qp_uk.qp_id | + LS_64(I40IW_CQP_OP_GEN_AE, I40IW_CQPSQ_OPCODE) | + LS_64(1, I40IW_CQPSQ_FWQE_GENERATE_AE) | + LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID); + + i40iw_insert_wqe_hdr(wqe, header); + + i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "GEN_AE WQE", + wqe, I40IW_CQP_WQE_SIZE * 8); + + if (post_sq) + i40iw_sc_cqp_post_sq(cqp); + return 0; +} + +/** * i40iw_sc_qp_upload_context - upload qp's context * @dev: sc device struct * @info: upload context info ptr for return @@ -4148,6 +4189,13 @@ static enum i40iw_status_code i40iw_exec_cqp_cmd(struct i40iw_sc_dev *dev, pcmdinfo->in.u.qp_flush_wqes. scratch, pcmdinfo->post_sq); break; + case OP_GEN_AE: + status = i40iw_sc_gen_ae( + pcmdinfo->in.u.gen_ae.qp, + &pcmdinfo->in.u.gen_ae.info, + pcmdinfo->in.u.gen_ae.scratch, + pcmdinfo->post_sq); + break; case OP_ADD_ARP_CACHE_ENTRY: status = i40iw_sc_add_arp_cache_entry( pcmdinfo->in.u.add_arp_cache_entry.cqp, diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h index 4b65e41..6ddaeec 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_d.h +++ b/drivers/infiniband/hw/i40iw/i40iw_d.h @@ -418,6 +418,8 @@ #define I40IW_CQP_OP_QUERY_FPM_VALUES 0x20 #define I40IW_CQP_OP_COMMIT_FPM_VALUES 0x21 #define I40IW_CQP_OP_FLUSH_WQES 0x22 +/* I40IW_CQP_OP_GEN_AE is the same value as I40IW_CQP_OP_FLUSH_WQES */ +#define I40IW_CQP_OP_GEN_AE 0x22 #define I40IW_CQP_OP_MANAGE_APBVT 0x23 #define I40IW_CQP_OP_NOP 0x24 #define I40IW_CQP_OP_MANAGE_QUAD_HASH_TABLE_ENTRY 0x25 @@ -1729,6 +1731,7 @@ enum i40iw_alignment { #define OP_COMMIT_FPM_VALUES 30 #define OP_REQUESTED_COMMANDS 31 #define OP_COMPLETED_COMMANDS 32 -#define OP_SIZE_CQP_STAT_ARRAY 33 +#define OP_GEN_AE 33 +#define OP_SIZE_CQP_STAT_ARRAY 34 #endif diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c index 61540e1..6139836 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hw.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c @@ -352,6 +352,8 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) else i40iw_cm_disconn(iwqp); break; + case I40IW_AE_BAD_CLOSE: + /* fall through */ case I40IW_AE_RESET_SENT: i40iw_next_iw_state(iwqp, I40IW_QP_STATE_ERROR, 1, 0, 0); i40iw_cm_disconn(iwqp); @@ -668,6 +670,39 @@ enum i40iw_status_code i40iw_hw_flush_wqes(struct i40iw_device *iwdev, } /** + * i40iw_gen_ae - generate AE + * @iwdev: iwarp device + * @qp: qp associated with AE + * @info: info for ae + * @wait: wait for completion + */ +void i40iw_gen_ae(struct i40iw_device *iwdev, + struct i40iw_sc_qp *qp, + struct i40iw_gen_ae_info *info, + bool wait) +{ + struct i40iw_gen_ae_info *ae_info; + struct i40iw_cqp_request *cqp_request; + struct cqp_commands_info *cqp_info; + + cqp_request = i40iw_get_cqp_request(&iwdev->cqp, wait); + if (!cqp_request) + return; + + cqp_info = &cqp_request->info; + ae_info = &cqp_request->info.in.u.gen_ae.info; + memcpy(ae_info, info, sizeof(*ae_info)); + + cqp_info->cqp_cmd = OP_GEN_AE; + cqp_info->post_sq = 1; + cqp_info->in.u.gen_ae.qp = qp; + cqp_info->in.u.gen_ae.scratch = (uintptr_t)cqp_request; + if (i40iw_handle_cqp_op(iwdev, cqp_request)) + i40iw_pr_err("CQP OP failed attempting to generate ae_code=0x%x\n", + info->ae_code); +} + +/** * i40iw_hw_manage_vf_pble_bp - manage vf pbles * @iwdev: iwarp device * @info: info for managing pble diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index b088629..9cd0d3e 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -1560,8 +1560,6 @@ static enum i40iw_status_code i40iw_setup_init_state(struct i40iw_handler *hdl, enum i40iw_status_code status; memcpy(&hdl->ldev, ldev, sizeof(*ldev)); - if (resource_profile == 1) - resource_profile = 2; iwdev->mpa_version = mpa_version; iwdev->resource_profile = (resource_profile < I40IW_HMC_PROFILE_EQUAL) ? diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c index 4c21197..d9c7ae6 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_puda.c +++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c @@ -348,8 +348,8 @@ enum i40iw_status_code i40iw_puda_poll_completion(struct i40iw_sc_dev *dev, spin_lock_irqsave(&rsrc->bufpool_lock, flags); rsrc->tx_wqe_avail_cnt++; spin_unlock_irqrestore(&rsrc->bufpool_lock, flags); - if (!list_empty(&rsrc->vsi->ilq->txpend)) - i40iw_puda_send_buf(rsrc->vsi->ilq, NULL); + if (!list_empty(&rsrc->txpend)) + i40iw_puda_send_buf(rsrc, NULL); } done: @@ -1471,10 +1471,6 @@ static void i40iw_ieq_tx_compl(struct i40iw_sc_vsi *vsi, void *sqwrid) struct i40iw_puda_buf *buf = (struct i40iw_puda_buf *)sqwrid; i40iw_puda_ret_bufpool(ieq, buf); - if (!list_empty(&ieq->txpend)) { - buf = i40iw_puda_get_listbuf(&ieq->txpend); - i40iw_puda_send_buf(ieq, buf); - } } /** diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h index a27d392..adc8d2e 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_type.h +++ b/drivers/infiniband/hw/i40iw/i40iw_type.h @@ -1004,6 +1004,11 @@ struct i40iw_cqp_query_fpm_values { u32 pbl_max; }; +struct i40iw_gen_ae_info { + u16 ae_code; + u8 ae_source; +}; + struct i40iw_cqp_ops { enum i40iw_status_code (*cqp_init)(struct i40iw_sc_cqp *, struct i40iw_cqp_init_info *); @@ -1291,6 +1296,12 @@ struct cqp_info { } qp_flush_wqes; struct { + struct i40iw_sc_qp *qp; + struct i40iw_gen_ae_info info; + u64 scratch; + } gen_ae; + + struct { struct i40iw_sc_cqp *cqp; void *fpm_values_va; u64 fpm_values_pa; diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index ddc1056..a9ea966 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -1284,15 +1284,13 @@ void i40iw_cqp_qp_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp) */ void i40iw_ieq_mpa_crc_ae(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp) { - struct i40iw_qp_flush_info info; + struct i40iw_gen_ae_info info; struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev; i40iw_debug(dev, I40IW_DEBUG_AEQ, "%s entered\n", __func__); - memset(&info, 0, sizeof(info)); info.ae_code = I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR; - info.generate_ae = true; - info.ae_source = 0x3; - (void)i40iw_hw_flush_wqes(iwdev, qp, &info, false); + info.ae_source = I40IW_AE_SOURCE_RQ; + i40iw_gen_ae(iwdev, qp, &info, false); } /** @@ -1407,7 +1405,7 @@ struct i40iw_sc_qp *i40iw_ieq_get_qp(struct i40iw_sc_dev *dev, rem_port = ntohs(tcph->source); cm_node = i40iw_find_node(&iwdev->cm_core, rem_port, rem_addr, loc_port, - loc_addr, false); + loc_addr, false, true); if (!cm_node) return NULL; iwqp = cm_node->iwqp; diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 70024e8..40e4f5a 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -38,6 +38,7 @@ #include <linux/highmem.h> #include <linux/time.h> #include <linux/hugetlb.h> +#include <linux/irq.h> #include <asm/byteorder.h> #include <net/ip.h> #include <rdma/ib_verbs.h> @@ -830,10 +831,10 @@ static int i40iw_query_qp(struct ib_qp *ibqp, void i40iw_hw_modify_qp(struct i40iw_device *iwdev, struct i40iw_qp *iwqp, struct i40iw_modify_qp_info *info, bool wait) { - enum i40iw_status_code status; struct i40iw_cqp_request *cqp_request; struct cqp_commands_info *cqp_info; struct i40iw_modify_qp_info *m_info; + struct i40iw_gen_ae_info ae_info; cqp_request = i40iw_get_cqp_request(&iwdev->cqp, wait); if (!cqp_request) @@ -846,9 +847,25 @@ void i40iw_hw_modify_qp(struct i40iw_device *iwdev, struct i40iw_qp *iwqp, cqp_info->post_sq = 1; cqp_info->in.u.qp_modify.qp = &iwqp->sc_qp; cqp_info->in.u.qp_modify.scratch = (uintptr_t)cqp_request; - status = i40iw_handle_cqp_op(iwdev, cqp_request); - if (status) - i40iw_pr_err("CQP-OP Modify QP fail"); + if (!i40iw_handle_cqp_op(iwdev, cqp_request)) + return; + + switch (m_info->next_iwarp_state) { + case I40IW_QP_STATE_RTS: + if (iwqp->iwarp_state == I40IW_QP_STATE_IDLE) + i40iw_send_reset(iwqp->cm_node); + /* fall through */ + case I40IW_QP_STATE_IDLE: + case I40IW_QP_STATE_TERMINATE: + case I40IW_QP_STATE_CLOSING: + ae_info.ae_code = I40IW_AE_BAD_CLOSE; + ae_info.ae_source = 0; + i40iw_gen_ae(iwdev, &iwqp->sc_qp, &ae_info, false); + break; + case I40IW_QP_STATE_ERROR: + default: + break; + } } /** @@ -961,10 +978,6 @@ int i40iw_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, iwqp->ibqp_state = attr->qp_state; - if (issue_modify_qp) - iwqp->iwarp_state = info.next_iwarp_state; - else - info.next_iwarp_state = iwqp->iwarp_state; } if (attr_mask & IB_QP_ACCESS_FLAGS) { ctx_info->iwarp_info_valid = true; @@ -1002,9 +1015,14 @@ int i40iw_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, spin_unlock_irqrestore(&iwqp->lock, flags); - if (issue_modify_qp) + if (issue_modify_qp) { i40iw_hw_modify_qp(iwdev, iwqp, &info, true); + spin_lock_irqsave(&iwqp->lock, flags); + iwqp->iwarp_state = info.next_iwarp_state; + spin_unlock_irqrestore(&iwqp->lock, flags); + } + if (issue_modify_qp && (iwqp->ibqp_state > IB_QPS_RTS)) { if (dont_wait) { if (iwqp->cm_id && iwqp->hw_tcp_state) { @@ -2729,6 +2747,25 @@ static int i40iw_destroy_ah(struct ib_ah *ah) } /** + * i40iw_get_vector_affinity - report IRQ affinity mask + * @ibdev: IB device + * @comp_vector: completion vector index + */ +static const struct cpumask *i40iw_get_vector_affinity(struct ib_device *ibdev, + int comp_vector) +{ + struct i40iw_device *iwdev = to_iwdev(ibdev); + struct i40iw_msix_vector *msix_vec; + + if (iwdev->msix_shared) + msix_vec = &iwdev->iw_msixtbl[comp_vector]; + else + msix_vec = &iwdev->iw_msixtbl[comp_vector + 1]; + + return irq_get_affinity_mask(msix_vec->irq); +} + +/** * i40iw_init_rdma_device - initialization of iwarp device * @iwdev: iwarp device */ @@ -2824,6 +2861,7 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev iwibdev->ibdev.req_notify_cq = i40iw_req_notify_cq; iwibdev->ibdev.post_send = i40iw_post_send; iwibdev->ibdev.post_recv = i40iw_post_recv; + iwibdev->ibdev.get_vector_affinity = i40iw_get_vector_affinity; return iwibdev; } @@ -2889,6 +2927,7 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev) return -ENOMEM; iwibdev = iwdev->iwibdev; + iwibdev->ibdev.driver_id = RDMA_DRIVER_I40IW; ret = ib_register_device(&iwibdev->ibdev, NULL); if (ret) goto error; diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 6dee4fd..9345d5b 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -101,12 +101,10 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, if (ret) return ERR_PTR(ret); eth_zero_addr(ah->av.eth.s_mac); - if (gid_attr.ndev) { - if (is_vlan_dev(gid_attr.ndev)) - vlan_tag = vlan_dev_vlan_id(gid_attr.ndev); - memcpy(ah->av.eth.s_mac, gid_attr.ndev->dev_addr, ETH_ALEN); - dev_put(gid_attr.ndev); - } + if (is_vlan_dev(gid_attr.ndev)) + vlan_tag = vlan_dev_vlan_id(gid_attr.ndev); + memcpy(ah->av.eth.s_mac, gid_attr.ndev->dev_addr, ETH_ALEN); + dev_put(gid_attr.ndev); if (vlan_tag < 0x1000) vlan_tag |= (rdma_ah_get_sl(ah_attr) & 7) << 13; ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 5a0e4fc..5b70744 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -246,14 +246,11 @@ static int mlx4_ib_update_gids(struct gid_entry *gids, return mlx4_ib_update_gids_v1(gids, ibdev, port_num); } -static int mlx4_ib_add_gid(struct ib_device *device, - u8 port_num, - unsigned int index, - const union ib_gid *gid, +static int mlx4_ib_add_gid(const union ib_gid *gid, const struct ib_gid_attr *attr, void **context) { - struct mlx4_ib_dev *ibdev = to_mdev(device); + struct mlx4_ib_dev *ibdev = to_mdev(attr->device); struct mlx4_ib_iboe *iboe = &ibdev->iboe; struct mlx4_port_gid_table *port_gid_table; int free = -1, found = -1; @@ -262,16 +259,16 @@ static int mlx4_ib_add_gid(struct ib_device *device, int i; struct gid_entry *gids = NULL; - if (!rdma_cap_roce_gid_table(device, port_num)) + if (!rdma_cap_roce_gid_table(attr->device, attr->port_num)) return -EINVAL; - if (port_num > MLX4_MAX_PORTS) + if (attr->port_num > MLX4_MAX_PORTS) return -EINVAL; if (!context) return -EINVAL; - port_gid_table = &iboe->gids[port_num - 1]; + port_gid_table = &iboe->gids[attr->port_num - 1]; spin_lock_bh(&iboe->lock); for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) { if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid)) && @@ -318,33 +315,30 @@ static int mlx4_ib_add_gid(struct ib_device *device, spin_unlock_bh(&iboe->lock); if (!ret && hw_update) { - ret = mlx4_ib_update_gids(gids, ibdev, port_num); + ret = mlx4_ib_update_gids(gids, ibdev, attr->port_num); kfree(gids); } return ret; } -static int mlx4_ib_del_gid(struct ib_device *device, - u8 port_num, - unsigned int index, - void **context) +static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context) { struct gid_cache_context *ctx = *context; - struct mlx4_ib_dev *ibdev = to_mdev(device); + struct mlx4_ib_dev *ibdev = to_mdev(attr->device); struct mlx4_ib_iboe *iboe = &ibdev->iboe; struct mlx4_port_gid_table *port_gid_table; int ret = 0; int hw_update = 0; struct gid_entry *gids = NULL; - if (!rdma_cap_roce_gid_table(device, port_num)) + if (!rdma_cap_roce_gid_table(attr->device, attr->port_num)) return -EINVAL; - if (port_num > MLX4_MAX_PORTS) + if (attr->port_num > MLX4_MAX_PORTS) return -EINVAL; - port_gid_table = &iboe->gids[port_num - 1]; + port_gid_table = &iboe->gids[attr->port_num - 1]; spin_lock_bh(&iboe->lock); if (ctx) { ctx->refcount--; @@ -376,7 +370,7 @@ static int mlx4_ib_del_gid(struct ib_device *device, spin_unlock_bh(&iboe->lock); if (!ret && hw_update) { - ret = mlx4_ib_update_gids(gids, ibdev, port_num); + ret = mlx4_ib_update_gids(gids, ibdev, attr->port_num); kfree(gids); } return ret; @@ -411,9 +405,6 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, if (attr.ndev) dev_put(attr.ndev); - if (!memcmp(&gid, &zgid, sizeof(gid))) - return -EINVAL; - spin_lock_irqsave(&iboe->lock, flags); port_gid_table = &iboe->gids[port_num - 1]; @@ -429,6 +420,9 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, return real_index; } +#define field_avail(type, fld, sz) (offsetof(type, fld) + \ + sizeof(((type *)0)->fld) <= (sz)) + static int mlx4_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *uhw) @@ -556,14 +550,19 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->timestamp_mask = 0xFFFFFFFFFFFFULL; props->max_ah = INT_MAX; - if ((dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) && - (mlx4_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET || - mlx4_ib_port_link_layer(ibdev, 2) == IB_LINK_LAYER_ETHERNET)) { - props->rss_caps.max_rwq_indirection_tables = props->max_qp; - props->rss_caps.max_rwq_indirection_table_size = - dev->dev->caps.max_rss_tbl_sz; - props->rss_caps.supported_qpts = 1 << IB_QPT_RAW_PACKET; - props->max_wq_type_rq = props->max_qp; + if (mlx4_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET || + mlx4_ib_port_link_layer(ibdev, 2) == IB_LINK_LAYER_ETHERNET) { + if (dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) { + props->rss_caps.max_rwq_indirection_tables = + props->max_qp; + props->rss_caps.max_rwq_indirection_table_size = + dev->dev->caps.max_rss_tbl_sz; + props->rss_caps.supported_qpts = 1 << IB_QPT_RAW_PACKET; + props->max_wq_type_rq = props->max_qp; + } + + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP) + props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS; } props->cq_caps.max_cq_moderation_count = MLX4_MAX_CQ_COUNT; @@ -575,7 +574,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, if (uhw->outlen >= resp.response_length + sizeof(resp.hca_core_clock_offset)) { resp.response_length += sizeof(resp.hca_core_clock_offset); if (!err && !mlx4_is_slave(dev->dev)) { - resp.comp_mask |= QUERY_DEVICE_RESP_MASK_TIMESTAMP; + resp.comp_mask |= MLX4_IB_QUERY_DEV_RESP_MASK_CORE_CLOCK_OFFSET; resp.hca_core_clock_offset = clock_params.offset % PAGE_SIZE; } } @@ -587,8 +586,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, sizeof(struct mlx4_wqe_data_seg); } - if (uhw->outlen >= resp.response_length + sizeof(resp.rss_caps)) { - resp.response_length += sizeof(resp.rss_caps); + if (field_avail(typeof(resp), rss_caps, uhw->outlen)) { if (props->rss_caps.supported_qpts) { resp.rss_caps.rx_hash_function = MLX4_IB_RX_HASH_FUNC_TOEPLITZ; @@ -608,6 +606,22 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, resp.rss_caps.rx_hash_fields_mask |= MLX4_IB_RX_HASH_INNER; } + resp.response_length = offsetof(typeof(resp), rss_caps) + + sizeof(resp.rss_caps); + } + + if (field_avail(typeof(resp), tso_caps, uhw->outlen)) { + if (dev->dev->caps.max_gso_sz && + ((mlx4_ib_port_link_layer(ibdev, 1) == + IB_LINK_LAYER_ETHERNET) || + (mlx4_ib_port_link_layer(ibdev, 2) == + IB_LINK_LAYER_ETHERNET))) { + resp.tso_caps.max_tso = dev->dev->caps.max_gso_sz; + resp.tso_caps.supported_qpts |= + 1 << IB_QPT_RAW_PACKET; + } + resp.response_length = offsetof(typeof(resp), tso_caps) + + sizeof(resp.tso_caps); } if (uhw->outlen) { @@ -865,24 +879,9 @@ out: static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid) { - int ret; - if (rdma_protocol_ib(ibdev, port)) return __mlx4_ib_query_gid(ibdev, port, index, gid, 0); - - if (!rdma_protocol_roce(ibdev, port)) - return -ENODEV; - - if (!rdma_cap_roce_gid_table(ibdev, port)) - return -ENODEV; - - ret = ib_get_cached_gid(ibdev, port, index, gid, NULL); - if (ret == -EAGAIN) { - memcpy(gid, &zgid, sizeof(*gid)); - return 0; - } - - return ret; + return 0; } static int mlx4_ib_query_sl2vl(struct ib_device *ibdev, u8 port, u64 *sl2vl_tbl) @@ -1330,7 +1329,7 @@ static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev, struct mlx4_ib_pd *pd; int err; - pd = kmalloc(sizeof *pd, GFP_KERNEL); + pd = kzalloc(sizeof(*pd), GFP_KERNEL); if (!pd) return ERR_PTR(-ENOMEM); @@ -1346,7 +1345,6 @@ static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev, kfree(pd); return ERR_PTR(-EFAULT); } - return &pd->ibpd; } @@ -1860,6 +1858,9 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, if (flow_attr->port < 1 || flow_attr->port > qp->device->phys_port_cnt) return ERR_PTR(-EINVAL); + if (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP) + return ERR_PTR(-EOPNOTSUPP); + if ((flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) && (flow_attr->type != IB_FLOW_ATTR_NORMAL)) return ERR_PTR(-EOPNOTSUPP); @@ -2933,6 +2934,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) if (mlx4_ib_alloc_diag_counters(ibdev)) goto err_steer_free_bitmap; + ibdev->ib_dev.driver_id = RDMA_DRIVER_MLX4; if (ib_register_device(&ibdev->ib_dev, NULL)) goto err_diag_counters; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index e14919c..7b14299 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -189,6 +189,7 @@ enum mlx4_ib_qp_flags { MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO, MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP, + MLX4_IB_QP_SCATTER_FCS = IB_QP_CREATE_SCATTER_FCS, /* Mellanox specific flags start from IB_QP_CREATE_RESERVED_START */ MLX4_IB_ROCE_V2_GSI_QP = MLX4_IB_QP_CREATE_ROCE_V2_GSI, @@ -641,24 +642,6 @@ struct mlx4_uverbs_ex_query_device { __u32 reserved; }; -enum query_device_resp_mask { - QUERY_DEVICE_RESP_MASK_TIMESTAMP = 1UL << 0, -}; - -struct mlx4_ib_rss_caps { - __u64 rx_hash_fields_mask; /* enum mlx4_rx_hash_fields */ - __u8 rx_hash_function; /* enum mlx4_rx_hash_function_flags */ - __u8 reserved[7]; -}; - -struct mlx4_uverbs_ex_query_device_resp { - __u32 comp_mask; - __u32 response_length; - __u64 hca_core_clock_offset; - __u32 max_inl_recv_sz; - struct mlx4_ib_rss_caps rss_caps; -}; - static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) { return container_of(ibdev, struct mlx4_ib_dev, ib_dev); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 4975f3e..17f4f15 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -407,6 +407,9 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto err_mr; mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; + mr->ibmr.length = length; + mr->ibmr.iova = virt_addr; + mr->ibmr.page_size = 1U << shift; return &mr->ibmr; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index f045491..50af891 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1096,6 +1096,17 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, qp->inl_recv_sz = ucmd.qp.inl_recv_sz; } + if (init_attr->create_flags & IB_QP_CREATE_SCATTER_FCS) { + if (!(dev->dev->caps.flags & + MLX4_DEV_CAP_FLAG_FCS_KEEP)) { + pr_debug("scatter FCS is unsupported\n"); + err = -EOPNOTSUPP; + goto err; + } + + qp->flags |= MLX4_IB_QP_SCATTER_FCS; + } + err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, qp_has_rq(init_attr), qp, qp->inl_recv_sz); if (err) @@ -2234,6 +2245,9 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, if (qp->inl_recv_sz) context->param3 |= cpu_to_be32(1 << 25); + if (qp->flags & MLX4_IB_QP_SCATTER_FCS) + context->param3 |= cpu_to_be32(1 << 29); + if (qp_type == IB_QPT_GSI || qp_type == IB_QPT_SMI) context->mtu_msgmax = (IB_MTU_4096 << 5) | 11; else if (qp_type == IB_QPT_RAW_PACKET) @@ -2356,9 +2370,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, status = ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &gid_attr); - if (!status && !memcmp(&gid, &zgid, sizeof(gid))) - status = -ENOENT; - if (!status && gid_attr.ndev) { + if (!status) { vlan = rdma_vlan_dev_vlan_id(gid_attr.ndev); memcpy(smac, gid_attr.ndev->dev_addr, ETH_ALEN); dev_put(gid_attr.ndev); @@ -3880,8 +3892,8 @@ out: */ wmb(); - writel(qp->doorbell_qpn, - to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL); + writel_relaxed(qp->doorbell_qpn, + to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL); /* * Make sure doorbells don't leak out of SQ spinlock @@ -4204,7 +4216,7 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd, return ERR_PTR(-EOPNOTSUPP); } - if (init_attr->create_flags) { + if (init_attr->create_flags & ~IB_WQ_FLAGS_SCATTER_FCS) { pr_debug("unsupported create_flags %u\n", init_attr->create_flags); return ERR_PTR(-EOPNOTSUPP); @@ -4225,6 +4237,9 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd, ib_qp_init_attr.recv_cq = init_attr->cq; ib_qp_init_attr.send_cq = ib_qp_init_attr.recv_cq; /* Dummy CQ */ + if (init_attr->create_flags & IB_WQ_FLAGS_SCATTER_FCS) + ib_qp_init_attr.create_flags |= IB_QP_CREATE_SCATTER_FCS; + err = create_qp_common(dev, pd, MLX4_IB_RWQ_SRC, &ib_qp_init_attr, udata, 0, &qp); if (err) { diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index fe269f6..e6bde32a 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -36,6 +36,9 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, struct rdma_ah_attr *ah_attr) { + enum ib_gid_type gid_type; + int err; + if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) { const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); @@ -50,6 +53,12 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev, ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4); if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { + err = mlx5_get_roce_gid_type(dev, ah_attr->port_num, + ah_attr->grh.sgid_index, + &gid_type); + if (err) + return ERR_PTR(err); + memcpy(ah->av.rmac, ah_attr->roce.dmac, sizeof(ah_attr->roce.dmac)); ah->av.udp_sport = @@ -57,6 +66,9 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev, rdma_ah_get_port_num(ah_attr), rdma_ah_read_grh(ah_attr)->sgid_index); ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0x7) << 1; + if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) +#define MLX5_ECN_ENABLED BIT(1) + ah->av.tclass |= MLX5_ECN_ENABLED; } else { ah->av.rlid = cpu_to_be16(rdma_ah_get_dlid(ah_attr)); ah->av.fl_mlid = rdma_ah_get_path_bits(ah_attr) & 0x7f; diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index 6f6712f..188512b 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -66,3 +66,107 @@ int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *dev, return mlx5_cmd_exec(dev, in, in_size, out, sizeof(out)); } + +int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr, + u64 length, u32 alignment) +{ + struct mlx5_core_dev *dev = memic->dev; + u64 num_memic_hw_pages = MLX5_CAP_DEV_MEM(dev, memic_bar_size) + >> PAGE_SHIFT; + u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr); + u32 max_alignment = MLX5_CAP_DEV_MEM(dev, log_max_memic_addr_alignment); + u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE); + u32 out[MLX5_ST_SZ_DW(alloc_memic_out)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_memic_in)] = {}; + u32 mlx5_alignment; + u64 page_idx = 0; + int ret = 0; + + if (!length || (length & MLX5_MEMIC_ALLOC_SIZE_MASK)) + return -EINVAL; + + /* mlx5 device sets alignment as 64*2^driver_value + * so normalizing is needed. + */ + mlx5_alignment = (alignment < MLX5_MEMIC_BASE_ALIGN) ? 0 : + alignment - MLX5_MEMIC_BASE_ALIGN; + if (mlx5_alignment > max_alignment) + return -EINVAL; + + MLX5_SET(alloc_memic_in, in, opcode, MLX5_CMD_OP_ALLOC_MEMIC); + MLX5_SET(alloc_memic_in, in, range_size, num_pages * PAGE_SIZE); + MLX5_SET(alloc_memic_in, in, memic_size, length); + MLX5_SET(alloc_memic_in, in, log_memic_addr_alignment, + mlx5_alignment); + + while (page_idx < num_memic_hw_pages) { + spin_lock(&memic->memic_lock); + page_idx = bitmap_find_next_zero_area(memic->memic_alloc_pages, + num_memic_hw_pages, + page_idx, + num_pages, 0); + + if (page_idx < num_memic_hw_pages) + bitmap_set(memic->memic_alloc_pages, + page_idx, num_pages); + + spin_unlock(&memic->memic_lock); + + if (page_idx >= num_memic_hw_pages) + break; + + MLX5_SET64(alloc_memic_in, in, range_start_addr, + hw_start_addr + (page_idx * PAGE_SIZE)); + + ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (ret) { + spin_lock(&memic->memic_lock); + bitmap_clear(memic->memic_alloc_pages, + page_idx, num_pages); + spin_unlock(&memic->memic_lock); + + if (ret == -EAGAIN) { + page_idx++; + continue; + } + + return ret; + } + + *addr = pci_resource_start(dev->pdev, 0) + + MLX5_GET64(alloc_memic_out, out, memic_start_addr); + + return 0; + } + + return -ENOMEM; +} + +int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length) +{ + struct mlx5_core_dev *dev = memic->dev; + u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr); + u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE); + u32 out[MLX5_ST_SZ_DW(dealloc_memic_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(dealloc_memic_in)] = {0}; + u64 start_page_idx; + int err; + + addr -= pci_resource_start(dev->pdev, 0); + start_page_idx = (addr - hw_start_addr) >> PAGE_SHIFT; + + MLX5_SET(dealloc_memic_in, in, opcode, MLX5_CMD_OP_DEALLOC_MEMIC); + MLX5_SET64(dealloc_memic_in, in, memic_start_addr, addr); + MLX5_SET(dealloc_memic_in, in, memic_size, length); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + + if (!err) { + spin_lock(&memic->memic_lock); + bitmap_clear(memic->memic_alloc_pages, + start_page_idx, num_pages); + spin_unlock(&memic->memic_lock); + } + + return err; +} diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index 78ffded..e7206c8 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -33,6 +33,7 @@ #ifndef MLX5_IB_CMD_H #define MLX5_IB_CMD_H +#include "mlx5_ib.h" #include <linux/kernel.h> #include <linux/mlx5/driver.h> @@ -41,4 +42,7 @@ int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, void *out, int out_size); int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev, void *in, int in_size); +int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr, + u64 length, u32 alignment); +int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length); #endif /* MLX5_IB_CMD_H */ diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 071fd9a..daa919e 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -38,6 +38,7 @@ #include <linux/pci.h> #include <linux/dma-mapping.h> #include <linux/slab.h> +#include <linux/bitmap.h> #if defined(CONFIG_X86) #include <asm/pat.h> #endif @@ -51,6 +52,7 @@ #include <linux/mlx5/port.h> #include <linux/mlx5/vport.h> #include <linux/mlx5/fs.h> +#include <linux/mlx5/fs_helpers.h> #include <linux/list.h> #include <rdma/ib_smi.h> #include <rdma/ib_umem.h> @@ -60,6 +62,13 @@ #include "ib_rep.h" #include "cmd.h" #include <linux/mlx5/fs_helpers.h> +#include <linux/mlx5/accel.h> +#include <rdma/uverbs_std_types.h> +#include <rdma/mlx5_user_ioctl_verbs.h> +#include <rdma/mlx5_user_ioctl_cmds.h> + +#define UVERBS_MODULE_NAME mlx5_ib +#include <rdma/uverbs_named_ioctl.h> #define DRIVER_NAME "mlx5_ib" #define DRIVER_VERSION "5.0-0" @@ -92,6 +101,12 @@ static LIST_HEAD(mlx5_ib_dev_list); */ static DEFINE_MUTEX(mlx5_ib_multiport_mutex); +/* We can't use an array for xlt_emergency_page because dma_map_single + * doesn't work on kernel modules memory + */ +static unsigned long xlt_emergency_page; +static struct mutex xlt_emergency_page_mutex; + struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi) { struct mlx5_ib_dev *dev; @@ -399,6 +414,9 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, if (err) goto out; + props->active_width = IB_WIDTH_4X; + props->active_speed = IB_SPEED_QDR; + translate_eth_proto_oper(eth_prot_oper, &props->active_speed, &props->active_width); @@ -493,18 +511,19 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, vlan_id, port_num); } -static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num, - unsigned int index, const union ib_gid *gid, +static int mlx5_ib_add_gid(const union ib_gid *gid, const struct ib_gid_attr *attr, __always_unused void **context) { - return set_roce_addr(to_mdev(device), port_num, index, gid, attr); + return set_roce_addr(to_mdev(attr->device), attr->port_num, + attr->index, gid, attr); } -static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num, - unsigned int index, __always_unused void **context) +static int mlx5_ib_del_gid(const struct ib_gid_attr *attr, + __always_unused void **context) { - return set_roce_addr(to_mdev(device), port_num, index, NULL, NULL); + return set_roce_addr(to_mdev(attr->device), attr->port_num, + attr->index, NULL, NULL); } __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, @@ -516,9 +535,6 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr)) return 0; - if (!attr.ndev) - return 0; - dev_put(attr.ndev); if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) @@ -538,9 +554,6 @@ int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num, if (ret) return ret; - if (!attr.ndev) - return -ENODEV; - dev_put(attr.ndev); *gid_type = attr.gid_type; @@ -844,6 +857,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, MLX5_RX_HASH_SRC_PORT_UDP | MLX5_RX_HASH_DST_PORT_UDP | MLX5_RX_HASH_INNER; + if (mlx5_accel_ipsec_device_caps(dev->mdev) & + MLX5_ACCEL_IPSEC_CAP_DEVICE) + resp.rss_caps.rx_hash_fields_mask |= + MLX5_RX_HASH_IPSEC_SPI; resp.response_length += sizeof(resp.rss_caps); } } else { @@ -875,6 +892,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS; } + if (MLX5_CAP_DEV_MEM(mdev, memic)) { + props->max_dm_size = + MLX5_CAP_DEV_MEM(mdev, max_memic_size); + } + if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS)) props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING; @@ -980,6 +1002,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, MLX5_CAP_QOS(mdev, packet_pacing_min_rate); resp.packet_pacing_caps.supported_qpts |= 1 << IB_QPT_RAW_PACKET; + if (MLX5_CAP_QOS(mdev, packet_pacing_burst_bound) && + MLX5_CAP_QOS(mdev, packet_pacing_typical_size)) + resp.packet_pacing_caps.cap_flags |= + MLX5_IB_PP_SUPPORT_BURST; } resp.response_length += sizeof(resp.packet_pacing_caps); } @@ -1665,6 +1691,18 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, resp.response_length = min(offsetof(typeof(resp), response_length) + sizeof(resp.response_length), udata->outlen); + if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE) { + if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_EGRESS)) + resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM; + if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA) + resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA; + if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi)) + resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING; + if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN) + resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN; + /* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */ + } + context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) return ERR_PTR(-ENOMEM); @@ -1702,17 +1740,10 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range; #endif - context->upd_xlt_page = __get_free_page(GFP_KERNEL); - if (!context->upd_xlt_page) { - err = -ENOMEM; - goto out_uars; - } - mutex_init(&context->upd_xlt_page_mutex); - if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) { err = mlx5_ib_alloc_transport_domain(dev, &context->tdn); if (err) - goto out_page; + goto out_uars; } INIT_LIST_HEAD(&context->vma_private_list); @@ -1789,9 +1820,6 @@ out_td: if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) mlx5_ib_dealloc_transport_domain(dev, context->tdn); -out_page: - free_page(context->upd_xlt_page); - out_uars: deallocate_uars(dev, context); @@ -1817,7 +1845,6 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) mlx5_ib_dealloc_transport_domain(dev, context->tdn); - free_page(context->upd_xlt_page); deallocate_uars(dev, context); kfree(bfregi->sys_pages); kfree(bfregi->count); @@ -1993,6 +2020,8 @@ static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd) return "best effort WC"; case MLX5_IB_MMAP_NC_PAGE: return "NC"; + case MLX5_IB_MMAP_DEVICE_MEM: + return "Device Memory"; default: return NULL; } @@ -2151,6 +2180,34 @@ free_bfreg: return err; } +static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + struct mlx5_ib_ucontext *mctx = to_mucontext(context); + struct mlx5_ib_dev *dev = to_mdev(context->device); + u16 page_idx = get_extended_index(vma->vm_pgoff); + size_t map_size = vma->vm_end - vma->vm_start; + u32 npages = map_size >> PAGE_SHIFT; + phys_addr_t pfn; + pgprot_t prot; + + if (find_next_zero_bit(mctx->dm_pages, page_idx + npages, page_idx) != + page_idx + npages) + return -EINVAL; + + pfn = ((pci_resource_start(dev->mdev->pdev, 0) + + MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >> + PAGE_SHIFT) + + page_idx; + prot = pgprot_writecombine(vma->vm_page_prot); + vma->vm_page_prot = prot; + + if (io_remap_pfn_range(vma, vma->vm_start, pfn, map_size, + vma->vm_page_prot)) + return -EAGAIN; + + return mlx5_ib_set_vma_data(vma, mctx); +} + static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) { struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); @@ -2195,6 +2252,9 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm case MLX5_IB_MMAP_CLOCK_INFO: return mlx5_ib_mmap_clock_info_page(dev, vma, context); + case MLX5_IB_MMAP_DEVICE_MEM: + return dm_mmap(ibcontext, vma); + default: return -EINVAL; } @@ -2202,6 +2262,87 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm return 0; } +struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_dm_alloc_attr *attr, + struct uverbs_attr_bundle *attrs) +{ + u64 act_size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE); + struct mlx5_memic *memic = &to_mdev(ibdev)->memic; + phys_addr_t memic_addr; + struct mlx5_ib_dm *dm; + u64 start_offset; + u32 page_idx; + int err; + + dm = kzalloc(sizeof(*dm), GFP_KERNEL); + if (!dm) + return ERR_PTR(-ENOMEM); + + mlx5_ib_dbg(to_mdev(ibdev), "alloc_memic req: user_length=0x%llx act_length=0x%llx log_alignment=%d\n", + attr->length, act_size, attr->alignment); + + err = mlx5_cmd_alloc_memic(memic, &memic_addr, + act_size, attr->alignment); + if (err) + goto err_free; + + start_offset = memic_addr & ~PAGE_MASK; + page_idx = (memic_addr - pci_resource_start(memic->dev->pdev, 0) - + MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >> + PAGE_SHIFT; + + err = uverbs_copy_to(attrs, + MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, + &start_offset, sizeof(start_offset)); + if (err) + goto err_dealloc; + + err = uverbs_copy_to(attrs, + MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, + &page_idx, sizeof(page_idx)); + if (err) + goto err_dealloc; + + bitmap_set(to_mucontext(context)->dm_pages, page_idx, + DIV_ROUND_UP(act_size, PAGE_SIZE)); + + dm->dev_addr = memic_addr; + + return &dm->ibdm; + +err_dealloc: + mlx5_cmd_dealloc_memic(memic, memic_addr, + act_size); +err_free: + kfree(dm); + return ERR_PTR(err); +} + +int mlx5_ib_dealloc_dm(struct ib_dm *ibdm) +{ + struct mlx5_memic *memic = &to_mdev(ibdm->device)->memic; + struct mlx5_ib_dm *dm = to_mdm(ibdm); + u64 act_size = roundup(dm->ibdm.length, MLX5_MEMIC_BASE_SIZE); + u32 page_idx; + int ret; + + ret = mlx5_cmd_dealloc_memic(memic, dm->dev_addr, act_size); + if (ret) + return ret; + + page_idx = (dm->dev_addr - pci_resource_start(memic->dev->pdev, 0) - + MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >> + PAGE_SHIFT; + bitmap_clear(to_mucontext(ibdm->uobject->context)->dm_pages, + page_idx, + DIV_ROUND_UP(act_size, PAGE_SIZE)); + + kfree(dm); + + return 0; +} + static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata) @@ -2317,8 +2458,28 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) offsetof(typeof(filter), field) -\ sizeof(filter.field)) +static int parse_flow_flow_action(const union ib_flow_spec *ib_spec, + const struct ib_flow_attr *flow_attr, + struct mlx5_flow_act *action) +{ + struct mlx5_ib_flow_action *maction = to_mflow_act(ib_spec->action.act); + + switch (maction->ib_action.type) { + case IB_FLOW_ACTION_ESP: + /* Currently only AES_GCM keymat is supported by the driver */ + action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx; + action->action |= flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS ? + MLX5_FLOW_CONTEXT_ACTION_ENCRYPT : + MLX5_FLOW_CONTEXT_ACTION_DECRYPT; + return 0; + default: + return -EOPNOTSUPP; + } +} + static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, u32 *match_v, const union ib_flow_spec *ib_spec, + const struct ib_flow_attr *flow_attr, struct mlx5_flow_act *action) { void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, @@ -2328,6 +2489,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, void *headers_c; void *headers_v; int match_ipv; + int ret; if (ib_spec->type & IB_FLOW_SPEC_INNER) { headers_c = MLX5_ADDR_OF(fte_match_param, match_c, @@ -2478,7 +2640,15 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, ntohl(ib_spec->ipv6.mask.flow_label), ntohl(ib_spec->ipv6.val.flow_label), ib_spec->type & IB_FLOW_SPEC_INNER); + break; + case IB_FLOW_SPEC_ESP: + if (ib_spec->esp.mask.seq) + return -EOPNOTSUPP; + MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, + ntohl(ib_spec->esp.mask.spi)); + MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, + ntohl(ib_spec->esp.val.spi)); break; case IB_FLOW_SPEC_TCP: if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, @@ -2546,6 +2716,11 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, return -EOPNOTSUPP; action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP; break; + case IB_FLOW_SPEC_ACTION_HANDLE: + ret = parse_flow_flow_action(ib_spec, flow_attr, action); + if (ret) + return ret; + break; default: return -EINVAL; } @@ -2587,6 +2762,46 @@ static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr) return false; } +enum valid_spec { + VALID_SPEC_INVALID, + VALID_SPEC_VALID, + VALID_SPEC_NA, +}; + +static enum valid_spec +is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev, + const struct mlx5_flow_spec *spec, + const struct mlx5_flow_act *flow_act, + bool egress) +{ + const u32 *match_c = spec->match_criteria; + bool is_crypto = + (flow_act->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | + MLX5_FLOW_CONTEXT_ACTION_DECRYPT)); + bool is_ipsec = mlx5_fs_is_ipsec_flow(match_c); + bool is_drop = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_DROP; + + /* + * Currently only crypto is supported in egress, when regular egress + * rules would be supported, always return VALID_SPEC_NA. + */ + if (!is_crypto) + return egress ? VALID_SPEC_INVALID : VALID_SPEC_NA; + + return is_crypto && is_ipsec && + (!egress || (!is_drop && !flow_act->has_flow_tag)) ? + VALID_SPEC_VALID : VALID_SPEC_INVALID; +} + +static bool is_valid_spec(struct mlx5_core_dev *mdev, + const struct mlx5_flow_spec *spec, + const struct mlx5_flow_act *flow_act, + bool egress) +{ + /* We curretly only support ipsec egress flow */ + return is_valid_esp_aes_gcm(mdev, spec, flow_act, egress) != VALID_SPEC_INVALID; +} + static bool is_valid_ethertype(struct mlx5_core_dev *mdev, const struct ib_flow_attr *flow_attr, bool check_inner) @@ -2711,13 +2926,17 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size)); if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { - if (flow_is_multicast_only(flow_attr) && - !dont_trap) + if (ft_type == MLX5_IB_FT_TX) + priority = 0; + else if (flow_is_multicast_only(flow_attr) && + !dont_trap) priority = MLX5_IB_FLOW_MCAST_PRIO; else priority = ib_prio_to_core_prio(flow_attr->priority, dont_trap); ns = mlx5_get_flow_namespace(dev->mdev, + ft_type == MLX5_IB_FT_TX ? + MLX5_FLOW_NAMESPACE_EGRESS : MLX5_FLOW_NAMESPACE_BYPASS); num_entries = MLX5_FS_MAX_ENTRIES; num_groups = MLX5_FS_MAX_TYPES; @@ -2804,6 +3023,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, unsigned int spec_index; int err = 0; int dest_num = 1; + bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS; if (!is_valid_attr(dev->mdev, flow_attr)) return ERR_PTR(-EINVAL); @@ -2820,7 +3040,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { err = parse_flow_attr(dev->mdev, spec->match_criteria, spec->match_value, - ib_flow, &flow_act); + ib_flow, flow_attr, &flow_act); if (err < 0) goto free; @@ -2843,12 +3063,23 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, } spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); + + if (is_egress && + !is_valid_spec(dev->mdev, spec, &flow_act, is_egress)) { + err = -EINVAL; + goto free; + } + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) { rule_dst = NULL; dest_num = 0; } else { - flow_act.action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : - MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; + if (is_egress) + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; + else + flow_act.action |= + dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : + MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; } if (flow_act.has_flow_tag && @@ -3022,6 +3253,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, struct mlx5_flow_destination *dst = NULL; struct mlx5_ib_flow_prio *ft_prio_tx = NULL; struct mlx5_ib_flow_prio *ft_prio; + bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS; int err; int underlay_qpn; @@ -3030,7 +3262,13 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, if (domain != IB_FLOW_DOMAIN_USER || flow_attr->port > dev->num_ports || - (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP)) + (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | + IB_FLOW_ATTR_FLAGS_EGRESS))) + return ERR_PTR(-EINVAL); + + if (is_egress && + (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || + flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) return ERR_PTR(-EINVAL); dst = kzalloc(sizeof(*dst), GFP_KERNEL); @@ -3039,7 +3277,8 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, mutex_lock(&dev->flow_db->lock); - ft_prio = get_flow_table(dev, flow_attr, MLX5_IB_FT_RX); + ft_prio = get_flow_table(dev, flow_attr, + is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX); if (IS_ERR(ft_prio)) { err = PTR_ERR(ft_prio); goto unlock; @@ -3053,11 +3292,15 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, } } - dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; - if (mqp->flags & MLX5_IB_QP_RSS) - dst->tir_num = mqp->rss_qp.tirn; - else - dst->tir_num = mqp->raw_packet_qp.rq.tirn; + if (is_egress) { + dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT; + } else { + dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; + if (mqp->flags & MLX5_IB_QP_RSS) + dst->tir_num = mqp->rss_qp.tirn; + else + dst->tir_num = mqp->raw_packet_qp.rq.tirn; + } if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) { @@ -3102,6 +3345,170 @@ unlock: return ERR_PTR(err); } +static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags) +{ + u32 flags = 0; + + if (mlx5_flags & MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA) + flags |= MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA; + + return flags; +} + +#define MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA +static struct ib_flow_action * +mlx5_ib_create_flow_action_esp(struct ib_device *device, + const struct ib_flow_action_attrs_esp *attr, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_dev *mdev = to_mdev(device); + struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm; + struct mlx5_accel_esp_xfrm_attrs accel_attrs = {}; + struct mlx5_ib_flow_action *action; + u64 action_flags; + u64 flags; + int err = 0; + + if (IS_UVERBS_COPY_ERR(uverbs_copy_from(&action_flags, attrs, + MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS))) + return ERR_PTR(-EFAULT); + + if (action_flags >= (MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1)) + return ERR_PTR(-EOPNOTSUPP); + + flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags); + + /* We current only support a subset of the standard features. Only a + * keymat of type AES_GCM, with icv_len == 16, iv_algo == SEQ and esn + * (with overlap). Full offload mode isn't supported. + */ + if (!attr->keymat || attr->replay || attr->encap || + attr->spi || attr->seq || attr->tfc_pad || + attr->hard_limit_pkts || + (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | + IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT))) + return ERR_PTR(-EOPNOTSUPP); + + if (attr->keymat->protocol != + IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM) + return ERR_PTR(-EOPNOTSUPP); + + aes_gcm = &attr->keymat->keymat.aes_gcm; + + if (aes_gcm->icv_len != 16 || + aes_gcm->iv_algo != IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ) + return ERR_PTR(-EOPNOTSUPP); + + action = kmalloc(sizeof(*action), GFP_KERNEL); + if (!action) + return ERR_PTR(-ENOMEM); + + action->esp_aes_gcm.ib_flags = attr->flags; + memcpy(&accel_attrs.keymat.aes_gcm.aes_key, &aes_gcm->aes_key, + sizeof(accel_attrs.keymat.aes_gcm.aes_key)); + accel_attrs.keymat.aes_gcm.key_len = aes_gcm->key_len * 8; + memcpy(&accel_attrs.keymat.aes_gcm.salt, &aes_gcm->salt, + sizeof(accel_attrs.keymat.aes_gcm.salt)); + memcpy(&accel_attrs.keymat.aes_gcm.seq_iv, &aes_gcm->iv, + sizeof(accel_attrs.keymat.aes_gcm.seq_iv)); + accel_attrs.keymat.aes_gcm.icv_len = aes_gcm->icv_len * 8; + accel_attrs.keymat.aes_gcm.iv_algo = MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ; + accel_attrs.keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM; + + accel_attrs.esn = attr->esn; + if (attr->flags & IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) + accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED; + if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW) + accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; + + if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT) + accel_attrs.action |= MLX5_ACCEL_ESP_ACTION_ENCRYPT; + + action->esp_aes_gcm.ctx = + mlx5_accel_esp_create_xfrm(mdev->mdev, &accel_attrs, flags); + if (IS_ERR(action->esp_aes_gcm.ctx)) { + err = PTR_ERR(action->esp_aes_gcm.ctx); + goto err_parse; + } + + action->esp_aes_gcm.ib_flags = attr->flags; + + return &action->ib_action; + +err_parse: + kfree(action); + return ERR_PTR(err); +} + +static int +mlx5_ib_modify_flow_action_esp(struct ib_flow_action *action, + const struct ib_flow_action_attrs_esp *attr, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_flow_action *maction = to_mflow_act(action); + struct mlx5_accel_esp_xfrm_attrs accel_attrs; + int err = 0; + + if (attr->keymat || attr->replay || attr->encap || + attr->spi || attr->seq || attr->tfc_pad || + attr->hard_limit_pkts || + (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | + IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS | + IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))) + return -EOPNOTSUPP; + + /* Only the ESN value or the MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP can + * be modified. + */ + if (!(maction->esp_aes_gcm.ib_flags & + IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) && + attr->flags & (IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | + IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)) + return -EINVAL; + + memcpy(&accel_attrs, &maction->esp_aes_gcm.ctx->attrs, + sizeof(accel_attrs)); + + accel_attrs.esn = attr->esn; + if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW) + accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; + else + accel_attrs.flags &= ~MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; + + err = mlx5_accel_esp_modify_xfrm(maction->esp_aes_gcm.ctx, + &accel_attrs); + if (err) + return err; + + maction->esp_aes_gcm.ib_flags &= + ~IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW; + maction->esp_aes_gcm.ib_flags |= + attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW; + + return 0; +} + +static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action) +{ + struct mlx5_ib_flow_action *maction = to_mflow_act(action); + + switch (action->type) { + case IB_FLOW_ACTION_ESP: + /* + * We only support aes_gcm by now, so we implicitly know this is + * the underline crypto. + */ + mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx); + break; + default: + WARN_ON(true); + break; + } + + kfree(maction); + return 0; +} + static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { struct mlx5_ib_dev *dev = to_mdev(ibqp->device); @@ -4553,6 +4960,47 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev) mlx5_nic_vport_disable_roce(dev->mdev); } +ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_dm, UVERBS_OBJECT_DM, + UVERBS_METHOD_DM_ALLOC, + &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, + UVERBS_ATTR_TYPE(u64), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, + UVERBS_ATTR_TYPE(u16), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_flow_action, UVERBS_OBJECT_FLOW_ACTION, + UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, + &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, + UVERBS_ATTR_TYPE(u64), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +#define NUM_TREES 2 +static int populate_specs_root(struct mlx5_ib_dev *dev) +{ + const struct uverbs_object_tree_def *default_root[NUM_TREES + 1] = { + uverbs_default_get_objects()}; + size_t num_trees = 1; + + if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE && + !WARN_ON(num_trees >= ARRAY_SIZE(default_root))) + default_root[num_trees++] = &mlx5_ib_flow_action; + + if (MLX5_CAP_DEV_MEM(dev->mdev, memic) && + !WARN_ON(num_trees >= ARRAY_SIZE(default_root))) + default_root[num_trees++] = &mlx5_ib_dm; + + dev->ib_dev.specs_root = + uverbs_alloc_spec_tree(num_trees, default_root); + + return PTR_ERR_OR_ZERO(dev->ib_dev.specs_root); +} + +static void depopulate_specs_root(struct mlx5_ib_dev *dev) +{ + uverbs_free_spec_tree(dev->ib_dev.specs_root); +} + void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_cleanup_multiport_master(dev); @@ -4616,6 +5064,9 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) INIT_LIST_HEAD(&dev->qp_list); spin_lock_init(&dev->reset_flow_resource_lock); + spin_lock_init(&dev->memic.memic_lock); + dev->memic.dev = mdev; + #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING err = init_srcu_struct(&dev->mr_srcu); if (err) @@ -4778,11 +5229,21 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); } + if (MLX5_CAP_DEV_MEM(mdev, memic)) { + dev->ib_dev.alloc_dm = mlx5_ib_alloc_dm; + dev->ib_dev.dealloc_dm = mlx5_ib_dealloc_dm; + dev->ib_dev.reg_dm_mr = mlx5_ib_reg_dm_mr; + } + dev->ib_dev.create_flow = mlx5_ib_create_flow; dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow; dev->ib_dev.uverbs_ex_cmd_mask |= (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); + dev->ib_dev.create_flow_action_esp = mlx5_ib_create_flow_action_esp; + dev->ib_dev.destroy_flow_action = mlx5_ib_destroy_flow_action; + dev->ib_dev.modify_flow_action_esp = mlx5_ib_modify_flow_action_esp; + dev->ib_dev.driver_id = RDMA_DRIVER_MLX5; err = init_node_data(dev); if (err) @@ -4997,11 +5458,21 @@ void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev) mlx5_free_bfreg(dev->mdev, &dev->bfreg); } +static int mlx5_ib_stage_populate_specs(struct mlx5_ib_dev *dev) +{ + return populate_specs_root(dev); +} + int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) { return ib_register_device(&dev->ib_dev, NULL); } +static void mlx5_ib_stage_depopulate_specs(struct mlx5_ib_dev *dev) +{ + depopulate_specs_root(dev); +} + void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) { destroy_umrc_res(dev); @@ -5136,6 +5607,9 @@ static const struct mlx5_ib_profile pf_profile = { STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR, NULL, mlx5_ib_stage_pre_ib_reg_umr_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_SPECS, + mlx5_ib_stage_populate_specs, + mlx5_ib_stage_depopulate_specs), STAGE_CREATE(MLX5_IB_STAGE_IB_REG, mlx5_ib_stage_ib_reg_init, mlx5_ib_stage_ib_reg_cleanup), @@ -5181,6 +5655,9 @@ static const struct mlx5_ib_profile nic_rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR, NULL, mlx5_ib_stage_pre_ib_reg_umr_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_SPECS, + mlx5_ib_stage_populate_specs, + mlx5_ib_stage_depopulate_specs), STAGE_CREATE(MLX5_IB_STAGE_IB_REG, mlx5_ib_stage_ib_reg_init, mlx5_ib_stage_ib_reg_cleanup), @@ -5301,13 +5778,32 @@ static struct mlx5_interface mlx5_ib_interface = { .protocol = MLX5_INTERFACE_PROTOCOL_IB, }; +unsigned long mlx5_ib_get_xlt_emergency_page(void) +{ + mutex_lock(&xlt_emergency_page_mutex); + return xlt_emergency_page; +} + +void mlx5_ib_put_xlt_emergency_page(void) +{ + mutex_unlock(&xlt_emergency_page_mutex); +} + static int __init mlx5_ib_init(void) { int err; + xlt_emergency_page = __get_free_page(GFP_KERNEL); + if (!xlt_emergency_page) + return -ENOMEM; + + mutex_init(&xlt_emergency_page_mutex); + mlx5_ib_event_wq = alloc_ordered_workqueue("mlx5_ib_event_wq", 0); - if (!mlx5_ib_event_wq) + if (!mlx5_ib_event_wq) { + free_page(xlt_emergency_page); return -ENOMEM; + } mlx5_ib_odp_init(); @@ -5320,6 +5816,8 @@ static void __exit mlx5_ib_cleanup(void) { mlx5_unregister_interface(&mlx5_ib_interface); destroy_workqueue(mlx5_ib_event_wq); + mutex_destroy(&xlt_emergency_page_mutex); + free_page(xlt_emergency_page); } module_init(mlx5_ib_init); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index c33bf152..49a1aa0 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -45,6 +45,7 @@ #include <linux/mlx5/transobj.h> #include <rdma/ib_user_verbs.h> #include <rdma/mlx5-abi.h> +#include <rdma/uverbs_ioctl.h> #define mlx5_ib_dbg(dev, format, arg...) \ pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ @@ -108,6 +109,16 @@ enum { MLX5_IB_INVALID_BFREG = BIT(31), }; +enum { + MLX5_MAX_MEMIC_PAGES = 0x100, + MLX5_MEMIC_ALLOC_SIZE_MASK = 0x3f, +}; + +enum { + MLX5_MEMIC_BASE_ALIGN = 6, + MLX5_MEMIC_BASE_SIZE = 1 << MLX5_MEMIC_BASE_ALIGN, +}; + struct mlx5_ib_vma_private_data { struct list_head list; struct vm_area_struct *vma; @@ -130,10 +141,8 @@ struct mlx5_ib_ucontext { /* protect vma_private_list add/del */ struct mutex vma_private_list_mutex; - unsigned long upd_xlt_page; - /* protect ODP/KSM */ - struct mutex upd_xlt_page_mutex; u64 lib_caps; + DECLARE_BITMAP(dm_pages, MLX5_MAX_MEMIC_PAGES); }; static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext) @@ -155,6 +164,7 @@ struct mlx5_ib_pd { #define MLX5_IB_NUM_FLOW_FT (MLX5_IB_FLOW_LEFTOVERS_PRIO + 1) #define MLX5_IB_NUM_SNIFFER_FTS 2 +#define MLX5_IB_NUM_EGRESS_FTS 1 struct mlx5_ib_flow_prio { struct mlx5_flow_table *flow_table; unsigned int refcount; @@ -170,6 +180,7 @@ struct mlx5_ib_flow_handler { struct mlx5_ib_flow_db { struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT]; struct mlx5_ib_flow_prio sniffer[MLX5_IB_NUM_SNIFFER_FTS]; + struct mlx5_ib_flow_prio egress[MLX5_IB_NUM_EGRESS_FTS]; struct mlx5_flow_table *lag_demux_ft; /* Protect flow steering bypass flow tables * when add/del flow rules. @@ -406,7 +417,7 @@ struct mlx5_ib_qp { struct list_head qps_list; struct list_head cq_recv_list; struct list_head cq_send_list; - u32 rate_limit; + struct mlx5_rate_limit rl; u32 underlay_qpn; bool tunnel_offload_en; /* storage for qp sub type when core qp type is IB_QPT_DRIVER */ @@ -522,8 +533,19 @@ enum mlx5_ib_mtt_access_flags { MLX5_IB_MTT_WRITE = (1 << 1), }; +struct mlx5_ib_dm { + struct ib_dm ibdm; + phys_addr_t dev_addr; +}; + #define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE) +#define MLX5_IB_DM_ALLOWED_ACCESS (IB_ACCESS_LOCAL_WRITE |\ + IB_ACCESS_REMOTE_WRITE |\ + IB_ACCESS_REMOTE_READ |\ + IB_ACCESS_REMOTE_ATOMIC |\ + IB_ZERO_BASED) + struct mlx5_ib_mr { struct ib_mr ibmr; void *descs; @@ -743,6 +765,7 @@ enum mlx5_ib_stages { MLX5_IB_STAGE_UAR, MLX5_IB_STAGE_BFREG, MLX5_IB_STAGE_PRE_IB_REG_UMR, + MLX5_IB_STAGE_SPECS, MLX5_IB_STAGE_IB_REG, MLX5_IB_STAGE_POST_IB_REG_UMR, MLX5_IB_STAGE_DELAY_DROP, @@ -774,6 +797,22 @@ struct mlx5_ib_multiport_info { bool unaffiliate; }; +struct mlx5_ib_flow_action { + struct ib_flow_action ib_action; + union { + struct { + u64 ib_flags; + struct mlx5_accel_esp_xfrm *ctx; + } esp_aes_gcm; + }; +}; + +struct mlx5_memic { + struct mlx5_core_dev *dev; + spinlock_t memic_lock; + DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES); +}; + struct mlx5_ib_dev { struct ib_device ib_dev; struct mlx5_core_dev *mdev; @@ -820,6 +859,7 @@ struct mlx5_ib_dev { u8 umr_fence; struct list_head ib_dev_list; u64 sys_image_guid; + struct mlx5_memic memic; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) @@ -887,6 +927,11 @@ static inline struct mlx5_ib_srq *to_mibsrq(struct mlx5_core_srq *msrq) return container_of(msrq, struct mlx5_ib_srq, msrq); } +static inline struct mlx5_ib_dm *to_mdm(struct ib_dm *ibdm) +{ + return container_of(ibdm, struct mlx5_ib_dm, ibdm); +} + static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr) { return container_of(ibmr, struct mlx5_ib_mr, ibmr); @@ -897,6 +942,12 @@ static inline struct mlx5_ib_mw *to_mmw(struct ib_mw *ibmw) return container_of(ibmw, struct mlx5_ib_mw, ibmw); } +static inline struct mlx5_ib_flow_action * +to_mflow_act(struct ib_flow_action *ibact) +{ + return container_of(ibact, struct mlx5_ib_flow_action, ib_action); +} + int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, struct mlx5_db *db); void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db); @@ -1025,7 +1076,14 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, struct ib_udata *udata); int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); bool mlx5_ib_dc_atomic_is_supported(struct mlx5_ib_dev *dev); - +struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_dm_alloc_attr *attr, + struct uverbs_attr_bundle *attrs); +int mlx5_ib_dealloc_dm(struct ib_dm *ibdm); +struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, + struct ib_dm_mr_attr *attr, + struct uverbs_attr_bundle *attrs); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev); @@ -1221,4 +1279,7 @@ static inline int get_num_static_uars(struct mlx5_ib_dev *dev, return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * bfregi->num_static_sys_pages; } +unsigned long mlx5_ib_get_xlt_emergency_page(void); +void mlx5_ib_put_xlt_emergency_page(void); + #endif /* MLX5_IB_H */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 654bc31..1520a2f 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -47,10 +47,25 @@ enum { #define MLX5_UMR_ALIGN 2048 -static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); -static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); +static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); +static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static int mr_cache_max_order(struct mlx5_ib_dev *dev); static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); +static bool umr_can_modify_entity_size(struct mlx5_ib_dev *dev) +{ + return !MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled); +} + +static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev) +{ + return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled); +} + +static bool use_umr(struct mlx5_ib_dev *dev, int order) +{ + return order <= mr_cache_max_order(dev) && + umr_can_modify_entity_size(dev); +} static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { @@ -189,7 +204,9 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) MLX5_SET(mkc, mkc, free, 1); MLX5_SET(mkc, mkc, umr_en, 1); - MLX5_SET(mkc, mkc, access_mode, ent->access_mode); + MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3); + MLX5_SET(mkc, mkc, access_mode_4_2, + (ent->access_mode >> 2) & 0x7); MLX5_SET(mkc, mkc, qpn, 0xffffff); MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt); @@ -220,26 +237,32 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent = &cache->ent[c]; + struct mlx5_ib_mr *tmp_mr; struct mlx5_ib_mr *mr; - int err; + LIST_HEAD(del_list); int i; for (i = 0; i < num; i++) { spin_lock_irq(&ent->lock); if (list_empty(&ent->head)) { spin_unlock_irq(&ent->lock); - return; + break; } mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); - list_del(&mr->list); + list_move(&mr->list, &del_list); ent->cur--; ent->size--; spin_unlock_irq(&ent->lock); - err = destroy_mkey(dev, mr); - if (err) - mlx5_ib_warn(dev, "failed destroy mkey\n"); - else - kfree(mr); + mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); + } + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + synchronize_srcu(&dev->mr_srcu); +#endif + + list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { + list_del(&mr->list); + kfree(mr); } } @@ -562,26 +585,32 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent = &cache->ent[c]; + struct mlx5_ib_mr *tmp_mr; struct mlx5_ib_mr *mr; - int err; + LIST_HEAD(del_list); cancel_delayed_work(&ent->dwork); while (1) { spin_lock_irq(&ent->lock); if (list_empty(&ent->head)) { spin_unlock_irq(&ent->lock); - return; + break; } mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); - list_del(&mr->list); + list_move(&mr->list, &del_list); ent->cur--; ent->size--; spin_unlock_irq(&ent->lock); - err = destroy_mkey(dev, mr); - if (err) - mlx5_ib_warn(dev, "failed destroy mkey\n"); - else - kfree(mr); + mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); + } + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + synchronize_srcu(&dev->mr_srcu); +#endif + + list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { + list_del(&mr->list); + kfree(mr); } } @@ -780,7 +809,7 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); - MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_PA); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); @@ -947,7 +976,10 @@ static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages, { struct mlx5_ib_dev *dev = mr->dev; struct ib_umem *umem = mr->umem; + if (flags & MLX5_IB_UPD_XLT_INDIRECT) { + if (!umr_can_use_indirect_mkey(dev)) + return -EPERM; mlx5_odp_populate_klm(xlt, idx, npages, mr, flags); return npages; } @@ -977,7 +1009,6 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, { struct mlx5_ib_dev *dev = mr->dev; struct device *ddev = dev->ib_dev.dev.parent; - struct mlx5_ib_ucontext *uctx = NULL; int size; void *xlt; dma_addr_t dma; @@ -993,6 +1024,11 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, size_t pages_to_map = 0; size_t pages_iter = 0; gfp_t gfp; + bool use_emergency_page = false; + + if ((flags & MLX5_IB_UPD_XLT_INDIRECT) && + !umr_can_use_indirect_mkey(dev)) + return -EPERM; /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes, * so we need to align the offset and length accordingly @@ -1019,12 +1055,11 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, } if (!xlt) { - uctx = to_mucontext(mr->ibmr.pd->uobject->context); mlx5_ib_warn(dev, "Using XLT emergency buffer\n"); + xlt = (void *)mlx5_ib_get_xlt_emergency_page(); size = PAGE_SIZE; - xlt = (void *)uctx->upd_xlt_page; - mutex_lock(&uctx->upd_xlt_page_mutex); memset(xlt, 0, size); + use_emergency_page = true; } pages_iter = size / desc_size; dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE); @@ -1088,8 +1123,8 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE); free_xlt: - if (uctx) - mutex_unlock(&uctx->upd_xlt_page_mutex); + if (use_emergency_page) + mlx5_ib_put_xlt_emergency_page(); else free_pages((unsigned long)xlt, get_order(size)); @@ -1141,7 +1176,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); MLX5_SET(mkc, mkc, free, !populate); - MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE)); MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ)); @@ -1197,22 +1232,96 @@ static void set_mr_fileds(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, mr->access_flags = access_flags; } +static struct ib_mr *mlx5_ib_get_memic_mr(struct ib_pd *pd, u64 memic_addr, + u64 length, int acc) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_ib_mr *mr; + void *mkc; + u32 *in; + int err; + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + in = kzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_free; + } + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MEMIC & 0x3); + MLX5_SET(mkc, mkc, access_mode_4_2, + (MLX5_MKC_ACCESS_MODE_MEMIC >> 2) & 0x7); + MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); + MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); + MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); + MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); + MLX5_SET(mkc, mkc, lr, 1); + + MLX5_SET64(mkc, mkc, len, length); + MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET64(mkc, mkc, start_addr, + memic_addr - pci_resource_start(dev->mdev->pdev, 0)); + + err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); + if (err) + goto err_in; + + kfree(in); + + mr->umem = NULL; + set_mr_fileds(dev, mr, 0, length, acc); + + return &mr->ibmr; + +err_in: + kfree(in); + +err_free: + kfree(mr); + + return ERR_PTR(err); +} + +struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, + struct ib_dm_mr_attr *attr, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_dm *mdm = to_mdm(dm); + u64 memic_addr; + + if (attr->access_flags & ~MLX5_IB_DM_ALLOWED_ACCESS) + return ERR_PTR(-EINVAL); + + memic_addr = mdm->dev_addr + attr->offset; + + return mlx5_ib_get_memic_mr(pd, memic_addr, attr->length, + attr->access_flags); +} + struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_mr *mr = NULL; + bool populate_mtts = false; struct ib_umem *umem; int page_shift; int npages; int ncont; int order; int err; - bool use_umr = true; if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM)) - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", start, virt_addr, length, access_flags); @@ -1224,6 +1333,8 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return ERR_PTR(-EINVAL); mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags); + if (IS_ERR(mr)) + return ERR_CAST(mr); return &mr->ibmr; } #endif @@ -1234,26 +1345,29 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (err < 0) return ERR_PTR(err); - if (order <= mr_cache_max_order(dev)) { + if (use_umr(dev, order)) { mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont, page_shift, order, access_flags); if (PTR_ERR(mr) == -EAGAIN) { mlx5_ib_dbg(dev, "cache empty for order %d\n", order); mr = NULL; } + populate_mtts = false; } else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) { if (access_flags & IB_ACCESS_ON_DEMAND) { err = -EINVAL; pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n"); goto error; } - use_umr = false; + populate_mtts = true; } if (!mr) { + if (!umr_can_modify_entity_size(dev)) + populate_mtts = true; mutex_lock(&dev->slow_path_mutex); mr = reg_create(NULL, pd, virt_addr, length, umem, ncont, - page_shift, access_flags, !use_umr); + page_shift, access_flags, populate_mtts); mutex_unlock(&dev->slow_path_mutex); } @@ -1271,7 +1385,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, update_odp_mr(mr); #endif - if (use_umr) { + if (!populate_mtts) { int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE; if (access_flags & IB_ACCESS_ON_DEMAND) @@ -1286,7 +1400,9 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } } +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING mr->live = 1; +#endif return &mr->ibmr; error: ib_umem_release(umem); @@ -1365,36 +1481,34 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, ib_umem_release(mr->umem); err = mr_umem_get(pd, addr, len, access_flags, &mr->umem, &npages, &page_shift, &ncont, &order); - if (err < 0) { - clean_mr(dev, mr); - return err; - } + if (err) + goto err; } if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) { /* * UMR can't be used - MKey needs to be replaced. */ - if (mr->allocated_from_cache) { + if (mr->allocated_from_cache) err = unreg_umr(dev, mr); - if (err) - mlx5_ib_warn(dev, "Failed to unregister MR\n"); - } else { + else err = destroy_mkey(dev, mr); - if (err) - mlx5_ib_warn(dev, "Failed to destroy MKey\n"); - } if (err) - return err; + goto err; mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont, page_shift, access_flags, true); - if (IS_ERR(mr)) - return PTR_ERR(mr); + if (IS_ERR(mr)) { + err = PTR_ERR(mr); + mr = to_mmr(ib_mr); + goto err; + } mr->allocated_from_cache = 0; +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING mr->live = 1; +#endif } else { /* * Send a UMR WQE @@ -1417,13 +1531,8 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, err = rereg_umr(pd, mr, access_flags, flags); } - if (err) { - mlx5_ib_warn(dev, "Failed to rereg UMR\n"); - ib_umem_release(mr->umem); - mr->umem = NULL; - clean_mr(dev, mr); - return err; - } + if (err) + goto err; } set_mr_fileds(dev, mr, npages, len, access_flags); @@ -1432,6 +1541,14 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, update_odp_mr(mr); #endif return 0; + +err: + if (mr->umem) { + ib_umem_release(mr->umem); + mr->umem = NULL; + } + clean_mr(dev, mr); + return err; } static int @@ -1480,10 +1597,9 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr) } } -static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) +static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { int allocated_from_cache = mr->allocated_from_cache; - int err; if (mr->sig) { if (mlx5_core_destroy_psv(dev->mdev, @@ -1500,21 +1616,11 @@ static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) mlx5_free_priv_descs(mr); - if (!allocated_from_cache) { - u32 key = mr->mmkey.key; - - err = destroy_mkey(dev, mr); - if (err) { - mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", - key, err); - return err; - } - } - - return 0; + if (!allocated_from_cache) + destroy_mkey(dev, mr); } -static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) +static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { int npages = mr->npages; struct ib_umem *umem = mr->umem; @@ -1555,16 +1661,12 @@ static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) kfree(mr); else mlx5_mr_cache_free(dev, mr); - - return 0; } int mlx5_ib_dereg_mr(struct ib_mr *ibmr) { - struct mlx5_ib_dev *dev = to_mdev(ibmr->device); - struct mlx5_ib_mr *mr = to_mmr(ibmr); - - return dereg_mr(dev, mr); + dereg_mr(to_mdev(ibmr->device), to_mmr(ibmr)); + return 0; } struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, @@ -1645,7 +1747,8 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, goto err_free_in; } - MLX5_SET(mkc, mkc, access_mode, mr->access_mode); + MLX5_SET(mkc, mkc, access_mode_1_0, mr->access_mode & 0x3); + MLX5_SET(mkc, mkc, access_mode_4_2, (mr->access_mode >> 2) & 0x7); MLX5_SET(mkc, mkc, umr_en, 1); mr->ibmr.device = pd->device; @@ -1726,7 +1829,7 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); MLX5_SET(mkc, mkc, umr_en, 1); MLX5_SET(mkc, mkc, lr, 1); - MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_KLMS); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS); MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2))); MLX5_SET(mkc, mkc, qpn, 0xffffff); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 0d0b0b8..7ed4b70 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -86,7 +86,9 @@ struct mlx5_modify_raw_qp_param { u16 operation; u32 set_mask; /* raw_qp_set_mask_map */ - u32 rate_limit; + + struct mlx5_rate_limit rl; + u8 rq_q_ctr_id; }; @@ -878,7 +880,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, goto err_free; } - err = ib_copy_to_udata(udata, resp, sizeof(*resp)); + err = ib_copy_to_udata(udata, resp, min(udata->outlen, sizeof(*resp))); if (err) { mlx5_ib_dbg(dev, "copy failed\n"); goto err_unmap; @@ -1411,6 +1413,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, void *tirc; void *hfso; u32 selected_fields = 0; + u32 outer_l4; size_t min_resp_len; u32 tdn = mucontext->tdn; struct mlx5_ib_create_qp_rss ucmd = {}; @@ -1466,7 +1469,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return -EOPNOTSUPP; } - err = ib_copy_to_udata(udata, &resp, min_resp_len); + err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))); if (err) { mlx5_ib_dbg(dev, "copy failed\n"); return -EINVAL; @@ -1541,10 +1544,14 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV6); - if (((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) && - ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) || - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))) { + outer_l4 = ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) << 0 | + ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP)) << 1 | + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI) << 2; + + /* Check that only one l4 protocol is set */ + if (outer_l4 & (outer_l4 - 1)) { err = -EINVAL; goto err; } @@ -1575,6 +1582,9 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP)) selected_fields |= MLX5_HASH_FIELD_SEL_L4_DPORT; + if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI) + selected_fields |= MLX5_HASH_FIELD_SEL_IPSEC_SPI; + MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields); create_tir: @@ -2774,8 +2784,9 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev, const struct mlx5_modify_raw_qp_param *raw_qp_param) { struct mlx5_ib_qp *ibqp = sq->base.container_mibqp; - u32 old_rate = ibqp->rate_limit; - u32 new_rate = old_rate; + struct mlx5_rate_limit old_rl = ibqp->rl; + struct mlx5_rate_limit new_rl = old_rl; + bool new_rate_added = false; u16 rl_index = 0; void *in; void *sqc; @@ -2797,39 +2808,43 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev, pr_warn("%s: Rate limit can only be changed when SQ is moving to RDY\n", __func__); else - new_rate = raw_qp_param->rate_limit; + new_rl = raw_qp_param->rl; } - if (old_rate != new_rate) { - if (new_rate) { - err = mlx5_rl_add_rate(dev, new_rate, &rl_index); + if (!mlx5_rl_are_equal(&old_rl, &new_rl)) { + if (new_rl.rate) { + err = mlx5_rl_add_rate(dev, &rl_index, &new_rl); if (err) { - pr_err("Failed configuring rate %u: %d\n", - new_rate, err); + pr_err("Failed configuring rate limit(err %d): \ + rate %u, max_burst_sz %u, typical_pkt_sz %u\n", + err, new_rl.rate, new_rl.max_burst_sz, + new_rl.typical_pkt_sz); + goto out; } + new_rate_added = true; } MLX5_SET64(modify_sq_in, in, modify_bitmask, 1); + /* index 0 means no limit */ MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index); } err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen); if (err) { /* Remove new rate from table if failed */ - if (new_rate && - old_rate != new_rate) - mlx5_rl_remove_rate(dev, new_rate); + if (new_rate_added) + mlx5_rl_remove_rate(dev, &new_rl); goto out; } /* Only remove the old rate after new rate was set */ - if ((old_rate && - (old_rate != new_rate)) || + if ((old_rl.rate && + !mlx5_rl_are_equal(&old_rl, &new_rl)) || (new_state != MLX5_SQC_STATE_RDY)) - mlx5_rl_remove_rate(dev, old_rate); + mlx5_rl_remove_rate(dev, &old_rl); - ibqp->rate_limit = new_rate; + ibqp->rl = new_rl; sq->state = new_state; out: @@ -2906,7 +2921,8 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, - enum ib_qp_state cur_state, enum ib_qp_state new_state) + enum ib_qp_state cur_state, enum ib_qp_state new_state, + const struct mlx5_ib_modify_qp *ucmd) { static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = { [MLX5_QP_STATE_RST] = { @@ -2959,18 +2975,16 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, u16 op; u8 tx_affinity = 0; + mlx5_st = to_mlx5_st(ibqp->qp_type == IB_QPT_DRIVER ? + qp->qp_sub_type : ibqp->qp_type); + if (mlx5_st < 0) + return -EINVAL; + context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) return -ENOMEM; - err = to_mlx5_st(ibqp->qp_type == IB_QPT_DRIVER ? - qp->qp_sub_type : ibqp->qp_type); - if (err < 0) { - mlx5_ib_dbg(dev, "unsupported qp type %d\n", ibqp->qp_type); - goto out; - } - - context->flags = cpu_to_be32(err << 16); + context->flags = cpu_to_be32(mlx5_st << 16); if (!(attr_mask & IB_QP_PATH_MIG_STATE)) { context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11); @@ -3124,10 +3138,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, mlx5_cur = to_mlx5_state(cur_state); mlx5_new = to_mlx5_state(new_state); - mlx5_st = to_mlx5_st(ibqp->qp_type == IB_QPT_DRIVER ? - qp->qp_sub_type : ibqp->qp_type); - if (mlx5_st < 0) - goto out; if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE || !optab[mlx5_cur][mlx5_new]) { @@ -3150,7 +3160,30 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, } if (attr_mask & IB_QP_RATE_LIMIT) { - raw_qp_param.rate_limit = attr->rate_limit; + raw_qp_param.rl.rate = attr->rate_limit; + + if (ucmd->burst_info.max_burst_sz) { + if (attr->rate_limit && + MLX5_CAP_QOS(dev->mdev, packet_pacing_burst_bound)) { + raw_qp_param.rl.max_burst_sz = + ucmd->burst_info.max_burst_sz; + } else { + err = -EINVAL; + goto out; + } + } + + if (ucmd->burst_info.typical_pkt_sz) { + if (attr->rate_limit && + MLX5_CAP_QOS(dev->mdev, packet_pacing_typical_size)) { + raw_qp_param.rl.typical_pkt_sz = + ucmd->burst_info.typical_pkt_sz; + } else { + err = -EINVAL; + goto out; + } + } + raw_qp_param.set_mask |= MLX5_RAW_QP_RATE_LIMIT; } @@ -3178,7 +3211,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, * If we moved a kernel QP to RESET, clean up all old CQ * entries and reinitialize the QP. */ - if (new_state == IB_QPS_RESET && !ibqp->uobject) { + if (new_state == IB_QPS_RESET && + !ibqp->uobject && ibqp->qp_type != IB_QPT_XRC_TGT) { mlx5_ib_cq_clean(recv_cq, base->mqp.qpn, ibqp->srq ? to_msrq(ibqp->srq) : NULL); if (send_cq != recv_cq) @@ -3337,8 +3371,10 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, { struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_ib_modify_qp ucmd = {}; enum ib_qp_type qp_type; enum ib_qp_state cur_state, new_state; + size_t required_cmd_sz; int err = -EINVAL; int port; enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED; @@ -3346,6 +3382,28 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (ibqp->rwq_ind_tbl) return -ENOSYS; + if (udata && udata->inlen) { + required_cmd_sz = offsetof(typeof(ucmd), reserved) + + sizeof(ucmd.reserved); + if (udata->inlen < required_cmd_sz) + return -EINVAL; + + if (udata->inlen > sizeof(ucmd) && + !ib_is_udata_cleared(udata, sizeof(ucmd), + udata->inlen - sizeof(ucmd))) + return -EOPNOTSUPP; + + if (ib_copy_from_udata(&ucmd, udata, + min(udata->inlen, sizeof(ucmd)))) + return -EFAULT; + + if (ucmd.comp_mask || + memchr_inv(&ucmd.reserved, 0, sizeof(ucmd.reserved)) || + memchr_inv(&ucmd.burst_info.reserved, 0, + sizeof(ucmd.burst_info.reserved))) + return -EOPNOTSUPP; + } + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask); @@ -3426,7 +3484,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; } - err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); + err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state, + new_state, &ucmd); out: mutex_unlock(&qp->mutex); @@ -3646,8 +3705,19 @@ static __be64 get_umr_update_pd_mask(void) return cpu_to_be64(result); } -static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, - struct ib_send_wr *wr, int atomic) +static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask) +{ + if ((mask & MLX5_MKEY_MASK_PAGE_SIZE && + MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) || + (mask & MLX5_MKEY_MASK_A && + MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))) + return -EPERM; + return 0; +} + +static int set_reg_umr_segment(struct mlx5_ib_dev *dev, + struct mlx5_wqe_umr_ctrl_seg *umr, + struct ib_send_wr *wr, int atomic) { struct mlx5_umr_wr *umrwr = umr_wr(wr); @@ -3679,6 +3749,8 @@ static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, if (!wr->num_sge) umr->flags |= MLX5_UMR_INLINE; + + return umr_check_mkey_mask(dev, be64_to_cpu(umr->mkey_mask)); } static u8 get_umr_flags(int acc) @@ -4501,7 +4573,9 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; ctrl->imm = cpu_to_be32(umr_wr(wr)->mkey); - set_reg_umr_segment(seg, wr, !!(MLX5_CAP_GEN(mdev, atomic))); + err = set_reg_umr_segment(dev, seg, wr, !!(MLX5_CAP_GEN(mdev, atomic))); + if (unlikely(err)) + goto out; seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; if (unlikely((seg == qend))) diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 6fee779..541f237 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -1295,6 +1295,7 @@ int mthca_register_device(struct mthca_dev *dev) mutex_init(&dev->cap_mask_mutex); + dev->ib_dev.driver_id = RDMA_DRIVER_MTHCA; ret = ib_register_device(&dev->ib_dev, NULL); if (ret) return ret; diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 162475a..1040a6e 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -3854,6 +3854,7 @@ int nes_register_ofa_device(struct nes_ib_device *nesibdev) struct nes_adapter *nesadapter = nesdev->nesadapter; int i, ret; + nesvnic->nesibdev->ibdev.driver_id = RDMA_DRIVER_NES; ret = ib_register_device(&nesvnic->nesibdev->ibdev, NULL); if (ret) { return ret; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index dec6509..3897b645 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -193,11 +193,9 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, __func__, status); goto av_conf_err; } - if (sgid_attr.ndev) { - if (is_vlan_dev(sgid_attr.ndev)) - vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev); - dev_put(sgid_attr.ndev); - } + if (is_vlan_dev(sgid_attr.ndev)) + vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev); + dev_put(sgid_attr.ndev); /* Get network header type for this GID */ ah->hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 9904918..2c260e1 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -2014,7 +2014,7 @@ static int ocrdma_mbx_reg_mr_cont(struct ocrdma_dev *dev, struct ocrdma_hw_mr *hwmr, u32 pbl_cnt, u32 pbl_offset, u32 last) { - int status = -ENOMEM; + int status; int i; struct ocrdma_reg_nsmr_cont *cmd; @@ -2033,9 +2033,7 @@ static int ocrdma_mbx_reg_mr_cont(struct ocrdma_dev *dev, upper_32_bits(hwmr->pbl_table[i + pbl_offset].pa); } status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); - if (status) - goto mbx_err; -mbx_err: + kfree(cmd); return status; } @@ -2496,7 +2494,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, { int status; struct rdma_ah_attr *ah_attr = &attrs->ah_attr; - union ib_gid sgid, zgid; + union ib_gid sgid; struct ib_gid_attr sgid_attr; u32 vlan_id = 0xFFFF; u8 mac_addr[6], hdr_type; @@ -2529,16 +2527,12 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, status = ib_get_cached_gid(&dev->ibdev, 1, grh->sgid_index, &sgid, &sgid_attr); - if (!status && sgid_attr.ndev) { + if (!status) { vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev); memcpy(mac_addr, sgid_attr.ndev->dev_addr, ETH_ALEN); dev_put(sgid_attr.ndev); } - memset(&zgid, 0, sizeof(zgid)); - if (!memcmp(&sgid, &zgid, sizeof(zgid))) - return -EINVAL; - qp->sgid_idx = grh->sgid_index; memcpy(&cmd->params.sgid[0], &sgid.raw[0], sizeof(cmd->params.sgid)); status = ocrdma_resolve_dmac(dev, ah_attr, &mac_addr[0]); @@ -3133,12 +3127,12 @@ done: static int ocrdma_mbx_modify_eqd(struct ocrdma_dev *dev, struct ocrdma_eq *eq, int num) { - int i, status = -ENOMEM; + int i, status; struct ocrdma_modify_eqd_req *cmd; cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_MODIFY_EQ_DELAY, sizeof(*cmd)); if (!cmd) - return status; + return -ENOMEM; ocrdma_init_mch(&cmd->cmd.req, OCRDMA_CMD_MODIFY_EQ_DELAY, OCRDMA_SUBSYS_COMMON, sizeof(*cmd)); @@ -3151,9 +3145,7 @@ static int ocrdma_mbx_modify_eqd(struct ocrdma_dev *dev, struct ocrdma_eq *eq, (eq[i].aic_obj.prev_eqd * 65)/100; } status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); - if (status) - goto mbx_err; -mbx_err: + kfree(cmd); return status; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index fbfbd9e..eb8b6a9 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -158,10 +158,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) dev->ibdev.query_device = ocrdma_query_device; dev->ibdev.query_port = ocrdma_query_port; dev->ibdev.modify_port = ocrdma_modify_port; - dev->ibdev.query_gid = ocrdma_query_gid; dev->ibdev.get_netdev = ocrdma_get_netdev; - dev->ibdev.add_gid = ocrdma_add_gid; - dev->ibdev.del_gid = ocrdma_del_gid; dev->ibdev.get_link_layer = ocrdma_link_layer; dev->ibdev.alloc_pd = ocrdma_alloc_pd; dev->ibdev.dealloc_pd = ocrdma_dealloc_pd; @@ -217,6 +214,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) dev->ibdev.destroy_srq = ocrdma_destroy_srq; dev->ibdev.post_srq_recv = ocrdma_post_srq_recv; } + dev->ibdev.driver_id = RDMA_DRIVER_OCRDMA; return ib_register_device(&dev->ibdev, NULL); } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 8009bda..784ed6b 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -62,40 +62,6 @@ int ocrdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) return 0; } -int ocrdma_query_gid(struct ib_device *ibdev, u8 port, - int index, union ib_gid *sgid) -{ - int ret; - - memset(sgid, 0, sizeof(*sgid)); - if (index >= OCRDMA_MAX_SGID) - return -EINVAL; - - ret = ib_get_cached_gid(ibdev, port, index, sgid, NULL); - if (ret == -EAGAIN) { - memcpy(sgid, &zgid, sizeof(*sgid)); - return 0; - } - - return ret; -} - -int ocrdma_add_gid(struct ib_device *device, - u8 port_num, - unsigned int index, - const union ib_gid *gid, - const struct ib_gid_attr *attr, - void **context) { - return 0; -} - -int ocrdma_del_gid(struct ib_device *device, - u8 port_num, - unsigned int index, - void **context) { - return 0; -} - int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, struct ib_udata *uhw) { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index 704ef1e..9a99717 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -61,19 +61,7 @@ enum rdma_protocol_type ocrdma_query_protocol(struct ib_device *device, u8 port_num); void ocrdma_get_guid(struct ocrdma_dev *, u8 *guid); -int ocrdma_query_gid(struct ib_device *, u8 port, - int index, union ib_gid *gid); struct net_device *ocrdma_get_netdev(struct ib_device *device, u8 port_num); -int ocrdma_add_gid(struct ib_device *device, - u8 port_num, - unsigned int index, - const union ib_gid *gid, - const struct ib_gid_attr *attr, - void **context); -int ocrdma_del_gid(struct ib_device *device, - u8 port_num, - unsigned int index, - void **context); int ocrdma_query_pkey(struct ib_device *, u8 port, u16 index, u16 *pkey); struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *, diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index f9a645c..f4cb60b 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -162,10 +162,6 @@ static int qedr_iw_register_device(struct qedr_dev *dev) static void qedr_roce_register_device(struct qedr_dev *dev) { dev->ibdev.node_type = RDMA_NODE_IB_CA; - dev->ibdev.query_gid = qedr_query_gid; - - dev->ibdev.add_gid = qedr_add_gid; - dev->ibdev.del_gid = qedr_del_gid; dev->ibdev.get_port_immutable = qedr_roce_port_immutable; } @@ -257,6 +253,7 @@ static int qedr_register_device(struct qedr_dev *dev) dev->ibdev.get_link_layer = qedr_link_layer; dev->ibdev.get_dev_fw_str = qedr_get_dev_fw_str; + dev->ibdev.driver_id = RDMA_DRIVER_QEDR; return ib_register_device(&dev->ibdev, NULL); } @@ -707,7 +704,7 @@ static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle) "Error: CQ event with NULL pointer ibcq. Handle=%llx\n", roce_handle64); } - DP_ERR(dev, "CQ event %d on hanlde %p\n", e_code, cq); + DP_ERR(dev, "CQ event %d on handle %p\n", e_code, cq); break; case EVENT_TYPE_QP: qp = (struct qedr_qp *)(uintptr_t)roce_handle64; @@ -723,7 +720,7 @@ static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle) "Error: QP event with NULL pointer ibqp. Handle=%llx\n", roce_handle64); } - DP_ERR(dev, "QP event %d on hanlde %p\n", e_code, qp); + DP_ERR(dev, "QP event %d on handle %p\n", e_code, qp); break; default: break; diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c index 2bdbb12..0f14e68 100644 --- a/drivers/infiniband/hw/qedr/qedr_roce_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c @@ -412,19 +412,11 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev, return rc; } - if (sgid_attr.ndev) { - vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev); - if (vlan_id < VLAN_CFI_MASK) - has_vlan = true; + vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev); + if (vlan_id < VLAN_CFI_MASK) + has_vlan = true; - dev_put(sgid_attr.ndev); - } - - if (!memcmp(&sgid, &zgid, sizeof(sgid))) { - DP_ERR(dev, "gsi post send: GID not found GID index %d\n", - grh->sgid_index); - return -ENOENT; - } + dev_put(sgid_attr.ndev); has_udp = (sgid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP); if (!has_udp) { diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index f9c3cc7..7d3763b 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -84,58 +84,6 @@ int qedr_iw_query_gid(struct ib_device *ibdev, u8 port, return 0; } -int qedr_query_gid(struct ib_device *ibdev, u8 port, int index, - union ib_gid *sgid) -{ - struct qedr_dev *dev = get_qedr_dev(ibdev); - int rc = 0; - - if (!rdma_cap_roce_gid_table(ibdev, port)) - return -ENODEV; - - rc = ib_get_cached_gid(ibdev, port, index, sgid, NULL); - if (rc == -EAGAIN) { - memcpy(sgid, &zgid, sizeof(*sgid)); - return 0; - } - - DP_DEBUG(dev, QEDR_MSG_INIT, "query gid: index=%d %llx:%llx\n", index, - sgid->global.interface_id, sgid->global.subnet_prefix); - - return rc; -} - -int qedr_add_gid(struct ib_device *device, u8 port_num, - unsigned int index, const union ib_gid *gid, - const struct ib_gid_attr *attr, void **context) -{ - if (!rdma_cap_roce_gid_table(device, port_num)) - return -EINVAL; - - if (port_num > QEDR_MAX_PORT) - return -EINVAL; - - if (!context) - return -EINVAL; - - return 0; -} - -int qedr_del_gid(struct ib_device *device, u8 port_num, - unsigned int index, void **context) -{ - if (!rdma_cap_roce_gid_table(device, port_num)) - return -EINVAL; - - if (port_num > QEDR_MAX_PORT) - return -EINVAL; - - if (!context) - return -EINVAL; - - return 0; -} - int qedr_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, struct ib_udata *udata) { @@ -525,9 +473,9 @@ struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev, pd->pd_id = pd_id; if (udata && context) { - struct qedr_alloc_pd_uresp uresp; - - uresp.pd_id = pd_id; + struct qedr_alloc_pd_uresp uresp = { + .pd_id = pd_id, + }; rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (rc) { @@ -856,8 +804,6 @@ static inline void qedr_init_cq_params(struct qedr_cq *cq, static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags) { - /* Flush data before signalling doorbell */ - wmb(); cq->db.data.agg_flags = flags; cq->db.data.value = cpu_to_le32(cons); writeq(cq->db.raw, cq->db_addr); @@ -1145,46 +1091,41 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp, if (rc) return rc; - if (!memcmp(&gid, &zgid, sizeof(gid))) - return -ENOENT; - - if (gid_attr.ndev) { - qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev); - - dev_put(gid_attr.ndev); - nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid); - switch (nw_type) { - case RDMA_NETWORK_IPV6: - memcpy(&qp_params->sgid.bytes[0], &gid.raw[0], - sizeof(qp_params->sgid)); - memcpy(&qp_params->dgid.bytes[0], - &grh->dgid, - sizeof(qp_params->dgid)); - qp_params->roce_mode = ROCE_V2_IPV6; - SET_FIELD(qp_params->modify_flags, - QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1); - break; - case RDMA_NETWORK_IB: - memcpy(&qp_params->sgid.bytes[0], &gid.raw[0], - sizeof(qp_params->sgid)); - memcpy(&qp_params->dgid.bytes[0], - &grh->dgid, - sizeof(qp_params->dgid)); - qp_params->roce_mode = ROCE_V1; - break; - case RDMA_NETWORK_IPV4: - memset(&qp_params->sgid, 0, sizeof(qp_params->sgid)); - memset(&qp_params->dgid, 0, sizeof(qp_params->dgid)); - ipv4_addr = qedr_get_ipv4_from_gid(gid.raw); - qp_params->sgid.ipv4_addr = ipv4_addr; - ipv4_addr = - qedr_get_ipv4_from_gid(grh->dgid.raw); - qp_params->dgid.ipv4_addr = ipv4_addr; - SET_FIELD(qp_params->modify_flags, - QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1); - qp_params->roce_mode = ROCE_V2_IPV4; - break; - } + qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev); + + dev_put(gid_attr.ndev); + nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid); + switch (nw_type) { + case RDMA_NETWORK_IPV6: + memcpy(&qp_params->sgid.bytes[0], &gid.raw[0], + sizeof(qp_params->sgid)); + memcpy(&qp_params->dgid.bytes[0], + &grh->dgid, + sizeof(qp_params->dgid)); + qp_params->roce_mode = ROCE_V2_IPV6; + SET_FIELD(qp_params->modify_flags, + QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1); + break; + case RDMA_NETWORK_IB: + memcpy(&qp_params->sgid.bytes[0], &gid.raw[0], + sizeof(qp_params->sgid)); + memcpy(&qp_params->dgid.bytes[0], + &grh->dgid, + sizeof(qp_params->dgid)); + qp_params->roce_mode = ROCE_V1; + break; + case RDMA_NETWORK_IPV4: + memset(&qp_params->sgid, 0, sizeof(qp_params->sgid)); + memset(&qp_params->dgid, 0, sizeof(qp_params->dgid)); + ipv4_addr = qedr_get_ipv4_from_gid(gid.raw); + qp_params->sgid.ipv4_addr = ipv4_addr; + ipv4_addr = + qedr_get_ipv4_from_gid(grh->dgid.raw); + qp_params->dgid.ipv4_addr = ipv4_addr; + SET_FIELD(qp_params->modify_flags, + QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1); + qp_params->roce_mode = ROCE_V2_IPV4; + break; } for (i = 0; i < 4; i++) { @@ -1870,7 +1811,6 @@ static int qedr_update_qp_state(struct qedr_dev *dev, */ if (rdma_protocol_roce(&dev->ibdev, 1)) { - wmb(); writel(qp->rq.db_data.raw, qp->rq.db); /* Make sure write takes effect */ mmiowb(); @@ -3274,8 +3214,15 @@ int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, * vane. However this is not harmful (as long as the producer value is * unchanged). For performance reasons we avoid checking for this * redundant doorbell. + * + * qp->wqe_wr_id is accessed during qedr_poll_cq, as + * soon as we give the doorbell, we could get a completion + * for this wr, therefore we need to make sure that the + * memory is updated before giving the doorbell. + * During qedr_poll_cq, rmb is called before accessing the + * cqe. This covers for the smp_rmb as well. */ - wmb(); + smp_wmb(); writel(qp->sq.db_data.raw, qp->sq.db); /* Make sure write sticks */ @@ -3362,8 +3309,14 @@ int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, qedr_inc_sw_prod(&qp->rq); - /* Flush all the writes before signalling doorbell */ - wmb(); + /* qp->rqe_wr_id is accessed during qedr_poll_cq, as + * soon as we give the doorbell, we could get a completion + * for this wr, therefore we need to make sure that the + * memory is update before giving the doorbell. + * During qedr_poll_cq, rmb is called before accessing the + * cqe. This covers for the smp_rmb as well. + */ + smp_wmb(); qp->rq.db_data.data.value++; diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 1a94425..2c57e4c 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -38,7 +38,6 @@ int qedr_query_port(struct ib_device *, u8 port, struct ib_port_attr *props); int qedr_modify_port(struct ib_device *, u8 port, int mask, struct ib_port_modify *props); -int qedr_query_gid(struct ib_device *, u8 port, int index, union ib_gid *gid); int qedr_iw_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid); @@ -48,11 +47,6 @@ struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *, struct ib_udata *); int qedr_dealloc_ucontext(struct ib_ucontext *); int qedr_mmap(struct ib_ucontext *, struct vm_area_struct *vma); -int qedr_del_gid(struct ib_device *device, u8 port_num, - unsigned int index, void **context); -int qedr_add_gid(struct ib_device *device, u8 port_num, - unsigned int index, const union ib_gid *gid, - const struct ib_gid_attr *attr, void **context); struct ib_pd *qedr_alloc_pd(struct ib_device *, struct ib_ucontext *, struct ib_udata *); int qedr_dealloc_pd(struct ib_pd *pd); diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 0235f76b..4607245 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -472,9 +472,6 @@ enum qib_sdma_events { qib_sdma_event_e90_timer_tick, }; -extern char *qib_sdma_state_names[]; -extern char *qib_sdma_event_names[]; - struct sdma_set_state_action { unsigned op_enable:1; unsigned op_intenable:1; diff --git a/drivers/infiniband/hw/qib/qib_diag.c b/drivers/infiniband/hw/qib/qib_diag.c index a9377ee..11da796 100644 --- a/drivers/infiniband/hw/qib/qib_diag.c +++ b/drivers/infiniband/hw/qib/qib_diag.c @@ -614,7 +614,7 @@ static ssize_t qib_diagpkt_write(struct file *fp, } if (copy_from_user(tmpbuf, - (const void __user *) (unsigned long) dp.data, + u64_to_user_ptr(dp.data), dp.len)) { ret = -EFAULT; goto bail; diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 52c29db3..6a8800b 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -443,7 +443,7 @@ cleanup: ret = -EFAULT; goto cleanup; } - if (copy_to_user((void __user *) (unsigned long) ti->tidmap, + if (copy_to_user(u64_to_user_ptr(ti->tidmap), tidmap, sizeof(tidmap))) { ret = -EFAULT; goto cleanup; @@ -490,7 +490,7 @@ static int qib_tid_free(struct qib_ctxtdata *rcd, unsigned subctxt, goto done; } - if (copy_from_user(tidmap, (void __user *)(unsigned long)ti->tidmap, + if (copy_from_user(tidmap, u64_to_user_ptr(ti->tidmap), sizeof(tidmap))) { ret = -EFAULT; goto done; @@ -2168,8 +2168,8 @@ static ssize_t qib_write(struct file *fp, const char __user *data, ret = qib_do_user_init(fp, &cmd.cmd.user_info); if (ret) goto bail; - ret = qib_get_base_info(fp, (void __user *) (unsigned long) - cmd.cmd.user_info.spu_base_info, + ret = qib_get_base_info(fp, u64_to_user_ptr( + cmd.cmd.user_info.spu_base_info), cmd.cmd.user_info.spu_base_info_size); break; diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 6265dac..8414ae4 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -463,6 +463,16 @@ static u8 ib_rate_to_delay[IB_RATE_120_GBPS + 1] = { [IB_RATE_40_GBPS] = 1 }; +static const char * const qib_sdma_state_names[] = { + [qib_sdma_state_s00_hw_down] = "s00_HwDown", + [qib_sdma_state_s10_hw_start_up_wait] = "s10_HwStartUpWait", + [qib_sdma_state_s20_idle] = "s20_Idle", + [qib_sdma_state_s30_sw_clean_up_wait] = "s30_SwCleanUpWait", + [qib_sdma_state_s40_hw_clean_up_wait] = "s40_HwCleanUpWait", + [qib_sdma_state_s50_hw_halt_wait] = "s50_HwHaltWait", + [qib_sdma_state_s99_running] = "s99_Running", +}; + #define IBA7322_LINKSPEED_SHIFT SYM_LSB(IBCStatusA_0, LinkSpeedActive) #define IBA7322_LINKWIDTH_SHIFT SYM_LSB(IBCStatusA_0, LinkWidthActive) diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 3990f38..6c68f8a 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -678,11 +678,9 @@ int qib_init(struct qib_devdata *dd, int reinit) lastfail = qib_create_rcvhdrq(dd, rcd); if (!lastfail) lastfail = qib_setup_eagerbufs(rcd); - if (lastfail) { + if (lastfail) qib_dev_err(dd, "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n"); - continue; - } } for (pidx = 0; pidx < dd->num_pports; ++pidx) { diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c index c3690bd..d0723d4 100644 --- a/drivers/infiniband/hw/qib/qib_sdma.c +++ b/drivers/infiniband/hw/qib/qib_sdma.c @@ -54,30 +54,6 @@ MODULE_PARM_DESC(sdma_descq_cnt, "Number of SDMA descq entries"); #define SDMA_DESC_COUNT_LSB 16 #define SDMA_DESC_GEN_LSB 30 -char *qib_sdma_state_names[] = { - [qib_sdma_state_s00_hw_down] = "s00_HwDown", - [qib_sdma_state_s10_hw_start_up_wait] = "s10_HwStartUpWait", - [qib_sdma_state_s20_idle] = "s20_Idle", - [qib_sdma_state_s30_sw_clean_up_wait] = "s30_SwCleanUpWait", - [qib_sdma_state_s40_hw_clean_up_wait] = "s40_HwCleanUpWait", - [qib_sdma_state_s50_hw_halt_wait] = "s50_HwHaltWait", - [qib_sdma_state_s99_running] = "s99_Running", -}; - -char *qib_sdma_event_names[] = { - [qib_sdma_event_e00_go_hw_down] = "e00_GoHwDown", - [qib_sdma_event_e10_go_hw_start] = "e10_GoHwStart", - [qib_sdma_event_e20_hw_started] = "e20_HwStarted", - [qib_sdma_event_e30_go_running] = "e30_GoRunning", - [qib_sdma_event_e40_sw_cleaned] = "e40_SwCleaned", - [qib_sdma_event_e50_hw_cleaned] = "e50_HwCleaned", - [qib_sdma_event_e60_hw_halted] = "e60_HwHalted", - [qib_sdma_event_e70_go_idle] = "e70_GoIdle", - [qib_sdma_event_e7220_err_halted] = "e7220_ErrHalted", - [qib_sdma_event_e7322_err_halted] = "e7322_ErrHalted", - [qib_sdma_event_e90_timer_tick] = "e90_TimerTick", -}; - /* declare all statics here rather than keep sorting */ static int alloc_sdma(struct qib_pportdata *); static void sdma_complete(struct kref *); diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index fabee76..3977abb 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1646,7 +1646,7 @@ int qib_register_ib_device(struct qib_devdata *dd) dd->rcd[ctxt]->pkeys); } - ret = rvt_register_device(&dd->verbs_dev.rdi); + ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_QIB); if (ret) goto err_tx; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index ca56380..f0538a4 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -415,6 +415,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev) us_ibdev->ib_dev.get_dev_fw_str = usnic_get_dev_fw_str; + us_ibdev->ib_dev.driver_id = RDMA_DRIVER_USNIC; if (ib_register_device(&us_ibdev->ib_dev, NULL)) goto err_fwd_dealloc; diff --git a/drivers/infiniband/hw/usnic/usnic_transport.c b/drivers/infiniband/hw/usnic/usnic_transport.c index 67de943..e0a9553 100644 --- a/drivers/infiniband/hw/usnic/usnic_transport.c +++ b/drivers/infiniband/hw/usnic/usnic_transport.c @@ -200,10 +200,8 @@ int usnic_transport_sock_get_addr(struct socket *sock, int *proto, int usnic_transport_init(void) { roce_bitmap = kzalloc(ROCE_BITMAP_SZ, GFP_KERNEL); - if (!roce_bitmap) { - usnic_err("Failed to allocate bit map"); + if (!roce_bitmap) return -ENOMEM; - } /* Do not ever allocate bit 0, hence set it here */ bitmap_set(roce_bitmap, 0, 1); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index d650a9f..0be33a8 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -62,17 +62,10 @@ static DEFINE_MUTEX(pvrdma_device_list_lock); static LIST_HEAD(pvrdma_device_list); static struct workqueue_struct *event_wq; -static int pvrdma_add_gid(struct ib_device *ibdev, - u8 port_num, - unsigned int index, - const union ib_gid *gid, +static int pvrdma_add_gid(const union ib_gid *gid, const struct ib_gid_attr *attr, void **context); -static int pvrdma_del_gid(struct ib_device *ibdev, - u8 port_num, - unsigned int index, - void **context); - +static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context); static ssize_t show_hca(struct device *device, struct device_attribute *attr, char *buf) @@ -276,6 +269,7 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) if (!dev->srq_tbl) goto err_qp_free; } + dev->ib_dev.driver_id = RDMA_DRIVER_VMW_PVRDMA; spin_lock_init(&dev->srq_tbl_lock); ret = ib_register_device(&dev->ib_dev, NULL); @@ -656,18 +650,15 @@ static int pvrdma_add_gid_at_index(struct pvrdma_dev *dev, return 0; } -static int pvrdma_add_gid(struct ib_device *ibdev, - u8 port_num, - unsigned int index, - const union ib_gid *gid, +static int pvrdma_add_gid(const union ib_gid *gid, const struct ib_gid_attr *attr, void **context) { - struct pvrdma_dev *dev = to_vdev(ibdev); + struct pvrdma_dev *dev = to_vdev(attr->device); return pvrdma_add_gid_at_index(dev, gid, ib_gid_type_to_pvrdma(attr->gid_type), - index); + attr->index); } static int pvrdma_del_gid_at_index(struct pvrdma_dev *dev, int index) @@ -697,17 +688,14 @@ static int pvrdma_del_gid_at_index(struct pvrdma_dev *dev, int index) return 0; } -static int pvrdma_del_gid(struct ib_device *ibdev, - u8 port_num, - unsigned int index, - void **context) +static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context) { - struct pvrdma_dev *dev = to_vdev(ibdev); + struct pvrdma_dev *dev = to_vdev(attr->device); dev_dbg(&dev->pdev->dev, "removing gid at index %u from %s", - index, dev->netdev->name); + attr->index, dev->netdev->name); - return pvrdma_del_gid_at_index(dev, index); + return pvrdma_del_gid_at_index(dev, attr->index); } static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev, diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index 7bf518b..eb5b106 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -489,7 +489,7 @@ int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, union pvrdma_cmd_req req; union pvrdma_cmd_resp rsp; struct pvrdma_cmd_modify_qp *cmd = &req.modify_qp; - int cur_state, next_state; + enum ib_qp_state cur_state, next_state; int ret; /* Sanity checking. Should need lock here */ diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index a4553b2..434199d 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -91,7 +91,7 @@ module_exit(rvt_cleanup); */ struct rvt_dev_info *rvt_alloc_device(size_t size, int nports) { - struct rvt_dev_info *rdi = ERR_PTR(-ENOMEM); + struct rvt_dev_info *rdi; rdi = (struct rvt_dev_info *)ib_alloc_device(size); if (!rdi) @@ -730,7 +730,7 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb) * * Return: 0 on success otherwise an errno. */ -int rvt_register_device(struct rvt_dev_info *rdi) +int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id) { int ret = 0, i; @@ -831,6 +831,7 @@ int rvt_register_device(struct rvt_dev_info *rdi) rdi->ibdev.node_type = RDMA_NODE_IB_CA; rdi->ibdev.num_comp_vectors = 1; + rdi->ibdev.driver_id = driver_id; /* We are now good to announce we exist */ ret = ib_register_device(&rdi->ibdev, rdi->driver_f.port_callback); if (ret) { diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h index 8823b2e..0675ea6 100644 --- a/drivers/infiniband/sw/rdmavt/vt.h +++ b/drivers/infiniband/sw/rdmavt/vt.h @@ -59,7 +59,6 @@ #include "mmap.h" #include "cq.h" #include "mad.h" -#include "mmap.h" #define rvt_pr_info(rdi, fmt, ...) \ __rvt_pr_info(rdi->driver_f.get_pci_dev(rdi), \ diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index b7debb6f..e493fdb 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -78,7 +78,7 @@ void rxe_release(struct kref *kref) } /* initialize rxe device parameters */ -static int rxe_init_device_param(struct rxe_dev *rxe) +static void rxe_init_device_param(struct rxe_dev *rxe) { rxe->max_inline_data = RXE_MAX_INLINE_DATA; @@ -122,8 +122,6 @@ static int rxe_init_device_param(struct rxe_dev *rxe) rxe->attr.local_ca_ack_delay = RXE_LOCAL_CA_ACK_DELAY; rxe->max_ucontext = RXE_MAX_UCONTEXT; - - return 0; } /* initialize port attributes */ diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h index 7d23261..561ad30 100644 --- a/drivers/infiniband/sw/rxe/rxe.h +++ b/drivers/infiniband/sw/rxe/rxe.h @@ -59,7 +59,11 @@ #include "rxe_verbs.h" #include "rxe_loc.h" -#define RXE_UVERBS_ABI_VERSION (1) +/* + * Version 1 and Version 2 are identical on 64 bit machines, but on 32 bit + * machines Version 2 has a different struct layout. + */ +#define RXE_UVERBS_ABI_VERSION 2 #define IB_PHYS_STATE_LINK_UP (5) #define IB_PHYS_STATE_LINK_DOWN (3) diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c index 7522d1a..7f1ae364 100644 --- a/drivers/infiniband/sw/rxe/rxe_av.c +++ b/drivers/infiniband/sw/rxe/rxe_av.c @@ -74,8 +74,9 @@ void rxe_av_fill_ip_info(struct rxe_av *av, struct ib_gid_attr *sgid_attr, union ib_gid *sgid) { - rdma_gid2ip(&av->sgid_addr._sockaddr, sgid); - rdma_gid2ip(&av->dgid_addr._sockaddr, &rdma_ah_read_grh(attr)->dgid); + rdma_gid2ip((struct sockaddr *)&av->sgid_addr, sgid); + rdma_gid2ip((struct sockaddr *)&av->dgid_addr, + &rdma_ah_read_grh(attr)->dgid); av->network_type = ib_gid_to_network_type(sgid_attr->gid_type, sgid); } diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index c4aabf7..2ee4b08 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -36,7 +36,7 @@ #include "rxe_queue.h" int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq, - int cqe, int comp_vector, struct ib_udata *udata) + int cqe, int comp_vector) { int count; @@ -83,7 +83,7 @@ static void rxe_send_complete(unsigned long data) int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, int comp_vector, struct ib_ucontext *context, - struct ib_udata *udata) + struct rxe_create_cq_resp __user *uresp) { int err; @@ -94,15 +94,15 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, return -ENOMEM; } - err = do_mmap_info(rxe, udata, false, context, cq->queue->buf, - cq->queue->buf_size, &cq->queue->ip); + err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, context, + cq->queue->buf, cq->queue->buf_size, &cq->queue->ip); if (err) { kvfree(cq->queue->buf); kfree(cq->queue); return err; } - if (udata) + if (uresp) cq->is_user = 1; cq->is_dying = false; @@ -114,14 +114,15 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, return 0; } -int rxe_cq_resize_queue(struct rxe_cq *cq, int cqe, struct ib_udata *udata) +int rxe_cq_resize_queue(struct rxe_cq *cq, int cqe, + struct rxe_resize_cq_resp __user *uresp) { int err; err = rxe_queue_resize(cq->queue, (unsigned int *)&cqe, sizeof(struct rxe_cqe), cq->queue->ip ? cq->queue->ip->context : NULL, - udata, NULL, &cq->cq_lock); + uresp ? &uresp->mi : NULL, NULL, &cq->cq_lock); if (!err) cq->ibcq.cqe = cqe; diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 4ef75d5..b71023c 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -52,13 +52,14 @@ struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt); /* rxe_cq.c */ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq, - int cqe, int comp_vector, struct ib_udata *udata); + int cqe, int comp_vector); int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, int comp_vector, struct ib_ucontext *context, - struct ib_udata *udata); + struct rxe_create_cq_resp __user *uresp); -int rxe_cq_resize_queue(struct rxe_cq *cq, int new_cqe, struct ib_udata *udata); +int rxe_cq_resize_queue(struct rxe_cq *cq, int new_cqe, + struct rxe_resize_cq_resp __user *uresp); int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited); @@ -143,8 +144,7 @@ int advance_dma_data(struct rxe_dma_info *dma, unsigned int length); /* rxe_net.c */ int rxe_loopback(struct sk_buff *skb); -int rxe_send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, - struct sk_buff *skb); +int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb); struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, int paylen, struct rxe_pkt_info *pkt); int rxe_prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, @@ -159,7 +159,8 @@ int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid); int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init); int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, - struct ib_qp_init_attr *init, struct ib_udata *udata, + struct ib_qp_init_attr *init, + struct rxe_create_qp_resp __user *uresp, struct ib_pd *ibpd); int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init); @@ -227,11 +228,12 @@ int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, struct ib_srq_init_attr *init, - struct ib_ucontext *context, struct ib_udata *udata); + struct ib_ucontext *context, + struct rxe_create_srq_resp __user *uresp); int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, struct ib_srq_attr *attr, enum ib_srq_attr_mask mask, - struct ib_udata *udata); + struct rxe_modify_srq_cmd *ucmd); void rxe_release(struct kref *kref); @@ -268,7 +270,7 @@ static inline int rxe_xmit_packet(struct rxe_dev *rxe, struct rxe_qp *qp, memcpy(SKB_TO_PKT(skb), pkt, sizeof(*pkt)); err = rxe_loopback(skb); } else { - err = rxe_send(rxe, pkt, skb); + err = rxe_send(pkt, skb); } if (err) { diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 159246b..9da6e37 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -182,11 +182,39 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev, #endif +/* + * Derive the net_device from the av. + * For physical devices, this will just return rxe->ndev. + * But for VLAN devices, it will return the vlan dev. + * Caller should dev_put() the returned net_device. + */ +static struct net_device *rxe_netdev_from_av(struct rxe_dev *rxe, + int port_num, + struct rxe_av *av) +{ + union ib_gid gid; + struct ib_gid_attr attr; + struct net_device *ndev = rxe->ndev; + + if (ib_get_cached_gid(&rxe->ib_dev, port_num, av->grh.sgid_index, + &gid, &attr) == 0 && + attr.ndev && attr.ndev != ndev) + ndev = attr.ndev; + else + /* Only to ensure that caller may call dev_put() */ + dev_hold(ndev); + + return ndev; +} + static struct dst_entry *rxe_find_route(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_av *av) { struct dst_entry *dst = NULL; + struct net_device *ndev; + + ndev = rxe_netdev_from_av(rxe, qp->attr.port_num, av); if (qp_type(qp) == IB_QPT_RC) dst = sk_dst_get(qp->sk->sk); @@ -201,14 +229,14 @@ static struct dst_entry *rxe_find_route(struct rxe_dev *rxe, saddr = &av->sgid_addr._sockaddr_in.sin_addr; daddr = &av->dgid_addr._sockaddr_in.sin_addr; - dst = rxe_find_route4(rxe->ndev, saddr, daddr); + dst = rxe_find_route4(ndev, saddr, daddr); } else if (av->network_type == RDMA_NETWORK_IPV6) { struct in6_addr *saddr6; struct in6_addr *daddr6; saddr6 = &av->sgid_addr._sockaddr_in6.sin6_addr; daddr6 = &av->dgid_addr._sockaddr_in6.sin6_addr; - dst = rxe_find_route6(rxe->ndev, saddr6, daddr6); + dst = rxe_find_route6(ndev, saddr6, daddr6); #if IS_ENABLED(CONFIG_IPV6) if (dst) qp->dst_cookie = @@ -217,6 +245,7 @@ static struct dst_entry *rxe_find_route(struct rxe_dev *rxe, } } + dev_put(ndev); return dst; } @@ -224,9 +253,14 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct udphdr *udph; struct net_device *ndev = skb->dev; + struct net_device *rdev = ndev; struct rxe_dev *rxe = net_to_rxe(ndev); struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); + if (!rxe && is_vlan_dev(rdev)) { + rdev = vlan_dev_real_dev(ndev); + rxe = net_to_rxe(rdev); + } if (!rxe) goto drop; @@ -450,7 +484,7 @@ static void rxe_skb_tx_dtor(struct sk_buff *skb) rxe_drop_ref(qp); } -int rxe_send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct sk_buff *skb) +int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb) { struct rxe_av *av; int err; @@ -498,6 +532,10 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, { unsigned int hdr_len; struct sk_buff *skb; + struct net_device *ndev; + const int port_num = 1; + + ndev = rxe_netdev_from_av(rxe, port_num, av); if (av->network_type == RDMA_NETWORK_IPV4) hdr_len = ETH_HLEN + sizeof(struct udphdr) + @@ -506,26 +544,30 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, hdr_len = ETH_HLEN + sizeof(struct udphdr) + sizeof(struct ipv6hdr); - skb = alloc_skb(paylen + hdr_len + LL_RESERVED_SPACE(rxe->ndev), + skb = alloc_skb(paylen + hdr_len + LL_RESERVED_SPACE(ndev), GFP_ATOMIC); - if (unlikely(!skb)) + + if (unlikely(!skb)) { + dev_put(ndev); return NULL; + } skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(rxe->ndev)); - skb->dev = rxe->ndev; + skb->dev = ndev; if (av->network_type == RDMA_NETWORK_IPV4) skb->protocol = htons(ETH_P_IP); else skb->protocol = htons(ETH_P_IPV6); pkt->rxe = rxe; - pkt->port_num = 1; + pkt->port_num = port_num; pkt->hdr = skb_put(skb, paylen); pkt->mask |= RXE_GRH_MASK; memset(pkt->hdr, 0, paylen); + dev_put(ndev); return skb; } diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 2fcf1ca..b9f7aa1 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -40,15 +40,6 @@ #include "rxe_queue.h" #include "rxe_task.h" -char *rxe_qp_state_name[] = { - [QP_STATE_RESET] = "RESET", - [QP_STATE_INIT] = "INIT", - [QP_STATE_READY] = "READY", - [QP_STATE_DRAIN] = "DRAIN", - [QP_STATE_DRAINED] = "DRAINED", - [QP_STATE_ERROR] = "ERROR", -}; - static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap, int has_srq) { @@ -225,7 +216,8 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp, static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, struct ib_qp_init_attr *init, - struct ib_ucontext *context, struct ib_udata *udata) + struct ib_ucontext *context, + struct rxe_create_qp_resp __user *uresp) { int err; int wqe_size; @@ -250,9 +242,9 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, if (!qp->sq.queue) return -ENOMEM; - err = do_mmap_info(rxe, udata, true, - context, qp->sq.queue->buf, - qp->sq.queue->buf_size, &qp->sq.queue->ip); + err = do_mmap_info(rxe, uresp ? &uresp->sq_mi : NULL, context, + qp->sq.queue->buf, qp->sq.queue->buf_size, + &qp->sq.queue->ip); if (err) { kvfree(qp->sq.queue->buf); @@ -283,7 +275,8 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, struct ib_qp_init_attr *init, - struct ib_ucontext *context, struct ib_udata *udata) + struct ib_ucontext *context, + struct rxe_create_qp_resp __user *uresp) { int err; int wqe_size; @@ -303,9 +296,8 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, if (!qp->rq.queue) return -ENOMEM; - err = do_mmap_info(rxe, udata, false, context, - qp->rq.queue->buf, - qp->rq.queue->buf_size, + err = do_mmap_info(rxe, uresp ? &uresp->rq_mi : NULL, context, + qp->rq.queue->buf, qp->rq.queue->buf_size, &qp->rq.queue->ip); if (err) { kvfree(qp->rq.queue->buf); @@ -331,14 +323,15 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, /* called by the create qp verb */ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, - struct ib_qp_init_attr *init, struct ib_udata *udata, + struct ib_qp_init_attr *init, + struct rxe_create_qp_resp __user *uresp, struct ib_pd *ibpd) { int err; struct rxe_cq *rcq = to_rcq(init->recv_cq); struct rxe_cq *scq = to_rcq(init->send_cq); struct rxe_srq *srq = init->srq ? to_rsrq(init->srq) : NULL; - struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL; + struct ib_ucontext *context = ibpd->uobject ? ibpd->uobject->context : NULL; rxe_add_ref(pd); rxe_add_ref(rcq); @@ -353,11 +346,11 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, rxe_qp_init_misc(rxe, qp, init); - err = rxe_qp_init_req(rxe, qp, init, context, udata); + err = rxe_qp_init_req(rxe, qp, init, context, uresp); if (err) goto err1; - err = rxe_qp_init_resp(rxe, qp, init, context, udata); + err = rxe_qp_init_resp(rxe, qp, init, context, uresp); if (err) goto err2; diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c index d14bf49..f84ab44 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.c +++ b/drivers/infiniband/sw/rxe/rxe_queue.c @@ -37,35 +37,21 @@ #include "rxe_queue.h" int do_mmap_info(struct rxe_dev *rxe, - struct ib_udata *udata, - bool is_req, + struct mminfo __user *outbuf, struct ib_ucontext *context, struct rxe_queue_buf *buf, size_t buf_size, struct rxe_mmap_info **ip_p) { int err; - u32 len, offset; struct rxe_mmap_info *ip = NULL; - if (udata) { - if (is_req) { - len = udata->outlen - sizeof(struct mminfo); - offset = sizeof(struct mminfo); - } else { - len = udata->outlen; - offset = 0; - } - - if (len < sizeof(ip->info)) - goto err1; - + if (outbuf) { ip = rxe_create_mmap_info(rxe, buf_size, context, buf); if (!ip) goto err1; - err = copy_to_user(udata->outbuf + offset, &ip->info, - sizeof(ip->info)); + err = copy_to_user(outbuf, &ip->info, sizeof(ip->info)); if (err) goto err2; @@ -171,7 +157,7 @@ int rxe_queue_resize(struct rxe_queue *q, unsigned int *num_elem_p, unsigned int elem_size, struct ib_ucontext *context, - struct ib_udata *udata, + struct mminfo __user *outbuf, spinlock_t *producer_lock, spinlock_t *consumer_lock) { @@ -184,7 +170,7 @@ int rxe_queue_resize(struct rxe_queue *q, if (!new_q) return -ENOMEM; - err = do_mmap_info(new_q->rxe, udata, false, context, new_q->buf, + err = do_mmap_info(new_q->rxe, outbuf, context, new_q->buf, new_q->buf_size, &new_q->ip); if (err) { vfree(new_q->buf); diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h index 8c8641c..79ba4b32 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.h +++ b/drivers/infiniband/sw/rxe/rxe_queue.h @@ -77,8 +77,7 @@ struct rxe_queue { }; int do_mmap_info(struct rxe_dev *rxe, - struct ib_udata *udata, - bool is_req, + struct mminfo __user *outbuf, struct ib_ucontext *context, struct rxe_queue_buf *buf, size_t buf_size, @@ -94,7 +93,7 @@ int rxe_queue_resize(struct rxe_queue *q, unsigned int *num_elem_p, unsigned int elem_size, struct ib_ucontext *context, - struct ib_udata *udata, + struct mminfo __user *outbuf, /* Protect producers while resizing queue */ spinlock_t *producer_lock, /* Protect consumers while resizing queue */ diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 4c3f899..dd80c7d 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -276,7 +276,6 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) { struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); struct rxe_mc_grp *mcg; - struct sk_buff *skb_copy; struct rxe_mc_elem *mce; struct rxe_qp *qp; union ib_gid dgid; @@ -309,18 +308,14 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) continue; /* if *not* the last qp in the list - * make a copy of the skb to post to the next qp + * increase the users of the skb then post to the next qp */ - skb_copy = (mce->qp_list.next != &mcg->qp_list) ? - skb_clone(skb, GFP_ATOMIC) : NULL; + if (mce->qp_list.next != &mcg->qp_list) + refcount_inc(&skb->users); pkt->qp = qp; rxe_add_ref(qp); rxe_rcv_pkt(rxe, pkt, skb); - - skb = skb_copy; - if (!skb) - break; } spin_unlock_bh(&mcg->mcg_lock); @@ -328,8 +323,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. */ err1: - if (skb) - kfree_skb(skb); + kfree_skb(skb); } static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb) @@ -347,7 +341,7 @@ static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb) return ib_find_cached_gid_by_port(&rxe->ib_dev, pdgid, IB_GID_TYPE_ROCE_UDP_ENCAP, - 1, rxe->ndev, NULL); + 1, skb->dev, NULL); } /* rxe_rcv is called from the interface driver */ diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index d37bb9b..a65c996 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -969,7 +969,6 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, int rc = 0; struct rxe_pkt_info ack_pkt; struct sk_buff *skb; - struct sk_buff *skb_copy; struct rxe_dev *rxe = to_rdev(qp->ibqp.device); struct resp_res *res; @@ -981,14 +980,7 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, goto out; } - skb_copy = skb_clone(skb, GFP_ATOMIC); - if (skb_copy) - rxe_add_ref(qp); /* for the new SKB */ - else { - pr_warn("Could not clone atomic response\n"); - rc = -ENOMEM; - goto out; - } + rxe_add_ref(qp); res = &qp->resp.resources[qp->resp.res_head]; free_rd_atomic_resource(qp, res); @@ -998,19 +990,18 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, memset((unsigned char *)SKB_TO_PKT(skb) + sizeof(ack_pkt), 0, sizeof(skb->cb) - sizeof(ack_pkt)); + refcount_inc(&skb->users); res->type = RXE_ATOMIC_MASK; res->atomic.skb = skb; res->first_psn = ack_pkt.psn; res->last_psn = ack_pkt.psn; res->cur_psn = ack_pkt.psn; - rc = rxe_xmit_packet(rxe, qp, &ack_pkt, skb_copy); + rc = rxe_xmit_packet(rxe, qp, &ack_pkt, skb); if (rc) { pr_err_ratelimited("Failed sending ack\n"); rxe_drop_ref(qp); - kfree_skb(skb_copy); } - out: return rc; } diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c index efc832a..0d6c04b 100644 --- a/drivers/infiniband/sw/rxe/rxe_srq.c +++ b/drivers/infiniband/sw/rxe/rxe_srq.c @@ -99,7 +99,8 @@ err1: int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, struct ib_srq_init_attr *init, - struct ib_ucontext *context, struct ib_udata *udata) + struct ib_ucontext *context, + struct rxe_create_srq_resp __user *uresp) { int err; int srq_wqe_size; @@ -126,55 +127,41 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, srq->rq.queue = q; - err = do_mmap_info(rxe, udata, false, context, q->buf, + err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, context, q->buf, q->buf_size, &q->ip); if (err) return err; - if (udata && udata->outlen >= sizeof(struct mminfo) + sizeof(u32)) { - if (copy_to_user(udata->outbuf + sizeof(struct mminfo), - &srq->srq_num, sizeof(u32))) + if (uresp) { + if (copy_to_user(&uresp->srq_num, &srq->srq_num, + sizeof(uresp->srq_num))) return -EFAULT; } + return 0; } int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, struct ib_srq_attr *attr, enum ib_srq_attr_mask mask, - struct ib_udata *udata) + struct rxe_modify_srq_cmd *ucmd) { int err; struct rxe_queue *q = srq->rq.queue; - struct mminfo mi = { .offset = 1, .size = 0}; + struct mminfo __user *mi = NULL; if (mask & IB_SRQ_MAX_WR) { - /* Check that we can write the mminfo struct to user space */ - if (udata && udata->inlen >= sizeof(__u64)) { - __u64 mi_addr; - - /* Get address of user space mminfo struct */ - err = ib_copy_from_udata(&mi_addr, udata, - sizeof(mi_addr)); - if (err) - goto err1; - - udata->outbuf = (void __user *)(unsigned long)mi_addr; - udata->outlen = sizeof(mi); - - if (!access_ok(VERIFY_WRITE, - (void __user *)udata->outbuf, - udata->outlen)) { - err = -EFAULT; - goto err1; - } - } + /* + * This is completely screwed up, the response is supposed to + * be in the outbuf not like this. + */ + mi = u64_to_user_ptr(ucmd->mmap_info_addr); err = rxe_queue_resize(q, &attr->max_wr, rcv_wqe_size(srq->rq.max_sge), srq->rq.queue->ip ? srq->rq.queue->ip->context : NULL, - udata, &srq->rq.producer_lock, + mi, &srq->rq.producer_lock, &srq->rq.consumer_lock); if (err) goto err2; @@ -188,6 +175,5 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, err2: rxe_queue_cleanup(q); srq->rq.queue = NULL; -err1: return err; } diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index f4bab2c..2cb52fd 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -77,40 +77,6 @@ out: return rc; } -static int rxe_query_gid(struct ib_device *device, - u8 port_num, int index, union ib_gid *gid) -{ - int ret; - - if (index > RXE_PORT_GID_TBL_LEN) - return -EINVAL; - - ret = ib_get_cached_gid(device, port_num, index, gid, NULL); - if (ret == -EAGAIN) { - memcpy(gid, &zgid, sizeof(*gid)); - return 0; - } - - return ret; -} - -static int rxe_add_gid(struct ib_device *device, u8 port_num, unsigned int - index, const union ib_gid *gid, - const struct ib_gid_attr *attr, void **context) -{ - if (index >= RXE_PORT_GID_TBL_LEN) - return -EINVAL; - return 0; -} - -static int rxe_del_gid(struct ib_device *device, u8 port_num, unsigned int - index, void **context) -{ - if (index >= RXE_PORT_GID_TBL_LEN) - return -EINVAL; - return 0; -} - static struct net_device *rxe_get_netdev(struct ib_device *device, u8 port_num) { @@ -273,9 +239,7 @@ static int rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr, rxe_av_from_attr(rdma_ah_get_port_num(attr), av, attr); rxe_av_fill_ip_info(av, attr, &sgid_attr, &sgid); - - if (sgid_attr.ndev) - dev_put(sgid_attr.ndev); + dev_put(sgid_attr.ndev); return 0; } @@ -407,6 +371,13 @@ static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd, struct rxe_pd *pd = to_rpd(ibpd); struct rxe_srq *srq; struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL; + struct rxe_create_srq_resp __user *uresp = NULL; + + if (udata) { + if (udata->outlen < sizeof(*uresp)) + return ERR_PTR(-EINVAL); + uresp = udata->outbuf; + } err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK); if (err) @@ -422,7 +393,7 @@ static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd, rxe_add_ref(pd); srq->pd = pd; - err = rxe_srq_from_init(rxe, srq, init, context, udata); + err = rxe_srq_from_init(rxe, srq, init, context, uresp); if (err) goto err2; @@ -443,12 +414,22 @@ static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, int err; struct rxe_srq *srq = to_rsrq(ibsrq); struct rxe_dev *rxe = to_rdev(ibsrq->device); + struct rxe_modify_srq_cmd ucmd = {}; + + if (udata) { + if (udata->inlen < sizeof(ucmd)) + return -EINVAL; + + err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); + if (err) + return err; + } err = rxe_srq_chk_attr(rxe, srq, attr, mask); if (err) goto err1; - err = rxe_srq_from_attr(rxe, srq, attr, mask, udata); + err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd); if (err) goto err1; @@ -517,6 +498,13 @@ static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd, struct rxe_dev *rxe = to_rdev(ibpd->device); struct rxe_pd *pd = to_rpd(ibpd); struct rxe_qp *qp; + struct rxe_create_qp_resp __user *uresp = NULL; + + if (udata) { + if (udata->outlen < sizeof(*uresp)) + return ERR_PTR(-EINVAL); + uresp = udata->outbuf; + } err = rxe_qp_chk_init(rxe, init); if (err) @@ -538,7 +526,7 @@ static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd, rxe_add_index(qp); - err = rxe_qp_from_init(rxe, qp, pd, init, udata, ibpd); + err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibpd); if (err) goto err3; @@ -711,9 +699,8 @@ static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr, memcpy(wqe->dma.sge, ibwr->sg_list, num_sge * sizeof(struct ib_sge)); - wqe->iova = (mask & WR_ATOMIC_MASK) ? - atomic_wr(ibwr)->remote_addr : - rdma_wr(ibwr)->remote_addr; + wqe->iova = mask & WR_ATOMIC_MASK ? atomic_wr(ibwr)->remote_addr : + mask & WR_READ_OR_WRITE_MASK ? rdma_wr(ibwr)->remote_addr : 0; wqe->mask = mask; wqe->dma.length = length; wqe->dma.resid = length; @@ -889,11 +876,18 @@ static struct ib_cq *rxe_create_cq(struct ib_device *dev, int err; struct rxe_dev *rxe = to_rdev(dev); struct rxe_cq *cq; + struct rxe_create_cq_resp __user *uresp = NULL; + + if (udata) { + if (udata->outlen < sizeof(*uresp)) + return ERR_PTR(-EINVAL); + uresp = udata->outbuf; + } if (attr->flags) return ERR_PTR(-EINVAL); - err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector, udata); + err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector); if (err) goto err1; @@ -904,7 +898,7 @@ static struct ib_cq *rxe_create_cq(struct ib_device *dev, } err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector, - context, udata); + context, uresp); if (err) goto err2; @@ -931,12 +925,19 @@ static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) int err; struct rxe_cq *cq = to_rcq(ibcq); struct rxe_dev *rxe = to_rdev(ibcq->device); + struct rxe_resize_cq_resp __user *uresp = NULL; + + if (udata) { + if (udata->outlen < sizeof(*uresp)) + return -EINVAL; + uresp = udata->outbuf; + } - err = rxe_cq_chk_attr(rxe, cq, cqe, 0, udata); + err = rxe_cq_chk_attr(rxe, cq, cqe, 0); if (err) goto err1; - err = rxe_cq_resize_queue(cq, cqe, udata); + err = rxe_cq_resize_queue(cq, cqe, uresp); if (err) goto err1; @@ -1207,7 +1208,7 @@ int rxe_register_device(struct rxe_dev *rxe) rxe->ndev->dev_addr); dev->dev.dma_ops = &dma_virt_ops; dma_coerce_mask_and_coherent(&dev->dev, - dma_get_required_mask(dev->dev.parent)); + dma_get_required_mask(&dev->dev)); dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION; dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) @@ -1248,10 +1249,7 @@ int rxe_register_device(struct rxe_dev *rxe) dev->query_port = rxe_query_port; dev->modify_port = rxe_modify_port; dev->get_link_layer = rxe_get_link_layer; - dev->query_gid = rxe_query_gid; dev->get_netdev = rxe_get_netdev; - dev->add_gid = rxe_add_gid; - dev->del_gid = rxe_del_gid; dev->query_pkey = rxe_query_pkey; dev->alloc_ucontext = rxe_alloc_ucontext; dev->dealloc_ucontext = rxe_dealloc_ucontext; @@ -1298,6 +1296,7 @@ int rxe_register_device(struct rxe_dev *rxe) } rxe->tfm = tfm; + dev->driver_id = RDMA_DRIVER_RXE; err = ib_register_device(dev, NULL); if (err) { pr_warn("%s failed with error %d\n", __func__, err); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 1019f5e..af1470d 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -139,8 +139,6 @@ enum rxe_qp_state { QP_STATE_ERROR }; -extern char *rxe_qp_state_name[]; - struct rxe_req_info { enum rxe_qp_state state; int wqe_index; diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 8033a00..308e0ce 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -193,11 +193,6 @@ struct ipoib_tx_buf { u64 mapping[MAX_SKB_FRAGS + 1]; }; -struct ipoib_cm_tx_buf { - struct sk_buff *skb; - u64 mapping; -}; - struct ib_cm_id; struct ipoib_cm_data { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 10384ea..f47f9ac 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -1085,7 +1085,7 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv) netif_addr_unlock_bh(priv->dev); - err = ib_find_gid(priv->ca, &search_gid, priv->dev, &port, &index); + err = ib_find_gid(priv->ca, &search_gid, &port, &index); netif_addr_lock_bh(priv->dev); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index b488438..c35d2cd 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -327,29 +327,10 @@ static int srp_new_ib_cm_id(struct srp_rdma_ch *ch) return 0; } -static const char *inet_ntop(const void *sa, char *dst, unsigned int size) -{ - switch (((struct sockaddr *)sa)->sa_family) { - case AF_INET: - snprintf(dst, size, "%pI4", - &((struct sockaddr_in *)sa)->sin_addr); - break; - case AF_INET6: - snprintf(dst, size, "%pI6", - &((struct sockaddr_in6 *)sa)->sin6_addr); - break; - default: - snprintf(dst, size, "???"); - break; - } - return dst; -} - static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch) { struct srp_target_port *target = ch->target; struct rdma_cm_id *new_cm_id; - char src_addr[64], dst_addr[64]; int ret; new_cm_id = rdma_create_id(target->net, srp_rdma_cm_handler, ch, @@ -366,13 +347,8 @@ static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch) (struct sockaddr *)&target->rdma_cm.dst, SRP_PATH_REC_TIMEOUT_MS); if (ret) { - pr_err("No route available from %s to %s (%d)\n", - target->rdma_cm.src_specified ? - inet_ntop(&target->rdma_cm.src, src_addr, - sizeof(src_addr)) : "(any)", - inet_ntop(&target->rdma_cm.dst, dst_addr, - sizeof(dst_addr)), - ret); + pr_err("No route available from %pIS to %pIS (%d)\n", + &target->rdma_cm.src, &target->rdma_cm.dst, ret); goto out; } ret = wait_for_completion_interruptible(&ch->done); @@ -381,10 +357,8 @@ static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch) ret = ch->status; if (ret) { - pr_err("Resolving address %s failed (%d)\n", - inet_ntop(&target->rdma_cm.dst, dst_addr, - sizeof(dst_addr)), - ret); + pr_err("Resolving address %pIS failed (%d)\n", + &target->rdma_cm.dst, ret); goto out; } @@ -457,6 +431,7 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, struct srp_fr_desc *d; struct ib_mr *mr; int i, ret = -EINVAL; + enum ib_mr_type mr_type; if (pool_size <= 0) goto err; @@ -470,9 +445,13 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, spin_lock_init(&pool->lock); INIT_LIST_HEAD(&pool->free_list); + if (device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG) + mr_type = IB_MR_TYPE_SG_GAPS; + else + mr_type = IB_MR_TYPE_MEM_REG; + for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { - mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, - max_page_list_len); + mr = ib_alloc_mr(pd, mr_type, max_page_list_len); if (IS_ERR(mr)) { ret = PTR_ERR(mr); if (ret == -ENOMEM) @@ -765,19 +744,12 @@ static void srp_path_rec_completion(int status, static int srp_ib_lookup_path(struct srp_rdma_ch *ch) { struct srp_target_port *target = ch->target; - int ret = -ENODEV; + int ret; ch->ib_cm.path.numb_path = 1; init_completion(&ch->done); - /* - * Avoid that the SCSI host can be removed by srp_remove_target() - * before srp_path_rec_completion() is called. - */ - if (!scsi_host_get(target->scsi_host)) - goto out; - ch->ib_cm.path_query_id = ib_sa_path_rec_get(&srp_sa_client, target->srp_host->srp_dev->dev, target->srp_host->port, @@ -791,27 +763,21 @@ static int srp_ib_lookup_path(struct srp_rdma_ch *ch) GFP_KERNEL, srp_path_rec_completion, ch, &ch->ib_cm.path_query); - ret = ch->ib_cm.path_query_id; - if (ret < 0) - goto put; + if (ch->ib_cm.path_query_id < 0) + return ch->ib_cm.path_query_id; ret = wait_for_completion_interruptible(&ch->done); if (ret < 0) - goto put; + return ret; - ret = ch->status; - if (ret < 0) + if (ch->status < 0) shost_printk(KERN_WARNING, target->scsi_host, PFX "Path record query failed: sgid %pI6, dgid %pI6, pkey %#04x, service_id %#16llx\n", ch->ib_cm.path.sgid.raw, ch->ib_cm.path.dgid.raw, be16_to_cpu(target->ib_cm.pkey), be64_to_cpu(target->ib_cm.service_id)); -put: - scsi_host_put(target->scsi_host); - -out: - return ret; + return ch->status; } static int srp_rdma_lookup_path(struct srp_rdma_ch *ch) @@ -2974,9 +2940,11 @@ static int srp_abort(struct scsi_cmnd *scmnd) ret = FAST_IO_FAIL; else ret = FAILED; - srp_free_req(ch, req, scmnd, 0); - scmnd->result = DID_ABORT << 16; - scmnd->scsi_done(scmnd); + if (ret == SUCCESS) { + srp_free_req(ch, req, scmnd, 0); + scmnd->result = DID_ABORT << 16; + scmnd->scsi_done(scmnd); + } return ret; } @@ -3033,8 +3001,9 @@ static int srp_slave_alloc(struct scsi_device *sdev) struct Scsi_Host *shost = sdev->host; struct srp_target_port *target = host_to_target(shost); struct srp_device *srp_dev = target->srp_host->srp_dev; + struct ib_device *ibdev = srp_dev->dev; - if (true) + if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) blk_queue_virt_boundary(sdev->request_queue, ~srp_dev->mr_page_mask); @@ -3365,9 +3334,6 @@ static bool srp_conn_unique(struct srp_host *host, if (t != target && target->id_ext == t->id_ext && target->ioc_guid == t->ioc_guid && - (!target->using_rdma_cm || - memcmp(&target->rdma_cm.dst, &t->rdma_cm.dst, - sizeof(target->rdma_cm.dst)) == 0) && target->initiator_ext == t->initiator_ext) { ret = false; break; @@ -3445,18 +3411,37 @@ static const match_table_t srp_opt_tokens = { { SRP_OPT_ERR, NULL } }; +/** + * srp_parse_in - parse an IP address and port number combination + * + * Parse the following address formats: + * - IPv4: <ip_address>:<port>, e.g. 1.2.3.4:5. + * - IPv6: \[<ipv6_address>\]:<port>, e.g. [1::2:3%4]:5. + */ static int srp_parse_in(struct net *net, struct sockaddr_storage *sa, const char *addr_port_str) { - char *addr = kstrdup(addr_port_str, GFP_KERNEL); - char *port_str = addr; + char *addr_end, *addr = kstrdup(addr_port_str, GFP_KERNEL); + char *port_str; int ret; if (!addr) return -ENOMEM; - strsep(&port_str, ":"); - ret = inet_pton_with_scope(net, AF_UNSPEC, addr, port_str, sa); + port_str = strrchr(addr, ':'); + if (!port_str) + return -EINVAL; + *port_str++ = '\0'; + ret = inet_pton_with_scope(net, AF_INET, addr, port_str, sa); + if (ret && addr[0]) { + addr_end = addr + strlen(addr) - 1; + if (addr[0] == '[' && *addr_end == ']') { + *addr_end = '\0'; + ret = inet_pton_with_scope(net, AF_INET6, addr + 1, + port_str, sa); + } + } kfree(addr); + pr_debug("%s -> %pISpfsc\n", addr_port_str, sa); return ret; } @@ -3789,14 +3774,11 @@ static ssize_t srp_create_target(struct device *dev, if (!srp_conn_unique(target->srp_host, target)) { if (target->using_rdma_cm) { - char dst_addr[64]; - shost_printk(KERN_INFO, target->scsi_host, - PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;dest=%s\n", + PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;dest=%pIS\n", be64_to_cpu(target->id_ext), be64_to_cpu(target->ioc_guid), - inet_ntop(&target->rdma_cm.dst, dst_addr, - sizeof(dst_addr))); + &target->rdma_cm.dst); } else { shost_printk(KERN_INFO, target->scsi_host, PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n", @@ -3815,26 +3797,36 @@ static ssize_t srp_create_target(struct device *dev, } if (srp_dev->use_fast_reg || srp_dev->use_fmr) { - /* - * FR and FMR can only map one HCA page per entry. If the - * start address is not aligned on a HCA page boundary two - * entries will be used for the head and the tail although - * these two entries combined contain at most one HCA page of - * data. Hence the "+ 1" in the calculation below. - * - * The indirect data buffer descriptor is contiguous so the - * memory for that buffer will only be registered if - * register_always is true. Hence add one to mr_per_cmd if - * register_always has been set. - */ + bool gaps_reg = (ibdev->attrs.device_cap_flags & + IB_DEVICE_SG_GAPS_REG); + max_sectors_per_mr = srp_dev->max_pages_per_mr << (ilog2(srp_dev->mr_page_size) - 9); - mr_per_cmd = register_always + - (target->scsi_host->max_sectors + 1 + - max_sectors_per_mr - 1) / max_sectors_per_mr; + if (!gaps_reg) { + /* + * FR and FMR can only map one HCA page per entry. If + * the start address is not aligned on a HCA page + * boundary two entries will be used for the head and + * the tail although these two entries combined + * contain at most one HCA page of data. Hence the "+ + * 1" in the calculation below. + * + * The indirect data buffer descriptor is contiguous + * so the memory for that buffer will only be + * registered if register_always is true. Hence add + * one to mr_per_cmd if register_always has been set. + */ + mr_per_cmd = register_always + + (target->scsi_host->max_sectors + 1 + + max_sectors_per_mr - 1) / max_sectors_per_mr; + } else { + mr_per_cmd = register_always + + (target->sg_tablesize + + srp_dev->max_pages_per_mr - 1) / + srp_dev->max_pages_per_mr; + } pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n", - target->scsi_host->max_sectors, - srp_dev->max_pages_per_mr, srp_dev->mr_page_size, + target->scsi_host->max_sectors, srp_dev->max_pages_per_mr, srp_dev->mr_page_size, max_sectors_per_mr, mr_per_cmd); } @@ -3871,12 +3863,10 @@ static ssize_t srp_create_target(struct device *dev, num_online_nodes()); const int ch_end = ((node_idx + 1) * target->ch_count / num_online_nodes()); - const int cv_start = (node_idx * ibdev->num_comp_vectors / - num_online_nodes() + target->comp_vector) - % ibdev->num_comp_vectors; - const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors / - num_online_nodes() + target->comp_vector) - % ibdev->num_comp_vectors; + const int cv_start = node_idx * ibdev->num_comp_vectors / + num_online_nodes(); + const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors / + num_online_nodes(); int cpu_idx = 0; for_each_online_cpu(cpu) { @@ -3907,8 +3897,8 @@ static ssize_t srp_create_target(struct device *dev, char dst[64]; if (target->using_rdma_cm) - inet_ntop(&target->rdma_cm.dst, dst, - sizeof(dst)); + snprintf(dst, sizeof(dst), "%pIS", + &target->rdma_cm.dst); else snprintf(dst, sizeof(dst), "%pI6", target->ib_cm.orig_dgid.raw); @@ -3941,14 +3931,11 @@ connected: if (target->state != SRP_TARGET_REMOVED) { if (target->using_rdma_cm) { - char dst[64]; - - inet_ntop(&target->rdma_cm.dst, dst, sizeof(dst)); shost_printk(KERN_DEBUG, target->scsi_host, PFX - "new target: id_ext %016llx ioc_guid %016llx sgid %pI6 dest %s\n", + "new target: id_ext %016llx ioc_guid %016llx sgid %pI6 dest %pIS\n", be64_to_cpu(target->id_ext), be64_to_cpu(target->ioc_guid), - target->sgid.raw, dst); + target->sgid.raw, &target->rdma_cm.dst); } else { shost_printk(KERN_DEBUG, target->scsi_host, PFX "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n", diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 0373b7c..dfec0e1 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -41,6 +41,7 @@ #include <linux/string.h> #include <linux/delay.h> #include <linux/atomic.h> +#include <linux/inet.h> #include <rdma/ib_cache.h> #include <scsi/scsi_proto.h> #include <scsi/scsi_tcq.h> @@ -92,6 +93,11 @@ MODULE_PARM_DESC(srpt_service_guid, " instead of using the node_guid of the first HCA."); static struct ib_client srpt_client; +/* Protects both rdma_cm_port and rdma_cm_id. */ +static DEFINE_MUTEX(rdma_cm_mutex); +/* Port number RDMA/CM will bind to. */ +static u16 rdma_cm_port; +static struct rdma_cm_id *rdma_cm_id; static void srpt_release_cmd(struct se_cmd *se_cmd); static void srpt_free_ch(struct kref *kref); static int srpt_queue_status(struct se_cmd *cmd); @@ -220,7 +226,10 @@ static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch) switch (event->event) { case IB_EVENT_COMM_EST: - ib_cm_notify(ch->ib_cm.cm_id, event->event); + if (ch->using_rdma_cm) + rdma_notify(ch->rdma_cm.cm_id, event->event); + else + ib_cm_notify(ch->ib_cm.cm_id, event->event); break; case IB_EVENT_QP_LAST_WQE_REACHED: pr_debug("%s-%d, state %s: received Last WQE event.\n", @@ -838,16 +847,20 @@ static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch, */ static int srpt_zerolength_write(struct srpt_rdma_ch *ch) { - struct ib_send_wr wr, *bad_wr; + struct ib_send_wr *bad_wr; + struct ib_rdma_wr wr = { + .wr = { + .next = NULL, + { .wr_cqe = &ch->zw_cqe, }, + .opcode = IB_WR_RDMA_WRITE, + .send_flags = IB_SEND_SIGNALED, + } + }; pr_debug("%s-%d: queued zerolength write\n", ch->sess_name, ch->qp->qp_num); - memset(&wr, 0, sizeof(wr)); - wr.opcode = IB_WR_RDMA_WRITE; - wr.wr_cqe = &ch->zw_cqe; - wr.send_flags = IB_SEND_SIGNALED; - return ib_post_send(ch->qp, &wr, &bad_wr); + return ib_post_send(ch->qp, &wr.wr, &bad_wr); } static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc) @@ -1057,6 +1070,8 @@ static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp) struct ib_qp_attr *attr; int ret; + WARN_ON_ONCE(ch->using_rdma_cm); + attr = kzalloc(sizeof(*attr), GFP_KERNEL); if (!attr) return -ENOMEM; @@ -1096,6 +1111,8 @@ static int srpt_ch_qp_rtr(struct srpt_rdma_ch *ch, struct ib_qp *qp) int attr_mask; int ret; + WARN_ON_ONCE(ch->using_rdma_cm); + qp_attr.qp_state = IB_QPS_RTR; ret = ib_cm_init_qp_attr(ch->ib_cm.cm_id, &qp_attr, &attr_mask); if (ret) @@ -1746,18 +1763,33 @@ retry: qp_init->cap.max_recv_sge = qp_init->cap.max_send_sge; } - ch->qp = ib_create_qp(sdev->pd, qp_init); - if (IS_ERR(ch->qp)) { - ret = PTR_ERR(ch->qp); - if (ret == -ENOMEM) { - sq_size /= 2; - if (sq_size >= MIN_SRPT_SQ_SIZE) { - ib_destroy_cq(ch->cq); - goto retry; - } + if (ch->using_rdma_cm) { + ret = rdma_create_qp(ch->rdma_cm.cm_id, sdev->pd, qp_init); + ch->qp = ch->rdma_cm.cm_id->qp; + } else { + ch->qp = ib_create_qp(sdev->pd, qp_init); + if (!IS_ERR(ch->qp)) { + ret = srpt_init_ch_qp(ch, ch->qp); + if (ret) + ib_destroy_qp(ch->qp); + } else { + ret = PTR_ERR(ch->qp); + } + } + if (ret) { + bool retry = sq_size > MIN_SRPT_SQ_SIZE; + + if (retry) { + pr_debug("failed to create queue pair with sq_size = %d (%d) - retrying\n", + sq_size, ret); + ib_free_cq(ch->cq); + sq_size = max(sq_size / 2, MIN_SRPT_SQ_SIZE); + goto retry; + } else { + pr_err("failed to create queue pair with sq_size = %d (%d)\n", + sq_size, ret); + goto err_destroy_cq; } - pr_err("failed to create_qp ret= %d\n", ret); - goto err_destroy_cq; } atomic_set(&ch->sq_wr_avail, qp_init->cap.max_send_wr); @@ -1766,10 +1798,6 @@ retry: __func__, ch->cq->cqe, qp_init->cap.max_send_sge, qp_init->cap.max_send_wr, ch); - ret = srpt_init_ch_qp(ch, ch->qp); - if (ret) - goto err_destroy_qp; - if (!sdev->use_srq) for (i = 0; i < ch->rq_size; i++) srpt_post_recv(sdev, ch, ch->ioctx_recv_ring[i]); @@ -1778,9 +1806,8 @@ out: kfree(qp_init); return ret; -err_destroy_qp: - ib_destroy_qp(ch->qp); err_destroy_cq: + ch->qp = NULL; ib_free_cq(ch->cq); goto out; } @@ -1849,9 +1876,13 @@ static int srpt_disconnect_ch(struct srpt_rdma_ch *ch) if (!srpt_set_ch_state(ch, CH_DISCONNECTING)) return -ENOTCONN; - ret = ib_send_cm_dreq(ch->ib_cm.cm_id, NULL, 0); - if (ret < 0) - ret = ib_send_cm_drep(ch->ib_cm.cm_id, NULL, 0); + if (ch->using_rdma_cm) { + ret = rdma_disconnect(ch->rdma_cm.cm_id); + } else { + ret = ib_send_cm_dreq(ch->ib_cm.cm_id, NULL, 0); + if (ret < 0) + ret = ib_send_cm_drep(ch->ib_cm.cm_id, NULL, 0); + } if (ret < 0 && srpt_close_ch(ch)) ret = 0; @@ -2002,7 +2033,10 @@ static void srpt_release_channel_work(struct work_struct *w) transport_deregister_session(se_sess); ch->sess = NULL; - ib_destroy_cm_id(ch->ib_cm.cm_id); + if (ch->using_rdma_cm) + rdma_destroy_id(ch->rdma_cm.cm_id); + else + ib_destroy_cm_id(ch->ib_cm.cm_id); srpt_destroy_ch_ib(ch); @@ -2026,26 +2060,33 @@ static void srpt_release_channel_work(struct work_struct *w) /** * srpt_cm_req_recv - process the event IB_CM_REQ_RECEIVED - * @cm_id: IB/CM connection identifier. - * @port_num: Port through which the IB/CM REQ message was received. + * @sdev: HCA through which the login request was received. + * @ib_cm_id: IB/CM connection identifier in case of IB/CM. + * @rdma_cm_id: RDMA/CM connection identifier in case of RDMA/CM. + * @port_num: Port through which the REQ message was received. * @pkey: P_Key of the incoming connection. * @req: SRP login request. - * @src_addr: GID of the port that submitted the login request. + * @src_addr: GID (IB/CM) or IP address (RDMA/CM) of the port that submitted + * the login request. * * Ownership of the cm_id is transferred to the target session if this - * functions returns zero. Otherwise the caller remains the owner of cm_id. + * function returns zero. Otherwise the caller remains the owner of cm_id. */ -static int srpt_cm_req_recv(struct ib_cm_id *cm_id, +static int srpt_cm_req_recv(struct srpt_device *const sdev, + struct ib_cm_id *ib_cm_id, + struct rdma_cm_id *rdma_cm_id, u8 port_num, __be16 pkey, const struct srp_login_req *req, const char *src_addr) { - struct srpt_device *sdev = cm_id->context; struct srpt_port *sport = &sdev->port[port_num - 1]; struct srpt_nexus *nexus; struct srp_login_rsp *rsp = NULL; struct srp_login_rej *rej = NULL; - struct ib_cm_rep_param *rep_param = NULL; + union { + struct rdma_conn_param rdma_cm; + struct ib_cm_rep_param ib_cm; + } *rep_param = NULL; struct srpt_rdma_ch *ch; char i_port_id[36]; u32 it_iu_len; @@ -2115,8 +2156,14 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, ch->zw_cqe.done = srpt_zerolength_write_done; INIT_WORK(&ch->release_work, srpt_release_channel_work); ch->sport = sport; - ch->ib_cm.cm_id = cm_id; - cm_id->context = ch; + if (ib_cm_id) { + ch->ib_cm.cm_id = ib_cm_id; + ib_cm_id->context = ch; + } else { + ch->using_rdma_cm = true; + ch->rdma_cm.cm_id = rdma_cm_id; + rdma_cm_id->context = ch; + } /* * ch->rq_size should be at least as large as the initiator queue * depth to avoid that the initiator driver has to report QUEUE_FULL @@ -2227,7 +2274,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, mutex_unlock(&sport->mutex); - ret = srpt_ch_qp_rtr(ch, ch->qp); + ret = ch->using_rdma_cm ? 0 : srpt_ch_qp_rtr(ch, ch->qp); if (ret) { rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); pr_err("rejected SRP_LOGIN_REQ because enabling RTR failed (error code = %d)\n", @@ -2251,25 +2298,38 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, atomic_set(&ch->req_lim_delta, 0); /* create cm reply */ - rep_param->qp_num = ch->qp->qp_num; - rep_param->private_data = (void *)rsp; - rep_param->private_data_len = sizeof(*rsp); - rep_param->rnr_retry_count = 7; - rep_param->flow_control = 1; - rep_param->failover_accepted = 0; - rep_param->srq = 1; - rep_param->responder_resources = 4; - rep_param->initiator_depth = 4; + if (ch->using_rdma_cm) { + rep_param->rdma_cm.private_data = (void *)rsp; + rep_param->rdma_cm.private_data_len = sizeof(*rsp); + rep_param->rdma_cm.rnr_retry_count = 7; + rep_param->rdma_cm.flow_control = 1; + rep_param->rdma_cm.responder_resources = 4; + rep_param->rdma_cm.initiator_depth = 4; + } else { + rep_param->ib_cm.qp_num = ch->qp->qp_num; + rep_param->ib_cm.private_data = (void *)rsp; + rep_param->ib_cm.private_data_len = sizeof(*rsp); + rep_param->ib_cm.rnr_retry_count = 7; + rep_param->ib_cm.flow_control = 1; + rep_param->ib_cm.failover_accepted = 0; + rep_param->ib_cm.srq = 1; + rep_param->ib_cm.responder_resources = 4; + rep_param->ib_cm.initiator_depth = 4; + } /* * Hold the sport mutex while accepting a connection to avoid that * srpt_disconnect_ch() is invoked concurrently with this code. */ mutex_lock(&sport->mutex); - if (sport->enabled && ch->state == CH_CONNECTING) - ret = ib_send_cm_rep(cm_id, rep_param); - else + if (sport->enabled && ch->state == CH_CONNECTING) { + if (ch->using_rdma_cm) + ret = rdma_accept(rdma_cm_id, &rep_param->rdma_cm); + else + ret = ib_send_cm_rep(ib_cm_id, &rep_param->ib_cm); + } else { ret = -EINVAL; + } mutex_unlock(&sport->mutex); switch (ret) { @@ -2299,7 +2359,8 @@ free_ring: ch->sport->sdev, ch->rq_size, ch->max_rsp_size, DMA_TO_DEVICE); free_ch: - cm_id->context = NULL; + if (ib_cm_id) + ib_cm_id->context = NULL; kfree(ch); ch = NULL; @@ -2312,8 +2373,11 @@ reject: rej->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT); - ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, - (void *)rej, sizeof(*rej)); + if (rdma_cm_id) + rdma_reject(rdma_cm_id, rej, sizeof(*rej)); + else + ib_send_cm_rej(ib_cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, + rej, sizeof(*rej)); out: kfree(rep_param); @@ -2332,10 +2396,44 @@ static int srpt_ib_cm_req_recv(struct ib_cm_id *cm_id, srpt_format_guid(sguid, sizeof(sguid), ¶m->primary_path->dgid.global.interface_id); - return srpt_cm_req_recv(cm_id, param->port, param->primary_path->pkey, + return srpt_cm_req_recv(cm_id->context, cm_id, NULL, param->port, + param->primary_path->pkey, private_data, sguid); } +static int srpt_rdma_cm_req_recv(struct rdma_cm_id *cm_id, + struct rdma_cm_event *event) +{ + struct srpt_device *sdev; + struct srp_login_req req; + const struct srp_login_req_rdma *req_rdma; + char src_addr[40]; + + sdev = ib_get_client_data(cm_id->device, &srpt_client); + if (!sdev) + return -ECONNREFUSED; + + if (event->param.conn.private_data_len < sizeof(*req_rdma)) + return -EINVAL; + + /* Transform srp_login_req_rdma into srp_login_req. */ + req_rdma = event->param.conn.private_data; + memset(&req, 0, sizeof(req)); + req.opcode = req_rdma->opcode; + req.tag = req_rdma->tag; + req.req_it_iu_len = req_rdma->req_it_iu_len; + req.req_buf_fmt = req_rdma->req_buf_fmt; + req.req_flags = req_rdma->req_flags; + memcpy(req.initiator_port_id, req_rdma->initiator_port_id, 16); + memcpy(req.target_port_id, req_rdma->target_port_id, 16); + + snprintf(src_addr, sizeof(src_addr), "%pIS", + &cm_id->route.addr.src_addr); + + return srpt_cm_req_recv(sdev, NULL, cm_id, cm_id->port_num, + cm_id->route.path_rec->pkey, &req, src_addr); +} + static void srpt_cm_rej_recv(struct srpt_rdma_ch *ch, enum ib_cm_rej_reason reason, const u8 *private_data, @@ -2359,14 +2457,14 @@ static void srpt_cm_rej_recv(struct srpt_rdma_ch *ch, * srpt_cm_rtu_recv - process an IB_CM_RTU_RECEIVED or USER_ESTABLISHED event * @ch: SRPT RDMA channel. * - * An IB_CM_RTU_RECEIVED message indicates that the connection is established - * and that the recipient may begin transmitting (RTU = ready to use). + * An RTU (ready to use) message indicates that the connection has been + * established and that the recipient may begin transmitting. */ static void srpt_cm_rtu_recv(struct srpt_rdma_ch *ch) { int ret; - ret = srpt_ch_qp_rts(ch, ch->qp); + ret = ch->using_rdma_cm ? 0 : srpt_ch_qp_rts(ch, ch->qp); if (ret < 0) { pr_err("%s-%d: QP transition to RTS failed\n", ch->sess_name, ch->qp->qp_num); @@ -2453,6 +2551,49 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) return ret; } +static int srpt_rdma_cm_handler(struct rdma_cm_id *cm_id, + struct rdma_cm_event *event) +{ + struct srpt_rdma_ch *ch = cm_id->context; + int ret = 0; + + switch (event->event) { + case RDMA_CM_EVENT_CONNECT_REQUEST: + ret = srpt_rdma_cm_req_recv(cm_id, event); + break; + case RDMA_CM_EVENT_REJECTED: + srpt_cm_rej_recv(ch, event->status, + event->param.conn.private_data, + event->param.conn.private_data_len); + break; + case RDMA_CM_EVENT_ESTABLISHED: + srpt_cm_rtu_recv(ch); + break; + case RDMA_CM_EVENT_DISCONNECTED: + if (ch->state < CH_DISCONNECTING) + srpt_disconnect_ch(ch); + else + srpt_close_ch(ch); + break; + case RDMA_CM_EVENT_TIMEWAIT_EXIT: + srpt_close_ch(ch); + break; + case RDMA_CM_EVENT_UNREACHABLE: + pr_info("Received CM REP error for ch %s-%d.\n", ch->sess_name, + ch->qp->qp_num); + break; + case RDMA_CM_EVENT_DEVICE_REMOVAL: + case RDMA_CM_EVENT_ADDR_CHANGE: + break; + default: + pr_err("received unrecognized RDMA CM event %d\n", + event->event); + break; + } + + return ret; +} + static int srpt_write_pending_status(struct se_cmd *se_cmd) { struct srpt_send_ioctx *ioctx; @@ -2824,7 +2965,7 @@ static void srpt_add_one(struct ib_device *device) { struct srpt_device *sdev; struct srpt_port *sport; - int i; + int i, ret; pr_debug("device = %p\n", device); @@ -2848,9 +2989,15 @@ static void srpt_add_one(struct ib_device *device) if (!srpt_service_guid) srpt_service_guid = be64_to_cpu(device->node_guid); - sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev); - if (IS_ERR(sdev->cm_id)) - goto err_ring; + if (rdma_port_get_link_layer(device, 1) == IB_LINK_LAYER_INFINIBAND) + sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev); + if (IS_ERR(sdev->cm_id)) { + pr_info("ib_create_cm_id() failed: %ld\n", + PTR_ERR(sdev->cm_id)); + sdev->cm_id = NULL; + if (!rdma_cm_id) + goto err_ring; + } /* print out target login information */ pr_debug("Target login info: id_ext=%016llx,ioc_guid=%016llx," @@ -2863,8 +3010,14 @@ static void srpt_add_one(struct ib_device *device) * in the system as service_id; therefore, the target_id will change * if this HCA is gone bad and replaced by different HCA */ - if (ib_cm_listen(sdev->cm_id, cpu_to_be64(srpt_service_guid), 0)) + ret = sdev->cm_id ? + ib_cm_listen(sdev->cm_id, cpu_to_be64(srpt_service_guid), 0) : + 0; + if (ret < 0) { + pr_err("ib_cm_listen() failed: %d (cm_id state = %d)\n", ret, + sdev->cm_id->state); goto err_cm; + } INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device, srpt_event_handler); @@ -2904,7 +3057,8 @@ out: err_event: ib_unregister_event_handler(&sdev->event_handler); err_cm: - ib_destroy_cm_id(sdev->cm_id); + if (sdev->cm_id) + ib_destroy_cm_id(sdev->cm_id); err_ring: srpt_free_srq(sdev); ib_dealloc_pd(sdev->pd); @@ -2939,7 +3093,10 @@ static void srpt_remove_one(struct ib_device *device, void *client_data) for (i = 0; i < sdev->device->phys_port_cnt; i++) cancel_work_sync(&sdev->port[i].work); - ib_destroy_cm_id(sdev->cm_id); + if (sdev->cm_id) + ib_destroy_cm_id(sdev->cm_id); + + ib_set_client_data(device, &srpt_client, NULL); /* * Unregistering a target must happen after destroying sdev->cm_id @@ -3103,18 +3260,26 @@ static int srpt_parse_i_port_id(u8 i_port_id[16], const char *name) leading_zero_bytes = 16 - count; memset(i_port_id, 0, leading_zero_bytes); ret = hex2bin(i_port_id + leading_zero_bytes, p, count); - if (ret < 0) - pr_debug("hex2bin failed for srpt_parse_i_port_id: %d\n", ret); + out: return ret; } /* - * configfs callback function invoked for - * mkdir /sys/kernel/config/target/$driver/$port/$tpg/acls/$i_port_id + * configfs callback function invoked for mkdir + * /sys/kernel/config/target/$driver/$port/$tpg/acls/$i_port_id + * + * i_port_id must be an initiator port GUID, GID or IP address. See also the + * target_alloc_session() calls in this driver. Examples of valid initiator + * port IDs: + * 0x0000000000000000505400fffe4a0b7b + * 0000000000000000505400fffe4a0b7b + * 5054:00ff:fe4a:0b7b + * 192.168.122.76 */ static int srpt_init_nodeacl(struct se_node_acl *se_nacl, const char *name) { + struct sockaddr_storage sa; u64 guid; u8 i_port_id[16]; int ret; @@ -3123,6 +3288,9 @@ static int srpt_init_nodeacl(struct se_node_acl *se_nacl, const char *name) if (ret < 0) ret = srpt_parse_i_port_id(i_port_id, name); if (ret < 0) + ret = inet_pton_with_scope(&init_net, AF_UNSPEC, name, NULL, + &sa); + if (ret < 0) pr_err("invalid initiator port ID %s\n", name); return ret; } @@ -3296,6 +3464,95 @@ static struct configfs_attribute *srpt_tpg_attrib_attrs[] = { NULL, }; +static struct rdma_cm_id *srpt_create_rdma_id(struct sockaddr *listen_addr) +{ + struct rdma_cm_id *rdma_cm_id; + int ret; + + rdma_cm_id = rdma_create_id(&init_net, srpt_rdma_cm_handler, + NULL, RDMA_PS_TCP, IB_QPT_RC); + if (IS_ERR(rdma_cm_id)) { + pr_err("RDMA/CM ID creation failed: %ld\n", + PTR_ERR(rdma_cm_id)); + goto out; + } + + ret = rdma_bind_addr(rdma_cm_id, listen_addr); + if (ret) { + char addr_str[64]; + + snprintf(addr_str, sizeof(addr_str), "%pISp", listen_addr); + pr_err("Binding RDMA/CM ID to address %s failed: %d\n", + addr_str, ret); + rdma_destroy_id(rdma_cm_id); + rdma_cm_id = ERR_PTR(ret); + goto out; + } + + ret = rdma_listen(rdma_cm_id, 128); + if (ret) { + pr_err("rdma_listen() failed: %d\n", ret); + rdma_destroy_id(rdma_cm_id); + rdma_cm_id = ERR_PTR(ret); + } + +out: + return rdma_cm_id; +} + +static ssize_t srpt_rdma_cm_port_show(struct config_item *item, char *page) +{ + return sprintf(page, "%d\n", rdma_cm_port); +} + +static ssize_t srpt_rdma_cm_port_store(struct config_item *item, + const char *page, size_t count) +{ + struct sockaddr_in addr4 = { .sin_family = AF_INET }; + struct sockaddr_in6 addr6 = { .sin6_family = AF_INET6 }; + struct rdma_cm_id *new_id = NULL; + u16 val; + int ret; + + ret = kstrtou16(page, 0, &val); + if (ret < 0) + return ret; + ret = count; + if (rdma_cm_port == val) + goto out; + + if (val) { + addr6.sin6_port = cpu_to_be16(val); + new_id = srpt_create_rdma_id((struct sockaddr *)&addr6); + if (IS_ERR(new_id)) { + addr4.sin_port = cpu_to_be16(val); + new_id = srpt_create_rdma_id((struct sockaddr *)&addr4); + if (IS_ERR(new_id)) { + ret = PTR_ERR(new_id); + goto out; + } + } + } + + mutex_lock(&rdma_cm_mutex); + rdma_cm_port = val; + swap(rdma_cm_id, new_id); + mutex_unlock(&rdma_cm_mutex); + + if (new_id) + rdma_destroy_id(new_id); + ret = count; +out: + return ret; +} + +CONFIGFS_ATTR(srpt_, rdma_cm_port); + +static struct configfs_attribute *srpt_da_attrs[] = { + &srpt_attr_rdma_cm_port, + NULL, +}; + static ssize_t srpt_tpg_enable_show(struct config_item *item, char *page) { struct se_portal_group *se_tpg = to_tpg(item); @@ -3441,6 +3698,7 @@ static const struct target_core_fabric_ops srpt_template = { .fabric_drop_tpg = srpt_drop_tpg, .fabric_init_nodeacl = srpt_init_nodeacl, + .tfc_discovery_attrs = srpt_da_attrs, .tfc_wwn_attrs = srpt_wwn_attrs, .tfc_tpg_base_attrs = srpt_tpg_attrs, .tfc_tpg_attrib_attrs = srpt_tpg_attrib_attrs, @@ -3494,6 +3752,8 @@ out: static void __exit srpt_cleanup_module(void) { + if (rdma_cm_id) + rdma_destroy_id(rdma_cm_id); ib_unregister_client(&srpt_client); target_unregister_template(&srpt_template); } diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index 4d9199f..2361483 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -42,6 +42,7 @@ #include <rdma/ib_verbs.h> #include <rdma/ib_sa.h> #include <rdma/ib_cm.h> +#include <rdma/rdma_cm.h> #include <rdma/rw.h> #include <scsi/srp.h> @@ -261,6 +262,7 @@ enum rdma_ch_state { * @spinlock: Protects free_list and state. * @free_list: Head of list with free send I/O contexts. * @state: channel state. See also enum rdma_ch_state. + * @using_rdma_cm: Whether the RDMA/CM or IB/CM is used for this channel. * @processing_wait_list: Whether or not cmd_wait_list is being processed. * @ioctx_ring: Send ring. * @ioctx_recv_ring: Receive I/O context ring. @@ -280,6 +282,9 @@ struct srpt_rdma_ch { struct { struct ib_cm_id *cm_id; } ib_cm; + struct { + struct rdma_cm_id *cm_id; + } rdma_cm; }; struct ib_cq *cq; struct ib_cqe zw_cqe; @@ -300,9 +305,10 @@ struct srpt_rdma_ch { struct list_head list; struct list_head cmd_wait_list; uint16_t pkey; + bool using_rdma_cm; bool processing_wait_list; struct se_session *sess; - u8 sess_name[24]; + u8 sess_name[40]; struct work_struct release_work; }; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 634f603..de6b3d4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -37,6 +37,7 @@ #include <linux/module.h> #include <linux/cache.h> #include <linux/kernel.h> +#include <uapi/rdma/mlx4-abi.h> #include "fw.h" #include "icm.h" diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 5a26851..5af5019 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -46,6 +46,7 @@ #include <linux/etherdevice.h> #include <net/devlink.h> +#include <uapi/rdma/mlx4-abi.h> #include <linux/mlx4/device.h> #include <linux/mlx4/doorbell.h> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index 784e282..db3278c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -70,7 +70,7 @@ static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, return -ENOMEM; mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); - MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_PA); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); MLX5_SET(mkc, mkc, lw, 1); MLX5_SET(mkc, mkc, lr, 1); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 0aab3af..d68f510 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -357,7 +357,7 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, MLX5_SET(mkc, mkc, umr_en, 1); MLX5_SET(mkc, mkc, lw, 1); MLX5_SET(mkc, mkc, lr, 1); - MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); MLX5_SET(mkc, mkc, qpn, 0xffffff); MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.pdn); @@ -1212,10 +1212,13 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) { struct mlx5e_channel *c = sq->channel; struct mlx5_core_dev *mdev = c->mdev; + struct mlx5_rate_limit rl = {0}; mlx5e_destroy_sq(mdev, sq->sqn); - if (sq->rate_limit) - mlx5_rl_remove_rate(mdev, sq->rate_limit); + if (sq->rate_limit) { + rl.rate = sq->rate_limit; + mlx5_rl_remove_rate(mdev, &rl); + } mlx5e_free_txqsq_descs(sq); mlx5e_free_txqsq(sq); } @@ -1646,6 +1649,7 @@ static int mlx5e_set_sq_maxrate(struct net_device *dev, struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_modify_sq_param msp = {0}; + struct mlx5_rate_limit rl = {0}; u16 rl_index = 0; int err; @@ -1653,14 +1657,17 @@ static int mlx5e_set_sq_maxrate(struct net_device *dev, /* nothing to do */ return 0; - if (sq->rate_limit) + if (sq->rate_limit) { + rl.rate = sq->rate_limit; /* remove current rl index to free space to next ones */ - mlx5_rl_remove_rate(mdev, sq->rate_limit); + mlx5_rl_remove_rate(mdev, &rl); + } sq->rate_limit = 0; if (rate) { - err = mlx5_rl_add_rate(mdev, rate, &rl_index); + rl.rate = rate; + err = mlx5_rl_add_rate(mdev, &rl_index, &rl); if (err) { netdev_err(dev, "Failed configuring rate %u: %d\n", rate, err); @@ -1678,7 +1685,7 @@ static int mlx5e_set_sq_maxrate(struct net_device *dev, rate, err); /* remove the rate from the table */ if (rate) - mlx5_rl_remove_rate(mdev, rate); + mlx5_rl_remove_rate(mdev, &rl); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index e6175f8..de7fe087 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -232,7 +232,7 @@ static int mlx5_fpga_conn_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, return -ENOMEM; mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); - MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_PA); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); MLX5_SET(mkc, mkc, lw, 1); MLX5_SET(mkc, mkc, lr, 1); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 7006697..afd9f4f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -195,6 +195,12 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) if (MLX5_CAP_GEN(dev, qcam_reg)) mlx5_get_qcam_reg(dev); + if (MLX5_CAP_GEN(dev, device_memory)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_MEM); + if (err) + return err; + } + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c index d3c33e9..bc86dff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c @@ -107,16 +107,16 @@ int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, * If the table is full, return NULL */ static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table, - u32 rate) + struct mlx5_rate_limit *rl) { struct mlx5_rl_entry *ret_entry = NULL; bool empty_found = false; int i; for (i = 0; i < table->max_size; i++) { - if (table->rl_entry[i].rate == rate) + if (mlx5_rl_are_equal(&table->rl_entry[i].rl, rl)) return &table->rl_entry[i]; - if (!empty_found && !table->rl_entry[i].rate) { + if (!empty_found && !table->rl_entry[i].rl.rate) { empty_found = true; ret_entry = &table->rl_entry[i]; } @@ -126,7 +126,8 @@ static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table, } static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev, - u32 rate, u16 index) + u16 index, + struct mlx5_rate_limit *rl) { u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)] = {0}; u32 out[MLX5_ST_SZ_DW(set_pp_rate_limit_out)] = {0}; @@ -134,7 +135,9 @@ static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev, MLX5_SET(set_pp_rate_limit_in, in, opcode, MLX5_CMD_OP_SET_PP_RATE_LIMIT); MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, index); - MLX5_SET(set_pp_rate_limit_in, in, rate_limit, rate); + MLX5_SET(set_pp_rate_limit_in, in, rate_limit, rl->rate); + MLX5_SET(set_pp_rate_limit_in, in, burst_upper_bound, rl->max_burst_sz); + MLX5_SET(set_pp_rate_limit_in, in, typical_packet_size, rl->typical_pkt_sz); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } @@ -146,7 +149,17 @@ bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate) } EXPORT_SYMBOL(mlx5_rl_is_in_range); -int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index) +bool mlx5_rl_are_equal(struct mlx5_rate_limit *rl_0, + struct mlx5_rate_limit *rl_1) +{ + return ((rl_0->rate == rl_1->rate) && + (rl_0->max_burst_sz == rl_1->max_burst_sz) && + (rl_0->typical_pkt_sz == rl_1->typical_pkt_sz)); +} +EXPORT_SYMBOL(mlx5_rl_are_equal); + +int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u16 *index, + struct mlx5_rate_limit *rl) { struct mlx5_rl_table *table = &dev->priv.rl_table; struct mlx5_rl_entry *entry; @@ -154,14 +167,14 @@ int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index) mutex_lock(&table->rl_lock); - if (!rate || !mlx5_rl_is_in_range(dev, rate)) { + if (!rl->rate || !mlx5_rl_is_in_range(dev, rl->rate)) { mlx5_core_err(dev, "Invalid rate: %u, should be %u to %u\n", - rate, table->min_rate, table->max_rate); + rl->rate, table->min_rate, table->max_rate); err = -EINVAL; goto out; } - entry = find_rl_entry(table, rate); + entry = find_rl_entry(table, rl); if (!entry) { mlx5_core_err(dev, "Max number of %u rates reached\n", table->max_size); @@ -173,13 +186,15 @@ int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index) entry->refcount++; } else { /* new rate limit */ - err = mlx5_set_pp_rate_limit_cmd(dev, rate, entry->index); + err = mlx5_set_pp_rate_limit_cmd(dev, entry->index, rl); if (err) { - mlx5_core_err(dev, "Failed configuring rate: %u (%d)\n", - rate, err); + mlx5_core_err(dev, "Failed configuring rate limit(err %d): \ + rate %u, max_burst_sz %u, typical_pkt_sz %u\n", + err, rl->rate, rl->max_burst_sz, + rl->typical_pkt_sz); goto out; } - entry->rate = rate; + entry->rl = *rl; entry->refcount = 1; } *index = entry->index; @@ -190,27 +205,30 @@ out: } EXPORT_SYMBOL(mlx5_rl_add_rate); -void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate) +void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, struct mlx5_rate_limit *rl) { struct mlx5_rl_table *table = &dev->priv.rl_table; struct mlx5_rl_entry *entry = NULL; + struct mlx5_rate_limit reset_rl = {0}; /* 0 is a reserved value for unlimited rate */ - if (rate == 0) + if (rl->rate == 0) return; mutex_lock(&table->rl_lock); - entry = find_rl_entry(table, rate); + entry = find_rl_entry(table, rl); if (!entry || !entry->refcount) { - mlx5_core_warn(dev, "Rate %u is not configured\n", rate); + mlx5_core_warn(dev, "Rate %u, max_burst_sz %u typical_pkt_sz %u \ + are not configured\n", + rl->rate, rl->max_burst_sz, rl->typical_pkt_sz); goto out; } entry->refcount--; if (!entry->refcount) { /* need to remove rate */ - mlx5_set_pp_rate_limit_cmd(dev, 0, entry->index); - entry->rate = 0; + mlx5_set_pp_rate_limit_cmd(dev, entry->index, &reset_rl); + entry->rl = reset_rl; } out: @@ -257,13 +275,14 @@ int mlx5_init_rl_table(struct mlx5_core_dev *dev) void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev) { struct mlx5_rl_table *table = &dev->priv.rl_table; + struct mlx5_rate_limit rl = {0}; int i; /* Clear all configured rates */ for (i = 0; i < table->max_size; i++) - if (table->rl_entry[i].rate) - mlx5_set_pp_rate_limit_cmd(dev, 0, - table->rl_entry[i].index); + if (table->rl_entry[i].rl.rate) + mlx5_set_pp_rate_limit_cmd(dev, table->rl_entry[i].index, + &rl); kfree(dev->priv.rl_table.rl_entry); } diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index a9b5fed..81d0799 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -257,10 +257,6 @@ enum { }; enum { - MLX4_USER_DEV_CAP_LARGE_CQE = 1L << 0 -}; - -enum { MLX4_FUNC_CAP_64B_EQE_CQE = 1L << 0, MLX4_FUNC_CAP_EQE_CQE_STRIDE = 1L << 1, MLX4_FUNC_CAP_DMFS_A0_STATIC = 1L << 2 diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 1275859..2bc27f8 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1017,6 +1017,8 @@ enum mlx5_cap_type { MLX5_CAP_VECTOR_CALC, MLX5_CAP_QOS, MLX5_CAP_DEBUG, + MLX5_CAP_RESERVED_14, + MLX5_CAP_DEV_MEM, /* NUM OF CAP Types */ MLX5_CAP_NUM }; @@ -1168,6 +1170,12 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP64_FPGA(mdev, cap) \ MLX5_GET64(fpga_cap, (mdev)->caps.fpga, cap) +#define MLX5_CAP_DEV_MEM(mdev, cap)\ + MLX5_GET(device_mem_cap, mdev->caps.hca_cur[MLX5_CAP_DEV_MEM], cap) + +#define MLX5_CAP64_DEV_MEM(mdev, cap)\ + MLX5_GET64(device_mem_cap, mdev->caps.hca_cur[MLX5_CAP_DEV_MEM], cap) + enum { MLX5_CMD_STAT_OK = 0x0, MLX5_CMD_STAT_INT_ERR = 0x1, @@ -1211,8 +1219,8 @@ static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz) return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz; } -#define MLX5_BY_PASS_NUM_REGULAR_PRIOS 8 -#define MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS 8 +#define MLX5_BY_PASS_NUM_REGULAR_PRIOS 16 +#define MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS 16 #define MLX5_BY_PASS_NUM_MULTICAST_PRIOS 1 #define MLX5_BY_PASS_NUM_PRIOS (MLX5_BY_PASS_NUM_REGULAR_PRIOS +\ MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS +\ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index cded85a..767d193 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -591,8 +591,14 @@ struct mlx5_eswitch; struct mlx5_lag; struct mlx5_pagefault; +struct mlx5_rate_limit { + u32 rate; + u32 max_burst_sz; + u16 typical_pkt_sz; +}; + struct mlx5_rl_entry { - u32 rate; + struct mlx5_rate_limit rl; u16 index; u16 refcount; }; @@ -1107,9 +1113,12 @@ int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token, int mlx5_init_rl_table(struct mlx5_core_dev *dev); void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev); -int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index); -void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate); +int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u16 *index, + struct mlx5_rate_limit *rl); +void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, struct mlx5_rate_limit *rl); bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate); +bool mlx5_rl_are_equal(struct mlx5_rate_limit *rl_0, + struct mlx5_rate_limit *rl_1); int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg, bool map_wc, bool fast_path); void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg); diff --git a/include/linux/mlx5/fs_helpers.h b/include/linux/mlx5/fs_helpers.h index 7b476bb..9db21cd 100644 --- a/include/linux/mlx5/fs_helpers.h +++ b/include/linux/mlx5/fs_helpers.h @@ -38,6 +38,14 @@ #define MLX5_FS_IPV4_VERSION 4 #define MLX5_FS_IPV6_VERSION 6 +static inline bool mlx5_fs_is_ipsec_flow(const u32 *match_c) +{ + void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, + misc_parameters); + + return MLX5_GET(fte_match_set_misc, misc_params_c, outer_esp_spi); +} + static inline bool _mlx5_fs_is_outer_ipproto_flow(const u32 *match_c, const u32 *match_v, u8 match) { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index d25011f..1aad455 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -92,6 +92,8 @@ enum { MLX5_CMD_OP_DESTROY_MKEY = 0x202, MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS = 0x203, MLX5_CMD_OP_PAGE_FAULT_RESUME = 0x204, + MLX5_CMD_OP_ALLOC_MEMIC = 0x205, + MLX5_CMD_OP_DEALLOC_MEMIC = 0x206, MLX5_CMD_OP_CREATE_EQ = 0x301, MLX5_CMD_OP_DESTROY_EQ = 0x302, MLX5_CMD_OP_QUERY_EQ = 0x303, @@ -575,7 +577,10 @@ struct mlx5_ifc_qos_cap_bits { u8 esw_scheduling[0x1]; u8 esw_bw_share[0x1]; u8 esw_rate_limit[0x1]; - u8 reserved_at_4[0x1c]; + u8 reserved_at_4[0x1]; + u8 packet_pacing_burst_bound[0x1]; + u8 packet_pacing_typical_size[0x1]; + u8 reserved_at_7[0x19]; u8 reserved_at_20[0x20]; @@ -669,6 +674,24 @@ struct mlx5_ifc_roce_cap_bits { u8 reserved_at_100[0x700]; }; +struct mlx5_ifc_device_mem_cap_bits { + u8 memic[0x1]; + u8 reserved_at_1[0x1f]; + + u8 reserved_at_20[0xb]; + u8 log_min_memic_alloc_size[0x5]; + u8 reserved_at_30[0x8]; + u8 log_max_memic_addr_alignment[0x8]; + + u8 memic_bar_start_addr[0x40]; + + u8 memic_bar_size[0x20]; + + u8 max_memic_size[0x20]; + + u8 reserved_at_c0[0x740]; +}; + enum { MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_1_BYTE = 0x0, MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_2_BYTES = 0x2, @@ -883,7 +906,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 ets[0x1]; u8 nic_flow_table[0x1]; u8 eswitch_flow_table[0x1]; - u8 early_vf_enable[0x1]; + u8 device_memory[0x1]; u8 mcam_reg[0x1]; u8 pcam_reg[0x1]; u8 local_ca_ack_delay[0x5]; @@ -927,7 +950,11 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_202[0x1]; u8 ipoib_enhanced_offloads[0x1]; u8 ipoib_basic_offloads[0x1]; - u8 reserved_at_205[0x5]; + u8 reserved_at_205[0x1]; + u8 repeated_block_disabled[0x1]; + u8 umr_modify_entity_size_disabled[0x1]; + u8 umr_modify_atomic_disabled[0x1]; + u8 umr_indirect_mkey_disabled[0x1]; u8 umr_fence[0x2]; u8 reserved_at_20c[0x3]; u8 drain_sigerr[0x1]; @@ -2748,12 +2775,17 @@ enum { MLX5_MKC_ACCESS_MODE_MTT = 0x1, MLX5_MKC_ACCESS_MODE_KLMS = 0x2, MLX5_MKC_ACCESS_MODE_KSM = 0x3, + MLX5_MKC_ACCESS_MODE_MEMIC = 0x5, }; struct mlx5_ifc_mkc_bits { u8 reserved_at_0[0x1]; u8 free[0x1]; - u8 reserved_at_2[0xd]; + u8 reserved_at_2[0x1]; + u8 access_mode_4_2[0x3]; + u8 reserved_at_6[0x7]; + u8 relaxed_ordering_write[0x1]; + u8 reserved_at_e[0x1]; u8 small_fence_on_rdma_read_response[0x1]; u8 umr_en[0x1]; u8 a[0x1]; @@ -2761,7 +2793,7 @@ struct mlx5_ifc_mkc_bits { u8 rr[0x1]; u8 lw[0x1]; u8 lr[0x1]; - u8 access_mode[0x2]; + u8 access_mode_1_0[0x2]; u8 reserved_at_18[0x8]; u8 qpn[0x18]; @@ -7397,7 +7429,12 @@ struct mlx5_ifc_set_pp_rate_limit_in_bits { u8 rate_limit[0x20]; - u8 reserved_at_a0[0x160]; + u8 burst_upper_bound[0x20]; + + u8 reserved_at_c0[0x10]; + u8 typical_packet_size[0x10]; + + u8 reserved_at_e0[0x120]; }; struct mlx5_ifc_access_register_out_bits { @@ -8951,4 +8988,57 @@ struct mlx5_ifc_destroy_vport_lag_in_bits { u8 reserved_at_40[0x40]; }; +struct mlx5_ifc_alloc_memic_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_30[0x20]; + + u8 reserved_at_40[0x18]; + u8 log_memic_addr_alignment[0x8]; + + u8 range_start_addr[0x40]; + + u8 range_size[0x20]; + + u8 memic_size[0x20]; +}; + +struct mlx5_ifc_alloc_memic_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 memic_start_addr[0x40]; +}; + +struct mlx5_ifc_dealloc_memic_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; + + u8 memic_start_addr[0x40]; + + u8 memic_size[0x20]; + + u8 reserved_at_e0[0x20]; +}; + +struct mlx5_ifc_dealloc_memic_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + #endif /* MLX5_IFC_H */ diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 415e099..a08cc72 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -119,10 +119,6 @@ int rdma_resolve_ip(struct rdma_addr_client *client, struct rdma_dev_addr *addr, void *context), void *context); -int rdma_resolve_ip_route(struct sockaddr *src_addr, - const struct sockaddr *dst_addr, - struct rdma_dev_addr *addr); - void rdma_addr_cancel(struct rdma_dev_addr *addr); void rdma_copy_addr(struct rdma_dev_addr *dev_addr, @@ -133,11 +129,6 @@ int rdma_addr_size(struct sockaddr *addr); int rdma_addr_size_in6(struct sockaddr_in6 *addr); int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr); -int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, - const union ib_gid *dgid, - u8 *dmac, const struct net_device *ndev, - int *hoplimit); - static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr) { return ((u16)dev_addr->broadcast[8] << 8) | (u16)dev_addr->broadcast[9]; diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h index 385ec88..eb49cc8 100644 --- a/include/rdma/ib_cache.h +++ b/include/rdma/ib_cache.h @@ -55,20 +55,6 @@ int ib_get_cached_gid(struct ib_device *device, union ib_gid *gid, struct ib_gid_attr *attr); -/** - * ib_find_cached_gid - Returns the port number and GID table index where - * a specified GID value occurs. - * @device: The device to query. - * @gid: The GID value to search for. - * @gid_type: The GID type to search for. - * @ndev: In RoCE, the net device of the device. NULL means ignore. - * @port_num: The port number of the device where the GID value was found. - * @index: The index into the cached GID table where the GID was found. This - * parameter may be NULL. - * - * ib_find_cached_gid() searches for the specified GID value in - * the local software cache. - */ int ib_find_cached_gid(struct ib_device *device, const union ib_gid *gid, enum ib_gid_type gid_type, @@ -76,21 +62,6 @@ int ib_find_cached_gid(struct ib_device *device, u8 *port_num, u16 *index); -/** - * ib_find_cached_gid_by_port - Returns the GID table index where a specified - * GID value occurs - * @device: The device to query. - * @gid: The GID value to search for. - * @gid_type: The GID type to search for. - * @port_num: The port number of the device where the GID value sould be - * searched. - * @ndev: In RoCE, the net device of the device. Null means ignore. - * @index: The index into the cached GID table where the GID was found. This - * parameter may be NULL. - * - * ib_find_cached_gid() searches for the specified GID value in - * the local software cache. - */ int ib_find_cached_gid_by_port(struct ib_device *device, const union ib_gid *gid, enum ib_gid_type gid_type, diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 811cfcfc..bacb144 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -163,7 +163,15 @@ struct sa_path_rec_ib { u8 raw_traffic; }; +/** + * struct sa_path_rec_roce - RoCE specific portion of the path record entry + * @route_resolved: When set, it indicates that this route is already + * resolved for this path record entry. + * @dmac: Destination mac address for the given DGID entry + * of the path record entry. + */ struct sa_path_rec_roce { + bool route_resolved; u8 dmac[ETH_ALEN]; /* ignored in IB */ int ifindex; @@ -590,6 +598,11 @@ static inline bool sa_path_is_roce(struct sa_path_rec *rec) (rec->rec_type == SA_PATH_REC_TYPE_ROCE_V2)); } +static inline bool sa_path_is_opa(struct sa_path_rec *rec) +{ + return (rec->rec_type == SA_PATH_REC_TYPE_OPA); +} + static inline void sa_path_set_slid(struct sa_path_rec *rec, u32 slid) { if (rec->rec_type == SA_PATH_REC_TYPE_IB) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 6eb1747..9fc8a82 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -64,6 +64,8 @@ #include <linux/cgroup_rdma.h> #include <uapi/rdma/ib_user_verbs.h> #include <rdma/restrack.h> +#include <uapi/rdma/rdma_user_ioctl.h> +#include <uapi/rdma/ib_user_ioctl_verbs.h> #define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN @@ -90,8 +92,11 @@ enum ib_gid_type { #define ROCE_V2_UDP_DPORT 4791 struct ib_gid_attr { - enum ib_gid_type gid_type; struct net_device *ndev; + struct ib_device *device; + enum ib_gid_type gid_type; + u16 index; + u8 port_num; }; enum rdma_node_type { @@ -316,6 +321,18 @@ struct ib_cq_caps { u16 max_cq_moderation_period; }; +struct ib_dm_mr_attr { + u64 length; + u64 offset; + u32 access_flags; +}; + +struct ib_dm_alloc_attr { + u64 length; + u32 alignment; + u32 flags; +}; + struct ib_device_attr { u64 fw_ver; __be64 sys_image_guid; @@ -367,6 +384,7 @@ struct ib_device_attr { u32 raw_packet_caps; /* Use ib_raw_packet_caps enum */ struct ib_tm_caps tm_caps; struct ib_cq_caps cq_caps; + u64 max_dm_size; }; enum ib_mtu { @@ -469,6 +487,9 @@ enum ib_port_speed { /** * struct rdma_hw_stats + * @lock - Mutex to protect parallel write access to lifespan and values + * of counters, which are 64bits and not guaranteeed to be written + * atomicaly on 32bits systems. * @timestamp - Used by the core code to track when the last update was * @lifespan - Used by the core code to determine how old the counters * should be before being updated again. Stored in jiffies, defaults @@ -484,6 +505,7 @@ enum ib_port_speed { * filled in by the drivers get_stats routine */ struct rdma_hw_stats { + struct mutex lock; /* Protect lifespan and values[] */ unsigned long timestamp; unsigned long lifespan; const char * const *names; @@ -1755,6 +1777,14 @@ struct ib_qp { struct rdma_restrack_entry res; }; +struct ib_dm { + struct ib_device *device; + u32 length; + u32 flags; + struct ib_uobject *uobject; + atomic_t usecnt; +}; + struct ib_mr { struct ib_device *device; struct ib_pd *pd; @@ -1768,6 +1798,13 @@ struct ib_mr { struct ib_uobject *uobject; /* user */ struct list_head qp_entry; /* FR */ }; + + struct ib_dm *dm; + + /* + * Implementation details of the RDMA core, don't use in drivers: + */ + struct rdma_restrack_entry res; }; struct ib_mw { @@ -1810,6 +1847,7 @@ enum ib_flow_spec_type { /* L3 header*/ IB_FLOW_SPEC_IPV4 = 0x30, IB_FLOW_SPEC_IPV6 = 0x31, + IB_FLOW_SPEC_ESP = 0x34, /* L4 headers*/ IB_FLOW_SPEC_TCP = 0x40, IB_FLOW_SPEC_UDP = 0x41, @@ -1818,6 +1856,7 @@ enum ib_flow_spec_type { /* Actions */ IB_FLOW_SPEC_ACTION_TAG = 0x1000, IB_FLOW_SPEC_ACTION_DROP = 0x1001, + IB_FLOW_SPEC_ACTION_HANDLE = 0x1002, }; #define IB_FLOW_SPEC_LAYER_MASK 0xF0 #define IB_FLOW_SPEC_SUPPORT_LAYERS 8 @@ -1835,7 +1874,8 @@ enum ib_flow_domain { enum ib_flow_flags { IB_FLOW_ATTR_FLAGS_DONT_TRAP = 1UL << 1, /* Continue match, no steal */ - IB_FLOW_ATTR_FLAGS_RESERVED = 1UL << 2 /* Must be last */ + IB_FLOW_ATTR_FLAGS_EGRESS = 1UL << 2, /* Egress flow */ + IB_FLOW_ATTR_FLAGS_RESERVED = 1UL << 3 /* Must be last */ }; struct ib_flow_eth_filter { @@ -1940,6 +1980,20 @@ struct ib_flow_spec_tunnel { struct ib_flow_tunnel_filter mask; }; +struct ib_flow_esp_filter { + __be32 spi; + __be32 seq; + /* Must be last */ + u8 real_sz[0]; +}; + +struct ib_flow_spec_esp { + u32 type; + u16 size; + struct ib_flow_esp_filter val; + struct ib_flow_esp_filter mask; +}; + struct ib_flow_spec_action_tag { enum ib_flow_spec_type type; u16 size; @@ -1951,6 +2005,12 @@ struct ib_flow_spec_action_drop { u16 size; }; +struct ib_flow_spec_action_handle { + enum ib_flow_spec_type type; + u16 size; + struct ib_flow_action *act; +}; + union ib_flow_spec { struct { u32 type; @@ -1962,8 +2022,10 @@ union ib_flow_spec { struct ib_flow_spec_tcp_udp tcp_udp; struct ib_flow_spec_ipv6 ipv6; struct ib_flow_spec_tunnel tunnel; + struct ib_flow_spec_esp esp; struct ib_flow_spec_action_tag flow_tag; struct ib_flow_spec_action_drop drop; + struct ib_flow_spec_action_handle action; }; struct ib_flow_attr { @@ -1984,6 +2046,64 @@ struct ib_flow { struct ib_uobject *uobject; }; +enum ib_flow_action_type { + IB_FLOW_ACTION_UNSPECIFIED, + IB_FLOW_ACTION_ESP = 1, +}; + +struct ib_flow_action_attrs_esp_keymats { + enum ib_uverbs_flow_action_esp_keymat protocol; + union { + struct ib_uverbs_flow_action_esp_keymat_aes_gcm aes_gcm; + } keymat; +}; + +struct ib_flow_action_attrs_esp_replays { + enum ib_uverbs_flow_action_esp_replay protocol; + union { + struct ib_uverbs_flow_action_esp_replay_bmp bmp; + } replay; +}; + +enum ib_flow_action_attrs_esp_flags { + /* All user-space flags at the top: Use enum ib_uverbs_flow_action_esp_flags + * This is done in order to share the same flags between user-space and + * kernel and spare an unnecessary translation. + */ + + /* Kernel flags */ + IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED = 1ULL << 32, + IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS = 1ULL << 33, +}; + +struct ib_flow_spec_list { + struct ib_flow_spec_list *next; + union ib_flow_spec spec; +}; + +struct ib_flow_action_attrs_esp { + struct ib_flow_action_attrs_esp_keymats *keymat; + struct ib_flow_action_attrs_esp_replays *replay; + struct ib_flow_spec_list *encap; + /* Used only if IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED is enabled. + * Value of 0 is a valid value. + */ + u32 esn; + u32 spi; + u32 seq; + u32 tfc_pad; + /* Use enum ib_flow_action_attrs_esp_flags */ + u64 flags; + u64 hard_limit_pkts; +}; + +struct ib_flow_action { + struct ib_device *device; + struct ib_uobject *uobject; + enum ib_flow_action_type type; + atomic_t usecnt; +}; + struct ib_mad_hdr; struct ib_grh; @@ -2060,6 +2180,8 @@ struct ib_port_pkey_list { struct list_head pkey_list; }; +struct uverbs_attr_bundle; + struct ib_device { /* Do not access @dma_device directly from ULP nor from HW drivers. */ struct device *dma_device; @@ -2127,37 +2249,36 @@ struct ib_device { */ struct net_device *(*get_netdev)(struct ib_device *device, u8 port_num); + /* query_gid should be return GID value for @device, when @port_num + * link layer is either IB or iWarp. It is no-op if @port_num port + * is RoCE link layer. + */ int (*query_gid)(struct ib_device *device, u8 port_num, int index, union ib_gid *gid); - /* When calling add_gid, the HW vendor's driver should - * add the gid of device @device at gid index @index of - * port @port_num to be @gid. Meta-info of that gid (for example, - * the network device related to this gid is available - * at @attr. @context allows the HW vendor driver to store extra - * information together with a GID entry. The HW vendor may allocate - * memory to contain this information and store it in @context when a - * new GID entry is written to. Params are consistent until the next - * call of add_gid or delete_gid. The function should return 0 on + /* When calling add_gid, the HW vendor's driver should add the gid + * of device of port at gid index available at @attr. Meta-info of + * that gid (for example, the network device related to this gid) is + * available at @attr. @context allows the HW vendor driver to store + * extra information together with a GID entry. The HW vendor driver may + * allocate memory to contain this information and store it in @context + * when a new GID entry is written to. Params are consistent until the + * next call of add_gid or delete_gid. The function should return 0 on * success or error otherwise. The function could be called - * concurrently for different ports. This function is only called - * when roce_gid_table is used. + * concurrently for different ports. This function is only called when + * roce_gid_table is used. */ - int (*add_gid)(struct ib_device *device, - u8 port_num, - unsigned int index, - const union ib_gid *gid, + int (*add_gid)(const union ib_gid *gid, const struct ib_gid_attr *attr, void **context); /* When calling del_gid, the HW vendor's driver should delete the - * gid of device @device at gid index @index of port @port_num. + * gid of device @device at gid index gid_index of port port_num + * available in @attr. * Upon the deletion of a GID entry, the HW vendor must free any * allocated memory. The caller will clear @context afterwards. * This function is only called when roce_gid_table is used. */ - int (*del_gid)(struct ib_device *device, - u8 port_num, - unsigned int index, + int (*del_gid)(const struct ib_gid_attr *attr, void **context); int (*query_pkey)(struct ib_device *device, u8 port_num, u16 index, u16 *pkey); @@ -2315,6 +2436,21 @@ struct ib_device { struct ib_rwq_ind_table_init_attr *init_attr, struct ib_udata *udata); int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table); + struct ib_flow_action * (*create_flow_action_esp)(struct ib_device *device, + const struct ib_flow_action_attrs_esp *attr, + struct uverbs_attr_bundle *attrs); + int (*destroy_flow_action)(struct ib_flow_action *action); + int (*modify_flow_action_esp)(struct ib_flow_action *action, + const struct ib_flow_action_attrs_esp *attr, + struct uverbs_attr_bundle *attrs); + struct ib_dm * (*alloc_dm)(struct ib_device *device, + struct ib_ucontext *context, + struct ib_dm_alloc_attr *attr, + struct uverbs_attr_bundle *attrs); + int (*dealloc_dm)(struct ib_dm *dm); + struct ib_mr * (*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm, + struct ib_dm_mr_attr *attr, + struct uverbs_attr_bundle *attrs); /** * rdma netdev operation * @@ -2376,6 +2512,7 @@ struct ib_device { int comp_vector); struct uverbs_root_spec *specs_root; + enum rdma_driver_id driver_id; }; struct ib_client { @@ -2435,11 +2572,9 @@ static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0; } -static inline bool ib_is_udata_cleared(struct ib_udata *udata, - size_t offset, - size_t len) +static inline bool ib_is_buffer_cleared(const void __user *p, + size_t len) { - const void __user *p = udata->inbuf + offset; bool ret; u8 *buf; @@ -2455,6 +2590,13 @@ static inline bool ib_is_udata_cleared(struct ib_udata *udata, return ret; } +static inline bool ib_is_udata_cleared(struct ib_udata *udata, + size_t offset, + size_t len) +{ + return ib_is_buffer_cleared(udata->inbuf + offset, len); +} + /** * ib_modify_qp_is_ok - Check that the supplied attribute mask * contains all required attributes and no attributes not allowed for @@ -2471,9 +2613,9 @@ static inline bool ib_is_udata_cleared(struct ib_udata *udata, * transition from cur_state to next_state is allowed by the IB spec, * and that the attribute mask supplied is allowed for the transition. */ -int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, - enum ib_qp_type type, enum ib_qp_attr_mask mask, - enum rdma_link_layer ll); +bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, + enum ib_qp_type type, enum ib_qp_attr_mask mask, + enum rdma_link_layer ll); void ib_register_event_handler(struct ib_event_handler *event_handler); void ib_unregister_event_handler(struct ib_event_handler *event_handler); @@ -2848,7 +2990,7 @@ int ib_modify_port(struct ib_device *device, struct ib_port_modify *port_modify); int ib_find_gid(struct ib_device *device, union ib_gid *gid, - struct net_device *ndev, u8 *port_num, u16 *index); + u8 *port_num, u16 *index); int ib_find_pkey(struct ib_device *device, u8 port_num, u16 pkey, u16 *index); @@ -3217,18 +3359,6 @@ static inline int ib_poll_cq(struct ib_cq *cq, int num_entries, } /** - * ib_peek_cq - Returns the number of unreaped completions currently - * on the specified CQ. - * @cq: The CQ to peek. - * @wc_cnt: A minimum number of unreaped completions to check for. - * - * If the number of unreaped completions is greater than or equal to wc_cnt, - * this function returns wc_cnt, otherwise, it returns the actual number of - * unreaped completions. - */ -int ib_peek_cq(struct ib_cq *cq, int wc_cnt); - -/** * ib_req_notify_cq - Request completion notification on a CQ. * @cq: The CQ to generate an event for. * @flags: diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 6538a5c..6909347 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -38,6 +38,7 @@ #include <linux/in6.h> #include <rdma/ib_addr.h> #include <rdma/ib_sa.h> +#include <uapi/rdma/rdma_user_cm.h> /* * Upon receiving a device removal event, users must destroy the associated @@ -64,14 +65,6 @@ enum rdma_cm_event_type { const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event); -enum rdma_port_space { - RDMA_PS_SDP = 0x0001, - RDMA_PS_IPOIB = 0x0002, - RDMA_PS_IB = 0x013F, - RDMA_PS_TCP = 0x0106, - RDMA_PS_UDP = 0x0111, -}; - #define RDMA_IB_IP_PS_MASK 0xFFFFFFFFFFFF0000ULL #define RDMA_IB_IP_PS_TCP 0x0000000001060000ULL #define RDMA_IB_IP_PS_UDP 0x0000000001110000ULL @@ -120,20 +113,6 @@ struct rdma_cm_event { } param; }; -enum rdma_cm_state { - RDMA_CM_IDLE, - RDMA_CM_ADDR_QUERY, - RDMA_CM_ADDR_RESOLVED, - RDMA_CM_ROUTE_QUERY, - RDMA_CM_ROUTE_RESOLVED, - RDMA_CM_CONNECT, - RDMA_CM_DISCONNECT, - RDMA_CM_ADDR_BOUND, - RDMA_CM_LISTEN, - RDMA_CM_DEVICE_REMOVAL, - RDMA_CM_DESTROYING -}; - struct rdma_cm_id; /** @@ -152,11 +131,17 @@ struct rdma_cm_id { struct ib_qp *qp; rdma_cm_event_handler event_handler; struct rdma_route route; - enum rdma_port_space ps; + enum rdma_ucm_port_space ps; enum ib_qp_type qp_type; u8 port_num; }; +struct rdma_cm_id *__rdma_create_id(struct net *net, + rdma_cm_event_handler event_handler, + void *context, enum rdma_ucm_port_space ps, + enum ib_qp_type qp_type, + const char *caller); + /** * rdma_create_id - Create an RDMA identifier. * @@ -169,10 +154,9 @@ struct rdma_cm_id { * * The id holds a reference on the network namespace until it is destroyed. */ -struct rdma_cm_id *rdma_create_id(struct net *net, - rdma_cm_event_handler event_handler, - void *context, enum rdma_port_space ps, - enum ib_qp_type qp_type); +#define rdma_create_id(net, event_handler, context, ps, qp_type) \ + __rdma_create_id((net), (event_handler), (context), (ps), (qp_type), \ + KBUILD_MODNAME) /** * rdma_destroy_id - Destroys an RDMA identifier. @@ -284,6 +268,9 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param); */ int rdma_listen(struct rdma_cm_id *id, int backlog); +int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, + const char *caller); + /** * rdma_accept - Called to accept a connection request or response. * @id: Connection identifier associated with the request. @@ -299,7 +286,8 @@ int rdma_listen(struct rdma_cm_id *id, int backlog); * state of the qp associated with the id is modified to error, such that any * previously posted receive buffers would be flushed. */ -int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param); +#define rdma_accept(id, conn_param) \ + __rdma_accept((id), (conn_param), KBUILD_MODNAME) /** * rdma_notify - Notifies the RDMA CM of an asynchronous event that has diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 4118324..3f4c187 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -538,7 +538,7 @@ static inline void rvt_mod_retry_timer(struct rvt_qp *qp) struct rvt_dev_info *rvt_alloc_device(size_t size, int nports); void rvt_dealloc_device(struct rvt_dev_info *rdi); -int rvt_register_device(struct rvt_dev_info *rvd); +int rvt_register_device(struct rvt_dev_info *rvd, u32 driver_id); void rvt_unregister_device(struct rvt_dev_info *rvd); int rvt_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr); int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index 2cdf8dc..f3b3e35 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -11,6 +11,7 @@ #include <linux/sched.h> #include <linux/kref.h> #include <linux/completion.h> +#include <linux/sched/task.h> /** * enum rdma_restrack_type - HW objects to track @@ -29,6 +30,14 @@ enum rdma_restrack_type { */ RDMA_RESTRACK_QP, /** + * @RDMA_RESTRACK_CM_ID: Connection Manager ID (CM_ID) + */ + RDMA_RESTRACK_CM_ID, + /** + * @RDMA_RESTRACK_MR: Memory Region (MR) + */ + RDMA_RESTRACK_MR, + /** * @RDMA_RESTRACK_MAX: Last entry, used for array dclarations */ RDMA_RESTRACK_MAX @@ -146,8 +155,23 @@ static inline bool rdma_is_kernel_res(struct rdma_restrack_entry *res) int __must_check rdma_restrack_get(struct rdma_restrack_entry *res); /** - * rdma_restrack_put() - relase resource + * rdma_restrack_put() - release resource * @res: resource entry */ int rdma_restrack_put(struct rdma_restrack_entry *res); + +/** + * rdma_restrack_set_task() - set the task for this resource + * @res: resource entry + * @task: task struct + */ +static inline void rdma_restrack_set_task(struct rdma_restrack_entry *res, + struct task_struct *task) +{ + if (res->task) + put_task_struct(res->task); + get_task_struct(task); + res->task = task; +} + #endif /* _RDMA_RESTRACK_H_ */ diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 38287d9..4a4201d 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -37,6 +37,7 @@ #include <linux/uaccess.h> #include <rdma/rdma_user_ioctl.h> #include <rdma/ib_user_ioctl_verbs.h> +#include <rdma/ib_user_ioctl_cmds.h> /* * ======================================= @@ -50,6 +51,7 @@ enum uverbs_attr_type { UVERBS_ATTR_TYPE_PTR_OUT, UVERBS_ATTR_TYPE_IDR, UVERBS_ATTR_TYPE_FD, + UVERBS_ATTR_TYPE_ENUM_IN, }; enum uverbs_obj_access { @@ -61,15 +63,32 @@ enum uverbs_obj_access { enum { UVERBS_ATTR_SPEC_F_MANDATORY = 1U << 0, - /* Support extending attributes by length */ - UVERBS_ATTR_SPEC_F_MIN_SZ = 1U << 1, + /* Support extending attributes by length, validate all unknown size == zero */ + UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO = 1U << 1, }; +/* Specification of a single attribute inside the ioctl message */ struct uverbs_attr_spec { - enum uverbs_attr_type type; union { - u16 len; + /* Header shared by all following union members - to reduce space. */ struct { + enum uverbs_attr_type type; + /* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */ + u8 flags; + }; + struct { + enum uverbs_attr_type type; + /* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */ + u8 flags; + /* Current known size to kernel */ + u16 len; + /* User isn't allowed to provide something < min_len */ + u16 min_len; + } ptr; + struct { + enum uverbs_attr_type type; + /* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */ + u8 flags; /* * higher bits mean the namespace and lower bits mean * the type id within the namespace. @@ -77,9 +96,19 @@ struct uverbs_attr_spec { u16 obj_type; u8 access; } obj; + struct { + enum uverbs_attr_type type; + /* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */ + u8 flags; + u8 num_elems; + /* + * The enum attribute can select one of the attributes + * contained in the ids array. Currently only PTR_IN + * attributes are supported in the ids array. + */ + const struct uverbs_attr_spec *ids; + } enum_def; }; - /* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */ - u8 flags; }; struct uverbs_attr_spec_hash { @@ -164,30 +193,45 @@ struct uverbs_object_tree_def { }; #define UA_FLAGS(_flags) .flags = _flags -#define __UVERBS_ATTR0(_id, _len, _type, ...) \ +#define __UVERBS_ATTR0(_id, _type, _fld, _attr, ...) \ ((const struct uverbs_attr_def) \ - {.id = _id, .attr = {.type = _type, {.len = _len}, .flags = 0, } }) -#define __UVERBS_ATTR1(_id, _len, _type, _flags) \ + {.id = _id, .attr = {{._fld = {.type = _type, _attr, .flags = 0, } }, } }) +#define __UVERBS_ATTR1(_id, _type, _fld, _attr, _extra1, ...) \ ((const struct uverbs_attr_def) \ - {.id = _id, .attr = {.type = _type, {.len = _len}, _flags, } }) -#define __UVERBS_ATTR(_id, _len, _type, _flags, _n, ...) \ - __UVERBS_ATTR##_n(_id, _len, _type, _flags) + {.id = _id, .attr = {{._fld = {.type = _type, _attr, _extra1 } },} }) +#define __UVERBS_ATTR2(_id, _type, _fld, _attr, _extra1, _extra2) \ + ((const struct uverbs_attr_def) \ + {.id = _id, .attr = {{._fld = {.type = _type, _attr, _extra1, _extra2 } },} }) +#define __UVERBS_ATTR(_id, _type, _fld, _attr, _extra1, _extra2, _n, ...) \ + __UVERBS_ATTR##_n(_id, _type, _fld, _attr, _extra1, _extra2) + +#define UVERBS_ATTR_TYPE(_type) \ + .min_len = sizeof(_type), .len = sizeof(_type) +#define UVERBS_ATTR_STRUCT(_type, _last) \ + .min_len = ((uintptr_t)(&((_type *)0)->_last + 1)), .len = sizeof(_type) +#define UVERBS_ATTR_SIZE(_min_len, _len) \ + .min_len = _min_len, .len = _len + /* * In new compiler, UVERBS_ATTR could be simplified by declaring it as * [_id] = {.type = _type, .len = _len, ##__VA_ARGS__} * But since we support older compilers too, we need the more complex code. */ -#define UVERBS_ATTR(_id, _len, _type, ...) \ - __UVERBS_ATTR(_id, _len, _type, ##__VA_ARGS__, 1, 0) +#define UVERBS_ATTR(_id, _type, _fld, _attr, ...) \ + __UVERBS_ATTR(_id, _type, _fld, _attr, ##__VA_ARGS__, 2, 1, 0) #define UVERBS_ATTR_PTR_IN_SZ(_id, _len, ...) \ - UVERBS_ATTR(_id, _len, UVERBS_ATTR_TYPE_PTR_IN, ##__VA_ARGS__) + UVERBS_ATTR(_id, UVERBS_ATTR_TYPE_PTR_IN, ptr, _len, ##__VA_ARGS__) /* If sizeof(_type) <= sizeof(u64), this will be inlined rather than a pointer */ #define UVERBS_ATTR_PTR_IN(_id, _type, ...) \ - UVERBS_ATTR_PTR_IN_SZ(_id, sizeof(_type), ##__VA_ARGS__) + UVERBS_ATTR_PTR_IN_SZ(_id, _type, ##__VA_ARGS__) #define UVERBS_ATTR_PTR_OUT_SZ(_id, _len, ...) \ - UVERBS_ATTR(_id, _len, UVERBS_ATTR_TYPE_PTR_OUT, ##__VA_ARGS__) + UVERBS_ATTR(_id, UVERBS_ATTR_TYPE_PTR_OUT, ptr, _len, ##__VA_ARGS__) #define UVERBS_ATTR_PTR_OUT(_id, _type, ...) \ - UVERBS_ATTR_PTR_OUT_SZ(_id, sizeof(_type), ##__VA_ARGS__) + UVERBS_ATTR_PTR_OUT_SZ(_id, _type, ##__VA_ARGS__) +#define UVERBS_ATTR_ENUM_IN(_id, _enum_arr, ...) \ + UVERBS_ATTR(_id, UVERBS_ATTR_TYPE_ENUM_IN, enum_def, \ + .ids = (_enum_arr), \ + .num_elems = ARRAY_SIZE(_enum_arr), ##__VA_ARGS__) /* * In new compiler, UVERBS_ATTR_IDR (and FD) could be simplified by declaring @@ -202,15 +246,13 @@ struct uverbs_object_tree_def { #define ___UVERBS_ATTR_OBJ0(_id, _obj_class, _obj_type, _access, ...)\ ((const struct uverbs_attr_def) \ {.id = _id, \ - .attr = {.type = _obj_class, \ - {.obj = {.obj_type = _obj_type, .access = _access } },\ - .flags = 0} }) + .attr = { {.obj = {.type = _obj_class, .obj_type = _obj_type, \ + .access = _access, .flags = 0 } }, } }) #define ___UVERBS_ATTR_OBJ1(_id, _obj_class, _obj_type, _access, _flags)\ ((const struct uverbs_attr_def) \ {.id = _id, \ - .attr = {.type = _obj_class, \ - {.obj = {.obj_type = _obj_type, .access = _access} }, \ - _flags} }) + .attr = { {.obj = {.type = _obj_class, .obj_type = _obj_type, \ + .access = _access, _flags} }, } }) #define ___UVERBS_ATTR_OBJ(_id, _obj_class, _obj_type, _access, _flags, \ _n, ...) \ ___UVERBS_ATTR_OBJ##_n(_id, _obj_class, _obj_type, _access, _flags) @@ -229,6 +271,11 @@ struct uverbs_object_tree_def { #define DECLARE_UVERBS_ATTR_SPEC(_name, ...) \ const struct uverbs_attr_def _name = __VA_ARGS__ +#define DECLARE_UVERBS_ENUM(_name, ...) \ + const struct uverbs_enum_spec _name = { \ + .len = ARRAY_SIZE(((struct uverbs_attr_spec[]){__VA_ARGS__})),\ + .ids = {__VA_ARGS__}, \ + } #define _UVERBS_METHOD_ATTRS_SZ(...) \ (sizeof((const struct uverbs_attr_def * const []){__VA_ARGS__}) /\ sizeof(const struct uverbs_attr_def *)) @@ -280,6 +327,7 @@ struct uverbs_ptr_attr { u16 len; /* Combination of bits from enum UVERBS_ATTR_F_XXXX */ u16 flags; + u8 enum_id; }; struct uverbs_obj_attr { @@ -336,6 +384,8 @@ static inline bool uverbs_attr_is_valid(const struct uverbs_attr_bundle *attrs_b idx & ~UVERBS_ID_NS_MASK); } +#define IS_UVERBS_COPY_ERR(_ret) ((_ret) && (_ret) != -ENOENT) + static inline const struct uverbs_attr *uverbs_attr_get(const struct uverbs_attr_bundle *attrs_bundle, u16 idx) { @@ -347,6 +397,29 @@ static inline const struct uverbs_attr *uverbs_attr_get(const struct uverbs_attr return &attrs_bundle->hash[idx_bucket].attrs[idx & ~UVERBS_ID_NS_MASK]; } +static inline int uverbs_attr_get_enum_id(const struct uverbs_attr_bundle *attrs_bundle, + u16 idx) +{ + const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); + + if (IS_ERR(attr)) + return PTR_ERR(attr); + + return attr->ptr_attr.enum_id; +} + +static inline void *uverbs_attr_get_obj(const struct uverbs_attr_bundle *attrs_bundle, + u16 idx) +{ + struct ib_uobject *uobj = + uverbs_attr_get(attrs_bundle, idx)->obj_attr.uobject; + + if (IS_ERR(uobj)) + return uobj; + + return uobj->object; +} + static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, size_t idx, const void *from, size_t size) { @@ -385,8 +458,8 @@ static inline int _uverbs_copy_from(void *to, /* * Validation ensures attr->ptr_attr.len >= size. If the caller is - * using UVERBS_ATTR_SPEC_F_MIN_SZ then it must call copy_from with - * the right size. + * using UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO then it must call + * uverbs_copy_from_or_zero. */ if (unlikely(size < attr->ptr_attr.len)) return -EINVAL; @@ -400,9 +473,37 @@ static inline int _uverbs_copy_from(void *to, return 0; } +static inline int _uverbs_copy_from_or_zero(void *to, + const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, + size_t size) +{ + const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); + size_t min_size; + + if (IS_ERR(attr)) + return PTR_ERR(attr); + + min_size = min_t(size_t, size, attr->ptr_attr.len); + + if (uverbs_attr_ptr_is_inline(attr)) + memcpy(to, &attr->ptr_attr.data, min_size); + else if (copy_from_user(to, u64_to_user_ptr(attr->ptr_attr.data), + min_size)) + return -EFAULT; + + if (size > min_size) + memset(to + min_size, 0, size - min_size); + + return 0; +} + #define uverbs_copy_from(to, attrs_bundle, idx) \ _uverbs_copy_from(to, attrs_bundle, idx, sizeof(*to)) +#define uverbs_copy_from_or_zero(to, attrs_bundle, idx) \ + _uverbs_copy_from_or_zero(to, attrs_bundle, idx, sizeof(*to)) + /* ================================================= * Definitions -> Specs infrastructure * ================================================= diff --git a/include/rdma/uverbs_named_ioctl.h b/include/rdma/uverbs_named_ioctl.h new file mode 100644 index 0000000..c5bb4eb --- /dev/null +++ b/include/rdma/uverbs_named_ioctl.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _UVERBS_NAMED_IOCTL_ +#define _UVERBS_NAMED_IOCTL_ + +#include <rdma/uverbs_ioctl.h> + +#ifndef UVERBS_MODULE_NAME +#error "Please #define UVERBS_MODULE_NAME before including rdma/uverbs_named_ioctl.h" +#endif + +#define _UVERBS_PASTE(x, y) x ## y +#define _UVERBS_NAME(x, y) _UVERBS_PASTE(x, y) +#define UVERBS_METHOD(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _method_##id) +#define UVERBS_HANDLER(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _handler_##id) + +#define DECLARE_UVERBS_NAMED_METHOD(id, ...) \ + DECLARE_UVERBS_METHOD(UVERBS_METHOD(id), id, UVERBS_HANDLER(id), ##__VA_ARGS__) + +#define DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(id, handler, ...) \ + DECLARE_UVERBS_METHOD(UVERBS_METHOD(id), id, handler, ##__VA_ARGS__) + +#define DECLARE_UVERBS_NAMED_METHOD_NO_OVERRIDE(id, handler, ...) \ + DECLARE_UVERBS_METHOD(UVERBS_METHOD(id), id, NULL, ##__VA_ARGS__) + +#define DECLARE_UVERBS_NAMED_OBJECT(id, ...) \ + DECLARE_UVERBS_OBJECT(UVERBS_OBJECT(id), id, ##__VA_ARGS__) + +#define _UVERBS_COMP_NAME(x, y, z) _UVERBS_NAME(_UVERBS_NAME(x, y), z) + +#define UVERBS_NO_OVERRIDE NULL + +/* This declares a parsing tree with one object and one method. This is usually + * used for merging driver attributes to the common attributes. The driver has + * a chance to override the handler and type attrs of the original object. + * The __VA_ARGS__ just contains a list of attributes. + */ +#define ADD_UVERBS_ATTRIBUTES(_name, _object, _method, _type_attrs, _handler, ...) \ +static DECLARE_UVERBS_METHOD(_UVERBS_COMP_NAME(UVERBS_MODULE_NAME, \ + _method_, _name), \ + _method, _handler, ##__VA_ARGS__); \ + \ +static DECLARE_UVERBS_OBJECT(_UVERBS_COMP_NAME(UVERBS_MODULE_NAME, \ + _object_, _name), \ + _object, _type_attrs, \ + &_UVERBS_COMP_NAME(UVERBS_MODULE_NAME, \ + _method_, _name)); \ + \ +static DECLARE_UVERBS_OBJECT_TREE(_name, \ + &_UVERBS_COMP_NAME(UVERBS_MODULE_NAME, \ + _object_, _name)) + +/* A very common use case is that the driver doesn't override the handler and + * type_attrs. Therefore, we provide a simplified macro for this common case. + */ +#define ADD_UVERBS_ATTRIBUTES_SIMPLE(_name, _object, _method, ...) \ + ADD_UVERBS_ATTRIBUTES(_name, _object, _method, UVERBS_NO_OVERRIDE, \ + UVERBS_NO_OVERRIDE, ##__VA_ARGS__) + +#endif diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index 5f8e20b..9d56cdb 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -37,26 +37,10 @@ #include <rdma/uverbs_ioctl.h> #include <rdma/ib_user_ioctl_verbs.h> +#define UVERBS_OBJECT(id) uverbs_object_##id + #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) -extern const struct uverbs_object_def uverbs_object_comp_channel; -extern const struct uverbs_object_def uverbs_object_cq; -extern const struct uverbs_object_def uverbs_object_qp; -extern const struct uverbs_object_def uverbs_object_rwq_ind_table; -extern const struct uverbs_object_def uverbs_object_wq; -extern const struct uverbs_object_def uverbs_object_srq; -extern const struct uverbs_object_def uverbs_object_ah; -extern const struct uverbs_object_def uverbs_object_flow; -extern const struct uverbs_object_def uverbs_object_mr; -extern const struct uverbs_object_def uverbs_object_mw; -extern const struct uverbs_object_def uverbs_object_pd; -extern const struct uverbs_object_def uverbs_object_xrcd; -extern const struct uverbs_object_def uverbs_object_device; - -extern const struct uverbs_object_tree_def uverbs_default_objects; -static inline const struct uverbs_object_tree_def *uverbs_default_get_objects(void) -{ - return &uverbs_default_objects; -} +const struct uverbs_object_tree_def *uverbs_default_get_objects(void); #else static inline const struct uverbs_object_tree_def *uverbs_default_get_objects(void) { @@ -72,22 +56,22 @@ static inline struct ib_uobject *__uobj_get(const struct uverbs_obj_type *type, return rdma_lookup_get_uobject(type, ucontext, id, write); } -#define uobj_get_type(_object) uverbs_object_##_object.type_attrs +#define uobj_get_type(_object) UVERBS_OBJECT(_object).type_attrs #define uobj_get_read(_type, _id, _ucontext) \ - __uobj_get(_type, false, _ucontext, _id) + __uobj_get(uobj_get_type(_type), false, _ucontext, _id) -#define uobj_get_obj_read(_object, _id, _ucontext) \ +#define uobj_get_obj_read(_object, _type, _id, _ucontext) \ ({ \ struct ib_uobject *__uobj = \ - __uobj_get(uverbs_object_##_object.type_attrs, \ + __uobj_get(uobj_get_type(_type), \ false, _ucontext, _id); \ \ (struct ib_##_object *)(IS_ERR(__uobj) ? NULL : __uobj->object);\ }) #define uobj_get_write(_type, _id, _ucontext) \ - __uobj_get(_type, true, _ucontext, _id) + __uobj_get(uobj_get_type(_type), true, _ucontext, _id) static inline void uobj_put_read(struct ib_uobject *uobj) { @@ -124,7 +108,7 @@ static inline struct ib_uobject *__uobj_alloc(const struct uverbs_obj_type *type } #define uobj_alloc(_type, ucontext) \ - __uobj_alloc(_type, ucontext) + __uobj_alloc(uobj_get_type(_type), ucontext) #endif diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h index db54115..a7a6111 100644 --- a/include/uapi/rdma/bnxt_re-abi.h +++ b/include/uapi/rdma/bnxt_re-abi.h @@ -53,15 +53,20 @@ struct bnxt_re_uctx_resp { __u32 rsvd; }; +/* + * This struct is placed after the ib_uverbs_alloc_pd_resp struct, which is + * not 8 byted aligned. To avoid undesired padding in various cases we have to + * set this struct to packed. + */ struct bnxt_re_pd_resp { __u32 pdid; __u32 dpi; __u64 dbr; -}; +} __attribute__((packed, aligned(4))); struct bnxt_re_cq_req { - __u64 cq_va; - __u64 cq_handle; + __aligned_u64 cq_va; + __aligned_u64 cq_handle; }; struct bnxt_re_cq_resp { @@ -72,9 +77,9 @@ struct bnxt_re_cq_resp { }; struct bnxt_re_qp_req { - __u64 qpsva; - __u64 qprva; - __u64 qp_handle; + __aligned_u64 qpsva; + __aligned_u64 qprva; + __aligned_u64 qp_handle; }; struct bnxt_re_qp_resp { @@ -83,8 +88,8 @@ struct bnxt_re_qp_resp { }; struct bnxt_re_srq_req { - __u64 srqva; - __u64 srq_handle; + __aligned_u64 srqva; + __aligned_u64 srq_handle; }; struct bnxt_re_srq_resp { diff --git a/include/uapi/rdma/cxgb3-abi.h b/include/uapi/rdma/cxgb3-abi.h index d5745e4..9acb4b7 100644 --- a/include/uapi/rdma/cxgb3-abi.h +++ b/include/uapi/rdma/cxgb3-abi.h @@ -41,21 +41,21 @@ * Make sure that all structs defined in this file remain laid out so * that they pack the same way on 32-bit and 64-bit architectures (to * avoid incompatibility between 32-bit userspace and 64-bit kernels). - * In particular do not use pointer types -- pass pointers in __u64 + * In particular do not use pointer types -- pass pointers in __aligned_u64 * instead. */ struct iwch_create_cq_req { - __u64 user_rptr_addr; + __aligned_u64 user_rptr_addr; }; struct iwch_create_cq_resp_v0 { - __u64 key; + __aligned_u64 key; __u32 cqid; __u32 size_log2; }; struct iwch_create_cq_resp { - __u64 key; + __aligned_u64 key; __u32 cqid; __u32 size_log2; __u32 memsize; @@ -63,8 +63,8 @@ struct iwch_create_cq_resp { }; struct iwch_create_qp_resp { - __u64 key; - __u64 db_key; + __aligned_u64 key; + __aligned_u64 db_key; __u32 qpid; __u32 size_log2; __u32 sq_size_log2; @@ -74,4 +74,9 @@ struct iwch_create_qp_resp { struct iwch_reg_user_mr_resp { __u32 pbl_addr; }; + +struct iwch_alloc_pd_resp { + __u32 pdid; +}; + #endif /* CXGB3_ABI_USER_H */ diff --git a/include/uapi/rdma/cxgb4-abi.h b/include/uapi/rdma/cxgb4-abi.h index 05f71f1..1fefd01 100644 --- a/include/uapi/rdma/cxgb4-abi.h +++ b/include/uapi/rdma/cxgb4-abi.h @@ -41,13 +41,13 @@ * Make sure that all structs defined in this file remain laid out so * that they pack the same way on 32-bit and 64-bit architectures (to * avoid incompatibility between 32-bit userspace and 64-bit kernels). - * In particular do not use pointer types -- pass pointers in __u64 + * In particular do not use pointer types -- pass pointers in __aligned_u64 * instead. */ struct c4iw_create_cq_resp { - __u64 key; - __u64 gts_key; - __u64 memsize; + __aligned_u64 key; + __aligned_u64 gts_key; + __aligned_u64 memsize; __u32 cqid; __u32 size; __u32 qid_mask; @@ -59,13 +59,13 @@ enum { }; struct c4iw_create_qp_resp { - __u64 ma_sync_key; - __u64 sq_key; - __u64 rq_key; - __u64 sq_db_gts_key; - __u64 rq_db_gts_key; - __u64 sq_memsize; - __u64 rq_memsize; + __aligned_u64 ma_sync_key; + __aligned_u64 sq_key; + __aligned_u64 rq_key; + __aligned_u64 sq_db_gts_key; + __aligned_u64 rq_db_gts_key; + __aligned_u64 sq_memsize; + __aligned_u64 rq_memsize; __u32 sqid; __u32 rqid; __u32 sq_size; @@ -75,8 +75,13 @@ struct c4iw_create_qp_resp { }; struct c4iw_alloc_ucontext_resp { - __u64 status_page_key; + __aligned_u64 status_page_key; __u32 status_page_size; __u32 reserved; /* explicit padding (optional for i386) */ }; + +struct c4iw_alloc_pd_resp { + __u32 pdid; +}; + #endif /* CXGB4_ABI_USER_H */ diff --git a/include/uapi/rdma/hfi/hfi1_ioctl.h b/include/uapi/rdma/hfi/hfi1_ioctl.h index 9de78c5..8f3d9fe 100644 --- a/include/uapi/rdma/hfi/hfi1_ioctl.h +++ b/include/uapi/rdma/hfi/hfi1_ioctl.h @@ -79,7 +79,7 @@ struct hfi1_user_info { }; struct hfi1_ctxt_info { - __u64 runtime_flags; /* chip/drv runtime flags (HFI1_CAP_*) */ + __aligned_u64 runtime_flags; /* chip/drv runtime flags (HFI1_CAP_*) */ __u32 rcvegr_size; /* size of each eager buffer */ __u16 num_active; /* number of active units */ __u16 unit; /* unit (chip) assigned to caller */ @@ -98,9 +98,9 @@ struct hfi1_ctxt_info { struct hfi1_tid_info { /* virtual address of first page in transfer */ - __u64 vaddr; + __aligned_u64 vaddr; /* pointer to tid array. this array is big enough */ - __u64 tidlist; + __aligned_u64 tidlist; /* number of tids programmed by this request */ __u32 tidcnt; /* length of transfer buffer programmed by this request */ @@ -131,23 +131,23 @@ struct hfi1_base_info { */ __u32 bthqp; /* PIO credit return address, */ - __u64 sc_credits_addr; + __aligned_u64 sc_credits_addr; /* * Base address of write-only pio buffers for this process. * Each buffer has sendpio_credits*64 bytes. */ - __u64 pio_bufbase_sop; + __aligned_u64 pio_bufbase_sop; /* * Base address of write-only pio buffers for this process. * Each buffer has sendpio_credits*64 bytes. */ - __u64 pio_bufbase; + __aligned_u64 pio_bufbase; /* address where receive buffer queue is mapped into */ - __u64 rcvhdr_bufbase; + __aligned_u64 rcvhdr_bufbase; /* base address of Eager receive buffers. */ - __u64 rcvegr_bufbase; + __aligned_u64 rcvegr_bufbase; /* base address of SDMA completion ring */ - __u64 sdma_comp_bufbase; + __aligned_u64 sdma_comp_bufbase; /* * User register base for init code, not to be used directly by * protocol or applications. Always maps real chip register space. @@ -155,20 +155,20 @@ struct hfi1_base_info { * ur_rcvhdrhead, ur_rcvhdrtail, ur_rcvegrhead, ur_rcvegrtail, * ur_rcvtidflow */ - __u64 user_regbase; + __aligned_u64 user_regbase; /* notification events */ - __u64 events_bufbase; + __aligned_u64 events_bufbase; /* status page */ - __u64 status_bufbase; + __aligned_u64 status_bufbase; /* rcvhdrtail update */ - __u64 rcvhdrtail_base; + __aligned_u64 rcvhdrtail_base; /* * shared memory pages for subctxts if ctxt is shared; these cover * all the processes in the group sharing a single context. * all have enough space for the num_subcontexts value on this job. */ - __u64 subctxt_uregbase; - __u64 subctxt_rcvegrbuf; - __u64 subctxt_rcvhdrbuf; + __aligned_u64 subctxt_uregbase; + __aligned_u64 subctxt_rcvegrbuf; + __aligned_u64 subctxt_rcvhdrbuf; }; #endif /* _LINIUX__HFI1_IOCTL_H */ diff --git a/include/uapi/rdma/hfi/hfi1_user.h b/include/uapi/rdma/hfi/hfi1_user.h index 791bea2..c6a984c 100644 --- a/include/uapi/rdma/hfi/hfi1_user.h +++ b/include/uapi/rdma/hfi/hfi1_user.h @@ -177,8 +177,8 @@ struct hfi1_sdma_comp_entry { * Device status and notifications from driver to user-space. */ struct hfi1_status { - __u64 dev; /* device/hw status bits */ - __u64 port; /* port state and status bits */ + __aligned_u64 dev; /* device/hw status bits */ + __aligned_u64 port; /* port state and status bits */ char freezemsg[0]; }; @@ -219,7 +219,7 @@ struct sdma_req_info { * in charge of managing its own ring. */ __u16 comp_idx; -} __packed; +} __attribute__((__packed__)); /* * SW KDETH header. @@ -230,7 +230,7 @@ struct hfi1_kdeth_header { __le16 jkey; __le16 hcrc; __le32 swdata[7]; -} __packed; +} __attribute__((__packed__)); /* * Structure describing the headers that User space uses. The @@ -241,7 +241,7 @@ struct hfi1_pkt_header { __be16 lrh[4]; __be32 bth[3]; struct hfi1_kdeth_header kdeth; -} __packed; +} __attribute__((__packed__)); /* diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h index a9c03b0..7092c8d 100644 --- a/include/uapi/rdma/hns-abi.h +++ b/include/uapi/rdma/hns-abi.h @@ -37,19 +37,35 @@ #include <linux/types.h> struct hns_roce_ib_create_cq { - __u64 buf_addr; + __aligned_u64 buf_addr; + __aligned_u64 db_addr; +}; + +struct hns_roce_ib_create_cq_resp { + __aligned_u64 cqn; /* Only 32 bits used, 64 for compat */ + __aligned_u64 cap_flags; }; struct hns_roce_ib_create_qp { - __u64 buf_addr; - __u64 db_addr; + __aligned_u64 buf_addr; + __aligned_u64 db_addr; __u8 log_sq_bb_count; __u8 log_sq_stride; __u8 sq_no_prefetch; __u8 reserved[5]; }; +struct hns_roce_ib_create_qp_resp { + __aligned_u64 cap_flags; +}; + struct hns_roce_ib_alloc_ucontext_resp { __u32 qp_tab_size; + __u32 reserved; }; + +struct hns_roce_ib_alloc_pd_resp { + __u32 pdn; +}; + #endif /* HNS_ABI_USER_H */ diff --git a/drivers/infiniband/hw/i40iw/i40iw_ucontext.h b/include/uapi/rdma/i40iw-abi.h index 57d3f1d..79890ba 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ucontext.h +++ b/include/uapi/rdma/i40iw-abi.h @@ -34,8 +34,8 @@ * */ -#ifndef I40IW_USER_CONTEXT_H -#define I40IW_USER_CONTEXT_H +#ifndef I40IW_ABI_H +#define I40IW_ABI_H #include <linux/types.h> @@ -61,17 +61,17 @@ struct i40iw_alloc_pd_resp { }; struct i40iw_create_cq_req { - __u64 user_cq_buffer; - __u64 user_shadow_area; + __aligned_u64 user_cq_buffer; + __aligned_u64 user_shadow_area; }; struct i40iw_create_qp_req { - __u64 user_wqe_buffers; - __u64 user_compl_ctx; + __aligned_u64 user_wqe_buffers; + __aligned_u64 user_compl_ctx; /* UDA QP PHB */ - __u64 user_sq_phb; /* place for VA of the sq phb buff */ - __u64 user_rq_phb; /* place for VA of the rq phb buff */ + __aligned_u64 user_sq_phb; /* place for VA of the sq phb buff */ + __aligned_u64 user_rq_phb; /* place for VA of the rq phb buff */ }; enum i40iw_memreg_type { diff --git a/include/uapi/rdma/ib_user_cm.h b/include/uapi/rdma/ib_user_cm.h index f4041bd..4a8f956 100644 --- a/include/uapi/rdma/ib_user_cm.h +++ b/include/uapi/rdma/ib_user_cm.h @@ -73,8 +73,8 @@ struct ib_ucm_cmd_hdr { }; struct ib_ucm_create_id { - __u64 uid; - __u64 response; + __aligned_u64 uid; + __aligned_u64 response; }; struct ib_ucm_create_id_resp { @@ -82,7 +82,7 @@ struct ib_ucm_create_id_resp { }; struct ib_ucm_destroy_id { - __u64 response; + __aligned_u64 response; __u32 id; __u32 reserved; }; @@ -92,7 +92,7 @@ struct ib_ucm_destroy_id_resp { }; struct ib_ucm_attr_id { - __u64 response; + __aligned_u64 response; __u32 id; __u32 reserved; }; @@ -105,7 +105,7 @@ struct ib_ucm_attr_id_resp { }; struct ib_ucm_init_qp_attr { - __u64 response; + __aligned_u64 response; __u32 id; __u32 qp_state; }; @@ -123,7 +123,7 @@ struct ib_ucm_notify { }; struct ib_ucm_private_data { - __u64 data; + __aligned_u64 data; __u32 id; __u8 len; __u8 reserved[3]; @@ -135,9 +135,9 @@ struct ib_ucm_req { __u32 qp_type; __u32 psn; __be64 sid; - __u64 data; - __u64 primary_path; - __u64 alternate_path; + __aligned_u64 data; + __aligned_u64 primary_path; + __aligned_u64 alternate_path; __u8 len; __u8 peer_to_peer; __u8 responder_resources; @@ -153,8 +153,8 @@ struct ib_ucm_req { }; struct ib_ucm_rep { - __u64 uid; - __u64 data; + __aligned_u64 uid; + __aligned_u64 data; __u32 id; __u32 qpn; __u32 psn; @@ -172,15 +172,15 @@ struct ib_ucm_rep { struct ib_ucm_info { __u32 id; __u32 status; - __u64 info; - __u64 data; + __aligned_u64 info; + __aligned_u64 data; __u8 info_len; __u8 data_len; __u8 reserved[6]; }; struct ib_ucm_mra { - __u64 data; + __aligned_u64 data; __u32 id; __u8 len; __u8 timeout; @@ -188,8 +188,8 @@ struct ib_ucm_mra { }; struct ib_ucm_lap { - __u64 path; - __u64 data; + __aligned_u64 path; + __aligned_u64 data; __u32 id; __u8 len; __u8 reserved[3]; @@ -199,8 +199,8 @@ struct ib_ucm_sidr_req { __u32 id; __u32 timeout; __be64 sid; - __u64 data; - __u64 path; + __aligned_u64 data; + __aligned_u64 path; __u16 reserved_pkey; __u8 len; __u8 max_cm_retries; @@ -212,8 +212,8 @@ struct ib_ucm_sidr_rep { __u32 qpn; __u32 qkey; __u32 status; - __u64 info; - __u64 data; + __aligned_u64 info; + __aligned_u64 data; __u8 info_len; __u8 data_len; __u8 reserved[6]; @@ -222,9 +222,9 @@ struct ib_ucm_sidr_rep { * event notification ABI structures. */ struct ib_ucm_event_get { - __u64 response; - __u64 data; - __u64 info; + __aligned_u64 response; + __aligned_u64 data; + __aligned_u64 info; __u8 data_len; __u8 info_len; __u8 reserved[6]; @@ -303,7 +303,7 @@ struct ib_ucm_sidr_rep_event_resp { #define IB_UCM_PRES_ALTERNATE 0x08 struct ib_ucm_event_resp { - __u64 uid; + __aligned_u64 uid; __u32 id; __u32 event; __u32 present; diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h new file mode 100644 index 0000000..83e3890 --- /dev/null +++ b/include/uapi/rdma/ib_user_ioctl_cmds.h @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef IB_USER_IOCTL_CMDS_H +#define IB_USER_IOCTL_CMDS_H + +#define UVERBS_ID_NS_MASK 0xF000 +#define UVERBS_ID_NS_SHIFT 12 + +#define UVERBS_UDATA_DRIVER_DATA_NS 1 +#define UVERBS_UDATA_DRIVER_DATA_FLAG (1UL << UVERBS_ID_NS_SHIFT) + +enum uverbs_default_objects { + UVERBS_OBJECT_DEVICE, /* No instances of DEVICE are allowed */ + UVERBS_OBJECT_PD, + UVERBS_OBJECT_COMP_CHANNEL, + UVERBS_OBJECT_CQ, + UVERBS_OBJECT_QP, + UVERBS_OBJECT_SRQ, + UVERBS_OBJECT_AH, + UVERBS_OBJECT_MR, + UVERBS_OBJECT_MW, + UVERBS_OBJECT_FLOW, + UVERBS_OBJECT_XRCD, + UVERBS_OBJECT_RWQ_IND_TBL, + UVERBS_OBJECT_WQ, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_OBJECT_DM, +}; + +enum { + UVERBS_ATTR_UHW_IN = UVERBS_UDATA_DRIVER_DATA_FLAG, + UVERBS_ATTR_UHW_OUT, +}; + +enum uverbs_attrs_create_cq_cmd_attr_ids { + UVERBS_ATTR_CREATE_CQ_HANDLE, + UVERBS_ATTR_CREATE_CQ_CQE, + UVERBS_ATTR_CREATE_CQ_USER_HANDLE, + UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL, + UVERBS_ATTR_CREATE_CQ_COMP_VECTOR, + UVERBS_ATTR_CREATE_CQ_FLAGS, + UVERBS_ATTR_CREATE_CQ_RESP_CQE, +}; + +enum uverbs_attrs_destroy_cq_cmd_attr_ids { + UVERBS_ATTR_DESTROY_CQ_HANDLE, + UVERBS_ATTR_DESTROY_CQ_RESP, +}; + +enum uverbs_attrs_create_flow_action_esp { + UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, + UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, + UVERBS_ATTR_FLOW_ACTION_ESP_ESN, + UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, + UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, + UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, +}; + +enum uverbs_attrs_destroy_flow_action_esp { + UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE, +}; + +enum uverbs_methods_cq { + UVERBS_METHOD_CQ_CREATE, + UVERBS_METHOD_CQ_DESTROY, +}; + +enum uverbs_methods_actions_flow_action_ops { + UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, + UVERBS_METHOD_FLOW_ACTION_DESTROY, + UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY, +}; + +enum uverbs_attrs_alloc_dm_cmd_attr_ids { + UVERBS_ATTR_ALLOC_DM_HANDLE, + UVERBS_ATTR_ALLOC_DM_LENGTH, + UVERBS_ATTR_ALLOC_DM_ALIGNMENT, +}; + +enum uverbs_attrs_free_dm_cmd_attr_ids { + UVERBS_ATTR_FREE_DM_HANDLE, +}; + +enum uverbs_methods_dm { + UVERBS_METHOD_DM_ALLOC, + UVERBS_METHOD_DM_FREE, +}; + +enum uverbs_attrs_reg_dm_mr_cmd_attr_ids { + UVERBS_ATTR_REG_DM_MR_HANDLE, + UVERBS_ATTR_REG_DM_MR_OFFSET, + UVERBS_ATTR_REG_DM_MR_LENGTH, + UVERBS_ATTR_REG_DM_MR_PD_HANDLE, + UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS, + UVERBS_ATTR_REG_DM_MR_DM_HANDLE, + UVERBS_ATTR_REG_DM_MR_RESP_LKEY, + UVERBS_ATTR_REG_DM_MR_RESP_RKEY, +}; + +enum uverbs_methods_mr { + UVERBS_METHOD_DM_MR_REG, +}; + +#endif diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h index 842792e..04e46ea 100644 --- a/include/uapi/rdma/ib_user_ioctl_verbs.h +++ b/include/uapi/rdma/ib_user_ioctl_verbs.h @@ -1,5 +1,6 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ /* - * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2017-2018, Mellanox Technologies inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -33,52 +34,69 @@ #ifndef IB_USER_IOCTL_VERBS_H #define IB_USER_IOCTL_VERBS_H -#include <rdma/rdma_user_ioctl.h> - -#define UVERBS_UDATA_DRIVER_DATA_NS 1 -#define UVERBS_UDATA_DRIVER_DATA_FLAG (1UL << UVERBS_ID_NS_SHIFT) - -enum uverbs_default_objects { - UVERBS_OBJECT_DEVICE, /* No instances of DEVICE are allowed */ - UVERBS_OBJECT_PD, - UVERBS_OBJECT_COMP_CHANNEL, - UVERBS_OBJECT_CQ, - UVERBS_OBJECT_QP, - UVERBS_OBJECT_SRQ, - UVERBS_OBJECT_AH, - UVERBS_OBJECT_MR, - UVERBS_OBJECT_MW, - UVERBS_OBJECT_FLOW, - UVERBS_OBJECT_XRCD, - UVERBS_OBJECT_RWQ_IND_TBL, - UVERBS_OBJECT_WQ, - UVERBS_OBJECT_LAST, +#include <linux/types.h> + +#ifndef RDMA_UAPI_PTR +#define RDMA_UAPI_PTR(_type, _name) __aligned_u64 _name +#endif + +enum ib_uverbs_flow_action_esp_keymat { + IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM, }; -enum { - UVERBS_UHW_IN = UVERBS_UDATA_DRIVER_DATA_FLAG, - UVERBS_UHW_OUT, +enum ib_uverbs_flow_action_esp_keymat_aes_gcm_iv_algo { + IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ, }; -enum uverbs_create_cq_cmd_attr_ids { - CREATE_CQ_HANDLE, - CREATE_CQ_CQE, - CREATE_CQ_USER_HANDLE, - CREATE_CQ_COMP_CHANNEL, - CREATE_CQ_COMP_VECTOR, - CREATE_CQ_FLAGS, - CREATE_CQ_RESP_CQE, +struct ib_uverbs_flow_action_esp_keymat_aes_gcm { + __aligned_u64 iv; + __u32 iv_algo; /* Use enum ib_uverbs_flow_action_esp_keymat_aes_gcm_iv_algo */ + + __u32 salt; + __u32 icv_len; + + __u32 key_len; + __u32 aes_key[256 / 32]; }; -enum uverbs_destroy_cq_cmd_attr_ids { - DESTROY_CQ_HANDLE, - DESTROY_CQ_RESP, +enum ib_uverbs_flow_action_esp_replay { + IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE, + IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP, }; -enum uverbs_actions_cq_ops { - UVERBS_CQ_CREATE, - UVERBS_CQ_DESTROY, +struct ib_uverbs_flow_action_esp_replay_bmp { + __u32 size; }; -#endif +enum ib_uverbs_flow_action_esp_flags { + IB_UVERBS_FLOW_ACTION_ESP_FLAGS_INLINE_CRYPTO = 0UL << 0, /* Default */ + IB_UVERBS_FLOW_ACTION_ESP_FLAGS_FULL_OFFLOAD = 1UL << 0, + + IB_UVERBS_FLOW_ACTION_ESP_FLAGS_TUNNEL = 0UL << 1, /* Default */ + IB_UVERBS_FLOW_ACTION_ESP_FLAGS_TRANSPORT = 1UL << 1, + + IB_UVERBS_FLOW_ACTION_ESP_FLAGS_DECRYPT = 0UL << 2, /* Default */ + IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT = 1UL << 2, + IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW = 1UL << 3, +}; + +struct ib_uverbs_flow_action_esp_encap { + /* This struct represents a list of pointers to flow_xxxx_filter that + * encapsulates the payload in ESP tunnel mode. + */ + RDMA_UAPI_PTR(void *, val_ptr); /* pointer to a flow_xxxx_filter */ + RDMA_UAPI_PTR(struct ib_uverbs_flow_action_esp_encap *, next_ptr); + __u16 len; /* Len of the filter struct val_ptr points to */ + __u16 type; /* Use flow_spec_type enum */ +}; + +struct ib_uverbs_flow_action_esp { + __u32 spi; + __u32 seq; + __u32 tfc_pad; + __u32 flags; + __aligned_u64 hard_limit_pkts; +}; + +#endif diff --git a/include/uapi/rdma/ib_user_mad.h b/include/uapi/rdma/ib_user_mad.h index 330a3c5..ef92118 100644 --- a/include/uapi/rdma/ib_user_mad.h +++ b/include/uapi/rdma/ib_user_mad.h @@ -143,7 +143,7 @@ struct ib_user_mad_hdr { */ struct ib_user_mad { struct ib_user_mad_hdr hdr; - __u64 data[0]; + __aligned_u64 data[0]; }; /* @@ -225,7 +225,7 @@ struct ib_user_mad_reg_req2 { __u8 mgmt_class_version; __u16 res; __u32 flags; - __u64 method_mask[2]; + __aligned_u64 method_mask[2]; __u32 oui; __u8 rmpp_version; __u8 reserved[3]; diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 04d0e67..9be0739 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -117,13 +117,13 @@ enum { */ struct ib_uverbs_async_event_desc { - __u64 element; + __aligned_u64 element; __u32 event_type; /* enum ib_event_type */ __u32 reserved; }; struct ib_uverbs_comp_event_desc { - __u64 cq_handle; + __aligned_u64 cq_handle; }; struct ib_uverbs_cq_moderation_caps { @@ -141,10 +141,7 @@ struct ib_uverbs_cq_moderation_caps { */ #define IB_USER_VERBS_CMD_COMMAND_MASK 0xff -#define IB_USER_VERBS_CMD_FLAGS_MASK 0xff000000u -#define IB_USER_VERBS_CMD_FLAGS_SHIFT 24 - -#define IB_USER_VERBS_CMD_FLAG_EXTENDED 0x80 +#define IB_USER_VERBS_CMD_FLAG_EXTENDED 0x80000000u struct ib_uverbs_cmd_hdr { __u32 command; @@ -153,15 +150,15 @@ struct ib_uverbs_cmd_hdr { }; struct ib_uverbs_ex_cmd_hdr { - __u64 response; + __aligned_u64 response; __u16 provider_in_words; __u16 provider_out_words; __u32 cmd_hdr_reserved; }; struct ib_uverbs_get_context { - __u64 response; - __u64 driver_data[0]; + __aligned_u64 response; + __aligned_u64 driver_data[0]; }; struct ib_uverbs_get_context_resp { @@ -170,16 +167,16 @@ struct ib_uverbs_get_context_resp { }; struct ib_uverbs_query_device { - __u64 response; - __u64 driver_data[0]; + __aligned_u64 response; + __aligned_u64 driver_data[0]; }; struct ib_uverbs_query_device_resp { - __u64 fw_ver; + __aligned_u64 fw_ver; __be64 node_guid; __be64 sys_image_guid; - __u64 max_mr_size; - __u64 page_size_cap; + __aligned_u64 max_mr_size; + __aligned_u64 page_size_cap; __u32 vendor_id; __u32 vendor_part_id; __u32 hw_ver; @@ -224,7 +221,7 @@ struct ib_uverbs_ex_query_device { }; struct ib_uverbs_odp_caps { - __u64 general_caps; + __aligned_u64 general_caps; struct { __u32 rc_odp_caps; __u32 uc_odp_caps; @@ -263,21 +260,22 @@ struct ib_uverbs_ex_query_device_resp { __u32 comp_mask; __u32 response_length; struct ib_uverbs_odp_caps odp_caps; - __u64 timestamp_mask; - __u64 hca_core_clock; /* in KHZ */ - __u64 device_cap_flags_ex; + __aligned_u64 timestamp_mask; + __aligned_u64 hca_core_clock; /* in KHZ */ + __aligned_u64 device_cap_flags_ex; struct ib_uverbs_rss_caps rss_caps; __u32 max_wq_type_rq; __u32 raw_packet_caps; struct ib_uverbs_tm_caps tm_caps; struct ib_uverbs_cq_moderation_caps cq_moderation_caps; + __aligned_u64 max_dm_size; }; struct ib_uverbs_query_port { - __u64 response; + __aligned_u64 response; __u8 port_num; __u8 reserved[7]; - __u64 driver_data[0]; + __aligned_u64 driver_data[0]; }; struct ib_uverbs_query_port_resp { @@ -305,8 +303,8 @@ struct ib_uverbs_query_port_resp { }; struct ib_uverbs_alloc_pd { - __u64 response; - __u64 driver_data[0]; + __aligned_u64 response; + __aligned_u64 driver_data[0]; }; struct ib_uverbs_alloc_pd_resp { @@ -318,10 +316,10 @@ struct ib_uverbs_dealloc_pd { }; struct ib_uverbs_open_xrcd { - __u64 response; + __aligned_u64 response; __u32 fd; __u32 oflags; - __u64 driver_data[0]; + __aligned_u64 driver_data[0]; }; struct ib_uverbs_open_xrcd_resp { @@ -333,13 +331,13 @@ struct ib_uverbs_close_xrcd { }; struct ib_uverbs_reg_mr { - __u64 response; - __u64 start; - __u64 length; - __u64 hca_va; + __aligned_u64 response; + __aligned_u64 start; + __aligned_u64 length; + __aligned_u64 hca_va; __u32 pd_handle; __u32 access_flags; - __u64 driver_data[0]; + __aligned_u64 driver_data[0]; }; struct ib_uverbs_reg_mr_resp { @@ -349,12 +347,12 @@ struct ib_uverbs_reg_mr_resp { }; struct ib_uverbs_rereg_mr { - __u64 response; + __aligned_u64 response; __u32 mr_handle; __u32 flags; - __u64 start; - __u64 length; - __u64 hca_va; + __aligned_u64 start; + __aligned_u64 length; + __aligned_u64 hca_va; __u32 pd_handle; __u32 access_flags; }; @@ -369,7 +367,7 @@ struct ib_uverbs_dereg_mr { }; struct ib_uverbs_alloc_mw { - __u64 response; + __aligned_u64 response; __u32 pd_handle; __u8 mw_type; __u8 reserved[3]; @@ -385,7 +383,7 @@ struct ib_uverbs_dealloc_mw { }; struct ib_uverbs_create_comp_channel { - __u64 response; + __aligned_u64 response; }; struct ib_uverbs_create_comp_channel_resp { @@ -393,13 +391,13 @@ struct ib_uverbs_create_comp_channel_resp { }; struct ib_uverbs_create_cq { - __u64 response; - __u64 user_handle; + __aligned_u64 response; + __aligned_u64 user_handle; __u32 cqe; __u32 comp_vector; __s32 comp_channel; __u32 reserved; - __u64 driver_data[0]; + __aligned_u64 driver_data[0]; }; enum ib_uverbs_ex_create_cq_flags { @@ -408,7 +406,7 @@ enum ib_uverbs_ex_create_cq_flags { }; struct ib_uverbs_ex_create_cq { - __u64 user_handle; + __aligned_u64 user_handle; __u32 cqe; __u32 comp_vector; __s32 comp_channel; @@ -429,26 +427,26 @@ struct ib_uverbs_ex_create_cq_resp { }; struct ib_uverbs_resize_cq { - __u64 response; + __aligned_u64 response; __u32 cq_handle; __u32 cqe; - __u64 driver_data[0]; + __aligned_u64 driver_data[0]; }; struct ib_uverbs_resize_cq_resp { __u32 cqe; __u32 reserved; - __u64 driver_data[0]; + __aligned_u64 driver_data[0]; }; struct ib_uverbs_poll_cq { - __u64 response; + __aligned_u64 response; __u32 cq_handle; __u32 ne; }; struct ib_uverbs_wc { - __u64 wr_id; + __aligned_u64 wr_id; __u32 status; __u32 opcode; __u32 vendor_err; @@ -480,7 +478,7 @@ struct ib_uverbs_req_notify_cq { }; struct ib_uverbs_destroy_cq { - __u64 response; + __aligned_u64 response; __u32 cq_handle; __u32 reserved; }; @@ -549,8 +547,8 @@ struct ib_uverbs_qp_attr { }; struct ib_uverbs_create_qp { - __u64 response; - __u64 user_handle; + __aligned_u64 response; + __aligned_u64 user_handle; __u32 pd_handle; __u32 send_cq_handle; __u32 recv_cq_handle; @@ -564,7 +562,7 @@ struct ib_uverbs_create_qp { __u8 qp_type; __u8 is_srq; __u8 reserved; - __u64 driver_data[0]; + __aligned_u64 driver_data[0]; }; enum ib_uverbs_create_qp_mask { @@ -590,7 +588,7 @@ enum { }; struct ib_uverbs_ex_create_qp { - __u64 user_handle; + __aligned_u64 user_handle; __u32 pd_handle; __u32 send_cq_handle; __u32 recv_cq_handle; @@ -611,13 +609,13 @@ struct ib_uverbs_ex_create_qp { }; struct ib_uverbs_open_qp { - __u64 response; - __u64 user_handle; + __aligned_u64 response; + __aligned_u64 user_handle; __u32 pd_handle; __u32 qpn; __u8 qp_type; __u8 reserved[7]; - __u64 driver_data[0]; + __aligned_u64 driver_data[0]; }; /* also used for open response */ @@ -658,10 +656,10 @@ struct ib_uverbs_qp_dest { }; struct ib_uverbs_query_qp { - __u64 response; + __aligned_u64 response; __u32 qp_handle; __u32 attr_mask; - __u64 driver_data[0]; + __aligned_u64 driver_data[0]; }; struct ib_uverbs_query_qp_resp { @@ -695,7 +693,7 @@ struct ib_uverbs_query_qp_resp { __u8 alt_timeout; __u8 sq_sig_all; __u8 reserved[5]; - __u64 driver_data[0]; + __aligned_u64 driver_data[0]; }; struct ib_uverbs_modify_qp { @@ -725,7 +723,7 @@ struct ib_uverbs_modify_qp { __u8 alt_port_num; __u8 alt_timeout; __u8 reserved[2]; - __u64 driver_data[0]; + __aligned_u64 driver_data[0]; }; struct ib_uverbs_ex_modify_qp { @@ -743,7 +741,7 @@ struct ib_uverbs_ex_modify_qp_resp { }; struct ib_uverbs_destroy_qp { - __u64 response; + __aligned_u64 response; __u32 qp_handle; __u32 reserved; }; @@ -759,13 +757,13 @@ struct ib_uverbs_destroy_qp_resp { * document the ABI. */ struct ib_uverbs_sge { - __u64 addr; + __aligned_u64 addr; __u32 length; __u32 lkey; }; struct ib_uverbs_send_wr { - __u64 wr_id; + __aligned_u64 wr_id; __u32 num_sge; __u32 opcode; __u32 send_flags; @@ -775,14 +773,14 @@ struct ib_uverbs_send_wr { } ex; union { struct { - __u64 remote_addr; + __aligned_u64 remote_addr; __u32 rkey; __u32 reserved; } rdma; struct { - __u64 remote_addr; - __u64 compare_add; - __u64 swap; + __aligned_u64 remote_addr; + __aligned_u64 compare_add; + __aligned_u64 swap; __u32 rkey; __u32 reserved; } atomic; @@ -796,7 +794,7 @@ struct ib_uverbs_send_wr { }; struct ib_uverbs_post_send { - __u64 response; + __aligned_u64 response; __u32 qp_handle; __u32 wr_count; __u32 sge_count; @@ -809,13 +807,13 @@ struct ib_uverbs_post_send_resp { }; struct ib_uverbs_recv_wr { - __u64 wr_id; + __aligned_u64 wr_id; __u32 num_sge; __u32 reserved; }; struct ib_uverbs_post_recv { - __u64 response; + __aligned_u64 response; __u32 qp_handle; __u32 wr_count; __u32 sge_count; @@ -828,7 +826,7 @@ struct ib_uverbs_post_recv_resp { }; struct ib_uverbs_post_srq_recv { - __u64 response; + __aligned_u64 response; __u32 srq_handle; __u32 wr_count; __u32 sge_count; @@ -841,8 +839,8 @@ struct ib_uverbs_post_srq_recv_resp { }; struct ib_uverbs_create_ah { - __u64 response; - __u64 user_handle; + __aligned_u64 response; + __aligned_u64 user_handle; __u32 pd_handle; __u32 reserved; struct ib_uverbs_ah_attr attr; @@ -861,7 +859,7 @@ struct ib_uverbs_attach_mcast { __u32 qp_handle; __u16 mlid; __u16 reserved; - __u64 driver_data[0]; + __aligned_u64 driver_data[0]; }; struct ib_uverbs_detach_mcast { @@ -869,7 +867,7 @@ struct ib_uverbs_detach_mcast { __u32 qp_handle; __u16 mlid; __u16 reserved; - __u64 driver_data[0]; + __aligned_u64 driver_data[0]; }; struct ib_uverbs_flow_spec_hdr { @@ -877,7 +875,7 @@ struct ib_uverbs_flow_spec_hdr { __u16 size; __u16 reserved; /* followed by flow_spec */ - __u64 flow_spec_data[0]; + __aligned_u64 flow_spec_data[0]; }; struct ib_uverbs_flow_eth_filter { @@ -987,6 +985,19 @@ struct ib_uverbs_flow_spec_action_drop { }; }; +struct ib_uverbs_flow_spec_action_handle { + union { + struct ib_uverbs_flow_spec_hdr hdr; + struct { + __u32 type; + __u16 size; + __u16 reserved; + }; + }; + __u32 handle; + __u32 reserved1; +}; + struct ib_uverbs_flow_tunnel_filter { __be32 tunnel_id; }; @@ -1004,6 +1015,24 @@ struct ib_uverbs_flow_spec_tunnel { struct ib_uverbs_flow_tunnel_filter mask; }; +struct ib_uverbs_flow_spec_esp_filter { + __u32 spi; + __u32 seq; +}; + +struct ib_uverbs_flow_spec_esp { + union { + struct ib_uverbs_flow_spec_hdr hdr; + struct { + __u32 type; + __u16 size; + __u16 reserved; + }; + }; + struct ib_uverbs_flow_spec_esp_filter val; + struct ib_uverbs_flow_spec_esp_filter mask; +}; + struct ib_uverbs_flow_attr { __u32 type; __u16 size; @@ -1036,18 +1065,18 @@ struct ib_uverbs_destroy_flow { }; struct ib_uverbs_create_srq { - __u64 response; - __u64 user_handle; + __aligned_u64 response; + __aligned_u64 user_handle; __u32 pd_handle; __u32 max_wr; __u32 max_sge; __u32 srq_limit; - __u64 driver_data[0]; + __aligned_u64 driver_data[0]; }; struct ib_uverbs_create_xsrq { - __u64 response; - __u64 user_handle; + __aligned_u64 response; + __aligned_u64 user_handle; __u32 srq_type; __u32 pd_handle; __u32 max_wr; @@ -1056,7 +1085,7 @@ struct ib_uverbs_create_xsrq { __u32 max_num_tags; __u32 xrcd_handle; __u32 cq_handle; - __u64 driver_data[0]; + __aligned_u64 driver_data[0]; }; struct ib_uverbs_create_srq_resp { @@ -1071,14 +1100,14 @@ struct ib_uverbs_modify_srq { __u32 attr_mask; __u32 max_wr; __u32 srq_limit; - __u64 driver_data[0]; + __aligned_u64 driver_data[0]; }; struct ib_uverbs_query_srq { - __u64 response; + __aligned_u64 response; __u32 srq_handle; __u32 reserved; - __u64 driver_data[0]; + __aligned_u64 driver_data[0]; }; struct ib_uverbs_query_srq_resp { @@ -1089,7 +1118,7 @@ struct ib_uverbs_query_srq_resp { }; struct ib_uverbs_destroy_srq { - __u64 response; + __aligned_u64 response; __u32 srq_handle; __u32 reserved; }; @@ -1101,7 +1130,7 @@ struct ib_uverbs_destroy_srq_resp { struct ib_uverbs_ex_create_wq { __u32 comp_mask; __u32 wq_type; - __u64 user_handle; + __aligned_u64 user_handle; __u32 pd_handle; __u32 cq_handle; __u32 max_wr; diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h index 7f9c373..04f64bc 100644 --- a/include/uapi/rdma/mlx4-abi.h +++ b/include/uapi/rdma/mlx4-abi.h @@ -59,6 +59,10 @@ struct mlx4_ib_alloc_ucontext_resp_v3 { __u16 bf_regs_per_page; }; +enum { + MLX4_USER_DEV_CAP_LARGE_CQE = 1L << 0, +}; + struct mlx4_ib_alloc_ucontext_resp { __u32 dev_caps; __u32 qp_tab_size; @@ -73,8 +77,8 @@ struct mlx4_ib_alloc_pd_resp { }; struct mlx4_ib_create_cq { - __u64 buf_addr; - __u64 db_addr; + __aligned_u64 buf_addr; + __aligned_u64 db_addr; }; struct mlx4_ib_create_cq_resp { @@ -83,12 +87,12 @@ struct mlx4_ib_create_cq_resp { }; struct mlx4_ib_resize_cq { - __u64 buf_addr; + __aligned_u64 buf_addr; }; struct mlx4_ib_create_srq { - __u64 buf_addr; - __u64 db_addr; + __aligned_u64 buf_addr; + __aligned_u64 db_addr; }; struct mlx4_ib_create_srq_resp { @@ -97,7 +101,7 @@ struct mlx4_ib_create_srq_resp { }; struct mlx4_ib_create_qp_rss { - __u64 rx_hash_fields_mask; /* Use enum mlx4_ib_rx_hash_fields */ + __aligned_u64 rx_hash_fields_mask; /* Use enum mlx4_ib_rx_hash_fields */ __u8 rx_hash_function; /* Use enum mlx4_ib_rx_hash_function_flags */ __u8 reserved[7]; __u8 rx_hash_key[40]; @@ -106,8 +110,8 @@ struct mlx4_ib_create_qp_rss { }; struct mlx4_ib_create_qp { - __u64 buf_addr; - __u64 db_addr; + __aligned_u64 buf_addr; + __aligned_u64 db_addr; __u8 log_sq_bb_count; __u8 log_sq_stride; __u8 sq_no_prefetch; @@ -116,8 +120,8 @@ struct mlx4_ib_create_qp { }; struct mlx4_ib_create_wq { - __u64 buf_addr; - __u64 db_addr; + __aligned_u64 buf_addr; + __aligned_u64 db_addr; __u8 log_range_size; __u8 reserved[3]; __u32 comp_mask; @@ -156,4 +160,32 @@ enum mlx4_ib_rx_hash_fields { MLX4_IB_RX_HASH_INNER = 1ULL << 31, }; +struct mlx4_ib_rss_caps { + __aligned_u64 rx_hash_fields_mask; /* enum mlx4_ib_rx_hash_fields */ + __u8 rx_hash_function; /* enum mlx4_ib_rx_hash_function_flags */ + __u8 reserved[7]; +}; + +enum query_device_resp_mask { + MLX4_IB_QUERY_DEV_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0, +}; + +struct mlx4_ib_tso_caps { + __u32 max_tso; /* Maximum tso payload size in bytes */ + /* Corresponding bit will be set if qp type from + * 'enum ib_qp_type' is supported. + */ + __u32 supported_qpts; +}; + +struct mlx4_uverbs_ex_query_device_resp { + __u32 comp_mask; + __u32 response_length; + __aligned_u64 hca_core_clock_offset; + __u32 max_inl_recv_sz; + __u32 reserved; + struct mlx4_ib_rss_caps rss_caps; + struct mlx4_ib_tso_caps tso_caps; +}; + #endif /* MLX4_ABI_USER_H */ diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 1111aa4..cb4a02c 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -84,7 +84,7 @@ struct mlx5_ib_alloc_ucontext_req_v2 { __u8 reserved0; __u16 reserved1; __u32 reserved2; - __u64 lib_caps; + __aligned_u64 lib_caps; }; enum mlx5_ib_alloc_ucontext_resp_mask { @@ -107,6 +107,14 @@ enum mlx5_user_inline_mode { MLX5_USER_INLINE_MODE_TCP_UDP, }; +enum { + MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM = 1 << 0, + MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA = 1 << 1, + MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING = 1 << 2, + MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD = 1 << 3, + MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN = 1 << 4, +}; + struct mlx5_ib_alloc_ucontext_resp { __u32 qp_tab_size; __u32 bf_reg_size; @@ -118,14 +126,14 @@ struct mlx5_ib_alloc_ucontext_resp { __u32 max_recv_wr; __u32 max_srq_recv_wr; __u16 num_ports; - __u16 reserved1; + __u16 flow_action_flags; __u32 comp_mask; __u32 response_length; __u8 cqe_version; __u8 cmds_supp_uhw; __u8 eth_min_inline; __u8 clock_info_versions; - __u64 hca_core_clock_offset; + __aligned_u64 hca_core_clock_offset; __u32 log_uar_size; __u32 num_uars_per_page; __u32 num_dyn_bfregs; @@ -147,7 +155,7 @@ struct mlx5_ib_tso_caps { }; struct mlx5_ib_rss_caps { - __u64 rx_hash_fields_mask; /* enum mlx5_rx_hash_fields */ + __aligned_u64 rx_hash_fields_mask; /* enum mlx5_rx_hash_fields */ __u8 rx_hash_function; /* enum mlx5_rx_hash_function_flags */ __u8 reserved[7]; }; @@ -163,6 +171,10 @@ struct mlx5_ib_cqe_comp_caps { __u32 supported_format; /* enum mlx5_ib_cqe_comp_res_format */ }; +enum mlx5_ib_packet_pacing_cap_flags { + MLX5_IB_PP_SUPPORT_BURST = 1 << 0, +}; + struct mlx5_packet_pacing_caps { __u32 qp_rate_limit_min; __u32 qp_rate_limit_max; /* In kpbs */ @@ -172,7 +184,8 @@ struct mlx5_packet_pacing_caps { * supported_qpts |= 1 << IB_QPT_RAW_PACKET */ __u32 supported_qpts; - __u32 reserved; + __u8 cap_flags; /* enum mlx5_ib_packet_pacing_cap_flags */ + __u8 reserved[3]; }; enum mlx5_ib_mpw_caps { @@ -243,8 +256,8 @@ enum mlx5_ib_create_cq_flags { }; struct mlx5_ib_create_cq { - __u64 buf_addr; - __u64 db_addr; + __aligned_u64 buf_addr; + __aligned_u64 db_addr; __u32 cqe_size; __u8 cqe_comp_en; __u8 cqe_comp_res_format; @@ -257,15 +270,15 @@ struct mlx5_ib_create_cq_resp { }; struct mlx5_ib_resize_cq { - __u64 buf_addr; + __aligned_u64 buf_addr; __u16 cqe_size; __u16 reserved0; __u32 reserved1; }; struct mlx5_ib_create_srq { - __u64 buf_addr; - __u64 db_addr; + __aligned_u64 buf_addr; + __aligned_u64 db_addr; __u32 flags; __u32 reserved0; /* explicit padding (optional on i386) */ __u32 uidx; @@ -278,8 +291,8 @@ struct mlx5_ib_create_srq_resp { }; struct mlx5_ib_create_qp { - __u64 buf_addr; - __u64 db_addr; + __aligned_u64 buf_addr; + __aligned_u64 db_addr; __u32 sq_wqe_count; __u32 rq_wqe_count; __u32 rq_wqe_shift; @@ -287,8 +300,8 @@ struct mlx5_ib_create_qp { __u32 uidx; __u32 bfreg_index; union { - __u64 sq_buf_addr; - __u64 access_key; + __aligned_u64 sq_buf_addr; + __aligned_u64 access_key; }; }; @@ -314,12 +327,13 @@ enum mlx5_rx_hash_fields { MLX5_RX_HASH_DST_PORT_TCP = 1 << 5, MLX5_RX_HASH_SRC_PORT_UDP = 1 << 6, MLX5_RX_HASH_DST_PORT_UDP = 1 << 7, + MLX5_RX_HASH_IPSEC_SPI = 1 << 8, /* Save bits for future fields */ MLX5_RX_HASH_INNER = (1UL << 31), }; struct mlx5_ib_create_qp_rss { - __u64 rx_hash_fields_mask; /* enum mlx5_rx_hash_fields */ + __aligned_u64 rx_hash_fields_mask; /* enum mlx5_rx_hash_fields */ __u8 rx_hash_function; /* enum mlx5_rx_hash_function_flags */ __u8 rx_key_len; /* valid only for Toeplitz */ __u8 reserved[6]; @@ -330,6 +344,7 @@ struct mlx5_ib_create_qp_rss { struct mlx5_ib_create_qp_resp { __u32 bfreg_index; + __u32 reserved; }; struct mlx5_ib_alloc_mw { @@ -344,8 +359,8 @@ enum mlx5_ib_create_wq_mask { }; struct mlx5_ib_create_wq { - __u64 buf_addr; - __u64 db_addr; + __aligned_u64 buf_addr; + __aligned_u64 db_addr; __u32 rq_wqe_count; __u32 rq_wqe_shift; __u32 user_index; @@ -362,6 +377,18 @@ struct mlx5_ib_create_ah_resp { __u8 reserved[6]; }; +struct mlx5_ib_burst_info { + __u32 max_burst_sz; + __u16 typical_pkt_sz; + __u16 reserved; +}; + +struct mlx5_ib_modify_qp { + __u32 comp_mask; + struct mlx5_ib_burst_info burst_info; + __u32 reserved; +}; + struct mlx5_ib_modify_qp_resp { __u32 response_length; __u32 dctn; @@ -385,13 +412,13 @@ struct mlx5_ib_modify_wq { struct mlx5_ib_clock_info { __u32 sign; __u32 resv; - __u64 nsec; - __u64 cycles; - __u64 frac; + __aligned_u64 nsec; + __aligned_u64 cycles; + __aligned_u64 frac; __u32 mult; __u32 shift; - __u64 mask; - __u64 overflow_period; + __aligned_u64 mask; + __aligned_u64 overflow_period; }; enum mlx5_ib_mmap_cmd { @@ -403,6 +430,7 @@ enum mlx5_ib_mmap_cmd { MLX5_IB_MMAP_CORE_CLOCK = 5, MLX5_IB_MMAP_ALLOC_WC = 6, MLX5_IB_MMAP_CLOCK_INFO = 7, + MLX5_IB_MMAP_DEVICE_MEM = 8, }; enum { diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h new file mode 100644 index 0000000..f7d685e --- /dev/null +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_USER_IOCTL_CMDS_H +#define MLX5_USER_IOCTL_CMDS_H + +#include <rdma/ib_user_ioctl_cmds.h> + +enum mlx5_ib_create_flow_action_attrs { + /* This attribute belong to the driver namespace */ + MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS = (1U << UVERBS_ID_NS_SHIFT), +}; + +enum mlx5_ib_alloc_dm_attrs { + MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, +}; + +#endif diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h new file mode 100644 index 0000000..8a2fb33 --- /dev/null +++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_USER_IOCTL_VERBS_H +#define MLX5_USER_IOCTL_VERBS_H + +#include <linux/types.h> + +enum mlx5_ib_uapi_flow_action_flags { + MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA = 1 << 0, +}; + +#endif + diff --git a/include/uapi/rdma/mthca-abi.h b/include/uapi/rdma/mthca-abi.h index 3020d8a..ac756cd 100644 --- a/include/uapi/rdma/mthca-abi.h +++ b/include/uapi/rdma/mthca-abi.h @@ -74,8 +74,8 @@ struct mthca_reg_mr { struct mthca_create_cq { __u32 lkey; __u32 pdn; - __u64 arm_db_page; - __u64 set_db_page; + __aligned_u64 arm_db_page; + __aligned_u64 set_db_page; __u32 arm_db_index; __u32 set_db_index; }; @@ -93,7 +93,7 @@ struct mthca_resize_cq { struct mthca_create_srq { __u32 lkey; __u32 db_index; - __u64 db_page; + __aligned_u64 db_page; }; struct mthca_create_srq_resp { @@ -104,8 +104,8 @@ struct mthca_create_srq_resp { struct mthca_create_qp { __u32 lkey; __u32 reserved; - __u64 sq_db_page; - __u64 rq_db_page; + __aligned_u64 sq_db_page; + __aligned_u64 rq_db_page; __u32 sq_db_index; __u32 rq_db_index; }; diff --git a/include/uapi/rdma/nes-abi.h b/include/uapi/rdma/nes-abi.h index f5b2437..35bfd40 100644 --- a/include/uapi/rdma/nes-abi.h +++ b/include/uapi/rdma/nes-abi.h @@ -72,14 +72,14 @@ struct nes_alloc_pd_resp { }; struct nes_create_cq_req { - __u64 user_cq_buffer; + __aligned_u64 user_cq_buffer; __u32 mcrqf; __u8 reserved[4]; }; struct nes_create_qp_req { - __u64 user_wqe_buffers; - __u64 user_qp_buffer; + __aligned_u64 user_wqe_buffers; + __aligned_u64 user_qp_buffer; }; enum iwnes_memreg_type { diff --git a/include/uapi/rdma/ocrdma-abi.h b/include/uapi/rdma/ocrdma-abi.h index ad64a3c..284d47b 100644 --- a/include/uapi/rdma/ocrdma-abi.h +++ b/include/uapi/rdma/ocrdma-abi.h @@ -55,17 +55,17 @@ struct ocrdma_alloc_ucontext_resp { __u32 wqe_size; __u32 max_inline_data; __u32 dpp_wqe_size; - __u64 ah_tbl_page; + __aligned_u64 ah_tbl_page; __u32 ah_tbl_len; __u32 rqe_size; __u8 fw_ver[32]; /* for future use/new features in progress */ - __u64 rsvd1; - __u64 rsvd2; + __aligned_u64 rsvd1; + __aligned_u64 rsvd2; }; struct ocrdma_alloc_pd_ureq { - __u64 rsvd1; + __u32 rsvd[2]; }; struct ocrdma_alloc_pd_uresp { @@ -73,7 +73,7 @@ struct ocrdma_alloc_pd_uresp { __u32 dpp_enabled; __u32 dpp_page_addr_hi; __u32 dpp_page_addr_lo; - __u64 rsvd1; + __u32 rsvd[2]; }; struct ocrdma_create_cq_ureq { @@ -87,13 +87,13 @@ struct ocrdma_create_cq_uresp { __u32 page_size; __u32 num_pages; __u32 max_hw_cqe; - __u64 page_addr[MAX_CQ_PAGES]; - __u64 db_page_addr; + __aligned_u64 page_addr[MAX_CQ_PAGES]; + __aligned_u64 db_page_addr; __u32 db_page_size; __u32 phase_change; /* for future use/new features in progress */ - __u64 rsvd1; - __u64 rsvd2; + __aligned_u64 rsvd1; + __aligned_u64 rsvd2; }; #define MAX_QP_PAGES 8 @@ -115,9 +115,9 @@ struct ocrdma_create_qp_uresp { __u32 rq_page_size; __u32 num_sq_pages; __u32 num_rq_pages; - __u64 sq_page_addr[MAX_QP_PAGES]; - __u64 rq_page_addr[MAX_QP_PAGES]; - __u64 db_page_addr; + __aligned_u64 sq_page_addr[MAX_QP_PAGES]; + __aligned_u64 rq_page_addr[MAX_QP_PAGES]; + __aligned_u64 db_page_addr; __u32 db_page_size; __u32 dpp_credit; __u32 dpp_offset; @@ -126,8 +126,8 @@ struct ocrdma_create_qp_uresp { __u32 db_sq_offset; __u32 db_rq_offset; __u32 db_shift; - __u64 rsvd[11]; -} __packed; + __aligned_u64 rsvd[11]; +}; struct ocrdma_create_srq_uresp { __u16 rq_dbid; @@ -137,16 +137,16 @@ struct ocrdma_create_srq_uresp { __u32 rq_page_size; __u32 num_rq_pages; - __u64 rq_page_addr[MAX_QP_PAGES]; - __u64 db_page_addr; + __aligned_u64 rq_page_addr[MAX_QP_PAGES]; + __aligned_u64 db_page_addr; __u32 db_page_size; __u32 num_rqe_allocated; __u32 db_rq_offset; __u32 db_shift; - __u64 rsvd2; - __u64 rsvd3; + __aligned_u64 rsvd2; + __aligned_u64 rsvd3; }; #endif /* OCRDMA_ABI_USER_H */ diff --git a/include/uapi/rdma/qedr-abi.h b/include/uapi/rdma/qedr-abi.h index 261c6db..8ba0989 100644 --- a/include/uapi/rdma/qedr-abi.h +++ b/include/uapi/rdma/qedr-abi.h @@ -40,7 +40,7 @@ /* user kernel communication data structures. */ struct qedr_alloc_ucontext_resp { - __u64 db_pa; + __aligned_u64 db_pa; __u32 db_size; __u32 max_send_wr; @@ -53,24 +53,27 @@ struct qedr_alloc_ucontext_resp { __u8 dpm_enabled; __u8 wids_enabled; __u16 wid_count; + __u32 reserved; }; struct qedr_alloc_pd_ureq { - __u64 rsvd1; + __aligned_u64 rsvd1; }; struct qedr_alloc_pd_uresp { __u32 pd_id; + __u32 reserved; }; struct qedr_create_cq_ureq { - __u64 addr; - __u64 len; + __aligned_u64 addr; + __aligned_u64 len; }; struct qedr_create_cq_uresp { __u32 db_offset; __u16 icid; + __u16 reserved; }; struct qedr_create_qp_ureq { @@ -79,17 +82,17 @@ struct qedr_create_qp_ureq { /* SQ */ /* user space virtual address of SQ buffer */ - __u64 sq_addr; + __aligned_u64 sq_addr; /* length of SQ buffer */ - __u64 sq_len; + __aligned_u64 sq_len; /* RQ */ /* user space virtual address of RQ buffer */ - __u64 rq_addr; + __aligned_u64 rq_addr; /* length of RQ buffer */ - __u64 rq_len; + __aligned_u64 rq_len; }; struct qedr_create_qp_uresp { @@ -105,6 +108,7 @@ struct qedr_create_qp_uresp { __u16 rq_icid; __u32 rq_db2_offset; + __u32 reserved; }; #endif /* __QEDR_USER_H__ */ diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 4c77e2a..0ce0943 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -238,6 +238,14 @@ enum rdma_nldev_command { RDMA_NLDEV_CMD_RES_QP_GET, /* can dump */ + RDMA_NLDEV_CMD_RES_CM_ID_GET, /* can dump */ + + RDMA_NLDEV_CMD_RES_CQ_GET, /* can dump */ + + RDMA_NLDEV_CMD_RES_MR_GET, /* can dump */ + + RDMA_NLDEV_CMD_RES_PD_GET, /* can dump */ + RDMA_NLDEV_NUM_OPS }; @@ -350,6 +358,49 @@ enum rdma_nldev_attr { */ RDMA_NLDEV_ATTR_RES_KERN_NAME, /* string */ + RDMA_NLDEV_ATTR_RES_CM_ID, /* nested table */ + RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY, /* nested table */ + /* + * rdma_cm_id port space. + */ + RDMA_NLDEV_ATTR_RES_PS, /* u32 */ + /* + * Source and destination socket addresses + */ + RDMA_NLDEV_ATTR_RES_SRC_ADDR, /* __kernel_sockaddr_storage */ + RDMA_NLDEV_ATTR_RES_DST_ADDR, /* __kernel_sockaddr_storage */ + + RDMA_NLDEV_ATTR_RES_CQ, /* nested table */ + RDMA_NLDEV_ATTR_RES_CQ_ENTRY, /* nested table */ + RDMA_NLDEV_ATTR_RES_CQE, /* u32 */ + RDMA_NLDEV_ATTR_RES_USECNT, /* u64 */ + RDMA_NLDEV_ATTR_RES_POLL_CTX, /* u8 */ + + RDMA_NLDEV_ATTR_RES_MR, /* nested table */ + RDMA_NLDEV_ATTR_RES_MR_ENTRY, /* nested table */ + RDMA_NLDEV_ATTR_RES_RKEY, /* u32 */ + RDMA_NLDEV_ATTR_RES_LKEY, /* u32 */ + RDMA_NLDEV_ATTR_RES_IOVA, /* u64 */ + RDMA_NLDEV_ATTR_RES_MRLEN, /* u64 */ + + RDMA_NLDEV_ATTR_RES_PD, /* nested table */ + RDMA_NLDEV_ATTR_RES_PD_ENTRY, /* nested table */ + RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY, /* u32 */ + RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY, /* u32 */ + + /* + * Provides logical name and index of netdevice which is + * connected to physical port. This information is relevant + * for RoCE and iWARP. + * + * The netdevices which are associated with containers are + * supposed to be exported together with GID table once it + * will be exposed through the netlink. Because the + * associated netdevices are properties of GIDs. + */ + RDMA_NLDEV_ATTR_NDEV_INDEX, /* u32 */ + RDMA_NLDEV_ATTR_NDEV_NAME, /* string */ + RDMA_NLDEV_ATTR_MAX }; #endif /* _UAPI_RDMA_NETLINK_H */ diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h index c83ef00..e126902 100644 --- a/include/uapi/rdma/rdma_user_cm.h +++ b/include/uapi/rdma/rdma_user_cm.h @@ -70,6 +70,14 @@ enum { RDMA_USER_CM_CMD_JOIN_MCAST }; +/* See IBTA Annex A11, servies ID bytes 4 & 5 */ +enum rdma_ucm_port_space { + RDMA_PS_IPOIB = 0x0002, + RDMA_PS_IB = 0x013F, + RDMA_PS_TCP = 0x0106, + RDMA_PS_UDP = 0x0111, +}; + /* * command ABI structures. */ @@ -80,9 +88,9 @@ struct rdma_ucm_cmd_hdr { }; struct rdma_ucm_create_id { - __u64 uid; - __u64 response; - __u16 ps; + __aligned_u64 uid; + __aligned_u64 response; + __u16 ps; /* use enum rdma_ucm_port_space */ __u8 qp_type; __u8 reserved[5]; }; @@ -92,7 +100,7 @@ struct rdma_ucm_create_id_resp { }; struct rdma_ucm_destroy_id { - __u64 response; + __aligned_u64 response; __u32 id; __u32 reserved; }; @@ -102,7 +110,7 @@ struct rdma_ucm_destroy_id_resp { }; struct rdma_ucm_bind_ip { - __u64 response; + __aligned_u64 response; struct sockaddr_in6 addr; __u32 id; }; @@ -143,13 +151,13 @@ enum { }; struct rdma_ucm_query { - __u64 response; + __aligned_u64 response; __u32 id; __u32 option; }; struct rdma_ucm_query_route_resp { - __u64 node_guid; + __aligned_u64 node_guid; struct ib_user_path_rec ib_route[2]; struct sockaddr_in6 src_addr; struct sockaddr_in6 dst_addr; @@ -159,7 +167,7 @@ struct rdma_ucm_query_route_resp { }; struct rdma_ucm_query_addr_resp { - __u64 node_guid; + __aligned_u64 node_guid; __u8 port_num; __u8 reserved; __u16 pkey; @@ -210,7 +218,7 @@ struct rdma_ucm_listen { }; struct rdma_ucm_accept { - __u64 uid; + __aligned_u64 uid; struct rdma_ucm_conn_param conn_param; __u32 id; __u32 reserved; @@ -228,7 +236,7 @@ struct rdma_ucm_disconnect { }; struct rdma_ucm_init_qp_attr { - __u64 response; + __aligned_u64 response; __u32 id; __u32 qp_state; }; @@ -239,8 +247,8 @@ struct rdma_ucm_notify { }; struct rdma_ucm_join_ip_mcast { - __u64 response; /* rdma_ucm_create_id_resp */ - __u64 uid; + __aligned_u64 response; /* rdma_ucm_create_id_resp */ + __aligned_u64 uid; struct sockaddr_in6 addr; __u32 id; }; @@ -253,8 +261,8 @@ enum { }; struct rdma_ucm_join_mcast { - __u64 response; /* rdma_ucma_create_id_resp */ - __u64 uid; + __aligned_u64 response; /* rdma_ucma_create_id_resp */ + __aligned_u64 uid; __u32 id; __u16 addr_size; __u16 join_flags; @@ -262,18 +270,23 @@ struct rdma_ucm_join_mcast { }; struct rdma_ucm_get_event { - __u64 response; + __aligned_u64 response; }; struct rdma_ucm_event_resp { - __u64 uid; + __aligned_u64 uid; __u32 id; __u32 event; __u32 status; + /* + * NOTE: This union is not aligned to 8 bytes so none of the union + * members may contain a u64 or anything with higher alignment than 4. + */ union { struct rdma_ucm_conn_param conn; struct rdma_ucm_ud_param ud; } param; + __u32 reserved; }; /* Option levels */ @@ -291,7 +304,7 @@ enum { }; struct rdma_ucm_set_option { - __u64 optval; + __aligned_u64 optval; __u32 id; __u32 level; __u32 optname; @@ -299,7 +312,7 @@ struct rdma_ucm_set_option { }; struct rdma_ucm_migrate_id { - __u64 response; + __aligned_u64 response; __u32 id; __u32 fd; }; diff --git a/include/uapi/rdma/rdma_user_ioctl.h b/include/uapi/rdma/rdma_user_ioctl.h index 46de088..d223f41 100644 --- a/include/uapi/rdma/rdma_user_ioctl.h +++ b/include/uapi/rdma/rdma_user_ioctl.h @@ -34,49 +34,13 @@ #ifndef RDMA_USER_IOCTL_H #define RDMA_USER_IOCTL_H -#include <linux/types.h> -#include <linux/ioctl.h> #include <rdma/ib_user_mad.h> #include <rdma/hfi/hfi1_ioctl.h> +#include <rdma/rdma_user_ioctl_cmds.h> -/* Documentation/ioctl/ioctl-number.txt */ -#define RDMA_IOCTL_MAGIC 0x1b /* Legacy name, for user space application which already use it */ #define IB_IOCTL_MAGIC RDMA_IOCTL_MAGIC -#define RDMA_VERBS_IOCTL \ - _IOWR(RDMA_IOCTL_MAGIC, 1, struct ib_uverbs_ioctl_hdr) - -#define UVERBS_ID_NS_MASK 0xF000 -#define UVERBS_ID_NS_SHIFT 12 - -enum { - /* User input */ - UVERBS_ATTR_F_MANDATORY = 1U << 0, - /* - * Valid output bit should be ignored and considered set in - * mandatory fields. This bit is kernel output. - */ - UVERBS_ATTR_F_VALID_OUTPUT = 1U << 1, -}; - -struct ib_uverbs_attr { - __u16 attr_id; /* command specific type attribute */ - __u16 len; /* only for pointers */ - __u16 flags; /* combination of UVERBS_ATTR_F_XXXX */ - __u16 reserved; - __aligned_u64 data; /* ptr to command, inline data or idr/fd */ -}; - -struct ib_uverbs_ioctl_hdr { - __u16 length; - __u16 object_id; - __u16 method_id; - __u16 num_attrs; - __aligned_u64 reserved; - struct ib_uverbs_attr attrs[0]; -}; - /* * General blocks assignments * It is closed on purpose do not expose it it user space diff --git a/include/uapi/rdma/rdma_user_ioctl_cmds.h b/include/uapi/rdma/rdma_user_ioctl_cmds.h new file mode 100644 index 0000000..1da5a1e --- /dev/null +++ b/include/uapi/rdma/rdma_user_ioctl_cmds.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RDMA_USER_IOCTL_CMDS_H +#define RDMA_USER_IOCTL_CMDS_H + +#include <linux/types.h> +#include <linux/ioctl.h> + +/* Documentation/ioctl/ioctl-number.txt */ +#define RDMA_IOCTL_MAGIC 0x1b +#define RDMA_VERBS_IOCTL \ + _IOWR(RDMA_IOCTL_MAGIC, 1, struct ib_uverbs_ioctl_hdr) + +enum { + /* User input */ + UVERBS_ATTR_F_MANDATORY = 1U << 0, + /* + * Valid output bit should be ignored and considered set in + * mandatory fields. This bit is kernel output. + */ + UVERBS_ATTR_F_VALID_OUTPUT = 1U << 1, +}; + +struct ib_uverbs_attr { + __u16 attr_id; /* command specific type attribute */ + __u16 len; /* only for pointers */ + __u16 flags; /* combination of UVERBS_ATTR_F_XXXX */ + union { + struct { + __u8 elem_id; + __u8 reserved; + } enum_data; + __u16 reserved; + } attr_data; + __aligned_u64 data; /* ptr to command, inline data or idr/fd */ +}; + +struct ib_uverbs_ioctl_hdr { + __u16 length; + __u16 object_id; + __u16 method_id; + __u16 num_attrs; + __aligned_u64 reserved1; + __u32 driver_id; + __u32 reserved2; + struct ib_uverbs_attr attrs[0]; +}; + +enum rdma_driver_id { + RDMA_DRIVER_UNKNOWN, + RDMA_DRIVER_MLX5, + RDMA_DRIVER_MLX4, + RDMA_DRIVER_CXGB3, + RDMA_DRIVER_CXGB4, + RDMA_DRIVER_MTHCA, + RDMA_DRIVER_BNXT_RE, + RDMA_DRIVER_OCRDMA, + RDMA_DRIVER_NES, + RDMA_DRIVER_I40IW, + RDMA_DRIVER_VMW_PVRDMA, + RDMA_DRIVER_QEDR, + RDMA_DRIVER_HNS, + RDMA_DRIVER_USNIC, + RDMA_DRIVER_RXE, + RDMA_DRIVER_HFI1, + RDMA_DRIVER_QIB, +}; + +#endif diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h index bdeea94..1f8a9e7 100644 --- a/include/uapi/rdma/rdma_user_rxe.h +++ b/include/uapi/rdma/rdma_user_rxe.h @@ -35,6 +35,9 @@ #define RDMA_USER_RXE_H #include <linux/types.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/in6.h> union rxe_gid { __u8 raw[16]; @@ -55,16 +58,17 @@ struct rxe_global_route { struct rxe_av { __u8 port_num; __u8 network_type; + __u16 reserved1; + __u32 reserved2; struct rxe_global_route grh; union { - struct sockaddr _sockaddr; struct sockaddr_in _sockaddr_in; struct sockaddr_in6 _sockaddr_in6; } sgid_addr, dgid_addr; }; struct rxe_send_wr { - __u64 wr_id; + __aligned_u64 wr_id; __u32 num_sge; __u32 opcode; __u32 send_flags; @@ -74,36 +78,42 @@ struct rxe_send_wr { } ex; union { struct { - __u64 remote_addr; + __aligned_u64 remote_addr; __u32 rkey; + __u32 reserved; } rdma; struct { - __u64 remote_addr; - __u64 compare_add; - __u64 swap; + __aligned_u64 remote_addr; + __aligned_u64 compare_add; + __aligned_u64 swap; __u32 rkey; + __u32 reserved; } atomic; struct { __u32 remote_qpn; __u32 remote_qkey; __u16 pkey_index; } ud; + /* reg is only used by the kernel and is not part of the uapi */ struct { - struct ib_mr *mr; + union { + struct ib_mr *mr; + __aligned_u64 reserved; + }; __u32 key; - int access; + __u32 access; } reg; } wr; }; struct rxe_sge { - __u64 addr; + __aligned_u64 addr; __u32 length; __u32 lkey; }; struct mminfo { - __u64 offset; + __aligned_u64 offset; __u32 size; __u32 pad; }; @@ -114,6 +124,7 @@ struct rxe_dma_info { __u32 cur_sge; __u32 num_sge; __u32 sge_offset; + __u32 reserved; union { __u8 inline_data[0]; struct rxe_sge sge[0]; @@ -125,7 +136,7 @@ struct rxe_send_wqe { struct rxe_av av; __u32 status; __u32 state; - __u64 iova; + __aligned_u64 iova; __u32 mask; __u32 first_psn; __u32 last_psn; @@ -136,10 +147,33 @@ struct rxe_send_wqe { }; struct rxe_recv_wqe { - __u64 wr_id; + __aligned_u64 wr_id; __u32 num_sge; __u32 padding; struct rxe_dma_info dma; }; +struct rxe_create_cq_resp { + struct mminfo mi; +}; + +struct rxe_resize_cq_resp { + struct mminfo mi; +}; + +struct rxe_create_qp_resp { + struct mminfo rq_mi; + struct mminfo sq_mi; +}; + +struct rxe_create_srq_resp { + struct mminfo mi; + __u32 srq_num; + __u32 reserved; +}; + +struct rxe_modify_srq_cmd { + __aligned_u64 mmap_info_addr; +}; + #endif /* RDMA_USER_RXE_H */ diff --git a/include/uapi/rdma/vmw_pvrdma-abi.h b/include/uapi/rdma/vmw_pvrdma-abi.h index 02ca0d0..d13fd49 100644 --- a/include/uapi/rdma/vmw_pvrdma-abi.h +++ b/include/uapi/rdma/vmw_pvrdma-abi.h @@ -143,7 +143,7 @@ struct pvrdma_alloc_pd_resp { }; struct pvrdma_create_cq { - __u64 buf_addr; + __aligned_u64 buf_addr; __u32 buf_size; __u32 reserved; }; @@ -154,13 +154,13 @@ struct pvrdma_create_cq_resp { }; struct pvrdma_resize_cq { - __u64 buf_addr; + __aligned_u64 buf_addr; __u32 buf_size; __u32 reserved; }; struct pvrdma_create_srq { - __u64 buf_addr; + __aligned_u64 buf_addr; __u32 buf_size; __u32 reserved; }; @@ -171,25 +171,25 @@ struct pvrdma_create_srq_resp { }; struct pvrdma_create_qp { - __u64 rbuf_addr; - __u64 sbuf_addr; + __aligned_u64 rbuf_addr; + __aligned_u64 sbuf_addr; __u32 rbuf_size; __u32 sbuf_size; - __u64 qp_addr; + __aligned_u64 qp_addr; }; /* PVRDMA masked atomic compare and swap */ struct pvrdma_ex_cmp_swap { - __u64 swap_val; - __u64 compare_val; - __u64 swap_mask; - __u64 compare_mask; + __aligned_u64 swap_val; + __aligned_u64 compare_val; + __aligned_u64 swap_mask; + __aligned_u64 compare_mask; }; /* PVRDMA masked atomic fetch and add */ struct pvrdma_ex_fetch_add { - __u64 add_val; - __u64 field_boundary; + __aligned_u64 add_val; + __aligned_u64 field_boundary; }; /* PVRDMA address vector. */ @@ -207,14 +207,14 @@ struct pvrdma_av { /* PVRDMA scatter/gather entry */ struct pvrdma_sge { - __u64 addr; + __aligned_u64 addr; __u32 length; __u32 lkey; }; /* PVRDMA receive queue work request */ struct pvrdma_rq_wqe_hdr { - __u64 wr_id; /* wr id */ + __aligned_u64 wr_id; /* wr id */ __u32 num_sge; /* size of s/g array */ __u32 total_len; /* reserved */ }; @@ -222,7 +222,7 @@ struct pvrdma_rq_wqe_hdr { /* PVRDMA send queue work request */ struct pvrdma_sq_wqe_hdr { - __u64 wr_id; /* wr id */ + __aligned_u64 wr_id; /* wr id */ __u32 num_sge; /* size of s/g array */ __u32 total_len; /* reserved */ __u32 opcode; /* operation type */ @@ -234,19 +234,19 @@ struct pvrdma_sq_wqe_hdr { __u32 reserved; union { struct { - __u64 remote_addr; + __aligned_u64 remote_addr; __u32 rkey; __u8 reserved[4]; } rdma; struct { - __u64 remote_addr; - __u64 compare_add; - __u64 swap; + __aligned_u64 remote_addr; + __aligned_u64 compare_add; + __aligned_u64 swap; __u32 rkey; __u32 reserved; } atomic; struct { - __u64 remote_addr; + __aligned_u64 remote_addr; __u32 log_arg_sz; __u32 rkey; union { @@ -255,13 +255,14 @@ struct pvrdma_sq_wqe_hdr { } wr_data; } masked_atomics; struct { - __u64 iova_start; - __u64 pl_pdir_dma; + __aligned_u64 iova_start; + __aligned_u64 pl_pdir_dma; __u32 page_shift; __u32 page_list_len; __u32 length; __u32 access_flags; __u32 rkey; + __u32 reserved; } fast_reg; struct { __u32 remote_qpn; @@ -274,8 +275,8 @@ struct pvrdma_sq_wqe_hdr { /* Completion queue element. */ struct pvrdma_cqe { - __u64 wr_id; - __u64 qp; + __aligned_u64 wr_id; + __aligned_u64 qp; __u32 opcode; __u32 status; __u32 byte_len; |