git.ipfire.org Git - thirdparty/linux.git/commitdiff
RDMA/mana_ib: Add support of mana_ib for RNIC and ETH nic
author Konstantin Taranov <kotaranov@microsoft.com>
Wed, 7 May 2025 15:59:03 +0000 (08:59 -0700)
committer Leon Romanovsky <leon@kernel.org>
Mon, 12 May 2025 10:44:52 +0000 (06:44 -0400)
Allow mana_ib to be created over both the ethernet gdma device and
the rnic gdma device. The HW exposes two devices with different
capabilities and different use-cases. Initialize the required
resources depending on which gdma device is used.

Signed-off-by: Konstantin Taranov <kotaranov@microsoft.com>
Link: https://patch.msgid.link/1746633545-17653-3-git-send-email-kotaranov@linux.microsoft.com
Reviewed-by: Long Li <longli@microsoft.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
drivers/infiniband/hw/mana/device.c
drivers/infiniband/hw/mana/main.c
drivers/infiniband/hw/mana/mana_ib.h

index b31089320aa52011619ac394b9f860724daa463c..165c0a1e67d14da121354ccdad89cf887ea3aae0 100644 (file)
@@ -101,103 +101,95 @@ static int mana_ib_probe(struct auxiliary_device *adev,
                         const struct auxiliary_device_id *id)
 {
        struct mana_adev *madev = container_of(adev, struct mana_adev, adev);
+       struct gdma_context *gc = madev->mdev->gdma_context;
+       struct mana_context *mc = gc->mana.driver_data;
        struct gdma_dev *mdev = madev->mdev;
        struct net_device *ndev;
-       struct mana_context *mc;
        struct mana_ib_dev *dev;
        u8 mac_addr[ETH_ALEN];
        int ret;
 
-       mc = mdev->driver_data;
-
        dev = ib_alloc_device(mana_ib_dev, ib_dev);
        if (!dev)
                return -ENOMEM;
 
        ib_set_device_ops(&dev->ib_dev, &mana_ib_dev_ops);
-
-       dev->ib_dev.phys_port_cnt = mc->num_ports;
-
-       ibdev_dbg(&dev->ib_dev, "mdev=%p id=%d num_ports=%d\n", mdev,
-                 mdev->dev_id.as_uint32, dev->ib_dev.phys_port_cnt);
-
        dev->ib_dev.node_type = RDMA_NODE_IB_CA;
-
-       /*
-        * num_comp_vectors needs to set to the max MSIX index
-        * when interrupts and event queues are implemented
-        */
-       dev->ib_dev.num_comp_vectors = mdev->gdma_context->max_num_queues;
-       dev->ib_dev.dev.parent = mdev->gdma_context->dev;
-
-       ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker);
-       if (!ndev) {
-               ret = -ENODEV;
-               ibdev_err(&dev->ib_dev, "Failed to get netdev for IB port 1");
-               goto free_ib_device;
-       }
-       ether_addr_copy(mac_addr, ndev->dev_addr);
-       addrconf_addr_eui48((u8 *)&dev->ib_dev.node_guid, ndev->dev_addr);
-       ret = ib_device_set_netdev(&dev->ib_dev, ndev, 1);
-       /* mana_get_primary_netdev() returns ndev with refcount held */
-       netdev_put(ndev, &dev->dev_tracker);
-       if (ret) {
-               ibdev_err(&dev->ib_dev, "Failed to set ib netdev, ret %d", ret);
-               goto free_ib_device;
-       }
-
-       ret = mana_gd_register_device(&mdev->gdma_context->mana_ib);
-       if (ret) {
-               ibdev_err(&dev->ib_dev, "Failed to register device, ret %d",
-                         ret);
-               goto free_ib_device;
-       }
-       dev->gdma_dev = &mdev->gdma_context->mana_ib;
-
-       dev->nb.notifier_call = mana_ib_netdev_event;
-       ret = register_netdevice_notifier(&dev->nb);
-       if (ret) {
-               ibdev_err(&dev->ib_dev, "Failed to register net notifier, %d",
-                         ret);
-               goto deregister_device;
-       }
-
-       ret = mana_ib_gd_query_adapter_caps(dev);
-       if (ret) {
-               ibdev_err(&dev->ib_dev, "Failed to query device caps, ret %d",
-                         ret);
-               goto deregister_net_notifier;
-       }
-
-       ib_set_device_ops(&dev->ib_dev, &mana_ib_stats_ops);
-
-       ret = mana_ib_create_eqs(dev);
-       if (ret) {
-               ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d", ret);
-               goto deregister_net_notifier;
-       }
-
-       ret = mana_ib_gd_create_rnic_adapter(dev);
-       if (ret)
-               goto destroy_eqs;
-
+       dev->ib_dev.num_comp_vectors = gc->max_num_queues;
+       dev->ib_dev.dev.parent = gc->dev;
+       dev->gdma_dev = mdev;
        xa_init_flags(&dev->qp_table_wq, XA_FLAGS_LOCK_IRQ);
-       ret = mana_ib_gd_config_mac(dev, ADDR_OP_ADD, mac_addr);
-       if (ret) {
-               ibdev_err(&dev->ib_dev, "Failed to add Mac address, ret %d",
-                         ret);
-               goto destroy_rnic;
+
+       if (mana_ib_is_rnic(dev)) {
+               dev->ib_dev.phys_port_cnt = 1;
+               ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker);
+               if (!ndev) {
+                       ret = -ENODEV;
+                       ibdev_err(&dev->ib_dev, "Failed to get netdev for IB port 1");
+                       goto free_ib_device;
+               }
+               ether_addr_copy(mac_addr, ndev->dev_addr);
+               addrconf_addr_eui48((u8 *)&dev->ib_dev.node_guid, ndev->dev_addr);
+               ret = ib_device_set_netdev(&dev->ib_dev, ndev, 1);
+               /* mana_get_primary_netdev() returns ndev with refcount held */
+               netdev_put(ndev, &dev->dev_tracker);
+               if (ret) {
+                       ibdev_err(&dev->ib_dev, "Failed to set ib netdev, ret %d", ret);
+                       goto free_ib_device;
+               }
+
+               dev->nb.notifier_call = mana_ib_netdev_event;
+               ret = register_netdevice_notifier(&dev->nb);
+               if (ret) {
+                       ibdev_err(&dev->ib_dev, "Failed to register net notifier, %d",
+                                 ret);
+                       goto free_ib_device;
+               }
+
+               ret = mana_ib_gd_query_adapter_caps(dev);
+               if (ret) {
+                       ibdev_err(&dev->ib_dev, "Failed to query device caps, ret %d", ret);
+                       goto deregister_net_notifier;
+               }
+
+               ib_set_device_ops(&dev->ib_dev, &mana_ib_stats_ops);
+
+               ret = mana_ib_create_eqs(dev);
+               if (ret) {
+                       ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d", ret);
+                       goto deregister_net_notifier;
+               }
+
+               ret = mana_ib_gd_create_rnic_adapter(dev);
+               if (ret)
+                       goto destroy_eqs;
+
+               ret = mana_ib_gd_config_mac(dev, ADDR_OP_ADD, mac_addr);
+               if (ret) {
+                       ibdev_err(&dev->ib_dev, "Failed to add Mac address, ret %d", ret);
+                       goto destroy_rnic;
+               }
+       } else {
+               dev->ib_dev.phys_port_cnt = mc->num_ports;
+               ret = mana_eth_query_adapter_caps(dev);
+               if (ret) {
+                       ibdev_err(&dev->ib_dev, "Failed to query ETH device caps, ret %d", ret);
+                       goto free_ib_device;
+               }
        }
 
-       dev->av_pool = dma_pool_create("mana_ib_av", mdev->gdma_context->dev,
-                                      MANA_AV_BUFFER_SIZE, MANA_AV_BUFFER_SIZE, 0);
+       dev->av_pool = dma_pool_create("mana_ib_av", gc->dev, MANA_AV_BUFFER_SIZE,
+                                      MANA_AV_BUFFER_SIZE, 0);
        if (!dev->av_pool) {
                ret = -ENOMEM;
                goto destroy_rnic;
        }
 
-       ret = ib_register_device(&dev->ib_dev, "mana_%d",
-                                mdev->gdma_context->dev);
+       ibdev_dbg(&dev->ib_dev, "mdev=%p id=%d num_ports=%d\n", mdev,
+                 mdev->dev_id.as_uint32, dev->ib_dev.phys_port_cnt);
+
+       ret = ib_register_device(&dev->ib_dev, mana_ib_is_rnic(dev) ? "mana_%d" : "manae_%d",
+                                gc->dev);
        if (ret)
                goto deallocate_pool;
 
@@ -208,15 +200,16 @@ static int mana_ib_probe(struct auxiliary_device *adev,
 deallocate_pool:
        dma_pool_destroy(dev->av_pool);
 destroy_rnic:
-       xa_destroy(&dev->qp_table_wq);
-       mana_ib_gd_destroy_rnic_adapter(dev);
+       if (mana_ib_is_rnic(dev))
+               mana_ib_gd_destroy_rnic_adapter(dev);
 destroy_eqs:
-       mana_ib_destroy_eqs(dev);
+       if (mana_ib_is_rnic(dev))
+               mana_ib_destroy_eqs(dev);
 deregister_net_notifier:
-       unregister_netdevice_notifier(&dev->nb);
-deregister_device:
-       mana_gd_deregister_device(dev->gdma_dev);
+       if (mana_ib_is_rnic(dev))
+               unregister_netdevice_notifier(&dev->nb);
 free_ib_device:
+       xa_destroy(&dev->qp_table_wq);
        ib_dealloc_device(&dev->ib_dev);
        return ret;
 }
@@ -227,25 +220,24 @@ static void mana_ib_remove(struct auxiliary_device *adev)
 
        ib_unregister_device(&dev->ib_dev);
        dma_pool_destroy(dev->av_pool);
+       if (mana_ib_is_rnic(dev)) {
+               mana_ib_gd_destroy_rnic_adapter(dev);
+               mana_ib_destroy_eqs(dev);
+               unregister_netdevice_notifier(&dev->nb);
+       }
        xa_destroy(&dev->qp_table_wq);
-       mana_ib_gd_destroy_rnic_adapter(dev);
-       mana_ib_destroy_eqs(dev);
-       unregister_netdevice_notifier(&dev->nb);
-       mana_gd_deregister_device(dev->gdma_dev);
        ib_dealloc_device(&dev->ib_dev);
 }
 
 static const struct auxiliary_device_id mana_id_table[] = {
-       {
-               .name = "mana.rdma",
-       },
+       { .name = "mana.rdma", },
+       { .name = "mana.eth", },
        {},
 };
 
 MODULE_DEVICE_TABLE(auxiliary, mana_id_table);
 
 static struct auxiliary_driver mana_driver = {
-       .name = "rdma",
        .probe = mana_ib_probe,
        .remove = mana_ib_remove,
        .id_table = mana_id_table,
index bb0f685babe6032c3c0ead54a80d5400d9862f54..3837e30ed8198b2010a521e503153cf4d2911141 100644 (file)
@@ -4,6 +4,7 @@
  */
 
 #include "mana_ib.h"
+#include "linux/pci.h"
 
 void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd,
                         u32 port)
@@ -551,6 +552,7 @@ int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
 int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
                               struct ib_port_immutable *immutable)
 {
+       struct mana_ib_dev *dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
        struct ib_port_attr attr;
        int err;
 
@@ -560,10 +562,12 @@ int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
 
        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
-       immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
-       if (port_num == 1) {
-               immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+
+       if (mana_ib_is_rnic(dev)) {
+               immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
                immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+       } else {
+               immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
        }
 
        return 0;
@@ -572,10 +576,12 @@ int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
 int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
                         struct ib_udata *uhw)
 {
-       struct mana_ib_dev *dev = container_of(ibdev,
-                       struct mana_ib_dev, ib_dev);
+       struct mana_ib_dev *dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+       struct pci_dev *pdev = to_pci_dev(mdev_to_gc(dev)->dev);
 
        memset(props, 0, sizeof(*props));
+       props->vendor_id = pdev->vendor;
+       props->vendor_part_id = dev->gdma_dev->dev_id.type;
        props->max_mr_size = MANA_IB_MAX_MR_SIZE;
        props->page_size_cap = dev->adapter_caps.page_size_cap;
        props->max_qp = dev->adapter_caps.max_qp_count;
@@ -596,6 +602,8 @@ int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
        props->max_ah = INT_MAX;
        props->max_pkeys = 1;
        props->local_ca_ack_delay = MANA_CA_ACK_DELAY;
+       if (!mana_ib_is_rnic(dev))
+               props->raw_packet_caps = IB_RAW_PACKET_CAP_IP_CSUM;
 
        return 0;
 }
@@ -603,6 +611,7 @@ int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
 int mana_ib_query_port(struct ib_device *ibdev, u32 port,
                       struct ib_port_attr *props)
 {
+       struct mana_ib_dev *dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
        struct net_device *ndev = mana_ib_get_netdev(ibdev, port);
 
        if (!ndev)
@@ -623,7 +632,7 @@ int mana_ib_query_port(struct ib_device *ibdev, u32 port,
        props->active_width = IB_WIDTH_4X;
        props->active_speed = IB_SPEED_EDR;
        props->pkey_tbl_len = 1;
-       if (port == 1) {
+       if (mana_ib_is_rnic(dev)) {
                props->gid_tbl_len = 16;
                props->port_cap_flags = IB_PORT_CM_SUP;
                props->ip_gids = true;
@@ -703,6 +712,37 @@ int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev)
        return 0;
 }
 
+int mana_eth_query_adapter_caps(struct mana_ib_dev *dev)
+{
+       struct mana_ib_adapter_caps *caps = &dev->adapter_caps;
+       struct gdma_query_max_resources_resp resp = {};
+       struct gdma_general_req req = {};
+       int err;
+
+       mana_gd_init_req_hdr(&req.hdr, GDMA_QUERY_MAX_RESOURCES,
+                            sizeof(req), sizeof(resp));
+
+       err = mana_gd_send_request(mdev_to_gc(dev), sizeof(req), &req, sizeof(resp), &resp);
+       if (err) {
+               ibdev_err(&dev->ib_dev,
+                         "Failed to query adapter caps err %d", err);
+               return err;
+       }
+
+       caps->max_qp_count = min_t(u32, resp.max_sq, resp.max_rq);
+       caps->max_cq_count = resp.max_cq;
+       caps->max_mr_count = resp.max_mst;
+       caps->max_pd_count = 0x6000;
+       caps->max_qp_wr = min_t(u32,
+                               0x100000 / GDMA_MAX_SQE_SIZE,
+                               0x100000 / GDMA_MAX_RQE_SIZE);
+       caps->max_send_sge_count = 30;
+       caps->max_recv_sge_count = 15;
+       caps->page_size_cap = PAGE_SZ_BM;
+
+       return 0;
+}
+
 static void
 mana_ib_event_handler(void *ctx, struct gdma_queue *q, struct gdma_event *event)
 {
@@ -921,6 +961,9 @@ int mana_ib_gd_create_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq, u32 do
        struct mana_rnic_create_cq_req req = {};
        int err;
 
+       if (!mdev->eqs)
+               return -EINVAL;
+
        mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_CQ, sizeof(req), sizeof(resp));
        req.hdr.dev_id = gc->mana_ib.dev_id;
        req.adapter = mdev->adapter_handle;
index f0dbd90b860076f3c0b27eff21f1ac63d0c046e6..42bebd6cd4f7a6dfec1cd23316222e2b27084c36 100644 (file)
@@ -544,6 +544,11 @@ static inline void mana_put_qp_ref(struct mana_ib_qp *qp)
                complete(&qp->free);
 }
 
+static inline bool mana_ib_is_rnic(struct mana_ib_dev *mdev)
+{
+       return mdev->gdma_dev->dev_id.type == GDMA_DEVICE_MANA_IB;
+}
+
 static inline struct net_device *mana_ib_get_netdev(struct ib_device *ibdev, u32 port)
 {
        struct mana_ib_dev *mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
@@ -643,6 +648,7 @@ int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
 void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext);
 
 int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *mdev);
+int mana_eth_query_adapter_caps(struct mana_ib_dev *mdev);
 
 int mana_ib_create_eqs(struct mana_ib_dev *mdev);