]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
RDMA/rxe: support perf mgmt GET method
author zhenwei pi <zhenwei.pi@linux.dev>
Tue, 14 Apr 2026 06:29:47 +0000 (14:29 +0800)
committer Leon Romanovsky <leon@kernel.org>
Mon, 18 May 2026 08:58:41 +0000 (04:58 -0400)
In RXE, hardware counters are already supported, but not in a
standardized manner. For instance, user-space monitoring tools like
atop only read from the *counters* directory. Therefore, it is
necessary to add perf management support to RXE.

Also use rxe_counter_get instead of raw atomic64_read in hw-counters.

Signed-off-by: zhenwei pi <zhenwei.pi@linux.dev>
Link: https://patch.msgid.link/20260414062948.671658-4-zhenwei.pi@linux.dev
Signed-off-by: Leon Romanovsky <leon@kernel.org>
drivers/infiniband/sw/rxe/Makefile
drivers/infiniband/sw/rxe/rxe_loc.h
drivers/infiniband/sw/rxe/rxe_mad.c [new file with mode: 0644]
drivers/infiniband/sw/rxe/rxe_verbs.c

index 3977f4f13258b3f74a84958aa1e5d18b579a2e3b..e097c1ca1874fc712aec746f864d3c8d8fcb9911 100644 (file)
@@ -23,6 +23,7 @@ rdma_rxe-y := \
        rxe_task.o \
        rxe_net.o \
        rxe_hw_counters.o \
+       rxe_mad.o \
        rxe_ns.o
 
 rdma_rxe-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += rxe_odp.o
index e095c12699cb0830a0ff53f6bf35f17a80a19b88..64d636bf80fd2c51cdea24ced76a169e617899cc 100644 (file)
@@ -242,4 +242,10 @@ static inline int rxe_ib_advise_mr(struct ib_pd *pd,
 
 #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 
+/* rxe_mad.c */
+int rxe_process_mad(struct ib_device *ibdev, int mad_flags, u32 port_num,
+                   const struct ib_wc *in_wc, const struct ib_grh *in_grh,
+                   const struct ib_mad *in, struct ib_mad *out,
+                   size_t *out_mad_size, u16 *out_mad_pkey_index);
+
 #endif /* RXE_LOC_H */
diff --git a/drivers/infiniband/sw/rxe/rxe_mad.c b/drivers/infiniband/sw/rxe/rxe_mad.c
new file mode 100644 (file)
index 0000000..cb2d558
--- /dev/null
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2026 zhenwei pi <zhenwei.pi@linux.dev>
+ */
+
+#include <rdma/ib_pma.h>
+#include "rxe.h"
+#include "rxe_hw_counters.h"
+
+/*
+ * Build the reply payload for a PerfMgmt ClassPortInfo query.
+ *
+ * A ClassPortInfo whose capability mask is IB_PMA_CLASS_CAP_EXT_WIDTH and
+ * whose other fields are all zero is copied to byte offset 40 of @out->data.
+ *
+ * Return: IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY.
+ */
+static int rxe_get_pma_info(struct ib_mad *out)
+{
+       const struct ib_class_port_info class_info = {
+               .capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH,
+       };
+       void *payload = out->data + 40;
+
+       memcpy(payload, &class_info, sizeof(class_info));
+
+       return IB_MAD_RESULT_REPLY | IB_MAD_RESULT_SUCCESS;
+}
+
+/*
+ * Fill the PortCounters attribute of a PerfMgmt reply.
+ *
+ * The attribute is overlaid at byte offset 40 of @out->data. Only
+ * link_downed_counter is written here, from the RXE_CNT_LINK_DOWNED
+ * statistic of @rxe; no other field is touched by this function.
+ *
+ * Return: IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY.
+ */
+static int rxe_get_pma_counters(struct rxe_dev *rxe, struct ib_mad *out)
+{
+       struct ib_pma_portcounters *counters;
+       s64 link_downed;
+
+       counters = (struct ib_pma_portcounters *)(out->data + 40);
+       link_downed = atomic64_read(&rxe->stats_counters[RXE_CNT_LINK_DOWNED]);
+
+       /*
+        * IBA release 1.8, 16.1.3.5: counters do not wrap during operation,
+        * they stop at all ones. Saturate the 64-bit statistic into the
+        * 0..U8_MAX range before storing it.
+        */
+       counters->link_downed_counter = clamp(link_downed, 0, U8_MAX);
+
+       return IB_MAD_RESULT_REPLY | IB_MAD_RESULT_SUCCESS;
+}
+
+/*
+ * Fill the PortCountersExtended attribute of a PerfMgmt reply.
+ *
+ * The attribute is overlaid at byte offset 40 of @out->data and four fields
+ * are written as big-endian 64-bit values taken from the statistics of @rxe:
+ *   port_xmit_data    <- RXE_CNT_SENT_BYTES >> 2
+ *   port_rcv_data     <- RXE_CNT_RCVD_BYTES >> 2
+ *   port_xmit_packets <- RXE_CNT_SENT_PKTS
+ *   port_rcv_packets  <- RXE_CNT_RCVD_PKTS
+ *
+ * Return: IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY.
+ */
+static int rxe_get_pma_counters_ext(struct rxe_dev *rxe, struct ib_mad *out)
+{
+       struct ib_pma_portcounters_ext *ext;
+       s64 tx_bytes, rx_bytes, tx_pkts, rx_pkts;
+
+       ext = (struct ib_pma_portcounters_ext *)(out->data + 40);
+
+       tx_bytes = atomic64_read(&rxe->stats_counters[RXE_CNT_SENT_BYTES]);
+       rx_bytes = atomic64_read(&rxe->stats_counters[RXE_CNT_RCVD_BYTES]);
+       tx_pkts = atomic64_read(&rxe->stats_counters[RXE_CNT_SENT_PKTS]);
+       rx_pkts = atomic64_read(&rxe->stats_counters[RXE_CNT_RCVD_PKTS]);
+
+       /* The data fields carry the byte statistics divided by four. */
+       ext->port_xmit_data = cpu_to_be64(tx_bytes >> 2);
+       ext->port_rcv_data = cpu_to_be64(rx_bytes >> 2);
+
+       /* Packet statistics are reported unscaled. */
+       ext->port_xmit_packets = cpu_to_be64(tx_pkts);
+       ext->port_rcv_packets = cpu_to_be64(rx_pkts);
+
+       return IB_MAD_RESULT_REPLY | IB_MAD_RESULT_SUCCESS;
+}
+
+/*
+ * Dispatch a PerfMgmt GET on the attribute ID of the incoming MAD.
+ *
+ * IB_PMA_CLASS_PORT_INFO, IB_PMA_PORT_COUNTERS and IB_PMA_PORT_COUNTERS_EXT
+ * are handed to their respective helpers, whose result is returned as is.
+ * For any other attribute, the status of @out is set to
+ * IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB and IB_MAD_RESULT_SUCCESS
+ * is returned without IB_MAD_RESULT_REPLY.
+ */
+static int rxe_get_perf_mgmt(struct rxe_dev *rxe, const struct ib_mad *in, struct ib_mad *out)
+{
+       if (in->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO)
+               return rxe_get_pma_info(out);
+
+       if (in->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS)
+               return rxe_get_pma_counters(rxe, out);
+
+       if (in->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT)
+               return rxe_get_pma_counters_ext(rxe, out);
+
+       /* None of the attributes handled above. */
+       out->mad_hdr.status = cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB);
+
+       return IB_MAD_RESULT_SUCCESS;
+}
+
+/*
+ * rxe_process_mad - process_mad handler for rxe devices
+ * @ibdev: device the MAD was addressed to
+ * @mad_flags: not used by this handler
+ * @port_num: port number; only port 1 is accepted
+ * @in_wc: not used by this handler
+ * @in_grh: not used by this handler
+ * @in: incoming MAD; management class and method are read from its header
+ * @out: reply MAD; cleared in full before any class handling
+ * @out_mad_size: not used by this handler
+ * @out_mad_pkey_index: not used by this handler
+ *
+ * Only IB_MGMT_CLASS_PERF_MGMT with IB_MGMT_METHOD_GET is dispatched (to
+ * rxe_get_perf_mgmt()).
+ *
+ * Return: IB_MAD_RESULT_FAILURE when @port_num is not 1. For a management
+ * class other than PerfMgmt, IB_MAD_RESULT_SUCCESS with the status of @out
+ * set to IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD. For PerfMgmt GET, whatever
+ * rxe_get_perf_mgmt() returns. For PerfMgmt with any other method,
+ * IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY with @out left zeroed.
+ */
+int rxe_process_mad(struct ib_device *ibdev, int mad_flags, u32 port_num,
+                   const struct ib_wc *in_wc, const struct ib_grh *in_grh,
+                   const struct ib_mad *in, struct ib_mad *out,
+                   size_t *out_mad_size, u16 *out_mad_pkey_index)
+{
+       struct rxe_dev *rxe = to_rdev(ibdev);
+       /* Latch the request header fields before the reply is cleared. */
+       const u8 req_class = in->mad_hdr.mgmt_class;
+       const u8 req_method = in->mad_hdr.method;
+
+       if (port_num != 1)
+               return IB_MAD_RESULT_FAILURE;
+
+       memset(out, 0, sizeof(*out));
+
+       if (req_class != IB_MGMT_CLASS_PERF_MGMT) {
+               out->mad_hdr.status = cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD);
+               return IB_MAD_RESULT_SUCCESS;
+       }
+
+       if (req_method == IB_MGMT_METHOD_GET)
+               return rxe_get_perf_mgmt(rxe, in, out);
+
+       /*
+        * NOTE(review): a PerfMgmt request with a method other than GET is
+        * answered with the zeroed MAD and no status set - confirm that this
+        * is the intended reply.
+        */
+       return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+}
index eb17b6086d5e0cd037d432bd9de44d58c8db5723..8edd4dd1f031f4a247eeb572f7c20eb871eb70e7 100644 (file)
@@ -1496,6 +1496,7 @@ static const struct ib_device_ops rxe_dev_ops = {
        .post_recv = rxe_post_recv,
        .post_send = rxe_post_send,
        .post_srq_recv = rxe_post_srq_recv,
+       .process_mad = rxe_process_mad,
        .query_ah = rxe_query_ah,
        .query_device = rxe_query_device,
        .query_pkey = rxe_query_pkey,