LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH mlx5-next v3 01/13] net/mlx5: Add ifc bits to support optional counters
       [not found] <cover.1633513239.git.leonro@nvidia.com>
@ 2021-10-06  9:52 ` Leon Romanovsky
  2021-10-06  9:52 ` [PATCH mlx5-next v3 02/13] net/mlx5: Add priorities for counters in RDMA namespaces Leon Romanovsky
                   ` (11 subsequent siblings)
  12 siblings, 0 replies; 15+ messages in thread
From: Leon Romanovsky @ 2021-10-06  9:52 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Aharon Landau, Dennis Dalessandro, Gal Pressman, Jakub Kicinski,
	linux-kernel, linux-rdma, Maor Gottlieb, Mark Zhang,
	Mike Marciniszyn, Mustafa Ismail, netdev, Potnuri Bharat Teja,
	Saeed Mahameed, Selvin Xavier, Shiraz Saleem, Yishai Hadas,
	Zhu Yanjun

From: Aharon Landau <aharonl@nvidia.com>

Adding bth_opcode field and the relevant bits. This field will be used
to capture and count congestion notification packets (CNP).

Adding source_vhca_port support bit.
This field will be used to check the capability to use the
source_vhca_port as a match criteria in cases of dual port.

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 include/linux/mlx5/mlx5_ifc.h | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index c1cef8f35e00..399ea52171fe 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -342,7 +342,7 @@ struct mlx5_ifc_flow_table_fields_supported_bits {
 	u8         outer_geneve_oam[0x1];
 	u8         outer_geneve_protocol_type[0x1];
 	u8         outer_geneve_opt_len[0x1];
-	u8         reserved_at_1e[0x1];
+	u8         source_vhca_port[0x1];
 	u8         source_eswitch_port[0x1];
 
 	u8         inner_dmac[0x1];
@@ -393,6 +393,14 @@ struct mlx5_ifc_flow_table_fields_supported_bits {
 	u8         metadata_reg_c_0[0x1];
 };
 
+struct mlx5_ifc_flow_table_fields_supported_2_bits {
+	u8         reserved_at_0[0xe];
+	u8         bth_opcode[0x1];
+	u8         reserved_at_f[0x11];
+
+	u8         reserved_at_20[0x60];
+};
+
 struct mlx5_ifc_flow_table_prop_layout_bits {
 	u8         ft_support[0x1];
 	u8         reserved_at_1[0x1];
@@ -539,7 +547,7 @@ struct mlx5_ifc_fte_match_set_misc_bits {
 	union mlx5_ifc_gre_key_bits gre_key;
 
 	u8         vxlan_vni[0x18];
-	u8         reserved_at_b8[0x8];
+	u8         bth_opcode[0x8];
 
 	u8         geneve_vni[0x18];
 	u8         reserved_at_d8[0x7];
@@ -756,7 +764,15 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
 
 	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit_sniffer;
 
-	u8         reserved_at_e00[0x1200];
+	u8         reserved_at_e00[0x700];
+
+	struct mlx5_ifc_flow_table_fields_supported_2_bits ft_field_support_2_nic_receive_rdma;
+
+	u8         reserved_at_1580[0x280];
+
+	struct mlx5_ifc_flow_table_fields_supported_2_bits ft_field_support_2_nic_transmit_rdma;
+
+	u8         reserved_at_1880[0x780];
 
 	u8         sw_steering_nic_rx_action_drop_icm_address[0x40];
 
-- 
2.31.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH mlx5-next v3 02/13] net/mlx5: Add priorities for counters in RDMA namespaces
       [not found] <cover.1633513239.git.leonro@nvidia.com>
  2021-10-06  9:52 ` [PATCH mlx5-next v3 01/13] net/mlx5: Add ifc bits to support optional counters Leon Romanovsky
@ 2021-10-06  9:52 ` Leon Romanovsky
  2021-10-06  9:52 ` [PATCH rdma-next v3 03/13] RDMA/counter: Add a descriptor in struct rdma_hw_stats Leon Romanovsky
                   ` (10 subsequent siblings)
  12 siblings, 0 replies; 15+ messages in thread
From: Leon Romanovsky @ 2021-10-06  9:52 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Aharon Landau, Dennis Dalessandro, Gal Pressman, Jakub Kicinski,
	linux-kernel, linux-rdma, Maor Gottlieb, Mark Zhang,
	Mike Marciniszyn, Mustafa Ismail, netdev, Potnuri Bharat Teja,
	Saeed Mahameed, Selvin Xavier, Shiraz Saleem, Yishai Hadas,
	Zhu Yanjun

From: Aharon Landau <aharonl@nvidia.com>

Add additional flow steering priorities in the RDMA namespace.
This allows adding flow counters to count filtered RDMA traffic and then
continue processing in the regular RDMA steering flow.

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/fs_core.c | 54 ++++++++++++++++---
 include/linux/mlx5/device.h                   |  2 +
 include/linux/mlx5/fs.h                       |  2 +
 3 files changed, 50 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index fe501ba88bea..71a08f84d49d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -99,6 +99,9 @@
 #define LEFTOVERS_NUM_LEVELS 1
 #define LEFTOVERS_NUM_PRIOS 1
 
+#define RDMA_RX_COUNTERS_PRIO_NUM_LEVELS 1
+#define RDMA_TX_COUNTERS_PRIO_NUM_LEVELS 1
+
 #define BY_PASS_PRIO_NUM_LEVELS 1
 #define BY_PASS_MIN_LEVEL (ETHTOOL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\
 			   LEFTOVERS_NUM_PRIOS)
@@ -206,34 +209,63 @@ static struct init_tree_node egress_root_fs = {
 	}
 };
 
-#define RDMA_RX_BYPASS_PRIO 0
-#define RDMA_RX_KERNEL_PRIO 1
+enum {
+	RDMA_RX_COUNTERS_PRIO,
+	RDMA_RX_BYPASS_PRIO,
+	RDMA_RX_KERNEL_PRIO,
+};
+
+#define RDMA_RX_BYPASS_MIN_LEVEL MLX5_BY_PASS_NUM_REGULAR_PRIOS
+#define RDMA_RX_KERNEL_MIN_LEVEL (RDMA_RX_BYPASS_MIN_LEVEL + 1)
+#define RDMA_RX_COUNTERS_MIN_LEVEL (RDMA_RX_KERNEL_MIN_LEVEL + 2)
+
 static struct init_tree_node rdma_rx_root_fs = {
 	.type = FS_TYPE_NAMESPACE,
-	.ar_size = 2,
+	.ar_size = 3,
 	.children = (struct init_tree_node[]) {
+		[RDMA_RX_COUNTERS_PRIO] =
+		ADD_PRIO(0, RDMA_RX_COUNTERS_MIN_LEVEL, 0,
+			 FS_CHAINING_CAPS,
+			 ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+				ADD_MULTIPLE_PRIO(MLX5_RDMA_RX_NUM_COUNTERS_PRIOS,
+						  RDMA_RX_COUNTERS_PRIO_NUM_LEVELS))),
 		[RDMA_RX_BYPASS_PRIO] =
-		ADD_PRIO(0, MLX5_BY_PASS_NUM_REGULAR_PRIOS, 0,
+		ADD_PRIO(0, RDMA_RX_BYPASS_MIN_LEVEL, 0,
 			 FS_CHAINING_CAPS,
 			 ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
 				ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_REGULAR_PRIOS,
 						  BY_PASS_PRIO_NUM_LEVELS))),
 		[RDMA_RX_KERNEL_PRIO] =
-		ADD_PRIO(0, MLX5_BY_PASS_NUM_REGULAR_PRIOS + 1, 0,
+		ADD_PRIO(0, RDMA_RX_KERNEL_MIN_LEVEL, 0,
 			 FS_CHAINING_CAPS,
 			 ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_SWITCH_DOMAIN,
 				ADD_MULTIPLE_PRIO(1, 1))),
 	}
 };
 
+enum {
+	RDMA_TX_COUNTERS_PRIO,
+	RDMA_TX_BYPASS_PRIO,
+};
+
+#define RDMA_TX_BYPASS_MIN_LEVEL MLX5_BY_PASS_NUM_PRIOS
+#define RDMA_TX_COUNTERS_MIN_LEVEL (RDMA_TX_BYPASS_MIN_LEVEL + 1)
+
 static struct init_tree_node rdma_tx_root_fs = {
 	.type = FS_TYPE_NAMESPACE,
-	.ar_size = 1,
+	.ar_size = 2,
 	.children = (struct init_tree_node[]) {
-		ADD_PRIO(0, MLX5_BY_PASS_NUM_PRIOS, 0,
+		[RDMA_TX_COUNTERS_PRIO] =
+		ADD_PRIO(0, RDMA_TX_COUNTERS_MIN_LEVEL, 0,
+			 FS_CHAINING_CAPS,
+			 ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+				ADD_MULTIPLE_PRIO(MLX5_RDMA_TX_NUM_COUNTERS_PRIOS,
+						  RDMA_TX_COUNTERS_PRIO_NUM_LEVELS))),
+		[RDMA_TX_BYPASS_PRIO] =
+		ADD_PRIO(0, RDMA_TX_BYPASS_MIN_LEVEL, 0,
 			 FS_CHAINING_CAPS_RDMA_TX,
 			 ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
-				ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
+				ADD_MULTIPLE_PRIO(RDMA_TX_BYPASS_MIN_LEVEL,
 						  BY_PASS_PRIO_NUM_LEVELS))),
 	}
 };
@@ -2215,6 +2247,12 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
 		prio = RDMA_RX_KERNEL_PRIO;
 	} else if (type == MLX5_FLOW_NAMESPACE_RDMA_TX) {
 		root_ns = steering->rdma_tx_root_ns;
+	} else if (type == MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS) {
+		root_ns = steering->rdma_rx_root_ns;
+		prio = RDMA_RX_COUNTERS_PRIO;
+	} else if (type == MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS) {
+		root_ns = steering->rdma_tx_root_ns;
+		prio = RDMA_TX_COUNTERS_PRIO;
 	} else { /* Must be NIC RX */
 		root_ns = steering->root_ns;
 		prio = type;
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 66eaf0aa7f69..ed0230ff9422 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1456,6 +1456,8 @@ static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz)
 	return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz;
 }
 
+#define MLX5_RDMA_RX_NUM_COUNTERS_PRIOS 2
+#define MLX5_RDMA_TX_NUM_COUNTERS_PRIOS 1
 #define MLX5_BY_PASS_NUM_REGULAR_PRIOS 16
 #define MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS 16
 #define MLX5_BY_PASS_NUM_MULTICAST_PRIOS 1
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 0106c67e8ccb..f2c3da2006d9 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -83,6 +83,8 @@ enum mlx5_flow_namespace_type {
 	MLX5_FLOW_NAMESPACE_RDMA_RX,
 	MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL,
 	MLX5_FLOW_NAMESPACE_RDMA_TX,
+	MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS,
+	MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS,
 };
 
 enum {
-- 
2.31.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH rdma-next v3 03/13] RDMA/counter: Add a descriptor in struct rdma_hw_stats
       [not found] <cover.1633513239.git.leonro@nvidia.com>
  2021-10-06  9:52 ` [PATCH mlx5-next v3 01/13] net/mlx5: Add ifc bits to support optional counters Leon Romanovsky
  2021-10-06  9:52 ` [PATCH mlx5-next v3 02/13] net/mlx5: Add priorities for counters in RDMA namespaces Leon Romanovsky
@ 2021-10-06  9:52 ` Leon Romanovsky
  2021-10-06  9:52 ` [PATCH rdma-next v3 04/13] RDMA/core: Add a helper API rdma_free_hw_stats_struct Leon Romanovsky
                   ` (9 subsequent siblings)
  12 siblings, 0 replies; 15+ messages in thread
From: Leon Romanovsky @ 2021-10-06  9:52 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Aharon Landau, David S. Miller, Dennis Dalessandro, Gal Pressman,
	Jakub Kicinski, linux-kernel, linux-rdma, Maor Gottlieb,
	Mark Zhang, Mike Marciniszyn, Mustafa Ismail, netdev,
	Potnuri Bharat Teja, Saeed Mahameed, Selvin Xavier,
	Shiraz Saleem, Yishai Hadas, Zhu Yanjun

From: Aharon Landau <aharonl@nvidia.com>

Add a counter statistic descriptor structure in rdma_hw_stats. In
addition to the counter name, more meta-information will be added.
This code extension is needed for optional-counter support in the
following patches.

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/core/nldev.c             |   6 +-
 drivers/infiniband/core/sysfs.c             |   8 +-
 drivers/infiniband/hw/bnxt_re/hw_counters.c | 137 ++++++++++----------
 drivers/infiniband/hw/cxgb4/provider.c      |  22 ++--
 drivers/infiniband/hw/efa/efa_verbs.c       |  19 +--
 drivers/infiniband/hw/hfi1/verbs.c          |  53 ++++----
 drivers/infiniband/hw/irdma/verbs.c         | 100 +++++++-------
 drivers/infiniband/hw/mlx4/main.c           |  44 +++----
 drivers/infiniband/hw/mlx4/mlx4_ib.h        |   2 +-
 drivers/infiniband/hw/mlx5/counters.c       |  41 +++---
 drivers/infiniband/hw/mlx5/mlx5_ib.h        |   2 +-
 drivers/infiniband/sw/rxe/rxe_hw_counters.c |  42 +++---
 include/rdma/ib_verbs.h                     |  21 ++-
 13 files changed, 252 insertions(+), 245 deletions(-)

diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index e9b4b2cccaa0..3f6b98a87566 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -969,7 +969,8 @@ static int fill_stat_counter_hwcounters(struct sk_buff *msg,
 		return -EMSGSIZE;
 
 	for (i = 0; i < st->num_counters; i++)
-		if (rdma_nl_stat_hwcounter_entry(msg, st->names[i], st->value[i]))
+		if (rdma_nl_stat_hwcounter_entry(msg, st->descs[i].name,
+						 st->value[i]))
 			goto err;
 
 	nla_nest_end(msg, table_attr);
@@ -2105,7 +2106,8 @@ static int stat_get_doit_default_counter(struct sk_buff *skb,
 	for (i = 0; i < num_cnts; i++) {
 		v = stats->value[i] +
 			rdma_counter_get_hwstat_value(device, port, i);
-		if (rdma_nl_stat_hwcounter_entry(msg, stats->names[i], v)) {
+		if (rdma_nl_stat_hwcounter_entry(msg,
+						 stats->descs[i].name, v)) {
 			ret = -EMSGSIZE;
 			goto err_table;
 		}
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 6146c3c1cbe5..c3663cfdcd52 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -895,7 +895,7 @@ alloc_hw_stats_device(struct ib_device *ibdev)
 	stats = ibdev->ops.alloc_hw_device_stats(ibdev);
 	if (!stats)
 		return ERR_PTR(-ENOMEM);
-	if (!stats->names || stats->num_counters <= 0)
+	if (!stats->descs || stats->num_counters <= 0)
 		goto err_free_stats;
 
 	/*
@@ -957,7 +957,7 @@ int ib_setup_device_attrs(struct ib_device *ibdev)
 	for (i = 0; i < data->stats->num_counters; i++) {
 		attr = &data->attrs[i];
 		sysfs_attr_init(&attr->attr.attr);
-		attr->attr.attr.name = data->stats->names[i];
+		attr->attr.attr.name = data->stats->descs[i].name;
 		attr->attr.attr.mode = 0444;
 		attr->attr.show = hw_stat_device_show;
 		attr->show = show_hw_stats;
@@ -994,7 +994,7 @@ alloc_hw_stats_port(struct ib_port *port, struct attribute_group *group)
 	stats = ibdev->ops.alloc_hw_port_stats(port->ibdev, port->port_num);
 	if (!stats)
 		return ERR_PTR(-ENOMEM);
-	if (!stats->names || stats->num_counters <= 0)
+	if (!stats->descs || stats->num_counters <= 0)
 		goto err_free_stats;
 
 	/*
@@ -1047,7 +1047,7 @@ static int setup_hw_port_stats(struct ib_port *port,
 	for (i = 0; i < data->stats->num_counters; i++) {
 		attr = &data->attrs[i];
 		sysfs_attr_init(&attr->attr.attr);
-		attr->attr.attr.name = data->stats->names[i];
+		attr->attr.attr.name = data->stats->descs[i].name;
 		attr->attr.attr.mode = 0444;
 		attr->attr.show = hw_stat_port_show;
 		attr->show = show_hw_stats;
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c
index 1c06c9cf234b..78ca6dfd182b 100644
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.c
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c
@@ -57,74 +57,72 @@
 #include "bnxt_re.h"
 #include "hw_counters.h"
 
-static const char * const bnxt_re_stat_name[] = {
-	[BNXT_RE_ACTIVE_PD]		=  "active_pds",
-	[BNXT_RE_ACTIVE_AH]		=  "active_ahs",
-	[BNXT_RE_ACTIVE_QP]		=  "active_qps",
-	[BNXT_RE_ACTIVE_SRQ]		=  "active_srqs",
-	[BNXT_RE_ACTIVE_CQ]		=  "active_cqs",
-	[BNXT_RE_ACTIVE_MR]		=  "active_mrs",
-	[BNXT_RE_ACTIVE_MW]		=  "active_mws",
-	[BNXT_RE_RX_PKTS]		=  "rx_pkts",
-	[BNXT_RE_RX_BYTES]		=  "rx_bytes",
-	[BNXT_RE_TX_PKTS]		=  "tx_pkts",
-	[BNXT_RE_TX_BYTES]		=  "tx_bytes",
-	[BNXT_RE_RECOVERABLE_ERRORS]	=  "recoverable_errors",
-	[BNXT_RE_RX_ERRORS]		=  "rx_roce_errors",
-	[BNXT_RE_RX_DISCARDS]		=  "rx_roce_discards",
-	[BNXT_RE_TO_RETRANSMITS]        = "to_retransmits",
-	[BNXT_RE_SEQ_ERR_NAKS_RCVD]     = "seq_err_naks_rcvd",
-	[BNXT_RE_MAX_RETRY_EXCEEDED]    = "max_retry_exceeded",
-	[BNXT_RE_RNR_NAKS_RCVD]         = "rnr_naks_rcvd",
-	[BNXT_RE_MISSING_RESP]          = "missing_resp",
-	[BNXT_RE_UNRECOVERABLE_ERR]     = "unrecoverable_err",
-	[BNXT_RE_BAD_RESP_ERR]          = "bad_resp_err",
-	[BNXT_RE_LOCAL_QP_OP_ERR]       = "local_qp_op_err",
-	[BNXT_RE_LOCAL_PROTECTION_ERR]  = "local_protection_err",
-	[BNXT_RE_MEM_MGMT_OP_ERR]       = "mem_mgmt_op_err",
-	[BNXT_RE_REMOTE_INVALID_REQ_ERR] = "remote_invalid_req_err",
-	[BNXT_RE_REMOTE_ACCESS_ERR]     = "remote_access_err",
-	[BNXT_RE_REMOTE_OP_ERR]         = "remote_op_err",
-	[BNXT_RE_DUP_REQ]               = "dup_req",
-	[BNXT_RE_RES_EXCEED_MAX]        = "res_exceed_max",
-	[BNXT_RE_RES_LENGTH_MISMATCH]   = "res_length_mismatch",
-	[BNXT_RE_RES_EXCEEDS_WQE]       = "res_exceeds_wqe",
-	[BNXT_RE_RES_OPCODE_ERR]        = "res_opcode_err",
-	[BNXT_RE_RES_RX_INVALID_RKEY]   = "res_rx_invalid_rkey",
-	[BNXT_RE_RES_RX_DOMAIN_ERR]     = "res_rx_domain_err",
-	[BNXT_RE_RES_RX_NO_PERM]        = "res_rx_no_perm",
-	[BNXT_RE_RES_RX_RANGE_ERR]      = "res_rx_range_err",
-	[BNXT_RE_RES_TX_INVALID_RKEY]   = "res_tx_invalid_rkey",
-	[BNXT_RE_RES_TX_DOMAIN_ERR]     = "res_tx_domain_err",
-	[BNXT_RE_RES_TX_NO_PERM]        = "res_tx_no_perm",
-	[BNXT_RE_RES_TX_RANGE_ERR]      = "res_tx_range_err",
-	[BNXT_RE_RES_IRRQ_OFLOW]        = "res_irrq_oflow",
-	[BNXT_RE_RES_UNSUP_OPCODE]      = "res_unsup_opcode",
-	[BNXT_RE_RES_UNALIGNED_ATOMIC]  = "res_unaligned_atomic",
-	[BNXT_RE_RES_REM_INV_ERR]       = "res_rem_inv_err",
-	[BNXT_RE_RES_MEM_ERROR]         = "res_mem_err",
-	[BNXT_RE_RES_SRQ_ERR]           = "res_srq_err",
-	[BNXT_RE_RES_CMP_ERR]           = "res_cmp_err",
-	[BNXT_RE_RES_INVALID_DUP_RKEY]  = "res_invalid_dup_rkey",
-	[BNXT_RE_RES_WQE_FORMAT_ERR]    = "res_wqe_format_err",
-	[BNXT_RE_RES_CQ_LOAD_ERR]       = "res_cq_load_err",
-	[BNXT_RE_RES_SRQ_LOAD_ERR]      = "res_srq_load_err",
-	[BNXT_RE_RES_TX_PCI_ERR]        = "res_tx_pci_err",
-	[BNXT_RE_RES_RX_PCI_ERR]        = "res_rx_pci_err",
-	[BNXT_RE_OUT_OF_SEQ_ERR]        = "oos_drop_count",
-	[BNXT_RE_TX_ATOMIC_REQ]		= "tx_atomic_req",
-	[BNXT_RE_TX_READ_REQ]		= "tx_read_req",
-	[BNXT_RE_TX_READ_RES]		= "tx_read_resp",
-	[BNXT_RE_TX_WRITE_REQ]		= "tx_write_req",
-	[BNXT_RE_TX_SEND_REQ]		= "tx_send_req",
-	[BNXT_RE_RX_ATOMIC_REQ]		= "rx_atomic_req",
-	[BNXT_RE_RX_READ_REQ]		= "rx_read_req",
-	[BNXT_RE_RX_READ_RESP]		= "rx_read_resp",
-	[BNXT_RE_RX_WRITE_REQ]		= "rx_write_req",
-	[BNXT_RE_RX_SEND_REQ]		= "rx_send_req",
-	[BNXT_RE_RX_ROCE_GOOD_PKTS]	= "rx_roce_good_pkts",
-	[BNXT_RE_RX_ROCE_GOOD_BYTES]	= "rx_roce_good_bytes",
-	[BNXT_RE_OOB]			= "rx_out_of_buffer"
+static const struct rdma_stat_desc bnxt_re_stat_descs[] = {
+	[BNXT_RE_ACTIVE_QP].name		=  "active_qps",
+	[BNXT_RE_ACTIVE_SRQ].name		=  "active_srqs",
+	[BNXT_RE_ACTIVE_CQ].name		=  "active_cqs",
+	[BNXT_RE_ACTIVE_MR].name		=  "active_mrs",
+	[BNXT_RE_ACTIVE_MW].name		=  "active_mws",
+	[BNXT_RE_RX_PKTS].name		=  "rx_pkts",
+	[BNXT_RE_RX_BYTES].name		=  "rx_bytes",
+	[BNXT_RE_TX_PKTS].name		=  "tx_pkts",
+	[BNXT_RE_TX_BYTES].name		=  "tx_bytes",
+	[BNXT_RE_RECOVERABLE_ERRORS].name	=  "recoverable_errors",
+	[BNXT_RE_RX_ERRORS].name		=  "rx_roce_errors",
+	[BNXT_RE_RX_DISCARDS].name		=  "rx_roce_discards",
+	[BNXT_RE_TO_RETRANSMITS].name        = "to_retransmits",
+	[BNXT_RE_SEQ_ERR_NAKS_RCVD].name     = "seq_err_naks_rcvd",
+	[BNXT_RE_MAX_RETRY_EXCEEDED].name    = "max_retry_exceeded",
+	[BNXT_RE_RNR_NAKS_RCVD].name         = "rnr_naks_rcvd",
+	[BNXT_RE_MISSING_RESP].name          = "missing_resp",
+	[BNXT_RE_UNRECOVERABLE_ERR].name     = "unrecoverable_err",
+	[BNXT_RE_BAD_RESP_ERR].name          = "bad_resp_err",
+	[BNXT_RE_LOCAL_QP_OP_ERR].name       = "local_qp_op_err",
+	[BNXT_RE_LOCAL_PROTECTION_ERR].name  = "local_protection_err",
+	[BNXT_RE_MEM_MGMT_OP_ERR].name       = "mem_mgmt_op_err",
+	[BNXT_RE_REMOTE_INVALID_REQ_ERR].name = "remote_invalid_req_err",
+	[BNXT_RE_REMOTE_ACCESS_ERR].name     = "remote_access_err",
+	[BNXT_RE_REMOTE_OP_ERR].name         = "remote_op_err",
+	[BNXT_RE_DUP_REQ].name               = "dup_req",
+	[BNXT_RE_RES_EXCEED_MAX].name        = "res_exceed_max",
+	[BNXT_RE_RES_LENGTH_MISMATCH].name   = "res_length_mismatch",
+	[BNXT_RE_RES_EXCEEDS_WQE].name       = "res_exceeds_wqe",
+	[BNXT_RE_RES_OPCODE_ERR].name        = "res_opcode_err",
+	[BNXT_RE_RES_RX_INVALID_RKEY].name   = "res_rx_invalid_rkey",
+	[BNXT_RE_RES_RX_DOMAIN_ERR].name     = "res_rx_domain_err",
+	[BNXT_RE_RES_RX_NO_PERM].name        = "res_rx_no_perm",
+	[BNXT_RE_RES_RX_RANGE_ERR].name      = "res_rx_range_err",
+	[BNXT_RE_RES_TX_INVALID_RKEY].name   = "res_tx_invalid_rkey",
+	[BNXT_RE_RES_TX_DOMAIN_ERR].name     = "res_tx_domain_err",
+	[BNXT_RE_RES_TX_NO_PERM].name        = "res_tx_no_perm",
+	[BNXT_RE_RES_TX_RANGE_ERR].name      = "res_tx_range_err",
+	[BNXT_RE_RES_IRRQ_OFLOW].name        = "res_irrq_oflow",
+	[BNXT_RE_RES_UNSUP_OPCODE].name      = "res_unsup_opcode",
+	[BNXT_RE_RES_UNALIGNED_ATOMIC].name  = "res_unaligned_atomic",
+	[BNXT_RE_RES_REM_INV_ERR].name       = "res_rem_inv_err",
+	[BNXT_RE_RES_MEM_ERROR].name         = "res_mem_err",
+	[BNXT_RE_RES_SRQ_ERR].name           = "res_srq_err",
+	[BNXT_RE_RES_CMP_ERR].name           = "res_cmp_err",
+	[BNXT_RE_RES_INVALID_DUP_RKEY].name  = "res_invalid_dup_rkey",
+	[BNXT_RE_RES_WQE_FORMAT_ERR].name    = "res_wqe_format_err",
+	[BNXT_RE_RES_CQ_LOAD_ERR].name       = "res_cq_load_err",
+	[BNXT_RE_RES_SRQ_LOAD_ERR].name      = "res_srq_load_err",
+	[BNXT_RE_RES_TX_PCI_ERR].name        = "res_tx_pci_err",
+	[BNXT_RE_RES_RX_PCI_ERR].name        = "res_rx_pci_err",
+	[BNXT_RE_OUT_OF_SEQ_ERR].name        = "oos_drop_count",
+	[BNXT_RE_TX_ATOMIC_REQ].name	     = "tx_atomic_req",
+	[BNXT_RE_TX_READ_REQ].name	     = "tx_read_req",
+	[BNXT_RE_TX_READ_RES].name	     = "tx_read_resp",
+	[BNXT_RE_TX_WRITE_REQ].name	     = "tx_write_req",
+	[BNXT_RE_TX_SEND_REQ].name	     = "tx_send_req",
+	[BNXT_RE_RX_ATOMIC_REQ].name	     = "rx_atomic_req",
+	[BNXT_RE_RX_READ_REQ].name	     = "rx_read_req",
+	[BNXT_RE_RX_READ_RESP].name	     = "rx_read_resp",
+	[BNXT_RE_RX_WRITE_REQ].name	     = "rx_write_req",
+	[BNXT_RE_RX_SEND_REQ].name	     = "rx_send_req",
+	[BNXT_RE_RX_ROCE_GOOD_PKTS].name     = "rx_roce_good_pkts",
+	[BNXT_RE_RX_ROCE_GOOD_BYTES].name    = "rx_roce_good_bytes",
+	[BNXT_RE_OOB].name		     = "rx_out_of_buffer"
 };
 
 static void bnxt_re_copy_ext_stats(struct bnxt_re_dev *rdev,
@@ -322,7 +320,6 @@ struct rdma_hw_stats *bnxt_re_ib_alloc_hw_port_stats(struct ib_device *ibdev,
 	else
 		num_counters = BNXT_RE_NUM_STD_COUNTERS;
 
-	return rdma_alloc_hw_stats_struct(bnxt_re_stat_name,
-					  num_counters,
+	return rdma_alloc_hw_stats_struct(bnxt_re_stat_descs, num_counters,
 					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
 }
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index e7337662aff8..0c8fd5a85fcb 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -366,23 +366,23 @@ enum counters {
 	NR_COUNTERS
 };
 
-static const char * const names[] = {
-	[IP4INSEGS] = "ip4InSegs",
-	[IP4OUTSEGS] = "ip4OutSegs",
-	[IP4RETRANSSEGS] = "ip4RetransSegs",
-	[IP4OUTRSTS] = "ip4OutRsts",
-	[IP6INSEGS] = "ip6InSegs",
-	[IP6OUTSEGS] = "ip6OutSegs",
-	[IP6RETRANSSEGS] = "ip6RetransSegs",
-	[IP6OUTRSTS] = "ip6OutRsts"
+static const struct rdma_stat_desc cxgb4_descs[] = {
+	[IP4INSEGS].name = "ip4InSegs",
+	[IP4OUTSEGS].name = "ip4OutSegs",
+	[IP4RETRANSSEGS].name = "ip4RetransSegs",
+	[IP4OUTRSTS].name = "ip4OutRsts",
+	[IP6INSEGS].name = "ip6InSegs",
+	[IP6OUTSEGS].name = "ip6OutSegs",
+	[IP6RETRANSSEGS].name = "ip6RetransSegs",
+	[IP6OUTRSTS].name = "ip6OutRsts"
 };
 
 static struct rdma_hw_stats *c4iw_alloc_device_stats(struct ib_device *ibdev)
 {
-	BUILD_BUG_ON(ARRAY_SIZE(names) != NR_COUNTERS);
+	BUILD_BUG_ON(ARRAY_SIZE(cxgb4_descs) != NR_COUNTERS);
 
 	/* FIXME: these look like port stats */
-	return rdma_alloc_hw_stats_struct(names, NR_COUNTERS,
+	return rdma_alloc_hw_stats_struct(cxgb4_descs, NR_COUNTERS,
 					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
 }
 
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index e5f9d90aad5e..35d818b38e77 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -60,13 +60,14 @@ struct efa_user_mmap_entry {
 	op(EFA_RDMA_READ_RESP_BYTES, "rdma_read_resp_bytes") \
 
 #define EFA_STATS_ENUM(ename, name) ename,
-#define EFA_STATS_STR(ename, name) [ename] = name,
+#define EFA_STATS_STR(ename, nam) \
+	[ename].name = nam,
 
 enum efa_hw_device_stats {
 	EFA_DEFINE_DEVICE_STATS(EFA_STATS_ENUM)
 };
 
-static const char *const efa_device_stats_names[] = {
+static const struct rdma_stat_desc efa_device_stats_descs[] = {
 	EFA_DEFINE_DEVICE_STATS(EFA_STATS_STR)
 };
 
@@ -74,7 +75,7 @@ enum efa_hw_port_stats {
 	EFA_DEFINE_PORT_STATS(EFA_STATS_ENUM)
 };
 
-static const char *const efa_port_stats_names[] = {
+static const struct rdma_stat_desc efa_port_stats_descs[] = {
 	EFA_DEFINE_PORT_STATS(EFA_STATS_STR)
 };
 
@@ -1906,15 +1907,15 @@ int efa_destroy_ah(struct ib_ah *ibah, u32 flags)
 struct rdma_hw_stats *efa_alloc_hw_port_stats(struct ib_device *ibdev,
 					      u32 port_num)
 {
-	return rdma_alloc_hw_stats_struct(efa_port_stats_names,
-					  ARRAY_SIZE(efa_port_stats_names),
+	return rdma_alloc_hw_stats_struct(efa_port_stats_descs,
+					  ARRAY_SIZE(efa_port_stats_descs),
 					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
 }
 
 struct rdma_hw_stats *efa_alloc_hw_device_stats(struct ib_device *ibdev)
 {
-	return rdma_alloc_hw_stats_struct(efa_device_stats_names,
-					  ARRAY_SIZE(efa_device_stats_names),
+	return rdma_alloc_hw_stats_struct(efa_device_stats_descs,
+					  ARRAY_SIZE(efa_device_stats_descs),
 					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
 }
 
@@ -1939,7 +1940,7 @@ static int efa_fill_device_stats(struct efa_dev *dev,
 	stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->create_ah_err);
 	stats->value[EFA_MMAP_ERR] = atomic64_read(&s->mmap_err);
 
-	return ARRAY_SIZE(efa_device_stats_names);
+	return ARRAY_SIZE(efa_device_stats_descs);
 }
 
 static int efa_fill_port_stats(struct efa_dev *dev, struct rdma_hw_stats *stats,
@@ -1988,7 +1989,7 @@ static int efa_fill_port_stats(struct efa_dev *dev, struct rdma_hw_stats *stats,
 	stats->value[EFA_RDMA_READ_WR_ERR] = rrs->read_wr_err;
 	stats->value[EFA_RDMA_READ_RESP_BYTES] = rrs->read_resp_bytes;
 
-	return ARRAY_SIZE(efa_port_stats_names);
+	return ARRAY_SIZE(efa_port_stats_descs);
 }
 
 int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 26bea51869bf..ed9fa0d84e9e 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1602,8 +1602,8 @@ static const char * const driver_cntr_names[] = {
 };
 
 static DEFINE_MUTEX(cntr_names_lock); /* protects the *_cntr_names bufers */
-static const char **dev_cntr_names;
-static const char **port_cntr_names;
+static struct rdma_stat_desc *dev_cntr_descs;
+static struct rdma_stat_desc *port_cntr_descs;
 int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names);
 static int num_dev_cntrs;
 static int num_port_cntrs;
@@ -1614,13 +1614,12 @@ static int cntr_names_initialized;
  * strings. Optionally some entries can be reserved in the array to hold extra
  * external strings.
  */
-static int init_cntr_names(const char *names_in,
-			   const size_t names_len,
-			   int num_extra_names,
-			   int *num_cntrs,
-			   const char ***cntr_names)
+static int init_cntr_names(const char *names_in, const size_t names_len,
+			   int num_extra_names, int *num_cntrs,
+			   struct rdma_stat_desc **cntr_descs)
 {
-	char *names_out, *p, **q;
+	struct rdma_stat_desc *q;
+	char *names_out, *p;
 	int i, n;
 
 	n = 0;
@@ -1628,26 +1627,28 @@ static int init_cntr_names(const char *names_in,
 		if (names_in[i] == '\n')
 			n++;
 
-	names_out = kmalloc((n + num_extra_names) * sizeof(char *) + names_len,
-			    GFP_KERNEL);
+	names_out =
+		kmalloc((n + num_extra_names) * sizeof(struct rdma_stat_desc) +
+				names_len,
+			GFP_KERNEL);
 	if (!names_out) {
 		*num_cntrs = 0;
-		*cntr_names = NULL;
+		*cntr_descs = NULL;
 		return -ENOMEM;
 	}
 
-	p = names_out + (n + num_extra_names) * sizeof(char *);
+	p = names_out + (n + num_extra_names) * sizeof(struct rdma_stat_desc);
 	memcpy(p, names_in, names_len);
 
-	q = (char **)names_out;
+	q = (struct rdma_stat_desc *)names_out;
 	for (i = 0; i < n; i++) {
-		q[i] = p;
+		q[i].name = p;
 		p = strchr(p, '\n');
 		*p++ = '\0';
 	}
 
 	*num_cntrs = n;
-	*cntr_names = (const char **)names_out;
+	*cntr_descs = (struct rdma_stat_desc *)names_out;
 	return 0;
 }
 
@@ -1661,18 +1662,18 @@ static int init_counters(struct ib_device *ibdev)
 		goto out_unlock;
 
 	err = init_cntr_names(dd->cntrnames, dd->cntrnameslen, num_driver_cntrs,
-			      &num_dev_cntrs, &dev_cntr_names);
+			      &num_dev_cntrs, &dev_cntr_descs);
 	if (err)
 		goto out_unlock;
 
 	for (i = 0; i < num_driver_cntrs; i++)
-		dev_cntr_names[num_dev_cntrs + i] = driver_cntr_names[i];
+		dev_cntr_descs[num_dev_cntrs + i].name = driver_cntr_names[i];
 
 	err = init_cntr_names(dd->portcntrnames, dd->portcntrnameslen, 0,
-			      &num_port_cntrs, &port_cntr_names);
+			      &num_port_cntrs, &port_cntr_descs);
 	if (err) {
-		kfree(dev_cntr_names);
-		dev_cntr_names = NULL;
+		kfree(dev_cntr_descs);
+		dev_cntr_descs = NULL;
 		goto out_unlock;
 	}
 	cntr_names_initialized = 1;
@@ -1686,7 +1687,7 @@ static struct rdma_hw_stats *hfi1_alloc_hw_device_stats(struct ib_device *ibdev)
 {
 	if (init_counters(ibdev))
 		return NULL;
-	return rdma_alloc_hw_stats_struct(dev_cntr_names,
+	return rdma_alloc_hw_stats_struct(dev_cntr_descs,
 					  num_dev_cntrs + num_driver_cntrs,
 					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
 }
@@ -1696,7 +1697,7 @@ static struct rdma_hw_stats *hfi_alloc_hw_port_stats(struct ib_device *ibdev,
 {
 	if (init_counters(ibdev))
 		return NULL;
-	return rdma_alloc_hw_stats_struct(port_cntr_names, num_port_cntrs,
+	return rdma_alloc_hw_stats_struct(port_cntr_descs, num_port_cntrs,
 					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
 }
 
@@ -1921,10 +1922,10 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd)
 	verbs_txreq_exit(dev);
 
 	mutex_lock(&cntr_names_lock);
-	kfree(dev_cntr_names);
-	kfree(port_cntr_names);
-	dev_cntr_names = NULL;
-	port_cntr_names = NULL;
+	kfree(dev_cntr_descs);
+	kfree(port_cntr_descs);
+	dev_cntr_descs = NULL;
+	port_cntr_descs = NULL;
 	cntr_names_initialized = 0;
 	mutex_unlock(&cntr_names_lock);
 }
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index 7110ebf834f9..6a544c253603 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -3647,89 +3647,89 @@ static int irdma_iw_port_immutable(struct ib_device *ibdev, u32 port_num,
 	return 0;
 }
 
-static const char *const irdma_hw_stat_names[] = {
+static const struct rdma_stat_desc irdma_hw_stat_descs[] = {
 	/* 32bit names */
-	[IRDMA_HW_STAT_INDEX_RXVLANERR] = "rxVlanErrors",
-	[IRDMA_HW_STAT_INDEX_IP4RXDISCARD] = "ip4InDiscards",
-	[IRDMA_HW_STAT_INDEX_IP4RXTRUNC] = "ip4InTruncatedPkts",
-	[IRDMA_HW_STAT_INDEX_IP4TXNOROUTE] = "ip4OutNoRoutes",
-	[IRDMA_HW_STAT_INDEX_IP6RXDISCARD] = "ip6InDiscards",
-	[IRDMA_HW_STAT_INDEX_IP6RXTRUNC] = "ip6InTruncatedPkts",
-	[IRDMA_HW_STAT_INDEX_IP6TXNOROUTE] = "ip6OutNoRoutes",
-	[IRDMA_HW_STAT_INDEX_TCPRTXSEG] = "tcpRetransSegs",
-	[IRDMA_HW_STAT_INDEX_TCPRXOPTERR] = "tcpInOptErrors",
-	[IRDMA_HW_STAT_INDEX_TCPRXPROTOERR] = "tcpInProtoErrors",
-	[IRDMA_HW_STAT_INDEX_RXRPCNPHANDLED] = "cnpHandled",
-	[IRDMA_HW_STAT_INDEX_RXRPCNPIGNORED] = "cnpIgnored",
-	[IRDMA_HW_STAT_INDEX_TXNPCNPSENT] = "cnpSent",
+	[IRDMA_HW_STAT_INDEX_RXVLANERR].name = "rxVlanErrors",
+	[IRDMA_HW_STAT_INDEX_IP4RXDISCARD].name = "ip4InDiscards",
+	[IRDMA_HW_STAT_INDEX_IP4RXTRUNC].name = "ip4InTruncatedPkts",
+	[IRDMA_HW_STAT_INDEX_IP4TXNOROUTE].name = "ip4OutNoRoutes",
+	[IRDMA_HW_STAT_INDEX_IP6RXDISCARD].name = "ip6InDiscards",
+	[IRDMA_HW_STAT_INDEX_IP6RXTRUNC].name = "ip6InTruncatedPkts",
+	[IRDMA_HW_STAT_INDEX_IP6TXNOROUTE].name = "ip6OutNoRoutes",
+	[IRDMA_HW_STAT_INDEX_TCPRTXSEG].name = "tcpRetransSegs",
+	[IRDMA_HW_STAT_INDEX_TCPRXOPTERR].name = "tcpInOptErrors",
+	[IRDMA_HW_STAT_INDEX_TCPRXPROTOERR].name = "tcpInProtoErrors",
+	[IRDMA_HW_STAT_INDEX_RXRPCNPHANDLED].name = "cnpHandled",
+	[IRDMA_HW_STAT_INDEX_RXRPCNPIGNORED].name = "cnpIgnored",
+	[IRDMA_HW_STAT_INDEX_TXNPCNPSENT].name = "cnpSent",
 
 	/* 64bit names */
-	[IRDMA_HW_STAT_INDEX_IP4RXOCTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_IP4RXOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"ip4InOctets",
-	[IRDMA_HW_STAT_INDEX_IP4RXPKTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_IP4RXPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"ip4InPkts",
-	[IRDMA_HW_STAT_INDEX_IP4RXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_IP4RXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"ip4InReasmRqd",
-	[IRDMA_HW_STAT_INDEX_IP4RXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_IP4RXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"ip4InMcastOctets",
-	[IRDMA_HW_STAT_INDEX_IP4RXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_IP4RXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"ip4InMcastPkts",
-	[IRDMA_HW_STAT_INDEX_IP4TXOCTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_IP4TXOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"ip4OutOctets",
-	[IRDMA_HW_STAT_INDEX_IP4TXPKTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_IP4TXPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"ip4OutPkts",
-	[IRDMA_HW_STAT_INDEX_IP4TXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_IP4TXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"ip4OutSegRqd",
-	[IRDMA_HW_STAT_INDEX_IP4TXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_IP4TXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"ip4OutMcastOctets",
-	[IRDMA_HW_STAT_INDEX_IP4TXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_IP4TXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"ip4OutMcastPkts",
-	[IRDMA_HW_STAT_INDEX_IP6RXOCTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_IP6RXOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"ip6InOctets",
-	[IRDMA_HW_STAT_INDEX_IP6RXPKTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_IP6RXPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"ip6InPkts",
-	[IRDMA_HW_STAT_INDEX_IP6RXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_IP6RXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"ip6InReasmRqd",
-	[IRDMA_HW_STAT_INDEX_IP6RXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_IP6RXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"ip6InMcastOctets",
-	[IRDMA_HW_STAT_INDEX_IP6RXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_IP6RXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"ip6InMcastPkts",
-	[IRDMA_HW_STAT_INDEX_IP6TXOCTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_IP6TXOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"ip6OutOctets",
-	[IRDMA_HW_STAT_INDEX_IP6TXPKTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_IP6TXPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"ip6OutPkts",
-	[IRDMA_HW_STAT_INDEX_IP6TXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_IP6TXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"ip6OutSegRqd",
-	[IRDMA_HW_STAT_INDEX_IP6TXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_IP6TXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"ip6OutMcastOctets",
-	[IRDMA_HW_STAT_INDEX_IP6TXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_IP6TXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"ip6OutMcastPkts",
-	[IRDMA_HW_STAT_INDEX_TCPRXSEGS + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_TCPRXSEGS + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"tcpInSegs",
-	[IRDMA_HW_STAT_INDEX_TCPTXSEG + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_TCPTXSEG + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"tcpOutSegs",
-	[IRDMA_HW_STAT_INDEX_RDMARXRDS + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_RDMARXRDS + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"iwInRdmaReads",
-	[IRDMA_HW_STAT_INDEX_RDMARXSNDS + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_RDMARXSNDS + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"iwInRdmaSends",
-	[IRDMA_HW_STAT_INDEX_RDMARXWRS + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_RDMARXWRS + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"iwInRdmaWrites",
-	[IRDMA_HW_STAT_INDEX_RDMATXRDS + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_RDMATXRDS + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"iwOutRdmaReads",
-	[IRDMA_HW_STAT_INDEX_RDMATXSNDS + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_RDMATXSNDS + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"iwOutRdmaSends",
-	[IRDMA_HW_STAT_INDEX_RDMATXWRS + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_RDMATXWRS + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"iwOutRdmaWrites",
-	[IRDMA_HW_STAT_INDEX_RDMAVBND + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_RDMAVBND + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"iwRdmaBnd",
-	[IRDMA_HW_STAT_INDEX_RDMAVINV + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_RDMAVINV + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"iwRdmaInv",
-	[IRDMA_HW_STAT_INDEX_UDPRXPKTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_UDPRXPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"RxUDP",
-	[IRDMA_HW_STAT_INDEX_UDPTXPKTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+	[IRDMA_HW_STAT_INDEX_UDPTXPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name =
 		"TxUDP",
-	[IRDMA_HW_STAT_INDEX_RXNPECNMARKEDPKTS + IRDMA_HW_STAT_INDEX_MAX_32] =
-		"RxECNMrkd",
+	[IRDMA_HW_STAT_INDEX_RXNPECNMARKEDPKTS + IRDMA_HW_STAT_INDEX_MAX_32]
+		.name = "RxECNMrkd",
 };
 
 static void irdma_get_dev_fw_str(struct ib_device *dev, char *str)
@@ -3753,10 +3753,10 @@ static struct rdma_hw_stats *irdma_alloc_hw_port_stats(struct ib_device *ibdev,
 			   IRDMA_HW_STAT_INDEX_MAX_64;
 	unsigned long lifespan = RDMA_HW_STATS_DEFAULT_LIFESPAN;
 
-	BUILD_BUG_ON(ARRAY_SIZE(irdma_hw_stat_names) !=
+	BUILD_BUG_ON(ARRAY_SIZE(irdma_hw_stat_descs) !=
 		     (IRDMA_HW_STAT_INDEX_MAX_32 + IRDMA_HW_STAT_INDEX_MAX_64));
 
-	return rdma_alloc_hw_stats_struct(irdma_hw_stat_names, num_counters,
+	return rdma_alloc_hw_stats_struct(irdma_hw_stat_descs, num_counters,
 					  lifespan);
 }
 
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index f367f4a4abff..38742e233915 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -2105,10 +2105,10 @@ mlx4_ib_alloc_hw_device_stats(struct ib_device *ibdev)
 	struct mlx4_ib_dev *dev = to_mdev(ibdev);
 	struct mlx4_ib_diag_counters *diag = dev->diag_counters;
 
-	if (!diag[0].name)
+	if (!diag[0].descs)
 		return NULL;
 
-	return rdma_alloc_hw_stats_struct(diag[0].name, diag[0].num_counters,
+	return rdma_alloc_hw_stats_struct(diag[0].descs, diag[0].num_counters,
 					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
 }
 
@@ -2118,10 +2118,10 @@ mlx4_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
 	struct mlx4_ib_dev *dev = to_mdev(ibdev);
 	struct mlx4_ib_diag_counters *diag = dev->diag_counters;
 
-	if (!diag[1].name)
+	if (!diag[1].descs)
 		return NULL;
 
-	return rdma_alloc_hw_stats_struct(diag[1].name, diag[1].num_counters,
+	return rdma_alloc_hw_stats_struct(diag[1].descs, diag[1].num_counters,
 					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
 }
 
@@ -2151,10 +2151,8 @@ static int mlx4_ib_get_hw_stats(struct ib_device *ibdev,
 }
 
 static int __mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev,
-					 const char ***name,
-					 u32 **offset,
-					 u32 *num,
-					 bool port)
+					 struct rdma_stat_desc **pdescs,
+					 u32 **offset, u32 *num, bool port)
 {
 	u32 num_counters;
 
@@ -2166,46 +2164,46 @@ static int __mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev,
 	if (!port)
 		num_counters += ARRAY_SIZE(diag_device_only);
 
-	*name = kcalloc(num_counters, sizeof(**name), GFP_KERNEL);
-	if (!*name)
+	*pdescs = kcalloc(num_counters, sizeof(struct rdma_stat_desc),
+			  GFP_KERNEL);
+	if (!*pdescs)
 		return -ENOMEM;
 
 	*offset = kcalloc(num_counters, sizeof(**offset), GFP_KERNEL);
 	if (!*offset)
-		goto err_name;
+		goto err;
 
 	*num = num_counters;
 
 	return 0;
 
-err_name:
-	kfree(*name);
+err:
+	kfree(*pdescs);
 	return -ENOMEM;
 }
 
 static void mlx4_ib_fill_diag_counters(struct mlx4_ib_dev *ibdev,
-				       const char **name,
-				       u32 *offset,
-				       bool port)
+				       struct rdma_stat_desc *descs,
+				       u32 *offset, bool port)
 {
 	int i;
 	int j;
 
 	for (i = 0, j = 0; i < ARRAY_SIZE(diag_basic); i++, j++) {
-		name[i] = diag_basic[i].name;
+		descs[i].name = diag_basic[i].name;
 		offset[i] = diag_basic[i].offset;
 	}
 
 	if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT) {
 		for (i = 0; i < ARRAY_SIZE(diag_ext); i++, j++) {
-			name[j] = diag_ext[i].name;
+			descs[j].name = diag_ext[i].name;
 			offset[j] = diag_ext[i].offset;
 		}
 	}
 
 	if (!port) {
 		for (i = 0; i < ARRAY_SIZE(diag_device_only); i++, j++) {
-			name[j] = diag_device_only[i].name;
+			descs[j].name = diag_device_only[i].name;
 			offset[j] = diag_device_only[i].offset;
 		}
 	}
@@ -2233,13 +2231,13 @@ static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)
 		if (i && !per_port)
 			continue;
 
-		ret = __mlx4_ib_alloc_diag_counters(ibdev, &diag[i].name,
+		ret = __mlx4_ib_alloc_diag_counters(ibdev, &diag[i].descs,
 						    &diag[i].offset,
 						    &diag[i].num_counters, i);
 		if (ret)
 			goto err_alloc;
 
-		mlx4_ib_fill_diag_counters(ibdev, diag[i].name,
+		mlx4_ib_fill_diag_counters(ibdev, diag[i].descs,
 					   diag[i].offset, i);
 	}
 
@@ -2249,7 +2247,7 @@ static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)
 
 err_alloc:
 	if (i) {
-		kfree(diag[i - 1].name);
+		kfree(diag[i - 1].descs);
 		kfree(diag[i - 1].offset);
 	}
 
@@ -2262,7 +2260,7 @@ static void mlx4_ib_diag_cleanup(struct mlx4_ib_dev *ibdev)
 
 	for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
 		kfree(ibdev->diag_counters[i].offset);
-		kfree(ibdev->diag_counters[i].name);
+		kfree(ibdev->diag_counters[i].descs);
 	}
 }
 
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index c60f6e9ac640..d84023b4b1b8 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -601,7 +601,7 @@ struct mlx4_ib_counters {
 #define MLX4_DIAG_COUNTERS_TYPES 2
 
 struct mlx4_ib_diag_counters {
-	const char **name;
+	struct rdma_stat_desc *descs;
 	u32 *offset;
 	u32 num_counters;
 };
diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c
index 224ba36f2946..caa35ea14b48 100644
--- a/drivers/infiniband/hw/mlx5/counters.c
+++ b/drivers/infiniband/hw/mlx5/counters.c
@@ -167,7 +167,7 @@ mlx5_ib_alloc_hw_device_stats(struct ib_device *ibdev)
 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 	const struct mlx5_ib_counters *cnts = &dev->port[0].cnts;
 
-	return rdma_alloc_hw_stats_struct(cnts->names,
+	return rdma_alloc_hw_stats_struct(cnts->descs,
 					  cnts->num_q_counters +
 						  cnts->num_cong_counters +
 						  cnts->num_ext_ppcnt_counters,
@@ -180,7 +180,7 @@ mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 	const struct mlx5_ib_counters *cnts = &dev->port[port_num - 1].cnts;
 
-	return rdma_alloc_hw_stats_struct(cnts->names,
+	return rdma_alloc_hw_stats_struct(cnts->descs,
 					  cnts->num_q_counters +
 						  cnts->num_cong_counters +
 						  cnts->num_ext_ppcnt_counters,
@@ -302,7 +302,7 @@ mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
 	const struct mlx5_ib_counters *cnts =
 		get_counters(dev, counter->port - 1);
 
-	return rdma_alloc_hw_stats_struct(cnts->names,
+	return rdma_alloc_hw_stats_struct(cnts->descs,
 					  cnts->num_q_counters +
 					  cnts->num_cong_counters +
 					  cnts->num_ext_ppcnt_counters,
@@ -371,57 +371,55 @@ static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
 	return mlx5_ib_qp_set_counter(qp, NULL);
 }
 
-
 static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
-				  const char **names,
-				  size_t *offsets)
+				  struct rdma_stat_desc *descs, size_t *offsets)
 {
 	int i;
 	int j = 0;
 
 	for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
-		names[j] = basic_q_cnts[i].name;
+		descs[j].name = basic_q_cnts[i].name;
 		offsets[j] = basic_q_cnts[i].offset;
 	}
 
 	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
 		for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
-			names[j] = out_of_seq_q_cnts[i].name;
+			descs[j].name = out_of_seq_q_cnts[i].name;
 			offsets[j] = out_of_seq_q_cnts[i].offset;
 		}
 	}
 
 	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
 		for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
-			names[j] = retrans_q_cnts[i].name;
+			descs[j].name = retrans_q_cnts[i].name;
 			offsets[j] = retrans_q_cnts[i].offset;
 		}
 	}
 
 	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
 		for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
-			names[j] = extended_err_cnts[i].name;
+			descs[j].name = extended_err_cnts[i].name;
 			offsets[j] = extended_err_cnts[i].offset;
 		}
 	}
 
 	if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
 		for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) {
-			names[j] = roce_accl_cnts[i].name;
+			descs[j].name = roce_accl_cnts[i].name;
 			offsets[j] = roce_accl_cnts[i].offset;
 		}
 	}
 
 	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
 		for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
-			names[j] = cong_cnts[i].name;
+			descs[j].name = cong_cnts[i].name;
 			offsets[j] = cong_cnts[i].offset;
 		}
 	}
 
 	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
 		for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
-			names[j] = ext_ppcnt_cnts[i].name;
+			descs[j].name = ext_ppcnt_cnts[i].name;
 			offsets[j] = ext_ppcnt_cnts[i].offset;
 		}
 	}
@@ -457,20 +455,21 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
 		cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
 		num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
 	}
-	cnts->names = kcalloc(num_counters, sizeof(*cnts->names), GFP_KERNEL);
-	if (!cnts->names)
+	cnts->descs = kcalloc(num_counters,
+			      sizeof(struct rdma_stat_desc), GFP_KERNEL);
+	if (!cnts->descs)
 		return -ENOMEM;
 
 	cnts->offsets = kcalloc(num_counters,
 				sizeof(*cnts->offsets), GFP_KERNEL);
 	if (!cnts->offsets)
-		goto err_names;
+		goto err;
 
 	return 0;
 
-err_names:
-	kfree(cnts->names);
-	cnts->names = NULL;
+err:
+	kfree(cnts->descs);
+	cnts->descs = NULL;
 	return -ENOMEM;
 }
 
@@ -491,7 +490,7 @@ static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
 				 dev->port[i].cnts.set_id);
 			mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
 		}
-		kfree(dev->port[i].cnts.names);
+		kfree(dev->port[i].cnts.descs);
 		kfree(dev->port[i].cnts.offsets);
 	}
 }
@@ -514,7 +513,7 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
 		if (err)
 			goto err_alloc;
 
-		mlx5_ib_fill_counters(dev, dev->port[i].cnts.names,
+		mlx5_ib_fill_counters(dev, dev->port[i].cnts.descs,
 				      dev->port[i].cnts.offsets);
 
 		MLX5_SET(alloc_q_counter_in, in, uid,
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index bf20a388eabe..6f5451d96dd7 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -798,7 +798,7 @@ struct mlx5_ib_resources {
 };
 
 struct mlx5_ib_counters {
-	const char **names;
+	struct rdma_stat_desc *descs;
 	size_t *offsets;
 	u32 num_q_counters;
 	u32 num_cong_counters;
diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.c b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
index d5ceb706d964..a012522b577a 100644
--- a/drivers/infiniband/sw/rxe/rxe_hw_counters.c
+++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
@@ -6,22 +6,22 @@
 #include "rxe.h"
 #include "rxe_hw_counters.h"
 
-static const char * const rxe_counter_name[] = {
-	[RXE_CNT_SENT_PKTS]           =  "sent_pkts",
-	[RXE_CNT_RCVD_PKTS]           =  "rcvd_pkts",
-	[RXE_CNT_DUP_REQ]             =  "duplicate_request",
-	[RXE_CNT_OUT_OF_SEQ_REQ]      =  "out_of_seq_request",
-	[RXE_CNT_RCV_RNR]             =  "rcvd_rnr_err",
-	[RXE_CNT_SND_RNR]             =  "send_rnr_err",
-	[RXE_CNT_RCV_SEQ_ERR]         =  "rcvd_seq_err",
-	[RXE_CNT_COMPLETER_SCHED]     =  "ack_deferred",
-	[RXE_CNT_RETRY_EXCEEDED]      =  "retry_exceeded_err",
-	[RXE_CNT_RNR_RETRY_EXCEEDED]  =  "retry_rnr_exceeded_err",
-	[RXE_CNT_COMP_RETRY]          =  "completer_retry_err",
-	[RXE_CNT_SEND_ERR]            =  "send_err",
-	[RXE_CNT_LINK_DOWNED]         =  "link_downed",
-	[RXE_CNT_RDMA_SEND]           =  "rdma_sends",
-	[RXE_CNT_RDMA_RECV]           =  "rdma_recvs",
+static const struct rdma_stat_desc rxe_counter_descs[] = {
+	[RXE_CNT_SENT_PKTS].name           =  "sent_pkts",
+	[RXE_CNT_RCVD_PKTS].name           =  "rcvd_pkts",
+	[RXE_CNT_DUP_REQ].name             =  "duplicate_request",
+	[RXE_CNT_OUT_OF_SEQ_REQ].name      =  "out_of_seq_request",
+	[RXE_CNT_RCV_RNR].name             =  "rcvd_rnr_err",
+	[RXE_CNT_SND_RNR].name             =  "send_rnr_err",
+	[RXE_CNT_RCV_SEQ_ERR].name         =  "rcvd_seq_err",
+	[RXE_CNT_COMPLETER_SCHED].name     =  "ack_deferred",
+	[RXE_CNT_RETRY_EXCEEDED].name      =  "retry_exceeded_err",
+	[RXE_CNT_RNR_RETRY_EXCEEDED].name  =  "retry_rnr_exceeded_err",
+	[RXE_CNT_COMP_RETRY].name          =  "completer_retry_err",
+	[RXE_CNT_SEND_ERR].name            =  "send_err",
+	[RXE_CNT_LINK_DOWNED].name         =  "link_downed",
+	[RXE_CNT_RDMA_SEND].name           =  "rdma_sends",
+	[RXE_CNT_RDMA_RECV].name           =  "rdma_recvs",
 };
 
 int rxe_ib_get_hw_stats(struct ib_device *ibdev,
@@ -34,18 +34,18 @@ int rxe_ib_get_hw_stats(struct ib_device *ibdev,
 	if (!port || !stats)
 		return -EINVAL;
 
-	for (cnt = 0; cnt  < ARRAY_SIZE(rxe_counter_name); cnt++)
+	for (cnt = 0; cnt < ARRAY_SIZE(rxe_counter_descs); cnt++)
 		stats->value[cnt] = atomic64_read(&dev->stats_counters[cnt]);
 
-	return ARRAY_SIZE(rxe_counter_name);
+	return ARRAY_SIZE(rxe_counter_descs);
 }
 
 struct rdma_hw_stats *rxe_ib_alloc_hw_port_stats(struct ib_device *ibdev,
 						 u32 port_num)
 {
-	BUILD_BUG_ON(ARRAY_SIZE(rxe_counter_name) != RXE_NUM_OF_COUNTERS);
+	BUILD_BUG_ON(ARRAY_SIZE(rxe_counter_descs) != RXE_NUM_OF_COUNTERS);
 
-	return rdma_alloc_hw_stats_struct(rxe_counter_name,
-					  ARRAY_SIZE(rxe_counter_name),
+	return rdma_alloc_hw_stats_struct(rxe_counter_descs,
+					  ARRAY_SIZE(rxe_counter_descs),
 					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
 }
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 4b50d9a3018a..aa1e1029b736 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -545,6 +545,14 @@ enum ib_port_speed {
 	IB_SPEED_NDR	= 128,
 };
 
+/**
+ * struct rdma_stat_desc
+ * @name - The name of the counter
+ */
+struct rdma_stat_desc {
+	const char *name;
+};
+
 /**
  * struct rdma_hw_stats
  * @lock - Mutex to protect parallel write access to lifespan and values
@@ -555,8 +563,8 @@ enum ib_port_speed {
  *   should be before being updated again.  Stored in jiffies, defaults
  *   to 10 milliseconds, drivers can override the default be specifying
  *   their own value during their allocation routine.
- * @name - Array of pointers to static names used for the counters in
- *   directory.
+ * @descs - Array of pointers to static descriptors used for the counters
+ *   in directory.
  * @num_counters - How many hardware counters there are.  If name is
  *   shorter than this number, a kernel oops will result.  Driver authors
  *   are encouraged to leave BUILD_BUG_ON(ARRAY_SIZE(@name) < num_counters)
@@ -568,7 +576,7 @@ struct rdma_hw_stats {
 	struct mutex	lock; /* Protect lifespan and values[] */
 	unsigned long	timestamp;
 	unsigned long	lifespan;
-	const char * const *names;
+	const struct rdma_stat_desc *descs;
 	int		num_counters;
 	u64		value[];
 };
@@ -577,12 +585,12 @@ struct rdma_hw_stats {
 /**
  * rdma_alloc_hw_stats_struct - Helper function to allocate dynamic struct
  *   for drivers.
- * @names - Array of static const char *
+ * @descs - Array of static descriptors
  * @num_counters - How many elements in array
  * @lifespan - How many milliseconds between updates
  */
 static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
-		const char * const *names, int num_counters,
+		const struct rdma_stat_desc *descs, int num_counters,
 		unsigned long lifespan)
 {
 	struct rdma_hw_stats *stats;
@@ -591,7 +599,8 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
 			GFP_KERNEL);
 	if (!stats)
 		return NULL;
-	stats->names = names;
+
+	stats->descs = descs;
 	stats->num_counters = num_counters;
 	stats->lifespan = msecs_to_jiffies(lifespan);
 
-- 
2.31.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH rdma-next v3 04/13] RDMA/core: Add a helper API rdma_free_hw_stats_struct
       [not found] <cover.1633513239.git.leonro@nvidia.com>
                   ` (2 preceding siblings ...)
  2021-10-06  9:52 ` [PATCH rdma-next v3 03/13] RDMA/counter: Add a descriptor in struct rdma_hw_stats Leon Romanovsky
@ 2021-10-06  9:52 ` Leon Romanovsky
  2021-10-06  9:52 ` [PATCH rdma-next v3 05/13] RDMA/counter: Add an is_disabled field in struct rdma_hw_stats Leon Romanovsky
                   ` (8 subsequent siblings)
  12 siblings, 0 replies; 15+ messages in thread
From: Leon Romanovsky @ 2021-10-06  9:52 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Mark Zhang, Aharon Landau, David S. Miller, Dennis Dalessandro,
	Gal Pressman, Jakub Kicinski, linux-kernel, linux-rdma,
	Maor Gottlieb, Mike Marciniszyn, Mustafa Ismail, netdev,
	Potnuri Bharat Teja, Saeed Mahameed, Selvin Xavier,
	Shiraz Saleem, Yishai Hadas, Zhu Yanjun

From: Mark Zhang <markzhang@nvidia.com>

Add a new API rdma_free_hw_stats_struct to pair with
rdma_alloc_hw_stats_struct (which is also de-inlined).

This will be useful when there are more alloc/free works
in following patches.

Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/core/counters.c |  8 +++----
 drivers/infiniband/core/sysfs.c    |  8 +++----
 drivers/infiniband/core/verbs.c    | 35 ++++++++++++++++++++++++++++++
 include/rdma/ib_verbs.h            | 27 ++++-------------------
 4 files changed, 47 insertions(+), 31 deletions(-)

diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c
index df9e6c5e4ddf..331cd29f0d61 100644
--- a/drivers/infiniband/core/counters.c
+++ b/drivers/infiniband/core/counters.c
@@ -165,7 +165,7 @@ static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port,
 	return counter;
 
 err_mode:
-	kfree(counter->stats);
+	rdma_free_hw_stats_struct(counter->stats);
 err_stats:
 	rdma_restrack_put(&counter->res);
 	kfree(counter);
@@ -186,7 +186,7 @@ static void rdma_counter_free(struct rdma_counter *counter)
 	mutex_unlock(&port_counter->lock);
 
 	rdma_restrack_del(&counter->res);
-	kfree(counter->stats);
+	rdma_free_hw_stats_struct(counter->stats);
 	kfree(counter);
 }
 
@@ -618,7 +618,7 @@ void rdma_counter_init(struct ib_device *dev)
 fail:
 	for (i = port; i >= rdma_start_port(dev); i--) {
 		port_counter = &dev->port_data[port].port_counter;
-		kfree(port_counter->hstats);
+		rdma_free_hw_stats_struct(port_counter->hstats);
 		port_counter->hstats = NULL;
 		mutex_destroy(&port_counter->lock);
 	}
@@ -631,7 +631,7 @@ void rdma_counter_release(struct ib_device *dev)
 
 	rdma_for_each_port(dev, port) {
 		port_counter = &dev->port_data[port].port_counter;
-		kfree(port_counter->hstats);
+		rdma_free_hw_stats_struct(port_counter->hstats);
 		mutex_destroy(&port_counter->lock);
 	}
 }
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index c3663cfdcd52..8d831d4fd2ad 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -755,7 +755,7 @@ static void ib_port_release(struct kobject *kobj)
 	for (i = 0; i != ARRAY_SIZE(port->groups); i++)
 		kfree(port->groups[i].attrs);
 	if (port->hw_stats_data)
-		kfree(port->hw_stats_data->stats);
+		rdma_free_hw_stats_struct(port->hw_stats_data->stats);
 	kfree(port->hw_stats_data);
 	kfree(port);
 }
@@ -919,14 +919,14 @@ alloc_hw_stats_device(struct ib_device *ibdev)
 err_free_data:
 	kfree(data);
 err_free_stats:
-	kfree(stats);
+	rdma_free_hw_stats_struct(stats);
 	return ERR_PTR(-ENOMEM);
 }
 
 void ib_device_release_hw_stats(struct hw_stats_device_data *data)
 {
 	kfree(data->group.attrs);
-	kfree(data->stats);
+	rdma_free_hw_stats_struct(data->stats);
 	kfree(data);
 }
 
@@ -1018,7 +1018,7 @@ alloc_hw_stats_port(struct ib_port *port, struct attribute_group *group)
 err_free_data:
 	kfree(data);
 err_free_stats:
-	kfree(stats);
+	rdma_free_hw_stats_struct(stats);
 	return ERR_PTR(-ENOMEM);
 }
 
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 89a2b21976d6..c3319a7584a2 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -2976,3 +2976,38 @@ bool __rdma_block_iter_next(struct ib_block_iter *biter)
 	return true;
 }
 EXPORT_SYMBOL(__rdma_block_iter_next);
+
+/**
+ * rdma_alloc_hw_stats_struct - Helper function to allocate dynamic struct
+ *   for the drivers.
+ * @descs: array of static descriptors
+ * @num_counters: number of elments in array
+ * @lifespan: milliseconds between updates
+ */
+struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
+	const struct rdma_stat_desc *descs, int num_counters,
+	unsigned long lifespan)
+{
+	struct rdma_hw_stats *stats;
+
+	stats = kzalloc(struct_size(stats, value, num_counters), GFP_KERNEL);
+	if (!stats)
+		return NULL;
+
+	stats->descs = descs;
+	stats->num_counters = num_counters;
+	stats->lifespan = msecs_to_jiffies(lifespan);
+
+	return stats;
+}
+EXPORT_SYMBOL(rdma_alloc_hw_stats_struct);
+
+/**
+ * rdma_free_hw_stats_struct - Helper function to release rdma_hw_stats
+ * @stats: statistics to release
+ */
+void rdma_free_hw_stats_struct(struct rdma_hw_stats *stats)
+{
+	kfree(stats);
+}
+EXPORT_SYMBOL(rdma_free_hw_stats_struct);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index aa1e1029b736..938c0c0a1c19 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -582,31 +582,12 @@ struct rdma_hw_stats {
 };
 
 #define RDMA_HW_STATS_DEFAULT_LIFESPAN 10
-/**
- * rdma_alloc_hw_stats_struct - Helper function to allocate dynamic struct
- *   for drivers.
- * @descs - Array of static descriptors
- * @num_counters - How many elements in array
- * @lifespan - How many milliseconds between updates
- */
-static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
-		const struct rdma_stat_desc *descs, int num_counters,
-		unsigned long lifespan)
-{
-	struct rdma_hw_stats *stats;
 
-	stats = kzalloc(sizeof(*stats) + num_counters * sizeof(u64),
-			GFP_KERNEL);
-	if (!stats)
-		return NULL;
-
-	stats->descs = descs;
-	stats->num_counters = num_counters;
-	stats->lifespan = msecs_to_jiffies(lifespan);
-
-	return stats;
-}
+struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
+	const struct rdma_stat_desc *descs, int num_counters,
+	unsigned long lifespan);
 
+void rdma_free_hw_stats_struct(struct rdma_hw_stats *stats);
 
 /* Define bits for the various functionality this port needs to be supported by
  * the core.
-- 
2.31.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH rdma-next v3 05/13] RDMA/counter: Add an is_disabled field in struct rdma_hw_stats
       [not found] <cover.1633513239.git.leonro@nvidia.com>
                   ` (3 preceding siblings ...)
  2021-10-06  9:52 ` [PATCH rdma-next v3 04/13] RDMA/core: Add a helper API rdma_free_hw_stats_struct Leon Romanovsky
@ 2021-10-06  9:52 ` Leon Romanovsky
  2021-10-06 11:49   ` Leon Romanovsky
  2021-10-06  9:52 ` [PATCH rdma-next v3 06/13] RDMA/counter: Add optional counter support Leon Romanovsky
                   ` (7 subsequent siblings)
  12 siblings, 1 reply; 15+ messages in thread
From: Leon Romanovsky @ 2021-10-06  9:52 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Aharon Landau, David S. Miller, Dennis Dalessandro, Gal Pressman,
	Jakub Kicinski, linux-kernel, linux-rdma, Maor Gottlieb,
	Mark Zhang, Mike Marciniszyn, Mustafa Ismail, netdev,
	Potnuri Bharat Teja, Saeed Mahameed, Selvin Xavier,
	Shiraz Saleem, Yishai Hadas, Zhu Yanjun

From: Aharon Landau <aharonl@nvidia.com>

Add a bitmap in rdma_hw_stat structure, with each bit indicates whether
the corresponding counter is currently disabled or not. By default
hwcounters are enabled.

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/core/nldev.c | 11 ++++++++++-
 drivers/infiniband/core/verbs.c | 10 ++++++++++
 include/rdma/ib_verbs.h         |  3 +++
 3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 3f6b98a87566..67519730b1ac 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -968,15 +968,21 @@ static int fill_stat_counter_hwcounters(struct sk_buff *msg,
 	if (!table_attr)
 		return -EMSGSIZE;
 
-	for (i = 0; i < st->num_counters; i++)
+	mutex_lock(&st->lock);
+	for (i = 0; i < st->num_counters; i++) {
+		if (test_bit(i, st->is_disabled))
+			continue;
 		if (rdma_nl_stat_hwcounter_entry(msg, st->descs[i].name,
 						 st->value[i]))
 			goto err;
+	}
+	mutex_unlock(&st->lock);
 
 	nla_nest_end(msg, table_attr);
 	return 0;
 
 err:
+	mutex_unlock(&st->lock);
 	nla_nest_cancel(msg, table_attr);
 	return -EMSGSIZE;
 }
@@ -2104,6 +2110,9 @@ static int stat_get_doit_default_counter(struct sk_buff *skb,
 		goto err_stats;
 	}
 	for (i = 0; i < num_cnts; i++) {
+		if (test_bit(i, stats->is_disabled))
+			continue;
+
 		v = stats->value[i] +
 			rdma_counter_get_hwstat_value(device, port, i);
 		if (rdma_nl_stat_hwcounter_entry(msg,
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index c3319a7584a2..d957b23a0e23 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -2994,11 +2994,20 @@ struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
 	if (!stats)
 		return NULL;
 
+	stats->is_disabled = kcalloc(BITS_TO_LONGS(num_counters),
+				     sizeof(*stats->is_disabled), GFP_KERNEL);
+	if (!stats->is_disabled)
+		goto err;
+
 	stats->descs = descs;
 	stats->num_counters = num_counters;
 	stats->lifespan = msecs_to_jiffies(lifespan);
 
 	return stats;
+
+err:
+	kfree(stats);
+	return NULL;
 }
 EXPORT_SYMBOL(rdma_alloc_hw_stats_struct);
 
@@ -3008,6 +3017,7 @@ EXPORT_SYMBOL(rdma_alloc_hw_stats_struct);
  */
 void rdma_free_hw_stats_struct(struct rdma_hw_stats *stats)
 {
+	kfree(stats->is_disabled);
 	kfree(stats);
 }
 EXPORT_SYMBOL(rdma_free_hw_stats_struct);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 938c0c0a1c19..ae467365706b 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -565,6 +565,8 @@ struct rdma_stat_desc {
  *   their own value during their allocation routine.
  * @descs - Array of pointers to static descriptors used for the counters
  *   in directory.
+ * @is_disabled - A bitmap to indicate each counter is currently disabled
+ *   or not.
  * @num_counters - How many hardware counters there are.  If name is
  *   shorter than this number, a kernel oops will result.  Driver authors
  *   are encouraged to leave BUILD_BUG_ON(ARRAY_SIZE(@name) < num_counters)
@@ -577,6 +579,7 @@ struct rdma_hw_stats {
 	unsigned long	timestamp;
 	unsigned long	lifespan;
 	const struct rdma_stat_desc *descs;
+	unsigned long	*is_disabled;
 	int		num_counters;
 	u64		value[];
 };
-- 
2.31.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH rdma-next v3 06/13] RDMA/counter: Add optional counter support
       [not found] <cover.1633513239.git.leonro@nvidia.com>
                   ` (4 preceding siblings ...)
  2021-10-06  9:52 ` [PATCH rdma-next v3 05/13] RDMA/counter: Add an is_disabled field in struct rdma_hw_stats Leon Romanovsky
@ 2021-10-06  9:52 ` Leon Romanovsky
  2021-10-06  9:52 ` [PATCH rdma-next v3 07/13] RDMA/nldev: Add support to get status of all counters Leon Romanovsky
                   ` (6 subsequent siblings)
  12 siblings, 0 replies; 15+ messages in thread
From: Leon Romanovsky @ 2021-10-06  9:52 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Aharon Landau, David S. Miller, Dennis Dalessandro, Gal Pressman,
	Jakub Kicinski, linux-kernel, linux-rdma, Maor Gottlieb,
	Mark Zhang, Mike Marciniszyn, Mustafa Ismail, Neta Ostrovsky,
	netdev, Potnuri Bharat Teja, Saeed Mahameed, Selvin Xavier,
	Shiraz Saleem, Yishai Hadas, Zhu Yanjun

From: Aharon Landau <aharonl@nvidia.com>

An optional counter is a vendor-specific counter that may be dynamically
enabled/disabled.
This enhancement allows us to expose counters which are for example
mutual exclusive and cannot be enabled at the same time, counters that
might degrades performance, optional debug counters, etc.

Optional counters are marked with IB_STAT_FLAG_OPTIONAL flag. They are
not exported in sysfs, and must be at the end of all stats, otherwise
the attr->show() in sysfs would get wrong indexes for hwcounters that
are behind optional counters.

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Signed-off-by: Neta Ostrovsky <netao@nvidia.com>
Reviewed-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/core/counters.c | 32 ++++++++++++++++++++++++++
 drivers/infiniband/core/device.c   |  1 +
 drivers/infiniband/core/sysfs.c    | 36 +++++++++++++++++++++---------
 include/rdma/ib_verbs.h            | 13 +++++++++++
 include/rdma/rdma_counter.h        |  2 ++
 5 files changed, 74 insertions(+), 10 deletions(-)

diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c
index 331cd29f0d61..af59486fe418 100644
--- a/drivers/infiniband/core/counters.c
+++ b/drivers/infiniband/core/counters.c
@@ -106,6 +106,38 @@ static int __rdma_counter_bind_qp(struct rdma_counter *counter,
 	return ret;
 }
 
+int rdma_counter_modify(struct ib_device *dev, u32 port,
+			unsigned int index, bool enable)
+{
+	struct rdma_hw_stats *stats;
+	int ret = 0;
+
+	if (!dev->ops.modify_hw_stat)
+		return -EOPNOTSUPP;
+
+	stats = ib_get_hw_stats_port(dev, port);
+	if (!stats || index >= stats->num_counters ||
+	    !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL))
+		return -EINVAL;
+
+	mutex_lock(&stats->lock);
+
+	if (enable != test_bit(index, stats->is_disabled))
+		goto out;
+
+	ret = dev->ops.modify_hw_stat(dev, port, index, enable);
+	if (ret)
+		goto out;
+
+	if (enable)
+		clear_bit(index, stats->is_disabled);
+	else
+		set_bit(index, stats->is_disabled);
+out:
+	mutex_unlock(&stats->lock);
+	return ret;
+}
+
 static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port,
 					   struct ib_qp *qp,
 					   enum rdma_nl_counter_mode mode)
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index f4814bb7f082..22a4adda7981 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -2676,6 +2676,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
 	SET_DEVICE_OP(dev_ops, modify_cq);
 	SET_DEVICE_OP(dev_ops, modify_device);
 	SET_DEVICE_OP(dev_ops, modify_flow_action_esp);
+	SET_DEVICE_OP(dev_ops, modify_hw_stat);
 	SET_DEVICE_OP(dev_ops, modify_port);
 	SET_DEVICE_OP(dev_ops, modify_qp);
 	SET_DEVICE_OP(dev_ops, modify_srq);
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 8d831d4fd2ad..1bf3aea4b71e 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -934,7 +934,8 @@ int ib_setup_device_attrs(struct ib_device *ibdev)
 {
 	struct hw_stats_device_attribute *attr;
 	struct hw_stats_device_data *data;
-	int i, ret;
+	bool opstat_skipped = false;
+	int i, ret, pos = 0;
 
 	data = alloc_hw_stats_device(ibdev);
 	if (IS_ERR(data)) {
@@ -955,16 +956,23 @@ int ib_setup_device_attrs(struct ib_device *ibdev)
 	data->stats->timestamp = jiffies;
 
 	for (i = 0; i < data->stats->num_counters; i++) {
-		attr = &data->attrs[i];
+		if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) {
+			opstat_skipped = true;
+			continue;
+		}
+
+		WARN_ON(opstat_skipped);
+		attr = &data->attrs[pos];
 		sysfs_attr_init(&attr->attr.attr);
 		attr->attr.attr.name = data->stats->descs[i].name;
 		attr->attr.attr.mode = 0444;
 		attr->attr.show = hw_stat_device_show;
 		attr->show = show_hw_stats;
-		data->group.attrs[i] = &attr->attr.attr;
+		data->group.attrs[pos] = &attr->attr.attr;
+		pos++;
 	}
 
-	attr = &data->attrs[i];
+	attr = &data->attrs[pos];
 	sysfs_attr_init(&attr->attr.attr);
 	attr->attr.attr.name = "lifespan";
 	attr->attr.attr.mode = 0644;
@@ -972,7 +980,7 @@ int ib_setup_device_attrs(struct ib_device *ibdev)
 	attr->show = show_stats_lifespan;
 	attr->attr.store = hw_stat_device_store;
 	attr->store = set_stats_lifespan;
-	data->group.attrs[i] = &attr->attr.attr;
+	data->group.attrs[pos] = &attr->attr.attr;
 	for (i = 0; i != ARRAY_SIZE(ibdev->groups); i++)
 		if (!ibdev->groups[i]) {
 			ibdev->groups[i] = &data->group;
@@ -1027,7 +1035,8 @@ static int setup_hw_port_stats(struct ib_port *port,
 {
 	struct hw_stats_port_attribute *attr;
 	struct hw_stats_port_data *data;
-	int i, ret;
+	bool opstat_skipped = false;
+	int i, ret, pos = 0;
 
 	data = alloc_hw_stats_port(port, group);
 	if (IS_ERR(data))
@@ -1045,16 +1054,23 @@ static int setup_hw_port_stats(struct ib_port *port,
 	data->stats->timestamp = jiffies;
 
 	for (i = 0; i < data->stats->num_counters; i++) {
-		attr = &data->attrs[i];
+		if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) {
+			opstat_skipped = true;
+			continue;
+		}
+
+		WARN_ON(opstat_skipped);
+		attr = &data->attrs[pos];
 		sysfs_attr_init(&attr->attr.attr);
 		attr->attr.attr.name = data->stats->descs[i].name;
 		attr->attr.attr.mode = 0444;
 		attr->attr.show = hw_stat_port_show;
 		attr->show = show_hw_stats;
-		group->attrs[i] = &attr->attr.attr;
+		group->attrs[pos] = &attr->attr.attr;
+		pos++;
 	}
 
-	attr = &data->attrs[i];
+	attr = &data->attrs[pos];
 	sysfs_attr_init(&attr->attr.attr);
 	attr->attr.attr.name = "lifespan";
 	attr->attr.attr.mode = 0644;
@@ -1062,7 +1078,7 @@ static int setup_hw_port_stats(struct ib_port *port,
 	attr->show = show_stats_lifespan;
 	attr->attr.store = hw_stat_port_store;
 	attr->store = set_stats_lifespan;
-	group->attrs[i] = &attr->attr.attr;
+	group->attrs[pos] = &attr->attr.attr;
 
 	port->hw_stats_data = data;
 	return 0;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index ae467365706b..2207f60b002f 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -545,12 +545,18 @@ enum ib_port_speed {
 	IB_SPEED_NDR	= 128,
 };
 
+enum ib_stat_flag {
+	IB_STAT_FLAG_OPTIONAL = 1 << 0,
+};
+
 /**
  * struct rdma_stat_desc
  * @name - The name of the counter
+ * @flags - Flags of the counter; For example, IB_STAT_FLAG_OPTIONAL
  */
 struct rdma_stat_desc {
 	const char *name;
+	unsigned int flags;
 };
 
 /**
@@ -2562,6 +2568,13 @@ struct ib_device_ops {
 	int (*get_hw_stats)(struct ib_device *device,
 			    struct rdma_hw_stats *stats, u32 port, int index);
 
+	/**
+	 * modify_hw_stat - Modify the counter configuration
+	 * @enable: true/false when enable/disable a counter
+	 * Return codes - 0 on success or error code otherwise.
+	 */
+	int (*modify_hw_stat)(struct ib_device *device, u32 port,
+			      unsigned int counter_index, bool enable);
 	/**
 	 * Allows rdma drivers to add their own restrack attributes.
 	 */
diff --git a/include/rdma/rdma_counter.h b/include/rdma/rdma_counter.h
index 0295b22cd1cd..45d5481a7846 100644
--- a/include/rdma/rdma_counter.h
+++ b/include/rdma/rdma_counter.h
@@ -63,4 +63,6 @@ int rdma_counter_get_mode(struct ib_device *dev, u32 port,
 			  enum rdma_nl_counter_mode *mode,
 			  enum rdma_nl_counter_mask *mask);
 
+int rdma_counter_modify(struct ib_device *dev, u32 port,
+			unsigned int index, bool enable);
 #endif /* _RDMA_COUNTER_H_ */
-- 
2.31.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH rdma-next v3 07/13] RDMA/nldev: Add support to get status of all counters
       [not found] <cover.1633513239.git.leonro@nvidia.com>
                   ` (5 preceding siblings ...)
  2021-10-06  9:52 ` [PATCH rdma-next v3 06/13] RDMA/counter: Add optional counter support Leon Romanovsky
@ 2021-10-06  9:52 ` Leon Romanovsky
  2021-10-06  9:52 ` [PATCH rdma-next v3 08/13] RDMA/nldev: Split nldev_stat_set_mode_doit out of nldev_stat_set_doit Leon Romanovsky
                   ` (5 subsequent siblings)
  12 siblings, 0 replies; 15+ messages in thread
From: Leon Romanovsky @ 2021-10-06  9:52 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Aharon Landau, David S. Miller, Dennis Dalessandro, Gal Pressman,
	Jakub Kicinski, linux-kernel, linux-rdma, Maor Gottlieb,
	Mark Zhang, Mike Marciniszyn, Mustafa Ismail, Neta Ostrovsky,
	netdev, Potnuri Bharat Teja, Saeed Mahameed, Selvin Xavier,
	Shiraz Saleem, Yishai Hadas, Zhu Yanjun

From: Aharon Landau <aharonl@nvidia.com>

This patch adds the ability to get the name, index and status of all
counters for each link through RDMA netlink. This can be used for
user-space to get the current optional-counter mode.

Examples:
$ rdma statistic mode
link rocep8s0f0/1 optional-counters cc_rx_ce_pkts

$ rdma statistic mode supported
link rocep8s0f0/1 supported optional-counters cc_rx_ce_pkts,cc_rx_cnp_pkts,cc_tx_cnp_pkts
link rocep8s0f1/1 supported optional-counters cc_rx_ce_pkts,cc_rx_cnp_pkts,cc_tx_cnp_pkts

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Signed-off-by: Neta Ostrovsky <netao@nvidia.com>
Reviewed-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/core/nldev.c  | 98 ++++++++++++++++++++++++++++++++
 include/uapi/rdma/rdma_netlink.h |  5 ++
 2 files changed, 103 insertions(+)

diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 67519730b1ac..210057fef7bd 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -154,6 +154,8 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
 	[RDMA_NLDEV_NET_NS_FD]			= { .type = NLA_U32 },
 	[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]	= { .type = NLA_U8 },
 	[RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK]	= { .type = NLA_U8 },
+	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX]	= { .type = NLA_U32 },
+	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC] = { .type = NLA_U8 },
 };
 
 static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
@@ -2264,6 +2266,99 @@ static int nldev_stat_get_dumpit(struct sk_buff *skb,
 	return ret;
 }
 
+static int nldev_stat_get_counter_status_doit(struct sk_buff *skb,
+					      struct nlmsghdr *nlh,
+					      struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX], *table, *entry;
+	struct rdma_hw_stats *stats;
+	struct ib_device *device;
+	struct sk_buff *msg;
+	u32 devid, port;
+	int ret, i;
+
+	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+			  nldev_policy, extack);
+	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
+	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
+		return -EINVAL;
+
+	devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+	device = ib_device_get_by_index(sock_net(skb->sk), devid);
+	if (!device)
+		return -EINVAL;
+
+	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+	if (!rdma_is_port_valid(device, port)) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	stats = ib_get_hw_stats_port(device, port);
+	if (!stats) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	nlh = nlmsg_put(
+		msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+		RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET_STATUS),
+		0, 0);
+
+	ret = -EMSGSIZE;
+	if (fill_nldev_handle(msg, device) ||
+	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
+		goto err_msg;
+
+	table = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
+	if (!table)
+		goto err_msg;
+
+	mutex_lock(&stats->lock);
+	for (i = 0; i < stats->num_counters; i++) {
+		entry = nla_nest_start(msg,
+				       RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
+		if (!entry)
+			goto err_msg_table;
+
+		if (nla_put_string(msg,
+				   RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
+				   stats->descs[i].name) ||
+		    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX, i))
+			goto err_msg_entry;
+
+		if ((stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) &&
+		    (nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC,
+				!test_bit(i, stats->is_disabled))))
+			goto err_msg_entry;
+
+		nla_nest_end(msg, entry);
+	}
+	mutex_unlock(&stats->lock);
+
+	nla_nest_end(msg, table);
+	nlmsg_end(msg, nlh);
+	ib_device_put(device);
+	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
+
+err_msg_entry:
+	nla_nest_cancel(msg, entry);
+err_msg_table:
+	mutex_unlock(&stats->lock);
+	nla_nest_cancel(msg, table);
+err_msg:
+	nlmsg_free(msg);
+err:
+	ib_device_put(device);
+	return ret;
+}
+
 static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
 	[RDMA_NLDEV_CMD_GET] = {
 		.doit = nldev_get_doit,
@@ -2353,6 +2448,9 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
 		.dump = nldev_res_get_mr_raw_dumpit,
 		.flags = RDMA_NL_ADMIN_PERM,
 	},
+	[RDMA_NLDEV_CMD_STAT_GET_STATUS] = {
+		.doit = nldev_stat_get_counter_status_doit,
+	},
 };
 
 void __init nldev_init(void)
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 75a1ae2311d8..e50c357367db 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -297,6 +297,8 @@ enum rdma_nldev_command {
 
 	RDMA_NLDEV_CMD_RES_SRQ_GET, /* can dump */
 
+	RDMA_NLDEV_CMD_STAT_GET_STATUS,
+
 	RDMA_NLDEV_NUM_OPS
 };
 
@@ -549,6 +551,9 @@ enum rdma_nldev_attr {
 
 	RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK,	/* u8 */
 
+	RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX,	/* u32 */
+	RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC, /* u8 */
+
 	/*
 	 * Always the end
 	 */
-- 
2.31.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH rdma-next v3 08/13] RDMA/nldev: Split nldev_stat_set_mode_doit out of nldev_stat_set_doit
       [not found] <cover.1633513239.git.leonro@nvidia.com>
                   ` (6 preceding siblings ...)
  2021-10-06  9:52 ` [PATCH rdma-next v3 07/13] RDMA/nldev: Add support to get status of all counters Leon Romanovsky
@ 2021-10-06  9:52 ` Leon Romanovsky
  2021-10-06  9:52 ` [PATCH rdma-next v3 09/13] RDMA/nldev: Allow optional-counter status configuration through RDMA netlink Leon Romanovsky
                   ` (4 subsequent siblings)
  12 siblings, 0 replies; 15+ messages in thread
From: Leon Romanovsky @ 2021-10-06  9:52 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Aharon Landau, David S. Miller, Dennis Dalessandro, Gal Pressman,
	Jakub Kicinski, linux-kernel, linux-rdma, Maor Gottlieb,
	Mark Zhang, Mike Marciniszyn, Mustafa Ismail, Neta Ostrovsky,
	netdev, Potnuri Bharat Teja, Saeed Mahameed, Selvin Xavier,
	Shiraz Saleem, Yishai Hadas, Zhu Yanjun

From: Aharon Landau <aharonl@nvidia.com>

In order to allow expansion of the set command with more set options,
take the set mode out of the main set function.

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/core/nldev.c | 74 ++++++++++++++++++++-------------
 1 file changed, 44 insertions(+), 30 deletions(-)

diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 210057fef7bd..bbe3dfca3d98 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -1897,42 +1897,23 @@ static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 	return err;
 }
 
-static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
-			       struct netlink_ext_ack *extack)
+static int nldev_stat_set_mode_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+				    struct netlink_ext_ack *extack,
+				    struct nlattr *tb[],
+				    struct ib_device *device, u32 port)
 {
-	u32 index, port, mode, mask = 0, qpn, cntn = 0;
-	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
-	struct ib_device *device;
+	u32 mode, mask = 0, qpn, cntn = 0;
 	struct sk_buff *msg;
 	int ret;
 
-	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
-			  nldev_policy, extack);
 	/* Currently only counter for QP is supported */
-	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
-	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
-	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX] || !tb[RDMA_NLDEV_ATTR_STAT_MODE])
-		return -EINVAL;
-
 	if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
 		return -EINVAL;
 
-	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
-	device = ib_device_get_by_index(sock_net(skb->sk), index);
-	if (!device)
-		return -EINVAL;
-
-	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
-	if (!rdma_is_port_valid(device, port)) {
-		ret = -EINVAL;
-		goto err;
-	}
-
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg) {
-		ret = -ENOMEM;
-		goto err;
-	}
+	if (!msg)
+		return -ENOMEM;
+
 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
 					 RDMA_NLDEV_CMD_STAT_SET),
@@ -1947,8 +1928,10 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 		if (ret)
 			goto err_msg;
 	} else {
-		if (!tb[RDMA_NLDEV_ATTR_RES_LQPN])
+		if (!tb[RDMA_NLDEV_ATTR_RES_LQPN]) {
+			ret = -EINVAL;
 			goto err_msg;
+		}
 		qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
 		if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) {
 			cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
@@ -1970,14 +1953,45 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 	}
 
 	nlmsg_end(msg, nlh);
-	ib_device_put(device);
 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
 
 err_fill:
 	rdma_counter_unbind_qpn(device, port, qpn, cntn);
 err_msg:
 	nlmsg_free(msg);
-err:
+	return ret;
+}
+
+static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+			       struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+	struct ib_device *device;
+	u32 index, port;
+	int ret;
+
+	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
+			  extack);
+	if (ret)
+		return -EINVAL;
+
+	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+	device = ib_device_get_by_index(sock_net(skb->sk), index);
+	if (!device)
+		return -EINVAL;
+
+	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+	if (!rdma_is_port_valid(device, port)) {
+		ret = -EINVAL;
+		goto end;
+	}
+
+	if (!tb[RDMA_NLDEV_ATTR_STAT_MODE]) {
+		ret = -EINVAL;
+		goto end;
+	}
+	ret = nldev_stat_set_mode_doit(skb, nlh, extack, tb, device, port);
+end:
 	ib_device_put(device);
 	return ret;
 }
-- 
2.31.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH rdma-next v3 09/13] RDMA/nldev: Allow optional-counter status configuration through RDMA netlink
       [not found] <cover.1633513239.git.leonro@nvidia.com>
                   ` (7 preceding siblings ...)
  2021-10-06  9:52 ` [PATCH rdma-next v3 08/13] RDMA/nldev: Split nldev_stat_set_mode_doit out of nldev_stat_set_doit Leon Romanovsky
@ 2021-10-06  9:52 ` Leon Romanovsky
  2021-10-07 13:07   ` Jason Gunthorpe
  2021-10-06  9:52 ` [PATCH rdma-next v3 10/13] RDMA/mlx5: Support optional counters in hw_stats initialization Leon Romanovsky
                   ` (3 subsequent siblings)
  12 siblings, 1 reply; 15+ messages in thread
From: Leon Romanovsky @ 2021-10-06  9:52 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Aharon Landau, David S. Miller, Dennis Dalessandro, Gal Pressman,
	Jakub Kicinski, linux-kernel, linux-rdma, Maor Gottlieb,
	Mark Zhang, Mike Marciniszyn, Mustafa Ismail, Neta Ostrovsky,
	netdev, Potnuri Bharat Teja, Saeed Mahameed, Selvin Xavier,
	Shiraz Saleem, Yishai Hadas, Zhu Yanjun

From: Aharon Landau <aharonl@nvidia.com>

Provide an option to allow users to enable/disable optional counters
through RDMA netlink. Limiting it to users with ADMIN capability only.

Examples:
1. Enable optional counters cc_rx_ce_pkts and cc_rx_cnp_pkts (and
   disable all others):
$ sudo rdma statistic set link rocep8s0f0/1 optional-counters \
    cc_rx_ce_pkts,cc_rx_cnp_pkts

2. Remove all optional counters:
$ sudo rdma statistic unset link rocep8s0f0/1 optional-counters

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/core/nldev.c | 54 ++++++++++++++++++++++++++++++---
 1 file changed, 50 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index bbe3dfca3d98..7bd4e08268a6 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -1962,6 +1962,50 @@ static int nldev_stat_set_mode_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 	return ret;
 }
 
+static int nldev_stat_set_counter_dynamic_doit(struct nlattr *tb[],
+					       struct ib_device *device,
+					       u32 port)
+{
+	struct rdma_hw_stats *stats;
+	int rem, i, index, ret = 0;
+	struct nlattr *entry_attr;
+	unsigned long *target;
+
+	stats = ib_get_hw_stats_port(device, port);
+	if (!stats)
+		return -EINVAL;
+
+	target = kcalloc(BITS_TO_LONGS(stats->num_counters),
+			 sizeof(*stats->is_disabled), GFP_KERNEL);
+	if (!target)
+		return -ENOMEM;
+
+	nla_for_each_nested(entry_attr, tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS],
+			    rem) {
+		index = nla_get_u32(entry_attr);
+		if ((index >= stats->num_counters) ||
+		    !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		set_bit(index, target);
+	}
+
+	for (i = 0; i < stats->num_counters; i++) {
+		if (!(stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL))
+			continue;
+
+		ret = rdma_counter_modify(device, port, i, test_bit(i, target));
+		if (ret)
+			goto out;
+	}
+
+out:
+	kfree(target);
+	return ret;
+}
+
 static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 			       struct netlink_ext_ack *extack)
 {
@@ -1986,11 +2030,13 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 		goto end;
 	}
 
-	if (!tb[RDMA_NLDEV_ATTR_STAT_MODE]) {
+	if (tb[RDMA_NLDEV_ATTR_STAT_MODE])
+		ret = nldev_stat_set_mode_doit(skb, nlh, extack, tb, device,
+					       port);
+	else if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC])
+		ret = nldev_stat_set_counter_dynamic_doit(tb, device, port);
+	else
 		ret = -EINVAL;
-		goto end;
-	}
-	ret = nldev_stat_set_mode_doit(skb, nlh, extack, tb, device, port);
 end:
 	ib_device_put(device);
 	return ret;
-- 
2.31.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH rdma-next v3 10/13] RDMA/mlx5: Support optional counters in hw_stats initialization
       [not found] <cover.1633513239.git.leonro@nvidia.com>
                   ` (8 preceding siblings ...)
  2021-10-06  9:52 ` [PATCH rdma-next v3 09/13] RDMA/nldev: Allow optional-counter status configuration through RDMA netlink Leon Romanovsky
@ 2021-10-06  9:52 ` Leon Romanovsky
  2021-10-06  9:52 ` [PATCH rdma-next v3 11/13] RDMA/mlx5: Add steering support in optional flow counters Leon Romanovsky
                   ` (2 subsequent siblings)
  12 siblings, 0 replies; 15+ messages in thread
From: Leon Romanovsky @ 2021-10-06  9:52 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Aharon Landau, David S. Miller, Dennis Dalessandro, Gal Pressman,
	Jakub Kicinski, linux-kernel, linux-rdma, Maor Gottlieb,
	Mark Zhang, Mike Marciniszyn, Mustafa Ismail, Neta Ostrovsky,
	netdev, Potnuri Bharat Teja, Saeed Mahameed, Selvin Xavier,
	Shiraz Saleem, Yishai Hadas, Zhu Yanjun

From: Aharon Landau <aharonl@nvidia.com>

Add optional counter support when allocate and initialize hw_stats
structure. Optional counters have IB_STAT_FLAG_OPTIONAL flag set
and are disabled by default.

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx5/counters.c | 90 ++++++++++++++++++++++-----
 drivers/infiniband/hw/mlx5/mlx5_ib.h  |  1 +
 2 files changed, 75 insertions(+), 16 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c
index caa35ea14b48..8fe7900b29f0 100644
--- a/drivers/infiniband/hw/mlx5/counters.c
+++ b/drivers/infiniband/hw/mlx5/counters.c
@@ -75,6 +75,21 @@ static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
 	INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
 };
 
+#define INIT_OP_COUNTER(_name)                                          \
+	{ .name = #_name }
+
+static const struct mlx5_ib_counter basic_op_cnts[] = {
+	INIT_OP_COUNTER(cc_rx_ce_pkts),
+};
+
+static const struct mlx5_ib_counter rdmarx_cnp_op_cnts[] = {
+	INIT_OP_COUNTER(cc_rx_cnp_pkts),
+};
+
+static const struct mlx5_ib_counter rdmatx_cnp_op_cnts[] = {
+	INIT_OP_COUNTER(cc_tx_cnp_pkts),
+};
+
 static int mlx5_ib_read_counters(struct ib_counters *counters,
 				 struct ib_counters_read_attr *read_attr,
 				 struct uverbs_attr_bundle *attrs)
@@ -161,17 +176,34 @@ u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num)
 	return cnts->set_id;
 }
 
+static struct rdma_hw_stats *do_alloc_stats(const struct mlx5_ib_counters *cnts)
+{
+	struct rdma_hw_stats *stats;
+	u32 num_hw_counters;
+	int i;
+
+	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
+			  cnts->num_ext_ppcnt_counters;
+	stats = rdma_alloc_hw_stats_struct(cnts->descs,
+					   num_hw_counters +
+					   cnts->num_op_counters,
+					   RDMA_HW_STATS_DEFAULT_LIFESPAN);
+	if (!stats)
+		return NULL;
+
+	for (i = 0; i < cnts->num_op_counters; i++)
+		set_bit(num_hw_counters + i, stats->is_disabled);
+
+	return stats;
+}
+
 static struct rdma_hw_stats *
 mlx5_ib_alloc_hw_device_stats(struct ib_device *ibdev)
 {
 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 	const struct mlx5_ib_counters *cnts = &dev->port[0].cnts;
 
-	return rdma_alloc_hw_stats_struct(cnts->descs,
-					  cnts->num_q_counters +
-						  cnts->num_cong_counters +
-						  cnts->num_ext_ppcnt_counters,
-					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
+	return do_alloc_stats(cnts);
 }
 
 static struct rdma_hw_stats *
@@ -180,11 +212,7 @@ mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 	const struct mlx5_ib_counters *cnts = &dev->port[port_num - 1].cnts;
 
-	return rdma_alloc_hw_stats_struct(cnts->descs,
-					  cnts->num_q_counters +
-						  cnts->num_cong_counters +
-						  cnts->num_ext_ppcnt_counters,
-					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
+	return do_alloc_stats(cnts);
 }
 
 static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
@@ -302,11 +330,7 @@ mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
 	const struct mlx5_ib_counters *cnts =
 		get_counters(dev, counter->port - 1);
 
-	return rdma_alloc_hw_stats_struct(cnts->descs,
-					  cnts->num_q_counters +
-					  cnts->num_cong_counters +
-					  cnts->num_ext_ppcnt_counters,
-					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
+	return do_alloc_stats(cnts);
 }
 
 static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
@@ -423,13 +447,34 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
 			offsets[j] = ext_ppcnt_cnts[i].offset;
 		}
 	}
+
+	for (i = 0; i < ARRAY_SIZE(basic_op_cnts); i++, j++) {
+		descs[j].name = basic_op_cnts[i].name;
+		descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
+	}
+
+	if (MLX5_CAP_FLOWTABLE(dev->mdev,
+			       ft_field_support_2_nic_receive_rdma.bth_opcode)) {
+		for (i = 0; i < ARRAY_SIZE(rdmarx_cnp_op_cnts); i++, j++) {
+			descs[j].name = rdmarx_cnp_op_cnts[i].name;
+			descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
+		}
+	}
+
+	if (MLX5_CAP_FLOWTABLE(dev->mdev,
+			       ft_field_support_2_nic_transmit_rdma.bth_opcode)) {
+		for (i = 0; i < ARRAY_SIZE(rdmatx_cnp_op_cnts); i++, j++) {
+			descs[j].name = rdmatx_cnp_op_cnts[i].name;
+			descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
+		}
+	}
 }
 
 
 static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
 				    struct mlx5_ib_counters *cnts)
 {
-	u32 num_counters;
+	u32 num_counters, num_op_counters;
 
 	num_counters = ARRAY_SIZE(basic_q_cnts);
 
@@ -455,6 +500,19 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
 		cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
 		num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
 	}
+
+	num_op_counters = ARRAY_SIZE(basic_op_cnts);
+
+	if (MLX5_CAP_FLOWTABLE(dev->mdev,
+			       ft_field_support_2_nic_receive_rdma.bth_opcode))
+		num_op_counters += ARRAY_SIZE(rdmarx_cnp_op_cnts);
+
+	if (MLX5_CAP_FLOWTABLE(dev->mdev,
+			       ft_field_support_2_nic_transmit_rdma.bth_opcode))
+		num_op_counters += ARRAY_SIZE(rdmatx_cnp_op_cnts);
+
+	cnts->num_op_counters = num_op_counters;
+	num_counters += num_op_counters;
 	cnts->descs = kcalloc(num_counters,
 			      sizeof(struct rdma_stat_desc), GFP_KERNEL);
 	if (!cnts->descs)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 6f5451d96dd7..8215d7ab579d 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -803,6 +803,7 @@ struct mlx5_ib_counters {
 	u32 num_q_counters;
 	u32 num_cong_counters;
 	u32 num_ext_ppcnt_counters;
+	u32 num_op_counters;
 	u16 set_id;
 };
 
-- 
2.31.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH rdma-next v3 11/13] RDMA/mlx5: Add steering support in optional flow counters
       [not found] <cover.1633513239.git.leonro@nvidia.com>
                   ` (9 preceding siblings ...)
  2021-10-06  9:52 ` [PATCH rdma-next v3 10/13] RDMA/mlx5: Support optional counters in hw_stats initialization Leon Romanovsky
@ 2021-10-06  9:52 ` Leon Romanovsky
  2021-10-06  9:52 ` [PATCH rdma-next v3 12/13] RDMA/mlx5: Add modify_op_stat() support Leon Romanovsky
  2021-10-06  9:52 ` [PATCH rdma-next v3 13/13] RDMA/mlx5: Add optional counter support in get_hw_stats callback Leon Romanovsky
  12 siblings, 0 replies; 15+ messages in thread
From: Leon Romanovsky @ 2021-10-06  9:52 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Aharon Landau, David S. Miller, Dennis Dalessandro, Gal Pressman,
	Jakub Kicinski, linux-kernel, linux-rdma, Maor Gottlieb,
	Mark Zhang, Mike Marciniszyn, Mustafa Ismail, Neta Ostrovsky,
	netdev, Potnuri Bharat Teja, Saeed Mahameed, Selvin Xavier,
	Shiraz Saleem, Yishai Hadas, Zhu Yanjun

From: Aharon Landau <aharonl@nvidia.com>

Adding steering infrastructure for adding and removing optional counter.
This allows to add and remove the counters dynamically in order not to
hurt performance.

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx5/fs.c      | 187 +++++++++++++++++++++++++++
 drivers/infiniband/hw/mlx5/mlx5_ib.h |  24 ++++
 include/rdma/ib_hdrs.h               |   1 +
 3 files changed, 212 insertions(+)

diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index 5fbc0a8454b9..b780185d9dc6 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -10,12 +10,14 @@
 #include <rdma/uverbs_std_types.h>
 #include <rdma/mlx5_user_ioctl_cmds.h>
 #include <rdma/mlx5_user_ioctl_verbs.h>
+#include <rdma/ib_hdrs.h>
 #include <rdma/ib_umem.h>
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/fs.h>
 #include <linux/mlx5/fs_helpers.h>
 #include <linux/mlx5/accel.h>
 #include <linux/mlx5/eswitch.h>
+#include <net/inet_ecn.h>
 #include "mlx5_ib.h"
 #include "counters.h"
 #include "devx.h"
@@ -847,6 +849,191 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
 	return prio;
 }
 
+enum {
+	RDMA_RX_ECN_OPCOUNTER_PRIO,
+	RDMA_RX_CNP_OPCOUNTER_PRIO,
+};
+
+enum {
+	RDMA_TX_CNP_OPCOUNTER_PRIO,
+};
+
+static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num,
+			      struct mlx5_flow_spec *spec)
+{
+	if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
+					ft_field_support.source_vhca_port) ||
+	    !MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
+					ft_field_support.source_vhca_port))
+		return -EOPNOTSUPP;
+
+	MLX5_SET_TO_ONES(fte_match_param, &spec->match_criteria,
+			 misc_parameters.source_vhca_port);
+	MLX5_SET(fte_match_param, &spec->match_value,
+		 misc_parameters.source_vhca_port, port_num);
+
+	return 0;
+}
+
+static int set_ecn_ce_spec(struct mlx5_ib_dev *dev, u32 port_num,
+			   struct mlx5_flow_spec *spec, int ipv)
+{
+	if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
+					ft_field_support.outer_ip_version))
+		return -EOPNOTSUPP;
+
+	if (mlx5_core_mp_enabled(dev->mdev) &&
+	    set_vhca_port_spec(dev, port_num, spec))
+		return -EOPNOTSUPP;
+
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+			 outer_headers.ip_ecn);
+	MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_ecn,
+		 INET_ECN_CE);
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+			 outer_headers.ip_version);
+	MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version,
+		 ipv);
+
+	spec->match_criteria_enable =
+		get_match_criteria_enable(spec->match_criteria);
+
+	return 0;
+}
+
+static int set_cnp_spec(struct mlx5_ib_dev *dev, u32 port_num,
+			struct mlx5_flow_spec *spec)
+{
+	if (mlx5_core_mp_enabled(dev->mdev) &&
+	    set_vhca_port_spec(dev, port_num, spec))
+		return -EOPNOTSUPP;
+
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+			 misc_parameters.bth_opcode);
+	MLX5_SET(fte_match_param, spec->match_value, misc_parameters.bth_opcode,
+		 IB_BTH_OPCODE_CNP);
+
+	spec->match_criteria_enable =
+		get_match_criteria_enable(spec->match_criteria);
+
+	return 0;
+}
+
+int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
+			 struct mlx5_ib_op_fc *opfc,
+			 enum mlx5_ib_optional_counter_type type)
+{
+	enum mlx5_flow_namespace_type fn_type;
+	int priority, i, err, spec_num;
+	struct mlx5_flow_act flow_act = {};
+	struct mlx5_flow_destination dst;
+	struct mlx5_flow_namespace *ns;
+	struct mlx5_ib_flow_prio *prio;
+	struct mlx5_flow_spec *spec;
+
+	spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL);
+	if (!spec)
+		return -ENOMEM;
+
+	switch (type) {
+	case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
+		if (set_ecn_ce_spec(dev, port_num, &spec[0],
+				    MLX5_FS_IPV4_VERSION) ||
+		    set_ecn_ce_spec(dev, port_num, &spec[1],
+				    MLX5_FS_IPV6_VERSION)) {
+			err = -EOPNOTSUPP;
+			goto free;
+		}
+		spec_num = 2;
+		fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+		priority = RDMA_RX_ECN_OPCOUNTER_PRIO;
+		break;
+
+	case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
+		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
+					ft_field_support_2_nic_receive_rdma.bth_opcode) ||
+		    set_cnp_spec(dev, port_num, &spec[0])) {
+			err = -EOPNOTSUPP;
+			goto free;
+		}
+		spec_num = 1;
+		fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+		priority = RDMA_RX_CNP_OPCOUNTER_PRIO;
+		break;
+
+	case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
+		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
+					ft_field_support_2_nic_transmit_rdma.bth_opcode) ||
+		    set_cnp_spec(dev, port_num, &spec[0])) {
+			err = -EOPNOTSUPP;
+			goto free;
+		}
+		spec_num = 1;
+		fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
+		priority = RDMA_TX_CNP_OPCOUNTER_PRIO;
+		break;
+
+	default:
+		err = -EOPNOTSUPP;
+		goto free;
+	}
+
+	ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
+	if (!ns) {
+		err = -EOPNOTSUPP;
+		goto free;
+	}
+
+	prio = &dev->flow_db->opfcs[type];
+	if (!prio->flow_table) {
+		prio = _get_prio(ns, prio, priority,
+				 dev->num_ports * MAX_OPFC_RULES, 1, 0);
+		if (IS_ERR(prio)) {
+			err = PTR_ERR(prio);
+			goto free;
+		}
+	}
+
+	dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+	dst.counter_id = mlx5_fc_id(opfc->fc);
+
+	flow_act.action =
+		MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+
+	for (i = 0; i < spec_num; i++) {
+		opfc->rule[i] = mlx5_add_flow_rules(prio->flow_table, &spec[i],
+						    &flow_act, &dst, 1);
+		if (IS_ERR(opfc->rule[i])) {
+			err = PTR_ERR(opfc->rule[i]);
+			goto del_rules;
+		}
+	}
+	prio->refcount += spec_num;
+	kfree(spec);
+
+	return 0;
+
+del_rules:
+	for (i -= 1; i >= 0; i--)
+		mlx5_del_flow_rules(opfc->rule[i]);
+	put_flow_table(dev, prio, false);
+free:
+	kfree(spec);
+	return err;
+}
+
+void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
+			     struct mlx5_ib_op_fc *opfc,
+			     enum mlx5_ib_optional_counter_type type)
+{
+	int i;
+
+	for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) {
+		mlx5_del_flow_rules(opfc->rule[i]);
+		put_flow_table(dev, &dev->flow_db->opfcs[type], true);
+	}
+}
+
 static void set_underlay_qp(struct mlx5_ib_dev *dev,
 			    struct mlx5_flow_spec *spec,
 			    u32 underlay_qpn)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 8215d7ab579d..d81ff5078e5e 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -263,6 +263,14 @@ struct mlx5_ib_pp {
 	struct mlx5_core_dev *mdev;
 };
 
+enum mlx5_ib_optional_counter_type {
+	MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS,
+	MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS,
+	MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS,
+
+	MLX5_IB_OPCOUNTER_MAX,
+};
+
 struct mlx5_ib_flow_db {
 	struct mlx5_ib_flow_prio	prios[MLX5_IB_NUM_FLOW_FT];
 	struct mlx5_ib_flow_prio	egress_prios[MLX5_IB_NUM_FLOW_FT];
@@ -271,6 +279,7 @@ struct mlx5_ib_flow_db {
 	struct mlx5_ib_flow_prio	fdb;
 	struct mlx5_ib_flow_prio	rdma_rx[MLX5_IB_NUM_FLOW_FT];
 	struct mlx5_ib_flow_prio	rdma_tx[MLX5_IB_NUM_FLOW_FT];
+	struct mlx5_ib_flow_prio	opfcs[MLX5_IB_OPCOUNTER_MAX];
 	struct mlx5_flow_table		*lag_demux_ft;
 	/* Protect flow steering bypass flow tables
 	 * when add/del flow rules.
@@ -797,6 +806,13 @@ struct mlx5_ib_resources {
 	struct mlx5_ib_port_resources ports[2];
 };
 
+#define MAX_OPFC_RULES 2
+
+struct mlx5_ib_op_fc {
+	struct mlx5_fc *fc;
+	struct mlx5_flow_handle *rule[MAX_OPFC_RULES];
+};
+
 struct mlx5_ib_counters {
 	struct rdma_stat_desc *descs;
 	size_t *offsets;
@@ -807,6 +823,14 @@ struct mlx5_ib_counters {
 	u16 set_id;
 };
 
+int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
+			 struct mlx5_ib_op_fc *opfc,
+			 enum mlx5_ib_optional_counter_type type);
+
+void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
+			     struct mlx5_ib_op_fc *opfc,
+			     enum mlx5_ib_optional_counter_type type);
+
 struct mlx5_ib_multiport_info;
 
 struct mlx5_ib_multiport {
diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h
index 7e542205861c..8ae07c0ecdf7 100644
--- a/include/rdma/ib_hdrs.h
+++ b/include/rdma/ib_hdrs.h
@@ -232,6 +232,7 @@ static inline u32 ib_get_sqpn(struct ib_other_headers *ohdr)
 #define IB_BTH_SE_SHIFT	23
 #define IB_BTH_TVER_MASK	0xf
 #define IB_BTH_TVER_SHIFT	16
+#define IB_BTH_OPCODE_CNP	0x81
 
 static inline u8 ib_bth_get_pad(struct ib_other_headers *ohdr)
 {
-- 
2.31.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH rdma-next v3 12/13] RDMA/mlx5: Add modify_op_stat() support
       [not found] <cover.1633513239.git.leonro@nvidia.com>
                   ` (10 preceding siblings ...)
  2021-10-06  9:52 ` [PATCH rdma-next v3 11/13] RDMA/mlx5: Add steering support in optional flow counters Leon Romanovsky
@ 2021-10-06  9:52 ` Leon Romanovsky
  2021-10-06  9:52 ` [PATCH rdma-next v3 13/13] RDMA/mlx5: Add optional counter support in get_hw_stats callback Leon Romanovsky
  12 siblings, 0 replies; 15+ messages in thread
From: Leon Romanovsky @ 2021-10-06  9:52 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Aharon Landau, David S. Miller, Dennis Dalessandro, Gal Pressman,
	Jakub Kicinski, linux-kernel, linux-rdma, Maor Gottlieb,
	Mark Zhang, Mike Marciniszyn, Mustafa Ismail, Neta Ostrovsky,
	netdev, Potnuri Bharat Teja, Saeed Mahameed, Selvin Xavier,
	Shiraz Saleem, Yishai Hadas, Zhu Yanjun

From: Aharon Landau <aharonl@nvidia.com>

Add support for ib callback modify_op_stat() to add or remove an
optional counter. When adding, a steering flow table is created
with a rule that catches and counts all the matching packets;
When removing, the table and flow counter are destroyed.

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx5/counters.c | 79 +++++++++++++++++++++++++--
 drivers/infiniband/hw/mlx5/mlx5_ib.h  |  1 +
 include/rdma/ib_verbs.h               |  2 +
 3 files changed, 76 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c
index 8fe7900b29f0..6ee340c63b20 100644
--- a/drivers/infiniband/hw/mlx5/counters.c
+++ b/drivers/infiniband/hw/mlx5/counters.c
@@ -12,6 +12,7 @@
 struct mlx5_ib_counter {
 	const char *name;
 	size_t offset;
+	u32 type;
 };
 
 #define INIT_Q_COUNTER(_name)		\
@@ -75,19 +76,19 @@ static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
 	INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
 };
 
-#define INIT_OP_COUNTER(_name)                                          \
-	{ .name = #_name }
+#define INIT_OP_COUNTER(_name, _type)		\
+	{ .name = #_name, .type = MLX5_IB_OPCOUNTER_##_type}
 
 static const struct mlx5_ib_counter basic_op_cnts[] = {
-	INIT_OP_COUNTER(cc_rx_ce_pkts),
+	INIT_OP_COUNTER(cc_rx_ce_pkts, CC_RX_CE_PKTS),
 };
 
 static const struct mlx5_ib_counter rdmarx_cnp_op_cnts[] = {
-	INIT_OP_COUNTER(cc_rx_cnp_pkts),
+	INIT_OP_COUNTER(cc_rx_cnp_pkts, CC_RX_CNP_PKTS),
 };
 
 static const struct mlx5_ib_counter rdmatx_cnp_op_cnts[] = {
-	INIT_OP_COUNTER(cc_tx_cnp_pkts),
+	INIT_OP_COUNTER(cc_tx_cnp_pkts, CC_TX_CNP_PKTS),
 };
 
 static int mlx5_ib_read_counters(struct ib_counters *counters,
@@ -451,6 +452,7 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
 	for (i = 0; i < ARRAY_SIZE(basic_op_cnts); i++, j++) {
 		descs[j].name = basic_op_cnts[i].name;
 		descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
+		descs[j].priv = &basic_op_cnts[i].type;
 	}
 
 	if (MLX5_CAP_FLOWTABLE(dev->mdev,
@@ -458,6 +460,7 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
 		for (i = 0; i < ARRAY_SIZE(rdmarx_cnp_op_cnts); i++, j++) {
 			descs[j].name = rdmarx_cnp_op_cnts[i].name;
 			descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
+			descs[j].priv = &rdmarx_cnp_op_cnts[i].type;
 		}
 	}
 
@@ -466,6 +469,7 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
 		for (i = 0; i < ARRAY_SIZE(rdmatx_cnp_op_cnts); i++, j++) {
 			descs[j].name = rdmatx_cnp_op_cnts[i].name;
 			descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
+			descs[j].priv = &rdmatx_cnp_op_cnts[i].type;
 		}
 	}
 }
@@ -535,7 +539,7 @@ static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
 {
 	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
 	int num_cnt_ports;
-	int i;
+	int i, j;
 
 	num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
 
@@ -550,6 +554,18 @@ static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
 		}
 		kfree(dev->port[i].cnts.descs);
 		kfree(dev->port[i].cnts.offsets);
+
+		for (j = 0; j < MLX5_IB_OPCOUNTER_MAX; j++) {
+			if (!dev->port[i].cnts.opfcs[j].fc)
+				continue;
+
+			mlx5_ib_fs_remove_op_fc(dev,
+						&dev->port[i].cnts.opfcs[j],
+						j);
+			mlx5_fc_destroy(dev->mdev,
+					dev->port[i].cnts.opfcs[j].fc);
+			dev->port[i].cnts.opfcs[j].fc = NULL;
+		}
 	}
 }
 
@@ -729,6 +745,56 @@ void mlx5_ib_counters_clear_description(struct ib_counters *counters)
 	mutex_unlock(&mcounters->mcntrs_mutex);
 }
 
+static int mlx5_ib_modify_stat(struct ib_device *device, u32 port,
+			       unsigned int index, bool enable)
+{
+	struct mlx5_ib_dev *dev = to_mdev(device);
+	struct mlx5_ib_counters *cnts;
+	struct mlx5_ib_op_fc *opfc;
+	u32 num_hw_counters, type;
+	int ret;
+
+	cnts = &dev->port[port - 1].cnts;
+	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
+		cnts->num_ext_ppcnt_counters;
+	if (index < num_hw_counters ||
+	    index >= (num_hw_counters + cnts->num_op_counters))
+		return -EINVAL;
+
+	if (!(cnts->descs[index].flags & IB_STAT_FLAG_OPTIONAL))
+		return -EINVAL;
+
+	type = *(u32 *)cnts->descs[index].priv;
+	if (type >= MLX5_IB_OPCOUNTER_MAX)
+		return -EINVAL;
+
+	opfc = &cnts->opfcs[type];
+
+	if (enable) {
+		if (opfc->fc)
+			return -EEXIST;
+
+		opfc->fc = mlx5_fc_create(dev->mdev, false);
+		if (IS_ERR(opfc->fc))
+			return PTR_ERR(opfc->fc);
+
+		ret = mlx5_ib_fs_add_op_fc(dev, port, opfc, type);
+		if (ret) {
+			mlx5_fc_destroy(dev->mdev, opfc->fc);
+			opfc->fc = NULL;
+		}
+		return ret;
+	}
+
+	if (!opfc->fc)
+		return -EINVAL;
+
+	mlx5_ib_fs_remove_op_fc(dev, opfc, type);
+	mlx5_fc_destroy(dev->mdev, opfc->fc);
+	opfc->fc = NULL;
+	return 0;
+}
+
 static const struct ib_device_ops hw_stats_ops = {
 	.alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
 	.get_hw_stats = mlx5_ib_get_hw_stats,
@@ -737,6 +803,7 @@ static const struct ib_device_ops hw_stats_ops = {
 	.counter_dealloc = mlx5_ib_counter_dealloc,
 	.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
 	.counter_update_stats = mlx5_ib_counter_update_stats,
+	.modify_hw_stat = mlx5_ib_modify_stat,
 };
 
 static const struct ib_device_ops hw_switchdev_stats_ops = {
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index d81ff5078e5e..cf8b0653f0ce 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -821,6 +821,7 @@ struct mlx5_ib_counters {
 	u32 num_ext_ppcnt_counters;
 	u32 num_op_counters;
 	u16 set_id;
+	struct mlx5_ib_op_fc opfcs[MLX5_IB_OPCOUNTER_MAX];
 };
 
 int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 2207f60b002f..7688720411a3 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -553,10 +553,12 @@ enum ib_stat_flag {
  * struct rdma_stat_desc
  * @name - The name of the counter
  * @flags - Flags of the counter; For example, IB_STAT_FLAG_OPTIONAL
+ * @priv - Driver private information; Core code should not use
  */
 struct rdma_stat_desc {
 	const char *name;
 	unsigned int flags;
+	const void *priv;
 };
 
 /**
-- 
2.31.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH rdma-next v3 13/13] RDMA/mlx5: Add optional counter support in get_hw_stats callback
       [not found] <cover.1633513239.git.leonro@nvidia.com>
                   ` (11 preceding siblings ...)
  2021-10-06  9:52 ` [PATCH rdma-next v3 12/13] RDMA/mlx5: Add modify_op_stat() support Leon Romanovsky
@ 2021-10-06  9:52 ` Leon Romanovsky
  12 siblings, 0 replies; 15+ messages in thread
From: Leon Romanovsky @ 2021-10-06  9:52 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Aharon Landau, David S. Miller, Dennis Dalessandro, Gal Pressman,
	Jakub Kicinski, linux-kernel, linux-rdma, Maor Gottlieb,
	Mark Zhang, Mike Marciniszyn, Mustafa Ismail, Neta Ostrovsky,
	netdev, Potnuri Bharat Teja, Saeed Mahameed, Selvin Xavier,
	Shiraz Saleem, Yishai Hadas, Zhu Yanjun

From: Aharon Landau <aharonl@nvidia.com>

When get_hw_stats is called, query and return the optional counter
statistic as well.

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx5/counters.c | 88 ++++++++++++++++++++++++++-
 1 file changed, 85 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c
index 6ee340c63b20..6f1c4b57110e 100644
--- a/drivers/infiniband/hw/mlx5/counters.c
+++ b/drivers/infiniband/hw/mlx5/counters.c
@@ -270,9 +270,9 @@ static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
 	return ret;
 }
 
-static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
-				struct rdma_hw_stats *stats,
-				u32 port_num, int index)
+static int do_get_hw_stats(struct ib_device *ibdev,
+			   struct rdma_hw_stats *stats,
+			   u32 port_num, int index)
 {
 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1);
@@ -324,6 +324,88 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
 	return num_counters;
 }
 
+static int do_get_op_stat(struct ib_device *ibdev,
+			  struct rdma_hw_stats *stats,
+			  u32 port_num, int index)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ibdev);
+	const struct mlx5_ib_counters *cnts;
+	const struct mlx5_ib_op_fc *opfcs;
+	u64 packets = 0, bytes;
+	u32 type;
+	int ret;
+
+	cnts = get_counters(dev, port_num - 1);
+	opfcs = cnts->opfcs;
+	type = *(u32 *)cnts->descs[index].priv;
+	if (type >= MLX5_IB_OPCOUNTER_MAX)
+		return -EINVAL;
+
+	if (!opfcs[type].fc)
+		goto out;
+
+	ret = mlx5_fc_query(dev->mdev, opfcs[type].fc,
+			    &packets, &bytes);
+	if (ret)
+		return ret;
+
+out:
+	stats->value[index] = packets;
+	return index;
+}
+
+static int do_get_op_stats(struct ib_device *ibdev,
+			   struct rdma_hw_stats *stats,
+			   u32 port_num)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ibdev);
+	const struct mlx5_ib_counters *cnts;
+	int index, ret, num_hw_counters;
+
+	cnts = get_counters(dev, port_num - 1);
+	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
+			  cnts->num_ext_ppcnt_counters;
+	for (index = num_hw_counters;
+	     index < (num_hw_counters + cnts->num_op_counters); index++) {
+		ret = do_get_op_stat(ibdev, stats, port_num, index);
+		if (ret != index)
+			return ret;
+	}
+
+	return cnts->num_op_counters;
+}
+
+static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
+				struct rdma_hw_stats *stats,
+				u32 port_num, int index)
+{
+	int num_counters, num_hw_counters, num_op_counters;
+	struct mlx5_ib_dev *dev = to_mdev(ibdev);
+	const struct mlx5_ib_counters *cnts;
+
+	cnts = get_counters(dev, port_num - 1);
+	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
+		cnts->num_ext_ppcnt_counters;
+	num_counters = num_hw_counters + cnts->num_op_counters;
+
+	if (index < 0 || index > num_counters)
+		return -EINVAL;
+	else if (index > 0 && index < num_hw_counters)
+		return do_get_hw_stats(ibdev, stats, port_num, index);
+	else if (index >= num_hw_counters && index < num_counters)
+		return do_get_op_stat(ibdev, stats, port_num, index);
+
+	num_hw_counters = do_get_hw_stats(ibdev, stats, port_num, index);
+	if (num_hw_counters < 0)
+		return num_hw_counters;
+
+	num_op_counters = do_get_op_stats(ibdev, stats, port_num);
+	if (num_op_counters < 0)
+		return num_op_counters;
+
+	return num_hw_counters + num_op_counters;
+}
+
 static struct rdma_hw_stats *
 mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
 {
-- 
2.31.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH rdma-next v3 05/13] RDMA/counter: Add an is_disabled field in struct rdma_hw_stats
  2021-10-06  9:52 ` [PATCH rdma-next v3 05/13] RDMA/counter: Add an is_disabled field in struct rdma_hw_stats Leon Romanovsky
@ 2021-10-06 11:49   ` Leon Romanovsky
  0 siblings, 0 replies; 15+ messages in thread
From: Leon Romanovsky @ 2021-10-06 11:49 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Aharon Landau, David S. Miller, Dennis Dalessandro, Gal Pressman,
	Jakub Kicinski, linux-kernel, linux-rdma, Maor Gottlieb,
	Mark Zhang, Mike Marciniszyn, Mustafa Ismail, netdev,
	Potnuri Bharat Teja, Saeed Mahameed, Selvin Xavier,
	Shiraz Saleem, Yishai Hadas, Zhu Yanjun

On Wed, Oct 06, 2021 at 12:52:08PM +0300, Leon Romanovsky wrote:
> From: Aharon Landau <aharonl@nvidia.com>
> 
> Add a bitmap in rdma_hw_stat structure, with each bit indicates whether
> the corresponding counter is currently disabled or not. By default
> hwcounters are enabled.
> 
> Signed-off-by: Aharon Landau <aharonl@nvidia.com>
> Reviewed-by: Mark Zhang <markzhang@nvidia.com>
> Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
> ---
>  drivers/infiniband/core/nldev.c | 11 ++++++++++-
>  drivers/infiniband/core/verbs.c | 10 ++++++++++
>  include/rdma/ib_verbs.h         |  3 +++
>  3 files changed, 23 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
> index 3f6b98a87566..67519730b1ac 100644
> --- a/drivers/infiniband/core/nldev.c
> +++ b/drivers/infiniband/core/nldev.c
> @@ -968,15 +968,21 @@ static int fill_stat_counter_hwcounters(struct sk_buff *msg,
>  	if (!table_attr)
>  		return -EMSGSIZE;
>  
> -	for (i = 0; i < st->num_counters; i++)
> +	mutex_lock(&st->lock);
> +	for (i = 0; i < st->num_counters; i++) {
> +		if (test_bit(i, st->is_disabled))
> +			continue;
>  		if (rdma_nl_stat_hwcounter_entry(msg, st->descs[i].name,
>  						 st->value[i]))
>  			goto err;
> +	}
> +	mutex_unlock(&st->lock);
>  
>  	nla_nest_end(msg, table_attr);
>  	return 0;
>  
>  err:
> +	mutex_unlock(&st->lock);
>  	nla_nest_cancel(msg, table_attr);
>  	return -EMSGSIZE;
>  }
> @@ -2104,6 +2110,9 @@ static int stat_get_doit_default_counter(struct sk_buff *skb,
>  		goto err_stats;
>  	}
>  	for (i = 0; i < num_cnts; i++) {
> +		if (test_bit(i, stats->is_disabled))
> +			continue;
> +
>  		v = stats->value[i] +
>  			rdma_counter_get_hwstat_value(device, port, i);
>  		if (rdma_nl_stat_hwcounter_entry(msg,
> diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
> index c3319a7584a2..d957b23a0e23 100644
> --- a/drivers/infiniband/core/verbs.c
> +++ b/drivers/infiniband/core/verbs.c
> @@ -2994,11 +2994,20 @@ struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
>  	if (!stats)
>  		return NULL;
>  
> +	stats->is_disabled = kcalloc(BITS_TO_LONGS(num_counters),
> +				     sizeof(*stats->is_disabled), GFP_KERNEL);
> +	if (!stats->is_disabled)
> +		goto err;
> +
>  	stats->descs = descs;
>  	stats->num_counters = num_counters;
>  	stats->lifespan = msecs_to_jiffies(lifespan);
>  
>  	return stats;
> +
> +err:
> +	kfree(stats);
> +	return NULL;
>  }
>  EXPORT_SYMBOL(rdma_alloc_hw_stats_struct);
>  
> @@ -3008,6 +3017,7 @@ EXPORT_SYMBOL(rdma_alloc_hw_stats_struct);
>   */
>  void rdma_free_hw_stats_struct(struct rdma_hw_stats *stats)
>  {
> +	kfree(stats->is_disabled);
>  	kfree(stats);

Jason,

This hunk needs to be the following for the representors case. Can you
please fix it locally?

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index d957b23a0e23..e9e042b31386 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -3017,6 +3017,9 @@ EXPORT_SYMBOL(rdma_alloc_hw_stats_struct);
  */
 void rdma_free_hw_stats_struct(struct rdma_hw_stats *stats)
 {
+       if (!stats)
+               return;
+
        kfree(stats->is_disabled);
        kfree(stats);
 }

Thanks

>  }
>  EXPORT_SYMBOL(rdma_free_hw_stats_struct);
> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
> index 938c0c0a1c19..ae467365706b 100644
> --- a/include/rdma/ib_verbs.h
> +++ b/include/rdma/ib_verbs.h
> @@ -565,6 +565,8 @@ struct rdma_stat_desc {
>   *   their own value during their allocation routine.
>   * @descs - Array of pointers to static descriptors used for the counters
>   *   in directory.
> + * @is_disabled - A bitmap to indicate each counter is currently disabled
> + *   or not.
>   * @num_counters - How many hardware counters there are.  If name is
>   *   shorter than this number, a kernel oops will result.  Driver authors
>   *   are encouraged to leave BUILD_BUG_ON(ARRAY_SIZE(@name) < num_counters)
> @@ -577,6 +579,7 @@ struct rdma_hw_stats {
>  	unsigned long	timestamp;
>  	unsigned long	lifespan;
>  	const struct rdma_stat_desc *descs;
> +	unsigned long	*is_disabled;
>  	int		num_counters;
>  	u64		value[];
>  };
> -- 
> 2.31.1
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH rdma-next v3 09/13] RDMA/nldev: Allow optional-counter status configuration through RDMA netlink
  2021-10-06  9:52 ` [PATCH rdma-next v3 09/13] RDMA/nldev: Allow optional-counter status configuration through RDMA netlink Leon Romanovsky
@ 2021-10-07 13:07   ` Jason Gunthorpe
  0 siblings, 0 replies; 15+ messages in thread
From: Jason Gunthorpe @ 2021-10-07 13:07 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Doug Ledford, Aharon Landau, David S. Miller, Dennis Dalessandro,
	Gal Pressman, Jakub Kicinski, linux-kernel, linux-rdma,
	Maor Gottlieb, Mark Zhang, Mike Marciniszyn, Mustafa Ismail,
	Neta Ostrovsky, netdev, Potnuri Bharat Teja, Saeed Mahameed,
	Selvin Xavier, Shiraz Saleem, Yishai Hadas, Zhu Yanjun

On Wed, Oct 06, 2021 at 12:52:12PM +0300, Leon Romanovsky wrote:

> @@ -1986,11 +2030,13 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
>  		goto end;
>  	}
>  
> -	if (!tb[RDMA_NLDEV_ATTR_STAT_MODE]) {
> +	if (tb[RDMA_NLDEV_ATTR_STAT_MODE])
> +		ret = nldev_stat_set_mode_doit(skb, nlh, extack, tb, device,
> +					       port);
> +	else if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC])
> +		ret = nldev_stat_set_counter_dynamic_doit(tb, device, port);
> +	else
>  		ret = -EINVAL;
> -		goto end;
> -	}

I missed this before.. Why is this exclusive?

If userspace sends both inputs then both should be set.

And shouldn't this reject NL attributes it doesn't support - eg
shouldn't this have a stat_set policy to check that?

Jason

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2021-10-07 13:07 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <cover.1633513239.git.leonro@nvidia.com>
2021-10-06  9:52 ` [PATCH mlx5-next v3 01/13] net/mlx5: Add ifc bits to support optional counters Leon Romanovsky
2021-10-06  9:52 ` [PATCH mlx5-next v3 02/13] net/mlx5: Add priorities for counters in RDMA namespaces Leon Romanovsky
2021-10-06  9:52 ` [PATCH rdma-next v3 03/13] RDMA/counter: Add a descriptor in struct rdma_hw_stats Leon Romanovsky
2021-10-06  9:52 ` [PATCH rdma-next v3 04/13] RDMA/core: Add a helper API rdma_free_hw_stats_struct Leon Romanovsky
2021-10-06  9:52 ` [PATCH rdma-next v3 05/13] RDMA/counter: Add an is_disabled field in struct rdma_hw_stats Leon Romanovsky
2021-10-06 11:49   ` Leon Romanovsky
2021-10-06  9:52 ` [PATCH rdma-next v3 06/13] RDMA/counter: Add optional counter support Leon Romanovsky
2021-10-06  9:52 ` [PATCH rdma-next v3 07/13] RDMA/nldev: Add support to get status of all counters Leon Romanovsky
2021-10-06  9:52 ` [PATCH rdma-next v3 08/13] RDMA/nldev: Split nldev_stat_set_mode_doit out of nldev_stat_set_doit Leon Romanovsky
2021-10-06  9:52 ` [PATCH rdma-next v3 09/13] RDMA/nldev: Allow optional-counter status configuration through RDMA netlink Leon Romanovsky
2021-10-07 13:07   ` Jason Gunthorpe
2021-10-06  9:52 ` [PATCH rdma-next v3 10/13] RDMA/mlx5: Support optional counters in hw_stats initialization Leon Romanovsky
2021-10-06  9:52 ` [PATCH rdma-next v3 11/13] RDMA/mlx5: Add steering support in optional flow counters Leon Romanovsky
2021-10-06  9:52 ` [PATCH rdma-next v3 12/13] RDMA/mlx5: Add modify_op_stat() support Leon Romanovsky
2021-10-06  9:52 ` [PATCH rdma-next v3 13/13] RDMA/mlx5: Add optional counter support in get_hw_stats callback Leon Romanovsky

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).