Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 // Copyright (c) 2019 Mellanox Technologies.
0003 
0004 #include "health.h"
0005 #include "lib/eq.h"
0006 #include "lib/mlx5.h"
0007 
/*
 * Open a named object in @fmsg: start a pair nest keyed by @name, then start
 * an object nest inside it.
 *
 * Returns 0 on success, or the non-zero value of whichever devlink_fmsg call
 * failed first. The object nest is not attempted when the pair nest fails.
 */
int mlx5e_health_fmsg_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name)
{
    int ret = devlink_fmsg_pair_nest_start(fmsg, name);

    if (ret)
        return ret;

    return devlink_fmsg_obj_nest_start(fmsg);
}
0022 
/*
 * Close a named object in @fmsg: end the object nest first, then the pair
 * nest, i.e. the reverse of the order used by
 * mlx5e_health_fmsg_named_obj_nest_start().
 *
 * Returns 0 on success, or the non-zero value of whichever devlink_fmsg call
 * failed first. The pair nest is not ended when ending the object nest fails.
 */
int mlx5e_health_fmsg_named_obj_nest_end(struct devlink_fmsg *fmsg)
{
    int ret = devlink_fmsg_obj_nest_end(fmsg);

    if (ret)
        return ret;

    return devlink_fmsg_pair_nest_end(fmsg);
}
0037 
0038 int mlx5e_health_cq_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
0039 {
0040     u32 out[MLX5_ST_SZ_DW(query_cq_out)] = {};
0041     u8 hw_status;
0042     void *cqc;
0043     int err;
0044 
0045     err = mlx5_core_query_cq(cq->mdev, &cq->mcq, out);
0046     if (err)
0047         return err;
0048 
0049     cqc = MLX5_ADDR_OF(query_cq_out, out, cq_context);
0050     hw_status = MLX5_GET(cqc, cqc, status);
0051 
0052     err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ");
0053     if (err)
0054         return err;
0055 
0056     err = devlink_fmsg_u32_pair_put(fmsg, "cqn", cq->mcq.cqn);
0057     if (err)
0058         return err;
0059 
0060     err = devlink_fmsg_u8_pair_put(fmsg, "HW status", hw_status);
0061     if (err)
0062         return err;
0063 
0064     err = devlink_fmsg_u32_pair_put(fmsg, "ci", mlx5_cqwq_get_ci(&cq->wq));
0065     if (err)
0066         return err;
0067 
0068     err = devlink_fmsg_u32_pair_put(fmsg, "size", mlx5_cqwq_get_size(&cq->wq));
0069     if (err)
0070         return err;
0071 
0072     err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
0073     if (err)
0074         return err;
0075 
0076     return 0;
0077 }
0078 
0079 int mlx5e_health_cq_common_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
0080 {
0081     u8 cq_log_stride;
0082     u32 cq_sz;
0083     int err;
0084 
0085     cq_sz = mlx5_cqwq_get_size(&cq->wq);
0086     cq_log_stride = mlx5_cqwq_get_log_stride_size(&cq->wq);
0087 
0088     err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ");
0089     if (err)
0090         return err;
0091 
0092     err = devlink_fmsg_u64_pair_put(fmsg, "stride size", BIT(cq_log_stride));
0093     if (err)
0094         return err;
0095 
0096     err = devlink_fmsg_u32_pair_put(fmsg, "size", cq_sz);
0097     if (err)
0098         return err;
0099 
0100     err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
0101     if (err)
0102         return err;
0103 
0104     return 0;
0105 }
0106 
0107 int mlx5e_health_eq_diag_fmsg(struct mlx5_eq_comp *eq, struct devlink_fmsg *fmsg)
0108 {
0109     int err;
0110 
0111     err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "EQ");
0112     if (err)
0113         return err;
0114 
0115     err = devlink_fmsg_u8_pair_put(fmsg, "eqn", eq->core.eqn);
0116     if (err)
0117         return err;
0118 
0119     err = devlink_fmsg_u32_pair_put(fmsg, "irqn", eq->core.irqn);
0120     if (err)
0121         return err;
0122 
0123     err = devlink_fmsg_u32_pair_put(fmsg, "vecidx", eq->core.vecidx);
0124     if (err)
0125         return err;
0126 
0127     err = devlink_fmsg_u32_pair_put(fmsg, "ci", eq->core.cons_index);
0128     if (err)
0129         return err;
0130 
0131     err = devlink_fmsg_u32_pair_put(fmsg, "size", eq_get_size(&eq->core));
0132     if (err)
0133         return err;
0134 
0135     return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
0136 }
0137 
/*
 * Create the health reporters for @priv: the TX reporter first, then the RX
 * reporter. Nothing is returned; any failure reporting is left to the two
 * create helpers.
 */
void mlx5e_health_create_reporters(struct mlx5e_priv *priv)
{
    mlx5e_reporter_tx_create(priv);
    mlx5e_reporter_rx_create(priv);
}
0143 
/*
 * Destroy the health reporters for @priv in the reverse of their creation
 * order: the RX reporter first, then the TX reporter.
 */
void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv)
{
    mlx5e_reporter_rx_destroy(priv);
    mlx5e_reporter_tx_destroy(priv);
}
0149 
0150 void mlx5e_health_channels_update(struct mlx5e_priv *priv)
0151 {
0152     if (priv->tx_reporter)
0153         devlink_health_reporter_state_update(priv->tx_reporter,
0154                              DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
0155     if (priv->rx_reporter)
0156         devlink_health_reporter_state_update(priv->rx_reporter,
0157                              DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
0158 }
0159 
0160 int mlx5e_health_sq_to_ready(struct mlx5_core_dev *mdev, struct net_device *dev, u32 sqn)
0161 {
0162     struct mlx5e_modify_sq_param msp = {};
0163     int err;
0164 
0165     msp.curr_state = MLX5_SQC_STATE_ERR;
0166     msp.next_state = MLX5_SQC_STATE_RST;
0167 
0168     err = mlx5e_modify_sq(mdev, sqn, &msp);
0169     if (err) {
0170         netdev_err(dev, "Failed to move sq 0x%x to reset\n", sqn);
0171         return err;
0172     }
0173 
0174     memset(&msp, 0, sizeof(msp));
0175     msp.curr_state = MLX5_SQC_STATE_RST;
0176     msp.next_state = MLX5_SQC_STATE_RDY;
0177 
0178     err = mlx5e_modify_sq(mdev, sqn, &msp);
0179     if (err) {
0180         netdev_err(dev, "Failed to move sq 0x%x to ready\n", sqn);
0181         return err;
0182     }
0183 
0184     return 0;
0185 }
0186 
0187 int mlx5e_health_recover_channels(struct mlx5e_priv *priv)
0188 {
0189     int err = 0;
0190 
0191     rtnl_lock();
0192     mutex_lock(&priv->state_lock);
0193 
0194     if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
0195         goto out;
0196 
0197     err = mlx5e_safe_reopen_channels(priv);
0198 
0199 out:
0200     mutex_unlock(&priv->state_lock);
0201     rtnl_unlock();
0202 
0203     return err;
0204 }
0205 
0206 int mlx5e_health_channel_eq_recover(struct net_device *dev, struct mlx5_eq_comp *eq,
0207                     struct mlx5e_ch_stats *stats)
0208 {
0209     u32 eqe_count;
0210 
0211     netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
0212            eq->core.eqn, eq->core.cons_index, eq->core.irqn);
0213 
0214     eqe_count = mlx5_eq_poll_irq_disabled(eq);
0215     if (!eqe_count)
0216         return -EIO;
0217 
0218     netdev_err(dev, "Recovered %d eqes on EQ 0x%x\n",
0219            eqe_count, eq->core.eqn);
0220 
0221     stats->eq_rearm++;
0222     return 0;
0223 }
0224 
0225 int mlx5e_health_report(struct mlx5e_priv *priv,
0226             struct devlink_health_reporter *reporter, char *err_str,
0227             struct mlx5e_err_ctx *err_ctx)
0228 {
0229     netdev_err(priv->netdev, "%s\n", err_str);
0230 
0231     if (!reporter)
0232         return err_ctx->recover(err_ctx->ctx);
0233 
0234     return devlink_health_report(reporter, err_str, err_ctx);
0235 }
0236 
0237 #define MLX5_HEALTH_DEVLINK_MAX_SIZE 1024
0238 static int mlx5e_health_rsc_fmsg_binary(struct devlink_fmsg *fmsg,
0239                     const void *value, u32 value_len)
0240 
0241 {
0242     u32 data_size;
0243     int err = 0;
0244     u32 offset;
0245 
0246     for (offset = 0; offset < value_len; offset += data_size) {
0247         data_size = value_len - offset;
0248         if (data_size > MLX5_HEALTH_DEVLINK_MAX_SIZE)
0249             data_size = MLX5_HEALTH_DEVLINK_MAX_SIZE;
0250         err = devlink_fmsg_binary_put(fmsg, value + offset, data_size);
0251         if (err)
0252             break;
0253     }
0254     return err;
0255 }
0256 
0257 int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key,
0258                    struct devlink_fmsg *fmsg)
0259 {
0260     struct mlx5_core_dev *mdev = priv->mdev;
0261     struct mlx5_rsc_dump_cmd *cmd;
0262     struct page *page;
0263     int cmd_err, err;
0264     int end_err;
0265     int size;
0266 
0267     if (IS_ERR_OR_NULL(mdev->rsc_dump))
0268         return -EOPNOTSUPP;
0269 
0270     page = alloc_page(GFP_KERNEL);
0271     if (!page)
0272         return -ENOMEM;
0273 
0274     err = devlink_fmsg_binary_pair_nest_start(fmsg, "data");
0275     if (err)
0276         goto free_page;
0277 
0278     cmd = mlx5_rsc_dump_cmd_create(mdev, key);
0279     if (IS_ERR(cmd)) {
0280         err = PTR_ERR(cmd);
0281         goto free_page;
0282     }
0283 
0284     do {
0285         cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size);
0286         if (cmd_err < 0) {
0287             err = cmd_err;
0288             goto destroy_cmd;
0289         }
0290 
0291         err = mlx5e_health_rsc_fmsg_binary(fmsg, page_address(page), size);
0292         if (err)
0293             goto destroy_cmd;
0294 
0295     } while (cmd_err > 0);
0296 
0297 destroy_cmd:
0298     mlx5_rsc_dump_cmd_destroy(cmd);
0299     end_err = devlink_fmsg_binary_pair_nest_end(fmsg);
0300     if (end_err)
0301         err = end_err;
0302 free_page:
0303     __free_page(page);
0304     return err;
0305 }
0306 
0307 int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
0308                 int queue_idx, char *lbl)
0309 {
0310     struct mlx5_rsc_key key = {};
0311     int err;
0312 
0313     key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
0314     key.index1 = queue_idx;
0315     key.size = PAGE_SIZE;
0316     key.num_of_obj1 = 1;
0317 
0318     err = devlink_fmsg_obj_nest_start(fmsg);
0319     if (err)
0320         return err;
0321 
0322     err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, lbl);
0323     if (err)
0324         return err;
0325 
0326     err = devlink_fmsg_u32_pair_put(fmsg, "index", queue_idx);
0327     if (err)
0328         return err;
0329 
0330     err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
0331     if (err)
0332         return err;
0333 
0334     err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
0335     if (err)
0336         return err;
0337 
0338     return devlink_fmsg_obj_nest_end(fmsg);
0339 }