Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
0002 // Copyright (c) 2018 Mellanox Technologies
0003 
0004 #include <linux/mlx5/driver.h>
0005 
0006 #include "mlx5_core.h"
0007 #include "lib/eq.h"
0008 #include "lib/mlx5.h"
0009 
/* Notifier entry used by this file: an mlx5_nb paired with a context pointer.
 * The handlers below recover the entry from the embedded nb via mlx5_nb_cof()
 * and read ctx as the owning struct mlx5_events.
 */
struct mlx5_event_nb {
    struct mlx5_nb  nb;
    void           *ctx; /* set to the owning mlx5_events in mlx5_events_start() */
};
0014 
/* General event handlers for the low-level mlx5_core driver.
 *
 * Other major feature-specific events, such as clock/eswitch/fpga/FW trace
 * and many others, are handled elsewhere by separate notifier callbacks
 * owned by those mlx5 components.
 */
static int any_notifier(struct notifier_block *nb, unsigned long type, void *data);
static int temp_warn(struct notifier_block *nb, unsigned long type, void *data);
static int port_module(struct notifier_block *nb, unsigned long type, void *data);
static int pcie_core(struct notifier_block *nb, unsigned long type, void *data);

/* Handler which forwards the event to events->fw_nh (driver notifiers). */
static int forward_event(struct notifier_block *nb, unsigned long event, void *data);
0028 
/* Template table of event notifiers. mlx5_events_start() copies each entry
 * into the per-device events->notifiers[] array and registers the copy, in
 * table order; mlx5_events_stop() unregisters them in reverse order.
 *
 * Note that MLX5_EVENT_TYPE_GENERAL_EVENT appears twice: once handled locally
 * by pcie_core() and once forwarded as-is by forward_event().
 */
static struct mlx5_nb events_nbs_ref[] = {
    /* Events to be processed by mlx5_core */
    {.nb.notifier_call = any_notifier,  .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY },
    {.nb.notifier_call = temp_warn,     .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT },
    {.nb.notifier_call = port_module,   .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT },
    {.nb.notifier_call = pcie_core,     .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },

    /* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */
    {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PORT_CHANGE },
    {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
    /* QP/WQ resource events to forward */
    {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_DCT_DRAINED },
    {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PATH_MIG },
    {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_COMM_EST },
    {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SQ_DRAINED },
    {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_LAST_WQE },
    {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_CATAS_ERROR },
    {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED },
    {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR },
    {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR },
    /* SRQ events */
    {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_CATAS_ERROR },
    {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_RQ_LIMIT },
};
0053 
/* Per-device event state. Allocated by mlx5_events_init(), reachable through
 * dev->priv.events, and released by mlx5_events_cleanup().
 */
struct mlx5_events {
    /* back-pointer to the owning device, used by the handlers for logging */
    struct mlx5_core_dev *dev;
    /* single-threaded "mlx5_events" workqueue created in mlx5_events_init() */
    struct workqueue_struct *wq;
    /* one slot per events_nbs_ref[] entry, filled in by mlx5_events_start() */
    struct mlx5_event_nb  notifiers[ARRAY_SIZE(events_nbs_ref)];
    /* driver notifier chain for fw events */
    struct atomic_notifier_head fw_nh;
    /* port module events stats */
    struct mlx5_pme_stats pme_stats;
    /* work item queued by pcie_core(); its handler is mlx5_pcie_event() */
    struct work_struct pcie_core_work;
    /* driver notifier chain for sw events */
    struct blocking_notifier_head sw_nh;
};
0067 
0068 static const char *eqe_type_str(u8 type)
0069 {
0070     switch (type) {
0071     case MLX5_EVENT_TYPE_COMP:
0072         return "MLX5_EVENT_TYPE_COMP";
0073     case MLX5_EVENT_TYPE_PATH_MIG:
0074         return "MLX5_EVENT_TYPE_PATH_MIG";
0075     case MLX5_EVENT_TYPE_COMM_EST:
0076         return "MLX5_EVENT_TYPE_COMM_EST";
0077     case MLX5_EVENT_TYPE_SQ_DRAINED:
0078         return "MLX5_EVENT_TYPE_SQ_DRAINED";
0079     case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
0080         return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
0081     case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
0082         return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
0083     case MLX5_EVENT_TYPE_CQ_ERROR:
0084         return "MLX5_EVENT_TYPE_CQ_ERROR";
0085     case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
0086         return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
0087     case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
0088         return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
0089     case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
0090         return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
0091     case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
0092         return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
0093     case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
0094         return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
0095     case MLX5_EVENT_TYPE_INTERNAL_ERROR:
0096         return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
0097     case MLX5_EVENT_TYPE_PORT_CHANGE:
0098         return "MLX5_EVENT_TYPE_PORT_CHANGE";
0099     case MLX5_EVENT_TYPE_GPIO_EVENT:
0100         return "MLX5_EVENT_TYPE_GPIO_EVENT";
0101     case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
0102         return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
0103     case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
0104         return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
0105     case MLX5_EVENT_TYPE_REMOTE_CONFIG:
0106         return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
0107     case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
0108         return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
0109     case MLX5_EVENT_TYPE_STALL_EVENT:
0110         return "MLX5_EVENT_TYPE_STALL_EVENT";
0111     case MLX5_EVENT_TYPE_CMD:
0112         return "MLX5_EVENT_TYPE_CMD";
0113     case MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED:
0114         return "MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED";
0115     case MLX5_EVENT_TYPE_VHCA_STATE_CHANGE:
0116         return "MLX5_EVENT_TYPE_VHCA_STATE_CHANGE";
0117     case MLX5_EVENT_TYPE_PAGE_REQUEST:
0118         return "MLX5_EVENT_TYPE_PAGE_REQUEST";
0119     case MLX5_EVENT_TYPE_PAGE_FAULT:
0120         return "MLX5_EVENT_TYPE_PAGE_FAULT";
0121     case MLX5_EVENT_TYPE_PPS_EVENT:
0122         return "MLX5_EVENT_TYPE_PPS_EVENT";
0123     case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
0124         return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
0125     case MLX5_EVENT_TYPE_FPGA_ERROR:
0126         return "MLX5_EVENT_TYPE_FPGA_ERROR";
0127     case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
0128         return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
0129     case MLX5_EVENT_TYPE_GENERAL_EVENT:
0130         return "MLX5_EVENT_TYPE_GENERAL_EVENT";
0131     case MLX5_EVENT_TYPE_MONITOR_COUNTER:
0132         return "MLX5_EVENT_TYPE_MONITOR_COUNTER";
0133     case MLX5_EVENT_TYPE_DEVICE_TRACER:
0134         return "MLX5_EVENT_TYPE_DEVICE_TRACER";
0135     default:
0136         return "Unrecognized event";
0137     }
0138 }
0139 
0140 /* handles all FW events, type == eqe->type */
0141 static int any_notifier(struct notifier_block *nb,
0142             unsigned long type, void *data)
0143 {
0144     struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
0145     struct mlx5_events   *events   = event_nb->ctx;
0146     struct mlx5_eqe      *eqe      = data;
0147 
0148     mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d)\n",
0149               eqe_type_str(eqe->type), eqe->sub_type);
0150     return NOTIFY_OK;
0151 }
0152 
0153 /* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */
0154 static int temp_warn(struct notifier_block *nb, unsigned long type, void *data)
0155 {
0156     struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
0157     struct mlx5_events   *events   = event_nb->ctx;
0158     struct mlx5_eqe      *eqe      = data;
0159     u64 value_lsb;
0160     u64 value_msb;
0161 
0162     value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
0163     value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
0164 
0165     mlx5_core_warn(events->dev,
0166                "High temperature on sensors with bit set %llx %llx",
0167                value_msb, value_lsb);
0168 
0169     return NOTIFY_OK;
0170 }
0171 
0172 /* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
0173 static const char *mlx5_pme_status_to_string(enum port_module_event_status_type status)
0174 {
0175     switch (status) {
0176     case MLX5_MODULE_STATUS_PLUGGED:
0177         return "Cable plugged";
0178     case MLX5_MODULE_STATUS_UNPLUGGED:
0179         return "Cable unplugged";
0180     case MLX5_MODULE_STATUS_ERROR:
0181         return "Cable error";
0182     case MLX5_MODULE_STATUS_DISABLED:
0183         return "Cable disabled";
0184     default:
0185         return "Unknown status";
0186     }
0187 }
0188 
0189 static const char *mlx5_pme_error_to_string(enum port_module_event_error_type error)
0190 {
0191     switch (error) {
0192     case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED:
0193         return "Power budget exceeded";
0194     case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX:
0195         return "Long Range for non MLNX cable";
0196     case MLX5_MODULE_EVENT_ERROR_BUS_STUCK:
0197         return "Bus stuck (I2C or data shorted)";
0198     case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT:
0199         return "No EEPROM/retry timeout";
0200     case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST:
0201         return "Enforce part number list";
0202     case MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER:
0203         return "Unknown identifier";
0204     case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE:
0205         return "High Temperature";
0206     case MLX5_MODULE_EVENT_ERROR_BAD_CABLE:
0207         return "Bad or shorted cable/module";
0208     case MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED:
0209         return "One or more network ports have been powered down due to insufficient/unadvertised power on the PCIe slot";
0210     default:
0211         return "Unknown error";
0212     }
0213 }
0214 
0215 /* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
0216 static int port_module(struct notifier_block *nb, unsigned long type, void *data)
0217 {
0218     struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
0219     struct mlx5_events   *events   = event_nb->ctx;
0220     struct mlx5_eqe      *eqe      = data;
0221 
0222     enum port_module_event_status_type module_status;
0223     enum port_module_event_error_type error_type;
0224     struct mlx5_eqe_port_module *module_event_eqe;
0225     const char *status_str;
0226     u8 module_num;
0227 
0228     module_event_eqe = &eqe->data.port_module;
0229     module_status = module_event_eqe->module_status &
0230             PORT_MODULE_EVENT_MODULE_STATUS_MASK;
0231     error_type = module_event_eqe->error_type &
0232              PORT_MODULE_EVENT_ERROR_TYPE_MASK;
0233 
0234     if (module_status < MLX5_MODULE_STATUS_NUM)
0235         events->pme_stats.status_counters[module_status]++;
0236 
0237     if (module_status == MLX5_MODULE_STATUS_ERROR)
0238         if (error_type < MLX5_MODULE_EVENT_ERROR_NUM)
0239             events->pme_stats.error_counters[error_type]++;
0240 
0241     if (!printk_ratelimit())
0242         return NOTIFY_OK;
0243 
0244     module_num = module_event_eqe->module;
0245     status_str = mlx5_pme_status_to_string(module_status);
0246     if (module_status == MLX5_MODULE_STATUS_ERROR) {
0247         const char *error_str = mlx5_pme_error_to_string(error_type);
0248 
0249         mlx5_core_err(events->dev,
0250                   "Port module event[error]: module %u, %s, %s\n",
0251                   module_num, status_str, error_str);
0252     } else {
0253         mlx5_core_info(events->dev,
0254                    "Port module event: module %u, %s\n",
0255                    module_num, status_str);
0256     }
0257 
0258     return NOTIFY_OK;
0259 }
0260 
/* Values of the MPEIN register pwr_status field, as decoded by
 * mlx5_pcie_event().
 */
enum {
    MLX5_PCI_POWER_COULD_NOT_BE_READ = 0x0,
    MLX5_PCI_POWER_SUFFICIENT_REPORTED = 0x1,
    MLX5_PCI_POWER_INSUFFICIENT_REPORTED = 0x2,
};
0266 
0267 static void mlx5_pcie_event(struct work_struct *work)
0268 {
0269     u32 out[MLX5_ST_SZ_DW(mpein_reg)] = {0};
0270     u32 in[MLX5_ST_SZ_DW(mpein_reg)] = {0};
0271     struct mlx5_events *events;
0272     struct mlx5_core_dev *dev;
0273     u8 power_status;
0274     u16 pci_power;
0275 
0276     events = container_of(work, struct mlx5_events, pcie_core_work);
0277     dev  = events->dev;
0278 
0279     if (!MLX5_CAP_MCAM_FEATURE(dev, pci_status_and_power))
0280         return;
0281 
0282     mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
0283                  MLX5_REG_MPEIN, 0, 0);
0284     power_status = MLX5_GET(mpein_reg, out, pwr_status);
0285     pci_power = MLX5_GET(mpein_reg, out, pci_power);
0286 
0287     switch (power_status) {
0288     case MLX5_PCI_POWER_COULD_NOT_BE_READ:
0289         mlx5_core_info_rl(dev,
0290                   "PCIe slot power capability was not advertised.\n");
0291         break;
0292     case MLX5_PCI_POWER_INSUFFICIENT_REPORTED:
0293         mlx5_core_warn_rl(dev,
0294                   "Detected insufficient power on the PCIe slot (%uW).\n",
0295                   pci_power);
0296         break;
0297     case MLX5_PCI_POWER_SUFFICIENT_REPORTED:
0298         mlx5_core_info_rl(dev,
0299                   "PCIe slot advertised sufficient power (%uW).\n",
0300                   pci_power);
0301         break;
0302     }
0303 }
0304 
0305 static int pcie_core(struct notifier_block *nb, unsigned long type, void *data)
0306 {
0307     struct mlx5_event_nb    *event_nb = mlx5_nb_cof(nb,
0308                             struct mlx5_event_nb,
0309                             nb);
0310     struct mlx5_events      *events   = event_nb->ctx;
0311     struct mlx5_eqe         *eqe      = data;
0312 
0313     switch (eqe->sub_type) {
0314     case MLX5_GENERAL_SUBTYPE_PCI_POWER_CHANGE_EVENT:
0315             queue_work(events->wq, &events->pcie_core_work);
0316         break;
0317     default:
0318         return NOTIFY_DONE;
0319     }
0320 
0321     return NOTIFY_OK;
0322 }
0323 
0324 void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats)
0325 {
0326     *stats = dev->priv.events->pme_stats;
0327 }
0328 
0329 /* forward event as is to registered interfaces (mlx5e/mlx5_ib) */
0330 static int forward_event(struct notifier_block *nb, unsigned long event, void *data)
0331 {
0332     struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
0333     struct mlx5_events   *events   = event_nb->ctx;
0334     struct mlx5_eqe      *eqe      = data;
0335 
0336     mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n",
0337               eqe_type_str(eqe->type), eqe->sub_type);
0338     atomic_notifier_call_chain(&events->fw_nh, event, data);
0339     return NOTIFY_OK;
0340 }
0341 
0342 int mlx5_events_init(struct mlx5_core_dev *dev)
0343 {
0344     struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL);
0345 
0346     if (!events)
0347         return -ENOMEM;
0348 
0349     ATOMIC_INIT_NOTIFIER_HEAD(&events->fw_nh);
0350     events->dev = dev;
0351     dev->priv.events = events;
0352     events->wq = create_singlethread_workqueue("mlx5_events");
0353     if (!events->wq) {
0354         kfree(events);
0355         return -ENOMEM;
0356     }
0357     INIT_WORK(&events->pcie_core_work, mlx5_pcie_event);
0358     BLOCKING_INIT_NOTIFIER_HEAD(&events->sw_nh);
0359 
0360     return 0;
0361 }
0362 
0363 void mlx5_events_cleanup(struct mlx5_core_dev *dev)
0364 {
0365     destroy_workqueue(dev->priv.events->wq);
0366     kvfree(dev->priv.events);
0367 }
0368 
0369 void mlx5_events_start(struct mlx5_core_dev *dev)
0370 {
0371     struct mlx5_events *events = dev->priv.events;
0372     int i;
0373 
0374     for (i = 0; i < ARRAY_SIZE(events_nbs_ref); i++) {
0375         events->notifiers[i].nb  = events_nbs_ref[i];
0376         events->notifiers[i].ctx = events;
0377         mlx5_eq_notifier_register(dev, &events->notifiers[i].nb);
0378     }
0379 }
0380 
0381 void mlx5_events_stop(struct mlx5_core_dev *dev)
0382 {
0383     struct mlx5_events *events = dev->priv.events;
0384     int i;
0385 
0386     for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0 ; i--)
0387         mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb);
0388     flush_workqueue(events->wq);
0389 }
0390 
0391 /* This API is used only for processing and forwarding firmware
0392  * events to mlx5 consumer.
0393  */
0394 int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
0395 {
0396     struct mlx5_events *events = dev->priv.events;
0397 
0398     return atomic_notifier_chain_register(&events->fw_nh, nb);
0399 }
0400 EXPORT_SYMBOL(mlx5_notifier_register);
0401 
0402 int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
0403 {
0404     struct mlx5_events *events = dev->priv.events;
0405 
0406     return atomic_notifier_chain_unregister(&events->fw_nh, nb);
0407 }
0408 EXPORT_SYMBOL(mlx5_notifier_unregister);
0409 
0410 int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data)
0411 {
0412     return atomic_notifier_call_chain(&events->fw_nh, event, data);
0413 }
0414 
0415 /* This API is used only for processing and forwarding driver-specific
0416  * events to mlx5 consumers.
0417  */
0418 int mlx5_blocking_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
0419 {
0420     struct mlx5_events *events = dev->priv.events;
0421 
0422     return blocking_notifier_chain_register(&events->sw_nh, nb);
0423 }
0424 
0425 int mlx5_blocking_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
0426 {
0427     struct mlx5_events *events = dev->priv.events;
0428 
0429     return blocking_notifier_chain_unregister(&events->sw_nh, nb);
0430 }
0431 
0432 int mlx5_blocking_notifier_call_chain(struct mlx5_core_dev *dev, unsigned int event,
0433                       void *data)
0434 {
0435     struct mlx5_events *events = dev->priv.events;
0436 
0437     return blocking_notifier_call_chain(&events->sw_nh, event, data);
0438 }
0439 
0440 void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work)
0441 {
0442     queue_work(dev->priv.events->wq, work);
0443 }