// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */

#include "eswitch.h"
#include "esw/qos.h"
#include "en/port.h"
#define CREATE_TRACE_POINTS
#include "diag/qos_tracepoint.h"

/* The minimum BW share value supported by the HW is 1 Mbit/sec */
#define MLX5_MIN_BW_SHARE 1

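/* Map a rate in Mbit/sec to a HW bw_share value by dividing it by @divider
 * (rounding up) and clamping the result to [MLX5_MIN_BW_SHARE, @limit].
 * E.g. rate=5000, divider=10, limit=100 yields min(max(500, 1), 100) = 100.
 */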
#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \
    min_t(u32, max_t(u32, DIV_ROUND_UP(rate, divider), MLX5_MIN_BW_SHARE), limit)

struct mlx5_esw_rate_group {
    u32 tsar_ix;
    u32 max_rate;
    u32 min_rate;
    u32 bw_share;
    struct list_head list;
};

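/* Write @parent_ix, @max_rate and @bw_share into @sched_ctx, then issue a
 * modify command updating the max_average_bw and bw_share fields of the
 * scheduling element at @tsar_ix. Shared by group and vport configuration.
 */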
static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx,
                   u32 parent_ix, u32 tsar_ix,
                   u32 max_rate, u32 bw_share)
{
    u32 bitmask = 0;

    if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
        return -EOPNOTSUPP;

    MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_ix);
    MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
    MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
    bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
    bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;

    return mlx5_modify_scheduling_element_cmd(dev,
                          SCHEDULING_HIERARCHY_E_SWITCH,
                          sched_ctx,
                          tsar_ix,
                          bitmask);
}

static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
                u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
{
    u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
    struct mlx5_core_dev *dev = esw->dev;
    int err;

    err = esw_qos_tsar_config(dev, sched_ctx,
                  esw->qos.root_tsar_ix, group->tsar_ix,
                  max_rate, bw_share);
    if (err)
        NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed");

    trace_mlx5_esw_group_qos_config(dev, group, group->tsar_ix, bw_share, max_rate);

    return err;
}

static int esw_qos_vport_config(struct mlx5_eswitch *esw,
                struct mlx5_vport *vport,
                u32 max_rate, u32 bw_share,
                struct netlink_ext_ack *extack)
{
    u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
    struct mlx5_esw_rate_group *group = vport->qos.group;
    struct mlx5_core_dev *dev = esw->dev;
    u32 parent_tsar_ix;
    void *vport_elem;
    int err;

    if (!vport->qos.enabled)
        return -EIO;

    parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
    MLX5_SET(scheduling_context, sched_ctx, element_type,
         SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
    vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx,
                  element_attributes);
    MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);

    err = esw_qos_tsar_config(dev, sched_ctx, parent_tsar_ix, vport->qos.esw_tsar_ix,
                  max_rate, bw_share);
    if (err) {
        esw_warn(esw->dev,
             "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n",
             vport->vport, err);
        NL_SET_ERR_MSG_MOD(extack, "E-Switch modify TSAR vport element failed");
        return err;
    }

    trace_mlx5_esw_vport_qos_config(vport, bw_share, max_rate);

    return 0;
}

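/* Find the largest min_rate guarantee across all groups (@group_level) or
 * across @group's vports, and scale it so that the resulting bw_share
 * values fit under the max_tsar_bw_share HW capability.
 */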
static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
                          struct mlx5_esw_rate_group *group,
                          bool group_level)
{
    u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
    struct mlx5_vport *evport;
    u32 max_guarantee = 0;
    unsigned long i;

    if (group_level) {
        struct mlx5_esw_rate_group *group;

        list_for_each_entry(group, &esw->qos.groups, list) {
            if (group->min_rate < max_guarantee)
                continue;
            max_guarantee = group->min_rate;
        }
    } else {
        mlx5_esw_for_each_vport(esw, i, evport) {
            if (!evport->enabled || !evport->qos.enabled ||
                evport->qos.group != group || evport->qos.min_rate < max_guarantee)
                continue;
            max_guarantee = evport->qos.min_rate;
        }
    }

    if (max_guarantee)
        return max_t(u32, max_guarantee / fw_max_bw_share, 1);

    /* If the vports' min rate divider is 0, but their group has bw_share
     * configured, then the vports' bw_share must be set to the minimal
     * value.
     */
    if (!group_level && !max_guarantee && group && group->bw_share)
        return 1;
    return 0;
}

static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max)
{
    if (divider)
        return MLX5_RATE_TO_BW_SHARE(min_rate, divider, fw_max);

    return 0;
}

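/* Recompute bw_share for every enabled vport in @group from its min_rate
 * and push any value that changed to HW.
 */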
static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw,
                         struct mlx5_esw_rate_group *group,
                         struct netlink_ext_ack *extack)
{
    u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
    u32 divider = esw_qos_calculate_min_rate_divider(esw, group, false);
    struct mlx5_vport *evport;
    unsigned long i;
    u32 bw_share;
    int err;

    mlx5_esw_for_each_vport(esw, i, evport) {
        if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group)
            continue;
        bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share);

        if (bw_share == evport->qos.bw_share)
            continue;

        err = esw_qos_vport_config(esw, evport, evport->qos.max_rate, bw_share, extack);
        if (err)
            return err;

        evport->qos.bw_share = bw_share;
    }

    return 0;
}

static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divider,
                         struct netlink_ext_ack *extack)
{
    u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
    struct mlx5_esw_rate_group *group;
    u32 bw_share;
    int err;

    list_for_each_entry(group, &esw->qos.groups, list) {
        bw_share = esw_qos_calc_bw_share(group->min_rate, divider, fw_max_bw_share);

        if (bw_share == group->bw_share)
            continue;

        err = esw_qos_group_config(esw, group, group->max_rate, bw_share, extack);
        if (err)
            return err;

        group->bw_share = bw_share;

        /* All the group's vports need to be set with the default bw_share
         * to enable QoS for them.
         */
        err = esw_qos_normalize_vports_min_rate(esw, group, extack);
        if (err)
            return err;
    }

    return 0;
}

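/* Update @evport's min rate guarantee and renormalize bw_share across its
 * group; the previous value is restored if normalization fails.
 */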
static int esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport,
                      u32 min_rate, struct netlink_ext_ack *extack)
{
    u32 fw_max_bw_share, previous_min_rate;
    bool min_rate_supported;
    int err;

    lockdep_assert_held(&esw->state_lock);
    fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
    min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) &&
                fw_max_bw_share >= MLX5_MIN_BW_SHARE;
    if (min_rate && !min_rate_supported)
        return -EOPNOTSUPP;
    if (min_rate == evport->qos.min_rate)
        return 0;

    previous_min_rate = evport->qos.min_rate;
    evport->qos.min_rate = min_rate;
    err = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack);
    if (err)
        evport->qos.min_rate = previous_min_rate;

    return err;
}

static int esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport,
                      u32 max_rate, struct netlink_ext_ack *extack)
{
    u32 act_max_rate = max_rate;
    bool max_rate_supported;
    int err;

    lockdep_assert_held(&esw->state_lock);
    max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit);

    if (max_rate && !max_rate_supported)
        return -EOPNOTSUPP;
    if (max_rate == evport->qos.max_rate)
        return 0;

    /* If the parent group has a rate limit, fall back to the group's
     * value when the new max rate is 0.
     */
    if (evport->qos.group && !max_rate)
        act_max_rate = evport->qos.group->max_rate;

    err = esw_qos_vport_config(esw, evport, act_max_rate, evport->qos.bw_share, extack);
    if (!err)
        evport->qos.max_rate = max_rate;

    return err;
}

static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
                      u32 min_rate, struct netlink_ext_ack *extack)
{
    u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
    struct mlx5_core_dev *dev = esw->dev;
    u32 previous_min_rate, divider;
    int err;

    if (!(MLX5_CAP_QOS(dev, esw_bw_share) && fw_max_bw_share >= MLX5_MIN_BW_SHARE))
        return -EOPNOTSUPP;

    if (min_rate == group->min_rate)
        return 0;

    previous_min_rate = group->min_rate;
    group->min_rate = min_rate;
    divider = esw_qos_calculate_min_rate_divider(esw, group, true);
    err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
    if (err) {
        group->min_rate = previous_min_rate;
        NL_SET_ERR_MSG_MOD(extack, "E-Switch group min rate setting failed");

        /* Attempt restoring previous configuration */
        divider = esw_qos_calculate_min_rate_divider(esw, group, true);
        if (esw_qos_normalize_groups_min_rate(esw, divider, extack))
            NL_SET_ERR_MSG_MOD(extack, "E-Switch BW share restore failed");
    }

    return err;
}

static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw,
                      struct mlx5_esw_rate_group *group,
                      u32 max_rate, struct netlink_ext_ack *extack)
{
    struct mlx5_vport *vport;
    unsigned long i;
    int err;

    if (group->max_rate == max_rate)
        return 0;

    err = esw_qos_group_config(esw, group, max_rate, group->bw_share, extack);
    if (err)
        return err;

    group->max_rate = max_rate;

    /* Any unlimited vports in the group should inherit the group's value. */
    mlx5_esw_for_each_vport(esw, i, vport) {
        if (!vport->enabled || !vport->qos.enabled ||
            vport->qos.group != group || vport->qos.max_rate)
            continue;

        err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack);
        if (err)
            NL_SET_ERR_MSG_MOD(extack,
                       "E-Switch vport implicit rate limit setting failed");
    }

    return err;
}

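/* Create a vport scheduling element under its group's TSAR (or the root
 * TSAR when the vport has no group), storing the new element index in
 * vport->qos.esw_tsar_ix.
 */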
static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
                          struct mlx5_vport *vport,
                          u32 max_rate, u32 bw_share)
{
    u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
    struct mlx5_esw_rate_group *group = vport->qos.group;
    struct mlx5_core_dev *dev = esw->dev;
    u32 parent_tsar_ix;
    void *vport_elem;
    int err;

    parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
    MLX5_SET(scheduling_context, sched_ctx, element_type,
         SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
    vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
    MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
    MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_tsar_ix);
    MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
    MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);

    err = mlx5_create_scheduling_element_cmd(dev,
                         SCHEDULING_HIERARCHY_E_SWITCH,
                         sched_ctx,
                         &vport->qos.esw_tsar_ix);
    if (err) {
        esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
             vport->vport, err);
        return err;
    }

    return 0;
}

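/* Moving a vport between groups is done by destroying its scheduling
 * element and recreating it under the new group's TSAR; on failure the
 * element is recreated under the original group.
 */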
static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw,
                           struct mlx5_vport *vport,
                           struct mlx5_esw_rate_group *curr_group,
                           struct mlx5_esw_rate_group *new_group,
                           struct netlink_ext_ack *extack)
{
    u32 max_rate;
    int err;

    err = mlx5_destroy_scheduling_element_cmd(esw->dev,
                          SCHEDULING_HIERARCHY_E_SWITCH,
                          vport->qos.esw_tsar_ix);
    if (err) {
        NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed");
        return err;
    }

    vport->qos.group = new_group;
    max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate;

    /* If the vport is unlimited, use the group's value: if the group is
     * limited it applies to the vport as well, and if not, the vport
     * remains unlimited.
     */
    err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share);
    if (err) {
        NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed.");
        goto err_sched;
    }

    return 0;

err_sched:
    vport->qos.group = curr_group;
    max_rate = vport->qos.max_rate ? vport->qos.max_rate : curr_group->max_rate;
    if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share))
        esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n",
             vport->vport);

    return err;
}

static int esw_qos_vport_update_group(struct mlx5_eswitch *esw,
                      struct mlx5_vport *vport,
                      struct mlx5_esw_rate_group *group,
                      struct netlink_ext_ack *extack)
{
    struct mlx5_esw_rate_group *new_group, *curr_group;
    int err;

    if (!vport->enabled)
        return -EINVAL;

    curr_group = vport->qos.group;
    new_group = group ?: esw->qos.group0;
    if (curr_group == new_group)
        return 0;

    err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack);
    if (err)
        return err;

    /* Recalculate bw share weights of old and new groups */
    if (vport->qos.bw_share || new_group->bw_share) {
        esw_qos_normalize_vports_min_rate(esw, curr_group, extack);
        esw_qos_normalize_vports_min_rate(esw, new_group, extack);
    }

    return 0;
}

static struct mlx5_esw_rate_group *
__esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
    u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
    struct mlx5_esw_rate_group *group;
    u32 divider;
    int err;

    group = kzalloc(sizeof(*group), GFP_KERNEL);
    if (!group)
        return ERR_PTR(-ENOMEM);

    MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
         esw->qos.root_tsar_ix);
    err = mlx5_create_scheduling_element_cmd(esw->dev,
                         SCHEDULING_HIERARCHY_E_SWITCH,
                         tsar_ctx,
                         &group->tsar_ix);
    if (err) {
        NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for group failed");
        goto err_sched_elem;
    }

    list_add_tail(&group->list, &esw->qos.groups);

    divider = esw_qos_calculate_min_rate_divider(esw, group, true);
    if (divider) {
        err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
        if (err) {
            NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed");
            goto err_min_rate;
        }
    }
    trace_mlx5_esw_group_qos_create(esw->dev, group, group->tsar_ix);

    return group;

err_min_rate:
    list_del(&group->list);
    if (mlx5_destroy_scheduling_element_cmd(esw->dev,
                        SCHEDULING_HIERARCHY_E_SWITCH,
                        group->tsar_ix))
        NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for group failed");
err_sched_elem:
    kfree(group);
    return ERR_PTR(err);
}

static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack);
static void esw_qos_put(struct mlx5_eswitch *esw);

static struct mlx5_esw_rate_group *
esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
    struct mlx5_esw_rate_group *group;
    int err;

    if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth))
        return ERR_PTR(-EOPNOTSUPP);

    err = esw_qos_get(esw, extack);
    if (err)
        return ERR_PTR(err);

    group = __esw_qos_create_rate_group(esw, extack);
    if (IS_ERR(group))
        esw_qos_put(esw);

    return group;
}

static int __esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
                    struct mlx5_esw_rate_group *group,
                    struct netlink_ext_ack *extack)
{
    u32 divider;
    int err;

    list_del(&group->list);

    divider = esw_qos_calculate_min_rate_divider(esw, NULL, true);
    err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
    if (err)
        NL_SET_ERR_MSG_MOD(extack, "E-Switch groups' normalization failed");

    err = mlx5_destroy_scheduling_element_cmd(esw->dev,
                          SCHEDULING_HIERARCHY_E_SWITCH,
                          group->tsar_ix);
    if (err)
        NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed");

    trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix);

    kfree(group);

    return err;
}

static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
                      struct mlx5_esw_rate_group *group,
                      struct netlink_ext_ack *extack)
{
    int err;

    err = __esw_qos_destroy_rate_group(esw, group, extack);
    esw_qos_put(esw);

    return err;
}

static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
{
    switch (type) {
    case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
        return MLX5_CAP_QOS(dev, esw_element_type) &
               ELEMENT_TYPE_CAP_MASK_TASR;
    case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
        return MLX5_CAP_QOS(dev, esw_element_type) &
               ELEMENT_TYPE_CAP_MASK_VPORT;
    case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
        return MLX5_CAP_QOS(dev, esw_element_type) &
               ELEMENT_TYPE_CAP_MASK_VPORT_TC;
    case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
        return MLX5_CAP_QOS(dev, esw_element_type) &
               ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
    }
    return false;
}

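/* Create the root DWRR TSAR of the E-Switch scheduling hierarchy, plus the
 * default rate group (group0) when nested scheduling depth is supported.
 */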
static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
    u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
    struct mlx5_core_dev *dev = esw->dev;
    __be32 *attr;
    int err;

    if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
        return -EOPNOTSUPP;

    if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR))
        return -EOPNOTSUPP;

    MLX5_SET(scheduling_context, tsar_ctx, element_type,
         SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);

    attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
    *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);

    err = mlx5_create_scheduling_element_cmd(dev,
                         SCHEDULING_HIERARCHY_E_SWITCH,
                         tsar_ctx,
                         &esw->qos.root_tsar_ix);
    if (err) {
        esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err);
        return err;
    }

    INIT_LIST_HEAD(&esw->qos.groups);
    if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) {
        esw->qos.group0 = __esw_qos_create_rate_group(esw, extack);
        if (IS_ERR(esw->qos.group0)) {
            esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n",
                 PTR_ERR(esw->qos.group0));
            err = PTR_ERR(esw->qos.group0);
            goto err_group0;
        }
    }
    refcount_set(&esw->qos.refcnt, 1);

    return 0;

err_group0:
    if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH,
                        esw->qos.root_tsar_ix))
        esw_warn(esw->dev, "E-Switch destroy root TSAR failed.\n");

    return err;
}

static void esw_qos_destroy(struct mlx5_eswitch *esw)
{
    int err;

    if (esw->qos.group0)
        __esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL);

    err = mlx5_destroy_scheduling_element_cmd(esw->dev,
                          SCHEDULING_HIERARCHY_E_SWITCH,
                          esw->qos.root_tsar_ix);
    if (err)
        esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
}

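/* QoS setup is refcounted: the first user triggers esw_qos_create() and the
 * last esw_qos_put() tears it down. Both must be called under state_lock.
 */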
static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
    int err = 0;

    lockdep_assert_held(&esw->state_lock);

    if (!refcount_inc_not_zero(&esw->qos.refcnt)) {
        /* esw_qos_create() sets the refcount to 1 only on success.
         * No need to decrement on failure.
         */
        err = esw_qos_create(esw, extack);
    }

    return err;
}

static void esw_qos_put(struct mlx5_eswitch *esw)
{
    lockdep_assert_held(&esw->state_lock);
    if (refcount_dec_and_test(&esw->qos.refcnt))
        esw_qos_destroy(esw);
}

static int esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
                u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
{
    int err;

    lockdep_assert_held(&esw->state_lock);
    if (vport->qos.enabled)
        return 0;

    err = esw_qos_get(esw, extack);
    if (err)
        return err;

    vport->qos.group = esw->qos.group0;

    err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share);
    if (err)
        goto err_out;

    vport->qos.enabled = true;
    trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate);

    return 0;

err_out:
    esw_qos_put(esw);

    return err;
}

void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
{
    int err;

    lockdep_assert_held(&esw->state_lock);
    if (!vport->qos.enabled)
        return;
    WARN(vport->qos.group && vport->qos.group != esw->qos.group0,
         "Disabling QoS on port before detaching it from group");

    err = mlx5_destroy_scheduling_element_cmd(esw->dev,
                          SCHEDULING_HIERARCHY_E_SWITCH,
                          vport->qos.esw_tsar_ix);
    if (err)
        esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n",
             vport->vport, err);

    memset(&vport->qos, 0, sizeof(vport->qos));
    trace_mlx5_esw_vport_qos_destroy(vport);

    esw_qos_put(esw);
}

int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
                u32 max_rate, u32 min_rate)
{
    int err;

    lockdep_assert_held(&esw->state_lock);
    err = esw_qos_vport_enable(esw, vport, 0, 0, NULL);
    if (err)
        return err;

    err = esw_qos_set_vport_min_rate(esw, vport, min_rate, NULL);
    if (!err)
        err = esw_qos_set_vport_max_rate(esw, vport, max_rate, NULL);

    return err;
}

int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps)
{
    u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
    struct mlx5_vport *vport;
    u32 bitmask;
    int err;

    vport = mlx5_eswitch_get_vport(esw, vport_num);
    if (IS_ERR(vport))
        return PTR_ERR(vport);

    mutex_lock(&esw->state_lock);
    if (!vport->qos.enabled) {
        /* Eswitch QoS wasn't enabled yet. Enable it and vport QoS. */
        err = esw_qos_vport_enable(esw, vport, rate_mbps, vport->qos.bw_share, NULL);
    } else {
        MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps);

        bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
        err = mlx5_modify_scheduling_element_cmd(esw->dev,
                             SCHEDULING_HIERARCHY_E_SWITCH,
                             ctx,
                             vport->qos.esw_tsar_ix,
                             bitmask);
    }
    mutex_unlock(&esw->state_lock);

    return err;
}

#define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */
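/* E.g. a devlink rate of 1250000000 Bps converts to 10000 Mbps below. */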

/* Convert a bytes-per-second value passed by pointer into megabits per
 * second, writing the result back through @rate. Returns an error if the
 * converted rate exceeds the link speed or is not a whole number of Mbps.
 */
static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name,
                    u64 *rate, struct netlink_ext_ack *extack)
{
    u32 link_speed_max, remainder;
    u64 value;
    int err;

    err = mlx5e_port_max_linkspeed(mdev, &link_speed_max);
    if (err) {
        NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed");
        return err;
    }

    value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &remainder);
    if (remainder) {
        pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n",
               name, *rate);
        NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps");
        return -EINVAL;
    }

    if (value > link_speed_max) {
        pr_err("%s rate value %lluMbps exceeds link maximum speed %u.\n",
               name, value, link_speed_max);
        NL_SET_ERR_MSG_MOD(extack, "TX rate value exceeds link maximum speed");
        return -EINVAL;
    }

    *rate = value;
    return 0;
}

/* Eswitch devlink rate API */

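/* devlink tx_share maps to the vport/group min rate (bandwidth guarantee)
 * and tx_max to its max rate (bandwidth limit); both arrive in bytes per
 * second and are converted to Mbps first.
 */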
int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
                        u64 tx_share, struct netlink_ext_ack *extack)
{
    struct mlx5_vport *vport = priv;
    struct mlx5_eswitch *esw;
    int err;

    esw = vport->dev->priv.eswitch;
    if (!mlx5_esw_allowed(esw))
        return -EPERM;

    err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack);
    if (err)
        return err;

    mutex_lock(&esw->state_lock);
    err = esw_qos_vport_enable(esw, vport, 0, 0, extack);
    if (err)
        goto unlock;

    err = esw_qos_set_vport_min_rate(esw, vport, tx_share, extack);
unlock:
    mutex_unlock(&esw->state_lock);
    return err;
}

int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
                      u64 tx_max, struct netlink_ext_ack *extack)
{
    struct mlx5_vport *vport = priv;
    struct mlx5_eswitch *esw;
    int err;

    esw = vport->dev->priv.eswitch;
    if (!mlx5_esw_allowed(esw))
        return -EPERM;

    err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack);
    if (err)
        return err;

    mutex_lock(&esw->state_lock);
    err = esw_qos_vport_enable(esw, vport, 0, 0, extack);
    if (err)
        goto unlock;

    err = esw_qos_set_vport_max_rate(esw, vport, tx_max, extack);
unlock:
    mutex_unlock(&esw->state_lock);
    return err;
}

int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
                        u64 tx_share, struct netlink_ext_ack *extack)
{
    struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
    struct mlx5_eswitch *esw = dev->priv.eswitch;
    struct mlx5_esw_rate_group *group = priv;
    int err;

    err = esw_qos_devlink_rate_to_mbps(dev, "tx_share", &tx_share, extack);
    if (err)
        return err;

    mutex_lock(&esw->state_lock);
    err = esw_qos_set_group_min_rate(esw, group, tx_share, extack);
    mutex_unlock(&esw->state_lock);
    return err;
}

int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
                      u64 tx_max, struct netlink_ext_ack *extack)
{
    struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
    struct mlx5_eswitch *esw = dev->priv.eswitch;
    struct mlx5_esw_rate_group *group = priv;
    int err;

    err = esw_qos_devlink_rate_to_mbps(dev, "tx_max", &tx_max, extack);
    if (err)
        return err;

    mutex_lock(&esw->state_lock);
    err = esw_qos_set_group_max_rate(esw, group, tx_max, extack);
    mutex_unlock(&esw->state_lock);
    return err;
}

int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
                   struct netlink_ext_ack *extack)
{
    struct mlx5_esw_rate_group *group;
    struct mlx5_eswitch *esw;
    int err = 0;

    esw = mlx5_devlink_eswitch_get(rate_node->devlink);
    if (IS_ERR(esw))
        return PTR_ERR(esw);

    mutex_lock(&esw->state_lock);
    if (esw->mode != MLX5_ESWITCH_OFFLOADS) {
        NL_SET_ERR_MSG_MOD(extack,
                   "Rate node creation supported only in switchdev mode");
        err = -EOPNOTSUPP;
        goto unlock;
    }

    group = esw_qos_create_rate_group(esw, extack);
    if (IS_ERR(group)) {
        err = PTR_ERR(group);
        goto unlock;
    }

    *priv = group;
unlock:
    mutex_unlock(&esw->state_lock);
    return err;
}

int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
                   struct netlink_ext_ack *extack)
{
    struct mlx5_esw_rate_group *group = priv;
    struct mlx5_eswitch *esw;
    int err;

    esw = mlx5_devlink_eswitch_get(rate_node->devlink);
    if (IS_ERR(esw))
        return PTR_ERR(esw);

    mutex_lock(&esw->state_lock);
    err = esw_qos_destroy_rate_group(esw, group, extack);
    mutex_unlock(&esw->state_lock);
    return err;
}

int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
                    struct mlx5_vport *vport,
                    struct mlx5_esw_rate_group *group,
                    struct netlink_ext_ack *extack)
{
    int err;

    mutex_lock(&esw->state_lock);
    err = esw_qos_vport_enable(esw, vport, 0, 0, extack);
    if (!err)
        err = esw_qos_vport_update_group(esw, vport, group, extack);
    mutex_unlock(&esw->state_lock);
    return err;
}

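/* A NULL @parent detaches the vport from its rate node, returning it to the
 * default group (group0).
 */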
int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
                     struct devlink_rate *parent,
                     void *priv, void *parent_priv,
                     struct netlink_ext_ack *extack)
{
    struct mlx5_esw_rate_group *group;
    struct mlx5_vport *vport = priv;

    if (!parent)
        return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch,
                               vport, NULL, extack);

    group = parent_priv;
    return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, group, extack);
}