0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  * cgroups support for the BFQ I/O scheduler.
0004  */
0005 #include <linux/module.h>
0006 #include <linux/slab.h>
0007 #include <linux/blkdev.h>
0008 #include <linux/cgroup.h>
0009 #include <linux/ktime.h>
0010 #include <linux/rbtree.h>
0011 #include <linux/ioprio.h>
0012 #include <linux/sbitmap.h>
0013 #include <linux/delay.h>
0014 
0015 #include "elevator.h"
0016 #include "bfq-iosched.h"
0017 
0018 #ifdef CONFIG_BFQ_CGROUP_DEBUG
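/**
 * bfq_stat_init - initialize a bfq_stat
 * @stat: bfq_stat to initialize
 * @gfp: allocation flags
 *
 * Allocate the per-CPU counter and zero the auxiliary count.  Returns 0 on
 * success, or the error returned by percpu_counter_init() on failure.
 */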
0019 static int bfq_stat_init(struct bfq_stat *stat, gfp_t gfp)
0020 {
0021     int ret;
0022 
0023     ret = percpu_counter_init(&stat->cpu_cnt, 0, gfp);
0024     if (ret)
0025         return ret;
0026 
0027     atomic64_set(&stat->aux_cnt, 0);
0028     return 0;
0029 }
0030 
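/**
 * bfq_stat_exit - release the resources of a bfq_stat
 * @stat: bfq_stat to tear down
 *
 * Free the per-CPU counter allocated by bfq_stat_init().
 */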
0031 static void bfq_stat_exit(struct bfq_stat *stat)
0032 {
0033     percpu_counter_destroy(&stat->cpu_cnt);
0034 }
0035 
0036 /**
0037  * bfq_stat_add - add a value to a bfq_stat
0038  * @stat: target bfq_stat
0039  * @val: value to add
0040  *
0041  * Add @val to @stat.  The caller must ensure that IRQs on the same CPU
0042  * don't re-enter this function for the same counter.
0043  */
0044 static inline void bfq_stat_add(struct bfq_stat *stat, uint64_t val)
0045 {
0046     percpu_counter_add_batch(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH);
0047 }
0048 
0049 /**
0050  * bfq_stat_read - read the current value of a bfq_stat
0051  * @stat: bfq_stat to read
0052  */
0053 static inline uint64_t bfq_stat_read(struct bfq_stat *stat)
0054 {
0055     return percpu_counter_sum_positive(&stat->cpu_cnt);
0056 }
0057 
0058 /**
0059  * bfq_stat_reset - reset a bfq_stat
0060  * @stat: bfq_stat to reset
0061  */
0062 static inline void bfq_stat_reset(struct bfq_stat *stat)
0063 {
0064     percpu_counter_set(&stat->cpu_cnt, 0);
0065     atomic64_set(&stat->aux_cnt, 0);
0066 }
0067 
0068 /**
0069  * bfq_stat_add_aux - add a bfq_stat into another's aux count
0070  * @to: the destination bfq_stat
0071  * @from: the source
0072  *
0073  * Add @from's count including the aux one to @to's aux count.
0074  */
0075 static inline void bfq_stat_add_aux(struct bfq_stat *to,
0076                      struct bfq_stat *from)
0077 {
0078     atomic64_add(bfq_stat_read(from) + atomic64_read(&from->aux_cnt),
0079              &to->aux_cnt);
0080 }
0081 
0082 /**
0083  * blkg_prfill_stat - prfill callback for bfq_stat
0084  * @sf: seq_file to print to
0085  * @pd: policy private data of interest
0086  * @off: offset to the bfq_stat in @pd
0087  *
0088  * prfill callback for printing a bfq_stat.
0089  */
0090 static u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd,
0091         int off)
0092 {
0093     return __blkg_prfill_u64(sf, pd, bfq_stat_read((void *)pd + off));
0094 }
0095 
0096 /* bfqg stats flags */
0097 enum bfqg_stats_flags {
0098     BFQG_stats_waiting = 0,
0099     BFQG_stats_idling,
0100     BFQG_stats_empty,
0101 };
0102 
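/*
 * BFQG_FLAG_FNS(name) generates three helpers that set, clear and test the
 * BFQG_stats_<name> bit in stats->flags.  For instance, BFQG_FLAG_FNS(waiting)
 * below expands (modulo whitespace) to:
 *
 *	static void bfqg_stats_mark_waiting(struct bfqg_stats *stats)
 *	{ stats->flags |= (1 << BFQG_stats_waiting); }
 *	static void bfqg_stats_clear_waiting(struct bfqg_stats *stats)
 *	{ stats->flags &= ~(1 << BFQG_stats_waiting); }
 *	static int bfqg_stats_waiting(struct bfqg_stats *stats)
 *	{ return (stats->flags & (1 << BFQG_stats_waiting)) != 0; }
 */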
0103 #define BFQG_FLAG_FNS(name)                     \
0104 static void bfqg_stats_mark_##name(struct bfqg_stats *stats)    \
0105 {                                   \
0106     stats->flags |= (1 << BFQG_stats_##name);           \
0107 }                                   \
0108 static void bfqg_stats_clear_##name(struct bfqg_stats *stats)   \
0109 {                                   \
0110     stats->flags &= ~(1 << BFQG_stats_##name);          \
0111 }                                   \
0112 static int bfqg_stats_##name(struct bfqg_stats *stats)      \
0113 {                                   \
0114     return (stats->flags & (1 << BFQG_stats_##name)) != 0;      \
0115 }                                   \
0116 
0117 BFQG_FLAG_FNS(waiting)
0118 BFQG_FLAG_FNS(idling)
0119 BFQG_FLAG_FNS(empty)
0120 #undef BFQG_FLAG_FNS
0121 
0122 /* This should be called with the scheduler lock held. */
0123 static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats)
0124 {
0125     u64 now;
0126 
0127     if (!bfqg_stats_waiting(stats))
0128         return;
0129 
0130     now = ktime_get_ns();
0131     if (now > stats->start_group_wait_time)
0132         bfq_stat_add(&stats->group_wait_time,
0133                   now - stats->start_group_wait_time);
0134     bfqg_stats_clear_waiting(stats);
0135 }
0136 
0137 /* This should be called with the scheduler lock held. */
0138 static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
0139                          struct bfq_group *curr_bfqg)
0140 {
0141     struct bfqg_stats *stats = &bfqg->stats;
0142 
0143     if (bfqg_stats_waiting(stats))
0144         return;
0145     if (bfqg == curr_bfqg)
0146         return;
0147     stats->start_group_wait_time = ktime_get_ns();
0148     bfqg_stats_mark_waiting(stats);
0149 }
0150 
0151 /* This should be called with the scheduler lock held. */
0152 static void bfqg_stats_end_empty_time(struct bfqg_stats *stats)
0153 {
0154     u64 now;
0155 
0156     if (!bfqg_stats_empty(stats))
0157         return;
0158 
0159     now = ktime_get_ns();
0160     if (now > stats->start_empty_time)
0161         bfq_stat_add(&stats->empty_time,
0162                   now - stats->start_empty_time);
0163     bfqg_stats_clear_empty(stats);
0164 }
0165 
0166 void bfqg_stats_update_dequeue(struct bfq_group *bfqg)
0167 {
0168     bfq_stat_add(&bfqg->stats.dequeue, 1);
0169 }
0170 
0171 void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg)
0172 {
0173     struct bfqg_stats *stats = &bfqg->stats;
0174 
0175     if (blkg_rwstat_total(&stats->queued))
0176         return;
0177 
0178     /*
0179      * The group is already marked empty. This can happen if bfqq got a
0180      * new request in its parent group and was moved to this group while
0181      * being added to the service tree. Just ignore the event and move on.
0182      */
0183     if (bfqg_stats_empty(stats))
0184         return;
0185 
0186     stats->start_empty_time = ktime_get_ns();
0187     bfqg_stats_mark_empty(stats);
0188 }
0189 
0190 void bfqg_stats_update_idle_time(struct bfq_group *bfqg)
0191 {
0192     struct bfqg_stats *stats = &bfqg->stats;
0193 
0194     if (bfqg_stats_idling(stats)) {
0195         u64 now = ktime_get_ns();
0196 
0197         if (now > stats->start_idle_time)
0198             bfq_stat_add(&stats->idle_time,
0199                       now - stats->start_idle_time);
0200         bfqg_stats_clear_idling(stats);
0201     }
0202 }
0203 
0204 void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg)
0205 {
0206     struct bfqg_stats *stats = &bfqg->stats;
0207 
0208     stats->start_idle_time = ktime_get_ns();
0209     bfqg_stats_mark_idling(stats);
0210 }
0211 
0212 void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg)
0213 {
0214     struct bfqg_stats *stats = &bfqg->stats;
0215 
0216     bfq_stat_add(&stats->avg_queue_size_sum,
0217               blkg_rwstat_total(&stats->queued));
0218     bfq_stat_add(&stats->avg_queue_size_samples, 1);
0219     bfqg_stats_update_group_wait_time(stats);
0220 }
0221 
0222 void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
0223                   blk_opf_t opf)
0224 {
0225     blkg_rwstat_add(&bfqg->stats.queued, opf, 1);
0226     bfqg_stats_end_empty_time(&bfqg->stats);
0227     if (!(bfqq == ((struct bfq_data *)bfqg->bfqd)->in_service_queue))
0228         bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq));
0229 }
0230 
0231 void bfqg_stats_update_io_remove(struct bfq_group *bfqg, blk_opf_t opf)
0232 {
0233     blkg_rwstat_add(&bfqg->stats.queued, opf, -1);
0234 }
0235 
0236 void bfqg_stats_update_io_merged(struct bfq_group *bfqg, blk_opf_t opf)
0237 {
0238     blkg_rwstat_add(&bfqg->stats.merged, opf, 1);
0239 }
0240 
0241 void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
0242                   u64 io_start_time_ns, blk_opf_t opf)
0243 {
0244     struct bfqg_stats *stats = &bfqg->stats;
0245     u64 now = ktime_get_ns();
0246 
0247     if (now > io_start_time_ns)
0248         blkg_rwstat_add(&stats->service_time, opf,
0249                 now - io_start_time_ns);
0250     if (io_start_time_ns > start_time_ns)
0251         blkg_rwstat_add(&stats->wait_time, opf,
0252                 io_start_time_ns - start_time_ns);
0253 }
0254 
0255 #else /* CONFIG_BFQ_CGROUP_DEBUG */
0256 
0257 void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
0258                   blk_opf_t opf) { }
0259 void bfqg_stats_update_io_remove(struct bfq_group *bfqg, blk_opf_t opf) { }
0260 void bfqg_stats_update_io_merged(struct bfq_group *bfqg, blk_opf_t opf) { }
0261 void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
0262                   u64 io_start_time_ns, blk_opf_t opf) { }
0263 void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { }
0264 void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { }
0265 void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { }
0266 void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg) { }
0267 void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg) { }
0268 
0269 #endif /* CONFIG_BFQ_CGROUP_DEBUG */
0270 
0271 #ifdef CONFIG_BFQ_GROUP_IOSCHED
0272 
0273 /*
0274  * blk-cgroup policy-related handlers
0275  * The following functions help in converting between blk-cgroup
0276  * internal structures and BFQ-specific structures.
0277  */
0278 
0279 static struct bfq_group *pd_to_bfqg(struct blkg_policy_data *pd)
0280 {
0281     return pd ? container_of(pd, struct bfq_group, pd) : NULL;
0282 }
0283 
0284 struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg)
0285 {
0286     return pd_to_blkg(&bfqg->pd);
0287 }
0288 
0289 static struct bfq_group *blkg_to_bfqg(struct blkcg_gq *blkg)
0290 {
0291     return pd_to_bfqg(blkg_to_pd(blkg, &blkcg_policy_bfq));
0292 }
0293 
0294 /*
0295  * bfq_group handlers
0296  * The following functions help in navigating the bfq_group hierarchy
0297  * by allowing one to find the parent of a bfq_group or the bfq_group
0298  * associated with a bfq_queue.
0299  */
0300 
0301 static struct bfq_group *bfqg_parent(struct bfq_group *bfqg)
0302 {
0303     struct blkcg_gq *pblkg = bfqg_to_blkg(bfqg)->parent;
0304 
0305     return pblkg ? blkg_to_bfqg(pblkg) : NULL;
0306 }
0307 
0308 struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
0309 {
0310     struct bfq_entity *group_entity = bfqq->entity.parent;
0311 
0312     return group_entity ? container_of(group_entity, struct bfq_group,
0313                        entity) :
0314                   bfqq->bfqd->root_group;
0315 }
0316 
0317 /*
0318  * The following functions handle reference counting of a bfq_group; the
0319  * bfqg_and_blkg_* variants also pin the associated blkg via blk-cgroup hooks.
0320  */
0321 
0322 static void bfqg_get(struct bfq_group *bfqg)
0323 {
0324     bfqg->ref++;
0325 }
0326 
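/* Drop a reference to @bfqg; the group is freed when the last one is gone. */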
0327 static void bfqg_put(struct bfq_group *bfqg)
0328 {
0329     bfqg->ref--;
0330 
0331     if (bfqg->ref == 0)
0332         kfree(bfqg);
0333 }
0334 
0335 static void bfqg_and_blkg_get(struct bfq_group *bfqg)
0336 {
0337     /* see comments in bfq_bic_update_cgroup for why refcounting bfqg */
0338     bfqg_get(bfqg);
0339 
0340     blkg_get(bfqg_to_blkg(bfqg));
0341 }
0342 
0343 void bfqg_and_blkg_put(struct bfq_group *bfqg)
0344 {
0345     blkg_put(bfqg_to_blkg(bfqg));
0346 
0347     bfqg_put(bfqg);
0348 }
0349 
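/*
 * Charge the size and the number of I/Os of @rq to the bfq_group associated
 * with the request's bio; these counters back the (cgroup v1)
 * bfq.io_service_bytes and bfq.io_serviced files.
 */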
0350 void bfqg_stats_update_legacy_io(struct request_queue *q, struct request *rq)
0351 {
0352     struct bfq_group *bfqg = blkg_to_bfqg(rq->bio->bi_blkg);
0353 
0354     if (!bfqg)
0355         return;
0356 
0357     blkg_rwstat_add(&bfqg->stats.bytes, rq->cmd_flags, blk_rq_bytes(rq));
0358     blkg_rwstat_add(&bfqg->stats.ios, rq->cmd_flags, 1);
0359 }
0360 
0361 /* @stats = 0 */
0362 static void bfqg_stats_reset(struct bfqg_stats *stats)
0363 {
0364 #ifdef CONFIG_BFQ_CGROUP_DEBUG
0365     /* queued stats shouldn't be cleared */
0366     blkg_rwstat_reset(&stats->merged);
0367     blkg_rwstat_reset(&stats->service_time);
0368     blkg_rwstat_reset(&stats->wait_time);
0369     bfq_stat_reset(&stats->time);
0370     bfq_stat_reset(&stats->avg_queue_size_sum);
0371     bfq_stat_reset(&stats->avg_queue_size_samples);
0372     bfq_stat_reset(&stats->dequeue);
0373     bfq_stat_reset(&stats->group_wait_time);
0374     bfq_stat_reset(&stats->idle_time);
0375     bfq_stat_reset(&stats->empty_time);
0376 #endif
0377 }
0378 
0379 /* @to += @from */
0380 static void bfqg_stats_add_aux(struct bfqg_stats *to, struct bfqg_stats *from)
0381 {
0382     if (!to || !from)
0383         return;
0384 
0385 #ifdef CONFIG_BFQ_CGROUP_DEBUG
0386     /* queued stats are deliberately not transferred */
0387     blkg_rwstat_add_aux(&to->merged, &from->merged);
0388     blkg_rwstat_add_aux(&to->service_time, &from->service_time);
0389     blkg_rwstat_add_aux(&to->wait_time, &from->wait_time);
0390     bfq_stat_add_aux(&to->time, &from->time);
0391     bfq_stat_add_aux(&to->avg_queue_size_sum, &from->avg_queue_size_sum);
0392     bfq_stat_add_aux(&to->avg_queue_size_samples,
0393               &from->avg_queue_size_samples);
0394     bfq_stat_add_aux(&to->dequeue, &from->dequeue);
0395     bfq_stat_add_aux(&to->group_wait_time, &from->group_wait_time);
0396     bfq_stat_add_aux(&to->idle_time, &from->idle_time);
0397     bfq_stat_add_aux(&to->empty_time, &from->empty_time);
0398 #endif
0399 }
0400 
0401 /*
0402  * Transfer @bfqg's stats to its parent's aux counts so that the ancestors'
0403  * recursive stats can still account for the amount used by this bfqg after
0404  * it's gone.
0405  */
0406 static void bfqg_stats_xfer_dead(struct bfq_group *bfqg)
0407 {
0408     struct bfq_group *parent;
0409 
0410     if (!bfqg) /* root_group */
0411         return;
0412 
0413     parent = bfqg_parent(bfqg);
0414 
0415     lockdep_assert_held(&bfqg_to_blkg(bfqg)->q->queue_lock);
0416 
0417     if (unlikely(!parent))
0418         return;
0419 
0420     bfqg_stats_add_aux(&parent->stats, &bfqg->stats);
0421     bfqg_stats_reset(&bfqg->stats);
0422 }
0423 
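/**
 * bfq_init_entity - initialize an entity and associate it with a bfq_group.
 * @entity: the entity to initialize.
 * @bfqg: the group the entity belongs to.
 *
 * Commit the entity's pending new_weight and, if the entity belongs to a
 * bfq_queue, its pending ioprio and ioprio_class; in that case also pin
 * @bfqg and its blkg so that they do not disappear before the entity.
 */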
0424 void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg)
0425 {
0426     struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
0427 
0428     entity->weight = entity->new_weight;
0429     entity->orig_weight = entity->new_weight;
0430     if (bfqq) {
0431         bfqq->ioprio = bfqq->new_ioprio;
0432         bfqq->ioprio_class = bfqq->new_ioprio_class;
0433         /*
0434          * Make sure that bfqg and its associated blkg do not
0435          * disappear before entity.
0436          */
0437         bfqg_and_blkg_get(bfqg);
0438     }
0439     entity->parent = bfqg->my_entity; /* NULL for root group */
0440     entity->sched_data = &bfqg->sched_data;
0441 }
0442 
0443 static void bfqg_stats_exit(struct bfqg_stats *stats)
0444 {
0445     blkg_rwstat_exit(&stats->bytes);
0446     blkg_rwstat_exit(&stats->ios);
0447 #ifdef CONFIG_BFQ_CGROUP_DEBUG
0448     blkg_rwstat_exit(&stats->merged);
0449     blkg_rwstat_exit(&stats->service_time);
0450     blkg_rwstat_exit(&stats->wait_time);
0451     blkg_rwstat_exit(&stats->queued);
0452     bfq_stat_exit(&stats->time);
0453     bfq_stat_exit(&stats->avg_queue_size_sum);
0454     bfq_stat_exit(&stats->avg_queue_size_samples);
0455     bfq_stat_exit(&stats->dequeue);
0456     bfq_stat_exit(&stats->group_wait_time);
0457     bfq_stat_exit(&stats->idle_time);
0458     bfq_stat_exit(&stats->empty_time);
0459 #endif
0460 }
0461 
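/*
 * Allocate the per-CPU counters backing @stats.  On failure, everything
 * allocated so far is released and -ENOMEM is returned.
 */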
0462 static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp)
0463 {
0464     if (blkg_rwstat_init(&stats->bytes, gfp) ||
0465         blkg_rwstat_init(&stats->ios, gfp))
0466         goto error;
0467 
0468 #ifdef CONFIG_BFQ_CGROUP_DEBUG
0469     if (blkg_rwstat_init(&stats->merged, gfp) ||
0470         blkg_rwstat_init(&stats->service_time, gfp) ||
0471         blkg_rwstat_init(&stats->wait_time, gfp) ||
0472         blkg_rwstat_init(&stats->queued, gfp) ||
0473         bfq_stat_init(&stats->time, gfp) ||
0474         bfq_stat_init(&stats->avg_queue_size_sum, gfp) ||
0475         bfq_stat_init(&stats->avg_queue_size_samples, gfp) ||
0476         bfq_stat_init(&stats->dequeue, gfp) ||
0477         bfq_stat_init(&stats->group_wait_time, gfp) ||
0478         bfq_stat_init(&stats->idle_time, gfp) ||
0479         bfq_stat_init(&stats->empty_time, gfp))
0480         goto error;
0481 #endif
0482 
0483     return 0;
0484 
0485 error:
0486     bfqg_stats_exit(stats);
0487     return -ENOMEM;
0488 }
0489 
0490 static struct bfq_group_data *cpd_to_bfqgd(struct blkcg_policy_data *cpd)
0491 {
0492     return cpd ? container_of(cpd, struct bfq_group_data, pd) : NULL;
0493 }
0494 
0495 static struct bfq_group_data *blkcg_to_bfqgd(struct blkcg *blkcg)
0496 {
0497     return cpd_to_bfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_bfq));
0498 }
0499 
0500 static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp)
0501 {
0502     struct bfq_group_data *bgd;
0503 
0504     bgd = kzalloc(sizeof(*bgd), gfp);
0505     if (!bgd)
0506         return NULL;
0507     return &bgd->pd;
0508 }
0509 
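/*
 * blkcg policy hook: set the initial default weight of a new blkcg to
 * CGROUP_WEIGHT_DFL on the unified hierarchy, or to BFQ_WEIGHT_LEGACY_DFL
 * on cgroup v1.
 */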
0510 static void bfq_cpd_init(struct blkcg_policy_data *cpd)
0511 {
0512     struct bfq_group_data *d = cpd_to_bfqgd(cpd);
0513 
0514     d->weight = cgroup_subsys_on_dfl(io_cgrp_subsys) ?
0515         CGROUP_WEIGHT_DFL : BFQ_WEIGHT_LEGACY_DFL;
0516 }
0517 
0518 static void bfq_cpd_free(struct blkcg_policy_data *cpd)
0519 {
0520     kfree(cpd_to_bfqgd(cpd));
0521 }
0522 
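/*
 * blkcg policy hook: allocate the per-blkg policy data (the bfq_group) on the
 * request queue's node, set up its statistics and take the initial group
 * reference.
 */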
0523 static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, struct request_queue *q,
0524                          struct blkcg *blkcg)
0525 {
0526     struct bfq_group *bfqg;
0527 
0528     bfqg = kzalloc_node(sizeof(*bfqg), gfp, q->node);
0529     if (!bfqg)
0530         return NULL;
0531 
0532     if (bfqg_stats_init(&bfqg->stats, gfp)) {
0533         kfree(bfqg);
0534         return NULL;
0535     }
0536 
0537     /* see comments in bfq_bic_update_cgroup for why refcounting */
0538     bfqg_get(bfqg);
0539     return &bfqg->pd;
0540 }
0541 
0542 static void bfq_pd_init(struct blkg_policy_data *pd)
0543 {
0544     struct blkcg_gq *blkg = pd_to_blkg(pd);
0545     struct bfq_group *bfqg = blkg_to_bfqg(blkg);
0546     struct bfq_data *bfqd = blkg->q->elevator->elevator_data;
0547     struct bfq_entity *entity = &bfqg->entity;
0548     struct bfq_group_data *d = blkcg_to_bfqgd(blkg->blkcg);
0549 
0550     entity->orig_weight = entity->weight = entity->new_weight = d->weight;
0551     entity->my_sched_data = &bfqg->sched_data;
0552     entity->last_bfqq_created = NULL;
0553 
0554     bfqg->my_entity = entity; /*
0555                    * the root_group's will be set to NULL
0556                    * in bfq_init_queue()
0557                    */
0558     bfqg->bfqd = bfqd;
0559     bfqg->active_entities = 0;
0560     bfqg->online = true;
0561     bfqg->rq_pos_tree = RB_ROOT;
0562 }
0563 
0564 static void bfq_pd_free(struct blkg_policy_data *pd)
0565 {
0566     struct bfq_group *bfqg = pd_to_bfqg(pd);
0567 
0568     bfqg_stats_exit(&bfqg->stats);
0569     bfqg_put(bfqg);
0570 }
0571 
0572 static void bfq_pd_reset_stats(struct blkg_policy_data *pd)
0573 {
0574     struct bfq_group *bfqg = pd_to_bfqg(pd);
0575 
0576     bfqg_stats_reset(&bfqg->stats);
0577 }
0578 
0579 static void bfq_group_set_parent(struct bfq_group *bfqg,
0580                     struct bfq_group *parent)
0581 {
0582     struct bfq_entity *entity;
0583 
0584     entity = &bfqg->entity;
0585     entity->parent = parent->my_entity;
0586     entity->sched_data = &parent->sched_data;
0587 }
0588 
0589 static void bfq_link_bfqg(struct bfq_data *bfqd, struct bfq_group *bfqg)
0590 {
0591     struct bfq_group *parent;
0592     struct bfq_entity *entity;
0593 
0594     /*
0595      * Update chain of bfq_groups as we might be handling a leaf group
0596      * which, along with some of its relatives, has not been hooked yet
0597      * to the private hierarchy of BFQ.
0598      */
0599     entity = &bfqg->entity;
0600     for_each_entity(entity) {
0601         struct bfq_group *curr_bfqg = container_of(entity,
0602                         struct bfq_group, entity);
0603         if (curr_bfqg != bfqd->root_group) {
0604             parent = bfqg_parent(curr_bfqg);
0605             if (!parent)
0606                 parent = bfqd->root_group;
0607             bfq_group_set_parent(curr_bfqg, parent);
0608         }
0609     }
0610 }
0611 
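/*
 * Return the bfq_group to be used for @bio: walk up from the bio's blkg until
 * an online bfq_group is found, re-associate the bio with that group's css and
 * return the group; fall back to the root group if no online group is found.
 */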
0612 struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio)
0613 {
0614     struct blkcg_gq *blkg = bio->bi_blkg;
0615     struct bfq_group *bfqg;
0616 
0617     while (blkg) {
0618         bfqg = blkg_to_bfqg(blkg);
0619         if (bfqg->online) {
0620             bio_associate_blkg_from_css(bio, &blkg->blkcg->css);
0621             return bfqg;
0622         }
0623         blkg = blkg->parent;
0624     }
0625     bio_associate_blkg_from_css(bio,
0626                 &bfqg_to_blkg(bfqd->root_group)->blkcg->css);
0627     return bfqd->root_group;
0628 }
0629 
0630 /**
0631  * bfq_bfqq_move - migrate @bfqq to @bfqg.
0632  * @bfqd: queue descriptor.
0633  * @bfqq: the queue to move.
0634  * @bfqg: the group to move to.
0635  *
0636  * Move @bfqq to @bfqg, deactivating it from its old group and reactivating
0637  * it on the new one.  Avoid putting the entity on the old group idle tree.
0638  *
0639  * Must be called under the scheduler lock, to make sure that the blkg
0640  * owning @bfqg does not disappear (see comments in
0641  * bfq_bic_update_cgroup on guaranteeing the consistency of blkg
0642  * objects).
0643  */
0644 void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
0645            struct bfq_group *bfqg)
0646 {
0647     struct bfq_entity *entity = &bfqq->entity;
0648     struct bfq_group *old_parent = bfqq_group(bfqq);
0649 
0650     /*
0651      * There is no point in moving bfqq to the same group; this can
0652      * happen when the root group is offlined.
0653      */
0654     if (old_parent == bfqg)
0655         return;
0656 
0657     /*
0658      * oom_bfqq is not allowed to move: it holds a reference to
0659      * root_group until elevator exit.
0660      */
0661     if (bfqq == &bfqd->oom_bfqq)
0662         return;
0663     /*
0664      * Get extra reference to prevent bfqq from being freed in
0665      * next possible expire or deactivate.
0666      */
0667     bfqq->ref++;
0668 
0669     /* If bfqq is empty, then bfq_bfqq_expire also invokes
0670      * bfq_del_bfqq_busy, thereby removing bfqq and its entity
0671      * from data structures related to current group. Otherwise we
0672      * need to remove bfqq explicitly with bfq_deactivate_bfqq, as
0673      * we do below.
0674      */
0675     if (bfqq == bfqd->in_service_queue)
0676         bfq_bfqq_expire(bfqd, bfqd->in_service_queue,
0677                 false, BFQQE_PREEMPTED);
0678 
0679     if (bfq_bfqq_busy(bfqq))
0680         bfq_deactivate_bfqq(bfqd, bfqq, false, false);
0681     else if (entity->on_st_or_in_serv)
0682         bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);
0683     bfqg_and_blkg_put(old_parent);
0684 
0685     if (entity->parent &&
0686         entity->parent->last_bfqq_created == bfqq)
0687         entity->parent->last_bfqq_created = NULL;
0688     else if (bfqd->last_bfqq_created == bfqq)
0689         bfqd->last_bfqq_created = NULL;
0690 
0691     entity->parent = bfqg->my_entity;
0692     entity->sched_data = &bfqg->sched_data;
0693     /* pin down bfqg and its associated blkg  */
0694     bfqg_and_blkg_get(bfqg);
0695 
0696     if (bfq_bfqq_busy(bfqq)) {
0697         if (unlikely(!bfqd->nonrot_with_queueing))
0698             bfq_pos_tree_add_move(bfqd, bfqq);
0699         bfq_activate_bfqq(bfqd, bfqq);
0700     }
0701 
0702     if (!bfqd->in_service_queue && !bfqd->rq_in_driver)
0703         bfq_schedule_dispatch(bfqd);
0704     /* release extra ref taken above, bfqq may happen to be freed now */
0705     bfq_put_queue(bfqq);
0706 }
0707 
0708 /**
0709  * __bfq_bic_change_cgroup - move @bic to @bfqg.
0710  * @bfqd: the queue descriptor.
0711  * @bic: the bic to move.
0712  * @bfqg: the group to move to.
0713  *
0714  * Move @bic to @bfqg, assuming that bfqd->lock is held; holding the lock
0715  * makes sure that the reference to the cgroup stays valid across the call
0716  * (see the comments in bfq_bic_update_cgroup on this issue).
0717  */
0718 static void *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
0719                      struct bfq_io_cq *bic,
0720                      struct bfq_group *bfqg)
0721 {
0722     struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0);
0723     struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1);
0724     struct bfq_entity *entity;
0725 
0726     if (async_bfqq) {
0727         entity = &async_bfqq->entity;
0728 
0729         if (entity->sched_data != &bfqg->sched_data) {
0730             bic_set_bfqq(bic, NULL, 0);
0731             bfq_release_process_ref(bfqd, async_bfqq);
0732         }
0733     }
0734 
0735     if (sync_bfqq) {
0736         if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) {
0737             /* We are the only user of this bfqq, just move it */
0738             if (sync_bfqq->entity.sched_data != &bfqg->sched_data)
0739                 bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
0740         } else {
0741             struct bfq_queue *bfqq;
0742 
0743             /*
0744              * The queue was merged to a different queue. Check
0745              * that the merge chain still belongs to the same
0746              * cgroup.
0747              */
0748             for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq)
0749                 if (bfqq->entity.sched_data !=
0750                     &bfqg->sched_data)
0751                     break;
0752             if (bfqq) {
0753                 /*
0754                  * Some queue changed cgroup so the merge is
0755                  * not valid anymore. We cannot easily just
0756                  * cancel the merge (by clearing new_bfqq) as
0757                  * there may be other processes using this
0758                  * queue and holding refs to all queues below
0759                  * sync_bfqq->new_bfqq. Similarly if the merge
0760                  * already happened, we need to detach from
0761                  * bfqq now so that we cannot merge bio to a
0762                  * request from the old cgroup.
0763                  */
0764                 bfq_put_cooperator(sync_bfqq);
0765                 bfq_release_process_ref(bfqd, sync_bfqq);
0766                 bic_set_bfqq(bic, NULL, 1);
0767             }
0768         }
0769     }
0770 
0771     return bfqg;
0772 }
0773 
0774 void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
0775 {
0776     struct bfq_data *bfqd = bic_to_bfqd(bic);
0777     struct bfq_group *bfqg = bfq_bio_bfqg(bfqd, bio);
0778     uint64_t serial_nr;
0779 
0780     serial_nr = bfqg_to_blkg(bfqg)->blkcg->css.serial_nr;
0781 
0782     /*
0783      * Check whether blkcg has changed.  The condition may trigger
0784      * spuriously on a newly created bic but there's no harm.
0785      */
0786     if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
0787         return;
0788 
0789     /*
0790      * New cgroup for this process. Make sure it is linked to bfq internal
0791      * cgroup hierarchy.
0792      */
0793     bfq_link_bfqg(bfqd, bfqg);
0794     __bfq_bic_change_cgroup(bfqd, bic, bfqg);
0795     /*
0796      * Update blkg_path for bfq_log_* functions. We cache this
0797      * path, and update it here, for the following
0798      * reasons. Operations on blkg objects in blk-cgroup are
0799      * protected with the request_queue lock, and not with the
0800      * lock that protects the instances of this scheduler
0801      * (bfqd->lock). This exposes BFQ to the following sort of
0802      * race.
0803      *
0804      * The blkg_lookup performed in bfq_get_queue, protected
0805      * through rcu, may happen to return the address of a copy of
0806      * the original blkg. If this is the case, then the
0807      * bfqg_and_blkg_get performed in bfq_get_queue, to pin down
0808      * the blkg, is useless: it does not prevent blk-cgroup code
0809      * from destroying both the original blkg and all objects
0810      * directly or indirectly referred by the copy of the
0811      * blkg.
0812      *
0813      * On the bright side, destroy operations on a blkg invoke, as
0814      * a first step, hooks of the scheduler associated with the
0815      * blkg. And these hooks are executed with bfqd->lock held for
0816      * BFQ. As a consequence, for any blkg associated with the
0817      * request queue this instance of the scheduler is attached
0818      * to, we are guaranteed that such a blkg is not destroyed, and
0819      * that all the pointers it contains are consistent, while we
0820      * are holding bfqd->lock. A blkg_lookup performed with
0821      * bfqd->lock held then returns a fully consistent blkg, which
0822      * remains consistent as long as this lock is held.
0823      *
0824      * Thanks to the last fact, and to the fact that: (1) bfqg has
0825      * been obtained through a blkg_lookup in the above
0826      * assignment, and (2) bfqd->lock is being held, here we can
0827      * safely use the policy data for the involved blkg (i.e., the
0828      * field bfqg->pd) to get to the blkg associated with bfqg,
0829      * and then we can safely use any field of blkg. After we
0830      * release bfqd->lock, even just getting blkg through this
0831      * bfqg may cause dangling references to be traversed, as
0832      * bfqg->pd may not exist any more.
0833      *
0834      * In view of the above facts, here we cache, in the bfqg, any
0835      * blkg data we may need for this bic, and for its associated
0836      * bfq_queue. As of now, we need to cache only the path of the
0837      * blkg, which is used in the bfq_log_* functions.
0838      *
0839      * Finally, note that bfqg itself needs to be protected from
0840      * destruction on the blkg_free of the original blkg (which
0841      * invokes bfq_pd_free). We use an additional private
0842      * refcounter for bfqg, to let it disappear only after no
0843      * bfq_queue refers to it any longer.
0844      */
0845     blkg_path(bfqg_to_blkg(bfqg), bfqg->blkg_path, sizeof(bfqg->blkg_path));
0846     bic->blkcg_serial_nr = serial_nr;
0847 }
0848 
0849 /**
0850  * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st.
0851  * @st: the service tree being flushed.
0852  */
0853 static void bfq_flush_idle_tree(struct bfq_service_tree *st)
0854 {
0855     struct bfq_entity *entity = st->first_idle;
0856 
0857     for (; entity ; entity = st->first_idle)
0858         __bfq_deactivate_entity(entity, false);
0859 }
0860 
0861 /**
0862  * bfq_reparent_leaf_entity - move leaf entity to the root_group.
0863  * @bfqd: the device data structure with the root group.
0864  * @entity: the entity to move, if entity is a leaf; or the parent entity
0865  *      of an active leaf entity to move, if entity is not a leaf.
0866  * @ioprio_class: I/O priority class to reparent.
0867  */
0868 static void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
0869                      struct bfq_entity *entity,
0870                      int ioprio_class)
0871 {
0872     struct bfq_queue *bfqq;
0873     struct bfq_entity *child_entity = entity;
0874 
0875     while (child_entity->my_sched_data) { /* leaf not reached yet */
0876         struct bfq_sched_data *child_sd = child_entity->my_sched_data;
0877         struct bfq_service_tree *child_st = child_sd->service_tree +
0878             ioprio_class;
0879         struct rb_root *child_active = &child_st->active;
0880 
0881         child_entity = bfq_entity_of(rb_first(child_active));
0882 
0883         if (!child_entity)
0884             child_entity = child_sd->in_service_entity;
0885     }
0886 
0887     bfqq = bfq_entity_to_bfqq(child_entity);
0888     bfq_bfqq_move(bfqd, bfqq, bfqd->root_group);
0889 }
0890 
0891 /**
0892  * bfq_reparent_active_queues - move to the root group all active queues.
0893  * @bfqd: the device data structure with the root group.
0894  * @bfqg: the group to move from.
0895  * @st: the service tree to start the search from.
0896  * @ioprio_class: I/O priority class to reparent.
0897  */
0898 static void bfq_reparent_active_queues(struct bfq_data *bfqd,
0899                        struct bfq_group *bfqg,
0900                        struct bfq_service_tree *st,
0901                        int ioprio_class)
0902 {
0903     struct rb_root *active = &st->active;
0904     struct bfq_entity *entity;
0905 
0906     while ((entity = bfq_entity_of(rb_first(active))))
0907         bfq_reparent_leaf_entity(bfqd, entity, ioprio_class);
0908 
0909     if (bfqg->sched_data.in_service_entity)
0910         bfq_reparent_leaf_entity(bfqd,
0911                      bfqg->sched_data.in_service_entity,
0912                      ioprio_class);
0913 }
0914 
0915 /**
0916  * bfq_pd_offline - deactivate the entity associated with @pd,
0917  *          and reparent its children entities.
0918  * @pd: descriptor of the policy going offline.
0919  *
0920  * blkio already grabs the queue_lock for us, so no need to use
0921  * RCU-based magic
0922  */
0923 static void bfq_pd_offline(struct blkg_policy_data *pd)
0924 {
0925     struct bfq_service_tree *st;
0926     struct bfq_group *bfqg = pd_to_bfqg(pd);
0927     struct bfq_data *bfqd = bfqg->bfqd;
0928     struct bfq_entity *entity = bfqg->my_entity;
0929     unsigned long flags;
0930     int i;
0931 
0932     spin_lock_irqsave(&bfqd->lock, flags);
0933 
0934     if (!entity) /* root group */
0935         goto put_async_queues;
0936 
0937     /*
0938      * Empty all service_trees belonging to this group before
0939      * deactivating the group itself.
0940      */
0941     for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) {
0942         st = bfqg->sched_data.service_tree + i;
0943 
0944         /*
0945          * It may happen that some queues are still active
0946          * (busy) upon group destruction (if the corresponding
0947          * processes have been forced to terminate). We move
0948          * all the leaf entities corresponding to these queues
0949          * to the root_group.
0950          * Also, it may happen that the group has an entity
0951          * in service, which is disconnected from the active
0952          * tree: it must be moved, too.
0953          * There is no need to put the sync queues, as the
0954          * scheduler has taken no reference.
0955          */
0956         bfq_reparent_active_queues(bfqd, bfqg, st, i);
0957 
0958         /*
0959          * The idle tree may still contain bfq_queues
0960          * belonging to exited task because they never
0961          * migrated to a different cgroup from the one being
0962          * destroyed now. In addition, even
0963          * bfq_reparent_active_queues() may happen to add some
0964          * entities to the idle tree. It happens if, in some
0965          * of the calls to bfq_bfqq_move() performed by
0966          * bfq_reparent_active_queues(), the queue to move is
0967          * empty and gets expired.
0968          */
0969         bfq_flush_idle_tree(st);
0970     }
0971 
0972     __bfq_deactivate_entity(entity, false);
0973 
0974 put_async_queues:
0975     bfq_put_async_queues(bfqd, bfqg);
0976     bfqg->online = false;
0977 
0978     spin_unlock_irqrestore(&bfqd->lock, flags);
0979     /*
0980      * @blkg is going offline and will be ignored by
0981      * blkg_[rw]stat_recursive_sum().  Transfer stats to the parent so
0982      * that they don't get lost.  If IOs complete after this point, the
0983      * stats for them will be lost.  Oh well...
0984      */
0985     bfqg_stats_xfer_dead(bfqg);
0986 }
0987 
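/*
 * Stop weight-raising for the async queues of all the groups of this device,
 * including the root group.
 */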
0988 void bfq_end_wr_async(struct bfq_data *bfqd)
0989 {
0990     struct blkcg_gq *blkg;
0991 
0992     list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) {
0993         struct bfq_group *bfqg = blkg_to_bfqg(blkg);
0994 
0995         bfq_end_wr_async_queues(bfqd, bfqg);
0996     }
0997     bfq_end_wr_async_queues(bfqd, bfqd->root_group);
0998 }
0999 
1000 static int bfq_io_show_weight_legacy(struct seq_file *sf, void *v)
1001 {
1002     struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
1003     struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
1004     unsigned int val = 0;
1005 
1006     if (bfqgd)
1007         val = bfqgd->weight;
1008 
1009     seq_printf(sf, "%u\n", val);
1010 
1011     return 0;
1012 }
1013 
1014 static u64 bfqg_prfill_weight_device(struct seq_file *sf,
1015                      struct blkg_policy_data *pd, int off)
1016 {
1017     struct bfq_group *bfqg = pd_to_bfqg(pd);
1018 
1019     if (!bfqg->entity.dev_weight)
1020         return 0;
1021     return __blkg_prfill_u64(sf, pd, bfqg->entity.dev_weight);
1022 }
1023 
1024 static int bfq_io_show_weight(struct seq_file *sf, void *v)
1025 {
1026     struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
1027     struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
1028 
1029     seq_printf(sf, "default %u\n", bfqgd->weight);
1030     blkcg_print_blkgs(sf, blkcg, bfqg_prfill_weight_device,
1031               &blkcg_policy_bfq, 0, false);
1032     return 0;
1033 }
1034 
1035 static void bfq_group_set_weight(struct bfq_group *bfqg, u64 weight, u64 dev_weight)
1036 {
1037     weight = dev_weight ?: weight;
1038 
1039     bfqg->entity.dev_weight = dev_weight;
1040     /*
1041      * Setting the prio_changed flag of the entity
1042      * to 1 with new_weight == weight would re-set
1043      * the value of the weight to its ioprio mapping.
1044      * Set the flag only if necessary.
1045      */
1046     if ((unsigned short)weight != bfqg->entity.new_weight) {
1047         bfqg->entity.new_weight = (unsigned short)weight;
1048         /*
1049          * Make sure that the above new value has been
1050          * stored in bfqg->entity.new_weight before
1051          * setting the prio_changed flag. In fact,
1052          * this flag may be read asynchronously (in
1053          * critical sections protected by a different
1054          * lock than that held here), and finding this
1055          * flag set may cause the execution of the code
1056          * for updating parameters whose value may
1057          * depend also on bfqg->entity.new_weight (in
1058          * __bfq_entity_update_weight_prio).
1059          * This barrier makes sure that the new value
1060          * of bfqg->entity.new_weight is correctly
1061          * seen in that code.
1062          */
1063         smp_wmb();
1064         bfqg->entity.prio_changed = 1;
1065     }
1066 }
1067 
1068 static int bfq_io_set_weight_legacy(struct cgroup_subsys_state *css,
1069                     struct cftype *cftype,
1070                     u64 val)
1071 {
1072     struct blkcg *blkcg = css_to_blkcg(css);
1073     struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
1074     struct blkcg_gq *blkg;
1075     int ret = -ERANGE;
1076 
1077     if (val < BFQ_MIN_WEIGHT || val > BFQ_MAX_WEIGHT)
1078         return ret;
1079 
1080     ret = 0;
1081     spin_lock_irq(&blkcg->lock);
1082     bfqgd->weight = (unsigned short)val;
1083     hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
1084         struct bfq_group *bfqg = blkg_to_bfqg(blkg);
1085 
1086         if (bfqg)
1087             bfq_group_set_weight(bfqg, val, 0);
1088     }
1089     spin_unlock_irq(&blkcg->lock);
1090 
1091     return ret;
1092 }
1093 
1094 static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of,
1095                     char *buf, size_t nbytes,
1096                     loff_t off)
1097 {
1098     int ret;
1099     struct blkg_conf_ctx ctx;
1100     struct blkcg *blkcg = css_to_blkcg(of_css(of));
1101     struct bfq_group *bfqg;
1102     u64 v;
1103 
1104     ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, buf, &ctx);
1105     if (ret)
1106         return ret;
1107 
1108     if (sscanf(ctx.body, "%llu", &v) == 1) {
1109         /* require "default" on dfl */
1110         ret = -ERANGE;
1111         if (!v)
1112             goto out;
1113     } else if (!strcmp(strim(ctx.body), "default")) {
1114         v = 0;
1115     } else {
1116         ret = -EINVAL;
1117         goto out;
1118     }
1119 
1120     bfqg = blkg_to_bfqg(ctx.blkg);
1121 
1122     ret = -ERANGE;
1123     if (!v || (v >= BFQ_MIN_WEIGHT && v <= BFQ_MAX_WEIGHT)) {
1124         bfq_group_set_weight(bfqg, bfqg->entity.weight, v);
1125         ret = 0;
1126     }
1127 out:
1128     blkg_conf_finish(&ctx);
1129     return ret ?: nbytes;
1130 }
1131 
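/*
 * Write handler for the "bfq.weight" file.  A bare weight, or "default WEIGHT",
 * sets the blkcg-wide default weight; anything else is handed to
 * bfq_io_set_device_weight(), which expects "MAJ:MIN WEIGHT" and sets a
 * per-device weight.  For example (illustrative, assuming a cgroup v2 mount
 * where the file shows up as io.bfq.weight):
 *
 *	echo 300 > io.bfq.weight	# default weight for this cgroup
 *	echo "8:16 600" > io.bfq.weight	# weight for device 8:16 only
 */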
1132 static ssize_t bfq_io_set_weight(struct kernfs_open_file *of,
1133                  char *buf, size_t nbytes,
1134                  loff_t off)
1135 {
1136     char *endp;
1137     int ret;
1138     u64 v;
1139 
1140     buf = strim(buf);
1141 
1142     /* "WEIGHT" or "default WEIGHT" sets the default weight */
1143     v = simple_strtoull(buf, &endp, 0);
1144     if (*endp == '\0' || sscanf(buf, "default %llu", &v) == 1) {
1145         ret = bfq_io_set_weight_legacy(of_css(of), NULL, v);
1146         return ret ?: nbytes;
1147     }
1148 
1149     return bfq_io_set_device_weight(of, buf, nbytes, off);
1150 }
1151 
1152 static int bfqg_print_rwstat(struct seq_file *sf, void *v)
1153 {
1154     blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat,
1155               &blkcg_policy_bfq, seq_cft(sf)->private, true);
1156     return 0;
1157 }
1158 
1159 static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf,
1160                     struct blkg_policy_data *pd, int off)
1161 {
1162     struct blkg_rwstat_sample sum;
1163 
1164     blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off, &sum);
1165     return __blkg_prfill_rwstat(sf, pd, &sum);
1166 }
1167 
1168 static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v)
1169 {
1170     blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
1171               bfqg_prfill_rwstat_recursive, &blkcg_policy_bfq,
1172               seq_cft(sf)->private, true);
1173     return 0;
1174 }
1175 
1176 #ifdef CONFIG_BFQ_CGROUP_DEBUG
1177 static int bfqg_print_stat(struct seq_file *sf, void *v)
1178 {
1179     blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat,
1180               &blkcg_policy_bfq, seq_cft(sf)->private, false);
1181     return 0;
1182 }
1183 
1184 static u64 bfqg_prfill_stat_recursive(struct seq_file *sf,
1185                       struct blkg_policy_data *pd, int off)
1186 {
1187     struct blkcg_gq *blkg = pd_to_blkg(pd);
1188     struct blkcg_gq *pos_blkg;
1189     struct cgroup_subsys_state *pos_css;
1190     u64 sum = 0;
1191 
1192     lockdep_assert_held(&blkg->q->queue_lock);
1193 
1194     rcu_read_lock();
1195     blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
1196         struct bfq_stat *stat;
1197 
1198         if (!pos_blkg->online)
1199             continue;
1200 
1201         stat = (void *)blkg_to_pd(pos_blkg, &blkcg_policy_bfq) + off;
1202         sum += bfq_stat_read(stat) + atomic64_read(&stat->aux_cnt);
1203     }
1204     rcu_read_unlock();
1205 
1206     return __blkg_prfill_u64(sf, pd, sum);
1207 }
1208 
1209 static int bfqg_print_stat_recursive(struct seq_file *sf, void *v)
1210 {
1211     blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
1212               bfqg_prfill_stat_recursive, &blkcg_policy_bfq,
1213               seq_cft(sf)->private, false);
1214     return 0;
1215 }
1216 
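/* Print the group's total I/O in 512-byte sectors (stats.bytes >> 9). */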
1217 static u64 bfqg_prfill_sectors(struct seq_file *sf, struct blkg_policy_data *pd,
1218                    int off)
1219 {
1220     struct bfq_group *bfqg = blkg_to_bfqg(pd->blkg);
1221     u64 sum = blkg_rwstat_total(&bfqg->stats.bytes);
1222 
1223     return __blkg_prfill_u64(sf, pd, sum >> 9);
1224 }
1225 
1226 static int bfqg_print_stat_sectors(struct seq_file *sf, void *v)
1227 {
1228     blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
1229               bfqg_prfill_sectors, &blkcg_policy_bfq, 0, false);
1230     return 0;
1231 }
1232 
1233 static u64 bfqg_prfill_sectors_recursive(struct seq_file *sf,
1234                      struct blkg_policy_data *pd, int off)
1235 {
1236     struct blkg_rwstat_sample tmp;
1237 
1238     blkg_rwstat_recursive_sum(pd->blkg, &blkcg_policy_bfq,
1239             offsetof(struct bfq_group, stats.bytes), &tmp);
1240 
1241     return __blkg_prfill_u64(sf, pd,
1242         (tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]) >> 9);
1243 }
1244 
1245 static int bfqg_print_stat_sectors_recursive(struct seq_file *sf, void *v)
1246 {
1247     blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
1248               bfqg_prfill_sectors_recursive, &blkcg_policy_bfq, 0,
1249               false);
1250     return 0;
1251 }
1252 
1253 static u64 bfqg_prfill_avg_queue_size(struct seq_file *sf,
1254                       struct blkg_policy_data *pd, int off)
1255 {
1256     struct bfq_group *bfqg = pd_to_bfqg(pd);
1257     u64 samples = bfq_stat_read(&bfqg->stats.avg_queue_size_samples);
1258     u64 v = 0;
1259 
1260     if (samples) {
1261         v = bfq_stat_read(&bfqg->stats.avg_queue_size_sum);
1262         v = div64_u64(v, samples);
1263     }
1264     __blkg_prfill_u64(sf, pd, v);
1265     return 0;
1266 }
1267 
1268 /* print avg_queue_size */
1269 static int bfqg_print_avg_queue_size(struct seq_file *sf, void *v)
1270 {
1271     blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
1272               bfqg_prfill_avg_queue_size, &blkcg_policy_bfq,
1273               0, false);
1274     return 0;
1275 }
1276 #endif /* CONFIG_BFQ_CGROUP_DEBUG */
1277 
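/*
 * Activate the bfq blkcg policy on the device's request queue and return the
 * root group, i.e., the bfq_group attached to the queue's root blkg.
 */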
1278 struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
1279 {
1280     int ret;
1281 
1282     ret = blkcg_activate_policy(bfqd->queue, &blkcg_policy_bfq);
1283     if (ret)
1284         return NULL;
1285 
1286     return blkg_to_bfqg(bfqd->queue->root_blkg);
1287 }
1288 
1289 struct blkcg_policy blkcg_policy_bfq = {
1290     .dfl_cftypes        = bfq_blkg_files,
1291     .legacy_cftypes     = bfq_blkcg_legacy_files,
1292 
1293     .cpd_alloc_fn       = bfq_cpd_alloc,
1294     .cpd_init_fn        = bfq_cpd_init,
1295     .cpd_bind_fn            = bfq_cpd_init,
1296     .cpd_free_fn        = bfq_cpd_free,
1297 
1298     .pd_alloc_fn        = bfq_pd_alloc,
1299     .pd_init_fn     = bfq_pd_init,
1300     .pd_offline_fn      = bfq_pd_offline,
1301     .pd_free_fn     = bfq_pd_free,
1302     .pd_reset_stats_fn  = bfq_pd_reset_stats,
1303 };
1304 
1305 struct cftype bfq_blkcg_legacy_files[] = {
1306     {
1307         .name = "bfq.weight",
1308         .flags = CFTYPE_NOT_ON_ROOT,
1309         .seq_show = bfq_io_show_weight_legacy,
1310         .write_u64 = bfq_io_set_weight_legacy,
1311     },
1312     {
1313         .name = "bfq.weight_device",
1314         .flags = CFTYPE_NOT_ON_ROOT,
1315         .seq_show = bfq_io_show_weight,
1316         .write = bfq_io_set_weight,
1317     },
1318 
1319     /* statistics, covers only the tasks in the bfqg */
1320     {
1321         .name = "bfq.io_service_bytes",
1322         .private = offsetof(struct bfq_group, stats.bytes),
1323         .seq_show = bfqg_print_rwstat,
1324     },
1325     {
1326         .name = "bfq.io_serviced",
1327         .private = offsetof(struct bfq_group, stats.ios),
1328         .seq_show = bfqg_print_rwstat,
1329     },
1330 #ifdef CONFIG_BFQ_CGROUP_DEBUG
1331     {
1332         .name = "bfq.time",
1333         .private = offsetof(struct bfq_group, stats.time),
1334         .seq_show = bfqg_print_stat,
1335     },
1336     {
1337         .name = "bfq.sectors",
1338         .seq_show = bfqg_print_stat_sectors,
1339     },
1340     {
1341         .name = "bfq.io_service_time",
1342         .private = offsetof(struct bfq_group, stats.service_time),
1343         .seq_show = bfqg_print_rwstat,
1344     },
1345     {
1346         .name = "bfq.io_wait_time",
1347         .private = offsetof(struct bfq_group, stats.wait_time),
1348         .seq_show = bfqg_print_rwstat,
1349     },
1350     {
1351         .name = "bfq.io_merged",
1352         .private = offsetof(struct bfq_group, stats.merged),
1353         .seq_show = bfqg_print_rwstat,
1354     },
1355     {
1356         .name = "bfq.io_queued",
1357         .private = offsetof(struct bfq_group, stats.queued),
1358         .seq_show = bfqg_print_rwstat,
1359     },
1360 #endif /* CONFIG_BFQ_CGROUP_DEBUG */
1361 
1362     /* the same statistics which cover the bfqg and its descendants */
1363     {
1364         .name = "bfq.io_service_bytes_recursive",
1365         .private = offsetof(struct bfq_group, stats.bytes),
1366         .seq_show = bfqg_print_rwstat_recursive,
1367     },
1368     {
1369         .name = "bfq.io_serviced_recursive",
1370         .private = offsetof(struct bfq_group, stats.ios),
1371         .seq_show = bfqg_print_rwstat_recursive,
1372     },
1373 #ifdef CONFIG_BFQ_CGROUP_DEBUG
1374     {
1375         .name = "bfq.time_recursive",
1376         .private = offsetof(struct bfq_group, stats.time),
1377         .seq_show = bfqg_print_stat_recursive,
1378     },
1379     {
1380         .name = "bfq.sectors_recursive",
1381         .seq_show = bfqg_print_stat_sectors_recursive,
1382     },
1383     {
1384         .name = "bfq.io_service_time_recursive",
1385         .private = offsetof(struct bfq_group, stats.service_time),
1386         .seq_show = bfqg_print_rwstat_recursive,
1387     },
1388     {
1389         .name = "bfq.io_wait_time_recursive",
1390         .private = offsetof(struct bfq_group, stats.wait_time),
1391         .seq_show = bfqg_print_rwstat_recursive,
1392     },
1393     {
1394         .name = "bfq.io_merged_recursive",
1395         .private = offsetof(struct bfq_group, stats.merged),
1396         .seq_show = bfqg_print_rwstat_recursive,
1397     },
1398     {
1399         .name = "bfq.io_queued_recursive",
1400         .private = offsetof(struct bfq_group, stats.queued),
1401         .seq_show = bfqg_print_rwstat_recursive,
1402     },
1403     {
1404         .name = "bfq.avg_queue_size",
1405         .seq_show = bfqg_print_avg_queue_size,
1406     },
1407     {
1408         .name = "bfq.group_wait_time",
1409         .private = offsetof(struct bfq_group, stats.group_wait_time),
1410         .seq_show = bfqg_print_stat,
1411     },
1412     {
1413         .name = "bfq.idle_time",
1414         .private = offsetof(struct bfq_group, stats.idle_time),
1415         .seq_show = bfqg_print_stat,
1416     },
1417     {
1418         .name = "bfq.empty_time",
1419         .private = offsetof(struct bfq_group, stats.empty_time),
1420         .seq_show = bfqg_print_stat,
1421     },
1422     {
1423         .name = "bfq.dequeue",
1424         .private = offsetof(struct bfq_group, stats.dequeue),
1425         .seq_show = bfqg_print_stat,
1426     },
1427 #endif  /* CONFIG_BFQ_CGROUP_DEBUG */
1428     { } /* terminate */
1429 };
1430 
1431 struct cftype bfq_blkg_files[] = {
1432     {
1433         .name = "bfq.weight",
1434         .flags = CFTYPE_NOT_ON_ROOT,
1435         .seq_show = bfq_io_show_weight,
1436         .write = bfq_io_set_weight,
1437     },
1438     {} /* terminate */
1439 };
1440 
1441 #else   /* CONFIG_BFQ_GROUP_IOSCHED */
1442 
1443 void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
1444            struct bfq_group *bfqg) {}
1445 
1446 void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg)
1447 {
1448     struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
1449 
1450     entity->weight = entity->new_weight;
1451     entity->orig_weight = entity->new_weight;
1452     if (bfqq) {
1453         bfqq->ioprio = bfqq->new_ioprio;
1454         bfqq->ioprio_class = bfqq->new_ioprio_class;
1455     }
1456     entity->sched_data = &bfqg->sched_data;
1457 }
1458 
1459 void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) {}
1460 
1461 void bfq_end_wr_async(struct bfq_data *bfqd)
1462 {
1463     bfq_end_wr_async_queues(bfqd, bfqd->root_group);
1464 }
1465 
1466 struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio)
1467 {
1468     return bfqd->root_group;
1469 }
1470 
1471 struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
1472 {
1473     return bfqq->bfqd->root_group;
1474 }
1475 
1476 void bfqg_and_blkg_put(struct bfq_group *bfqg) {}
1477 
1478 struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
1479 {
1480     struct bfq_group *bfqg;
1481     int i;
1482 
1483     bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node);
1484     if (!bfqg)
1485         return NULL;
1486 
1487     for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
1488         bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;
1489 
1490     return bfqg;
1491 }
1492 #endif  /* CONFIG_BFQ_GROUP_IOSCHED */