// SPDX-License-Identifier: GPL-2.0
/*
 * Block rq-qos base io controller (cgroup io.latency).
 *
 * Each cgroup may be given an IO completion latency target through the
 * io.latency interface.  Completion latencies are accounted per group over
 * sliding time windows.  When a group with a target starts missing it, the
 * other groups under the same parent are throttled by shrinking the queue
 * depth they are allowed to use; once the protected group has been meeting
 * its target again for long enough, the throttled groups are slowly allowed
 * to scale their depth back up towards the full device depth.
 *
 * Scaling decisions are communicated from a parent to its children through a
 * "scale cookie" (see struct child_latency_info): on every IO a child
 * compares its cached cookie with the parent's and grows or shrinks its own
 * allowed depth in check_scale_change().
 */
#include <linux/kernel.h>
#include <linux/blk_types.h>
#include <linux/backing-dev.h>
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/memcontrol.h>
#include <linux/sched/loadavg.h>
#include <linux/sched/signal.h>
#include <trace/events/block.h>
#include <linux/blk-mq.h>
#include "blk-rq-qos.h"
#include "blk-stat.h"
#include "blk-cgroup.h"
#include "blk.h"

#define DEFAULT_SCALE_COOKIE 1000000U

static struct blkcg_policy blkcg_policy_iolatency;
struct iolatency_grp;

struct blk_iolatency {
	struct rq_qos rqos;
	struct timer_list timer;

	/*
	 * ->enabled is the master enable switch gating the throttling logic
	 * and associated stat tracking.  The number of cgroups which have
	 * iolatency enabled is tracked in ->enable_cnt, and ->enabled is
	 * flipped from a work item to keep the flip and the inflight
	 * accounting consistent (see blkiolatency_enable_work_fn()).
	 */
	bool enabled;
	atomic_t enable_cnt;
	struct work_struct enable_work;
};

static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos)
{
	return container_of(rqos, struct blk_iolatency, rqos);
}

struct child_latency_info {
	spinlock_t lock;

	/* Last time we adjusted the scale of everybody. */
	u64 last_scale_event;

	/* The latency that we missed. */
	u64 scale_lat;

	/* Total io's from all of our children for the last summation. */
	u64 nr_samples;

	/* The group that last caused the scale event. */
	struct iolatency_grp *scale_grp;

	/* Cookie to tell if we need to scale up or down. */
	atomic_t scale_cookie;
};

struct percentile_stats {
	u64 total;
	u64 missed;
};

struct latency_stat {
	union {
		struct percentile_stats ps;
		struct blk_rq_stat rqs;
	};
};

struct iolatency_grp {
	struct blkg_policy_data pd;
	struct latency_stat __percpu *stats;
	struct latency_stat cur_stat;
	struct blk_iolatency *blkiolat;
	struct rq_depth rq_depth;
	struct rq_wait rq_wait;
	atomic64_t window_start;
	atomic_t scale_cookie;
	u64 min_lat_nsec;
	u64 cur_win_nsec;

	/* Total running average of our io latency. */
	u64 lat_avg;

	/* Our current number of IO's for the last summation. */
	u64 nr_samples;

	bool ssd;
	struct child_latency_info child_lat;
};

#define BLKIOLATENCY_MIN_WIN_SIZE (100 * NSEC_PER_MSEC)
#define BLKIOLATENCY_MAX_WIN_SIZE NSEC_PER_SEC

/*
 * These are the constants used to fake the fixed-point moving average
 * calculation just like load average.  The call to calc_load() folds
 * (FIXED_1 (2048) - exp_factor) * new_sample into lat_avg.  The sampling
 * window size is bucketed to try to approximately calculate average latency
 * such that 1/exp (decay rate) is [1 min, 2.5 min) when windows elapse
 * immediately.  Note, windows only elapse with IO activity; idle periods
 * extend the most recent window.
 */
#define BLKIOLATENCY_NR_EXP_FACTORS 5
#define BLKIOLATENCY_EXP_BUCKET_SIZE (BLKIOLATENCY_MAX_WIN_SIZE / \
				      (BLKIOLATENCY_NR_EXP_FACTORS - 1))
static const u64 iolatency_exp_factors[BLKIOLATENCY_NR_EXP_FACTORS] = {
	2045, /* exp(1/600) - 600 samples */
	2039, /* exp(1/240) - 240 samples */
	2031, /* exp(1/120) - 120 samples */
	2023, /* exp(1/80)  - 80 samples */
	2014, /* exp(1/60)  - 60 samples */
};

static inline struct iolatency_grp *pd_to_lat(struct blkg_policy_data *pd)
{
	return pd ? container_of(pd, struct iolatency_grp, pd) : NULL;
}

static inline struct iolatency_grp *blkg_to_lat(struct blkcg_gq *blkg)
{
	return pd_to_lat(blkg_to_pd(blkg, &blkcg_policy_iolatency));
}

static inline struct blkcg_gq *lat_to_blkg(struct iolatency_grp *iolat)
{
	return pd_to_blkg(&iolat->pd);
}

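/*
 * The stats are kept in two flavours: on rotational devices we track a full
 * blk_rq_stat (mean completion latency), while on non-rotational devices
 * (iolat->ssd, see iolatency_pd_init()) we only count how many requests
 * completed in the window and how many of those missed the group's target.
 */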
static inline void latency_stat_init(struct iolatency_grp *iolat,
				     struct latency_stat *stat)
{
	if (iolat->ssd) {
		stat->ps.total = 0;
		stat->ps.missed = 0;
	} else
		blk_rq_stat_init(&stat->rqs);
}

static inline void latency_stat_sum(struct iolatency_grp *iolat,
				    struct latency_stat *sum,
				    struct latency_stat *stat)
{
	if (iolat->ssd) {
		sum->ps.total += stat->ps.total;
		sum->ps.missed += stat->ps.missed;
	} else
		blk_rq_stat_sum(&sum->rqs, &stat->rqs);
}

static inline void latency_stat_record_time(struct iolatency_grp *iolat,
					    u64 req_time)
{
	struct latency_stat *stat = get_cpu_ptr(iolat->stats);
	if (iolat->ssd) {
		if (req_time >= iolat->min_lat_nsec)
			stat->ps.missed++;
		stat->ps.total++;
	} else
		blk_rq_stat_add(&stat->rqs, req_time);
	put_cpu_ptr(stat);
}

static inline bool latency_sum_ok(struct iolatency_grp *iolat,
				  struct latency_stat *stat)
{
	if (iolat->ssd) {
		u64 thresh = div64_u64(stat->ps.total, 10);
		thresh = max(thresh, 1ULL);
		return stat->ps.missed < thresh;
	}
	return stat->rqs.mean <= iolat->min_lat_nsec;
}

static inline u64 latency_stat_samples(struct iolatency_grp *iolat,
				       struct latency_stat *stat)
{
	if (iolat->ssd)
		return stat->ps.total;
	return stat->rqs.nr_samples;
}

static inline void iolat_update_total_lat_avg(struct iolatency_grp *iolat,
					      struct latency_stat *stat)
{
	int exp_idx;

	if (iolat->ssd)
		return;

	/*
	 * calc_load() takes in a number stored in fixed point representation.
	 * Because we are using this for IO time in ns, the values stored
	 * are significantly larger than the FIXED_1 denominator (2048).
	 * Therefore, rounding errors in the calculation are negligible and
	 * can be ignored.
	 */
	exp_idx = min_t(int, BLKIOLATENCY_NR_EXP_FACTORS - 1,
			div64_u64(iolat->cur_win_nsec,
				  BLKIOLATENCY_EXP_BUCKET_SIZE));
	iolat->lat_avg = calc_load(iolat->lat_avg,
				   iolatency_exp_factors[exp_idx],
				   stat->rqs.mean);
}

static void iolat_cleanup_cb(struct rq_wait *rqw, void *private_data)
{
	atomic_dec(&rqw->inflight);
	wake_up(&rqw->wait);
}

static bool iolat_acquire_inflight(struct rq_wait *rqw, void *private_data)
{
	struct iolatency_grp *iolat = private_data;
	return rq_wait_inc_below(rqw, iolat->rq_depth.max_depth);
}

static void __blkcg_iolatency_throttle(struct rq_qos *rqos,
				       struct iolatency_grp *iolat,
				       bool issue_as_root,
				       bool use_memdelay)
{
	struct rq_wait *rqw = &iolat->rq_wait;
	unsigned use_delay = atomic_read(&lat_to_blkg(iolat)->use_delay);

	if (use_delay)
		blkcg_schedule_throttle(rqos->q, use_memdelay);

	/*
	 * To avoid priority inversions we want to just take a slot if we are
	 * issuing as root.  If we're being killed off there's no point in
	 * delaying things, we may have been killed by OOM so throttling may
	 * make us worse off than not throttling at all anyways.
	 */
	if (issue_as_root || fatal_signal_pending(current)) {
		atomic_inc(&rqw->inflight);
		return;
	}

	rq_qos_wait(rqw, iolat, iolat_acquire_inflight, iolat_cleanup_cb);
}

#define SCALE_DOWN_FACTOR 2
#define SCALE_UP_FACTOR 4

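/*
 * Step size for queue depth scaling: grow by 1/16th of the device queue
 * depth at a time, but shrink by 1/4th, so we back off quickly under
 * pressure and creep back up slowly (always by at least one request).
 */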
static inline unsigned long scale_amount(unsigned long qd, bool up)
{
	return max(up ? qd >> SCALE_UP_FACTOR : qd >> SCALE_DOWN_FACTOR, 1UL);
}

/*
 * We scale the qd down faster than we scale up, so we need this helper to
 * adjust the scale_cookie accordingly so we don't prematurely get the cookie
 * back to DEFAULT_SCALE_COOKIE and unthrottle too much.
 *
 * The cookie starts at DEFAULT_SCALE_COOKIE; once it has fallen more than a
 * full queue depth below the default we only creep it back up one count at a
 * time instead of adding the full scale amount.
 */
static void scale_cookie_change(struct blk_iolatency *blkiolat,
				struct child_latency_info *lat_info,
				bool up)
{
	unsigned long qd = blkiolat->rqos.q->nr_requests;
	unsigned long scale = scale_amount(qd, up);
	unsigned long old = atomic_read(&lat_info->scale_cookie);
	unsigned long max_scale = qd << 1;
	unsigned long diff = 0;

	if (old < DEFAULT_SCALE_COOKIE)
		diff = DEFAULT_SCALE_COOKIE - old;

	if (up) {
		if (scale + old > DEFAULT_SCALE_COOKIE)
			atomic_set(&lat_info->scale_cookie,
				   DEFAULT_SCALE_COOKIE);
		else if (diff > qd)
			atomic_inc(&lat_info->scale_cookie);
		else
			atomic_add(scale, &lat_info->scale_cookie);
	} else {
		/*
		 * We don't want to dig a hole so deep that it takes us hours
		 * to dig out of it.  Just enough that we don't throttle and
		 * unthrottle with jagged workloads but can still unthrottle
		 * once pressure has sufficiently dissipated.
		 */
		if (diff > qd) {
			if (diff < max_scale)
				atomic_dec(&lat_info->scale_cookie);
		} else {
			atomic_sub(scale, &lat_info->scale_cookie);
		}
	}
}

/*
 * Change the queue depth of the iolatency_grp.  We add 1/16th of the queue
 * depth at a time on the way up so we don't get wild swings and hopefully
 * dial in to a fairer distribution of the overall queue depth, and halve the
 * depth on the way down so we back off quickly when latencies are missed.
 */
static void scale_change(struct iolatency_grp *iolat, bool up)
{
	unsigned long qd = iolat->blkiolat->rqos.q->nr_requests;
	unsigned long scale = scale_amount(qd, up);
	unsigned long old = iolat->rq_depth.max_depth;

	if (old > qd)
		old = qd;

	if (up) {
		if (old == 1 && blkcg_unuse_delay(lat_to_blkg(iolat)))
			return;

		if (old < qd) {
			old += scale;
			old = min(old, qd);
			iolat->rq_depth.max_depth = old;
			wake_up_all(&iolat->rq_wait.wait);
		}
	} else {
		old >>= 1;
		iolat->rq_depth.max_depth = max(old, 1UL);
	}
}

/* Check our parent and see if the scale cookie has changed. */
static void check_scale_change(struct iolatency_grp *iolat)
{
	struct iolatency_grp *parent;
	struct child_latency_info *lat_info;
	unsigned int cur_cookie;
	unsigned int our_cookie = atomic_read(&iolat->scale_cookie);
	u64 scale_lat;
	int direction = 0;

	if (lat_to_blkg(iolat)->parent == NULL)
		return;

	parent = blkg_to_lat(lat_to_blkg(iolat)->parent);
	if (!parent)
		return;

	lat_info = &parent->child_lat;
	cur_cookie = atomic_read(&lat_info->scale_cookie);
	scale_lat = READ_ONCE(lat_info->scale_lat);

	if (cur_cookie < our_cookie)
		direction = -1;
	else if (cur_cookie > our_cookie)
		direction = 1;
	else
		return;

	if (!atomic_try_cmpxchg(&iolat->scale_cookie, &our_cookie, cur_cookie)) {
		/* Somebody beat us to the punch, just bail. */
		return;
	}

	if (direction < 0 && iolat->min_lat_nsec) {
		u64 samples_thresh;

		if (!scale_lat || iolat->min_lat_nsec <= scale_lat)
			return;

		/*
		 * Sometimes high priority groups are their own worst enemy,
		 * so instead of taking it out on some poor other group that
		 * did 5% or less of the IO's for the last summation we just
		 * skip scaling such a group down.
		 */
		samples_thresh = lat_info->nr_samples * 5;
		samples_thresh = max(1ULL, div64_u64(samples_thresh, 100));
		if (iolat->nr_samples <= samples_thresh)
			return;
	}

	/* We're as low as we can go. */
	if (iolat->rq_depth.max_depth == 1 && direction < 0) {
		blkcg_use_delay(lat_to_blkg(iolat));
		return;
	}

	/* We're back to the default cookie, unthrottle all the things. */
	if (cur_cookie == DEFAULT_SCALE_COOKIE) {
		blkcg_clear_delay(lat_to_blkg(iolat));
		iolat->rq_depth.max_depth = UINT_MAX;
		wake_up_all(&iolat->rq_wait.wait);
		return;
	}

	scale_change(iolat, direction > 0);
}

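/*
 * rq_qos throttle hook, called for each bio before it is issued.  Walk from
 * the bio's blkg up towards the root; at each level with the policy enabled,
 * pick up any scale cookie change and then wait for an inflight slot if that
 * level is currently depth-limited.  Also arms the unthrottle timer while IO
 * is flowing.
 */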
static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio)
{
	struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
	struct blkcg_gq *blkg = bio->bi_blkg;
	bool issue_as_root = bio_issue_as_root_blkg(bio);

	if (!blkiolat->enabled)
		return;

	while (blkg && blkg->parent) {
		struct iolatency_grp *iolat = blkg_to_lat(blkg);
		if (!iolat) {
			blkg = blkg->parent;
			continue;
		}

		check_scale_change(iolat);
		__blkcg_iolatency_throttle(rqos, iolat, issue_as_root,
					   (bio->bi_opf & REQ_SWAP) == REQ_SWAP);
		blkg = blkg->parent;
	}
	if (!timer_pending(&blkiolat->timer))
		mod_timer(&blkiolat->timer, jiffies + HZ);
}

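/*
 * Record the completion latency for one bio in this group's window.  @issue
 * carries the (truncated) issue timestamp stashed in the bio, and @now is
 * the completion time taken once by the caller so every level of the
 * hierarchy sees a consistent clock.
 */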
static void iolatency_record_time(struct iolatency_grp *iolat,
				  struct bio_issue *issue, u64 now,
				  bool issue_as_root)
{
	u64 start = bio_issue_time(issue);
	u64 req_time;

	/*
	 * Have to do this so we are truncated to the correct time that our
	 * issue is truncated to.
	 */
	now = __bio_issue_time(now);

	if (now <= start)
		return;

	req_time = now - start;

	/*
	 * We don't want to count issue_as_root bio's in the cgroups latency
	 * statistics as it could skew the numbers downwards.
	 */
	if (unlikely(issue_as_root && iolat->rq_depth.max_depth != UINT_MAX)) {
		u64 sub = iolat->min_lat_nsec;
		if (req_time < sub)
			blkcg_add_delay(lat_to_blkg(iolat), now, sub - req_time);
		return;
	}

	latency_stat_record_time(iolat, req_time);
}

#define BLKIOLATENCY_MIN_ADJUST_TIME (500 * NSEC_PER_MSEC)
#define BLKIOLATENCY_MIN_GOOD_SAMPLES 5

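/*
 * Called when a group's stat window has elapsed: fold the per-cpu samples
 * into the parent's child_latency_info and decide whether the parent's scale
 * cookie should move up (we have been meeting our target) or down (we have
 * been missing it).
 */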
static void iolatency_check_latencies(struct iolatency_grp *iolat, u64 now)
{
	struct blkcg_gq *blkg = lat_to_blkg(iolat);
	struct iolatency_grp *parent;
	struct child_latency_info *lat_info;
	struct latency_stat stat;
	unsigned long flags;
	int cpu;

	latency_stat_init(iolat, &stat);
	preempt_disable();
	for_each_online_cpu(cpu) {
		struct latency_stat *s;
		s = per_cpu_ptr(iolat->stats, cpu);
		latency_stat_sum(iolat, &stat, s);
		latency_stat_init(iolat, s);
	}
	preempt_enable();

	parent = blkg_to_lat(blkg->parent);
	if (!parent)
		return;

	lat_info = &parent->child_lat;

	iolat_update_total_lat_avg(iolat, &stat);

	/* Everything is ok and we don't need to adjust the scale. */
	if (latency_sum_ok(iolat, &stat) &&
	    atomic_read(&lat_info->scale_cookie) == DEFAULT_SCALE_COOKIE)
		return;

	/* Fold this window into the parent's totals under its lock. */
	spin_lock_irqsave(&lat_info->lock, flags);

	latency_stat_sum(iolat, &iolat->cur_stat, &stat);
	lat_info->nr_samples -= iolat->nr_samples;
	lat_info->nr_samples += latency_stat_samples(iolat, &iolat->cur_stat);
	iolat->nr_samples = latency_stat_samples(iolat, &iolat->cur_stat);

	if ((lat_info->last_scale_event >= now ||
	     now - lat_info->last_scale_event < BLKIOLATENCY_MIN_ADJUST_TIME))
		goto out;

	if (latency_sum_ok(iolat, &iolat->cur_stat) &&
	    latency_sum_ok(iolat, &stat)) {
		if (latency_stat_samples(iolat, &iolat->cur_stat) <
		    BLKIOLATENCY_MIN_GOOD_SAMPLES)
			goto out;
		if (lat_info->scale_grp == iolat) {
			lat_info->last_scale_event = now;
			scale_cookie_change(iolat->blkiolat, lat_info, true);
		}
	} else if (lat_info->scale_lat == 0 ||
		   lat_info->scale_lat >= iolat->min_lat_nsec) {
		lat_info->last_scale_event = now;
		if (!lat_info->scale_grp ||
		    lat_info->scale_lat > iolat->min_lat_nsec) {
			WRITE_ONCE(lat_info->scale_lat, iolat->min_lat_nsec);
			lat_info->scale_grp = iolat;
		}
		scale_cookie_change(iolat->blkiolat, lat_info, false);
	}
	latency_stat_init(iolat, &iolat->cur_stat);
out:
	spin_unlock_irqrestore(&lat_info->lock, flags);
}

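/*
 * rq_qos done_bio hook, runs on bio completion.  Drop the inflight count at
 * every level we throttled at, record the completion latency, and if this
 * bio closed out the current stat window run iolatency_check_latencies() for
 * that level.
 */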
static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
{
	struct blkcg_gq *blkg;
	struct rq_wait *rqw;
	struct iolatency_grp *iolat;
	u64 window_start;
	u64 now;
	bool issue_as_root = bio_issue_as_root_blkg(bio);
	int inflight = 0;

	blkg = bio->bi_blkg;
	if (!blkg || !bio_flagged(bio, BIO_QOS_THROTTLED))
		return;

	iolat = blkg_to_lat(bio->bi_blkg);
	if (!iolat)
		return;

	if (!iolat->blkiolat->enabled)
		return;

	now = ktime_to_ns(ktime_get());
	while (blkg && blkg->parent) {
		iolat = blkg_to_lat(blkg);
		if (!iolat) {
			blkg = blkg->parent;
			continue;
		}
		rqw = &iolat->rq_wait;

		inflight = atomic_dec_return(&rqw->inflight);
		WARN_ON_ONCE(inflight < 0);

		/*
		 * If bi_status is BLK_STS_AGAIN, the bio wasn't actually
		 * submitted, so do not account for it.
		 */
		if (iolat->min_lat_nsec && bio->bi_status != BLK_STS_AGAIN) {
			iolatency_record_time(iolat, &bio->bi_issue, now,
					      issue_as_root);
			window_start = atomic64_read(&iolat->window_start);
			if (now > window_start &&
			    (now - window_start) >= iolat->cur_win_nsec) {
				if (atomic64_try_cmpxchg(&iolat->window_start,
							 &window_start, now))
					iolatency_check_latencies(iolat, now);
			}
		}
		wake_up(&rqw->wait);
		blkg = blkg->parent;
	}
}

static void blkcg_iolatency_exit(struct rq_qos *rqos)
{
	struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);

	del_timer_sync(&blkiolat->timer);
	flush_work(&blkiolat->enable_work);
	blkcg_deactivate_policy(rqos->q, &blkcg_policy_iolatency);
	kfree(blkiolat);
}

static struct rq_qos_ops blkcg_iolatency_ops = {
	.throttle = blkcg_iolatency_throttle,
	.done_bio = blkcg_iolatency_done_bio,
	.exit = blkcg_iolatency_exit,
};

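/*
 * Periodic timer, re-armed from the throttle path while IO is flowing.  Its
 * job is to unwedge groups that were scaled down and then went idle: if
 * nothing has caused a scale event recently, nudge the scale cookie back up
 * so throttled siblings do not stay throttled forever.
 */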
static void blkiolatency_timer_fn(struct timer_list *t)
{
	struct blk_iolatency *blkiolat = from_timer(blkiolat, t, timer);
	struct blkcg_gq *blkg;
	struct cgroup_subsys_state *pos_css;
	u64 now = ktime_to_ns(ktime_get());

	rcu_read_lock();
	blkg_for_each_descendant_pre(blkg, pos_css,
				     blkiolat->rqos.q->root_blkg) {
		struct iolatency_grp *iolat;
		struct child_latency_info *lat_info;
		unsigned long flags;
		u64 cookie;

		/*
		 * We could be exiting, don't access the pd unless we have a
		 * ref on the blkg.
		 */
		if (!blkg_tryget(blkg))
			continue;

		iolat = blkg_to_lat(blkg);
		if (!iolat)
			goto next;

		lat_info = &iolat->child_lat;
		cookie = atomic_read(&lat_info->scale_cookie);

		if (cookie >= DEFAULT_SCALE_COOKIE)
			goto next;

		spin_lock_irqsave(&lat_info->lock, flags);
		if (lat_info->last_scale_event >= now)
			goto next_lock;

		/*
		 * We scaled down but don't have a scale_grp, scale up and
		 * carry on.
		 */
		if (lat_info->scale_grp == NULL) {
			scale_cookie_change(iolat->blkiolat, lat_info, true);
			goto next_lock;
		}

		/*
		 * It's been 5 seconds since our last scale event, clear the
		 * scale grp in case the group that needed the scale down
		 * isn't doing any IO currently.
		 */
		if (now - lat_info->last_scale_event >=
		    ((u64)NSEC_PER_SEC * 5))
			lat_info->scale_grp = NULL;
next_lock:
		spin_unlock_irqrestore(&lat_info->lock, flags);
next:
		blkg_put(blkg);
	}
	rcu_read_unlock();
}

/**
 * blkiolatency_enable_work_fn - Enable or disable iolatency on the device
 * @work: enable_work of the blk_iolatency of interest
 *
 * iolatency needs to keep track of the number of in-flight IOs per cgroup,
 * which is relatively expensive as it involves walking up the hierarchy
 * twice for every IO.  So, if iolatency is not enabled in any cgroup for the
 * device, we want to disable the in-flight tracking.
 *
 * The counting has to stay balanced - we don't want to leak in-flight counts
 * by disabling accounting in the completion path while IOs are still in
 * flight.  This is achieved by making sure no IO is in flight while flipping
 * ->enabled: the queue is frozen around the flip, and because freezing needs
 * a sleepable context the flip is punted to this work item.
 */
static void blkiolatency_enable_work_fn(struct work_struct *work)
{
	struct blk_iolatency *blkiolat = container_of(work, struct blk_iolatency,
						      enable_work);
	bool enabled;

	/*
	 * There can only be one instance of this function running for
	 * @blkiolat and it's guaranteed to be executed at least once after
	 * the latest ->enable_cnt modification, so acting on the latest
	 * ->enable_cnt is sufficient.
	 *
	 * Also, we know @blkiolat is safe to access as ->enable_work is
	 * flushed in blkcg_iolatency_exit().
	 */
	enabled = atomic_read(&blkiolat->enable_cnt);
	if (enabled != blkiolat->enabled) {
		blk_mq_freeze_queue(blkiolat->rqos.q);
		blkiolat->enabled = enabled;
		blk_mq_unfreeze_queue(blkiolat->rqos.q);
	}
}

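/*
 * Set up the blk-iolatency rq_qos instance for a request queue: register the
 * rq_qos ops, activate the blkcg policy, and initialise the timer and the
 * enable work item.  Torn down again through blkcg_iolatency_exit().
 */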
int blk_iolatency_init(struct request_queue *q)
{
	struct blk_iolatency *blkiolat;
	struct rq_qos *rqos;
	int ret;

	blkiolat = kzalloc(sizeof(*blkiolat), GFP_KERNEL);
	if (!blkiolat)
		return -ENOMEM;

	rqos = &blkiolat->rqos;
	rqos->id = RQ_QOS_LATENCY;
	rqos->ops = &blkcg_iolatency_ops;
	rqos->q = q;

	ret = rq_qos_add(q, rqos);
	if (ret)
		goto err_free;
	ret = blkcg_activate_policy(q, &blkcg_policy_iolatency);
	if (ret)
		goto err_qos_del;

	timer_setup(&blkiolat->timer, blkiolatency_timer_fn, 0);
	INIT_WORK(&blkiolat->enable_work, blkiolatency_enable_work_fn);

	return 0;

err_qos_del:
	rq_qos_del(q, rqos);
err_free:
	kfree(blkiolat);
	return ret;
}

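/*
 * Update a group's latency target (in nanoseconds) and derive its stat
 * window size from it (16 * target, clamped to [100ms, 1s]).  Transitions
 * between zero and non-zero targets adjust the device-wide enable count and
 * punt the actual ->enabled flip to the enable work item.
 */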
static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
{
	struct iolatency_grp *iolat = blkg_to_lat(blkg);
	struct blk_iolatency *blkiolat = iolat->blkiolat;
	u64 oldval = iolat->min_lat_nsec;

	iolat->min_lat_nsec = val;
	iolat->cur_win_nsec = max_t(u64, val << 4, BLKIOLATENCY_MIN_WIN_SIZE);
	iolat->cur_win_nsec = min_t(u64, iolat->cur_win_nsec,
				    BLKIOLATENCY_MAX_WIN_SIZE);

	if (!oldval && val) {
		if (atomic_inc_return(&blkiolat->enable_cnt) == 1)
			schedule_work(&blkiolat->enable_work);
	}
	if (oldval && !val) {
		blkcg_clear_delay(blkg);
		if (atomic_dec_return(&blkiolat->enable_cnt) == 0)
			schedule_work(&blkiolat->enable_work);
	}
}

static void iolatency_clear_scaling(struct blkcg_gq *blkg)
{
	if (blkg->parent) {
		struct iolatency_grp *iolat = blkg_to_lat(blkg->parent);
		struct child_latency_info *lat_info;
		if (!iolat)
			return;

		lat_info = &iolat->child_lat;
		spin_lock(&lat_info->lock);
		atomic_set(&lat_info->scale_cookie, DEFAULT_SCALE_COOKIE);
		lat_info->last_scale_event = 0;
		lat_info->scale_grp = NULL;
		lat_info->scale_lat = 0;
		spin_unlock(&lat_info->lock);
	}
}

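/*
 * Handle a write to the io.latency cgroup file.  After the MAJ:MIN device
 * number the body is a list of key=value tokens; only "target" is accepted,
 * given in microseconds or as "max" to clear the target.  For example
 * (device numbers illustrative):
 *
 *	echo "8:16 target=750" > io.latency
 *
 * sets a 750us completion latency target for this cgroup on device 8:16.
 */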
static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
				   size_t nbytes, loff_t off)
{
	struct blkcg *blkcg = css_to_blkcg(of_css(of));
	struct blkcg_gq *blkg;
	struct blkg_conf_ctx ctx;
	struct iolatency_grp *iolat;
	char *p, *tok;
	u64 lat_val = 0;
	u64 oldval;
	int ret;

	ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx);
	if (ret)
		return ret;

	iolat = blkg_to_lat(ctx.blkg);
	p = ctx.body;

	ret = -EINVAL;
	while ((tok = strsep(&p, " "))) {
		char key[16];
		char val[21];

		if (sscanf(tok, "%15[^=]=%20s", key, val) != 2)
			goto out;

		if (!strcmp(key, "target")) {
			u64 v;

			if (!strcmp(val, "max"))
				lat_val = 0;
			else if (sscanf(val, "%llu", &v) == 1)
				lat_val = v * NSEC_PER_USEC;
			else
				goto out;
		} else {
			goto out;
		}
	}

	blkg = ctx.blkg;
	oldval = iolat->min_lat_nsec;

	iolatency_set_min_lat_nsec(blkg, lat_val);
	if (oldval != iolat->min_lat_nsec)
		iolatency_clear_scaling(blkg);
	ret = 0;
out:
	blkg_conf_finish(&ctx);
	return ret ?: nbytes;
}

static u64 iolatency_prfill_limit(struct seq_file *sf,
				  struct blkg_policy_data *pd, int off)
{
	struct iolatency_grp *iolat = pd_to_lat(pd);
	const char *dname = blkg_dev_name(pd->blkg);

	if (!dname || !iolat->min_lat_nsec)
		return 0;
	seq_printf(sf, "%s target=%llu\n",
		   dname, div_u64(iolat->min_lat_nsec, NSEC_PER_USEC));
	return 0;
}

static int iolatency_print_limit(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  iolatency_prfill_limit,
			  &blkcg_policy_iolatency, seq_cft(sf)->private, false);
	return 0;
}

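/*
 * Emit the ssd-mode portion of this group's debug stats: how many requests
 * missed the target out of the total since the current window started, plus
 * the currently allowed queue depth.
 */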
static void iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s)
{
	struct latency_stat stat;
	int cpu;

	latency_stat_init(iolat, &stat);
	preempt_disable();
	for_each_online_cpu(cpu) {
		struct latency_stat *s;
		s = per_cpu_ptr(iolat->stats, cpu);
		latency_stat_sum(iolat, &stat, s);
	}
	preempt_enable();

	if (iolat->rq_depth.max_depth == UINT_MAX)
		seq_printf(s, " missed=%llu total=%llu depth=max",
			   (unsigned long long)stat.ps.missed,
			   (unsigned long long)stat.ps.total);
	else
		seq_printf(s, " missed=%llu total=%llu depth=%u",
			   (unsigned long long)stat.ps.missed,
			   (unsigned long long)stat.ps.total,
			   iolat->rq_depth.max_depth);
}

static void iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
{
	struct iolatency_grp *iolat = pd_to_lat(pd);
	unsigned long long avg_lat;
	unsigned long long cur_win;

	if (!blkcg_debug_stats)
		return;

	if (iolat->ssd)
		return iolatency_ssd_stat(iolat, s);

	avg_lat = div64_u64(iolat->lat_avg, NSEC_PER_USEC);
	cur_win = div64_u64(iolat->cur_win_nsec, NSEC_PER_MSEC);
	if (iolat->rq_depth.max_depth == UINT_MAX)
		seq_printf(s, " depth=max avg_lat=%llu win=%llu",
			   avg_lat, cur_win);
	else
		seq_printf(s, " depth=%u avg_lat=%llu win=%llu",
			   iolat->rq_depth.max_depth, avg_lat, cur_win);
}

static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp,
						   struct request_queue *q,
						   struct blkcg *blkcg)
{
	struct iolatency_grp *iolat;

	iolat = kzalloc_node(sizeof(*iolat), gfp, q->node);
	if (!iolat)
		return NULL;
	iolat->stats = __alloc_percpu_gfp(sizeof(struct latency_stat),
				       __alignof__(struct latency_stat), gfp);
	if (!iolat->stats) {
		kfree(iolat);
		return NULL;
	}
	return &iolat->pd;
}

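/*
 * Per-group initialisation when the policy data is attached to a blkg: pick
 * the stat flavour based on whether the backing queue is non-rotational,
 * reset the per-cpu stats, open the first stat window, and inherit the scale
 * cookie from the parent so a new group starts at the parent's current level
 * of throttling.
 */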
static void iolatency_pd_init(struct blkg_policy_data *pd)
{
	struct iolatency_grp *iolat = pd_to_lat(pd);
	struct blkcg_gq *blkg = lat_to_blkg(iolat);
	struct rq_qos *rqos = blkcg_rq_qos(blkg->q);
	struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
	u64 now = ktime_to_ns(ktime_get());
	int cpu;

	if (blk_queue_nonrot(blkg->q))
		iolat->ssd = true;
	else
		iolat->ssd = false;

	for_each_possible_cpu(cpu) {
		struct latency_stat *stat;
		stat = per_cpu_ptr(iolat->stats, cpu);
		latency_stat_init(iolat, stat);
	}

	latency_stat_init(iolat, &iolat->cur_stat);
	rq_wait_init(&iolat->rq_wait);
	spin_lock_init(&iolat->child_lat.lock);
	iolat->rq_depth.queue_depth = blkg->q->nr_requests;
	iolat->rq_depth.max_depth = UINT_MAX;
	iolat->rq_depth.default_depth = iolat->rq_depth.queue_depth;
	iolat->blkiolat = blkiolat;
	iolat->cur_win_nsec = 100 * NSEC_PER_MSEC;
	atomic64_set(&iolat->window_start, now);

	/*
	 * We init things in list order, so the pd for the parent may not be
	 * init'ed yet for whatever reason.
	 */
	if (blkg->parent && blkg_to_pd(blkg->parent, &blkcg_policy_iolatency)) {
		struct iolatency_grp *parent = blkg_to_lat(blkg->parent);
		atomic_set(&iolat->scale_cookie,
			   atomic_read(&parent->child_lat.scale_cookie));
	} else {
		atomic_set(&iolat->scale_cookie, DEFAULT_SCALE_COOKIE);
	}

	atomic_set(&iolat->child_lat.scale_cookie, DEFAULT_SCALE_COOKIE);
}

static void iolatency_pd_offline(struct blkg_policy_data *pd)
{
	struct iolatency_grp *iolat = pd_to_lat(pd);
	struct blkcg_gq *blkg = lat_to_blkg(iolat);

	iolatency_set_min_lat_nsec(blkg, 0);
	iolatency_clear_scaling(blkg);
}

static void iolatency_pd_free(struct blkg_policy_data *pd)
{
	struct iolatency_grp *iolat = pd_to_lat(pd);
	free_percpu(iolat->stats);
	kfree(iolat);
}

static struct cftype iolatency_files[] = {
	{
		.name = "latency",
		.flags = CFTYPE_NOT_ON_ROOT,
		.seq_show = iolatency_print_limit,
		.write = iolatency_set_limit,
	},
	{}
};

static struct blkcg_policy blkcg_policy_iolatency = {
	.dfl_cftypes	= iolatency_files,
	.pd_alloc_fn	= iolatency_pd_alloc,
	.pd_init_fn	= iolatency_pd_init,
	.pd_offline_fn	= iolatency_pd_offline,
	.pd_free_fn	= iolatency_pd_free,
	.pd_stat_fn	= iolatency_pd_stat,
};

static int __init iolatency_init(void)
{
	return blkcg_policy_register(&blkcg_policy_iolatency);
}

static void __exit iolatency_exit(void)
{
	blkcg_policy_unregister(&blkcg_policy_iolatency);
}

module_init(iolatency_init);
module_exit(iolatency_exit);