// SPDX-License-Identifier: GPL-2.0
/*
 *  cpuidle-pseries - idle state cpuidle driver.
 *  Adapted from drivers/idle/intel_idle.c and
 *  drivers/acpi/processor_idle.c
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/cpuidle.h>
#include <linux/cpu.h>
#include <linux/notifier.h>

#include <asm/paca.h>
#include <asm/reg.h>
#include <asm/machdep.h>
#include <asm/firmware.h>
#include <asm/runlatch.h>
#include <asm/idle.h>
#include <asm/plpar_wrappers.h>
#include <asm/rtas.h>

static struct cpuidle_driver pseries_idle_driver = {
	.name             = "pseries_idle",
	.owner            = THIS_MODULE,
};

static int max_idle_state __read_mostly;
static struct cpuidle_state *cpuidle_state_table __read_mostly;
static u64 snooze_timeout __read_mostly;
static bool snooze_timeout_en __read_mostly;

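/*
 * snooze_loop() - busy-poll idle loop.
 *
 * Spin at low SMT thread priority with TIF_POLLING_NRFLAG set, so a
 * remote CPU can wake us without an IPI, until either a task becomes
 * runnable or the snooze timeout expires.
 */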
static int snooze_loop(struct cpuidle_device *dev,
			struct cpuidle_driver *drv,
			int index)
{
	u64 snooze_exit_time;

	set_thread_flag(TIF_POLLING_NRFLAG);

	pseries_idle_prolog();
	local_irq_enable();
	snooze_exit_time = get_tb() + snooze_timeout;

	while (!need_resched()) {
		HMT_low();
		HMT_very_low();
		if (likely(snooze_timeout_en) && get_tb() > snooze_exit_time) {
			/*
			 * Task has not woken up but we are exiting the polling
			 * loop anyway. Require a barrier after polling is
			 * cleared to order subsequent test of need_resched().
			 */
			clear_thread_flag(TIF_POLLING_NRFLAG);
			smp_mb();
			break;
		}
	}

	HMT_medium();
	clear_thread_flag(TIF_POLLING_NRFLAG);

	local_irq_disable();

	pseries_idle_epilog();

	return index;
}

static void check_and_cede_processor(void)
{
	/*
	 * Ensure our interrupt state is properly tracked,
	 * also checks if no interrupt has occurred while we
	 * were soft-disabled
	 */
	if (prep_irq_for_idle()) {
		cede_processor();
#ifdef CONFIG_TRACE_IRQFLAGS
		/* Ensure that H_CEDE returns with IRQs on */
		if (WARN_ON(!(mfmsr() & MSR_EE)))
			__hard_irq_enable();
#endif
	}
}

/*
 * XCEDE: Extended CEDE states discovered through the
 *        "ibm,get-system-parameter" RTAS call with the token
 *        CEDE_LATENCY_TOKEN
 */

/*
 * CEDE_LATENCY_TOKEN is the ibm,get-system-parameter token value for
 * the Cede Latency Settings Information system parameter (see the
 * System Parameters Option of PAPR).
 */
#define CEDE_LATENCY_TOKEN	45

/*
 * If the platform supports the cede latency settings information system
 * parameter it must provide the following information in the NULL
 * terminated parameter string:
 *
 * a. The first byte is the length "N" of each cede latency setting record
 *    minus one (zero indicates a length of 1 byte).
 *
 * b. For each supported cede latency setting, a cede latency setting
 *    record consisting of the first "N" bytes as per the following table.
 *
 *    -----------------------------
 *    | Field           | Field   |
 *    | Name            | Length  |
 *    -----------------------------
 *    | Cede Latency    | 1 Byte  |
 *    | Specifier Value |         |
 *    -----------------------------
 *    | Maximum wakeup  |         |
 *    | latency in      | 8 Bytes |
 *    | tb-ticks        |         |
 *    -----------------------------
 *    | Responsive to   |         |
 *    | external        | 1 Byte  |
 *    | interrupts      |         |
 *    -----------------------------
 *
 * This version has cede latency record size = 10.
 *
 * struct xcede_latency_payload represents a) and b), with
 * xcede_latency_record representing the table in b).
 *
 * xcede_latency_parameter is what gets returned by the
 * ibm,get-system-parameter RTAS call when made with CEDE_LATENCY_TOKEN.
 *
 * These structures are only used to represent the data obtained by the
 * RTAS call. The data is in big-endian.
 */
struct xcede_latency_record {
	u8	hint;
	__be64	latency_ticks;
	u8	wake_on_irqs;
} __packed;

struct xcede_latency_payload {
	u8	record_size;
	struct xcede_latency_record records[16];
} __packed;

struct xcede_latency_parameter {
	__be16	payload_size;
	struct	xcede_latency_payload payload;
	u8	null_char;
} __packed;

static unsigned int nr_xcede_records;
static struct xcede_latency_parameter xcede_latency_parameter __initdata;

static int __init parse_cede_parameters(void)
{
	struct xcede_latency_payload *payload;
	u32 total_xcede_records_size;
	u8 xcede_record_size;
	u16 payload_size;
	int ret, i;

	ret = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
			NULL, CEDE_LATENCY_TOKEN, __pa(&xcede_latency_parameter),
			sizeof(xcede_latency_parameter));
	if (ret) {
		pr_err("xcede: Error parsing CEDE_LATENCY_TOKEN\n");
		return ret;
	}

	payload_size = be16_to_cpu(xcede_latency_parameter.payload_size);
	payload = &xcede_latency_parameter.payload;

	xcede_record_size = payload->record_size + 1;

	if (xcede_record_size != sizeof(struct xcede_latency_record)) {
		pr_err("xcede: Expected record-size %lu. Observed size %u.\n",
		       sizeof(struct xcede_latency_record), xcede_record_size);
		return -EINVAL;
	}

	pr_info("xcede: xcede_record_size = %d\n", xcede_record_size);

	/*
	 * Since the payload_size includes the last NULL byte and the
	 * one-byte record_size field, the remaining bytes correspond to
	 * the array of all cede latency settings.
	 */
	total_xcede_records_size = payload_size - 2;
	nr_xcede_records = total_xcede_records_size / xcede_record_size;

	for (i = 0; i < nr_xcede_records; i++) {
		struct xcede_latency_record *record = &payload->records[i];
		u64 latency_ticks = be64_to_cpu(record->latency_ticks);
		u8 wake_on_irqs = record->wake_on_irqs;
		u8 hint = record->hint;

		pr_info("xcede: Record %d : hint = %u, latency = 0x%llx tb ticks, Wake-on-irq = %u\n",
			i, hint, latency_ticks, wake_on_irqs);
	}

	return 0;
}

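/*
 * Idle states for the dedicated-processor case: the "snooze" polling
 * state and one CEDE state. cede_latency_hint[] holds the per-state
 * latency specifier value that is handed to the hypervisor via the
 * lppaca before ceding.
 */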
#define NR_DEDICATED_STATES	2 /* snooze, CEDE */
static u8 cede_latency_hint[NR_DEDICATED_STATES];

static int dedicated_cede_loop(struct cpuidle_device *dev,
				struct cpuidle_driver *drv,
				int index)
{
	u8 old_latency_hint;

	pseries_idle_prolog();
	get_lppaca()->donate_dedicated_cpu = 1;
	old_latency_hint = get_lppaca()->cede_latency_hint;
	get_lppaca()->cede_latency_hint = cede_latency_hint[index];

	HMT_medium();
	check_and_cede_processor();

	local_irq_disable();
	get_lppaca()->donate_dedicated_cpu = 0;
	get_lppaca()->cede_latency_hint = old_latency_hint;

	pseries_idle_epilog();

	return index;
}

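/*
 * shared_cede_loop() - cede the processor in a shared-processor LPAR,
 * allowing the hypervisor to dispatch other virtual processors on the
 * underlying physical processor while this vCPU is idle.
 */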
static int shared_cede_loop(struct cpuidle_device *dev,
			struct cpuidle_driver *drv,
			int index)
{
	pseries_idle_prolog();

	/*
	 * Yield the processor to the hypervisor.  We return if
	 * an external interrupt occurs (which are driven prior
	 * to returning here) or if a prod occurs from another
	 * processor. When returning here, external interrupts
	 * are enabled.
	 */
	check_and_cede_processor();

	local_irq_disable();
	pseries_idle_epilog();

	return index;
}

/*
 * States for dedicated partition case.
 */
static struct cpuidle_state dedicated_states[NR_DEDICATED_STATES] = {
	{ /* Snooze */
		.name = "snooze",
		.desc = "snooze",
		.exit_latency = 0,
		.target_residency = 0,
		.enter = &snooze_loop },
	{ /* CEDE */
		.name = "CEDE",
		.desc = "CEDE",
		.exit_latency = 10,
		.target_residency = 100,
		.enter = &dedicated_cede_loop },
};

/*
 * States for shared partition case.
 */
static struct cpuidle_state shared_states[] = {
	{ /* Snooze */
		.name = "snooze",
		.desc = "snooze",
		.exit_latency = 0,
		.target_residency = 0,
		.enter = &snooze_loop },
	{ /* Shared Cede */
		.name = "Shared Cede",
		.desc = "Shared Cede",
		.exit_latency = 10,
		.target_residency = 100,
		.enter = &shared_cede_loop },
};

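/*
 * CPU hotplug callbacks: enable the per-CPU cpuidle device when a CPU
 * comes online and disable it once the CPU is dead.
 */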
static int pseries_cpuidle_cpu_online(unsigned int cpu)
{
	struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);

	if (dev && cpuidle_get_driver()) {
		cpuidle_pause_and_lock();
		cpuidle_enable_device(dev);
		cpuidle_resume_and_unlock();
	}
	return 0;
}

static int pseries_cpuidle_cpu_dead(unsigned int cpu)
{
	struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);

	if (dev && cpuidle_get_driver()) {
		cpuidle_pause_and_lock();
		cpuidle_disable_device(dev);
		cpuidle_resume_and_unlock();
	}
	return 0;
}

/*
 * pseries_cpuidle_driver_init(): populate the cpuidle driver's state
 * table from cpuidle_state_table, skipping entries without an enter()
 * callback.
 */
static int pseries_cpuidle_driver_init(void)
{
	int idle_state;
	struct cpuidle_driver *drv = &pseries_idle_driver;

	drv->state_count = 0;

	for (idle_state = 0; idle_state < max_idle_state; ++idle_state) {
		/* Is the state not enabled? */
		if (cpuidle_state_table[idle_state].enter == NULL)
			continue;

		drv->states[drv->state_count] =	/* structure copy */
			cpuidle_state_table[idle_state];

		drv->state_count += 1;
	}

	return 0;
}

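/*
 * fixup_cede0_latency() - use the extended CEDE latency records
 * advertised by the hypervisor to refine the exit latency and target
 * residency of the dedicated CEDE state.
 */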
static void __init fixup_cede0_latency(void)
{
	struct xcede_latency_payload *payload;
	u64 min_xcede_latency_us = UINT_MAX;
	int i;

	if (parse_cede_parameters())
		return;

	pr_info("cpuidle: Skipping the %d Extended CEDE idle states\n",
		nr_xcede_records);

	payload = &xcede_latency_parameter.payload;

	/*
	 * The CEDE idle state maps to CEDE(0). While the hypervisor
	 * does not advertise CEDE(0) exit latency values, it does
	 * advertise the latency values of the extended CEDE states.
	 * We use the lowest advertised exit latency value as a proxy
	 * for the exit latency of CEDE(0).
	 */
	for (i = 0; i < nr_xcede_records; i++) {
		struct xcede_latency_record *record = &payload->records[i];
		u8 hint = record->hint;
		u64 latency_tb = be64_to_cpu(record->latency_ticks);
		u64 latency_us = DIV_ROUND_UP_ULL(tb_to_ns(latency_tb), NSEC_PER_USEC);

		/*
		 * We expect the exit latency of an extended CEDE
		 * state to be non-zero, since it takes at least a few
		 * nanoseconds to wake up the idle CPU and dispatch
		 * the virtual processor into the Linux guest.
		 *
		 * So we consider only non-zero values when performing
		 * the fixup of the CEDE(0) latency.
		 */
		if (latency_us == 0) {
			pr_warn("cpuidle: Skipping xcede record %d [hint=%d]. Exit latency = 0us\n",
				i, hint);
			continue;
		}

		if (latency_us < min_xcede_latency_us)
			min_xcede_latency_us = latency_us;
	}

	if (min_xcede_latency_us != UINT_MAX) {
		dedicated_states[1].exit_latency = min_xcede_latency_us;
		dedicated_states[1].target_residency = 10 * min_xcede_latency_us;
		pr_info("cpuidle: Fixed up CEDE exit latency to %llu us\n",
			min_xcede_latency_us);
	}
}

/*
 * pseries_idle_probe()
 * Choose state table for shared versus dedicated partition
 */
static int __init pseries_idle_probe(void)
{
	if (cpuidle_disable != IDLE_NO_OVERRIDE)
		return -ENODEV;

	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
		/*
		 * Use local_paca instead of get_lppaca() since
		 * preemption is not disabled, and it is not required in
		 * fact, since lppaca_ptr does not need to be the value
		 * associated to the current CPU, it can be from any CPU.
		 */
		if (lppaca_shared_proc(local_paca->lppaca_ptr)) {
			cpuidle_state_table = shared_states;
			max_idle_state = ARRAY_SIZE(shared_states);
		} else {
			/*
			 * Use firmware provided latency values
			 * starting with POWER10 platforms. In the
			 * case that we are running on a POWER10
			 * platform but in an earlier compat mode, we
			 * can still use the firmware provided values.
			 *
			 * However, on platforms prior to POWER10, we
			 * cannot rely on the accuracy of the firmware
			 * provided latency values. On such platforms,
			 * go with the conservative default estimate
			 * of 10us.
			 */
			if (cpu_has_feature(CPU_FTR_ARCH_31) || pvr_version_is(PVR_POWER10))
				fixup_cede0_latency();
			cpuidle_state_table = dedicated_states;
			max_idle_state = NR_DEDICATED_STATES;
		}
	} else
		return -ENODEV;

	if (max_idle_state > 1) {
		snooze_timeout_en = true;
		snooze_timeout = cpuidle_state_table[1].target_residency *
				 tb_ticks_per_usec;
	}
	return 0;
}

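/*
 * Driver init: select the state table for this partition type, register
 * the cpuidle driver and hook up the CPU hotplug callbacks.
 */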
static int __init pseries_processor_idle_init(void)
{
	int retval;

	retval = pseries_idle_probe();
	if (retval)
		return retval;

	pseries_cpuidle_driver_init();
	retval = cpuidle_register(&pseries_idle_driver, NULL);
	if (retval) {
		printk(KERN_DEBUG "Registration of pseries driver failed.\n");
		return retval;
	}

	retval = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
					   "cpuidle/pseries:online",
					   pseries_cpuidle_cpu_online, NULL);
	WARN_ON(retval < 0);
	retval = cpuhp_setup_state_nocalls(CPUHP_CPUIDLE_DEAD,
					   "cpuidle/pseries:DEAD", NULL,
					   pseries_cpuidle_cpu_dead);
	WARN_ON(retval < 0);
	printk(KERN_DEBUG "pseries_idle_driver registered\n");
	return 0;
}

device_initcall(pseries_processor_idle_init);