0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011 #include <linux/kernel.h>
0012 #include <linux/interrupt.h>
0013 #include <linux/clocksource.h>
0014 #include <linux/clockchips.h>
0015 #include <linux/gfp.h>
0016 #include <linux/slab.h>
0017 #include <linux/pvclock_gtod.h>
0018 #include <linux/timekeeper_internal.h>
0019
0020 #include <asm/pvclock.h>
0021 #include <asm/xen/hypervisor.h>
0022 #include <asm/xen/hypercall.h>
0023
0024 #include <xen/events.h>
0025 #include <xen/features.h>
0026 #include <xen/interface/xen.h>
0027 #include <xen/interface/vcpu.h>
0028
0029 #include "xen-ops.h"
0030
0031
/* Minimum amount of time (in ns) until the next clock event fires. */
#define TIMER_SLOP 100000

/* Subtracted from the raw clocksource by xen_sched_clock(); set at init
 * and recomputed across save/restore so sched_clock stays continuous. */
static u64 xen_sched_clock_offset __read_mostly;
0035
0036
/*
 * Return the TSC frequency in kHz, as published by Xen in vcpu 0's
 * pvclock time info.  Used as x86_platform.calibrate_tsc.
 */
static unsigned long xen_tsc_khz(void)
{
	struct pvclock_vcpu_time_info *info =
		&HYPERVISOR_shared_info->vcpu_info[0].time;

	/* The hypervisor-provided frequency is authoritative: skip the
	 * kernel's own TSC calibration/refinement. */
	setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
	return pvclock_tsc_khz(info);
}
0045
/*
 * Read Xen's system time (ns) from this vcpu's pvclock time info.
 *
 * Preemption is disabled (notrace variant, as this is called from
 * sched_clock/tracing paths) so we sample our own vcpu's data and
 * cannot migrate mid-read.
 */
static u64 xen_clocksource_read(void)
{
	struct pvclock_vcpu_time_info *src;
	u64 ret;

	preempt_disable_notrace();
	src = &__this_cpu_read(xen_vcpu)->time;
	ret = pvclock_clocksource_read(src);
	preempt_enable_notrace();
	return ret;
}
0057
/* Clocksource ->read callback: adapts xen_clocksource_read() to the
 * struct clocksource interface (the cs argument is unused). */
static u64 xen_clocksource_get_cycles(struct clocksource *cs)
{
	return xen_clocksource_read();
}
0062
/* Paravirt sched_clock: Xen system time minus the boot/restore offset,
 * so the scheduler clock starts near zero and stays monotonic across
 * save/restore (see xen_restore_time_memory_area()). */
static u64 xen_sched_clock(void)
{
	return xen_clocksource_read() - xen_sched_clock_offset;
}
0067
/*
 * Read the wall-clock time published by Xen in the shared info page,
 * converted via this cpu's pvclock time info.  get_cpu_var/put_cpu_var
 * pin us to one cpu for the duration of the read.
 */
static void xen_read_wallclock(struct timespec64 *ts)
{
	struct shared_info *s = HYPERVISOR_shared_info;
	struct pvclock_wall_clock *wall_clock = &(s->wc);
	struct pvclock_vcpu_time_info *vcpu_time;

	vcpu_time = &get_cpu_var(xen_vcpu)->time;
	pvclock_read_wallclock(wall_clock, vcpu_time, ts);
	put_cpu_var(xen_vcpu);
}
0078
/* x86_platform.get_wallclock hook: thin wrapper over xen_read_wallclock(). */
static void xen_get_wallclock(struct timespec64 *now)
{
	xen_read_wallclock(now);
}
0083
/* x86_platform.set_wallclock hook: a guest cannot set Xen's wall clock
 * directly, so always report "no such device". */
static int xen_set_wallclock(const struct timespec64 *now)
{
	return -ENODEV;
}
0088
/*
 * pvclock_gtod notifier: push the kernel's wall-clock time to Xen so
 * the hypervisor-maintained clock stays in sync.  Invoked by the
 * timekeeping core when the clock is set (@was_set) and periodically
 * while synchronized.
 */
static int xen_pvclock_gtod_notify(struct notifier_block *nb,
				   unsigned long was_set, void *priv)
{
	/* Protected by the calling core code serialization */
	static struct timespec64 next_sync;

	struct xen_platform_op op;
	struct timespec64 now;
	struct timekeeper *tk = priv;
	static bool settime64_supported = true;
	int ret;

	now.tv_sec = tk->xtime_sec;
	now.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);

	/*
	 * We only take the expensive hypercall when the clock was set
	 * or when the 11-minute resync interval has elapsed.
	 */
	if (!was_set && timespec64_compare(&now, &next_sync) < 0)
		return NOTIFY_OK;

again:
	/* Prefer the 64-bit settime op; fall back to 32-bit if the
	 * hypervisor reports -ENOSYS (older Xen). */
	if (settime64_supported) {
		op.cmd = XENPF_settime64;
		op.u.settime64.mbz = 0;
		op.u.settime64.secs = now.tv_sec;
		op.u.settime64.nsecs = now.tv_nsec;
		op.u.settime64.system_time = xen_clocksource_read();
	} else {
		op.cmd = XENPF_settime32;
		op.u.settime32.secs = now.tv_sec;
		op.u.settime32.nsecs = now.tv_nsec;
		op.u.settime32.system_time = xen_clocksource_read();
	}

	ret = HYPERVISOR_platform_op(&op);

	if (ret == -ENOSYS && settime64_supported) {
		settime64_supported = false;
		goto again;
	}
	if (ret < 0)
		return NOTIFY_BAD;

	/*
	 * Move the next drift compensation time 11 minutes ahead,
	 * emulating the periodic sync_cmos_clock() update for a
	 * hardware RTC.
	 */
	next_sync = now;
	next_sync.tv_sec += 11 * 60;

	return NOTIFY_OK;
}
0144
/* Registered in xen_time_init() for the initial domain only. */
static struct notifier_block xen_pvclock_gtod_notifier = {
	.notifier_call = xen_pvclock_gtod_notify,
};
0148
/* Clocksource ->enable callback: mark the pvclock VDSO clock mode as in
 * use so the vDSO gettime path may select it. */
static int xen_cs_enable(struct clocksource *cs)
{
	vclocks_set_used(VDSO_CLOCKMODE_PVCLOCK);
	return 0;
}
0154
/* The Xen pvclock clocksource.  Rating 400 outranks the raw TSC by
 * default; lowered to 275 for dom0 in xen_time_init(). */
static struct clocksource xen_clocksource __read_mostly = {
	.name = "xen",
	.rating = 400,
	.read = xen_clocksource_get_cycles,
	.mask = CLOCKSOURCE_MASK(64),
	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
	.enable = xen_cs_enable,
};
0163
0164
0165
0166
0167
0168
0169
0170
0171
0172
0173
0174
0175
0176
0177
0178
0179
0180
0181
0182
0183
0184
0185
0186
0187
0188
0189
0190
0191
0192
0193
0194
/* Convert a relative timeout (@delta ns) into an absolute Xen system
 * time, as the hypervisor timer ops take absolute deadlines. */
static s64 get_abs_timeout(unsigned long delta)
{
	return xen_clocksource_read() + delta;
}
0199
/* Shutdown state handler for the timer_op clockevent. */
static int xen_timerop_shutdown(struct clock_event_device *evt)
{
	/* cancel timeout */
	HYPERVISOR_set_timer_op(0);

	return 0;
}
0207
/* Program the next one-shot event @delta ns from now via the
 * set_timer_op hypercall. */
static int xen_timerop_set_next_event(unsigned long delta,
				      struct clock_event_device *evt)
{
	WARN_ON(!clockevent_state_oneshot(evt));

	if (HYPERVISOR_set_timer_op(get_abs_timeout(delta)) < 0)
		BUG();

	/*
	 * We may have missed the deadline, but there's no real way of
	 * knowing for sure.  If the event was in the past, then we'll
	 * get an immediate interrupt.
	 */
	return 0;
}
0222
/* Clockevent device using the plain set_timer_op hypercall (fallback
 * when the per-vcpu timer interface is unavailable). */
static struct clock_event_device xen_timerop_clockevent __ro_after_init = {
	.name = "xen",
	.features = CLOCK_EVT_FEAT_ONESHOT,

	.max_delta_ns = 0xffffffff,
	.max_delta_ticks = 0xffffffff,
	.min_delta_ns = TIMER_SLOP,
	.min_delta_ticks = TIMER_SLOP,

	/* mult=1, shift=0: event "ticks" are already nanoseconds. */
	.mult = 1,
	.shift = 0,
	.rating = 500,

	.set_state_shutdown = xen_timerop_shutdown,
	.set_next_event = xen_timerop_set_next_event,
};
0239
/* Shutdown state handler for the vcpuop clockevent: stop both the
 * single-shot and periodic per-vcpu timers. */
static int xen_vcpuop_shutdown(struct clock_event_device *evt)
{
	int cpu = smp_processor_id();

	if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, xen_vcpu_nr(cpu),
			       NULL) ||
	    HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
			       NULL))
		BUG();

	return 0;
}
0252
/* Switch to one-shot mode: stop the periodic per-vcpu timer; the
 * single-shot timer is armed later via set_next_event. */
static int xen_vcpuop_set_oneshot(struct clock_event_device *evt)
{
	int cpu = smp_processor_id();

	if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
			       NULL))
		BUG();

	return 0;
}
0263
/* Program the next one-shot event @delta ns from now using the
 * per-vcpu single-shot timer. */
static int xen_vcpuop_set_next_event(unsigned long delta,
				     struct clock_event_device *evt)
{
	int cpu = smp_processor_id();
	struct vcpu_set_singleshot_timer single;
	int ret;

	WARN_ON(!clockevent_state_oneshot(evt));

	single.timeout_abs_ns = get_abs_timeout(delta);
	/* Get an event anyway, even if the timeout is already expired */
	single.flags = 0;

	ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, xen_vcpu_nr(cpu),
				 &single);
	BUG_ON(ret != 0);

	return ret;
}
0283
/* Clockevent device using the per-vcpu VCPUOP timer interface
 * (preferred when available; selected in xen_time_init()). */
static struct clock_event_device xen_vcpuop_clockevent __ro_after_init = {
	.name = "xen",
	.features = CLOCK_EVT_FEAT_ONESHOT,

	.max_delta_ns = 0xffffffff,
	.max_delta_ticks = 0xffffffff,
	.min_delta_ns = TIMER_SLOP,
	.min_delta_ticks = TIMER_SLOP,

	/* mult=1, shift=0: event "ticks" are already nanoseconds. */
	.mult = 1,
	.shift = 0,
	.rating = 500,

	.set_state_shutdown = xen_vcpuop_shutdown,
	.set_state_oneshot = xen_vcpuop_set_oneshot,
	.set_next_event = xen_vcpuop_set_next_event,
};
0301
/* Active clockevent implementation: timer_op by default, upgraded to
 * the vcpuop interface in xen_time_init() if the hypervisor supports it. */
static const struct clock_event_device *xen_clockevent =
	&xen_timerop_clockevent;

/* Per-cpu clockevent device plus a unique IRQ name ("timerN"). */
struct xen_clock_event_device {
	struct clock_event_device evt;
	char name[16];
};
/* irq = -1 means "no timer IRQ bound for this cpu yet". */
static DEFINE_PER_CPU(struct xen_clock_event_device, xen_clock_events) = { .evt.irq = -1 };
0310
0311 static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
0312 {
0313 struct clock_event_device *evt = this_cpu_ptr(&xen_clock_events.evt);
0314 irqreturn_t ret;
0315
0316 ret = IRQ_NONE;
0317 if (evt->event_handler) {
0318 evt->event_handler(evt);
0319 ret = IRQ_HANDLED;
0320 }
0321
0322 return ret;
0323 }
0324
0325 void xen_teardown_timer(int cpu)
0326 {
0327 struct clock_event_device *evt;
0328 evt = &per_cpu(xen_clock_events, cpu).evt;
0329
0330 if (evt->irq >= 0) {
0331 unbind_from_irqhandler(evt->irq, NULL);
0332 evt->irq = -1;
0333 }
0334 }
0335
/*
 * Bind a VIRQ_TIMER interrupt for @cpu and initialize its per-cpu
 * clockevent from the active xen_clockevent template.  If an IRQ is
 * already bound (unexpected), warn and tear it down first.
 */
void xen_setup_timer(int cpu)
{
	struct xen_clock_event_device *xevt = &per_cpu(xen_clock_events, cpu);
	struct clock_event_device *evt = &xevt->evt;
	int irq;

	WARN(evt->irq >= 0, "IRQ%d for CPU%d is already allocated\n", evt->irq, cpu);
	if (evt->irq >= 0)
		xen_teardown_timer(cpu);

	printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);

	snprintf(xevt->name, sizeof(xevt->name), "timer%d", cpu);

	irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
				      IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER|
				      IRQF_FORCE_RESUME|IRQF_EARLY_RESUME,
				      xevt->name, NULL);
	/* Best-effort: priority bump failure is not fatal. */
	(void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX);

	memcpy(evt, xen_clockevent, sizeof(*evt));

	evt->cpumask = cpumask_of(cpu);
	evt->irq = irq;
}
0361
0362
/* Register this cpu's clockevent device with the clockevents core.
 * Must run on the cpu whose device is being registered. */
void xen_setup_cpu_clockevents(void)
{
	clockevents_register_device(this_cpu_ptr(&xen_clock_events.evt));
}
0367
/*
 * Resume hook: when using the vcpuop timer interface, re-stop the
 * periodic timer on every online vcpu (the hypervisor re-enables it
 * across save/restore).
 */
void xen_timer_resume(void)
{
	int cpu;

	if (xen_clockevent != &xen_vcpuop_clockevent)
		return;

	for_each_online_cpu(cpu) {
		if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer,
				       xen_vcpu_nr(cpu), NULL))
			BUG();
	}
}
0381
/* Secondary vcpu_time_info page registered for vDSO use, or NULL. */
static struct pvclock_vsyscall_time_info *xen_clock __read_mostly;
/* Clock value captured at save time; used to rebuild the sched_clock
 * offset on restore so sched_clock stays continuous. */
static u64 xen_clock_value_saved;
0384
/*
 * Pre-suspend: record the current sched_clock value and, if a
 * secondary vDSO time area is registered, unregister it (addr = NULL)
 * and clear its page.
 */
void xen_save_time_memory_area(void)
{
	struct vcpu_register_time_memory_area t;
	int ret;

	xen_clock_value_saved = xen_clocksource_read() - xen_sched_clock_offset;

	if (!xen_clock)
		return;

	/* NULL address unregisters the area with the hypervisor. */
	t.addr.v = NULL;

	ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);
	if (ret != 0)
		pr_notice("Cannot save secondary vcpu_time_info (err %d)",
			  ret);
	else
		clear_page(xen_clock);
}
0404
/*
 * Post-resume counterpart of xen_save_time_memory_area(): re-register
 * the secondary vDSO time area (if one existed), resume pvclock, and
 * recompute the sched_clock offset so it continues from the saved value.
 */
void xen_restore_time_memory_area(void)
{
	struct vcpu_register_time_memory_area t;
	int ret;

	if (!xen_clock)
		goto out;

	t.addr.v = &xen_clock->pvti;

	ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);

	/*
	 * We don't disable VDSO_CLOCKMODE_PVCLOCK entirely if re-registering
	 * fails here (e.g. after migrating to a host without support);
	 * only log it.  NOTE(review): presumably the vDSO falls back to
	 * the syscall path in that case — confirm against vDSO code.
	 */
	if (ret != 0)
		pr_notice("Cannot restore secondary vcpu_time_info (err %d)",
			  ret);

out:
	/* Need pvclock_resume() before using xen_clocksource_read(). */
	pvclock_resume();
	xen_sched_clock_offset = xen_clocksource_read() - xen_clock_value_saved;
}
0435
/*
 * Try to set up a secondary vcpu_time_info area for vDSO (userspace)
 * clock reads.  On success, stash the page in xen_clock and enable the
 * PVCLOCK vDSO clock mode; on any failure, unwind and leave the vDSO
 * mode disabled.
 */
static void xen_setup_vsyscall_time_info(void)
{
	struct vcpu_register_time_memory_area t;
	struct pvclock_vsyscall_time_info *ti;
	int ret;

	ti = (struct pvclock_vsyscall_time_info *)get_zeroed_page(GFP_KERNEL);
	if (!ti)
		return;

	t.addr.v = &ti->pvti;

	ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);
	if (ret) {
		pr_notice("xen: VDSO_CLOCKMODE_PVCLOCK not supported (err %d)\n", ret);
		free_page((unsigned long)ti);
		return;
	}

	/*
	 * The caller checked PVCLOCK_TSC_STABLE_BIT on the primary time
	 * info, so the secondary copy should have it too — but re-check
	 * in case the hypervisor disagrees.  If unset, unregister the
	 * area again (only free the page if unregistering succeeded,
	 * since the hypervisor may still write to it otherwise).
	 */
	if (!(ti->pvti.flags & PVCLOCK_TSC_STABLE_BIT)) {
		t.addr.v = NULL;
		ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area,
					 0, &t);
		if (!ret)
			free_page((unsigned long)ti);

		pr_notice("xen: VDSO_CLOCKMODE_PVCLOCK not supported (tsc unstable)\n");
		return;
	}

	xen_clock = ti;
	pvclock_set_pvti_cpu0_va(xen_clock);

	xen_clocksource.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK;
}
0476
/*
 * Main time initialization: register the Xen clocksource, pick the
 * clockevent backend, set the initial system time from the Xen wall
 * clock, and wire up per-cpu timer state for the boot cpu.
 */
static void __init xen_time_init(void)
{
	struct pvclock_vcpu_time_info *pvti;
	int cpu = smp_processor_id();
	struct timespec64 tp;

	/* As the initial domain is never migrated, prefer the native TSC
	 * clocksource there by lowering the pvclock rating. */
	if (xen_initial_domain())
		xen_clocksource.rating = 275;

	clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC);

	if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
			       NULL) == 0) {
		/* Successfully turned off the periodic tick, so the
		 * per-vcpu (vcpuop) timer interface is available. */
		printk(KERN_DEBUG "Xen: using vcpuop timer interface\n");
		xen_clockevent = &xen_vcpuop_clockevent;
	}

	/* Set initial system time with full resolution. */
	xen_read_wallclock(&tp);
	do_settimeofday64(&tp);

	setup_force_cpu_cap(X86_FEATURE_TSC);

	/*
	 * Check the primary time info for TSC stability before paying
	 * for the secondary (vDSO) time area setup.
	 */
	pvti = &__this_cpu_read(xen_vcpu)->time;
	if (pvti->flags & PVCLOCK_TSC_STABLE_BIT) {
		pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
		xen_setup_vsyscall_time_info();
	}

	xen_setup_runstate_info(cpu);
	xen_setup_timer(cpu);
	xen_setup_cpu_clockevents();

	xen_time_setup_guest();

	/* Only dom0 pushes time updates back to the hypervisor. */
	if (xen_initial_domain())
		pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
}
0522
/*
 * Shared PV/PVHVM setup: capture the sched_clock zero point and install
 * the paravirt steal-clock, sched_clock, TSC calibration and wallclock
 * hooks.
 */
static void __init xen_init_time_common(void)
{
	/* Current reading becomes sched_clock's zero point. */
	xen_sched_clock_offset = xen_clocksource_read();
	static_call_update(pv_steal_clock, xen_steal_clock);
	paravirt_set_sched_clock(xen_sched_clock);

	x86_platform.calibrate_tsc = xen_tsc_khz;
	x86_platform.get_wallclock = xen_get_wallclock;
}
0532
/* Install time hooks for PV guests. */
void __init xen_init_time_ops(void)
{
	xen_init_time_common();

	x86_init.timers.timer_init = xen_time_init;
	/* Per-cpu clockevents are handled by xen_setup_timer() instead. */
	x86_init.timers.setup_percpu_clockev = x86_init_noop;
	x86_cpuinit.setup_percpu_clockev = x86_init_noop;

	/* Dom0 keeps the native set_wallclock (it may drive a real RTC);
	 * other domains get the always-ENODEV stub. */
	if (!xen_initial_domain())
		x86_platform.set_wallclock = xen_set_wallclock;
}
0545
0546 #ifdef CONFIG_XEN_PVHVM
/* Per-cpu clockevent setup for PVHVM guests. */
static void xen_hvm_setup_cpu_clockevents(void)
{
	int cpu = smp_processor_id();
	xen_setup_runstate_info(cpu);
	/*
	 * NOTE(review): xen_setup_timer(cpu) is not called here —
	 * presumably the timer IRQ is bound elsewhere (e.g. the CPU
	 * bringup/hotplug path), since this can run in a context where
	 * xen_setup_timer()'s allocations would be unsafe.  Confirm
	 * against the hotplug code before relying on this.
	 */
	xen_setup_cpu_clockevents();
}
0558
/*
 * Install time hooks for PVHVM guests.  May be called more than once;
 * only the first successful invocation takes effect (it may bail out
 * early and be retried later — see the xen_vcpu check below).
 */
void __init xen_hvm_init_time_ops(void)
{
	static bool hvm_time_initialized;

	if (hvm_time_initialized)
		return;

	/*
	 * The pv timer is unusable without the event-channel vector
	 * callback; fall back to emulated timers in that case.
	 */
	if (!xen_have_vector_callback)
		return;

	if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
		pr_info_once("Xen doesn't support pvclock on HVM, disable pv timer");
		return;
	}

	/*
	 * __this_cpu_read(xen_vcpu) can still be NULL at this point
	 * (e.g. when booting on a high-numbered vcpu whose vcpu_info is
	 * not yet mapped); calling xen_clocksource_read() through
	 * xen_init_time_common() would then dereference NULL.  Bail out
	 * now — the caller is expected to invoke this function again
	 * once the vcpu info is available.
	 */
	if (!__this_cpu_read(xen_vcpu)) {
		pr_info("Delay xen_init_time_common() as kernel is running on vcpu=%d\n",
			xen_vcpu_nr(0));
		return;
	}

	xen_init_time_common();

	x86_init.timers.setup_percpu_clockev = xen_time_init;
	x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;

	x86_platform.set_wallclock = xen_set_wallclock;

	hvm_time_initialized = true;
}
0603 #endif
0604
0605
/*
 * "xen_timer_slop=" early parameter: override the default TIMER_SLOP
 * minimum clockevent delta (in ns) for both timer backends.  Must run
 * before the clockevent devices are registered, hence early_param.
 */
static int __init parse_xen_timer_slop(char *ptr)
{
	unsigned long slop = memparse(ptr, NULL);

	xen_timerop_clockevent.min_delta_ns = slop;
	xen_timerop_clockevent.min_delta_ticks = slop;
	xen_vcpuop_clockevent.min_delta_ns = slop;
	xen_vcpuop_clockevent.min_delta_ticks = slop;

	return 0;
}
early_param("xen_timer_slop", parse_xen_timer_slop);