/*
 * Support for MMIO probes (kmmio), the mechanism behind mmiotrace.
 *
 * Pages containing traced MMIO addresses are "armed" by marking them not
 * present, so that any access faults. kmmio_handler() catches the fault,
 * calls the probe's pre_handler, restores the page and single-steps the
 * faulting instruction. post_kmmio_handler() then calls the post_handler
 * from the debug trap and re-arms the page.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/spinlock.h>
#include <linux/hash.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/uaccess.h>
#include <linux/ptrace.h>
#include <linux/preempt.h>
#include <linux/percpu.h>
#include <linux/kdebug.h>
#include <linux/mutex.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/mmiotrace.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/debugreg.h>

#define KMMIO_PAGE_HASH_BITS 4
#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS)

struct kmmio_fault_page {
	struct list_head list;
	struct kmmio_fault_page *release_next;
	unsigned long addr;		/* page-aligned address of the armed page */
	pteval_t old_presence;		/* page presence prior to arming */
	bool armed;

	/*
	 * Number of probes registered against this page. Modified under
	 * kmmio_lock; when it drops to zero the page is disarmed and
	 * scheduled for release.
	 */
	int count;

	bool scheduled_for_release;
};

struct kmmio_delayed_release {
	struct rcu_head rcu;
	struct kmmio_fault_page *release_list;
};

struct kmmio_context {
	struct kmmio_fault_page *fpage;
	struct kmmio_probe *probe;
	unsigned long saved_flags;
	unsigned long addr;
	int active;
};

static DEFINE_SPINLOCK(kmmio_lock);

/* Protected by kmmio_lock */
unsigned int kmmio_count;

/* Read-protected by RCU, write-protected by kmmio_lock. */
static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
static LIST_HEAD(kmmio_probes);

static struct list_head *kmmio_page_list(unsigned long addr)
{
	unsigned int l;
	pte_t *pte = lookup_address(addr, &l);

	if (!pte)
		return NULL;
	addr &= page_level_mask(l);

	return &kmmio_page_table[hash_long(addr, KMMIO_PAGE_HASH_BITS)];
}

/* Accessed per-cpu */
static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx);

/*
 * Return the probe covering addr, if any.
 * You must be holding the RCU read lock.
 */
static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
{
	struct kmmio_probe *p;
	list_for_each_entry_rcu(p, &kmmio_probes, list) {
		if (addr >= p->addr && addr < (p->addr + p->len))
			return p;
	}
	return NULL;
}

/* You must be holding the RCU read lock. */
static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long addr)
{
	struct list_head *head;
	struct kmmio_fault_page *f;
	unsigned int l;
	pte_t *pte = lookup_address(addr, &l);

	if (!pte)
		return NULL;
	addr &= page_level_mask(l);
	head = kmmio_page_list(addr);
	list_for_each_entry_rcu(f, head, list) {
		if (f->addr == addr)
			return f;
	}
	return NULL;
}

static void clear_pmd_presence(pmd_t *pmd, bool clear, pmdval_t *old)
{
	pmd_t new_pmd;
	pmdval_t v = pmd_val(*pmd);
	if (clear) {
		*old = v;
		new_pmd = pmd_mkinvalid(*pmd);
	} else {
		/* Presume this has been called with clear==true before */
		new_pmd = __pmd(*old);
	}
	set_pmd(pmd, new_pmd);
}

static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old)
{
	pteval_t v = pte_val(*pte);
	if (clear) {
		*old = v;
		/* Nothing should care about the address here. */
		pte_clear(&init_mm, 0, pte);
	} else {
		/* Presume this has been called with clear==true before */
		set_pte_atomic(pte, __pte(*old));
	}
}

static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
{
	unsigned int level;
	pte_t *pte = lookup_address(f->addr, &level);

	if (!pte) {
		pr_err("no pte for addr 0x%08lx\n", f->addr);
		return -1;
	}

	switch (level) {
	case PG_LEVEL_2M:
		clear_pmd_presence((pmd_t *)pte, clear, &f->old_presence);
		break;
	case PG_LEVEL_4K:
		clear_pte_presence(pte, clear, &f->old_presence);
		break;
	default:
		pr_err("unexpected page level 0x%x.\n", level);
		return -1;
	}

	flush_tlb_one_kernel(f->addr);
	return 0;
}

/*
 * Mark the given page as not present. Access to it will trigger a fault.
 *
 * Both callers (add_kmmio_fault_page() and post_kmmio_handler()) hold
 * kmmio_lock, so the kmmio_fault_page cannot change under us.
 */
static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
{
	int ret;
	WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n"));
	if (f->armed) {
		pr_warn("double-arm: addr 0x%08lx, ref %d, old %d\n",
			f->addr, f->count, !!f->old_presence);
	}
	ret = clear_page_presence(f, true);
	WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming at 0x%08lx failed.\n"),
		  f->addr);
	f->armed = true;
	return ret;
}

/* Restore the page to the presence state saved when it was armed. */
static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
{
	int ret = clear_page_presence(f, false);
	WARN_ONCE(ret < 0,
			KERN_ERR "kmmio disarming at 0x%08lx failed.\n", f->addr);
	f->armed = false;
}

/*
 * Called from the page fault handler for faults in armed pages.
 *
 * We may be in an interrupt or a critical section, and prefetching may
 * trigger a fault as well. No locks may be taken here, because we could
 * already be inside a kmmio critical section.
 * Do not enable interrupts, do not sleep, keep it fast!
 */
int kmmio_handler(struct pt_regs *regs, unsigned long addr)
{
	struct kmmio_context *ctx;
	struct kmmio_fault_page *faultpage;
	int ret = 0; /* default to fault not handled */
	unsigned long page_base = addr;
	unsigned int l;
	pte_t *pte = lookup_address(addr, &l);
	if (!pte)
		return -EINVAL;
	page_base &= page_level_mask(l);

	/*
	 * Preemption is disabled to prevent a process switch during the
	 * single step. We can only handle one active kmmio trace per cpu,
	 * so ensure that we finish it before something else gets to run.
	 * The RCU read lock is held over the single step as well, so the
	 * probe and kmmio_fault_page do not have to be looked up again.
	 */
	preempt_disable();
	rcu_read_lock();

	faultpage = get_kmmio_fault_page(page_base);
	if (!faultpage) {
		/*
		 * Either this page fault is not caused by kmmio, or
		 * another CPU just pulled the kmmio probe from under
		 * our feet. The latter case should not be possible.
		 */
		goto no_kmmio;
	}

	ctx = this_cpu_ptr(&kmmio_ctx);
	if (ctx->active) {
		if (page_base == ctx->addr) {
			/*
			 * A second fault on the same page means some other
			 * condition needs handling by do_page_fault(); the
			 * page really not being present is the most common.
			 */
			pr_debug("secondary hit for 0x%08lx CPU %d.\n",
				 addr, smp_processor_id());

			if (!faultpage->old_presence)
				pr_info("unexpected secondary hit for address 0x%08lx on CPU %d.\n",
					addr, smp_processor_id());
		} else {
			/*
			 * Prevent overwriting the in-flight context on this
			 * CPU. This should not happen; disarming the page at
			 * least avoids an endless fault storm.
			 */
			pr_emerg("recursive probe hit on CPU %d, for address 0x%08lx. Ignoring.\n",
				 smp_processor_id(), addr);
			pr_emerg("previous hit was at 0x%08lx.\n", ctx->addr);
			disarm_kmmio_fault_page(faultpage);
		}
		goto no_kmmio;
	}
	ctx->active++;

	ctx->fpage = faultpage;
	ctx->probe = get_kmmio_probe(page_base);
	ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
	ctx->addr = page_base;

	if (ctx->probe && ctx->probe->pre_handler)
		ctx->probe->pre_handler(ctx->probe, regs, addr);

	/*
	 * Enable single-stepping and disable interrupts for the faulting
	 * context. Local interrupts must not get enabled during stepping.
	 */
	regs->flags |= X86_EFLAGS_TF;
	regs->flags &= ~X86_EFLAGS_IF;

	/* Now we set the present bit in the PTE and single step. */
	disarm_kmmio_fault_page(ctx->fpage);

	/*
	 * If another cpu accesses the same page while we are stepping,
	 * the access will not be caught. It will simply succeed and the
	 * only downside is we lose the event. If this becomes a problem,
	 * the user should drop to a single cpu before tracing.
	 */

	return 1; /* fault handled */

no_kmmio:
	rcu_read_unlock();
	preempt_enable_no_resched();
	return ret;
}

/*
 * Called from the debug (single-step) trap via kmmio_die_notifier().
 * The faulting context runs with interrupts disabled (kmmio_handler()
 * cleared IF), and this must always get called as the pair to
 * kmmio_handler().
 */
static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
{
	int ret = 0;
	struct kmmio_context *ctx = this_cpu_ptr(&kmmio_ctx);

	if (!ctx->active) {
		/*
		 * Debug traps without an active context are caused either by
		 * something external (e.g. a debugger used while mmio tracing
		 * is enabled) or by erroneous behaviour.
		 */
		pr_warn("unexpected debug trap on CPU %d.\n", smp_processor_id());
		goto out;
	}

	if (ctx->probe && ctx->probe->post_handler)
		ctx->probe->post_handler(ctx->probe, condition, regs);

	/* Prevent racing against release_kmmio_fault_page(). */
	spin_lock(&kmmio_lock);
	if (ctx->fpage->count)
		arm_kmmio_fault_page(ctx->fpage);
	spin_unlock(&kmmio_lock);

	regs->flags &= ~X86_EFLAGS_TF;
	regs->flags |= ctx->saved_flags;

	/* These were acquired in kmmio_handler(). */
	ctx->active--;
	BUG_ON(ctx->active);
	rcu_read_unlock();
	preempt_enable_no_resched();

	/*
	 * If somebody else is single-stepping across a probe point, flags
	 * will have TF set; in that case, continue the remaining processing
	 * of do_debug, as if this were not a probe hit.
	 */
	if (!(regs->flags & X86_EFLAGS_TF))
		ret = 1;
out:
	return ret;
}

/* You must be holding kmmio_lock. */
static int add_kmmio_fault_page(unsigned long addr)
{
	struct kmmio_fault_page *f;

	f = get_kmmio_fault_page(addr);
	if (f) {
		if (!f->count)
			arm_kmmio_fault_page(f);
		f->count++;
		return 0;
	}

	f = kzalloc(sizeof(*f), GFP_ATOMIC);
	if (!f)
		return -1;

	f->count = 1;
	f->addr = addr;

	if (arm_kmmio_fault_page(f)) {
		kfree(f);
		return -1;
	}

	list_add_rcu(&f->list, kmmio_page_list(f->addr));

	return 0;
}

/* You must be holding kmmio_lock. */
static void release_kmmio_fault_page(unsigned long addr,
				struct kmmio_fault_page **release_list)
{
	struct kmmio_fault_page *f;

	f = get_kmmio_fault_page(addr);
	if (!f)
		return;

	f->count--;
	BUG_ON(f->count < 0);
	if (!f->count) {
		disarm_kmmio_fault_page(f);
		if (!f->scheduled_for_release) {
			f->release_next = *release_list;
			*release_list = f;
			f->scheduled_for_release = true;
		}
	}
}

/*
 * With page-unaligned ioremaps, one or two armed pages may contain
 * addresses from outside the intended mapping. Events for these addresses
 * are currently silently dropped; they can only result from programming
 * mistakes that access addresses before the beginning or past the end of
 * a mapping.
 */
int register_kmmio_probe(struct kmmio_probe *p)
{
	unsigned long flags;
	int ret = 0;
	unsigned long size = 0;
	unsigned long addr = p->addr & PAGE_MASK;
	const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
	unsigned int l;
	pte_t *pte;

	spin_lock_irqsave(&kmmio_lock, flags);
	if (get_kmmio_probe(addr)) {
		ret = -EEXIST;
		goto out;
	}

	pte = lookup_address(addr, &l);
	if (!pte) {
		ret = -EINVAL;
		goto out;
	}

	kmmio_count++;
	list_add_rcu(&p->list, &kmmio_probes);
	while (size < size_lim) {
		if (add_kmmio_fault_page(addr + size))
			pr_err("Unable to set page fault.\n");
		size += page_level_size(l);
	}
out:
	spin_unlock_irqrestore(&kmmio_lock, flags);
	/*
	 * Armed pages are flushed one at a time in clear_page_presence();
	 * no additional TLB flushing is done here.
	 */
	return ret;
}
EXPORT_SYMBOL(register_kmmio_probe);
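
/*
 * Usage sketch (illustrative only, not part of this file): a hypothetical
 * caller would cover the MMIO range it wants to trace with a struct
 * kmmio_probe and register it. The handler signatures below are inferred
 * from the call sites in kmmio_handler() and post_kmmio_handler(); see
 * <linux/mmiotrace.h> for the authoritative definitions.
 *
 *	static void my_pre(struct kmmio_probe *p, struct pt_regs *regs,
 *			   unsigned long addr)
 *	{
 *		// called before the faulting MMIO access is executed
 *	}
 *
 *	static void my_post(struct kmmio_probe *p, unsigned long condition,
 *			    struct pt_regs *regs)
 *	{
 *		// called from the debug trap, after the single step
 *	}
 *
 *	static struct kmmio_probe my_probe = {
 *		.len		= PAGE_SIZE,
 *		.pre_handler	= my_pre,
 *		.post_handler	= my_post,
 *	};
 *
 *	// my_probe.addr = (unsigned long)ioremapped_base;  // hypothetical
 *	// err = register_kmmio_probe(&my_probe);
 *	// ... accesses to the range now invoke the handlers ...
 *	// unregister_kmmio_probe(&my_probe);
 *	// Wait an RCU grace period (synchronize_rcu()) before freeing a
 *	// dynamically allocated probe, since readers traverse the probe
 *	// list under RCU.
 */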

static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
{
	struct kmmio_delayed_release *dr = container_of(
						head,
						struct kmmio_delayed_release,
						rcu);
	struct kmmio_fault_page *f = dr->release_list;
	while (f) {
		struct kmmio_fault_page *next = f->release_next;
		BUG_ON(f->count);
		kfree(f);
		f = next;
	}
	kfree(dr);
}

static void remove_kmmio_fault_pages(struct rcu_head *head)
{
	struct kmmio_delayed_release *dr =
		container_of(head, struct kmmio_delayed_release, rcu);
	struct kmmio_fault_page *f = dr->release_list;
	struct kmmio_fault_page **prevp = &dr->release_list;
	unsigned long flags;

	spin_lock_irqsave(&kmmio_lock, flags);
	while (f) {
		if (!f->count) {
			list_del_rcu(&f->list);
			prevp = &f->release_next;
		} else {
			*prevp = f->release_next;
			f->release_next = NULL;
			f->scheduled_for_release = false;
		}
		f = *prevp;
	}
	spin_unlock_irqrestore(&kmmio_lock, flags);

	/* This is the real RCU destroy call. */
	call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
}

/*
 * Remove a kmmio probe. Call synchronize_rcu() afterwards before you can
 * be sure the handlers will not be called anymore, and only then free the
 * struct kmmio_probe.
 *
 * Tearing down a fault page happens in three steps:
 * 1. release_kmmio_fault_page(): disarm the page under kmmio_lock.
 * 2. remove_kmmio_fault_pages(): after an RCU grace period, unlink the
 *    pages from kmmio_page_table.
 * 3. rcu_free_kmmio_fault_pages(): after another grace period, free the
 *    kmmio_fault_page structs.
 */
void unregister_kmmio_probe(struct kmmio_probe *p)
{
	unsigned long flags;
	unsigned long size = 0;
	unsigned long addr = p->addr & PAGE_MASK;
	const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
	struct kmmio_fault_page *release_list = NULL;
	struct kmmio_delayed_release *drelease;
	unsigned int l;
	pte_t *pte;

	pte = lookup_address(addr, &l);
	if (!pte)
		return;

	spin_lock_irqsave(&kmmio_lock, flags);
	while (size < size_lim) {
		release_kmmio_fault_page(addr + size, &release_list);
		size += page_level_size(l);
	}
	list_del_rcu(&p->list);
	kmmio_count--;
	spin_unlock_irqrestore(&kmmio_lock, flags);

	if (!release_list)
		return;

	drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);
	if (!drelease) {
		pr_crit("leaking kmmio_fault_page objects.\n");
		return;
	}
	drelease->release_list = release_list;

	/*
	 * The pages have just been disarmed, so they can no longer trigger
	 * faults, but they cannot be dropped from kmmio_page_table yet:
	 * a probe hit may still be in flight on another CPU and needs to
	 * find its kmmio_fault_page. Removal and freeing are therefore
	 * deferred behind RCU grace periods (remove_kmmio_fault_pages()
	 * followed by rcu_free_kmmio_fault_pages()).
	 */
	call_rcu(&drelease->rcu, remove_kmmio_fault_pages);
}
EXPORT_SYMBOL(unregister_kmmio_probe);

static int
kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args)
{
	struct die_args *arg = args;
	unsigned long *dr6_p = (unsigned long *)ERR_PTR(arg->err);

	if (val == DIE_DEBUG && (*dr6_p & DR_STEP))
		if (post_kmmio_handler(*dr6_p, arg->regs) == 1) {
			/*
			 * Reset the BS bit in dr6 (pointed to by arg->err)
			 * to denote completion of processing.
			 */
			*dr6_p &= ~DR_STEP;
			return NOTIFY_STOP;
		}

	return NOTIFY_DONE;
}

static struct notifier_block nb_die = {
	.notifier_call = kmmio_die_notifier
};

int kmmio_init(void)
{
	int i;

	for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
		INIT_LIST_HEAD(&kmmio_page_table[i]);

	return register_die_notifier(&nb_die);
}

void kmmio_cleanup(void)
{
	int i;

	unregister_die_notifier(&nb_die);
	for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) {
		WARN_ONCE(!list_empty(&kmmio_page_table[i]),
			KERN_ERR "kmmio_page_table not empty at cleanup, any further tracing will leak memory.\n");
	}
}