#define pr_fmt(fmt) "SMP alternatives: " fmt

#include <linux/module.h>
#include <linux/sched.h>
#include <linux/perf_event.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/stringify.h>
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/memory.h>
#include <linux/stop_machine.h>
#include <linux/slab.h>
#include <linux/kdebug.h>
#include <linux/kprobes.h>
#include <linux/mmu_context.h>
#include <linux/bsearch.h>
#include <linux/sync_core.h>
#include <asm/text-patching.h>
#include <asm/alternative.h>
#include <asm/sections.h>
#include <asm/mce.h>
#include <asm/nmi.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/insn.h>
#include <asm/io.h>
#include <asm/fixmap.h>
#include <asm/paravirt.h>
#include <asm/asm-prototypes.h>

int __read_mostly alternatives_patched;

EXPORT_SYMBOL_GPL(alternatives_patched);

#define MAX_PATCH_LEN (255-1)

static int __initdata_or_module debug_alternative;

static int __init debug_alt(char *str)
{
	debug_alternative = 1;
	return 1;
}
__setup("debug-alternative", debug_alt);

static int noreplace_smp;

static int __init setup_noreplace_smp(char *str)
{
	noreplace_smp = 1;
	return 1;
}
__setup("noreplace-smp", setup_noreplace_smp);

#define DPRINTK(fmt, args...)						\
do {									\
	if (debug_alternative)						\
		printk(KERN_DEBUG pr_fmt(fmt) "\n", ##args);		\
} while (0)

#define DUMP_BYTES(buf, len, fmt, args...)				\
do {									\
	if (unlikely(debug_alternative)) {				\
		int j;							\
									\
		if (!(len))						\
			break;						\
									\
		printk(KERN_DEBUG pr_fmt(fmt), ##args);			\
		for (j = 0; j < (len) - 1; j++)				\
			printk(KERN_CONT "%02hhx ", buf[j]);		\
		printk(KERN_CONT "%02hhx\n", buf[j]);			\
	}								\
} while (0)

/*
 * Each group of NOPs, back to back; x86_nops[n] below points at the
 * n-byte NOP within this flat array.
 */
static const unsigned char x86nops[] =
{
	BYTES_NOP1,
	BYTES_NOP2,
	BYTES_NOP3,
	BYTES_NOP4,
	BYTES_NOP5,
	BYTES_NOP6,
	BYTES_NOP7,
	BYTES_NOP8,
};

const unsigned char * const x86_nops[ASM_NOP_MAX+1] =
{
	NULL,
	x86nops,
	x86nops + 1,
	x86nops + 1 + 2,
	x86nops + 1 + 2 + 3,
	x86nops + 1 + 2 + 3 + 4,
	x86nops + 1 + 2 + 3 + 4 + 5,
	x86nops + 1 + 2 + 3 + 4 + 5 + 6,
	x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};

/* Use this to add nops to a buffer, then text_poke the whole buffer. */
static void __init_or_module add_nops(void *insns, unsigned int len)
{
	while (len > 0) {
		unsigned int noplen = len;
		if (noplen > ASM_NOP_MAX)
			noplen = ASM_NOP_MAX;
		memcpy(insns, x86_nops[noplen], noplen);
		insns += noplen;
		len -= noplen;
	}
}
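
/*
 * Illustrative example (not from the original source): add_nops(buf, 13)
 * fills @buf with the 8-byte NOP followed by the 5-byte NOP, since
 * ASM_NOP_MAX is 8 on x86.
 */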

extern s32 __retpoline_sites[], __retpoline_sites_end[];
extern s32 __return_sites[], __return_sites_end[];
extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[];
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern s32 __smp_locks[], __smp_locks_end[];
void text_poke_early(void *addr, const void *opcode, size_t len);

/*
 * Are we looking at a near JMP with a 1 or 4-byte displacement?
 */
static inline bool is_jmp(const u8 opcode)
{
	return opcode == 0xeb || opcode == 0xe9;
}

static void __init_or_module
recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insn_buff)
{
	u8 *next_rip, *tgt_rip;
	s32 n_dspl, o_dspl;
	int repl_len;

	if (a->replacementlen != 5)
		return;

	o_dspl = *(s32 *)(insn_buff + 1);

	/* next_rip of the replacement JMP */
	next_rip = repl_insn + a->replacementlen;
	/* target rip of the replacement JMP */
	tgt_rip  = next_rip + o_dspl;
	n_dspl = tgt_rip - orig_insn;

	DPRINTK("target RIP: %px, new_displ: 0x%x", tgt_rip, n_dspl);

	if (tgt_rip - orig_insn >= 0) {
		if (n_dspl - 2 <= 127)
			goto two_byte_jmp;
		else
			goto five_byte_jmp;
	/* negative offset */
	} else {
		if (((n_dspl - 2) & 0xff) == (n_dspl - 2))
			goto two_byte_jmp;
		else
			goto five_byte_jmp;
	}

two_byte_jmp:
	n_dspl -= 2;

	insn_buff[0] = 0xeb;
	insn_buff[1] = (s8)n_dspl;
	add_nops(insn_buff + 2, 3);

	repl_len = 2;
	goto done;

five_byte_jmp:
	n_dspl -= 5;

	insn_buff[0] = 0xe9;
	*(s32 *)&insn_buff[1] = n_dspl;

	repl_len = 5;

done:

	DPRINTK("final displ: 0x%08x, JMP 0x%lx",
		n_dspl, (unsigned long)orig_insn + n_dspl + repl_len);
}

/*
 * optimize_nops_range() - Optimize a sequence of single byte NOPs (0x90)
 *
 * @instr: instruction byte stream
 * @instrlen: length of the above
 * @off: offset within @instr where the first NOP has been detected
 *
 * Return: number of NOPs found (and replaced).
 */
static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off)
{
	unsigned long flags;
	int i = off, nnops;

	while (i < instrlen) {
		if (instr[i] != 0x90)
			break;

		i++;
	}

	nnops = i - off;

	if (nnops <= 1)
		return nnops;

	local_irq_save(flags);
	add_nops(instr + off, nnops);
	local_irq_restore(flags);

	DUMP_BYTES(instr, instrlen, "%px: [%d:%d) optimized NOPs: ", instr, off, i);

	return nnops;
}
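
/*
 * Illustrative example (not from the original source): given the byte
 * stream 0x90 0x90 0x90 starting at @off, the three one-byte NOPs are
 * rewritten in place as the single three-byte NOP from x86_nops[3].
 */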

/*
 * "noinline" to cause control flow change and thus invalidate I$ and
 * cause refetch after modification.
 */
static void __init_or_module noinline optimize_nops(u8 *instr, size_t len)
{
	struct insn insn;
	int i = 0;

	/*
	 * Jump over the non-NOP insns and optimize single-byte NOPs into bigger
	 * ones.
	 */
	for (;;) {
		if (insn_decode_kernel(&insn, &instr[i]))
			return;

		/*
		 * See if this and any potentially following NOPs can be
		 * optimized.
		 */
		if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
			i += optimize_nops_range(instr, len, i);
		else
			i += insn.length;

		if (i >= len)
			return;
	}
}

/*
 * Replace instructions with better alternatives for this CPU type. This runs
 * before SMP is initialized to avoid SMP problems with self modifying code.
 * This implies that asymmetric systems where APs have less capabilities than
 * the boot processor are not handled. Tough. Make sure you disable such
 * features by hand.
 *
 * Marked "noinline" to cause control flow change and thus insn cache
 * to refetch changed I$ lines.
 */
void __init_or_module noinline apply_alternatives(struct alt_instr *start,
						  struct alt_instr *end)
{
	struct alt_instr *a;
	u8 *instr, *replacement;
	u8 insn_buff[MAX_PATCH_LEN];

	DPRINTK("alt table %px, -> %px", start, end);
	/*
	 * The scan order should be from start to end. A later scanned
	 * alternative code can overwrite previously scanned alternative code.
	 * Some kernel functions (e.g. memcpy, memset, etc) use this order to
	 * patch code.
	 *
	 * So be careful if you want to change the scan order to any other
	 * order.
	 */
	for (a = start; a < end; a++) {
		int insn_buff_sz = 0;
		/* Mask away "NOT" flag bit for feature to test. */
		u16 feature = a->cpuid & ~ALTINSTR_FLAG_INV;

		instr = (u8 *)&a->instr_offset + a->instr_offset;
		replacement = (u8 *)&a->repl_offset + a->repl_offset;
		BUG_ON(a->instrlen > sizeof(insn_buff));
		BUG_ON(feature >= (NCAPINTS + NBUGINTS) * 32);

		/*
		 * Patch if either:
		 * - feature is present
		 * - feature not present but ALTINSTR_FLAG_INV is set to mean,
		 *   patch if feature is *NOT* present.
		 */
		if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV))
			goto next;

		DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d)",
			(a->cpuid & ALTINSTR_FLAG_INV) ? "!" : "",
			feature >> 5,
			feature & 0x1f,
			instr, instr, a->instrlen,
			replacement, a->replacementlen);

		DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
		DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement);

		memcpy(insn_buff, replacement, a->replacementlen);
		insn_buff_sz = a->replacementlen;

		/*
		 * 0xe8 is a relative jump; fix the offset.
		 *
		 * Instruction length is checked before the opcode to avoid
		 * accessing uninitialized bytes for zero-length replacements.
		 */
		if (a->replacementlen == 5 && *insn_buff == 0xe8) {
			*(s32 *)(insn_buff + 1) += replacement - instr;
			DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx",
				*(s32 *)(insn_buff + 1),
				(unsigned long)instr + *(s32 *)(insn_buff + 1) + 5);
		}

		if (a->replacementlen && is_jmp(replacement[0]))
			recompute_jump(a, instr, replacement, insn_buff);

		/* Pad the remainder of the buffer with single-byte NOPs. */
		for (; insn_buff_sz < a->instrlen; insn_buff_sz++)
			insn_buff[insn_buff_sz] = 0x90;

		DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr);

		text_poke_early(instr, insn_buff, insn_buff_sz);

next:
		optimize_nops(instr, a->instrlen);
	}
}
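
/*
 * Hypothetical usage sketch (the alternative() macro lives in
 * <asm/alternative.h>; the feature bit here is made up):
 *
 *	alternative("call default_impl", "call fancy_impl", X86_FEATURE_FANCY);
 *
 * emits the first sequence plus an alt_instr entry. apply_alternatives()
 * then overwrites the site with the second sequence, CALL offset fixed up
 * and NOP-padded, iff the boot CPU has X86_FEATURE_FANCY.
 */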

#if defined(CONFIG_RETPOLINE) && defined(CONFIG_OBJTOOL)

/*
 * CALL/JMP *%\reg
 */
static int emit_indirect(int op, int reg, u8 *bytes)
{
	int i = 0;
	u8 modrm;

	switch (op) {
	case CALL_INSN_OPCODE:
		modrm = 0x10; /* Reg = 2; CALL r/m */
		break;

	case JMP32_INSN_OPCODE:
		modrm = 0x20; /* Reg = 4; JMP r/m */
		break;

	default:
		WARN_ON_ONCE(1);
		return -1;
	}

	if (reg >= 8) {
		bytes[i++] = 0x41; /* REX.B prefix */
		reg -= 8;
	}

	modrm |= 0xc0; /* Mod = 3 */
	modrm += reg;

	bytes[i++] = 0xff; /* opcode */
	bytes[i++] = modrm;

	return i;
}
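
/*
 * Worked example (illustrative): emit_indirect(CALL_INSN_OPCODE, 8, bytes)
 * encodes CALL *%r8 as 41 ff d0 -- the REX.B prefix, the 0xff opcode, and
 * ModRM 0xd0 (Mod=3, Reg=2 for CALL, RM=0 after the REX adjustment).
 */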

/*
 * Rewrite the compiler generated retpoline thunk calls.
 *
 * For spectre_v2=off (!X86_FEATURE_RETPOLINE), rewrite them into immediate
 * indirect instructions, avoiding the extra indirection.
 *
 * For example, convert:
 *
 *   CALL __x86_indirect_thunk_\reg
 *
 * into:
 *
 *   CALL *%\reg
 *
 * It also tries to inline spectre_v2=retpoline,lfence when size permits.
 */
static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
{
	retpoline_thunk_t *target;
	int reg, ret, i = 0;
	u8 op, cc;

	target = addr + insn->length + insn->immediate.value;
	reg = target - __x86_indirect_thunk_array;

	if (WARN_ON_ONCE(reg & ~0xf))
		return -1;

	/* If anyone ever does: CALL/JMP *%rsp, we're in deep trouble. */
	BUG_ON(reg == 4);

	if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) &&
	    !cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE))
		return -1;

	op = insn->opcode.bytes[0];

	/*
	 * Convert:
	 *
	 *   Jcc.d32 __x86_indirect_thunk_\reg
	 *
	 * into:
	 *
	 *   Jncc.d8 1f
	 *   [ LFENCE ]
	 *   JMP *%\reg
	 *   [ NOP ]
	 * 1:
	 */
	/* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */
	if (op == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80) {
		cc = insn->opcode.bytes[1] & 0xf;
		cc ^= 1; /* invert condition */

		bytes[i++] = 0x70 + cc;        /* Jcc.d8 */
		bytes[i++] = insn->length - 2; /* sizeof(Jcc.d8) == 2 */

		/* Continue as if: JMP.d32 __x86_indirect_thunk_\reg */
		op = JMP32_INSN_OPCODE;
	}

	/*
	 * For RETPOLINE_LFENCE: prepend the indirect CALL/JMP with an LFENCE.
	 */
	if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
		bytes[i++] = 0x0f;
		bytes[i++] = 0xae;
		bytes[i++] = 0xe8; /* LFENCE */
	}

	ret = emit_indirect(op, reg, bytes + i);
	if (ret < 0)
		return ret;
	i += ret;

	/* Pad whatever is left of the original instruction with NOPs. */
	for (; i < insn->length;)
		bytes[i++] = BYTES_NOP1;

	return i;
}
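
/*
 * Worked example (illustrative): with retpolines disabled, the 5-byte
 * "e8 xx xx xx xx" CALL __x86_indirect_thunk_rax is rewritten as
 * "ff d0" (CALL *%rax) followed by three one-byte NOPs.
 */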

/*
 * Generated by 'objtool --retpoline'.
 */
void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
{
	s32 *s;

	for (s = start; s < end; s++) {
		void *addr = (void *)s + *s;
		struct insn insn;
		int len, ret;
		u8 bytes[16];
		u8 op1, op2;

		ret = insn_decode_kernel(&insn, addr);
		if (WARN_ON_ONCE(ret < 0))
			continue;

		op1 = insn.opcode.bytes[0];
		op2 = insn.opcode.bytes[1];

		switch (op1) {
		case CALL_INSN_OPCODE:
		case JMP32_INSN_OPCODE:
			break;

		case 0x0f: /* escape */
			if (op2 >= 0x80 && op2 <= 0x8f)
				break;
			fallthrough;
		default:
			WARN_ON_ONCE(1);
			continue;
		}

		DPRINTK("retpoline at: %pS (%px) len: %d to: %pS",
			addr, addr, insn.length,
			addr + insn.length + insn.immediate.value);

		len = patch_retpoline(addr, &insn, bytes);
		if (len == insn.length) {
			optimize_nops(bytes, len);
			DUMP_BYTES(((u8*)addr),  len, "%px: orig: ", addr);
			DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
			text_poke_early(addr, bytes, len);
		}
	}
}

#ifdef CONFIG_RETHUNK
/*
 * Rewrite the compiler generated return thunk tail-calls.
 *
 * For example, convert:
 *
 *   JMP __x86_return_thunk
 *
 * into:
 *
 *   RET
 */
static int patch_return(void *addr, struct insn *insn, u8 *bytes)
{
	int i = 0;

	if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
		return -1;

	bytes[i++] = RET_INSN_OPCODE;

	for (; i < insn->length;)
		bytes[i++] = INT3_INSN_OPCODE;

	return i;
}
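
/*
 * Worked example (illustrative): a 5-byte "e9 xx xx xx xx"
 * JMP __x86_return_thunk becomes "c3 cc cc cc cc" -- a bare RET with
 * the remaining bytes filled by INT3 speculation traps.
 */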

void __init_or_module noinline apply_returns(s32 *start, s32 *end)
{
	s32 *s;

	for (s = start; s < end; s++) {
		void *dest = NULL, *addr = (void *)s + *s;
		struct insn insn;
		int len, ret;
		u8 bytes[16];
		u8 op;

		ret = insn_decode_kernel(&insn, addr);
		if (WARN_ON_ONCE(ret < 0))
			continue;

		op = insn.opcode.bytes[0];
		if (op == JMP32_INSN_OPCODE)
			dest = addr + insn.length + insn.immediate.value;

		if (__static_call_fixup(addr, op, dest) ||
		    WARN_ONCE(dest != &__x86_return_thunk,
			      "missing return thunk: %pS-%pS: %*ph",
			      addr, dest, 5, addr))
			continue;

		DPRINTK("return thunk at: %pS (%px) len: %d to: %pS",
			addr, addr, insn.length,
			addr + insn.length + insn.immediate.value);

		len = patch_return(addr, &insn, bytes);
		if (len == insn.length) {
			DUMP_BYTES(((u8*)addr),  len, "%px: orig: ", addr);
			DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
			text_poke_early(addr, bytes, len);
		}
	}
}
#else
void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
#endif /* CONFIG_RETHUNK */

#else /* !CONFIG_RETPOLINE || !CONFIG_OBJTOOL */

void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { }
void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }

#endif /* CONFIG_RETPOLINE && CONFIG_OBJTOOL */

#ifdef CONFIG_X86_KERNEL_IBT

/*
 * Generated by: objtool --ibt
 */
void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end)
{
	s32 *s;

	for (s = start; s < end; s++) {
		u32 endbr, poison = gen_endbr_poison();
		void *addr = (void *)s + *s;

		if (WARN_ON_ONCE(get_kernel_nofault(endbr, addr)))
			continue;

		if (WARN_ON_ONCE(!is_endbr(endbr)))
			continue;

		DPRINTK("ENDBR at: %pS (%px)", addr, addr);

		/*
		 * When we have IBT, the lack of ENDBR will trigger #CP.
		 */
		DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr);
		DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr);
		text_poke_early(addr, &poison, 4);
	}
}

#else

void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) { }

#endif /* CONFIG_X86_KERNEL_IBT */

#ifdef CONFIG_SMP
static void alternatives_smp_lock(const s32 *start, const s32 *end,
				  u8 *text, u8 *text_end)
{
	const s32 *poff;

	for (poff = start; poff < end; poff++) {
		u8 *ptr = (u8 *)poff + *poff;

		if (!*poff || ptr < text || ptr >= text_end)
			continue;
		/* turn DS segment override prefix into lock prefix */
		if (*ptr == 0x3e)
			text_poke(ptr, ((unsigned char []){0xf0}), 1);
	}
}

static void alternatives_smp_unlock(const s32 *start, const s32 *end,
				    u8 *text, u8 *text_end)
{
	const s32 *poff;

	for (poff = start; poff < end; poff++) {
		u8 *ptr = (u8 *)poff + *poff;

		if (!*poff || ptr < text || ptr >= text_end)
			continue;
		/* turn lock prefix into DS segment override prefix */
		if (*ptr == 0xf0)
			text_poke(ptr, ((unsigned char []){0x3E}), 1);
	}
}
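
/*
 * Illustration (not from the original source): LOCK_PREFIX sites are
 * assembled with the 0xf0 LOCK byte and their addresses collected in the
 * .smp_locks section. While only one CPU can ever run, that byte is
 * rewritten to the benign 0x3e DS override ("lock; incl" becomes
 * "ds; incl"), and flipped back by alternatives_enable_smp() when a
 * second CPU comes up.
 */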

struct smp_alt_module {
	/* owning module, NULL for the core kernel */
	struct module	*mod;
	char		*name;

	/* ptrs to lock prefixes */
	const s32	*locks;
	const s32	*locks_end;

	/* .text segment, needed to avoid patching init code ;) */
	u8		*text;
	u8		*text_end;

	struct list_head next;
};
static LIST_HEAD(smp_alt_modules);
static bool uniproc_patched = false;	/* protected by text_mutex */

void __init_or_module alternatives_smp_module_add(struct module *mod,
						  char *name,
						  void *locks, void *locks_end,
						  void *text,  void *text_end)
{
	struct smp_alt_module *smp;

	mutex_lock(&text_mutex);
	if (!uniproc_patched)
		goto unlock;

	if (num_possible_cpus() == 1)
		/* Don't bother remembering, we'll never have to undo it. */
		goto smp_unlock;

	smp = kzalloc(sizeof(*smp), GFP_KERNEL);
	if (!smp)
		/* we'll run the (safe but slow) SMP code then ... */
		goto unlock;

	smp->mod	= mod;
	smp->name	= name;
	smp->locks	= locks;
	smp->locks_end	= locks_end;
	smp->text	= text;
	smp->text_end	= text_end;
	DPRINTK("locks %p -> %p, text %p -> %p, name %s",
		smp->locks, smp->locks_end,
		smp->text, smp->text_end, smp->name);

	list_add_tail(&smp->next, &smp_alt_modules);
smp_unlock:
	alternatives_smp_unlock(locks, locks_end, text, text_end);
unlock:
	mutex_unlock(&text_mutex);
}

void __init_or_module alternatives_smp_module_del(struct module *mod)
{
	struct smp_alt_module *item;

	mutex_lock(&text_mutex);
	list_for_each_entry(item, &smp_alt_modules, next) {
		if (mod != item->mod)
			continue;
		list_del(&item->next);
		kfree(item);
		break;
	}
	mutex_unlock(&text_mutex);
}

void alternatives_enable_smp(void)
{
	struct smp_alt_module *mod;

	/* Why bother if there are no other CPUs? */
	BUG_ON(num_possible_cpus() == 1);

	mutex_lock(&text_mutex);

	if (uniproc_patched) {
		pr_info("switching to SMP code\n");
		BUG_ON(num_online_cpus() != 1);
		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
		clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
		list_for_each_entry(mod, &smp_alt_modules, next)
			alternatives_smp_lock(mod->locks, mod->locks_end,
					      mod->text, mod->text_end);
		uniproc_patched = false;
	}
	mutex_unlock(&text_mutex);
}

/*
 * Return 1 if the address range is reserved for SMP-alternatives.
 * Must hold text_mutex.
 */
int alternatives_text_reserved(void *start, void *end)
{
	struct smp_alt_module *mod;
	const s32 *poff;
	u8 *text_start = start;
	u8 *text_end = end;

	lockdep_assert_held(&text_mutex);

	list_for_each_entry(mod, &smp_alt_modules, next) {
		if (mod->text > text_end || mod->text_end < text_start)
			continue;
		for (poff = mod->locks; poff < mod->locks_end; poff++) {
			const u8 *ptr = (const u8 *)poff + *poff;

			if (text_start <= ptr && text_end > ptr)
				return 1;
		}
	}

	return 0;
}
#endif /* CONFIG_SMP */

#ifdef CONFIG_PARAVIRT
void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
				     struct paravirt_patch_site *end)
{
	struct paravirt_patch_site *p;
	char insn_buff[MAX_PATCH_LEN];

	for (p = start; p < end; p++) {
		unsigned int used;

		BUG_ON(p->len > MAX_PATCH_LEN);
		/* prep the buffer with the original instructions */
		memcpy(insn_buff, p->instr, p->len);
		used = paravirt_patch(p->type, insn_buff, (unsigned long)p->instr, p->len);

		BUG_ON(used > p->len);

		/* Pad the rest with nops */
		add_nops(insn_buff + used, p->len - used);
		text_poke_early(p->instr, insn_buff, p->len);
	}
}
extern struct paravirt_patch_site __start_parainstructions[],
	__stop_parainstructions[];
#endif	/* CONFIG_PARAVIRT */

/*
 * Self-test for the INT3 based CALL emulation code.
 *
 * This exercises int3_emulate_call() to make sure INT3 pt_regs are set up
 * properly and that there is a stack gap between the INT3 frame and the
 * previous context. Without this gap doing a virtual PUSH on the interrupted
 * stack would corrupt the INT3 created stack.
 *
 * See entry_{32,64}.S for more details.
 */

/*
 * We define the int3_magic() function in assembly to control the calling
 * convention such that we can 'call' it from outside of the default calling
 * convention imposed by the INT3 handler.
 */
extern void int3_magic(unsigned int *ptr); /* defined in asm */

asm (
"	.pushsection	.init.text, \"ax\", @progbits\n"
"	.type		int3_magic, @function\n"
"int3_magic:\n"
	ANNOTATE_NOENDBR
"	movl	$1, (%" _ASM_ARG1 ")\n"
	ASM_RET
"	.size		int3_magic, .-int3_magic\n"
"	.popsection\n"
);

extern void int3_selftest_ip(void); /* defined in asm below */

static int __init
int3_exception_notify(struct notifier_block *self, unsigned long val, void *data)
{
	unsigned long selftest = (unsigned long)&int3_selftest_ip;
	struct die_args *args = data;
	struct pt_regs *regs = args->regs;

	OPTIMIZER_HIDE_VAR(selftest);

	if (!regs || user_mode(regs))
		return NOTIFY_DONE;

	if (val != DIE_INT3)
		return NOTIFY_DONE;

	if (regs->ip - INT3_INSN_SIZE != selftest)
		return NOTIFY_DONE;

	int3_emulate_call(regs, (unsigned long)&int3_magic);
	return NOTIFY_STOP;
}

/* Must be noinline to ensure uniqueness of int3_selftest_ip. */
static noinline void __init int3_selftest(void)
{
	static __initdata struct notifier_block int3_exception_nb = {
		.notifier_call	= int3_exception_notify,
		.priority	= INT_MAX-1, /* last */
	};
	unsigned int val = 0;

	BUG_ON(register_die_notifier(&int3_exception_nb));

	/*
	 * Basically: int3_magic(&val); but really complicated :-)
	 *
	 * INT3 padded with NOP to CALL_INSN_SIZE. The int3_exception_nb
	 * notifier above will emulate CALL for us.
	 */
	asm volatile ("int3_selftest_ip:\n\t"
		      ANNOTATE_NOENDBR
		      "	int3; nop; nop; nop; nop\n\t"
		      : ASM_CALL_CONSTRAINT
		      : __ASM_SEL_RAW(a, D) (&val)
		      : "memory");

	BUG_ON(val != 1);

	unregister_die_notifier(&int3_exception_nb);
}

void __init alternative_instructions(void)
{
	int3_selftest();

	/*
	 * The patching is not fully atomic, so try to avoid local
	 * interruptions that might execute the to be patched code.
	 * Other CPUs are not running.
	 */
	stop_nmi();

	/*
	 * Don't stop machine check exceptions while patching.
	 * MCEs only happen when something got corrupted and in this
	 * case we must do something about the corruption.
	 * Ignoring it is worse than an unlikely patching race.
	 * Also machine checks tend to be broadcast and if one CPU
	 * goes into machine check the others follow quickly, so we don't
	 * expect a machine check to cause undue problems during code
	 * patching.
	 */

	/*
	 * Set (artificial) features depending on used paravirt functions
	 * which can later influence alternative patching. This must happen
	 * before any patching: doing paravirt patching after alternative
	 * patching would clobber the installed alternative sequences.
	 */
	paravirt_set_cap();

	/*
	 * First patch paravirt functions, such that we overwrite the indirect
	 * call with the direct call.
	 */
	apply_paravirt(__parainstructions, __parainstructions_end);

	/*
	 * Rewrite the retpolines, must be done before alternatives since
	 * those can rewrite the retpoline thunks.
	 */
	apply_retpolines(__retpoline_sites, __retpoline_sites_end);
	apply_returns(__return_sites, __return_sites_end);

	/*
	 * Then patch alternatives, such that those paravirt calls that are in
	 * alternatives can be overwritten by their immediate fragments.
	 */
	apply_alternatives(__alt_instructions, __alt_instructions_end);

	apply_ibt_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end);

#ifdef CONFIG_SMP
	/* Patch to UP if other cpus not imminent. */
	if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) {
		uniproc_patched = true;
		alternatives_smp_module_add(NULL, "core kernel",
					    __smp_locks, __smp_locks_end,
					    _text, _etext);
	}

	if (!uniproc_patched || num_possible_cpus() == 1) {
		free_init_pages("SMP alternatives",
				(unsigned long)__smp_locks,
				(unsigned long)__smp_locks_end);
	}
#endif

	restart_nmi();
	alternatives_patched = 1;
}

/**
 * text_poke_early - Update instructions on a live kernel at boot time
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * When you use this code to patch more than one byte of an instruction
 * you need to make sure that other CPUs cannot execute this code in parallel.
 * Also no thread must be currently preempted in the middle of these
 * instructions. And on the local CPU you need to be protected against NMI or
 * MCE handlers seeing an inconsistent instruction while you patch.
 */
void __init_or_module text_poke_early(void *addr, const void *opcode,
				      size_t len)
{
	unsigned long flags;

	if (boot_cpu_has(X86_FEATURE_NX) &&
	    is_module_text_address((unsigned long)addr)) {
		/*
		 * Modules text is marked initially as non-executable, so the
		 * code cannot be running and speculative code-fetches are
		 * prevented. Just change the code.
		 */
		memcpy(addr, opcode, len);
	} else {
		local_irq_save(flags);
		memcpy(addr, opcode, len);
		local_irq_restore(flags);
		sync_core();

		/*
		 * Could also do a CLFLUSH here to speed up CPU recovery; but
		 * that causes hangs on some VIA CPUs.
		 */
	}
}

typedef struct {
	struct mm_struct *mm;
} temp_mm_state_t;

/*
 * Using a temporary mm allows to set temporary mappings that are not accessible
 * by other CPUs. Such mappings are needed to perform sensitive memory writes
 * that override the kernel memory protections (e.g., W^X), without exposing the
 * temporary page-table mappings that are required for these write operations to
 * other CPUs. Using a temporary mm also allows to avoid TLB shootdowns when the
 * mapping is torn down.
 *
 * Context: The temporary mm needs to be used exclusively by a single core. To
 *          harden security, IRQs must be disabled while the temporary mm is
 *          loaded, thereby preventing interrupt handler bugs from overriding
 *          the kernel memory protection.
 */
static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm)
{
	temp_mm_state_t temp_state;

	lockdep_assert_irqs_disabled();

	/*
	 * Make sure not to be in TLB lazy mode, as otherwise we'll end up
	 * with a stale address space WITHOUT being in lazy mode after
	 * restoring the previous mm.
	 */
	if (this_cpu_read(cpu_tlbstate_shared.is_lazy))
		leave_mm(smp_processor_id());

	temp_state.mm = this_cpu_read(cpu_tlbstate.loaded_mm);
	switch_mm_irqs_off(NULL, mm, current);

	/*
	 * If breakpoints are enabled, disable them while the temporary mm is
	 * used. Userspace might set up watchpoints on addresses that are used
	 * in the temporary mm, which would lead to wrong signals being sent or
	 * crashes.
	 *
	 * Note that breakpoints are not disabled selectively, which also causes
	 * kernel breakpoints (e.g., perf's) to be disabled. This might be
	 * undesirable, but still seems reasonable as the code that runs in the
	 * temporary mm should be short.
	 */
	if (hw_breakpoint_active())
		hw_breakpoint_disable();

	return temp_state;
}

static inline void unuse_temporary_mm(temp_mm_state_t prev_state)
{
	lockdep_assert_irqs_disabled();
	switch_mm_irqs_off(NULL, prev_state.mm, current);

	/*
	 * Restore the breakpoints if they were disabled before the temporary mm
	 * was loaded.
	 */
	if (hw_breakpoint_active())
		hw_breakpoint_restore();
}

/* MM and fixed address used for text poking; set up during poking_init(). */
__ro_after_init struct mm_struct *poking_mm;
__ro_after_init unsigned long poking_addr;

static void text_poke_memcpy(void *dst, const void *src, size_t len)
{
	memcpy(dst, src, len);
}

static void text_poke_memset(void *dst, const void *src, size_t len)
{
	int c = *(const int *)src;

	memset(dst, c, len);
}

typedef void text_poke_f(void *dst, const void *src, size_t len);

static void *__text_poke(text_poke_f func, void *addr, const void *src, size_t len)
{
	bool cross_page_boundary = offset_in_page(addr) + len > PAGE_SIZE;
	struct page *pages[2] = {NULL};
	temp_mm_state_t prev;
	unsigned long flags;
	pte_t pte, *ptep;
	spinlock_t *ptl;
	pgprot_t pgprot;

	/*
	 * While boot memory allocator is running we cannot use struct pages as
	 * they are not yet initialized. There is no way to recover.
	 */
	BUG_ON(!after_bootmem);

	if (!core_kernel_text((unsigned long)addr)) {
		pages[0] = vmalloc_to_page(addr);
		if (cross_page_boundary)
			pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
	} else {
		pages[0] = virt_to_page(addr);
		WARN_ON(!PageReserved(pages[0]));
		if (cross_page_boundary)
			pages[1] = virt_to_page(addr + PAGE_SIZE);
	}
	/*
	 * If something went wrong, crash and burn since recovery paths are not
	 * implemented.
	 */
	BUG_ON(!pages[0] || (cross_page_boundary && !pages[1]));

	/*
	 * Map the page without the global bit, as TLB flushing is done with
	 * flush_tlb_mm_range(), which is intended for non-global PTEs.
	 */
	pgprot = __pgprot(pgprot_val(PAGE_KERNEL) & ~_PAGE_GLOBAL);

	/*
	 * The lock is not really needed, but this allows to avoid open-coding.
	 */
	ptep = get_locked_pte(poking_mm, poking_addr, &ptl);

	/*
	 * This must not fail; preallocated in poking_init().
	 */
	VM_BUG_ON(!ptep);

	local_irq_save(flags);

	pte = mk_pte(pages[0], pgprot);
	set_pte_at(poking_mm, poking_addr, ptep, pte);

	if (cross_page_boundary) {
		pte = mk_pte(pages[1], pgprot);
		set_pte_at(poking_mm, poking_addr + PAGE_SIZE, ptep + 1, pte);
	}

	/*
	 * Loading the temporary mm behaves as a compiler barrier, which
	 * guarantees that the PTE will be set at the time memcpy() is done.
	 */
	prev = use_temporary_mm(poking_mm);

	kasan_disable_current();
	func((u8 *)poking_addr + offset_in_page(addr), src, len);
	kasan_enable_current();

	/*
	 * Ensure that the PTE is only cleared after the instructions of memcpy
	 * were issued by using a compiler barrier.
	 */
	barrier();

	pte_clear(poking_mm, poking_addr, ptep);
	if (cross_page_boundary)
		pte_clear(poking_mm, poking_addr + PAGE_SIZE, ptep + 1);

	/*
	 * Loading the previous page-table hierarchy requires a serializing
	 * instruction that already allows the core to see the updated version.
	 * Xen-PV is assumed to serialize execution in a similar manner.
	 */
	unuse_temporary_mm(prev);

	/*
	 * Flushing the TLB might involve IPIs, which would require enabled
	 * IRQs, but not if the mm is not used, as it is in this point.
	 */
	flush_tlb_mm_range(poking_mm, poking_addr, poking_addr +
			   (cross_page_boundary ? 2 : 1) * PAGE_SIZE,
			   PAGE_SHIFT, false);

	if (func == text_poke_memcpy) {
		/*
		 * If the text does not match what we just wrote then something is
		 * fundamentally screwy; there's nothing we can really do about that.
		 */
		BUG_ON(memcmp(addr, src, len));
	}

	local_irq_restore(flags);
	pte_unmap_unlock(ptep, ptl);
	return addr;
}

/**
 * text_poke - Update instructions on a live kernel
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * Only atomic text poke/set should be allowed when not doing early patching.
 * It means the size must be writable atomically and the address must be
 * aligned in a way that permits an atomic write. It also makes sure we fit
 * on a single page.
 *
 * Note that the caller must ensure that if the modified code is part of a
 * module, the module would not be removed during poking. This can be achieved
 * by registering a module notifier, and ordering module removal and patching
 * through a mutex.
 */
void *text_poke(void *addr, const void *opcode, size_t len)
{
	lockdep_assert_held(&text_mutex);

	return __text_poke(text_poke_memcpy, addr, opcode, len);
}
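
/*
 * Usage sketch (hypothetical caller, not part of this file):
 *
 *	mutex_lock(&text_mutex);
 *	text_poke(site, new_byte, 1);	// single-byte, atomic update
 *	mutex_unlock(&text_mutex);
 */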

/**
 * text_poke_kgdb - Update instructions on a live kernel by kgdb
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * Only atomic text poke/set should be allowed when not doing early patching.
 * It means the size must be writable atomically and the address must be
 * aligned in a way that permits an atomic write. It also makes sure we fit
 * on a single page.
 *
 * Context: should only be used by kgdb, which ensures no other core is
 * running, despite the fact it does not hold the text_mutex.
 */
void *text_poke_kgdb(void *addr, const void *opcode, size_t len)
{
	return __text_poke(text_poke_memcpy, addr, opcode, len);
}

/**
 * text_poke_copy - Copy instructions into (an unused part of) RX memory
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy, could be more than 2x PAGE_SIZE
 *
 * Not safe against concurrent execution; useful for JITs to dump
 * new code blocks into unused regions of RX memory. Can be used in
 * conjunction with synchronize_rcu_tasks() to wait for existing
 * execution to quiesce after having made the instruction RO,
 * and before removing it from the kernel.
 */
void *text_poke_copy(void *addr, const void *opcode, size_t len)
{
	unsigned long start = (unsigned long)addr;
	size_t patched = 0;

	if (WARN_ON_ONCE(core_kernel_text(start)))
		return NULL;

	mutex_lock(&text_mutex);
	while (patched < len) {
		unsigned long ptr = start + patched;
		size_t s;

		s = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(ptr), len - patched);

		__text_poke(text_poke_memcpy, (void *)ptr, opcode + patched, s);
		patched += s;
	}
	mutex_unlock(&text_mutex);
	return addr;
}

/**
 * text_poke_set - memset into (an unused part of) RX memory
 * @addr: address to modify
 * @c: the byte to fill the area with
 * @len: length to copy, could be more than 2x PAGE_SIZE
 *
 * This is useful to overwrite unused regions of RX memory with illegal
 * instructions.
 */
void *text_poke_set(void *addr, int c, size_t len)
{
	unsigned long start = (unsigned long)addr;
	size_t patched = 0;

	if (WARN_ON_ONCE(core_kernel_text(start)))
		return NULL;

	mutex_lock(&text_mutex);
	while (patched < len) {
		unsigned long ptr = start + patched;
		size_t s;

		s = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(ptr), len - patched);

		__text_poke(text_poke_memset, (void *)ptr, (void *)&c, s);
		patched += s;
	}
	mutex_unlock(&text_mutex);
	return addr;
}
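
/*
 * Usage sketch (hypothetical caller, not part of this file): fill a
 * retired JIT region with INT3 traps:
 *
 *	text_poke_set(jit_region, 0xcc, region_size);
 */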

static void do_sync_core(void *info)
{
	sync_core();
}

void text_poke_sync(void)
{
	on_each_cpu(do_sync_core, NULL, 1);
}

struct text_poke_loc {
	/* addr := _stext + rel_addr */
	s32 rel_addr;
	s32 disp;
	u8 len;
	u8 opcode;
	const u8 text[POKE_MAX_OPCODE_SIZE];
	/* see text_poke_bp_batch() */
	u8 old;
};

struct bp_patching_desc {
	struct text_poke_loc *vec;
	int nr_entries;
	atomic_t refs;
};

static struct bp_patching_desc bp_desc;

static __always_inline
struct bp_patching_desc *try_get_desc(void)
{
	struct bp_patching_desc *desc = &bp_desc;

	if (!arch_atomic_inc_not_zero(&desc->refs))
		return NULL;

	return desc;
}

static __always_inline void put_desc(void)
{
	struct bp_patching_desc *desc = &bp_desc;

	smp_mb__before_atomic();
	arch_atomic_dec(&desc->refs);
}

static __always_inline void *text_poke_addr(struct text_poke_loc *tp)
{
	return _stext + tp->rel_addr;
}

static __always_inline int patch_cmp(const void *key, const void *elt)
{
	struct text_poke_loc *tp = (struct text_poke_loc *) elt;

	if (key < text_poke_addr(tp))
		return -1;
	if (key > text_poke_addr(tp))
		return 1;
	return 0;
}

noinstr int poke_int3_handler(struct pt_regs *regs)
{
	struct bp_patching_desc *desc;
	struct text_poke_loc *tp;
	int ret = 0;
	void *ip;

	if (user_mode(regs))
		return 0;

	/*
	 * Having observed our INT3 instruction, we now must observe
	 * bp_desc with non-zero refcount:
	 *
	 *	bp_desc.refs = 1		INT3
	 *	WMB				RMB
	 *	write INT3			if (bp_desc.refs != 0)
	 */
	smp_rmb();

	desc = try_get_desc();
	if (!desc)
		return 0;

	/*
	 * Discount the INT3. See text_poke_bp_batch().
	 */
	ip = (void *) regs->ip - INT3_INSN_SIZE;

	/*
	 * Skip the binary search if there is a single member in the vector.
	 */
	if (unlikely(desc->nr_entries > 1)) {
		tp = __inline_bsearch(ip, desc->vec, desc->nr_entries,
				      sizeof(struct text_poke_loc),
				      patch_cmp);
		if (!tp)
			goto out_put;
	} else {
		tp = desc->vec;
		if (text_poke_addr(tp) != ip)
			goto out_put;
	}

	ip += tp->len;

	switch (tp->opcode) {
	case INT3_INSN_OPCODE:
		/*
		 * Someone poked an explicit INT3, they'll want to handle it,
		 * do not consume.
		 */
		goto out_put;

	case RET_INSN_OPCODE:
		int3_emulate_ret(regs);
		break;

	case CALL_INSN_OPCODE:
		int3_emulate_call(regs, (long)ip + tp->disp);
		break;

	case JMP32_INSN_OPCODE:
	case JMP8_INSN_OPCODE:
		int3_emulate_jmp(regs, (long)ip + tp->disp);
		break;

	default:
		BUG();
	}

	ret = 1;

out_put:
	put_desc();
	return ret;
}
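
/*
 * Illustration (not from the original source): if a CALL site at address A
 * currently holds the temporary INT3, a CPU that executes it traps here
 * with regs->ip == A + 1. The handler rewinds to A, looks the site up in
 * the vector, and emulates the eventual CALL using tp->disp, so concurrent
 * execution never sees a half-written instruction.
 */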

#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc))
static struct text_poke_loc tp_vec[TP_VEC_MAX];
static int tp_vec_nr;

/**
 * text_poke_bp_batch() -- update instructions on live kernel on SMP
 * @tp:			vector of instructions to patch
 * @nr_entries:		number of entries in the vector
 *
 * Modify multi-byte instruction by using int3 breakpoint on SMP.
 * We completely avoid stop_machine() here, and achieve the
 * synchronization using int3 breakpoint.
 *
 * The way it is done:
 *	- For each entry in the vector:
 *		- add a int3 trap to the address that will be patched
 *	- sync cores
 *	- For each entry in the vector:
 *		- update all but the first byte of the patched range
 *	- sync cores
 *	- For each entry in the vector:
 *		- replace the first byte (int3) by the first byte of
 *		  replacing opcode
 *	- sync cores
 */
static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
{
	unsigned char int3 = INT3_INSN_OPCODE;
	unsigned int i;
	int do_sync;

	lockdep_assert_held(&text_mutex);

	bp_desc.vec = tp;
	bp_desc.nr_entries = nr_entries;

	/*
	 * Corresponds to the implicit memory barrier in try_get_desc() to
	 * ensure reading a non-zero refcount provides up to date bp_desc data.
	 */
	atomic_set_release(&bp_desc.refs, 1);

	/*
	 * Corresponding read barrier in int3 notifier for making sure the
	 * nr_entries and handler are correctly ordered wrt. patching.
	 */
	smp_wmb();

	/*
	 * First step: add a int3 trap to the address that will be patched.
	 */
	for (i = 0; i < nr_entries; i++) {
		tp[i].old = *(u8 *)text_poke_addr(&tp[i]);
		text_poke(text_poke_addr(&tp[i]), &int3, INT3_INSN_SIZE);
	}

	text_poke_sync();

	/*
	 * Second step: update all but the first byte of the patched range.
	 */
	for (do_sync = 0, i = 0; i < nr_entries; i++) {
		u8 old[POKE_MAX_OPCODE_SIZE] = { tp[i].old, };
		int len = tp[i].len;

		if (len - INT3_INSN_SIZE > 0) {
			memcpy(old + INT3_INSN_SIZE,
			       text_poke_addr(&tp[i]) + INT3_INSN_SIZE,
			       len - INT3_INSN_SIZE);
			text_poke(text_poke_addr(&tp[i]) + INT3_INSN_SIZE,
				  (const char *)tp[i].text + INT3_INSN_SIZE,
				  len - INT3_INSN_SIZE);
			do_sync++;
		}

		/*
		 * Emit a perf event to record the text poke, primarily to
		 * support Intel PT decoding which must walk the executable code
		 * to reconstruct the trace. The flow up to here is:
		 *   - write INT3 byte
		 *   - IPI-SYNC
		 *   - write instruction tail
		 * At this point the actual control flow will be through the
		 * INT3 and handler and not hit the old or new instruction.
		 * Intel PT outputs FUP/TIP packets for the INT3, so the flow
		 * can still be decoded. Subsequently:
		 *   - emit RECORD_TEXT_POKE with the new instruction
		 *   - IPI-SYNC
		 *   - write first byte
		 *   - IPI-SYNC
		 * So before the text poke event timestamp, the decoder will see
		 * either the old instruction flow or FUP/TIP of INT3. After the
		 * text poke event timestamp, the decoder will see either the
		 * new instruction flow or FUP/TIP of INT3. Thus decoders can
		 * use the timestamp as the point at which to modify the
		 * executable code.
		 */
		perf_event_text_poke(text_poke_addr(&tp[i]), old, len,
				     tp[i].text, len);
	}

	if (do_sync) {
		/*
		 * According to Intel, this core syncing is very likely
		 * not necessary and we'd be safe even without it. But
		 * better safe than sorry (plus there's not only Intel).
		 */
		text_poke_sync();
	}

	/*
	 * Third step: replace the first byte (int3) by the first byte of
	 * replacing opcode.
	 */
	for (do_sync = 0, i = 0; i < nr_entries; i++) {
		if (tp[i].text[0] == INT3_INSN_OPCODE)
			continue;

		text_poke(text_poke_addr(&tp[i]), tp[i].text, INT3_INSN_SIZE);
		do_sync++;
	}

	if (do_sync)
		text_poke_sync();

	/*
	 * Remove and wait for refs to be zero.
	 */
	if (!atomic_dec_and_test(&bp_desc.refs))
		atomic_cond_read_acquire(&bp_desc.refs, !VAL);
}

static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
			       const void *opcode, size_t len, const void *emulate)
{
	struct insn insn;
	int ret, i;

	memcpy((void *)tp->text, opcode, len);
	if (!emulate)
		emulate = opcode;

	ret = insn_decode_kernel(&insn, emulate);
	BUG_ON(ret < 0);

	tp->rel_addr = addr - (void *)_stext;
	tp->len = len;
	tp->opcode = insn.opcode.bytes[0];

	switch (tp->opcode) {
	case RET_INSN_OPCODE:
	case JMP32_INSN_OPCODE:
	case JMP8_INSN_OPCODE:
		/*
		 * Control flow instructions without implied execution of the
		 * next instruction can be padded with INT3.
		 */
		for (i = insn.length; i < len; i++)
			BUG_ON(tp->text[i] != INT3_INSN_OPCODE);
		break;

	default:
		BUG_ON(len != insn.length);
	}

	switch (tp->opcode) {
	case INT3_INSN_OPCODE:
	case RET_INSN_OPCODE:
		break;

	case CALL_INSN_OPCODE:
	case JMP32_INSN_OPCODE:
	case JMP8_INSN_OPCODE:
		tp->disp = insn.immediate.value;
		break;

	default: /* assume NOP */
		switch (len) {
		case 2: /* NOP2 -- emulate as JMP8+0 */
			BUG_ON(memcmp(emulate, x86_nops[len], len));
			tp->opcode = JMP8_INSN_OPCODE;
			tp->disp = 0;
			break;

		case 5: /* NOP5 -- emulate as JMP32+0 */
			BUG_ON(memcmp(emulate, x86_nops[len], len));
			tp->opcode = JMP32_INSN_OPCODE;
			tp->disp = 0;
			break;

		default: /* unknown instruction */
			BUG();
		}
		break;
	}
}

/*
 * We hard rely on the tp_vec being ordered; ensure this is so by flushing
 * early if needed.
 */
static bool tp_order_fail(void *addr)
{
	struct text_poke_loc *tp;

	if (!tp_vec_nr)
		return false;

	if (!addr) /* force */
		return true;

	tp = &tp_vec[tp_vec_nr - 1];
	if ((unsigned long)text_poke_addr(tp) > (unsigned long)addr)
		return true;

	return false;
}

static void text_poke_flush(void *addr)
{
	if (tp_vec_nr == TP_VEC_MAX || tp_order_fail(addr)) {
		text_poke_bp_batch(tp_vec, tp_vec_nr);
		tp_vec_nr = 0;
	}
}

void text_poke_finish(void)
{
	text_poke_flush(NULL);
}

void __ref text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate)
{
	struct text_poke_loc *tp;

	if (unlikely(system_state == SYSTEM_BOOTING)) {
		text_poke_early(addr, opcode, len);
		return;
	}

	text_poke_flush(addr);

	tp = &tp_vec[tp_vec_nr++];
	text_poke_loc_init(tp, addr, opcode, len, emulate);
}

/**
 * text_poke_bp() -- update instructions on live kernel on SMP
 * @addr:	address to patch
 * @opcode:	opcode of new instruction
 * @len:	length to copy
 * @emulate:	instruction to be emulated
 *
 * Update a single instruction with the vector in the stack, avoiding
 * dynamically allocated memory. This function should be used when it is
 * not possible to allocate memory.
 */
void __ref text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate)
{
	struct text_poke_loc tp;

	if (unlikely(system_state == SYSTEM_BOOTING)) {
		text_poke_early(addr, opcode, len);
		return;
	}

	text_poke_loc_init(&tp, addr, opcode, len, emulate);
	text_poke_bp_batch(&tp, 1);
}