0001
0002
0003
0004
0005
0006
0007
0008 #include <linux/kprobes.h>
0009 #include <linux/perf_event.h>
0010 #include <linux/ptrace.h>
0011 #include <linux/string.h>
0012 #include <linux/slab.h>
0013 #include <linux/hardirq.h>
0014 #include <linux/preempt.h>
0015 #include <linux/extable.h>
0016 #include <linux/kdebug.h>
0017 #include <linux/kallsyms.h>
0018 #include <linux/ftrace.h>
0019 #include <linux/objtool.h>
0020 #include <linux/pgtable.h>
0021 #include <linux/static_call.h>
0022
0023 #include <asm/text-patching.h>
0024 #include <asm/cacheflush.h>
0025 #include <asm/desc.h>
0026 #include <linux/uaccess.h>
0027 #include <asm/alternative.h>
0028 #include <asm/insn.h>
0029 #include <asm/debugreg.h>
0030 #include <asm/set_memory.h>
0031 #include <asm/sections.h>
0032 #include <asm/nospec-branch.h>
0033
0034 #include "common.h"
0035
0036 unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
0037 {
0038 struct optimized_kprobe *op;
0039 struct kprobe *kp;
0040 long offs;
0041 int i;
0042
0043 for (i = 0; i < JMP32_INSN_SIZE; i++) {
0044 kp = get_kprobe((void *)addr - i);
0045
0046 if (kp && kprobe_optimized(kp)) {
0047 op = container_of(kp, struct optimized_kprobe, kp);
0048
0049 if (list_empty(&op->list))
0050 goto found;
0051 }
0052 }
0053
0054 return addr;
0055 found:
0056
0057
0058
0059
0060
0061 if (copy_from_kernel_nofault(buf, (void *)addr,
0062 MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
0063 return 0UL;
0064
0065 if (addr == (unsigned long)kp->addr) {
0066 buf[0] = kp->opcode;
0067 memcpy(buf + 1, op->optinsn.copied_insn, DISP32_SIZE);
0068 } else {
0069 offs = addr - (unsigned long)kp->addr - 1;
0070 memcpy(buf, op->optinsn.copied_insn + offs, DISP32_SIZE - offs);
0071 }
0072
0073 return (unsigned long)buf;
0074 }
0075
0076 static void synthesize_clac(kprobe_opcode_t *addr)
0077 {
0078
0079
0080
0081
0082 if (!boot_cpu_has(X86_FEATURE_SMAP))
0083 return;
0084
0085
0086 addr[0] = 0x0f;
0087 addr[1] = 0x01;
0088 addr[2] = 0xca;
0089 }
0090
0091
0092 static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
0093 {
0094 #ifdef CONFIG_X86_64
0095 *addr++ = 0x48;
0096 *addr++ = 0xbf;
0097 #else
0098 *addr++ = 0xb8;
0099 #endif
0100 *(unsigned long *)addr = val;
0101 }
0102
/*
 * Trampoline template for optimized kprobes.
 *
 * The bytes between optprobe_template_entry and optprobe_template_end are
 * emitted into .rodata and copied into each probe's instruction slot by
 * arch_prepare_optimized_kprobe(), which then patches the NOP placeholders
 * at the _clac, _val and _call labels and appends the copied original
 * instructions plus a jump back to the probed code.
 *
 * SAVE_REGS_STRING / RESTORE_REGS_STRING are defined outside this file;
 * the frame offsets used below presumably follow struct pt_regs.
 */
asm (
			".pushsection .rodata\n"
			"optprobe_template_func:\n"
			".global optprobe_template_entry\n"
			"optprobe_template_entry:\n"
#ifdef CONFIG_X86_64
			"	pushq $" __stringify(__KERNEL_DS) "\n"
			/*
			 * The stack pointer is saved after the push above, so
			 * it is one word low; optimized_callback() adds
			 * sizeof(long) to regs->sp to compensate.
			 */
			"	pushq %rsp\n"
			"	pushfq\n"
			".global optprobe_template_clac\n"
			"optprobe_template_clac:\n"
			/* 3-byte placeholder, filled by synthesize_clac(). */
			ASM_NOP3
			SAVE_REGS_STRING
			/* Second argument: pointer to the saved register frame. */
			"	movq %rsp, %rsi\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			/* 10-byte placeholder, filled by synthesize_set_arg1(). */
			ASM_NOP5
			ASM_NOP5
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			/* 5-byte placeholder for the call to optimized_callback(). */
			ASM_NOP5
			/*
			 * Copy slot 18 of the frame into slot 20 (presumably
			 * regs->flags into regs->ss), so that flags can be
			 * popped last.
			 */
			"	movq 18*8(%rsp), %rdx\n"
			"	movq %rdx, 20*8(%rsp)\n"
			RESTORE_REGS_STRING
			/* Skip the two words pushed by pushfq and pushq %rsp. */
			"	addq $16, %rsp\n"
			/* Pop flags from the slot written by the copy above. */
			"	popfq\n"
#else /* CONFIG_X86_32 */
			"	pushl %ss\n"
			/*
			 * The stack pointer is saved after the push above, so
			 * it is one word low; optimized_callback() adds
			 * sizeof(long) to regs->sp to compensate.
			 */
			"	pushl %esp\n"
			"	pushfl\n"
			".global optprobe_template_clac\n"
			"optprobe_template_clac:\n"
			/* 3-byte placeholder, filled by synthesize_clac(). */
			ASM_NOP3
			SAVE_REGS_STRING
			/* Second argument: pointer to the saved register frame. */
			"	movl %esp, %edx\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			/* 5-byte placeholder, filled by synthesize_set_arg1(). */
			ASM_NOP5
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			/* 5-byte placeholder for the call to optimized_callback(). */
			ASM_NOP5
			/*
			 * Copy slot 14 of the frame into slot 16 (presumably
			 * regs->flags into regs->ss), so that flags can be
			 * popped last.
			 */
			"	movl 14*4(%esp), %edx\n"
			"	movl %edx, 16*4(%esp)\n"
			RESTORE_REGS_STRING
			/* Skip the two words pushed by pushfl and pushl %esp. */
			"	addl $8, %esp\n"
			/* Pop flags from the slot written by the copy above. */
			"	popfl\n"
#endif
			".global optprobe_template_end\n"
			"optprobe_template_end:\n"
			".popsection\n");

/*
 * C-visible declaration of the template symbol so that it can be named in
 * STACK_FRAME_NON_STANDARD() (presumably to exempt it from objtool's stack
 * frame validation).
 */
void optprobe_template_func(void);
STACK_FRAME_NON_STANDARD(optprobe_template_func);
0164
/*
 * Byte offsets of the patchable locations, measured from
 * optprobe_template_entry.  TMPL_END_IDX is the offset of
 * optprobe_template_end, i.e. the length of the template.
 */
/* Offset of the placeholder filled by synthesize_clac(). */
#define TMPL_CLAC_IDX \
	((long)optprobe_template_clac - (long)optprobe_template_entry)
/* Offset of the placeholder filled by synthesize_set_arg1(). */
#define TMPL_MOVE_IDX \
	((long)optprobe_template_val - (long)optprobe_template_entry)
/* Offset of the placeholder replaced by the call to optimized_callback(). */
#define TMPL_CALL_IDX \
	((long)optprobe_template_call - (long)optprobe_template_entry)
/* Offset at which the copied original instructions are placed. */
#define TMPL_END_IDX \
	((long)optprobe_template_end - (long)optprobe_template_entry)
0173
0174
0175 static void
0176 optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
0177 {
0178
0179 if (kprobe_disabled(&op->kp))
0180 return;
0181
0182 preempt_disable();
0183 if (kprobe_running()) {
0184 kprobes_inc_nmissed_count(&op->kp);
0185 } else {
0186 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
0187
0188 regs->sp += sizeof(long);
0189
0190 regs->cs = __KERNEL_CS;
0191 #ifdef CONFIG_X86_32
0192 regs->gs = 0;
0193 #endif
0194 regs->ip = (unsigned long)op->kp.addr + INT3_INSN_SIZE;
0195 regs->orig_ax = ~0UL;
0196
0197 __this_cpu_write(current_kprobe, &op->kp);
0198 kcb->kprobe_status = KPROBE_HIT_ACTIVE;
0199 opt_pre_handler(&op->kp, regs);
0200 __this_cpu_write(current_kprobe, NULL);
0201 }
0202 preempt_enable();
0203 }
0204 NOKPROBE_SYMBOL(optimized_callback);
0205
0206 static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real)
0207 {
0208 struct insn insn;
0209 int len = 0, ret;
0210
0211 while (len < JMP32_INSN_SIZE) {
0212 ret = __copy_instruction(dest + len, src + len, real + len, &insn);
0213 if (!ret || !can_boost(&insn, src + len))
0214 return -EINVAL;
0215 len += ret;
0216 }
0217
0218 if (ftrace_text_reserved(src, src + len - 1) ||
0219 alternatives_text_reserved(src, src + len - 1) ||
0220 jump_label_text_reserved(src, src + len - 1) ||
0221 static_call_text_reserved(src, src + len - 1))
0222 return -EBUSY;
0223
0224 return len;
0225 }
0226
0227
0228 static int __insn_is_indirect_jump(struct insn *insn)
0229 {
0230 return ((insn->opcode.bytes[0] == 0xff &&
0231 (X86_MODRM_REG(insn->modrm.value) & 6) == 4) ||
0232 insn->opcode.bytes[0] == 0xea);
0233 }
0234
0235
0236 static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
0237 {
0238 unsigned long target = 0;
0239
0240 switch (insn->opcode.bytes[0]) {
0241 case 0xe0:
0242 case 0xe1:
0243 case 0xe2:
0244 case 0xe3:
0245 case 0xe9:
0246 case 0xeb:
0247 break;
0248 case 0x0f:
0249 if ((insn->opcode.bytes[1] & 0xf0) == 0x80)
0250 break;
0251 return 0;
0252 default:
0253 if ((insn->opcode.bytes[0] & 0xf0) == 0x70)
0254 break;
0255 return 0;
0256 }
0257 target = (unsigned long)insn->next_byte + insn->immediate.value;
0258
0259 return (start <= target && target <= start + len);
0260 }
0261
/*
 * Return non-zero if @insn must be treated as an indirect jump.
 *
 * Besides the opcode test in __insn_is_indirect_jump(), with
 * CONFIG_RETPOLINE a relative branch into the region between
 * __indirect_thunk_start and __indirect_thunk_end also counts.
 */
static int insn_is_indirect_jump(struct insn *insn)
{
	int indirect = __insn_is_indirect_jump(insn);

#ifdef CONFIG_RETPOLINE
	if (!indirect) {
		unsigned long thunk_start = (unsigned long)__indirect_thunk_start;
		unsigned long thunk_len =
			(unsigned long)__indirect_thunk_end - thunk_start;

		indirect = insn_jump_into_range(insn, thunk_start, thunk_len);
	}
#endif
	return indirect;
}
0281
0282 static bool is_padding_int3(unsigned long addr, unsigned long eaddr)
0283 {
0284 unsigned char ops;
0285
0286 for (; addr < eaddr; addr++) {
0287 if (get_kernel_nofault(ops, (void *)addr) < 0 ||
0288 ops != INT3_INSN_OPCODE)
0289 return false;
0290 }
0291
0292 return true;
0293 }
0294
0295
0296 static int can_optimize(unsigned long paddr)
0297 {
0298 unsigned long addr, size = 0, offset = 0;
0299 struct insn insn;
0300 kprobe_opcode_t buf[MAX_INSN_SIZE];
0301
0302
0303 if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
0304 return 0;
0305
0306
0307
0308
0309
0310 if (((paddr >= (unsigned long)__entry_text_start) &&
0311 (paddr < (unsigned long)__entry_text_end)))
0312 return 0;
0313
0314
0315 if (size - offset < JMP32_INSN_SIZE)
0316 return 0;
0317
0318
0319 addr = paddr - offset;
0320 while (addr < paddr - offset + size) {
0321 unsigned long recovered_insn;
0322 int ret;
0323
0324 if (search_exception_tables(addr))
0325
0326
0327
0328
0329 return 0;
0330 recovered_insn = recover_probed_instruction(buf, addr);
0331 if (!recovered_insn)
0332 return 0;
0333
0334 ret = insn_decode_kernel(&insn, (void *)recovered_insn);
0335 if (ret < 0)
0336 return 0;
0337
0338
0339
0340
0341
0342
0343 if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
0344 return is_padding_int3(addr, paddr - offset + size) ? 1 : 0;
0345
0346
0347 insn.kaddr = (void *)addr;
0348 insn.next_byte = (void *)(addr + insn.length);
0349
0350 if (insn_is_indirect_jump(&insn) ||
0351 insn_jump_into_range(&insn, paddr + INT3_INSN_SIZE,
0352 DISP32_SIZE))
0353 return 0;
0354 addr += insn.length;
0355 }
0356
0357 return 1;
0358 }
0359
0360
0361 int arch_check_optimized_kprobe(struct optimized_kprobe *op)
0362 {
0363 int i;
0364 struct kprobe *p;
0365
0366 for (i = 1; i < op->optinsn.size; i++) {
0367 p = get_kprobe(op->kp.addr + i);
0368 if (p && !kprobe_disabled(p))
0369 return -EEXIST;
0370 }
0371
0372 return 0;
0373 }
0374
0375
0376 int arch_within_optimized_kprobe(struct optimized_kprobe *op,
0377 kprobe_opcode_t *addr)
0378 {
0379 return (op->kp.addr <= addr &&
0380 op->kp.addr + op->optinsn.size > addr);
0381 }
0382
0383
0384 static
0385 void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
0386 {
0387 u8 *slot = op->optinsn.insn;
0388 if (slot) {
0389 int len = TMPL_END_IDX + op->optinsn.size + JMP32_INSN_SIZE;
0390
0391
0392 if (dirty)
0393 perf_event_text_poke(slot, slot, len, NULL, 0);
0394
0395 free_optinsn_slot(slot, dirty);
0396 op->optinsn.insn = NULL;
0397 op->optinsn.size = 0;
0398 }
0399 }
0400
/* Release the instruction slot of @op, treating it as dirty. */
void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
	const int dirty = 1;

	__arch_remove_optimized_kprobe(op, dirty);
}
0405
0406
0407
0408
0409
0410
0411 int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
0412 struct kprobe *__unused)
0413 {
0414 u8 *buf = NULL, *slot;
0415 int ret, len;
0416 long rel;
0417
0418 if (!can_optimize((unsigned long)op->kp.addr))
0419 return -EILSEQ;
0420
0421 buf = kzalloc(MAX_OPTINSN_SIZE, GFP_KERNEL);
0422 if (!buf)
0423 return -ENOMEM;
0424
0425 op->optinsn.insn = slot = get_optinsn_slot();
0426 if (!slot) {
0427 ret = -ENOMEM;
0428 goto out;
0429 }
0430
0431
0432
0433
0434
0435 rel = (long)slot - (long)op->kp.addr + JMP32_INSN_SIZE;
0436 if (abs(rel) > 0x7fffffff) {
0437 ret = -ERANGE;
0438 goto err;
0439 }
0440
0441
0442 memcpy(buf, optprobe_template_entry, TMPL_END_IDX);
0443
0444
0445 ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr,
0446 slot + TMPL_END_IDX);
0447 if (ret < 0)
0448 goto err;
0449 op->optinsn.size = ret;
0450 len = TMPL_END_IDX + op->optinsn.size;
0451
0452 synthesize_clac(buf + TMPL_CLAC_IDX);
0453
0454
0455 synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
0456
0457
0458 synthesize_relcall(buf + TMPL_CALL_IDX,
0459 slot + TMPL_CALL_IDX, optimized_callback);
0460
0461
0462 synthesize_reljump(buf + len, slot + len,
0463 (u8 *)op->kp.addr + op->optinsn.size);
0464 len += JMP32_INSN_SIZE;
0465
0466
0467
0468
0469
0470
0471
0472 perf_event_text_poke(slot, NULL, 0, buf, len);
0473 text_poke(slot, buf, len);
0474
0475 ret = 0;
0476 out:
0477 kfree(buf);
0478 return ret;
0479
0480 err:
0481 __arch_remove_optimized_kprobe(op, 0);
0482 goto out;
0483 }
0484
0485
0486
0487
0488
0489
0490
0491
0492
0493 void arch_optimize_kprobes(struct list_head *oplist)
0494 {
0495 struct optimized_kprobe *op, *tmp;
0496 u8 insn_buff[JMP32_INSN_SIZE];
0497
0498 list_for_each_entry_safe(op, tmp, oplist, list) {
0499 s32 rel = (s32)((long)op->optinsn.insn -
0500 ((long)op->kp.addr + JMP32_INSN_SIZE));
0501
0502 WARN_ON(kprobe_disabled(&op->kp));
0503
0504
0505 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_INSN_SIZE,
0506 DISP32_SIZE);
0507
0508 insn_buff[0] = JMP32_INSN_OPCODE;
0509 *(s32 *)(&insn_buff[1]) = rel;
0510
0511 text_poke_bp(op->kp.addr, insn_buff, JMP32_INSN_SIZE, NULL);
0512
0513 list_del_init(&op->list);
0514 }
0515 }
0516
0517
0518
0519
0520
0521
0522
0523
0524 void arch_unoptimize_kprobe(struct optimized_kprobe *op)
0525 {
0526 u8 new[JMP32_INSN_SIZE] = { INT3_INSN_OPCODE, };
0527 u8 old[JMP32_INSN_SIZE];
0528 u8 *addr = op->kp.addr;
0529
0530 memcpy(old, op->kp.addr, JMP32_INSN_SIZE);
0531 memcpy(new + INT3_INSN_SIZE,
0532 op->optinsn.copied_insn,
0533 JMP32_INSN_SIZE - INT3_INSN_SIZE);
0534
0535 text_poke(addr, new, INT3_INSN_SIZE);
0536 text_poke_sync();
0537 text_poke(addr + INT3_INSN_SIZE,
0538 new + INT3_INSN_SIZE,
0539 JMP32_INSN_SIZE - INT3_INSN_SIZE);
0540 text_poke_sync();
0541
0542 perf_event_text_poke(op->kp.addr, old, JMP32_INSN_SIZE, new, JMP32_INSN_SIZE);
0543 }
0544
0545
0546
0547
0548
0549 extern void arch_unoptimize_kprobes(struct list_head *oplist,
0550 struct list_head *done_list)
0551 {
0552 struct optimized_kprobe *op, *tmp;
0553
0554 list_for_each_entry_safe(op, tmp, oplist, list) {
0555 arch_unoptimize_kprobe(op);
0556 list_move(&op->list, done_list);
0557 }
0558 }
0559
0560 int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
0561 {
0562 struct optimized_kprobe *op;
0563
0564 if (p->flags & KPROBE_FLAG_OPTIMIZED) {
0565
0566 op = container_of(p, struct optimized_kprobe, kp);
0567
0568 regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
0569 if (!reenter)
0570 reset_current_kprobe();
0571 return 1;
0572 }
0573 return 0;
0574 }
0575 NOKPROBE_SYMBOL(setup_detour_execution);