/*
 * BPF JIT compiler for x86-64: translates eBPF instructions into native
 * x86-64 machine code at program load time.
 */
0008 #include <linux/netdevice.h>
0009 #include <linux/filter.h>
0010 #include <linux/if_vlan.h>
0011 #include <linux/bpf.h>
0012 #include <linux/memory.h>
0013 #include <linux/sort.h>
0014 #include <asm/extable.h>
0015 #include <asm/set_memory.h>
0016 #include <asm/nospec-branch.h>
0017 #include <asm/text-patching.h>
0018
0019 static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
0020 {
0021 if (len == 1)
0022 *ptr = bytes;
0023 else if (len == 2)
0024 *(u16 *)ptr = bytes;
0025 else {
0026 *(u32 *)ptr = bytes;
0027 barrier();
0028 }
0029 return ptr + len;
0030 }
0031
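/*
 * Helpers to emit raw bytes into the JIT image: EMIT1()..EMIT4() pack up to
 * four opcode bytes into one u32, and the *_off32() variants append a 32-bit
 * immediate or displacement.
 */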
0032 #define EMIT(bytes, len) \
0033 do { prog = emit_code(prog, bytes, len); } while (0)
0034
0035 #define EMIT1(b1) EMIT(b1, 1)
0036 #define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2)
0037 #define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
0038 #define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
0039
0040 #define EMIT1_off32(b1, off) \
0041 do { EMIT1(b1); EMIT(off, 4); } while (0)
0042 #define EMIT2_off32(b1, b2, off) \
0043 do { EMIT2(b1, b2); EMIT(off, 4); } while (0)
0044 #define EMIT3_off32(b1, b2, b3, off) \
0045 do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
0046 #define EMIT4_off32(b1, b2, b3, b4, off) \
0047 do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)
0048
0049 #ifdef CONFIG_X86_KERNEL_IBT
0050 #define EMIT_ENDBR() EMIT(gen_endbr(), 4)
0051 #else
0052 #define EMIT_ENDBR()
0053 #endif
0054
0055 static bool is_imm8(int value)
0056 {
0057 return value <= 127 && value >= -128;
0058 }
0059
0060 static bool is_simm32(s64 value)
0061 {
0062 return value == (s64)(s32)value;
0063 }
0064
0065 static bool is_uimm32(u64 value)
0066 {
0067 return value == (u64)(u32)value;
0068 }
0069
0070
0071 #define EMIT_mov(DST, SRC) \
0072 do { \
0073 if (DST != SRC) \
0074 EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \
0075 } while (0)
0076
0077 static int bpf_size_to_x86_bytes(int bpf_size)
0078 {
0079 if (bpf_size == BPF_W)
0080 return 4;
0081 else if (bpf_size == BPF_H)
0082 return 2;
0083 else if (bpf_size == BPF_B)
0084 return 1;
	else if (bpf_size == BPF_DW)
		return 4; /* imm32 */
0087 else
0088 return 0;
0089 }
0090
/*
 * List of x86 condition-jump opcodes (. + s8).
 * Add 0x10 (and an extra 0x0f prefix) to generate the far form (. + s32).
 */
0095 #define X86_JB 0x72
0096 #define X86_JAE 0x73
0097 #define X86_JE 0x74
0098 #define X86_JNE 0x75
0099 #define X86_JBE 0x76
0100 #define X86_JA 0x77
0101 #define X86_JL 0x7C
0102 #define X86_JGE 0x7D
0103 #define X86_JLE 0x7E
0104 #define X86_JG 0x7F
0105
/* Pick registers outside of the BPF numbering for JIT-internal use */
0107 #define AUX_REG (MAX_BPF_JIT_REG + 1)
0108 #define X86_REG_R9 (MAX_BPF_JIT_REG + 2)
0109
/*
 * The following table maps BPF registers to x86-64 registers.
 *
 * x86-64 register R12 is unused, since if used as base address
 * register in load/store instructions, it always needs an
 * extra byte of encoding and is callee saved.
 *
 * x86-64 register R9 is not used by BPF programs, but can be used by the
 * BPF trampoline. x86-64 register R10 is used for blinding (default BPF_REG_AX).
 */
static const int reg2hex[] = {
	[BPF_REG_0] = 0,  /* RAX */
	[BPF_REG_1] = 7,  /* RDI */
	[BPF_REG_2] = 6,  /* RSI */
	[BPF_REG_3] = 2,  /* RDX */
	[BPF_REG_4] = 1,  /* RCX */
	[BPF_REG_5] = 0,  /* R8  */
	[BPF_REG_6] = 3,  /* RBX, callee saved */
	[BPF_REG_7] = 5,  /* R13, callee saved */
	[BPF_REG_8] = 6,  /* R14, callee saved */
	[BPF_REG_9] = 7,  /* R15, callee saved */
	[BPF_REG_FP] = 5, /* RBP, read-only */
	[BPF_REG_AX] = 2, /* R10, temp register */
	[AUX_REG] = 3,    /* R11, temp register */
	[X86_REG_R9] = 1, /* R9, used as 6th function argument */
};
0136
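/*
 * Maps a BPF register number to its offset within struct pt_regs; used by
 * ex_handler_bpf() to zero the destination register of a faulting
 * BPF_PROBE_MEM load (see the extable fixup encoding in do_jit()).
 */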
0137 static const int reg2pt_regs[] = {
0138 [BPF_REG_0] = offsetof(struct pt_regs, ax),
0139 [BPF_REG_1] = offsetof(struct pt_regs, di),
0140 [BPF_REG_2] = offsetof(struct pt_regs, si),
0141 [BPF_REG_3] = offsetof(struct pt_regs, dx),
0142 [BPF_REG_4] = offsetof(struct pt_regs, cx),
0143 [BPF_REG_5] = offsetof(struct pt_regs, r8),
0144 [BPF_REG_6] = offsetof(struct pt_regs, bx),
0145 [BPF_REG_7] = offsetof(struct pt_regs, r13),
0146 [BPF_REG_8] = offsetof(struct pt_regs, r14),
0147 [BPF_REG_9] = offsetof(struct pt_regs, r15),
0148 };
0149
/*
 * is_ereg() == true if BPF register 'reg' maps to x86-64 r8..r15,
 * which need an extra byte of encoding.
 * rax, rcx, ..., rbp have simpler encoding.
 */
0155 static bool is_ereg(u32 reg)
0156 {
0157 return (1 << reg) & (BIT(BPF_REG_5) |
0158 BIT(AUX_REG) |
0159 BIT(BPF_REG_7) |
0160 BIT(BPF_REG_8) |
0161 BIT(BPF_REG_9) |
0162 BIT(X86_REG_R9) |
0163 BIT(BPF_REG_AX));
0164 }
0165
/*
 * is_ereg_8l() == true if BPF register 'reg' is mapped to access the x86-64
 * lower 8-bit registers dil, sil, bpl, spl, r8b..r15b, which need an extra
 * byte of encoding. al, cl, dl, bl have simpler encoding.
 */
0171 static bool is_ereg_8l(u32 reg)
0172 {
0173 return is_ereg(reg) ||
0174 (1 << reg) & (BIT(BPF_REG_1) |
0175 BIT(BPF_REG_2) |
0176 BIT(BPF_REG_FP));
0177 }
0178
0179 static bool is_axreg(u32 reg)
0180 {
0181 return reg == BPF_REG_0;
0182 }
0183
/* Add modifiers if 'reg' maps to x86-64 registers R8..R15 */
0185 static u8 add_1mod(u8 byte, u32 reg)
0186 {
0187 if (is_ereg(reg))
0188 byte |= 1;
0189 return byte;
0190 }
0191
0192 static u8 add_2mod(u8 byte, u32 r1, u32 r2)
0193 {
0194 if (is_ereg(r1))
0195 byte |= 1;
0196 if (is_ereg(r2))
0197 byte |= 4;
0198 return byte;
0199 }
0200
/* Encode 'dst_reg' register into x86-64 opcode 'byte' */
0202 static u8 add_1reg(u8 byte, u32 dst_reg)
0203 {
0204 return byte + reg2hex[dst_reg];
0205 }
0206
/* Encode 'dst_reg' and 'src_reg' registers into x86-64 opcode 'byte' */
0208 static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
0209 {
0210 return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3);
0211 }
0212
/* Some 1-byte opcodes for binary ALU operations */
0214 static u8 simple_alu_opcodes[] = {
0215 [BPF_ADD] = 0x01,
0216 [BPF_SUB] = 0x29,
0217 [BPF_AND] = 0x21,
0218 [BPF_OR] = 0x09,
0219 [BPF_XOR] = 0x31,
0220 [BPF_LSH] = 0xE0,
0221 [BPF_RSH] = 0xE8,
0222 [BPF_ARSH] = 0xF8,
0223 };
0224
0225 static void jit_fill_hole(void *area, unsigned int size)
0226 {
	/* Fill whole space with INT3 instructions */
0228 memset(area, 0xcc, size);
0229 }
0230
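/* Invalidate a region of BPF text by filling it with INT3 (0xcc) traps. */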
0231 int bpf_arch_text_invalidate(void *dst, size_t len)
0232 {
0233 return IS_ERR_OR_NULL(text_poke_set(dst, 0xcc, len));
0234 }
0235
0236 struct jit_context {
	int cleanup_addr; /* Epilogue code offset */

	/*
	 * Program specific offsets of labels in the code; these rely on the
	 * JIT doing at least 2 passes, recording the position on the first
	 * pass, only to generate the correct offset on the second pass.
	 */
0244 int tail_call_direct_label;
0245 int tail_call_indirect_label;
0246 };
0247
/* Maximum number of bytes emitted while JITing one eBPF insn */
#define BPF_MAX_INSN_SIZE	128
#define BPF_INSN_SAFETY		64

/* Number of bytes emit_patch() needs to generate instructions */
#define X86_PATCH_SIZE		5
/* Number of bytes that will be skipped on tailcall */
#define X86_TAIL_CALL_OFFSET	(11 + ENDBR_INSN_SIZE)
0256
0257 static void push_callee_regs(u8 **pprog, bool *callee_regs_used)
0258 {
0259 u8 *prog = *pprog;
0260
	if (callee_regs_used[0])
		EMIT1(0x53);         /* push rbx */
	if (callee_regs_used[1])
		EMIT2(0x41, 0x55);   /* push r13 */
	if (callee_regs_used[2])
		EMIT2(0x41, 0x56);   /* push r14 */
	if (callee_regs_used[3])
		EMIT2(0x41, 0x57);   /* push r15 */
0269 *pprog = prog;
0270 }
0271
0272 static void pop_callee_regs(u8 **pprog, bool *callee_regs_used)
0273 {
0274 u8 *prog = *pprog;
0275
	if (callee_regs_used[3])
		EMIT2(0x41, 0x5F);   /* pop r15 */
	if (callee_regs_used[2])
		EMIT2(0x41, 0x5E);   /* pop r14 */
	if (callee_regs_used[1])
		EMIT2(0x41, 0x5D);   /* pop r13 */
	if (callee_regs_used[0])
		EMIT1(0x5B);         /* pop rbx */
0284 *pprog = prog;
0285 }
0286
/*
 * Emit x86-64 prologue code for BPF program.
 * The bpf_tail_call helper will skip the first X86_TAIL_CALL_OFFSET bytes
 * while jumping to another program.
 */
0292 static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
0293 bool tail_call_reachable, bool is_subprog)
0294 {
0295 u8 *prog = *pprog;
0296
	/* BPF trampoline can be made to work without these nops,
	 * but let's waste 5 bytes for now and optimize later
	 */
	EMIT_ENDBR();
	memcpy(prog, x86_nops[5], X86_PATCH_SIZE);
	prog += X86_PATCH_SIZE;
	if (!ebpf_from_cbpf) {
		if (tail_call_reachable && !is_subprog)
			/* When it's the entry of the whole tail-call context,
			 * zeroing rax means initialising tail_call_cnt.
			 */
			EMIT2(0x31, 0xC0); /* xor eax, eax */
		else
			/* Keep the same instruction layout. */
			EMIT2(0x66, 0x90); /* nop2 */
	}
	EMIT1(0x55);             /* push rbp */
	EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */

	/* X86_TAIL_CALL_OFFSET is here */
	EMIT_ENDBR();

	/* sub rsp, rounded_stack_depth */
	if (stack_depth)
		EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8));
	if (tail_call_reachable)
		EMIT1(0x50);         /* push rax */
0320 *pprog = prog;
0321 }
0322
0323 static int emit_patch(u8 **pprog, void *func, void *ip, u8 opcode)
0324 {
0325 u8 *prog = *pprog;
0326 s64 offset;
0327
0328 offset = func - (ip + X86_PATCH_SIZE);
0329 if (!is_simm32(offset)) {
0330 pr_err("Target call %p is out of range\n", func);
0331 return -ERANGE;
0332 }
0333 EMIT1_off32(opcode, offset);
0334 *pprog = prog;
0335 return 0;
0336 }
0337
0338 static int emit_call(u8 **pprog, void *func, void *ip)
0339 {
0340 return emit_patch(pprog, func, ip, 0xE8);
0341 }
0342
0343 static int emit_jump(u8 **pprog, void *func, void *ip)
0344 {
0345 return emit_patch(pprog, func, ip, 0xE9);
0346 }
0347
0348 static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
0349 void *old_addr, void *new_addr)
0350 {
0351 const u8 *nop_insn = x86_nops[5];
0352 u8 old_insn[X86_PATCH_SIZE];
0353 u8 new_insn[X86_PATCH_SIZE];
0354 u8 *prog;
0355 int ret;
0356
0357 memcpy(old_insn, nop_insn, X86_PATCH_SIZE);
0358 if (old_addr) {
0359 prog = old_insn;
0360 ret = t == BPF_MOD_CALL ?
0361 emit_call(&prog, old_addr, ip) :
0362 emit_jump(&prog, old_addr, ip);
0363 if (ret)
0364 return ret;
0365 }
0366
0367 memcpy(new_insn, nop_insn, X86_PATCH_SIZE);
0368 if (new_addr) {
0369 prog = new_insn;
0370 ret = t == BPF_MOD_CALL ?
0371 emit_call(&prog, new_addr, ip) :
0372 emit_jump(&prog, new_addr, ip);
0373 if (ret)
0374 return ret;
0375 }
0376
0377 ret = -EBUSY;
0378 mutex_lock(&text_mutex);
0379 if (memcmp(ip, old_insn, X86_PATCH_SIZE))
0380 goto out;
0381 ret = 1;
0382 if (memcmp(ip, new_insn, X86_PATCH_SIZE)) {
0383 text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL);
0384 ret = 0;
0385 }
0386 out:
0387 mutex_unlock(&text_mutex);
0388 return ret;
0389 }
0390
0391 int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
0392 void *old_addr, void *new_addr)
0393 {
0394 if (!is_kernel_text((long)ip) &&
0395 !is_bpf_text_address((long)ip))
0396
0397 return -EINVAL;
0398
0399
0400
0401
0402
0403 if (is_endbr(*(u32 *)ip))
0404 ip += ENDBR_INSN_SIZE;
0405
0406 return __bpf_arch_text_poke(ip, t, old_addr, new_addr);
0407 }
0408
0409 #define EMIT_LFENCE() EMIT3(0x0F, 0xAE, 0xE8)
0410
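/*
 * Emit an indirect jump through 'reg', honouring the Spectre v2 mitigation
 * in use: lfence + jmp for RETPOLINE_LFENCE, a jump to the per-register
 * retpoline thunk for RETPOLINE, or a plain 'jmp *reg' otherwise.
 */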
0411 static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
0412 {
0413 u8 *prog = *pprog;
0414
0415 if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
0416 EMIT_LFENCE();
0417 EMIT2(0xFF, 0xE0 + reg);
0418 } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
0419 OPTIMIZER_HIDE_VAR(reg);
0420 emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
0421 } else {
0422 EMIT2(0xFF, 0xE0 + reg);
0423 }
0424
0425 *pprog = prog;
0426 }
0427
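/*
 * Emit a function return; routed through the return thunk when the RETHUNK
 * mitigation is enabled, otherwise a plain 'ret' (followed by 'int3' when
 * CONFIG_SLS protects against straight-line speculation).
 */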
0428 static void emit_return(u8 **pprog, u8 *ip)
0429 {
0430 u8 *prog = *pprog;
0431
0432 if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
0433 emit_jump(&prog, &__x86_return_thunk, ip);
0434 } else {
0435 EMIT1(0xC3);
0436 if (IS_ENABLED(CONFIG_SLS))
0437 EMIT1(0xCC);
0438 }
0439
0440 *pprog = prog;
0441 }
0442
/*
 * Generate the following code:
 *
 * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
 *   if (index >= array->map.max_entries)
 *     goto out;
 *   if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT)
 *     goto out;
 *   prog = array->ptrs[index];
 *   if (prog == NULL)
 *     goto out;
 *   goto *(prog->bpf_func + prologue_size);
 * out:
 */
0457 static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
0458 u32 stack_depth, u8 *ip,
0459 struct jit_context *ctx)
0460 {
0461 int tcc_off = -4 - round_up(stack_depth, 8);
0462 u8 *prog = *pprog, *start = *pprog;
0463 int offset;
0464
	/*
	 * rdi - pointer to ctx
	 * rsi - pointer to bpf_array
	 * rdx - index in bpf_array
	 */

	/*
	 * if (index >= array->map.max_entries)
	 *	goto out;
	 */
	EMIT2(0x89, 0xD2);                        /* mov edx, edx */
	EMIT3(0x39, 0x56,                         /* cmp dword ptr [rsi + 16], edx */
	      offsetof(struct bpf_array, map.max_entries));

	offset = ctx->tail_call_indirect_label - (prog + 2 - start);
	EMIT2(X86_JBE, offset);                   /* jbe out */

	/*
	 * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT)
	 *	goto out;
	 */
	EMIT2_off32(0x8B, 0x85, tcc_off);         /* mov eax, dword ptr [rbp - tcc_off] */
	EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT);     /* cmp eax, MAX_TAIL_CALL_CNT */

	offset = ctx->tail_call_indirect_label - (prog + 2 - start);
	EMIT2(X86_JAE, offset);                   /* jae out */
	EMIT3(0x83, 0xC0, 0x01);                  /* add eax, 1 */
	EMIT2_off32(0x89, 0x85, tcc_off);         /* mov dword ptr [rbp - tcc_off], eax */

	/* prog = array->ptrs[index]; */
	EMIT4_off32(0x48, 0x8B, 0x8C, 0xD6,       /* mov rcx, [rsi + rdx * 8 + offsetof(..., ptrs)] */
		    offsetof(struct bpf_array, ptrs));

	/*
	 * if (prog == NULL)
	 *	goto out;
	 */
	EMIT3(0x48, 0x85, 0xC9);                  /* test rcx, rcx */

	offset = ctx->tail_call_indirect_label - (prog + 2 - start);
	EMIT2(X86_JE, offset);                    /* je out */

	pop_callee_regs(&prog, callee_regs_used);

	EMIT1(0x58);                              /* pop rax */
	if (stack_depth)
		EMIT3_off32(0x48, 0x81, 0xC4,     /* add rsp, rounded_stack_depth */
			    round_up(stack_depth, 8));

	/* goto *(prog->bpf_func + X86_TAIL_CALL_OFFSET); */
	EMIT4(0x48, 0x8B, 0x49,                   /* mov rcx, qword ptr [rcx + bpf_func] */
	      offsetof(struct bpf_prog, bpf_func));
	EMIT4(0x48, 0x83, 0xC1,                   /* add rcx, X86_TAIL_CALL_OFFSET */
	      X86_TAIL_CALL_OFFSET);

	/*
	 * Now we're ready to jump into the next BPF program:
	 * rdi == ctx (1st arg)
	 * rcx == prog->bpf_func + X86_TAIL_CALL_OFFSET
	 */
	emit_indirect_jump(&prog, 1 /* rcx */, ip + (prog - start));

	/* out: */
	ctx->tail_call_indirect_label = prog - start;
0528 *pprog = prog;
0529 }
0530
0531 static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
0532 u8 **pprog, u8 *ip,
0533 bool *callee_regs_used, u32 stack_depth,
0534 struct jit_context *ctx)
0535 {
0536 int tcc_off = -4 - round_up(stack_depth, 8);
0537 u8 *prog = *pprog, *start = *pprog;
0538 int offset;
0539
	/*
	 * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT)
	 *	goto out;
	 */
0544 EMIT2_off32(0x8B, 0x85, tcc_off);
0545 EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT);
0546
0547 offset = ctx->tail_call_direct_label - (prog + 2 - start);
0548 EMIT2(X86_JAE, offset);
0549 EMIT3(0x83, 0xC0, 0x01);
0550 EMIT2_off32(0x89, 0x85, tcc_off);
0551
0552 poke->tailcall_bypass = ip + (prog - start);
0553 poke->adj_off = X86_TAIL_CALL_OFFSET;
0554 poke->tailcall_target = ip + ctx->tail_call_direct_label - X86_PATCH_SIZE;
0555 poke->bypass_addr = (u8 *)poke->tailcall_target + X86_PATCH_SIZE;
0556
0557 emit_jump(&prog, (u8 *)poke->tailcall_target + X86_PATCH_SIZE,
0558 poke->tailcall_bypass);
0559
0560 pop_callee_regs(&prog, callee_regs_used);
0561 EMIT1(0x58);
0562 if (stack_depth)
0563 EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));
0564
0565 memcpy(prog, x86_nops[5], X86_PATCH_SIZE);
0566 prog += X86_PATCH_SIZE;
0567
	/* out: */
0569 ctx->tail_call_direct_label = prog - start;
0570
0571 *pprog = prog;
0572 }
0573
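/*
 * Patch the direct tail-call sites once the final image address is known:
 * for every poke descriptor whose map slot holds a program, point the
 * tail-call jump at that program and NOP out the bypass jump.
 */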
0574 static void bpf_tail_call_direct_fixup(struct bpf_prog *prog)
0575 {
0576 struct bpf_jit_poke_descriptor *poke;
0577 struct bpf_array *array;
0578 struct bpf_prog *target;
0579 int i, ret;
0580
0581 for (i = 0; i < prog->aux->size_poke_tab; i++) {
0582 poke = &prog->aux->poke_tab[i];
0583 if (poke->aux && poke->aux != prog->aux)
0584 continue;
0585
0586 WARN_ON_ONCE(READ_ONCE(poke->tailcall_target_stable));
0587
0588 if (poke->reason != BPF_POKE_REASON_TAIL_CALL)
0589 continue;
0590
0591 array = container_of(poke->tail_call.map, struct bpf_array, map);
0592 mutex_lock(&array->aux->poke_mutex);
0593 target = array->ptrs[poke->tail_call.key];
0594 if (target) {
0595 ret = __bpf_arch_text_poke(poke->tailcall_target,
0596 BPF_MOD_JUMP, NULL,
0597 (u8 *)target->bpf_func +
0598 poke->adj_off);
0599 BUG_ON(ret < 0);
0600 ret = __bpf_arch_text_poke(poke->tailcall_bypass,
0601 BPF_MOD_JUMP,
0602 (u8 *)poke->tailcall_target +
0603 X86_PATCH_SIZE, NULL);
0604 BUG_ON(ret < 0);
0605 }
0606 WRITE_ONCE(poke->tailcall_target_stable, true);
0607 mutex_unlock(&array->aux->poke_mutex);
0608 }
0609 }
0610
0611 static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
0612 u32 dst_reg, const u32 imm32)
0613 {
0614 u8 *prog = *pprog;
0615 u8 b1, b2, b3;
0616
	/*
	 * Optimization: if imm32 is positive, use 'mov %eax, imm32'
	 * (which zero-extends imm32) to save 2 bytes.
	 */
	if (sign_propagate && (s32)imm32 < 0) {
		/* 'mov %rax, imm32' sign extends imm32 */
0623 b1 = add_1mod(0x48, dst_reg);
0624 b2 = 0xC7;
0625 b3 = 0xC0;
0626 EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
0627 goto done;
0628 }
0629
	/*
	 * Optimization: if imm32 is zero, use 'xor %eax, %eax'
	 * to save 3 bytes.
	 */
	if (imm32 == 0) {
0635 if (is_ereg(dst_reg))
0636 EMIT1(add_2mod(0x40, dst_reg, dst_reg));
0637 b2 = 0x31;
0638 b3 = 0xC0;
0639 EMIT2(b2, add_2reg(b3, dst_reg, dst_reg));
0640 goto done;
0641 }
0642
	/* mov %eax, imm32 */
0644 if (is_ereg(dst_reg))
0645 EMIT1(add_1mod(0x40, dst_reg));
0646 EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
0647 done:
0648 *pprog = prog;
0649 }
0650
0651 static void emit_mov_imm64(u8 **pprog, u32 dst_reg,
0652 const u32 imm32_hi, const u32 imm32_lo)
0653 {
0654 u8 *prog = *pprog;
0655
0656 if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) {
		/*
		 * For emitting plain u32, where the sign bit must not be
		 * propagated, LLVM tends to load imm64 over mov32
		 * directly, so save a couple of bytes by just doing
		 * 'mov %eax, imm32' instead.
		 */
		emit_mov_imm32(&prog, false, dst_reg, imm32_lo);
	} else {
		/* movabsq rax, imm64 */
0666 EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
0667 EMIT(imm32_lo, 4);
0668 EMIT(imm32_hi, 4);
0669 }
0670
0671 *pprog = prog;
0672 }
0673
0674 static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
0675 {
0676 u8 *prog = *pprog;
0677
0678 if (is64) {
0679
0680 EMIT_mov(dst_reg, src_reg);
0681 } else {
0682
0683 if (is_ereg(dst_reg) || is_ereg(src_reg))
0684 EMIT1(add_2mod(0x40, dst_reg, src_reg));
0685 EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
0686 }
0687
0688 *pprog = prog;
0689 }
0690
0691
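/* Emit the ModRM/displacement suffix for a *(ptr_reg + off) memory operand */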
0692 static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 val_reg, int off)
0693 {
0694 u8 *prog = *pprog;
0695
0696 if (is_imm8(off)) {
0697
0698
0699
0700
0701
0702
0703 EMIT2(add_2reg(0x40, ptr_reg, val_reg), off);
0704 } else {
0705
0706 EMIT1_off32(add_2reg(0x80, ptr_reg, val_reg), off);
0707 }
0708 *pprog = prog;
0709 }
0710
/*
 * Emit a REX prefix when one is needed for the two-register form: always
 * for 64-bit operand size, or when either register is an extended r8..r15.
 */
0714 static void maybe_emit_mod(u8 **pprog, u32 dst_reg, u32 src_reg, bool is64)
0715 {
0716 u8 *prog = *pprog;
0717
0718 if (is64)
0719 EMIT1(add_2mod(0x48, dst_reg, src_reg));
0720 else if (is_ereg(dst_reg) || is_ereg(src_reg))
0721 EMIT1(add_2mod(0x40, dst_reg, src_reg));
0722 *pprog = prog;
0723 }
0724
/*
 * Similar version of maybe_emit_mod() for a single register
 */
0728 static void maybe_emit_1mod(u8 **pprog, u32 reg, bool is64)
0729 {
0730 u8 *prog = *pprog;
0731
0732 if (is64)
0733 EMIT1(add_1mod(0x48, reg));
0734 else if (is_ereg(reg))
0735 EMIT1(add_1mod(0x40, reg));
0736 *pprog = prog;
0737 }
0738
/* LDX: dst_reg = *(u8*)(src_reg + off) */
0740 static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
0741 {
0742 u8 *prog = *pprog;
0743
0744 switch (size) {
0745 case BPF_B:
0746
0747 EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
0748 break;
0749 case BPF_H:
0750
0751 EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
0752 break;
0753 case BPF_W:
0754
0755 if (is_ereg(dst_reg) || is_ereg(src_reg))
0756 EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
0757 else
0758 EMIT1(0x8B);
0759 break;
0760 case BPF_DW:
0761
0762 EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
0763 break;
0764 }
0765 emit_insn_suffix(&prog, src_reg, dst_reg, off);
0766 *pprog = prog;
0767 }
0768
/* STX: *(u8*)(dst_reg + off) = src_reg */
0770 static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
0771 {
0772 u8 *prog = *pprog;
0773
0774 switch (size) {
0775 case BPF_B:
0776
0777 if (is_ereg(dst_reg) || is_ereg_8l(src_reg))
0778
0779 EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
0780 else
0781 EMIT1(0x88);
0782 break;
0783 case BPF_H:
0784 if (is_ereg(dst_reg) || is_ereg(src_reg))
0785 EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89);
0786 else
0787 EMIT2(0x66, 0x89);
0788 break;
0789 case BPF_W:
0790 if (is_ereg(dst_reg) || is_ereg(src_reg))
0791 EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89);
0792 else
0793 EMIT1(0x89);
0794 break;
0795 case BPF_DW:
0796 EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
0797 break;
0798 }
0799 emit_insn_suffix(&prog, dst_reg, src_reg, off);
0800 *pprog = prog;
0801 }
0802
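/*
 * Emit a LOCK-prefixed atomic read-modify-write for BPF_STX | BPF_ATOMIC:
 * plain add/and/or/xor, xadd for BPF_ADD | BPF_FETCH, xchg and cmpxchg.
 */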
0803 static int emit_atomic(u8 **pprog, u8 atomic_op,
0804 u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size)
0805 {
0806 u8 *prog = *pprog;
0807
0808 EMIT1(0xF0);
0809
0810 maybe_emit_mod(&prog, dst_reg, src_reg, bpf_size == BPF_DW);
0811
0812
0813 switch (atomic_op) {
0814 case BPF_ADD:
0815 case BPF_AND:
0816 case BPF_OR:
0817 case BPF_XOR:
0818
0819 EMIT1(simple_alu_opcodes[atomic_op]);
0820 break;
0821 case BPF_ADD | BPF_FETCH:
0822
0823 EMIT2(0x0F, 0xC1);
0824 break;
0825 case BPF_XCHG:
0826
0827 EMIT1(0x87);
0828 break;
0829 case BPF_CMPXCHG:
0830
0831 EMIT2(0x0F, 0xB1);
0832 break;
0833 default:
0834 pr_err("bpf_jit: unknown atomic opcode %02x\n", atomic_op);
0835 return -EFAULT;
0836 }
0837
0838 emit_insn_suffix(&prog, dst_reg, src_reg, off);
0839
0840 *pprog = prog;
0841 return 0;
0842 }
0843
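/*
 * Exception fixup handler for faulting BPF_PROBE_MEM loads: zero the
 * destination register (its pt_regs offset is encoded in the upper bits of
 * x->fixup) and resume execution right after the faulting instruction.
 */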
0844 bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
0845 {
0846 u32 reg = x->fixup >> 8;
0847
0848
0849 *(unsigned long *)((void *)regs + reg) = 0;
0850 regs->ip += x->fixup & 0xff;
0851 return true;
0852 }
0853
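/*
 * Scan the program once to record which callee-saved BPF registers (R6-R9)
 * are used and whether a tail call appears, so the prologue/epilogue only
 * save and restore what is actually needed.
 */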
0854 static void detect_reg_usage(struct bpf_insn *insn, int insn_cnt,
0855 bool *regs_used, bool *tail_call_seen)
0856 {
0857 int i;
0858
0859 for (i = 1; i <= insn_cnt; i++, insn++) {
0860 if (insn->code == (BPF_JMP | BPF_TAIL_CALL))
0861 *tail_call_seen = true;
0862 if (insn->dst_reg == BPF_REG_6 || insn->src_reg == BPF_REG_6)
0863 regs_used[0] = true;
0864 if (insn->dst_reg == BPF_REG_7 || insn->src_reg == BPF_REG_7)
0865 regs_used[1] = true;
0866 if (insn->dst_reg == BPF_REG_8 || insn->src_reg == BPF_REG_8)
0867 regs_used[2] = true;
0868 if (insn->dst_reg == BPF_REG_9 || insn->src_reg == BPF_REG_9)
0869 regs_used[3] = true;
0870 }
0871 }
0872
0873 static void emit_nops(u8 **pprog, int len)
0874 {
0875 u8 *prog = *pprog;
0876 int i, noplen;
0877
0878 while (len > 0) {
0879 noplen = len;
0880
0881 if (noplen > ASM_NOP_MAX)
0882 noplen = ASM_NOP_MAX;
0883
0884 for (i = 0; i < noplen; i++)
0885 EMIT1(x86_nops[noplen][i]);
0886 len -= noplen;
0887 }
0888
0889 *pprog = prog;
0890 }
0891
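/*
 * Number of bytes by which the code emitted for this insn in the current
 * pass is smaller than the slot reserved for it in the previous pass
 * (addrs[i] - addrs[i - 1]); used to pad with NOPs so that instruction
 * addresses stay stable between passes.
 */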
0892 #define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp)))
0893
0894 static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image,
0895 int oldproglen, struct jit_context *ctx, bool jmp_padding)
0896 {
0897 bool tail_call_reachable = bpf_prog->aux->tail_call_reachable;
0898 struct bpf_insn *insn = bpf_prog->insnsi;
0899 bool callee_regs_used[4] = {};
0900 int insn_cnt = bpf_prog->len;
0901 bool tail_call_seen = false;
0902 bool seen_exit = false;
0903 u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
0904 int i, excnt = 0;
0905 int ilen, proglen = 0;
0906 u8 *prog = temp;
0907 int err;
0908
0909 detect_reg_usage(insn, insn_cnt, callee_regs_used,
0910 &tail_call_seen);
0911
0912
0913 tail_call_reachable |= tail_call_seen;
0914
0915 emit_prologue(&prog, bpf_prog->aux->stack_depth,
0916 bpf_prog_was_classic(bpf_prog), tail_call_reachable,
0917 bpf_prog->aux->func_idx != 0);
0918 push_callee_regs(&prog, callee_regs_used);
0919
0920 ilen = prog - temp;
0921 if (rw_image)
0922 memcpy(rw_image + proglen, temp, ilen);
0923 proglen += ilen;
0924 addrs[0] = proglen;
0925 prog = temp;
0926
0927 for (i = 1; i <= insn_cnt; i++, insn++) {
0928 const s32 imm32 = insn->imm;
0929 u32 dst_reg = insn->dst_reg;
0930 u32 src_reg = insn->src_reg;
0931 u8 b2 = 0, b3 = 0;
0932 u8 *start_of_ldx;
0933 s64 jmp_offset;
0934 u8 jmp_cond;
0935 u8 *func;
0936 int nops;
0937
0938 switch (insn->code) {
0939
0940 case BPF_ALU | BPF_ADD | BPF_X:
0941 case BPF_ALU | BPF_SUB | BPF_X:
0942 case BPF_ALU | BPF_AND | BPF_X:
0943 case BPF_ALU | BPF_OR | BPF_X:
0944 case BPF_ALU | BPF_XOR | BPF_X:
0945 case BPF_ALU64 | BPF_ADD | BPF_X:
0946 case BPF_ALU64 | BPF_SUB | BPF_X:
0947 case BPF_ALU64 | BPF_AND | BPF_X:
0948 case BPF_ALU64 | BPF_OR | BPF_X:
0949 case BPF_ALU64 | BPF_XOR | BPF_X:
0950 maybe_emit_mod(&prog, dst_reg, src_reg,
0951 BPF_CLASS(insn->code) == BPF_ALU64);
0952 b2 = simple_alu_opcodes[BPF_OP(insn->code)];
0953 EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg));
0954 break;
0955
0956 case BPF_ALU64 | BPF_MOV | BPF_X:
0957 case BPF_ALU | BPF_MOV | BPF_X:
0958 emit_mov_reg(&prog,
0959 BPF_CLASS(insn->code) == BPF_ALU64,
0960 dst_reg, src_reg);
0961 break;
0962
0963
0964 case BPF_ALU | BPF_NEG:
0965 case BPF_ALU64 | BPF_NEG:
0966 maybe_emit_1mod(&prog, dst_reg,
0967 BPF_CLASS(insn->code) == BPF_ALU64);
0968 EMIT2(0xF7, add_1reg(0xD8, dst_reg));
0969 break;
0970
0971 case BPF_ALU | BPF_ADD | BPF_K:
0972 case BPF_ALU | BPF_SUB | BPF_K:
0973 case BPF_ALU | BPF_AND | BPF_K:
0974 case BPF_ALU | BPF_OR | BPF_K:
0975 case BPF_ALU | BPF_XOR | BPF_K:
0976 case BPF_ALU64 | BPF_ADD | BPF_K:
0977 case BPF_ALU64 | BPF_SUB | BPF_K:
0978 case BPF_ALU64 | BPF_AND | BPF_K:
0979 case BPF_ALU64 | BPF_OR | BPF_K:
0980 case BPF_ALU64 | BPF_XOR | BPF_K:
0981 maybe_emit_1mod(&prog, dst_reg,
0982 BPF_CLASS(insn->code) == BPF_ALU64);
0983
0984
0985
0986
0987
0988 switch (BPF_OP(insn->code)) {
0989 case BPF_ADD:
0990 b3 = 0xC0;
0991 b2 = 0x05;
0992 break;
0993 case BPF_SUB:
0994 b3 = 0xE8;
0995 b2 = 0x2D;
0996 break;
0997 case BPF_AND:
0998 b3 = 0xE0;
0999 b2 = 0x25;
1000 break;
1001 case BPF_OR:
1002 b3 = 0xC8;
1003 b2 = 0x0D;
1004 break;
1005 case BPF_XOR:
1006 b3 = 0xF0;
1007 b2 = 0x35;
1008 break;
1009 }
1010
1011 if (is_imm8(imm32))
1012 EMIT3(0x83, add_1reg(b3, dst_reg), imm32);
1013 else if (is_axreg(dst_reg))
1014 EMIT1_off32(b2, imm32);
1015 else
1016 EMIT2_off32(0x81, add_1reg(b3, dst_reg), imm32);
1017 break;
1018
1019 case BPF_ALU64 | BPF_MOV | BPF_K:
1020 case BPF_ALU | BPF_MOV | BPF_K:
1021 emit_mov_imm32(&prog, BPF_CLASS(insn->code) == BPF_ALU64,
1022 dst_reg, imm32);
1023 break;
1024
1025 case BPF_LD | BPF_IMM | BPF_DW:
1026 emit_mov_imm64(&prog, dst_reg, insn[1].imm, insn[0].imm);
1027 insn++;
1028 i++;
1029 break;
1030
1031
1032 case BPF_ALU | BPF_MOD | BPF_X:
1033 case BPF_ALU | BPF_DIV | BPF_X:
1034 case BPF_ALU | BPF_MOD | BPF_K:
1035 case BPF_ALU | BPF_DIV | BPF_K:
1036 case BPF_ALU64 | BPF_MOD | BPF_X:
1037 case BPF_ALU64 | BPF_DIV | BPF_X:
1038 case BPF_ALU64 | BPF_MOD | BPF_K:
1039 case BPF_ALU64 | BPF_DIV | BPF_K: {
1040 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
1041
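			/*
			 * x86 div implicitly uses rax (quotient) and rdx
			 * (remainder), so save whichever of the two is not the
			 * destination, move the divisor out of the way if it
			 * lives in rax/rdx (or load an immediate divisor into
			 * AUX_REG), zero rdx, divide, and copy the wanted half
			 * of the result back into dst_reg.
			 */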
1042 if (dst_reg != BPF_REG_0)
1043 EMIT1(0x50);
1044 if (dst_reg != BPF_REG_3)
1045 EMIT1(0x52);
1046
1047 if (BPF_SRC(insn->code) == BPF_X) {
1048 if (src_reg == BPF_REG_0 ||
1049 src_reg == BPF_REG_3) {
1050
1051 EMIT_mov(AUX_REG, src_reg);
1052 src_reg = AUX_REG;
1053 }
1054 } else {
1055
1056 EMIT3_off32(0x49, 0xC7, 0xC3, imm32);
1057 src_reg = AUX_REG;
1058 }
1059
1060 if (dst_reg != BPF_REG_0)
1061
1062 emit_mov_reg(&prog, is64, BPF_REG_0, dst_reg);
1063
1064
1065
1066
1067
1068 EMIT2(0x31, 0xd2);
1069
1070
1071 maybe_emit_1mod(&prog, src_reg, is64);
1072 EMIT2(0xF7, add_1reg(0xF0, src_reg));
1073
1074 if (BPF_OP(insn->code) == BPF_MOD &&
1075 dst_reg != BPF_REG_3)
1076
1077 emit_mov_reg(&prog, is64, dst_reg, BPF_REG_3);
1078 else if (BPF_OP(insn->code) == BPF_DIV &&
1079 dst_reg != BPF_REG_0)
1080
1081 emit_mov_reg(&prog, is64, dst_reg, BPF_REG_0);
1082
1083 if (dst_reg != BPF_REG_3)
1084 EMIT1(0x5A);
1085 if (dst_reg != BPF_REG_0)
1086 EMIT1(0x58);
1087 break;
1088 }
1089
1090 case BPF_ALU | BPF_MUL | BPF_K:
1091 case BPF_ALU64 | BPF_MUL | BPF_K:
1092 maybe_emit_mod(&prog, dst_reg, dst_reg,
1093 BPF_CLASS(insn->code) == BPF_ALU64);
1094
1095 if (is_imm8(imm32))
1096
1097 EMIT3(0x6B, add_2reg(0xC0, dst_reg, dst_reg),
1098 imm32);
1099 else
1100
1101 EMIT2_off32(0x69,
1102 add_2reg(0xC0, dst_reg, dst_reg),
1103 imm32);
1104 break;
1105
1106 case BPF_ALU | BPF_MUL | BPF_X:
1107 case BPF_ALU64 | BPF_MUL | BPF_X:
1108 maybe_emit_mod(&prog, src_reg, dst_reg,
1109 BPF_CLASS(insn->code) == BPF_ALU64);
1110
1111
1112 EMIT3(0x0F, 0xAF, add_2reg(0xC0, src_reg, dst_reg));
1113 break;
1114
1115
1116 case BPF_ALU | BPF_LSH | BPF_K:
1117 case BPF_ALU | BPF_RSH | BPF_K:
1118 case BPF_ALU | BPF_ARSH | BPF_K:
1119 case BPF_ALU64 | BPF_LSH | BPF_K:
1120 case BPF_ALU64 | BPF_RSH | BPF_K:
1121 case BPF_ALU64 | BPF_ARSH | BPF_K:
1122 maybe_emit_1mod(&prog, dst_reg,
1123 BPF_CLASS(insn->code) == BPF_ALU64);
1124
1125 b3 = simple_alu_opcodes[BPF_OP(insn->code)];
1126 if (imm32 == 1)
1127 EMIT2(0xD1, add_1reg(b3, dst_reg));
1128 else
1129 EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
1130 break;
1131
1132 case BPF_ALU | BPF_LSH | BPF_X:
1133 case BPF_ALU | BPF_RSH | BPF_X:
1134 case BPF_ALU | BPF_ARSH | BPF_X:
1135 case BPF_ALU64 | BPF_LSH | BPF_X:
1136 case BPF_ALU64 | BPF_RSH | BPF_X:
1137 case BPF_ALU64 | BPF_ARSH | BPF_X:
1138
1139
1140 if (dst_reg == BPF_REG_4) {
1141
1142 EMIT_mov(AUX_REG, dst_reg);
1143 dst_reg = AUX_REG;
1144 }
1145
1146 if (src_reg != BPF_REG_4) {
1147 EMIT1(0x51);
1148
1149
1150 EMIT_mov(BPF_REG_4, src_reg);
1151 }
1152
1153
1154 maybe_emit_1mod(&prog, dst_reg,
1155 BPF_CLASS(insn->code) == BPF_ALU64);
1156
1157 b3 = simple_alu_opcodes[BPF_OP(insn->code)];
1158 EMIT2(0xD3, add_1reg(b3, dst_reg));
1159
1160 if (src_reg != BPF_REG_4)
1161 EMIT1(0x59);
1162
1163 if (insn->dst_reg == BPF_REG_4)
1164
1165 EMIT_mov(insn->dst_reg, AUX_REG);
1166 break;
1167
1168 case BPF_ALU | BPF_END | BPF_FROM_BE:
1169 switch (imm32) {
1170 case 16:
1171
1172 EMIT1(0x66);
1173 if (is_ereg(dst_reg))
1174 EMIT1(0x41);
1175 EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8);
1176
1177
1178 if (is_ereg(dst_reg))
1179 EMIT3(0x45, 0x0F, 0xB7);
1180 else
1181 EMIT2(0x0F, 0xB7);
1182 EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
1183 break;
1184 case 32:
1185
1186 if (is_ereg(dst_reg))
1187 EMIT2(0x41, 0x0F);
1188 else
1189 EMIT1(0x0F);
1190 EMIT1(add_1reg(0xC8, dst_reg));
1191 break;
1192 case 64:
1193
1194 EMIT3(add_1mod(0x48, dst_reg), 0x0F,
1195 add_1reg(0xC8, dst_reg));
1196 break;
1197 }
1198 break;
1199
1200 case BPF_ALU | BPF_END | BPF_FROM_LE:
1201 switch (imm32) {
1202 case 16:
1203
1204
1205
1206
1207 if (is_ereg(dst_reg))
1208 EMIT3(0x45, 0x0F, 0xB7);
1209 else
1210 EMIT2(0x0F, 0xB7);
1211 EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
1212 break;
1213 case 32:
1214
1215 if (is_ereg(dst_reg))
1216 EMIT1(0x45);
1217 EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg));
1218 break;
1219 case 64:
1220
1221 break;
1222 }
1223 break;
1224
1225
1226 case BPF_ST | BPF_NOSPEC:
1227 if (boot_cpu_has(X86_FEATURE_XMM2))
1228 EMIT_LFENCE();
1229 break;
1230
1231
1232 case BPF_ST | BPF_MEM | BPF_B:
1233 if (is_ereg(dst_reg))
1234 EMIT2(0x41, 0xC6);
1235 else
1236 EMIT1(0xC6);
1237 goto st;
1238 case BPF_ST | BPF_MEM | BPF_H:
1239 if (is_ereg(dst_reg))
1240 EMIT3(0x66, 0x41, 0xC7);
1241 else
1242 EMIT2(0x66, 0xC7);
1243 goto st;
1244 case BPF_ST | BPF_MEM | BPF_W:
1245 if (is_ereg(dst_reg))
1246 EMIT2(0x41, 0xC7);
1247 else
1248 EMIT1(0xC7);
1249 goto st;
1250 case BPF_ST | BPF_MEM | BPF_DW:
1251 EMIT2(add_1mod(0x48, dst_reg), 0xC7);
1252
1253 st: if (is_imm8(insn->off))
1254 EMIT2(add_1reg(0x40, dst_reg), insn->off);
1255 else
1256 EMIT1_off32(add_1reg(0x80, dst_reg), insn->off);
1257
1258 EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(insn->code)));
1259 break;
1260
1261
1262 case BPF_STX | BPF_MEM | BPF_B:
1263 case BPF_STX | BPF_MEM | BPF_H:
1264 case BPF_STX | BPF_MEM | BPF_W:
1265 case BPF_STX | BPF_MEM | BPF_DW:
1266 emit_stx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
1267 break;
1268
1269
1270 case BPF_LDX | BPF_MEM | BPF_B:
1271 case BPF_LDX | BPF_PROBE_MEM | BPF_B:
1272 case BPF_LDX | BPF_MEM | BPF_H:
1273 case BPF_LDX | BPF_PROBE_MEM | BPF_H:
1274 case BPF_LDX | BPF_MEM | BPF_W:
1275 case BPF_LDX | BPF_PROBE_MEM | BPF_W:
1276 case BPF_LDX | BPF_MEM | BPF_DW:
1277 case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
1278 if (BPF_MODE(insn->code) == BPF_PROBE_MEM) {
			/* Conservatively check that src_reg + insn->off is a
			 * kernel address. The verifier already forbids negative
			 * insn->off for PROBE_MEM, but add abs(insn->off) to the
			 * limit anyway so a negative offset can never slip a
			 * user address past the check (insn->off is only s16).
			 */
1284 u64 limit = TASK_SIZE_MAX + PAGE_SIZE + abs(insn->off);
1285 u8 *end_of_jmp1, *end_of_jmp2;
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295 EMIT2(add_1mod(0x48, AUX_REG), add_1reg(0xB8, AUX_REG));
1296 EMIT((u32)limit, 4);
1297 EMIT(limit >> 32, 4);
1298
1299 maybe_emit_mod(&prog, src_reg, AUX_REG, true);
1300 EMIT2(0x39, add_2reg(0xC0, src_reg, AUX_REG));
1301
1302 EMIT2(X86_JB, 0);
1303 end_of_jmp1 = prog;
1304
1305
1306 emit_mov_reg(&prog, true, AUX_REG, src_reg);
1307
1308 maybe_emit_1mod(&prog, AUX_REG, true);
1309 EMIT2_off32(0x81, add_1reg(0xC0, AUX_REG), insn->off);
1310
			/* Jump if not carry to start_of_ldx; if the add wrapped,
			 * the result would be a (rejected) user address, so fall
			 * through and zero dst_reg instead.
			 */
			EMIT2(0x73 /* JNC */, 0);
1315 end_of_jmp2 = prog;
1316
1317
1318 emit_mov_imm32(&prog, false, dst_reg, 0);
1319
1320 EMIT2(0xEB, 0);
1321
1322
1323 end_of_jmp1[-1] = end_of_jmp2 - end_of_jmp1;
1324
1325 start_of_ldx = prog;
1326 end_of_jmp2[-1] = start_of_ldx - end_of_jmp2;
1327 }
1328 emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
1329 if (BPF_MODE(insn->code) == BPF_PROBE_MEM) {
1330 struct exception_table_entry *ex;
1331 u8 *_insn = image + proglen + (start_of_ldx - temp);
1332 s64 delta;
1333
1334
1335 start_of_ldx[-1] = prog - start_of_ldx;
1336
1337 if (!bpf_prog->aux->extable)
1338 break;
1339
1340 if (excnt >= bpf_prog->aux->num_exentries) {
1341 pr_err("ex gen bug\n");
1342 return -EFAULT;
1343 }
1344 ex = &bpf_prog->aux->extable[excnt++];
1345
1346 delta = _insn - (u8 *)&ex->insn;
1347 if (!is_simm32(delta)) {
1348 pr_err("extable->insn doesn't fit into 32-bit\n");
1349 return -EFAULT;
1350 }
1351
1352 ex = (void *)rw_image + ((void *)ex - (void *)image);
1353
1354 ex->insn = delta;
1355
1356 ex->data = EX_TYPE_BPF;
1357
1358 if (dst_reg > BPF_REG_9) {
1359 pr_err("verifier error\n");
1360 return -EFAULT;
1361 }
1362
1363
1364
1365
1366
1367
1368
1369
1370 ex->fixup = (prog - start_of_ldx) | (reg2pt_regs[dst_reg] << 8);
1371 }
1372 break;
1373
1374 case BPF_STX | BPF_ATOMIC | BPF_W:
1375 case BPF_STX | BPF_ATOMIC | BPF_DW:
1376 if (insn->imm == (BPF_AND | BPF_FETCH) ||
1377 insn->imm == (BPF_OR | BPF_FETCH) ||
1378 insn->imm == (BPF_XOR | BPF_FETCH)) {
1379 bool is64 = BPF_SIZE(insn->code) == BPF_DW;
1380 u32 real_src_reg = src_reg;
1381 u32 real_dst_reg = dst_reg;
1382 u8 *branch_target;
1383
1384
1385
1386
1387
1388
1389
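			/*
			 * These fetching ops have no single x86 instruction,
			 * so emulate them with a CMPXCHG loop: load the old
			 * value, apply the ALU op into AUX_REG and CMPXCHG it
			 * back, retrying if another CPU raced with us. RAX is
			 * the implicit CMPXCHG operand, so R0 is stashed in
			 * BPF_REG_AX around the loop.
			 */
			/* Will need RAX as a CMPXCHG operand so save R0 */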
1390 emit_mov_reg(&prog, true, BPF_REG_AX, BPF_REG_0);
1391 if (src_reg == BPF_REG_0)
1392 real_src_reg = BPF_REG_AX;
1393 if (dst_reg == BPF_REG_0)
1394 real_dst_reg = BPF_REG_AX;
1395
1396 branch_target = prog;
1397
1398 emit_ldx(&prog, BPF_SIZE(insn->code),
1399 BPF_REG_0, real_dst_reg, insn->off);
1400
1401
1402
1403
1404 emit_mov_reg(&prog, is64, AUX_REG, BPF_REG_0);
1405 maybe_emit_mod(&prog, AUX_REG, real_src_reg, is64);
1406 EMIT2(simple_alu_opcodes[BPF_OP(insn->imm)],
1407 add_2reg(0xC0, AUX_REG, real_src_reg));
1408
1409 err = emit_atomic(&prog, BPF_CMPXCHG,
1410 real_dst_reg, AUX_REG,
1411 insn->off,
1412 BPF_SIZE(insn->code));
1413 if (WARN_ON(err))
1414 return err;
1415
1416
1417
1418
1419 EMIT2(X86_JNE, -(prog - branch_target) - 2);
1420
1421 emit_mov_reg(&prog, is64, real_src_reg, BPF_REG_0);
1422
1423 emit_mov_reg(&prog, true, BPF_REG_0, BPF_REG_AX);
1424 break;
1425 }
1426
1427 err = emit_atomic(&prog, insn->imm, dst_reg, src_reg,
1428 insn->off, BPF_SIZE(insn->code));
1429 if (err)
1430 return err;
1431 break;
1432
1433
1434 case BPF_JMP | BPF_CALL:
1435 func = (u8 *) __bpf_call_base + imm32;
1436 if (tail_call_reachable) {
				/* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
1438 EMIT3_off32(0x48, 0x8B, 0x85,
1439 -round_up(bpf_prog->aux->stack_depth, 8) - 8);
1440 if (!imm32 || emit_call(&prog, func, image + addrs[i - 1] + 7))
1441 return -EINVAL;
1442 } else {
1443 if (!imm32 || emit_call(&prog, func, image + addrs[i - 1]))
1444 return -EINVAL;
1445 }
1446 break;
1447
1448 case BPF_JMP | BPF_TAIL_CALL:
1449 if (imm32)
1450 emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1],
1451 &prog, image + addrs[i - 1],
1452 callee_regs_used,
1453 bpf_prog->aux->stack_depth,
1454 ctx);
1455 else
1456 emit_bpf_tail_call_indirect(&prog,
1457 callee_regs_used,
1458 bpf_prog->aux->stack_depth,
1459 image + addrs[i - 1],
1460 ctx);
1461 break;
1462
1463
1464 case BPF_JMP | BPF_JEQ | BPF_X:
1465 case BPF_JMP | BPF_JNE | BPF_X:
1466 case BPF_JMP | BPF_JGT | BPF_X:
1467 case BPF_JMP | BPF_JLT | BPF_X:
1468 case BPF_JMP | BPF_JGE | BPF_X:
1469 case BPF_JMP | BPF_JLE | BPF_X:
1470 case BPF_JMP | BPF_JSGT | BPF_X:
1471 case BPF_JMP | BPF_JSLT | BPF_X:
1472 case BPF_JMP | BPF_JSGE | BPF_X:
1473 case BPF_JMP | BPF_JSLE | BPF_X:
1474 case BPF_JMP32 | BPF_JEQ | BPF_X:
1475 case BPF_JMP32 | BPF_JNE | BPF_X:
1476 case BPF_JMP32 | BPF_JGT | BPF_X:
1477 case BPF_JMP32 | BPF_JLT | BPF_X:
1478 case BPF_JMP32 | BPF_JGE | BPF_X:
1479 case BPF_JMP32 | BPF_JLE | BPF_X:
1480 case BPF_JMP32 | BPF_JSGT | BPF_X:
1481 case BPF_JMP32 | BPF_JSLT | BPF_X:
1482 case BPF_JMP32 | BPF_JSGE | BPF_X:
1483 case BPF_JMP32 | BPF_JSLE | BPF_X:
1484
1485 maybe_emit_mod(&prog, dst_reg, src_reg,
1486 BPF_CLASS(insn->code) == BPF_JMP);
1487 EMIT2(0x39, add_2reg(0xC0, dst_reg, src_reg));
1488 goto emit_cond_jmp;
1489
1490 case BPF_JMP | BPF_JSET | BPF_X:
1491 case BPF_JMP32 | BPF_JSET | BPF_X:
1492
1493 maybe_emit_mod(&prog, dst_reg, src_reg,
1494 BPF_CLASS(insn->code) == BPF_JMP);
1495 EMIT2(0x85, add_2reg(0xC0, dst_reg, src_reg));
1496 goto emit_cond_jmp;
1497
1498 case BPF_JMP | BPF_JSET | BPF_K:
1499 case BPF_JMP32 | BPF_JSET | BPF_K:
1500
1501 maybe_emit_1mod(&prog, dst_reg,
1502 BPF_CLASS(insn->code) == BPF_JMP);
1503 EMIT2_off32(0xF7, add_1reg(0xC0, dst_reg), imm32);
1504 goto emit_cond_jmp;
1505
1506 case BPF_JMP | BPF_JEQ | BPF_K:
1507 case BPF_JMP | BPF_JNE | BPF_K:
1508 case BPF_JMP | BPF_JGT | BPF_K:
1509 case BPF_JMP | BPF_JLT | BPF_K:
1510 case BPF_JMP | BPF_JGE | BPF_K:
1511 case BPF_JMP | BPF_JLE | BPF_K:
1512 case BPF_JMP | BPF_JSGT | BPF_K:
1513 case BPF_JMP | BPF_JSLT | BPF_K:
1514 case BPF_JMP | BPF_JSGE | BPF_K:
1515 case BPF_JMP | BPF_JSLE | BPF_K:
1516 case BPF_JMP32 | BPF_JEQ | BPF_K:
1517 case BPF_JMP32 | BPF_JNE | BPF_K:
1518 case BPF_JMP32 | BPF_JGT | BPF_K:
1519 case BPF_JMP32 | BPF_JLT | BPF_K:
1520 case BPF_JMP32 | BPF_JGE | BPF_K:
1521 case BPF_JMP32 | BPF_JLE | BPF_K:
1522 case BPF_JMP32 | BPF_JSGT | BPF_K:
1523 case BPF_JMP32 | BPF_JSLT | BPF_K:
1524 case BPF_JMP32 | BPF_JSGE | BPF_K:
1525 case BPF_JMP32 | BPF_JSLE | BPF_K:
1526
1527 if (imm32 == 0) {
1528 maybe_emit_mod(&prog, dst_reg, dst_reg,
1529 BPF_CLASS(insn->code) == BPF_JMP);
1530 EMIT2(0x85, add_2reg(0xC0, dst_reg, dst_reg));
1531 goto emit_cond_jmp;
1532 }
1533
1534
1535 maybe_emit_1mod(&prog, dst_reg,
1536 BPF_CLASS(insn->code) == BPF_JMP);
1537
1538 if (is_imm8(imm32))
1539 EMIT3(0x83, add_1reg(0xF8, dst_reg), imm32);
1540 else
1541 EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32);
1542
1543 emit_cond_jmp:
1544 switch (BPF_OP(insn->code)) {
1545 case BPF_JEQ:
1546 jmp_cond = X86_JE;
1547 break;
1548 case BPF_JSET:
1549 case BPF_JNE:
1550 jmp_cond = X86_JNE;
1551 break;
1552 case BPF_JGT:
1553
1554 jmp_cond = X86_JA;
1555 break;
1556 case BPF_JLT:
1557
1558 jmp_cond = X86_JB;
1559 break;
1560 case BPF_JGE:
1561
1562 jmp_cond = X86_JAE;
1563 break;
1564 case BPF_JLE:
1565
1566 jmp_cond = X86_JBE;
1567 break;
1568 case BPF_JSGT:
1569
1570 jmp_cond = X86_JG;
1571 break;
1572 case BPF_JSLT:
1573
1574 jmp_cond = X86_JL;
1575 break;
1576 case BPF_JSGE:
1577
1578 jmp_cond = X86_JGE;
1579 break;
1580 case BPF_JSLE:
1581
1582 jmp_cond = X86_JLE;
1583 break;
1584 default:
1585 return -EFAULT;
1586 }
1587 jmp_offset = addrs[i + insn->off] - addrs[i];
1588 if (is_imm8(jmp_offset)) {
1589 if (jmp_padding) {
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605 nops = INSN_SZ_DIFF - 2;
1606 if (nops != 0 && nops != 4) {
1607 pr_err("unexpected jmp_cond padding: %d bytes\n",
1608 nops);
1609 return -EFAULT;
1610 }
1611 emit_nops(&prog, nops);
1612 }
1613 EMIT2(jmp_cond, jmp_offset);
1614 } else if (is_simm32(jmp_offset)) {
1615 EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
1616 } else {
1617 pr_err("cond_jmp gen bug %llx\n", jmp_offset);
1618 return -EFAULT;
1619 }
1620
1621 break;
1622
1623 case BPF_JMP | BPF_JA:
1624 if (insn->off == -1)
1625
1626
1627
1628
1629
1630
1631 jmp_offset = -2;
1632 else
1633 jmp_offset = addrs[i + insn->off] - addrs[i];
1634
1635 if (!jmp_offset) {
1636
1637
1638
1639
1640 if (jmp_padding) {
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651 nops = INSN_SZ_DIFF;
1652 if (nops != 0 && nops != 2 && nops != 5) {
1653 pr_err("unexpected nop jump padding: %d bytes\n",
1654 nops);
1655 return -EFAULT;
1656 }
1657 emit_nops(&prog, nops);
1658 }
1659 break;
1660 }
1661 emit_jmp:
1662 if (is_imm8(jmp_offset)) {
1663 if (jmp_padding) {
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676 nops = INSN_SZ_DIFF - 2;
1677 if (nops != 0 && nops != 3) {
1678 pr_err("unexpected jump padding: %d bytes\n",
1679 nops);
1680 return -EFAULT;
1681 }
1682 emit_nops(&prog, INSN_SZ_DIFF - 2);
1683 }
1684 EMIT2(0xEB, jmp_offset);
1685 } else if (is_simm32(jmp_offset)) {
1686 EMIT1_off32(0xE9, jmp_offset);
1687 } else {
1688 pr_err("jmp gen bug %llx\n", jmp_offset);
1689 return -EFAULT;
1690 }
1691 break;
1692
1693 case BPF_JMP | BPF_EXIT:
1694 if (seen_exit) {
1695 jmp_offset = ctx->cleanup_addr - addrs[i];
1696 goto emit_jmp;
1697 }
1698 seen_exit = true;
			/* Update cleanup_addr */
			ctx->cleanup_addr = proglen;
			pop_callee_regs(&prog, callee_regs_used);
			EMIT1(0xC9);         /* leave */
1703 emit_return(&prog, image + addrs[i - 1] + (prog - temp));
1704 break;
1705
1706 default:
			/*
			 * By design the x86-64 JIT should support all BPF instructions.
			 * This error will be seen if a new instruction was added
			 * to the interpreter, but not to the JIT, or if there is
			 * junk in bpf_prog.
			 */
1713 pr_err("bpf_jit: unknown opcode %02x\n", insn->code);
1714 return -EINVAL;
1715 }
1716
1717 ilen = prog - temp;
1718 if (ilen > BPF_MAX_INSN_SIZE) {
1719 pr_err("bpf_jit: fatal insn size error\n");
1720 return -EFAULT;
1721 }
1722
1723 if (image) {
			/*
			 * When populating the image, assert that:
			 *
			 *  i) We do not write beyond the allocated space, and
			 * ii) addrs[i] did not change from the prior run, in order
			 *     to validate assumptions made for computing branch
			 *     displacements.
			 */
1732 if (unlikely(proglen + ilen > oldproglen ||
1733 proglen + ilen != addrs[i])) {
1734 pr_err("bpf_jit: fatal error\n");
1735 return -EFAULT;
1736 }
1737 memcpy(rw_image + proglen, temp, ilen);
1738 }
1739 proglen += ilen;
1740 addrs[i] = proglen;
1741 prog = temp;
1742 }
1743
1744 if (image && excnt != bpf_prog->aux->num_exentries) {
1745 pr_err("extable is not populated\n");
1746 return -EFAULT;
1747 }
1748 return proglen;
1749 }
1750
1751 static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
1752 int stack_size)
1753 {
1754 int i;
1755
	/* Store function arguments to stack.
	 * For a function that accepts two pointers the sequence will be:
	 *   mov QWORD PTR [rbp-0x10], rdi
	 *   mov QWORD PTR [rbp-0x8], rsi
	 */
1760 for (i = 0; i < min(nr_args, 6); i++)
1761 emit_stx(prog, bytes_to_bpf_size(m->arg_size[i]),
1762 BPF_REG_FP,
1763 i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
1764 -(stack_size - i * 8));
1765 }
1766
1767 static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
1768 int stack_size)
1769 {
1770 int i;
1771
	/* Restore function arguments from stack.
	 * For a function that accepts two pointers the sequence will be:
	 *   mov rdi, QWORD PTR [rbp-0x10]
	 *   mov rsi, QWORD PTR [rbp-0x8]
	 */
1777 for (i = 0; i < min(nr_args, 6); i++)
1778 emit_ldx(prog, bytes_to_bpf_size(m->arg_size[i]),
1779 i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
1780 BPF_REG_FP,
1781 -(stack_size - i * 8));
1782 }
1783
1784 static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
1785 struct bpf_tramp_link *l, int stack_size,
1786 int run_ctx_off, bool save_ret)
1787 {
1788 void (*exit)(struct bpf_prog *prog, u64 start,
1789 struct bpf_tramp_run_ctx *run_ctx) = __bpf_prog_exit;
1790 u64 (*enter)(struct bpf_prog *prog,
1791 struct bpf_tramp_run_ctx *run_ctx) = __bpf_prog_enter;
1792 u8 *prog = *pprog;
1793 u8 *jmp_insn;
1794 int ctx_cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
1795 struct bpf_prog *p = l->link.prog;
1796 u64 cookie = l->cookie;
1797
1798
1799 emit_mov_imm64(&prog, BPF_REG_1, (long) cookie >> 32, (u32) (long) cookie);
1800
1801
1802
1803
1804
1805
1806
1807
1808 emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_1, -run_ctx_off + ctx_cookie_off);
1809
1810 if (p->aux->sleepable) {
1811 enter = __bpf_prog_enter_sleepable;
1812 exit = __bpf_prog_exit_sleepable;
1813 } else if (p->expected_attach_type == BPF_LSM_CGROUP) {
1814 enter = __bpf_prog_enter_lsm_cgroup;
1815 exit = __bpf_prog_exit_lsm_cgroup;
1816 }
1817
1818
1819 emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
1820
1821 EMIT4(0x48, 0x8D, 0x75, -run_ctx_off);
1822
1823 if (emit_call(&prog, enter, prog))
1824 return -EINVAL;
1825
1826 emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
1827
	/* if (__bpf_prog_enter*(prog) == 0)
	 *	goto skip_exec_of_prog;
	 */
	EMIT3(0x48, 0x85, 0xC0);  /* test rax, rax */
	/* emit 2 nops that will be replaced with JE insn */
	jmp_insn = prog;
	emit_nops(&prog, 2);
1835
1836
1837 EMIT4(0x48, 0x8D, 0x7D, -stack_size);
1838
1839 if (!p->jited)
1840 emit_mov_imm64(&prog, BPF_REG_2,
1841 (long) p->insnsi >> 32,
1842 (u32) (long) p->insnsi);
1843
1844 if (emit_call(&prog, p->bpf_func, prog))
1845 return -EINVAL;
1846
	/*
	 * BPF_TRAMP_MODIFY_RETURN trampolines can modify the return
	 * value of the previous call, which is then passed on the stack to
	 * the next BPF program.
	 *
	 * A BPF_TRAMP_FENTRY trampoline may need to return the return
	 * value of a BPF_PROG_TYPE_STRUCT_OPS prog.
	 */
1855 if (save_ret)
1856 emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
1857
	/* replace 2 nops with JE insn, since jmp target is known */
1859 jmp_insn[0] = X86_JE;
1860 jmp_insn[1] = prog - jmp_insn - 2;
1861
1862
1863 emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
1864
1865 emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
1866
1867 EMIT4(0x48, 0x8D, 0x55, -run_ctx_off);
1868 if (emit_call(&prog, exit, prog))
1869 return -EINVAL;
1870
1871 *pprog = prog;
1872 return 0;
1873 }
1874
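/* Pad with NOPs up to the next 'align'-byte boundary. */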
1875 static void emit_align(u8 **pprog, u32 align)
1876 {
1877 u8 *target, *prog = *pprog;
1878
1879 target = PTR_ALIGN(prog, align);
1880 if (target != prog)
1881 emit_nops(&prog, target - prog);
1882
1883 *pprog = prog;
1884 }
1885
1886 static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
1887 {
1888 u8 *prog = *pprog;
1889 s64 offset;
1890
1891 offset = func - (ip + 2 + 4);
1892 if (!is_simm32(offset)) {
1893 pr_err("Target %p is out of range\n", func);
1894 return -EINVAL;
1895 }
1896 EMIT2_off32(0x0F, jmp_cond + 0x10, offset);
1897 *pprog = prog;
1898 return 0;
1899 }
1900
1901 static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
1902 struct bpf_tramp_links *tl, int stack_size,
1903 int run_ctx_off, bool save_ret)
1904 {
1905 int i;
1906 u8 *prog = *pprog;
1907
1908 for (i = 0; i < tl->nr_links; i++) {
1909 if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size,
1910 run_ctx_off, save_ret))
1911 return -EINVAL;
1912 }
1913 *pprog = prog;
1914 return 0;
1915 }
1916
1917 static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
1918 struct bpf_tramp_links *tl, int stack_size,
1919 int run_ctx_off, u8 **branches)
1920 {
1921 u8 *prog = *pprog;
1922 int i;
1923
	/* The first fmod_ret program will receive a garbage return value.
	 * Set this to 0 to avoid confusing the program.
	 */
	emit_mov_imm32(&prog, false, BPF_REG_0, 0);
1928 emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
1929 for (i = 0; i < tl->nr_links; i++) {
1930 if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, true))
1931 return -EINVAL;
1932
		/* mod_ret prog stored its return value into [rbp - 8]. Emit:
		 * if (*(u64 *)(rbp - 8) != 0)
		 *	goto do_fexit;
		 */
		/* cmp QWORD PTR [rbp - 0x8], 0x0 */
		EMIT4(0x48, 0x83, 0x7d, 0xf8); EMIT1(0x00);
1939
		/* Save the location of the branch and generate 6 nops
		 * (4 bytes for an offset and 2 bytes for the jump). These nops
		 * are replaced with a conditional jump once do_fexit (i.e. the
		 * start of the fexit invocation) is finalized.
		 */
		branches[i] = prog;
1946 emit_nops(&prog, 4 + 2);
1947 }
1948
1949 *pprog = prog;
1950 return 0;
1951 }
1952
/*
 * arch_prepare_bpf_trampoline() generates the trampoline that fentry/fexit/
 * fmod_ret/struct_ops BPF programs attach through. In outline the emitted
 * code does:
 *
 *   push rbp; mov rbp, rsp; sub rsp, stack_size; push rbx
 *   save the traced function's argument registers and argument count
 *   (and, with BPF_TRAMP_F_IP_ARG, its IP) on the stack
 *   call __bpf_tramp_enter(im)            (only with BPF_TRAMP_F_CALL_ORIG)
 *   run every fentry program
 *   run every fmod_ret program; a non-zero return skips the original call
 *   restore the saved arguments and call the original function
 *                                         (only with BPF_TRAMP_F_CALL_ORIG)
 *   run every fexit program
 *   call __bpf_tramp_exit(im)             (only with BPF_TRAMP_F_CALL_ORIG)
 *   restore the return value, pop rbx, leave, ret
 *
 * Each attached program is invoked through invoke_bpf_prog(), which brackets
 * it with __bpf_prog_enter*()/__bpf_prog_exit*() and passes a pointer to the
 * saved argument area as its context.
 */
2013 int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
2014 const struct btf_func_model *m, u32 flags,
2015 struct bpf_tramp_links *tlinks,
2016 void *orig_call)
2017 {
2018 int ret, i, nr_args = m->nr_args;
2019 int regs_off, ip_off, args_off, stack_size = nr_args * 8, run_ctx_off;
2020 struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
2021 struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
2022 struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
2023 u8 **branches = NULL;
2024 u8 *prog;
2025 bool save_ret;
2026
	/* x86-64 supports up to 6 arguments. 7+ can be added in the future */
2028 if (nr_args > 6)
2029 return -ENOTSUPP;
2030
	/* Generated trampoline stack layout:
	 *
	 * RBP + 8           [ return address    ]
	 * RBP + 0           [ RBP               ]
	 *
	 * RBP - 8           [ return value      ]  BPF_TRAMP_F_CALL_ORIG or
	 *                                          BPF_TRAMP_F_RET_FENTRY_RET flags
	 *
	 *                   [ reg_argN          ]  always
	 *                   [ ...               ]
	 * RBP - regs_off    [ reg_arg1          ]  program's ctx pointer
	 *
	 * RBP - args_off    [ args count        ]  always
	 *
	 * RBP - ip_off      [ traced function   ]  BPF_TRAMP_F_IP_ARG flag
	 *
	 * RBP - run_ctx_off [ bpf_tramp_run_ctx ]
	 */
2051 save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
2052 if (save_ret)
2053 stack_size += 8;
2054
2055 regs_off = stack_size;
2056
2057
2058 stack_size += 8;
2059 args_off = stack_size;
2060
2061 if (flags & BPF_TRAMP_F_IP_ARG)
2062 stack_size += 8;
2063
2064 ip_off = stack_size;
2065
2066 stack_size += (sizeof(struct bpf_tramp_run_ctx) + 7) & ~0x7;
2067 run_ctx_off = stack_size;
2068
2069 if (flags & BPF_TRAMP_F_SKIP_FRAME) {
		/* Skip the patched call instruction (and the ENDBR, if any) so
		 * orig_call points at the actual body of the traced function.
		 */
2073 if (is_endbr(*(u32 *)orig_call))
2074 orig_call += ENDBR_INSN_SIZE;
2075 orig_call += X86_PATCH_SIZE;
2076 }
2077
2078 prog = image;
2079
2080 EMIT_ENDBR();
	EMIT1(0x55);		 /* push rbp */
	EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
	EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */
	EMIT1(0x53);		 /* push rbx */
2085
	/* Store the number of arguments of the traced function:
	 *   mov rax, nr_args
	 *   mov QWORD PTR [rbp - args_off], rax
	 */
	emit_mov_imm64(&prog, BPF_REG_0, 0, (u32) nr_args);
	emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -args_off);
2092
2093 if (flags & BPF_TRAMP_F_IP_ARG) {
		/* Store the IP address of the traced function:
		 *   mov rax, QWORD PTR [rbp + 8]
		 *   sub rax, X86_PATCH_SIZE
		 *   mov QWORD PTR [rbp - ip_off], rax
		 */
2099 emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8);
2100 EMIT4(0x48, 0x83, 0xe8, X86_PATCH_SIZE);
2101 emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -ip_off);
2102 }
2103
2104 save_regs(m, &prog, nr_args, regs_off);
2105
2106 if (flags & BPF_TRAMP_F_CALL_ORIG) {
2107
2108 emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im);
2109 if (emit_call(&prog, __bpf_tramp_enter, prog)) {
2110 ret = -EINVAL;
2111 goto cleanup;
2112 }
2113 }
2114
2115 if (fentry->nr_links)
2116 if (invoke_bpf(m, &prog, fentry, regs_off, run_ctx_off,
2117 flags & BPF_TRAMP_F_RET_FENTRY_RET))
2118 return -EINVAL;
2119
2120 if (fmod_ret->nr_links) {
2121 branches = kcalloc(fmod_ret->nr_links, sizeof(u8 *),
2122 GFP_KERNEL);
2123 if (!branches)
2124 return -ENOMEM;
2125
2126 if (invoke_bpf_mod_ret(m, &prog, fmod_ret, regs_off,
2127 run_ctx_off, branches)) {
2128 ret = -EINVAL;
2129 goto cleanup;
2130 }
2131 }
2132
2133 if (flags & BPF_TRAMP_F_CALL_ORIG) {
2134 restore_regs(m, &prog, nr_args, regs_off);
2135
2136 if (flags & BPF_TRAMP_F_ORIG_STACK) {
2137 emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8);
2138 EMIT2(0xff, 0xd0);
2139 } else {
2140
2141 if (emit_call(&prog, orig_call, prog)) {
2142 ret = -EINVAL;
2143 goto cleanup;
2144 }
2145 }
		/* remember return value in a stack for bpf prog to access */
2147 emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
2148 im->ip_after_call = prog;
2149 memcpy(prog, x86_nops[5], X86_PATCH_SIZE);
2150 prog += X86_PATCH_SIZE;
2151 }
2152
2153 if (fmod_ret->nr_links) {
		/* From the Intel 64 and IA-32 Architectures Optimization
		 * Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler
		 * Coding Rule 11: All branch targets should be 16-byte
		 * aligned.
		 */
		emit_align(&prog, 16);
		/* Update the branches saved in invoke_bpf_mod_ret with the
		 * aligned address of do_fexit.
		 */
		for (i = 0; i < fmod_ret->nr_links; i++)
2164 emit_cond_near_jump(&branches[i], prog, branches[i],
2165 X86_JNE);
2166 }
2167
2168 if (fexit->nr_links)
2169 if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off, false)) {
2170 ret = -EINVAL;
2171 goto cleanup;
2172 }
2173
2174 if (flags & BPF_TRAMP_F_RESTORE_REGS)
2175 restore_regs(m, &prog, nr_args, regs_off);
2176
2177
2178
2179
2180
2181 if (flags & BPF_TRAMP_F_CALL_ORIG) {
2182 im->ip_epilogue = prog;
2183
2184 emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im);
2185 if (emit_call(&prog, __bpf_tramp_exit, prog)) {
2186 ret = -EINVAL;
2187 goto cleanup;
2188 }
2189 }
	/* Restore return value of orig_call or fentry prog back into RAX */
	if (save_ret)
2192 emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
2193
	EMIT1(0x5B); /* pop rbx */
	EMIT1(0xC9); /* leave */
	if (flags & BPF_TRAMP_F_SKIP_FRAME)
		/* skip our return address and return to parent */
		EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
2199 emit_return(&prog, prog);
2200
2201 if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) {
2202 ret = -EFAULT;
2203 goto cleanup;
2204 }
2205 ret = prog - (u8 *)image;
2206
2207 cleanup:
2208 kfree(branches);
2209 return ret;
2210 }
2211
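/*
 * Emit a binary search over the sorted 'progs' addresses for the BPF
 * dispatcher: compare rdx (the target address) against the pivot entry and
 * recurse into the lower/upper half, ending in a direct 'je' per program
 * with an indirect jump as the fallback.
 */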
2212 static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
2213 {
2214 u8 *jg_reloc, *prog = *pprog;
2215 int pivot, err, jg_bytes = 1;
2216 s64 jg_offset;
2217
2218 if (a == b) {
		/* Leaf node of recursion, i.e. not a range of indices
		 * anymore.
		 */
		EMIT1(add_1mod(0x48, BPF_REG_3));	/* cmp rdx, func */
2223 if (!is_simm32(progs[a]))
2224 return -1;
2225 EMIT2_off32(0x81, add_1reg(0xF8, BPF_REG_3),
2226 progs[a]);
2227 err = emit_cond_near_jump(&prog,
2228 (void *)progs[a], prog,
2229 X86_JE);
2230 if (err)
2231 return err;
2232
		emit_indirect_jump(&prog, 2 /* rdx */, prog);
2234
2235 *pprog = prog;
2236 return 0;
2237 }
2238
	/* Not a leaf node, so we pivot, and recursively descend into
	 * the lower and upper ranges.
	 */
	pivot = (b - a) / 2;
	EMIT1(add_1mod(0x48, BPF_REG_3));		/* cmp rdx, func */
2244 if (!is_simm32(progs[a + pivot]))
2245 return -1;
2246 EMIT2_off32(0x81, add_1reg(0xF8, BPF_REG_3), progs[a + pivot]);
2247
2248 if (pivot > 2) {
2249
2250 jg_bytes = 4;
2251 EMIT2_off32(0x0F, X86_JG + 0x10, 0);
2252 } else {
2253 EMIT2(X86_JG, 0);
2254 }
2255 jg_reloc = prog;
2256
2257 err = emit_bpf_dispatcher(&prog, a, a + pivot,
2258 progs);
2259 if (err)
2260 return err;
2261
2262
2263
2264
2265
2266
2267 emit_align(&prog, 16);
2268 jg_offset = prog - jg_reloc;
2269 emit_code(jg_reloc - jg_bytes, jg_offset, jg_bytes);
2270
2271 err = emit_bpf_dispatcher(&prog, a + pivot + 1,
2272 b, progs);
2273 if (err)
2274 return err;
2275
2276 *pprog = prog;
2277 return 0;
2278 }
2279
2280 static int cmp_ips(const void *a, const void *b)
2281 {
2282 const s64 *ipa = a;
2283 const s64 *ipb = b;
2284
2285 if (*ipa > *ipb)
2286 return 1;
2287 if (*ipa < *ipb)
2288 return -1;
2289 return 0;
2290 }
2291
2292 int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs)
2293 {
2294 u8 *prog = image;
2295
2296 sort(funcs, num_funcs, sizeof(funcs[0]), cmp_ips, NULL);
2297 return emit_bpf_dispatcher(&prog, 0, num_funcs - 1, funcs);
2298 }
2299
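/*
 * JIT state carried across bpf_int_jit_compile() invocations, so the extra
 * pass for programs with subprogs can reuse the image and address table.
 */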
2300 struct x64_jit_data {
2301 struct bpf_binary_header *rw_header;
2302 struct bpf_binary_header *header;
2303 int *addrs;
2304 u8 *image;
2305 int proglen;
2306 struct jit_context ctx;
2307 };
2308
2309 #define MAX_PASSES 20
2310 #define PADDING_PASSES (MAX_PASSES - 5)
2311
2312 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
2313 {
2314 struct bpf_binary_header *rw_header = NULL;
2315 struct bpf_binary_header *header = NULL;
2316 struct bpf_prog *tmp, *orig_prog = prog;
2317 struct x64_jit_data *jit_data;
2318 int proglen, oldproglen = 0;
2319 struct jit_context ctx = {};
2320 bool tmp_blinded = false;
2321 bool extra_pass = false;
2322 bool padding = false;
2323 u8 *rw_image = NULL;
2324 u8 *image = NULL;
2325 int *addrs;
2326 int pass;
2327 int i;
2328
2329 if (!prog->jit_requested)
2330 return orig_prog;
2331
2332 tmp = bpf_jit_blind_constants(prog);
	/*
	 * If blinding was requested and we failed during blinding,
	 * we must fall back to the interpreter.
	 */
2337 if (IS_ERR(tmp))
2338 return orig_prog;
2339 if (tmp != prog) {
2340 tmp_blinded = true;
2341 prog = tmp;
2342 }
2343
2344 jit_data = prog->aux->jit_data;
2345 if (!jit_data) {
2346 jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
2347 if (!jit_data) {
2348 prog = orig_prog;
2349 goto out;
2350 }
2351 prog->aux->jit_data = jit_data;
2352 }
2353 addrs = jit_data->addrs;
2354 if (addrs) {
2355 ctx = jit_data->ctx;
2356 oldproglen = jit_data->proglen;
2357 image = jit_data->image;
2358 header = jit_data->header;
2359 rw_header = jit_data->rw_header;
2360 rw_image = (void *)rw_header + ((void *)image - (void *)header);
2361 extra_pass = true;
2362 padding = true;
2363 goto skip_init_addrs;
2364 }
2365 addrs = kvmalloc_array(prog->len + 1, sizeof(*addrs), GFP_KERNEL);
2366 if (!addrs) {
2367 prog = orig_prog;
2368 goto out_addrs;
2369 }
	/*
	 * Before the first pass, make a rough estimation of addrs[]:
	 * each BPF instruction is translated to less than 64 bytes.
	 */
2375 for (proglen = 0, i = 0; i <= prog->len; i++) {
2376 proglen += 64;
2377 addrs[i] = proglen;
2378 }
2379 ctx.cleanup_addr = proglen;
2380 skip_init_addrs:

	/*
	 * The JITed image shrinks with every pass and the loop iterates
	 * until the image stops shrinking. Very large BPF programs
	 * may converge on the last pass. In such a case do one more
	 * pass to emit the final image.
	 */
2388 for (pass = 0; pass < MAX_PASSES || image; pass++) {
2389 if (!padding && pass >= PADDING_PASSES)
2390 padding = true;
2391 proglen = do_jit(prog, addrs, image, rw_image, oldproglen, &ctx, padding);
2392 if (proglen <= 0) {
2393 out_image:
2394 image = NULL;
2395 if (header) {
2396 bpf_arch_text_copy(&header->size, &rw_header->size,
2397 sizeof(rw_header->size));
2398 bpf_jit_binary_pack_free(header, rw_header);
2399 }
2400
2401 prog = orig_prog;
2402 if (extra_pass) {
2403 prog->bpf_func = NULL;
2404 prog->jited = 0;
2405 prog->jited_len = 0;
2406 }
2407 goto out_addrs;
2408 }
2409 if (image) {
2410 if (proglen != oldproglen) {
2411 pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
2412 proglen, oldproglen);
2413 goto out_image;
2414 }
2415 break;
2416 }
2417 if (proglen == oldproglen) {
			/*
			 * The number of entries in extable is the number of BPF_LDX
			 * insns that access kernel memory via "pointer to BTF type".
			 * The verifier changed their opcode from LDX|MEM|size
			 * to LDX|PROBE_MEM|size to make JITing easier.
			 */
			u32 align = __alignof__(struct exception_table_entry);
			u32 extable_size = prog->aux->num_exentries *
				sizeof(struct exception_table_entry);

			/* allocate module memory for x86 insns and extable */
2429 header = bpf_jit_binary_pack_alloc(roundup(proglen, align) + extable_size,
2430 &image, align, &rw_header, &rw_image,
2431 jit_fill_hole);
2432 if (!header) {
2433 prog = orig_prog;
2434 goto out_addrs;
2435 }
2436 prog->aux->extable = (void *) image + roundup(proglen, align);
2437 }
2438 oldproglen = proglen;
2439 cond_resched();
2440 }
2441
2442 if (bpf_jit_enable > 1)
2443 bpf_jit_dump(prog->len, proglen, pass + 1, image);
2444
2445 if (image) {
2446 if (!prog->is_func || extra_pass) {
			/*
			 * bpf_jit_binary_pack_finalize fails in two scenarios:
			 *   1) header is not pointing to proper module memory;
			 *   2) the arch doesn't support bpf_arch_text_copy().
			 *
			 * Both cases are serious bugs and justify WARN_ON.
			 */
			if (WARN_ON(bpf_jit_binary_pack_finalize(prog, header, rw_header))) {
				/* header has been freed */
				header = NULL;
2457 goto out_image;
2458 }
2459
2460 bpf_tail_call_direct_fixup(prog);
2461 } else {
2462 jit_data->addrs = addrs;
2463 jit_data->ctx = ctx;
2464 jit_data->proglen = proglen;
2465 jit_data->image = image;
2466 jit_data->header = header;
2467 jit_data->rw_header = rw_header;
2468 }
2469 prog->bpf_func = (void *)image;
2470 prog->jited = 1;
2471 prog->jited_len = proglen;
2472 } else {
2473 prog = orig_prog;
2474 }
2475
2476 if (!image || !prog->is_func || extra_pass) {
2477 if (image)
2478 bpf_prog_fill_jited_linfo(prog, addrs + 1);
2479 out_addrs:
2480 kvfree(addrs);
2481 kfree(jit_data);
2482 prog->aux->jit_data = NULL;
2483 }
2484 out:
2485 if (tmp_blinded)
2486 bpf_jit_prog_release_other(prog, prog == orig_prog ?
2487 tmp : orig_prog);
2488 return prog;
2489 }
2490
2491 bool bpf_jit_supports_kfunc_call(void)
2492 {
2493 return true;
2494 }
2495
2496 void *bpf_arch_text_copy(void *dst, void *src, size_t len)
2497 {
2498 if (text_poke_copy(dst, src, len) == NULL)
2499 return ERR_PTR(-EINVAL);
2500 return dst;
2501 }
2502
2503
2504 bool bpf_jit_supports_subprog_tailcalls(void)
2505 {
2506 return true;
2507 }
2508
2509 void bpf_jit_free(struct bpf_prog *prog)
2510 {
2511 if (prog->jited) {
2512 struct x64_jit_data *jit_data = prog->aux->jit_data;
2513 struct bpf_binary_header *hdr;
2514
		/*
		 * If we fail the final pass of JIT (from jit_subprogs),
		 * the program may not be finalized yet. Call finalize here
		 * before freeing it.
		 */
2520 if (jit_data) {
2521 bpf_jit_binary_pack_finalize(prog, jit_data->header,
2522 jit_data->rw_header);
2523 kvfree(jit_data->addrs);
2524 kfree(jit_data);
2525 }
2526 hdr = bpf_jit_binary_pack_hdr(prog);
2527 bpf_jit_binary_pack_free(hdr, NULL);
2528 WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
2529 }
2530
2531 bpf_prog_unlock_free(prog);
2532 }