// SPDX-License-Identifier: GPL-2.0-only
/*
 * Just-In-Time compiler for eBPF bytecode on MIPS.
 * Implementation of JIT functions for 32-bit CPUs.
 *
 * Copyright (c) 2021 Anyfi Networks AB.
 * Author: Johan Almbladh <johan.almbladh@gmail.com>
 *
 * Based on code and ideas from
 * Copyright (c) 2017 Cavium, Inc.
 * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com>
 * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
 */

#include <linux/math64.h>
#include <linux/errno.h>
#include <linux/filter.h>
#include <linux/bpf.h>
#include <asm/cpu-features.h>
#include <asm/isa-rev.h>
#include <asm/uasm.h>

#include "bpf_jit_comp.h"

/* MIPS a4-a7 are not available in the o32 ABI */
#undef MIPS_R_A4
#undef MIPS_R_A5
#undef MIPS_R_A6
#undef MIPS_R_A7

/* Stack is 8-byte aligned in o32 ABI */
#define MIPS_STACK_ALIGNMENT 8

/*
 * The top 16 bytes of a stack frame are reserved for the callee in the
 * O32 ABI. This corresponds to stack space for the register arguments a0-a3.
 */
#define JIT_RESERVED_STACK 16

/* Temporary 64-bit register used by JIT */
#define JIT_REG_TMP MAX_BPF_JIT_REG

/*
 * Number of prologue bytes to skip when doing a tail call.
 * Tail call count (TCC) initialization (8 bytes) always, plus
 * R1 context pointer assignment (4 bytes) if big endian.
 */
#ifdef __BIG_ENDIAN
#define JIT_TCALL_SKIP 12
#else
#define JIT_TCALL_SKIP 8
#endif

/* CPU registers holding the callee return value */
#define JIT_RETURN_REGS   \
    (BIT(MIPS_R_V0) | \
     BIT(MIPS_R_V1))

/* CPU register arguments passed to callee directly */
#define JIT_ARG_REGS      \
    (BIT(MIPS_R_A0) | \
     BIT(MIPS_R_A1) | \
     BIT(MIPS_R_A2) | \
     BIT(MIPS_R_A3))

/* CPU register arguments passed to callee on stack */
#define JIT_STACK_REGS    \
    (BIT(MIPS_R_T0) | \
     BIT(MIPS_R_T1) | \
     BIT(MIPS_R_T2) | \
     BIT(MIPS_R_T3) | \
     BIT(MIPS_R_T4) | \
     BIT(MIPS_R_T5))

/* Caller-saved CPU registers */
#define JIT_CALLER_REGS    \
    (JIT_RETURN_REGS | \
     JIT_ARG_REGS    | \
     JIT_STACK_REGS)

/* Callee-saved CPU registers */
#define JIT_CALLEE_REGS   \
    (BIT(MIPS_R_S0) | \
     BIT(MIPS_R_S1) | \
     BIT(MIPS_R_S2) | \
     BIT(MIPS_R_S3) | \
     BIT(MIPS_R_S4) | \
     BIT(MIPS_R_S5) | \
     BIT(MIPS_R_S6) | \
     BIT(MIPS_R_S7) | \
     BIT(MIPS_R_GP) | \
     BIT(MIPS_R_FP) | \
     BIT(MIPS_R_RA))

/*
 * Mapping of 64-bit eBPF registers to 32-bit native MIPS registers.
 *
 * 1) Native register pairs are ordered according to CPU endianness, following
 *    the MIPS convention for passing 64-bit arguments and return values.
 * 2) The eBPF return value, arguments and callee-saved registers are mapped
 *    to their native MIPS equivalents.
 * 3) Since the 32 highest bits in the eBPF FP register are always zero,
 *    only one general-purpose register is actually needed for the mapping.
 *    We use the fp register for this purpose, and map the highest bits to
 *    the MIPS register r0 (zero).
 * 4) We use the MIPS gp and at registers as internal temporary registers
 *    for constant blinding. The gp register is callee-saved.
 * 5) One 64-bit temporary register is mapped for use when sign-extending
 *    immediate operands. MIPS registers t6-t9 are available to the JIT
 *    as temporaries when implementing complex 64-bit operations.
 *
 * With this scheme all eBPF registers are mapped to native MIPS
 * registers without having to use any stack scratch space. The direct
 * register mapping (2) simplifies the handling of function calls.
 */
static const u8 bpf2mips32[][2] = {
    /* Return value from in-kernel function, and exit value from eBPF */
    [BPF_REG_0] = {MIPS_R_V1, MIPS_R_V0},
    /* Arguments from eBPF program to in-kernel function */
    [BPF_REG_1] = {MIPS_R_A1, MIPS_R_A0},
    [BPF_REG_2] = {MIPS_R_A3, MIPS_R_A2},
    /* Remaining arguments, to be passed on the stack per O32 ABI */
    [BPF_REG_3] = {MIPS_R_T1, MIPS_R_T0},
    [BPF_REG_4] = {MIPS_R_T3, MIPS_R_T2},
    [BPF_REG_5] = {MIPS_R_T5, MIPS_R_T4},
    /* Callee-saved registers that in-kernel function will preserve */
    [BPF_REG_6] = {MIPS_R_S1, MIPS_R_S0},
    [BPF_REG_7] = {MIPS_R_S3, MIPS_R_S2},
    [BPF_REG_8] = {MIPS_R_S5, MIPS_R_S4},
    [BPF_REG_9] = {MIPS_R_S7, MIPS_R_S6},
    /* Read-only frame pointer to access the eBPF stack */
#ifdef __BIG_ENDIAN
    [BPF_REG_FP] = {MIPS_R_FP, MIPS_R_ZERO},
#else
    [BPF_REG_FP] = {MIPS_R_ZERO, MIPS_R_FP},
#endif
    /* Temporary register for blinding constants */
    [BPF_REG_AX] = {MIPS_R_GP, MIPS_R_AT},
    /* Temporary register for internal JIT use */
    [JIT_REG_TMP] = {MIPS_R_T7, MIPS_R_T6},
};

/* Get low CPU register for a 64-bit eBPF register mapping */
static inline u8 lo(const u8 reg[])
{
#ifdef __BIG_ENDIAN
    return reg[0];
#else
    return reg[1];
#endif
}

/* Get high CPU register for a 64-bit eBPF register mapping */
static inline u8 hi(const u8 reg[])
{
#ifdef __BIG_ENDIAN
    return reg[1];
#else
    return reg[0];
#endif
}

/*
 * Mark a 64-bit CPU register pair as clobbered; it needs to be
 * saved/restored by the program if callee-saved.
 */
static void clobber_reg64(struct jit_context *ctx, const u8 reg[])
{
    clobber_reg(ctx, reg[0]);
    clobber_reg(ctx, reg[1]);
}

/* dst = imm (sign-extended) */
static void emit_mov_se_i64(struct jit_context *ctx, const u8 dst[], s32 imm)
{
    emit_mov_i(ctx, lo(dst), imm);
    if (imm < 0)
        emit(ctx, addiu, hi(dst), MIPS_R_ZERO, -1);
    else
        emit(ctx, move, hi(dst), MIPS_R_ZERO);
    clobber_reg64(ctx, dst);
}
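
/*
 * Example for emit_mov_se_i64 above: imm = -5 loads lo(dst) = 0xfffffffb
 * and hi(dst) = 0xffffffff, the 64-bit sign extension, while imm = 7
 * loads lo(dst) = 7 and hi(dst) = 0.
 */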

/* Zero extension, if verifier does not do it for us */
static void emit_zext_ver(struct jit_context *ctx, const u8 dst[])
{
    if (!ctx->program->aux->verifier_zext) {
        emit(ctx, move, hi(dst), MIPS_R_ZERO);
        clobber_reg(ctx, hi(dst));
    }
}

/* Load delay slot, if ISA mandates it */
static void emit_load_delay(struct jit_context *ctx)
{
    if (!cpu_has_mips_2_3_4_5_r)
        emit(ctx, nop);
}

/* ALU immediate operation (64-bit) */
static void emit_alu_i64(struct jit_context *ctx,
             const u8 dst[], s32 imm, u8 op)
{
    u8 src = MIPS_R_T6;

    /*
     * ADD/SUB with all but the max negative imm can be handled by
     * inverting the operation and the imm value, saving one insn.
     */
    if (imm > S32_MIN && imm < 0)
        switch (op) {
        case BPF_ADD:
            op = BPF_SUB;
            imm = -imm;
            break;
        case BPF_SUB:
            op = BPF_ADD;
            imm = -imm;
            break;
        }

    /* Move immediate to temporary register */
    emit_mov_i(ctx, src, imm);

    switch (op) {
    /* dst = dst + imm */
    case BPF_ADD:
        emit(ctx, addu, lo(dst), lo(dst), src);
        emit(ctx, sltu, MIPS_R_T9, lo(dst), src);
        emit(ctx, addu, hi(dst), hi(dst), MIPS_R_T9);
        if (imm < 0)
            emit(ctx, addiu, hi(dst), hi(dst), -1);
        break;
    /* dst = dst - imm */
    case BPF_SUB:
        emit(ctx, sltu, MIPS_R_T9, lo(dst), src);
        emit(ctx, subu, lo(dst), lo(dst), src);
        emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9);
        if (imm < 0)
            emit(ctx, addiu, hi(dst), hi(dst), 1);
        break;
    /* dst = dst | imm */
    case BPF_OR:
        emit(ctx, or, lo(dst), lo(dst), src);
        if (imm < 0)
            emit(ctx, addiu, hi(dst), MIPS_R_ZERO, -1);
        break;
    /* dst = dst & imm */
    case BPF_AND:
        emit(ctx, and, lo(dst), lo(dst), src);
        if (imm >= 0)
            emit(ctx, move, hi(dst), MIPS_R_ZERO);
        break;
    /* dst = dst ^ imm */
    case BPF_XOR:
        emit(ctx, xor, lo(dst), lo(dst), src);
        if (imm < 0) {
            emit(ctx, subu, hi(dst), MIPS_R_ZERO, hi(dst));
            emit(ctx, addiu, hi(dst), hi(dst), -1);
        }
        break;
    }
    clobber_reg64(ctx, dst);
}
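
/*
 * Carry handling in emit_alu_i64: after the 32-bit add, the low word
 * has wrapped around iff the result is unsigned-less than the addend,
 * so sltu on (new lo, src) recovers the carry into the high word.
 * Example: dst = 0x00000000ffffffff, imm = 1 gives lo = 0, T9 = 1 and
 * hi = 1, i.e. 0x0000000100000000. For a negative imm the implied
 * high word of the sign-extended immediate is ~0U, which the trailing
 * addiu of -1 (or +1 for SUB) accounts for.
 */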

/* ALU register operation (64-bit) */
static void emit_alu_r64(struct jit_context *ctx,
             const u8 dst[], const u8 src[], u8 op)
{
    switch (BPF_OP(op)) {
    /* dst = dst + src */
    case BPF_ADD:
        if (src == dst) {
            emit(ctx, srl, MIPS_R_T9, lo(dst), 31);
            emit(ctx, addu, lo(dst), lo(dst), lo(dst));
        } else {
            emit(ctx, addu, lo(dst), lo(dst), lo(src));
            emit(ctx, sltu, MIPS_R_T9, lo(dst), lo(src));
        }
        emit(ctx, addu, hi(dst), hi(dst), hi(src));
        emit(ctx, addu, hi(dst), hi(dst), MIPS_R_T9);
        break;
    /* dst = dst - src */
    case BPF_SUB:
        emit(ctx, sltu, MIPS_R_T9, lo(dst), lo(src));
        emit(ctx, subu, lo(dst), lo(dst), lo(src));
        emit(ctx, subu, hi(dst), hi(dst), hi(src));
        emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9);
        break;
    /* dst = dst | src */
    case BPF_OR:
        emit(ctx, or, lo(dst), lo(dst), lo(src));
        emit(ctx, or, hi(dst), hi(dst), hi(src));
        break;
    /* dst = dst & src */
    case BPF_AND:
        emit(ctx, and, lo(dst), lo(dst), lo(src));
        emit(ctx, and, hi(dst), hi(dst), hi(src));
        break;
    /* dst = dst ^ src */
    case BPF_XOR:
        emit(ctx, xor, lo(dst), lo(dst), lo(src));
        emit(ctx, xor, hi(dst), hi(dst), hi(src));
        break;
    }
    clobber_reg64(ctx, dst);
}
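
/*
 * In the BPF_ADD case above, src == dst means the value is doubled, so
 * the carry out of the low word is simply its old bit 31, extracted
 * with srl before the add overwrites it.
 */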

/* ALU invert (64-bit) */
static void emit_neg_i64(struct jit_context *ctx, const u8 dst[])
{
    emit(ctx, sltu, MIPS_R_T9, MIPS_R_ZERO, lo(dst));
    emit(ctx, subu, lo(dst), MIPS_R_ZERO, lo(dst));
    emit(ctx, subu, hi(dst), MIPS_R_ZERO, hi(dst));
    emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9);

    clobber_reg64(ctx, dst);
}
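
/*
 * 64-bit negation computes 0 - dst: the high word borrows one iff the
 * low word is non-zero, which sltu on (0, lo) detects. Example:
 * dst = 1 yields lo = 0xffffffff and hi = 0xffffffff, i.e. -1.
 */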

/* ALU shift immediate (64-bit) */
static void emit_shift_i64(struct jit_context *ctx,
               const u8 dst[], u32 imm, u8 op)
{
    switch (BPF_OP(op)) {
    /* dst = dst << imm */
    case BPF_LSH:
        if (imm < 32) {
            emit(ctx, srl, MIPS_R_T9, lo(dst), 32 - imm);
            emit(ctx, sll, lo(dst), lo(dst), imm);
            emit(ctx, sll, hi(dst), hi(dst), imm);
            emit(ctx, or, hi(dst), hi(dst), MIPS_R_T9);
        } else {
            emit(ctx, sll, hi(dst), lo(dst), imm - 32);
            emit(ctx, move, lo(dst), MIPS_R_ZERO);
        }
        break;
    /* dst = dst >> imm */
    case BPF_RSH:
        if (imm < 32) {
            emit(ctx, sll, MIPS_R_T9, hi(dst), 32 - imm);
            emit(ctx, srl, lo(dst), lo(dst), imm);
            emit(ctx, srl, hi(dst), hi(dst), imm);
            emit(ctx, or, lo(dst), lo(dst), MIPS_R_T9);
        } else {
            emit(ctx, srl, lo(dst), hi(dst), imm - 32);
            emit(ctx, move, hi(dst), MIPS_R_ZERO);
        }
        break;
    /* dst = dst >> imm (arithmetic) */
    case BPF_ARSH:
        if (imm < 32) {
            emit(ctx, sll, MIPS_R_T9, hi(dst), 32 - imm);
            emit(ctx, srl, lo(dst), lo(dst), imm);
            emit(ctx, sra, hi(dst), hi(dst), imm);
            emit(ctx, or, lo(dst), lo(dst), MIPS_R_T9);
        } else {
            emit(ctx, sra, lo(dst), hi(dst), imm - 32);
            emit(ctx, sra, hi(dst), hi(dst), 31);
        }
        break;
    }
    clobber_reg64(ctx, dst);
}
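
/*
 * Example for emit_shift_i64 above: a left shift by 40 reduces to
 * hi = lo << 8 with lo = 0, while a left shift by 4 carries the top
 * four bits of the low word (lo >> 28) into the high word.
 */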

/* ALU shift register (64-bit) */
static void emit_shift_r64(struct jit_context *ctx,
               const u8 dst[], u8 src, u8 op)
{
    u8 t1 = MIPS_R_T8;
    u8 t2 = MIPS_R_T9;

    emit(ctx, andi, t1, src, 32);              /* t1 = src & 32          */
    emit(ctx, beqz, t1, 16);                   /* PC += 16 if t1 == 0    */
    emit(ctx, nor, t2, src, MIPS_R_ZERO);      /* t2 = ~src (delay slot) */

    switch (BPF_OP(op)) {
    /* dst = dst << src */
    case BPF_LSH:
        /* Next: shift >= 32 */
        emit(ctx, sllv, hi(dst), lo(dst), src);    /* dh = dl << src */
        emit(ctx, move, lo(dst), MIPS_R_ZERO);     /* dl = 0         */
        emit(ctx, b, 20);                          /* PC += 20       */
        /* +16: shift < 32 */
        emit(ctx, srl, t1, lo(dst), 1);            /* t1 = dl >> 1   */
        emit(ctx, srlv, t1, t1, t2);               /* t1 = t1 >> t2  */
        emit(ctx, sllv, lo(dst), lo(dst), src);    /* dl = dl << src */
        emit(ctx, sllv, hi(dst), hi(dst), src);    /* dh = dh << src */
        emit(ctx, or, hi(dst), hi(dst), t1);       /* dh = dh | t1   */
        break;
    /* dst = dst >> src */
    case BPF_RSH:
        /* Next: shift >= 32 */
        emit(ctx, srlv, lo(dst), hi(dst), src);    /* dl = dh >> src */
        emit(ctx, move, hi(dst), MIPS_R_ZERO);     /* dh = 0         */
        emit(ctx, b, 20);                          /* PC += 20       */
        /* +16: shift < 32 */
        emit(ctx, sll, t1, hi(dst), 1);            /* t1 = dh << 1   */
        emit(ctx, sllv, t1, t1, t2);               /* t1 = t1 << t2  */
        emit(ctx, srlv, lo(dst), lo(dst), src);    /* dl = dl >> src */
        emit(ctx, srlv, hi(dst), hi(dst), src);    /* dh = dh >> src */
        emit(ctx, or, lo(dst), lo(dst), t1);       /* dl = dl | t1   */
        break;
    /* dst = dst >> src (arithmetic) */
    case BPF_ARSH:
        /* Next: shift >= 32 */
        emit(ctx, srav, lo(dst), hi(dst), src);   /* dl = dh >>a src */
        emit(ctx, sra, hi(dst), hi(dst), 31);     /* dh = dh >>a 31  */
        emit(ctx, b, 20);                         /* PC += 20        */
        /* +16: shift < 32 */
        emit(ctx, sll, t1, hi(dst), 1);           /* t1 = dh << 1    */
        emit(ctx, sllv, t1, t1, t2);              /* t1 = t1 << t2   */
        emit(ctx, srlv, lo(dst), lo(dst), src);   /* dl = dl >> src  */
        emit(ctx, srav, hi(dst), hi(dst), src);   /* dh = dh >>a src */
        emit(ctx, or, lo(dst), lo(dst), t1);      /* dl = dl | t1    */
        break;
    }

    /* +20: Done */
    clobber_reg64(ctx, dst);
}
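
/*
 * The nor above computes t2 = ~src so that, for 0 <= src <= 31,
 * (x >> 1) >> (t2 & 31) equals x >> (32 - src): variable shifts only
 * use the low five bits of the amount, and 1 + (31 - src) = 32 - src.
 * Splitting the shift this way also stays correct for src == 0, where
 * a direct shift by 32 would be truncated to a shift by zero.
 */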

/* ALU mul immediate (64x32-bit) */
static void emit_mul_i64(struct jit_context *ctx, const u8 dst[], s32 imm)
{
    u8 src = MIPS_R_T6;
    u8 tmp = MIPS_R_T9;

    switch (imm) {
    /* dst = dst * 1 is a no-op */
    case 1:
        break;
    /* dst = dst * -1 */
    case -1:
        emit_neg_i64(ctx, dst);
        break;
    /* dst = dst * 0 */
    case 0:
        emit_mov_r(ctx, lo(dst), MIPS_R_ZERO);
        emit_mov_r(ctx, hi(dst), MIPS_R_ZERO);
        break;
    /* Full 64x32 multiply */
    default:
        /* hi(dst) = hi(dst) * src(imm) */
        emit_mov_i(ctx, src, imm);
        if (cpu_has_mips32r1 || cpu_has_mips32r6) {
            emit(ctx, mul, hi(dst), hi(dst), src);
        } else {
            emit(ctx, multu, hi(dst), src);
            emit(ctx, mflo, hi(dst));
        }

        /* hi(dst) = hi(dst) - lo(dst) */
        if (imm < 0)
            emit(ctx, subu, hi(dst), hi(dst), lo(dst));

        /* tmp = lo(dst) * src(imm) >> 32 */
        /* lo(dst) = lo(dst) * src(imm) */
        if (cpu_has_mips32r6) {
            emit(ctx, muhu, tmp, lo(dst), src);
            emit(ctx, mulu, lo(dst), lo(dst), src);
        } else {
            emit(ctx, multu, lo(dst), src);
            emit(ctx, mflo, lo(dst));
            emit(ctx, mfhi, tmp);
        }

        /* hi(dst) += tmp */
        emit(ctx, addu, hi(dst), hi(dst), tmp);
        clobber_reg64(ctx, dst);
        break;
    }
}
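
/*
 * Derivation of the imm < 0 correction above: writing u = (u32)imm,
 * the sign-extended immediate equals u - 2^32 (mod 2^64), so
 * dst * imm = dst * u - 2^32 * lo(dst) (mod 2^64). The multiply
 * sequence forms the products with u; subtracting lo(dst) from the
 * high word supplies the missing -2^32 * lo(dst) term.
 */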

/* ALU mul register (64x64-bit) */
static void emit_mul_r64(struct jit_context *ctx,
             const u8 dst[], const u8 src[])
{
    u8 acc = MIPS_R_T8;
    u8 tmp = MIPS_R_T9;

    /* acc = hi(dst) * lo(src) */
    if (cpu_has_mips32r1 || cpu_has_mips32r6) {
        emit(ctx, mul, acc, hi(dst), lo(src));
    } else {
        emit(ctx, multu, hi(dst), lo(src));
        emit(ctx, mflo, acc);
    }

    /* tmp = lo(dst) * hi(src) */
    if (cpu_has_mips32r1 || cpu_has_mips32r6) {
        emit(ctx, mul, tmp, lo(dst), hi(src));
    } else {
        emit(ctx, multu, lo(dst), hi(src));
        emit(ctx, mflo, tmp);
    }

    /* acc += tmp */
    emit(ctx, addu, acc, acc, tmp);

    /* tmp = lo(dst) * lo(src) >> 32 */
    /* lo(dst) = lo(dst) * lo(src) */
    if (cpu_has_mips32r6) {
        emit(ctx, muhu, tmp, lo(dst), lo(src));
        emit(ctx, mulu, lo(dst), lo(dst), lo(src));
    } else {
        emit(ctx, multu, lo(dst), lo(src));
        emit(ctx, mflo, lo(dst));
        emit(ctx, mfhi, tmp);
    }

    /* hi(dst) = acc + tmp */
    emit(ctx, addu, hi(dst), acc, tmp);
    clobber_reg64(ctx, dst);
}
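
/*
 * emit_mul_r64 evaluates (2^32 a + b) * (2^32 c + d) mod 2^64 as
 * 2^32 * (a*d + b*c + carry) + b*d, where carry is the high word of
 * the full b*d product. The a*c term would only affect bits above
 * bit 63 and is never computed.
 */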

/* Helper function for 64-bit modulo */
static u64 jit_mod64(u64 a, u64 b)
{
    u64 rem;

    div64_u64_rem(a, b, &rem);
    return rem;
}

/* ALU div/mod register (64-bit) */
static void emit_divmod_r64(struct jit_context *ctx,
                const u8 dst[], const u8 src[], u8 op)
{
    const u8 *r0 = bpf2mips32[BPF_REG_0]; /* Mapped to v0-v1 */
    const u8 *r1 = bpf2mips32[BPF_REG_1]; /* Mapped to a0-a1 */
    const u8 *r2 = bpf2mips32[BPF_REG_2]; /* Mapped to a2-a3 */
    int exclude, k;
    u32 addr = 0;

    /* Push caller-saved registers on stack */
    push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
          0, JIT_RESERVED_STACK);

    /* Put 64-bit arguments 1 and 2 in registers a0-a3 */
    for (k = 0; k < 2; k++) {
        emit(ctx, move, MIPS_R_T9, src[k]);
        emit(ctx, move, r1[k], dst[k]);
        emit(ctx, move, r2[k], MIPS_R_T9);
    }

    /* Emit function call */
    switch (BPF_OP(op)) {
    /* dst = dst / src */
    case BPF_DIV:
        addr = (u32)&div64_u64;
        break;
    /* dst = dst % src */
    case BPF_MOD:
        addr = (u32)&jit_mod64;
        break;
    }
    emit_mov_i(ctx, MIPS_R_T9, addr);
    emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
    emit(ctx, nop); /* Delay slot */

    /* Store the 64-bit result in dst */
    emit(ctx, move, dst[0], r0[0]);
    emit(ctx, move, dst[1], r0[1]);

    /* Restore caller-saved registers, excluding the computed result */
    exclude = BIT(lo(dst)) | BIT(hi(dst));
    pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
         exclude, JIT_RESERVED_STACK);
    emit_load_delay(ctx);

    clobber_reg64(ctx, dst);
    clobber_reg(ctx, MIPS_R_V0);
    clobber_reg(ctx, MIPS_R_V1);
    clobber_reg(ctx, MIPS_R_RA);
}
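
/*
 * The T9 staging in the argument loop above matters when src already
 * lives in a0-a3: if src is eBPF R1, for example, writing r1[k] first
 * would overwrite src[k] before it could be copied into r2[k].
 */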

/* Swap bytes in a register word */
static void emit_swap8_r(struct jit_context *ctx, u8 dst, u8 src, u8 mask)
{
    u8 tmp = MIPS_R_T9;

    emit(ctx, and, tmp, src, mask); /* tmp = src & 0x00ff00ff */
    emit(ctx, sll, tmp, tmp, 8);    /* tmp = tmp << 8         */
    emit(ctx, srl, dst, src, 8);    /* dst = src >> 8         */
    emit(ctx, and, dst, dst, mask); /* dst = dst & 0x00ff00ff */
    emit(ctx, or,  dst, dst, tmp);  /* dst = dst | tmp        */
}

/* Swap half words in a register word */
static void emit_swap16_r(struct jit_context *ctx, u8 dst, u8 src)
{
    u8 tmp = MIPS_R_T9;

    emit(ctx, sll, tmp, src, 16);  /* tmp = src << 16 */
    emit(ctx, srl, dst, src, 16);  /* dst = src >> 16 */
    emit(ctx, or,  dst, dst, tmp); /* dst = dst | tmp */
}
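
/*
 * Composing the two helpers gives a full 32-bit byte swap:
 * 0x12345678 -> emit_swap8_r -> 0x34127856 -> emit_swap16_r ->
 * 0x78563412.
 */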

/* Swap bytes and truncate a register double word, word or half word */
static void emit_bswap_r64(struct jit_context *ctx, const u8 dst[], u32 width)
{
    u8 tmp = MIPS_R_T8;

    switch (width) {
    /* Swap bytes in a double word */
    case 64:
        if (cpu_has_mips32r2 || cpu_has_mips32r6) {
            emit(ctx, rotr, tmp, hi(dst), 16);
            emit(ctx, rotr, hi(dst), lo(dst), 16);
            emit(ctx, wsbh, lo(dst), tmp);
            emit(ctx, wsbh, hi(dst), hi(dst));
        } else {
            emit_swap16_r(ctx, tmp, lo(dst));
            emit_swap16_r(ctx, lo(dst), hi(dst));
            emit(ctx, move, hi(dst), tmp);

            emit(ctx, lui, tmp, 0xff);      /* tmp = 0x00ff0000 */
            emit(ctx, ori, tmp, tmp, 0xff); /* tmp = 0x00ff00ff */
            emit_swap8_r(ctx, lo(dst), lo(dst), tmp);
            emit_swap8_r(ctx, hi(dst), hi(dst), tmp);
        }
        break;
    /* Swap bytes in a word */
    /* Swap bytes in a half word */
    case 32:
    case 16:
        emit_bswap_r(ctx, lo(dst), width);
        emit(ctx, move, hi(dst), MIPS_R_ZERO);
        break;
    }
    clobber_reg64(ctx, dst);
}
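
/*
 * On MIPS32r2+ the 64-bit swap works in halves: wsbh swaps the bytes
 * within each half word, rotr by 16 exchanges the half words, and the
 * cross-move through tmp exchanges the two words. Example:
 * hi:lo = 0x01234567:0x89abcdef becomes 0xefcdab89:0x67452301.
 */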

/* Truncate a register double word, word or half word */
static void emit_trunc_r64(struct jit_context *ctx, const u8 dst[], u32 width)
{
    switch (width) {
    case 64:
        break;
    /* Zero-extend a word */
    case 32:
        emit(ctx, move, hi(dst), MIPS_R_ZERO);
        clobber_reg(ctx, hi(dst));
        break;
    /* Zero-extend a half word */
    case 16:
        emit(ctx, move, hi(dst), MIPS_R_ZERO);
        emit(ctx, andi, lo(dst), lo(dst), 0xffff);
        clobber_reg64(ctx, dst);
        break;
    }
}

/* Load operation: dst = *(size*)(src + off) */
static void emit_ldx(struct jit_context *ctx,
             const u8 dst[], u8 src, s16 off, u8 size)
{
    switch (size) {
    /* Load a byte */
    case BPF_B:
        emit(ctx, lbu, lo(dst), off, src);
        emit(ctx, move, hi(dst), MIPS_R_ZERO);
        break;
    /* Load a half word */
    case BPF_H:
        emit(ctx, lhu, lo(dst), off, src);
        emit(ctx, move, hi(dst), MIPS_R_ZERO);
        break;
    /* Load a word */
    case BPF_W:
        emit(ctx, lw, lo(dst), off, src);
        emit(ctx, move, hi(dst), MIPS_R_ZERO);
        break;
    /* Load a double word */
    case BPF_DW:
        if (dst[1] == src) {
            emit(ctx, lw, dst[0], off + 4, src);
            emit(ctx, lw, dst[1], off, src);
        } else {
            emit(ctx, lw, dst[1], off, src);
            emit(ctx, lw, dst[0], off + 4, src);
        }
        emit_load_delay(ctx);
        break;
    }
    clobber_reg64(ctx, dst);
}
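
/*
 * The BPF_DW ordering above avoids a base-register hazard: when the
 * register receiving the word at off is also the address base, the
 * word at off + 4 is loaded first so the base survives until the
 * second load.
 */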

/* Store operation: *(size *)(dst + off) = src */
static void emit_stx(struct jit_context *ctx,
             const u8 dst, const u8 src[], s16 off, u8 size)
{
    switch (size) {
    /* Store a byte */
    case BPF_B:
        emit(ctx, sb, lo(src), off, dst);
        break;
    /* Store a half word */
    case BPF_H:
        emit(ctx, sh, lo(src), off, dst);
        break;
    /* Store a word */
    case BPF_W:
        emit(ctx, sw, lo(src), off, dst);
        break;
    /* Store a double word */
    case BPF_DW:
        emit(ctx, sw, src[1], off, dst);
        emit(ctx, sw, src[0], off + 4, dst);
        break;
    }
}

/* Atomic read-modify-write (32-bit, non-ll/sc fallback) */
static void emit_atomic_r32(struct jit_context *ctx,
                u8 dst, u8 src, s16 off, u8 code)
{
    u32 exclude = 0;
    u32 addr = 0;

    /* Push caller-saved registers on stack */
    push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
          0, JIT_RESERVED_STACK);
    /*
     * Argument 1: dst+off if xchg, otherwise src, passed in register a0
     * Argument 2: src if xchg, otherwise dst+off, passed in register a1
     */
    emit(ctx, move, MIPS_R_T9, dst);
    if (code == BPF_XCHG) {
        emit(ctx, move, MIPS_R_A1, src);
        emit(ctx, addiu, MIPS_R_A0, MIPS_R_T9, off);
    } else {
        emit(ctx, move, MIPS_R_A0, src);
        emit(ctx, addiu, MIPS_R_A1, MIPS_R_T9, off);
    }

    /* Emit function call */
    switch (code) {
    case BPF_ADD:
        addr = (u32)&atomic_add;
        break;
    case BPF_ADD | BPF_FETCH:
        addr = (u32)&atomic_fetch_add;
        break;
    case BPF_SUB:
        addr = (u32)&atomic_sub;
        break;
    case BPF_SUB | BPF_FETCH:
        addr = (u32)&atomic_fetch_sub;
        break;
    case BPF_OR:
        addr = (u32)&atomic_or;
        break;
    case BPF_OR | BPF_FETCH:
        addr = (u32)&atomic_fetch_or;
        break;
    case BPF_AND:
        addr = (u32)&atomic_and;
        break;
    case BPF_AND | BPF_FETCH:
        addr = (u32)&atomic_fetch_and;
        break;
    case BPF_XOR:
        addr = (u32)&atomic_xor;
        break;
    case BPF_XOR | BPF_FETCH:
        addr = (u32)&atomic_fetch_xor;
        break;
    case BPF_XCHG:
        addr = (u32)&atomic_xchg;
        break;
    }
    emit_mov_i(ctx, MIPS_R_T9, addr);
    emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
    emit(ctx, nop); /* Delay slot */

    /* Update src register with old value, if specified */
    if (code & BPF_FETCH) {
        emit(ctx, move, src, MIPS_R_V0);
        exclude = BIT(src);
        clobber_reg(ctx, src);
    }

    /* Restore caller-saved registers, except any fetched value */
    pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
         exclude, JIT_RESERVED_STACK);
    emit_load_delay(ctx);
    clobber_reg(ctx, MIPS_R_RA);
}
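
/*
 * As in emit_divmod_r64, dst is staged through T9 above because it
 * may itself be one of the argument registers a0-a1 that the sequence
 * is about to overwrite.
 */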

/* Helper function for 64-bit atomic exchange */
static s64 jit_xchg64(s64 a, atomic64_t *v)
{
    return atomic64_xchg(v, a);
}

/* Atomic read-modify-write (64-bit) */
static void emit_atomic_r64(struct jit_context *ctx,
                u8 dst, const u8 src[], s16 off, u8 code)
{
    const u8 *r0 = bpf2mips32[BPF_REG_0]; /* Mapped to v0-v1 */
    const u8 *r1 = bpf2mips32[BPF_REG_1]; /* Mapped to a0-a1 */
    u32 exclude = 0;
    u32 addr = 0;

    /* Push caller-saved registers on stack */
    push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
          0, JIT_RESERVED_STACK);
    /*
     * Argument 1: 64-bit src, passed in registers a0-a1
     * Argument 2: 32-bit dst+off, passed in register a2
     */
    emit(ctx, move, MIPS_R_T9, dst);
    emit(ctx, move, r1[0], src[0]);
    emit(ctx, move, r1[1], src[1]);
    emit(ctx, addiu, MIPS_R_A2, MIPS_R_T9, off);

    /* Emit function call */
    switch (code) {
    case BPF_ADD:
        addr = (u32)&atomic64_add;
        break;
    case BPF_ADD | BPF_FETCH:
        addr = (u32)&atomic64_fetch_add;
        break;
    case BPF_SUB:
        addr = (u32)&atomic64_sub;
        break;
    case BPF_SUB | BPF_FETCH:
        addr = (u32)&atomic64_fetch_sub;
        break;
    case BPF_OR:
        addr = (u32)&atomic64_or;
        break;
    case BPF_OR | BPF_FETCH:
        addr = (u32)&atomic64_fetch_or;
        break;
    case BPF_AND:
        addr = (u32)&atomic64_and;
        break;
    case BPF_AND | BPF_FETCH:
        addr = (u32)&atomic64_fetch_and;
        break;
    case BPF_XOR:
        addr = (u32)&atomic64_xor;
        break;
    case BPF_XOR | BPF_FETCH:
        addr = (u32)&atomic64_fetch_xor;
        break;
    case BPF_XCHG:
        addr = (u32)&jit_xchg64;
        break;
    }
    emit_mov_i(ctx, MIPS_R_T9, addr);
    emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
    emit(ctx, nop); /* Delay slot */

    /* Update src register with old value, if specified */
    if (code & BPF_FETCH) {
        emit(ctx, move, lo(src), lo(r0));
        emit(ctx, move, hi(src), hi(r0));
        exclude = BIT(src[0]) | BIT(src[1]);
        clobber_reg64(ctx, src);
    }

    /* Restore caller-saved registers, except any fetched value */
    pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
         exclude, JIT_RESERVED_STACK);
    emit_load_delay(ctx);
    clobber_reg(ctx, MIPS_R_RA);
}

/* Atomic compare-and-exchange (32-bit, non-ll/sc fallback) */
static void emit_cmpxchg_r32(struct jit_context *ctx, u8 dst, u8 src, s16 off)
{
    const u8 *r0 = bpf2mips32[BPF_REG_0];

    /* Push caller-saved registers on stack */
    push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
          JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32));
    /*
     * Argument 1: 32-bit dst+off, passed in register a0
     * Argument 2: 32-bit r0, passed in register a1
     * Argument 3: 32-bit src, passed in register a2
     */
    emit(ctx, addiu, MIPS_R_T9, dst, off);
    emit(ctx, move, MIPS_R_T8, src);
    emit(ctx, move, MIPS_R_A1, lo(r0));
    emit(ctx, move, MIPS_R_A0, MIPS_R_T9);
    emit(ctx, move, MIPS_R_A2, MIPS_R_T8);

    /* Emit function call */
    emit_mov_i(ctx, MIPS_R_T9, (u32)&atomic_cmpxchg);
    emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
    emit(ctx, nop); /* Delay slot */

#ifdef __BIG_ENDIAN
    emit(ctx, move, lo(r0), MIPS_R_V0);
#endif
    /* Restore caller-saved registers, except the return value */
    pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
         JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32));
    emit_load_delay(ctx);
    clobber_reg(ctx, MIPS_R_V0);
    clobber_reg(ctx, MIPS_R_V1);
    clobber_reg(ctx, MIPS_R_RA);
}

/* Atomic compare-and-exchange (64-bit) */
static void emit_cmpxchg_r64(struct jit_context *ctx,
                 u8 dst, const u8 src[], s16 off)
{
    const u8 *r0 = bpf2mips32[BPF_REG_0];
    const u8 *r2 = bpf2mips32[BPF_REG_2];

    /* Push caller-saved registers on stack */
    push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
          JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32));
    /*
     * Argument 1: 32-bit dst+off, passed in register a0 (a1 unused)
     * Argument 2: 64-bit r0, passed in registers a2-a3
     * Argument 3: 64-bit src, passed on stack
     */
    push_regs(ctx, BIT(src[0]) | BIT(src[1]), 0, JIT_RESERVED_STACK);
    emit(ctx, addiu, MIPS_R_T9, dst, off);
    emit(ctx, move, r2[0], r0[0]);
    emit(ctx, move, r2[1], r0[1]);
    emit(ctx, move, MIPS_R_A0, MIPS_R_T9);

    /* Emit function call */
    emit_mov_i(ctx, MIPS_R_T9, (u32)&atomic64_cmpxchg);
    emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
    emit(ctx, nop); /* Delay slot */

    /* Restore caller-saved registers, except the return value */
    pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
         JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32));
    emit_load_delay(ctx);
    clobber_reg(ctx, MIPS_R_V0);
    clobber_reg(ctx, MIPS_R_V1);
    clobber_reg(ctx, MIPS_R_RA);
}

/*
 * Conditional movz or an emulated equivalent.
 * Note that the rs register may be modified.
 */
static void emit_movz_r(struct jit_context *ctx, u8 rd, u8 rs, u8 rt)
{
    if (cpu_has_mips_4 || cpu_has_mips_5 ||
        cpu_has_mips32r1 || cpu_has_mips32r2 || cpu_has_mips32r5) {
        emit(ctx, movz, rd, rs, rt);           /* rd = rt ? rd : rs  */
    } else if (cpu_has_mips32r6) {
        if (rs != MIPS_R_ZERO)
            emit(ctx, seleqz, rs, rs, rt); /* rs = 0 if rt != 0  */
        emit(ctx, selnez, rd, rd, rt);         /* rd = 0 if rt == 0  */
        if (rs != MIPS_R_ZERO)
            emit(ctx, or, rd, rd, rs);     /* rd = rd | rs       */
    } else {
        emit(ctx, bnez, rt, 8);                /* PC += 8 if rt != 0 */
        emit(ctx, nop);                        /* +0: delay slot     */
        emit(ctx, or, rd, rs, MIPS_R_ZERO);    /* +4: rd = rs        */
    }
    clobber_reg(ctx, rd);
    clobber_reg(ctx, rs);
}

/*
 * Conditional movn or an emulated equivalent.
 * Note that the rs register may be modified.
 */
static void emit_movn_r(struct jit_context *ctx, u8 rd, u8 rs, u8 rt)
{
    if (cpu_has_mips_4 || cpu_has_mips_5 ||
        cpu_has_mips32r1 || cpu_has_mips32r2 || cpu_has_mips32r5) {
        emit(ctx, movn, rd, rs, rt);           /* rd = rt ? rs : rd  */
    } else if (cpu_has_mips32r6) {
        if (rs != MIPS_R_ZERO)
            emit(ctx, selnez, rs, rs, rt); /* rs = 0 if rt == 0  */
        emit(ctx, seleqz, rd, rd, rt);         /* rd = 0 if rt != 0  */
        if (rs != MIPS_R_ZERO)
            emit(ctx, or, rd, rd, rs);     /* rd = rd | rs       */
    } else {
        emit(ctx, beqz, rt, 8);                /* PC += 8 if rt == 0 */
        emit(ctx, nop);                        /* +0: delay slot     */
        emit(ctx, or, rd, rs, MIPS_R_ZERO);    /* +4: rd = rs        */
    }
    clobber_reg(ctx, rd);
    clobber_reg(ctx, rs);
}

/* Emulation of 64-bit sltiu rd, rs, imm, where imm may be S32_MAX + 1 */
static void emit_sltiu_r64(struct jit_context *ctx, u8 rd,
               const u8 rs[], s64 imm)
{
    u8 tmp = MIPS_R_T9;

    if (imm < 0) {
        emit_mov_i(ctx, rd, imm);                 /* rd = imm        */
        emit(ctx, sltu, rd, lo(rs), rd);          /* rd = rsl < rd   */
        emit(ctx, sltiu, tmp, hi(rs), -1);        /* tmp = rsh < ~0U */
        emit(ctx, or, rd, rd, tmp);               /* rd = rd | tmp   */
    } else { /* imm >= 0 */
        if (imm > 0x7fff) {
            emit_mov_i(ctx, rd, (s32)imm);     /* rd = imm       */
            emit(ctx, sltu, rd, lo(rs), rd);   /* rd = rsl < rd  */
        } else {
            emit(ctx, sltiu, rd, lo(rs), imm); /* rd = rsl < imm */
        }
        emit_movn_r(ctx, rd, MIPS_R_ZERO, hi(rs)); /* rd = 0 if rsh != 0 */
    }
}
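
/*
 * Correctness of the imm < 0 branch above: the sign-extended 64-bit
 * immediate has high word ~0U, so rs <u imm holds iff rsh < ~0U, or
 * rsh == ~0U and rsl <u lo(imm). OR-ing the two tests is exact,
 * because rsh < ~0U alone already decides the comparison.
 */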

/* Emulation of 64-bit sltu rd, rs, rt */
static void emit_sltu_r64(struct jit_context *ctx, u8 rd,
              const u8 rs[], const u8 rt[])
{
    u8 tmp = MIPS_R_T9;

    emit(ctx, sltu, rd, lo(rs), lo(rt));           /* rd = rsl < rtl     */
    emit(ctx, subu, tmp, hi(rs), hi(rt));          /* tmp = rsh - rth    */
    emit_movn_r(ctx, rd, MIPS_R_ZERO, tmp);        /* rd = 0 if tmp != 0 */
    emit(ctx, sltu, tmp, hi(rs), hi(rt));          /* tmp = rsh < rth    */
    emit(ctx, or, rd, rd, tmp);                    /* rd = rd | tmp      */
}
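
/*
 * emit_sltu_r64 evaluates rd = (rsh <u rth) | (rsh == rth && rsl <u rtl):
 * the movn zeroes the low-word comparison whenever the high words
 * differ, leaving the high-word comparison to decide.
 */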

/* Emulation of 64-bit slti rd, rs, imm, where imm may be S32_MAX + 1 */
static void emit_slti_r64(struct jit_context *ctx, u8 rd,
              const u8 rs[], s64 imm)
{
    u8 t1 = MIPS_R_T8;
    u8 t2 = MIPS_R_T9;
    u8 cmp;

    /*
     * if ((rs < 0) ^ (imm < 0)) t1 = imm <u rsl
     * else                      t1 = rsl <u imm
     */
    emit_mov_i(ctx, rd, (s32)imm);
    emit(ctx, sltu, t1, lo(rs), rd);               /* t1 = rsl <u imm   */
    emit(ctx, sltu, t2, rd, lo(rs));               /* t2 = imm <u rsl   */
    emit(ctx, srl, rd, hi(rs), 31);                /* rd = rsh >> 31    */
    if (imm < 0)
        emit_movz_r(ctx, t1, t2, rd);          /* t1 = rd ? t1 : t2 */
    else
        emit_movn_r(ctx, t1, t2, rd);          /* t1 = rd ? t2 : t1 */
    /*
     * if ((imm < 0 && rsh != 0xffffffff) ||
     *     (imm >= 0 && rsh != 0))
     *      t1 = 0
     */
    if (imm < 0) {
        emit(ctx, addiu, rd, hi(rs), 1);       /* rd = rsh + 1 */
        cmp = rd;
    } else { /* imm >= 0 */
        cmp = hi(rs);
    }
    emit_movn_r(ctx, t1, MIPS_R_ZERO, cmp);        /* t1 = 0 if cmp != 0 */

    /*
     * if (imm < 0) rd = rsh < -1
     * else         rd = rsh != 0
     * rd = rd | t1
     */
    emit(ctx, slti, rd, hi(rs), imm < 0 ? -1 : 0); /* rd = rsh < hi(imm) */
    emit(ctx, or, rd, rd, t1);                     /* rd = rd | t1       */
}

/* Emulation of 64-bit slt rd, rs, rt */
static void emit_slt_r64(struct jit_context *ctx, u8 rd,
             const u8 rs[], const u8 rt[])
{
    u8 t1 = MIPS_R_T7;
    u8 t2 = MIPS_R_T8;
    u8 t3 = MIPS_R_T9;

    /*
     * if ((rs < 0) ^ (rt < 0)) t1 = rtl <u rsl
     * else                     t1 = rsl <u rtl
     * if (rsh != rth)          t1 = 0
     */
    emit(ctx, sltu, t1, lo(rs), lo(rt));           /* t1 = rsl <u rtl   */
    emit(ctx, sltu, t2, lo(rt), lo(rs));           /* t2 = rtl <u rsl   */
    emit(ctx, xor, t3, hi(rs), hi(rt));            /* t3 = rsh ^ rth    */
    emit(ctx, srl, rd, t3, 31);                    /* rd = t3 >> 31     */
    emit_movn_r(ctx, t1, t2, rd);                  /* t1 = rd ? t2 : t1 */
    emit_movn_r(ctx, t1, MIPS_R_ZERO, t3);         /* t1 = 0 if t3 != 0 */

    /* rd = (rsh < rth) | t1 */
    emit(ctx, slt, rd, hi(rs), hi(rt));            /* rd = rsh <s rth   */
    emit(ctx, or, rd, rd, t1);                     /* rd = rd | t1      */
}
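
/*
 * Net effect of emit_slt_r64: rd = (rsh <s rth) | (rsh == rth &&
 * rsl <u rtl). When the high words are equal, the signed comparison
 * reduces to an unsigned comparison of the low words.
 */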

/* Jump immediate (64-bit) */
static void emit_jmp_i64(struct jit_context *ctx,
             const u8 dst[], s32 imm, s32 off, u8 op)
{
    u8 tmp = MIPS_R_T6;

    switch (op) {
    /* No-op, used internally for branch optimization */
    case JIT_JNOP:
        break;
    /* PC += off if dst == imm */
    /* PC += off if dst != imm */
    case BPF_JEQ:
    case BPF_JNE:
        if (imm >= -0x7fff && imm <= 0x8000) {
            emit(ctx, addiu, tmp, lo(dst), -imm);
        } else if ((u32)imm <= 0xffff) {
            emit(ctx, xori, tmp, lo(dst), imm);
        } else {       /* Register fallback */
            emit_mov_i(ctx, tmp, imm);
            emit(ctx, xor, tmp, lo(dst), tmp);
        }
        if (imm < 0) { /* Compare sign extension */
            emit(ctx, addiu, MIPS_R_T9, hi(dst), 1);
            emit(ctx, or, tmp, tmp, MIPS_R_T9);
        } else {       /* Compare zero extension */
            emit(ctx, or, tmp, tmp, hi(dst));
        }
        if (op == BPF_JEQ)
            emit(ctx, beqz, tmp, off);
        else   /* BPF_JNE */
            emit(ctx, bnez, tmp, off);
        break;
    /* PC += off if dst & imm */
    /* PC += off if (dst & imm) == 0 (not in BPF, used for long jumps) */
    case BPF_JSET:
    case JIT_JNSET:
        if ((u32)imm <= 0xffff) {
            emit(ctx, andi, tmp, lo(dst), imm);
        } else {     /* Register fallback */
            emit_mov_i(ctx, tmp, imm);
            emit(ctx, and, tmp, lo(dst), tmp);
        }
        if (imm < 0) /* Sign-extension pulls in high word */
            emit(ctx, or, tmp, tmp, hi(dst));
        if (op == BPF_JSET)
            emit(ctx, bnez, tmp, off);
        else   /* JIT_JNSET */
            emit(ctx, beqz, tmp, off);
        break;
    /* PC += off if dst > imm */
    case BPF_JGT:
        emit_sltiu_r64(ctx, tmp, dst, (s64)imm + 1);
        emit(ctx, beqz, tmp, off);
        break;
    /* PC += off if dst >= imm */
    case BPF_JGE:
        emit_sltiu_r64(ctx, tmp, dst, imm);
        emit(ctx, beqz, tmp, off);
        break;
    /* PC += off if dst < imm */
    case BPF_JLT:
        emit_sltiu_r64(ctx, tmp, dst, imm);
        emit(ctx, bnez, tmp, off);
        break;
    /* PC += off if dst <= imm */
    case BPF_JLE:
        emit_sltiu_r64(ctx, tmp, dst, (s64)imm + 1);
        emit(ctx, bnez, tmp, off);
        break;
    /* PC += off if dst > imm (signed) */
    case BPF_JSGT:
        emit_slti_r64(ctx, tmp, dst, (s64)imm + 1);
        emit(ctx, beqz, tmp, off);
        break;
    /* PC += off if dst >= imm (signed) */
    case BPF_JSGE:
        emit_slti_r64(ctx, tmp, dst, imm);
        emit(ctx, beqz, tmp, off);
        break;
    /* PC += off if dst < imm (signed) */
    case BPF_JSLT:
        emit_slti_r64(ctx, tmp, dst, imm);
        emit(ctx, bnez, tmp, off);
        break;
    /* PC += off if dst <= imm (signed) */
    case BPF_JSLE:
        emit_slti_r64(ctx, tmp, dst, (s64)imm + 1);
        emit(ctx, bnez, tmp, off);
        break;
    }
}
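
/*
 * The "greater" and "less-or-equal" cases above reuse the two
 * set-on-less-than helpers by shifting the bound: dst > imm is
 * evaluated as !(dst < imm + 1), which is why BPF_JGT, BPF_JLE,
 * BPF_JSGT and BPF_JSLE pass (s64)imm + 1 and branch on the inverted
 * condition.
 */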

/* Jump register (64-bit) */
static void emit_jmp_r64(struct jit_context *ctx,
             const u8 dst[], const u8 src[], s32 off, u8 op)
{
    u8 t1 = MIPS_R_T6;
    u8 t2 = MIPS_R_T7;

    switch (op) {
    /* No-op, used internally for branch optimization */
    case JIT_JNOP:
        break;
    /* PC += off if dst == src */
    /* PC += off if dst != src */
    case BPF_JEQ:
    case BPF_JNE:
        emit(ctx, subu, t1, lo(dst), lo(src));
        emit(ctx, subu, t2, hi(dst), hi(src));
        emit(ctx, or, t1, t1, t2);
        if (op == BPF_JEQ)
            emit(ctx, beqz, t1, off);
        else   /* BPF_JNE */
            emit(ctx, bnez, t1, off);
        break;
    /* PC += off if dst & src */
    /* PC += off if (dst & src) == 0 (not in BPF, used for long jumps) */
    case BPF_JSET:
    case JIT_JNSET:
        emit(ctx, and, t1, lo(dst), lo(src));
        emit(ctx, and, t2, hi(dst), hi(src));
        emit(ctx, or, t1, t1, t2);
        if (op == BPF_JSET)
            emit(ctx, bnez, t1, off);
        else   /* JIT_JNSET */
            emit(ctx, beqz, t1, off);
        break;
    /* PC += off if dst > src */
    case BPF_JGT:
        emit_sltu_r64(ctx, t1, src, dst);
        emit(ctx, bnez, t1, off);
        break;
    /* PC += off if dst >= src */
    case BPF_JGE:
        emit_sltu_r64(ctx, t1, dst, src);
        emit(ctx, beqz, t1, off);
        break;
    /* PC += off if dst < src */
    case BPF_JLT:
        emit_sltu_r64(ctx, t1, dst, src);
        emit(ctx, bnez, t1, off);
        break;
    /* PC += off if dst <= src */
    case BPF_JLE:
        emit_sltu_r64(ctx, t1, src, dst);
        emit(ctx, beqz, t1, off);
        break;
    /* PC += off if dst > src (signed) */
    case BPF_JSGT:
        emit_slt_r64(ctx, t1, src, dst);
        emit(ctx, bnez, t1, off);
        break;
    /* PC += off if dst >= src (signed) */
    case BPF_JSGE:
        emit_slt_r64(ctx, t1, dst, src);
        emit(ctx, beqz, t1, off);
        break;
    /* PC += off if dst < src (signed) */
    case BPF_JSLT:
        emit_slt_r64(ctx, t1, dst, src);
        emit(ctx, bnez, t1, off);
        break;
    /* PC += off if dst <= src (signed) */
    case BPF_JSLE:
        emit_slt_r64(ctx, t1, src, dst);
        emit(ctx, beqz, t1, off);
        break;
    }
}
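
/*
 * The register variants need no adjusted bound: dst > src is simply
 * evaluated as src < dst with the helper operands swapped.
 */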

/* Function call */
static int emit_call(struct jit_context *ctx, const struct bpf_insn *insn)
{
    bool fixed;
    u64 addr;

    /* Decode the call address */
    if (bpf_jit_get_func_addr(ctx->program, insn, false,
                  &addr, &fixed) < 0)
        return -1;
    if (!fixed)
        return -1;

    /* Push stack arguments */
    push_regs(ctx, JIT_STACK_REGS, 0, JIT_RESERVED_STACK);

    /* Emit function call */
    emit_mov_i(ctx, MIPS_R_T9, addr);
    emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
    emit(ctx, nop); /* Delay slot */

    clobber_reg(ctx, MIPS_R_RA);
    clobber_reg(ctx, MIPS_R_V0);
    clobber_reg(ctx, MIPS_R_V1);
    return 0;
}

/* Function tail call */
static int emit_tail_call(struct jit_context *ctx)
{
    u8 ary = lo(bpf2mips32[BPF_REG_2]);
    u8 ind = lo(bpf2mips32[BPF_REG_3]);
    u8 t1 = MIPS_R_T8;
    u8 t2 = MIPS_R_T9;
    int off;

    /*
     * Tail call:
     * eBPF R1   - function argument (context ptr), passed in a0-a1
     * eBPF R2   - ptr to object with array of function entry points
     * eBPF R3   - array index of function to be called
     * stack[sz] - remaining tail call count, initialized in prologue
     */

    /* if (ind >= ary->map.max_entries) goto out */
    off = offsetof(struct bpf_array, map.max_entries);
    if (off > 0x7fff)
        return -1;
    emit(ctx, lw, t1, off, ary);             /* t1 = ary->map.max_entries*/
    emit_load_delay(ctx);                    /* Load delay slot          */
    emit(ctx, sltu, t1, ind, t1);            /* t1 = ind < t1            */
    emit(ctx, beqz, t1, get_offset(ctx, 1)); /* PC += off(1) if t1 == 0  */
                                             /* (next insn delay slot)   */
    /* if (TCC-- <= 0) goto out */
    emit(ctx, lw, t2, ctx->stack_size, MIPS_R_SP);  /* t2 = *(SP + size) */
    emit_load_delay(ctx);                     /* Load delay slot         */
    emit(ctx, blez, t2, get_offset(ctx, 1));  /* PC += off(1) if t2 <= 0 */
    emit(ctx, addiu, t2, t2, -1);             /* t2-- (delay slot)       */
    emit(ctx, sw, t2, ctx->stack_size, MIPS_R_SP);  /* *(SP + size) = t2 */

    /* prog = ary->ptrs[ind] */
    off = offsetof(struct bpf_array, ptrs);
    if (off > 0x7fff)
        return -1;
    emit(ctx, sll, t1, ind, 2);               /* t1 = ind << 2           */
    emit(ctx, addu, t1, t1, ary);             /* t1 += ary               */
    emit(ctx, lw, t2, off, t1);               /* t2 = *(t1 + off)        */
    emit_load_delay(ctx);                     /* Load delay slot         */

    /* if (prog == 0) goto out */
    emit(ctx, beqz, t2, get_offset(ctx, 1));  /* PC += off(1) if t2 == 0 */
    emit(ctx, nop);                           /* Delay slot              */

    /* func = prog->bpf_func + JIT_TCALL_SKIP (prologue skip offset) */
    off = offsetof(struct bpf_prog, bpf_func);
    if (off > 0x7fff)
        return -1;
    emit(ctx, lw, t1, off, t2);                /* t1 = *(t2 + off)       */
    emit_load_delay(ctx);                      /* Load delay slot        */
    emit(ctx, addiu, t1, t1, JIT_TCALL_SKIP);  /* t1 += skip (8 or 12)   */

    /* goto func */
    build_epilogue(ctx, t1);
    return 0;
}

/*
 * Stack frame layout for a JITed program (stack grows down).
 *
 * Higher address  : Caller's stack frame       :
 *                 :----------------------------:
 *                 : 64-bit eBPF args r3-r5     :
 *                 :----------------------------:
 *                 : Reserved / tail call count :
 *                 +============================+  <--- MIPS sp before call
 *                 | Callee-saved registers,    |
 *                 | including RA and FP        |
 *                 +----------------------------+  <--- eBPF FP (MIPS zero,fp)
 *                 | Local eBPF variables       |
 *                 | allocated by program       |
 *                 +----------------------------+
 *                 | Reserved for caller-saved  |
 *                 | registers                  |
 *                 +----------------------------+
 *                 | Reserved for 64-bit eBPF   |
 *                 | args r3-r5 & args passed   |
 *                 | on stack in kernel calls   |
 * Lower address   +============================+  <--- MIPS sp
 */

/* Build program prologue to set up the stack and registers */
void build_prologue(struct jit_context *ctx)
{
    const u8 *r1 = bpf2mips32[BPF_REG_1];
    const u8 *fp = bpf2mips32[BPF_REG_FP];
    int stack, saved, locals, reserved;

    /*
     * The first two instructions initialize TCC in the 16-byte area
     * that the caller reserved for us in its stack frame. On a tail
     * call, the calling function jumps into the prologue after these
     * instructions.
     */
    emit(ctx, ori, MIPS_R_T9, MIPS_R_ZERO, min(MAX_TAIL_CALL_CNT, 0xffff));
    emit(ctx, sw, MIPS_R_T9, 0, MIPS_R_SP);

    /*
     * Register eBPF R1 contains the 32-bit context pointer argument.
     * A 32-bit argument is always passed in MIPS register a0, regardless
     * of CPU endianness. Initialize R1 accordingly and zero-extend.
     */
#ifdef __BIG_ENDIAN
    emit(ctx, move, lo(r1), MIPS_R_A0);
#endif

    /* === Entry-point for tail calls === */

    /* Zero-extend the 32-bit argument */
    emit(ctx, move, hi(r1), MIPS_R_ZERO);

    /* If the eBPF frame pointer was accessed it must be saved */
    if (ctx->accessed & BIT(BPF_REG_FP))
        clobber_reg64(ctx, fp);

    /* Compute the stack space needed for callee-saved registers */
    saved = hweight32(ctx->clobbered & JIT_CALLEE_REGS) * sizeof(u32);
    saved = ALIGN(saved, MIPS_STACK_ALIGNMENT);

    /* Stack space used by eBPF program local data */
    locals = ALIGN(ctx->program->aux->stack_depth, MIPS_STACK_ALIGNMENT);

    /*
     * If we are emitting function calls, reserve extra stack space for
     * caller-saved registers and function arguments passed on the stack.
     * The required space is computed automatically during resource
     * usage discovery (pass 1).
     */
    reserved = ctx->stack_used;

    /* Allocate the stack frame */
    stack = ALIGN(saved + locals + reserved, MIPS_STACK_ALIGNMENT);
    emit(ctx, addiu, MIPS_R_SP, MIPS_R_SP, -stack);

    /* Store callee-saved registers on stack */
    push_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0, stack - saved);

    /* Initialize the eBPF frame pointer if accessed */
    if (ctx->accessed & BIT(BPF_REG_FP))
        emit(ctx, addiu, lo(fp), MIPS_R_SP, stack - saved);

    ctx->saved_size = saved;
    ctx->stack_size = stack;
}
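
/*
 * Frame size illustration (hypothetical numbers): with six clobbered
 * callee-saved registers (24 bytes), a 40-byte eBPF stack_depth and
 * 16 bytes of reserved call space, the frame is 24 + 40 + 16 = 80
 * bytes, and the eBPF frame pointer is set to SP + 56, the base of
 * the callee-saved area and top of the program locals.
 */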

/* Build the program epilogue to restore the stack and registers */
void build_epilogue(struct jit_context *ctx, int dest_reg)
{
    /* Restore callee-saved registers from stack */
    pop_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0,
         ctx->stack_size - ctx->saved_size);
    /*
     * A 32-bit return value is always passed in MIPS register v0,
     * but on big-endian targets the low part of R0 is mapped to v1.
     */
#ifdef __BIG_ENDIAN
    emit(ctx, move, MIPS_R_V0, MIPS_R_V1);
#endif

    /* Jump to the return address and adjust the stack pointer */
    emit(ctx, jr, dest_reg);
    emit(ctx, addiu, MIPS_R_SP, MIPS_R_SP, ctx->stack_size);
}
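
/*
 * The addiu above executes in the jr delay slot, so the stack is
 * released after the jump issues but before the target instruction
 * runs. This also makes the epilogue reusable for tail calls, where
 * dest_reg holds the target program address rather than RA.
 */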
1453 
1454 /* Build one eBPF instruction */
1455 int build_insn(const struct bpf_insn *insn, struct jit_context *ctx)
1456 {
1457     const u8 *dst = bpf2mips32[insn->dst_reg];
1458     const u8 *src = bpf2mips32[insn->src_reg];
1459     const u8 *res = bpf2mips32[BPF_REG_0];
1460     const u8 *tmp = bpf2mips32[JIT_REG_TMP];
1461     u8 code = insn->code;
1462     s16 off = insn->off;
1463     s32 imm = insn->imm;
1464     s32 val, rel;
1465     u8 alu, jmp;
1466 
1467     switch (code) {
1468     /* ALU operations */
1469     /* dst = imm */
1470     case BPF_ALU | BPF_MOV | BPF_K:
1471         emit_mov_i(ctx, lo(dst), imm);
1472         emit_zext_ver(ctx, dst);
1473         break;
1474     /* dst = src */
1475     case BPF_ALU | BPF_MOV | BPF_X:
1476         if (imm == 1) {
1477             /* Special mov32 for zext */
1478             emit_mov_i(ctx, hi(dst), 0);
1479         } else {
1480             emit_mov_r(ctx, lo(dst), lo(src));
1481             emit_zext_ver(ctx, dst);
1482         }
1483         break;
1484     /* dst = -dst */
1485     case BPF_ALU | BPF_NEG:
1486         emit_alu_i(ctx, lo(dst), 0, BPF_NEG);
1487         emit_zext_ver(ctx, dst);
1488         break;
1489     /* dst = dst & imm */
1490     /* dst = dst | imm */
1491     /* dst = dst ^ imm */
1492     /* dst = dst << imm */
1493     /* dst = dst >> imm */
1494     /* dst = dst >> imm (arithmetic) */
1495     /* dst = dst + imm */
1496     /* dst = dst - imm */
1497     /* dst = dst * imm */
1498     /* dst = dst / imm */
1499     /* dst = dst % imm */
1500     case BPF_ALU | BPF_OR | BPF_K:
1501     case BPF_ALU | BPF_AND | BPF_K:
1502     case BPF_ALU | BPF_XOR | BPF_K:
1503     case BPF_ALU | BPF_LSH | BPF_K:
1504     case BPF_ALU | BPF_RSH | BPF_K:
1505     case BPF_ALU | BPF_ARSH | BPF_K:
1506     case BPF_ALU | BPF_ADD | BPF_K:
1507     case BPF_ALU | BPF_SUB | BPF_K:
1508     case BPF_ALU | BPF_MUL | BPF_K:
1509     case BPF_ALU | BPF_DIV | BPF_K:
1510     case BPF_ALU | BPF_MOD | BPF_K:
1511         if (!valid_alu_i(BPF_OP(code), imm)) {
1512             emit_mov_i(ctx, MIPS_R_T6, imm);
1513             emit_alu_r(ctx, lo(dst), MIPS_R_T6, BPF_OP(code));
1514         } else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) {
1515             emit_alu_i(ctx, lo(dst), val, alu);
1516         }
1517         emit_zext_ver(ctx, dst);
1518         break;
1519     /* dst = dst & src */
1520     /* dst = dst | src */
1521     /* dst = dst ^ src */
1522     /* dst = dst << src */
1523     /* dst = dst >> src */
1524     /* dst = dst >> src (arithmetic) */
1525     /* dst = dst + src */
1526     /* dst = dst - src */
1527     /* dst = dst * src */
1528     /* dst = dst / src */
1529     /* dst = dst % src */
1530     case BPF_ALU | BPF_AND | BPF_X:
1531     case BPF_ALU | BPF_OR | BPF_X:
1532     case BPF_ALU | BPF_XOR | BPF_X:
1533     case BPF_ALU | BPF_LSH | BPF_X:
1534     case BPF_ALU | BPF_RSH | BPF_X:
1535     case BPF_ALU | BPF_ARSH | BPF_X:
1536     case BPF_ALU | BPF_ADD | BPF_X:
1537     case BPF_ALU | BPF_SUB | BPF_X:
1538     case BPF_ALU | BPF_MUL | BPF_X:
1539     case BPF_ALU | BPF_DIV | BPF_X:
1540     case BPF_ALU | BPF_MOD | BPF_X:
1541         emit_alu_r(ctx, lo(dst), lo(src), BPF_OP(code));
1542         emit_zext_ver(ctx, dst);
1543         break;
1544     /* dst = imm (64-bit) */
1545     case BPF_ALU64 | BPF_MOV | BPF_K:
1546         emit_mov_se_i64(ctx, dst, imm);
1547         break;
1548     /* dst = src (64-bit) */
1549     case BPF_ALU64 | BPF_MOV | BPF_X:
1550         emit_mov_r(ctx, lo(dst), lo(src));
1551         emit_mov_r(ctx, hi(dst), hi(src));
1552         break;
1553     /* dst = -dst (64-bit) */
1554     case BPF_ALU64 | BPF_NEG:
1555         emit_neg_i64(ctx, dst);
1556         break;
1557     /* dst = dst & imm (64-bit) */
1558     case BPF_ALU64 | BPF_AND | BPF_K:
1559         emit_alu_i64(ctx, dst, imm, BPF_OP(code));
1560         break;
1561     /* dst = dst | imm (64-bit) */
1562     /* dst = dst ^ imm (64-bit) */
1563     /* dst = dst + imm (64-bit) */
1564     /* dst = dst - imm (64-bit) */
1565     case BPF_ALU64 | BPF_OR | BPF_K:
1566     case BPF_ALU64 | BPF_XOR | BPF_K:
1567     case BPF_ALU64 | BPF_ADD | BPF_K:
1568     case BPF_ALU64 | BPF_SUB | BPF_K:
1569         if (imm)
1570             emit_alu_i64(ctx, dst, imm, BPF_OP(code));
1571         break;
1572     /* dst = dst << imm (64-bit) */
1573     /* dst = dst >> imm (64-bit) */
1574     /* dst = dst >> imm (64-bit, arithmetic) */
1575     case BPF_ALU64 | BPF_LSH | BPF_K:
1576     case BPF_ALU64 | BPF_RSH | BPF_K:
1577     case BPF_ALU64 | BPF_ARSH | BPF_K:
1578         if (imm)
1579             emit_shift_i64(ctx, dst, imm, BPF_OP(code));
1580         break;
1581     /* dst = dst * imm (64-bit) */
1582     case BPF_ALU64 | BPF_MUL | BPF_K:
1583         emit_mul_i64(ctx, dst, imm);
1584         break;
1585     /* dst = dst / imm (64-bit) */
1586     /* dst = dst % imm (64-bit) */
1587     case BPF_ALU64 | BPF_DIV | BPF_K:
1588     case BPF_ALU64 | BPF_MOD | BPF_K:
1589         /*
1590          * Sign-extend the immediate value into a temporary register,
1591          * and then do the operation on this register.
1592          */
1593         emit_mov_se_i64(ctx, tmp, imm);
1594         emit_divmod_r64(ctx, dst, tmp, BPF_OP(code));
1595         break;
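             /*
              * E.g. for dst = dst / -3 the immediate is first expanded
              * to the 64-bit value 0xfffffffffffffffd in the tmp pair
              * (hi = 0xffffffff, lo = 0xfffffffd), after which the
              * register/register path performs the division.
              */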
1596     /* dst = dst & src (64-bit) */
1597     /* dst = dst | src (64-bit) */
1598     /* dst = dst ^ src (64-bit) */
1599     /* dst = dst + src (64-bit) */
1600     /* dst = dst - src (64-bit) */
1601     case BPF_ALU64 | BPF_AND | BPF_X:
1602     case BPF_ALU64 | BPF_OR | BPF_X:
1603     case BPF_ALU64 | BPF_XOR | BPF_X:
1604     case BPF_ALU64 | BPF_ADD | BPF_X:
1605     case BPF_ALU64 | BPF_SUB | BPF_X:
1606         emit_alu_r64(ctx, dst, src, BPF_OP(code));
1607         break;
1608     /* dst = dst << src (64-bit) */
1609     /* dst = dst >> src (64-bit) */
1610     /* dst = dst >> src (64-bit, arithmetic) */
1611     case BPF_ALU64 | BPF_LSH | BPF_X:
1612     case BPF_ALU64 | BPF_RSH | BPF_X:
1613     case BPF_ALU64 | BPF_ARSH | BPF_X:
1614         emit_shift_r64(ctx, dst, lo(src), BPF_OP(code));
1615         break;
1616     /* dst = dst * src (64-bit) */
1617     case BPF_ALU64 | BPF_MUL | BPF_X:
1618         emit_mul_r64(ctx, dst, src);
1619         break;
1620     /* dst = dst / src (64-bit) */
1621     /* dst = dst % src (64-bit) */
1622     case BPF_ALU64 | BPF_DIV | BPF_X:
1623     case BPF_ALU64 | BPF_MOD | BPF_X:
1624         emit_divmod_r64(ctx, dst, src, BPF_OP(code));
1625         break;
1626     /* dst = htole(dst) */
1627     /* dst = htobe(dst) */
1628     case BPF_ALU | BPF_END | BPF_FROM_LE:
1629     case BPF_ALU | BPF_END | BPF_FROM_BE:
1630         if (BPF_SRC(code) ==
1631 #ifdef __BIG_ENDIAN
1632             BPF_FROM_LE
1633 #else
1634             BPF_FROM_BE
1635 #endif
1636             )
1637             emit_bswap_r64(ctx, dst, imm);
1638         else
1639             emit_trunc_r64(ctx, dst, imm);
1640         break;
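             /*
              * The #ifdef selects the byte order foreign to the host:
              * conversion to or from the host's own order is a mere
              * truncation to imm (16, 32 or 64) bits, whereas the
              * foreign order needs an actual byte swap.  On a little-
              * endian CPU, for instance, BPF_FROM_BE with imm == 16
              * byte-swaps the low halfword and clears the upper bits.
              */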
1641     /* dst = imm64 */
1642     case BPF_LD | BPF_IMM | BPF_DW:
1643         emit_mov_i(ctx, lo(dst), imm);
1644         emit_mov_i(ctx, hi(dst), insn[1].imm);
1645         return 1;
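             /*
              * A 64-bit immediate load occupies two eBPF instruction
              * slots, with the upper word in insn[1].imm; returning 1
              * tells the caller to skip that second slot.  E.g.
              * BPF_LD_IMM64(BPF_REG_1, 0x1122334455667788ULL) expands
              * to two instructions with imm = 0x55667788 and
              * imm = 0x11223344 respectively.
              */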
1646     /* LDX: dst = *(size *)(src + off) */
1647     case BPF_LDX | BPF_MEM | BPF_W:
1648     case BPF_LDX | BPF_MEM | BPF_H:
1649     case BPF_LDX | BPF_MEM | BPF_B:
1650     case BPF_LDX | BPF_MEM | BPF_DW:
1651         emit_ldx(ctx, dst, lo(src), off, BPF_SIZE(code));
1652         break;
1653     /* ST: *(size *)(dst + off) = imm */
1654     case BPF_ST | BPF_MEM | BPF_W:
1655     case BPF_ST | BPF_MEM | BPF_H:
1656     case BPF_ST | BPF_MEM | BPF_B:
1657     case BPF_ST | BPF_MEM | BPF_DW:
1658         switch (BPF_SIZE(code)) {
1659         case BPF_DW:
1660             /* Sign-extend immediate value into temporary reg */
1661             emit_mov_se_i64(ctx, tmp, imm);
1662             break;
1663         case BPF_W:
1664         case BPF_H:
1665         case BPF_B:
1666             emit_mov_i(ctx, lo(tmp), imm);
1667             break;
1668         }
1669         emit_stx(ctx, lo(dst), tmp, off, BPF_SIZE(code));
1670         break;
1671     /* STX: *(size *)(dst + off) = src */
1672     case BPF_STX | BPF_MEM | BPF_W:
1673     case BPF_STX | BPF_MEM | BPF_H:
1674     case BPF_STX | BPF_MEM | BPF_B:
1675     case BPF_STX | BPF_MEM | BPF_DW:
1676         emit_stx(ctx, lo(dst), src, off, BPF_SIZE(code));
1677         break;
1678     /* Speculation barrier */
1679     case BPF_ST | BPF_NOSPEC:
1680         break;
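             /*
              * Nothing is emitted for the barrier on this target; the
              * case exists so that programs containing BPF_NOSPEC are
              * still accepted by the JIT.
              */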
1681     /* Atomics */
1682     case BPF_STX | BPF_ATOMIC | BPF_W:
1683         switch (imm) {
1684         case BPF_ADD:
1685         case BPF_ADD | BPF_FETCH:
1686         case BPF_AND:
1687         case BPF_AND | BPF_FETCH:
1688         case BPF_OR:
1689         case BPF_OR | BPF_FETCH:
1690         case BPF_XOR:
1691         case BPF_XOR | BPF_FETCH:
1692         case BPF_XCHG:
1693             if (cpu_has_llsc)
1694                 emit_atomic_r(ctx, lo(dst), lo(src), off, imm);
1695             else /* Non-ll/sc fallback */
1696                 emit_atomic_r32(ctx, lo(dst), lo(src),
1697                         off, imm);
1698             if (imm & BPF_FETCH)
1699                 emit_zext_ver(ctx, src);
1700             break;
1701         case BPF_CMPXCHG:
1702             if (cpu_has_llsc)
1703                 emit_cmpxchg_r(ctx, lo(dst), lo(src),
1704                            lo(res), off);
1705             else /* Non-ll/sc fallback */
1706                 emit_cmpxchg_r32(ctx, lo(dst), lo(src), off);
1707             /* Result zero-extension inserted by verifier */
1708             break;
1709         default:
1710             goto notyet;
1711         }
1712         break;
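             /*
              * On ll/sc-capable CPUs, emit_atomic_r() can be expected
              * to generate the usual load-linked/store-conditional
              * retry loop, roughly (register names are illustrative):
              *
              *   1: ll    t9, off(dst)     # load-linked old value
              *      addu  t8, t9, src      # e.g. BPF_ADD
              *      sc    t8, off(dst)     # store-conditional
              *      beqz  t8, 1b           # retry if the store failed
              *
              * with the old value handed back in src for the BPF_FETCH
              * variants, hence the zero-extension of src above.
              */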
1713     /* Atomics (64-bit) */
1714     case BPF_STX | BPF_ATOMIC | BPF_DW:
1715         switch (imm) {
1716         case BPF_ADD:
1717         case BPF_ADD | BPF_FETCH:
1718         case BPF_AND:
1719         case BPF_AND | BPF_FETCH:
1720         case BPF_OR:
1721         case BPF_OR | BPF_FETCH:
1722         case BPF_XOR:
1723         case BPF_XOR | BPF_FETCH:
1724         case BPF_XCHG:
1725             emit_atomic_r64(ctx, lo(dst), src, off, imm);
1726             break;
1727         case BPF_CMPXCHG:
1728             emit_cmpxchg_r64(ctx, lo(dst), src, off);
1729             break;
1730         default:
1731             goto notyet;
1732         }
1733         break;
1734     /* PC += off if dst == src */
1735     /* PC += off if dst != src */
1736     /* PC += off if dst & src */
1737     /* PC += off if dst > src */
1738     /* PC += off if dst >= src */
1739     /* PC += off if dst < src */
1740     /* PC += off if dst <= src */
1741     /* PC += off if dst > src (signed) */
1742     /* PC += off if dst >= src (signed) */
1743     /* PC += off if dst < src (signed) */
1744     /* PC += off if dst <= src (signed) */
1745     case BPF_JMP32 | BPF_JEQ | BPF_X:
1746     case BPF_JMP32 | BPF_JNE | BPF_X:
1747     case BPF_JMP32 | BPF_JSET | BPF_X:
1748     case BPF_JMP32 | BPF_JGT | BPF_X:
1749     case BPF_JMP32 | BPF_JGE | BPF_X:
1750     case BPF_JMP32 | BPF_JLT | BPF_X:
1751     case BPF_JMP32 | BPF_JLE | BPF_X:
1752     case BPF_JMP32 | BPF_JSGT | BPF_X:
1753     case BPF_JMP32 | BPF_JSGE | BPF_X:
1754     case BPF_JMP32 | BPF_JSLT | BPF_X:
1755     case BPF_JMP32 | BPF_JSLE | BPF_X:
1756         if (off == 0)
1757             break;
1758         setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel);
1759         emit_jmp_r(ctx, lo(dst), lo(src), rel, jmp);
1760         if (finish_jmp(ctx, jmp, off) < 0)
1761             goto toofar;
1762         break;
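             /*
              * setup_jmp_r() and finish_jmp() in the shared JIT core
              * deal with the limited reach of MIPS PC-relative
              * branches: a near target gets a single conditional
              * branch, while a far target is expected to branch around
              * an unconditional long jump on the inverted condition.
              * When even that is impossible, finish_jmp() fails and
              * the program is rejected with -E2BIG.
              */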
1763     /* PC += off if dst == imm */
1764     /* PC += off if dst != imm */
1765     /* PC += off if dst & imm */
1766     /* PC += off if dst > imm */
1767     /* PC += off if dst >= imm */
1768     /* PC += off if dst < imm */
1769     /* PC += off if dst <= imm */
1770     /* PC += off if dst > imm (signed) */
1771     /* PC += off if dst >= imm (signed) */
1772     /* PC += off if dst < imm (signed) */
1773     /* PC += off if dst <= imm (signed) */
1774     case BPF_JMP32 | BPF_JEQ | BPF_K:
1775     case BPF_JMP32 | BPF_JNE | BPF_K:
1776     case BPF_JMP32 | BPF_JSET | BPF_K:
1777     case BPF_JMP32 | BPF_JGT | BPF_K:
1778     case BPF_JMP32 | BPF_JGE | BPF_K:
1779     case BPF_JMP32 | BPF_JLT | BPF_K:
1780     case BPF_JMP32 | BPF_JLE | BPF_K:
1781     case BPF_JMP32 | BPF_JSGT | BPF_K:
1782     case BPF_JMP32 | BPF_JSGE | BPF_K:
1783     case BPF_JMP32 | BPF_JSLT | BPF_K:
1784     case BPF_JMP32 | BPF_JSLE | BPF_K:
1785         if (off == 0)
1786             break;
1787         setup_jmp_i(ctx, imm, 32, BPF_OP(code), off, &jmp, &rel);
1788         if (valid_jmp_i(jmp, imm)) {
1789             emit_jmp_i(ctx, lo(dst), imm, rel, jmp);
1790         } else {
1791             /* Move large immediate to register */
1792             emit_mov_i(ctx, MIPS_R_T6, imm);
1793             emit_jmp_r(ctx, lo(dst), MIPS_R_T6, rel, jmp);
1794         }
1795         if (finish_jmp(ctx, jmp, off) < 0)
1796             goto toofar;
1797         break;
1798     /* PC += off if dst == src */
1799     /* PC += off if dst != src */
1800     /* PC += off if dst & src */
1801     /* PC += off if dst > src */
1802     /* PC += off if dst >= src */
1803     /* PC += off if dst < src */
1804     /* PC += off if dst <= src */
1805     /* PC += off if dst > src (signed) */
1806     /* PC += off if dst >= src (signed) */
1807     /* PC += off if dst < src (signed) */
1808     /* PC += off if dst <= src (signed) */
1809     case BPF_JMP | BPF_JEQ | BPF_X:
1810     case BPF_JMP | BPF_JNE | BPF_X:
1811     case BPF_JMP | BPF_JSET | BPF_X:
1812     case BPF_JMP | BPF_JGT | BPF_X:
1813     case BPF_JMP | BPF_JGE | BPF_X:
1814     case BPF_JMP | BPF_JLT | BPF_X:
1815     case BPF_JMP | BPF_JLE | BPF_X:
1816     case BPF_JMP | BPF_JSGT | BPF_X:
1817     case BPF_JMP | BPF_JSGE | BPF_X:
1818     case BPF_JMP | BPF_JSLT | BPF_X:
1819     case BPF_JMP | BPF_JSLE | BPF_X:
1820         if (off == 0)
1821             break;
1822         setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel);
1823         emit_jmp_r64(ctx, dst, src, rel, jmp);
1824         if (finish_jmp(ctx, jmp, off) < 0)
1825             goto toofar;
1826         break;
1827     /* PC += off if dst == imm */
1828     /* PC += off if dst != imm */
1829     /* PC += off if dst & imm */
1830     /* PC += off if dst > imm */
1831     /* PC += off if dst >= imm */
1832     /* PC += off if dst < imm */
1833     /* PC += off if dst <= imm */
1834     /* PC += off if dst > imm (signed) */
1835     /* PC += off if dst >= imm (signed) */
1836     /* PC += off if dst < imm (signed) */
1837     /* PC += off if dst <= imm (signed) */
1838     case BPF_JMP | BPF_JEQ | BPF_K:
1839     case BPF_JMP | BPF_JNE | BPF_K:
1840     case BPF_JMP | BPF_JSET | BPF_K:
1841     case BPF_JMP | BPF_JGT | BPF_K:
1842     case BPF_JMP | BPF_JGE | BPF_K:
1843     case BPF_JMP | BPF_JLT | BPF_K:
1844     case BPF_JMP | BPF_JLE | BPF_K:
1845     case BPF_JMP | BPF_JSGT | BPF_K:
1846     case BPF_JMP | BPF_JSGE | BPF_K:
1847     case BPF_JMP | BPF_JSLT | BPF_K:
1848     case BPF_JMP | BPF_JSLE | BPF_K:
1849         if (off == 0)
1850             break;
1851         setup_jmp_i(ctx, imm, 64, BPF_OP(code), off, &jmp, &rel);
1852         emit_jmp_i64(ctx, dst, imm, rel, jmp);
1853         if (finish_jmp(ctx, jmp, off) < 0)
1854             goto toofar;
1855         break;
1856     /* PC += off */
1857     case BPF_JMP | BPF_JA:
1858         if (off == 0)
1859             break;
1860         if (emit_ja(ctx, off) < 0)
1861             goto toofar;
1862         break;
1863     /* Tail call */
1864     case BPF_JMP | BPF_TAIL_CALL:
1865         if (emit_tail_call(ctx) < 0)
1866             goto invalid;
1867         break;
1868     /* Function call */
1869     case BPF_JMP | BPF_CALL:
1870         if (emit_call(ctx, insn) < 0)
1871             goto invalid;
1872         break;
1873     /* Function return */
1874     case BPF_JMP | BPF_EXIT:
1875         /*
1876          * Optimization: when the last instruction is an EXIT,
1877          * simply fall through to the epilogue.
1878          */
1879         if (ctx->bpf_index == ctx->program->len - 1)
1880             break;
1881         if (emit_exit(ctx) < 0)
1882             goto toofar;
1883         break;
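             /*
              * build_epilogue() is placed directly after the
              * translated program body, so the final EXIT can fall
              * through into it rather than branching.
              */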
1884 
1885     default:
1886 invalid:
1887         pr_err_once("unknown opcode %02x\n", code);
1888         return -EINVAL;
1889 notyet:
1890         pr_info_once("*** NOT YET: opcode %02x ***\n", code);
1891         return -EFAULT;
1892 toofar:
1893         pr_info_once("*** TOO FAR: jump at %u opcode %02x ***\n",
1894                  ctx->bpf_index, code);
1895         return -E2BIG;
1896     }
1897     return 0;
1898 }
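
     /*
      * A caller is expected to drive this function roughly as sketched
      * below (the real loop lives in the shared JIT core and also
      * records per-instruction offsets):
      *
      *   for (i = 0; i < prog->len; i++) {
      *           int ret = build_insn(&prog->insnsi[i], ctx);
      *
      *           if (ret < 0)
      *                   return ret;  // unsupported or out of range
      *           i += ret;            // skip 2nd slot of LD | IMM | DW
      *   }
      */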