// SPDX-License-Identifier: GPL-2.0
#include <linux/moduleloader.h>
#include <linux/workqueue.h>
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/bpf.h>
#include <linux/cache.h>
#include <linux/if_vlan.h>

#include <asm/cacheflush.h>
#include <asm/ptrace.h>

#include "bpf_jit_64.h"

static inline bool is_simm13(unsigned int value)
{
    return value + 0x1000 < 0x2000;
}

static inline bool is_simm10(unsigned int value)
{
    return value + 0x200 < 0x400;
}

static inline bool is_simm5(unsigned int value)
{
    return value + 0x10 < 0x20;
}

static inline bool is_sethi(unsigned int value)
{
    return (value & ~0x3fffff) == 0;
}
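
/* The is_simmN() helpers test whether a value fits in an N-bit signed
 * immediate with a single unsigned comparison: adding 2^(N-1) maps the
 * valid range [-2^(N-1), 2^(N-1)) onto [0, 2^N), and anything outside
 * wraps above it.  For example, is_simm13(-1) computes 0xffffffff +
 * 0x1000 = 0xfff (mod 2^32), which is < 0x2000, so -1 fits; 0x1000
 * becomes 0x2000, which does not.
 */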

static void bpf_flush_icache(void *start_, void *end_)
{
    /* Cheetah and later chips have a fully coherent I-cache, so an
     * explicit flush is only needed on spitfire-class chips.
     */
    if (tlb_type == spitfire) {
        unsigned long start = (unsigned long) start_;
        unsigned long end = (unsigned long) end_;

        start &= ~7UL;
        end = (end + 7UL) & ~7UL;
        while (start < end) {
            flushi(start);
            start += 32;
        }
    }
}

#define S13(X)      ((X) & 0x1fff)
#define S5(X)       ((X) & 0x1f)
#define IMMED       0x00002000
#define RD(X)       ((X) << 25)
#define RS1(X)      ((X) << 14)
#define RS2(X)      ((X))
#define OP(X)       ((X) << 30)
#define OP2(X)      ((X) << 22)
#define OP3(X)      ((X) << 19)
#define COND(X)     (((X) & 0xf) << 25)
#define CBCOND(X)   (((X) & 0x1f) << 25)
#define F1(X)       OP(X)
#define F2(X, Y)    (OP(X) | OP2(Y))
#define F3(X, Y)    (OP(X) | OP3(Y))
#define ASI(X)      (((X) & 0xff) << 5)
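
/* These field macros follow the SPARC instruction word layout: op in
 * bits 31:30, rd in bits 29:25, op3 in bits 24:19, rs1 in bits 18:14,
 * and the i bit (IMMED, bit 13) selecting between rs2 in bits 4:0 and
 * a 13-bit signed immediate in bits 12:0.  F1/F2/F3 assemble the three
 * major instruction formats from these pieces.
 */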

#define CONDN       COND(0x0)
#define CONDE       COND(0x1)
#define CONDLE      COND(0x2)
#define CONDL       COND(0x3)
#define CONDLEU     COND(0x4)
#define CONDCS      COND(0x5)
#define CONDNEG     COND(0x6)
#define CONDVC      COND(0x7)
#define CONDA       COND(0x8)
#define CONDNE      COND(0x9)
#define CONDG       COND(0xa)
#define CONDGE      COND(0xb)
#define CONDGU      COND(0xc)
#define CONDCC      COND(0xd)
#define CONDPOS     COND(0xe)
#define CONDVS      COND(0xf)

#define CONDGEU     CONDCC
#define CONDLU      CONDCS

#define WDISP22(X)  (((X) >> 2) & 0x3fffff)
#define WDISP19(X)  (((X) >> 2) & 0x7ffff)

/* The 10-bit branch displacement for CBCOND is split into two fields */
static u32 WDISP10(u32 off)
{
    u32 ret = ((off >> 2) & 0xff) << 5;

    ret |= ((off >> (2 + 8)) & 0x03) << 19;

    return ret;
}
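
/* The byte offset is first scaled to a word displacement (off >> 2);
 * its low eight bits land in instruction bits 12:5 and the top two in
 * bits 20:19.  For example, a branch 36 bytes forward is word
 * displacement 9, giving 9 << 5 with the high field zero.
 */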

#define CBCONDE     CBCOND(0x09)
#define CBCONDLE    CBCOND(0x0a)
#define CBCONDL     CBCOND(0x0b)
#define CBCONDLEU   CBCOND(0x0c)
#define CBCONDCS    CBCOND(0x0d)
#define CBCONDN     CBCOND(0x0e)
#define CBCONDVS    CBCOND(0x0f)
#define CBCONDNE    CBCOND(0x19)
#define CBCONDG     CBCOND(0x1a)
#define CBCONDGE    CBCOND(0x1b)
#define CBCONDGU    CBCOND(0x1c)
#define CBCONDCC    CBCOND(0x1d)
#define CBCONDPOS   CBCOND(0x1e)
#define CBCONDVC    CBCOND(0x1f)

#define CBCONDGEU   CBCONDCC
#define CBCONDLU    CBCONDCS

#define ANNUL       (1 << 29)
#define XCC     (1 << 21)

#define BRANCH      (F2(0, 1) | XCC)
#define CBCOND_OP   (F2(0, 3) | XCC)

#define BA      (BRANCH | CONDA)
#define BG      (BRANCH | CONDG)
#define BL      (BRANCH | CONDL)
#define BLE     (BRANCH | CONDLE)
#define BGU     (BRANCH | CONDGU)
#define BLEU        (BRANCH | CONDLEU)
#define BGE     (BRANCH | CONDGE)
#define BGEU        (BRANCH | CONDGEU)
#define BLU     (BRANCH | CONDLU)
#define BE      (BRANCH | CONDE)
#define BNE     (BRANCH | CONDNE)

#define SETHI(K, REG)   \
    (F2(0, 0x4) | RD(REG) | (((K) >> 10) & 0x3fffff))
#define OR_LO(K, REG)   \
    (F3(2, 0x02) | IMMED | RS1(REG) | ((K) & 0x3ff) | RD(REG))

#define ADD     F3(2, 0x00)
#define AND     F3(2, 0x01)
#define ANDCC       F3(2, 0x11)
#define OR      F3(2, 0x02)
#define XOR     F3(2, 0x03)
#define SUB     F3(2, 0x04)
#define SUBCC       F3(2, 0x14)
#define MUL     F3(2, 0x0a)
#define MULX        F3(2, 0x09)
#define UDIVX       F3(2, 0x0d)
#define DIV     F3(2, 0x0e)
#define SLL     F3(2, 0x25)
#define SLLX        (F3(2, 0x25)|(1<<12))
#define SRA     F3(2, 0x27)
#define SRAX        (F3(2, 0x27)|(1<<12))
#define SRL     F3(2, 0x26)
#define SRLX        (F3(2, 0x26)|(1<<12))
#define JMPL        F3(2, 0x38)
#define SAVE        F3(2, 0x3c)
#define RESTORE     F3(2, 0x3d)
#define CALL        F1(1)
#define BR      F2(0, 0x01)
#define RD_Y        F3(2, 0x28)
#define WR_Y        F3(2, 0x30)

#define LD32        F3(3, 0x00)
#define LD8     F3(3, 0x01)
#define LD16        F3(3, 0x02)
#define LD64        F3(3, 0x0b)
#define LD64A       F3(3, 0x1b)
#define ST8     F3(3, 0x05)
#define ST16        F3(3, 0x06)
#define ST32        F3(3, 0x04)
#define ST64        F3(3, 0x0e)

#define CAS     F3(3, 0x3c)
#define CASX        F3(3, 0x3e)

#define LDPTR       LD64
#define BASE_STACKFRAME 176

#define LD32I       (LD32 | IMMED)
#define LD8I        (LD8 | IMMED)
#define LD16I       (LD16 | IMMED)
#define LD64I       (LD64 | IMMED)
#define LDPTRI      (LDPTR | IMMED)
#define ST32I       (ST32 | IMMED)

struct jit_ctx {
    struct bpf_prog     *prog;
    unsigned int        *offset;    /* BPF insn index -> sparc insn index */
    int         idx;        /* current sparc insn index in the image */
    int         epilogue_offset;
    bool            tmp_1_used;
    bool            tmp_2_used;
    bool            tmp_3_used;
    bool            saw_frame_pointer;
    bool            saw_call;
    bool            saw_tail_call;
    u32         *image;     /* NULL during the sizing passes */
};

#define TMP_REG_1   (MAX_BPF_JIT_REG + 0)
#define TMP_REG_2   (MAX_BPF_JIT_REG + 1)
#define TMP_REG_3   (MAX_BPF_JIT_REG + 2)

/* Map BPF registers to SPARC registers */
static const int bpf2sparc[] = {
    /* return value from in-kernel function, and exit value from eBPF */
    [BPF_REG_0] = O5,

    /* arguments from eBPF program to in-kernel function */
    [BPF_REG_1] = O0,
    [BPF_REG_2] = O1,
    [BPF_REG_3] = O2,
    [BPF_REG_4] = O3,
    [BPF_REG_5] = O4,

    /* callee saved registers that in-kernel function will preserve */
    [BPF_REG_6] = L0,
    [BPF_REG_7] = L1,
    [BPF_REG_8] = L2,
    [BPF_REG_9] = L3,

    /* read-only frame pointer to access stack */
    [BPF_REG_FP] = L6,

    [BPF_REG_AX] = G7,

    /* temporary register for BPF JIT */
    [TMP_REG_1] = G1,
    [TMP_REG_2] = G2,
    [TMP_REG_3] = G3,
};

static void emit(const u32 insn, struct jit_ctx *ctx)
{
    if (ctx->image != NULL)
        ctx->image[ctx->idx] = insn;

    ctx->idx++;
}

static void emit_call(u32 *func, struct jit_ctx *ctx)
{
    if (ctx->image != NULL) {
        void *here = &ctx->image[ctx->idx];
        unsigned int off;

        off = (void *)func - here;
        ctx->image[ctx->idx] = CALL | ((off >> 2) & 0x3fffffff);
    }
    ctx->idx++;
}

static void emit_nop(struct jit_ctx *ctx)
{
    emit(SETHI(0, G0), ctx);
}

static void emit_reg_move(u32 from, u32 to, struct jit_ctx *ctx)
{
    emit(OR | RS1(G0) | RS2(from) | RD(to), ctx);
}

/* Emit 32-bit constant, zero extended. */
static void emit_set_const(s32 K, u32 reg, struct jit_ctx *ctx)
{
    emit(SETHI(K, reg), ctx);
    emit(OR_LO(K, reg), ctx);
}
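
/* Worked example: emit_set_const(0xdeadbeef, reg) emits
 * "sethi %hi(0xdeadbeef), reg", which sets bits 31:10 (0xdeadbc00) and
 * clears the rest of the register, then "or reg, 0x2ef, reg" fills in
 * the low 10 bits.
 */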

/* Emit 32-bit constant, sign extended. */
static void emit_set_const_sext(s32 K, u32 reg, struct jit_ctx *ctx)
{
    if (K >= 0) {
        emit(SETHI(K, reg), ctx);
        emit(OR_LO(K, reg), ctx);
    } else {
        u32 hbits = ~(u32) K;
        u32 lbits = -0x400 | (u32) K;

        emit(SETHI(hbits, reg), ctx);
        emit(XOR | IMMED | RS1(reg) | S13(lbits) | RD(reg), ctx);
    }
}
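
/* For negative K the sethi loads ~K with the low 10 bits clear and the
 * upper 32 bits zero.  XORing with the sign-extended 13-bit immediate
 * (all ones above bit 9, K's own low 10 bits below) flips bits 31:10
 * back to K's values, passes the low 10 bits through, and fills bits
 * 63:32 with ones, producing the full 64-bit sign extension in just
 * two instructions.
 */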

static void emit_alu(u32 opcode, u32 src, u32 dst, struct jit_ctx *ctx)
{
    emit(opcode | RS1(dst) | RS2(src) | RD(dst), ctx);
}

static void emit_alu3(u32 opcode, u32 a, u32 b, u32 c, struct jit_ctx *ctx)
{
    emit(opcode | RS1(a) | RS2(b) | RD(c), ctx);
}

static void emit_alu_K(unsigned int opcode, unsigned int dst, unsigned int imm,
               struct jit_ctx *ctx)
{
    bool small_immed = is_simm13(imm);
    unsigned int insn = opcode;

    insn |= RS1(dst) | RD(dst);
    if (small_immed) {
        emit(insn | IMMED | S13(imm), ctx);
    } else {
        unsigned int tmp = bpf2sparc[TMP_REG_1];

        ctx->tmp_1_used = true;

        emit_set_const_sext(imm, tmp, ctx);
        emit(insn | RS2(tmp), ctx);
    }
}

static void emit_alu3_K(unsigned int opcode, unsigned int src, unsigned int imm,
            unsigned int dst, struct jit_ctx *ctx)
{
    bool small_immed = is_simm13(imm);
    unsigned int insn = opcode;

    insn |= RS1(src) | RD(dst);
    if (small_immed) {
        emit(insn | IMMED | S13(imm), ctx);
    } else {
        unsigned int tmp = bpf2sparc[TMP_REG_1];

        ctx->tmp_1_used = true;

        emit_set_const_sext(imm, tmp, ctx);
        emit(insn | RS2(tmp), ctx);
    }
}

static void emit_loadimm32(s32 K, unsigned int dest, struct jit_ctx *ctx)
{
    if (K >= 0 && is_simm13(K)) {
        /* or %g0, K, DEST */
        emit(OR | IMMED | RS1(G0) | S13(K) | RD(dest), ctx);
    } else {
        emit_set_const(K, dest, ctx);
    }
}

static void emit_loadimm(s32 K, unsigned int dest, struct jit_ctx *ctx)
{
    if (is_simm13(K)) {
        /* or %g0, K, DEST */
        emit(OR | IMMED | RS1(G0) | S13(K) | RD(dest), ctx);
    } else {
        emit_set_const(K, dest, ctx);
    }
}

static void emit_loadimm_sext(s32 K, unsigned int dest, struct jit_ctx *ctx)
{
    if (is_simm13(K)) {
        /* or %g0, K, DEST */
        emit(OR | IMMED | RS1(G0) | S13(K) | RD(dest), ctx);
    } else {
        emit_set_const_sext(K, dest, ctx);
    }
}

static void analyze_64bit_constant(u32 high_bits, u32 low_bits,
                   int *hbsp, int *lbsp, int *abbasp)
{
    int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
    int i;

    lowest_bit_set = highest_bit_set = -1;
    i = 0;
    do {
        if ((lowest_bit_set == -1) && ((low_bits >> i) & 1))
            lowest_bit_set = i;
        if ((highest_bit_set == -1) && ((high_bits >> (32 - i - 1)) & 1))
            highest_bit_set = (64 - i - 1);
    }  while (++i < 32 && (highest_bit_set == -1 ||
                   lowest_bit_set == -1));
    if (i == 32) {
        i = 0;
        do {
            if (lowest_bit_set == -1 && ((high_bits >> i) & 1))
                lowest_bit_set = i + 32;
            if (highest_bit_set == -1 &&
                ((low_bits >> (32 - i - 1)) & 1))
                highest_bit_set = 32 - i - 1;
        } while (++i < 32 && (highest_bit_set == -1 ||
                      lowest_bit_set == -1));
    }

    all_bits_between_are_set = 1;
    for (i = lowest_bit_set; i <= highest_bit_set; i++) {
        if (i < 32) {
            if ((low_bits & (1 << i)) != 0)
                continue;
        } else {
            if ((high_bits & (1 << (i - 32))) != 0)
                continue;
        }
        all_bits_between_are_set = 0;
        break;
    }
    *hbsp = highest_bit_set;
    *lbsp = lowest_bit_set;
    *abbasp = all_bits_between_are_set;
}
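
/* Example: for K = 0xf00000000 (high_bits = 0xf, low_bits = 0) the
 * scan finds lowest_bit_set = 32 and highest_bit_set = 35, and since
 * bits 32-35 are contiguous, all_bits_between_are_set = 1.
 */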

static unsigned long create_simple_focus_bits(unsigned long high_bits,
                          unsigned long low_bits,
                          int lowest_bit_set, int shift)
{
    long hi, lo;

    if (lowest_bit_set < 32) {
        lo = (low_bits >> lowest_bit_set) << shift;
        hi = ((high_bits << (32 - lowest_bit_set)) << shift);
    } else {
        lo = 0;
        hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
    }
    return hi | lo;
}

static bool const64_is_2insns(unsigned long high_bits,
                  unsigned long low_bits)
{
    int highest_bit_set, lowest_bit_set, all_bits_between_are_set;

    if (high_bits == 0 || high_bits == 0xffffffff)
        return true;

    analyze_64bit_constant(high_bits, low_bits,
                   &highest_bit_set, &lowest_bit_set,
                   &all_bits_between_are_set);

    if ((highest_bit_set == 63 || lowest_bit_set == 0) &&
        all_bits_between_are_set != 0)
        return true;

    if (highest_bit_set - lowest_bit_set < 21)
        return true;

    return false;
}
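
/* The two-instruction cases are either "mov simm13/-1, reg" followed
 * by a single shift, or a sethi of a narrow group of bits followed by
 * a shift into position; a span of at most 21 set bits always fits
 * within the 22-bit sethi immediate once shifted down.
 */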

static void sparc_emit_set_const64_quick2(unsigned long high_bits,
                      unsigned long low_imm,
                      unsigned int dest,
                      int shift_count, struct jit_ctx *ctx)
{
    emit_loadimm32(high_bits, dest, ctx);

    /* Now shift it up into place.  */
    emit_alu_K(SLLX, dest, shift_count, ctx);

    /* If there is a low immediate part piece, finish up by
     * putting that in as well.
     */
    if (low_imm != 0)
        emit(OR | IMMED | RS1(dest) | S13(low_imm) | RD(dest), ctx);
}

static void emit_loadimm64(u64 K, unsigned int dest, struct jit_ctx *ctx)
{
    int all_bits_between_are_set, lowest_bit_set, highest_bit_set;
    unsigned int tmp = bpf2sparc[TMP_REG_1];
    u32 low_bits = (K & 0xffffffff);
    u32 high_bits = (K >> 32);

    /* These two tests also take care of all of the one
     * instruction cases.
     */
    if (high_bits == 0xffffffff && (low_bits & 0x80000000))
        return emit_loadimm_sext(K, dest, ctx);
    if (high_bits == 0x00000000)
        return emit_loadimm32(K, dest, ctx);

    analyze_64bit_constant(high_bits, low_bits, &highest_bit_set,
                   &lowest_bit_set, &all_bits_between_are_set);

    /* 1) mov   -1, %reg
     *    sllx  %reg, shift, %reg
     * 2) mov   -1, %reg
     *    srlx  %reg, shift, %reg
     * 3) mov   some_small_const, %reg
     *    sllx  %reg, shift, %reg
     */
    if (((highest_bit_set == 63 || lowest_bit_set == 0) &&
         all_bits_between_are_set != 0) ||
        ((highest_bit_set - lowest_bit_set) < 12)) {
        int shift = lowest_bit_set;
        long the_const = -1;

        if ((highest_bit_set != 63 && lowest_bit_set != 0) ||
            all_bits_between_are_set == 0) {
            the_const =
                create_simple_focus_bits(high_bits, low_bits,
                             lowest_bit_set, 0);
        } else if (lowest_bit_set == 0)
            shift = -(63 - highest_bit_set);

        emit(OR | IMMED | RS1(G0) | S13(the_const) | RD(dest), ctx);
        if (shift > 0)
            emit_alu_K(SLLX, dest, shift, ctx);
        else if (shift < 0)
            emit_alu_K(SRLX, dest, -shift, ctx);

        return;
    }

    /* Now a range of 22 or less bits set somewhere.
     * 1) sethi %hi(focus_bits), %reg
     *    sllx  %reg, shift, %reg
     * 2) sethi %hi(focus_bits), %reg
     *    srlx  %reg, shift, %reg
     */
    if ((highest_bit_set - lowest_bit_set) < 21) {
        unsigned long focus_bits =
            create_simple_focus_bits(high_bits, low_bits,
                         lowest_bit_set, 10);

        emit(SETHI(focus_bits, dest), ctx);

        /* If lowest_bit_set == 10 then a sethi alone could
         * have done it.
         */
        if (lowest_bit_set < 10)
            emit_alu_K(SRLX, dest, 10 - lowest_bit_set, ctx);
        else if (lowest_bit_set > 10)
            emit_alu_K(SLLX, dest, lowest_bit_set - 10, ctx);
        return;
    }

    /* Ok, now 3 instruction sequences.  */
    if (low_bits == 0) {
        emit_loadimm32(high_bits, dest, ctx);
        emit_alu_K(SLLX, dest, 32, ctx);
        return;
    }

    /* We may be able to do something quick
     * when the constant is negated, so try that.
     */
    if (const64_is_2insns((~high_bits) & 0xffffffff,
                  (~low_bits) & 0xfffffc00)) {
        /* NOTE: The trailing bits get XOR'd so we need the
         * non-negated bits, not the negated ones.
         */
        unsigned long trailing_bits = low_bits & 0x3ff;

        if ((((~high_bits) & 0xffffffff) == 0 &&
             ((~low_bits) & 0x80000000) == 0) ||
            (((~high_bits) & 0xffffffff) == 0xffffffff &&
             ((~low_bits) & 0x80000000) != 0)) {
            unsigned long fast_int = (~low_bits & 0xffffffff);

            if ((is_sethi(fast_int) &&
                 (~high_bits & 0xffffffff) == 0)) {
                emit(SETHI(fast_int, dest), ctx);
            } else if (is_simm13(fast_int)) {
                emit(OR | IMMED | RS1(G0) | S13(fast_int) | RD(dest), ctx);
            } else {
                emit_loadimm64(fast_int, dest, ctx);
            }
        } else {
            u64 n = ((~low_bits) & 0xfffffc00) |
                (((unsigned long)((~high_bits) & 0xffffffff))<<32);
            emit_loadimm64(n, dest, ctx);
        }

        low_bits = -0x400 | trailing_bits;

        emit(XOR | IMMED | RS1(dest) | S13(low_bits) | RD(dest), ctx);
        return;
    }

    /* 1) sethi %hi(xxx), %reg
     *    or    %reg, %lo(xxx), %reg
     *    sllx  %reg, yyy, %reg
     */
    if ((highest_bit_set - lowest_bit_set) < 32) {
        unsigned long focus_bits =
            create_simple_focus_bits(high_bits, low_bits,
                         lowest_bit_set, 0);

        /* So what we know is that the set bits straddle the
         * middle of the 64-bit word.
         */
        sparc_emit_set_const64_quick2(focus_bits, 0, dest,
                          lowest_bit_set, ctx);
        return;
    }

    /* 1) sethi %hi(high_bits), %reg
     *    or    %reg, %lo(high_bits), %reg
     *    sllx  %reg, 32, %reg
     *    or    %reg, low_bits, %reg
     */
    if (is_simm13(low_bits) && ((int)low_bits > 0)) {
        sparc_emit_set_const64_quick2(high_bits, low_bits,
                          dest, 32, ctx);
        return;
    }

    /* Oh well, we tried... Do a full 64-bit decomposition.  */
    ctx->tmp_1_used = true;

    emit_loadimm32(high_bits, tmp, ctx);
    emit_loadimm32(low_bits, dest, ctx);
    emit_alu_K(SLLX, tmp, 32, ctx);
    emit(OR | RS1(dest) | RS2(tmp) | RD(dest), ctx);
}

static void emit_branch(unsigned int br_opc, unsigned int from_idx, unsigned int to_idx,
            struct jit_ctx *ctx)
{
    unsigned int off = to_idx - from_idx;

    if (br_opc & XCC)
        emit(br_opc | WDISP19(off << 2), ctx);
    else
        emit(br_opc | WDISP22(off << 2), ctx);
}

static void emit_cbcond(unsigned int cb_opc, unsigned int from_idx, unsigned int to_idx,
            const u8 dst, const u8 src, struct jit_ctx *ctx)
{
    unsigned int off = to_idx - from_idx;

    emit(cb_opc | WDISP10(off << 2) | RS1(dst) | RS2(src), ctx);
}

static void emit_cbcondi(unsigned int cb_opc, unsigned int from_idx, unsigned int to_idx,
             const u8 dst, s32 imm, struct jit_ctx *ctx)
{
    unsigned int off = to_idx - from_idx;

    emit(cb_opc | IMMED | WDISP10(off << 2) | RS1(dst) | S5(imm), ctx);
}
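
/* The branch emitters take source and destination positions in units
 * of instruction indexes; the "<< 2" converts the instruction distance
 * into the byte offset that the displacement fields encode.
 */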

#define emit_read_y(REG, CTX)   emit(RD_Y | RD(REG), CTX)
#define emit_write_y(REG, CTX)  emit(WR_Y | IMMED | RS1(REG) | S13(0), CTX)

#define emit_cmp(R1, R2, CTX)               \
    emit(SUBCC | RS1(R1) | RS2(R2) | RD(G0), CTX)

#define emit_cmpi(R1, IMM, CTX)             \
    emit(SUBCC | IMMED | RS1(R1) | S13(IMM) | RD(G0), CTX)

#define emit_btst(R1, R2, CTX)              \
    emit(ANDCC | RS1(R1) | RS2(R2) | RD(G0), CTX)

#define emit_btsti(R1, IMM, CTX)            \
    emit(ANDCC | IMMED | RS1(R1) | S13(IMM) | RD(G0), CTX)

static int emit_compare_and_branch(const u8 code, const u8 dst, u8 src,
                   const s32 imm, bool is_imm, int branch_dst,
                   struct jit_ctx *ctx)
{
    bool use_cbcond = (sparc64_elf_hwcap & AV_SPARC_CBCOND) != 0;
    const u8 tmp = bpf2sparc[TMP_REG_1];

    branch_dst = ctx->offset[branch_dst];

    if (!is_simm10(branch_dst - ctx->idx) ||
        BPF_OP(code) == BPF_JSET)
        use_cbcond = false;

    if (is_imm) {
        bool fits = true;

        if (use_cbcond) {
            if (!is_simm5(imm))
                fits = false;
        } else if (!is_simm13(imm)) {
            fits = false;
        }
        if (!fits) {
            ctx->tmp_1_used = true;
            emit_loadimm_sext(imm, tmp, ctx);
            src = tmp;
            is_imm = false;
        }
    }

    if (!use_cbcond) {
        u32 br_opcode;

        if (BPF_OP(code) == BPF_JSET) {
            if (is_imm)
                emit_btsti(dst, imm, ctx);
            else
                emit_btst(dst, src, ctx);
        } else {
            if (is_imm)
                emit_cmpi(dst, imm, ctx);
            else
                emit_cmp(dst, src, ctx);
        }
        switch (BPF_OP(code)) {
        case BPF_JEQ:
            br_opcode = BE;
            break;
        case BPF_JGT:
            br_opcode = BGU;
            break;
        case BPF_JLT:
            br_opcode = BLU;
            break;
        case BPF_JGE:
            br_opcode = BGEU;
            break;
        case BPF_JLE:
            br_opcode = BLEU;
            break;
        case BPF_JSET:
        case BPF_JNE:
            br_opcode = BNE;
            break;
        case BPF_JSGT:
            br_opcode = BG;
            break;
        case BPF_JSLT:
            br_opcode = BL;
            break;
        case BPF_JSGE:
            br_opcode = BGE;
            break;
        case BPF_JSLE:
            br_opcode = BLE;
            break;
        default:
            /* Make sure we don't leak kernel information to the
             * user.
             */
            return -EFAULT;
        }
        emit_branch(br_opcode, ctx->idx, branch_dst, ctx);
        emit_nop(ctx);
    } else {
        u32 cbcond_opcode;

        switch (BPF_OP(code)) {
        case BPF_JEQ:
            cbcond_opcode = CBCONDE;
            break;
        case BPF_JGT:
            cbcond_opcode = CBCONDGU;
            break;
        case BPF_JLT:
            cbcond_opcode = CBCONDLU;
            break;
        case BPF_JGE:
            cbcond_opcode = CBCONDGEU;
            break;
        case BPF_JLE:
            cbcond_opcode = CBCONDLEU;
            break;
        case BPF_JNE:
            cbcond_opcode = CBCONDNE;
            break;
        case BPF_JSGT:
            cbcond_opcode = CBCONDG;
            break;
        case BPF_JSLT:
            cbcond_opcode = CBCONDL;
            break;
        case BPF_JSGE:
            cbcond_opcode = CBCONDGE;
            break;
        case BPF_JSLE:
            cbcond_opcode = CBCONDLE;
            break;
        default:
            /* Make sure we don't leak kernel information to the
             * user.
             */
            return -EFAULT;
        }
        cbcond_opcode |= CBCOND_OP;
        if (is_imm)
            emit_cbcondi(cbcond_opcode, ctx->idx, branch_dst,
                     dst, imm, ctx);
        else
            emit_cbcond(cbcond_opcode, ctx->idx, branch_dst,
                    dst, src, ctx);
    }
    return 0;
}

/* Just skip the save instruction and the ctx register move.  */
#define BPF_TAILCALL_PROLOGUE_SKIP  32
#define BPF_TAILCALL_CNT_SP_OFF     (STACK_BIAS + 128)
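
/* The prologue below emits exactly eight instructions (the save, the
 * tail-call counter store or a nop, the frame pointer setup or a nop,
 * and five argument moves), so 32 bytes skips all of them.  A tail
 * call stays in the caller's register window, where the BPF state is
 * already in place, hence none of the prologue may run again.
 */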

static void build_prologue(struct jit_ctx *ctx)
{
    s32 stack_needed = BASE_STACKFRAME;

    if (ctx->saw_frame_pointer || ctx->saw_tail_call) {
        struct bpf_prog *prog = ctx->prog;
        u32 stack_depth;

        stack_depth = prog->aux->stack_depth;
        stack_needed += round_up(stack_depth, 16);
    }

    if (ctx->saw_tail_call)
        stack_needed += 8;

    /* save %sp, -stack_needed, %sp */
    emit(SAVE | IMMED | RS1(SP) | S13(-stack_needed) | RD(SP), ctx);

    /* tail_call_cnt = 0 */
    if (ctx->saw_tail_call) {
        u32 off = BPF_TAILCALL_CNT_SP_OFF;

        emit(ST32 | IMMED | RS1(SP) | S13(off) | RD(G0), ctx);
    } else {
        emit_nop(ctx);
    }
    if (ctx->saw_frame_pointer) {
        const u8 vfp = bpf2sparc[BPF_REG_FP];

        emit(ADD | IMMED | RS1(FP) | S13(STACK_BIAS) | RD(vfp), ctx);
    } else {
        emit_nop(ctx);
    }

    emit_reg_move(I0, O0, ctx);
    emit_reg_move(I1, O1, ctx);
    emit_reg_move(I2, O2, ctx);
    emit_reg_move(I3, O3, ctx);
    emit_reg_move(I4, O4, ctx);
    /* If you add anything here, adjust BPF_TAILCALL_PROLOGUE_SKIP above. */
}

static void build_epilogue(struct jit_ctx *ctx)
{
    ctx->epilogue_offset = ctx->idx;

    /* ret (jmpl %i7 + 8, %g0) */
    emit(JMPL | IMMED | RS1(I7) | S13(8) | RD(G0), ctx);

    /* restore %o5, %g0, %o0 */
    emit(RESTORE | RS1(bpf2sparc[BPF_REG_0]) | RS2(G0) | RD(O0), ctx);
}

static void emit_tail_call(struct jit_ctx *ctx)
{
    const u8 bpf_array = bpf2sparc[BPF_REG_2];
    const u8 bpf_index = bpf2sparc[BPF_REG_3];
    const u8 tmp = bpf2sparc[TMP_REG_1];
    u32 off;

    ctx->saw_tail_call = true;

    /* if (index >= array->map.max_entries)
     *     goto out;
     */
    off = offsetof(struct bpf_array, map.max_entries);
    emit(LD32 | IMMED | RS1(bpf_array) | S13(off) | RD(tmp), ctx);
    emit_cmp(bpf_index, tmp, ctx);
#define OFFSET1 17
    emit_branch(BGEU, ctx->idx, ctx->idx + OFFSET1, ctx);
    emit_nop(ctx);

    /* if (tail_call_cnt >= MAX_TAIL_CALL_CNT)
     *     goto out;
     */
    off = BPF_TAILCALL_CNT_SP_OFF;
    emit(LD32 | IMMED | RS1(SP) | S13(off) | RD(tmp), ctx);
    emit_cmpi(tmp, MAX_TAIL_CALL_CNT, ctx);
#define OFFSET2 13
    emit_branch(BGEU, ctx->idx, ctx->idx + OFFSET2, ctx);
    emit_nop(ctx);

    /* tail_call_cnt++; */
    emit_alu_K(ADD, tmp, 1, ctx);
    off = BPF_TAILCALL_CNT_SP_OFF;
    emit(ST32 | IMMED | RS1(SP) | S13(off) | RD(tmp), ctx);

    /* prog = array->ptrs[index]; */
    emit_alu3_K(SLL, bpf_index, 3, tmp, ctx);
    emit_alu(ADD, bpf_array, tmp, ctx);
    off = offsetof(struct bpf_array, ptrs);
    emit(LD64 | IMMED | RS1(tmp) | S13(off) | RD(tmp), ctx);

    /* if (prog == NULL)
     *     goto out;
     */
    emit_cmpi(tmp, 0, ctx);
#define OFFSET3 5
    emit_branch(BE, ctx->idx, ctx->idx + OFFSET3, ctx);
    emit_nop(ctx);

    /* Jump past the target program's prologue. */
    off = offsetof(struct bpf_prog, bpf_func);
    emit(LD64 | IMMED | RS1(tmp) | S13(off) | RD(tmp), ctx);

    off = BPF_TAILCALL_PROLOGUE_SKIP;
    emit(JMPL | IMMED | RS1(tmp) | S13(off) | RD(G0), ctx);
    emit_nop(ctx);
}
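
/* OFFSET1/2/3 are the hand-counted instruction distances from each
 * branch to the shared fall-through point after the final nop; if the
 * emitted sequence changes length, these constants must be updated to
 * match.
 */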

static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
    const u8 code = insn->code;
    const u8 dst = bpf2sparc[insn->dst_reg];
    const u8 src = bpf2sparc[insn->src_reg];
    const int i = insn - ctx->prog->insnsi;
    const s16 off = insn->off;
    const s32 imm = insn->imm;

    if (insn->src_reg == BPF_REG_FP)
        ctx->saw_frame_pointer = true;

    switch (code) {
    /* dst = src */
    case BPF_ALU | BPF_MOV | BPF_X:
        emit_alu3_K(SRL, src, 0, dst, ctx);
        if (insn_is_zext(&insn[1]))
            return 1;
        break;
    case BPF_ALU64 | BPF_MOV | BPF_X:
        emit_reg_move(src, dst, ctx);
        break;
    /* dst = dst OP src */
    case BPF_ALU | BPF_ADD | BPF_X:
    case BPF_ALU64 | BPF_ADD | BPF_X:
        emit_alu(ADD, src, dst, ctx);
        goto do_alu32_trunc;
    case BPF_ALU | BPF_SUB | BPF_X:
    case BPF_ALU64 | BPF_SUB | BPF_X:
        emit_alu(SUB, src, dst, ctx);
        goto do_alu32_trunc;
    case BPF_ALU | BPF_AND | BPF_X:
    case BPF_ALU64 | BPF_AND | BPF_X:
        emit_alu(AND, src, dst, ctx);
        goto do_alu32_trunc;
    case BPF_ALU | BPF_OR | BPF_X:
    case BPF_ALU64 | BPF_OR | BPF_X:
        emit_alu(OR, src, dst, ctx);
        goto do_alu32_trunc;
    case BPF_ALU | BPF_XOR | BPF_X:
    case BPF_ALU64 | BPF_XOR | BPF_X:
        emit_alu(XOR, src, dst, ctx);
        goto do_alu32_trunc;
    case BPF_ALU | BPF_MUL | BPF_X:
        emit_alu(MUL, src, dst, ctx);
        goto do_alu32_trunc;
    case BPF_ALU64 | BPF_MUL | BPF_X:
        emit_alu(MULX, src, dst, ctx);
        break;
    case BPF_ALU | BPF_DIV | BPF_X:
        /* The 32-bit udiv takes the high half of its 64-bit
         * dividend from the Y register, so clear Y first.
         */
        emit_write_y(G0, ctx);
        emit_alu(DIV, src, dst, ctx);
        if (insn_is_zext(&insn[1]))
            return 1;
        break;
    case BPF_ALU64 | BPF_DIV | BPF_X:
        emit_alu(UDIVX, src, dst, ctx);
        break;
    /* dst = dst % src, computed as dst - (dst / src) * src */
    case BPF_ALU | BPF_MOD | BPF_X: {
        const u8 tmp = bpf2sparc[TMP_REG_1];

        ctx->tmp_1_used = true;

        emit_write_y(G0, ctx);
        emit_alu3(DIV, dst, src, tmp, ctx);
        emit_alu3(MULX, tmp, src, tmp, ctx);
        emit_alu3(SUB, dst, tmp, dst, ctx);
        goto do_alu32_trunc;
    }
    case BPF_ALU64 | BPF_MOD | BPF_X: {
        const u8 tmp = bpf2sparc[TMP_REG_1];

        ctx->tmp_1_used = true;

        emit_alu3(UDIVX, dst, src, tmp, ctx);
        emit_alu3(MULX, tmp, src, tmp, ctx);
        emit_alu3(SUB, dst, tmp, dst, ctx);
        break;
    }
    case BPF_ALU | BPF_LSH | BPF_X:
        emit_alu(SLL, src, dst, ctx);
        goto do_alu32_trunc;
    case BPF_ALU64 | BPF_LSH | BPF_X:
        emit_alu(SLLX, src, dst, ctx);
        break;
    case BPF_ALU | BPF_RSH | BPF_X:
        emit_alu(SRL, src, dst, ctx);
        if (insn_is_zext(&insn[1]))
            return 1;
        break;
    case BPF_ALU64 | BPF_RSH | BPF_X:
        emit_alu(SRLX, src, dst, ctx);
        break;
    case BPF_ALU | BPF_ARSH | BPF_X:
        emit_alu(SRA, src, dst, ctx);
        goto do_alu32_trunc;
    case BPF_ALU64 | BPF_ARSH | BPF_X:
        emit_alu(SRAX, src, dst, ctx);
        break;

    /* dst = -dst */
    case BPF_ALU | BPF_NEG:
    case BPF_ALU64 | BPF_NEG:
        emit(SUB | RS1(0) | RS2(dst) | RD(dst), ctx);
        goto do_alu32_trunc;

    case BPF_ALU | BPF_END | BPF_FROM_BE:
        switch (imm) {
        case 16:
            emit_alu_K(SLL, dst, 16, ctx);
            emit_alu_K(SRL, dst, 16, ctx);
            if (insn_is_zext(&insn[1]))
                return 1;
            break;
        case 32:
            if (!ctx->prog->aux->verifier_zext)
                emit_alu_K(SRL, dst, 0, ctx);
            break;
        case 64:
            /* nop */
            break;
        }
        break;

    /* dst = BSWAP##imm(dst) */
    case BPF_ALU | BPF_END | BPF_FROM_LE: {
        const u8 tmp = bpf2sparc[TMP_REG_1];
        const u8 tmp2 = bpf2sparc[TMP_REG_2];

        ctx->tmp_1_used = true;
        switch (imm) {
        case 16:
            emit_alu3_K(AND, dst, 0xff, tmp, ctx);
            emit_alu3_K(SRL, dst, 8, dst, ctx);
            emit_alu3_K(AND, dst, 0xff, dst, ctx);
            emit_alu3_K(SLL, tmp, 8, tmp, ctx);
            emit_alu(OR, tmp, dst, ctx);
            if (insn_is_zext(&insn[1]))
                return 1;
            break;

        case 32:
            ctx->tmp_2_used = true;
            emit_alu3_K(SRL, dst, 24, tmp, ctx);    /* tmp  = dst >> 24 */
            emit_alu3_K(SRL, dst, 16, tmp2, ctx);   /* tmp2 = dst >> 16 */
            emit_alu3_K(AND, tmp2, 0xff, tmp2, ctx);/* tmp2 = tmp2 & 0xff */
            emit_alu3_K(SLL, tmp2, 8, tmp2, ctx);   /* tmp2 = tmp2 << 8 */
            emit_alu(OR, tmp2, tmp, ctx);       /* tmp  = tmp | tmp2 */
            emit_alu3_K(SRL, dst, 8, tmp2, ctx);    /* tmp2 = dst >> 8 */
            emit_alu3_K(AND, tmp2, 0xff, tmp2, ctx);/* tmp2 = tmp2 & 0xff */
            emit_alu3_K(SLL, tmp2, 16, tmp2, ctx);  /* tmp2 = tmp2 << 16 */
            emit_alu(OR, tmp2, tmp, ctx);       /* tmp  = tmp | tmp2 */
            emit_alu3_K(AND, dst, 0xff, dst, ctx);  /* dst  = dst & 0xff */
            emit_alu3_K(SLL, dst, 24, dst, ctx);    /* dst  = dst << 24 */
            emit_alu(OR, tmp, dst, ctx);        /* dst  = dst | tmp */
            if (insn_is_zext(&insn[1]))
                return 1;
            break;

        case 64:
            emit_alu3_K(ADD, SP, STACK_BIAS + 128, tmp, ctx);
            emit(ST64 | RS1(tmp) | RS2(G0) | RD(dst), ctx);
            emit(LD64A | ASI(ASI_PL) | RS1(tmp) | RS2(G0) | RD(dst), ctx);
            break;
        }
        break;
    }
    /* dst = imm */
    case BPF_ALU | BPF_MOV | BPF_K:
        emit_loadimm32(imm, dst, ctx);
        if (insn_is_zext(&insn[1]))
            return 1;
        break;
    case BPF_ALU64 | BPF_MOV | BPF_K:
        emit_loadimm_sext(imm, dst, ctx);
        break;
    /* dst = dst OP imm */
    case BPF_ALU | BPF_ADD | BPF_K:
    case BPF_ALU64 | BPF_ADD | BPF_K:
        emit_alu_K(ADD, dst, imm, ctx);
        goto do_alu32_trunc;
    case BPF_ALU | BPF_SUB | BPF_K:
    case BPF_ALU64 | BPF_SUB | BPF_K:
        emit_alu_K(SUB, dst, imm, ctx);
        goto do_alu32_trunc;
    case BPF_ALU | BPF_AND | BPF_K:
    case BPF_ALU64 | BPF_AND | BPF_K:
        emit_alu_K(AND, dst, imm, ctx);
        goto do_alu32_trunc;
    case BPF_ALU | BPF_OR | BPF_K:
    case BPF_ALU64 | BPF_OR | BPF_K:
        emit_alu_K(OR, dst, imm, ctx);
        goto do_alu32_trunc;
    case BPF_ALU | BPF_XOR | BPF_K:
    case BPF_ALU64 | BPF_XOR | BPF_K:
        emit_alu_K(XOR, dst, imm, ctx);
        goto do_alu32_trunc;
    case BPF_ALU | BPF_MUL | BPF_K:
        emit_alu_K(MUL, dst, imm, ctx);
        goto do_alu32_trunc;
    case BPF_ALU64 | BPF_MUL | BPF_K:
        emit_alu_K(MULX, dst, imm, ctx);
        break;
    case BPF_ALU | BPF_DIV | BPF_K:
        if (imm == 0)
            return -EINVAL;

        emit_write_y(G0, ctx);
        emit_alu_K(DIV, dst, imm, ctx);
        goto do_alu32_trunc;
    case BPF_ALU64 | BPF_DIV | BPF_K:
        if (imm == 0)
            return -EINVAL;

        emit_alu_K(UDIVX, dst, imm, ctx);
        break;
    case BPF_ALU64 | BPF_MOD | BPF_K:
    case BPF_ALU | BPF_MOD | BPF_K: {
        const u8 tmp = bpf2sparc[TMP_REG_2];
        unsigned int div;

        if (imm == 0)
            return -EINVAL;

        div = (BPF_CLASS(code) == BPF_ALU64) ? UDIVX : DIV;

        ctx->tmp_2_used = true;

        if (BPF_CLASS(code) != BPF_ALU64)
            emit_write_y(G0, ctx);
        if (is_simm13(imm)) {
            emit(div | IMMED | RS1(dst) | S13(imm) | RD(tmp), ctx);
            emit(MULX | IMMED | RS1(tmp) | S13(imm) | RD(tmp), ctx);
            emit(SUB | RS1(dst) | RS2(tmp) | RD(dst), ctx);
        } else {
            const u8 tmp1 = bpf2sparc[TMP_REG_1];

            ctx->tmp_1_used = true;

            emit_set_const_sext(imm, tmp1, ctx);
            emit(div | RS1(dst) | RS2(tmp1) | RD(tmp), ctx);
            emit(MULX | RS1(tmp) | RS2(tmp1) | RD(tmp), ctx);
            emit(SUB | RS1(dst) | RS2(tmp) | RD(dst), ctx);
        }
        goto do_alu32_trunc;
    }
    case BPF_ALU | BPF_LSH | BPF_K:
        emit_alu_K(SLL, dst, imm, ctx);
        goto do_alu32_trunc;
    case BPF_ALU64 | BPF_LSH | BPF_K:
        emit_alu_K(SLLX, dst, imm, ctx);
        break;
    case BPF_ALU | BPF_RSH | BPF_K:
        emit_alu_K(SRL, dst, imm, ctx);
        if (insn_is_zext(&insn[1]))
            return 1;
        break;
    case BPF_ALU64 | BPF_RSH | BPF_K:
        emit_alu_K(SRLX, dst, imm, ctx);
        break;
    case BPF_ALU | BPF_ARSH | BPF_K:
        emit_alu_K(SRA, dst, imm, ctx);
        goto do_alu32_trunc;
    case BPF_ALU64 | BPF_ARSH | BPF_K:
        emit_alu_K(SRAX, dst, imm, ctx);
        break;

    do_alu32_trunc:
        if (BPF_CLASS(code) == BPF_ALU &&
            !ctx->prog->aux->verifier_zext)
            emit_alu_K(SRL, dst, 0, ctx);
        break;

    /* JUMP off */
    case BPF_JMP | BPF_JA:
        emit_branch(BA, ctx->idx, ctx->offset[i + off], ctx);
        emit_nop(ctx);
        break;
    /* IF (dst COND src) JUMP off */
    case BPF_JMP | BPF_JEQ | BPF_X:
    case BPF_JMP | BPF_JGT | BPF_X:
    case BPF_JMP | BPF_JLT | BPF_X:
    case BPF_JMP | BPF_JGE | BPF_X:
    case BPF_JMP | BPF_JLE | BPF_X:
    case BPF_JMP | BPF_JNE | BPF_X:
    case BPF_JMP | BPF_JSGT | BPF_X:
    case BPF_JMP | BPF_JSLT | BPF_X:
    case BPF_JMP | BPF_JSGE | BPF_X:
    case BPF_JMP | BPF_JSLE | BPF_X:
    case BPF_JMP | BPF_JSET | BPF_X: {
        int err;

        err = emit_compare_and_branch(code, dst, src, 0, false, i + off, ctx);
        if (err)
            return err;
        break;
    }
    /* IF (dst COND imm) JUMP off */
    case BPF_JMP | BPF_JEQ | BPF_K:
    case BPF_JMP | BPF_JGT | BPF_K:
    case BPF_JMP | BPF_JLT | BPF_K:
    case BPF_JMP | BPF_JGE | BPF_K:
    case BPF_JMP | BPF_JLE | BPF_K:
    case BPF_JMP | BPF_JNE | BPF_K:
    case BPF_JMP | BPF_JSGT | BPF_K:
    case BPF_JMP | BPF_JSLT | BPF_K:
    case BPF_JMP | BPF_JSGE | BPF_K:
    case BPF_JMP | BPF_JSLE | BPF_K:
    case BPF_JMP | BPF_JSET | BPF_K: {
        int err;

        err = emit_compare_and_branch(code, dst, 0, imm, true, i + off, ctx);
        if (err)
            return err;
        break;
    }

    /* function call */
    case BPF_JMP | BPF_CALL:
    {
        u8 *func = ((u8 *)__bpf_call_base) + imm;

        ctx->saw_call = true;

        emit_call((u32 *)func, ctx);
        emit_nop(ctx);

        emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
        break;
    }

    /* tail call */
    case BPF_JMP | BPF_TAIL_CALL:
        emit_tail_call(ctx);
        break;

    /* function return */
    case BPF_JMP | BPF_EXIT:
        /* Optimization: when last instruction is EXIT,
           simply fallthrough to epilogue. */
        if (i == ctx->prog->len - 1)
            break;
        emit_branch(BA, ctx->idx, ctx->epilogue_offset, ctx);
        emit_nop(ctx);
        break;

    /* dst = imm64 */
    case BPF_LD | BPF_IMM | BPF_DW:
    {
        const struct bpf_insn insn1 = insn[1];
        u64 imm64;

        imm64 = (u64)insn1.imm << 32 | (u32)imm;
        emit_loadimm64(imm64, dst, ctx);

        return 1;
    }

    /* LDX: dst = *(size *)(src + off) */
    case BPF_LDX | BPF_MEM | BPF_W:
    case BPF_LDX | BPF_MEM | BPF_H:
    case BPF_LDX | BPF_MEM | BPF_B:
    case BPF_LDX | BPF_MEM | BPF_DW: {
        const u8 tmp = bpf2sparc[TMP_REG_1];
        u32 opcode = 0, rs2;

        ctx->tmp_1_used = true;
        switch (BPF_SIZE(code)) {
        case BPF_W:
            opcode = LD32;
            break;
        case BPF_H:
            opcode = LD16;
            break;
        case BPF_B:
            opcode = LD8;
            break;
        case BPF_DW:
            opcode = LD64;
            break;
        }

        if (is_simm13(off)) {
            opcode |= IMMED;
            rs2 = S13(off);
        } else {
            emit_loadimm(off, tmp, ctx);
            rs2 = RS2(tmp);
        }
        emit(opcode | RS1(src) | rs2 | RD(dst), ctx);
        if (opcode != LD64 && insn_is_zext(&insn[1]))
            return 1;
        break;
    }
    /* speculation barrier */
    case BPF_ST | BPF_NOSPEC:
        break;
    /* ST: *(size *)(dst + off) = imm */
    case BPF_ST | BPF_MEM | BPF_W:
    case BPF_ST | BPF_MEM | BPF_H:
    case BPF_ST | BPF_MEM | BPF_B:
    case BPF_ST | BPF_MEM | BPF_DW: {
        const u8 tmp = bpf2sparc[TMP_REG_1];
        const u8 tmp2 = bpf2sparc[TMP_REG_2];
        u32 opcode = 0, rs2;

        if (insn->dst_reg == BPF_REG_FP)
            ctx->saw_frame_pointer = true;

        ctx->tmp_2_used = true;
        emit_loadimm(imm, tmp2, ctx);

        switch (BPF_SIZE(code)) {
        case BPF_W:
            opcode = ST32;
            break;
        case BPF_H:
            opcode = ST16;
            break;
        case BPF_B:
            opcode = ST8;
            break;
        case BPF_DW:
            opcode = ST64;
            break;
        }

        if (is_simm13(off)) {
            opcode |= IMMED;
            rs2 = S13(off);
        } else {
            ctx->tmp_1_used = true;
            emit_loadimm(off, tmp, ctx);
            rs2 = RS2(tmp);
        }
        emit(opcode | RS1(dst) | rs2 | RD(tmp2), ctx);
        break;
    }

    /* STX: *(size *)(dst + off) = src */
    case BPF_STX | BPF_MEM | BPF_W:
    case BPF_STX | BPF_MEM | BPF_H:
    case BPF_STX | BPF_MEM | BPF_B:
    case BPF_STX | BPF_MEM | BPF_DW: {
        const u8 tmp = bpf2sparc[TMP_REG_1];
        u32 opcode = 0, rs2;

        if (insn->dst_reg == BPF_REG_FP)
            ctx->saw_frame_pointer = true;

        switch (BPF_SIZE(code)) {
        case BPF_W:
            opcode = ST32;
            break;
        case BPF_H:
            opcode = ST16;
            break;
        case BPF_B:
            opcode = ST8;
            break;
        case BPF_DW:
            opcode = ST64;
            break;
        }
        if (is_simm13(off)) {
            opcode |= IMMED;
            rs2 = S13(off);
        } else {
            ctx->tmp_1_used = true;
            emit_loadimm(off, tmp, ctx);
            rs2 = RS2(tmp);
        }
        emit(opcode | RS1(dst) | rs2 | RD(src), ctx);
        break;
    }

    case BPF_STX | BPF_ATOMIC | BPF_W: {
        const u8 tmp = bpf2sparc[TMP_REG_1];
        const u8 tmp2 = bpf2sparc[TMP_REG_2];
        const u8 tmp3 = bpf2sparc[TMP_REG_3];

        if (insn->imm != BPF_ADD) {
            pr_err_once("unknown atomic op %02x\n", insn->imm);
            return -EINVAL;
        }

        /* lock *(u32 *)(dst + off) += src, as a CAS retry loop:
         * load the old value, compute old + src, and cas the sum
         * back; if another CPU changed the word in between, the
         * compare fails and we branch back (-4 instructions) to
         * the load and retry.
         */

        if (insn->dst_reg == BPF_REG_FP)
            ctx->saw_frame_pointer = true;

        ctx->tmp_1_used = true;
        ctx->tmp_2_used = true;
        ctx->tmp_3_used = true;
        emit_loadimm(off, tmp, ctx);
        emit_alu3(ADD, dst, tmp, tmp, ctx);

        emit(LD32 | RS1(tmp) | RS2(G0) | RD(tmp2), ctx);
        emit_alu3(ADD, tmp2, src, tmp3, ctx);
        emit(CAS | ASI(ASI_P) | RS1(tmp) | RS2(tmp2) | RD(tmp3), ctx);
        emit_cmp(tmp2, tmp3, ctx);
        emit_branch(BNE, 4, 0, ctx);
        emit_nop(ctx);
        break;
    }
    /* STX XADD: lock *(u64 *)(dst + off) += src, using the same CAS
     * retry scheme as the 32-bit case above, with casx.
     */
    case BPF_STX | BPF_ATOMIC | BPF_DW: {
        const u8 tmp = bpf2sparc[TMP_REG_1];
        const u8 tmp2 = bpf2sparc[TMP_REG_2];
        const u8 tmp3 = bpf2sparc[TMP_REG_3];

        if (insn->imm != BPF_ADD) {
            pr_err_once("unknown atomic op %02x\n", insn->imm);
            return -EINVAL;
        }

        if (insn->dst_reg == BPF_REG_FP)
            ctx->saw_frame_pointer = true;

        ctx->tmp_1_used = true;
        ctx->tmp_2_used = true;
        ctx->tmp_3_used = true;
        emit_loadimm(off, tmp, ctx);
        emit_alu3(ADD, dst, tmp, tmp, ctx);

        emit(LD64 | RS1(tmp) | RS2(G0) | RD(tmp2), ctx);
        emit_alu3(ADD, tmp2, src, tmp3, ctx);
        emit(CASX | ASI(ASI_P) | RS1(tmp) | RS2(tmp2) | RD(tmp3), ctx);
        emit_cmp(tmp2, tmp3, ctx);
        emit_branch(BNE, 4, 0, ctx);
        emit_nop(ctx);
        break;
    }

    default:
        pr_err_once("unknown opcode %02x\n", code);
        return -EINVAL;
    }

    return 0;
}

static int build_body(struct jit_ctx *ctx)
{
    const struct bpf_prog *prog = ctx->prog;
    int i;

    for (i = 0; i < prog->len; i++) {
        const struct bpf_insn *insn = &prog->insnsi[i];
        int ret;

        ret = build_insn(insn, ctx);

        if (ret > 0) {
            /* The instruction consumed the following slot as
             * well (e.g. a 64-bit immediate load or a folded
             * zero-extension), so record an offset for both.
             */
            i++;
            ctx->offset[i] = ctx->idx;
            continue;
        }
        ctx->offset[i] = ctx->idx;
        if (ret)
            return ret;
    }
    return 0;
}

static void jit_fill_hole(void *area, unsigned int size)
{
    u32 *ptr;
    /* We are guaranteed to have aligned memory. */
    for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
        *ptr++ = 0x91d02005; /* ta 5 */
}
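
/* Filling unused space in the JIT image with "ta 5" trap instructions
 * means any stray jump into the padding traps immediately instead of
 * executing whatever bytes happened to be left there.
 */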

bool bpf_jit_needs_zext(void)
{
    return true;
}

struct sparc64_jit_data {
    struct bpf_binary_header *header;
    u8 *image;
    struct jit_ctx ctx;
};

struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
    struct bpf_prog *tmp, *orig_prog = prog;
    struct sparc64_jit_data *jit_data;
    struct bpf_binary_header *header;
    u32 prev_image_size, image_size;
    bool tmp_blinded = false;
    bool extra_pass = false;
    struct jit_ctx ctx;
    u8 *image_ptr;
    int pass, i;

    if (!prog->jit_requested)
        return orig_prog;

    tmp = bpf_jit_blind_constants(prog);
    /* If blinding was requested and we failed during blinding,
     * we must fall back to the interpreter.
     */
    if (IS_ERR(tmp))
        return orig_prog;
    if (tmp != prog) {
        tmp_blinded = true;
        prog = tmp;
    }

    jit_data = prog->aux->jit_data;
    if (!jit_data) {
        jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
        if (!jit_data) {
            prog = orig_prog;
            goto out;
        }
        prog->aux->jit_data = jit_data;
    }
    if (jit_data->ctx.offset) {
        ctx = jit_data->ctx;
        image_ptr = jit_data->image;
        header = jit_data->header;
        extra_pass = true;
        image_size = sizeof(u32) * ctx.idx;
        prev_image_size = image_size;
        pass = 1;
        goto skip_init_ctx;
    }

    memset(&ctx, 0, sizeof(ctx));
    ctx.prog = prog;

    ctx.offset = kmalloc_array(prog->len, sizeof(unsigned int), GFP_KERNEL);
    if (ctx.offset == NULL) {
        prog = orig_prog;
        goto out_off;
    }

    /* Longest sequence emitted is for bswap32, 12 instructions.  Pre-cook
     * the offset array so that we converge faster.
     */
    for (i = 0; i < prog->len; i++)
        ctx.offset[i] = i * (12 * 4);

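    /* Branch offsets depend on where instructions land, and instruction
     * placement depends on the branch encodings chosen, so iterate until
     * the image size reaches a fixed point before allocating the image;
     * the pass after allocation must then produce exactly the converged
     * size.
     */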
    prev_image_size = ~0U;
    for (pass = 1; pass < 40; pass++) {
        ctx.idx = 0;

        build_prologue(&ctx);
        if (build_body(&ctx)) {
            prog = orig_prog;
            goto out_off;
        }
        build_epilogue(&ctx);

        if (bpf_jit_enable > 1)
            pr_info("Pass %d: size = %u, seen = [%c%c%c%c%c%c]\n", pass,
                ctx.idx * 4,
                ctx.tmp_1_used ? '1' : ' ',
                ctx.tmp_2_used ? '2' : ' ',
                ctx.tmp_3_used ? '3' : ' ',
                ctx.saw_frame_pointer ? 'F' : ' ',
                ctx.saw_call ? 'C' : ' ',
                ctx.saw_tail_call ? 'T' : ' ');

        if (ctx.idx * 4 == prev_image_size)
            break;
        prev_image_size = ctx.idx * 4;
        cond_resched();
    }

    /* Now we know the actual image size. */
    image_size = sizeof(u32) * ctx.idx;
    header = bpf_jit_binary_alloc(image_size, &image_ptr,
                      sizeof(u32), jit_fill_hole);
    if (header == NULL) {
        prog = orig_prog;
        goto out_off;
    }

    ctx.image = (u32 *)image_ptr;
skip_init_ctx:
    ctx.idx = 0;

    build_prologue(&ctx);

    if (build_body(&ctx)) {
        bpf_jit_binary_free(header);
        prog = orig_prog;
        goto out_off;
    }

    build_epilogue(&ctx);

    if (ctx.idx * 4 != prev_image_size) {
        pr_err("bpf_jit: Failed to converge, prev_size=%u size=%d\n",
               prev_image_size, ctx.idx * 4);
        bpf_jit_binary_free(header);
        prog = orig_prog;
        goto out_off;
    }

    if (bpf_jit_enable > 1)
        bpf_jit_dump(prog->len, image_size, pass, ctx.image);

    bpf_flush_icache(header, (u8 *)header + header->size);

    if (!prog->is_func || extra_pass) {
        bpf_jit_binary_lock_ro(header);
    } else {
        jit_data->ctx = ctx;
        jit_data->image = image_ptr;
        jit_data->header = header;
    }

    prog->bpf_func = (void *)ctx.image;
    prog->jited = 1;
    prog->jited_len = image_size;

    if (!prog->is_func || extra_pass) {
        bpf_prog_fill_jited_linfo(prog, ctx.offset);
out_off:
        kfree(ctx.offset);
        kfree(jit_data);
        prog->aux->jit_data = NULL;
    }
out:
    if (tmp_blinded)
        bpf_jit_prog_release_other(prog, prog == orig_prog ?
                       tmp : orig_prog);
    return prog;
}