kernel/bpf/verifier.c

0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
0003  * Copyright (c) 2016 Facebook
0004  * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
0005  */
0006 #include <uapi/linux/btf.h>
0007 #include <linux/bpf-cgroup.h>
0008 #include <linux/kernel.h>
0009 #include <linux/types.h>
0010 #include <linux/slab.h>
0011 #include <linux/bpf.h>
0012 #include <linux/btf.h>
0013 #include <linux/bpf_verifier.h>
0014 #include <linux/filter.h>
0015 #include <net/netlink.h>
0016 #include <linux/file.h>
0017 #include <linux/vmalloc.h>
0018 #include <linux/stringify.h>
0019 #include <linux/bsearch.h>
0020 #include <linux/sort.h>
0021 #include <linux/perf_event.h>
0022 #include <linux/ctype.h>
0023 #include <linux/error-injection.h>
0024 #include <linux/bpf_lsm.h>
0025 #include <linux/btf_ids.h>
0026
0027 #include "disasm.h"
0028
0029 static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
0030 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
0031     [_id] = & _name ## _verifier_ops,
0032 #define BPF_MAP_TYPE(_id, _ops)
0033 #define BPF_LINK_TYPE(_id, _name)
0034 #include <linux/bpf_types.h>
0035 #undef BPF_PROG_TYPE
0036 #undef BPF_MAP_TYPE
0037 #undef BPF_LINK_TYPE
0038 };
0039
0040 /* bpf_check() is a static code analyzer that walks eBPF program
0041  * instruction by instruction and updates register/stack state.
0042  * All paths of conditional branches are analyzed until 'bpf_exit' insn.
0043  *
0044  * The first pass is depth-first-search to check that the program is a DAG.
0045  * It rejects the following programs:
0046  * - larger than BPF_MAXINSNS insns
0047  * - if loop is present (detected via back-edge)
0048  * - unreachable insns exist (shouldn't be a forest. program = one function)
0049  * - out of bounds or malformed jumps
0050  * The second pass is all possible path descent from the 1st insn.
0051  * Since it's analyzing all paths through the program, the length of the
0052  * analysis is limited to 64k insn, which may be hit even if total number of
0053  * insn is less then 4K, but there are too many branches that change stack/regs.
0054  * Number of 'branches to be analyzed' is limited to 1k
0055  *
0056  * On entry to each instruction, each register has a type, and the instruction
0057  * changes the types of the registers depending on instruction semantics.
0058  * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
0059  * copied to R1.
0060  *
0061  * All registers are 64-bit.
0062  * R0 - return register
0063  * R1-R5 argument passing registers
0064  * R6-R9 callee saved registers
0065  * R10 - frame pointer read-only
0066  *
0067  * At the start of BPF program the register R1 contains a pointer to bpf_context
0068  * and has type PTR_TO_CTX.
0069  *
0070  * Verifier tracks arithmetic operations on pointers in case:
0071  *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
0072  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
0073  * 1st insn copies R10 (which has FRAME_PTR) type into R1
0074  * and 2nd arithmetic instruction is pattern matched to recognize
0075  * that it wants to construct a pointer to some element within stack.
0076  * So after 2nd insn, the register R1 has type PTR_TO_STACK
0077  * (and -20 constant is saved for further stack bounds checking).
0078  * Meaning that this reg is a pointer to stack plus known immediate constant.
0079  *
0080  * Most of the time the registers have SCALAR_VALUE type, which
0081  * means the register has some value, but it's not a valid pointer.
0082  * (like pointer plus pointer becomes SCALAR_VALUE type)
0083  *
0084  * When verifier sees load or store instructions the type of base register
0085  * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
0086  * four pointer types recognized by check_mem_access() function.
0087  *
0088  * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
0089  * and the range of [ptr, ptr + map's value_size) is accessible.
0090  *
0091  * registers used to pass values to function calls are checked against
0092  * function argument constraints.
0093  *
0094  * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
0095  * It means that the register type passed to this function must be
0096  * PTR_TO_STACK and it will be used inside the function as
0097  * 'pointer to map element key'
0098  *
0099  * For example the argument constraints for bpf_map_lookup_elem():
0100  *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
0101  *   .arg1_type = ARG_CONST_MAP_PTR,
0102  *   .arg2_type = ARG_PTR_TO_MAP_KEY,
0103  *
0104  * ret_type says that this function returns 'pointer to map elem value or null'
0105  * function expects 1st argument to be a const pointer to 'struct bpf_map' and
0106  * 2nd argument should be a pointer to stack, which will be used inside
0107  * the helper function as a pointer to map element key.
0108  *
0109  * On the kernel side the helper function looks like:
0110  * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
0111  * {
0112  *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
0113  *    void *key = (void *) (unsigned long) r2;
0114  *    void *value;
0115  *
0116  *    here kernel can access 'key' and 'map' pointers safely, knowing that
0117  *    [key, key + map->key_size) bytes are valid and were initialized on
0118  *    the stack of eBPF program.
0119  * }
0120  *
0121  * Corresponding eBPF program may look like:
0122  *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
0123  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
0124  *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
0125  *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
0126  * here verifier looks at prototype of map_lookup_elem() and sees:
0127  * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
0128  * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
0129  *
0130  * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
0131  * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
0132  * and were initialized prior to this call.
0133  * If it's ok, then verifier allows this BPF_CALL insn and looks at
0134  * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
0135  * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
0136  * returns either pointer to map value or NULL.
0137  *
0138  * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
0139  * insn, the register holding that pointer in the true branch changes state to
0140  * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
0141  * branch. See check_cond_jmp_op().
0142  *
0143  * After the call R0 is set to return type of the function and registers R1-R5
0144  * are set to NOT_INIT to indicate that they are no longer readable.
0145  *
0146  * The following reference types represent a potential reference to a kernel
0147  * resource which, after first being allocated, must be checked and freed by
0148  * the BPF program:
0149  * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
0150  *
0151  * When the verifier sees a helper call return a reference type, it allocates a
0152  * pointer id for the reference and stores it in the current function state.
0153  * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
0154  * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
0155  * passes through a NULL-check conditional. For the branch wherein the state is
0156  * changed to CONST_IMM, the verifier releases the reference.
0157  *
0158  * For each helper function that allocates a reference, such as
0159  * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
0160  * bpf_sk_release(). When a reference type passes into the release function,
0161  * the verifier also releases the reference. If any unchecked or unreleased
0162  * reference remains at the end of the program, the verifier rejects it.
0163  */
0164
0165 /* verifier_state + insn_idx are pushed to stack when branch is encountered */
0166 struct bpf_verifier_stack_elem {
0167     /* verifer state is 'st'
0168      * before processing instruction 'insn_idx'
0169      * and after processing instruction 'prev_insn_idx'
0170      */
0171     struct bpf_verifier_state st;
0172     int insn_idx;
0173     int prev_insn_idx;
0174     struct bpf_verifier_stack_elem *next;
0175     /* length of verifier log at the time this state was pushed on stack */
0176     u32 log_pos;
0177 };
0178
0179 #define BPF_COMPLEXITY_LIMIT_JMP_SEQ    8192
0180 #define BPF_COMPLEXITY_LIMIT_STATES 64
0181
0182 #define BPF_MAP_KEY_POISON  (1ULL << 63)
0183 #define BPF_MAP_KEY_SEEN    (1ULL << 62)
0184
0185 #define BPF_MAP_PTR_UNPRIV  1UL
0186 #define BPF_MAP_PTR_POISON  ((void *)((0xeB9FUL << 1) + \
0187                       POISON_POINTER_DELTA))
0188 #define BPF_MAP_PTR(X)      ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
0189
0190 static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx);
0191 static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
0192
0193 static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
0194 {
0195     return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
0196 }
0197
0198 static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
0199 {
0200     return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
0201 }
0202
0203 static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
0204                   const struct bpf_map *map, bool unpriv)
0205 {
0206     BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
0207     unpriv |= bpf_map_ptr_unpriv(aux);
0208     aux->map_ptr_state = (unsigned long)map |
0209                  (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
0210 }
0211
0212 static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
0213 {
0214     return aux->map_key_state & BPF_MAP_KEY_POISON;
0215 }
0216
0217 static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
0218 {
0219     return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
0220 }
0221
0222 static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
0223 {
0224     return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
0225 }
0226
0227 static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
0228 {
0229     bool poisoned = bpf_map_key_poisoned(aux);
0230
0231     aux->map_key_state = state | BPF_MAP_KEY_SEEN |
0232                  (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
0233 }
0234
0235 static bool bpf_pseudo_call(const struct bpf_insn *insn)
0236 {
0237     return insn->code == (BPF_JMP | BPF_CALL) &&
0238            insn->src_reg == BPF_PSEUDO_CALL;
0239 }
0240
0241 static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
0242 {
0243     return insn->code == (BPF_JMP | BPF_CALL) &&
0244            insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
0245 }
0246
0247 struct bpf_call_arg_meta {
0248     struct bpf_map *map_ptr;
0249     bool raw_mode;
0250     bool pkt_access;
0251     u8 release_regno;
0252     int regno;
0253     int access_size;
0254     int mem_size;
0255     u64 msize_max_value;
0256     int ref_obj_id;
0257     int map_uid;
0258     int func_id;
0259     struct btf *btf;
0260     u32 btf_id;
0261     struct btf *ret_btf;
0262     u32 ret_btf_id;
0263     u32 subprogno;
0264     struct bpf_map_value_off_desc *kptr_off_desc;
0265     u8 uninit_dynptr_regno;
0266 };
0267
0268 struct btf *btf_vmlinux;
0269
0270 static DEFINE_MUTEX(bpf_verifier_lock);
0271
0272 static const struct bpf_line_info *
0273 find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
0274 {
0275     const struct bpf_line_info *linfo;
0276     const struct bpf_prog *prog;
0277     u32 i, nr_linfo;
0278
0279     prog = env->prog;
0280     nr_linfo = prog->aux->nr_linfo;
0281
0282     if (!nr_linfo || insn_off >= prog->len)
0283         return NULL;
0284
0285     linfo = prog->aux->linfo;
0286     for (i = 1; i < nr_linfo; i++)
0287         if (insn_off < linfo[i].insn_off)
0288             break;
0289
0290     return &linfo[i - 1];
0291 }
0292
0293 void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
0294                va_list args)
0295 {
0296     unsigned int n;
0297
0298     n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
0299
0300     WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
0301           "verifier log line truncated - local buffer too short\n");
0302
0303     if (log->level == BPF_LOG_KERNEL) {
0304         bool newline = n > 0 && log->kbuf[n - 1] == '\n';
0305
0306         pr_err("BPF: %s%s", log->kbuf, newline ? "" : "\n");
0307         return;
0308     }
0309
0310     n = min(log->len_total - log->len_used - 1, n);
0311     log->kbuf[n] = '\0';
0312     if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
0313         log->len_used += n;
0314     else
0315         log->ubuf = NULL;
0316 }
0317
0318 static void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos)
0319 {
0320     char zero = 0;
0321
0322     if (!bpf_verifier_log_needed(log))
0323         return;
0324
0325     log->len_used = new_pos;
0326     if (put_user(zero, log->ubuf + new_pos))
0327         log->ubuf = NULL;
0328 }
0329
0330 /* log_level controls verbosity level of eBPF verifier.
0331  * bpf_verifier_log_write() is used to dump the verification trace to the log,
0332  * so the user can figure out what's wrong with the program
0333  */
0334 __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
0335                        const char *fmt, ...)
0336 {
0337     va_list args;
0338
0339     if (!bpf_verifier_log_needed(&env->log))
0340         return;
0341
0342     va_start(args, fmt);
0343     bpf_verifier_vlog(&env->log, fmt, args);
0344     va_end(args);
0345 }
0346 EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
0347
0348 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
0349 {
0350     struct bpf_verifier_env *env = private_data;
0351     va_list args;
0352
0353     if (!bpf_verifier_log_needed(&env->log))
0354         return;
0355
0356     va_start(args, fmt);
0357     bpf_verifier_vlog(&env->log, fmt, args);
0358     va_end(args);
0359 }
0360
0361 __printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
0362                 const char *fmt, ...)
0363 {
0364     va_list args;
0365
0366     if (!bpf_verifier_log_needed(log))
0367         return;
0368
0369     va_start(args, fmt);
0370     bpf_verifier_vlog(log, fmt, args);
0371     va_end(args);
0372 }
0373
0374 static const char *ltrim(const char *s)
0375 {
0376     while (isspace(*s))
0377         s++;
0378
0379     return s;
0380 }
0381
0382 __printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
0383                      u32 insn_off,
0384                      const char *prefix_fmt, ...)
0385 {
0386     const struct bpf_line_info *linfo;
0387
0388     if (!bpf_verifier_log_needed(&env->log))
0389         return;
0390
0391     linfo = find_linfo(env, insn_off);
0392     if (!linfo || linfo == env->prev_linfo)
0393         return;
0394
0395     if (prefix_fmt) {
0396         va_list args;
0397
0398         va_start(args, prefix_fmt);
0399         bpf_verifier_vlog(&env->log, prefix_fmt, args);
0400         va_end(args);
0401     }
0402
0403     verbose(env, "%s\n",
0404         ltrim(btf_name_by_offset(env->prog->aux->btf,
0405                      linfo->line_off)));
0406
0407     env->prev_linfo = linfo;
0408 }
0409
0410 static void verbose_invalid_scalar(struct bpf_verifier_env *env,
0411                    struct bpf_reg_state *reg,
0412                    struct tnum *range, const char *ctx,
0413                    const char *reg_name)
0414 {
0415     char tn_buf[48];
0416
0417     verbose(env, "At %s the register %s ", ctx, reg_name);
0418     if (!tnum_is_unknown(reg->var_off)) {
0419         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
0420         verbose(env, "has value %s", tn_buf);
0421     } else {
0422         verbose(env, "has unknown scalar value");
0423     }
0424     tnum_strn(tn_buf, sizeof(tn_buf), *range);
0425     verbose(env, " should have been in %s\n", tn_buf);
0426 }
0427
0428 static bool type_is_pkt_pointer(enum bpf_reg_type type)
0429 {
0430     return type == PTR_TO_PACKET ||
0431            type == PTR_TO_PACKET_META;
0432 }
0433
0434 static bool type_is_sk_pointer(enum bpf_reg_type type)
0435 {
0436     return type == PTR_TO_SOCKET ||
0437         type == PTR_TO_SOCK_COMMON ||
0438         type == PTR_TO_TCP_SOCK ||
0439         type == PTR_TO_XDP_SOCK;
0440 }
0441
0442 static bool reg_type_not_null(enum bpf_reg_type type)
0443 {
0444     return type == PTR_TO_SOCKET ||
0445         type == PTR_TO_TCP_SOCK ||
0446         type == PTR_TO_MAP_VALUE ||
0447         type == PTR_TO_MAP_KEY ||
0448         type == PTR_TO_SOCK_COMMON;
0449 }
0450
0451 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
0452 {
0453     return reg->type == PTR_TO_MAP_VALUE &&
0454         map_value_has_spin_lock(reg->map_ptr);
0455 }
0456
0457 static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
0458 {
0459     return base_type(type) == PTR_TO_SOCKET ||
0460         base_type(type) == PTR_TO_TCP_SOCK ||
0461         base_type(type) == PTR_TO_MEM ||
0462         base_type(type) == PTR_TO_BTF_ID;
0463 }
0464
0465 static bool type_is_rdonly_mem(u32 type)
0466 {
0467     return type & MEM_RDONLY;
0468 }
0469
0470 static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
0471 {
0472     return type == ARG_PTR_TO_SOCK_COMMON;
0473 }
0474
0475 static bool type_may_be_null(u32 type)
0476 {
0477     return type & PTR_MAYBE_NULL;
0478 }
0479
0480 static bool may_be_acquire_function(enum bpf_func_id func_id)
0481 {
0482     return func_id == BPF_FUNC_sk_lookup_tcp ||
0483         func_id == BPF_FUNC_sk_lookup_udp ||
0484         func_id == BPF_FUNC_skc_lookup_tcp ||
0485         func_id == BPF_FUNC_map_lookup_elem ||
0486             func_id == BPF_FUNC_ringbuf_reserve;
0487 }
0488
0489 static bool is_acquire_function(enum bpf_func_id func_id,
0490                 const struct bpf_map *map)
0491 {
0492     enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
0493
0494     if (func_id == BPF_FUNC_sk_lookup_tcp ||
0495         func_id == BPF_FUNC_sk_lookup_udp ||
0496         func_id == BPF_FUNC_skc_lookup_tcp ||
0497         func_id == BPF_FUNC_ringbuf_reserve ||
0498         func_id == BPF_FUNC_kptr_xchg)
0499         return true;
0500
0501     if (func_id == BPF_FUNC_map_lookup_elem &&
0502         (map_type == BPF_MAP_TYPE_SOCKMAP ||
0503          map_type == BPF_MAP_TYPE_SOCKHASH))
0504         return true;
0505
0506     return false;
0507 }
0508
0509 static bool is_ptr_cast_function(enum bpf_func_id func_id)
0510 {
0511     return func_id == BPF_FUNC_tcp_sock ||
0512         func_id == BPF_FUNC_sk_fullsock ||
0513         func_id == BPF_FUNC_skc_to_tcp_sock ||
0514         func_id == BPF_FUNC_skc_to_tcp6_sock ||
0515         func_id == BPF_FUNC_skc_to_udp6_sock ||
0516         func_id == BPF_FUNC_skc_to_mptcp_sock ||
0517         func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
0518         func_id == BPF_FUNC_skc_to_tcp_request_sock;
0519 }
0520
0521 static bool is_cmpxchg_insn(const struct bpf_insn *insn)
0522 {
0523     return BPF_CLASS(insn->code) == BPF_STX &&
0524            BPF_MODE(insn->code) == BPF_ATOMIC &&
0525            insn->imm == BPF_CMPXCHG;
0526 }
0527
0528 /* string representation of 'enum bpf_reg_type'
0529  *
0530  * Note that reg_type_str() can not appear more than once in a single verbose()
0531  * statement.
0532  */
0533 static const char *reg_type_str(struct bpf_verifier_env *env,
0534                 enum bpf_reg_type type)
0535 {
0536     char postfix[16] = {0}, prefix[32] = {0};
0537     static const char * const str[] = {
0538         [NOT_INIT]      = "?",
0539         [SCALAR_VALUE]      = "scalar",
0540         [PTR_TO_CTX]        = "ctx",
0541         [CONST_PTR_TO_MAP]  = "map_ptr",
0542         [PTR_TO_MAP_VALUE]  = "map_value",
0543         [PTR_TO_STACK]      = "fp",
0544         [PTR_TO_PACKET]     = "pkt",
0545         [PTR_TO_PACKET_META]    = "pkt_meta",
0546         [PTR_TO_PACKET_END] = "pkt_end",
0547         [PTR_TO_FLOW_KEYS]  = "flow_keys",
0548         [PTR_TO_SOCKET]     = "sock",
0549         [PTR_TO_SOCK_COMMON]    = "sock_common",
0550         [PTR_TO_TCP_SOCK]   = "tcp_sock",
0551         [PTR_TO_TP_BUFFER]  = "tp_buffer",
0552         [PTR_TO_XDP_SOCK]   = "xdp_sock",
0553         [PTR_TO_BTF_ID]     = "ptr_",
0554         [PTR_TO_MEM]        = "mem",
0555         [PTR_TO_BUF]        = "buf",
0556         [PTR_TO_FUNC]       = "func",
0557         [PTR_TO_MAP_KEY]    = "map_key",
0558     };
0559
0560     if (type & PTR_MAYBE_NULL) {
0561         if (base_type(type) == PTR_TO_BTF_ID)
0562             strncpy(postfix, "or_null_", 16);
0563         else
0564             strncpy(postfix, "_or_null", 16);
0565     }
0566
0567     if (type & MEM_RDONLY)
0568         strncpy(prefix, "rdonly_", 32);
0569     if (type & MEM_ALLOC)
0570         strncpy(prefix, "alloc_", 32);
0571     if (type & MEM_USER)
0572         strncpy(prefix, "user_", 32);
0573     if (type & MEM_PERCPU)
0574         strncpy(prefix, "percpu_", 32);
0575     if (type & PTR_UNTRUSTED)
0576         strncpy(prefix, "untrusted_", 32);
0577
0578     snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s",
0579          prefix, str[base_type(type)], postfix);
0580     return env->type_str_buf;
0581 }
0582
0583 static char slot_type_char[] = {
0584     [STACK_INVALID] = '?',
0585     [STACK_SPILL]   = 'r',
0586     [STACK_MISC]    = 'm',
0587     [STACK_ZERO]    = '0',
0588     [STACK_DYNPTR]  = 'd',
0589 };
0590
0591 static void print_liveness(struct bpf_verifier_env *env,
0592                enum bpf_reg_liveness live)
0593 {
0594     if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
0595         verbose(env, "_");
0596     if (live & REG_LIVE_READ)
0597         verbose(env, "r");
0598     if (live & REG_LIVE_WRITTEN)
0599         verbose(env, "w");
0600     if (live & REG_LIVE_DONE)
0601         verbose(env, "D");
0602 }
0603
0604 static int get_spi(s32 off)
0605 {
0606     return (-off - 1) / BPF_REG_SIZE;
0607 }
0608
0609 static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
0610 {
0611     int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
0612
0613     /* We need to check that slots between [spi - nr_slots + 1, spi] are
0614      * within [0, allocated_stack).
0615      *
0616      * Please note that the spi grows downwards. For example, a dynptr
0617      * takes the size of two stack slots; the first slot will be at
0618      * spi and the second slot will be at spi - 1.
0619      */
0620     return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
0621 }
0622
0623 static struct bpf_func_state *func(struct bpf_verifier_env *env,
0624                    const struct bpf_reg_state *reg)
0625 {
0626     struct bpf_verifier_state *cur = env->cur_state;
0627
0628     return cur->frame[reg->frameno];
0629 }
0630
0631 static const char *kernel_type_name(const struct btf* btf, u32 id)
0632 {
0633     return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
0634 }
0635
0636 static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno)
0637 {
0638     env->scratched_regs |= 1U << regno;
0639 }
0640
0641 static void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi)
0642 {
0643     env->scratched_stack_slots |= 1ULL << spi;
0644 }
0645
0646 static bool reg_scratched(const struct bpf_verifier_env *env, u32 regno)
0647 {
0648     return (env->scratched_regs >> regno) & 1;
0649 }
0650
0651 static bool stack_slot_scratched(const struct bpf_verifier_env *env, u64 regno)
0652 {
0653     return (env->scratched_stack_slots >> regno) & 1;
0654 }
0655
0656 static bool verifier_state_scratched(const struct bpf_verifier_env *env)
0657 {
0658     return env->scratched_regs || env->scratched_stack_slots;
0659 }
0660
0661 static void mark_verifier_state_clean(struct bpf_verifier_env *env)
0662 {
0663     env->scratched_regs = 0U;
0664     env->scratched_stack_slots = 0ULL;
0665 }
0666
0667 /* Used for printing the entire verifier state. */
0668 static void mark_verifier_state_scratched(struct bpf_verifier_env *env)
0669 {
0670     env->scratched_regs = ~0U;
0671     env->scratched_stack_slots = ~0ULL;
0672 }
0673
0674 static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
0675 {
0676     switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
0677     case DYNPTR_TYPE_LOCAL:
0678         return BPF_DYNPTR_TYPE_LOCAL;
0679     case DYNPTR_TYPE_RINGBUF:
0680         return BPF_DYNPTR_TYPE_RINGBUF;
0681     default:
0682         return BPF_DYNPTR_TYPE_INVALID;
0683     }
0684 }
0685
0686 static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
0687 {
0688     return type == BPF_DYNPTR_TYPE_RINGBUF;
0689 }
0690
0691 static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
0692                    enum bpf_arg_type arg_type, int insn_idx)
0693 {
0694     struct bpf_func_state *state = func(env, reg);
0695     enum bpf_dynptr_type type;
0696     int spi, i, id;
0697
0698     spi = get_spi(reg->off);
0699
0700     if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
0701         return -EINVAL;
0702
0703     for (i = 0; i < BPF_REG_SIZE; i++) {
0704         state->stack[spi].slot_type[i] = STACK_DYNPTR;
0705         state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
0706     }
0707
0708     type = arg_to_dynptr_type(arg_type);
0709     if (type == BPF_DYNPTR_TYPE_INVALID)
0710         return -EINVAL;
0711
0712     state->stack[spi].spilled_ptr.dynptr.first_slot = true;
0713     state->stack[spi].spilled_ptr.dynptr.type = type;
0714     state->stack[spi - 1].spilled_ptr.dynptr.type = type;
0715
0716     if (dynptr_type_refcounted(type)) {
0717         /* The id is used to track proper releasing */
0718         id = acquire_reference_state(env, insn_idx);
0719         if (id < 0)
0720             return id;
0721
0722         state->stack[spi].spilled_ptr.id = id;
0723         state->stack[spi - 1].spilled_ptr.id = id;
0724     }
0725
0726     return 0;
0727 }
0728
0729 static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
0730 {
0731     struct bpf_func_state *state = func(env, reg);
0732     int spi, i;
0733
0734     spi = get_spi(reg->off);
0735
0736     if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
0737         return -EINVAL;
0738
0739     for (i = 0; i < BPF_REG_SIZE; i++) {
0740         state->stack[spi].slot_type[i] = STACK_INVALID;
0741         state->stack[spi - 1].slot_type[i] = STACK_INVALID;
0742     }
0743
0744     /* Invalidate any slices associated with this dynptr */
0745     if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
0746         release_reference(env, state->stack[spi].spilled_ptr.id);
0747         state->stack[spi].spilled_ptr.id = 0;
0748         state->stack[spi - 1].spilled_ptr.id = 0;
0749     }
0750
0751     state->stack[spi].spilled_ptr.dynptr.first_slot = false;
0752     state->stack[spi].spilled_ptr.dynptr.type = 0;
0753     state->stack[spi - 1].spilled_ptr.dynptr.type = 0;
0754
0755     return 0;
0756 }
0757
0758 static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
0759 {
0760     struct bpf_func_state *state = func(env, reg);
0761     int spi = get_spi(reg->off);
0762     int i;
0763
0764     if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
0765         return true;
0766
0767     for (i = 0; i < BPF_REG_SIZE; i++) {
0768         if (state->stack[spi].slot_type[i] == STACK_DYNPTR ||
0769             state->stack[spi - 1].slot_type[i] == STACK_DYNPTR)
0770             return false;
0771     }
0772
0773     return true;
0774 }
0775
0776 static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
0777                      enum bpf_arg_type arg_type)
0778 {
0779     struct bpf_func_state *state = func(env, reg);
0780     int spi = get_spi(reg->off);
0781     int i;
0782
0783     if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) ||
0784         !state->stack[spi].spilled_ptr.dynptr.first_slot)
0785         return false;
0786
0787     for (i = 0; i < BPF_REG_SIZE; i++) {
0788         if (state->stack[spi].slot_type[i] != STACK_DYNPTR ||
0789             state->stack[spi - 1].slot_type[i] != STACK_DYNPTR)
0790             return false;
0791     }
0792
0793     /* ARG_PTR_TO_DYNPTR takes any type of dynptr */
0794     if (arg_type == ARG_PTR_TO_DYNPTR)
0795         return true;
0796
0797     return state->stack[spi].spilled_ptr.dynptr.type == arg_to_dynptr_type(arg_type);
0798 }
0799
0800 /* The reg state of a pointer or a bounded scalar was saved when
0801  * it was spilled to the stack.
0802  */
0803 static bool is_spilled_reg(const struct bpf_stack_state *stack)
0804 {
0805     return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
0806 }
0807
0808 static void scrub_spilled_slot(u8 *stype)
0809 {
0810     if (*stype != STACK_INVALID)
0811         *stype = STACK_MISC;
0812 }
0813
0814 static void print_verifier_state(struct bpf_verifier_env *env,
0815                  const struct bpf_func_state *state,
0816                  bool print_all)
0817 {
0818     const struct bpf_reg_state *reg;
0819     enum bpf_reg_type t;
0820     int i;
0821
0822     if (state->frameno)
0823         verbose(env, " frame%d:", state->frameno);
0824     for (i = 0; i < MAX_BPF_REG; i++) {
0825         reg = &state->regs[i];
0826         t = reg->type;
0827         if (t == NOT_INIT)
0828             continue;
0829         if (!print_all && !reg_scratched(env, i))
0830             continue;
0831         verbose(env, " R%d", i);
0832         print_liveness(env, reg->live);
0833         verbose(env, "=");
0834         if (t == SCALAR_VALUE && reg->precise)
0835             verbose(env, "P");
0836         if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
0837             tnum_is_const(reg->var_off)) {
0838             /* reg->off should be 0 for SCALAR_VALUE */
0839             verbose(env, "%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
0840             verbose(env, "%lld", reg->var_off.value + reg->off);
0841         } else {
0842             const char *sep = "";
0843
0844             verbose(env, "%s", reg_type_str(env, t));
0845             if (base_type(t) == PTR_TO_BTF_ID)
0846                 verbose(env, "%s", kernel_type_name(reg->btf, reg->btf_id));
0847             verbose(env, "(");
0848 /*
0849  * _a stands for append, was shortened to avoid multiline statements below.
0850  * This macro is used to output a comma separated list of attributes.
0851  */
0852 #define verbose_a(fmt, ...) ({ verbose(env, "%s" fmt, sep, __VA_ARGS__); sep = ","; })
0853
0854             if (reg->id)
0855                 verbose_a("id=%d", reg->id);
0856             if (reg_type_may_be_refcounted_or_null(t) && reg->ref_obj_id)
0857                 verbose_a("ref_obj_id=%d", reg->ref_obj_id);
0858             if (t != SCALAR_VALUE)
0859                 verbose_a("off=%d", reg->off);
0860             if (type_is_pkt_pointer(t))
0861                 verbose_a("r=%d", reg->range);
0862             else if (base_type(t) == CONST_PTR_TO_MAP ||
0863                  base_type(t) == PTR_TO_MAP_KEY ||
0864                  base_type(t) == PTR_TO_MAP_VALUE)
0865                 verbose_a("ks=%d,vs=%d",
0866                       reg->map_ptr->key_size,
0867                       reg->map_ptr->value_size);
0868             if (tnum_is_const(reg->var_off)) {
0869                 /* Typically an immediate SCALAR_VALUE, but
0870                  * could be a pointer whose offset is too big
0871                  * for reg->off
0872                  */
0873                 verbose_a("imm=%llx", reg->var_off.value);
0874             } else {
0875                 if (reg->smin_value != reg->umin_value &&
0876                     reg->smin_value != S64_MIN)
0877                     verbose_a("smin=%lld", (long long)reg->smin_value);
0878                 if (reg->smax_value != reg->umax_value &&
0879                     reg->smax_value != S64_MAX)
0880                     verbose_a("smax=%lld", (long long)reg->smax_value);
0881                 if (reg->umin_value != 0)
0882                     verbose_a("umin=%llu", (unsigned long long)reg->umin_value);
0883                 if (reg->umax_value != U64_MAX)
0884                     verbose_a("umax=%llu", (unsigned long long)reg->umax_value);
0885                 if (!tnum_is_unknown(reg->var_off)) {
0886                     char tn_buf[48];
0887
0888                     tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
0889                     verbose_a("var_off=%s", tn_buf);
0890                 }
0891                 if (reg->s32_min_value != reg->smin_value &&
0892                     reg->s32_min_value != S32_MIN)
0893                     verbose_a("s32_min=%d", (int)(reg->s32_min_value));
0894                 if (reg->s32_max_value != reg->smax_value &&
0895                     reg->s32_max_value != S32_MAX)
0896                     verbose_a("s32_max=%d", (int)(reg->s32_max_value));
0897                 if (reg->u32_min_value != reg->umin_value &&
0898                     reg->u32_min_value != U32_MIN)
0899                     verbose_a("u32_min=%d", (int)(reg->u32_min_value));
0900                 if (reg->u32_max_value != reg->umax_value &&
0901                     reg->u32_max_value != U32_MAX)
0902                     verbose_a("u32_max=%d", (int)(reg->u32_max_value));
0903             }
0904 #undef verbose_a
0905
0906             verbose(env, ")");
0907         }
0908     }
0909     for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
0910         char types_buf[BPF_REG_SIZE + 1];
0911         bool valid = false;
0912         int j;
0913
0914         for (j = 0; j < BPF_REG_SIZE; j++) {
0915             if (state->stack[i].slot_type[j] != STACK_INVALID)
0916                 valid = true;
0917             types_buf[j] = slot_type_char[
0918                     state->stack[i].slot_type[j]];
0919         }
0920         types_buf[BPF_REG_SIZE] = 0;
0921         if (!valid)
0922             continue;
0923         if (!print_all && !stack_slot_scratched(env, i))
0924             continue;
0925         verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
0926         print_liveness(env, state->stack[i].spilled_ptr.live);
0927         if (is_spilled_reg(&state->stack[i])) {
0928             reg = &state->stack[i].spilled_ptr;
0929             t = reg->type;
0930             verbose(env, "=%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
0931             if (t == SCALAR_VALUE && reg->precise)
0932                 verbose(env, "P");
0933             if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
0934                 verbose(env, "%lld", reg->var_off.value + reg->off);
0935         } else {
0936             verbose(env, "=%s", types_buf);
0937         }
0938     }
0939     if (state->acquired_refs && state->refs[0].id) {
0940         verbose(env, " refs=%d", state->refs[0].id);
0941         for (i = 1; i < state->acquired_refs; i++)
0942             if (state->refs[i].id)
0943                 verbose(env, ",%d", state->refs[i].id);
0944     }
0945     if (state->in_callback_fn)
0946         verbose(env, " cb");
0947     if (state->in_async_callback_fn)
0948         verbose(env, " async_cb");
0949     verbose(env, "\n");
0950     mark_verifier_state_clean(env);
0951 }
0952
0953 static inline u32 vlog_alignment(u32 pos)
0954 {
0955     return round_up(max(pos + BPF_LOG_MIN_ALIGNMENT / 2, BPF_LOG_ALIGNMENT),
0956             BPF_LOG_MIN_ALIGNMENT) - pos - 1;
0957 }
0958
0959 static void print_insn_state(struct bpf_verifier_env *env,
0960                  const struct bpf_func_state *state)
0961 {
0962     if (env->prev_log_len && env->prev_log_len == env->log.len_used) {
0963         /* remove new line character */
0964         bpf_vlog_reset(&env->log, env->prev_log_len - 1);
0965         verbose(env, "%*c;", vlog_alignment(env->prev_insn_print_len), ' ');
0966     } else {
0967         verbose(env, "%d:", env->insn_idx);
0968     }
0969     print_verifier_state(env, state, false);
0970 }
0971
0972 /* copy array src of length n * size bytes to dst. dst is reallocated if it's too
0973  * small to hold src. This is different from krealloc since we don't want to preserve
0974  * the contents of dst.
0975  *
0976  * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could
0977  * not be allocated.
0978  */
0979 static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
0980 {
0981     size_t bytes;
0982
0983     if (ZERO_OR_NULL_PTR(src))
0984         goto out;
0985
0986     if (unlikely(check_mul_overflow(n, size, &bytes)))
0987         return NULL;
0988
0989     if (ksize(dst) < bytes) {
0990         kfree(dst);
0991         dst = kmalloc_track_caller(bytes, flags);
0992         if (!dst)
0993             return NULL;
0994     }
0995
0996     memcpy(dst, src, bytes);
0997 out:
0998     return dst ? dst : ZERO_SIZE_PTR;
0999 }
1000
1001 /* resize an array from old_n items to new_n items. the array is reallocated if it's too
1002  * small to hold new_n items. new items are zeroed out if the array grows.
1003  *
1004  * Contrary to krealloc_array, does not free arr if new_n is zero.
1005  */
1006 static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
1007 {
1008     if (!new_n || old_n == new_n)
1009         goto out;
1010
1011     arr = krealloc_array(arr, new_n, size, GFP_KERNEL);
1012     if (!arr)
1013         return NULL;
1014
1015     if (new_n > old_n)
1016         memset(arr + old_n * size, 0, (new_n - old_n) * size);
1017
1018 out:
1019     return arr ? arr : ZERO_SIZE_PTR;
1020 }
1021
1022 static int copy_reference_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
1023 {
1024     dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
1025                    sizeof(struct bpf_reference_state), GFP_KERNEL);
1026     if (!dst->refs)
1027         return -ENOMEM;
1028
1029     dst->acquired_refs = src->acquired_refs;
1030     return 0;
1031 }
1032
1033 static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
1034 {
1035     size_t n = src->allocated_stack / BPF_REG_SIZE;
1036
1037     dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state),
1038                 GFP_KERNEL);
1039     if (!dst->stack)
1040         return -ENOMEM;
1041
1042     dst->allocated_stack = src->allocated_stack;
1043     return 0;
1044 }
1045
1046 static int resize_reference_state(struct bpf_func_state *state, size_t n)
1047 {
1048     state->refs = realloc_array(state->refs, state->acquired_refs, n,
1049                     sizeof(struct bpf_reference_state));
1050     if (!state->refs)
1051         return -ENOMEM;
1052
1053     state->acquired_refs = n;
1054     return 0;
1055 }
1056
1057 static int grow_stack_state(struct bpf_func_state *state, int size)
1058 {
1059     size_t old_n = state->allocated_stack / BPF_REG_SIZE, n = size / BPF_REG_SIZE;
1060
1061     if (old_n >= n)
1062         return 0;
1063
1064     state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
1065     if (!state->stack)
1066         return -ENOMEM;
1067
1068     state->allocated_stack = size;
1069     return 0;
1070 }
1071
1072 /* Acquire a pointer id from the env and update the state->refs to include
1073  * this new pointer reference.
1074  * On success, returns a valid pointer id to associate with the register
1075  * On failure, returns a negative errno.
1076  */
1077 static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
1078 {
1079     struct bpf_func_state *state = cur_func(env);
1080     int new_ofs = state->acquired_refs;
1081     int id, err;
1082
1083     err = resize_reference_state(state, state->acquired_refs + 1);
1084     if (err)
1085         return err;
1086     id = ++env->id_gen;
1087     state->refs[new_ofs].id = id;
1088     state->refs[new_ofs].insn_idx = insn_idx;
1089
1090     return id;
1091 }
1092
1093 /* release function corresponding to acquire_reference_state(). Idempotent. */
1094 static int release_reference_state(struct bpf_func_state *state, int ptr_id)
1095 {
1096     int i, last_idx;
1097
1098     last_idx = state->acquired_refs - 1;
1099     for (i = 0; i < state->acquired_refs; i++) {
1100         if (state->refs[i].id == ptr_id) {
1101             if (last_idx && i != last_idx)
1102                 memcpy(&state->refs[i], &state->refs[last_idx],
1103                        sizeof(*state->refs));
1104             memset(&state->refs[last_idx], 0, sizeof(*state->refs));
1105             state->acquired_refs--;
1106             return 0;
1107         }
1108     }
1109     return -EINVAL;
1110 }
1111
1112 static void free_func_state(struct bpf_func_state *state)
1113 {
1114     if (!state)
1115         return;
1116     kfree(state->refs);
1117     kfree(state->stack);
1118     kfree(state);
1119 }
1120
1121 static void clear_jmp_history(struct bpf_verifier_state *state)
1122 {
1123     kfree(state->jmp_history);
1124     state->jmp_history = NULL;
1125     state->jmp_history_cnt = 0;
1126 }
1127
1128 static void free_verifier_state(struct bpf_verifier_state *state,
1129                 bool free_self)
1130 {
1131     int i;
1132
1133     for (i = 0; i <= state->curframe; i++) {
1134         free_func_state(state->frame[i]);
1135         state->frame[i] = NULL;
1136     }
1137     clear_jmp_history(state);
1138     if (free_self)
1139         kfree(state);
1140 }
1141
1142 /* copy verifier state from src to dst growing dst stack space
1143  * when necessary to accommodate larger src stack
1144  */
1145 static int copy_func_state(struct bpf_func_state *dst,
1146                const struct bpf_func_state *src)
1147 {
1148     int err;
1149
1150     memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
1151     err = copy_reference_state(dst, src);
1152     if (err)
1153         return err;
1154     return copy_stack_state(dst, src);
1155 }
1156
1157 static int copy_verifier_state(struct bpf_verifier_state *dst_state,
1158                    const struct bpf_verifier_state *src)
1159 {
1160     struct bpf_func_state *dst;
1161     int i, err;
1162
1163     dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
1164                         src->jmp_history_cnt, sizeof(struct bpf_idx_pair),
1165                         GFP_USER);
1166     if (!dst_state->jmp_history)
1167         return -ENOMEM;
1168     dst_state->jmp_history_cnt = src->jmp_history_cnt;
1169
1170     /* if dst has more stack frames then src frame, free them */
1171     for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
1172         free_func_state(dst_state->frame[i]);
1173         dst_state->frame[i] = NULL;
1174     }
1175     dst_state->speculative = src->speculative;
1176     dst_state->curframe = src->curframe;
1177     dst_state->active_spin_lock = src->active_spin_lock;
1178     dst_state->branches = src->branches;
1179     dst_state->parent = src->parent;
1180     dst_state->first_insn_idx = src->first_insn_idx;
1181     dst_state->last_insn_idx = src->last_insn_idx;
1182     for (i = 0; i <= src->curframe; i++) {
1183         dst = dst_state->frame[i];
1184         if (!dst) {
1185             dst = kzalloc(sizeof(*dst), GFP_KERNEL);
1186             if (!dst)
1187                 return -ENOMEM;
1188             dst_state->frame[i] = dst;
1189         }
1190         err = copy_func_state(dst, src->frame[i]);
1191         if (err)
1192             return err;
1193     }
1194     return 0;
1195 }
1196
1197 static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
1198 {
1199     while (st) {
1200         u32 br = --st->branches;
1201
1202         /* WARN_ON(br > 1) technically makes sense here,
1203          * but see comment in push_stack(), hence:
1204          */
1205         WARN_ONCE((int)br < 0,
1206               "BUG update_branch_counts:branches_to_explore=%d\n",
1207               br);
1208         if (br)
1209             break;
1210         st = st->parent;
1211     }
1212 }
1213
1214 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
1215              int *insn_idx, bool pop_log)
1216 {
1217     struct bpf_verifier_state *cur = env->cur_state;
1218     struct bpf_verifier_stack_elem *elem, *head = env->head;
1219     int err;
1220
1221     if (env->head == NULL)
1222         return -ENOENT;
1223
1224     if (cur) {
1225         err = copy_verifier_state(cur, &head->st);
1226         if (err)
1227             return err;
1228     }
1229     if (pop_log)
1230         bpf_vlog_reset(&env->log, head->log_pos);
1231     if (insn_idx)
1232         *insn_idx = head->insn_idx;
1233     if (prev_insn_idx)
1234         *prev_insn_idx = head->prev_insn_idx;
1235     elem = head->next;
1236     free_verifier_state(&head->st, false);
1237     kfree(head);
1238     env->head = elem;
1239     env->stack_size--;
1240     return 0;
1241 }
1242
1243 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
1244                          int insn_idx, int prev_insn_idx,
1245                          bool speculative)
1246 {
1247     struct bpf_verifier_state *cur = env->cur_state;
1248     struct bpf_verifier_stack_elem *elem;
1249     int err;
1250
1251     elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
1252     if (!elem)
1253         goto err;
1254
1255     elem->insn_idx = insn_idx;
1256     elem->prev_insn_idx = prev_insn_idx;
1257     elem->next = env->head;
1258     elem->log_pos = env->log.len_used;
1259     env->head = elem;
1260     env->stack_size++;
1261     err = copy_verifier_state(&elem->st, cur);
1262     if (err)
1263         goto err;
1264     elem->st.speculative |= speculative;
1265     if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
1266         verbose(env, "The sequence of %d jumps is too complex.\n",
1267             env->stack_size);
1268         goto err;
1269     }
1270     if (elem->st.parent) {
1271         ++elem->st.parent->branches;
1272         /* WARN_ON(branches > 2) technically makes sense here,
1273          * but
1274          * 1. speculative states will bump 'branches' for non-branch
1275          * instructions
1276          * 2. is_state_visited() heuristics may decide not to create
1277          * a new state for a sequence of branches and all such current
1278          * and cloned states will be pointing to a single parent state
1279          * which might have large 'branches' count.
1280          */
1281     }
1282     return &elem->st;
1283 err:
1284     free_verifier_state(env->cur_state, true);
1285     env->cur_state = NULL;
1286     /* pop all elements and return */
1287     while (!pop_stack(env, NULL, NULL, false));
1288     return NULL;
1289 }
1290
1291 #define CALLER_SAVED_REGS 6
1292 static const int caller_saved[CALLER_SAVED_REGS] = {
1293     BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
1294 };
1295
1296 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
1297                 struct bpf_reg_state *reg);
1298
1299 /* This helper doesn't clear reg->id */
1300 static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1301 {
1302     reg->var_off = tnum_const(imm);
1303     reg->smin_value = (s64)imm;
1304     reg->smax_value = (s64)imm;
1305     reg->umin_value = imm;
1306     reg->umax_value = imm;
1307
1308     reg->s32_min_value = (s32)imm;
1309     reg->s32_max_value = (s32)imm;
1310     reg->u32_min_value = (u32)imm;
1311     reg->u32_max_value = (u32)imm;
1312 }
1313
1314 /* Mark the unknown part of a register (variable offset or scalar value) as
1315  * known to have the value @imm.
1316  */
1317 static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1318 {
1319     /* Clear id, off, and union(map_ptr, range) */
1320     memset(((u8 *)reg) + sizeof(reg->type), 0,
1321            offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
1322     ___mark_reg_known(reg, imm);
1323 }
1324
1325 static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
1326 {
1327     reg->var_off = tnum_const_subreg(reg->var_off, imm);
1328     reg->s32_min_value = (s32)imm;
1329     reg->s32_max_value = (s32)imm;
1330     reg->u32_min_value = (u32)imm;
1331     reg->u32_max_value = (u32)imm;
1332 }
1333
1334 /* Mark the 'variable offset' part of a register as zero.  This should be
1335  * used only on registers holding a pointer type.
1336  */
1337 static void __mark_reg_known_zero(struct bpf_reg_state *reg)
1338 {
1339     __mark_reg_known(reg, 0);
1340 }
1341
1342 static void __mark_reg_const_zero(struct bpf_reg_state *reg)
1343 {
1344     __mark_reg_known(reg, 0);
1345     reg->type = SCALAR_VALUE;
1346 }
1347
1348 static void mark_reg_known_zero(struct bpf_verifier_env *env,
1349                 struct bpf_reg_state *regs, u32 regno)
1350 {
1351     if (WARN_ON(regno >= MAX_BPF_REG)) {
1352         verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
1353         /* Something bad happened, let's kill all regs */
1354         for (regno = 0; regno < MAX_BPF_REG; regno++)
1355             __mark_reg_not_init(env, regs + regno);
1356         return;
1357     }
1358     __mark_reg_known_zero(regs + regno);
1359 }
1360
1361 static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
1362 {
1363     if (base_type(reg->type) == PTR_TO_MAP_VALUE) {
1364         const struct bpf_map *map = reg->map_ptr;
1365
1366         if (map->inner_map_meta) {
1367             reg->type = CONST_PTR_TO_MAP;
1368             reg->map_ptr = map->inner_map_meta;
1369             /* transfer reg's id which is unique for every map_lookup_elem
1370              * as UID of the inner map.
1371              */
1372             if (map_value_has_timer(map->inner_map_meta))
1373                 reg->map_uid = reg->id;
1374         } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
1375             reg->type = PTR_TO_XDP_SOCK;
1376         } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
1377                map->map_type == BPF_MAP_TYPE_SOCKHASH) {
1378             reg->type = PTR_TO_SOCKET;
1379         } else {
1380             reg->type = PTR_TO_MAP_VALUE;
1381         }
1382         return;
1383     }
1384
1385     reg->type &= ~PTR_MAYBE_NULL;
1386 }
1387
1388 static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
1389 {
1390     return type_is_pkt_pointer(reg->type);
1391 }
1392
1393 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
1394 {
1395     return reg_is_pkt_pointer(reg) ||
1396            reg->type == PTR_TO_PACKET_END;
1397 }
1398
1399 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
1400 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
1401                     enum bpf_reg_type which)
1402 {
1403     /* The register can already have a range from prior markings.
1404      * This is fine as long as it hasn't been advanced from its
1405      * origin.
1406      */
1407     return reg->type == which &&
1408            reg->id == 0 &&
1409            reg->off == 0 &&
1410            tnum_equals_const(reg->var_off, 0);
1411 }
1412
1413 /* Reset the min/max bounds of a register */
1414 static void __mark_reg_unbounded(struct bpf_reg_state *reg)
1415 {
1416     reg->smin_value = S64_MIN;
1417     reg->smax_value = S64_MAX;
1418     reg->umin_value = 0;
1419     reg->umax_value = U64_MAX;
1420
1421     reg->s32_min_value = S32_MIN;
1422     reg->s32_max_value = S32_MAX;
1423     reg->u32_min_value = 0;
1424     reg->u32_max_value = U32_MAX;
1425 }
1426
1427 static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
1428 {
1429     reg->smin_value = S64_MIN;
1430     reg->smax_value = S64_MAX;
1431     reg->umin_value = 0;
1432     reg->umax_value = U64_MAX;
1433 }
1434
1435 static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
1436 {
1437     reg->s32_min_value = S32_MIN;
1438     reg->s32_max_value = S32_MAX;
1439     reg->u32_min_value = 0;
1440     reg->u32_max_value = U32_MAX;
1441 }
1442
1443 static void __update_reg32_bounds(struct bpf_reg_state *reg)
1444 {
1445     struct tnum var32_off = tnum_subreg(reg->var_off);
1446
1447     /* min signed is max(sign bit) | min(other bits) */
1448     reg->s32_min_value = max_t(s32, reg->s32_min_value,
1449             var32_off.value | (var32_off.mask & S32_MIN));
1450     /* max signed is min(sign bit) | max(other bits) */
1451     reg->s32_max_value = min_t(s32, reg->s32_max_value,
1452             var32_off.value | (var32_off.mask & S32_MAX));
1453     reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
1454     reg->u32_max_value = min(reg->u32_max_value,
1455                  (u32)(var32_off.value | var32_off.mask));
1456 }
1457
1458 static void __update_reg64_bounds(struct bpf_reg_state *reg)
1459 {
1460     /* min signed is max(sign bit) | min(other bits) */
1461     reg->smin_value = max_t(s64, reg->smin_value,
1462                 reg->var_off.value | (reg->var_off.mask & S64_MIN));
1463     /* max signed is min(sign bit) | max(other bits) */
1464     reg->smax_value = min_t(s64, reg->smax_value,
1465                 reg->var_off.value | (reg->var_off.mask & S64_MAX));
1466     reg->umin_value = max(reg->umin_value, reg->var_off.value);
1467     reg->umax_value = min(reg->umax_value,
1468                   reg->var_off.value | reg->var_off.mask);
1469 }
1470
1471 static void __update_reg_bounds(struct bpf_reg_state *reg)
1472 {
1473     __update_reg32_bounds(reg);
1474     __update_reg64_bounds(reg);
1475 }
1476
1477 /* Uses signed min/max values to inform unsigned, and vice-versa */
1478 static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
1479 {
1480     /* Learn sign from signed bounds.
1481      * If we cannot cross the sign boundary, then signed and unsigned bounds
1482      * are the same, so combine.  This works even in the negative case, e.g.
1483      * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1484      */
1485     if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) {
1486         reg->s32_min_value = reg->u32_min_value =
1487             max_t(u32, reg->s32_min_value, reg->u32_min_value);
1488         reg->s32_max_value = reg->u32_max_value =
1489             min_t(u32, reg->s32_max_value, reg->u32_max_value);
1490         return;
1491     }
1492     /* Learn sign from unsigned bounds.  Signed bounds cross the sign
1493      * boundary, so we must be careful.
1494      */
1495     if ((s32)reg->u32_max_value >= 0) {
1496         /* Positive.  We can't learn anything from the smin, but smax
1497          * is positive, hence safe.
1498          */
1499         reg->s32_min_value = reg->u32_min_value;
1500         reg->s32_max_value = reg->u32_max_value =
1501             min_t(u32, reg->s32_max_value, reg->u32_max_value);
1502     } else if ((s32)reg->u32_min_value < 0) {
1503         /* Negative.  We can't learn anything from the smax, but smin
1504          * is negative, hence safe.
1505          */
1506         reg->s32_min_value = reg->u32_min_value =
1507             max_t(u32, reg->s32_min_value, reg->u32_min_value);
1508         reg->s32_max_value = reg->u32_max_value;
1509     }
1510 }
1511
1512 static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
1513 {
1514     /* Learn sign from signed bounds.
1515      * If we cannot cross the sign boundary, then signed and unsigned bounds
1516      * are the same, so combine.  This works even in the negative case, e.g.
1517      * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1518      */
1519     if (reg->smin_value >= 0 || reg->smax_value < 0) {
1520         reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
1521                               reg->umin_value);
1522         reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
1523                               reg->umax_value);
1524         return;
1525     }
1526     /* Learn sign from unsigned bounds.  Signed bounds cross the sign
1527      * boundary, so we must be careful.
1528      */
1529     if ((s64)reg->umax_value >= 0) {
1530         /* Positive.  We can't learn anything from the smin, but smax
1531          * is positive, hence safe.
1532          */
1533         reg->smin_value = reg->umin_value;
1534         reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
1535                               reg->umax_value);
1536     } else if ((s64)reg->umin_value < 0) {
1537         /* Negative.  We can't learn anything from the smax, but smin
1538          * is negative, hence safe.
1539          */
1540         reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
1541                               reg->umin_value);
1542         reg->smax_value = reg->umax_value;
1543     }
1544 }
1545
1546 static void __reg_deduce_bounds(struct bpf_reg_state *reg)
1547 {
1548     __reg32_deduce_bounds(reg);
1549     __reg64_deduce_bounds(reg);
1550 }
1551
1552 /* Attempts to improve var_off based on unsigned min/max information */
1553 static void __reg_bound_offset(struct bpf_reg_state *reg)
1554 {
1555     struct tnum var64_off = tnum_intersect(reg->var_off,
1556                            tnum_range(reg->umin_value,
1557                               reg->umax_value));
1558     struct tnum var32_off = tnum_intersect(tnum_subreg(reg->var_off),
1559                         tnum_range(reg->u32_min_value,
1560                                reg->u32_max_value));
1561
1562     reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
1563 }
1564
1565 static void reg_bounds_sync(struct bpf_reg_state *reg)
1566 {
1567     /* We might have learned new bounds from the var_off. */
1568     __update_reg_bounds(reg);
1569     /* We might have learned something about the sign bit. */
1570     __reg_deduce_bounds(reg);
1571     /* We might have learned some bits from the bounds. */
1572     __reg_bound_offset(reg);
1573     /* Intersecting with the old var_off might have improved our bounds
1574      * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1575      * then new var_off is (0; 0x7f...fc) which improves our umax.
1576      */
1577     __update_reg_bounds(reg);
1578 }
1579
1580 static bool __reg32_bound_s64(s32 a)
1581 {
1582     return a >= 0 && a <= S32_MAX;
1583 }
1584
1585 static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
1586 {
1587     reg->umin_value = reg->u32_min_value;
1588     reg->umax_value = reg->u32_max_value;
1589
1590     /* Attempt to pull 32-bit signed bounds into 64-bit bounds but must
1591      * be positive otherwise set to worse case bounds and refine later
1592      * from tnum.
1593      */
1594     if (__reg32_bound_s64(reg->s32_min_value) &&
1595         __reg32_bound_s64(reg->s32_max_value)) {
1596         reg->smin_value = reg->s32_min_value;
1597         reg->smax_value = reg->s32_max_value;
1598     } else {
1599         reg->smin_value = 0;
1600         reg->smax_value = U32_MAX;
1601     }
1602 }
1603
1604 static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
1605 {
1606     /* special case when 64-bit register has upper 32-bit register
1607      * zeroed. Typically happens after zext or <<32, >>32 sequence
1608      * allowing us to use 32-bit bounds directly,
1609      */
1610     if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) {
1611         __reg_assign_32_into_64(reg);
1612     } else {
1613         /* Otherwise the best we can do is push lower 32bit known and
1614          * unknown bits into register (var_off set from jmp logic)
1615          * then learn as much as possible from the 64-bit tnum
1616          * known and unknown bits. The previous smin/smax bounds are
1617          * invalid here because of jmp32 compare so mark them unknown
1618          * so they do not impact tnum bounds calculation.
1619          */
1620         __mark_reg64_unbounded(reg);
1621     }
1622     reg_bounds_sync(reg);
1623 }
1624
1625 static bool __reg64_bound_s32(s64 a)
1626 {
1627     return a >= S32_MIN && a <= S32_MAX;
1628 }
1629
1630 static bool __reg64_bound_u32(u64 a)
1631 {
1632     return a >= U32_MIN && a <= U32_MAX;
1633 }
1634
1635 static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
1636 {
1637     __mark_reg32_unbounded(reg);
1638     if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) {
1639         reg->s32_min_value = (s32)reg->smin_value;
1640         reg->s32_max_value = (s32)reg->smax_value;
1641     }
1642     if (__reg64_bound_u32(reg->umin_value) && __reg64_bound_u32(reg->umax_value)) {
1643         reg->u32_min_value = (u32)reg->umin_value;
1644         reg->u32_max_value = (u32)reg->umax_value;
1645     }
1646     reg_bounds_sync(reg);
1647 }
1648
1649 /* Mark a register as having a completely unknown (scalar) value. */
1650 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
1651                    struct bpf_reg_state *reg)
1652 {
1653     /*
1654      * Clear type, id, off, and union(map_ptr, range) and
1655      * padding between 'type' and union
1656      */
1657     memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
1658     reg->type = SCALAR_VALUE;
1659     reg->var_off = tnum_unknown;
1660     reg->frameno = 0;
1661     reg->precise = env->subprog_cnt > 1 || !env->bpf_capable;
1662     __mark_reg_unbounded(reg);
1663 }
1664
1665 static void mark_reg_unknown(struct bpf_verifier_env *env,
1666                  struct bpf_reg_state *regs, u32 regno)
1667 {
1668     if (WARN_ON(regno >= MAX_BPF_REG)) {
1669         verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
1670         /* Something bad happened, let's kill all regs except FP */
1671         for (regno = 0; regno < BPF_REG_FP; regno++)
1672             __mark_reg_not_init(env, regs + regno);
1673         return;
1674     }
1675     __mark_reg_unknown(env, regs + regno);
1676 }
1677
1678 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
1679                 struct bpf_reg_state *reg)
1680 {
1681     __mark_reg_unknown(env, reg);
1682     reg->type = NOT_INIT;
1683 }
1684
1685 static void mark_reg_not_init(struct bpf_verifier_env *env,
1686                   struct bpf_reg_state *regs, u32 regno)
1687 {
1688     if (WARN_ON(regno >= MAX_BPF_REG)) {
1689         verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
1690         /* Something bad happened, let's kill all regs except FP */
1691         for (regno = 0; regno < BPF_REG_FP; regno++)
1692             __mark_reg_not_init(env, regs + regno);
1693         return;
1694     }
1695     __mark_reg_not_init(env, regs + regno);
1696 }
1697
1698 static void mark_btf_ld_reg(struct bpf_verifier_env *env,
1699                 struct bpf_reg_state *regs, u32 regno,
1700                 enum bpf_reg_type reg_type,
1701                 struct btf *btf, u32 btf_id,
1702                 enum bpf_type_flag flag)
1703 {
1704     if (reg_type == SCALAR_VALUE) {
1705         mark_reg_unknown(env, regs, regno);
1706         return;
1707     }
1708     mark_reg_known_zero(env, regs, regno);
1709     regs[regno].type = PTR_TO_BTF_ID | flag;
1710     regs[regno].btf = btf;
1711     regs[regno].btf_id = btf_id;
1712 }
1713
1714 #define DEF_NOT_SUBREG  (0)
1715 static void init_reg_state(struct bpf_verifier_env *env,
1716                struct bpf_func_state *state)
1717 {
1718     struct bpf_reg_state *regs = state->regs;
1719     int i;
1720
1721     for (i = 0; i < MAX_BPF_REG; i++) {
1722         mark_reg_not_init(env, regs, i);
1723         regs[i].live = REG_LIVE_NONE;
1724         regs[i].parent = NULL;
1725         regs[i].subreg_def = DEF_NOT_SUBREG;
1726     }
1727
1728     /* frame pointer */
1729     regs[BPF_REG_FP].type = PTR_TO_STACK;
1730     mark_reg_known_zero(env, regs, BPF_REG_FP);
1731     regs[BPF_REG_FP].frameno = state->frameno;
1732 }
1733
1734 #define BPF_MAIN_FUNC (-1)
1735 static void init_func_state(struct bpf_verifier_env *env,
1736                 struct bpf_func_state *state,
1737                 int callsite, int frameno, int subprogno)
1738 {
1739     state->callsite = callsite;
1740     state->frameno = frameno;
1741     state->subprogno = subprogno;
1742     init_reg_state(env, state);
1743     mark_verifier_state_scratched(env);
1744 }
1745
1746 /* Similar to push_stack(), but for async callbacks */
1747 static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
1748                         int insn_idx, int prev_insn_idx,
1749                         int subprog)
1750 {
1751     struct bpf_verifier_stack_elem *elem;
1752     struct bpf_func_state *frame;
1753
1754     elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
1755     if (!elem)
1756         goto err;
1757
1758     elem->insn_idx = insn_idx;
1759     elem->prev_insn_idx = prev_insn_idx;
1760     elem->next = env->head;
1761     elem->log_pos = env->log.len_used;
1762     env->head = elem;
1763     env->stack_size++;
1764     if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
1765         verbose(env,
1766             "The sequence of %d jumps is too complex for async cb.\n",
1767             env->stack_size);
1768         goto err;
1769     }
1770     /* Unlike push_stack() do not copy_verifier_state().
1771      * The caller state doesn't matter.
1772      * This is async callback. It starts in a fresh stack.
1773      * Initialize it similar to do_check_common().
1774      */
1775     elem->st.branches = 1;
1776     frame = kzalloc(sizeof(*frame), GFP_KERNEL);
1777     if (!frame)
1778         goto err;
1779     init_func_state(env, frame,
1780             BPF_MAIN_FUNC /* callsite */,
1781             0 /* frameno within this callchain */,
1782             subprog /* subprog number within this prog */);
1783     elem->st.frame[0] = frame;
1784     return &elem->st;
1785 err:
1786     free_verifier_state(env->cur_state, true);
1787     env->cur_state = NULL;
1788     /* pop all elements and return */
1789     while (!pop_stack(env, NULL, NULL, false));
1790     return NULL;
1791 }
1792
1793
1794 enum reg_arg_type {
1795     SRC_OP,     /* register is used as source operand */
1796     DST_OP,     /* register is used as destination operand */
1797     DST_OP_NO_MARK  /* same as above, check only, don't mark */
1798 };
1799
1800 static int cmp_subprogs(const void *a, const void *b)
1801 {
1802     return ((struct bpf_subprog_info *)a)->start -
1803            ((struct bpf_subprog_info *)b)->start;
1804 }
1805
1806 static int find_subprog(struct bpf_verifier_env *env, int off)
1807 {
1808     struct bpf_subprog_info *p;
1809
1810     p = bsearch(&off, env->subprog_info, env->subprog_cnt,
1811             sizeof(env->subprog_info[0]), cmp_subprogs);
1812     if (!p)
1813         return -ENOENT;
1814     return p - env->subprog_info;
1815
1816 }
1817
1818 static int add_subprog(struct bpf_verifier_env *env, int off)
1819 {
1820     int insn_cnt = env->prog->len;
1821     int ret;
1822
1823     if (off >= insn_cnt || off < 0) {
1824         verbose(env, "call to invalid destination\n");
1825         return -EINVAL;
1826     }
1827     ret = find_subprog(env, off);
1828     if (ret >= 0)
1829         return ret;
1830     if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
1831         verbose(env, "too many subprograms\n");
1832         return -E2BIG;
1833     }
1834     /* determine subprog starts. The end is one before the next starts */
1835     env->subprog_info[env->subprog_cnt++].start = off;
1836     sort(env->subprog_info, env->subprog_cnt,
1837          sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
1838     return env->subprog_cnt - 1;
1839 }
1840
1841 #define MAX_KFUNC_DESCS 256
1842 #define MAX_KFUNC_BTFS  256
1843
1844 struct bpf_kfunc_desc {
1845     struct btf_func_model func_model;
1846     u32 func_id;
1847     s32 imm;
1848     u16 offset;
1849 };
1850
1851 struct bpf_kfunc_btf {
1852     struct btf *btf;
1853     struct module *module;
1854     u16 offset;
1855 };
1856
1857 struct bpf_kfunc_desc_tab {
1858     struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
1859     u32 nr_descs;
1860 };
1861
1862 struct bpf_kfunc_btf_tab {
1863     struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
1864     u32 nr_descs;
1865 };
1866
1867 static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
1868 {
1869     const struct bpf_kfunc_desc *d0 = a;
1870     const struct bpf_kfunc_desc *d1 = b;
1871
1872     /* func_id is not greater than BTF_MAX_TYPE */
1873     return d0->func_id - d1->func_id ?: d0->offset - d1->offset;
1874 }
1875
1876 static int kfunc_btf_cmp_by_off(const void *a, const void *b)
1877 {
1878     const struct bpf_kfunc_btf *d0 = a;
1879     const struct bpf_kfunc_btf *d1 = b;
1880
1881     return d0->offset - d1->offset;
1882 }
1883
1884 static const struct bpf_kfunc_desc *
1885 find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
1886 {
1887     struct bpf_kfunc_desc desc = {
1888         .func_id = func_id,
1889         .offset = offset,
1890     };
1891     struct bpf_kfunc_desc_tab *tab;
1892
1893     tab = prog->aux->kfunc_tab;
1894     return bsearch(&desc, tab->descs, tab->nr_descs,
1895                sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
1896 }
1897
1898 static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
1899                      s16 offset)
1900 {
1901     struct bpf_kfunc_btf kf_btf = { .offset = offset };
1902     struct bpf_kfunc_btf_tab *tab;
1903     struct bpf_kfunc_btf *b;
1904     struct module *mod;
1905     struct btf *btf;
1906     int btf_fd;
1907
1908     tab = env->prog->aux->kfunc_btf_tab;
1909     b = bsearch(&kf_btf, tab->descs, tab->nr_descs,
1910             sizeof(tab->descs[0]), kfunc_btf_cmp_by_off);
1911     if (!b) {
1912         if (tab->nr_descs == MAX_KFUNC_BTFS) {
1913             verbose(env, "too many different module BTFs\n");
1914             return ERR_PTR(-E2BIG);
1915         }
1916
1917         if (bpfptr_is_null(env->fd_array)) {
1918             verbose(env, "kfunc offset > 0 without fd_array is invalid\n");
1919             return ERR_PTR(-EPROTO);
1920         }
1921
1922         if (copy_from_bpfptr_offset(&btf_fd, env->fd_array,
1923                         offset * sizeof(btf_fd),
1924                         sizeof(btf_fd)))
1925             return ERR_PTR(-EFAULT);
1926
1927         btf = btf_get_by_fd(btf_fd);
1928         if (IS_ERR(btf)) {
1929             verbose(env, "invalid module BTF fd specified\n");
1930             return btf;
1931         }
1932
1933         if (!btf_is_module(btf)) {
1934             verbose(env, "BTF fd for kfunc is not a module BTF\n");
1935             btf_put(btf);
1936             return ERR_PTR(-EINVAL);
1937         }
1938
1939         mod = btf_try_get_module(btf);
1940         if (!mod) {
1941             btf_put(btf);
1942             return ERR_PTR(-ENXIO);
1943         }
1944
1945         b = &tab->descs[tab->nr_descs++];
1946         b->btf = btf;
1947         b->module = mod;
1948         b->offset = offset;
1949
1950         sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
1951              kfunc_btf_cmp_by_off, NULL);
1952     }
1953     return b->btf;
1954 }
1955
1956 void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
1957 {
1958     if (!tab)
1959         return;
1960
1961     while (tab->nr_descs--) {
1962         module_put(tab->descs[tab->nr_descs].module);
1963         btf_put(tab->descs[tab->nr_descs].btf);
1964     }
1965     kfree(tab);
1966 }
1967
1968 static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset)
1969 {
1970     if (offset) {
1971         if (offset < 0) {
1972             /* In the future, this can be allowed to increase limit
1973              * of fd index into fd_array, interpreted as u16.
1974              */
1975             verbose(env, "negative offset disallowed for kernel module function call\n");
1976             return ERR_PTR(-EINVAL);
1977         }
1978
1979         return __find_kfunc_desc_btf(env, offset);
1980     }
1981     return btf_vmlinux ?: ERR_PTR(-ENOENT);
1982 }
1983
1984 static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
1985 {
1986     const struct btf_type *func, *func_proto;
1987     struct bpf_kfunc_btf_tab *btf_tab;
1988     struct bpf_kfunc_desc_tab *tab;
1989     struct bpf_prog_aux *prog_aux;
1990     struct bpf_kfunc_desc *desc;
1991     const char *func_name;
1992     struct btf *desc_btf;
1993     unsigned long call_imm;
1994     unsigned long addr;
1995     int err;
1996
1997     prog_aux = env->prog->aux;
1998     tab = prog_aux->kfunc_tab;
1999     btf_tab = prog_aux->kfunc_btf_tab;
2000     if (!tab) {
2001         if (!btf_vmlinux) {
2002             verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
2003             return -ENOTSUPP;
2004         }
2005
2006         if (!env->prog->jit_requested) {
2007             verbose(env, "JIT is required for calling kernel function\n");
2008             return -ENOTSUPP;
2009         }
2010
2011         if (!bpf_jit_supports_kfunc_call()) {
2012             verbose(env, "JIT does not support calling kernel function\n");
2013             return -ENOTSUPP;
2014         }
2015
2016         if (!env->prog->gpl_compatible) {
2017             verbose(env, "cannot call kernel function from non-GPL compatible program\n");
2018             return -EINVAL;
2019         }
2020
2021         tab = kzalloc(sizeof(*tab), GFP_KERNEL);
2022         if (!tab)
2023             return -ENOMEM;
2024         prog_aux->kfunc_tab = tab;
2025     }
2026
2027     /* func_id == 0 is always invalid, but instead of returning an error, be
2028      * conservative and wait until the code elimination pass before returning
2029      * error, so that invalid calls that get pruned out can be in BPF programs
2030      * loaded from userspace.  It is also required that offset be untouched
2031      * for such calls.
2032      */
2033     if (!func_id && !offset)
2034         return 0;
2035
2036     if (!btf_tab && offset) {
2037         btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL);
2038         if (!btf_tab)
2039             return -ENOMEM;
2040         prog_aux->kfunc_btf_tab = btf_tab;
2041     }
2042
2043     desc_btf = find_kfunc_desc_btf(env, offset);
2044     if (IS_ERR(desc_btf)) {
2045         verbose(env, "failed to find BTF for kernel function\n");
2046         return PTR_ERR(desc_btf);
2047     }
2048
2049     if (find_kfunc_desc(env->prog, func_id, offset))
2050         return 0;
2051
2052     if (tab->nr_descs == MAX_KFUNC_DESCS) {
2053         verbose(env, "too many different kernel function calls\n");
2054         return -E2BIG;
2055     }
2056
2057     func = btf_type_by_id(desc_btf, func_id);
2058     if (!func || !btf_type_is_func(func)) {
2059         verbose(env, "kernel btf_id %u is not a function\n",
2060             func_id);
2061         return -EINVAL;
2062     }
2063     func_proto = btf_type_by_id(desc_btf, func->type);
2064     if (!func_proto || !btf_type_is_func_proto(func_proto)) {
2065         verbose(env, "kernel function btf_id %u does not have a valid func_proto\n",
2066             func_id);
2067         return -EINVAL;
2068     }
2069
2070     func_name = btf_name_by_offset(desc_btf, func->name_off);
2071     addr = kallsyms_lookup_name(func_name);
2072     if (!addr) {
2073         verbose(env, "cannot find address for kernel function %s\n",
2074             func_name);
2075         return -EINVAL;
2076     }
2077
2078     call_imm = BPF_CALL_IMM(addr);
2079     /* Check whether or not the relative offset overflows desc->imm */
2080     if ((unsigned long)(s32)call_imm != call_imm) {
2081         verbose(env, "address of kernel function %s is out of range\n",
2082             func_name);
2083         return -EINVAL;
2084     }
2085
2086     desc = &tab->descs[tab->nr_descs++];
2087     desc->func_id = func_id;
2088     desc->imm = call_imm;
2089     desc->offset = offset;
2090     err = btf_distill_func_proto(&env->log, desc_btf,
2091                      func_proto, func_name,
2092                      &desc->func_model);
2093     if (!err)
2094         sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2095              kfunc_desc_cmp_by_id_off, NULL);
2096     return err;
2097 }
2098
2099 static int kfunc_desc_cmp_by_imm(const void *a, const void *b)
2100 {
2101     const struct bpf_kfunc_desc *d0 = a;
2102     const struct bpf_kfunc_desc *d1 = b;
2103
2104     if (d0->imm > d1->imm)
2105         return 1;
2106     else if (d0->imm < d1->imm)
2107         return -1;
2108     return 0;
2109 }
2110
2111 static void sort_kfunc_descs_by_imm(struct bpf_prog *prog)
2112 {
2113     struct bpf_kfunc_desc_tab *tab;
2114
2115     tab = prog->aux->kfunc_tab;
2116     if (!tab)
2117         return;
2118
2119     sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2120          kfunc_desc_cmp_by_imm, NULL);
2121 }
2122
2123 bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
2124 {
2125     return !!prog->aux->kfunc_tab;
2126 }
2127
2128 const struct btf_func_model *
2129 bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
2130              const struct bpf_insn *insn)
2131 {
2132     const struct bpf_kfunc_desc desc = {
2133         .imm = insn->imm,
2134     };
2135     const struct bpf_kfunc_desc *res;
2136     struct bpf_kfunc_desc_tab *tab;
2137
2138     tab = prog->aux->kfunc_tab;
2139     res = bsearch(&desc, tab->descs, tab->nr_descs,
2140               sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm);
2141
2142     return res ? &res->func_model : NULL;
2143 }
2144
2145 static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
2146 {
2147     struct bpf_subprog_info *subprog = env->subprog_info;
2148     struct bpf_insn *insn = env->prog->insnsi;
2149     int i, ret, insn_cnt = env->prog->len;
2150
2151     /* Add entry function. */
2152     ret = add_subprog(env, 0);
2153     if (ret)
2154         return ret;
2155
2156     for (i = 0; i < insn_cnt; i++, insn++) {
2157         if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
2158             !bpf_pseudo_kfunc_call(insn))
2159             continue;
2160
2161         if (!env->bpf_capable) {
2162             verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
2163             return -EPERM;
2164         }
2165
2166         if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
2167             ret = add_subprog(env, i + insn->imm + 1);
2168         else
2169             ret = add_kfunc_call(env, insn->imm, insn->off);
2170
2171         if (ret < 0)
2172             return ret;
2173     }
2174
2175     /* Add a fake 'exit' subprog which could simplify subprog iteration
2176      * logic. 'subprog_cnt' should not be increased.
2177      */
2178     subprog[env->subprog_cnt].start = insn_cnt;
2179
2180     if (env->log.level & BPF_LOG_LEVEL2)
2181         for (i = 0; i < env->subprog_cnt; i++)
2182             verbose(env, "func#%d @%d\n", i, subprog[i].start);
2183
2184     return 0;
2185 }
2186
2187 static int check_subprogs(struct bpf_verifier_env *env)
2188 {
2189     int i, subprog_start, subprog_end, off, cur_subprog = 0;
2190     struct bpf_subprog_info *subprog = env->subprog_info;
2191     struct bpf_insn *insn = env->prog->insnsi;
2192     int insn_cnt = env->prog->len;
2193
2194     /* now check that all jumps are within the same subprog */
2195     subprog_start = subprog[cur_subprog].start;
2196     subprog_end = subprog[cur_subprog + 1].start;
2197     for (i = 0; i < insn_cnt; i++) {
2198         u8 code = insn[i].code;
2199
2200         if (code == (BPF_JMP | BPF_CALL) &&
2201             insn[i].imm == BPF_FUNC_tail_call &&
2202             insn[i].src_reg != BPF_PSEUDO_CALL)
2203             subprog[cur_subprog].has_tail_call = true;
2204         if (BPF_CLASS(code) == BPF_LD &&
2205             (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
2206             subprog[cur_subprog].has_ld_abs = true;
2207         if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
2208             goto next;
2209         if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
2210             goto next;
2211         off = i + insn[i].off + 1;
2212         if (off < subprog_start || off >= subprog_end) {
2213             verbose(env, "jump out of range from insn %d to %d\n", i, off);
2214             return -EINVAL;
2215         }
2216 next:
2217         if (i == subprog_end - 1) {
2218             /* to avoid fall-through from one subprog into another
2219              * the last insn of the subprog should be either exit
2220              * or unconditional jump back
2221              */
2222             if (code != (BPF_JMP | BPF_EXIT) &&
2223                 code != (BPF_JMP | BPF_JA)) {
2224                 verbose(env, "last insn is not an exit or jmp\n");
2225                 return -EINVAL;
2226             }
2227             subprog_start = subprog_end;
2228             cur_subprog++;
2229             if (cur_subprog < env->subprog_cnt)
2230                 subprog_end = subprog[cur_subprog + 1].start;
2231         }
2232     }
2233     return 0;
2234 }
2235
2236 /* Parentage chain of this register (or stack slot) should take care of all
2237  * issues like callee-saved registers, stack slot allocation time, etc.
2238  */
2239 static int mark_reg_read(struct bpf_verifier_env *env,
2240              const struct bpf_reg_state *state,
2241              struct bpf_reg_state *parent, u8 flag)
2242 {
2243     bool writes = parent == state->parent; /* Observe write marks */
2244     int cnt = 0;
2245
2246     while (parent) {
2247         /* if read wasn't screened by an earlier write ... */
2248         if (writes && state->live & REG_LIVE_WRITTEN)
2249             break;
2250         if (parent->live & REG_LIVE_DONE) {
2251             verbose(env, "verifier BUG type %s var_off %lld off %d\n",
2252                 reg_type_str(env, parent->type),
2253                 parent->var_off.value, parent->off);
2254             return -EFAULT;
2255         }
2256         /* The first condition is more likely to be true than the
2257          * second, checked it first.
2258          */
2259         if ((parent->live & REG_LIVE_READ) == flag ||
2260             parent->live & REG_LIVE_READ64)
2261             /* The parentage chain never changes and
2262              * this parent was already marked as LIVE_READ.
2263              * There is no need to keep walking the chain again and
2264              * keep re-marking all parents as LIVE_READ.
2265              * This case happens when the same register is read
2266              * multiple times without writes into it in-between.
2267              * Also, if parent has the stronger REG_LIVE_READ64 set,
2268              * then no need to set the weak REG_LIVE_READ32.
2269              */
2270             break;
2271         /* ... then we depend on parent's value */
2272         parent->live |= flag;
2273         /* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
2274         if (flag == REG_LIVE_READ64)
2275             parent->live &= ~REG_LIVE_READ32;
2276         state = parent;
2277         parent = state->parent;
2278         writes = true;
2279         cnt++;
2280     }
2281
2282     if (env->longest_mark_read_walk < cnt)
2283         env->longest_mark_read_walk = cnt;
2284     return 0;
2285 }
2286
2287 /* This function is supposed to be used by the following 32-bit optimization
2288  * code only. It returns TRUE if the source or destination register operates
2289  * on 64-bit, otherwise return FALSE.
2290  */
2291 static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
2292              u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
2293 {
2294     u8 code, class, op;
2295
2296     code = insn->code;
2297     class = BPF_CLASS(code);
2298     op = BPF_OP(code);
2299     if (class == BPF_JMP) {
2300         /* BPF_EXIT for "main" will reach here. Return TRUE
2301          * conservatively.
2302          */
2303         if (op == BPF_EXIT)
2304             return true;
2305         if (op == BPF_CALL) {
2306             /* BPF to BPF call will reach here because of marking
2307              * caller saved clobber with DST_OP_NO_MARK for which we
2308              * don't care the register def because they are anyway
2309              * marked as NOT_INIT already.
2310              */
2311             if (insn->src_reg == BPF_PSEUDO_CALL)
2312                 return false;
2313             /* Helper call will reach here because of arg type
2314              * check, conservatively return TRUE.
2315              */
2316             if (t == SRC_OP)
2317                 return true;
2318
2319             return false;
2320         }
2321     }
2322
2323     if (class == BPF_ALU64 || class == BPF_JMP ||
2324         /* BPF_END always use BPF_ALU class. */
2325         (class == BPF_ALU && op == BPF_END && insn->imm == 64))
2326         return true;
2327
2328     if (class == BPF_ALU || class == BPF_JMP32)
2329         return false;
2330
2331     if (class == BPF_LDX) {
2332         if (t != SRC_OP)
2333             return BPF_SIZE(code) == BPF_DW;
2334         /* LDX source must be ptr. */
2335         return true;
2336     }
2337
2338     if (class == BPF_STX) {
2339         /* BPF_STX (including atomic variants) has multiple source
2340          * operands, one of which is a ptr. Check whether the caller is
2341          * asking about it.
2342          */
2343         if (t == SRC_OP && reg->type != SCALAR_VALUE)
2344             return true;
2345         return BPF_SIZE(code) == BPF_DW;
2346     }
2347
2348     if (class == BPF_LD) {
2349         u8 mode = BPF_MODE(code);
2350
2351         /* LD_IMM64 */
2352         if (mode == BPF_IMM)
2353             return true;
2354
2355         /* Both LD_IND and LD_ABS return 32-bit data. */
2356         if (t != SRC_OP)
2357             return  false;
2358
2359         /* Implicit ctx ptr. */
2360         if (regno == BPF_REG_6)
2361             return true;
2362
2363         /* Explicit source could be any width. */
2364         return true;
2365     }
2366
2367     if (class == BPF_ST)
2368         /* The only source register for BPF_ST is a ptr. */
2369         return true;
2370
2371     /* Conservatively return true at default. */
2372     return true;
2373 }
2374
2375 /* Return the regno defined by the insn, or -1. */
2376 static int insn_def_regno(const struct bpf_insn *insn)
2377 {
2378     switch (BPF_CLASS(insn->code)) {
2379     case BPF_JMP:
2380     case BPF_JMP32:
2381     case BPF_ST:
2382         return -1;
2383     case BPF_STX:
2384         if (BPF_MODE(insn->code) == BPF_ATOMIC &&
2385             (insn->imm & BPF_FETCH)) {
2386             if (insn->imm == BPF_CMPXCHG)
2387                 return BPF_REG_0;
2388             else
2389                 return insn->src_reg;
2390         } else {
2391             return -1;
2392         }
2393     default:
2394         return insn->dst_reg;
2395     }
2396 }
2397
2398 /* Return TRUE if INSN has defined any 32-bit value explicitly. */
2399 static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
2400 {
2401     int dst_reg = insn_def_regno(insn);
2402
2403     if (dst_reg == -1)
2404         return false;
2405
2406     return !is_reg64(env, insn, dst_reg, NULL, DST_OP);
2407 }
2408
2409 static void mark_insn_zext(struct bpf_verifier_env *env,
2410                struct bpf_reg_state *reg)
2411 {
2412     s32 def_idx = reg->subreg_def;
2413
2414     if (def_idx == DEF_NOT_SUBREG)
2415         return;
2416
2417     env->insn_aux_data[def_idx - 1].zext_dst = true;
2418     /* The dst will be zero extended, so won't be sub-register anymore. */
2419     reg->subreg_def = DEF_NOT_SUBREG;
2420 }
2421
2422 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
2423              enum reg_arg_type t)
2424 {
2425     struct bpf_verifier_state *vstate = env->cur_state;
2426     struct bpf_func_state *state = vstate->frame[vstate->curframe];
2427     struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
2428     struct bpf_reg_state *reg, *regs = state->regs;
2429     bool rw64;
2430
2431     if (regno >= MAX_BPF_REG) {
2432         verbose(env, "R%d is invalid\n", regno);
2433         return -EINVAL;
2434     }
2435
2436     mark_reg_scratched(env, regno);
2437
2438     reg = &regs[regno];
2439     rw64 = is_reg64(env, insn, regno, reg, t);
2440     if (t == SRC_OP) {
2441         /* check whether register used as source operand can be read */
2442         if (reg->type == NOT_INIT) {
2443             verbose(env, "R%d !read_ok\n", regno);
2444             return -EACCES;
2445         }
2446         /* We don't need to worry about FP liveness because it's read-only */
2447         if (regno == BPF_REG_FP)
2448             return 0;
2449
2450         if (rw64)
2451             mark_insn_zext(env, reg);
2452
2453         return mark_reg_read(env, reg, reg->parent,
2454                      rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
2455     } else {
2456         /* check whether register used as dest operand can be written to */
2457         if (regno == BPF_REG_FP) {
2458             verbose(env, "frame pointer is read only\n");
2459             return -EACCES;
2460         }
2461         reg->live |= REG_LIVE_WRITTEN;
2462         reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
2463         if (t == DST_OP)
2464             mark_reg_unknown(env, regs, regno);
2465     }
2466     return 0;
2467 }
2468
2469 /* for any branch, call, exit record the history of jmps in the given state */
2470 static int push_jmp_history(struct bpf_verifier_env *env,
2471                 struct bpf_verifier_state *cur)
2472 {
2473     u32 cnt = cur->jmp_history_cnt;
2474     struct bpf_idx_pair *p;
2475
2476     cnt++;
2477     p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER);
2478     if (!p)
2479         return -ENOMEM;
2480     p[cnt - 1].idx = env->insn_idx;
2481     p[cnt - 1].prev_idx = env->prev_insn_idx;
2482     cur->jmp_history = p;
2483     cur->jmp_history_cnt = cnt;
2484     return 0;
2485 }
2486
2487 /* Backtrack one insn at a time. If idx is not at the top of recorded
2488  * history then previous instruction came from straight line execution.
2489  */
2490 static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
2491                  u32 *history)
2492 {
2493     u32 cnt = *history;
2494
2495     if (cnt && st->jmp_history[cnt - 1].idx == i) {
2496         i = st->jmp_history[cnt - 1].prev_idx;
2497         (*history)--;
2498     } else {
2499         i--;
2500     }
2501     return i;
2502 }
2503
2504 static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
2505 {
2506     const struct btf_type *func;
2507     struct btf *desc_btf;
2508
2509     if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
2510         return NULL;
2511
2512     desc_btf = find_kfunc_desc_btf(data, insn->off);
2513     if (IS_ERR(desc_btf))
2514         return "<error>";
2515
2516     func = btf_type_by_id(desc_btf, insn->imm);
2517     return btf_name_by_offset(desc_btf, func->name_off);
2518 }
2519
2520 /* For given verifier state backtrack_insn() is called from the last insn to
2521  * the first insn. Its purpose is to compute a bitmask of registers and
2522  * stack slots that needs precision in the parent verifier state.
2523  */
2524 static int backtrack_insn(struct bpf_verifier_env *env, int idx,
2525               u32 *reg_mask, u64 *stack_mask)
2526 {
2527     const struct bpf_insn_cbs cbs = {
2528         .cb_call    = disasm_kfunc_name,
2529         .cb_print   = verbose,
2530         .private_data   = env,
2531     };
2532     struct bpf_insn *insn = env->prog->insnsi + idx;
2533     u8 class = BPF_CLASS(insn->code);
2534     u8 opcode = BPF_OP(insn->code);
2535     u8 mode = BPF_MODE(insn->code);
2536     u32 dreg = 1u << insn->dst_reg;
2537     u32 sreg = 1u << insn->src_reg;
2538     u32 spi;
2539
2540     if (insn->code == 0)
2541         return 0;
2542     if (env->log.level & BPF_LOG_LEVEL2) {
2543         verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask);
2544         verbose(env, "%d: ", idx);
2545         print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
2546     }
2547
2548     if (class == BPF_ALU || class == BPF_ALU64) {
2549         if (!(*reg_mask & dreg))
2550             return 0;
2551         if (opcode == BPF_MOV) {
2552             if (BPF_SRC(insn->code) == BPF_X) {
2553                 /* dreg = sreg
2554                  * dreg needs precision after this insn
2555                  * sreg needs precision before this insn
2556                  */
2557                 *reg_mask &= ~dreg;
2558                 *reg_mask |= sreg;
2559             } else {
2560                 /* dreg = K
2561                  * dreg needs precision after this insn.
2562                  * Corresponding register is already marked
2563                  * as precise=true in this verifier state.
2564                  * No further markings in parent are necessary
2565                  */
2566                 *reg_mask &= ~dreg;
2567             }
2568         } else {
2569             if (BPF_SRC(insn->code) == BPF_X) {
2570                 /* dreg += sreg
2571                  * both dreg and sreg need precision
2572                  * before this insn
2573                  */
2574                 *reg_mask |= sreg;
2575             } /* else dreg += K
2576                * dreg still needs precision before this insn
2577                */
2578         }
2579     } else if (class == BPF_LDX) {
2580         if (!(*reg_mask & dreg))
2581             return 0;
2582         *reg_mask &= ~dreg;
2583
2584         /* scalars can only be spilled into stack w/o losing precision.
2585          * Load from any other memory can be zero extended.
2586          * The desire to keep that precision is already indicated
2587          * by 'precise' mark in corresponding register of this state.
2588          * No further tracking necessary.
2589          */
2590         if (insn->src_reg != BPF_REG_FP)
2591             return 0;
2592
2593         /* dreg = *(u64 *)[fp - off] was a fill from the stack.
2594          * that [fp - off] slot contains scalar that needs to be
2595          * tracked with precision
2596          */
2597         spi = (-insn->off - 1) / BPF_REG_SIZE;
2598         if (spi >= 64) {
2599             verbose(env, "BUG spi %d\n", spi);
2600             WARN_ONCE(1, "verifier backtracking bug");
2601             return -EFAULT;
2602         }
2603         *stack_mask |= 1ull << spi;
2604     } else if (class == BPF_STX || class == BPF_ST) {
2605         if (*reg_mask & dreg)
2606             /* stx & st shouldn't be using _scalar_ dst_reg
2607              * to access memory. It means backtracking
2608              * encountered a case of pointer subtraction.
2609              */
2610             return -ENOTSUPP;
2611         /* scalars can only be spilled into stack */
2612         if (insn->dst_reg != BPF_REG_FP)
2613             return 0;
2614         spi = (-insn->off - 1) / BPF_REG_SIZE;
2615         if (spi >= 64) {
2616             verbose(env, "BUG spi %d\n", spi);
2617             WARN_ONCE(1, "verifier backtracking bug");
2618             return -EFAULT;
2619         }
2620         if (!(*stack_mask & (1ull << spi)))
2621             return 0;
2622         *stack_mask &= ~(1ull << spi);
2623         if (class == BPF_STX)
2624             *reg_mask |= sreg;
2625     } else if (class == BPF_JMP || class == BPF_JMP32) {
2626         if (opcode == BPF_CALL) {
2627             if (insn->src_reg == BPF_PSEUDO_CALL)
2628                 return -ENOTSUPP;
2629             /* regular helper call sets R0 */
2630             *reg_mask &= ~1;
2631             if (*reg_mask & 0x3f) {
2632                 /* if backtracing was looking for registers R1-R5
2633                  * they should have been found already.
2634                  */
2635                 verbose(env, "BUG regs %x\n", *reg_mask);
2636                 WARN_ONCE(1, "verifier backtracking bug");
2637                 return -EFAULT;
2638             }
2639         } else if (opcode == BPF_EXIT) {
2640             return -ENOTSUPP;
2641         }
2642     } else if (class == BPF_LD) {
2643         if (!(*reg_mask & dreg))
2644             return 0;
2645         *reg_mask &= ~dreg;
2646         /* It's ld_imm64 or ld_abs or ld_ind.
2647          * For ld_imm64 no further tracking of precision
2648          * into parent is necessary
2649          */
2650         if (mode == BPF_IND || mode == BPF_ABS)
2651             /* to be analyzed */
2652             return -ENOTSUPP;
2653     }
2654     return 0;
2655 }
2656
2657 /* the scalar precision tracking algorithm:
2658  * . at the start all registers have precise=false.
2659  * . scalar ranges are tracked as normal through alu and jmp insns.
2660  * . once precise value of the scalar register is used in:
2661  *   .  ptr + scalar alu
2662  *   . if (scalar cond K|scalar)
2663  *   .  helper_call(.., scalar, ...) where ARG_CONST is expected
2664  *   backtrack through the verifier states and mark all registers and
2665  *   stack slots with spilled constants that these scalar regisers
2666  *   should be precise.
2667  * . during state pruning two registers (or spilled stack slots)
2668  *   are equivalent if both are not precise.
2669  *
2670  * Note the verifier cannot simply walk register parentage chain,
2671  * since many different registers and stack slots could have been
2672  * used to compute single precise scalar.
2673  *
2674  * The approach of starting with precise=true for all registers and then
2675  * backtrack to mark a register as not precise when the verifier detects
2676  * that program doesn't care about specific value (e.g., when helper
2677  * takes register as ARG_ANYTHING parameter) is not safe.
2678  *
2679  * It's ok to walk single parentage chain of the verifier states.
2680  * It's possible that this backtracking will go all the way till 1st insn.
2681  * All other branches will be explored for needing precision later.
2682  *
2683  * The backtracking needs to deal with cases like:
2684  *   R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
2685  * r9 -= r8
2686  * r5 = r9
2687  * if r5 > 0x79f goto pc+7
2688  *    R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
2689  * r5 += 1
2690  * ...
2691  * call bpf_perf_event_output#25
2692  *   where .arg5_type = ARG_CONST_SIZE_OR_ZERO
2693  *
2694  * and this case:
2695  * r6 = 1
2696  * call foo // uses callee's r6 inside to compute r0
2697  * r0 += r6
2698  * if r0 == 0 goto
2699  *
2700  * to track above reg_mask/stack_mask needs to be independent for each frame.
2701  *
2702  * Also if parent's curframe > frame where backtracking started,
2703  * the verifier need to mark registers in both frames, otherwise callees
2704  * may incorrectly prune callers. This is similar to
2705  * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
2706  *
2707  * For now backtracking falls back into conservative marking.
2708  */
2709 static void mark_all_scalars_precise(struct bpf_verifier_env *env,
2710                      struct bpf_verifier_state *st)
2711 {
2712     struct bpf_func_state *func;
2713     struct bpf_reg_state *reg;
2714     int i, j;
2715
2716     /* big hammer: mark all scalars precise in this path.
2717      * pop_stack may still get !precise scalars.
2718      */
2719     for (; st; st = st->parent)
2720         for (i = 0; i <= st->curframe; i++) {
2721             func = st->frame[i];
2722             for (j = 0; j < BPF_REG_FP; j++) {
2723                 reg = &func->regs[j];
2724                 if (reg->type != SCALAR_VALUE)
2725                     continue;
2726                 reg->precise = true;
2727             }
2728             for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
2729                 if (!is_spilled_reg(&func->stack[j]))
2730                     continue;
2731                 reg = &func->stack[j].spilled_ptr;
2732                 if (reg->type != SCALAR_VALUE)
2733                     continue;
2734                 reg->precise = true;
2735             }
2736         }
2737 }
2738
2739 static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
2740                   int spi)
2741 {
2742     struct bpf_verifier_state *st = env->cur_state;
2743     int first_idx = st->first_insn_idx;
2744     int last_idx = env->insn_idx;
2745     struct bpf_func_state *func;
2746     struct bpf_reg_state *reg;
2747     u32 reg_mask = regno >= 0 ? 1u << regno : 0;
2748     u64 stack_mask = spi >= 0 ? 1ull << spi : 0;
2749     bool skip_first = true;
2750     bool new_marks = false;
2751     int i, err;
2752
2753     if (!env->bpf_capable)
2754         return 0;
2755
2756     func = st->frame[st->curframe];
2757     if (regno >= 0) {
2758         reg = &func->regs[regno];
2759         if (reg->type != SCALAR_VALUE) {
2760             WARN_ONCE(1, "backtracing misuse");
2761             return -EFAULT;
2762         }
2763         if (!reg->precise)
2764             new_marks = true;
2765         else
2766             reg_mask = 0;
2767         reg->precise = true;
2768     }
2769
2770     while (spi >= 0) {
2771         if (!is_spilled_reg(&func->stack[spi])) {
2772             stack_mask = 0;
2773             break;
2774         }
2775         reg = &func->stack[spi].spilled_ptr;
2776         if (reg->type != SCALAR_VALUE) {
2777             stack_mask = 0;
2778             break;
2779         }
2780         if (!reg->precise)
2781             new_marks = true;
2782         else
2783             stack_mask = 0;
2784         reg->precise = true;
2785         break;
2786     }
2787
2788     if (!new_marks)
2789         return 0;
2790     if (!reg_mask && !stack_mask)
2791         return 0;
2792     for (;;) {
2793         DECLARE_BITMAP(mask, 64);
2794         u32 history = st->jmp_history_cnt;
2795
2796         if (env->log.level & BPF_LOG_LEVEL2)
2797             verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
2798         for (i = last_idx;;) {
2799             if (skip_first) {
2800                 err = 0;
2801                 skip_first = false;
2802             } else {
2803                 err = backtrack_insn(env, i, &reg_mask, &stack_mask);
2804             }
2805             if (err == -ENOTSUPP) {
2806                 mark_all_scalars_precise(env, st);
2807                 return 0;
2808             } else if (err) {
2809                 return err;
2810             }
2811             if (!reg_mask && !stack_mask)
2812                 /* Found assignment(s) into tracked register in this state.
2813                  * Since this state is already marked, just return.
2814                  * Nothing to be tracked further in the parent state.
2815                  */
2816                 return 0;
2817             if (i == first_idx)
2818                 break;
2819             i = get_prev_insn_idx(st, i, &history);
2820             if (i >= env->prog->len) {
2821                 /* This can happen if backtracking reached insn 0
2822                  * and there are still reg_mask or stack_mask
2823                  * to backtrack.
2824                  * It means the backtracking missed the spot where
2825                  * particular register was initialized with a constant.
2826                  */
2827                 verbose(env, "BUG backtracking idx %d\n", i);
2828                 WARN_ONCE(1, "verifier backtracking bug");
2829                 return -EFAULT;
2830             }
2831         }
2832         st = st->parent;
2833         if (!st)
2834             break;
2835
2836         new_marks = false;
2837         func = st->frame[st->curframe];
2838         bitmap_from_u64(mask, reg_mask);
2839         for_each_set_bit(i, mask, 32) {
2840             reg = &func->regs[i];
2841             if (reg->type != SCALAR_VALUE) {
2842                 reg_mask &= ~(1u << i);
2843                 continue;
2844             }
2845             if (!reg->precise)
2846                 new_marks = true;
2847             reg->precise = true;
2848         }
2849
2850         bitmap_from_u64(mask, stack_mask);
2851         for_each_set_bit(i, mask, 64) {
2852             if (i >= func->allocated_stack / BPF_REG_SIZE) {
2853                 /* the sequence of instructions:
2854                  * 2: (bf) r3 = r10
2855                  * 3: (7b) *(u64 *)(r3 -8) = r0
2856                  * 4: (79) r4 = *(u64 *)(r10 -8)
2857                  * doesn't contain jmps. It's backtracked
2858                  * as a single block.
2859                  * During backtracking insn 3 is not recognized as
2860                  * stack access, so at the end of backtracking
2861                  * stack slot fp-8 is still marked in stack_mask.
2862                  * However the parent state may not have accessed
2863                  * fp-8 and it's "unallocated" stack space.
2864                  * In such case fallback to conservative.
2865                  */
2866                 mark_all_scalars_precise(env, st);
2867                 return 0;
2868             }
2869
2870             if (!is_spilled_reg(&func->stack[i])) {
2871                 stack_mask &= ~(1ull << i);
2872                 continue;
2873             }
2874             reg = &func->stack[i].spilled_ptr;
2875             if (reg->type != SCALAR_VALUE) {
2876                 stack_mask &= ~(1ull << i);
2877                 continue;
2878             }
2879             if (!reg->precise)
2880                 new_marks = true;
2881             reg->precise = true;
2882         }
2883         if (env->log.level & BPF_LOG_LEVEL2) {
2884             verbose(env, "parent %s regs=%x stack=%llx marks:",
2885                 new_marks ? "didn't have" : "already had",
2886                 reg_mask, stack_mask);
2887             print_verifier_state(env, func, true);
2888         }
2889
2890         if (!reg_mask && !stack_mask)
2891             break;
2892         if (!new_marks)
2893             break;
2894
2895         last_idx = st->last_insn_idx;
2896         first_idx = st->first_insn_idx;
2897     }
2898     return 0;
2899 }
2900
2901 static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
2902 {
2903     return __mark_chain_precision(env, regno, -1);
2904 }
2905
2906 static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi)
2907 {
2908     return __mark_chain_precision(env, -1, spi);
2909 }
2910
2911 static bool is_spillable_regtype(enum bpf_reg_type type)
2912 {
2913     switch (base_type(type)) {
2914     case PTR_TO_MAP_VALUE:
2915     case PTR_TO_STACK:
2916     case PTR_TO_CTX:
2917     case PTR_TO_PACKET:
2918     case PTR_TO_PACKET_META:
2919     case PTR_TO_PACKET_END:
2920     case PTR_TO_FLOW_KEYS:
2921     case CONST_PTR_TO_MAP:
2922     case PTR_TO_SOCKET:
2923     case PTR_TO_SOCK_COMMON:
2924     case PTR_TO_TCP_SOCK:
2925     case PTR_TO_XDP_SOCK:
2926     case PTR_TO_BTF_ID:
2927     case PTR_TO_BUF:
2928     case PTR_TO_MEM:
2929     case PTR_TO_FUNC:
2930     case PTR_TO_MAP_KEY:
2931         return true;
2932     default:
2933         return false;
2934     }
2935 }
2936
2937 /* Does this register contain a constant zero? */
2938 static bool register_is_null(struct bpf_reg_state *reg)
2939 {
2940     return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
2941 }
2942
2943 static bool register_is_const(struct bpf_reg_state *reg)
2944 {
2945     return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
2946 }
2947
2948 static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
2949 {
2950     return tnum_is_unknown(reg->var_off) &&
2951            reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
2952            reg->umin_value == 0 && reg->umax_value == U64_MAX &&
2953            reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX &&
2954            reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
2955 }
2956
2957 static bool register_is_bounded(struct bpf_reg_state *reg)
2958 {
2959     return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
2960 }
2961
2962 static bool __is_pointer_value(bool allow_ptr_leaks,
2963                    const struct bpf_reg_state *reg)
2964 {
2965     if (allow_ptr_leaks)
2966         return false;
2967
2968     return reg->type != SCALAR_VALUE;
2969 }
2970
2971 static void save_register_state(struct bpf_func_state *state,
2972                 int spi, struct bpf_reg_state *reg,
2973                 int size)
2974 {
2975     int i;
2976
2977     state->stack[spi].spilled_ptr = *reg;
2978     if (size == BPF_REG_SIZE)
2979         state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
2980
2981     for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
2982         state->stack[spi].slot_type[i - 1] = STACK_SPILL;
2983
2984     /* size < 8 bytes spill */
2985     for (; i; i--)
2986         scrub_spilled_slot(&state->stack[spi].slot_type[i - 1]);
2987 }
2988
2989 /* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
2990  * stack boundary and alignment are checked in check_mem_access()
2991  */
2992 static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
2993                        /* stack frame we're writing to */
2994                        struct bpf_func_state *state,
2995                        int off, int size, int value_regno,
2996                        int insn_idx)
2997 {
2998     struct bpf_func_state *cur; /* state of the current function */
2999     int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
3000     u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
3001     struct bpf_reg_state *reg = NULL;
3002
3003     err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE));
3004     if (err)
3005         return err;
3006     /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
3007      * so it's aligned access and [off, off + size) are within stack limits
3008      */
3009     if (!env->allow_ptr_leaks &&
3010         state->stack[spi].slot_type[0] == STACK_SPILL &&
3011         size != BPF_REG_SIZE) {
3012         verbose(env, "attempt to corrupt spilled pointer on stack\n");
3013         return -EACCES;
3014     }
3015
3016     cur = env->cur_state->frame[env->cur_state->curframe];
3017     if (value_regno >= 0)
3018         reg = &cur->regs[value_regno];
3019     if (!env->bypass_spec_v4) {
3020         bool sanitize = reg && is_spillable_regtype(reg->type);
3021
3022         for (i = 0; i < size; i++) {
3023             if (state->stack[spi].slot_type[i] == STACK_INVALID) {
3024                 sanitize = true;
3025                 break;
3026             }
3027         }
3028
3029         if (sanitize)
3030             env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
3031     }
3032
3033     mark_stack_slot_scratched(env, spi);
3034     if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) &&
3035         !register_is_null(reg) && env->bpf_capable) {
3036         if (dst_reg != BPF_REG_FP) {
3037             /* The backtracking logic can only recognize explicit
3038              * stack slot address like [fp - 8]. Other spill of
3039              * scalar via different register has to be conservative.
3040              * Backtrack from here and mark all registers as precise
3041              * that contributed into 'reg' being a constant.
3042              */
3043             err = mark_chain_precision(env, value_regno);
3044             if (err)
3045                 return err;
3046         }
3047         save_register_state(state, spi, reg, size);
3048     } else if (reg && is_spillable_regtype(reg->type)) {
3049         /* register containing pointer is being spilled into stack */
3050         if (size != BPF_REG_SIZE) {
3051             verbose_linfo(env, insn_idx, "; ");
3052             verbose(env, "invalid size of register spill\n");
3053             return -EACCES;
3054         }
3055         if (state != cur && reg->type == PTR_TO_STACK) {
3056             verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
3057             return -EINVAL;
3058         }
3059         save_register_state(state, spi, reg, size);
3060     } else {
3061         u8 type = STACK_MISC;
3062
3063         /* regular write of data into stack destroys any spilled ptr */
3064         state->stack[spi].spilled_ptr.type = NOT_INIT;
3065         /* Mark slots as STACK_MISC if they belonged to spilled ptr. */
3066         if (is_spilled_reg(&state->stack[spi]))
3067             for (i = 0; i < BPF_REG_SIZE; i++)
3068                 scrub_spilled_slot(&state->stack[spi].slot_type[i]);
3069
3070         /* only mark the slot as written if all 8 bytes were written
3071          * otherwise read propagation may incorrectly stop too soon
3072          * when stack slots are partially written.
3073          * This heuristic means that read propagation will be
3074          * conservative, since it will add reg_live_read marks
3075          * to stack slots all the way to first state when programs
3076          * writes+reads less than 8 bytes
3077          */
3078         if (size == BPF_REG_SIZE)
3079             state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
3080
3081         /* when we zero initialize stack slots mark them as such */
3082         if (reg && register_is_null(reg)) {
3083             /* backtracking doesn't work for STACK_ZERO yet. */
3084             err = mark_chain_precision(env, value_regno);
3085             if (err)
3086                 return err;
3087             type = STACK_ZERO;
3088         }
3089
3090         /* Mark slots affected by this stack write. */
3091         for (i = 0; i < size; i++)
3092             state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
3093                 type;
3094     }
3095     return 0;
3096 }
3097
3098 /* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
3099  * known to contain a variable offset.
3100  * This function checks whether the write is permitted and conservatively
3101  * tracks the effects of the write, considering that each stack slot in the
3102  * dynamic range is potentially written to.
3103  *
3104  * 'off' includes 'regno->off'.
3105  * 'value_regno' can be -1, meaning that an unknown value is being written to
3106  * the stack.
3107  *
3108  * Spilled pointers in range are not marked as written because we don't know
3109  * what's going to be actually written. This means that read propagation for
3110  * future reads cannot be terminated by this write.
3111  *
3112  * For privileged programs, uninitialized stack slots are considered
3113  * initialized by this write (even though we don't know exactly what offsets
3114  * are going to be written to). The idea is that we don't want the verifier to
3115  * reject future reads that access slots written to through variable offsets.
3116  */
3117 static int check_stack_write_var_off(struct bpf_verifier_env *env,
3118                      /* func where register points to */
3119                      struct bpf_func_state *state,
3120                      int ptr_regno, int off, int size,
3121                      int value_regno, int insn_idx)
3122 {
3123     struct bpf_func_state *cur; /* state of the current function */
3124     int min_off, max_off;
3125     int i, err;
3126     struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
3127     bool writing_zero = false;
3128     /* set if the fact that we're writing a zero is used to let any
3129      * stack slots remain STACK_ZERO
3130      */
3131     bool zero_used = false;
3132
3133     cur = env->cur_state->frame[env->cur_state->curframe];
3134     ptr_reg = &cur->regs[ptr_regno];
3135     min_off = ptr_reg->smin_value + off;
3136     max_off = ptr_reg->smax_value + off + size;
3137     if (value_regno >= 0)
3138         value_reg = &cur->regs[value_regno];
3139     if (value_reg && register_is_null(value_reg))
3140         writing_zero = true;
3141
3142     err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE));
3143     if (err)
3144         return err;
3145
3146
3147     /* Variable offset writes destroy any spilled pointers in range. */
3148     for (i = min_off; i < max_off; i++) {
3149         u8 new_type, *stype;
3150         int slot, spi;
3151
3152         slot = -i - 1;
3153         spi = slot / BPF_REG_SIZE;
3154         stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
3155         mark_stack_slot_scratched(env, spi);
3156
3157         if (!env->allow_ptr_leaks
3158                 && *stype != NOT_INIT
3159                 && *stype != SCALAR_VALUE) {
3160             /* Reject the write if there's are spilled pointers in
3161              * range. If we didn't reject here, the ptr status
3162              * would be erased below (even though not all slots are
3163              * actually overwritten), possibly opening the door to
3164              * leaks.
3165              */
3166             verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
3167                 insn_idx, i);
3168             return -EINVAL;
3169         }
3170
3171         /* Erase all spilled pointers. */
3172         state->stack[spi].spilled_ptr.type = NOT_INIT;
3173
3174         /* Update the slot type. */
3175         new_type = STACK_MISC;
3176         if (writing_zero && *stype == STACK_ZERO) {
3177             new_type = STACK_ZERO;
3178             zero_used = true;
3179         }
3180         /* If the slot is STACK_INVALID, we check whether it's OK to
3181          * pretend that it will be initialized by this write. The slot
3182          * might not actually be written to, and so if we mark it as
3183          * initialized future reads might leak uninitialized memory.
3184          * For privileged programs, we will accept such reads to slots
3185          * that may or may not be written because, if we're reject
3186          * them, the error would be too confusing.
3187          */
3188         if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
3189             verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
3190                     insn_idx, i);
3191             return -EINVAL;
3192         }
3193         *stype = new_type;
3194     }
3195     if (zero_used) {
3196         /* backtracking doesn't work for STACK_ZERO yet. */
3197         err = mark_chain_precision(env, value_regno);
3198         if (err)
3199             return err;
3200     }
3201     return 0;
3202 }
3203
3204 /* When register 'dst_regno' is assigned some values from stack[min_off,
3205  * max_off), we set the register's type according to the types of the
3206  * respective stack slots. If all the stack values are known to be zeros, then
3207  * so is the destination reg. Otherwise, the register is considered to be
3208  * SCALAR. This function does not deal with register filling; the caller must
3209  * ensure that all spilled registers in the stack range have been marked as
3210  * read.
3211  */
3212 static void mark_reg_stack_read(struct bpf_verifier_env *env,
3213                 /* func where src register points to */
3214                 struct bpf_func_state *ptr_state,
3215                 int min_off, int max_off, int dst_regno)
3216 {
3217     struct bpf_verifier_state *vstate = env->cur_state;
3218     struct bpf_func_state *state = vstate->frame[vstate->curframe];
3219     int i, slot, spi;
3220     u8 *stype;
3221     int zeros = 0;
3222
3223     for (i = min_off; i < max_off; i++) {
3224         slot = -i - 1;
3225         spi = slot / BPF_REG_SIZE;
3226         stype = ptr_state->stack[spi].slot_type;
3227         if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
3228             break;
3229         zeros++;
3230     }
3231     if (zeros == max_off - min_off) {
3232         /* any access_size read into register is zero extended,
3233          * so the whole register == const_zero
3234          */
3235         __mark_reg_const_zero(&state->regs[dst_regno]);
3236         /* backtracking doesn't support STACK_ZERO yet,
3237          * so mark it precise here, so that later
3238          * backtracking can stop here.
3239          * Backtracking may not need this if this register
3240          * doesn't participate in pointer adjustment.
3241          * Forward propagation of precise flag is not
3242          * necessary either. This mark is only to stop
3243          * backtracking. Any register that contributed
3244          * to const 0 was marked precise before spill.
3245          */
3246         state->regs[dst_regno].precise = true;
3247     } else {
3248         /* have read misc data from the stack */
3249         mark_reg_unknown(env, state->regs, dst_regno);
3250     }
3251     state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
3252 }
3253
3254 /* Read the stack at 'off' and put the results into the register indicated by
3255  * 'dst_regno'. It handles reg filling if the addressed stack slot is a
3256  * spilled reg.
3257  *
3258  * 'dst_regno' can be -1, meaning that the read value is not going to a
3259  * register.
3260  *
3261  * The access is assumed to be within the current stack bounds.
3262  */
3263 static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
3264                       /* func where src register points to */
3265                       struct bpf_func_state *reg_state,
3266                       int off, int size, int dst_regno)
3267 {
3268     struct bpf_verifier_state *vstate = env->cur_state;
3269     struct bpf_func_state *state = vstate->frame[vstate->curframe];
3270     int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
3271     struct bpf_reg_state *reg;
3272     u8 *stype, type;
3273
3274     stype = reg_state->stack[spi].slot_type;
3275     reg = &reg_state->stack[spi].spilled_ptr;
3276
3277     if (is_spilled_reg(&reg_state->stack[spi])) {
3278         u8 spill_size = 1;
3279
3280         for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
3281             spill_size++;
3282
3283         if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) {
3284             if (reg->type != SCALAR_VALUE) {
3285                 verbose_linfo(env, env->insn_idx, "; ");
3286                 verbose(env, "invalid size of register fill\n");
3287                 return -EACCES;
3288             }
3289
3290             mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
3291             if (dst_regno < 0)
3292                 return 0;
3293
3294             if (!(off % BPF_REG_SIZE) && size == spill_size) {
3295                 /* The earlier check_reg_arg() has decided the
3296                  * subreg_def for this insn.  Save it first.
3297                  */
3298                 s32 subreg_def = state->regs[dst_regno].subreg_def;
3299
3300                 state->regs[dst_regno] = *reg;
3301                 state->regs[dst_regno].subreg_def = subreg_def;
3302             } else {
3303                 for (i = 0; i < size; i++) {
3304                     type = stype[(slot - i) % BPF_REG_SIZE];
3305                     if (type == STACK_SPILL)
3306                         continue;
3307                     if (type == STACK_MISC)
3308                         continue;
3309                     verbose(env, "invalid read from stack off %d+%d size %d\n",
3310                         off, i, size);
3311                     return -EACCES;
3312                 }
3313                 mark_reg_unknown(env, state->regs, dst_regno);
3314             }
3315             state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
3316             return 0;
3317         }
3318
3319         if (dst_regno >= 0) {
3320             /* restore register state from stack */
3321             state->regs[dst_regno] = *reg;
3322             /* mark reg as written since spilled pointer state likely
3323              * has its liveness marks cleared by is_state_visited()
3324              * which resets stack/reg liveness for state transitions
3325              */
3326             state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
3327         } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
3328             /* If dst_regno==-1, the caller is asking us whether
3329              * it is acceptable to use this value as a SCALAR_VALUE
3330              * (e.g. for XADD).
3331              * We must not allow unprivileged callers to do that
3332              * with spilled pointers.
3333              */
3334             verbose(env, "leaking pointer from stack off %d\n",
3335                 off);
3336             return -EACCES;
3337         }
3338         mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
3339     } else {
3340         for (i = 0; i < size; i++) {
3341             type = stype[(slot - i) % BPF_REG_SIZE];
3342             if (type == STACK_MISC)
3343                 continue;
3344             if (type == STACK_ZERO)
3345                 continue;
3346             verbose(env, "invalid read from stack off %d+%d size %d\n",
3347                 off, i, size);
3348             return -EACCES;
3349         }
3350         mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
3351         if (dst_regno >= 0)
3352             mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
3353     }
3354     return 0;
3355 }
3356
3357 enum bpf_access_src {
3358     ACCESS_DIRECT = 1,  /* the access is performed by an instruction */
3359     ACCESS_HELPER = 2,  /* the access is performed by a helper */
3360 };
3361
3362 static int check_stack_range_initialized(struct bpf_verifier_env *env,
3363                      int regno, int off, int access_size,
3364                      bool zero_size_allowed,
3365                      enum bpf_access_src type,
3366                      struct bpf_call_arg_meta *meta);
3367
3368 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
3369 {
3370     return cur_regs(env) + regno;
3371 }
3372
3373 /* Read the stack at 'ptr_regno + off' and put the result into the register
3374  * 'dst_regno'.
3375  * 'off' includes the pointer register's fixed offset(i.e. 'ptr_regno.off'),
3376  * but not its variable offset.
3377  * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
3378  *
3379  * As opposed to check_stack_read_fixed_off, this function doesn't deal with
3380  * filling registers (i.e. reads of spilled register cannot be detected when
3381  * the offset is not fixed). We conservatively mark 'dst_regno' as containing
3382  * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
3383  * offset; for a fixed offset check_stack_read_fixed_off should be used
3384  * instead.
3385  */
3386 static int check_stack_read_var_off(struct bpf_verifier_env *env,
3387                     int ptr_regno, int off, int size, int dst_regno)
3388 {
3389     /* The state of the source register. */
3390     struct bpf_reg_state *reg = reg_state(env, ptr_regno);
3391     struct bpf_func_state *ptr_state = func(env, reg);
3392     int err;
3393     int min_off, max_off;
3394
3395     /* Note that we pass a NULL meta, so raw access will not be permitted.
3396      */
3397     err = check_stack_range_initialized(env, ptr_regno, off, size,
3398                         false, ACCESS_DIRECT, NULL);
3399     if (err)
3400         return err;
3401
3402     min_off = reg->smin_value + off;
3403     max_off = reg->smax_value + off;
3404     mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
3405     return 0;
3406 }
3407
3408 /* check_stack_read dispatches to check_stack_read_fixed_off or
3409  * check_stack_read_var_off.
3410  *
3411  * The caller must ensure that the offset falls within the allocated stack
3412  * bounds.
3413  *
3414  * 'dst_regno' is a register which will receive the value from the stack. It
3415  * can be -1, meaning that the read value is not going to a register.
3416  */
3417 static int check_stack_read(struct bpf_verifier_env *env,
3418                 int ptr_regno, int off, int size,
3419                 int dst_regno)
3420 {
3421     struct bpf_reg_state *reg = reg_state(env, ptr_regno);
3422     struct bpf_func_state *state = func(env, reg);
3423     int err;
3424     /* Some accesses are only permitted with a static offset. */
3425     bool var_off = !tnum_is_const(reg->var_off);
3426
3427     /* The offset is required to be static when reads don't go to a
3428      * register, in order to not leak pointers (see
3429      * check_stack_read_fixed_off).
3430      */
3431     if (dst_regno < 0 && var_off) {
3432         char tn_buf[48];
3433
3434         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3435         verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
3436             tn_buf, off, size);
3437         return -EACCES;
3438     }
3439     /* Variable offset is prohibited for unprivileged mode for simplicity
3440      * since it requires corresponding support in Spectre masking for stack
3441      * ALU. See also retrieve_ptr_limit().
3442      */
3443     if (!env->bypass_spec_v1 && var_off) {
3444         char tn_buf[48];
3445
3446         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3447         verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n",
3448                 ptr_regno, tn_buf);
3449         return -EACCES;
3450     }
3451
3452     if (!var_off) {
3453         off += reg->var_off.value;
3454         err = check_stack_read_fixed_off(env, state, off, size,
3455                          dst_regno);
3456     } else {
3457         /* Variable offset stack reads need more conservative handling
3458          * than fixed offset ones. Note that dst_regno >= 0 on this
3459          * branch.
3460          */
3461         err = check_stack_read_var_off(env, ptr_regno, off, size,
3462                            dst_regno);
3463     }
3464     return err;
3465 }
3466
3467
3468 /* check_stack_write dispatches to check_stack_write_fixed_off or
3469  * check_stack_write_var_off.
3470  *
3471  * 'ptr_regno' is the register used as a pointer into the stack.
3472  * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
3473  * 'value_regno' is the register whose value we're writing to the stack. It can
3474  * be -1, meaning that we're not writing from a register.
3475  *
3476  * The caller must ensure that the offset falls within the maximum stack size.
3477  */
3478 static int check_stack_write(struct bpf_verifier_env *env,
3479                  int ptr_regno, int off, int size,
3480                  int value_regno, int insn_idx)
3481 {
3482     struct bpf_reg_state *reg = reg_state(env, ptr_regno);
3483     struct bpf_func_state *state = func(env, reg);
3484     int err;
3485
3486     if (tnum_is_const(reg->var_off)) {
3487         off += reg->var_off.value;
3488         err = check_stack_write_fixed_off(env, state, off, size,
3489                           value_regno, insn_idx);
3490     } else {
3491         /* Variable offset stack reads need more conservative handling
3492          * than fixed offset ones.
3493          */
3494         err = check_stack_write_var_off(env, state,
3495                         ptr_regno, off, size,
3496                         value_regno, insn_idx);
3497     }
3498     return err;
3499 }
3500
3501 static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
3502                  int off, int size, enum bpf_access_type type)
3503 {
3504     struct bpf_reg_state *regs = cur_regs(env);
3505     struct bpf_map *map = regs[regno].map_ptr;
3506     u32 cap = bpf_map_flags_to_cap(map);
3507
3508     if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
3509         verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
3510             map->value_size, off, size);
3511         return -EACCES;
3512     }
3513
3514     if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
3515         verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
3516             map->value_size, off, size);
3517         return -EACCES;
3518     }
3519
3520     return 0;
3521 }
3522
3523 /* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
3524 static int __check_mem_access(struct bpf_verifier_env *env, int regno,
3525                   int off, int size, u32 mem_size,
3526                   bool zero_size_allowed)
3527 {
3528     bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
3529     struct bpf_reg_state *reg;
3530
3531     if (off >= 0 && size_ok && (u64)off + size <= mem_size)
3532         return 0;
3533
3534     reg = &cur_regs(env)[regno];
3535     switch (reg->type) {
3536     case PTR_TO_MAP_KEY:
3537         verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
3538             mem_size, off, size);
3539         break;
3540     case PTR_TO_MAP_VALUE:
3541         verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
3542             mem_size, off, size);
3543         break;
3544     case PTR_TO_PACKET:
3545     case PTR_TO_PACKET_META:
3546     case PTR_TO_PACKET_END:
3547         verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
3548             off, size, regno, reg->id, off, mem_size);
3549         break;
3550     case PTR_TO_MEM:
3551     default:
3552         verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
3553             mem_size, off, size);
3554     }
3555
3556     return -EACCES;
3557 }
3558
3559 /* check read/write into a memory region with possible variable offset */
3560 static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
3561                    int off, int size, u32 mem_size,
3562                    bool zero_size_allowed)
3563 {
3564     struct bpf_verifier_state *vstate = env->cur_state;
3565     struct bpf_func_state *state = vstate->frame[vstate->curframe];
3566     struct bpf_reg_state *reg = &state->regs[regno];
3567     int err;
3568
3569     /* We may have adjusted the register pointing to memory region, so we
3570      * need to try adding each of min_value and max_value to off
3571      * to make sure our theoretical access will be safe.
3572      *
3573      * The minimum value is only important with signed
3574      * comparisons where we can't assume the floor of a
3575      * value is 0.  If we are using signed variables for our
3576      * index'es we need to make sure that whatever we use
3577      * will have a set floor within our range.
3578      */
3579     if (reg->smin_value < 0 &&
3580         (reg->smin_value == S64_MIN ||
3581          (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
3582           reg->smin_value + off < 0)) {
3583         verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
3584             regno);
3585         return -EACCES;
3586     }
3587     err = __check_mem_access(env, regno, reg->smin_value + off, size,
3588                  mem_size, zero_size_allowed);
3589     if (err) {
3590         verbose(env, "R%d min value is outside of the allowed memory range\n",
3591             regno);
3592         return err;
3593     }
3594
3595     /* If we haven't set a max value then we need to bail since we can't be
3596      * sure we won't do bad things.
3597      * If reg->umax_value + off could overflow, treat that as unbounded too.
3598      */
3599     if (reg->umax_value >= BPF_MAX_VAR_OFF) {
3600         verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n",
3601             regno);
3602         return -EACCES;
3603     }
3604     err = __check_mem_access(env, regno, reg->umax_value + off, size,
3605                  mem_size, zero_size_allowed);
3606     if (err) {
3607         verbose(env, "R%d max value is outside of the allowed memory range\n",
3608             regno);
3609         return err;
3610     }
3611
3612     return 0;
3613 }
3614
3615 static int __check_ptr_off_reg(struct bpf_verifier_env *env,
3616                    const struct bpf_reg_state *reg, int regno,
3617                    bool fixed_off_ok)
3618 {
3619     /* Access to this pointer-typed register or passing it to a helper
3620      * is only allowed in its original, unmodified form.
3621      */
3622
3623     if (reg->off < 0) {
3624         verbose(env, "negative offset %s ptr R%d off=%d disallowed\n",
3625             reg_type_str(env, reg->type), regno, reg->off);
3626         return -EACCES;
3627     }
3628
3629     if (!fixed_off_ok && reg->off) {
3630         verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
3631             reg_type_str(env, reg->type), regno, reg->off);
3632         return -EACCES;
3633     }
3634
3635     if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3636         char tn_buf[48];
3637
3638         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3639         verbose(env, "variable %s access var_off=%s disallowed\n",
3640             reg_type_str(env, reg->type), tn_buf);
3641         return -EACCES;
3642     }
3643
3644     return 0;
3645 }
3646
3647 int check_ptr_off_reg(struct bpf_verifier_env *env,
3648               const struct bpf_reg_state *reg, int regno)
3649 {
3650     return __check_ptr_off_reg(env, reg, regno, false);
3651 }
3652
3653 static int map_kptr_match_type(struct bpf_verifier_env *env,
3654                    struct bpf_map_value_off_desc *off_desc,
3655                    struct bpf_reg_state *reg, u32 regno)
3656 {
3657     const char *targ_name = kernel_type_name(off_desc->kptr.btf, off_desc->kptr.btf_id);
3658     int perm_flags = PTR_MAYBE_NULL;
3659     const char *reg_name = "";
3660
3661     /* Only unreferenced case accepts untrusted pointers */
3662     if (off_desc->type == BPF_KPTR_UNREF)
3663         perm_flags |= PTR_UNTRUSTED;
3664
3665     if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags))
3666         goto bad_type;
3667
3668     if (!btf_is_kernel(reg->btf)) {
3669         verbose(env, "R%d must point to kernel BTF\n", regno);
3670         return -EINVAL;
3671     }
3672     /* We need to verify reg->type and reg->btf, before accessing reg->btf */
3673     reg_name = kernel_type_name(reg->btf, reg->btf_id);
3674
3675     /* For ref_ptr case, release function check should ensure we get one
3676      * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the
3677      * normal store of unreferenced kptr, we must ensure var_off is zero.
3678      * Since ref_ptr cannot be accessed directly by BPF insns, checks for
3679      * reg->off and reg->ref_obj_id are not needed here.
3680      */
3681     if (__check_ptr_off_reg(env, reg, regno, true))
3682         return -EACCES;
3683
3684     /* A full type match is needed, as BTF can be vmlinux or module BTF, and
3685      * we also need to take into account the reg->off.
3686      *
3687      * We want to support cases like:
3688      *
3689      * struct foo {
3690      *         struct bar br;
3691      *         struct baz bz;
3692      * };
3693      *
3694      * struct foo *v;
3695      * v = func();        // PTR_TO_BTF_ID
3696      * val->foo = v;      // reg->off is zero, btf and btf_id match type
3697      * val->bar = &v->br; // reg->off is still zero, but we need to retry with
3698      *                    // first member type of struct after comparison fails
3699      * val->baz = &v->bz; // reg->off is non-zero, so struct needs to be walked
3700      *                    // to match type
3701      *
3702      * In the kptr_ref case, check_func_arg_reg_off already ensures reg->off
3703      * is zero. We must also ensure that btf_struct_ids_match does not walk
3704      * the struct to match type against first member of struct, i.e. reject
3705      * second case from above. Hence, when type is BPF_KPTR_REF, we set
3706      * strict mode to true for type match.
3707      */
3708     if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
3709                   off_desc->kptr.btf, off_desc->kptr.btf_id,
3710                   off_desc->type == BPF_KPTR_REF))
3711         goto bad_type;
3712     return 0;
3713 bad_type:
3714     verbose(env, "invalid kptr access, R%d type=%s%s ", regno,
3715         reg_type_str(env, reg->type), reg_name);
3716     verbose(env, "expected=%s%s", reg_type_str(env, PTR_TO_BTF_ID), targ_name);
3717     if (off_desc->type == BPF_KPTR_UNREF)
3718         verbose(env, " or %s%s\n", reg_type_str(env, PTR_TO_BTF_ID | PTR_UNTRUSTED),
3719             targ_name);
3720     else
3721         verbose(env, "\n");
3722     return -EINVAL;
3723 }
3724
3725 static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
3726                  int value_regno, int insn_idx,
3727                  struct bpf_map_value_off_desc *off_desc)
3728 {
3729     struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
3730     int class = BPF_CLASS(insn->code);
3731     struct bpf_reg_state *val_reg;
3732
3733     /* Things we already checked for in check_map_access and caller:
3734      *  - Reject cases where variable offset may touch kptr
3735      *  - size of access (must be BPF_DW)
3736      *  - tnum_is_const(reg->var_off)
3737      *  - off_desc->offset == off + reg->var_off.value
3738      */
3739     /* Only BPF_[LDX,STX,ST] | BPF_MEM | BPF_DW is supported */
3740     if (BPF_MODE(insn->code) != BPF_MEM) {
3741         verbose(env, "kptr in map can only be accessed using BPF_MEM instruction mode\n");
3742         return -EACCES;
3743     }
3744
3745     /* We only allow loading referenced kptr, since it will be marked as
3746      * untrusted, similar to unreferenced kptr.
3747      */
3748     if (class != BPF_LDX && off_desc->type == BPF_KPTR_REF) {
3749         verbose(env, "store to referenced kptr disallowed\n");
3750         return -EACCES;
3751     }
3752
3753     if (class == BPF_LDX) {
3754         val_reg = reg_state(env, value_regno);
3755         /* We can simply mark the value_regno receiving the pointer
3756          * value from map as PTR_TO_BTF_ID, with the correct type.
3757          */
3758         mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, off_desc->kptr.btf,
3759                 off_desc->kptr.btf_id, PTR_MAYBE_NULL | PTR_UNTRUSTED);
3760         /* For mark_ptr_or_null_reg */
3761         val_reg->id = ++env->id_gen;
3762     } else if (class == BPF_STX) {
3763         val_reg = reg_state(env, value_regno);
3764         if (!register_is_null(val_reg) &&
3765             map_kptr_match_type(env, off_desc, val_reg, value_regno))
3766             return -EACCES;
3767     } else if (class == BPF_ST) {
3768         if (insn->imm) {
3769             verbose(env, "BPF_ST imm must be 0 when storing to kptr at off=%u\n",
3770                 off_desc->offset);
3771             return -EACCES;
3772         }
3773     } else {
3774         verbose(env, "kptr in map can only be accessed using BPF_LDX/BPF_STX/BPF_ST\n");
3775         return -EACCES;
3776     }
3777     return 0;
3778 }
3779
3780 /* check read/write into a map element with possible variable offset */
3781 static int check_map_access(struct bpf_verifier_env *env, u32 regno,
3782                 int off, int size, bool zero_size_allowed,
3783                 enum bpf_access_src src)
3784 {
3785     struct bpf_verifier_state *vstate = env->cur_state;
3786     struct bpf_func_state *state = vstate->frame[vstate->curframe];
3787     struct bpf_reg_state *reg = &state->regs[regno];
3788     struct bpf_map *map = reg->map_ptr;
3789     int err;
3790
3791     err = check_mem_region_access(env, regno, off, size, map->value_size,
3792                       zero_size_allowed);
3793     if (err)
3794         return err;
3795
3796     if (map_value_has_spin_lock(map)) {
3797         u32 lock = map->spin_lock_off;
3798
3799         /* if any part of struct bpf_spin_lock can be touched by
3800          * load/store reject this program.
3801          * To check that [x1, x2) overlaps with [y1, y2)
3802          * it is sufficient to check x1 < y2 && y1 < x2.
3803          */
3804         if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) &&
3805              lock < reg->umax_value + off + size) {
3806             verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
3807             return -EACCES;
3808         }
3809     }
3810     if (map_value_has_timer(map)) {
3811         u32 t = map->timer_off;
3812
3813         if (reg->smin_value + off < t + sizeof(struct bpf_timer) &&
3814              t < reg->umax_value + off + size) {
3815             verbose(env, "bpf_timer cannot be accessed directly by load/store\n");
3816             return -EACCES;
3817         }
3818     }
3819     if (map_value_has_kptrs(map)) {
3820         struct bpf_map_value_off *tab = map->kptr_off_tab;
3821         int i;
3822
3823         for (i = 0; i < tab->nr_off; i++) {
3824             u32 p = tab->off[i].offset;
3825
3826             if (reg->smin_value + off < p + sizeof(u64) &&
3827                 p < reg->umax_value + off + size) {
3828                 if (src != ACCESS_DIRECT) {
3829                     verbose(env, "kptr cannot be accessed indirectly by helper\n");
3830                     return -EACCES;
3831                 }
3832                 if (!tnum_is_const(reg->var_off)) {
3833                     verbose(env, "kptr access cannot have variable offset\n");
3834                     return -EACCES;
3835                 }
3836                 if (p != off + reg->var_off.value) {
3837                     verbose(env, "kptr access misaligned expected=%u off=%llu\n",
3838                         p, off + reg->var_off.value);
3839                     return -EACCES;
3840                 }
3841                 if (size != bpf_size_to_bytes(BPF_DW)) {
3842                     verbose(env, "kptr access size must be BPF_DW\n");
3843                     return -EACCES;
3844                 }
3845                 break;
3846             }
3847         }
3848     }
3849     return err;
3850 }
3851
3852 #define MAX_PACKET_OFF 0xffff
3853
3854 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
3855                        const struct bpf_call_arg_meta *meta,
3856                        enum bpf_access_type t)
3857 {
3858     enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
3859
3860     switch (prog_type) {
3861     /* Program types only with direct read access go here! */
3862     case BPF_PROG_TYPE_LWT_IN:
3863     case BPF_PROG_TYPE_LWT_OUT:
3864     case BPF_PROG_TYPE_LWT_SEG6LOCAL:
3865     case BPF_PROG_TYPE_SK_REUSEPORT:
3866     case BPF_PROG_TYPE_FLOW_DISSECTOR:
3867     case BPF_PROG_TYPE_CGROUP_SKB:
3868         if (t == BPF_WRITE)
3869             return false;
3870         fallthrough;
3871
3872     /* Program types with direct read + write access go here! */
3873     case BPF_PROG_TYPE_SCHED_CLS:
3874     case BPF_PROG_TYPE_SCHED_ACT:
3875     case BPF_PROG_TYPE_XDP:
3876     case BPF_PROG_TYPE_LWT_XMIT:
3877     case BPF_PROG_TYPE_SK_SKB:
3878     case BPF_PROG_TYPE_SK_MSG:
3879         if (meta)
3880             return meta->pkt_access;
3881
3882         env->seen_direct_write = true;
3883         return true;
3884
3885     case BPF_PROG_TYPE_CGROUP_SOCKOPT:
3886         if (t == BPF_WRITE)
3887             env->seen_direct_write = true;
3888
3889         return true;
3890
3891     default:
3892         return false;
3893     }
3894 }
3895
3896 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
3897                    int size, bool zero_size_allowed)
3898 {
3899     struct bpf_reg_state *regs = cur_regs(env);
3900     struct bpf_reg_state *reg = &regs[regno];
3901     int err;
3902
3903     /* We may have added a variable offset to the packet pointer; but any
3904      * reg->range we have comes after that.  We are only checking the fixed
3905      * offset.
3906      */
3907
3908     /* We don't allow negative numbers, because we aren't tracking enough
3909      * detail to prove they're safe.
3910      */
3911     if (reg->smin_value < 0) {
3912         verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
3913             regno);
3914         return -EACCES;
3915     }
3916
3917     err = reg->range < 0 ? -EINVAL :
3918           __check_mem_access(env, regno, off, size, reg->range,
3919                  zero_size_allowed);
3920     if (err) {
3921         verbose(env, "R%d offset is outside of the packet\n", regno);
3922         return err;
3923     }
3924
3925     /* __check_mem_access has made sure "off + size - 1" is within u16.
3926      * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
3927      * otherwise find_good_pkt_pointers would have refused to set range info
3928      * that __check_mem_access would have rejected this pkt access.
3929      * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
3930      */
3931     env->prog->aux->max_pkt_offset =
3932         max_t(u32, env->prog->aux->max_pkt_offset,
3933               off + reg->umax_value + size - 1);
3934
3935     return err;
3936 }
3937
3938 /* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
3939 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
3940                 enum bpf_access_type t, enum bpf_reg_type *reg_type,
3941                 struct btf **btf, u32 *btf_id)
3942 {
3943     struct bpf_insn_access_aux info = {
3944         .reg_type = *reg_type,
3945         .log = &env->log,
3946     };
3947
3948     if (env->ops->is_valid_access &&
3949         env->ops->is_valid_access(off, size, t, env->prog, &info)) {
3950         /* A non zero info.ctx_field_size indicates that this field is a
3951          * candidate for later verifier transformation to load the whole
3952          * field and then apply a mask when accessed with a narrower
3953          * access than actual ctx access size. A zero info.ctx_field_size
3954          * will only allow for whole field access and rejects any other
3955          * type of narrower access.
3956          */
3957         *reg_type = info.reg_type;
3958
3959         if (base_type(*reg_type) == PTR_TO_BTF_ID) {
3960             *btf = info.btf;
3961             *btf_id = info.btf_id;
3962         } else {
3963             env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
3964         }
3965         /* remember the offset of last byte accessed in ctx */
3966         if (env->prog->aux->max_ctx_offset < off + size)
3967             env->prog->aux->max_ctx_offset = off + size;
3968         return 0;
3969     }
3970
3971     verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
3972     return -EACCES;
3973 }
3974
3975 static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
3976                   int size)
3977 {
3978     if (size < 0 || off < 0 ||
3979         (u64)off + size > sizeof(struct bpf_flow_keys)) {
3980         verbose(env, "invalid access to flow keys off=%d size=%d\n",
3981             off, size);
3982         return -EACCES;
3983     }
3984     return 0;
3985 }
3986
3987 static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
3988                  u32 regno, int off, int size,
3989                  enum bpf_access_type t)
3990 {
3991     struct bpf_reg_state *regs = cur_regs(env);
3992     struct bpf_reg_state *reg = &regs[regno];
3993     struct bpf_insn_access_aux info = {};
3994     bool valid;
3995
3996     if (reg->smin_value < 0) {
3997         verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
3998             regno);
3999         return -EACCES;
4000     }
4001
4002     switch (reg->type) {
4003     case PTR_TO_SOCK_COMMON:
4004         valid = bpf_sock_common_is_valid_access(off, size, t, &info);
4005         break;
4006     case PTR_TO_SOCKET:
4007         valid = bpf_sock_is_valid_access(off, size, t, &info);
4008         break;
4009     case PTR_TO_TCP_SOCK:
4010         valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
4011         break;
4012     case PTR_TO_XDP_SOCK:
4013         valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
4014         break;
4015     default:
4016         valid = false;
4017     }
4018
4019
4020     if (valid) {
4021         env->insn_aux_data[insn_idx].ctx_field_size =
4022             info.ctx_field_size;
4023         return 0;
4024     }
4025
4026     verbose(env, "R%d invalid %s access off=%d size=%d\n",
4027         regno, reg_type_str(env, reg->type), off, size);
4028
4029     return -EACCES;
4030 }
4031
4032 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
4033 {
4034     return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
4035 }
4036
4037 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
4038 {
4039     const struct bpf_reg_state *reg = reg_state(env, regno);
4040
4041     return reg->type == PTR_TO_CTX;
4042 }
4043
4044 static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
4045 {
4046     const struct bpf_reg_state *reg = reg_state(env, regno);
4047
4048     return type_is_sk_pointer(reg->type);
4049 }
4050
4051 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
4052 {
4053     const struct bpf_reg_state *reg = reg_state(env, regno);
4054
4055     return type_is_pkt_pointer(reg->type);
4056 }
4057
4058 static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
4059 {
4060     const struct bpf_reg_state *reg = reg_state(env, regno);
4061
4062     /* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
4063     return reg->type == PTR_TO_FLOW_KEYS;
4064 }
4065
4066 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
4067                    const struct bpf_reg_state *reg,
4068                    int off, int size, bool strict)
4069 {
4070     struct tnum reg_off;
4071     int ip_align;
4072
4073     /* Byte size accesses are always allowed. */
4074     if (!strict || size == 1)
4075         return 0;
4076
4077     /* For platforms that do not have a Kconfig enabling
4078      * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
4079      * NET_IP_ALIGN is universally set to '2'.  And on platforms
4080      * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
4081      * to this code only in strict mode where we want to emulate
4082      * the NET_IP_ALIGN==2 checking.  Therefore use an
4083      * unconditional IP align value of '2'.
4084      */
4085     ip_align = 2;
4086
4087     reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
4088     if (!tnum_is_aligned(reg_off, size)) {
4089         char tn_buf[48];
4090
4091         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4092         verbose(env,
4093             "misaligned packet access off %d+%s+%d+%d size %d\n",
4094             ip_align, tn_buf, reg->off, off, size);
4095         return -EACCES;
4096     }
4097
4098     return 0;
4099 }
4100
4101 static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
4102                        const struct bpf_reg_state *reg,
4103                        const char *pointer_desc,
4104                        int off, int size, bool strict)
4105 {
4106     struct tnum reg_off;
4107
4108     /* Byte size accesses are always allowed. */
4109     if (!strict || size == 1)
4110         return 0;
4111
4112     reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
4113     if (!tnum_is_aligned(reg_off, size)) {
4114         char tn_buf[48];
4115
4116         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4117         verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
4118             pointer_desc, tn_buf, reg->off, off, size);
4119         return -EACCES;
4120     }
4121
4122     return 0;
4123 }
4124
4125 static int check_ptr_alignment(struct bpf_verifier_env *env,
4126                    const struct bpf_reg_state *reg, int off,
4127                    int size, bool strict_alignment_once)
4128 {
4129     bool strict = env->strict_alignment || strict_alignment_once;
4130     const char *pointer_desc = "";
4131
4132     switch (reg->type) {
4133     case PTR_TO_PACKET:
4134     case PTR_TO_PACKET_META:
4135         /* Special case, because of NET_IP_ALIGN. Given metadata sits
4136          * right in front, treat it the very same way.
4137          */
4138         return check_pkt_ptr_alignment(env, reg, off, size, strict);
4139     case PTR_TO_FLOW_KEYS:
4140         pointer_desc = "flow keys ";
4141         break;
4142     case PTR_TO_MAP_KEY:
4143         pointer_desc = "key ";
4144         break;
4145     case PTR_TO_MAP_VALUE:
4146         pointer_desc = "value ";
4147         break;
4148     case PTR_TO_CTX:
4149         pointer_desc = "context ";
4150         break;
4151     case PTR_TO_STACK:
4152         pointer_desc = "stack ";
4153         /* The stack spill tracking logic in check_stack_write_fixed_off()
4154          * and check_stack_read_fixed_off() relies on stack accesses being
4155          * aligned.
4156          */
4157         strict = true;
4158         break;
4159     case PTR_TO_SOCKET:
4160         pointer_desc = "sock ";
4161         break;
4162     case PTR_TO_SOCK_COMMON:
4163         pointer_desc = "sock_common ";
4164         break;
4165     case PTR_TO_TCP_SOCK:
4166         pointer_desc = "tcp_sock ";
4167         break;
4168     case PTR_TO_XDP_SOCK:
4169         pointer_desc = "xdp_sock ";
4170         break;
4171     default:
4172         break;
4173     }
4174     return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
4175                        strict);
4176 }
4177
4178 static int update_stack_depth(struct bpf_verifier_env *env,
4179                   const struct bpf_func_state *func,
4180                   int off)
4181 {
4182     u16 stack = env->subprog_info[func->subprogno].stack_depth;
4183
4184     if (stack >= -off)
4185         return 0;
4186
4187     /* update known max for given subprogram */
4188     env->subprog_info[func->subprogno].stack_depth = -off;
4189     return 0;
4190 }
4191
4192 /* starting from main bpf function walk all instructions of the function
4193  * and recursively walk all callees that given function can call.
4194  * Ignore jump and exit insns.
4195  * Since recursion is prevented by check_cfg() this algorithm
4196  * only needs a local stack of MAX_CALL_FRAMES to remember callsites
4197  */
4198 static int check_max_stack_depth(struct bpf_verifier_env *env)
4199 {
4200     int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
4201     struct bpf_subprog_info *subprog = env->subprog_info;
4202     struct bpf_insn *insn = env->prog->insnsi;
4203     bool tail_call_reachable = false;
4204     int ret_insn[MAX_CALL_FRAMES];
4205     int ret_prog[MAX_CALL_FRAMES];
4206     int j;
4207
4208 process_func:
4209     /* protect against potential stack overflow that might happen when
4210      * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
4211      * depth for such case down to 256 so that the worst case scenario
4212      * would result in 8k stack size (32 which is tailcall limit * 256 =
4213      * 8k).
4214      *
4215      * To get the idea what might happen, see an example:
4216      * func1 -> sub rsp, 128
4217      *  subfunc1 -> sub rsp, 256
4218      *  tailcall1 -> add rsp, 256
4219      *   func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
4220      *   subfunc2 -> sub rsp, 64
4221      *   subfunc22 -> sub rsp, 128
4222      *   tailcall2 -> add rsp, 128
4223      *    func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
4224      *
4225      * tailcall will unwind the current stack frame but it will not get rid
4226      * of caller's stack as shown on the example above.
4227      */
4228     if (idx && subprog[idx].has_tail_call && depth >= 256) {
4229         verbose(env,
4230             "tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
4231             depth);
4232         return -EACCES;
4233     }
4234     /* round up to 32-bytes, since this is granularity
4235      * of interpreter stack size
4236      */
4237     depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
4238     if (depth > MAX_BPF_STACK) {
4239         verbose(env, "combined stack size of %d calls is %d. Too large\n",
4240             frame + 1, depth);
4241         return -EACCES;
4242     }
4243 continue_func:
4244     subprog_end = subprog[idx + 1].start;
4245     for (; i < subprog_end; i++) {
4246         int next_insn;
4247
4248         if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
4249             continue;
4250         /* remember insn and function to return to */
4251         ret_insn[frame] = i + 1;
4252         ret_prog[frame] = idx;
4253
4254         /* find the callee */
4255         next_insn = i + insn[i].imm + 1;
4256         idx = find_subprog(env, next_insn);
4257         if (idx < 0) {
4258             WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
4259                   next_insn);
4260             return -EFAULT;
4261         }
4262         if (subprog[idx].is_async_cb) {
4263             if (subprog[idx].has_tail_call) {
4264                 verbose(env, "verifier bug. subprog has tail_call and async cb\n");
4265                 return -EFAULT;
4266             }
4267              /* async callbacks don't increase bpf prog stack size */
4268             continue;
4269         }
4270         i = next_insn;
4271
4272         if (subprog[idx].has_tail_call)
4273             tail_call_reachable = true;
4274
4275         frame++;
4276         if (frame >= MAX_CALL_FRAMES) {
4277             verbose(env, "the call stack of %d frames is too deep !\n",
4278                 frame);
4279             return -E2BIG;
4280         }
4281         goto process_func;
4282     }
4283     /* if tail call got detected across bpf2bpf calls then mark each of the
4284      * currently present subprog frames as tail call reachable subprogs;
4285      * this info will be utilized by JIT so that we will be preserving the
4286      * tail call counter throughout bpf2bpf calls combined with tailcalls
4287      */
4288     if (tail_call_reachable)
4289         for (j = 0; j < frame; j++)
4290             subprog[ret_prog[j]].tail_call_reachable = true;
4291     if (subprog[0].tail_call_reachable)
4292         env->prog->aux->tail_call_reachable = true;
4293
4294     /* end of for() loop means the last insn of the 'subprog'
4295      * was reached. Doesn't matter whether it was JA or EXIT
4296      */
4297     if (frame == 0)
4298         return 0;
4299     depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
4300     frame--;
4301     i = ret_insn[frame];
4302     idx = ret_prog[frame];
4303     goto continue_func;
4304 }
4305
4306 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
4307 static int get_callee_stack_depth(struct bpf_verifier_env *env,
4308                   const struct bpf_insn *insn, int idx)
4309 {
4310     int start = idx + insn->imm + 1, subprog;
4311
4312     subprog = find_subprog(env, start);
4313     if (subprog < 0) {
4314         WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
4315               start);
4316         return -EFAULT;
4317     }
4318     return env->subprog_info[subprog].stack_depth;
4319 }
4320 #endif
4321
4322 static int __check_buffer_access(struct bpf_verifier_env *env,
4323                  const char *buf_info,
4324                  const struct bpf_reg_state *reg,
4325                  int regno, int off, int size)
4326 {
4327     if (off < 0) {
4328         verbose(env,
4329             "R%d invalid %s buffer access: off=%d, size=%d\n",
4330             regno, buf_info, off, size);
4331         return -EACCES;
4332     }
4333     if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
4334         char tn_buf[48];
4335
4336         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4337         verbose(env,
4338             "R%d invalid variable buffer offset: off=%d, var_off=%s\n",
4339             regno, off, tn_buf);
4340         return -EACCES;
4341     }
4342
4343     return 0;
4344 }
4345
4346 static int check_tp_buffer_access(struct bpf_verifier_env *env,
4347                   const struct bpf_reg_state *reg,
4348                   int regno, int off, int size)
4349 {
4350     int err;
4351
4352     err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
4353     if (err)
4354         return err;
4355
4356     if (off + size > env->prog->aux->max_tp_access)
4357         env->prog->aux->max_tp_access = off + size;
4358
4359     return 0;
4360 }
4361
4362 static int check_buffer_access(struct bpf_verifier_env *env,
4363                    const struct bpf_reg_state *reg,
4364                    int regno, int off, int size,
4365                    bool zero_size_allowed,
4366                    u32 *max_access)
4367 {
4368     const char *buf_info = type_is_rdonly_mem(reg->type) ? "rdonly" : "rdwr";
4369     int err;
4370
4371     err = __check_buffer_access(env, buf_info, reg, regno, off, size);
4372     if (err)
4373         return err;
4374
4375     if (off + size > *max_access)
4376         *max_access = off + size;
4377
4378     return 0;
4379 }
4380
4381 /* BPF architecture zero extends alu32 ops into 64-bit registesr */
4382 static void zext_32_to_64(struct bpf_reg_state *reg)
4383 {
4384     reg->var_off = tnum_subreg(reg->var_off);
4385     __reg_assign_32_into_64(reg);
4386 }
4387
4388 /* truncate register to smaller size (in bytes)
4389  * must be called with size < BPF_REG_SIZE
4390  */
4391 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
4392 {
4393     u64 mask;
4394
4395     /* clear high bits in bit representation */
4396     reg->var_off = tnum_cast(reg->var_off, size);
4397
4398     /* fix arithmetic bounds */
4399     mask = ((u64)1 << (size * 8)) - 1;
4400     if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
4401         reg->umin_value &= mask;
4402         reg->umax_value &= mask;
4403     } else {
4404         reg->umin_value = 0;
4405         reg->umax_value = mask;
4406     }
4407     reg->smin_value = reg->umin_value;
4408     reg->smax_value = reg->umax_value;
4409
4410     /* If size is smaller than 32bit register the 32bit register
4411      * values are also truncated so we push 64-bit bounds into
4412      * 32-bit bounds. Above were truncated < 32-bits already.
4413      */
4414     if (size >= 4)
4415         return;
4416     __reg_combine_64_into_32(reg);
4417 }
4418
4419 static bool bpf_map_is_rdonly(const struct bpf_map *map)
4420 {
4421     /* A map is considered read-only if the following condition are true:
4422      *
4423      * 1) BPF program side cannot change any of the map content. The
4424      *    BPF_F_RDONLY_PROG flag is throughout the lifetime of a map
4425      *    and was set at map creation time.
4426      * 2) The map value(s) have been initialized from user space by a
4427      *    loader and then "frozen", such that no new map update/delete
4428      *    operations from syscall side are possible for the rest of
4429      *    the map's lifetime from that point onwards.
4430      * 3) Any parallel/pending map update/delete operations from syscall
4431      *    side have been completed. Only after that point, it's safe to
4432      *    assume that map value(s) are immutable.
4433      */
4434     return (map->map_flags & BPF_F_RDONLY_PROG) &&
4435            READ_ONCE(map->frozen) &&
4436            !bpf_map_write_active(map);
4437 }
4438
4439 static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
4440 {
4441     void *ptr;
4442     u64 addr;
4443     int err;
4444
4445     err = map->ops->map_direct_value_addr(map, &addr, off);
4446     if (err)
4447         return err;
4448     ptr = (void *)(long)addr + off;
4449
4450     switch (size) {
4451     case sizeof(u8):
4452         *val = (u64)*(u8 *)ptr;
4453         break;
4454     case sizeof(u16):
4455         *val = (u64)*(u16 *)ptr;
4456         break;
4457     case sizeof(u32):
4458         *val = (u64)*(u32 *)ptr;
4459         break;
4460     case sizeof(u64):
4461         *val = *(u64 *)ptr;
4462         break;
4463     default:
4464         return -EINVAL;
4465     }
4466     return 0;
4467 }
4468
4469 static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
4470                    struct bpf_reg_state *regs,
4471                    int regno, int off, int size,
4472                    enum bpf_access_type atype,
4473                    int value_regno)
4474 {
4475     struct bpf_reg_state *reg = regs + regno;
4476     const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
4477     const char *tname = btf_name_by_offset(reg->btf, t->name_off);
4478     enum bpf_type_flag flag = 0;
4479     u32 btf_id;
4480     int ret;
4481
4482     if (off < 0) {
4483         verbose(env,
4484             "R%d is ptr_%s invalid negative access: off=%d\n",
4485             regno, tname, off);
4486         return -EACCES;
4487     }
4488     if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
4489         char tn_buf[48];
4490
4491         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4492         verbose(env,
4493             "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
4494             regno, tname, off, tn_buf);
4495         return -EACCES;
4496     }
4497
4498     if (reg->type & MEM_USER) {
4499         verbose(env,
4500             "R%d is ptr_%s access user memory: off=%d\n",
4501             regno, tname, off);
4502         return -EACCES;
4503     }
4504
4505     if (reg->type & MEM_PERCPU) {
4506         verbose(env,
4507             "R%d is ptr_%s access percpu memory: off=%d\n",
4508             regno, tname, off);
4509         return -EACCES;
4510     }
4511
4512     if (env->ops->btf_struct_access) {
4513         ret = env->ops->btf_struct_access(&env->log, reg->btf, t,
4514                           off, size, atype, &btf_id, &flag);
4515     } else {
4516         if (atype != BPF_READ) {
4517             verbose(env, "only read is supported\n");
4518             return -EACCES;
4519         }
4520
4521         ret = btf_struct_access(&env->log, reg->btf, t, off, size,
4522                     atype, &btf_id, &flag);
4523     }
4524
4525     if (ret < 0)
4526         return ret;
4527
4528     /* If this is an untrusted pointer, all pointers formed by walking it
4529      * also inherit the untrusted flag.
4530      */
4531     if (type_flag(reg->type) & PTR_UNTRUSTED)
4532         flag |= PTR_UNTRUSTED;
4533
4534     if (atype == BPF_READ && value_regno >= 0)
4535         mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
4536
4537     return 0;
4538 }
4539
4540 static int check_ptr_to_map_access(struct bpf_verifier_env *env,
4541                    struct bpf_reg_state *regs,
4542                    int regno, int off, int size,
4543                    enum bpf_access_type atype,
4544                    int value_regno)
4545 {
4546     struct bpf_reg_state *reg = regs + regno;
4547     struct bpf_map *map = reg->map_ptr;
4548     enum bpf_type_flag flag = 0;
4549     const struct btf_type *t;
4550     const char *tname;
4551     u32 btf_id;
4552     int ret;
4553
4554     if (!btf_vmlinux) {
4555         verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
4556         return -ENOTSUPP;
4557     }
4558
4559     if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
4560         verbose(env, "map_ptr access not supported for map type %d\n",
4561             map->map_type);
4562         return -ENOTSUPP;
4563     }
4564
4565     t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
4566     tname = btf_name_by_offset(btf_vmlinux, t->name_off);
4567
4568     if (!env->allow_ptr_to_map_access) {
4569         verbose(env,
4570             "%s access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
4571             tname);
4572         return -EPERM;
4573     }
4574
4575     if (off < 0) {
4576         verbose(env, "R%d is %s invalid negative access: off=%d\n",
4577             regno, tname, off);
4578         return -EACCES;
4579     }
4580
4581     if (atype != BPF_READ) {
4582         verbose(env, "only read from %s is supported\n", tname);
4583         return -EACCES;
4584     }
4585
4586     ret = btf_struct_access(&env->log, btf_vmlinux, t, off, size, atype, &btf_id, &flag);
4587     if (ret < 0)
4588         return ret;
4589
4590     if (value_regno >= 0)
4591         mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag);
4592
4593     return 0;
4594 }
4595
4596 /* Check that the stack access at the given offset is within bounds. The
4597  * maximum valid offset is -1.
4598  *
4599  * The minimum valid offset is -MAX_BPF_STACK for writes, and
4600  * -state->allocated_stack for reads.
4601  */
4602 static int check_stack_slot_within_bounds(int off,
4603                       struct bpf_func_state *state,
4604                       enum bpf_access_type t)
4605 {
4606     int min_valid_off;
4607
4608     if (t == BPF_WRITE)
4609         min_valid_off = -MAX_BPF_STACK;
4610     else
4611         min_valid_off = -state->allocated_stack;
4612
4613     if (off < min_valid_off || off > -1)
4614         return -EACCES;
4615     return 0;
4616 }
4617
4618 /* Check that the stack access at 'regno + off' falls within the maximum stack
4619  * bounds.
4620  *
4621  * 'off' includes `regno->offset`, but not its dynamic part (if any).
4622  */
4623 static int check_stack_access_within_bounds(
4624         struct bpf_verifier_env *env,
4625         int regno, int off, int access_size,
4626         enum bpf_access_src src, enum bpf_access_type type)
4627 {
4628     struct bpf_reg_state *regs = cur_regs(env);
4629     struct bpf_reg_state *reg = regs + regno;
4630     struct bpf_func_state *state = func(env, reg);
4631     int min_off, max_off;
4632     int err;
4633     char *err_extra;
4634
4635     if (src == ACCESS_HELPER)
4636         /* We don't know if helpers are reading or writing (or both). */
4637         err_extra = " indirect access to";
4638     else if (type == BPF_READ)
4639         err_extra = " read from";
4640     else
4641         err_extra = " write to";
4642
4643     if (tnum_is_const(reg->var_off)) {
4644         min_off = reg->var_off.value + off;
4645         if (access_size > 0)
4646             max_off = min_off + access_size - 1;
4647         else
4648             max_off = min_off;
4649     } else {
4650         if (reg->smax_value >= BPF_MAX_VAR_OFF ||
4651             reg->smin_value <= -BPF_MAX_VAR_OFF) {
4652             verbose(env, "invalid unbounded variable-offset%s stack R%d\n",
4653                 err_extra, regno);
4654             return -EACCES;
4655         }
4656         min_off = reg->smin_value + off;
4657         if (access_size > 0)
4658             max_off = reg->smax_value + off + access_size - 1;
4659         else
4660             max_off = min_off;
4661     }
4662
4663     err = check_stack_slot_within_bounds(min_off, state, type);
4664     if (!err)
4665         err = check_stack_slot_within_bounds(max_off, state, type);
4666
4667     if (err) {
4668         if (tnum_is_const(reg->var_off)) {
4669             verbose(env, "invalid%s stack R%d off=%d size=%d\n",
4670                 err_extra, regno, off, access_size);
4671         } else {
4672             char tn_buf[48];
4673
4674             tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4675             verbose(env, "invalid variable-offset%s stack R%d var_off=%s size=%d\n",
4676                 err_extra, regno, tn_buf, access_size);
4677         }
4678     }
4679     return err;
4680 }
4681
4682 /* check whether memory at (regno + off) is accessible for t = (read | write)
4683  * if t==write, value_regno is a register which value is stored into memory
4684  * if t==read, value_regno is a register which will receive the value from memory
4685  * if t==write && value_regno==-1, some unknown value is stored into memory
4686  * if t==read && value_regno==-1, don't care what we read from memory
4687  */
4688 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
4689                 int off, int bpf_size, enum bpf_access_type t,
4690                 int value_regno, bool strict_alignment_once)
4691 {
4692     struct bpf_reg_state *regs = cur_regs(env);
4693     struct bpf_reg_state *reg = regs + regno;
4694     struct bpf_func_state *state;
4695     int size, err = 0;
4696
4697     size = bpf_size_to_bytes(bpf_size);
4698     if (size < 0)
4699         return size;
4700
4701     /* alignment checks will add in reg->off themselves */
4702     err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
4703     if (err)
4704         return err;
4705
4706     /* for access checks, reg->off is just part of off */
4707     off += reg->off;
4708
4709     if (reg->type == PTR_TO_MAP_KEY) {
4710         if (t == BPF_WRITE) {
4711             verbose(env, "write to change key R%d not allowed\n", regno);
4712             return -EACCES;
4713         }
4714
4715         err = check_mem_region_access(env, regno, off, size,
4716                           reg->map_ptr->key_size, false);
4717         if (err)
4718             return err;
4719         if (value_regno >= 0)
4720             mark_reg_unknown(env, regs, value_regno);
4721     } else if (reg->type == PTR_TO_MAP_VALUE) {
4722         struct bpf_map_value_off_desc *kptr_off_desc = NULL;
4723
4724         if (t == BPF_WRITE && value_regno >= 0 &&
4725             is_pointer_value(env, value_regno)) {
4726             verbose(env, "R%d leaks addr into map\n", value_regno);
4727             return -EACCES;
4728         }
4729         err = check_map_access_type(env, regno, off, size, t);
4730         if (err)
4731             return err;
4732         err = check_map_access(env, regno, off, size, false, ACCESS_DIRECT);
4733         if (err)
4734             return err;
4735         if (tnum_is_const(reg->var_off))
4736             kptr_off_desc = bpf_map_kptr_off_contains(reg->map_ptr,
4737                                   off + reg->var_off.value);
4738         if (kptr_off_desc) {
4739             err = check_map_kptr_access(env, regno, value_regno, insn_idx,
4740                             kptr_off_desc);
4741         } else if (t == BPF_READ && value_regno >= 0) {
4742             struct bpf_map *map = reg->map_ptr;
4743
4744             /* if map is read-only, track its contents as scalars */
4745             if (tnum_is_const(reg->var_off) &&
4746                 bpf_map_is_rdonly(map) &&
4747                 map->ops->map_direct_value_addr) {
4748                 int map_off = off + reg->var_off.value;
4749                 u64 val = 0;
4750
4751                 err = bpf_map_direct_read(map, map_off, size,
4752                               &val);
4753                 if (err)
4754                     return err;
4755
4756                 regs[value_regno].type = SCALAR_VALUE;
4757                 __mark_reg_known(&regs[value_regno], val);
4758             } else {
4759                 mark_reg_unknown(env, regs, value_regno);
4760             }
4761         }
4762     } else if (base_type(reg->type) == PTR_TO_MEM) {
4763         bool rdonly_mem = type_is_rdonly_mem(reg->type);
4764
4765         if (type_may_be_null(reg->type)) {
4766             verbose(env, "R%d invalid mem access '%s'\n", regno,
4767                 reg_type_str(env, reg->type));
4768             return -EACCES;
4769         }
4770
4771         if (t == BPF_WRITE && rdonly_mem) {
4772             verbose(env, "R%d cannot write into %s\n",
4773                 regno, reg_type_str(env, reg->type));
4774             return -EACCES;
4775         }
4776
4777         if (t == BPF_WRITE && value_regno >= 0 &&
4778             is_pointer_value(env, value_regno)) {
4779             verbose(env, "R%d leaks addr into mem\n", value_regno);
4780             return -EACCES;
4781         }
4782
4783         err = check_mem_region_access(env, regno, off, size,
4784                           reg->mem_size, false);
4785         if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
4786             mark_reg_unknown(env, regs, value_regno);
4787     } else if (reg->type == PTR_TO_CTX) {
4788         enum bpf_reg_type reg_type = SCALAR_VALUE;
4789         struct btf *btf = NULL;
4790         u32 btf_id = 0;
4791
4792         if (t == BPF_WRITE && value_regno >= 0 &&
4793             is_pointer_value(env, value_regno)) {
4794             verbose(env, "R%d leaks addr into ctx\n", value_regno);
4795             return -EACCES;
4796         }
4797
4798         err = check_ptr_off_reg(env, reg, regno);
4799         if (err < 0)
4800             return err;
4801
4802         err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf,
4803                        &btf_id);
4804         if (err)
4805             verbose_linfo(env, insn_idx, "; ");
4806         if (!err && t == BPF_READ && value_regno >= 0) {
4807             /* ctx access returns either a scalar, or a
4808              * PTR_TO_PACKET[_META,_END]. In the latter
4809              * case, we know the offset is zero.
4810              */
4811             if (reg_type == SCALAR_VALUE) {
4812                 mark_reg_unknown(env, regs, value_regno);
4813             } else {
4814                 mark_reg_known_zero(env, regs,
4815                             value_regno);
4816                 if (type_may_be_null(reg_type))
4817                     regs[value_regno].id = ++env->id_gen;
4818                 /* A load of ctx field could have different
4819                  * actual load size with the one encoded in the
4820                  * insn. When the dst is PTR, it is for sure not
4821                  * a sub-register.
4822                  */
4823                 regs[value_regno].subreg_def = DEF_NOT_SUBREG;
4824                 if (base_type(reg_type) == PTR_TO_BTF_ID) {
4825                     regs[value_regno].btf = btf;
4826                     regs[value_regno].btf_id = btf_id;
4827                 }
4828             }
4829             regs[value_regno].type = reg_type;
4830         }
4831
4832     } else if (reg->type == PTR_TO_STACK) {
4833         /* Basic bounds checks. */
4834         err = check_stack_access_within_bounds(env, regno, off, size, ACCESS_DIRECT, t);
4835         if (err)
4836             return err;
4837
4838         state = func(env, reg);
4839         err = update_stack_depth(env, state, off);
4840         if (err)
4841             return err;
4842
4843         if (t == BPF_READ)
4844             err = check_stack_read(env, regno, off, size,
4845                            value_regno);
4846         else
4847             err = check_stack_write(env, regno, off, size,
4848                         value_regno, insn_idx);
4849     } else if (reg_is_pkt_pointer(reg)) {
4850         if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
4851             verbose(env, "cannot write into packet\n");
4852             return -EACCES;
4853         }
4854         if (t == BPF_WRITE && value_regno >= 0 &&
4855             is_pointer_value(env, value_regno)) {
4856             verbose(env, "R%d leaks addr into packet\n",
4857                 value_regno);
4858             return -EACCES;
4859         }
4860         err = check_packet_access(env, regno, off, size, false);
4861         if (!err && t == BPF_READ && value_regno >= 0)
4862             mark_reg_unknown(env, regs, value_regno);
4863     } else if (reg->type == PTR_TO_FLOW_KEYS) {
4864         if (t == BPF_WRITE && value_regno >= 0 &&
4865             is_pointer_value(env, value_regno)) {
4866             verbose(env, "R%d leaks addr into flow keys\n",
4867                 value_regno);
4868             return -EACCES;
4869         }
4870
4871         err = check_flow_keys_access(env, off, size);
4872         if (!err && t == BPF_READ && value_regno >= 0)
4873             mark_reg_unknown(env, regs, value_regno);
4874     } else if (type_is_sk_pointer(reg->type)) {
4875         if (t == BPF_WRITE) {
4876             verbose(env, "R%d cannot write into %s\n",
4877                 regno, reg_type_str(env, reg->type));
4878             return -EACCES;
4879         }
4880         err = check_sock_access(env, insn_idx, regno, off, size, t);
4881         if (!err && value_regno >= 0)
4882             mark_reg_unknown(env, regs, value_regno);
4883     } else if (reg->type == PTR_TO_TP_BUFFER) {
4884         err = check_tp_buffer_access(env, reg, regno, off, size);
4885         if (!err && t == BPF_READ && value_regno >= 0)
4886             mark_reg_unknown(env, regs, value_regno);
4887     } else if (base_type(reg->type) == PTR_TO_BTF_ID &&
4888            !type_may_be_null(reg->type)) {
4889         err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
4890                           value_regno);
4891     } else if (reg->type == CONST_PTR_TO_MAP) {
4892         err = check_ptr_to_map_access(env, regs, regno, off, size, t,
4893                           value_regno);
4894     } else if (base_type(reg->type) == PTR_TO_BUF) {
4895         bool rdonly_mem = type_is_rdonly_mem(reg->type);
4896         u32 *max_access;
4897
4898         if (rdonly_mem) {
4899             if (t == BPF_WRITE) {
4900                 verbose(env, "R%d cannot write into %s\n",
4901                     regno, reg_type_str(env, reg->type));
4902                 return -EACCES;
4903             }
4904             max_access = &env->prog->aux->max_rdonly_access;
4905         } else {
4906             max_access = &env->prog->aux->max_rdwr_access;
4907         }
4908
4909         err = check_buffer_access(env, reg, regno, off, size, false,
4910                       max_access);
4911
4912         if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ))
4913             mark_reg_unknown(env, regs, value_regno);
4914     } else {
4915         verbose(env, "R%d invalid mem access '%s'\n", regno,
4916             reg_type_str(env, reg->type));
4917         return -EACCES;
4918     }
4919
4920     if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
4921         regs[value_regno].type == SCALAR_VALUE) {
4922         /* b/h/w load zero-extends, mark upper bits as known 0 */
4923         coerce_reg_to_size(&regs[value_regno], size);
4924     }
4925     return err;
4926 }
4927
4928 static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
4929 {
4930     int load_reg;
4931     int err;
4932
4933     switch (insn->imm) {
4934     case BPF_ADD:
4935     case BPF_ADD | BPF_FETCH:
4936     case BPF_AND:
4937     case BPF_AND | BPF_FETCH:
4938     case BPF_OR:
4939     case BPF_OR | BPF_FETCH:
4940     case BPF_XOR:
4941     case BPF_XOR | BPF_FETCH:
4942     case BPF_XCHG:
4943     case BPF_CMPXCHG:
4944         break;
4945     default:
4946         verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
4947         return -EINVAL;
4948     }
4949
4950     if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
4951         verbose(env, "invalid atomic operand size\n");
4952         return -EINVAL;
4953     }
4954
4955     /* check src1 operand */
4956     err = check_reg_arg(env, insn->src_reg, SRC_OP);
4957     if (err)
4958         return err;
4959
4960     /* check src2 operand */
4961     err = check_reg_arg(env, insn->dst_reg, SRC_OP);
4962     if (err)
4963         return err;
4964
4965     if (insn->imm == BPF_CMPXCHG) {
4966         /* Check comparison of R0 with memory location */
4967         const u32 aux_reg = BPF_REG_0;
4968
4969         err = check_reg_arg(env, aux_reg, SRC_OP);
4970         if (err)
4971             return err;
4972
4973         if (is_pointer_value(env, aux_reg)) {
4974             verbose(env, "R%d leaks addr into mem\n", aux_reg);
4975             return -EACCES;
4976         }
4977     }
4978
4979     if (is_pointer_value(env, insn->src_reg)) {
4980         verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
4981         return -EACCES;
4982     }
4983
4984     if (is_ctx_reg(env, insn->dst_reg) ||
4985         is_pkt_reg(env, insn->dst_reg) ||
4986         is_flow_key_reg(env, insn->dst_reg) ||
4987         is_sk_reg(env, insn->dst_reg)) {
4988         verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
4989             insn->dst_reg,
4990             reg_type_str(env, reg_state(env, insn->dst_reg)->type));
4991         return -EACCES;
4992     }
4993
4994     if (insn->imm & BPF_FETCH) {
4995         if (insn->imm == BPF_CMPXCHG)
4996             load_reg = BPF_REG_0;
4997         else
4998             load_reg = insn->src_reg;
4999
5000         /* check and record load of old value */
5001         err = check_reg_arg(env, load_reg, DST_OP);
5002         if (err)
5003             return err;
5004     } else {
5005         /* This instruction accesses a memory location but doesn't
5006          * actually load it into a register.
5007          */
5008         load_reg = -1;
5009     }
5010
5011     /* Check whether we can read the memory, with second call for fetch
5012      * case to simulate the register fill.
5013      */
5014     err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
5015                    BPF_SIZE(insn->code), BPF_READ, -1, true);
5016     if (!err && load_reg >= 0)
5017         err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
5018                        BPF_SIZE(insn->code), BPF_READ, load_reg,
5019                        true);
5020     if (err)
5021         return err;
5022
5023     /* Check whether we can write into the same memory. */
5024     err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
5025                    BPF_SIZE(insn->code), BPF_WRITE, -1, true);
5026     if (err)
5027         return err;
5028
5029     return 0;
5030 }
5031
5032 /* When register 'regno' is used to read the stack (either directly or through
5033  * a helper function) make sure that it's within stack boundary and, depending
5034  * on the access type, that all elements of the stack are initialized.
5035  *
5036  * 'off' includes 'regno->off', but not its dynamic part (if any).
5037  *
5038  * All registers that have been spilled on the stack in the slots within the
5039  * read offsets are marked as read.
5040  */
5041 static int check_stack_range_initialized(
5042         struct bpf_verifier_env *env, int regno, int off,
5043         int access_size, bool zero_size_allowed,
5044         enum bpf_access_src type, struct bpf_call_arg_meta *meta)
5045 {
5046     struct bpf_reg_state *reg = reg_state(env, regno);
5047     struct bpf_func_state *state = func(env, reg);
5048     int err, min_off, max_off, i, j, slot, spi;
5049     char *err_extra = type == ACCESS_HELPER ? " indirect" : "";
5050     enum bpf_access_type bounds_check_type;
5051     /* Some accesses can write anything into the stack, others are
5052      * read-only.
5053      */
5054     bool clobber = false;
5055
5056     if (access_size == 0 && !zero_size_allowed) {
5057         verbose(env, "invalid zero-sized read\n");
5058         return -EACCES;
5059     }
5060
5061     if (type == ACCESS_HELPER) {
5062         /* The bounds checks for writes are more permissive than for
5063          * reads. However, if raw_mode is not set, we'll do extra
5064          * checks below.
5065          */
5066         bounds_check_type = BPF_WRITE;
5067         clobber = true;
5068     } else {
5069         bounds_check_type = BPF_READ;
5070     }
5071     err = check_stack_access_within_bounds(env, regno, off, access_size,
5072                            type, bounds_check_type);
5073     if (err)
5074         return err;
5075
5076
5077     if (tnum_is_const(reg->var_off)) {
5078         min_off = max_off = reg->var_off.value + off;
5079     } else {
5080         /* Variable offset is prohibited for unprivileged mode for
5081          * simplicity since it requires corresponding support in
5082          * Spectre masking for stack ALU.
5083          * See also retrieve_ptr_limit().
5084          */
5085         if (!env->bypass_spec_v1) {
5086             char tn_buf[48];
5087
5088             tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5089             verbose(env, "R%d%s variable offset stack access prohibited for !root, var_off=%s\n",
5090                 regno, err_extra, tn_buf);
5091             return -EACCES;
5092         }
5093         /* Only initialized buffer on stack is allowed to be accessed
5094          * with variable offset. With uninitialized buffer it's hard to
5095          * guarantee that whole memory is marked as initialized on
5096          * helper return since specific bounds are unknown what may
5097          * cause uninitialized stack leaking.
5098          */
5099         if (meta && meta->raw_mode)
5100             meta = NULL;
5101
5102         min_off = reg->smin_value + off;
5103         max_off = reg->smax_value + off;
5104     }
5105
5106     if (meta && meta->raw_mode) {
5107         meta->access_size = access_size;
5108         meta->regno = regno;
5109         return 0;
5110     }
5111
5112     for (i = min_off; i < max_off + access_size; i++) {
5113         u8 *stype;
5114
5115         slot = -i - 1;
5116         spi = slot / BPF_REG_SIZE;
5117         if (state->allocated_stack <= slot)
5118             goto err;
5119         stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
5120         if (*stype == STACK_MISC)
5121             goto mark;
5122         if (*stype == STACK_ZERO) {
5123             if (clobber) {
5124                 /* helper can write anything into the stack */
5125                 *stype = STACK_MISC;
5126             }
5127             goto mark;
5128         }
5129
5130         if (is_spilled_reg(&state->stack[spi]) &&
5131             base_type(state->stack[spi].spilled_ptr.type) == PTR_TO_BTF_ID)
5132             goto mark;
5133
5134         if (is_spilled_reg(&state->stack[spi]) &&
5135             (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
5136              env->allow_ptr_leaks)) {
5137             if (clobber) {
5138                 __mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
5139                 for (j = 0; j < BPF_REG_SIZE; j++)
5140                     scrub_spilled_slot(&state->stack[spi].slot_type[j]);
5141             }
5142             goto mark;
5143         }
5144
5145 err:
5146         if (tnum_is_const(reg->var_off)) {
5147             verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n",
5148                 err_extra, regno, min_off, i - min_off, access_size);
5149         } else {
5150             char tn_buf[48];
5151
5152             tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5153             verbose(env, "invalid%s read from stack R%d var_off %s+%d size %d\n",
5154                 err_extra, regno, tn_buf, i - min_off, access_size);
5155         }
5156         return -EACCES;
5157 mark:
5158         /* reading any byte out of 8-byte 'spill_slot' will cause
5159          * the whole slot to be marked as 'read'
5160          */
5161         mark_reg_read(env, &state->stack[spi].spilled_ptr,
5162                   state->stack[spi].spilled_ptr.parent,
5163                   REG_LIVE_READ64);
5164     }
5165     return update_stack_depth(env, state, min_off);
5166 }
5167
5168 static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
5169                    int access_size, bool zero_size_allowed,
5170                    struct bpf_call_arg_meta *meta)
5171 {
5172     struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5173     u32 *max_access;
5174
5175     switch (base_type(reg->type)) {
5176     case PTR_TO_PACKET:
5177     case PTR_TO_PACKET_META:
5178         return check_packet_access(env, regno, reg->off, access_size,
5179                        zero_size_allowed);
5180     case PTR_TO_MAP_KEY:
5181         if (meta && meta->raw_mode) {
5182             verbose(env, "R%d cannot write into %s\n", regno,
5183                 reg_type_str(env, reg->type));
5184             return -EACCES;
5185         }
5186         return check_mem_region_access(env, regno, reg->off, access_size,
5187                            reg->map_ptr->key_size, false);
5188     case PTR_TO_MAP_VALUE:
5189         if (check_map_access_type(env, regno, reg->off, access_size,
5190                       meta && meta->raw_mode ? BPF_WRITE :
5191                       BPF_READ))
5192             return -EACCES;
5193         return check_map_access(env, regno, reg->off, access_size,
5194                     zero_size_allowed, ACCESS_HELPER);
5195     case PTR_TO_MEM:
5196         if (type_is_rdonly_mem(reg->type)) {
5197             if (meta && meta->raw_mode) {
5198                 verbose(env, "R%d cannot write into %s\n", regno,
5199                     reg_type_str(env, reg->type));
5200                 return -EACCES;
5201             }
5202         }
5203         return check_mem_region_access(env, regno, reg->off,
5204                            access_size, reg->mem_size,
5205                            zero_size_allowed);
5206     case PTR_TO_BUF:
5207         if (type_is_rdonly_mem(reg->type)) {
5208             if (meta && meta->raw_mode) {
5209                 verbose(env, "R%d cannot write into %s\n", regno,
5210                     reg_type_str(env, reg->type));
5211                 return -EACCES;
5212             }
5213
5214             max_access = &env->prog->aux->max_rdonly_access;
5215         } else {
5216             max_access = &env->prog->aux->max_rdwr_access;
5217         }
5218         return check_buffer_access(env, reg, regno, reg->off,
5219                        access_size, zero_size_allowed,
5220                        max_access);
5221     case PTR_TO_STACK:
5222         return check_stack_range_initialized(
5223                 env,
5224                 regno, reg->off, access_size,
5225                 zero_size_allowed, ACCESS_HELPER, meta);
5226     default: /* scalar_value or invalid ptr */
5227         /* Allow zero-byte read from NULL, regardless of pointer type */
5228         if (zero_size_allowed && access_size == 0 &&
5229             register_is_null(reg))
5230             return 0;
5231
5232         verbose(env, "R%d type=%s ", regno,
5233             reg_type_str(env, reg->type));
5234         verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK));
5235         return -EACCES;
5236     }
5237 }
5238
5239 static int check_mem_size_reg(struct bpf_verifier_env *env,
5240                   struct bpf_reg_state *reg, u32 regno,
5241                   bool zero_size_allowed,
5242                   struct bpf_call_arg_meta *meta)
5243 {
5244     int err;
5245
5246     /* This is used to refine r0 return value bounds for helpers
5247      * that enforce this value as an upper bound on return values.
5248      * See do_refine_retval_range() for helpers that can refine
5249      * the return value. C type of helper is u32 so we pull register
5250      * bound from umax_value however, if negative verifier errors
5251      * out. Only upper bounds can be learned because retval is an
5252      * int type and negative retvals are allowed.
5253      */
5254     meta->msize_max_value = reg->umax_value;
5255
5256     /* The register is SCALAR_VALUE; the access check
5257      * happens using its boundaries.
5258      */
5259     if (!tnum_is_const(reg->var_off))
5260         /* For unprivileged variable accesses, disable raw
5261          * mode so that the program is required to
5262          * initialize all the memory that the helper could
5263          * just partially fill up.
5264          */
5265         meta = NULL;
5266
5267     if (reg->smin_value < 0) {
5268         verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
5269             regno);
5270         return -EACCES;
5271     }
5272
5273     if (reg->umin_value == 0) {
5274         err = check_helper_mem_access(env, regno - 1, 0,
5275                           zero_size_allowed,
5276                           meta);
5277         if (err)
5278             return err;
5279     }
5280
5281     if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
5282         verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
5283             regno);
5284         return -EACCES;
5285     }
5286     err = check_helper_mem_access(env, regno - 1,
5287                       reg->umax_value,
5288                       zero_size_allowed, meta);
5289     if (!err)
5290         err = mark_chain_precision(env, regno);
5291     return err;
5292 }
5293
5294 int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
5295            u32 regno, u32 mem_size)
5296 {
5297     bool may_be_null = type_may_be_null(reg->type);
5298     struct bpf_reg_state saved_reg;
5299     struct bpf_call_arg_meta meta;
5300     int err;
5301
5302     if (register_is_null(reg))
5303         return 0;
5304
5305     memset(&meta, 0, sizeof(meta));
5306     /* Assuming that the register contains a value check if the memory
5307      * access is safe. Temporarily save and restore the register's state as
5308      * the conversion shouldn't be visible to a caller.
5309      */
5310     if (may_be_null) {
5311         saved_reg = *reg;
5312         mark_ptr_not_null_reg(reg);
5313     }
5314
5315     err = check_helper_mem_access(env, regno, mem_size, true, &meta);
5316     /* Check access for BPF_WRITE */
5317     meta.raw_mode = true;
5318     err = err ?: check_helper_mem_access(env, regno, mem_size, true, &meta);
5319
5320     if (may_be_null)
5321         *reg = saved_reg;
5322
5323     return err;
5324 }
5325
5326 int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
5327                  u32 regno)
5328 {
5329     struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1];
5330     bool may_be_null = type_may_be_null(mem_reg->type);
5331     struct bpf_reg_state saved_reg;
5332     struct bpf_call_arg_meta meta;
5333     int err;
5334
5335     WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5);
5336
5337     memset(&meta, 0, sizeof(meta));
5338
5339     if (may_be_null) {
5340         saved_reg = *mem_reg;
5341         mark_ptr_not_null_reg(mem_reg);
5342     }
5343
5344     err = check_mem_size_reg(env, reg, regno, true, &meta);
5345     /* Check access for BPF_WRITE */
5346     meta.raw_mode = true;
5347     err = err ?: check_mem_size_reg(env, reg, regno, true, &meta);
5348
5349     if (may_be_null)
5350         *mem_reg = saved_reg;
5351     return err;
5352 }
5353
5354 /* Implementation details:
5355  * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
5356  * Two bpf_map_lookups (even with the same key) will have different reg->id.
5357  * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
5358  * value_or_null->value transition, since the verifier only cares about
5359  * the range of access to valid map value pointer and doesn't care about actual
5360  * address of the map element.
5361  * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
5362  * reg->id > 0 after value_or_null->value transition. By doing so
5363  * two bpf_map_lookups will be considered two different pointers that
5364  * point to different bpf_spin_locks.
5365  * The verifier allows taking only one bpf_spin_lock at a time to avoid
5366  * dead-locks.
5367  * Since only one bpf_spin_lock is allowed the checks are simpler than
5368  * reg_is_refcounted() logic. The verifier needs to remember only
5369  * one spin_lock instead of array of acquired_refs.
5370  * cur_state->active_spin_lock remembers which map value element got locked
5371  * and clears it after bpf_spin_unlock.
5372  */
5373 static int process_spin_lock(struct bpf_verifier_env *env, int regno,
5374                  bool is_lock)
5375 {
5376     struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5377     struct bpf_verifier_state *cur = env->cur_state;
5378     bool is_const = tnum_is_const(reg->var_off);
5379     struct bpf_map *map = reg->map_ptr;
5380     u64 val = reg->var_off.value;
5381
5382     if (!is_const) {
5383         verbose(env,
5384             "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
5385             regno);
5386         return -EINVAL;
5387     }
5388     if (!map->btf) {
5389         verbose(env,
5390             "map '%s' has to have BTF in order to use bpf_spin_lock\n",
5391             map->name);
5392         return -EINVAL;
5393     }
5394     if (!map_value_has_spin_lock(map)) {
5395         if (map->spin_lock_off == -E2BIG)
5396             verbose(env,
5397                 "map '%s' has more than one 'struct bpf_spin_lock'\n",
5398                 map->name);
5399         else if (map->spin_lock_off == -ENOENT)
5400             verbose(env,
5401                 "map '%s' doesn't have 'struct bpf_spin_lock'\n",
5402                 map->name);
5403         else
5404             verbose(env,
5405                 "map '%s' is not a struct type or bpf_spin_lock is mangled\n",
5406                 map->name);
5407         return -EINVAL;
5408     }
5409     if (map->spin_lock_off != val + reg->off) {
5410         verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n",
5411             val + reg->off);
5412         return -EINVAL;
5413     }
5414     if (is_lock) {
5415         if (cur->active_spin_lock) {
5416             verbose(env,
5417                 "Locking two bpf_spin_locks are not allowed\n");
5418             return -EINVAL;
5419         }
5420         cur->active_spin_lock = reg->id;
5421     } else {
5422         if (!cur->active_spin_lock) {
5423             verbose(env, "bpf_spin_unlock without taking a lock\n");
5424             return -EINVAL;
5425         }
5426         if (cur->active_spin_lock != reg->id) {
5427             verbose(env, "bpf_spin_unlock of different lock\n");
5428             return -EINVAL;
5429         }
5430         cur->active_spin_lock = 0;
5431     }
5432     return 0;
5433 }
5434
5435 static int process_timer_func(struct bpf_verifier_env *env, int regno,
5436                   struct bpf_call_arg_meta *meta)
5437 {
5438     struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5439     bool is_const = tnum_is_const(reg->var_off);
5440     struct bpf_map *map = reg->map_ptr;
5441     u64 val = reg->var_off.value;
5442
5443     if (!is_const) {
5444         verbose(env,
5445             "R%d doesn't have constant offset. bpf_timer has to be at the constant offset\n",
5446             regno);
5447         return -EINVAL;
5448     }
5449     if (!map->btf) {
5450         verbose(env, "map '%s' has to have BTF in order to use bpf_timer\n",
5451             map->name);
5452         return -EINVAL;
5453     }
5454     if (!map_value_has_timer(map)) {
5455         if (map->timer_off == -E2BIG)
5456             verbose(env,
5457                 "map '%s' has more than one 'struct bpf_timer'\n",
5458                 map->name);
5459         else if (map->timer_off == -ENOENT)
5460             verbose(env,
5461                 "map '%s' doesn't have 'struct bpf_timer'\n",
5462                 map->name);
5463         else
5464             verbose(env,
5465                 "map '%s' is not a struct type or bpf_timer is mangled\n",
5466                 map->name);
5467         return -EINVAL;
5468     }
5469     if (map->timer_off != val + reg->off) {
5470         verbose(env, "off %lld doesn't point to 'struct bpf_timer' that is at %d\n",
5471             val + reg->off, map->timer_off);
5472         return -EINVAL;
5473     }
5474     if (meta->map_ptr) {
5475         verbose(env, "verifier bug. Two map pointers in a timer helper\n");
5476         return -EFAULT;
5477     }
5478     meta->map_uid = reg->map_uid;
5479     meta->map_ptr = map;
5480     return 0;
5481 }
5482
5483 static int process_kptr_func(struct bpf_verifier_env *env, int regno,
5484                  struct bpf_call_arg_meta *meta)
5485 {
5486     struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5487     struct bpf_map_value_off_desc *off_desc;
5488     struct bpf_map *map_ptr = reg->map_ptr;
5489     u32 kptr_off;
5490     int ret;
5491
5492     if (!tnum_is_const(reg->var_off)) {
5493         verbose(env,
5494             "R%d doesn't have constant offset. kptr has to be at the constant offset\n",
5495             regno);
5496         return -EINVAL;
5497     }
5498     if (!map_ptr->btf) {
5499         verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
5500             map_ptr->name);
5501         return -EINVAL;
5502     }
5503     if (!map_value_has_kptrs(map_ptr)) {
5504         ret = PTR_ERR_OR_ZERO(map_ptr->kptr_off_tab);
5505         if (ret == -E2BIG)
5506             verbose(env, "map '%s' has more than %d kptr\n", map_ptr->name,
5507                 BPF_MAP_VALUE_OFF_MAX);
5508         else if (ret == -EEXIST)
5509             verbose(env, "map '%s' has repeating kptr BTF tags\n", map_ptr->name);
5510         else
5511             verbose(env, "map '%s' has no valid kptr\n", map_ptr->name);
5512         return -EINVAL;
5513     }
5514
5515     meta->map_ptr = map_ptr;
5516     kptr_off = reg->off + reg->var_off.value;
5517     off_desc = bpf_map_kptr_off_contains(map_ptr, kptr_off);
5518     if (!off_desc) {
5519         verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
5520         return -EACCES;
5521     }
5522     if (off_desc->type != BPF_KPTR_REF) {
5523         verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
5524         return -EACCES;
5525     }
5526     meta->kptr_off_desc = off_desc;
5527     return 0;
5528 }
5529
5530 static bool arg_type_is_mem_size(enum bpf_arg_type type)
5531 {
5532     return type == ARG_CONST_SIZE ||
5533            type == ARG_CONST_SIZE_OR_ZERO;
5534 }
5535
5536 static bool arg_type_is_release(enum bpf_arg_type type)
5537 {
5538     return type & OBJ_RELEASE;
5539 }
5540
5541 static bool arg_type_is_dynptr(enum bpf_arg_type type)
5542 {
5543     return base_type(type) == ARG_PTR_TO_DYNPTR;
5544 }
5545
5546 static int int_ptr_type_to_size(enum bpf_arg_type type)
5547 {
5548     if (type == ARG_PTR_TO_INT)
5549         return sizeof(u32);
5550     else if (type == ARG_PTR_TO_LONG)
5551         return sizeof(u64);
5552
5553     return -EINVAL;
5554 }
5555
5556 static int resolve_map_arg_type(struct bpf_verifier_env *env,
5557                  const struct bpf_call_arg_meta *meta,
5558                  enum bpf_arg_type *arg_type)
5559 {
5560     if (!meta->map_ptr) {
5561         /* kernel subsystem misconfigured verifier */
5562         verbose(env, "invalid map_ptr to access map->type\n");
5563         return -EACCES;
5564     }
5565
5566     switch (meta->map_ptr->map_type) {
5567     case BPF_MAP_TYPE_SOCKMAP:
5568     case BPF_MAP_TYPE_SOCKHASH:
5569         if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
5570             *arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
5571         } else {
5572             verbose(env, "invalid arg_type for sockmap/sockhash\n");
5573             return -EINVAL;
5574         }
5575         break;
5576     case BPF_MAP_TYPE_BLOOM_FILTER:
5577         if (meta->func_id == BPF_FUNC_map_peek_elem)
5578             *arg_type = ARG_PTR_TO_MAP_VALUE;
5579         break;
5580     default:
5581         break;
5582     }
5583     return 0;
5584 }
5585
5586 struct bpf_reg_types {
5587     const enum bpf_reg_type types[10];
5588     u32 *btf_id;
5589 };
5590
5591 static const struct bpf_reg_types map_key_value_types = {
5592     .types = {
5593         PTR_TO_STACK,
5594         PTR_TO_PACKET,
5595         PTR_TO_PACKET_META,
5596         PTR_TO_MAP_KEY,
5597         PTR_TO_MAP_VALUE,
5598     },
5599 };
5600
5601 static const struct bpf_reg_types sock_types = {
5602     .types = {
5603         PTR_TO_SOCK_COMMON,
5604         PTR_TO_SOCKET,
5605         PTR_TO_TCP_SOCK,
5606         PTR_TO_XDP_SOCK,
5607     },
5608 };
5609
5610 #ifdef CONFIG_NET
5611 static const struct bpf_reg_types btf_id_sock_common_types = {
5612     .types = {
5613         PTR_TO_SOCK_COMMON,
5614         PTR_TO_SOCKET,
5615         PTR_TO_TCP_SOCK,
5616         PTR_TO_XDP_SOCK,
5617         PTR_TO_BTF_ID,
5618     },
5619     .btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
5620 };
5621 #endif
5622
5623 static const struct bpf_reg_types mem_types = {
5624     .types = {
5625         PTR_TO_STACK,
5626         PTR_TO_PACKET,
5627         PTR_TO_PACKET_META,
5628         PTR_TO_MAP_KEY,
5629         PTR_TO_MAP_VALUE,
5630         PTR_TO_MEM,
5631         PTR_TO_MEM | MEM_ALLOC,
5632         PTR_TO_BUF,
5633     },
5634 };
5635
5636 static const struct bpf_reg_types int_ptr_types = {
5637     .types = {
5638         PTR_TO_STACK,
5639         PTR_TO_PACKET,
5640         PTR_TO_PACKET_META,
5641         PTR_TO_MAP_KEY,
5642         PTR_TO_MAP_VALUE,
5643     },
5644 };
5645
5646 static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
5647 static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
5648 static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
5649 static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM | MEM_ALLOC } };
5650 static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
5651 static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
5652 static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
5653 static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_BTF_ID | MEM_PERCPU } };
5654 static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
5655 static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
5656 static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
5657 static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
5658 static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } };
5659
5660 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
5661     [ARG_PTR_TO_MAP_KEY]        = &map_key_value_types,
5662     [ARG_PTR_TO_MAP_VALUE]      = &map_key_value_types,
5663     [ARG_CONST_SIZE]        = &scalar_types,
5664     [ARG_CONST_SIZE_OR_ZERO]    = &scalar_types,
5665     [ARG_CONST_ALLOC_SIZE_OR_ZERO]  = &scalar_types,
5666     [ARG_CONST_MAP_PTR]     = &const_map_ptr_types,
5667     [ARG_PTR_TO_CTX]        = &context_types,
5668     [ARG_PTR_TO_SOCK_COMMON]    = &sock_types,
5669 #ifdef CONFIG_NET
5670     [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types,
5671 #endif
5672     [ARG_PTR_TO_SOCKET]     = &fullsock_types,
5673     [ARG_PTR_TO_BTF_ID]     = &btf_ptr_types,
5674     [ARG_PTR_TO_SPIN_LOCK]      = &spin_lock_types,
5675     [ARG_PTR_TO_MEM]        = &mem_types,
5676     [ARG_PTR_TO_ALLOC_MEM]      = &alloc_mem_types,
5677     [ARG_PTR_TO_INT]        = &int_ptr_types,
5678     [ARG_PTR_TO_LONG]       = &int_ptr_types,
5679     [ARG_PTR_TO_PERCPU_BTF_ID]  = &percpu_btf_ptr_types,
5680     [ARG_PTR_TO_FUNC]       = &func_ptr_types,
5681     [ARG_PTR_TO_STACK]      = &stack_ptr_types,
5682     [ARG_PTR_TO_CONST_STR]      = &const_str_ptr_types,
5683     [ARG_PTR_TO_TIMER]      = &timer_types,
5684     [ARG_PTR_TO_KPTR]       = &kptr_types,
5685     [ARG_PTR_TO_DYNPTR]     = &stack_ptr_types,
5686 };
5687
5688 static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
5689               enum bpf_arg_type arg_type,
5690               const u32 *arg_btf_id,
5691               struct bpf_call_arg_meta *meta)
5692 {
5693     struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5694     enum bpf_reg_type expected, type = reg->type;
5695     const struct bpf_reg_types *compatible;
5696     int i, j;
5697
5698     compatible = compatible_reg_types[base_type(arg_type)];
5699     if (!compatible) {
5700         verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
5701         return -EFAULT;
5702     }
5703
5704     /* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY,
5705      * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY
5706      *
5707      * Same for MAYBE_NULL:
5708      *
5709      * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
5710      * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL
5711      *
5712      * Therefore we fold these flags depending on the arg_type before comparison.
5713      */
5714     if (arg_type & MEM_RDONLY)
5715         type &= ~MEM_RDONLY;
5716     if (arg_type & PTR_MAYBE_NULL)
5717         type &= ~PTR_MAYBE_NULL;
5718
5719     for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
5720         expected = compatible->types[i];
5721         if (expected == NOT_INIT)
5722             break;
5723
5724         if (type == expected)
5725             goto found;
5726     }
5727
5728     verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type));
5729     for (j = 0; j + 1 < i; j++)
5730         verbose(env, "%s, ", reg_type_str(env, compatible->types[j]));
5731     verbose(env, "%s\n", reg_type_str(env, compatible->types[j]));
5732     return -EACCES;
5733
5734 found:
5735     if (reg->type == PTR_TO_BTF_ID) {
5736         /* For bpf_sk_release, it needs to match against first member
5737          * 'struct sock_common', hence make an exception for it. This
5738          * allows bpf_sk_release to work for multiple socket types.
5739          */
5740         bool strict_type_match = arg_type_is_release(arg_type) &&
5741                      meta->func_id != BPF_FUNC_sk_release;
5742
5743         if (!arg_btf_id) {
5744             if (!compatible->btf_id) {
5745                 verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
5746                 return -EFAULT;
5747             }
5748             arg_btf_id = compatible->btf_id;
5749         }
5750
5751         if (meta->func_id == BPF_FUNC_kptr_xchg) {
5752             if (map_kptr_match_type(env, meta->kptr_off_desc, reg, regno))
5753                 return -EACCES;
5754         } else if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
5755                          btf_vmlinux, *arg_btf_id,
5756                          strict_type_match)) {
5757             verbose(env, "R%d is of type %s but %s is expected\n",
5758                 regno, kernel_type_name(reg->btf, reg->btf_id),
5759                 kernel_type_name(btf_vmlinux, *arg_btf_id));
5760             return -EACCES;
5761         }
5762     }
5763
5764     return 0;
5765 }
5766
5767 int check_func_arg_reg_off(struct bpf_verifier_env *env,
5768                const struct bpf_reg_state *reg, int regno,
5769                enum bpf_arg_type arg_type)
5770 {
5771     enum bpf_reg_type type = reg->type;
5772     bool fixed_off_ok = false;
5773
5774     switch ((u32)type) {
5775     /* Pointer types where reg offset is explicitly allowed: */
5776     case PTR_TO_STACK:
5777         if (arg_type_is_dynptr(arg_type) && reg->off % BPF_REG_SIZE) {
5778             verbose(env, "cannot pass in dynptr at an offset\n");
5779             return -EINVAL;
5780         }
5781         fallthrough;
5782     case PTR_TO_PACKET:
5783     case PTR_TO_PACKET_META:
5784     case PTR_TO_MAP_KEY:
5785     case PTR_TO_MAP_VALUE:
5786     case PTR_TO_MEM:
5787     case PTR_TO_MEM | MEM_RDONLY:
5788     case PTR_TO_MEM | MEM_ALLOC:
5789     case PTR_TO_BUF:
5790     case PTR_TO_BUF | MEM_RDONLY:
5791     case SCALAR_VALUE:
5792         /* Some of the argument types nevertheless require a
5793          * zero register offset.
5794          */
5795         if (base_type(arg_type) != ARG_PTR_TO_ALLOC_MEM)
5796             return 0;
5797         break;
5798     /* All the rest must be rejected, except PTR_TO_BTF_ID which allows
5799      * fixed offset.
5800      */
5801     case PTR_TO_BTF_ID:
5802         /* When referenced PTR_TO_BTF_ID is passed to release function,
5803          * it's fixed offset must be 0. In the other cases, fixed offset
5804          * can be non-zero.
5805          */
5806         if (arg_type_is_release(arg_type) && reg->off) {
5807             verbose(env, "R%d must have zero offset when passed to release func\n",
5808                 regno);
5809             return -EINVAL;
5810         }
5811         /* For arg is release pointer, fixed_off_ok must be false, but
5812          * we already checked and rejected reg->off != 0 above, so set
5813          * to true to allow fixed offset for all other cases.
5814          */
5815         fixed_off_ok = true;
5816         break;
5817     default:
5818         break;
5819     }
5820     return __check_ptr_off_reg(env, reg, regno, fixed_off_ok);
5821 }
5822
5823 static u32 stack_slot_get_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
5824 {
5825     struct bpf_func_state *state = func(env, reg);
5826     int spi = get_spi(reg->off);
5827
5828     return state->stack[spi].spilled_ptr.id;
5829 }
5830
5831 static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
5832               struct bpf_call_arg_meta *meta,
5833               const struct bpf_func_proto *fn)
5834 {
5835     u32 regno = BPF_REG_1 + arg;
5836     struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5837     enum bpf_arg_type arg_type = fn->arg_type[arg];
5838     enum bpf_reg_type type = reg->type;
5839     u32 *arg_btf_id = NULL;
5840     int err = 0;
5841
5842     if (arg_type == ARG_DONTCARE)
5843         return 0;
5844
5845     err = check_reg_arg(env, regno, SRC_OP);
5846     if (err)
5847         return err;
5848
5849     if (arg_type == ARG_ANYTHING) {
5850         if (is_pointer_value(env, regno)) {
5851             verbose(env, "R%d leaks addr into helper function\n",
5852                 regno);
5853             return -EACCES;
5854         }
5855         return 0;
5856     }
5857
5858     if (type_is_pkt_pointer(type) &&
5859         !may_access_direct_pkt_data(env, meta, BPF_READ)) {
5860         verbose(env, "helper access to the packet is not allowed\n");
5861         return -EACCES;
5862     }
5863
5864     if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) {
5865         err = resolve_map_arg_type(env, meta, &arg_type);
5866         if (err)
5867             return err;
5868     }
5869
5870     if (register_is_null(reg) && type_may_be_null(arg_type))
5871         /* A NULL register has a SCALAR_VALUE type, so skip
5872          * type checking.
5873          */
5874         goto skip_type_check;
5875
5876     /* arg_btf_id and arg_size are in a union. */
5877     if (base_type(arg_type) == ARG_PTR_TO_BTF_ID)
5878         arg_btf_id = fn->arg_btf_id[arg];
5879
5880     err = check_reg_type(env, regno, arg_type, arg_btf_id, meta);
5881     if (err)
5882         return err;
5883
5884     err = check_func_arg_reg_off(env, reg, regno, arg_type);
5885     if (err)
5886         return err;
5887
5888 skip_type_check:
5889     if (arg_type_is_release(arg_type)) {
5890         if (arg_type_is_dynptr(arg_type)) {
5891             struct bpf_func_state *state = func(env, reg);
5892             int spi = get_spi(reg->off);
5893
5894             if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) ||
5895                 !state->stack[spi].spilled_ptr.id) {
5896                 verbose(env, "arg %d is an unacquired reference\n", regno);
5897                 return -EINVAL;
5898             }
5899         } else if (!reg->ref_obj_id && !register_is_null(reg)) {
5900             verbose(env, "R%d must be referenced when passed to release function\n",
5901                 regno);
5902             return -EINVAL;
5903         }
5904         if (meta->release_regno) {
5905             verbose(env, "verifier internal error: more than one release argument\n");
5906             return -EFAULT;
5907         }
5908         meta->release_regno = regno;
5909     }
5910
5911     if (reg->ref_obj_id) {
5912         if (meta->ref_obj_id) {
5913             verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
5914                 regno, reg->ref_obj_id,
5915                 meta->ref_obj_id);
5916             return -EFAULT;
5917         }
5918         meta->ref_obj_id = reg->ref_obj_id;
5919     }
5920
5921     switch (base_type(arg_type)) {
5922     case ARG_CONST_MAP_PTR:
5923         /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
5924         if (meta->map_ptr) {
5925             /* Use map_uid (which is unique id of inner map) to reject:
5926              * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
5927              * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
5928              * if (inner_map1 && inner_map2) {
5929              *     timer = bpf_map_lookup_elem(inner_map1);
5930              *     if (timer)
5931              *         // mismatch would have been allowed
5932              *         bpf_timer_init(timer, inner_map2);
5933              * }
5934              *
5935              * Comparing map_ptr is enough to distinguish normal and outer maps.
5936              */
5937             if (meta->map_ptr != reg->map_ptr ||
5938                 meta->map_uid != reg->map_uid) {
5939                 verbose(env,
5940                     "timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
5941                     meta->map_uid, reg->map_uid);
5942                 return -EINVAL;
5943             }
5944         }
5945         meta->map_ptr = reg->map_ptr;
5946         meta->map_uid = reg->map_uid;
5947         break;
5948     case ARG_PTR_TO_MAP_KEY:
5949         /* bpf_map_xxx(..., map_ptr, ..., key) call:
5950          * check that [key, key + map->key_size) are within
5951          * stack limits and initialized
5952          */
5953         if (!meta->map_ptr) {
5954             /* in function declaration map_ptr must come before
5955              * map_key, so that it's verified and known before
5956              * we have to check map_key here. Otherwise it means
5957              * that kernel subsystem misconfigured verifier
5958              */
5959             verbose(env, "invalid map_ptr to access map->key\n");
5960             return -EACCES;
5961         }
5962         err = check_helper_mem_access(env, regno,
5963                           meta->map_ptr->key_size, false,
5964                           NULL);
5965         break;
5966     case ARG_PTR_TO_MAP_VALUE:
5967         if (type_may_be_null(arg_type) && register_is_null(reg))
5968             return 0;
5969
5970         /* bpf_map_xxx(..., map_ptr, ..., value) call:
5971          * check [value, value + map->value_size) validity
5972          */
5973         if (!meta->map_ptr) {
5974             /* kernel subsystem misconfigured verifier */
5975             verbose(env, "invalid map_ptr to access map->value\n");
5976             return -EACCES;
5977         }
5978         meta->raw_mode = arg_type & MEM_UNINIT;
5979         err = check_helper_mem_access(env, regno,
5980                           meta->map_ptr->value_size, false,
5981                           meta);
5982         break;
5983     case ARG_PTR_TO_PERCPU_BTF_ID:
5984         if (!reg->btf_id) {
5985             verbose(env, "Helper has invalid btf_id in R%d\n", regno);
5986             return -EACCES;
5987         }
5988         meta->ret_btf = reg->btf;
5989         meta->ret_btf_id = reg->btf_id;
5990         break;
5991     case ARG_PTR_TO_SPIN_LOCK:
5992         if (meta->func_id == BPF_FUNC_spin_lock) {
5993             if (process_spin_lock(env, regno, true))
5994                 return -EACCES;
5995         } else if (meta->func_id == BPF_FUNC_spin_unlock) {
5996             if (process_spin_lock(env, regno, false))
5997                 return -EACCES;
5998         } else {
5999             verbose(env, "verifier internal error\n");
6000             return -EFAULT;
6001         }
6002         break;
6003     case ARG_PTR_TO_TIMER:
6004         if (process_timer_func(env, regno, meta))
6005             return -EACCES;
6006         break;
6007     case ARG_PTR_TO_FUNC:
6008         meta->subprogno = reg->subprogno;
6009         break;
6010     case ARG_PTR_TO_MEM:
6011         /* The access to this pointer is only checked when we hit the
6012          * next is_mem_size argument below.
6013          */
6014         meta->raw_mode = arg_type & MEM_UNINIT;
6015         if (arg_type & MEM_FIXED_SIZE) {
6016             err = check_helper_mem_access(env, regno,
6017                               fn->arg_size[arg], false,
6018                               meta);
6019         }
6020         break;
6021     case ARG_CONST_SIZE:
6022         err = check_mem_size_reg(env, reg, regno, false, meta);
6023         break;
6024     case ARG_CONST_SIZE_OR_ZERO:
6025         err = check_mem_size_reg(env, reg, regno, true, meta);
6026         break;
6027     case ARG_PTR_TO_DYNPTR:
6028         if (arg_type & MEM_UNINIT) {
6029             if (!is_dynptr_reg_valid_uninit(env, reg)) {
6030                 verbose(env, "Dynptr has to be an uninitialized dynptr\n");
6031                 return -EINVAL;
6032             }
6033
6034             /* We only support one dynptr being uninitialized at the moment,
6035              * which is sufficient for the helper functions we have right now.
6036              */
6037             if (meta->uninit_dynptr_regno) {
6038                 verbose(env, "verifier internal error: multiple uninitialized dynptr args\n");
6039                 return -EFAULT;
6040             }
6041
6042             meta->uninit_dynptr_regno = regno;
6043         } else if (!is_dynptr_reg_valid_init(env, reg, arg_type)) {
6044             const char *err_extra = "";
6045
6046             switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
6047             case DYNPTR_TYPE_LOCAL:
6048                 err_extra = "local ";
6049                 break;
6050             case DYNPTR_TYPE_RINGBUF:
6051                 err_extra = "ringbuf ";
6052                 break;
6053             default:
6054                 break;
6055             }
6056
6057             verbose(env, "Expected an initialized %sdynptr as arg #%d\n",
6058                 err_extra, arg + 1);
6059             return -EINVAL;
6060         }
6061         break;
6062     case ARG_CONST_ALLOC_SIZE_OR_ZERO:
6063         if (!tnum_is_const(reg->var_off)) {
6064             verbose(env, "R%d is not a known constant'\n",
6065                 regno);
6066             return -EACCES;
6067         }
6068         meta->mem_size = reg->var_off.value;
6069         err = mark_chain_precision(env, regno);
6070         if (err)
6071             return err;
6072         break;
6073     case ARG_PTR_TO_INT:
6074     case ARG_PTR_TO_LONG:
6075     {
6076         int size = int_ptr_type_to_size(arg_type);
6077
6078         err = check_helper_mem_access(env, regno, size, false, meta);
6079         if (err)
6080             return err;
6081         err = check_ptr_alignment(env, reg, 0, size, true);
6082         break;
6083     }
6084     case ARG_PTR_TO_CONST_STR:
6085     {
6086         struct bpf_map *map = reg->map_ptr;
6087         int map_off;
6088         u64 map_addr;
6089         char *str_ptr;
6090
6091         if (!bpf_map_is_rdonly(map)) {
6092             verbose(env, "R%d does not point to a readonly map'\n", regno);
6093             return -EACCES;
6094         }
6095
6096         if (!tnum_is_const(reg->var_off)) {
6097             verbose(env, "R%d is not a constant address'\n", regno);
6098             return -EACCES;
6099         }
6100
6101         if (!map->ops->map_direct_value_addr) {
6102             verbose(env, "no direct value access support for this map type\n");
6103             return -EACCES;
6104         }
6105
6106         err = check_map_access(env, regno, reg->off,
6107                        map->value_size - reg->off, false,
6108                        ACCESS_HELPER);
6109         if (err)
6110             return err;
6111
6112         map_off = reg->off + reg->var_off.value;
6113         err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
6114         if (err) {
6115             verbose(env, "direct value access on string failed\n");
6116             return err;
6117         }
6118
6119         str_ptr = (char *)(long)(map_addr);
6120         if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
6121             verbose(env, "string is not zero-terminated\n");
6122             return -EINVAL;
6123         }
6124         break;
6125     }
6126     case ARG_PTR_TO_KPTR:
6127         if (process_kptr_func(env, regno, meta))
6128             return -EACCES;
6129         break;
6130     }
6131
6132     return err;
6133 }
6134
6135 static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
6136 {
6137     enum bpf_attach_type eatype = env->prog->expected_attach_type;
6138     enum bpf_prog_type type = resolve_prog_type(env->prog);
6139
6140     if (func_id != BPF_FUNC_map_update_elem)
6141         return false;
6142
6143     /* It's not possible to get access to a locked struct sock in these
6144      * contexts, so updating is safe.
6145      */
6146     switch (type) {
6147     case BPF_PROG_TYPE_TRACING:
6148         if (eatype == BPF_TRACE_ITER)
6149             return true;
6150         break;
6151     case BPF_PROG_TYPE_SOCKET_FILTER:
6152     case BPF_PROG_TYPE_SCHED_CLS:
6153     case BPF_PROG_TYPE_SCHED_ACT:
6154     case BPF_PROG_TYPE_XDP:
6155     case BPF_PROG_TYPE_SK_REUSEPORT:
6156     case BPF_PROG_TYPE_FLOW_DISSECTOR:
6157     case BPF_PROG_TYPE_SK_LOOKUP:
6158         return true;
6159     default:
6160         break;
6161     }
6162
6163     verbose(env, "cannot update sockmap in this context\n");
6164     return false;
6165 }
6166
6167 static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
6168 {
6169     return env->prog->jit_requested &&
6170            bpf_jit_supports_subprog_tailcalls();
6171 }
6172
6173 static int check_map_func_compatibility(struct bpf_verifier_env *env,
6174                     struct bpf_map *map, int func_id)
6175 {
6176     if (!map)
6177         return 0;
6178
6179     /* We need a two way check, first is from map perspective ... */
6180     switch (map->map_type) {
6181     case BPF_MAP_TYPE_PROG_ARRAY:
6182         if (func_id != BPF_FUNC_tail_call)
6183             goto error;
6184         break;
6185     case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
6186         if (func_id != BPF_FUNC_perf_event_read &&
6187             func_id != BPF_FUNC_perf_event_output &&
6188             func_id != BPF_FUNC_skb_output &&
6189             func_id != BPF_FUNC_perf_event_read_value &&
6190             func_id != BPF_FUNC_xdp_output)
6191             goto error;
6192         break;
6193     case BPF_MAP_TYPE_RINGBUF:
6194         if (func_id != BPF_FUNC_ringbuf_output &&
6195             func_id != BPF_FUNC_ringbuf_reserve &&
6196             func_id != BPF_FUNC_ringbuf_query &&
6197             func_id != BPF_FUNC_ringbuf_reserve_dynptr &&
6198             func_id != BPF_FUNC_ringbuf_submit_dynptr &&
6199             func_id != BPF_FUNC_ringbuf_discard_dynptr)
6200             goto error;
6201         break;
6202     case BPF_MAP_TYPE_STACK_TRACE:
6203         if (func_id != BPF_FUNC_get_stackid)
6204             goto error;
6205         break;
6206     case BPF_MAP_TYPE_CGROUP_ARRAY:
6207         if (func_id != BPF_FUNC_skb_under_cgroup &&
6208             func_id != BPF_FUNC_current_task_under_cgroup)
6209             goto error;
6210         break;
6211     case BPF_MAP_TYPE_CGROUP_STORAGE:
6212     case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
6213         if (func_id != BPF_FUNC_get_local_storage)
6214             goto error;
6215         break;
6216     case BPF_MAP_TYPE_DEVMAP:
6217     case BPF_MAP_TYPE_DEVMAP_HASH:
6218         if (func_id != BPF_FUNC_redirect_map &&
6219             func_id != BPF_FUNC_map_lookup_elem)
6220             goto error;
6221         break;
6222     /* Restrict bpf side of cpumap and xskmap, open when use-cases
6223      * appear.
6224      */
6225     case BPF_MAP_TYPE_CPUMAP:
6226         if (func_id != BPF_FUNC_redirect_map)
6227             goto error;
6228         break;
6229     case BPF_MAP_TYPE_XSKMAP:
6230         if (func_id != BPF_FUNC_redirect_map &&
6231             func_id != BPF_FUNC_map_lookup_elem)
6232             goto error;
6233         break;
6234     case BPF_MAP_TYPE_ARRAY_OF_MAPS:
6235     case BPF_MAP_TYPE_HASH_OF_MAPS:
6236         if (func_id != BPF_FUNC_map_lookup_elem)
6237             goto error;
6238         break;
6239     case BPF_MAP_TYPE_SOCKMAP:
6240         if (func_id != BPF_FUNC_sk_redirect_map &&
6241             func_id != BPF_FUNC_sock_map_update &&
6242             func_id != BPF_FUNC_map_delete_elem &&
6243             func_id != BPF_FUNC_msg_redirect_map &&
6244             func_id != BPF_FUNC_sk_select_reuseport &&
6245             func_id != BPF_FUNC_map_lookup_elem &&
6246             !may_update_sockmap(env, func_id))
6247             goto error;
6248         break;
6249     case BPF_MAP_TYPE_SOCKHASH:
6250         if (func_id != BPF_FUNC_sk_redirect_hash &&
6251             func_id != BPF_FUNC_sock_hash_update &&
6252             func_id != BPF_FUNC_map_delete_elem &&
6253             func_id != BPF_FUNC_msg_redirect_hash &&
6254             func_id != BPF_FUNC_sk_select_reuseport &&
6255             func_id != BPF_FUNC_map_lookup_elem &&
6256             !may_update_sockmap(env, func_id))
6257             goto error;
6258         break;
6259     case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
6260         if (func_id != BPF_FUNC_sk_select_reuseport)
6261             goto error;
6262         break;
6263     case BPF_MAP_TYPE_QUEUE:
6264     case BPF_MAP_TYPE_STACK:
6265         if (func_id != BPF_FUNC_map_peek_elem &&
6266             func_id != BPF_FUNC_map_pop_elem &&
6267             func_id != BPF_FUNC_map_push_elem)
6268             goto error;
6269         break;
6270     case BPF_MAP_TYPE_SK_STORAGE:
6271         if (func_id != BPF_FUNC_sk_storage_get &&
6272             func_id != BPF_FUNC_sk_storage_delete)
6273             goto error;
6274         break;
6275     case BPF_MAP_TYPE_INODE_STORAGE:
6276         if (func_id != BPF_FUNC_inode_storage_get &&
6277             func_id != BPF_FUNC_inode_storage_delete)
6278             goto error;
6279         break;
6280     case BPF_MAP_TYPE_TASK_STORAGE:
6281         if (func_id != BPF_FUNC_task_storage_get &&
6282             func_id != BPF_FUNC_task_storage_delete)
6283             goto error;
6284         break;
6285     case BPF_MAP_TYPE_BLOOM_FILTER:
6286         if (func_id != BPF_FUNC_map_peek_elem &&
6287             func_id != BPF_FUNC_map_push_elem)
6288             goto error;
6289         break;
6290     default:
6291         break;
6292     }
6293
6294     /* ... and second from the function itself. */
6295     switch (func_id) {
6296     case BPF_FUNC_tail_call:
6297         if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
6298             goto error;
6299         if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
6300             verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
6301             return -EINVAL;
6302         }
6303         break;
6304     case BPF_FUNC_perf_event_read:
6305     case BPF_FUNC_perf_event_output:
6306     case BPF_FUNC_perf_event_read_value:
6307     case BPF_FUNC_skb_output:
6308     case BPF_FUNC_xdp_output:
6309         if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
6310             goto error;
6311         break;
6312     case BPF_FUNC_ringbuf_output:
6313     case BPF_FUNC_ringbuf_reserve:
6314     case BPF_FUNC_ringbuf_query:
6315     case BPF_FUNC_ringbuf_reserve_dynptr:
6316     case BPF_FUNC_ringbuf_submit_dynptr:
6317     case BPF_FUNC_ringbuf_discard_dynptr:
6318         if (map->map_type != BPF_MAP_TYPE_RINGBUF)
6319             goto error;
6320         break;
6321     case BPF_FUNC_get_stackid:
6322         if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
6323             goto error;
6324         break;
6325     case BPF_FUNC_current_task_under_cgroup:
6326     case BPF_FUNC_skb_under_cgroup:
6327         if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
6328             goto error;
6329         break;
6330     case BPF_FUNC_redirect_map:
6331         if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
6332             map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
6333             map->map_type != BPF_MAP_TYPE_CPUMAP &&
6334             map->map_type != BPF_MAP_TYPE_XSKMAP)
6335             goto error;
6336         break;
6337     case BPF_FUNC_sk_redirect_map:
6338     case BPF_FUNC_msg_redirect_map:
6339     case BPF_FUNC_sock_map_update:
6340         if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
6341             goto error;
6342         break;
6343     case BPF_FUNC_sk_redirect_hash:
6344     case BPF_FUNC_msg_redirect_hash:
6345     case BPF_FUNC_sock_hash_update:
6346         if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
6347             goto error;
6348         break;
6349     case BPF_FUNC_get_local_storage:
6350         if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
6351             map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
6352             goto error;
6353         break;
6354     case BPF_FUNC_sk_select_reuseport:
6355         if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
6356             map->map_type != BPF_MAP_TYPE_SOCKMAP &&
6357             map->map_type != BPF_MAP_TYPE_SOCKHASH)
6358             goto error;
6359         break;
6360     case BPF_FUNC_map_pop_elem:
6361         if (map->map_type != BPF_MAP_TYPE_QUEUE &&
6362             map->map_type != BPF_MAP_TYPE_STACK)
6363             goto error;
6364         break;
6365     case BPF_FUNC_map_peek_elem:
6366     case BPF_FUNC_map_push_elem:
6367         if (map->map_type != BPF_MAP_TYPE_QUEUE &&
6368             map->map_type != BPF_MAP_TYPE_STACK &&
6369             map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
6370             goto error;
6371         break;
6372     case BPF_FUNC_map_lookup_percpu_elem:
6373         if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
6374             map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
6375             map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH)
6376             goto error;
6377         break;
6378     case BPF_FUNC_sk_storage_get:
6379     case BPF_FUNC_sk_storage_delete:
6380         if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
6381             goto error;
6382         break;
6383     case BPF_FUNC_inode_storage_get:
6384     case BPF_FUNC_inode_storage_delete:
6385         if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
6386             goto error;
6387         break;
6388     case BPF_FUNC_task_storage_get:
6389     case BPF_FUNC_task_storage_delete:
6390         if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
6391             goto error;
6392         break;
6393     default:
6394         break;
6395     }
6396
6397     return 0;
6398 error:
6399     verbose(env, "cannot pass map_type %d into func %s#%d\n",
6400         map->map_type, func_id_name(func_id), func_id);
6401     return -EINVAL;
6402 }
6403
6404 static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
6405 {
6406     int count = 0;
6407
6408     if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
6409         count++;
6410     if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
6411         count++;
6412     if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
6413         count++;
6414     if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
6415         count++;
6416     if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
6417         count++;
6418
6419     /* We only support one arg being in raw mode at the moment,
6420      * which is sufficient for the helper functions we have
6421      * right now.
6422      */
6423     return count <= 1;
6424 }
6425
6426 static bool check_args_pair_invalid(const struct bpf_func_proto *fn, int arg)
6427 {
6428     bool is_fixed = fn->arg_type[arg] & MEM_FIXED_SIZE;
6429     bool has_size = fn->arg_size[arg] != 0;
6430     bool is_next_size = false;
6431
6432     if (arg + 1 < ARRAY_SIZE(fn->arg_type))
6433         is_next_size = arg_type_is_mem_size(fn->arg_type[arg + 1]);
6434
6435     if (base_type(fn->arg_type[arg]) != ARG_PTR_TO_MEM)
6436         return is_next_size;
6437
6438     return has_size == is_next_size || is_next_size == is_fixed;
6439 }
6440
6441 static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
6442 {
6443     /* bpf_xxx(..., buf, len) call will access 'len'
6444      * bytes from memory 'buf'. Both arg types need
6445      * to be paired, so make sure there's no buggy
6446      * helper function specification.
6447      */
6448     if (arg_type_is_mem_size(fn->arg1_type) ||
6449         check_args_pair_invalid(fn, 0) ||
6450         check_args_pair_invalid(fn, 1) ||
6451         check_args_pair_invalid(fn, 2) ||
6452         check_args_pair_invalid(fn, 3) ||
6453         check_args_pair_invalid(fn, 4))
6454         return false;
6455
6456     return true;
6457 }
6458
6459 static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
6460 {
6461     int count = 0;
6462
6463     if (arg_type_may_be_refcounted(fn->arg1_type))
6464         count++;
6465     if (arg_type_may_be_refcounted(fn->arg2_type))
6466         count++;
6467     if (arg_type_may_be_refcounted(fn->arg3_type))
6468         count++;
6469     if (arg_type_may_be_refcounted(fn->arg4_type))
6470         count++;
6471     if (arg_type_may_be_refcounted(fn->arg5_type))
6472         count++;
6473
6474     /* A reference acquiring function cannot acquire
6475      * another refcounted ptr.
6476      */
6477     if (may_be_acquire_function(func_id) && count)
6478         return false;
6479
6480     /* We only support one arg being unreferenced at the moment,
6481      * which is sufficient for the helper functions we have right now.
6482      */
6483     return count <= 1;
6484 }
6485
6486 static bool check_btf_id_ok(const struct bpf_func_proto *fn)
6487 {
6488     int i;
6489
6490     for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
6491         if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
6492             return false;
6493
6494         if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] &&
6495             /* arg_btf_id and arg_size are in a union. */
6496             (base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM ||
6497              !(fn->arg_type[i] & MEM_FIXED_SIZE)))
6498             return false;
6499     }
6500
6501     return true;
6502 }
6503
6504 static int check_func_proto(const struct bpf_func_proto *fn, int func_id,
6505                 struct bpf_call_arg_meta *meta)
6506 {
6507     return check_raw_mode_ok(fn) &&
6508            check_arg_pair_ok(fn) &&
6509            check_btf_id_ok(fn) &&
6510            check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
6511 }
6512
6513 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
6514  * are now invalid, so turn them into unknown SCALAR_VALUE.
6515  */
6516 static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
6517                      struct bpf_func_state *state)
6518 {
6519     struct bpf_reg_state *regs = state->regs, *reg;
6520     int i;
6521
6522     for (i = 0; i < MAX_BPF_REG; i++)
6523         if (reg_is_pkt_pointer_any(&regs[i]))
6524             mark_reg_unknown(env, regs, i);
6525
6526     bpf_for_each_spilled_reg(i, state, reg) {
6527         if (!reg)
6528             continue;
6529         if (reg_is_pkt_pointer_any(reg))
6530             __mark_reg_unknown(env, reg);
6531     }
6532 }
6533
6534 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
6535 {
6536     struct bpf_verifier_state *vstate = env->cur_state;
6537     int i;
6538
6539     for (i = 0; i <= vstate->curframe; i++)
6540         __clear_all_pkt_pointers(env, vstate->frame[i]);
6541 }
6542
6543 enum {
6544     AT_PKT_END = -1,
6545     BEYOND_PKT_END = -2,
6546 };
6547
6548 static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open)
6549 {
6550     struct bpf_func_state *state = vstate->frame[vstate->curframe];
6551     struct bpf_reg_state *reg = &state->regs[regn];
6552
6553     if (reg->type != PTR_TO_PACKET)
6554         /* PTR_TO_PACKET_META is not supported yet */
6555         return;
6556
6557     /* The 'reg' is pkt > pkt_end or pkt >= pkt_end.
6558      * How far beyond pkt_end it goes is unknown.
6559      * if (!range_open) it's the case of pkt >= pkt_end
6560      * if (range_open) it's the case of pkt > pkt_end
6561      * hence this pointer is at least 1 byte bigger than pkt_end
6562      */
6563     if (range_open)
6564         reg->range = BEYOND_PKT_END;
6565     else
6566         reg->range = AT_PKT_END;
6567 }
6568
6569 static void release_reg_references(struct bpf_verifier_env *env,
6570                    struct bpf_func_state *state,
6571                    int ref_obj_id)
6572 {
6573     struct bpf_reg_state *regs = state->regs, *reg;
6574     int i;
6575
6576     for (i = 0; i < MAX_BPF_REG; i++)
6577         if (regs[i].ref_obj_id == ref_obj_id)
6578             mark_reg_unknown(env, regs, i);
6579
6580     bpf_for_each_spilled_reg(i, state, reg) {
6581         if (!reg)
6582             continue;
6583         if (reg->ref_obj_id == ref_obj_id)
6584             __mark_reg_unknown(env, reg);
6585     }
6586 }
6587
6588 /* The pointer with the specified id has released its reference to kernel
6589  * resources. Identify all copies of the same pointer and clear the reference.
6590  */
6591 static int release_reference(struct bpf_verifier_env *env,
6592                  int ref_obj_id)
6593 {
6594     struct bpf_verifier_state *vstate = env->cur_state;
6595     int err;
6596     int i;
6597
6598     err = release_reference_state(cur_func(env), ref_obj_id);
6599     if (err)
6600         return err;
6601
6602     for (i = 0; i <= vstate->curframe; i++)
6603         release_reg_references(env, vstate->frame[i], ref_obj_id);
6604
6605     return 0;
6606 }
6607
6608 static void clear_caller_saved_regs(struct bpf_verifier_env *env,
6609                     struct bpf_reg_state *regs)
6610 {
6611     int i;
6612
6613     /* after the call registers r0 - r5 were scratched */
6614     for (i = 0; i < CALLER_SAVED_REGS; i++) {
6615         mark_reg_not_init(env, regs, caller_saved[i]);
6616         check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
6617     }
6618 }
6619
6620 typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
6621                    struct bpf_func_state *caller,
6622                    struct bpf_func_state *callee,
6623                    int insn_idx);
6624
6625 static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
6626                  int *insn_idx, int subprog,
6627                  set_callee_state_fn set_callee_state_cb)
6628 {
6629     struct bpf_verifier_state *state = env->cur_state;
6630     struct bpf_func_info_aux *func_info_aux;
6631     struct bpf_func_state *caller, *callee;
6632     int err;
6633     bool is_global = false;
6634
6635     if (state->curframe + 1 >= MAX_CALL_FRAMES) {
6636         verbose(env, "the call stack of %d frames is too deep\n",
6637             state->curframe + 2);
6638         return -E2BIG;
6639     }
6640
6641     caller = state->frame[state->curframe];
6642     if (state->frame[state->curframe + 1]) {
6643         verbose(env, "verifier bug. Frame %d already allocated\n",
6644             state->curframe + 1);
6645         return -EFAULT;
6646     }
6647
6648     func_info_aux = env->prog->aux->func_info_aux;
6649     if (func_info_aux)
6650         is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
6651     err = btf_check_subprog_arg_match(env, subprog, caller->regs);
6652     if (err == -EFAULT)
6653         return err;
6654     if (is_global) {
6655         if (err) {
6656             verbose(env, "Caller passes invalid args into func#%d\n",
6657                 subprog);
6658             return err;
6659         } else {
6660             if (env->log.level & BPF_LOG_LEVEL)
6661                 verbose(env,
6662                     "Func#%d is global and valid. Skipping.\n",
6663                     subprog);
6664             clear_caller_saved_regs(env, caller->regs);
6665
6666             /* All global functions return a 64-bit SCALAR_VALUE */
6667             mark_reg_unknown(env, caller->regs, BPF_REG_0);
6668             caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
6669
6670             /* continue with next insn after call */
6671             return 0;
6672         }
6673     }
6674
6675     if (insn->code == (BPF_JMP | BPF_CALL) &&
6676         insn->src_reg == 0 &&
6677         insn->imm == BPF_FUNC_timer_set_callback) {
6678         struct bpf_verifier_state *async_cb;
6679
6680         /* there is no real recursion here. timer callbacks are async */
6681         env->subprog_info[subprog].is_async_cb = true;
6682         async_cb = push_async_cb(env, env->subprog_info[subprog].start,
6683                      *insn_idx, subprog);
6684         if (!async_cb)
6685             return -EFAULT;
6686         callee = async_cb->frame[0];
6687         callee->async_entry_cnt = caller->async_entry_cnt + 1;
6688
6689         /* Convert bpf_timer_set_callback() args into timer callback args */
6690         err = set_callee_state_cb(env, caller, callee, *insn_idx);
6691         if (err)
6692             return err;
6693
6694         clear_caller_saved_regs(env, caller->regs);
6695         mark_reg_unknown(env, caller->regs, BPF_REG_0);
6696         caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
6697         /* continue with next insn after call */
6698         return 0;
6699     }
6700
6701     callee = kzalloc(sizeof(*callee), GFP_KERNEL);
6702     if (!callee)
6703         return -ENOMEM;
6704     state->frame[state->curframe + 1] = callee;
6705
6706     /* callee cannot access r0, r6 - r9 for reading and has to write
6707      * into its own stack before reading from it.
6708      * callee can read/write into caller's stack
6709      */
6710     init_func_state(env, callee,
6711             /* remember the callsite, it will be used by bpf_exit */
6712             *insn_idx /* callsite */,
6713             state->curframe + 1 /* frameno within this callchain */,
6714             subprog /* subprog number within this prog */);
6715
6716     /* Transfer references to the callee */
6717     err = copy_reference_state(callee, caller);
6718     if (err)
6719         return err;
6720
6721     err = set_callee_state_cb(env, caller, callee, *insn_idx);
6722     if (err)
6723         return err;
6724
6725     clear_caller_saved_regs(env, caller->regs);
6726
6727     /* only increment it after check_reg_arg() finished */
6728     state->curframe++;
6729
6730     /* and go analyze first insn of the callee */
6731     *insn_idx = env->subprog_info[subprog].start - 1;
6732
6733     if (env->log.level & BPF_LOG_LEVEL) {
6734         verbose(env, "caller:\n");
6735         print_verifier_state(env, caller, true);
6736         verbose(env, "callee:\n");
6737         print_verifier_state(env, callee, true);
6738     }
6739     return 0;
6740 }
6741
6742 int map_set_for_each_callback_args(struct bpf_verifier_env *env,
6743                    struct bpf_func_state *caller,
6744                    struct bpf_func_state *callee)
6745 {
6746     /* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
6747      *      void *callback_ctx, u64 flags);
6748      * callback_fn(struct bpf_map *map, void *key, void *value,
6749      *      void *callback_ctx);
6750      */
6751     callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
6752
6753     callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
6754     __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
6755     callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
6756
6757     callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
6758     __mark_reg_known_zero(&callee->regs[BPF_REG_3]);
6759     callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
6760
6761     /* pointer to stack or null */
6762     callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
6763
6764     /* unused */
6765     __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
6766     return 0;
6767 }
6768
6769 static int set_callee_state(struct bpf_verifier_env *env,
6770                 struct bpf_func_state *caller,
6771                 struct bpf_func_state *callee, int insn_idx)
6772 {
6773     int i;
6774
6775     /* copy r1 - r5 args that callee can access.  The copy includes parent
6776      * pointers, which connects us up to the liveness chain
6777      */
6778     for (i = BPF_REG_1; i <= BPF_REG_5; i++)
6779         callee->regs[i] = caller->regs[i];
6780     return 0;
6781 }
6782
6783 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
6784                int *insn_idx)
6785 {
6786     int subprog, target_insn;
6787
6788     target_insn = *insn_idx + insn->imm + 1;
6789     subprog = find_subprog(env, target_insn);
6790     if (subprog < 0) {
6791         verbose(env, "verifier bug. No program starts at insn %d\n",
6792             target_insn);
6793         return -EFAULT;
6794     }
6795
6796     return __check_func_call(env, insn, insn_idx, subprog, set_callee_state);
6797 }
6798
6799 static int set_map_elem_callback_state(struct bpf_verifier_env *env,
6800                        struct bpf_func_state *caller,
6801                        struct bpf_func_state *callee,
6802                        int insn_idx)
6803 {
6804     struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
6805     struct bpf_map *map;
6806     int err;
6807
6808     if (bpf_map_ptr_poisoned(insn_aux)) {
6809         verbose(env, "tail_call abusing map_ptr\n");
6810         return -EINVAL;
6811     }
6812
6813     map = BPF_MAP_PTR(insn_aux->map_ptr_state);
6814     if (!map->ops->map_set_for_each_callback_args ||
6815         !map->ops->map_for_each_callback) {
6816         verbose(env, "callback function not allowed for map\n");
6817         return -ENOTSUPP;
6818     }
6819
6820     err = map->ops->map_set_for_each_callback_args(env, caller, callee);
6821     if (err)
6822         return err;
6823
6824     callee->in_callback_fn = true;
6825     return 0;
6826 }
6827
6828 static int set_loop_callback_state(struct bpf_verifier_env *env,
6829                    struct bpf_func_state *caller,
6830                    struct bpf_func_state *callee,
6831                    int insn_idx)
6832 {
6833     /* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
6834      *      u64 flags);
6835      * callback_fn(u32 index, void *callback_ctx);
6836      */
6837     callee->regs[BPF_REG_1].type = SCALAR_VALUE;
6838     callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
6839
6840     /* unused */
6841     __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
6842     __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
6843     __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
6844
6845     callee->in_callback_fn = true;
6846     return 0;
6847 }
6848
6849 static int set_timer_callback_state(struct bpf_verifier_env *env,
6850                     struct bpf_func_state *caller,
6851                     struct bpf_func_state *callee,
6852                     int insn_idx)
6853 {
6854     struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
6855
6856     /* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
6857      * callback_fn(struct bpf_map *map, void *key, void *value);
6858      */
6859     callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
6860     __mark_reg_known_zero(&callee->regs[BPF_REG_1]);
6861     callee->regs[BPF_REG_1].map_ptr = map_ptr;
6862
6863     callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
6864     __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
6865     callee->regs[BPF_REG_2].map_ptr = map_ptr;
6866
6867     callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
6868     __mark_reg_known_zero(&callee->regs[BPF_REG_3]);
6869     callee->regs[BPF_REG_3].map_ptr = map_ptr;
6870
6871     /* unused */
6872     __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
6873     __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
6874     callee->in_async_callback_fn = true;
6875     return 0;
6876 }
6877
6878 static int set_find_vma_callback_state(struct bpf_verifier_env *env,
6879                        struct bpf_func_state *caller,
6880                        struct bpf_func_state *callee,
6881                        int insn_idx)
6882 {
6883     /* bpf_find_vma(struct task_struct *task, u64 addr,
6884      *               void *callback_fn, void *callback_ctx, u64 flags)
6885      * (callback_fn)(struct task_struct *task,
6886      *               struct vm_area_struct *vma, void *callback_ctx);
6887      */
6888     callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
6889
6890     callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID;
6891     __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
6892     callee->regs[BPF_REG_2].btf =  btf_vmlinux;
6893     callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA],
6894
6895     /* pointer to stack or null */
6896     callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4];
6897
6898     /* unused */
6899     __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
6900     __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
6901     callee->in_callback_fn = true;
6902     return 0;
6903 }
6904
6905 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
6906 {
6907     struct bpf_verifier_state *state = env->cur_state;
6908     struct bpf_func_state *caller, *callee;
6909     struct bpf_reg_state *r0;
6910     int err;
6911
6912     callee = state->frame[state->curframe];
6913     r0 = &callee->regs[BPF_REG_0];
6914     if (r0->type == PTR_TO_STACK) {
6915         /* technically it's ok to return caller's stack pointer
6916          * (or caller's caller's pointer) back to the caller,
6917          * since these pointers are valid. Only current stack
6918          * pointer will be invalid as soon as function exits,
6919          * but let's be conservative
6920          */
6921         verbose(env, "cannot return stack pointer to the caller\n");
6922         return -EINVAL;
6923     }
6924
6925     state->curframe--;
6926     caller = state->frame[state->curframe];
6927     if (callee->in_callback_fn) {
6928         /* enforce R0 return value range [0, 1]. */
6929         struct tnum range = tnum_range(0, 1);
6930
6931         if (r0->type != SCALAR_VALUE) {
6932             verbose(env, "R0 not a scalar value\n");
6933             return -EACCES;
6934         }
6935         if (!tnum_in(range, r0->var_off)) {
6936             verbose_invalid_scalar(env, r0, &range, "callback return", "R0");
6937             return -EINVAL;
6938         }
6939     } else {
6940         /* return to the caller whatever r0 had in the callee */
6941         caller->regs[BPF_REG_0] = *r0;
6942     }
6943
6944     /* Transfer references to the caller */
6945     err = copy_reference_state(caller, callee);
6946     if (err)
6947         return err;
6948
6949     *insn_idx = callee->callsite + 1;
6950     if (env->log.level & BPF_LOG_LEVEL) {
6951         verbose(env, "returning from callee:\n");
6952         print_verifier_state(env, callee, true);
6953         verbose(env, "to caller at %d:\n", *insn_idx);
6954         print_verifier_state(env, caller, true);
6955     }
6956     /* clear everything in the callee */
6957     free_func_state(callee);
6958     state->frame[state->curframe + 1] = NULL;
6959     return 0;
6960 }
6961
6962 static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
6963                    int func_id,
6964                    struct bpf_call_arg_meta *meta)
6965 {
6966     struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
6967
6968     if (ret_type != RET_INTEGER ||
6969         (func_id != BPF_FUNC_get_stack &&
6970          func_id != BPF_FUNC_get_task_stack &&
6971          func_id != BPF_FUNC_probe_read_str &&
6972          func_id != BPF_FUNC_probe_read_kernel_str &&
6973          func_id != BPF_FUNC_probe_read_user_str))
6974         return;
6975
6976     ret_reg->smax_value = meta->msize_max_value;
6977     ret_reg->s32_max_value = meta->msize_max_value;
6978     ret_reg->smin_value = -MAX_ERRNO;
6979     ret_reg->s32_min_value = -MAX_ERRNO;
6980     reg_bounds_sync(ret_reg);
6981 }
6982
6983 static int
6984 record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
6985         int func_id, int insn_idx)
6986 {
6987     struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
6988     struct bpf_map *map = meta->map_ptr;
6989
6990     if (func_id != BPF_FUNC_tail_call &&
6991         func_id != BPF_FUNC_map_lookup_elem &&
6992         func_id != BPF_FUNC_map_update_elem &&
6993         func_id != BPF_FUNC_map_delete_elem &&
6994         func_id != BPF_FUNC_map_push_elem &&
6995         func_id != BPF_FUNC_map_pop_elem &&
6996         func_id != BPF_FUNC_map_peek_elem &&
6997         func_id != BPF_FUNC_for_each_map_elem &&
6998         func_id != BPF_FUNC_redirect_map &&
6999         func_id != BPF_FUNC_map_lookup_percpu_elem)
7000         return 0;
7001
7002     if (map == NULL) {
7003         verbose(env, "kernel subsystem misconfigured verifier\n");
7004         return -EINVAL;
7005     }
7006
7007     /* In case of read-only, some additional restrictions
7008      * need to be applied in order to prevent altering the
7009      * state of the map from program side.
7010      */
7011     if ((map->map_flags & BPF_F_RDONLY_PROG) &&
7012         (func_id == BPF_FUNC_map_delete_elem ||
7013          func_id == BPF_FUNC_map_update_elem ||
7014          func_id == BPF_FUNC_map_push_elem ||
7015          func_id == BPF_FUNC_map_pop_elem)) {
7016         verbose(env, "write into map forbidden\n");
7017         return -EACCES;
7018     }
7019
7020     if (!BPF_MAP_PTR(aux->map_ptr_state))
7021         bpf_map_ptr_store(aux, meta->map_ptr,
7022                   !meta->map_ptr->bypass_spec_v1);
7023     else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr)
7024         bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
7025                   !meta->map_ptr->bypass_spec_v1);
7026     return 0;
7027 }
7028
7029 static int
7030 record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
7031         int func_id, int insn_idx)
7032 {
7033     struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
7034     struct bpf_reg_state *regs = cur_regs(env), *reg;
7035     struct bpf_map *map = meta->map_ptr;
7036     u64 val, max;
7037     int err;
7038
7039     if (func_id != BPF_FUNC_tail_call)
7040         return 0;
7041     if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
7042         verbose(env, "kernel subsystem misconfigured verifier\n");
7043         return -EINVAL;
7044     }
7045
7046     reg = &regs[BPF_REG_3];
7047     val = reg->var_off.value;
7048     max = map->max_entries;
7049
7050     if (!(register_is_const(reg) && val < max)) {
7051         bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
7052         return 0;
7053     }
7054
7055     err = mark_chain_precision(env, BPF_REG_3);
7056     if (err)
7057         return err;
7058     if (bpf_map_key_unseen(aux))
7059         bpf_map_key_store(aux, val);
7060     else if (!bpf_map_key_poisoned(aux) &&
7061           bpf_map_key_immediate(aux) != val)
7062         bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
7063     return 0;
7064 }
7065
7066 static int check_reference_leak(struct bpf_verifier_env *env)
7067 {
7068     struct bpf_func_state *state = cur_func(env);
7069     int i;
7070
7071     for (i = 0; i < state->acquired_refs; i++) {
7072         verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
7073             state->refs[i].id, state->refs[i].insn_idx);
7074     }
7075     return state->acquired_refs ? -EINVAL : 0;
7076 }
7077
7078 static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
7079                    struct bpf_reg_state *regs)
7080 {
7081     struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
7082     struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
7083     struct bpf_map *fmt_map = fmt_reg->map_ptr;
7084     int err, fmt_map_off, num_args;
7085     u64 fmt_addr;
7086     char *fmt;
7087
7088     /* data must be an array of u64 */
7089     if (data_len_reg->var_off.value % 8)
7090         return -EINVAL;
7091     num_args = data_len_reg->var_off.value / 8;
7092
7093     /* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
7094      * and map_direct_value_addr is set.
7095      */
7096     fmt_map_off = fmt_reg->off + fmt_reg->var_off.value;
7097     err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
7098                           fmt_map_off);
7099     if (err) {
7100         verbose(env, "verifier bug\n");
7101         return -EFAULT;
7102     }
7103     fmt = (char *)(long)fmt_addr + fmt_map_off;
7104
7105     /* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we
7106      * can focus on validating the format specifiers.
7107      */
7108     err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, NULL, num_args);
7109     if (err < 0)
7110         verbose(env, "Invalid format string\n");
7111
7112     return err;
7113 }
7114
7115 static int check_get_func_ip(struct bpf_verifier_env *env)
7116 {
7117     enum bpf_prog_type type = resolve_prog_type(env->prog);
7118     int func_id = BPF_FUNC_get_func_ip;
7119
7120     if (type == BPF_PROG_TYPE_TRACING) {
7121         if (!bpf_prog_has_trampoline(env->prog)) {
7122             verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n",
7123                 func_id_name(func_id), func_id);
7124             return -ENOTSUPP;
7125         }
7126         return 0;
7127     } else if (type == BPF_PROG_TYPE_KPROBE) {
7128         return 0;
7129     }
7130
7131     verbose(env, "func %s#%d not supported for program type %d\n",
7132         func_id_name(func_id), func_id, type);
7133     return -ENOTSUPP;
7134 }
7135
7136 static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
7137 {
7138     return &env->insn_aux_data[env->insn_idx];
7139 }
7140
7141 static bool loop_flag_is_zero(struct bpf_verifier_env *env)
7142 {
7143     struct bpf_reg_state *regs = cur_regs(env);
7144     struct bpf_reg_state *reg = &regs[BPF_REG_4];
7145     bool reg_is_null = register_is_null(reg);
7146
7147     if (reg_is_null)
7148         mark_chain_precision(env, BPF_REG_4);
7149
7150     return reg_is_null;
7151 }
7152
7153 static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno)
7154 {
7155     struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state;
7156
7157     if (!state->initialized) {
7158         state->initialized = 1;
7159         state->fit_for_inline = loop_flag_is_zero(env);
7160         state->callback_subprogno = subprogno;
7161         return;
7162     }
7163
7164     if (!state->fit_for_inline)
7165         return;
7166
7167     state->fit_for_inline = (loop_flag_is_zero(env) &&
7168                  state->callback_subprogno == subprogno);
7169 }
7170
7171 static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
7172                  int *insn_idx_p)
7173 {
7174     enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
7175     const struct bpf_func_proto *fn = NULL;
7176     enum bpf_return_type ret_type;
7177     enum bpf_type_flag ret_flag;
7178     struct bpf_reg_state *regs;
7179     struct bpf_call_arg_meta meta;
7180     int insn_idx = *insn_idx_p;
7181     bool changes_data;
7182     int i, err, func_id;
7183
7184     /* find function prototype */
7185     func_id = insn->imm;
7186     if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
7187         verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
7188             func_id);
7189         return -EINVAL;
7190     }
7191
7192     if (env->ops->get_func_proto)
7193         fn = env->ops->get_func_proto(func_id, env->prog);
7194     if (!fn) {
7195         verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
7196             func_id);
7197         return -EINVAL;
7198     }
7199
7200     /* eBPF programs must be GPL compatible to use GPL-ed functions */
7201     if (!env->prog->gpl_compatible && fn->gpl_only) {
7202         verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
7203         return -EINVAL;
7204     }
7205
7206     if (fn->allowed && !fn->allowed(env->prog)) {
7207         verbose(env, "helper call is not allowed in probe\n");
7208         return -EINVAL;
7209     }
7210
7211     /* With LD_ABS/IND some JITs save/restore skb from r1. */
7212     changes_data = bpf_helper_changes_pkt_data(fn->func);
7213     if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
7214         verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
7215             func_id_name(func_id), func_id);
7216         return -EINVAL;
7217     }
7218
7219     memset(&meta, 0, sizeof(meta));
7220     meta.pkt_access = fn->pkt_access;
7221
7222     err = check_func_proto(fn, func_id, &meta);
7223     if (err) {
7224         verbose(env, "kernel subsystem misconfigured func %s#%d\n",
7225             func_id_name(func_id), func_id);
7226         return err;
7227     }
7228
7229     meta.func_id = func_id;
7230     /* check args */
7231     for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
7232         err = check_func_arg(env, i, &meta, fn);
7233         if (err)
7234             return err;
7235     }
7236
7237     err = record_func_map(env, &meta, func_id, insn_idx);
7238     if (err)
7239         return err;
7240
7241     err = record_func_key(env, &meta, func_id, insn_idx);
7242     if (err)
7243         return err;
7244
7245     /* Mark slots with STACK_MISC in case of raw mode, stack offset
7246      * is inferred from register state.
7247      */
7248     for (i = 0; i < meta.access_size; i++) {
7249         err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
7250                        BPF_WRITE, -1, false);
7251         if (err)
7252             return err;
7253     }
7254
7255     regs = cur_regs(env);
7256
7257     if (meta.uninit_dynptr_regno) {
7258         /* we write BPF_DW bits (8 bytes) at a time */
7259         for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) {
7260             err = check_mem_access(env, insn_idx, meta.uninit_dynptr_regno,
7261                            i, BPF_DW, BPF_WRITE, -1, false);
7262             if (err)
7263                 return err;
7264         }
7265
7266         err = mark_stack_slots_dynptr(env, &regs[meta.uninit_dynptr_regno],
7267                           fn->arg_type[meta.uninit_dynptr_regno - BPF_REG_1],
7268                           insn_idx);
7269         if (err)
7270             return err;
7271     }
7272
7273     if (meta.release_regno) {
7274         err = -EINVAL;
7275         if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1]))
7276             err = unmark_stack_slots_dynptr(env, &regs[meta.release_regno]);
7277         else if (meta.ref_obj_id)
7278             err = release_reference(env, meta.ref_obj_id);
7279         /* meta.ref_obj_id can only be 0 if register that is meant to be
7280          * released is NULL, which must be > R0.
7281          */
7282         else if (register_is_null(&regs[meta.release_regno]))
7283             err = 0;
7284         if (err) {
7285             verbose(env, "func %s#%d reference has not been acquired before\n",
7286                 func_id_name(func_id), func_id);
7287             return err;
7288         }
7289     }
7290
7291     switch (func_id) {
7292     case BPF_FUNC_tail_call:
7293         err = check_reference_leak(env);
7294         if (err) {
7295             verbose(env, "tail_call would lead to reference leak\n");
7296             return err;
7297         }
7298         break;
7299     case BPF_FUNC_get_local_storage:
7300         /* check that flags argument in get_local_storage(map, flags) is 0,
7301          * this is required because get_local_storage() can't return an error.
7302          */
7303         if (!register_is_null(&regs[BPF_REG_2])) {
7304             verbose(env, "get_local_storage() doesn't support non-zero flags\n");
7305             return -EINVAL;
7306         }
7307         break;
7308     case BPF_FUNC_for_each_map_elem:
7309         err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
7310                     set_map_elem_callback_state);
7311         break;
7312     case BPF_FUNC_timer_set_callback:
7313         err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
7314                     set_timer_callback_state);
7315         break;
7316     case BPF_FUNC_find_vma:
7317         err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
7318                     set_find_vma_callback_state);
7319         break;
7320     case BPF_FUNC_snprintf:
7321         err = check_bpf_snprintf_call(env, regs);
7322         break;
7323     case BPF_FUNC_loop:
7324         update_loop_inline_state(env, meta.subprogno);
7325         err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
7326                     set_loop_callback_state);
7327         break;
7328     case BPF_FUNC_dynptr_from_mem:
7329         if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) {
7330             verbose(env, "Unsupported reg type %s for bpf_dynptr_from_mem data\n",
7331                 reg_type_str(env, regs[BPF_REG_1].type));
7332             return -EACCES;
7333         }
7334         break;
7335     case BPF_FUNC_set_retval:
7336         if (prog_type == BPF_PROG_TYPE_LSM &&
7337             env->prog->expected_attach_type == BPF_LSM_CGROUP) {
7338             if (!env->prog->aux->attach_func_proto->type) {
7339                 /* Make sure programs that attach to void
7340                  * hooks don't try to modify return value.
7341                  */
7342                 verbose(env, "BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
7343                 return -EINVAL;
7344             }
7345         }
7346         break;
7347     }
7348
7349     if (err)
7350         return err;
7351
7352     /* reset caller saved regs */
7353     for (i = 0; i < CALLER_SAVED_REGS; i++) {
7354         mark_reg_not_init(env, regs, caller_saved[i]);
7355         check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
7356     }
7357
7358     /* helper call returns 64-bit value. */
7359     regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
7360
7361     /* update return register (already marked as written above) */
7362     ret_type = fn->ret_type;
7363     ret_flag = type_flag(fn->ret_type);
7364     if (ret_type == RET_INTEGER) {
7365         /* sets type to SCALAR_VALUE */
7366         mark_reg_unknown(env, regs, BPF_REG_0);
7367     } else if (ret_type == RET_VOID) {
7368         regs[BPF_REG_0].type = NOT_INIT;
7369     } else if (base_type(ret_type) == RET_PTR_TO_MAP_VALUE) {
7370         /* There is no offset yet applied, variable or fixed */
7371         mark_reg_known_zero(env, regs, BPF_REG_0);
7372         /* remember map_ptr, so that check_map_access()
7373          * can check 'value_size' boundary of memory access
7374          * to map element returned from bpf_map_lookup_elem()
7375          */
7376         if (meta.map_ptr == NULL) {
7377             verbose(env,
7378                 "kernel subsystem misconfigured verifier\n");
7379             return -EINVAL;
7380         }
7381         regs[BPF_REG_0].map_ptr = meta.map_ptr;
7382         regs[BPF_REG_0].map_uid = meta.map_uid;
7383         regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
7384         if (!type_may_be_null(ret_type) &&
7385             map_value_has_spin_lock(meta.map_ptr)) {
7386             regs[BPF_REG_0].id = ++env->id_gen;
7387         }
7388     } else if (base_type(ret_type) == RET_PTR_TO_SOCKET) {
7389         mark_reg_known_zero(env, regs, BPF_REG_0);
7390         regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
7391     } else if (base_type(ret_type) == RET_PTR_TO_SOCK_COMMON) {
7392         mark_reg_known_zero(env, regs, BPF_REG_0);
7393         regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag;
7394     } else if (base_type(ret_type) == RET_PTR_TO_TCP_SOCK) {
7395         mark_reg_known_zero(env, regs, BPF_REG_0);
7396         regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
7397     } else if (base_type(ret_type) == RET_PTR_TO_ALLOC_MEM) {
7398         mark_reg_known_zero(env, regs, BPF_REG_0);
7399         regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
7400         regs[BPF_REG_0].mem_size = meta.mem_size;
7401     } else if (base_type(ret_type) == RET_PTR_TO_MEM_OR_BTF_ID) {
7402         const struct btf_type *t;
7403
7404         mark_reg_known_zero(env, regs, BPF_REG_0);
7405         t = btf_type_skip_modifiers(meta.ret_btf, meta.ret_btf_id, NULL);
7406         if (!btf_type_is_struct(t)) {
7407             u32 tsize;
7408             const struct btf_type *ret;
7409             const char *tname;
7410
7411             /* resolve the type size of ksym. */
7412             ret = btf_resolve_size(meta.ret_btf, t, &tsize);
7413             if (IS_ERR(ret)) {
7414                 tname = btf_name_by_offset(meta.ret_btf, t->name_off);
7415                 verbose(env, "unable to resolve the size of type '%s': %ld\n",
7416                     tname, PTR_ERR(ret));
7417                 return -EINVAL;
7418             }
7419             regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
7420             regs[BPF_REG_0].mem_size = tsize;
7421         } else {
7422             /* MEM_RDONLY may be carried from ret_flag, but it
7423              * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
7424              * it will confuse the check of PTR_TO_BTF_ID in
7425              * check_mem_access().
7426              */
7427             ret_flag &= ~MEM_RDONLY;
7428
7429             regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
7430             regs[BPF_REG_0].btf = meta.ret_btf;
7431             regs[BPF_REG_0].btf_id = meta.ret_btf_id;
7432         }
7433     } else if (base_type(ret_type) == RET_PTR_TO_BTF_ID) {
7434         struct btf *ret_btf;
7435         int ret_btf_id;
7436
7437         mark_reg_known_zero(env, regs, BPF_REG_0);
7438         regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
7439         if (func_id == BPF_FUNC_kptr_xchg) {
7440             ret_btf = meta.kptr_off_desc->kptr.btf;
7441             ret_btf_id = meta.kptr_off_desc->kptr.btf_id;
7442         } else {
7443             ret_btf = btf_vmlinux;
7444             ret_btf_id = *fn->ret_btf_id;
7445         }
7446         if (ret_btf_id == 0) {
7447             verbose(env, "invalid return type %u of func %s#%d\n",
7448                 base_type(ret_type), func_id_name(func_id),
7449                 func_id);
7450             return -EINVAL;
7451         }
7452         regs[BPF_REG_0].btf = ret_btf;
7453         regs[BPF_REG_0].btf_id = ret_btf_id;
7454     } else {
7455         verbose(env, "unknown return type %u of func %s#%d\n",
7456             base_type(ret_type), func_id_name(func_id), func_id);
7457         return -EINVAL;
7458     }
7459
7460     if (type_may_be_null(regs[BPF_REG_0].type))
7461         regs[BPF_REG_0].id = ++env->id_gen;
7462
7463     if (is_ptr_cast_function(func_id)) {
7464         /* For release_reference() */
7465         regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
7466     } else if (is_acquire_function(func_id, meta.map_ptr)) {
7467         int id = acquire_reference_state(env, insn_idx);
7468
7469         if (id < 0)
7470             return id;
7471         /* For mark_ptr_or_null_reg() */
7472         regs[BPF_REG_0].id = id;
7473         /* For release_reference() */
7474         regs[BPF_REG_0].ref_obj_id = id;
7475     } else if (func_id == BPF_FUNC_dynptr_data) {
7476         int dynptr_id = 0, i;
7477
7478         /* Find the id of the dynptr we're acquiring a reference to */
7479         for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
7480             if (arg_type_is_dynptr(fn->arg_type[i])) {
7481                 if (dynptr_id) {
7482                     verbose(env, "verifier internal error: multiple dynptr args in func\n");
7483                     return -EFAULT;
7484                 }
7485                 dynptr_id = stack_slot_get_id(env, &regs[BPF_REG_1 + i]);
7486             }
7487         }
7488         /* For release_reference() */
7489         regs[BPF_REG_0].ref_obj_id = dynptr_id;
7490     }
7491
7492     do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
7493
7494     err = check_map_func_compatibility(env, meta.map_ptr, func_id);
7495     if (err)
7496         return err;
7497
7498     if ((func_id == BPF_FUNC_get_stack ||
7499          func_id == BPF_FUNC_get_task_stack) &&
7500         !env->prog->has_callchain_buf) {
7501         const char *err_str;
7502
7503 #ifdef CONFIG_PERF_EVENTS
7504         err = get_callchain_buffers(sysctl_perf_event_max_stack);
7505         err_str = "cannot get callchain buffer for func %s#%d\n";
7506 #else
7507         err = -ENOTSUPP;
7508         err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
7509 #endif
7510         if (err) {
7511             verbose(env, err_str, func_id_name(func_id), func_id);
7512             return err;
7513         }
7514
7515         env->prog->has_callchain_buf = true;
7516     }
7517
7518     if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
7519         env->prog->call_get_stack = true;
7520
7521     if (func_id == BPF_FUNC_get_func_ip) {
7522         if (check_get_func_ip(env))
7523             return -ENOTSUPP;
7524         env->prog->call_get_func_ip = true;
7525     }
7526
7527     if (changes_data)
7528         clear_all_pkt_pointers(env);
7529     return 0;
7530 }
7531
7532 /* mark_btf_func_reg_size() is used when the reg size is determined by
7533  * the BTF func_proto's return value size and argument.
7534  */
7535 static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
7536                    size_t reg_size)
7537 {
7538     struct bpf_reg_state *reg = &cur_regs(env)[regno];
7539
7540     if (regno == BPF_REG_0) {
7541         /* Function return value */
7542         reg->live |= REG_LIVE_WRITTEN;
7543         reg->subreg_def = reg_size == sizeof(u64) ?
7544             DEF_NOT_SUBREG : env->insn_idx + 1;
7545     } else {
7546         /* Function argument */
7547         if (reg_size == sizeof(u64)) {
7548             mark_insn_zext(env, reg);
7549             mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
7550         } else {
7551             mark_reg_read(env, reg, reg->parent, REG_LIVE_READ32);
7552         }
7553     }
7554 }
7555
7556 static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
7557                 int *insn_idx_p)
7558 {
7559     const struct btf_type *t, *func, *func_proto, *ptr_type;
7560     struct bpf_reg_state *regs = cur_regs(env);
7561     const char *func_name, *ptr_type_name;
7562     u32 i, nargs, func_id, ptr_type_id;
7563     int err, insn_idx = *insn_idx_p;
7564     const struct btf_param *args;
7565     struct btf *desc_btf;
7566     u32 *kfunc_flags;
7567     bool acq;
7568
7569     /* skip for now, but return error when we find this in fixup_kfunc_call */
7570     if (!insn->imm)
7571         return 0;
7572
7573     desc_btf = find_kfunc_desc_btf(env, insn->off);
7574     if (IS_ERR(desc_btf))
7575         return PTR_ERR(desc_btf);
7576
7577     func_id = insn->imm;
7578     func = btf_type_by_id(desc_btf, func_id);
7579     func_name = btf_name_by_offset(desc_btf, func->name_off);
7580     func_proto = btf_type_by_id(desc_btf, func->type);
7581
7582     kfunc_flags = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), func_id);
7583     if (!kfunc_flags) {
7584         verbose(env, "calling kernel function %s is not allowed\n",
7585             func_name);
7586         return -EACCES;
7587     }
7588     acq = *kfunc_flags & KF_ACQUIRE;
7589
7590     /* Check the arguments */
7591     err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs, *kfunc_flags);
7592     if (err < 0)
7593         return err;
7594     /* In case of release function, we get register number of refcounted
7595      * PTR_TO_BTF_ID back from btf_check_kfunc_arg_match, do the release now
7596      */
7597     if (err) {
7598         err = release_reference(env, regs[err].ref_obj_id);
7599         if (err) {
7600             verbose(env, "kfunc %s#%d reference has not been acquired before\n",
7601                 func_name, func_id);
7602             return err;
7603         }
7604     }
7605
7606     for (i = 0; i < CALLER_SAVED_REGS; i++)
7607         mark_reg_not_init(env, regs, caller_saved[i]);
7608
7609     /* Check return type */
7610     t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL);
7611
7612     if (acq && !btf_type_is_ptr(t)) {
7613         verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
7614         return -EINVAL;
7615     }
7616
7617     if (btf_type_is_scalar(t)) {
7618         mark_reg_unknown(env, regs, BPF_REG_0);
7619         mark_btf_func_reg_size(env, BPF_REG_0, t->size);
7620     } else if (btf_type_is_ptr(t)) {
7621         ptr_type = btf_type_skip_modifiers(desc_btf, t->type,
7622                            &ptr_type_id);
7623         if (!btf_type_is_struct(ptr_type)) {
7624             ptr_type_name = btf_name_by_offset(desc_btf,
7625                                ptr_type->name_off);
7626             verbose(env, "kernel function %s returns pointer type %s %s is not supported\n",
7627                 func_name, btf_type_str(ptr_type),
7628                 ptr_type_name);
7629             return -EINVAL;
7630         }
7631         mark_reg_known_zero(env, regs, BPF_REG_0);
7632         regs[BPF_REG_0].btf = desc_btf;
7633         regs[BPF_REG_0].type = PTR_TO_BTF_ID;
7634         regs[BPF_REG_0].btf_id = ptr_type_id;
7635         if (*kfunc_flags & KF_RET_NULL) {
7636             regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
7637             /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
7638             regs[BPF_REG_0].id = ++env->id_gen;
7639         }
7640         mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
7641         if (acq) {
7642             int id = acquire_reference_state(env, insn_idx);
7643
7644             if (id < 0)
7645                 return id;
7646             regs[BPF_REG_0].id = id;
7647             regs[BPF_REG_0].ref_obj_id = id;
7648         }
7649     } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
7650
7651     nargs = btf_type_vlen(func_proto);
7652     args = (const struct btf_param *)(func_proto + 1);
7653     for (i = 0; i < nargs; i++) {
7654         u32 regno = i + 1;
7655
7656         t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL);
7657         if (btf_type_is_ptr(t))
7658             mark_btf_func_reg_size(env, regno, sizeof(void *));
7659         else
7660             /* scalar. ensured by btf_check_kfunc_arg_match() */
7661             mark_btf_func_reg_size(env, regno, t->size);
7662     }
7663
7664     return 0;
7665 }
7666
7667 static bool signed_add_overflows(s64 a, s64 b)
7668 {
7669     /* Do the add in u64, where overflow is well-defined */
7670     s64 res = (s64)((u64)a + (u64)b);
7671
7672     if (b < 0)
7673         return res > a;
7674     return res < a;
7675 }
7676
7677 static bool signed_add32_overflows(s32 a, s32 b)
7678 {
7679     /* Do the add in u32, where overflow is well-defined */
7680     s32 res = (s32)((u32)a + (u32)b);
7681
7682     if (b < 0)
7683         return res > a;
7684     return res < a;
7685 }
7686
7687 static bool signed_sub_overflows(s64 a, s64 b)
7688 {
7689     /* Do the sub in u64, where overflow is well-defined */
7690     s64 res = (s64)((u64)a - (u64)b);
7691
7692     if (b < 0)
7693         return res < a;
7694     return res > a;
7695 }
7696
7697 static bool signed_sub32_overflows(s32 a, s32 b)
7698 {
7699     /* Do the sub in u32, where overflow is well-defined */
7700     s32 res = (s32)((u32)a - (u32)b);
7701
7702     if (b < 0)
7703         return res < a;
7704     return res > a;
7705 }
7706
7707 static bool check_reg_sane_offset(struct bpf_verifier_env *env,
7708                   const struct bpf_reg_state *reg,
7709                   enum bpf_reg_type type)
7710 {
7711     bool known = tnum_is_const(reg->var_off);
7712     s64 val = reg->var_off.value;
7713     s64 smin = reg->smin_value;
7714
7715     if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
7716         verbose(env, "math between %s pointer and %lld is not allowed\n",
7717             reg_type_str(env, type), val);
7718         return false;
7719     }
7720
7721     if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
7722         verbose(env, "%s pointer offset %d is not allowed\n",
7723             reg_type_str(env, type), reg->off);
7724         return false;
7725     }
7726
7727     if (smin == S64_MIN) {
7728         verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
7729             reg_type_str(env, type));
7730         return false;
7731     }
7732
7733     if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
7734         verbose(env, "value %lld makes %s pointer be out of bounds\n",
7735             smin, reg_type_str(env, type));
7736         return false;
7737     }
7738
7739     return true;
7740 }
7741
7742 enum {
7743     REASON_BOUNDS   = -1,
7744     REASON_TYPE = -2,
7745     REASON_PATHS    = -3,
7746     REASON_LIMIT    = -4,
7747     REASON_STACK    = -5,
7748 };
7749
7750 static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
7751                   u32 *alu_limit, bool mask_to_left)
7752 {
7753     u32 max = 0, ptr_limit = 0;
7754
7755     switch (ptr_reg->type) {
7756     case PTR_TO_STACK:
7757         /* Offset 0 is out-of-bounds, but acceptable start for the
7758          * left direction, see BPF_REG_FP. Also, unknown scalar
7759          * offset where we would need to deal with min/max bounds is
7760          * currently prohibited for unprivileged.
7761          */
7762         max = MAX_BPF_STACK + mask_to_left;
7763         ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
7764         break;
7765     case PTR_TO_MAP_VALUE:
7766         max = ptr_reg->map_ptr->value_size;
7767         ptr_limit = (mask_to_left ?
7768                  ptr_reg->smin_value :
7769                  ptr_reg->umax_value) + ptr_reg->off;
7770         break;
7771     default:
7772         return REASON_TYPE;
7773     }
7774
7775     if (ptr_limit >= max)
7776         return REASON_LIMIT;
7777     *alu_limit = ptr_limit;
7778     return 0;
7779 }
7780
7781 static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
7782                     const struct bpf_insn *insn)
7783 {
7784     return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
7785 }
7786
7787 static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
7788                        u32 alu_state, u32 alu_limit)
7789 {
7790     /* If we arrived here from different branches with different
7791      * state or limits to sanitize, then this won't work.
7792      */
7793     if (aux->alu_state &&
7794         (aux->alu_state != alu_state ||
7795          aux->alu_limit != alu_limit))
7796         return REASON_PATHS;
7797
7798     /* Corresponding fixup done in do_misc_fixups(). */
7799     aux->alu_state = alu_state;
7800     aux->alu_limit = alu_limit;
7801     return 0;
7802 }
7803
7804 static int sanitize_val_alu(struct bpf_verifier_env *env,
7805                 struct bpf_insn *insn)
7806 {
7807     struct bpf_insn_aux_data *aux = cur_aux(env);
7808
7809     if (can_skip_alu_sanitation(env, insn))
7810         return 0;
7811
7812     return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
7813 }
7814
7815 static bool sanitize_needed(u8 opcode)
7816 {
7817     return opcode == BPF_ADD || opcode == BPF_SUB;
7818 }
7819
7820 struct bpf_sanitize_info {
7821     struct bpf_insn_aux_data aux;
7822     bool mask_to_left;
7823 };
7824
7825 static struct bpf_verifier_state *
7826 sanitize_speculative_path(struct bpf_verifier_env *env,
7827               const struct bpf_insn *insn,
7828               u32 next_idx, u32 curr_idx)
7829 {
7830     struct bpf_verifier_state *branch;
7831     struct bpf_reg_state *regs;
7832
7833     branch = push_stack(env, next_idx, curr_idx, true);
7834     if (branch && insn) {
7835         regs = branch->frame[branch->curframe]->regs;
7836         if (BPF_SRC(insn->code) == BPF_K) {
7837             mark_reg_unknown(env, regs, insn->dst_reg);
7838         } else if (BPF_SRC(insn->code) == BPF_X) {
7839             mark_reg_unknown(env, regs, insn->dst_reg);
7840             mark_reg_unknown(env, regs, insn->src_reg);
7841         }
7842     }
7843     return branch;
7844 }
7845
7846 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
7847                 struct bpf_insn *insn,
7848                 const struct bpf_reg_state *ptr_reg,
7849                 const struct bpf_reg_state *off_reg,
7850                 struct bpf_reg_state *dst_reg,
7851                 struct bpf_sanitize_info *info,
7852                 const bool commit_window)
7853 {
7854     struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
7855     struct bpf_verifier_state *vstate = env->cur_state;
7856     bool off_is_imm = tnum_is_const(off_reg->var_off);
7857     bool off_is_neg = off_reg->smin_value < 0;
7858     bool ptr_is_dst_reg = ptr_reg == dst_reg;
7859     u8 opcode = BPF_OP(insn->code);
7860     u32 alu_state, alu_limit;
7861     struct bpf_reg_state tmp;
7862     bool ret;
7863     int err;
7864
7865     if (can_skip_alu_sanitation(env, insn))
7866         return 0;
7867
7868     /* We already marked aux for masking from non-speculative
7869      * paths, thus we got here in the first place. We only care
7870      * to explore bad access from here.
7871      */
7872     if (vstate->speculative)
7873         goto do_sim;
7874
7875     if (!commit_window) {
7876         if (!tnum_is_const(off_reg->var_off) &&
7877             (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
7878             return REASON_BOUNDS;
7879
7880         info->mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
7881                      (opcode == BPF_SUB && !off_is_neg);
7882     }
7883
7884     err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
7885     if (err < 0)
7886         return err;
7887
7888     if (commit_window) {
7889         /* In commit phase we narrow the masking window based on
7890          * the observed pointer move after the simulated operation.
7891          */
7892         alu_state = info->aux.alu_state;
7893         alu_limit = abs(info->aux.alu_limit - alu_limit);
7894     } else {
7895         alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
7896         alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
7897         alu_state |= ptr_is_dst_reg ?
7898                  BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
7899
7900         /* Limit pruning on unknown scalars to enable deep search for
7901          * potential masking differences from other program paths.
7902          */
7903         if (!off_is_imm)
7904             env->explore_alu_limits = true;
7905     }
7906
7907     err = update_alu_sanitation_state(aux, alu_state, alu_limit);
7908     if (err < 0)
7909         return err;
7910 do_sim:
7911     /* If we're in commit phase, we're done here given we already
7912      * pushed the truncated dst_reg into the speculative verification
7913      * stack.
7914      *
7915      * Also, when register is a known constant, we rewrite register-based
7916      * operation to immediate-based, and thus do not need masking (and as
7917      * a consequence, do not need to simulate the zero-truncation either).
7918      */
7919     if (commit_window || off_is_imm)
7920         return 0;
7921
7922     /* Simulate and find potential out-of-bounds access under
7923      * speculative execution from truncation as a result of
7924      * masking when off was not within expected range. If off
7925      * sits in dst, then we temporarily need to move ptr there
7926      * to simulate dst (== 0) +/-= ptr. Needed, for example,
7927      * for cases where we use K-based arithmetic in one direction
7928      * and truncated reg-based in the other in order to explore
7929      * bad access.
7930      */
7931     if (!ptr_is_dst_reg) {
7932         tmp = *dst_reg;
7933         *dst_reg = *ptr_reg;
7934     }
7935     ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
7936                     env->insn_idx);
7937     if (!ptr_is_dst_reg && ret)
7938         *dst_reg = tmp;
7939     return !ret ? REASON_STACK : 0;
7940 }
7941
7942 static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
7943 {
7944     struct bpf_verifier_state *vstate = env->cur_state;
7945
7946     /* If we simulate paths under speculation, we don't update the
7947      * insn as 'seen' such that when we verify unreachable paths in
7948      * the non-speculative domain, sanitize_dead_code() can still
7949      * rewrite/sanitize them.
7950      */
7951     if (!vstate->speculative)
7952         env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
7953 }
7954
7955 static int sanitize_err(struct bpf_verifier_env *env,
7956             const struct bpf_insn *insn, int reason,
7957             const struct bpf_reg_state *off_reg,
7958             const struct bpf_reg_state *dst_reg)
7959 {
7960     static const char *err = "pointer arithmetic with it prohibited for !root";
7961     const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
7962     u32 dst = insn->dst_reg, src = insn->src_reg;
7963
7964     switch (reason) {
7965     case REASON_BOUNDS:
7966         verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
7967             off_reg == dst_reg ? dst : src, err);
7968         break;
7969     case REASON_TYPE:
7970         verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
7971             off_reg == dst_reg ? src : dst, err);
7972         break;
7973     case REASON_PATHS:
7974         verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
7975             dst, op, err);
7976         break;
7977     case REASON_LIMIT:
7978         verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
7979             dst, op, err);
7980         break;
7981     case REASON_STACK:
7982         verbose(env, "R%d could not be pushed for speculative verification, %s\n",
7983             dst, err);
7984         break;
7985     default:
7986         verbose(env, "verifier internal error: unknown reason (%d)\n",
7987             reason);
7988         break;
7989     }
7990
7991     return -EACCES;
7992 }
7993
7994 /* check that stack access falls within stack limits and that 'reg' doesn't
7995  * have a variable offset.
7996  *
7997  * Variable offset is prohibited for unprivileged mode for simplicity since it
7998  * requires corresponding support in Spectre masking for stack ALU.  See also
7999  * retrieve_ptr_limit().
8000  *
8001  *
8002  * 'off' includes 'reg->off'.
8003  */
8004 static int check_stack_access_for_ptr_arithmetic(
8005                 struct bpf_verifier_env *env,
8006                 int regno,
8007                 const struct bpf_reg_state *reg,
8008                 int off)
8009 {
8010     if (!tnum_is_const(reg->var_off)) {
8011         char tn_buf[48];
8012
8013         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
8014         verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
8015             regno, tn_buf, off);
8016         return -EACCES;
8017     }
8018
8019     if (off >= 0 || off < -MAX_BPF_STACK) {
8020         verbose(env, "R%d stack pointer arithmetic goes out of range, "
8021             "prohibited for !root; off=%d\n", regno, off);
8022         return -EACCES;
8023     }
8024
8025     return 0;
8026 }
8027
8028 static int sanitize_check_bounds(struct bpf_verifier_env *env,
8029                  const struct bpf_insn *insn,
8030                  const struct bpf_reg_state *dst_reg)
8031 {
8032     u32 dst = insn->dst_reg;
8033
8034     /* For unprivileged we require that resulting offset must be in bounds
8035      * in order to be able to sanitize access later on.
8036      */
8037     if (env->bypass_spec_v1)
8038         return 0;
8039
8040     switch (dst_reg->type) {
8041     case PTR_TO_STACK:
8042         if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
8043                     dst_reg->off + dst_reg->var_off.value))
8044             return -EACCES;
8045         break;
8046     case PTR_TO_MAP_VALUE:
8047         if (check_map_access(env, dst, dst_reg->off, 1, false, ACCESS_HELPER)) {
8048             verbose(env, "R%d pointer arithmetic of map value goes out of range, "
8049                 "prohibited for !root\n", dst);
8050             return -EACCES;
8051         }
8052         break;
8053     default:
8054         break;
8055     }
8056
8057     return 0;
8058 }
8059
8060 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
8061  * Caller should also handle BPF_MOV case separately.
8062  * If we return -EACCES, caller may want to try again treating pointer as a
8063  * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
8064  */
8065 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
8066                    struct bpf_insn *insn,
8067                    const struct bpf_reg_state *ptr_reg,
8068                    const struct bpf_reg_state *off_reg)
8069 {
8070     struct bpf_verifier_state *vstate = env->cur_state;
8071     struct bpf_func_state *state = vstate->frame[vstate->curframe];
8072     struct bpf_reg_state *regs = state->regs, *dst_reg;
8073     bool known = tnum_is_const(off_reg->var_off);
8074     s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
8075         smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
8076     u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
8077         umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
8078     struct bpf_sanitize_info info = {};
8079     u8 opcode = BPF_OP(insn->code);
8080     u32 dst = insn->dst_reg;
8081     int ret;
8082
8083     dst_reg = &regs[dst];
8084
8085     if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
8086         smin_val > smax_val || umin_val > umax_val) {
8087         /* Taint dst register if offset had invalid bounds derived from
8088          * e.g. dead branches.
8089          */
8090         __mark_reg_unknown(env, dst_reg);
8091         return 0;
8092     }
8093
8094     if (BPF_CLASS(insn->code) != BPF_ALU64) {
8095         /* 32-bit ALU ops on pointers produce (meaningless) scalars */
8096         if (opcode == BPF_SUB && env->allow_ptr_leaks) {
8097             __mark_reg_unknown(env, dst_reg);
8098             return 0;
8099         }
8100
8101         verbose(env,
8102             "R%d 32-bit pointer arithmetic prohibited\n",
8103             dst);
8104         return -EACCES;
8105     }
8106
8107     if (ptr_reg->type & PTR_MAYBE_NULL) {
8108         verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
8109             dst, reg_type_str(env, ptr_reg->type));
8110         return -EACCES;
8111     }
8112
8113     switch (base_type(ptr_reg->type)) {
8114     case CONST_PTR_TO_MAP:
8115         /* smin_val represents the known value */
8116         if (known && smin_val == 0 && opcode == BPF_ADD)
8117             break;
8118         fallthrough;
8119     case PTR_TO_PACKET_END:
8120     case PTR_TO_SOCKET:
8121     case PTR_TO_SOCK_COMMON:
8122     case PTR_TO_TCP_SOCK:
8123     case PTR_TO_XDP_SOCK:
8124         verbose(env, "R%d pointer arithmetic on %s prohibited\n",
8125             dst, reg_type_str(env, ptr_reg->type));
8126         return -EACCES;
8127     default:
8128         break;
8129     }
8130
8131     /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
8132      * The id may be overwritten later if we create a new variable offset.
8133      */
8134     dst_reg->type = ptr_reg->type;
8135     dst_reg->id = ptr_reg->id;
8136
8137     if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
8138         !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
8139         return -EINVAL;
8140
8141     /* pointer types do not carry 32-bit bounds at the moment. */
8142     __mark_reg32_unbounded(dst_reg);
8143
8144     if (sanitize_needed(opcode)) {
8145         ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
8146                        &info, false);
8147         if (ret < 0)
8148             return sanitize_err(env, insn, ret, off_reg, dst_reg);
8149     }
8150
8151     switch (opcode) {
8152     case BPF_ADD:
8153         /* We can take a fixed offset as long as it doesn't overflow
8154          * the s32 'off' field
8155          */
8156         if (known && (ptr_reg->off + smin_val ==
8157                   (s64)(s32)(ptr_reg->off + smin_val))) {
8158             /* pointer += K.  Accumulate it into fixed offset */
8159             dst_reg->smin_value = smin_ptr;
8160             dst_reg->smax_value = smax_ptr;
8161             dst_reg->umin_value = umin_ptr;
8162             dst_reg->umax_value = umax_ptr;
8163             dst_reg->var_off = ptr_reg->var_off;
8164             dst_reg->off = ptr_reg->off + smin_val;
8165             dst_reg->raw = ptr_reg->raw;
8166             break;
8167         }
8168         /* A new variable offset is created.  Note that off_reg->off
8169          * == 0, since it's a scalar.
8170          * dst_reg gets the pointer type and since some positive
8171          * integer value was added to the pointer, give it a new 'id'
8172          * if it's a PTR_TO_PACKET.
8173          * this creates a new 'base' pointer, off_reg (variable) gets
8174          * added into the variable offset, and we copy the fixed offset
8175          * from ptr_reg.
8176          */
8177         if (signed_add_overflows(smin_ptr, smin_val) ||
8178             signed_add_overflows(smax_ptr, smax_val)) {
8179             dst_reg->smin_value = S64_MIN;
8180             dst_reg->smax_value = S64_MAX;
8181         } else {
8182             dst_reg->smin_value = smin_ptr + smin_val;
8183             dst_reg->smax_value = smax_ptr + smax_val;
8184         }
8185         if (umin_ptr + umin_val < umin_ptr ||
8186             umax_ptr + umax_val < umax_ptr) {
8187             dst_reg->umin_value = 0;
8188             dst_reg->umax_value = U64_MAX;
8189         } else {
8190             dst_reg->umin_value = umin_ptr + umin_val;
8191             dst_reg->umax_value = umax_ptr + umax_val;
8192         }
8193         dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
8194         dst_reg->off = ptr_reg->off;
8195         dst_reg->raw = ptr_reg->raw;
8196         if (reg_is_pkt_pointer(ptr_reg)) {
8197             dst_reg->id = ++env->id_gen;
8198             /* something was added to pkt_ptr, set range to zero */
8199             memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
8200         }
8201         break;
8202     case BPF_SUB:
8203         if (dst_reg == off_reg) {
8204             /* scalar -= pointer.  Creates an unknown scalar */
8205             verbose(env, "R%d tried to subtract pointer from scalar\n",
8206                 dst);
8207             return -EACCES;
8208         }
8209         /* We don't allow subtraction from FP, because (according to
8210          * test_verifier.c test "invalid fp arithmetic", JITs might not
8211          * be able to deal with it.
8212          */
8213         if (ptr_reg->type == PTR_TO_STACK) {
8214             verbose(env, "R%d subtraction from stack pointer prohibited\n",
8215                 dst);
8216             return -EACCES;
8217         }
8218         if (known && (ptr_reg->off - smin_val ==
8219                   (s64)(s32)(ptr_reg->off - smin_val))) {
8220             /* pointer -= K.  Subtract it from fixed offset */
8221             dst_reg->smin_value = smin_ptr;
8222             dst_reg->smax_value = smax_ptr;
8223             dst_reg->umin_value = umin_ptr;
8224             dst_reg->umax_value = umax_ptr;
8225             dst_reg->var_off = ptr_reg->var_off;
8226             dst_reg->id = ptr_reg->id;
8227             dst_reg->off = ptr_reg->off - smin_val;
8228             dst_reg->raw = ptr_reg->raw;
8229             break;
8230         }
8231         /* A new variable offset is created.  If the subtrahend is known
8232          * nonnegative, then any reg->range we had before is still good.
8233          */
8234         if (signed_sub_overflows(smin_ptr, smax_val) ||
8235             signed_sub_overflows(smax_ptr, smin_val)) {
8236             /* Overflow possible, we know nothing */
8237             dst_reg->smin_value = S64_MIN;
8238             dst_reg->smax_value = S64_MAX;
8239         } else {
8240             dst_reg->smin_value = smin_ptr - smax_val;
8241             dst_reg->smax_value = smax_ptr - smin_val;
8242         }
8243         if (umin_ptr < umax_val) {
8244             /* Overflow possible, we know nothing */
8245             dst_reg->umin_value = 0;
8246             dst_reg->umax_value = U64_MAX;
8247         } else {
8248             /* Cannot overflow (as long as bounds are consistent) */
8249             dst_reg->umin_value = umin_ptr - umax_val;
8250             dst_reg->umax_value = umax_ptr - umin_val;
8251         }
8252         dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
8253         dst_reg->off = ptr_reg->off;
8254         dst_reg->raw = ptr_reg->raw;
8255         if (reg_is_pkt_pointer(ptr_reg)) {
8256             dst_reg->id = ++env->id_gen;
8257             /* something was added to pkt_ptr, set range to zero */
8258             if (smin_val < 0)
8259                 memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
8260         }
8261         break;
8262     case BPF_AND:
8263     case BPF_OR:
8264     case BPF_XOR:
8265         /* bitwise ops on pointers are troublesome, prohibit. */
8266         verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
8267             dst, bpf_alu_string[opcode >> 4]);
8268         return -EACCES;
8269     default:
8270         /* other operators (e.g. MUL,LSH) produce non-pointer results */
8271         verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
8272             dst, bpf_alu_string[opcode >> 4]);
8273         return -EACCES;
8274     }
8275
8276     if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
8277         return -EINVAL;
8278     reg_bounds_sync(dst_reg);
8279     if (sanitize_check_bounds(env, insn, dst_reg) < 0)
8280         return -EACCES;
8281     if (sanitize_needed(opcode)) {
8282         ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
8283                        &info, true);
8284         if (ret < 0)
8285             return sanitize_err(env, insn, ret, off_reg, dst_reg);
8286     }
8287
8288     return 0;
8289 }
8290
8291 static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
8292                  struct bpf_reg_state *src_reg)
8293 {
8294     s32 smin_val = src_reg->s32_min_value;
8295     s32 smax_val = src_reg->s32_max_value;
8296     u32 umin_val = src_reg->u32_min_value;
8297     u32 umax_val = src_reg->u32_max_value;
8298
8299     if (signed_add32_overflows(dst_reg->s32_min_value, smin_val) ||
8300         signed_add32_overflows(dst_reg->s32_max_value, smax_val)) {
8301         dst_reg->s32_min_value = S32_MIN;
8302         dst_reg->s32_max_value = S32_MAX;
8303     } else {
8304         dst_reg->s32_min_value += smin_val;
8305         dst_reg->s32_max_value += smax_val;
8306     }
8307     if (dst_reg->u32_min_value + umin_val < umin_val ||
8308         dst_reg->u32_max_value + umax_val < umax_val) {
8309         dst_reg->u32_min_value = 0;
8310         dst_reg->u32_max_value = U32_MAX;
8311     } else {
8312         dst_reg->u32_min_value += umin_val;
8313         dst_reg->u32_max_value += umax_val;
8314     }
8315 }
8316
8317 static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
8318                    struct bpf_reg_state *src_reg)
8319 {
8320     s64 smin_val = src_reg->smin_value;
8321     s64 smax_val = src_reg->smax_value;
8322     u64 umin_val = src_reg->umin_value;
8323     u64 umax_val = src_reg->umax_value;
8324
8325     if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
8326         signed_add_overflows(dst_reg->smax_value, smax_val)) {
8327         dst_reg->smin_value = S64_MIN;
8328         dst_reg->smax_value = S64_MAX;
8329     } else {
8330         dst_reg->smin_value += smin_val;
8331         dst_reg->smax_value += smax_val;
8332     }
8333     if (dst_reg->umin_value + umin_val < umin_val ||
8334         dst_reg->umax_value + umax_val < umax_val) {
8335         dst_reg->umin_value = 0;
8336         dst_reg->umax_value = U64_MAX;
8337     } else {
8338         dst_reg->umin_value += umin_val;
8339         dst_reg->umax_value += umax_val;
8340     }
8341 }
8342
8343 static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
8344                  struct bpf_reg_state *src_reg)
8345 {
8346     s32 smin_val = src_reg->s32_min_value;
8347     s32 smax_val = src_reg->s32_max_value;
8348     u32 umin_val = src_reg->u32_min_value;
8349     u32 umax_val = src_reg->u32_max_value;
8350
8351     if (signed_sub32_overflows(dst_reg->s32_min_value, smax_val) ||
8352         signed_sub32_overflows(dst_reg->s32_max_value, smin_val)) {
8353         /* Overflow possible, we know nothing */
8354         dst_reg->s32_min_value = S32_MIN;
8355         dst_reg->s32_max_value = S32_MAX;
8356     } else {
8357         dst_reg->s32_min_value -= smax_val;
8358         dst_reg->s32_max_value -= smin_val;
8359     }
8360     if (dst_reg->u32_min_value < umax_val) {
8361         /* Overflow possible, we know nothing */
8362         dst_reg->u32_min_value = 0;
8363         dst_reg->u32_max_value = U32_MAX;
8364     } else {
8365         /* Cannot overflow (as long as bounds are consistent) */
8366         dst_reg->u32_min_value -= umax_val;
8367         dst_reg->u32_max_value -= umin_val;
8368     }
8369 }
8370
8371 static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
8372                    struct bpf_reg_state *src_reg)
8373 {
8374     s64 smin_val = src_reg->smin_value;
8375     s64 smax_val = src_reg->smax_value;
8376     u64 umin_val = src_reg->umin_value;
8377     u64 umax_val = src_reg->umax_value;
8378
8379     if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
8380         signed_sub_overflows(dst_reg->smax_value, smin_val)) {
8381         /* Overflow possible, we know nothing */
8382         dst_reg->smin_value = S64_MIN;
8383         dst_reg->smax_value = S64_MAX;
8384     } else {
8385         dst_reg->smin_value -= smax_val;
8386         dst_reg->smax_value -= smin_val;
8387     }
8388     if (dst_reg->umin_value < umax_val) {
8389         /* Overflow possible, we know nothing */
8390         dst_reg->umin_value = 0;
8391         dst_reg->umax_value = U64_MAX;
8392     } else {
8393         /* Cannot overflow (as long as bounds are consistent) */
8394         dst_reg->umin_value -= umax_val;
8395         dst_reg->umax_value -= umin_val;
8396     }
8397 }
8398
8399 static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
8400                  struct bpf_reg_state *src_reg)
8401 {
8402     s32 smin_val = src_reg->s32_min_value;
8403     u32 umin_val = src_reg->u32_min_value;
8404     u32 umax_val = src_reg->u32_max_value;
8405
8406     if (smin_val < 0 || dst_reg->s32_min_value < 0) {
8407         /* Ain't nobody got time to multiply that sign */
8408         __mark_reg32_unbounded(dst_reg);
8409         return;
8410     }
8411     /* Both values are positive, so we can work with unsigned and
8412      * copy the result to signed (unless it exceeds S32_MAX).
8413      */
8414     if (umax_val > U16_MAX || dst_reg->u32_max_value > U16_MAX) {
8415         /* Potential overflow, we know nothing */
8416         __mark_reg32_unbounded(dst_reg);
8417         return;
8418     }
8419     dst_reg->u32_min_value *= umin_val;
8420     dst_reg->u32_max_value *= umax_val;
8421     if (dst_reg->u32_max_value > S32_MAX) {
8422         /* Overflow possible, we know nothing */
8423         dst_reg->s32_min_value = S32_MIN;
8424         dst_reg->s32_max_value = S32_MAX;
8425     } else {
8426         dst_reg->s32_min_value = dst_reg->u32_min_value;
8427         dst_reg->s32_max_value = dst_reg->u32_max_value;
8428     }
8429 }
8430
8431 static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
8432                    struct bpf_reg_state *src_reg)
8433 {
8434     s64 smin_val = src_reg->smin_value;
8435     u64 umin_val = src_reg->umin_value;
8436     u64 umax_val = src_reg->umax_value;
8437
8438     if (smin_val < 0 || dst_reg->smin_value < 0) {
8439         /* Ain't nobody got time to multiply that sign */
8440         __mark_reg64_unbounded(dst_reg);
8441         return;
8442     }
8443     /* Both values are positive, so we can work with unsigned and
8444      * copy the result to signed (unless it exceeds S64_MAX).
8445      */
8446     if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
8447         /* Potential overflow, we know nothing */
8448         __mark_reg64_unbounded(dst_reg);
8449         return;
8450     }
8451     dst_reg->umin_value *= umin_val;
8452     dst_reg->umax_value *= umax_val;
8453     if (dst_reg->umax_value > S64_MAX) {
8454         /* Overflow possible, we know nothing */
8455         dst_reg->smin_value = S64_MIN;
8456         dst_reg->smax_value = S64_MAX;
8457     } else {
8458         dst_reg->smin_value = dst_reg->umin_value;
8459         dst_reg->smax_value = dst_reg->umax_value;
8460     }
8461 }
8462
8463 static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
8464                  struct bpf_reg_state *src_reg)
8465 {
8466     bool src_known = tnum_subreg_is_const(src_reg->var_off);
8467     bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
8468     struct tnum var32_off = tnum_subreg(dst_reg->var_off);
8469     s32 smin_val = src_reg->s32_min_value;
8470     u32 umax_val = src_reg->u32_max_value;
8471
8472     if (src_known && dst_known) {
8473         __mark_reg32_known(dst_reg, var32_off.value);
8474         return;
8475     }
8476
8477     /* We get our minimum from the var_off, since that's inherently
8478      * bitwise.  Our maximum is the minimum of the operands' maxima.
8479      */
8480     dst_reg->u32_min_value = var32_off.value;
8481     dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
8482     if (dst_reg->s32_min_value < 0 || smin_val < 0) {
8483         /* Lose signed bounds when ANDing negative numbers,
8484          * ain't nobody got time for that.
8485          */
8486         dst_reg->s32_min_value = S32_MIN;
8487         dst_reg->s32_max_value = S32_MAX;
8488     } else {
8489         /* ANDing two positives gives a positive, so safe to
8490          * cast result into s64.
8491          */
8492         dst_reg->s32_min_value = dst_reg->u32_min_value;
8493         dst_reg->s32_max_value = dst_reg->u32_max_value;
8494     }
8495 }
8496
8497 static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
8498                    struct bpf_reg_state *src_reg)
8499 {
8500     bool src_known = tnum_is_const(src_reg->var_off);
8501     bool dst_known = tnum_is_const(dst_reg->var_off);
8502     s64 smin_val = src_reg->smin_value;
8503     u64 umax_val = src_reg->umax_value;
8504
8505     if (src_known && dst_known) {
8506         __mark_reg_known(dst_reg, dst_reg->var_off.value);
8507         return;
8508     }
8509
8510     /* We get our minimum from the var_off, since that's inherently
8511      * bitwise.  Our maximum is the minimum of the operands' maxima.
8512      */
8513     dst_reg->umin_value = dst_reg->var_off.value;
8514     dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
8515     if (dst_reg->smin_value < 0 || smin_val < 0) {
8516         /* Lose signed bounds when ANDing negative numbers,
8517          * ain't nobody got time for that.
8518          */
8519         dst_reg->smin_value = S64_MIN;
8520         dst_reg->smax_value = S64_MAX;
8521     } else {
8522         /* ANDing two positives gives a positive, so safe to
8523          * cast result into s64.
8524          */
8525         dst_reg->smin_value = dst_reg->umin_value;
8526         dst_reg->smax_value = dst_reg->umax_value;
8527     }
8528     /* We may learn something more from the var_off */
8529     __update_reg_bounds(dst_reg);
8530 }
8531
8532 static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
8533                 struct bpf_reg_state *src_reg)
8534 {
8535     bool src_known = tnum_subreg_is_const(src_reg->var_off);
8536     bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
8537     struct tnum var32_off = tnum_subreg(dst_reg->var_off);
8538     s32 smin_val = src_reg->s32_min_value;
8539     u32 umin_val = src_reg->u32_min_value;
8540
8541     if (src_known && dst_known) {
8542         __mark_reg32_known(dst_reg, var32_off.value);
8543         return;
8544     }
8545
8546     /* We get our maximum from the var_off, and our minimum is the
8547      * maximum of the operands' minima
8548      */
8549     dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
8550     dst_reg->u32_max_value = var32_off.value | var32_off.mask;
8551     if (dst_reg->s32_min_value < 0 || smin_val < 0) {
8552         /* Lose signed bounds when ORing negative numbers,
8553          * ain't nobody got time for that.
8554          */
8555         dst_reg->s32_min_value = S32_MIN;
8556         dst_reg->s32_max_value = S32_MAX;
8557     } else {
8558         /* ORing two positives gives a positive, so safe to
8559          * cast result into s64.
8560          */
8561         dst_reg->s32_min_value = dst_reg->u32_min_value;
8562         dst_reg->s32_max_value = dst_reg->u32_max_value;
8563     }
8564 }
8565
8566 static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
8567                   struct bpf_reg_state *src_reg)
8568 {
8569     bool src_known = tnum_is_const(src_reg->var_off);
8570     bool dst_known = tnum_is_const(dst_reg->var_off);
8571     s64 smin_val = src_reg->smin_value;
8572     u64 umin_val = src_reg->umin_value;
8573
8574     if (src_known && dst_known) {
8575         __mark_reg_known(dst_reg, dst_reg->var_off.value);
8576         return;
8577     }
8578
8579     /* We get our maximum from the var_off, and our minimum is the
8580      * maximum of the operands' minima
8581      */
8582     dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
8583     dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
8584     if (dst_reg->smin_value < 0 || smin_val < 0) {
8585         /* Lose signed bounds when ORing negative numbers,
8586          * ain't nobody got time for that.
8587          */
8588         dst_reg->smin_value = S64_MIN;
8589         dst_reg->smax_value = S64_MAX;
8590     } else {
8591         /* ORing two positives gives a positive, so safe to
8592          * cast result into s64.
8593          */
8594         dst_reg->smin_value = dst_reg->umin_value;
8595         dst_reg->smax_value = dst_reg->umax_value;
8596     }
8597     /* We may learn something more from the var_off */
8598     __update_reg_bounds(dst_reg);
8599 }
8600
8601 static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
8602                  struct bpf_reg_state *src_reg)
8603 {
8604     bool src_known = tnum_subreg_is_const(src_reg->var_off);
8605     bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
8606     struct tnum var32_off = tnum_subreg(dst_reg->var_off);
8607     s32 smin_val = src_reg->s32_min_value;
8608
8609     if (src_known && dst_known) {
8610         __mark_reg32_known(dst_reg, var32_off.value);
8611         return;
8612     }
8613
8614     /* We get both minimum and maximum from the var32_off. */
8615     dst_reg->u32_min_value = var32_off.value;
8616     dst_reg->u32_max_value = var32_off.value | var32_off.mask;
8617
8618     if (dst_reg->s32_min_value >= 0 && smin_val >= 0) {
8619         /* XORing two positive sign numbers gives a positive,
8620          * so safe to cast u32 result into s32.
8621          */
8622         dst_reg->s32_min_value = dst_reg->u32_min_value;
8623         dst_reg->s32_max_value = dst_reg->u32_max_value;
8624     } else {
8625         dst_reg->s32_min_value = S32_MIN;
8626         dst_reg->s32_max_value = S32_MAX;
8627     }
8628 }
8629
8630 static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
8631                    struct bpf_reg_state *src_reg)
8632 {
8633     bool src_known = tnum_is_const(src_reg->var_off);
8634     bool dst_known = tnum_is_const(dst_reg->var_off);
8635     s64 smin_val = src_reg->smin_value;
8636
8637     if (src_known && dst_known) {
8638         /* dst_reg->var_off.value has been updated earlier */
8639         __mark_reg_known(dst_reg, dst_reg->var_off.value);
8640         return;
8641     }
8642
8643     /* We get both minimum and maximum from the var_off. */
8644     dst_reg->umin_value = dst_reg->var_off.value;
8645     dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
8646
8647     if (dst_reg->smin_value >= 0 && smin_val >= 0) {
8648         /* XORing two positive sign numbers gives a positive,
8649          * so safe to cast u64 result into s64.
8650          */
8651         dst_reg->smin_value = dst_reg->umin_value;
8652         dst_reg->smax_value = dst_reg->umax_value;
8653     } else {
8654         dst_reg->smin_value = S64_MIN;
8655         dst_reg->smax_value = S64_MAX;
8656     }
8657
8658     __update_reg_bounds(dst_reg);
8659 }
8660
8661 static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
8662                    u64 umin_val, u64 umax_val)
8663 {
8664     /* We lose all sign bit information (except what we can pick
8665      * up from var_off)
8666      */
8667     dst_reg->s32_min_value = S32_MIN;
8668     dst_reg->s32_max_value = S32_MAX;
8669     /* If we might shift our top bit out, then we know nothing */
8670     if (umax_val > 31 || dst_reg->u32_max_value > 1ULL << (31 - umax_val)) {
8671         dst_reg->u32_min_value = 0;
8672         dst_reg->u32_max_value = U32_MAX;
8673     } else {
8674         dst_reg->u32_min_value <<= umin_val;
8675         dst_reg->u32_max_value <<= umax_val;
8676     }
8677 }
8678
8679 static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
8680                  struct bpf_reg_state *src_reg)
8681 {
8682     u32 umax_val = src_reg->u32_max_value;
8683     u32 umin_val = src_reg->u32_min_value;
8684     /* u32 alu operation will zext upper bits */
8685     struct tnum subreg = tnum_subreg(dst_reg->var_off);
8686
8687     __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
8688     dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
8689     /* Not required but being careful mark reg64 bounds as unknown so
8690      * that we are forced to pick them up from tnum and zext later and
8691      * if some path skips this step we are still safe.
8692      */
8693     __mark_reg64_unbounded(dst_reg);
8694     __update_reg32_bounds(dst_reg);
8695 }
8696
8697 static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
8698                    u64 umin_val, u64 umax_val)
8699 {
8700     /* Special case <<32 because it is a common compiler pattern to sign
8701      * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
8702      * positive we know this shift will also be positive so we can track
8703      * bounds correctly. Otherwise we lose all sign bit information except
8704      * what we can pick up from var_off. Perhaps we can generalize this
8705      * later to shifts of any length.
8706      */
8707     if (umin_val == 32 && umax_val == 32 && dst_reg->s32_max_value >= 0)
8708         dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32;
8709     else
8710         dst_reg->smax_value = S64_MAX;
8711
8712     if (umin_val == 32 && umax_val == 32 && dst_reg->s32_min_value >= 0)
8713         dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32;
8714     else
8715         dst_reg->smin_value = S64_MIN;
8716
8717     /* If we might shift our top bit out, then we know nothing */
8718     if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
8719         dst_reg->umin_value = 0;
8720         dst_reg->umax_value = U64_MAX;
8721     } else {
8722         dst_reg->umin_value <<= umin_val;
8723         dst_reg->umax_value <<= umax_val;
8724     }
8725 }
8726
8727 static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
8728                    struct bpf_reg_state *src_reg)
8729 {
8730     u64 umax_val = src_reg->umax_value;
8731     u64 umin_val = src_reg->umin_value;
8732
8733     /* scalar64 calc uses 32bit unshifted bounds so must be called first */
8734     __scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
8735     __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
8736
8737     dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
8738     /* We may learn something more from the var_off */
8739     __update_reg_bounds(dst_reg);
8740 }
8741
8742 static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
8743                  struct bpf_reg_state *src_reg)
8744 {
8745     struct tnum subreg = tnum_subreg(dst_reg->var_off);
8746     u32 umax_val = src_reg->u32_max_value;
8747     u32 umin_val = src_reg->u32_min_value;
8748
8749     /* BPF_RSH is an unsigned shift.  If the value in dst_reg might
8750      * be negative, then either:
8751      * 1) src_reg might be zero, so the sign bit of the result is
8752      *    unknown, so we lose our signed bounds
8753      * 2) it's known negative, thus the unsigned bounds capture the
8754      *    signed bounds
8755      * 3) the signed bounds cross zero, so they tell us nothing
8756      *    about the result
8757      * If the value in dst_reg is known nonnegative, then again the
8758      * unsigned bounds capture the signed bounds.
8759      * Thus, in all cases it suffices to blow away our signed bounds
8760      * and rely on inferring new ones from the unsigned bounds and
8761      * var_off of the result.
8762      */
8763     dst_reg->s32_min_value = S32_MIN;
8764     dst_reg->s32_max_value = S32_MAX;
8765
8766     dst_reg->var_off = tnum_rshift(subreg, umin_val);
8767     dst_reg->u32_min_value >>= umax_val;
8768     dst_reg->u32_max_value >>= umin_val;
8769
8770     __mark_reg64_unbounded(dst_reg);
8771     __update_reg32_bounds(dst_reg);
8772 }
8773
8774 static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
8775                    struct bpf_reg_state *src_reg)
8776 {
8777     u64 umax_val = src_reg->umax_value;
8778     u64 umin_val = src_reg->umin_value;
8779
8780     /* BPF_RSH is an unsigned shift.  If the value in dst_reg might
8781      * be negative, then either:
8782      * 1) src_reg might be zero, so the sign bit of the result is
8783      *    unknown, so we lose our signed bounds
8784      * 2) it's known negative, thus the unsigned bounds capture the
8785      *    signed bounds
8786      * 3) the signed bounds cross zero, so they tell us nothing
8787      *    about the result
8788      * If the value in dst_reg is known nonnegative, then again the
8789      * unsigned bounds capture the signed bounds.
8790      * Thus, in all cases it suffices to blow away our signed bounds
8791      * and rely on inferring new ones from the unsigned bounds and
8792      * var_off of the result.
8793      */
8794     dst_reg->smin_value = S64_MIN;
8795     dst_reg->smax_value = S64_MAX;
8796     dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
8797     dst_reg->umin_value >>= umax_val;
8798     dst_reg->umax_value >>= umin_val;
8799
8800     /* Its not easy to operate on alu32 bounds here because it depends
8801      * on bits being shifted in. Take easy way out and mark unbounded
8802      * so we can recalculate later from tnum.
8803      */
8804     __mark_reg32_unbounded(dst_reg);
8805     __update_reg_bounds(dst_reg);
8806 }
8807
8808 static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
8809                   struct bpf_reg_state *src_reg)
8810 {
8811     u64 umin_val = src_reg->u32_min_value;
8812
8813     /* Upon reaching here, src_known is true and
8814      * umax_val is equal to umin_val.
8815      */
8816     dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
8817     dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);
8818
8819     dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);
8820
8821     /* blow away the dst_reg umin_value/umax_value and rely on
8822      * dst_reg var_off to refine the result.
8823      */
8824     dst_reg->u32_min_value = 0;
8825     dst_reg->u32_max_value = U32_MAX;
8826
8827     __mark_reg64_unbounded(dst_reg);
8828     __update_reg32_bounds(dst_reg);
8829 }
8830
8831 static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
8832                 struct bpf_reg_state *src_reg)
8833 {
8834     u64 umin_val = src_reg->umin_value;
8835
8836     /* Upon reaching here, src_known is true and umax_val is equal
8837      * to umin_val.
8838      */
8839     dst_reg->smin_value >>= umin_val;
8840     dst_reg->smax_value >>= umin_val;
8841
8842     dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);
8843
8844     /* blow away the dst_reg umin_value/umax_value and rely on
8845      * dst_reg var_off to refine the result.
8846      */
8847     dst_reg->umin_value = 0;
8848     dst_reg->umax_value = U64_MAX;
8849
8850     /* Its not easy to operate on alu32 bounds here because it depends
8851      * on bits being shifted in from upper 32-bits. Take easy way out
8852      * and mark unbounded so we can recalculate later from tnum.
8853      */
8854     __mark_reg32_unbounded(dst_reg);
8855     __update_reg_bounds(dst_reg);
8856 }
8857
8858 /* WARNING: This function does calculations on 64-bit values, but the actual
8859  * execution may occur on 32-bit values. Therefore, things like bitshifts
8860  * need extra checks in the 32-bit case.
8861  */
8862 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
8863                       struct bpf_insn *insn,
8864                       struct bpf_reg_state *dst_reg,
8865                       struct bpf_reg_state src_reg)
8866 {
8867     struct bpf_reg_state *regs = cur_regs(env);
8868     u8 opcode = BPF_OP(insn->code);
8869     bool src_known;
8870     s64 smin_val, smax_val;
8871     u64 umin_val, umax_val;
8872     s32 s32_min_val, s32_max_val;
8873     u32 u32_min_val, u32_max_val;
8874     u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
8875     bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
8876     int ret;
8877
8878     smin_val = src_reg.smin_value;
8879     smax_val = src_reg.smax_value;
8880     umin_val = src_reg.umin_value;
8881     umax_val = src_reg.umax_value;
8882
8883     s32_min_val = src_reg.s32_min_value;
8884     s32_max_val = src_reg.s32_max_value;
8885     u32_min_val = src_reg.u32_min_value;
8886     u32_max_val = src_reg.u32_max_value;
8887
8888     if (alu32) {
8889         src_known = tnum_subreg_is_const(src_reg.var_off);
8890         if ((src_known &&
8891              (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) ||
8892             s32_min_val > s32_max_val || u32_min_val > u32_max_val) {
8893             /* Taint dst register if offset had invalid bounds
8894              * derived from e.g. dead branches.
8895              */
8896             __mark_reg_unknown(env, dst_reg);
8897             return 0;
8898         }
8899     } else {
8900         src_known = tnum_is_const(src_reg.var_off);
8901         if ((src_known &&
8902              (smin_val != smax_val || umin_val != umax_val)) ||
8903             smin_val > smax_val || umin_val > umax_val) {
8904             /* Taint dst register if offset had invalid bounds
8905              * derived from e.g. dead branches.
8906              */
8907             __mark_reg_unknown(env, dst_reg);
8908             return 0;
8909         }
8910     }
8911
8912     if (!src_known &&
8913         opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
8914         __mark_reg_unknown(env, dst_reg);
8915         return 0;
8916     }
8917
8918     if (sanitize_needed(opcode)) {
8919         ret = sanitize_val_alu(env, insn);
8920         if (ret < 0)
8921             return sanitize_err(env, insn, ret, NULL, NULL);
8922     }
8923
8924     /* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
8925      * There are two classes of instructions: The first class we track both
8926      * alu32 and alu64 sign/unsigned bounds independently this provides the
8927      * greatest amount of precision when alu operations are mixed with jmp32
8928      * operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL, BPF_ADD,
8929      * and BPF_OR. This is possible because these ops have fairly easy to
8930      * understand and calculate behavior in both 32-bit and 64-bit alu ops.
8931      * See alu32 verifier tests for examples. The second class of
8932      * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however are not so easy
8933      * with regards to tracking sign/unsigned bounds because the bits may
8934      * cross subreg boundaries in the alu64 case. When this happens we mark
8935      * the reg unbounded in the subreg bound space and use the resulting
8936      * tnum to calculate an approximation of the sign/unsigned bounds.
8937      */
8938     switch (opcode) {
8939     case BPF_ADD:
8940         scalar32_min_max_add(dst_reg, &src_reg);
8941         scalar_min_max_add(dst_reg, &src_reg);
8942         dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
8943         break;
8944     case BPF_SUB:
8945         scalar32_min_max_sub(dst_reg, &src_reg);
8946         scalar_min_max_sub(dst_reg, &src_reg);
8947         dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
8948         break;
8949     case BPF_MUL:
8950         dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
8951         scalar32_min_max_mul(dst_reg, &src_reg);
8952         scalar_min_max_mul(dst_reg, &src_reg);
8953         break;
8954     case BPF_AND:
8955         dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
8956         scalar32_min_max_and(dst_reg, &src_reg);
8957         scalar_min_max_and(dst_reg, &src_reg);
8958         break;
8959     case BPF_OR:
8960         dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
8961         scalar32_min_max_or(dst_reg, &src_reg);
8962         scalar_min_max_or(dst_reg, &src_reg);
8963         break;
8964     case BPF_XOR:
8965         dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
8966         scalar32_min_max_xor(dst_reg, &src_reg);
8967         scalar_min_max_xor(dst_reg, &src_reg);
8968         break;
8969     case BPF_LSH:
8970         if (umax_val >= insn_bitness) {
8971             /* Shifts greater than 31 or 63 are undefined.
8972              * This includes shifts by a negative number.
8973              */
8974             mark_reg_unknown(env, regs, insn->dst_reg);
8975             break;
8976         }
8977         if (alu32)
8978             scalar32_min_max_lsh(dst_reg, &src_reg);
8979         else
8980             scalar_min_max_lsh(dst_reg, &src_reg);
8981         break;
8982     case BPF_RSH:
8983         if (umax_val >= insn_bitness) {
8984             /* Shifts greater than 31 or 63 are undefined.
8985              * This includes shifts by a negative number.
8986              */
8987             mark_reg_unknown(env, regs, insn->dst_reg);
8988             break;
8989         }
8990         if (alu32)
8991             scalar32_min_max_rsh(dst_reg, &src_reg);
8992         else
8993             scalar_min_max_rsh(dst_reg, &src_reg);
8994         break;
8995     case BPF_ARSH:
8996         if (umax_val >= insn_bitness) {
8997             /* Shifts greater than 31 or 63 are undefined.
8998              * This includes shifts by a negative number.
8999              */
9000             mark_reg_unknown(env, regs, insn->dst_reg);
9001             break;
9002         }
9003         if (alu32)
9004             scalar32_min_max_arsh(dst_reg, &src_reg);
9005         else
9006             scalar_min_max_arsh(dst_reg, &src_reg);
9007         break;
9008     default:
9009         mark_reg_unknown(env, regs, insn->dst_reg);
9010         break;
9011     }
9012
9013     /* ALU32 ops are zero extended into 64bit register */
9014     if (alu32)
9015         zext_32_to_64(dst_reg);
9016     reg_bounds_sync(dst_reg);
9017     return 0;
9018 }
9019
9020 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
9021  * and var_off.
9022  */
9023 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
9024                    struct bpf_insn *insn)
9025 {
9026     struct bpf_verifier_state *vstate = env->cur_state;
9027     struct bpf_func_state *state = vstate->frame[vstate->curframe];
9028     struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
9029     struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
9030     u8 opcode = BPF_OP(insn->code);
9031     int err;
9032
9033     dst_reg = &regs[insn->dst_reg];
9034     src_reg = NULL;
9035     if (dst_reg->type != SCALAR_VALUE)
9036         ptr_reg = dst_reg;
9037     else
9038         /* Make sure ID is cleared otherwise dst_reg min/max could be
9039          * incorrectly propagated into other registers by find_equal_scalars()
9040          */
9041         dst_reg->id = 0;
9042     if (BPF_SRC(insn->code) == BPF_X) {
9043         src_reg = &regs[insn->src_reg];
9044         if (src_reg->type != SCALAR_VALUE) {
9045             if (dst_reg->type != SCALAR_VALUE) {
9046                 /* Combining two pointers by any ALU op yields
9047                  * an arbitrary scalar. Disallow all math except
9048                  * pointer subtraction
9049                  */
9050                 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
9051                     mark_reg_unknown(env, regs, insn->dst_reg);
9052                     return 0;
9053                 }
9054                 verbose(env, "R%d pointer %s pointer prohibited\n",
9055                     insn->dst_reg,
9056                     bpf_alu_string[opcode >> 4]);
9057                 return -EACCES;
9058             } else {
9059                 /* scalar += pointer
9060                  * This is legal, but we have to reverse our
9061                  * src/dest handling in computing the range
9062                  */
9063                 err = mark_chain_precision(env, insn->dst_reg);
9064                 if (err)
9065                     return err;
9066                 return adjust_ptr_min_max_vals(env, insn,
9067                                    src_reg, dst_reg);
9068             }
9069         } else if (ptr_reg) {
9070             /* pointer += scalar */
9071             err = mark_chain_precision(env, insn->src_reg);
9072             if (err)
9073                 return err;
9074             return adjust_ptr_min_max_vals(env, insn,
9075                                dst_reg, src_reg);
9076         }
9077     } else {
9078         /* Pretend the src is a reg with a known value, since we only
9079          * need to be able to read from this state.
9080          */
9081         off_reg.type = SCALAR_VALUE;
9082         __mark_reg_known(&off_reg, insn->imm);
9083         src_reg = &off_reg;
9084         if (ptr_reg) /* pointer += K */
9085             return adjust_ptr_min_max_vals(env, insn,
9086                                ptr_reg, src_reg);
9087     }
9088
9089     /* Got here implies adding two SCALAR_VALUEs */
9090     if (WARN_ON_ONCE(ptr_reg)) {
9091         print_verifier_state(env, state, true);
9092         verbose(env, "verifier internal error: unexpected ptr_reg\n");
9093         return -EINVAL;
9094     }
9095     if (WARN_ON(!src_reg)) {
9096         print_verifier_state(env, state, true);
9097         verbose(env, "verifier internal error: no src_reg\n");
9098         return -EINVAL;
9099     }
9100     return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
9101 }
9102
9103 /* check validity of 32-bit and 64-bit arithmetic operations */
9104 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
9105 {
9106     struct bpf_reg_state *regs = cur_regs(env);
9107     u8 opcode = BPF_OP(insn->code);
9108     int err;
9109
9110     if (opcode == BPF_END || opcode == BPF_NEG) {
9111         if (opcode == BPF_NEG) {
9112             if (BPF_SRC(insn->code) != BPF_K ||
9113                 insn->src_reg != BPF_REG_0 ||
9114                 insn->off != 0 || insn->imm != 0) {
9115                 verbose(env, "BPF_NEG uses reserved fields\n");
9116                 return -EINVAL;
9117             }
9118         } else {
9119             if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
9120                 (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
9121                 BPF_CLASS(insn->code) == BPF_ALU64) {
9122                 verbose(env, "BPF_END uses reserved fields\n");
9123                 return -EINVAL;
9124             }
9125         }
9126
9127         /* check src operand */
9128         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
9129         if (err)
9130             return err;
9131
9132         if (is_pointer_value(env, insn->dst_reg)) {
9133             verbose(env, "R%d pointer arithmetic prohibited\n",
9134                 insn->dst_reg);
9135             return -EACCES;
9136         }
9137
9138         /* check dest operand */
9139         err = check_reg_arg(env, insn->dst_reg, DST_OP);
9140         if (err)
9141             return err;
9142
9143     } else if (opcode == BPF_MOV) {
9144
9145         if (BPF_SRC(insn->code) == BPF_X) {
9146             if (insn->imm != 0 || insn->off != 0) {
9147                 verbose(env, "BPF_MOV uses reserved fields\n");
9148                 return -EINVAL;
9149             }
9150
9151             /* check src operand */
9152             err = check_reg_arg(env, insn->src_reg, SRC_OP);
9153             if (err)
9154                 return err;
9155         } else {
9156             if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
9157                 verbose(env, "BPF_MOV uses reserved fields\n");
9158                 return -EINVAL;
9159             }
9160         }
9161
9162         /* check dest operand, mark as required later */
9163         err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
9164         if (err)
9165             return err;
9166
9167         if (BPF_SRC(insn->code) == BPF_X) {
9168             struct bpf_reg_state *src_reg = regs + insn->src_reg;
9169             struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
9170
9171             if (BPF_CLASS(insn->code) == BPF_ALU64) {
9172                 /* case: R1 = R2
9173                  * copy register state to dest reg
9174                  */
9175                 if (src_reg->type == SCALAR_VALUE && !src_reg->id)
9176                     /* Assign src and dst registers the same ID
9177                      * that will be used by find_equal_scalars()
9178                      * to propagate min/max range.
9179                      */
9180                     src_reg->id = ++env->id_gen;
9181                 *dst_reg = *src_reg;
9182                 dst_reg->live |= REG_LIVE_WRITTEN;
9183                 dst_reg->subreg_def = DEF_NOT_SUBREG;
9184             } else {
9185                 /* R1 = (u32) R2 */
9186                 if (is_pointer_value(env, insn->src_reg)) {
9187                     verbose(env,
9188                         "R%d partial copy of pointer\n",
9189                         insn->src_reg);
9190                     return -EACCES;
9191                 } else if (src_reg->type == SCALAR_VALUE) {
9192                     *dst_reg = *src_reg;
9193                     /* Make sure ID is cleared otherwise
9194                      * dst_reg min/max could be incorrectly
9195                      * propagated into src_reg by find_equal_scalars()
9196                      */
9197                     dst_reg->id = 0;
9198                     dst_reg->live |= REG_LIVE_WRITTEN;
9199                     dst_reg->subreg_def = env->insn_idx + 1;
9200                 } else {
9201                     mark_reg_unknown(env, regs,
9202                              insn->dst_reg);
9203                 }
9204                 zext_32_to_64(dst_reg);
9205                 reg_bounds_sync(dst_reg);
9206             }
9207         } else {
9208             /* case: R = imm
9209              * remember the value we stored into this reg
9210              */
9211             /* clear any state __mark_reg_known doesn't set */
9212             mark_reg_unknown(env, regs, insn->dst_reg);
9213             regs[insn->dst_reg].type = SCALAR_VALUE;
9214             if (BPF_CLASS(insn->code) == BPF_ALU64) {
9215                 __mark_reg_known(regs + insn->dst_reg,
9216                          insn->imm);
9217             } else {
9218                 __mark_reg_known(regs + insn->dst_reg,
9219                          (u32)insn->imm);
9220             }
9221         }
9222
9223     } else if (opcode > BPF_END) {
9224         verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
9225         return -EINVAL;
9226
9227     } else {    /* all other ALU ops: and, sub, xor, add, ... */
9228
9229         if (BPF_SRC(insn->code) == BPF_X) {
9230             if (insn->imm != 0 || insn->off != 0) {
9231                 verbose(env, "BPF_ALU uses reserved fields\n");
9232                 return -EINVAL;
9233             }
9234             /* check src1 operand */
9235             err = check_reg_arg(env, insn->src_reg, SRC_OP);
9236             if (err)
9237                 return err;
9238         } else {
9239             if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
9240                 verbose(env, "BPF_ALU uses reserved fields\n");
9241                 return -EINVAL;
9242             }
9243         }
9244
9245         /* check src2 operand */
9246         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
9247         if (err)
9248             return err;
9249
9250         if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
9251             BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
9252             verbose(env, "div by zero\n");
9253             return -EINVAL;
9254         }
9255
9256         if ((opcode == BPF_LSH || opcode == BPF_RSH ||
9257              opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
9258             int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
9259
9260             if (insn->imm < 0 || insn->imm >= size) {
9261                 verbose(env, "invalid shift %d\n", insn->imm);
9262                 return -EINVAL;
9263             }
9264         }
9265
9266         /* check dest operand */
9267         err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
9268         if (err)
9269             return err;
9270
9271         return adjust_reg_min_max_vals(env, insn);
9272     }
9273
9274     return 0;
9275 }
9276
9277 static void __find_good_pkt_pointers(struct bpf_func_state *state,
9278                      struct bpf_reg_state *dst_reg,
9279                      enum bpf_reg_type type, int new_range)
9280 {
9281     struct bpf_reg_state *reg;
9282     int i;
9283
9284     for (i = 0; i < MAX_BPF_REG; i++) {
9285         reg = &state->regs[i];
9286         if (reg->type == type && reg->id == dst_reg->id)
9287             /* keep the maximum range already checked */
9288             reg->range = max(reg->range, new_range);
9289     }
9290
9291     bpf_for_each_spilled_reg(i, state, reg) {
9292         if (!reg)
9293             continue;
9294         if (reg->type == type && reg->id == dst_reg->id)
9295             reg->range = max(reg->range, new_range);
9296     }
9297 }
9298
9299 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
9300                    struct bpf_reg_state *dst_reg,
9301                    enum bpf_reg_type type,
9302                    bool range_right_open)
9303 {
9304     int new_range, i;
9305
9306     if (dst_reg->off < 0 ||
9307         (dst_reg->off == 0 && range_right_open))
9308         /* This doesn't give us any range */
9309         return;
9310
9311     if (dst_reg->umax_value > MAX_PACKET_OFF ||
9312         dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
9313         /* Risk of overflow.  For instance, ptr + (1<<63) may be less
9314          * than pkt_end, but that's because it's also less than pkt.
9315          */
9316         return;
9317
9318     new_range = dst_reg->off;
9319     if (range_right_open)
9320         new_range++;
9321
9322     /* Examples for register markings:
9323      *
9324      * pkt_data in dst register:
9325      *
9326      *   r2 = r3;
9327      *   r2 += 8;
9328      *   if (r2 > pkt_end) goto <handle exception>
9329      *   <access okay>
9330      *
9331      *   r2 = r3;
9332      *   r2 += 8;
9333      *   if (r2 < pkt_end) goto <access okay>
9334      *   <handle exception>
9335      *
9336      *   Where:
9337      *     r2 == dst_reg, pkt_end == src_reg
9338      *     r2=pkt(id=n,off=8,r=0)
9339      *     r3=pkt(id=n,off=0,r=0)
9340      *
9341      * pkt_data in src register:
9342      *
9343      *   r2 = r3;
9344      *   r2 += 8;
9345      *   if (pkt_end >= r2) goto <access okay>
9346      *   <handle exception>
9347      *
9348      *   r2 = r3;
9349      *   r2 += 8;
9350      *   if (pkt_end <= r2) goto <handle exception>
9351      *   <access okay>
9352      *
9353      *   Where:
9354      *     pkt_end == dst_reg, r2 == src_reg
9355      *     r2=pkt(id=n,off=8,r=0)
9356      *     r3=pkt(id=n,off=0,r=0)
9357      *
9358      * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
9359      * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
9360      * and [r3, r3 + 8-1) respectively is safe to access depending on
9361      * the check.
9362      */
9363
9364     /* If our ids match, then we must have the same max_value.  And we
9365      * don't care about the other reg's fixed offset, since if it's too big
9366      * the range won't allow anything.
9367      * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
9368      */
9369     for (i = 0; i <= vstate->curframe; i++)
9370         __find_good_pkt_pointers(vstate->frame[i], dst_reg, type,
9371                      new_range);
9372 }
9373
9374 static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
9375 {
9376     struct tnum subreg = tnum_subreg(reg->var_off);
9377     s32 sval = (s32)val;
9378
9379     switch (opcode) {
9380     case BPF_JEQ:
9381         if (tnum_is_const(subreg))
9382             return !!tnum_equals_const(subreg, val);
9383         break;
9384     case BPF_JNE:
9385         if (tnum_is_const(subreg))
9386             return !tnum_equals_const(subreg, val);
9387         break;
9388     case BPF_JSET:
9389         if ((~subreg.mask & subreg.value) & val)
9390             return 1;
9391         if (!((subreg.mask | subreg.value) & val))
9392             return 0;
9393         break;
9394     case BPF_JGT:
9395         if (reg->u32_min_value > val)
9396             return 1;
9397         else if (reg->u32_max_value <= val)
9398             return 0;
9399         break;
9400     case BPF_JSGT:
9401         if (reg->s32_min_value > sval)
9402             return 1;
9403         else if (reg->s32_max_value <= sval)
9404             return 0;
9405         break;
9406     case BPF_JLT:
9407         if (reg->u32_max_value < val)
9408             return 1;
9409         else if (reg->u32_min_value >= val)
9410             return 0;
9411         break;
9412     case BPF_JSLT:
9413         if (reg->s32_max_value < sval)
9414             return 1;
9415         else if (reg->s32_min_value >= sval)
9416             return 0;
9417         break;
9418     case BPF_JGE:
9419         if (reg->u32_min_value >= val)
9420             return 1;
9421         else if (reg->u32_max_value < val)
9422             return 0;
9423         break;
9424     case BPF_JSGE:
9425         if (reg->s32_min_value >= sval)
9426             return 1;
9427         else if (reg->s32_max_value < sval)
9428             return 0;
9429         break;
9430     case BPF_JLE:
9431         if (reg->u32_max_value <= val)
9432             return 1;
9433         else if (reg->u32_min_value > val)
9434             return 0;
9435         break;
9436     case BPF_JSLE:
9437         if (reg->s32_max_value <= sval)
9438             return 1;
9439         else if (reg->s32_min_value > sval)
9440             return 0;
9441         break;
9442     }
9443
9444     return -1;
9445 }
9446
9447
9448 static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
9449 {
9450     s64 sval = (s64)val;
9451
9452     switch (opcode) {
9453     case BPF_JEQ:
9454         if (tnum_is_const(reg->var_off))
9455             return !!tnum_equals_const(reg->var_off, val);
9456         break;
9457     case BPF_JNE:
9458         if (tnum_is_const(reg->var_off))
9459             return !tnum_equals_const(reg->var_off, val);
9460         break;
9461     case BPF_JSET:
9462         if ((~reg->var_off.mask & reg->var_off.value) & val)
9463             return 1;
9464         if (!((reg->var_off.mask | reg->var_off.value) & val))
9465             return 0;
9466         break;
9467     case BPF_JGT:
9468         if (reg->umin_value > val)
9469             return 1;
9470         else if (reg->umax_value <= val)
9471             return 0;
9472         break;
9473     case BPF_JSGT:
9474         if (reg->smin_value > sval)
9475             return 1;
9476         else if (reg->smax_value <= sval)
9477             return 0;
9478         break;
9479     case BPF_JLT:
9480         if (reg->umax_value < val)
9481             return 1;
9482         else if (reg->umin_value >= val)
9483             return 0;
9484         break;
9485     case BPF_JSLT:
9486         if (reg->smax_value < sval)
9487             return 1;
9488         else if (reg->smin_value >= sval)
9489             return 0;
9490         break;
9491     case BPF_JGE:
9492         if (reg->umin_value >= val)
9493             return 1;
9494         else if (reg->umax_value < val)
9495             return 0;
9496         break;
9497     case BPF_JSGE:
9498         if (reg->smin_value >= sval)
9499             return 1;
9500         else if (reg->smax_value < sval)
9501             return 0;
9502         break;
9503     case BPF_JLE:
9504         if (reg->umax_value <= val)
9505             return 1;
9506         else if (reg->umin_value > val)
9507             return 0;
9508         break;
9509     case BPF_JSLE:
9510         if (reg->smax_value <= sval)
9511             return 1;
9512         else if (reg->smin_value > sval)
9513             return 0;
9514         break;
9515     }
9516
9517     return -1;
9518 }
9519
9520 /* compute branch direction of the expression "if (reg opcode val) goto target;"
9521  * and return:
9522  *  1 - branch will be taken and "goto target" will be executed
9523  *  0 - branch will not be taken and fall-through to next insn
9524  * -1 - unknown. Example: "if (reg < 5)" is unknown when register value
9525  *      range [0,10]
9526  */
9527 static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
9528                bool is_jmp32)
9529 {
9530     if (__is_pointer_value(false, reg)) {
9531         if (!reg_type_not_null(reg->type))
9532             return -1;
9533
9534         /* If pointer is valid tests against zero will fail so we can
9535          * use this to direct branch taken.
9536          */
9537         if (val != 0)
9538             return -1;
9539
9540         switch (opcode) {
9541         case BPF_JEQ:
9542             return 0;
9543         case BPF_JNE:
9544             return 1;
9545         default:
9546             return -1;
9547         }
9548     }
9549
9550     if (is_jmp32)
9551         return is_branch32_taken(reg, val, opcode);
9552     return is_branch64_taken(reg, val, opcode);
9553 }
9554
9555 static int flip_opcode(u32 opcode)
9556 {
9557     /* How can we transform "a <op> b" into "b <op> a"? */
9558     static const u8 opcode_flip[16] = {
9559         /* these stay the same */
9560         [BPF_JEQ  >> 4] = BPF_JEQ,
9561         [BPF_JNE  >> 4] = BPF_JNE,
9562         [BPF_JSET >> 4] = BPF_JSET,
9563         /* these swap "lesser" and "greater" (L and G in the opcodes) */
9564         [BPF_JGE  >> 4] = BPF_JLE,
9565         [BPF_JGT  >> 4] = BPF_JLT,
9566         [BPF_JLE  >> 4] = BPF_JGE,
9567         [BPF_JLT  >> 4] = BPF_JGT,
9568         [BPF_JSGE >> 4] = BPF_JSLE,
9569         [BPF_JSGT >> 4] = BPF_JSLT,
9570         [BPF_JSLE >> 4] = BPF_JSGE,
9571         [BPF_JSLT >> 4] = BPF_JSGT
9572     };
9573     return opcode_flip[opcode >> 4];
9574 }
9575
9576 static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
9577                    struct bpf_reg_state *src_reg,
9578                    u8 opcode)
9579 {
9580     struct bpf_reg_state *pkt;
9581
9582     if (src_reg->type == PTR_TO_PACKET_END) {
9583         pkt = dst_reg;
9584     } else if (dst_reg->type == PTR_TO_PACKET_END) {
9585         pkt = src_reg;
9586         opcode = flip_opcode(opcode);
9587     } else {
9588         return -1;
9589     }
9590
9591     if (pkt->range >= 0)
9592         return -1;
9593
9594     switch (opcode) {
9595     case BPF_JLE:
9596         /* pkt <= pkt_end */
9597         fallthrough;
9598     case BPF_JGT:
9599         /* pkt > pkt_end */
9600         if (pkt->range == BEYOND_PKT_END)
9601             /* pkt has at last one extra byte beyond pkt_end */
9602             return opcode == BPF_JGT;
9603         break;
9604     case BPF_JLT:
9605         /* pkt < pkt_end */
9606         fallthrough;
9607     case BPF_JGE:
9608         /* pkt >= pkt_end */
9609         if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END)
9610             return opcode == BPF_JGE;
9611         break;
9612     }
9613     return -1;
9614 }
9615
9616 /* Adjusts the register min/max values in the case that the dst_reg is the
9617  * variable register that we are working on, and src_reg is a constant or we're
9618  * simply doing a BPF_K check.
9619  * In JEQ/JNE cases we also adjust the var_off values.
9620  */
9621 static void reg_set_min_max(struct bpf_reg_state *true_reg,
9622                 struct bpf_reg_state *false_reg,
9623                 u64 val, u32 val32,
9624                 u8 opcode, bool is_jmp32)
9625 {
9626     struct tnum false_32off = tnum_subreg(false_reg->var_off);
9627     struct tnum false_64off = false_reg->var_off;
9628     struct tnum true_32off = tnum_subreg(true_reg->var_off);
9629     struct tnum true_64off = true_reg->var_off;
9630     s64 sval = (s64)val;
9631     s32 sval32 = (s32)val32;
9632
9633     /* If the dst_reg is a pointer, we can't learn anything about its
9634      * variable offset from the compare (unless src_reg were a pointer into
9635      * the same object, but we don't bother with that.
9636      * Since false_reg and true_reg have the same type by construction, we
9637      * only need to check one of them for pointerness.
9638      */
9639     if (__is_pointer_value(false, false_reg))
9640         return;
9641
9642     switch (opcode) {
9643     /* JEQ/JNE comparison doesn't change the register equivalence.
9644      *
9645      * r1 = r2;
9646      * if (r1 == 42) goto label;
9647      * ...
9648      * label: // here both r1 and r2 are known to be 42.
9649      *
9650      * Hence when marking register as known preserve it's ID.
9651      */
9652     case BPF_JEQ:
9653         if (is_jmp32) {
9654             __mark_reg32_known(true_reg, val32);
9655             true_32off = tnum_subreg(true_reg->var_off);
9656         } else {
9657             ___mark_reg_known(true_reg, val);
9658             true_64off = true_reg->var_off;
9659         }
9660         break;
9661     case BPF_JNE:
9662         if (is_jmp32) {
9663             __mark_reg32_known(false_reg, val32);
9664             false_32off = tnum_subreg(false_reg->var_off);
9665         } else {
9666             ___mark_reg_known(false_reg, val);
9667             false_64off = false_reg->var_off;
9668         }
9669         break;
9670     case BPF_JSET:
9671         if (is_jmp32) {
9672             false_32off = tnum_and(false_32off, tnum_const(~val32));
9673             if (is_power_of_2(val32))
9674                 true_32off = tnum_or(true_32off,
9675                              tnum_const(val32));
9676         } else {
9677             false_64off = tnum_and(false_64off, tnum_const(~val));
9678             if (is_power_of_2(val))
9679                 true_64off = tnum_or(true_64off,
9680                              tnum_const(val));
9681         }
9682         break;
9683     case BPF_JGE:
9684     case BPF_JGT:
9685     {
9686         if (is_jmp32) {
9687             u32 false_umax = opcode == BPF_JGT ? val32  : val32 - 1;
9688             u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32;
9689
9690             false_reg->u32_max_value = min(false_reg->u32_max_value,
9691                                false_umax);
9692             true_reg->u32_min_value = max(true_reg->u32_min_value,
9693                               true_umin);
9694         } else {
9695             u64 false_umax = opcode == BPF_JGT ? val    : val - 1;
9696             u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
9697
9698             false_reg->umax_value = min(false_reg->umax_value, false_umax);
9699             true_reg->umin_value = max(true_reg->umin_value, true_umin);
9700         }
9701         break;
9702     }
9703     case BPF_JSGE:
9704     case BPF_JSGT:
9705     {
9706         if (is_jmp32) {
9707             s32 false_smax = opcode == BPF_JSGT ? sval32    : sval32 - 1;
9708             s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32;
9709
9710             false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax);
9711             true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin);
9712         } else {
9713             s64 false_smax = opcode == BPF_JSGT ? sval    : sval - 1;
9714             s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
9715
9716             false_reg->smax_value = min(false_reg->smax_value, false_smax);
9717             true_reg->smin_value = max(true_reg->smin_value, true_smin);
9718         }
9719         break;
9720     }
9721     case BPF_JLE:
9722     case BPF_JLT:
9723     {
9724         if (is_jmp32) {
9725             u32 false_umin = opcode == BPF_JLT ? val32  : val32 + 1;
9726             u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32;
9727
9728             false_reg->u32_min_value = max(false_reg->u32_min_value,
9729                                false_umin);
9730             true_reg->u32_max_value = min(true_reg->u32_max_value,
9731                               true_umax);
9732         } else {
9733             u64 false_umin = opcode == BPF_JLT ? val    : val + 1;
9734             u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
9735
9736             false_reg->umin_value = max(false_reg->umin_value, false_umin);
9737             true_reg->umax_value = min(true_reg->umax_value, true_umax);
9738         }
9739         break;
9740     }
9741     case BPF_JSLE:
9742     case BPF_JSLT:
9743     {
9744         if (is_jmp32) {
9745             s32 false_smin = opcode == BPF_JSLT ? sval32    : sval32 + 1;
9746             s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32;
9747
9748             false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin);
9749             true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax);
9750         } else {
9751             s64 false_smin = opcode == BPF_JSLT ? sval    : sval + 1;
9752             s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
9753
9754             false_reg->smin_value = max(false_reg->smin_value, false_smin);
9755             true_reg->smax_value = min(true_reg->smax_value, true_smax);
9756         }
9757         break;
9758     }
9759     default:
9760         return;
9761     }
9762
9763     if (is_jmp32) {
9764         false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off),
9765                          tnum_subreg(false_32off));
9766         true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off),
9767                         tnum_subreg(true_32off));
9768         __reg_combine_32_into_64(false_reg);
9769         __reg_combine_32_into_64(true_reg);
9770     } else {
9771         false_reg->var_off = false_64off;
9772         true_reg->var_off = true_64off;
9773         __reg_combine_64_into_32(false_reg);
9774         __reg_combine_64_into_32(true_reg);
9775     }
9776 }
9777
9778 /* Same as above, but for the case that dst_reg holds a constant and src_reg is
9779  * the variable reg.
9780  */
9781 static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
9782                 struct bpf_reg_state *false_reg,
9783                 u64 val, u32 val32,
9784                 u8 opcode, bool is_jmp32)
9785 {
9786     opcode = flip_opcode(opcode);
9787     /* This uses zero as "not present in table"; luckily the zero opcode,
9788      * BPF_JA, can't get here.
9789      */
9790     if (opcode)
9791         reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32);
9792 }
9793
9794 /* Regs are known to be equal, so intersect their min/max/var_off */
9795 static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
9796                   struct bpf_reg_state *dst_reg)
9797 {
9798     src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
9799                             dst_reg->umin_value);
9800     src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
9801                             dst_reg->umax_value);
9802     src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
9803                             dst_reg->smin_value);
9804     src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
9805                             dst_reg->smax_value);
9806     src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
9807                                  dst_reg->var_off);
9808     reg_bounds_sync(src_reg);
9809     reg_bounds_sync(dst_reg);
9810 }
9811
9812 static void reg_combine_min_max(struct bpf_reg_state *true_src,
9813                 struct bpf_reg_state *true_dst,
9814                 struct bpf_reg_state *false_src,
9815                 struct bpf_reg_state *false_dst,
9816                 u8 opcode)
9817 {
9818     switch (opcode) {
9819     case BPF_JEQ:
9820         __reg_combine_min_max(true_src, true_dst);
9821         break;
9822     case BPF_JNE:
9823         __reg_combine_min_max(false_src, false_dst);
9824         break;
9825     }
9826 }
9827
9828 static void mark_ptr_or_null_reg(struct bpf_func_state *state,
9829                  struct bpf_reg_state *reg, u32 id,
9830                  bool is_null)
9831 {
9832     if (type_may_be_null(reg->type) && reg->id == id &&
9833         !WARN_ON_ONCE(!reg->id)) {
9834         if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
9835                  !tnum_equals_const(reg->var_off, 0) ||
9836                  reg->off)) {
9837             /* Old offset (both fixed and variable parts) should
9838              * have been known-zero, because we don't allow pointer
9839              * arithmetic on pointers that might be NULL. If we
9840              * see this happening, don't convert the register.
9841              */
9842             return;
9843         }
9844         if (is_null) {
9845             reg->type = SCALAR_VALUE;
9846             /* We don't need id and ref_obj_id from this point
9847              * onwards anymore, thus we should better reset it,
9848              * so that state pruning has chances to take effect.
9849              */
9850             reg->id = 0;
9851             reg->ref_obj_id = 0;
9852
9853             return;
9854         }
9855
9856         mark_ptr_not_null_reg(reg);
9857
9858         if (!reg_may_point_to_spin_lock(reg)) {
9859             /* For not-NULL ptr, reg->ref_obj_id will be reset
9860              * in release_reg_references().
9861              *
9862              * reg->id is still used by spin_lock ptr. Other
9863              * than spin_lock ptr type, reg->id can be reset.
9864              */
9865             reg->id = 0;
9866         }
9867     }
9868 }
9869
9870 static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id,
9871                     bool is_null)
9872 {
9873     struct bpf_reg_state *reg;
9874     int i;
9875
9876     for (i = 0; i < MAX_BPF_REG; i++)
9877         mark_ptr_or_null_reg(state, &state->regs[i], id, is_null);
9878
9879     bpf_for_each_spilled_reg(i, state, reg) {
9880         if (!reg)
9881             continue;
9882         mark_ptr_or_null_reg(state, reg, id, is_null);
9883     }
9884 }
9885
9886 /* The logic is similar to find_good_pkt_pointers(), both could eventually
9887  * be folded together at some point.
9888  */
9889 static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
9890                   bool is_null)
9891 {
9892     struct bpf_func_state *state = vstate->frame[vstate->curframe];
9893     struct bpf_reg_state *regs = state->regs;
9894     u32 ref_obj_id = regs[regno].ref_obj_id;
9895     u32 id = regs[regno].id;
9896     int i;
9897
9898     if (ref_obj_id && ref_obj_id == id && is_null)
9899         /* regs[regno] is in the " == NULL" branch.
9900          * No one could have freed the reference state before
9901          * doing the NULL check.
9902          */
9903         WARN_ON_ONCE(release_reference_state(state, id));
9904
9905     for (i = 0; i <= vstate->curframe; i++)
9906         __mark_ptr_or_null_regs(vstate->frame[i], id, is_null);
9907 }
9908
9909 static bool try_match_pkt_pointers(const struct bpf_insn *insn,
9910                    struct bpf_reg_state *dst_reg,
9911                    struct bpf_reg_state *src_reg,
9912                    struct bpf_verifier_state *this_branch,
9913                    struct bpf_verifier_state *other_branch)
9914 {
9915     if (BPF_SRC(insn->code) != BPF_X)
9916         return false;
9917
9918     /* Pointers are always 64-bit. */
9919     if (BPF_CLASS(insn->code) == BPF_JMP32)
9920         return false;
9921
9922     switch (BPF_OP(insn->code)) {
9923     case BPF_JGT:
9924         if ((dst_reg->type == PTR_TO_PACKET &&
9925              src_reg->type == PTR_TO_PACKET_END) ||
9926             (dst_reg->type == PTR_TO_PACKET_META &&
9927              reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
9928             /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
9929             find_good_pkt_pointers(this_branch, dst_reg,
9930                            dst_reg->type, false);
9931             mark_pkt_end(other_branch, insn->dst_reg, true);
9932         } else if ((dst_reg->type == PTR_TO_PACKET_END &&
9933                 src_reg->type == PTR_TO_PACKET) ||
9934                (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
9935                 src_reg->type == PTR_TO_PACKET_META)) {
9936             /* pkt_end > pkt_data', pkt_data > pkt_meta' */
9937             find_good_pkt_pointers(other_branch, src_reg,
9938                            src_reg->type, true);
9939             mark_pkt_end(this_branch, insn->src_reg, false);
9940         } else {
9941             return false;
9942         }
9943         break;
9944     case BPF_JLT:
9945         if ((dst_reg->type == PTR_TO_PACKET &&
9946              src_reg->type == PTR_TO_PACKET_END) ||
9947             (dst_reg->type == PTR_TO_PACKET_META &&
9948              reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
9949             /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
9950             find_good_pkt_pointers(other_branch, dst_reg,
9951                            dst_reg->type, true);
9952             mark_pkt_end(this_branch, insn->dst_reg, false);
9953         } else if ((dst_reg->type == PTR_TO_PACKET_END &&
9954                 src_reg->type == PTR_TO_PACKET) ||
9955                (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
9956                 src_reg->type == PTR_TO_PACKET_META)) {
9957             /* pkt_end < pkt_data', pkt_data > pkt_meta' */
9958             find_good_pkt_pointers(this_branch, src_reg,
9959                            src_reg->type, false);
9960             mark_pkt_end(other_branch, insn->src_reg, true);
9961         } else {
9962             return false;
9963         }
9964         break;
9965     case BPF_JGE:
9966         if ((dst_reg->type == PTR_TO_PACKET &&
9967              src_reg->type == PTR_TO_PACKET_END) ||
9968             (dst_reg->type == PTR_TO_PACKET_META &&
9969              reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
9970             /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
9971             find_good_pkt_pointers(this_branch, dst_reg,
9972                            dst_reg->type, true);
9973             mark_pkt_end(other_branch, insn->dst_reg, false);
9974         } else if ((dst_reg->type == PTR_TO_PACKET_END &&
9975                 src_reg->type == PTR_TO_PACKET) ||
9976                (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
9977                 src_reg->type == PTR_TO_PACKET_META)) {
9978             /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
9979             find_good_pkt_pointers(other_branch, src_reg,
9980                            src_reg->type, false);
9981             mark_pkt_end(this_branch, insn->src_reg, true);
9982         } else {
9983             return false;
9984         }
9985         break;
9986     case BPF_JLE:
9987         if ((dst_reg->type == PTR_TO_PACKET &&
9988              src_reg->type == PTR_TO_PACKET_END) ||
9989             (dst_reg->type == PTR_TO_PACKET_META &&
9990              reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
9991             /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
9992             find_good_pkt_pointers(other_branch, dst_reg,
9993                            dst_reg->type, false);
9994             mark_pkt_end(this_branch, insn->dst_reg, true);
9995         } else if ((dst_reg->type == PTR_TO_PACKET_END &&
9996                 src_reg->type == PTR_TO_PACKET) ||
9997                (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
9998                 src_reg->type == PTR_TO_PACKET_META)) {
9999             /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
10000             find_good_pkt_pointers(this_branch, src_reg,
10001                            src_reg->type, true);
10002             mark_pkt_end(other_branch, insn->src_reg, false);
10003         } else {
10004             return false;
10005         }
10006         break;
10007     default:
10008         return false;
10009     }
10010
10011     return true;
10012 }
10013
10014 static void find_equal_scalars(struct bpf_verifier_state *vstate,
10015                    struct bpf_reg_state *known_reg)
10016 {
10017     struct bpf_func_state *state;
10018     struct bpf_reg_state *reg;
10019     int i, j;
10020
10021     for (i = 0; i <= vstate->curframe; i++) {
10022         state = vstate->frame[i];
10023         for (j = 0; j < MAX_BPF_REG; j++) {
10024             reg = &state->regs[j];
10025             if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
10026                 *reg = *known_reg;
10027         }
10028
10029         bpf_for_each_spilled_reg(j, state, reg) {
10030             if (!reg)
10031                 continue;
10032             if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
10033                 *reg = *known_reg;
10034         }
10035     }
10036 }
10037
10038 static int check_cond_jmp_op(struct bpf_verifier_env *env,
10039                  struct bpf_insn *insn, int *insn_idx)
10040 {
10041     struct bpf_verifier_state *this_branch = env->cur_state;
10042     struct bpf_verifier_state *other_branch;
10043     struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
10044     struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
10045     u8 opcode = BPF_OP(insn->code);
10046     bool is_jmp32;
10047     int pred = -1;
10048     int err;
10049
10050     /* Only conditional jumps are expected to reach here. */
10051     if (opcode == BPF_JA || opcode > BPF_JSLE) {
10052         verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
10053         return -EINVAL;
10054     }
10055
10056     if (BPF_SRC(insn->code) == BPF_X) {
10057         if (insn->imm != 0) {
10058             verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
10059             return -EINVAL;
10060         }
10061
10062         /* check src1 operand */
10063         err = check_reg_arg(env, insn->src_reg, SRC_OP);
10064         if (err)
10065             return err;
10066
10067         if (is_pointer_value(env, insn->src_reg)) {
10068             verbose(env, "R%d pointer comparison prohibited\n",
10069                 insn->src_reg);
10070             return -EACCES;
10071         }
10072         src_reg = &regs[insn->src_reg];
10073     } else {
10074         if (insn->src_reg != BPF_REG_0) {
10075             verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
10076             return -EINVAL;
10077         }
10078     }
10079
10080     /* check src2 operand */
10081     err = check_reg_arg(env, insn->dst_reg, SRC_OP);
10082     if (err)
10083         return err;
10084
10085     dst_reg = &regs[insn->dst_reg];
10086     is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
10087
10088     if (BPF_SRC(insn->code) == BPF_K) {
10089         pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32);
10090     } else if (src_reg->type == SCALAR_VALUE &&
10091            is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) {
10092         pred = is_branch_taken(dst_reg,
10093                        tnum_subreg(src_reg->var_off).value,
10094                        opcode,
10095                        is_jmp32);
10096     } else if (src_reg->type == SCALAR_VALUE &&
10097            !is_jmp32 && tnum_is_const(src_reg->var_off)) {
10098         pred = is_branch_taken(dst_reg,
10099                        src_reg->var_off.value,
10100                        opcode,
10101                        is_jmp32);
10102     } else if (reg_is_pkt_pointer_any(dst_reg) &&
10103            reg_is_pkt_pointer_any(src_reg) &&
10104            !is_jmp32) {
10105         pred = is_pkt_ptr_branch_taken(dst_reg, src_reg, opcode);
10106     }
10107
10108     if (pred >= 0) {
10109         /* If we get here with a dst_reg pointer type it is because
10110          * above is_branch_taken() special cased the 0 comparison.
10111          */
10112         if (!__is_pointer_value(false, dst_reg))
10113             err = mark_chain_precision(env, insn->dst_reg);
10114         if (BPF_SRC(insn->code) == BPF_X && !err &&
10115             !__is_pointer_value(false, src_reg))
10116             err = mark_chain_precision(env, insn->src_reg);
10117         if (err)
10118             return err;
10119     }
10120
10121     if (pred == 1) {
10122         /* Only follow the goto, ignore fall-through. If needed, push
10123          * the fall-through branch for simulation under speculative
10124          * execution.
10125          */
10126         if (!env->bypass_spec_v1 &&
10127             !sanitize_speculative_path(env, insn, *insn_idx + 1,
10128                            *insn_idx))
10129             return -EFAULT;
10130         *insn_idx += insn->off;
10131         return 0;
10132     } else if (pred == 0) {
10133         /* Only follow the fall-through branch, since that's where the
10134          * program will go. If needed, push the goto branch for
10135          * simulation under speculative execution.
10136          */
10137         if (!env->bypass_spec_v1 &&
10138             !sanitize_speculative_path(env, insn,
10139                            *insn_idx + insn->off + 1,
10140                            *insn_idx))
10141             return -EFAULT;
10142         return 0;
10143     }
10144
10145     other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
10146                   false);
10147     if (!other_branch)
10148         return -EFAULT;
10149     other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
10150
10151     /* detect if we are comparing against a constant value so we can adjust
10152      * our min/max values for our dst register.
10153      * this is only legit if both are scalars (or pointers to the same
10154      * object, I suppose, but we don't support that right now), because
10155      * otherwise the different base pointers mean the offsets aren't
10156      * comparable.
10157      */
10158     if (BPF_SRC(insn->code) == BPF_X) {
10159         struct bpf_reg_state *src_reg = &regs[insn->src_reg];
10160
10161         if (dst_reg->type == SCALAR_VALUE &&
10162             src_reg->type == SCALAR_VALUE) {
10163             if (tnum_is_const(src_reg->var_off) ||
10164                 (is_jmp32 &&
10165                  tnum_is_const(tnum_subreg(src_reg->var_off))))
10166                 reg_set_min_max(&other_branch_regs[insn->dst_reg],
10167                         dst_reg,
10168                         src_reg->var_off.value,
10169                         tnum_subreg(src_reg->var_off).value,
10170                         opcode, is_jmp32);
10171             else if (tnum_is_const(dst_reg->var_off) ||
10172                  (is_jmp32 &&
10173                   tnum_is_const(tnum_subreg(dst_reg->var_off))))
10174                 reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
10175                             src_reg,
10176                             dst_reg->var_off.value,
10177                             tnum_subreg(dst_reg->var_off).value,
10178                             opcode, is_jmp32);
10179             else if (!is_jmp32 &&
10180                  (opcode == BPF_JEQ || opcode == BPF_JNE))
10181                 /* Comparing for equality, we can combine knowledge */
10182                 reg_combine_min_max(&other_branch_regs[insn->src_reg],
10183                             &other_branch_regs[insn->dst_reg],
10184                             src_reg, dst_reg, opcode);
10185             if (src_reg->id &&
10186                 !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
10187                 find_equal_scalars(this_branch, src_reg);
10188                 find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
10189             }
10190
10191         }
10192     } else if (dst_reg->type == SCALAR_VALUE) {
10193         reg_set_min_max(&other_branch_regs[insn->dst_reg],
10194                     dst_reg, insn->imm, (u32)insn->imm,
10195                     opcode, is_jmp32);
10196     }
10197
10198     if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
10199         !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
10200         find_equal_scalars(this_branch, dst_reg);
10201         find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]);
10202     }
10203
10204     /* detect if R == 0 where R is returned from bpf_map_lookup_elem().
10205      * NOTE: these optimizations below are related with pointer comparison
10206      *       which will never be JMP32.
10207      */
10208     if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
10209         insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
10210         type_may_be_null(dst_reg->type)) {
10211         /* Mark all identical registers in each branch as either
10212          * safe or unknown depending R == 0 or R != 0 conditional.
10213          */
10214         mark_ptr_or_null_regs(this_branch, insn->dst_reg,
10215                       opcode == BPF_JNE);
10216         mark_ptr_or_null_regs(other_branch, insn->dst_reg,
10217                       opcode == BPF_JEQ);
10218     } else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
10219                        this_branch, other_branch) &&
10220            is_pointer_value(env, insn->dst_reg)) {
10221         verbose(env, "R%d pointer comparison prohibited\n",
10222             insn->dst_reg);
10223         return -EACCES;
10224     }
10225     if (env->log.level & BPF_LOG_LEVEL)
10226         print_insn_state(env, this_branch->frame[this_branch->curframe]);
10227     return 0;
10228 }
10229
10230 /* verify BPF_LD_IMM64 instruction */
10231 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
10232 {
10233     struct bpf_insn_aux_data *aux = cur_aux(env);
10234     struct bpf_reg_state *regs = cur_regs(env);
10235     struct bpf_reg_state *dst_reg;
10236     struct bpf_map *map;
10237     int err;
10238
10239     if (BPF_SIZE(insn->code) != BPF_DW) {
10240         verbose(env, "invalid BPF_LD_IMM insn\n");
10241         return -EINVAL;
10242     }
10243     if (insn->off != 0) {
10244         verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
10245         return -EINVAL;
10246     }
10247
10248     err = check_reg_arg(env, insn->dst_reg, DST_OP);
10249     if (err)
10250         return err;
10251
10252     dst_reg = &regs[insn->dst_reg];
10253     if (insn->src_reg == 0) {
10254         u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
10255
10256         dst_reg->type = SCALAR_VALUE;
10257         __mark_reg_known(&regs[insn->dst_reg], imm);
10258         return 0;
10259     }
10260
10261     /* All special src_reg cases are listed below. From this point onwards
10262      * we either succeed and assign a corresponding dst_reg->type after
10263      * zeroing the offset, or fail and reject the program.
10264      */
10265     mark_reg_known_zero(env, regs, insn->dst_reg);
10266
10267     if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
10268         dst_reg->type = aux->btf_var.reg_type;
10269         switch (base_type(dst_reg->type)) {
10270         case PTR_TO_MEM:
10271             dst_reg->mem_size = aux->btf_var.mem_size;
10272             break;
10273         case PTR_TO_BTF_ID:
10274             dst_reg->btf = aux->btf_var.btf;
10275             dst_reg->btf_id = aux->btf_var.btf_id;
10276             break;
10277         default:
10278             verbose(env, "bpf verifier is misconfigured\n");
10279             return -EFAULT;
10280         }
10281         return 0;
10282     }
10283
10284     if (insn->src_reg == BPF_PSEUDO_FUNC) {
10285         struct bpf_prog_aux *aux = env->prog->aux;
10286         u32 subprogno = find_subprog(env,
10287                          env->insn_idx + insn->imm + 1);
10288
10289         if (!aux->func_info) {
10290             verbose(env, "missing btf func_info\n");
10291             return -EINVAL;
10292         }
10293         if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
10294             verbose(env, "callback function not static\n");
10295             return -EINVAL;
10296         }
10297
10298         dst_reg->type = PTR_TO_FUNC;
10299         dst_reg->subprogno = subprogno;
10300         return 0;
10301     }
10302
10303     map = env->used_maps[aux->map_index];
10304     dst_reg->map_ptr = map;
10305
10306     if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
10307         insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
10308         dst_reg->type = PTR_TO_MAP_VALUE;
10309         dst_reg->off = aux->map_off;
10310         if (map_value_has_spin_lock(map))
10311             dst_reg->id = ++env->id_gen;
10312     } else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
10313            insn->src_reg == BPF_PSEUDO_MAP_IDX) {
10314         dst_reg->type = CONST_PTR_TO_MAP;
10315     } else {
10316         verbose(env, "bpf verifier is misconfigured\n");
10317         return -EINVAL;
10318     }
10319
10320     return 0;
10321 }
10322
10323 static bool may_access_skb(enum bpf_prog_type type)
10324 {
10325     switch (type) {
10326     case BPF_PROG_TYPE_SOCKET_FILTER:
10327     case BPF_PROG_TYPE_SCHED_CLS:
10328     case BPF_PROG_TYPE_SCHED_ACT:
10329         return true;
10330     default:
10331         return false;
10332     }
10333 }
10334
10335 /* verify safety of LD_ABS|LD_IND instructions:
10336  * - they can only appear in the programs where ctx == skb
10337  * - since they are wrappers of function calls, they scratch R1-R5 registers,
10338  *   preserve R6-R9, and store return value into R0
10339  *
10340  * Implicit input:
10341  *   ctx == skb == R6 == CTX
10342  *
10343  * Explicit input:
10344  *   SRC == any register
10345  *   IMM == 32-bit immediate
10346  *
10347  * Output:
10348  *   R0 - 8/16/32-bit skb data converted to cpu endianness
10349  */
10350 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
10351 {
10352     struct bpf_reg_state *regs = cur_regs(env);
10353     static const int ctx_reg = BPF_REG_6;
10354     u8 mode = BPF_MODE(insn->code);
10355     int i, err;
10356
10357     if (!may_access_skb(resolve_prog_type(env->prog))) {
10358         verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
10359         return -EINVAL;
10360     }
10361
10362     if (!env->ops->gen_ld_abs) {
10363         verbose(env, "bpf verifier is misconfigured\n");
10364         return -EINVAL;
10365     }
10366
10367     if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
10368         BPF_SIZE(insn->code) == BPF_DW ||
10369         (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
10370         verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
10371         return -EINVAL;
10372     }
10373
10374     /* check whether implicit source operand (register R6) is readable */
10375     err = check_reg_arg(env, ctx_reg, SRC_OP);
10376     if (err)
10377         return err;
10378
10379     /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
10380      * gen_ld_abs() may terminate the program at runtime, leading to
10381      * reference leak.
10382      */
10383     err = check_reference_leak(env);
10384     if (err) {
10385         verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
10386         return err;
10387     }
10388
10389     if (env->cur_state->active_spin_lock) {
10390         verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
10391         return -EINVAL;
10392     }
10393
10394     if (regs[ctx_reg].type != PTR_TO_CTX) {
10395         verbose(env,
10396             "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
10397         return -EINVAL;
10398     }
10399
10400     if (mode == BPF_IND) {
10401         /* check explicit source operand */
10402         err = check_reg_arg(env, insn->src_reg, SRC_OP);
10403         if (err)
10404             return err;
10405     }
10406
10407     err = check_ptr_off_reg(env, &regs[ctx_reg], ctx_reg);
10408     if (err < 0)
10409         return err;
10410
10411     /* reset caller saved regs to unreadable */
10412     for (i = 0; i < CALLER_SAVED_REGS; i++) {
10413         mark_reg_not_init(env, regs, caller_saved[i]);
10414         check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
10415     }
10416
10417     /* mark destination R0 register as readable, since it contains
10418      * the value fetched from the packet.
10419      * Already marked as written above.
10420      */
10421     mark_reg_unknown(env, regs, BPF_REG_0);
10422     /* ld_abs load up to 32-bit skb data. */
10423     regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
10424     return 0;
10425 }
10426
10427 static int check_return_code(struct bpf_verifier_env *env)
10428 {
10429     struct tnum enforce_attach_type_range = tnum_unknown;
10430     const struct bpf_prog *prog = env->prog;
10431     struct bpf_reg_state *reg;
10432     struct tnum range = tnum_range(0, 1);
10433     enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
10434     int err;
10435     struct bpf_func_state *frame = env->cur_state->frame[0];
10436     const bool is_subprog = frame->subprogno;
10437
10438     /* LSM and struct_ops func-ptr's return type could be "void" */
10439     if (!is_subprog) {
10440         switch (prog_type) {
10441         case BPF_PROG_TYPE_LSM:
10442             if (prog->expected_attach_type == BPF_LSM_CGROUP)
10443                 /* See below, can be 0 or 0-1 depending on hook. */
10444                 break;
10445             fallthrough;
10446         case BPF_PROG_TYPE_STRUCT_OPS:
10447             if (!prog->aux->attach_func_proto->type)
10448                 return 0;
10449             break;
10450         default:
10451             break;
10452         }
10453     }
10454
10455     /* eBPF calling convention is such that R0 is used
10456      * to return the value from eBPF program.
10457      * Make sure that it's readable at this time
10458      * of bpf_exit, which means that program wrote
10459      * something into it earlier
10460      */
10461     err = check_reg_arg(env, BPF_REG_0, SRC_OP);
10462     if (err)
10463         return err;
10464
10465     if (is_pointer_value(env, BPF_REG_0)) {
10466         verbose(env, "R0 leaks addr as return value\n");
10467         return -EACCES;
10468     }
10469
10470     reg = cur_regs(env) + BPF_REG_0;
10471
10472     if (frame->in_async_callback_fn) {
10473         /* enforce return zero from async callbacks like timer */
10474         if (reg->type != SCALAR_VALUE) {
10475             verbose(env, "In async callback the register R0 is not a known value (%s)\n",
10476                 reg_type_str(env, reg->type));
10477             return -EINVAL;
10478         }
10479
10480         if (!tnum_in(tnum_const(0), reg->var_off)) {
10481             verbose_invalid_scalar(env, reg, &range, "async callback", "R0");
10482             return -EINVAL;
10483         }
10484         return 0;
10485     }
10486
10487     if (is_subprog) {
10488         if (reg->type != SCALAR_VALUE) {
10489             verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
10490                 reg_type_str(env, reg->type));
10491             return -EINVAL;
10492         }
10493         return 0;
10494     }
10495
10496     switch (prog_type) {
10497     case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
10498         if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
10499             env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
10500             env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
10501             env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
10502             env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
10503             env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
10504             range = tnum_range(1, 1);
10505         if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
10506             env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
10507             range = tnum_range(0, 3);
10508         break;
10509     case BPF_PROG_TYPE_CGROUP_SKB:
10510         if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
10511             range = tnum_range(0, 3);
10512             enforce_attach_type_range = tnum_range(2, 3);
10513         }
10514         break;
10515     case BPF_PROG_TYPE_CGROUP_SOCK:
10516     case BPF_PROG_TYPE_SOCK_OPS:
10517     case BPF_PROG_TYPE_CGROUP_DEVICE:
10518     case BPF_PROG_TYPE_CGROUP_SYSCTL:
10519     case BPF_PROG_TYPE_CGROUP_SOCKOPT:
10520         break;
10521     case BPF_PROG_TYPE_RAW_TRACEPOINT:
10522         if (!env->prog->aux->attach_btf_id)
10523             return 0;
10524         range = tnum_const(0);
10525         break;
10526     case BPF_PROG_TYPE_TRACING:
10527         switch (env->prog->expected_attach_type) {
10528         case BPF_TRACE_FENTRY:
10529         case BPF_TRACE_FEXIT:
10530             range = tnum_const(0);
10531             break;
10532         case BPF_TRACE_RAW_TP:
10533         case BPF_MODIFY_RETURN:
10534             return 0;
10535         case BPF_TRACE_ITER:
10536             break;
10537         default:
10538             return -ENOTSUPP;
10539         }
10540         break;
10541     case BPF_PROG_TYPE_SK_LOOKUP:
10542         range = tnum_range(SK_DROP, SK_PASS);
10543         break;
10544
10545     case BPF_PROG_TYPE_LSM:
10546         if (env->prog->expected_attach_type != BPF_LSM_CGROUP) {
10547             /* Regular BPF_PROG_TYPE_LSM programs can return
10548              * any value.
10549              */
10550             return 0;
10551         }
10552         if (!env->prog->aux->attach_func_proto->type) {
10553             /* Make sure programs that attach to void
10554              * hooks don't try to modify return value.
10555              */
10556             range = tnum_range(1, 1);
10557         }
10558         break;
10559
10560     case BPF_PROG_TYPE_EXT:
10561         /* freplace program can return anything as its return value
10562          * depends on the to-be-replaced kernel func or bpf program.
10563          */
10564     default:
10565         return 0;
10566     }
10567
10568     if (reg->type != SCALAR_VALUE) {
10569         verbose(env, "At program exit the register R0 is not a known value (%s)\n",
10570             reg_type_str(env, reg->type));
10571         return -EINVAL;
10572     }
10573
10574     if (!tnum_in(range, reg->var_off)) {
10575         verbose_invalid_scalar(env, reg, &range, "program exit", "R0");
10576         if (prog->expected_attach_type == BPF_LSM_CGROUP &&
10577             prog_type == BPF_PROG_TYPE_LSM &&
10578             !prog->aux->attach_func_proto->type)
10579             verbose(env, "Note, BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
10580         return -EINVAL;
10581     }
10582
10583     if (!tnum_is_unknown(enforce_attach_type_range) &&
10584         tnum_in(enforce_attach_type_range, reg->var_off))
10585         env->prog->enforce_expected_attach_type = 1;
10586     return 0;
10587 }
10588
10589 /* non-recursive DFS pseudo code
10590  * 1  procedure DFS-iterative(G,v):
10591  * 2      label v as discovered
10592  * 3      let S be a stack
10593  * 4      S.push(v)
10594  * 5      while S is not empty
10595  * 6            t <- S.pop()
10596  * 7            if t is what we're looking for:
10597  * 8                return t
10598  * 9            for all edges e in G.adjacentEdges(t) do
10599  * 10               if edge e is already labelled
10600  * 11                   continue with the next edge
10601  * 12               w <- G.adjacentVertex(t,e)
10602  * 13               if vertex w is not discovered and not explored
10603  * 14                   label e as tree-edge
10604  * 15                   label w as discovered
10605  * 16                   S.push(w)
10606  * 17                   continue at 5
10607  * 18               else if vertex w is discovered
10608  * 19                   label e as back-edge
10609  * 20               else
10610  * 21                   // vertex w is explored
10611  * 22                   label e as forward- or cross-edge
10612  * 23           label t as explored
10613  * 24           S.pop()
10614  *
10615  * convention:
10616  * 0x10 - discovered
10617  * 0x11 - discovered and fall-through edge labelled
10618  * 0x12 - discovered and fall-through and branch edges labelled
10619  * 0x20 - explored
10620  */
10621
10622 enum {
10623     DISCOVERED = 0x10,
10624     EXPLORED = 0x20,
10625     FALLTHROUGH = 1,
10626     BRANCH = 2,
10627 };
10628
10629 static u32 state_htab_size(struct bpf_verifier_env *env)
10630 {
10631     return env->prog->len;
10632 }
10633
10634 static struct bpf_verifier_state_list **explored_state(
10635                     struct bpf_verifier_env *env,
10636                     int idx)
10637 {
10638     struct bpf_verifier_state *cur = env->cur_state;
10639     struct bpf_func_state *state = cur->frame[cur->curframe];
10640
10641     return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
10642 }
10643
10644 static void init_explored_state(struct bpf_verifier_env *env, int idx)
10645 {
10646     env->insn_aux_data[idx].prune_point = true;
10647 }
10648
10649 enum {
10650     DONE_EXPLORING = 0,
10651     KEEP_EXPLORING = 1,
10652 };
10653
10654 /* t, w, e - match pseudo-code above:
10655  * t - index of current instruction
10656  * w - next instruction
10657  * e - edge
10658  */
10659 static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
10660              bool loop_ok)
10661 {
10662     int *insn_stack = env->cfg.insn_stack;
10663     int *insn_state = env->cfg.insn_state;
10664
10665     if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
10666         return DONE_EXPLORING;
10667
10668     if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
10669         return DONE_EXPLORING;
10670
10671     if (w < 0 || w >= env->prog->len) {
10672         verbose_linfo(env, t, "%d: ", t);
10673         verbose(env, "jump out of range from insn %d to %d\n", t, w);
10674         return -EINVAL;
10675     }
10676
10677     if (e == BRANCH)
10678         /* mark branch target for state pruning */
10679         init_explored_state(env, w);
10680
10681     if (insn_state[w] == 0) {
10682         /* tree-edge */
10683         insn_state[t] = DISCOVERED | e;
10684         insn_state[w] = DISCOVERED;
10685         if (env->cfg.cur_stack >= env->prog->len)
10686             return -E2BIG;
10687         insn_stack[env->cfg.cur_stack++] = w;
10688         return KEEP_EXPLORING;
10689     } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
10690         if (loop_ok && env->bpf_capable)
10691             return DONE_EXPLORING;
10692         verbose_linfo(env, t, "%d: ", t);
10693         verbose_linfo(env, w, "%d: ", w);
10694         verbose(env, "back-edge from insn %d to %d\n", t, w);
10695         return -EINVAL;
10696     } else if (insn_state[w] == EXPLORED) {
10697         /* forward- or cross-edge */
10698         insn_state[t] = DISCOVERED | e;
10699     } else {
10700         verbose(env, "insn state internal bug\n");
10701         return -EFAULT;
10702     }
10703     return DONE_EXPLORING;
10704 }
10705
10706 static int visit_func_call_insn(int t, int insn_cnt,
10707                 struct bpf_insn *insns,
10708                 struct bpf_verifier_env *env,
10709                 bool visit_callee)
10710 {
10711     int ret;
10712
10713     ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
10714     if (ret)
10715         return ret;
10716
10717     if (t + 1 < insn_cnt)
10718         init_explored_state(env, t + 1);
10719     if (visit_callee) {
10720         init_explored_state(env, t);
10721         ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env,
10722                 /* It's ok to allow recursion from CFG point of
10723                  * view. __check_func_call() will do the actual
10724                  * check.
10725                  */
10726                 bpf_pseudo_func(insns + t));
10727     }
10728     return ret;
10729 }
10730
10731 /* Visits the instruction at index t and returns one of the following:
10732  *  < 0 - an error occurred
10733  *  DONE_EXPLORING - the instruction was fully explored
10734  *  KEEP_EXPLORING - there is still work to be done before it is fully explored
10735  */
10736 static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
10737 {
10738     struct bpf_insn *insns = env->prog->insnsi;
10739     int ret;
10740
10741     if (bpf_pseudo_func(insns + t))
10742         return visit_func_call_insn(t, insn_cnt, insns, env, true);
10743
10744     /* All non-branch instructions have a single fall-through edge. */
10745     if (BPF_CLASS(insns[t].code) != BPF_JMP &&
10746         BPF_CLASS(insns[t].code) != BPF_JMP32)
10747         return push_insn(t, t + 1, FALLTHROUGH, env, false);
10748
10749     switch (BPF_OP(insns[t].code)) {
10750     case BPF_EXIT:
10751         return DONE_EXPLORING;
10752
10753     case BPF_CALL:
10754         if (insns[t].imm == BPF_FUNC_timer_set_callback)
10755             /* Mark this call insn to trigger is_state_visited() check
10756              * before call itself is processed by __check_func_call().
10757              * Otherwise new async state will be pushed for further
10758              * exploration.
10759              */
10760             init_explored_state(env, t);
10761         return visit_func_call_insn(t, insn_cnt, insns, env,
10762                         insns[t].src_reg == BPF_PSEUDO_CALL);
10763
10764     case BPF_JA:
10765         if (BPF_SRC(insns[t].code) != BPF_K)
10766             return -EINVAL;
10767
10768         /* unconditional jump with single edge */
10769         ret = push_insn(t, t + insns[t].off + 1, FALLTHROUGH, env,
10770                 true);
10771         if (ret)
10772             return ret;
10773
10774         /* unconditional jmp is not a good pruning point,
10775          * but it's marked, since backtracking needs
10776          * to record jmp history in is_state_visited().
10777          */
10778         init_explored_state(env, t + insns[t].off + 1);
10779         /* tell verifier to check for equivalent states
10780          * after every call and jump
10781          */
10782         if (t + 1 < insn_cnt)
10783             init_explored_state(env, t + 1);
10784
10785         return ret;
10786
10787     default:
10788         /* conditional jump with two edges */
10789         init_explored_state(env, t);
10790         ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
10791         if (ret)
10792             return ret;
10793
10794         return push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
10795     }
10796 }
10797
10798 /* non-recursive depth-first-search to detect loops in BPF program
10799  * loop == back-edge in directed graph
10800  */
10801 static int check_cfg(struct bpf_verifier_env *env)
10802 {
10803     int insn_cnt = env->prog->len;
10804     int *insn_stack, *insn_state;
10805     int ret = 0;
10806     int i;
10807
10808     insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
10809     if (!insn_state)
10810         return -ENOMEM;
10811
10812     insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
10813     if (!insn_stack) {
10814         kvfree(insn_state);
10815         return -ENOMEM;
10816     }
10817
10818     insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
10819     insn_stack[0] = 0; /* 0 is the first instruction */
10820     env->cfg.cur_stack = 1;
10821
10822     while (env->cfg.cur_stack > 0) {
10823         int t = insn_stack[env->cfg.cur_stack - 1];
10824
10825         ret = visit_insn(t, insn_cnt, env);
10826         switch (ret) {
10827         case DONE_EXPLORING:
10828             insn_state[t] = EXPLORED;
10829             env->cfg.cur_stack--;
10830             break;
10831         case KEEP_EXPLORING:
10832             break;
10833         default:
10834             if (ret > 0) {
10835                 verbose(env, "visit_insn internal bug\n");
10836                 ret = -EFAULT;
10837             }
10838             goto err_free;
10839         }
10840     }
10841
10842     if (env->cfg.cur_stack < 0) {
10843         verbose(env, "pop stack internal bug\n");
10844         ret = -EFAULT;
10845         goto err_free;
10846     }
10847
10848     for (i = 0; i < insn_cnt; i++) {
10849         if (insn_state[i] != EXPLORED) {
10850             verbose(env, "unreachable insn %d\n", i);
10851             ret = -EINVAL;
10852             goto err_free;
10853         }
10854     }
10855     ret = 0; /* cfg looks good */
10856
10857 err_free:
10858     kvfree(insn_state);
10859     kvfree(insn_stack);
10860     env->cfg.insn_state = env->cfg.insn_stack = NULL;
10861     return ret;
10862 }
10863
10864 static int check_abnormal_return(struct bpf_verifier_env *env)
10865 {
10866     int i;
10867
10868     for (i = 1; i < env->subprog_cnt; i++) {
10869         if (env->subprog_info[i].has_ld_abs) {
10870             verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
10871             return -EINVAL;
10872         }
10873         if (env->subprog_info[i].has_tail_call) {
10874             verbose(env, "tail_call is not allowed in subprogs without BTF\n");
10875             return -EINVAL;
10876         }
10877     }
10878     return 0;
10879 }
10880
10881 /* The minimum supported BTF func info size */
10882 #define MIN_BPF_FUNCINFO_SIZE   8
10883 #define MAX_FUNCINFO_REC_SIZE   252
10884
10885 static int check_btf_func(struct bpf_verifier_env *env,
10886               const union bpf_attr *attr,
10887               bpfptr_t uattr)
10888 {
10889     const struct btf_type *type, *func_proto, *ret_type;
10890     u32 i, nfuncs, urec_size, min_size;
10891     u32 krec_size = sizeof(struct bpf_func_info);
10892     struct bpf_func_info *krecord;
10893     struct bpf_func_info_aux *info_aux = NULL;
10894     struct bpf_prog *prog;
10895     const struct btf *btf;
10896     bpfptr_t urecord;
10897     u32 prev_offset = 0;
10898     bool scalar_return;
10899     int ret = -ENOMEM;
10900
10901     nfuncs = attr->func_info_cnt;
10902     if (!nfuncs) {
10903         if (check_abnormal_return(env))
10904             return -EINVAL;
10905         return 0;
10906     }
10907
10908     if (nfuncs != env->subprog_cnt) {
10909         verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
10910         return -EINVAL;
10911     }
10912
10913     urec_size = attr->func_info_rec_size;
10914     if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
10915         urec_size > MAX_FUNCINFO_REC_SIZE ||
10916         urec_size % sizeof(u32)) {
10917         verbose(env, "invalid func info rec size %u\n", urec_size);
10918         return -EINVAL;
10919     }
10920
10921     prog = env->prog;
10922     btf = prog->aux->btf;
10923
10924     urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
10925     min_size = min_t(u32, krec_size, urec_size);
10926
10927     krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
10928     if (!krecord)
10929         return -ENOMEM;
10930     info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
10931     if (!info_aux)
10932         goto err_free;
10933
10934     for (i = 0; i < nfuncs; i++) {
10935         ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
10936         if (ret) {
10937             if (ret == -E2BIG) {
10938                 verbose(env, "nonzero tailing record in func info");
10939                 /* set the size kernel expects so loader can zero
10940                  * out the rest of the record.
10941                  */
10942                 if (copy_to_bpfptr_offset(uattr,
10943                               offsetof(union bpf_attr, func_info_rec_size),
10944                               &min_size, sizeof(min_size)))
10945                     ret = -EFAULT;
10946             }
10947             goto err_free;
10948         }
10949
10950         if (copy_from_bpfptr(&krecord[i], urecord, min_size)) {
10951             ret = -EFAULT;
10952             goto err_free;
10953         }
10954
10955         /* check insn_off */
10956         ret = -EINVAL;
10957         if (i == 0) {
10958             if (krecord[i].insn_off) {
10959                 verbose(env,
10960                     "nonzero insn_off %u for the first func info record",
10961                     krecord[i].insn_off);
10962                 goto err_free;
10963             }
10964         } else if (krecord[i].insn_off <= prev_offset) {
10965             verbose(env,
10966                 "same or smaller insn offset (%u) than previous func info record (%u)",
10967                 krecord[i].insn_off, prev_offset);
10968             goto err_free;
10969         }
10970
10971         if (env->subprog_info[i].start != krecord[i].insn_off) {
10972             verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
10973             goto err_free;
10974         }
10975
10976         /* check type_id */
10977         type = btf_type_by_id(btf, krecord[i].type_id);
10978         if (!type || !btf_type_is_func(type)) {
10979             verbose(env, "invalid type id %d in func info",
10980                 krecord[i].type_id);
10981             goto err_free;
10982         }
10983         info_aux[i].linkage = BTF_INFO_VLEN(type->info);
10984
10985         func_proto = btf_type_by_id(btf, type->type);
10986         if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
10987             /* btf_func_check() already verified it during BTF load */
10988             goto err_free;
10989         ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
10990         scalar_return =
10991             btf_type_is_small_int(ret_type) || btf_is_any_enum(ret_type);
10992         if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
10993             verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
10994             goto err_free;
10995         }
10996         if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
10997             verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
10998             goto err_free;
10999         }
11000
11001         prev_offset = krecord[i].insn_off;
11002         bpfptr_add(&urecord, urec_size);
11003     }
11004
11005     prog->aux->func_info = krecord;
11006     prog->aux->func_info_cnt = nfuncs;
11007     prog->aux->func_info_aux = info_aux;
11008     return 0;
11009
11010 err_free:
11011     kvfree(krecord);
11012     kfree(info_aux);
11013     return ret;
11014 }
11015
11016 static void adjust_btf_func(struct bpf_verifier_env *env)
11017 {
11018     struct bpf_prog_aux *aux = env->prog->aux;
11019     int i;
11020
11021     if (!aux->func_info)
11022         return;
11023
11024     for (i = 0; i < env->subprog_cnt; i++)
11025         aux->func_info[i].insn_off = env->subprog_info[i].start;
11026 }
11027
11028 #define MIN_BPF_LINEINFO_SIZE   offsetofend(struct bpf_line_info, line_col)
11029 #define MAX_LINEINFO_REC_SIZE   MAX_FUNCINFO_REC_SIZE
11030
11031 static int check_btf_line(struct bpf_verifier_env *env,
11032               const union bpf_attr *attr,
11033               bpfptr_t uattr)
11034 {
11035     u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
11036     struct bpf_subprog_info *sub;
11037     struct bpf_line_info *linfo;
11038     struct bpf_prog *prog;
11039     const struct btf *btf;
11040     bpfptr_t ulinfo;
11041     int err;
11042
11043     nr_linfo = attr->line_info_cnt;
11044     if (!nr_linfo)
11045         return 0;
11046     if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
11047         return -EINVAL;
11048
11049     rec_size = attr->line_info_rec_size;
11050     if (rec_size < MIN_BPF_LINEINFO_SIZE ||
11051         rec_size > MAX_LINEINFO_REC_SIZE ||
11052         rec_size & (sizeof(u32) - 1))
11053         return -EINVAL;
11054
11055     /* Need to zero it in case the userspace may
11056      * pass in a smaller bpf_line_info object.
11057      */
11058     linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
11059              GFP_KERNEL | __GFP_NOWARN);
11060     if (!linfo)
11061         return -ENOMEM;
11062
11063     prog = env->prog;
11064     btf = prog->aux->btf;
11065
11066     s = 0;
11067     sub = env->subprog_info;
11068     ulinfo = make_bpfptr(attr->line_info, uattr.is_kernel);
11069     expected_size = sizeof(struct bpf_line_info);
11070     ncopy = min_t(u32, expected_size, rec_size);
11071     for (i = 0; i < nr_linfo; i++) {
11072         err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
11073         if (err) {
11074             if (err == -E2BIG) {
11075                 verbose(env, "nonzero tailing record in line_info");
11076                 if (copy_to_bpfptr_offset(uattr,
11077                               offsetof(union bpf_attr, line_info_rec_size),
11078                               &expected_size, sizeof(expected_size)))
11079                     err = -EFAULT;
11080             }
11081             goto err_free;
11082         }
11083
11084         if (copy_from_bpfptr(&linfo[i], ulinfo, ncopy)) {
11085             err = -EFAULT;
11086             goto err_free;
11087         }
11088
11089         /*
11090          * Check insn_off to ensure
11091          * 1) strictly increasing AND
11092          * 2) bounded by prog->len
11093          *
11094          * The linfo[0].insn_off == 0 check logically falls into
11095          * the later "missing bpf_line_info for func..." case
11096          * because the first linfo[0].insn_off must be the
11097          * first sub also and the first sub must have
11098          * subprog_info[0].start == 0.
11099          */
11100         if ((i && linfo[i].insn_off <= prev_offset) ||
11101             linfo[i].insn_off >= prog->len) {
11102             verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
11103                 i, linfo[i].insn_off, prev_offset,
11104                 prog->len);
11105             err = -EINVAL;
11106             goto err_free;
11107         }
11108
11109         if (!prog->insnsi[linfo[i].insn_off].code) {
11110             verbose(env,
11111                 "Invalid insn code at line_info[%u].insn_off\n",
11112                 i);
11113             err = -EINVAL;
11114             goto err_free;
11115         }
11116
11117         if (!btf_name_by_offset(btf, linfo[i].line_off) ||
11118             !btf_name_by_offset(btf, linfo[i].file_name_off)) {
11119             verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
11120             err = -EINVAL;
11121             goto err_free;
11122         }
11123
11124         if (s != env->subprog_cnt) {
11125             if (linfo[i].insn_off == sub[s].start) {
11126                 sub[s].linfo_idx = i;
11127                 s++;
11128             } else if (sub[s].start < linfo[i].insn_off) {
11129                 verbose(env, "missing bpf_line_info for func#%u\n", s);
11130                 err = -EINVAL;
11131                 goto err_free;
11132             }
11133         }
11134
11135         prev_offset = linfo[i].insn_off;
11136         bpfptr_add(&ulinfo, rec_size);
11137     }
11138
11139     if (s != env->subprog_cnt) {
11140         verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
11141             env->subprog_cnt - s, s);
11142         err = -EINVAL;
11143         goto err_free;
11144     }
11145
11146     prog->aux->linfo = linfo;
11147     prog->aux->nr_linfo = nr_linfo;
11148
11149     return 0;
11150
11151 err_free:
11152     kvfree(linfo);
11153     return err;
11154 }
11155
11156 #define MIN_CORE_RELO_SIZE  sizeof(struct bpf_core_relo)
11157 #define MAX_CORE_RELO_SIZE  MAX_FUNCINFO_REC_SIZE
11158
11159 static int check_core_relo(struct bpf_verifier_env *env,
11160                const union bpf_attr *attr,
11161                bpfptr_t uattr)
11162 {
11163     u32 i, nr_core_relo, ncopy, expected_size, rec_size;
11164     struct bpf_core_relo core_relo = {};
11165     struct bpf_prog *prog = env->prog;
11166     const struct btf *btf = prog->aux->btf;
11167     struct bpf_core_ctx ctx = {
11168         .log = &env->log,
11169         .btf = btf,
11170     };
11171     bpfptr_t u_core_relo;
11172     int err;
11173
11174     nr_core_relo = attr->core_relo_cnt;
11175     if (!nr_core_relo)
11176         return 0;
11177     if (nr_core_relo > INT_MAX / sizeof(struct bpf_core_relo))
11178         return -EINVAL;
11179
11180     rec_size = attr->core_relo_rec_size;
11181     if (rec_size < MIN_CORE_RELO_SIZE ||
11182         rec_size > MAX_CORE_RELO_SIZE ||
11183         rec_size % sizeof(u32))
11184         return -EINVAL;
11185
11186     u_core_relo = make_bpfptr(attr->core_relos, uattr.is_kernel);
11187     expected_size = sizeof(struct bpf_core_relo);
11188     ncopy = min_t(u32, expected_size, rec_size);
11189
11190     /* Unlike func_info and line_info, copy and apply each CO-RE
11191      * relocation record one at a time.
11192      */
11193     for (i = 0; i < nr_core_relo; i++) {
11194         /* future proofing when sizeof(bpf_core_relo) changes */
11195         err = bpf_check_uarg_tail_zero(u_core_relo, expected_size, rec_size);
11196         if (err) {
11197             if (err == -E2BIG) {
11198                 verbose(env, "nonzero tailing record in core_relo");
11199                 if (copy_to_bpfptr_offset(uattr,
11200                               offsetof(union bpf_attr, core_relo_rec_size),
11201                               &expected_size, sizeof(expected_size)))
11202                     err = -EFAULT;
11203             }
11204             break;
11205         }
11206
11207         if (copy_from_bpfptr(&core_relo, u_core_relo, ncopy)) {
11208             err = -EFAULT;
11209             break;
11210         }
11211
11212         if (core_relo.insn_off % 8 || core_relo.insn_off / 8 >= prog->len) {
11213             verbose(env, "Invalid core_relo[%u].insn_off:%u prog->len:%u\n",
11214                 i, core_relo.insn_off, prog->len);
11215             err = -EINVAL;
11216             break;
11217         }
11218
11219         err = bpf_core_apply(&ctx, &core_relo, i,
11220                      &prog->insnsi[core_relo.insn_off / 8]);
11221         if (err)
11222             break;
11223         bpfptr_add(&u_core_relo, rec_size);
11224     }
11225     return err;
11226 }
11227
11228 static int check_btf_info(struct bpf_verifier_env *env,
11229               const union bpf_attr *attr,
11230               bpfptr_t uattr)
11231 {
11232     struct btf *btf;
11233     int err;
11234
11235     if (!attr->func_info_cnt && !attr->line_info_cnt) {
11236         if (check_abnormal_return(env))
11237             return -EINVAL;
11238         return 0;
11239     }
11240
11241     btf = btf_get_by_fd(attr->prog_btf_fd);
11242     if (IS_ERR(btf))
11243         return PTR_ERR(btf);
11244     if (btf_is_kernel(btf)) {
11245         btf_put(btf);
11246         return -EACCES;
11247     }
11248     env->prog->aux->btf = btf;
11249
11250     err = check_btf_func(env, attr, uattr);
11251     if (err)
11252         return err;
11253
11254     err = check_btf_line(env, attr, uattr);
11255     if (err)
11256         return err;
11257
11258     err = check_core_relo(env, attr, uattr);
11259     if (err)
11260         return err;
11261
11262     return 0;
11263 }
11264
11265 /* check %cur's range satisfies %old's */
11266 static bool range_within(struct bpf_reg_state *old,
11267              struct bpf_reg_state *cur)
11268 {
11269     return old->umin_value <= cur->umin_value &&
11270            old->umax_value >= cur->umax_value &&
11271            old->smin_value <= cur->smin_value &&
11272            old->smax_value >= cur->smax_value &&
11273            old->u32_min_value <= cur->u32_min_value &&
11274            old->u32_max_value >= cur->u32_max_value &&
11275            old->s32_min_value <= cur->s32_min_value &&
11276            old->s32_max_value >= cur->s32_max_value;
11277 }
11278
11279 /* If in the old state two registers had the same id, then they need to have
11280  * the same id in the new state as well.  But that id could be different from
11281  * the old state, so we need to track the mapping from old to new ids.
11282  * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
11283  * regs with old id 5 must also have new id 9 for the new state to be safe.  But
11284  * regs with a different old id could still have new id 9, we don't care about
11285  * that.
11286  * So we look through our idmap to see if this old id has been seen before.  If
11287  * so, we require the new id to match; otherwise, we add the id pair to the map.
11288  */
11289 static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
11290 {
11291     unsigned int i;
11292
11293     for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
11294         if (!idmap[i].old) {
11295             /* Reached an empty slot; haven't seen this id before */
11296             idmap[i].old = old_id;
11297             idmap[i].cur = cur_id;
11298             return true;
11299         }
11300         if (idmap[i].old == old_id)
11301             return idmap[i].cur == cur_id;
11302     }
11303     /* We ran out of idmap slots, which should be impossible */
11304     WARN_ON_ONCE(1);
11305     return false;
11306 }
11307
11308 static void clean_func_state(struct bpf_verifier_env *env,
11309                  struct bpf_func_state *st)
11310 {
11311     enum bpf_reg_liveness live;
11312     int i, j;
11313
11314     for (i = 0; i < BPF_REG_FP; i++) {
11315         live = st->regs[i].live;
11316         /* liveness must not touch this register anymore */
11317         st->regs[i].live |= REG_LIVE_DONE;
11318         if (!(live & REG_LIVE_READ))
11319             /* since the register is unused, clear its state
11320              * to make further comparison simpler
11321              */
11322             __mark_reg_not_init(env, &st->regs[i]);
11323     }
11324
11325     for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
11326         live = st->stack[i].spilled_ptr.live;
11327         /* liveness must not touch this stack slot anymore */
11328         st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
11329         if (!(live & REG_LIVE_READ)) {
11330             __mark_reg_not_init(env, &st->stack[i].spilled_ptr);
11331             for (j = 0; j < BPF_REG_SIZE; j++)
11332                 st->stack[i].slot_type[j] = STACK_INVALID;
11333         }
11334     }
11335 }
11336
11337 static void clean_verifier_state(struct bpf_verifier_env *env,
11338                  struct bpf_verifier_state *st)
11339 {
11340     int i;
11341
11342     if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
11343         /* all regs in this state in all frames were already marked */
11344         return;
11345
11346     for (i = 0; i <= st->curframe; i++)
11347         clean_func_state(env, st->frame[i]);
11348 }
11349
11350 /* the parentage chains form a tree.
11351  * the verifier states are added to state lists at given insn and
11352  * pushed into state stack for future exploration.
11353  * when the verifier reaches bpf_exit insn some of the verifer states
11354  * stored in the state lists have their final liveness state already,
11355  * but a lot of states will get revised from liveness point of view when
11356  * the verifier explores other branches.
11357  * Example:
11358  * 1: r0 = 1
11359  * 2: if r1 == 100 goto pc+1
11360  * 3: r0 = 2
11361  * 4: exit
11362  * when the verifier reaches exit insn the register r0 in the state list of
11363  * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
11364  * of insn 2 and goes exploring further. At the insn 4 it will walk the
11365  * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
11366  *
11367  * Since the verifier pushes the branch states as it sees them while exploring
11368  * the program the condition of walking the branch instruction for the second
11369  * time means that all states below this branch were already explored and
11370  * their final liveness marks are already propagated.
11371  * Hence when the verifier completes the search of state list in is_state_visited()
11372  * we can call this clean_live_states() function to mark all liveness states
11373  * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
11374  * will not be used.
11375  * This function also clears the registers and stack for states that !READ
11376  * to simplify state merging.
11377  *
11378  * Important note here that walking the same branch instruction in the callee
11379  * doesn't meant that the states are DONE. The verifier has to compare
11380  * the callsites
11381  */
11382 static void clean_live_states(struct bpf_verifier_env *env, int insn,
11383                   struct bpf_verifier_state *cur)
11384 {
11385     struct bpf_verifier_state_list *sl;
11386     int i;
11387
11388     sl = *explored_state(env, insn);
11389     while (sl) {
11390         if (sl->state.branches)
11391             goto next;
11392         if (sl->state.insn_idx != insn ||
11393             sl->state.curframe != cur->curframe)
11394             goto next;
11395         for (i = 0; i <= cur->curframe; i++)
11396             if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
11397                 goto next;
11398         clean_verifier_state(env, &sl->state);
11399 next:
11400         sl = sl->next;
11401     }
11402 }
11403
11404 /* Returns true if (rold safe implies rcur safe) */
11405 static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
11406             struct bpf_reg_state *rcur, struct bpf_id_pair *idmap)
11407 {
11408     bool equal;
11409
11410     if (!(rold->live & REG_LIVE_READ))
11411         /* explored state didn't use this */
11412         return true;
11413
11414     equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
11415
11416     if (rold->type == PTR_TO_STACK)
11417         /* two stack pointers are equal only if they're pointing to
11418          * the same stack frame, since fp-8 in foo != fp-8 in bar
11419          */
11420         return equal && rold->frameno == rcur->frameno;
11421
11422     if (equal)
11423         return true;
11424
11425     if (rold->type == NOT_INIT)
11426         /* explored state can't have used this */
11427         return true;
11428     if (rcur->type == NOT_INIT)
11429         return false;
11430     switch (base_type(rold->type)) {
11431     case SCALAR_VALUE:
11432         if (env->explore_alu_limits)
11433             return false;
11434         if (rcur->type == SCALAR_VALUE) {
11435             if (!rold->precise && !rcur->precise)
11436                 return true;
11437             /* new val must satisfy old val knowledge */
11438             return range_within(rold, rcur) &&
11439                    tnum_in(rold->var_off, rcur->var_off);
11440         } else {
11441             /* We're trying to use a pointer in place of a scalar.
11442              * Even if the scalar was unbounded, this could lead to
11443              * pointer leaks because scalars are allowed to leak
11444              * while pointers are not. We could make this safe in
11445              * special cases if root is calling us, but it's
11446              * probably not worth the hassle.
11447              */
11448             return false;
11449         }
11450     case PTR_TO_MAP_KEY:
11451     case PTR_TO_MAP_VALUE:
11452         /* a PTR_TO_MAP_VALUE could be safe to use as a
11453          * PTR_TO_MAP_VALUE_OR_NULL into the same map.
11454          * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
11455          * checked, doing so could have affected others with the same
11456          * id, and we can't check for that because we lost the id when
11457          * we converted to a PTR_TO_MAP_VALUE.
11458          */
11459         if (type_may_be_null(rold->type)) {
11460             if (!type_may_be_null(rcur->type))
11461                 return false;
11462             if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
11463                 return false;
11464             /* Check our ids match any regs they're supposed to */
11465             return check_ids(rold->id, rcur->id, idmap);
11466         }
11467
11468         /* If the new min/max/var_off satisfy the old ones and
11469          * everything else matches, we are OK.
11470          * 'id' is not compared, since it's only used for maps with
11471          * bpf_spin_lock inside map element and in such cases if
11472          * the rest of the prog is valid for one map element then
11473          * it's valid for all map elements regardless of the key
11474          * used in bpf_map_lookup()
11475          */
11476         return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
11477                range_within(rold, rcur) &&
11478                tnum_in(rold->var_off, rcur->var_off);
11479     case PTR_TO_PACKET_META:
11480     case PTR_TO_PACKET:
11481         if (rcur->type != rold->type)
11482             return false;
11483         /* We must have at least as much range as the old ptr
11484          * did, so that any accesses which were safe before are
11485          * still safe.  This is true even if old range < old off,
11486          * since someone could have accessed through (ptr - k), or
11487          * even done ptr -= k in a register, to get a safe access.
11488          */
11489         if (rold->range > rcur->range)
11490             return false;
11491         /* If the offsets don't match, we can't trust our alignment;
11492          * nor can we be sure that we won't fall out of range.
11493          */
11494         if (rold->off != rcur->off)
11495             return false;
11496         /* id relations must be preserved */
11497         if (rold->id && !check_ids(rold->id, rcur->id, idmap))
11498             return false;
11499         /* new val must satisfy old val knowledge */
11500         return range_within(rold, rcur) &&
11501                tnum_in(rold->var_off, rcur->var_off);
11502     case PTR_TO_CTX:
11503     case CONST_PTR_TO_MAP:
11504     case PTR_TO_PACKET_END:
11505     case PTR_TO_FLOW_KEYS:
11506     case PTR_TO_SOCKET:
11507     case PTR_TO_SOCK_COMMON:
11508     case PTR_TO_TCP_SOCK:
11509     case PTR_TO_XDP_SOCK:
11510         /* Only valid matches are exact, which memcmp() above
11511          * would have accepted
11512          */
11513     default:
11514         /* Don't know what's going on, just say it's not safe */
11515         return false;
11516     }
11517
11518     /* Shouldn't get here; if we do, say it's not safe */
11519     WARN_ON_ONCE(1);
11520     return false;
11521 }
11522
11523 static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
11524               struct bpf_func_state *cur, struct bpf_id_pair *idmap)
11525 {
11526     int i, spi;
11527
11528     /* walk slots of the explored stack and ignore any additional
11529      * slots in the current stack, since explored(safe) state
11530      * didn't use them
11531      */
11532     for (i = 0; i < old->allocated_stack; i++) {
11533         spi = i / BPF_REG_SIZE;
11534
11535         if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
11536             i += BPF_REG_SIZE - 1;
11537             /* explored state didn't use this */
11538             continue;
11539         }
11540
11541         if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
11542             continue;
11543
11544         /* explored stack has more populated slots than current stack
11545          * and these slots were used
11546          */
11547         if (i >= cur->allocated_stack)
11548             return false;
11549
11550         /* if old state was safe with misc data in the stack
11551          * it will be safe with zero-initialized stack.
11552          * The opposite is not true
11553          */
11554         if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
11555             cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
11556             continue;
11557         if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
11558             cur->stack[spi].slot_type[i % BPF_REG_SIZE])
11559             /* Ex: old explored (safe) state has STACK_SPILL in
11560              * this stack slot, but current has STACK_MISC ->
11561              * this verifier states are not equivalent,
11562              * return false to continue verification of this path
11563              */
11564             return false;
11565         if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
11566             continue;
11567         if (!is_spilled_reg(&old->stack[spi]))
11568             continue;
11569         if (!regsafe(env, &old->stack[spi].spilled_ptr,
11570                  &cur->stack[spi].spilled_ptr, idmap))
11571             /* when explored and current stack slot are both storing
11572              * spilled registers, check that stored pointers types
11573              * are the same as well.
11574              * Ex: explored safe path could have stored
11575              * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
11576              * but current path has stored:
11577              * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
11578              * such verifier states are not equivalent.
11579              * return false to continue verification of this path
11580              */
11581             return false;
11582     }
11583     return true;
11584 }
11585
11586 static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
11587 {
11588     if (old->acquired_refs != cur->acquired_refs)
11589         return false;
11590     return !memcmp(old->refs, cur->refs,
11591                sizeof(*old->refs) * old->acquired_refs);
11592 }
11593
11594 /* compare two verifier states
11595  *
11596  * all states stored in state_list are known to be valid, since
11597  * verifier reached 'bpf_exit' instruction through them
11598  *
11599  * this function is called when verifier exploring different branches of
11600  * execution popped from the state stack. If it sees an old state that has
11601  * more strict register state and more strict stack state then this execution
11602  * branch doesn't need to be explored further, since verifier already
11603  * concluded that more strict state leads to valid finish.
11604  *
11605  * Therefore two states are equivalent if register state is more conservative
11606  * and explored stack state is more conservative than the current one.
11607  * Example:
11608  *       explored                   current
11609  * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
11610  * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
11611  *
11612  * In other words if current stack state (one being explored) has more
11613  * valid slots than old one that already passed validation, it means
11614  * the verifier can stop exploring and conclude that current state is valid too
11615  *
11616  * Similarly with registers. If explored state has register type as invalid
11617  * whereas register type in current state is meaningful, it means that
11618  * the current state will reach 'bpf_exit' instruction safely
11619  */
11620 static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
11621                   struct bpf_func_state *cur)
11622 {
11623     int i;
11624
11625     memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
11626     for (i = 0; i < MAX_BPF_REG; i++)
11627         if (!regsafe(env, &old->regs[i], &cur->regs[i],
11628                  env->idmap_scratch))
11629             return false;
11630
11631     if (!stacksafe(env, old, cur, env->idmap_scratch))
11632         return false;
11633
11634     if (!refsafe(old, cur))
11635         return false;
11636
11637     return true;
11638 }
11639
11640 static bool states_equal(struct bpf_verifier_env *env,
11641              struct bpf_verifier_state *old,
11642              struct bpf_verifier_state *cur)
11643 {
11644     int i;
11645
11646     if (old->curframe != cur->curframe)
11647         return false;
11648
11649     /* Verification state from speculative execution simulation
11650      * must never prune a non-speculative execution one.
11651      */
11652     if (old->speculative && !cur->speculative)
11653         return false;
11654
11655     if (old->active_spin_lock != cur->active_spin_lock)
11656         return false;
11657
11658     /* for states to be equal callsites have to be the same
11659      * and all frame states need to be equivalent
11660      */
11661     for (i = 0; i <= old->curframe; i++) {
11662         if (old->frame[i]->callsite != cur->frame[i]->callsite)
11663             return false;
11664         if (!func_states_equal(env, old->frame[i], cur->frame[i]))
11665             return false;
11666     }
11667     return true;
11668 }
11669
11670 /* Return 0 if no propagation happened. Return negative error code if error
11671  * happened. Otherwise, return the propagated bit.
11672  */
11673 static int propagate_liveness_reg(struct bpf_verifier_env *env,
11674                   struct bpf_reg_state *reg,
11675                   struct bpf_reg_state *parent_reg)
11676 {
11677     u8 parent_flag = parent_reg->live & REG_LIVE_READ;
11678     u8 flag = reg->live & REG_LIVE_READ;
11679     int err;
11680
11681     /* When comes here, read flags of PARENT_REG or REG could be any of
11682      * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
11683      * of propagation if PARENT_REG has strongest REG_LIVE_READ64.
11684      */
11685     if (parent_flag == REG_LIVE_READ64 ||
11686         /* Or if there is no read flag from REG. */
11687         !flag ||
11688         /* Or if the read flag from REG is the same as PARENT_REG. */
11689         parent_flag == flag)
11690         return 0;
11691
11692     err = mark_reg_read(env, reg, parent_reg, flag);
11693     if (err)
11694         return err;
11695
11696     return flag;
11697 }
11698
11699 /* A write screens off any subsequent reads; but write marks come from the
11700  * straight-line code between a state and its parent.  When we arrive at an
11701  * equivalent state (jump target or such) we didn't arrive by the straight-line
11702  * code, so read marks in the state must propagate to the parent regardless
11703  * of the state's write marks. That's what 'parent == state->parent' comparison
11704  * in mark_reg_read() is for.
11705  */
11706 static int propagate_liveness(struct bpf_verifier_env *env,
11707                   const struct bpf_verifier_state *vstate,
11708                   struct bpf_verifier_state *vparent)
11709 {
11710     struct bpf_reg_state *state_reg, *parent_reg;
11711     struct bpf_func_state *state, *parent;
11712     int i, frame, err = 0;
11713
11714     if (vparent->curframe != vstate->curframe) {
11715         WARN(1, "propagate_live: parent frame %d current frame %d\n",
11716              vparent->curframe, vstate->curframe);
11717         return -EFAULT;
11718     }
11719     /* Propagate read liveness of registers... */
11720     BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
11721     for (frame = 0; frame <= vstate->curframe; frame++) {
11722         parent = vparent->frame[frame];
11723         state = vstate->frame[frame];
11724         parent_reg = parent->regs;
11725         state_reg = state->regs;
11726         /* We don't need to worry about FP liveness, it's read-only */
11727         for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
11728             err = propagate_liveness_reg(env, &state_reg[i],
11729                              &parent_reg[i]);
11730             if (err < 0)
11731                 return err;
11732             if (err == REG_LIVE_READ64)
11733                 mark_insn_zext(env, &parent_reg[i]);
11734         }
11735
11736         /* Propagate stack slots. */
11737         for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
11738                 i < parent->allocated_stack / BPF_REG_SIZE; i++) {
11739             parent_reg = &parent->stack[i].spilled_ptr;
11740             state_reg = &state->stack[i].spilled_ptr;
11741             err = propagate_liveness_reg(env, state_reg,
11742                              parent_reg);
11743             if (err < 0)
11744                 return err;
11745         }
11746     }
11747     return 0;
11748 }
11749
11750 /* find precise scalars in the previous equivalent state and
11751  * propagate them into the current state
11752  */
11753 static int propagate_precision(struct bpf_verifier_env *env,
11754                    const struct bpf_verifier_state *old)
11755 {
11756     struct bpf_reg_state *state_reg;
11757     struct bpf_func_state *state;
11758     int i, err = 0;
11759
11760     state = old->frame[old->curframe];
11761     state_reg = state->regs;
11762     for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
11763         if (state_reg->type != SCALAR_VALUE ||
11764             !state_reg->precise)
11765             continue;
11766         if (env->log.level & BPF_LOG_LEVEL2)
11767             verbose(env, "propagating r%d\n", i);
11768         err = mark_chain_precision(env, i);
11769         if (err < 0)
11770             return err;
11771     }
11772
11773     for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
11774         if (!is_spilled_reg(&state->stack[i]))
11775             continue;
11776         state_reg = &state->stack[i].spilled_ptr;
11777         if (state_reg->type != SCALAR_VALUE ||
11778             !state_reg->precise)
11779             continue;
11780         if (env->log.level & BPF_LOG_LEVEL2)
11781             verbose(env, "propagating fp%d\n",
11782                 (-i - 1) * BPF_REG_SIZE);
11783         err = mark_chain_precision_stack(env, i);
11784         if (err < 0)
11785             return err;
11786     }
11787     return 0;
11788 }
11789
11790 static bool states_maybe_looping(struct bpf_verifier_state *old,
11791                  struct bpf_verifier_state *cur)
11792 {
11793     struct bpf_func_state *fold, *fcur;
11794     int i, fr = cur->curframe;
11795
11796     if (old->curframe != fr)
11797         return false;
11798
11799     fold = old->frame[fr];
11800     fcur = cur->frame[fr];
11801     for (i = 0; i < MAX_BPF_REG; i++)
11802         if (memcmp(&fold->regs[i], &fcur->regs[i],
11803                offsetof(struct bpf_reg_state, parent)))
11804             return false;
11805     return true;
11806 }
11807
11808
11809 static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
11810 {
11811     struct bpf_verifier_state_list *new_sl;
11812     struct bpf_verifier_state_list *sl, **pprev;
11813     struct bpf_verifier_state *cur = env->cur_state, *new;
11814     int i, j, err, states_cnt = 0;
11815     bool add_new_state = env->test_state_freq ? true : false;
11816
11817     cur->last_insn_idx = env->prev_insn_idx;
11818     if (!env->insn_aux_data[insn_idx].prune_point)
11819         /* this 'insn_idx' instruction wasn't marked, so we will not
11820          * be doing state search here
11821          */
11822         return 0;
11823
11824     /* bpf progs typically have pruning point every 4 instructions
11825      * http://vger.kernel.org/bpfconf2019.html#session-1
11826      * Do not add new state for future pruning if the verifier hasn't seen
11827      * at least 2 jumps and at least 8 instructions.
11828      * This heuristics helps decrease 'total_states' and 'peak_states' metric.
11829      * In tests that amounts to up to 50% reduction into total verifier
11830      * memory consumption and 20% verifier time speedup.
11831      */
11832     if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
11833         env->insn_processed - env->prev_insn_processed >= 8)
11834         add_new_state = true;
11835
11836     pprev = explored_state(env, insn_idx);
11837     sl = *pprev;
11838
11839     clean_live_states(env, insn_idx, cur);
11840
11841     while (sl) {
11842         states_cnt++;
11843         if (sl->state.insn_idx != insn_idx)
11844             goto next;
11845
11846         if (sl->state.branches) {
11847             struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];
11848
11849             if (frame->in_async_callback_fn &&
11850                 frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
11851                 /* Different async_entry_cnt means that the verifier is
11852                  * processing another entry into async callback.
11853                  * Seeing the same state is not an indication of infinite
11854                  * loop or infinite recursion.
11855                  * But finding the same state doesn't mean that it's safe
11856                  * to stop processing the current state. The previous state
11857                  * hasn't yet reached bpf_exit, since state.branches > 0.
11858                  * Checking in_async_callback_fn alone is not enough either.
11859                  * Since the verifier still needs to catch infinite loops
11860                  * inside async callbacks.
11861                  */
11862             } else if (states_maybe_looping(&sl->state, cur) &&
11863                    states_equal(env, &sl->state, cur)) {
11864                 verbose_linfo(env, insn_idx, "; ");
11865                 verbose(env, "infinite loop detected at insn %d\n", insn_idx);
11866                 return -EINVAL;
11867             }
11868             /* if the verifier is processing a loop, avoid adding new state
11869              * too often, since different loop iterations have distinct
11870              * states and may not help future pruning.
11871              * This threshold shouldn't be too low to make sure that
11872              * a loop with large bound will be rejected quickly.
11873              * The most abusive loop will be:
11874              * r1 += 1
11875              * if r1 < 1000000 goto pc-2
11876              * 1M insn_procssed limit / 100 == 10k peak states.
11877              * This threshold shouldn't be too high either, since states
11878              * at the end of the loop are likely to be useful in pruning.
11879              */
11880             if (env->jmps_processed - env->prev_jmps_processed < 20 &&
11881                 env->insn_processed - env->prev_insn_processed < 100)
11882                 add_new_state = false;
11883             goto miss;
11884         }
11885         if (states_equal(env, &sl->state, cur)) {
11886             sl->hit_cnt++;
11887             /* reached equivalent register/stack state,
11888              * prune the search.
11889              * Registers read by the continuation are read by us.
11890              * If we have any write marks in env->cur_state, they
11891              * will prevent corresponding reads in the continuation
11892              * from reaching our parent (an explored_state).  Our
11893              * own state will get the read marks recorded, but
11894              * they'll be immediately forgotten as we're pruning
11895              * this state and will pop a new one.
11896              */
11897             err = propagate_liveness(env, &sl->state, cur);
11898
11899             /* if previous state reached the exit with precision and
11900              * current state is equivalent to it (except precsion marks)
11901              * the precision needs to be propagated back in
11902              * the current state.
11903              */
11904             err = err ? : push_jmp_history(env, cur);
11905             err = err ? : propagate_precision(env, &sl->state);
11906             if (err)
11907                 return err;
11908             return 1;
11909         }
11910 miss:
11911         /* when new state is not going to be added do not increase miss count.
11912          * Otherwise several loop iterations will remove the state
11913          * recorded earlier. The goal of these heuristics is to have
11914          * states from some iterations of the loop (some in the beginning
11915          * and some at the end) to help pruning.
11916          */
11917         if (add_new_state)
11918             sl->miss_cnt++;
11919         /* heuristic to determine whether this state is beneficial
11920          * to keep checking from state equivalence point of view.
11921          * Higher numbers increase max_states_per_insn and verification time,
11922          * but do not meaningfully decrease insn_processed.
11923          */
11924         if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
11925             /* the state is unlikely to be useful. Remove it to
11926              * speed up verification
11927              */
11928             *pprev = sl->next;
11929             if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
11930                 u32 br = sl->state.branches;
11931
11932                 WARN_ONCE(br,
11933                       "BUG live_done but branches_to_explore %d\n",
11934                       br);
11935                 free_verifier_state(&sl->state, false);
11936                 kfree(sl);
11937                 env->peak_states--;
11938             } else {
11939                 /* cannot free this state, since parentage chain may
11940                  * walk it later. Add it for free_list instead to
11941                  * be freed at the end of verification
11942                  */
11943                 sl->next = env->free_list;
11944                 env->free_list = sl;
11945             }
11946             sl = *pprev;
11947             continue;
11948         }
11949 next:
11950         pprev = &sl->next;
11951         sl = *pprev;
11952     }
11953
11954     if (env->max_states_per_insn < states_cnt)
11955         env->max_states_per_insn = states_cnt;
11956
11957     if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
11958         return push_jmp_history(env, cur);
11959
11960     if (!add_new_state)
11961         return push_jmp_history(env, cur);
11962
11963     /* There were no equivalent states, remember the current one.
11964      * Technically the current state is not proven to be safe yet,
11965      * but it will either reach outer most bpf_exit (which means it's safe)
11966      * or it will be rejected. When there are no loops the verifier won't be
11967      * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
11968      * again on the way to bpf_exit.
11969      * When looping the sl->state.branches will be > 0 and this state
11970      * will not be considered for equivalence until branches == 0.
11971      */
11972     new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
11973     if (!new_sl)
11974         return -ENOMEM;
11975     env->total_states++;
11976     env->peak_states++;
11977     env->prev_jmps_processed = env->jmps_processed;
11978     env->prev_insn_processed = env->insn_processed;
11979
11980     /* add new state to the head of linked list */
11981     new = &new_sl->state;
11982     err = copy_verifier_state(new, cur);
11983     if (err) {
11984         free_verifier_state(new, false);
11985         kfree(new_sl);
11986         return err;
11987     }
11988     new->insn_idx = insn_idx;
11989     WARN_ONCE(new->branches != 1,
11990           "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
11991
11992     cur->parent = new;
11993     cur->first_insn_idx = insn_idx;
11994     clear_jmp_history(cur);
11995     new_sl->next = *explored_state(env, insn_idx);
11996     *explored_state(env, insn_idx) = new_sl;
11997     /* connect new state to parentage chain. Current frame needs all
11998      * registers connected. Only r6 - r9 of the callers are alive (pushed
11999      * to the stack implicitly by JITs) so in callers' frames connect just
12000      * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
12001      * the state of the call instruction (with WRITTEN set), and r0 comes
12002      * from callee with its full parentage chain, anyway.
12003      */
12004     /* clear write marks in current state: the writes we did are not writes
12005      * our child did, so they don't screen off its reads from us.
12006      * (There are no read marks in current state, because reads always mark
12007      * their parent and current state never has children yet.  Only
12008      * explored_states can get read marks.)
12009      */
12010     for (j = 0; j <= cur->curframe; j++) {
12011         for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
12012             cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
12013         for (i = 0; i < BPF_REG_FP; i++)
12014             cur->frame[j]->regs[i].live = REG_LIVE_NONE;
12015     }
12016
12017     /* all stack frames are accessible from callee, clear them all */
12018     for (j = 0; j <= cur->curframe; j++) {
12019         struct bpf_func_state *frame = cur->frame[j];
12020         struct bpf_func_state *newframe = new->frame[j];
12021
12022         for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
12023             frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
12024             frame->stack[i].spilled_ptr.parent =
12025                         &newframe->stack[i].spilled_ptr;
12026         }
12027     }
12028     return 0;
12029 }
12030
12031 /* Return true if it's OK to have the same insn return a different type. */
12032 static bool reg_type_mismatch_ok(enum bpf_reg_type type)
12033 {
12034     switch (base_type(type)) {
12035     case PTR_TO_CTX:
12036     case PTR_TO_SOCKET:
12037     case PTR_TO_SOCK_COMMON:
12038     case PTR_TO_TCP_SOCK:
12039     case PTR_TO_XDP_SOCK:
12040     case PTR_TO_BTF_ID:
12041         return false;
12042     default:
12043         return true;
12044     }
12045 }
12046
12047 /* If an instruction was previously used with particular pointer types, then we
12048  * need to be careful to avoid cases such as the below, where it may be ok
12049  * for one branch accessing the pointer, but not ok for the other branch:
12050  *
12051  * R1 = sock_ptr
12052  * goto X;
12053  * ...
12054  * R1 = some_other_valid_ptr;
12055  * goto X;
12056  * ...
12057  * R2 = *(u32 *)(R1 + 0);
12058  */
12059 static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
12060 {
12061     return src != prev && (!reg_type_mismatch_ok(src) ||
12062                    !reg_type_mismatch_ok(prev));
12063 }
12064
12065 static int do_check(struct bpf_verifier_env *env)
12066 {
12067     bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
12068     struct bpf_verifier_state *state = env->cur_state;
12069     struct bpf_insn *insns = env->prog->insnsi;
12070     struct bpf_reg_state *regs;
12071     int insn_cnt = env->prog->len;
12072     bool do_print_state = false;
12073     int prev_insn_idx = -1;
12074
12075     for (;;) {
12076         struct bpf_insn *insn;
12077         u8 class;
12078         int err;
12079
12080         env->prev_insn_idx = prev_insn_idx;
12081         if (env->insn_idx >= insn_cnt) {
12082             verbose(env, "invalid insn idx %d insn_cnt %d\n",
12083                 env->insn_idx, insn_cnt);
12084             return -EFAULT;
12085         }
12086
12087         insn = &insns[env->insn_idx];
12088         class = BPF_CLASS(insn->code);
12089
12090         if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
12091             verbose(env,
12092                 "BPF program is too large. Processed %d insn\n",
12093                 env->insn_processed);
12094             return -E2BIG;
12095         }
12096
12097         err = is_state_visited(env, env->insn_idx);
12098         if (err < 0)
12099             return err;
12100         if (err == 1) {
12101             /* found equivalent state, can prune the search */
12102             if (env->log.level & BPF_LOG_LEVEL) {
12103                 if (do_print_state)
12104                     verbose(env, "\nfrom %d to %d%s: safe\n",
12105                         env->prev_insn_idx, env->insn_idx,
12106                         env->cur_state->speculative ?
12107                         " (speculative execution)" : "");
12108                 else
12109                     verbose(env, "%d: safe\n", env->insn_idx);
12110             }
12111             goto process_bpf_exit;
12112         }
12113
12114         if (signal_pending(current))
12115             return -EAGAIN;
12116
12117         if (need_resched())
12118             cond_resched();
12119
12120         if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) {
12121             verbose(env, "\nfrom %d to %d%s:",
12122                 env->prev_insn_idx, env->insn_idx,
12123                 env->cur_state->speculative ?
12124                 " (speculative execution)" : "");
12125             print_verifier_state(env, state->frame[state->curframe], true);
12126             do_print_state = false;
12127         }
12128
12129         if (env->log.level & BPF_LOG_LEVEL) {
12130             const struct bpf_insn_cbs cbs = {
12131                 .cb_call    = disasm_kfunc_name,
12132                 .cb_print   = verbose,
12133                 .private_data   = env,
12134             };
12135
12136             if (verifier_state_scratched(env))
12137                 print_insn_state(env, state->frame[state->curframe]);
12138
12139             verbose_linfo(env, env->insn_idx, "; ");
12140             env->prev_log_len = env->log.len_used;
12141             verbose(env, "%d: ", env->insn_idx);
12142             print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
12143             env->prev_insn_print_len = env->log.len_used - env->prev_log_len;
12144             env->prev_log_len = env->log.len_used;
12145         }
12146
12147         if (bpf_prog_is_dev_bound(env->prog->aux)) {
12148             err = bpf_prog_offload_verify_insn(env, env->insn_idx,
12149                                env->prev_insn_idx);
12150             if (err)
12151                 return err;
12152         }
12153
12154         regs = cur_regs(env);
12155         sanitize_mark_insn_seen(env);
12156         prev_insn_idx = env->insn_idx;
12157
12158         if (class == BPF_ALU || class == BPF_ALU64) {
12159             err = check_alu_op(env, insn);
12160             if (err)
12161                 return err;
12162
12163         } else if (class == BPF_LDX) {
12164             enum bpf_reg_type *prev_src_type, src_reg_type;
12165
12166             /* check for reserved fields is already done */
12167
12168             /* check src operand */
12169             err = check_reg_arg(env, insn->src_reg, SRC_OP);
12170             if (err)
12171                 return err;
12172
12173             err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
12174             if (err)
12175                 return err;
12176
12177             src_reg_type = regs[insn->src_reg].type;
12178
12179             /* check that memory (src_reg + off) is readable,
12180              * the state of dst_reg will be updated by this func
12181              */
12182             err = check_mem_access(env, env->insn_idx, insn->src_reg,
12183                            insn->off, BPF_SIZE(insn->code),
12184                            BPF_READ, insn->dst_reg, false);
12185             if (err)
12186                 return err;
12187
12188             prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
12189
12190             if (*prev_src_type == NOT_INIT) {
12191                 /* saw a valid insn
12192                  * dst_reg = *(u32 *)(src_reg + off)
12193                  * save type to validate intersecting paths
12194                  */
12195                 *prev_src_type = src_reg_type;
12196
12197             } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
12198                 /* ABuser program is trying to use the same insn
12199                  * dst_reg = *(u32*) (src_reg + off)
12200                  * with different pointer types:
12201                  * src_reg == ctx in one branch and
12202                  * src_reg == stack|map in some other branch.
12203                  * Reject it.
12204                  */
12205                 verbose(env, "same insn cannot be used with different pointers\n");
12206                 return -EINVAL;
12207             }
12208
12209         } else if (class == BPF_STX) {
12210             enum bpf_reg_type *prev_dst_type, dst_reg_type;
12211
12212             if (BPF_MODE(insn->code) == BPF_ATOMIC) {
12213                 err = check_atomic(env, env->insn_idx, insn);
12214                 if (err)
12215                     return err;
12216                 env->insn_idx++;
12217                 continue;
12218             }
12219
12220             if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
12221                 verbose(env, "BPF_STX uses reserved fields\n");
12222                 return -EINVAL;
12223             }
12224
12225             /* check src1 operand */
12226             err = check_reg_arg(env, insn->src_reg, SRC_OP);
12227             if (err)
12228                 return err;
12229             /* check src2 operand */
12230             err = check_reg_arg(env, insn->dst_reg, SRC_OP);
12231             if (err)
12232                 return err;
12233
12234             dst_reg_type = regs[insn->dst_reg].type;
12235
12236             /* check that memory (dst_reg + off) is writeable */
12237             err = check_mem_access(env, env->insn_idx, insn->dst_reg,
12238                            insn->off, BPF_SIZE(insn->code),
12239                            BPF_WRITE, insn->src_reg, false);
12240             if (err)
12241                 return err;
12242
12243             prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;
12244
12245             if (*prev_dst_type == NOT_INIT) {
12246                 *prev_dst_type = dst_reg_type;
12247             } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
12248                 verbose(env, "same insn cannot be used with different pointers\n");
12249                 return -EINVAL;
12250             }
12251
12252         } else if (class == BPF_ST) {
12253             if (BPF_MODE(insn->code) != BPF_MEM ||
12254                 insn->src_reg != BPF_REG_0) {
12255                 verbose(env, "BPF_ST uses reserved fields\n");
12256                 return -EINVAL;
12257             }
12258             /* check src operand */
12259             err = check_reg_arg(env, insn->dst_reg, SRC_OP);
12260             if (err)
12261                 return err;
12262
12263             if (is_ctx_reg(env, insn->dst_reg)) {
12264                 verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
12265                     insn->dst_reg,
12266                     reg_type_str(env, reg_state(env, insn->dst_reg)->type));
12267                 return -EACCES;
12268             }
12269
12270             /* check that memory (dst_reg + off) is writeable */
12271             err = check_mem_access(env, env->insn_idx, insn->dst_reg,
12272                            insn->off, BPF_SIZE(insn->code),
12273                            BPF_WRITE, -1, false);
12274             if (err)
12275                 return err;
12276
12277         } else if (class == BPF_JMP || class == BPF_JMP32) {
12278             u8 opcode = BPF_OP(insn->code);
12279
12280             env->jmps_processed++;
12281             if (opcode == BPF_CALL) {
12282                 if (BPF_SRC(insn->code) != BPF_K ||
12283                     (insn->src_reg != BPF_PSEUDO_KFUNC_CALL
12284                      && insn->off != 0) ||
12285                     (insn->src_reg != BPF_REG_0 &&
12286                      insn->src_reg != BPF_PSEUDO_CALL &&
12287                      insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
12288                     insn->dst_reg != BPF_REG_0 ||
12289                     class == BPF_JMP32) {
12290                     verbose(env, "BPF_CALL uses reserved fields\n");
12291                     return -EINVAL;
12292                 }
12293
12294                 if (env->cur_state->active_spin_lock &&
12295                     (insn->src_reg == BPF_PSEUDO_CALL ||
12296                      insn->imm != BPF_FUNC_spin_unlock)) {
12297                     verbose(env, "function calls are not allowed while holding a lock\n");
12298                     return -EINVAL;
12299                 }
12300                 if (insn->src_reg == BPF_PSEUDO_CALL)
12301                     err = check_func_call(env, insn, &env->insn_idx);
12302                 else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
12303                     err = check_kfunc_call(env, insn, &env->insn_idx);
12304                 else
12305                     err = check_helper_call(env, insn, &env->insn_idx);
12306                 if (err)
12307                     return err;
12308             } else if (opcode == BPF_JA) {
12309                 if (BPF_SRC(insn->code) != BPF_K ||
12310                     insn->imm != 0 ||
12311                     insn->src_reg != BPF_REG_0 ||
12312                     insn->dst_reg != BPF_REG_0 ||
12313                     class == BPF_JMP32) {
12314                     verbose(env, "BPF_JA uses reserved fields\n");
12315                     return -EINVAL;
12316                 }
12317
12318                 env->insn_idx += insn->off + 1;
12319                 continue;
12320
12321             } else if (opcode == BPF_EXIT) {
12322                 if (BPF_SRC(insn->code) != BPF_K ||
12323                     insn->imm != 0 ||
12324                     insn->src_reg != BPF_REG_0 ||
12325                     insn->dst_reg != BPF_REG_0 ||
12326                     class == BPF_JMP32) {
12327                     verbose(env, "BPF_EXIT uses reserved fields\n");
12328                     return -EINVAL;
12329                 }
12330
12331                 if (env->cur_state->active_spin_lock) {
12332                     verbose(env, "bpf_spin_unlock is missing\n");
12333                     return -EINVAL;
12334                 }
12335
12336                 if (state->curframe) {
12337                     /* exit from nested function */
12338                     err = prepare_func_exit(env, &env->insn_idx);
12339                     if (err)
12340                         return err;
12341                     do_print_state = true;
12342                     continue;
12343                 }
12344
12345                 err = check_reference_leak(env);
12346                 if (err)
12347                     return err;
12348
12349                 err = check_return_code(env);
12350                 if (err)
12351                     return err;
12352 process_bpf_exit:
12353                 mark_verifier_state_scratched(env);
12354                 update_branch_counts(env, env->cur_state);
12355                 err = pop_stack(env, &prev_insn_idx,
12356                         &env->insn_idx, pop_log);
12357                 if (err < 0) {
12358                     if (err != -ENOENT)
12359                         return err;
12360                     break;
12361                 } else {
12362                     do_print_state = true;
12363                     continue;
12364                 }
12365             } else {
12366                 err = check_cond_jmp_op(env, insn, &env->insn_idx);
12367                 if (err)
12368                     return err;
12369             }
12370         } else if (class == BPF_LD) {
12371             u8 mode = BPF_MODE(insn->code);
12372
12373             if (mode == BPF_ABS || mode == BPF_IND) {
12374                 err = check_ld_abs(env, insn);
12375                 if (err)
12376                     return err;
12377
12378             } else if (mode == BPF_IMM) {
12379                 err = check_ld_imm(env, insn);
12380                 if (err)
12381                     return err;
12382
12383                 env->insn_idx++;
12384                 sanitize_mark_insn_seen(env);
12385             } else {
12386                 verbose(env, "invalid BPF_LD mode\n");
12387                 return -EINVAL;
12388             }
12389         } else {
12390             verbose(env, "unknown insn class %d\n", class);
12391             return -EINVAL;
12392         }
12393
12394         env->insn_idx++;
12395     }
12396
12397     return 0;
12398 }
12399
12400 static int find_btf_percpu_datasec(struct btf *btf)
12401 {
12402     const struct btf_type *t;
12403     const char *tname;
12404     int i, n;
12405
12406     /*
12407      * Both vmlinux and module each have their own ".data..percpu"
12408      * DATASECs in BTF. So for module's case, we need to skip vmlinux BTF
12409      * types to look at only module's own BTF types.
12410      */
12411     n = btf_nr_types(btf);
12412     if (btf_is_module(btf))
12413         i = btf_nr_types(btf_vmlinux);
12414     else
12415         i = 1;
12416
12417     for(; i < n; i++) {
12418         t = btf_type_by_id(btf, i);
12419         if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
12420             continue;
12421
12422         tname = btf_name_by_offset(btf, t->name_off);
12423         if (!strcmp(tname, ".data..percpu"))
12424             return i;
12425     }
12426
12427     return -ENOENT;
12428 }
12429
12430 /* replace pseudo btf_id with kernel symbol address */
12431 static int check_pseudo_btf_id(struct bpf_verifier_env *env,
12432                    struct bpf_insn *insn,
12433                    struct bpf_insn_aux_data *aux)
12434 {
12435     const struct btf_var_secinfo *vsi;
12436     const struct btf_type *datasec;
12437     struct btf_mod_pair *btf_mod;
12438     const struct btf_type *t;
12439     const char *sym_name;
12440     bool percpu = false;
12441     u32 type, id = insn->imm;
12442     struct btf *btf;
12443     s32 datasec_id;
12444     u64 addr;
12445     int i, btf_fd, err;
12446
12447     btf_fd = insn[1].imm;
12448     if (btf_fd) {
12449         btf = btf_get_by_fd(btf_fd);
12450         if (IS_ERR(btf)) {
12451             verbose(env, "invalid module BTF object FD specified.\n");
12452             return -EINVAL;
12453         }
12454     } else {
12455         if (!btf_vmlinux) {
12456             verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
12457             return -EINVAL;
12458         }
12459         btf = btf_vmlinux;
12460         btf_get(btf);
12461     }
12462
12463     t = btf_type_by_id(btf, id);
12464     if (!t) {
12465         verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
12466         err = -ENOENT;
12467         goto err_put;
12468     }
12469
12470     if (!btf_type_is_var(t)) {
12471         verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n", id);
12472         err = -EINVAL;
12473         goto err_put;
12474     }
12475
12476     sym_name = btf_name_by_offset(btf, t->name_off);
12477     addr = kallsyms_lookup_name(sym_name);
12478     if (!addr) {
12479         verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
12480             sym_name);
12481         err = -ENOENT;
12482         goto err_put;
12483     }
12484
12485     datasec_id = find_btf_percpu_datasec(btf);
12486     if (datasec_id > 0) {
12487         datasec = btf_type_by_id(btf, datasec_id);
12488         for_each_vsi(i, datasec, vsi) {
12489             if (vsi->type == id) {
12490                 percpu = true;
12491                 break;
12492             }
12493         }
12494     }
12495
12496     insn[0].imm = (u32)addr;
12497     insn[1].imm = addr >> 32;
12498
12499     type = t->type;
12500     t = btf_type_skip_modifiers(btf, type, NULL);
12501     if (percpu) {
12502         aux->btf_var.reg_type = PTR_TO_BTF_ID | MEM_PERCPU;
12503         aux->btf_var.btf = btf;
12504         aux->btf_var.btf_id = type;
12505     } else if (!btf_type_is_struct(t)) {
12506         const struct btf_type *ret;
12507         const char *tname;
12508         u32 tsize;
12509
12510         /* resolve the type size of ksym. */
12511         ret = btf_resolve_size(btf, t, &tsize);
12512         if (IS_ERR(ret)) {
12513             tname = btf_name_by_offset(btf, t->name_off);
12514             verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
12515                 tname, PTR_ERR(ret));
12516             err = -EINVAL;
12517             goto err_put;
12518         }
12519         aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
12520         aux->btf_var.mem_size = tsize;
12521     } else {
12522         aux->btf_var.reg_type = PTR_TO_BTF_ID;
12523         aux->btf_var.btf = btf;
12524         aux->btf_var.btf_id = type;
12525     }
12526
12527     /* check whether we recorded this BTF (and maybe module) already */
12528     for (i = 0; i < env->used_btf_cnt; i++) {
12529         if (env->used_btfs[i].btf == btf) {
12530             btf_put(btf);
12531             return 0;
12532         }
12533     }
12534
12535     if (env->used_btf_cnt >= MAX_USED_BTFS) {
12536         err = -E2BIG;
12537         goto err_put;
12538     }
12539
12540     btf_mod = &env->used_btfs[env->used_btf_cnt];
12541     btf_mod->btf = btf;
12542     btf_mod->module = NULL;
12543
12544     /* if we reference variables from kernel module, bump its refcount */
12545     if (btf_is_module(btf)) {
12546         btf_mod->module = btf_try_get_module(btf);
12547         if (!btf_mod->module) {
12548             err = -ENXIO;
12549             goto err_put;
12550         }
12551     }
12552
12553     env->used_btf_cnt++;
12554
12555     return 0;
12556 err_put:
12557     btf_put(btf);
12558     return err;
12559 }
12560
12561 static int check_map_prealloc(struct bpf_map *map)
12562 {
12563     return (map->map_type != BPF_MAP_TYPE_HASH &&
12564         map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
12565         map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
12566         !(map->map_flags & BPF_F_NO_PREALLOC);
12567 }
12568
12569 static bool is_tracing_prog_type(enum bpf_prog_type type)
12570 {
12571     switch (type) {
12572     case BPF_PROG_TYPE_KPROBE:
12573     case BPF_PROG_TYPE_TRACEPOINT:
12574     case BPF_PROG_TYPE_PERF_EVENT:
12575     case BPF_PROG_TYPE_RAW_TRACEPOINT:
12576     case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
12577         return true;
12578     default:
12579         return false;
12580     }
12581 }
12582
12583 static bool is_preallocated_map(struct bpf_map *map)
12584 {
12585     if (!check_map_prealloc(map))
12586         return false;
12587     if (map->inner_map_meta && !check_map_prealloc(map->inner_map_meta))
12588         return false;
12589     return true;
12590 }
12591
12592 static int check_map_prog_compatibility(struct bpf_verifier_env *env,
12593                     struct bpf_map *map,
12594                     struct bpf_prog *prog)
12595
12596 {
12597     enum bpf_prog_type prog_type = resolve_prog_type(prog);
12598     /*
12599      * Validate that trace type programs use preallocated hash maps.
12600      *
12601      * For programs attached to PERF events this is mandatory as the
12602      * perf NMI can hit any arbitrary code sequence.
12603      *
12604      * All other trace types using preallocated hash maps are unsafe as
12605      * well because tracepoint or kprobes can be inside locked regions
12606      * of the memory allocator or at a place where a recursion into the
12607      * memory allocator would see inconsistent state.
12608      *
12609      * On RT enabled kernels run-time allocation of all trace type
12610      * programs is strictly prohibited due to lock type constraints. On
12611      * !RT kernels it is allowed for backwards compatibility reasons for
12612      * now, but warnings are emitted so developers are made aware of
12613      * the unsafety and can fix their programs before this is enforced.
12614      */
12615     if (is_tracing_prog_type(prog_type) && !is_preallocated_map(map)) {
12616         if (prog_type == BPF_PROG_TYPE_PERF_EVENT) {
12617             verbose(env, "perf_event programs can only use preallocated hash map\n");
12618             return -EINVAL;
12619         }
12620         if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
12621             verbose(env, "trace type programs can only use preallocated hash map\n");
12622             return -EINVAL;
12623         }
12624         WARN_ONCE(1, "trace type BPF program uses run-time allocation\n");
12625         verbose(env, "trace type programs with run-time allocated hash maps are unsafe. Switch to preallocated hash maps.\n");
12626     }
12627
12628     if (map_value_has_spin_lock(map)) {
12629         if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
12630             verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n");
12631             return -EINVAL;
12632         }
12633
12634         if (is_tracing_prog_type(prog_type)) {
12635             verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
12636             return -EINVAL;
12637         }
12638
12639         if (prog->aux->sleepable) {
12640             verbose(env, "sleepable progs cannot use bpf_spin_lock yet\n");
12641             return -EINVAL;
12642         }
12643     }
12644
12645     if (map_value_has_timer(map)) {
12646         if (is_tracing_prog_type(prog_type)) {
12647             verbose(env, "tracing progs cannot use bpf_timer yet\n");
12648             return -EINVAL;
12649         }
12650     }
12651
12652     if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
12653         !bpf_offload_prog_map_match(prog, map)) {
12654         verbose(env, "offload device mismatch between prog and map\n");
12655         return -EINVAL;
12656     }
12657
12658     if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
12659         verbose(env, "bpf_struct_ops map cannot be used in prog\n");
12660         return -EINVAL;
12661     }
12662
12663     if (prog->aux->sleepable)
12664         switch (map->map_type) {
12665         case BPF_MAP_TYPE_HASH:
12666         case BPF_MAP_TYPE_LRU_HASH:
12667         case BPF_MAP_TYPE_ARRAY:
12668         case BPF_MAP_TYPE_PERCPU_HASH:
12669         case BPF_MAP_TYPE_PERCPU_ARRAY:
12670         case BPF_MAP_TYPE_LRU_PERCPU_HASH:
12671         case BPF_MAP_TYPE_ARRAY_OF_MAPS:
12672         case BPF_MAP_TYPE_HASH_OF_MAPS:
12673             if (!is_preallocated_map(map)) {
12674                 verbose(env,
12675                     "Sleepable programs can only use preallocated maps\n");
12676                 return -EINVAL;
12677             }
12678             break;
12679         case BPF_MAP_TYPE_RINGBUF:
12680         case BPF_MAP_TYPE_INODE_STORAGE:
12681         case BPF_MAP_TYPE_SK_STORAGE:
12682         case BPF_MAP_TYPE_TASK_STORAGE:
12683             break;
12684         default:
12685             verbose(env,
12686                 "Sleepable programs can only use array, hash, and ringbuf maps\n");
12687             return -EINVAL;
12688         }
12689
12690     return 0;
12691 }
12692
12693 static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
12694 {
12695     return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
12696         map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
12697 }
12698
12699 /* find and rewrite pseudo imm in ld_imm64 instructions:
12700  *
12701  * 1. if it accesses map FD, replace it with actual map pointer.
12702  * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
12703  *
12704  * NOTE: btf_vmlinux is required for converting pseudo btf_id.
12705  */
12706 static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
12707 {
12708     struct bpf_insn *insn = env->prog->insnsi;
12709     int insn_cnt = env->prog->len;
12710     int i, j, err;
12711
12712     err = bpf_prog_calc_tag(env->prog);
12713     if (err)
12714         return err;
12715
12716     for (i = 0; i < insn_cnt; i++, insn++) {
12717         if (BPF_CLASS(insn->code) == BPF_LDX &&
12718             (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
12719             verbose(env, "BPF_LDX uses reserved fields\n");
12720             return -EINVAL;
12721         }
12722
12723         if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
12724             struct bpf_insn_aux_data *aux;
12725             struct bpf_map *map;
12726             struct fd f;
12727             u64 addr;
12728             u32 fd;
12729
12730             if (i == insn_cnt - 1 || insn[1].code != 0 ||
12731                 insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
12732                 insn[1].off != 0) {
12733                 verbose(env, "invalid bpf_ld_imm64 insn\n");
12734                 return -EINVAL;
12735             }
12736
12737             if (insn[0].src_reg == 0)
12738                 /* valid generic load 64-bit imm */
12739                 goto next_insn;
12740
12741             if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
12742                 aux = &env->insn_aux_data[i];
12743                 err = check_pseudo_btf_id(env, insn, aux);
12744                 if (err)
12745                     return err;
12746                 goto next_insn;
12747             }
12748
12749             if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
12750                 aux = &env->insn_aux_data[i];
12751                 aux->ptr_type = PTR_TO_FUNC;
12752                 goto next_insn;
12753             }
12754
12755             /* In final convert_pseudo_ld_imm64() step, this is
12756              * converted into regular 64-bit imm load insn.
12757              */
12758             switch (insn[0].src_reg) {
12759             case BPF_PSEUDO_MAP_VALUE:
12760             case BPF_PSEUDO_MAP_IDX_VALUE:
12761                 break;
12762             case BPF_PSEUDO_MAP_FD:
12763             case BPF_PSEUDO_MAP_IDX:
12764                 if (insn[1].imm == 0)
12765                     break;
12766                 fallthrough;
12767             default:
12768                 verbose(env, "unrecognized bpf_ld_imm64 insn\n");
12769                 return -EINVAL;
12770             }
12771
12772             switch (insn[0].src_reg) {
12773             case BPF_PSEUDO_MAP_IDX_VALUE:
12774             case BPF_PSEUDO_MAP_IDX:
12775                 if (bpfptr_is_null(env->fd_array)) {
12776                     verbose(env, "fd_idx without fd_array is invalid\n");
12777                     return -EPROTO;
12778                 }
12779                 if (copy_from_bpfptr_offset(&fd, env->fd_array,
12780                                 insn[0].imm * sizeof(fd),
12781                                 sizeof(fd)))
12782                     return -EFAULT;
12783                 break;
12784             default:
12785                 fd = insn[0].imm;
12786                 break;
12787             }
12788
12789             f = fdget(fd);
12790             map = __bpf_map_get(f);
12791             if (IS_ERR(map)) {
12792                 verbose(env, "fd %d is not pointing to valid bpf_map\n",
12793                     insn[0].imm);
12794                 return PTR_ERR(map);
12795             }
12796
12797             err = check_map_prog_compatibility(env, map, env->prog);
12798             if (err) {
12799                 fdput(f);
12800                 return err;
12801             }
12802
12803             aux = &env->insn_aux_data[i];
12804             if (insn[0].src_reg == BPF_PSEUDO_MAP_FD ||
12805                 insn[0].src_reg == BPF_PSEUDO_MAP_IDX) {
12806                 addr = (unsigned long)map;
12807             } else {
12808                 u32 off = insn[1].imm;
12809
12810                 if (off >= BPF_MAX_VAR_OFF) {
12811                     verbose(env, "direct value offset of %u is not allowed\n", off);
12812                     fdput(f);
12813                     return -EINVAL;
12814                 }
12815
12816                 if (!map->ops->map_direct_value_addr) {
12817                     verbose(env, "no direct value access support for this map type\n");
12818                     fdput(f);
12819                     return -EINVAL;
12820                 }
12821
12822                 err = map->ops->map_direct_value_addr(map, &addr, off);
12823                 if (err) {
12824                     verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
12825                         map->value_size, off);
12826                     fdput(f);
12827                     return err;
12828                 }
12829
12830                 aux->map_off = off;
12831                 addr += off;
12832             }
12833
12834             insn[0].imm = (u32)addr;
12835             insn[1].imm = addr >> 32;
12836
12837             /* check whether we recorded this map already */
12838             for (j = 0; j < env->used_map_cnt; j++) {
12839                 if (env->used_maps[j] == map) {
12840                     aux->map_index = j;
12841                     fdput(f);
12842                     goto next_insn;
12843                 }
12844             }
12845
12846             if (env->used_map_cnt >= MAX_USED_MAPS) {
12847                 fdput(f);
12848                 return -E2BIG;
12849             }
12850
12851             /* hold the map. If the program is rejected by verifier,
12852              * the map will be released by release_maps() or it
12853              * will be used by the valid program until it's unloaded
12854              * and all maps are released in free_used_maps()
12855              */
12856             bpf_map_inc(map);
12857
12858             aux->map_index = env->used_map_cnt;
12859             env->used_maps[env->used_map_cnt++] = map;
12860
12861             if (bpf_map_is_cgroup_storage(map) &&
12862                 bpf_cgroup_storage_assign(env->prog->aux, map)) {
12863                 verbose(env, "only one cgroup storage of each type is allowed\n");
12864                 fdput(f);
12865                 return -EBUSY;
12866             }
12867
12868             fdput(f);
12869 next_insn:
12870             insn++;
12871             i++;
12872             continue;
12873         }
12874
12875         /* Basic sanity check before we invest more work here. */
12876         if (!bpf_opcode_in_insntable(insn->code)) {
12877             verbose(env, "unknown opcode %02x\n", insn->code);
12878             return -EINVAL;
12879         }
12880     }
12881
12882     /* now all pseudo BPF_LD_IMM64 instructions load valid
12883      * 'struct bpf_map *' into a register instead of user map_fd.
12884      * These pointers will be used later by verifier to validate map access.
12885      */
12886     return 0;
12887 }
12888
12889 /* drop refcnt of maps used by the rejected program */
12890 static void release_maps(struct bpf_verifier_env *env)
12891 {
12892     __bpf_free_used_maps(env->prog->aux, env->used_maps,
12893                  env->used_map_cnt);
12894 }
12895
12896 /* drop refcnt of maps used by the rejected program */
12897 static void release_btfs(struct bpf_verifier_env *env)
12898 {
12899     __bpf_free_used_btfs(env->prog->aux, env->used_btfs,
12900                  env->used_btf_cnt);
12901 }
12902
12903 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
12904 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
12905 {
12906     struct bpf_insn *insn = env->prog->insnsi;
12907     int insn_cnt = env->prog->len;
12908     int i;
12909
12910     for (i = 0; i < insn_cnt; i++, insn++) {
12911         if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
12912             continue;
12913         if (insn->src_reg == BPF_PSEUDO_FUNC)
12914             continue;
12915         insn->src_reg = 0;
12916     }
12917 }
12918
12919 /* single env->prog->insni[off] instruction was replaced with the range
12920  * insni[off, off + cnt).  Adjust corresponding insn_aux_data by copying
12921  * [0, off) and [off, end) to new locations, so the patched range stays zero
12922  */
12923 static void adjust_insn_aux_data(struct bpf_verifier_env *env,
12924                  struct bpf_insn_aux_data *new_data,
12925                  struct bpf_prog *new_prog, u32 off, u32 cnt)
12926 {
12927     struct bpf_insn_aux_data *old_data = env->insn_aux_data;
12928     struct bpf_insn *insn = new_prog->insnsi;
12929     u32 old_seen = old_data[off].seen;
12930     u32 prog_len;
12931     int i;
12932
12933     /* aux info at OFF always needs adjustment, no matter fast path
12934      * (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the
12935      * original insn at old prog.
12936      */
12937     old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
12938
12939     if (cnt == 1)
12940         return;
12941     prog_len = new_prog->len;
12942
12943     memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
12944     memcpy(new_data + off + cnt - 1, old_data + off,
12945            sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
12946     for (i = off; i < off + cnt - 1; i++) {
12947         /* Expand insni[off]'s seen count to the patched range. */
12948         new_data[i].seen = old_seen;
12949         new_data[i].zext_dst = insn_has_def32(env, insn + i);
12950     }
12951     env->insn_aux_data = new_data;
12952     vfree(old_data);
12953 }
12954
12955 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
12956 {
12957     int i;
12958
12959     if (len == 1)
12960         return;
12961     /* NOTE: fake 'exit' subprog should be updated as well. */
12962     for (i = 0; i <= env->subprog_cnt; i++) {
12963         if (env->subprog_info[i].start <= off)
12964             continue;
12965         env->subprog_info[i].start += len - 1;
12966     }
12967 }
12968
12969 static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
12970 {
12971     struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
12972     int i, sz = prog->aux->size_poke_tab;
12973     struct bpf_jit_poke_descriptor *desc;
12974
12975     for (i = 0; i < sz; i++) {
12976         desc = &tab[i];
12977         if (desc->insn_idx <= off)
12978             continue;
12979         desc->insn_idx += len - 1;
12980     }
12981 }
12982
12983 static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
12984                         const struct bpf_insn *patch, u32 len)
12985 {
12986     struct bpf_prog *new_prog;
12987     struct bpf_insn_aux_data *new_data = NULL;
12988
12989     if (len > 1) {
12990         new_data = vzalloc(array_size(env->prog->len + len - 1,
12991                           sizeof(struct bpf_insn_aux_data)));
12992         if (!new_data)
12993             return NULL;
12994     }
12995
12996     new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
12997     if (IS_ERR(new_prog)) {
12998         if (PTR_ERR(new_prog) == -ERANGE)
12999             verbose(env,
13000                 "insn %d cannot be patched due to 16-bit range\n",
13001                 env->insn_aux_data[off].orig_idx);
13002         vfree(new_data);
13003         return NULL;
13004     }
13005     adjust_insn_aux_data(env, new_data, new_prog, off, len);
13006     adjust_subprog_starts(env, off, len);
13007     adjust_poke_descs(new_prog, off, len);
13008     return new_prog;
13009 }
13010
13011 static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
13012                           u32 off, u32 cnt)
13013 {
13014     int i, j;
13015
13016     /* find first prog starting at or after off (first to remove) */
13017     for (i = 0; i < env->subprog_cnt; i++)
13018         if (env->subprog_info[i].start >= off)
13019             break;
13020     /* find first prog starting at or after off + cnt (first to stay) */
13021     for (j = i; j < env->subprog_cnt; j++)
13022         if (env->subprog_info[j].start >= off + cnt)
13023             break;
13024     /* if j doesn't start exactly at off + cnt, we are just removing
13025      * the front of previous prog
13026      */
13027     if (env->subprog_info[j].start != off + cnt)
13028         j--;
13029
13030     if (j > i) {
13031         struct bpf_prog_aux *aux = env->prog->aux;
13032         int move;
13033
13034         /* move fake 'exit' subprog as well */
13035         move = env->subprog_cnt + 1 - j;
13036
13037         memmove(env->subprog_info + i,
13038             env->subprog_info + j,
13039             sizeof(*env->subprog_info) * move);
13040         env->subprog_cnt -= j - i;
13041
13042         /* remove func_info */
13043         if (aux->func_info) {
13044             move = aux->func_info_cnt - j;
13045
13046             memmove(aux->func_info + i,
13047                 aux->func_info + j,
13048                 sizeof(*aux->func_info) * move);
13049             aux->func_info_cnt -= j - i;
13050             /* func_info->insn_off is set after all code rewrites,
13051              * in adjust_btf_func() - no need to adjust
13052              */
13053         }
13054     } else {
13055         /* convert i from "first prog to remove" to "first to adjust" */
13056         if (env->subprog_info[i].start == off)
13057             i++;
13058     }
13059
13060     /* update fake 'exit' subprog as well */
13061     for (; i <= env->subprog_cnt; i++)
13062         env->subprog_info[i].start -= cnt;
13063
13064     return 0;
13065 }
13066
13067 static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
13068                       u32 cnt)
13069 {
13070     struct bpf_prog *prog = env->prog;
13071     u32 i, l_off, l_cnt, nr_linfo;
13072     struct bpf_line_info *linfo;
13073
13074     nr_linfo = prog->aux->nr_linfo;
13075     if (!nr_linfo)
13076         return 0;
13077
13078     linfo = prog->aux->linfo;
13079
13080     /* find first line info to remove, count lines to be removed */
13081     for (i = 0; i < nr_linfo; i++)
13082         if (linfo[i].insn_off >= off)
13083             break;
13084
13085     l_off = i;
13086     l_cnt = 0;
13087     for (; i < nr_linfo; i++)
13088         if (linfo[i].insn_off < off + cnt)
13089             l_cnt++;
13090         else
13091             break;
13092
13093     /* First live insn doesn't match first live linfo, it needs to "inherit"
13094      * last removed linfo.  prog is already modified, so prog->len == off
13095      * means no live instructions after (tail of the program was removed).
13096      */
13097     if (prog->len != off && l_cnt &&
13098         (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
13099         l_cnt--;
13100         linfo[--i].insn_off = off + cnt;
13101     }
13102
13103     /* remove the line info which refer to the removed instructions */
13104     if (l_cnt) {
13105         memmove(linfo + l_off, linfo + i,
13106             sizeof(*linfo) * (nr_linfo - i));
13107
13108         prog->aux->nr_linfo -= l_cnt;
13109         nr_linfo = prog->aux->nr_linfo;
13110     }
13111
13112     /* pull all linfo[i].insn_off >= off + cnt in by cnt */
13113     for (i = l_off; i < nr_linfo; i++)
13114         linfo[i].insn_off -= cnt;
13115
13116     /* fix up all subprogs (incl. 'exit') which start >= off */
13117     for (i = 0; i <= env->subprog_cnt; i++)
13118         if (env->subprog_info[i].linfo_idx > l_off) {
13119             /* program may have started in the removed region but
13120              * may not be fully removed
13121              */
13122             if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
13123                 env->subprog_info[i].linfo_idx -= l_cnt;
13124             else
13125                 env->subprog_info[i].linfo_idx = l_off;
13126         }
13127
13128     return 0;
13129 }
13130
13131 static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
13132 {
13133     struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
13134     unsigned int orig_prog_len = env->prog->len;
13135     int err;
13136
13137     if (bpf_prog_is_dev_bound(env->prog->aux))
13138         bpf_prog_offload_remove_insns(env, off, cnt);
13139
13140     err = bpf_remove_insns(env->prog, off, cnt);
13141     if (err)
13142         return err;
13143
13144     err = adjust_subprog_starts_after_remove(env, off, cnt);
13145     if (err)
13146         return err;
13147
13148     err = bpf_adj_linfo_after_remove(env, off, cnt);
13149     if (err)
13150         return err;
13151
13152     memmove(aux_data + off, aux_data + off + cnt,
13153         sizeof(*aux_data) * (orig_prog_len - off - cnt));
13154
13155     return 0;
13156 }
13157
13158 /* The verifier does more data flow analysis than llvm and will not
13159  * explore branches that are dead at run time. Malicious programs can
13160  * have dead code too. Therefore replace all dead at-run-time code
13161  * with 'ja -1'.
13162  *
13163  * Just nops are not optimal, e.g. if they would sit at the end of the
13164  * program and through another bug we would manage to jump there, then
13165  * we'd execute beyond program memory otherwise. Returning exception
13166  * code also wouldn't work since we can have subprogs where the dead
13167  * code could be located.
13168  */
13169 static void sanitize_dead_code(struct bpf_verifier_env *env)
13170 {
13171     struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
13172     struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
13173     struct bpf_insn *insn = env->prog->insnsi;
13174     const int insn_cnt = env->prog->len;
13175     int i;
13176
13177     for (i = 0; i < insn_cnt; i++) {
13178         if (aux_data[i].seen)
13179             continue;
13180         memcpy(insn + i, &trap, sizeof(trap));
13181         aux_data[i].zext_dst = false;
13182     }
13183 }
13184
13185 static bool insn_is_cond_jump(u8 code)
13186 {
13187     u8 op;
13188
13189     if (BPF_CLASS(code) == BPF_JMP32)
13190         return true;
13191
13192     if (BPF_CLASS(code) != BPF_JMP)
13193         return false;
13194
13195     op = BPF_OP(code);
13196     return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
13197 }
13198
13199 static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
13200 {
13201     struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
13202     struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
13203     struct bpf_insn *insn = env->prog->insnsi;
13204     const int insn_cnt = env->prog->len;
13205     int i;
13206
13207     for (i = 0; i < insn_cnt; i++, insn++) {
13208         if (!insn_is_cond_jump(insn->code))
13209             continue;
13210
13211         if (!aux_data[i + 1].seen)
13212             ja.off = insn->off;
13213         else if (!aux_data[i + 1 + insn->off].seen)
13214             ja.off = 0;
13215         else
13216             continue;
13217
13218         if (bpf_prog_is_dev_bound(env->prog->aux))
13219             bpf_prog_offload_replace_insn(env, i, &ja);
13220
13221         memcpy(insn, &ja, sizeof(ja));
13222     }
13223 }
13224
13225 static int opt_remove_dead_code(struct bpf_verifier_env *env)
13226 {
13227     struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
13228     int insn_cnt = env->prog->len;
13229     int i, err;
13230
13231     for (i = 0; i < insn_cnt; i++) {
13232         int j;
13233
13234         j = 0;
13235         while (i + j < insn_cnt && !aux_data[i + j].seen)
13236             j++;
13237         if (!j)
13238             continue;
13239
13240         err = verifier_remove_insns(env, i, j);
13241         if (err)
13242             return err;
13243         insn_cnt = env->prog->len;
13244     }
13245
13246     return 0;
13247 }
13248
13249 static int opt_remove_nops(struct bpf_verifier_env *env)
13250 {
13251     const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
13252     struct bpf_insn *insn = env->prog->insnsi;
13253     int insn_cnt = env->prog->len;
13254     int i, err;
13255
13256     for (i = 0; i < insn_cnt; i++) {
13257         if (memcmp(&insn[i], &ja, sizeof(ja)))
13258             continue;
13259
13260         err = verifier_remove_insns(env, i, 1);
13261         if (err)
13262             return err;
13263         insn_cnt--;
13264         i--;
13265     }
13266
13267     return 0;
13268 }
13269
13270 static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
13271                      const union bpf_attr *attr)
13272 {
13273     struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
13274     struct bpf_insn_aux_data *aux = env->insn_aux_data;
13275     int i, patch_len, delta = 0, len = env->prog->len;
13276     struct bpf_insn *insns = env->prog->insnsi;
13277     struct bpf_prog *new_prog;
13278     bool rnd_hi32;
13279
13280     rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
13281     zext_patch[1] = BPF_ZEXT_REG(0);
13282     rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
13283     rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
13284     rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
13285     for (i = 0; i < len; i++) {
13286         int adj_idx = i + delta;
13287         struct bpf_insn insn;
13288         int load_reg;
13289
13290         insn = insns[adj_idx];
13291         load_reg = insn_def_regno(&insn);
13292         if (!aux[adj_idx].zext_dst) {
13293             u8 code, class;
13294             u32 imm_rnd;
13295
13296             if (!rnd_hi32)
13297                 continue;
13298
13299             code = insn.code;
13300             class = BPF_CLASS(code);
13301             if (load_reg == -1)
13302                 continue;
13303
13304             /* NOTE: arg "reg" (the fourth one) is only used for
13305              *       BPF_STX + SRC_OP, so it is safe to pass NULL
13306              *       here.
13307              */
13308             if (is_reg64(env, &insn, load_reg, NULL, DST_OP)) {
13309                 if (class == BPF_LD &&
13310                     BPF_MODE(code) == BPF_IMM)
13311                     i++;
13312                 continue;
13313             }
13314
13315             /* ctx load could be transformed into wider load. */
13316             if (class == BPF_LDX &&
13317                 aux[adj_idx].ptr_type == PTR_TO_CTX)
13318                 continue;
13319
13320             imm_rnd = get_random_int();
13321             rnd_hi32_patch[0] = insn;
13322             rnd_hi32_patch[1].imm = imm_rnd;
13323             rnd_hi32_patch[3].dst_reg = load_reg;
13324             patch = rnd_hi32_patch;
13325             patch_len = 4;
13326             goto apply_patch_buffer;
13327         }
13328
13329         /* Add in an zero-extend instruction if a) the JIT has requested
13330          * it or b) it's a CMPXCHG.
13331          *
13332          * The latter is because: BPF_CMPXCHG always loads a value into
13333          * R0, therefore always zero-extends. However some archs'
13334          * equivalent instruction only does this load when the
13335          * comparison is successful. This detail of CMPXCHG is
13336          * orthogonal to the general zero-extension behaviour of the
13337          * CPU, so it's treated independently of bpf_jit_needs_zext.
13338          */
13339         if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
13340             continue;
13341
13342         if (WARN_ON(load_reg == -1)) {
13343             verbose(env, "verifier bug. zext_dst is set, but no reg is defined\n");
13344             return -EFAULT;
13345         }
13346
13347         zext_patch[0] = insn;
13348         zext_patch[1].dst_reg = load_reg;
13349         zext_patch[1].src_reg = load_reg;
13350         patch = zext_patch;
13351         patch_len = 2;
13352 apply_patch_buffer:
13353         new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
13354         if (!new_prog)
13355             return -ENOMEM;
13356         env->prog = new_prog;
13357         insns = new_prog->insnsi;
13358         aux = env->insn_aux_data;
13359         delta += patch_len - 1;
13360     }
13361
13362     return 0;
13363 }
13364
13365 /* convert load instructions that access fields of a context type into a
13366  * sequence of instructions that access fields of the underlying structure:
13367  *     struct __sk_buff    -> struct sk_buff
13368  *     struct bpf_sock_ops -> struct sock
13369  */
13370 static int convert_ctx_accesses(struct bpf_verifier_env *env)
13371 {
13372     const struct bpf_verifier_ops *ops = env->ops;
13373     int i, cnt, size, ctx_field_size, delta = 0;
13374     const int insn_cnt = env->prog->len;
13375     struct bpf_insn insn_buf[16], *insn;
13376     u32 target_size, size_default, off;
13377     struct bpf_prog *new_prog;
13378     enum bpf_access_type type;
13379     bool is_narrower_load;
13380
13381     if (ops->gen_prologue || env->seen_direct_write) {
13382         if (!ops->gen_prologue) {
13383             verbose(env, "bpf verifier is misconfigured\n");
13384             return -EINVAL;
13385         }
13386         cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
13387                     env->prog);
13388         if (cnt >= ARRAY_SIZE(insn_buf)) {
13389             verbose(env, "bpf verifier is misconfigured\n");
13390             return -EINVAL;
13391         } else if (cnt) {
13392             new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
13393             if (!new_prog)
13394                 return -ENOMEM;
13395
13396             env->prog = new_prog;
13397             delta += cnt - 1;
13398         }
13399     }
13400
13401     if (bpf_prog_is_dev_bound(env->prog->aux))
13402         return 0;
13403
13404     insn = env->prog->insnsi + delta;
13405
13406     for (i = 0; i < insn_cnt; i++, insn++) {
13407         bpf_convert_ctx_access_t convert_ctx_access;
13408         bool ctx_access;
13409
13410         if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
13411             insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
13412             insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
13413             insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) {
13414             type = BPF_READ;
13415             ctx_access = true;
13416         } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
13417                insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
13418                insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
13419                insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
13420                insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
13421                insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
13422                insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
13423                insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
13424             type = BPF_WRITE;
13425             ctx_access = BPF_CLASS(insn->code) == BPF_STX;
13426         } else {
13427             continue;
13428         }
13429
13430         if (type == BPF_WRITE &&
13431             env->insn_aux_data[i + delta].sanitize_stack_spill) {
13432             struct bpf_insn patch[] = {
13433                 *insn,
13434                 BPF_ST_NOSPEC(),
13435             };
13436
13437             cnt = ARRAY_SIZE(patch);
13438             new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
13439             if (!new_prog)
13440                 return -ENOMEM;
13441
13442             delta    += cnt - 1;
13443             env->prog = new_prog;
13444             insn      = new_prog->insnsi + i + delta;
13445             continue;
13446         }
13447
13448         if (!ctx_access)
13449             continue;
13450
13451         switch ((int)env->insn_aux_data[i + delta].ptr_type) {
13452         case PTR_TO_CTX:
13453             if (!ops->convert_ctx_access)
13454                 continue;
13455             convert_ctx_access = ops->convert_ctx_access;
13456             break;
13457         case PTR_TO_SOCKET:
13458         case PTR_TO_SOCK_COMMON:
13459             convert_ctx_access = bpf_sock_convert_ctx_access;
13460             break;
13461         case PTR_TO_TCP_SOCK:
13462             convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
13463             break;
13464         case PTR_TO_XDP_SOCK:
13465             convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
13466             break;
13467         case PTR_TO_BTF_ID:
13468         case PTR_TO_BTF_ID | PTR_UNTRUSTED:
13469             if (type == BPF_READ) {
13470                 insn->code = BPF_LDX | BPF_PROBE_MEM |
13471                     BPF_SIZE((insn)->code);
13472                 env->prog->aux->num_exentries++;
13473             } else if (resolve_prog_type(env->prog) != BPF_PROG_TYPE_STRUCT_OPS) {
13474                 verbose(env, "Writes through BTF pointers are not allowed\n");
13475                 return -EINVAL;
13476             }
13477             continue;
13478         default:
13479             continue;
13480         }
13481
13482         ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
13483         size = BPF_LDST_BYTES(insn);
13484
13485         /* If the read access is a narrower load of the field,
13486          * convert to a 4/8-byte load, to minimum program type specific
13487          * convert_ctx_access changes. If conversion is successful,
13488          * we will apply proper mask to the result.
13489          */
13490         is_narrower_load = size < ctx_field_size;
13491         size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
13492         off = insn->off;
13493         if (is_narrower_load) {
13494             u8 size_code;
13495
13496             if (type == BPF_WRITE) {
13497                 verbose(env, "bpf verifier narrow ctx access misconfigured\n");
13498                 return -EINVAL;
13499             }
13500
13501             size_code = BPF_H;
13502             if (ctx_field_size == 4)
13503                 size_code = BPF_W;
13504             else if (ctx_field_size == 8)
13505                 size_code = BPF_DW;
13506
13507             insn->off = off & ~(size_default - 1);
13508             insn->code = BPF_LDX | BPF_MEM | size_code;
13509         }
13510
13511         target_size = 0;
13512         cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
13513                      &target_size);
13514         if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
13515             (ctx_field_size && !target_size)) {
13516             verbose(env, "bpf verifier is misconfigured\n");
13517             return -EINVAL;
13518         }
13519
13520         if (is_narrower_load && size < target_size) {
13521             u8 shift = bpf_ctx_narrow_access_offset(
13522                 off, size, size_default) * 8;
13523             if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
13524                 verbose(env, "bpf verifier narrow ctx load misconfigured\n");
13525                 return -EINVAL;
13526             }
13527             if (ctx_field_size <= 4) {
13528                 if (shift)
13529                     insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
13530                                     insn->dst_reg,
13531                                     shift);
13532                 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
13533                                 (1 << size * 8) - 1);
13534             } else {
13535                 if (shift)
13536                     insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
13537                                     insn->dst_reg,
13538                                     shift);
13539                 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
13540                                 (1ULL << size * 8) - 1);
13541             }
13542         }
13543
13544         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
13545         if (!new_prog)
13546             return -ENOMEM;
13547
13548         delta += cnt - 1;
13549
13550         /* keep walking new program and skip insns we just inserted */
13551         env->prog = new_prog;
13552         insn      = new_prog->insnsi + i + delta;
13553     }
13554
13555     return 0;
13556 }
13557
13558 static int jit_subprogs(struct bpf_verifier_env *env)
13559 {
13560     struct bpf_prog *prog = env->prog, **func, *tmp;
13561     int i, j, subprog_start, subprog_end = 0, len, subprog;
13562     struct bpf_map *map_ptr;
13563     struct bpf_insn *insn;
13564     void *old_bpf_func;
13565     int err, num_exentries;
13566
13567     if (env->subprog_cnt <= 1)
13568         return 0;
13569
13570     for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
13571         if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
13572             continue;
13573
13574         /* Upon error here we cannot fall back to interpreter but
13575          * need a hard reject of the program. Thus -EFAULT is
13576          * propagated in any case.
13577          */
13578         subprog = find_subprog(env, i + insn->imm + 1);
13579         if (subprog < 0) {
13580             WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
13581                   i + insn->imm + 1);
13582             return -EFAULT;
13583         }
13584         /* temporarily remember subprog id inside insn instead of
13585          * aux_data, since next loop will split up all insns into funcs
13586          */
13587         insn->off = subprog;
13588         /* remember original imm in case JIT fails and fallback
13589          * to interpreter will be needed
13590          */
13591         env->insn_aux_data[i].call_imm = insn->imm;
13592         /* point imm to __bpf_call_base+1 from JITs point of view */
13593         insn->imm = 1;
13594         if (bpf_pseudo_func(insn))
13595             /* jit (e.g. x86_64) may emit fewer instructions
13596              * if it learns a u32 imm is the same as a u64 imm.
13597              * Force a non zero here.
13598              */
13599             insn[1].imm = 1;
13600     }
13601
13602     err = bpf_prog_alloc_jited_linfo(prog);
13603     if (err)
13604         goto out_undo_insn;
13605
13606     err = -ENOMEM;
13607     func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
13608     if (!func)
13609         goto out_undo_insn;
13610
13611     for (i = 0; i < env->subprog_cnt; i++) {
13612         subprog_start = subprog_end;
13613         subprog_end = env->subprog_info[i + 1].start;
13614
13615         len = subprog_end - subprog_start;
13616         /* bpf_prog_run() doesn't call subprogs directly,
13617          * hence main prog stats include the runtime of subprogs.
13618          * subprogs don't have IDs and not reachable via prog_get_next_id
13619          * func[i]->stats will never be accessed and stays NULL
13620          */
13621         func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
13622         if (!func[i])
13623             goto out_free;
13624         memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
13625                len * sizeof(struct bpf_insn));
13626         func[i]->type = prog->type;
13627         func[i]->len = len;
13628         if (bpf_prog_calc_tag(func[i]))
13629             goto out_free;
13630         func[i]->is_func = 1;
13631         func[i]->aux->func_idx = i;
13632         /* Below members will be freed only at prog->aux */
13633         func[i]->aux->btf = prog->aux->btf;
13634         func[i]->aux->func_info = prog->aux->func_info;
13635         func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
13636         func[i]->aux->poke_tab = prog->aux->poke_tab;
13637         func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
13638
13639         for (j = 0; j < prog->aux->size_poke_tab; j++) {
13640             struct bpf_jit_poke_descriptor *poke;
13641
13642             poke = &prog->aux->poke_tab[j];
13643             if (poke->insn_idx < subprog_end &&
13644                 poke->insn_idx >= subprog_start)
13645                 poke->aux = func[i]->aux;
13646         }
13647
13648         func[i]->aux->name[0] = 'F';
13649         func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
13650         func[i]->jit_requested = 1;
13651         func[i]->blinding_requested = prog->blinding_requested;
13652         func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
13653         func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
13654         func[i]->aux->linfo = prog->aux->linfo;
13655         func[i]->aux->nr_linfo = prog->aux->nr_linfo;
13656         func[i]->aux->jited_linfo = prog->aux->jited_linfo;
13657         func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
13658         num_exentries = 0;
13659         insn = func[i]->insnsi;
13660         for (j = 0; j < func[i]->len; j++, insn++) {
13661             if (BPF_CLASS(insn->code) == BPF_LDX &&
13662                 BPF_MODE(insn->code) == BPF_PROBE_MEM)
13663                 num_exentries++;
13664         }
13665         func[i]->aux->num_exentries = num_exentries;
13666         func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
13667         func[i] = bpf_int_jit_compile(func[i]);
13668         if (!func[i]->jited) {
13669             err = -ENOTSUPP;
13670             goto out_free;
13671         }
13672         cond_resched();
13673     }
13674
13675     /* at this point all bpf functions were successfully JITed
13676      * now populate all bpf_calls with correct addresses and
13677      * run last pass of JIT
13678      */
13679     for (i = 0; i < env->subprog_cnt; i++) {
13680         insn = func[i]->insnsi;
13681         for (j = 0; j < func[i]->len; j++, insn++) {
13682             if (bpf_pseudo_func(insn)) {
13683                 subprog = insn->off;
13684                 insn[0].imm = (u32)(long)func[subprog]->bpf_func;
13685                 insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
13686                 continue;
13687             }
13688             if (!bpf_pseudo_call(insn))
13689                 continue;
13690             subprog = insn->off;
13691             insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
13692         }
13693
13694         /* we use the aux data to keep a list of the start addresses
13695          * of the JITed images for each function in the program
13696          *
13697          * for some architectures, such as powerpc64, the imm field
13698          * might not be large enough to hold the offset of the start
13699          * address of the callee's JITed image from __bpf_call_base
13700          *
13701          * in such cases, we can lookup the start address of a callee
13702          * by using its subprog id, available from the off field of
13703          * the call instruction, as an index for this list
13704          */
13705         func[i]->aux->func = func;
13706         func[i]->aux->func_cnt = env->subprog_cnt;
13707     }
13708     for (i = 0; i < env->subprog_cnt; i++) {
13709         old_bpf_func = func[i]->bpf_func;
13710         tmp = bpf_int_jit_compile(func[i]);
13711         if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
13712             verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
13713             err = -ENOTSUPP;
13714             goto out_free;
13715         }
13716         cond_resched();
13717     }
13718
13719     /* finally lock prog and jit images for all functions and
13720      * populate kallsysm
13721      */
13722     for (i = 0; i < env->subprog_cnt; i++) {
13723         bpf_prog_lock_ro(func[i]);
13724         bpf_prog_kallsyms_add(func[i]);
13725     }
13726
13727     /* Last step: make now unused interpreter insns from main
13728      * prog consistent for later dump requests, so they can
13729      * later look the same as if they were interpreted only.
13730      */
13731     for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
13732         if (bpf_pseudo_func(insn)) {
13733             insn[0].imm = env->insn_aux_data[i].call_imm;
13734             insn[1].imm = insn->off;
13735             insn->off = 0;
13736             continue;
13737         }
13738         if (!bpf_pseudo_call(insn))
13739             continue;
13740         insn->off = env->insn_aux_data[i].call_imm;
13741         subprog = find_subprog(env, i + insn->off + 1);
13742         insn->imm = subprog;
13743     }
13744
13745     prog->jited = 1;
13746     prog->bpf_func = func[0]->bpf_func;
13747     prog->jited_len = func[0]->jited_len;
13748     prog->aux->func = func;
13749     prog->aux->func_cnt = env->subprog_cnt;
13750     bpf_prog_jit_attempt_done(prog);
13751     return 0;
13752 out_free:
13753     /* We failed JIT'ing, so at this point we need to unregister poke
13754      * descriptors from subprogs, so that kernel is not attempting to
13755      * patch it anymore as we're freeing the subprog JIT memory.
13756      */
13757     for (i = 0; i < prog->aux->size_poke_tab; i++) {
13758         map_ptr = prog->aux->poke_tab[i].tail_call.map;
13759         map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
13760     }
13761     /* At this point we're guaranteed that poke descriptors are not
13762      * live anymore. We can just unlink its descriptor table as it's
13763      * released with the main prog.
13764      */
13765     for (i = 0; i < env->subprog_cnt; i++) {
13766         if (!func[i])
13767             continue;
13768         func[i]->aux->poke_tab = NULL;
13769         bpf_jit_free(func[i]);
13770     }
13771     kfree(func);
13772 out_undo_insn:
13773     /* cleanup main prog to be interpreted */
13774     prog->jit_requested = 0;
13775     prog->blinding_requested = 0;
13776     for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
13777         if (!bpf_pseudo_call(insn))
13778             continue;
13779         insn->off = 0;
13780         insn->imm = env->insn_aux_data[i].call_imm;
13781     }
13782     bpf_prog_jit_attempt_done(prog);
13783     return err;
13784 }
13785
13786 static int fixup_call_args(struct bpf_verifier_env *env)
13787 {
13788 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
13789     struct bpf_prog *prog = env->prog;
13790     struct bpf_insn *insn = prog->insnsi;
13791     bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
13792     int i, depth;
13793 #endif
13794     int err = 0;
13795
13796     if (env->prog->jit_requested &&
13797         !bpf_prog_is_dev_bound(env->prog->aux)) {
13798         err = jit_subprogs(env);
13799         if (err == 0)
13800             return 0;
13801         if (err == -EFAULT)
13802             return err;
13803     }
13804 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
13805     if (has_kfunc_call) {
13806         verbose(env, "calling kernel functions are not allowed in non-JITed programs\n");
13807         return -EINVAL;
13808     }
13809     if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
13810         /* When JIT fails the progs with bpf2bpf calls and tail_calls
13811          * have to be rejected, since interpreter doesn't support them yet.
13812          */
13813         verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
13814         return -EINVAL;
13815     }
13816     for (i = 0; i < prog->len; i++, insn++) {
13817         if (bpf_pseudo_func(insn)) {
13818             /* When JIT fails the progs with callback calls
13819              * have to be rejected, since interpreter doesn't support them yet.
13820              */
13821             verbose(env, "callbacks are not allowed in non-JITed programs\n");
13822             return -EINVAL;
13823         }
13824
13825         if (!bpf_pseudo_call(insn))
13826             continue;
13827         depth = get_callee_stack_depth(env, insn, i);
13828         if (depth < 0)
13829             return depth;
13830         bpf_patch_call_args(insn, depth);
13831     }
13832     err = 0;
13833 #endif
13834     return err;
13835 }
13836
13837 static int fixup_kfunc_call(struct bpf_verifier_env *env,
13838                 struct bpf_insn *insn)
13839 {
13840     const struct bpf_kfunc_desc *desc;
13841
13842     if (!insn->imm) {
13843         verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
13844         return -EINVAL;
13845     }
13846
13847     /* insn->imm has the btf func_id. Replace it with
13848      * an address (relative to __bpf_base_call).
13849      */
13850     desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
13851     if (!desc) {
13852         verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n",
13853             insn->imm);
13854         return -EFAULT;
13855     }
13856
13857     insn->imm = desc->imm;
13858
13859     return 0;
13860 }
13861
13862 /* Do various post-verification rewrites in a single program pass.
13863  * These rewrites simplify JIT and interpreter implementations.
13864  */
13865 static int do_misc_fixups(struct bpf_verifier_env *env)
13866 {
13867     struct bpf_prog *prog = env->prog;
13868     enum bpf_attach_type eatype = prog->expected_attach_type;
13869     enum bpf_prog_type prog_type = resolve_prog_type(prog);
13870     struct bpf_insn *insn = prog->insnsi;
13871     const struct bpf_func_proto *fn;
13872     const int insn_cnt = prog->len;
13873     const struct bpf_map_ops *ops;
13874     struct bpf_insn_aux_data *aux;
13875     struct bpf_insn insn_buf[16];
13876     struct bpf_prog *new_prog;
13877     struct bpf_map *map_ptr;
13878     int i, ret, cnt, delta = 0;
13879
13880     for (i = 0; i < insn_cnt; i++, insn++) {
13881         /* Make divide-by-zero exceptions impossible. */
13882         if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
13883             insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
13884             insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
13885             insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
13886             bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
13887             bool isdiv = BPF_OP(insn->code) == BPF_DIV;
13888             struct bpf_insn *patchlet;
13889             struct bpf_insn chk_and_div[] = {
13890                 /* [R,W]x div 0 -> 0 */
13891                 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
13892                          BPF_JNE | BPF_K, insn->src_reg,
13893                          0, 2, 0),
13894                 BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
13895                 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
13896                 *insn,
13897             };
13898             struct bpf_insn chk_and_mod[] = {
13899                 /* [R,W]x mod 0 -> [R,W]x */
13900                 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
13901                          BPF_JEQ | BPF_K, insn->src_reg,
13902                          0, 1 + (is64 ? 0 : 1), 0),
13903                 *insn,
13904                 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
13905                 BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
13906             };
13907
13908             patchlet = isdiv ? chk_and_div : chk_and_mod;
13909             cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
13910                       ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
13911
13912             new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
13913             if (!new_prog)
13914                 return -ENOMEM;
13915
13916             delta    += cnt - 1;
13917             env->prog = prog = new_prog;
13918             insn      = new_prog->insnsi + i + delta;
13919             continue;
13920         }
13921
13922         /* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
13923         if (BPF_CLASS(insn->code) == BPF_LD &&
13924             (BPF_MODE(insn->code) == BPF_ABS ||
13925              BPF_MODE(insn->code) == BPF_IND)) {
13926             cnt = env->ops->gen_ld_abs(insn, insn_buf);
13927             if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
13928                 verbose(env, "bpf verifier is misconfigured\n");
13929                 return -EINVAL;
13930             }
13931
13932             new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
13933             if (!new_prog)
13934                 return -ENOMEM;
13935
13936             delta    += cnt - 1;
13937             env->prog = prog = new_prog;
13938             insn      = new_prog->insnsi + i + delta;
13939             continue;
13940         }
13941
13942         /* Rewrite pointer arithmetic to mitigate speculation attacks. */
13943         if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
13944             insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
13945             const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
13946             const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
13947             struct bpf_insn *patch = &insn_buf[0];
13948             bool issrc, isneg, isimm;
13949             u32 off_reg;
13950
13951             aux = &env->insn_aux_data[i + delta];
13952             if (!aux->alu_state ||
13953                 aux->alu_state == BPF_ALU_NON_POINTER)
13954                 continue;
13955
13956             isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
13957             issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
13958                 BPF_ALU_SANITIZE_SRC;
13959             isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
13960
13961             off_reg = issrc ? insn->src_reg : insn->dst_reg;
13962             if (isimm) {
13963                 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
13964             } else {
13965                 if (isneg)
13966                     *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
13967                 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
13968                 *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
13969                 *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
13970                 *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
13971                 *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
13972                 *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
13973             }
13974             if (!issrc)
13975                 *patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
13976             insn->src_reg = BPF_REG_AX;
13977             if (isneg)
13978                 insn->code = insn->code == code_add ?
13979                          code_sub : code_add;
13980             *patch++ = *insn;
13981             if (issrc && isneg && !isimm)
13982                 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
13983             cnt = patch - insn_buf;
13984
13985             new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
13986             if (!new_prog)
13987                 return -ENOMEM;
13988
13989             delta    += cnt - 1;
13990             env->prog = prog = new_prog;
13991             insn      = new_prog->insnsi + i + delta;
13992             continue;
13993         }
13994
13995         if (insn->code != (BPF_JMP | BPF_CALL))
13996             continue;
13997         if (insn->src_reg == BPF_PSEUDO_CALL)
13998             continue;
13999         if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
14000             ret = fixup_kfunc_call(env, insn);
14001             if (ret)
14002                 return ret;
14003             continue;
14004         }
14005
14006         if (insn->imm == BPF_FUNC_get_route_realm)
14007             prog->dst_needed = 1;
14008         if (insn->imm == BPF_FUNC_get_prandom_u32)
14009             bpf_user_rnd_init_once();
14010         if (insn->imm == BPF_FUNC_override_return)
14011             prog->kprobe_override = 1;
14012         if (insn->imm == BPF_FUNC_tail_call) {
14013             /* If we tail call into other programs, we
14014              * cannot make any assumptions since they can
14015              * be replaced dynamically during runtime in
14016              * the program array.
14017              */
14018             prog->cb_access = 1;
14019             if (!allow_tail_call_in_subprogs(env))
14020                 prog->aux->stack_depth = MAX_BPF_STACK;
14021             prog->aux->max_pkt_offset = MAX_PACKET_OFF;
14022
14023             /* mark bpf_tail_call as different opcode to avoid
14024              * conditional branch in the interpreter for every normal
14025              * call and to prevent accidental JITing by JIT compiler
14026              * that doesn't support bpf_tail_call yet
14027              */
14028             insn->imm = 0;
14029             insn->code = BPF_JMP | BPF_TAIL_CALL;
14030
14031             aux = &env->insn_aux_data[i + delta];
14032             if (env->bpf_capable && !prog->blinding_requested &&
14033                 prog->jit_requested &&
14034                 !bpf_map_key_poisoned(aux) &&
14035                 !bpf_map_ptr_poisoned(aux) &&
14036                 !bpf_map_ptr_unpriv(aux)) {
14037                 struct bpf_jit_poke_descriptor desc = {
14038                     .reason = BPF_POKE_REASON_TAIL_CALL,
14039                     .tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
14040                     .tail_call.key = bpf_map_key_immediate(aux),
14041                     .insn_idx = i + delta,
14042                 };
14043
14044                 ret = bpf_jit_add_poke_descriptor(prog, &desc);
14045                 if (ret < 0) {
14046                     verbose(env, "adding tail call poke descriptor failed\n");
14047                     return ret;
14048                 }
14049
14050                 insn->imm = ret + 1;
14051                 continue;
14052             }
14053
14054             if (!bpf_map_ptr_unpriv(aux))
14055                 continue;
14056
14057             /* instead of changing every JIT dealing with tail_call
14058              * emit two extra insns:
14059              * if (index >= max_entries) goto out;
14060              * index &= array->index_mask;
14061              * to avoid out-of-bounds cpu speculation
14062              */
14063             if (bpf_map_ptr_poisoned(aux)) {
14064                 verbose(env, "tail_call abusing map_ptr\n");
14065                 return -EINVAL;
14066             }
14067
14068             map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
14069             insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
14070                           map_ptr->max_entries, 2);
14071             insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
14072                             container_of(map_ptr,
14073                                  struct bpf_array,
14074                                  map)->index_mask);
14075             insn_buf[2] = *insn;
14076             cnt = 3;
14077             new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
14078             if (!new_prog)
14079                 return -ENOMEM;
14080
14081             delta    += cnt - 1;
14082             env->prog = prog = new_prog;
14083             insn      = new_prog->insnsi + i + delta;
14084             continue;
14085         }
14086
14087         if (insn->imm == BPF_FUNC_timer_set_callback) {
14088             /* The verifier will process callback_fn as many times as necessary
14089              * with different maps and the register states prepared by
14090              * set_timer_callback_state will be accurate.
14091              *
14092              * The following use case is valid:
14093              *   map1 is shared by prog1, prog2, prog3.
14094              *   prog1 calls bpf_timer_init for some map1 elements
14095              *   prog2 calls bpf_timer_set_callback for some map1 elements.
14096              *     Those that were not bpf_timer_init-ed will return -EINVAL.
14097              *   prog3 calls bpf_timer_start for some map1 elements.
14098              *     Those that were not both bpf_timer_init-ed and
14099              *     bpf_timer_set_callback-ed will return -EINVAL.
14100              */
14101             struct bpf_insn ld_addrs[2] = {
14102                 BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
14103             };
14104
14105             insn_buf[0] = ld_addrs[0];
14106             insn_buf[1] = ld_addrs[1];
14107             insn_buf[2] = *insn;
14108             cnt = 3;
14109
14110             new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
14111             if (!new_prog)
14112                 return -ENOMEM;
14113
14114             delta    += cnt - 1;
14115             env->prog = prog = new_prog;
14116             insn      = new_prog->insnsi + i + delta;
14117             goto patch_call_imm;
14118         }
14119
14120         if (insn->imm == BPF_FUNC_task_storage_get ||
14121             insn->imm == BPF_FUNC_sk_storage_get ||
14122             insn->imm == BPF_FUNC_inode_storage_get) {
14123             if (env->prog->aux->sleepable)
14124                 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
14125             else
14126                 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
14127             insn_buf[1] = *insn;
14128             cnt = 2;
14129
14130             new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
14131             if (!new_prog)
14132                 return -ENOMEM;
14133
14134             delta += cnt - 1;
14135             env->prog = prog = new_prog;
14136             insn = new_prog->insnsi + i + delta;
14137             goto patch_call_imm;
14138         }
14139
14140         /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
14141          * and other inlining handlers are currently limited to 64 bit
14142          * only.
14143          */
14144         if (prog->jit_requested && BITS_PER_LONG == 64 &&
14145             (insn->imm == BPF_FUNC_map_lookup_elem ||
14146              insn->imm == BPF_FUNC_map_update_elem ||
14147              insn->imm == BPF_FUNC_map_delete_elem ||
14148              insn->imm == BPF_FUNC_map_push_elem   ||
14149              insn->imm == BPF_FUNC_map_pop_elem    ||
14150              insn->imm == BPF_FUNC_map_peek_elem   ||
14151              insn->imm == BPF_FUNC_redirect_map    ||
14152              insn->imm == BPF_FUNC_for_each_map_elem ||
14153              insn->imm == BPF_FUNC_map_lookup_percpu_elem)) {
14154             aux = &env->insn_aux_data[i + delta];
14155             if (bpf_map_ptr_poisoned(aux))
14156                 goto patch_call_imm;
14157
14158             map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
14159             ops = map_ptr->ops;
14160             if (insn->imm == BPF_FUNC_map_lookup_elem &&
14161                 ops->map_gen_lookup) {
14162                 cnt = ops->map_gen_lookup(map_ptr, insn_buf);
14163                 if (cnt == -EOPNOTSUPP)
14164                     goto patch_map_ops_generic;
14165                 if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
14166                     verbose(env, "bpf verifier is misconfigured\n");
14167                     return -EINVAL;
14168                 }
14169
14170                 new_prog = bpf_patch_insn_data(env, i + delta,
14171                                    insn_buf, cnt);
14172                 if (!new_prog)
14173                     return -ENOMEM;
14174
14175                 delta    += cnt - 1;
14176                 env->prog = prog = new_prog;
14177                 insn      = new_prog->insnsi + i + delta;
14178                 continue;
14179             }
14180
14181             BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
14182                      (void *(*)(struct bpf_map *map, void *key))NULL));
14183             BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
14184                      (int (*)(struct bpf_map *map, void *key))NULL));
14185             BUILD_BUG_ON(!__same_type(ops->map_update_elem,
14186                      (int (*)(struct bpf_map *map, void *key, void *value,
14187                           u64 flags))NULL));
14188             BUILD_BUG_ON(!__same_type(ops->map_push_elem,
14189                      (int (*)(struct bpf_map *map, void *value,
14190                           u64 flags))NULL));
14191             BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
14192                      (int (*)(struct bpf_map *map, void *value))NULL));
14193             BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
14194                      (int (*)(struct bpf_map *map, void *value))NULL));
14195             BUILD_BUG_ON(!__same_type(ops->map_redirect,
14196                      (int (*)(struct bpf_map *map, u32 ifindex, u64 flags))NULL));
14197             BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
14198                      (int (*)(struct bpf_map *map,
14199                           bpf_callback_t callback_fn,
14200                           void *callback_ctx,
14201                           u64 flags))NULL));
14202             BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem,
14203                      (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL));
14204
14205 patch_map_ops_generic:
14206             switch (insn->imm) {
14207             case BPF_FUNC_map_lookup_elem:
14208                 insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
14209                 continue;
14210             case BPF_FUNC_map_update_elem:
14211                 insn->imm = BPF_CALL_IMM(ops->map_update_elem);
14212                 continue;
14213             case BPF_FUNC_map_delete_elem:
14214                 insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
14215                 continue;
14216             case BPF_FUNC_map_push_elem:
14217                 insn->imm = BPF_CALL_IMM(ops->map_push_elem);
14218                 continue;
14219             case BPF_FUNC_map_pop_elem:
14220                 insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
14221                 continue;
14222             case BPF_FUNC_map_peek_elem:
14223                 insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
14224                 continue;
14225             case BPF_FUNC_redirect_map:
14226                 insn->imm = BPF_CALL_IMM(ops->map_redirect);
14227                 continue;
14228             case BPF_FUNC_for_each_map_elem:
14229                 insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
14230                 continue;
14231             case BPF_FUNC_map_lookup_percpu_elem:
14232                 insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem);
14233                 continue;
14234             }
14235
14236             goto patch_call_imm;
14237         }
14238
14239         /* Implement bpf_jiffies64 inline. */
14240         if (prog->jit_requested && BITS_PER_LONG == 64 &&
14241             insn->imm == BPF_FUNC_jiffies64) {
14242             struct bpf_insn ld_jiffies_addr[2] = {
14243                 BPF_LD_IMM64(BPF_REG_0,
14244                          (unsigned long)&jiffies),
14245             };
14246
14247             insn_buf[0] = ld_jiffies_addr[0];
14248             insn_buf[1] = ld_jiffies_addr[1];
14249             insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
14250                           BPF_REG_0, 0);
14251             cnt = 3;
14252
14253             new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
14254                                cnt);
14255             if (!new_prog)
14256                 return -ENOMEM;
14257
14258             delta    += cnt - 1;
14259             env->prog = prog = new_prog;
14260             insn      = new_prog->insnsi + i + delta;
14261             continue;
14262         }
14263
14264         /* Implement bpf_get_func_arg inline. */
14265         if (prog_type == BPF_PROG_TYPE_TRACING &&
14266             insn->imm == BPF_FUNC_get_func_arg) {
14267             /* Load nr_args from ctx - 8 */
14268             insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
14269             insn_buf[1] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6);
14270             insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3);
14271             insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1);
14272             insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0);
14273             insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
14274             insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0);
14275             insn_buf[7] = BPF_JMP_A(1);
14276             insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
14277             cnt = 9;
14278
14279             new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
14280             if (!new_prog)
14281                 return -ENOMEM;
14282
14283             delta    += cnt - 1;
14284             env->prog = prog = new_prog;
14285             insn      = new_prog->insnsi + i + delta;
14286             continue;
14287         }
14288
14289         /* Implement bpf_get_func_ret inline. */
14290         if (prog_type == BPF_PROG_TYPE_TRACING &&
14291             insn->imm == BPF_FUNC_get_func_ret) {
14292             if (eatype == BPF_TRACE_FEXIT ||
14293                 eatype == BPF_MODIFY_RETURN) {
14294                 /* Load nr_args from ctx - 8 */
14295                 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
14296                 insn_buf[1] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
14297                 insn_buf[2] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
14298                 insn_buf[3] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
14299                 insn_buf[4] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0);
14300                 insn_buf[5] = BPF_MOV64_IMM(BPF_REG_0, 0);
14301                 cnt = 6;
14302             } else {
14303                 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP);
14304                 cnt = 1;
14305             }
14306
14307             new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
14308             if (!new_prog)
14309                 return -ENOMEM;
14310
14311             delta    += cnt - 1;
14312             env->prog = prog = new_prog;
14313             insn      = new_prog->insnsi + i + delta;
14314             continue;
14315         }
14316
14317         /* Implement get_func_arg_cnt inline. */
14318         if (prog_type == BPF_PROG_TYPE_TRACING &&
14319             insn->imm == BPF_FUNC_get_func_arg_cnt) {
14320             /* Load nr_args from ctx - 8 */
14321             insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
14322
14323             new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
14324             if (!new_prog)
14325                 return -ENOMEM;
14326
14327             env->prog = prog = new_prog;
14328             insn      = new_prog->insnsi + i + delta;
14329             continue;
14330         }
14331
14332         /* Implement bpf_get_func_ip inline. */
14333         if (prog_type == BPF_PROG_TYPE_TRACING &&
14334             insn->imm == BPF_FUNC_get_func_ip) {
14335             /* Load IP address from ctx - 16 */
14336             insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16);
14337
14338             new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
14339             if (!new_prog)
14340                 return -ENOMEM;
14341
14342             env->prog = prog = new_prog;
14343             insn      = new_prog->insnsi + i + delta;
14344             continue;
14345         }
14346
14347 patch_call_imm:
14348         fn = env->ops->get_func_proto(insn->imm, env->prog);
14349         /* all functions that have prototype and verifier allowed
14350          * programs to call them, must be real in-kernel functions
14351          */
14352         if (!fn->func) {
14353             verbose(env,
14354                 "kernel subsystem misconfigured func %s#%d\n",
14355                 func_id_name(insn->imm), insn->imm);
14356             return -EFAULT;
14357         }
14358         insn->imm = fn->func - __bpf_call_base;
14359     }
14360
14361     /* Since poke tab is now finalized, publish aux to tracker. */
14362     for (i = 0; i < prog->aux->size_poke_tab; i++) {
14363         map_ptr = prog->aux->poke_tab[i].tail_call.map;
14364         if (!map_ptr->ops->map_poke_track ||
14365             !map_ptr->ops->map_poke_untrack ||
14366             !map_ptr->ops->map_poke_run) {
14367             verbose(env, "bpf verifier is misconfigured\n");
14368             return -EINVAL;
14369         }
14370
14371         ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
14372         if (ret < 0) {
14373             verbose(env, "tracking tail call prog failed\n");
14374             return ret;
14375         }
14376     }
14377
14378     sort_kfunc_descs_by_imm(env->prog);
14379
14380     return 0;
14381 }
14382
14383 static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
14384                     int position,
14385                     s32 stack_base,
14386                     u32 callback_subprogno,
14387                     u32 *cnt)
14388 {
14389     s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
14390     s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
14391     s32 r8_offset = stack_base + 2 * BPF_REG_SIZE;
14392     int reg_loop_max = BPF_REG_6;
14393     int reg_loop_cnt = BPF_REG_7;
14394     int reg_loop_ctx = BPF_REG_8;
14395
14396     struct bpf_prog *new_prog;
14397     u32 callback_start;
14398     u32 call_insn_offset;
14399     s32 callback_offset;
14400
14401     /* This represents an inlined version of bpf_iter.c:bpf_loop,
14402      * be careful to modify this code in sync.
14403      */
14404     struct bpf_insn insn_buf[] = {
14405         /* Return error and jump to the end of the patch if
14406          * expected number of iterations is too big.
14407          */
14408         BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2),
14409         BPF_MOV32_IMM(BPF_REG_0, -E2BIG),
14410         BPF_JMP_IMM(BPF_JA, 0, 0, 16),
14411         /* spill R6, R7, R8 to use these as loop vars */
14412         BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset),
14413         BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset),
14414         BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset),
14415         /* initialize loop vars */
14416         BPF_MOV64_REG(reg_loop_max, BPF_REG_1),
14417         BPF_MOV32_IMM(reg_loop_cnt, 0),
14418         BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3),
14419         /* loop header,
14420          * if reg_loop_cnt >= reg_loop_max skip the loop body
14421          */
14422         BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5),
14423         /* callback call,
14424          * correct callback offset would be set after patching
14425          */
14426         BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt),
14427         BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx),
14428         BPF_CALL_REL(0),
14429         /* increment loop counter */
14430         BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1),
14431         /* jump to loop header if callback returned 0 */
14432         BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6),
14433         /* return value of bpf_loop,
14434          * set R0 to the number of iterations
14435          */
14436         BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt),
14437         /* restore original values of R6, R7, R8 */
14438         BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset),
14439         BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset),
14440         BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset),
14441     };
14442
14443     *cnt = ARRAY_SIZE(insn_buf);
14444     new_prog = bpf_patch_insn_data(env, position, insn_buf, *cnt);
14445     if (!new_prog)
14446         return new_prog;
14447
14448     /* callback start is known only after patching */
14449     callback_start = env->subprog_info[callback_subprogno].start;
14450     /* Note: insn_buf[12] is an offset of BPF_CALL_REL instruction */
14451     call_insn_offset = position + 12;
14452     callback_offset = callback_start - call_insn_offset - 1;
14453     new_prog->insnsi[call_insn_offset].imm = callback_offset;
14454
14455     return new_prog;
14456 }
14457
14458 static bool is_bpf_loop_call(struct bpf_insn *insn)
14459 {
14460     return insn->code == (BPF_JMP | BPF_CALL) &&
14461         insn->src_reg == 0 &&
14462         insn->imm == BPF_FUNC_loop;
14463 }
14464
14465 /* For all sub-programs in the program (including main) check
14466  * insn_aux_data to see if there are bpf_loop calls that require
14467  * inlining. If such calls are found the calls are replaced with a
14468  * sequence of instructions produced by `inline_bpf_loop` function and
14469  * subprog stack_depth is increased by the size of 3 registers.
14470  * This stack space is used to spill values of the R6, R7, R8.  These
14471  * registers are used to store the loop bound, counter and context
14472  * variables.
14473  */
14474 static int optimize_bpf_loop(struct bpf_verifier_env *env)
14475 {
14476     struct bpf_subprog_info *subprogs = env->subprog_info;
14477     int i, cur_subprog = 0, cnt, delta = 0;
14478     struct bpf_insn *insn = env->prog->insnsi;
14479     int insn_cnt = env->prog->len;
14480     u16 stack_depth = subprogs[cur_subprog].stack_depth;
14481     u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
14482     u16 stack_depth_extra = 0;
14483
14484     for (i = 0; i < insn_cnt; i++, insn++) {
14485         struct bpf_loop_inline_state *inline_state =
14486             &env->insn_aux_data[i + delta].loop_inline_state;
14487
14488         if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) {
14489             struct bpf_prog *new_prog;
14490
14491             stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup;
14492             new_prog = inline_bpf_loop(env,
14493                            i + delta,
14494                            -(stack_depth + stack_depth_extra),
14495                            inline_state->callback_subprogno,
14496                            &cnt);
14497             if (!new_prog)
14498                 return -ENOMEM;
14499
14500             delta     += cnt - 1;
14501             env->prog  = new_prog;
14502             insn       = new_prog->insnsi + i + delta;
14503         }
14504
14505         if (subprogs[cur_subprog + 1].start == i + delta + 1) {
14506             subprogs[cur_subprog].stack_depth += stack_depth_extra;
14507             cur_subprog++;
14508             stack_depth = subprogs[cur_subprog].stack_depth;
14509             stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
14510             stack_depth_extra = 0;
14511         }
14512     }
14513
14514     env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
14515
14516     return 0;
14517 }
14518
14519 static void free_states(struct bpf_verifier_env *env)
14520 {
14521     struct bpf_verifier_state_list *sl, *sln;
14522     int i;
14523
14524     sl = env->free_list;
14525     while (sl) {
14526         sln = sl->next;
14527         free_verifier_state(&sl->state, false);
14528         kfree(sl);
14529         sl = sln;
14530     }
14531     env->free_list = NULL;
14532
14533     if (!env->explored_states)
14534         return;
14535
14536     for (i = 0; i < state_htab_size(env); i++) {
14537         sl = env->explored_states[i];
14538
14539         while (sl) {
14540             sln = sl->next;
14541             free_verifier_state(&sl->state, false);
14542             kfree(sl);
14543             sl = sln;
14544         }
14545         env->explored_states[i] = NULL;
14546     }
14547 }
14548
14549 static int do_check_common(struct bpf_verifier_env *env, int subprog)
14550 {
14551     bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
14552     struct bpf_verifier_state *state;
14553     struct bpf_reg_state *regs;
14554     int ret, i;
14555
14556     env->prev_linfo = NULL;
14557     env->pass_cnt++;
14558
14559     state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
14560     if (!state)
14561         return -ENOMEM;
14562     state->curframe = 0;
14563     state->speculative = false;
14564     state->branches = 1;
14565     state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
14566     if (!state->frame[0]) {
14567         kfree(state);
14568         return -ENOMEM;
14569     }
14570     env->cur_state = state;
14571     init_func_state(env, state->frame[0],
14572             BPF_MAIN_FUNC /* callsite */,
14573             0 /* frameno */,
14574             subprog);
14575
14576     regs = state->frame[state->curframe]->regs;
14577     if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
14578         ret = btf_prepare_func_args(env, subprog, regs);
14579         if (ret)
14580             goto out;
14581         for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
14582             if (regs[i].type == PTR_TO_CTX)
14583                 mark_reg_known_zero(env, regs, i);
14584             else if (regs[i].type == SCALAR_VALUE)
14585                 mark_reg_unknown(env, regs, i);
14586             else if (base_type(regs[i].type) == PTR_TO_MEM) {
14587                 const u32 mem_size = regs[i].mem_size;
14588
14589                 mark_reg_known_zero(env, regs, i);
14590                 regs[i].mem_size = mem_size;
14591                 regs[i].id = ++env->id_gen;
14592             }
14593         }
14594     } else {
14595         /* 1st arg to a function */
14596         regs[BPF_REG_1].type = PTR_TO_CTX;
14597         mark_reg_known_zero(env, regs, BPF_REG_1);
14598         ret = btf_check_subprog_arg_match(env, subprog, regs);
14599         if (ret == -EFAULT)
14600             /* unlikely verifier bug. abort.
14601              * ret == 0 and ret < 0 are sadly acceptable for
14602              * main() function due to backward compatibility.
14603              * Like socket filter program may be written as:
14604              * int bpf_prog(struct pt_regs *ctx)
14605              * and never dereference that ctx in the program.
14606              * 'struct pt_regs' is a type mismatch for socket
14607              * filter that should be using 'struct __sk_buff'.
14608              */
14609             goto out;
14610     }
14611
14612     ret = do_check(env);
14613 out:
14614     /* check for NULL is necessary, since cur_state can be freed inside
14615      * do_check() under memory pressure.
14616      */
14617     if (env->cur_state) {
14618         free_verifier_state(env->cur_state, true);
14619         env->cur_state = NULL;
14620     }
14621     while (!pop_stack(env, NULL, NULL, false));
14622     if (!ret && pop_log)
14623         bpf_vlog_reset(&env->log, 0);
14624     free_states(env);
14625     return ret;
14626 }
14627
14628 /* Verify all global functions in a BPF program one by one based on their BTF.
14629  * All global functions must pass verification. Otherwise the whole program is rejected.
14630  * Consider:
14631  * int bar(int);
14632  * int foo(int f)
14633  * {
14634  *    return bar(f);
14635  * }
14636  * int bar(int b)
14637  * {
14638  *    ...
14639  * }
14640  * foo() will be verified first for R1=any_scalar_value. During verification it
14641  * will be assumed that bar() already verified successfully and call to bar()
14642  * from foo() will be checked for type match only. Later bar() will be verified
14643  * independently to check that it's safe for R1=any_scalar_value.
14644  */
14645 static int do_check_subprogs(struct bpf_verifier_env *env)
14646 {
14647     struct bpf_prog_aux *aux = env->prog->aux;
14648     int i, ret;
14649
14650     if (!aux->func_info)
14651         return 0;
14652
14653     for (i = 1; i < env->subprog_cnt; i++) {
14654         if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL)
14655             continue;
14656         env->insn_idx = env->subprog_info[i].start;
14657         WARN_ON_ONCE(env->insn_idx == 0);
14658         ret = do_check_common(env, i);
14659         if (ret) {
14660             return ret;
14661         } else if (env->log.level & BPF_LOG_LEVEL) {
14662             verbose(env,
14663                 "Func#%d is safe for any args that match its prototype\n",
14664                 i);
14665         }
14666     }
14667     return 0;
14668 }
14669
14670 static int do_check_main(struct bpf_verifier_env *env)
14671 {
14672     int ret;
14673
14674     env->insn_idx = 0;
14675     ret = do_check_common(env, 0);
14676     if (!ret)
14677         env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
14678     return ret;
14679 }
14680
14681
14682 static void print_verification_stats(struct bpf_verifier_env *env)
14683 {
14684     int i;
14685
14686     if (env->log.level & BPF_LOG_STATS) {
14687         verbose(env, "verification time %lld usec\n",
14688             div_u64(env->verification_time, 1000));
14689         verbose(env, "stack depth ");
14690         for (i = 0; i < env->subprog_cnt; i++) {
14691             u32 depth = env->subprog_info[i].stack_depth;
14692
14693             verbose(env, "%d", depth);
14694             if (i + 1 < env->subprog_cnt)
14695                 verbose(env, "+");
14696         }
14697         verbose(env, "\n");
14698     }
14699     verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
14700         "total_states %d peak_states %d mark_read %d\n",
14701         env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
14702         env->max_states_per_insn, env->total_states,
14703         env->peak_states, env->longest_mark_read_walk);
14704 }
14705
14706 static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
14707 {
14708     const struct btf_type *t, *func_proto;
14709     const struct bpf_struct_ops *st_ops;
14710     const struct btf_member *member;
14711     struct bpf_prog *prog = env->prog;
14712     u32 btf_id, member_idx;
14713     const char *mname;
14714
14715     if (!prog->gpl_compatible) {
14716         verbose(env, "struct ops programs must have a GPL compatible license\n");
14717         return -EINVAL;
14718     }
14719
14720     btf_id = prog->aux->attach_btf_id;
14721     st_ops = bpf_struct_ops_find(btf_id);
14722     if (!st_ops) {
14723         verbose(env, "attach_btf_id %u is not a supported struct\n",
14724             btf_id);
14725         return -ENOTSUPP;
14726     }
14727
14728     t = st_ops->type;
14729     member_idx = prog->expected_attach_type;
14730     if (member_idx >= btf_type_vlen(t)) {
14731         verbose(env, "attach to invalid member idx %u of struct %s\n",
14732             member_idx, st_ops->name);
14733         return -EINVAL;
14734     }
14735
14736     member = &btf_type_member(t)[member_idx];
14737     mname = btf_name_by_offset(btf_vmlinux, member->name_off);
14738     func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type,
14739                            NULL);
14740     if (!func_proto) {
14741         verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
14742             mname, member_idx, st_ops->name);
14743         return -EINVAL;
14744     }
14745
14746     if (st_ops->check_member) {
14747         int err = st_ops->check_member(t, member);
14748
14749         if (err) {
14750             verbose(env, "attach to unsupported member %s of struct %s\n",
14751                 mname, st_ops->name);
14752             return err;
14753         }
14754     }
14755
14756     prog->aux->attach_func_proto = func_proto;
14757     prog->aux->attach_func_name = mname;
14758     env->ops = st_ops->verifier_ops;
14759
14760     return 0;
14761 }
14762 #define SECURITY_PREFIX "security_"
14763
14764 static int check_attach_modify_return(unsigned long addr, const char *func_name)
14765 {
14766     if (within_error_injection_list(addr) ||
14767         !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
14768         return 0;
14769
14770     return -EINVAL;
14771 }
14772
14773 /* list of non-sleepable functions that are otherwise on
14774  * ALLOW_ERROR_INJECTION list
14775  */
14776 BTF_SET_START(btf_non_sleepable_error_inject)
14777 /* Three functions below can be called from sleepable and non-sleepable context.
14778  * Assume non-sleepable from bpf safety point of view.
14779  */
14780 BTF_ID(func, __filemap_add_folio)
14781 BTF_ID(func, should_fail_alloc_page)
14782 BTF_ID(func, should_failslab)
14783 BTF_SET_END(btf_non_sleepable_error_inject)
14784
14785 static int check_non_sleepable_error_inject(u32 btf_id)
14786 {
14787     return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
14788 }
14789
14790 int bpf_check_attach_target(struct bpf_verifier_log *log,
14791                 const struct bpf_prog *prog,
14792                 const struct bpf_prog *tgt_prog,
14793                 u32 btf_id,
14794                 struct bpf_attach_target_info *tgt_info)
14795 {
14796     bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
14797     const char prefix[] = "btf_trace_";
14798     int ret = 0, subprog = -1, i;
14799     const struct btf_type *t;
14800     bool conservative = true;
14801     const char *tname;
14802     struct btf *btf;
14803     long addr = 0;
14804
14805     if (!btf_id) {
14806         bpf_log(log, "Tracing programs must provide btf_id\n");
14807         return -EINVAL;
14808     }
14809     btf = tgt_prog ? tgt_prog->aux->btf : prog->aux->attach_btf;
14810     if (!btf) {
14811         bpf_log(log,
14812             "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
14813         return -EINVAL;
14814     }
14815     t = btf_type_by_id(btf, btf_id);
14816     if (!t) {
14817         bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
14818         return -EINVAL;
14819     }
14820     tname = btf_name_by_offset(btf, t->name_off);
14821     if (!tname) {
14822         bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
14823         return -EINVAL;
14824     }
14825     if (tgt_prog) {
14826         struct bpf_prog_aux *aux = tgt_prog->aux;
14827
14828         for (i = 0; i < aux->func_info_cnt; i++)
14829             if (aux->func_info[i].type_id == btf_id) {
14830                 subprog = i;
14831                 break;
14832             }
14833         if (subprog == -1) {
14834             bpf_log(log, "Subprog %s doesn't exist\n", tname);
14835             return -EINVAL;
14836         }
14837         conservative = aux->func_info_aux[subprog].unreliable;
14838         if (prog_extension) {
14839             if (conservative) {
14840                 bpf_log(log,
14841                     "Cannot replace static functions\n");
14842                 return -EINVAL;
14843             }
14844             if (!prog->jit_requested) {
14845                 bpf_log(log,
14846                     "Extension programs should be JITed\n");
14847                 return -EINVAL;
14848             }
14849         }
14850         if (!tgt_prog->jited) {
14851             bpf_log(log, "Can attach to only JITed progs\n");
14852             return -EINVAL;
14853         }
14854         if (tgt_prog->type == prog->type) {
14855             /* Cannot fentry/fexit another fentry/fexit program.
14856              * Cannot attach program extension to another extension.
14857              * It's ok to attach fentry/fexit to extension program.
14858              */
14859             bpf_log(log, "Cannot recursively attach\n");
14860             return -EINVAL;
14861         }
14862         if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
14863             prog_extension &&
14864             (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
14865              tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
14866             /* Program extensions can extend all program types
14867              * except fentry/fexit. The reason is the following.
14868              * The fentry/fexit programs are used for performance
14869              * analysis, stats and can be attached to any program
14870              * type except themselves. When extension program is
14871              * replacing XDP function it is necessary to allow
14872              * performance analysis of all functions. Both original
14873              * XDP program and its program extension. Hence
14874              * attaching fentry/fexit to BPF_PROG_TYPE_EXT is
14875              * allowed. If extending of fentry/fexit was allowed it
14876              * would be possible to create long call chain
14877              * fentry->extension->fentry->extension beyond
14878              * reasonable stack size. Hence extending fentry is not
14879              * allowed.
14880              */
14881             bpf_log(log, "Cannot extend fentry/fexit\n");
14882             return -EINVAL;
14883         }
14884     } else {
14885         if (prog_extension) {
14886             bpf_log(log, "Cannot replace kernel functions\n");
14887             return -EINVAL;
14888         }
14889     }
14890
14891     switch (prog->expected_attach_type) {
14892     case BPF_TRACE_RAW_TP:
14893         if (tgt_prog) {
14894             bpf_log(log,
14895                 "Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
14896             return -EINVAL;
14897         }
14898         if (!btf_type_is_typedef(t)) {
14899             bpf_log(log, "attach_btf_id %u is not a typedef\n",
14900                 btf_id);
14901             return -EINVAL;
14902         }
14903         if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
14904             bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
14905                 btf_id, tname);
14906             return -EINVAL;
14907         }
14908         tname += sizeof(prefix) - 1;
14909         t = btf_type_by_id(btf, t->type);
14910         if (!btf_type_is_ptr(t))
14911             /* should never happen in valid vmlinux build */
14912             return -EINVAL;
14913         t = btf_type_by_id(btf, t->type);
14914         if (!btf_type_is_func_proto(t))
14915             /* should never happen in valid vmlinux build */
14916             return -EINVAL;
14917
14918         break;
14919     case BPF_TRACE_ITER:
14920         if (!btf_type_is_func(t)) {
14921             bpf_log(log, "attach_btf_id %u is not a function\n",
14922                 btf_id);
14923             return -EINVAL;
14924         }
14925         t = btf_type_by_id(btf, t->type);
14926         if (!btf_type_is_func_proto(t))
14927             return -EINVAL;
14928         ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
14929         if (ret)
14930             return ret;
14931         break;
14932     default:
14933         if (!prog_extension)
14934             return -EINVAL;
14935         fallthrough;
14936     case BPF_MODIFY_RETURN:
14937     case BPF_LSM_MAC:
14938     case BPF_LSM_CGROUP:
14939     case BPF_TRACE_FENTRY:
14940     case BPF_TRACE_FEXIT:
14941         if (!btf_type_is_func(t)) {
14942             bpf_log(log, "attach_btf_id %u is not a function\n",
14943                 btf_id);
14944             return -EINVAL;
14945         }
14946         if (prog_extension &&
14947             btf_check_type_match(log, prog, btf, t))
14948             return -EINVAL;
14949         t = btf_type_by_id(btf, t->type);
14950         if (!btf_type_is_func_proto(t))
14951             return -EINVAL;
14952
14953         if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
14954             (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
14955              prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
14956             return -EINVAL;
14957
14958         if (tgt_prog && conservative)
14959             t = NULL;
14960
14961         ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
14962         if (ret < 0)
14963             return ret;
14964
14965         if (tgt_prog) {
14966             if (subprog == 0)
14967                 addr = (long) tgt_prog->bpf_func;
14968             else
14969                 addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
14970         } else {
14971             addr = kallsyms_lookup_name(tname);
14972             if (!addr) {
14973                 bpf_log(log,
14974                     "The address of function %s cannot be found\n",
14975                     tname);
14976                 return -ENOENT;
14977             }
14978         }
14979
14980         if (prog->aux->sleepable) {
14981             ret = -EINVAL;
14982             switch (prog->type) {
14983             case BPF_PROG_TYPE_TRACING:
14984                 /* fentry/fexit/fmod_ret progs can be sleepable only if they are
14985                  * attached to ALLOW_ERROR_INJECTION and are not in denylist.
14986                  */
14987                 if (!check_non_sleepable_error_inject(btf_id) &&
14988                     within_error_injection_list(addr))
14989                     ret = 0;
14990                 break;
14991             case BPF_PROG_TYPE_LSM:
14992                 /* LSM progs check that they are attached to bpf_lsm_*() funcs.
14993                  * Only some of them are sleepable.
14994                  */
14995                 if (bpf_lsm_is_sleepable_hook(btf_id))
14996                     ret = 0;
14997                 break;
14998             default:
14999                 break;
15000             }
15001             if (ret) {
15002                 bpf_log(log, "%s is not sleepable\n", tname);
15003                 return ret;
15004             }
15005         } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
15006             if (tgt_prog) {
15007                 bpf_log(log, "can't modify return codes of BPF programs\n");
15008                 return -EINVAL;
15009             }
15010             ret = check_attach_modify_return(addr, tname);
15011             if (ret) {
15012                 bpf_log(log, "%s() is not modifiable\n", tname);
15013                 return ret;
15014             }
15015         }
15016
15017         break;
15018     }
15019     tgt_info->tgt_addr = addr;
15020     tgt_info->tgt_name = tname;
15021     tgt_info->tgt_type = t;
15022     return 0;
15023 }
15024
15025 BTF_SET_START(btf_id_deny)
15026 BTF_ID_UNUSED
15027 #ifdef CONFIG_SMP
15028 BTF_ID(func, migrate_disable)
15029 BTF_ID(func, migrate_enable)
15030 #endif
15031 #if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
15032 BTF_ID(func, rcu_read_unlock_strict)
15033 #endif
15034 BTF_SET_END(btf_id_deny)
15035
15036 static int check_attach_btf_id(struct bpf_verifier_env *env)
15037 {
15038     struct bpf_prog *prog = env->prog;
15039     struct bpf_prog *tgt_prog = prog->aux->dst_prog;
15040     struct bpf_attach_target_info tgt_info = {};
15041     u32 btf_id = prog->aux->attach_btf_id;
15042     struct bpf_trampoline *tr;
15043     int ret;
15044     u64 key;
15045
15046     if (prog->type == BPF_PROG_TYPE_SYSCALL) {
15047         if (prog->aux->sleepable)
15048             /* attach_btf_id checked to be zero already */
15049             return 0;
15050         verbose(env, "Syscall programs can only be sleepable\n");
15051         return -EINVAL;
15052     }
15053
15054     if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING &&
15055         prog->type != BPF_PROG_TYPE_LSM && prog->type != BPF_PROG_TYPE_KPROBE) {
15056         verbose(env, "Only fentry/fexit/fmod_ret, lsm, and kprobe/uprobe programs can be sleepable\n");
15057         return -EINVAL;
15058     }
15059
15060     if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
15061         return check_struct_ops_btf_id(env);
15062
15063     if (prog->type != BPF_PROG_TYPE_TRACING &&
15064         prog->type != BPF_PROG_TYPE_LSM &&
15065         prog->type != BPF_PROG_TYPE_EXT)
15066         return 0;
15067
15068     ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
15069     if (ret)
15070         return ret;
15071
15072     if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
15073         /* to make freplace equivalent to their targets, they need to
15074          * inherit env->ops and expected_attach_type for the rest of the
15075          * verification
15076          */
15077         env->ops = bpf_verifier_ops[tgt_prog->type];
15078         prog->expected_attach_type = tgt_prog->expected_attach_type;
15079     }
15080
15081     /* store info about the attachment target that will be used later */
15082     prog->aux->attach_func_proto = tgt_info.tgt_type;
15083     prog->aux->attach_func_name = tgt_info.tgt_name;
15084
15085     if (tgt_prog) {
15086         prog->aux->saved_dst_prog_type = tgt_prog->type;
15087         prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
15088     }
15089
15090     if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
15091         prog->aux->attach_btf_trace = true;
15092         return 0;
15093     } else if (prog->expected_attach_type == BPF_TRACE_ITER) {
15094         if (!bpf_iter_prog_supported(prog))
15095             return -EINVAL;
15096         return 0;
15097     }
15098
15099     if (prog->type == BPF_PROG_TYPE_LSM) {
15100         ret = bpf_lsm_verify_prog(&env->log, prog);
15101         if (ret < 0)
15102             return ret;
15103     } else if (prog->type == BPF_PROG_TYPE_TRACING &&
15104            btf_id_set_contains(&btf_id_deny, btf_id)) {
15105         return -EINVAL;
15106     }
15107
15108     key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
15109     tr = bpf_trampoline_get(key, &tgt_info);
15110     if (!tr)
15111         return -ENOMEM;
15112
15113     prog->aux->dst_trampoline = tr;
15114     return 0;
15115 }
15116
15117 struct btf *bpf_get_btf_vmlinux(void)
15118 {
15119     if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
15120         mutex_lock(&bpf_verifier_lock);
15121         if (!btf_vmlinux)
15122             btf_vmlinux = btf_parse_vmlinux();
15123         mutex_unlock(&bpf_verifier_lock);
15124     }
15125     return btf_vmlinux;
15126 }
15127
15128 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
15129 {
15130     u64 start_time = ktime_get_ns();
15131     struct bpf_verifier_env *env;
15132     struct bpf_verifier_log *log;
15133     int i, len, ret = -EINVAL;
15134     bool is_priv;
15135
15136     /* no program is valid */
15137     if (ARRAY_SIZE(bpf_verifier_ops) == 0)
15138         return -EINVAL;
15139
15140     /* 'struct bpf_verifier_env' can be global, but since it's not small,
15141      * allocate/free it every time bpf_check() is called
15142      */
15143     env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
15144     if (!env)
15145         return -ENOMEM;
15146     log = &env->log;
15147
15148     len = (*prog)->len;
15149     env->insn_aux_data =
15150         vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
15151     ret = -ENOMEM;
15152     if (!env->insn_aux_data)
15153         goto err_free_env;
15154     for (i = 0; i < len; i++)
15155         env->insn_aux_data[i].orig_idx = i;
15156     env->prog = *prog;
15157     env->ops = bpf_verifier_ops[env->prog->type];
15158     env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
15159     is_priv = bpf_capable();
15160
15161     bpf_get_btf_vmlinux();
15162
15163     /* grab the mutex to protect few globals used by verifier */
15164     if (!is_priv)
15165         mutex_lock(&bpf_verifier_lock);
15166
15167     if (attr->log_level || attr->log_buf || attr->log_size) {
15168         /* user requested verbose verifier output
15169          * and supplied buffer to store the verification trace
15170          */
15171         log->level = attr->log_level;
15172         log->ubuf = (char __user *) (unsigned long) attr->log_buf;
15173         log->len_total = attr->log_size;
15174
15175         /* log attributes have to be sane */
15176         if (!bpf_verifier_log_attr_valid(log)) {
15177             ret = -EINVAL;
15178             goto err_unlock;
15179         }
15180     }
15181
15182     mark_verifier_state_clean(env);
15183
15184     if (IS_ERR(btf_vmlinux)) {
15185         /* Either gcc or pahole or kernel are broken. */
15186         verbose(env, "in-kernel BTF is malformed\n");
15187         ret = PTR_ERR(btf_vmlinux);
15188         goto skip_full_check;
15189     }
15190
15191     env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
15192     if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
15193         env->strict_alignment = true;
15194     if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
15195         env->strict_alignment = false;
15196
15197     env->allow_ptr_leaks = bpf_allow_ptr_leaks();
15198     env->allow_uninit_stack = bpf_allow_uninit_stack();
15199     env->allow_ptr_to_map_access = bpf_allow_ptr_to_map_access();
15200     env->bypass_spec_v1 = bpf_bypass_spec_v1();
15201     env->bypass_spec_v4 = bpf_bypass_spec_v4();
15202     env->bpf_capable = bpf_capable();
15203
15204     if (is_priv)
15205         env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
15206
15207     env->explored_states = kvcalloc(state_htab_size(env),
15208                        sizeof(struct bpf_verifier_state_list *),
15209                        GFP_USER);
15210     ret = -ENOMEM;
15211     if (!env->explored_states)
15212         goto skip_full_check;
15213
15214     ret = add_subprog_and_kfunc(env);
15215     if (ret < 0)
15216         goto skip_full_check;
15217
15218     ret = check_subprogs(env);
15219     if (ret < 0)
15220         goto skip_full_check;
15221
15222     ret = check_btf_info(env, attr, uattr);
15223     if (ret < 0)
15224         goto skip_full_check;
15225
15226     ret = check_attach_btf_id(env);
15227     if (ret)
15228         goto skip_full_check;
15229
15230     ret = resolve_pseudo_ldimm64(env);
15231     if (ret < 0)
15232         goto skip_full_check;
15233
15234     if (bpf_prog_is_dev_bound(env->prog->aux)) {
15235         ret = bpf_prog_offload_verifier_prep(env->prog);
15236         if (ret)
15237             goto skip_full_check;
15238     }
15239
15240     ret = check_cfg(env);
15241     if (ret < 0)
15242         goto skip_full_check;
15243
15244     ret = do_check_subprogs(env);
15245     ret = ret ?: do_check_main(env);
15246
15247     if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
15248         ret = bpf_prog_offload_finalize(env);
15249
15250 skip_full_check:
15251     kvfree(env->explored_states);
15252
15253     if (ret == 0)
15254         ret = check_max_stack_depth(env);
15255
15256     /* instruction rewrites happen after this point */
15257     if (ret == 0)
15258         ret = optimize_bpf_loop(env);
15259
15260     if (is_priv) {
15261         if (ret == 0)
15262             opt_hard_wire_dead_code_branches(env);
15263         if (ret == 0)
15264             ret = opt_remove_dead_code(env);
15265         if (ret == 0)
15266             ret = opt_remove_nops(env);
15267     } else {
15268         if (ret == 0)
15269             sanitize_dead_code(env);
15270     }
15271
15272     if (ret == 0)
15273         /* program is valid, convert *(u32*)(ctx + off) accesses */
15274         ret = convert_ctx_accesses(env);
15275
15276     if (ret == 0)
15277         ret = do_misc_fixups(env);
15278
15279     /* do 32-bit optimization after insn patching has done so those patched
15280      * insns could be handled correctly.
15281      */
15282     if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) {
15283         ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
15284         env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
15285                                      : false;
15286     }
15287
15288     if (ret == 0)
15289         ret = fixup_call_args(env);
15290
15291     env->verification_time = ktime_get_ns() - start_time;
15292     print_verification_stats(env);
15293     env->prog->aux->verified_insns = env->insn_processed;
15294
15295     if (log->level && bpf_verifier_log_full(log))
15296         ret = -ENOSPC;
15297     if (log->level && !log->ubuf) {
15298         ret = -EFAULT;
15299         goto err_release_maps;
15300     }
15301
15302     if (ret)
15303         goto err_release_maps;
15304
15305     if (env->used_map_cnt) {
15306         /* if program passed verifier, update used_maps in bpf_prog_info */
15307         env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
15308                               sizeof(env->used_maps[0]),
15309                               GFP_KERNEL);
15310
15311         if (!env->prog->aux->used_maps) {
15312             ret = -ENOMEM;
15313             goto err_release_maps;
15314         }
15315
15316         memcpy(env->prog->aux->used_maps, env->used_maps,
15317                sizeof(env->used_maps[0]) * env->used_map_cnt);
15318         env->prog->aux->used_map_cnt = env->used_map_cnt;
15319     }
15320     if (env->used_btf_cnt) {
15321         /* if program passed verifier, update used_btfs in bpf_prog_aux */
15322         env->prog->aux->used_btfs = kmalloc_array(env->used_btf_cnt,
15323                               sizeof(env->used_btfs[0]),
15324                               GFP_KERNEL);
15325         if (!env->prog->aux->used_btfs) {
15326             ret = -ENOMEM;
15327             goto err_release_maps;
15328         }
15329
15330         memcpy(env->prog->aux->used_btfs, env->used_btfs,
15331                sizeof(env->used_btfs[0]) * env->used_btf_cnt);
15332         env->prog->aux->used_btf_cnt = env->used_btf_cnt;
15333     }
15334     if (env->used_map_cnt || env->used_btf_cnt) {
15335         /* program is valid. Convert pseudo bpf_ld_imm64 into generic
15336          * bpf_ld_imm64 instructions
15337          */
15338         convert_pseudo_ld_imm64(env);
15339     }
15340
15341     adjust_btf_func(env);
15342
15343 err_release_maps:
15344     if (!env->prog->aux->used_maps)
15345         /* if we didn't copy map pointers into bpf_prog_info, release
15346          * them now. Otherwise free_used_maps() will release them.
15347          */
15348         release_maps(env);
15349     if (!env->prog->aux->used_btfs)
15350         release_btfs(env);
15351
15352     /* extension progs temporarily inherit the attach_type of their targets
15353        for verification purposes, so set it back to zero before returning
15354      */
15355     if (env->prog->type == BPF_PROG_TYPE_EXT)
15356         env->prog->expected_attach_type = 0;
15357
15358     *prog = env->prog;
15359 err_unlock:
15360     if (!is_priv)
15361         mutex_unlock(&bpf_verifier_lock);
15362     vfree(env->insn_aux_data);
15363 err_free_env:
15364     kfree(env);
15365     return ret;
15366 }