lib/bpf/libbpf.c

0001 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
0002
0003 /*
0004  * Common eBPF ELF object loading operations.
0005  *
0006  * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
0007  * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
0008  * Copyright (C) 2015 Huawei Inc.
0009  * Copyright (C) 2017 Nicira, Inc.
0010  * Copyright (C) 2019 Isovalent, Inc.
0011  */
0012
0013 #ifndef _GNU_SOURCE
0014 #define _GNU_SOURCE
0015 #endif
0016 #include <stdlib.h>
0017 #include <stdio.h>
0018 #include <stdarg.h>
0019 #include <libgen.h>
0020 #include <inttypes.h>
0021 #include <limits.h>
0022 #include <string.h>
0023 #include <unistd.h>
0024 #include <endian.h>
0025 #include <fcntl.h>
0026 #include <errno.h>
0027 #include <ctype.h>
0028 #include <asm/unistd.h>
0029 #include <linux/err.h>
0030 #include <linux/kernel.h>
0031 #include <linux/bpf.h>
0032 #include <linux/btf.h>
0033 #include <linux/filter.h>
0034 #include <linux/limits.h>
0035 #include <linux/perf_event.h>
0036 #include <linux/ring_buffer.h>
0037 #include <linux/version.h>
0038 #include <sys/epoll.h>
0039 #include <sys/ioctl.h>
0040 #include <sys/mman.h>
0041 #include <sys/stat.h>
0042 #include <sys/types.h>
0043 #include <sys/vfs.h>
0044 #include <sys/utsname.h>
0045 #include <sys/resource.h>
0046 #include <libelf.h>
0047 #include <gelf.h>
0048 #include <zlib.h>
0049
0050 #include "libbpf.h"
0051 #include "bpf.h"
0052 #include "btf.h"
0053 #include "str_error.h"
0054 #include "libbpf_internal.h"
0055 #include "hashmap.h"
0056 #include "bpf_gen_internal.h"
0057
0058 #ifndef BPF_FS_MAGIC
0059 #define BPF_FS_MAGIC        0xcafe4a11
0060 #endif
0061
0062 #define BPF_INSN_SZ (sizeof(struct bpf_insn))
0063
/* vfprintf() in __base_pr() uses a nonliteral format string. It may break
 * compilation if the user enables the corresponding warning. Disable it
 * explicitly.
 */
0067 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
0068
0069 #define __printf(a, b)  __attribute__((format(printf, a, b)))
0070
0071 static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
0072 static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);
0073
0074 static const char * const attach_type_name[] = {
0075     [BPF_CGROUP_INET_INGRESS]   = "cgroup_inet_ingress",
0076     [BPF_CGROUP_INET_EGRESS]    = "cgroup_inet_egress",
0077     [BPF_CGROUP_INET_SOCK_CREATE]   = "cgroup_inet_sock_create",
0078     [BPF_CGROUP_INET_SOCK_RELEASE]  = "cgroup_inet_sock_release",
0079     [BPF_CGROUP_SOCK_OPS]       = "cgroup_sock_ops",
0080     [BPF_CGROUP_DEVICE]     = "cgroup_device",
0081     [BPF_CGROUP_INET4_BIND]     = "cgroup_inet4_bind",
0082     [BPF_CGROUP_INET6_BIND]     = "cgroup_inet6_bind",
0083     [BPF_CGROUP_INET4_CONNECT]  = "cgroup_inet4_connect",
0084     [BPF_CGROUP_INET6_CONNECT]  = "cgroup_inet6_connect",
0085     [BPF_CGROUP_INET4_POST_BIND]    = "cgroup_inet4_post_bind",
0086     [BPF_CGROUP_INET6_POST_BIND]    = "cgroup_inet6_post_bind",
0087     [BPF_CGROUP_INET4_GETPEERNAME]  = "cgroup_inet4_getpeername",
0088     [BPF_CGROUP_INET6_GETPEERNAME]  = "cgroup_inet6_getpeername",
0089     [BPF_CGROUP_INET4_GETSOCKNAME]  = "cgroup_inet4_getsockname",
0090     [BPF_CGROUP_INET6_GETSOCKNAME]  = "cgroup_inet6_getsockname",
0091     [BPF_CGROUP_UDP4_SENDMSG]   = "cgroup_udp4_sendmsg",
0092     [BPF_CGROUP_UDP6_SENDMSG]   = "cgroup_udp6_sendmsg",
0093     [BPF_CGROUP_SYSCTL]     = "cgroup_sysctl",
0094     [BPF_CGROUP_UDP4_RECVMSG]   = "cgroup_udp4_recvmsg",
0095     [BPF_CGROUP_UDP6_RECVMSG]   = "cgroup_udp6_recvmsg",
0096     [BPF_CGROUP_GETSOCKOPT]     = "cgroup_getsockopt",
0097     [BPF_CGROUP_SETSOCKOPT]     = "cgroup_setsockopt",
0098     [BPF_SK_SKB_STREAM_PARSER]  = "sk_skb_stream_parser",
0099     [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict",
0100     [BPF_SK_SKB_VERDICT]        = "sk_skb_verdict",
0101     [BPF_SK_MSG_VERDICT]        = "sk_msg_verdict",
0102     [BPF_LIRC_MODE2]        = "lirc_mode2",
0103     [BPF_FLOW_DISSECTOR]        = "flow_dissector",
0104     [BPF_TRACE_RAW_TP]      = "trace_raw_tp",
0105     [BPF_TRACE_FENTRY]      = "trace_fentry",
0106     [BPF_TRACE_FEXIT]       = "trace_fexit",
0107     [BPF_MODIFY_RETURN]     = "modify_return",
0108     [BPF_LSM_MAC]           = "lsm_mac",
0109     [BPF_LSM_CGROUP]        = "lsm_cgroup",
0110     [BPF_SK_LOOKUP]         = "sk_lookup",
0111     [BPF_TRACE_ITER]        = "trace_iter",
0112     [BPF_XDP_DEVMAP]        = "xdp_devmap",
0113     [BPF_XDP_CPUMAP]        = "xdp_cpumap",
0114     [BPF_XDP]           = "xdp",
0115     [BPF_SK_REUSEPORT_SELECT]   = "sk_reuseport_select",
0116     [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE]    = "sk_reuseport_select_or_migrate",
0117     [BPF_PERF_EVENT]        = "perf_event",
0118     [BPF_TRACE_KPROBE_MULTI]    = "trace_kprobe_multi",
0119 };
0120
0121 static const char * const link_type_name[] = {
0122     [BPF_LINK_TYPE_UNSPEC]          = "unspec",
0123     [BPF_LINK_TYPE_RAW_TRACEPOINT]      = "raw_tracepoint",
0124     [BPF_LINK_TYPE_TRACING]         = "tracing",
0125     [BPF_LINK_TYPE_CGROUP]          = "cgroup",
0126     [BPF_LINK_TYPE_ITER]            = "iter",
0127     [BPF_LINK_TYPE_NETNS]           = "netns",
0128     [BPF_LINK_TYPE_XDP]         = "xdp",
0129     [BPF_LINK_TYPE_PERF_EVENT]      = "perf_event",
0130     [BPF_LINK_TYPE_KPROBE_MULTI]        = "kprobe_multi",
0131     [BPF_LINK_TYPE_STRUCT_OPS]      = "struct_ops",
0132 };
0133
0134 static const char * const map_type_name[] = {
0135     [BPF_MAP_TYPE_UNSPEC]           = "unspec",
0136     [BPF_MAP_TYPE_HASH]         = "hash",
0137     [BPF_MAP_TYPE_ARRAY]            = "array",
0138     [BPF_MAP_TYPE_PROG_ARRAY]       = "prog_array",
0139     [BPF_MAP_TYPE_PERF_EVENT_ARRAY]     = "perf_event_array",
0140     [BPF_MAP_TYPE_PERCPU_HASH]      = "percpu_hash",
0141     [BPF_MAP_TYPE_PERCPU_ARRAY]     = "percpu_array",
0142     [BPF_MAP_TYPE_STACK_TRACE]      = "stack_trace",
0143     [BPF_MAP_TYPE_CGROUP_ARRAY]     = "cgroup_array",
0144     [BPF_MAP_TYPE_LRU_HASH]         = "lru_hash",
0145     [BPF_MAP_TYPE_LRU_PERCPU_HASH]      = "lru_percpu_hash",
0146     [BPF_MAP_TYPE_LPM_TRIE]         = "lpm_trie",
0147     [BPF_MAP_TYPE_ARRAY_OF_MAPS]        = "array_of_maps",
0148     [BPF_MAP_TYPE_HASH_OF_MAPS]     = "hash_of_maps",
0149     [BPF_MAP_TYPE_DEVMAP]           = "devmap",
0150     [BPF_MAP_TYPE_DEVMAP_HASH]      = "devmap_hash",
0151     [BPF_MAP_TYPE_SOCKMAP]          = "sockmap",
0152     [BPF_MAP_TYPE_CPUMAP]           = "cpumap",
0153     [BPF_MAP_TYPE_XSKMAP]           = "xskmap",
0154     [BPF_MAP_TYPE_SOCKHASH]         = "sockhash",
0155     [BPF_MAP_TYPE_CGROUP_STORAGE]       = "cgroup_storage",
0156     [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY]  = "reuseport_sockarray",
0157     [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE]    = "percpu_cgroup_storage",
0158     [BPF_MAP_TYPE_QUEUE]            = "queue",
0159     [BPF_MAP_TYPE_STACK]            = "stack",
0160     [BPF_MAP_TYPE_SK_STORAGE]       = "sk_storage",
0161     [BPF_MAP_TYPE_STRUCT_OPS]       = "struct_ops",
0162     [BPF_MAP_TYPE_RINGBUF]          = "ringbuf",
0163     [BPF_MAP_TYPE_INODE_STORAGE]        = "inode_storage",
0164     [BPF_MAP_TYPE_TASK_STORAGE]     = "task_storage",
0165     [BPF_MAP_TYPE_BLOOM_FILTER]     = "bloom_filter",
0166 };
0167
0168 static const char * const prog_type_name[] = {
0169     [BPF_PROG_TYPE_UNSPEC]          = "unspec",
0170     [BPF_PROG_TYPE_SOCKET_FILTER]       = "socket_filter",
0171     [BPF_PROG_TYPE_KPROBE]          = "kprobe",
0172     [BPF_PROG_TYPE_SCHED_CLS]       = "sched_cls",
0173     [BPF_PROG_TYPE_SCHED_ACT]       = "sched_act",
0174     [BPF_PROG_TYPE_TRACEPOINT]      = "tracepoint",
0175     [BPF_PROG_TYPE_XDP]         = "xdp",
0176     [BPF_PROG_TYPE_PERF_EVENT]      = "perf_event",
0177     [BPF_PROG_TYPE_CGROUP_SKB]      = "cgroup_skb",
0178     [BPF_PROG_TYPE_CGROUP_SOCK]     = "cgroup_sock",
0179     [BPF_PROG_TYPE_LWT_IN]          = "lwt_in",
0180     [BPF_PROG_TYPE_LWT_OUT]         = "lwt_out",
0181     [BPF_PROG_TYPE_LWT_XMIT]        = "lwt_xmit",
0182     [BPF_PROG_TYPE_SOCK_OPS]        = "sock_ops",
0183     [BPF_PROG_TYPE_SK_SKB]          = "sk_skb",
0184     [BPF_PROG_TYPE_CGROUP_DEVICE]       = "cgroup_device",
0185     [BPF_PROG_TYPE_SK_MSG]          = "sk_msg",
0186     [BPF_PROG_TYPE_RAW_TRACEPOINT]      = "raw_tracepoint",
0187     [BPF_PROG_TYPE_CGROUP_SOCK_ADDR]    = "cgroup_sock_addr",
0188     [BPF_PROG_TYPE_LWT_SEG6LOCAL]       = "lwt_seg6local",
0189     [BPF_PROG_TYPE_LIRC_MODE2]      = "lirc_mode2",
0190     [BPF_PROG_TYPE_SK_REUSEPORT]        = "sk_reuseport",
0191     [BPF_PROG_TYPE_FLOW_DISSECTOR]      = "flow_dissector",
0192     [BPF_PROG_TYPE_CGROUP_SYSCTL]       = "cgroup_sysctl",
0193     [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
0194     [BPF_PROG_TYPE_CGROUP_SOCKOPT]      = "cgroup_sockopt",
0195     [BPF_PROG_TYPE_TRACING]         = "tracing",
0196     [BPF_PROG_TYPE_STRUCT_OPS]      = "struct_ops",
0197     [BPF_PROG_TYPE_EXT]         = "ext",
0198     [BPF_PROG_TYPE_LSM]         = "lsm",
0199     [BPF_PROG_TYPE_SK_LOOKUP]       = "sk_lookup",
0200     [BPF_PROG_TYPE_SYSCALL]         = "syscall",
0201 };
0202
0203 static int __base_pr(enum libbpf_print_level level, const char *format,
0204              va_list args)
0205 {
0206     if (level == LIBBPF_DEBUG)
0207         return 0;
0208
0209     return vfprintf(stderr, format, args);
0210 }
0211
0212 static libbpf_print_fn_t __libbpf_pr = __base_pr;
0213
0214 libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
0215 {
0216     libbpf_print_fn_t old_print_fn = __libbpf_pr;
0217
0218     __libbpf_pr = fn;
0219     return old_print_fn;
0220 }
0221
0222 __printf(2, 3)
0223 void libbpf_print(enum libbpf_print_level level, const char *format, ...)
0224 {
0225     va_list args;
0226
0227     if (!__libbpf_pr)
0228         return;
0229
0230     va_start(args, format);
0231     __libbpf_pr(level, format, args);
0232     va_end(args);
0233 }
0234
/* After an -EPERM failure while running as root, warn that a finite
 * RLIMIT_MEMLOCK may be the cause and print its current soft limit in
 * bytes, KiB or MiB. Does nothing for other errors, for non-root callers,
 * when getrlimit() fails, or when the limit is unlimited.
 */
static void pr_perm_msg(int err)
{
    struct rlimit limit;
    char buf[100];

    if (err != -EPERM)
        return;
    if (geteuid() != 0)
        return;

    if (getrlimit(RLIMIT_MEMLOCK, &limit))
        return;
    if (limit.rlim_cur == RLIM_INFINITY)
        return;

    /* pick the largest unit that keeps the printed value >= 1 */
    if (limit.rlim_cur >= 1024*1024)
        snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));
    else if (limit.rlim_cur >= 1024)
        snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
    else
        snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur);

    pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
            buf);
}
0260
#define STRERR_BUFSIZE  128

/* Copied from tools/perf/util/util.h */
#ifndef zfree
/* zfree(ptr): free the allocation that *ptr refers to and set *ptr to NULL.
 * ptr is a pointer to the pointer variable. The argument is evaluated
 * exactly once and may be any expression (e.g. a conditional).
 */
# define zfree(ptr) ({                  \
    __typeof__(ptr) ___zptr = (ptr);    \
    free(*___zptr);                     \
    *___zptr = NULL; })
#endif

#ifndef zclose
/* zclose(fd): close fd when it is non-negative, then set it to -1.
 * Evaluates to close()'s return value, or 0 if nothing was closed.
 * fd must be a modifiable lvalue; it is evaluated more than once.
 */
# define zclose(fd) ({          \
    int ___err = 0;             \
    if ((fd) >= 0)              \
        ___err = close((fd));   \
    (fd) = -1;                  \
    ___err; })
#endif
0276
0277 static inline __u64 ptr_to_u64(const void *ptr)
0278 {
0279     return (__u64) (unsigned long) ptr;
0280 }
0281
0282 int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
0283 {
0284     /* as of v1.0 libbpf_set_strict_mode() is a no-op */
0285     return 0;
0286 }
0287
0288 __u32 libbpf_major_version(void)
0289 {
0290     return LIBBPF_MAJOR_VERSION;
0291 }
0292
0293 __u32 libbpf_minor_version(void)
0294 {
0295     return LIBBPF_MINOR_VERSION;
0296 }
0297
/* Return a string literal of the form "v<major>.<minor>", assembled at
 * compile time from LIBBPF_MAJOR_VERSION and LIBBPF_MINOR_VERSION.
 */
const char *libbpf_version_string(void)
{
/* two-level expansion so the version macros are expanded before '#' */
#define LIBBPF_VER_STR_RAW(X) #X
#define LIBBPF_VER_STR(X) LIBBPF_VER_STR_RAW(X)
    return "v" LIBBPF_VER_STR(LIBBPF_MAJOR_VERSION) "." LIBBPF_VER_STR(LIBBPF_MINOR_VERSION);
#undef LIBBPF_VER_STR
#undef LIBBPF_VER_STR_RAW
}
0306
/* Relocation kinds stored in struct reloc_desc::type. */
enum reloc_type {
    RELO_LD64 = 0,
    RELO_CALL,
    RELO_DATA,
    RELO_EXTERN_VAR,
    RELO_EXTERN_FUNC,
    RELO_SUBPROG_ADDR,
    RELO_CORE,
};
0316
0317 struct reloc_desc {
0318     enum reloc_type type;
0319     int insn_idx;
0320     union {
0321         const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */
0322         struct {
0323             int map_idx;
0324             int sym_off;
0325         };
0326     };
0327 };
0328
/* Flag bits stored as sec_def->cookie for all libbpf-supported SEC()s. */
enum sec_def_flags {
    SEC_NONE = 0,
    /* expected_attach_type is optional, if kernel doesn't support that */
    SEC_EXP_ATTACH_OPT = 1 << 0,
    /* Legacy: only used by libbpf_get_type_names() and
     * libbpf_attach_type_by_name(), not used by libbpf itself at all.
     * It used to be associated with cgroup (and a few other) BPF programs
     * that were attachable through the BPF_PROG_ATTACH command. Pretty
     * meaningless nowadays, though.
     */
    SEC_ATTACHABLE = 1 << 1,
    SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT,
    /* attachment target is specified through a BTF ID in either the
     * kernel's or another BPF program's BTF object
     */
    SEC_ATTACH_BTF = 1 << 2,
    /* BPF program type allows sleeping/blocking in kernel */
    SEC_SLEEPABLE = 1 << 3,
    /* BPF program supports non-linear XDP buffers */
    SEC_XDP_FRAGS = 1 << 4,
};
0350
0351 struct bpf_sec_def {
0352     char *sec;
0353     enum bpf_prog_type prog_type;
0354     enum bpf_attach_type expected_attach_type;
0355     long cookie;
0356     int handler_id;
0357
0358     libbpf_prog_setup_fn_t prog_setup_fn;
0359     libbpf_prog_prepare_load_fn_t prog_prepare_load_fn;
0360     libbpf_prog_attach_fn_t prog_attach_fn;
0361 };
0362
0363 /*
0364  * bpf_prog should be a better name but it has been used in
0365  * linux/filter.h.
0366  */
0367 struct bpf_program {
0368     char *name;
0369     char *sec_name;
0370     size_t sec_idx;
0371     const struct bpf_sec_def *sec_def;
0372     /* this program's instruction offset (in number of instructions)
0373      * within its containing ELF section
0374      */
0375     size_t sec_insn_off;
0376     /* number of original instructions in ELF section belonging to this
0377      * program, not taking into account subprogram instructions possible
0378      * appended later during relocation
0379      */
0380     size_t sec_insn_cnt;
0381     /* Offset (in number of instructions) of the start of instruction
0382      * belonging to this BPF program  within its containing main BPF
0383      * program. For the entry-point (main) BPF program, this is always
0384      * zero. For a sub-program, this gets reset before each of main BPF
0385      * programs are processed and relocated and is used to determined
0386      * whether sub-program was already appended to the main program, and
0387      * if yes, at which instruction offset.
0388      */
0389     size_t sub_insn_off;
0390
0391     /* instructions that belong to BPF program; insns[0] is located at
0392      * sec_insn_off instruction within its ELF section in ELF file, so
0393      * when mapping ELF file instruction index to the local instruction,
0394      * one needs to subtract sec_insn_off; and vice versa.
0395      */
0396     struct bpf_insn *insns;
0397     /* actual number of instruction in this BPF program's image; for
0398      * entry-point BPF programs this includes the size of main program
0399      * itself plus all the used sub-programs, appended at the end
0400      */
0401     size_t insns_cnt;
0402
0403     struct reloc_desc *reloc_desc;
0404     int nr_reloc;
0405
0406     /* BPF verifier log settings */
0407     char *log_buf;
0408     size_t log_size;
0409     __u32 log_level;
0410
0411     struct bpf_object *obj;
0412
0413     int fd;
0414     bool autoload;
0415     bool mark_btf_static;
0416     enum bpf_prog_type type;
0417     enum bpf_attach_type expected_attach_type;
0418
0419     int prog_ifindex;
0420     __u32 attach_btf_obj_fd;
0421     __u32 attach_btf_id;
0422     __u32 attach_prog_fd;
0423
0424     void *func_info;
0425     __u32 func_info_rec_size;
0426     __u32 func_info_cnt;
0427
0428     void *line_info;
0429     __u32 line_info_rec_size;
0430     __u32 line_info_cnt;
0431     __u32 prog_flags;
0432 };
0433
0434 struct bpf_struct_ops {
0435     const char *tname;
0436     const struct btf_type *type;
0437     struct bpf_program **progs;
0438     __u32 *kern_func_off;
0439     /* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
0440     void *data;
0441     /* e.g. struct bpf_struct_ops_tcp_congestion_ops in
0442      *      btf_vmlinux's format.
0443      * struct bpf_struct_ops_tcp_congestion_ops {
0444      *  [... some other kernel fields ...]
0445      *  struct tcp_congestion_ops data;
0446      * }
0447      * kern_vdata-size == sizeof(struct bpf_struct_ops_tcp_congestion_ops)
0448      * bpf_map__init_kern_struct_ops() will populate the "kern_vdata"
0449      * from "data".
0450      */
0451     void *kern_vdata;
0452     __u32 type_id;
0453 };
0454
/* Names of the special ELF sections */
#define DATA_SEC        ".data"
#define BSS_SEC         ".bss"
#define RODATA_SEC      ".rodata"
#define KCONFIG_SEC     ".kconfig"
#define KSYMS_SEC       ".ksyms"
#define STRUCT_OPS_SEC  ".struct_ops"
0461
/* Classification stored in bpf_map::libbpf_type. */
enum libbpf_map_type {
    LIBBPF_MAP_UNSPEC = 0,
    LIBBPF_MAP_DATA,
    LIBBPF_MAP_BSS,
    LIBBPF_MAP_RODATA,
    LIBBPF_MAP_KCONFIG,
};
0469
/* Basic map attributes: type, key/value sizes, capacity and flags. */
struct bpf_map_def {
    unsigned int type;
    unsigned int key_size;
    unsigned int value_size;
    unsigned int max_entries;
    unsigned int map_flags;
};
0477
0478 struct bpf_map {
0479     struct bpf_object *obj;
0480     char *name;
0481     /* real_name is defined for special internal maps (.rodata*,
0482      * .data*, .bss, .kconfig) and preserves their original ELF section
0483      * name. This is important to be be able to find corresponding BTF
0484      * DATASEC information.
0485      */
0486     char *real_name;
0487     int fd;
0488     int sec_idx;
0489     size_t sec_offset;
0490     int map_ifindex;
0491     int inner_map_fd;
0492     struct bpf_map_def def;
0493     __u32 numa_node;
0494     __u32 btf_var_idx;
0495     __u32 btf_key_type_id;
0496     __u32 btf_value_type_id;
0497     __u32 btf_vmlinux_value_type_id;
0498     enum libbpf_map_type libbpf_type;
0499     void *mmaped;
0500     struct bpf_struct_ops *st_ops;
0501     struct bpf_map *inner_map;
0502     void **init_slots;
0503     int init_slots_sz;
0504     char *pin_path;
0505     bool pinned;
0506     bool reused;
0507     bool autocreate;
0508     __u64 map_extra;
0509 };
0510
/* Kind of extern, stored in extern_desc::type; selects which member of
 * the extern_desc union (kcfg or ksym) applies.
 */
enum extern_type {
    EXT_UNKNOWN = 0,
    EXT_KCFG,
    EXT_KSYM,
};
0516
/* Value type of a kcfg extern, stored in extern_desc::kcfg.type. */
enum kcfg_type {
    KCFG_UNKNOWN = 0,
    KCFG_CHAR,
    KCFG_BOOL,
    KCFG_INT,
    KCFG_TRISTATE,
    KCFG_CHAR_ARR,
};
0525
0526 struct extern_desc {
0527     enum extern_type type;
0528     int sym_idx;
0529     int btf_id;
0530     int sec_btf_id;
0531     const char *name;
0532     bool is_set;
0533     bool is_weak;
0534     union {
0535         struct {
0536             enum kcfg_type type;
0537             int sz;
0538             int align;
0539             int data_off;
0540             bool is_signed;
0541         } kcfg;
0542         struct {
0543             unsigned long long addr;
0544
0545             /* target btf_id of the corresponding kernel var. */
0546             int kernel_btf_obj_fd;
0547             int kernel_btf_id;
0548
0549             /* local btf_id of the ksym extern's type. */
0550             __u32 type_id;
0551             /* BTF fd index to be patched in for insn->off, this is
0552              * 0 for vmlinux BTF, index in obj->fd_array for module
0553              * BTF
0554              */
0555             __s16 btf_fd_idx;
0556         } ksym;
0557     };
0558 };
0559
0560 struct module_btf {
0561     struct btf *btf;
0562     char *name;
0563     __u32 id;
0564     int fd;
0565     int fd_array_idx;
0566 };
0567
/* Classification stored in elf_sec_desc::sec_type. */
enum sec_type {
    SEC_UNUSED = 0,
    SEC_RELO = 1,
    SEC_BSS = 2,
    SEC_DATA = 3,
    SEC_RODATA = 4,
};
0575
0576 struct elf_sec_desc {
0577     enum sec_type sec_type;
0578     Elf64_Shdr *shdr;
0579     Elf_Data *data;
0580 };
0581
0582 struct elf_state {
0583     int fd;
0584     const void *obj_buf;
0585     size_t obj_buf_sz;
0586     Elf *elf;
0587     Elf64_Ehdr *ehdr;
0588     Elf_Data *symbols;
0589     Elf_Data *st_ops_data;
0590     size_t shstrndx; /* section index for section name strings */
0591     size_t strtabidx;
0592     struct elf_sec_desc *secs;
0593     int sec_cnt;
0594     int maps_shndx;
0595     int btf_maps_shndx;
0596     __u32 btf_maps_sec_btf_id;
0597     int text_shndx;
0598     int symbols_shndx;
0599     int st_ops_shndx;
0600 };
0601
0602 struct usdt_manager;
0603
0604 struct bpf_object {
0605     char name[BPF_OBJ_NAME_LEN];
0606     char license[64];
0607     __u32 kern_version;
0608
0609     struct bpf_program *programs;
0610     size_t nr_programs;
0611     struct bpf_map *maps;
0612     size_t nr_maps;
0613     size_t maps_cap;
0614
0615     char *kconfig;
0616     struct extern_desc *externs;
0617     int nr_extern;
0618     int kconfig_map_idx;
0619
0620     bool loaded;
0621     bool has_subcalls;
0622     bool has_rodata;
0623
0624     struct bpf_gen *gen_loader;
0625
0626     /* Information when doing ELF related work. Only valid if efile.elf is not NULL */
0627     struct elf_state efile;
0628
0629     struct btf *btf;
0630     struct btf_ext *btf_ext;
0631
0632     /* Parse and load BTF vmlinux if any of the programs in the object need
0633      * it at load time.
0634      */
0635     struct btf *btf_vmlinux;
0636     /* Path to the custom BTF to be used for BPF CO-RE relocations as an
0637      * override for vmlinux BTF.
0638      */
0639     char *btf_custom_path;
0640     /* vmlinux BTF override for CO-RE relocations */
0641     struct btf *btf_vmlinux_override;
0642     /* Lazily initialized kernel module BTFs */
0643     struct module_btf *btf_modules;
0644     bool btf_modules_loaded;
0645     size_t btf_module_cnt;
0646     size_t btf_module_cap;
0647
0648     /* optional log settings passed to BPF_BTF_LOAD and BPF_PROG_LOAD commands */
0649     char *log_buf;
0650     size_t log_size;
0651     __u32 log_level;
0652
0653     int *fd_array;
0654     size_t fd_array_cap;
0655     size_t fd_array_cnt;
0656
0657     struct usdt_manager *usdt_man;
0658
0659     char path[];
0660 };
0661
0662 static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
0663 static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
0664 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
0665 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
0666 static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn);
0667 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
0668 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
0669 static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx);
0670 static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx);
0671
0672 void bpf_program__unload(struct bpf_program *prog)
0673 {
0674     if (!prog)
0675         return;
0676
0677     zclose(prog->fd);
0678
0679     zfree(&prog->func_info);
0680     zfree(&prog->line_info);
0681 }
0682
0683 static void bpf_program__exit(struct bpf_program *prog)
0684 {
0685     if (!prog)
0686         return;
0687
0688     bpf_program__unload(prog);
0689     zfree(&prog->name);
0690     zfree(&prog->sec_name);
0691     zfree(&prog->insns);
0692     zfree(&prog->reloc_desc);
0693
0694     prog->nr_reloc = 0;
0695     prog->insns_cnt = 0;
0696     prog->sec_idx = -1;
0697 }
0698
0699 static bool insn_is_subprog_call(const struct bpf_insn *insn)
0700 {
0701     return BPF_CLASS(insn->code) == BPF_JMP &&
0702            BPF_OP(insn->code) == BPF_CALL &&
0703            BPF_SRC(insn->code) == BPF_K &&
0704            insn->src_reg == BPF_PSEUDO_CALL &&
0705            insn->dst_reg == 0 &&
0706            insn->off == 0;
0707 }
0708
0709 static bool is_call_insn(const struct bpf_insn *insn)
0710 {
0711     return insn->code == (BPF_JMP | BPF_CALL);
0712 }
0713
0714 static bool insn_is_pseudo_func(struct bpf_insn *insn)
0715 {
0716     return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
0717 }
0718
0719 static int
0720 bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
0721               const char *name, size_t sec_idx, const char *sec_name,
0722               size_t sec_off, void *insn_data, size_t insn_data_sz)
0723 {
0724     if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
0725         pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
0726             sec_name, name, sec_off, insn_data_sz);
0727         return -EINVAL;
0728     }
0729
0730     memset(prog, 0, sizeof(*prog));
0731     prog->obj = obj;
0732
0733     prog->sec_idx = sec_idx;
0734     prog->sec_insn_off = sec_off / BPF_INSN_SZ;
0735     prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ;
0736     /* insns_cnt can later be increased by appending used subprograms */
0737     prog->insns_cnt = prog->sec_insn_cnt;
0738
0739     prog->type = BPF_PROG_TYPE_UNSPEC;
0740     prog->fd = -1;
0741
0742     /* libbpf's convention for SEC("?abc...") is that it's just like
0743      * SEC("abc...") but the corresponding bpf_program starts out with
0744      * autoload set to false.
0745      */
0746     if (sec_name[0] == '?') {
0747         prog->autoload = false;
0748         /* from now on forget there was ? in section name */
0749         sec_name++;
0750     } else {
0751         prog->autoload = true;
0752     }
0753
0754     /* inherit object's log_level */
0755     prog->log_level = obj->log_level;
0756
0757     prog->sec_name = strdup(sec_name);
0758     if (!prog->sec_name)
0759         goto errout;
0760
0761     prog->name = strdup(name);
0762     if (!prog->name)
0763         goto errout;
0764
0765     prog->insns = malloc(insn_data_sz);
0766     if (!prog->insns)
0767         goto errout;
0768     memcpy(prog->insns, insn_data, insn_data_sz);
0769
0770     return 0;
0771 errout:
0772     pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
0773     bpf_program__exit(prog);
0774     return -ENOMEM;
0775 }
0776
0777 static int
0778 bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
0779              const char *sec_name, int sec_idx)
0780 {
0781     Elf_Data *symbols = obj->efile.symbols;
0782     struct bpf_program *prog, *progs;
0783     void *data = sec_data->d_buf;
0784     size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms;
0785     int nr_progs, err, i;
0786     const char *name;
0787     Elf64_Sym *sym;
0788
0789     progs = obj->programs;
0790     nr_progs = obj->nr_programs;
0791     nr_syms = symbols->d_size / sizeof(Elf64_Sym);
0792     sec_off = 0;
0793
0794     for (i = 0; i < nr_syms; i++) {
0795         sym = elf_sym_by_idx(obj, i);
0796
0797         if (sym->st_shndx != sec_idx)
0798             continue;
0799         if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
0800             continue;
0801
0802         prog_sz = sym->st_size;
0803         sec_off = sym->st_value;
0804
0805         name = elf_sym_str(obj, sym->st_name);
0806         if (!name) {
0807             pr_warn("sec '%s': failed to get symbol name for offset %zu\n",
0808                 sec_name, sec_off);
0809             return -LIBBPF_ERRNO__FORMAT;
0810         }
0811
0812         if (sec_off + prog_sz > sec_sz) {
0813             pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
0814                 sec_name, sec_off);
0815             return -LIBBPF_ERRNO__FORMAT;
0816         }
0817
0818         if (sec_idx != obj->efile.text_shndx && ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
0819             pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name);
0820             return -ENOTSUP;
0821         }
0822
0823         pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
0824              sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);
0825
0826         progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs));
0827         if (!progs) {
0828             /*
0829              * In this case the original obj->programs
0830              * is still valid, so don't need special treat for
0831              * bpf_close_object().
0832              */
0833             pr_warn("sec '%s': failed to alloc memory for new program '%s'\n",
0834                 sec_name, name);
0835             return -ENOMEM;
0836         }
0837         obj->programs = progs;
0838
0839         prog = &progs[nr_progs];
0840
0841         err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name,
0842                         sec_off, data + sec_off, prog_sz);
0843         if (err)
0844             return err;
0845
0846         /* if function is a global/weak symbol, but has restricted
0847          * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC
0848          * as static to enable more permissive BPF verification mode
0849          * with more outside context available to BPF verifier
0850          */
0851         if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL
0852             && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
0853             || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL))
0854             prog->mark_btf_static = true;
0855
0856         nr_progs++;
0857         obj->nr_programs = nr_progs;
0858     }
0859
0860     return 0;
0861 }
0862
0863 __u32 get_kernel_version(void)
0864 {
0865     /* On Ubuntu LINUX_VERSION_CODE doesn't correspond to info.release,
0866      * but Ubuntu provides /proc/version_signature file, as described at
0867      * https://ubuntu.com/kernel, with an example contents below, which we
0868      * can use to get a proper LINUX_VERSION_CODE.
0869      *
0870      *   Ubuntu 5.4.0-12.15-generic 5.4.8
0871      *
0872      * In the above, 5.4.8 is what kernel is actually expecting, while
0873      * uname() call will return 5.4.0 in info.release.
0874      */
0875     const char *ubuntu_kver_file = "/proc/version_signature";
0876     __u32 major, minor, patch;
0877     struct utsname info;
0878
0879     if (access(ubuntu_kver_file, R_OK) == 0) {
0880         FILE *f;
0881
0882         f = fopen(ubuntu_kver_file, "r");
0883         if (f) {
0884             if (fscanf(f, "%*s %*s %d.%d.%d\n", &major, &minor, &patch) == 3) {
0885                 fclose(f);
0886                 return KERNEL_VERSION(major, minor, patch);
0887             }
0888             fclose(f);
0889         }
0890         /* something went wrong, fall back to uname() approach */
0891     }
0892
0893     uname(&info);
0894     if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3)
0895         return 0;
0896     return KERNEL_VERSION(major, minor, patch);
0897 }
0898
0899 static const struct btf_member *
0900 find_member_by_offset(const struct btf_type *t, __u32 bit_offset)
0901 {
0902     struct btf_member *m;
0903     int i;
0904
0905     for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
0906         if (btf_member_bit_offset(t, i) == bit_offset)
0907             return m;
0908     }
0909
0910     return NULL;
0911 }
0912
0913 static const struct btf_member *
0914 find_member_by_name(const struct btf *btf, const struct btf_type *t,
0915             const char *name)
0916 {
0917     struct btf_member *m;
0918     int i;
0919
0920     for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
0921         if (!strcmp(btf__name_by_offset(btf, m->name_off), name))
0922             return m;
0923     }
0924
0925     return NULL;
0926 }
0927
0928 #define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
0929 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
0930                    const char *name, __u32 kind);
0931
0932 static int
0933 find_struct_ops_kern_types(const struct btf *btf, const char *tname,
0934                const struct btf_type **type, __u32 *type_id,
0935                const struct btf_type **vtype, __u32 *vtype_id,
0936                const struct btf_member **data_member)
0937 {
0938     const struct btf_type *kern_type, *kern_vtype;
0939     const struct btf_member *kern_data_member;
0940     __s32 kern_vtype_id, kern_type_id;
0941     __u32 i;
0942
0943     kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
0944     if (kern_type_id < 0) {
0945         pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
0946             tname);
0947         return kern_type_id;
0948     }
0949     kern_type = btf__type_by_id(btf, kern_type_id);
0950
0951     /* Find the corresponding "map_value" type that will be used
0952      * in map_update(BPF_MAP_TYPE_STRUCT_OPS).  For example,
0953      * find "struct bpf_struct_ops_tcp_congestion_ops" from the
0954      * btf_vmlinux.
0955      */
0956     kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX,
0957                         tname, BTF_KIND_STRUCT);
0958     if (kern_vtype_id < 0) {
0959         pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
0960             STRUCT_OPS_VALUE_PREFIX, tname);
0961         return kern_vtype_id;
0962     }
0963     kern_vtype = btf__type_by_id(btf, kern_vtype_id);
0964
0965     /* Find "struct tcp_congestion_ops" from
0966      * struct bpf_struct_ops_tcp_congestion_ops {
0967      *  [ ... ]
0968      *  struct tcp_congestion_ops data;
0969      * }
0970      */
0971     kern_data_member = btf_members(kern_vtype);
0972     for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) {
0973         if (kern_data_member->type == kern_type_id)
0974             break;
0975     }
0976     if (i == btf_vlen(kern_vtype)) {
0977         pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
0978             tname, STRUCT_OPS_VALUE_PREFIX, tname);
0979         return -EINVAL;
0980     }
0981
0982     *type = kern_type;
0983     *type_id = kern_type_id;
0984     *vtype = kern_vtype;
0985     *vtype_id = kern_vtype_id;
0986     *data_member = kern_data_member;
0987
0988     return 0;
0989 }
0990
0991 static bool bpf_map__is_struct_ops(const struct bpf_map *map)
0992 {
0993     return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
0994 }
0995
0996 /* Init the map's fields that depend on kern_btf */
0997 static int bpf_map__init_kern_struct_ops(struct bpf_map *map,
0998                      const struct btf *btf,
0999                      const struct btf *kern_btf)
1000 {
1001     const struct btf_member *member, *kern_member, *kern_data_member;
1002     const struct btf_type *type, *kern_type, *kern_vtype;
1003     __u32 i, kern_type_id, kern_vtype_id, kern_data_off;
1004     struct bpf_struct_ops *st_ops;
1005     void *data, *kern_data;
1006     const char *tname;
1007     int err;
1008
1009     st_ops = map->st_ops;
1010     type = st_ops->type;
1011     tname = st_ops->tname;
1012     err = find_struct_ops_kern_types(kern_btf, tname,
1013                      &kern_type, &kern_type_id,
1014                      &kern_vtype, &kern_vtype_id,
1015                      &kern_data_member);
1016     if (err)
1017         return err;
1018
1019     pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
1020          map->name, st_ops->type_id, kern_type_id, kern_vtype_id);
1021
1022     map->def.value_size = kern_vtype->size;
1023     map->btf_vmlinux_value_type_id = kern_vtype_id;
1024
1025     st_ops->kern_vdata = calloc(1, kern_vtype->size);
1026     if (!st_ops->kern_vdata)
1027         return -ENOMEM;
1028
1029     data = st_ops->data;
1030     kern_data_off = kern_data_member->offset / 8;
1031     kern_data = st_ops->kern_vdata + kern_data_off;
1032
1033     member = btf_members(type);
1034     for (i = 0; i < btf_vlen(type); i++, member++) {
1035         const struct btf_type *mtype, *kern_mtype;
1036         __u32 mtype_id, kern_mtype_id;
1037         void *mdata, *kern_mdata;
1038         __s64 msize, kern_msize;
1039         __u32 moff, kern_moff;
1040         __u32 kern_member_idx;
1041         const char *mname;
1042
1043         mname = btf__name_by_offset(btf, member->name_off);
1044         kern_member = find_member_by_name(kern_btf, kern_type, mname);
1045         if (!kern_member) {
1046             pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n",
1047                 map->name, mname);
1048             return -ENOTSUP;
1049         }
1050
1051         kern_member_idx = kern_member - btf_members(kern_type);
1052         if (btf_member_bitfield_size(type, i) ||
1053             btf_member_bitfield_size(kern_type, kern_member_idx)) {
1054             pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n",
1055                 map->name, mname);
1056             return -ENOTSUP;
1057         }
1058
1059         moff = member->offset / 8;
1060         kern_moff = kern_member->offset / 8;
1061
1062         mdata = data + moff;
1063         kern_mdata = kern_data + kern_moff;
1064
1065         mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id);
1066         kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type,
1067                             &kern_mtype_id);
1068         if (BTF_INFO_KIND(mtype->info) !=
1069             BTF_INFO_KIND(kern_mtype->info)) {
1070             pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n",
1071                 map->name, mname, BTF_INFO_KIND(mtype->info),
1072                 BTF_INFO_KIND(kern_mtype->info));
1073             return -ENOTSUP;
1074         }
1075
1076         if (btf_is_ptr(mtype)) {
1077             struct bpf_program *prog;
1078
1079             prog = st_ops->progs[i];
1080             if (!prog)
1081                 continue;
1082
1083             kern_mtype = skip_mods_and_typedefs(kern_btf,
1084                                 kern_mtype->type,
1085                                 &kern_mtype_id);
1086
1087             /* mtype->type must be a func_proto which was
1088              * guaranteed in bpf_object__collect_st_ops_relos(),
1089              * so only check kern_mtype for func_proto here.
1090              */
1091             if (!btf_is_func_proto(kern_mtype)) {
1092                 pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n",
1093                     map->name, mname);
1094                 return -ENOTSUP;
1095             }
1096
1097             prog->attach_btf_id = kern_type_id;
1098             prog->expected_attach_type = kern_member_idx;
1099
1100             st_ops->kern_func_off[i] = kern_data_off + kern_moff;
1101
1102             pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n",
1103                  map->name, mname, prog->name, moff,
1104                  kern_moff);
1105
1106             continue;
1107         }
1108
1109         msize = btf__resolve_size(btf, mtype_id);
1110         kern_msize = btf__resolve_size(kern_btf, kern_mtype_id);
1111         if (msize < 0 || kern_msize < 0 || msize != kern_msize) {
1112             pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n",
1113                 map->name, mname, (ssize_t)msize,
1114                 (ssize_t)kern_msize);
1115             return -ENOTSUP;
1116         }
1117
1118         pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n",
1119              map->name, mname, (unsigned int)msize,
1120              moff, kern_moff);
1121         memcpy(kern_mdata, mdata, msize);
1122     }
1123
1124     return 0;
1125 }
1126
1127 static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
1128 {
1129     struct bpf_map *map;
1130     size_t i;
1131     int err;
1132
1133     for (i = 0; i < obj->nr_maps; i++) {
1134         map = &obj->maps[i];
1135
1136         if (!bpf_map__is_struct_ops(map))
1137             continue;
1138
1139         err = bpf_map__init_kern_struct_ops(map, obj->btf,
1140                             obj->btf_vmlinux);
1141         if (err)
1142             return err;
1143     }
1144
1145     return 0;
1146 }
1147
1148 static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
1149 {
1150     const struct btf_type *type, *datasec;
1151     const struct btf_var_secinfo *vsi;
1152     struct bpf_struct_ops *st_ops;
1153     const char *tname, *var_name;
1154     __s32 type_id, datasec_id;
1155     const struct btf *btf;
1156     struct bpf_map *map;
1157     __u32 i;
1158
1159     if (obj->efile.st_ops_shndx == -1)
1160         return 0;
1161
1162     btf = obj->btf;
1163     datasec_id = btf__find_by_name_kind(btf, STRUCT_OPS_SEC,
1164                         BTF_KIND_DATASEC);
1165     if (datasec_id < 0) {
1166         pr_warn("struct_ops init: DATASEC %s not found\n",
1167             STRUCT_OPS_SEC);
1168         return -EINVAL;
1169     }
1170
1171     datasec = btf__type_by_id(btf, datasec_id);
1172     vsi = btf_var_secinfos(datasec);
1173     for (i = 0; i < btf_vlen(datasec); i++, vsi++) {
1174         type = btf__type_by_id(obj->btf, vsi->type);
1175         var_name = btf__name_by_offset(obj->btf, type->name_off);
1176
1177         type_id = btf__resolve_type(obj->btf, vsi->type);
1178         if (type_id < 0) {
1179             pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
1180                 vsi->type, STRUCT_OPS_SEC);
1181             return -EINVAL;
1182         }
1183
1184         type = btf__type_by_id(obj->btf, type_id);
1185         tname = btf__name_by_offset(obj->btf, type->name_off);
1186         if (!tname[0]) {
1187             pr_warn("struct_ops init: anonymous type is not supported\n");
1188             return -ENOTSUP;
1189         }
1190         if (!btf_is_struct(type)) {
1191             pr_warn("struct_ops init: %s is not a struct\n", tname);
1192             return -EINVAL;
1193         }
1194
1195         map = bpf_object__add_map(obj);
1196         if (IS_ERR(map))
1197             return PTR_ERR(map);
1198
1199         map->sec_idx = obj->efile.st_ops_shndx;
1200         map->sec_offset = vsi->offset;
1201         map->name = strdup(var_name);
1202         if (!map->name)
1203             return -ENOMEM;
1204
1205         map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
1206         map->def.key_size = sizeof(int);
1207         map->def.value_size = type->size;
1208         map->def.max_entries = 1;
1209
1210         map->st_ops = calloc(1, sizeof(*map->st_ops));
1211         if (!map->st_ops)
1212             return -ENOMEM;
1213         st_ops = map->st_ops;
1214         st_ops->data = malloc(type->size);
1215         st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs));
1216         st_ops->kern_func_off = malloc(btf_vlen(type) *
1217                            sizeof(*st_ops->kern_func_off));
1218         if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
1219             return -ENOMEM;
1220
1221         if (vsi->offset + type->size > obj->efile.st_ops_data->d_size) {
1222             pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
1223                 var_name, STRUCT_OPS_SEC);
1224             return -EINVAL;
1225         }
1226
1227         memcpy(st_ops->data,
1228                obj->efile.st_ops_data->d_buf + vsi->offset,
1229                type->size);
1230         st_ops->tname = tname;
1231         st_ops->type = type;
1232         st_ops->type_id = type_id;
1233
1234         pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
1235              tname, type_id, var_name, vsi->offset);
1236     }
1237
1238     return 0;
1239 }
1240
1241 static struct bpf_object *bpf_object__new(const char *path,
1242                       const void *obj_buf,
1243                       size_t obj_buf_sz,
1244                       const char *obj_name)
1245 {
1246     struct bpf_object *obj;
1247     char *end;
1248
1249     obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
1250     if (!obj) {
1251         pr_warn("alloc memory failed for %s\n", path);
1252         return ERR_PTR(-ENOMEM);
1253     }
1254
1255     strcpy(obj->path, path);
1256     if (obj_name) {
1257         libbpf_strlcpy(obj->name, obj_name, sizeof(obj->name));
1258     } else {
1259         /* Using basename() GNU version which doesn't modify arg. */
1260         libbpf_strlcpy(obj->name, basename((void *)path), sizeof(obj->name));
1261         end = strchr(obj->name, '.');
1262         if (end)
1263             *end = 0;
1264     }
1265
1266     obj->efile.fd = -1;
1267     /*
1268      * Caller of this function should also call
1269      * bpf_object__elf_finish() after data collection to return
1270      * obj_buf to user. If not, we should duplicate the buffer to
1271      * avoid user freeing them before elf finish.
1272      */
1273     obj->efile.obj_buf = obj_buf;
1274     obj->efile.obj_buf_sz = obj_buf_sz;
1275     obj->efile.maps_shndx = -1;
1276     obj->efile.btf_maps_shndx = -1;
1277     obj->efile.st_ops_shndx = -1;
1278     obj->kconfig_map_idx = -1;
1279
1280     obj->kern_version = get_kernel_version();
1281     obj->loaded = false;
1282
1283     return obj;
1284 }
1285
1286 static void bpf_object__elf_finish(struct bpf_object *obj)
1287 {
1288     if (!obj->efile.elf)
1289         return;
1290
1291     elf_end(obj->efile.elf);
1292     obj->efile.elf = NULL;
1293     obj->efile.symbols = NULL;
1294     obj->efile.st_ops_data = NULL;
1295
1296     zfree(&obj->efile.secs);
1297     obj->efile.sec_cnt = 0;
1298     zclose(obj->efile.fd);
1299     obj->efile.obj_buf = NULL;
1300     obj->efile.obj_buf_sz = 0;
1301 }
1302
1303 static int bpf_object__elf_init(struct bpf_object *obj)
1304 {
1305     Elf64_Ehdr *ehdr;
1306     int err = 0;
1307     Elf *elf;
1308
1309     if (obj->efile.elf) {
1310         pr_warn("elf: init internal error\n");
1311         return -LIBBPF_ERRNO__LIBELF;
1312     }
1313
1314     if (obj->efile.obj_buf_sz > 0) {
1315         /* obj_buf should have been validated by bpf_object__open_mem(). */
1316         elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz);
1317     } else {
1318         obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC);
1319         if (obj->efile.fd < 0) {
1320             char errmsg[STRERR_BUFSIZE], *cp;
1321
1322             err = -errno;
1323             cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
1324             pr_warn("elf: failed to open %s: %s\n", obj->path, cp);
1325             return err;
1326         }
1327
1328         elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
1329     }
1330
1331     if (!elf) {
1332         pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));
1333         err = -LIBBPF_ERRNO__LIBELF;
1334         goto errout;
1335     }
1336
1337     obj->efile.elf = elf;
1338
1339     if (elf_kind(elf) != ELF_K_ELF) {
1340         err = -LIBBPF_ERRNO__FORMAT;
1341         pr_warn("elf: '%s' is not a proper ELF object\n", obj->path);
1342         goto errout;
1343     }
1344
1345     if (gelf_getclass(elf) != ELFCLASS64) {
1346         err = -LIBBPF_ERRNO__FORMAT;
1347         pr_warn("elf: '%s' is not a 64-bit ELF object\n", obj->path);
1348         goto errout;
1349     }
1350
1351     obj->efile.ehdr = ehdr = elf64_getehdr(elf);
1352     if (!obj->efile.ehdr) {
1353         pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));
1354         err = -LIBBPF_ERRNO__FORMAT;
1355         goto errout;
1356     }
1357
1358     if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) {
1359         pr_warn("elf: failed to get section names section index for %s: %s\n",
1360             obj->path, elf_errmsg(-1));
1361         err = -LIBBPF_ERRNO__FORMAT;
1362         goto errout;
1363     }
1364
1365     /* Elf is corrupted/truncated, avoid calling elf_strptr. */
1366     if (!elf_rawdata(elf_getscn(elf, obj->efile.shstrndx), NULL)) {
1367         pr_warn("elf: failed to get section names strings from %s: %s\n",
1368             obj->path, elf_errmsg(-1));
1369         err = -LIBBPF_ERRNO__FORMAT;
1370         goto errout;
1371     }
1372
1373     /* Old LLVM set e_machine to EM_NONE */
1374     if (ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF)) {
1375         pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);
1376         err = -LIBBPF_ERRNO__FORMAT;
1377         goto errout;
1378     }
1379
1380     return 0;
1381 errout:
1382     bpf_object__elf_finish(obj);
1383     return err;
1384 }
1385
1386 static int bpf_object__check_endianness(struct bpf_object *obj)
1387 {
1388 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
1389     if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
1390         return 0;
1391 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
1392     if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
1393         return 0;
1394 #else
1395 # error "Unrecognized __BYTE_ORDER__"
1396 #endif
1397     pr_warn("elf: endianness mismatch in %s.\n", obj->path);
1398     return -LIBBPF_ERRNO__ENDIAN;
1399 }
1400
1401 static int
1402 bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
1403 {
1404     /* libbpf_strlcpy() only copies first N - 1 bytes, so size + 1 won't
1405      * go over allowed ELF data section buffer
1406      */
1407     libbpf_strlcpy(obj->license, data, min(size + 1, sizeof(obj->license)));
1408     pr_debug("license of %s is %s\n", obj->path, obj->license);
1409     return 0;
1410 }
1411
1412 static int
1413 bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
1414 {
1415     __u32 kver;
1416
1417     if (size != sizeof(kver)) {
1418         pr_warn("invalid kver section in %s\n", obj->path);
1419         return -LIBBPF_ERRNO__FORMAT;
1420     }
1421     memcpy(&kver, data, sizeof(kver));
1422     obj->kern_version = kver;
1423     pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
1424     return 0;
1425 }
1426
/* Tell whether @type is one of the map-in-map types (array/hash of maps). */
static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
{
    switch (type) {
    case BPF_MAP_TYPE_ARRAY_OF_MAPS:
    case BPF_MAP_TYPE_HASH_OF_MAPS:
        return true;
    default:
        return false;
    }
}
1434
1435 static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size)
1436 {
1437     Elf_Data *data;
1438     Elf_Scn *scn;
1439
1440     if (!name)
1441         return -EINVAL;
1442
1443     scn = elf_sec_by_name(obj, name);
1444     data = elf_sec_data(obj, scn);
1445     if (data) {
1446         *size = data->d_size;
1447         return 0; /* found it */
1448     }
1449
1450     return -ENOENT;
1451 }
1452
1453 static int find_elf_var_offset(const struct bpf_object *obj, const char *name, __u32 *off)
1454 {
1455     Elf_Data *symbols = obj->efile.symbols;
1456     const char *sname;
1457     size_t si;
1458
1459     if (!name || !off)
1460         return -EINVAL;
1461
1462     for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) {
1463         Elf64_Sym *sym = elf_sym_by_idx(obj, si);
1464
1465         if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT)
1466             continue;
1467
1468         if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
1469             ELF64_ST_BIND(sym->st_info) != STB_WEAK)
1470             continue;
1471
1472         sname = elf_sym_str(obj, sym->st_name);
1473         if (!sname) {
1474             pr_warn("failed to get sym name string for var %s\n", name);
1475             return -EIO;
1476         }
1477         if (strcmp(name, sname) == 0) {
1478             *off = sym->st_value;
1479             return 0;
1480         }
1481     }
1482
1483     return -ENOENT;
1484 }
1485
1486 static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
1487 {
1488     struct bpf_map *map;
1489     int err;
1490
1491     err = libbpf_ensure_mem((void **)&obj->maps, &obj->maps_cap,
1492                 sizeof(*obj->maps), obj->nr_maps + 1);
1493     if (err)
1494         return ERR_PTR(err);
1495
1496     map = &obj->maps[obj->nr_maps++];
1497     map->obj = obj;
1498     map->fd = -1;
1499     map->inner_map_fd = -1;
1500     map->autocreate = true;
1501
1502     return map;
1503 }
1504
1505 static size_t bpf_map_mmap_sz(const struct bpf_map *map)
1506 {
1507     long page_sz = sysconf(_SC_PAGE_SIZE);
1508     size_t map_sz;
1509
1510     map_sz = (size_t)roundup(map->def.value_size, 8) * map->def.max_entries;
1511     map_sz = roundup(map_sz, page_sz);
1512     return map_sz;
1513 }
1514
1515 static char *internal_map_name(struct bpf_object *obj, const char *real_name)
1516 {
1517     char map_name[BPF_OBJ_NAME_LEN], *p;
1518     int pfx_len, sfx_len = max((size_t)7, strlen(real_name));
1519
1520     /* This is one of the more confusing parts of libbpf for various
1521      * reasons, some of which are historical. The original idea for naming
1522      * internal names was to include as much of BPF object name prefix as
1523      * possible, so that it can be distinguished from similar internal
1524      * maps of a different BPF object.
1525      * As an example, let's say we have bpf_object named 'my_object_name'
1526      * and internal map corresponding to '.rodata' ELF section. The final
1527      * map name advertised to user and to the kernel will be
1528      * 'my_objec.rodata', taking first 8 characters of object name and
1529      * entire 7 characters of '.rodata'.
1530      * Somewhat confusingly, if internal map ELF section name is shorter
1531      * than 7 characters, e.g., '.bss', we still reserve 7 characters
1532      * for the suffix, even though we only have 4 actual characters, and
1533      * resulting map will be called 'my_objec.bss', not even using all 15
1534      * characters allowed by the kernel. Oh well, at least the truncated
1535      * object name is somewhat consistent in this case. But if the map
1536      * name is '.kconfig', we'll still have entirety of '.kconfig' added
1537      * (8 chars) and thus will be left with only first 7 characters of the
1538      * object name ('my_obje'). Happy guessing, user, that the final map
1539      * name will be "my_obje.kconfig".
1540      * Now, with libbpf starting to support arbitrarily named .rodata.*
1541      * and .data.* data sections, it's possible that ELF section name is
1542      * longer than allowed 15 chars, so we now need to be careful to take
1543      * only up to 15 first characters of ELF name, taking no BPF object
1544      * name characters at all. So '.rodata.abracadabra' will result in
1545      * '.rodata.abracad' kernel and user-visible name.
1546      * We need to keep this convoluted logic intact for .data, .bss and
1547      * .rodata maps, but for new custom .data.custom and .rodata.custom
1548      * maps we use their ELF names as is, not prepending bpf_object name
1549      * in front. We still need to truncate them to 15 characters for the
1550      * kernel. Full name can be recovered for such maps by using DATASEC
1551      * BTF type associated with such map's value type, though.
1552      */
1553     if (sfx_len >= BPF_OBJ_NAME_LEN)
1554         sfx_len = BPF_OBJ_NAME_LEN - 1;
1555
1556     /* if there are two or more dots in map name, it's a custom dot map */
1557     if (strchr(real_name + 1, '.') != NULL)
1558         pfx_len = 0;
1559     else
1560         pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, strlen(obj->name));
1561
1562     snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
1563          sfx_len, real_name);
1564
1565     /* sanitise map name to characters allowed by kernel */
1566     for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
1567         if (!isalnum(*p) && *p != '_' && *p != '.')
1568             *p = '_';
1569
1570     return strdup(map_name);
1571 }
1572
1573 static int
1574 bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map);
1575
1576 static int
1577 bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
1578                   const char *real_name, int sec_idx, void *data, size_t data_sz)
1579 {
1580     struct bpf_map_def *def;
1581     struct bpf_map *map;
1582     int err;
1583
1584     map = bpf_object__add_map(obj);
1585     if (IS_ERR(map))
1586         return PTR_ERR(map);
1587
1588     map->libbpf_type = type;
1589     map->sec_idx = sec_idx;
1590     map->sec_offset = 0;
1591     map->real_name = strdup(real_name);
1592     map->name = internal_map_name(obj, real_name);
1593     if (!map->real_name || !map->name) {
1594         zfree(&map->real_name);
1595         zfree(&map->name);
1596         return -ENOMEM;
1597     }
1598
1599     def = &map->def;
1600     def->type = BPF_MAP_TYPE_ARRAY;
1601     def->key_size = sizeof(int);
1602     def->value_size = data_sz;
1603     def->max_entries = 1;
1604     def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
1605              ? BPF_F_RDONLY_PROG : 0;
1606     def->map_flags |= BPF_F_MMAPABLE;
1607
1608     pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
1609          map->name, map->sec_idx, map->sec_offset, def->map_flags);
1610
1611     map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
1612                MAP_SHARED | MAP_ANONYMOUS, -1, 0);
1613     if (map->mmaped == MAP_FAILED) {
1614         err = -errno;
1615         map->mmaped = NULL;
1616         pr_warn("failed to alloc map '%s' content buffer: %d\n",
1617             map->name, err);
1618         zfree(&map->real_name);
1619         zfree(&map->name);
1620         return err;
1621     }
1622
1623     /* failures are fine because of maps like .rodata.str1.1 */
1624     (void) bpf_map_find_btf_info(obj, map);
1625
1626     if (data)
1627         memcpy(map->mmaped, data, data_sz);
1628
1629     pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
1630     return 0;
1631 }
1632
1633 static int bpf_object__init_global_data_maps(struct bpf_object *obj)
1634 {
1635     struct elf_sec_desc *sec_desc;
1636     const char *sec_name;
1637     int err = 0, sec_idx;
1638
1639     /*
1640      * Populate obj->maps with libbpf internal maps.
1641      */
1642     for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) {
1643         sec_desc = &obj->efile.secs[sec_idx];
1644
1645         switch (sec_desc->sec_type) {
1646         case SEC_DATA:
1647             sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1648             err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
1649                                 sec_name, sec_idx,
1650                                 sec_desc->data->d_buf,
1651                                 sec_desc->data->d_size);
1652             break;
1653         case SEC_RODATA:
1654             obj->has_rodata = true;
1655             sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1656             err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
1657                                 sec_name, sec_idx,
1658                                 sec_desc->data->d_buf,
1659                                 sec_desc->data->d_size);
1660             break;
1661         case SEC_BSS:
1662             sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1663             err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
1664                                 sec_name, sec_idx,
1665                                 NULL,
1666                                 sec_desc->data->d_size);
1667             break;
1668         default:
1669             /* skip */
1670             break;
1671         }
1672         if (err)
1673             return err;
1674     }
1675     return 0;
1676 }
1677
1678
1679 static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
1680                            const void *name)
1681 {
1682     int i;
1683
1684     for (i = 0; i < obj->nr_extern; i++) {
1685         if (strcmp(obj->externs[i].name, name) == 0)
1686             return &obj->externs[i];
1687     }
1688     return NULL;
1689 }
1690
1691 static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
1692                   char value)
1693 {
1694     switch (ext->kcfg.type) {
1695     case KCFG_BOOL:
1696         if (value == 'm') {
1697             pr_warn("extern (kcfg) '%s': value '%c' implies tristate or char type\n",
1698                 ext->name, value);
1699             return -EINVAL;
1700         }
1701         *(bool *)ext_val = value == 'y' ? true : false;
1702         break;
1703     case KCFG_TRISTATE:
1704         if (value == 'y')
1705             *(enum libbpf_tristate *)ext_val = TRI_YES;
1706         else if (value == 'm')
1707             *(enum libbpf_tristate *)ext_val = TRI_MODULE;
1708         else /* value == 'n' */
1709             *(enum libbpf_tristate *)ext_val = TRI_NO;
1710         break;
1711     case KCFG_CHAR:
1712         *(char *)ext_val = value;
1713         break;
1714     case KCFG_UNKNOWN:
1715     case KCFG_INT:
1716     case KCFG_CHAR_ARR:
1717     default:
1718         pr_warn("extern (kcfg) '%s': value '%c' implies bool, tristate, or char type\n",
1719             ext->name, value);
1720         return -EINVAL;
1721     }
1722     ext->is_set = true;
1723     return 0;
1724 }
1725
1726 static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
1727                   const char *value)
1728 {
1729     size_t len;
1730
1731     if (ext->kcfg.type != KCFG_CHAR_ARR) {
1732         pr_warn("extern (kcfg) '%s': value '%s' implies char array type\n",
1733             ext->name, value);
1734         return -EINVAL;
1735     }
1736
1737     len = strlen(value);
1738     if (value[len - 1] != '"') {
1739         pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
1740             ext->name, value);
1741         return -EINVAL;
1742     }
1743
1744     /* strip quotes */
1745     len -= 2;
1746     if (len >= ext->kcfg.sz) {
1747         pr_warn("extern (kcfg) '%s': long string '%s' of (%zu bytes) truncated to %d bytes\n",
1748             ext->name, value, len, ext->kcfg.sz - 1);
1749         len = ext->kcfg.sz - 1;
1750     }
1751     memcpy(ext_val, value + 1, len);
1752     ext_val[len] = '\0';
1753     ext->is_set = true;
1754     return 0;
1755 }
1756
1757 static int parse_u64(const char *value, __u64 *res)
1758 {
1759     char *value_end;
1760     int err;
1761
1762     errno = 0;
1763     *res = strtoull(value, &value_end, 0);
1764     if (errno) {
1765         err = -errno;
1766         pr_warn("failed to parse '%s' as integer: %d\n", value, err);
1767         return err;
1768     }
1769     if (*value_end) {
1770         pr_warn("failed to parse '%s' as integer completely\n", value);
1771         return -EINVAL;
1772     }
1773     return 0;
1774 }
1775
1776 static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
1777 {
1778     int bit_sz = ext->kcfg.sz * 8;
1779
1780     if (ext->kcfg.sz == 8)
1781         return true;
1782
1783     /* Validate that value stored in u64 fits in integer of `ext->sz`
1784      * bytes size without any loss of information. If the target integer
1785      * is signed, we rely on the following limits of integer type of
1786      * Y bits and subsequent transformation:
1787      *
1788      *     -2^(Y-1) <= X           <= 2^(Y-1) - 1
1789      *            0 <= X + 2^(Y-1) <= 2^Y - 1
1790      *            0 <= X + 2^(Y-1) <  2^Y
1791      *
1792      *  For unsigned target integer, check that all the (64 - Y) bits are
1793      *  zero.
1794      */
1795     if (ext->kcfg.is_signed)
1796         return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
1797     else
1798         return (v >> bit_sz) == 0;
1799 }
1800
1801 static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
1802                   __u64 value)
1803 {
1804     if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR &&
1805         ext->kcfg.type != KCFG_BOOL) {
1806         pr_warn("extern (kcfg) '%s': value '%llu' implies integer, char, or boolean type\n",
1807             ext->name, (unsigned long long)value);
1808         return -EINVAL;
1809     }
1810     if (ext->kcfg.type == KCFG_BOOL && value > 1) {
1811         pr_warn("extern (kcfg) '%s': value '%llu' isn't boolean compatible\n",
1812             ext->name, (unsigned long long)value);
1813         return -EINVAL;
1814
1815     }
1816     if (!is_kcfg_value_in_range(ext, value)) {
1817         pr_warn("extern (kcfg) '%s': value '%llu' doesn't fit in %d bytes\n",
1818             ext->name, (unsigned long long)value, ext->kcfg.sz);
1819         return -ERANGE;
1820     }
1821     switch (ext->kcfg.sz) {
1822         case 1: *(__u8 *)ext_val = value; break;
1823         case 2: *(__u16 *)ext_val = value; break;
1824         case 4: *(__u32 *)ext_val = value; break;
1825         case 8: *(__u64 *)ext_val = value; break;
1826         default:
1827             return -EINVAL;
1828     }
1829     ext->is_set = true;
1830     return 0;
1831 }
1832
1833 static int bpf_object__process_kconfig_line(struct bpf_object *obj,
1834                         char *buf, void *data)
1835 {
1836     struct extern_desc *ext;
1837     char *sep, *value;
1838     int len, err = 0;
1839     void *ext_val;
1840     __u64 num;
1841
1842     if (!str_has_pfx(buf, "CONFIG_"))
1843         return 0;
1844
1845     sep = strchr(buf, '=');
1846     if (!sep) {
1847         pr_warn("failed to parse '%s': no separator\n", buf);
1848         return -EINVAL;
1849     }
1850
1851     /* Trim ending '\n' */
1852     len = strlen(buf);
1853     if (buf[len - 1] == '\n')
1854         buf[len - 1] = '\0';
1855     /* Split on '=' and ensure that a value is present. */
1856     *sep = '\0';
1857     if (!sep[1]) {
1858         *sep = '=';
1859         pr_warn("failed to parse '%s': no value\n", buf);
1860         return -EINVAL;
1861     }
1862
1863     ext = find_extern_by_name(obj, buf);
1864     if (!ext || ext->is_set)
1865         return 0;
1866
1867     ext_val = data + ext->kcfg.data_off;
1868     value = sep + 1;
1869
1870     switch (*value) {
1871     case 'y': case 'n': case 'm':
1872         err = set_kcfg_value_tri(ext, ext_val, *value);
1873         break;
1874     case '"':
1875         err = set_kcfg_value_str(ext, ext_val, value);
1876         break;
1877     default:
1878         /* assume integer */
1879         err = parse_u64(value, &num);
1880         if (err) {
1881             pr_warn("extern (kcfg) '%s': value '%s' isn't a valid integer\n", ext->name, value);
1882             return err;
1883         }
1884         if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
1885             pr_warn("extern (kcfg) '%s': value '%s' implies integer type\n", ext->name, value);
1886             return -EINVAL;
1887         }
1888         err = set_kcfg_value_num(ext, ext_val, num);
1889         break;
1890     }
1891     if (err)
1892         return err;
1893     pr_debug("extern (kcfg) '%s': set to %s\n", ext->name, value);
1894     return 0;
1895 }
1896
1897 static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
1898 {
1899     char buf[PATH_MAX];
1900     struct utsname uts;
1901     int len, err = 0;
1902     gzFile file;
1903
1904     uname(&uts);
1905     len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release);
1906     if (len < 0)
1907         return -EINVAL;
1908     else if (len >= PATH_MAX)
1909         return -ENAMETOOLONG;
1910
1911     /* gzopen also accepts uncompressed files. */
1912     file = gzopen(buf, "r");
1913     if (!file)
1914         file = gzopen("/proc/config.gz", "r");
1915
1916     if (!file) {
1917         pr_warn("failed to open system Kconfig\n");
1918         return -ENOENT;
1919     }
1920
1921     while (gzgets(file, buf, sizeof(buf))) {
1922         err = bpf_object__process_kconfig_line(obj, buf, data);
1923         if (err) {
1924             pr_warn("error parsing system Kconfig line '%s': %d\n",
1925                 buf, err);
1926             goto out;
1927         }
1928     }
1929
1930 out:
1931     gzclose(file);
1932     return err;
1933 }
1934
1935 static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
1936                     const char *config, void *data)
1937 {
1938     char buf[PATH_MAX];
1939     int err = 0;
1940     FILE *file;
1941
1942     file = fmemopen((void *)config, strlen(config), "r");
1943     if (!file) {
1944         err = -errno;
1945         pr_warn("failed to open in-memory Kconfig: %d\n", err);
1946         return err;
1947     }
1948
1949     while (fgets(buf, sizeof(buf), file)) {
1950         err = bpf_object__process_kconfig_line(obj, buf, data);
1951         if (err) {
1952             pr_warn("error parsing in-memory Kconfig line '%s': %d\n",
1953                 buf, err);
1954             break;
1955         }
1956     }
1957
1958     fclose(file);
1959     return err;
1960 }
1961
1962 static int bpf_object__init_kconfig_map(struct bpf_object *obj)
1963 {
1964     struct extern_desc *last_ext = NULL, *ext;
1965     size_t map_sz;
1966     int i, err;
1967
1968     for (i = 0; i < obj->nr_extern; i++) {
1969         ext = &obj->externs[i];
1970         if (ext->type == EXT_KCFG)
1971             last_ext = ext;
1972     }
1973
1974     if (!last_ext)
1975         return 0;
1976
1977     map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz;
1978     err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
1979                         ".kconfig", obj->efile.symbols_shndx,
1980                         NULL, map_sz);
1981     if (err)
1982         return err;
1983
1984     obj->kconfig_map_idx = obj->nr_maps - 1;
1985
1986     return 0;
1987 }
1988
1989 const struct btf_type *
1990 skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
1991 {
1992     const struct btf_type *t = btf__type_by_id(btf, id);
1993
1994     if (res_id)
1995         *res_id = id;
1996
1997     while (btf_is_mod(t) || btf_is_typedef(t)) {
1998         if (res_id)
1999             *res_id = t->type;
2000         t = btf__type_by_id(btf, t->type);
2001     }
2002
2003     return t;
2004 }
2005
2006 static const struct btf_type *
2007 resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
2008 {
2009     const struct btf_type *t;
2010
2011     t = skip_mods_and_typedefs(btf, id, NULL);
2012     if (!btf_is_ptr(t))
2013         return NULL;
2014
2015     t = skip_mods_and_typedefs(btf, t->type, res_id);
2016
2017     return btf_is_func_proto(t) ? t : NULL;
2018 }
2019
2020 static const char *__btf_kind_str(__u16 kind)
2021 {
2022     switch (kind) {
2023     case BTF_KIND_UNKN: return "void";
2024     case BTF_KIND_INT: return "int";
2025     case BTF_KIND_PTR: return "ptr";
2026     case BTF_KIND_ARRAY: return "array";
2027     case BTF_KIND_STRUCT: return "struct";
2028     case BTF_KIND_UNION: return "union";
2029     case BTF_KIND_ENUM: return "enum";
2030     case BTF_KIND_FWD: return "fwd";
2031     case BTF_KIND_TYPEDEF: return "typedef";
2032     case BTF_KIND_VOLATILE: return "volatile";
2033     case BTF_KIND_CONST: return "const";
2034     case BTF_KIND_RESTRICT: return "restrict";
2035     case BTF_KIND_FUNC: return "func";
2036     case BTF_KIND_FUNC_PROTO: return "func_proto";
2037     case BTF_KIND_VAR: return "var";
2038     case BTF_KIND_DATASEC: return "datasec";
2039     case BTF_KIND_FLOAT: return "float";
2040     case BTF_KIND_DECL_TAG: return "decl_tag";
2041     case BTF_KIND_TYPE_TAG: return "type_tag";
2042     case BTF_KIND_ENUM64: return "enum64";
2043     default: return "unknown";
2044     }
2045 }
2046
2047 const char *btf_kind_str(const struct btf_type *t)
2048 {
2049     return __btf_kind_str(btf_kind(t));
2050 }
2051
2052 /*
2053  * Fetch integer attribute of BTF map definition. Such attributes are
2054  * represented using a pointer to an array, in which dimensionality of array
2055  * encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY];
2056  * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF
2057  * type definition, while using only sizeof(void *) space in ELF data section.
2058  */
2059 static bool get_map_field_int(const char *map_name, const struct btf *btf,
2060                   const struct btf_member *m, __u32 *res)
2061 {
2062     const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
2063     const char *name = btf__name_by_offset(btf, m->name_off);
2064     const struct btf_array *arr_info;
2065     const struct btf_type *arr_t;
2066
2067     if (!btf_is_ptr(t)) {
2068         pr_warn("map '%s': attr '%s': expected PTR, got %s.\n",
2069             map_name, name, btf_kind_str(t));
2070         return false;
2071     }
2072
2073     arr_t = btf__type_by_id(btf, t->type);
2074     if (!arr_t) {
2075         pr_warn("map '%s': attr '%s': type [%u] not found.\n",
2076             map_name, name, t->type);
2077         return false;
2078     }
2079     if (!btf_is_array(arr_t)) {
2080         pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n",
2081             map_name, name, btf_kind_str(arr_t));
2082         return false;
2083     }
2084     arr_info = btf_array(arr_t);
2085     *res = arr_info->nelems;
2086     return true;
2087 }
2088
2089 static int build_map_pin_path(struct bpf_map *map, const char *path)
2090 {
2091     char buf[PATH_MAX];
2092     int len;
2093
2094     if (!path)
2095         path = "/sys/fs/bpf";
2096
2097     len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map));
2098     if (len < 0)
2099         return -EINVAL;
2100     else if (len >= PATH_MAX)
2101         return -ENAMETOOLONG;
2102
2103     return bpf_map__set_pin_path(map, buf);
2104 }
2105
/* Map pinning modes; must stay in sync with the definition in bpf_helpers.h */
enum libbpf_pin_type {
    /* map is not pinned */
    LIBBPF_PIN_NONE = 0,
    /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
    LIBBPF_PIN_BY_NAME = 1,
};
2112
/*
 * Parse a BTF-defined map definition.
 *
 * Walks the members of the struct type 'def_t' and, for every recognized
 * field name, records the corresponding attribute in 'map_def' and sets the
 * matching MAP_DEF_* bit in map_def->parts.
 *
 * @map_name:  name used in log messages
 * @btf:       BTF object that 'def_t' and its member types belong to
 * @def_t:     struct type describing the map
 * @strict:    if true, an unrecognized field is an error (-ENOTSUP);
 *             otherwise it is ignored with a debug message
 * @map_def:   output for the parsed attributes
 * @inner_def: output for the inner map definition found through a "values"
 *             member; NULL means the definition being parsed is itself an
 *             inner map, in which case "values" and "pinning" are rejected
 *
 * Returns 0 on success, negative error code otherwise. A definition that
 * leaves the map type as BPF_MAP_TYPE_UNSPEC is rejected with -EINVAL.
 */
int parse_btf_map_def(const char *map_name, struct btf *btf,
              const struct btf_type *def_t, bool strict,
              struct btf_map_def *map_def, struct btf_map_def *inner_def)
{
    const struct btf_type *t;
    const struct btf_member *m;
    /* no place to store a nested definition => we are parsing an inner map */
    bool is_inner = inner_def == NULL;
    int vlen, i;

    vlen = btf_vlen(def_t);
    m = btf_members(def_t);
    for (i = 0; i < vlen; i++, m++) {
        const char *name = btf__name_by_offset(btf, m->name_off);

        if (!name) {
            pr_warn("map '%s': invalid field #%d.\n", map_name, i);
            return -EINVAL;
        }
        if (strcmp(name, "type") == 0) {
            if (!get_map_field_int(map_name, btf, m, &map_def->map_type))
                return -EINVAL;
            map_def->parts |= MAP_DEF_MAP_TYPE;
        } else if (strcmp(name, "max_entries") == 0) {
            if (!get_map_field_int(map_name, btf, m, &map_def->max_entries))
                return -EINVAL;
            map_def->parts |= MAP_DEF_MAX_ENTRIES;
        } else if (strcmp(name, "map_flags") == 0) {
            if (!get_map_field_int(map_name, btf, m, &map_def->map_flags))
                return -EINVAL;
            map_def->parts |= MAP_DEF_MAP_FLAGS;
        } else if (strcmp(name, "numa_node") == 0) {
            if (!get_map_field_int(map_name, btf, m, &map_def->numa_node))
                return -EINVAL;
            map_def->parts |= MAP_DEF_NUMA_NODE;
        } else if (strcmp(name, "key_size") == 0) {
            __u32 sz;

            /* explicit key size; must agree with any size already recorded */
            if (!get_map_field_int(map_name, btf, m, &sz))
                return -EINVAL;
            if (map_def->key_size && map_def->key_size != sz) {
                pr_warn("map '%s': conflicting key size %u != %u.\n",
                    map_name, map_def->key_size, sz);
                return -EINVAL;
            }
            map_def->key_size = sz;
            map_def->parts |= MAP_DEF_KEY_SIZE;
        } else if (strcmp(name, "key") == 0) {
            __s64 sz;

            /* key given as a pointer to the key type; size comes from BTF */
            t = btf__type_by_id(btf, m->type);
            if (!t) {
                pr_warn("map '%s': key type [%d] not found.\n",
                    map_name, m->type);
                return -EINVAL;
            }
            if (!btf_is_ptr(t)) {
                pr_warn("map '%s': key spec is not PTR: %s.\n",
                    map_name, btf_kind_str(t));
                return -EINVAL;
            }
            sz = btf__resolve_size(btf, t->type);
            if (sz < 0) {
                pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
                    map_name, t->type, (ssize_t)sz);
                return sz;
            }
            if (map_def->key_size && map_def->key_size != sz) {
                pr_warn("map '%s': conflicting key size %u != %zd.\n",
                    map_name, map_def->key_size, (ssize_t)sz);
                return -EINVAL;
            }
            map_def->key_size = sz;
            map_def->key_type_id = t->type;
            map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE;
        } else if (strcmp(name, "value_size") == 0) {
            __u32 sz;

            /* explicit value size; must agree with any size already recorded */
            if (!get_map_field_int(map_name, btf, m, &sz))
                return -EINVAL;
            if (map_def->value_size && map_def->value_size != sz) {
                pr_warn("map '%s': conflicting value size %u != %u.\n",
                    map_name, map_def->value_size, sz);
                return -EINVAL;
            }
            map_def->value_size = sz;
            map_def->parts |= MAP_DEF_VALUE_SIZE;
        } else if (strcmp(name, "value") == 0) {
            __s64 sz;

            /* value given as a pointer to the value type; size comes from BTF */
            t = btf__type_by_id(btf, m->type);
            if (!t) {
                pr_warn("map '%s': value type [%d] not found.\n",
                    map_name, m->type);
                return -EINVAL;
            }
            if (!btf_is_ptr(t)) {
                pr_warn("map '%s': value spec is not PTR: %s.\n",
                    map_name, btf_kind_str(t));
                return -EINVAL;
            }
            sz = btf__resolve_size(btf, t->type);
            if (sz < 0) {
                pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
                    map_name, t->type, (ssize_t)sz);
                return sz;
            }
            if (map_def->value_size && map_def->value_size != sz) {
                pr_warn("map '%s': conflicting value size %u != %zd.\n",
                    map_name, map_def->value_size, (ssize_t)sz);
                return -EINVAL;
            }
            map_def->value_size = sz;
            map_def->value_type_id = t->type;
            map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE;
        }
        else if (strcmp(name, "values") == 0) {
            /* both flags depend on map_def->map_type as parsed so far */
            bool is_map_in_map = bpf_map_type__is_map_in_map(map_def->map_type);
            bool is_prog_array = map_def->map_type == BPF_MAP_TYPE_PROG_ARRAY;
            const char *desc = is_map_in_map ? "map-in-map inner" : "prog-array value";
            char inner_map_name[128];
            int err;

            if (is_inner) {
                pr_warn("map '%s': multi-level inner maps not supported.\n",
                    map_name);
                return -ENOTSUP;
            }
            if (i != vlen - 1) {
                pr_warn("map '%s': '%s' member should be last.\n",
                    map_name, name);
                return -EINVAL;
            }
            if (!is_map_in_map && !is_prog_array) {
                pr_warn("map '%s': should be map-in-map or prog-array.\n",
                    map_name);
                return -ENOTSUP;
            }
            /* a map with "values" always gets a 4-byte value size */
            if (map_def->value_size && map_def->value_size != 4) {
                pr_warn("map '%s': conflicting value size %u != 4.\n",
                    map_name, map_def->value_size);
                return -EINVAL;
            }
            map_def->value_size = 4;
            t = btf__type_by_id(btf, m->type);
            if (!t) {
                pr_warn("map '%s': %s type [%d] not found.\n",
                    map_name, desc, m->type);
                return -EINVAL;
            }
            /* "values" must be a zero-sized array ... */
            if (!btf_is_array(t) || btf_array(t)->nelems) {
                pr_warn("map '%s': %s spec is not a zero-sized array.\n",
                    map_name, desc);
                return -EINVAL;
            }
            /* ... of pointers ... */
            t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL);
            if (!btf_is_ptr(t)) {
                pr_warn("map '%s': %s def is of unexpected kind %s.\n",
                    map_name, desc, btf_kind_str(t));
                return -EINVAL;
            }
            /* ... to a func proto (prog-array) or a struct (map-in-map) */
            t = skip_mods_and_typedefs(btf, t->type, NULL);
            if (is_prog_array) {
                if (!btf_is_func_proto(t)) {
                    pr_warn("map '%s': prog-array value def is of unexpected kind %s.\n",
                        map_name, btf_kind_str(t));
                    return -EINVAL;
                }
                /* nothing more to parse for prog-array; MAP_DEF_INNER_MAP is not set */
                continue;
            }
            if (!btf_is_struct(t)) {
                pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
                    map_name, btf_kind_str(t));
                return -EINVAL;
            }

            /* recurse into the inner definition; NULL inner_def forbids further nesting */
            snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name);
            err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL);
            if (err)
                return err;

            map_def->parts |= MAP_DEF_INNER_MAP;
        } else if (strcmp(name, "pinning") == 0) {
            __u32 val;

            if (is_inner) {
                pr_warn("map '%s': inner def can't be pinned.\n", map_name);
                return -EINVAL;
            }
            if (!get_map_field_int(map_name, btf, m, &val))
                return -EINVAL;
            if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) {
                pr_warn("map '%s': invalid pinning value %u.\n",
                    map_name, val);
                return -EINVAL;
            }
            map_def->pinning = val;
            map_def->parts |= MAP_DEF_PINNING;
        } else if (strcmp(name, "map_extra") == 0) {
            __u32 map_extra;

            if (!get_map_field_int(map_name, btf, m, &map_extra))
                return -EINVAL;
            map_def->map_extra = map_extra;
            map_def->parts |= MAP_DEF_MAP_EXTRA;
        } else {
            if (strict) {
                pr_warn("map '%s': unknown field '%s'.\n", map_name, name);
                return -ENOTSUP;
            }
            pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name);
        }
    }

    /* a usable definition has to end up with a map type */
    if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) {
        pr_warn("map '%s': map type isn't specified.\n", map_name);
        return -EINVAL;
    }

    return 0;
}
2333
2334 static size_t adjust_ringbuf_sz(size_t sz)
2335 {
2336     __u32 page_sz = sysconf(_SC_PAGE_SIZE);
2337     __u32 mul;
2338
2339     /* if user forgot to set any size, make sure they see error */
2340     if (sz == 0)
2341         return 0;
2342     /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be
2343      * a power-of-2 multiple of kernel's page size. If user diligently
2344      * satisified these conditions, pass the size through.
2345      */
2346     if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz))
2347         return sz;
2348
2349     /* Otherwise find closest (page_sz * power_of_2) product bigger than
2350      * user-set size to satisfy both user size request and kernel
2351      * requirements and substitute correct max_entries for map creation.
2352      */
2353     for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) {
2354         if (mul * page_sz > sz)
2355             return mul * page_sz;
2356     }
2357
2358     /* if it's impossible to satisfy the conditions (i.e., user size is
2359      * very close to UINT_MAX but is not a power-of-2 multiple of
2360      * page_size) then just return original size and let kernel reject it
2361      */
2362     return sz;
2363 }
2364
2365 static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
2366 {
2367     map->def.type = def->map_type;
2368     map->def.key_size = def->key_size;
2369     map->def.value_size = def->value_size;
2370     map->def.max_entries = def->max_entries;
2371     map->def.map_flags = def->map_flags;
2372     map->map_extra = def->map_extra;
2373
2374     map->numa_node = def->numa_node;
2375     map->btf_key_type_id = def->key_type_id;
2376     map->btf_value_type_id = def->value_type_id;
2377
2378     /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
2379     if (map->def.type == BPF_MAP_TYPE_RINGBUF)
2380         map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
2381
2382     if (def->parts & MAP_DEF_MAP_TYPE)
2383         pr_debug("map '%s': found type = %u.\n", map->name, def->map_type);
2384
2385     if (def->parts & MAP_DEF_KEY_TYPE)
2386         pr_debug("map '%s': found key [%u], sz = %u.\n",
2387              map->name, def->key_type_id, def->key_size);
2388     else if (def->parts & MAP_DEF_KEY_SIZE)
2389         pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size);
2390
2391     if (def->parts & MAP_DEF_VALUE_TYPE)
2392         pr_debug("map '%s': found value [%u], sz = %u.\n",
2393              map->name, def->value_type_id, def->value_size);
2394     else if (def->parts & MAP_DEF_VALUE_SIZE)
2395         pr_debug("map '%s': found value_size = %u.\n", map->name, def->value_size);
2396
2397     if (def->parts & MAP_DEF_MAX_ENTRIES)
2398         pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries);
2399     if (def->parts & MAP_DEF_MAP_FLAGS)
2400         pr_debug("map '%s': found map_flags = 0x%x.\n", map->name, def->map_flags);
2401     if (def->parts & MAP_DEF_MAP_EXTRA)
2402         pr_debug("map '%s': found map_extra = 0x%llx.\n", map->name,
2403              (unsigned long long)def->map_extra);
2404     if (def->parts & MAP_DEF_PINNING)
2405         pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning);
2406     if (def->parts & MAP_DEF_NUMA_NODE)
2407         pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node);
2408
2409     if (def->parts & MAP_DEF_INNER_MAP)
2410         pr_debug("map '%s': found inner map definition.\n", map->name);
2411 }
2412
2413 static const char *btf_var_linkage_str(__u32 linkage)
2414 {
2415     switch (linkage) {
2416     case BTF_VAR_STATIC: return "static";
2417     case BTF_VAR_GLOBAL_ALLOCATED: return "global";
2418     case BTF_VAR_GLOBAL_EXTERN: return "extern";
2419     default: return "unknown";
2420     }
2421 }
2422
2423 static int bpf_object__init_user_btf_map(struct bpf_object *obj,
2424                      const struct btf_type *sec,
2425                      int var_idx, int sec_idx,
2426                      const Elf_Data *data, bool strict,
2427                      const char *pin_root_path)
2428 {
2429     struct btf_map_def map_def = {}, inner_def = {};
2430     const struct btf_type *var, *def;
2431     const struct btf_var_secinfo *vi;
2432     const struct btf_var *var_extra;
2433     const char *map_name;
2434     struct bpf_map *map;
2435     int err;
2436
2437     vi = btf_var_secinfos(sec) + var_idx;
2438     var = btf__type_by_id(obj->btf, vi->type);
2439     var_extra = btf_var(var);
2440     map_name = btf__name_by_offset(obj->btf, var->name_off);
2441
2442     if (map_name == NULL || map_name[0] == '\0') {
2443         pr_warn("map #%d: empty name.\n", var_idx);
2444         return -EINVAL;
2445     }
2446     if ((__u64)vi->offset + vi->size > data->d_size) {
2447         pr_warn("map '%s' BTF data is corrupted.\n", map_name);
2448         return -EINVAL;
2449     }
2450     if (!btf_is_var(var)) {
2451         pr_warn("map '%s': unexpected var kind %s.\n",
2452             map_name, btf_kind_str(var));
2453         return -EINVAL;
2454     }
2455     if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) {
2456         pr_warn("map '%s': unsupported map linkage %s.\n",
2457             map_name, btf_var_linkage_str(var_extra->linkage));
2458         return -EOPNOTSUPP;
2459     }
2460
2461     def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
2462     if (!btf_is_struct(def)) {
2463         pr_warn("map '%s': unexpected def kind %s.\n",
2464             map_name, btf_kind_str(var));
2465         return -EINVAL;
2466     }
2467     if (def->size > vi->size) {
2468         pr_warn("map '%s': invalid def size.\n", map_name);
2469         return -EINVAL;
2470     }
2471
2472     map = bpf_object__add_map(obj);
2473     if (IS_ERR(map))
2474         return PTR_ERR(map);
2475     map->name = strdup(map_name);
2476     if (!map->name) {
2477         pr_warn("map '%s': failed to alloc map name.\n", map_name);
2478         return -ENOMEM;
2479     }
2480     map->libbpf_type = LIBBPF_MAP_UNSPEC;
2481     map->def.type = BPF_MAP_TYPE_UNSPEC;
2482     map->sec_idx = sec_idx;
2483     map->sec_offset = vi->offset;
2484     map->btf_var_idx = var_idx;
2485     pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
2486          map_name, map->sec_idx, map->sec_offset);
2487
2488     err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def);
2489     if (err)
2490         return err;
2491
2492     fill_map_from_def(map, &map_def);
2493
2494     if (map_def.pinning == LIBBPF_PIN_BY_NAME) {
2495         err = build_map_pin_path(map, pin_root_path);
2496         if (err) {
2497             pr_warn("map '%s': couldn't build pin path.\n", map->name);
2498             return err;
2499         }
2500     }
2501
2502     if (map_def.parts & MAP_DEF_INNER_MAP) {
2503         map->inner_map = calloc(1, sizeof(*map->inner_map));
2504         if (!map->inner_map)
2505             return -ENOMEM;
2506         map->inner_map->fd = -1;
2507         map->inner_map->sec_idx = sec_idx;
2508         map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1);
2509         if (!map->inner_map->name)
2510             return -ENOMEM;
2511         sprintf(map->inner_map->name, "%s.inner", map_name);
2512
2513         fill_map_from_def(map->inner_map, &inner_def);
2514     }
2515
2516     err = bpf_map_find_btf_info(obj, map);
2517     if (err)
2518         return err;
2519
2520     return 0;
2521 }
2522
2523 static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
2524                       const char *pin_root_path)
2525 {
2526     const struct btf_type *sec = NULL;
2527     int nr_types, i, vlen, err;
2528     const struct btf_type *t;
2529     const char *name;
2530     Elf_Data *data;
2531     Elf_Scn *scn;
2532
2533     if (obj->efile.btf_maps_shndx < 0)
2534         return 0;
2535
2536     scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
2537     data = elf_sec_data(obj, scn);
2538     if (!scn || !data) {
2539         pr_warn("elf: failed to get %s map definitions for %s\n",
2540             MAPS_ELF_SEC, obj->path);
2541         return -EINVAL;
2542     }
2543
2544     nr_types = btf__type_cnt(obj->btf);
2545     for (i = 1; i < nr_types; i++) {
2546         t = btf__type_by_id(obj->btf, i);
2547         if (!btf_is_datasec(t))
2548             continue;
2549         name = btf__name_by_offset(obj->btf, t->name_off);
2550         if (strcmp(name, MAPS_ELF_SEC) == 0) {
2551             sec = t;
2552             obj->efile.btf_maps_sec_btf_id = i;
2553             break;
2554         }
2555     }
2556
2557     if (!sec) {
2558         pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
2559         return -ENOENT;
2560     }
2561
2562     vlen = btf_vlen(sec);
2563     for (i = 0; i < vlen; i++) {
2564         err = bpf_object__init_user_btf_map(obj, sec, i,
2565                             obj->efile.btf_maps_shndx,
2566                             data, strict,
2567                             pin_root_path);
2568         if (err)
2569             return err;
2570     }
2571
2572     return 0;
2573 }
2574
2575 static int bpf_object__init_maps(struct bpf_object *obj,
2576                  const struct bpf_object_open_opts *opts)
2577 {
2578     const char *pin_root_path;
2579     bool strict;
2580     int err = 0;
2581
2582     strict = !OPTS_GET(opts, relaxed_maps, false);
2583     pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
2584
2585     err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
2586     err = err ?: bpf_object__init_global_data_maps(obj);
2587     err = err ?: bpf_object__init_kconfig_map(obj);
2588     err = err ?: bpf_object__init_struct_ops_maps(obj);
2589
2590     return err;
2591 }
2592
2593 static bool section_have_execinstr(struct bpf_object *obj, int idx)
2594 {
2595     Elf64_Shdr *sh;
2596
2597     sh = elf_sec_hdr(obj, elf_sec_by_idx(obj, idx));
2598     if (!sh)
2599         return false;
2600
2601     return sh->sh_flags & SHF_EXECINSTR;
2602 }
2603
2604 static bool btf_needs_sanitization(struct bpf_object *obj)
2605 {
2606     bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
2607     bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
2608     bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
2609     bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
2610     bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
2611     bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
2612     bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
2613
2614     return !has_func || !has_datasec || !has_func_global || !has_float ||
2615            !has_decl_tag || !has_type_tag || !has_enum64;
2616 }
2617
2618 static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
2619 {
2620     bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
2621     bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
2622     bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
2623     bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
2624     bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
2625     bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
2626     bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
2627     int enum64_placeholder_id = 0;
2628     struct btf_type *t;
2629     int i, j, vlen;
2630
2631     for (i = 1; i < btf__type_cnt(btf); i++) {
2632         t = (struct btf_type *)btf__type_by_id(btf, i);
2633
2634         if ((!has_datasec && btf_is_var(t)) || (!has_decl_tag && btf_is_decl_tag(t))) {
2635             /* replace VAR/DECL_TAG with INT */
2636             t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
2637             /*
2638              * using size = 1 is the safest choice, 4 will be too
2639              * big and cause kernel BTF validation failure if
2640              * original variable took less than 4 bytes
2641              */
2642             t->size = 1;
2643             *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
2644         } else if (!has_datasec && btf_is_datasec(t)) {
2645             /* replace DATASEC with STRUCT */
2646             const struct btf_var_secinfo *v = btf_var_secinfos(t);
2647             struct btf_member *m = btf_members(t);
2648             struct btf_type *vt;
2649             char *name;
2650
2651             name = (char *)btf__name_by_offset(btf, t->name_off);
2652             while (*name) {
2653                 if (*name == '.')
2654                     *name = '_';
2655                 name++;
2656             }
2657
2658             vlen = btf_vlen(t);
2659             t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
2660             for (j = 0; j < vlen; j++, v++, m++) {
2661                 /* order of field assignments is important */
2662                 m->offset = v->offset * 8;
2663                 m->type = v->type;
2664                 /* preserve variable name as member name */
2665                 vt = (void *)btf__type_by_id(btf, v->type);
2666                 m->name_off = vt->name_off;
2667             }
2668         } else if (!has_func && btf_is_func_proto(t)) {
2669             /* replace FUNC_PROTO with ENUM */
2670             vlen = btf_vlen(t);
2671             t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
2672             t->size = sizeof(__u32); /* kernel enforced */
2673         } else if (!has_func && btf_is_func(t)) {
2674             /* replace FUNC with TYPEDEF */
2675             t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
2676         } else if (!has_func_global && btf_is_func(t)) {
2677             /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
2678             t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
2679         } else if (!has_float && btf_is_float(t)) {
2680             /* replace FLOAT with an equally-sized empty STRUCT;
2681              * since C compilers do not accept e.g. "float" as a
2682              * valid struct name, make it anonymous
2683              */
2684             t->name_off = 0;
2685             t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0);
2686         } else if (!has_type_tag && btf_is_type_tag(t)) {
2687             /* replace TYPE_TAG with a CONST */
2688             t->name_off = 0;
2689             t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0);
2690         } else if (!has_enum64 && btf_is_enum(t)) {
2691             /* clear the kflag */
2692             t->info = btf_type_info(btf_kind(t), btf_vlen(t), false);
2693         } else if (!has_enum64 && btf_is_enum64(t)) {
2694             /* replace ENUM64 with a union */
2695             struct btf_member *m;
2696
2697             if (enum64_placeholder_id == 0) {
2698                 enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0);
2699                 if (enum64_placeholder_id < 0)
2700                     return enum64_placeholder_id;
2701
2702                 t = (struct btf_type *)btf__type_by_id(btf, i);
2703             }
2704
2705             m = btf_members(t);
2706             vlen = btf_vlen(t);
2707             t->info = BTF_INFO_ENC(BTF_KIND_UNION, 0, vlen);
2708             for (j = 0; j < vlen; j++, m++) {
2709                 m->type = enum64_placeholder_id;
2710                 m->offset = 0;
2711             }
2712                 }
2713     }
2714
2715     return 0;
2716 }
2717
2718 static bool libbpf_needs_btf(const struct bpf_object *obj)
2719 {
2720     return obj->efile.btf_maps_shndx >= 0 ||
2721            obj->efile.st_ops_shndx >= 0 ||
2722            obj->nr_extern > 0;
2723 }
2724
2725 static bool kernel_needs_btf(const struct bpf_object *obj)
2726 {
2727     return obj->efile.st_ops_shndx >= 0;
2728 }
2729
2730 static int bpf_object__init_btf(struct bpf_object *obj,
2731                 Elf_Data *btf_data,
2732                 Elf_Data *btf_ext_data)
2733 {
2734     int err = -ENOENT;
2735
2736     if (btf_data) {
2737         obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
2738         err = libbpf_get_error(obj->btf);
2739         if (err) {
2740             obj->btf = NULL;
2741             pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err);
2742             goto out;
2743         }
2744         /* enforce 8-byte pointers for BPF-targeted BTFs */
2745         btf__set_pointer_size(obj->btf, 8);
2746     }
2747     if (btf_ext_data) {
2748         struct btf_ext_info *ext_segs[3];
2749         int seg_num, sec_num;
2750
2751         if (!obj->btf) {
2752             pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n",
2753                  BTF_EXT_ELF_SEC, BTF_ELF_SEC);
2754             goto out;
2755         }
2756         obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
2757         err = libbpf_get_error(obj->btf_ext);
2758         if (err) {
2759             pr_warn("Error loading ELF section %s: %d. Ignored and continue.\n",
2760                 BTF_EXT_ELF_SEC, err);
2761             obj->btf_ext = NULL;
2762             goto out;
2763         }
2764
2765         /* setup .BTF.ext to ELF section mapping */
2766         ext_segs[0] = &obj->btf_ext->func_info;
2767         ext_segs[1] = &obj->btf_ext->line_info;
2768         ext_segs[2] = &obj->btf_ext->core_relo_info;
2769         for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) {
2770             struct btf_ext_info *seg = ext_segs[seg_num];
2771             const struct btf_ext_info_sec *sec;
2772             const char *sec_name;
2773             Elf_Scn *scn;
2774
2775             if (seg->sec_cnt == 0)
2776                 continue;
2777
2778             seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs));
2779             if (!seg->sec_idxs) {
2780                 err = -ENOMEM;
2781                 goto out;
2782             }
2783
2784             sec_num = 0;
2785             for_each_btf_ext_sec(seg, sec) {
2786                 /* preventively increment index to avoid doing
2787                  * this before every continue below
2788                  */
2789                 sec_num++;
2790
2791                 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
2792                 if (str_is_empty(sec_name))
2793                     continue;
2794                 scn = elf_sec_by_name(obj, sec_name);
2795                 if (!scn)
2796                     continue;
2797
2798                 seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn);
2799             }
2800         }
2801     }
2802 out:
2803     if (err && libbpf_needs_btf(obj)) {
2804         pr_warn("BTF is required, but is missing or corrupted.\n");
2805         return err;
2806     }
2807     return 0;
2808 }
2809
2810 static int compare_vsi_off(const void *_a, const void *_b)
2811 {
2812     const struct btf_var_secinfo *a = _a;
2813     const struct btf_var_secinfo *b = _b;
2814
2815     return a->offset - b->offset;
2816 }
2817
2818 static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
2819                  struct btf_type *t)
2820 {
2821     __u32 size = 0, off = 0, i, vars = btf_vlen(t);
2822     const char *name = btf__name_by_offset(btf, t->name_off);
2823     const struct btf_type *t_var;
2824     struct btf_var_secinfo *vsi;
2825     const struct btf_var *var;
2826     int ret;
2827
2828     if (!name) {
2829         pr_debug("No name found in string section for DATASEC kind.\n");
2830         return -ENOENT;
2831     }
2832
2833     /* .extern datasec size and var offsets were set correctly during
2834      * extern collection step, so just skip straight to sorting variables
2835      */
2836     if (t->size)
2837         goto sort_vars;
2838
2839     ret = find_elf_sec_sz(obj, name, &size);
2840     if (ret || !size) {
2841         pr_debug("Invalid size for section %s: %u bytes\n", name, size);
2842         return -ENOENT;
2843     }
2844
2845     t->size = size;
2846
2847     for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
2848         t_var = btf__type_by_id(btf, vsi->type);
2849         if (!t_var || !btf_is_var(t_var)) {
2850             pr_debug("Non-VAR type seen in section %s\n", name);
2851             return -EINVAL;
2852         }
2853
2854         var = btf_var(t_var);
2855         if (var->linkage == BTF_VAR_STATIC)
2856             continue;
2857
2858         name = btf__name_by_offset(btf, t_var->name_off);
2859         if (!name) {
2860             pr_debug("No name found in string section for VAR kind\n");
2861             return -ENOENT;
2862         }
2863
2864         ret = find_elf_var_offset(obj, name, &off);
2865         if (ret) {
2866             pr_debug("No offset found in symbol table for VAR %s\n",
2867                  name);
2868             return -ENOENT;
2869         }
2870
2871         vsi->offset = off;
2872     }
2873
2874 sort_vars:
2875     qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off);
2876     return 0;
2877 }
2878
2879 static int btf_finalize_data(struct bpf_object *obj, struct btf *btf)
2880 {
2881     int err = 0;
2882     __u32 i, n = btf__type_cnt(btf);
2883
2884     for (i = 1; i < n; i++) {
2885         struct btf_type *t = btf_type_by_id(btf, i);
2886
2887         /* Loader needs to fix up some of the things compiler
2888          * couldn't get its hands on while emitting BTF. This
2889          * is section size and global variable offset. We use
2890          * the info from the ELF itself for this purpose.
2891          */
2892         if (btf_is_datasec(t)) {
2893             err = btf_fixup_datasec(obj, btf, t);
2894             if (err)
2895                 break;
2896         }
2897     }
2898
2899     return libbpf_err(err);
2900 }
2901
2902 static int bpf_object__finalize_btf(struct bpf_object *obj)
2903 {
2904     int err;
2905
2906     if (!obj->btf)
2907         return 0;
2908
2909     err = btf_finalize_data(obj, obj->btf);
2910     if (err) {
2911         pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
2912         return err;
2913     }
2914
2915     return 0;
2916 }
2917
2918 static bool prog_needs_vmlinux_btf(struct bpf_program *prog)
2919 {
2920     if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
2921         prog->type == BPF_PROG_TYPE_LSM)
2922         return true;
2923
2924     /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
2925      * also need vmlinux BTF
2926      */
2927     if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd)
2928         return true;
2929
2930     return false;
2931 }
2932
2933 static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
2934 {
2935     struct bpf_program *prog;
2936     int i;
2937
2938     /* CO-RE relocations need kernel BTF, only when btf_custom_path
2939      * is not specified
2940      */
2941     if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path)
2942         return true;
2943
2944     /* Support for typed ksyms needs kernel BTF */
2945     for (i = 0; i < obj->nr_extern; i++) {
2946         const struct extern_desc *ext;
2947
2948         ext = &obj->externs[i];
2949         if (ext->type == EXT_KSYM && ext->ksym.type_id)
2950             return true;
2951     }
2952
2953     bpf_object__for_each_program(prog, obj) {
2954         if (!prog->autoload)
2955             continue;
2956         if (prog_needs_vmlinux_btf(prog))
2957             return true;
2958     }
2959
2960     return false;
2961 }
2962
2963 static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
2964 {
2965     int err;
2966
2967     /* btf_vmlinux could be loaded earlier */
2968     if (obj->btf_vmlinux || obj->gen_loader)
2969         return 0;
2970
2971     if (!force && !obj_needs_vmlinux_btf(obj))
2972         return 0;
2973
2974     obj->btf_vmlinux = btf__load_vmlinux_btf();
2975     err = libbpf_get_error(obj->btf_vmlinux);
2976     if (err) {
2977         pr_warn("Error loading vmlinux BTF: %d\n", err);
2978         obj->btf_vmlinux = NULL;
2979         return err;
2980     }
2981     return 0;
2982 }
2983
2984 static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
2985 {
2986     struct btf *kern_btf = obj->btf;
2987     bool btf_mandatory, sanitize;
2988     int i, err = 0;
2989
2990     if (!obj->btf)
2991         return 0;
2992
2993     if (!kernel_supports(obj, FEAT_BTF)) {
2994         if (kernel_needs_btf(obj)) {
2995             err = -EOPNOTSUPP;
2996             goto report;
2997         }
2998         pr_debug("Kernel doesn't support BTF, skipping uploading it.\n");
2999         return 0;
3000     }
3001
3002     /* Even though some subprogs are global/weak, user might prefer more
3003      * permissive BPF verification process that BPF verifier performs for
3004      * static functions, taking into account more context from the caller
3005      * functions. In such case, they need to mark such subprogs with
3006      * __attribute__((visibility("hidden"))) and libbpf will adjust
3007      * corresponding FUNC BTF type to be marked as static and trigger more
3008      * involved BPF verification process.
3009      */
3010     for (i = 0; i < obj->nr_programs; i++) {
3011         struct bpf_program *prog = &obj->programs[i];
3012         struct btf_type *t;
3013         const char *name;
3014         int j, n;
3015
3016         if (!prog->mark_btf_static || !prog_is_subprog(obj, prog))
3017             continue;
3018
3019         n = btf__type_cnt(obj->btf);
3020         for (j = 1; j < n; j++) {
3021             t = btf_type_by_id(obj->btf, j);
3022             if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL)
3023                 continue;
3024
3025             name = btf__str_by_offset(obj->btf, t->name_off);
3026             if (strcmp(name, prog->name) != 0)
3027                 continue;
3028
3029             t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0);
3030             break;
3031         }
3032     }
3033
3034     sanitize = btf_needs_sanitization(obj);
3035     if (sanitize) {
3036         const void *raw_data;
3037         __u32 sz;
3038
3039         /* clone BTF to sanitize a copy and leave the original intact */
3040         raw_data = btf__raw_data(obj->btf, &sz);
3041         kern_btf = btf__new(raw_data, sz);
3042         err = libbpf_get_error(kern_btf);
3043         if (err)
3044             return err;
3045
3046         /* enforce 8-byte pointers for BPF-targeted BTFs */
3047         btf__set_pointer_size(obj->btf, 8);
3048         err = bpf_object__sanitize_btf(obj, kern_btf);
3049         if (err)
3050             return err;
3051     }
3052
3053     if (obj->gen_loader) {
3054         __u32 raw_size = 0;
3055         const void *raw_data = btf__raw_data(kern_btf, &raw_size);
3056
3057         if (!raw_data)
3058             return -ENOMEM;
3059         bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size);
3060         /* Pretend to have valid FD to pass various fd >= 0 checks.
3061          * This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
3062          */
3063         btf__set_fd(kern_btf, 0);
3064     } else {
3065         /* currently BPF_BTF_LOAD only supports log_level 1 */
3066         err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size,
3067                        obj->log_level ? 1 : 0);
3068     }
3069     if (sanitize) {
3070         if (!err) {
3071             /* move fd to libbpf's BTF */
3072             btf__set_fd(obj->btf, btf__fd(kern_btf));
3073             btf__set_fd(kern_btf, -1);
3074         }
3075         btf__free(kern_btf);
3076     }
3077 report:
3078     if (err) {
3079         btf_mandatory = kernel_needs_btf(obj);
3080         pr_warn("Error loading .BTF into kernel: %d. %s\n", err,
3081             btf_mandatory ? "BTF is mandatory, can't proceed."
3082                       : "BTF is optional, ignoring.");
3083         if (!btf_mandatory)
3084             err = 0;
3085     }
3086     return err;
3087 }
3088
3089 static const char *elf_sym_str(const struct bpf_object *obj, size_t off)
3090 {
3091     const char *name;
3092
3093     name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off);
3094     if (!name) {
3095         pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
3096             off, obj->path, elf_errmsg(-1));
3097         return NULL;
3098     }
3099
3100     return name;
3101 }
3102
3103 static const char *elf_sec_str(const struct bpf_object *obj, size_t off)
3104 {
3105     const char *name;
3106
3107     name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off);
3108     if (!name) {
3109         pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
3110             off, obj->path, elf_errmsg(-1));
3111         return NULL;
3112     }
3113
3114     return name;
3115 }
3116
3117 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx)
3118 {
3119     Elf_Scn *scn;
3120
3121     scn = elf_getscn(obj->efile.elf, idx);
3122     if (!scn) {
3123         pr_warn("elf: failed to get section(%zu) from %s: %s\n",
3124             idx, obj->path, elf_errmsg(-1));
3125         return NULL;
3126     }
3127     return scn;
3128 }
3129
3130 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name)
3131 {
3132     Elf_Scn *scn = NULL;
3133     Elf *elf = obj->efile.elf;
3134     const char *sec_name;
3135
3136     while ((scn = elf_nextscn(elf, scn)) != NULL) {
3137         sec_name = elf_sec_name(obj, scn);
3138         if (!sec_name)
3139             return NULL;
3140
3141         if (strcmp(sec_name, name) != 0)
3142             continue;
3143
3144         return scn;
3145     }
3146     return NULL;
3147 }
3148
3149 static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn)
3150 {
3151     Elf64_Shdr *shdr;
3152
3153     if (!scn)
3154         return NULL;
3155
3156     shdr = elf64_getshdr(scn);
3157     if (!shdr) {
3158         pr_warn("elf: failed to get section(%zu) header from %s: %s\n",
3159             elf_ndxscn(scn), obj->path, elf_errmsg(-1));
3160         return NULL;
3161     }
3162
3163     return shdr;
3164 }
3165
3166 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn)
3167 {
3168     const char *name;
3169     Elf64_Shdr *sh;
3170
3171     if (!scn)
3172         return NULL;
3173
3174     sh = elf_sec_hdr(obj, scn);
3175     if (!sh)
3176         return NULL;
3177
3178     name = elf_sec_str(obj, sh->sh_name);
3179     if (!name) {
3180         pr_warn("elf: failed to get section(%zu) name from %s: %s\n",
3181             elf_ndxscn(scn), obj->path, elf_errmsg(-1));
3182         return NULL;
3183     }
3184
3185     return name;
3186 }
3187
3188 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
3189 {
3190     Elf_Data *data;
3191
3192     if (!scn)
3193         return NULL;
3194
3195     data = elf_getdata(scn, 0);
3196     if (!data) {
3197         pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n",
3198             elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>",
3199             obj->path, elf_errmsg(-1));
3200         return NULL;
3201     }
3202
3203     return data;
3204 }
3205
3206 static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx)
3207 {
3208     if (idx >= obj->efile.symbols->d_size / sizeof(Elf64_Sym))
3209         return NULL;
3210
3211     return (Elf64_Sym *)obj->efile.symbols->d_buf + idx;
3212 }
3213
3214 static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx)
3215 {
3216     if (idx >= data->d_size / sizeof(Elf64_Rel))
3217         return NULL;
3218
3219     return (Elf64_Rel *)data->d_buf + idx;
3220 }
3221
/* Heuristic DWARF section check: matches any name starting with ".debug_"
 * (an approximation, since the real list of DWARF sections is too long).
 */
static bool is_sec_name_dwarf(const char *name)
{
    const bool has_debug_pfx = str_has_pfx(name, ".debug_");

    return has_debug_pfx;
}
3227
3228 static bool ignore_elf_section(Elf64_Shdr *hdr, const char *name)
3229 {
3230     /* no special handling of .strtab */
3231     if (hdr->sh_type == SHT_STRTAB)
3232         return true;
3233
3234     /* ignore .llvm_addrsig section as well */
3235     if (hdr->sh_type == SHT_LLVM_ADDRSIG)
3236         return true;
3237
3238     /* no subprograms will lead to an empty .text section, ignore it */
3239     if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 &&
3240         strcmp(name, ".text") == 0)
3241         return true;
3242
3243     /* DWARF sections */
3244     if (is_sec_name_dwarf(name))
3245         return true;
3246
3247     if (str_has_pfx(name, ".rel")) {
3248         name += sizeof(".rel") - 1;
3249         /* DWARF section relocations */
3250         if (is_sec_name_dwarf(name))
3251             return true;
3252
3253         /* .BTF and .BTF.ext don't need relocations */
3254         if (strcmp(name, BTF_ELF_SEC) == 0 ||
3255             strcmp(name, BTF_EXT_ELF_SEC) == 0)
3256             return true;
3257     }
3258
3259     return false;
3260 }
3261
3262 static int cmp_progs(const void *_a, const void *_b)
3263 {
3264     const struct bpf_program *a = _a;
3265     const struct bpf_program *b = _b;
3266
3267     if (a->sec_idx != b->sec_idx)
3268         return a->sec_idx < b->sec_idx ? -1 : 1;
3269
3270     /* sec_insn_off can't be the same within the section */
3271     return a->sec_insn_off < b->sec_insn_off ? -1 : 1;
3272 }
3273
3274 static int bpf_object__elf_collect(struct bpf_object *obj)
3275 {
3276     struct elf_sec_desc *sec_desc;
3277     Elf *elf = obj->efile.elf;
3278     Elf_Data *btf_ext_data = NULL;
3279     Elf_Data *btf_data = NULL;
3280     int idx = 0, err = 0;
3281     const char *name;
3282     Elf_Data *data;
3283     Elf_Scn *scn;
3284     Elf64_Shdr *sh;
3285
3286     /* ELF section indices are 0-based, but sec #0 is special "invalid"
3287      * section. e_shnum does include sec #0, so e_shnum is the necessary
3288      * size of an array to keep all the sections.
3289      */
3290     obj->efile.sec_cnt = obj->efile.ehdr->e_shnum;
3291     obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs));
3292     if (!obj->efile.secs)
3293         return -ENOMEM;
3294
3295     /* a bunch of ELF parsing functionality depends on processing symbols,
3296      * so do the first pass and find the symbol table
3297      */
3298     scn = NULL;
3299     while ((scn = elf_nextscn(elf, scn)) != NULL) {
3300         sh = elf_sec_hdr(obj, scn);
3301         if (!sh)
3302             return -LIBBPF_ERRNO__FORMAT;
3303
3304         if (sh->sh_type == SHT_SYMTAB) {
3305             if (obj->efile.symbols) {
3306                 pr_warn("elf: multiple symbol tables in %s\n", obj->path);
3307                 return -LIBBPF_ERRNO__FORMAT;
3308             }
3309
3310             data = elf_sec_data(obj, scn);
3311             if (!data)
3312                 return -LIBBPF_ERRNO__FORMAT;
3313
3314             idx = elf_ndxscn(scn);
3315
3316             obj->efile.symbols = data;
3317             obj->efile.symbols_shndx = idx;
3318             obj->efile.strtabidx = sh->sh_link;
3319         }
3320     }
3321
3322     if (!obj->efile.symbols) {
3323         pr_warn("elf: couldn't find symbol table in %s, stripped object file?\n",
3324             obj->path);
3325         return -ENOENT;
3326     }
3327
3328     scn = NULL;
3329     while ((scn = elf_nextscn(elf, scn)) != NULL) {
3330         idx = elf_ndxscn(scn);
3331         sec_desc = &obj->efile.secs[idx];
3332
3333         sh = elf_sec_hdr(obj, scn);
3334         if (!sh)
3335             return -LIBBPF_ERRNO__FORMAT;
3336
3337         name = elf_sec_str(obj, sh->sh_name);
3338         if (!name)
3339             return -LIBBPF_ERRNO__FORMAT;
3340
3341         if (ignore_elf_section(sh, name))
3342             continue;
3343
3344         data = elf_sec_data(obj, scn);
3345         if (!data)
3346             return -LIBBPF_ERRNO__FORMAT;
3347
3348         pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
3349              idx, name, (unsigned long)data->d_size,
3350              (int)sh->sh_link, (unsigned long)sh->sh_flags,
3351              (int)sh->sh_type);
3352
3353         if (strcmp(name, "license") == 0) {
3354             err = bpf_object__init_license(obj, data->d_buf, data->d_size);
3355             if (err)
3356                 return err;
3357         } else if (strcmp(name, "version") == 0) {
3358             err = bpf_object__init_kversion(obj, data->d_buf, data->d_size);
3359             if (err)
3360                 return err;
3361         } else if (strcmp(name, "maps") == 0) {
3362             obj->efile.maps_shndx = idx;
3363         } else if (strcmp(name, MAPS_ELF_SEC) == 0) {
3364             obj->efile.btf_maps_shndx = idx;
3365         } else if (strcmp(name, BTF_ELF_SEC) == 0) {
3366             if (sh->sh_type != SHT_PROGBITS)
3367                 return -LIBBPF_ERRNO__FORMAT;
3368             btf_data = data;
3369         } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
3370             if (sh->sh_type != SHT_PROGBITS)
3371                 return -LIBBPF_ERRNO__FORMAT;
3372             btf_ext_data = data;
3373         } else if (sh->sh_type == SHT_SYMTAB) {
3374             /* already processed during the first pass above */
3375         } else if (sh->sh_type == SHT_PROGBITS && data->d_size > 0) {
3376             if (sh->sh_flags & SHF_EXECINSTR) {
3377                 if (strcmp(name, ".text") == 0)
3378                     obj->efile.text_shndx = idx;
3379                 err = bpf_object__add_programs(obj, data, name, idx);
3380                 if (err)
3381                     return err;
3382             } else if (strcmp(name, DATA_SEC) == 0 ||
3383                    str_has_pfx(name, DATA_SEC ".")) {
3384                 sec_desc->sec_type = SEC_DATA;
3385                 sec_desc->shdr = sh;
3386                 sec_desc->data = data;
3387             } else if (strcmp(name, RODATA_SEC) == 0 ||
3388                    str_has_pfx(name, RODATA_SEC ".")) {
3389                 sec_desc->sec_type = SEC_RODATA;
3390                 sec_desc->shdr = sh;
3391                 sec_desc->data = data;
3392             } else if (strcmp(name, STRUCT_OPS_SEC) == 0) {
3393                 obj->efile.st_ops_data = data;
3394                 obj->efile.st_ops_shndx = idx;
3395             } else {
3396                 pr_info("elf: skipping unrecognized data section(%d) %s\n",
3397                     idx, name);
3398             }
3399         } else if (sh->sh_type == SHT_REL) {
3400             int targ_sec_idx = sh->sh_info; /* points to other section */
3401
3402             if (sh->sh_entsize != sizeof(Elf64_Rel) ||
3403                 targ_sec_idx >= obj->efile.sec_cnt)
3404                 return -LIBBPF_ERRNO__FORMAT;
3405
3406             /* Only do relo for section with exec instructions */
3407             if (!section_have_execinstr(obj, targ_sec_idx) &&
3408                 strcmp(name, ".rel" STRUCT_OPS_SEC) &&
3409                 strcmp(name, ".rel" MAPS_ELF_SEC)) {
3410                 pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
3411                     idx, name, targ_sec_idx,
3412                     elf_sec_name(obj, elf_sec_by_idx(obj, targ_sec_idx)) ?: "<?>");
3413                 continue;
3414             }
3415
3416             sec_desc->sec_type = SEC_RELO;
3417             sec_desc->shdr = sh;
3418             sec_desc->data = data;
3419         } else if (sh->sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) {
3420             sec_desc->sec_type = SEC_BSS;
3421             sec_desc->shdr = sh;
3422             sec_desc->data = data;
3423         } else {
3424             pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name,
3425                 (size_t)sh->sh_size);
3426         }
3427     }
3428
3429     if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
3430         pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path);
3431         return -LIBBPF_ERRNO__FORMAT;
3432     }
3433
3434     /* sort BPF programs by section name and in-section instruction offset
3435      * for faster search */
3436     if (obj->nr_programs)
3437         qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
3438
3439     return bpf_object__init_btf(obj, btf_data, btf_ext_data);
3440 }
3441
/* An extern is an undefined (SHN_UNDEF) symbol of type NOTYPE with GLOBAL or
 * WEAK binding.
 */
static bool sym_is_extern(const Elf64_Sym *sym)
{
    const int binding = ELF64_ST_BIND(sym->st_info);
    const int sym_type = ELF64_ST_TYPE(sym->st_info);

    if (sym->st_shndx != SHN_UNDEF)
        return false;
    if (binding != STB_GLOBAL && binding != STB_WEAK)
        return false;

    return sym_type == STT_NOTYPE;
}
3450
/* A symbol denotes a subprogram when it lives in the section with index
 * @text_shndx and is either a LOCAL section symbol or a GLOBAL function
 * symbol.
 */
static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx)
{
    const int binding = ELF64_ST_BIND(sym->st_info);
    const int sym_type = ELF64_ST_TYPE(sym->st_info);
    bool local_func, global_func;

    /* must be located in the .text section */
    if (sym->st_shndx != text_shndx)
        return false;

    local_func = binding == STB_LOCAL && sym_type == STT_SECTION;
    global_func = binding == STB_GLOBAL && sym_type == STT_FUNC;

    return local_func || global_func;
}
3467
3468 static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
3469 {
3470     const struct btf_type *t;
3471     const char *tname;
3472     int i, n;
3473
3474     if (!btf)
3475         return -ESRCH;
3476
3477     n = btf__type_cnt(btf);
3478     for (i = 1; i < n; i++) {
3479         t = btf__type_by_id(btf, i);
3480
3481         if (!btf_is_var(t) && !btf_is_func(t))
3482             continue;
3483
3484         tname = btf__name_by_offset(btf, t->name_off);
3485         if (strcmp(tname, ext_name))
3486             continue;
3487
3488         if (btf_is_var(t) &&
3489             btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
3490             return -EINVAL;
3491
3492         if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN)
3493             return -EINVAL;
3494
3495         return i;
3496     }
3497
3498     return -ENOENT;
3499 }
3500
3501 static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) {
3502     const struct btf_var_secinfo *vs;
3503     const struct btf_type *t;
3504     int i, j, n;
3505
3506     if (!btf)
3507         return -ESRCH;
3508
3509     n = btf__type_cnt(btf);
3510     for (i = 1; i < n; i++) {
3511         t = btf__type_by_id(btf, i);
3512
3513         if (!btf_is_datasec(t))
3514             continue;
3515
3516         vs = btf_var_secinfos(t);
3517         for (j = 0; j < btf_vlen(t); j++, vs++) {
3518             if (vs->type == ext_btf_id)
3519                 return i;
3520         }
3521     }
3522
3523     return -ENOENT;
3524 }
3525
3526 static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
3527                      bool *is_signed)
3528 {
3529     const struct btf_type *t;
3530     const char *name;
3531
3532     t = skip_mods_and_typedefs(btf, id, NULL);
3533     name = btf__name_by_offset(btf, t->name_off);
3534
3535     if (is_signed)
3536         *is_signed = false;
3537     switch (btf_kind(t)) {
3538     case BTF_KIND_INT: {
3539         int enc = btf_int_encoding(t);
3540
3541         if (enc & BTF_INT_BOOL)
3542             return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
3543         if (is_signed)
3544             *is_signed = enc & BTF_INT_SIGNED;
3545         if (t->size == 1)
3546             return KCFG_CHAR;
3547         if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
3548             return KCFG_UNKNOWN;
3549         return KCFG_INT;
3550     }
3551     case BTF_KIND_ENUM:
3552         if (t->size != 4)
3553             return KCFG_UNKNOWN;
3554         if (strcmp(name, "libbpf_tristate"))
3555             return KCFG_UNKNOWN;
3556         return KCFG_TRISTATE;
3557     case BTF_KIND_ENUM64:
3558         if (strcmp(name, "libbpf_tristate"))
3559             return KCFG_UNKNOWN;
3560         return KCFG_TRISTATE;
3561     case BTF_KIND_ARRAY:
3562         if (btf_array(t)->nelems == 0)
3563             return KCFG_UNKNOWN;
3564         if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
3565             return KCFG_UNKNOWN;
3566         return KCFG_CHAR_ARR;
3567     default:
3568         return KCFG_UNKNOWN;
3569     }
3570 }
3571
3572 static int cmp_externs(const void *_a, const void *_b)
3573 {
3574     const struct extern_desc *a = _a;
3575     const struct extern_desc *b = _b;
3576
3577     if (a->type != b->type)
3578         return a->type < b->type ? -1 : 1;
3579
3580     if (a->type == EXT_KCFG) {
3581         /* descending order by alignment requirements */
3582         if (a->kcfg.align != b->kcfg.align)
3583             return a->kcfg.align > b->kcfg.align ? -1 : 1;
3584         /* ascending order by size, within same alignment class */
3585         if (a->kcfg.sz != b->kcfg.sz)
3586             return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
3587     }
3588
3589     /* resolve ties by name */
3590     return strcmp(a->name, b->name);
3591 }
3592
/*
 * Return the BTF ID of the first INT type in @btf that is exactly 32 bits
 * wide, or 0 if there is none.
 */
static int find_int_btf_id(const struct btf *btf)
{
    const int type_cnt = btf__type_cnt(btf);
    int id = 1;

    while (id < type_cnt) {
        const struct btf_type *cand = btf__type_by_id(btf, id);

        if (btf_is_int(cand) && btf_int_bits(cand) == 32)
            return id;
        id++;
    }

    return 0;
}
3608
3609 static int add_dummy_ksym_var(struct btf *btf)
3610 {
3611     int i, int_btf_id, sec_btf_id, dummy_var_btf_id;
3612     const struct btf_var_secinfo *vs;
3613     const struct btf_type *sec;
3614
3615     if (!btf)
3616         return 0;
3617
3618     sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC,
3619                         BTF_KIND_DATASEC);
3620     if (sec_btf_id < 0)
3621         return 0;
3622
3623     sec = btf__type_by_id(btf, sec_btf_id);
3624     vs = btf_var_secinfos(sec);
3625     for (i = 0; i < btf_vlen(sec); i++, vs++) {
3626         const struct btf_type *vt;
3627
3628         vt = btf__type_by_id(btf, vs->type);
3629         if (btf_is_func(vt))
3630             break;
3631     }
3632
3633     /* No func in ksyms sec.  No need to add dummy var. */
3634     if (i == btf_vlen(sec))
3635         return 0;
3636
3637     int_btf_id = find_int_btf_id(btf);
3638     dummy_var_btf_id = btf__add_var(btf,
3639                     "dummy_ksym",
3640                     BTF_VAR_GLOBAL_ALLOCATED,
3641                     int_btf_id);
3642     if (dummy_var_btf_id < 0)
3643         pr_warn("cannot create a dummy_ksym var\n");
3644
3645     return dummy_var_btf_id;
3646 }
3647
3648 static int bpf_object__collect_externs(struct bpf_object *obj)
3649 {
3650     struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
3651     const struct btf_type *t;
3652     struct extern_desc *ext;
3653     int i, n, off, dummy_var_btf_id;
3654     const char *ext_name, *sec_name;
3655     Elf_Scn *scn;
3656     Elf64_Shdr *sh;
3657
3658     if (!obj->efile.symbols)
3659         return 0;
3660
3661     scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx);
3662     sh = elf_sec_hdr(obj, scn);
3663     if (!sh || sh->sh_entsize != sizeof(Elf64_Sym))
3664         return -LIBBPF_ERRNO__FORMAT;
3665
3666     dummy_var_btf_id = add_dummy_ksym_var(obj->btf);
3667     if (dummy_var_btf_id < 0)
3668         return dummy_var_btf_id;
3669
3670     n = sh->sh_size / sh->sh_entsize;
3671     pr_debug("looking for externs among %d symbols...\n", n);
3672
3673     for (i = 0; i < n; i++) {
3674         Elf64_Sym *sym = elf_sym_by_idx(obj, i);
3675
3676         if (!sym)
3677             return -LIBBPF_ERRNO__FORMAT;
3678         if (!sym_is_extern(sym))
3679             continue;
3680         ext_name = elf_sym_str(obj, sym->st_name);
3681         if (!ext_name || !ext_name[0])
3682             continue;
3683
3684         ext = obj->externs;
3685         ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
3686         if (!ext)
3687             return -ENOMEM;
3688         obj->externs = ext;
3689         ext = &ext[obj->nr_extern];
3690         memset(ext, 0, sizeof(*ext));
3691         obj->nr_extern++;
3692
3693         ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
3694         if (ext->btf_id <= 0) {
3695             pr_warn("failed to find BTF for extern '%s': %d\n",
3696                 ext_name, ext->btf_id);
3697             return ext->btf_id;
3698         }
3699         t = btf__type_by_id(obj->btf, ext->btf_id);
3700         ext->name = btf__name_by_offset(obj->btf, t->name_off);
3701         ext->sym_idx = i;
3702         ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK;
3703
3704         ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
3705         if (ext->sec_btf_id <= 0) {
3706             pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
3707                 ext_name, ext->btf_id, ext->sec_btf_id);
3708             return ext->sec_btf_id;
3709         }
3710         sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id);
3711         sec_name = btf__name_by_offset(obj->btf, sec->name_off);
3712
3713         if (strcmp(sec_name, KCONFIG_SEC) == 0) {
3714             if (btf_is_func(t)) {
3715                 pr_warn("extern function %s is unsupported under %s section\n",
3716                     ext->name, KCONFIG_SEC);
3717                 return -ENOTSUP;
3718             }
3719             kcfg_sec = sec;
3720             ext->type = EXT_KCFG;
3721             ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
3722             if (ext->kcfg.sz <= 0) {
3723                 pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
3724                     ext_name, ext->kcfg.sz);
3725                 return ext->kcfg.sz;
3726             }
3727             ext->kcfg.align = btf__align_of(obj->btf, t->type);
3728             if (ext->kcfg.align <= 0) {
3729                 pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
3730                     ext_name, ext->kcfg.align);
3731                 return -EINVAL;
3732             }
3733             ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
3734                                 &ext->kcfg.is_signed);
3735             if (ext->kcfg.type == KCFG_UNKNOWN) {
3736                 pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name);
3737                 return -ENOTSUP;
3738             }
3739         } else if (strcmp(sec_name, KSYMS_SEC) == 0) {
3740             ksym_sec = sec;
3741             ext->type = EXT_KSYM;
3742             skip_mods_and_typedefs(obj->btf, t->type,
3743                            &ext->ksym.type_id);
3744         } else {
3745             pr_warn("unrecognized extern section '%s'\n", sec_name);
3746             return -ENOTSUP;
3747         }
3748     }
3749     pr_debug("collected %d externs total\n", obj->nr_extern);
3750
3751     if (!obj->nr_extern)
3752         return 0;
3753
3754     /* sort externs by type, for kcfg ones also by (align, size, name) */
3755     qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
3756
3757     /* for .ksyms section, we need to turn all externs into allocated
3758      * variables in BTF to pass kernel verification; we do this by
3759      * pretending that each extern is a 8-byte variable
3760      */
3761     if (ksym_sec) {
3762         /* find existing 4-byte integer type in BTF to use for fake
3763          * extern variables in DATASEC
3764          */
3765         int int_btf_id = find_int_btf_id(obj->btf);
3766         /* For extern function, a dummy_var added earlier
3767          * will be used to replace the vs->type and
3768          * its name string will be used to refill
3769          * the missing param's name.
3770          */
3771         const struct btf_type *dummy_var;
3772
3773         dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id);
3774         for (i = 0; i < obj->nr_extern; i++) {
3775             ext = &obj->externs[i];
3776             if (ext->type != EXT_KSYM)
3777                 continue;
3778             pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
3779                  i, ext->sym_idx, ext->name);
3780         }
3781
3782         sec = ksym_sec;
3783         n = btf_vlen(sec);
3784         for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
3785             struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
3786             struct btf_type *vt;
3787
3788             vt = (void *)btf__type_by_id(obj->btf, vs->type);
3789             ext_name = btf__name_by_offset(obj->btf, vt->name_off);
3790             ext = find_extern_by_name(obj, ext_name);
3791             if (!ext) {
3792                 pr_warn("failed to find extern definition for BTF %s '%s'\n",
3793                     btf_kind_str(vt), ext_name);
3794                 return -ESRCH;
3795             }
3796             if (btf_is_func(vt)) {
3797                 const struct btf_type *func_proto;
3798                 struct btf_param *param;
3799                 int j;
3800
3801                 func_proto = btf__type_by_id(obj->btf,
3802                                  vt->type);
3803                 param = btf_params(func_proto);
3804                 /* Reuse the dummy_var string if the
3805                  * func proto does not have param name.
3806                  */
3807                 for (j = 0; j < btf_vlen(func_proto); j++)
3808                     if (param[j].type && !param[j].name_off)
3809                         param[j].name_off =
3810                             dummy_var->name_off;
3811                 vs->type = dummy_var_btf_id;
3812                 vt->info &= ~0xffff;
3813                 vt->info |= BTF_FUNC_GLOBAL;
3814             } else {
3815                 btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
3816                 vt->type = int_btf_id;
3817             }
3818             vs->offset = off;
3819             vs->size = sizeof(int);
3820         }
3821         sec->size = off;
3822     }
3823
3824     if (kcfg_sec) {
3825         sec = kcfg_sec;
3826         /* for kcfg externs calculate their offsets within a .kconfig map */
3827         off = 0;
3828         for (i = 0; i < obj->nr_extern; i++) {
3829             ext = &obj->externs[i];
3830             if (ext->type != EXT_KCFG)
3831                 continue;
3832
3833             ext->kcfg.data_off = roundup(off, ext->kcfg.align);
3834             off = ext->kcfg.data_off + ext->kcfg.sz;
3835             pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
3836                  i, ext->sym_idx, ext->kcfg.data_off, ext->name);
3837         }
3838         sec->size = off;
3839         n = btf_vlen(sec);
3840         for (i = 0; i < n; i++) {
3841             struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
3842
3843             t = btf__type_by_id(obj->btf, vs->type);
3844             ext_name = btf__name_by_offset(obj->btf, t->name_off);
3845             ext = find_extern_by_name(obj, ext_name);
3846             if (!ext) {
3847                 pr_warn("failed to find extern definition for BTF var '%s'\n",
3848                     ext_name);
3849                 return -ESRCH;
3850             }
3851             btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
3852             vs->offset = ext->kcfg.data_off;
3853         }
3854     }
3855     return 0;
3856 }
3857
3858 static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog)
3859 {
3860     return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1;
3861 }
3862
3863 struct bpf_program *
3864 bpf_object__find_program_by_name(const struct bpf_object *obj,
3865                  const char *name)
3866 {
3867     struct bpf_program *prog;
3868
3869     bpf_object__for_each_program(prog, obj) {
3870         if (prog_is_subprog(obj, prog))
3871             continue;
3872         if (!strcmp(prog->name, name))
3873             return prog;
3874     }
3875     return errno = ENOENT, NULL;
3876 }
3877
3878 static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
3879                       int shndx)
3880 {
3881     switch (obj->efile.secs[shndx].sec_type) {
3882     case SEC_BSS:
3883     case SEC_DATA:
3884     case SEC_RODATA:
3885         return true;
3886     default:
3887         return false;
3888     }
3889 }
3890
3891 static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
3892                       int shndx)
3893 {
3894     return shndx == obj->efile.maps_shndx ||
3895            shndx == obj->efile.btf_maps_shndx;
3896 }
3897
3898 static enum libbpf_map_type
3899 bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
3900 {
3901     if (shndx == obj->efile.symbols_shndx)
3902         return LIBBPF_MAP_KCONFIG;
3903
3904     switch (obj->efile.secs[shndx].sec_type) {
3905     case SEC_BSS:
3906         return LIBBPF_MAP_BSS;
3907     case SEC_DATA:
3908         return LIBBPF_MAP_DATA;
3909     case SEC_RODATA:
3910         return LIBBPF_MAP_RODATA;
3911     default:
3912         return LIBBPF_MAP_UNSPEC;
3913     }
3914 }
3915
3916 static int bpf_program__record_reloc(struct bpf_program *prog,
3917                      struct reloc_desc *reloc_desc,
3918                      __u32 insn_idx, const char *sym_name,
3919                      const Elf64_Sym *sym, const Elf64_Rel *rel)
3920 {
3921     struct bpf_insn *insn = &prog->insns[insn_idx];
3922     size_t map_idx, nr_maps = prog->obj->nr_maps;
3923     struct bpf_object *obj = prog->obj;
3924     __u32 shdr_idx = sym->st_shndx;
3925     enum libbpf_map_type type;
3926     const char *sym_sec_name;
3927     struct bpf_map *map;
3928
3929     if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) {
3930         pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
3931             prog->name, sym_name, insn_idx, insn->code);
3932         return -LIBBPF_ERRNO__RELOC;
3933     }
3934
3935     if (sym_is_extern(sym)) {
3936         int sym_idx = ELF64_R_SYM(rel->r_info);
3937         int i, n = obj->nr_extern;
3938         struct extern_desc *ext;
3939
3940         for (i = 0; i < n; i++) {
3941             ext = &obj->externs[i];
3942             if (ext->sym_idx == sym_idx)
3943                 break;
3944         }
3945         if (i >= n) {
3946             pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n",
3947                 prog->name, sym_name, sym_idx);
3948             return -LIBBPF_ERRNO__RELOC;
3949         }
3950         pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
3951              prog->name, i, ext->name, ext->sym_idx, insn_idx);
3952         if (insn->code == (BPF_JMP | BPF_CALL))
3953             reloc_desc->type = RELO_EXTERN_FUNC;
3954         else
3955             reloc_desc->type = RELO_EXTERN_VAR;
3956         reloc_desc->insn_idx = insn_idx;
3957         reloc_desc->sym_off = i; /* sym_off stores extern index */
3958         return 0;
3959     }
3960
3961     /* sub-program call relocation */
3962     if (is_call_insn(insn)) {
3963         if (insn->src_reg != BPF_PSEUDO_CALL) {
3964             pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
3965             return -LIBBPF_ERRNO__RELOC;
3966         }
3967         /* text_shndx can be 0, if no default "main" program exists */
3968         if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
3969             sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
3970             pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
3971                 prog->name, sym_name, sym_sec_name);
3972             return -LIBBPF_ERRNO__RELOC;
3973         }
3974         if (sym->st_value % BPF_INSN_SZ) {
3975             pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
3976                 prog->name, sym_name, (size_t)sym->st_value);
3977             return -LIBBPF_ERRNO__RELOC;
3978         }
3979         reloc_desc->type = RELO_CALL;
3980         reloc_desc->insn_idx = insn_idx;
3981         reloc_desc->sym_off = sym->st_value;
3982         return 0;
3983     }
3984
3985     if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
3986         pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
3987             prog->name, sym_name, shdr_idx);
3988         return -LIBBPF_ERRNO__RELOC;
3989     }
3990
3991     /* loading subprog addresses */
3992     if (sym_is_subprog(sym, obj->efile.text_shndx)) {
3993         /* global_func: sym->st_value = offset in the section, insn->imm = 0.
3994          * local_func: sym->st_value = 0, insn->imm = offset in the section.
3995          */
3996         if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) {
3997             pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n",
3998                 prog->name, sym_name, (size_t)sym->st_value, insn->imm);
3999             return -LIBBPF_ERRNO__RELOC;
4000         }
4001
4002         reloc_desc->type = RELO_SUBPROG_ADDR;
4003         reloc_desc->insn_idx = insn_idx;
4004         reloc_desc->sym_off = sym->st_value;
4005         return 0;
4006     }
4007
4008     type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
4009     sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
4010
4011     /* generic map reference relocation */
4012     if (type == LIBBPF_MAP_UNSPEC) {
4013         if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
4014             pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n",
4015                 prog->name, sym_name, sym_sec_name);
4016             return -LIBBPF_ERRNO__RELOC;
4017         }
4018         for (map_idx = 0; map_idx < nr_maps; map_idx++) {
4019             map = &obj->maps[map_idx];
4020             if (map->libbpf_type != type ||
4021                 map->sec_idx != sym->st_shndx ||
4022                 map->sec_offset != sym->st_value)
4023                 continue;
4024             pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n",
4025                  prog->name, map_idx, map->name, map->sec_idx,
4026                  map->sec_offset, insn_idx);
4027             break;
4028         }
4029         if (map_idx >= nr_maps) {
4030             pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n",
4031                 prog->name, sym_sec_name, (size_t)sym->st_value);
4032             return -LIBBPF_ERRNO__RELOC;
4033         }
4034         reloc_desc->type = RELO_LD64;
4035         reloc_desc->insn_idx = insn_idx;
4036         reloc_desc->map_idx = map_idx;
4037         reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */
4038         return 0;
4039     }
4040
4041     /* global data map relocation */
4042     if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
4043         pr_warn("prog '%s': bad data relo against section '%s'\n",
4044             prog->name, sym_sec_name);
4045         return -LIBBPF_ERRNO__RELOC;
4046     }
4047     for (map_idx = 0; map_idx < nr_maps; map_idx++) {
4048         map = &obj->maps[map_idx];
4049         if (map->libbpf_type != type || map->sec_idx != sym->st_shndx)
4050             continue;
4051         pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n",
4052              prog->name, map_idx, map->name, map->sec_idx,
4053              map->sec_offset, insn_idx);
4054         break;
4055     }
4056     if (map_idx >= nr_maps) {
4057         pr_warn("prog '%s': data relo failed to find map for section '%s'\n",
4058             prog->name, sym_sec_name);
4059         return -LIBBPF_ERRNO__RELOC;
4060     }
4061
4062     reloc_desc->type = RELO_DATA;
4063     reloc_desc->insn_idx = insn_idx;
4064     reloc_desc->map_idx = map_idx;
4065     reloc_desc->sym_off = sym->st_value;
4066     return 0;
4067 }
4068
4069 static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx)
4070 {
4071     return insn_idx >= prog->sec_insn_off &&
4072            insn_idx < prog->sec_insn_off + prog->sec_insn_cnt;
4073 }
4074
4075 static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
4076                          size_t sec_idx, size_t insn_idx)
4077 {
4078     int l = 0, r = obj->nr_programs - 1, m;
4079     struct bpf_program *prog;
4080
4081     while (l < r) {
4082         m = l + (r - l + 1) / 2;
4083         prog = &obj->programs[m];
4084
4085         if (prog->sec_idx < sec_idx ||
4086             (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx))
4087             l = m;
4088         else
4089             r = m - 1;
4090     }
4091     /* matching program could be at index l, but it still might be the
4092      * wrong one, so we need to double check conditions for the last time
4093      */
4094     prog = &obj->programs[l];
4095     if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx))
4096         return prog;
4097     return NULL;
4098 }
4099
4100 static int
4101 bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data)
4102 {
4103     const char *relo_sec_name, *sec_name;
4104     size_t sec_idx = shdr->sh_info, sym_idx;
4105     struct bpf_program *prog;
4106     struct reloc_desc *relos;
4107     int err, i, nrels;
4108     const char *sym_name;
4109     __u32 insn_idx;
4110     Elf_Scn *scn;
4111     Elf_Data *scn_data;
4112     Elf64_Sym *sym;
4113     Elf64_Rel *rel;
4114
4115     if (sec_idx >= obj->efile.sec_cnt)
4116         return -EINVAL;
4117
4118     scn = elf_sec_by_idx(obj, sec_idx);
4119     scn_data = elf_sec_data(obj, scn);
4120
4121     relo_sec_name = elf_sec_str(obj, shdr->sh_name);
4122     sec_name = elf_sec_name(obj, scn);
4123     if (!relo_sec_name || !sec_name)
4124         return -EINVAL;
4125
4126     pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n",
4127          relo_sec_name, sec_idx, sec_name);
4128     nrels = shdr->sh_size / shdr->sh_entsize;
4129
4130     for (i = 0; i < nrels; i++) {
4131         rel = elf_rel_by_idx(data, i);
4132         if (!rel) {
4133             pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i);
4134             return -LIBBPF_ERRNO__FORMAT;
4135         }
4136
4137         sym_idx = ELF64_R_SYM(rel->r_info);
4138         sym = elf_sym_by_idx(obj, sym_idx);
4139         if (!sym) {
4140             pr_warn("sec '%s': symbol #%zu not found for relo #%d\n",
4141                 relo_sec_name, sym_idx, i);
4142             return -LIBBPF_ERRNO__FORMAT;
4143         }
4144
4145         if (sym->st_shndx >= obj->efile.sec_cnt) {
4146             pr_warn("sec '%s': corrupted symbol #%zu pointing to invalid section #%zu for relo #%d\n",
4147                 relo_sec_name, sym_idx, (size_t)sym->st_shndx, i);
4148             return -LIBBPF_ERRNO__FORMAT;
4149         }
4150
4151         if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) {
4152             pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
4153                 relo_sec_name, (size_t)rel->r_offset, i);
4154             return -LIBBPF_ERRNO__FORMAT;
4155         }
4156
4157         insn_idx = rel->r_offset / BPF_INSN_SZ;
4158         /* relocations against static functions are recorded as
4159          * relocations against the section that contains a function;
4160          * in such case, symbol will be STT_SECTION and sym.st_name
4161          * will point to empty string (0), so fetch section name
4162          * instead
4163          */
4164         if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && sym->st_name == 0)
4165             sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym->st_shndx));
4166         else
4167             sym_name = elf_sym_str(obj, sym->st_name);
4168         sym_name = sym_name ?: "<?";
4169
4170         pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n",
4171              relo_sec_name, i, insn_idx, sym_name);
4172
4173         prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
4174         if (!prog) {
4175             pr_debug("sec '%s': relo #%d: couldn't find program in section '%s' for insn #%u, probably overridden weak function, skipping...\n",
4176                 relo_sec_name, i, sec_name, insn_idx);
4177             continue;
4178         }
4179
4180         relos = libbpf_reallocarray(prog->reloc_desc,
4181                         prog->nr_reloc + 1, sizeof(*relos));
4182         if (!relos)
4183             return -ENOMEM;
4184         prog->reloc_desc = relos;
4185
4186         /* adjust insn_idx to local BPF program frame of reference */
4187         insn_idx -= prog->sec_insn_off;
4188         err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc],
4189                         insn_idx, sym_name, sym, rel);
4190         if (err)
4191             return err;
4192
4193         prog->nr_reloc++;
4194     }
4195     return 0;
4196 }
4197
4198 static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
4199 {
4200     int id;
4201
4202     if (!obj->btf)
4203         return -ENOENT;
4204
4205     /* if it's BTF-defined map, we don't need to search for type IDs.
4206      * For struct_ops map, it does not need btf_key_type_id and
4207      * btf_value_type_id.
4208      */
4209     if (map->sec_idx == obj->efile.btf_maps_shndx || bpf_map__is_struct_ops(map))
4210         return 0;
4211
4212     /*
4213      * LLVM annotates global data differently in BTF, that is,
4214      * only as '.data', '.bss' or '.rodata'.
4215      */
4216     if (!bpf_map__is_internal(map))
4217         return -ENOENT;
4218
4219     id = btf__find_by_name(obj->btf, map->real_name);
4220     if (id < 0)
4221         return id;
4222
4223     map->btf_key_type_id = 0;
4224     map->btf_value_type_id = id;
4225     return 0;
4226 }
4227
4228 static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
4229 {
4230     char file[PATH_MAX], buff[4096];
4231     FILE *fp;
4232     __u32 val;
4233     int err;
4234
4235     snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
4236     memset(info, 0, sizeof(*info));
4237
4238     fp = fopen(file, "r");
4239     if (!fp) {
4240         err = -errno;
4241         pr_warn("failed to open %s: %d. No procfs support?\n", file,
4242             err);
4243         return err;
4244     }
4245
4246     while (fgets(buff, sizeof(buff), fp)) {
4247         if (sscanf(buff, "map_type:\t%u", &val) == 1)
4248             info->type = val;
4249         else if (sscanf(buff, "key_size:\t%u", &val) == 1)
4250             info->key_size = val;
4251         else if (sscanf(buff, "value_size:\t%u", &val) == 1)
4252             info->value_size = val;
4253         else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
4254             info->max_entries = val;
4255         else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
4256             info->map_flags = val;
4257     }
4258
4259     fclose(fp);
4260
4261     return 0;
4262 }
4263
4264 bool bpf_map__autocreate(const struct bpf_map *map)
4265 {
4266     return map->autocreate;
4267 }
4268
4269 int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate)
4270 {
4271     if (map->obj->loaded)
4272         return libbpf_err(-EBUSY);
4273
4274     map->autocreate = autocreate;
4275     return 0;
4276 }
4277
4278 int bpf_map__reuse_fd(struct bpf_map *map, int fd)
4279 {
4280     struct bpf_map_info info = {};
4281     __u32 len = sizeof(info), name_len;
4282     int new_fd, err;
4283     char *new_name;
4284
4285     err = bpf_obj_get_info_by_fd(fd, &info, &len);
4286     if (err && errno == EINVAL)
4287         err = bpf_get_map_info_from_fdinfo(fd, &info);
4288     if (err)
4289         return libbpf_err(err);
4290
4291     name_len = strlen(info.name);
4292     if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0)
4293         new_name = strdup(map->name);
4294     else
4295         new_name = strdup(info.name);
4296
4297     if (!new_name)
4298         return libbpf_err(-errno);
4299
4300     new_fd = open("/", O_RDONLY | O_CLOEXEC);
4301     if (new_fd < 0) {
4302         err = -errno;
4303         goto err_free_new_name;
4304     }
4305
4306     new_fd = dup3(fd, new_fd, O_CLOEXEC);
4307     if (new_fd < 0) {
4308         err = -errno;
4309         goto err_close_new_fd;
4310     }
4311
4312     err = zclose(map->fd);
4313     if (err) {
4314         err = -errno;
4315         goto err_close_new_fd;
4316     }
4317     free(map->name);
4318
4319     map->fd = new_fd;
4320     map->name = new_name;
4321     map->def.type = info.type;
4322     map->def.key_size = info.key_size;
4323     map->def.value_size = info.value_size;
4324     map->def.max_entries = info.max_entries;
4325     map->def.map_flags = info.map_flags;
4326     map->btf_key_type_id = info.btf_key_type_id;
4327     map->btf_value_type_id = info.btf_value_type_id;
4328     map->reused = true;
4329     map->map_extra = info.map_extra;
4330
4331     return 0;
4332
4333 err_close_new_fd:
4334     close(new_fd);
4335 err_free_new_name:
4336     free(new_name);
4337     return libbpf_err(err);
4338 }
4339
4340 __u32 bpf_map__max_entries(const struct bpf_map *map)
4341 {
4342     return map->def.max_entries;
4343 }
4344
4345 struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
4346 {
4347     if (!bpf_map_type__is_map_in_map(map->def.type))
4348         return errno = EINVAL, NULL;
4349
4350     return map->inner_map;
4351 }
4352
4353 int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
4354 {
4355     if (map->obj->loaded)
4356         return libbpf_err(-EBUSY);
4357
4358     map->def.max_entries = max_entries;
4359
4360     /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
4361     if (map->def.type == BPF_MAP_TYPE_RINGBUF)
4362         map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
4363
4364     return 0;
4365 }
4366
4367 static int
4368 bpf_object__probe_loading(struct bpf_object *obj)
4369 {
4370     char *cp, errmsg[STRERR_BUFSIZE];
4371     struct bpf_insn insns[] = {
4372         BPF_MOV64_IMM(BPF_REG_0, 0),
4373         BPF_EXIT_INSN(),
4374     };
4375     int ret, insn_cnt = ARRAY_SIZE(insns);
4376
4377     if (obj->gen_loader)
4378         return 0;
4379
4380     ret = bump_rlimit_memlock();
4381     if (ret)
4382         pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret);
4383
4384     /* make sure basic loading works */
4385     ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
4386     if (ret < 0)
4387         ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL);
4388     if (ret < 0) {
4389         ret = errno;
4390         cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
4391         pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF "
4392             "program. Make sure your kernel supports BPF "
4393             "(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is "
4394             "set to big enough value.\n", __func__, cp, ret);
4395         return -ret;
4396     }
4397     close(ret);
4398
4399     return 0;
4400 }
4401
/*
 * Turn the result of a probing call into a boolean: a non-negative @fd is
 * closed and reported as 1, a negative one is reported as 0.
 */
static int probe_fd(int fd)
{
    if (fd < 0)
        return 0;

    close(fd);
    return 1;
}
4408
4409 static int probe_kern_prog_name(void)
4410 {
4411     struct bpf_insn insns[] = {
4412         BPF_MOV64_IMM(BPF_REG_0, 0),
4413         BPF_EXIT_INSN(),
4414     };
4415     int ret, insn_cnt = ARRAY_SIZE(insns);
4416
4417     /* make sure loading with name works */
4418     ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "test", "GPL", insns, insn_cnt, NULL);
4419     return probe_fd(ret);
4420 }
4421
4422 static int probe_kern_global_data(void)
4423 {
4424     char *cp, errmsg[STRERR_BUFSIZE];
4425     struct bpf_insn insns[] = {
4426         BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
4427         BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
4428         BPF_MOV64_IMM(BPF_REG_0, 0),
4429         BPF_EXIT_INSN(),
4430     };
4431     int ret, map, insn_cnt = ARRAY_SIZE(insns);
4432
4433     map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL);
4434     if (map < 0) {
4435         ret = -errno;
4436         cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
4437         pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
4438             __func__, cp, -ret);
4439         return ret;
4440     }
4441
4442     insns[0].imm = map;
4443
4444     ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
4445     close(map);
4446     return probe_fd(ret);
4447 }
4448
4449 static int probe_kern_btf(void)
4450 {
4451     static const char strs[] = "\0int";
4452     __u32 types[] = {
4453         /* int */
4454         BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
4455     };
4456
4457     return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4458                          strs, sizeof(strs)));
4459 }
4460
4461 static int probe_kern_btf_func(void)
4462 {
4463     static const char strs[] = "\0int\0x\0a";
4464     /* void x(int a) {} */
4465     __u32 types[] = {
4466         /* int */
4467         BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
4468         /* FUNC_PROTO */                                /* [2] */
4469         BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
4470         BTF_PARAM_ENC(7, 1),
4471         /* FUNC x */                                    /* [3] */
4472         BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
4473     };
4474
4475     return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4476                          strs, sizeof(strs)));
4477 }
4478
4479 static int probe_kern_btf_func_global(void)
4480 {
4481     static const char strs[] = "\0int\0x\0a";
4482     /* static void x(int a) {} */
4483     __u32 types[] = {
4484         /* int */
4485         BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
4486         /* FUNC_PROTO */                                /* [2] */
4487         BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
4488         BTF_PARAM_ENC(7, 1),
4489         /* FUNC x BTF_FUNC_GLOBAL */                    /* [3] */
4490         BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
4491     };
4492
4493     return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4494                          strs, sizeof(strs)));
4495 }
4496
4497 static int probe_kern_btf_datasec(void)
4498 {
4499     static const char strs[] = "\0x\0.data";
4500     /* static int a; */
4501     __u32 types[] = {
4502         /* int */
4503         BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
4504         /* VAR x */                                     /* [2] */
4505         BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
4506         BTF_VAR_STATIC,
4507         /* DATASEC val */                               /* [3] */
4508         BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
4509         BTF_VAR_SECINFO_ENC(2, 0, 4),
4510     };
4511
4512     return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4513                          strs, sizeof(strs)));
4514 }
4515
4516 static int probe_kern_btf_float(void)
4517 {
4518     static const char strs[] = "\0float";
4519     __u32 types[] = {
4520         /* float */
4521         BTF_TYPE_FLOAT_ENC(1, 4),
4522     };
4523
4524     return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4525                          strs, sizeof(strs)));
4526 }
4527
4528 static int probe_kern_btf_decl_tag(void)
4529 {
4530     static const char strs[] = "\0tag";
4531     __u32 types[] = {
4532         /* int */
4533         BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
4534         /* VAR x */                                     /* [2] */
4535         BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
4536         BTF_VAR_STATIC,
4537         /* attr */
4538         BTF_TYPE_DECL_TAG_ENC(1, 2, -1),
4539     };
4540
4541     return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4542                          strs, sizeof(strs)));
4543 }
4544
4545 static int probe_kern_btf_type_tag(void)
4546 {
4547     static const char strs[] = "\0tag";
4548     __u32 types[] = {
4549         /* int */
4550         BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),      /* [1] */
4551         /* attr */
4552         BTF_TYPE_TYPE_TAG_ENC(1, 1),                /* [2] */
4553         /* ptr */
4554         BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),   /* [3] */
4555     };
4556
4557     return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4558                          strs, sizeof(strs)));
4559 }
4560
4561 static int probe_kern_array_mmap(void)
4562 {
4563     LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE);
4564     int fd;
4565
4566     fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), sizeof(int), 1, &opts);
4567     return probe_fd(fd);
4568 }
4569
4570 static int probe_kern_exp_attach_type(void)
4571 {
4572     LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE);
4573     struct bpf_insn insns[] = {
4574         BPF_MOV64_IMM(BPF_REG_0, 0),
4575         BPF_EXIT_INSN(),
4576     };
4577     int fd, insn_cnt = ARRAY_SIZE(insns);
4578
4579     /* use any valid combination of program type and (optional)
4580      * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS)
4581      * to see if kernel supports expected_attach_type field for
4582      * BPF_PROG_LOAD command
4583      */
4584     fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts);
4585     return probe_fd(fd);
4586 }
4587
4588 static int probe_kern_probe_read_kernel(void)
4589 {
4590     struct bpf_insn insns[] = {
4591         BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),   /* r1 = r10 (fp) */
4592         BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),  /* r1 += -8 */
4593         BPF_MOV64_IMM(BPF_REG_2, 8),        /* r2 = 8 */
4594         BPF_MOV64_IMM(BPF_REG_3, 0),        /* r3 = 0 */
4595         BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
4596         BPF_EXIT_INSN(),
4597     };
4598     int fd, insn_cnt = ARRAY_SIZE(insns);
4599
4600     fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL);
4601     return probe_fd(fd);
4602 }
4603
4604 static int probe_prog_bind_map(void)
4605 {
4606     char *cp, errmsg[STRERR_BUFSIZE];
4607     struct bpf_insn insns[] = {
4608         BPF_MOV64_IMM(BPF_REG_0, 0),
4609         BPF_EXIT_INSN(),
4610     };
4611     int ret, map, prog, insn_cnt = ARRAY_SIZE(insns);
4612
4613     map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL);
4614     if (map < 0) {
4615         ret = -errno;
4616         cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
4617         pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
4618             __func__, cp, -ret);
4619         return ret;
4620     }
4621
4622     prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
4623     if (prog < 0) {
4624         close(map);
4625         return 0;
4626     }
4627
4628     ret = bpf_prog_bind_map(prog, map, NULL);
4629
4630     close(map);
4631     close(prog);
4632
4633     return ret >= 0;
4634 }
4635
4636 static int probe_module_btf(void)
4637 {
4638     static const char strs[] = "\0int";
4639     __u32 types[] = {
4640         /* int */
4641         BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
4642     };
4643     struct bpf_btf_info info;
4644     __u32 len = sizeof(info);
4645     char name[16];
4646     int fd, err;
4647
4648     fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs));
4649     if (fd < 0)
4650         return 0; /* BTF not supported at all */
4651
4652     memset(&info, 0, sizeof(info));
4653     info.name = ptr_to_u64(name);
4654     info.name_len = sizeof(name);
4655
4656     /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer;
4657      * kernel's module BTF support coincides with support for
4658      * name/name_len fields in struct bpf_btf_info.
4659      */
4660     err = bpf_obj_get_info_by_fd(fd, &info, &len);
4661     close(fd);
4662     return !err;
4663 }
4664
4665 static int probe_perf_link(void)
4666 {
4667     struct bpf_insn insns[] = {
4668         BPF_MOV64_IMM(BPF_REG_0, 0),
4669         BPF_EXIT_INSN(),
4670     };
4671     int prog_fd, link_fd, err;
4672
4673     prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL",
4674                 insns, ARRAY_SIZE(insns), NULL);
4675     if (prog_fd < 0)
4676         return -errno;
4677
4678     /* use invalid perf_event FD to get EBADF, if link is supported;
4679      * otherwise EINVAL should be returned
4680      */
4681     link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL);
4682     err = -errno; /* close() can clobber errno */
4683
4684     if (link_fd >= 0)
4685         close(link_fd);
4686     close(prog_fd);
4687
4688     return link_fd < 0 && err == -EBADF;
4689 }
4690
4691 static int probe_kern_bpf_cookie(void)
4692 {
4693     struct bpf_insn insns[] = {
4694         BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie),
4695         BPF_EXIT_INSN(),
4696     };
4697     int ret, insn_cnt = ARRAY_SIZE(insns);
4698
4699     ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL);
4700     return probe_fd(ret);
4701 }
4702
4703 static int probe_kern_btf_enum64(void)
4704 {
4705     static const char strs[] = "\0enum64";
4706     __u32 types[] = {
4707         BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8),
4708     };
4709
4710     return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4711                          strs, sizeof(strs)));
4712 }
4713
/* Forward declaration: the probe is referenced by the feature table below
 * but not defined in this part of the file.
 */
static int probe_kern_syscall_wrapper(void);

/* Cached outcome of a kernel feature probe. FEAT_UNKNOWN is zero, so a
 * zero-initialized table entry starts out as "not probed yet".
 */
enum kern_feature_result {
    FEAT_UNKNOWN = 0,
    FEAT_SUPPORTED = 1,
    FEAT_MISSING = 2,
};

/* Signature shared by all feature probes. kernel_supports() treats a
 * positive return as "supported", zero as "missing", and a negative value as
 * a probing failure (logged, then recorded as missing).
 */
typedef int (*feature_probe_fn)(void);
4723
4724 static struct kern_feature_desc {
4725     const char *desc;
4726     feature_probe_fn probe;
4727     enum kern_feature_result res;
4728 } feature_probes[__FEAT_CNT] = {
4729     [FEAT_PROG_NAME] = {
4730         "BPF program name", probe_kern_prog_name,
4731     },
4732     [FEAT_GLOBAL_DATA] = {
4733         "global variables", probe_kern_global_data,
4734     },
4735     [FEAT_BTF] = {
4736         "minimal BTF", probe_kern_btf,
4737     },
4738     [FEAT_BTF_FUNC] = {
4739         "BTF functions", probe_kern_btf_func,
4740     },
4741     [FEAT_BTF_GLOBAL_FUNC] = {
4742         "BTF global function", probe_kern_btf_func_global,
4743     },
4744     [FEAT_BTF_DATASEC] = {
4745         "BTF data section and variable", probe_kern_btf_datasec,
4746     },
4747     [FEAT_ARRAY_MMAP] = {
4748         "ARRAY map mmap()", probe_kern_array_mmap,
4749     },
4750     [FEAT_EXP_ATTACH_TYPE] = {
4751         "BPF_PROG_LOAD expected_attach_type attribute",
4752         probe_kern_exp_attach_type,
4753     },
4754     [FEAT_PROBE_READ_KERN] = {
4755         "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel,
4756     },
4757     [FEAT_PROG_BIND_MAP] = {
4758         "BPF_PROG_BIND_MAP support", probe_prog_bind_map,
4759     },
4760     [FEAT_MODULE_BTF] = {
4761         "module BTF support", probe_module_btf,
4762     },
4763     [FEAT_BTF_FLOAT] = {
4764         "BTF_KIND_FLOAT support", probe_kern_btf_float,
4765     },
4766     [FEAT_PERF_LINK] = {
4767         "BPF perf link support", probe_perf_link,
4768     },
4769     [FEAT_BTF_DECL_TAG] = {
4770         "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag,
4771     },
4772     [FEAT_BTF_TYPE_TAG] = {
4773         "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag,
4774     },
4775     [FEAT_MEMCG_ACCOUNT] = {
4776         "memcg-based memory accounting", probe_memcg_account,
4777     },
4778     [FEAT_BPF_COOKIE] = {
4779         "BPF cookie support", probe_kern_bpf_cookie,
4780     },
4781     [FEAT_BTF_ENUM64] = {
4782         "BTF_KIND_ENUM64 support", probe_kern_btf_enum64,
4783     },
4784     [FEAT_SYSCALL_WRAPPER] = {
4785         "Kernel using syscall wrapper", probe_kern_syscall_wrapper,
4786     },
4787 };
4788
4789 bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
4790 {
4791     struct kern_feature_desc *feat = &feature_probes[feat_id];
4792     int ret;
4793
4794     if (obj && obj->gen_loader)
4795         /* To generate loader program assume the latest kernel
4796          * to avoid doing extra prog_load, map_create syscalls.
4797          */
4798         return true;
4799
4800     if (READ_ONCE(feat->res) == FEAT_UNKNOWN) {
4801         ret = feat->probe();
4802         if (ret > 0) {
4803             WRITE_ONCE(feat->res, FEAT_SUPPORTED);
4804         } else if (ret == 0) {
4805             WRITE_ONCE(feat->res, FEAT_MISSING);
4806         } else {
4807             pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
4808             WRITE_ONCE(feat->res, FEAT_MISSING);
4809         }
4810     }
4811
4812     return READ_ONCE(feat->res) == FEAT_SUPPORTED;
4813 }
4814
4815 static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
4816 {
4817     struct bpf_map_info map_info = {};
4818     char msg[STRERR_BUFSIZE];
4819     __u32 map_info_len;
4820     int err;
4821
4822     map_info_len = sizeof(map_info);
4823
4824     err = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len);
4825     if (err && errno == EINVAL)
4826         err = bpf_get_map_info_from_fdinfo(map_fd, &map_info);
4827     if (err) {
4828         pr_warn("failed to get map info for map FD %d: %s\n", map_fd,
4829             libbpf_strerror_r(errno, msg, sizeof(msg)));
4830         return false;
4831     }
4832
4833     return (map_info.type == map->def.type &&
4834         map_info.key_size == map->def.key_size &&
4835         map_info.value_size == map->def.value_size &&
4836         map_info.max_entries == map->def.max_entries &&
4837         map_info.map_flags == map->def.map_flags &&
4838         map_info.map_extra == map->map_extra);
4839 }
4840
4841 static int
4842 bpf_object__reuse_map(struct bpf_map *map)
4843 {
4844     char *cp, errmsg[STRERR_BUFSIZE];
4845     int err, pin_fd;
4846
4847     pin_fd = bpf_obj_get(map->pin_path);
4848     if (pin_fd < 0) {
4849         err = -errno;
4850         if (err == -ENOENT) {
4851             pr_debug("found no pinned map to reuse at '%s'\n",
4852                  map->pin_path);
4853             return 0;
4854         }
4855
4856         cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
4857         pr_warn("couldn't retrieve pinned map '%s': %s\n",
4858             map->pin_path, cp);
4859         return err;
4860     }
4861
4862     if (!map_is_reuse_compat(map, pin_fd)) {
4863         pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
4864             map->pin_path);
4865         close(pin_fd);
4866         return -EINVAL;
4867     }
4868
4869     err = bpf_map__reuse_fd(map, pin_fd);
4870     close(pin_fd);
4871     if (err) {
4872         return err;
4873     }
4874     map->pinned = true;
4875     pr_debug("reused pinned map at '%s'\n", map->pin_path);
4876
4877     return 0;
4878 }
4879
4880 static int
4881 bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
4882 {
4883     enum libbpf_map_type map_type = map->libbpf_type;
4884     char *cp, errmsg[STRERR_BUFSIZE];
4885     int err, zero = 0;
4886
4887     if (obj->gen_loader) {
4888         bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps,
4889                      map->mmaped, map->def.value_size);
4890         if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG)
4891             bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
4892         return 0;
4893     }
4894     err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
4895     if (err) {
4896         err = -errno;
4897         cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4898         pr_warn("Error setting initial map(%s) contents: %s\n",
4899             map->name, cp);
4900         return err;
4901     }
4902
4903     /* Freeze .rodata and .kconfig map as read-only from syscall side. */
4904     if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) {
4905         err = bpf_map_freeze(map->fd);
4906         if (err) {
4907             err = -errno;
4908             cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4909             pr_warn("Error freezing map(%s) as read-only: %s\n",
4910                 map->name, cp);
4911             return err;
4912         }
4913     }
4914     return 0;
4915 }
4916
/* Forward declaration: used below to tear down the inner map template. */
static void bpf_map__destroy(struct bpf_map *map);

/* Create a single map in the kernel, or record its creation when a loader
 * program is being generated.
 *
 * @obj:      object that owns the map
 * @map:      map to create; map->fd receives the resulting FD
 * @is_inner: true when called recursively for a map-in-map inner template
 *
 * Returns 0 on success or a negative errno.
 */
static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
{
    LIBBPF_OPTS(bpf_map_create_opts, create_attr);
    struct bpf_map_def *def = &map->def;
    const char *map_name = NULL;
    int err = 0;

    /* the map name is only passed along when FEAT_PROG_NAME is supported */
    if (kernel_supports(obj, FEAT_PROG_NAME))
        map_name = map->name;
    create_attr.map_ifindex = map->map_ifindex;
    create_attr.map_flags = def->map_flags;
    create_attr.numa_node = map->numa_node;
    create_attr.map_extra = map->map_extra;

    if (bpf_map__is_struct_ops(map))
        create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;

    /* attach BTF key/value type info only if the object's BTF has an FD */
    if (obj->btf && btf__fd(obj->btf) >= 0) {
        create_attr.btf_fd = btf__fd(obj->btf);
        create_attr.btf_key_type_id = map->btf_key_type_id;
        create_attr.btf_value_type_id = map->btf_value_type_id;
    }

    if (bpf_map_type__is_map_in_map(def->type)) {
        /* an inner map template is created first so that its FD can be
         * handed over as inner_map_fd
         */
        if (map->inner_map) {
            err = bpf_object__create_map(obj, map->inner_map, true);
            if (err) {
                pr_warn("map '%s': failed to create inner map: %d\n",
                    map->name, err);
                return err;
            }
            map->inner_map_fd = bpf_map__fd(map->inner_map);
        }
        if (map->inner_map_fd >= 0)
            create_attr.inner_map_fd = map->inner_map_fd;
    }

    /* for the map types listed here BTF type info is dropped, both from the
     * creation attributes and from the map itself
     */
    switch (def->type) {
    case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
    case BPF_MAP_TYPE_CGROUP_ARRAY:
    case BPF_MAP_TYPE_STACK_TRACE:
    case BPF_MAP_TYPE_ARRAY_OF_MAPS:
    case BPF_MAP_TYPE_HASH_OF_MAPS:
    case BPF_MAP_TYPE_DEVMAP:
    case BPF_MAP_TYPE_DEVMAP_HASH:
    case BPF_MAP_TYPE_CPUMAP:
    case BPF_MAP_TYPE_XSKMAP:
    case BPF_MAP_TYPE_SOCKMAP:
    case BPF_MAP_TYPE_SOCKHASH:
    case BPF_MAP_TYPE_QUEUE:
    case BPF_MAP_TYPE_STACK:
        create_attr.btf_fd = 0;
        create_attr.btf_key_type_id = 0;
        create_attr.btf_value_type_id = 0;
        map->btf_key_type_id = 0;
        map->btf_value_type_id = 0;
        /* falls through to the default case, which only breaks */
    default:
        break;
    }

    if (obj->gen_loader) {
        /* inner templates are recorded with index -1 */
        bpf_gen__map_create(obj->gen_loader, def->type, map_name,
                    def->key_size, def->value_size, def->max_entries,
                    &create_attr, is_inner ? -1 : map - obj->maps);
        /* Pretend to have valid FD to pass various fd >= 0 checks.
         * This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
         */
        map->fd = 0;
    } else {
        map->fd = bpf_map_create(def->type, map_name,
                     def->key_size, def->value_size,
                     def->max_entries, &create_attr);
    }
    /* if creation failed while BTF type ids were supplied, warn and try
     * once more with all BTF info stripped
     */
    if (map->fd < 0 && (create_attr.btf_key_type_id ||
                create_attr.btf_value_type_id)) {
        char *cp, errmsg[STRERR_BUFSIZE];

        err = -errno;
        cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
        pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
            map->name, cp, err);
        create_attr.btf_fd = 0;
        create_attr.btf_key_type_id = 0;
        create_attr.btf_value_type_id = 0;
        map->btf_key_type_id = 0;
        map->btf_value_type_id = 0;
        map->fd = bpf_map_create(def->type, map_name,
                     def->key_size, def->value_size,
                     def->max_entries, &create_attr);
    }

    /* errno is read here, before the inner map cleanup below runs */
    err = map->fd < 0 ? -errno : 0;

    /* the inner map template is destroyed and freed on both success and
     * failure; in gen_loader mode its placeholder fd is reset to -1 first
     */
    if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
        if (obj->gen_loader)
            map->inner_map->fd = -1;
        bpf_map__destroy(map->inner_map);
        zfree(&map->inner_map);
    }

    return err;
}
5021
5022 static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map)
5023 {
5024     const struct bpf_map *targ_map;
5025     unsigned int i;
5026     int fd, err = 0;
5027
5028     for (i = 0; i < map->init_slots_sz; i++) {
5029         if (!map->init_slots[i])
5030             continue;
5031
5032         targ_map = map->init_slots[i];
5033         fd = bpf_map__fd(targ_map);
5034
5035         if (obj->gen_loader) {
5036             bpf_gen__populate_outer_map(obj->gen_loader,
5037                             map - obj->maps, i,
5038                             targ_map - obj->maps);
5039         } else {
5040             err = bpf_map_update_elem(map->fd, &i, &fd, 0);
5041         }
5042         if (err) {
5043             err = -errno;
5044             pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
5045                 map->name, i, targ_map->name, fd, err);
5046             return err;
5047         }
5048         pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
5049              map->name, i, targ_map->name, fd);
5050     }
5051
5052     zfree(&map->init_slots);
5053     map->init_slots_sz = 0;
5054
5055     return 0;
5056 }
5057
5058 static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map)
5059 {
5060     const struct bpf_program *targ_prog;
5061     unsigned int i;
5062     int fd, err;
5063
5064     if (obj->gen_loader)
5065         return -ENOTSUP;
5066
5067     for (i = 0; i < map->init_slots_sz; i++) {
5068         if (!map->init_slots[i])
5069             continue;
5070
5071         targ_prog = map->init_slots[i];
5072         fd = bpf_program__fd(targ_prog);
5073
5074         err = bpf_map_update_elem(map->fd, &i, &fd, 0);
5075         if (err) {
5076             err = -errno;
5077             pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %d\n",
5078                 map->name, i, targ_prog->name, fd, err);
5079             return err;
5080         }
5081         pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n",
5082              map->name, i, targ_prog->name, fd);
5083     }
5084
5085     zfree(&map->init_slots);
5086     map->init_slots_sz = 0;
5087
5088     return 0;
5089 }
5090
5091 static int bpf_object_init_prog_arrays(struct bpf_object *obj)
5092 {
5093     struct bpf_map *map;
5094     int i, err;
5095
5096     for (i = 0; i < obj->nr_maps; i++) {
5097         map = &obj->maps[i];
5098
5099         if (!map->init_slots_sz || map->def.type != BPF_MAP_TYPE_PROG_ARRAY)
5100             continue;
5101
5102         err = init_prog_array_slots(obj, map);
5103         if (err < 0) {
5104             zclose(map->fd);
5105             return err;
5106         }
5107     }
5108     return 0;
5109 }
5110
5111 static int map_set_def_max_entries(struct bpf_map *map)
5112 {
5113     if (map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !map->def.max_entries) {
5114         int nr_cpus;
5115
5116         nr_cpus = libbpf_num_possible_cpus();
5117         if (nr_cpus < 0) {
5118             pr_warn("map '%s': failed to determine number of system CPUs: %d\n",
5119                 map->name, nr_cpus);
5120             return nr_cpus;
5121         }
5122         pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
5123         map->def.max_entries = nr_cpus;
5124     }
5125
5126     return 0;
5127 }
5128
/* Create (or reuse) every auto-created map of the object.
 *
 * For each map: optionally reuse a pinned instance, otherwise create it,
 * populate internal maps and map-in-map slots, and finally pin it if a pin
 * path is set. On failure the FDs of all previously processed maps are
 * closed.
 *
 * Returns 0 on success or a negative error code.
 */
static int
bpf_object__create_maps(struct bpf_object *obj)
{
    struct bpf_map *map;
    char *cp, errmsg[STRERR_BUFSIZE];
    unsigned int i, j;
    int err;
    bool retried;

    for (i = 0; i < obj->nr_maps; i++) {
        map = &obj->maps[i];

        /* To support old kernels, we skip creating global data maps
         * (.rodata, .data, .kconfig, etc); later on, during program
         * loading, if we detect that at least one of the to-be-loaded
         * programs is referencing any global data map, we'll error
         * out with program name and relocation index logged.
         * This approach allows to accommodate Clang emitting
         * unnecessary .rodata.str1.1 sections for string literals,
         * but also it allows to have CO-RE applications that use
         * global variables in some of BPF programs, but not others.
         * If those global variable-using programs are not loaded at
         * runtime due to bpf_program__set_autoload(prog, false),
         * bpf_object loading will succeed just fine even on old
         * kernels.
         */
        if (bpf_map__is_internal(map) && !kernel_supports(obj, FEAT_GLOBAL_DATA))
            map->autocreate = false;

        if (!map->autocreate) {
            pr_debug("map '%s': skipped auto-creating...\n", map->name);
            continue;
        }

        err = map_set_def_max_entries(map);
        if (err)
            goto err_out;

        /* 'retried' allows exactly one extra pass through the reuse/create
         * logic, taken when pinning below fails with -EEXIST
         */
        retried = false;
retry:
        if (map->pin_path) {
            err = bpf_object__reuse_map(map);
            if (err) {
                pr_warn("map '%s': error reusing pinned map\n",
                    map->name);
                goto err_out;
            }
            /* on the second pass a pinned map is expected to exist, since
             * the pin attempt just reported -EEXIST
             */
            if (retried && map->fd < 0) {
                pr_warn("map '%s': cannot find pinned map\n",
                    map->name);
                err = -ENOENT;
                goto err_out;
            }
        }

        if (map->fd >= 0) {
            pr_debug("map '%s': skipping creation (preset fd=%d)\n",
                 map->name, map->fd);
        } else {
            err = bpf_object__create_map(obj, map, false);
            if (err)
                goto err_out;

            pr_debug("map '%s': created successfully, fd=%d\n",
                 map->name, map->fd);

            if (bpf_map__is_internal(map)) {
                err = bpf_object__populate_internal_map(obj, map);
                if (err < 0) {
                    zclose(map->fd);
                    goto err_out;
                }
            }

            /* PROG_ARRAY slots are not handled here */
            if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) {
                err = init_map_in_map_slots(obj, map);
                if (err < 0) {
                    zclose(map->fd);
                    goto err_out;
                }
            }
        }

        if (map->pin_path && !map->pinned) {
            err = bpf_map__pin(map, NULL);
            if (err) {
                /* drop our FD; on -EEXIST go back once and try to reuse
                 * whatever is pinned at the path
                 */
                zclose(map->fd);
                if (!retried && err == -EEXIST) {
                    retried = true;
                    goto retry;
                }
                pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
                    map->name, map->pin_path, err);
                goto err_out;
            }
        }
    }

    return 0;

err_out:
    cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
    pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err);
    pr_perm_msg(err);
    /* close the FDs of all maps processed before the failing one */
    for (j = 0; j < i; j++)
        zclose(obj->maps[j].fd);
    return err;
}
5237
/* Check whether 's' starts an "X___Y" flavor separator, i.e. exactly three
 * underscores surrounded by non-underscore characters. The caller must make
 * sure at least five characters are readable at 's'.
 */
static bool bpf_core_is_flavor_sep(const char *s)
{
    /* check X___Y name pattern, where X and Y are not underscores */
    return s[0] != '_' &&                     /* X */
           s[1] == '_' && s[2] == '_' && s[3] == '_' &&   /* ___ */
           s[4] != '_';                   /* Y */
}

/* Given 'some_struct_name___with_flavor' return the length of a name prefix
 * before last triple underscore. Struct name part after last triple
 * underscore is ignored by BPF CO-RE relocation during relocation matching.
 *
 * Names without a flavor separator (including names shorter than the five
 * characters a separator needs) yield their full length.
 */
size_t bpf_core_essential_name_len(const char *name)
{
    size_t n = strlen(name);
    size_t i;

    /* "X___Y" needs five characters; shorter names cannot have a flavor */
    if (n < 5)
        return n;

    /* Scan candidate start positions from n - 5 down to 0. Counting with
     * size_t avoids narrowing the string length into an int.
     */
    for (i = n - 4; i-- > 0; ) {
        if (bpf_core_is_flavor_sep(name + i))
            return i + 1;
    }
    return n;
}
5261
5262 void bpf_core_free_cands(struct bpf_core_cand_list *cands)
5263 {
5264     if (!cands)
5265         return;
5266
5267     free(cands->cands);
5268     free(cands);
5269 }
5270
5271 int bpf_core_add_cands(struct bpf_core_cand *local_cand,
5272                size_t local_essent_len,
5273                const struct btf *targ_btf,
5274                const char *targ_btf_name,
5275                int targ_start_id,
5276                struct bpf_core_cand_list *cands)
5277 {
5278     struct bpf_core_cand *new_cands, *cand;
5279     const struct btf_type *t, *local_t;
5280     const char *targ_name, *local_name;
5281     size_t targ_essent_len;
5282     int n, i;
5283
5284     local_t = btf__type_by_id(local_cand->btf, local_cand->id);
5285     local_name = btf__str_by_offset(local_cand->btf, local_t->name_off);
5286
5287     n = btf__type_cnt(targ_btf);
5288     for (i = targ_start_id; i < n; i++) {
5289         t = btf__type_by_id(targ_btf, i);
5290         if (!btf_kind_core_compat(t, local_t))
5291             continue;
5292
5293         targ_name = btf__name_by_offset(targ_btf, t->name_off);
5294         if (str_is_empty(targ_name))
5295             continue;
5296
5297         targ_essent_len = bpf_core_essential_name_len(targ_name);
5298         if (targ_essent_len != local_essent_len)
5299             continue;
5300
5301         if (strncmp(local_name, targ_name, local_essent_len) != 0)
5302             continue;
5303
5304         pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n",
5305              local_cand->id, btf_kind_str(local_t),
5306              local_name, i, btf_kind_str(t), targ_name,
5307              targ_btf_name);
5308         new_cands = libbpf_reallocarray(cands->cands, cands->len + 1,
5309                           sizeof(*cands->cands));
5310         if (!new_cands)
5311             return -ENOMEM;
5312
5313         cand = &new_cands[cands->len];
5314         cand->btf = targ_btf;
5315         cand->id = i;
5316
5317         cands->cands = new_cands;
5318         cands->len++;
5319     }
5320     return 0;
5321 }
5322
5323 static int load_module_btfs(struct bpf_object *obj)
5324 {
5325     struct bpf_btf_info info;
5326     struct module_btf *mod_btf;
5327     struct btf *btf;
5328     char name[64];
5329     __u32 id = 0, len;
5330     int err, fd;
5331
5332     if (obj->btf_modules_loaded)
5333         return 0;
5334
5335     if (obj->gen_loader)
5336         return 0;
5337
5338     /* don't do this again, even if we find no module BTFs */
5339     obj->btf_modules_loaded = true;
5340
5341     /* kernel too old to support module BTFs */
5342     if (!kernel_supports(obj, FEAT_MODULE_BTF))
5343         return 0;
5344
5345     while (true) {
5346         err = bpf_btf_get_next_id(id, &id);
5347         if (err && errno == ENOENT)
5348             return 0;
5349         if (err) {
5350             err = -errno;
5351             pr_warn("failed to iterate BTF objects: %d\n", err);
5352             return err;
5353         }
5354
5355         fd = bpf_btf_get_fd_by_id(id);
5356         if (fd < 0) {
5357             if (errno == ENOENT)
5358                 continue; /* expected race: BTF was unloaded */
5359             err = -errno;
5360             pr_warn("failed to get BTF object #%d FD: %d\n", id, err);
5361             return err;
5362         }
5363
5364         len = sizeof(info);
5365         memset(&info, 0, sizeof(info));
5366         info.name = ptr_to_u64(name);
5367         info.name_len = sizeof(name);
5368
5369         err = bpf_obj_get_info_by_fd(fd, &info, &len);
5370         if (err) {
5371             err = -errno;
5372             pr_warn("failed to get BTF object #%d info: %d\n", id, err);
5373             goto err_out;
5374         }
5375
5376         /* ignore non-module BTFs */
5377         if (!info.kernel_btf || strcmp(name, "vmlinux") == 0) {
5378             close(fd);
5379             continue;
5380         }
5381
5382         btf = btf_get_from_fd(fd, obj->btf_vmlinux);
5383         err = libbpf_get_error(btf);
5384         if (err) {
5385             pr_warn("failed to load module [%s]'s BTF object #%d: %d\n",
5386                 name, id, err);
5387             goto err_out;
5388         }
5389
5390         err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
5391                         sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
5392         if (err)
5393             goto err_out;
5394
5395         mod_btf = &obj->btf_modules[obj->btf_module_cnt++];
5396
5397         mod_btf->btf = btf;
5398         mod_btf->id = id;
5399         mod_btf->fd = fd;
5400         mod_btf->name = strdup(name);
5401         if (!mod_btf->name) {
5402             err = -ENOMEM;
5403             goto err_out;
5404         }
5405         continue;
5406
5407 err_out:
5408         close(fd);
5409         return err;
5410     }
5411
5412     return 0;
5413 }
5414
5415 static struct bpf_core_cand_list *
5416 bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
5417 {
5418     struct bpf_core_cand local_cand = {};
5419     struct bpf_core_cand_list *cands;
5420     const struct btf *main_btf;
5421     const struct btf_type *local_t;
5422     const char *local_name;
5423     size_t local_essent_len;
5424     int err, i;
5425
5426     local_cand.btf = local_btf;
5427     local_cand.id = local_type_id;
5428     local_t = btf__type_by_id(local_btf, local_type_id);
5429     if (!local_t)
5430         return ERR_PTR(-EINVAL);
5431
5432     local_name = btf__name_by_offset(local_btf, local_t->name_off);
5433     if (str_is_empty(local_name))
5434         return ERR_PTR(-EINVAL);
5435     local_essent_len = bpf_core_essential_name_len(local_name);
5436
5437     cands = calloc(1, sizeof(*cands));
5438     if (!cands)
5439         return ERR_PTR(-ENOMEM);
5440
5441     /* Attempt to find target candidates in vmlinux BTF first */
5442     main_btf = obj->btf_vmlinux_override ?: obj->btf_vmlinux;
5443     err = bpf_core_add_cands(&local_cand, local_essent_len, main_btf, "vmlinux", 1, cands);
5444     if (err)
5445         goto err_out;
5446
5447     /* if vmlinux BTF has any candidate, don't got for module BTFs */
5448     if (cands->len)
5449         return cands;
5450
5451     /* if vmlinux BTF was overridden, don't attempt to load module BTFs */
5452     if (obj->btf_vmlinux_override)
5453         return cands;
5454
5455     /* now look through module BTFs, trying to still find candidates */
5456     err = load_module_btfs(obj);
5457     if (err)
5458         goto err_out;
5459
5460     for (i = 0; i < obj->btf_module_cnt; i++) {
5461         err = bpf_core_add_cands(&local_cand, local_essent_len,
5462                      obj->btf_modules[i].btf,
5463                      obj->btf_modules[i].name,
5464                      btf__type_cnt(obj->btf_vmlinux),
5465                      cands);
5466         if (err)
5467             goto err_out;
5468     }
5469
5470     return cands;
5471 err_out:
5472     bpf_core_free_cands(cands);
5473     return ERR_PTR(err);
5474 }
5475
5476 /* Check local and target types for compatibility. This check is used for
5477  * type-based CO-RE relocations and follow slightly different rules than
5478  * field-based relocations. This function assumes that root types were already
5479  * checked for name match. Beyond that initial root-level name check, names
5480  * are completely ignored. Compatibility rules are as follows:
5481  *   - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
5482  *     kind should match for local and target types (i.e., STRUCT is not
5483  *     compatible with UNION);
5484  *   - for ENUMs, the size is ignored;
5485  *   - for INT, size and signedness are ignored;
5486  *   - for ARRAY, dimensionality is ignored, element types are checked for
5487  *     compatibility recursively;
5488  *   - CONST/VOLATILE/RESTRICT modifiers are ignored;
5489  *   - TYPEDEFs/PTRs are compatible if types they pointing to are compatible;
5490  *   - FUNC_PROTOs are compatible if they have compatible signature: same
5491  *     number of input args and compatible return and argument types.
5492  * These rules are not set in stone and probably will be adjusted as we get
5493  * more experience with using BPF CO-RE relocations.
5494  */
5495 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
5496                   const struct btf *targ_btf, __u32 targ_id)
5497 {
5498     return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32);
5499 }
5500
5501 int bpf_core_types_match(const struct btf *local_btf, __u32 local_id,
5502              const struct btf *targ_btf, __u32 targ_id)
5503 {
5504     return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32);
5505 }
5506
/* Hash callback for the candidate cache: the key's pointer value itself is
 * the hash. 'ctx' is unused.
 */
static size_t bpf_core_hash_fn(const void *key, void *ctx)
{
    size_t hash = (size_t)key;

    return hash;
}
5511
/* Equality callback for the candidate cache: two keys are equal only if
 * they are the same pointer value. 'ctx' is unused.
 */
static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx)
{
    if (k1 != k2)
        return false;
    return true;
}
5516
5517 static void *u32_as_hash_key(__u32 x)
5518 {
5519     return (void *)(uintptr_t)x;
5520 }
5521
5522 static int record_relo_core(struct bpf_program *prog,
5523                 const struct bpf_core_relo *core_relo, int insn_idx)
5524 {
5525     struct reloc_desc *relos, *relo;
5526
5527     relos = libbpf_reallocarray(prog->reloc_desc,
5528                     prog->nr_reloc + 1, sizeof(*relos));
5529     if (!relos)
5530         return -ENOMEM;
5531     relo = &relos[prog->nr_reloc];
5532     relo->type = RELO_CORE;
5533     relo->insn_idx = insn_idx;
5534     relo->core_relo = core_relo;
5535     prog->reloc_desc = relos;
5536     prog->nr_reloc++;
5537     return 0;
5538 }
5539
5540 static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx)
5541 {
5542     struct reloc_desc *relo;
5543     int i;
5544
5545     for (i = 0; i < prog->nr_reloc; i++) {
5546         relo = &prog->reloc_desc[i];
5547         if (relo->type != RELO_CORE || relo->insn_idx != insn_idx)
5548             continue;
5549
5550         return relo->core_relo;
5551     }
5552
5553     return NULL;
5554 }
5555
5556 static int bpf_core_resolve_relo(struct bpf_program *prog,
5557                  const struct bpf_core_relo *relo,
5558                  int relo_idx,
5559                  const struct btf *local_btf,
5560                  struct hashmap *cand_cache,
5561                  struct bpf_core_relo_res *targ_res)
5562 {
5563     struct bpf_core_spec specs_scratch[3] = {};
5564     const void *type_key = u32_as_hash_key(relo->type_id);
5565     struct bpf_core_cand_list *cands = NULL;
5566     const char *prog_name = prog->name;
5567     const struct btf_type *local_type;
5568     const char *local_name;
5569     __u32 local_id = relo->type_id;
5570     int err;
5571
5572     local_type = btf__type_by_id(local_btf, local_id);
5573     if (!local_type)
5574         return -EINVAL;
5575
5576     local_name = btf__name_by_offset(local_btf, local_type->name_off);
5577     if (!local_name)
5578         return -EINVAL;
5579
5580     if (relo->kind != BPF_CORE_TYPE_ID_LOCAL &&
5581         !hashmap__find(cand_cache, type_key, (void **)&cands)) {
5582         cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
5583         if (IS_ERR(cands)) {
5584             pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
5585                 prog_name, relo_idx, local_id, btf_kind_str(local_type),
5586                 local_name, PTR_ERR(cands));
5587             return PTR_ERR(cands);
5588         }
5589         err = hashmap__set(cand_cache, type_key, cands, NULL, NULL);
5590         if (err) {
5591             bpf_core_free_cands(cands);
5592             return err;
5593         }
5594     }
5595
5596     return bpf_core_calc_relo_insn(prog_name, relo, relo_idx, local_btf, cands, specs_scratch,
5597                        targ_res);
5598 }
5599
5600 static int
5601 bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
5602 {
5603     const struct btf_ext_info_sec *sec;
5604     struct bpf_core_relo_res targ_res;
5605     const struct bpf_core_relo *rec;
5606     const struct btf_ext_info *seg;
5607     struct hashmap_entry *entry;
5608     struct hashmap *cand_cache = NULL;
5609     struct bpf_program *prog;
5610     struct bpf_insn *insn;
5611     const char *sec_name;
5612     int i, err = 0, insn_idx, sec_idx, sec_num;
5613
5614     if (obj->btf_ext->core_relo_info.len == 0)
5615         return 0;
5616
5617     if (targ_btf_path) {
5618         obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL);
5619         err = libbpf_get_error(obj->btf_vmlinux_override);
5620         if (err) {
5621             pr_warn("failed to parse target BTF: %d\n", err);
5622             return err;
5623         }
5624     }
5625
5626     cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
5627     if (IS_ERR(cand_cache)) {
5628         err = PTR_ERR(cand_cache);
5629         goto out;
5630     }
5631
5632     seg = &obj->btf_ext->core_relo_info;
5633     sec_num = 0;
5634     for_each_btf_ext_sec(seg, sec) {
5635         sec_idx = seg->sec_idxs[sec_num];
5636         sec_num++;
5637
5638         sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
5639         if (str_is_empty(sec_name)) {
5640             err = -EINVAL;
5641             goto out;
5642         }
5643
5644         pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info);
5645
5646         for_each_btf_ext_rec(seg, sec, i, rec) {
5647             if (rec->insn_off % BPF_INSN_SZ)
5648                 return -EINVAL;
5649             insn_idx = rec->insn_off / BPF_INSN_SZ;
5650             prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
5651             if (!prog) {
5652                 /* When __weak subprog is "overridden" by another instance
5653                  * of the subprog from a different object file, linker still
5654                  * appends all the .BTF.ext info that used to belong to that
5655                  * eliminated subprogram.
5656                  * This is similar to what x86-64 linker does for relocations.
5657                  * So just ignore such relocations just like we ignore
5658                  * subprog instructions when discovering subprograms.
5659                  */
5660                 pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n",
5661                      sec_name, i, insn_idx);
5662                 continue;
5663             }
5664             /* no need to apply CO-RE relocation if the program is
5665              * not going to be loaded
5666              */
5667             if (!prog->autoload)
5668                 continue;
5669
5670             /* adjust insn_idx from section frame of reference to the local
5671              * program's frame of reference; (sub-)program code is not yet
5672              * relocated, so it's enough to just subtract in-section offset
5673              */
5674             insn_idx = insn_idx - prog->sec_insn_off;
5675             if (insn_idx >= prog->insns_cnt)
5676                 return -EINVAL;
5677             insn = &prog->insns[insn_idx];
5678
5679             err = record_relo_core(prog, rec, insn_idx);
5680             if (err) {
5681                 pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n",
5682                     prog->name, i, err);
5683                 goto out;
5684             }
5685
5686             if (prog->obj->gen_loader)
5687                 continue;
5688
5689             err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res);
5690             if (err) {
5691                 pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
5692                     prog->name, i, err);
5693                 goto out;
5694             }
5695
5696             err = bpf_core_patch_insn(prog->name, insn, insn_idx, rec, i, &targ_res);
5697             if (err) {
5698                 pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n",
5699                     prog->name, i, insn_idx, err);
5700                 goto out;
5701             }
5702         }
5703     }
5704
5705 out:
5706     /* obj->btf_vmlinux and module BTFs are freed after object load */
5707     btf__free(obj->btf_vmlinux_override);
5708     obj->btf_vmlinux_override = NULL;
5709
5710     if (!IS_ERR_OR_NULL(cand_cache)) {
5711         hashmap__for_each_entry(cand_cache, entry, i) {
5712             bpf_core_free_cands(entry->value);
5713         }
5714         hashmap__free(cand_cache);
5715     }
5716     return err;
5717 }
5718
5719 /* base map load ldimm64 special constant, used also for log fixup logic */
5720 #define MAP_LDIMM64_POISON_BASE 2001000000
5721 #define MAP_LDIMM64_POISON_PFX "200100"
5722
5723 static void poison_map_ldimm64(struct bpf_program *prog, int relo_idx,
5724                    int insn_idx, struct bpf_insn *insn,
5725                    int map_idx, const struct bpf_map *map)
5726 {
5727     int i;
5728
5729     pr_debug("prog '%s': relo #%d: poisoning insn #%d that loads map #%d '%s'\n",
5730          prog->name, relo_idx, insn_idx, map_idx, map->name);
5731
5732     /* we turn single ldimm64 into two identical invalid calls */
5733     for (i = 0; i < 2; i++) {
5734         insn->code = BPF_JMP | BPF_CALL;
5735         insn->dst_reg = 0;
5736         insn->src_reg = 0;
5737         insn->off = 0;
5738         /* if this instruction is reachable (not a dead code),
5739          * verifier will complain with something like:
5740          * invalid func unknown#2001000123
5741          * where lower 123 is map index into obj->maps[] array
5742          */
5743         insn->imm = MAP_LDIMM64_POISON_BASE + map_idx;
5744
5745         insn++;
5746     }
5747 }
5748
5749 /* Relocate data references within program code:
5750  *  - map references;
5751  *  - global variable references;
5752  *  - extern references.
5753  */
5754 static int
5755 bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
5756 {
5757     int i;
5758
5759     for (i = 0; i < prog->nr_reloc; i++) {
5760         struct reloc_desc *relo = &prog->reloc_desc[i];
5761         struct bpf_insn *insn = &prog->insns[relo->insn_idx];
5762         const struct bpf_map *map;
5763         struct extern_desc *ext;
5764
5765         switch (relo->type) {
5766         case RELO_LD64:
5767             map = &obj->maps[relo->map_idx];
5768             if (obj->gen_loader) {
5769                 insn[0].src_reg = BPF_PSEUDO_MAP_IDX;
5770                 insn[0].imm = relo->map_idx;
5771             } else if (map->autocreate) {
5772                 insn[0].src_reg = BPF_PSEUDO_MAP_FD;
5773                 insn[0].imm = map->fd;
5774             } else {
5775                 poison_map_ldimm64(prog, i, relo->insn_idx, insn,
5776                            relo->map_idx, map);
5777             }
5778             break;
5779         case RELO_DATA:
5780             map = &obj->maps[relo->map_idx];
5781             insn[1].imm = insn[0].imm + relo->sym_off;
5782             if (obj->gen_loader) {
5783                 insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
5784                 insn[0].imm = relo->map_idx;
5785             } else if (map->autocreate) {
5786                 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
5787                 insn[0].imm = map->fd;
5788             } else {
5789                 poison_map_ldimm64(prog, i, relo->insn_idx, insn,
5790                            relo->map_idx, map);
5791             }
5792             break;
5793         case RELO_EXTERN_VAR:
5794             ext = &obj->externs[relo->sym_off];
5795             if (ext->type == EXT_KCFG) {
5796                 if (obj->gen_loader) {
5797                     insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
5798                     insn[0].imm = obj->kconfig_map_idx;
5799                 } else {
5800                     insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
5801                     insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
5802                 }
5803                 insn[1].imm = ext->kcfg.data_off;
5804             } else /* EXT_KSYM */ {
5805                 if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */
5806                     insn[0].src_reg = BPF_PSEUDO_BTF_ID;
5807                     insn[0].imm = ext->ksym.kernel_btf_id;
5808                     insn[1].imm = ext->ksym.kernel_btf_obj_fd;
5809                 } else { /* typeless ksyms or unresolved typed ksyms */
5810                     insn[0].imm = (__u32)ext->ksym.addr;
5811                     insn[1].imm = ext->ksym.addr >> 32;
5812                 }
5813             }
5814             break;
5815         case RELO_EXTERN_FUNC:
5816             ext = &obj->externs[relo->sym_off];
5817             insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL;
5818             if (ext->is_set) {
5819                 insn[0].imm = ext->ksym.kernel_btf_id;
5820                 insn[0].off = ext->ksym.btf_fd_idx;
5821             } else { /* unresolved weak kfunc */
5822                 insn[0].imm = 0;
5823                 insn[0].off = 0;
5824             }
5825             break;
5826         case RELO_SUBPROG_ADDR:
5827             if (insn[0].src_reg != BPF_PSEUDO_FUNC) {
5828                 pr_warn("prog '%s': relo #%d: bad insn\n",
5829                     prog->name, i);
5830                 return -EINVAL;
5831             }
5832             /* handled already */
5833             break;
5834         case RELO_CALL:
5835             /* handled already */
5836             break;
5837         case RELO_CORE:
5838             /* will be handled by bpf_program_record_relos() */
5839             break;
5840         default:
5841             pr_warn("prog '%s': relo #%d: bad relo type %d\n",
5842                 prog->name, i, relo->type);
5843             return -EINVAL;
5844         }
5845     }
5846
5847     return 0;
5848 }
5849
5850 static int adjust_prog_btf_ext_info(const struct bpf_object *obj,
5851                     const struct bpf_program *prog,
5852                     const struct btf_ext_info *ext_info,
5853                     void **prog_info, __u32 *prog_rec_cnt,
5854                     __u32 *prog_rec_sz)
5855 {
5856     void *copy_start = NULL, *copy_end = NULL;
5857     void *rec, *rec_end, *new_prog_info;
5858     const struct btf_ext_info_sec *sec;
5859     size_t old_sz, new_sz;
5860     int i, sec_num, sec_idx, off_adj;
5861
5862     sec_num = 0;
5863     for_each_btf_ext_sec(ext_info, sec) {
5864         sec_idx = ext_info->sec_idxs[sec_num];
5865         sec_num++;
5866         if (prog->sec_idx != sec_idx)
5867             continue;
5868
5869         for_each_btf_ext_rec(ext_info, sec, i, rec) {
5870             __u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ;
5871
5872             if (insn_off < prog->sec_insn_off)
5873                 continue;
5874             if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt)
5875                 break;
5876
5877             if (!copy_start)
5878                 copy_start = rec;
5879             copy_end = rec + ext_info->rec_size;
5880         }
5881
5882         if (!copy_start)
5883             return -ENOENT;
5884
5885         /* append func/line info of a given (sub-)program to the main
5886          * program func/line info
5887          */
5888         old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size;
5889         new_sz = old_sz + (copy_end - copy_start);
5890         new_prog_info = realloc(*prog_info, new_sz);
5891         if (!new_prog_info)
5892             return -ENOMEM;
5893         *prog_info = new_prog_info;
5894         *prog_rec_cnt = new_sz / ext_info->rec_size;
5895         memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start);
5896
5897         /* Kernel instruction offsets are in units of 8-byte
5898          * instructions, while .BTF.ext instruction offsets generated
5899          * by Clang are in units of bytes. So convert Clang offsets
5900          * into kernel offsets and adjust offset according to program
5901          * relocated position.
5902          */
5903         off_adj = prog->sub_insn_off - prog->sec_insn_off;
5904         rec = new_prog_info + old_sz;
5905         rec_end = new_prog_info + new_sz;
5906         for (; rec < rec_end; rec += ext_info->rec_size) {
5907             __u32 *insn_off = rec;
5908
5909             *insn_off = *insn_off / BPF_INSN_SZ + off_adj;
5910         }
5911         *prog_rec_sz = ext_info->rec_size;
5912         return 0;
5913     }
5914
5915     return -ENOENT;
5916 }
5917
5918 static int
5919 reloc_prog_func_and_line_info(const struct bpf_object *obj,
5920                   struct bpf_program *main_prog,
5921                   const struct bpf_program *prog)
5922 {
5923     int err;
5924
5925     /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
5926      * supprot func/line info
5927      */
5928     if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC))
5929         return 0;
5930
5931     /* only attempt func info relocation if main program's func_info
5932      * relocation was successful
5933      */
5934     if (main_prog != prog && !main_prog->func_info)
5935         goto line_info;
5936
5937     err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info,
5938                        &main_prog->func_info,
5939                        &main_prog->func_info_cnt,
5940                        &main_prog->func_info_rec_size);
5941     if (err) {
5942         if (err != -ENOENT) {
5943             pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n",
5944                 prog->name, err);
5945             return err;
5946         }
5947         if (main_prog->func_info) {
5948             /*
5949              * Some info has already been found but has problem
5950              * in the last btf_ext reloc. Must have to error out.
5951              */
5952             pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name);
5953             return err;
5954         }
5955         /* Have problem loading the very first info. Ignore the rest. */
5956         pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n",
5957             prog->name);
5958     }
5959
5960 line_info:
5961     /* don't relocate line info if main program's relocation failed */
5962     if (main_prog != prog && !main_prog->line_info)
5963         return 0;
5964
5965     err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info,
5966                        &main_prog->line_info,
5967                        &main_prog->line_info_cnt,
5968                        &main_prog->line_info_rec_size);
5969     if (err) {
5970         if (err != -ENOENT) {
5971             pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n",
5972                 prog->name, err);
5973             return err;
5974         }
5975         if (main_prog->line_info) {
5976             /*
5977              * Some info has already been found but has problem
5978              * in the last btf_ext reloc. Must have to error out.
5979              */
5980             pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name);
5981             return err;
5982         }
5983         /* Have problem loading the very first info. Ignore the rest. */
5984         pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n",
5985             prog->name);
5986     }
5987     return 0;
5988 }
5989
5990 static int cmp_relo_by_insn_idx(const void *key, const void *elem)
5991 {
5992     size_t insn_idx = *(const size_t *)key;
5993     const struct reloc_desc *relo = elem;
5994
5995     if (insn_idx == relo->insn_idx)
5996         return 0;
5997     return insn_idx < relo->insn_idx ? -1 : 1;
5998 }
5999
6000 static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx)
6001 {
6002     if (!prog->nr_reloc)
6003         return NULL;
6004     return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
6005                sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
6006 }
6007
6008 static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog)
6009 {
6010     int new_cnt = main_prog->nr_reloc + subprog->nr_reloc;
6011     struct reloc_desc *relos;
6012     int i;
6013
6014     if (main_prog == subprog)
6015         return 0;
6016     relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos));
6017     if (!relos)
6018         return -ENOMEM;
6019     if (subprog->nr_reloc)
6020         memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc,
6021                sizeof(*relos) * subprog->nr_reloc);
6022
6023     for (i = main_prog->nr_reloc; i < new_cnt; i++)
6024         relos[i].insn_idx += subprog->sub_insn_off;
6025     /* After insn_idx adjustment the 'relos' array is still sorted
6026      * by insn_idx and doesn't break bsearch.
6027      */
6028     main_prog->reloc_desc = relos;
6029     main_prog->nr_reloc = new_cnt;
6030     return 0;
6031 }
6032
6033 static int
6034 bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
6035                struct bpf_program *prog)
6036 {
6037     size_t sub_insn_idx, insn_idx, new_cnt;
6038     struct bpf_program *subprog;
6039     struct bpf_insn *insns, *insn;
6040     struct reloc_desc *relo;
6041     int err;
6042
6043     err = reloc_prog_func_and_line_info(obj, main_prog, prog);
6044     if (err)
6045         return err;
6046
6047     for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
6048         insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6049         if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn))
6050             continue;
6051
6052         relo = find_prog_insn_relo(prog, insn_idx);
6053         if (relo && relo->type == RELO_EXTERN_FUNC)
6054             /* kfunc relocations will be handled later
6055              * in bpf_object__relocate_data()
6056              */
6057             continue;
6058         if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
6059             pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
6060                 prog->name, insn_idx, relo->type);
6061             return -LIBBPF_ERRNO__RELOC;
6062         }
6063         if (relo) {
6064             /* sub-program instruction index is a combination of
6065              * an offset of a symbol pointed to by relocation and
6066              * call instruction's imm field; for global functions,
6067              * call always has imm = -1, but for static functions
6068              * relocation is against STT_SECTION and insn->imm
6069              * points to a start of a static function
6070              *
6071              * for subprog addr relocation, the relo->sym_off + insn->imm is
6072              * the byte offset in the corresponding section.
6073              */
6074             if (relo->type == RELO_CALL)
6075                 sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
6076             else
6077                 sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ;
6078         } else if (insn_is_pseudo_func(insn)) {
6079             /*
6080              * RELO_SUBPROG_ADDR relo is always emitted even if both
6081              * functions are in the same section, so it shouldn't reach here.
6082              */
6083             pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
6084                 prog->name, insn_idx);
6085             return -LIBBPF_ERRNO__RELOC;
6086         } else {
6087             /* if subprogram call is to a static function within
6088              * the same ELF section, there won't be any relocation
6089              * emitted, but it also means there is no additional
6090              * offset necessary, insns->imm is relative to
6091              * instruction's original position within the section
6092              */
6093             sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
6094         }
6095
6096         /* we enforce that sub-programs should be in .text section */
6097         subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx);
6098         if (!subprog) {
6099             pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
6100                 prog->name);
6101             return -LIBBPF_ERRNO__RELOC;
6102         }
6103
6104         /* if it's the first call instruction calling into this
6105          * subprogram (meaning this subprog hasn't been processed
6106          * yet) within the context of current main program:
6107          *   - append it at the end of main program's instructions blog;
6108          *   - process is recursively, while current program is put on hold;
6109          *   - if that subprogram calls some other not yet processes
6110          *   subprogram, same thing will happen recursively until
6111          *   there are no more unprocesses subprograms left to append
6112          *   and relocate.
6113          */
6114         if (subprog->sub_insn_off == 0) {
6115             subprog->sub_insn_off = main_prog->insns_cnt;
6116
6117             new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
6118             insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
6119             if (!insns) {
6120                 pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
6121                 return -ENOMEM;
6122             }
6123             main_prog->insns = insns;
6124             main_prog->insns_cnt = new_cnt;
6125
6126             memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
6127                    subprog->insns_cnt * sizeof(*insns));
6128
6129             pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
6130                  main_prog->name, subprog->insns_cnt, subprog->name);
6131
6132             /* The subprog insns are now appended. Append its relos too. */
6133             err = append_subprog_relos(main_prog, subprog);
6134             if (err)
6135                 return err;
6136             err = bpf_object__reloc_code(obj, main_prog, subprog);
6137             if (err)
6138                 return err;
6139         }
6140
6141         /* main_prog->insns memory could have been re-allocated, so
6142          * calculate pointer again
6143          */
6144         insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6145         /* calculate correct instruction position within current main
6146          * prog; each main prog can have a different set of
6147          * subprograms appended (potentially in different order as
6148          * well), so position of any subprog can be different for
6149          * different main programs */
6150         insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
6151
6152         pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
6153              prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
6154     }
6155
6156     return 0;
6157 }
6158
6159 /*
6160  * Relocate sub-program calls.
6161  *
6162  * Algorithm operates as follows. Each entry-point BPF program (referred to as
6163  * main prog) is processed separately. Each subprog (a non-entry function
6164  * that can be called from either entry progs or other subprogs) gets its
6165  * sub_insn_off reset to zero. This serves as indicator that this subprogram
6166  * hasn't been yet appended and relocated within current main prog. Once it's
6167  * relocated, sub_insn_off will point at the position within current main prog
6168  * where given subprog was appended. This will further be used to relocate all
6169  * the call instructions jumping into this subprog.
6170  *
6171  * We start with main program and process all call instructions. If the call
6172  * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off
6173  * is zero), subprog instructions are appended at the end of main program's
6174  * instruction array. Then main program is "put on hold" while we recursively
6175  * process newly appended subprogram. If that subprogram calls into another
6176  * subprogram that hasn't been appended, new subprogram is appended again to
6177  * the *main* prog's instructions (subprog's instructions are always left
6178  * untouched, as they need to be in unmodified state for subsequent main progs
6179  * and subprog instructions are always sent only as part of a main prog) and
6180  * the process continues recursively. Once all the subprogs called from a main
6181  * prog or any of its subprogs are appended (and relocated), all their
6182  * positions within finalized instructions array are known, so it's easy to
6183  * rewrite call instructions with correct relative offsets, corresponding to
6184  * desired target subprog.
6185  *
6186  * It's important to realize that some subprogs might not be called from some
6187  * main prog and any of its called/used subprogs. Those will keep their
6188  * subprog->sub_insn_off as zero at all times and won't be appended to current
6189  * main prog and won't be relocated within the context of current main prog.
6190  * They might still be used from other main progs later.
6191  *
6192  * Visually this process can be shown as below. Suppose we have two main
6193  * programs mainA and mainB and BPF object contains three subprogs: subA,
6194  * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and
6195  * subC both call subB:
6196  *
6197  *        +--------+ +-------+
6198  *        |        v v       |
6199  *     +--+---+ +--+-+-+ +---+--+
6200  *     | subA | | subB | | subC |
6201  *     +--+---+ +------+ +---+--+
6202  *        ^                  ^
6203  *        |                  |
6204  *    +---+-------+   +------+----+
6205  *    |   mainA   |   |   mainB   |
6206  *    +-----------+   +-----------+
6207  *
6208  * We'll start relocating mainA, will find subA, append it and start
6209  * processing sub A recursively:
6210  *
6211  *    +-----------+------+
6212  *    |   mainA   | subA |
6213  *    +-----------+------+
6214  *
6215  * At this point we notice that subB is used from subA, so we append it and
6216  * relocate (there are no further subcalls from subB):
6217  *
6218  *    +-----------+------+------+
6219  *    |   mainA   | subA | subB |
6220  *    +-----------+------+------+
6221  *
6222  * At this point, we relocate subA calls, then go one level up and finish with
6223  * relocating mainA calls. mainA is done.
6224  *
6225  * For mainB process is similar but results in different order. We start with
6226  * mainB and skip subA and subB, as mainB never calls them (at least
6227  * directly), but we see subC is needed, so we append and start processing it:
6228  *
6229  *    +-----------+------+
6230  *    |   mainB   | subC |
6231  *    +-----------+------+
6232  * Now we see subC needs subB, so we go back to it, append and relocate it:
6233  *
6234  *    +-----------+------+------+
6235  *    |   mainB   | subC | subB |
6236  *    +-----------+------+------+
6237  *
6238  * At this point we unwind recursion, relocate calls in subC, then in mainB.
6239  */
6240 static int
6241 bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
6242 {
6243     struct bpf_program *subprog;
6244     int i, err;
6245
6246     /* mark all subprogs as not relocated (yet) within the context of
6247      * current main program
6248      */
6249     for (i = 0; i < obj->nr_programs; i++) {
6250         subprog = &obj->programs[i];
6251         if (!prog_is_subprog(obj, subprog))
6252             continue;
6253
6254         subprog->sub_insn_off = 0;
6255     }
6256
6257     err = bpf_object__reloc_code(obj, prog, prog);
6258     if (err)
6259         return err;
6260
6261     return 0;
6262 }
6263
6264 static void
6265 bpf_object__free_relocs(struct bpf_object *obj)
6266 {
6267     struct bpf_program *prog;
6268     int i;
6269
6270     /* free up relocation descriptors */
6271     for (i = 0; i < obj->nr_programs; i++) {
6272         prog = &obj->programs[i];
6273         zfree(&prog->reloc_desc);
6274         prog->nr_reloc = 0;
6275     }
6276 }
6277
6278 static int cmp_relocs(const void *_a, const void *_b)
6279 {
6280     const struct reloc_desc *a = _a;
6281     const struct reloc_desc *b = _b;
6282
6283     if (a->insn_idx != b->insn_idx)
6284         return a->insn_idx < b->insn_idx ? -1 : 1;
6285
6286     /* no two relocations should have the same insn_idx, but ... */
6287     if (a->type != b->type)
6288         return a->type < b->type ? -1 : 1;
6289
6290     return 0;
6291 }
6292
6293 static void bpf_object__sort_relos(struct bpf_object *obj)
6294 {
6295     int i;
6296
6297     for (i = 0; i < obj->nr_programs; i++) {
6298         struct bpf_program *p = &obj->programs[i];
6299
6300         if (!p->nr_reloc)
6301             continue;
6302
6303         qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs);
6304     }
6305 }
6306
6307 static int
6308 bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
6309 {
6310     struct bpf_program *prog;
6311     size_t i, j;
6312     int err;
6313
6314     if (obj->btf_ext) {
6315         err = bpf_object__relocate_core(obj, targ_btf_path);
6316         if (err) {
6317             pr_warn("failed to perform CO-RE relocations: %d\n",
6318                 err);
6319             return err;
6320         }
6321         bpf_object__sort_relos(obj);
6322     }
6323
6324     /* Before relocating calls pre-process relocations and mark
6325      * few ld_imm64 instructions that points to subprogs.
6326      * Otherwise bpf_object__reloc_code() later would have to consider
6327      * all ld_imm64 insns as relocation candidates. That would
6328      * reduce relocation speed, since amount of find_prog_insn_relo()
6329      * would increase and most of them will fail to find a relo.
6330      */
6331     for (i = 0; i < obj->nr_programs; i++) {
6332         prog = &obj->programs[i];
6333         for (j = 0; j < prog->nr_reloc; j++) {
6334             struct reloc_desc *relo = &prog->reloc_desc[j];
6335             struct bpf_insn *insn = &prog->insns[relo->insn_idx];
6336
6337             /* mark the insn, so it's recognized by insn_is_pseudo_func() */
6338             if (relo->type == RELO_SUBPROG_ADDR)
6339                 insn[0].src_reg = BPF_PSEUDO_FUNC;
6340         }
6341     }
6342
6343     /* relocate subprogram calls and append used subprograms to main
6344      * programs; each copy of subprogram code needs to be relocated
6345      * differently for each main program, because its code location might
6346      * have changed.
6347      * Append subprog relos to main programs to allow data relos to be
6348      * processed after text is completely relocated.
6349      */
6350     for (i = 0; i < obj->nr_programs; i++) {
6351         prog = &obj->programs[i];
6352         /* sub-program's sub-calls are relocated within the context of
6353          * its main program only
6354          */
6355         if (prog_is_subprog(obj, prog))
6356             continue;
6357         if (!prog->autoload)
6358             continue;
6359
6360         err = bpf_object__relocate_calls(obj, prog);
6361         if (err) {
6362             pr_warn("prog '%s': failed to relocate calls: %d\n",
6363                 prog->name, err);
6364             return err;
6365         }
6366     }
6367     /* Process data relos for main programs */
6368     for (i = 0; i < obj->nr_programs; i++) {
6369         prog = &obj->programs[i];
6370         if (prog_is_subprog(obj, prog))
6371             continue;
6372         if (!prog->autoload)
6373             continue;
6374         err = bpf_object__relocate_data(obj, prog);
6375         if (err) {
6376             pr_warn("prog '%s': failed to relocate data references: %d\n",
6377                 prog->name, err);
6378             return err;
6379         }
6380     }
6381
6382     return 0;
6383 }
6384
6385 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
6386                         Elf64_Shdr *shdr, Elf_Data *data);
6387
6388 static int bpf_object__collect_map_relos(struct bpf_object *obj,
6389                      Elf64_Shdr *shdr, Elf_Data *data)
6390 {
6391     const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *);
6392     int i, j, nrels, new_sz;
6393     const struct btf_var_secinfo *vi = NULL;
6394     const struct btf_type *sec, *var, *def;
6395     struct bpf_map *map = NULL, *targ_map = NULL;
6396     struct bpf_program *targ_prog = NULL;
6397     bool is_prog_array, is_map_in_map;
6398     const struct btf_member *member;
6399     const char *name, *mname, *type;
6400     unsigned int moff;
6401     Elf64_Sym *sym;
6402     Elf64_Rel *rel;
6403     void *tmp;
6404
6405     if (!obj->efile.btf_maps_sec_btf_id || !obj->btf)
6406         return -EINVAL;
6407     sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id);
6408     if (!sec)
6409         return -EINVAL;
6410
6411     nrels = shdr->sh_size / shdr->sh_entsize;
6412     for (i = 0; i < nrels; i++) {
6413         rel = elf_rel_by_idx(data, i);
6414         if (!rel) {
6415             pr_warn(".maps relo #%d: failed to get ELF relo\n", i);
6416             return -LIBBPF_ERRNO__FORMAT;
6417         }
6418
6419         sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
6420         if (!sym) {
6421             pr_warn(".maps relo #%d: symbol %zx not found\n",
6422                 i, (size_t)ELF64_R_SYM(rel->r_info));
6423             return -LIBBPF_ERRNO__FORMAT;
6424         }
6425         name = elf_sym_str(obj, sym->st_name) ?: "<?>";
6426
6427         pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n",
6428              i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value,
6429              (size_t)rel->r_offset, sym->st_name, name);
6430
6431         for (j = 0; j < obj->nr_maps; j++) {
6432             map = &obj->maps[j];
6433             if (map->sec_idx != obj->efile.btf_maps_shndx)
6434                 continue;
6435
6436             vi = btf_var_secinfos(sec) + map->btf_var_idx;
6437             if (vi->offset <= rel->r_offset &&
6438                 rel->r_offset + bpf_ptr_sz <= vi->offset + vi->size)
6439                 break;
6440         }
6441         if (j == obj->nr_maps) {
6442             pr_warn(".maps relo #%d: cannot find map '%s' at rel->r_offset %zu\n",
6443                 i, name, (size_t)rel->r_offset);
6444             return -EINVAL;
6445         }
6446
6447         is_map_in_map = bpf_map_type__is_map_in_map(map->def.type);
6448         is_prog_array = map->def.type == BPF_MAP_TYPE_PROG_ARRAY;
6449         type = is_map_in_map ? "map" : "prog";
6450         if (is_map_in_map) {
6451             if (sym->st_shndx != obj->efile.btf_maps_shndx) {
6452                 pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
6453                     i, name);
6454                 return -LIBBPF_ERRNO__RELOC;
6455             }
6456             if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
6457                 map->def.key_size != sizeof(int)) {
6458                 pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
6459                     i, map->name, sizeof(int));
6460                 return -EINVAL;
6461             }
6462             targ_map = bpf_object__find_map_by_name(obj, name);
6463             if (!targ_map) {
6464                 pr_warn(".maps relo #%d: '%s' isn't a valid map reference\n",
6465                     i, name);
6466                 return -ESRCH;
6467             }
6468         } else if (is_prog_array) {
6469             targ_prog = bpf_object__find_program_by_name(obj, name);
6470             if (!targ_prog) {
6471                 pr_warn(".maps relo #%d: '%s' isn't a valid program reference\n",
6472                     i, name);
6473                 return -ESRCH;
6474             }
6475             if (targ_prog->sec_idx != sym->st_shndx ||
6476                 targ_prog->sec_insn_off * 8 != sym->st_value ||
6477                 prog_is_subprog(obj, targ_prog)) {
6478                 pr_warn(".maps relo #%d: '%s' isn't an entry-point program\n",
6479                     i, name);
6480                 return -LIBBPF_ERRNO__RELOC;
6481             }
6482         } else {
6483             return -EINVAL;
6484         }
6485
6486         var = btf__type_by_id(obj->btf, vi->type);
6487         def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
6488         if (btf_vlen(def) == 0)
6489             return -EINVAL;
6490         member = btf_members(def) + btf_vlen(def) - 1;
6491         mname = btf__name_by_offset(obj->btf, member->name_off);
6492         if (strcmp(mname, "values"))
6493             return -EINVAL;
6494
6495         moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8;
6496         if (rel->r_offset - vi->offset < moff)
6497             return -EINVAL;
6498
6499         moff = rel->r_offset - vi->offset - moff;
6500         /* here we use BPF pointer size, which is always 64 bit, as we
6501          * are parsing ELF that was built for BPF target
6502          */
6503         if (moff % bpf_ptr_sz)
6504             return -EINVAL;
6505         moff /= bpf_ptr_sz;
6506         if (moff >= map->init_slots_sz) {
6507             new_sz = moff + 1;
6508             tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz);
6509             if (!tmp)
6510                 return -ENOMEM;
6511             map->init_slots = tmp;
6512             memset(map->init_slots + map->init_slots_sz, 0,
6513                    (new_sz - map->init_slots_sz) * host_ptr_sz);
6514             map->init_slots_sz = new_sz;
6515         }
6516         map->init_slots[moff] = is_map_in_map ? (void *)targ_map : (void *)targ_prog;
6517
6518         pr_debug(".maps relo #%d: map '%s' slot [%d] points to %s '%s'\n",
6519              i, map->name, moff, type, name);
6520     }
6521
6522     return 0;
6523 }
6524
6525 static int bpf_object__collect_relos(struct bpf_object *obj)
6526 {
6527     int i, err;
6528
6529     for (i = 0; i < obj->efile.sec_cnt; i++) {
6530         struct elf_sec_desc *sec_desc = &obj->efile.secs[i];
6531         Elf64_Shdr *shdr;
6532         Elf_Data *data;
6533         int idx;
6534
6535         if (sec_desc->sec_type != SEC_RELO)
6536             continue;
6537
6538         shdr = sec_desc->shdr;
6539         data = sec_desc->data;
6540         idx = shdr->sh_info;
6541
6542         if (shdr->sh_type != SHT_REL) {
6543             pr_warn("internal error at %d\n", __LINE__);
6544             return -LIBBPF_ERRNO__INTERNAL;
6545         }
6546
6547         if (idx == obj->efile.st_ops_shndx)
6548             err = bpf_object__collect_st_ops_relos(obj, shdr, data);
6549         else if (idx == obj->efile.btf_maps_shndx)
6550             err = bpf_object__collect_map_relos(obj, shdr, data);
6551         else
6552             err = bpf_object__collect_prog_relos(obj, shdr, data);
6553         if (err)
6554             return err;
6555     }
6556
6557     bpf_object__sort_relos(obj);
6558     return 0;
6559 }
6560
6561 static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id)
6562 {
6563     if (BPF_CLASS(insn->code) == BPF_JMP &&
6564         BPF_OP(insn->code) == BPF_CALL &&
6565         BPF_SRC(insn->code) == BPF_K &&
6566         insn->src_reg == 0 &&
6567         insn->dst_reg == 0) {
6568             *func_id = insn->imm;
6569             return true;
6570     }
6571     return false;
6572 }
6573
6574 static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog)
6575 {
6576     struct bpf_insn *insn = prog->insns;
6577     enum bpf_func_id func_id;
6578     int i;
6579
6580     if (obj->gen_loader)
6581         return 0;
6582
6583     for (i = 0; i < prog->insns_cnt; i++, insn++) {
6584         if (!insn_is_helper_call(insn, &func_id))
6585             continue;
6586
6587         /* on kernels that don't yet support
6588          * bpf_probe_read_{kernel,user}[_str] helpers, fall back
6589          * to bpf_probe_read() which works well for old kernels
6590          */
6591         switch (func_id) {
6592         case BPF_FUNC_probe_read_kernel:
6593         case BPF_FUNC_probe_read_user:
6594             if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
6595                 insn->imm = BPF_FUNC_probe_read;
6596             break;
6597         case BPF_FUNC_probe_read_kernel_str:
6598         case BPF_FUNC_probe_read_user_str:
6599             if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
6600                 insn->imm = BPF_FUNC_probe_read_str;
6601             break;
6602         default:
6603             break;
6604         }
6605     }
6606     return 0;
6607 }
6608
6609 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
6610                      int *btf_obj_fd, int *btf_type_id);
6611
6612 /* this is called as prog->sec_def->prog_prepare_load_fn for libbpf-supported sec_defs */
6613 static int libbpf_prepare_prog_load(struct bpf_program *prog,
6614                     struct bpf_prog_load_opts *opts, long cookie)
6615 {
6616     enum sec_def_flags def = cookie;
6617
6618     /* old kernels might not support specifying expected_attach_type */
6619     if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE))
6620         opts->expected_attach_type = 0;
6621
6622     if (def & SEC_SLEEPABLE)
6623         opts->prog_flags |= BPF_F_SLEEPABLE;
6624
6625     if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS))
6626         opts->prog_flags |= BPF_F_XDP_HAS_FRAGS;
6627
6628     if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) {
6629         int btf_obj_fd = 0, btf_type_id = 0, err;
6630         const char *attach_name;
6631
6632         attach_name = strchr(prog->sec_name, '/');
6633         if (!attach_name) {
6634             /* if BPF program is annotated with just SEC("fentry")
6635              * (or similar) without declaratively specifying
6636              * target, then it is expected that target will be
6637              * specified with bpf_program__set_attach_target() at
6638              * runtime before BPF object load step. If not, then
6639              * there is nothing to load into the kernel as BPF
6640              * verifier won't be able to validate BPF program
6641              * correctness anyways.
6642              */
6643             pr_warn("prog '%s': no BTF-based attach target is specified, use bpf_program__set_attach_target()\n",
6644                 prog->name);
6645             return -EINVAL;
6646         }
6647         attach_name++; /* skip over / */
6648
6649         err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id);
6650         if (err)
6651             return err;
6652
6653         /* cache resolved BTF FD and BTF type ID in the prog */
6654         prog->attach_btf_obj_fd = btf_obj_fd;
6655         prog->attach_btf_id = btf_type_id;
6656
6657         /* but by now libbpf common logic is not utilizing
6658          * prog->atach_btf_obj_fd/prog->attach_btf_id anymore because
6659          * this callback is called after opts were populated by
6660          * libbpf, so this callback has to update opts explicitly here
6661          */
6662         opts->attach_btf_obj_fd = btf_obj_fd;
6663         opts->attach_btf_id = btf_type_id;
6664     }
6665     return 0;
6666 }
6667
6668 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz);
6669
6670 static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog,
6671                 struct bpf_insn *insns, int insns_cnt,
6672                 const char *license, __u32 kern_version, int *prog_fd)
6673 {
6674     LIBBPF_OPTS(bpf_prog_load_opts, load_attr);
6675     const char *prog_name = NULL;
6676     char *cp, errmsg[STRERR_BUFSIZE];
6677     size_t log_buf_size = 0;
6678     char *log_buf = NULL, *tmp;
6679     int btf_fd, ret, err;
6680     bool own_log_buf = true;
6681     __u32 log_level = prog->log_level;
6682
6683     if (prog->type == BPF_PROG_TYPE_UNSPEC) {
6684         /*
6685          * The program type must be set.  Most likely we couldn't find a proper
6686          * section definition at load time, and thus we didn't infer the type.
6687          */
6688         pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n",
6689             prog->name, prog->sec_name);
6690         return -EINVAL;
6691     }
6692
6693     if (!insns || !insns_cnt)
6694         return -EINVAL;
6695
6696     load_attr.expected_attach_type = prog->expected_attach_type;
6697     if (kernel_supports(obj, FEAT_PROG_NAME))
6698         prog_name = prog->name;
6699     load_attr.attach_prog_fd = prog->attach_prog_fd;
6700     load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd;
6701     load_attr.attach_btf_id = prog->attach_btf_id;
6702     load_attr.kern_version = kern_version;
6703     load_attr.prog_ifindex = prog->prog_ifindex;
6704
6705     /* specify func_info/line_info only if kernel supports them */
6706     btf_fd = bpf_object__btf_fd(obj);
6707     if (btf_fd >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) {
6708         load_attr.prog_btf_fd = btf_fd;
6709         load_attr.func_info = prog->func_info;
6710         load_attr.func_info_rec_size = prog->func_info_rec_size;
6711         load_attr.func_info_cnt = prog->func_info_cnt;
6712         load_attr.line_info = prog->line_info;
6713         load_attr.line_info_rec_size = prog->line_info_rec_size;
6714         load_attr.line_info_cnt = prog->line_info_cnt;
6715     }
6716     load_attr.log_level = log_level;
6717     load_attr.prog_flags = prog->prog_flags;
6718     load_attr.fd_array = obj->fd_array;
6719
6720     /* adjust load_attr if sec_def provides custom preload callback */
6721     if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) {
6722         err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie);
6723         if (err < 0) {
6724             pr_warn("prog '%s': failed to prepare load attributes: %d\n",
6725                 prog->name, err);
6726             return err;
6727         }
6728         insns = prog->insns;
6729         insns_cnt = prog->insns_cnt;
6730     }
6731
6732     if (obj->gen_loader) {
6733         bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name,
6734                    license, insns, insns_cnt, &load_attr,
6735                    prog - obj->programs);
6736         *prog_fd = -1;
6737         return 0;
6738     }
6739
6740 retry_load:
6741     /* if log_level is zero, we don't request logs initially even if
6742      * custom log_buf is specified; if the program load fails, then we'll
6743      * bump log_level to 1 and use either custom log_buf or we'll allocate
6744      * our own and retry the load to get details on what failed
6745      */
6746     if (log_level) {
6747         if (prog->log_buf) {
6748             log_buf = prog->log_buf;
6749             log_buf_size = prog->log_size;
6750             own_log_buf = false;
6751         } else if (obj->log_buf) {
6752             log_buf = obj->log_buf;
6753             log_buf_size = obj->log_size;
6754             own_log_buf = false;
6755         } else {
6756             log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, log_buf_size * 2);
6757             tmp = realloc(log_buf, log_buf_size);
6758             if (!tmp) {
6759                 ret = -ENOMEM;
6760                 goto out;
6761             }
6762             log_buf = tmp;
6763             log_buf[0] = '\0';
6764             own_log_buf = true;
6765         }
6766     }
6767
6768     load_attr.log_buf = log_buf;
6769     load_attr.log_size = log_buf_size;
6770     load_attr.log_level = log_level;
6771
6772     ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr);
6773     if (ret >= 0) {
6774         if (log_level && own_log_buf) {
6775             pr_debug("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
6776                  prog->name, log_buf);
6777         }
6778
6779         if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) {
6780             struct bpf_map *map;
6781             int i;
6782
6783             for (i = 0; i < obj->nr_maps; i++) {
6784                 map = &prog->obj->maps[i];
6785                 if (map->libbpf_type != LIBBPF_MAP_RODATA)
6786                     continue;
6787
6788                 if (bpf_prog_bind_map(ret, bpf_map__fd(map), NULL)) {
6789                     cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
6790                     pr_warn("prog '%s': failed to bind map '%s': %s\n",
6791                         prog->name, map->real_name, cp);
6792                     /* Don't fail hard if can't bind rodata. */
6793                 }
6794             }
6795         }
6796
6797         *prog_fd = ret;
6798         ret = 0;
6799         goto out;
6800     }
6801
6802     if (log_level == 0) {
6803         log_level = 1;
6804         goto retry_load;
6805     }
6806     /* On ENOSPC, increase log buffer size and retry, unless custom
6807      * log_buf is specified.
6808      * Be careful to not overflow u32, though. Kernel's log buf size limit
6809      * isn't part of UAPI so it can always be bumped to full 4GB. So don't
6810      * multiply by 2 unless we are sure we'll fit within 32 bits.
6811      * Currently, we'll get -EINVAL when we reach (UINT_MAX >> 2).
6812      */
6813     if (own_log_buf && errno == ENOSPC && log_buf_size <= UINT_MAX / 2)
6814         goto retry_load;
6815
6816     ret = -errno;
6817
6818     /* post-process verifier log to improve error descriptions */
6819     fixup_verifier_log(prog, log_buf, log_buf_size);
6820
6821     cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
6822     pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp);
6823     pr_perm_msg(ret);
6824
6825     if (own_log_buf && log_buf && log_buf[0] != '\0') {
6826         pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
6827             prog->name, log_buf);
6828     }
6829
6830 out:
6831     if (own_log_buf)
6832         free(log_buf);
6833     return ret;
6834 }
6835
/*
 * Find the start of the line that precedes @cur inside @buf.
 *
 * @buf: start of the log buffer
 * @cur: start of the current line (or the end of the log); must point
 *       into @buf
 *
 * Returns a pointer to the first character of the previous line, or NULL
 * when @cur already is the start of the buffer (there is no previous
 * line).
 */
static char *find_prev_line(char *buf, char *cur)
{
    char *p;

    if (cur == buf) /* start of the log buf: nothing precedes it */
        return NULL;

    /* step over the newline that terminates the previous line, then walk
     * back until the character before p is a newline or p reaches buf;
     * comparing p against buf avoids forming a pointer before the buffer
     */
    p = cur - 1;
    while (p > buf && p[-1] != '\n')
        p--;

    return p;
}
6849
/*
 * Replace @orig_sz bytes at @orig inside the log with the string @patch.
 *
 * @buf:     start of the log buffer
 * @buf_sz:  total capacity of @buf
 * @log_sz:  size of the log content currently in @buf (callers are
 *           expected to count the terminating NUL, so that it is shifted
 *           together with the rest of the log)
 * @orig:    start of the piece being replaced; must point into the log
 * @orig_sz: size of the piece being replaced
 * @patch:   NUL-terminated replacement text
 *
 * If the patch is shorter than the replaced piece, the rest of the log is
 * shifted left; if it is longer, the rest is shifted right. When the
 * result doesn't fit into @buf_sz, the end of the log (or even of the
 * patch) is truncated and the last byte of the buffer is set to NUL so
 * that the log stays a valid C string.
 */
static void patch_log(char *buf, size_t buf_sz, size_t log_sz,
              char *orig, size_t orig_sz, const char *patch)
{
    size_t orig_off = orig - buf;
    /* size of the remaining log content to the right from the to-be-replaced part */
    size_t rem_sz = log_sz - (orig_off + orig_sz);
    size_t patch_sz = strlen(patch);
    bool truncated = false;

    if (patch_sz > orig_sz) {
        size_t grow = patch_sz - orig_sz;

        if (orig_off + patch_sz >= buf_sz) {
            /* patch is big enough to cover remaining space
             * completely; keep one byte for the terminator
             */
            patch_sz = buf_sz - orig_off - 1;
            rem_sz = 0;
            truncated = true;
        } else if (grow > buf_sz - log_sz) {
            /* patch causes part of remaining log to be truncated */
            rem_sz -= grow - (buf_sz - log_sz);
            truncated = true;
        }
    }

    /* shift remaining log left or right by the size difference */
    if (patch_sz != orig_sz)
        memmove(orig + patch_sz, orig + orig_sz, rem_sz);

    memcpy(orig, patch, patch_sz);

    /* truncation dropped the original terminator, put a new one in */
    if (truncated)
        buf[buf_sz - 1] = '\0';
}
6886
6887 static void fixup_log_failed_core_relo(struct bpf_program *prog,
6888                        char *buf, size_t buf_sz, size_t log_sz,
6889                        char *line1, char *line2, char *line3)
6890 {
6891     /* Expected log for failed and not properly guarded CO-RE relocation:
6892      * line1 -> 123: (85) call unknown#195896080
6893      * line2 -> invalid func unknown#195896080
6894      * line3 -> <anything else or end of buffer>
6895      *
6896      * "123" is the index of the instruction that was poisoned. We extract
6897      * instruction index to find corresponding CO-RE relocation and
6898      * replace this part of the log with more relevant information about
6899      * failed CO-RE relocation.
6900      */
6901     const struct bpf_core_relo *relo;
6902     struct bpf_core_spec spec;
6903     char patch[512], spec_buf[256];
6904     int insn_idx, err, spec_len;
6905
6906     if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1)
6907         return;
6908
6909     relo = find_relo_core(prog, insn_idx);
6910     if (!relo)
6911         return;
6912
6913     err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec);
6914     if (err)
6915         return;
6916
6917     spec_len = bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec);
6918     snprintf(patch, sizeof(patch),
6919          "%d: <invalid CO-RE relocation>\n"
6920          "failed to resolve CO-RE relocation %s%s\n",
6921          insn_idx, spec_buf, spec_len >= sizeof(spec_buf) ? "..." : "");
6922
6923     patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
6924 }
6925
6926 static void fixup_log_missing_map_load(struct bpf_program *prog,
6927                        char *buf, size_t buf_sz, size_t log_sz,
6928                        char *line1, char *line2, char *line3)
6929 {
6930     /* Expected log for failed and not properly guarded CO-RE relocation:
6931      * line1 -> 123: (85) call unknown#2001000345
6932      * line2 -> invalid func unknown#2001000345
6933      * line3 -> <anything else or end of buffer>
6934      *
6935      * "123" is the index of the instruction that was poisoned.
6936      * "345" in "2001000345" are map index in obj->maps to fetch map name.
6937      */
6938     struct bpf_object *obj = prog->obj;
6939     const struct bpf_map *map;
6940     int insn_idx, map_idx;
6941     char patch[128];
6942
6943     if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &map_idx) != 2)
6944         return;
6945
6946     map_idx -= MAP_LDIMM64_POISON_BASE;
6947     if (map_idx < 0 || map_idx >= obj->nr_maps)
6948         return;
6949     map = &obj->maps[map_idx];
6950
6951     snprintf(patch, sizeof(patch),
6952          "%d: <invalid BPF map reference>\n"
6953          "BPF map '%s' is referenced but wasn't created\n",
6954          insn_idx, map->name);
6955
6956     patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
6957 }
6958
6959 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz)
6960 {
6961     /* look for familiar error patterns in last N lines of the log */
6962     const size_t max_last_line_cnt = 10;
6963     char *prev_line, *cur_line, *next_line;
6964     size_t log_sz;
6965     int i;
6966
6967     if (!buf)
6968         return;
6969
6970     log_sz = strlen(buf) + 1;
6971     next_line = buf + log_sz - 1;
6972
6973     for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) {
6974         cur_line = find_prev_line(buf, next_line);
6975         if (!cur_line)
6976             return;
6977
6978         /* failed CO-RE relocation case */
6979         if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) {
6980             prev_line = find_prev_line(buf, cur_line);
6981             if (!prev_line)
6982                 continue;
6983
6984             fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz,
6985                            prev_line, cur_line, next_line);
6986             return;
6987         } else if (str_has_pfx(cur_line, "invalid func unknown#"MAP_LDIMM64_POISON_PFX)) {
6988             prev_line = find_prev_line(buf, cur_line);
6989             if (!prev_line)
6990                 continue;
6991
6992             fixup_log_missing_map_load(prog, buf, buf_sz, log_sz,
6993                            prev_line, cur_line, next_line);
6994             return;
6995         }
6996     }
6997 }
6998
6999 static int bpf_program_record_relos(struct bpf_program *prog)
7000 {
7001     struct bpf_object *obj = prog->obj;
7002     int i;
7003
7004     for (i = 0; i < prog->nr_reloc; i++) {
7005         struct reloc_desc *relo = &prog->reloc_desc[i];
7006         struct extern_desc *ext = &obj->externs[relo->sym_off];
7007
7008         switch (relo->type) {
7009         case RELO_EXTERN_VAR:
7010             if (ext->type != EXT_KSYM)
7011                 continue;
7012             bpf_gen__record_extern(obj->gen_loader, ext->name,
7013                            ext->is_weak, !ext->ksym.type_id,
7014                            BTF_KIND_VAR, relo->insn_idx);
7015             break;
7016         case RELO_EXTERN_FUNC:
7017             bpf_gen__record_extern(obj->gen_loader, ext->name,
7018                            ext->is_weak, false, BTF_KIND_FUNC,
7019                            relo->insn_idx);
7020             break;
7021         case RELO_CORE: {
7022             struct bpf_core_relo cr = {
7023                 .insn_off = relo->insn_idx * 8,
7024                 .type_id = relo->core_relo->type_id,
7025                 .access_str_off = relo->core_relo->access_str_off,
7026                 .kind = relo->core_relo->kind,
7027             };
7028
7029             bpf_gen__record_relo_core(obj->gen_loader, &cr);
7030             break;
7031         }
7032         default:
7033             continue;
7034         }
7035     }
7036     return 0;
7037 }
7038
7039 static int
7040 bpf_object__load_progs(struct bpf_object *obj, int log_level)
7041 {
7042     struct bpf_program *prog;
7043     size_t i;
7044     int err;
7045
7046     for (i = 0; i < obj->nr_programs; i++) {
7047         prog = &obj->programs[i];
7048         err = bpf_object__sanitize_prog(obj, prog);
7049         if (err)
7050             return err;
7051     }
7052
7053     for (i = 0; i < obj->nr_programs; i++) {
7054         prog = &obj->programs[i];
7055         if (prog_is_subprog(obj, prog))
7056             continue;
7057         if (!prog->autoload) {
7058             pr_debug("prog '%s': skipped loading\n", prog->name);
7059             continue;
7060         }
7061         prog->log_level |= log_level;
7062
7063         if (obj->gen_loader)
7064             bpf_program_record_relos(prog);
7065
7066         err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt,
7067                        obj->license, obj->kern_version, &prog->fd);
7068         if (err) {
7069             pr_warn("prog '%s': failed to load: %d\n", prog->name, err);
7070             return err;
7071         }
7072     }
7073
7074     bpf_object__free_relocs(obj);
7075     return 0;
7076 }
7077
7078 static const struct bpf_sec_def *find_sec_def(const char *sec_name);
7079
7080 static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts)
7081 {
7082     struct bpf_program *prog;
7083     int err;
7084
7085     bpf_object__for_each_program(prog, obj) {
7086         prog->sec_def = find_sec_def(prog->sec_name);
7087         if (!prog->sec_def) {
7088             /* couldn't guess, but user might manually specify */
7089             pr_debug("prog '%s': unrecognized ELF section name '%s'\n",
7090                 prog->name, prog->sec_name);
7091             continue;
7092         }
7093
7094         prog->type = prog->sec_def->prog_type;
7095         prog->expected_attach_type = prog->sec_def->expected_attach_type;
7096
7097         /* sec_def can have custom callback which should be called
7098          * after bpf_program is initialized to adjust its properties
7099          */
7100         if (prog->sec_def->prog_setup_fn) {
7101             err = prog->sec_def->prog_setup_fn(prog, prog->sec_def->cookie);
7102             if (err < 0) {
7103                 pr_warn("prog '%s': failed to initialize: %d\n",
7104                     prog->name, err);
7105                 return err;
7106             }
7107         }
7108     }
7109
7110     return 0;
7111 }
7112
7113 static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz,
7114                       const struct bpf_object_open_opts *opts)
7115 {
7116     const char *obj_name, *kconfig, *btf_tmp_path;
7117     struct bpf_object *obj;
7118     char tmp_name[64];
7119     int err;
7120     char *log_buf;
7121     size_t log_size;
7122     __u32 log_level;
7123
7124     if (elf_version(EV_CURRENT) == EV_NONE) {
7125         pr_warn("failed to init libelf for %s\n",
7126             path ? : "(mem buf)");
7127         return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
7128     }
7129
7130     if (!OPTS_VALID(opts, bpf_object_open_opts))
7131         return ERR_PTR(-EINVAL);
7132
7133     obj_name = OPTS_GET(opts, object_name, NULL);
7134     if (obj_buf) {
7135         if (!obj_name) {
7136             snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
7137                  (unsigned long)obj_buf,
7138                  (unsigned long)obj_buf_sz);
7139             obj_name = tmp_name;
7140         }
7141         path = obj_name;
7142         pr_debug("loading object '%s' from buffer\n", obj_name);
7143     }
7144
7145     log_buf = OPTS_GET(opts, kernel_log_buf, NULL);
7146     log_size = OPTS_GET(opts, kernel_log_size, 0);
7147     log_level = OPTS_GET(opts, kernel_log_level, 0);
7148     if (log_size > UINT_MAX)
7149         return ERR_PTR(-EINVAL);
7150     if (log_size && !log_buf)
7151         return ERR_PTR(-EINVAL);
7152
7153     obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
7154     if (IS_ERR(obj))
7155         return obj;
7156
7157     obj->log_buf = log_buf;
7158     obj->log_size = log_size;
7159     obj->log_level = log_level;
7160
7161     btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);
7162     if (btf_tmp_path) {
7163         if (strlen(btf_tmp_path) >= PATH_MAX) {
7164             err = -ENAMETOOLONG;
7165             goto out;
7166         }
7167         obj->btf_custom_path = strdup(btf_tmp_path);
7168         if (!obj->btf_custom_path) {
7169             err = -ENOMEM;
7170             goto out;
7171         }
7172     }
7173
7174     kconfig = OPTS_GET(opts, kconfig, NULL);
7175     if (kconfig) {
7176         obj->kconfig = strdup(kconfig);
7177         if (!obj->kconfig) {
7178             err = -ENOMEM;
7179             goto out;
7180         }
7181     }
7182
7183     err = bpf_object__elf_init(obj);
7184     err = err ? : bpf_object__check_endianness(obj);
7185     err = err ? : bpf_object__elf_collect(obj);
7186     err = err ? : bpf_object__collect_externs(obj);
7187     err = err ? : bpf_object__finalize_btf(obj);
7188     err = err ? : bpf_object__init_maps(obj, opts);
7189     err = err ? : bpf_object_init_progs(obj, opts);
7190     err = err ? : bpf_object__collect_relos(obj);
7191     if (err)
7192         goto out;
7193
7194     bpf_object__elf_finish(obj);
7195
7196     return obj;
7197 out:
7198     bpf_object__close(obj);
7199     return ERR_PTR(err);
7200 }
7201
/* Open a BPF object from the ELF file at @path with optional @opts.
 * A NULL @path is rejected with -EINVAL; otherwise the result of
 * bpf_object_open() is passed through libbpf_ptr().
 */
struct bpf_object *
bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
{
    struct bpf_object *obj;

    if (!path)
        return libbpf_err_ptr(-EINVAL);

    pr_debug("loading %s\n", path);

    obj = bpf_object_open(path, NULL, 0, opts);
    return libbpf_ptr(obj);
}
7212
/* Open a BPF object from the file at @path using default options. */
struct bpf_object *bpf_object__open(const char *path)
{
    const struct bpf_object_open_opts *no_opts = NULL;

    return bpf_object__open_file(path, no_opts);
}
7217
/* Open a BPF object from an in-memory ELF image of @obj_buf_sz bytes at
 * @obj_buf with optional @opts. A NULL or empty buffer is rejected with
 * -EINVAL; otherwise the result of bpf_object_open() is passed through
 * libbpf_ptr().
 */
struct bpf_object *
bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
             const struct bpf_object_open_opts *opts)
{
    struct bpf_object *obj;

    if (obj_buf == NULL || obj_buf_sz == 0)
        return libbpf_err_ptr(-EINVAL);

    obj = bpf_object_open(NULL, obj_buf, obj_buf_sz, opts);
    return libbpf_ptr(obj);
}
7227
7228 static int bpf_object_unload(struct bpf_object *obj)
7229 {
7230     size_t i;
7231
7232     if (!obj)
7233         return libbpf_err(-EINVAL);
7234
7235     for (i = 0; i < obj->nr_maps; i++) {
7236         zclose(obj->maps[i].fd);
7237         if (obj->maps[i].st_ops)
7238             zfree(&obj->maps[i].st_ops->kern_vdata);
7239     }
7240
7241     for (i = 0; i < obj->nr_programs; i++)
7242         bpf_program__unload(&obj->programs[i]);
7243
7244     return 0;
7245 }
7246
7247 int bpf_object__unload(struct bpf_object *obj) __attribute__((alias("bpf_object_unload")));
7248
7249 static int bpf_object__sanitize_maps(struct bpf_object *obj)
7250 {
7251     struct bpf_map *m;
7252
7253     bpf_object__for_each_map(m, obj) {
7254         if (!bpf_map__is_internal(m))
7255             continue;
7256         if (!kernel_supports(obj, FEAT_ARRAY_MMAP))
7257             m->def.map_flags ^= BPF_F_MMAPABLE;
7258     }
7259
7260     return 0;
7261 }
7262
7263 int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx)
7264 {
7265     char sym_type, sym_name[500];
7266     unsigned long long sym_addr;
7267     int ret, err = 0;
7268     FILE *f;
7269
7270     f = fopen("/proc/kallsyms", "r");
7271     if (!f) {
7272         err = -errno;
7273         pr_warn("failed to open /proc/kallsyms: %d\n", err);
7274         return err;
7275     }
7276
7277     while (true) {
7278         ret = fscanf(f, "%llx %c %499s%*[^\n]\n",
7279                  &sym_addr, &sym_type, sym_name);
7280         if (ret == EOF && feof(f))
7281             break;
7282         if (ret != 3) {
7283             pr_warn("failed to read kallsyms entry: %d\n", ret);
7284             err = -EINVAL;
7285             break;
7286         }
7287
7288         err = cb(sym_addr, sym_type, sym_name, ctx);
7289         if (err)
7290             break;
7291     }
7292
7293     fclose(f);
7294     return err;
7295 }
7296
7297 static int kallsyms_cb(unsigned long long sym_addr, char sym_type,
7298                const char *sym_name, void *ctx)
7299 {
7300     struct bpf_object *obj = ctx;
7301     const struct btf_type *t;
7302     struct extern_desc *ext;
7303
7304     ext = find_extern_by_name(obj, sym_name);
7305     if (!ext || ext->type != EXT_KSYM)
7306         return 0;
7307
7308     t = btf__type_by_id(obj->btf, ext->btf_id);
7309     if (!btf_is_var(t))
7310         return 0;
7311
7312     if (ext->is_set && ext->ksym.addr != sym_addr) {
7313         pr_warn("extern (ksym) '%s': resolution is ambiguous: 0x%llx or 0x%llx\n",
7314             sym_name, ext->ksym.addr, sym_addr);
7315         return -EINVAL;
7316     }
7317     if (!ext->is_set) {
7318         ext->is_set = true;
7319         ext->ksym.addr = sym_addr;
7320         pr_debug("extern (ksym) '%s': set to 0x%llx\n", sym_name, sym_addr);
7321     }
7322     return 0;
7323 }
7324
7325 static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
7326 {
7327     return libbpf_kallsyms_parse(kallsyms_cb, obj);
7328 }
7329
7330 static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
7331                 __u16 kind, struct btf **res_btf,
7332                 struct module_btf **res_mod_btf)
7333 {
7334     struct module_btf *mod_btf;
7335     struct btf *btf;
7336     int i, id, err;
7337
7338     btf = obj->btf_vmlinux;
7339     mod_btf = NULL;
7340     id = btf__find_by_name_kind(btf, ksym_name, kind);
7341
7342     if (id == -ENOENT) {
7343         err = load_module_btfs(obj);
7344         if (err)
7345             return err;
7346
7347         for (i = 0; i < obj->btf_module_cnt; i++) {
7348             /* we assume module_btf's BTF FD is always >0 */
7349             mod_btf = &obj->btf_modules[i];
7350             btf = mod_btf->btf;
7351             id = btf__find_by_name_kind_own(btf, ksym_name, kind);
7352             if (id != -ENOENT)
7353                 break;
7354         }
7355     }
7356     if (id <= 0)
7357         return -ESRCH;
7358
7359     *res_btf = btf;
7360     *res_mod_btf = mod_btf;
7361     return id;
7362 }
7363
7364 static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
7365                            struct extern_desc *ext)
7366 {
7367     const struct btf_type *targ_var, *targ_type;
7368     __u32 targ_type_id, local_type_id;
7369     struct module_btf *mod_btf = NULL;
7370     const char *targ_var_name;
7371     struct btf *btf = NULL;
7372     int id, err;
7373
7374     id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf);
7375     if (id < 0) {
7376         if (id == -ESRCH && ext->is_weak)
7377             return 0;
7378         pr_warn("extern (var ksym) '%s': not found in kernel BTF\n",
7379             ext->name);
7380         return id;
7381     }
7382
7383     /* find local type_id */
7384     local_type_id = ext->ksym.type_id;
7385
7386     /* find target type_id */
7387     targ_var = btf__type_by_id(btf, id);
7388     targ_var_name = btf__name_by_offset(btf, targ_var->name_off);
7389     targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id);
7390
7391     err = bpf_core_types_are_compat(obj->btf, local_type_id,
7392                     btf, targ_type_id);
7393     if (err <= 0) {
7394         const struct btf_type *local_type;
7395         const char *targ_name, *local_name;
7396
7397         local_type = btf__type_by_id(obj->btf, local_type_id);
7398         local_name = btf__name_by_offset(obj->btf, local_type->name_off);
7399         targ_name = btf__name_by_offset(btf, targ_type->name_off);
7400
7401         pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
7402             ext->name, local_type_id,
7403             btf_kind_str(local_type), local_name, targ_type_id,
7404             btf_kind_str(targ_type), targ_name);
7405         return -EINVAL;
7406     }
7407
7408     ext->is_set = true;
7409     ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
7410     ext->ksym.kernel_btf_id = id;
7411     pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n",
7412          ext->name, id, btf_kind_str(targ_var), targ_var_name);
7413
7414     return 0;
7415 }
7416
7417 static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj,
7418                         struct extern_desc *ext)
7419 {
7420     int local_func_proto_id, kfunc_proto_id, kfunc_id;
7421     struct module_btf *mod_btf = NULL;
7422     const struct btf_type *kern_func;
7423     struct btf *kern_btf = NULL;
7424     int ret;
7425
7426     local_func_proto_id = ext->ksym.type_id;
7427
7428     kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC, &kern_btf, &mod_btf);
7429     if (kfunc_id < 0) {
7430         if (kfunc_id == -ESRCH && ext->is_weak)
7431             return 0;
7432         pr_warn("extern (func ksym) '%s': not found in kernel or module BTFs\n",
7433             ext->name);
7434         return kfunc_id;
7435     }
7436
7437     kern_func = btf__type_by_id(kern_btf, kfunc_id);
7438     kfunc_proto_id = kern_func->type;
7439
7440     ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id,
7441                     kern_btf, kfunc_proto_id);
7442     if (ret <= 0) {
7443         pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with kernel [%d]\n",
7444             ext->name, local_func_proto_id, kfunc_proto_id);
7445         return -EINVAL;
7446     }
7447
7448     /* set index for module BTF fd in fd_array, if unset */
7449     if (mod_btf && !mod_btf->fd_array_idx) {
7450         /* insn->off is s16 */
7451         if (obj->fd_array_cnt == INT16_MAX) {
7452             pr_warn("extern (func ksym) '%s': module BTF fd index %d too big to fit in bpf_insn offset\n",
7453                 ext->name, mod_btf->fd_array_idx);
7454             return -E2BIG;
7455         }
7456         /* Cannot use index 0 for module BTF fd */
7457         if (!obj->fd_array_cnt)
7458             obj->fd_array_cnt = 1;
7459
7460         ret = libbpf_ensure_mem((void **)&obj->fd_array, &obj->fd_array_cap, sizeof(int),
7461                     obj->fd_array_cnt + 1);
7462         if (ret)
7463             return ret;
7464         mod_btf->fd_array_idx = obj->fd_array_cnt;
7465         /* we assume module BTF FD is always >0 */
7466         obj->fd_array[obj->fd_array_cnt++] = mod_btf->fd;
7467     }
7468
7469     ext->is_set = true;
7470     ext->ksym.kernel_btf_id = kfunc_id;
7471     ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0;
7472     pr_debug("extern (func ksym) '%s': resolved to kernel [%d]\n",
7473          ext->name, kfunc_id);
7474
7475     return 0;
7476 }
7477
7478 static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
7479 {
7480     const struct btf_type *t;
7481     struct extern_desc *ext;
7482     int i, err;
7483
7484     for (i = 0; i < obj->nr_extern; i++) {
7485         ext = &obj->externs[i];
7486         if (ext->type != EXT_KSYM || !ext->ksym.type_id)
7487             continue;
7488
7489         if (obj->gen_loader) {
7490             ext->is_set = true;
7491             ext->ksym.kernel_btf_obj_fd = 0;
7492             ext->ksym.kernel_btf_id = 0;
7493             continue;
7494         }
7495         t = btf__type_by_id(obj->btf, ext->btf_id);
7496         if (btf_is_var(t))
7497             err = bpf_object__resolve_ksym_var_btf_id(obj, ext);
7498         else
7499             err = bpf_object__resolve_ksym_func_btf_id(obj, ext);
7500         if (err)
7501             return err;
7502     }
7503     return 0;
7504 }
7505
7506 static int bpf_object__resolve_externs(struct bpf_object *obj,
7507                        const char *extra_kconfig)
7508 {
7509     bool need_config = false, need_kallsyms = false;
7510     bool need_vmlinux_btf = false;
7511     struct extern_desc *ext;
7512     void *kcfg_data = NULL;
7513     int err, i;
7514
7515     if (obj->nr_extern == 0)
7516         return 0;
7517
7518     if (obj->kconfig_map_idx >= 0)
7519         kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped;
7520
7521     for (i = 0; i < obj->nr_extern; i++) {
7522         ext = &obj->externs[i];
7523
7524         if (ext->type == EXT_KSYM) {
7525             if (ext->ksym.type_id)
7526                 need_vmlinux_btf = true;
7527             else
7528                 need_kallsyms = true;
7529             continue;
7530         } else if (ext->type == EXT_KCFG) {
7531             void *ext_ptr = kcfg_data + ext->kcfg.data_off;
7532             __u64 value = 0;
7533
7534             /* Kconfig externs need actual /proc/config.gz */
7535             if (str_has_pfx(ext->name, "CONFIG_")) {
7536                 need_config = true;
7537                 continue;
7538             }
7539
7540             /* Virtual kcfg externs are customly handled by libbpf */
7541             if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
7542                 value = get_kernel_version();
7543                 if (!value) {
7544                     pr_warn("extern (kcfg) '%s': failed to get kernel version\n", ext->name);
7545                     return -EINVAL;
7546                 }
7547             } else if (strcmp(ext->name, "LINUX_HAS_BPF_COOKIE") == 0) {
7548                 value = kernel_supports(obj, FEAT_BPF_COOKIE);
7549             } else if (strcmp(ext->name, "LINUX_HAS_SYSCALL_WRAPPER") == 0) {
7550                 value = kernel_supports(obj, FEAT_SYSCALL_WRAPPER);
7551             } else if (!str_has_pfx(ext->name, "LINUX_") || !ext->is_weak) {
7552                 /* Currently libbpf supports only CONFIG_ and LINUX_ prefixed
7553                  * __kconfig externs, where LINUX_ ones are virtual and filled out
7554                  * customly by libbpf (their values don't come from Kconfig).
7555                  * If LINUX_xxx variable is not recognized by libbpf, but is marked
7556                  * __weak, it defaults to zero value, just like for CONFIG_xxx
7557                  * externs.
7558                  */
7559                 pr_warn("extern (kcfg) '%s': unrecognized virtual extern\n", ext->name);
7560                 return -EINVAL;
7561             }
7562
7563             err = set_kcfg_value_num(ext, ext_ptr, value);
7564             if (err)
7565                 return err;
7566             pr_debug("extern (kcfg) '%s': set to 0x%llx\n",
7567                  ext->name, (long long)value);
7568         } else {
7569             pr_warn("extern '%s': unrecognized extern kind\n", ext->name);
7570             return -EINVAL;
7571         }
7572     }
7573     if (need_config && extra_kconfig) {
7574         err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data);
7575         if (err)
7576             return -EINVAL;
7577         need_config = false;
7578         for (i = 0; i < obj->nr_extern; i++) {
7579             ext = &obj->externs[i];
7580             if (ext->type == EXT_KCFG && !ext->is_set) {
7581                 need_config = true;
7582                 break;
7583             }
7584         }
7585     }
7586     if (need_config) {
7587         err = bpf_object__read_kconfig_file(obj, kcfg_data);
7588         if (err)
7589             return -EINVAL;
7590     }
7591     if (need_kallsyms) {
7592         err = bpf_object__read_kallsyms_file(obj);
7593         if (err)
7594             return -EINVAL;
7595     }
7596     if (need_vmlinux_btf) {
7597         err = bpf_object__resolve_ksyms_btf_id(obj);
7598         if (err)
7599             return -EINVAL;
7600     }
7601     for (i = 0; i < obj->nr_extern; i++) {
7602         ext = &obj->externs[i];
7603
7604         if (!ext->is_set && !ext->is_weak) {
7605             pr_warn("extern '%s' (strong): not resolved\n", ext->name);
7606             return -ESRCH;
7607         } else if (!ext->is_set) {
7608             pr_debug("extern '%s' (weak): not resolved, defaulting to zero\n",
7609                  ext->name);
7610         }
7611     }
7612
7613     return 0;
7614 }
7615
7616 static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path)
7617 {
7618     int err, i;
7619
7620     if (!obj)
7621         return libbpf_err(-EINVAL);
7622
7623     if (obj->loaded) {
7624         pr_warn("object '%s': load can't be attempted twice\n", obj->name);
7625         return libbpf_err(-EINVAL);
7626     }
7627
7628     if (obj->gen_loader)
7629         bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps);
7630
7631     err = bpf_object__probe_loading(obj);
7632     err = err ? : bpf_object__load_vmlinux_btf(obj, false);
7633     err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
7634     err = err ? : bpf_object__sanitize_and_load_btf(obj);
7635     err = err ? : bpf_object__sanitize_maps(obj);
7636     err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
7637     err = err ? : bpf_object__create_maps(obj);
7638     err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
7639     err = err ? : bpf_object__load_progs(obj, extra_log_level);
7640     err = err ? : bpf_object_init_prog_arrays(obj);
7641
7642     if (obj->gen_loader) {
7643         /* reset FDs */
7644         if (obj->btf)
7645             btf__set_fd(obj->btf, -1);
7646         for (i = 0; i < obj->nr_maps; i++)
7647             obj->maps[i].fd = -1;
7648         if (!err)
7649             err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps);
7650     }
7651
7652     /* clean up fd_array */
7653     zfree(&obj->fd_array);
7654
7655     /* clean up module BTFs */
7656     for (i = 0; i < obj->btf_module_cnt; i++) {
7657         close(obj->btf_modules[i].fd);
7658         btf__free(obj->btf_modules[i].btf);
7659         free(obj->btf_modules[i].name);
7660     }
7661     free(obj->btf_modules);
7662
7663     /* clean up vmlinux BTF */
7664     btf__free(obj->btf_vmlinux);
7665     obj->btf_vmlinux = NULL;
7666
7667     obj->loaded = true; /* doesn't matter if successfully or not */
7668
7669     if (err)
7670         goto out;
7671
7672     return 0;
7673 out:
7674     /* unpin any maps that were auto-pinned during load */
7675     for (i = 0; i < obj->nr_maps; i++)
7676         if (obj->maps[i].pinned && !obj->maps[i].reused)
7677             bpf_map__unpin(&obj->maps[i], NULL);
7678
7679     bpf_object_unload(obj);
7680     pr_warn("failed to load object '%s'\n", obj->path);
7681     return libbpf_err(err);
7682 }
7683
/* Load @obj into the kernel with no extra log level and no target BTF path
 * override.
 */
int bpf_object__load(struct bpf_object *obj)
{
    const char *no_target_btf = NULL;

    return bpf_object_load(obj, 0, no_target_btf);
}
7688
7689 static int make_parent_dir(const char *path)
7690 {
7691     char *cp, errmsg[STRERR_BUFSIZE];
7692     char *dname, *dir;
7693     int err = 0;
7694
7695     dname = strdup(path);
7696     if (dname == NULL)
7697         return -ENOMEM;
7698
7699     dir = dirname(dname);
7700     if (mkdir(dir, 0700) && errno != EEXIST)
7701         err = -errno;
7702
7703     free(dname);
7704     if (err) {
7705         cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
7706         pr_warn("failed to mkdir %s: %s\n", path, cp);
7707     }
7708     return err;
7709 }
7710
7711 static int check_path(const char *path)
7712 {
7713     char *cp, errmsg[STRERR_BUFSIZE];
7714     struct statfs st_fs;
7715     char *dname, *dir;
7716     int err = 0;
7717
7718     if (path == NULL)
7719         return -EINVAL;
7720
7721     dname = strdup(path);
7722     if (dname == NULL)
7723         return -ENOMEM;
7724
7725     dir = dirname(dname);
7726     if (statfs(dir, &st_fs)) {
7727         cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
7728         pr_warn("failed to statfs %s: %s\n", dir, cp);
7729         err = -errno;
7730     }
7731     free(dname);
7732
7733     if (!err && st_fs.f_type != BPF_FS_MAGIC) {
7734         pr_warn("specified path %s is not on BPF FS\n", path);
7735         err = -EINVAL;
7736     }
7737
7738     return err;
7739 }
7740
7741 int bpf_program__pin(struct bpf_program *prog, const char *path)
7742 {
7743     char *cp, errmsg[STRERR_BUFSIZE];
7744     int err;
7745
7746     if (prog->fd < 0) {
7747         pr_warn("prog '%s': can't pin program that wasn't loaded\n", prog->name);
7748         return libbpf_err(-EINVAL);
7749     }
7750
7751     err = make_parent_dir(path);
7752     if (err)
7753         return libbpf_err(err);
7754
7755     err = check_path(path);
7756     if (err)
7757         return libbpf_err(err);
7758
7759     if (bpf_obj_pin(prog->fd, path)) {
7760         err = -errno;
7761         cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
7762         pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, cp);
7763         return libbpf_err(err);
7764     }
7765
7766     pr_debug("prog '%s': pinned at '%s'\n", prog->name, path);
7767     return 0;
7768 }
7769
7770 int bpf_program__unpin(struct bpf_program *prog, const char *path)
7771 {
7772     int err;
7773
7774     if (prog->fd < 0) {
7775         pr_warn("prog '%s': can't unpin program that wasn't loaded\n", prog->name);
7776         return libbpf_err(-EINVAL);
7777     }
7778
7779     err = check_path(path);
7780     if (err)
7781         return libbpf_err(err);
7782
7783     err = unlink(path);
7784     if (err)
7785         return libbpf_err(-errno);
7786
7787     pr_debug("prog '%s': unpinned from '%s'\n", prog->name, path);
7788     return 0;
7789 }
7790
7791 int bpf_map__pin(struct bpf_map *map, const char *path)
7792 {
7793     char *cp, errmsg[STRERR_BUFSIZE];
7794     int err;
7795
7796     if (map == NULL) {
7797         pr_warn("invalid map pointer\n");
7798         return libbpf_err(-EINVAL);
7799     }
7800
7801     if (map->pin_path) {
7802         if (path && strcmp(path, map->pin_path)) {
7803             pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
7804                 bpf_map__name(map), map->pin_path, path);
7805             return libbpf_err(-EINVAL);
7806         } else if (map->pinned) {
7807             pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
7808                  bpf_map__name(map), map->pin_path);
7809             return 0;
7810         }
7811     } else {
7812         if (!path) {
7813             pr_warn("missing a path to pin map '%s' at\n",
7814                 bpf_map__name(map));
7815             return libbpf_err(-EINVAL);
7816         } else if (map->pinned) {
7817             pr_warn("map '%s' already pinned\n", bpf_map__name(map));
7818             return libbpf_err(-EEXIST);
7819         }
7820
7821         map->pin_path = strdup(path);
7822         if (!map->pin_path) {
7823             err = -errno;
7824             goto out_err;
7825         }
7826     }
7827
7828     err = make_parent_dir(map->pin_path);
7829     if (err)
7830         return libbpf_err(err);
7831
7832     err = check_path(map->pin_path);
7833     if (err)
7834         return libbpf_err(err);
7835
7836     if (bpf_obj_pin(map->fd, map->pin_path)) {
7837         err = -errno;
7838         goto out_err;
7839     }
7840
7841     map->pinned = true;
7842     pr_debug("pinned map '%s'\n", map->pin_path);
7843
7844     return 0;
7845
7846 out_err:
7847     cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
7848     pr_warn("failed to pin map: %s\n", cp);
7849     return libbpf_err(err);
7850 }
7851
7852 int bpf_map__unpin(struct bpf_map *map, const char *path)
7853 {
7854     int err;
7855
7856     if (map == NULL) {
7857         pr_warn("invalid map pointer\n");
7858         return libbpf_err(-EINVAL);
7859     }
7860
7861     if (map->pin_path) {
7862         if (path && strcmp(path, map->pin_path)) {
7863             pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
7864                 bpf_map__name(map), map->pin_path, path);
7865             return libbpf_err(-EINVAL);
7866         }
7867         path = map->pin_path;
7868     } else if (!path) {
7869         pr_warn("no path to unpin map '%s' from\n",
7870             bpf_map__name(map));
7871         return libbpf_err(-EINVAL);
7872     }
7873
7874     err = check_path(path);
7875     if (err)
7876         return libbpf_err(err);
7877
7878     err = unlink(path);
7879     if (err != 0)
7880         return libbpf_err(-errno);
7881
7882     map->pinned = false;
7883     pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
7884
7885     return 0;
7886 }
7887
7888 int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
7889 {
7890     char *new = NULL;
7891
7892     if (path) {
7893         new = strdup(path);
7894         if (!new)
7895             return libbpf_err(-errno);
7896     }
7897
7898     free(map->pin_path);
7899     map->pin_path = new;
7900     return 0;
7901 }
7902
7903 __alias(bpf_map__pin_path)
7904 const char *bpf_map__get_pin_path(const struct bpf_map *map);
7905
7906 const char *bpf_map__pin_path(const struct bpf_map *map)
7907 {
7908     return map->pin_path;
7909 }
7910
7911 bool bpf_map__is_pinned(const struct bpf_map *map)
7912 {
7913     return map->pinned;
7914 }
7915
/* Rewrite every '.' in the NUL-terminated string @s to '_', in place. */
static void sanitize_pin_path(char *s)
{
    char *p;

    /* bpffs disallows periods in path names */
    for (p = s; *p != '\0'; p++) {
        if (*p == '.')
            *p = '_';
    }
}
7925
7926 int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
7927 {
7928     struct bpf_map *map;
7929     int err;
7930
7931     if (!obj)
7932         return libbpf_err(-ENOENT);
7933
7934     if (!obj->loaded) {
7935         pr_warn("object not yet loaded; load it first\n");
7936         return libbpf_err(-ENOENT);
7937     }
7938
7939     bpf_object__for_each_map(map, obj) {
7940         char *pin_path = NULL;
7941         char buf[PATH_MAX];
7942
7943         if (!map->autocreate)
7944             continue;
7945
7946         if (path) {
7947             int len;
7948
7949             len = snprintf(buf, PATH_MAX, "%s/%s", path,
7950                        bpf_map__name(map));
7951             if (len < 0) {
7952                 err = -EINVAL;
7953                 goto err_unpin_maps;
7954             } else if (len >= PATH_MAX) {
7955                 err = -ENAMETOOLONG;
7956                 goto err_unpin_maps;
7957             }
7958             sanitize_pin_path(buf);
7959             pin_path = buf;
7960         } else if (!map->pin_path) {
7961             continue;
7962         }
7963
7964         err = bpf_map__pin(map, pin_path);
7965         if (err)
7966             goto err_unpin_maps;
7967     }
7968
7969     return 0;
7970
7971 err_unpin_maps:
7972     while ((map = bpf_object__prev_map(obj, map))) {
7973         if (!map->pin_path)
7974             continue;
7975
7976         bpf_map__unpin(map, NULL);
7977     }
7978
7979     return libbpf_err(err);
7980 }
7981
7982 int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
7983 {
7984     struct bpf_map *map;
7985     int err;
7986
7987     if (!obj)
7988         return libbpf_err(-ENOENT);
7989
7990     bpf_object__for_each_map(map, obj) {
7991         char *pin_path = NULL;
7992         char buf[PATH_MAX];
7993
7994         if (path) {
7995             int len;
7996
7997             len = snprintf(buf, PATH_MAX, "%s/%s", path,
7998                        bpf_map__name(map));
7999             if (len < 0)
8000                 return libbpf_err(-EINVAL);
8001             else if (len >= PATH_MAX)
8002                 return libbpf_err(-ENAMETOOLONG);
8003             sanitize_pin_path(buf);
8004             pin_path = buf;
8005         } else if (!map->pin_path) {
8006             continue;
8007         }
8008
8009         err = bpf_map__unpin(map, pin_path);
8010         if (err)
8011             return libbpf_err(err);
8012     }
8013
8014     return 0;
8015 }
8016
8017 int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
8018 {
8019     struct bpf_program *prog;
8020     int err;
8021
8022     if (!obj)
8023         return libbpf_err(-ENOENT);
8024
8025     if (!obj->loaded) {
8026         pr_warn("object not yet loaded; load it first\n");
8027         return libbpf_err(-ENOENT);
8028     }
8029
8030     bpf_object__for_each_program(prog, obj) {
8031         char buf[PATH_MAX];
8032         int len;
8033
8034         len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name);
8035         if (len < 0) {
8036             err = -EINVAL;
8037             goto err_unpin_programs;
8038         } else if (len >= PATH_MAX) {
8039             err = -ENAMETOOLONG;
8040             goto err_unpin_programs;
8041         }
8042
8043         err = bpf_program__pin(prog, buf);
8044         if (err)
8045             goto err_unpin_programs;
8046     }
8047
8048     return 0;
8049
8050 err_unpin_programs:
8051     while ((prog = bpf_object__prev_program(obj, prog))) {
8052         char buf[PATH_MAX];
8053         int len;
8054
8055         len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name);
8056         if (len < 0)
8057             continue;
8058         else if (len >= PATH_MAX)
8059             continue;
8060
8061         bpf_program__unpin(prog, buf);
8062     }
8063
8064     return libbpf_err(err);
8065 }
8066
8067 int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
8068 {
8069     struct bpf_program *prog;
8070     int err;
8071
8072     if (!obj)
8073         return libbpf_err(-ENOENT);
8074
8075     bpf_object__for_each_program(prog, obj) {
8076         char buf[PATH_MAX];
8077         int len;
8078
8079         len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name);
8080         if (len < 0)
8081             return libbpf_err(-EINVAL);
8082         else if (len >= PATH_MAX)
8083             return libbpf_err(-ENAMETOOLONG);
8084
8085         err = bpf_program__unpin(prog, buf);
8086         if (err)
8087             return libbpf_err(err);
8088     }
8089
8090     return 0;
8091 }
8092
/* Pin all maps, then all programs, of @obj under @path.
 *
 * If pinning the programs fails, the maps pinned in the first step are
 * unpinned again before the error is returned.
 */
int bpf_object__pin(struct bpf_object *obj, const char *path)
{
    int err;

    err = bpf_object__pin_maps(obj, path);
    if (err)
        return libbpf_err(err);

    err = bpf_object__pin_programs(obj, path);
    if (!err)
        return 0;

    /* programs failed: undo the map pins done above */
    bpf_object__unpin_maps(obj, path);
    return libbpf_err(err);
}
8109
8110 static void bpf_map__destroy(struct bpf_map *map)
8111 {
8112     if (map->inner_map) {
8113         bpf_map__destroy(map->inner_map);
8114         zfree(&map->inner_map);
8115     }
8116
8117     zfree(&map->init_slots);
8118     map->init_slots_sz = 0;
8119
8120     if (map->mmaped) {
8121         munmap(map->mmaped, bpf_map_mmap_sz(map));
8122         map->mmaped = NULL;
8123     }
8124
8125     if (map->st_ops) {
8126         zfree(&map->st_ops->data);
8127         zfree(&map->st_ops->progs);
8128         zfree(&map->st_ops->kern_func_off);
8129         zfree(&map->st_ops);
8130     }
8131
8132     zfree(&map->name);
8133     zfree(&map->real_name);
8134     zfree(&map->pin_path);
8135
8136     if (map->fd >= 0)
8137         zclose(map->fd);
8138 }
8139
8140 void bpf_object__close(struct bpf_object *obj)
8141 {
8142     size_t i;
8143
8144     if (IS_ERR_OR_NULL(obj))
8145         return;
8146
8147     usdt_manager_free(obj->usdt_man);
8148     obj->usdt_man = NULL;
8149
8150     bpf_gen__free(obj->gen_loader);
8151     bpf_object__elf_finish(obj);
8152     bpf_object_unload(obj);
8153     btf__free(obj->btf);
8154     btf_ext__free(obj->btf_ext);
8155
8156     for (i = 0; i < obj->nr_maps; i++)
8157         bpf_map__destroy(&obj->maps[i]);
8158
8159     zfree(&obj->btf_custom_path);
8160     zfree(&obj->kconfig);
8161     zfree(&obj->externs);
8162     obj->nr_extern = 0;
8163
8164     zfree(&obj->maps);
8165     obj->nr_maps = 0;
8166
8167     if (obj->programs && obj->nr_programs) {
8168         for (i = 0; i < obj->nr_programs; i++)
8169             bpf_program__exit(&obj->programs[i]);
8170     }
8171     zfree(&obj->programs);
8172
8173     free(obj);
8174 }
8175
8176 const char *bpf_object__name(const struct bpf_object *obj)
8177 {
8178     return obj ? obj->name : libbpf_err_ptr(-EINVAL);
8179 }
8180
8181 unsigned int bpf_object__kversion(const struct bpf_object *obj)
8182 {
8183     return obj ? obj->kern_version : 0;
8184 }
8185
8186 struct btf *bpf_object__btf(const struct bpf_object *obj)
8187 {
8188     return obj ? obj->btf : NULL;
8189 }
8190
8191 int bpf_object__btf_fd(const struct bpf_object *obj)
8192 {
8193     return obj->btf ? btf__fd(obj->btf) : -1;
8194 }
8195
8196 int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
8197 {
8198     if (obj->loaded)
8199         return libbpf_err(-EINVAL);
8200
8201     obj->kern_version = kern_version;
8202
8203     return 0;
8204 }
8205
8206 int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)
8207 {
8208     struct bpf_gen *gen;
8209
8210     if (!opts)
8211         return -EFAULT;
8212     if (!OPTS_VALID(opts, gen_loader_opts))
8213         return -EINVAL;
8214     gen = calloc(sizeof(*gen), 1);
8215     if (!gen)
8216         return -ENOMEM;
8217     gen->opts = opts;
8218     obj->gen_loader = gen;
8219     return 0;
8220 }
8221
8222 static struct bpf_program *
8223 __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
8224             bool forward)
8225 {
8226     size_t nr_programs = obj->nr_programs;
8227     ssize_t idx;
8228
8229     if (!nr_programs)
8230         return NULL;
8231
8232     if (!p)
8233         /* Iter from the beginning */
8234         return forward ? &obj->programs[0] :
8235             &obj->programs[nr_programs - 1];
8236
8237     if (p->obj != obj) {
8238         pr_warn("error: program handler doesn't match object\n");
8239         return errno = EINVAL, NULL;
8240     }
8241
8242     idx = (p - obj->programs) + (forward ? 1 : -1);
8243     if (idx >= obj->nr_programs || idx < 0)
8244         return NULL;
8245     return &obj->programs[idx];
8246 }
8247
/* Return the program following @prev in @obj, skipping every entry for
 * which prog_is_subprog() is true; NULL once the end is reached.
 */
struct bpf_program *
bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
{
    struct bpf_program *prog;

    for (prog = __bpf_program__iter(prev, obj, true);
         prog && prog_is_subprog(obj, prog);
         prog = __bpf_program__iter(prog, obj, true))
        ;

    return prog;
}
8259
/* Return the program preceding @next in @obj, skipping every entry for
 * which prog_is_subprog() is true; NULL once the start is passed.
 */
struct bpf_program *
bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next)
{
    struct bpf_program *prog;

    for (prog = __bpf_program__iter(next, obj, false);
         prog && prog_is_subprog(obj, prog);
         prog = __bpf_program__iter(prog, obj, false))
        ;

    return prog;
}
8271
8272 void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
8273 {
8274     prog->prog_ifindex = ifindex;
8275 }
8276
8277 const char *bpf_program__name(const struct bpf_program *prog)
8278 {
8279     return prog->name;
8280 }
8281
8282 const char *bpf_program__section_name(const struct bpf_program *prog)
8283 {
8284     return prog->sec_name;
8285 }
8286
8287 bool bpf_program__autoload(const struct bpf_program *prog)
8288 {
8289     return prog->autoload;
8290 }
8291
8292 int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
8293 {
8294     if (prog->obj->loaded)
8295         return libbpf_err(-EINVAL);
8296
8297     prog->autoload = autoload;
8298     return 0;
8299 }
8300
8301 const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog)
8302 {
8303     return prog->insns;
8304 }
8305
8306 size_t bpf_program__insn_cnt(const struct bpf_program *prog)
8307 {
8308     return prog->insns_cnt;
8309 }
8310
8311 int bpf_program__set_insns(struct bpf_program *prog,
8312                struct bpf_insn *new_insns, size_t new_insn_cnt)
8313 {
8314     struct bpf_insn *insns;
8315
8316     if (prog->obj->loaded)
8317         return -EBUSY;
8318
8319     insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns));
8320     if (!insns) {
8321         pr_warn("prog '%s': failed to realloc prog code\n", prog->name);
8322         return -ENOMEM;
8323     }
8324     memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns));
8325
8326     prog->insns = insns;
8327     prog->insns_cnt = new_insn_cnt;
8328     return 0;
8329 }
8330
8331 int bpf_program__fd(const struct bpf_program *prog)
8332 {
8333     if (!prog)
8334         return libbpf_err(-EINVAL);
8335
8336     if (prog->fd < 0)
8337         return libbpf_err(-ENOENT);
8338
8339     return prog->fd;
8340 }
8341
8342 __alias(bpf_program__type)
8343 enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog);
8344
8345 enum bpf_prog_type bpf_program__type(const struct bpf_program *prog)
8346 {
8347     return prog->type;
8348 }
8349
8350 int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
8351 {
8352     if (prog->obj->loaded)
8353         return libbpf_err(-EBUSY);
8354
8355     prog->type = type;
8356     return 0;
8357 }
8358
8359 __alias(bpf_program__expected_attach_type)
8360 enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog);
8361
8362 enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog)
8363 {
8364     return prog->expected_attach_type;
8365 }
8366
8367 int bpf_program__set_expected_attach_type(struct bpf_program *prog,
8368                        enum bpf_attach_type type)
8369 {
8370     if (prog->obj->loaded)
8371         return libbpf_err(-EBUSY);
8372
8373     prog->expected_attach_type = type;
8374     return 0;
8375 }
8376
8377 __u32 bpf_program__flags(const struct bpf_program *prog)
8378 {
8379     return prog->prog_flags;
8380 }
8381
8382 int bpf_program__set_flags(struct bpf_program *prog, __u32 flags)
8383 {
8384     if (prog->obj->loaded)
8385         return libbpf_err(-EBUSY);
8386
8387     prog->prog_flags = flags;
8388     return 0;
8389 }
8390
8391 __u32 bpf_program__log_level(const struct bpf_program *prog)
8392 {
8393     return prog->log_level;
8394 }
8395
8396 int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level)
8397 {
8398     if (prog->obj->loaded)
8399         return libbpf_err(-EBUSY);
8400
8401     prog->log_level = log_level;
8402     return 0;
8403 }
8404
8405 const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size)
8406 {
8407     *log_size = prog->log_size;
8408     return prog->log_buf;
8409 }
8410
8411 int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size)
8412 {
8413     if (log_size && !log_buf)
8414         return -EINVAL;
8415     if (prog->log_size > UINT_MAX)
8416         return -EINVAL;
8417     if (prog->obj->loaded)
8418         return -EBUSY;
8419
8420     prog->log_buf = log_buf;
8421     prog->log_size = log_size;
8422     return 0;
8423 }
8424
/* Build one struct bpf_sec_def initializer.
 *
 * @sec_pfx: section name pattern, stored in .sec
 * @ptype:   program type suffix; expands to BPF_PROG_TYPE_<ptype>
 * @atype:   expected attach type
 * @flags:   SEC_* flags, stored in .cookie as a long
 *
 * .prog_prepare_load_fn is always set to libbpf_prepare_prog_load. Any
 * extra arguments are appended as a positional initializer right after
 * .prog_prepare_load_fn; the table in this file passes an attach_*()
 * callback there.
 * NOTE(review): presumably that positional slot is the prog_attach_fn
 * member -- confirm against the struct bpf_sec_def definition.
 */
#define SEC_DEF(sec_pfx, ptype, atype, flags, ...) {                \
    .sec = (char *)sec_pfx,                         \
    .prog_type = BPF_PROG_TYPE_##ptype,                 \
    .expected_attach_type = atype,                      \
    .cookie = (long)(flags),                        \
    .prog_prepare_load_fn = libbpf_prepare_prog_load,           \
    __VA_ARGS__                             \
}
8433
/* Forward declarations of the per-section attach callbacks referenced by
 * the section_defs[] table below. All share one signature: the program,
 * the cookie value and an output pointer for the resulting link.
 */
static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8444
/* Built-in section definitions.
 *
 * Each entry gives a section name pattern, the program type, the expected
 * attach type, SEC_* flags and, optionally, an attach callback.
 * find_sec_def() scans this table in array order, after the custom
 * handlers, and returns the first entry accepted by sec_def_matches(). A
 * trailing '+' in a pattern accepts either the exact name or the name
 * followed by "/<extras>"; patterns without it must match exactly.
 */
static const struct bpf_sec_def section_defs[] = {
    SEC_DEF("socket",       SOCKET_FILTER, 0, SEC_NONE),
    SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE),
    SEC_DEF("sk_reuseport",     SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE),
    SEC_DEF("kprobe+",      KPROBE, 0, SEC_NONE, attach_kprobe),
    SEC_DEF("uprobe+",      KPROBE, 0, SEC_NONE, attach_uprobe),
    SEC_DEF("uprobe.s+",        KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
    SEC_DEF("kretprobe+",       KPROBE, 0, SEC_NONE, attach_kprobe),
    SEC_DEF("uretprobe+",       KPROBE, 0, SEC_NONE, attach_uprobe),
    SEC_DEF("uretprobe.s+",     KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
    SEC_DEF("kprobe.multi+",    KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
    SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
    SEC_DEF("ksyscall+",        KPROBE, 0, SEC_NONE, attach_ksyscall),
    SEC_DEF("kretsyscall+",     KPROBE, 0, SEC_NONE, attach_ksyscall),
    SEC_DEF("usdt+",        KPROBE, 0, SEC_NONE, attach_usdt),
    SEC_DEF("tc",           SCHED_CLS, 0, SEC_NONE),
    SEC_DEF("classifier",       SCHED_CLS, 0, SEC_NONE),
    SEC_DEF("action",       SCHED_ACT, 0, SEC_NONE),
    SEC_DEF("tracepoint+",      TRACEPOINT, 0, SEC_NONE, attach_tp),
    SEC_DEF("tp+",          TRACEPOINT, 0, SEC_NONE, attach_tp),
    SEC_DEF("raw_tracepoint+",  RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
    SEC_DEF("raw_tp+",      RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
    SEC_DEF("raw_tracepoint.w+",    RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
    SEC_DEF("raw_tp.w+",        RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
    SEC_DEF("tp_btf+",      TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace),
    SEC_DEF("fentry+",      TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace),
    SEC_DEF("fmod_ret+",        TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace),
    SEC_DEF("fexit+",       TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace),
    SEC_DEF("fentry.s+",        TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
    SEC_DEF("fmod_ret.s+",      TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
    SEC_DEF("fexit.s+",     TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
    SEC_DEF("freplace+",        EXT, 0, SEC_ATTACH_BTF, attach_trace),
    SEC_DEF("lsm+",         LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
    SEC_DEF("lsm.s+",       LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
    SEC_DEF("lsm_cgroup+",      LSM, BPF_LSM_CGROUP, SEC_ATTACH_BTF),
    SEC_DEF("iter+",        TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
    SEC_DEF("iter.s+",      TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
    SEC_DEF("syscall",      SYSCALL, 0, SEC_SLEEPABLE),
    SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS),
    SEC_DEF("xdp/devmap",       XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
    SEC_DEF("xdp.frags/cpumap", XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS),
    SEC_DEF("xdp/cpumap",       XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
    SEC_DEF("xdp.frags",        XDP, BPF_XDP, SEC_XDP_FRAGS),
    SEC_DEF("xdp",          XDP, BPF_XDP, SEC_ATTACHABLE_OPT),
    SEC_DEF("perf_event",       PERF_EVENT, 0, SEC_NONE),
    SEC_DEF("lwt_in",       LWT_IN, 0, SEC_NONE),
    SEC_DEF("lwt_out",      LWT_OUT, 0, SEC_NONE),
    SEC_DEF("lwt_xmit",     LWT_XMIT, 0, SEC_NONE),
    SEC_DEF("lwt_seg6local",    LWT_SEG6LOCAL, 0, SEC_NONE),
    SEC_DEF("sockops",      SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT),
    SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT),
    SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT),
    SEC_DEF("sk_skb",       SK_SKB, 0, SEC_NONE),
    SEC_DEF("sk_msg",       SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT),
    SEC_DEF("lirc_mode2",       LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT),
    SEC_DEF("flow_dissector",   FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT),
    SEC_DEF("cgroup_skb/ingress",   CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT),
    SEC_DEF("cgroup_skb/egress",    CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT),
    SEC_DEF("cgroup/skb",       CGROUP_SKB, 0, SEC_NONE),
    SEC_DEF("cgroup/sock_create",   CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE),
    SEC_DEF("cgroup/sock_release",  CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE),
    SEC_DEF("cgroup/sock",      CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT),
    SEC_DEF("cgroup/post_bind4",    CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE),
    SEC_DEF("cgroup/post_bind6",    CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE),
    SEC_DEF("cgroup/bind4",     CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE),
    SEC_DEF("cgroup/bind6",     CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE),
    SEC_DEF("cgroup/connect4",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE),
    SEC_DEF("cgroup/connect6",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE),
    SEC_DEF("cgroup/sendmsg4",  CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE),
    SEC_DEF("cgroup/sendmsg6",  CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE),
    SEC_DEF("cgroup/recvmsg4",  CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE),
    SEC_DEF("cgroup/recvmsg6",  CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE),
    SEC_DEF("cgroup/getpeername4",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE),
    SEC_DEF("cgroup/getpeername6",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE),
    SEC_DEF("cgroup/getsockname4",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE),
    SEC_DEF("cgroup/getsockname6",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE),
    SEC_DEF("cgroup/sysctl",    CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE),
    SEC_DEF("cgroup/getsockopt",    CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE),
    SEC_DEF("cgroup/setsockopt",    CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE),
    SEC_DEF("cgroup/dev",       CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT),
    SEC_DEF("struct_ops+",      STRUCT_OPS, 0, SEC_NONE),
    SEC_DEF("sk_lookup",        SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE),
};
8528
/* Array of user-registered section handlers and its element count; grown
 * by libbpf_register_prog_handler() and compacted by
 * libbpf_unregister_prog_handler().
 */
static size_t custom_sec_def_cnt;
static struct bpf_sec_def *custom_sec_defs;
/* Handler registered with a NULL section name; find_sec_def() returns it
 * when no custom or built-in definition matches. Only meaningful while
 * has_custom_fallback_def is true.
 */
static struct bpf_sec_def custom_fallback_def;
static bool has_custom_fallback_def;

/* ID given to the most recently registered handler; the first ID issued
 * is 1 because the counter is pre-incremented from its zero initial value.
 */
static int last_custom_sec_def_handler_id;
8535
8536 int libbpf_register_prog_handler(const char *sec,
8537                  enum bpf_prog_type prog_type,
8538                  enum bpf_attach_type exp_attach_type,
8539                  const struct libbpf_prog_handler_opts *opts)
8540 {
8541     struct bpf_sec_def *sec_def;
8542
8543     if (!OPTS_VALID(opts, libbpf_prog_handler_opts))
8544         return libbpf_err(-EINVAL);
8545
8546     if (last_custom_sec_def_handler_id == INT_MAX) /* prevent overflow */
8547         return libbpf_err(-E2BIG);
8548
8549     if (sec) {
8550         sec_def = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt + 1,
8551                           sizeof(*sec_def));
8552         if (!sec_def)
8553             return libbpf_err(-ENOMEM);
8554
8555         custom_sec_defs = sec_def;
8556         sec_def = &custom_sec_defs[custom_sec_def_cnt];
8557     } else {
8558         if (has_custom_fallback_def)
8559             return libbpf_err(-EBUSY);
8560
8561         sec_def = &custom_fallback_def;
8562     }
8563
8564     sec_def->sec = sec ? strdup(sec) : NULL;
8565     if (sec && !sec_def->sec)
8566         return libbpf_err(-ENOMEM);
8567
8568     sec_def->prog_type = prog_type;
8569     sec_def->expected_attach_type = exp_attach_type;
8570     sec_def->cookie = OPTS_GET(opts, cookie, 0);
8571
8572     sec_def->prog_setup_fn = OPTS_GET(opts, prog_setup_fn, NULL);
8573     sec_def->prog_prepare_load_fn = OPTS_GET(opts, prog_prepare_load_fn, NULL);
8574     sec_def->prog_attach_fn = OPTS_GET(opts, prog_attach_fn, NULL);
8575
8576     sec_def->handler_id = ++last_custom_sec_def_handler_id;
8577
8578     if (sec)
8579         custom_sec_def_cnt++;
8580     else
8581         has_custom_fallback_def = true;
8582
8583     return sec_def->handler_id;
8584 }
8585
8586 int libbpf_unregister_prog_handler(int handler_id)
8587 {
8588     struct bpf_sec_def *sec_defs;
8589     int i;
8590
8591     if (handler_id <= 0)
8592         return libbpf_err(-EINVAL);
8593
8594     if (has_custom_fallback_def && custom_fallback_def.handler_id == handler_id) {
8595         memset(&custom_fallback_def, 0, sizeof(custom_fallback_def));
8596         has_custom_fallback_def = false;
8597         return 0;
8598     }
8599
8600     for (i = 0; i < custom_sec_def_cnt; i++) {
8601         if (custom_sec_defs[i].handler_id == handler_id)
8602             break;
8603     }
8604
8605     if (i == custom_sec_def_cnt)
8606         return libbpf_err(-ENOENT);
8607
8608     free(custom_sec_defs[i].sec);
8609     for (i = i + 1; i < custom_sec_def_cnt; i++)
8610         custom_sec_defs[i - 1] = custom_sec_defs[i];
8611     custom_sec_def_cnt--;
8612
8613     /* try to shrink the array, but it's ok if we couldn't */
8614     sec_defs = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt, sizeof(*sec_defs));
8615     if (sec_defs)
8616         custom_sec_defs = sec_defs;
8617
8618     return 0;
8619 }
8620
8621 static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name)
8622 {
8623     size_t len = strlen(sec_def->sec);
8624
8625     /* "type/" always has to have proper SEC("type/extras") form */
8626     if (sec_def->sec[len - 1] == '/') {
8627         if (str_has_pfx(sec_name, sec_def->sec))
8628             return true;
8629         return false;
8630     }
8631
8632     /* "type+" means it can be either exact SEC("type") or
8633      * well-formed SEC("type/extras") with proper '/' separator
8634      */
8635     if (sec_def->sec[len - 1] == '+') {
8636         len--;
8637         /* not even a prefix */
8638         if (strncmp(sec_name, sec_def->sec, len) != 0)
8639             return false;
8640         /* exact match or has '/' separator */
8641         if (sec_name[len] == '\0' || sec_name[len] == '/')
8642             return true;
8643         return false;
8644     }
8645
8646     return strcmp(sec_name, sec_def->sec) == 0;
8647 }
8648
8649 static const struct bpf_sec_def *find_sec_def(const char *sec_name)
8650 {
8651     const struct bpf_sec_def *sec_def;
8652     int i, n;
8653
8654     n = custom_sec_def_cnt;
8655     for (i = 0; i < n; i++) {
8656         sec_def = &custom_sec_defs[i];
8657         if (sec_def_matches(sec_def, sec_name))
8658             return sec_def;
8659     }
8660
8661     n = ARRAY_SIZE(section_defs);
8662     for (i = 0; i < n; i++) {
8663         sec_def = &section_defs[i];
8664         if (sec_def_matches(sec_def, sec_name))
8665             return sec_def;
8666     }
8667
8668     if (has_custom_fallback_def)
8669         return &custom_fallback_def;
8670
8671     return NULL;
8672 }
8673
8674 #define MAX_TYPE_NAME_SIZE 32
8675
8676 static char *libbpf_get_type_names(bool attach_type)
8677 {
8678     int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE;
8679     char *buf;
8680
8681     buf = malloc(len);
8682     if (!buf)
8683         return NULL;
8684
8685     buf[0] = '\0';
8686     /* Forge string buf with all available names */
8687     for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
8688         const struct bpf_sec_def *sec_def = &section_defs[i];
8689
8690         if (attach_type) {
8691             if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
8692                 continue;
8693
8694             if (!(sec_def->cookie & SEC_ATTACHABLE))
8695                 continue;
8696         }
8697
8698         if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) {
8699             free(buf);
8700             return NULL;
8701         }
8702         strcat(buf, " ");
8703         strcat(buf, section_defs[i].sec);
8704     }
8705
8706     return buf;
8707 }
8708
8709 int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
8710                  enum bpf_attach_type *expected_attach_type)
8711 {
8712     const struct bpf_sec_def *sec_def;
8713     char *type_names;
8714
8715     if (!name)
8716         return libbpf_err(-EINVAL);
8717
8718     sec_def = find_sec_def(name);
8719     if (sec_def) {
8720         *prog_type = sec_def->prog_type;
8721         *expected_attach_type = sec_def->expected_attach_type;
8722         return 0;
8723     }
8724
8725     pr_debug("failed to guess program type from ELF section '%s'\n", name);
8726     type_names = libbpf_get_type_names(false);
8727     if (type_names != NULL) {
8728         pr_debug("supported section(type) names are:%s\n", type_names);
8729         free(type_names);
8730     }
8731
8732     return libbpf_err(-ESRCH);
8733 }
8734
8735 const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t)
8736 {
8737     if (t < 0 || t >= ARRAY_SIZE(attach_type_name))
8738         return NULL;
8739
8740     return attach_type_name[t];
8741 }
8742
8743 const char *libbpf_bpf_link_type_str(enum bpf_link_type t)
8744 {
8745     if (t < 0 || t >= ARRAY_SIZE(link_type_name))
8746         return NULL;
8747
8748     return link_type_name[t];
8749 }
8750
8751 const char *libbpf_bpf_map_type_str(enum bpf_map_type t)
8752 {
8753     if (t < 0 || t >= ARRAY_SIZE(map_type_name))
8754         return NULL;
8755
8756     return map_type_name[t];
8757 }
8758
8759 const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t)
8760 {
8761     if (t < 0 || t >= ARRAY_SIZE(prog_type_name))
8762         return NULL;
8763
8764     return prog_type_name[t];
8765 }
8766
8767 static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
8768                              size_t offset)
8769 {
8770     struct bpf_map *map;
8771     size_t i;
8772
8773     for (i = 0; i < obj->nr_maps; i++) {
8774         map = &obj->maps[i];
8775         if (!bpf_map__is_struct_ops(map))
8776             continue;
8777         if (map->sec_offset <= offset &&
8778             offset - map->sec_offset < map->def.value_size)
8779             return map;
8780     }
8781
8782     return NULL;
8783 }
8784
8785 /* Collect the reloc from ELF and populate the st_ops->progs[] */
8786 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
8787                         Elf64_Shdr *shdr, Elf_Data *data)
8788 {
8789     const struct btf_member *member;
8790     struct bpf_struct_ops *st_ops;
8791     struct bpf_program *prog;
8792     unsigned int shdr_idx;
8793     const struct btf *btf;
8794     struct bpf_map *map;
8795     unsigned int moff, insn_idx;
8796     const char *name;
8797     __u32 member_idx;
8798     Elf64_Sym *sym;
8799     Elf64_Rel *rel;
8800     int i, nrels;
8801
8802     btf = obj->btf;
8803     nrels = shdr->sh_size / shdr->sh_entsize;
8804     for (i = 0; i < nrels; i++) {
8805         rel = elf_rel_by_idx(data, i);
8806         if (!rel) {
8807             pr_warn("struct_ops reloc: failed to get %d reloc\n", i);
8808             return -LIBBPF_ERRNO__FORMAT;
8809         }
8810
8811         sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
8812         if (!sym) {
8813             pr_warn("struct_ops reloc: symbol %zx not found\n",
8814                 (size_t)ELF64_R_SYM(rel->r_info));
8815             return -LIBBPF_ERRNO__FORMAT;
8816         }
8817
8818         name = elf_sym_str(obj, sym->st_name) ?: "<?>";
8819         map = find_struct_ops_map_by_offset(obj, rel->r_offset);
8820         if (!map) {
8821             pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n",
8822                 (size_t)rel->r_offset);
8823             return -EINVAL;
8824         }
8825
8826         moff = rel->r_offset - map->sec_offset;
8827         shdr_idx = sym->st_shndx;
8828         st_ops = map->st_ops;
8829         pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel->r_offset %zu map->sec_offset %zu name %d (\'%s\')\n",
8830              map->name,
8831              (long long)(rel->r_info >> 32),
8832              (long long)sym->st_value,
8833              shdr_idx, (size_t)rel->r_offset,
8834              map->sec_offset, sym->st_name, name);
8835
8836         if (shdr_idx >= SHN_LORESERVE) {
8837             pr_warn("struct_ops reloc %s: rel->r_offset %zu shdr_idx %u unsupported non-static function\n",
8838                 map->name, (size_t)rel->r_offset, shdr_idx);
8839             return -LIBBPF_ERRNO__RELOC;
8840         }
8841         if (sym->st_value % BPF_INSN_SZ) {
8842             pr_warn("struct_ops reloc %s: invalid target program offset %llu\n",
8843                 map->name, (unsigned long long)sym->st_value);
8844             return -LIBBPF_ERRNO__FORMAT;
8845         }
8846         insn_idx = sym->st_value / BPF_INSN_SZ;
8847
8848         member = find_member_by_offset(st_ops->type, moff * 8);
8849         if (!member) {
8850             pr_warn("struct_ops reloc %s: cannot find member at moff %u\n",
8851                 map->name, moff);
8852             return -EINVAL;
8853         }
8854         member_idx = member - btf_members(st_ops->type);
8855         name = btf__name_by_offset(btf, member->name_off);
8856
8857         if (!resolve_func_ptr(btf, member->type, NULL)) {
8858             pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n",
8859                 map->name, name);
8860             return -EINVAL;
8861         }
8862
8863         prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx);
8864         if (!prog) {
8865             pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n",
8866                 map->name, shdr_idx, name);
8867             return -EINVAL;
8868         }
8869
8870         /* prevent the use of BPF prog with invalid type */
8871         if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) {
8872             pr_warn("struct_ops reloc %s: prog %s is not struct_ops BPF program\n",
8873                 map->name, prog->name);
8874             return -EINVAL;
8875         }
8876
8877         /* if we haven't yet processed this BPF program, record proper
8878          * attach_btf_id and member_idx
8879          */
8880         if (!prog->attach_btf_id) {
8881             prog->attach_btf_id = st_ops->type_id;
8882             prog->expected_attach_type = member_idx;
8883         }
8884
8885         /* struct_ops BPF prog can be re-used between multiple
8886          * .struct_ops as long as it's the same struct_ops struct
8887          * definition and the same function pointer field
8888          */
8889         if (prog->attach_btf_id != st_ops->type_id ||
8890             prog->expected_attach_type != member_idx) {
8891             pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n",
8892                 map->name, prog->name, prog->sec_name, prog->type,
8893                 prog->attach_btf_id, prog->expected_attach_type, name);
8894             return -EINVAL;
8895         }
8896
8897         st_ops->progs[member_idx] = prog;
8898     }
8899
8900     return 0;
8901 }
8902
/* Name prefixes selected by btf_get_kernel_prefix_kind() and prepended to an
 * attach target name before it is looked up in BTF.
 */
#define BTF_TRACE_PREFIX "btf_trace_"
#define BTF_LSM_PREFIX "bpf_lsm_"
#define BTF_ITER_PREFIX "bpf_iter_"
/* Size of the buffer in which the "<prefix><name>" BTF type name is built */
#define BTF_MAX_NAME_SIZE 128
8907
8908 void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
8909                 const char **prefix, int *kind)
8910 {
8911     switch (attach_type) {
8912     case BPF_TRACE_RAW_TP:
8913         *prefix = BTF_TRACE_PREFIX;
8914         *kind = BTF_KIND_TYPEDEF;
8915         break;
8916     case BPF_LSM_MAC:
8917     case BPF_LSM_CGROUP:
8918         *prefix = BTF_LSM_PREFIX;
8919         *kind = BTF_KIND_FUNC;
8920         break;
8921     case BPF_TRACE_ITER:
8922         *prefix = BTF_ITER_PREFIX;
8923         *kind = BTF_KIND_FUNC;
8924         break;
8925     default:
8926         *prefix = "";
8927         *kind = BTF_KIND_FUNC;
8928     }
8929 }
8930
8931 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
8932                    const char *name, __u32 kind)
8933 {
8934     char btf_type_name[BTF_MAX_NAME_SIZE];
8935     int ret;
8936
8937     ret = snprintf(btf_type_name, sizeof(btf_type_name),
8938                "%s%s", prefix, name);
8939     /* snprintf returns the number of characters written excluding the
8940      * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
8941      * indicates truncation.
8942      */
8943     if (ret < 0 || ret >= sizeof(btf_type_name))
8944         return -ENAMETOOLONG;
8945     return btf__find_by_name_kind(btf, btf_type_name, kind);
8946 }
8947
8948 static inline int find_attach_btf_id(struct btf *btf, const char *name,
8949                      enum bpf_attach_type attach_type)
8950 {
8951     const char *prefix;
8952     int kind;
8953
8954     btf_get_kernel_prefix_kind(attach_type, &prefix, &kind);
8955     return find_btf_by_prefix_kind(btf, prefix, name, kind);
8956 }
8957
8958 int libbpf_find_vmlinux_btf_id(const char *name,
8959                    enum bpf_attach_type attach_type)
8960 {
8961     struct btf *btf;
8962     int err;
8963
8964     btf = btf__load_vmlinux_btf();
8965     err = libbpf_get_error(btf);
8966     if (err) {
8967         pr_warn("vmlinux BTF is not found\n");
8968         return libbpf_err(err);
8969     }
8970
8971     err = find_attach_btf_id(btf, name, attach_type);
8972     if (err <= 0)
8973         pr_warn("%s is not found in vmlinux BTF\n", name);
8974
8975     btf__free(btf);
8976     return libbpf_err(err);
8977 }
8978
8979 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
8980 {
8981     struct bpf_prog_info info = {};
8982     __u32 info_len = sizeof(info);
8983     struct btf *btf;
8984     int err;
8985
8986     err = bpf_obj_get_info_by_fd(attach_prog_fd, &info, &info_len);
8987     if (err) {
8988         pr_warn("failed bpf_obj_get_info_by_fd for FD %d: %d\n",
8989             attach_prog_fd, err);
8990         return err;
8991     }
8992
8993     err = -EINVAL;
8994     if (!info.btf_id) {
8995         pr_warn("The target program doesn't have BTF\n");
8996         goto out;
8997     }
8998     btf = btf__load_from_kernel_by_id(info.btf_id);
8999     err = libbpf_get_error(btf);
9000     if (err) {
9001         pr_warn("Failed to get BTF %d of the program: %d\n", info.btf_id, err);
9002         goto out;
9003     }
9004     err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
9005     btf__free(btf);
9006     if (err <= 0) {
9007         pr_warn("%s is not found in prog's BTF\n", name);
9008         goto out;
9009     }
9010 out:
9011     return err;
9012 }
9013
9014 static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name,
9015                   enum bpf_attach_type attach_type,
9016                   int *btf_obj_fd, int *btf_type_id)
9017 {
9018     int ret, i;
9019
9020     ret = find_attach_btf_id(obj->btf_vmlinux, attach_name, attach_type);
9021     if (ret > 0) {
9022         *btf_obj_fd = 0; /* vmlinux BTF */
9023         *btf_type_id = ret;
9024         return 0;
9025     }
9026     if (ret != -ENOENT)
9027         return ret;
9028
9029     ret = load_module_btfs(obj);
9030     if (ret)
9031         return ret;
9032
9033     for (i = 0; i < obj->btf_module_cnt; i++) {
9034         const struct module_btf *mod = &obj->btf_modules[i];
9035
9036         ret = find_attach_btf_id(mod->btf, attach_name, attach_type);
9037         if (ret > 0) {
9038             *btf_obj_fd = mod->fd;
9039             *btf_type_id = ret;
9040             return 0;
9041         }
9042         if (ret == -ENOENT)
9043             continue;
9044
9045         return ret;
9046     }
9047
9048     return -ESRCH;
9049 }
9050
9051 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
9052                      int *btf_obj_fd, int *btf_type_id)
9053 {
9054     enum bpf_attach_type attach_type = prog->expected_attach_type;
9055     __u32 attach_prog_fd = prog->attach_prog_fd;
9056     int err = 0;
9057
9058     /* BPF program's BTF ID */
9059     if (attach_prog_fd) {
9060         err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd);
9061         if (err < 0) {
9062             pr_warn("failed to find BPF program (FD %d) BTF ID for '%s': %d\n",
9063                  attach_prog_fd, attach_name, err);
9064             return err;
9065         }
9066         *btf_obj_fd = 0;
9067         *btf_type_id = err;
9068         return 0;
9069     }
9070
9071     /* kernel/module BTF ID */
9072     if (prog->obj->gen_loader) {
9073         bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type);
9074         *btf_obj_fd = 0;
9075         *btf_type_id = 1;
9076     } else {
9077         err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id);
9078     }
9079     if (err) {
9080         pr_warn("failed to find kernel BTF type ID of '%s': %d\n", attach_name, err);
9081         return err;
9082     }
9083     return 0;
9084 }
9085
9086 int libbpf_attach_type_by_name(const char *name,
9087                    enum bpf_attach_type *attach_type)
9088 {
9089     char *type_names;
9090     const struct bpf_sec_def *sec_def;
9091
9092     if (!name)
9093         return libbpf_err(-EINVAL);
9094
9095     sec_def = find_sec_def(name);
9096     if (!sec_def) {
9097         pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
9098         type_names = libbpf_get_type_names(true);
9099         if (type_names != NULL) {
9100             pr_debug("attachable section(type) names are:%s\n", type_names);
9101             free(type_names);
9102         }
9103
9104         return libbpf_err(-EINVAL);
9105     }
9106
9107     if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
9108         return libbpf_err(-EINVAL);
9109     if (!(sec_def->cookie & SEC_ATTACHABLE))
9110         return libbpf_err(-EINVAL);
9111
9112     *attach_type = sec_def->expected_attach_type;
9113     return 0;
9114 }
9115
9116 int bpf_map__fd(const struct bpf_map *map)
9117 {
9118     return map ? map->fd : libbpf_err(-EINVAL);
9119 }
9120
9121 static bool map_uses_real_name(const struct bpf_map *map)
9122 {
9123     /* Since libbpf started to support custom .data.* and .rodata.* maps,
9124      * their user-visible name differs from kernel-visible name. Users see
9125      * such map's corresponding ELF section name as a map name.
9126      * This check distinguishes .data/.rodata from .data.* and .rodata.*
9127      * maps to know which name has to be returned to the user.
9128      */
9129     if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0)
9130         return true;
9131     if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0)
9132         return true;
9133     return false;
9134 }
9135
9136 const char *bpf_map__name(const struct bpf_map *map)
9137 {
9138     if (!map)
9139         return NULL;
9140
9141     if (map_uses_real_name(map))
9142         return map->real_name;
9143
9144     return map->name;
9145 }
9146
9147 enum bpf_map_type bpf_map__type(const struct bpf_map *map)
9148 {
9149     return map->def.type;
9150 }
9151
9152 int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
9153 {
9154     if (map->fd >= 0)
9155         return libbpf_err(-EBUSY);
9156     map->def.type = type;
9157     return 0;
9158 }
9159
9160 __u32 bpf_map__map_flags(const struct bpf_map *map)
9161 {
9162     return map->def.map_flags;
9163 }
9164
9165 int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
9166 {
9167     if (map->fd >= 0)
9168         return libbpf_err(-EBUSY);
9169     map->def.map_flags = flags;
9170     return 0;
9171 }
9172
9173 __u64 bpf_map__map_extra(const struct bpf_map *map)
9174 {
9175     return map->map_extra;
9176 }
9177
9178 int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra)
9179 {
9180     if (map->fd >= 0)
9181         return libbpf_err(-EBUSY);
9182     map->map_extra = map_extra;
9183     return 0;
9184 }
9185
9186 __u32 bpf_map__numa_node(const struct bpf_map *map)
9187 {
9188     return map->numa_node;
9189 }
9190
9191 int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
9192 {
9193     if (map->fd >= 0)
9194         return libbpf_err(-EBUSY);
9195     map->numa_node = numa_node;
9196     return 0;
9197 }
9198
9199 __u32 bpf_map__key_size(const struct bpf_map *map)
9200 {
9201     return map->def.key_size;
9202 }
9203
9204 int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
9205 {
9206     if (map->fd >= 0)
9207         return libbpf_err(-EBUSY);
9208     map->def.key_size = size;
9209     return 0;
9210 }
9211
9212 __u32 bpf_map__value_size(const struct bpf_map *map)
9213 {
9214     return map->def.value_size;
9215 }
9216
9217 int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
9218 {
9219     if (map->fd >= 0)
9220         return libbpf_err(-EBUSY);
9221     map->def.value_size = size;
9222     return 0;
9223 }
9224
9225 __u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
9226 {
9227     return map ? map->btf_key_type_id : 0;
9228 }
9229
9230 __u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
9231 {
9232     return map ? map->btf_value_type_id : 0;
9233 }
9234
9235 int bpf_map__set_initial_value(struct bpf_map *map,
9236                    const void *data, size_t size)
9237 {
9238     if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
9239         size != map->def.value_size || map->fd >= 0)
9240         return libbpf_err(-EINVAL);
9241
9242     memcpy(map->mmaped, data, size);
9243     return 0;
9244 }
9245
9246 const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize)
9247 {
9248     if (!map->mmaped)
9249         return NULL;
9250     *psize = map->def.value_size;
9251     return map->mmaped;
9252 }
9253
9254 bool bpf_map__is_internal(const struct bpf_map *map)
9255 {
9256     return map->libbpf_type != LIBBPF_MAP_UNSPEC;
9257 }
9258
9259 __u32 bpf_map__ifindex(const struct bpf_map *map)
9260 {
9261     return map->map_ifindex;
9262 }
9263
9264 int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
9265 {
9266     if (map->fd >= 0)
9267         return libbpf_err(-EBUSY);
9268     map->map_ifindex = ifindex;
9269     return 0;
9270 }
9271
9272 int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
9273 {
9274     if (!bpf_map_type__is_map_in_map(map->def.type)) {
9275         pr_warn("error: unsupported map type\n");
9276         return libbpf_err(-EINVAL);
9277     }
9278     if (map->inner_map_fd != -1) {
9279         pr_warn("error: inner_map_fd already specified\n");
9280         return libbpf_err(-EINVAL);
9281     }
9282     if (map->inner_map) {
9283         bpf_map__destroy(map->inner_map);
9284         zfree(&map->inner_map);
9285     }
9286     map->inner_map_fd = fd;
9287     return 0;
9288 }
9289
9290 static struct bpf_map *
9291 __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
9292 {
9293     ssize_t idx;
9294     struct bpf_map *s, *e;
9295
9296     if (!obj || !obj->maps)
9297         return errno = EINVAL, NULL;
9298
9299     s = obj->maps;
9300     e = obj->maps + obj->nr_maps;
9301
9302     if ((m < s) || (m >= e)) {
9303         pr_warn("error in %s: map handler doesn't belong to object\n",
9304              __func__);
9305         return errno = EINVAL, NULL;
9306     }
9307
9308     idx = (m - obj->maps) + i;
9309     if (idx >= obj->nr_maps || idx < 0)
9310         return NULL;
9311     return &obj->maps[idx];
9312 }
9313
9314 struct bpf_map *
9315 bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
9316 {
9317     if (prev == NULL)
9318         return obj->maps;
9319
9320     return __bpf_map__iter(prev, obj, 1);
9321 }
9322
9323 struct bpf_map *
9324 bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next)
9325 {
9326     if (next == NULL) {
9327         if (!obj->nr_maps)
9328             return NULL;
9329         return obj->maps + obj->nr_maps - 1;
9330     }
9331
9332     return __bpf_map__iter(next, obj, -1);
9333 }
9334
9335 struct bpf_map *
9336 bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
9337 {
9338     struct bpf_map *pos;
9339
9340     bpf_object__for_each_map(pos, obj) {
9341         /* if it's a special internal map name (which always starts
9342          * with dot) then check if that special name matches the
9343          * real map name (ELF section name)
9344          */
9345         if (name[0] == '.') {
9346             if (pos->real_name && strcmp(pos->real_name, name) == 0)
9347                 return pos;
9348             continue;
9349         }
9350         /* otherwise map name has to be an exact match */
9351         if (map_uses_real_name(pos)) {
9352             if (strcmp(pos->real_name, name) == 0)
9353                 return pos;
9354             continue;
9355         }
9356         if (strcmp(pos->name, name) == 0)
9357             return pos;
9358     }
9359     return errno = ENOENT, NULL;
9360 }
9361
/* Find a map by name and return bpf_map__fd() of the result (which handles
 * the not-found NULL case).
 */
int
bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
{
	const struct bpf_map *found = bpf_object__find_map_by_name(obj, name);

	return bpf_map__fd(found);
}
9367
9368 static int validate_map_op(const struct bpf_map *map, size_t key_sz,
9369                size_t value_sz, bool check_value_sz)
9370 {
9371     if (map->fd <= 0)
9372         return -ENOENT;
9373
9374     if (map->def.key_size != key_sz) {
9375         pr_warn("map '%s': unexpected key size %zu provided, expected %u\n",
9376             map->name, key_sz, map->def.key_size);
9377         return -EINVAL;
9378     }
9379
9380     if (!check_value_sz)
9381         return 0;
9382
9383     switch (map->def.type) {
9384     case BPF_MAP_TYPE_PERCPU_ARRAY:
9385     case BPF_MAP_TYPE_PERCPU_HASH:
9386     case BPF_MAP_TYPE_LRU_PERCPU_HASH:
9387     case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: {
9388         int num_cpu = libbpf_num_possible_cpus();
9389         size_t elem_sz = roundup(map->def.value_size, 8);
9390
9391         if (value_sz != num_cpu * elem_sz) {
9392             pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n",
9393                 map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz);
9394             return -EINVAL;
9395         }
9396         break;
9397     }
9398     default:
9399         if (map->def.value_size != value_sz) {
9400             pr_warn("map '%s': unexpected value size %zu provided, expected %u\n",
9401                 map->name, value_sz, map->def.value_size);
9402             return -EINVAL;
9403         }
9404         break;
9405     }
9406     return 0;
9407 }
9408
9409 int bpf_map__lookup_elem(const struct bpf_map *map,
9410              const void *key, size_t key_sz,
9411              void *value, size_t value_sz, __u64 flags)
9412 {
9413     int err;
9414
9415     err = validate_map_op(map, key_sz, value_sz, true);
9416     if (err)
9417         return libbpf_err(err);
9418
9419     return bpf_map_lookup_elem_flags(map->fd, key, value, flags);
9420 }
9421
9422 int bpf_map__update_elem(const struct bpf_map *map,
9423              const void *key, size_t key_sz,
9424              const void *value, size_t value_sz, __u64 flags)
9425 {
9426     int err;
9427
9428     err = validate_map_op(map, key_sz, value_sz, true);
9429     if (err)
9430         return libbpf_err(err);
9431
9432     return bpf_map_update_elem(map->fd, key, value, flags);
9433 }
9434
9435 int bpf_map__delete_elem(const struct bpf_map *map,
9436              const void *key, size_t key_sz, __u64 flags)
9437 {
9438     int err;
9439
9440     err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
9441     if (err)
9442         return libbpf_err(err);
9443
9444     return bpf_map_delete_elem_flags(map->fd, key, flags);
9445 }
9446
9447 int bpf_map__lookup_and_delete_elem(const struct bpf_map *map,
9448                     const void *key, size_t key_sz,
9449                     void *value, size_t value_sz, __u64 flags)
9450 {
9451     int err;
9452
9453     err = validate_map_op(map, key_sz, value_sz, true);
9454     if (err)
9455         return libbpf_err(err);
9456
9457     return bpf_map_lookup_and_delete_elem_flags(map->fd, key, value, flags);
9458 }
9459
9460 int bpf_map__get_next_key(const struct bpf_map *map,
9461               const void *cur_key, void *next_key, size_t key_sz)
9462 {
9463     int err;
9464
9465     err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
9466     if (err)
9467         return libbpf_err(err);
9468
9469     return bpf_map_get_next_key(map->fd, cur_key, next_key);
9470 }
9471
/* Extract the error code from a pointer returned by a libbpf API.
 *
 * Returns 0 for a valid pointer. For an error-encoded pointer, errno is set
 * to the encoded error and its negative is returned. For NULL, -errno is
 * returned unchanged: libbpf never returns NULL on success and always sets
 * errno on error, so errno is expected to already hold the cause.
 */
long libbpf_get_error(const void *ptr)
{
	if (IS_ERR_OR_NULL(ptr)) {
		if (IS_ERR(ptr))
			errno = -PTR_ERR(ptr);

		return -errno;
	}

	return 0;
}
9487
/* Swap the BPF program behind `link` for `prog` via bpf_link_update(); the
 * result is converted with libbpf_err_errno().
 */
int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
{
	return libbpf_err_errno(bpf_link_update(bpf_link__fd(link),
						bpf_program__fd(prog), NULL));
}
9496
9497 /* Release "ownership" of underlying BPF resource (typically, BPF program
9498  * attached to some BPF hook, e.g., tracepoint, kprobe, etc). Disconnected
9499  * link, when destructed through bpf_link__destroy() call won't attempt to
9500  * detach/unregisted that BPF resource. This is useful in situations where,
9501  * say, attached BPF program has to outlive userspace program that attached it
9502  * in the system. Depending on type of BPF program, though, there might be
9503  * additional steps (like pinning BPF program in BPF FS) necessary to ensure
9504  * exit of userspace program doesn't trigger automatic detachment and clean up
9505  * inside the kernel.
9506  */
9507 void bpf_link__disconnect(struct bpf_link *link)
9508 {
9509     link->disconnected = true;
9510 }
9511
9512 int bpf_link__destroy(struct bpf_link *link)
9513 {
9514     int err = 0;
9515
9516     if (IS_ERR_OR_NULL(link))
9517         return 0;
9518
9519     if (!link->disconnected && link->detach)
9520         err = link->detach(link);
9521     if (link->pin_path)
9522         free(link->pin_path);
9523     if (link->dealloc)
9524         link->dealloc(link);
9525     else
9526         free(link);
9527
9528     return libbpf_err(err);
9529 }
9530
9531 int bpf_link__fd(const struct bpf_link *link)
9532 {
9533     return link->fd;
9534 }
9535
9536 const char *bpf_link__pin_path(const struct bpf_link *link)
9537 {
9538     return link->pin_path;
9539 }
9540
9541 static int bpf_link__detach_fd(struct bpf_link *link)
9542 {
9543     return libbpf_err_errno(close(link->fd));
9544 }
9545
9546 struct bpf_link *bpf_link__open(const char *path)
9547 {
9548     struct bpf_link *link;
9549     int fd;
9550
9551     fd = bpf_obj_get(path);
9552     if (fd < 0) {
9553         fd = -errno;
9554         pr_warn("failed to open link at %s: %d\n", path, fd);
9555         return libbpf_err_ptr(fd);
9556     }
9557
9558     link = calloc(1, sizeof(*link));
9559     if (!link) {
9560         close(fd);
9561         return libbpf_err_ptr(-ENOMEM);
9562     }
9563     link->detach = &bpf_link__detach_fd;
9564     link->fd = fd;
9565
9566     link->pin_path = strdup(path);
9567     if (!link->pin_path) {
9568         bpf_link__destroy(link);
9569         return libbpf_err_ptr(-ENOMEM);
9570     }
9571
9572     return link;
9573 }
9574
9575 int bpf_link__detach(struct bpf_link *link)
9576 {
9577     return bpf_link_detach(link->fd) ? -errno : 0;
9578 }
9579
9580 int bpf_link__pin(struct bpf_link *link, const char *path)
9581 {
9582     int err;
9583
9584     if (link->pin_path)
9585         return libbpf_err(-EBUSY);
9586     err = make_parent_dir(path);
9587     if (err)
9588         return libbpf_err(err);
9589     err = check_path(path);
9590     if (err)
9591         return libbpf_err(err);
9592
9593     link->pin_path = strdup(path);
9594     if (!link->pin_path)
9595         return libbpf_err(-ENOMEM);
9596
9597     if (bpf_obj_pin(link->fd, link->pin_path)) {
9598         err = -errno;
9599         zfree(&link->pin_path);
9600         return libbpf_err(err);
9601     }
9602
9603     pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
9604     return 0;
9605 }
9606
9607 int bpf_link__unpin(struct bpf_link *link)
9608 {
9609     int err;
9610
9611     if (!link->pin_path)
9612         return libbpf_err(-EINVAL);
9613
9614     err = unlink(link->pin_path);
9615     if (err != 0)
9616         return -errno;
9617
9618     pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
9619     zfree(&link->pin_path);
9620     return 0;
9621 }
9622
/* Link flavor used for programs attached to a perf event */
struct bpf_link_perf {
	/* embedded generic link; callbacks get back to the containing
	 * bpf_link_perf with container_of()
	 */
	struct bpf_link link;
	/* perf event FD the program is attached to; it may be the same FD as
	 * link.fd, and is disabled and closed on detach
	 */
	int perf_event_fd;
	/* legacy kprobe support: keep track of probe identifier and type */
	char *legacy_probe_name;
	bool legacy_is_kprobe;
	bool legacy_is_retprobe;
};
9631
/* Forward declarations: bpf_link_perf_detach() below calls these to remove
 * a legacy kprobe/uprobe event by its probe name.
 */
static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe);
static int remove_uprobe_event_legacy(const char *probe_name, bool retprobe);
9634
9635 static int bpf_link_perf_detach(struct bpf_link *link)
9636 {
9637     struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
9638     int err = 0;
9639
9640     if (ioctl(perf_link->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0) < 0)
9641         err = -errno;
9642
9643     if (perf_link->perf_event_fd != link->fd)
9644         close(perf_link->perf_event_fd);
9645     close(link->fd);
9646
9647     /* legacy uprobe/kprobe needs to be removed after perf event fd closure */
9648     if (perf_link->legacy_probe_name) {
9649         if (perf_link->legacy_is_kprobe) {
9650             err = remove_kprobe_event_legacy(perf_link->legacy_probe_name,
9651                              perf_link->legacy_is_retprobe);
9652         } else {
9653             err = remove_uprobe_event_legacy(perf_link->legacy_probe_name,
9654                              perf_link->legacy_is_retprobe);
9655         }
9656     }
9657
9658     return err;
9659 }
9660
9661 static void bpf_link_perf_dealloc(struct bpf_link *link)
9662 {
9663     struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
9664
9665     free(perf_link->legacy_probe_name);
9666     free(perf_link);
9667 }
9668
9669 struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd,
9670                              const struct bpf_perf_event_opts *opts)
9671 {
9672     char errmsg[STRERR_BUFSIZE];
9673     struct bpf_link_perf *link;
9674     int prog_fd, link_fd = -1, err;
9675
9676     if (!OPTS_VALID(opts, bpf_perf_event_opts))
9677         return libbpf_err_ptr(-EINVAL);
9678
9679     if (pfd < 0) {
9680         pr_warn("prog '%s': invalid perf event FD %d\n",
9681             prog->name, pfd);
9682         return libbpf_err_ptr(-EINVAL);
9683     }
9684     prog_fd = bpf_program__fd(prog);
9685     if (prog_fd < 0) {
9686         pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
9687             prog->name);
9688         return libbpf_err_ptr(-EINVAL);
9689     }
9690
9691     link = calloc(1, sizeof(*link));
9692     if (!link)
9693         return libbpf_err_ptr(-ENOMEM);
9694     link->link.detach = &bpf_link_perf_detach;
9695     link->link.dealloc = &bpf_link_perf_dealloc;
9696     link->perf_event_fd = pfd;
9697
9698     if (kernel_supports(prog->obj, FEAT_PERF_LINK)) {
9699         DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts,
9700             .perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0));
9701
9702         link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts);
9703         if (link_fd < 0) {
9704             err = -errno;
9705             pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %d (%s)\n",
9706                 prog->name, pfd,
9707                 err, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9708             goto err_out;
9709         }
9710         link->link.fd = link_fd;
9711     } else {
9712         if (OPTS_GET(opts, bpf_cookie, 0)) {
9713             pr_warn("prog '%s': user context value is not supported\n", prog->name);
9714             err = -EOPNOTSUPP;
9715             goto err_out;
9716         }
9717
9718         if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
9719             err = -errno;
9720             pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n",
9721                 prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9722             if (err == -EPROTO)
9723                 pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
9724                     prog->name, pfd);
9725             goto err_out;
9726         }
9727         link->link.fd = pfd;
9728     }
9729     if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
9730         err = -errno;
9731         pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n",
9732             prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9733         goto err_out;
9734     }
9735
9736     return &link->link;
9737 err_out:
9738     if (link_fd >= 0)
9739         close(link_fd);
9740     free(link);
9741     return libbpf_err_ptr(err);
9742 }
9743
/* Attach a BPF program to a perf event FD using default options (NULL
 * opts).
 */
struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd)
{
	const struct bpf_perf_event_opts *no_opts = NULL;

	return bpf_program__attach_perf_event_opts(prog, pfd, no_opts);
}
9748
9749 /*
9750  * this function is expected to parse integer in the range of [0, 2^31-1] from
9751  * given file using scanf format string fmt. If actual parsed value is
9752  * negative, the result might be indistinguishable from error
9753  */
9754 static int parse_uint_from_file(const char *file, const char *fmt)
9755 {
9756     char buf[STRERR_BUFSIZE];
9757     int err, ret;
9758     FILE *f;
9759
9760     f = fopen(file, "r");
9761     if (!f) {
9762         err = -errno;
9763         pr_debug("failed to open '%s': %s\n", file,
9764              libbpf_strerror_r(err, buf, sizeof(buf)));
9765         return err;
9766     }
9767     err = fscanf(f, fmt, &ret);
9768     if (err != 1) {
9769         err = err == EOF ? -EIO : -errno;
9770         pr_debug("failed to parse '%s': %s\n", file,
9771             libbpf_strerror_r(err, buf, sizeof(buf)));
9772         fclose(f);
9773         return err;
9774     }
9775     fclose(f);
9776     return ret;
9777 }
9778
/* Read the perf event type of the "kprobe" event source from sysfs.
 * Returns the parsed value or a negative error from parse_uint_from_file().
 */
static int determine_kprobe_perf_type(void)
{
    return parse_uint_from_file("/sys/bus/event_source/devices/kprobe/type", "%d\n");
}
9785
/* Read the perf event type of the "uprobe" event source from sysfs.
 * Returns the parsed value or a negative error from parse_uint_from_file().
 */
static int determine_uprobe_perf_type(void)
{
    return parse_uint_from_file("/sys/bus/event_source/devices/uprobe/type", "%d\n");
}
9792
/* Read which attr.config bit marks a kprobe as a return probe, as described
 * by the "config:<bit>" line of the kprobe event source's retprobe format file.
 */
static int determine_kprobe_retprobe_bit(void)
{
    return parse_uint_from_file("/sys/bus/event_source/devices/kprobe/format/retprobe",
                    "config:%d\n");
}
9799
/* Read which attr.config bit marks a uprobe as a return probe, as described
 * by the "config:<bit>" line of the uprobe event source's retprobe format file.
 */
static int determine_uprobe_retprobe_bit(void)
{
    return parse_uint_from_file("/sys/bus/event_source/devices/uprobe/format/retprobe",
                    "config:%d\n");
}
9806
9807 #define PERF_UPROBE_REF_CTR_OFFSET_BITS 32
9808 #define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32
9809
9810 static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
9811                  uint64_t offset, int pid, size_t ref_ctr_off)
9812 {
9813     struct perf_event_attr attr = {};
9814     char errmsg[STRERR_BUFSIZE];
9815     int type, pfd;
9816
9817     if (ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS))
9818         return -EINVAL;
9819
9820     type = uprobe ? determine_uprobe_perf_type()
9821               : determine_kprobe_perf_type();
9822     if (type < 0) {
9823         pr_warn("failed to determine %s perf type: %s\n",
9824             uprobe ? "uprobe" : "kprobe",
9825             libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
9826         return type;
9827     }
9828     if (retprobe) {
9829         int bit = uprobe ? determine_uprobe_retprobe_bit()
9830                  : determine_kprobe_retprobe_bit();
9831
9832         if (bit < 0) {
9833             pr_warn("failed to determine %s retprobe bit: %s\n",
9834                 uprobe ? "uprobe" : "kprobe",
9835                 libbpf_strerror_r(bit, errmsg, sizeof(errmsg)));
9836             return bit;
9837         }
9838         attr.config |= 1 << bit;
9839     }
9840     attr.size = sizeof(attr);
9841     attr.type = type;
9842     attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
9843     attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
9844     attr.config2 = offset;       /* kprobe_addr or probe_offset */
9845
9846     /* pid filter is meaningful only for uprobes */
9847     pfd = syscall(__NR_perf_event_open, &attr,
9848               pid < 0 ? -1 : pid /* pid */,
9849               pid == -1 ? 0 : -1 /* cpu */,
9850               -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
9851     return pfd >= 0 ? pfd : -errno;
9852 }
9853
9854 static int append_to_file(const char *file, const char *fmt, ...)
9855 {
9856     int fd, n, err = 0;
9857     va_list ap;
9858
9859     fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0);
9860     if (fd < 0)
9861         return -errno;
9862
9863     va_start(ap, fmt);
9864     n = vdprintf(fd, fmt, ap);
9865     va_end(ap);
9866
9867     if (n < 0)
9868         err = -errno;
9869
9870     close(fd);
9871     return err;
9872 }
9873
9874 #define DEBUGFS "/sys/kernel/debug/tracing"
9875 #define TRACEFS "/sys/kernel/tracing"
9876
9877 static bool use_debugfs(void)
9878 {
9879     static int has_debugfs = -1;
9880
9881     if (has_debugfs < 0)
9882         has_debugfs = access(DEBUGFS, F_OK) == 0;
9883
9884     return has_debugfs == 1;
9885 }
9886
9887 static const char *tracefs_path(void)
9888 {
9889     return use_debugfs() ? DEBUGFS : TRACEFS;
9890 }
9891
9892 static const char *tracefs_kprobe_events(void)
9893 {
9894     return use_debugfs() ? DEBUGFS"/kprobe_events" : TRACEFS"/kprobe_events";
9895 }
9896
9897 static const char *tracefs_uprobe_events(void)
9898 {
9899     return use_debugfs() ? DEBUGFS"/uprobe_events" : TRACEFS"/uprobe_events";
9900 }
9901
/* Format a legacy kprobe event name into buf (at most buf_sz bytes):
 * "libbpf_<pid>_<kfunc_name>_0x<offset>_<seq>", where <seq> is a counter
 * that is atomically incremented on every call.
 */
static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz,
                     const char *kfunc_name, size_t offset)
{
    static int next_seq = 0;
    int seq = __sync_fetch_and_add(&next_seq, 1);

    snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d",
         getpid(), kfunc_name, offset, seq);
}
9910
/* Register a legacy k(ret)probe by appending a "p:" (or "r:" for retprobe)
 * definition for kfunc_name+offset to the kprobe_events file.
 * Returns the result of append_to_file().
 */
static int add_kprobe_event_legacy(const char *probe_name, bool retprobe,
                   const char *kfunc_name, size_t offset)
{
    const char *group = retprobe ? "kretprobes" : "kprobes";
    int kind = retprobe ? 'r' : 'p';

    return append_to_file(tracefs_kprobe_events(), "%c:%s/%s %s+0x%zx",
                  kind, group, probe_name, kfunc_name, offset);
}
9919
/* Unregister a legacy k(ret)probe by appending a "-:" removal command for
 * probe_name to the kprobe_events file. Returns the result of append_to_file().
 */
static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe)
{
    const char *group = retprobe ? "kretprobes" : "kprobes";

    return append_to_file(tracefs_kprobe_events(), "-:%s/%s", group, probe_name);
}
9925
/* Read the numeric id of a legacy k(ret)probe event from its "id" file under
 * <tracefs>/events/{kprobes,kretprobes}/<probe_name>/.
 */
static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe)
{
    const char *group = retprobe ? "kretprobes" : "kprobes";
    char id_path[256];

    snprintf(id_path, sizeof(id_path), "%s/events/%s/%s/id",
         tracefs_path(), group, probe_name);

    return parse_uint_from_file(id_path, "%d\n");
}
9935
9936 static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe,
9937                      const char *kfunc_name, size_t offset, int pid)
9938 {
9939     struct perf_event_attr attr = {};
9940     char errmsg[STRERR_BUFSIZE];
9941     int type, pfd, err;
9942
9943     err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset);
9944     if (err < 0) {
9945         pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n",
9946             kfunc_name, offset,
9947             libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9948         return err;
9949     }
9950     type = determine_kprobe_perf_type_legacy(probe_name, retprobe);
9951     if (type < 0) {
9952         err = type;
9953         pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n",
9954             kfunc_name, offset,
9955             libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9956         goto err_clean_legacy;
9957     }
9958     attr.size = sizeof(attr);
9959     attr.config = type;
9960     attr.type = PERF_TYPE_TRACEPOINT;
9961
9962     pfd = syscall(__NR_perf_event_open, &attr,
9963               pid < 0 ? -1 : pid, /* pid */
9964               pid == -1 ? 0 : -1, /* cpu */
9965               -1 /* group_fd */,  PERF_FLAG_FD_CLOEXEC);
9966     if (pfd < 0) {
9967         err = -errno;
9968         pr_warn("legacy kprobe perf_event_open() failed: %s\n",
9969             libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9970         goto err_clean_legacy;
9971     }
9972     return pfd;
9973
9974 err_clean_legacy:
9975     /* Clear the newly added legacy kprobe_event */
9976     remove_kprobe_event_legacy(probe_name, retprobe);
9977     return err;
9978 }
9979
/* Return the architecture-specific prefix that callers splice into syscall
 * wrapper function names of the form "__<pfx>_sys_<name>", or NULL if the
 * architecture this code is compiled for is not in the list.
 */
static const char *arch_specific_syscall_pfx(void)
{
#if defined(__x86_64__)
    return "x64";
#elif defined(__i386__)
    return "ia32";
#elif defined(__s390x__)
    return "s390x";
#elif defined(__s390__)
    return "s390";
#elif defined(__arm__)
    return "arm";
#elif defined(__aarch64__)
    return "arm64";
#elif defined(__mips__)
    return "mips";
#elif defined(__riscv)
    return "riscv";
/* 64-bit PowerPC compilers define __powerpc__ as well, so the more specific
 * __powerpc64__ has to be tested first (same as s390x vs s390 above),
 * otherwise the "powerpc64" branch can never be taken.
 */
#elif defined(__powerpc64__)
    return "powerpc64";
#elif defined(__powerpc__)
    return "powerpc";
#else
    return NULL;
#endif
}
10006
/* Probe whether a kprobe can be placed on "__<arch pfx>_sys_bpf".
 * Returns 1 if creating such a probe succeeds, 0 otherwise (including when
 * no arch-specific prefix is known). Any probe created here is removed or
 * closed again before returning.
 */
static int probe_kern_syscall_wrapper(void)
{
    const char *pfx = arch_specific_syscall_pfx();
    char syscall_name[64];
    char probe_name[128];
    int pfd;

    if (!pfx)
        return 0;

    snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", pfx);

    if (determine_kprobe_perf_type() < 0) {
        /* legacy mode */
        gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0);
        if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0)
            return 0;

        (void)remove_kprobe_event_legacy(probe_name, false);
        return 1;
    }

    pfd = perf_event_open_probe(false, false, syscall_name, 0, getpid(), 0);
    if (pfd < 0)
        return 0;

    close(pfd);
    return 1;
}
10037
10038 struct bpf_link *
10039 bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
10040                 const char *func_name,
10041                 const struct bpf_kprobe_opts *opts)
10042 {
10043     DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
10044     char errmsg[STRERR_BUFSIZE];
10045     char *legacy_probe = NULL;
10046     struct bpf_link *link;
10047     size_t offset;
10048     bool retprobe, legacy;
10049     int pfd, err;
10050
10051     if (!OPTS_VALID(opts, bpf_kprobe_opts))
10052         return libbpf_err_ptr(-EINVAL);
10053
10054     retprobe = OPTS_GET(opts, retprobe, false);
10055     offset = OPTS_GET(opts, offset, 0);
10056     pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
10057
10058     legacy = determine_kprobe_perf_type() < 0;
10059     if (!legacy) {
10060         pfd = perf_event_open_probe(false /* uprobe */, retprobe,
10061                         func_name, offset,
10062                         -1 /* pid */, 0 /* ref_ctr_off */);
10063     } else {
10064         char probe_name[256];
10065
10066         gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name),
10067                          func_name, offset);
10068
10069         legacy_probe = strdup(probe_name);
10070         if (!legacy_probe)
10071             return libbpf_err_ptr(-ENOMEM);
10072
10073         pfd = perf_event_kprobe_open_legacy(legacy_probe, retprobe, func_name,
10074                             offset, -1 /* pid */);
10075     }
10076     if (pfd < 0) {
10077         err = -errno;
10078         pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n",
10079             prog->name, retprobe ? "kretprobe" : "kprobe",
10080             func_name, offset,
10081             libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10082         goto err_out;
10083     }
10084     link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
10085     err = libbpf_get_error(link);
10086     if (err) {
10087         close(pfd);
10088         pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n",
10089             prog->name, retprobe ? "kretprobe" : "kprobe",
10090             func_name, offset,
10091             libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10092         goto err_clean_legacy;
10093     }
10094     if (legacy) {
10095         struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
10096
10097         perf_link->legacy_probe_name = legacy_probe;
10098         perf_link->legacy_is_kprobe = true;
10099         perf_link->legacy_is_retprobe = retprobe;
10100     }
10101
10102     return link;
10103
10104 err_clean_legacy:
10105     if (legacy)
10106         remove_kprobe_event_legacy(legacy_probe, retprobe);
10107 err_out:
10108     free(legacy_probe);
10109     return libbpf_err_ptr(err);
10110 }
10111
10112 struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog,
10113                         bool retprobe,
10114                         const char *func_name)
10115 {
10116     DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
10117         .retprobe = retprobe,
10118     );
10119
10120     return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
10121 }
10122
10123 struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog,
10124                           const char *syscall_name,
10125                           const struct bpf_ksyscall_opts *opts)
10126 {
10127     LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
10128     char func_name[128];
10129
10130     if (!OPTS_VALID(opts, bpf_ksyscall_opts))
10131         return libbpf_err_ptr(-EINVAL);
10132
10133     if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) {
10134         /* arch_specific_syscall_pfx() should never return NULL here
10135          * because it is guarded by kernel_supports(). However, since
10136          * compiler does not know that we have an explicit conditional
10137          * as well.
10138          */
10139         snprintf(func_name, sizeof(func_name), "__%s_sys_%s",
10140              arch_specific_syscall_pfx() ? : "", syscall_name);
10141     } else {
10142         snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name);
10143     }
10144
10145     kprobe_opts.retprobe = OPTS_GET(opts, retprobe, false);
10146     kprobe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
10147
10148     return bpf_program__attach_kprobe_opts(prog, func_name, &kprobe_opts);
10149 }
10150
/* Adapted from perf/util/string.c
 *
 * Match str against glob pattern pat, where '?' matches any single character
 * and '*' matches any (possibly empty) sequence of characters.
 */
static bool glob_match(const char *str, const char *pat)
{
    /* consume the prefix that contains no '*' */
    for (; *str && *pat && *pat != '*'; str++, pat++) {
        /* '?' matches any single character */
        if (*pat != '?' && *pat != *str)
            return false;
    }

    /* no wild card ahead: both strings have to be exhausted */
    if (*pat != '*')
        return *str == '\0' && *pat == '\0';

    /* collapse a run of consecutive '*' */
    do {
        pat++;
    } while (*pat == '*');

    /* tail wild card matches all */
    if (*pat == '\0')
        return true;

    /* try to match the rest of the pattern at every remaining position */
    for (; *str; str++) {
        if (glob_match(str, pat))
            return true;
    }
    return false;
}
10177
/* State shared with resolve_kprobe_multi_cb() while collecting the addresses
 * of all symbols whose names match a glob pattern.
 */
struct kprobe_multi_resolve {
    /* glob pattern every symbol name is matched against */
    const char *pattern;
    /* array of matching symbol addresses, grown on demand */
    unsigned long *addrs;
    /* capacity of addrs and number of addresses stored in it so far */
    size_t cap, cnt;
};
10184
10185 static int
10186 resolve_kprobe_multi_cb(unsigned long long sym_addr, char sym_type,
10187             const char *sym_name, void *ctx)
10188 {
10189     struct kprobe_multi_resolve *res = ctx;
10190     int err;
10191
10192     if (!glob_match(sym_name, res->pattern))
10193         return 0;
10194
10195     err = libbpf_ensure_mem((void **) &res->addrs, &res->cap, sizeof(unsigned long),
10196                 res->cnt + 1);
10197     if (err)
10198         return err;
10199
10200     res->addrs[res->cnt++] = (unsigned long) sym_addr;
10201     return 0;
10202 }
10203
10204 struct bpf_link *
10205 bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
10206                       const char *pattern,
10207                       const struct bpf_kprobe_multi_opts *opts)
10208 {
10209     LIBBPF_OPTS(bpf_link_create_opts, lopts);
10210     struct kprobe_multi_resolve res = {
10211         .pattern = pattern,
10212     };
10213     struct bpf_link *link = NULL;
10214     char errmsg[STRERR_BUFSIZE];
10215     const unsigned long *addrs;
10216     int err, link_fd, prog_fd;
10217     const __u64 *cookies;
10218     const char **syms;
10219     bool retprobe;
10220     size_t cnt;
10221
10222     if (!OPTS_VALID(opts, bpf_kprobe_multi_opts))
10223         return libbpf_err_ptr(-EINVAL);
10224
10225     syms    = OPTS_GET(opts, syms, false);
10226     addrs   = OPTS_GET(opts, addrs, false);
10227     cnt     = OPTS_GET(opts, cnt, false);
10228     cookies = OPTS_GET(opts, cookies, false);
10229
10230     if (!pattern && !addrs && !syms)
10231         return libbpf_err_ptr(-EINVAL);
10232     if (pattern && (addrs || syms || cookies || cnt))
10233         return libbpf_err_ptr(-EINVAL);
10234     if (!pattern && !cnt)
10235         return libbpf_err_ptr(-EINVAL);
10236     if (addrs && syms)
10237         return libbpf_err_ptr(-EINVAL);
10238
10239     if (pattern) {
10240         err = libbpf_kallsyms_parse(resolve_kprobe_multi_cb, &res);
10241         if (err)
10242             goto error;
10243         if (!res.cnt) {
10244             err = -ENOENT;
10245             goto error;
10246         }
10247         addrs = res.addrs;
10248         cnt = res.cnt;
10249     }
10250
10251     retprobe = OPTS_GET(opts, retprobe, false);
10252
10253     lopts.kprobe_multi.syms = syms;
10254     lopts.kprobe_multi.addrs = addrs;
10255     lopts.kprobe_multi.cookies = cookies;
10256     lopts.kprobe_multi.cnt = cnt;
10257     lopts.kprobe_multi.flags = retprobe ? BPF_F_KPROBE_MULTI_RETURN : 0;
10258
10259     link = calloc(1, sizeof(*link));
10260     if (!link) {
10261         err = -ENOMEM;
10262         goto error;
10263     }
10264     link->detach = &bpf_link__detach_fd;
10265
10266     prog_fd = bpf_program__fd(prog);
10267     link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, &lopts);
10268     if (link_fd < 0) {
10269         err = -errno;
10270         pr_warn("prog '%s': failed to attach: %s\n",
10271             prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10272         goto error;
10273     }
10274     link->fd = link_fd;
10275     free(res.addrs);
10276     return link;
10277
10278 error:
10279     free(link);
10280     free(res.addrs);
10281     return libbpf_err_ptr(err);
10282 }
10283
10284 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
10285 {
10286     DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
10287     unsigned long offset = 0;
10288     const char *func_name;
10289     char *func;
10290     int n;
10291
10292     *link = NULL;
10293
10294     /* no auto-attach for SEC("kprobe") and SEC("kretprobe") */
10295     if (strcmp(prog->sec_name, "kprobe") == 0 || strcmp(prog->sec_name, "kretprobe") == 0)
10296         return 0;
10297
10298     opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/");
10299     if (opts.retprobe)
10300         func_name = prog->sec_name + sizeof("kretprobe/") - 1;
10301     else
10302         func_name = prog->sec_name + sizeof("kprobe/") - 1;
10303
10304     n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
10305     if (n < 1) {
10306         pr_warn("kprobe name is invalid: %s\n", func_name);
10307         return -EINVAL;
10308     }
10309     if (opts.retprobe && offset != 0) {
10310         free(func);
10311         pr_warn("kretprobes do not support offset specification\n");
10312         return -EINVAL;
10313     }
10314
10315     opts.offset = offset;
10316     *link = bpf_program__attach_kprobe_opts(prog, func, &opts);
10317     free(func);
10318     return libbpf_get_error(*link);
10319 }
10320
10321 static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link)
10322 {
10323     LIBBPF_OPTS(bpf_ksyscall_opts, opts);
10324     const char *syscall_name;
10325
10326     *link = NULL;
10327
10328     /* no auto-attach for SEC("ksyscall") and SEC("kretsyscall") */
10329     if (strcmp(prog->sec_name, "ksyscall") == 0 || strcmp(prog->sec_name, "kretsyscall") == 0)
10330         return 0;
10331
10332     opts.retprobe = str_has_pfx(prog->sec_name, "kretsyscall/");
10333     if (opts.retprobe)
10334         syscall_name = prog->sec_name + sizeof("kretsyscall/") - 1;
10335     else
10336         syscall_name = prog->sec_name + sizeof("ksyscall/") - 1;
10337
10338     *link = bpf_program__attach_ksyscall(prog, syscall_name, &opts);
10339     return *link ? 0 : -errno;
10340 }
10341
10342 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
10343 {
10344     LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
10345     const char *spec;
10346     char *pattern;
10347     int n;
10348
10349     *link = NULL;
10350
10351     /* no auto-attach for SEC("kprobe.multi") and SEC("kretprobe.multi") */
10352     if (strcmp(prog->sec_name, "kprobe.multi") == 0 ||
10353         strcmp(prog->sec_name, "kretprobe.multi") == 0)
10354         return 0;
10355
10356     opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe.multi/");
10357     if (opts.retprobe)
10358         spec = prog->sec_name + sizeof("kretprobe.multi/") - 1;
10359     else
10360         spec = prog->sec_name + sizeof("kprobe.multi/") - 1;
10361
10362     n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern);
10363     if (n < 1) {
10364         pr_warn("kprobe multi pattern is invalid: %s\n", pattern);
10365         return -EINVAL;
10366     }
10367
10368     *link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts);
10369     free(pattern);
10370     return libbpf_get_error(*link);
10371 }
10372
/* Format a legacy uprobe event name into buf (at most buf_sz bytes) as
 * "libbpf_<pid>_<binary_path>_0x<offset>" and then replace every
 * non-alphanumeric character of the result with '_'.
 */
static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz,
                     const char *binary_path, uint64_t offset)
{
    size_t i;

    snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset);

    /* sanitize binary_path in the probe name */
    for (i = 0; buf[i]; i++) {
        /* <ctype.h> functions require a value representable as
         * unsigned char; a plain char may be negative for bytes >= 0x80
         */
        if (!isalnum((unsigned char)buf[i]))
            buf[i] = '_';
    }
}
10386
/* Register a legacy u(ret)probe by appending a "p:" (or "r:" for retprobe)
 * definition for binary_path:offset to the uprobe_events file.
 * Returns the result of append_to_file().
 */
static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe,
                      const char *binary_path, size_t offset)
{
    const char *group = retprobe ? "uretprobes" : "uprobes";
    int kind = retprobe ? 'r' : 'p';

    return append_to_file(tracefs_uprobe_events(), "%c:%s/%s %s:0x%zx",
                  kind, group, probe_name, binary_path, offset);
}
10395
/* Unregister a legacy u(ret)probe by appending a "-:" removal command for
 * probe_name to the uprobe_events file. Returns the result of append_to_file().
 */
static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe)
{
    const char *group = retprobe ? "uretprobes" : "uprobes";

    return append_to_file(tracefs_uprobe_events(), "-:%s/%s", group, probe_name);
}
10401
/* Read the numeric id of a legacy u(ret)probe event from its "id" file under
 * <tracefs>/events/{uprobes,uretprobes}/<probe_name>/.
 */
static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe)
{
    const char *group = retprobe ? "uretprobes" : "uprobes";
    char id_path[512];

    snprintf(id_path, sizeof(id_path), "%s/events/%s/%s/id",
         tracefs_path(), group, probe_name);

    return parse_uint_from_file(id_path, "%d\n");
}
10411
10412 static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
10413                      const char *binary_path, size_t offset, int pid)
10414 {
10415     struct perf_event_attr attr;
10416     int type, pfd, err;
10417
10418     err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset);
10419     if (err < 0) {
10420         pr_warn("failed to add legacy uprobe event for %s:0x%zx: %d\n",
10421             binary_path, (size_t)offset, err);
10422         return err;
10423     }
10424     type = determine_uprobe_perf_type_legacy(probe_name, retprobe);
10425     if (type < 0) {
10426         err = type;
10427         pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n",
10428             binary_path, offset, err);
10429         goto err_clean_legacy;
10430     }
10431
10432     memset(&attr, 0, sizeof(attr));
10433     attr.size = sizeof(attr);
10434     attr.config = type;
10435     attr.type = PERF_TYPE_TRACEPOINT;
10436
10437     pfd = syscall(__NR_perf_event_open, &attr,
10438               pid < 0 ? -1 : pid, /* pid */
10439               pid == -1 ? 0 : -1, /* cpu */
10440               -1 /* group_fd */,  PERF_FLAG_FD_CLOEXEC);
10441     if (pfd < 0) {
10442         err = -errno;
10443         pr_warn("legacy uprobe perf_event_open() failed: %d\n", err);
10444         goto err_clean_legacy;
10445     }
10446     return pfd;
10447
10448 err_clean_legacy:
10449     /* Clear the newly added legacy uprobe_event */
10450     remove_uprobe_event_legacy(probe_name, retprobe);
10451     return err;
10452 }
10453
10454 /* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. */
10455 static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn)
10456 {
10457     while ((scn = elf_nextscn(elf, scn)) != NULL) {
10458         GElf_Shdr sh;
10459
10460         if (!gelf_getshdr(scn, &sh))
10461             continue;
10462         if (sh.sh_type == sh_type)
10463             return scn;
10464     }
10465     return NULL;
10466 }
10467
10468 /* Find offset of function name in object specified by path.  "name" matches
10469  * symbol name or name@@LIB for library functions.
10470  */
10471 static long elf_find_func_offset(const char *binary_path, const char *name)
10472 {
10473     int fd, i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
10474     bool is_shared_lib, is_name_qualified;
10475     char errmsg[STRERR_BUFSIZE];
10476     long ret = -ENOENT;
10477     size_t name_len;
10478     GElf_Ehdr ehdr;
10479     Elf *elf;
10480
10481     fd = open(binary_path, O_RDONLY | O_CLOEXEC);
10482     if (fd < 0) {
10483         ret = -errno;
10484         pr_warn("failed to open %s: %s\n", binary_path,
10485             libbpf_strerror_r(ret, errmsg, sizeof(errmsg)));
10486         return ret;
10487     }
10488     elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
10489     if (!elf) {
10490         pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1));
10491         close(fd);
10492         return -LIBBPF_ERRNO__FORMAT;
10493     }
10494     if (!gelf_getehdr(elf, &ehdr)) {
10495         pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1));
10496         ret = -LIBBPF_ERRNO__FORMAT;
10497         goto out;
10498     }
10499     /* for shared lib case, we do not need to calculate relative offset */
10500     is_shared_lib = ehdr.e_type == ET_DYN;
10501
10502     name_len = strlen(name);
10503     /* Does name specify "@@LIB"? */
10504     is_name_qualified = strstr(name, "@@") != NULL;
10505
10506     /* Search SHT_DYNSYM, SHT_SYMTAB for symbol.  This search order is used because if
10507      * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically
10508      * linked binary may not have SHT_DYMSYM, so absence of a section should not be
10509      * reported as a warning/error.
10510      */
10511     for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
10512         size_t nr_syms, strtabidx, idx;
10513         Elf_Data *symbols = NULL;
10514         Elf_Scn *scn = NULL;
10515         int last_bind = -1;
10516         const char *sname;
10517         GElf_Shdr sh;
10518
10519         scn = elf_find_next_scn_by_type(elf, sh_types[i], NULL);
10520         if (!scn) {
10521             pr_debug("elf: failed to find symbol table ELF sections in '%s'\n",
10522                  binary_path);
10523             continue;
10524         }
10525         if (!gelf_getshdr(scn, &sh))
10526             continue;
10527         strtabidx = sh.sh_link;
10528         symbols = elf_getdata(scn, 0);
10529         if (!symbols) {
10530             pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n",
10531                 binary_path, elf_errmsg(-1));
10532             ret = -LIBBPF_ERRNO__FORMAT;
10533             goto out;
10534         }
10535         nr_syms = symbols->d_size / sh.sh_entsize;
10536
10537         for (idx = 0; idx < nr_syms; idx++) {
10538             int curr_bind;
10539             GElf_Sym sym;
10540             Elf_Scn *sym_scn;
10541             GElf_Shdr sym_sh;
10542
10543             if (!gelf_getsym(symbols, idx, &sym))
10544                 continue;
10545
10546             if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
10547                 continue;
10548
10549             sname = elf_strptr(elf, strtabidx, sym.st_name);
10550             if (!sname)
10551                 continue;
10552
10553             curr_bind = GELF_ST_BIND(sym.st_info);
10554
10555             /* User can specify func, func@@LIB or func@@LIB_VERSION. */
10556             if (strncmp(sname, name, name_len) != 0)
10557                 continue;
10558             /* ...but we don't want a search for "foo" to match 'foo2" also, so any
10559              * additional characters in sname should be of the form "@@LIB".
10560              */
10561             if (!is_name_qualified && sname[name_len] != '\0' && sname[name_len] != '@')
10562                 continue;
10563
10564             if (ret >= 0) {
10565                 /* handle multiple matches */
10566                 if (last_bind != STB_WEAK && curr_bind != STB_WEAK) {
10567                     /* Only accept one non-weak bind. */
10568                     pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n",
10569                         sname, name, binary_path);
10570                     ret = -LIBBPF_ERRNO__FORMAT;
10571                     goto out;
10572                 } else if (curr_bind == STB_WEAK) {
10573                     /* already have a non-weak bind, and
10574                      * this is a weak bind, so ignore.
10575                      */
10576                     continue;
10577                 }
10578             }
10579
10580             /* Transform symbol's virtual address (absolute for
10581              * binaries and relative for shared libs) into file
10582              * offset, which is what kernel is expecting for
10583              * uprobe/uretprobe attachment.
10584              * See Documentation/trace/uprobetracer.rst for more
10585              * details.
10586              * This is done by looking up symbol's containing
10587              * section's header and using it's virtual address
10588              * (sh_addr) and corresponding file offset (sh_offset)
10589              * to transform sym.st_value (virtual address) into
10590              * desired final file offset.
10591              */
10592             sym_scn = elf_getscn(elf, sym.st_shndx);
10593             if (!sym_scn)
10594                 continue;
10595             if (!gelf_getshdr(sym_scn, &sym_sh))
10596                 continue;
10597
10598             ret = sym.st_value - sym_sh.sh_addr + sym_sh.sh_offset;
10599             last_bind = curr_bind;
10600         }
10601         if (ret > 0)
10602             break;
10603     }
10604
10605     if (ret > 0) {
10606         pr_debug("elf: symbol address match for '%s' in '%s': 0x%lx\n", name, binary_path,
10607              ret);
10608     } else {
10609         if (ret == 0) {
10610             pr_warn("elf: '%s' is 0 in symtab for '%s': %s\n", name, binary_path,
10611                 is_shared_lib ? "should not be 0 in a shared library" :
10612                         "try using shared library path instead");
10613             ret = -ENOENT;
10614         } else {
10615             pr_warn("elf: failed to find symbol '%s' in '%s'\n", name, binary_path);
10616         }
10617     }
10618 out:
10619     elf_end(elf);
10620     close(fd);
10621     return ret;
10622 }
10623
/*
 * Return the architecture-specific library directory for the architecture
 * this code is compiled for, or NULL if that architecture is not listed.
 *
 * Based on https://packages.debian.org/sid/libc6.
 *
 * Assume that the traced program is built for the same architecture
 * as libbpf, which should cover the vast majority of cases.
 */
static const char *arch_specific_lib_paths(void)
{
    const char *lib_dir = NULL;

#if defined(__x86_64__)
    lib_dir = "/lib/x86_64-linux-gnu";
#elif defined(__i386__)
    lib_dir = "/lib/i386-linux-gnu";
#elif defined(__s390x__)
    lib_dir = "/lib/s390x-linux-gnu";
#elif defined(__s390__)
    lib_dir = "/lib/s390-linux-gnu";
#elif defined(__arm__) && defined(__SOFTFP__)
    lib_dir = "/lib/arm-linux-gnueabi";
#elif defined(__arm__) && !defined(__SOFTFP__)
    lib_dir = "/lib/arm-linux-gnueabihf";
#elif defined(__aarch64__)
    lib_dir = "/lib/aarch64-linux-gnu";
#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64
    lib_dir = "/lib/mips64el-linux-gnuabi64";
#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32
    lib_dir = "/lib/mipsel-linux-gnu";
#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    lib_dir = "/lib/powerpc64le-linux-gnu";
#elif defined(__sparc__) && defined(__arch64__)
    lib_dir = "/lib/sparc64-linux-gnu";
#elif defined(__riscv) && __riscv_xlen == 64
    lib_dir = "/lib/riscv64-linux-gnu";
#endif

    return lib_dir;
}
10660
/*
 * Get full path to program/shared library.
 *
 * @file is a bare file name. A name ending in ".so" or containing ".so."
 * is treated as a shared library and looked up in $LD_LIBRARY_PATH,
 * "/usr/lib64:/usr/lib" and the architecture-specific library directory.
 * Any other name is treated as a program and looked up in $PATH and
 * "/usr/bin:/usr/sbin".
 *
 * The first candidate that passes the permission check is left in @result
 * (at most @result_sz bytes, NUL-terminated) and 0 is returned. If no
 * candidate matches, -ENOENT is returned and @result content is unspecified.
 */
static int resolve_full_path(const char *file, char *result, size_t result_sz)
{
    const char *search_paths[3] = {};
    int i, perm;

    if (str_has_sfx(file, ".so") || strstr(file, ".so.")) {
        search_paths[0] = getenv("LD_LIBRARY_PATH");
        search_paths[1] = "/usr/lib64:/usr/lib";
        search_paths[2] = arch_specific_lib_paths();
        /* shared libraries are frequently installed without the
         * executable bit, so only require them to be readable
         */
        perm = R_OK;
    } else {
        search_paths[0] = getenv("PATH");
        search_paths[1] = "/usr/bin:/usr/sbin";
        perm = R_OK | X_OK;
    }

    for (i = 0; i < (int)ARRAY_SIZE(search_paths); i++) {
        const char *s;

        if (!search_paths[i])
            continue;
        /* walk the colon-separated list one segment at a time */
        for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) {
            char *next_path;
            int seg_len, len;

            if (s[0] == ':')
                s++;
            next_path = strchr(s, ':');
            seg_len = next_path ? next_path - s : strlen(s);
            if (!seg_len)
                continue;
            len = snprintf(result, result_sz, "%.*s/%s", seg_len, s, file);
            /* a truncated candidate would name a different file */
            if (len < 0 || (size_t)len >= result_sz)
                continue;
            /* ensure it has required permissions */
            if (access(result, perm) < 0)
                continue;
            pr_debug("resolved '%s' to '%s'\n", file, result);
            return 0;
        }
    }
    return -ENOENT;
}
10701
10702 LIBBPF_API struct bpf_link *
10703 bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
10704                 const char *binary_path, size_t func_offset,
10705                 const struct bpf_uprobe_opts *opts)
10706 {
10707     DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
10708     char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL;
10709     char full_binary_path[PATH_MAX];
10710     struct bpf_link *link;
10711     size_t ref_ctr_off;
10712     int pfd, err;
10713     bool retprobe, legacy;
10714     const char *func_name;
10715
10716     if (!OPTS_VALID(opts, bpf_uprobe_opts))
10717         return libbpf_err_ptr(-EINVAL);
10718
10719     retprobe = OPTS_GET(opts, retprobe, false);
10720     ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0);
10721     pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
10722
10723     if (!binary_path)
10724         return libbpf_err_ptr(-EINVAL);
10725
10726     if (!strchr(binary_path, '/')) {
10727         err = resolve_full_path(binary_path, full_binary_path,
10728                     sizeof(full_binary_path));
10729         if (err) {
10730             pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
10731                 prog->name, binary_path, err);
10732             return libbpf_err_ptr(err);
10733         }
10734         binary_path = full_binary_path;
10735     }
10736     func_name = OPTS_GET(opts, func_name, NULL);
10737     if (func_name) {
10738         long sym_off;
10739
10740         sym_off = elf_find_func_offset(binary_path, func_name);
10741         if (sym_off < 0)
10742             return libbpf_err_ptr(sym_off);
10743         func_offset += sym_off;
10744     }
10745
10746     legacy = determine_uprobe_perf_type() < 0;
10747     if (!legacy) {
10748         pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
10749                         func_offset, pid, ref_ctr_off);
10750     } else {
10751         char probe_name[PATH_MAX + 64];
10752
10753         if (ref_ctr_off)
10754             return libbpf_err_ptr(-EINVAL);
10755
10756         gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name),
10757                          binary_path, func_offset);
10758
10759         legacy_probe = strdup(probe_name);
10760         if (!legacy_probe)
10761             return libbpf_err_ptr(-ENOMEM);
10762
10763         pfd = perf_event_uprobe_open_legacy(legacy_probe, retprobe,
10764                             binary_path, func_offset, pid);
10765     }
10766     if (pfd < 0) {
10767         err = -errno;
10768         pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
10769             prog->name, retprobe ? "uretprobe" : "uprobe",
10770             binary_path, func_offset,
10771             libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10772         goto err_out;
10773     }
10774
10775     link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
10776     err = libbpf_get_error(link);
10777     if (err) {
10778         close(pfd);
10779         pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
10780             prog->name, retprobe ? "uretprobe" : "uprobe",
10781             binary_path, func_offset,
10782             libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10783         goto err_clean_legacy;
10784     }
10785     if (legacy) {
10786         struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
10787
10788         perf_link->legacy_probe_name = legacy_probe;
10789         perf_link->legacy_is_kprobe = false;
10790         perf_link->legacy_is_retprobe = retprobe;
10791     }
10792     return link;
10793
10794 err_clean_legacy:
10795     if (legacy)
10796         remove_uprobe_event_legacy(legacy_probe, retprobe);
10797 err_out:
10798     free(legacy_probe);
10799     return libbpf_err_ptr(err);
10800 }
10801
10802 /* Format of u[ret]probe section definition supporting auto-attach:
10803  * u[ret]probe/binary:function[+offset]
10804  *
10805  * binary can be an absolute/relative path or a filename; the latter is resolved to a
10806  * full binary path via bpf_program__attach_uprobe_opts.
10807  *
10808  * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be
10809  * specified (and auto-attach is not possible) or the above format is specified for
10810  * auto-attach.
10811  */
10812 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
10813 {
10814     DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
10815     char *probe_type = NULL, *binary_path = NULL, *func_name = NULL;
10816     int n, ret = -EINVAL;
10817     long offset = 0;
10818
10819     *link = NULL;
10820
10821     n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[a-zA-Z0-9_.]+%li",
10822            &probe_type, &binary_path, &func_name, &offset);
10823     switch (n) {
10824     case 1:
10825         /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
10826         ret = 0;
10827         break;
10828     case 2:
10829         pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n",
10830             prog->name, prog->sec_name);
10831         break;
10832     case 3:
10833     case 4:
10834         opts.retprobe = strcmp(probe_type, "uretprobe") == 0 ||
10835                 strcmp(probe_type, "uretprobe.s") == 0;
10836         if (opts.retprobe && offset != 0) {
10837             pr_warn("prog '%s': uretprobes do not support offset specification\n",
10838                 prog->name);
10839             break;
10840         }
10841         opts.func_name = func_name;
10842         *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts);
10843         ret = libbpf_get_error(*link);
10844         break;
10845     default:
10846         pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
10847             prog->sec_name);
10848         break;
10849     }
10850     free(probe_type);
10851     free(binary_path);
10852     free(func_name);
10853
10854     return ret;
10855 }
10856
10857 struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog,
10858                         bool retprobe, pid_t pid,
10859                         const char *binary_path,
10860                         size_t func_offset)
10861 {
10862     DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .retprobe = retprobe);
10863
10864     return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts);
10865 }
10866
10867 struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog,
10868                       pid_t pid, const char *binary_path,
10869                       const char *usdt_provider, const char *usdt_name,
10870                       const struct bpf_usdt_opts *opts)
10871 {
10872     char resolved_path[512];
10873     struct bpf_object *obj = prog->obj;
10874     struct bpf_link *link;
10875     __u64 usdt_cookie;
10876     int err;
10877
10878     if (!OPTS_VALID(opts, bpf_uprobe_opts))
10879         return libbpf_err_ptr(-EINVAL);
10880
10881     if (bpf_program__fd(prog) < 0) {
10882         pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
10883             prog->name);
10884         return libbpf_err_ptr(-EINVAL);
10885     }
10886
10887     if (!binary_path)
10888         return libbpf_err_ptr(-EINVAL);
10889
10890     if (!strchr(binary_path, '/')) {
10891         err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path));
10892         if (err) {
10893             pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
10894                 prog->name, binary_path, err);
10895             return libbpf_err_ptr(err);
10896         }
10897         binary_path = resolved_path;
10898     }
10899
10900     /* USDT manager is instantiated lazily on first USDT attach. It will
10901      * be destroyed together with BPF object in bpf_object__close().
10902      */
10903     if (IS_ERR(obj->usdt_man))
10904         return libbpf_ptr(obj->usdt_man);
10905     if (!obj->usdt_man) {
10906         obj->usdt_man = usdt_manager_new(obj);
10907         if (IS_ERR(obj->usdt_man))
10908             return libbpf_ptr(obj->usdt_man);
10909     }
10910
10911     usdt_cookie = OPTS_GET(opts, usdt_cookie, 0);
10912     link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path,
10913                         usdt_provider, usdt_name, usdt_cookie);
10914     err = libbpf_get_error(link);
10915     if (err)
10916         return libbpf_err_ptr(err);
10917     return link;
10918 }
10919
/*
 * Auto-attach handler for USDT programs. A bare SEC("usdt") is accepted but
 * not attached (*link is set to NULL, 0 is returned). Otherwise the section
 * must be "usdt/<path>:<provider>:<name>" and the program is attached for
 * any process.
 */
static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
    char *bin_path = NULL, *usdt_provider = NULL, *usdt_name = NULL;
    const char *section = bpf_program__section_name(prog);
    int fields, err;

    /* no auto-attach for just SEC("usdt") */
    if (!strcmp(section, "usdt")) {
        *link = NULL;
        return 0;
    }

    fields = sscanf(section, "usdt/%m[^:]:%m[^:]:%m[^:]",
            &bin_path, &usdt_provider, &usdt_name);
    if (fields == 3) {
        *link = bpf_program__attach_usdt(prog, -1 /* any process */, bin_path,
                         usdt_provider, usdt_name, NULL);
        err = libbpf_get_error(*link);
    } else {
        pr_warn("invalid section '%s', expected SEC(\"usdt/<path>:<provider>:<name>\")\n",
            section);
        err = -EINVAL;
    }

    /* %m conversions allocate; anything not matched is still NULL */
    free(bin_path);
    free(usdt_provider);
    free(usdt_name);
    return err;
}
10948
10949 static int determine_tracepoint_id(const char *tp_category,
10950                    const char *tp_name)
10951 {
10952     char file[PATH_MAX];
10953     int ret;
10954
10955     ret = snprintf(file, sizeof(file), "%s/events/%s/%s/id",
10956                tracefs_path(), tp_category, tp_name);
10957     if (ret < 0)
10958         return -errno;
10959     if (ret >= sizeof(file)) {
10960         pr_debug("tracepoint %s/%s path is too long\n",
10961              tp_category, tp_name);
10962         return -E2BIG;
10963     }
10964     return parse_uint_from_file(file, "%d\n");
10965 }
10966
10967 static int perf_event_open_tracepoint(const char *tp_category,
10968                       const char *tp_name)
10969 {
10970     struct perf_event_attr attr = {};
10971     char errmsg[STRERR_BUFSIZE];
10972     int tp_id, pfd, err;
10973
10974     tp_id = determine_tracepoint_id(tp_category, tp_name);
10975     if (tp_id < 0) {
10976         pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
10977             tp_category, tp_name,
10978             libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
10979         return tp_id;
10980     }
10981
10982     attr.type = PERF_TYPE_TRACEPOINT;
10983     attr.size = sizeof(attr);
10984     attr.config = tp_id;
10985
10986     pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
10987               -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
10988     if (pfd < 0) {
10989         err = -errno;
10990         pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n",
10991             tp_category, tp_name,
10992             libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10993         return err;
10994     }
10995     return pfd;
10996 }
10997
10998 struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *prog,
10999                              const char *tp_category,
11000                              const char *tp_name,
11001                              const struct bpf_tracepoint_opts *opts)
11002 {
11003     DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
11004     char errmsg[STRERR_BUFSIZE];
11005     struct bpf_link *link;
11006     int pfd, err;
11007
11008     if (!OPTS_VALID(opts, bpf_tracepoint_opts))
11009         return libbpf_err_ptr(-EINVAL);
11010
11011     pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
11012
11013     pfd = perf_event_open_tracepoint(tp_category, tp_name);
11014     if (pfd < 0) {
11015         pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
11016             prog->name, tp_category, tp_name,
11017             libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
11018         return libbpf_err_ptr(pfd);
11019     }
11020     link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
11021     err = libbpf_get_error(link);
11022     if (err) {
11023         close(pfd);
11024         pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
11025             prog->name, tp_category, tp_name,
11026             libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11027         return libbpf_err_ptr(err);
11028     }
11029     return link;
11030 }
11031
/* Attach @prog to tracepoint @tp_category/@tp_name with default options. */
struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog,
                        const char *tp_category,
                        const char *tp_name)
{
    const struct bpf_tracepoint_opts *default_opts = NULL;

    return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, default_opts);
}
11038
11039 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11040 {
11041     char *sec_name, *tp_cat, *tp_name;
11042
11043     *link = NULL;
11044
11045     /* no auto-attach for SEC("tp") or SEC("tracepoint") */
11046     if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0)
11047         return 0;
11048
11049     sec_name = strdup(prog->sec_name);
11050     if (!sec_name)
11051         return -ENOMEM;
11052
11053     /* extract "tp/<category>/<name>" or "tracepoint/<category>/<name>" */
11054     if (str_has_pfx(prog->sec_name, "tp/"))
11055         tp_cat = sec_name + sizeof("tp/") - 1;
11056     else
11057         tp_cat = sec_name + sizeof("tracepoint/") - 1;
11058     tp_name = strchr(tp_cat, '/');
11059     if (!tp_name) {
11060         free(sec_name);
11061         return -EINVAL;
11062     }
11063     *tp_name = '\0';
11064     tp_name++;
11065
11066     *link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
11067     free(sec_name);
11068     return libbpf_get_error(*link);
11069 }
11070
11071 struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog,
11072                             const char *tp_name)
11073 {
11074     char errmsg[STRERR_BUFSIZE];
11075     struct bpf_link *link;
11076     int prog_fd, pfd;
11077
11078     prog_fd = bpf_program__fd(prog);
11079     if (prog_fd < 0) {
11080         pr_warn("prog '%s': can't attach before loaded\n", prog->name);
11081         return libbpf_err_ptr(-EINVAL);
11082     }
11083
11084     link = calloc(1, sizeof(*link));
11085     if (!link)
11086         return libbpf_err_ptr(-ENOMEM);
11087     link->detach = &bpf_link__detach_fd;
11088
11089     pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
11090     if (pfd < 0) {
11091         pfd = -errno;
11092         free(link);
11093         pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
11094             prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
11095         return libbpf_err_ptr(pfd);
11096     }
11097     link->fd = pfd;
11098     return link;
11099 }
11100
11101 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11102 {
11103     static const char *const prefixes[] = {
11104         "raw_tp",
11105         "raw_tracepoint",
11106         "raw_tp.w",
11107         "raw_tracepoint.w",
11108     };
11109     size_t i;
11110     const char *tp_name = NULL;
11111
11112     *link = NULL;
11113
11114     for (i = 0; i < ARRAY_SIZE(prefixes); i++) {
11115         size_t pfx_len;
11116
11117         if (!str_has_pfx(prog->sec_name, prefixes[i]))
11118             continue;
11119
11120         pfx_len = strlen(prefixes[i]);
11121         /* no auto-attach case of, e.g., SEC("raw_tp") */
11122         if (prog->sec_name[pfx_len] == '\0')
11123             return 0;
11124
11125         if (prog->sec_name[pfx_len] != '/')
11126             continue;
11127
11128         tp_name = prog->sec_name + pfx_len + 1;
11129         break;
11130     }
11131
11132     if (!tp_name) {
11133         pr_warn("prog '%s': invalid section name '%s'\n",
11134             prog->name, prog->sec_name);
11135         return -EINVAL;
11136     }
11137
11138     *link = bpf_program__attach_raw_tracepoint(prog, tp_name);
11139     return libbpf_get_error(link);
11140 }
11141
11142 /* Common logic for all BPF program types that attach to a btf_id */
11143 static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog,
11144                            const struct bpf_trace_opts *opts)
11145 {
11146     LIBBPF_OPTS(bpf_link_create_opts, link_opts);
11147     char errmsg[STRERR_BUFSIZE];
11148     struct bpf_link *link;
11149     int prog_fd, pfd;
11150
11151     if (!OPTS_VALID(opts, bpf_trace_opts))
11152         return libbpf_err_ptr(-EINVAL);
11153
11154     prog_fd = bpf_program__fd(prog);
11155     if (prog_fd < 0) {
11156         pr_warn("prog '%s': can't attach before loaded\n", prog->name);
11157         return libbpf_err_ptr(-EINVAL);
11158     }
11159
11160     link = calloc(1, sizeof(*link));
11161     if (!link)
11162         return libbpf_err_ptr(-ENOMEM);
11163     link->detach = &bpf_link__detach_fd;
11164
11165     /* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */
11166     link_opts.tracing.cookie = OPTS_GET(opts, cookie, 0);
11167     pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), &link_opts);
11168     if (pfd < 0) {
11169         pfd = -errno;
11170         free(link);
11171         pr_warn("prog '%s': failed to attach: %s\n",
11172             prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
11173         return libbpf_err_ptr(pfd);
11174     }
11175     link->fd = pfd;
11176     return link;
11177 }
11178
/* Attach a BTF-id based tracing program with default options. */
struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog)
{
    const struct bpf_trace_opts *default_opts = NULL;

    return bpf_program__attach_btf_id(prog, default_opts);
}
11183
/* Attach a BTF-id based tracing program, forwarding @opts unchanged. */
struct bpf_link *bpf_program__attach_trace_opts(const struct bpf_program *prog,
                        const struct bpf_trace_opts *opts)
{
    struct bpf_link *trace_link = bpf_program__attach_btf_id(prog, opts);

    return trace_link;
}
11189
/* Attach an LSM program; shares the BTF-id attach path, default options. */
struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog)
{
    const struct bpf_trace_opts *default_opts = NULL;

    return bpf_program__attach_btf_id(prog, default_opts);
}
11194
/* Auto-attach handler: store the trace link in *link and report its error code. */
static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
    struct bpf_link *trace_link = bpf_program__attach_trace(prog);

    *link = trace_link;
    return libbpf_get_error(trace_link);
}
11200
/* Auto-attach handler: store the LSM link in *link and report its error code. */
static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
    struct bpf_link *lsm_link = bpf_program__attach_lsm(prog);

    *link = lsm_link;
    return libbpf_get_error(lsm_link);
}
11206
11207 static struct bpf_link *
11208 bpf_program__attach_fd(const struct bpf_program *prog, int target_fd, int btf_id,
11209                const char *target_name)
11210 {
11211     DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts,
11212                 .target_btf_id = btf_id);
11213     enum bpf_attach_type attach_type;
11214     char errmsg[STRERR_BUFSIZE];
11215     struct bpf_link *link;
11216     int prog_fd, link_fd;
11217
11218     prog_fd = bpf_program__fd(prog);
11219     if (prog_fd < 0) {
11220         pr_warn("prog '%s': can't attach before loaded\n", prog->name);
11221         return libbpf_err_ptr(-EINVAL);
11222     }
11223
11224     link = calloc(1, sizeof(*link));
11225     if (!link)
11226         return libbpf_err_ptr(-ENOMEM);
11227     link->detach = &bpf_link__detach_fd;
11228
11229     attach_type = bpf_program__expected_attach_type(prog);
11230     link_fd = bpf_link_create(prog_fd, target_fd, attach_type, &opts);
11231     if (link_fd < 0) {
11232         link_fd = -errno;
11233         free(link);
11234         pr_warn("prog '%s': failed to attach to %s: %s\n",
11235             prog->name, target_name,
11236             libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
11237         return libbpf_err_ptr(link_fd);
11238     }
11239     link->fd = link_fd;
11240     return link;
11241 }
11242
/* Attach @prog to the cgroup referred to by @cgroup_fd (no target BTF ID). */
struct bpf_link *
bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd)
{
    const int no_btf_id = 0;

    return bpf_program__attach_fd(prog, cgroup_fd, no_btf_id, "cgroup");
}
11248
/* Attach @prog to the network namespace referred to by @netns_fd (no target BTF ID). */
struct bpf_link *
bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd)
{
    const int no_btf_id = 0;

    return bpf_program__attach_fd(prog, netns_fd, no_btf_id, "netns");
}
11254
/* Attach an XDP program to the interface with index @ifindex. */
struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex)
{
    const int no_btf_id = 0;

    /* target_fd/target_ifindex use the same field in LINK_CREATE */
    return bpf_program__attach_fd(prog, ifindex, no_btf_id, "xdp");
}
11260
11261 struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog,
11262                           int target_fd,
11263                           const char *attach_func_name)
11264 {
11265     int btf_id;
11266
11267     if (!!target_fd != !!attach_func_name) {
11268         pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
11269             prog->name);
11270         return libbpf_err_ptr(-EINVAL);
11271     }
11272
11273     if (prog->type != BPF_PROG_TYPE_EXT) {
11274         pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace",
11275             prog->name);
11276         return libbpf_err_ptr(-EINVAL);
11277     }
11278
11279     if (target_fd) {
11280         btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
11281         if (btf_id < 0)
11282             return libbpf_err_ptr(btf_id);
11283
11284         return bpf_program__attach_fd(prog, target_fd, btf_id, "freplace");
11285     } else {
11286         /* no target, so use raw_tracepoint_open for compatibility
11287          * with old kernels
11288          */
11289         return bpf_program__attach_trace(prog);
11290     }
11291 }
11292
11293 struct bpf_link *
11294 bpf_program__attach_iter(const struct bpf_program *prog,
11295              const struct bpf_iter_attach_opts *opts)
11296 {
11297     DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
11298     char errmsg[STRERR_BUFSIZE];
11299     struct bpf_link *link;
11300     int prog_fd, link_fd;
11301     __u32 target_fd = 0;
11302
11303     if (!OPTS_VALID(opts, bpf_iter_attach_opts))
11304         return libbpf_err_ptr(-EINVAL);
11305
11306     link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
11307     link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
11308
11309     prog_fd = bpf_program__fd(prog);
11310     if (prog_fd < 0) {
11311         pr_warn("prog '%s': can't attach before loaded\n", prog->name);
11312         return libbpf_err_ptr(-EINVAL);
11313     }
11314
11315     link = calloc(1, sizeof(*link));
11316     if (!link)
11317         return libbpf_err_ptr(-ENOMEM);
11318     link->detach = &bpf_link__detach_fd;
11319
11320     link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
11321                   &link_create_opts);
11322     if (link_fd < 0) {
11323         link_fd = -errno;
11324         free(link);
11325         pr_warn("prog '%s': failed to attach to iterator: %s\n",
11326             prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
11327         return libbpf_err_ptr(link_fd);
11328     }
11329     link->fd = link_fd;
11330     return link;
11331 }
11332
/* Auto-attach handler: attach the iterator with default options and report its error code. */
static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
    struct bpf_link *iter_link = bpf_program__attach_iter(prog, NULL);

    *link = iter_link;
    return libbpf_get_error(iter_link);
}
11338
11339 struct bpf_link *bpf_program__attach(const struct bpf_program *prog)
11340 {
11341     struct bpf_link *link = NULL;
11342     int err;
11343
11344     if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
11345         return libbpf_err_ptr(-EOPNOTSUPP);
11346
11347     err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, &link);
11348     if (err)
11349         return libbpf_err_ptr(err);
11350
11351     /* When calling bpf_program__attach() explicitly, auto-attach support
11352      * is expected to work, so NULL returned link is considered an error.
11353      * This is different for skeleton's attach, see comment in
11354      * bpf_object__attach_skeleton().
11355      */
11356     if (!link)
11357         return libbpf_err_ptr(-EOPNOTSUPP);
11358
11359     return link;
11360 }
11361
11362 static int bpf_link__detach_struct_ops(struct bpf_link *link)
11363 {
11364     __u32 zero = 0;
11365
11366     if (bpf_map_delete_elem(link->fd, &zero))
11367         return -errno;
11368
11369     return 0;
11370 }
11371
11372 struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
11373 {
11374     struct bpf_struct_ops *st_ops;
11375     struct bpf_link *link;
11376     __u32 i, zero = 0;
11377     int err;
11378
11379     if (!bpf_map__is_struct_ops(map) || map->fd == -1)
11380         return libbpf_err_ptr(-EINVAL);
11381
11382     link = calloc(1, sizeof(*link));
11383     if (!link)
11384         return libbpf_err_ptr(-EINVAL);
11385
11386     st_ops = map->st_ops;
11387     for (i = 0; i < btf_vlen(st_ops->type); i++) {
11388         struct bpf_program *prog = st_ops->progs[i];
11389         void *kern_data;
11390         int prog_fd;
11391
11392         if (!prog)
11393             continue;
11394
11395         prog_fd = bpf_program__fd(prog);
11396         kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
11397         *(unsigned long *)kern_data = prog_fd;
11398     }
11399
11400     err = bpf_map_update_elem(map->fd, &zero, st_ops->kern_vdata, 0);
11401     if (err) {
11402         err = -errno;
11403         free(link);
11404         return libbpf_err_ptr(err);
11405     }
11406
11407     link->detach = bpf_link__detach_struct_ops;
11408     link->fd = map->fd;
11409
11410     return link;
11411 }
11412
11413 typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
11414                               void *private_data);
11415
11416 static enum bpf_perf_event_ret
11417 perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
11418                void **copy_mem, size_t *copy_size,
11419                bpf_perf_event_print_t fn, void *private_data)
11420 {
11421     struct perf_event_mmap_page *header = mmap_mem;
11422     __u64 data_head = ring_buffer_read_head(header);
11423     __u64 data_tail = header->data_tail;
11424     void *base = ((__u8 *)header) + page_size;
11425     int ret = LIBBPF_PERF_EVENT_CONT;
11426     struct perf_event_header *ehdr;
11427     size_t ehdr_size;
11428
11429     while (data_head != data_tail) {
11430         ehdr = base + (data_tail & (mmap_size - 1));
11431         ehdr_size = ehdr->size;
11432
11433         if (((void *)ehdr) + ehdr_size > base + mmap_size) {
11434             void *copy_start = ehdr;
11435             size_t len_first = base + mmap_size - copy_start;
11436             size_t len_secnd = ehdr_size - len_first;
11437
11438             if (*copy_size < ehdr_size) {
11439                 free(*copy_mem);
11440                 *copy_mem = malloc(ehdr_size);
11441                 if (!*copy_mem) {
11442                     *copy_size = 0;
11443                     ret = LIBBPF_PERF_EVENT_ERROR;
11444                     break;
11445                 }
11446                 *copy_size = ehdr_size;
11447             }
11448
11449             memcpy(*copy_mem, copy_start, len_first);
11450             memcpy(*copy_mem + len_first, base, len_secnd);
11451             ehdr = *copy_mem;
11452         }
11453
11454         ret = fn(ehdr, private_data);
11455         data_tail += ehdr_size;
11456         if (ret != LIBBPF_PERF_EVENT_CONT)
11457             break;
11458     }
11459
11460     ring_buffer_write_tail(header, data_tail);
11461     return libbpf_err(ret);
11462 }
11463
11464 struct perf_buffer;
11465
/* Parameter bundle describing a perf buffer; NOTE(review): its consumer is
 * outside this part of the file, so per-field semantics below are limited
 * to what the names and existing comments state.
 */
struct perf_buffer_params {
    struct perf_event_attr *attr;
    /* if event_cb is specified, it takes precedence */
    perf_buffer_event_fn event_cb;
    /* sample_cb and lost_cb are higher-level common-case callbacks */
    perf_buffer_sample_fn sample_cb;
    perf_buffer_lost_fn lost_cb;
    void *ctx;
    int cpu_cnt;
    int *cpus;
    int *map_keys;
};
11478
/* Per-CPU buffer state owned by a perf_buffer; released by
 * perf_buffer__free_cpu_buf().
 */
struct perf_cpu_buf {
    struct perf_buffer *pb; /* owning perf buffer */
    void *base; /* mmap()'ed memory */
    void *buf; /* for reconstructing segmented data */
    size_t buf_size;
    int fd; /* perf event FD; disabled and closed on free */
    int cpu;
    int map_key; /* key deleted from pb->map_fd in perf_buffer__free() */
};
11488
/* Top-level perf buffer state; released by perf_buffer__free(). */
struct perf_buffer {
    perf_buffer_event_fn event_cb;
    perf_buffer_sample_fn sample_cb;
    perf_buffer_lost_fn lost_cb;
    void *ctx; /* passed into callbacks */

    size_t page_size;
    size_t mmap_size; /* each CPU buffer is unmapped with mmap_size + page_size bytes */
    struct perf_cpu_buf **cpu_bufs;
    struct epoll_event *events;
    int cpu_cnt; /* number of allocated CPU buffers */
    int epoll_fd; /* NOTE(review): by its name this is the epoll instance FD, not a perf event FD */
    int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
};
11503
11504 static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
11505                       struct perf_cpu_buf *cpu_buf)
11506 {
11507     if (!cpu_buf)
11508         return;
11509     if (cpu_buf->base &&
11510         munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
11511         pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
11512     if (cpu_buf->fd >= 0) {
11513         ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
11514         close(cpu_buf->fd);
11515     }
11516     free(cpu_buf->buf);
11517     free(cpu_buf);
11518 }
11519
11520 void perf_buffer__free(struct perf_buffer *pb)
11521 {
11522     int i;
11523
11524     if (IS_ERR_OR_NULL(pb))
11525         return;
11526     if (pb->cpu_bufs) {
11527         for (i = 0; i < pb->cpu_cnt; i++) {
11528             struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
11529
11530             if (!cpu_buf)
11531                 continue;
11532
11533             bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
11534             perf_buffer__free_cpu_buf(pb, cpu_buf);
11535         }
11536         free(pb->cpu_bufs);
11537     }
11538     if (pb->epoll_fd >= 0)
11539         close(pb->epoll_fd);
11540     free(pb->events);
11541     free(pb);
11542 }
11543
11544 static struct perf_cpu_buf *
11545 perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
11546               int cpu, int map_key)
11547 {
11548     struct perf_cpu_buf *cpu_buf;
11549     char msg[STRERR_BUFSIZE];
11550     int err;
11551
11552     cpu_buf = calloc(1, sizeof(*cpu_buf));
11553     if (!cpu_buf)
11554         return ERR_PTR(-ENOMEM);
11555
11556     cpu_buf->pb = pb;
11557     cpu_buf->cpu = cpu;
11558     cpu_buf->map_key = map_key;
11559
11560     cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
11561                   -1, PERF_FLAG_FD_CLOEXEC);
11562     if (cpu_buf->fd < 0) {
11563         err = -errno;
11564         pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
11565             cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
11566         goto error;
11567     }
11568
11569     cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
11570                  PROT_READ | PROT_WRITE, MAP_SHARED,
11571                  cpu_buf->fd, 0);
11572     if (cpu_buf->base == MAP_FAILED) {
11573         cpu_buf->base = NULL;
11574         err = -errno;
11575         pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
11576             cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
11577         goto error;
11578     }
11579
11580     if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
11581         err = -errno;
11582         pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
11583             cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
11584         goto error;
11585     }
11586
11587     return cpu_buf;
11588
11589 error:
11590     perf_buffer__free_cpu_buf(pb, cpu_buf);
11591     return (struct perf_cpu_buf *)ERR_PTR(err);
11592 }
11593
11594 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
11595                           struct perf_buffer_params *p);
11596
11597 struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
11598                      perf_buffer_sample_fn sample_cb,
11599                      perf_buffer_lost_fn lost_cb,
11600                      void *ctx,
11601                      const struct perf_buffer_opts *opts)
11602 {
11603     struct perf_buffer_params p = {};
11604     struct perf_event_attr attr = {};
11605
11606     if (!OPTS_VALID(opts, perf_buffer_opts))
11607         return libbpf_err_ptr(-EINVAL);
11608
11609     attr.config = PERF_COUNT_SW_BPF_OUTPUT;
11610     attr.type = PERF_TYPE_SOFTWARE;
11611     attr.sample_type = PERF_SAMPLE_RAW;
11612     attr.sample_period = 1;
11613     attr.wakeup_events = 1;
11614
11615     p.attr = &attr;
11616     p.sample_cb = sample_cb;
11617     p.lost_cb = lost_cb;
11618     p.ctx = ctx;
11619
11620     return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
11621 }
11622
11623 struct perf_buffer *perf_buffer__new_raw(int map_fd, size_t page_cnt,
11624                      struct perf_event_attr *attr,
11625                      perf_buffer_event_fn event_cb, void *ctx,
11626                      const struct perf_buffer_raw_opts *opts)
11627 {
11628     struct perf_buffer_params p = {};
11629
11630     if (!attr)
11631         return libbpf_err_ptr(-EINVAL);
11632
11633     if (!OPTS_VALID(opts, perf_buffer_raw_opts))
11634         return libbpf_err_ptr(-EINVAL);
11635
11636     p.attr = attr;
11637     p.event_cb = event_cb;
11638     p.ctx = ctx;
11639     p.cpu_cnt = OPTS_GET(opts, cpu_cnt, 0);
11640     p.cpus = OPTS_GET(opts, cpus, NULL);
11641     p.map_keys = OPTS_GET(opts, map_keys, NULL);
11642
11643     return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
11644 }
11645
11646 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
11647                           struct perf_buffer_params *p)
11648 {
11649     const char *online_cpus_file = "/sys/devices/system/cpu/online";
11650     struct bpf_map_info map;
11651     char msg[STRERR_BUFSIZE];
11652     struct perf_buffer *pb;
11653     bool *online = NULL;
11654     __u32 map_info_len;
11655     int err, i, j, n;
11656
11657     if (page_cnt == 0 || (page_cnt & (page_cnt - 1))) {
11658         pr_warn("page count should be power of two, but is %zu\n",
11659             page_cnt);
11660         return ERR_PTR(-EINVAL);
11661     }
11662
11663     /* best-effort sanity checks */
11664     memset(&map, 0, sizeof(map));
11665     map_info_len = sizeof(map);
11666     err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len);
11667     if (err) {
11668         err = -errno;
11669         /* if BPF_OBJ_GET_INFO_BY_FD is supported, will return
11670          * -EBADFD, -EFAULT, or -E2BIG on real error
11671          */
11672         if (err != -EINVAL) {
11673             pr_warn("failed to get map info for map FD %d: %s\n",
11674                 map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
11675             return ERR_PTR(err);
11676         }
11677         pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
11678              map_fd);
11679     } else {
11680         if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
11681             pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
11682                 map.name);
11683             return ERR_PTR(-EINVAL);
11684         }
11685     }
11686
11687     pb = calloc(1, sizeof(*pb));
11688     if (!pb)
11689         return ERR_PTR(-ENOMEM);
11690
11691     pb->event_cb = p->event_cb;
11692     pb->sample_cb = p->sample_cb;
11693     pb->lost_cb = p->lost_cb;
11694     pb->ctx = p->ctx;
11695
11696     pb->page_size = getpagesize();
11697     pb->mmap_size = pb->page_size * page_cnt;
11698     pb->map_fd = map_fd;
11699
11700     pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
11701     if (pb->epoll_fd < 0) {
11702         err = -errno;
11703         pr_warn("failed to create epoll instance: %s\n",
11704             libbpf_strerror_r(err, msg, sizeof(msg)));
11705         goto error;
11706     }
11707
11708     if (p->cpu_cnt > 0) {
11709         pb->cpu_cnt = p->cpu_cnt;
11710     } else {
11711         pb->cpu_cnt = libbpf_num_possible_cpus();
11712         if (pb->cpu_cnt < 0) {
11713             err = pb->cpu_cnt;
11714             goto error;
11715         }
11716         if (map.max_entries && map.max_entries < pb->cpu_cnt)
11717             pb->cpu_cnt = map.max_entries;
11718     }
11719
11720     pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
11721     if (!pb->events) {
11722         err = -ENOMEM;
11723         pr_warn("failed to allocate events: out of memory\n");
11724         goto error;
11725     }
11726     pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
11727     if (!pb->cpu_bufs) {
11728         err = -ENOMEM;
11729         pr_warn("failed to allocate buffers: out of memory\n");
11730         goto error;
11731     }
11732
11733     err = parse_cpu_mask_file(online_cpus_file, &online, &n);
11734     if (err) {
11735         pr_warn("failed to get online CPU mask: %d\n", err);
11736         goto error;
11737     }
11738
11739     for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
11740         struct perf_cpu_buf *cpu_buf;
11741         int cpu, map_key;
11742
11743         cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
11744         map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
11745
11746         /* in case user didn't explicitly requested particular CPUs to
11747          * be attached to, skip offline/not present CPUs
11748          */
11749         if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
11750             continue;
11751
11752         cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
11753         if (IS_ERR(cpu_buf)) {
11754             err = PTR_ERR(cpu_buf);
11755             goto error;
11756         }
11757
11758         pb->cpu_bufs[j] = cpu_buf;
11759
11760         err = bpf_map_update_elem(pb->map_fd, &map_key,
11761                       &cpu_buf->fd, 0);
11762         if (err) {
11763             err = -errno;
11764             pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
11765                 cpu, map_key, cpu_buf->fd,
11766                 libbpf_strerror_r(err, msg, sizeof(msg)));
11767             goto error;
11768         }
11769
11770         pb->events[j].events = EPOLLIN;
11771         pb->events[j].data.ptr = cpu_buf;
11772         if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
11773                   &pb->events[j]) < 0) {
11774             err = -errno;
11775             pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
11776                 cpu, cpu_buf->fd,
11777                 libbpf_strerror_r(err, msg, sizeof(msg)));
11778             goto error;
11779         }
11780         j++;
11781     }
11782     pb->cpu_cnt = j;
11783     free(online);
11784
11785     return pb;
11786
11787 error:
11788     free(online);
11789     if (pb)
11790         perf_buffer__free(pb);
11791     return ERR_PTR(err);
11792 }
11793
11794 struct perf_sample_raw {
11795     struct perf_event_header header;
11796     uint32_t size;
11797     char data[];
11798 };
11799
11800 struct perf_sample_lost {
11801     struct perf_event_header header;
11802     uint64_t id;
11803     uint64_t lost;
11804     uint64_t sample_id;
11805 };
11806
11807 static enum bpf_perf_event_ret
11808 perf_buffer__process_record(struct perf_event_header *e, void *ctx)
11809 {
11810     struct perf_cpu_buf *cpu_buf = ctx;
11811     struct perf_buffer *pb = cpu_buf->pb;
11812     void *data = e;
11813
11814     /* user wants full control over parsing perf event */
11815     if (pb->event_cb)
11816         return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
11817
11818     switch (e->type) {
11819     case PERF_RECORD_SAMPLE: {
11820         struct perf_sample_raw *s = data;
11821
11822         if (pb->sample_cb)
11823             pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
11824         break;
11825     }
11826     case PERF_RECORD_LOST: {
11827         struct perf_sample_lost *s = data;
11828
11829         if (pb->lost_cb)
11830             pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
11831         break;
11832     }
11833     default:
11834         pr_warn("unknown perf sample type %d\n", e->type);
11835         return LIBBPF_PERF_EVENT_ERROR;
11836     }
11837     return LIBBPF_PERF_EVENT_CONT;
11838 }
11839
11840 static int perf_buffer__process_records(struct perf_buffer *pb,
11841                     struct perf_cpu_buf *cpu_buf)
11842 {
11843     enum bpf_perf_event_ret ret;
11844
11845     ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size,
11846                      pb->page_size, &cpu_buf->buf,
11847                      &cpu_buf->buf_size,
11848                      perf_buffer__process_record, cpu_buf);
11849     if (ret != LIBBPF_PERF_EVENT_CONT)
11850         return ret;
11851     return 0;
11852 }
11853
11854 int perf_buffer__epoll_fd(const struct perf_buffer *pb)
11855 {
11856     return pb->epoll_fd;
11857 }
11858
11859 int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
11860 {
11861     int i, cnt, err;
11862
11863     cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
11864     if (cnt < 0)
11865         return -errno;
11866
11867     for (i = 0; i < cnt; i++) {
11868         struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
11869
11870         err = perf_buffer__process_records(pb, cpu_buf);
11871         if (err) {
11872             pr_warn("error while processing records: %d\n", err);
11873             return libbpf_err(err);
11874         }
11875     }
11876     return cnt;
11877 }
11878
11879 /* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
11880  * manager.
11881  */
11882 size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb)
11883 {
11884     return pb->cpu_cnt;
11885 }
11886
11887 /*
11888  * Return perf_event FD of a ring buffer in *buf_idx* slot of
11889  * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using
11890  * select()/poll()/epoll() Linux syscalls.
11891  */
11892 int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
11893 {
11894     struct perf_cpu_buf *cpu_buf;
11895
11896     if (buf_idx >= pb->cpu_cnt)
11897         return libbpf_err(-EINVAL);
11898
11899     cpu_buf = pb->cpu_bufs[buf_idx];
11900     if (!cpu_buf)
11901         return libbpf_err(-ENOENT);
11902
11903     return cpu_buf->fd;
11904 }
11905
11906 int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf, size_t *buf_size)
11907 {
11908     struct perf_cpu_buf *cpu_buf;
11909
11910     if (buf_idx >= pb->cpu_cnt)
11911         return libbpf_err(-EINVAL);
11912
11913     cpu_buf = pb->cpu_bufs[buf_idx];
11914     if (!cpu_buf)
11915         return libbpf_err(-ENOENT);
11916
11917     *buf = cpu_buf->base;
11918     *buf_size = pb->mmap_size;
11919     return 0;
11920 }
11921
11922 /*
11923  * Consume data from perf ring buffer corresponding to slot *buf_idx* in
11924  * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to
11925  * consume, do nothing and return success.
11926  * Returns:
11927  *   - 0 on success;
11928  *   - <0 on failure.
11929  */
11930 int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
11931 {
11932     struct perf_cpu_buf *cpu_buf;
11933
11934     if (buf_idx >= pb->cpu_cnt)
11935         return libbpf_err(-EINVAL);
11936
11937     cpu_buf = pb->cpu_bufs[buf_idx];
11938     if (!cpu_buf)
11939         return libbpf_err(-ENOENT);
11940
11941     return perf_buffer__process_records(pb, cpu_buf);
11942 }
11943
11944 int perf_buffer__consume(struct perf_buffer *pb)
11945 {
11946     int i, err;
11947
11948     for (i = 0; i < pb->cpu_cnt; i++) {
11949         struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
11950
11951         if (!cpu_buf)
11952             continue;
11953
11954         err = perf_buffer__process_records(pb, cpu_buf);
11955         if (err) {
11956             pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
11957             return libbpf_err(err);
11958         }
11959     }
11960     return 0;
11961 }
11962
11963 int bpf_program__set_attach_target(struct bpf_program *prog,
11964                    int attach_prog_fd,
11965                    const char *attach_func_name)
11966 {
11967     int btf_obj_fd = 0, btf_id = 0, err;
11968
11969     if (!prog || attach_prog_fd < 0)
11970         return libbpf_err(-EINVAL);
11971
11972     if (prog->obj->loaded)
11973         return libbpf_err(-EINVAL);
11974
11975     if (attach_prog_fd && !attach_func_name) {
11976         /* remember attach_prog_fd and let bpf_program__load() find
11977          * BTF ID during the program load
11978          */
11979         prog->attach_prog_fd = attach_prog_fd;
11980         return 0;
11981     }
11982
11983     if (attach_prog_fd) {
11984         btf_id = libbpf_find_prog_btf_id(attach_func_name,
11985                          attach_prog_fd);
11986         if (btf_id < 0)
11987             return libbpf_err(btf_id);
11988     } else {
11989         if (!attach_func_name)
11990             return libbpf_err(-EINVAL);
11991
11992         /* load btf_vmlinux, if not yet */
11993         err = bpf_object__load_vmlinux_btf(prog->obj, true);
11994         if (err)
11995             return libbpf_err(err);
11996         err = find_kernel_btf_id(prog->obj, attach_func_name,
11997                      prog->expected_attach_type,
11998                      &btf_obj_fd, &btf_id);
11999         if (err)
12000             return libbpf_err(err);
12001     }
12002
12003     prog->attach_btf_id = btf_id;
12004     prog->attach_btf_obj_fd = btf_obj_fd;
12005     prog->attach_prog_fd = attach_prog_fd;
12006     return 0;
12007 }
12008
/* Parse a CPU list string such as "0-3,5,8-9" into a boolean mask.
 *
 * @s:       NUL-terminated list of "N" or "N-M" items separated by ','
 *           and/or '\n'
 * @mask:    on success set to a malloc()'ed array with mask[cpu] == true
 *           for every listed CPU; caller frees it. Set to NULL on failure.
 * @mask_sz: on success set to the number of entries in *mask (highest
 *           listed CPU + 1); set to 0 on failure.
 *
 * Ranges may appear in any order and may overlap.
 *
 * Returns 0 on success, -EINVAL on malformed or empty input, -ENOMEM on
 * allocation failure.
 */
int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
{
    int err = 0, n, len, start, end = -1;
    bool *tmp;

    *mask = NULL;
    *mask_sz = 0;

    /* Each sub string separated by ',' has format \d+-\d+ or \d+ */
    while (*s) {
        if (*s == ',' || *s == '\n') {
            s++;
            continue;
        }
        /* len ends up as the length of whatever was matched last */
        n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
        if (n <= 0 || n > 2) {
            pr_warn("Failed to get CPU range %s: %d\n", s, n);
            err = -EINVAL;
            goto cleanup;
        } else if (n == 1) {
            end = start;
        }
        /* end == INT_MAX would overflow "end + 1" below */
        if (start < 0 || start > end || end == INT_MAX) {
            pr_warn("Invalid CPU range [%d,%d] in %s\n",
                    start, end, s);
            err = -EINVAL;
            goto cleanup;
        }
        /* Only ever grow the mask: a range that lies (partly) below
         * the current size must neither shrink the buffer nor produce
         * a negative gap length for memset().
         */
        if (end >= *mask_sz) {
            tmp = realloc(*mask, (size_t)end + 1);
            if (!tmp) {
                err = -ENOMEM;
                goto cleanup;
            }
            *mask = tmp;
            /* clear the gap between the old end and this range */
            if (start > *mask_sz)
                memset(tmp + *mask_sz, 0, start - *mask_sz);
            *mask_sz = end + 1;
        }
        memset(*mask + start, 1, end - start + 1);
        s += len;
    }
    if (!*mask_sz) {
        pr_warn("Empty CPU range\n");
        return -EINVAL;
    }
    return 0;
cleanup:
    free(*mask);
    *mask = NULL;
    *mask_sz = 0;
    return err;
}
12058
12059 int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
12060 {
12061     int fd, err = 0, len;
12062     char buf[128];
12063
12064     fd = open(fcpu, O_RDONLY | O_CLOEXEC);
12065     if (fd < 0) {
12066         err = -errno;
12067         pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err);
12068         return err;
12069     }
12070     len = read(fd, buf, sizeof(buf));
12071     close(fd);
12072     if (len <= 0) {
12073         err = len ? -errno : -EINVAL;
12074         pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err);
12075         return err;
12076     }
12077     if (len >= sizeof(buf)) {
12078         pr_warn("CPU mask is too big in file %s\n", fcpu);
12079         return -E2BIG;
12080     }
12081     buf[len] = '\0';
12082
12083     return parse_cpu_mask_str(buf, mask, mask_sz);
12084 }
12085
/* Count the CPUs listed in /sys/devices/system/cpu/possible.
 *
 * A positive result is remembered in a function-local static and returned
 * directly by later calls. Returns the CPU count, or a negative error if
 * the mask file cannot be read or parsed.
 */
int libbpf_num_possible_cpus(void)
{
    static const char *fcpu = "/sys/devices/system/cpu/possible";
    static int cpus;
    int cached, err, mask_len, idx, cnt = 0;
    bool *mask;

    cached = READ_ONCE(cpus);
    if (cached > 0)
        return cached;

    err = parse_cpu_mask_file(fcpu, &mask, &mask_len);
    if (err)
        return libbpf_err(err);

    for (idx = 0; idx < mask_len; idx++)
        cnt += mask[idx] ? 1 : 0;
    free(mask);

    WRITE_ONCE(cpus, cnt);
    return cnt;
}
12111
12112 static int populate_skeleton_maps(const struct bpf_object *obj,
12113                   struct bpf_map_skeleton *maps,
12114                   size_t map_cnt)
12115 {
12116     int i;
12117
12118     for (i = 0; i < map_cnt; i++) {
12119         struct bpf_map **map = maps[i].map;
12120         const char *name = maps[i].name;
12121         void **mmaped = maps[i].mmaped;
12122
12123         *map = bpf_object__find_map_by_name(obj, name);
12124         if (!*map) {
12125             pr_warn("failed to find skeleton map '%s'\n", name);
12126             return -ESRCH;
12127         }
12128
12129         /* externs shouldn't be pre-setup from user code */
12130         if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
12131             *mmaped = (*map)->mmaped;
12132     }
12133     return 0;
12134 }
12135
12136 static int populate_skeleton_progs(const struct bpf_object *obj,
12137                    struct bpf_prog_skeleton *progs,
12138                    size_t prog_cnt)
12139 {
12140     int i;
12141
12142     for (i = 0; i < prog_cnt; i++) {
12143         struct bpf_program **prog = progs[i].prog;
12144         const char *name = progs[i].name;
12145
12146         *prog = bpf_object__find_program_by_name(obj, name);
12147         if (!*prog) {
12148             pr_warn("failed to find skeleton program '%s'\n", name);
12149             return -ESRCH;
12150         }
12151     }
12152     return 0;
12153 }
12154
12155 int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
12156                   const struct bpf_object_open_opts *opts)
12157 {
12158     DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts,
12159         .object_name = s->name,
12160     );
12161     struct bpf_object *obj;
12162     int err;
12163
12164     /* Attempt to preserve opts->object_name, unless overriden by user
12165      * explicitly. Overwriting object name for skeletons is discouraged,
12166      * as it breaks global data maps, because they contain object name
12167      * prefix as their own map name prefix. When skeleton is generated,
12168      * bpftool is making an assumption that this name will stay the same.
12169      */
12170     if (opts) {
12171         memcpy(&skel_opts, opts, sizeof(*opts));
12172         if (!opts->object_name)
12173             skel_opts.object_name = s->name;
12174     }
12175
12176     obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
12177     err = libbpf_get_error(obj);
12178     if (err) {
12179         pr_warn("failed to initialize skeleton BPF object '%s': %d\n",
12180             s->name, err);
12181         return libbpf_err(err);
12182     }
12183
12184     *s->obj = obj;
12185     err = populate_skeleton_maps(obj, s->maps, s->map_cnt);
12186     if (err) {
12187         pr_warn("failed to populate skeleton maps for '%s': %d\n", s->name, err);
12188         return libbpf_err(err);
12189     }
12190
12191     err = populate_skeleton_progs(obj, s->progs, s->prog_cnt);
12192     if (err) {
12193         pr_warn("failed to populate skeleton progs for '%s': %d\n", s->name, err);
12194         return libbpf_err(err);
12195     }
12196
12197     return 0;
12198 }
12199
12200 int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s)
12201 {
12202     int err, len, var_idx, i;
12203     const char *var_name;
12204     const struct bpf_map *map;
12205     struct btf *btf;
12206     __u32 map_type_id;
12207     const struct btf_type *map_type, *var_type;
12208     const struct bpf_var_skeleton *var_skel;
12209     struct btf_var_secinfo *var;
12210
12211     if (!s->obj)
12212         return libbpf_err(-EINVAL);
12213
12214     btf = bpf_object__btf(s->obj);
12215     if (!btf) {
12216         pr_warn("subskeletons require BTF at runtime (object %s)\n",
12217                 bpf_object__name(s->obj));
12218         return libbpf_err(-errno);
12219     }
12220
12221     err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt);
12222     if (err) {
12223         pr_warn("failed to populate subskeleton maps: %d\n", err);
12224         return libbpf_err(err);
12225     }
12226
12227     err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt);
12228     if (err) {
12229         pr_warn("failed to populate subskeleton maps: %d\n", err);
12230         return libbpf_err(err);
12231     }
12232
12233     for (var_idx = 0; var_idx < s->var_cnt; var_idx++) {
12234         var_skel = &s->vars[var_idx];
12235         map = *var_skel->map;
12236         map_type_id = bpf_map__btf_value_type_id(map);
12237         map_type = btf__type_by_id(btf, map_type_id);
12238
12239         if (!btf_is_datasec(map_type)) {
12240             pr_warn("type for map '%1$s' is not a datasec: %2$s",
12241                 bpf_map__name(map),
12242                 __btf_kind_str(btf_kind(map_type)));
12243             return libbpf_err(-EINVAL);
12244         }
12245
12246         len = btf_vlen(map_type);
12247         var = btf_var_secinfos(map_type);
12248         for (i = 0; i < len; i++, var++) {
12249             var_type = btf__type_by_id(btf, var->type);
12250             var_name = btf__name_by_offset(btf, var_type->name_off);
12251             if (strcmp(var_name, var_skel->name) == 0) {
12252                 *var_skel->addr = map->mmaped + var->offset;
12253                 break;
12254             }
12255         }
12256     }
12257     return 0;
12258 }
12259
12260 void bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s)
12261 {
12262     if (!s)
12263         return;
12264     free(s->maps);
12265     free(s->progs);
12266     free(s->vars);
12267     free(s);
12268 }
12269
12270 int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
12271 {
12272     int i, err;
12273
12274     err = bpf_object__load(*s->obj);
12275     if (err) {
12276         pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
12277         return libbpf_err(err);
12278     }
12279
12280     for (i = 0; i < s->map_cnt; i++) {
12281         struct bpf_map *map = *s->maps[i].map;
12282         size_t mmap_sz = bpf_map_mmap_sz(map);
12283         int prot, map_fd = bpf_map__fd(map);
12284         void **mmaped = s->maps[i].mmaped;
12285
12286         if (!mmaped)
12287             continue;
12288
12289         if (!(map->def.map_flags & BPF_F_MMAPABLE)) {
12290             *mmaped = NULL;
12291             continue;
12292         }
12293
12294         if (map->def.map_flags & BPF_F_RDONLY_PROG)
12295             prot = PROT_READ;
12296         else
12297             prot = PROT_READ | PROT_WRITE;
12298
12299         /* Remap anonymous mmap()-ed "map initialization image" as
12300          * a BPF map-backed mmap()-ed memory, but preserving the same
12301          * memory address. This will cause kernel to change process'
12302          * page table to point to a different piece of kernel memory,
12303          * but from userspace point of view memory address (and its
12304          * contents, being identical at this point) will stay the
12305          * same. This mapping will be released by bpf_object__close()
12306          * as per normal clean up procedure, so we don't need to worry
12307          * about it from skeleton's clean up perspective.
12308          */
12309         *mmaped = mmap(map->mmaped, mmap_sz, prot,
12310                 MAP_SHARED | MAP_FIXED, map_fd, 0);
12311         if (*mmaped == MAP_FAILED) {
12312             err = -errno;
12313             *mmaped = NULL;
12314             pr_warn("failed to re-mmap() map '%s': %d\n",
12315                  bpf_map__name(map), err);
12316             return libbpf_err(err);
12317         }
12318     }
12319
12320     return 0;
12321 }
12322
12323 int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
12324 {
12325     int i, err;
12326
12327     for (i = 0; i < s->prog_cnt; i++) {
12328         struct bpf_program *prog = *s->progs[i].prog;
12329         struct bpf_link **link = s->progs[i].link;
12330
12331         if (!prog->autoload)
12332             continue;
12333
12334         /* auto-attaching not supported for this program */
12335         if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
12336             continue;
12337
12338         /* if user already set the link manually, don't attempt auto-attach */
12339         if (*link)
12340             continue;
12341
12342         err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, link);
12343         if (err) {
12344             pr_warn("prog '%s': failed to auto-attach: %d\n",
12345                 bpf_program__name(prog), err);
12346             return libbpf_err(err);
12347         }
12348
12349         /* It's possible that for some SEC() definitions auto-attach
12350          * is supported in some cases (e.g., if definition completely
12351          * specifies target information), but is not in other cases.
12352          * SEC("uprobe") is one such case. If user specified target
12353          * binary and function name, such BPF program can be
12354          * auto-attached. But if not, it shouldn't trigger skeleton's
12355          * attach to fail. It should just be skipped.
12356          * attach_fn signals such case with returning 0 (no error) and
12357          * setting link to NULL.
12358          */
12359     }
12360
12361     return 0;
12362 }
12363
12364 void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
12365 {
12366     int i;
12367
12368     for (i = 0; i < s->prog_cnt; i++) {
12369         struct bpf_link **link = s->progs[i].link;
12370
12371         bpf_link__destroy(*link);
12372         *link = NULL;
12373     }
12374 }
12375
12376 void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
12377 {
12378     if (!s)
12379         return;
12380
12381     if (s->progs)
12382         bpf_object__detach_skeleton(s);
12383     if (s->obj)
12384         bpf_object__close(*s->obj);
12385     free(s->maps);
12386     free(s->progs);
12387     free(s);
12388 }