Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * Augment the raw_syscalls tracepoints with the contents of the pointer arguments.
0004  *
0005  * Test it with:
0006  *
0007  * perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c cat /etc/passwd > /dev/null
0008  *
0009  * This exactly matches what is marshalled into the raw_syscall:sys_enter
0010  * payload expected by the 'perf trace' beautifiers.
0011  *
0012  * For now it just uses the existing tracepoint augmentation code in 'perf
0013  * trace', in the next csets we'll hook up these with the sys_enter/sys_exit
0014  * code that will combine entry/exit in a strace like way.
0015  */
0016 
0017 #include <unistd.h>
0018 #include <linux/limits.h>
0019 #include <linux/socket.h>
0020 #include <pid_filter.h>
0021 
/*
 * bpf-output associated map: this is the map augmented__output() hands to
 * perf_event_output() together with BPF_F_CURRENT_CPU.
 *
 * NOTE(review): bpf_map() is defined outside this file; its arguments are
 * presumably (name, map type, key type, value type, max entries) - confirm.
 */
bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__);
0024 
0025 /*
0026  * string_args_len: one per syscall arg, 0 means not a string or don't copy it,
0027  *          PATH_MAX for copying everything, any other value to limit
0028  *          it a la 'strace -s strsize'.
0029  */
0030 struct syscall {
0031     bool    enabled;
0032     u16 string_args_len[6];
0033 };
0034 
/*
 * ARRAY map holding one struct syscall per slot, 512 slots.
 *
 * NOTE(review): presumably indexed by syscall number and filled in from
 * userspace; none of the programs visible in this file look it up - confirm.
 */
bpf_map(syscalls, ARRAY, int, struct syscall, 512);
0036 
/*
 * What to augment at entry?
 *
 * Pointer arg payloads (filenames, etc) passed from userspace to the kernel
 *
 * This is the PROG_ARRAY that sys_enter() tail-calls into, using the syscall
 * number as the index.
 */
bpf_map(syscalls_sys_enter, PROG_ARRAY, u32, u32, 512);
0043 
/*
 * What to augment at exit?
 *
 * Pointer arg payloads returned from the kernel (struct stat, etc) to userspace.
 *
 * This is the PROG_ARRAY that sys_exit() tail-calls into, using the syscall
 * number as the index.
 */
bpf_map(syscalls_sys_exit, PROG_ARRAY, u32, u32, 512);
0050 
/*
 * Layout of the raw_syscalls:sys_enter payload as consumed by this file.
 * sys_enter() copies one of these into the scratch buffer before tail-calling
 * the syscall specific augmenter.
 */
struct syscall_enter_args {
	/* NOTE(review): presumably the common tracepoint header - confirm. */
	unsigned long long common_tp_fields;
	/* Syscall number, used as the index for the tail call. */
	long syscall_nr;
	/* The six raw syscall arguments. */
	unsigned long args[6];
};
0056 
/*
 * Layout of the raw_syscalls:sys_exit payload as consumed by this file.
 * sys_exit() copies one of these to its stack to get at syscall_nr.
 */
struct syscall_exit_args {
	/* NOTE(review): presumably the common tracepoint header - confirm. */
	unsigned long long common_tp_fields;
	/* Syscall number, used as the index for the tail call. */
	long syscall_nr;
	/* NOTE(review): presumably the syscall return value - confirm. */
	long ret;
};
0062 
/*
 * One augmented (copied) pointer argument, as filled in by
 * augmented_arg__read_str().
 */
struct augmented_arg {
	/* Number of bytes stored in 'value'; 0 when the read failed. */
	unsigned int size;
	/* Non-positive probe_read_str() result when the read failed, else 0. */
	int err;
	/* The copied bytes themselves, at most PATH_MAX of them. */
	char value[PATH_MAX];
};
0068 
/*
 * Set of pids consulted through pid_filter__has() at the top of sys_enter()
 * and sys_exit(); events from a pid found here are filtered out (return 0).
 *
 * NOTE(review): pid_filter() presumably comes from <pid_filter.h> - confirm.
 */
pid_filter(pids_filtered);
0070 
0071 struct augmented_args_payload {
0072        struct syscall_enter_args args;
0073        union {
0074         struct {
0075             struct augmented_arg arg, arg2;
0076         };
0077         struct sockaddr_storage saddr;
0078     };
0079 };
0080 
// We need more tmp space than the BPF stack can give us, so the payload is
// assembled in this single-entry PERCPU_ARRAY; augmented_args_payload() looks
// up its only slot (key 0).
bpf_map(augmented_args_tmp, PERCPU_ARRAY, int, struct augmented_args_payload, 1);
0083 
0084 static inline struct augmented_args_payload *augmented_args_payload(void)
0085 {
0086     int key = 0;
0087     return bpf_map_lookup_elem(&augmented_args_tmp, &key);
0088 }
0089 
0090 static inline int augmented__output(void *ctx, struct augmented_args_payload *args, int len)
0091 {
0092     /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */
0093     return perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len);
0094 }
0095 
/*
 * Copy the string at 'arg' into augmented_arg->value and fill in the
 * size/err fields that precede it.
 *
 * @augmented_arg: record to fill in
 * @arg:           address of the string, handed to probe_read_str()
 * @arg_len:       maximum number of bytes probe_read_str() may copy
 *
 * Returns how many bytes of *augmented_arg the caller should account for:
 * the size/err fields plus the bytes read on success (masked with
 * sizeof(value) - 1), or just offsetof(struct augmented_arg, value) when
 * nothing was read.
 */
static inline
unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const void *arg, unsigned int arg_len)
{
    unsigned int augmented_len = sizeof(*augmented_arg);
    int string_len = probe_read_str(&augmented_arg->value, arg_len, arg);

    augmented_arg->size = augmented_arg->err = 0;
    /*
     * probe_read_str may return < 0, e.g. -EFAULT. When it does not return
     * a positive length, the result is stored in augmented_arg->err and
     * augmented_arg->size is left at 0.
     */
    if (string_len > 0) {
        /* Drop the unused tail of 'value' from the length. */
        augmented_len -= sizeof(augmented_arg->value) - string_len;
        /*
         * NOTE(review): presumably this mask is here to give the BPF
         * verifier a provable upper bound - confirm. If string_len equals
         * sizeof(value), the mask leaves only the size/err bytes in the
         * returned length while ->size still says string_len.
         */
        augmented_len &= sizeof(augmented_arg->value) - 1;
        augmented_arg->size = string_len;
    } else {
        /*
         * So that userspace notices the error while still being able
         * to skip this augmented arg record
         */
        augmented_arg->err = string_len;
        augmented_len = offsetof(struct augmented_arg, value);
    }

    return augmented_len;
}
0122 
0123 SEC("!raw_syscalls:unaugmented")
0124 int syscall_unaugmented(struct syscall_enter_args *args)
0125 {
0126     return 1;
0127 }
0128 
/*
 * These will be tail_called from SEC("raw_syscalls:sys_enter"), so they will
 * find in augmented_args_tmp what was read by that raw_syscalls:sys_enter and
 * go on from there, reading the syscall specific pointer args, e.g. open's
 * filename as a string or connect's socket address.
 */
0135 SEC("!syscalls:sys_enter_connect")
0136 int sys_enter_connect(struct syscall_enter_args *args)
0137 {
0138     struct augmented_args_payload *augmented_args = augmented_args_payload();
0139     const void *sockaddr_arg = (const void *)args->args[1];
0140     unsigned int socklen = args->args[2];
0141     unsigned int len = sizeof(augmented_args->args);
0142 
0143         if (augmented_args == NULL)
0144                 return 1; /* Failure: don't filter */
0145 
0146     if (socklen > sizeof(augmented_args->saddr))
0147         socklen = sizeof(augmented_args->saddr);
0148 
0149     probe_read(&augmented_args->saddr, socklen, sockaddr_arg);
0150 
0151     return augmented__output(args, augmented_args, len + socklen);
0152 }
0153 
0154 SEC("!syscalls:sys_enter_sendto")
0155 int sys_enter_sendto(struct syscall_enter_args *args)
0156 {
0157     struct augmented_args_payload *augmented_args = augmented_args_payload();
0158     const void *sockaddr_arg = (const void *)args->args[4];
0159     unsigned int socklen = args->args[5];
0160     unsigned int len = sizeof(augmented_args->args);
0161 
0162         if (augmented_args == NULL)
0163                 return 1; /* Failure: don't filter */
0164 
0165     if (socklen > sizeof(augmented_args->saddr))
0166         socklen = sizeof(augmented_args->saddr);
0167 
0168     probe_read(&augmented_args->saddr, socklen, sockaddr_arg);
0169 
0170     return augmented__output(args, augmented_args, len + socklen);
0171 }
0172 
0173 SEC("!syscalls:sys_enter_open")
0174 int sys_enter_open(struct syscall_enter_args *args)
0175 {
0176     struct augmented_args_payload *augmented_args = augmented_args_payload();
0177     const void *filename_arg = (const void *)args->args[0];
0178     unsigned int len = sizeof(augmented_args->args);
0179 
0180         if (augmented_args == NULL)
0181                 return 1; /* Failure: don't filter */
0182 
0183     len += augmented_arg__read_str(&augmented_args->arg, filename_arg, sizeof(augmented_args->arg.value));
0184 
0185     return augmented__output(args, augmented_args, len);
0186 }
0187 
0188 SEC("!syscalls:sys_enter_openat")
0189 int sys_enter_openat(struct syscall_enter_args *args)
0190 {
0191     struct augmented_args_payload *augmented_args = augmented_args_payload();
0192     const void *filename_arg = (const void *)args->args[1];
0193     unsigned int len = sizeof(augmented_args->args);
0194 
0195         if (augmented_args == NULL)
0196                 return 1; /* Failure: don't filter */
0197 
0198     len += augmented_arg__read_str(&augmented_args->arg, filename_arg, sizeof(augmented_args->arg.value));
0199 
0200     return augmented__output(args, augmented_args, len);
0201 }
0202 
0203 SEC("!syscalls:sys_enter_rename")
0204 int sys_enter_rename(struct syscall_enter_args *args)
0205 {
0206     struct augmented_args_payload *augmented_args = augmented_args_payload();
0207     const void *oldpath_arg = (const void *)args->args[0],
0208            *newpath_arg = (const void *)args->args[1];
0209     unsigned int len = sizeof(augmented_args->args), oldpath_len;
0210 
0211         if (augmented_args == NULL)
0212                 return 1; /* Failure: don't filter */
0213 
0214     oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value));
0215     len += oldpath_len + augmented_arg__read_str((void *)(&augmented_args->arg) + oldpath_len, newpath_arg, sizeof(augmented_args->arg.value));
0216 
0217     return augmented__output(args, augmented_args, len);
0218 }
0219 
0220 SEC("!syscalls:sys_enter_renameat")
0221 int sys_enter_renameat(struct syscall_enter_args *args)
0222 {
0223     struct augmented_args_payload *augmented_args = augmented_args_payload();
0224     const void *oldpath_arg = (const void *)args->args[1],
0225            *newpath_arg = (const void *)args->args[3];
0226     unsigned int len = sizeof(augmented_args->args), oldpath_len;
0227 
0228         if (augmented_args == NULL)
0229                 return 1; /* Failure: don't filter */
0230 
0231     oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value));
0232     len += oldpath_len + augmented_arg__read_str((void *)(&augmented_args->arg) + oldpath_len, newpath_arg, sizeof(augmented_args->arg.value));
0233 
0234     return augmented__output(args, augmented_args, len);
0235 }
0236 
0237 SEC("raw_syscalls:sys_enter")
0238 int sys_enter(struct syscall_enter_args *args)
0239 {
0240     struct augmented_args_payload *augmented_args;
0241     /*
0242      * We start len, the amount of data that will be in the perf ring
0243      * buffer, if this is not filtered out by one of pid_filter__has(),
0244      * syscall->enabled, etc, with the non-augmented raw syscall payload,
0245      * i.e. sizeof(augmented_args->args).
0246      *
0247      * We'll add to this as we add augmented syscalls right after that
0248      * initial, non-augmented raw_syscalls:sys_enter payload.
0249      */
0250     unsigned int len = sizeof(augmented_args->args);
0251     struct syscall *syscall;
0252 
0253     if (pid_filter__has(&pids_filtered, getpid()))
0254         return 0;
0255 
0256     augmented_args = augmented_args_payload();
0257     if (augmented_args == NULL)
0258         return 1;
0259 
0260     probe_read(&augmented_args->args, sizeof(augmented_args->args), args);
0261 
0262     /*
0263      * Jump to syscall specific augmenter, even if the default one,
0264      * "!raw_syscalls:unaugmented" that will just return 1 to return the
0265      * unaugmented tracepoint payload.
0266      */
0267     bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr);
0268 
0269     // If not found on the PROG_ARRAY syscalls map, then we're filtering it:
0270     return 0;
0271 }
0272 
0273 SEC("raw_syscalls:sys_exit")
0274 int sys_exit(struct syscall_exit_args *args)
0275 {
0276     struct syscall_exit_args exit_args;
0277 
0278     if (pid_filter__has(&pids_filtered, getpid()))
0279         return 0;
0280 
0281     probe_read(&exit_args, sizeof(exit_args), args);
0282     /*
0283      * Jump to syscall specific return augmenter, even if the default one,
0284      * "!raw_syscalls:unaugmented" that will just return 1 to return the
0285      * unaugmented tracepoint payload.
0286      */
0287     bpf_tail_call(args, &syscalls_sys_exit, exit_args.syscall_nr);
0288     /*
0289      * If not found on the PROG_ARRAY syscalls map, then we're filtering it:
0290      */
0291     return 0;
0292 }
0293 
/*
 * Declare the license of this BPF object as GPL.
 * NOTE(review): license() is a macro defined outside this file - confirm.
 */
license(GPL);