// SPDX-License-Identifier: GPL-2.0
#include <trace/syscall.h>
#include <trace/events/syscalls.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/module.h>   /* for MODULE_NAME_LEN via KSYM_SYMBOL_LEN */
#include <linux/ftrace.h>
#include <linux/perf_event.h>
#include <linux/xarray.h>
#include <asm/syscall.h>

#include "trace_output.h"
#include "trace.h"

static DEFINE_MUTEX(syscall_trace_lock);

static int syscall_enter_register(struct trace_event_call *event,
                 enum trace_reg type, void *data);
static int syscall_exit_register(struct trace_event_call *event,
                 enum trace_reg type, void *data);

static struct list_head *
syscall_get_enter_fields(struct trace_event_call *call)
{
    struct syscall_metadata *entry = call->data;

    return &entry->enter_fields;
}

extern struct syscall_metadata *__start_syscalls_metadata[];
extern struct syscall_metadata *__stop_syscalls_metadata[];

static DEFINE_XARRAY(syscalls_metadata_sparse);
static struct syscall_metadata **syscalls_metadata;

#ifndef ARCH_HAS_SYSCALL_MATCH_SYM_NAME
static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
{
    /*
     * Only compare after the "sys" prefix. Archs that use
     * syscall wrappers may have syscall symbol aliases prefixed
     * with ".SyS" or ".sys" instead of "sys", leading to an unwanted
     * mismatch.
     */
    return !strcmp(sym + 3, name + 3);
}
#endif

#ifdef ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
/*
 * Some architectures that allow 32bit applications
 * to run on a 64bit kernel do not map the syscalls for
 * the 32bit tasks the same as they do for 64bit tasks.
 *
 *     *cough*x86*cough*
 *
 * In such a case, instead of reporting the wrong syscalls,
 * simply ignore them.
 *
 * For an arch to ignore the compat syscalls it needs to
 * define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS as well as
 * define the function arch_trace_is_compat_syscall() to let
 * the tracing system know that it should ignore them.
 */
static int
trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs)
{
    if (unlikely(arch_trace_is_compat_syscall(regs)))
        return -1;

    return syscall_get_nr(task, regs);
}
#else
static inline int
trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs)
{
    return syscall_get_nr(task, regs);
}
#endif /* ARCH_TRACE_IGNORE_COMPAT_SYSCALLS */

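/*
 * Look up the compiled-in metadata for a syscall by the address of its
 * handler: resolve the address to a symbol name via kallsyms and scan the
 * __syscalls_metadata section for an entry with a matching name.
 */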
static __init struct syscall_metadata *
find_syscall_meta(unsigned long syscall)
{
    struct syscall_metadata **start;
    struct syscall_metadata **stop;
    char str[KSYM_SYMBOL_LEN];


    start = __start_syscalls_metadata;
    stop = __stop_syscalls_metadata;
    kallsyms_lookup(syscall, NULL, NULL, NULL, str);

    if (arch_syscall_match_sym_name(str, "sys_ni_syscall"))
        return NULL;

    for ( ; start < stop; start++) {
        if ((*start)->name && arch_syscall_match_sym_name(str, (*start)->name))
            return *start;
    }
    return NULL;
}

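/*
 * Map a syscall number to its metadata.  With CONFIG_HAVE_SPARSE_SYSCALL_NR
 * the mapping lives in the syscalls_metadata_sparse xarray; otherwise it is
 * a flat array indexed by syscall number.
 */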
static struct syscall_metadata *syscall_nr_to_meta(int nr)
{
    if (IS_ENABLED(CONFIG_HAVE_SPARSE_SYSCALL_NR))
        return xa_load(&syscalls_metadata_sparse, (unsigned long)nr);

    if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
        return NULL;

    return syscalls_metadata[nr];
}

const char *get_syscall_name(int syscall)
{
    struct syscall_metadata *entry;

    entry = syscall_nr_to_meta(syscall);
    if (!entry)
        return NULL;

    return entry->name;
}

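/*
 * Format a sys_enter event for trace output as "name(arg: value, ...)",
 * prefixing each value with its type when the verbose trace option is set.
 */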
static enum print_line_t
print_syscall_enter(struct trace_iterator *iter, int flags,
            struct trace_event *event)
{
    struct trace_array *tr = iter->tr;
    struct trace_seq *s = &iter->seq;
    struct trace_entry *ent = iter->ent;
    struct syscall_trace_enter *trace;
    struct syscall_metadata *entry;
    int i, syscall;

    trace = (typeof(trace))ent;
    syscall = trace->nr;
    entry = syscall_nr_to_meta(syscall);

    if (!entry)
        goto end;

    if (entry->enter_event->event.type != ent->type) {
        WARN_ON_ONCE(1);
        goto end;
    }

    trace_seq_printf(s, "%s(", entry->name);

    for (i = 0; i < entry->nb_args; i++) {

        if (trace_seq_has_overflowed(s))
            goto end;

        /* parameter types */
        if (tr && tr->trace_flags & TRACE_ITER_VERBOSE)
            trace_seq_printf(s, "%s ", entry->types[i]);

        /* parameter values */
        trace_seq_printf(s, "%s: %lx%s", entry->args[i],
                 trace->args[i],
                 i == entry->nb_args - 1 ? "" : ", ");
    }

    trace_seq_putc(s, ')');
end:
    trace_seq_putc(s, '\n');

    return trace_handle_return(s);
}

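/* Format a sys_exit event for trace output as "name -> 0x<return value>". */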
static enum print_line_t
print_syscall_exit(struct trace_iterator *iter, int flags,
           struct trace_event *event)
{
    struct trace_seq *s = &iter->seq;
    struct trace_entry *ent = iter->ent;
    struct syscall_trace_exit *trace;
    int syscall;
    struct syscall_metadata *entry;

    trace = (typeof(trace))ent;
    syscall = trace->nr;
    entry = syscall_nr_to_meta(syscall);

    if (!entry) {
        trace_seq_putc(s, '\n');
        goto out;
    }

    if (entry->exit_event->event.type != ent->type) {
        WARN_ON_ONCE(1);
        return TRACE_TYPE_UNHANDLED;
    }

    trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
                trace->ret);

 out:
    return trace_handle_return(s);
}

extern char *__bad_type_size(void);

#define SYSCALL_FIELD(_type, _name) {                   \
    .type = #_type, .name = #_name,                 \
    .size = sizeof(_type), .align = __alignof__(_type),     \
    .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }

static int __init
__set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
{
    int i;
    int pos = 0;

    /* When len=0, we just calculate the needed length */
#define LEN_OR_ZERO (len ? len - pos : 0)

    pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
    for (i = 0; i < entry->nb_args; i++) {
        pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
                entry->args[i], sizeof(unsigned long),
                i == entry->nb_args - 1 ? "" : ", ");
    }
    pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");

    for (i = 0; i < entry->nb_args; i++) {
        pos += snprintf(buf + pos, LEN_OR_ZERO,
                ", ((unsigned long)(REC->%s))", entry->args[i]);
    }

#undef LEN_OR_ZERO

    /* return the length of print_fmt */
    return pos;
}

static int __init set_syscall_print_fmt(struct trace_event_call *call)
{
    char *print_fmt;
    int len;
    struct syscall_metadata *entry = call->data;

    if (entry->enter_event != call) {
        call->print_fmt = "\"0x%lx\", REC->ret";
        return 0;
    }

    /* First: called with 0 length to calculate the needed length */
    len = __set_enter_print_fmt(entry, NULL, 0);

    print_fmt = kmalloc(len + 1, GFP_KERNEL);
    if (!print_fmt)
        return -ENOMEM;

    /* Second: actually write the @print_fmt */
    __set_enter_print_fmt(entry, print_fmt, len + 1);
    call->print_fmt = print_fmt;

    return 0;
}

static void __init free_syscall_print_fmt(struct trace_event_call *call)
{
    struct syscall_metadata *entry = call->data;

    if (entry->enter_event == call)
        kfree(call->print_fmt);
}

static int __init syscall_enter_define_fields(struct trace_event_call *call)
{
    struct syscall_trace_enter trace;
    struct syscall_metadata *meta = call->data;
    int offset = offsetof(typeof(trace), args);
    int ret = 0;
    int i;

    for (i = 0; i < meta->nb_args; i++) {
        ret = trace_define_field(call, meta->types[i],
                     meta->args[i], offset,
                     sizeof(unsigned long), 0,
                     FILTER_OTHER);
        if (ret)
            break;
        offset += sizeof(unsigned long);
    }

    return ret;
}

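/*
 * Probe attached to the sys_enter tracepoint on behalf of a trace_array:
 * record the syscall number and its arguments into that instance's ring
 * buffer.
 */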
static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
{
    struct trace_array *tr = data;
    struct trace_event_file *trace_file;
    struct syscall_trace_enter *entry;
    struct syscall_metadata *sys_data;
    struct trace_event_buffer fbuffer;
    unsigned long args[6];
    int syscall_nr;
    int size;

    syscall_nr = trace_get_syscall_nr(current, regs);
    if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
        return;

    /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE) */
    trace_file = rcu_dereference_sched(tr->enter_syscall_files[syscall_nr]);
    if (!trace_file)
        return;

    if (trace_trigger_soft_disabled(trace_file))
        return;

    sys_data = syscall_nr_to_meta(syscall_nr);
    if (!sys_data)
        return;

    size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;

    entry = trace_event_buffer_reserve(&fbuffer, trace_file, size);
    if (!entry)
        return;

    entry = ring_buffer_event_data(fbuffer.event);
    entry->nr = syscall_nr;
    syscall_get_arguments(current, regs, args);
    memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args);

    trace_event_buffer_commit(&fbuffer);
}

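/*
 * Probe attached to the sys_exit tracepoint on behalf of a trace_array:
 * record the syscall number and its return value into that instance's ring
 * buffer.
 */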
static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
{
    struct trace_array *tr = data;
    struct trace_event_file *trace_file;
    struct syscall_trace_exit *entry;
    struct syscall_metadata *sys_data;
    struct trace_event_buffer fbuffer;
    int syscall_nr;

    syscall_nr = trace_get_syscall_nr(current, regs);
    if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
        return;

    /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE()) */
    trace_file = rcu_dereference_sched(tr->exit_syscall_files[syscall_nr]);
    if (!trace_file)
        return;

    if (trace_trigger_soft_disabled(trace_file))
        return;

    sys_data = syscall_nr_to_meta(syscall_nr);
    if (!sys_data)
        return;

    entry = trace_event_buffer_reserve(&fbuffer, trace_file, sizeof(*entry));
    if (!entry)
        return;

    entry = ring_buffer_event_data(fbuffer.event);
    entry->nr = syscall_nr;
    entry->ret = syscall_get_return_value(current, regs);

    trace_event_buffer_commit(&fbuffer);
}

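/*
 * Enable sys_enter tracing of one syscall for a trace instance.  A single
 * tracepoint probe is shared by all syscalls of the instance and is only
 * registered for the first user, refcounted via tr->sys_refcount_enter.
 */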
static int reg_event_syscall_enter(struct trace_event_file *file,
                   struct trace_event_call *call)
{
    struct trace_array *tr = file->tr;
    int ret = 0;
    int num;

    num = ((struct syscall_metadata *)call->data)->syscall_nr;
    if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
        return -ENOSYS;
    mutex_lock(&syscall_trace_lock);
    if (!tr->sys_refcount_enter)
        ret = register_trace_sys_enter(ftrace_syscall_enter, tr);
    if (!ret) {
        rcu_assign_pointer(tr->enter_syscall_files[num], file);
        tr->sys_refcount_enter++;
    }
    mutex_unlock(&syscall_trace_lock);
    return ret;
}

static void unreg_event_syscall_enter(struct trace_event_file *file,
                      struct trace_event_call *call)
{
    struct trace_array *tr = file->tr;
    int num;

    num = ((struct syscall_metadata *)call->data)->syscall_nr;
    if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
        return;
    mutex_lock(&syscall_trace_lock);
    tr->sys_refcount_enter--;
    RCU_INIT_POINTER(tr->enter_syscall_files[num], NULL);
    if (!tr->sys_refcount_enter)
        unregister_trace_sys_enter(ftrace_syscall_enter, tr);
    mutex_unlock(&syscall_trace_lock);
}

static int reg_event_syscall_exit(struct trace_event_file *file,
                  struct trace_event_call *call)
{
    struct trace_array *tr = file->tr;
    int ret = 0;
    int num;

    num = ((struct syscall_metadata *)call->data)->syscall_nr;
    if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
        return -ENOSYS;
    mutex_lock(&syscall_trace_lock);
    if (!tr->sys_refcount_exit)
        ret = register_trace_sys_exit(ftrace_syscall_exit, tr);
    if (!ret) {
        rcu_assign_pointer(tr->exit_syscall_files[num], file);
        tr->sys_refcount_exit++;
    }
    mutex_unlock(&syscall_trace_lock);
    return ret;
}

static void unreg_event_syscall_exit(struct trace_event_file *file,
                     struct trace_event_call *call)
{
    struct trace_array *tr = file->tr;
    int num;

    num = ((struct syscall_metadata *)call->data)->syscall_nr;
    if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
        return;
    mutex_lock(&syscall_trace_lock);
    tr->sys_refcount_exit--;
    RCU_INIT_POINTER(tr->exit_syscall_files[num], NULL);
    if (!tr->sys_refcount_exit)
        unregister_trace_sys_exit(ftrace_syscall_exit, tr);
    mutex_unlock(&syscall_trace_lock);
}

static int __init init_syscall_trace(struct trace_event_call *call)
{
    int id;
    int num;

    num = ((struct syscall_metadata *)call->data)->syscall_nr;
    if (num < 0 || num >= NR_syscalls) {
        pr_debug("syscall %s metadata not mapped, disabling ftrace event\n",
                ((struct syscall_metadata *)call->data)->name);
        return -ENOSYS;
    }

    if (set_syscall_print_fmt(call) < 0)
        return -ENOMEM;

    id = trace_event_raw_init(call);

    if (id < 0) {
        free_syscall_print_fmt(call);
        return id;
    }

    return id;
}

static struct trace_event_fields __refdata syscall_enter_fields_array[] = {
    SYSCALL_FIELD(int, __syscall_nr),
    { .type = TRACE_FUNCTION_TYPE,
      .define_fields = syscall_enter_define_fields },
    {}
};

struct trace_event_functions enter_syscall_print_funcs = {
    .trace      = print_syscall_enter,
};

struct trace_event_functions exit_syscall_print_funcs = {
    .trace      = print_syscall_exit,
};

struct trace_event_class __refdata event_class_syscall_enter = {
    .system     = "syscalls",
    .reg        = syscall_enter_register,
    .fields_array   = syscall_enter_fields_array,
    .get_fields = syscall_get_enter_fields,
    .raw_init   = init_syscall_trace,
};

struct trace_event_class __refdata event_class_syscall_exit = {
    .system     = "syscalls",
    .reg        = syscall_exit_register,
    .fields_array   = (struct trace_event_fields[]){
        SYSCALL_FIELD(int, __syscall_nr),
        SYSCALL_FIELD(long, ret),
        {}
    },
    .fields     = LIST_HEAD_INIT(event_class_syscall_exit.fields),
    .raw_init   = init_syscall_trace,
};

unsigned long __init __weak arch_syscall_addr(int nr)
{
    return (unsigned long)sys_call_table[nr];
}

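/*
 * Build the syscall number -> metadata mapping at boot time by walking the
 * syscall table and matching each handler address against the compiled-in
 * __syscalls_metadata entries.
 */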
void __init init_ftrace_syscalls(void)
{
    struct syscall_metadata *meta;
    unsigned long addr;
    int i;
    void *ret;

    if (!IS_ENABLED(CONFIG_HAVE_SPARSE_SYSCALL_NR)) {
        syscalls_metadata = kcalloc(NR_syscalls,
                    sizeof(*syscalls_metadata),
                    GFP_KERNEL);
        if (!syscalls_metadata) {
            WARN_ON(1);
            return;
        }
    }

    for (i = 0; i < NR_syscalls; i++) {
        addr = arch_syscall_addr(i);
        meta = find_syscall_meta(addr);
        if (!meta)
            continue;

        meta->syscall_nr = i;

        if (!IS_ENABLED(CONFIG_HAVE_SPARSE_SYSCALL_NR)) {
            syscalls_metadata[i] = meta;
        } else {
            ret = xa_store(&syscalls_metadata_sparse, i, meta,
                    GFP_KERNEL);
            WARN(xa_is_err(ret),
                "Syscall memory allocation failed\n");
        }

    }
}

#ifdef CONFIG_PERF_EVENTS

static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
static int sys_perf_refcount_enter;
static int sys_perf_refcount_exit;

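/*
 * Hand a sys_enter record to the BPF programs attached to the event.  The
 * on-stack syscall_tp_t is the context passed to the programs; the pt_regs
 * pointer is stashed in its leading slot.
 */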
static int perf_call_bpf_enter(struct trace_event_call *call, struct pt_regs *regs,
                   struct syscall_metadata *sys_data,
                   struct syscall_trace_enter *rec)
{
    struct syscall_tp_t {
        unsigned long long regs;
        unsigned long syscall_nr;
        unsigned long args[SYSCALL_DEFINE_MAXARGS];
    } param;
    int i;

    *(struct pt_regs **)&param = regs;
    param.syscall_nr = rec->nr;
    for (i = 0; i < sys_data->nb_args; i++)
        param.args[i] = rec->args[i];
    return trace_call_bpf(call, &param);
}

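/*
 * Probe attached to the sys_enter tracepoint for perf: build a raw record
 * with the syscall number and arguments, run any attached BPF programs and
 * submit the record to the perf buffer.
 */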
static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
{
    struct syscall_metadata *sys_data;
    struct syscall_trace_enter *rec;
    struct hlist_head *head;
    unsigned long args[6];
    bool valid_prog_array;
    int syscall_nr;
    int rctx;
    int size;

    syscall_nr = trace_get_syscall_nr(current, regs);
    if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
        return;
    if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
        return;

    sys_data = syscall_nr_to_meta(syscall_nr);
    if (!sys_data)
        return;

    head = this_cpu_ptr(sys_data->enter_event->perf_events);
    valid_prog_array = bpf_prog_array_valid(sys_data->enter_event);
    if (!valid_prog_array && hlist_empty(head))
        return;

    /* get the size after alignment with the u32 buffer size field */
    size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
    size = ALIGN(size + sizeof(u32), sizeof(u64));
    size -= sizeof(u32);

    rec = perf_trace_buf_alloc(size, NULL, &rctx);
    if (!rec)
        return;

    rec->nr = syscall_nr;
    syscall_get_arguments(current, regs, args);
    memcpy(&rec->args, args, sizeof(unsigned long) * sys_data->nb_args);

    if ((valid_prog_array &&
         !perf_call_bpf_enter(sys_data->enter_event, regs, sys_data, rec)) ||
        hlist_empty(head)) {
        perf_swevent_put_recursion_context(rctx);
        return;
    }

    perf_trace_buf_submit(rec, size, rctx,
                  sys_data->enter_event->event.type, 1, regs,
                  head, NULL);
}

static int perf_sysenter_enable(struct trace_event_call *call)
{
    int ret = 0;
    int num;

    num = ((struct syscall_metadata *)call->data)->syscall_nr;

    mutex_lock(&syscall_trace_lock);
    if (!sys_perf_refcount_enter)
        ret = register_trace_sys_enter(perf_syscall_enter, NULL);
    if (ret) {
        pr_info("event trace: Could not activate syscall entry trace point");
    } else {
        set_bit(num, enabled_perf_enter_syscalls);
        sys_perf_refcount_enter++;
    }
    mutex_unlock(&syscall_trace_lock);
    return ret;
}

static void perf_sysenter_disable(struct trace_event_call *call)
{
    int num;

    num = ((struct syscall_metadata *)call->data)->syscall_nr;

    mutex_lock(&syscall_trace_lock);
    sys_perf_refcount_enter--;
    clear_bit(num, enabled_perf_enter_syscalls);
    if (!sys_perf_refcount_enter)
        unregister_trace_sys_enter(perf_syscall_enter, NULL);
    mutex_unlock(&syscall_trace_lock);
}

static int perf_call_bpf_exit(struct trace_event_call *call, struct pt_regs *regs,
                  struct syscall_trace_exit *rec)
{
    struct syscall_tp_t {
        unsigned long long regs;
        unsigned long syscall_nr;
        unsigned long ret;
    } param;

    *(struct pt_regs **)&param = regs;
    param.syscall_nr = rec->nr;
    param.ret = rec->ret;
    return trace_call_bpf(call, &param);
}

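/*
 * Probe attached to the sys_exit tracepoint for perf: build a raw record
 * with the syscall number and return value, run any attached BPF programs
 * and submit the record to the perf buffer.
 */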
static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
{
    struct syscall_metadata *sys_data;
    struct syscall_trace_exit *rec;
    struct hlist_head *head;
    bool valid_prog_array;
    int syscall_nr;
    int rctx;
    int size;

    syscall_nr = trace_get_syscall_nr(current, regs);
    if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
        return;
    if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
        return;

    sys_data = syscall_nr_to_meta(syscall_nr);
    if (!sys_data)
        return;

    head = this_cpu_ptr(sys_data->exit_event->perf_events);
    valid_prog_array = bpf_prog_array_valid(sys_data->exit_event);
    if (!valid_prog_array && hlist_empty(head))
        return;

    /* We can probably do that at build time */
    size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
    size -= sizeof(u32);

    rec = perf_trace_buf_alloc(size, NULL, &rctx);
    if (!rec)
        return;

    rec->nr = syscall_nr;
    rec->ret = syscall_get_return_value(current, regs);

    if ((valid_prog_array &&
         !perf_call_bpf_exit(sys_data->exit_event, regs, rec)) ||
        hlist_empty(head)) {
        perf_swevent_put_recursion_context(rctx);
        return;
    }

    perf_trace_buf_submit(rec, size, rctx, sys_data->exit_event->event.type,
                  1, regs, head, NULL);
}

static int perf_sysexit_enable(struct trace_event_call *call)
{
    int ret = 0;
    int num;

    num = ((struct syscall_metadata *)call->data)->syscall_nr;

    mutex_lock(&syscall_trace_lock);
    if (!sys_perf_refcount_exit)
        ret = register_trace_sys_exit(perf_syscall_exit, NULL);
    if (ret) {
        pr_info("event trace: Could not activate syscall exit trace point");
    } else {
        set_bit(num, enabled_perf_exit_syscalls);
        sys_perf_refcount_exit++;
    }
    mutex_unlock(&syscall_trace_lock);
    return ret;
}

static void perf_sysexit_disable(struct trace_event_call *call)
{
    int num;

    num = ((struct syscall_metadata *)call->data)->syscall_nr;

    mutex_lock(&syscall_trace_lock);
    sys_perf_refcount_exit--;
    clear_bit(num, enabled_perf_exit_syscalls);
    if (!sys_perf_refcount_exit)
        unregister_trace_sys_exit(perf_syscall_exit, NULL);
    mutex_unlock(&syscall_trace_lock);
}

#endif /* CONFIG_PERF_EVENTS */

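/*
 * ->reg() callback of the sys_enter event class: dispatch ftrace and perf
 * (un)registration requests for a single syscall's enter event.
 */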
static int syscall_enter_register(struct trace_event_call *event,
                 enum trace_reg type, void *data)
{
    struct trace_event_file *file = data;

    switch (type) {
    case TRACE_REG_REGISTER:
        return reg_event_syscall_enter(file, event);
    case TRACE_REG_UNREGISTER:
        unreg_event_syscall_enter(file, event);
        return 0;

#ifdef CONFIG_PERF_EVENTS
    case TRACE_REG_PERF_REGISTER:
        return perf_sysenter_enable(event);
    case TRACE_REG_PERF_UNREGISTER:
        perf_sysenter_disable(event);
        return 0;
    case TRACE_REG_PERF_OPEN:
    case TRACE_REG_PERF_CLOSE:
    case TRACE_REG_PERF_ADD:
    case TRACE_REG_PERF_DEL:
        return 0;
#endif
    }
    return 0;
}

static int syscall_exit_register(struct trace_event_call *event,
                 enum trace_reg type, void *data)
{
    struct trace_event_file *file = data;

    switch (type) {
    case TRACE_REG_REGISTER:
        return reg_event_syscall_exit(file, event);
    case TRACE_REG_UNREGISTER:
        unreg_event_syscall_exit(file, event);
        return 0;

#ifdef CONFIG_PERF_EVENTS
    case TRACE_REG_PERF_REGISTER:
        return perf_sysexit_enable(event);
    case TRACE_REG_PERF_UNREGISTER:
        perf_sysexit_disable(event);
        return 0;
    case TRACE_REG_PERF_OPEN:
    case TRACE_REG_PERF_CLOSE:
    case TRACE_REG_PERF_ADD:
    case TRACE_REG_PERF_DEL:
        return 0;
#endif
    }
    return 0;
}