// SPDX-License-Identifier: GPL-2.0
/*
 * trace event based perf event profiling/tracing
 */

#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/security.h>
#include "trace.h"
#include "trace_probe.h"

static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];

/*
 * Force the buffer type to be aligned to unsigned long, to avoid
 * misaligned access surprises when raw sample data is copied into it.
 */
typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
	perf_trace_t;

/* Count of trace event calls (event ids) currently in use by perf. */
static int	total_ref_count;

static int perf_trace_event_perm(struct trace_event_call *tp_event,
				 struct perf_event *p_event)
{
	int ret;

	if (tp_event->perf_perm) {
		ret = tp_event->perf_perm(tp_event, p_event);
		if (ret)
			return ret;
	}

	/*
	 * Permission was already checked when the parent event was
	 * created; allow children without re-checking.
	 */
	if (p_event->parent)
		return 0;

	/*
	 * Checking the current (owner) task's permission here is fine,
	 * because the code below is only reached via the perf_event_open
	 * syscall.
	 */

	/* The ftrace function trace is allowed only for privileged users. */
	if (ftrace_event_is_function(tp_event)) {
		ret = perf_allow_tracepoint(&p_event->attr);
		if (ret)
			return ret;

		if (!is_sampling_event(p_event))
			return 0;

		/*
		 * User space callchains are not allowed for function trace
		 * events: taking page faults while tracing (e.g. inside the
		 * page fault handler itself) is too fragile.
		 */
		if (!p_event->attr.exclude_callchain_user)
			return -EINVAL;

		/*
		 * User stack dumps are rejected for the same reason as
		 * user space callchains above.
		 */
		if (p_event->attr.sample_type & PERF_SAMPLE_STACK_USER)
			return -EINVAL;
	}

	/* No raw sample data requested, just counting: no obvious leak. */
	if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
		return 0;

	/* Events flagged CAP_ANY are ok to be traced by task-bound, unprivileged users... */
	if (p_event->attach_state == PERF_ATTACH_TASK) {
		if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY)
			return 0;
	}

	/*
	 * ...otherwise raw tracepoint data can be a severe data leak,
	 * so only privileged users may request it.
	 */
	ret = perf_allow_tracepoint(&p_event->attr);
	if (ret)
		return ret;

	return 0;
}

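/*
 * Register @tp_event for use by perf. The first perf user of an event
 * allocates the per-CPU hlist of events; the very first user overall also
 * allocates the per-context raw sample buffers. Finally the event class'
 * reg() callback is invoked with TRACE_REG_PERF_REGISTER.
 */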
static int perf_trace_event_reg(struct trace_event_call *tp_event,
				struct perf_event *p_event)
{
	struct hlist_head __percpu *list;
	int ret = -ENOMEM;
	int cpu;

	p_event->tp_event = tp_event;
	if (tp_event->perf_refcount++ > 0)
		return 0;

	list = alloc_percpu(struct hlist_head);
	if (!list)
		goto fail;

	for_each_possible_cpu(cpu)
		INIT_HLIST_HEAD(per_cpu_ptr(list, cpu));

	tp_event->perf_events = list;

	if (!total_ref_count) {
		char __percpu *buf;
		int i;

		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
			buf = (char __percpu *)alloc_percpu(perf_trace_t);
			if (!buf)
				goto fail;

			perf_trace_buf[i] = buf;
		}
	}

	ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER, NULL);
	if (ret)
		goto fail;

	total_ref_count++;
	return 0;

fail:
	if (!total_ref_count) {
		int i;

		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
			free_percpu(perf_trace_buf[i]);
			perf_trace_buf[i] = NULL;
		}
	}

	if (!--tp_event->perf_refcount) {
		free_percpu(tp_event->perf_events);
		tp_event->perf_events = NULL;
	}

	return ret;
}

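/*
 * Drop one perf reference on the event. The last user unregisters it from
 * perf, waits for in-flight probe callbacks to finish, and frees the
 * per-CPU hlist; the last user overall also frees the raw sample buffers.
 */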
static void perf_trace_event_unreg(struct perf_event *p_event)
{
	struct trace_event_call *tp_event = p_event->tp_event;
	int i;

	if (--tp_event->perf_refcount > 0)
		return;

	tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER, NULL);

	/*
	 * Ensure our callback won't be called anymore. The buffers
	 * can only be freed after that.
	 */
	tracepoint_synchronize_unregister();

	free_percpu(tp_event->perf_events);
	tp_event->perf_events = NULL;

	if (!--total_ref_count) {
		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
			free_percpu(perf_trace_buf[i]);
			perf_trace_buf[i] = NULL;
		}
	}
}

static int perf_trace_event_open(struct perf_event *p_event)
{
	struct trace_event_call *tp_event = p_event->tp_event;
	return tp_event->class->reg(tp_event, TRACE_REG_PERF_OPEN, p_event);
}

static void perf_trace_event_close(struct perf_event *p_event)
{
	struct trace_event_call *tp_event = p_event->tp_event;
	tp_event->class->reg(tp_event, TRACE_REG_PERF_CLOSE, p_event);
}

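/*
 * Full setup of one perf event: permission check, registration and the
 * TRACE_REG_PERF_OPEN callback. On open failure the registration is
 * rolled back.
 */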
static int perf_trace_event_init(struct trace_event_call *tp_event,
				 struct perf_event *p_event)
{
	int ret;

	ret = perf_trace_event_perm(tp_event, p_event);
	if (ret)
		return ret;

	ret = perf_trace_event_reg(tp_event, p_event);
	if (ret)
		return ret;

	ret = perf_trace_event_open(p_event);
	if (ret) {
		perf_trace_event_unreg(p_event);
		return ret;
	}

	return 0;
}

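/*
 * Entry point used by the perf core when a tracepoint event is created:
 * look up the trace event call matching attr.config and initialize it for
 * perf use, keeping a reference on the event while it is in use.
 */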
int perf_trace_init(struct perf_event *p_event)
{
	struct trace_event_call *tp_event;
	u64 event_id = p_event->attr.config;
	int ret = -EINVAL;

	mutex_lock(&event_mutex);
	list_for_each_entry(tp_event, &ftrace_events, list) {
		if (tp_event->event.type == event_id &&
		    tp_event->class && tp_event->class->reg &&
		    trace_event_try_get_ref(tp_event)) {
			ret = perf_trace_event_init(tp_event, p_event);
			if (ret)
				trace_event_put_ref(tp_event);
			break;
		}
	}
	mutex_unlock(&event_mutex);

	return ret;
}

void perf_trace_destroy(struct perf_event *p_event)
{
	mutex_lock(&event_mutex);
	perf_trace_event_close(p_event);
	perf_trace_event_unreg(p_event);
	trace_event_put_ref(p_event->tp_event);
	mutex_unlock(&event_mutex);
}

#ifdef CONFIG_KPROBE_EVENTS
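/*
 * Set up a perf event backed by a local (perf-only) kprobe. The probed
 * symbol name, if any, is copied in from user space; otherwise the raw
 * kprobe_addr from the attr is used.
 */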
int perf_kprobe_init(struct perf_event *p_event, bool is_retprobe)
{
	int ret;
	char *func = NULL;
	struct trace_event_call *tp_event;

	if (p_event->attr.kprobe_func) {
		func = kzalloc(KSYM_NAME_LEN, GFP_KERNEL);
		if (!func)
			return -ENOMEM;
		ret = strncpy_from_user(
			func, u64_to_user_ptr(p_event->attr.kprobe_func),
			KSYM_NAME_LEN);
		if (ret == KSYM_NAME_LEN)
			ret = -E2BIG;
		if (ret < 0)
			goto out;

		if (func[0] == '\0') {
			kfree(func);
			func = NULL;
		}
	}

	tp_event = create_local_trace_kprobe(
		func, (void *)(unsigned long)(p_event->attr.kprobe_addr),
		p_event->attr.probe_offset, is_retprobe);
	if (IS_ERR(tp_event)) {
		ret = PTR_ERR(tp_event);
		goto out;
	}

	mutex_lock(&event_mutex);
	ret = perf_trace_event_init(tp_event, p_event);
	if (ret)
		destroy_local_trace_kprobe(tp_event);
	mutex_unlock(&event_mutex);
out:
	kfree(func);
	return ret;
}

void perf_kprobe_destroy(struct perf_event *p_event)
{
	mutex_lock(&event_mutex);
	perf_trace_event_close(p_event);
	perf_trace_event_unreg(p_event);
	trace_event_put_ref(p_event->tp_event);
	mutex_unlock(&event_mutex);

	destroy_local_trace_kprobe(p_event->tp_event);
}
#endif /* CONFIG_KPROBE_EVENTS */

#ifdef CONFIG_UPROBE_EVENTS
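/*
 * Set up a perf event backed by a local (perf-only) uprobe. The path of
 * the probed binary is copied in from user space and must be non-empty.
 */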
int perf_uprobe_init(struct perf_event *p_event,
		     unsigned long ref_ctr_offset, bool is_retprobe)
{
	int ret;
	char *path = NULL;
	struct trace_event_call *tp_event;

	if (!p_event->attr.uprobe_path)
		return -EINVAL;

	path = strndup_user(u64_to_user_ptr(p_event->attr.uprobe_path),
			    PATH_MAX);
	if (IS_ERR(path)) {
		ret = PTR_ERR(path);
		return (ret == -EINVAL) ? -E2BIG : ret;
	}
	if (path[0] == '\0') {
		ret = -EINVAL;
		goto out;
	}

	tp_event = create_local_trace_uprobe(path, p_event->attr.probe_offset,
					     ref_ctr_offset, is_retprobe);
	if (IS_ERR(tp_event)) {
		ret = PTR_ERR(tp_event);
		goto out;
	}

	/*
	 * Local trace_uprobes need to hold event_mutex to call
	 * uprobe_buffer_enable() and uprobe_buffer_disable();
	 * event_mutex is not required for local trace_kprobes.
	 */
	mutex_lock(&event_mutex);
	ret = perf_trace_event_init(tp_event, p_event);
	if (ret)
		destroy_local_trace_uprobe(tp_event);
	mutex_unlock(&event_mutex);
out:
	kfree(path);
	return ret;
}

void perf_uprobe_destroy(struct perf_event *p_event)
{
	mutex_lock(&event_mutex);
	perf_trace_event_close(p_event);
	perf_trace_event_unreg(p_event);
	trace_event_put_ref(p_event->tp_event);
	mutex_unlock(&event_mutex);
	destroy_local_trace_uprobe(p_event->tp_event);
}
#endif /* CONFIG_UPROBE_EVENTS */

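/*
 * Called by the perf core when the event is scheduled in on this CPU:
 * unless the event class does its own bookkeeping via TRACE_REG_PERF_ADD,
 * queue the event on this CPU's hlist so the probe callbacks can find it.
 */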
int perf_trace_add(struct perf_event *p_event, int flags)
{
	struct trace_event_call *tp_event = p_event->tp_event;

	if (!(flags & PERF_EF_START))
		p_event->hw.state = PERF_HES_STOPPED;

	/*
	 * If TRACE_REG_PERF_ADD returns false, no custom action was
	 * performed; queue the event on the hlist of the current CPU.
	 */
	if (!tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event)) {
		struct hlist_head __percpu *pcpu_list;
		struct hlist_head *list;

		pcpu_list = tp_event->perf_events;
		if (WARN_ON_ONCE(!pcpu_list))
			return -EINVAL;

		list = this_cpu_ptr(pcpu_list);
		hlist_add_head_rcu(&p_event->hlist_entry, list);
	}

	return 0;
}

void perf_trace_del(struct perf_event *p_event, int flags)
{
	struct trace_event_call *tp_event = p_event->tp_event;

	/*
	 * If TRACE_REG_PERF_DEL returns false, no custom action was
	 * performed; remove the event from the hlist of the current CPU.
	 */
	if (!tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event))
		hlist_del_rcu(&p_event->hlist_entry);
}

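/*
 * Get a per-CPU, per-recursion-context scratch buffer for building a raw
 * sample of @size bytes. Returns NULL on failure (no recursion context is
 * held in that case); on success the recursion context stored in *rctxp
 * must be released, normally by passing it to perf_trace_buf_submit().
 *
 * Typical caller (a sketch, not taken from any specific probe handler):
 *
 *	entry = perf_trace_buf_alloc(size, &regs, &rctx);
 *	if (!entry)
 *		return;
 *	... fill in *entry ...
 *	perf_trace_buf_submit(entry, size, rctx, type, 1, regs, head, NULL);
 */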
void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp)
{
	char *raw_data;
	int rctx;

	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));

	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
		      "perf buffer not large enough, wanted %d, have %d",
		      size, PERF_MAX_TRACE_SIZE))
		return NULL;

	*rctxp = rctx = perf_swevent_get_recursion_context();
	if (rctx < 0)
		return NULL;

	if (regs)
		*regs = this_cpu_ptr(&__perf_regs[rctx]);
	raw_data = this_cpu_ptr(perf_trace_buf[rctx]);

	/* Zero the dead bytes from alignment, to not leak stack to user. */
	memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
	return raw_data;
}
EXPORT_SYMBOL_GPL(perf_trace_buf_alloc);
NOKPROBE_SYMBOL(perf_trace_buf_alloc);

void perf_trace_buf_update(void *record, u16 type)
{
	struct trace_entry *entry = record;

	tracing_generic_entry_update(entry, type, tracing_gen_ctx());
}
NOKPROBE_SYMBOL(perf_trace_buf_update);

#ifdef CONFIG_FUNCTION_TRACER
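/*
 * ftrace callback for perf "function" events: build a struct ftrace_entry
 * raw sample and submit it for the perf event owning this ftrace_ops, but
 * only on the CPU the event is currently scheduled on.
 */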
static void
perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
			  struct ftrace_ops *ops, struct ftrace_regs *fregs)
{
	struct ftrace_entry *entry;
	struct perf_event *event;
	struct hlist_head head;
	struct pt_regs regs;
	int rctx;
	int bit;

	if (!rcu_is_watching())
		return;

	bit = ftrace_test_recursion_trylock(ip, parent_ip);
	if (bit < 0)
		return;

	if ((unsigned long)ops->private != smp_processor_id())
		goto out;

	event = container_of(ops, struct perf_event, ftrace_ops);

	/*
	 * @event->hlist_entry is NULL (per INIT_HLIST_NODE), and all
	 * the perf code does is hlist_for_each_entry_rcu(), so we can
	 * get away with simply setting the @head.first pointer instead
	 * of doing a full hlist init.
	 */
	head.first = &event->hlist_entry;

#define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \
		    sizeof(u64)) - sizeof(u32))

	BUILD_BUG_ON(ENTRY_SIZE > PERF_MAX_TRACE_SIZE);

	memset(&regs, 0, sizeof(regs));
	perf_fetch_caller_regs(&regs);

	entry = perf_trace_buf_alloc(ENTRY_SIZE, NULL, &rctx);
	if (!entry)
		goto out;

	entry->ip = ip;
	entry->parent_ip = parent_ip;
	perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN,
			      1, &regs, &head, NULL);

out:
	ftrace_test_recursion_unlock(bit);
#undef ENTRY_SIZE
}

static int perf_ftrace_function_register(struct perf_event *event)
{
	struct ftrace_ops *ops = &event->ftrace_ops;

	ops->func = perf_ftrace_function_call;
	ops->private = (void *)(unsigned long)nr_cpu_ids;

	return register_ftrace_function(ops);
}

static int perf_ftrace_function_unregister(struct perf_event *event)
{
	struct ftrace_ops *ops = &event->ftrace_ops;
	int ret = unregister_ftrace_function(ops);
	ftrace_free_filter(ops);
	return ret;
}

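/*
 * reg() callback of the "function" trace event class when used by perf:
 * open/close register and unregister the per-event ftrace_ops, while
 * add/del store the current CPU (or nr_cpu_ids when not scheduled in) in
 * ops->private so the callback above can filter on it.
 */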
int perf_ftrace_event_register(struct trace_event_call *call,
			       enum trace_reg type, void *data)
{
	struct perf_event *event = data;

	switch (type) {
	case TRACE_REG_REGISTER:
	case TRACE_REG_UNREGISTER:
		break;
	case TRACE_REG_PERF_REGISTER:
	case TRACE_REG_PERF_UNREGISTER:
		return 0;
	case TRACE_REG_PERF_OPEN:
		return perf_ftrace_function_register(data);
	case TRACE_REG_PERF_CLOSE:
		return perf_ftrace_function_unregister(data);
	case TRACE_REG_PERF_ADD:
		event->ftrace_ops.private = (void *)(unsigned long)smp_processor_id();
		return 1;
	case TRACE_REG_PERF_DEL:
		event->ftrace_ops.private = (void *)(unsigned long)nr_cpu_ids;
		return 1;
	}

	return -EINVAL;
}
#endif /* CONFIG_FUNCTION_TRACER */