0001
0002
0003 #include <linux/context_tracking.h>
0004 #include <linux/entry-common.h>
0005 #include <linux/resume_user_mode.h>
0006 #include <linux/highmem.h>
0007 #include <linux/jump_label.h>
0008 #include <linux/livepatch.h>
0009 #include <linux/audit.h>
0010 #include <linux/tick.h>
0011
0012 #include "common.h"
0013
0014 #define CREATE_TRACE_POINTS
0015 #include <trace/events/syscalls.h>
0016
0017
/*
 * Establish kernel context after an entry from user mode.
 *
 * Order is critical and must not be changed:
 *  1) Inform lockdep that interrupts are disabled.
 *  2) Exit user/guest context tracking (RCU starts watching) with
 *     interrupts marked off.
 *  3) Only then run instrumentable tracing code.
 */
static __always_inline void __enter_from_user_mode(struct pt_regs *regs)
{
	arch_enter_from_user_mode(regs);
	lockdep_hardirqs_off(CALLER_ADDR0);

	/* Sanity check: we must really be coming from user context */
	CT_WARN_ON(ct_state() != CONTEXT_USER);
	user_exit_irqoff();

	instrumentation_begin();
	trace_hardirqs_off_finish();
	instrumentation_end();
}
0030
/* Non-inlined, non-instrumentable wrapper for architecture entry code. */
void noinstr enter_from_user_mode(struct pt_regs *regs)
{
	__enter_from_user_mode(regs);
}
0035
0036 static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
0037 {
0038 if (unlikely(audit_context())) {
0039 unsigned long args[6];
0040
0041 syscall_get_arguments(current, regs, args);
0042 audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]);
0043 }
0044 }
0045
/*
 * Run the enabled syscall-entry work items (user dispatch, ptrace,
 * seccomp, tracepoints, audit).
 *
 * Returns the (possibly rewritten) syscall number, or -1 to skip the
 * syscall entirely.
 */
static long syscall_trace_enter(struct pt_regs *regs, long syscall,
				unsigned long work)
{
	long ret = 0;

	/*
	 * Handle Syscall User Dispatch first: its ABI can be something
	 * that makes no sense for the other syscall_work features, so
	 * none of them may run when the syscall is redirected.
	 */
	if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
		if (syscall_user_dispatch(regs))
			return -1L;
	}

	/* Handle ptrace: a nonzero report or SYSEMU aborts the syscall */
	if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) {
		ret = ptrace_report_syscall_entry(regs);
		if (ret || (work & SYSCALL_WORK_SYSCALL_EMU))
			return -1L;
	}

	/* Do seccomp after ptrace, to catch any tracer changes */
	if (work & SYSCALL_WORK_SECCOMP) {
		ret = __secure_computing(NULL);
		if (ret == -1L)
			return ret;
	}

	/* Either of the above might have changed the syscall number */
	syscall = syscall_get_nr(current, regs);

	if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT))
		trace_sys_enter(regs, syscall);

	syscall_enter_audit(regs, syscall);

	/* ret is zero here unless overwritten above; keep the syscall nr */
	return ret ? : syscall;
}
0085
0086 static __always_inline long
0087 __syscall_enter_from_user_work(struct pt_regs *regs, long syscall)
0088 {
0089 unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
0090
0091 if (work & SYSCALL_WORK_ENTER)
0092 syscall = syscall_trace_enter(regs, syscall, work);
0093
0094 return syscall;
0095 }
0096
/*
 * Variant for architectures that establish kernel context and enable
 * interrupts themselves before invoking the syscall entry work.
 */
long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall)
{
	return __syscall_enter_from_user_work(regs, syscall);
}
0101
/*
 * Full syscall entry: establish kernel context, enable interrupts and
 * run the entry work. Returns the (possibly modified) syscall number
 * or -1 to skip the syscall.
 */
noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
{
	long ret;

	__enter_from_user_mode(regs);

	/* Instrumentable code must run after context tracking is set up */
	instrumentation_begin();
	local_irq_enable();
	ret = __syscall_enter_from_user_work(regs, syscall);
	instrumentation_end();

	return ret;
}
0115
/*
 * Establish kernel context and enable interrupts only; the syscall
 * entry work is done separately via syscall_enter_from_user_mode_work().
 */
noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs)
{
	__enter_from_user_mode(regs);
	instrumentation_begin();
	local_irq_enable();
	instrumentation_end();
}
0123
0124
/*
 * Final transition to user mode; mirror image of __enter_from_user_mode().
 *
 * Order is critical: instrumentable prepare steps first, then enter
 * user context tracking (RCU stops watching), and only then tell
 * lockdep that interrupts are about to be enabled by the return.
 */
static __always_inline void __exit_to_user_mode(void)
{
	instrumentation_begin();
	trace_hardirqs_on_prepare();
	lockdep_hardirqs_on_prepare();
	instrumentation_end();

	user_enter_irqoff();
	arch_exit_to_user_mode();
	lockdep_hardirqs_on(CALLER_ADDR0);
}
0136
/* Non-inlined, non-instrumentable wrapper for architecture exit code. */
void noinstr exit_to_user_mode(void)
{
	__exit_to_user_mode();
}
0141
0142
/* Workaround to allow gradual conversion of architecture code */
void __weak arch_do_signal_or_restart(struct pt_regs *regs) { }
0144
/*
 * Process pending TIF work before returning to user space.
 *
 * Runs with interrupts enabled inside the loop body and re-reads the
 * work flags with interrupts disabled, so that no work item can be
 * lost between the last check and the actual return to user mode.
 *
 * Returns the final work flags for arch_exit_to_user_mode_prepare().
 */
static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
					    unsigned long ti_work)
{
	/*
	 * Before returning to user space ensure that all pending work
	 * items have been completed.
	 */
	while (ti_work & EXIT_TO_USER_MODE_WORK) {

		local_irq_enable_exit_to_user(ti_work);

		if (ti_work & _TIF_NEED_RESCHED)
			schedule();

		if (ti_work & _TIF_UPROBE)
			uprobe_notify_resume(regs);

		if (ti_work & _TIF_PATCH_PENDING)
			klp_update_patch_state(current);

		if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
			arch_do_signal_or_restart(regs);

		if (ti_work & _TIF_NOTIFY_RESUME)
			resume_user_mode_work(regs);

		/* Architecture specific TIF work */
		arch_exit_to_user_mode_work(regs, ti_work);

		/*
		 * Disable interrupts and reevaluate the work flags as they
		 * might have changed while interrupts and preemption were
		 * enabled above.
		 */
		local_irq_disable_exit_to_user();

		/* Check if any of the above work has queued a deferred wakeup */
		tick_nohz_user_enter_prepare();

		ti_work = read_thread_flags();
	}

	/* Return the latest work state for arch_exit_to_user_mode() */
	return ti_work;
}
0190
/*
 * Run all pending exit-to-user work and sanity checks; called with
 * interrupts disabled and returns with interrupts disabled.
 */
static void exit_to_user_mode_prepare(struct pt_regs *regs)
{
	unsigned long ti_work = read_thread_flags();

	lockdep_assert_irqs_disabled();

	/* Flush pending rcuog wakeup before the last need_resched() check */
	tick_nohz_user_enter_prepare();

	if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
		ti_work = exit_to_user_mode_loop(regs, ti_work);

	arch_exit_to_user_mode_prepare(regs, ti_work);

	/* Ensure that kernel state is sane for a return to userspace */
	addr_limit_user_check();
	kmap_assert_nomap();
	lockdep_assert_irqs_disabled();
	lockdep_sys_exit();
}
0211
0212
0213
0214
0215
0216
0217 static inline bool report_single_step(unsigned long work)
0218 {
0219 if (work & SYSCALL_WORK_SYSCALL_EMU)
0220 return false;
0221
0222 return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP;
0223 }
0224
/*
 * Run the enabled syscall-exit work items (audit, tracepoints, ptrace).
 */
static void syscall_exit_work(struct pt_regs *regs, unsigned long work)
{
	bool step;

	/*
	 * If the syscall was rolled back due to syscall user dispatching,
	 * then the tracers below are not invoked for the same reason as
	 * the entry side was not invoked in syscall_trace_enter(): The ABI
	 * of these syscalls is unknown.
	 */
	if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
		if (unlikely(current->syscall_dispatch.on_dispatch)) {
			current->syscall_dispatch.on_dispatch = false;
			return;
		}
	}

	audit_syscall_exit(regs);

	if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT)
		trace_sys_exit(regs, syscall_get_return_value(current, regs));

	step = report_single_step(work);
	if (step || work & SYSCALL_WORK_SYSCALL_TRACE)
		ptrace_report_syscall_exit(regs, step);
}
0251
0252
0253
0254
0255
/*
 * Syscall-specific exit work, run with interrupts enabled before the
 * generic exit-to-user path takes over.
 */
static void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
{
	unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
	unsigned long nr = syscall_get_nr(current, regs);

	CT_WARN_ON(ct_state() != CONTEXT_KERNEL);

	/* Catch syscalls which return with interrupts disabled */
	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
		if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr))
			local_irq_enable();
	}

	rseq_syscall(regs);

	/*
	 * Do one-time syscall specific work. If these work items are
	 * enabled, we want to run them exactly once per syscall exit with
	 * interrupts enabled.
	 */
	if (unlikely(work & SYSCALL_WORK_EXIT))
		syscall_exit_work(regs, work);
}
0278
/*
 * Run syscall exit work with interrupts enabled, then disable
 * interrupts for the final exit-to-user preparation.
 */
static __always_inline void __syscall_exit_to_user_mode_work(struct pt_regs *regs)
{
	syscall_exit_to_user_mode_prepare(regs);
	local_irq_disable_exit_to_user();
	exit_to_user_mode_prepare(regs);
}
0285
/*
 * Variant for architectures that perform the final (noinstr) user-mode
 * transition themselves after calling this.
 */
void syscall_exit_to_user_mode_work(struct pt_regs *regs)
{
	__syscall_exit_to_user_mode_work(regs);
}
0290
/*
 * Full syscall exit: run all exit work (instrumentable), then perform
 * the final non-instrumentable transition to user mode.
 */
__visible noinstr void syscall_exit_to_user_mode(struct pt_regs *regs)
{
	instrumentation_begin();
	__syscall_exit_to_user_mode_work(regs);
	instrumentation_end();
	__exit_to_user_mode();
}
0298
/* Interrupt/exception entry from user mode: establish kernel context. */
noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs)
{
	__enter_from_user_mode(regs);
}
0303
/* Interrupt/exception return to user mode: run exit work, then leave. */
noinstr void irqentry_exit_to_user_mode(struct pt_regs *regs)
{
	instrumentation_begin();
	exit_to_user_mode_prepare(regs);
	instrumentation_end();
	__exit_to_user_mode();
}
0311
/*
 * Common interrupt/exception entry. Returns state that irqentry_exit()
 * needs to undo the context-tracking changes made here.
 */
noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
{
	irqentry_state_t ret = {
		.exit_rcu = false,
	};

	if (user_mode(regs)) {
		irqentry_enter_from_user_mode(regs);
		return ret;
	}

	/*
	 * If this entry hit the idle task, invoke ct_irq_enter() whether
	 * RCU is watching or not.
	 *
	 * Interrupts can nest when the first interrupt invokes softirq
	 * processing on return, which enables interrupts.
	 *
	 * Scheduler ticks in the idle task can mark quiescent state and
	 * terminate a grace period, if and only if the timer interrupt is
	 * not nested into another interrupt.
	 *
	 * Checking for rcu_is_watching() here would prevent the nesting
	 * interrupt from invoking ct_irq_enter(). If that nested interrupt
	 * is the tick then rcu_flavor_sched_clock_irq() would wrongfully
	 * assume that it is the first interrupt and eventually claim
	 * quiescent state and end grace periods prematurely.
	 *
	 * Unconditionally invoke ct_irq_enter() so RCU state stays
	 * consistent.
	 *
	 * TINY_RCU does not support EQS, so let the compiler eliminate
	 * this part when enabled.
	 */
	if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) {
		/*
		 * If RCU is not watching then the same careful
		 * sequence vs. lockdep and tracing is required
		 * as in __enter_from_user_mode().
		 */
		lockdep_hardirqs_off(CALLER_ADDR0);
		ct_irq_enter();
		instrumentation_begin();
		trace_hardirqs_off_finish();
		instrumentation_end();

		ret.exit_rcu = true;
		return ret;
	}

	/*
	 * If RCU is watching then RCU only wants to check whether it needs
	 * to restart the tick in NOHZ mode. rcu_irq_enter_check_tick()
	 * already contains a warning when RCU is not watching, so no point
	 * in having another one here.
	 */
	lockdep_hardirqs_off(CALLER_ADDR0);
	instrumentation_begin();
	rcu_irq_enter_check_tick();
	trace_hardirqs_off_finish();
	instrumentation_end();

	return ret;
}
0376
/*
 * Conditionally reschedule on return from interrupt, when preemption
 * is possible (preempt count is zero).
 */
void raw_irqentry_exit_cond_resched(void)
{
	if (!preempt_count()) {
		/* Sanity check RCU and thread stack */
		rcu_irq_exit_check_preempt();
		if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
			WARN_ON_ONCE(!on_thread_stack());
		if (need_resched())
			preempt_schedule_irq();
	}
}
#ifdef CONFIG_PREEMPT_DYNAMIC
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
/* Static call so the preemption model can be switched at boot time. */
DEFINE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched);
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
/* Static-key variant for architectures without static calls. */
DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
void dynamic_irqentry_exit_cond_resched(void)
{
	if (!static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
		return;
	raw_irqentry_exit_cond_resched();
}
#endif
#endif
0401
/*
 * Common interrupt/exception exit; counterpart of irqentry_enter().
 * @state carries whether RCU context tracking was entered on the way in.
 */
noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
{
	lockdep_assert_irqs_disabled();

	/* Check whether this returns to user mode */
	if (user_mode(regs)) {
		irqentry_exit_to_user_mode(regs);
	} else if (!regs_irqs_disabled(regs)) {
		/*
		 * If RCU was not watching on entry this needs to be done
		 * carefully and needs the same ordering of lockdep/tracing
		 * and RCU as the return-to-user-mode path.
		 */
		if (state.exit_rcu) {
			instrumentation_begin();
			/* Tell the tracer that IRET will enable interrupts */
			trace_hardirqs_on_prepare();
			lockdep_hardirqs_on_prepare();
			instrumentation_end();
			ct_irq_exit();
			lockdep_hardirqs_on(CALLER_ADDR0);
			return;
		}

		instrumentation_begin();
		if (IS_ENABLED(CONFIG_PREEMPTION))
			irqentry_exit_cond_resched();

		/* Covers both tracing and lockdep */
		trace_hardirqs_on();
		instrumentation_end();
	} else {
		/*
		 * IRQ flags state is correct already. Just tell RCU if it
		 * was not watching on entry.
		 */
		if (state.exit_rcu)
			ct_irq_exit();
	}
}
0442
/*
 * NMI entry. Saves the lockdep hardirq state so irqentry_nmi_exit()
 * can restore it, since an NMI can hit with IRQs logically on or off.
 */
irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs)
{
	irqentry_state_t irq_state;

	irq_state.lockdep = lockdep_hardirqs_enabled();

	__nmi_enter();
	lockdep_hardirqs_off(CALLER_ADDR0);
	lockdep_hardirq_enter();
	ct_nmi_enter();

	instrumentation_begin();
	trace_hardirqs_off_finish();
	ftrace_nmi_enter();
	instrumentation_end();

	return irq_state;
}
0461
/*
 * NMI exit; mirror image of irqentry_nmi_enter(). Restores the lockdep
 * hardirq state that was saved on entry.
 */
void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state)
{
	instrumentation_begin();
	ftrace_nmi_exit();
	if (irq_state.lockdep) {
		trace_hardirqs_on_prepare();
		lockdep_hardirqs_on_prepare();
	}
	instrumentation_end();

	ct_nmi_exit();
	lockdep_hardirq_exit();
	if (irq_state.lockdep)
		lockdep_hardirqs_on(CALLER_ADDR0);
	__nmi_exit();
}