0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047 #include <linux/mm.h>
0048 #include <linux/slab.h>
0049 #include <linux/acct.h>
0050 #include <linux/capability.h>
0051 #include <linux/file.h>
0052 #include <linux/tty.h>
0053 #include <linux/security.h>
0054 #include <linux/vfs.h>
0055 #include <linux/jiffies.h>
0056 #include <linux/times.h>
0057 #include <linux/syscalls.h>
0058 #include <linux/mount.h>
0059 #include <linux/uaccess.h>
0060 #include <linux/sched/cputime.h>
0061
0062 #include <asm/div64.h>
0063 #include <linux/pid_namespace.h>
0064 #include <linux/fs_pin.h>
0065
0066
0067
0068
0069
0070
0071
0072
/*
 * Tunables consulted by check_free_space(), kept in one three-int array
 * (exported through the "acct" sysctl entry when CONFIG_SYSCTL is set):
 *
 *   [0] RESUME       - accounting resumes once the available blocks are at
 *                      least this percentage of the filesystem's blocks
 *   [1] SUSPEND      - accounting pauses once the available blocks drop to
 *                      this percentage (or below)
 *   [2] ACCT_TIMEOUT - seconds (it is multiplied by HZ) between two
 *                      free-space checks
 */
static int acct_parm[3] = {4, 2, 30};
#define RESUME (acct_parm[0])
#define SUSPEND (acct_parm[1])
#define ACCT_TIMEOUT (acct_parm[2])
0077
0078 #ifdef CONFIG_SYSCTL
0079 static struct ctl_table kern_acct_table[] = {
0080 {
0081 .procname = "acct",
0082 .data = &acct_parm,
0083 .maxlen = 3*sizeof(int),
0084 .mode = 0644,
0085 .proc_handler = proc_dointvec,
0086 },
0087 { }
0088 };
0089
0090 static __init int kernel_acct_sysctls_init(void)
0091 {
0092 register_sysctl_init("kernel", kern_acct_table);
0093 return 0;
0094 }
0095 late_initcall(kernel_acct_sysctls_init);
0096 #endif
0097
0098
0099
0100
0101
/*
 * State of one enabled accounting file.  acct_on() allocates an instance,
 * publishes its embedded pin in the pid namespace's ->bacct pointer and
 * attaches the pin to the file's mount with pin_insert().
 */
struct bsd_acct_struct {
	struct fs_pin pin;		/* embedded pin; to_acct() maps it back to this struct */
	atomic_long_t count;		/* reference count; acct_put() frees at zero */
	struct rcu_head rcu;		/* used by kfree_rcu() in acct_put() */
	struct mutex lock;		/* held while a record is written and during teardown */
	int active;			/* non-zero while records are being written; toggled by check_free_space() */
	unsigned long needcheck;	/* jiffies value before which the free-space check is skipped */
	struct file *file;		/* the accounting file records are appended to */
	struct pid_namespace *ns;	/* namespace whose ->bacct points at this instance */
	struct work_struct work;	/* runs close_work() to flush and release the file */
	struct completion done;		/* completed by close_work(); awaited by acct_pin_kill() */
};

/* Defined further down; declared here because acct_pin_kill() calls it first. */
static void do_acct_process(struct bsd_acct_struct *acct);
0116
0117
0118
0119
0120 static int check_free_space(struct bsd_acct_struct *acct)
0121 {
0122 struct kstatfs sbuf;
0123
0124 if (time_is_after_jiffies(acct->needcheck))
0125 goto out;
0126
0127
0128 if (vfs_statfs(&acct->file->f_path, &sbuf))
0129 goto out;
0130
0131 if (acct->active) {
0132 u64 suspend = sbuf.f_blocks * SUSPEND;
0133 do_div(suspend, 100);
0134 if (sbuf.f_bavail <= suspend) {
0135 acct->active = 0;
0136 pr_info("Process accounting paused\n");
0137 }
0138 } else {
0139 u64 resume = sbuf.f_blocks * RESUME;
0140 do_div(resume, 100);
0141 if (sbuf.f_bavail >= resume) {
0142 acct->active = 1;
0143 pr_info("Process accounting resumed\n");
0144 }
0145 }
0146
0147 acct->needcheck = jiffies + ACCT_TIMEOUT*HZ;
0148 out:
0149 return acct->active;
0150 }
0151
0152 static void acct_put(struct bsd_acct_struct *p)
0153 {
0154 if (atomic_long_dec_and_test(&p->count))
0155 kfree_rcu(p, rcu);
0156 }
0157
0158 static inline struct bsd_acct_struct *to_acct(struct fs_pin *p)
0159 {
0160 return p ? container_of(p, struct bsd_acct_struct, pin) : NULL;
0161 }
0162
0163 static struct bsd_acct_struct *acct_get(struct pid_namespace *ns)
0164 {
0165 struct bsd_acct_struct *res;
0166 again:
0167 smp_rmb();
0168 rcu_read_lock();
0169 res = to_acct(READ_ONCE(ns->bacct));
0170 if (!res) {
0171 rcu_read_unlock();
0172 return NULL;
0173 }
0174 if (!atomic_long_inc_not_zero(&res->count)) {
0175 rcu_read_unlock();
0176 cpu_relax();
0177 goto again;
0178 }
0179 rcu_read_unlock();
0180 mutex_lock(&res->lock);
0181 if (res != to_acct(READ_ONCE(ns->bacct))) {
0182 mutex_unlock(&res->lock);
0183 acct_put(res);
0184 goto again;
0185 }
0186 return res;
0187 }
0188
/*
 * Kill callback installed on the pin by acct_on() (via init_fs_pin()).
 *
 * Under acct->lock it writes one last record for the calling task, queues
 * close_work() to flush and release the file, and waits for that work to
 * finish.  ns->bacct is then cleared, but only if it still points at this
 * pin (acct_on() may already have swapped in a replacement).  Finally the
 * pin is removed and the reference set up by acct_on() is dropped.
 */
static void acct_pin_kill(struct fs_pin *pin)
{
	struct bsd_acct_struct *acct = to_acct(pin);
	mutex_lock(&acct->lock);
	do_acct_process(acct);
	/* The file is closed from a workqueue; wait until that has happened. */
	schedule_work(&acct->work);
	wait_for_completion(&acct->done);
	/* Only unpublish ourselves, never a newer instance. */
	cmpxchg(&acct->ns->bacct, pin, NULL);
	mutex_unlock(&acct->lock);
	pin_remove(pin);
	acct_put(acct);
}
0201
0202 static void close_work(struct work_struct *work)
0203 {
0204 struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work);
0205 struct file *file = acct->file;
0206 if (file->f_op->flush)
0207 file->f_op->flush(file, NULL);
0208 __fput_sync(file);
0209 complete(&acct->done);
0210 }
0211
0212 static int acct_on(struct filename *pathname)
0213 {
0214 struct file *file;
0215 struct vfsmount *mnt, *internal;
0216 struct pid_namespace *ns = task_active_pid_ns(current);
0217 struct bsd_acct_struct *acct;
0218 struct fs_pin *old;
0219 int err;
0220
0221 acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
0222 if (!acct)
0223 return -ENOMEM;
0224
0225
0226 file = file_open_name(pathname, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
0227 if (IS_ERR(file)) {
0228 kfree(acct);
0229 return PTR_ERR(file);
0230 }
0231
0232 if (!S_ISREG(file_inode(file)->i_mode)) {
0233 kfree(acct);
0234 filp_close(file, NULL);
0235 return -EACCES;
0236 }
0237
0238 if (!(file->f_mode & FMODE_CAN_WRITE)) {
0239 kfree(acct);
0240 filp_close(file, NULL);
0241 return -EIO;
0242 }
0243 internal = mnt_clone_internal(&file->f_path);
0244 if (IS_ERR(internal)) {
0245 kfree(acct);
0246 filp_close(file, NULL);
0247 return PTR_ERR(internal);
0248 }
0249 err = __mnt_want_write(internal);
0250 if (err) {
0251 mntput(internal);
0252 kfree(acct);
0253 filp_close(file, NULL);
0254 return err;
0255 }
0256 mnt = file->f_path.mnt;
0257 file->f_path.mnt = internal;
0258
0259 atomic_long_set(&acct->count, 1);
0260 init_fs_pin(&acct->pin, acct_pin_kill);
0261 acct->file = file;
0262 acct->needcheck = jiffies;
0263 acct->ns = ns;
0264 mutex_init(&acct->lock);
0265 INIT_WORK(&acct->work, close_work);
0266 init_completion(&acct->done);
0267 mutex_lock_nested(&acct->lock, 1);
0268 pin_insert(&acct->pin, mnt);
0269
0270 rcu_read_lock();
0271 old = xchg(&ns->bacct, &acct->pin);
0272 mutex_unlock(&acct->lock);
0273 pin_kill(old);
0274 __mnt_drop_write(mnt);
0275 mntput(mnt);
0276 return 0;
0277 }
0278
0279 static DEFINE_MUTEX(acct_on_mutex);
0280
0281
0282
0283
0284
0285
0286
0287
0288
0289
0290
0291
0292 SYSCALL_DEFINE1(acct, const char __user *, name)
0293 {
0294 int error = 0;
0295
0296 if (!capable(CAP_SYS_PACCT))
0297 return -EPERM;
0298
0299 if (name) {
0300 struct filename *tmp = getname(name);
0301
0302 if (IS_ERR(tmp))
0303 return PTR_ERR(tmp);
0304 mutex_lock(&acct_on_mutex);
0305 error = acct_on(tmp);
0306 mutex_unlock(&acct_on_mutex);
0307 putname(tmp);
0308 } else {
0309 rcu_read_lock();
0310 pin_kill(task_active_pid_ns(current)->bacct);
0311 }
0312
0313 return error;
0314 }
0315
/*
 * Kill the accounting pin (if any) installed in @ns.
 *
 * NOTE(review): the RCU read lock taken here is not released in this
 * function; pin_kill() presumably drops it - confirm against fs_pin.
 */
void acct_exit_ns(struct pid_namespace *ns)
{
	rcu_read_lock();
	pin_kill(ns->bacct);
}
0321
0322
0323
0324
0325
0326
0327
0328
0329
#define MANTSIZE 13			/* 13 bit mantissa */
#define EXPSIZE 3			/* base-8 exponent: one step is 3 bits */
#define MAXFRACT ((1 << MANTSIZE) - 1)	/* largest mantissa value */

/*
 * Encode @value as a comp_t: the low MANTSIZE bits hold the mantissa and the
 * bits above it hold a base-8 exponent, i.e. the decoded value is
 * mantissa << (EXPSIZE * exponent).
 *
 * The value is rounded to nearest using the most significant bit that was
 * shifted out last.  Values too large for the format saturate to the largest
 * comp_t instead of being encoded with a truncated exponent.
 */
static comp_t encode_comp_t(unsigned long value)
{
	int exp, rnd;

	exp = rnd = 0;
	while (value > MAXFRACT) {
		/* Remember the top bit about to be dropped for rounding. */
		rnd = value & (1 << (EXPSIZE - 1));
		value >>= EXPSIZE;
		exp++;
	}

	/* Round up; the carry may push the mantissa over the limit again. */
	if (rnd && (++value > MAXFRACT)) {
		value >>= EXPSIZE;
		exp++;
	}

	/*
	 * With a 64-bit unsigned long the exponent can need more bits than
	 * the field above the mantissa provides; shifting it in anyway would
	 * silently lose its high bits.  Clamp to the maximum instead.
	 */
	if (exp > (((comp_t) ~0U) >> MANTSIZE))
		return (comp_t) ~0U;

	/* Combine exponent and mantissa. */
	exp <<= MANTSIZE;
	exp += value;
	return exp;
}
0360
0361 #if ACCT_VERSION == 1 || ACCT_VERSION == 2
0362
0363
0364
0365
0366
0367
0368
0369
0370
#define MANTSIZE2 20				/* mantissa width incl. hidden bit */
#define EXPSIZE2 5				/* base-2 exponent width */
#define MAXFRACT2 ((1ul << MANTSIZE2) - 1)	/* largest mantissa value */
#define MAXEXP2 ((1 << EXPSIZE2) - 1)		/* largest exponent value */

/*
 * Encode @value as a comp2_t: a base-2 exponent stored above a mantissa
 * whose top bit is implicit (only MANTSIZE2-1 mantissa bits are stored).
 * The last bit shifted out is used to round to nearest.  Values whose
 * exponent would exceed MAXEXP2 saturate to the all-ones encoding.
 */
static comp2_t encode_comp2_t(u64 value)
{
	int round_up = 0;
	/* Exponent starts at 1 when the hidden bit is needed. */
	int exponent = (value > (MAXFRACT2 >> 1)) ? 1 : 0;

	for (; value > MAXFRACT2; value >>= 1, exponent++)
		round_up = value & 1;

	if (round_up) {
		value++;
		/* Rounding overflowed the mantissa: renormalise once more. */
		if (value > MAXFRACT2) {
			value >>= 1;
			exponent++;
		}
	}

	/* Overflow: report the largest representable value. */
	if (exponent > MAXEXP2)
		return (1ul << (MANTSIZE2 + EXPSIZE2 - 1)) - 1;

	return (value & (MAXFRACT2 >> 1)) | (exponent << (MANTSIZE2 - 1));
}
0403 #elif ACCT_VERSION == 3
0404
0405
0406
/*
 * Encode @value as a 32-bit pattern with an 8-bit exponent at bit 23 and a
 * 23-bit mantissa below it (the leading one is implicit).  Zero encodes as
 * zero.  Low-order bits that do not fit the mantissa are dropped, not
 * rounded.
 */
static u32 encode_float(u64 value)
{
	unsigned int exponent = 190;
	u32 mantissa;

	if (!value)
		return 0;

	/* Normalise: shift left until the top bit is set. */
	for (; !(value >> 63); value <<= 1)
		exponent--;

	/* Keep the 23 bits below the (implicit) leading one. */
	mantissa = (u32)(value >> 40) & 0x7fffffu;

	return (exponent << 23) | mantissa;
}
0421 #endif
0422
0423
0424
0425
0426
0427
0428
0429
0430
0431
0432 static void fill_ac(acct_t *ac)
0433 {
0434 struct pacct_struct *pacct = ¤t->signal->pacct;
0435 u64 elapsed, run_time;
0436 time64_t btime;
0437 struct tty_struct *tty;
0438
0439
0440
0441
0442
0443 memset(ac, 0, sizeof(acct_t));
0444
0445 ac->ac_version = ACCT_VERSION | ACCT_BYTEORDER;
0446 strlcpy(ac->ac_comm, current->comm, sizeof(ac->ac_comm));
0447
0448
0449 run_time = ktime_get_ns();
0450 run_time -= current->group_leader->start_time;
0451
0452 elapsed = nsec_to_AHZ(run_time);
0453 #if ACCT_VERSION == 3
0454 ac->ac_etime = encode_float(elapsed);
0455 #else
0456 ac->ac_etime = encode_comp_t(elapsed < (unsigned long) -1l ?
0457 (unsigned long) elapsed : (unsigned long) -1l);
0458 #endif
0459 #if ACCT_VERSION == 1 || ACCT_VERSION == 2
0460 {
0461
0462 comp2_t etime = encode_comp2_t(elapsed);
0463
0464 ac->ac_etime_hi = etime >> 16;
0465 ac->ac_etime_lo = (u16) etime;
0466 }
0467 #endif
0468 do_div(elapsed, AHZ);
0469 btime = ktime_get_real_seconds() - elapsed;
0470 ac->ac_btime = clamp_t(time64_t, btime, 0, U32_MAX);
0471 #if ACCT_VERSION==2
0472 ac->ac_ahz = AHZ;
0473 #endif
0474
0475 spin_lock_irq(¤t->sighand->siglock);
0476 tty = current->signal->tty;
0477 ac->ac_tty = tty ? old_encode_dev(tty_devnum(tty)) : 0;
0478 ac->ac_utime = encode_comp_t(nsec_to_AHZ(pacct->ac_utime));
0479 ac->ac_stime = encode_comp_t(nsec_to_AHZ(pacct->ac_stime));
0480 ac->ac_flag = pacct->ac_flag;
0481 ac->ac_mem = encode_comp_t(pacct->ac_mem);
0482 ac->ac_minflt = encode_comp_t(pacct->ac_minflt);
0483 ac->ac_majflt = encode_comp_t(pacct->ac_majflt);
0484 ac->ac_exitcode = pacct->ac_exitcode;
0485 spin_unlock_irq(¤t->sighand->siglock);
0486 }
0487
0488
0489
0490 static void do_acct_process(struct bsd_acct_struct *acct)
0491 {
0492 acct_t ac;
0493 unsigned long flim;
0494 const struct cred *orig_cred;
0495 struct file *file = acct->file;
0496
0497
0498
0499
0500 flim = rlimit(RLIMIT_FSIZE);
0501 current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
0502
0503 orig_cred = override_creds(file->f_cred);
0504
0505
0506
0507
0508
0509 if (!check_free_space(acct))
0510 goto out;
0511
0512 fill_ac(&ac);
0513
0514 ac.ac_uid = from_kuid_munged(file->f_cred->user_ns, orig_cred->uid);
0515 ac.ac_gid = from_kgid_munged(file->f_cred->user_ns, orig_cred->gid);
0516 #if ACCT_VERSION == 1 || ACCT_VERSION == 2
0517
0518 ac.ac_uid16 = ac.ac_uid;
0519 ac.ac_gid16 = ac.ac_gid;
0520 #elif ACCT_VERSION == 3
0521 {
0522 struct pid_namespace *ns = acct->ns;
0523
0524 ac.ac_pid = task_tgid_nr_ns(current, ns);
0525 rcu_read_lock();
0526 ac.ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent),
0527 ns);
0528 rcu_read_unlock();
0529 }
0530 #endif
0531
0532
0533
0534
0535 if (file_start_write_trylock(file)) {
0536
0537 loff_t pos = 0;
0538 __kernel_write(file, &ac, sizeof(acct_t), &pos);
0539 file_end_write(file);
0540 }
0541 out:
0542 current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
0543 revert_creds(orig_cred);
0544 }
0545
0546
0547
0548
0549
0550
0551 void acct_collect(long exitcode, int group_dead)
0552 {
0553 struct pacct_struct *pacct = ¤t->signal->pacct;
0554 u64 utime, stime;
0555 unsigned long vsize = 0;
0556
0557 if (group_dead && current->mm) {
0558 struct vm_area_struct *vma;
0559
0560 mmap_read_lock(current->mm);
0561 vma = current->mm->mmap;
0562 while (vma) {
0563 vsize += vma->vm_end - vma->vm_start;
0564 vma = vma->vm_next;
0565 }
0566 mmap_read_unlock(current->mm);
0567 }
0568
0569 spin_lock_irq(¤t->sighand->siglock);
0570 if (group_dead)
0571 pacct->ac_mem = vsize / 1024;
0572 if (thread_group_leader(current)) {
0573 pacct->ac_exitcode = exitcode;
0574 if (current->flags & PF_FORKNOEXEC)
0575 pacct->ac_flag |= AFORK;
0576 }
0577 if (current->flags & PF_SUPERPRIV)
0578 pacct->ac_flag |= ASU;
0579 if (current->flags & PF_DUMPCORE)
0580 pacct->ac_flag |= ACORE;
0581 if (current->flags & PF_SIGNALED)
0582 pacct->ac_flag |= AXSIG;
0583
0584 task_cputime(current, &utime, &stime);
0585 pacct->ac_utime += utime;
0586 pacct->ac_stime += stime;
0587 pacct->ac_minflt += current->min_flt;
0588 pacct->ac_majflt += current->maj_flt;
0589 spin_unlock_irq(¤t->sighand->siglock);
0590 }
0591
0592 static void slow_acct_process(struct pid_namespace *ns)
0593 {
0594 for ( ; ns; ns = ns->parent) {
0595 struct bsd_acct_struct *acct = acct_get(ns);
0596 if (acct) {
0597 do_acct_process(acct);
0598 mutex_unlock(&acct->lock);
0599 acct_put(acct);
0600 }
0601 }
0602 }
0603
0604
0605
0606
0607 void acct_process(void)
0608 {
0609 struct pid_namespace *ns;
0610
0611
0612
0613
0614
0615
0616 for (ns = task_active_pid_ns(current); ns != NULL; ns = ns->parent) {
0617 if (ns->bacct)
0618 break;
0619 }
0620 if (unlikely(ns))
0621 slow_acct_process(ns);
0622 }