/*
 *  linux/fs/file_table.c
 *
 *  Allocation, accounting and release of 'struct file' objects.
 */

#include <linux/string.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/security.h>
#include <linux/cred.h>
#include <linux/eventpoll.h>
#include <linux/rcupdate.h>
#include <linux/mount.h>
#include <linux/capability.h>
#include <linux/cdev.h>
#include <linux/fsnotify.h>
#include <linux/sysctl.h>
#include <linux/percpu_counter.h>
#include <linux/percpu.h>
#include <linux/task_work.h>
#include <linux/ima.h>
#include <linux/swap.h>
#include <linux/kmemleak.h>

#include <linux/atomic.h>

#include "internal.h"

/* sysctl tunables for the maximum number of open files */
static struct files_stat_struct files_stat = {
	.max_files = NR_FILE
};

/* SLAB cache for 'struct file' objects */
static struct kmem_cache *filp_cachep __read_mostly;

static struct percpu_counter nr_files __cacheline_aligned_in_smp;

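/*
 * 'struct file' is freed via RCU: lockless lookups (e.g. fget() walking the
 * descriptor table under rcu_read_lock()) may still be inspecting a file
 * whose last reference is being dropped, so the actual freeing must wait
 * for a grace period.
 */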
static void file_free_rcu(struct rcu_head *head)
{
	struct file *f = container_of(head, struct file, f_rcuhead);

	put_cred(f->f_cred);
	kmem_cache_free(filp_cachep, f);
}

static inline void file_free(struct file *f)
{
	security_file_free(f);
	if (!(f->f_mode & FMODE_NOACCOUNT))
		percpu_counter_dec(&nr_files);
	call_rcu(&f->f_rcuhead, file_free_rcu);
}

/*
 * Return the total number of open files in the system
 */
static long get_nr_files(void)
{
	return percpu_counter_read_positive(&nr_files);
}

/*
 * Return the maximum number of open files in the system
 */
unsigned long get_max_files(void)
{
	return files_stat.max_files;
}
EXPORT_SYMBOL_GPL(get_max_files);

#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)

/*
 * Handle the nr_files sysctl: refresh the counter before reporting it.
 */
static int proc_nr_files(struct ctl_table *table, int write, void *buffer,
			 size_t *lenp, loff_t *ppos)
{
	files_stat.nr_files = get_nr_files();
	return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}

static struct ctl_table fs_stat_sysctls[] = {
	{
		.procname	= "file-nr",
		.data		= &files_stat,
		.maxlen		= sizeof(files_stat),
		.mode		= 0444,
		.proc_handler	= proc_nr_files,
	},
	{
		.procname	= "file-max",
		.data		= &files_stat.max_files,
		.maxlen		= sizeof(files_stat.max_files),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
		.extra1		= SYSCTL_LONG_ZERO,
		.extra2		= SYSCTL_LONG_MAX,
	},
	{
		.procname	= "nr_open",
		.data		= &sysctl_nr_open,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &sysctl_nr_open_min,
		.extra2		= &sysctl_nr_open_max,
	},
	{ }
};

static int __init init_fs_stat_sysctls(void)
{
	register_sysctl_init("fs", fs_stat_sysctls);
	if (IS_ENABLED(CONFIG_BINFMT_MISC)) {
		struct ctl_table_header *hdr;

		hdr = register_sysctl_mount_point("fs/binfmt_misc");
		kmemleak_not_leak(hdr);
	}
	return 0;
}
fs_initcall(init_fs_stat_sysctls);
#endif

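/*
 * Allocation of 'struct file'.  __alloc_file() does the raw allocation and
 * security setup; alloc_empty_file() additionally accounts the new file
 * against the fs.file-max limit, and alloc_empty_file_noaccount() skips
 * that accounting entirely.
 */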
static struct file *__alloc_file(int flags, const struct cred *cred)
{
	struct file *f;
	int error;

	f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
	if (unlikely(!f))
		return ERR_PTR(-ENOMEM);

	f->f_cred = get_cred(cred);
	error = security_file_alloc(f);
	if (unlikely(error)) {
		/* not visible to anyone else yet - free it synchronously */
		file_free_rcu(&f->f_rcuhead);
		return ERR_PTR(error);
	}

	atomic_long_set(&f->f_count, 1);
	rwlock_init(&f->f_owner.lock);
	spin_lock_init(&f->f_lock);
	mutex_init(&f->f_pos_lock);
	f->f_flags = flags;
	f->f_mode = OPEN_FMODE(flags);

	return f;
}
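
/*
 * Find an unused file structure and return a pointer to it.  Returns an
 * error pointer on failure, e.g. if we are over the file-max limit, run
 * out of memory, or the operation is not permitted.
 *
 * Be very careful using this.  You are responsible for getting write
 * access to any mount that you might assign to this filp, if it is opened
 * for write.  If this is not done, the mount's writer count will be
 * imbalanced and a warning triggered at __fput() time.
 */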
struct file *alloc_empty_file(int flags, const struct cred *cred)
{
	static long old_max;
	struct file *f;

	/*
	 * Privileged users can go above max_files
	 */
	if (get_nr_files() >= files_stat.max_files && !capable(CAP_SYS_ADMIN)) {
		/*
		 * percpu_counters are inaccurate.  Do an expensive check
		 * before we go and fail.
		 */
		if (percpu_counter_sum_positive(&nr_files) >= files_stat.max_files)
			goto over;
	}

	f = __alloc_file(flags, cred);
	if (!IS_ERR(f))
		percpu_counter_inc(&nr_files);

	return f;

over:
	/* Ran out of filps - report that */
	if (get_nr_files() > old_max) {
		pr_info("VFS: file-max limit %lu reached\n", get_max_files());
		old_max = get_nr_files();
	}
	return ERR_PTR(-ENFILE);
}

/*
 * Variant of alloc_empty_file() that doesn't check or modify nr_files.
 * The file is marked FMODE_NOACCOUNT so that file_free() skips the
 * accounting as well.
 */
struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
{
	struct file *f = __alloc_file(flags, cred);

	if (!IS_ERR(f))
		f->f_mode |= FMODE_NOACCOUNT;

	return f;
}

/**
 * alloc_file - allocate and initialize a 'struct file'
 * @path: the (dentry, vfsmount) pair for the new file
 * @flags: O_... flags with which the new file will be opened
 * @fop: the 'struct file_operations' for the new file
 */
static struct file *alloc_file(const struct path *path, int flags,
		const struct file_operations *fop)
{
	struct file *file;

	file = alloc_empty_file(flags, current_cred());
	if (IS_ERR(file))
		return file;

	file->f_path = *path;
	file->f_inode = path->dentry->d_inode;
	file->f_mapping = path->dentry->d_inode->i_mapping;
	file->f_wb_err = filemap_sample_wb_err(file->f_mapping);
	file->f_sb_err = file_sample_sb_err(file);
	if (fop->llseek)
		file->f_mode |= FMODE_LSEEK;
	if ((file->f_mode & FMODE_READ) &&
	     likely(fop->read || fop->read_iter))
		file->f_mode |= FMODE_CAN_READ;
	if ((file->f_mode & FMODE_WRITE) &&
	     likely(fop->write || fop->write_iter))
		file->f_mode |= FMODE_CAN_WRITE;
	file->f_iocb_flags = iocb_flags(file);
	file->f_mode |= FMODE_OPENED;
	file->f_op = fop;
	if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
		i_readcount_inc(path->dentry->d_inode);
	return file;
}

struct file *alloc_file_pseudo(struct inode *inode, struct vfsmount *mnt,
				const char *name, int flags,
				const struct file_operations *fops)
{
	static const struct dentry_operations anon_ops = {
		.d_dname = simple_dname
	};
	struct qstr this = QSTR_INIT(name, strlen(name));
	struct path path;
	struct file *file;

	path.dentry = d_alloc_pseudo(mnt->mnt_sb, &this);
	if (!path.dentry)
		return ERR_PTR(-ENOMEM);
	if (!mnt->mnt_sb->s_d_op)
		d_set_d_op(path.dentry, &anon_ops);
	path.mnt = mntget(mnt);
	d_instantiate(path.dentry, inode);
	file = alloc_file(&path, flags, fops);
	if (IS_ERR(file)) {
		ihold(inode);
		path_put(&path);
	}
	return file;
}
EXPORT_SYMBOL(alloc_file_pseudo);
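
/*
 * Illustrative only: a pseudo filesystem that already owns an inode and an
 * internal mount would typically call this as
 *
 *	file = alloc_file_pseudo(inode, my_mnt, "[my-obj]", O_RDWR, &my_fops);
 *	if (IS_ERR(file))
 *		iput(inode);
 *
 * where my_mnt and my_fops are hypothetical names.  On failure, the ihold()
 * above compensates for the reference the dentry took over, so the caller
 * still owns its inode reference and must drop it itself.
 */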

struct file *alloc_file_clone(struct file *base, int flags,
				const struct file_operations *fops)
{
	struct file *f = alloc_file(&base->f_path, flags, fops);
	if (!IS_ERR(f)) {
		path_get(&f->f_path);
		f->f_mapping = base->f_mapping;
	}
	return f;
}

/* the real guts of fput() - releasing the last reference to file */
static void __fput(struct file *file)
{
	struct dentry *dentry = file->f_path.dentry;
	struct vfsmount *mnt = file->f_path.mnt;
	struct inode *inode = file->f_inode;
	fmode_t mode = file->f_mode;

	if (unlikely(!(file->f_mode & FMODE_OPENED)))
		goto out;

	might_sleep();

	fsnotify_close(file);
	/*
	 * eventpoll_release() should be the first call in the file
	 * cleanup chain.
	 */
	eventpoll_release(file);
	locks_remove_file(file);

	ima_file_free(file);
	if (unlikely(file->f_flags & FASYNC)) {
		if (file->f_op->fasync)
			file->f_op->fasync(-1, file, 0);
	}
	if (file->f_op->release)
		file->f_op->release(inode, file);
	if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
		     !(mode & FMODE_PATH))) {
		cdev_put(inode->i_cdev);
	}
	fops_put(file->f_op);
	put_pid(file->f_owner.pid);
	if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
		i_readcount_dec(inode);
	if (mode & FMODE_WRITER) {
		put_write_access(inode);
		__mnt_drop_write(mnt);
	}
	dput(dentry);
	if (unlikely(mode & FMODE_NEED_UNMOUNT))
		dissolve_on_fput(mnt);
	mntput(mnt);
out:
	file_free(file);
}

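/*
 * The final fput() is not always allowed to run __fput() directly (it may
 * sleep): interrupt context and kernel threads queue the file on
 * delayed_fput_list and let a workqueue tear it down, while normal tasks
 * schedule the teardown via task_work so that it runs before the return
 * to userspace.
 */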
static LLIST_HEAD(delayed_fput_list);
static void delayed_fput(struct work_struct *unused)
{
	struct llist_node *node = llist_del_all(&delayed_fput_list);
	struct file *f, *t;

	llist_for_each_entry_safe(f, t, node, f_llist)
		__fput(f);
}

static void ____fput(struct callback_head *work)
{
	__fput(container_of(work, struct file, f_rcuhead));
}
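
/*
 * Flush the files whose final fput() was deferred to the delayed_fput
 * workqueue.  Callers that must be certain all pending __fput() work has
 * completed (historically, early boot before the first execve()) can use
 * this, but it must never be called with locks held or from a context
 * that might itself be involved in an umount.
 */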
void flush_delayed_fput(void)
{
	delayed_fput(NULL);
}
EXPORT_SYMBOL_GPL(flush_delayed_fput);

static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);

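/*
 * fput - drop a reference to a 'struct file'
 *
 * Safe to call from any context, including interrupt context and kernel
 * threads; the actual teardown is deferred as described above.
 */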
void fput(struct file *file)
{
	if (atomic_long_dec_and_test(&file->f_count)) {
		struct task_struct *task = current;

		if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
			init_task_work(&file->f_rcuhead, ____fput);
			if (!task_work_add(task, &file->f_rcuhead, TWA_RESUME))
				return;
			/*
			 * After this task has run exit_task_work(),
			 * task_work_add() will fail.  Fall through to delayed
			 * fput to avoid leaking *file.
			 */
		}

		if (llist_add(&file->f_llist, &delayed_fput_list))
			schedule_delayed_work(&delayed_fput_work, 1);
	}
}

/*
 * Synchronous analog of fput(): the final __fput() runs immediately in the
 * caller's context instead of being deferred.  Only for kernel threads,
 * hence the BUG_ON() below.
 */
void __fput_sync(struct file *file)
{
	if (atomic_long_dec_and_test(&file->f_count)) {
		struct task_struct *task = current;

		BUG_ON(!(task->flags & PF_KTHREAD));
		__fput(file);
	}
}

EXPORT_SYMBOL(fput);
EXPORT_SYMBOL(__fput_sync);

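/*
 * Boot-time initialization: set up the slab cache that backs all
 * 'struct file' allocations and the percpu counter used for accounting.
 */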
void __init files_init(void)
{
	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
			SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT, NULL);
	percpu_counter_init(&nr_files, 0, GFP_KERNEL);
}

/*
 * One file with associated inode and dcache is very roughly 1K.  Per
 * default do not use more than 10% of our memory for files.
 */
void __init files_maxfiles_init(void)
{
	unsigned long n;
	unsigned long nr_pages = totalram_pages();
	unsigned long memreserve = (nr_pages - nr_free_pages()) * 3/2;

	memreserve = min(memreserve, nr_pages - 1);
	n = ((nr_pages - memreserve) * (PAGE_SIZE / 1024)) / 10;

	files_stat.max_files = max_t(unsigned long, n, NR_FILE);
}