// SPDX-License-Identifier: GPL-2.0-only
/*
 * (C) 1997 Linus Torvalds
 * (C) 1999 Andrea Arcangeli <andrea@suse.de> (dynamic inode allocation)
 */
#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/backing-dev.h>
#include <linux/hash.h>
#include <linux/swap.h>
#include <linux/security.h>
#include <linux/cdev.h>
#include <linux/memblock.h>
#include <linux/fsnotify.h>
#include <linux/mount.h>
#include <linux/posix_acl.h>
#include <linux/prefetch.h>
#include <linux/buffer_head.h> /* for inode_has_buffers */
#include <linux/ratelimit.h>
#include <linux/list_lru.h>
#include <linux/iversion.h>
#include <trace/events/writeback.h>
#include "internal.h"

/*
 * Inode locking rules:
 *
 * inode->i_lock protects:
 *   inode->i_state, inode->i_hash, __iget(), inode->i_io_list
 * Inode LRU list locks protect:
 *   inode->i_sb->s_inode_lru, inode->i_lru
 * inode->i_sb->s_inode_list_lock protects:
 *   inode->i_sb->s_inodes, inode->i_sb_list
 * bdi->wb.list_lock protects:
 *   bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_io_list
 * inode_hash_lock protects:
 *   inode_hashtable, inode->i_hash
 *
 * Lock ordering:
 *
 * inode->i_sb->s_inode_list_lock
 *   inode->i_lock
 *     Inode LRU list locks
 *
 * bdi->wb.list_lock
 *   inode->i_lock
 *
 * inode_hash_lock
 *   inode->i_sb->s_inode_list_lock
 *   inode->i_lock
 *
 * iunique_lock
 *   inode_hash_lock
 */

static unsigned int i_hash_mask __read_mostly;
static unsigned int i_hash_shift __read_mostly;
static struct hlist_head *inode_hashtable __read_mostly;
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);

/*
 * Empty aops. Can be used for the cases where the user does not
 * define any of the address_space operations.
 */
const struct address_space_operations empty_aops = {
};
EXPORT_SYMBOL(empty_aops);

static DEFINE_PER_CPU(unsigned long, nr_inodes);
static DEFINE_PER_CPU(unsigned long, nr_unused);

static struct kmem_cache *inode_cachep __read_mostly;

static long get_nr_inodes(void)
{
	int i;
	long sum = 0;
	for_each_possible_cpu(i)
		sum += per_cpu(nr_inodes, i);
	return sum < 0 ? 0 : sum;
}

static inline long get_nr_inodes_unused(void)
{
	int i;
	long sum = 0;
	for_each_possible_cpu(i)
		sum += per_cpu(nr_unused, i);
	return sum < 0 ? 0 : sum;
}

long get_nr_dirty_inodes(void)
{
	/* not actually dirty inodes, but a wild approximation */
	long nr_dirty = get_nr_inodes() - get_nr_inodes_unused();
	return nr_dirty > 0 ? nr_dirty : 0;
}

/*
 * Handle the fs.inode-nr and fs.inode-state sysctls.
 */
#ifdef CONFIG_SYSCTL
/*
 * Statistics gathering..
 */
static struct inodes_stat_t inodes_stat;

static int proc_nr_inodes(struct ctl_table *table, int write, void *buffer,
			  size_t *lenp, loff_t *ppos)
{
	inodes_stat.nr_inodes = get_nr_inodes();
	inodes_stat.nr_unused = get_nr_inodes_unused();
	return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}

static struct ctl_table inodes_sysctls[] = {
	{
		.procname	= "inode-nr",
		.data		= &inodes_stat,
		.maxlen		= 2*sizeof(long),
		.mode		= 0444,
		.proc_handler	= proc_nr_inodes,
	},
	{
		.procname	= "inode-state",
		.data		= &inodes_stat,
		.maxlen		= 7*sizeof(long),
		.mode		= 0444,
		.proc_handler	= proc_nr_inodes,
	},
	{ }
};

static int __init init_fs_inode_sysctls(void)
{
	register_sysctl_init("fs", inodes_sysctls);
	return 0;
}
early_initcall(init_fs_inode_sysctls);
#endif

static int no_open(struct inode *inode, struct file *file)
{
	return -ENXIO;
}

/**
 * inode_init_always - perform inode structure initialisation
 * @sb: superblock inode belongs to
 * @inode: inode to initialise
 *
 * These are initializations that need to be done on every inode
 * allocation as the fields are not initialised by slab allocation.
 */
int inode_init_always(struct super_block *sb, struct inode *inode)
{
	static const struct inode_operations empty_iops;
	static const struct file_operations no_open_fops = {.open = no_open};
	struct address_space *const mapping = &inode->i_data;

	inode->i_sb = sb;
	inode->i_blkbits = sb->s_blocksize_bits;
	inode->i_flags = 0;
	atomic64_set(&inode->i_sequence, 0);
	atomic_set(&inode->i_count, 1);
	inode->i_op = &empty_iops;
	inode->i_fop = &no_open_fops;
	inode->i_ino = 0;
	inode->__i_nlink = 1;
	inode->i_opflags = 0;
	if (sb->s_xattr)
		inode->i_opflags |= IOP_XATTR;
	i_uid_write(inode, 0);
	i_gid_write(inode, 0);
	atomic_set(&inode->i_writecount, 0);
	inode->i_size = 0;
	inode->i_write_hint = WRITE_LIFE_NOT_SET;
	inode->i_blocks = 0;
	inode->i_bytes = 0;
	inode->i_generation = 0;
	inode->i_pipe = NULL;
	inode->i_cdev = NULL;
	inode->i_link = NULL;
	inode->i_dir_seq = 0;
	inode->i_rdev = 0;
	inode->dirtied_when = 0;

#ifdef CONFIG_CGROUP_WRITEBACK
	inode->i_wb_frn_winner = 0;
	inode->i_wb_frn_avg_time = 0;
	inode->i_wb_frn_history = 0;
#endif

	spin_lock_init(&inode->i_lock);
	lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);

	init_rwsem(&inode->i_rwsem);
	lockdep_set_class(&inode->i_rwsem, &sb->s_type->i_mutex_key);

	atomic_set(&inode->i_dio_count, 0);

	mapping->a_ops = &empty_aops;
	mapping->host = inode;
	mapping->flags = 0;
	mapping->wb_err = 0;
	atomic_set(&mapping->i_mmap_writable, 0);
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
	atomic_set(&mapping->nr_thps, 0);
#endif
	mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
	mapping->private_data = NULL;
	mapping->writeback_index = 0;
	init_rwsem(&mapping->invalidate_lock);
	lockdep_set_class_and_name(&mapping->invalidate_lock,
				   &sb->s_type->invalidate_lock_key,
				   "mapping.invalidate_lock");
	inode->i_private = NULL;
	inode->i_mapping = mapping;
	INIT_HLIST_HEAD(&inode->i_dentry);
#ifdef CONFIG_FS_POSIX_ACL
	inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
#endif

#ifdef CONFIG_FSNOTIFY
	inode->i_fsnotify_mask = 0;
#endif
	inode->i_flctx = NULL;

	if (unlikely(security_inode_alloc(inode)))
		return -ENOMEM;
	this_cpu_inc(nr_inodes);

	return 0;
}
EXPORT_SYMBOL(inode_init_always);

void free_inode_nonrcu(struct inode *inode)
{
	kmem_cache_free(inode_cachep, inode);
}
EXPORT_SYMBOL(free_inode_nonrcu);

static void i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);
	if (inode->free_inode)
		inode->free_inode(inode);
	else
		free_inode_nonrcu(inode);
}

static struct inode *alloc_inode(struct super_block *sb)
{
	const struct super_operations *ops = sb->s_op;
	struct inode *inode;

	if (ops->alloc_inode)
		inode = ops->alloc_inode(sb);
	else
		inode = alloc_inode_sb(sb, inode_cachep, GFP_KERNEL);

	if (!inode)
		return NULL;

	if (unlikely(inode_init_always(sb, inode))) {
		if (ops->destroy_inode) {
			ops->destroy_inode(inode);
			if (!ops->free_inode)
				return NULL;
		}
		inode->free_inode = ops->free_inode;
		i_callback(&inode->i_rcu);
		return NULL;
	}

	return inode;
}

void __destroy_inode(struct inode *inode)
{
	BUG_ON(inode_has_buffers(inode));
	inode_detach_wb(inode);
	security_inode_free(inode);
	fsnotify_inode_delete(inode);
	locks_free_lock_context(inode);
	if (!inode->i_nlink) {
		WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0);
		atomic_long_dec(&inode->i_sb->s_remove_count);
	}

#ifdef CONFIG_FS_POSIX_ACL
	if (inode->i_acl && !is_uncached_acl(inode->i_acl))
		posix_acl_release(inode->i_acl);
	if (inode->i_default_acl && !is_uncached_acl(inode->i_default_acl))
		posix_acl_release(inode->i_default_acl);
#endif
	this_cpu_dec(nr_inodes);
}
EXPORT_SYMBOL(__destroy_inode);

static void destroy_inode(struct inode *inode)
{
	const struct super_operations *ops = inode->i_sb->s_op;

	BUG_ON(!list_empty(&inode->i_lru));
	__destroy_inode(inode);
	if (ops->destroy_inode) {
		ops->destroy_inode(inode);
		if (!ops->free_inode)
			return;
	}
	inode->free_inode = ops->free_inode;
	call_rcu(&inode->i_rcu, i_callback);
}

/**
 * drop_nlink - directly drop an inode's link count
 * @inode: inode
 *
 * This is a low-level filesystem helper to replace any
 * direct filesystem manipulation of i_nlink.  In cases
 * where we are attempting to track writes to the
 * filesystem, a decrement to zero means an imminent
 * write when the file is truncated and actually unlinked
 * on the filesystem.
 */
void drop_nlink(struct inode *inode)
{
	WARN_ON(inode->i_nlink == 0);
	inode->__i_nlink--;
	if (!inode->i_nlink)
		atomic_long_inc(&inode->i_sb->s_remove_count);
}
EXPORT_SYMBOL(drop_nlink);

/**
 * clear_nlink - directly zero an inode's link count
 * @inode: inode
 *
 * This is a low-level filesystem helper to replace any
 * direct filesystem manipulation of i_nlink.  See
 * drop_nlink() for why we care about i_nlink hitting zero.
 */
void clear_nlink(struct inode *inode)
{
	if (inode->i_nlink) {
		inode->__i_nlink = 0;
		atomic_long_inc(&inode->i_sb->s_remove_count);
	}
}
EXPORT_SYMBOL(clear_nlink);

/**
 * set_nlink - directly set an inode's link count
 * @inode: inode
 * @nlink: new nlink (should be non-zero)
 *
 * This is a low-level filesystem helper to replace any
 * direct filesystem manipulation of i_nlink.
 */
void set_nlink(struct inode *inode, unsigned int nlink)
{
	if (!nlink) {
		clear_nlink(inode);
	} else {
		/* Yes, some filesystems do change nlink from zero to one */
		if (inode->i_nlink == 0)
			atomic_long_dec(&inode->i_sb->s_remove_count);

		inode->__i_nlink = nlink;
	}
}
EXPORT_SYMBOL(set_nlink);
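
/*
 * Illustrative sketch (not part of this file): a minimal ->unlink pairs
 * drop_nlink() with the usual timestamp updates, as the libfs helper
 * simple_unlink() in fs/libfs.c does:
 *
 *	static int example_unlink(struct inode *dir, struct dentry *dentry)
 *	{
 *		struct inode *inode = d_inode(dentry);
 *
 *		inode->i_ctime = dir->i_ctime = dir->i_mtime =
 *							current_time(inode);
 *		drop_nlink(inode);
 *		dput(dentry);
 *		return 0;
 *	}
 */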

/**
 * inc_nlink - directly increment an inode's link count
 * @inode: inode
 *
 * This is a low-level filesystem helper to replace any
 * direct filesystem manipulation of i_nlink.  Currently,
 * it is only here for parity with dec_nlink().
 */
void inc_nlink(struct inode *inode)
{
	if (unlikely(inode->i_nlink == 0)) {
		WARN_ON(!(inode->i_state & I_LINKABLE));
		atomic_long_dec(&inode->i_sb->s_remove_count);
	}

	inode->__i_nlink++;
}
EXPORT_SYMBOL(inc_nlink);

static void __address_space_init_once(struct address_space *mapping)
{
	xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT);
	init_rwsem(&mapping->i_mmap_rwsem);
	INIT_LIST_HEAD(&mapping->private_list);
	spin_lock_init(&mapping->private_lock);
	mapping->i_mmap = RB_ROOT_CACHED;
}

void address_space_init_once(struct address_space *mapping)
{
	memset(mapping, 0, sizeof(*mapping));
	__address_space_init_once(mapping);
}
EXPORT_SYMBOL(address_space_init_once);

/*
 * These are initializations that only need to be done
 * once, because the fields are idempotent across use
 * of the inode, so let the slab aware of that.
 */
void inode_init_once(struct inode *inode)
{
	memset(inode, 0, sizeof(*inode));
	INIT_HLIST_NODE(&inode->i_hash);
	INIT_LIST_HEAD(&inode->i_devices);
	INIT_LIST_HEAD(&inode->i_io_list);
	INIT_LIST_HEAD(&inode->i_wb_list);
	INIT_LIST_HEAD(&inode->i_lru);
	INIT_LIST_HEAD(&inode->i_sb_list);
	__address_space_init_once(&inode->i_data);
	i_size_ordered_init(inode);
}
EXPORT_SYMBOL(inode_init_once);

static void init_once(void *foo)
{
	struct inode *inode = (struct inode *) foo;

	inode_init_once(inode);
}

/*
 * inode->i_lock must be held
 */
void __iget(struct inode *inode)
{
	atomic_inc(&inode->i_count);
}

/*
 * get additional reference to inode; caller must already hold one.
 */
void ihold(struct inode *inode)
{
	WARN_ON(atomic_inc_return(&inode->i_count) < 2);
}
EXPORT_SYMBOL(ihold);

static void __inode_add_lru(struct inode *inode, bool rotate)
{
	if (inode->i_state & (I_DIRTY_ALL | I_SYNC | I_FREEING | I_WILL_FREE))
		return;
	if (atomic_read(&inode->i_count))
		return;
	if (!(inode->i_sb->s_flags & SB_ACTIVE))
		return;
	if (!mapping_shrinkable(&inode->i_data))
		return;

	if (list_lru_add(&inode->i_sb->s_inode_lru, &inode->i_lru))
		this_cpu_inc(nr_unused);
	else if (rotate)
		inode->i_state |= I_REFERENCED;
}

/*
 * Add inode to LRU if needed (inode is unused and clean).
 *
 * Needs inode->i_lock held.
 */
void inode_add_lru(struct inode *inode)
{
	__inode_add_lru(inode, false);
}

static void inode_lru_list_del(struct inode *inode)
{
	if (list_lru_del(&inode->i_sb->s_inode_lru, &inode->i_lru))
		this_cpu_dec(nr_unused);
}

/**
 * inode_sb_list_add - add inode to the superblock list of inodes
 * @inode: inode to add
 */
void inode_sb_list_add(struct inode *inode)
{
	spin_lock(&inode->i_sb->s_inode_list_lock);
	list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
	spin_unlock(&inode->i_sb->s_inode_list_lock);
}
EXPORT_SYMBOL_GPL(inode_sb_list_add);

static inline void inode_sb_list_del(struct inode *inode)
{
	if (!list_empty(&inode->i_sb_list)) {
		spin_lock(&inode->i_sb->s_inode_list_lock);
		list_del_init(&inode->i_sb_list);
		spin_unlock(&inode->i_sb->s_inode_list_lock);
	}
}

static unsigned long hash(struct super_block *sb, unsigned long hashval)
{
	unsigned long tmp;

	tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
			L1_CACHE_BYTES;
	tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> i_hash_shift);
	return tmp & i_hash_mask;
}

/**
 *	__insert_inode_hash - hash an inode
 *	@inode: unhashed inode
 *	@hashval: unsigned long value used to locate this object in the
 *		inode_hashtable.
 *
 *	Add an inode to the inode hash for this superblock.
 */
void __insert_inode_hash(struct inode *inode, unsigned long hashval)
{
	struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval);

	spin_lock(&inode_hash_lock);
	spin_lock(&inode->i_lock);
	hlist_add_head_rcu(&inode->i_hash, b);
	spin_unlock(&inode->i_lock);
	spin_unlock(&inode_hash_lock);
}
EXPORT_SYMBOL(__insert_inode_hash);
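
/*
 * Note (sketch, not from the original file): most filesystems reach this
 * through the insert_inode_hash() wrapper in <linux/fs.h>, which simply
 * hashes on the inode number:
 *
 *	inode->i_ino = some_unique_number;
 *	insert_inode_hash(inode);  // __insert_inode_hash(inode, inode->i_ino)
 */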

/**
 *	__remove_inode_hash - remove an inode from the hash
 *	@inode: inode to unhash
 *
 *	Remove an inode from the superblock.
 */
void __remove_inode_hash(struct inode *inode)
{
	spin_lock(&inode_hash_lock);
	spin_lock(&inode->i_lock);
	hlist_del_init_rcu(&inode->i_hash);
	spin_unlock(&inode->i_lock);
	spin_unlock(&inode_hash_lock);
}
EXPORT_SYMBOL(__remove_inode_hash);

void dump_mapping(const struct address_space *mapping)
{
	struct inode *host;
	const struct address_space_operations *a_ops;
	struct hlist_node *dentry_first;
	struct dentry *dentry_ptr;
	struct dentry dentry;
	unsigned long ino;

	/*
	 * If mapping is an invalid pointer, we don't want to crash
	 * accessing it, so probe everything depending on it carefully.
	 */
	if (get_kernel_nofault(host, &mapping->host) ||
	    get_kernel_nofault(a_ops, &mapping->a_ops)) {
		pr_warn("invalid mapping:%px\n", mapping);
		return;
	}

	if (!host) {
		pr_warn("aops:%ps\n", a_ops);
		return;
	}

	if (get_kernel_nofault(dentry_first, &host->i_dentry.first) ||
	    get_kernel_nofault(ino, &host->i_ino)) {
		pr_warn("aops:%ps invalid inode:%px\n", a_ops, host);
		return;
	}

	if (!dentry_first) {
		pr_warn("aops:%ps ino:%lx\n", a_ops, ino);
		return;
	}

	dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias);
	if (get_kernel_nofault(dentry, dentry_ptr)) {
		pr_warn("aops:%ps ino:%lx invalid dentry:%px\n",
				a_ops, ino, dentry_ptr);
		return;
	}

	/*
	 * If dentry is corrupted, the %pd handler may still crash, but
	 * it is unlikely that we reach here with a corrupt mapping.
	 */
	pr_warn("aops:%ps ino:%lx dentry name:\"%pd\"\n", a_ops, ino, &dentry);
}

void clear_inode(struct inode *inode)
{
	/*
	 * We have to cycle the i_pages lock here because reclaim can be in
	 * the process of removing the last page (in
	 * __filemap_remove_folio()) and we must not free the mapping under
	 * it.
	 */
	xa_lock_irq(&inode->i_data.i_pages);
	BUG_ON(inode->i_data.nrpages);
	/*
	 * Almost always, mapping_empty(&inode->i_data) here; but there are
	 * rare cases where entries can be left behind, so do not BUG_ON()
	 * on a non-empty mapping.
	 */
	xa_unlock_irq(&inode->i_data.i_pages);
	BUG_ON(!list_empty(&inode->i_data.private_list));
	BUG_ON(!(inode->i_state & I_FREEING));
	BUG_ON(inode->i_state & I_CLEAR);
	BUG_ON(!list_empty(&inode->i_wb_list));
	/* don't need i_lock here, no concurrent mods to i_state */
	inode->i_state = I_FREEING | I_CLEAR;
}
EXPORT_SYMBOL(clear_inode);

/*
 * Free the inode passed in, removing it from the lists it is still connected
 * to.  We remove any pages still attached to the inode and wait for any IO
 * that is still in progress before finally destroying the inode.
 *
 * An inode must already be marked I_FREEING so that we avoid the inode being
 * moved back onto lists if we race with other code that manipulates the
 * lists (e.g. writeback_single_inode).  The caller is responsible for
 * setting this.
 *
 * An inode must already be removed from the LRU list before being evicted
 * from the cache.  This should occur atomically with setting the I_FREEING
 * state flag, so no inodes here should ever be on the LRU when being
 * evicted.
 */
static void evict(struct inode *inode)
{
	const struct super_operations *op = inode->i_sb->s_op;

	BUG_ON(!(inode->i_state & I_FREEING));
	BUG_ON(!list_empty(&inode->i_lru));

	if (!list_empty(&inode->i_io_list))
		inode_io_list_del(inode);

	inode_sb_list_del(inode);

	/*
	 * Wait for flusher thread to be done with the inode so that
	 * filesystem does not start destroying it while writeback is still
	 * running.  Since the inode has I_FREEING set, flusher thread won't
	 * start new work on the inode.  We just have to wait for running
	 * writeback to finish.
	 */
	inode_wait_for_writeback(inode);

	if (op->evict_inode) {
		op->evict_inode(inode);
	} else {
		truncate_inode_pages_final(&inode->i_data);
		clear_inode(inode);
	}
	if (S_ISCHR(inode->i_mode) && inode->i_cdev)
		cd_forget(inode);

	remove_inode_hash(inode);

	spin_lock(&inode->i_lock);
	wake_up_bit(&inode->i_state, __I_NEW);
	BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
	spin_unlock(&inode->i_lock);

	destroy_inode(inode);
}

/*
 * dispose_list - dispose of the contents of a local list
 * @head: the head of the list to free
 *
 * Dispose-list gets a local list with local inodes in it, so it doesn't
 * need to worry about list corruption and SMP locks.
 */
static void dispose_list(struct list_head *head)
{
	while (!list_empty(head)) {
		struct inode *inode;

		inode = list_first_entry(head, struct inode, i_lru);
		list_del_init(&inode->i_lru);

		evict(inode);
		cond_resched();
	}
}

/**
 * evict_inodes	- evict all evictable inodes for a superblock
 * @sb:		superblock to operate on
 *
 * Make sure that no inodes with zero refcount are retained.  This is
 * called by superblock shutdown after having SB_ACTIVE flag removed,
 * so any inode reaching zero refcount during or after that call will
 * be immediately evicted.
 */
void evict_inodes(struct super_block *sb)
{
	struct inode *inode, *next;
	LIST_HEAD(dispose);

again:
	spin_lock(&sb->s_inode_list_lock);
	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
		if (atomic_read(&inode->i_count))
			continue;

		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
			spin_unlock(&inode->i_lock);
			continue;
		}

		inode->i_state |= I_FREEING;
		inode_lru_list_del(inode);
		spin_unlock(&inode->i_lock);
		list_add(&inode->i_lru, &dispose);

		/*
		 * We can have a ton of inodes to evict at unmount time given
		 * enough memory, check to see if we need to go to sleep for a
		 * bit so we don't livelock.
		 */
		if (need_resched()) {
			spin_unlock(&sb->s_inode_list_lock);
			cond_resched();
			dispose_list(&dispose);
			goto again;
		}
	}
	spin_unlock(&sb->s_inode_list_lock);

	dispose_list(&dispose);
}
EXPORT_SYMBOL_GPL(evict_inodes);

/**
 * invalidate_inodes	- attempt to free all inodes on a superblock
 * @sb:		superblock to operate on
 * @kill_dirty: flag to guide handling of dirty inodes
 *
 * Attempts to free all inodes for a given superblock.  If there were any
 * busy inodes return a non-zero value, else zero.  If @kill_dirty is set,
 * discard dirty inodes too, otherwise treat them as busy.
 */
int invalidate_inodes(struct super_block *sb, bool kill_dirty)
{
	int busy = 0;
	struct inode *inode, *next;
	LIST_HEAD(dispose);

again:
	spin_lock(&sb->s_inode_list_lock);
	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		if (inode->i_state & I_DIRTY_ALL && !kill_dirty) {
			spin_unlock(&inode->i_lock);
			busy = 1;
			continue;
		}
		if (atomic_read(&inode->i_count)) {
			spin_unlock(&inode->i_lock);
			busy = 1;
			continue;
		}

		inode->i_state |= I_FREEING;
		inode_lru_list_del(inode);
		spin_unlock(&inode->i_lock);
		list_add(&inode->i_lru, &dispose);
		if (need_resched()) {
			spin_unlock(&sb->s_inode_list_lock);
			cond_resched();
			dispose_list(&dispose);
			goto again;
		}
	}
	spin_unlock(&sb->s_inode_list_lock);

	dispose_list(&dispose);

	return busy;
}

/*
 * Isolate the inode from the LRU in preparation for freeing it.
 *
 * If the inode has the I_REFERENCED flag set, then it means that it has been
 * used recently - the flag is set in iput_final(). When we encounter such an
 * inode, clear the flag and move it to the back of the LRU so it gets another
 * pass through the LRU before it gets reclaimed. This is necessary because of
 * the fact we are doing lazy LRU updates to minimise lock contention so the
 * LRU does not have strict ordering. Hence we don't want to reclaim inodes
 * with this flag set because they are the inodes that are out of order.
 */
static enum lru_status inode_lru_isolate(struct list_head *item,
		struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
{
	struct list_head *freeable = arg;
	struct inode	*inode = container_of(item, struct inode, i_lru);

	/*
	 * We are inverting the lru lock/inode->i_lock here, so use a
	 * trylock. If we fail to get the lock, just skip it.
	 */
	if (!spin_trylock(&inode->i_lock))
		return LRU_SKIP;

	/*
	 * Inodes can get referenced, redirtied, or repopulated while
	 * they're already on the LRU, and this can make them
	 * unreclaimable for a while. Remove them lazily here; iput,
	 * sync, or the last page cache deletion will requeue them.
	 */
	if (atomic_read(&inode->i_count) ||
	    (inode->i_state & ~I_REFERENCED) ||
	    !mapping_shrinkable(&inode->i_data)) {
		list_lru_isolate(lru, &inode->i_lru);
		spin_unlock(&inode->i_lock);
		this_cpu_dec(nr_unused);
		return LRU_REMOVED;
	}

	/* Recently referenced inodes get one more pass */
	if (inode->i_state & I_REFERENCED) {
		inode->i_state &= ~I_REFERENCED;
		spin_unlock(&inode->i_lock);
		return LRU_ROTATE;
	}

	/*
	 * An inode that still has buffers or page cache attached cannot be
	 * freed directly; try to strip those first, outside the locks, and
	 * retry the LRU walk.
	 */
	if (inode_has_buffers(inode) || !mapping_empty(&inode->i_data)) {
		__iget(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(lru_lock);
		if (remove_inode_buffers(inode)) {
			unsigned long reap;
			reap = invalidate_mapping_pages(&inode->i_data, 0, -1);
			if (current_is_kswapd())
				__count_vm_events(KSWAPD_INODESTEAL, reap);
			else
				__count_vm_events(PGINODESTEAL, reap);
			if (current->reclaim_state)
				current->reclaim_state->reclaimed_slab += reap;
		}
		iput(inode);
		spin_lock(lru_lock);
		return LRU_RETRY;
	}

	WARN_ON(inode->i_state & I_NEW);
	inode->i_state |= I_FREEING;
	list_lru_isolate_move(lru, &inode->i_lru, freeable);
	spin_unlock(&inode->i_lock);

	this_cpu_dec(nr_unused);
	return LRU_REMOVED;
}

/*
 * Walk the superblock inode LRU for freeable inodes and attempt to free
 * them.  This is called from the superblock shrinker function with a number
 * of inodes to trim from the LRU.  Inodes to be freed are moved to a
 * temporary list and then are freed outside inode_lock by dispose_list().
 */
long prune_icache_sb(struct super_block *sb, struct shrink_control *sc)
{
	LIST_HEAD(freeable);
	long freed;

	freed = list_lru_shrink_walk(&sb->s_inode_lru, sc,
				     inode_lru_isolate, &freeable);
	dispose_list(&freeable);
	return freed;
}

static void __wait_on_freeing_inode(struct inode *inode);
/*
 * Called with the inode hash lock held.
 */
static struct inode *find_inode(struct super_block *sb,
				struct hlist_head *head,
				int (*test)(struct inode *, void *),
				void *data)
{
	struct inode *inode = NULL;

repeat:
	hlist_for_each_entry(inode, head, i_hash) {
		if (inode->i_sb != sb)
			continue;
		if (!test(inode, data))
			continue;
		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
			__wait_on_freeing_inode(inode);
			goto repeat;
		}
		if (unlikely(inode->i_state & I_CREATING)) {
			spin_unlock(&inode->i_lock);
			return ERR_PTR(-ESTALE);
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		return inode;
	}
	return NULL;
}

/*
 * find_inode_fast is the fast path version of find_inode, see the comment at
 * iget_locked for details.
 */
static struct inode *find_inode_fast(struct super_block *sb,
				struct hlist_head *head, unsigned long ino)
{
	struct inode *inode = NULL;

repeat:
	hlist_for_each_entry(inode, head, i_hash) {
		if (inode->i_ino != ino)
			continue;
		if (inode->i_sb != sb)
			continue;
		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
			__wait_on_freeing_inode(inode);
			goto repeat;
		}
		if (unlikely(inode->i_state & I_CREATING)) {
			spin_unlock(&inode->i_lock);
			return ERR_PTR(-ESTALE);
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		return inode;
	}
	return NULL;
}

/*
 * Each cpu owns a range of LAST_INO_BATCH numbers.
 * 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations,
 * to renew the exhausted range.
 *
 * This does not significantly increase overflow rate because every CPU can
 * consume at most LAST_INO_BATCH-1 unused inode numbers, so there is
 * NR_CPUS * (LAST_INO_BATCH-1) wastage at most.
 */
#define LAST_INO_BATCH 1024
static DEFINE_PER_CPU(unsigned int, last_ino);

unsigned int get_next_ino(void)
{
	unsigned int *p = &get_cpu_var(last_ino);
	unsigned int res = *p;

#ifdef CONFIG_SMP
	if (unlikely((res & (LAST_INO_BATCH-1)) == 0)) {
		static atomic_t shared_last_ino;
		int next = atomic_add_return(LAST_INO_BATCH, &shared_last_ino);

		res = next - LAST_INO_BATCH;
	}
#endif

	res++;
	/* get_next_ino should not provide a 0 inode number */
	if (unlikely(!res))
		res++;
	*p = res;
	put_cpu_var(last_ino);
	return res;
}
EXPORT_SYMBOL(get_next_ino);

/**
 *	new_inode_pseudo	- obtain an inode
 *	@sb: superblock
 *
 *	Allocates a new inode for given superblock.
 *	Inode won't be chained in superblock s_inodes list, which means it
 *	won't be seen by the writeback code, evict_inodes(), etc.
 */
struct inode *new_inode_pseudo(struct super_block *sb)
{
	struct inode *inode = alloc_inode(sb);

	if (inode) {
		spin_lock(&inode->i_lock);
		inode->i_state = 0;
		spin_unlock(&inode->i_lock);
	}
	return inode;
}

/**
 *	new_inode	- obtain an inode
 *	@sb: superblock
 *
 *	Allocates a new inode for given superblock. The default gfp_mask
 *	for allocations related to inode->i_mapping is GFP_HIGHUSER_MOVABLE.
 *	If HIGHMEM pages are unsuitable or it is known that pages allocated
 *	for the page cache are not reclaimable or migratable,
 *	mapping_set_gfp_mask() must be called with suitable flags on the
 *	newly created inode's mapping.
 */
struct inode *new_inode(struct super_block *sb)
{
	struct inode *inode;

	spin_lock_prefetch(&sb->s_inode_list_lock);

	inode = new_inode_pseudo(sb);
	if (inode)
		inode_sb_list_add(inode);
	return inode;
}
EXPORT_SYMBOL(new_inode);
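
/*
 * Illustrative sketch (not part of this file), in the style of a simple
 * in-memory filesystem: a freshly allocated inode is given a number,
 * ownership and timestamps before use.  "example_get_inode" is an assumed
 * name.
 *
 *	static struct inode *example_get_inode(struct super_block *sb,
 *					       const struct inode *dir,
 *					       umode_t mode)
 *	{
 *		struct inode *inode = new_inode(sb);
 *
 *		if (!inode)
 *			return NULL;
 *		inode->i_ino = get_next_ino();
 *		inode_init_owner(&init_user_ns, inode, dir, mode);
 *		inode->i_atime = inode->i_mtime = inode->i_ctime =
 *							current_time(inode);
 *		return inode;
 *	}
 */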

#ifdef CONFIG_DEBUG_LOCK_ALLOC
void lockdep_annotate_inode_mutex_key(struct inode *inode)
{
	if (S_ISDIR(inode->i_mode)) {
		struct file_system_type *type = inode->i_sb->s_type;

		/* Set new key only if filesystem hasn't already changed it */
		if (lockdep_match_class(&inode->i_rwsem, &type->i_mutex_key)) {
			/*
			 * ensure nobody is actually holding i_rwsem
			 */
			init_rwsem(&inode->i_rwsem);
			lockdep_set_class(&inode->i_rwsem,
					  &type->i_mutex_dir_key);
		}
	}
}
EXPORT_SYMBOL(lockdep_annotate_inode_mutex_key);
#endif

/**
 * unlock_new_inode - clear the I_NEW state and wake up any waiters
 * @inode:	new inode to unlock
 *
 * Called when the inode is fully initialised to clear the new state of the
 * inode and wake up anyone waiting for the inode to finish initialisation.
 */
void unlock_new_inode(struct inode *inode)
{
	lockdep_annotate_inode_mutex_key(inode);
	spin_lock(&inode->i_lock);
	WARN_ON(!(inode->i_state & I_NEW));
	inode->i_state &= ~I_NEW & ~I_CREATING;
	smp_mb();
	wake_up_bit(&inode->i_state, __I_NEW);
	spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(unlock_new_inode);

void discard_new_inode(struct inode *inode)
{
	lockdep_annotate_inode_mutex_key(inode);
	spin_lock(&inode->i_lock);
	WARN_ON(!(inode->i_state & I_NEW));
	inode->i_state &= ~I_NEW;
	smp_mb();
	wake_up_bit(&inode->i_state, __I_NEW);
	spin_unlock(&inode->i_lock);
	iput(inode);
}
EXPORT_SYMBOL(discard_new_inode);

/**
 * lock_two_nondirectories - take two i_rwsems on non-directory objects
 *
 * Lock any non-NULL argument that is not a directory.
 * Zero, one or two objects may be locked by this function.
 *
 * @inode1: first inode to lock
 * @inode2: second inode to lock
 */
void lock_two_nondirectories(struct inode *inode1, struct inode *inode2)
{
	if (inode1 > inode2)
		swap(inode1, inode2);

	if (inode1 && !S_ISDIR(inode1->i_mode))
		inode_lock(inode1);
	if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1)
		inode_lock_nested(inode2, I_MUTEX_NONDIR2);
}
EXPORT_SYMBOL(lock_two_nondirectories);

/**
 * unlock_two_nondirectories - release locks from lock_two_nondirectories()
 * @inode1: first inode to unlock
 * @inode2: second inode to unlock
 */
void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2)
{
	if (inode1 && !S_ISDIR(inode1->i_mode))
		inode_unlock(inode1);
	if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1)
		inode_unlock(inode2);
}
EXPORT_SYMBOL(unlock_two_nondirectories);

/**
 * inode_insert5 - obtain an inode from a mounted file system
 * @inode:	pre-allocated inode to use for insert to cache
 * @hashval:	hash value (usually inode number) to get
 * @test:	callback used for comparisons between inodes
 * @set:	callback used to initialize a new struct inode
 * @data:	opaque data pointer to pass to @test and @set
 *
 * Search for the inode specified by @hashval and @data in the inode cache,
 * and if present return it with an increased reference count.  This is a
 * variant of iget5_locked() for callers that don't want to fail on memory
 * allocation of inode.
 *
 * If the inode is not in cache, insert the pre-allocated inode to cache and
 * return it locked, hashed, and with the I_NEW flag set.  The file system
 * gets to fill it in before unlocking it via unlock_new_inode().
 *
 * Note both @test and @set are called with the inode_hash_lock held, so
 * can't sleep.
 */
struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
			    int (*test)(struct inode *, void *),
			    int (*set)(struct inode *, void *), void *data)
{
	struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
	struct inode *old;

again:
	spin_lock(&inode_hash_lock);
	old = find_inode(inode->i_sb, head, test, data);
	if (unlikely(old)) {
		/*
		 * Uhhuh, somebody else created the same inode under us.
		 * Use the old inode instead of the preallocated one.
		 */
		spin_unlock(&inode_hash_lock);
		if (IS_ERR(old))
			return NULL;
		wait_on_inode(old);
		if (unlikely(inode_unhashed(old))) {
			iput(old);
			goto again;
		}
		return old;
	}

	if (set && unlikely(set(inode, data))) {
		inode = NULL;
		goto unlock;
	}

	/*
	 * Return the locked inode with I_NEW set, the
	 * caller is responsible for filling in the contents
	 */
	spin_lock(&inode->i_lock);
	inode->i_state |= I_NEW;
	hlist_add_head_rcu(&inode->i_hash, head);
	spin_unlock(&inode->i_lock);

	/*
	 * Add inode to the sb list if it's not already. It has I_NEW at this
	 * point, so it should be safe to test i_sb_list locklessly.
	 */
	if (list_empty(&inode->i_sb_list))
		inode_sb_list_add(inode);
unlock:
	spin_unlock(&inode_hash_lock);

	return inode;
}
EXPORT_SYMBOL(inode_insert5);

/**
 * iget5_locked - obtain an inode from a mounted file system
 * @sb:		super block of file system
 * @hashval:	hash value (usually inode number) to get
 * @test:	callback used for comparisons between inodes
 * @set:	callback used to initialize a new struct inode
 * @data:	opaque data pointer to pass to @test and @set
 *
 * Search for the inode specified by @hashval and @data in the inode cache,
 * and if present return it with an increased reference count.  This is a
 * generalized version of iget_locked() for file systems where the inode
 * number is not sufficient for unique identification of an inode.
 *
 * If the inode is not in cache, allocate a new inode and return it locked,
 * hashed, and with the I_NEW flag set.  The file system gets to fill it in
 * before unlocking it via unlock_new_inode().
 *
 * Note both @test and @set are called with the inode_hash_lock held, so
 * can't sleep.
 */
struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
		int (*test)(struct inode *, void *),
		int (*set)(struct inode *, void *), void *data)
{
	struct inode *inode = ilookup5(sb, hashval, test, data);

	if (!inode) {
		struct inode *new = alloc_inode(sb);

		if (new) {
			new->i_state = 0;
			inode = inode_insert5(new, hashval, test, set, data);
			if (unlikely(inode != new))
				destroy_inode(new);
		}
	}
	return inode;
}
EXPORT_SYMBOL(iget5_locked);
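
/*
 * Illustrative sketch (not part of this file): a filesystem whose objects
 * are keyed by something wider than i_ino can use iget5_locked() with its
 * own @test/@set callbacks.  EXAMPLE_I() and the on-disk read step are
 * assumptions for the sake of the example.
 *
 *	static int example_test(struct inode *inode, void *data)
 *	{
 *		return EXAMPLE_I(inode)->object_id == *(u64 *)data;
 *	}
 *
 *	static int example_set(struct inode *inode, void *data)
 *	{
 *		EXAMPLE_I(inode)->object_id = *(u64 *)data;
 *		return 0;
 *	}
 *
 *	inode = iget5_locked(sb, hash_64(object_id, 32),
 *			     example_test, example_set, &object_id);
 *	if (!inode)
 *		return ERR_PTR(-ENOMEM);
 *	if (inode->i_state & I_NEW) {
 *		// ... read the object from disk ...
 *		unlock_new_inode(inode);
 *	}
 */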

/**
 * iget_locked - obtain an inode from a mounted file system
 * @sb:		super block of file system
 * @ino:	inode number to get
 *
 * Search for the inode specified by @ino in the inode cache and if present
 * return it with an increased reference count.  This is for file systems
 * where the inode number is sufficient for unique identification of an
 * inode.
 *
 * If the inode is not in cache, allocate a new inode and return it locked,
 * hashed, and with the I_NEW flag set.  The file system gets to fill it in
 * before unlocking it via unlock_new_inode().
 */
struct inode *iget_locked(struct super_block *sb, unsigned long ino)
{
	struct hlist_head *head = inode_hashtable + hash(sb, ino);
	struct inode *inode;
again:
	spin_lock(&inode_hash_lock);
	inode = find_inode_fast(sb, head, ino);
	spin_unlock(&inode_hash_lock);
	if (inode) {
		if (IS_ERR(inode))
			return NULL;
		wait_on_inode(inode);
		if (unlikely(inode_unhashed(inode))) {
			iput(inode);
			goto again;
		}
		return inode;
	}

	inode = alloc_inode(sb);
	if (inode) {
		struct inode *old;

		spin_lock(&inode_hash_lock);
		/* We released the lock, so.. */
		old = find_inode_fast(sb, head, ino);
		if (!old) {
			inode->i_ino = ino;
			spin_lock(&inode->i_lock);
			inode->i_state = I_NEW;
			hlist_add_head_rcu(&inode->i_hash, head);
			spin_unlock(&inode->i_lock);
			inode_sb_list_add(inode);
			spin_unlock(&inode_hash_lock);

			/*
			 * Return the locked inode with I_NEW set, the
			 * caller is responsible for filling in the contents
			 */
			return inode;
		}

		/*
		 * Uhhuh, somebody else created the same inode under
		 * us. Use the old inode instead of the one we just
		 * allocated.
		 */
		spin_unlock(&inode_hash_lock);
		destroy_inode(inode);
		if (IS_ERR(old))
			return NULL;
		inode = old;
		wait_on_inode(inode);
		if (unlikely(inode_unhashed(inode))) {
			iput(inode);
			goto again;
		}
	}
	return inode;
}
EXPORT_SYMBOL(iget_locked);
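
/*
 * Illustrative sketch (not part of this file): the canonical lookup path
 * for a disk filesystem.  example_read_inode() is an assumed helper that
 * fills the inode from its on-disk representation.
 *
 *	struct inode *example_iget(struct super_block *sb, unsigned long ino)
 *	{
 *		struct inode *inode = iget_locked(sb, ino);
 *		int err;
 *
 *		if (!inode)
 *			return ERR_PTR(-ENOMEM);
 *		if (!(inode->i_state & I_NEW))
 *			return inode;		// cache hit, already set up
 *
 *		err = example_read_inode(inode);
 *		if (err) {
 *			iget_failed(inode);	// unlocks and drops the inode
 *			return ERR_PTR(err);
 *		}
 *		unlock_new_inode(inode);
 *		return inode;
 *	}
 */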

/*
 * Search the inode cache for a matching inode number.
 * If we find one, then the inode number we are trying to
 * allocate is not unique and so we should not use it.
 *
 * Returns 1 if the inode number is unique, 0 if it is not.
 */
static int test_inode_iunique(struct super_block *sb, unsigned long ino)
{
	struct hlist_head *b = inode_hashtable + hash(sb, ino);
	struct inode *inode;

	hlist_for_each_entry_rcu(inode, b, i_hash) {
		if (inode->i_ino == ino && inode->i_sb == sb)
			return 0;
	}
	return 1;
}

/**
 *	iunique - get a unique inode number
 *	@sb: superblock
 *	@max_reserved: highest reserved inode number
 *
 *	Obtain an inode number that is unique on the system for a given
 *	superblock. This is used by file systems that have no natural
 *	permanent inode numbering system. An inode number is returned that
 *	is higher than the reserved limit but unique.
 *
 *	BUGS:
 *	With a large number of inodes live on the file system this function
 *	currently becomes quite slow.
 */
ino_t iunique(struct super_block *sb, ino_t max_reserved)
{
	/*
	 * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
	 * error if st_ino won't fit in target struct field. Use 32bit counter
	 * here to attempt to avoid that.
	 */
	static DEFINE_SPINLOCK(iunique_lock);
	static unsigned int counter;
	ino_t res;

	rcu_read_lock();
	spin_lock(&iunique_lock);
	do {
		if (counter <= max_reserved)
			counter = max_reserved + 1;
		res = counter++;
	} while (!test_inode_iunique(sb, res));
	spin_unlock(&iunique_lock);
	rcu_read_unlock();

	return res;
}
EXPORT_SYMBOL(iunique);
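
/*
 * Note (sketch, not from the original file): a typical user is a
 * filesystem that fabricates inode numbers at runtime, e.g.
 *
 *	inode->i_ino = iunique(sb, EXAMPLE_RESERVED_INOS);
 *
 * where EXAMPLE_RESERVED_INOS is an assumed constant reserving the low
 * inode numbers for fixed objects such as the root directory.
 */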

struct inode *igrab(struct inode *inode)
{
	spin_lock(&inode->i_lock);
	if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) {
		__iget(inode);
		spin_unlock(&inode->i_lock);
	} else {
		spin_unlock(&inode->i_lock);
		/*
		 * Handle the case where s_op->clear_inode has not been
		 * called yet, and somebody is calling igrab
		 * while the inode is getting freed.
		 */
		inode = NULL;
	}
	return inode;
}
EXPORT_SYMBOL(igrab);

/**
 * ilookup5_nowait - search for an inode in the inode cache
 * @sb:		super block of file system to search
 * @hashval:	hash value (usually inode number) to search for
 * @test:	callback used for comparisons between inodes
 * @data:	opaque data pointer to pass to @test
 *
 * Search for the inode specified by @hashval and @data in the inode cache.
 * If the inode is in the cache, the inode is returned with an incremented
 * reference count.
 *
 * Note: I_NEW is not waited upon so you have to be very careful what you do
 * with the returned inode.  You probably should be using ilookup5() instead.
 *
 * Note2: @test is called with the inode_hash_lock held, so can't sleep.
 */
struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
		int (*test)(struct inode *, void *), void *data)
{
	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
	struct inode *inode;

	spin_lock(&inode_hash_lock);
	inode = find_inode(sb, head, test, data);
	spin_unlock(&inode_hash_lock);

	return IS_ERR(inode) ? NULL : inode;
}
EXPORT_SYMBOL(ilookup5_nowait);

/**
 * ilookup5 - search for an inode in the inode cache
 * @sb:		super block of file system to search
 * @hashval:	hash value (usually inode number) to search for
 * @test:	callback used for comparisons between inodes
 * @data:	opaque data pointer to pass to @test
 *
 * Search for the inode specified by @hashval and @data in the inode cache,
 * and if the inode is in the cache, return the inode with an incremented
 * reference count.  Waits on I_NEW before returning the inode.
 *
 * This is a generalized version of ilookup() for file systems where the
 * inode number is not sufficient for unique identification of an inode.
 *
 * Note: @test is called with the inode_hash_lock held, so can't sleep.
 */
struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
		int (*test)(struct inode *, void *), void *data)
{
	struct inode *inode;
again:
	inode = ilookup5_nowait(sb, hashval, test, data);
	if (inode) {
		wait_on_inode(inode);
		if (unlikely(inode_unhashed(inode))) {
			iput(inode);
			goto again;
		}
	}
	return inode;
}
EXPORT_SYMBOL(ilookup5);

/**
 * ilookup - search for an inode in the inode cache
 * @sb:		super block of file system to search
 * @ino:	inode number to search for
 *
 * Search for the inode @ino in the inode cache, and if the inode is in the
 * cache, the inode is returned with an incremented reference count.
 */
struct inode *ilookup(struct super_block *sb, unsigned long ino)
{
	struct hlist_head *head = inode_hashtable + hash(sb, ino);
	struct inode *inode;
again:
	spin_lock(&inode_hash_lock);
	inode = find_inode_fast(sb, head, ino);
	spin_unlock(&inode_hash_lock);

	if (inode) {
		if (IS_ERR(inode))
			return NULL;
		wait_on_inode(inode);
		if (unlikely(inode_unhashed(inode))) {
			iput(inode);
			goto again;
		}
	}
	return inode;
}
EXPORT_SYMBOL(ilookup);

/**
 * find_inode_nowait - find an inode in the inode cache
 * @sb:		super block of file system to search
 * @hashval:	hash value (usually inode number) to search for
 * @match:	callback used for comparisons between inodes
 * @data:	opaque data pointer to pass to @match
 *
 * Search for the inode specified by @hashval and @data in the inode
 * cache, where the helper function @match will return 0 if the inode
 * does not match, 1 if the inode does match, and -1 if the search
 * should be stopped.  The @match function must be responsible for
 * taking the i_lock spin_lock and checking i_state for an inode being
 * freed or being initialized, and incrementing the reference count
 * before returning 1.  It also must not sleep, since it is called with
 * the inode_hash_lock spinlock held.
 *
 * This is an even more generalized version of ilookup5() when the
 * function must never block --- find_inode() can block in
 * __wait_on_freeing_inode() --- or when the caller can not increment
 * the reference count because the resulting iput() might cause an
 * inode eviction.  The tradeoff is that the @match function must be
 * very carefully implemented.
 */
struct inode *find_inode_nowait(struct super_block *sb,
				unsigned long hashval,
				int (*match)(struct inode *, unsigned long,
					     void *),
				void *data)
{
	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
	struct inode *inode, *ret_inode = NULL;
	int mval;

	spin_lock(&inode_hash_lock);
	hlist_for_each_entry(inode, head, i_hash) {
		if (inode->i_sb != sb)
			continue;
		mval = match(inode, hashval, data);
		if (mval == 0)
			continue;
		if (mval == 1)
			ret_inode = inode;
		goto out;
	}
out:
	spin_unlock(&inode_hash_lock);
	return ret_inode;
}
EXPORT_SYMBOL(find_inode_nowait);

/**
 * find_inode_rcu - find an inode in the inode cache
 * @sb:		Super block of file system to search
 * @hashval:	Key to hash
 * @test:	Function to test match on an inode
 * @data:	Data for test function
 *
 * Search for the inode specified by @hashval and @data in the inode cache,
 * where the helper function @test will return 0 if the inode does not match
 * and 1 if it does.
 *
 * If successful, this will return the inode for which the @test function
 * returned 1 and NULL otherwise.
 *
 * The @test function is not permitted to take a ref on any inode presented.
 * It is also not permitted to sleep.
 *
 * The caller must hold the RCU read lock.
 */
struct inode *find_inode_rcu(struct super_block *sb, unsigned long hashval,
			     int (*test)(struct inode *, void *), void *data)
{
	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
	struct inode *inode;

	RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
			 "suspicious find_inode_rcu() usage");

	hlist_for_each_entry_rcu(inode, head, i_hash) {
		if (inode->i_sb == sb &&
		    !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)) &&
		    test(inode, data))
			return inode;
	}
	return NULL;
}
EXPORT_SYMBOL(find_inode_rcu);

/**
 * find_inode_by_ino_rcu - Find an inode in the inode cache
 * @sb:		Super block of file system to search
 * @ino:	The inode number to match
 *
 * Search for the inode with number @ino in the inode cache.  Returns the
 * inode if it is found and not being freed, NULL otherwise.  No reference
 * is taken on the returned inode.
 *
 * The caller must hold the RCU read lock.
 */
struct inode *find_inode_by_ino_rcu(struct super_block *sb,
				    unsigned long ino)
{
	struct hlist_head *head = inode_hashtable + hash(sb, ino);
	struct inode *inode;

	RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
			 "suspicious find_inode_by_ino_rcu() usage");

	hlist_for_each_entry_rcu(inode, head, i_hash) {
		if (inode->i_ino == ino &&
		    inode->i_sb == sb &&
		    !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)))
			return inode;
	}
	return NULL;
}
EXPORT_SYMBOL(find_inode_by_ino_rcu);

int insert_inode_locked(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	ino_t ino = inode->i_ino;
	struct hlist_head *head = inode_hashtable + hash(sb, ino);

	while (1) {
		struct inode *old = NULL;
		spin_lock(&inode_hash_lock);
		hlist_for_each_entry(old, head, i_hash) {
			if (old->i_ino != ino)
				continue;
			if (old->i_sb != sb)
				continue;
			spin_lock(&old->i_lock);
			if (old->i_state & (I_FREEING|I_WILL_FREE)) {
				spin_unlock(&old->i_lock);
				continue;
			}
			break;
		}
		if (likely(!old)) {
			spin_lock(&inode->i_lock);
			inode->i_state |= I_NEW | I_CREATING;
			hlist_add_head_rcu(&inode->i_hash, head);
			spin_unlock(&inode->i_lock);
			spin_unlock(&inode_hash_lock);
			return 0;
		}
		if (unlikely(old->i_state & I_CREATING)) {
			spin_unlock(&old->i_lock);
			spin_unlock(&inode_hash_lock);
			return -EBUSY;
		}
		__iget(old);
		spin_unlock(&old->i_lock);
		spin_unlock(&inode_hash_lock);
		wait_on_inode(old);
		if (unlikely(!inode_unhashed(old))) {
			iput(old);
			return -EBUSY;
		}
		iput(old);
	}
}
EXPORT_SYMBOL(insert_inode_locked);
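
/*
 * Illustrative sketch (not part of this file): in a typical ->create path
 * the freshly allocated inode is hashed with insert_inode_locked() before
 * the on-disk structures are written, so concurrent lookups wait on I_NEW
 * instead of seeing a half-built inode.  example_new_ino() is an assumed
 * allocator; error handling and the on-disk work are elided.
 *
 *	inode->i_ino = example_new_ino(sb);
 *	err = insert_inode_locked(inode);
 *	if (err) {
 *		// number already in use or being created: give up
 *		iput(inode);
 *		return err;
 *	}
 *	// ... initialise the on-disk inode ...
 *	unlock_new_inode(inode);
 */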

int insert_inode_locked4(struct inode *inode, unsigned long hashval,
		int (*test)(struct inode *, void *), void *data)
{
	struct inode *old;

	inode->i_state |= I_CREATING;
	old = inode_insert5(inode, hashval, test, NULL, data);

	if (old != inode) {
		iput(old);
		return -EBUSY;
	}
	return 0;
}
EXPORT_SYMBOL(insert_inode_locked4);


int generic_delete_inode(struct inode *inode)
{
	return 1;
}
EXPORT_SYMBOL(generic_delete_inode);

/*
 * Called when we're dropping the last reference
 * to an inode.
 *
 * Call the FS "drop_inode()" function, defaulting to
 * the legacy UNIX filesystem behaviour.  If it tells
 * us to evict inode, do so.  Otherwise, retain inode
 * in cache if fs is alive, sync and evict if fs is
 * shutting down.
 */
static void iput_final(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	const struct super_operations *op = inode->i_sb->s_op;
	unsigned long state;
	int drop;

	WARN_ON(inode->i_state & I_NEW);

	if (op->drop_inode)
		drop = op->drop_inode(inode);
	else
		drop = generic_drop_inode(inode);

	if (!drop &&
	    !(inode->i_state & I_DONTCACHE) &&
	    (sb->s_flags & SB_ACTIVE)) {
		__inode_add_lru(inode, true);
		spin_unlock(&inode->i_lock);
		return;
	}

	state = inode->i_state;
	if (!drop) {
		WRITE_ONCE(inode->i_state, state | I_WILL_FREE);
		spin_unlock(&inode->i_lock);

		write_inode_now(inode, 1);

		spin_lock(&inode->i_lock);
		state = inode->i_state;
		WARN_ON(state & I_NEW);
		state &= ~I_WILL_FREE;
	}

	WRITE_ONCE(inode->i_state, state | I_FREEING);
	if (!list_empty(&inode->i_lru))
		inode_lru_list_del(inode);
	spin_unlock(&inode->i_lock);

	evict(inode);
}

/**
 *	iput	- put an inode
 *	@inode: inode to put
 *
 *	Puts an inode, dropping its usage count. If the inode use count hits
 *	zero, the inode is then freed and may also be destroyed.
 *
 *	Consequently, iput() can sleep.
 */
void iput(struct inode *inode)
{
	if (!inode)
		return;
	BUG_ON(inode->i_state & I_CLEAR);
retry:
	if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) {
		if (inode->i_nlink && (inode->i_state & I_DIRTY_TIME)) {
			atomic_inc(&inode->i_count);
			spin_unlock(&inode->i_lock);
			trace_writeback_lazytime_iput(inode);
			mark_inode_dirty_sync(inode);
			goto retry;
		}
		iput_final(inode);
	}
}
EXPORT_SYMBOL(iput);

#ifdef CONFIG_BLOCK
/**
 *	bmap	- find a block number in a file
 *	@inode:  inode owning the block number being requested
 *	@block: pointer containing the block to find
 *
 *	Replaces the value in ``*block`` with the block number on the device
 *	holding the corresponding block of the file.  That is, asked for
 *	block 4 of inode 1 the function will replace the 4 in ``*block``
 *	with the disk block, relative to the disk start, that holds that
 *	block of the file.
 *
 *	Returns -EINVAL in case of error, 0 otherwise. If mapping falls into
 *	a hole, returns 0 and ``*block`` is also set to 0.
 */
int bmap(struct inode *inode, sector_t *block)
{
	if (!inode->i_mapping->a_ops->bmap)
		return -EINVAL;

	*block = inode->i_mapping->a_ops->bmap(inode->i_mapping, *block);
	return 0;
}
EXPORT_SYMBOL(bmap);
#endif
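
/*
 * Illustrative sketch (not part of this file): this is essentially what
 * the FIBMAP ioctl does for each logical block the caller asks about.
 *
 *	sector_t blk = logical_block;	// block index within the file
 *	int err = bmap(inode, &blk);
 *
 *	if (err)
 *		return err;	// filesystem has no ->bmap
 *	if (blk == 0)
 *		;		// hole: no disk block backs it
 */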

/*
 * With relative atime, only update atime if the previous atime is
 * earlier than or equal to either the ctime or mtime,
 * or if at least a day has passed since the last atime update.
 */
static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
			     struct timespec64 now)
{
	if (!(mnt->mnt_flags & MNT_RELATIME))
		return 1;
	/*
	 * Is mtime younger than or equal to atime? If yes, update atime:
	 */
	if (timespec64_compare(&inode->i_mtime, &inode->i_atime) >= 0)
		return 1;
	/*
	 * Is ctime younger than or equal to atime? If yes, update atime:
	 */
	if (timespec64_compare(&inode->i_ctime, &inode->i_atime) >= 0)
		return 1;

	/*
	 * Is the previous atime value older than a day? If yes,
	 * update atime:
	 */
	if ((long)(now.tv_sec - inode->i_atime.tv_sec) >= 24*60*60)
		return 1;
	/*
	 * Good, we can skip the atime update:
	 */
	return 0;
}

int generic_update_time(struct inode *inode, struct timespec64 *time, int flags)
{
	int dirty_flags = 0;

	if (flags & (S_ATIME | S_CTIME | S_MTIME)) {
		if (flags & S_ATIME)
			inode->i_atime = *time;
		if (flags & S_CTIME)
			inode->i_ctime = *time;
		if (flags & S_MTIME)
			inode->i_mtime = *time;

		if (inode->i_sb->s_flags & SB_LAZYTIME)
			dirty_flags |= I_DIRTY_TIME;
		else
			dirty_flags |= I_DIRTY_SYNC;
	}

	if ((flags & S_VERSION) && inode_maybe_inc_iversion(inode, false))
		dirty_flags |= I_DIRTY_SYNC;

	__mark_inode_dirty(inode, dirty_flags);
	return 0;
}
EXPORT_SYMBOL(generic_update_time);

/*
 * This does the actual work of updating an inode's time or version.  Must
 * have had called mnt_want_write() before calling this.
 */
int inode_update_time(struct inode *inode, struct timespec64 *time, int flags)
{
	if (inode->i_op->update_time)
		return inode->i_op->update_time(inode, time, flags);
	return generic_update_time(inode, time, flags);
}
EXPORT_SYMBOL(inode_update_time);

/**
 *	atime_needs_update	-	update the access time
 *	@path: the &struct path to update
 *	@inode: inode to update
 *
 *	Update the accessed time on an inode and mark it for writeback.
 *	This function automatically handles read only file systems and media,
 *	as well as the "noatime" flag and inode specific "noatime" markers.
 */
bool atime_needs_update(const struct path *path, struct inode *inode)
{
	struct vfsmount *mnt = path->mnt;
	struct timespec64 now;

	if (inode->i_flags & S_NOATIME)
		return false;

	/*
	 * Atime updates will likely cause i_uid and i_gid to be written
	 * back improperly if their true value is unknown to the vfs.
	 */
	if (HAS_UNMAPPED_ID(mnt_user_ns(mnt), inode))
		return false;

	if (IS_NOATIME(inode))
		return false;
	if ((inode->i_sb->s_flags & SB_NODIRATIME) && S_ISDIR(inode->i_mode))
		return false;

	if (mnt->mnt_flags & MNT_NOATIME)
		return false;
	if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
		return false;

	now = current_time(inode);

	if (!relatime_need_update(mnt, inode, now))
		return false;

	if (timespec64_equal(&inode->i_atime, &now))
		return false;

	return true;
}

void touch_atime(const struct path *path)
{
	struct vfsmount *mnt = path->mnt;
	struct inode *inode = d_inode(path->dentry);
	struct timespec64 now;

	if (!atime_needs_update(path, inode))
		return;

	if (!sb_start_write_trylock(inode->i_sb))
		return;

	if (__mnt_want_write(mnt) != 0)
		goto skip_update;
	/*
	 * File systems can error out when updating inodes if they need to
	 * allocate new space to modify an inode (such is the case for
	 * Btrfs), but since we touch atime while walking down the path we
	 * really don't care if we failed to update the atime of the file,
	 * so just ignore the return value.
	 * We may also fail on filesystems that have the ability to make
	 * parts of the fs read only, e.g. subvolumes in Btrfs.
	 */
	now = current_time(inode);
	inode_update_time(inode, &now, S_ATIME);
	__mnt_drop_write(mnt);
skip_update:
	sb_end_write(inode->i_sb);
}
EXPORT_SYMBOL(touch_atime);

/*
 * Return mask of changes for notify_change() that need to be done as a
 * response to write or truncate. Return 0 if nothing has to be changed.
 * Negative value on error (change should be denied).
 */
int should_remove_suid(struct dentry *dentry)
{
	umode_t mode = d_inode(dentry)->i_mode;
	int kill = 0;

	/* suid always must be killed */
	if (unlikely(mode & S_ISUID))
		kill = ATTR_KILL_SUID;

	/*
	 * sgid without any exec bits is just a mandatory locking mark; leave
	 * it alone. If some exec bits are set, it's a real sgid; kill it.
	 */
	if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
		kill |= ATTR_KILL_SGID;

	if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode)))
		return kill;

	return 0;
}
EXPORT_SYMBOL(should_remove_suid);

/*
 * Return mask of changes for notify_change() that need to be done as a
 * response to write or truncate. Return 0 if nothing has to be changed.
 * Negative value on error (change should be denied).
 */
int dentry_needs_remove_privs(struct dentry *dentry)
{
	struct inode *inode = d_inode(dentry);
	int mask = 0;
	int ret;

	if (IS_NOSEC(inode))
		return 0;

	mask = should_remove_suid(dentry);
	ret = security_inode_need_killpriv(dentry);
	if (ret < 0)
		return ret;
	if (ret)
		mask |= ATTR_KILL_PRIV;
	return mask;
}

static int __remove_privs(struct user_namespace *mnt_userns,
			  struct dentry *dentry, int kill)
{
	struct iattr newattrs;

	newattrs.ia_valid = ATTR_FORCE | kill;
	/*
	 * Note we call this on write, so notify_change will not
	 * encounter any conflicting delegations:
	 */
	return notify_change(mnt_userns, dentry, &newattrs, NULL);
}

static int __file_remove_privs(struct file *file, unsigned int flags)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = file_inode(file);
	int error = 0;
	int kill;

	if (IS_NOSEC(inode) || !S_ISREG(inode->i_mode))
		return 0;

	kill = dentry_needs_remove_privs(dentry);
	if (kill < 0)
		return kill;

	if (kill) {
		if (flags & IOCB_NOWAIT)
			return -EAGAIN;

		error = __remove_privs(file_mnt_user_ns(file), dentry, kill);
	}

	if (!error)
		inode_has_no_xattr(inode);
	return error;
}

/**
 * file_remove_privs - remove special file privileges (suid, capabilities)
 * @file: file to remove privileges from
 *
 * When file is modified by a write or truncation ensure that special
 * file privileges are removed.
 *
 * Return: 0 on success, negative errno on failure.
 */
int file_remove_privs(struct file *file)
{
	return __file_remove_privs(file, 0);
}
EXPORT_SYMBOL(file_remove_privs);

static int inode_needs_update_time(struct inode *inode, struct timespec64 *now)
{
	int sync_it = 0;

	/* First try to exhaust all avenues to not sync */
	if (IS_NOCMTIME(inode))
		return 0;

	if (!timespec64_equal(&inode->i_mtime, now))
		sync_it = S_MTIME;

	if (!timespec64_equal(&inode->i_ctime, now))
		sync_it |= S_CTIME;

	if (IS_I_VERSION(inode) && inode_iversion_need_inc(inode))
		sync_it |= S_VERSION;

	if (!sync_it)
		return 0;

	return sync_it;
}

static int __file_update_time(struct file *file, struct timespec64 *now,
			int sync_mode)
{
	int ret = 0;
	struct inode *inode = file_inode(file);

	/* try to update time settings */
	if (!__mnt_want_write_file(file)) {
		ret = inode_update_time(inode, now, sync_mode);
		__mnt_drop_write_file(file);
	}

	return ret;
}

/**
 * file_update_time - update mtime and ctime time
 * @file: file accessed
 *
 * Update the mtime and ctime members of an inode and mark the inode for
 * writeback. Note that this function is meant exclusively for usage in
 * the file write path of filesystems, and filesystems may choose to
 * explicitly ignore updates via this function with the _NOCMTIME inode
 * flag, e.g. for network filesystems where these timestamps are handled
 * by the server. This can return an error for file systems who need to
 * allocate space in order to update an inode.
 *
 * Return: 0 on success, negative errno on failure.
 */
int file_update_time(struct file *file)
{
	int ret;
	struct inode *inode = file_inode(file);
	struct timespec64 now = current_time(inode);

	ret = inode_needs_update_time(inode, &now);
	if (ret <= 0)
		return ret;

	return __file_update_time(file, &now, ret);
}
EXPORT_SYMBOL(file_update_time);

/**
 * file_modified_flags - handle mandated vfs changes when modifying a file
 * @file: file that was modified
 * @flags: kiocb flags
 *
 * When file has been modified ensure that special
 * file privileges are removed and time settings are updated.
 *
 * If IOCB_NOWAIT is set, special file privileges will not be removed and
 * time settings will not be updated. It will return -EAGAIN.
 *
 * Context: Caller must hold the file's inode lock.
 *
 * Return: 0 on success, negative errno on failure.
 */
static int file_modified_flags(struct file *file, int flags)
{
	int ret;
	struct inode *inode = file_inode(file);
	struct timespec64 now = current_time(inode);

	/*
	 * Clear the security bits if the process is not being run by root.
	 * This keeps people from modifying setuid and setgid binaries.
	 */
	ret = __file_remove_privs(file, flags);
	if (ret)
		return ret;

	if (unlikely(file->f_mode & FMODE_NOCMTIME))
		return 0;

	ret = inode_needs_update_time(inode, &now);
	if (ret <= 0)
		return ret;
	if (flags & IOCB_NOWAIT)
		return -EAGAIN;

	return __file_update_time(file, &now, ret);
}

/**
 * file_modified - handle mandated vfs changes when modifying a file
 * @file: file that was modified
 *
 * When file has been modified ensure that special
 * file privileges are removed and time settings are updated.
 *
 * Context: Caller must hold the file's inode lock.
 *
 * Return: 0 on success, negative errno on failure.
 */
int file_modified(struct file *file)
{
	return file_modified_flags(file, 0);
}
EXPORT_SYMBOL(file_modified);
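
/*
 * Illustrative sketch (not part of this file): a filesystem's
 * ->write_iter typically calls file_modified() under the inode lock
 * before doing the actual write.  example_do_write() is an assumed helper
 * standing in for the real I/O path.
 *
 *	static ssize_t example_write_iter(struct kiocb *iocb,
 *					  struct iov_iter *from)
 *	{
 *		struct inode *inode = file_inode(iocb->ki_filp);
 *		ssize_t ret;
 *
 *		inode_lock(inode);
 *		ret = file_modified(iocb->ki_filp);
 *		if (!ret)
 *			ret = example_do_write(iocb, from);
 *		inode_unlock(inode);
 *		return ret;
 *	}
 */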

/**
 * kiocb_modified - handle mandated vfs changes when modifying a file
 * @iocb: iocb that was modified
 *
 * When file has been modified ensure that special
 * file privileges are removed and time settings are updated.
 *
 * Context: Caller must hold the file's inode lock.
 *
 * Return: 0 on success, negative errno on failure.
 */
int kiocb_modified(struct kiocb *iocb)
{
	return file_modified_flags(iocb->ki_filp, iocb->ki_flags);
}
EXPORT_SYMBOL_GPL(kiocb_modified);

int inode_needs_sync(struct inode *inode)
{
	if (IS_SYNC(inode))
		return 1;
	if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode))
		return 1;
	return 0;
}
EXPORT_SYMBOL(inode_needs_sync);

/*
 * If we try to find an inode in the inode hash while it is being
 * deleted, we have to wait until the filesystem completes its
 * deletion before reporting that it isn't found.  This function waits
 * until the deletion _might_ have completed.  Callers are responsible
 * to recheck inode state.
 *
 * It doesn't matter if I_NEW is not set initially, a call to
 * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list
 * will DTRT.
 */
static void __wait_on_freeing_inode(struct inode *inode)
{
	wait_queue_head_t *wq;
	DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
	wq = bit_waitqueue(&inode->i_state, __I_NEW);
	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
	spin_unlock(&inode->i_lock);
	spin_unlock(&inode_hash_lock);
	schedule();
	finish_wait(wq, &wait.wq_entry);
	spin_lock(&inode_hash_lock);
}

static __initdata unsigned long ihash_entries;
static int __init set_ihash_entries(char *str)
{
	if (!str)
		return 0;
	ihash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("ihash_entries=", set_ihash_entries);

/*
 * Initialize the inode hash table.
 */
void __init inode_init_early(void)
{
	/* If hashes are distributed across NUMA nodes, defer
	 * hash allocation until vmalloc space is available.
	 */
	if (hashdist)
		return;

	inode_hashtable =
		alloc_large_system_hash("Inode-cache",
					sizeof(struct hlist_head),
					ihash_entries,
					14,
					HASH_EARLY | HASH_ZERO,
					&i_hash_shift,
					&i_hash_mask,
					0,
					0);
}

void __init inode_init(void)
{
	/* inode slab cache */
	inode_cachep = kmem_cache_create("inode_cache",
					 sizeof(struct inode),
					 0,
					 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
					 SLAB_MEM_SPREAD|SLAB_ACCOUNT),
					 init_once);

	/* Hash may have been set up in inode_init_early */
	if (!hashdist)
		return;

	inode_hashtable =
		alloc_large_system_hash("Inode-cache",
					sizeof(struct hlist_head),
					ihash_entries,
					14,
					HASH_ZERO,
					&i_hash_shift,
					&i_hash_mask,
					0,
					0);
}

void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
{
	inode->i_mode = mode;
	if (S_ISCHR(mode)) {
		inode->i_fop = &def_chr_fops;
		inode->i_rdev = rdev;
	} else if (S_ISBLK(mode)) {
		inode->i_fop = &def_blk_fops;
		inode->i_rdev = rdev;
	} else if (S_ISFIFO(mode))
		inode->i_fop = &pipefifo_fops;
	else if (S_ISSOCK(mode))
		;	/* leave it no_open_fops */
	else
		printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
				  " inode %s:%lu\n", mode, inode->i_sb->s_id,
				  inode->i_ino);
}
EXPORT_SYMBOL(init_special_inode);

/**
 * inode_init_owner - Init uid,gid,mode for new inode according to posix standards
 * @mnt_userns:	User namespace of the mount the inode was created from
 * @inode: New inode
 * @dir: Directory inode
 * @mode: mode of the new inode
 *
 * If the inode has been created through an idmapped mount the user namespace
 * of the vfsmount must be passed through @mnt_userns. This function will
 * then take care to map the inode according to @mnt_userns before checking
 * permissions and initializing i_uid and i_gid. On non-idmapped mounts or if
 * permission checking is to be performed on the raw inode simply pass
 * init_user_ns.
 */
void inode_init_owner(struct user_namespace *mnt_userns, struct inode *inode,
		      const struct inode *dir, umode_t mode)
{
	inode_fsuid_set(inode, mnt_userns);
	if (dir && dir->i_mode & S_ISGID) {
		inode->i_gid = dir->i_gid;

		/* Directories are special, and always inherit S_ISGID */
		if (S_ISDIR(mode))
			mode |= S_ISGID;
	} else
		inode_fsgid_set(inode, mnt_userns);
	inode->i_mode = mode;
}
EXPORT_SYMBOL(inode_init_owner);

/**
 * inode_owner_or_capable - check current task permissions to inode
 * @mnt_userns:	user namespace of the mount the inode was found from
 * @inode: inode being checked
 *
 * Return true if current either has CAP_FOWNER in a namespace with the
 * inode owner uid mapped, or owns the file.
 *
 * If the inode has been found through an idmapped mount the user namespace
 * of the vfsmount must be passed through @mnt_userns. This function will
 * then take care to map the inode according to @mnt_userns before checking
 * permissions. On non-idmapped mounts or if permission checking is to be
 * performed on the raw inode simply pass init_user_ns.
 */
bool inode_owner_or_capable(struct user_namespace *mnt_userns,
			    const struct inode *inode)
{
	kuid_t i_uid;
	struct user_namespace *ns;

	i_uid = i_uid_into_mnt(mnt_userns, inode);
	if (uid_eq(current_fsuid(), i_uid))
		return true;

	ns = current_user_ns();
	if (kuid_has_mapping(ns, i_uid) && ns_capable(ns, CAP_FOWNER))
		return true;
	return false;
}
EXPORT_SYMBOL(inode_owner_or_capable);

/*
 * Direct i/o helper functions
 */
static void __inode_dio_wait(struct inode *inode)
{
	wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
	DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);

	do {
		prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE);
		if (atomic_read(&inode->i_dio_count))
			schedule();
	} while (atomic_read(&inode->i_dio_count));
	finish_wait(wq, &q.wq_entry);
}

/**
 * inode_dio_wait - wait for outstanding DIO requests to finish
 * @inode: inode to wait for
 *
 * Waits for all pending direct I/O requests to finish so that we can
 * proceed with a truncate or equivalent operation.
 *
 * Must be called under a lock that serializes taking new references
 * to i_dio_count, usually by inode->i_rwsem.
 */
void inode_dio_wait(struct inode *inode)
{
	if (atomic_read(&inode->i_dio_count))
		__inode_dio_wait(inode);
}
EXPORT_SYMBOL(inode_dio_wait);
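
/*
 * Illustrative sketch (not part of this file): a setattr-driven truncate
 * drains direct I/O before shrinking the file, roughly:
 *
 *	inode_lock(inode);
 *	inode_dio_wait(inode);		// no DIO may straddle i_size
 *	truncate_setsize(inode, newsize);
 *	// ... free the now-unused blocks ...
 *	inode_unlock(inode);
 */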

/*
 * inode_set_flags - atomically set some inode flags
 *
 * Note: the caller should be holding i_rwsem exclusively, or else be sure
 * that they have exclusive access to the inode structure (i.e., while the
 * inode is being instantiated).  The reason for the cmpxchg() loop
 * --- which wouldn't be necessary if all code paths which modify i_flags
 * actually followed this rule --- is that there is at least one code path
 * which doesn't today, so we use cmpxchg() out of an abundance of caution.
 */
void inode_set_flags(struct inode *inode, unsigned int flags,
		     unsigned int mask)
{
	WARN_ON_ONCE(flags & ~mask);
	set_mask_bits(&inode->i_flags, mask, flags);
}
EXPORT_SYMBOL(inode_set_flags);

void inode_nohighmem(struct inode *inode)
{
	mapping_set_gfp_mask(inode->i_mapping, GFP_USER);
}
EXPORT_SYMBOL(inode_nohighmem);

/**
 * timestamp_truncate - Truncate timespec to a granularity
 * @t: Timespec
 * @inode: inode being updated
 *
 * Truncate a timespec to the granularity supported by the fs
 * containing the inode. Always rounds down. gran must
 * not be 0 nor greater than a second (NSEC_PER_SEC, or 10^9 ns).
 */
struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	unsigned int gran = sb->s_time_gran;

	t.tv_sec = clamp(t.tv_sec, sb->s_time_min, sb->s_time_max);
	if (unlikely(t.tv_sec == sb->s_time_max || t.tv_sec == sb->s_time_min))
		t.tv_nsec = 0;

	/* Avoid division in the common cases 1 ns and 1 s. */
	if (gran == 1)
		; /* nothing */
	else if (gran == NSEC_PER_SEC)
		t.tv_nsec = 0;
	else if (gran > 1 && gran < NSEC_PER_SEC)
		t.tv_nsec -= t.tv_nsec % gran;
	else
		WARN(1, "invalid file time granularity: %u", gran);
	return t;
}
EXPORT_SYMBOL(timestamp_truncate);

/**
 * current_time - Return FS time
 * @inode: inode.
 *
 * Return the current time truncated to the time granularity supported by
 * the fs.
 *
 * Note that inode and inode->sb cannot be NULL.
 * Otherwise, the function warns and returns time without truncation.
 */
struct timespec64 current_time(struct inode *inode)
{
	struct timespec64 now;

	ktime_get_coarse_real_ts64(&now);

	if (unlikely(!inode->i_sb)) {
		WARN(1, "current_time() called with uninitialized super_block in the inode");
		return now;
	}

	return timestamp_truncate(now, inode);
}
EXPORT_SYMBOL(current_time);

/**
 * mode_strip_sgid - handle the sgid bit for non-directories
 * @mnt_userns: User namespace of the mount the inode was created from
 * @dir: parent directory inode
 * @mode: mode of the file to be created in @dir
 *
 * If the @mode of the new file has both the S_ISGID bit raised and group
 * execute permission, and the creating process is neither in the group of
 * @dir nor privileged with CAP_FSETID over @dir, strip the S_ISGID bit
 * from @mode.  Directories are left alone so that S_ISGID continues to be
 * inherited.
 *
 * Return: the new mode to use for the file
 */
umode_t mode_strip_sgid(struct user_namespace *mnt_userns,
			const struct inode *dir, umode_t mode)
{
	if ((mode & (S_ISGID | S_IXGRP)) != (S_ISGID | S_IXGRP))
		return mode;
	if (S_ISDIR(mode) || !dir || !(dir->i_mode & S_ISGID))
		return mode;
	if (in_group_p(i_gid_into_mnt(mnt_userns, dir)))
		return mode;
	if (capable_wrt_inode_uidgid(mnt_userns, dir, CAP_FSETID))
		return mode;

	return mode & ~S_ISGID;
}
EXPORT_SYMBOL(mode_strip_sgid);