0001
0002
0003
0004
0005
0006
0007
0008
0009
0010 #include <linux/sched.h>
0011 #include <linux/fs.h>
0012 #include <linux/namei.h>
0013 #include <linux/idr.h>
0014 #include <linux/slab.h>
0015 #include <linux/security.h>
0016 #include <linux/hash.h>
0017
0018 #include "kernfs-internal.h"
0019
0020 static DEFINE_SPINLOCK(kernfs_rename_lock);
0021
0022
0023
0024
0025
0026
0027
0028 static DEFINE_SPINLOCK(kernfs_pr_cont_lock);
0029 static char kernfs_pr_cont_buf[PATH_MAX];
0030 static DEFINE_SPINLOCK(kernfs_idr_lock);
0031
0032 #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)
0033
0034 static bool kernfs_active(struct kernfs_node *kn)
0035 {
0036 lockdep_assert_held(&kernfs_root(kn)->kernfs_rwsem);
0037 return atomic_read(&kn->active) >= 0;
0038 }
0039
0040 static bool kernfs_lockdep(struct kernfs_node *kn)
0041 {
0042 #ifdef CONFIG_DEBUG_LOCK_ALLOC
0043 return kn->flags & KERNFS_LOCKDEP;
0044 #else
0045 return false;
0046 #endif
0047 }
0048
0049 static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen)
0050 {
0051 if (!kn)
0052 return strlcpy(buf, "(null)", buflen);
0053
0054 return strlcpy(buf, kn->parent ? kn->name : "/", buflen);
0055 }
0056
0057
0058 static size_t kernfs_depth(struct kernfs_node *from, struct kernfs_node *to)
0059 {
0060 size_t depth = 0;
0061
0062 while (to->parent && to != from) {
0063 depth++;
0064 to = to->parent;
0065 }
0066 return depth;
0067 }
0068
0069 static struct kernfs_node *kernfs_common_ancestor(struct kernfs_node *a,
0070 struct kernfs_node *b)
0071 {
0072 size_t da, db;
0073 struct kernfs_root *ra = kernfs_root(a), *rb = kernfs_root(b);
0074
0075 if (ra != rb)
0076 return NULL;
0077
0078 da = kernfs_depth(ra->kn, a);
0079 db = kernfs_depth(rb->kn, b);
0080
0081 while (da > db) {
0082 a = a->parent;
0083 da--;
0084 }
0085 while (db > da) {
0086 b = b->parent;
0087 db--;
0088 }
0089
0090
0091 while (b != a) {
0092 b = b->parent;
0093 a = a->parent;
0094 }
0095
0096 return a;
0097 }
0098
0099
0100
0101
0102
0103
0104
0105
0106
0107
0108
0109
0110
0111
0112
0113
0114
0115
0116
0117
0118
0119
0120
0121
0122
0123
0124
0125
0126
0127
0128
0129 static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
0130 struct kernfs_node *kn_from,
0131 char *buf, size_t buflen)
0132 {
0133 struct kernfs_node *kn, *common;
0134 const char parent_str[] = "/..";
0135 size_t depth_from, depth_to, len = 0;
0136 int i, j;
0137
0138 if (!kn_to)
0139 return strlcpy(buf, "(null)", buflen);
0140
0141 if (!kn_from)
0142 kn_from = kernfs_root(kn_to)->kn;
0143
0144 if (kn_from == kn_to)
0145 return strlcpy(buf, "/", buflen);
0146
0147 if (!buf)
0148 return -EINVAL;
0149
0150 common = kernfs_common_ancestor(kn_from, kn_to);
0151 if (WARN_ON(!common))
0152 return -EINVAL;
0153
0154 depth_to = kernfs_depth(common, kn_to);
0155 depth_from = kernfs_depth(common, kn_from);
0156
0157 buf[0] = '\0';
0158
0159 for (i = 0; i < depth_from; i++)
0160 len += strlcpy(buf + len, parent_str,
0161 len < buflen ? buflen - len : 0);
0162
0163
0164 for (i = depth_to - 1; i >= 0; i--) {
0165 for (kn = kn_to, j = 0; j < i; j++)
0166 kn = kn->parent;
0167 len += strlcpy(buf + len, "/",
0168 len < buflen ? buflen - len : 0);
0169 len += strlcpy(buf + len, kn->name,
0170 len < buflen ? buflen - len : 0);
0171 }
0172
0173 return len;
0174 }
0175
0176
0177
0178
0179
0180
0181
0182
0183
0184
0185
0186
0187
0188
0189
0190 int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
0191 {
0192 unsigned long flags;
0193 int ret;
0194
0195 spin_lock_irqsave(&kernfs_rename_lock, flags);
0196 ret = kernfs_name_locked(kn, buf, buflen);
0197 spin_unlock_irqrestore(&kernfs_rename_lock, flags);
0198 return ret;
0199 }
0200
0201
0202
0203
0204
0205
0206
0207
0208
0209
0210
0211
0212
0213
0214
0215
0216
0217 int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from,
0218 char *buf, size_t buflen)
0219 {
0220 unsigned long flags;
0221 int ret;
0222
0223 spin_lock_irqsave(&kernfs_rename_lock, flags);
0224 ret = kernfs_path_from_node_locked(to, from, buf, buflen);
0225 spin_unlock_irqrestore(&kernfs_rename_lock, flags);
0226 return ret;
0227 }
0228 EXPORT_SYMBOL_GPL(kernfs_path_from_node);
0229
0230
0231
0232
0233
0234
0235
0236 void pr_cont_kernfs_name(struct kernfs_node *kn)
0237 {
0238 unsigned long flags;
0239
0240 spin_lock_irqsave(&kernfs_pr_cont_lock, flags);
0241
0242 kernfs_name(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf));
0243 pr_cont("%s", kernfs_pr_cont_buf);
0244
0245 spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags);
0246 }
0247
0248
0249
0250
0251
0252
0253
0254 void pr_cont_kernfs_path(struct kernfs_node *kn)
0255 {
0256 unsigned long flags;
0257 int sz;
0258
0259 spin_lock_irqsave(&kernfs_pr_cont_lock, flags);
0260
0261 sz = kernfs_path_from_node(kn, NULL, kernfs_pr_cont_buf,
0262 sizeof(kernfs_pr_cont_buf));
0263 if (sz < 0) {
0264 pr_cont("(error)");
0265 goto out;
0266 }
0267
0268 if (sz >= sizeof(kernfs_pr_cont_buf)) {
0269 pr_cont("(name too long)");
0270 goto out;
0271 }
0272
0273 pr_cont("%s", kernfs_pr_cont_buf);
0274
0275 out:
0276 spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags);
0277 }
0278
0279
0280
0281
0282
0283
0284
0285
0286 struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn)
0287 {
0288 struct kernfs_node *parent;
0289 unsigned long flags;
0290
0291 spin_lock_irqsave(&kernfs_rename_lock, flags);
0292 parent = kn->parent;
0293 kernfs_get(parent);
0294 spin_unlock_irqrestore(&kernfs_rename_lock, flags);
0295
0296 return parent;
0297 }
0298
0299
0300
0301
0302
0303
0304
0305
0306 static unsigned int kernfs_name_hash(const char *name, const void *ns)
0307 {
0308 unsigned long hash = init_name_hash(ns);
0309 unsigned int len = strlen(name);
0310 while (len--)
0311 hash = partial_name_hash(*name++, hash);
0312 hash = end_name_hash(hash);
0313 hash &= 0x7fffffffU;
0314
0315 if (hash < 2)
0316 hash += 2;
0317 if (hash >= INT_MAX)
0318 hash = INT_MAX - 1;
0319 return hash;
0320 }
0321
0322 static int kernfs_name_compare(unsigned int hash, const char *name,
0323 const void *ns, const struct kernfs_node *kn)
0324 {
0325 if (hash < kn->hash)
0326 return -1;
0327 if (hash > kn->hash)
0328 return 1;
0329 if (ns < kn->ns)
0330 return -1;
0331 if (ns > kn->ns)
0332 return 1;
0333 return strcmp(name, kn->name);
0334 }
0335
0336 static int kernfs_sd_compare(const struct kernfs_node *left,
0337 const struct kernfs_node *right)
0338 {
0339 return kernfs_name_compare(left->hash, left->name, left->ns, right);
0340 }
0341
0342
0343
0344
0345
0346
0347
0348
0349
0350
0351
0352
0353
0354
0355 static int kernfs_link_sibling(struct kernfs_node *kn)
0356 {
0357 struct rb_node **node = &kn->parent->dir.children.rb_node;
0358 struct rb_node *parent = NULL;
0359
0360 while (*node) {
0361 struct kernfs_node *pos;
0362 int result;
0363
0364 pos = rb_to_kn(*node);
0365 parent = *node;
0366 result = kernfs_sd_compare(kn, pos);
0367 if (result < 0)
0368 node = &pos->rb.rb_left;
0369 else if (result > 0)
0370 node = &pos->rb.rb_right;
0371 else
0372 return -EEXIST;
0373 }
0374
0375
0376 rb_link_node(&kn->rb, parent, node);
0377 rb_insert_color(&kn->rb, &kn->parent->dir.children);
0378
0379
0380 if (kernfs_type(kn) == KERNFS_DIR)
0381 kn->parent->dir.subdirs++;
0382 kernfs_inc_rev(kn->parent);
0383
0384 return 0;
0385 }
0386
0387
0388
0389
0390
0391
0392
0393
0394
0395
0396
0397
0398 static bool kernfs_unlink_sibling(struct kernfs_node *kn)
0399 {
0400 if (RB_EMPTY_NODE(&kn->rb))
0401 return false;
0402
0403 if (kernfs_type(kn) == KERNFS_DIR)
0404 kn->parent->dir.subdirs--;
0405 kernfs_inc_rev(kn->parent);
0406
0407 rb_erase(&kn->rb, &kn->parent->dir.children);
0408 RB_CLEAR_NODE(&kn->rb);
0409 return true;
0410 }
0411
0412
0413
0414
0415
0416
0417
0418
0419
0420
0421
0422 struct kernfs_node *kernfs_get_active(struct kernfs_node *kn)
0423 {
0424 if (unlikely(!kn))
0425 return NULL;
0426
0427 if (!atomic_inc_unless_negative(&kn->active))
0428 return NULL;
0429
0430 if (kernfs_lockdep(kn))
0431 rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_);
0432 return kn;
0433 }
0434
0435
0436
0437
0438
0439
0440
0441
0442 void kernfs_put_active(struct kernfs_node *kn)
0443 {
0444 int v;
0445
0446 if (unlikely(!kn))
0447 return;
0448
0449 if (kernfs_lockdep(kn))
0450 rwsem_release(&kn->dep_map, _RET_IP_);
0451 v = atomic_dec_return(&kn->active);
0452 if (likely(v != KN_DEACTIVATED_BIAS))
0453 return;
0454
0455 wake_up_all(&kernfs_root(kn)->deactivate_waitq);
0456 }
0457
0458
0459
0460
0461
0462
0463
0464
0465
0466 static void kernfs_drain(struct kernfs_node *kn)
0467 __releases(&kernfs_root(kn)->kernfs_rwsem)
0468 __acquires(&kernfs_root(kn)->kernfs_rwsem)
0469 {
0470 struct kernfs_root *root = kernfs_root(kn);
0471
0472 lockdep_assert_held_write(&root->kernfs_rwsem);
0473 WARN_ON_ONCE(kernfs_active(kn));
0474
0475 up_write(&root->kernfs_rwsem);
0476
0477 if (kernfs_lockdep(kn)) {
0478 rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
0479 if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS)
0480 lock_contended(&kn->dep_map, _RET_IP_);
0481 }
0482
0483
0484 wait_event(root->deactivate_waitq,
0485 atomic_read(&kn->active) == KN_DEACTIVATED_BIAS);
0486
0487 if (kernfs_lockdep(kn)) {
0488 lock_acquired(&kn->dep_map, _RET_IP_);
0489 rwsem_release(&kn->dep_map, _RET_IP_);
0490 }
0491
0492 kernfs_drain_open_files(kn);
0493
0494 down_write(&root->kernfs_rwsem);
0495 }
0496
0497
0498
0499
0500
0501 void kernfs_get(struct kernfs_node *kn)
0502 {
0503 if (kn) {
0504 WARN_ON(!atomic_read(&kn->count));
0505 atomic_inc(&kn->count);
0506 }
0507 }
0508 EXPORT_SYMBOL_GPL(kernfs_get);
0509
0510
0511
0512
0513
0514
0515
0516 void kernfs_put(struct kernfs_node *kn)
0517 {
0518 struct kernfs_node *parent;
0519 struct kernfs_root *root;
0520
0521 if (!kn || !atomic_dec_and_test(&kn->count))
0522 return;
0523 root = kernfs_root(kn);
0524 repeat:
0525
0526
0527
0528
0529 parent = kn->parent;
0530
0531 WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS,
0532 "kernfs_put: %s/%s: released with incorrect active_ref %d\n",
0533 parent ? parent->name : "", kn->name, atomic_read(&kn->active));
0534
0535 if (kernfs_type(kn) == KERNFS_LINK)
0536 kernfs_put(kn->symlink.target_kn);
0537
0538 kfree_const(kn->name);
0539
0540 if (kn->iattr) {
0541 simple_xattrs_free(&kn->iattr->xattrs);
0542 kmem_cache_free(kernfs_iattrs_cache, kn->iattr);
0543 }
0544 spin_lock(&kernfs_idr_lock);
0545 idr_remove(&root->ino_idr, (u32)kernfs_ino(kn));
0546 spin_unlock(&kernfs_idr_lock);
0547 kmem_cache_free(kernfs_node_cache, kn);
0548
0549 kn = parent;
0550 if (kn) {
0551 if (atomic_dec_and_test(&kn->count))
0552 goto repeat;
0553 } else {
0554
0555 idr_destroy(&root->ino_idr);
0556 kfree(root);
0557 }
0558 }
0559 EXPORT_SYMBOL_GPL(kernfs_put);
0560
0561
0562
0563
0564
0565
0566
0567
0568
0569
0570
0571
0572 struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry)
0573 {
0574 if (dentry->d_sb->s_op == &kernfs_sops)
0575 return kernfs_dentry_node(dentry);
0576 return NULL;
0577 }
0578
0579 static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
0580 struct kernfs_node *parent,
0581 const char *name, umode_t mode,
0582 kuid_t uid, kgid_t gid,
0583 unsigned flags)
0584 {
0585 struct kernfs_node *kn;
0586 u32 id_highbits;
0587 int ret;
0588
0589 name = kstrdup_const(name, GFP_KERNEL);
0590 if (!name)
0591 return NULL;
0592
0593 kn = kmem_cache_zalloc(kernfs_node_cache, GFP_KERNEL);
0594 if (!kn)
0595 goto err_out1;
0596
0597 idr_preload(GFP_KERNEL);
0598 spin_lock(&kernfs_idr_lock);
0599 ret = idr_alloc_cyclic(&root->ino_idr, kn, 1, 0, GFP_ATOMIC);
0600 if (ret >= 0 && ret < root->last_id_lowbits)
0601 root->id_highbits++;
0602 id_highbits = root->id_highbits;
0603 root->last_id_lowbits = ret;
0604 spin_unlock(&kernfs_idr_lock);
0605 idr_preload_end();
0606 if (ret < 0)
0607 goto err_out2;
0608
0609 kn->id = (u64)id_highbits << 32 | ret;
0610
0611 atomic_set(&kn->count, 1);
0612 atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
0613 RB_CLEAR_NODE(&kn->rb);
0614
0615 kn->name = name;
0616 kn->mode = mode;
0617 kn->flags = flags;
0618
0619 if (!uid_eq(uid, GLOBAL_ROOT_UID) || !gid_eq(gid, GLOBAL_ROOT_GID)) {
0620 struct iattr iattr = {
0621 .ia_valid = ATTR_UID | ATTR_GID,
0622 .ia_uid = uid,
0623 .ia_gid = gid,
0624 };
0625
0626 ret = __kernfs_setattr(kn, &iattr);
0627 if (ret < 0)
0628 goto err_out3;
0629 }
0630
0631 if (parent) {
0632 ret = security_kernfs_init_security(parent, kn);
0633 if (ret)
0634 goto err_out3;
0635 }
0636
0637 return kn;
0638
0639 err_out3:
0640 idr_remove(&root->ino_idr, (u32)kernfs_ino(kn));
0641 err_out2:
0642 kmem_cache_free(kernfs_node_cache, kn);
0643 err_out1:
0644 kfree_const(name);
0645 return NULL;
0646 }
0647
0648 struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
0649 const char *name, umode_t mode,
0650 kuid_t uid, kgid_t gid,
0651 unsigned flags)
0652 {
0653 struct kernfs_node *kn;
0654
0655 kn = __kernfs_new_node(kernfs_root(parent), parent,
0656 name, mode, uid, gid, flags);
0657 if (kn) {
0658 kernfs_get(parent);
0659 kn->parent = parent;
0660 }
0661 return kn;
0662 }
0663
0664
0665
0666
0667
0668
0669
0670
0671
0672
0673
0674
0675 struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root,
0676 u64 id)
0677 {
0678 struct kernfs_node *kn;
0679 ino_t ino = kernfs_id_ino(id);
0680 u32 gen = kernfs_id_gen(id);
0681
0682 spin_lock(&kernfs_idr_lock);
0683
0684 kn = idr_find(&root->ino_idr, (u32)ino);
0685 if (!kn)
0686 goto err_unlock;
0687
0688 if (sizeof(ino_t) >= sizeof(u64)) {
0689
0690 if (kernfs_ino(kn) != ino)
0691 goto err_unlock;
0692 } else {
0693
0694 if (unlikely(gen && kernfs_gen(kn) != gen))
0695 goto err_unlock;
0696 }
0697
0698
0699
0700
0701
0702
0703 if (unlikely(!(kn->flags & KERNFS_ACTIVATED) ||
0704 !atomic_inc_not_zero(&kn->count)))
0705 goto err_unlock;
0706
0707 spin_unlock(&kernfs_idr_lock);
0708 return kn;
0709 err_unlock:
0710 spin_unlock(&kernfs_idr_lock);
0711 return NULL;
0712 }
0713
0714
0715
0716
0717
0718
0719
0720
0721
0722
0723
0724
0725
0726 int kernfs_add_one(struct kernfs_node *kn)
0727 {
0728 struct kernfs_node *parent = kn->parent;
0729 struct kernfs_root *root = kernfs_root(parent);
0730 struct kernfs_iattrs *ps_iattr;
0731 bool has_ns;
0732 int ret;
0733
0734 down_write(&root->kernfs_rwsem);
0735
0736 ret = -EINVAL;
0737 has_ns = kernfs_ns_enabled(parent);
0738 if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
0739 has_ns ? "required" : "invalid", parent->name, kn->name))
0740 goto out_unlock;
0741
0742 if (kernfs_type(parent) != KERNFS_DIR)
0743 goto out_unlock;
0744
0745 ret = -ENOENT;
0746 if (parent->flags & KERNFS_EMPTY_DIR)
0747 goto out_unlock;
0748
0749 if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent))
0750 goto out_unlock;
0751
0752 kn->hash = kernfs_name_hash(kn->name, kn->ns);
0753
0754 ret = kernfs_link_sibling(kn);
0755 if (ret)
0756 goto out_unlock;
0757
0758
0759 ps_iattr = parent->iattr;
0760 if (ps_iattr) {
0761 ktime_get_real_ts64(&ps_iattr->ia_ctime);
0762 ps_iattr->ia_mtime = ps_iattr->ia_ctime;
0763 }
0764
0765 up_write(&root->kernfs_rwsem);
0766
0767
0768
0769
0770
0771
0772
0773
0774 if (!(kernfs_root(kn)->flags & KERNFS_ROOT_CREATE_DEACTIVATED))
0775 kernfs_activate(kn);
0776 return 0;
0777
0778 out_unlock:
0779 up_write(&root->kernfs_rwsem);
0780 return ret;
0781 }
0782
0783
0784
0785
0786
0787
0788
0789
0790
0791
0792 static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,
0793 const unsigned char *name,
0794 const void *ns)
0795 {
0796 struct rb_node *node = parent->dir.children.rb_node;
0797 bool has_ns = kernfs_ns_enabled(parent);
0798 unsigned int hash;
0799
0800 lockdep_assert_held(&kernfs_root(parent)->kernfs_rwsem);
0801
0802 if (has_ns != (bool)ns) {
0803 WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
0804 has_ns ? "required" : "invalid", parent->name, name);
0805 return NULL;
0806 }
0807
0808 hash = kernfs_name_hash(name, ns);
0809 while (node) {
0810 struct kernfs_node *kn;
0811 int result;
0812
0813 kn = rb_to_kn(node);
0814 result = kernfs_name_compare(hash, name, ns, kn);
0815 if (result < 0)
0816 node = node->rb_left;
0817 else if (result > 0)
0818 node = node->rb_right;
0819 else
0820 return kn;
0821 }
0822 return NULL;
0823 }
0824
0825 static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent,
0826 const unsigned char *path,
0827 const void *ns)
0828 {
0829 size_t len;
0830 char *p, *name;
0831
0832 lockdep_assert_held_read(&kernfs_root(parent)->kernfs_rwsem);
0833
0834 spin_lock_irq(&kernfs_pr_cont_lock);
0835
0836 len = strlcpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf));
0837
0838 if (len >= sizeof(kernfs_pr_cont_buf)) {
0839 spin_unlock_irq(&kernfs_pr_cont_lock);
0840 return NULL;
0841 }
0842
0843 p = kernfs_pr_cont_buf;
0844
0845 while ((name = strsep(&p, "/")) && parent) {
0846 if (*name == '\0')
0847 continue;
0848 parent = kernfs_find_ns(parent, name, ns);
0849 }
0850
0851 spin_unlock_irq(&kernfs_pr_cont_lock);
0852
0853 return parent;
0854 }
0855
0856
0857
0858
0859
0860
0861
0862
0863
0864
0865
0866 struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
0867 const char *name, const void *ns)
0868 {
0869 struct kernfs_node *kn;
0870 struct kernfs_root *root = kernfs_root(parent);
0871
0872 down_read(&root->kernfs_rwsem);
0873 kn = kernfs_find_ns(parent, name, ns);
0874 kernfs_get(kn);
0875 up_read(&root->kernfs_rwsem);
0876
0877 return kn;
0878 }
0879 EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns);
0880
0881
0882
0883
0884
0885
0886
0887
0888
0889
0890
0891 struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent,
0892 const char *path, const void *ns)
0893 {
0894 struct kernfs_node *kn;
0895 struct kernfs_root *root = kernfs_root(parent);
0896
0897 down_read(&root->kernfs_rwsem);
0898 kn = kernfs_walk_ns(parent, path, ns);
0899 kernfs_get(kn);
0900 up_read(&root->kernfs_rwsem);
0901
0902 return kn;
0903 }
0904
0905
0906
0907
0908
0909
0910
0911
0912
0913
0914 struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
0915 unsigned int flags, void *priv)
0916 {
0917 struct kernfs_root *root;
0918 struct kernfs_node *kn;
0919
0920 root = kzalloc(sizeof(*root), GFP_KERNEL);
0921 if (!root)
0922 return ERR_PTR(-ENOMEM);
0923
0924 idr_init(&root->ino_idr);
0925 init_rwsem(&root->kernfs_rwsem);
0926 INIT_LIST_HEAD(&root->supers);
0927
0928
0929
0930
0931
0932
0933
0934 if (sizeof(ino_t) >= sizeof(u64))
0935 root->id_highbits = 0;
0936 else
0937 root->id_highbits = 1;
0938
0939 kn = __kernfs_new_node(root, NULL, "", S_IFDIR | S_IRUGO | S_IXUGO,
0940 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
0941 KERNFS_DIR);
0942 if (!kn) {
0943 idr_destroy(&root->ino_idr);
0944 kfree(root);
0945 return ERR_PTR(-ENOMEM);
0946 }
0947
0948 kn->priv = priv;
0949 kn->dir.root = root;
0950
0951 root->syscall_ops = scops;
0952 root->flags = flags;
0953 root->kn = kn;
0954 init_waitqueue_head(&root->deactivate_waitq);
0955
0956 if (!(root->flags & KERNFS_ROOT_CREATE_DEACTIVATED))
0957 kernfs_activate(kn);
0958
0959 return root;
0960 }
0961
0962
0963
0964
0965
0966
0967
0968
0969 void kernfs_destroy_root(struct kernfs_root *root)
0970 {
0971
0972
0973
0974
0975 kernfs_get(root->kn);
0976 kernfs_remove(root->kn);
0977 kernfs_put(root->kn);
0978 }
0979
0980
0981
0982
0983
0984 struct kernfs_node *kernfs_root_to_node(struct kernfs_root *root)
0985 {
0986 return root->kn;
0987 }
0988
0989
0990
0991
0992
0993
0994
0995
0996
0997
0998
0999
1000
1001 struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
1002 const char *name, umode_t mode,
1003 kuid_t uid, kgid_t gid,
1004 void *priv, const void *ns)
1005 {
1006 struct kernfs_node *kn;
1007 int rc;
1008
1009
1010 kn = kernfs_new_node(parent, name, mode | S_IFDIR,
1011 uid, gid, KERNFS_DIR);
1012 if (!kn)
1013 return ERR_PTR(-ENOMEM);
1014
1015 kn->dir.root = parent->dir.root;
1016 kn->ns = ns;
1017 kn->priv = priv;
1018
1019
1020 rc = kernfs_add_one(kn);
1021 if (!rc)
1022 return kn;
1023
1024 kernfs_put(kn);
1025 return ERR_PTR(rc);
1026 }
1027
1028
1029
1030
1031
1032
1033
1034
1035 struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
1036 const char *name)
1037 {
1038 struct kernfs_node *kn;
1039 int rc;
1040
1041
1042 kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR,
1043 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, KERNFS_DIR);
1044 if (!kn)
1045 return ERR_PTR(-ENOMEM);
1046
1047 kn->flags |= KERNFS_EMPTY_DIR;
1048 kn->dir.root = parent->dir.root;
1049 kn->ns = NULL;
1050 kn->priv = NULL;
1051
1052
1053 rc = kernfs_add_one(kn);
1054 if (!rc)
1055 return kn;
1056
1057 kernfs_put(kn);
1058 return ERR_PTR(rc);
1059 }
1060
1061 static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
1062 {
1063 struct kernfs_node *kn;
1064 struct kernfs_root *root;
1065
1066 if (flags & LOOKUP_RCU)
1067 return -ECHILD;
1068
1069
1070 if (d_really_is_negative(dentry)) {
1071 struct kernfs_node *parent;
1072
1073
1074
1075
1076 spin_lock(&dentry->d_lock);
1077 parent = kernfs_dentry_node(dentry->d_parent);
1078 if (parent) {
1079 spin_unlock(&dentry->d_lock);
1080 root = kernfs_root(parent);
1081 down_read(&root->kernfs_rwsem);
1082 if (kernfs_dir_changed(parent, dentry)) {
1083 up_read(&root->kernfs_rwsem);
1084 return 0;
1085 }
1086 up_read(&root->kernfs_rwsem);
1087 } else
1088 spin_unlock(&dentry->d_lock);
1089
1090
1091
1092
1093 return 1;
1094 }
1095
1096 kn = kernfs_dentry_node(dentry);
1097 root = kernfs_root(kn);
1098 down_read(&root->kernfs_rwsem);
1099
1100
1101 if (!kernfs_active(kn))
1102 goto out_bad;
1103
1104
1105 if (kernfs_dentry_node(dentry->d_parent) != kn->parent)
1106 goto out_bad;
1107
1108
1109 if (strcmp(dentry->d_name.name, kn->name) != 0)
1110 goto out_bad;
1111
1112
1113 if (kn->parent && kernfs_ns_enabled(kn->parent) &&
1114 kernfs_info(dentry->d_sb)->ns != kn->ns)
1115 goto out_bad;
1116
1117 up_read(&root->kernfs_rwsem);
1118 return 1;
1119 out_bad:
1120 up_read(&root->kernfs_rwsem);
1121 return 0;
1122 }
1123
1124 const struct dentry_operations kernfs_dops = {
1125 .d_revalidate = kernfs_dop_revalidate,
1126 };
1127
1128 static struct dentry *kernfs_iop_lookup(struct inode *dir,
1129 struct dentry *dentry,
1130 unsigned int flags)
1131 {
1132 struct kernfs_node *parent = dir->i_private;
1133 struct kernfs_node *kn;
1134 struct kernfs_root *root;
1135 struct inode *inode = NULL;
1136 const void *ns = NULL;
1137
1138 root = kernfs_root(parent);
1139 down_read(&root->kernfs_rwsem);
1140 if (kernfs_ns_enabled(parent))
1141 ns = kernfs_info(dir->i_sb)->ns;
1142
1143 kn = kernfs_find_ns(parent, dentry->d_name.name, ns);
1144
1145 if (kn) {
1146
1147
1148
1149 if (!kernfs_active(kn)) {
1150 up_read(&root->kernfs_rwsem);
1151 return NULL;
1152 }
1153 inode = kernfs_get_inode(dir->i_sb, kn);
1154 if (!inode)
1155 inode = ERR_PTR(-ENOMEM);
1156 }
1157
1158
1159
1160
1161
1162
1163 if (!IS_ERR(inode))
1164 kernfs_set_rev(parent, dentry);
1165 up_read(&root->kernfs_rwsem);
1166
1167
1168 return d_splice_alias(inode, dentry);
1169 }
1170
1171 static int kernfs_iop_mkdir(struct user_namespace *mnt_userns,
1172 struct inode *dir, struct dentry *dentry,
1173 umode_t mode)
1174 {
1175 struct kernfs_node *parent = dir->i_private;
1176 struct kernfs_syscall_ops *scops = kernfs_root(parent)->syscall_ops;
1177 int ret;
1178
1179 if (!scops || !scops->mkdir)
1180 return -EPERM;
1181
1182 if (!kernfs_get_active(parent))
1183 return -ENODEV;
1184
1185 ret = scops->mkdir(parent, dentry->d_name.name, mode);
1186
1187 kernfs_put_active(parent);
1188 return ret;
1189 }
1190
1191 static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry)
1192 {
1193 struct kernfs_node *kn = kernfs_dentry_node(dentry);
1194 struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
1195 int ret;
1196
1197 if (!scops || !scops->rmdir)
1198 return -EPERM;
1199
1200 if (!kernfs_get_active(kn))
1201 return -ENODEV;
1202
1203 ret = scops->rmdir(kn);
1204
1205 kernfs_put_active(kn);
1206 return ret;
1207 }
1208
1209 static int kernfs_iop_rename(struct user_namespace *mnt_userns,
1210 struct inode *old_dir, struct dentry *old_dentry,
1211 struct inode *new_dir, struct dentry *new_dentry,
1212 unsigned int flags)
1213 {
1214 struct kernfs_node *kn = kernfs_dentry_node(old_dentry);
1215 struct kernfs_node *new_parent = new_dir->i_private;
1216 struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
1217 int ret;
1218
1219 if (flags)
1220 return -EINVAL;
1221
1222 if (!scops || !scops->rename)
1223 return -EPERM;
1224
1225 if (!kernfs_get_active(kn))
1226 return -ENODEV;
1227
1228 if (!kernfs_get_active(new_parent)) {
1229 kernfs_put_active(kn);
1230 return -ENODEV;
1231 }
1232
1233 ret = scops->rename(kn, new_parent, new_dentry->d_name.name);
1234
1235 kernfs_put_active(new_parent);
1236 kernfs_put_active(kn);
1237 return ret;
1238 }
1239
1240 const struct inode_operations kernfs_dir_iops = {
1241 .lookup = kernfs_iop_lookup,
1242 .permission = kernfs_iop_permission,
1243 .setattr = kernfs_iop_setattr,
1244 .getattr = kernfs_iop_getattr,
1245 .listxattr = kernfs_iop_listxattr,
1246
1247 .mkdir = kernfs_iop_mkdir,
1248 .rmdir = kernfs_iop_rmdir,
1249 .rename = kernfs_iop_rename,
1250 };
1251
1252 static struct kernfs_node *kernfs_leftmost_descendant(struct kernfs_node *pos)
1253 {
1254 struct kernfs_node *last;
1255
1256 while (true) {
1257 struct rb_node *rbn;
1258
1259 last = pos;
1260
1261 if (kernfs_type(pos) != KERNFS_DIR)
1262 break;
1263
1264 rbn = rb_first(&pos->dir.children);
1265 if (!rbn)
1266 break;
1267
1268 pos = rb_to_kn(rbn);
1269 }
1270
1271 return last;
1272 }
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283 static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
1284 struct kernfs_node *root)
1285 {
1286 struct rb_node *rbn;
1287
1288 lockdep_assert_held_write(&kernfs_root(root)->kernfs_rwsem);
1289
1290
1291 if (!pos)
1292 return kernfs_leftmost_descendant(root);
1293
1294
1295 if (pos == root)
1296 return NULL;
1297
1298
1299 rbn = rb_next(&pos->rb);
1300 if (rbn)
1301 return kernfs_leftmost_descendant(rb_to_kn(rbn));
1302
1303
1304 return pos->parent;
1305 }
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320 void kernfs_activate(struct kernfs_node *kn)
1321 {
1322 struct kernfs_node *pos;
1323 struct kernfs_root *root = kernfs_root(kn);
1324
1325 down_write(&root->kernfs_rwsem);
1326
1327 pos = NULL;
1328 while ((pos = kernfs_next_descendant_post(pos, kn))) {
1329 if (pos->flags & KERNFS_ACTIVATED)
1330 continue;
1331
1332 WARN_ON_ONCE(pos->parent && RB_EMPTY_NODE(&pos->rb));
1333 WARN_ON_ONCE(atomic_read(&pos->active) != KN_DEACTIVATED_BIAS);
1334
1335 atomic_sub(KN_DEACTIVATED_BIAS, &pos->active);
1336 pos->flags |= KERNFS_ACTIVATED;
1337 }
1338
1339 up_write(&root->kernfs_rwsem);
1340 }
1341
1342 static void __kernfs_remove(struct kernfs_node *kn)
1343 {
1344 struct kernfs_node *pos;
1345
1346
1347 if (!kn)
1348 return;
1349
1350 lockdep_assert_held_write(&kernfs_root(kn)->kernfs_rwsem);
1351
1352
1353
1354
1355
1356 if (kn->parent && RB_EMPTY_NODE(&kn->rb))
1357 return;
1358
1359 pr_debug("kernfs %s: removing\n", kn->name);
1360
1361
1362 pos = NULL;
1363 while ((pos = kernfs_next_descendant_post(pos, kn)))
1364 if (kernfs_active(pos))
1365 atomic_add(KN_DEACTIVATED_BIAS, &pos->active);
1366
1367
1368 do {
1369 pos = kernfs_leftmost_descendant(kn);
1370
1371
1372
1373
1374
1375
1376
1377 kernfs_get(pos);
1378
1379
1380
1381
1382
1383
1384
1385 if (kn->flags & KERNFS_ACTIVATED)
1386 kernfs_drain(pos);
1387 else
1388 WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
1389
1390
1391
1392
1393
1394 if (!pos->parent || kernfs_unlink_sibling(pos)) {
1395 struct kernfs_iattrs *ps_iattr =
1396 pos->parent ? pos->parent->iattr : NULL;
1397
1398
1399 if (ps_iattr) {
1400 ktime_get_real_ts64(&ps_iattr->ia_ctime);
1401 ps_iattr->ia_mtime = ps_iattr->ia_ctime;
1402 }
1403
1404 kernfs_put(pos);
1405 }
1406
1407 kernfs_put(pos);
1408 } while (pos != kn);
1409 }
1410
1411
1412
1413
1414
1415
1416
1417 void kernfs_remove(struct kernfs_node *kn)
1418 {
1419 struct kernfs_root *root;
1420
1421 if (!kn)
1422 return;
1423
1424 root = kernfs_root(kn);
1425
1426 down_write(&root->kernfs_rwsem);
1427 __kernfs_remove(kn);
1428 up_write(&root->kernfs_rwsem);
1429 }
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
/**
 * kernfs_break_active_protection - give up the caller's active reference
 * @kn: node whose active reference the caller currently holds
 *
 * Implemented as a plain kernfs_put_active(): the active counter is
 * decremented and, for lockdep-annotated nodes, the lockdep acquisition is
 * released.  kernfs_unbreak_active_protection() is the counterpart that
 * re-establishes both.
 */
void kernfs_break_active_protection(struct kernfs_node *kn)
{
	/* Step out of the active-reference dependency chain. */
	kernfs_put_active(kn);
}
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469 void kernfs_unbreak_active_protection(struct kernfs_node *kn)
1470 {
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480 atomic_inc(&kn->active);
1481 if (kernfs_lockdep(kn))
1482 rwsem_acquire(&kn->dep_map, 0, 1, _RET_IP_);
1483 }
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511 bool kernfs_remove_self(struct kernfs_node *kn)
1512 {
1513 bool ret;
1514 struct kernfs_root *root = kernfs_root(kn);
1515
1516 down_write(&root->kernfs_rwsem);
1517 kernfs_break_active_protection(kn);
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528 if (!(kn->flags & KERNFS_SUICIDAL)) {
1529 kn->flags |= KERNFS_SUICIDAL;
1530 __kernfs_remove(kn);
1531 kn->flags |= KERNFS_SUICIDED;
1532 ret = true;
1533 } else {
1534 wait_queue_head_t *waitq = &kernfs_root(kn)->deactivate_waitq;
1535 DEFINE_WAIT(wait);
1536
1537 while (true) {
1538 prepare_to_wait(waitq, &wait, TASK_UNINTERRUPTIBLE);
1539
1540 if ((kn->flags & KERNFS_SUICIDED) &&
1541 atomic_read(&kn->active) == KN_DEACTIVATED_BIAS)
1542 break;
1543
1544 up_write(&root->kernfs_rwsem);
1545 schedule();
1546 down_write(&root->kernfs_rwsem);
1547 }
1548 finish_wait(waitq, &wait);
1549 WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb));
1550 ret = false;
1551 }
1552
1553
1554
1555
1556
1557 kernfs_unbreak_active_protection(kn);
1558
1559 up_write(&root->kernfs_rwsem);
1560 return ret;
1561 }
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572 int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
1573 const void *ns)
1574 {
1575 struct kernfs_node *kn;
1576 struct kernfs_root *root;
1577
1578 if (!parent) {
1579 WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n",
1580 name);
1581 return -ENOENT;
1582 }
1583
1584 root = kernfs_root(parent);
1585 down_write(&root->kernfs_rwsem);
1586
1587 kn = kernfs_find_ns(parent, name, ns);
1588 if (kn)
1589 __kernfs_remove(kn);
1590
1591 up_write(&root->kernfs_rwsem);
1592
1593 if (kn)
1594 return 0;
1595 else
1596 return -ENOENT;
1597 }
1598
1599
1600
1601
1602
1603
1604
1605
1606 int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
1607 const char *new_name, const void *new_ns)
1608 {
1609 struct kernfs_node *old_parent;
1610 struct kernfs_root *root;
1611 const char *old_name = NULL;
1612 int error;
1613
1614
1615 if (!kn->parent)
1616 return -EINVAL;
1617
1618 root = kernfs_root(kn);
1619 down_write(&root->kernfs_rwsem);
1620
1621 error = -ENOENT;
1622 if (!kernfs_active(kn) || !kernfs_active(new_parent) ||
1623 (new_parent->flags & KERNFS_EMPTY_DIR))
1624 goto out;
1625
1626 error = 0;
1627 if ((kn->parent == new_parent) && (kn->ns == new_ns) &&
1628 (strcmp(kn->name, new_name) == 0))
1629 goto out;
1630
1631 error = -EEXIST;
1632 if (kernfs_find_ns(new_parent, new_name, new_ns))
1633 goto out;
1634
1635
1636 if (strcmp(kn->name, new_name) != 0) {
1637 error = -ENOMEM;
1638 new_name = kstrdup_const(new_name, GFP_KERNEL);
1639 if (!new_name)
1640 goto out;
1641 } else {
1642 new_name = NULL;
1643 }
1644
1645
1646
1647
1648 kernfs_unlink_sibling(kn);
1649 kernfs_get(new_parent);
1650
1651
1652 spin_lock_irq(&kernfs_rename_lock);
1653
1654 old_parent = kn->parent;
1655 kn->parent = new_parent;
1656
1657 kn->ns = new_ns;
1658 if (new_name) {
1659 old_name = kn->name;
1660 kn->name = new_name;
1661 }
1662
1663 spin_unlock_irq(&kernfs_rename_lock);
1664
1665 kn->hash = kernfs_name_hash(kn->name, kn->ns);
1666 kernfs_link_sibling(kn);
1667
1668 kernfs_put(old_parent);
1669 kfree_const(old_name);
1670
1671 error = 0;
1672 out:
1673 up_write(&root->kernfs_rwsem);
1674 return error;
1675 }
1676
1677
1678 static inline unsigned char dt_type(struct kernfs_node *kn)
1679 {
1680 return (kn->mode >> 12) & 15;
1681 }
1682
1683 static int kernfs_dir_fop_release(struct inode *inode, struct file *filp)
1684 {
1685 kernfs_put(filp->private_data);
1686 return 0;
1687 }
1688
1689 static struct kernfs_node *kernfs_dir_pos(const void *ns,
1690 struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos)
1691 {
1692 if (pos) {
1693 int valid = kernfs_active(pos) &&
1694 pos->parent == parent && hash == pos->hash;
1695 kernfs_put(pos);
1696 if (!valid)
1697 pos = NULL;
1698 }
1699 if (!pos && (hash > 1) && (hash < INT_MAX)) {
1700 struct rb_node *node = parent->dir.children.rb_node;
1701 while (node) {
1702 pos = rb_to_kn(node);
1703
1704 if (hash < pos->hash)
1705 node = node->rb_left;
1706 else if (hash > pos->hash)
1707 node = node->rb_right;
1708 else
1709 break;
1710 }
1711 }
1712
1713 while (pos && (!kernfs_active(pos) || pos->ns != ns)) {
1714 struct rb_node *node = rb_next(&pos->rb);
1715 if (!node)
1716 pos = NULL;
1717 else
1718 pos = rb_to_kn(node);
1719 }
1720 return pos;
1721 }
1722
1723 static struct kernfs_node *kernfs_dir_next_pos(const void *ns,
1724 struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos)
1725 {
1726 pos = kernfs_dir_pos(ns, parent, ino, pos);
1727 if (pos) {
1728 do {
1729 struct rb_node *node = rb_next(&pos->rb);
1730 if (!node)
1731 pos = NULL;
1732 else
1733 pos = rb_to_kn(node);
1734 } while (pos && (!kernfs_active(pos) || pos->ns != ns));
1735 }
1736 return pos;
1737 }
1738
1739 static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
1740 {
1741 struct dentry *dentry = file->f_path.dentry;
1742 struct kernfs_node *parent = kernfs_dentry_node(dentry);
1743 struct kernfs_node *pos = file->private_data;
1744 struct kernfs_root *root;
1745 const void *ns = NULL;
1746
1747 if (!dir_emit_dots(file, ctx))
1748 return 0;
1749
1750 root = kernfs_root(parent);
1751 down_read(&root->kernfs_rwsem);
1752
1753 if (kernfs_ns_enabled(parent))
1754 ns = kernfs_info(dentry->d_sb)->ns;
1755
1756 for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos);
1757 pos;
1758 pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) {
1759 const char *name = pos->name;
1760 unsigned int type = dt_type(pos);
1761 int len = strlen(name);
1762 ino_t ino = kernfs_ino(pos);
1763
1764 ctx->pos = pos->hash;
1765 file->private_data = pos;
1766 kernfs_get(pos);
1767
1768 up_read(&root->kernfs_rwsem);
1769 if (!dir_emit(ctx, name, len, ino, type))
1770 return 0;
1771 down_read(&root->kernfs_rwsem);
1772 }
1773 up_read(&root->kernfs_rwsem);
1774 file->private_data = NULL;
1775 ctx->pos = INT_MAX;
1776 return 0;
1777 }
1778
1779 const struct file_operations kernfs_dir_fops = {
1780 .read = generic_read_dir,
1781 .iterate_shared = kernfs_fop_readdir,
1782 .release = kernfs_dir_fop_release,
1783 .llseek = generic_file_llseek,
1784 };