Back to home page

LXR

 
 

    


0001 /*
0002  *  fs/libfs.c
0003  *  Library for filesystems writers.
0004  */
0005 
0006 #include <linux/blkdev.h>
0007 #include <linux/export.h>
0008 #include <linux/pagemap.h>
0009 #include <linux/slab.h>
0010 #include <linux/mount.h>
0011 #include <linux/vfs.h>
0012 #include <linux/quotaops.h>
0013 #include <linux/mutex.h>
0014 #include <linux/namei.h>
0015 #include <linux/exportfs.h>
0016 #include <linux/writeback.h>
0017 #include <linux/buffer_head.h> /* sync_mapping_buffers */
0018 
0019 #include <linux/uaccess.h>
0020 
0021 #include "internal.h"
0022 
0023 int simple_getattr(struct vfsmount *mnt, struct dentry *dentry,
0024            struct kstat *stat)
0025 {
0026     struct inode *inode = d_inode(dentry);
0027     generic_fillattr(inode, stat);
0028     stat->blocks = inode->i_mapping->nrpages << (PAGE_SHIFT - 9);
0029     return 0;
0030 }
0031 EXPORT_SYMBOL(simple_getattr);
0032 
0033 int simple_statfs(struct dentry *dentry, struct kstatfs *buf)
0034 {
0035     buf->f_type = dentry->d_sb->s_magic;
0036     buf->f_bsize = PAGE_SIZE;
0037     buf->f_namelen = NAME_MAX;
0038     return 0;
0039 }
0040 EXPORT_SYMBOL(simple_statfs);
0041 
/*
 * An in-memory filesystem gains nothing from keeping negative dentries
 * around - they only cost memory and lookup time - so ask for them to
 * be deleted immediately.  @dentry is not examined; the answer is
 * always 1.
 */
int always_delete_dentry(const struct dentry *dentry)
{
    const int delete_now = 1;

    return delete_now;
}
EXPORT_SYMBOL(always_delete_dentry);
0051 
0052 const struct dentry_operations simple_dentry_operations = {
0053     .d_delete = always_delete_dentry,
0054 };
0055 EXPORT_SYMBOL(simple_dentry_operations);
0056 
0057 /*
0058  * Lookup the data. This is trivial - if the dentry didn't already
0059  * exist, we know it is negative.  Set d_op to delete negative dentries.
0060  */
0061 struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
0062 {
0063     if (dentry->d_name.len > NAME_MAX)
0064         return ERR_PTR(-ENAMETOOLONG);
0065     if (!dentry->d_sb->s_d_op)
0066         d_set_d_op(dentry, &simple_dentry_operations);
0067     d_add(dentry, NULL);
0068     return NULL;
0069 }
0070 EXPORT_SYMBOL(simple_lookup);
0071 
0072 int dcache_dir_open(struct inode *inode, struct file *file)
0073 {
0074     file->private_data = d_alloc_cursor(file->f_path.dentry);
0075 
0076     return file->private_data ? 0 : -ENOMEM;
0077 }
0078 EXPORT_SYMBOL(dcache_dir_open);
0079 
0080 int dcache_dir_close(struct inode *inode, struct file *file)
0081 {
0082     dput(file->private_data);
0083     return 0;
0084 }
0085 EXPORT_SYMBOL(dcache_dir_close);
0086 
0087 /* parent is locked at least shared */
0088 static struct dentry *next_positive(struct dentry *parent,
0089                     struct list_head *from,
0090                     int count)
0091 {
0092     unsigned *seq = &parent->d_inode->i_dir_seq, n;
0093     struct dentry *res;
0094     struct list_head *p;
0095     bool skipped;
0096     int i;
0097 
0098 retry:
0099     i = count;
0100     skipped = false;
0101     n = smp_load_acquire(seq) & ~1;
0102     res = NULL;
0103     rcu_read_lock();
0104     for (p = from->next; p != &parent->d_subdirs; p = p->next) {
0105         struct dentry *d = list_entry(p, struct dentry, d_child);
0106         if (!simple_positive(d)) {
0107             skipped = true;
0108         } else if (!--i) {
0109             res = d;
0110             break;
0111         }
0112     }
0113     rcu_read_unlock();
0114     if (skipped) {
0115         smp_rmb();
0116         if (unlikely(*seq != n))
0117             goto retry;
0118     }
0119     return res;
0120 }
0121 
0122 static void move_cursor(struct dentry *cursor, struct list_head *after)
0123 {
0124     struct dentry *parent = cursor->d_parent;
0125     unsigned n, *seq = &parent->d_inode->i_dir_seq;
0126     spin_lock(&parent->d_lock);
0127     for (;;) {
0128         n = *seq;
0129         if (!(n & 1) && cmpxchg(seq, n, n + 1) == n)
0130             break;
0131         cpu_relax();
0132     }
0133     __list_del(cursor->d_child.prev, cursor->d_child.next);
0134     if (after)
0135         list_add(&cursor->d_child, after);
0136     else
0137         list_add_tail(&cursor->d_child, &parent->d_subdirs);
0138     smp_store_release(seq, n + 2);
0139     spin_unlock(&parent->d_lock);
0140 }
0141 
0142 loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
0143 {
0144     struct dentry *dentry = file->f_path.dentry;
0145     switch (whence) {
0146         case 1:
0147             offset += file->f_pos;
0148         case 0:
0149             if (offset >= 0)
0150                 break;
0151         default:
0152             return -EINVAL;
0153     }
0154     if (offset != file->f_pos) {
0155         file->f_pos = offset;
0156         if (file->f_pos >= 2) {
0157             struct dentry *cursor = file->private_data;
0158             struct dentry *to;
0159             loff_t n = file->f_pos - 2;
0160 
0161             inode_lock_shared(dentry->d_inode);
0162             to = next_positive(dentry, &dentry->d_subdirs, n);
0163             move_cursor(cursor, to ? &to->d_child : NULL);
0164             inode_unlock_shared(dentry->d_inode);
0165         }
0166     }
0167     return offset;
0168 }
0169 EXPORT_SYMBOL(dcache_dir_lseek);
0170 
0171 /* Relationship between i_mode and the DT_xxx types */
0172 static inline unsigned char dt_type(struct inode *inode)
0173 {
0174     return (inode->i_mode >> 12) & 15;
0175 }
0176 
0177 /*
0178  * Directory is locked and all positive dentries in it are safe, since
0179  * for ramfs-type trees they can't go away without unlink() or rmdir(),
0180  * both impossible due to the lock on directory.
0181  */
0182 
0183 int dcache_readdir(struct file *file, struct dir_context *ctx)
0184 {
0185     struct dentry *dentry = file->f_path.dentry;
0186     struct dentry *cursor = file->private_data;
0187     struct list_head *p = &cursor->d_child;
0188     struct dentry *next;
0189     bool moved = false;
0190 
0191     if (!dir_emit_dots(file, ctx))
0192         return 0;
0193 
0194     if (ctx->pos == 2)
0195         p = &dentry->d_subdirs;
0196     while ((next = next_positive(dentry, p, 1)) != NULL) {
0197         if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
0198                   d_inode(next)->i_ino, dt_type(d_inode(next))))
0199             break;
0200         moved = true;
0201         p = &next->d_child;
0202         ctx->pos++;
0203     }
0204     if (moved)
0205         move_cursor(cursor, p);
0206     return 0;
0207 }
0208 EXPORT_SYMBOL(dcache_readdir);
0209 
0210 ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos)
0211 {
0212     return -EISDIR;
0213 }
0214 EXPORT_SYMBOL(generic_read_dir);
0215 
0216 const struct file_operations simple_dir_operations = {
0217     .open       = dcache_dir_open,
0218     .release    = dcache_dir_close,
0219     .llseek     = dcache_dir_lseek,
0220     .read       = generic_read_dir,
0221     .iterate_shared = dcache_readdir,
0222     .fsync      = noop_fsync,
0223 };
0224 EXPORT_SYMBOL(simple_dir_operations);
0225 
0226 const struct inode_operations simple_dir_inode_operations = {
0227     .lookup     = simple_lookup,
0228 };
0229 EXPORT_SYMBOL(simple_dir_inode_operations);
0230 
0231 static const struct super_operations simple_super_operations = {
0232     .statfs     = simple_statfs,
0233 };
0234 
0235 /*
0236  * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that
0237  * will never be mountable)
0238  */
0239 struct dentry *mount_pseudo_xattr(struct file_system_type *fs_type, char *name,
0240     const struct super_operations *ops, const struct xattr_handler **xattr,
0241     const struct dentry_operations *dops, unsigned long magic)
0242 {
0243     struct super_block *s;
0244     struct dentry *dentry;
0245     struct inode *root;
0246     struct qstr d_name = QSTR_INIT(name, strlen(name));
0247 
0248     s = sget_userns(fs_type, NULL, set_anon_super, MS_KERNMOUNT|MS_NOUSER,
0249             &init_user_ns, NULL);
0250     if (IS_ERR(s))
0251         return ERR_CAST(s);
0252 
0253     s->s_maxbytes = MAX_LFS_FILESIZE;
0254     s->s_blocksize = PAGE_SIZE;
0255     s->s_blocksize_bits = PAGE_SHIFT;
0256     s->s_magic = magic;
0257     s->s_op = ops ? ops : &simple_super_operations;
0258     s->s_xattr = xattr;
0259     s->s_time_gran = 1;
0260     root = new_inode(s);
0261     if (!root)
0262         goto Enomem;
0263     /*
0264      * since this is the first inode, make it number 1. New inodes created
0265      * after this must take care not to collide with it (by passing
0266      * max_reserved of 1 to iunique).
0267      */
0268     root->i_ino = 1;
0269     root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
0270     root->i_atime = root->i_mtime = root->i_ctime = current_time(root);
0271     dentry = __d_alloc(s, &d_name);
0272     if (!dentry) {
0273         iput(root);
0274         goto Enomem;
0275     }
0276     d_instantiate(dentry, root);
0277     s->s_root = dentry;
0278     s->s_d_op = dops;
0279     s->s_flags |= MS_ACTIVE;
0280     return dget(s->s_root);
0281 
0282 Enomem:
0283     deactivate_locked_super(s);
0284     return ERR_PTR(-ENOMEM);
0285 }
0286 EXPORT_SYMBOL(mount_pseudo_xattr);
0287 
0288 int simple_open(struct inode *inode, struct file *file)
0289 {
0290     if (inode->i_private)
0291         file->private_data = inode->i_private;
0292     return 0;
0293 }
0294 EXPORT_SYMBOL(simple_open);
0295 
0296 int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
0297 {
0298     struct inode *inode = d_inode(old_dentry);
0299 
0300     inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
0301     inc_nlink(inode);
0302     ihold(inode);
0303     dget(dentry);
0304     d_instantiate(dentry, inode);
0305     return 0;
0306 }
0307 EXPORT_SYMBOL(simple_link);
0308 
0309 int simple_empty(struct dentry *dentry)
0310 {
0311     struct dentry *child;
0312     int ret = 0;
0313 
0314     spin_lock(&dentry->d_lock);
0315     list_for_each_entry(child, &dentry->d_subdirs, d_child) {
0316         spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
0317         if (simple_positive(child)) {
0318             spin_unlock(&child->d_lock);
0319             goto out;
0320         }
0321         spin_unlock(&child->d_lock);
0322     }
0323     ret = 1;
0324 out:
0325     spin_unlock(&dentry->d_lock);
0326     return ret;
0327 }
0328 EXPORT_SYMBOL(simple_empty);
0329 
0330 int simple_unlink(struct inode *dir, struct dentry *dentry)
0331 {
0332     struct inode *inode = d_inode(dentry);
0333 
0334     inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
0335     drop_nlink(inode);
0336     dput(dentry);
0337     return 0;
0338 }
0339 EXPORT_SYMBOL(simple_unlink);
0340 
0341 int simple_rmdir(struct inode *dir, struct dentry *dentry)
0342 {
0343     if (!simple_empty(dentry))
0344         return -ENOTEMPTY;
0345 
0346     drop_nlink(d_inode(dentry));
0347     simple_unlink(dir, dentry);
0348     drop_nlink(dir);
0349     return 0;
0350 }
0351 EXPORT_SYMBOL(simple_rmdir);
0352 
0353 int simple_rename(struct inode *old_dir, struct dentry *old_dentry,
0354           struct inode *new_dir, struct dentry *new_dentry,
0355           unsigned int flags)
0356 {
0357     struct inode *inode = d_inode(old_dentry);
0358     int they_are_dirs = d_is_dir(old_dentry);
0359 
0360     if (flags & ~RENAME_NOREPLACE)
0361         return -EINVAL;
0362 
0363     if (!simple_empty(new_dentry))
0364         return -ENOTEMPTY;
0365 
0366     if (d_really_is_positive(new_dentry)) {
0367         simple_unlink(new_dir, new_dentry);
0368         if (they_are_dirs) {
0369             drop_nlink(d_inode(new_dentry));
0370             drop_nlink(old_dir);
0371         }
0372     } else if (they_are_dirs) {
0373         drop_nlink(old_dir);
0374         inc_nlink(new_dir);
0375     }
0376 
0377     old_dir->i_ctime = old_dir->i_mtime = new_dir->i_ctime =
0378         new_dir->i_mtime = inode->i_ctime = current_time(old_dir);
0379 
0380     return 0;
0381 }
0382 EXPORT_SYMBOL(simple_rename);
0383 
0384 /**
0385  * simple_setattr - setattr for simple filesystem
0386  * @dentry: dentry
0387  * @iattr: iattr structure
0388  *
0389  * Returns 0 on success, -error on failure.
0390  *
0391  * simple_setattr is a simple ->setattr implementation without a proper
0392  * implementation of size changes.
0393  *
0394  * It can either be used for in-memory filesystems or special files
0395  * on simple regular filesystems.  Anything that needs to change on-disk
0396  * or wire state on size changes needs its own setattr method.
0397  */
0398 int simple_setattr(struct dentry *dentry, struct iattr *iattr)
0399 {
0400     struct inode *inode = d_inode(dentry);
0401     int error;
0402 
0403     error = setattr_prepare(dentry, iattr);
0404     if (error)
0405         return error;
0406 
0407     if (iattr->ia_valid & ATTR_SIZE)
0408         truncate_setsize(inode, iattr->ia_size);
0409     setattr_copy(inode, iattr);
0410     mark_inode_dirty(inode);
0411     return 0;
0412 }
0413 EXPORT_SYMBOL(simple_setattr);
0414 
/*
 * ->readpage for simple filesystems: there is nothing to read from, so
 * the page is cleared, marked up to date and unlocked.  @file is not
 * used.  Always returns 0.
 */
int simple_readpage(struct file *file, struct page *page)
{
    /* hand back a page full of zeroes */
    clear_highpage(page);
    flush_dcache_page(page);

    SetPageUptodate(page);
    unlock_page(page);

    return 0;
}
EXPORT_SYMBOL(simple_readpage);
0424 
0425 int simple_write_begin(struct file *file, struct address_space *mapping,
0426             loff_t pos, unsigned len, unsigned flags,
0427             struct page **pagep, void **fsdata)
0428 {
0429     struct page *page;
0430     pgoff_t index;
0431 
0432     index = pos >> PAGE_SHIFT;
0433 
0434     page = grab_cache_page_write_begin(mapping, index, flags);
0435     if (!page)
0436         return -ENOMEM;
0437 
0438     *pagep = page;
0439 
0440     if (!PageUptodate(page) && (len != PAGE_SIZE)) {
0441         unsigned from = pos & (PAGE_SIZE - 1);
0442 
0443         zero_user_segments(page, 0, from, from + len, PAGE_SIZE);
0444     }
0445     return 0;
0446 }
0447 EXPORT_SYMBOL(simple_write_begin);
0448 
0449 /**
0450  * simple_write_end - .write_end helper for non-block-device FSes
0451  * @available: See .write_end of address_space_operations
0452  * @file:       "
0453  * @mapping:        "
0454  * @pos:        "
0455  * @len:        "
0456  * @copied:         "
0457  * @page:       "
0458  * @fsdata:         "
0459  *
0460  * simple_write_end does the minimum needed for updating a page after writing is
0461  * done. It has the same API signature as the .write_end of
0462  * address_space_operations vector. So it can just be set onto .write_end for
0463  * FSes that don't need any other processing. i_mutex is assumed to be held.
0464  * Block based filesystems should use generic_write_end().
0465  * NOTE: Even though i_size might get updated by this function, mark_inode_dirty
0466  * is not called, so a filesystem that actually does store data in .write_inode
0467  * should extend on what's done here with a call to mark_inode_dirty() in the
0468  * case that i_size has changed.
0469  *
0470  * Use *ONLY* with simple_readpage()
0471  */
0472 int simple_write_end(struct file *file, struct address_space *mapping,
0473             loff_t pos, unsigned len, unsigned copied,
0474             struct page *page, void *fsdata)
0475 {
0476     struct inode *inode = page->mapping->host;
0477     loff_t last_pos = pos + copied;
0478 
0479     /* zero the stale part of the page if we did a short copy */
0480     if (!PageUptodate(page)) {
0481         if (copied < len) {
0482             unsigned from = pos & (PAGE_SIZE - 1);
0483 
0484             zero_user(page, from + copied, len - copied);
0485         }
0486         SetPageUptodate(page);
0487     }
0488     /*
0489      * No need to use i_size_read() here, the i_size
0490      * cannot change under us because we hold the i_mutex.
0491      */
0492     if (last_pos > inode->i_size)
0493         i_size_write(inode, last_pos);
0494 
0495     set_page_dirty(page);
0496     unlock_page(page);
0497     put_page(page);
0498 
0499     return copied;
0500 }
0501 EXPORT_SYMBOL(simple_write_end);
0502 
0503 /*
0504  * the inodes created here are not hashed. If you use iunique to generate
0505  * unique inode values later for this filesystem, then you must take care
0506  * to pass it an appropriate max_reserved value to avoid collisions.
0507  */
0508 int simple_fill_super(struct super_block *s, unsigned long magic,
0509               struct tree_descr *files)
0510 {
0511     struct inode *inode;
0512     struct dentry *root;
0513     struct dentry *dentry;
0514     int i;
0515 
0516     s->s_blocksize = PAGE_SIZE;
0517     s->s_blocksize_bits = PAGE_SHIFT;
0518     s->s_magic = magic;
0519     s->s_op = &simple_super_operations;
0520     s->s_time_gran = 1;
0521 
0522     inode = new_inode(s);
0523     if (!inode)
0524         return -ENOMEM;
0525     /*
0526      * because the root inode is 1, the files array must not contain an
0527      * entry at index 1
0528      */
0529     inode->i_ino = 1;
0530     inode->i_mode = S_IFDIR | 0755;
0531     inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
0532     inode->i_op = &simple_dir_inode_operations;
0533     inode->i_fop = &simple_dir_operations;
0534     set_nlink(inode, 2);
0535     root = d_make_root(inode);
0536     if (!root)
0537         return -ENOMEM;
0538     for (i = 0; !files->name || files->name[0]; i++, files++) {
0539         if (!files->name)
0540             continue;
0541 
0542         /* warn if it tries to conflict with the root inode */
0543         if (unlikely(i == 1))
0544             printk(KERN_WARNING "%s: %s passed in a files array"
0545                 "with an index of 1!\n", __func__,
0546                 s->s_type->name);
0547 
0548         dentry = d_alloc_name(root, files->name);
0549         if (!dentry)
0550             goto out;
0551         inode = new_inode(s);
0552         if (!inode) {
0553             dput(dentry);
0554             goto out;
0555         }
0556         inode->i_mode = S_IFREG | files->mode;
0557         inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
0558         inode->i_fop = files->ops;
0559         inode->i_ino = i;
0560         d_add(dentry, inode);
0561     }
0562     s->s_root = root;
0563     return 0;
0564 out:
0565     d_genocide(root);
0566     shrink_dcache_parent(root);
0567     dput(root);
0568     return -ENOMEM;
0569 }
0570 EXPORT_SYMBOL(simple_fill_super);
0571 
0572 static DEFINE_SPINLOCK(pin_fs_lock);
0573 
0574 int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *count)
0575 {
0576     struct vfsmount *mnt = NULL;
0577     spin_lock(&pin_fs_lock);
0578     if (unlikely(!*mount)) {
0579         spin_unlock(&pin_fs_lock);
0580         mnt = vfs_kern_mount(type, MS_KERNMOUNT, type->name, NULL);
0581         if (IS_ERR(mnt))
0582             return PTR_ERR(mnt);
0583         spin_lock(&pin_fs_lock);
0584         if (!*mount)
0585             *mount = mnt;
0586     }
0587     mntget(*mount);
0588     ++*count;
0589     spin_unlock(&pin_fs_lock);
0590     mntput(mnt);
0591     return 0;
0592 }
0593 EXPORT_SYMBOL(simple_pin_fs);
0594 
0595 void simple_release_fs(struct vfsmount **mount, int *count)
0596 {
0597     struct vfsmount *mnt;
0598     spin_lock(&pin_fs_lock);
0599     mnt = *mount;
0600     if (!--*count)
0601         *mount = NULL;
0602     spin_unlock(&pin_fs_lock);
0603     mntput(mnt);
0604 }
0605 EXPORT_SYMBOL(simple_release_fs);
0606 
0607 /**
0608  * simple_read_from_buffer - copy data from the buffer to user space
0609  * @to: the user space buffer to read to
0610  * @count: the maximum number of bytes to read
0611  * @ppos: the current position in the buffer
0612  * @from: the buffer to read from
0613  * @available: the size of the buffer
0614  *
0615  * The simple_read_from_buffer() function reads up to @count bytes from the
0616  * buffer @from at offset @ppos into the user space address starting at @to.
0617  *
0618  * On success, the number of bytes read is returned and the offset @ppos is
0619  * advanced by this number, or negative value is returned on error.
0620  **/
0621 ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos,
0622                 const void *from, size_t available)
0623 {
0624     loff_t pos = *ppos;
0625     size_t ret;
0626 
0627     if (pos < 0)
0628         return -EINVAL;
0629     if (pos >= available || !count)
0630         return 0;
0631     if (count > available - pos)
0632         count = available - pos;
0633     ret = copy_to_user(to, from + pos, count);
0634     if (ret == count)
0635         return -EFAULT;
0636     count -= ret;
0637     *ppos = pos + count;
0638     return count;
0639 }
0640 EXPORT_SYMBOL(simple_read_from_buffer);
0641 
0642 /**
0643  * simple_write_to_buffer - copy data from user space to the buffer
0644  * @to: the buffer to write to
0645  * @available: the size of the buffer
0646  * @ppos: the current position in the buffer
0647  * @from: the user space buffer to read from
0648  * @count: the maximum number of bytes to read
0649  *
0650  * The simple_write_to_buffer() function reads up to @count bytes from the user
0651  * space address starting at @from into the buffer @to at offset @ppos.
0652  *
0653  * On success, the number of bytes written is returned and the offset @ppos is
0654  * advanced by this number, or negative value is returned on error.
0655  **/
0656 ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
0657         const void __user *from, size_t count)
0658 {
0659     loff_t pos = *ppos;
0660     size_t res;
0661 
0662     if (pos < 0)
0663         return -EINVAL;
0664     if (pos >= available || !count)
0665         return 0;
0666     if (count > available - pos)
0667         count = available - pos;
0668     res = copy_from_user(to + pos, from, count);
0669     if (res == count)
0670         return -EFAULT;
0671     count -= res;
0672     *ppos = pos + count;
0673     return count;
0674 }
0675 EXPORT_SYMBOL(simple_write_to_buffer);
0676 
0677 /**
0678  * memory_read_from_buffer - copy data from the buffer
0679  * @to: the kernel space buffer to read to
0680  * @count: the maximum number of bytes to read
0681  * @ppos: the current position in the buffer
0682  * @from: the buffer to read from
0683  * @available: the size of the buffer
0684  *
0685  * The memory_read_from_buffer() function reads up to @count bytes from the
0686  * buffer @from at offset @ppos into the kernel space address starting at @to.
0687  *
0688  * On success, the number of bytes read is returned and the offset @ppos is
0689  * advanced by this number, or negative value is returned on error.
0690  **/
0691 ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
0692                 const void *from, size_t available)
0693 {
0694     loff_t pos = *ppos;
0695 
0696     if (pos < 0)
0697         return -EINVAL;
0698     if (pos >= available)
0699         return 0;
0700     if (count > available - pos)
0701         count = available - pos;
0702     memcpy(to, from + pos, count);
0703     *ppos = pos + count;
0704 
0705     return count;
0706 }
0707 EXPORT_SYMBOL(memory_read_from_buffer);
0708 
0709 /*
0710  * Transaction based IO.
0711  * The file expects a single write which triggers the transaction, and then
0712  * possibly a read which collects the result - which is stored in a
0713  * file-local buffer.
0714  */
0715 
0716 void simple_transaction_set(struct file *file, size_t n)
0717 {
0718     struct simple_transaction_argresp *ar = file->private_data;
0719 
0720     BUG_ON(n > SIMPLE_TRANSACTION_LIMIT);
0721 
0722     /*
0723      * The barrier ensures that ar->size will really remain zero until
0724      * ar->data is ready for reading.
0725      */
0726     smp_mb();
0727     ar->size = n;
0728 }
0729 EXPORT_SYMBOL(simple_transaction_set);
0730 
0731 char *simple_transaction_get(struct file *file, const char __user *buf, size_t size)
0732 {
0733     struct simple_transaction_argresp *ar;
0734     static DEFINE_SPINLOCK(simple_transaction_lock);
0735 
0736     if (size > SIMPLE_TRANSACTION_LIMIT - 1)
0737         return ERR_PTR(-EFBIG);
0738 
0739     ar = (struct simple_transaction_argresp *)get_zeroed_page(GFP_KERNEL);
0740     if (!ar)
0741         return ERR_PTR(-ENOMEM);
0742 
0743     spin_lock(&simple_transaction_lock);
0744 
0745     /* only one write allowed per open */
0746     if (file->private_data) {
0747         spin_unlock(&simple_transaction_lock);
0748         free_page((unsigned long)ar);
0749         return ERR_PTR(-EBUSY);
0750     }
0751 
0752     file->private_data = ar;
0753 
0754     spin_unlock(&simple_transaction_lock);
0755 
0756     if (copy_from_user(ar->data, buf, size))
0757         return ERR_PTR(-EFAULT);
0758 
0759     return ar->data;
0760 }
0761 EXPORT_SYMBOL(simple_transaction_get);
0762 
0763 ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos)
0764 {
0765     struct simple_transaction_argresp *ar = file->private_data;
0766 
0767     if (!ar)
0768         return 0;
0769     return simple_read_from_buffer(buf, size, pos, ar->data, ar->size);
0770 }
0771 EXPORT_SYMBOL(simple_transaction_read);
0772 
0773 int simple_transaction_release(struct inode *inode, struct file *file)
0774 {
0775     free_page((unsigned long)file->private_data);
0776     return 0;
0777 }
0778 EXPORT_SYMBOL(simple_transaction_release);
0779 
0780 /* Simple attribute files */
0781 
0782 struct simple_attr {
0783     int (*get)(void *, u64 *);
0784     int (*set)(void *, u64);
0785     char get_buf[24];   /* enough to store a u64 and "\n\0" */
0786     char set_buf[24];
0787     void *data;
0788     const char *fmt;    /* format for read operation */
0789     struct mutex mutex; /* protects access to these buffers */
0790 };
0791 
0792 /* simple_attr_open is called by an actual attribute open file operation
0793  * to set the attribute specific access operations. */
0794 int simple_attr_open(struct inode *inode, struct file *file,
0795              int (*get)(void *, u64 *), int (*set)(void *, u64),
0796              const char *fmt)
0797 {
0798     struct simple_attr *attr;
0799 
0800     attr = kmalloc(sizeof(*attr), GFP_KERNEL);
0801     if (!attr)
0802         return -ENOMEM;
0803 
0804     attr->get = get;
0805     attr->set = set;
0806     attr->data = inode->i_private;
0807     attr->fmt = fmt;
0808     mutex_init(&attr->mutex);
0809 
0810     file->private_data = attr;
0811 
0812     return nonseekable_open(inode, file);
0813 }
0814 EXPORT_SYMBOL_GPL(simple_attr_open);
0815 
0816 int simple_attr_release(struct inode *inode, struct file *file)
0817 {
0818     kfree(file->private_data);
0819     return 0;
0820 }
0821 EXPORT_SYMBOL_GPL(simple_attr_release); /* GPL-only?  This?  Really? */
0822 
0823 /* read from the buffer that is filled with the get function */
0824 ssize_t simple_attr_read(struct file *file, char __user *buf,
0825              size_t len, loff_t *ppos)
0826 {
0827     struct simple_attr *attr;
0828     size_t size;
0829     ssize_t ret;
0830 
0831     attr = file->private_data;
0832 
0833     if (!attr->get)
0834         return -EACCES;
0835 
0836     ret = mutex_lock_interruptible(&attr->mutex);
0837     if (ret)
0838         return ret;
0839 
0840     if (*ppos) {        /* continued read */
0841         size = strlen(attr->get_buf);
0842     } else {        /* first read */
0843         u64 val;
0844         ret = attr->get(attr->data, &val);
0845         if (ret)
0846             goto out;
0847 
0848         size = scnprintf(attr->get_buf, sizeof(attr->get_buf),
0849                  attr->fmt, (unsigned long long)val);
0850     }
0851 
0852     ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, size);
0853 out:
0854     mutex_unlock(&attr->mutex);
0855     return ret;
0856 }
0857 EXPORT_SYMBOL_GPL(simple_attr_read);
0858 
0859 /* interpret the buffer as a number to call the set function with */
0860 ssize_t simple_attr_write(struct file *file, const char __user *buf,
0861               size_t len, loff_t *ppos)
0862 {
0863     struct simple_attr *attr;
0864     u64 val;
0865     size_t size;
0866     ssize_t ret;
0867 
0868     attr = file->private_data;
0869     if (!attr->set)
0870         return -EACCES;
0871 
0872     ret = mutex_lock_interruptible(&attr->mutex);
0873     if (ret)
0874         return ret;
0875 
0876     ret = -EFAULT;
0877     size = min(sizeof(attr->set_buf) - 1, len);
0878     if (copy_from_user(attr->set_buf, buf, size))
0879         goto out;
0880 
0881     attr->set_buf[size] = '\0';
0882     val = simple_strtoll(attr->set_buf, NULL, 0);
0883     ret = attr->set(attr->data, val);
0884     if (ret == 0)
0885         ret = len; /* on success, claim we got the whole input */
0886 out:
0887     mutex_unlock(&attr->mutex);
0888     return ret;
0889 }
0890 EXPORT_SYMBOL_GPL(simple_attr_write);
0891 
0892 /**
0893  * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation
0894  * @sb:     filesystem to do the file handle conversion on
0895  * @fid:    file handle to convert
0896  * @fh_len: length of the file handle in bytes
0897  * @fh_type:    type of file handle
0898  * @get_inode:  filesystem callback to retrieve inode
0899  *
0900  * This function decodes @fid as long as it has one of the well-known
0901  * Linux filehandle types and calls @get_inode on it to retrieve the
0902  * inode for the object specified in the file handle.
0903  */
0904 struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid,
0905         int fh_len, int fh_type, struct inode *(*get_inode)
0906             (struct super_block *sb, u64 ino, u32 gen))
0907 {
0908     struct inode *inode = NULL;
0909 
0910     if (fh_len < 2)
0911         return NULL;
0912 
0913     switch (fh_type) {
0914     case FILEID_INO32_GEN:
0915     case FILEID_INO32_GEN_PARENT:
0916         inode = get_inode(sb, fid->i32.ino, fid->i32.gen);
0917         break;
0918     }
0919 
0920     return d_obtain_alias(inode);
0921 }
0922 EXPORT_SYMBOL_GPL(generic_fh_to_dentry);
0923 
0924 /**
0925  * generic_fh_to_parent - generic helper for the fh_to_parent export operation
0926  * @sb:     filesystem to do the file handle conversion on
0927  * @fid:    file handle to convert
0928  * @fh_len: length of the file handle in bytes
0929  * @fh_type:    type of file handle
0930  * @get_inode:  filesystem callback to retrieve inode
0931  *
0932  * This function decodes @fid as long as it has one of the well-known
0933  * Linux filehandle types and calls @get_inode on it to retrieve the
0934  * inode for the _parent_ object specified in the file handle if it
0935  * is specified in the file handle, or NULL otherwise.
0936  */
0937 struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid,
0938         int fh_len, int fh_type, struct inode *(*get_inode)
0939             (struct super_block *sb, u64 ino, u32 gen))
0940 {
0941     struct inode *inode = NULL;
0942 
0943     if (fh_len <= 2)
0944         return NULL;
0945 
0946     switch (fh_type) {
0947     case FILEID_INO32_GEN_PARENT:
0948         inode = get_inode(sb, fid->i32.parent_ino,
0949                   (fh_len > 3 ? fid->i32.parent_gen : 0));
0950         break;
0951     }
0952 
0953     return d_obtain_alias(inode);
0954 }
0955 EXPORT_SYMBOL_GPL(generic_fh_to_parent);
0956 
0957 /**
0958  * __generic_file_fsync - generic fsync implementation for simple filesystems
0959  *
0960  * @file:   file to synchronize
0961  * @start:  start offset in bytes
0962  * @end:    end offset in bytes (inclusive)
0963  * @datasync:   only synchronize essential metadata if true
0964  *
0965  * This is a generic implementation of the fsync method for simple
0966  * filesystems which track all non-inode metadata in the buffers list
0967  * hanging off the address_space structure.
0968  */
0969 int __generic_file_fsync(struct file *file, loff_t start, loff_t end,
0970                  int datasync)
0971 {
0972     struct inode *inode = file->f_mapping->host;
0973     int err;
0974     int ret;
0975 
0976     err = filemap_write_and_wait_range(inode->i_mapping, start, end);
0977     if (err)
0978         return err;
0979 
0980     inode_lock(inode);
0981     ret = sync_mapping_buffers(inode->i_mapping);
0982     if (!(inode->i_state & I_DIRTY_ALL))
0983         goto out;
0984     if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
0985         goto out;
0986 
0987     err = sync_inode_metadata(inode, 1);
0988     if (ret == 0)
0989         ret = err;
0990 
0991 out:
0992     inode_unlock(inode);
0993     return ret;
0994 }
0995 EXPORT_SYMBOL(__generic_file_fsync);
0996 
0997 /**
0998  * generic_file_fsync - generic fsync implementation for simple filesystems
0999  *          with flush
1000  * @file:   file to synchronize
1001  * @start:  start offset in bytes
1002  * @end:    end offset in bytes (inclusive)
1003  * @datasync:   only synchronize essential metadata if true
1004  *
1005  */
1006 
1007 int generic_file_fsync(struct file *file, loff_t start, loff_t end,
1008                int datasync)
1009 {
1010     struct inode *inode = file->f_mapping->host;
1011     int err;
1012 
1013     err = __generic_file_fsync(file, start, end, datasync);
1014     if (err)
1015         return err;
1016     return blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
1017 }
1018 EXPORT_SYMBOL(generic_file_fsync);
1019 
1020 /**
1021  * generic_check_addressable - Check addressability of file system
1022  * @blocksize_bits: log of file system block size
1023  * @num_blocks:     number of blocks in file system
1024  *
1025  * Determine whether a file system with @num_blocks blocks (and a
1026  * block size of 2**@blocksize_bits) is addressable by the sector_t
1027  * and page cache of the system.  Return 0 if so and -EFBIG otherwise.
1028  */
1029 int generic_check_addressable(unsigned blocksize_bits, u64 num_blocks)
1030 {
1031     u64 last_fs_block = num_blocks - 1;
1032     u64 last_fs_page =
1033         last_fs_block >> (PAGE_SHIFT - blocksize_bits);
1034 
1035     if (unlikely(num_blocks == 0))
1036         return 0;
1037 
1038     if ((blocksize_bits < 9) || (blocksize_bits > PAGE_SHIFT))
1039         return -EINVAL;
1040 
1041     if ((last_fs_block > (sector_t)(~0ULL) >> (blocksize_bits - 9)) ||
1042         (last_fs_page > (pgoff_t)(~0ULL))) {
1043         return -EFBIG;
1044     }
1045     return 0;
1046 }
1047 EXPORT_SYMBOL(generic_check_addressable);
1048 
1049 /*
1050  * No-op implementation of ->fsync for in-memory filesystems.
1051  */
1052 int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync)
1053 {
1054     return 0;
1055 }
1056 EXPORT_SYMBOL(noop_fsync);
1057 
/*
 * Thin wrapper that hands @p to kfree().  It exists because kfree itself
 * isn't assignment-compatible with void(void*) ;-/
 */
void kfree_link(void *p)
{
    kfree(p);
}
EXPORT_SYMBOL(kfree_link);
1064 
/*
 * nop .set_page_dirty method so that people can use .page_mkwrite on
 * anon inodes.  @page is ignored and 0 is always returned.
 */
static int anon_set_page_dirty(struct page *page)
{
    return 0;
}
1073 
1074 /*
1075  * A single inode exists for all anon_inode files. Contrary to pipes,
1076  * anon_inode inodes have no associated per-instance data, so we need
1077  * only allocate one of them.
1078  */
1079 struct inode *alloc_anon_inode(struct super_block *s)
1080 {
1081     static const struct address_space_operations anon_aops = {
1082         .set_page_dirty = anon_set_page_dirty,
1083     };
1084     struct inode *inode = new_inode_pseudo(s);
1085 
1086     if (!inode)
1087         return ERR_PTR(-ENOMEM);
1088 
1089     inode->i_ino = get_next_ino();
1090     inode->i_mapping->a_ops = &anon_aops;
1091 
1092     /*
1093      * Mark the inode dirty from the very beginning,
1094      * that way it will never be moved to the dirty
1095      * list because mark_inode_dirty() will think
1096      * that it already _is_ on the dirty list.
1097      */
1098     inode->i_state = I_DIRTY;
1099     inode->i_mode = S_IRUSR | S_IWUSR;
1100     inode->i_uid = current_fsuid();
1101     inode->i_gid = current_fsgid();
1102     inode->i_flags |= S_PRIVATE;
1103     inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
1104     return inode;
1105 }
1106 EXPORT_SYMBOL(alloc_anon_inode);
1107 
/**
 * simple_nosetlease - generic helper for prohibiting leases
 * @filp: file pointer
 * @arg: type of lease to obtain
 * @flp: new lease supplied for insertion
 * @priv: private data for lm_setup operation
 *
 * For filesystems that do not want leases to be set on their files.
 * None of the arguments is looked at; the request is always refused
 * with -EINVAL.
 */
int simple_nosetlease(struct file *filp, long arg, struct file_lock **flp,
                      void **priv)
{
    /* Leases are never allowed here. */
    return -EINVAL;
}
EXPORT_SYMBOL(simple_nosetlease);
1125 
1126 const char *simple_get_link(struct dentry *dentry, struct inode *inode,
1127                 struct delayed_call *done)
1128 {
1129     return inode->i_link;
1130 }
1131 EXPORT_SYMBOL(simple_get_link);
1132 
1133 const struct inode_operations simple_symlink_inode_operations = {
1134     .get_link = simple_get_link,
1135 };
1136 EXPORT_SYMBOL(simple_symlink_inode_operations);
1137 
1138 /*
1139  * Operations for a permanently empty directory.
1140  */
1141 static struct dentry *empty_dir_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
1142 {
1143     return ERR_PTR(-ENOENT);
1144 }
1145 
/*
 * Fill @stat from the inode behind @dentry using generic_fillattr().
 * @mnt is not used.  Always returns 0.
 */
static int empty_dir_getattr(struct vfsmount *mnt, struct dentry *dentry,
                             struct kstat *stat)
{
    generic_fillattr(d_inode(dentry), stat);
    return 0;
}
1153 
/* Attribute changes are refused unconditionally with -EPERM. */
static int empty_dir_setattr(struct dentry *dentry, struct iattr *attr)
{
    return -EPERM;
}
1158 
/*
 * Listing extended attributes is refused with -EOPNOTSUPP; @list is
 * never written to.
 */
static ssize_t empty_dir_listxattr(struct dentry *dentry, char *list,
                                   size_t size)
{
    return -EOPNOTSUPP;
}
1163 
1164 static const struct inode_operations empty_dir_inode_operations = {
1165     .lookup     = empty_dir_lookup,
1166     .permission = generic_permission,
1167     .setattr    = empty_dir_setattr,
1168     .getattr    = empty_dir_getattr,
1169     .listxattr  = empty_dir_listxattr,
1170 };
1171 
1172 static loff_t empty_dir_llseek(struct file *file, loff_t offset, int whence)
1173 {
1174     /* An empty directory has two entries . and .. at offsets 0 and 1 */
1175     return generic_file_llseek_size(file, offset, whence, 2, 2);
1176 }
1177 
/*
 * Directory iteration for an empty directory: hand the entries produced
 * by dir_emit_dots() to @ctx and report success.  The return value of
 * dir_emit_dots() is deliberately not examined.
 */
static int empty_dir_readdir(struct file *file, struct dir_context *ctx)
{
    dir_emit_dots(file, ctx);

    return 0;
}
1183 
1184 static const struct file_operations empty_dir_operations = {
1185     .llseek     = empty_dir_llseek,
1186     .read       = generic_read_dir,
1187     .iterate_shared = empty_dir_readdir,
1188     .fsync      = noop_fsync,
1189 };
1190 
1191 
1192 void make_empty_dir_inode(struct inode *inode)
1193 {
1194     set_nlink(inode, 2);
1195     inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
1196     inode->i_uid = GLOBAL_ROOT_UID;
1197     inode->i_gid = GLOBAL_ROOT_GID;
1198     inode->i_rdev = 0;
1199     inode->i_size = 0;
1200     inode->i_blkbits = PAGE_SHIFT;
1201     inode->i_blocks = 0;
1202 
1203     inode->i_op = &empty_dir_inode_operations;
1204     inode->i_opflags &= ~IOP_XATTR;
1205     inode->i_fop = &empty_dir_operations;
1206 }
1207 
1208 bool is_empty_dir_inode(struct inode *inode)
1209 {
1210     return (inode->i_fop == &empty_dir_operations) &&
1211         (inode->i_op == &empty_dir_inode_operations);
1212 }