// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright IBM Corporation, 2021
 *
 * Author: Mike Rapoport <rppt@linux.ibm.com>
 */

#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/swap.h>
#include <linux/mount.h>
#include <linux/memfd.h>
#include <linux/bitops.h>
#include <linux/printk.h>
#include <linux/pagemap.h>
#include <linux/syscalls.h>
#include <linux/pseudo_fs.h>
#include <linux/secretmem.h>
#include <linux/set_memory.h>
#include <linux/sched/signal.h>

#include <uapi/linux/magic.h>

#include <asm/tlbflush.h>

#include "internal.h"

#undef pr_fmt
#define pr_fmt(fmt) "secretmem: " fmt

/*
 * Define mode and flag masks to allow validation of the system call
 * parameters.
 */
#define SECRETMEM_MODE_MASK (0x0)
#define SECRETMEM_FLAGS_MASK    SECRETMEM_MODE_MASK

static bool secretmem_enable __ro_after_init;
module_param_named(enable, secretmem_enable, bool, 0400);
MODULE_PARM_DESC(secretmem_enable,
         "Enable secretmem and memfd_secret(2) system call");

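/*
 * Count of open secretmem files; secretmem_active() reports whether any
 * exist. Other subsystems consult this (for example, hibernation is
 * refused while secret memory is in use, since writing these pages to
 * the hibernation image would defeat their protection).
 */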
static atomic_t secretmem_users;

bool secretmem_active(void)
{
    return !!atomic_read(&secretmem_users);
}

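/*
 * Page fault handler: on first access, allocate a zeroed page, remove it
 * from the kernel direct map so the kernel cannot reach its contents
 * through the linear mapping, add it to the page cache, and flush the
 * stale direct-map TLB entry. Later faults find the page in the cache.
 */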
static vm_fault_t secretmem_fault(struct vm_fault *vmf)
{
    struct address_space *mapping = vmf->vma->vm_file->f_mapping;
    struct inode *inode = file_inode(vmf->vma->vm_file);
    pgoff_t offset = vmf->pgoff;
    gfp_t gfp = vmf->gfp_mask;
    unsigned long addr;
    struct page *page;
    vm_fault_t ret;
    int err;

    if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
        return vmf_error(-EINVAL);

    filemap_invalidate_lock_shared(mapping);

retry:
    page = find_lock_page(mapping, offset);
    if (!page) {
        page = alloc_page(gfp | __GFP_ZERO);
        if (!page) {
            ret = VM_FAULT_OOM;
            goto out;
        }

        err = set_direct_map_invalid_noflush(page);
        if (err) {
            put_page(page);
            ret = vmf_error(err);
            goto out;
        }

        __SetPageUptodate(page);
        err = add_to_page_cache_lru(page, mapping, offset, gfp);
        if (unlikely(err)) {
            put_page(page);
            /*
             * If a split of a large page was required, it
             * already happened when we marked the page invalid
             * in the direct map, which guarantees that this
             * call won't fail.
             */
            set_direct_map_default_noflush(page);
            if (err == -EEXIST)
                goto retry;

            ret = vmf_error(err);
            goto out;
        }

        addr = (unsigned long)page_address(page);
        flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
    }

    vmf->page = page;
    ret = VM_FAULT_LOCKED;

out:
    filemap_invalidate_unlock_shared(mapping);
    return ret;
}

static const struct vm_operations_struct secretmem_vm_ops = {
    .fault = secretmem_fault,
};

static int secretmem_release(struct inode *inode, struct file *file)
{
    atomic_dec(&secretmem_users);
    return 0;
}

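/*
 * Secret mappings must be shared (MAP_SHARED) and are forced to be
 * VM_LOCKED (never swapped) and VM_DONTDUMP (excluded from core dumps);
 * mlock_future_check() enforces the caller's RLIMIT_MEMLOCK.
 */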
static int secretmem_mmap(struct file *file, struct vm_area_struct *vma)
{
    unsigned long len = vma->vm_end - vma->vm_start;

    if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
        return -EINVAL;

    if (mlock_future_check(vma->vm_mm, vma->vm_flags | VM_LOCKED, len))
        return -EAGAIN;

    vma->vm_flags |= VM_LOCKED | VM_DONTDUMP;
    vma->vm_ops = &secretmem_vm_ops;

    return 0;
}

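/*
 * Lets other mm code (e.g. get_user_pages()) recognize secretmem VMAs,
 * whose pages must not be accessed or pinned through the kernel.
 */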
bool vma_is_secretmem(struct vm_area_struct *vma)
{
    return vma->vm_ops == &secretmem_vm_ops;
}

static const struct file_operations secretmem_fops = {
    .release    = secretmem_release,
    .mmap       = secretmem_mmap,
};

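/*
 * Secret pages are removed from the direct map and cannot be migrated;
 * refuse migration unconditionally.
 */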
static int secretmem_migrate_folio(struct address_space *mapping,
        struct folio *dst, struct folio *src, enum migrate_mode mode)
{
    return -EBUSY;
}

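/*
 * On free, restore the folio to the kernel direct map and zero it so
 * its contents never leak back into the page allocator.
 */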
static void secretmem_free_folio(struct folio *folio)
{
    set_direct_map_default_noflush(&folio->page);
    folio_zero_segment(folio, 0, folio_size(folio));
}

const struct address_space_operations secretmem_aops = {
    .dirty_folio    = noop_dirty_folio,
    .free_folio = secretmem_free_folio,
    .migrate_folio  = secretmem_migrate_folio,
};

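/*
 * Only a single ftruncate() on an empty file is allowed: once i_size is
 * non-zero, further size changes are rejected. The invalidate lock
 * serializes the size change against concurrent page faults.
 */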
static int secretmem_setattr(struct user_namespace *mnt_userns,
                 struct dentry *dentry, struct iattr *iattr)
{
    struct inode *inode = d_inode(dentry);
    struct address_space *mapping = inode->i_mapping;
    unsigned int ia_valid = iattr->ia_valid;
    int ret;

    filemap_invalidate_lock(mapping);

    if ((ia_valid & ATTR_SIZE) && inode->i_size)
        ret = -EINVAL;
    else
        ret = simple_setattr(mnt_userns, dentry, iattr);

    filemap_invalidate_unlock(mapping);

    return ret;
}

static const struct inode_operations secretmem_iops = {
    .setattr = secretmem_setattr,
};

static struct vfsmount *secretmem_mnt;

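/*
 * Create the backing file: an anonymous inode on the secretmem mount,
 * shown as "[secretmem]". The mapping is marked unevictable and uses
 * GFP_HIGHUSER (not __GFP_MOVABLE, since these pages cannot migrate).
 */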
static struct file *secretmem_file_create(unsigned long flags)
{
    struct file *file = ERR_PTR(-ENOMEM);
    struct inode *inode;
    const char *anon_name = "[secretmem]";
    const struct qstr qname = QSTR_INIT(anon_name, strlen(anon_name));
    int err;

    inode = alloc_anon_inode(secretmem_mnt->mnt_sb);
    if (IS_ERR(inode))
        return ERR_CAST(inode);

    err = security_inode_init_security_anon(inode, &qname, NULL);
    if (err) {
        file = ERR_PTR(err);
        goto err_free_inode;
    }

    file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem",
                 O_RDWR, &secretmem_fops);
    if (IS_ERR(file))
        goto err_free_inode;

    mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
    mapping_set_unevictable(inode->i_mapping);

    inode->i_op = &secretmem_iops;
    inode->i_mapping->a_ops = &secretmem_aops;

    /* pretend we are a normal file with zero size */
    inode->i_mode |= S_IFREG;
    inode->i_size = 0;

    return file;

err_free_inode:
    iput(inode);
    return file;
}

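/*
 * memfd_secret() - create a file descriptor for secret memory.
 * Only O_CLOEXEC is accepted in @flags. Returns -ENOSYS unless the
 * "enable" parameter above is set, and -EINVAL for unknown flags.
 */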
SYSCALL_DEFINE1(memfd_secret, unsigned int, flags)
{
    struct file *file;
    int fd, err;

    /* make sure local flags do not conflict with global fcntl.h */
    BUILD_BUG_ON(SECRETMEM_FLAGS_MASK & O_CLOEXEC);

    if (!secretmem_enable)
        return -ENOSYS;

    if (flags & ~(SECRETMEM_FLAGS_MASK | O_CLOEXEC))
        return -EINVAL;
    if (atomic_read(&secretmem_users) < 0)
        return -ENFILE;

    fd = get_unused_fd_flags(flags & O_CLOEXEC);
    if (fd < 0)
        return fd;

    file = secretmem_file_create(flags);
    if (IS_ERR(file)) {
        err = PTR_ERR(file);
        goto err_put_fd;
    }

    file->f_flags |= O_LARGEFILE;

    atomic_inc(&secretmem_users);
    fd_install(fd, file);
    return fd;

err_put_fd:
    put_unused_fd(fd);
    return err;
}
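/*
 * Illustrative userspace usage (not part of this file): a minimal
 * sketch of calling memfd_secret() via syscall(2), sizing the file
 * with ftruncate(), and mapping it. It assumes a kernel booted with
 * secretmem enabled and a libc exposing SYS_memfd_secret; error
 * handling is abbreviated.
 *
 *	#define _GNU_SOURCE
 *	#include <sys/syscall.h>
 *	#include <sys/mman.h>
 *	#include <unistd.h>
 *	#include <string.h>
 *
 *	int fd = syscall(SYS_memfd_secret, 0);
 *	ftruncate(fd, 4096);              // size may be set exactly once
 *	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
 *		       MAP_SHARED, fd, 0); // MAP_SHARED is mandatory
 *	strcpy(p, "secret");              // fault removes page from direct map
 *	munmap(p, 4096);
 *	close(fd);
 */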

static int secretmem_init_fs_context(struct fs_context *fc)
{
    return init_pseudo(fc, SECRETMEM_MAGIC) ? 0 : -ENOMEM;
}

static struct file_system_type secretmem_fs = {
    .name       = "secretmem",
    .init_fs_context = secretmem_init_fs_context,
    .kill_sb    = kill_anon_super,
};

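/*
 * Mount the pseudo filesystem at boot when secretmem is enabled; the
 * mount is marked MNT_NOEXEC so secret mappings can never be PROT_EXEC.
 */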
static int secretmem_init(void)
{
    int ret = 0;

    if (!secretmem_enable)
        return ret;

    secretmem_mnt = kern_mount(&secretmem_fs);
    if (IS_ERR(secretmem_mnt))
        return PTR_ERR(secretmem_mnt);

    /* prevent secretmem mappings from ever getting PROT_EXEC */
    secretmem_mnt->mnt_flags |= MNT_NOEXEC;

    return ret;
}
fs_initcall(secretmem_init);