0001
0002
0003
0004
0005
0006
0007
0008 #include <linux/mm.h>
0009 #include <linux/fs.h>
0010 #include <linux/swap.h>
0011 #include <linux/mount.h>
0012 #include <linux/memfd.h>
0013 #include <linux/bitops.h>
0014 #include <linux/printk.h>
0015 #include <linux/pagemap.h>
0016 #include <linux/syscalls.h>
0017 #include <linux/pseudo_fs.h>
0018 #include <linux/secretmem.h>
0019 #include <linux/set_memory.h>
0020 #include <linux/sched/signal.h>
0021
0022 #include <uapi/linux/magic.h>
0023
0024 #include <asm/tlbflush.h>
0025
0026 #include "internal.h"
0027
0028 #undef pr_fmt
0029 #define pr_fmt(fmt) "secretmem: " fmt
0030
0031
0032
0033
0034
/*
 * memfd_secret(2) flag handling: no mode bits are defined yet, so the
 * only flag accepted on top of these masks is O_CLOEXEC (checked in the
 * syscall itself).
 */
#define SECRETMEM_MODE_MASK (0x0)
#define SECRETMEM_FLAGS_MASK SECRETMEM_MODE_MASK

/* secretmem is disabled unless explicitly enabled on the kernel command line */
static bool secretmem_enable __ro_after_init;
module_param_named(enable, secretmem_enable, bool, 0400);
MODULE_PARM_DESC(secretmem_enable,
	"Enable secretmem and memfd_secret(2) system call");

/* number of live memfd_secret file descriptors (see secretmem_active()) */
static atomic_t secretmem_users;
0044
0045 bool secretmem_active(void)
0046 {
0047 return !!atomic_read(&secretmem_users);
0048 }
0049
/*
 * Page fault handler: on first access to an offset, allocate a zeroed
 * page, drop it from the kernel direct map and insert it into the page
 * cache, so the contents are reachable only through the user mapping.
 */
static vm_fault_t secretmem_fault(struct vm_fault *vmf)
{
	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
	struct inode *inode = file_inode(vmf->vma->vm_file);
	pgoff_t offset = vmf->pgoff;
	gfp_t gfp = vmf->gfp_mask;
	unsigned long addr;
	struct page *page;
	vm_fault_t ret;
	int err;

	/* faults beyond i_size are invalid; secretmem files cannot grow here */
	if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
		return vmf_error(-EINVAL);

	/* taken shared: excludes truncate/setattr, allows concurrent faults */
	filemap_invalidate_lock_shared(mapping);

retry:
	page = find_lock_page(mapping, offset);
	if (!page) {
		page = alloc_page(gfp | __GFP_ZERO);
		if (!page) {
			ret = VM_FAULT_OOM;
			goto out;
		}

		/* remove the page from the kernel direct map before exposing it */
		err = set_direct_map_invalid_noflush(page);
		if (err) {
			put_page(page);
			ret = vmf_error(err);
			goto out;
		}

		__SetPageUptodate(page);
		err = add_to_page_cache_lru(page, mapping, offset, gfp);
		if (unlikely(err)) {
			put_page(page);
			/*
			 * Restore the direct map entry for the page we are
			 * about to discard; -EEXIST means another thread
			 * faulted the same offset in first, so retry and
			 * find its page in the cache instead.
			 */
			set_direct_map_default_noflush(page);
			if (err == -EEXIST)
				goto retry;

			ret = vmf_error(err);
			goto out;
		}

		/* flush stale direct-map TLB entries for the removed mapping */
		addr = (unsigned long)page_address(page);
		flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
	}

	/* return the page still locked, as VM_FAULT_LOCKED promises */
	vmf->page = page;
	ret = VM_FAULT_LOCKED;

out:
	filemap_invalidate_unlock_shared(mapping);
	return ret;
}
0110
/* only a fault handler is needed; secretmem pages are never written back */
static const struct vm_operations_struct secretmem_vm_ops = {
	.fault = secretmem_fault,
};
0114
0115 static int secretmem_release(struct inode *inode, struct file *file)
0116 {
0117 atomic_dec(&secretmem_users);
0118 return 0;
0119 }
0120
0121 static int secretmem_mmap(struct file *file, struct vm_area_struct *vma)
0122 {
0123 unsigned long len = vma->vm_end - vma->vm_start;
0124
0125 if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
0126 return -EINVAL;
0127
0128 if (mlock_future_check(vma->vm_mm, vma->vm_flags | VM_LOCKED, len))
0129 return -EAGAIN;
0130
0131 vma->vm_flags |= VM_LOCKED | VM_DONTDUMP;
0132 vma->vm_ops = &secretmem_vm_ops;
0133
0134 return 0;
0135 }
0136
0137 bool vma_is_secretmem(struct vm_area_struct *vma)
0138 {
0139 return vma->vm_ops == &secretmem_vm_ops;
0140 }
0141
/* no read/write: secretmem contents are only reachable via mmap() */
static const struct file_operations secretmem_fops = {
	.release = secretmem_release,
	.mmap = secretmem_mmap,
};
0146
/*
 * Secretmem folios are removed from the direct map and must stay put;
 * unconditionally refuse migration requests.
 */
static int secretmem_migrate_folio(struct address_space *mapping,
		struct folio *dst, struct folio *src, enum migrate_mode mode)
{
	return -EBUSY;
}
0152
/*
 * Called when a secretmem folio leaves the page cache: restore the
 * kernel direct map entry first (zeroing below accesses the page via
 * its kernel address), then scrub the contents before the page is freed.
 */
static void secretmem_free_folio(struct folio *folio)
{
	set_direct_map_default_noflush(&folio->page);
	folio_zero_segment(folio, 0, folio_size(folio));
}
0158
/* address space ops: no writeback, no migration, scrub on free */
const struct address_space_operations secretmem_aops = {
	.dirty_folio = noop_dirty_folio,
	.free_folio = secretmem_free_folio,
	.migrate_folio = secretmem_migrate_folio,
};
0164
0165 static int secretmem_setattr(struct user_namespace *mnt_userns,
0166 struct dentry *dentry, struct iattr *iattr)
0167 {
0168 struct inode *inode = d_inode(dentry);
0169 struct address_space *mapping = inode->i_mapping;
0170 unsigned int ia_valid = iattr->ia_valid;
0171 int ret;
0172
0173 filemap_invalidate_lock(mapping);
0174
0175 if ((ia_valid & ATTR_SIZE) && inode->i_size)
0176 ret = -EINVAL;
0177 else
0178 ret = simple_setattr(mnt_userns, dentry, iattr);
0179
0180 filemap_invalidate_unlock(mapping);
0181
0182 return ret;
0183 }
0184
static const struct inode_operations secretmem_iops = {
	.setattr = secretmem_setattr,
};

/* internal mount backing all secretmem inodes, set up in secretmem_init() */
static struct vfsmount *secretmem_mnt;
0190
/*
 * Allocate the anonymous inode and file backing one memfd_secret
 * descriptor. Returns the new file or an ERR_PTR; on any failure after
 * inode allocation the inode reference is dropped here. @flags is
 * currently unused beyond validation done by the caller.
 */
static struct file *secretmem_file_create(unsigned long flags)
{
	struct file *file = ERR_PTR(-ENOMEM);
	struct inode *inode;
	const char *anon_name = "[secretmem]";
	const struct qstr qname = QSTR_INIT(anon_name, strlen(anon_name));
	int err;

	inode = alloc_anon_inode(secretmem_mnt->mnt_sb);
	if (IS_ERR(inode))
		return ERR_CAST(inode);

	/* let the LSM label the inode as "[secretmem]" */
	err = security_inode_init_security_anon(inode, &qname, NULL);
	if (err) {
		file = ERR_PTR(err);
		goto err_free_inode;
	}

	file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem",
				 O_RDWR, &secretmem_fops);
	if (IS_ERR(file))
		goto err_free_inode;

	mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
	/* secretmem pages must never be reclaimed/swapped */
	mapping_set_unevictable(inode->i_mapping);

	inode->i_op = &secretmem_iops;
	inode->i_mapping->a_ops = &secretmem_aops;

	/* pretend we are a normal file with zero size */
	inode->i_mode |= S_IFREG;
	inode->i_size = 0;

	return file;

err_free_inode:
	iput(inode);
	return file;
}
0230
/*
 * memfd_secret(2): create a file descriptor whose memory is removed from
 * the kernel direct map. Only O_CLOEXEC is accepted in @flags for now.
 * Returns the new fd or a negative errno.
 */
SYSCALL_DEFINE1(memfd_secret, unsigned int, flags)
{
	struct file *file;
	int fd, err;

	/* make sure local flags do not conflict with global fcntl.h */
	BUILD_BUG_ON(SECRETMEM_FLAGS_MASK & O_CLOEXEC);

	if (!secretmem_enable)
		return -ENOSYS;

	if (flags & ~(SECRETMEM_FLAGS_MASK | O_CLOEXEC))
		return -EINVAL;
	/* refuse new users once the counter would overflow into the sign bit */
	if (atomic_read(&secretmem_users) < 0)
		return -ENFILE;

	fd = get_unused_fd_flags(flags & O_CLOEXEC);
	if (fd < 0)
		return fd;

	file = secretmem_file_create(flags);
	if (IS_ERR(file)) {
		err = PTR_ERR(file);
		goto err_put_fd;
	}

	file->f_flags |= O_LARGEFILE;

	/* count the user before publishing the fd */
	atomic_inc(&secretmem_users);
	fd_install(fd, file);
	return fd;

err_put_fd:
	put_unused_fd(fd);
	return err;
}
0267
0268 static int secretmem_init_fs_context(struct fs_context *fc)
0269 {
0270 return init_pseudo(fc, SECRETMEM_MAGIC) ? 0 : -ENOMEM;
0271 }
0272
/* in-kernel only filesystem backing secretmem_mnt; never user-mountable */
static struct file_system_type secretmem_fs = {
	.name = "secretmem",
	.init_fs_context = secretmem_init_fs_context,
	.kill_sb = kill_anon_super,
};
0278
0279 static int secretmem_init(void)
0280 {
0281 int ret = 0;
0282
0283 if (!secretmem_enable)
0284 return ret;
0285
0286 secretmem_mnt = kern_mount(&secretmem_fs);
0287 if (IS_ERR(secretmem_mnt))
0288 return PTR_ERR(secretmem_mnt);
0289
0290
0291 secretmem_mnt->mnt_flags |= MNT_NOEXEC;
0292
0293 return ret;
0294 }
0295 fs_initcall(secretmem_init);