// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017-2018 HUAWEI, Inc.
 *             https://www.huawei.com/
 * Copyright (C) 2021, Alibaba Cloud
 */
#include "internal.h"
#include <linux/prefetch.h>
#include <linux/sched/mm.h>
#include <linux/dax.h>
#include <trace/events/erofs.h>

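/* Undo the kmap()/kmap_atomic() mapping set up by erofs_bread(). */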
void erofs_unmap_metabuf(struct erofs_buf *buf)
{
    if (buf->kmap_type == EROFS_KMAP)
        kunmap(buf->page);
    else if (buf->kmap_type == EROFS_KMAP_ATOMIC)
        kunmap_atomic(buf->base);
    buf->base = NULL;
    buf->kmap_type = EROFS_NO_KMAP;
}

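/* Unmap the buffer and drop the reference on its backing page, if any. */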
void erofs_put_metabuf(struct erofs_buf *buf)
{
    if (!buf->page)
        return;
    erofs_unmap_metabuf(buf);
    put_page(buf->page);
    buf->page = NULL;
}

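/*
 * Read the block at @blkaddr through @inode's page cache and map it according
 * to @type.  The cached page in @buf is reused when it already covers the
 * requested block; asking for a different kmap type on an already-mapped
 * buffer is treated as a bug.
 */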
void *erofs_bread(struct erofs_buf *buf, struct inode *inode,
          erofs_blk_t blkaddr, enum erofs_kmap_type type)
{
    struct address_space *const mapping = inode->i_mapping;
    erofs_off_t offset = blknr_to_addr(blkaddr);
    pgoff_t index = offset >> PAGE_SHIFT;
    struct page *page = buf->page;
    struct folio *folio;
    unsigned int nofs_flag;

    if (!page || page->index != index) {
        erofs_put_metabuf(buf);

        nofs_flag = memalloc_nofs_save();
        folio = read_cache_folio(mapping, index, NULL, NULL);
        memalloc_nofs_restore(nofs_flag);
        if (IS_ERR(folio))
            return folio;

        /* should already be PageUptodate, no need to lock page */
        page = folio_file_page(folio, index);
        buf->page = page;
    }
    if (buf->kmap_type == EROFS_NO_KMAP) {
        if (type == EROFS_KMAP)
            buf->base = kmap(page);
        else if (type == EROFS_KMAP_ATOMIC)
            buf->base = kmap_atomic(page);
        buf->kmap_type = type;
    } else if (buf->kmap_type != type) {
        DBG_BUGON(1);
        return ERR_PTR(-EFAULT);
    }
    if (type == EROFS_NO_KMAP)
        return NULL;
    return buf->base + (offset & ~PAGE_MASK);
}

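/*
 * Read filesystem metadata from the superblock's metadata source: the fscache
 * pseudo inode in fscache mode, otherwise the backing block device inode.
 */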
void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
             erofs_blk_t blkaddr, enum erofs_kmap_type type)
{
    if (erofs_is_fscache_mode(sb))
        return erofs_bread(buf, EROFS_SB(sb)->s_fscache->inode,
                   blkaddr, type);

    return erofs_bread(buf, sb->s_bdev->bd_inode, blkaddr, type);
}

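/*
 * Map a logical offset for flat (non-chunk-based) inodes: full blocks are
 * contiguous starting at vi->raw_blkaddr, and a tail-packed inode keeps its
 * last partial block inline right after the on-disk inode and xattrs.
 */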
static int erofs_map_blocks_flatmode(struct inode *inode,
                     struct erofs_map_blocks *map,
                     int flags)
{
    erofs_blk_t nblocks, lastblk;
    u64 offset = map->m_la;
    struct erofs_inode *vi = EROFS_I(inode);
    bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE);

    nblocks = DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
    lastblk = nblocks - tailendpacking;

    /* there is no hole in flatmode */
    map->m_flags = EROFS_MAP_MAPPED;
    if (offset < blknr_to_addr(lastblk)) {
        map->m_pa = blknr_to_addr(vi->raw_blkaddr) + map->m_la;
        map->m_plen = blknr_to_addr(lastblk) - offset;
    } else if (tailendpacking) {
        /* 2 - inode inline B: inode, [xattrs], inline last blk... */
        struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);

        map->m_pa = iloc(sbi, vi->nid) + vi->inode_isize +
            vi->xattr_isize + erofs_blkoff(map->m_la);
        map->m_plen = inode->i_size - offset;

        /* inline data should be located in the same meta block */
        if (erofs_blkoff(map->m_pa) + map->m_plen > EROFS_BLKSIZ) {
            erofs_err(inode->i_sb,
                  "inline data cross block boundary @ nid %llu",
                  vi->nid);
            DBG_BUGON(1);
            return -EFSCORRUPTED;
        }
        map->m_flags |= EROFS_MAP_META;
    } else {
        erofs_err(inode->i_sb,
              "internal error @ nid: %llu (size %llu), m_la 0x%llx",
              vi->nid, inode->i_size, map->m_la);
        DBG_BUGON(1);
        return -EIO;
    }
    return 0;
}

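/*
 * Map the logical extent at map->m_la.  Chunk-based inodes are resolved
 * through either a 32-bit block map or full chunk indexes (which also carry
 * a device id); all other layouts go through erofs_map_blocks_flatmode().
 */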
int erofs_map_blocks(struct inode *inode,
             struct erofs_map_blocks *map, int flags)
{
    struct super_block *sb = inode->i_sb;
    struct erofs_inode *vi = EROFS_I(inode);
    struct erofs_inode_chunk_index *idx;
    struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
    u64 chunknr;
    unsigned int unit;
    erofs_off_t pos;
    void *kaddr;
    int err = 0;

    trace_erofs_map_blocks_enter(inode, map, flags);
    map->m_deviceid = 0;
    if (map->m_la >= inode->i_size) {
        /* leave out-of-bound access unmapped */
        map->m_flags = 0;
        map->m_plen = 0;
        goto out;
    }

    if (vi->datalayout != EROFS_INODE_CHUNK_BASED) {
        err = erofs_map_blocks_flatmode(inode, map, flags);
        goto out;
    }

    if (vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)
        unit = sizeof(*idx);            /* chunk index */
    else
        unit = EROFS_BLOCK_MAP_ENTRY_SIZE;  /* block map */

    chunknr = map->m_la >> vi->chunkbits;
    pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize +
            vi->xattr_isize, unit) + unit * chunknr;

    kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), EROFS_KMAP);
    if (IS_ERR(kaddr)) {
        err = PTR_ERR(kaddr);
        goto out;
    }
    map->m_la = chunknr << vi->chunkbits;
    map->m_plen = min_t(erofs_off_t, 1UL << vi->chunkbits,
                roundup(inode->i_size - map->m_la, EROFS_BLKSIZ));

    /* handle block map */
    if (!(vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)) {
        __le32 *blkaddr = kaddr + erofs_blkoff(pos);

        if (le32_to_cpu(*blkaddr) == EROFS_NULL_ADDR) {
            map->m_flags = 0;
        } else {
            map->m_pa = blknr_to_addr(le32_to_cpu(*blkaddr));
            map->m_flags = EROFS_MAP_MAPPED;
        }
        goto out_unlock;
    }
    /* parse chunk indexes */
    idx = kaddr + erofs_blkoff(pos);
    switch (le32_to_cpu(idx->blkaddr)) {
    case EROFS_NULL_ADDR:
        map->m_flags = 0;
        break;
    default:
        map->m_deviceid = le16_to_cpu(idx->device_id) &
            EROFS_SB(sb)->device_id_mask;
        map->m_pa = blknr_to_addr(le32_to_cpu(idx->blkaddr));
        map->m_flags = EROFS_MAP_MAPPED;
        break;
    }
out_unlock:
    erofs_put_metabuf(&buf);
out:
    if (!err)
        map->m_llen = map->m_plen;
    trace_erofs_map_blocks_exit(inode, map, flags, 0);
    return err;
}

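/*
 * Resolve the backing device of a mapped extent.  The primary device is used
 * by default; for multi-device images, an extra device is selected either by
 * the device id from a chunk index or by the physical address range it covers.
 */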
int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
{
    struct erofs_dev_context *devs = EROFS_SB(sb)->devs;
    struct erofs_device_info *dif;
    int id;

    /* primary device by default */
    map->m_bdev = sb->s_bdev;
    map->m_daxdev = EROFS_SB(sb)->dax_dev;
    map->m_dax_part_off = EROFS_SB(sb)->dax_part_off;
    map->m_fscache = EROFS_SB(sb)->s_fscache;

    if (map->m_deviceid) {
        down_read(&devs->rwsem);
        dif = idr_find(&devs->tree, map->m_deviceid - 1);
        if (!dif) {
            up_read(&devs->rwsem);
            return -ENODEV;
        }
        map->m_bdev = dif->bdev;
        map->m_daxdev = dif->dax_dev;
        map->m_dax_part_off = dif->dax_part_off;
        map->m_fscache = dif->fscache;
        up_read(&devs->rwsem);
    } else if (devs->extra_devices) {
        down_read(&devs->rwsem);
        idr_for_each_entry(&devs->tree, dif, id) {
            erofs_off_t startoff, length;

            if (!dif->mapped_blkaddr)
                continue;
            startoff = blknr_to_addr(dif->mapped_blkaddr);
            length = blknr_to_addr(dif->blocks);

            if (map->m_pa >= startoff &&
                map->m_pa < startoff + length) {
                map->m_pa -= startoff;
                map->m_bdev = dif->bdev;
                map->m_daxdev = dif->dax_dev;
                map->m_dax_part_off = dif->dax_part_off;
                map->m_fscache = dif->fscache;
                break;
            }
        }
        up_read(&devs->rwsem);
    }
    return 0;
}

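/*
 * iomap_begin callback: translate the request via erofs_map_blocks() and
 * erofs_map_dev().  Tail-packed inline data is exposed as IOMAP_INLINE with
 * the metadata buffer left mapped and stashed in iomap->private so that
 * erofs_iomap_end() can release it.
 */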
static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
        unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
{
    int ret;
    struct erofs_map_blocks map;
    struct erofs_map_dev mdev;

    map.m_la = offset;
    map.m_llen = length;

    ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
    if (ret < 0)
        return ret;

    mdev = (struct erofs_map_dev) {
        .m_deviceid = map.m_deviceid,
        .m_pa = map.m_pa,
    };
    ret = erofs_map_dev(inode->i_sb, &mdev);
    if (ret)
        return ret;

    iomap->offset = map.m_la;
    if (flags & IOMAP_DAX)
        iomap->dax_dev = mdev.m_daxdev;
    else
        iomap->bdev = mdev.m_bdev;
    iomap->length = map.m_llen;
    iomap->flags = 0;
    iomap->private = NULL;

    if (!(map.m_flags & EROFS_MAP_MAPPED)) {
        iomap->type = IOMAP_HOLE;
        iomap->addr = IOMAP_NULL_ADDR;
        if (!iomap->length)
            iomap->length = length;
        return 0;
    }

    if (map.m_flags & EROFS_MAP_META) {
        void *ptr;
        struct erofs_buf buf = __EROFS_BUF_INITIALIZER;

        iomap->type = IOMAP_INLINE;
        ptr = erofs_read_metabuf(&buf, inode->i_sb,
                     erofs_blknr(mdev.m_pa), EROFS_KMAP);
        if (IS_ERR(ptr))
            return PTR_ERR(ptr);
        iomap->inline_data = ptr + erofs_blkoff(mdev.m_pa);
        iomap->private = buf.base;
    } else {
        iomap->type = IOMAP_MAPPED;
        iomap->addr = mdev.m_pa;
        if (flags & IOMAP_DAX)
            iomap->addr += mdev.m_dax_part_off;
    }
    return 0;
}

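/*
 * iomap_end callback: rebuild the erofs_buf from the mapped pointer saved in
 * iomap->private by erofs_iomap_begin() and release it.
 */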
static int erofs_iomap_end(struct inode *inode, loff_t pos, loff_t length,
        ssize_t written, unsigned int flags, struct iomap *iomap)
{
    void *ptr = iomap->private;

    if (ptr) {
        struct erofs_buf buf = {
            .page = kmap_to_page(ptr),
            .base = ptr,
            .kmap_type = EROFS_KMAP,
        };

        DBG_BUGON(iomap->type != IOMAP_INLINE);
        erofs_put_metabuf(&buf);
    } else {
        DBG_BUGON(iomap->type == IOMAP_INLINE);
    }
    return written;
}

static const struct iomap_ops erofs_iomap_ops = {
    .iomap_begin = erofs_iomap_begin,
    .iomap_end = erofs_iomap_end,
};

int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
         u64 start, u64 len)
{
    if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout)) {
#ifdef CONFIG_EROFS_FS_ZIP
        return iomap_fiemap(inode, fieinfo, start, len,
                    &z_erofs_iomap_report_ops);
#else
        return -EOPNOTSUPP;
#endif
    }
    return iomap_fiemap(inode, fieinfo, start, len, &erofs_iomap_ops);
}

/*
 * Since we don't have write or truncate flows, no inode locking needs to be
 * held at the moment.
 */
static int erofs_read_folio(struct file *file, struct folio *folio)
{
    return iomap_read_folio(folio, &erofs_iomap_ops);
}

static void erofs_readahead(struct readahead_control *rac)
{
    return iomap_readahead(rac, &erofs_iomap_ops);
}

static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
{
    return iomap_bmap(mapping, block, &erofs_iomap_ops);
}

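/*
 * Read dispatch: DAX inodes use dax_iomap_rw(), O_DIRECT reads use
 * iomap_dio_rw() after checking alignment against the logical block size,
 * and everything else falls back to buffered reads via filemap_read().
 */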
static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
    struct inode *inode = file_inode(iocb->ki_filp);

    /* no need to take the (shared) inode lock since it's a read-only filesystem */
    if (!iov_iter_count(to))
        return 0;

#ifdef CONFIG_FS_DAX
    if (IS_DAX(inode))
        return dax_iomap_rw(iocb, to, &erofs_iomap_ops);
#endif
    if (iocb->ki_flags & IOCB_DIRECT) {
        struct block_device *bdev = inode->i_sb->s_bdev;
        unsigned int blksize_mask;

        if (bdev)
            blksize_mask = bdev_logical_block_size(bdev) - 1;
        else
            blksize_mask = (1 << inode->i_blkbits) - 1;

        if ((iocb->ki_pos | iov_iter_count(to) |
             iov_iter_alignment(to)) & blksize_mask)
            return -EINVAL;

        return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
                    NULL, 0, NULL, 0);
    }
    return filemap_read(iocb, to, 0);
}

/* for uncompressed (aligned) files and raw access for other files */
const struct address_space_operations erofs_raw_access_aops = {
    .read_folio = erofs_read_folio,
    .readahead = erofs_readahead,
    .bmap = erofs_bmap,
    .direct_IO = noop_direct_IO,
};

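/*
 * DAX faults are served directly through the iomap ops; shared writable
 * mappings are rejected since the filesystem is read-only.
 */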
#ifdef CONFIG_FS_DAX
static vm_fault_t erofs_dax_huge_fault(struct vm_fault *vmf,
        enum page_entry_size pe_size)
{
    return dax_iomap_fault(vmf, pe_size, NULL, NULL, &erofs_iomap_ops);
}

static vm_fault_t erofs_dax_fault(struct vm_fault *vmf)
{
    return erofs_dax_huge_fault(vmf, PE_SIZE_PTE);
}

static const struct vm_operations_struct erofs_dax_vm_ops = {
    .fault      = erofs_dax_fault,
    .huge_fault = erofs_dax_huge_fault,
};

static int erofs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
    if (!IS_DAX(file_inode(file)))
        return generic_file_readonly_mmap(file, vma);

    if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
        return -EINVAL;

    vma->vm_ops = &erofs_dax_vm_ops;
    vma->vm_flags |= VM_HUGEPAGE;
    return 0;
}
#else
#define erofs_file_mmap generic_file_readonly_mmap
#endif

const struct file_operations erofs_file_fops = {
    .llseek     = generic_file_llseek,
    .read_iter  = erofs_file_read_iter,
    .mmap       = erofs_file_mmap,
    .splice_read    = generic_file_splice_read,
};