Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  *  fs/ext4/mballoc.h
0004  *
0005  *  Written by: Alex Tomas <alex@clusterfs.com>
0006  *
0007  */
0008 #ifndef _EXT4_MBALLOC_H
0009 #define _EXT4_MBALLOC_H
0010 
0011 #include <linux/time.h>
0012 #include <linux/fs.h>
0013 #include <linux/namei.h>
0014 #include <linux/quotaops.h>
0015 #include <linux/buffer_head.h>
0016 #include <linux/module.h>
0017 #include <linux/swap.h>
0018 #include <linux/proc_fs.h>
0019 #include <linux/pagemap.h>
0020 #include <linux/seq_file.h>
0021 #include <linux/blkdev.h>
0022 #include <linux/mutex.h>
0023 #include "ext4_jbd2.h"
0024 #include "ext4.h"
0025 
/*
 * mb_debug() - dynamic printk helper for debugging the mballoc code.
 * @sb:  super block the message refers to; its s_id is printed
 * @fmt: printf-style format string, followed by its arguments
 *
 * With CONFIG_EXT4_DEBUG the message is emitted through pr_debug() and is
 * prefixed with the current task's comm and pid, the s_id of @sb, and the
 * file, line and function of the call site.  Without CONFIG_EXT4_DEBUG the
 * format and arguments are handed to no_printk() and @sb is not evaluated.
 */
#ifdef CONFIG_EXT4_DEBUG
/* @sb is parenthesized so that any pointer expression can be passed. */
#define mb_debug(sb, fmt, ...)                                      \
    pr_debug("[%s/%d] EXT4-fs (%s): (%s, %d): %s: " fmt,            \
             current->comm, task_pid_nr(current), (sb)->s_id,       \
             __FILE__, __LINE__, __func__, ##__VA_ARGS__)
#else
#define mb_debug(sb, fmt, ...)  no_printk(fmt, ##__VA_ARGS__)
#endif
0037 
/*
 * Operation codes for the allocation history.
 * NOTE(review): presumably the values kept in
 * ext4_allocation_context::ac_op -- confirm against mballoc.c.
 */
#define EXT4_MB_HISTORY_ALLOC       1   /* allocation */
#define EXT4_MB_HISTORY_PREALLOC    2   /* preallocated blocks used */
0040 
/*
 * Upper bound on how long mballoc may keep looking for a best extent,
 * counted in found extents.
 */
#define MB_DEFAULT_MAX_TO_SCAN      200

/*
 * Lower bound on how long mballoc must keep looking for a best extent.
 */
#define MB_DEFAULT_MIN_TO_SCAN      10

/*
 * With 'ext4_mb_stats' the allocator collects statistics that are shown
 * at umount.  Collecting them is not free.
 */
#define MB_DEFAULT_STATS        0

/*
 * Files smaller than MB_DEFAULT_STREAM_THRESHOLD are served by the stream
 * allocator, whose purpose is to pack requests as close to each other as
 * possible in order to produce smooth I/O traffic.  Stream requests use the
 * locality group prealloc space.
 * The threshold can be tuned via /proc/fs/ext4/<partition>/stream_req.
 * NOTE(review): newer kernels appear to expose this as the mb_stream_req
 * sysfs attribute instead -- confirm the path for the target kernel.
 */
#define MB_DEFAULT_STREAM_THRESHOLD 16  /* 64K */

/*
 * Selects which requests use the 2^N search based on buddies.
 */
#define MB_DEFAULT_ORDER2_REQS      2

/*
 * Default group preallocation size: 512 blocks.
 */
#define MB_DEFAULT_GROUP_PREALLOC   512

/*
 * Maximum length of the inode preallocation list.
 */
#define MB_DEFAULT_MAX_INODE_PREALLOC   512

/*
 * Number of groups that are searched linearly before the group scanning
 * optimization is applied.
 */
#define MB_DEFAULT_LINEAR_LIMIT     4

/*
 * Minimum number of groups the file system must have for the group
 * scanning optimizations to be performed.
 */
#define MB_DEFAULT_LINEAR_SCAN_THRESHOLD    16
0092 
/*
 * MB_NUM_ORDERS() - number of valid buddy orders for a super block.
 * @sb: pointer to an object with an s_blocksize_bits member
 *
 * Evaluates to s_blocksize_bits plus two.
 */
#define MB_NUM_ORDERS(sb)       (2 + (sb)->s_blocksize_bits)
0097 
0098 struct ext4_free_data {
0099     /* this links the free block information from sb_info */
0100     struct list_head        efd_list;
0101 
0102     /* this links the free block information from group_info */
0103     struct rb_node          efd_node;
0104 
0105     /* group which free block extent belongs */
0106     ext4_group_t            efd_group;
0107 
0108     /* free block extent */
0109     ext4_grpblk_t           efd_start_cluster;
0110     ext4_grpblk_t           efd_count;
0111 
0112     /* transaction which freed this extent */
0113     tid_t               efd_tid;
0114 };
0115 
0116 struct ext4_prealloc_space {
0117     struct list_head    pa_inode_list;
0118     struct list_head    pa_group_list;
0119     union {
0120         struct list_head pa_tmp_list;
0121         struct rcu_head pa_rcu;
0122     } u;
0123     spinlock_t      pa_lock;
0124     atomic_t        pa_count;
0125     unsigned        pa_deleted;
0126     ext4_fsblk_t        pa_pstart;  /* phys. block */
0127     ext4_lblk_t     pa_lstart;  /* log. block */
0128     ext4_grpblk_t       pa_len;     /* len of preallocated chunk */
0129     ext4_grpblk_t       pa_free;    /* how many blocks are free */
0130     unsigned short      pa_type;    /* pa type. inode or group */
0131     spinlock_t      *pa_obj_lock;
0132     struct inode        *pa_inode;  /* hack, for history only */
0133 };
0134 
/*
 * Preallocation kinds.
 * NOTE(review): presumably the values stored in
 * ext4_prealloc_space::pa_type -- confirm against mballoc.c.
 */
enum {
    MB_INODE_PA = 0,    /* inode preallocation */
    MB_GROUP_PA = 1,    /* group preallocation */
};
0139 
0140 struct ext4_free_extent {
0141     ext4_lblk_t fe_logical;
0142     ext4_grpblk_t fe_start; /* In cluster units */
0143     ext4_group_t fe_group;
0144     ext4_grpblk_t fe_len;   /* In cluster units */
0145 };
0146 
0147 /*
0148  * Locality group:
0149  *   we try to group all related changes together
0150  *   so that writeback can flush/allocate them together as well
0151  *   Size of lg_prealloc_list hash is determined by MB_DEFAULT_GROUP_PREALLOC
0152  *   (512). We store prealloc space into the hash based on the pa_free blocks
0153  *   order value.ie, fls(pa_free)-1;
0154  */
0155 #define PREALLOC_TB_SIZE 10
0156 struct ext4_locality_group {
0157     /* for allocator */
0158     /* to serialize allocates */
0159     struct mutex        lg_mutex;
0160     /* list of preallocations */
0161     struct list_head    lg_prealloc_list[PREALLOC_TB_SIZE];
0162     spinlock_t      lg_prealloc_lock;
0163 };
0164 
0165 struct ext4_allocation_context {
0166     struct inode *ac_inode;
0167     struct super_block *ac_sb;
0168 
0169     /* original request */
0170     struct ext4_free_extent ac_o_ex;
0171 
0172     /* goal request (normalized ac_o_ex) */
0173     struct ext4_free_extent ac_g_ex;
0174 
0175     /* the best found extent */
0176     struct ext4_free_extent ac_b_ex;
0177 
0178     /* copy of the best found extent taken before preallocation efforts */
0179     struct ext4_free_extent ac_f_ex;
0180 
0181     __u32 ac_groups_considered;
0182     __u32 ac_flags;     /* allocation hints */
0183     __u16 ac_groups_scanned;
0184     __u16 ac_groups_linear_remaining;
0185     __u16 ac_found;
0186     __u16 ac_tail;
0187     __u16 ac_buddy;
0188     __u8 ac_status;
0189     __u8 ac_criteria;
0190     __u8 ac_2order;     /* if request is to allocate 2^N blocks and
0191                  * N > 0, the field stores N, otherwise 0 */
0192     __u8 ac_op;     /* operation, for history only */
0193     struct page *ac_bitmap_page;
0194     struct page *ac_buddy_page;
0195     struct ext4_prealloc_space *ac_pa;
0196     struct ext4_locality_group *ac_lg;
0197 };
0198 
/*
 * Allocation status codes.
 * NOTE(review): presumably the values held in
 * ext4_allocation_context::ac_status -- confirm against mballoc.c.
 */
#define AC_STATUS_CONTINUE  1
#define AC_STATUS_FOUND     2
#define AC_STATUS_BREAK     3
0202 
0203 struct ext4_buddy {
0204     struct page *bd_buddy_page;
0205     void *bd_buddy;
0206     struct page *bd_bitmap_page;
0207     void *bd_bitmap;
0208     struct ext4_group_info *bd_info;
0209     struct super_block *bd_sb;
0210     __u16 bd_blkbits;
0211     ext4_group_t bd_group;
0212 };
0213 
0214 static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
0215                     struct ext4_free_extent *fex)
0216 {
0217     return ext4_group_first_block_no(sb, fex->fe_group) +
0218         (fex->fe_start << EXT4_SB(sb)->s_cluster_bits);
0219 }
0220 
0221 typedef int (*ext4_mballoc_query_range_fn)(
0222     struct super_block      *sb,
0223     ext4_group_t            agno,
0224     ext4_grpblk_t           start,
0225     ext4_grpblk_t           len,
0226     void                *priv);
0227 
0228 int
0229 ext4_mballoc_query_range(
0230     struct super_block      *sb,
0231     ext4_group_t            agno,
0232     ext4_grpblk_t           start,
0233     ext4_grpblk_t           end,
0234     ext4_mballoc_query_range_fn formatter,
0235     void                *priv);
0236 
0237 #endif