/* SPDX-License-Identifier: GPL-2.0 */

#ifndef BTRFS_BLOCK_GROUP_H
#define BTRFS_BLOCK_GROUP_H

#include "free-space-cache.h"

enum btrfs_disk_cache_state {
    BTRFS_DC_WRITTEN,
    BTRFS_DC_ERROR,
    BTRFS_DC_CLEAR,
    BTRFS_DC_SETUP,
};

/*
 * This describes the state of the block_group for async discard.  Discarding
 * is done in two passes, with extent discarding prioritized over bitmap
 * discarding.  BTRFS_DISCARD_RESET_CURSOR is set when we are resetting
 * between lists to prevent contention for discard state variables
 * (e.g. discard_cursor).
 */
enum btrfs_discard_state {
    BTRFS_DISCARD_EXTENTS,
    BTRFS_DISCARD_BITMAPS,
    BTRFS_DISCARD_RESET_CURSOR,
};
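
/*
 * Illustrative dispatch (editor's sketch, not kernel code; the helper
 * name is hypothetical, only the enum values come from this header): an
 * async discard worker would act on this state roughly as follows:
 *
 *      switch (block_group->discard_state) {
 *      case BTRFS_DISCARD_RESET_CURSOR:
 *          // Moved between lists: restart from the block group start.
 *          block_group->discard_cursor = block_group->start;
 *          block_group->discard_state = BTRFS_DISCARD_EXTENTS;
 *          fallthrough;
 *      case BTRFS_DISCARD_EXTENTS:
 *          // First pass: trim whole free extents.
 *          break;
 *      case BTRFS_DISCARD_BITMAPS:
 *          // Second pass: trim ranges tracked only in bitmaps.
 *          break;
 *      }
 */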

/*
 * Control flags for do_chunk_alloc()'s force field.
 *
 * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk if we really need one.
 *
 * CHUNK_ALLOC_LIMITED means to only try and allocate one if we have very few
 * chunks already allocated.  This is used as part of the clustering code to
 * help make sure we have a good pool of storage to cluster in, without filling
 * the FS with empty chunks.
 *
 * CHUNK_ALLOC_FORCE means it must try to allocate one.
 *
 * CHUNK_ALLOC_FORCE_FOR_EXTENT is like CHUNK_ALLOC_FORCE but called from
 * find_free_extent(), and it also activates the zone.
 */
enum btrfs_chunk_alloc_enum {
    CHUNK_ALLOC_NO_FORCE,
    CHUNK_ALLOC_LIMITED,
    CHUNK_ALLOC_FORCE,
    CHUNK_ALLOC_FORCE_FOR_EXTENT,
};
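
/*
 * Caller sketch (editor's illustration): these values are passed as the
 * "force" argument of btrfs_chunk_alloc(), declared later in this header.
 * For example, to ask for a new metadata chunk only if one is actually
 * needed:
 *
 *      ret = btrfs_chunk_alloc(trans, btrfs_metadata_alloc_profile(fs_info),
 *                              CHUNK_ALLOC_NO_FORCE);
 */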

struct btrfs_caching_control {
    struct list_head list;
    struct mutex mutex;
    wait_queue_head_t wait;
    struct btrfs_work work;
    struct btrfs_block_group *block_group;
    u64 progress;
    refcount_t count;
};

/* Once caching_thread() finds this much free space, it will wake up waiters. */
#define CACHING_CTL_WAKE_UP SZ_2M
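
/*
 * Waiter sketch (editor's illustration, modeled on the declarations in
 * this header): take a reference on the caching control, sleep until the
 * caching thread has made enough progress, then drop the reference:
 *
 *      caching_ctl = btrfs_get_caching_control(cache);
 *      if (caching_ctl) {
 *          wait_event(caching_ctl->wait, btrfs_block_group_done(cache) ||
 *                     cache->free_space_ctl->free_space >= num_bytes);
 *          btrfs_put_caching_control(caching_ctl);
 *      }
 */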

struct btrfs_block_group {
    struct btrfs_fs_info *fs_info;
    struct inode *inode;
    spinlock_t lock;
    u64 start;
    u64 length;
    u64 pinned;
    u64 reserved;
    u64 used;
    u64 delalloc_bytes;
    u64 bytes_super;
    u64 flags;
    u64 cache_generation;
    u64 global_root_id;

    /*
     * If the free space extent count exceeds this number, convert the block
     * group to bitmaps.
     */
    u32 bitmap_high_thresh;

    /*
     * If the free space extent count drops below this number, convert the
     * block group back to extents.
     */
    u32 bitmap_low_thresh;

    /*
     * Used only for delayed data space allocation, because only the data
     * space allocation and the related metadata update can be done across
     * transactions.
     */
    struct rw_semaphore data_rwsem;

    /* For raid56, this is a full stripe, without parity */
    unsigned long full_stripe_len;

    unsigned int ro;
    unsigned int iref:1;
    unsigned int has_caching_ctl:1;
    unsigned int removed:1;
    unsigned int to_copy:1;
    unsigned int relocating_repair:1;
    unsigned int chunk_item_inserted:1;
    unsigned int zone_is_active:1;
    unsigned int zoned_data_reloc_ongoing:1;

    int disk_cache_state;

    /* Cache tracking stuff */
    int cached;
    struct btrfs_caching_control *caching_ctl;
    u64 last_byte_to_unpin;

    struct btrfs_space_info *space_info;

    /* Free space cache stuff */
    struct btrfs_free_space_ctl *free_space_ctl;

    /* Block group cache stuff */
    struct rb_node cache_node;

    /* For block groups in the same raid type */
    struct list_head list;

    refcount_t refs;

    /*
     * List of struct btrfs_free_clusters for this block group.
     * Today it will only have one thing on it, but that may change.
     */
    struct list_head cluster_list;

    /* For delayed block group creation or deletion of empty block groups */
    struct list_head bg_list;

    /* For read-only block groups */
    struct list_head ro_list;

    /*
     * When non-zero it means the block group's logical address and its
     * device extents can not be reused for future block group allocations
     * until the counter goes down to 0. This is to prevent them from being
     * reused while some task is still using the block group after it was
     * deleted - we want to make sure they can only be reused for new block
     * groups after that task is done with the deleted block group.
     */
    atomic_t frozen;

    /* For discard operations */
    struct list_head discard_list;
    int discard_index;
    u64 discard_eligible_time;
    u64 discard_cursor;
    enum btrfs_discard_state discard_state;

    /* For dirty block groups */
    struct list_head dirty_list;
    struct list_head io_list;

    struct btrfs_io_ctl io_ctl;

    /*
     * Incremented when doing extent allocations and holding a read lock
     * on the space_info's groups_sem semaphore.
     * Decremented when an ordered extent that represents an IO against this
     * block group's range is created (after it's added to its inode's
     * root's list of ordered extents) or immediately after the allocation
     * if it's a metadata extent or fallocate extent (for these cases we
     * don't create ordered extents).
     */
    atomic_t reservations;

    /*
     * Incremented while holding the spinlock *lock* by a task checking if
     * it can perform a nocow write (incremented if the value for the *ro*
     * field is 0). Decremented by such tasks once they create an ordered
     * extent or before that if some error happens before reaching that step.
     * This is to prevent races between block group relocation and nocow
     * writes through direct IO.
     */
    atomic_t nocow_writers;

    /* Lock for free space tree operations. */
    struct mutex free_space_lock;

    /*
     * Does the block group need to be added to the free space tree?
     * Protected by free_space_lock.
     */
    int needs_free_space;

    /* Flag indicating this block group is placed on a sequential zone */
    bool seq_zone;

    /*
     * Number of extents in this block group used for swap files.
     * All accesses protected by the spinlock 'lock'.
     */
    int swap_extents;

    /* Record locked full stripes for RAID5/6 block group */
    struct btrfs_full_stripe_locks_tree full_stripe_locks_root;

    /*
     * Allocation offset for the block group to implement sequential
     * allocation. This is used only on a zoned filesystem.
     */
    u64 alloc_offset;
    u64 zone_unusable;
    u64 zone_capacity;
    u64 meta_write_pointer;
    struct map_lookup *physical_map;
    struct list_head active_bg_list;
    struct work_struct zone_finish_work;
    struct extent_buffer *last_eb;
};
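
/*
 * Usage sketch for the "frozen" counter above (editor's illustration):
 * a task that must keep a possibly-deleted block group's logical address
 * and device extents from being reused brackets its work with the
 * freeze/unfreeze helpers declared at the end of this header:
 *
 *      btrfs_freeze_block_group(bg);
 *      ... operate on the block group's range ...
 *      btrfs_unfreeze_block_group(bg);
 *
 * The address space only becomes reusable once the counter drops to 0.
 */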

static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)
{
    return block_group->start + block_group->length;
}

static inline bool btrfs_is_block_group_data_only(
                    struct btrfs_block_group *block_group)
{
    /*
     * In mixed mode the fragmentation is expected to be high, lowering the
     * efficiency, so only proper data block groups are considered.
     */
    return (block_group->flags & BTRFS_BLOCK_GROUP_DATA) &&
           !(block_group->flags & BTRFS_BLOCK_GROUP_METADATA);
}

#ifdef CONFIG_BTRFS_DEBUG
static inline int btrfs_should_fragment_free_space(
        struct btrfs_block_group *block_group)
{
    struct btrfs_fs_info *fs_info = block_group->fs_info;

    return (btrfs_test_opt(fs_info, FRAGMENT_METADATA) &&
            block_group->flags & BTRFS_BLOCK_GROUP_METADATA) ||
           (btrfs_test_opt(fs_info, FRAGMENT_DATA) &&
            block_group->flags & BTRFS_BLOCK_GROUP_DATA);
}
#endif

struct btrfs_block_group *btrfs_lookup_first_block_group(
        struct btrfs_fs_info *info, u64 bytenr);
struct btrfs_block_group *btrfs_lookup_block_group(
        struct btrfs_fs_info *info, u64 bytenr);
struct btrfs_block_group *btrfs_next_block_group(
        struct btrfs_block_group *cache);
void btrfs_get_block_group(struct btrfs_block_group *cache);
void btrfs_put_block_group(struct btrfs_block_group *cache);
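
/*
 * Reference counting sketch (editor's illustration): a successful lookup
 * returns a block group holding a reference that the caller must drop
 * with btrfs_put_block_group() when done:
 *
 *      block_group = btrfs_lookup_block_group(fs_info, bytenr);
 *      if (block_group) {
 *          ... use block_group ...
 *          btrfs_put_block_group(block_group);
 *      }
 */
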
void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
                    const u64 start);
void btrfs_wait_block_group_reservations(struct btrfs_block_group *bg);
struct btrfs_block_group *btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info,
                          u64 bytenr);
void btrfs_dec_nocow_writers(struct btrfs_block_group *bg);
void btrfs_wait_nocow_writers(struct btrfs_block_group *bg);
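
/*
 * Nocow write-path sketch (editor's illustration, based on the
 * nocow_writers comment in struct btrfs_block_group above): a writer
 * pins the block group before committing to a nocow write and releases
 * it once the ordered extent exists (or on error):
 *
 *      bg = btrfs_inc_nocow_writers(fs_info, bytenr);
 *      if (!bg)
 *          ... fall back to a COW write ...
 *      ... create the ordered extent ...
 *      btrfs_dec_nocow_writers(bg);
 */
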
void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache,
                           u64 num_bytes);
int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait);
void btrfs_put_caching_control(struct btrfs_caching_control *ctl);
struct btrfs_caching_control *btrfs_get_caching_control(
        struct btrfs_block_group *cache);
u64 add_new_free_space(struct btrfs_block_group *block_group,
               u64 start, u64 end);
struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
                struct btrfs_fs_info *fs_info,
                const u64 chunk_offset);
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
                 u64 group_start, struct extent_map *em);
void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
void btrfs_mark_bg_unused(struct btrfs_block_group *bg);
void btrfs_reclaim_bgs_work(struct work_struct *work);
void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info);
void btrfs_mark_bg_to_reclaim(struct btrfs_block_group *bg);
int btrfs_read_block_groups(struct btrfs_fs_info *info);
struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *trans,
                         u64 bytes_used, u64 type,
                         u64 chunk_offset, u64 size);
void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans);
int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
                 bool do_chunk_alloc);
void btrfs_dec_block_group_ro(struct btrfs_block_group *cache);
int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans);
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans);
int btrfs_setup_space_cache(struct btrfs_trans_handle *trans);
int btrfs_update_block_group(struct btrfs_trans_handle *trans,
                 u64 bytenr, u64 num_bytes, bool alloc);
int btrfs_add_reserved_bytes(struct btrfs_block_group *cache,
                 u64 ram_bytes, u64 num_bytes, int delalloc);
void btrfs_free_reserved_bytes(struct btrfs_block_group *cache,
                   u64 num_bytes, int delalloc);
int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
              enum btrfs_chunk_alloc_enum force);
int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type);
void check_system_chunk(struct btrfs_trans_handle *trans, const u64 type);
void btrfs_reserve_chunk_metadata(struct btrfs_trans_handle *trans,
                  bool is_item_insertion);
u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags);
void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
int btrfs_free_block_groups(struct btrfs_fs_info *info);
void btrfs_wait_space_cache_v1_finished(struct btrfs_block_group *cache,
                struct btrfs_caching_control *caching_ctl);
int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
               struct block_device *bdev, u64 physical, u64 **logical,
               int *naddrs, int *stripe_len);
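
/*
 * Reverse-mapping sketch (editor's illustration): btrfs_rmap_block()
 * translates a physical offset on a device back to the logical addresses
 * that map to it; on success the caller owns the returned array.  The
 * kfree() here reflects common kernel practice and is an assumption:
 *
 *      ret = btrfs_rmap_block(fs_info, chunk_start, bdev, physical,
 *                             &logical, &naddrs, &stripe_len);
 *      if (!ret) {
 *          for (i = 0; i < naddrs; i++)
 *              ... logical[i] maps to the given physical offset ...
 *          kfree(logical);
 *      }
 */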

static inline u64 btrfs_data_alloc_profile(struct btrfs_fs_info *fs_info)
{
    return btrfs_get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_DATA);
}

static inline u64 btrfs_metadata_alloc_profile(struct btrfs_fs_info *fs_info)
{
    return btrfs_get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_METADATA);
}

static inline u64 btrfs_system_alloc_profile(struct btrfs_fs_info *fs_info)
{
    return btrfs_get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
}

static inline int btrfs_block_group_done(struct btrfs_block_group *cache)
{
    /* Full barrier so we observe the latest ->cached value. */
    smp_mb();
    return cache->cached == BTRFS_CACHE_FINISHED ||
        cache->cached == BTRFS_CACHE_ERROR;
}

void btrfs_freeze_block_group(struct btrfs_block_group *cache);
void btrfs_unfreeze_block_group(struct btrfs_block_group *cache);

bool btrfs_inc_block_group_swap_extents(struct btrfs_block_group *bg);
void btrfs_dec_block_group_swap_extents(struct btrfs_block_group *bg, int amount);
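
/*
 * Swap accounting sketch (editor's illustration; the read-only failure
 * mode is an assumption about the implementation): activating a swap
 * file bumps the per-block-group swap extent count, which can fail
 * (e.g. for a read-only block group), and deactivation drops it again:
 *
 *      if (!btrfs_inc_block_group_swap_extents(bg))
 *          ... refuse to activate the swap file here ...
 *      ...
 *      btrfs_dec_block_group_swap_extents(bg, 1);
 */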

#endif /* BTRFS_BLOCK_GROUP_H */