Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * This file is part of UBIFS.
0004  *
0005  * Copyright (C) 2006-2008 Nokia Corporation.
0006  *
0007  * Authors: Artem Bityutskiy (Битюцкий Артём)
0008  *          Adrian Hunter
0009  */
0010 
0011 /*
0012  * This file implements UBIFS shrinker which evicts clean znodes from the TNC
0013  * tree when Linux VM needs more RAM.
0014  *
0015  * We do not implement any LRU lists to find oldest znodes to free because it
0016  * would add additional overhead to the file system fast paths. So the shrinker
0017  * just walks the TNC tree when searching for znodes to free.
0018  *
0019  * If the root of a TNC sub-tree is clean and old enough, then the children are
0020  * also clean and old enough. So the shrinker walks the TNC in level order and
0021  * dumps entire sub-trees.
0022  *
0023  * The age of znodes is just the time-stamp when they were last looked at.
0024  * The current shrinker first tries to evict old znodes, then young ones.
0025  *
0026  * Since the shrinker is global, it has to protect against races with FS
0027  * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'.
0028  */
0029 
0030 #include "ubifs.h"
0031 
0032 /* List of all UBIFS file-system instances */
0033 LIST_HEAD(ubifs_infos);
0034 
0035 /*
0036  * We number each shrinker run and record the number on the ubifs_info structure
0037  * so that we can easily work out which ubifs_info structures have already been
0038  * done by the current run.
0039  */
0040 static unsigned int shrinker_run_no;
0041 
0042 /* Protects 'ubifs_infos' list */
0043 DEFINE_SPINLOCK(ubifs_infos_lock);
0044 
0045 /* Global clean znode counter (for all mounted UBIFS instances) */
0046 atomic_long_t ubifs_clean_zn_cnt;
0047 
0048 /**
0049  * shrink_tnc - shrink TNC tree.
0050  * @c: UBIFS file-system description object
0051  * @nr: number of znodes to free
0052  * @age: the age of znodes to free
0053  * @contention: if any contention, this is set to %1
0054  *
0055  * This function traverses TNC tree and frees clean znodes. It does not free
0056  * clean znodes which younger then @age. Returns number of freed znodes.
0057  */
0058 static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
0059 {
0060     int total_freed = 0;
0061     struct ubifs_znode *znode, *zprev;
0062     time64_t time = ktime_get_seconds();
0063 
0064     ubifs_assert(c, mutex_is_locked(&c->umount_mutex));
0065     ubifs_assert(c, mutex_is_locked(&c->tnc_mutex));
0066 
0067     if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0)
0068         return 0;
0069 
0070     /*
0071      * Traverse the TNC tree in levelorder manner, so that it is possible
0072      * to destroy large sub-trees. Indeed, if a znode is old, then all its
0073      * children are older or of the same age.
0074      *
0075      * Note, we are holding 'c->tnc_mutex', so we do not have to lock the
0076      * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is
0077      * changed only when the 'c->tnc_mutex' is held.
0078      */
0079     zprev = NULL;
0080     znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL);
0081     while (znode && total_freed < nr &&
0082            atomic_long_read(&c->clean_zn_cnt) > 0) {
0083         int freed;
0084 
0085         /*
0086          * If the znode is clean, but it is in the 'c->cnext' list, this
0087          * means that this znode has just been written to flash as a
0088          * part of commit and was marked clean. They will be removed
0089          * from the list at end commit. We cannot change the list,
0090          * because it is not protected by any mutex (design decision to
0091          * make commit really independent and parallel to main I/O). So
0092          * we just skip these znodes.
0093          *
0094          * Note, the 'clean_zn_cnt' counters are not updated until
0095          * after the commit, so the UBIFS shrinker does not report
0096          * the znodes which are in the 'c->cnext' list as freeable.
0097          *
0098          * Also note, if the root of a sub-tree is not in 'c->cnext',
0099          * then the whole sub-tree is not in 'c->cnext' as well, so it
0100          * is safe to dump whole sub-tree.
0101          */
0102 
0103         if (znode->cnext) {
0104             /*
0105              * Very soon these znodes will be removed from the list
0106              * and become freeable.
0107              */
0108             *contention = 1;
0109         } else if (!ubifs_zn_dirty(znode) &&
0110                abs(time - znode->time) >= age) {
0111             if (znode->parent)
0112                 znode->parent->zbranch[znode->iip].znode = NULL;
0113             else
0114                 c->zroot.znode = NULL;
0115 
0116             freed = ubifs_destroy_tnc_subtree(c, znode);
0117             atomic_long_sub(freed, &ubifs_clean_zn_cnt);
0118             atomic_long_sub(freed, &c->clean_zn_cnt);
0119             total_freed += freed;
0120             znode = zprev;
0121         }
0122 
0123         if (unlikely(!c->zroot.znode))
0124             break;
0125 
0126         zprev = znode;
0127         znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode);
0128         cond_resched();
0129     }
0130 
0131     return total_freed;
0132 }
0133 
0134 /**
0135  * shrink_tnc_trees - shrink UBIFS TNC trees.
0136  * @nr: number of znodes to free
0137  * @age: the age of znodes to free
0138  * @contention: if any contention, this is set to %1
0139  *
0140  * This function walks the list of mounted UBIFS file-systems and frees clean
0141  * znodes which are older than @age, until at least @nr znodes are freed.
0142  * Returns the number of freed znodes.
0143  */
0144 static int shrink_tnc_trees(int nr, int age, int *contention)
0145 {
0146     struct ubifs_info *c;
0147     struct list_head *p;
0148     unsigned int run_no;
0149     int freed = 0;
0150 
0151     spin_lock(&ubifs_infos_lock);
0152     do {
0153         run_no = ++shrinker_run_no;
0154     } while (run_no == 0);
0155     /* Iterate over all mounted UBIFS file-systems and try to shrink them */
0156     p = ubifs_infos.next;
0157     while (p != &ubifs_infos) {
0158         c = list_entry(p, struct ubifs_info, infos_list);
0159         /*
0160          * We move the ones we do to the end of the list, so we stop
0161          * when we see one we have already done.
0162          */
0163         if (c->shrinker_run_no == run_no)
0164             break;
0165         if (!mutex_trylock(&c->umount_mutex)) {
0166             /* Some un-mount is in progress, try next FS */
0167             *contention = 1;
0168             p = p->next;
0169             continue;
0170         }
0171         /*
0172          * We're holding 'c->umount_mutex', so the file-system won't go
0173          * away.
0174          */
0175         if (!mutex_trylock(&c->tnc_mutex)) {
0176             mutex_unlock(&c->umount_mutex);
0177             *contention = 1;
0178             p = p->next;
0179             continue;
0180         }
0181         spin_unlock(&ubifs_infos_lock);
0182         /*
0183          * OK, now we have TNC locked, the file-system cannot go away -
0184          * it is safe to reap the cache.
0185          */
0186         c->shrinker_run_no = run_no;
0187         freed += shrink_tnc(c, nr, age, contention);
0188         mutex_unlock(&c->tnc_mutex);
0189         spin_lock(&ubifs_infos_lock);
0190         /* Get the next list element before we move this one */
0191         p = p->next;
0192         /*
0193          * Move this one to the end of the list to provide some
0194          * fairness.
0195          */
0196         list_move_tail(&c->infos_list, &ubifs_infos);
0197         mutex_unlock(&c->umount_mutex);
0198         if (freed >= nr)
0199             break;
0200     }
0201     spin_unlock(&ubifs_infos_lock);
0202     return freed;
0203 }
0204 
0205 /**
0206  * kick_a_thread - kick a background thread to start commit.
0207  *
0208  * This function kicks a background thread to start background commit. Returns
0209  * %-1 if a thread was kicked or there is another reason to assume the memory
0210  * will soon be freed or become freeable. If there are no dirty znodes, returns
0211  * %0.
0212  */
0213 static int kick_a_thread(void)
0214 {
0215     int i;
0216     struct ubifs_info *c;
0217 
0218     /*
0219      * Iterate over all mounted UBIFS file-systems and find out if there is
0220      * already an ongoing commit operation there. If no, then iterate for
0221      * the second time and initiate background commit.
0222      */
0223     spin_lock(&ubifs_infos_lock);
0224     for (i = 0; i < 2; i++) {
0225         list_for_each_entry(c, &ubifs_infos, infos_list) {
0226             long dirty_zn_cnt;
0227 
0228             if (!mutex_trylock(&c->umount_mutex)) {
0229                 /*
0230                  * Some un-mount is in progress, it will
0231                  * certainly free memory, so just return.
0232                  */
0233                 spin_unlock(&ubifs_infos_lock);
0234                 return -1;
0235             }
0236 
0237             dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt);
0238 
0239             if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN ||
0240                 c->ro_mount || c->ro_error) {
0241                 mutex_unlock(&c->umount_mutex);
0242                 continue;
0243             }
0244 
0245             if (c->cmt_state != COMMIT_RESTING) {
0246                 spin_unlock(&ubifs_infos_lock);
0247                 mutex_unlock(&c->umount_mutex);
0248                 return -1;
0249             }
0250 
0251             if (i == 1) {
0252                 list_move_tail(&c->infos_list, &ubifs_infos);
0253                 spin_unlock(&ubifs_infos_lock);
0254 
0255                 ubifs_request_bg_commit(c);
0256                 mutex_unlock(&c->umount_mutex);
0257                 return -1;
0258             }
0259             mutex_unlock(&c->umount_mutex);
0260         }
0261     }
0262     spin_unlock(&ubifs_infos_lock);
0263 
0264     return 0;
0265 }
0266 
0267 unsigned long ubifs_shrink_count(struct shrinker *shrink,
0268                  struct shrink_control *sc)
0269 {
0270     long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
0271 
0272     /*
0273      * Due to the way UBIFS updates the clean znode counter it may
0274      * temporarily be negative.
0275      */
0276     return clean_zn_cnt >= 0 ? clean_zn_cnt : 1;
0277 }
0278 
0279 unsigned long ubifs_shrink_scan(struct shrinker *shrink,
0280                 struct shrink_control *sc)
0281 {
0282     unsigned long nr = sc->nr_to_scan;
0283     int contention = 0;
0284     unsigned long freed;
0285     long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
0286 
0287     if (!clean_zn_cnt) {
0288         /*
0289          * No clean znodes, nothing to reap. All we can do in this case
0290          * is to kick background threads to start commit, which will
0291          * probably make clean znodes which, in turn, will be freeable.
0292          * And we return -1 which means will make VM call us again
0293          * later.
0294          */
0295         dbg_tnc("no clean znodes, kick a thread");
0296         return kick_a_thread();
0297     }
0298 
0299     freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention);
0300     if (freed >= nr)
0301         goto out;
0302 
0303     dbg_tnc("not enough old znodes, try to free young ones");
0304     freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention);
0305     if (freed >= nr)
0306         goto out;
0307 
0308     dbg_tnc("not enough young znodes, free all");
0309     freed += shrink_tnc_trees(nr - freed, 0, &contention);
0310 
0311     if (!freed && contention) {
0312         dbg_tnc("freed nothing, but contention");
0313         return SHRINK_STOP;
0314     }
0315 
0316 out:
0317     dbg_tnc("%lu znodes were freed, requested %lu", freed, nr);
0318     return freed;
0319 }