// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2016 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_log.h"
#include "xfs_rmap.h"
#include "xfs_refcount.h"
#include "xfs_bmap.h"
#include "xfs_alloc.h"
#include "xfs_buf.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_attr.h"

static struct kmem_cache    *xfs_defer_pending_cache;

/*
 * Deferred Operations in XFS
 *
 * Due to the way locking rules work in XFS, certain transactions (block
 * mapping and unmapping, typically) have permanent reservations so that
 * we can roll the transaction to adhere to AG locking order rules and
 * to unlock buffers between metadata updates.  Prior to rmap/reflink,
 * the mapping code had a mechanism to perform these deferrals for
 * extents that were going to be freed; this code makes that facility
 * more generic.
 *
 * When adding the reverse mapping and reflink features, it became
 * necessary to perform complex remapping multi-transactions to comply
 * with AG locking order rules, and to be able to spread a single
 * refcount update operation (an operation on an n-block extent can
 * update as many as n records!) among multiple transactions.  XFS can
 * roll a transaction to facilitate this, but using this facility
 * requires us to log "intent" items in case log recovery needs to
 * redo the operation, and to log "done" items to indicate that redo
 * is not necessary.
 *
 * Deferred work is tracked in xfs_defer_pending items.  Each pending
 * item tracks one type of deferred work.  Incoming work items (which
 * have not yet had an intent logged) are attached to a pending item
 * on the dop_intake list, where they wait for the caller to finish
 * the deferred operations.
 *
 * Finishing a set of deferred operations is an involved process.  To
 * start, we define "rolling a deferred-op transaction" as follows:
 *
 * > For each xfs_defer_pending item on the dop_intake list,
 *   - Sort the work items in AG order.  XFS locking
 *     order rules require us to lock buffers in AG order.
 *   - Create a log intent item for that type.
 *   - Attach it to the pending item.
 *   - Move the pending item from the dop_intake list to the
 *     dop_pending list.
 * > Roll the transaction.
 *
 * NOTE: To avoid exceeding the transaction reservation, we limit the
 * number of items that we attach to a given xfs_defer_pending.
 *
 * The actual finishing process looks like this:
 *
 * > For each xfs_defer_pending in the dop_pending list,
 *   - Roll the deferred-op transaction as above.
 *   - Create a log done item for that type, and attach it to the
 *     log intent item.
 *   - For each work item attached to the log intent item,
 *     * Perform the described action.
 *     * Attach the work item to the log done item.
 *     * If the result of doing the work was -EAGAIN, ->finish_item
 *       wants a new transaction.  See the "Requesting a Fresh
 *       Transaction while Finishing Deferred Work" section below for
 *       details.
 *
 * The key here is that we must log an intent item for all pending
 * work items every time we roll the transaction, and that we must log
 * a done item as soon as the work is completed.  With this mechanism
 * we can perform complex remapping operations, chaining intent items
 * as needed.
 *
 * Requesting a Fresh Transaction while Finishing Deferred Work
 *
 * If ->finish_item decides that it needs a fresh transaction to
 * finish the work, it must ask its caller (xfs_defer_finish) for a
 * continuation.  The most likely cause of this circumstance is a
 * refcount adjust function deciding that it has logged enough items
 * to be at risk of exceeding the transaction reservation.
 *
 * To get a fresh transaction, we want to log the existing log done
 * item to prevent the log intent item from replaying, immediately log
 * a new log intent item with the unfinished work items, roll the
 * transaction, and re-call ->finish_item wherever it left off.  The
 * log done item and the new log intent item must be in the same
 * transaction or atomicity cannot be guaranteed; defer_finish ensures
 * that this happens.
 *
 * This requires some coordination between ->finish_item and
 * defer_finish.  Upon deciding to request a new transaction,
 * ->finish_item should update the current work item to reflect the
 * unfinished work.  Next, it should reset the log done item's list
 * count to the number of items finished, and return -EAGAIN.
 * defer_finish sees the -EAGAIN, logs the new log intent item
 * with the remaining work items, and leaves the xfs_defer_pending
 * item at the head of the dop_work queue.  Then it rolls the
 * transaction and picks up processing where it left off.  Note that
 * ->finish_item must be careful to leave enough transaction
 * reservation to fit the new log intent item.
 *
 * This is an example of remapping the extent (E, E+B) into file X at
 * offset A and dealing with the extent (C, C+B) already being mapped
 * there:
 * +-------------------------------------------------+
 * | Unmap file X startblock C offset A length B     | t0
 * | Intent to reduce refcount for extent (C, B)     |
 * | Intent to remove rmap (X, C, A, B)              |
 * | Intent to free extent (D, 1) (bmbt block)       |
 * | Intent to map (X, A, B) at startblock E         |
 * +-------------------------------------------------+
 * | Map file X startblock E offset A length B       | t1
 * | Done mapping (X, E, A, B)                       |
 * | Intent to increase refcount for extent (E, B)   |
 * | Intent to add rmap (X, E, A, B)                 |
 * +-------------------------------------------------+
 * | Reduce refcount for extent (C, B)               | t2
 * | Done reducing refcount for extent (C, 9)        |
 * | Intent to reduce refcount for extent (C+9, B-9) |
 * | (ran out of space after 9 refcount updates)     |
 * +-------------------------------------------------+
 * | Reduce refcount for extent (C+9, B-9)           | t3
 * | Done reducing refcount for extent (C+9, B-9)    |
 * | Increase refcount for extent (E, B)             |
 * | Done increasing refcount for extent (E, B)      |
 * | Intent to free extent (C, B)                    |
 * | Intent to free extent (F, 1) (refcountbt block) |
 * | Intent to remove rmap (F, 1, REFC)              |
 * +-------------------------------------------------+
 * | Remove rmap (X, C, A, B)                        | t4
 * | Done removing rmap (X, C, A, B)                 |
 * | Add rmap (X, E, A, B)                           |
 * | Done adding rmap (X, E, A, B)                   |
 * | Remove rmap (F, 1, REFC)                        |
 * | Done removing rmap (F, 1, REFC)                 |
 * +-------------------------------------------------+
 * | Free extent (C, B)                              | t5
 * | Done freeing extent (C, B)                      |
 * | Free extent (D, 1)                              |
 * | Done freeing extent (D, 1)                      |
 * | Free extent (F, 1)                              |
 * | Done freeing extent (F, 1)                      |
 * +-------------------------------------------------+
 *
 * If we should crash before t2 commits, log recovery replays
 * the following intent items:
 *
 * - Intent to reduce refcount for extent (C, B)
 * - Intent to remove rmap (X, C, A, B)
 * - Intent to free extent (D, 1) (bmbt block)
 * - Intent to increase refcount for extent (E, B)
 * - Intent to add rmap (X, E, A, B)
 *
 * In the process of recovering, it should also generate and take care
 * of these intent items:
 *
 * - Intent to free extent (C, B)
 * - Intent to free extent (F, 1) (refcountbt block)
 * - Intent to remove rmap (F, 1, REFC)
 *
 * Note that the continuation requested between t2 and t3 is likely to
 * reoccur.
 */
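
/*
 * Caller's-eye view of the machinery (a minimal sketch, not code from
 * this file; resblks and the xefi work item are illustrative
 * assumptions):
 *
 *	struct xfs_trans	*tp;
 *	int			error;
 *
 *	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0,
 *			&tp);
 *	if (error)
 *		return error;
 *	...metadata updates queue deferred work via helpers that call...
 *	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list);
 *	error = xfs_trans_commit(tp);	(the commit finishes t_dfops)
 *
 * Callers that must finish the deferred work before making further
 * updates can instead call xfs_defer_finish(&tp), which rolls the
 * transaction and hands back a clean one.
 */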

static const struct xfs_defer_op_type *defer_op_types[] = {
    [XFS_DEFER_OPS_TYPE_BMAP]       = &xfs_bmap_update_defer_type,
    [XFS_DEFER_OPS_TYPE_REFCOUNT]   = &xfs_refcount_update_defer_type,
    [XFS_DEFER_OPS_TYPE_RMAP]       = &xfs_rmap_update_defer_type,
    [XFS_DEFER_OPS_TYPE_FREE]       = &xfs_extent_free_defer_type,
    [XFS_DEFER_OPS_TYPE_AGFL_FREE]  = &xfs_agfl_free_defer_type,
    [XFS_DEFER_OPS_TYPE_ATTR]       = &xfs_attr_defer_type,
};
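
/*
 * For reference, the shape of the ops vtable these entries point to,
 * paraphrased from the declaration in xfs_defer.h (see the header for
 * the authoritative definition):
 *
 *	struct xfs_defer_op_type {
 *		struct xfs_log_item *(*create_intent)(struct xfs_trans *tp,
 *				struct list_head *items, unsigned int count,
 *				bool sort);
 *		void (*abort_intent)(struct xfs_log_item *intent);
 *		struct xfs_log_item *(*create_done)(struct xfs_trans *tp,
 *				struct xfs_log_item *intent,
 *				unsigned int count);
 *		int (*finish_item)(struct xfs_trans *tp,
 *				struct xfs_log_item *done,
 *				struct list_head *item,
 *				struct xfs_btree_cur **state);
 *		void (*finish_cleanup)(struct xfs_trans *tp,
 *				struct xfs_btree_cur *state, int error);
 *		void (*cancel_item)(struct list_head *item);
 *		unsigned int	max_items;
 *	};
 */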

/*
 * Ensure there's a log intent item associated with this deferred work item if
 * the operation must be restarted on crash.  Returns 1 if there's a log item;
 * 0 if there isn't; or a negative errno.
 */
static int
xfs_defer_create_intent(
    struct xfs_trans        *tp,
    struct xfs_defer_pending    *dfp,
    bool                sort)
{
    const struct xfs_defer_op_type  *ops = defer_op_types[dfp->dfp_type];
    struct xfs_log_item     *lip;

    if (dfp->dfp_intent)
        return 1;

    lip = ops->create_intent(tp, &dfp->dfp_work, dfp->dfp_count, sort);
    if (!lip)
        return 0;
    if (IS_ERR(lip))
        return PTR_ERR(lip);

    dfp->dfp_intent = lip;
    return 1;
}

/*
 * For each pending item in the intake list, log its intent item and the
 * associated extents, then add the entire intake list to the end of
 * the pending list.
 *
 * Returns 1 if at least one log item was associated with the deferred work;
 * 0 if there are no log items; or a negative errno.
 */
static int
xfs_defer_create_intents(
    struct xfs_trans        *tp)
{
    struct xfs_defer_pending    *dfp;
    int             ret = 0;

    list_for_each_entry(dfp, &tp->t_dfops, dfp_list) {
        int         ret2;

        trace_xfs_defer_create_intent(tp->t_mountp, dfp);
        ret2 = xfs_defer_create_intent(tp, dfp, true);
        if (ret2 < 0)
            return ret2;
        ret |= ret2;
    }
    return ret;
}

/* Abort all the intents that were committed. */
STATIC void
xfs_defer_trans_abort(
    struct xfs_trans        *tp,
    struct list_head        *dop_pending)
{
    struct xfs_defer_pending    *dfp;
    const struct xfs_defer_op_type  *ops;

    trace_xfs_defer_trans_abort(tp, _RET_IP_);

    /* Abort intent items that don't have a done item. */
    list_for_each_entry(dfp, dop_pending, dfp_list) {
        ops = defer_op_types[dfp->dfp_type];
        trace_xfs_defer_pending_abort(tp->t_mountp, dfp);
        if (dfp->dfp_intent && !dfp->dfp_done) {
            ops->abort_intent(dfp->dfp_intent);
            dfp->dfp_intent = NULL;
        }
    }
}

/*
 * Capture resources that the caller said not to release ("held") when the
 * transaction commits.  Caller is responsible for zero-initializing @dres.
 */
static int
xfs_defer_save_resources(
    struct xfs_defer_resources  *dres,
    struct xfs_trans        *tp)
{
    struct xfs_buf_log_item     *bli;
    struct xfs_inode_log_item   *ili;
    struct xfs_log_item     *lip;

    BUILD_BUG_ON(NBBY * sizeof(dres->dr_ordered) < XFS_DEFER_OPS_NR_BUFS);

    list_for_each_entry(lip, &tp->t_items, li_trans) {
        switch (lip->li_type) {
        case XFS_LI_BUF:
            bli = container_of(lip, struct xfs_buf_log_item,
                       bli_item);
            if (bli->bli_flags & XFS_BLI_HOLD) {
                if (dres->dr_bufs >= XFS_DEFER_OPS_NR_BUFS) {
                    ASSERT(0);
                    return -EFSCORRUPTED;
                }
                if (bli->bli_flags & XFS_BLI_ORDERED)
                    dres->dr_ordered |=
                            (1U << dres->dr_bufs);
                else
                    xfs_trans_dirty_buf(tp, bli->bli_buf);
                dres->dr_bp[dres->dr_bufs++] = bli->bli_buf;
            }
            break;
        case XFS_LI_INODE:
            ili = container_of(lip, struct xfs_inode_log_item,
                       ili_item);
            if (ili->ili_lock_flags == 0) {
                if (dres->dr_inos >= XFS_DEFER_OPS_NR_INODES) {
                    ASSERT(0);
                    return -EFSCORRUPTED;
                }
                xfs_trans_log_inode(tp, ili->ili_inode,
                            XFS_ILOG_CORE);
                dres->dr_ip[dres->dr_inos++] = ili->ili_inode;
            }
            break;
        default:
            break;
        }
    }

    return 0;
}

/* Attach the held resources to the transaction. */
static void
xfs_defer_restore_resources(
    struct xfs_trans        *tp,
    struct xfs_defer_resources  *dres)
{
    unsigned short          i;

    /* Rejoin the joined inodes. */
    for (i = 0; i < dres->dr_inos; i++)
        xfs_trans_ijoin(tp, dres->dr_ip[i], 0);

    /* Rejoin the buffers and dirty them so the log moves forward. */
    for (i = 0; i < dres->dr_bufs; i++) {
        xfs_trans_bjoin(tp, dres->dr_bp[i]);
        if (dres->dr_ordered & (1U << i))
            xfs_trans_ordered_buf(tp, dres->dr_bp[i]);
        xfs_trans_bhold(tp, dres->dr_bp[i]);
    }
}

/* Roll a transaction so we can do some deferred op processing. */
STATIC int
xfs_defer_trans_roll(
    struct xfs_trans        **tpp)
{
    struct xfs_defer_resources  dres = { };
    int             error;

    error = xfs_defer_save_resources(&dres, *tpp);
    if (error)
        return error;

    trace_xfs_defer_trans_roll(*tpp, _RET_IP_);

    /*
     * Roll the transaction.  Rolling always gives us a new transaction
     * (even if committing the old one fails!) to hand back to the caller,
     * so we join the held resources to the new transaction so that we
     * always return with the held resources joined to @tpp, no matter
     * what happened.
     */
    error = xfs_trans_roll(tpp);

    xfs_defer_restore_resources(*tpp, &dres);

    if (error)
        trace_xfs_defer_trans_roll_error(*tpp, error);
    return error;
}

/*
 * Free up any items left in the list.
 */
static void
xfs_defer_cancel_list(
    struct xfs_mount        *mp,
    struct list_head        *dop_list)
{
    struct xfs_defer_pending    *dfp;
    struct xfs_defer_pending    *pli;
    struct list_head        *pwi;
    struct list_head        *n;
    const struct xfs_defer_op_type  *ops;

    /*
     * Free the pending items.  Caller should already have arranged
     * for the intent items to be released.
     */
    list_for_each_entry_safe(dfp, pli, dop_list, dfp_list) {
        ops = defer_op_types[dfp->dfp_type];
        trace_xfs_defer_cancel_list(mp, dfp);
        list_del(&dfp->dfp_list);
        list_for_each_safe(pwi, n, &dfp->dfp_work) {
            list_del(pwi);
            dfp->dfp_count--;
            ops->cancel_item(pwi);
        }
        ASSERT(dfp->dfp_count == 0);
        kmem_cache_free(xfs_defer_pending_cache, dfp);
    }
}

/*
 * Prevent a log intent item from pinning the tail of the log by logging a
 * done item to release the intent item; and then log a new intent item.
 * The caller should provide a fresh transaction and roll it after we're done.
 */
static int
xfs_defer_relog(
    struct xfs_trans        **tpp,
    struct list_head        *dfops)
{
    struct xlog         *log = (*tpp)->t_mountp->m_log;
    struct xfs_defer_pending    *dfp;
    xfs_lsn_t           threshold_lsn = NULLCOMMITLSN;

    ASSERT((*tpp)->t_flags & XFS_TRANS_PERM_LOG_RES);

    list_for_each_entry(dfp, dfops, dfp_list) {
        /*
         * If the log intent item for this deferred op is not a part of
         * the current log checkpoint, relog the intent item to keep
         * the log tail moving forward.  We're ok with this being racy
         * because an incorrect decision means we'll be a little slower
         * at pushing the tail.
         */
        if (dfp->dfp_intent == NULL ||
            xfs_log_item_in_current_chkpt(dfp->dfp_intent))
            continue;

        /*
         * Figure out where we need the tail to be in order to maintain
         * the minimum required free space in the log.  Only sample
         * the log threshold once per call.
         */
        if (threshold_lsn == NULLCOMMITLSN) {
            threshold_lsn = xlog_grant_push_threshold(log, 0);
            if (threshold_lsn == NULLCOMMITLSN)
                break;
        }
        if (XFS_LSN_CMP(dfp->dfp_intent->li_lsn, threshold_lsn) >= 0)
            continue;

        trace_xfs_defer_relog_intent((*tpp)->t_mountp, dfp);
        XFS_STATS_INC((*tpp)->t_mountp, defer_relog);
        dfp->dfp_intent = xfs_trans_item_relog(dfp->dfp_intent, *tpp);
    }

    if ((*tpp)->t_flags & XFS_TRANS_DIRTY)
        return xfs_defer_trans_roll(tpp);
    return 0;
}

/*
 * Log an intent-done item for the first pending intent, and finish the work
 * items.
 */
static int
xfs_defer_finish_one(
    struct xfs_trans        *tp,
    struct xfs_defer_pending    *dfp)
{
    const struct xfs_defer_op_type  *ops = defer_op_types[dfp->dfp_type];
    struct xfs_btree_cur        *state = NULL;
    struct list_head        *li, *n;
    int             error;

    trace_xfs_defer_pending_finish(tp->t_mountp, dfp);

    dfp->dfp_done = ops->create_done(tp, dfp->dfp_intent, dfp->dfp_count);
    list_for_each_safe(li, n, &dfp->dfp_work) {
        list_del(li);
        dfp->dfp_count--;
        error = ops->finish_item(tp, dfp->dfp_done, li, &state);
        if (error == -EAGAIN) {
            int     ret;

            /*
             * Caller wants a fresh transaction; put the work item
             * back on the list and log a new log intent item to
             * replace the old one.  See "Requesting a Fresh
             * Transaction while Finishing Deferred Work" above.
             */
            list_add(li, &dfp->dfp_work);
            dfp->dfp_count++;
            dfp->dfp_done = NULL;
            dfp->dfp_intent = NULL;
            ret = xfs_defer_create_intent(tp, dfp, false);
            if (ret < 0)
                error = ret;
        }

        if (error)
            goto out;
    }

    /* Done with the dfp, free it. */
    list_del(&dfp->dfp_list);
    kmem_cache_free(xfs_defer_pending_cache, dfp);
out:
    if (ops->finish_cleanup)
        ops->finish_cleanup(tp, state, error);
    return error;
}
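
/*
 * A sketch of a ->finish_item implementation that uses the -EAGAIN
 * protocol above, loosely modeled on the refcount update code;
 * placeholders in angle brackets stand for version-specific details:
 *
 *	ri = container_of(item, struct xfs_refcount_intent, ri_list);
 *	error = <do one step of the refcount adjustment>;
 *	if (!error && <blocks remain unprocessed>) {
 *		<update ri to describe only the unfinished work>;
 *		return -EAGAIN;
 *	}
 *	kmem_cache_free(xfs_refcount_intent_cache, ri);
 *	return error;
 */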

/*
 * Finish all the pending work.  This involves logging intent items for
 * any work items that wandered in since the last transaction roll (if
 * one has even happened), rolling the transaction, and finishing the
 * work items in the first item on the logged-and-pending list.
 *
 * Inodes and buffers held by the transaction are relogged and rejoined
 * across each transaction roll.
 */
int
xfs_defer_finish_noroll(
    struct xfs_trans        **tp)
{
    struct xfs_defer_pending    *dfp = NULL;
    int             error = 0;
    LIST_HEAD(dop_pending);

    ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);

    trace_xfs_defer_finish(*tp, _RET_IP_);

    /* Until we run out of pending work to finish... */
    while (!list_empty(&dop_pending) || !list_empty(&(*tp)->t_dfops)) {
        /*
         * Deferred items that are created in the process of finishing
         * other deferred work items should be queued at the head of
         * the pending list, which puts them ahead of the deferred work
         * that was created by the caller.  This keeps the number of
         * pending work items to a minimum, which decreases the amount
         * of time that any one intent item can stick around in memory,
         * pinning the log tail.
         */
        int has_intents = xfs_defer_create_intents(*tp);

        list_splice_init(&(*tp)->t_dfops, &dop_pending);

        if (has_intents < 0) {
            error = has_intents;
            goto out_shutdown;
        }
        if (has_intents || dfp) {
            error = xfs_defer_trans_roll(tp);
            if (error)
                goto out_shutdown;

            /* Relog intent items to keep the log moving. */
            error = xfs_defer_relog(tp, &dop_pending);
            if (error)
                goto out_shutdown;
        }

        dfp = list_first_entry(&dop_pending, struct xfs_defer_pending,
                       dfp_list);
        error = xfs_defer_finish_one(*tp, dfp);
        if (error && error != -EAGAIN)
            goto out_shutdown;
    }

    trace_xfs_defer_finish_done(*tp, _RET_IP_);
    return 0;

out_shutdown:
    xfs_defer_trans_abort(*tp, &dop_pending);
    xfs_force_shutdown((*tp)->t_mountp, SHUTDOWN_CORRUPT_INCORE);
    trace_xfs_defer_finish_error(*tp, error);
    xfs_defer_cancel_list((*tp)->t_mountp, &dop_pending);
    xfs_defer_cancel(*tp);
    return error;
}

int
xfs_defer_finish(
    struct xfs_trans    **tp)
{
    int         error;

    /*
     * Finish and roll the transaction once more to avoid returning to the
     * caller with a dirty transaction.
     */
    error = xfs_defer_finish_noroll(tp);
    if (error)
        return error;
    if ((*tp)->t_flags & XFS_TRANS_DIRTY) {
        error = xfs_defer_trans_roll(tp);
        if (error) {
            xfs_force_shutdown((*tp)->t_mountp,
                       SHUTDOWN_CORRUPT_INCORE);
            return error;
        }
    }

    /* Reset LOWMODE now that we've finished all the dfops. */
    ASSERT(list_empty(&(*tp)->t_dfops));
    (*tp)->t_flags &= ~XFS_TRANS_LOWMODE;
    return 0;
}

void
xfs_defer_cancel(
    struct xfs_trans    *tp)
{
    struct xfs_mount    *mp = tp->t_mountp;

    trace_xfs_defer_cancel(tp, _RET_IP_);
    xfs_defer_cancel_list(mp, &tp->t_dfops);
}

/* Add an item for later deferred processing. */
void
xfs_defer_add(
    struct xfs_trans        *tp,
    enum xfs_defer_ops_type     type,
    struct list_head        *li)
{
    struct xfs_defer_pending    *dfp = NULL;
    const struct xfs_defer_op_type  *ops;

    ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
    BUILD_BUG_ON(ARRAY_SIZE(defer_op_types) != XFS_DEFER_OPS_TYPE_MAX);

    /*
     * Reuse the last pending item on the intake list if it has the same
     * type and has not yet hit its per-type item limit.  Otherwise,
     * create a new pending item at the end of the intake list.
     */
    if (!list_empty(&tp->t_dfops)) {
        dfp = list_last_entry(&tp->t_dfops,
                struct xfs_defer_pending, dfp_list);
        ops = defer_op_types[dfp->dfp_type];
        if (dfp->dfp_type != type ||
            (ops->max_items && dfp->dfp_count >= ops->max_items))
            dfp = NULL;
    }
    if (!dfp) {
        dfp = kmem_cache_zalloc(xfs_defer_pending_cache,
                GFP_NOFS | __GFP_NOFAIL);
        dfp->dfp_type = type;
        dfp->dfp_intent = NULL;
        dfp->dfp_done = NULL;
        dfp->dfp_count = 0;
        INIT_LIST_HEAD(&dfp->dfp_work);
        list_add_tail(&dfp->dfp_list, &tp->t_dfops);
    }

    list_add_tail(li, &dfp->dfp_work);
    dfp->dfp_count++;
}
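
/*
 * Hypothetical usage sketch (the extent-free path does roughly this;
 * field names follow struct xfs_extent_free_item but are not verified
 * against any particular kernel version):
 *
 *	struct xfs_extent_free_item	*xefi;
 *
 *	xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
 *			GFP_KERNEL | __GFP_NOFAIL);
 *	xefi->xefi_startblock = bno;
 *	xefi->xefi_blockcount = len;
 *	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list);
 *
 * The embedded xefi_list is the list_head that xfs_defer_add threads
 * onto dfp_work above; ->finish_item and ->cancel_item get that same
 * list_head back and use container_of() to recover the work item.
 */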

/*
 * Move deferred ops from one transaction to another and reset the source to
 * initial state. This is primarily used to carry state forward across
 * transaction rolls with pending dfops.
 */
void
xfs_defer_move(
    struct xfs_trans    *dtp,
    struct xfs_trans    *stp)
{
    list_splice_init(&stp->t_dfops, &dtp->t_dfops);

    /*
     * Low free space mode was historically controlled by a dfops field.
     * This meant that low mode state potentially carried across multiple
     * transaction rolls. Transfer low mode on a dfops move to preserve
     * that behavior.
     */
    dtp->t_flags |= (stp->t_flags & XFS_TRANS_LOWMODE);
    stp->t_flags &= ~XFS_TRANS_LOWMODE;
}

/*
 * Prepare a chain of fresh deferred ops work items to be completed later.  Log
 * recovery requires the ability to put off until later the actual finishing
 * work so that it can process unfinished items recovered from the log in
 * correct order.
 *
 * Create and log intent items for all the work that we're capturing so that we
 * can be assured that the items will get replayed if the system goes down
 * before log recovery gets a chance to finish the work it put off.  The entire
 * deferred ops state is transferred to the capture structure and the
 * transaction is then ready for the caller to commit it.  If there are no
 * intent items to capture, this function returns NULL.
 *
 * Extra references are taken to any inodes and buffers held by the
 * transaction so that they survive in the capture structure until the
 * work is continued.
 */
static struct xfs_defer_capture *
xfs_defer_ops_capture(
    struct xfs_trans        *tp)
{
    struct xfs_defer_capture    *dfc;
    unsigned short          i;
    int             error;

    if (list_empty(&tp->t_dfops))
        return NULL;

    error = xfs_defer_create_intents(tp);
    if (error < 0)
        return ERR_PTR(error);

    /* Create an object to capture the defer ops. */
    dfc = kmem_zalloc(sizeof(*dfc), KM_NOFS);
    INIT_LIST_HEAD(&dfc->dfc_list);
    INIT_LIST_HEAD(&dfc->dfc_dfops);

    /* Move the dfops chain and transaction state to the capture struct. */
    list_splice_init(&tp->t_dfops, &dfc->dfc_dfops);
    dfc->dfc_tpflags = tp->t_flags & XFS_TRANS_LOWMODE;
    tp->t_flags &= ~XFS_TRANS_LOWMODE;

    /* Capture the remaining block reservations along with the dfops. */
    dfc->dfc_blkres = tp->t_blk_res - tp->t_blk_res_used;
    dfc->dfc_rtxres = tp->t_rtx_res - tp->t_rtx_res_used;

    /* Preserve the log reservation size. */
    dfc->dfc_logres = tp->t_log_res;

    error = xfs_defer_save_resources(&dfc->dfc_held, tp);
    if (error) {
        /*
         * Resource capture should never fail, but if it does, we
         * still have to shut down the log and release things
         * properly.
         */
        xfs_force_shutdown(tp->t_mountp, SHUTDOWN_CORRUPT_INCORE);
    }

    /*
     * Grab extra references to the inodes and buffers because callers are
     * expected to release their held references after we commit the
     * transaction.
     */
    for (i = 0; i < dfc->dfc_held.dr_inos; i++) {
        ASSERT(xfs_isilocked(dfc->dfc_held.dr_ip[i], XFS_ILOCK_EXCL));
        ihold(VFS_I(dfc->dfc_held.dr_ip[i]));
    }

    for (i = 0; i < dfc->dfc_held.dr_bufs; i++)
        xfs_buf_hold(dfc->dfc_held.dr_bp[i]);

    return dfc;
}

/* Release all resources that we used to capture deferred ops. */
void
xfs_defer_ops_capture_free(
    struct xfs_mount        *mp,
    struct xfs_defer_capture    *dfc)
{
    unsigned short          i;

    xfs_defer_cancel_list(mp, &dfc->dfc_dfops);

    for (i = 0; i < dfc->dfc_held.dr_bufs; i++)
        xfs_buf_relse(dfc->dfc_held.dr_bp[i]);

    for (i = 0; i < dfc->dfc_held.dr_inos; i++)
        xfs_irele(dfc->dfc_held.dr_ip[i]);

    kmem_free(dfc);
}

/*
 * Capture any deferred ops and commit the transaction.  This is the last step
 * needed to finish a log intent item that we recovered from the log.  If any
 * of the deferred ops operate on an inode, the caller must ensure that the
 * inode is joined to the transaction so that the reference can be transferred
 * to the capture structure.  The caller must hold ILOCK_EXCL on each such
 * inode, and must unlock it before calling xfs_defer_ops_continue.
 */
int
xfs_defer_ops_capture_and_commit(
    struct xfs_trans        *tp,
    struct list_head        *capture_list)
{
    struct xfs_mount        *mp = tp->t_mountp;
    struct xfs_defer_capture    *dfc;
    int             error;

    /* If we don't capture anything, commit transaction and exit. */
    dfc = xfs_defer_ops_capture(tp);
    if (IS_ERR(dfc)) {
        xfs_trans_cancel(tp);
        return PTR_ERR(dfc);
    }
    if (!dfc)
        return xfs_trans_commit(tp);

    /* Commit the transaction and add the capture structure to the list. */
    error = xfs_trans_commit(tp);
    if (error) {
        xfs_defer_ops_capture_free(mp, dfc);
        return error;
    }

    list_add_tail(&dfc->dfc_list, capture_list);
    return 0;
}

/*
 * Attach a chain of captured deferred ops to a new transaction and free the
 * capture structure.  If an inode was captured, it will be passed back to the
 * caller with ILOCK_EXCL held and joined to the transaction with lockflags==0.
 * The caller now owns the inode reference.
 */
void
xfs_defer_ops_continue(
    struct xfs_defer_capture    *dfc,
    struct xfs_trans        *tp,
    struct xfs_defer_resources  *dres)
{
    unsigned int            i;

    ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
    ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY));

    /* Lock the captured resources to the new transaction. */
    if (dfc->dfc_held.dr_inos == 2)
        xfs_lock_two_inodes(dfc->dfc_held.dr_ip[0], XFS_ILOCK_EXCL,
                    dfc->dfc_held.dr_ip[1], XFS_ILOCK_EXCL);
    else if (dfc->dfc_held.dr_inos == 1)
        xfs_ilock(dfc->dfc_held.dr_ip[0], XFS_ILOCK_EXCL);

    for (i = 0; i < dfc->dfc_held.dr_bufs; i++)
        xfs_buf_lock(dfc->dfc_held.dr_bp[i]);

    /* Join the captured resources to the new transaction. */
    xfs_defer_restore_resources(tp, &dfc->dfc_held);
    memcpy(dres, &dfc->dfc_held, sizeof(struct xfs_defer_resources));
    dres->dr_bufs = 0;

    /* Move captured dfops chain and state to the transaction. */
    list_splice_init(&dfc->dfc_dfops, &tp->t_dfops);
    tp->t_flags |= dfc->dfc_tpflags;

    kmem_free(dfc);
}
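
/*
 * Sketch of how log recovery pairs capture and continue (simplified
 * from the intent-replay code in xfs_log_recover.c; error handling is
 * elided and @resv is assumed to be a suitable reservation built from
 * dfc_logres):
 *
 *	While replaying one recovered intent item:
 *		error = xfs_defer_ops_capture_and_commit(tp, capture_list);
 *
 *	Later, for each capture structure on capture_list:
 *		error = xfs_trans_alloc(mp, &resv, dfc->dfc_blkres,
 *				dfc->dfc_rtxres, XFS_TRANS_RESERVE, &tp);
 *		xfs_defer_ops_continue(dfc, tp, &dres);
 *		error = xfs_trans_commit(tp);
 *		xfs_defer_resources_rele(&dres);
 */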

/* Release the resources captured and continued during recovery. */
void
xfs_defer_resources_rele(
    struct xfs_defer_resources  *dres)
{
    unsigned short          i;

    for (i = 0; i < dres->dr_inos; i++) {
        xfs_iunlock(dres->dr_ip[i], XFS_ILOCK_EXCL);
        xfs_irele(dres->dr_ip[i]);
        dres->dr_ip[i] = NULL;
    }

    for (i = 0; i < dres->dr_bufs; i++) {
        xfs_buf_relse(dres->dr_bp[i]);
        dres->dr_bp[i] = NULL;
    }

    dres->dr_inos = 0;
    dres->dr_bufs = 0;
    dres->dr_ordered = 0;
}

static inline int __init
xfs_defer_init_cache(void)
{
    xfs_defer_pending_cache = kmem_cache_create("xfs_defer_pending",
            sizeof(struct xfs_defer_pending),
            0, 0, NULL);

    return xfs_defer_pending_cache != NULL ? 0 : -ENOMEM;
}

static inline void
xfs_defer_destroy_cache(void)
{
    kmem_cache_destroy(xfs_defer_pending_cache);
    xfs_defer_pending_cache = NULL;
}

/* Set up caches for deferred work items. */
int __init
xfs_defer_init_item_caches(void)
{
    int             error;

    error = xfs_defer_init_cache();
    if (error)
        return error;
    error = xfs_rmap_intent_init_cache();
    if (error)
        goto err;
    error = xfs_refcount_intent_init_cache();
    if (error)
        goto err;
    error = xfs_bmap_intent_init_cache();
    if (error)
        goto err;
    error = xfs_extfree_intent_init_cache();
    if (error)
        goto err;
    error = xfs_attr_intent_init_cache();
    if (error)
        goto err;
    return 0;
err:
    xfs_defer_destroy_item_caches();
    return error;
}

/* Destroy all the deferred work item caches, if they've been allocated. */
void
xfs_defer_destroy_item_caches(void)
{
    xfs_attr_intent_destroy_cache();
    xfs_extfree_intent_destroy_cache();
    xfs_bmap_intent_destroy_cache();
    xfs_refcount_intent_destroy_cache();
    xfs_rmap_intent_destroy_cache();
    xfs_defer_destroy_cache();
}