// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
 * Copyright (C) 2010 Red Hat, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_extent_busy.h"
#include "xfs_quota.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
#include "xfs_trace.h"
#include "xfs_error.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_dquot_item.h"
#include "xfs_dquot.h"
#include "xfs_icache.h"

struct kmem_cache   *xfs_trans_cache;

#if defined(CONFIG_TRACEPOINTS)
static void
xfs_trans_trace_reservations(
    struct xfs_mount    *mp)
{
    struct xfs_trans_res    *res;
    struct xfs_trans_res    *end_res;
    int         i;

    res = (struct xfs_trans_res *)M_RES(mp);
    end_res = (struct xfs_trans_res *)(M_RES(mp) + 1);
    for (i = 0; res < end_res; i++, res++)
        trace_xfs_trans_resv_calc(mp, i, res);
}
#else
# define xfs_trans_trace_reservations(mp)
#endif

/*
 * Initialize the precomputed transaction reservation values
 * in the mount structure.
 */
void
xfs_trans_init(
    struct xfs_mount    *mp)
{
    xfs_trans_resv_calc(mp, M_RES(mp));
    xfs_trans_trace_reservations(mp);
}

/*
 * Free the transaction structure.  If there is more clean up
 * to do when the structure is freed, add it here.
 */
STATIC void
xfs_trans_free(
    struct xfs_trans    *tp)
{
    xfs_extent_busy_sort(&tp->t_busy);
    xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);

    trace_xfs_trans_free(tp, _RET_IP_);
    xfs_trans_clear_context(tp);
    if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT))
        sb_end_intwrite(tp->t_mountp->m_super);
    xfs_trans_free_dqinfo(tp);
    kmem_cache_free(xfs_trans_cache, tp);
}

/*
 * This is called to create a new transaction which will share the
 * permanent log reservation of the given transaction.  The remaining
 * unused block and rt extent reservations are also inherited.  This
 * implies that the original transaction is no longer allowed to allocate
 * blocks.  Locks and log items, however, are not inherited.  They must
 * be added to the new transaction explicitly.
 */
STATIC struct xfs_trans *
xfs_trans_dup(
    struct xfs_trans    *tp)
{
    struct xfs_trans    *ntp;

    trace_xfs_trans_dup(tp, _RET_IP_);

    ntp = kmem_cache_zalloc(xfs_trans_cache, GFP_KERNEL | __GFP_NOFAIL);

    /*
     * Initialize the new transaction structure.
     */
    ntp->t_magic = XFS_TRANS_HEADER_MAGIC;
    ntp->t_mountp = tp->t_mountp;
    INIT_LIST_HEAD(&ntp->t_items);
    INIT_LIST_HEAD(&ntp->t_busy);
    INIT_LIST_HEAD(&ntp->t_dfops);
    ntp->t_firstblock = NULLFSBLOCK;

    ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
    ASSERT(tp->t_ticket != NULL);

    ntp->t_flags = XFS_TRANS_PERM_LOG_RES |
               (tp->t_flags & XFS_TRANS_RESERVE) |
               (tp->t_flags & XFS_TRANS_NO_WRITECOUNT) |
               (tp->t_flags & XFS_TRANS_RES_FDBLKS);
    /* We gave our writer reference to the new transaction */
    tp->t_flags |= XFS_TRANS_NO_WRITECOUNT;
    ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket);

    ASSERT(tp->t_blk_res >= tp->t_blk_res_used);
    ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used;
    tp->t_blk_res = tp->t_blk_res_used;

    ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used;
    tp->t_rtx_res = tp->t_rtx_res_used;

    xfs_trans_switch_context(tp, ntp);

    /* move deferred ops over to the new tp */
    xfs_defer_move(ntp, tp);

    xfs_trans_dup_dqinfo(tp, ntp);
    return ntp;
}

/*
 * This is called to reserve free disk blocks and log space for the
 * given transaction.  This must be done before allocating any resources
 * within the transaction.
 *
 * This will return ENOSPC if there are not enough blocks available.
 * It will sleep waiting for available log space.
 * The only valid flag in resp->tr_logflags is XFS_TRANS_PERM_LOG_RES,
 * which is used by long running transactions.  If any one of the
 * reservations fails then they will all be backed out.
 *
 * This does not do quota reservations. That typically is done by the
 * caller afterwards.
 */
static int
xfs_trans_reserve(
    struct xfs_trans    *tp,
    struct xfs_trans_res    *resp,
    uint            blocks,
    uint            rtextents)
{
    struct xfs_mount    *mp = tp->t_mountp;
    int         error = 0;
    bool            rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;

    /*
     * Attempt to reserve the needed disk blocks by decrementing
     * the number needed from the number available.  This will
     * fail if the count would go below zero.
     */
    if (blocks > 0) {
        error = xfs_mod_fdblocks(mp, -((int64_t)blocks), rsvd);
        if (error != 0)
            return -ENOSPC;
        tp->t_blk_res += blocks;
    }

    /*
     * Reserve the log space needed for this transaction.
     */
    if (resp->tr_logres > 0) {
        bool    permanent = false;

        ASSERT(tp->t_log_res == 0 ||
               tp->t_log_res == resp->tr_logres);
        ASSERT(tp->t_log_count == 0 ||
               tp->t_log_count == resp->tr_logcount);

        if (resp->tr_logflags & XFS_TRANS_PERM_LOG_RES) {
            tp->t_flags |= XFS_TRANS_PERM_LOG_RES;
            permanent = true;
        } else {
            ASSERT(tp->t_ticket == NULL);
            ASSERT(!(tp->t_flags & XFS_TRANS_PERM_LOG_RES));
        }

        if (tp->t_ticket != NULL) {
            ASSERT(resp->tr_logflags & XFS_TRANS_PERM_LOG_RES);
            error = xfs_log_regrant(mp, tp->t_ticket);
        } else {
            error = xfs_log_reserve(mp, resp->tr_logres,
                        resp->tr_logcount,
                        &tp->t_ticket, permanent);
        }

        if (error)
            goto undo_blocks;

        tp->t_log_res = resp->tr_logres;
        tp->t_log_count = resp->tr_logcount;
    }

    /*
     * Attempt to reserve the needed realtime extents by decrementing
     * the number needed from the number available.  This will
     * fail if the count would go below zero.
     */
    if (rtextents > 0) {
        error = xfs_mod_frextents(mp, -((int64_t)rtextents));
        if (error) {
            error = -ENOSPC;
            goto undo_log;
        }
        tp->t_rtx_res += rtextents;
    }

    return 0;

    /*
     * Error cases jump to one of these labels to undo any
     * reservations which have already been performed.
     */
undo_log:
    if (resp->tr_logres > 0) {
        xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket);
        tp->t_ticket = NULL;
        tp->t_log_res = 0;
        tp->t_flags &= ~XFS_TRANS_PERM_LOG_RES;
    }

undo_blocks:
    if (blocks > 0) {
        xfs_mod_fdblocks(mp, (int64_t)blocks, rsvd);
        tp->t_blk_res = 0;
    }
    return error;
}

int
xfs_trans_alloc(
    struct xfs_mount    *mp,
    struct xfs_trans_res    *resp,
    uint            blocks,
    uint            rtextents,
    uint            flags,
    struct xfs_trans    **tpp)
{
    struct xfs_trans    *tp;
    bool            want_retry = true;
    int         error;

    /*
     * Allocate the handle before we do our freeze accounting and setting up
     * GFP_NOFS allocation context so that we avoid lockdep false positives
     * by doing GFP_KERNEL allocations inside sb_start_intwrite().
     */
retry:
    tp = kmem_cache_zalloc(xfs_trans_cache, GFP_KERNEL | __GFP_NOFAIL);
    if (!(flags & XFS_TRANS_NO_WRITECOUNT))
        sb_start_intwrite(mp->m_super);
    xfs_trans_set_context(tp);

    /*
     * Zero-reservation ("empty") transactions can't modify anything, so
     * they're allowed to run while we're frozen.
     */
    WARN_ON(resp->tr_logres > 0 &&
        mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
    ASSERT(!(flags & XFS_TRANS_RES_FDBLKS) ||
           xfs_has_lazysbcount(mp));

    tp->t_magic = XFS_TRANS_HEADER_MAGIC;
    tp->t_flags = flags;
    tp->t_mountp = mp;
    INIT_LIST_HEAD(&tp->t_items);
    INIT_LIST_HEAD(&tp->t_busy);
    INIT_LIST_HEAD(&tp->t_dfops);
    tp->t_firstblock = NULLFSBLOCK;

    error = xfs_trans_reserve(tp, resp, blocks, rtextents);
    if (error == -ENOSPC && want_retry) {
        xfs_trans_cancel(tp);

        /*
         * We weren't able to reserve enough space for the transaction.
         * Flush the other speculative space allocations to free space.
         * Do not perform a synchronous scan because callers can hold
         * other locks.
         */
        xfs_blockgc_flush_all(mp);
        want_retry = false;
        goto retry;
    }
    if (error) {
        xfs_trans_cancel(tp);
        return error;
    }

    trace_xfs_trans_alloc(tp, _RET_IP_);

    *tpp = tp;
    return 0;
}
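
/*
 * Illustrative usage sketch (editorial example, not part of this file):
 * a typical short-lived caller pairs xfs_trans_alloc() with exactly one
 * commit or cancel.  The tr_itruncate reservation is just one plausible
 * choice; real callers pick the reservation matching their operation.
 *
 *    struct xfs_trans    *tp;
 *    int                 error;
 *
 *    error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
 *    if (error)
 *        return error;
 *    ... join items to tp and modify them, dirtying the transaction ...
 *    return xfs_trans_commit(tp);    (frees tp on success and failure)
 */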

/*
 * Create an empty transaction with no reservation.  This is a defensive
 * mechanism for routines that query metadata without actually modifying it --
 * if the metadata being queried is somehow cross-linked (think a btree block
 * pointer that points higher in the tree), we risk deadlock.  However, blocks
 * grabbed as part of a transaction can be re-grabbed.  The verifiers will
 * notice the corrupt block and the operation will fail back to userspace
 * without deadlocking.
 *
 * Note the zero-length reservation; this transaction MUST be cancelled without
 * any dirty data.
 *
 * Callers should obtain freeze protection to avoid a conflict with fs freezing
 * where we can be grabbing buffers at the same time that freeze is trying to
 * drain the buffer LRU list.
 */
int
xfs_trans_alloc_empty(
    struct xfs_mount        *mp,
    struct xfs_trans        **tpp)
{
    struct xfs_trans_res        resv = {0};

    return xfs_trans_alloc(mp, &resv, 0, 0, XFS_TRANS_NO_WRITECOUNT, tpp);
}
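
/*
 * Minimal usage sketch (editorial example; the caller shown is
 * hypothetical): an empty transaction lets a read-only metadata walk
 * re-grab buffers it already holds without deadlocking, and it must end
 * in xfs_trans_cancel() because nothing may be dirtied.
 *
 *    struct xfs_trans    *tp;
 *    int                 error;
 *
 *    error = xfs_trans_alloc_empty(mp, &tp);
 *    if (error)
 *        return error;
 *    ... read-only metadata walk using tp ...
 *    xfs_trans_cancel(tp);
 */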

/*
 * Record the indicated change to the given field for application
 * to the file system's superblock when the transaction commits.
 * For now, just store the change in the transaction structure.
 *
 * Mark the transaction structure to indicate that the superblock
 * needs to be updated before committing.
 *
 * Because we may not be keeping track of allocated/free inodes and
 * used filesystem blocks in the superblock, we do not mark the
 * superblock dirty in this transaction if we modify these fields.
 * We still need to update the transaction deltas so that they get
 * applied to the incore superblock, but we don't want them to
 * cause the superblock to get locked and logged if these are the
 * only fields in the superblock that the transaction modifies.
 */
void
xfs_trans_mod_sb(
    xfs_trans_t *tp,
    uint        field,
    int64_t     delta)
{
    uint32_t    flags = (XFS_TRANS_DIRTY|XFS_TRANS_SB_DIRTY);
    xfs_mount_t *mp = tp->t_mountp;

    switch (field) {
    case XFS_TRANS_SB_ICOUNT:
        tp->t_icount_delta += delta;
        if (xfs_has_lazysbcount(mp))
            flags &= ~XFS_TRANS_SB_DIRTY;
        break;
    case XFS_TRANS_SB_IFREE:
        tp->t_ifree_delta += delta;
        if (xfs_has_lazysbcount(mp))
            flags &= ~XFS_TRANS_SB_DIRTY;
        break;
    case XFS_TRANS_SB_FDBLOCKS:
        /*
         * Track the number of blocks allocated in the transaction.
         * Make sure it does not exceed the number reserved. If so,
         * shutdown as this can lead to accounting inconsistency.
         */
        if (delta < 0) {
            tp->t_blk_res_used += (uint)-delta;
            if (tp->t_blk_res_used > tp->t_blk_res)
                xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
        } else if (delta > 0 && (tp->t_flags & XFS_TRANS_RES_FDBLKS)) {
            int64_t blkres_delta;

            /*
             * Return freed blocks directly to the reservation
             * instead of the global pool, being careful not to
             * overflow the trans counter. This is used to preserve
             * reservation across chains of transaction rolls that
             * repeatedly free and allocate blocks.
             */
            blkres_delta = min_t(int64_t, delta,
                         UINT_MAX - tp->t_blk_res);
            tp->t_blk_res += blkres_delta;
            delta -= blkres_delta;
        }
        tp->t_fdblocks_delta += delta;
        if (xfs_has_lazysbcount(mp))
            flags &= ~XFS_TRANS_SB_DIRTY;
        break;
    case XFS_TRANS_SB_RES_FDBLOCKS:
        /*
         * The allocation has already been applied to the
         * in-core superblock's counter.  This should only
         * be applied to the on-disk superblock.
         */
        tp->t_res_fdblocks_delta += delta;
        if (xfs_has_lazysbcount(mp))
            flags &= ~XFS_TRANS_SB_DIRTY;
        break;
    case XFS_TRANS_SB_FREXTENTS:
        /*
         * Track the number of realtime extents allocated in the
         * transaction.  Make sure it does not exceed the
         * number reserved.
         */
        if (delta < 0) {
            tp->t_rtx_res_used += (uint)-delta;
            ASSERT(tp->t_rtx_res_used <= tp->t_rtx_res);
        }
        tp->t_frextents_delta += delta;
        break;
    case XFS_TRANS_SB_RES_FREXTENTS:
        /*
         * The allocation has already been applied to the
         * in-core superblock's counter.  This should only
         * be applied to the on-disk superblock.
         */
        ASSERT(delta < 0);
        tp->t_res_frextents_delta += delta;
        break;
    case XFS_TRANS_SB_DBLOCKS:
        tp->t_dblocks_delta += delta;
        break;
    case XFS_TRANS_SB_AGCOUNT:
        ASSERT(delta > 0);
        tp->t_agcount_delta += delta;
        break;
    case XFS_TRANS_SB_IMAXPCT:
        tp->t_imaxpct_delta += delta;
        break;
    case XFS_TRANS_SB_REXTSIZE:
        tp->t_rextsize_delta += delta;
        break;
    case XFS_TRANS_SB_RBMBLOCKS:
        tp->t_rbmblocks_delta += delta;
        break;
    case XFS_TRANS_SB_RBLOCKS:
        tp->t_rblocks_delta += delta;
        break;
    case XFS_TRANS_SB_REXTENTS:
        tp->t_rextents_delta += delta;
        break;
    case XFS_TRANS_SB_REXTSLOG:
        tp->t_rextslog_delta += delta;
        break;
    default:
        ASSERT(0);
        return;
    }

    tp->t_flags |= flags;
}
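
/*
 * Sketch of a typical caller (editorial example; the three-block delta is
 * arbitrary): allocating blocks passes a negative delta, which is charged
 * against the transaction's block reservation and queued as a superblock
 * counter change for commit time.
 *
 *    xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, -3LL);
 */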

/*
 * xfs_trans_apply_sb_deltas() is called from the commit code
 * to bring the superblock buffer into the current transaction
 * and modify it as requested by earlier calls to xfs_trans_mod_sb().
 *
 * For now we just look at each field allowed to change and change
 * it if necessary.
 */
STATIC void
xfs_trans_apply_sb_deltas(
    xfs_trans_t *tp)
{
    struct xfs_dsb  *sbp;
    struct xfs_buf  *bp;
    int     whole = 0;

    bp = xfs_trans_getsb(tp);
    sbp = bp->b_addr;

    /*
     * Only update the superblock counters if we are logging them.
     */
    if (!xfs_has_lazysbcount(tp->t_mountp)) {
        if (tp->t_icount_delta)
            be64_add_cpu(&sbp->sb_icount, tp->t_icount_delta);
        if (tp->t_ifree_delta)
            be64_add_cpu(&sbp->sb_ifree, tp->t_ifree_delta);
        if (tp->t_fdblocks_delta)
            be64_add_cpu(&sbp->sb_fdblocks, tp->t_fdblocks_delta);
        if (tp->t_res_fdblocks_delta)
            be64_add_cpu(&sbp->sb_fdblocks, tp->t_res_fdblocks_delta);
    }

    /*
     * Updating frextents requires careful handling because it does not
     * behave like the lazysb counters because we cannot rely on log
     * recovery in older kernels to recompute the value from the rtbitmap.
     * This means that the ondisk frextents must be consistent with the
     * rtbitmap.
     *
     * Therefore, log the frextents change to the ondisk superblock and
     * update the incore superblock so that future calls to xfs_log_sb
     * write the correct value ondisk.
     *
     * Don't touch m_frextents because it includes incore reservations,
     * and those are handled by the unreserve function.
     */
    if (tp->t_frextents_delta || tp->t_res_frextents_delta) {
        struct xfs_mount    *mp = tp->t_mountp;
        int64_t         rtxdelta;

        rtxdelta = tp->t_frextents_delta + tp->t_res_frextents_delta;

        spin_lock(&mp->m_sb_lock);
        be64_add_cpu(&sbp->sb_frextents, rtxdelta);
        mp->m_sb.sb_frextents += rtxdelta;
        spin_unlock(&mp->m_sb_lock);
    }

    if (tp->t_dblocks_delta) {
        be64_add_cpu(&sbp->sb_dblocks, tp->t_dblocks_delta);
        whole = 1;
    }
    if (tp->t_agcount_delta) {
        be32_add_cpu(&sbp->sb_agcount, tp->t_agcount_delta);
        whole = 1;
    }
    if (tp->t_imaxpct_delta) {
        sbp->sb_imax_pct += tp->t_imaxpct_delta;
        whole = 1;
    }
    if (tp->t_rextsize_delta) {
        be32_add_cpu(&sbp->sb_rextsize, tp->t_rextsize_delta);
        whole = 1;
    }
    if (tp->t_rbmblocks_delta) {
        be32_add_cpu(&sbp->sb_rbmblocks, tp->t_rbmblocks_delta);
        whole = 1;
    }
    if (tp->t_rblocks_delta) {
        be64_add_cpu(&sbp->sb_rblocks, tp->t_rblocks_delta);
        whole = 1;
    }
    if (tp->t_rextents_delta) {
        be64_add_cpu(&sbp->sb_rextents, tp->t_rextents_delta);
        whole = 1;
    }
    if (tp->t_rextslog_delta) {
        sbp->sb_rextslog += tp->t_rextslog_delta;
        whole = 1;
    }

    xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
    if (whole)
        /*
         * Log the whole thing, the fields are noncontiguous.
         */
        xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb) - 1);
    else
        /*
         * Since all the modifiable fields are contiguous, we
         * can get away with this.
         */
        xfs_trans_log_buf(tp, bp, offsetof(struct xfs_dsb, sb_icount),
                  offsetof(struct xfs_dsb, sb_frextents) +
                  sizeof(sbp->sb_frextents) - 1);
}

/*
 * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations
 * and apply superblock counter changes to the in-core superblock.  The
 * t_res_fdblocks_delta and t_res_frextents_delta fields are explicitly NOT
 * applied to the in-core superblock, on the assumption that this has already
 * been done.
 *
 * If we are not logging superblock counters, then the inode allocated/free and
 * used block counts are not updated in the on disk superblock. In this case,
 * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we
 * still need to update the incore superblock with the changes.
 *
 * Deltas for the inode count are +/-64, hence we use a large batch size of 128
 * so we don't need to take the counter lock on every update.
 */
#define XFS_ICOUNT_BATCH    128

void
xfs_trans_unreserve_and_mod_sb(
    struct xfs_trans    *tp)
{
    struct xfs_mount    *mp = tp->t_mountp;
    bool            rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
    int64_t         blkdelta = 0;
    int64_t         rtxdelta = 0;
    int64_t         idelta = 0;
    int64_t         ifreedelta = 0;
    int         error;

    /* calculate deltas */
    if (tp->t_blk_res > 0)
        blkdelta = tp->t_blk_res;
    if ((tp->t_fdblocks_delta != 0) &&
        (xfs_has_lazysbcount(mp) ||
         (tp->t_flags & XFS_TRANS_SB_DIRTY)))
            blkdelta += tp->t_fdblocks_delta;

    if (tp->t_rtx_res > 0)
        rtxdelta = tp->t_rtx_res;
    if ((tp->t_frextents_delta != 0) &&
        (tp->t_flags & XFS_TRANS_SB_DIRTY))
        rtxdelta += tp->t_frextents_delta;

    if (xfs_has_lazysbcount(mp) ||
         (tp->t_flags & XFS_TRANS_SB_DIRTY)) {
        idelta = tp->t_icount_delta;
        ifreedelta = tp->t_ifree_delta;
    }

    /* apply the per-cpu counters */
    if (blkdelta) {
        error = xfs_mod_fdblocks(mp, blkdelta, rsvd);
        ASSERT(!error);
    }

    if (idelta)
        percpu_counter_add_batch(&mp->m_icount, idelta,
                     XFS_ICOUNT_BATCH);

    if (ifreedelta)
        percpu_counter_add(&mp->m_ifree, ifreedelta);

    if (rtxdelta) {
        error = xfs_mod_frextents(mp, rtxdelta);
        ASSERT(!error);
    }

    if (!(tp->t_flags & XFS_TRANS_SB_DIRTY))
        return;

    /* apply remaining deltas */
    spin_lock(&mp->m_sb_lock);
    mp->m_sb.sb_fdblocks += tp->t_fdblocks_delta + tp->t_res_fdblocks_delta;
    mp->m_sb.sb_icount += idelta;
    mp->m_sb.sb_ifree += ifreedelta;
    /*
     * Do not touch sb_frextents here because we are dealing with incore
     * reservation.  sb_frextents is not part of the lazy sb counters so it
     * must be consistent with the ondisk rtbitmap and must never include
     * incore reservations.
     */
    mp->m_sb.sb_dblocks += tp->t_dblocks_delta;
    mp->m_sb.sb_agcount += tp->t_agcount_delta;
    mp->m_sb.sb_imax_pct += tp->t_imaxpct_delta;
    mp->m_sb.sb_rextsize += tp->t_rextsize_delta;
    mp->m_sb.sb_rbmblocks += tp->t_rbmblocks_delta;
    mp->m_sb.sb_rblocks += tp->t_rblocks_delta;
    mp->m_sb.sb_rextents += tp->t_rextents_delta;
    mp->m_sb.sb_rextslog += tp->t_rextslog_delta;
    spin_unlock(&mp->m_sb_lock);

    /*
     * Debug checks outside of the spinlock so they don't lock up the
     * machine if they fail.
     */
    ASSERT(mp->m_sb.sb_imax_pct >= 0);
    ASSERT(mp->m_sb.sb_rextslog >= 0);
    return;
}

/* Add the given log item to the transaction's list of log items. */
void
xfs_trans_add_item(
    struct xfs_trans    *tp,
    struct xfs_log_item *lip)
{
    ASSERT(lip->li_log == tp->t_mountp->m_log);
    ASSERT(lip->li_ailp == tp->t_mountp->m_ail);
    ASSERT(list_empty(&lip->li_trans));
    ASSERT(!test_bit(XFS_LI_DIRTY, &lip->li_flags));

    list_add_tail(&lip->li_trans, &tp->t_items);
    trace_xfs_trans_add_item(tp, _RET_IP_);
}

/*
 * Unlink the log item from the transaction.  The log item is no longer
 * considered dirty in this transaction, as the linked transaction has
 * finished, either by abort or commit completion.
 */
void
xfs_trans_del_item(
    struct xfs_log_item *lip)
{
    clear_bit(XFS_LI_DIRTY, &lip->li_flags);
    list_del_init(&lip->li_trans);
}

/* Detach and unlock all of the items in a transaction */
static void
xfs_trans_free_items(
    struct xfs_trans    *tp,
    bool            abort)
{
    struct xfs_log_item *lip, *next;

    trace_xfs_trans_free_items(tp, _RET_IP_);

    list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
        xfs_trans_del_item(lip);
        if (abort)
            set_bit(XFS_LI_ABORTED, &lip->li_flags);
        if (lip->li_ops->iop_release)
            lip->li_ops->iop_release(lip);
    }
}

static inline void
xfs_log_item_batch_insert(
    struct xfs_ail      *ailp,
    struct xfs_ail_cursor   *cur,
    struct xfs_log_item **log_items,
    int         nr_items,
    xfs_lsn_t       commit_lsn)
{
    int i;

    spin_lock(&ailp->ail_lock);
    /* xfs_trans_ail_update_bulk drops ailp->ail_lock */
    xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn);

    for (i = 0; i < nr_items; i++) {
        struct xfs_log_item *lip = log_items[i];

        if (lip->li_ops->iop_unpin)
            lip->li_ops->iop_unpin(lip, 0);
    }
}

/*
 * Bulk operation version of xfs_trans_committed that takes a log vector of
 * items to insert into the AIL. This uses bulk AIL insertion techniques to
 * minimise lock traffic.
 *
 * If we are called with the aborted flag set, it is because a log write during
 * a CIL checkpoint commit has failed. In this case, all the items in the
 * checkpoint have already gone through iop_committed and iop_committing, which
 * means that checkpoint commit abort handling is treated exactly the same
 * as an iclog write error even though we haven't started any IO yet. Hence in
 * this case all we need to do is iop_committed processing, followed by an
 * iop_unpin(aborted) call.
 *
 * The AIL cursor is used to optimise the insert process. If commit_lsn is not
 * at the end of the AIL, the insert cursor avoids the need to walk
 * the AIL to find the insertion point on every xfs_log_item_batch_insert()
 * call. This saves a lot of needless list walking and is a net win, even
 * though it slightly increases the amount of AIL lock traffic to set it up
 * and tear it down.
 */
void
xfs_trans_committed_bulk(
    struct xfs_ail      *ailp,
    struct list_head    *lv_chain,
    xfs_lsn_t       commit_lsn,
    bool            aborted)
{
#define LOG_ITEM_BATCH_SIZE 32
    struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE];
    struct xfs_log_vec  *lv;
    struct xfs_ail_cursor   cur;
    int         i = 0;

    spin_lock(&ailp->ail_lock);
    xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn);
    spin_unlock(&ailp->ail_lock);

    /* unpin all the log items */
    list_for_each_entry(lv, lv_chain, lv_list) {
        struct xfs_log_item *lip = lv->lv_item;
        xfs_lsn_t       item_lsn;

        if (aborted)
            set_bit(XFS_LI_ABORTED, &lip->li_flags);

        if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) {
            lip->li_ops->iop_release(lip);
            continue;
        }

        if (lip->li_ops->iop_committed)
            item_lsn = lip->li_ops->iop_committed(lip, commit_lsn);
        else
            item_lsn = commit_lsn;

        /* item_lsn of -1 means the item needs no further processing */
        if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
            continue;

        /*
         * if we are aborting the operation, no point in inserting the
         * object into the AIL as we are in a shutdown situation.
         */
        if (aborted) {
            ASSERT(xlog_is_shutdown(ailp->ail_log));
            if (lip->li_ops->iop_unpin)
                lip->li_ops->iop_unpin(lip, 1);
            continue;
        }

        if (item_lsn != commit_lsn) {

            /*
             * Not a bulk update option due to unusual item_lsn.
             * Push into AIL immediately, rechecking the lsn once
             * we have the ail lock. Then unpin the item. This does
             * not affect the AIL cursor the bulk insert path is
             * using.
             */
            spin_lock(&ailp->ail_lock);
            if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0)
                xfs_trans_ail_update(ailp, lip, item_lsn);
            else
                spin_unlock(&ailp->ail_lock);
            if (lip->li_ops->iop_unpin)
                lip->li_ops->iop_unpin(lip, 0);
            continue;
        }

        /* Item is a candidate for bulk AIL insert.  */
        log_items[i++] = lv->lv_item;
        if (i >= LOG_ITEM_BATCH_SIZE) {
            xfs_log_item_batch_insert(ailp, &cur, log_items,
                    LOG_ITEM_BATCH_SIZE, commit_lsn);
            i = 0;
        }
    }

    /* make sure we insert the remainder! */
    if (i)
        xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn);

    spin_lock(&ailp->ail_lock);
    xfs_trans_ail_cursor_done(&cur);
    spin_unlock(&ailp->ail_lock);
}

/*
 * Sort transaction items prior to running precommit operations. This will
 * attempt to order the items such that they will always be locked in the same
 * order. Items that have no sort function are moved to the end of the list
 * and so are locked last.
 *
 * This may need refinement as different types of objects add sort functions.
 *
 * Function is more complex than it needs to be because we are comparing 64-bit
 * values and the function only returns 32-bit values.
 */
static int
xfs_trans_precommit_sort(
    void            *unused_arg,
    const struct list_head  *a,
    const struct list_head  *b)
{
    struct xfs_log_item *lia = container_of(a,
                    struct xfs_log_item, li_trans);
    struct xfs_log_item *lib = container_of(b,
                    struct xfs_log_item, li_trans);
    int64_t         diff;

    /*
     * If both items are non-sortable, leave them alone. If only one is
     * sortable, move the non-sortable item towards the end of the list.
     */
    if (!lia->li_ops->iop_sort && !lib->li_ops->iop_sort)
        return 0;
    if (!lia->li_ops->iop_sort)
        return 1;
    if (!lib->li_ops->iop_sort)
        return -1;

    diff = lia->li_ops->iop_sort(lia) - lib->li_ops->iop_sort(lib);
    if (diff < 0)
        return -1;
    if (diff > 0)
        return 1;
    return 0;
}

/*
 * Run transaction precommit functions.
 *
 * If there is an error in any of the callouts, then stop immediately and
 * trigger a shutdown to abort the transaction. There is no recovery possible
 * from errors at this point as the transaction is dirty....
 */
static int
xfs_trans_run_precommits(
    struct xfs_trans    *tp)
{
    struct xfs_mount    *mp = tp->t_mountp;
    struct xfs_log_item *lip, *n;
    int         error = 0;

    /*
     * Sort the item list to avoid ABBA deadlocks with other transactions
     * running precommit operations that lock multiple shared items such as
     * inode cluster buffers.
     */
    list_sort(NULL, &tp->t_items, xfs_trans_precommit_sort);

    /*
     * Precommit operations can remove the log item from the transaction
     * if the log item exists purely to delay modifications until they
     * can be ordered against other operations. Hence we have to use
     * list_for_each_entry_safe() here.
     */
    list_for_each_entry_safe(lip, n, &tp->t_items, li_trans) {
        if (!test_bit(XFS_LI_DIRTY, &lip->li_flags))
            continue;
        if (lip->li_ops->iop_precommit) {
            error = lip->li_ops->iop_precommit(tp, lip);
            if (error)
                break;
        }
    }
    if (error)
        xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
    return error;
}

/*
 * Commit the given transaction to the log.
 *
 * XFS's disk error handling is not based on a typical transaction abort
 * mechanism.  Logically, after the filesystem gets marked 'SHUTDOWN' we
 * can't let any new transactions be durable - i.e. committed to disk -
 * because some metadata might be inconsistent.  In such cases this
 * returns an error, and the caller may assume that all locked objects
 * joined to the transaction have already been unlocked as if the commit
 * had succeeded.  Do not reference the transaction structure after this
 * call.
 */
static int
__xfs_trans_commit(
    struct xfs_trans    *tp,
    bool            regrant)
{
    struct xfs_mount    *mp = tp->t_mountp;
    struct xlog     *log = mp->m_log;
    xfs_csn_t       commit_seq = 0;
    int         error = 0;
    int         sync = tp->t_flags & XFS_TRANS_SYNC;

    trace_xfs_trans_commit(tp, _RET_IP_);

    error = xfs_trans_run_precommits(tp);
    if (error) {
        if (tp->t_flags & XFS_TRANS_PERM_LOG_RES)
            xfs_defer_cancel(tp);
        goto out_unreserve;
    }

    /*
     * Finish deferred items on final commit. Only permanent transactions
     * should ever have deferred ops.
     */
    WARN_ON_ONCE(!list_empty(&tp->t_dfops) &&
             !(tp->t_flags & XFS_TRANS_PERM_LOG_RES));
    if (!regrant && (tp->t_flags & XFS_TRANS_PERM_LOG_RES)) {
        error = xfs_defer_finish_noroll(&tp);
        if (error)
            goto out_unreserve;
    }

    /*
     * If there is nothing to be logged by the transaction,
     * then unlock all of the items associated with the
     * transaction and free the transaction structure.
     * Also make sure to return any reserved blocks to
     * the free pool.
     */
    if (!(tp->t_flags & XFS_TRANS_DIRTY))
        goto out_unreserve;

    /*
     * We must check against log shutdown here because we cannot abort log
     * items and leave them dirty, inconsistent and unpinned in memory while
     * the log is active. This leaves them open to being written back to
     * disk, and that will lead to on-disk corruption.
     */
    if (xlog_is_shutdown(log)) {
        error = -EIO;
        goto out_unreserve;
    }

    ASSERT(tp->t_ticket != NULL);

    /*
     * If we need to update the superblock, then do it now.
     */
    if (tp->t_flags & XFS_TRANS_SB_DIRTY)
        xfs_trans_apply_sb_deltas(tp);
    xfs_trans_apply_dquot_deltas(tp);

    xlog_cil_commit(log, tp, &commit_seq, regrant);

    xfs_trans_free(tp);

    /*
     * If the transaction needs to be synchronous, then force the
     * log out now and wait for it.
     */
    if (sync) {
        error = xfs_log_force_seq(mp, commit_seq, XFS_LOG_SYNC, NULL);
        XFS_STATS_INC(mp, xs_trans_sync);
    } else {
        XFS_STATS_INC(mp, xs_trans_async);
    }

    return error;

out_unreserve:
    xfs_trans_unreserve_and_mod_sb(tp);

    /*
     * It is indeed possible for the transaction to be not dirty but
     * the dqinfo portion to be.  All that means is that we have some
     * (non-persistent) quota reservations that need to be unreserved.
     */
    xfs_trans_unreserve_and_mod_dquots(tp);
    if (tp->t_ticket) {
        if (regrant && !xlog_is_shutdown(log))
            xfs_log_ticket_regrant(log, tp->t_ticket);
        else
            xfs_log_ticket_ungrant(log, tp->t_ticket);
        tp->t_ticket = NULL;
    }
    xfs_trans_free_items(tp, !!error);
    xfs_trans_free(tp);

    XFS_STATS_INC(mp, xs_trans_empty);
    return error;
}

int
xfs_trans_commit(
    struct xfs_trans    *tp)
{
    return __xfs_trans_commit(tp, false);
}

/*
 * Unlock all of the transaction's items and free the transaction.  If the
 * transaction is dirty, we must shut down the filesystem because there is no
 * way to restore them to their previous state.
 *
 * If the transaction has made a log reservation, make sure to release it as
 * well.
 *
 * This is a high level function (equivalent to xfs_trans_commit()) and so can
 * be called after the transaction has effectively been aborted due to the mount
 * being shut down. However, if the mount has not been shut down and the
 * transaction is dirty we will shut the mount down and, in doing so, that
 * guarantees that the log is shut down, too. Hence we don't need to be as
 * careful with shutdown state and dirty items here as we need to be in
 * xfs_trans_commit().
 */
void
xfs_trans_cancel(
    struct xfs_trans    *tp)
{
    struct xfs_mount    *mp = tp->t_mountp;
    struct xlog     *log = mp->m_log;
    bool            dirty = (tp->t_flags & XFS_TRANS_DIRTY);

    trace_xfs_trans_cancel(tp, _RET_IP_);

    /*
     * It's never valid to cancel a transaction with deferred ops attached,
     * because the transaction is effectively dirty.  Complain about this
     * loudly before freeing the in-memory defer items.
     */
    if (!list_empty(&tp->t_dfops)) {
        ASSERT(xfs_is_shutdown(mp) || list_empty(&tp->t_dfops));
        ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
        dirty = true;
        xfs_defer_cancel(tp);
    }

    /*
     * See if the caller is relying on us to shut down the filesystem. We
     * only want an error report if there isn't already a shutdown in
     * progress, so we only need to check against the mount shutdown state
     * here.
     */
    if (dirty && !xfs_is_shutdown(mp)) {
        XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp);
        xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
    }
#ifdef DEBUG
    /* Log items need to be consistent until the log is shut down. */
    if (!dirty && !xlog_is_shutdown(log)) {
        struct xfs_log_item *lip;

        list_for_each_entry(lip, &tp->t_items, li_trans)
            ASSERT(!xlog_item_is_intent_done(lip));
    }
#endif
    xfs_trans_unreserve_and_mod_sb(tp);
    xfs_trans_unreserve_and_mod_dquots(tp);

    if (tp->t_ticket) {
        xfs_log_ticket_ungrant(log, tp->t_ticket);
        tp->t_ticket = NULL;
    }

    xfs_trans_free_items(tp, dirty);
    xfs_trans_free(tp);
}

/*
 * Roll from one trans in the sequence of PERMANENT transactions to
 * the next: permanent transactions are only flushed out when
 * committed with xfs_trans_commit(), but we still want to let
 * chunks of it go to the log as soon as possible. So we commit the
 * chunk we've been working on and get a new transaction to continue.
 */
int
xfs_trans_roll(
    struct xfs_trans    **tpp)
{
    struct xfs_trans    *trans = *tpp;
    struct xfs_trans_res    tres;
    int         error;

    trace_xfs_trans_roll(trans, _RET_IP_);

    /*
     * Copy the critical parameters from one trans to the next.
     */
    tres.tr_logres = trans->t_log_res;
    tres.tr_logcount = trans->t_log_count;

    *tpp = xfs_trans_dup(trans);

    /*
     * Commit the current transaction.
     * If this commit failed, then it'd just unlock those items that
     * are not marked ihold. That also means that a filesystem shutdown
     * is in progress. The caller takes the responsibility to cancel
     * the duplicate transaction that gets returned.
     */
    error = __xfs_trans_commit(trans, true);
    if (error)
        return error;

    /*
     * Reserve space in the log for the next transaction.
     * This also pushes items in the "AIL", the list of logged items,
     * out to disk if they are taking up space at the tail of the log
     * that we want to use.  This requires that either nothing be locked
     * across this call, or that anything that is locked be logged in
     * the prior and the next transactions.
     */
    tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
    return xfs_trans_reserve(*tpp, &tres, 0, 0);
}
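
/*
 * Illustrative rolling sketch (editorial example; do_one_chunk() is
 * hypothetical): a long-running operation commits work in chunks while
 * xfs_trans_roll() keeps the permanent log reservation alive via
 * xfs_trans_dup().
 *
 *    while (!done) {
 *        error = do_one_chunk(tp);    (dirties items in tp)
 *        if (error)
 *            break;
 *        error = xfs_trans_roll(&tp);
 *        if (error)
 *            break;
 *    }
 *    if (error)
 *        xfs_trans_cancel(tp);
 *    else
 *        error = xfs_trans_commit(tp);
 */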

/*
 * Allocate a transaction, lock and join the inode to it, and reserve quota.
 *
 * The caller must ensure that the on-disk dquots attached to this inode have
 * already been allocated and initialized.  The caller is responsible for
 * releasing ILOCK_EXCL if a new transaction is returned.
 */
int
xfs_trans_alloc_inode(
    struct xfs_inode    *ip,
    struct xfs_trans_res    *resv,
    unsigned int        dblocks,
    unsigned int        rblocks,
    bool            force,
    struct xfs_trans    **tpp)
{
    struct xfs_trans    *tp;
    struct xfs_mount    *mp = ip->i_mount;
    bool            retried = false;
    int         error;

retry:
    error = xfs_trans_alloc(mp, resv, dblocks,
            rblocks / mp->m_sb.sb_rextsize,
            force ? XFS_TRANS_RESERVE : 0, &tp);
    if (error)
        return error;

    xfs_ilock(ip, XFS_ILOCK_EXCL);
    xfs_trans_ijoin(tp, ip, 0);

    error = xfs_qm_dqattach_locked(ip, false);
    if (error) {
        /* Caller should have allocated the dquots! */
        ASSERT(error != -ENOENT);
        goto out_cancel;
    }

    error = xfs_trans_reserve_quota_nblks(tp, ip, dblocks, rblocks, force);
    if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
        xfs_trans_cancel(tp);
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
        xfs_blockgc_free_quota(ip, 0);
        retried = true;
        goto retry;
    }
    if (error)
        goto out_cancel;

    *tpp = tp;
    return 0;

out_cancel:
    xfs_trans_cancel(tp);
    xfs_iunlock(ip, XFS_ILOCK_EXCL);
    return error;
}
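
/*
 * Typical caller shape (editorial sketch; tr_write and dblocks are one
 * plausible choice): the helper returns with the ILOCK held and the inode
 * joined without lock flags, so the caller unlocks after commit.
 *
 *    error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, dblocks, 0,
 *            false, &tp);
 *    if (error)
 *        return error;
 *    ... modify ip, logging it with xfs_trans_log_inode() as needed ...
 *    error = xfs_trans_commit(tp);
 *    xfs_iunlock(ip, XFS_ILOCK_EXCL);
 */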

/*
 * Allocate a transaction in preparation for inode creation by reserving quota
 * against the given dquots.  Callers are not required to hold any inode locks.
 */
int
xfs_trans_alloc_icreate(
    struct xfs_mount    *mp,
    struct xfs_trans_res    *resv,
    struct xfs_dquot    *udqp,
    struct xfs_dquot    *gdqp,
    struct xfs_dquot    *pdqp,
    unsigned int        dblocks,
    struct xfs_trans    **tpp)
{
    struct xfs_trans    *tp;
    bool            retried = false;
    int         error;

retry:
    error = xfs_trans_alloc(mp, resv, dblocks, 0, 0, &tp);
    if (error)
        return error;

    error = xfs_trans_reserve_quota_icreate(tp, udqp, gdqp, pdqp, dblocks);
    if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
        xfs_trans_cancel(tp);
        xfs_blockgc_free_dquots(mp, udqp, gdqp, pdqp, 0);
        retried = true;
        goto retry;
    }
    if (error) {
        xfs_trans_cancel(tp);
        return error;
    }

    *tpp = tp;
    return 0;
}

/*
 * Allocate a transaction, lock and join the inode to it, and reserve quota
 * in preparation for inode attribute changes that include uid, gid, or prid
 * changes.
 *
 * The caller must ensure that the on-disk dquots attached to this inode have
 * already been allocated and initialized.  The ILOCK will be dropped when the
 * transaction is committed or cancelled.
 */
int
xfs_trans_alloc_ichange(
    struct xfs_inode    *ip,
    struct xfs_dquot    *new_udqp,
    struct xfs_dquot    *new_gdqp,
    struct xfs_dquot    *new_pdqp,
    bool            force,
    struct xfs_trans    **tpp)
{
    struct xfs_trans    *tp;
    struct xfs_mount    *mp = ip->i_mount;
    struct xfs_dquot    *udqp;
    struct xfs_dquot    *gdqp;
    struct xfs_dquot    *pdqp;
    bool            retried = false;
    int         error;

retry:
    error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
    if (error)
        return error;

    xfs_ilock(ip, XFS_ILOCK_EXCL);
    xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);

    error = xfs_qm_dqattach_locked(ip, false);
    if (error) {
        /* Caller should have allocated the dquots! */
        ASSERT(error != -ENOENT);
        goto out_cancel;
    }

    /*
     * For each quota type, skip quota reservations if the inode's dquots
     * now match the ones that came from the caller, or the caller didn't
     * pass one in.  The inode's dquots can change if we drop the ILOCK to
     * perform a blockgc scan, so we must preserve the caller's arguments.
     */
    udqp = (new_udqp != ip->i_udquot) ? new_udqp : NULL;
    gdqp = (new_gdqp != ip->i_gdquot) ? new_gdqp : NULL;
    pdqp = (new_pdqp != ip->i_pdquot) ? new_pdqp : NULL;
    if (udqp || gdqp || pdqp) {
        unsigned int    qflags = XFS_QMOPT_RES_REGBLKS;

        if (force)
            qflags |= XFS_QMOPT_FORCE_RES;

        /*
         * Reserve enough quota to handle blocks on disk and reserved
         * for a delayed allocation.  We'll actually transfer the
         * delalloc reservation between dquots at chown time, even
         * though that part is only semi-transactional.
         */
        error = xfs_trans_reserve_quota_bydquots(tp, mp, udqp, gdqp,
                pdqp, ip->i_nblocks + ip->i_delayed_blks,
                1, qflags);
        if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
            xfs_trans_cancel(tp);
            xfs_blockgc_free_dquots(mp, udqp, gdqp, pdqp, 0);
            retried = true;
            goto retry;
        }
        if (error)
            goto out_cancel;
    }

    *tpp = tp;
    return 0;

out_cancel:
    xfs_trans_cancel(tp);
    return error;
}

/*
 * Allocate a transaction, lock and join the directory and child inodes to it,
 * and reserve quota for a directory update.  If there isn't sufficient space,
 * @dblocks will be set to zero for a reservationless directory update and
 * @nospace_error will be set to a negative errno describing the space
 * constraint we hit.
 *
 * The caller must ensure that the on-disk dquots attached to this inode have
 * already been allocated and initialized.  The ILOCKs will be dropped when the
 * transaction is committed or cancelled.
 */
int
xfs_trans_alloc_dir(
    struct xfs_inode    *dp,
    struct xfs_trans_res    *resv,
    struct xfs_inode    *ip,
    unsigned int        *dblocks,
    struct xfs_trans    **tpp,
    int         *nospace_error)
{
    struct xfs_trans    *tp;
    struct xfs_mount    *mp = ip->i_mount;
    unsigned int        resblks;
    bool            retried = false;
    int         error;

retry:
    *nospace_error = 0;
    resblks = *dblocks;
    error = xfs_trans_alloc(mp, resv, resblks, 0, 0, &tp);
    if (error == -ENOSPC) {
        *nospace_error = error;
        resblks = 0;
        error = xfs_trans_alloc(mp, resv, resblks, 0, 0, &tp);
    }
    if (error)
        return error;

    xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL);

    xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
    xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);

    error = xfs_qm_dqattach_locked(dp, false);
    if (error) {
        /* Caller should have allocated the dquots! */
        ASSERT(error != -ENOENT);
        goto out_cancel;
    }

    error = xfs_qm_dqattach_locked(ip, false);
    if (error) {
        /* Caller should have allocated the dquots! */
        ASSERT(error != -ENOENT);
        goto out_cancel;
    }

    if (resblks == 0)
        goto done;

    error = xfs_trans_reserve_quota_nblks(tp, dp, resblks, 0, false);
    if (error == -EDQUOT || error == -ENOSPC) {
        if (!retried) {
            xfs_trans_cancel(tp);
            xfs_blockgc_free_quota(dp, 0);
            retried = true;
            goto retry;
        }

        *nospace_error = error;
        resblks = 0;
        error = 0;
    }
    if (error)
        goto out_cancel;

done:
    *tpp = tp;
    *dblocks = resblks;
    return 0;

out_cancel:
    xfs_trans_cancel(tp);
    return error;
}
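
/*
 * Sketch of the fallback contract (editorial example; tr_link is one
 * plausible reservation): callers retry the directory update without a
 * block reservation and surface @nospace_error only if the update itself
 * then runs out of space.
 *
 *    error = xfs_trans_alloc_dir(dp, &M_RES(mp)->tr_link, ip, &resblks,
 *            &tp, &nospace_error);
 *    if (error)
 *        return error;
 *    error = ... the directory update, honouring resblks == 0 ...;
 *    if (error == -ENOSPC && nospace_error)
 *        error = nospace_error;
 */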