0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017 #include <linux/time.h>
0018 #include <linux/fs.h>
0019 #include <linux/jbd2.h>
0020 #include <linux/errno.h>
0021 #include <linux/slab.h>
0022 #include <linux/timer.h>
0023 #include <linux/mm.h>
0024 #include <linux/highmem.h>
0025 #include <linux/hrtimer.h>
0026 #include <linux/backing-dev.h>
0027 #include <linux/bug.h>
0028 #include <linux/module.h>
0029 #include <linux/sched/mm.h>
0030
0031 #include <trace/events/jbd2.h>
0032
0033 static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
0034 static void __jbd2_journal_unfile_buffer(struct journal_head *jh);
0035
0036 static struct kmem_cache *transaction_cache;
0037 int __init jbd2_journal_init_transaction_cache(void)
0038 {
0039 J_ASSERT(!transaction_cache);
0040 transaction_cache = kmem_cache_create("jbd2_transaction_s",
0041 sizeof(transaction_t),
0042 0,
0043 SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
0044 NULL);
0045 if (!transaction_cache) {
0046 pr_emerg("JBD2: failed to create transaction cache\n");
0047 return -ENOMEM;
0048 }
0049 return 0;
0050 }
0051
0052 void jbd2_journal_destroy_transaction_cache(void)
0053 {
0054 kmem_cache_destroy(transaction_cache);
0055 transaction_cache = NULL;
0056 }
0057
0058 void jbd2_journal_free_transaction(transaction_t *transaction)
0059 {
0060 if (unlikely(ZERO_OR_NULL_PTR(transaction)))
0061 return;
0062 kmem_cache_free(transaction_cache, transaction);
0063 }
0064
0065
0066
0067
0068 static int jbd2_descriptor_blocks_per_trans(journal_t *journal)
0069 {
0070 int tag_space = journal->j_blocksize - sizeof(journal_header_t);
0071 int tags_per_block;
0072
0073
0074 tag_space -= 16;
0075 if (jbd2_journal_has_csum_v2or3(journal))
0076 tag_space -= sizeof(struct jbd2_journal_block_tail);
0077
0078 tags_per_block = (tag_space - 16) / journal_tag_bytes(journal);
0079
0080
0081
0082
0083 return 1 + DIV_ROUND_UP(journal->j_max_transaction_buffers,
0084 tags_per_block);
0085 }
0086
0087
0088
0089
0090
0091
0092
0093
0094
0095
0096
0097
0098
0099
0100
0101
0102 static void jbd2_get_transaction(journal_t *journal,
0103 transaction_t *transaction)
0104 {
0105 transaction->t_journal = journal;
0106 transaction->t_state = T_RUNNING;
0107 transaction->t_start_time = ktime_get();
0108 transaction->t_tid = journal->j_transaction_sequence++;
0109 transaction->t_expires = jiffies + journal->j_commit_interval;
0110 atomic_set(&transaction->t_updates, 0);
0111 atomic_set(&transaction->t_outstanding_credits,
0112 jbd2_descriptor_blocks_per_trans(journal) +
0113 atomic_read(&journal->j_reserved_credits));
0114 atomic_set(&transaction->t_outstanding_revokes, 0);
0115 atomic_set(&transaction->t_handle_count, 0);
0116 INIT_LIST_HEAD(&transaction->t_inode_list);
0117 INIT_LIST_HEAD(&transaction->t_private_list);
0118
0119
0120 journal->j_commit_timer.expires = round_jiffies_up(transaction->t_expires);
0121 add_timer(&journal->j_commit_timer);
0122
0123 J_ASSERT(journal->j_running_transaction == NULL);
0124 journal->j_running_transaction = transaction;
0125 transaction->t_max_wait = 0;
0126 transaction->t_start = jiffies;
0127 transaction->t_requested = 0;
0128 }
0129
0130
0131
0132
0133
0134
0135
0136
0137
0138
0139
0140
0141
0142
0143
0144
0145
0146 static inline void update_t_max_wait(transaction_t *transaction,
0147 unsigned long ts)
0148 {
0149 unsigned long oldts, newts;
0150
0151 if (time_after(transaction->t_start, ts)) {
0152 newts = jbd2_time_diff(ts, transaction->t_start);
0153 oldts = READ_ONCE(transaction->t_max_wait);
0154 while (oldts < newts)
0155 oldts = cmpxchg(&transaction->t_max_wait, oldts, newts);
0156 }
0157 }
0158
0159
0160
0161
0162
0163
0164 static void wait_transaction_locked(journal_t *journal)
0165 __releases(journal->j_state_lock)
0166 {
0167 DEFINE_WAIT(wait);
0168 int need_to_start;
0169 tid_t tid = journal->j_running_transaction->t_tid;
0170
0171 prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
0172 TASK_UNINTERRUPTIBLE);
0173 need_to_start = !tid_geq(journal->j_commit_request, tid);
0174 read_unlock(&journal->j_state_lock);
0175 if (need_to_start)
0176 jbd2_log_start_commit(journal, tid);
0177 jbd2_might_wait_for_commit(journal);
0178 schedule();
0179 finish_wait(&journal->j_wait_transaction_locked, &wait);
0180 }
0181
0182
0183
0184
0185
0186
0187 static void wait_transaction_switching(journal_t *journal)
0188 __releases(journal->j_state_lock)
0189 {
0190 DEFINE_WAIT(wait);
0191
0192 if (WARN_ON(!journal->j_running_transaction ||
0193 journal->j_running_transaction->t_state != T_SWITCH)) {
0194 read_unlock(&journal->j_state_lock);
0195 return;
0196 }
0197 prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
0198 TASK_UNINTERRUPTIBLE);
0199 read_unlock(&journal->j_state_lock);
0200
0201
0202
0203
0204
0205
0206 schedule();
0207 finish_wait(&journal->j_wait_transaction_locked, &wait);
0208 }
0209
0210 static void sub_reserved_credits(journal_t *journal, int blocks)
0211 {
0212 atomic_sub(blocks, &journal->j_reserved_credits);
0213 wake_up(&journal->j_wait_reserved);
0214 }
0215
0216
0217
0218
0219
0220
0221
0222
0223
0224
0225
0226
0227 static int add_transaction_credits(journal_t *journal, int blocks,
0228 int rsv_blocks)
0229 __must_hold(&journal->j_state_lock)
0230 {
0231 transaction_t *t = journal->j_running_transaction;
0232 int needed;
0233 int total = blocks + rsv_blocks;
0234
0235
0236
0237
0238
0239 if (t->t_state != T_RUNNING) {
0240 WARN_ON_ONCE(t->t_state >= T_FLUSH);
0241 wait_transaction_locked(journal);
0242 __acquire(&journal->j_state_lock);
0243 return 1;
0244 }
0245
0246
0247
0248
0249
0250
0251 needed = atomic_add_return(total, &t->t_outstanding_credits);
0252 if (needed > journal->j_max_transaction_buffers) {
0253
0254
0255
0256
0257
0258 atomic_sub(total, &t->t_outstanding_credits);
0259
0260
0261
0262
0263
0264 if (atomic_read(&journal->j_reserved_credits) + total >
0265 journal->j_max_transaction_buffers) {
0266 read_unlock(&journal->j_state_lock);
0267 jbd2_might_wait_for_commit(journal);
0268 wait_event(journal->j_wait_reserved,
0269 atomic_read(&journal->j_reserved_credits) + total <=
0270 journal->j_max_transaction_buffers);
0271 __acquire(&journal->j_state_lock);
0272 return 1;
0273 }
0274
0275 wait_transaction_locked(journal);
0276 __acquire(&journal->j_state_lock);
0277 return 1;
0278 }
0279
0280
0281
0282
0283
0284
0285
0286
0287
0288
0289
0290
0291 if (jbd2_log_space_left(journal) < journal->j_max_transaction_buffers) {
0292 atomic_sub(total, &t->t_outstanding_credits);
0293 read_unlock(&journal->j_state_lock);
0294 jbd2_might_wait_for_commit(journal);
0295 write_lock(&journal->j_state_lock);
0296 if (jbd2_log_space_left(journal) <
0297 journal->j_max_transaction_buffers)
0298 __jbd2_log_wait_for_space(journal);
0299 write_unlock(&journal->j_state_lock);
0300 __acquire(&journal->j_state_lock);
0301 return 1;
0302 }
0303
0304
0305 if (!rsv_blocks)
0306 return 0;
0307
0308 needed = atomic_add_return(rsv_blocks, &journal->j_reserved_credits);
0309
0310 if (needed > journal->j_max_transaction_buffers / 2) {
0311 sub_reserved_credits(journal, rsv_blocks);
0312 atomic_sub(total, &t->t_outstanding_credits);
0313 read_unlock(&journal->j_state_lock);
0314 jbd2_might_wait_for_commit(journal);
0315 wait_event(journal->j_wait_reserved,
0316 atomic_read(&journal->j_reserved_credits) + rsv_blocks
0317 <= journal->j_max_transaction_buffers / 2);
0318 __acquire(&journal->j_state_lock);
0319 return 1;
0320 }
0321 return 0;
0322 }
0323
0324
0325
0326
0327
0328
0329
0330
0331 static int start_this_handle(journal_t *journal, handle_t *handle,
0332 gfp_t gfp_mask)
0333 {
0334 transaction_t *transaction, *new_transaction = NULL;
0335 int blocks = handle->h_total_credits;
0336 int rsv_blocks = 0;
0337 unsigned long ts = jiffies;
0338
0339 if (handle->h_rsv_handle)
0340 rsv_blocks = handle->h_rsv_handle->h_total_credits;
0341
0342
0343
0344
0345
0346
0347 if ((rsv_blocks > journal->j_max_transaction_buffers / 2) ||
0348 (rsv_blocks + blocks > journal->j_max_transaction_buffers)) {
0349 printk(KERN_ERR "JBD2: %s wants too many credits "
0350 "credits:%d rsv_credits:%d max:%d\n",
0351 current->comm, blocks, rsv_blocks,
0352 journal->j_max_transaction_buffers);
0353 WARN_ON(1);
0354 return -ENOSPC;
0355 }
0356
0357 alloc_transaction:
0358
0359
0360
0361
0362
0363 if (!data_race(journal->j_running_transaction)) {
0364
0365
0366
0367
0368 if ((gfp_mask & __GFP_FS) == 0)
0369 gfp_mask |= __GFP_NOFAIL;
0370 new_transaction = kmem_cache_zalloc(transaction_cache,
0371 gfp_mask);
0372 if (!new_transaction)
0373 return -ENOMEM;
0374 }
0375
0376 jbd2_debug(3, "New handle %p going live.\n", handle);
0377
0378
0379
0380
0381
0382 repeat:
0383 read_lock(&journal->j_state_lock);
0384 BUG_ON(journal->j_flags & JBD2_UNMOUNT);
0385 if (is_journal_aborted(journal) ||
0386 (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
0387 read_unlock(&journal->j_state_lock);
0388 jbd2_journal_free_transaction(new_transaction);
0389 return -EROFS;
0390 }
0391
0392
0393
0394
0395
0396
0397 if (!handle->h_reserved && journal->j_barrier_count) {
0398 read_unlock(&journal->j_state_lock);
0399 wait_event(journal->j_wait_transaction_locked,
0400 journal->j_barrier_count == 0);
0401 goto repeat;
0402 }
0403
0404 if (!journal->j_running_transaction) {
0405 read_unlock(&journal->j_state_lock);
0406 if (!new_transaction)
0407 goto alloc_transaction;
0408 write_lock(&journal->j_state_lock);
0409 if (!journal->j_running_transaction &&
0410 (handle->h_reserved || !journal->j_barrier_count)) {
0411 jbd2_get_transaction(journal, new_transaction);
0412 new_transaction = NULL;
0413 }
0414 write_unlock(&journal->j_state_lock);
0415 goto repeat;
0416 }
0417
0418 transaction = journal->j_running_transaction;
0419
0420 if (!handle->h_reserved) {
0421
0422 if (add_transaction_credits(journal, blocks, rsv_blocks)) {
0423
0424
0425
0426
0427 __release(&journal->j_state_lock);
0428 goto repeat;
0429 }
0430 } else {
0431
0432
0433
0434
0435
0436
0437
0438 if (transaction->t_state == T_SWITCH) {
0439 wait_transaction_switching(journal);
0440 goto repeat;
0441 }
0442 sub_reserved_credits(journal, blocks);
0443 handle->h_reserved = 0;
0444 }
0445
0446
0447
0448
0449 update_t_max_wait(transaction, ts);
0450 handle->h_transaction = transaction;
0451 handle->h_requested_credits = blocks;
0452 handle->h_revoke_credits_requested = handle->h_revoke_credits;
0453 handle->h_start_jiffies = jiffies;
0454 atomic_inc(&transaction->t_updates);
0455 atomic_inc(&transaction->t_handle_count);
0456 jbd2_debug(4, "Handle %p given %d credits (total %d, free %lu)\n",
0457 handle, blocks,
0458 atomic_read(&transaction->t_outstanding_credits),
0459 jbd2_log_space_left(journal));
0460 read_unlock(&journal->j_state_lock);
0461 current->journal_info = handle;
0462
0463 rwsem_acquire_read(&journal->j_trans_commit_map, 0, 0, _THIS_IP_);
0464 jbd2_journal_free_transaction(new_transaction);
0465
0466
0467
0468
0469 handle->saved_alloc_context = memalloc_nofs_save();
0470 return 0;
0471 }
0472
0473
0474 static handle_t *new_handle(int nblocks)
0475 {
0476 handle_t *handle = jbd2_alloc_handle(GFP_NOFS);
0477 if (!handle)
0478 return NULL;
0479 handle->h_total_credits = nblocks;
0480 handle->h_ref = 1;
0481
0482 return handle;
0483 }
0484
0485 handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks,
0486 int revoke_records, gfp_t gfp_mask,
0487 unsigned int type, unsigned int line_no)
0488 {
0489 handle_t *handle = journal_current_handle();
0490 int err;
0491
0492 if (!journal)
0493 return ERR_PTR(-EROFS);
0494
0495 if (handle) {
0496 J_ASSERT(handle->h_transaction->t_journal == journal);
0497 handle->h_ref++;
0498 return handle;
0499 }
0500
0501 nblocks += DIV_ROUND_UP(revoke_records,
0502 journal->j_revoke_records_per_block);
0503 handle = new_handle(nblocks);
0504 if (!handle)
0505 return ERR_PTR(-ENOMEM);
0506 if (rsv_blocks) {
0507 handle_t *rsv_handle;
0508
0509 rsv_handle = new_handle(rsv_blocks);
0510 if (!rsv_handle) {
0511 jbd2_free_handle(handle);
0512 return ERR_PTR(-ENOMEM);
0513 }
0514 rsv_handle->h_reserved = 1;
0515 rsv_handle->h_journal = journal;
0516 handle->h_rsv_handle = rsv_handle;
0517 }
0518 handle->h_revoke_credits = revoke_records;
0519
0520 err = start_this_handle(journal, handle, gfp_mask);
0521 if (err < 0) {
0522 if (handle->h_rsv_handle)
0523 jbd2_free_handle(handle->h_rsv_handle);
0524 jbd2_free_handle(handle);
0525 return ERR_PTR(err);
0526 }
0527 handle->h_type = type;
0528 handle->h_line_no = line_no;
0529 trace_jbd2_handle_start(journal->j_fs_dev->bd_dev,
0530 handle->h_transaction->t_tid, type,
0531 line_no, nblocks);
0532
0533 return handle;
0534 }
0535 EXPORT_SYMBOL(jbd2__journal_start);
0536
0537
0538
0539
0540
0541
0542
0543
0544
0545
0546
0547
0548
0549
0550
0551
0552
0553
0554
0555
0556
0557 handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
0558 {
0559 return jbd2__journal_start(journal, nblocks, 0, 0, GFP_NOFS, 0, 0);
0560 }
0561 EXPORT_SYMBOL(jbd2_journal_start);
0562
0563 static void __jbd2_journal_unreserve_handle(handle_t *handle, transaction_t *t)
0564 {
0565 journal_t *journal = handle->h_journal;
0566
0567 WARN_ON(!handle->h_reserved);
0568 sub_reserved_credits(journal, handle->h_total_credits);
0569 if (t)
0570 atomic_sub(handle->h_total_credits, &t->t_outstanding_credits);
0571 }
0572
0573 void jbd2_journal_free_reserved(handle_t *handle)
0574 {
0575 journal_t *journal = handle->h_journal;
0576
0577
0578 read_lock(&journal->j_state_lock);
0579 __jbd2_journal_unreserve_handle(handle, journal->j_running_transaction);
0580 read_unlock(&journal->j_state_lock);
0581 jbd2_free_handle(handle);
0582 }
0583 EXPORT_SYMBOL(jbd2_journal_free_reserved);
0584
0585
0586
0587
0588
0589
0590
0591
0592
0593
0594
0595
0596
0597
0598
0599 int jbd2_journal_start_reserved(handle_t *handle, unsigned int type,
0600 unsigned int line_no)
0601 {
0602 journal_t *journal = handle->h_journal;
0603 int ret = -EIO;
0604
0605 if (WARN_ON(!handle->h_reserved)) {
0606
0607 jbd2_journal_stop(handle);
0608 return ret;
0609 }
0610
0611
0612
0613
0614 if (WARN_ON(current->journal_info)) {
0615 jbd2_journal_free_reserved(handle);
0616 return ret;
0617 }
0618
0619 handle->h_journal = NULL;
0620
0621
0622
0623
0624 ret = start_this_handle(journal, handle, GFP_NOFS);
0625 if (ret < 0) {
0626 handle->h_journal = journal;
0627 jbd2_journal_free_reserved(handle);
0628 return ret;
0629 }
0630 handle->h_type = type;
0631 handle->h_line_no = line_no;
0632 trace_jbd2_handle_start(journal->j_fs_dev->bd_dev,
0633 handle->h_transaction->t_tid, type,
0634 line_no, handle->h_total_credits);
0635 return 0;
0636 }
0637 EXPORT_SYMBOL(jbd2_journal_start_reserved);
0638
0639
0640
0641
0642
0643
0644
0645
0646
0647
0648
0649
0650
0651
0652
0653
0654
0655
0656
0657
0658
0659
0660 int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
0661 {
0662 transaction_t *transaction = handle->h_transaction;
0663 journal_t *journal;
0664 int result;
0665 int wanted;
0666
0667 if (is_handle_aborted(handle))
0668 return -EROFS;
0669 journal = transaction->t_journal;
0670
0671 result = 1;
0672
0673 read_lock(&journal->j_state_lock);
0674
0675
0676 if (transaction->t_state != T_RUNNING) {
0677 jbd2_debug(3, "denied handle %p %d blocks: "
0678 "transaction not running\n", handle, nblocks);
0679 goto error_out;
0680 }
0681
0682 nblocks += DIV_ROUND_UP(
0683 handle->h_revoke_credits_requested + revoke_records,
0684 journal->j_revoke_records_per_block) -
0685 DIV_ROUND_UP(
0686 handle->h_revoke_credits_requested,
0687 journal->j_revoke_records_per_block);
0688 wanted = atomic_add_return(nblocks,
0689 &transaction->t_outstanding_credits);
0690
0691 if (wanted > journal->j_max_transaction_buffers) {
0692 jbd2_debug(3, "denied handle %p %d blocks: "
0693 "transaction too large\n", handle, nblocks);
0694 atomic_sub(nblocks, &transaction->t_outstanding_credits);
0695 goto error_out;
0696 }
0697
0698 trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev,
0699 transaction->t_tid,
0700 handle->h_type, handle->h_line_no,
0701 handle->h_total_credits,
0702 nblocks);
0703
0704 handle->h_total_credits += nblocks;
0705 handle->h_requested_credits += nblocks;
0706 handle->h_revoke_credits += revoke_records;
0707 handle->h_revoke_credits_requested += revoke_records;
0708 result = 0;
0709
0710 jbd2_debug(3, "extended handle %p by %d\n", handle, nblocks);
0711 error_out:
0712 read_unlock(&journal->j_state_lock);
0713 return result;
0714 }
0715
0716 static void stop_this_handle(handle_t *handle)
0717 {
0718 transaction_t *transaction = handle->h_transaction;
0719 journal_t *journal = transaction->t_journal;
0720 int revokes;
0721
0722 J_ASSERT(journal_current_handle() == handle);
0723 J_ASSERT(atomic_read(&transaction->t_updates) > 0);
0724 current->journal_info = NULL;
0725
0726
0727
0728
0729
0730
0731 revokes = handle->h_revoke_credits_requested - handle->h_revoke_credits;
0732 if (revokes) {
0733 int t_revokes, revoke_descriptors;
0734 int rr_per_blk = journal->j_revoke_records_per_block;
0735
0736 WARN_ON_ONCE(DIV_ROUND_UP(revokes, rr_per_blk)
0737 > handle->h_total_credits);
0738 t_revokes = atomic_add_return(revokes,
0739 &transaction->t_outstanding_revokes);
0740 revoke_descriptors =
0741 DIV_ROUND_UP(t_revokes, rr_per_blk) -
0742 DIV_ROUND_UP(t_revokes - revokes, rr_per_blk);
0743 handle->h_total_credits -= revoke_descriptors;
0744 }
0745 atomic_sub(handle->h_total_credits,
0746 &transaction->t_outstanding_credits);
0747 if (handle->h_rsv_handle)
0748 __jbd2_journal_unreserve_handle(handle->h_rsv_handle,
0749 transaction);
0750 if (atomic_dec_and_test(&transaction->t_updates))
0751 wake_up(&journal->j_wait_updates);
0752
0753 rwsem_release(&journal->j_trans_commit_map, _THIS_IP_);
0754
0755
0756
0757
0758 memalloc_nofs_restore(handle->saved_alloc_context);
0759 }
0760
0761
0762
0763
0764
0765
0766
0767
0768
0769
0770
0771
0772
0773
0774
0775
0776
0777
0778 int jbd2__journal_restart(handle_t *handle, int nblocks, int revoke_records,
0779 gfp_t gfp_mask)
0780 {
0781 transaction_t *transaction = handle->h_transaction;
0782 journal_t *journal;
0783 tid_t tid;
0784 int need_to_start;
0785 int ret;
0786
0787
0788
0789 if (is_handle_aborted(handle))
0790 return 0;
0791 journal = transaction->t_journal;
0792 tid = transaction->t_tid;
0793
0794
0795
0796
0797
0798 jbd2_debug(2, "restarting handle %p\n", handle);
0799 stop_this_handle(handle);
0800 handle->h_transaction = NULL;
0801
0802
0803
0804
0805
0806 read_lock(&journal->j_state_lock);
0807 need_to_start = !tid_geq(journal->j_commit_request, tid);
0808 read_unlock(&journal->j_state_lock);
0809 if (need_to_start)
0810 jbd2_log_start_commit(journal, tid);
0811 handle->h_total_credits = nblocks +
0812 DIV_ROUND_UP(revoke_records,
0813 journal->j_revoke_records_per_block);
0814 handle->h_revoke_credits = revoke_records;
0815 ret = start_this_handle(journal, handle, gfp_mask);
0816 trace_jbd2_handle_restart(journal->j_fs_dev->bd_dev,
0817 ret ? 0 : handle->h_transaction->t_tid,
0818 handle->h_type, handle->h_line_no,
0819 handle->h_total_credits);
0820 return ret;
0821 }
0822 EXPORT_SYMBOL(jbd2__journal_restart);
0823
0824
0825 int jbd2_journal_restart(handle_t *handle, int nblocks)
0826 {
0827 return jbd2__journal_restart(handle, nblocks, 0, GFP_NOFS);
0828 }
0829 EXPORT_SYMBOL(jbd2_journal_restart);
0830
0831
0832
0833
0834
0835 void jbd2_journal_wait_updates(journal_t *journal)
0836 {
0837 DEFINE_WAIT(wait);
0838
0839 while (1) {
0840
0841
0842
0843
0844
0845
0846
0847
0848
0849
0850 transaction_t *transaction = journal->j_running_transaction;
0851
0852 if (!transaction)
0853 break;
0854
0855 prepare_to_wait(&journal->j_wait_updates, &wait,
0856 TASK_UNINTERRUPTIBLE);
0857 if (!atomic_read(&transaction->t_updates)) {
0858 finish_wait(&journal->j_wait_updates, &wait);
0859 break;
0860 }
0861 write_unlock(&journal->j_state_lock);
0862 schedule();
0863 finish_wait(&journal->j_wait_updates, &wait);
0864 write_lock(&journal->j_state_lock);
0865 }
0866 }
0867
0868
0869
0870
0871
0872
0873
0874
0875
0876
0877
0878 void jbd2_journal_lock_updates(journal_t *journal)
0879 {
0880 jbd2_might_wait_for_commit(journal);
0881
0882 write_lock(&journal->j_state_lock);
0883 ++journal->j_barrier_count;
0884
0885
0886 if (atomic_read(&journal->j_reserved_credits)) {
0887 write_unlock(&journal->j_state_lock);
0888 wait_event(journal->j_wait_reserved,
0889 atomic_read(&journal->j_reserved_credits) == 0);
0890 write_lock(&journal->j_state_lock);
0891 }
0892
0893
0894 jbd2_journal_wait_updates(journal);
0895
0896 write_unlock(&journal->j_state_lock);
0897
0898
0899
0900
0901
0902
0903
0904 mutex_lock(&journal->j_barrier);
0905 }
0906
0907
0908
0909
0910
0911
0912
0913
0914
0915 void jbd2_journal_unlock_updates (journal_t *journal)
0916 {
0917 J_ASSERT(journal->j_barrier_count != 0);
0918
0919 mutex_unlock(&journal->j_barrier);
0920 write_lock(&journal->j_state_lock);
0921 --journal->j_barrier_count;
0922 write_unlock(&journal->j_state_lock);
0923 wake_up(&journal->j_wait_transaction_locked);
0924 }
0925
0926 static void warn_dirty_buffer(struct buffer_head *bh)
0927 {
0928 printk(KERN_WARNING
0929 "JBD2: Spotted dirty metadata buffer (dev = %pg, blocknr = %llu). "
0930 "There's a risk of filesystem corruption in case of system "
0931 "crash.\n",
0932 bh->b_bdev, (unsigned long long)bh->b_blocknr);
0933 }
0934
0935
0936 static void jbd2_freeze_jh_data(struct journal_head *jh)
0937 {
0938 struct page *page;
0939 int offset;
0940 char *source;
0941 struct buffer_head *bh = jh2bh(jh);
0942
0943 J_EXPECT_JH(jh, buffer_uptodate(bh), "Possible IO failure.\n");
0944 page = bh->b_page;
0945 offset = offset_in_page(bh->b_data);
0946 source = kmap_atomic(page);
0947
0948 jbd2_buffer_frozen_trigger(jh, source + offset, jh->b_triggers);
0949 memcpy(jh->b_frozen_data, source + offset, bh->b_size);
0950 kunmap_atomic(source);
0951
0952
0953
0954
0955
0956 jh->b_frozen_triggers = jh->b_triggers;
0957 }
0958
0959
0960
0961
0962
0963
0964
0965
0966
0967
0968
0969 static int
0970 do_get_write_access(handle_t *handle, struct journal_head *jh,
0971 int force_copy)
0972 {
0973 struct buffer_head *bh;
0974 transaction_t *transaction = handle->h_transaction;
0975 journal_t *journal;
0976 int error;
0977 char *frozen_buffer = NULL;
0978 unsigned long start_lock, time_lock;
0979
0980 journal = transaction->t_journal;
0981
0982 jbd2_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy);
0983
0984 JBUFFER_TRACE(jh, "entry");
0985 repeat:
0986 bh = jh2bh(jh);
0987
0988
0989
0990 start_lock = jiffies;
0991 lock_buffer(bh);
0992 spin_lock(&jh->b_state_lock);
0993
0994
0995 time_lock = jbd2_time_diff(start_lock, jiffies);
0996 if (time_lock > HZ/10)
0997 trace_jbd2_lock_buffer_stall(bh->b_bdev->bd_dev,
0998 jiffies_to_msecs(time_lock));
0999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013 if (buffer_dirty(bh)) {
1014
1015
1016
1017
1018 if (jh->b_transaction) {
1019 J_ASSERT_JH(jh,
1020 jh->b_transaction == transaction ||
1021 jh->b_transaction ==
1022 journal->j_committing_transaction);
1023 if (jh->b_next_transaction)
1024 J_ASSERT_JH(jh, jh->b_next_transaction ==
1025 transaction);
1026 warn_dirty_buffer(bh);
1027 }
1028
1029
1030
1031
1032
1033 JBUFFER_TRACE(jh, "Journalling dirty buffer");
1034 clear_buffer_dirty(bh);
1035 set_buffer_jbddirty(bh);
1036 }
1037
1038 unlock_buffer(bh);
1039
1040 error = -EROFS;
1041 if (is_handle_aborted(handle)) {
1042 spin_unlock(&jh->b_state_lock);
1043 goto out;
1044 }
1045 error = 0;
1046
1047
1048
1049
1050
1051 if (jh->b_transaction == transaction ||
1052 jh->b_next_transaction == transaction)
1053 goto done;
1054
1055
1056
1057
1058
1059 jh->b_modified = 0;
1060
1061
1062
1063
1064
1065
1066 if (!jh->b_transaction) {
1067 JBUFFER_TRACE(jh, "no transaction");
1068 J_ASSERT_JH(jh, !jh->b_next_transaction);
1069 JBUFFER_TRACE(jh, "file as BJ_Reserved");
1070
1071
1072
1073
1074
1075 smp_wmb();
1076 spin_lock(&journal->j_list_lock);
1077 __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
1078 spin_unlock(&journal->j_list_lock);
1079 goto done;
1080 }
1081
1082
1083
1084
1085 if (jh->b_frozen_data) {
1086 JBUFFER_TRACE(jh, "has frozen data");
1087 J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
1088 goto attach_next;
1089 }
1090
1091 JBUFFER_TRACE(jh, "owned by older transaction");
1092 J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
1093 J_ASSERT_JH(jh, jh->b_transaction == journal->j_committing_transaction);
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104 if (buffer_shadow(bh)) {
1105 JBUFFER_TRACE(jh, "on shadow: sleep");
1106 spin_unlock(&jh->b_state_lock);
1107 wait_on_bit_io(&bh->b_state, BH_Shadow, TASK_UNINTERRUPTIBLE);
1108 goto repeat;
1109 }
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123 if (jh->b_jlist == BJ_Metadata || force_copy) {
1124 JBUFFER_TRACE(jh, "generate frozen data");
1125 if (!frozen_buffer) {
1126 JBUFFER_TRACE(jh, "allocate memory for buffer");
1127 spin_unlock(&jh->b_state_lock);
1128 frozen_buffer = jbd2_alloc(jh2bh(jh)->b_size,
1129 GFP_NOFS | __GFP_NOFAIL);
1130 goto repeat;
1131 }
1132 jh->b_frozen_data = frozen_buffer;
1133 frozen_buffer = NULL;
1134 jbd2_freeze_jh_data(jh);
1135 }
1136 attach_next:
1137
1138
1139
1140
1141
1142 smp_wmb();
1143 jh->b_next_transaction = transaction;
1144
1145 done:
1146 spin_unlock(&jh->b_state_lock);
1147
1148
1149
1150
1151
1152 jbd2_journal_cancel_revoke(handle, jh);
1153
1154 out:
1155 if (unlikely(frozen_buffer))
1156 jbd2_free(frozen_buffer, bh->b_size);
1157
1158 JBUFFER_TRACE(jh, "exit");
1159 return error;
1160 }
1161
1162
1163 static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh,
1164 bool undo)
1165 {
1166 struct journal_head *jh;
1167 bool ret = false;
1168
1169
1170 if (buffer_dirty(bh))
1171 return false;
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184 rcu_read_lock();
1185 if (!buffer_jbd(bh))
1186 goto out;
1187
1188 jh = READ_ONCE(bh->b_private);
1189 if (!jh)
1190 goto out;
1191
1192 if (undo && !jh->b_committed_data)
1193 goto out;
1194 if (READ_ONCE(jh->b_transaction) != handle->h_transaction &&
1195 READ_ONCE(jh->b_next_transaction) != handle->h_transaction)
1196 goto out;
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206 smp_mb();
1207 if (unlikely(jh->b_bh != bh))
1208 goto out;
1209 ret = true;
1210 out:
1211 rcu_read_unlock();
1212 return ret;
1213 }
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227 int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh)
1228 {
1229 struct journal_head *jh;
1230 int rc;
1231
1232 if (is_handle_aborted(handle))
1233 return -EROFS;
1234
1235 if (jbd2_write_access_granted(handle, bh, false))
1236 return 0;
1237
1238 jh = jbd2_journal_add_journal_head(bh);
1239
1240
1241
1242 rc = do_get_write_access(handle, jh, 0);
1243 jbd2_journal_put_journal_head(jh);
1244 return rc;
1245 }
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267 int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
1268 {
1269 transaction_t *transaction = handle->h_transaction;
1270 journal_t *journal;
1271 struct journal_head *jh = jbd2_journal_add_journal_head(bh);
1272 int err;
1273
1274 jbd2_debug(5, "journal_head %p\n", jh);
1275 err = -EROFS;
1276 if (is_handle_aborted(handle))
1277 goto out;
1278 journal = transaction->t_journal;
1279 err = 0;
1280
1281 JBUFFER_TRACE(jh, "entry");
1282
1283
1284
1285
1286
1287
1288
1289 spin_lock(&jh->b_state_lock);
1290 J_ASSERT_JH(jh, (jh->b_transaction == transaction ||
1291 jh->b_transaction == NULL ||
1292 (jh->b_transaction == journal->j_committing_transaction &&
1293 jh->b_jlist == BJ_Forget)));
1294
1295 J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
1296 J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
1297
1298 if (jh->b_transaction == NULL) {
1299
1300
1301
1302
1303
1304
1305
1306
1307 clear_buffer_dirty(jh2bh(jh));
1308
1309 jh->b_modified = 0;
1310
1311 JBUFFER_TRACE(jh, "file as BJ_Reserved");
1312 spin_lock(&journal->j_list_lock);
1313 __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
1314 spin_unlock(&journal->j_list_lock);
1315 } else if (jh->b_transaction == journal->j_committing_transaction) {
1316
1317 jh->b_modified = 0;
1318
1319 JBUFFER_TRACE(jh, "set next transaction");
1320 spin_lock(&journal->j_list_lock);
1321 jh->b_next_transaction = transaction;
1322 spin_unlock(&journal->j_list_lock);
1323 }
1324 spin_unlock(&jh->b_state_lock);
1325
1326
1327
1328
1329
1330
1331
1332
1333 JBUFFER_TRACE(jh, "cancelling revoke");
1334 jbd2_journal_cancel_revoke(handle, jh);
1335 out:
1336 jbd2_journal_put_journal_head(jh);
1337 return err;
1338 }
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366 int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
1367 {
1368 int err;
1369 struct journal_head *jh;
1370 char *committed_data = NULL;
1371
1372 if (is_handle_aborted(handle))
1373 return -EROFS;
1374
1375 if (jbd2_write_access_granted(handle, bh, true))
1376 return 0;
1377
1378 jh = jbd2_journal_add_journal_head(bh);
1379 JBUFFER_TRACE(jh, "entry");
1380
1381
1382
1383
1384
1385
1386 err = do_get_write_access(handle, jh, 1);
1387 if (err)
1388 goto out;
1389
1390 repeat:
1391 if (!jh->b_committed_data)
1392 committed_data = jbd2_alloc(jh2bh(jh)->b_size,
1393 GFP_NOFS|__GFP_NOFAIL);
1394
1395 spin_lock(&jh->b_state_lock);
1396 if (!jh->b_committed_data) {
1397
1398
1399 JBUFFER_TRACE(jh, "generate b_committed data");
1400 if (!committed_data) {
1401 spin_unlock(&jh->b_state_lock);
1402 goto repeat;
1403 }
1404
1405 jh->b_committed_data = committed_data;
1406 committed_data = NULL;
1407 memcpy(jh->b_committed_data, bh->b_data, bh->b_size);
1408 }
1409 spin_unlock(&jh->b_state_lock);
1410 out:
1411 jbd2_journal_put_journal_head(jh);
1412 if (unlikely(committed_data))
1413 jbd2_free(committed_data, bh->b_size);
1414 return err;
1415 }
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428 void jbd2_journal_set_triggers(struct buffer_head *bh,
1429 struct jbd2_buffer_trigger_type *type)
1430 {
1431 struct journal_head *jh = jbd2_journal_grab_journal_head(bh);
1432
1433 if (WARN_ON_ONCE(!jh))
1434 return;
1435 jh->b_triggers = type;
1436 jbd2_journal_put_journal_head(jh);
1437 }
1438
1439 void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data,
1440 struct jbd2_buffer_trigger_type *triggers)
1441 {
1442 struct buffer_head *bh = jh2bh(jh);
1443
1444 if (!triggers || !triggers->t_frozen)
1445 return;
1446
1447 triggers->t_frozen(triggers, bh, mapped_data, bh->b_size);
1448 }
1449
1450 void jbd2_buffer_abort_trigger(struct journal_head *jh,
1451 struct jbd2_buffer_trigger_type *triggers)
1452 {
1453 if (!triggers || !triggers->t_abort)
1454 return;
1455
1456 triggers->t_abort(triggers, jh2bh(jh));
1457 }
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
/**
 * jbd2_journal_dirty_metadata() - mark a journalled buffer as modified
 * @handle: handle whose transaction (h_transaction) the change belongs to
 * @bh: modified buffer; it must already have a journal head attached
 *
 * The first time the buffer is seen with b_modified == 0, one credit is
 * taken from @handle and b_modified is set. The buffer is then marked
 * jbddirty and filed on the transaction's BJ_Metadata list, unless it is
 * already there or is currently owned by a different transaction.
 *
 * Return: 0 on success; -EUCLEAN if @bh has no journal head; -EROFS if the
 * handle is aborted (only checked when the buffer was not already marked
 * modified); -ENOSPC if the handle has no buffer credits left; -EINVAL if
 * the journal head's transaction pointers do not match the journal's
 * running/committing transactions.
 */
int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
{
	transaction_t *transaction = handle->h_transaction;
	journal_t *journal;
	struct journal_head *jh;
	int ret = 0;

	if (!buffer_jbd(bh))
		return -EUCLEAN;

	/*
	 * The journal head is looked up with bh2jh() and no reference is
	 * taken on it in this function.
	 * NOTE(review): presumably safe because the buffer must already be
	 * attached to the handle's transaction - confirm against callers.
	 */
	jh = bh2jh(bh);
	jbd2_debug(5, "journal_head %p\n", jh);
	JBUFFER_TRACE(jh, "entry");

	/*
	 * Lockless peek (annotated with data_race()). Only if the buffer
	 * appears to belong to neither this transaction nor to be queued for
	 * it do we take b_state_lock and repeat the check as a hard
	 * assertion on stable state.
	 */
	if (data_race(jh->b_transaction != transaction &&
	    jh->b_next_transaction != transaction)) {
		spin_lock(&jh->b_state_lock);
		J_ASSERT_JH(jh, jh->b_transaction == transaction ||
				jh->b_next_transaction == transaction);
		spin_unlock(&jh->b_state_lock);
	}
	if (jh->b_modified == 1) {
		/*
		 * Already accounted as modified. If the buffer is on our
		 * transaction it must be on BJ_Metadata; re-check under the
		 * lock, log the details, then assert.
		 */
		if (data_race(jh->b_transaction == transaction &&
		    jh->b_jlist != BJ_Metadata)) {
			spin_lock(&jh->b_state_lock);
			if (jh->b_transaction == transaction &&
			    jh->b_jlist != BJ_Metadata)
				pr_err("JBD2: assertion failure: h_type=%u "
				       "h_line_no=%u block_no=%llu jlist=%u\n",
				       handle->h_type, handle->h_line_no,
				       (unsigned long long) bh->b_blocknr,
				       jh->b_jlist);
			J_ASSERT_JH(jh, jh->b_transaction != transaction ||
					jh->b_jlist == BJ_Metadata);
			spin_unlock(&jh->b_state_lock);
		}
		goto out;
	}

	journal = transaction->t_journal;
	spin_lock(&jh->b_state_lock);

	if (is_handle_aborted(handle)) {
		/*
		 * The abort check is made only after b_state_lock is held.
		 * NOTE(review): presumably so that b_transaction cannot be
		 * switched by a concurrent commit between this check and
		 * the b_frozen_data assertion below - confirm.
		 */
		ret = -EROFS;
		goto out_unlock_bh;
	}

	if (jh->b_modified == 0) {
		/*
		 * First modification seen for this buffer: charge exactly
		 * one credit to the handle, or fail with a one-time warning
		 * if the handle has none left.
		 */
		if (WARN_ON_ONCE(jbd2_handle_buffer_credits(handle) <= 0)) {
			ret = -ENOSPC;
			goto out_unlock_bh;
		}
		jh->b_modified = 1;
		handle->h_total_credits--;
	}

	/*
	 * Fast path: the buffer already sits on this transaction's metadata
	 * list, so no refiling (and no j_list_lock) is needed. It is still
	 * an error, reported as -EINVAL, if that transaction is not the
	 * journal's running transaction.
	 */
	if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) {
		JBUFFER_TRACE(jh, "fastpath");
		if (unlikely(jh->b_transaction !=
			     journal->j_running_transaction)) {
			printk(KERN_ERR "JBD2: %s: "
			       "jh->b_transaction (%llu, %p, %u) != "
			       "journal->j_running_transaction (%p, %u)\n",
			       journal->j_devname,
			       (unsigned long long) bh->b_blocknr,
			       jh->b_transaction,
			       jh->b_transaction ? jh->b_transaction->t_tid : 0,
			       journal->j_running_transaction,
			       journal->j_running_transaction ?
			       journal->j_running_transaction->t_tid : 0);
			ret = -EINVAL;
		}
		goto out_unlock_bh;
	}

	set_buffer_jbddirty(bh);

	/*
	 * The buffer is owned by a different transaction. The only state
	 * accepted silently is "owned by the committing transaction with
	 * b_next_transaction already pointing at ours"; anything else is
	 * logged and returns -EINVAL. In both cases the buffer is left
	 * where it is and is not refiled here.
	 */
	if (jh->b_transaction != transaction) {
		JBUFFER_TRACE(jh, "already on other transaction");
		if (unlikely(((jh->b_transaction !=
			       journal->j_committing_transaction)) ||
			     (jh->b_next_transaction != transaction))) {
			printk(KERN_ERR "jbd2_journal_dirty_metadata: %s: "
			       "bad jh for block %llu: "
			       "transaction (%p, %u), "
			       "jh->b_transaction (%p, %u), "
			       "jh->b_next_transaction (%p, %u), jlist %u\n",
			       journal->j_devname,
			       (unsigned long long) bh->b_blocknr,
			       transaction, transaction->t_tid,
			       jh->b_transaction,
			       jh->b_transaction ?
			       jh->b_transaction->t_tid : 0,
			       jh->b_next_transaction,
			       jh->b_next_transaction ?
			       jh->b_next_transaction->t_tid : 0,
			       jh->b_jlist);
			WARN_ON(1);
			ret = -EINVAL;
		}
		/* Nothing more to do for a buffer owned by someone else. */
		goto out_unlock_bh;
	}

	/* On our own transaction no frozen copy is expected to exist. */
	J_ASSERT_JH(jh, jh->b_frozen_data == NULL);

	JBUFFER_TRACE(jh, "file as BJ_Metadata");
	spin_lock(&journal->j_list_lock);
	__jbd2_journal_file_buffer(jh, transaction, BJ_Metadata);
	spin_unlock(&journal->j_list_lock);
out_unlock_bh:
	spin_unlock(&jh->b_state_lock);
out:
	JBUFFER_TRACE(jh, "exit");
	return ret;
}
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
/**
 * jbd2_journal_forget() - stop journalling a buffer within a handle
 * @handle: handle performing the forget
 * @bh: buffer to forget
 *
 * Depending on which transaction currently owns the buffer it is either
 * unfiled, moved to the BJ_Forget list of the handle's transaction, or
 * marked freed and queued for that transaction. If this transaction had
 * counted the buffer as modified, the credit is handed back to @handle.
 *
 * The buffer is released through __bforget() or __brelse() on every path
 * except the early -EROFS return.
 *
 * Return: 0 on success, -EROFS if the handle is aborted, -EIO if the
 * journal head unexpectedly has b_committed_data set.
 */
int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
{
	transaction_t *transaction = handle->h_transaction;
	journal_t *journal;
	struct journal_head *jh;
	int drop_reserve = 0;
	int err = 0;
	int was_modified = 0;

	if (is_handle_aborted(handle))
		return -EROFS;
	journal = transaction->t_journal;

	BUFFER_TRACE(bh, "entry");

	jh = jbd2_journal_grab_journal_head(bh);
	if (!jh) {
		/* No journal head: nothing journal-related to undo. */
		__bforget(bh);
		return 0;
	}

	spin_lock(&jh->b_state_lock);

	/*
	 * A buffer with b_committed_data set is not expected here; treat it
	 * as an error and perform no journal operations on it.
	 */
	if (!J_EXPECT_JH(jh, !jh->b_committed_data,
			 "inconsistent data on disk")) {
		err = -EIO;
		goto drop;
	}

	/* Remember whether the buffer had been counted as modified. */
	was_modified = jh->b_modified;

	/*
	 * The buffer is leaving the transaction, so its "modified" marker is
	 * cleared in every case below.
	 */
	jh->b_modified = 0;

	if (jh->b_transaction == transaction) {
		J_ASSERT_JH(jh, !jh->b_frozen_data);

		/*
		 * Owned by our own transaction: it can be dropped from it
		 * right away, so neither dirty bit is kept.
		 */
		clear_buffer_dirty(bh);
		clear_buffer_jbddirty(bh);

		JBUFFER_TRACE(jh, "belongs to current transaction: unfile");

		/*
		 * Hand the credit back only if this buffer had actually been
		 * charged as modified.
		 */
		if (was_modified)
			drop_reserve = 1;

		/*
		 * If the buffer is still on a checkpoint list it is moved to
		 * this transaction's BJ_Forget list instead of being dropped.
		 * Otherwise it is unfiled completely and the extra
		 * jbd2_journal_put_journal_head() releases one reference in
		 * addition to the one dropped at the "drop" label.
		 * NOTE(review): that extra put presumably pairs with the
		 * reference held on behalf of b_transaction - confirm.
		 */
		spin_lock(&journal->j_list_lock);
		if (jh->b_cp_transaction) {
			__jbd2_journal_temp_unlink_buffer(jh);
			__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
		} else {
			__jbd2_journal_unfile_buffer(jh);
			jbd2_journal_put_journal_head(jh);
		}
		spin_unlock(&journal->j_list_lock);
	} else if (jh->b_transaction) {
		J_ASSERT_JH(jh, (jh->b_transaction ==
				 journal->j_committing_transaction));
		/*
		 * Owned by the committing transaction, so it cannot be
		 * unfiled here.
		 */
		JBUFFER_TRACE(jh, "belongs to older transaction");
		/*
		 * Mark the buffer freed and make sure b_next_transaction
		 * points at our transaction.
		 * NOTE(review): presumably the commit path uses the freed
		 * flag to drop dirty state when it is done with the buffer -
		 * confirm in the commit code.
		 */
		set_buffer_freed(bh);

		if (!jh->b_next_transaction) {
			spin_lock(&journal->j_list_lock);
			jh->b_next_transaction = transaction;
			spin_unlock(&journal->j_list_lock);
		} else {
			J_ASSERT(jh->b_next_transaction == transaction);

			/*
			 * Already queued for our transaction: return the
			 * credit only if it had been charged.
			 */
			if (was_modified)
				drop_reserve = 1;
		}
	} else {
		/*
		 * Not owned by any transaction. Without a checkpoint link
		 * there is nothing more to do.
		 */
		spin_lock(&journal->j_list_lock);
		if (!jh->b_cp_transaction) {
			JBUFFER_TRACE(jh, "belongs to none transaction");
			spin_unlock(&journal->j_list_lock);
			goto drop;
		}

		/*
		 * On a checkpoint list but no longer dirty: the checkpoint
		 * link can simply be removed.
		 */
		if (!buffer_dirty(bh)) {
			__jbd2_journal_remove_checkpoint(jh);
			spin_unlock(&journal->j_list_lock);
			goto drop;
		}

		/*
		 * Still dirty and on a checkpoint list: clear the dirty bit
		 * and attach the buffer to our transaction's BJ_Forget list.
		 */
		clear_buffer_dirty(bh);
		__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
		spin_unlock(&journal->j_list_lock);
	}
drop:
	__brelse(bh);
	spin_unlock(&jh->b_state_lock);
	/* Pairs with jbd2_journal_grab_journal_head() at the top. */
	jbd2_journal_put_journal_head(jh);
	if (drop_reserve) {
		/* The forgotten block no longer needs its log-space credit. */
		handle->h_total_credits++;
	}
	return err;
}
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822 int jbd2_journal_stop(handle_t *handle)
1823 {
1824 transaction_t *transaction = handle->h_transaction;
1825 journal_t *journal;
1826 int err = 0, wait_for_commit = 0;
1827 tid_t tid;
1828 pid_t pid;
1829
1830 if (--handle->h_ref > 0) {
1831 jbd2_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
1832 handle->h_ref);
1833 if (is_handle_aborted(handle))
1834 return -EIO;
1835 return 0;
1836 }
1837 if (!transaction) {
1838
1839
1840
1841
1842 memalloc_nofs_restore(handle->saved_alloc_context);
1843 goto free_and_exit;
1844 }
1845 journal = transaction->t_journal;
1846 tid = transaction->t_tid;
1847
1848 if (is_handle_aborted(handle))
1849 err = -EIO;
1850
1851 jbd2_debug(4, "Handle %p going down\n", handle);
1852 trace_jbd2_handle_stats(journal->j_fs_dev->bd_dev,
1853 tid, handle->h_type, handle->h_line_no,
1854 jiffies - handle->h_start_jiffies,
1855 handle->h_sync, handle->h_requested_credits,
1856 (handle->h_requested_credits -
1857 handle->h_total_credits));
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888 pid = current->pid;
1889 if (handle->h_sync && journal->j_last_sync_writer != pid &&
1890 journal->j_max_batch_time) {
1891 u64 commit_time, trans_time;
1892
1893 journal->j_last_sync_writer = pid;
1894
1895 read_lock(&journal->j_state_lock);
1896 commit_time = journal->j_average_commit_time;
1897 read_unlock(&journal->j_state_lock);
1898
1899 trans_time = ktime_to_ns(ktime_sub(ktime_get(),
1900 transaction->t_start_time));
1901
1902 commit_time = max_t(u64, commit_time,
1903 1000*journal->j_min_batch_time);
1904 commit_time = min_t(u64, commit_time,
1905 1000*journal->j_max_batch_time);
1906
1907 if (trans_time < commit_time) {
1908 ktime_t expires = ktime_add_ns(ktime_get(),
1909 commit_time);
1910 set_current_state(TASK_UNINTERRUPTIBLE);
1911 schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
1912 }
1913 }
1914
1915 if (handle->h_sync)
1916 transaction->t_synchronous_commit = 1;
1917
1918
1919
1920
1921
1922
1923 if (handle->h_sync ||
1924 time_after_eq(jiffies, transaction->t_expires)) {
1925
1926
1927
1928
1929 jbd2_debug(2, "transaction too old, requesting commit for "
1930 "handle %p\n", handle);
1931
1932 jbd2_log_start_commit(journal, tid);
1933
1934
1935
1936
1937
1938 if (handle->h_sync && !(current->flags & PF_MEMALLOC))
1939 wait_for_commit = 1;
1940 }
1941
1942
1943
1944
1945
1946
1947
1948 stop_this_handle(handle);
1949
1950 if (wait_for_commit)
1951 err = jbd2_log_wait_commit(journal, tid);
1952
1953 free_and_exit:
1954 if (handle->h_rsv_handle)
1955 jbd2_free_handle(handle->h_rsv_handle);
1956 jbd2_free_handle(handle);
1957 return err;
1958 }
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976 static inline void
1977 __blist_add_buffer(struct journal_head **list, struct journal_head *jh)
1978 {
1979 if (!*list) {
1980 jh->b_tnext = jh->b_tprev = jh;
1981 *list = jh;
1982 } else {
1983
1984 struct journal_head *first = *list, *last = first->b_tprev;
1985 jh->b_tprev = last;
1986 jh->b_tnext = first;
1987 last->b_tnext = first->b_tprev = jh;
1988 }
1989 }
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000 static inline void
2001 __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
2002 {
2003 if (*list == jh) {
2004 *list = jh->b_tnext;
2005 if (*list == jh)
2006 *list = NULL;
2007 }
2008 jh->b_tprev->b_tnext = jh->b_tnext;
2009 jh->b_tnext->b_tprev = jh->b_tprev;
2010 }
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
/*
 * Take @jh off whichever per-transaction list b_jlist says it is on and
 * set b_jlist to BJ_None. b_transaction is left unchanged, so the buffer
 * stays associated with its transaction.
 *
 * Expects jh->b_state_lock to be held and, when the buffer has a
 * transaction, that journal's j_list_lock as well (both are asserted).
 *
 * After unlinking, a jbddirty buffer is either cleaned (journal aborted)
 * or turned into an ordinary dirty buffer via mark_buffer_dirty().
 */
static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
{
	struct journal_head **list = NULL;
	transaction_t *transaction;
	struct buffer_head *bh = jh2bh(jh);

	lockdep_assert_held(&jh->b_state_lock);
	transaction = jh->b_transaction;
	if (transaction)
		assert_spin_locked(&transaction->t_journal->j_list_lock);

	J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
	/* Being on any list implies having an owning transaction. */
	if (jh->b_jlist != BJ_None)
		J_ASSERT_JH(jh, transaction != NULL);

	switch (jh->b_jlist) {
	case BJ_None:
		/* Not on a list: nothing to unlink, dirty bits untouched. */
		return;
	case BJ_Metadata:
		/* The metadata list also maintains a buffer count. */
		transaction->t_nr_buffers--;
		J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0);
		list = &transaction->t_buffers;
		break;
	case BJ_Forget:
		list = &transaction->t_forget;
		break;
	case BJ_Shadow:
		list = &transaction->t_shadow_list;
		break;
	case BJ_Reserved:
		list = &transaction->t_reserved_list;
		break;
	}

	__blist_del_buffer(list, jh);
	jh->b_jlist = BJ_None;
	/*
	 * On an aborted journal the jbddirty bit is just dropped; otherwise
	 * it is converted into a regular dirty bit.
	 */
	if (transaction && is_journal_aborted(transaction->t_journal))
		clear_buffer_jbddirty(bh);
	else if (test_clear_buffer_jbddirty(bh))
		mark_buffer_dirty(bh);
}
2064
2065
2066
2067
2068
2069
2070
/*
 * Detach @jh from its transaction completely: unlink it from the
 * transaction's list and clear b_transaction.
 *
 * The buffer must currently have a transaction and must not be queued for
 * a next one (both asserted). No journal-head reference is dropped here;
 * in this file callers follow up with jbd2_journal_put_journal_head().
 */
static void __jbd2_journal_unfile_buffer(struct journal_head *jh)
{
	J_ASSERT_JH(jh, jh->b_transaction != NULL);
	J_ASSERT_JH(jh, jh->b_next_transaction == NULL);

	__jbd2_journal_temp_unlink_buffer(jh);
	jh->b_transaction = NULL;
}
2079
2080 void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
2081 {
2082 struct buffer_head *bh = jh2bh(jh);
2083
2084
2085 get_bh(bh);
2086 spin_lock(&jh->b_state_lock);
2087 spin_lock(&journal->j_list_lock);
2088 __jbd2_journal_unfile_buffer(jh);
2089 spin_unlock(&journal->j_list_lock);
2090 spin_unlock(&jh->b_state_lock);
2091 jbd2_journal_put_journal_head(jh);
2092 __brelse(bh);
2093 }
2094
2095
2096
2097
2098
2099
2100 static void
2101 __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
2102 {
2103 struct journal_head *jh;
2104
2105 jh = bh2jh(bh);
2106
2107 if (buffer_locked(bh) || buffer_dirty(bh))
2108 goto out;
2109
2110 if (jh->b_next_transaction != NULL || jh->b_transaction != NULL)
2111 goto out;
2112
2113 spin_lock(&journal->j_list_lock);
2114 if (jh->b_cp_transaction != NULL) {
2115
2116 JBUFFER_TRACE(jh, "remove from checkpoint list");
2117 __jbd2_journal_remove_checkpoint(jh);
2118 }
2119 spin_unlock(&journal->j_list_lock);
2120 out:
2121 return;
2122 }
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158 bool jbd2_journal_try_to_free_buffers(journal_t *journal, struct folio *folio)
2159 {
2160 struct buffer_head *head;
2161 struct buffer_head *bh;
2162 bool ret = false;
2163
2164 J_ASSERT(folio_test_locked(folio));
2165
2166 head = folio_buffers(folio);
2167 bh = head;
2168 do {
2169 struct journal_head *jh;
2170
2171
2172
2173
2174
2175
2176 jh = jbd2_journal_grab_journal_head(bh);
2177 if (!jh)
2178 continue;
2179
2180 spin_lock(&jh->b_state_lock);
2181 __journal_try_to_free_buffer(journal, bh);
2182 spin_unlock(&jh->b_state_lock);
2183 jbd2_journal_put_journal_head(jh);
2184 if (buffer_jbd(bh))
2185 goto busy;
2186 } while ((bh = bh->b_this_page) != head);
2187
2188 ret = try_to_free_buffers(folio);
2189 busy:
2190 return ret;
2191 }
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205 static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
2206 {
2207 int may_free = 1;
2208 struct buffer_head *bh = jh2bh(jh);
2209
2210 if (jh->b_cp_transaction) {
2211 JBUFFER_TRACE(jh, "on running+cp transaction");
2212 __jbd2_journal_temp_unlink_buffer(jh);
2213
2214
2215
2216
2217
2218 clear_buffer_dirty(bh);
2219 __jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
2220 may_free = 0;
2221 } else {
2222 JBUFFER_TRACE(jh, "on running transaction");
2223 __jbd2_journal_unfile_buffer(jh);
2224 jbd2_journal_put_journal_head(jh);
2225 }
2226 return may_free;
2227 }
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
/*
 * Detach one buffer from the journal as part of invalidating its folio.
 *
 * @journal: journal the buffer may belong to
 * @bh: buffer being invalidated
 * @partial_page: non-zero when only part of the folio is being invalidated
 *
 * Returns:
 *   1       - buffer was zapped and holds no remaining journal state
 *   0       - buffer was zapped but kept on a forget list, or it belongs
 *             to the committing transaction and was only marked freed
 *   -EBUSY  - buffer belongs to the committing transaction and
 *             @partial_page is set; nothing was changed
 *
 * Locks are taken in the order j_state_lock (write), jh->b_state_lock,
 * j_list_lock, and released in reverse.
 */
static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
				int partial_page)
{
	transaction_t *transaction;
	struct journal_head *jh;
	int may_free = 1;

	BUFFER_TRACE(bh, "entry");

	/*
	 * A buffer without a journal head carries no journal state, so it
	 * can be zapped without taking any journal lock.
	 */
	jh = jbd2_journal_grab_journal_head(bh);
	if (!jh)
		goto zap_buffer_unlocked;

	/* All three locks are held while the ownership state is inspected. */
	write_lock(&journal->j_state_lock);
	spin_lock(&jh->b_state_lock);
	spin_lock(&journal->j_list_lock);

	/*
	 * The handling below depends on which transaction, if any,
	 * currently owns the buffer: none, the committing one, or (by
	 * assertion) the running one.
	 */
	transaction = jh->b_transaction;
	if (transaction == NULL) {
		/*
		 * Not owned by any transaction. Without a checkpoint link
		 * the buffer can simply be zapped.
		 */
		if (!jh->b_cp_transaction) {
			JBUFFER_TRACE(jh, "not on any transaction: zap");
			goto zap_buffer;
		}

		if (!buffer_dirty(bh)) {
			/* On a checkpoint list but clean: drop the link. */
			__jbd2_journal_remove_checkpoint(jh);
			goto zap_buffer;
		}

		/*
		 * Still dirty and on a checkpoint list: hand it to a live
		 * transaction's forget list, preferring the running one.
		 */
		if (journal->j_running_transaction) {
			/* A running transaction exists: dispose via it. */
			JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget");
			may_free = __dispose_buffer(jh,
					journal->j_running_transaction);
			goto zap_buffer;
		} else {
			/*
			 * No running transaction: fall back to the committing
			 * one, if there is any.
			 */
			if (journal->j_committing_transaction) {
				JBUFFER_TRACE(jh, "give to committing trans");
				may_free = __dispose_buffer(jh,
					journal->j_committing_transaction);
				goto zap_buffer;
			} else {
				/*
				 * No transaction at all is active: clean the
				 * buffer and drop its checkpoint link.
				 */
				clear_buffer_jbddirty(bh);
				__jbd2_journal_remove_checkpoint(jh);
				goto zap_buffer;
			}
		}
	} else if (transaction == journal->j_committing_transaction) {
		JBUFFER_TRACE(jh, "on committing transaction");
		/*
		 * The buffer is part of the commit in progress and is not
		 * unfiled here. For a partial invalidation the caller gets
		 * -EBUSY and the buffer is left untouched.
		 */
		if (partial_page) {
			spin_unlock(&journal->j_list_lock);
			spin_unlock(&jh->b_state_lock);
			write_unlock(&journal->j_state_lock);
			jbd2_journal_put_journal_head(jh);
			return -EBUSY;
		}
		/*
		 * Full invalidation: mark the buffer freed, clear b_modified,
		 * and, if it is jbddirty and a running transaction exists,
		 * queue it for that transaction. The buffer itself is not
		 * zapped on this path.
		 */
		set_buffer_freed(bh);
		if (journal->j_running_transaction && buffer_jbddirty(bh))
			jh->b_next_transaction = journal->j_running_transaction;
		jh->b_modified = 0;
		spin_unlock(&journal->j_list_lock);
		spin_unlock(&jh->b_state_lock);
		write_unlock(&journal->j_state_lock);
		jbd2_journal_put_journal_head(jh);
		return 0;
	} else {
		/*
		 * Any other owner must be the running transaction. The
		 * buffer is disposed of through it.
		 */
		J_ASSERT_JH(jh, transaction == journal->j_running_transaction);
		JBUFFER_TRACE(jh, "on running transaction");
		may_free = __dispose_buffer(jh, transaction);
	}

zap_buffer:
	/*
	 * b_modified is cleared on every zap path.
	 * NOTE(review): presumably so that credit accounting starts fresh if
	 * the buffer is reused while it sits on a forget list - confirm
	 * against the write-access path.
	 */
	jh->b_modified = 0;
	spin_unlock(&journal->j_list_lock);
	spin_unlock(&jh->b_state_lock);
	write_unlock(&journal->j_state_lock);
	jbd2_journal_put_journal_head(jh);
zap_buffer_unlocked:
	/* Strip all state bits and the device binding from the buffer. */
	clear_buffer_dirty(bh);
	J_ASSERT_BH(bh, !buffer_jbddirty(bh));
	clear_buffer_mapped(bh);
	clear_buffer_req(bh);
	clear_buffer_new(bh);
	clear_buffer_delay(bh);
	clear_buffer_unwritten(bh);
	bh->b_bdev = NULL;
	return may_free;
}
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450 int jbd2_journal_invalidate_folio(journal_t *journal, struct folio *folio,
2451 size_t offset, size_t length)
2452 {
2453 struct buffer_head *head, *bh, *next;
2454 unsigned int stop = offset + length;
2455 unsigned int curr_off = 0;
2456 int partial_page = (offset || length < folio_size(folio));
2457 int may_free = 1;
2458 int ret = 0;
2459
2460 if (!folio_test_locked(folio))
2461 BUG();
2462 head = folio_buffers(folio);
2463 if (!head)
2464 return 0;
2465
2466 BUG_ON(stop > folio_size(folio) || stop < length);
2467
2468
2469
2470
2471
2472 bh = head;
2473 do {
2474 unsigned int next_off = curr_off + bh->b_size;
2475 next = bh->b_this_page;
2476
2477 if (next_off > stop)
2478 return 0;
2479
2480 if (offset <= curr_off) {
2481
2482 lock_buffer(bh);
2483 ret = journal_unmap_buffer(journal, bh, partial_page);
2484 unlock_buffer(bh);
2485 if (ret < 0)
2486 return ret;
2487 may_free &= ret;
2488 }
2489 curr_off = next_off;
2490 bh = next;
2491
2492 } while (bh != head);
2493
2494 if (!partial_page) {
2495 if (may_free && try_to_free_buffers(folio))
2496 J_ASSERT(!folio_buffers(folio));
2497 }
2498 return 0;
2499 }
2500
2501
2502
2503
/*
 * File @jh on list @jlist of @transaction.
 *
 * Expects jh->b_state_lock and the journal's j_list_lock to be held (both
 * are asserted). The buffer must either be unowned or already belong to
 * @transaction. If it is already on the requested list, nothing happens.
 *
 * An unowned buffer gets an additional journal-head reference, taken with
 * jbd2_journal_grab_journal_head(), when it is attached to @transaction.
 */
void __jbd2_journal_file_buffer(struct journal_head *jh,
			transaction_t *transaction, int jlist)
{
	struct journal_head **list = NULL;
	int was_dirty = 0;
	struct buffer_head *bh = jh2bh(jh);

	lockdep_assert_held(&jh->b_state_lock);
	assert_spin_locked(&transaction->t_journal->j_list_lock);

	J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
	J_ASSERT_JH(jh, jh->b_transaction == transaction ||
				jh->b_transaction == NULL);

	/* Already on the requested list of a transaction: nothing to do. */
	if (jh->b_transaction && jh->b_jlist == jlist)
		return;

	if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
	    jlist == BJ_Shadow || jlist == BJ_Forget) {
		/*
		 * On these lists dirtiness is tracked with the jbddirty bit.
		 * A set buffer_dirty bit is unexpected, so it is reported
		 * through warn_dirty_buffer(). Whichever dirty bit is found
		 * is cleared here and restored as jbddirty at the end. If
		 * the regular dirty bit was set, the jbddirty bit is not
		 * tested because the || short-circuits.
		 */
		if (buffer_dirty(bh))
			warn_dirty_buffer(bh);
		if (test_clear_buffer_dirty(bh) ||
		    test_clear_buffer_jbddirty(bh))
			was_dirty = 1;
	}

	/*
	 * Leave the old list if there was one; otherwise take the reference
	 * that goes with setting b_transaction.
	 */
	if (jh->b_transaction)
		__jbd2_journal_temp_unlink_buffer(jh);
	else
		jbd2_journal_grab_journal_head(bh);
	jh->b_transaction = transaction;

	switch (jlist) {
	case BJ_None:
		/*
		 * Owned by the transaction but on no list. Returns before
		 * b_jlist is written and before the dirty state is restored.
		 */
		J_ASSERT_JH(jh, !jh->b_committed_data);
		J_ASSERT_JH(jh, !jh->b_frozen_data);
		return;
	case BJ_Metadata:
		/* The metadata list also maintains a buffer count. */
		transaction->t_nr_buffers++;
		list = &transaction->t_buffers;
		break;
	case BJ_Forget:
		list = &transaction->t_forget;
		break;
	case BJ_Shadow:
		list = &transaction->t_shadow_list;
		break;
	case BJ_Reserved:
		list = &transaction->t_reserved_list;
		break;
	}

	__blist_add_buffer(list, jh);
	jh->b_jlist = jlist;

	if (was_dirty)
		set_buffer_jbddirty(bh);
}
2569
2570 void jbd2_journal_file_buffer(struct journal_head *jh,
2571 transaction_t *transaction, int jlist)
2572 {
2573 spin_lock(&jh->b_state_lock);
2574 spin_lock(&transaction->t_journal->j_list_lock);
2575 __jbd2_journal_file_buffer(jh, transaction, jlist);
2576 spin_unlock(&transaction->t_journal->j_list_lock);
2577 spin_unlock(&jh->b_state_lock);
2578 }
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
/*
 * Move @jh off its current transaction once that transaction is done with
 * it.
 *
 * If no next transaction is queued, the buffer is unfiled completely and
 * true is returned; jbd2_journal_refile_buffer() uses that to drop a
 * journal-head reference. Otherwise the buffer is handed over to
 * b_next_transaction, filed on the list matching its state, and false is
 * returned.
 *
 * Expects jh->b_state_lock to be held and, when the buffer has a
 * transaction, that journal's j_list_lock as well (both are asserted).
 */
bool __jbd2_journal_refile_buffer(struct journal_head *jh)
{
	int was_dirty, jlist;
	struct buffer_head *bh = jh2bh(jh);

	lockdep_assert_held(&jh->b_state_lock);
	if (jh->b_transaction)
		assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock);

	/* Nobody is waiting for the buffer: drop it from the journal. */
	if (jh->b_next_transaction == NULL) {
		__jbd2_journal_unfile_buffer(jh);
		return true;
	}

	/*
	 * A later transaction has claimed the buffer. Save and clear the
	 * jbddirty bit first, so that the unlink below does not turn it
	 * into a regular dirty bit; it is restored at the end.
	 */
	was_dirty = test_clear_buffer_jbddirty(bh);
	__jbd2_journal_temp_unlink_buffer(jh);

	/*
	 * The buffer must still have an owning transaction at this point.
	 * NOTE(review): presumably because the reference held for that
	 * owner is what gets passed on to the next one - confirm.
	 */
	J_ASSERT_JH(jh, jh->b_transaction != NULL);

	/*
	 * b_transaction is switched here, before filing, so that
	 * __jbd2_journal_file_buffer() sees an owned buffer and does not
	 * take another journal-head reference. WRITE_ONCE() is used for
	 * both pointer updates.
	 */
	WRITE_ONCE(jh->b_transaction, jh->b_next_transaction);
	WRITE_ONCE(jh->b_next_transaction, NULL);
	/* Choose the target list: freed, modified, or merely reserved. */
	if (buffer_freed(bh))
		jlist = BJ_Forget;
	else if (jh->b_modified)
		jlist = BJ_Metadata;
	else
		jlist = BJ_Reserved;
	__jbd2_journal_file_buffer(jh, jh->b_transaction, jlist);
	J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);

	if (was_dirty)
		set_buffer_jbddirty(bh);
	return false;
}
2642
2643
2644
2645
2646
2647
2648
2649 void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
2650 {
2651 bool drop;
2652
2653 spin_lock(&jh->b_state_lock);
2654 spin_lock(&journal->j_list_lock);
2655 drop = __jbd2_journal_refile_buffer(jh);
2656 spin_unlock(&jh->b_state_lock);
2657 spin_unlock(&journal->j_list_lock);
2658 if (drop)
2659 jbd2_journal_put_journal_head(jh);
2660 }
2661
2662
2663
2664
2665 static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
2666 unsigned long flags, loff_t start_byte, loff_t end_byte)
2667 {
2668 transaction_t *transaction = handle->h_transaction;
2669 journal_t *journal;
2670
2671 if (is_handle_aborted(handle))
2672 return -EROFS;
2673 journal = transaction->t_journal;
2674
2675 jbd2_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
2676 transaction->t_tid);
2677
2678 spin_lock(&journal->j_list_lock);
2679 jinode->i_flags |= flags;
2680
2681 if (jinode->i_dirty_end) {
2682 jinode->i_dirty_start = min(jinode->i_dirty_start, start_byte);
2683 jinode->i_dirty_end = max(jinode->i_dirty_end, end_byte);
2684 } else {
2685 jinode->i_dirty_start = start_byte;
2686 jinode->i_dirty_end = end_byte;
2687 }
2688
2689
2690 if (jinode->i_transaction == transaction ||
2691 jinode->i_next_transaction == transaction)
2692 goto done;
2693
2694
2695
2696
2697
2698
2699 if (!transaction->t_need_data_flush)
2700 transaction->t_need_data_flush = 1;
2701
2702
2703 if (jinode->i_transaction) {
2704 J_ASSERT(jinode->i_next_transaction == NULL);
2705 J_ASSERT(jinode->i_transaction ==
2706 journal->j_committing_transaction);
2707 jinode->i_next_transaction = transaction;
2708 goto done;
2709 }
2710
2711 J_ASSERT(!jinode->i_next_transaction);
2712 jinode->i_transaction = transaction;
2713 list_add(&jinode->i_list, &transaction->t_inode_list);
2714 done:
2715 spin_unlock(&journal->j_list_lock);
2716
2717 return 0;
2718 }
2719
2720 int jbd2_journal_inode_ranged_write(handle_t *handle,
2721 struct jbd2_inode *jinode, loff_t start_byte, loff_t length)
2722 {
2723 return jbd2_journal_file_inode(handle, jinode,
2724 JI_WRITE_DATA | JI_WAIT_DATA, start_byte,
2725 start_byte + length - 1);
2726 }
2727
2728 int jbd2_journal_inode_ranged_wait(handle_t *handle, struct jbd2_inode *jinode,
2729 loff_t start_byte, loff_t length)
2730 {
2731 return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA,
2732 start_byte, start_byte + length - 1);
2733 }
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755 int jbd2_journal_begin_ordered_truncate(journal_t *journal,
2756 struct jbd2_inode *jinode,
2757 loff_t new_size)
2758 {
2759 transaction_t *inode_trans, *commit_trans;
2760 int ret = 0;
2761
2762
2763 if (!jinode->i_transaction)
2764 goto out;
2765
2766
2767
2768 read_lock(&journal->j_state_lock);
2769 commit_trans = journal->j_committing_transaction;
2770 read_unlock(&journal->j_state_lock);
2771 spin_lock(&journal->j_list_lock);
2772 inode_trans = jinode->i_transaction;
2773 spin_unlock(&journal->j_list_lock);
2774 if (inode_trans == commit_trans) {
2775 ret = filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping,
2776 new_size, LLONG_MAX);
2777 if (ret)
2778 jbd2_journal_abort(journal, ret);
2779 }
2780 out:
2781 return ret;
2782 }