0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017 #include <linux/time.h>
0018 #include <linux/fs.h>
0019 #include <linux/jbd2.h>
0020 #include <linux/errno.h>
0021 #include <linux/slab.h>
0022 #include <linux/blkdev.h>
0023 #include <trace/events/jbd2.h>
0024
0025
0026
0027
0028
0029
0030 static inline void __buffer_unlink_first(struct journal_head *jh)
0031 {
0032 transaction_t *transaction = jh->b_cp_transaction;
0033
0034 jh->b_cpnext->b_cpprev = jh->b_cpprev;
0035 jh->b_cpprev->b_cpnext = jh->b_cpnext;
0036 if (transaction->t_checkpoint_list == jh) {
0037 transaction->t_checkpoint_list = jh->b_cpnext;
0038 if (transaction->t_checkpoint_list == jh)
0039 transaction->t_checkpoint_list = NULL;
0040 }
0041 }
0042
0043
0044
0045
0046
0047
0048 static inline void __buffer_unlink(struct journal_head *jh)
0049 {
0050 transaction_t *transaction = jh->b_cp_transaction;
0051
0052 __buffer_unlink_first(jh);
0053 if (transaction->t_checkpoint_io_list == jh) {
0054 transaction->t_checkpoint_io_list = jh->b_cpnext;
0055 if (transaction->t_checkpoint_io_list == jh)
0056 transaction->t_checkpoint_io_list = NULL;
0057 }
0058 }
0059
0060
0061
0062
0063
0064
0065 static inline void __buffer_relink_io(struct journal_head *jh)
0066 {
0067 transaction_t *transaction = jh->b_cp_transaction;
0068
0069 __buffer_unlink_first(jh);
0070
0071 if (!transaction->t_checkpoint_io_list) {
0072 jh->b_cpnext = jh->b_cpprev = jh;
0073 } else {
0074 jh->b_cpnext = transaction->t_checkpoint_io_list;
0075 jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
0076 jh->b_cpprev->b_cpnext = jh;
0077 jh->b_cpnext->b_cpprev = jh;
0078 }
0079 transaction->t_checkpoint_io_list = jh;
0080 }
0081
0082
0083
0084
0085
0086
0087 static inline bool __cp_buffer_busy(struct journal_head *jh)
0088 {
0089 struct buffer_head *bh = jh2bh(jh);
0090
0091 return (jh->b_transaction || buffer_locked(bh) || buffer_dirty(bh));
0092 }
0093
0094
0095
0096
0097
0098
0099
0100 void __jbd2_log_wait_for_space(journal_t *journal)
0101 __acquires(&journal->j_state_lock)
0102 __releases(&journal->j_state_lock)
0103 {
0104 int nblocks, space_left;
0105
0106
0107 nblocks = journal->j_max_transaction_buffers;
0108 while (jbd2_log_space_left(journal) < nblocks) {
0109 write_unlock(&journal->j_state_lock);
0110 mutex_lock_io(&journal->j_checkpoint_mutex);
0111
0112
0113
0114
0115
0116
0117
0118
0119
0120
0121
0122
0123 write_lock(&journal->j_state_lock);
0124 if (journal->j_flags & JBD2_ABORT) {
0125 mutex_unlock(&journal->j_checkpoint_mutex);
0126 return;
0127 }
0128 spin_lock(&journal->j_list_lock);
0129 space_left = jbd2_log_space_left(journal);
0130 if (space_left < nblocks) {
0131 int chkpt = journal->j_checkpoint_transactions != NULL;
0132 tid_t tid = 0;
0133
0134 if (journal->j_committing_transaction)
0135 tid = journal->j_committing_transaction->t_tid;
0136 spin_unlock(&journal->j_list_lock);
0137 write_unlock(&journal->j_state_lock);
0138 if (chkpt) {
0139 jbd2_log_do_checkpoint(journal);
0140 } else if (jbd2_cleanup_journal_tail(journal) == 0) {
0141
0142 ;
0143 } else if (tid) {
0144
0145
0146
0147
0148
0149 mutex_unlock(&journal->j_checkpoint_mutex);
0150 jbd2_log_wait_commit(journal, tid);
0151 write_lock(&journal->j_state_lock);
0152 continue;
0153 } else {
0154 printk(KERN_ERR "%s: needed %d blocks and "
0155 "only had %d space available\n",
0156 __func__, nblocks, space_left);
0157 printk(KERN_ERR "%s: no way to get more "
0158 "journal space in %s\n", __func__,
0159 journal->j_devname);
0160 WARN_ON(1);
0161 jbd2_journal_abort(journal, -EIO);
0162 }
0163 write_lock(&journal->j_state_lock);
0164 } else {
0165 spin_unlock(&journal->j_list_lock);
0166 }
0167 mutex_unlock(&journal->j_checkpoint_mutex);
0168 }
0169 }
0170
0171 static void
0172 __flush_batch(journal_t *journal, int *batch_count)
0173 {
0174 int i;
0175 struct blk_plug plug;
0176
0177 blk_start_plug(&plug);
0178 for (i = 0; i < *batch_count; i++)
0179 write_dirty_buffer(journal->j_chkpt_bhs[i], REQ_SYNC);
0180 blk_finish_plug(&plug);
0181
0182 for (i = 0; i < *batch_count; i++) {
0183 struct buffer_head *bh = journal->j_chkpt_bhs[i];
0184 BUFFER_TRACE(bh, "brelse");
0185 __brelse(bh);
0186 }
0187 *batch_count = 0;
0188 }
0189
0190
0191
0192
0193
0194
0195
0196
0197
0198 int jbd2_log_do_checkpoint(journal_t *journal)
0199 {
0200 struct journal_head *jh;
0201 struct buffer_head *bh;
0202 transaction_t *transaction;
0203 tid_t this_tid;
0204 int result, batch_count = 0;
0205
0206 jbd2_debug(1, "Start checkpoint\n");
0207
0208
0209
0210
0211
0212
0213 result = jbd2_cleanup_journal_tail(journal);
0214 trace_jbd2_checkpoint(journal, result);
0215 jbd2_debug(1, "cleanup_journal_tail returned %d\n", result);
0216 if (result <= 0)
0217 return result;
0218
0219
0220
0221
0222
0223 spin_lock(&journal->j_list_lock);
0224 if (!journal->j_checkpoint_transactions)
0225 goto out;
0226 transaction = journal->j_checkpoint_transactions;
0227 if (transaction->t_chp_stats.cs_chp_time == 0)
0228 transaction->t_chp_stats.cs_chp_time = jiffies;
0229 this_tid = transaction->t_tid;
0230 restart:
0231
0232
0233
0234
0235
0236 if (journal->j_checkpoint_transactions != transaction ||
0237 transaction->t_tid != this_tid)
0238 goto out;
0239
0240
0241 while (transaction->t_checkpoint_list) {
0242 jh = transaction->t_checkpoint_list;
0243 bh = jh2bh(jh);
0244
0245 if (buffer_locked(bh)) {
0246 get_bh(bh);
0247 spin_unlock(&journal->j_list_lock);
0248 wait_on_buffer(bh);
0249
0250 BUFFER_TRACE(bh, "brelse");
0251 __brelse(bh);
0252 goto retry;
0253 }
0254 if (jh->b_transaction != NULL) {
0255 transaction_t *t = jh->b_transaction;
0256 tid_t tid = t->t_tid;
0257
0258 transaction->t_chp_stats.cs_forced_to_close++;
0259 spin_unlock(&journal->j_list_lock);
0260 if (unlikely(journal->j_flags & JBD2_UNMOUNT))
0261
0262
0263
0264
0265
0266
0267 printk(KERN_ERR
0268 "JBD2: %s: Waiting for Godot: block %llu\n",
0269 journal->j_devname, (unsigned long long) bh->b_blocknr);
0270
0271 if (batch_count)
0272 __flush_batch(journal, &batch_count);
0273 jbd2_log_start_commit(journal, tid);
0274
0275
0276
0277
0278
0279
0280
0281
0282 mutex_unlock(&journal->j_checkpoint_mutex);
0283 jbd2_log_wait_commit(journal, tid);
0284 mutex_lock_io(&journal->j_checkpoint_mutex);
0285 spin_lock(&journal->j_list_lock);
0286 goto restart;
0287 }
0288 if (!buffer_dirty(bh)) {
0289 BUFFER_TRACE(bh, "remove from checkpoint");
0290 if (__jbd2_journal_remove_checkpoint(jh))
0291
0292 goto out;
0293 continue;
0294 }
0295
0296
0297
0298
0299
0300
0301
0302
0303 BUFFER_TRACE(bh, "queue");
0304 get_bh(bh);
0305 J_ASSERT_BH(bh, !buffer_jwrite(bh));
0306 journal->j_chkpt_bhs[batch_count++] = bh;
0307 __buffer_relink_io(jh);
0308 transaction->t_chp_stats.cs_written++;
0309 if ((batch_count == JBD2_NR_BATCH) ||
0310 need_resched() ||
0311 spin_needbreak(&journal->j_list_lock))
0312 goto unlock_and_flush;
0313 }
0314
0315 if (batch_count) {
0316 unlock_and_flush:
0317 spin_unlock(&journal->j_list_lock);
0318 retry:
0319 if (batch_count)
0320 __flush_batch(journal, &batch_count);
0321 spin_lock(&journal->j_list_lock);
0322 goto restart;
0323 }
0324
0325
0326
0327
0328
0329 restart2:
0330
0331 if (journal->j_checkpoint_transactions != transaction ||
0332 transaction->t_tid != this_tid)
0333 goto out;
0334
0335 while (transaction->t_checkpoint_io_list) {
0336 jh = transaction->t_checkpoint_io_list;
0337 bh = jh2bh(jh);
0338 if (buffer_locked(bh)) {
0339 get_bh(bh);
0340 spin_unlock(&journal->j_list_lock);
0341 wait_on_buffer(bh);
0342
0343 BUFFER_TRACE(bh, "brelse");
0344 __brelse(bh);
0345 spin_lock(&journal->j_list_lock);
0346 goto restart2;
0347 }
0348
0349
0350
0351
0352
0353
0354 if (__jbd2_journal_remove_checkpoint(jh))
0355 break;
0356 }
0357 out:
0358 spin_unlock(&journal->j_list_lock);
0359 result = jbd2_cleanup_journal_tail(journal);
0360
0361 return (result < 0) ? result : 0;
0362 }
0363
0364
0365
0366
0367
0368
0369
0370
0371
0372
0373
0374
0375
0376
0377
0378
0379
0380
0381
0382 int jbd2_cleanup_journal_tail(journal_t *journal)
0383 {
0384 tid_t first_tid;
0385 unsigned long blocknr;
0386
0387 if (is_journal_aborted(journal))
0388 return -EIO;
0389
0390 if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr))
0391 return 1;
0392 J_ASSERT(blocknr != 0);
0393
0394
0395
0396
0397
0398
0399
0400
0401
0402 if (journal->j_flags & JBD2_BARRIER)
0403 blkdev_issue_flush(journal->j_fs_dev);
0404
0405 return __jbd2_update_log_tail(journal, first_tid, blocknr);
0406 }
0407
0408
0409
0410
0411
0412
0413
0414
0415
0416
0417
0418
0419
0420 static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy)
0421 {
0422 struct journal_head *last_jh;
0423 struct journal_head *next_jh = jh;
0424
0425 if (!jh)
0426 return 0;
0427
0428 last_jh = jh->b_cpprev;
0429 do {
0430 jh = next_jh;
0431 next_jh = jh->b_cpnext;
0432
0433 if (!destroy && __cp_buffer_busy(jh))
0434 return 0;
0435
0436 if (__jbd2_journal_remove_checkpoint(jh))
0437 return 1;
0438
0439
0440
0441
0442
0443
0444 if (need_resched())
0445 return 0;
0446 } while (jh != last_jh);
0447
0448 return 0;
0449 }
0450
0451
0452
0453
0454
0455
0456
0457
0458
0459
0460
0461 static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
0462 unsigned long *nr_to_scan,
0463 bool *released)
0464 {
0465 struct journal_head *last_jh;
0466 struct journal_head *next_jh = jh;
0467 unsigned long nr_freed = 0;
0468 int ret;
0469
0470 if (!jh || *nr_to_scan == 0)
0471 return 0;
0472
0473 last_jh = jh->b_cpprev;
0474 do {
0475 jh = next_jh;
0476 next_jh = jh->b_cpnext;
0477
0478 (*nr_to_scan)--;
0479 if (__cp_buffer_busy(jh))
0480 continue;
0481
0482 nr_freed++;
0483 ret = __jbd2_journal_remove_checkpoint(jh);
0484 if (ret) {
0485 *released = true;
0486 break;
0487 }
0488
0489 if (need_resched())
0490 break;
0491 } while (jh != last_jh && *nr_to_scan);
0492
0493 return nr_freed;
0494 }
0495
0496
0497
0498
0499
0500
0501
0502
0503
0504
0505 unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
0506 unsigned long *nr_to_scan)
0507 {
0508 transaction_t *transaction, *last_transaction, *next_transaction;
0509 bool released;
0510 tid_t first_tid = 0, last_tid = 0, next_tid = 0;
0511 tid_t tid = 0;
0512 unsigned long nr_freed = 0;
0513 unsigned long nr_scanned = *nr_to_scan;
0514
0515 again:
0516 spin_lock(&journal->j_list_lock);
0517 if (!journal->j_checkpoint_transactions) {
0518 spin_unlock(&journal->j_list_lock);
0519 goto out;
0520 }
0521
0522
0523
0524
0525
0526
0527
0528 if (journal->j_shrink_transaction)
0529 transaction = journal->j_shrink_transaction;
0530 else
0531 transaction = journal->j_checkpoint_transactions;
0532
0533 if (!first_tid)
0534 first_tid = transaction->t_tid;
0535 last_transaction = journal->j_checkpoint_transactions->t_cpprev;
0536 next_transaction = transaction;
0537 last_tid = last_transaction->t_tid;
0538 do {
0539 transaction = next_transaction;
0540 next_transaction = transaction->t_cpnext;
0541 tid = transaction->t_tid;
0542 released = false;
0543
0544 nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_list,
0545 nr_to_scan, &released);
0546 if (*nr_to_scan == 0)
0547 break;
0548 if (need_resched() || spin_needbreak(&journal->j_list_lock))
0549 break;
0550 if (released)
0551 continue;
0552
0553 nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_io_list,
0554 nr_to_scan, &released);
0555 if (*nr_to_scan == 0)
0556 break;
0557 if (need_resched() || spin_needbreak(&journal->j_list_lock))
0558 break;
0559 } while (transaction != last_transaction);
0560
0561 if (transaction != last_transaction) {
0562 journal->j_shrink_transaction = next_transaction;
0563 next_tid = next_transaction->t_tid;
0564 } else {
0565 journal->j_shrink_transaction = NULL;
0566 next_tid = 0;
0567 }
0568
0569 spin_unlock(&journal->j_list_lock);
0570 cond_resched();
0571
0572 if (*nr_to_scan && next_tid)
0573 goto again;
0574 out:
0575 nr_scanned -= *nr_to_scan;
0576 trace_jbd2_shrink_checkpoint_list(journal, first_tid, tid, last_tid,
0577 nr_freed, nr_scanned, next_tid);
0578
0579 return nr_freed;
0580 }
0581
0582
0583
0584
0585
0586
0587
0588
0589
0590 void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
0591 {
0592 transaction_t *transaction, *last_transaction, *next_transaction;
0593 int ret;
0594
0595 transaction = journal->j_checkpoint_transactions;
0596 if (!transaction)
0597 return;
0598
0599 last_transaction = transaction->t_cpprev;
0600 next_transaction = transaction;
0601 do {
0602 transaction = next_transaction;
0603 next_transaction = transaction->t_cpnext;
0604 ret = journal_clean_one_cp_list(transaction->t_checkpoint_list,
0605 destroy);
0606
0607
0608
0609
0610
0611 if (need_resched())
0612 return;
0613 if (ret)
0614 continue;
0615
0616
0617
0618
0619
0620 ret = journal_clean_one_cp_list(transaction->
0621 t_checkpoint_io_list, destroy);
0622 if (need_resched())
0623 return;
0624
0625
0626
0627
0628
0629 if (!ret)
0630 return;
0631 } while (transaction != last_transaction);
0632 }
0633
0634
0635
0636
0637
0638 void jbd2_journal_destroy_checkpoint(journal_t *journal)
0639 {
0640
0641
0642
0643
0644 while (1) {
0645 spin_lock(&journal->j_list_lock);
0646 if (!journal->j_checkpoint_transactions) {
0647 spin_unlock(&journal->j_list_lock);
0648 break;
0649 }
0650 __jbd2_journal_clean_checkpoint_list(journal, true);
0651 spin_unlock(&journal->j_list_lock);
0652 cond_resched();
0653 }
0654 }
0655
0656
0657
0658
0659
0660
0661
0662
0663
0664
0665
0666
0667
0668
0669
0670
0671
0672
0673
0674 int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
0675 {
0676 struct transaction_chp_stats_s *stats;
0677 transaction_t *transaction;
0678 journal_t *journal;
0679 struct buffer_head *bh = jh2bh(jh);
0680
0681 JBUFFER_TRACE(jh, "entry");
0682
0683 transaction = jh->b_cp_transaction;
0684 if (!transaction) {
0685 JBUFFER_TRACE(jh, "not on transaction");
0686 return 0;
0687 }
0688 journal = transaction->t_journal;
0689
0690 JBUFFER_TRACE(jh, "removing from transaction");
0691
0692
0693
0694
0695
0696
0697
0698
0699 if (buffer_write_io_error(bh))
0700 set_bit(JBD2_CHECKPOINT_IO_ERROR, &journal->j_atomic_flags);
0701
0702 __buffer_unlink(jh);
0703 jh->b_cp_transaction = NULL;
0704 percpu_counter_dec(&journal->j_checkpoint_jh_count);
0705 jbd2_journal_put_journal_head(jh);
0706
0707
0708 if (transaction->t_checkpoint_list || transaction->t_checkpoint_io_list)
0709 return 0;
0710
0711
0712
0713
0714
0715
0716
0717
0718
0719
0720 if (transaction->t_state != T_FINISHED)
0721 return 0;
0722
0723
0724
0725
0726
0727 stats = &transaction->t_chp_stats;
0728 if (stats->cs_chp_time)
0729 stats->cs_chp_time = jbd2_time_diff(stats->cs_chp_time,
0730 jiffies);
0731 trace_jbd2_checkpoint_stats(journal->j_fs_dev->bd_dev,
0732 transaction->t_tid, stats);
0733
0734 __jbd2_journal_drop_transaction(journal, transaction);
0735 jbd2_journal_free_transaction(transaction);
0736 return 1;
0737 }
0738
0739
0740
0741
0742
0743
0744
0745
0746
0747 void __jbd2_journal_insert_checkpoint(struct journal_head *jh,
0748 transaction_t *transaction)
0749 {
0750 JBUFFER_TRACE(jh, "entry");
0751 J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
0752 J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
0753
0754
0755 jbd2_journal_grab_journal_head(jh2bh(jh));
0756 jh->b_cp_transaction = transaction;
0757
0758 if (!transaction->t_checkpoint_list) {
0759 jh->b_cpnext = jh->b_cpprev = jh;
0760 } else {
0761 jh->b_cpnext = transaction->t_checkpoint_list;
0762 jh->b_cpprev = transaction->t_checkpoint_list->b_cpprev;
0763 jh->b_cpprev->b_cpnext = jh;
0764 jh->b_cpnext->b_cpprev = jh;
0765 }
0766 transaction->t_checkpoint_list = jh;
0767 percpu_counter_inc(&transaction->t_journal->j_checkpoint_jh_count);
0768 }
0769
0770
0771
0772
0773
0774
0775
0776
0777
0778
0779
0780 void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transaction)
0781 {
0782 assert_spin_locked(&journal->j_list_lock);
0783
0784 journal->j_shrink_transaction = NULL;
0785 if (transaction->t_cpnext) {
0786 transaction->t_cpnext->t_cpprev = transaction->t_cpprev;
0787 transaction->t_cpprev->t_cpnext = transaction->t_cpnext;
0788 if (journal->j_checkpoint_transactions == transaction)
0789 journal->j_checkpoint_transactions =
0790 transaction->t_cpnext;
0791 if (journal->j_checkpoint_transactions == transaction)
0792 journal->j_checkpoint_transactions = NULL;
0793 }
0794
0795 J_ASSERT(transaction->t_state == T_FINISHED);
0796 J_ASSERT(transaction->t_buffers == NULL);
0797 J_ASSERT(transaction->t_forget == NULL);
0798 J_ASSERT(transaction->t_shadow_list == NULL);
0799 J_ASSERT(transaction->t_checkpoint_list == NULL);
0800 J_ASSERT(transaction->t_checkpoint_io_list == NULL);
0801 J_ASSERT(atomic_read(&transaction->t_updates) == 0);
0802 J_ASSERT(journal->j_committing_transaction != transaction);
0803 J_ASSERT(journal->j_running_transaction != transaction);
0804
0805 trace_jbd2_drop_transaction(journal, transaction);
0806
0807 jbd2_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
0808 }