/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/mempool.h>
#include <linux/gfs2_ondisk.h>
#include <linux/bio.h>
#include <linux/fs.h>
#include <linux/list_sort.h>
#include <linux/blkdev.h>

#include "bmap.h"
#include "dir.h"
#include "gfs2.h"
#include "incore.h"
#include "inode.h"
#include "glock.h"
#include "log.h"
#include "lops.h"
#include "meta_io.h"
#include "recovery.h"
#include "rgrp.h"
#include "trans.h"
#include "util.h"
#include "trace_gfs2.h"

/**
 * gfs2_pin - Pin a buffer in memory
 * @sdp: The superblock
 * @bh: The buffer to be pinned
 *
 * The log lock must be held when calling this function
 */
void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
	struct gfs2_bufdata *bd;

	BUG_ON(!current->journal_info);

	clear_buffer_dirty(bh);
	if (test_set_buffer_pinned(bh))
		gfs2_assert_withdraw(sdp, 0);
	if (!buffer_uptodate(bh))
		gfs2_io_error_bh_wd(sdp, bh);
	bd = bh->b_private;
	/* If this buffer is in the AIL and it has already been written
	 * to in-place disk block, remove it from the AIL.
	 */
	spin_lock(&sdp->sd_ail_lock);
	if (bd->bd_tr)
		list_move(&bd->bd_ail_st_list, &bd->bd_tr->tr_ail2_list);
	spin_unlock(&sdp->sd_ail_lock);
	get_bh(bh);
	atomic_inc(&sdp->sd_log_pinned);
	trace_gfs2_pin(bd, 1);
}

static bool buffer_is_rgrp(const struct gfs2_bufdata *bd)
{
	return bd->bd_gl->gl_name.ln_type == LM_TYPE_RGRP;
}

static void maybe_release_space(struct gfs2_bufdata *bd)
{
	struct gfs2_glock *gl = bd->bd_gl;
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
	unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number;
	struct gfs2_bitmap *bi = rgd->rd_bits + index;

	if (bi->bi_clone == NULL)
		return;
	if (sdp->sd_args.ar_discard)
		gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL);
	memcpy(bi->bi_clone + bi->bi_offset,
	       bd->bd_bh->b_data + bi->bi_offset, bi->bi_bytes);
	clear_bit(GBF_FULL, &bi->bi_flags);
	rgd->rd_free_clone = rgd->rd_free;
	rgd->rd_extfail_pt = rgd->rd_free;
}

/**
 * gfs2_unpin - Unpin a buffer
 * @sdp: the filesystem the buffer belongs to
 * @bh: The buffer to unpin
 * @tr: The system transaction being flushed
 */

static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
		       struct gfs2_trans *tr)
{
	struct gfs2_bufdata *bd = bh->b_private;

	BUG_ON(!buffer_uptodate(bh));
	BUG_ON(!buffer_pinned(bh));

	lock_buffer(bh);
	mark_buffer_dirty(bh);
	clear_buffer_pinned(bh);

	if (buffer_is_rgrp(bd))
		maybe_release_space(bd);

	spin_lock(&sdp->sd_ail_lock);
	if (bd->bd_tr) {
		list_del(&bd->bd_ail_st_list);
		brelse(bh);
	} else {
		struct gfs2_glock *gl = bd->bd_gl;
		list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
		atomic_inc(&gl->gl_ail_count);
	}
	bd->bd_tr = tr;
	list_add(&bd->bd_ail_st_list, &tr->tr_ail1_list);
	spin_unlock(&sdp->sd_ail_lock);

	clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
	trace_gfs2_pin(bd, 0);
	unlock_buffer(bh);
	atomic_dec(&sdp->sd_log_pinned);
}
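
/*
 * A note on the pin/unpin lifecycle (summarising the two functions above):
 * gfs2_pin() takes an extra reference on a buffer attached to the current
 * transaction, clears its dirty bit so ordinary writeback leaves it alone,
 * and pulls it off the AIL if an earlier in-place write already completed.
 * Once the log flush has written the journaled copy, gfs2_unpin() marks
 * the buffer dirty again and puts it on the transaction's AIL1 list, from
 * where normal writeback sends it to its in-place location and the AIL
 * machinery eventually retires it.
 */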

static void gfs2_log_incr_head(struct gfs2_sbd *sdp)
{
	BUG_ON((sdp->sd_log_flush_head == sdp->sd_log_tail) &&
	       (sdp->sd_log_flush_head != sdp->sd_log_head));

	if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks)
		sdp->sd_log_flush_head = 0;
}

u64 gfs2_log_bmap(struct gfs2_sbd *sdp)
{
	unsigned int lbn = sdp->sd_log_flush_head;
	struct gfs2_journal_extent *je;
	u64 block;

	list_for_each_entry(je, &sdp->sd_jdesc->extent_list, list) {
		if ((lbn >= je->lblock) && (lbn < (je->lblock + je->blocks))) {
			block = je->dblock + lbn - je->lblock;
			gfs2_log_incr_head(sdp);
			return block;
		}
	}

	return -1;
}
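
/*
 * Worked example for gfs2_log_bmap() (illustrative numbers): given a
 * journal extent with je->lblock = 0, je->blocks = 8 and je->dblock = 1000,
 * a flush head of 3 falls inside the extent and maps to physical block
 * 1000 + 3 - 0 = 1003, after which the flush head advances to 4.
 */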

/**
 * gfs2_end_log_write_bh - end log write of pagecache data with buffers
 * @sdp: The superblock
 * @bvec: The bio_vec
 * @error: The i/o status
 *
 * This finds the relevant buffers and unlocks them and sets the
 * error flag according to the status of the i/o request. This is
 * used when the log is writing data which has an in-place version
 * that is pinned in the pagecache.
 */

static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec,
				  blk_status_t error)
{
	struct buffer_head *bh, *next;
	struct page *page = bvec->bv_page;
	unsigned size;

	bh = page_buffers(page);
	size = bvec->bv_len;
	while (bh_offset(bh) < bvec->bv_offset)
		bh = bh->b_this_page;
	do {
		if (error)
			mark_buffer_write_io_error(bh);
		unlock_buffer(bh);
		next = bh->b_this_page;
		size -= bh->b_size;
		brelse(bh);
		bh = next;
	} while (bh && size);
}

/**
 * gfs2_end_log_write - end of i/o to the log
 * @bio: The bio
 *
 * Each bio_vec contains either data from the pagecache or data
 * relating to the log itself. Here we iterate over the bio_vec
 * array, processing both kinds of data.
 *
 */

static void gfs2_end_log_write(struct bio *bio)
{
	struct gfs2_sbd *sdp = bio->bi_private;
	struct bio_vec *bvec;
	struct page *page;
	int i;

	if (bio->bi_status) {
		fs_err(sdp, "Error %d writing to journal, jid=%u\n",
		       bio->bi_status, sdp->sd_jdesc->jd_jid);
		wake_up(&sdp->sd_logd_waitq);
	}

	bio_for_each_segment_all(bvec, bio, i) {
		page = bvec->bv_page;
		if (page_has_buffers(page))
			gfs2_end_log_write_bh(sdp, bvec, bio->bi_status);
		else
			mempool_free(page, gfs2_page_pool);
	}

	bio_put(bio);
	if (atomic_dec_and_test(&sdp->sd_log_in_flight))
		wake_up(&sdp->sd_log_flush_wait);
}

/**
 * gfs2_log_submit_bio - Submit any pending log bio
 * @biop: Address of the bio pointer
 * @opf: REQ_OP | op_flags
 *
 * Submit any pending part-built or full bio to the block device. If
 * there is no pending bio, then this is a no-op.
 */

void gfs2_log_submit_bio(struct bio **biop, int opf)
{
	struct bio *bio = *biop;
	if (bio) {
		struct gfs2_sbd *sdp = bio->bi_private;
		atomic_inc(&sdp->sd_log_in_flight);
		bio->bi_opf = opf;
		submit_bio(bio);
		*biop = NULL;
	}
}

/**
 * gfs2_log_alloc_bio - Allocate a bio
 * @sdp: The super block
 * @blkno: The device block number we want to write to
 * @end_io: The bi_end_io callback
 *
 * Allocate a new bio, initialize it with the given parameters and return it.
 *
 * Returns: The newly allocated bio
 */

static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno,
				      bio_end_io_t *end_io)
{
	struct super_block *sb = sdp->sd_vfs;
	struct bio *bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);

	bio->bi_iter.bi_sector = blkno * (sb->s_blocksize >> 9);
	bio_set_dev(bio, sb->s_bdev);
	bio->bi_end_io = end_io;
	bio->bi_private = sdp;

	return bio;
}
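
/*
 * Note on the sector arithmetic above: bi_sector counts 512-byte device
 * sectors, so the filesystem block number is scaled by s_blocksize >> 9.
 * With the default 4KiB block size that factor is 8, e.g. block 1003
 * starts at sector 1003 * 8 = 8024.
 */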

/**
 * gfs2_log_get_bio - Get cached log bio, or allocate a new one
 * @sdp: The super block
 * @blkno: The device block number we want to write to
 * @biop: Address of the cached bio
 * @op: REQ_OP
 * @end_io: The bi_end_io callback
 * @flush: Always flush the current bio and allocate a new one?
 *
 * If there is a cached bio, then if the next block number is sequential
 * with the previous one, return it, otherwise flush the bio to the
 * device. If there is no cached bio, or we just flushed it, then
 * allocate a new one.
 *
 * Returns: The bio to use for log writes
 */

static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno,
				    struct bio **biop, int op,
				    bio_end_io_t *end_io, bool flush)
{
	struct bio *bio = *biop;

	if (bio) {
		u64 nblk;

		nblk = bio_end_sector(bio);
		nblk >>= sdp->sd_fsb2bb_shift;
		if (blkno == nblk && !flush)
			return bio;
		gfs2_log_submit_bio(biop, op);
	}

	*biop = gfs2_log_alloc_bio(sdp, blkno, end_io);
	return *biop;
}
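
/*
 * The sequential-append check above is the inverse of the conversion in
 * gfs2_log_alloc_bio(): bio_end_sector() gives the first sector past the
 * cached bio, and shifting right by sd_fsb2bb_shift (3 for 4KiB blocks)
 * turns it back into a filesystem block number. Continuing the example
 * above, a bio whose last block is 1003 ends at sector 8032, so a write
 * to block 1004 (= 8032 >> 3) can be appended to it.
 */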

/**
 * gfs2_log_write - write to log
 * @sdp: the filesystem
 * @page: the page to write
 * @size: the size of the data to write
 * @offset: the offset within the page
 * @blkno: block number of the log entry
 *
 * Try and add the page segment to the current bio. If that fails,
 * submit the current bio to the device and create a new one, and
 * then add the page segment to that.
 */

void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
		    unsigned size, unsigned offset, u64 blkno)
{
	struct bio *bio;
	int ret;

	bio = gfs2_log_get_bio(sdp, blkno, &sdp->sd_log_bio, REQ_OP_WRITE,
			       gfs2_end_log_write, false);
	ret = bio_add_page(bio, page, size, offset);
	if (ret == 0) {
		bio = gfs2_log_get_bio(sdp, blkno, &sdp->sd_log_bio,
				       REQ_OP_WRITE, gfs2_end_log_write, true);
		ret = bio_add_page(bio, page, size, offset);
		WARN_ON(ret == 0);
	}
}

/**
 * gfs2_log_write_bh - write a buffer's content to the log
 * @sdp: The super block
 * @bh: The buffer pointing to the in-place location
 *
 * This writes the content of the buffer to the next available location
 * in the log. The buffer will be unlocked once the i/o to the log has
 * completed.
 */

static void gfs2_log_write_bh(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
	gfs2_log_write(sdp, bh->b_page, bh->b_size, bh_offset(bh),
		       gfs2_log_bmap(sdp));
}

/**
 * gfs2_log_write_page - write one block stored in a page, into the log
 * @sdp: The superblock
 * @page: The struct page
 *
 * This writes the first block-sized part of the page into the log. Note
 * that the page must have been allocated from the gfs2_page_pool mempool
 * and that after this has been called, ownership has been transferred and
 * the page may be freed at any time.
 */

void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page)
{
	struct super_block *sb = sdp->sd_vfs;
	gfs2_log_write(sdp, page, sb->s_blocksize, 0,
		       gfs2_log_bmap(sdp));
}

/**
 * gfs2_end_log_read - end I/O callback for reads from the log
 * @bio: The bio
 *
 * Simply unlock the pages in the bio. The main thread will wait on them and
 * process them in order as necessary.
 */

static void gfs2_end_log_read(struct bio *bio)
{
	struct page *page;
	struct bio_vec *bvec;
	int i;

	bio_for_each_segment_all(bvec, bio, i) {
		page = bvec->bv_page;
		if (bio->bi_status) {
			int err = blk_status_to_errno(bio->bi_status);

			SetPageError(page);
			mapping_set_error(page->mapping, err);
		}
		unlock_page(page);
	}

	bio_put(bio);
}

/**
 * gfs2_jhead_pg_srch - Look for the journal head in a given page.
 * @jd: The journal descriptor
 * @head: The journal head to start from
 * @page: The page to look in
 *
 * Returns: true if found, false otherwise.
 */

static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd,
			       struct gfs2_log_header_host *head,
			       struct page *page)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct gfs2_log_header_host uninitialized_var(lh);
	void *kaddr = kmap_atomic(page);
	unsigned int offset;
	bool ret = false;

	for (offset = 0; offset < PAGE_SIZE; offset += sdp->sd_sb.sb_bsize) {
		if (!__get_log_header(sdp, kaddr + offset, 0, &lh)) {
			if (lh.lh_sequence > head->lh_sequence)
				*head = lh;
			else {
				ret = true;
				break;
			}
		}
	}
	kunmap_atomic(kaddr);
	return ret;
}

/**
 * gfs2_jhead_process_page - Search/cleanup a page
 * @jd: The journal descriptor
 * @index: Index of the page to look into
 * @head: The journal head to start from
 * @done: If set, perform only cleanup, else search and set if found.
 *
 * Find the page with 'index' in the journal's mapping. Search the page for
 * the journal head if requested (done == false). Release refs on the
 * page so the page cache can reclaim it (put_page() twice). We grabbed a
 * reference on this page two times, first when we did a find_or_create_page()
 * to obtain the page to add it to the bio and second when we do a
 * find_get_page() here to get the page to wait on while I/O on it is being
 * completed.
 * This function is also used to free up a page we might've grabbed but not
 * used. Maybe we added it to a bio, but not submitted it for I/O. Or we
 * submitted the I/O, but we already found the jhead so we only need to drop
 * our references to the page.
 */

static void gfs2_jhead_process_page(struct gfs2_jdesc *jd, unsigned long index,
				    struct gfs2_log_header_host *head,
				    bool *done)
{
	struct page *page;

	page = find_get_page(jd->jd_inode->i_mapping, index);
	wait_on_page_locked(page);

	if (PageError(page))
		*done = true;

	if (!*done)
		*done = gfs2_jhead_pg_srch(jd, head, page);

	put_page(page); /* Once for find_get_page */
	put_page(page); /* Once more for find_or_create_page */
}

/**
 * gfs2_find_jhead - find the head of a log
 * @jd: The journal descriptor
 * @head: The log descriptor for the head of the log is returned here
 *
 * Do a search of a journal by reading it in large chunks using bios and find
 * the valid log entry with the highest sequence number. (i.e. the log head)
 *
 * Returns: 0 on success, errno otherwise
 */

int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct address_space *mapping = jd->jd_inode->i_mapping;
	struct gfs2_journal_extent *je;
	u32 block, read_idx = 0, submit_idx = 0, index = 0;
	int shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift;
	int blocks_per_page = 1 << shift, sz, ret = 0;
	struct bio *bio = NULL;
	struct page *page;
	bool done = false;
	errseq_t since;

	memset(head, 0, sizeof(*head));
	if (list_empty(&jd->extent_list))
		gfs2_map_journal_extents(sdp, jd);

	since = filemap_sample_wb_err(mapping);
	list_for_each_entry(je, &jd->extent_list, list) {
		for (block = 0; block < je->blocks; block += blocks_per_page) {
			index = (je->lblock + block) >> shift;

			page = find_or_create_page(mapping, index, GFP_NOFS);
			if (!page) {
				ret = -ENOMEM;
				done = true;
				goto out;
			}

			if (bio) {
				sz = bio_add_page(bio, page, PAGE_SIZE, 0);
				if (sz == PAGE_SIZE)
					goto page_added;
				submit_idx = index;
				submit_bio(bio);
				bio = NULL;
			}

			bio = gfs2_log_alloc_bio(sdp,
						 je->dblock + (index << shift),
						 gfs2_end_log_read);
			bio->bi_opf = REQ_OP_READ;
			sz = bio_add_page(bio, page, PAGE_SIZE, 0);
			gfs2_assert_warn(sdp, sz == PAGE_SIZE);

page_added:
			if (submit_idx <= read_idx + BIO_MAX_PAGES) {
				/* Keep at least one bio in flight */
				continue;
			}

			gfs2_jhead_process_page(jd, read_idx++, head, &done);
			if (done)
				goto out; /* found */
		}
	}

out:
	if (bio)
		submit_bio(bio);
	while (read_idx <= index)
		gfs2_jhead_process_page(jd, read_idx++, head, &done);

	if (!ret)
		ret = filemap_check_wb_err(mapping, since);

	return ret;
}
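
/*
 * Illustrative usage (the journal recovery path is the main caller; this
 * is a sketch, not a copy of that code): with the journal locked and its
 * extents mapped, a caller can do
 *
 *	struct gfs2_log_header_host head;
 *	int error = gfs2_find_jhead(jd, &head);
 *
 * and then inspect head.lh_flags for GFS2_LOG_HEAD_UNMOUNT to decide
 * whether the journal is clean or must be replayed from head.lh_tail.
 */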

static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type,
				      u32 ld_length, u32 ld_data1)
{
	struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
	struct gfs2_log_descriptor *ld = page_address(page);
	clear_page(ld);
	ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
	ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
	ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
	ld->ld_type = cpu_to_be32(ld_type);
	ld->ld_length = cpu_to_be32(ld_length);
	ld->ld_data1 = cpu_to_be32(ld_data1);
	ld->ld_data2 = 0;
	return page;
}

static void gfs2_check_magic(struct buffer_head *bh)
{
	void *kaddr;
	__be32 *ptr;

	clear_buffer_escaped(bh);
	kaddr = kmap_atomic(bh->b_page);
	ptr = kaddr + bh_offset(bh);
	if (*ptr == cpu_to_be32(GFS2_MAGIC))
		set_buffer_escaped(bh);
	kunmap_atomic(kaddr);
}
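
/*
 * Why escaping exists: journal replay cannot tell journaled data blocks
 * from metadata, so a data block whose first word happens to equal
 * GFS2_MAGIC could be mistaken for metadata. gfs2_check_magic() flags
 * such buffers; gfs2_before_commit() then journals them with that word
 * zeroed and records an escape flag in the log descriptor, and
 * databuf_lo_scan_elements() restores the magic word during replay.
 */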

static int blocknr_cmp(void *priv, struct list_head *a, struct list_head *b)
{
	struct gfs2_bufdata *bda, *bdb;

	bda = list_entry(a, struct gfs2_bufdata, bd_list);
	bdb = list_entry(b, struct gfs2_bufdata, bd_list);

	if (bda->bd_bh->b_blocknr < bdb->bd_bh->b_blocknr)
		return -1;
	if (bda->bd_bh->b_blocknr > bdb->bd_bh->b_blocknr)
		return 1;
	return 0;
}

static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit,
			       unsigned int total, struct list_head *blist,
			       bool is_databuf)
{
	struct gfs2_log_descriptor *ld;
	struct gfs2_bufdata *bd1 = NULL, *bd2;
	struct page *page;
	unsigned int num;
	unsigned n;
	__be64 *ptr;

	gfs2_log_lock(sdp);
	list_sort(NULL, blist, blocknr_cmp);
	bd1 = bd2 = list_prepare_entry(bd1, blist, bd_list);
	while (total) {
		num = total;
		if (total > limit)
			num = limit;
		gfs2_log_unlock(sdp);
		page = gfs2_get_log_desc(sdp,
					 is_databuf ? GFS2_LOG_DESC_JDATA :
					 GFS2_LOG_DESC_METADATA, num + 1, num);
		ld = page_address(page);
		gfs2_log_lock(sdp);
		ptr = (__be64 *)(ld + 1);

		n = 0;
		list_for_each_entry_continue(bd1, blist, bd_list) {
			*ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
			if (is_databuf) {
				gfs2_check_magic(bd1->bd_bh);
				*ptr++ = cpu_to_be64(buffer_escaped(bd1->bd_bh) ? 1 : 0);
			}
			if (++n >= num)
				break;
		}

		gfs2_log_unlock(sdp);
		gfs2_log_write_page(sdp, page);
		gfs2_log_lock(sdp);

		n = 0;
		list_for_each_entry_continue(bd2, blist, bd_list) {
			get_bh(bd2->bd_bh);
			gfs2_log_unlock(sdp);
			lock_buffer(bd2->bd_bh);

			if (buffer_escaped(bd2->bd_bh)) {
				void *kaddr;
				page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
				ptr = page_address(page);
				kaddr = kmap_atomic(bd2->bd_bh->b_page);
				memcpy(ptr, kaddr + bh_offset(bd2->bd_bh),
				       bd2->bd_bh->b_size);
				kunmap_atomic(kaddr);
				*(__be32 *)ptr = 0;
				clear_buffer_escaped(bd2->bd_bh);
				unlock_buffer(bd2->bd_bh);
				brelse(bd2->bd_bh);
				gfs2_log_write_page(sdp, page);
			} else {
				gfs2_log_write_bh(sdp, bd2->bd_bh);
			}
			gfs2_log_lock(sdp);
			if (++n >= num)
				break;
		}

		BUG_ON(total < num);
		total -= num;
	}
	gfs2_log_unlock(sdp);
}

static void buf_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	unsigned int limit = buf_limit(sdp); /* 503 for 4k blocks */
	unsigned int nbuf;
	if (tr == NULL)
		return;
	nbuf = tr->tr_num_buf_new - tr->tr_num_buf_rm;
	gfs2_before_commit(sdp, limit, nbuf, &tr->tr_buf, 0);
}
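
/*
 * Worked example for the limit above (assuming 4KiB blocks and the
 * on-disk structure sizes from gfs2_ondisk.h): a metadata log descriptor
 * block has room for
 *
 *	(4096 - sizeof(struct gfs2_log_descriptor)) / sizeof(__be64)
 *	  = (4096 - 72) / 8 = 503
 *
 * block pointers, which is where the "503 for 4k blocks" figure comes
 * from. Data buffers store two words per block (block number plus escape
 * flag), so databuf_limit() works out to roughly half of that (251).
 */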

static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	struct list_head *head;
	struct gfs2_bufdata *bd;

	if (tr == NULL)
		return;

	head = &tr->tr_buf;
	while (!list_empty(head)) {
		bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
		list_del_init(&bd->bd_list);
		gfs2_unpin(sdp, bd->bd_bh, tr);
	}
}

static void buf_lo_before_scan(struct gfs2_jdesc *jd,
			       struct gfs2_log_header_host *head, int pass)
{
	if (pass != 0)
		return;

	jd->jd_found_blocks = 0;
	jd->jd_replayed_blocks = 0;
}

static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
				struct gfs2_log_descriptor *ld, __be64 *ptr,
				int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct gfs2_glock *gl = ip->i_gl;
	unsigned int blks = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh_log, *bh_ip;
	u64 blkno;
	int error = 0;

	if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
		return 0;

	gfs2_replay_incr_blk(jd, &start);

	for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) {
		blkno = be64_to_cpu(*ptr++);

		jd->jd_found_blocks++;

		if (gfs2_revoke_check(jd, blkno, start))
			continue;

		error = gfs2_replay_read_block(jd, start, &bh_log);
		if (error)
			return error;

		bh_ip = gfs2_meta_new(gl, blkno);
		memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);

		if (gfs2_meta_check(sdp, bh_ip))
			error = -EIO;
		else
			mark_buffer_dirty(bh_ip);

		brelse(bh_log);
		brelse(bh_ip);

		if (error)
			break;

		jd->jd_replayed_blocks++;
	}

	return error;
}

/**
 * gfs2_meta_sync - Sync all buffers associated with a glock
 * @gl: The glock
 *
 */

static void gfs2_meta_sync(struct gfs2_glock *gl)
{
	struct address_space *mapping = gfs2_glock2aspace(gl);
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	int error;

	if (mapping == NULL)
		mapping = &sdp->sd_aspace;

	filemap_fdatawrite(mapping);
	error = filemap_fdatawait(mapping);

	if (error)
		gfs2_io_error(gl->gl_name.ln_sbd);
}

static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (error) {
		gfs2_meta_sync(ip->i_gl);
		return;
	}
	if (pass != 1)
		return;

	gfs2_meta_sync(ip->i_gl);

	fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n",
		jd->jd_jid, jd->jd_replayed_blocks, jd->jd_found_blocks);
}

static void revoke_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	struct gfs2_meta_header *mh;
	unsigned int offset;
	struct list_head *head = &sdp->sd_log_le_revoke;
	struct gfs2_bufdata *bd;
	struct page *page;
	unsigned int length;

	gfs2_write_revokes(sdp);
	if (!sdp->sd_log_num_revoke)
		return;

	length = gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, sizeof(u64));
	page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE, length, sdp->sd_log_num_revoke);
	offset = sizeof(struct gfs2_log_descriptor);

	list_for_each_entry(bd, head, bd_list) {
		sdp->sd_log_num_revoke--;

		if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
			gfs2_log_write_page(sdp, page);
			page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
			mh = page_address(page);
			clear_page(mh);
			mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
			mh->mh_type = cpu_to_be32(GFS2_METATYPE_LB);
			mh->mh_format = cpu_to_be32(GFS2_FORMAT_LB);
			offset = sizeof(struct gfs2_meta_header);
		}

		*(__be64 *)(page_address(page) + offset) = cpu_to_be64(bd->bd_blkno);
		offset += sizeof(u64);
	}
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);

	gfs2_log_write_page(sdp, page);
}
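
/*
 * Capacity arithmetic for the revoke blocks written above (assuming 4KiB
 * blocks): the first block starts after the log descriptor and holds
 * (4096 - 72) / 8 = 503 revoke entries; each continuation block starts
 * after a bare gfs2_meta_header and holds (4096 - 24) / 8 = 509.
 * gfs2_struct2blk() performs the equivalent calculation up front to size
 * the descriptor's ld_length field.
 */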

static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	struct list_head *head = &sdp->sd_log_le_revoke;
	struct gfs2_bufdata *bd;
	struct gfs2_glock *gl;

	while (!list_empty(head)) {
		bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
		list_del_init(&bd->bd_list);
		gl = bd->bd_gl;
		atomic_dec(&gl->gl_revokes);
		clear_bit(GLF_LFLUSH, &gl->gl_flags);
		kmem_cache_free(gfs2_bufdata_cachep, bd);
	}
}

static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
				  struct gfs2_log_header_host *head, int pass)
{
	if (pass != 0)
		return;

	jd->jd_found_revokes = 0;
	jd->jd_replay_tail = head->lh_tail;
}

static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
				   struct gfs2_log_descriptor *ld, __be64 *ptr,
				   int pass)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	unsigned int blks = be32_to_cpu(ld->ld_length);
	unsigned int revokes = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh;
	unsigned int offset;
	u64 blkno;
	int first = 1;
	int error;

	if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE)
		return 0;

	offset = sizeof(struct gfs2_log_descriptor);

	for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) {
		error = gfs2_replay_read_block(jd, start, &bh);
		if (error)
			return error;

		if (!first)
			gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB);

		while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) {
			blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));

			error = gfs2_revoke_add(jd, blkno, start);
			if (error < 0) {
				brelse(bh);
				return error;
			} else if (error)
				jd->jd_found_revokes++;

			if (!--revokes)
				break;
			offset += sizeof(u64);
		}

		brelse(bh);
		offset = sizeof(struct gfs2_meta_header);
		first = 0;
	}

	return 0;
}

static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (error) {
		gfs2_revoke_clean(jd);
		return;
	}
	if (pass != 1)
		return;

	fs_info(sdp, "jid=%u: Found %u revoke tags\n",
		jd->jd_jid, jd->jd_found_revokes);

	gfs2_revoke_clean(jd);
}

/**
 * databuf_lo_before_commit - Scan the data buffers, writing as we go
 * @sdp: The filesystem
 * @tr: The system transaction being flushed
 */

static void databuf_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	unsigned int limit = databuf_limit(sdp);
	unsigned int nbuf;
	if (tr == NULL)
		return;
	nbuf = tr->tr_num_databuf_new - tr->tr_num_databuf_rm;
	gfs2_before_commit(sdp, limit, nbuf, &tr->tr_databuf, 1);
}

static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
				    struct gfs2_log_descriptor *ld,
				    __be64 *ptr, int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_glock *gl = ip->i_gl;
	unsigned int blks = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh_log, *bh_ip;
	u64 blkno;
	u64 esc;
	int error = 0;

	if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
		return 0;

	gfs2_replay_incr_blk(jd, &start);
	for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) {
		blkno = be64_to_cpu(*ptr++);
		esc = be64_to_cpu(*ptr++);

		jd->jd_found_blocks++;

		if (gfs2_revoke_check(jd, blkno, start))
			continue;

		error = gfs2_replay_read_block(jd, start, &bh_log);
		if (error)
			return error;

		bh_ip = gfs2_meta_new(gl, blkno);
		memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);

		/* Unescape */
		if (esc) {
			__be32 *eptr = (__be32 *)bh_ip->b_data;
			*eptr = cpu_to_be32(GFS2_MAGIC);
		}
		mark_buffer_dirty(bh_ip);

		brelse(bh_log);
		brelse(bh_ip);

		jd->jd_replayed_blocks++;
	}

	return error;
}

/* FIXME: sort out accounting for log blocks etc. */

static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (error) {
		gfs2_meta_sync(ip->i_gl);
		return;
	}
	if (pass != 1)
		return;

	/* data sync? */
	gfs2_meta_sync(ip->i_gl);

	fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
		jd->jd_jid, jd->jd_replayed_blocks, jd->jd_found_blocks);
}

static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	struct list_head *head;
	struct gfs2_bufdata *bd;

	if (tr == NULL)
		return;

	head = &tr->tr_databuf;
	while (!list_empty(head)) {
		bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
		list_del_init(&bd->bd_list);
		gfs2_unpin(sdp, bd->bd_bh, tr);
	}
}

const struct gfs2_log_operations gfs2_buf_lops = {
	.lo_before_commit = buf_lo_before_commit,
	.lo_after_commit = buf_lo_after_commit,
	.lo_before_scan = buf_lo_before_scan,
	.lo_scan_elements = buf_lo_scan_elements,
	.lo_after_scan = buf_lo_after_scan,
	.lo_name = "buf",
};

const struct gfs2_log_operations gfs2_revoke_lops = {
	.lo_before_commit = revoke_lo_before_commit,
	.lo_after_commit = revoke_lo_after_commit,
	.lo_before_scan = revoke_lo_before_scan,
	.lo_scan_elements = revoke_lo_scan_elements,
	.lo_after_scan = revoke_lo_after_scan,
	.lo_name = "revoke",
};

const struct gfs2_log_operations gfs2_databuf_lops = {
	.lo_before_commit = databuf_lo_before_commit,
	.lo_after_commit = databuf_lo_after_commit,
	.lo_scan_elements = databuf_lo_scan_elements,
	.lo_after_scan = databuf_lo_after_scan,
	.lo_name = "databuf",
};

const struct gfs2_log_operations *gfs2_log_ops[] = {
	&gfs2_databuf_lops,
	&gfs2_buf_lops,
	&gfs2_revoke_lops,
	NULL,
};