* deleted while it is being flushed.
* [2] Flush data buffers to disk and clear "EXT4_STATE_FC_FLUSHING_DATA"
* state.
- * [3] Lock the journal by calling jbd2_journal_lock_updates. This ensures that
- * all the exsiting handles finish and no new handles can start.
- * [4] Mark all the fast commit eligible inodes as undergoing fast commit
- * by setting "EXT4_STATE_FC_COMMITTING" state.
- * [5] Unlock the journal by calling jbd2_journal_unlock_updates. This allows
+ * [3] Lock the journal by calling jbd2_journal_lock_updates(). This ensures
+ * that all the existing handles finish and no new handles can start.
+ * [4] Mark all the fast commit eligible inodes as undergoing fast commit by
+ * setting "EXT4_STATE_FC_COMMITTING" state, and snapshot the inode state
+ * needed for log writing.
+ * [5] Unlock the journal by calling jbd2_journal_unlock_updates(). This allows
* starting of new handles. If new handles try to start an update on
* any of the inodes that are being committed, ext4_fc_track_inode()
* will block until those inodes have finished the fast commit.
* [6] Commit all the directory entry updates in the fast commit space.
- * [7] Commit all the changed inodes in the fast commit space and clear
- * "EXT4_STATE_FC_COMMITTING" for these inodes.
+ * [7] Commit all the changed inodes in the fast commit space.
* [8] Write tail tag (this tag ensures the atomicity, please read the following
* section for more details).
+ * [9] Clear "EXT4_STATE_FC_COMMITTING" and wake up waiters in
+ * ext4_fc_cleanup().
*
- * All the inode updates must be enclosed within jbd2_jounrnal_start()
+ * All the inode updates must be enclosed within jbd2_journal_start()
* and jbd2_journal_stop() similar to JBD2 journaling.
*
* Fast Commit Ineligibility
unlock_buffer(bh);
}
+static void ext4_fc_free_inode_snap(struct inode *inode);
+
static inline void ext4_fc_reset_inode(struct inode *inode)
{
struct ext4_inode_info *ei = EXT4_I(inode);
ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING);
INIT_LIST_HEAD(&ei->i_fc_list);
INIT_LIST_HEAD(&ei->i_fc_dilist);
+ ei->i_fc_snap = NULL;
}
static bool ext4_fc_disabled(struct super_block *sb)
alloc_ctx = ext4_fc_lock(inode->i_sb);
if (list_empty(&ei->i_fc_list) && list_empty(&ei->i_fc_dilist)) {
+ ext4_fc_free_inode_snap(inode);
ext4_fc_unlock(inode->i_sb, alloc_ctx);
return;
}
}
finish_wait(wq, &wait.wq_entry);
}
+ ext4_fc_free_inode_snap(inode);
list_del_init(&ei->i_fc_list);
/*
return true;
}
+struct ext4_fc_range {
+ struct list_head list;
+ u16 tag;
+ ext4_lblk_t lblk;
+ ext4_lblk_t len;
+ ext4_fsblk_t pblk;
+ bool unwritten;
+};
+
+struct ext4_fc_inode_snap {
+ struct list_head data_list;
+ unsigned int inode_len;
+ u8 inode_buf[];
+};
+
/*
* Writes inode in the fast commit space under TLV with tag @tag.
* Returns 0 on success, error on failure.
static int ext4_fc_write_inode(struct inode *inode, u32 *crc)
{
struct ext4_inode_info *ei = EXT4_I(inode);
- int inode_len = EXT4_GOOD_OLD_INODE_SIZE;
- int ret;
- struct ext4_iloc iloc;
+ struct ext4_fc_inode_snap *snap = ei->i_fc_snap;
struct ext4_fc_inode fc_inode;
struct ext4_fc_tl tl;
u8 *dst;
+ u8 *src;
+ int inode_len;
+ int ret;
- ret = ext4_get_inode_loc(inode, &iloc);
- if (ret)
- return ret;
+ if (!snap)
+ return -ECANCELED;
- if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA))
- inode_len = EXT4_INODE_SIZE(inode->i_sb);
- else if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE)
- inode_len += ei->i_extra_isize;
+ src = snap->inode_buf;
+ inode_len = snap->inode_len;
+ if (!src || inode_len == 0)
+ return -ECANCELED;
fc_inode.fc_ino = cpu_to_le32(inode->i_ino);
tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE);
dst += EXT4_FC_TAG_BASE_LEN;
memcpy(dst, &fc_inode, sizeof(fc_inode));
dst += sizeof(fc_inode);
- memcpy(dst, (u8 *)ext4_raw_inode(&iloc), inode_len);
+ memcpy(dst, src, inode_len);
ret = 0;
err:
- brelse(iloc.bh);
return ret;
}
*/
static int ext4_fc_write_inode_data(struct inode *inode, u32 *crc)
{
- ext4_lblk_t old_blk_size, cur_lblk_off, new_blk_size;
struct ext4_inode_info *ei = EXT4_I(inode);
- struct ext4_map_blocks map;
+ struct ext4_fc_inode_snap *snap = ei->i_fc_snap;
struct ext4_fc_add_range fc_ext;
struct ext4_fc_del_range lrange;
struct ext4_extent *ex;
+ struct ext4_fc_range *range;
+
+ if (!snap)
+ return -ECANCELED;
+
+ list_for_each_entry(range, &snap->data_list, list) {
+ if (range->tag == EXT4_FC_TAG_DEL_RANGE) {
+ lrange.fc_ino = cpu_to_le32(inode->i_ino);
+ lrange.fc_lblk = cpu_to_le32(range->lblk);
+ lrange.fc_len = cpu_to_le32(range->len);
+ if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_DEL_RANGE,
+ sizeof(lrange), (u8 *)&lrange, crc))
+ return -ENOSPC;
+ continue;
+ }
+
+ fc_ext.fc_ino = cpu_to_le32(inode->i_ino);
+ ex = (struct ext4_extent *)&fc_ext.fc_ex;
+ ex->ee_block = cpu_to_le32(range->lblk);
+ ex->ee_len = cpu_to_le16(range->len);
+ ext4_ext_store_pblock(ex, range->pblk);
+ if (range->unwritten)
+ ext4_ext_mark_unwritten(ex);
+ else
+ ext4_ext_mark_initialized(ex);
+
+ if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_ADD_RANGE,
+ sizeof(fc_ext), (u8 *)&fc_ext, crc))
+ return -ENOSPC;
+ }
+
+ return 0;
+}
+
+static void ext4_fc_free_ranges(struct list_head *head)
+{
+ struct ext4_fc_range *range, *range_n;
+
+ list_for_each_entry_safe(range, range_n, head, list) {
+ list_del(&range->list);
+ kfree(range);
+ }
+}
+
+static void ext4_fc_free_inode_snap(struct inode *inode)
+{
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ struct ext4_fc_inode_snap *snap = ei->i_fc_snap;
+
+ if (!snap)
+ return;
+
+ ext4_fc_free_ranges(&snap->data_list);
+ kfree(snap);
+ ei->i_fc_snap = NULL;
+}
+
+static int ext4_fc_snapshot_inode_data(struct inode *inode,
+ struct list_head *ranges)
+{
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ ext4_lblk_t start_lblk, end_lblk, cur_lblk;
+ struct ext4_map_blocks map;
int ret;
spin_lock(&ei->i_fc_lock);
spin_unlock(&ei->i_fc_lock);
return 0;
}
- old_blk_size = ei->i_fc_lblk_start;
- new_blk_size = ei->i_fc_lblk_start + ei->i_fc_lblk_len - 1;
+ start_lblk = ei->i_fc_lblk_start;
+ end_lblk = ei->i_fc_lblk_start + ei->i_fc_lblk_len - 1;
ei->i_fc_lblk_len = 0;
spin_unlock(&ei->i_fc_lock);
- cur_lblk_off = old_blk_size;
- ext4_debug("will try writing %d to %d for inode %llu\n",
- cur_lblk_off, new_blk_size, inode->i_ino);
+ cur_lblk = start_lblk;
+ ext4_debug("snapshot data ranges %u-%u for inode %llu\n",
+ start_lblk, end_lblk,
+ (unsigned long long)inode->i_ino);
+
+ while (cur_lblk <= end_lblk) {
+ struct ext4_fc_range *range;
- while (cur_lblk_off <= new_blk_size) {
- map.m_lblk = cur_lblk_off;
- map.m_len = new_blk_size - cur_lblk_off + 1;
+ map.m_lblk = cur_lblk;
+ map.m_len = end_lblk - cur_lblk + 1;
ret = ext4_map_blocks(NULL, inode, &map,
EXT4_GET_BLOCKS_IO_SUBMIT |
EXT4_EX_NOCACHE);
return -ECANCELED;
if (map.m_len == 0) {
- cur_lblk_off++;
+ cur_lblk++;
continue;
}
+ range = kmalloc(sizeof(*range), GFP_NOFS);
+ if (!range)
+ return -ENOMEM;
+
+ range->lblk = map.m_lblk;
+ range->len = map.m_len;
+ range->pblk = 0;
+ range->unwritten = false;
+
if (ret == 0) {
- lrange.fc_ino = cpu_to_le32(inode->i_ino);
- lrange.fc_lblk = cpu_to_le32(map.m_lblk);
- lrange.fc_len = cpu_to_le32(map.m_len);
- if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_DEL_RANGE,
- sizeof(lrange), (u8 *)&lrange, crc))
- return -ENOSPC;
+ range->tag = EXT4_FC_TAG_DEL_RANGE;
} else {
unsigned int max = (map.m_flags & EXT4_MAP_UNWRITTEN) ?
EXT_UNWRITTEN_MAX_LEN : EXT_INIT_MAX_LEN;
/* Limit the number of blocks in one extent */
map.m_len = min(max, map.m_len);
- fc_ext.fc_ino = cpu_to_le32(inode->i_ino);
- ex = (struct ext4_extent *)&fc_ext.fc_ex;
- ex->ee_block = cpu_to_le32(map.m_lblk);
- ex->ee_len = cpu_to_le16(map.m_len);
- ext4_ext_store_pblock(ex, map.m_pblk);
- if (map.m_flags & EXT4_MAP_UNWRITTEN)
- ext4_ext_mark_unwritten(ex);
- else
- ext4_ext_mark_initialized(ex);
- if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_ADD_RANGE,
- sizeof(fc_ext), (u8 *)&fc_ext, crc))
- return -ENOSPC;
+ range->tag = EXT4_FC_TAG_ADD_RANGE;
+ range->len = map.m_len;
+ range->pblk = map.m_pblk;
+ range->unwritten = !!(map.m_flags & EXT4_MAP_UNWRITTEN);
}
- cur_lblk_off += map.m_len;
+ INIT_LIST_HEAD(&range->list);
+ list_add_tail(&range->list, ranges);
+
+ cur_lblk += map.m_len;
}
return 0;
}
+static int ext4_fc_snapshot_inode(struct inode *inode)
+{
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ struct ext4_fc_inode_snap *snap;
+ int inode_len = EXT4_GOOD_OLD_INODE_SIZE;
+ struct ext4_iloc iloc;
+ LIST_HEAD(ranges);
+ int ret;
+ int alloc_ctx;
+
+ ret = ext4_get_inode_loc_noio(inode, &iloc);
+ if (ret)
+ return ret;
+
+ if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA))
+ inode_len = EXT4_INODE_SIZE(inode->i_sb);
+ else if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE)
+ inode_len += ei->i_extra_isize;
+
+ snap = kmalloc(struct_size(snap, inode_buf, inode_len), GFP_NOFS);
+ if (!snap) {
+ brelse(iloc.bh);
+ return -ENOMEM;
+ }
+ INIT_LIST_HEAD(&snap->data_list);
+ snap->inode_len = inode_len;
+
+ memcpy(snap->inode_buf, (u8 *)ext4_raw_inode(&iloc), inode_len);
+ brelse(iloc.bh);
+
+ ret = ext4_fc_snapshot_inode_data(inode, &ranges);
+ if (ret) {
+ kfree(snap);
+ ext4_fc_free_ranges(&ranges);
+ return ret;
+ }
+
+ alloc_ctx = ext4_fc_lock(inode->i_sb);
+ ext4_fc_free_inode_snap(inode);
+ ei->i_fc_snap = snap;
+ list_splice_tail_init(&ranges, &snap->data_list);
+ ext4_fc_unlock(inode->i_sb, alloc_ctx);
+
+ return 0;
+}
+
/* Flushes data of all the inodes in the commit queue. */
static int ext4_fc_flush_data(journal_t *journal)
*/
if (list_empty(&fc_dentry->fcd_dilist))
continue;
+ /*
+ * For EXT4_FC_TAG_CREAT, fcd_dilist is linked on the created
+ * inode's i_fc_dilist list (kept singular), so we can recover the
+ * inode through it.
+ */
ei = list_first_entry(&fc_dentry->fcd_dilist,
struct ext4_inode_info, i_fc_dilist);
inode = &ei->vfs_inode;
return 0;
}
+static int ext4_fc_snapshot_inodes(journal_t *journal)
+{
+ struct super_block *sb = journal->j_private;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_inode_info *iter;
+ struct ext4_fc_dentry_update *fc_dentry;
+ struct inode **inodes;
+ unsigned int nr_inodes = 0;
+ unsigned int i = 0;
+ int ret = 0;
+ int alloc_ctx;
+
+ alloc_ctx = ext4_fc_lock(sb);
+ list_for_each_entry(iter, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list)
+ nr_inodes++;
+
+ list_for_each_entry(fc_dentry, &sbi->s_fc_dentry_q[FC_Q_MAIN], fcd_list) {
+ struct ext4_inode_info *ei;
+
+ if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT)
+ continue;
+ if (list_empty(&fc_dentry->fcd_dilist))
+ continue;
+
+ /* See the comment in ext4_fc_commit_dentry_updates(). */
+ ei = list_first_entry(&fc_dentry->fcd_dilist,
+ struct ext4_inode_info, i_fc_dilist);
+ if (!list_empty(&ei->i_fc_list))
+ continue;
+
+ nr_inodes++;
+ }
+ ext4_fc_unlock(sb, alloc_ctx);
+
+ if (!nr_inodes)
+ return 0;
+
+ inodes = kvcalloc(nr_inodes, sizeof(*inodes), GFP_NOFS);
+ if (!inodes)
+ return -ENOMEM;
+
+ alloc_ctx = ext4_fc_lock(sb);
+ list_for_each_entry(iter, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
+ inodes[i] = igrab(&iter->vfs_inode);
+ if (inodes[i])
+ i++;
+ }
+
+ list_for_each_entry(fc_dentry, &sbi->s_fc_dentry_q[FC_Q_MAIN], fcd_list) {
+ struct ext4_inode_info *ei;
+
+ if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT)
+ continue;
+ if (list_empty(&fc_dentry->fcd_dilist))
+ continue;
+
+ /* See the comment in ext4_fc_commit_dentry_updates(). */
+ ei = list_first_entry(&fc_dentry->fcd_dilist,
+ struct ext4_inode_info, i_fc_dilist);
+ if (!list_empty(&ei->i_fc_list))
+ continue;
+
+ inodes[i] = igrab(&ei->vfs_inode);
+ if (inodes[i])
+ i++;
+ }
+ ext4_fc_unlock(sb, alloc_ctx);
+
+ for (nr_inodes = 0; nr_inodes < i; nr_inodes++) {
+ ret = ext4_fc_snapshot_inode(inodes[nr_inodes]);
+ if (ret)
+ break;
+ }
+
+ for (nr_inodes = 0; nr_inodes < i; nr_inodes++) {
+ if (inodes[nr_inodes])
+ iput(inodes[nr_inodes]);
+ }
+ kvfree(inodes);
+ return ret;
+}
+
static int ext4_fc_perform_commit(journal_t *journal)
{
struct super_block *sb = journal->j_private;
EXT4_STATE_FC_COMMITTING);
}
ext4_fc_unlock(sb, alloc_ctx);
+
+ ret = ext4_fc_snapshot_inodes(journal);
jbd2_journal_unlock_updates(journal);
+ if (ret)
+ return ret;
/*
* Step 5: If file system device is different from journal device,
struct ext4_inode_info,
i_fc_list);
list_del_init(&ei->i_fc_list);
+ ext4_fc_free_inode_snap(&ei->vfs_inode);
ext4_clear_inode_state(&ei->vfs_inode,
EXT4_STATE_FC_COMMITTING);
if (tid_geq(tid, ei->i_sync_tid)) {
struct ext4_fc_dentry_update,
fcd_list);
list_del_init(&fc_dentry->fcd_list);
+ if (fc_dentry->fcd_op == EXT4_FC_TAG_CREAT &&
+ !list_empty(&fc_dentry->fcd_dilist)) {
+ /* See the comment in ext4_fc_commit_dentry_updates(). */
+ ei = list_first_entry(&fc_dentry->fcd_dilist,
+ struct ext4_inode_info,
+ i_fc_dilist);
+ ext4_fc_free_inode_snap(&ei->vfs_inode);
+ }
list_del_init(&fc_dentry->fcd_dilist);
release_dentry_name_snapshot(&fc_dentry->fcd_name);