#include <trace/events/ext4.h>
static struct kmem_cache *ext4_fc_dentry_cachep;
+static struct kmem_cache *ext4_fc_range_cachep;
+
+/*
+ * Avoid spending unbounded time/memory snapshotting highly fragmented files
+ * under jbd2_journal_lock_updates(). If we exceed this limit, fall back to
+ * full commit.
+ */
+#define EXT4_FC_SNAPSHOT_MAX_INODES 1024
+#define EXT4_FC_SNAPSHOT_MAX_RANGES 2048
static void ext4_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
{
list_for_each_entry_safe(range, range_n, head, list) {
list_del(&range->list);
- kfree(range);
+ kmem_cache_free(ext4_fc_range_cachep, range);
}
}
}
static int ext4_fc_snapshot_inode_data(struct inode *inode,
- struct list_head *ranges)
+ struct list_head *ranges,
+ unsigned int nr_ranges_total,
+ unsigned int *nr_rangesp)
{
struct ext4_inode_info *ei = EXT4_I(inode);
+ unsigned int nr_ranges = 0;
ext4_lblk_t start_lblk, end_lblk, cur_lblk;
- struct ext4_map_blocks map;
- int ret;
spin_lock(&ei->i_fc_lock);
if (ei->i_fc_lblk_len == 0) {
spin_unlock(&ei->i_fc_lock);
+ if (nr_rangesp)
+ *nr_rangesp = 0;
return 0;
}
start_lblk = ei->i_fc_lblk_start;
(unsigned long long)inode->i_ino);
while (cur_lblk <= end_lblk) {
+ struct extent_status es;
struct ext4_fc_range *range;
+ ext4_lblk_t len;
+ u64 remaining = (u64)end_lblk - cur_lblk + 1;
- map.m_lblk = cur_lblk;
- map.m_len = end_lblk - cur_lblk + 1;
- ret = ext4_map_blocks(NULL, inode, &map,
- EXT4_GET_BLOCKS_IO_SUBMIT |
- EXT4_EX_NOCACHE);
- if (ret < 0)
- return -ECANCELED;
+ if (!ext4_es_lookup_extent(inode, cur_lblk, NULL, &es, NULL))
+ return -EAGAIN;
+
+ if (ext4_es_is_delayed(&es))
+ return -EAGAIN;
- if (map.m_len == 0) {
+ len = es.es_len - (cur_lblk - es.es_lblk);
+ if (len > remaining)
+ len = remaining;
+ if (len == 0) {
cur_lblk++;
continue;
}
- range = kmalloc(sizeof(*range), GFP_NOFS);
+ if (nr_ranges_total + nr_ranges >= EXT4_FC_SNAPSHOT_MAX_RANGES)
+ return -E2BIG;
+
+ range = kmem_cache_alloc(ext4_fc_range_cachep, GFP_NOFS);
if (!range)
return -ENOMEM;
+ nr_ranges++;
- range->lblk = map.m_lblk;
- range->len = map.m_len;
+ range->lblk = cur_lblk;
+ range->len = len;
range->pblk = 0;
range->unwritten = false;
- if (ret == 0) {
+ if (ext4_es_is_hole(&es)) {
range->tag = EXT4_FC_TAG_DEL_RANGE;
- } else {
- unsigned int max = (map.m_flags & EXT4_MAP_UNWRITTEN) ?
- EXT_UNWRITTEN_MAX_LEN : EXT_INIT_MAX_LEN;
-
- /* Limit the number of blocks in one extent */
- map.m_len = min(max, map.m_len);
+ } else if (ext4_es_is_written(&es) ||
+ ext4_es_is_unwritten(&es)) {
+ unsigned int max;
range->tag = EXT4_FC_TAG_ADD_RANGE;
- range->len = map.m_len;
- range->pblk = map.m_pblk;
- range->unwritten = !!(map.m_flags & EXT4_MAP_UNWRITTEN);
+ range->pblk = ext4_es_pblock(&es) +
+ (cur_lblk - es.es_lblk);
+ range->unwritten = ext4_es_is_unwritten(&es);
+
+ max = range->unwritten ? EXT_UNWRITTEN_MAX_LEN :
+ EXT_INIT_MAX_LEN;
+ if (range->len > max)
+ range->len = max;
+ } else {
+ kmem_cache_free(ext4_fc_range_cachep, range);
+ return -EAGAIN;
}
INIT_LIST_HEAD(&range->list);
list_add_tail(&range->list, ranges);
- cur_lblk += map.m_len;
+ if ((u64)range->len > (u64)end_lblk - cur_lblk)
+ break;
+
+ cur_lblk += range->len;
}
+ if (nr_rangesp)
+ *nr_rangesp = nr_ranges;
return 0;
}
-static int ext4_fc_snapshot_inode(struct inode *inode)
+static int ext4_fc_snapshot_inode(struct inode *inode,
+ unsigned int nr_ranges_total,
+ unsigned int *nr_rangesp)
{
struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_fc_inode_snap *snap;
int inode_len = EXT4_GOOD_OLD_INODE_SIZE;
struct ext4_iloc iloc;
LIST_HEAD(ranges);
+ unsigned int nr_ranges = 0;
int ret;
int alloc_ctx;
memcpy(snap->inode_buf, (u8 *)ext4_raw_inode(&iloc), inode_len);
brelse(iloc.bh);
- ret = ext4_fc_snapshot_inode_data(inode, &ranges);
+ ret = ext4_fc_snapshot_inode_data(inode, &ranges, nr_ranges_total,
+ &nr_ranges);
if (ret) {
kfree(snap);
ext4_fc_free_ranges(&ranges);
list_splice_tail_init(&ranges, &snap->data_list);
ext4_fc_unlock(inode->i_sb, alloc_ctx);
+ if (nr_rangesp)
+ *nr_rangesp = nr_ranges;
return 0;
}
-
/* Flushes data of all the inodes in the commit queue. */
static int ext4_fc_flush_data(journal_t *journal)
{
return 0;
}
-static int ext4_fc_snapshot_inodes(journal_t *journal)
+static int ext4_fc_alloc_snapshot_inodes(struct super_block *sb,
+ struct inode ***inodesp,
+ unsigned int *nr_inodesp);
+
+static int ext4_fc_snapshot_inodes(journal_t *journal, struct inode **inodes,
+ unsigned int inodes_size)
{
struct super_block *sb = journal->j_private;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_inode_info *iter;
struct ext4_fc_dentry_update *fc_dentry;
- struct inode **inodes;
- unsigned int nr_inodes = 0;
unsigned int i = 0;
+ unsigned int idx;
+ unsigned int nr_ranges = 0;
int ret = 0;
int alloc_ctx;
- alloc_ctx = ext4_fc_lock(sb);
- list_for_each_entry(iter, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list)
- nr_inodes++;
-
- list_for_each_entry(fc_dentry, &sbi->s_fc_dentry_q[FC_Q_MAIN], fcd_list) {
- struct ext4_inode_info *ei;
-
- if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT)
- continue;
- if (list_empty(&fc_dentry->fcd_dilist))
- continue;
-
- /* See the comment in ext4_fc_commit_dentry_updates(). */
- ei = list_first_entry(&fc_dentry->fcd_dilist,
- struct ext4_inode_info, i_fc_dilist);
- if (!list_empty(&ei->i_fc_list))
- continue;
-
- nr_inodes++;
- }
- ext4_fc_unlock(sb, alloc_ctx);
-
- if (!nr_inodes)
+ if (!inodes_size)
return 0;
- inodes = kvcalloc(nr_inodes, sizeof(*inodes), GFP_NOFS);
- if (!inodes)
- return -ENOMEM;
-
alloc_ctx = ext4_fc_lock(sb);
list_for_each_entry(iter, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
+ if (i >= inodes_size) {
+ ret = -E2BIG;
+ goto unlock;
+ }
inodes[i++] = &iter->vfs_inode;
}
if (!list_empty(&ei->i_fc_list))
continue;
+ if (i >= inodes_size) {
+ ret = -E2BIG;
+ goto unlock;
+ }
/*
* Create-only inodes may only be referenced via fcd_dilist and
* not appear on s_fc_q[MAIN]. They may hit the last iput while
ext4_set_inode_state(inode, EXT4_STATE_FC_COMMITTING);
inodes[i++] = inode;
}
+unlock:
ext4_fc_unlock(sb, alloc_ctx);
- for (nr_inodes = 0; nr_inodes < i; nr_inodes++) {
- ret = ext4_fc_snapshot_inode(inodes[nr_inodes]);
+ if (ret)
+ return ret;
+
+ for (idx = 0; idx < i; idx++) {
+ unsigned int inode_ranges = 0;
+
+ ret = ext4_fc_snapshot_inode(inodes[idx], nr_ranges,
+ &inode_ranges);
if (ret)
break;
+ nr_ranges += inode_ranges;
}
- kvfree(inodes);
return ret;
}
struct ext4_inode_info *iter;
struct ext4_fc_head head;
struct inode *inode;
+ struct inode **inodes;
+ unsigned int inodes_size;
struct blk_plug plug;
int ret = 0;
u32 crc = 0;
return ret;
+ ret = ext4_fc_alloc_snapshot_inodes(sb, &inodes, &inodes_size);
+ if (ret)
+ return ret;
+
/* Step 4: Mark all inodes as being committed. */
jbd2_journal_lock_updates(journal);
/*
}
ext4_fc_unlock(sb, alloc_ctx);
- ret = ext4_fc_snapshot_inodes(journal);
+ ret = ext4_fc_snapshot_inodes(journal, inodes, inodes_size);
jbd2_journal_unlock_updates(journal);
+ kvfree(inodes);
if (ret)
return ret;
return ret;
}
+static unsigned int ext4_fc_count_snapshot_inodes(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_inode_info *iter;
+ struct ext4_fc_dentry_update *fc_dentry;
+ unsigned int nr_inodes = 0;
+ int alloc_ctx;
+
+ alloc_ctx = ext4_fc_lock(sb);
+ list_for_each_entry(iter, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list)
+ nr_inodes++;
+
+ list_for_each_entry(fc_dentry, &sbi->s_fc_dentry_q[FC_Q_MAIN], fcd_list) {
+ struct ext4_inode_info *ei;
+
+ if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT)
+ continue;
+ if (list_empty(&fc_dentry->fcd_dilist))
+ continue;
+
+ /* See the comment in ext4_fc_commit_dentry_updates(). */
+ ei = list_first_entry(&fc_dentry->fcd_dilist,
+ struct ext4_inode_info, i_fc_dilist);
+ if (!list_empty(&ei->i_fc_list))
+ continue;
+
+ nr_inodes++;
+ }
+ ext4_fc_unlock(sb, alloc_ctx);
+
+ return nr_inodes;
+}
+
+static int ext4_fc_alloc_snapshot_inodes(struct super_block *sb,
+ struct inode ***inodesp,
+ unsigned int *nr_inodesp)
+{
+ unsigned int nr_inodes = ext4_fc_count_snapshot_inodes(sb);
+ struct inode **inodes;
+
+ *inodesp = NULL;
+ *nr_inodesp = 0;
+
+ if (!nr_inodes)
+ return 0;
+
+ if (nr_inodes > EXT4_FC_SNAPSHOT_MAX_INODES)
+ return -E2BIG;
+
+ inodes = kvcalloc(nr_inodes, sizeof(*inodes), GFP_NOFS);
+ if (!inodes)
+ return -ENOMEM;
+
+ *inodesp = inodes;
+ *nr_inodesp = nr_inodes;
+ return 0;
+}
+
static void ext4_fc_update_stats(struct super_block *sb, int status,
u64 commit_time, int nblks, tid_t commit_tid)
{
fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize;
ret = ext4_fc_perform_commit(journal);
if (ret < 0) {
- status = EXT4_FC_STATUS_FAILED;
+ if (ret == -EAGAIN || ret == -E2BIG || ret == -ECANCELED)
+ status = EXT4_FC_STATUS_INELIGIBLE;
+ else
+ status = EXT4_FC_STATUS_FAILED;
goto fallback;
}
nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before;
while (!list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) {
fc_dentry = list_first_entry(&sbi->s_fc_dentry_q[FC_Q_MAIN],
- struct ext4_fc_dentry_update,
- fcd_list);
+ struct ext4_fc_dentry_update,
+ fcd_list);
list_del_init(&fc_dentry->fcd_list);
if (fc_dentry->fcd_op == EXT4_FC_TAG_CREAT &&
- !list_empty(&fc_dentry->fcd_dilist)) {
+ !list_empty(&fc_dentry->fcd_dilist)) {
/* See the comment in ext4_fc_commit_dentry_updates(). */
ei = list_first_entry(&fc_dentry->fcd_dilist,
- struct ext4_inode_info,
- i_fc_dilist);
+ struct ext4_inode_info,
+ i_fc_dilist);
ext4_fc_free_inode_snap(&ei->vfs_inode);
spin_lock(&ei->i_fc_lock);
ext4_clear_inode_state(&ei->vfs_inode,
- EXT4_STATE_FC_REQUEUE);
+ EXT4_STATE_FC_REQUEUE);
ext4_clear_inode_state(&ei->vfs_inode,
- EXT4_STATE_FC_COMMITTING);
+ EXT4_STATE_FC_COMMITTING);
spin_unlock(&ei->i_fc_lock);
/*
* Make sure clearing of EXT4_STATE_FC_COMMITTING is
- * visible before we send the wakeup. Pairs with implicit
- * barrier in prepare_to_wait() in ext4_fc_del().
+ * visible before we send the wakeup. Pairs with
+ * implicit barrier in prepare_to_wait() in
+ * ext4_fc_del().
*/
smp_mb();
ext4_fc_wake_inode_state(&ei->vfs_inode,
ext4_fc_dentry_cachep = KMEM_CACHE(ext4_fc_dentry_update,
SLAB_RECLAIM_ACCOUNT);
- if (ext4_fc_dentry_cachep == NULL)
+ if (!ext4_fc_dentry_cachep)
return -ENOMEM;
+ ext4_fc_range_cachep = KMEM_CACHE(ext4_fc_range, SLAB_RECLAIM_ACCOUNT);
+ if (!ext4_fc_range_cachep) {
+ kmem_cache_destroy(ext4_fc_dentry_cachep);
+ return -ENOMEM;
+ }
+
return 0;
}
void ext4_fc_destroy_dentry_cache(void)
{
+ kmem_cache_destroy(ext4_fc_range_cachep);
kmem_cache_destroy(ext4_fc_dentry_cachep);
}