From: Sasha Levin Date: Sat, 29 Feb 2020 03:59:14 +0000 (-0500) Subject: fixes for 4.14 X-Git-Tag: v4.19.108~88 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=266b72b11a3514478cea37451d7ae882b472b0be;p=thirdparty%2Fkernel%2Fstable-queue.git fixes for 4.14 Signed-off-by: Sasha Levin --- diff --git a/queue-4.14/ext4-fix-potential-race-between-online-resizing-and-.patch b/queue-4.14/ext4-fix-potential-race-between-online-resizing-and-.patch new file mode 100644 index 00000000000..b0f2225e236 --- /dev/null +++ b/queue-4.14/ext4-fix-potential-race-between-online-resizing-and-.patch @@ -0,0 +1,325 @@ +From 6bb2c35de862c3bc2d996022321c1808b80efda0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 28 Feb 2020 16:48:15 -0800 +Subject: ext4: fix potential race between online resizing and write operations + +From: Theodore Ts'o + +commit 1d0c3924a92e69bfa91163bda83c12a994b4d106 upstream. + +During an online resize an array of pointers to buffer heads gets +replaced so it can get enlarged. If there is a racing block +allocation or deallocation which uses the old array, and the old array +has gotten reused this can lead to a GPF or some other random kernel +memory getting modified. + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=206443 +Link: https://lore.kernel.org/r/20200221053458.730016-2-tytso@mit.edu +Reported-by: Suraj Jitindar Singh +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org # 4.14.x +Signed-off-by: Sasha Levin +--- + fs/ext4/balloc.c | 14 +++++++++--- + fs/ext4/ext4.h | 20 +++++++++++++++++- + fs/ext4/resize.c | 55 ++++++++++++++++++++++++++++++++++++++---------- + fs/ext4/super.c | 31 +++++++++++++++++++-------- + 4 files changed, 96 insertions(+), 24 deletions(-) + +diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c +index 70266a3355dc3..fb38f20f869e7 100644 +--- a/fs/ext4/balloc.c ++++ b/fs/ext4/balloc.c +@@ -280,6 +280,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, + ext4_group_t ngroups = ext4_get_groups_count(sb); + struct ext4_group_desc *desc; + struct ext4_sb_info *sbi = EXT4_SB(sb); ++ struct buffer_head *bh_p; + + if (block_group >= ngroups) { + ext4_error(sb, "block_group >= groups_count - block_group = %u," +@@ -290,7 +291,14 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, + + group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); + offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); +- if (!sbi->s_group_desc[group_desc]) { ++ bh_p = sbi_array_rcu_deref(sbi, s_group_desc, group_desc); ++ /* ++ * sbi_array_rcu_deref returns with rcu unlocked, this is ok since ++ * the pointer being dereferenced won't be dereferenced again. By ++ * looking at the usage in add_new_gdb() the value isn't modified, ++ * just the pointer, and so it remains valid. ++ */ ++ if (!bh_p) { + ext4_error(sb, "Group descriptor not loaded - " + "block_group = %u, group_desc = %u, desc = %u", + block_group, group_desc, offset); +@@ -298,10 +306,10 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, + } + + desc = (struct ext4_group_desc *)( +- (__u8 *)sbi->s_group_desc[group_desc]->b_data + ++ (__u8 *)bh_p->b_data + + offset * EXT4_DESC_SIZE(sb)); + if (bh) +- *bh = sbi->s_group_desc[group_desc]; ++ *bh = bh_p; + return desc; + } + +diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h +index b162f602c430f..94f4f6d55c1a4 100644 +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -1382,7 +1382,7 @@ struct ext4_sb_info { + loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ + struct buffer_head * s_sbh; /* Buffer containing the super block */ + struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ +- struct buffer_head **s_group_desc; ++ struct buffer_head * __rcu *s_group_desc; + unsigned int s_mount_opt; + unsigned int s_mount_opt2; + unsigned int s_mount_flags; +@@ -1556,6 +1556,23 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) + ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); + } + ++/* ++ * Returns: sbi->field[index] ++ * Used to access an array element from the following sbi fields which require ++ * rcu protection to avoid dereferencing an invalid pointer due to reassignment ++ * - s_group_desc ++ * - s_group_info ++ * - s_flex_group ++ */ ++#define sbi_array_rcu_deref(sbi, field, index) \ ++({ \ ++ typeof(*((sbi)->field)) _v; \ ++ rcu_read_lock(); \ ++ _v = ((typeof(_v)*)rcu_dereference((sbi)->field))[index]; \ ++ rcu_read_unlock(); \ ++ _v; \ ++}) ++ + /* + * Inode dynamic state flags + */ +@@ -2569,6 +2586,7 @@ extern int ext4_generic_delete_entry(handle_t *handle, + extern bool ext4_empty_dir(struct inode *inode); + + /* resize.c */ ++extern void ext4_kvfree_array_rcu(void *to_free); + extern int ext4_group_add(struct super_block *sb, + struct ext4_new_group_data *input); + extern int ext4_group_extend(struct super_block *sb, +diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c +index 4f7cd78d03647..16e3830da5487 100644 +--- a/fs/ext4/resize.c ++++ b/fs/ext4/resize.c +@@ -17,6 +17,33 @@ + + #include "ext4_jbd2.h" + ++struct ext4_rcu_ptr { ++ struct rcu_head rcu; ++ void *ptr; ++}; ++ ++static void ext4_rcu_ptr_callback(struct rcu_head *head) ++{ ++ struct ext4_rcu_ptr *ptr; ++ ++ ptr = container_of(head, struct ext4_rcu_ptr, rcu); ++ kvfree(ptr->ptr); ++ kfree(ptr); ++} ++ ++void ext4_kvfree_array_rcu(void *to_free) ++{ ++ struct ext4_rcu_ptr *ptr = kzalloc(sizeof(*ptr), GFP_KERNEL); ++ ++ if (ptr) { ++ ptr->ptr = to_free; ++ call_rcu(&ptr->rcu, ext4_rcu_ptr_callback); ++ return; ++ } ++ synchronize_rcu(); ++ kvfree(to_free); ++} ++ + int ext4_resize_begin(struct super_block *sb) + { + struct ext4_sb_info *sbi = EXT4_SB(sb); +@@ -545,8 +572,8 @@ static int setup_new_flex_group_blocks(struct super_block *sb, + brelse(gdb); + goto out; + } +- memcpy(gdb->b_data, sbi->s_group_desc[j]->b_data, +- gdb->b_size); ++ memcpy(gdb->b_data, sbi_array_rcu_deref(sbi, ++ s_group_desc, j)->b_data, gdb->b_size); + set_buffer_uptodate(gdb); + + err = ext4_handle_dirty_metadata(handle, NULL, gdb); +@@ -854,13 +881,15 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, + } + brelse(dind); + +- o_group_desc = EXT4_SB(sb)->s_group_desc; ++ rcu_read_lock(); ++ o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc); + memcpy(n_group_desc, o_group_desc, + EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); ++ rcu_read_unlock(); + n_group_desc[gdb_num] = gdb_bh; +- EXT4_SB(sb)->s_group_desc = n_group_desc; ++ rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc); + EXT4_SB(sb)->s_gdb_count++; +- kvfree(o_group_desc); ++ ext4_kvfree_array_rcu(o_group_desc); + + le16_add_cpu(&es->s_reserved_gdt_blocks, -1); + err = ext4_handle_dirty_super(handle, sb); +@@ -904,9 +933,11 @@ static int add_new_gdb_meta_bg(struct super_block *sb, + return err; + } + +- o_group_desc = EXT4_SB(sb)->s_group_desc; ++ rcu_read_lock(); ++ o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc); + memcpy(n_group_desc, o_group_desc, + EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); ++ rcu_read_unlock(); + n_group_desc[gdb_num] = gdb_bh; + + BUFFER_TRACE(gdb_bh, "get_write_access"); +@@ -917,9 +948,9 @@ static int add_new_gdb_meta_bg(struct super_block *sb, + return err; + } + +- EXT4_SB(sb)->s_group_desc = n_group_desc; ++ rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc); + EXT4_SB(sb)->s_gdb_count++; +- kvfree(o_group_desc); ++ ext4_kvfree_array_rcu(o_group_desc); + return err; + } + +@@ -1183,7 +1214,8 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, + * use non-sparse filesystems anymore. This is already checked above. + */ + if (gdb_off) { +- gdb_bh = sbi->s_group_desc[gdb_num]; ++ gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, ++ gdb_num); + BUFFER_TRACE(gdb_bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, gdb_bh); + +@@ -1265,7 +1297,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, + /* + * get_write_access() has been called on gdb_bh by ext4_add_new_desc(). + */ +- gdb_bh = sbi->s_group_desc[gdb_num]; ++ gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, gdb_num); + /* Update group descriptor block for new group */ + gdp = (struct ext4_group_desc *)(gdb_bh->b_data + + gdb_off * EXT4_DESC_SIZE(sb)); +@@ -1492,7 +1524,8 @@ exit_journal: + for (; gdb_num <= gdb_num_end; gdb_num++) { + struct buffer_head *gdb_bh; + +- gdb_bh = sbi->s_group_desc[gdb_num]; ++ gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, ++ gdb_num); + if (old_gdb == gdb_bh->b_blocknr) + continue; + update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index 09b443709bcab..b14a0c5638e70 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -900,6 +900,7 @@ static void ext4_put_super(struct super_block *sb) + { + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; ++ struct buffer_head **group_desc; + int aborted = 0; + int i, err; + +@@ -931,9 +932,12 @@ static void ext4_put_super(struct super_block *sb) + if (!sb_rdonly(sb)) + ext4_commit_super(sb, 1); + ++ rcu_read_lock(); ++ group_desc = rcu_dereference(sbi->s_group_desc); + for (i = 0; i < sbi->s_gdb_count; i++) +- brelse(sbi->s_group_desc[i]); +- kvfree(sbi->s_group_desc); ++ brelse(group_desc[i]); ++ kvfree(group_desc); ++ rcu_read_unlock(); + kvfree(sbi->s_flex_groups); + percpu_counter_destroy(&sbi->s_freeclusters_counter); + percpu_counter_destroy(&sbi->s_freeinodes_counter); +@@ -3489,7 +3493,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) + { + struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev); + char *orig_data = kstrdup(data, GFP_KERNEL); +- struct buffer_head *bh; ++ struct buffer_head *bh, **group_desc; + struct ext4_super_block *es = NULL; + struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + ext4_fsblk_t block; +@@ -4104,9 +4108,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) + goto failed_mount; + } + } +- sbi->s_group_desc = kvmalloc(db_count * ++ rcu_assign_pointer(sbi->s_group_desc, ++ kvmalloc_array(db_count, + sizeof(struct buffer_head *), +- GFP_KERNEL); ++ GFP_KERNEL)); + if (sbi->s_group_desc == NULL) { + ext4_msg(sb, KERN_ERR, "not enough memory"); + ret = -ENOMEM; +@@ -4122,14 +4127,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) + } + + for (i = 0; i < db_count; i++) { ++ struct buffer_head *bh; ++ + block = descriptor_loc(sb, logical_sb_block, i); +- sbi->s_group_desc[i] = sb_bread_unmovable(sb, block); +- if (!sbi->s_group_desc[i]) { ++ bh = sb_bread_unmovable(sb, block); ++ if (!bh) { + ext4_msg(sb, KERN_ERR, + "can't read group descriptor %d", i); + db_count = i; + goto failed_mount2; + } ++ rcu_read_lock(); ++ rcu_dereference(sbi->s_group_desc)[i] = bh; ++ rcu_read_unlock(); + } + sbi->s_gdb_count = db_count; + if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) { +@@ -4521,9 +4531,12 @@ failed_mount3: + if (sbi->s_mmp_tsk) + kthread_stop(sbi->s_mmp_tsk); + failed_mount2: ++ rcu_read_lock(); ++ group_desc = rcu_dereference(sbi->s_group_desc); + for (i = 0; i < db_count; i++) +- brelse(sbi->s_group_desc[i]); +- kvfree(sbi->s_group_desc); ++ brelse(group_desc[i]); ++ kvfree(group_desc); ++ rcu_read_unlock(); + failed_mount: + if (sbi->s_chksum_driver) + crypto_free_shash(sbi->s_chksum_driver); +-- +2.20.1 + diff --git a/queue-4.14/ext4-fix-potential-race-between-s_flex_groups-online.patch b/queue-4.14/ext4-fix-potential-race-between-s_flex_groups-online.patch new file mode 100644 index 00000000000..86e0c405066 --- /dev/null +++ b/queue-4.14/ext4-fix-potential-race-between-s_flex_groups-online.patch @@ -0,0 +1,304 @@ +From 5504960e101c8a983e2a8d7b2e255e36a6587096 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 28 Feb 2020 16:48:17 -0800 +Subject: ext4: fix potential race between s_flex_groups online resizing and + access + +From: Suraj Jitindar Singh + +commit 7c990728b99ed6fbe9c75fc202fce1172d9916da upstream. + +During an online resize an array of s_flex_groups structures gets replaced +so it can get enlarged. If there is a concurrent access to the array and +this memory has been reused then this can lead to an invalid memory access. + +The s_flex_group array has been converted into an array of pointers rather +than an array of structures. This is to ensure that the information +contained in the structures cannot get out of sync during a resize due to +an accessor updating the value in the old structure after it has been +copied but before the array pointer is updated. Since the structures them- +selves are no longer copied but only the pointers to them this case is +mitigated. + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=206443 +Link: https://lore.kernel.org/r/20200221053458.730016-4-tytso@mit.edu +Signed-off-by: Suraj Jitindar Singh +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org # 4.14.x +Signed-off-by: Sasha Levin +--- + fs/ext4/ext4.h | 2 +- + fs/ext4/ialloc.c | 23 +++++++++------ + fs/ext4/mballoc.c | 9 ++++-- + fs/ext4/resize.c | 7 +++-- + fs/ext4/super.c | 72 ++++++++++++++++++++++++++++++++--------------- + 5 files changed, 76 insertions(+), 37 deletions(-) + +diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h +index 94f4f6d55c1a4..8b55abdd7249a 100644 +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -1492,7 +1492,7 @@ struct ext4_sb_info { + unsigned int s_extent_max_zeroout_kb; + + unsigned int s_log_groups_per_flex; +- struct flex_groups *s_flex_groups; ++ struct flex_groups * __rcu *s_flex_groups; + ext4_group_t s_flex_groups_allocated; + + /* workqueue for reserved extent conversions (buffered io) */ +diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c +index 2f46564d3fca9..2a480c0ef1bc1 100644 +--- a/fs/ext4/ialloc.c ++++ b/fs/ext4/ialloc.c +@@ -333,11 +333,13 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) + + percpu_counter_inc(&sbi->s_freeinodes_counter); + if (sbi->s_log_groups_per_flex) { +- ext4_group_t f = ext4_flex_group(sbi, block_group); ++ struct flex_groups *fg; + +- atomic_inc(&sbi->s_flex_groups[f].free_inodes); ++ fg = sbi_array_rcu_deref(sbi, s_flex_groups, ++ ext4_flex_group(sbi, block_group)); ++ atomic_inc(&fg->free_inodes); + if (is_directory) +- atomic_dec(&sbi->s_flex_groups[f].used_dirs); ++ atomic_dec(&fg->used_dirs); + } + BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata"); + fatal = ext4_handle_dirty_metadata(handle, NULL, bh2); +@@ -378,12 +380,13 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g, + int flex_size, struct orlov_stats *stats) + { + struct ext4_group_desc *desc; +- struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups; + + if (flex_size > 1) { +- stats->free_inodes = atomic_read(&flex_group[g].free_inodes); +- stats->free_clusters = atomic64_read(&flex_group[g].free_clusters); +- stats->used_dirs = atomic_read(&flex_group[g].used_dirs); ++ struct flex_groups *fg = sbi_array_rcu_deref(EXT4_SB(sb), ++ s_flex_groups, g); ++ stats->free_inodes = atomic_read(&fg->free_inodes); ++ stats->free_clusters = atomic64_read(&fg->free_clusters); ++ stats->used_dirs = atomic_read(&fg->used_dirs); + return; + } + +@@ -1062,7 +1065,8 @@ got: + if (sbi->s_log_groups_per_flex) { + ext4_group_t f = ext4_flex_group(sbi, group); + +- atomic_inc(&sbi->s_flex_groups[f].used_dirs); ++ atomic_inc(&sbi_array_rcu_deref(sbi, s_flex_groups, ++ f)->used_dirs); + } + } + if (ext4_has_group_desc_csum(sb)) { +@@ -1085,7 +1089,8 @@ got: + + if (sbi->s_log_groups_per_flex) { + flex_group = ext4_flex_group(sbi, group); +- atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes); ++ atomic_dec(&sbi_array_rcu_deref(sbi, s_flex_groups, ++ flex_group)->free_inodes); + } + + inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb); +diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c +index 3ba9a4ae4eacd..fb865216edb9b 100644 +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -3052,7 +3052,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, + ext4_group_t flex_group = ext4_flex_group(sbi, + ac->ac_b_ex.fe_group); + atomic64_sub(ac->ac_b_ex.fe_len, +- &sbi->s_flex_groups[flex_group].free_clusters); ++ &sbi_array_rcu_deref(sbi, s_flex_groups, ++ flex_group)->free_clusters); + } + + err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); +@@ -4947,7 +4948,8 @@ do_more: + if (sbi->s_log_groups_per_flex) { + ext4_group_t flex_group = ext4_flex_group(sbi, block_group); + atomic64_add(count_clusters, +- &sbi->s_flex_groups[flex_group].free_clusters); ++ &sbi_array_rcu_deref(sbi, s_flex_groups, ++ flex_group)->free_clusters); + } + + if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) +@@ -5092,7 +5094,8 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, + if (sbi->s_log_groups_per_flex) { + ext4_group_t flex_group = ext4_flex_group(sbi, block_group); + atomic64_add(EXT4_NUM_B2C(sbi, blocks_freed), +- &sbi->s_flex_groups[flex_group].free_clusters); ++ &sbi_array_rcu_deref(sbi, s_flex_groups, ++ flex_group)->free_clusters); + } + + ext4_mb_unload_buddy(&e4b); +diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c +index 16e3830da5487..d42f7471fd5b8 100644 +--- a/fs/ext4/resize.c ++++ b/fs/ext4/resize.c +@@ -1425,11 +1425,14 @@ static void ext4_update_super(struct super_block *sb, + percpu_counter_read(&sbi->s_freeclusters_counter)); + if (ext4_has_feature_flex_bg(sb) && sbi->s_log_groups_per_flex) { + ext4_group_t flex_group; ++ struct flex_groups *fg; ++ + flex_group = ext4_flex_group(sbi, group_data[0].group); ++ fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group); + atomic64_add(EXT4_NUM_B2C(sbi, free_blocks), +- &sbi->s_flex_groups[flex_group].free_clusters); ++ &fg->free_clusters); + atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count, +- &sbi->s_flex_groups[flex_group].free_inodes); ++ &fg->free_inodes); + } + + /* +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index b14a0c5638e70..f1c1c180d2671 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -901,6 +901,7 @@ static void ext4_put_super(struct super_block *sb) + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; + struct buffer_head **group_desc; ++ struct flex_groups **flex_groups; + int aborted = 0; + int i, err; + +@@ -937,8 +938,13 @@ static void ext4_put_super(struct super_block *sb) + for (i = 0; i < sbi->s_gdb_count; i++) + brelse(group_desc[i]); + kvfree(group_desc); ++ flex_groups = rcu_dereference(sbi->s_flex_groups); ++ if (flex_groups) { ++ for (i = 0; i < sbi->s_flex_groups_allocated; i++) ++ kvfree(flex_groups[i]); ++ kvfree(flex_groups); ++ } + rcu_read_unlock(); +- kvfree(sbi->s_flex_groups); + percpu_counter_destroy(&sbi->s_freeclusters_counter); + percpu_counter_destroy(&sbi->s_freeinodes_counter); + percpu_counter_destroy(&sbi->s_dirs_counter); +@@ -2231,8 +2237,8 @@ done: + int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) + { + struct ext4_sb_info *sbi = EXT4_SB(sb); +- struct flex_groups *new_groups; +- int size; ++ struct flex_groups **old_groups, **new_groups; ++ int size, i; + + if (!sbi->s_log_groups_per_flex) + return 0; +@@ -2241,22 +2247,37 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) + if (size <= sbi->s_flex_groups_allocated) + return 0; + +- size = roundup_pow_of_two(size * sizeof(struct flex_groups)); +- new_groups = kvzalloc(size, GFP_KERNEL); ++ new_groups = kvzalloc(roundup_pow_of_two(size * ++ sizeof(*sbi->s_flex_groups)), GFP_KERNEL); + if (!new_groups) { +- ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups", +- size / (int) sizeof(struct flex_groups)); ++ ext4_msg(sb, KERN_ERR, ++ "not enough memory for %d flex group pointers", size); + return -ENOMEM; + } +- +- if (sbi->s_flex_groups) { +- memcpy(new_groups, sbi->s_flex_groups, +- (sbi->s_flex_groups_allocated * +- sizeof(struct flex_groups))); +- kvfree(sbi->s_flex_groups); ++ for (i = sbi->s_flex_groups_allocated; i < size; i++) { ++ new_groups[i] = kvzalloc(roundup_pow_of_two( ++ sizeof(struct flex_groups)), ++ GFP_KERNEL); ++ if (!new_groups[i]) { ++ for (i--; i >= sbi->s_flex_groups_allocated; i--) ++ kvfree(new_groups[i]); ++ kvfree(new_groups); ++ ext4_msg(sb, KERN_ERR, ++ "not enough memory for %d flex groups", size); ++ return -ENOMEM; ++ } + } +- sbi->s_flex_groups = new_groups; +- sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups); ++ rcu_read_lock(); ++ old_groups = rcu_dereference(sbi->s_flex_groups); ++ if (old_groups) ++ memcpy(new_groups, old_groups, ++ (sbi->s_flex_groups_allocated * ++ sizeof(struct flex_groups *))); ++ rcu_read_unlock(); ++ rcu_assign_pointer(sbi->s_flex_groups, new_groups); ++ sbi->s_flex_groups_allocated = size; ++ if (old_groups) ++ ext4_kvfree_array_rcu(old_groups); + return 0; + } + +@@ -2264,6 +2285,7 @@ static int ext4_fill_flex_info(struct super_block *sb) + { + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_group_desc *gdp = NULL; ++ struct flex_groups *fg; + ext4_group_t flex_group; + int i, err; + +@@ -2281,12 +2303,11 @@ static int ext4_fill_flex_info(struct super_block *sb) + gdp = ext4_get_group_desc(sb, i, NULL); + + flex_group = ext4_flex_group(sbi, i); +- atomic_add(ext4_free_inodes_count(sb, gdp), +- &sbi->s_flex_groups[flex_group].free_inodes); ++ fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group); ++ atomic_add(ext4_free_inodes_count(sb, gdp), &fg->free_inodes); + atomic64_add(ext4_free_group_clusters(sb, gdp), +- &sbi->s_flex_groups[flex_group].free_clusters); +- atomic_add(ext4_used_dirs_count(sb, gdp), +- &sbi->s_flex_groups[flex_group].used_dirs); ++ &fg->free_clusters); ++ atomic_add(ext4_used_dirs_count(sb, gdp), &fg->used_dirs); + } + + return 1; +@@ -3496,6 +3517,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) + struct buffer_head *bh, **group_desc; + struct ext4_super_block *es = NULL; + struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); ++ struct flex_groups **flex_groups; + ext4_fsblk_t block; + ext4_fsblk_t sb_block = get_sb_block(&data); + ext4_fsblk_t logical_sb_block; +@@ -4494,8 +4516,14 @@ failed_mount7: + ext4_unregister_li_request(sb); + failed_mount6: + ext4_mb_release(sb); +- if (sbi->s_flex_groups) +- kvfree(sbi->s_flex_groups); ++ rcu_read_lock(); ++ flex_groups = rcu_dereference(sbi->s_flex_groups); ++ if (flex_groups) { ++ for (i = 0; i < sbi->s_flex_groups_allocated; i++) ++ kvfree(flex_groups[i]); ++ kvfree(flex_groups); ++ } ++ rcu_read_unlock(); + percpu_counter_destroy(&sbi->s_freeclusters_counter); + percpu_counter_destroy(&sbi->s_freeinodes_counter); + percpu_counter_destroy(&sbi->s_dirs_counter); +-- +2.20.1 + diff --git a/queue-4.14/ext4-fix-potential-race-between-s_group_info-online-.patch b/queue-4.14/ext4-fix-potential-race-between-s_group_info-online-.patch new file mode 100644 index 00000000000..2a92357c23a --- /dev/null +++ b/queue-4.14/ext4-fix-potential-race-between-s_group_info-online-.patch @@ -0,0 +1,187 @@ +From 2e8081759cfabedc36b8a8c2d963cc0ee805e789 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 18 Feb 2020 19:08:50 -0800 +Subject: ext4: fix potential race between s_group_info online resizing and + access + +From: Suraj Jitindar Singh + +[ Upstream commit df3da4ea5a0fc5d115c90d5aa6caa4dd433750a7 ] + +During an online resize an array of pointers to s_group_info gets replaced +so it can get enlarged. If there is a concurrent access to the array in +ext4_get_group_info() and this memory has been reused then this can lead to +an invalid memory access. + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=206443 +Link: https://lore.kernel.org/r/20200221053458.730016-3-tytso@mit.edu +Signed-off-by: Suraj Jitindar Singh +Signed-off-by: Theodore Ts'o +Reviewed-by: Balbir Singh +Cc: stable@kernel.org +Signed-off-by: Sasha Levin +--- + fs/ext4/ext4.h | 8 ++++---- + fs/ext4/mballoc.c | 52 +++++++++++++++++++++++++++++++---------------- + 2 files changed, 39 insertions(+), 21 deletions(-) + +diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h +index 8b55abdd7249a..4aa0f8f7d9a0e 100644 +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -1442,7 +1442,7 @@ struct ext4_sb_info { + #endif + + /* for buddy allocator */ +- struct ext4_group_info ***s_group_info; ++ struct ext4_group_info ** __rcu *s_group_info; + struct inode *s_buddy_cache; + spinlock_t s_md_lock; + unsigned short *s_mb_offsets; +@@ -2832,13 +2832,13 @@ static inline + struct ext4_group_info *ext4_get_group_info(struct super_block *sb, + ext4_group_t group) + { +- struct ext4_group_info ***grp_info; ++ struct ext4_group_info **grp_info; + long indexv, indexh; + BUG_ON(group >= EXT4_SB(sb)->s_groups_count); +- grp_info = EXT4_SB(sb)->s_group_info; + indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb)); + indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1); +- return grp_info[indexv][indexh]; ++ grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv); ++ return grp_info[indexh]; + } + + /* +diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c +index fb865216edb9b..745a89d30a57a 100644 +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -2389,7 +2389,7 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups) + { + struct ext4_sb_info *sbi = EXT4_SB(sb); + unsigned size; +- struct ext4_group_info ***new_groupinfo; ++ struct ext4_group_info ***old_groupinfo, ***new_groupinfo; + + size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> + EXT4_DESC_PER_BLOCK_BITS(sb); +@@ -2402,13 +2402,16 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups) + ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group"); + return -ENOMEM; + } +- if (sbi->s_group_info) { +- memcpy(new_groupinfo, sbi->s_group_info, ++ rcu_read_lock(); ++ old_groupinfo = rcu_dereference(sbi->s_group_info); ++ if (old_groupinfo) ++ memcpy(new_groupinfo, old_groupinfo, + sbi->s_group_info_size * sizeof(*sbi->s_group_info)); +- kvfree(sbi->s_group_info); +- } +- sbi->s_group_info = new_groupinfo; ++ rcu_read_unlock(); ++ rcu_assign_pointer(sbi->s_group_info, new_groupinfo); + sbi->s_group_info_size = size / sizeof(*sbi->s_group_info); ++ if (old_groupinfo) ++ ext4_kvfree_array_rcu(old_groupinfo); + ext4_debug("allocated s_groupinfo array for %d meta_bg's\n", + sbi->s_group_info_size); + return 0; +@@ -2420,6 +2423,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, + { + int i; + int metalen = 0; ++ int idx = group >> EXT4_DESC_PER_BLOCK_BITS(sb); + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_group_info **meta_group_info; + struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); +@@ -2438,12 +2442,12 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, + "for a buddy group"); + goto exit_meta_group_info; + } +- sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = +- meta_group_info; ++ rcu_read_lock(); ++ rcu_dereference(sbi->s_group_info)[idx] = meta_group_info; ++ rcu_read_unlock(); + } + +- meta_group_info = +- sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; ++ meta_group_info = sbi_array_rcu_deref(sbi, s_group_info, idx); + i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); + + meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS); +@@ -2491,8 +2495,13 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, + exit_group_info: + /* If a meta_group_info table has been allocated, release it now */ + if (group % EXT4_DESC_PER_BLOCK(sb) == 0) { +- kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]); +- sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = NULL; ++ struct ext4_group_info ***group_info; ++ ++ rcu_read_lock(); ++ group_info = rcu_dereference(sbi->s_group_info); ++ kfree(group_info[idx]); ++ group_info[idx] = NULL; ++ rcu_read_unlock(); + } + exit_meta_group_info: + return -ENOMEM; +@@ -2505,6 +2514,7 @@ static int ext4_mb_init_backend(struct super_block *sb) + struct ext4_sb_info *sbi = EXT4_SB(sb); + int err; + struct ext4_group_desc *desc; ++ struct ext4_group_info ***group_info; + struct kmem_cache *cachep; + + err = ext4_mb_alloc_groupinfo(sb, ngroups); +@@ -2539,11 +2549,16 @@ err_freebuddy: + while (i-- > 0) + kmem_cache_free(cachep, ext4_get_group_info(sb, i)); + i = sbi->s_group_info_size; ++ rcu_read_lock(); ++ group_info = rcu_dereference(sbi->s_group_info); + while (i-- > 0) +- kfree(sbi->s_group_info[i]); ++ kfree(group_info[i]); ++ rcu_read_unlock(); + iput(sbi->s_buddy_cache); + err_freesgi: +- kvfree(sbi->s_group_info); ++ rcu_read_lock(); ++ kvfree(rcu_dereference(sbi->s_group_info)); ++ rcu_read_unlock(); + return -ENOMEM; + } + +@@ -2733,7 +2748,7 @@ int ext4_mb_release(struct super_block *sb) + ext4_group_t ngroups = ext4_get_groups_count(sb); + ext4_group_t i; + int num_meta_group_infos; +- struct ext4_group_info *grinfo; ++ struct ext4_group_info *grinfo, ***group_info; + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); + +@@ -2751,9 +2766,12 @@ int ext4_mb_release(struct super_block *sb) + num_meta_group_infos = (ngroups + + EXT4_DESC_PER_BLOCK(sb) - 1) >> + EXT4_DESC_PER_BLOCK_BITS(sb); ++ rcu_read_lock(); ++ group_info = rcu_dereference(sbi->s_group_info); + for (i = 0; i < num_meta_group_infos; i++) +- kfree(sbi->s_group_info[i]); +- kvfree(sbi->s_group_info); ++ kfree(group_info[i]); ++ kvfree(group_info); ++ rcu_read_unlock(); + } + kfree(sbi->s_mb_offsets); + kfree(sbi->s_mb_maxs); +-- +2.20.1 + diff --git a/queue-4.14/iwlwifi-pcie-fix-rb_allocator-workqueue-allocation.patch b/queue-4.14/iwlwifi-pcie-fix-rb_allocator-workqueue-allocation.patch new file mode 100644 index 00000000000..1a03af80475 --- /dev/null +++ b/queue-4.14/iwlwifi-pcie-fix-rb_allocator-workqueue-allocation.patch @@ -0,0 +1,66 @@ +From 1f1ebdb6185803ec415884997eceb389cdbf3dbe Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 29 Feb 2020 04:54:53 +0530 +Subject: iwlwifi: pcie: fix rb_allocator workqueue allocation + +From: Johannes Berg + +commit 8188a18ee2e48c9a7461139838048363bfce3fef upstream + +We don't handle failures in the rb_allocator workqueue allocation +correctly. To fix that, move the code earlier so the cleanup is +easier and we don't have to undo all the interrupt allocations in +this case. + +Signed-off-by: Johannes Berg +Signed-off-by: Luca Coelho +[Ajay: Modified to apply on v4.19.y and v4.14.y] +Signed-off-by: Ajay Kaher +Signed-off-by: Sasha Levin +--- + drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 15 +++++++++++---- + 1 file changed, 11 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +index dffa697d71e0f..8a074a516fb26 100644 +--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c ++++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +@@ -3023,6 +3023,15 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, + spin_lock_init(&trans_pcie->reg_lock); + mutex_init(&trans_pcie->mutex); + init_waitqueue_head(&trans_pcie->ucode_write_waitq); ++ ++ trans_pcie->rba.alloc_wq = alloc_workqueue("rb_allocator", ++ WQ_HIGHPRI | WQ_UNBOUND, 1); ++ if (!trans_pcie->rba.alloc_wq) { ++ ret = -ENOMEM; ++ goto out_free_trans; ++ } ++ INIT_WORK(&trans_pcie->rba.rx_alloc, iwl_pcie_rx_allocator_work); ++ + trans_pcie->tso_hdr_page = alloc_percpu(struct iwl_tso_hdr_page); + if (!trans_pcie->tso_hdr_page) { + ret = -ENOMEM; +@@ -3195,10 +3204,6 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, + trans_pcie->inta_mask = CSR_INI_SET_MASK; + } + +- trans_pcie->rba.alloc_wq = alloc_workqueue("rb_allocator", +- WQ_HIGHPRI | WQ_UNBOUND, 1); +- INIT_WORK(&trans_pcie->rba.rx_alloc, iwl_pcie_rx_allocator_work); +- + #ifdef CONFIG_IWLWIFI_PCIE_RTPM + trans->runtime_pm_mode = IWL_PLAT_PM_MODE_D0I3; + #else +@@ -3211,6 +3216,8 @@ out_free_ict: + iwl_pcie_free_ict(trans); + out_no_pci: + free_percpu(trans_pcie->tso_hdr_page); ++ destroy_workqueue(trans_pcie->rba.alloc_wq); ++out_free_trans: + iwl_trans_free(trans); + return ERR_PTR(ret); + } +-- +2.20.1 + diff --git a/queue-4.14/netfilter-nf_conntrack-resolve-clash-for-matching-co.patch b/queue-4.14/netfilter-nf_conntrack-resolve-clash-for-matching-co.patch new file mode 100644 index 00000000000..2558bdd6eda --- /dev/null +++ b/queue-4.14/netfilter-nf_conntrack-resolve-clash-for-matching-co.patch @@ -0,0 +1,86 @@ +From 31ef01b015c855193736e8c0ae8ef417a6bab384 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 28 Feb 2020 00:57:38 +0000 +Subject: netfilter: nf_conntrack: resolve clash for matching conntracks + +From: Martynas Pumputis + +[ Upstream commit ed07d9a021df6da53456663a76999189badc432a ] + +This patch enables the clash resolution for NAT (disabled in +"590b52e10d41") if clashing conntracks match (i.e. both tuples are equal) +and a protocol allows it. + +The clash might happen for a connections-less protocol (e.g. UDP) when +two threads in parallel writes to the same socket and consequent calls +to "get_unique_tuple" return the same tuples (incl. reply tuples). + +In this case it is safe to perform the resolution, as the losing CT +describes the same mangling as the winning CT, so no modifications to +the packet are needed, and the result of rules traversal for the loser's +packet stays valid. + +Signed-off-by: Martynas Pumputis +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Andy Strohman +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_conntrack_core.c | 30 ++++++++++++++++++++++-------- + 1 file changed, 22 insertions(+), 8 deletions(-) + +diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c +index 2e65271bed01f..a79f5a89cab14 100644 +--- a/net/netfilter/nf_conntrack_core.c ++++ b/net/netfilter/nf_conntrack_core.c +@@ -543,6 +543,18 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, + net_eq(net, nf_ct_net(ct)); + } + ++static inline bool ++nf_ct_match(const struct nf_conn *ct1, const struct nf_conn *ct2) ++{ ++ return nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_ORIGINAL].tuple, ++ &ct2->tuplehash[IP_CT_DIR_ORIGINAL].tuple) && ++ nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_REPLY].tuple, ++ &ct2->tuplehash[IP_CT_DIR_REPLY].tuple) && ++ nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_ORIGINAL) && ++ nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_REPLY) && ++ net_eq(nf_ct_net(ct1), nf_ct_net(ct2)); ++} ++ + /* caller must hold rcu readlock and none of the nf_conntrack_locks */ + static void nf_ct_gc_expired(struct nf_conn *ct) + { +@@ -736,19 +748,21 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb, + /* This is the conntrack entry already in hashes that won race. */ + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); + const struct nf_conntrack_l4proto *l4proto; ++ enum ip_conntrack_info oldinfo; ++ struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo); + + l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); + if (l4proto->allow_clash && +- ((ct->status & IPS_NAT_DONE_MASK) == 0) && + !nf_ct_is_dying(ct) && + atomic_inc_not_zero(&ct->ct_general.use)) { +- enum ip_conntrack_info oldinfo; +- struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo); +- +- nf_ct_acct_merge(ct, ctinfo, loser_ct); +- nf_conntrack_put(&loser_ct->ct_general); +- nf_ct_set(skb, ct, oldinfo); +- return NF_ACCEPT; ++ if (((ct->status & IPS_NAT_DONE_MASK) == 0) || ++ nf_ct_match(ct, loser_ct)) { ++ nf_ct_acct_merge(ct, ctinfo, loser_ct); ++ nf_conntrack_put(&loser_ct->ct_general); ++ nf_ct_set(skb, ct, oldinfo); ++ return NF_ACCEPT; ++ } ++ nf_ct_put(ct); + } + NF_CT_STAT_INC(net, drop); + return NF_DROP; +-- +2.20.1 + diff --git a/queue-4.14/series b/queue-4.14/series new file mode 100644 index 00000000000..73efe5f48f8 --- /dev/null +++ b/queue-4.14/series @@ -0,0 +1,5 @@ +iwlwifi-pcie-fix-rb_allocator-workqueue-allocation.patch +netfilter-nf_conntrack-resolve-clash-for-matching-co.patch +ext4-fix-potential-race-between-online-resizing-and-.patch +ext4-fix-potential-race-between-s_flex_groups-online.patch +ext4-fix-potential-race-between-s_group_info-online-.patch