From: Greg Kroah-Hartman Date: Mon, 28 Feb 2022 07:05:44 +0000 (+0100) Subject: 5.16-stable patches X-Git-Tag: v4.9.304~22 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b7b8da1d50bfe36d7af6e61e3671d7ac4cf1e6eb;p=thirdparty%2Fkernel%2Fstable-queue.git 5.16-stable patches added patches: btrfs-autodefrag-only-scan-one-inode-once.patch btrfs-defrag-allow-defrag_one_cluster-to-skip-large-extent-which-is-not-a-target.patch btrfs-defrag-don-t-defrag-extents-which-are-already-at-max-capacity.patch btrfs-defrag-don-t-try-to-merge-regular-extents-with-preallocated-extents.patch btrfs-defrag-remove-an-ambiguous-condition-for-rejection.patch btrfs-prevent-copying-too-big-compressed-lzo-segment.patch btrfs-reduce-extent-threshold-for-autodefrag.patch hugetlbfs-fix-a-truncation-issue-in-hugepages-parameter.patch ib-qib-fix-duplicate-sysfs-directory-name.patch mm-hugetlb-fix-kernel-crash-with-hugetlb-mremap.patch rdma-cma-do-not-change-route.addr.src_addr-outside-state-checks.patch riscv-fix-nommu_k210_sdcard_defconfig.patch riscv-fix-oops-caused-by-irqsoff-latency-tracer.patch staging-fbtft-fb_st7789v-reset-display-before-initialization.patch thermal-int340x-fix-memory-leak-in-int3400_notify.patch tps6598x-clear-int-mask-on-probe-failure.patch tty-n_gsm-fix-encoding-of-control-signal-octet-bit-dv.patch --- diff --git a/queue-5.16/btrfs-autodefrag-only-scan-one-inode-once.patch b/queue-5.16/btrfs-autodefrag-only-scan-one-inode-once.patch new file mode 100644 index 00000000000..b5c099c0a94 --- /dev/null +++ b/queue-5.16/btrfs-autodefrag-only-scan-one-inode-once.patch @@ -0,0 +1,171 @@ +From 26fbac2517fcad34fa3f950151fd4c0240fb2935 Mon Sep 17 00:00:00 2001 +From: Qu Wenruo +Date: Tue, 22 Feb 2022 18:20:59 +0100 +Subject: btrfs: autodefrag: only scan one inode once + +From: Qu Wenruo + +commit 26fbac2517fcad34fa3f950151fd4c0240fb2935 upstream. + +Although we have btrfs_requeue_inode_defrag(), for autodefrag we are +still just exhausting all inode_defrag items in the tree. 
+ +This means, it doesn't make much difference to requeue an inode_defrag, +other than scan the inode from the beginning till its end. + +Change the behaviour to always scan from offset 0 of an inode, and till +the end. + +By this we get the following benefit: + +- Straight-forward code + +- No more re-queue related check + +- Fewer members in inode_defrag + +We still keep the same btrfs_get_fs_root() and btrfs_iget() check for +each loop, and added extra should_auto_defrag() check per-loop. + +Note: the patch needs to be backported and is intentionally written +to minimize the diff size, code will be cleaned up later. + +CC: stable@vger.kernel.org # 5.16 +Signed-off-by: Qu Wenruo +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/file.c | 84 ++++++++++++++------------------------------------------ + 1 file changed, 22 insertions(+), 62 deletions(-) + +--- a/fs/btrfs/file.c ++++ b/fs/btrfs/file.c +@@ -49,12 +49,6 @@ struct inode_defrag { + + /* root objectid */ + u64 root; +- +- /* last offset we were able to defrag */ +- u64 last_offset; +- +- /* if we've wrapped around back to zero once already */ +- int cycled; + }; + + static int __compare_inode_defrag(struct inode_defrag *defrag1, +@@ -107,8 +101,6 @@ static int __btrfs_add_inode_defrag(stru + */ + if (defrag->transid < entry->transid) + entry->transid = defrag->transid; +- if (defrag->last_offset > entry->last_offset) +- entry->last_offset = defrag->last_offset; + return -EEXIST; + } + } +@@ -179,34 +171,6 @@ int btrfs_add_inode_defrag(struct btrfs_ + } + + /* +- * Requeue the defrag object. If there is a defrag object that points to +- * the same inode in the tree, we will merge them together (by +- * __btrfs_add_inode_defrag()) and free the one that we want to requeue. 
+- */ +-static void btrfs_requeue_inode_defrag(struct btrfs_inode *inode, +- struct inode_defrag *defrag) +-{ +- struct btrfs_fs_info *fs_info = inode->root->fs_info; +- int ret; +- +- if (!__need_auto_defrag(fs_info)) +- goto out; +- +- /* +- * Here we don't check the IN_DEFRAG flag, because we need merge +- * them together. +- */ +- spin_lock(&fs_info->defrag_inodes_lock); +- ret = __btrfs_add_inode_defrag(inode, defrag); +- spin_unlock(&fs_info->defrag_inodes_lock); +- if (ret) +- goto out; +- return; +-out: +- kmem_cache_free(btrfs_inode_defrag_cachep, defrag); +-} +- +-/* + * pick the defragable inode that we want, if it doesn't exist, we will get + * the next one. + */ +@@ -278,8 +242,14 @@ static int __btrfs_run_defrag_inode(stru + struct btrfs_root *inode_root; + struct inode *inode; + struct btrfs_ioctl_defrag_range_args range; +- int num_defrag; +- int ret; ++ int ret = 0; ++ u64 cur = 0; ++ ++again: ++ if (test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state)) ++ goto cleanup; ++ if (!__need_auto_defrag(fs_info)) ++ goto cleanup; + + /* get the inode */ + inode_root = btrfs_get_fs_root(fs_info, defrag->root, true); +@@ -295,39 +265,29 @@ static int __btrfs_run_defrag_inode(stru + goto cleanup; + } + ++ if (cur >= i_size_read(inode)) { ++ iput(inode); ++ goto cleanup; ++ } ++ + /* do a chunk of defrag */ + clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); + memset(&range, 0, sizeof(range)); + range.len = (u64)-1; +- range.start = defrag->last_offset; ++ range.start = cur; + + sb_start_write(fs_info->sb); +- num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid, ++ ret = btrfs_defrag_file(inode, NULL, &range, defrag->transid, + BTRFS_DEFRAG_BATCH); + sb_end_write(fs_info->sb); +- /* +- * if we filled the whole defrag batch, there +- * must be more work to do. 
Queue this defrag +- * again +- */ +- if (num_defrag == BTRFS_DEFRAG_BATCH) { +- defrag->last_offset = range.start; +- btrfs_requeue_inode_defrag(BTRFS_I(inode), defrag); +- } else if (defrag->last_offset && !defrag->cycled) { +- /* +- * we didn't fill our defrag batch, but +- * we didn't start at zero. Make sure we loop +- * around to the start of the file. +- */ +- defrag->last_offset = 0; +- defrag->cycled = 1; +- btrfs_requeue_inode_defrag(BTRFS_I(inode), defrag); +- } else { +- kmem_cache_free(btrfs_inode_defrag_cachep, defrag); +- } +- + iput(inode); +- return 0; ++ ++ if (ret < 0) ++ goto cleanup; ++ ++ cur = max(cur + fs_info->sectorsize, range.start); ++ goto again; ++ + cleanup: + kmem_cache_free(btrfs_inode_defrag_cachep, defrag); + return ret; diff --git a/queue-5.16/btrfs-defrag-allow-defrag_one_cluster-to-skip-large-extent-which-is-not-a-target.patch b/queue-5.16/btrfs-defrag-allow-defrag_one_cluster-to-skip-large-extent-which-is-not-a-target.patch new file mode 100644 index 00000000000..4e62e6dd635 --- /dev/null +++ b/queue-5.16/btrfs-defrag-allow-defrag_one_cluster-to-skip-large-extent-which-is-not-a-target.patch @@ -0,0 +1,226 @@ +From 966d879bafaaf020c11a7cee9526f6dd823a4126 Mon Sep 17 00:00:00 2001 +From: Qu Wenruo +Date: Fri, 11 Feb 2022 14:41:39 +0800 +Subject: btrfs: defrag: allow defrag_one_cluster() to skip large extent which is not a target + +From: Qu Wenruo + +commit 966d879bafaaf020c11a7cee9526f6dd823a4126 upstream. + +In the rework of btrfs_defrag_file(), we always call +defrag_one_cluster() and increase the offset by cluster size, which is +only 256K. + +But there are cases where we have a large extent (e.g. 128M) which +doesn't need to be defragged at all. + +Before the refactor, we can directly skip the range, but now we have to +scan that extent map again and again until the cluster moves after the +non-target extent. 
+ +Fix the problem by allow defrag_one_cluster() to increase +btrfs_defrag_ctrl::last_scanned to the end of an extent, if and only if +the last extent of the cluster is not a target. + +The test script looks like this: + + mkfs.btrfs -f $dev > /dev/null + + mount $dev $mnt + + # As btrfs ioctl uses 32M as extent_threshold + xfs_io -f -c "pwrite 0 64M" $mnt/file1 + sync + # Some fragemented range to defrag + xfs_io -s -c "pwrite 65548k 4k" \ + -c "pwrite 65544k 4k" \ + -c "pwrite 65540k 4k" \ + -c "pwrite 65536k 4k" \ + $mnt/file1 + sync + + echo "=== before ===" + xfs_io -c "fiemap -v" $mnt/file1 + echo "=== after ===" + btrfs fi defrag $mnt/file1 + sync + xfs_io -c "fiemap -v" $mnt/file1 + umount $mnt + +With extra ftrace put into defrag_one_cluster(), before the patch it +would result tons of loops: + +(As defrag_one_cluster() is inlined, the function name is its caller) + + btrfs-126062 [005] ..... 4682.816026: btrfs_defrag_file: r/i=5/257 start=0 len=262144 + btrfs-126062 [005] ..... 4682.816027: btrfs_defrag_file: r/i=5/257 start=262144 len=262144 + btrfs-126062 [005] ..... 4682.816028: btrfs_defrag_file: r/i=5/257 start=524288 len=262144 + btrfs-126062 [005] ..... 4682.816028: btrfs_defrag_file: r/i=5/257 start=786432 len=262144 + btrfs-126062 [005] ..... 4682.816028: btrfs_defrag_file: r/i=5/257 start=1048576 len=262144 + ... + btrfs-126062 [005] ..... 4682.816043: btrfs_defrag_file: r/i=5/257 start=67108864 len=262144 + +But with this patch there will be just one loop, then directly to the +end of the extent: + + btrfs-130471 [014] ..... 5434.029558: defrag_one_cluster: r/i=5/257 start=0 len=262144 + btrfs-130471 [014] ..... 
5434.029559: defrag_one_cluster: r/i=5/257 start=67108864 len=16384 + +CC: stable@vger.kernel.org # 5.16 +Signed-off-by: Qu Wenruo +Reviewed-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ioctl.c | 48 +++++++++++++++++++++++++++++++++++++++--------- + 1 file changed, 39 insertions(+), 9 deletions(-) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -1174,8 +1174,10 @@ struct defrag_target_range { + static int defrag_collect_targets(struct btrfs_inode *inode, + u64 start, u64 len, u32 extent_thresh, + u64 newer_than, bool do_compress, +- bool locked, struct list_head *target_list) ++ bool locked, struct list_head *target_list, ++ u64 *last_scanned_ret) + { ++ bool last_is_target = false; + u64 cur = start; + int ret = 0; + +@@ -1185,6 +1187,7 @@ static int defrag_collect_targets(struct + bool next_mergeable = true; + u64 range_len; + ++ last_is_target = false; + em = defrag_lookup_extent(&inode->vfs_inode, cur, locked); + if (!em) + break; +@@ -1267,6 +1270,7 @@ static int defrag_collect_targets(struct + } + + add: ++ last_is_target = true; + range_len = min(extent_map_end(em), start + len) - cur; + /* + * This one is a good target, check if it can be merged into +@@ -1310,6 +1314,17 @@ next: + kfree(entry); + } + } ++ if (!ret && last_scanned_ret) { ++ /* ++ * If the last extent is not a target, the caller can skip to ++ * the end of that extent. ++ * Otherwise, we can only go the end of the specified range. 
++ */ ++ if (!last_is_target) ++ *last_scanned_ret = max(cur, *last_scanned_ret); ++ else ++ *last_scanned_ret = max(start + len, *last_scanned_ret); ++ } + return ret; + } + +@@ -1368,7 +1383,8 @@ static int defrag_one_locked_target(stru + } + + static int defrag_one_range(struct btrfs_inode *inode, u64 start, u32 len, +- u32 extent_thresh, u64 newer_than, bool do_compress) ++ u32 extent_thresh, u64 newer_than, bool do_compress, ++ u64 *last_scanned_ret) + { + struct extent_state *cached_state = NULL; + struct defrag_target_range *entry; +@@ -1414,7 +1430,7 @@ static int defrag_one_range(struct btrfs + */ + ret = defrag_collect_targets(inode, start, len, extent_thresh, + newer_than, do_compress, true, +- &target_list); ++ &target_list, last_scanned_ret); + if (ret < 0) + goto unlock_extent; + +@@ -1449,7 +1465,8 @@ static int defrag_one_cluster(struct btr + u64 start, u32 len, u32 extent_thresh, + u64 newer_than, bool do_compress, + unsigned long *sectors_defragged, +- unsigned long max_sectors) ++ unsigned long max_sectors, ++ u64 *last_scanned_ret) + { + const u32 sectorsize = inode->root->fs_info->sectorsize; + struct defrag_target_range *entry; +@@ -1460,7 +1477,7 @@ static int defrag_one_cluster(struct btr + BUILD_BUG_ON(!IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE)); + ret = defrag_collect_targets(inode, start, len, extent_thresh, + newer_than, do_compress, false, +- &target_list); ++ &target_list, NULL); + if (ret < 0) + goto out; + +@@ -1477,6 +1494,15 @@ static int defrag_one_cluster(struct btr + range_len = min_t(u32, range_len, + (max_sectors - *sectors_defragged) * sectorsize); + ++ /* ++ * If defrag_one_range() has updated last_scanned_ret, ++ * our range may already be invalid (e.g. hole punched). ++ * Skip if our range is before last_scanned_ret, as there is ++ * no need to defrag the range anymore. 
++ */ ++ if (entry->start + range_len <= *last_scanned_ret) ++ continue; ++ + if (ra) + page_cache_sync_readahead(inode->vfs_inode.i_mapping, + ra, NULL, entry->start >> PAGE_SHIFT, +@@ -1489,7 +1515,8 @@ static int defrag_one_cluster(struct btr + * accounting. + */ + ret = defrag_one_range(inode, entry->start, range_len, +- extent_thresh, newer_than, do_compress); ++ extent_thresh, newer_than, do_compress, ++ last_scanned_ret); + if (ret < 0) + break; + *sectors_defragged += range_len >> +@@ -1500,6 +1527,8 @@ out: + list_del_init(&entry->list); + kfree(entry); + } ++ if (ret >= 0) ++ *last_scanned_ret = max(*last_scanned_ret, start + len); + return ret; + } + +@@ -1585,6 +1614,7 @@ int btrfs_defrag_file(struct inode *inod + + while (cur < last_byte) { + const unsigned long prev_sectors_defragged = sectors_defragged; ++ u64 last_scanned = cur; + u64 cluster_end; + + /* The cluster size 256K should always be page aligned */ +@@ -1614,8 +1644,8 @@ int btrfs_defrag_file(struct inode *inod + BTRFS_I(inode)->defrag_compress = compress_type; + ret = defrag_one_cluster(BTRFS_I(inode), ra, cur, + cluster_end + 1 - cur, extent_thresh, +- newer_than, do_compress, +- &sectors_defragged, max_to_defrag); ++ newer_than, do_compress, &sectors_defragged, ++ max_to_defrag, &last_scanned); + + if (sectors_defragged > prev_sectors_defragged) + balance_dirty_pages_ratelimited(inode->i_mapping); +@@ -1623,7 +1653,7 @@ int btrfs_defrag_file(struct inode *inod + btrfs_inode_unlock(inode, 0); + if (ret < 0) + break; +- cur = cluster_end + 1; ++ cur = max(cluster_end + 1, last_scanned); + if (ret > 0) { + ret = 0; + break; diff --git a/queue-5.16/btrfs-defrag-don-t-defrag-extents-which-are-already-at-max-capacity.patch b/queue-5.16/btrfs-defrag-don-t-defrag-extents-which-are-already-at-max-capacity.patch new file mode 100644 index 00000000000..2fc2aa5ec87 --- /dev/null +++ b/queue-5.16/btrfs-defrag-don-t-defrag-extents-which-are-already-at-max-capacity.patch @@ -0,0 +1,113 @@ +From 
979b25c300dbcbcb750e88715018e04e854de6c6 Mon Sep 17 00:00:00 2001 +From: Qu Wenruo +Date: Fri, 28 Jan 2022 15:21:21 +0800 +Subject: btrfs: defrag: don't defrag extents which are already at max capacity + +From: Qu Wenruo + +commit 979b25c300dbcbcb750e88715018e04e854de6c6 upstream. + +[BUG] +For compressed extents, defrag ioctl will always try to defrag any +compressed extents, wasting not only IO but also CPU time to +compress/decompress: + + mkfs.btrfs -f $DEV + mount -o compress $DEV $MNT + xfs_io -f -c "pwrite -S 0xab 0 128K" $MNT/foobar + sync + xfs_io -f -c "pwrite -S 0xcd 128K 128K" $MNT/foobar + sync + echo "=== before ===" + xfs_io -c "fiemap -v" $MNT/foobar + btrfs filesystem defrag $MNT/foobar + sync + echo "=== after ===" + xfs_io -c "fiemap -v" $MNT/foobar + +Then it shows the 2 128K extents just get COW for no extra benefit, with +extra IO/CPU spent: + + === before === + /mnt/btrfs/file1: + EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS + 0: [0..255]: 26624..26879 256 0x8 + 1: [256..511]: 26632..26887 256 0x9 + === after === + /mnt/btrfs/file1: + EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS + 0: [0..255]: 26640..26895 256 0x8 + 1: [256..511]: 26648..26903 256 0x9 + +This affects not only v5.16 (after the defrag rework), but also v5.15 +(before the defrag rework). + +[CAUSE] +From the very beginning, btrfs defrag never checks if one extent is +already at its max capacity (128K for compressed extents, 128M +otherwise). + +And the default extent size threshold is 256K, which is already beyond +the compressed extent max size. + +This means, by default btrfs defrag ioctl will mark all compressed +extent which is not adjacent to a hole/preallocated range for defrag. + +[FIX] +Introduce a helper to grab the maximum extent size, and then in +defrag_collect_targets() and defrag_check_next_extent(), reject extents +which are already at their max capacity. 
+ +Reported-by: Filipe Manana +CC: stable@vger.kernel.org # 5.16 +Reviewed-by: Filipe Manana +Signed-off-by: Qu Wenruo +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ioctl.c | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -1020,6 +1020,13 @@ static struct extent_map *defrag_lookup_ + return em; + } + ++static u32 get_extent_max_capacity(const struct extent_map *em) ++{ ++ if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) ++ return BTRFS_MAX_COMPRESSED; ++ return BTRFS_MAX_EXTENT_SIZE; ++} ++ + static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em, + bool locked) + { +@@ -1036,6 +1043,12 @@ static bool defrag_check_next_extent(str + goto out; + if (test_bit(EXTENT_FLAG_PREALLOC, &next->flags)) + goto out; ++ /* ++ * If the next extent is at its max capacity, defragging current extent ++ * makes no sense, as the total number of extents won't change. ++ */ ++ if (next->len >= get_extent_max_capacity(em)) ++ goto out; + /* Physically adjacent and large enough */ + if ((em->block_start + em->block_len == next->block_start) && + (em->block_len > SZ_128K && next->block_len > SZ_128K)) +@@ -1233,6 +1246,13 @@ static int defrag_collect_targets(struct + if (range_len >= extent_thresh) + goto next; + ++ /* ++ * Skip extents already at its max capacity, this is mostly for ++ * compressed extents, which max cap is only 128K. 
++ */ ++ if (em->len >= get_extent_max_capacity(em)) ++ goto next; ++ + next_mergeable = defrag_check_next_extent(&inode->vfs_inode, em, + locked); + if (!next_mergeable) { diff --git a/queue-5.16/btrfs-defrag-don-t-try-to-merge-regular-extents-with-preallocated-extents.patch b/queue-5.16/btrfs-defrag-don-t-try-to-merge-regular-extents-with-preallocated-extents.patch new file mode 100644 index 00000000000..6d07baaa73f --- /dev/null +++ b/queue-5.16/btrfs-defrag-don-t-try-to-merge-regular-extents-with-preallocated-extents.patch @@ -0,0 +1,118 @@ +From 7093f15291e95f16dfb5a93307eda3272bfe1108 Mon Sep 17 00:00:00 2001 +From: Qu Wenruo +Date: Fri, 28 Jan 2022 15:21:20 +0800 +Subject: btrfs: defrag: don't try to merge regular extents with preallocated extents + +From: Qu Wenruo + +commit 7093f15291e95f16dfb5a93307eda3272bfe1108 upstream. + +[BUG] +With older kernels (before v5.16), btrfs will defrag preallocated extents. +While with newer kernels (v5.16 and newer) btrfs will not defrag +preallocated extents, but it will defrag the extent just before the +preallocated extent, even it's just a single sector. + +This can be exposed by the following small script: + + mkfs.btrfs -f $dev > /dev/null + + mount $dev $mnt + xfs_io -f -c "pwrite 0 4k" -c sync -c "falloc 4k 16K" $mnt/file + xfs_io -c "fiemap -v" $mnt/file + btrfs fi defrag $mnt/file + sync + xfs_io -c "fiemap -v" $mnt/file + +The output looks like this on older kernels: + +/mnt/btrfs/file: + EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS + 0: [0..7]: 26624..26631 8 0x0 + 1: [8..39]: 26632..26663 32 0x801 +/mnt/btrfs/file: + EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS + 0: [0..39]: 26664..26703 40 0x1 + +Which defrags the single sector along with the preallocated extent, and +replace them with an regular extent into a new location (caused by data +COW). +This wastes most of the data IO just for the preallocated range. 
+ +On the other hand, v5.16 is slightly better: + +/mnt/btrfs/file: + EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS + 0: [0..7]: 26624..26631 8 0x0 + 1: [8..39]: 26632..26663 32 0x801 +/mnt/btrfs/file: + EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS + 0: [0..7]: 26664..26671 8 0x0 + 1: [8..39]: 26632..26663 32 0x801 + +The preallocated range is not defragged, but the sector before it still +gets defragged, which has no need for it. + +[CAUSE] +One of the function reused by the old and new behavior is +defrag_check_next_extent(), it will determine if we should defrag +current extent by checking the next one. + +It only checks if the next extent is a hole or inlined, but it doesn't +check if it's preallocated. + +On the other hand, out of the function, both old and new kernel will +reject preallocated extents. + +Such inconsistent behavior causes above behavior. + +[FIX] +- Also check if next extent is preallocated + If so, don't defrag current extent. + +- Add comments for each branch why we reject the extent + +This will reduce the IO caused by defrag ioctl and autodefrag. 
+ +CC: stable@vger.kernel.org # 5.16 +Reviewed-by: Filipe Manana +Signed-off-by: Qu Wenruo +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ioctl.c | 17 +++++++++++------ + 1 file changed, 11 insertions(+), 6 deletions(-) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -1024,19 +1024,24 @@ static bool defrag_check_next_extent(str + bool locked) + { + struct extent_map *next; +- bool ret = true; ++ bool ret = false; + + /* this is the last extent */ + if (em->start + em->len >= i_size_read(inode)) + return false; + + next = defrag_lookup_extent(inode, em->start + em->len, locked); ++ /* No more em or hole */ + if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE) +- ret = false; +- else if ((em->block_start + em->block_len == next->block_start) && +- (em->block_len > SZ_128K && next->block_len > SZ_128K)) +- ret = false; +- ++ goto out; ++ if (test_bit(EXTENT_FLAG_PREALLOC, &next->flags)) ++ goto out; ++ /* Physically adjacent and large enough */ ++ if ((em->block_start + em->block_len == next->block_start) && ++ (em->block_len > SZ_128K && next->block_len > SZ_128K)) ++ goto out; ++ ret = true; ++out: + free_extent_map(next); + return ret; + } diff --git a/queue-5.16/btrfs-defrag-remove-an-ambiguous-condition-for-rejection.patch b/queue-5.16/btrfs-defrag-remove-an-ambiguous-condition-for-rejection.patch new file mode 100644 index 00000000000..d1ad3aa2905 --- /dev/null +++ b/queue-5.16/btrfs-defrag-remove-an-ambiguous-condition-for-rejection.patch @@ -0,0 +1,55 @@ +From 550f133f6959db927127111b50e483da3a7ce662 Mon Sep 17 00:00:00 2001 +From: Qu Wenruo +Date: Fri, 28 Jan 2022 15:21:22 +0800 +Subject: btrfs: defrag: remove an ambiguous condition for rejection + +From: Qu Wenruo + +commit 550f133f6959db927127111b50e483da3a7ce662 upstream. 
+ +From the very beginning of btrfs defrag, there is a check to reject +extents which meet both conditions: + +- Physically adjacent + + We may want to defrag physically adjacent extents to reduce the number + of extents or the size of subvolume tree. + +- Larger than 128K + + This may be there for compressed extents, but unfortunately 128K is + exactly the max capacity for compressed extents. + And the check is > 128K, thus it never rejects compressed extents. + + Furthermore, the compressed extent capacity bug is fixed by previous + patch, there is no reason for that check anymore. + +The original check has a very small ranges to reject (the target extent +size is > 128K, and default extent threshold is 256K), and for +compressed extent it doesn't work at all. + +So it's better just to remove the rejection, and allow us to defrag +physically adjacent extents. + +CC: stable@vger.kernel.org # 5.16 +Reviewed-by: Filipe Manana +Signed-off-by: Qu Wenruo +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ioctl.c | 4 ---- + 1 file changed, 4 deletions(-) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -1049,10 +1049,6 @@ static bool defrag_check_next_extent(str + */ + if (next->len >= get_extent_max_capacity(em)) + goto out; +- /* Physically adjacent and large enough */ +- if ((em->block_start + em->block_len == next->block_start) && +- (em->block_len > SZ_128K && next->block_len > SZ_128K)) +- goto out; + ret = true; + out: + free_extent_map(next); diff --git a/queue-5.16/btrfs-prevent-copying-too-big-compressed-lzo-segment.patch b/queue-5.16/btrfs-prevent-copying-too-big-compressed-lzo-segment.patch new file mode 100644 index 00000000000..95bc236824e --- /dev/null +++ b/queue-5.16/btrfs-prevent-copying-too-big-compressed-lzo-segment.patch @@ -0,0 +1,83 @@ +From 741b23a970a79d5d3a1db2d64fa2c7b375a4febb Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?D=C4=81vis=20Mos=C4=81ns?= +Date: Wed, 2 Feb 2022 23:44:55 +0200 +Subject: btrfs: prevent 
copying too big compressed lzo segment +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Dāvis Mosāns + +commit 741b23a970a79d5d3a1db2d64fa2c7b375a4febb upstream. + +Compressed length can be corrupted to be a lot larger than memory +we have allocated for buffer. +This will cause memcpy in copy_compressed_segment to write outside +of allocated memory. + +This mostly results in stuck read syscall but sometimes when using +btrfs send can get #GP + + kernel: general protection fault, probably for non-canonical address 0x841551d5c1000: 0000 [#1] PREEMPT SMP NOPTI + kernel: CPU: 17 PID: 264 Comm: kworker/u256:7 Tainted: P OE 5.17.0-rc2-1 #12 + kernel: Workqueue: btrfs-endio btrfs_work_helper [btrfs] + kernel: RIP: 0010:lzo_decompress_bio (./include/linux/fortify-string.h:225 fs/btrfs/lzo.c:322 fs/btrfs/lzo.c:394) btrfs + Code starting with the faulting instruction + =========================================== + 0:* 48 8b 06 mov (%rsi),%rax <-- trapping instruction + 3: 48 8d 79 08 lea 0x8(%rcx),%rdi + 7: 48 83 e7 f8 and $0xfffffffffffffff8,%rdi + b: 48 89 01 mov %rax,(%rcx) + e: 44 89 f0 mov %r14d,%eax + 11: 48 8b 54 06 f8 mov -0x8(%rsi,%rax,1),%rdx + kernel: RSP: 0018:ffffb110812efd50 EFLAGS: 00010212 + kernel: RAX: 0000000000001000 RBX: 000000009ca264c8 RCX: ffff98996e6d8ff8 + kernel: RDX: 0000000000000064 RSI: 000841551d5c1000 RDI: ffffffff9500435d + kernel: RBP: ffff989a3be856c0 R08: 0000000000000000 R09: 0000000000000000 + kernel: R10: 0000000000000000 R11: 0000000000001000 R12: ffff98996e6d8000 + kernel: R13: 0000000000000008 R14: 0000000000001000 R15: 000841551d5c1000 + kernel: FS: 0000000000000000(0000) GS:ffff98a09d640000(0000) knlGS:0000000000000000 + kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + kernel: CR2: 00001e9f984d9ea8 CR3: 000000014971a000 CR4: 00000000003506e0 + kernel: Call Trace: + kernel: + kernel: end_compressed_bio_read (fs/btrfs/compression.c:104 fs/btrfs/compression.c:1363 
fs/btrfs/compression.c:323) btrfs + kernel: end_workqueue_fn (fs/btrfs/disk-io.c:1923) btrfs + kernel: btrfs_work_helper (fs/btrfs/async-thread.c:326) btrfs + kernel: process_one_work (./arch/x86/include/asm/jump_label.h:27 ./include/linux/jump_label.h:212 ./include/trace/events/workqueue.h:108 kernel/workqueue.c:2312) + kernel: worker_thread (./include/linux/list.h:292 kernel/workqueue.c:2455) + kernel: ? process_one_work (kernel/workqueue.c:2397) + kernel: kthread (kernel/kthread.c:377) + kernel: ? kthread_complete_and_exit (kernel/kthread.c:332) + kernel: ret_from_fork (arch/x86/entry/entry_64.S:301) + kernel: + +CC: stable@vger.kernel.org # 4.9+ +Signed-off-by: Dāvis Mosāns +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/lzo.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +--- a/fs/btrfs/lzo.c ++++ b/fs/btrfs/lzo.c +@@ -380,6 +380,17 @@ int lzo_decompress_bio(struct list_head + kunmap(cur_page); + cur_in += LZO_LEN; + ++ if (seg_len > lzo1x_worst_compress(PAGE_SIZE)) { ++ /* ++ * seg_len shouldn't be larger than we have allocated ++ * for workspace->cbuf ++ */ ++ btrfs_err(fs_info, "unexpectedly large lzo segment len %u", ++ seg_len); ++ ret = -EIO; ++ goto out; ++ } ++ + /* Copy the compressed segment payload into workspace */ + copy_compressed_segment(cb, workspace->cbuf, seg_len, &cur_in); + diff --git a/queue-5.16/btrfs-reduce-extent-threshold-for-autodefrag.patch b/queue-5.16/btrfs-reduce-extent-threshold-for-autodefrag.patch new file mode 100644 index 00000000000..2080f8010f5 --- /dev/null +++ b/queue-5.16/btrfs-reduce-extent-threshold-for-autodefrag.patch @@ -0,0 +1,133 @@ +From 558732df2122092259ab4ef85594bee11dbb9104 Mon Sep 17 00:00:00 2001 +From: Qu Wenruo +Date: Sun, 13 Feb 2022 15:42:33 +0800 +Subject: btrfs: reduce extent threshold for autodefrag + +From: Qu Wenruo + +commit 558732df2122092259ab4ef85594bee11dbb9104 upstream. 
+ +There is a big gap between inode_should_defrag() and autodefrag extent +size threshold. For inode_should_defrag() it has a flexible +@small_write value. For compressed extent is 16K, and for non-compressed +extent it's 64K. + +However for autodefrag extent size threshold, it's always fixed to the +default value (256K). + +This means, the following write sequence will trigger autodefrag to +defrag ranges which didn't trigger autodefrag: + + pwrite 0 8k + sync + pwrite 8k 128K + sync + +The latter 128K write will also be considered as a defrag target (if +other conditions are met). While only that 8K write is really +triggering autodefrag. + +Such behavior can cause extra IO for autodefrag. + +Close the gap, by copying the @small_write value into inode_defrag, so +that later autodefrag can use the same @small_write value which +triggered autodefrag. + +With the existing transid value, this allows autodefrag really to scan +the ranges which triggered autodefrag. + +Although this behavior change is mostly reducing the extent_thresh value +for autodefrag, I believe in the future we should allow users to specify +the autodefrag extent threshold through mount options, but that's an +other problem to consider in the future. 
+ +CC: stable@vger.kernel.org # 5.16+ +Signed-off-by: Qu Wenruo +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ctree.h | 2 +- + fs/btrfs/file.c | 15 ++++++++++++++- + fs/btrfs/inode.c | 4 ++-- + 3 files changed, 17 insertions(+), 4 deletions(-) + +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -3315,7 +3315,7 @@ void btrfs_exclop_finish(struct btrfs_fs + int __init btrfs_auto_defrag_init(void); + void __cold btrfs_auto_defrag_exit(void); + int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, +- struct btrfs_inode *inode); ++ struct btrfs_inode *inode, u32 extent_thresh); + int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info); + void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info); + int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); +--- a/fs/btrfs/file.c ++++ b/fs/btrfs/file.c +@@ -49,6 +49,15 @@ struct inode_defrag { + + /* root objectid */ + u64 root; ++ ++ /* ++ * The extent size threshold for autodefrag. ++ * ++ * This value is different for compressed/non-compressed extents, ++ * thus needs to be passed from higher layer. 
++ * (aka, inode_should_defrag()) ++ */ ++ u32 extent_thresh; + }; + + static int __compare_inode_defrag(struct inode_defrag *defrag1, +@@ -101,6 +110,8 @@ static int __btrfs_add_inode_defrag(stru + */ + if (defrag->transid < entry->transid) + entry->transid = defrag->transid; ++ entry->extent_thresh = min(defrag->extent_thresh, ++ entry->extent_thresh); + return -EEXIST; + } + } +@@ -126,7 +137,7 @@ static inline int __need_auto_defrag(str + * enabled + */ + int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, +- struct btrfs_inode *inode) ++ struct btrfs_inode *inode, u32 extent_thresh) + { + struct btrfs_root *root = inode->root; + struct btrfs_fs_info *fs_info = root->fs_info; +@@ -152,6 +163,7 @@ int btrfs_add_inode_defrag(struct btrfs_ + defrag->ino = btrfs_ino(inode); + defrag->transid = transid; + defrag->root = root->root_key.objectid; ++ defrag->extent_thresh = extent_thresh; + + spin_lock(&fs_info->defrag_inodes_lock); + if (!test_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags)) { +@@ -275,6 +287,7 @@ again: + memset(&range, 0, sizeof(range)); + range.len = (u64)-1; + range.start = cur; ++ range.extent_thresh = defrag->extent_thresh; + + sb_start_write(fs_info->sb); + ret = btrfs_defrag_file(inode, NULL, &range, defrag->transid, +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -561,12 +561,12 @@ static inline int inode_need_compress(st + } + + static inline void inode_should_defrag(struct btrfs_inode *inode, +- u64 start, u64 end, u64 num_bytes, u64 small_write) ++ u64 start, u64 end, u64 num_bytes, u32 small_write) + { + /* If this is a small write inside eof, kick off a defrag */ + if (num_bytes < small_write && + (start > 0 || end + 1 < inode->disk_i_size)) +- btrfs_add_inode_defrag(NULL, inode); ++ btrfs_add_inode_defrag(NULL, inode, small_write); + } + + /* diff --git a/queue-5.16/hugetlbfs-fix-a-truncation-issue-in-hugepages-parameter.patch b/queue-5.16/hugetlbfs-fix-a-truncation-issue-in-hugepages-parameter.patch new file mode 100644 
index 00000000000..e51d64804f1 --- /dev/null +++ b/queue-5.16/hugetlbfs-fix-a-truncation-issue-in-hugepages-parameter.patch @@ -0,0 +1,55 @@ +From e79ce9832316e09529b212a21278d68240ccbf1f Mon Sep 17 00:00:00 2001 +From: Liu Yuntao +Date: Fri, 25 Feb 2022 19:11:02 -0800 +Subject: hugetlbfs: fix a truncation issue in hugepages parameter + +From: Liu Yuntao + +commit e79ce9832316e09529b212a21278d68240ccbf1f upstream. + +When we specify a large number for node in hugepages parameter, it may +be parsed to another number due to truncation in this statement: + + node = tmp; + +For example, add following parameter in command line: + + hugepagesz=1G hugepages=4294967297:5 + +and kernel will allocate 5 hugepages for node 1 instead of ignoring it. + +I move the validation check earlier to fix this issue, and slightly +simplifies the condition here. + +Link: https://lkml.kernel.org/r/20220209134018.8242-1-liuyuntao10@huawei.com +Fixes: b5389086ad7be0 ("hugetlbfs: extend the definition of hugepages parameter to support node allocation") +Signed-off-by: Liu Yuntao +Reviewed-by: Mike Kravetz +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/hugetlb.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/mm/hugetlb.c b/mm/hugetlb.c +index e57650a9404f..f294db835f4b 100644 +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -4159,10 +4159,10 @@ static int __init hugepages_setup(char *s) + pr_warn("HugeTLB: architecture can't support node specific alloc, ignoring!\n"); + return 0; + } ++ if (tmp >= nr_online_nodes) ++ goto invalid; + node = tmp; + p += count + 1; +- if (node < 0 || node >= nr_online_nodes) +- goto invalid; + /* Parse hugepages */ + if (sscanf(p, "%lu%n", &tmp, &count) != 1) + goto invalid; +-- +2.35.1 + diff --git a/queue-5.16/ib-qib-fix-duplicate-sysfs-directory-name.patch b/queue-5.16/ib-qib-fix-duplicate-sysfs-directory-name.patch new file mode 100644 index 00000000000..8f0651935d7 --- 
/dev/null +++ b/queue-5.16/ib-qib-fix-duplicate-sysfs-directory-name.patch @@ -0,0 +1,39 @@ +From 32f57cb1b2c8d6f20aefec7052b1bfeb7e3b69d4 Mon Sep 17 00:00:00 2001 +From: Mike Marciniszyn +Date: Thu, 17 Feb 2022 08:59:32 -0500 +Subject: IB/qib: Fix duplicate sysfs directory name + +From: Mike Marciniszyn + +commit 32f57cb1b2c8d6f20aefec7052b1bfeb7e3b69d4 upstream. + +The qib driver load has been failing with the following message: + + sysfs: cannot create duplicate filename '/devices/pci0000:80/0000:80:02.0/0000:81:00.0/infiniband/qib0/ports/1/linkcontrol' + +The patch below has two "linkcontrol" names causing the duplication. + +Fix by using the correct "diag_counters" name on the second instance. + +Fixes: 4a7aaf88c89f ("RDMA/qib: Use attributes for the port sysfs") +Link: https://lore.kernel.org/r/1645106372-23004-1-git-send-email-mike.marciniszyn@cornelisnetworks.com +Cc: +Reviewed-by: Dennis Dalessandro +Signed-off-by: Mike Marciniszyn +Signed-off-by: Jason Gunthorpe +Signed-off-by: Greg Kroah-Hartman +--- + drivers/infiniband/hw/qib/qib_sysfs.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/infiniband/hw/qib/qib_sysfs.c ++++ b/drivers/infiniband/hw/qib/qib_sysfs.c +@@ -541,7 +541,7 @@ static struct attribute *port_diagc_attr + }; + + static const struct attribute_group port_diagc_group = { +- .name = "linkcontrol", ++ .name = "diag_counters", + .attrs = port_diagc_attributes, + }; + diff --git a/queue-5.16/mm-hugetlb-fix-kernel-crash-with-hugetlb-mremap.patch b/queue-5.16/mm-hugetlb-fix-kernel-crash-with-hugetlb-mremap.patch new file mode 100644 index 00000000000..6a2ae2eb966 --- /dev/null +++ b/queue-5.16/mm-hugetlb-fix-kernel-crash-with-hugetlb-mremap.patch @@ -0,0 +1,80 @@ +From db110a99d3367936058727ff4798e3a39c707969 Mon Sep 17 00:00:00 2001 +From: "Aneesh Kumar K.V" +Date: Fri, 25 Feb 2022 19:10:56 -0800 +Subject: mm/hugetlb: fix kernel crash with hugetlb mremap + +From: Aneesh Kumar K.V + +commit 
db110a99d3367936058727ff4798e3a39c707969 upstream. + +This fixes the below crash: + + kernel BUG at include/linux/mm.h:2373! + cpu 0x5d: Vector: 700 (Program Check) at [c00000003c6e76e0] + pc: c000000000581a54: pmd_to_page+0x54/0x80 + lr: c00000000058d184: move_hugetlb_page_tables+0x4e4/0x5b0 + sp: c00000003c6e7980 + msr: 9000000000029033 + current = 0xc00000003bd8d980 + paca = 0xc000200fff610100 irqmask: 0x03 irq_happened: 0x01 + pid = 9349, comm = hugepage-mremap + kernel BUG at include/linux/mm.h:2373! + move_hugetlb_page_tables+0x4e4/0x5b0 (link register) + move_hugetlb_page_tables+0x22c/0x5b0 (unreliable) + move_page_tables+0xdbc/0x1010 + move_vma+0x254/0x5f0 + sys_mremap+0x7c0/0x900 + system_call_exception+0x160/0x2c0 + +the kernel can't use huge_pte_offset before it set the pte entry because +a page table lookup check for huge PTE bit in the page table to +differentiate between a huge pte entry and a pointer to pte page. A +huge_pte_alloc won't mark the page table entry huge and hence kernel +should not use huge_pte_offset after a huge_pte_alloc. 
+ +Link: https://lkml.kernel.org/r/20220211063221.99293-1-aneesh.kumar@linux.ibm.com +Fixes: 550a7d60bd5e ("mm, hugepages: add mremap() support for hugepage backed vma") +Signed-off-by: Aneesh Kumar K.V +Reviewed-by: Mike Kravetz +Reviewed-by: Mina Almasry +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/hugetlb.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +diff --git a/mm/hugetlb.c b/mm/hugetlb.c +index 61895cc01d09..e57650a9404f 100644 +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -4851,14 +4851,13 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, + } + + static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr, +- unsigned long new_addr, pte_t *src_pte) ++ unsigned long new_addr, pte_t *src_pte, pte_t *dst_pte) + { + struct hstate *h = hstate_vma(vma); + struct mm_struct *mm = vma->vm_mm; +- pte_t *dst_pte, pte; + spinlock_t *src_ptl, *dst_ptl; ++ pte_t pte; + +- dst_pte = huge_pte_offset(mm, new_addr, huge_page_size(h)); + dst_ptl = huge_pte_lock(h, mm, dst_pte); + src_ptl = huge_pte_lockptr(h, mm, src_pte); + +@@ -4917,7 +4916,7 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma, + if (!dst_pte) + break; + +- move_huge_pte(vma, old_addr, new_addr, src_pte); ++ move_huge_pte(vma, old_addr, new_addr, src_pte, dst_pte); + } + flush_tlb_range(vma, old_end - len, old_end); + mmu_notifier_invalidate_range_end(&range); +-- +2.35.1 + diff --git a/queue-5.16/rdma-cma-do-not-change-route.addr.src_addr-outside-state-checks.patch b/queue-5.16/rdma-cma-do-not-change-route.addr.src_addr-outside-state-checks.patch new file mode 100644 index 00000000000..d23272f468c --- /dev/null +++ b/queue-5.16/rdma-cma-do-not-change-route.addr.src_addr-outside-state-checks.patch @@ -0,0 +1,114 @@ +From 22e9f71072fa605cbf033158db58e0790101928d Mon Sep 17 00:00:00 2001 +From: Jason Gunthorpe +Date: Wed, 23 Feb 2022 11:23:57 -0400 +Subject: RDMA/cma: Do not 
change route.addr.src_addr outside state checks + +From: Jason Gunthorpe + +commit 22e9f71072fa605cbf033158db58e0790101928d upstream. + +If the state is not idle then resolve_prepare_src() should immediately +fail and no change to global state should happen. However, it +unconditionally overwrites the src_addr trying to build a temporary any +address. + +For instance if the state is already RDMA_CM_LISTEN then this will corrupt +the src_addr and would cause the test in cma_cancel_operation(): + + if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev) + +Which would manifest as this trace from syzkaller: + + BUG: KASAN: use-after-free in __list_add_valid+0x93/0xa0 lib/list_debug.c:26 + Read of size 8 at addr ffff8881546491e0 by task syz-executor.1/32204 + + CPU: 1 PID: 32204 Comm: syz-executor.1 Not tainted 5.12.0-rc8-syzkaller #0 + Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 + Call Trace: + __dump_stack lib/dump_stack.c:79 [inline] + dump_stack+0x141/0x1d7 lib/dump_stack.c:120 + print_address_description.constprop.0.cold+0x5b/0x2f8 mm/kasan/report.c:232 + __kasan_report mm/kasan/report.c:399 [inline] + kasan_report.cold+0x7c/0xd8 mm/kasan/report.c:416 + __list_add_valid+0x93/0xa0 lib/list_debug.c:26 + __list_add include/linux/list.h:67 [inline] + list_add_tail include/linux/list.h:100 [inline] + cma_listen_on_all drivers/infiniband/core/cma.c:2557 [inline] + rdma_listen+0x787/0xe00 drivers/infiniband/core/cma.c:3751 + ucma_listen+0x16a/0x210 drivers/infiniband/core/ucma.c:1102 + ucma_write+0x259/0x350 drivers/infiniband/core/ucma.c:1732 + vfs_write+0x28e/0xa30 fs/read_write.c:603 + ksys_write+0x1ee/0x250 fs/read_write.c:658 + do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 + entry_SYSCALL_64_after_hwframe+0x44/0xae + +This is indicating that an rdma_id_private was destroyed without doing +cma_cancel_listens(). 
+ +Instead of trying to re-use the src_addr memory to indirectly create an +any address derived from the dst build one explicitly on the stack and +bind to that as any other normal flow would do. rdma_bind_addr() will copy +it over the src_addr once it knows the state is valid. + +This is similar to commit bc0bdc5afaa7 ("RDMA/cma: Do not change +route.addr.src_addr.ss_family") + +Link: https://lore.kernel.org/r/0-v2-e975c8fd9ef2+11e-syz_cma_srcaddr_jgg@nvidia.com +Cc: stable@vger.kernel.org +Fixes: 732d41c545bb ("RDMA/cma: Make the locking for automatic state transition more clear") +Reported-by: syzbot+c94a3675a626f6333d74@syzkaller.appspotmail.com +Reviewed-by: Leon Romanovsky +Signed-off-by: Jason Gunthorpe +Signed-off-by: Greg Kroah-Hartman +--- + drivers/infiniband/core/cma.c | 38 +++++++++++++++++++++++--------------- + 1 file changed, 23 insertions(+), 15 deletions(-) + +--- a/drivers/infiniband/core/cma.c ++++ b/drivers/infiniband/core/cma.c +@@ -3370,22 +3370,30 @@ err: + static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, + const struct sockaddr *dst_addr) + { +- if (!src_addr || !src_addr->sa_family) { +- src_addr = (struct sockaddr *) &id->route.addr.src_addr; +- src_addr->sa_family = dst_addr->sa_family; +- if (IS_ENABLED(CONFIG_IPV6) && +- dst_addr->sa_family == AF_INET6) { +- struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr; +- struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr; +- src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; +- if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) +- id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id; +- } else if (dst_addr->sa_family == AF_IB) { +- ((struct sockaddr_ib *) src_addr)->sib_pkey = +- ((struct sockaddr_ib *) dst_addr)->sib_pkey; +- } ++ struct sockaddr_storage zero_sock = {}; ++ ++ if (src_addr && src_addr->sa_family) ++ return rdma_bind_addr(id, src_addr); ++ ++ /* ++ * When the src_addr is not specified, 
automatically supply an any addr ++ */ ++ zero_sock.ss_family = dst_addr->sa_family; ++ if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) { ++ struct sockaddr_in6 *src_addr6 = ++ (struct sockaddr_in6 *)&zero_sock; ++ struct sockaddr_in6 *dst_addr6 = ++ (struct sockaddr_in6 *)dst_addr; ++ ++ src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; ++ if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) ++ id->route.addr.dev_addr.bound_dev_if = ++ dst_addr6->sin6_scope_id; ++ } else if (dst_addr->sa_family == AF_IB) { ++ ((struct sockaddr_ib *)&zero_sock)->sib_pkey = ++ ((struct sockaddr_ib *)dst_addr)->sib_pkey; + } +- return rdma_bind_addr(id, src_addr); ++ return rdma_bind_addr(id, (struct sockaddr *)&zero_sock); + } + + /* diff --git a/queue-5.16/riscv-fix-nommu_k210_sdcard_defconfig.patch b/queue-5.16/riscv-fix-nommu_k210_sdcard_defconfig.patch new file mode 100644 index 00000000000..0d96c452852 --- /dev/null +++ b/queue-5.16/riscv-fix-nommu_k210_sdcard_defconfig.patch @@ -0,0 +1,33 @@ +From 762e52f79c95ea20a7229674ffd13b94d7d8959c Mon Sep 17 00:00:00 2001 +From: Damien Le Moal +Date: Wed, 9 Feb 2022 12:56:23 +0900 +Subject: riscv: fix nommu_k210_sdcard_defconfig + +From: Damien Le Moal + +commit 762e52f79c95ea20a7229674ffd13b94d7d8959c upstream. + +Instead of an arbitrary delay, use the "rootwait" kernel option to wait +for the mmc root device to be ready. 
+ +Signed-off-by: Damien Le Moal +Reviewed-by: Anup Patel +Fixes: 7e09fd3994c5 ("riscv: Add Canaan Kendryte K210 SD card defconfig") +Cc: stable@vger.kernel.org +Signed-off-by: Palmer Dabbelt +Signed-off-by: Greg Kroah-Hartman +--- + arch/riscv/configs/nommu_k210_sdcard_defconfig | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/riscv/configs/nommu_k210_sdcard_defconfig ++++ b/arch/riscv/configs/nommu_k210_sdcard_defconfig +@@ -23,7 +23,7 @@ CONFIG_SLOB=y + CONFIG_SOC_CANAAN=y + CONFIG_SMP=y + CONFIG_NR_CPUS=2 +-CONFIG_CMDLINE="earlycon console=ttySIF0 rootdelay=2 root=/dev/mmcblk0p1 ro" ++CONFIG_CMDLINE="earlycon console=ttySIF0 root=/dev/mmcblk0p1 rootwait ro" + CONFIG_CMDLINE_FORCE=y + # CONFIG_SECCOMP is not set + # CONFIG_STACKPROTECTOR is not set diff --git a/queue-5.16/riscv-fix-oops-caused-by-irqsoff-latency-tracer.patch b/queue-5.16/riscv-fix-oops-caused-by-irqsoff-latency-tracer.patch new file mode 100644 index 00000000000..b5722a15664 --- /dev/null +++ b/queue-5.16/riscv-fix-oops-caused-by-irqsoff-latency-tracer.patch @@ -0,0 +1,167 @@ +From 22e2100b1b07d6f5acc71cc1acb53f680c677d77 Mon Sep 17 00:00:00 2001 +From: Changbin Du +Date: Sun, 13 Feb 2022 16:18:45 +0800 +Subject: riscv: fix oops caused by irqsoff latency tracer + +From: Changbin Du + +commit 22e2100b1b07d6f5acc71cc1acb53f680c677d77 upstream. + +The trace_hardirqs_{on,off}() require the caller to setup frame pointer +properly. This because these two functions use macro 'CALLER_ADDR1' (aka. +__builtin_return_address(1)) to acquire caller info. If the $fp is used +for other purpose, the code generated this macro (as below) could trigger +memory access fault. 
+ + 0xffffffff8011510e <+80>: ld a1,-16(s0) + 0xffffffff80115112 <+84>: ld s2,-8(a1) # <-- paging fault here + +The oops message during booting if compiled with 'irqoff' tracer enabled: +[ 0.039615][ T0] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000f8 +[ 0.041925][ T0] Oops [#1] +[ 0.042063][ T0] Modules linked in: +[ 0.042864][ T0] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.17.0-rc1-00233-g9a20c48d1ed2 #29 +[ 0.043568][ T0] Hardware name: riscv-virtio,qemu (DT) +[ 0.044343][ T0] epc : trace_hardirqs_on+0x56/0xe2 +[ 0.044601][ T0] ra : restore_all+0x12/0x6e +[ 0.044721][ T0] epc : ffffffff80126a5c ra : ffffffff80003b94 sp : ffffffff81403db0 +[ 0.044801][ T0] gp : ffffffff8163acd8 tp : ffffffff81414880 t0 : 0000000000000020 +[ 0.044882][ T0] t1 : 0098968000000000 t2 : 0000000000000000 s0 : ffffffff81403de0 +[ 0.044967][ T0] s1 : 0000000000000000 a0 : 0000000000000001 a1 : 0000000000000100 +[ 0.045046][ T0] a2 : 0000000000000000 a3 : 0000000000000000 a4 : 0000000000000000 +[ 0.045124][ T0] a5 : 0000000000000000 a6 : 0000000000000000 a7 : 0000000054494d45 +[ 0.045210][ T0] s2 : ffffffff80003b94 s3 : ffffffff81a8f1b0 s4 : ffffffff80e27b50 +[ 0.045289][ T0] s5 : ffffffff81414880 s6 : ffffffff8160fa00 s7 : 00000000800120e8 +[ 0.045389][ T0] s8 : 0000000080013100 s9 : 000000000000007f s10: 0000000000000000 +[ 0.045474][ T0] s11: 0000000000000000 t3 : 7fffffffffffffff t4 : 0000000000000000 +[ 0.045548][ T0] t5 : 0000000000000000 t6 : ffffffff814aa368 +[ 0.045620][ T0] status: 0000000200000100 badaddr: 00000000000000f8 cause: 000000000000000d +[ 0.046402][ T0] [] restore_all+0x12/0x6e + +This because the $fp(aka. $s0) register is not used as frame pointer in the +assembly entry code. 
+ + resume_kernel: + REG_L s0, TASK_TI_PREEMPT_COUNT(tp) + bnez s0, restore_all + REG_L s0, TASK_TI_FLAGS(tp) + andi s0, s0, _TIF_NEED_RESCHED + beqz s0, restore_all + call preempt_schedule_irq + j restore_all + +To fix above issue, here we add one extra level wrapper for function +trace_hardirqs_{on,off}() so they can be safely called by low level entry +code. + +Signed-off-by: Changbin Du +Fixes: 3c4697982982 ("riscv: Enable LOCKDEP_SUPPORT & fixup TRACE_IRQFLAGS_SUPPORT") +Cc: stable@vger.kernel.org +Signed-off-by: Palmer Dabbelt +Signed-off-by: Greg Kroah-Hartman +--- + arch/riscv/kernel/Makefile | 2 ++ + arch/riscv/kernel/entry.S | 10 +++++----- + arch/riscv/kernel/trace_irq.c | 27 +++++++++++++++++++++++++++ + arch/riscv/kernel/trace_irq.h | 11 +++++++++++ + 4 files changed, 45 insertions(+), 5 deletions(-) + create mode 100644 arch/riscv/kernel/trace_irq.c + create mode 100644 arch/riscv/kernel/trace_irq.h + +--- a/arch/riscv/kernel/Makefile ++++ b/arch/riscv/kernel/Makefile +@@ -50,6 +50,8 @@ obj-$(CONFIG_MODULE_SECTIONS) += module- + obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o + obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o + ++obj-$(CONFIG_TRACE_IRQFLAGS) += trace_irq.o ++ + obj-$(CONFIG_RISCV_BASE_PMU) += perf_event.o + obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o + obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o +--- a/arch/riscv/kernel/entry.S ++++ b/arch/riscv/kernel/entry.S +@@ -108,7 +108,7 @@ _save_context: + .option pop + + #ifdef CONFIG_TRACE_IRQFLAGS +- call trace_hardirqs_off ++ call __trace_hardirqs_off + #endif + + #ifdef CONFIG_CONTEXT_TRACKING +@@ -143,7 +143,7 @@ skip_context_tracking: + li t0, EXC_BREAKPOINT + beq s4, t0, 1f + #ifdef CONFIG_TRACE_IRQFLAGS +- call trace_hardirqs_on ++ call __trace_hardirqs_on + #endif + csrs CSR_STATUS, SR_IE + +@@ -234,7 +234,7 @@ ret_from_exception: + REG_L s0, PT_STATUS(sp) + csrc CSR_STATUS, SR_IE + #ifdef CONFIG_TRACE_IRQFLAGS +- call trace_hardirqs_off ++ call __trace_hardirqs_off + #endif + 
#ifdef CONFIG_RISCV_M_MODE + /* the MPP value is too large to be used as an immediate arg for addi */ +@@ -270,10 +270,10 @@ restore_all: + REG_L s1, PT_STATUS(sp) + andi t0, s1, SR_PIE + beqz t0, 1f +- call trace_hardirqs_on ++ call __trace_hardirqs_on + j 2f + 1: +- call trace_hardirqs_off ++ call __trace_hardirqs_off + 2: + #endif + REG_L a0, PT_STATUS(sp) +--- /dev/null ++++ b/arch/riscv/kernel/trace_irq.c +@@ -0,0 +1,27 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (C) 2022 Changbin Du ++ */ ++ ++#include ++#include ++#include "trace_irq.h" ++ ++/* ++ * trace_hardirqs_on/off require the caller to setup frame pointer properly. ++ * Otherwise, CALLER_ADDR1 might trigger an pagging exception in kernel. ++ * Here we add one extra level so they can be safely called by low ++ * level entry code which $fp is used for other purpose. ++ */ ++ ++void __trace_hardirqs_on(void) ++{ ++ trace_hardirqs_on(); ++} ++NOKPROBE_SYMBOL(__trace_hardirqs_on); ++ ++void __trace_hardirqs_off(void) ++{ ++ trace_hardirqs_off(); ++} ++NOKPROBE_SYMBOL(__trace_hardirqs_off); +--- /dev/null ++++ b/arch/riscv/kernel/trace_irq.h +@@ -0,0 +1,11 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * Copyright (C) 2022 Changbin Du ++ */ ++#ifndef __TRACE_IRQ_H ++#define __TRACE_IRQ_H ++ ++void __trace_hardirqs_on(void); ++void __trace_hardirqs_off(void); ++ ++#endif /* __TRACE_IRQ_H */ diff --git a/queue-5.16/series b/queue-5.16/series index dff956a4a69..43772b5b0c4 100644 --- a/queue-5.16/series +++ b/queue-5.16/series @@ -135,3 +135,20 @@ xhci-prevent-futile-urb-re-submissions-due-to-incorrect-return-value.patch nvmem-core-fix-a-conflict-between-mtd-and-nvmem-on-wp-gpios-property.patch mtd-core-fix-a-conflict-between-mtd-and-nvmem-on-wp-gpios-property.patch driver-core-free-dma-range-map-when-device-is-released.patch +btrfs-defrag-don-t-try-to-merge-regular-extents-with-preallocated-extents.patch +btrfs-defrag-don-t-defrag-extents-which-are-already-at-max-capacity.patch 
+btrfs-defrag-remove-an-ambiguous-condition-for-rejection.patch +btrfs-prevent-copying-too-big-compressed-lzo-segment.patch +btrfs-defrag-allow-defrag_one_cluster-to-skip-large-extent-which-is-not-a-target.patch +btrfs-autodefrag-only-scan-one-inode-once.patch +btrfs-reduce-extent-threshold-for-autodefrag.patch +rdma-cma-do-not-change-route.addr.src_addr-outside-state-checks.patch +thermal-int340x-fix-memory-leak-in-int3400_notify.patch +staging-fbtft-fb_st7789v-reset-display-before-initialization.patch +tps6598x-clear-int-mask-on-probe-failure.patch +ib-qib-fix-duplicate-sysfs-directory-name.patch +riscv-fix-nommu_k210_sdcard_defconfig.patch +riscv-fix-oops-caused-by-irqsoff-latency-tracer.patch +mm-hugetlb-fix-kernel-crash-with-hugetlb-mremap.patch +hugetlbfs-fix-a-truncation-issue-in-hugepages-parameter.patch +tty-n_gsm-fix-encoding-of-control-signal-octet-bit-dv.patch diff --git a/queue-5.16/staging-fbtft-fb_st7789v-reset-display-before-initialization.patch b/queue-5.16/staging-fbtft-fb_st7789v-reset-display-before-initialization.patch new file mode 100644 index 00000000000..4bc0c7c2ddd --- /dev/null +++ b/queue-5.16/staging-fbtft-fb_st7789v-reset-display-before-initialization.patch @@ -0,0 +1,33 @@ +From b6821b0d9b56386d2bf14806f90ec401468c799f Mon Sep 17 00:00:00 2001 +From: Oliver Graute +Date: Thu, 10 Feb 2022 09:53:22 +0100 +Subject: staging: fbtft: fb_st7789v: reset display before initialization + +From: Oliver Graute + +commit b6821b0d9b56386d2bf14806f90ec401468c799f upstream. + +In rare cases the display is flipped or mirrored. This was observed more +often in a low temperature environment. A clean reset on init_display() +should help to get registers in a sane state. 
+ +Fixes: ef8f317795da (staging: fbtft: use init function instead of init sequence) +Cc: stable@vger.kernel.org +Signed-off-by: Oliver Graute +Link: https://lore.kernel.org/r/20220210085322.15676-1-oliver.graute@kococonnector.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/staging/fbtft/fb_st7789v.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/staging/fbtft/fb_st7789v.c ++++ b/drivers/staging/fbtft/fb_st7789v.c +@@ -144,6 +144,8 @@ static int init_display(struct fbtft_par + { + int rc; + ++ par->fbtftops.reset(par); ++ + rc = init_tearing_effect_line(par); + if (rc) + return rc; diff --git a/queue-5.16/thermal-int340x-fix-memory-leak-in-int3400_notify.patch b/queue-5.16/thermal-int340x-fix-memory-leak-in-int3400_notify.patch new file mode 100644 index 00000000000..26aa7a3369b --- /dev/null +++ b/queue-5.16/thermal-int340x-fix-memory-leak-in-int3400_notify.patch @@ -0,0 +1,52 @@ +From 3abea10e6a8f0e7804ed4c124bea2d15aca977c8 Mon Sep 17 00:00:00 2001 +From: Chuansheng Liu +Date: Wed, 23 Feb 2022 08:20:24 +0800 +Subject: thermal: int340x: fix memory leak in int3400_notify() + +From: Chuansheng Liu + +commit 3abea10e6a8f0e7804ed4c124bea2d15aca977c8 upstream. + +It is easy to hit the below memory leaks in my TigerLake platform: + +unreferenced object 0xffff927c8b91dbc0 (size 32): + comm "kworker/0:2", pid 112, jiffies 4294893323 (age 83.604s) + hex dump (first 32 bytes): + 4e 41 4d 45 3d 49 4e 54 33 34 30 30 20 54 68 65 NAME=INT3400 The + 72 6d 61 6c 00 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b a5 rmal.kkkkkkkkkk. + backtrace: + [] __kmalloc_track_caller+0x2fe/0x4a0 + [] kvasprintf+0x65/0xd0 + [] kasprintf+0x4e/0x70 + [] int3400_notify+0x82/0x120 [int3400_thermal] + [] acpi_ev_notify_dispatch+0x54/0x71 + [] acpi_os_execute_deferred+0x17/0x30 + [] process_one_work+0x21a/0x3f0 + [] worker_thread+0x4a/0x3b0 + [] kthread+0xfd/0x130 + [] ret_from_fork+0x1f/0x30 + +Fix it by calling kfree() accordingly. 
+ +Fixes: 38e44da59130 ("thermal: int3400_thermal: process "thermal table changed" event") +Signed-off-by: Chuansheng Liu +Cc: 4.14+ # 4.14+ +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman +--- + drivers/thermal/intel/int340x_thermal/int3400_thermal.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c ++++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c +@@ -404,6 +404,10 @@ static void int3400_notify(acpi_handle h + thermal_prop[3] = kasprintf(GFP_KERNEL, "EVENT=%d", therm_event); + thermal_prop[4] = NULL; + kobject_uevent_env(&priv->thermal->device.kobj, KOBJ_CHANGE, thermal_prop); ++ kfree(thermal_prop[0]); ++ kfree(thermal_prop[1]); ++ kfree(thermal_prop[2]); ++ kfree(thermal_prop[3]); + } + + static int int3400_thermal_get_temp(struct thermal_zone_device *thermal, diff --git a/queue-5.16/tps6598x-clear-int-mask-on-probe-failure.patch b/queue-5.16/tps6598x-clear-int-mask-on-probe-failure.patch new file mode 100644 index 00000000000..e63dd5c2975 --- /dev/null +++ b/queue-5.16/tps6598x-clear-int-mask-on-probe-failure.patch @@ -0,0 +1,59 @@ +From aba2081e0a9c977396124aa6df93b55ed5912b19 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Tue, 15 Feb 2022 11:22:04 -0700 +Subject: tps6598x: clear int mask on probe failure + +From: Jens Axboe + +commit aba2081e0a9c977396124aa6df93b55ed5912b19 upstream. + +The interrupt mask is enabled before any potential failure points in +the driver, which can leave a failure path where we exit with +interrupts enabled but the device not live. This causes an infinite +stream of interrupts on an Apple M1 Pro laptop on USB-C. + +Add a failure label that's used post enabling interrupts, where we +mask them again before returning an error. 
+ +Suggested-by: Sven Peter +Cc: stable +Reviewed-by: Heikki Krogerus +Signed-off-by: Jens Axboe +Link: https://lore.kernel.org/r/e6b80669-20f3-06e7-9ed5-8951a9c6db6f@kernel.dk +Signed-off-by: Greg Kroah-Hartman +--- + drivers/usb/typec/tipd/core.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c +index 6d27a5b5e3ca..7ffcda94d323 100644 +--- a/drivers/usb/typec/tipd/core.c ++++ b/drivers/usb/typec/tipd/core.c +@@ -761,12 +761,12 @@ static int tps6598x_probe(struct i2c_client *client) + + ret = tps6598x_read32(tps, TPS_REG_STATUS, &status); + if (ret < 0) +- return ret; ++ goto err_clear_mask; + trace_tps6598x_status(status); + + ret = tps6598x_read32(tps, TPS_REG_SYSTEM_CONF, &conf); + if (ret < 0) +- return ret; ++ goto err_clear_mask; + + /* + * This fwnode has a "compatible" property, but is never populated as a +@@ -855,7 +855,8 @@ static int tps6598x_probe(struct i2c_client *client) + usb_role_switch_put(tps->role_sw); + err_fwnode_put: + fwnode_handle_put(fwnode); +- ++err_clear_mask: ++ tps6598x_write64(tps, TPS_REG_INT_MASK1, 0); + return ret; + } + +-- +2.35.1 + diff --git a/queue-5.16/tty-n_gsm-fix-encoding-of-control-signal-octet-bit-dv.patch b/queue-5.16/tty-n_gsm-fix-encoding-of-control-signal-octet-bit-dv.patch new file mode 100644 index 00000000000..633866711af --- /dev/null +++ b/queue-5.16/tty-n_gsm-fix-encoding-of-control-signal-octet-bit-dv.patch @@ -0,0 +1,44 @@ +From 737b0ef3be6b319d6c1fd64193d1603311969326 Mon Sep 17 00:00:00 2001 +From: "daniel.starke@siemens.com" +Date: Thu, 17 Feb 2022 23:31:17 -0800 +Subject: tty: n_gsm: fix encoding of control signal octet bit DV + +From: daniel.starke@siemens.com + +commit 737b0ef3be6b319d6c1fd64193d1603311969326 upstream. + +n_gsm is based on the 3GPP 07.010 and its newer version is the 3GPP 27.010. 
+See https://portal.3gpp.org/desktopmodules/Specifications/SpecificationDetails.aspx?specificationId=1516 +The changes from 07.010 to 27.010 are non-functional. Therefore, I refer to +the newer 27.010 here. Chapter 5.4.6.3.7 describes the encoding of the +control signal octet used by the MSC (modem status command). The same +encoding is also used in convergence layer type 2 as described in chapter +5.5.2. Table 7 and 24 both require the DV (data valid) bit to be set 1 for +outgoing control signal octets sent by the DTE (data terminal equipment), +i.e. for the initiator side. +Currently, the DV bit is only set if CD (carrier detect) is on, regardless +of the side. + +This patch fixes this behavior by setting the DV bit on the initiator side +unconditionally. + +Fixes: e1eaea46bb40 ("tty: n_gsm line discipline") +Cc: stable@vger.kernel.org +Signed-off-by: Daniel Starke +Link: https://lore.kernel.org/r/20220218073123.2121-1-daniel.starke@siemens.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/tty/n_gsm.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/tty/n_gsm.c ++++ b/drivers/tty/n_gsm.c +@@ -439,7 +439,7 @@ static u8 gsm_encode_modem(const struct + modembits |= MDM_RTR; + if (dlci->modem_tx & TIOCM_RI) + modembits |= MDM_IC; +- if (dlci->modem_tx & TIOCM_CD) ++ if (dlci->modem_tx & TIOCM_CD || dlci->gsm->initiator) + modembits |= MDM_DV; + return modembits; + }