--- /dev/null
+From foo@baz Fri Aug 19 01:21:02 PM CEST 2022
+From: Qu Wenruo <wqu@suse.com>
+Date: Fri, 19 Aug 2022 16:39:49 +0800
+Subject: btrfs: only write the sectors in the vertical stripe which has data stripes
+To: linux-btrfs@vger.kernel.org, stable@vger.kernel.org
+Cc: David Sterba <dsterba@suse.com>
+Message-ID: <e1694a5724fa6b950b10566599a492ce4537f772.1660898037.git.wqu@suse.com>
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit bd8f7e627703ca5707833d623efcd43f104c7b3f upstream.
+
+If we have only 8K partial write at the beginning of a full RAID56
+stripe, we will write the following contents:
+
+ 0 8K 32K 64K
+Disk 1 (data): |XX| | |
+Disk 2 (data): | | |
+Disk 3 (parity): |XXXXXXXXXXXXXXX|XXXXXXXXXXXXXXX|
+
+|X| means the sector will be written back to disk.
+
+Note that although we won't write any sectors from disk 2, we will still
+write the full 64KiB of parity to disk.
+
+This behavior is fine for now, but not for the future (especially for
+RAID56J, as we waste quite some space to journal the unused parity
+stripes).
+
+So here we will also utilize the btrfs_raid_bio::dbitmap, anytime we
+queue a higher level bio into an rbio, we will update rbio::dbitmap to
+indicate which vertical stripes we need to writeback.
+
+And at finish_rmw(), we also check dbitmap to see if we need to write
+any sector in the vertical stripe.
+
+So after the patch, above example will only lead to the following
+writeback pattern:
+
+ 0 8K 32K 64K
+Disk 1 (data): |XX| | |
+Disk 2 (data): | | |
+Disk 3 (parity): |XX| | |
+
+Acked-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/raid56.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++----
+ 1 file changed, 51 insertions(+), 4 deletions(-)
+
+--- a/fs/btrfs/raid56.c
++++ b/fs/btrfs/raid56.c
+@@ -324,6 +324,9 @@ static void merge_rbio(struct btrfs_raid
+ {
+ bio_list_merge(&dest->bio_list, &victim->bio_list);
+ dest->bio_list_bytes += victim->bio_list_bytes;
++ /* Also inherit the bitmaps from @victim. */
++ bitmap_or(dest->dbitmap, victim->dbitmap, dest->dbitmap,
++ dest->stripe_npages);
+ dest->generic_bio_cnt += victim->generic_bio_cnt;
+ bio_list_init(&victim->bio_list);
+ }
+@@ -865,6 +868,12 @@ static void rbio_orig_end_io(struct btrf
+
+ if (rbio->generic_bio_cnt)
+ btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt);
++ /*
++ * Clear the data bitmap, as the rbio may be cached for later usage.
++ * do this before before unlock_stripe() so there will be no new bio
++ * for this bio.
++ */
++ bitmap_clear(rbio->dbitmap, 0, rbio->stripe_npages);
+
+ /*
+ * At this moment, rbio->bio_list is empty, however since rbio does not
+@@ -1197,6 +1206,9 @@ static noinline void finish_rmw(struct b
+ else
+ BUG();
+
++ /* We should have at least one data sector. */
++ ASSERT(bitmap_weight(rbio->dbitmap, rbio->stripe_npages));
++
+ /* at this point we either have a full stripe,
+ * or we've read the full stripe from the drive.
+ * recalculate the parity and write the new results.
+@@ -1268,6 +1280,11 @@ static noinline void finish_rmw(struct b
+ for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
+ for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
+ struct page *page;
++
++ /* This vertical stripe has no data, skip it. */
++ if (!test_bit(pagenr, rbio->dbitmap))
++ continue;
++
+ if (stripe < rbio->nr_data) {
+ page = page_in_rbio(rbio, stripe, pagenr, 1);
+ if (!page)
+@@ -1292,6 +1309,11 @@ static noinline void finish_rmw(struct b
+
+ for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
+ struct page *page;
++
++ /* This vertical stripe has no data, skip it. */
++ if (!test_bit(pagenr, rbio->dbitmap))
++ continue;
++
+ if (stripe < rbio->nr_data) {
+ page = page_in_rbio(rbio, stripe, pagenr, 1);
+ if (!page)
+@@ -1715,6 +1737,33 @@ static void btrfs_raid_unplug(struct blk
+ run_plug(plug);
+ }
+
++/* Add the original bio into rbio->bio_list, and update rbio::dbitmap. */
++static void rbio_add_bio(struct btrfs_raid_bio *rbio, struct bio *orig_bio)
++{
++ const struct btrfs_fs_info *fs_info = rbio->fs_info;
++ const u64 orig_logical = orig_bio->bi_iter.bi_sector << SECTOR_SHIFT;
++ const u64 full_stripe_start = rbio->bioc->raid_map[0];
++ const u32 orig_len = orig_bio->bi_iter.bi_size;
++ const u32 sectorsize = fs_info->sectorsize;
++ u64 cur_logical;
++
++ ASSERT(orig_logical >= full_stripe_start &&
++ orig_logical + orig_len <= full_stripe_start +
++ rbio->nr_data * rbio->stripe_len);
++
++ bio_list_add(&rbio->bio_list, orig_bio);
++ rbio->bio_list_bytes += orig_bio->bi_iter.bi_size;
++
++ /* Update the dbitmap. */
++ for (cur_logical = orig_logical; cur_logical < orig_logical + orig_len;
++ cur_logical += sectorsize) {
++ int bit = ((u32)(cur_logical - full_stripe_start) >>
++ fs_info->sectorsize_bits) % rbio->stripe_npages;
++
++ set_bit(bit, rbio->dbitmap);
++ }
++}
++
+ /*
+ * our main entry point for writes from the rest of the FS.
+ */
+@@ -1731,9 +1780,8 @@ int raid56_parity_write(struct btrfs_fs_
+ btrfs_put_bioc(bioc);
+ return PTR_ERR(rbio);
+ }
+- bio_list_add(&rbio->bio_list, bio);
+- rbio->bio_list_bytes = bio->bi_iter.bi_size;
+ rbio->operation = BTRFS_RBIO_WRITE;
++ rbio_add_bio(rbio, bio);
+
+ btrfs_bio_counter_inc_noblocked(fs_info);
+ rbio->generic_bio_cnt = 1;
+@@ -2135,8 +2183,7 @@ int raid56_parity_recover(struct btrfs_f
+ }
+
+ rbio->operation = BTRFS_RBIO_READ_REBUILD;
+- bio_list_add(&rbio->bio_list, bio);
+- rbio->bio_list_bytes = bio->bi_iter.bi_size;
++ rbio_add_bio(rbio, bio);
+
+ rbio->faila = find_logical_bio_stripe(rbio, bio);
+ if (rbio->faila == -1) {
--- /dev/null
+From foo@baz Fri Aug 19 01:21:02 PM CEST 2022
+From: Qu Wenruo <wqu@suse.com>
+Date: Fri, 19 Aug 2022 16:39:50 +0800
+Subject: btrfs: raid56: don't trust any cached sector in __raid56_parity_recover()
+To: linux-btrfs@vger.kernel.org, stable@vger.kernel.org
+Cc: David Sterba <dsterba@suse.com>
+Message-ID: <83e62eda83a2e3e77d10867388a4599a31f26e14.1660898037.git.wqu@suse.com>
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit f6065f8edeb25f4a9dfe0b446030ad995a84a088 upstream.
+
+[BUG]
+There is a small workload which will always fail with recent kernel:
+(A simplified version from btrfs/125 test case)
+
+ mkfs.btrfs -f -m raid5 -d raid5 -b 1G $dev1 $dev2 $dev3
+ mount $dev1 $mnt
+ xfs_io -f -c "pwrite -S 0xee 0 1M" $mnt/file1
+ sync
+ umount $mnt
+ btrfs dev scan -u $dev3
+ mount -o degraded $dev1 $mnt
+ xfs_io -f -c "pwrite -S 0xff 0 128M" $mnt/file2
+ umount $mnt
+ btrfs dev scan
+ mount $dev1 $mnt
+ btrfs balance start --full-balance $mnt
+ umount $mnt
+
+The failure is always a failure to read some tree blocks:
+
+ BTRFS info (device dm-4): relocating block group 217710592 flags data|raid5
+ BTRFS error (device dm-4): parent transid verify failed on 38993920 wanted 9 found 7
+ BTRFS error (device dm-4): parent transid verify failed on 38993920 wanted 9 found 7
+ ...
+
+[CAUSE]
+With the recently added debug output, we can see all RAID56 operations
+related to full stripe 38928384:
+
+ 56.1183: raid56_read_partial: full_stripe=38928384 devid=2 type=DATA1 offset=0 opf=0x0 physical=9502720 len=65536
+ 56.1185: raid56_read_partial: full_stripe=38928384 devid=3 type=DATA2 offset=16384 opf=0x0 physical=9519104 len=16384
+ 56.1185: raid56_read_partial: full_stripe=38928384 devid=3 type=DATA2 offset=49152 opf=0x0 physical=9551872 len=16384
+ 56.1187: raid56_write_stripe: full_stripe=38928384 devid=3 type=DATA2 offset=0 opf=0x1 physical=9502720 len=16384
+ 56.1188: raid56_write_stripe: full_stripe=38928384 devid=3 type=DATA2 offset=32768 opf=0x1 physical=9535488 len=16384
+ 56.1188: raid56_write_stripe: full_stripe=38928384 devid=1 type=PQ1 offset=0 opf=0x1 physical=30474240 len=16384
+ 56.1189: raid56_write_stripe: full_stripe=38928384 devid=1 type=PQ1 offset=32768 opf=0x1 physical=30507008 len=16384
+ 56.1218: raid56_write_stripe: full_stripe=38928384 devid=3 type=DATA2 offset=49152 opf=0x1 physical=9551872 len=16384
+ 56.1219: raid56_write_stripe: full_stripe=38928384 devid=1 type=PQ1 offset=49152 opf=0x1 physical=30523392 len=16384
+ 56.2721: raid56_parity_recover: full stripe=38928384 eb=39010304 mirror=2
+ 56.2723: raid56_parity_recover: full stripe=38928384 eb=39010304 mirror=2
+ 56.2724: raid56_parity_recover: full stripe=38928384 eb=39010304 mirror=2
+
+Before we enter raid56_parity_recover(), we have triggered some metadata
+write for the full stripe 38928384, which leads us to read all the
+sectors from disk.
+
+Furthermore, btrfs raid56 write will cache its calculated P/Q sectors to
+avoid unnecessary read.
+
+This means, for that full stripe, after any partial write, we will have
+stale data, along with P/Q calculated using that stale data.
+
+Thankfully due to patch "btrfs: only write the sectors in the vertical stripe
+which has data stripes" we haven't submitted all the corrupted P/Q to disk.
+
+When we really need to recover certain range, aka in
+raid56_parity_recover(), we will use the cached rbio, along with its
+cached sectors (the full stripe is all cached).
+
+This explains why we have no event raid56_scrub_read_recover()
+triggered.
+
+Since we have the cached P/Q which is calculated using the stale data,
+the recovered one will just be stale.
+
+In our particular test case, it will always return the same incorrect
+metadata, thus causing the same error message "parent transid verify
+failed on 39010304 wanted 9 found 7" again and again.
+
+[BTRFS DESTRUCTIVE RMW PROBLEM]
+
+Test case btrfs/125 (and above workload) always has its trouble with
+the destructive read-modify-write (RMW) cycle:
+
+ 0 32K 64K
+Data1: | Good | Good |
+Data2: | Bad | Bad |
+Parity: | Good | Good |
+
+In the above case, if we trigger any write into Data1, we will use the bad
+data in Data2 to re-generate parity, killing the only chance to recover
+Data2, thus Data2 is lost forever.
+
+This destructive RMW cycle is not specific to btrfs RAID56, but there
+are some btrfs specific behaviors making the case even worse:
+
+- Btrfs will cache sectors for unrelated vertical stripes.
+
+ In above example, if we're only writing into 0~32K range, btrfs will
+ still read data range (32K ~ 64K) of Data1, and (64K~128K) of Data2.
+ This behavior is to cache sectors for later update.
+
+ Incidentally commit d4e28d9b5f04 ("btrfs: raid56: make steal_rbio()
+ subpage compatible") has a bug which makes RAID56 never trust the
+ cached sectors, thus slightly improving the situation for recovery.
+
+ Unfortunately, follow up fix "btrfs: update stripe_sectors::uptodate in
+ steal_rbio" will revert the behavior back to the old one.
+
+- Btrfs raid56 partial write will update all P/Q sectors and cache them
+
+ This means, even if data at (64K ~ 96K) of Data2 is free space, and
+ only (96K ~ 128K) of Data2 is really stale data.
+ And we write into that (96K ~ 128K), we will update all the parity
+ sectors for the full stripe.
+
+ This unnecessary behavior will completely kill the chance of recovery.
+
+ Thankfully, an unrelated optimization "btrfs: only write the sectors
+ in the vertical stripe which has data stripes" will prevent
+ submitting the write bio for untouched vertical sectors.
+
+ That optimization will keep the on-disk P/Q untouched for a chance for
+ later recovery.
+
+[FIX]
+Although we have no good way to completely fix the destructive RMW
+(unless we go full scrub for each partial write), we can still limit the
+damage.
+
+With patch "btrfs: only write the sectors in the vertical stripe which
+has data stripes" now we won't really submit the P/Q of unrelated
+vertical stripes, so the on-disk P/Q should still be fine.
+
+Now all we really need to do is drop all the cached sectors when doing
+recovery.
+
+By this, we have a chance to read the original P/Q from disk, and have a
+chance to recover the stale data, while still keeping the cache to speed
+up the regular write path.
+
+In fact, just dropping all the cache for recovery path is good enough to
+allow the test case btrfs/125 along with the small script to pass
+reliably.
+
+The lack of metadata write after the degraded mount, and forced metadata
+COW is saving us this time.
+
+So this patch will fix the behavior by not trusting any cache in
+__raid56_parity_recover(), to solve the problem while still keeping the
+cache useful.
+
+But please note that this test pass DOES NOT mean we have solved the
+destructive RMW problem, we just do damage control a little
+better.
+
+Related patches:
+
+- btrfs: only write the sectors in the vertical stripe
+- d4e28d9b5f04 ("btrfs: raid56: make steal_rbio() subpage compatible")
+- btrfs: update stripe_sectors::uptodate in steal_rbio
+
+Acked-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/raid56.c | 19 ++++++-------------
+ 1 file changed, 6 insertions(+), 13 deletions(-)
+
+--- a/fs/btrfs/raid56.c
++++ b/fs/btrfs/raid56.c
+@@ -2085,9 +2085,12 @@ static int __raid56_parity_recover(struc
+ atomic_set(&rbio->error, 0);
+
+ /*
+- * read everything that hasn't failed. Thanks to the
+- * stripe cache, it is possible that some or all of these
+- * pages are going to be uptodate.
++ * Read everything that hasn't failed. However this time we will
++ * not trust any cached sector.
++ * As we may read out some stale data but higher layer is not reading
++ * that stale part.
++ *
++ * So here we always re-read everything in recovery path.
+ */
+ for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
+ if (rbio->faila == stripe || rbio->failb == stripe) {
+@@ -2096,16 +2099,6 @@ static int __raid56_parity_recover(struc
+ }
+
+ for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
+- struct page *p;
+-
+- /*
+- * the rmw code may have already read this
+- * page in
+- */
+- p = rbio_stripe_page(rbio, stripe, pagenr);
+- if (PageUptodate(p))
+- continue;
+-
+ ret = rbio_add_io_page(rbio, &bio_list,
+ rbio_stripe_page(rbio, stripe, pagenr),
+ stripe, pagenr, rbio->stripe_len);
--- /dev/null
+From 8f0541186e9ad1b62accc9519cc2b7a7240272a7 Mon Sep 17 00:00:00 2001
+From: Namjae Jeon <linkinjeon@kernel.org>
+Date: Tue, 2 Aug 2022 07:28:51 +0900
+Subject: ksmbd: fix heap-based overflow in set_ntacl_dacl()
+
+From: Namjae Jeon <linkinjeon@kernel.org>
+
+commit 8f0541186e9ad1b62accc9519cc2b7a7240272a7 upstream.
+
+The testcase uses the SMB2_SET_INFO_HE command to set a malformed file
+attribute under the label `security.NTACL`. The SMB2_QUERY_INFO_HE command
+in the testcase then triggers the following overflow.
+
+[ 4712.003781] ==================================================================
+[ 4712.003790] BUG: KASAN: slab-out-of-bounds in build_sec_desc+0x842/0x1dd0 [ksmbd]
+[ 4712.003807] Write of size 1060 at addr ffff88801e34c068 by task kworker/0:0/4190
+
+[ 4712.003813] CPU: 0 PID: 4190 Comm: kworker/0:0 Not tainted 5.19.0-rc5 #1
+[ 4712.003850] Workqueue: ksmbd-io handle_ksmbd_work [ksmbd]
+[ 4712.003867] Call Trace:
+[ 4712.003870] <TASK>
+[ 4712.003873] dump_stack_lvl+0x49/0x5f
+[ 4712.003935] print_report.cold+0x5e/0x5cf
+[ 4712.003972] ? ksmbd_vfs_get_sd_xattr+0x16d/0x500 [ksmbd]
+[ 4712.003984] ? cmp_map_id+0x200/0x200
+[ 4712.003988] ? build_sec_desc+0x842/0x1dd0 [ksmbd]
+[ 4712.004000] kasan_report+0xaa/0x120
+[ 4712.004045] ? build_sec_desc+0x842/0x1dd0 [ksmbd]
+[ 4712.004056] kasan_check_range+0x100/0x1e0
+[ 4712.004060] memcpy+0x3c/0x60
+[ 4712.004064] build_sec_desc+0x842/0x1dd0 [ksmbd]
+[ 4712.004076] ? parse_sec_desc+0x580/0x580 [ksmbd]
+[ 4712.004088] ? ksmbd_acls_fattr+0x281/0x410 [ksmbd]
+[ 4712.004099] smb2_query_info+0xa8f/0x6110 [ksmbd]
+[ 4712.004111] ? psi_group_change+0x856/0xd70
+[ 4712.004148] ? update_load_avg+0x1c3/0x1af0
+[ 4712.004152] ? asym_cpu_capacity_scan+0x5d0/0x5d0
+[ 4712.004157] ? xas_load+0x23/0x300
+[ 4712.004162] ? smb2_query_dir+0x1530/0x1530 [ksmbd]
+[ 4712.004173] ? _raw_spin_lock_bh+0xe0/0xe0
+[ 4712.004179] handle_ksmbd_work+0x30e/0x1020 [ksmbd]
+[ 4712.004192] process_one_work+0x778/0x11c0
+[ 4712.004227] ? _raw_spin_lock_irq+0x8e/0xe0
+[ 4712.004231] worker_thread+0x544/0x1180
+[ 4712.004234] ? __cpuidle_text_end+0x4/0x4
+[ 4712.004239] kthread+0x282/0x320
+[ 4712.004243] ? process_one_work+0x11c0/0x11c0
+[ 4712.004246] ? kthread_complete_and_exit+0x30/0x30
+[ 4712.004282] ret_from_fork+0x1f/0x30
+
+This patch adds buffer validation for the security descriptor that is
+stored by a malformed SMB2_SET_INFO_HE command, and allocates a large
+response buffer for the SMB2_O_INFO_SECURITY info type.
+
+Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3")
+Cc: stable@vger.kernel.org
+Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-17771
+Reviewed-by: Hyunchul Lee <hyc.lee@gmail.com>
+Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ksmbd/smb2pdu.c | 39 ++++++++++-----
+ fs/ksmbd/smbacl.c | 130 +++++++++++++++++++++++++++++++++++------------------
+ fs/ksmbd/smbacl.h | 2
+ fs/ksmbd/vfs.c | 5 ++
+ 4 files changed, 119 insertions(+), 57 deletions(-)
+
+--- a/fs/ksmbd/smb2pdu.c
++++ b/fs/ksmbd/smb2pdu.c
+@@ -541,9 +541,10 @@ int smb2_allocate_rsp_buf(struct ksmbd_w
+ struct smb2_query_info_req *req;
+
+ req = work->request_buf;
+- if (req->InfoType == SMB2_O_INFO_FILE &&
+- (req->FileInfoClass == FILE_FULL_EA_INFORMATION ||
+- req->FileInfoClass == FILE_ALL_INFORMATION))
++ if ((req->InfoType == SMB2_O_INFO_FILE &&
++ (req->FileInfoClass == FILE_FULL_EA_INFORMATION ||
++ req->FileInfoClass == FILE_ALL_INFORMATION)) ||
++ req->InfoType == SMB2_O_INFO_SECURITY)
+ sz = large_sz;
+ }
+
+@@ -2981,7 +2982,7 @@ int smb2_open(struct ksmbd_work *work)
+ goto err_out;
+
+ rc = build_sec_desc(user_ns,
+- pntsd, NULL,
++ pntsd, NULL, 0,
+ OWNER_SECINFO |
+ GROUP_SECINFO |
+ DACL_SECINFO,
+@@ -3824,6 +3825,15 @@ static int verify_info_level(int info_le
+ return 0;
+ }
+
++static int smb2_resp_buf_len(struct ksmbd_work *work, unsigned short hdr2_len)
++{
++ int free_len;
++
++ free_len = (int)(work->response_sz -
++ (get_rfc1002_len(work->response_buf) + 4)) - hdr2_len;
++ return free_len;
++}
++
+ static int smb2_calc_max_out_buf_len(struct ksmbd_work *work,
+ unsigned short hdr2_len,
+ unsigned int out_buf_len)
+@@ -3833,9 +3843,7 @@ static int smb2_calc_max_out_buf_len(str
+ if (out_buf_len > work->conn->vals->max_trans_size)
+ return -EINVAL;
+
+- free_len = (int)(work->response_sz -
+- (get_rfc1002_len(work->response_buf) + 4)) -
+- hdr2_len;
++ free_len = smb2_resp_buf_len(work, hdr2_len);
+ if (free_len < 0)
+ return -EINVAL;
+
+@@ -5087,10 +5095,10 @@ static int smb2_get_info_sec(struct ksmb
+ struct smb_ntsd *pntsd = (struct smb_ntsd *)rsp->Buffer, *ppntsd = NULL;
+ struct smb_fattr fattr = {{0}};
+ struct inode *inode;
+- __u32 secdesclen;
++ __u32 secdesclen = 0;
+ unsigned int id = KSMBD_NO_FID, pid = KSMBD_NO_FID;
+ int addition_info = le32_to_cpu(req->AdditionalInformation);
+- int rc;
++ int rc = 0, ppntsd_size = 0;
+
+ if (addition_info & ~(OWNER_SECINFO | GROUP_SECINFO | DACL_SECINFO |
+ PROTECTED_DACL_SECINFO |
+@@ -5136,11 +5144,14 @@ static int smb2_get_info_sec(struct ksmb
+
+ if (test_share_config_flag(work->tcon->share_conf,
+ KSMBD_SHARE_FLAG_ACL_XATTR))
+- ksmbd_vfs_get_sd_xattr(work->conn, user_ns,
+- fp->filp->f_path.dentry, &ppntsd);
+-
+- rc = build_sec_desc(user_ns, pntsd, ppntsd, addition_info,
+- &secdesclen, &fattr);
++ ppntsd_size = ksmbd_vfs_get_sd_xattr(work->conn, user_ns,
++ fp->filp->f_path.dentry,
++ &ppntsd);
++
++ /* Check if sd buffer size exceeds response buffer size */
++ if (smb2_resp_buf_len(work, 8) > ppntsd_size)
++ rc = build_sec_desc(user_ns, pntsd, ppntsd, ppntsd_size,
++ addition_info, &secdesclen, &fattr);
+ posix_acl_release(fattr.cf_acls);
+ posix_acl_release(fattr.cf_dacls);
+ kfree(ppntsd);
+--- a/fs/ksmbd/smbacl.c
++++ b/fs/ksmbd/smbacl.c
+@@ -690,6 +690,7 @@ posix_default_acl:
+ static void set_ntacl_dacl(struct user_namespace *user_ns,
+ struct smb_acl *pndacl,
+ struct smb_acl *nt_dacl,
++ unsigned int aces_size,
+ const struct smb_sid *pownersid,
+ const struct smb_sid *pgrpsid,
+ struct smb_fattr *fattr)
+@@ -703,9 +704,19 @@ static void set_ntacl_dacl(struct user_n
+ if (nt_num_aces) {
+ ntace = (struct smb_ace *)((char *)nt_dacl + sizeof(struct smb_acl));
+ for (i = 0; i < nt_num_aces; i++) {
+- memcpy((char *)pndace + size, ntace, le16_to_cpu(ntace->size));
+- size += le16_to_cpu(ntace->size);
+- ntace = (struct smb_ace *)((char *)ntace + le16_to_cpu(ntace->size));
++ unsigned short nt_ace_size;
++
++ if (offsetof(struct smb_ace, access_req) > aces_size)
++ break;
++
++ nt_ace_size = le16_to_cpu(ntace->size);
++ if (nt_ace_size > aces_size)
++ break;
++
++ memcpy((char *)pndace + size, ntace, nt_ace_size);
++ size += nt_ace_size;
++ aces_size -= nt_ace_size;
++ ntace = (struct smb_ace *)((char *)ntace + nt_ace_size);
+ num_aces++;
+ }
+ }
+@@ -878,7 +889,7 @@ int parse_sec_desc(struct user_namespace
+ /* Convert permission bits from mode to equivalent CIFS ACL */
+ int build_sec_desc(struct user_namespace *user_ns,
+ struct smb_ntsd *pntsd, struct smb_ntsd *ppntsd,
+- int addition_info, __u32 *secdesclen,
++ int ppntsd_size, int addition_info, __u32 *secdesclen,
+ struct smb_fattr *fattr)
+ {
+ int rc = 0;
+@@ -938,15 +949,25 @@ int build_sec_desc(struct user_namespace
+
+ if (!ppntsd) {
+ set_mode_dacl(user_ns, dacl_ptr, fattr);
+- } else if (!ppntsd->dacloffset) {
+- goto out;
+ } else {
+ struct smb_acl *ppdacl_ptr;
++ unsigned int dacl_offset = le32_to_cpu(ppntsd->dacloffset);
++ int ppdacl_size, ntacl_size = ppntsd_size - dacl_offset;
++
++ if (!dacl_offset ||
++ (dacl_offset + sizeof(struct smb_acl) > ppntsd_size))
++ goto out;
++
++ ppdacl_ptr = (struct smb_acl *)((char *)ppntsd + dacl_offset);
++ ppdacl_size = le16_to_cpu(ppdacl_ptr->size);
++ if (ppdacl_size > ntacl_size ||
++ ppdacl_size < sizeof(struct smb_acl))
++ goto out;
+
+- ppdacl_ptr = (struct smb_acl *)((char *)ppntsd +
+- le32_to_cpu(ppntsd->dacloffset));
+ set_ntacl_dacl(user_ns, dacl_ptr, ppdacl_ptr,
+- nowner_sid_ptr, ngroup_sid_ptr, fattr);
++ ntacl_size - sizeof(struct smb_acl),
++ nowner_sid_ptr, ngroup_sid_ptr,
++ fattr);
+ }
+ pntsd->dacloffset = cpu_to_le32(offset);
+ offset += le16_to_cpu(dacl_ptr->size);
+@@ -980,24 +1001,31 @@ int smb_inherit_dacl(struct ksmbd_conn *
+ struct smb_sid owner_sid, group_sid;
+ struct dentry *parent = path->dentry->d_parent;
+ struct user_namespace *user_ns = mnt_user_ns(path->mnt);
+- int inherited_flags = 0, flags = 0, i, ace_cnt = 0, nt_size = 0;
+- int rc = 0, num_aces, dacloffset, pntsd_type, acl_len;
++ int inherited_flags = 0, flags = 0, i, ace_cnt = 0, nt_size = 0, pdacl_size;
++ int rc = 0, num_aces, dacloffset, pntsd_type, pntsd_size, acl_len, aces_size;
+ char *aces_base;
+ bool is_dir = S_ISDIR(d_inode(path->dentry)->i_mode);
+
+- acl_len = ksmbd_vfs_get_sd_xattr(conn, user_ns,
+- parent, &parent_pntsd);
+- if (acl_len <= 0)
++ pntsd_size = ksmbd_vfs_get_sd_xattr(conn, user_ns,
++ parent, &parent_pntsd);
++ if (pntsd_size <= 0)
+ return -ENOENT;
+ dacloffset = le32_to_cpu(parent_pntsd->dacloffset);
+- if (!dacloffset) {
++ if (!dacloffset || (dacloffset + sizeof(struct smb_acl) > pntsd_size)) {
+ rc = -EINVAL;
+ goto free_parent_pntsd;
+ }
+
+ parent_pdacl = (struct smb_acl *)((char *)parent_pntsd + dacloffset);
++ acl_len = pntsd_size - dacloffset;
+ num_aces = le32_to_cpu(parent_pdacl->num_aces);
+ pntsd_type = le16_to_cpu(parent_pntsd->type);
++ pdacl_size = le16_to_cpu(parent_pdacl->size);
++
++ if (pdacl_size > acl_len || pdacl_size < sizeof(struct smb_acl)) {
++ rc = -EINVAL;
++ goto free_parent_pntsd;
++ }
+
+ aces_base = kmalloc(sizeof(struct smb_ace) * num_aces * 2, GFP_KERNEL);
+ if (!aces_base) {
+@@ -1008,11 +1036,23 @@ int smb_inherit_dacl(struct ksmbd_conn *
+ aces = (struct smb_ace *)aces_base;
+ parent_aces = (struct smb_ace *)((char *)parent_pdacl +
+ sizeof(struct smb_acl));
++ aces_size = acl_len - sizeof(struct smb_acl);
+
+ if (pntsd_type & DACL_AUTO_INHERITED)
+ inherited_flags = INHERITED_ACE;
+
+ for (i = 0; i < num_aces; i++) {
++ int pace_size;
++
++ if (offsetof(struct smb_ace, access_req) > aces_size)
++ break;
++
++ pace_size = le16_to_cpu(parent_aces->size);
++ if (pace_size > aces_size)
++ break;
++
++ aces_size -= pace_size;
++
+ flags = parent_aces->flags;
+ if (!smb_inherit_flags(flags, is_dir))
+ goto pass;
+@@ -1057,8 +1097,7 @@ int smb_inherit_dacl(struct ksmbd_conn *
+ aces = (struct smb_ace *)((char *)aces + le16_to_cpu(aces->size));
+ ace_cnt++;
+ pass:
+- parent_aces =
+- (struct smb_ace *)((char *)parent_aces + le16_to_cpu(parent_aces->size));
++ parent_aces = (struct smb_ace *)((char *)parent_aces + pace_size);
+ }
+
+ if (nt_size > 0) {
+@@ -1153,7 +1192,7 @@ int smb_check_perm_dacl(struct ksmbd_con
+ struct smb_ntsd *pntsd = NULL;
+ struct smb_acl *pdacl;
+ struct posix_acl *posix_acls;
+- int rc = 0, acl_size;
++ int rc = 0, pntsd_size, acl_size, aces_size, pdacl_size, dacl_offset;
+ struct smb_sid sid;
+ int granted = le32_to_cpu(*pdaccess & ~FILE_MAXIMAL_ACCESS_LE);
+ struct smb_ace *ace;
+@@ -1162,37 +1201,33 @@ int smb_check_perm_dacl(struct ksmbd_con
+ struct smb_ace *others_ace = NULL;
+ struct posix_acl_entry *pa_entry;
+ unsigned int sid_type = SIDOWNER;
+- char *end_of_acl;
++ unsigned short ace_size;
+
+ ksmbd_debug(SMB, "check permission using windows acl\n");
+- acl_size = ksmbd_vfs_get_sd_xattr(conn, user_ns,
+- path->dentry, &pntsd);
+- if (acl_size <= 0 || !pntsd || !pntsd->dacloffset) {
+- kfree(pntsd);
+- return 0;
+- }
++ pntsd_size = ksmbd_vfs_get_sd_xattr(conn, user_ns,
++ path->dentry, &pntsd);
++ if (pntsd_size <= 0 || !pntsd)
++ goto err_out;
++
++ dacl_offset = le32_to_cpu(pntsd->dacloffset);
++ if (!dacl_offset ||
++ (dacl_offset + sizeof(struct smb_acl) > pntsd_size))
++ goto err_out;
+
+ pdacl = (struct smb_acl *)((char *)pntsd + le32_to_cpu(pntsd->dacloffset));
+- end_of_acl = ((char *)pntsd) + acl_size;
+- if (end_of_acl <= (char *)pdacl) {
+- kfree(pntsd);
+- return 0;
+- }
++ acl_size = pntsd_size - dacl_offset;
++ pdacl_size = le16_to_cpu(pdacl->size);
+
+- if (end_of_acl < (char *)pdacl + le16_to_cpu(pdacl->size) ||
+- le16_to_cpu(pdacl->size) < sizeof(struct smb_acl)) {
+- kfree(pntsd);
+- return 0;
+- }
++ if (pdacl_size > acl_size || pdacl_size < sizeof(struct smb_acl))
++ goto err_out;
+
+ if (!pdacl->num_aces) {
+- if (!(le16_to_cpu(pdacl->size) - sizeof(struct smb_acl)) &&
++ if (!(pdacl_size - sizeof(struct smb_acl)) &&
+ *pdaccess & ~(FILE_READ_CONTROL_LE | FILE_WRITE_DAC_LE)) {
+ rc = -EACCES;
+ goto err_out;
+ }
+- kfree(pntsd);
+- return 0;
++ goto err_out;
+ }
+
+ if (*pdaccess & FILE_MAXIMAL_ACCESS_LE) {
+@@ -1200,11 +1235,16 @@ int smb_check_perm_dacl(struct ksmbd_con
+ DELETE;
+
+ ace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl));
++ aces_size = acl_size - sizeof(struct smb_acl);
+ for (i = 0; i < le32_to_cpu(pdacl->num_aces); i++) {
++ if (offsetof(struct smb_ace, access_req) > aces_size)
++ break;
++ ace_size = le16_to_cpu(ace->size);
++ if (ace_size > aces_size)
++ break;
++ aces_size -= ace_size;
+ granted |= le32_to_cpu(ace->access_req);
+ ace = (struct smb_ace *)((char *)ace + le16_to_cpu(ace->size));
+- if (end_of_acl < (char *)ace)
+- goto err_out;
+ }
+
+ if (!pdacl->num_aces)
+@@ -1216,7 +1256,15 @@ int smb_check_perm_dacl(struct ksmbd_con
+ id_to_sid(uid, sid_type, &sid);
+
+ ace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl));
++ aces_size = acl_size - sizeof(struct smb_acl);
+ for (i = 0; i < le32_to_cpu(pdacl->num_aces); i++) {
++ if (offsetof(struct smb_ace, access_req) > aces_size)
++ break;
++ ace_size = le16_to_cpu(ace->size);
++ if (ace_size > aces_size)
++ break;
++ aces_size -= ace_size;
++
+ if (!compare_sids(&sid, &ace->sid) ||
+ !compare_sids(&sid_unix_NFS_mode, &ace->sid)) {
+ found = 1;
+@@ -1226,8 +1274,6 @@ int smb_check_perm_dacl(struct ksmbd_con
+ others_ace = ace;
+
+ ace = (struct smb_ace *)((char *)ace + le16_to_cpu(ace->size));
+- if (end_of_acl < (char *)ace)
+- goto err_out;
+ }
+
+ if (*pdaccess & FILE_MAXIMAL_ACCESS_LE && found) {
+--- a/fs/ksmbd/smbacl.h
++++ b/fs/ksmbd/smbacl.h
+@@ -193,7 +193,7 @@ struct posix_acl_state {
+ int parse_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd,
+ int acl_len, struct smb_fattr *fattr);
+ int build_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd,
+- struct smb_ntsd *ppntsd, int addition_info,
++ struct smb_ntsd *ppntsd, int ppntsd_size, int addition_info,
+ __u32 *secdesclen, struct smb_fattr *fattr);
+ int init_acl_state(struct posix_acl_state *state, int cnt);
+ void free_acl_state(struct posix_acl_state *state);
+--- a/fs/ksmbd/vfs.c
++++ b/fs/ksmbd/vfs.c
+@@ -1543,6 +1543,11 @@ int ksmbd_vfs_get_sd_xattr(struct ksmbd_
+ }
+
+ *pntsd = acl.sd_buf;
++ if (acl.sd_size < sizeof(struct smb_ntsd)) {
++ pr_err("sd size is invalid\n");
++ goto out_free;
++ }
++
+ (*pntsd)->osidoffset = cpu_to_le32(le32_to_cpu((*pntsd)->osidoffset) -
+ NDR_NTSD_OFFSETOF);
+ (*pntsd)->gsidoffset = cpu_to_le32(le32_to_cpu((*pntsd)->gsidoffset) -
--- /dev/null
+From ac60778b87e45576d7bfdbd6f53df902654e6f09 Mon Sep 17 00:00:00 2001
+From: Hyunchul Lee <hyc.lee@gmail.com>
+Date: Thu, 28 Jul 2022 23:41:51 +0900
+Subject: ksmbd: prevent out of bound read for SMB2_WRITE
+
+From: Hyunchul Lee <hyc.lee@gmail.com>
+
+commit ac60778b87e45576d7bfdbd6f53df902654e6f09 upstream.
+
+OOB read memory can be written to a file,
+if DataOffset is 0 and Length is too large
+in SMB2_WRITE request of compound request.
+
+To prevent this, when checking the length of
+the data area of SMB2_WRITE in smb2_get_data_area_len(),
+let the minimum of DataOffset be the size of
+SMB2 header + the size of SMB2_WRITE header.
+
+This bug can lead to an oops looking something like:
+
+[ 798.008715] BUG: KASAN: slab-out-of-bounds in copy_page_from_iter_atomic+0xd3d/0x14b0
+[ 798.008724] Read of size 252 at addr ffff88800f863e90 by task kworker/0:2/2859
+...
+[ 798.008754] Call Trace:
+[ 798.008756] <TASK>
+[ 798.008759] dump_stack_lvl+0x49/0x5f
+[ 798.008764] print_report.cold+0x5e/0x5cf
+[ 798.008768] ? __filemap_get_folio+0x285/0x6d0
+[ 798.008774] ? copy_page_from_iter_atomic+0xd3d/0x14b0
+[ 798.008777] kasan_report+0xaa/0x120
+[ 798.008781] ? copy_page_from_iter_atomic+0xd3d/0x14b0
+[ 798.008784] kasan_check_range+0x100/0x1e0
+[ 798.008788] memcpy+0x24/0x60
+[ 798.008792] copy_page_from_iter_atomic+0xd3d/0x14b0
+[ 798.008795] ? pagecache_get_page+0x53/0x160
+[ 798.008799] ? iov_iter_get_pages_alloc+0x1590/0x1590
+[ 798.008803] ? ext4_write_begin+0xfc0/0xfc0
+[ 798.008807] ? current_time+0x72/0x210
+[ 798.008811] generic_perform_write+0x2c8/0x530
+[ 798.008816] ? filemap_fdatawrite_wbc+0x180/0x180
+[ 798.008820] ? down_write+0xb4/0x120
+[ 798.008824] ? down_write_killable+0x130/0x130
+[ 798.008829] ext4_buffered_write_iter+0x137/0x2c0
+[ 798.008833] ext4_file_write_iter+0x40b/0x1490
+[ 798.008837] ? __fsnotify_parent+0x275/0xb20
+[ 798.008842] ? __fsnotify_update_child_dentry_flags+0x2c0/0x2c0
+[ 798.008846] ? ext4_buffered_write_iter+0x2c0/0x2c0
+[ 798.008851] __kernel_write+0x3a1/0xa70
+[ 798.008855] ? __x64_sys_preadv2+0x160/0x160
+[ 798.008860] ? security_file_permission+0x4a/0xa0
+[ 798.008865] kernel_write+0xbb/0x360
+[ 798.008869] ksmbd_vfs_write+0x27e/0xb90 [ksmbd]
+[ 798.008881] ? ksmbd_vfs_read+0x830/0x830 [ksmbd]
+[ 798.008892] ? _raw_read_unlock+0x2a/0x50
+[ 798.008896] smb2_write+0xb45/0x14e0 [ksmbd]
+[ 798.008909] ? __kasan_check_write+0x14/0x20
+[ 798.008912] ? _raw_spin_lock_bh+0xd0/0xe0
+[ 798.008916] ? smb2_read+0x15e0/0x15e0 [ksmbd]
+[ 798.008927] ? memcpy+0x4e/0x60
+[ 798.008931] ? _raw_spin_unlock+0x19/0x30
+[ 798.008934] ? ksmbd_smb2_check_message+0x16af/0x2350 [ksmbd]
+[ 798.008946] ? _raw_spin_lock_bh+0xe0/0xe0
+[ 798.008950] handle_ksmbd_work+0x30e/0x1020 [ksmbd]
+[ 798.008962] process_one_work+0x778/0x11c0
+[ 798.008966] ? _raw_spin_lock_irq+0x8e/0xe0
+[ 798.008970] worker_thread+0x544/0x1180
+[ 798.008973] ? __cpuidle_text_end+0x4/0x4
+[ 798.008977] kthread+0x282/0x320
+[ 798.008982] ? process_one_work+0x11c0/0x11c0
+[ 798.008985] ? kthread_complete_and_exit+0x30/0x30
+[ 798.008989] ret_from_fork+0x1f/0x30
+[ 798.008995] </TASK>
+
+Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3")
+Cc: stable@vger.kernel.org
+Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-17817
+Signed-off-by: Hyunchul Lee <hyc.lee@gmail.com>
+Acked-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ksmbd/smb2misc.c | 7 +++++--
+ fs/ksmbd/smb2pdu.c | 6 ++----
+ 2 files changed, 7 insertions(+), 6 deletions(-)
+
+--- a/fs/ksmbd/smb2misc.c
++++ b/fs/ksmbd/smb2misc.c
+@@ -132,8 +132,11 @@ static int smb2_get_data_area_len(unsign
+ *len = le16_to_cpu(((struct smb2_read_req *)hdr)->ReadChannelInfoLength);
+ break;
+ case SMB2_WRITE:
+- if (((struct smb2_write_req *)hdr)->DataOffset) {
+- *off = le16_to_cpu(((struct smb2_write_req *)hdr)->DataOffset);
++ if (((struct smb2_write_req *)hdr)->DataOffset ||
++ ((struct smb2_write_req *)hdr)->Length) {
++ *off = max_t(unsigned int,
++ le16_to_cpu(((struct smb2_write_req *)hdr)->DataOffset),
++ offsetof(struct smb2_write_req, Buffer) - 4);
+ *len = le32_to_cpu(((struct smb2_write_req *)hdr)->Length);
+ break;
+ }
+--- a/fs/ksmbd/smb2pdu.c
++++ b/fs/ksmbd/smb2pdu.c
+@@ -6471,10 +6471,8 @@ int smb2_write(struct ksmbd_work *work)
+ (offsetof(struct smb2_write_req, Buffer) - 4)) {
+ data_buf = (char *)&req->Buffer[0];
+ } else {
+- if ((u64)le16_to_cpu(req->DataOffset) + length > get_rfc1002_len(req)) {
+- pr_err("invalid write data offset %u, smb_len %u\n",
+- le16_to_cpu(req->DataOffset),
+- get_rfc1002_len(req));
++ if (le16_to_cpu(req->DataOffset) <
++ offsetof(struct smb2_write_req, Buffer)) {
+ err = -EINVAL;
+ goto out;
+ }
--- /dev/null
+From 02799571714dc5dd6948824b9d080b44a295f695 Mon Sep 17 00:00:00 2001
+From: Jamal Hadi Salim <jhs@mojatatu.com>
+Date: Sun, 14 Aug 2022 11:27:58 +0000
+Subject: net_sched: cls_route: disallow handle of 0
+
+From: Jamal Hadi Salim <jhs@mojatatu.com>
+
+commit 02799571714dc5dd6948824b9d080b44a295f695 upstream.
+
+Follows up on:
+https://lore.kernel.org/all/20220809170518.164662-1-cascardo@canonical.com/
+
+handle of 0 implies from/to of universe realm which is not very
+sensible.
+
+Let's see what this patch will do:
+$sudo tc qdisc add dev $DEV root handle 1:0 prio
+
+//let's manufacture a way to insert handle of 0
+$sudo tc filter add dev $DEV parent 1:0 protocol ip prio 100 \
+route to 0 from 0 classid 1:10 action ok
+
+//gets rejected...
+Error: handle of 0 is not valid.
+We have an error talking to the kernel, -1
+
+//let's create a legit entry..
+sudo tc filter add dev $DEV parent 1:0 protocol ip prio 100 route from 10 \
+classid 1:10 action ok
+
+//what did the kernel insert?
+$sudo tc filter ls dev $DEV parent 1:0
+filter protocol ip pref 100 route chain 0
+filter protocol ip pref 100 route chain 0 fh 0x000a8000 flowid 1:10 from 10
+ action order 1: gact action pass
+ random type none pass val 0
+ index 1 ref 1 bind 1
+
+//Let's try to replace that legit entry with a handle of 0
+$ sudo tc filter replace dev $DEV parent 1:0 protocol ip prio 100 \
+handle 0x000a8000 route to 0 from 0 classid 1:10 action drop
+
+Error: Replacing with handle of 0 is invalid.
+We have an error talking to the kernel, -1
+
+And last, let's run Cascardo's POC:
+$ ./poc
+0
+0
+-22
+-22
+-22
+
+Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Acked-by: Stephen Hemminger <stephen@networkplumber.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/cls_route.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/net/sched/cls_route.c
++++ b/net/sched/cls_route.c
+@@ -424,6 +424,11 @@ static int route4_set_parms(struct net *
+ return -EINVAL;
+ }
+
++ if (!nhandle) {
++ NL_SET_ERR_MSG(extack, "Replacing with handle of 0 is invalid");
++ return -EINVAL;
++ }
++
+ h1 = to_hash(nhandle);
+ b = rtnl_dereference(head->table[h1]);
+ if (!b) {
+@@ -477,6 +482,11 @@ static int route4_change(struct net *net
+ int err;
+ bool new = true;
+
++ if (!handle) {
++ NL_SET_ERR_MSG(extack, "Creating with handle of 0 is invalid");
++ return -EINVAL;
++ }
++
+ if (opt == NULL)
+ return handle ? -EINVAL : 0;
+
--- /dev/null
+From foo@baz Fri Aug 19 01:16:51 PM CEST 2022
+From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Date: Tue, 16 Aug 2022 05:26:56 -0300
+Subject: Revert "x86/ftrace: Use alternative RET encoding"
+To: stable@vger.kernel.org
+Cc: paul.gortmaker@windriver.com, gregkh@linuxfoundation.org, peterz@infradead.org, bp@suse.de, jpoimboe@kernel.org, Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Message-ID: <20220816082658.172387-1-cascardo@canonical.com>
+
+From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+
+This reverts commit e54fcb0812faebd147de72bd37ad87cc4951c68c.
+
+This temporarily reverts the backport of upstream commit
+1f001e9da6bbf482311e45e48f53c2bd2179e59c. It was not correct to copy the
+ftrace stub as it would contain a relative jump to the return thunk which
+would not apply in the context it was being copied to, leaving ftrace
+support broken.
+
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/ftrace.c | 7 ++-----
+ 1 file changed, 2 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/kernel/ftrace.c
++++ b/arch/x86/kernel/ftrace.c
+@@ -309,7 +309,7 @@ union ftrace_op_code_union {
+ } __attribute__((packed));
+ };
+
+-#define RET_SIZE (IS_ENABLED(CONFIG_RETPOLINE) ? 5 : 1 + IS_ENABLED(CONFIG_SLS))
++#define RET_SIZE 1 + IS_ENABLED(CONFIG_SLS)
+
+ static unsigned long
+ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
+@@ -368,10 +368,7 @@ create_trampoline(struct ftrace_ops *ops
+
+ /* The trampoline ends with ret(q) */
+ retq = (unsigned long)ftrace_stub;
+- if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
+- memcpy(ip, text_gen_insn(JMP32_INSN_OPCODE, ip, &__x86_return_thunk), JMP32_INSN_SIZE);
+- else
+- ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE);
++ ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE);
+ if (WARN_ON(ret < 0))
+ goto fail;
+
io_uring-use-original-request-task-for-inflight-tracking.patch
+tee-add-overflow-check-in-register_shm_helper.patch
+net_sched-cls_route-disallow-handle-of-0.patch
+ksmbd-prevent-out-of-bound-read-for-smb2_write.patch
+ksmbd-fix-heap-based-overflow-in-set_ntacl_dacl.patch
+revert-x86-ftrace-use-alternative-ret-encoding.patch
+x86-ibt-ftrace-make-function-graph-play-nice.patch
+x86-ftrace-use-alternative-ret-encoding.patch
+btrfs-only-write-the-sectors-in-the-vertical-stripe-which-has-data-stripes.patch
+btrfs-raid56-don-t-trust-any-cached-sector-in-__raid56_parity_recover.patch
--- /dev/null
+From 573ae4f13f630d6660008f1974c0a8a29c30e18a Mon Sep 17 00:00:00 2001
+From: Jens Wiklander <jens.wiklander@linaro.org>
+Date: Thu, 18 Aug 2022 13:08:59 +0200
+Subject: tee: add overflow check in register_shm_helper()
+
+From: Jens Wiklander <jens.wiklander@linaro.org>
+
+commit 573ae4f13f630d6660008f1974c0a8a29c30e18a upstream.
+
+With special lengths supplied by user space, register_shm_helper() has
+an integer overflow when calculating the number of pages covered by a
+supplied user space memory region.
+
+This causes internal_get_user_pages_fast(), a helper function of
+pin_user_pages_fast(), to do a NULL pointer dereference:
+
+ Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010
+ Modules linked in:
+ CPU: 1 PID: 173 Comm: optee_example_a Not tainted 5.19.0 #11
+ Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015
+ pc : internal_get_user_pages_fast+0x474/0xa80
+ Call trace:
+ internal_get_user_pages_fast+0x474/0xa80
+ pin_user_pages_fast+0x24/0x4c
+ register_shm_helper+0x194/0x330
+ tee_shm_register_user_buf+0x78/0x120
+ tee_ioctl+0xd0/0x11a0
+ __arm64_sys_ioctl+0xa8/0xec
+ invoke_syscall+0x48/0x114
+
+Fix this by adding an explicit call to access_ok() in
+tee_shm_register_user_buf() to catch an invalid user space address
+early.
+
+Fixes: 033ddf12bcf5 ("tee: add register user memory")
+Cc: stable@vger.kernel.org
+Reported-by: Nimish Mishra <neelam.nimish@gmail.com>
+Reported-by: Anirban Chakraborty <ch.anirban00727@gmail.com>
+Reported-by: Debdeep Mukhopadhyay <debdeep.mukhopadhyay@gmail.com>
+Suggested-by: Jerome Forissier <jerome.forissier@linaro.org>
+Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/tee/tee_shm.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/tee/tee_shm.c
++++ b/drivers/tee/tee_shm.c
+@@ -222,6 +222,9 @@ struct tee_shm *tee_shm_register(struct
+ goto err;
+ }
+
++ if (!access_ok((void __user *)addr, length))
++ return ERR_PTR(-EFAULT);
++
+ mutex_lock(&teedev->mutex);
+ shm->id = idr_alloc(&teedev->idr, shm, 1, 0, GFP_KERNEL);
+ mutex_unlock(&teedev->mutex);
--- /dev/null
+From foo@baz Fri Aug 19 01:18:09 PM CEST 2022
+From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Date: Tue, 16 Aug 2022 05:26:58 -0300
+Subject: x86/ftrace: Use alternative RET encoding
+To: stable@vger.kernel.org
+Cc: paul.gortmaker@windriver.com, gregkh@linuxfoundation.org, peterz@infradead.org, bp@suse.de, jpoimboe@kernel.org, Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Message-ID: <20220816082658.172387-3-cascardo@canonical.com>
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 1f001e9da6bbf482311e45e48f53c2bd2179e59c upstream.
+
+Use the return thunk in ftrace trampolines, if needed.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: use memcpy(text_gen_insn) as there is no __text_gen_insn]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/ftrace.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kernel/ftrace.c
++++ b/arch/x86/kernel/ftrace.c
+@@ -309,7 +309,7 @@ union ftrace_op_code_union {
+ } __attribute__((packed));
+ };
+
+-#define RET_SIZE 1 + IS_ENABLED(CONFIG_SLS)
++#define RET_SIZE (IS_ENABLED(CONFIG_RETPOLINE) ? 5 : 1 + IS_ENABLED(CONFIG_SLS))
+
+ static unsigned long
+ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
+@@ -365,7 +365,12 @@ create_trampoline(struct ftrace_ops *ops
+ goto fail;
+
+ ip = trampoline + size;
+- memcpy(ip, retq, RET_SIZE);
++
++ /* The trampoline ends with ret(q) */
++ if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
++ memcpy(ip, text_gen_insn(JMP32_INSN_OPCODE, ip, &__x86_return_thunk), JMP32_INSN_SIZE);
++ else
++ memcpy(ip, retq, sizeof(retq));
+
+ /* No need to test direct calls on created trampolines */
+ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
--- /dev/null
+From foo@baz Fri Aug 19 01:18:09 PM CEST 2022
+From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Date: Tue, 16 Aug 2022 05:26:57 -0300
+Subject: x86/ibt,ftrace: Make function-graph play nice
+To: stable@vger.kernel.org
+Cc: paul.gortmaker@windriver.com, gregkh@linuxfoundation.org, peterz@infradead.org, bp@suse.de, jpoimboe@kernel.org, Josh Poimboeuf <jpoimboe@redhat.com>, Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Message-ID: <20220816082658.172387-2-cascardo@canonical.com>
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit e52fc2cf3f662828cc0d51c4b73bed73ad275fce upstream.
+
+Return trampoline must not use indirect branch to return; while this
+preserves the RSB, it is fundamentally incompatible with IBT. Instead
+use a retpoline like ROP gadget that defeats IBT while not unbalancing
+the RSB.
+
+And since ftrace_stub is no longer a plain RET, don't use it to copy
+from. Since RET is a trivial instruction, poke it directly.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Link: https://lore.kernel.org/r/20220308154318.347296408@infradead.org
+[cascardo: remove ENDBR]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/ftrace.c | 9 ++-------
+ arch/x86/kernel/ftrace_64.S | 19 +++++++++++++++----
+ 2 files changed, 17 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/kernel/ftrace.c
++++ b/arch/x86/kernel/ftrace.c
+@@ -322,12 +322,12 @@ create_trampoline(struct ftrace_ops *ops
+ unsigned long offset;
+ unsigned long npages;
+ unsigned long size;
+- unsigned long retq;
+ unsigned long *ptr;
+ void *trampoline;
+ void *ip;
+ /* 48 8b 15 <offset> is movq <offset>(%rip), %rdx */
+ unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 };
++ unsigned const char retq[] = { RET_INSN_OPCODE, INT3_INSN_OPCODE };
+ union ftrace_op_code_union op_ptr;
+ int ret;
+
+@@ -365,12 +365,7 @@ create_trampoline(struct ftrace_ops *ops
+ goto fail;
+
+ ip = trampoline + size;
+-
+- /* The trampoline ends with ret(q) */
+- retq = (unsigned long)ftrace_stub;
+- ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE);
+- if (WARN_ON(ret < 0))
+- goto fail;
++ memcpy(ip, retq, RET_SIZE);
+
+ /* No need to test direct calls on created trampolines */
+ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
+--- a/arch/x86/kernel/ftrace_64.S
++++ b/arch/x86/kernel/ftrace_64.S
+@@ -181,7 +181,6 @@ SYM_INNER_LABEL(ftrace_graph_call, SYM_L
+
+ /*
+ * This is weak to keep gas from relaxing the jumps.
+- * It is also used to copy the RET for trampolines.
+ */
+ SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK)
+ UNWIND_HINT_FUNC
+@@ -335,7 +334,7 @@ SYM_FUNC_START(ftrace_graph_caller)
+ SYM_FUNC_END(ftrace_graph_caller)
+
+ SYM_FUNC_START(return_to_handler)
+- subq $24, %rsp
++ subq $16, %rsp
+
+ /* Save the return values */
+ movq %rax, (%rsp)
+@@ -347,7 +346,19 @@ SYM_FUNC_START(return_to_handler)
+ movq %rax, %rdi
+ movq 8(%rsp), %rdx
+ movq (%rsp), %rax
+- addq $24, %rsp
+- JMP_NOSPEC rdi
++
++ addq $16, %rsp
++ /*
++ * Jump back to the old return address. This cannot be JMP_NOSPEC rdi
++ * since IBT would demand that contain ENDBR, which simply isn't so for
++ * return addresses. Use a retpoline here to keep the RSB balanced.
++ */
++ ANNOTATE_INTRA_FUNCTION_CALL
++ call .Ldo_rop
++ int3
++.Ldo_rop:
++ mov %rdi, (%rsp)
++ UNWIND_HINT_FUNC
++ RET
+ SYM_FUNC_END(return_to_handler)
+ #endif