5.15-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Fri, 19 Aug 2022 11:27:05 +0000 (13:27 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Fri, 19 Aug 2022 11:27:05 +0000 (13:27 +0200)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 19 Aug 2022 11:27:05 +0000 (13:27 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 19 Aug 2022 11:27:05 +0000 (13:27 +0200)
diff --git a/queue-5.15/btrfs-only-write-the-sectors-in-the-vertical-stripe-which-has-data-stripes.patch b/queue-5.15/btrfs-only-write-the-sectors-in-the-vertical-stripe-which-has-data-stripes.patch

new file mode 100644 (file)

index 0000000..bc6dba1
--- /dev/null
+++ b/queue-5.15/btrfs-only-write-the-sectors-in-the-vertical-stripe-which-has-data-stripes.patch
@@ -0,0 +1,166 @@
+From foo@baz Fri Aug 19 01:21:02 PM CEST 2022
+From: Qu Wenruo <wqu@suse.com>
+Date: Fri, 19 Aug 2022 16:39:49 +0800
+Subject: btrfs: only write the sectors in the vertical stripe which has data stripes
+To: linux-btrfs@vger.kernel.org, stable@vger.kernel.org
+Cc: David Sterba <dsterba@suse.com>
+Message-ID: <e1694a5724fa6b950b10566599a492ce4537f772.1660898037.git.wqu@suse.com>
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit bd8f7e627703ca5707833d623efcd43f104c7b3f upstream.
+
+If we have only 8K partial write at the beginning of a full RAID56
+stripe, we will write the following contents:
+
+                    0  8K           32K             64K
+Disk 1 (data):     |XX|            |               |
+Disk 2  (data):     |               |               |
+Disk 3  (parity):   |XXXXXXXXXXXXXXX|XXXXXXXXXXXXXXX|
+
+|X| means the sector will be written back to disk.
+
+Note that, although we won't write any sectors from disk 2, we will still
+write the full 64KiB of parity to disk.
+
+This behavior is fine for now, but not for the future (especially for
+RAID56J, as we waste quite some space to journal the unused parity
+stripes).
+
+So here we will also utilize the btrfs_raid_bio::dbitmap, anytime we
+queue a higher level bio into an rbio, we will update rbio::dbitmap to
+indicate which vertical stripes we need to writeback.
+
+And at finish_rmw(), we also check dbitmap to see if we need to write
+any sector in the vertical stripe.
+
+So after the patch, above example will only lead to the following
+writeback pattern:
+
+                    0  8K           32K             64K
+Disk 1 (data):     |XX|            |               |
+Disk 2  (data):     |               |               |
+Disk 3  (parity):   |XX|            |               |
+
+Acked-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/raid56.c |   55 ++++++++++++++++++++++++++++++++++++++++++++++++++----
+ 1 file changed, 51 insertions(+), 4 deletions(-)
+
+--- a/fs/btrfs/raid56.c
++++ b/fs/btrfs/raid56.c
+@@ -324,6 +324,9 @@ static void merge_rbio(struct btrfs_raid
+ {
+       bio_list_merge(&dest->bio_list, &victim->bio_list);
+       dest->bio_list_bytes += victim->bio_list_bytes;
++      /* Also inherit the bitmaps from @victim. */
++      bitmap_or(dest->dbitmap, victim->dbitmap, dest->dbitmap,
++                dest->stripe_npages);
+       dest->generic_bio_cnt += victim->generic_bio_cnt;
+       bio_list_init(&victim->bio_list);
+ }
+@@ -865,6 +868,12 @@ static void rbio_orig_end_io(struct btrf
+ 
+       if (rbio->generic_bio_cnt)
+               btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt);
++      /*
++       * Clear the data bitmap, as the rbio may be cached for later usage.
++       * Do this before unlock_stripe() so there will be no new bio
++       * for this bio.
++       */
++      bitmap_clear(rbio->dbitmap, 0, rbio->stripe_npages);
+ 
+       /*
+        * At this moment, rbio->bio_list is empty, however since rbio does not
+@@ -1197,6 +1206,9 @@ static noinline void finish_rmw(struct b
+       else
+               BUG();
+ 
++      /* We should have at least one data sector. */
++      ASSERT(bitmap_weight(rbio->dbitmap, rbio->stripe_npages));
++
+       /* at this point we either have a full stripe,
+        * or we've read the full stripe from the drive.
+        * recalculate the parity and write the new results.
+@@ -1268,6 +1280,11 @@ static noinline void finish_rmw(struct b
+       for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
+               for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
+                       struct page *page;
++
++                      /* This vertical stripe has no data, skip it. */
++                      if (!test_bit(pagenr, rbio->dbitmap))
++                              continue;
++
+                       if (stripe < rbio->nr_data) {
+                               page = page_in_rbio(rbio, stripe, pagenr, 1);
+                               if (!page)
+@@ -1292,6 +1309,11 @@ static noinline void finish_rmw(struct b
+ 
+               for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
+                       struct page *page;
++
++                      /* This vertical stripe has no data, skip it. */
++                      if (!test_bit(pagenr, rbio->dbitmap))
++                              continue;
++
+                       if (stripe < rbio->nr_data) {
+                               page = page_in_rbio(rbio, stripe, pagenr, 1);
+                               if (!page)
+@@ -1715,6 +1737,33 @@ static void btrfs_raid_unplug(struct blk
+       run_plug(plug);
+ }
+ 
++/* Add the original bio into rbio->bio_list, and update rbio::dbitmap. */
++static void rbio_add_bio(struct btrfs_raid_bio *rbio, struct bio *orig_bio)
++{
++      const struct btrfs_fs_info *fs_info = rbio->fs_info;
++      const u64 orig_logical = orig_bio->bi_iter.bi_sector << SECTOR_SHIFT;
++      const u64 full_stripe_start = rbio->bioc->raid_map[0];
++      const u32 orig_len = orig_bio->bi_iter.bi_size;
++      const u32 sectorsize = fs_info->sectorsize;
++      u64 cur_logical;
++
++      ASSERT(orig_logical >= full_stripe_start &&
++             orig_logical + orig_len <= full_stripe_start +
++             rbio->nr_data * rbio->stripe_len);
++
++      bio_list_add(&rbio->bio_list, orig_bio);
++      rbio->bio_list_bytes += orig_bio->bi_iter.bi_size;
++
++      /* Update the dbitmap. */
++      for (cur_logical = orig_logical; cur_logical < orig_logical + orig_len;
++           cur_logical += sectorsize) {
++              int bit = ((u32)(cur_logical - full_stripe_start) >>
++                         fs_info->sectorsize_bits) % rbio->stripe_npages;
++
++              set_bit(bit, rbio->dbitmap);
++      }
++}
++
+ /*
+  * our main entry point for writes from the rest of the FS.
+  */
+@@ -1731,9 +1780,8 @@ int raid56_parity_write(struct btrfs_fs_
+               btrfs_put_bioc(bioc);
+               return PTR_ERR(rbio);
+       }
+-      bio_list_add(&rbio->bio_list, bio);
+-      rbio->bio_list_bytes = bio->bi_iter.bi_size;
+       rbio->operation = BTRFS_RBIO_WRITE;
++      rbio_add_bio(rbio, bio);
+ 
+       btrfs_bio_counter_inc_noblocked(fs_info);
+       rbio->generic_bio_cnt = 1;
+@@ -2135,8 +2183,7 @@ int raid56_parity_recover(struct btrfs_f
+       }
+ 
+       rbio->operation = BTRFS_RBIO_READ_REBUILD;
+-      bio_list_add(&rbio->bio_list, bio);
+-      rbio->bio_list_bytes = bio->bi_iter.bi_size;
++      rbio_add_bio(rbio, bio);
+ 
+       rbio->faila = find_logical_bio_stripe(rbio, bio);
+       if (rbio->faila == -1) {
diff --git a/queue-5.15/btrfs-raid56-don-t-trust-any-cached-sector-in-__raid56_parity_recover.patch b/queue-5.15/btrfs-raid56-don-t-trust-any-cached-sector-in-__raid56_parity_recover.patch

new file mode 100644 (file)

index 0000000..f9d0638
--- /dev/null
+++ b/queue-5.15/btrfs-raid56-don-t-trust-any-cached-sector-in-__raid56_parity_recover.patch
@@ -0,0 +1,207 @@
+From foo@baz Fri Aug 19 01:21:02 PM CEST 2022
+From: Qu Wenruo <wqu@suse.com>
+Date: Fri, 19 Aug 2022 16:39:50 +0800
+Subject: btrfs: raid56: don't trust any cached sector in __raid56_parity_recover()
+To: linux-btrfs@vger.kernel.org, stable@vger.kernel.org
+Cc: David Sterba <dsterba@suse.com>
+Message-ID: <83e62eda83a2e3e77d10867388a4599a31f26e14.1660898037.git.wqu@suse.com>
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit f6065f8edeb25f4a9dfe0b446030ad995a84a088 upstream.
+
+[BUG]
+There is a small workload which will always fail with recent kernel:
+(A simplified version from btrfs/125 test case)
+
+  mkfs.btrfs -f -m raid5 -d raid5 -b 1G $dev1 $dev2 $dev3
+  mount $dev1 $mnt
+  xfs_io -f -c "pwrite -S 0xee 0 1M" $mnt/file1
+  sync
+  umount $mnt
+  btrfs dev scan -u $dev3
+  mount -o degraded $dev1 $mnt
+  xfs_io -f -c "pwrite -S 0xff 0 128M" $mnt/file2
+  umount $mnt
+  btrfs dev scan
+  mount $dev1 $mnt
+  btrfs balance start --full-balance $mnt
+  umount $mnt
+
+The failure is always a failure to read some tree blocks:
+
+  BTRFS info (device dm-4): relocating block group 217710592 flags data|raid5
+  BTRFS error (device dm-4): parent transid verify failed on 38993920 wanted 9 found 7
+  BTRFS error (device dm-4): parent transid verify failed on 38993920 wanted 9 found 7
+  ...
+
+[CAUSE]
+With the recently added debug output, we can see all RAID56 operations
+related to full stripe 38928384:
+
+  56.1183: raid56_read_partial: full_stripe=38928384 devid=2 type=DATA1 offset=0 opf=0x0 physical=9502720 len=65536
+  56.1185: raid56_read_partial: full_stripe=38928384 devid=3 type=DATA2 offset=16384 opf=0x0 physical=9519104 len=16384
+  56.1185: raid56_read_partial: full_stripe=38928384 devid=3 type=DATA2 offset=49152 opf=0x0 physical=9551872 len=16384
+  56.1187: raid56_write_stripe: full_stripe=38928384 devid=3 type=DATA2 offset=0 opf=0x1 physical=9502720 len=16384
+  56.1188: raid56_write_stripe: full_stripe=38928384 devid=3 type=DATA2 offset=32768 opf=0x1 physical=9535488 len=16384
+  56.1188: raid56_write_stripe: full_stripe=38928384 devid=1 type=PQ1 offset=0 opf=0x1 physical=30474240 len=16384
+  56.1189: raid56_write_stripe: full_stripe=38928384 devid=1 type=PQ1 offset=32768 opf=0x1 physical=30507008 len=16384
+  56.1218: raid56_write_stripe: full_stripe=38928384 devid=3 type=DATA2 offset=49152 opf=0x1 physical=9551872 len=16384
+  56.1219: raid56_write_stripe: full_stripe=38928384 devid=1 type=PQ1 offset=49152 opf=0x1 physical=30523392 len=16384
+  56.2721: raid56_parity_recover: full stripe=38928384 eb=39010304 mirror=2
+  56.2723: raid56_parity_recover: full stripe=38928384 eb=39010304 mirror=2
+  56.2724: raid56_parity_recover: full stripe=38928384 eb=39010304 mirror=2
+
+Before we enter raid56_parity_recover(), we have triggered some metadata
+write for the full stripe 38928384, which leads us to read all the
+sectors from disk.
+
+Furthermore, btrfs raid56 write will cache its calculated P/Q sectors to
+avoid unnecessary read.
+
+This means, for that full stripe, after any partial write, we will have
+stale data, along with P/Q calculated using that stale data.
+
+Thankfully due to patch "btrfs: only write the sectors in the vertical stripe
+which has data stripes" we haven't submitted all the corrupted P/Q to disk.
+
+When we really need to recover certain range, aka in
+raid56_parity_recover(), we will use the cached rbio, along with its
+cached sectors (the full stripe is all cached).
+
+This explains why we have no event raid56_scrub_read_recover()
+triggered.
+
+Since we have the cached P/Q which is calculated using the stale data,
+the recovered one will just be stale.
+
+In our particular test case, it will always return the same incorrect
+metadata, thus causing the same error message "parent transid verify
+failed on 39010304 wanted 9 found 7" again and again.
+
+[BTRFS DESTRUCTIVE RMW PROBLEM]
+
+Test case btrfs/125 (and above workload) always has its trouble with
+the destructive read-modify-write (RMW) cycle:
+
+        0       32K     64K
+Data1:  | Good  | Good  |
+Data2:  | Bad   | Bad   |
+Parity: | Good  | Good  |
+
+In above case, if we trigger any write into Data1, we will use the bad
+data in Data2 to re-generate parity, killing the only chance to recover
+Data2, thus Data2 is lost forever.
+
+This destructive RMW cycle is not specific to btrfs RAID56, but there
+are some btrfs specific behaviors making the case even worse:
+
+- Btrfs will cache sectors for unrelated vertical stripes.
+
+  In above example, if we're only writing into 0~32K range, btrfs will
+  still read data range (32K ~ 64K) of Data1, and (64K~128K) of Data2.
+  This behavior is to cache sectors for later update.
+
+  Incidentally commit d4e28d9b5f04 ("btrfs: raid56: make steal_rbio()
+  subpage compatible") has a bug which makes RAID56 to never trust the
+  cached sectors, thus slightly improve the situation for recovery.
+
+  Unfortunately, follow up fix "btrfs: update stripe_sectors::uptodate in
+  steal_rbio" will revert the behavior back to the old one.
+
+- Btrfs raid56 partial write will update all P/Q sectors and cache them
+
+  This means, even if data at (64K ~ 96K) of Data2 is free space, and
+  only (96K ~ 128K) of Data2 is really stale data.
+  And we write into that (96K ~ 128K), we will update all the parity
+  sectors for the full stripe.
+
+  This unnecessary behavior will completely kill the chance of recovery.
+
+  Thankfully, an unrelated optimization "btrfs: only write the sectors
+  in the vertical stripe which has data stripes" will prevent
+  submitting the write bio for untouched vertical sectors.
+
+  That optimization will keep the on-disk P/Q untouched for a chance for
+  later recovery.
+
+[FIX]
+Although we have no good way to completely fix the destructive RMW
+(unless we go full scrub for each partial write), we can still limit the
+damage.
+
+With patch "btrfs: only write the sectors in the vertical stripe which
+has data stripes" now we won't really submit the P/Q of unrelated
+vertical stripes, so the on-disk P/Q should still be fine.
+
+Now all we really need to do is just drop all the cached sectors when doing
+recovery.
+
+By this, we have a chance to read the original P/Q from disk, and have a
+chance to recover the stale data, while still keep the cache to speed up
+regular write path.
+
+In fact, just dropping all the cache for recovery path is good enough to
+allow the test case btrfs/125 along with the small script to pass
+reliably.
+
+The lack of metadata write after the degraded mount, and forced metadata
+COW is saving us this time.
+
+So this patch will fix the behavior by not trusting any cache in
+__raid56_parity_recover(), to solve the problem while still keeping the
+cache useful.
+
+But please note that this test pass DOES NOT mean we have solved the
+destructive RMW problem; we just do damage control a little
+better.
+
+Related patches:
+
+- btrfs: only write the sectors in the vertical stripe
+- d4e28d9b5f04 ("btrfs: raid56: make steal_rbio() subpage compatible")
+- btrfs: update stripe_sectors::uptodate in steal_rbio
+
+Acked-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/raid56.c |   19 ++++++-------------
+ 1 file changed, 6 insertions(+), 13 deletions(-)
+
+--- a/fs/btrfs/raid56.c
++++ b/fs/btrfs/raid56.c
+@@ -2085,9 +2085,12 @@ static int __raid56_parity_recover(struc
+       atomic_set(&rbio->error, 0);
+ 
+       /*
+-       * read everything that hasn't failed.  Thanks to the
+-       * stripe cache, it is possible that some or all of these
+-       * pages are going to be uptodate.
++       * Read everything that hasn't failed. However this time we will
++       * not trust any cached sector.
++       * As we may read out some stale data but higher layer is not reading
++       * that stale part.
++       *
++       * So here we always re-read everything in recovery path.
+        */
+       for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
+               if (rbio->faila == stripe || rbio->failb == stripe) {
+@@ -2096,16 +2099,6 @@ static int __raid56_parity_recover(struc
+               }
+ 
+               for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
+-                      struct page *p;
+-
+-                      /*
+-                       * the rmw code may have already read this
+-                       * page in
+-                       */
+-                      p = rbio_stripe_page(rbio, stripe, pagenr);
+-                      if (PageUptodate(p))
+-                              continue;
+-
+                       ret = rbio_add_io_page(rbio, &bio_list,
+                                      rbio_stripe_page(rbio, stripe, pagenr),
+                                      stripe, pagenr, rbio->stripe_len);
diff --git a/queue-5.15/ksmbd-fix-heap-based-overflow-in-set_ntacl_dacl.patch b/queue-5.15/ksmbd-fix-heap-based-overflow-in-set_ntacl_dacl.patch

new file mode 100644 (file)

index 0000000..a9b2760
--- /dev/null
+++ b/queue-5.15/ksmbd-fix-heap-based-overflow-in-set_ntacl_dacl.patch
@@ -0,0 +1,430 @@
+From 8f0541186e9ad1b62accc9519cc2b7a7240272a7 Mon Sep 17 00:00:00 2001
+From: Namjae Jeon <linkinjeon@kernel.org>
+Date: Tue, 2 Aug 2022 07:28:51 +0900
+Subject: ksmbd: fix heap-based overflow in set_ntacl_dacl()
+
+From: Namjae Jeon <linkinjeon@kernel.org>
+
+commit 8f0541186e9ad1b62accc9519cc2b7a7240272a7 upstream.
+
+The testcase uses the SMB2_SET_INFO_HE command to set a malformed file attribute
+under the label `security.NTACL`. The SMB2_QUERY_INFO_HE command in the testcase
+then triggers the following overflow.
+
+[ 4712.003781] ==================================================================
+[ 4712.003790] BUG: KASAN: slab-out-of-bounds in build_sec_desc+0x842/0x1dd0 [ksmbd]
+[ 4712.003807] Write of size 1060 at addr ffff88801e34c068 by task kworker/0:0/4190
+
+[ 4712.003813] CPU: 0 PID: 4190 Comm: kworker/0:0 Not tainted 5.19.0-rc5 #1
+[ 4712.003850] Workqueue: ksmbd-io handle_ksmbd_work [ksmbd]
+[ 4712.003867] Call Trace:
+[ 4712.003870]  <TASK>
+[ 4712.003873]  dump_stack_lvl+0x49/0x5f
+[ 4712.003935]  print_report.cold+0x5e/0x5cf
+[ 4712.003972]  ? ksmbd_vfs_get_sd_xattr+0x16d/0x500 [ksmbd]
+[ 4712.003984]  ? cmp_map_id+0x200/0x200
+[ 4712.003988]  ? build_sec_desc+0x842/0x1dd0 [ksmbd]
+[ 4712.004000]  kasan_report+0xaa/0x120
+[ 4712.004045]  ? build_sec_desc+0x842/0x1dd0 [ksmbd]
+[ 4712.004056]  kasan_check_range+0x100/0x1e0
+[ 4712.004060]  memcpy+0x3c/0x60
+[ 4712.004064]  build_sec_desc+0x842/0x1dd0 [ksmbd]
+[ 4712.004076]  ? parse_sec_desc+0x580/0x580 [ksmbd]
+[ 4712.004088]  ? ksmbd_acls_fattr+0x281/0x410 [ksmbd]
+[ 4712.004099]  smb2_query_info+0xa8f/0x6110 [ksmbd]
+[ 4712.004111]  ? psi_group_change+0x856/0xd70
+[ 4712.004148]  ? update_load_avg+0x1c3/0x1af0
+[ 4712.004152]  ? asym_cpu_capacity_scan+0x5d0/0x5d0
+[ 4712.004157]  ? xas_load+0x23/0x300
+[ 4712.004162]  ? smb2_query_dir+0x1530/0x1530 [ksmbd]
+[ 4712.004173]  ? _raw_spin_lock_bh+0xe0/0xe0
+[ 4712.004179]  handle_ksmbd_work+0x30e/0x1020 [ksmbd]
+[ 4712.004192]  process_one_work+0x778/0x11c0
+[ 4712.004227]  ? _raw_spin_lock_irq+0x8e/0xe0
+[ 4712.004231]  worker_thread+0x544/0x1180
+[ 4712.004234]  ? __cpuidle_text_end+0x4/0x4
+[ 4712.004239]  kthread+0x282/0x320
+[ 4712.004243]  ? process_one_work+0x11c0/0x11c0
+[ 4712.004246]  ? kthread_complete_and_exit+0x30/0x30
+[ 4712.004282]  ret_from_fork+0x1f/0x30
+
+This patch adds buffer validation for the security descriptor that is
+stored by a malformed SMB2_SET_INFO_HE command, and allocates a large
+response buffer for the SMB2_O_INFO_SECURITY file info class.
+
+Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3")
+Cc: stable@vger.kernel.org
+Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-17771
+Reviewed-by: Hyunchul Lee <hyc.lee@gmail.com>
+Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ksmbd/smb2pdu.c |   39 ++++++++++-----
+ fs/ksmbd/smbacl.c  |  130 +++++++++++++++++++++++++++++++++++------------------
+ fs/ksmbd/smbacl.h  |    2 
+ fs/ksmbd/vfs.c     |    5 ++
+ 4 files changed, 119 insertions(+), 57 deletions(-)
+
+--- a/fs/ksmbd/smb2pdu.c
++++ b/fs/ksmbd/smb2pdu.c
+@@ -541,9 +541,10 @@ int smb2_allocate_rsp_buf(struct ksmbd_w
+               struct smb2_query_info_req *req;
+ 
+               req = work->request_buf;
+-              if (req->InfoType == SMB2_O_INFO_FILE &&
+-                  (req->FileInfoClass == FILE_FULL_EA_INFORMATION ||
+-                   req->FileInfoClass == FILE_ALL_INFORMATION))
++              if ((req->InfoType == SMB2_O_INFO_FILE &&
++                   (req->FileInfoClass == FILE_FULL_EA_INFORMATION ||
++                    req->FileInfoClass == FILE_ALL_INFORMATION)) ||
++                  req->InfoType == SMB2_O_INFO_SECURITY)
+                       sz = large_sz;
+       }
+ 
+@@ -2981,7 +2982,7 @@ int smb2_open(struct ksmbd_work *work)
+                                               goto err_out;
+ 
+                                       rc = build_sec_desc(user_ns,
+-                                                          pntsd, NULL,
++                                                          pntsd, NULL, 0,
+                                                           OWNER_SECINFO |
+                                                           GROUP_SECINFO |
+                                                           DACL_SECINFO,
+@@ -3824,6 +3825,15 @@ static int verify_info_level(int info_le
+       return 0;
+ }
+ 
++static int smb2_resp_buf_len(struct ksmbd_work *work, unsigned short hdr2_len)
++{
++      int free_len;
++
++      free_len = (int)(work->response_sz -
++              (get_rfc1002_len(work->response_buf) + 4)) - hdr2_len;
++      return free_len;
++}
++
+ static int smb2_calc_max_out_buf_len(struct ksmbd_work *work,
+                                    unsigned short hdr2_len,
+                                    unsigned int out_buf_len)
+@@ -3833,9 +3843,7 @@ static int smb2_calc_max_out_buf_len(str
+       if (out_buf_len > work->conn->vals->max_trans_size)
+               return -EINVAL;
+ 
+-      free_len = (int)(work->response_sz -
+-                       (get_rfc1002_len(work->response_buf) + 4)) -
+-              hdr2_len;
++      free_len = smb2_resp_buf_len(work, hdr2_len);
+       if (free_len < 0)
+               return -EINVAL;
+ 
+@@ -5087,10 +5095,10 @@ static int smb2_get_info_sec(struct ksmb
+       struct smb_ntsd *pntsd = (struct smb_ntsd *)rsp->Buffer, *ppntsd = NULL;
+       struct smb_fattr fattr = {{0}};
+       struct inode *inode;
+-      __u32 secdesclen;
++      __u32 secdesclen = 0;
+       unsigned int id = KSMBD_NO_FID, pid = KSMBD_NO_FID;
+       int addition_info = le32_to_cpu(req->AdditionalInformation);
+-      int rc;
++      int rc = 0, ppntsd_size = 0;
+ 
+       if (addition_info & ~(OWNER_SECINFO | GROUP_SECINFO | DACL_SECINFO |
+                             PROTECTED_DACL_SECINFO |
+@@ -5136,11 +5144,14 @@ static int smb2_get_info_sec(struct ksmb
+ 
+       if (test_share_config_flag(work->tcon->share_conf,
+                                  KSMBD_SHARE_FLAG_ACL_XATTR))
+-              ksmbd_vfs_get_sd_xattr(work->conn, user_ns,
+-                                     fp->filp->f_path.dentry, &ppntsd);
+-
+-      rc = build_sec_desc(user_ns, pntsd, ppntsd, addition_info,
+-                          &secdesclen, &fattr);
++              ppntsd_size = ksmbd_vfs_get_sd_xattr(work->conn, user_ns,
++                                                   fp->filp->f_path.dentry,
++                                                   &ppntsd);
++
++      /* Check if sd buffer size exceeds response buffer size */
++      if (smb2_resp_buf_len(work, 8) > ppntsd_size)
++              rc = build_sec_desc(user_ns, pntsd, ppntsd, ppntsd_size,
++                                  addition_info, &secdesclen, &fattr);
+       posix_acl_release(fattr.cf_acls);
+       posix_acl_release(fattr.cf_dacls);
+       kfree(ppntsd);
+--- a/fs/ksmbd/smbacl.c
++++ b/fs/ksmbd/smbacl.c
+@@ -690,6 +690,7 @@ posix_default_acl:
+ static void set_ntacl_dacl(struct user_namespace *user_ns,
+                          struct smb_acl *pndacl,
+                          struct smb_acl *nt_dacl,
++                         unsigned int aces_size,
+                          const struct smb_sid *pownersid,
+                          const struct smb_sid *pgrpsid,
+                          struct smb_fattr *fattr)
+@@ -703,9 +704,19 @@ static void set_ntacl_dacl(struct user_n
+       if (nt_num_aces) {
+               ntace = (struct smb_ace *)((char *)nt_dacl + sizeof(struct smb_acl));
+               for (i = 0; i < nt_num_aces; i++) {
+-                      memcpy((char *)pndace + size, ntace, le16_to_cpu(ntace->size));
+-                      size += le16_to_cpu(ntace->size);
+-                      ntace = (struct smb_ace *)((char *)ntace + le16_to_cpu(ntace->size));
++                      unsigned short nt_ace_size;
++
++                      if (offsetof(struct smb_ace, access_req) > aces_size)
++                              break;
++
++                      nt_ace_size = le16_to_cpu(ntace->size);
++                      if (nt_ace_size > aces_size)
++                              break;
++
++                      memcpy((char *)pndace + size, ntace, nt_ace_size);
++                      size += nt_ace_size;
++                      aces_size -= nt_ace_size;
++                      ntace = (struct smb_ace *)((char *)ntace + nt_ace_size);
+                       num_aces++;
+               }
+       }
+@@ -878,7 +889,7 @@ int parse_sec_desc(struct user_namespace
+ /* Convert permission bits from mode to equivalent CIFS ACL */
+ int build_sec_desc(struct user_namespace *user_ns,
+                  struct smb_ntsd *pntsd, struct smb_ntsd *ppntsd,
+-                 int addition_info, __u32 *secdesclen,
++                 int ppntsd_size, int addition_info, __u32 *secdesclen,
+                  struct smb_fattr *fattr)
+ {
+       int rc = 0;
+@@ -938,15 +949,25 @@ int build_sec_desc(struct user_namespace
+ 
+               if (!ppntsd) {
+                       set_mode_dacl(user_ns, dacl_ptr, fattr);
+-              } else if (!ppntsd->dacloffset) {
+-                      goto out;
+               } else {
+                       struct smb_acl *ppdacl_ptr;
++                      unsigned int dacl_offset = le32_to_cpu(ppntsd->dacloffset);
++                      int ppdacl_size, ntacl_size = ppntsd_size - dacl_offset;
++
++                      if (!dacl_offset ||
++                          (dacl_offset + sizeof(struct smb_acl) > ppntsd_size))
++                              goto out;
++
++                      ppdacl_ptr = (struct smb_acl *)((char *)ppntsd + dacl_offset);
++                      ppdacl_size = le16_to_cpu(ppdacl_ptr->size);
++                      if (ppdacl_size > ntacl_size ||
++                          ppdacl_size < sizeof(struct smb_acl))
++                              goto out;
+ 
+-                      ppdacl_ptr = (struct smb_acl *)((char *)ppntsd +
+-                                              le32_to_cpu(ppntsd->dacloffset));
+                       set_ntacl_dacl(user_ns, dacl_ptr, ppdacl_ptr,
+-                                     nowner_sid_ptr, ngroup_sid_ptr, fattr);
++                                     ntacl_size - sizeof(struct smb_acl),
++                                     nowner_sid_ptr, ngroup_sid_ptr,
++                                     fattr);
+               }
+               pntsd->dacloffset = cpu_to_le32(offset);
+               offset += le16_to_cpu(dacl_ptr->size);
+@@ -980,24 +1001,31 @@ int smb_inherit_dacl(struct ksmbd_conn *
+       struct smb_sid owner_sid, group_sid;
+       struct dentry *parent = path->dentry->d_parent;
+       struct user_namespace *user_ns = mnt_user_ns(path->mnt);
+-      int inherited_flags = 0, flags = 0, i, ace_cnt = 0, nt_size = 0;
+-      int rc = 0, num_aces, dacloffset, pntsd_type, acl_len;
++      int inherited_flags = 0, flags = 0, i, ace_cnt = 0, nt_size = 0, pdacl_size;
++      int rc = 0, num_aces, dacloffset, pntsd_type, pntsd_size, acl_len, aces_size;
+       char *aces_base;
+       bool is_dir = S_ISDIR(d_inode(path->dentry)->i_mode);
+ 
+-      acl_len = ksmbd_vfs_get_sd_xattr(conn, user_ns,
+-                                       parent, &parent_pntsd);
+-      if (acl_len <= 0)
++      pntsd_size = ksmbd_vfs_get_sd_xattr(conn, user_ns,
++                                          parent, &parent_pntsd);
++      if (pntsd_size <= 0)
+               return -ENOENT;
+       dacloffset = le32_to_cpu(parent_pntsd->dacloffset);
+-      if (!dacloffset) {
++      if (!dacloffset || (dacloffset + sizeof(struct smb_acl) > pntsd_size)) {
+               rc = -EINVAL;
+               goto free_parent_pntsd;
+       }
+ 
+       parent_pdacl = (struct smb_acl *)((char *)parent_pntsd + dacloffset);
++      acl_len = pntsd_size - dacloffset;
+       num_aces = le32_to_cpu(parent_pdacl->num_aces);
+       pntsd_type = le16_to_cpu(parent_pntsd->type);
++      pdacl_size = le16_to_cpu(parent_pdacl->size);
++
++      if (pdacl_size > acl_len || pdacl_size < sizeof(struct smb_acl)) {
++              rc = -EINVAL;
++              goto free_parent_pntsd;
++      }
+ 
+       aces_base = kmalloc(sizeof(struct smb_ace) * num_aces * 2, GFP_KERNEL);
+       if (!aces_base) {
+@@ -1008,11 +1036,23 @@ int smb_inherit_dacl(struct ksmbd_conn *
+       aces = (struct smb_ace *)aces_base;
+       parent_aces = (struct smb_ace *)((char *)parent_pdacl +
+                       sizeof(struct smb_acl));
++      aces_size = acl_len - sizeof(struct smb_acl);
+ 
+       if (pntsd_type & DACL_AUTO_INHERITED)
+               inherited_flags = INHERITED_ACE;
+ 
+       for (i = 0; i < num_aces; i++) {
++              int pace_size;
++
++              if (offsetof(struct smb_ace, access_req) > aces_size)
++                      break;
++
++              pace_size = le16_to_cpu(parent_aces->size);
++              if (pace_size > aces_size)
++                      break;
++
++              aces_size -= pace_size;
++
+               flags = parent_aces->flags;
+               if (!smb_inherit_flags(flags, is_dir))
+                       goto pass;
+@@ -1057,8 +1097,7 @@ int smb_inherit_dacl(struct ksmbd_conn *
+               aces = (struct smb_ace *)((char *)aces + le16_to_cpu(aces->size));
+               ace_cnt++;
+ pass:
+-              parent_aces =
+-                      (struct smb_ace *)((char *)parent_aces + le16_to_cpu(parent_aces->size));
++              parent_aces = (struct smb_ace *)((char *)parent_aces + pace_size);
+       }
+ 
+       if (nt_size > 0) {
+@@ -1153,7 +1192,7 @@ int smb_check_perm_dacl(struct ksmbd_con
+       struct smb_ntsd *pntsd = NULL;
+       struct smb_acl *pdacl;
+       struct posix_acl *posix_acls;
+-      int rc = 0, acl_size;
++      int rc = 0, pntsd_size, acl_size, aces_size, pdacl_size, dacl_offset;
+       struct smb_sid sid;
+       int granted = le32_to_cpu(*pdaccess & ~FILE_MAXIMAL_ACCESS_LE);
+       struct smb_ace *ace;
+@@ -1162,37 +1201,33 @@ int smb_check_perm_dacl(struct ksmbd_con
+       struct smb_ace *others_ace = NULL;
+       struct posix_acl_entry *pa_entry;
+       unsigned int sid_type = SIDOWNER;
+-      char *end_of_acl;
++      unsigned short ace_size;
+ 
+       ksmbd_debug(SMB, "check permission using windows acl\n");
+-      acl_size = ksmbd_vfs_get_sd_xattr(conn, user_ns,
+-                                        path->dentry, &pntsd);
+-      if (acl_size <= 0 || !pntsd || !pntsd->dacloffset) {
+-              kfree(pntsd);
+-              return 0;
+-      }
++      pntsd_size = ksmbd_vfs_get_sd_xattr(conn, user_ns,
++                                          path->dentry, &pntsd);
++      if (pntsd_size <= 0 || !pntsd)
++              goto err_out;
++
++      dacl_offset = le32_to_cpu(pntsd->dacloffset);
++      if (!dacl_offset ||
++          (dacl_offset + sizeof(struct smb_acl) > pntsd_size))
++              goto err_out;
+ 
+       pdacl = (struct smb_acl *)((char *)pntsd + le32_to_cpu(pntsd->dacloffset));
+-      end_of_acl = ((char *)pntsd) + acl_size;
+-      if (end_of_acl <= (char *)pdacl) {
+-              kfree(pntsd);
+-              return 0;
+-      }
++      acl_size = pntsd_size - dacl_offset;
++      pdacl_size = le16_to_cpu(pdacl->size);
+ 
+-      if (end_of_acl < (char *)pdacl + le16_to_cpu(pdacl->size) ||
+-          le16_to_cpu(pdacl->size) < sizeof(struct smb_acl)) {
+-              kfree(pntsd);
+-              return 0;
+-      }
++      if (pdacl_size > acl_size || pdacl_size < sizeof(struct smb_acl))
++              goto err_out;
+ 
+       if (!pdacl->num_aces) {
+-              if (!(le16_to_cpu(pdacl->size) - sizeof(struct smb_acl)) &&
++              if (!(pdacl_size - sizeof(struct smb_acl)) &&
+                   *pdaccess & ~(FILE_READ_CONTROL_LE | FILE_WRITE_DAC_LE)) {
+                       rc = -EACCES;
+                       goto err_out;
+               }
+-              kfree(pntsd);
+-              return 0;
++              goto err_out;
+       }
+ 
+       if (*pdaccess & FILE_MAXIMAL_ACCESS_LE) {
+@@ -1200,11 +1235,16 @@ int smb_check_perm_dacl(struct ksmbd_con
+                       DELETE;
+ 
+               ace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl));
++              aces_size = acl_size - sizeof(struct smb_acl);
+               for (i = 0; i < le32_to_cpu(pdacl->num_aces); i++) {
++                      if (offsetof(struct smb_ace, access_req) > aces_size)
++                              break;
++                      ace_size = le16_to_cpu(ace->size);
++                      if (ace_size > aces_size)
++                              break;
++                      aces_size -= ace_size;
+                       granted |= le32_to_cpu(ace->access_req);
+                       ace = (struct smb_ace *)((char *)ace + le16_to_cpu(ace->size));
+-                      if (end_of_acl < (char *)ace)
+-                              goto err_out;
+               }
+ 
+               if (!pdacl->num_aces)
+@@ -1216,7 +1256,15 @@ int smb_check_perm_dacl(struct ksmbd_con
+       id_to_sid(uid, sid_type, &sid);
+ 
+       ace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl));
++      aces_size = acl_size - sizeof(struct smb_acl);
+       for (i = 0; i < le32_to_cpu(pdacl->num_aces); i++) {
++              if (offsetof(struct smb_ace, access_req) > aces_size)
++                      break;
++              ace_size = le16_to_cpu(ace->size);
++              if (ace_size > aces_size)
++                      break;
++              aces_size -= ace_size;
++
+               if (!compare_sids(&sid, &ace->sid) ||
+                   !compare_sids(&sid_unix_NFS_mode, &ace->sid)) {
+                       found = 1;
+@@ -1226,8 +1274,6 @@ int smb_check_perm_dacl(struct ksmbd_con
+                       others_ace = ace;
+ 
+               ace = (struct smb_ace *)((char *)ace + le16_to_cpu(ace->size));
+-              if (end_of_acl < (char *)ace)
+-                      goto err_out;
+       }
+ 
+       if (*pdaccess & FILE_MAXIMAL_ACCESS_LE && found) {
+--- a/fs/ksmbd/smbacl.h
++++ b/fs/ksmbd/smbacl.h
+@@ -193,7 +193,7 @@ struct posix_acl_state {
+ int parse_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd,
+                  int acl_len, struct smb_fattr *fattr);
+ int build_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd,
+-                 struct smb_ntsd *ppntsd, int addition_info,
++                 struct smb_ntsd *ppntsd, int ppntsd_size, int addition_info,
+                  __u32 *secdesclen, struct smb_fattr *fattr);
+ int init_acl_state(struct posix_acl_state *state, int cnt);
+ void free_acl_state(struct posix_acl_state *state);
+--- a/fs/ksmbd/vfs.c
++++ b/fs/ksmbd/vfs.c
+@@ -1543,6 +1543,11 @@ int ksmbd_vfs_get_sd_xattr(struct ksmbd_
+       }
+ 
+       *pntsd = acl.sd_buf;
++      if (acl.sd_size < sizeof(struct smb_ntsd)) {
++              pr_err("sd size is invalid\n");
++              goto out_free;
++      }
++
+       (*pntsd)->osidoffset = cpu_to_le32(le32_to_cpu((*pntsd)->osidoffset) -
+                                          NDR_NTSD_OFFSETOF);
+       (*pntsd)->gsidoffset = cpu_to_le32(le32_to_cpu((*pntsd)->gsidoffset) -
diff --git a/queue-5.15/ksmbd-prevent-out-of-bound-read-for-smb2_write.patch b/queue-5.15/ksmbd-prevent-out-of-bound-read-for-smb2_write.patch

new file mode 100644 (file)

index 0000000..88f3924
--- /dev/null
+++ b/queue-5.15/ksmbd-prevent-out-of-bound-read-for-smb2_write.patch
@@ -0,0 +1,116 @@
+From ac60778b87e45576d7bfdbd6f53df902654e6f09 Mon Sep 17 00:00:00 2001
+From: Hyunchul Lee <hyc.lee@gmail.com>
+Date: Thu, 28 Jul 2022 23:41:51 +0900
+Subject: ksmbd: prevent out of bound read for SMB2_WRITE
+
+From: Hyunchul Lee <hyc.lee@gmail.com>
+
+commit ac60778b87e45576d7bfdbd6f53df902654e6f09 upstream.
+
+OOB read memory can be written to a file,
+if DataOffset is 0 and Length is too large
+in SMB2_WRITE request of compound request.
+
+To prevent this, when checking the length of
+the data area of SMB2_WRITE in smb2_get_data_area_len(),
+let the minimum of DataOffset be the size of
+SMB2 header + the size of SMB2_WRITE header.
+
+This bug can lead to an oops looking something like:
+
+[  798.008715] BUG: KASAN: slab-out-of-bounds in copy_page_from_iter_atomic+0xd3d/0x14b0
+[  798.008724] Read of size 252 at addr ffff88800f863e90 by task kworker/0:2/2859
+...
+[  798.008754] Call Trace:
+[  798.008756]  <TASK>
+[  798.008759]  dump_stack_lvl+0x49/0x5f
+[  798.008764]  print_report.cold+0x5e/0x5cf
+[  798.008768]  ? __filemap_get_folio+0x285/0x6d0
+[  798.008774]  ? copy_page_from_iter_atomic+0xd3d/0x14b0
+[  798.008777]  kasan_report+0xaa/0x120
+[  798.008781]  ? copy_page_from_iter_atomic+0xd3d/0x14b0
+[  798.008784]  kasan_check_range+0x100/0x1e0
+[  798.008788]  memcpy+0x24/0x60
+[  798.008792]  copy_page_from_iter_atomic+0xd3d/0x14b0
+[  798.008795]  ? pagecache_get_page+0x53/0x160
+[  798.008799]  ? iov_iter_get_pages_alloc+0x1590/0x1590
+[  798.008803]  ? ext4_write_begin+0xfc0/0xfc0
+[  798.008807]  ? current_time+0x72/0x210
+[  798.008811]  generic_perform_write+0x2c8/0x530
+[  798.008816]  ? filemap_fdatawrite_wbc+0x180/0x180
+[  798.008820]  ? down_write+0xb4/0x120
+[  798.008824]  ? down_write_killable+0x130/0x130
+[  798.008829]  ext4_buffered_write_iter+0x137/0x2c0
+[  798.008833]  ext4_file_write_iter+0x40b/0x1490
+[  798.008837]  ? __fsnotify_parent+0x275/0xb20
+[  798.008842]  ? __fsnotify_update_child_dentry_flags+0x2c0/0x2c0
+[  798.008846]  ? ext4_buffered_write_iter+0x2c0/0x2c0
+[  798.008851]  __kernel_write+0x3a1/0xa70
+[  798.008855]  ? __x64_sys_preadv2+0x160/0x160
+[  798.008860]  ? security_file_permission+0x4a/0xa0
+[  798.008865]  kernel_write+0xbb/0x360
+[  798.008869]  ksmbd_vfs_write+0x27e/0xb90 [ksmbd]
+[  798.008881]  ? ksmbd_vfs_read+0x830/0x830 [ksmbd]
+[  798.008892]  ? _raw_read_unlock+0x2a/0x50
+[  798.008896]  smb2_write+0xb45/0x14e0 [ksmbd]
+[  798.008909]  ? __kasan_check_write+0x14/0x20
+[  798.008912]  ? _raw_spin_lock_bh+0xd0/0xe0
+[  798.008916]  ? smb2_read+0x15e0/0x15e0 [ksmbd]
+[  798.008927]  ? memcpy+0x4e/0x60
+[  798.008931]  ? _raw_spin_unlock+0x19/0x30
+[  798.008934]  ? ksmbd_smb2_check_message+0x16af/0x2350 [ksmbd]
+[  798.008946]  ? _raw_spin_lock_bh+0xe0/0xe0
+[  798.008950]  handle_ksmbd_work+0x30e/0x1020 [ksmbd]
+[  798.008962]  process_one_work+0x778/0x11c0
+[  798.008966]  ? _raw_spin_lock_irq+0x8e/0xe0
+[  798.008970]  worker_thread+0x544/0x1180
+[  798.008973]  ? __cpuidle_text_end+0x4/0x4
+[  798.008977]  kthread+0x282/0x320
+[  798.008982]  ? process_one_work+0x11c0/0x11c0
+[  798.008985]  ? kthread_complete_and_exit+0x30/0x30
+[  798.008989]  ret_from_fork+0x1f/0x30
+[  798.008995]  </TASK>
+
+Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3")
+Cc: stable@vger.kernel.org
+Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-17817
+Signed-off-by: Hyunchul Lee <hyc.lee@gmail.com>
+Acked-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ksmbd/smb2misc.c |    7 +++++--
+ fs/ksmbd/smb2pdu.c  |    6 ++----
+ 2 files changed, 7 insertions(+), 6 deletions(-)
+
+--- a/fs/ksmbd/smb2misc.c
++++ b/fs/ksmbd/smb2misc.c
+@@ -132,8 +132,11 @@ static int smb2_get_data_area_len(unsign
+               *len = le16_to_cpu(((struct smb2_read_req *)hdr)->ReadChannelInfoLength);
+               break;
+       case SMB2_WRITE:
+-              if (((struct smb2_write_req *)hdr)->DataOffset) {
+-                      *off = le16_to_cpu(((struct smb2_write_req *)hdr)->DataOffset);
++              if (((struct smb2_write_req *)hdr)->DataOffset ||
++                  ((struct smb2_write_req *)hdr)->Length) {
++                      *off = max_t(unsigned int,
++                                   le16_to_cpu(((struct smb2_write_req *)hdr)->DataOffset),
++                                   offsetof(struct smb2_write_req, Buffer) - 4);
+                       *len = le32_to_cpu(((struct smb2_write_req *)hdr)->Length);
+                       break;
+               }
+--- a/fs/ksmbd/smb2pdu.c
++++ b/fs/ksmbd/smb2pdu.c
+@@ -6471,10 +6471,8 @@ int smb2_write(struct ksmbd_work *work)
+                   (offsetof(struct smb2_write_req, Buffer) - 4)) {
+                       data_buf = (char *)&req->Buffer[0];
+               } else {
+-                      if ((u64)le16_to_cpu(req->DataOffset) + length > get_rfc1002_len(req)) {
+-                              pr_err("invalid write data offset %u, smb_len %u\n",
+-                                     le16_to_cpu(req->DataOffset),
+-                                     get_rfc1002_len(req));
++                      if (le16_to_cpu(req->DataOffset) <
++                          offsetof(struct smb2_write_req, Buffer)) {
+                               err = -EINVAL;
+                               goto out;
+                       }
diff --git a/queue-5.15/net_sched-cls_route-disallow-handle-of-0.patch b/queue-5.15/net_sched-cls_route-disallow-handle-of-0.patch

new file mode 100644 (file)

index 0000000..5ea429d
--- /dev/null
+++ b/queue-5.15/net_sched-cls_route-disallow-handle-of-0.patch
@@ -0,0 +1,87 @@
+From 02799571714dc5dd6948824b9d080b44a295f695 Mon Sep 17 00:00:00 2001
+From: Jamal Hadi Salim <jhs@mojatatu.com>
+Date: Sun, 14 Aug 2022 11:27:58 +0000
+Subject: net_sched: cls_route: disallow handle of 0
+
+From: Jamal Hadi Salim <jhs@mojatatu.com>
+
+commit 02799571714dc5dd6948824b9d080b44a295f695 upstream.
+
+Follows up on:
+https://lore.kernel.org/all/20220809170518.164662-1-cascardo@canonical.com/
+
+handle of 0 implies from/to of universe realm which is not very
+sensible.
+
+Lets see what this patch will do:
+$sudo tc qdisc add dev $DEV root handle 1:0 prio
+
+//lets manufacture a way to insert handle of 0
+$sudo tc filter add dev $DEV parent 1:0 protocol ip prio 100 \
+route to 0 from 0 classid 1:10 action ok
+
+//gets rejected...
+Error: handle of 0 is not valid.
+We have an error talking to the kernel, -1
+
+//lets create a legit entry..
+sudo tc filter add dev $DEV parent 1:0 protocol ip prio 100 route from 10 \
+classid 1:10 action ok
+
+//what did the kernel insert?
+$sudo tc filter ls dev $DEV parent 1:0
+filter protocol ip pref 100 route chain 0
+filter protocol ip pref 100 route chain 0 fh 0x000a8000 flowid 1:10 from 10
+       action order 1: gact action pass
+        random type none pass val 0
+        index 1 ref 1 bind 1
+
+//Lets try to replace that legit entry with a handle of 0
+$ sudo tc filter replace dev $DEV parent 1:0 protocol ip prio 100 \
+handle 0x000a8000 route to 0 from 0 classid 1:10 action drop
+
+Error: Replacing with handle of 0 is invalid.
+We have an error talking to the kernel, -1
+
+And last, lets run Cascardo's POC:
+$ ./poc
+0
+0
+-22
+-22
+-22
+
+Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Acked-by: Stephen Hemminger <stephen@networkplumber.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/cls_route.c |   10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/net/sched/cls_route.c
++++ b/net/sched/cls_route.c
+@@ -424,6 +424,11 @@ static int route4_set_parms(struct net *
+                       return -EINVAL;
+       }
+ 
++      if (!nhandle) {
++              NL_SET_ERR_MSG(extack, "Replacing with handle of 0 is invalid");
++              return -EINVAL;
++      }
++
+       h1 = to_hash(nhandle);
+       b = rtnl_dereference(head->table[h1]);
+       if (!b) {
+@@ -477,6 +482,11 @@ static int route4_change(struct net *net
+       int err;
+       bool new = true;
+ 
++      if (!handle) {
++              NL_SET_ERR_MSG(extack, "Creating with handle of 0 is invalid");
++              return -EINVAL;
++      }
++
+       if (opt == NULL)
+               return handle ? -EINVAL : 0;
+ 
diff --git a/queue-5.15/revert-x86-ftrace-use-alternative-ret-encoding.patch b/queue-5.15/revert-x86-ftrace-use-alternative-ret-encoding.patch

new file mode 100644 (file)

index 0000000..5b11a7b
--- /dev/null
+++ b/queue-5.15/revert-x86-ftrace-use-alternative-ret-encoding.patch
@@ -0,0 +1,47 @@
+From foo@baz Fri Aug 19 01:16:51 PM CEST 2022
+From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Date: Tue, 16 Aug 2022 05:26:56 -0300
+Subject: Revert "x86/ftrace: Use alternative RET encoding"
+To: stable@vger.kernel.org
+Cc: paul.gortmaker@windriver.com, gregkh@linuxfoundation.org, peterz@infradead.org, bp@suse.de, jpoimboe@kernel.org, Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Message-ID: <20220816082658.172387-1-cascardo@canonical.com>
+
+From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+
+This reverts commit e54fcb0812faebd147de72bd37ad87cc4951c68c.
+
+This temporarily reverts the backport of upstream commit
+1f001e9da6bbf482311e45e48f53c2bd2179e59c. It was not correct to copy the
+ftrace stub as it would contain a relative jump to the return thunk which
+would not apply to the context where it was being copied to, leaving
+ftrace support broken.
+
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/ftrace.c |    7 ++-----
+ 1 file changed, 2 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/kernel/ftrace.c
++++ b/arch/x86/kernel/ftrace.c
+@@ -309,7 +309,7 @@ union ftrace_op_code_union {
+       } __attribute__((packed));
+ };
+ 
+-#define RET_SIZE              (IS_ENABLED(CONFIG_RETPOLINE) ? 5 : 1 + IS_ENABLED(CONFIG_SLS))
++#define RET_SIZE              1 + IS_ENABLED(CONFIG_SLS)
+ 
+ static unsigned long
+ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
+@@ -368,10 +368,7 @@ create_trampoline(struct ftrace_ops *ops
+ 
+       /* The trampoline ends with ret(q) */
+       retq = (unsigned long)ftrace_stub;
+-      if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
+-              memcpy(ip, text_gen_insn(JMP32_INSN_OPCODE, ip, &__x86_return_thunk), JMP32_INSN_SIZE);
+-      else
+-              ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE);
++      ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE);
+       if (WARN_ON(ret < 0))
+               goto fail;
+ 
diff --git a/queue-5.15/series b/queue-5.15/series

index ccc936fe2acc288fb1739fc613888ec70ab93d7c..15eb293d70a79fdedfb02a05a511cb33ec286236 100644 (file)
--- a/queue-5.15/series
+++ b/queue-5.15/series
@@ -1 +1,10 @@
  io_uring-use-original-request-task-for-inflight-tracking.patch
+tee-add-overflow-check-in-register_shm_helper.patch
+net_sched-cls_route-disallow-handle-of-0.patch
+ksmbd-prevent-out-of-bound-read-for-smb2_write.patch
+ksmbd-fix-heap-based-overflow-in-set_ntacl_dacl.patch
+revert-x86-ftrace-use-alternative-ret-encoding.patch
+x86-ibt-ftrace-make-function-graph-play-nice.patch
+x86-ftrace-use-alternative-ret-encoding.patch
+btrfs-only-write-the-sectors-in-the-vertical-stripe-which-has-data-stripes.patch
+btrfs-raid56-don-t-trust-any-cached-sector-in-__raid56_parity_recover.patch
diff --git a/queue-5.15/tee-add-overflow-check-in-register_shm_helper.patch b/queue-5.15/tee-add-overflow-check-in-register_shm_helper.patch

new file mode 100644 (file)

index 0000000..c5f847c
--- /dev/null
+++ b/queue-5.15/tee-add-overflow-check-in-register_shm_helper.patch
@@ -0,0 +1,59 @@
+From 573ae4f13f630d6660008f1974c0a8a29c30e18a Mon Sep 17 00:00:00 2001
+From: Jens Wiklander <jens.wiklander@linaro.org>
+Date: Thu, 18 Aug 2022 13:08:59 +0200
+Subject: tee: add overflow check in register_shm_helper()
+
+From: Jens Wiklander <jens.wiklander@linaro.org>
+
+commit 573ae4f13f630d6660008f1974c0a8a29c30e18a upstream.
+
+With special lengths supplied by user space, register_shm_helper() has
+an integer overflow when calculating the number of pages covered by a
+supplied user space memory region.
+
+This causes internal_get_user_pages_fast(), a helper function of
+pin_user_pages_fast(), to do a NULL pointer dereference:
+
+  Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010
+  Modules linked in:
+  CPU: 1 PID: 173 Comm: optee_example_a Not tainted 5.19.0 #11
+  Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015
+  pc : internal_get_user_pages_fast+0x474/0xa80
+  Call trace:
+   internal_get_user_pages_fast+0x474/0xa80
+   pin_user_pages_fast+0x24/0x4c
+   register_shm_helper+0x194/0x330
+   tee_shm_register_user_buf+0x78/0x120
+   tee_ioctl+0xd0/0x11a0
+   __arm64_sys_ioctl+0xa8/0xec
+   invoke_syscall+0x48/0x114
+
+Fix this by adding an explicit call to access_ok() in
+tee_shm_register_user_buf() to catch an invalid user space address
+early.
+
+Fixes: 033ddf12bcf5 ("tee: add register user memory")
+Cc: stable@vger.kernel.org
+Reported-by: Nimish Mishra <neelam.nimish@gmail.com>
+Reported-by: Anirban Chakraborty <ch.anirban00727@gmail.com>
+Reported-by: Debdeep Mukhopadhyay <debdeep.mukhopadhyay@gmail.com>
+Suggested-by: Jerome Forissier <jerome.forissier@linaro.org>
+Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/tee/tee_shm.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/tee/tee_shm.c
++++ b/drivers/tee/tee_shm.c
+@@ -222,6 +222,9 @@ struct tee_shm *tee_shm_register(struct
+               goto err;
+       }
+ 
++      if (!access_ok((void __user *)addr, length))
++              return ERR_PTR(-EFAULT);
++
+       mutex_lock(&teedev->mutex);
+       shm->id = idr_alloc(&teedev->idr, shm, 1, 0, GFP_KERNEL);
+       mutex_unlock(&teedev->mutex);
diff --git a/queue-5.15/x86-ftrace-use-alternative-ret-encoding.patch b/queue-5.15/x86-ftrace-use-alternative-ret-encoding.patch

new file mode 100644 (file)

index 0000000..3a81807
--- /dev/null
+++ b/queue-5.15/x86-ftrace-use-alternative-ret-encoding.patch
@@ -0,0 +1,50 @@
+From foo@baz Fri Aug 19 01:18:09 PM CEST 2022
+From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Date: Tue, 16 Aug 2022 05:26:58 -0300
+Subject: x86/ftrace: Use alternative RET encoding
+To: stable@vger.kernel.org
+Cc: paul.gortmaker@windriver.com, gregkh@linuxfoundation.org, peterz@infradead.org, bp@suse.de, jpoimboe@kernel.org, Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Message-ID: <20220816082658.172387-3-cascardo@canonical.com>
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 1f001e9da6bbf482311e45e48f53c2bd2179e59c upstream.
+
+Use the return thunk in ftrace trampolines, if needed.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+[cascardo: use memcpy(text_gen_insn) as there is no __text_gen_insn]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/ftrace.c |    9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kernel/ftrace.c
++++ b/arch/x86/kernel/ftrace.c
+@@ -309,7 +309,7 @@ union ftrace_op_code_union {
+       } __attribute__((packed));
+ };
+ 
+-#define RET_SIZE              1 + IS_ENABLED(CONFIG_SLS)
++#define RET_SIZE              (IS_ENABLED(CONFIG_RETPOLINE) ? 5 : 1 + IS_ENABLED(CONFIG_SLS))
+ 
+ static unsigned long
+ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
+@@ -365,7 +365,12 @@ create_trampoline(struct ftrace_ops *ops
+               goto fail;
+ 
+       ip = trampoline + size;
+-      memcpy(ip, retq, RET_SIZE);
++
++      /* The trampoline ends with ret(q) */
++      if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
++              memcpy(ip, text_gen_insn(JMP32_INSN_OPCODE, ip, &__x86_return_thunk), JMP32_INSN_SIZE);
++      else
++              memcpy(ip, retq, sizeof(retq));
+ 
+       /* No need to test direct calls on created trampolines */
+       if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
diff --git a/queue-5.15/x86-ibt-ftrace-make-function-graph-play-nice.patch b/queue-5.15/x86-ibt-ftrace-make-function-graph-play-nice.patch

new file mode 100644 (file)

index 0000000..a0ad944
--- /dev/null
+++ b/queue-5.15/x86-ibt-ftrace-make-function-graph-play-nice.patch
@@ -0,0 +1,102 @@
+From foo@baz Fri Aug 19 01:18:09 PM CEST 2022
+From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Date: Tue, 16 Aug 2022 05:26:57 -0300
+Subject: x86/ibt,ftrace: Make function-graph play nice
+To: stable@vger.kernel.org
+Cc: paul.gortmaker@windriver.com, gregkh@linuxfoundation.org, peterz@infradead.org, bp@suse.de, jpoimboe@kernel.org, Josh Poimboeuf <jpoimboe@redhat.com>, Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Message-ID: <20220816082658.172387-2-cascardo@canonical.com>
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit e52fc2cf3f662828cc0d51c4b73bed73ad275fce upstream.
+
+Return trampoline must not use indirect branch to return; while this
+preserves the RSB, it is fundamentally incompatible with IBT. Instead
+use a retpoline like ROP gadget that defeats IBT while not unbalancing
+the RSB.
+
+And since ftrace_stub is no longer a plain RET, don't use it to copy
+from. Since RET is a trivial instruction, poke it directly.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Link: https://lore.kernel.org/r/20220308154318.347296408@infradead.org
+[cascardo: remove ENDBR]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/ftrace.c    |    9 ++-------
+ arch/x86/kernel/ftrace_64.S |   19 +++++++++++++++----
+ 2 files changed, 17 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/kernel/ftrace.c
++++ b/arch/x86/kernel/ftrace.c
+@@ -322,12 +322,12 @@ create_trampoline(struct ftrace_ops *ops
+       unsigned long offset;
+       unsigned long npages;
+       unsigned long size;
+-      unsigned long retq;
+       unsigned long *ptr;
+       void *trampoline;
+       void *ip;
+       /* 48 8b 15 <offset> is movq <offset>(%rip), %rdx */
+       unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 };
++      unsigned const char retq[] = { RET_INSN_OPCODE, INT3_INSN_OPCODE };
+       union ftrace_op_code_union op_ptr;
+       int ret;
+ 
+@@ -365,12 +365,7 @@ create_trampoline(struct ftrace_ops *ops
+               goto fail;
+ 
+       ip = trampoline + size;
+-
+-      /* The trampoline ends with ret(q) */
+-      retq = (unsigned long)ftrace_stub;
+-      ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE);
+-      if (WARN_ON(ret < 0))
+-              goto fail;
++      memcpy(ip, retq, RET_SIZE);
+ 
+       /* No need to test direct calls on created trampolines */
+       if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
+--- a/arch/x86/kernel/ftrace_64.S
++++ b/arch/x86/kernel/ftrace_64.S
+@@ -181,7 +181,6 @@ SYM_INNER_LABEL(ftrace_graph_call, SYM_L
+ 
+ /*
+  * This is weak to keep gas from relaxing the jumps.
+- * It is also used to copy the RET for trampolines.
+  */
+ SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK)
+       UNWIND_HINT_FUNC
+@@ -335,7 +334,7 @@ SYM_FUNC_START(ftrace_graph_caller)
+ SYM_FUNC_END(ftrace_graph_caller)
+ 
+ SYM_FUNC_START(return_to_handler)
+-      subq  $24, %rsp
++      subq  $16, %rsp
+ 
+       /* Save the return values */
+       movq %rax, (%rsp)
+@@ -347,7 +346,19 @@ SYM_FUNC_START(return_to_handler)
+       movq %rax, %rdi
+       movq 8(%rsp), %rdx
+       movq (%rsp), %rax
+-      addq $24, %rsp
+-      JMP_NOSPEC rdi
++
++      addq $16, %rsp
++      /*
++       * Jump back to the old return address. This cannot be JMP_NOSPEC rdi
++       * since IBT would demand that contain ENDBR, which simply isn't so for
++       * return addresses. Use a retpoline here to keep the RSB balanced.
++       */
++      ANNOTATE_INTRA_FUNCTION_CALL
++      call .Ldo_rop
++      int3
++.Ldo_rop:
++      mov %rdi, (%rsp)
++      UNWIND_HINT_FUNC
++      RET
+ SYM_FUNC_END(return_to_handler)
+ #endif
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Fri, 19 Aug 2022 11:27:05 +0000 (13:27 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Fri, 19 Aug 2022 11:27:05 +0000 (13:27 +0200)
queue-5.15/btrfs-only-write-the-sectors-in-the-vertical-stripe-which-has-data-stripes.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/btrfs-raid56-don-t-trust-any-cached-sector-in-__raid56_parity_recover.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/ksmbd-fix-heap-based-overflow-in-set_ntacl_dacl.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/ksmbd-prevent-out-of-bound-read-for-smb2_write.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/net_sched-cls_route-disallow-handle-of-0.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/revert-x86-ftrace-use-alternative-ret-encoding.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/series		patch \| blob \| blame \| history
queue-5.15/tee-add-overflow-check-in-register_shm_helper.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/x86-ftrace-use-alternative-ret-encoding.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/x86-ibt-ftrace-make-function-graph-play-nice.patch	[new file with mode: 0644]	patch \| blob