From: Greg Kroah-Hartman Date: Fri, 20 Nov 2020 09:19:34 +0000 (+0100) Subject: 4.4-stable patches X-Git-Tag: v4.4.245~13 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=019f85b47131a048d1ef7e75bf8f3b624760d5d9;p=thirdparty%2Fkernel%2Fstable-queue.git 4.4-stable patches added patches: input-sunkbd-avoid-use-after-free-in-teardown-paths.patch powerpc-8xx-always-fault-when-_page_accessed-is-not-set.patch xfs-catch-inode-allocation-state-mismatch-corruption.patch xfs-validate-cached-inodes-are-free-when-allocated.patch --- diff --git a/queue-4.4/input-sunkbd-avoid-use-after-free-in-teardown-paths.patch b/queue-4.4/input-sunkbd-avoid-use-after-free-in-teardown-paths.patch new file mode 100644 index 00000000000..49a10907889 --- /dev/null +++ b/queue-4.4/input-sunkbd-avoid-use-after-free-in-teardown-paths.patch @@ -0,0 +1,94 @@ +From 77e70d351db7de07a46ac49b87a6c3c7a60fca7e Mon Sep 17 00:00:00 2001 +From: Dmitry Torokhov +Date: Mon, 26 Oct 2020 13:36:17 -0700 +Subject: Input: sunkbd - avoid use-after-free in teardown paths + +From: Dmitry Torokhov + +commit 77e70d351db7de07a46ac49b87a6c3c7a60fca7e upstream. + +We need to make sure we cancel the reinit work before we tear down the +driver structures. + +Reported-by: Bodong Zhao +Tested-by: Bodong Zhao +Cc: stable@vger.kernel.org +Signed-off-by: Dmitry Torokhov +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/input/keyboard/sunkbd.c | 41 ++++++++++++++++++++++++++++++++-------- + 1 file changed, 33 insertions(+), 8 deletions(-) + +--- a/drivers/input/keyboard/sunkbd.c ++++ b/drivers/input/keyboard/sunkbd.c +@@ -115,7 +115,8 @@ static irqreturn_t sunkbd_interrupt(stru + switch (data) { + + case SUNKBD_RET_RESET: +- schedule_work(&sunkbd->tq); ++ if (sunkbd->enabled) ++ schedule_work(&sunkbd->tq); + sunkbd->reset = -1; + break; + +@@ -216,16 +217,12 @@ static int sunkbd_initialize(struct sunk + } + + /* +- * sunkbd_reinit() sets leds and beeps to a state the computer remembers they +- * were in. 
++ * sunkbd_set_leds_beeps() sets leds and beeps to a state the computer remembers ++ * they were in. + */ + +-static void sunkbd_reinit(struct work_struct *work) ++static void sunkbd_set_leds_beeps(struct sunkbd *sunkbd) + { +- struct sunkbd *sunkbd = container_of(work, struct sunkbd, tq); +- +- wait_event_interruptible_timeout(sunkbd->wait, sunkbd->reset >= 0, HZ); +- + serio_write(sunkbd->serio, SUNKBD_CMD_SETLED); + serio_write(sunkbd->serio, + (!!test_bit(LED_CAPSL, sunkbd->dev->led) << 3) | +@@ -238,11 +235,39 @@ static void sunkbd_reinit(struct work_st + SUNKBD_CMD_BELLOFF - !!test_bit(SND_BELL, sunkbd->dev->snd)); + } + ++ ++/* ++ * sunkbd_reinit() wait for the keyboard reset to complete and restores state ++ * of leds and beeps. ++ */ ++ ++static void sunkbd_reinit(struct work_struct *work) ++{ ++ struct sunkbd *sunkbd = container_of(work, struct sunkbd, tq); ++ ++ /* ++ * It is OK that we check sunkbd->enabled without pausing serio, ++ * as we only want to catch true->false transition that will ++ * happen once and we will be woken up for it. 
++ */ ++ wait_event_interruptible_timeout(sunkbd->wait, ++ sunkbd->reset >= 0 || !sunkbd->enabled, ++ HZ); ++ ++ if (sunkbd->reset >= 0 && sunkbd->enabled) ++ sunkbd_set_leds_beeps(sunkbd); ++} ++ + static void sunkbd_enable(struct sunkbd *sunkbd, bool enable) + { + serio_pause_rx(sunkbd->serio); + sunkbd->enabled = enable; + serio_continue_rx(sunkbd->serio); ++ ++ if (!enable) { ++ wake_up_interruptible(&sunkbd->wait); ++ cancel_work_sync(&sunkbd->tq); ++ } + } + + /* diff --git a/queue-4.4/powerpc-8xx-always-fault-when-_page_accessed-is-not-set.patch b/queue-4.4/powerpc-8xx-always-fault-when-_page_accessed-is-not-set.patch new file mode 100644 index 00000000000..6dd7397df12 --- /dev/null +++ b/queue-4.4/powerpc-8xx-always-fault-when-_page_accessed-is-not-set.patch @@ -0,0 +1,63 @@ +From 29daf869cbab69088fe1755d9dd224e99ba78b56 Mon Sep 17 00:00:00 2001 +From: Christophe Leroy +Date: Mon, 12 Oct 2020 08:54:31 +0000 +Subject: powerpc/8xx: Always fault when _PAGE_ACCESSED is not set + +From: Christophe Leroy + +commit 29daf869cbab69088fe1755d9dd224e99ba78b56 upstream. + +The kernel expects pte_young() to work regardless of CONFIG_SWAP. + +Make sure a minor fault is taken to set _PAGE_ACCESSED when it +is not already set, regardless of the selection of CONFIG_SWAP. + +This adds at least 3 instructions to the TLB miss exception +handlers fast path. Following patch will reduce this overhead. + +Also update the rotation instruction to the correct number of bits +to reflect all changes done to _PAGE_ACCESSED over time. 
+ +Fixes: d069cb4373fe ("powerpc/8xx: Don't touch ACCESSED when no SWAP.") +Fixes: 5f356497c384 ("powerpc/8xx: remove unused _PAGE_WRITETHRU") +Fixes: e0a8e0d90a9f ("powerpc/8xx: Handle PAGE_USER via APG bits") +Fixes: 5b2753fc3e8a ("powerpc/8xx: Implementation of PAGE_EXEC") +Fixes: a891c43b97d3 ("powerpc/8xx: Prepare handlers for _PAGE_HUGE for 512k pages.") +Cc: stable@vger.kernel.org +Signed-off-by: Christophe Leroy +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/af834e8a0f1fa97bfae65664950f0984a70c4750.1602492856.git.christophe.leroy@csgroup.eu +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/kernel/head_8xx.S | 8 ++------ + 1 file changed, 2 insertions(+), 6 deletions(-) + +--- a/arch/powerpc/kernel/head_8xx.S ++++ b/arch/powerpc/kernel/head_8xx.S +@@ -361,11 +361,9 @@ InstructionTLBMiss: + /* Load the MI_TWC with the attributes for this "segment." */ + MTSPR_CPU6(SPRN_MI_TWC, r11, r3) /* Set segment attributes */ + +-#ifdef CONFIG_SWAP +- rlwinm r11, r10, 32-5, _PAGE_PRESENT ++ rlwinm r11, r10, 32-11, _PAGE_PRESENT + and r11, r11, r10 + rlwimi r10, r11, 0, _PAGE_PRESENT +-#endif + li r11, RPN_PATTERN + /* The Linux PTE won't go exactly into the MMU TLB. + * Software indicator bits 20-23 and 28 must be clear. +@@ -436,11 +434,9 @@ DataStoreTLBMiss: + * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5)); + * r10 = (r10 & ~PRESENT) | r11; + */ +-#ifdef CONFIG_SWAP +- rlwinm r11, r10, 32-5, _PAGE_PRESENT ++ rlwinm r11, r10, 32-11, _PAGE_PRESENT + and r11, r11, r10 + rlwimi r10, r11, 0, _PAGE_PRESENT +-#endif + /* The Linux PTE won't go exactly into the MMU TLB. + * Software indicator bits 22 and 28 must be clear. 
+ * Software indicator bits 24, 25, 26, and 27 must be diff --git a/queue-4.4/series b/queue-4.4/series index b40d410f621..75f519653cb 100644 --- a/queue-4.4/series +++ b/queue-4.4/series @@ -7,3 +7,7 @@ powerpc-fix-__clear_user-with-kuap-enabled.patch powerpc-uaccess-evaluate-macro-arguments-once-before-user-access-is-allowed.patch powerpc-64s-flush-l1d-after-user-accesses.patch i2c-imx-fix-external-abort-on-interrupt-in-exit-paths.patch +xfs-catch-inode-allocation-state-mismatch-corruption.patch +xfs-validate-cached-inodes-are-free-when-allocated.patch +powerpc-8xx-always-fault-when-_page_accessed-is-not-set.patch +input-sunkbd-avoid-use-after-free-in-teardown-paths.patch diff --git a/queue-4.4/xfs-catch-inode-allocation-state-mismatch-corruption.patch b/queue-4.4/xfs-catch-inode-allocation-state-mismatch-corruption.patch new file mode 100644 index 00000000000..131e0e3f8c9 --- /dev/null +++ b/queue-4.4/xfs-catch-inode-allocation-state-mismatch-corruption.patch @@ -0,0 +1,184 @@ +From foo@baz Fri Nov 20 09:55:34 AM CET 2020 +From: Dave Chinner +Date: Fri, 23 Mar 2018 10:22:53 -0700 +Subject: xfs: catch inode allocation state mismatch corruption + +From: Dave Chinner + +commit ee457001ed6c6f31ddad69c24c1da8f377d8472d upstream + +We recently came across a V4 filesystem causing memory corruption +due to a newly allocated inode being setup twice and being added to +the superblock inode list twice. From code inspection, the only way +this could happen is if a newly allocated inode was not marked as +free on disk (i.e. di_mode wasn't zero). + +Running the metadump on an upstream debug kernel fails during inode +allocation like so: + +XFS: Assertion failed: ip->i_d.di_nblocks == 0, file: fs/xfs/xfs_inod= +e.c, line: 838 + ------------[ cut here ]------------ +kernel BUG at fs/xfs/xfs_message.c:114! 
+invalid opcode: 0000 [#1] PREEMPT SMP +CPU: 11 PID: 3496 Comm: mkdir Not tainted 4.16.0-rc5-dgc #442 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/0= +1/2014 +RIP: 0010:assfail+0x28/0x30 +RSP: 0018:ffffc9000236fc80 EFLAGS: 00010202 +RAX: 00000000ffffffea RBX: 0000000000004000 RCX: 0000000000000000 +RDX: 00000000ffffffc0 RSI: 000000000000000a RDI: ffffffff8227211b +RBP: ffffc9000236fce8 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000bec R11: f000000000000000 R12: ffffc9000236fd30 +R13: ffff8805c76bab80 R14: ffff8805c77ac800 R15: ffff88083fb12e10 +FS: 00007fac8cbff040(0000) GS:ffff88083fd00000(0000) knlGS:0000000000000= +000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007fffa6783ff8 CR3: 00000005c6e2b003 CR4: 00000000000606e0 +Call Trace: + xfs_ialloc+0x383/0x570 + xfs_dir_ialloc+0x6a/0x2a0 + xfs_create+0x412/0x670 + xfs_generic_create+0x1f7/0x2c0 + ? capable_wrt_inode_uidgid+0x3f/0x50 + vfs_mkdir+0xfb/0x1b0 + SyS_mkdir+0xcf/0xf0 + do_syscall_64+0x73/0x1a0 + entry_SYSCALL_64_after_hwframe+0x42/0xb7 + +Extracting the inode number we crashed on from an event trace and +looking at it with xfs_db: + +xfs_db> inode 184452204 +xfs_db> p +core.magic = 0x494e +core.mode = 0100644 +core.version = 2 +core.format = 2 (extents) +core.nlinkv2 = 1 +core.onlink = 0 +..... + +Confirms that it is not a free inode on disk. xfs_repair +also trips over this inode: + +..... +zero length extent (off = 0, fsbno = 0) in ino 184452204 +correcting nextents for inode 184452204 +bad attribute fork in inode 184452204, would clear attr fork +bad nblocks 1 for inode 184452204, would reset to 0 +bad anextents 1 for inode 184452204, would reset to 0 +imap claims in-use inode 184452204 is free, would correct imap +would have cleared inode 184452204 +..... 
+disconnected inode 184452204, would move to lost+found + +And so we have a situation where the directory structure and the +inobt thinks the inode is free, but the inode on disk thinks it is +still in use. Where this corruption came from is not possible to +diagnose, but we can detect it and prevent the kernel from oopsing +on lookup. The reproducer now results in: + +$ sudo mkdir /mnt/scratch/{0,1,2,3,4,5}{0,1,2,3,4,5} +mkdir: cannot create directory =E2=80=98/mnt/scratch/00=E2=80=99: File ex= +ists +mkdir: cannot create directory =E2=80=98/mnt/scratch/01=E2=80=99: File ex= +ists +mkdir: cannot create directory =E2=80=98/mnt/scratch/03=E2=80=99: Structu= +re needs cleaning +mkdir: cannot create directory =E2=80=98/mnt/scratch/04=E2=80=99: Input/o= +utput error +mkdir: cannot create directory =E2=80=98/mnt/scratch/05=E2=80=99: Input/o= +utput error +.... + +And this corruption shutdown: + +[ 54.843517] XFS (loop0): Corruption detected! Free inode 0xafe846c not= + marked free on disk +[ 54.845885] XFS (loop0): Internal error xfs_trans_cancel at line 1023 = +of file fs/xfs/xfs_trans.c. Caller xfs_create+0x425/0x670 +[ 54.848994] CPU: 10 PID: 3541 Comm: mkdir Not tainted 4.16.0-rc5-dgc #= +443 +[ 54.850753] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIO= +S 1.10.2-1 04/01/2014 +[ 54.852859] Call Trace: +[ 54.853531] dump_stack+0x85/0xc5 +[ 54.854385] xfs_trans_cancel+0x197/0x1c0 +[ 54.855421] xfs_create+0x425/0x670 +[ 54.856314] xfs_generic_create+0x1f7/0x2c0 +[ 54.857390] ? 
capable_wrt_inode_uidgid+0x3f/0x50 +[ 54.858586] vfs_mkdir+0xfb/0x1b0 +[ 54.859458] SyS_mkdir+0xcf/0xf0 +[ 54.860254] do_syscall_64+0x73/0x1a0 +[ 54.861193] entry_SYSCALL_64_after_hwframe+0x42/0xb7 +[ 54.862492] RIP: 0033:0x7fb73bddf547 +[ 54.863358] RSP: 002b:00007ffdaa553338 EFLAGS: 00000246 ORIG_RAX: 0000= +000000000053 +[ 54.865133] RAX: ffffffffffffffda RBX: 00007ffdaa55449a RCX: 00007fb73= +bddf547 +[ 54.866766] RDX: 0000000000000001 RSI: 00000000000001ff RDI: 00007ffda= +a55449a +[ 54.868432] RBP: 00007ffdaa55449a R08: 00000000000001ff R09: 00005623a= +8670dd0 +[ 54.870110] R10: 00007fb73be72d5b R11: 0000000000000246 R12: 000000000= +00001ff +[ 54.871752] R13: 00007ffdaa5534b0 R14: 0000000000000000 R15: 00007ffda= +a553500 +[ 54.873429] XFS (loop0): xfs_do_force_shutdown(0x8) called from line 1= +024 of file fs/xfs/xfs_trans.c. Return address = ffffffff814cd050 +[ 54.882790] XFS (loop0): Corruption of in-memory data detected. Shutt= +ing down filesystem +[ 54.884597] XFS (loop0): Please umount the filesystem and rectify the = +problem(s) + +Note that this crash is only possible on v4 filesystems or v5 +filesystems mounted with the ikeep mount option. For all other V5 +filesystems, this problem cannot occur because we don't read inodes +we are allocating from disk - we simply overwrite them with the new +inode information. + +Signed-Off-By: Dave Chinner +Reviewed-by: Carlos Maiolino +Tested-by: Carlos Maiolino +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. 
Wong +[sudip: use ip->i_d.di_mode instead of VFS_I(ip)->i_mode] +Signed-off-by: Sudip Mukherjee +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_icache.c | 23 ++++++++++++++++++++++- + 1 file changed, 22 insertions(+), 1 deletion(-) + +--- a/fs/xfs/xfs_icache.c ++++ b/fs/xfs/xfs_icache.c +@@ -298,7 +298,28 @@ xfs_iget_cache_miss( + + trace_xfs_iget_miss(ip); + +- if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { ++ ++ /* ++ * If we are allocating a new inode, then check what was returned is ++ * actually a free, empty inode. If we are not allocating an inode, ++ * the check we didn't find a free inode. ++ */ ++ if (flags & XFS_IGET_CREATE) { ++ if (ip->i_d.di_mode != 0) { ++ xfs_warn(mp, ++"Corruption detected! Free inode 0x%llx not marked free on disk", ++ ino); ++ error = -EFSCORRUPTED; ++ goto out_destroy; ++ } ++ if (ip->i_d.di_nblocks != 0) { ++ xfs_warn(mp, ++"Corruption detected! Free inode 0x%llx has blocks allocated!", ++ ino); ++ error = -EFSCORRUPTED; ++ goto out_destroy; ++ } ++ } else if (ip->i_d.di_mode == 0) { + error = -ENOENT; + goto out_destroy; + } diff --git a/queue-4.4/xfs-validate-cached-inodes-are-free-when-allocated.patch b/queue-4.4/xfs-validate-cached-inodes-are-free-when-allocated.patch new file mode 100644 index 00000000000..a1c697edb9f --- /dev/null +++ b/queue-4.4/xfs-validate-cached-inodes-are-free-when-allocated.patch @@ -0,0 +1,157 @@ +From foo@baz Fri Nov 20 09:55:34 AM CET 2020 +From: Dave Chinner +Date: Tue, 17 Apr 2018 17:17:34 -0700 +Subject: xfs: validate cached inodes are free when allocated + +From: Dave Chinner + +commit afca6c5b2595fc44383919fba740c194b0b76aff upstream + +A recent fuzzed filesystem image caused random dcache corruption +when the reproducer was run. This often showed up as panics in +lookup_slow() on a null inode->i_ops pointer when doing pathwalks. + +BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 +.... 
+Call Trace: + lookup_slow+0x44/0x60 + walk_component+0x3dd/0x9f0 + link_path_walk+0x4a7/0x830 + path_lookupat+0xc1/0x470 + filename_lookup+0x129/0x270 + user_path_at_empty+0x36/0x40 + path_listxattr+0x98/0x110 + SyS_listxattr+0x13/0x20 + do_syscall_64+0xf5/0x280 + entry_SYSCALL_64_after_hwframe+0x42/0xb7 + +but had many different failure modes including deadlocks trying to +lock the inode that was just allocated or KASAN reports of +use-after-free violations. + +The cause of the problem was a corrupt INOBT on a v4 fs where the +root inode was marked as free in the inobt record. Hence when we +allocated an inode, it chose the root inode to allocate, found it in +the cache and re-initialised it. + +We recently fixed a similar inode allocation issue caused by inobt +record corruption problem in xfs_iget_cache_miss() in commit +ee457001ed6c ("xfs: catch inode allocation state mismatch +corruption"). This change adds similar checks to the cache-hit path +to catch it, and turns the reproducer into a corruption shutdown +situation. + +Reported-by: Wen Xu +Signed-Off-By: Dave Chinner +Reviewed-by: Christoph Hellwig +Reviewed-by: Carlos Maiolino +Reviewed-by: Darrick J. Wong +[darrick: fix typos in comment] +Signed-off-by: Darrick J. Wong +[sudip: use ip->i_d.di_mode instead of VFS_I(ip)->i_mode] +Signed-off-by: Sudip Mukherjee +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_icache.c | 73 ++++++++++++++++++++++++++++++++++------------------ + 1 file changed, 48 insertions(+), 25 deletions(-) + +--- a/fs/xfs/xfs_icache.c ++++ b/fs/xfs/xfs_icache.c +@@ -135,6 +135,46 @@ xfs_inode_free( + } + + /* ++ * If we are allocating a new inode, then check what was returned is ++ * actually a free, empty inode. If we are not allocating an inode, ++ * then check we didn't find a free inode. 
++ * ++ * Returns: ++ * 0 if the inode free state matches the lookup context ++ * -ENOENT if the inode is free and we are not allocating ++ * -EFSCORRUPTED if there is any state mismatch at all ++ */ ++static int ++xfs_iget_check_free_state( ++ struct xfs_inode *ip, ++ int flags) ++{ ++ if (flags & XFS_IGET_CREATE) { ++ /* should be a free inode */ ++ if (ip->i_d.di_mode != 0) { ++ xfs_warn(ip->i_mount, ++"Corruption detected! Free inode 0x%llx not marked free! (mode 0x%x)", ++ ip->i_ino, ip->i_d.di_mode); ++ return -EFSCORRUPTED; ++ } ++ ++ if (ip->i_d.di_nblocks != 0) { ++ xfs_warn(ip->i_mount, ++"Corruption detected! Free inode 0x%llx has blocks allocated!", ++ ip->i_ino); ++ return -EFSCORRUPTED; ++ } ++ return 0; ++ } ++ ++ /* should be an allocated inode */ ++ if (ip->i_d.di_mode == 0) ++ return -ENOENT; ++ ++ return 0; ++} ++ ++/* + * Check the validity of the inode we just found it the cache + */ + static int +@@ -183,12 +223,12 @@ xfs_iget_cache_hit( + } + + /* +- * If lookup is racing with unlink return an error immediately. ++ * Check the inode free state is valid. This also detects lookup ++ * racing with unlinks. + */ +- if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { +- error = -ENOENT; ++ error = xfs_iget_check_free_state(ip, flags); ++ if (error) + goto out_error; +- } + + /* + * If IRECLAIMABLE is set, we've torn down the VFS inode already. +@@ -300,29 +340,12 @@ xfs_iget_cache_miss( + + + /* +- * If we are allocating a new inode, then check what was returned is +- * actually a free, empty inode. If we are not allocating an inode, +- * the check we didn't find a free inode. ++ * Check the inode free state is valid. This also detects lookup ++ * racing with unlinks. + */ +- if (flags & XFS_IGET_CREATE) { +- if (ip->i_d.di_mode != 0) { +- xfs_warn(mp, +-"Corruption detected! 
Free inode 0x%llx not marked free on disk", +- ino); +- error = -EFSCORRUPTED; +- goto out_destroy; +- } +- if (ip->i_d.di_nblocks != 0) { +- xfs_warn(mp, +-"Corruption detected! Free inode 0x%llx has blocks allocated!", +- ino); +- error = -EFSCORRUPTED; +- goto out_destroy; +- } +- } else if (ip->i_d.di_mode == 0) { +- error = -ENOENT; ++ error = xfs_iget_check_free_state(ip, flags); ++ if (error) + goto out_destroy; +- } + + /* + * Preload the radix tree so we can insert safely under the