From: Greg Kroah-Hartman Date: Sat, 5 Mar 2016 19:45:08 +0000 (-0800) Subject: 4.4-stable patches X-Git-Tag: v3.10.100~13 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=4e362751dc079531ef2b95bcd5c71e12f78e1ae6;p=thirdparty%2Fkernel%2Fstable-queue.git 4.4-stable patches added patches: adding-intel-lewisburg-device-ids-for-sata.patch arm64-vmemmap-use-virtual-projection-of-linear-region.patch ata-ahci-don-t-mark-hotplugcapable-ports-as-external-removable.patch block-bio-introduce-helpers-to-get-the-1st-and-last-bvec.patch btrfs-fix-loading-of-orphan-roots-leading-to-bug_on.patch drm-amdgpu-apply-gfx_v8-fixes-to-gfx_v7-as-well.patch drm-amdgpu-gfx8-specify-which-engine-to-wait-before-vm-flush.patch drm-amdgpu-pm-update-current-crtc-info-after-setting-the-powerstate.patch drm-amdgpu-return-from-atombios_dp_get_dpcd-only-when-error.patch drm-ast-fix-incorrect-register-check-for-dram-width.patch drm-radeon-pm-update-current-crtc-info-after-setting-the-powerstate.patch fix-directory-hardlinks-from-deleted-directories.patch jffs2-fix-page-lock-f-sem-deadlock.patch libata-align-ata_device-s-id-on-a-cacheline.patch libata-fix-hdio_get_32bit-ioctl.patch pata-rb532-cf-get-rid-of-the-irq_to_gpio-call.patch pm-sleep-x86-fix-crash-on-graph-trace-through-x86-suspend.patch revert-jffs2-fix-lock-acquisition-order-bug-in-jffs2_write_begin.patch target-fix-write_same-discard-conversion-to-linux-512b-sectors.patch tracing-do-not-have-comm-filter-override-event-comm-field.patch writeback-flush-inode-cgroup-wb-switches-instead-of-pinning-super_block.patch --- diff --git a/queue-4.4/adding-intel-lewisburg-device-ids-for-sata.patch b/queue-4.4/adding-intel-lewisburg-device-ids-for-sata.patch new file mode 100644 index 00000000000..157ba04ba0c --- /dev/null +++ b/queue-4.4/adding-intel-lewisburg-device-ids-for-sata.patch @@ -0,0 +1,44 @@ +From f5bdd66c705484b4bc77eb914be15c1b7881fae7 Mon Sep 17 00:00:00 2001 +From: Alexandra Yates +Date: Wed, 17 Feb 2016 19:36:20 -0800 +Subject: 
Adding Intel Lewisburg device IDs for SATA + +From: Alexandra Yates + +commit f5bdd66c705484b4bc77eb914be15c1b7881fae7 upstream. + +This patch complements the list of device IDs previously +added for lewisburg sata. + +Signed-off-by: Alexandra Yates +Signed-off-by: Tejun Heo +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/ata/ahci.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/drivers/ata/ahci.c ++++ b/drivers/ata/ahci.c +@@ -367,15 +367,21 @@ static const struct pci_device_id ahci_p + { PCI_VDEVICE(INTEL, 0xa107), board_ahci }, /* Sunrise Point-H RAID */ + { PCI_VDEVICE(INTEL, 0xa10f), board_ahci }, /* Sunrise Point-H RAID */ + { PCI_VDEVICE(INTEL, 0x2822), board_ahci }, /* Lewisburg RAID*/ ++ { PCI_VDEVICE(INTEL, 0x2823), board_ahci }, /* Lewisburg AHCI*/ + { PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* Lewisburg RAID*/ ++ { PCI_VDEVICE(INTEL, 0x2827), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa182), board_ahci }, /* Lewisburg AHCI*/ + { PCI_VDEVICE(INTEL, 0xa184), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa186), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa18e), board_ahci }, /* Lewisburg RAID*/ ++ { PCI_VDEVICE(INTEL, 0xa1d2), board_ahci }, /* Lewisburg RAID*/ ++ { PCI_VDEVICE(INTEL, 0xa1d6), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa202), board_ahci }, /* Lewisburg AHCI*/ + { PCI_VDEVICE(INTEL, 0xa204), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa206), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa20e), board_ahci }, /* Lewisburg RAID*/ ++ { PCI_VDEVICE(INTEL, 0xa252), board_ahci }, /* Lewisburg RAID*/ ++ { PCI_VDEVICE(INTEL, 0xa256), board_ahci }, /* Lewisburg RAID*/ + + /* JMicron 360/1/3/5/6, match class to avoid IDE function */ + { PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, diff --git a/queue-4.4/arm64-vmemmap-use-virtual-projection-of-linear-region.patch b/queue-4.4/arm64-vmemmap-use-virtual-projection-of-linear-region.patch new 
file mode 100644 index 00000000000..a5b4b89eb70 --- /dev/null +++ b/queue-4.4/arm64-vmemmap-use-virtual-projection-of-linear-region.patch @@ -0,0 +1,77 @@ +From dfd55ad85e4a7fbaa82df12467515ac3c81e8a3e Mon Sep 17 00:00:00 2001 +From: Ard Biesheuvel +Date: Fri, 26 Feb 2016 17:57:13 +0100 +Subject: arm64: vmemmap: use virtual projection of linear region + +From: Ard Biesheuvel + +commit dfd55ad85e4a7fbaa82df12467515ac3c81e8a3e upstream. + +Commit dd006da21646 ("arm64: mm: increase VA range of identity map") made +some changes to the memory mapping code to allow physical memory to reside +at an offset that exceeds the size of the virtual mapping. + +However, since the size of the vmemmap area is proportional to the size of +the VA area, but it is populated relative to the physical space, we may +end up with the struct page array being mapped outside of the vmemmap +region. For instance, on my Seattle A0 box, I can see the following output +in the dmesg log. + + vmemmap : 0xffffffbdc0000000 - 0xffffffbfc0000000 ( 8 GB maximum) + 0xffffffbfc0000000 - 0xffffffbfd0000000 ( 256 MB actual) + +We can fix this by deciding that the vmemmap region is not a projection of +the physical space, but of the virtual space above PAGE_OFFSET, i.e., the +linear region. This way, we are guaranteed that the vmemmap region is of +sufficient size, and we can even reduce the size by half. + +Signed-off-by: Ard Biesheuvel +Signed-off-by: Will Deacon +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/include/asm/pgtable.h | 7 ++++--- + arch/arm64/mm/init.c | 4 ++-- + 2 files changed, 6 insertions(+), 5 deletions(-) + +--- a/arch/arm64/include/asm/pgtable.h ++++ b/arch/arm64/include/asm/pgtable.h +@@ -34,13 +34,13 @@ + /* + * VMALLOC and SPARSEMEM_VMEMMAP ranges. + * +- * VMEMAP_SIZE: allows the whole VA space to be covered by a struct page array ++ * VMEMAP_SIZE: allows the whole linear region to be covered by a struct page array + * (rounded up to PUD_SIZE). 
+ * VMALLOC_START: beginning of the kernel VA space + * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space, + * fixed mappings and modules + */ +-#define VMEMMAP_SIZE ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE) ++#define VMEMMAP_SIZE ALIGN((1UL << (VA_BITS - PAGE_SHIFT - 1)) * sizeof(struct page), PUD_SIZE) + + #ifndef CONFIG_KASAN + #define VMALLOC_START (VA_START) +@@ -51,7 +51,8 @@ + + #define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K) + +-#define vmemmap ((struct page *)(VMALLOC_END + SZ_64K)) ++#define VMEMMAP_START (VMALLOC_END + SZ_64K) ++#define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT)) + + #define FIRST_USER_ADDRESS 0UL + +--- a/arch/arm64/mm/init.c ++++ b/arch/arm64/mm/init.c +@@ -319,8 +319,8 @@ void __init mem_init(void) + #endif + MLG(VMALLOC_START, VMALLOC_END), + #ifdef CONFIG_SPARSEMEM_VMEMMAP +- MLG((unsigned long)vmemmap, +- (unsigned long)vmemmap + VMEMMAP_SIZE), ++ MLG(VMEMMAP_START, ++ VMEMMAP_START + VMEMMAP_SIZE), + MLM((unsigned long)virt_to_page(PAGE_OFFSET), + (unsigned long)virt_to_page(high_memory)), + #endif diff --git a/queue-4.4/ata-ahci-don-t-mark-hotplugcapable-ports-as-external-removable.patch b/queue-4.4/ata-ahci-don-t-mark-hotplugcapable-ports-as-external-removable.patch new file mode 100644 index 00000000000..2491986ce4f --- /dev/null +++ b/queue-4.4/ata-ahci-don-t-mark-hotplugcapable-ports-as-external-removable.patch @@ -0,0 +1,44 @@ +From dc8b4afc4a04fac8ee55a19b59f2356a25e7e778 Mon Sep 17 00:00:00 2001 +From: Manuel Lauss +Date: Sat, 27 Feb 2016 16:10:05 +0100 +Subject: ata: ahci: don't mark HotPlugCapable Ports as external/removable + +From: Manuel Lauss + +commit dc8b4afc4a04fac8ee55a19b59f2356a25e7e778 upstream. 
+ +The HPCP bit is set by bioses for on-board sata ports either because +they think sata is hotplug capable in general or to allow Windows +to display a "device eject" icon on ports which are routed to an +external connector bracket. + +However in Redhat Bugzilla #1310682, users report that with kernel 4.4, +where this bit test first appeared, a lot of partitions on sata drives +are now mounted automatically. + +This patch should fix redhat and a lot of other distros which +unconditionally automount all devices which have the "removable" +bit set. + +Signed-off-by: Manuel Lauss +Signed-off-by: Tejun Heo +Fixes: 8a3e33cf92c7 ("ata: ahci: find eSATA ports and flag them as removable" changes userspace behavior) +Link: http://lkml.kernel.org/g/56CF35FA.1070500@redhat.com +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/ata/libahci.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/ata/libahci.c ++++ b/drivers/ata/libahci.c +@@ -1142,8 +1142,7 @@ static void ahci_port_init(struct device + + /* mark esata ports */ + tmp = readl(port_mmio + PORT_CMD); +- if ((tmp & PORT_CMD_HPCP) || +- ((tmp & PORT_CMD_ESP) && (hpriv->cap & HOST_CAP_SXS))) ++ if ((tmp & PORT_CMD_ESP) && (hpriv->cap & HOST_CAP_SXS)) + ap->pflags |= ATA_PFLAG_EXTERNAL; + } + diff --git a/queue-4.4/block-bio-introduce-helpers-to-get-the-1st-and-last-bvec.patch b/queue-4.4/block-bio-introduce-helpers-to-get-the-1st-and-last-bvec.patch new file mode 100644 index 00000000000..bd21afbc6f7 --- /dev/null +++ b/queue-4.4/block-bio-introduce-helpers-to-get-the-1st-and-last-bvec.patch @@ -0,0 +1,77 @@ +From 7bcd79ac50d9d83350a835bdb91c04ac9e098412 Mon Sep 17 00:00:00 2001 +From: Ming Lei +Date: Fri, 26 Feb 2016 23:40:50 +0800 +Subject: block: bio: introduce helpers to get the 1st and last bvec + +From: Ming Lei + +commit 7bcd79ac50d9d83350a835bdb91c04ac9e098412 upstream. 
+ +The bio passed to bio_will_gap() may be fast cloned from upper +layer(dm, md, bcache, fs, ...), or from bio splitting in block +core. + +Unfortunately bio_will_gap() just figures out the last bvec via +'bi_io_vec[prev->bi_vcnt - 1]' directly, and this way is obviously +wrong. + +This patch introduces two helpers for getting the first and last +bvec of one bio for fixing the issue. + +Reported-by: Sagi Grimberg +Reviewed-by: Sagi Grimberg +Reviewed-by: Christoph Hellwig +Signed-off-by: Ming Lei +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/bio.h | 37 +++++++++++++++++++++++++++++++++++++ + 1 file changed, 37 insertions(+) + +--- a/include/linux/bio.h ++++ b/include/linux/bio.h +@@ -310,6 +310,43 @@ static inline void bio_clear_flag(struct + bio->bi_flags &= ~(1U << bit); + } + ++static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv) ++{ ++ *bv = bio_iovec(bio); ++} ++ ++static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv) ++{ ++ struct bvec_iter iter = bio->bi_iter; ++ int idx; ++ ++ if (!bio_flagged(bio, BIO_CLONED)) { ++ *bv = bio->bi_io_vec[bio->bi_vcnt - 1]; ++ return; ++ } ++ ++ if (unlikely(!bio_multiple_segments(bio))) { ++ *bv = bio_iovec(bio); ++ return; ++ } ++ ++ bio_advance_iter(bio, &iter, iter.bi_size); ++ ++ if (!iter.bi_bvec_done) ++ idx = iter.bi_idx - 1; ++ else /* in the middle of bvec */ ++ idx = iter.bi_idx; ++ ++ *bv = bio->bi_io_vec[idx]; ++ ++ /* ++ * iter.bi_bvec_done records actual length of the last bvec ++ * if this bio ends in the middle of one io vector ++ */ ++ if (iter.bi_bvec_done) ++ bv->bv_len = iter.bi_bvec_done; ++} ++ + enum bip_flags { + BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */ + BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */ diff --git a/queue-4.4/btrfs-fix-loading-of-orphan-roots-leading-to-bug_on.patch b/queue-4.4/btrfs-fix-loading-of-orphan-roots-leading-to-bug_on.patch new file mode 100644 index 
00000000000..1b6a3688e72 --- /dev/null +++ b/queue-4.4/btrfs-fix-loading-of-orphan-roots-leading-to-bug_on.patch @@ -0,0 +1,184 @@ +From 909c3a22da3b8d2cfd3505ca5658f0176859d400 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Wed, 2 Mar 2016 15:49:38 +0000 +Subject: Btrfs: fix loading of orphan roots leading to BUG_ON + +From: Filipe Manana + +commit 909c3a22da3b8d2cfd3505ca5658f0176859d400 upstream. + +When looking for orphan roots during mount we can end up hitting a +BUG_ON() (at root-item.c:btrfs_find_orphan_roots()) if a log tree is +replayed and qgroups are enabled. This is because after a log tree is +replayed, a transaction commit is made, which triggers qgroup extent +accounting which in turn does backref walking which ends up reading and +inserting all roots in the radix tree fs_info->fs_root_radix, including +orphan roots (deleted snapshots). So after the log tree is replayed, when +finding orphan roots we hit the BUG_ON with the following trace: + +[118209.182438] ------------[ cut here ]------------ +[118209.183279] kernel BUG at fs/btrfs/root-tree.c:314! 
+[118209.184074] invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC +[118209.185123] Modules linked in: btrfs dm_flakey dm_mod crc32c_generic ppdev xor raid6_pq evdev sg parport_pc parport acpi_cpufreq tpm_tis tpm psmouse +processor i2c_piix4 serio_raw pcspkr i2c_core button loop autofs4 ext4 crc16 mbcache jbd2 sd_mod sr_mod cdrom ata_generic virtio_scsi ata_piix libata +virtio_pci virtio_ring virtio scsi_mod e1000 floppy [last unloaded: btrfs] +[118209.186318] CPU: 14 PID: 28428 Comm: mount Tainted: G W 4.5.0-rc5-btrfs-next-24+ #1 +[118209.186318] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS by qemu-project.org 04/01/2014 +[118209.186318] task: ffff8801ec131040 ti: ffff8800af34c000 task.ti: ffff8800af34c000 +[118209.186318] RIP: 0010:[] [] btrfs_find_orphan_roots+0x1fc/0x244 [btrfs] +[118209.186318] RSP: 0018:ffff8800af34faa8 EFLAGS: 00010246 +[118209.186318] RAX: 00000000ffffffef RBX: 00000000ffffffef RCX: 0000000000000001 +[118209.186318] RDX: 0000000080000000 RSI: 0000000000000001 RDI: 00000000ffffffff +[118209.186318] RBP: ffff8800af34fb08 R08: 0000000000000001 R09: 0000000000000000 +[118209.186318] R10: ffff8800af34f9f0 R11: 6db6db6db6db6db7 R12: ffff880171b97000 +[118209.186318] R13: ffff8801ca9d65e0 R14: ffff8800afa2e000 R15: 0000160000000000 +[118209.186318] FS: 00007f5bcb914840(0000) GS:ffff88023edc0000(0000) knlGS:0000000000000000 +[118209.186318] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b +[118209.186318] CR2: 00007f5bcaceb5d9 CR3: 00000000b49b5000 CR4: 00000000000006e0 +[118209.186318] Stack: +[118209.186318] fffffbffffffffff 010230ffffffffff 0101000000000000 ff84000000000000 +[118209.186318] fbffffffffffffff 30ffffffffffffff 0000000000000101 ffff880082348000 +[118209.186318] 0000000000000000 ffff8800afa2e000 ffff8800afa2e000 0000000000000000 +[118209.186318] Call Trace: +[118209.186318] [] open_ctree+0x1e37/0x21b9 [btrfs] +[118209.186318] [] btrfs_mount+0x97e/0xaed [btrfs] +[118209.186318] [] ? 
trace_hardirqs_on+0xd/0xf +[118209.186318] [] mount_fs+0x67/0x131 +[118209.186318] [] vfs_kern_mount+0x6c/0xde +[118209.186318] [] btrfs_mount+0x1ac/0xaed [btrfs] +[118209.186318] [] ? trace_hardirqs_on+0xd/0xf +[118209.186318] [] ? lockdep_init_map+0xb9/0x1b3 +[118209.186318] [] mount_fs+0x67/0x131 +[118209.186318] [] vfs_kern_mount+0x6c/0xde +[118209.186318] [] do_mount+0x8a6/0x9e8 +[118209.186318] [] SyS_mount+0x77/0x9f +[118209.186318] [] entry_SYSCALL_64_fastpath+0x12/0x6b +[118209.186318] Code: 64 00 00 85 c0 89 c3 75 24 f0 41 80 4c 24 20 20 49 8b bc 24 f0 01 00 00 4c 89 e6 e8 e8 65 00 00 85 c0 89 c3 74 11 83 f8 ef 75 02 <0f> 0b +4c 89 e7 e8 da 72 00 00 eb 1c 41 83 bc 24 00 01 00 00 00 +[118209.186318] RIP [] btrfs_find_orphan_roots+0x1fc/0x244 [btrfs] +[118209.186318] RSP +[118209.230735] ---[ end trace 83938f987d85d477 ]--- + +So fix this by not treating the error -EEXIST, returned when attempting +to insert a root already inserted by the backref walking code, as an error. + +The following test case for xfstests reproduces the bug: + + seq=`basename $0` + seqres=$RESULT_DIR/$seq + echo "QA output created by $seq" + tmp=/tmp/$$ + status=1 # failure is the default! + trap "_cleanup; exit \$status" 0 1 2 3 15 + + _cleanup() + { + _cleanup_flakey + cd / + rm -f $tmp.* + } + + # get standard environment, filters and checks + . ./common/rc + . ./common/filter + . ./common/dmflakey + + # real QA test starts here + _supported_fs btrfs + _supported_os Linux + _require_scratch + _require_dm_target flakey + _require_metadata_journaling $SCRATCH_DEV + + rm -f $seqres.full + + _scratch_mkfs >>$seqres.full 2>&1 + _init_flakey + _mount_flakey + + _run_btrfs_util_prog quota enable $SCRATCH_MNT + + # Create 2 directories with one file in one of them. + # We use these just to trigger a transaction commit later, moving the file from + # directory a to directory b and doing an fsync against directory a. 
+ mkdir $SCRATCH_MNT/a + mkdir $SCRATCH_MNT/b + touch $SCRATCH_MNT/a/f + sync + + # Create our test file with 2 4K extents. + $XFS_IO_PROG -f -s -c "pwrite -S 0xaa 0 8K" $SCRATCH_MNT/foobar | _filter_xfs_io + + # Create a snapshot and delete it. This doesn't really delete the snapshot + # immediately, just makes it inaccessible and invisible to user space, the + # snapshot is deleted later by a dedicated kernel thread (cleaner kthread) + # which is woken up at the next transaction commit. + # A root orphan item is inserted into the tree of tree roots, so that if a + # power failure happens before the dedicated kernel thread does the snapshot + # deletion, the next time the filesystem is mounted it resumes the snapshot + # deletion. + _run_btrfs_util_prog subvolume snapshot $SCRATCH_MNT $SCRATCH_MNT/snap + _run_btrfs_util_prog subvolume delete $SCRATCH_MNT/snap + + # Now overwrite half of the extents we wrote before. Because we made a snapshot + # before, which isn't really deleted yet (since no transaction commit happened + # after we did the snapshot delete request), the non overwritten extents get + # referenced twice, once by the default subvolume and once by the snapshot. + $XFS_IO_PROG -c "pwrite -S 0xbb 4K 8K" $SCRATCH_MNT/foobar | _filter_xfs_io + + # Now move file f from directory a to directory b and fsync directory a. + # The fsync on the directory a triggers a transaction commit (because a file + # was moved from it to another directory) and the file fsync leaves a log tree + # with file extent items to replay. + mv $SCRATCH_MNT/a/f $SCRATCH_MNT/a/b + $XFS_IO_PROG -c "fsync" $SCRATCH_MNT/a + $XFS_IO_PROG -c "fsync" $SCRATCH_MNT/foobar + + echo "File digest before power failure:" + md5sum $SCRATCH_MNT/foobar | _filter_scratch + + # Now simulate a power failure and mount the filesystem to replay the log tree. + # After the log tree was replayed, we used to hit a BUG_ON() when processing + # the root orphan item for the deleted snapshot.
This is because when processing + # an orphan root the code expected to be the first code inserting the root into + # the fs_info->fs_root_radix radix tree, while in reality it was the second + # caller attempting to do it - the first caller was the transaction commit that + # took place after replaying the log tree, when updating the qgroup counters. + _flakey_drop_and_remount + + echo "File digest before after failure:" + # Must match what we got before the power failure. + md5sum $SCRATCH_MNT/foobar | _filter_scratch + + _unmount_flakey + status=0 + exit + +Fixes: 2d9e97761087 ("Btrfs: use btrfs_get_fs_root in resolve_indirect_ref") +Signed-off-by: Filipe Manana +Reviewed-by: Qu Wenruo +Signed-off-by: Chris Mason +Signed-off-by: Greg Kroah-Hartman + +diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c +index 7cf8509deda7..2c849b08a91b 100644 +--- a/fs/btrfs/root-tree.c ++++ b/fs/btrfs/root-tree.c +@@ -310,8 +310,16 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) + set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state); + + err = btrfs_insert_fs_root(root->fs_info, root); ++ /* ++ * The root might have been inserted already, as before we look ++ * for orphan roots, log replay might have happened, which ++ * triggers a transaction commit and qgroup accounting, which ++ * in turn reads and inserts fs roots while doing backref ++ * walking.
++ */ ++ if (err == -EEXIST) ++ err = 0; + if (err) { +- BUG_ON(err == -EEXIST); + btrfs_free_fs_root(root); + break; + } diff --git a/queue-4.4/drm-amdgpu-apply-gfx_v8-fixes-to-gfx_v7-as-well.patch b/queue-4.4/drm-amdgpu-apply-gfx_v8-fixes-to-gfx_v7-as-well.patch new file mode 100644 index 00000000000..9d45cec31ce --- /dev/null +++ b/queue-4.4/drm-amdgpu-apply-gfx_v8-fixes-to-gfx_v7-as-well.patch @@ -0,0 +1,44 @@ +From feebe91aa9a9d99d9ec157612a614fadb79beb99 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= +Date: Fri, 26 Feb 2016 16:18:15 +0100 +Subject: drm/amdgpu: apply gfx_v8 fixes to gfx_v7 as well +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Christian König + +commit feebe91aa9a9d99d9ec157612a614fadb79beb99 upstream. + +We never ported that back to CIK, so we could run into VM faults here. + +Signed-off-by: Christian König +Reviewed-by: Alex Deucher +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +@@ -3628,6 +3628,19 @@ static void gfx_v7_0_ring_emit_vm_flush( + unsigned vm_id, uint64_t pd_addr) + { + int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX); ++ uint32_t seq = ring->fence_drv.sync_seq; ++ uint64_t addr = ring->fence_drv.gpu_addr; ++ ++ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); ++ amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */ ++ WAIT_REG_MEM_FUNCTION(3) | /* equal */ ++ WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */ ++ amdgpu_ring_write(ring, addr & 0xfffffffc); ++ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); ++ amdgpu_ring_write(ring, seq); ++ amdgpu_ring_write(ring, 0xffffffff); ++ amdgpu_ring_write(ring, 4); /* poll interval */ ++ + if (usepfp) { + /* synce CE with ME to prevent CE fetch CEIB before context switch done */ + amdgpu_ring_write(ring, 
PACKET3(PACKET3_SWITCH_BUFFER, 0)); diff --git a/queue-4.4/drm-amdgpu-gfx8-specify-which-engine-to-wait-before-vm-flush.patch b/queue-4.4/drm-amdgpu-gfx8-specify-which-engine-to-wait-before-vm-flush.patch new file mode 100644 index 00000000000..7fb49fbc2c1 --- /dev/null +++ b/queue-4.4/drm-amdgpu-gfx8-specify-which-engine-to-wait-before-vm-flush.patch @@ -0,0 +1,35 @@ +From 9cac537332f5502c103415b25609548c276a09f8 Mon Sep 17 00:00:00 2001 +From: Chunming Zhou +Date: Mon, 29 Feb 2016 14:12:38 +0800 +Subject: drm/amdgpu/gfx8: specify which engine to wait before vm flush +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Chunming Zhou + +commit 9cac537332f5502c103415b25609548c276a09f8 upstream. + +Select between me and pfp properly. + +Signed-off-by: Chunming Zhou +Reviewed-by: Christian König +Signed-off-by: Alex Deucher +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +@@ -4681,7 +4681,8 @@ static void gfx_v8_0_ring_emit_vm_flush( + + amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */ +- WAIT_REG_MEM_FUNCTION(3))); /* equal */ ++ WAIT_REG_MEM_FUNCTION(3) | /* equal */ ++ WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */ + amdgpu_ring_write(ring, addr & 0xfffffffc); + amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); + amdgpu_ring_write(ring, seq); diff --git a/queue-4.4/drm-amdgpu-pm-update-current-crtc-info-after-setting-the-powerstate.patch b/queue-4.4/drm-amdgpu-pm-update-current-crtc-info-after-setting-the-powerstate.patch new file mode 100644 index 00000000000..8fa7201592b --- /dev/null +++ b/queue-4.4/drm-amdgpu-pm-update-current-crtc-info-after-setting-the-powerstate.patch @@ -0,0 +1,49 @@ +From eda1d1cf8d18383f19cd2b752f786120efa4768f Mon Sep 17 
00:00:00 2001 +From: Alex Deucher +Date: Wed, 24 Feb 2016 17:18:25 -0500 +Subject: drm/amdgpu/pm: update current crtc info after setting the powerstate +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Alex Deucher + +commit eda1d1cf8d18383f19cd2b752f786120efa4768f upstream. + +On CI, we need to see if the number of crtcs changes to determine +whether or not we need to upload the mclk table again. In practice +we don't currently upload the mclk table again after the initial load. +The only reason you would would be to add new states, e.g., for +arbitrary mclk setting which is not currently supported. + +Acked-by: Jordan Lazare +Acked-by: Christian König +Signed-off-by: Alex Deucher +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +@@ -596,9 +596,6 @@ force: + /* update display watermarks based on new power state */ + amdgpu_display_bandwidth_update(adev); + +- adev->pm.dpm.current_active_crtcs = adev->pm.dpm.new_active_crtcs; +- adev->pm.dpm.current_active_crtc_count = adev->pm.dpm.new_active_crtc_count; +- + /* wait for the rings to drain */ + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { + struct amdgpu_ring *ring = adev->rings[i]; +@@ -617,6 +614,9 @@ force: + /* update displays */ + amdgpu_dpm_display_configuration_changed(adev); + ++ adev->pm.dpm.current_active_crtcs = adev->pm.dpm.new_active_crtcs; ++ adev->pm.dpm.current_active_crtc_count = adev->pm.dpm.new_active_crtc_count; ++ + if (adev->pm.funcs->force_performance_level) { + if (adev->pm.dpm.thermal_active) { + enum amdgpu_dpm_forced_level level = adev->pm.dpm.forced_level; diff --git a/queue-4.4/drm-amdgpu-return-from-atombios_dp_get_dpcd-only-when-error.patch b/queue-4.4/drm-amdgpu-return-from-atombios_dp_get_dpcd-only-when-error.patch new file mode 100644 index 
00000000000..8f8dd3dc29d --- /dev/null +++ b/queue-4.4/drm-amdgpu-return-from-atombios_dp_get_dpcd-only-when-error.patch @@ -0,0 +1,37 @@ +From 0b39c531cfa12dad54eac238c2e303b994df1ef7 Mon Sep 17 00:00:00 2001 +From: Arindam Nath +Date: Wed, 2 Mar 2016 17:19:01 +0530 +Subject: drm/amdgpu: return from atombios_dp_get_dpcd only when error + +From: Arindam Nath + +commit 0b39c531cfa12dad54eac238c2e303b994df1ef7 upstream. + +In amdgpu_connector_hotplug(), we need to start DP link +training only after we have received DPCD. The function +amdgpu_atombios_dp_get_dpcd() returns non-zero value only +when an error condition is met, otherwise returns zero. +So in case the function encounters an error, we need to +skip rest of the code and return from amdgpu_connector_hotplug() +immediately. Only when we are successful in reading DPCD +pin, we should carry on with turning-on the monitor. + +Signed-off-by: Arindam Nath +Signed-off-by: Alex Deucher +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +@@ -77,7 +77,7 @@ void amdgpu_connector_hotplug(struct drm + } else if (amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) { + /* Don't try to start link training before we + * have the dpcd */ +- if (!amdgpu_atombios_dp_get_dpcd(amdgpu_connector)) ++ if (amdgpu_atombios_dp_get_dpcd(amdgpu_connector)) + return; + + /* set it to OFF so that drm_helper_connector_dpms() diff --git a/queue-4.4/drm-ast-fix-incorrect-register-check-for-dram-width.patch b/queue-4.4/drm-ast-fix-incorrect-register-check-for-dram-width.patch new file mode 100644 index 00000000000..8b290d37982 --- /dev/null +++ b/queue-4.4/drm-ast-fix-incorrect-register-check-for-dram-width.patch @@ -0,0 +1,34 @@ +From 2d02b8bdba322b527c5f5168ce1ca10c2d982a78 Mon Sep 17 00:00:00 2001 +From: Timothy Pearson +Date: Fri, 26
Feb 2016 15:29:32 -0600 +Subject: drm/ast: Fix incorrect register check for DRAM width + +From: Timothy Pearson + +commit 2d02b8bdba322b527c5f5168ce1ca10c2d982a78 upstream. + +During DRAM initialization on certain ASpeed devices, an incorrect +bit (bit 10) was checked in the "SDRAM Bus Width Status" register +to determine DRAM width. + +Query bit 6 instead in accordance with the Aspeed AST2050 datasheet v1.05. + +Signed-off-by: Timothy Pearson +Signed-off-by: Dave Airlie +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/ast/ast_main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/ast/ast_main.c ++++ b/drivers/gpu/drm/ast/ast_main.c +@@ -227,7 +227,7 @@ static int ast_get_dram_info(struct drm_ + } while (ast_read32(ast, 0x10000) != 0x01); + data = ast_read32(ast, 0x10004); + +- if (data & 0x400) ++ if (data & 0x40) + ast->dram_bus_width = 16; + else + ast->dram_bus_width = 32; diff --git a/queue-4.4/drm-radeon-pm-update-current-crtc-info-after-setting-the-powerstate.patch b/queue-4.4/drm-radeon-pm-update-current-crtc-info-after-setting-the-powerstate.patch new file mode 100644 index 00000000000..ff71345f035 --- /dev/null +++ b/queue-4.4/drm-radeon-pm-update-current-crtc-info-after-setting-the-powerstate.patch @@ -0,0 +1,50 @@ +From 5e031d9fe8b0741f11d49667dfc3ebf5454121fd Mon Sep 17 00:00:00 2001 +From: Alex Deucher +Date: Wed, 24 Feb 2016 17:38:38 -0500 +Subject: drm/radeon/pm: update current crtc info after setting the powerstate +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Alex Deucher + +commit 5e031d9fe8b0741f11d49667dfc3ebf5454121fd upstream. + +On CI, we need to see if the number of crtcs changes to determine +whether or not we need to upload the mclk table again. In practice +we don't currently upload the mclk table again after the initial load. 
+The only reason you would would be to add new states, e.g., for +arbitrary mclk setting which is not currently supported. + +Acked-by: Christian König +Signed-off-by: Alex Deucher +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/radeon/radeon_pm.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/gpu/drm/radeon/radeon_pm.c ++++ b/drivers/gpu/drm/radeon/radeon_pm.c +@@ -1076,10 +1076,6 @@ force: + /* update display watermarks based on new power state */ + radeon_bandwidth_update(rdev); + +- rdev->pm.dpm.current_active_crtcs = rdev->pm.dpm.new_active_crtcs; +- rdev->pm.dpm.current_active_crtc_count = rdev->pm.dpm.new_active_crtc_count; +- rdev->pm.dpm.single_display = single_display; +- + /* wait for the rings to drain */ + for (i = 0; i < RADEON_NUM_RINGS; i++) { + struct radeon_ring *ring = &rdev->ring[i]; +@@ -1098,6 +1094,10 @@ force: + /* update displays */ + radeon_dpm_display_configuration_changed(rdev); + ++ rdev->pm.dpm.current_active_crtcs = rdev->pm.dpm.new_active_crtcs; ++ rdev->pm.dpm.current_active_crtc_count = rdev->pm.dpm.new_active_crtc_count; ++ rdev->pm.dpm.single_display = single_display; ++ + if (rdev->asic->dpm.force_performance_level) { + if (rdev->pm.dpm.thermal_active) { + enum radeon_dpm_forced_level level = rdev->pm.dpm.forced_level; diff --git a/queue-4.4/fix-directory-hardlinks-from-deleted-directories.patch b/queue-4.4/fix-directory-hardlinks-from-deleted-directories.patch new file mode 100644 index 00000000000..6c9736e7f9c --- /dev/null +++ b/queue-4.4/fix-directory-hardlinks-from-deleted-directories.patch @@ -0,0 +1,193 @@ +From be629c62a603e5935f8177fd8a19e014100a259e Mon Sep 17 00:00:00 2001 +From: David Woodhouse +Date: Mon, 1 Feb 2016 14:04:46 +0000 +Subject: Fix directory hardlinks from deleted directories +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: David Woodhouse + +commit be629c62a603e5935f8177fd8a19e014100a259e upstream. 
+ +When a directory is deleted, we don't take too much care about killing off +all the dirents that belong to it — on the basis that on remount, the scan +will conclude that the directory is dead anyway. + +This doesn't work though, when the deleted directory contained a child +directory which was moved *out*. In the early stages of the fs build +we can then end up with an apparent hard link, with the child directory +appearing both in its true location, and as a child of the original +directory which at this stage of the mount process we don't *yet* know +is defunct. + +To resolve this, take out the early special-casing of the "directories +shall not have hard links" rule in jffs2_build_inode_pass1(), and let the +normal nlink processing happen for directories as well as other inodes. + +Then later in the build process we can set ic->pino_nlink to the parent +inode#, as is required for directories during normal operation, instead +of the nlink. And complain only *then* about hard links which are still +in evidence even after killing off all the unreachable paths. + +Reported-by: Liu Song +Signed-off-by: David Woodhouse +Signed-off-by: Greg Kroah-Hartman + +--- + fs/jffs2/build.c | 75 +++++++++++++++++++++++++++++++++++++++------------- + fs/jffs2/nodelist.h | 6 +++- + 2 files changed, 62 insertions(+), 19 deletions(-) + +--- a/fs/jffs2/build.c ++++ b/fs/jffs2/build.c +@@ -49,7 +49,8 @@ next_inode(int *i, struct jffs2_inode_ca + + + static void jffs2_build_inode_pass1(struct jffs2_sb_info *c, +- struct jffs2_inode_cache *ic) ++ struct jffs2_inode_cache *ic, ++ int *dir_hardlinks) + { + struct jffs2_full_dirent *fd; + +@@ -68,19 +69,21 @@ static void jffs2_build_inode_pass1(stru + dbg_fsbuild("child \"%s\" (ino #%u) of dir ino #%u doesn't exist!\n", + fd->name, fd->ino, ic->ino); + jffs2_mark_node_obsolete(c, fd->raw); ++ /* Clear the ic/raw union so it doesn't cause problems later.
*/ ++ fd->ic = NULL; + continue; + } + ++ /* From this point, fd->raw is no longer used so we can set fd->ic */ ++ fd->ic = child_ic; ++ child_ic->pino_nlink++; ++ /* If we appear (at this stage) to have hard-linked directories, ++ * set a flag to trigger a scan later */ + if (fd->type == DT_DIR) { +- if (child_ic->pino_nlink) { +- JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u appears to be a hard link\n", +- fd->name, fd->ino, ic->ino); +- /* TODO: What do we do about it? */ +- } else { +- child_ic->pino_nlink = ic->ino; +- } +- } else +- child_ic->pino_nlink++; ++ child_ic->flags |= INO_FLAGS_IS_DIR; ++ if (child_ic->pino_nlink > 1) ++ *dir_hardlinks = 1; ++ } + + dbg_fsbuild("increased nlink for child \"%s\" (ino #%u)\n", fd->name, fd->ino); + /* Can't free scan_dents so far. We might need them in pass 2 */ +@@ -94,8 +97,7 @@ static void jffs2_build_inode_pass1(stru + */ + static int jffs2_build_filesystem(struct jffs2_sb_info *c) + { +- int ret; +- int i; ++ int ret, i, dir_hardlinks = 0; + struct jffs2_inode_cache *ic; + struct jffs2_full_dirent *fd; + struct jffs2_full_dirent *dead_fds = NULL; +@@ -119,7 +121,7 @@ static int jffs2_build_filesystem(struct + /* Now scan the directory tree, increasing nlink according to every dirent found. */ + for_each_inode(i, c, ic) { + if (ic->scan_dents) { +- jffs2_build_inode_pass1(c, ic); ++ jffs2_build_inode_pass1(c, ic, &dir_hardlinks); + cond_resched(); + } + } +@@ -155,6 +157,20 @@ static int jffs2_build_filesystem(struct + } + + dbg_fsbuild("pass 2a complete\n"); ++ ++ if (dir_hardlinks) { ++ /* If we detected directory hardlinks earlier, *hopefully* ++ * they are gone now because some of the links were from ++ * dead directories which still had some old dirents lying ++ * around and not yet garbage-collected, but which have ++ * been discarded above. So clear the pino_nlink field ++ * in each directory, so that the final scan below can ++ * print appropriate warnings. 
*/ ++ for_each_inode(i, c, ic) { ++ if (ic->flags & INO_FLAGS_IS_DIR) ++ ic->pino_nlink = 0; ++ } ++ } + dbg_fsbuild("freeing temporary data structures\n"); + + /* Finally, we can scan again and free the dirent structs */ +@@ -162,6 +178,33 @@ static int jffs2_build_filesystem(struct + while(ic->scan_dents) { + fd = ic->scan_dents; + ic->scan_dents = fd->next; ++ /* We do use the pino_nlink field to count nlink of ++ * directories during fs build, so set it to the ++ * parent ino# now. Now that there's hopefully only ++ * one. */ ++ if (fd->type == DT_DIR) { ++ if (!fd->ic) { ++ /* We'll have complained about it and marked the coresponding ++ raw node obsolete already. Just skip it. */ ++ continue; ++ } ++ ++ /* We *have* to have set this in jffs2_build_inode_pass1() */ ++ BUG_ON(!(fd->ic->flags & INO_FLAGS_IS_DIR)); ++ ++ /* We clear ic->pino_nlink ∀ directories' ic *only* if dir_hardlinks ++ * is set. Otherwise, we know this should never trigger anyway, so ++ * we don't do the check. And ic->pino_nlink still contains the nlink ++ * value (which is 1). */ ++ if (dir_hardlinks && fd->ic->pino_nlink) { ++ JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u is also hard linked from dir ino #%u\n", ++ fd->name, fd->ino, ic->ino, fd->ic->pino_nlink); ++ /* Should we unlink it from its previous parent? */ ++ } ++ ++ /* For directories, ic->pino_nlink holds that parent inode # */ ++ fd->ic->pino_nlink = ic->ino; ++ } + jffs2_free_full_dirent(fd); + } + ic->scan_dents = NULL; +@@ -240,11 +283,7 @@ static void jffs2_build_remove_unlinked_ + + /* Reduce nlink of the child. If it's now zero, stick it on the + dead_fds list to be cleaned up later. 
Else just free the fd */ +- +- if (fd->type == DT_DIR) +- child_ic->pino_nlink = 0; +- else +- child_ic->pino_nlink--; ++ child_ic->pino_nlink--; + + if (!child_ic->pino_nlink) { + dbg_fsbuild("inode #%u (\"%s\") now has no links; adding to dead_fds list.\n", +--- a/fs/jffs2/nodelist.h ++++ b/fs/jffs2/nodelist.h +@@ -194,6 +194,7 @@ struct jffs2_inode_cache { + #define INO_STATE_CLEARING 6 /* In clear_inode() */ + + #define INO_FLAGS_XATTR_CHECKED 0x01 /* has no duplicate xattr_ref */ ++#define INO_FLAGS_IS_DIR 0x02 /* is a directory */ + + #define RAWNODE_CLASS_INODE_CACHE 0 + #define RAWNODE_CLASS_XATTR_DATUM 1 +@@ -249,7 +250,10 @@ struct jffs2_readinode_info + + struct jffs2_full_dirent + { +- struct jffs2_raw_node_ref *raw; ++ union { ++ struct jffs2_raw_node_ref *raw; ++ struct jffs2_inode_cache *ic; /* Just during part of build */ ++ }; + struct jffs2_full_dirent *next; + uint32_t version; + uint32_t ino; /* == zero for unlink */ diff --git a/queue-4.4/jffs2-fix-page-lock-f-sem-deadlock.patch b/queue-4.4/jffs2-fix-page-lock-f-sem-deadlock.patch new file mode 100644 index 00000000000..7bda632f175 --- /dev/null +++ b/queue-4.4/jffs2-fix-page-lock-f-sem-deadlock.patch @@ -0,0 +1,73 @@ +From 49e91e7079febe59a20ca885a87dd1c54240d0f1 Mon Sep 17 00:00:00 2001 +From: David Woodhouse +Date: Mon, 1 Feb 2016 12:37:20 +0000 +Subject: jffs2: Fix page lock / f->sem deadlock +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: David Woodhouse + +commit 49e91e7079febe59a20ca885a87dd1c54240d0f1 upstream. + +With this fix, all code paths should now be obtaining the page lock before +f->sem. 
+ +Reported-by: Szabó Tamás +Tested-by: Thomas Betker +Signed-off-by: David Woodhouse +Signed-off-by: Greg Kroah-Hartman + +--- + fs/jffs2/README.Locking | 5 +---- + fs/jffs2/gc.c | 17 ++++++++++------- + 2 files changed, 11 insertions(+), 11 deletions(-) + +--- a/fs/jffs2/README.Locking ++++ b/fs/jffs2/README.Locking +@@ -2,10 +2,6 @@ + JFFS2 LOCKING DOCUMENTATION + --------------------------- + +-At least theoretically, JFFS2 does not require the Big Kernel Lock +-(BKL), which was always helpfully obtained for it by Linux 2.4 VFS +-code. It has its own locking, as described below. +- + This document attempts to describe the existing locking rules for + JFFS2. It is not expected to remain perfectly up to date, but ought to + be fairly close. +@@ -69,6 +65,7 @@ Ordering constraints: + any f->sem held. + 2. Never attempt to lock two file mutexes in one thread. + No ordering rules have been made for doing so. ++ 3. Never lock a page cache page with f->sem held. + + + erase_completion_lock spinlock +--- a/fs/jffs2/gc.c ++++ b/fs/jffs2/gc.c +@@ -1296,14 +1296,17 @@ static int jffs2_garbage_collect_dnode(s + BUG_ON(start > orig_start); + } + +- /* First, use readpage() to read the appropriate page into the page cache */ +- /* Q: What happens if we actually try to GC the _same_ page for which commit_write() +- * triggered garbage collection in the first place? +- * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the +- * page OK. We'll actually write it out again in commit_write, which is a little +- * suboptimal, but at least we're correct. +- */ ++ /* The rules state that we must obtain the page lock *before* f->sem, so ++ * drop f->sem temporarily. Since we also hold c->alloc_sem, nothing's ++ * actually going to *change* so we're safe; we only allow reading. ++ * ++ * It is important to note that jffs2_write_begin() will ensure that its ++ * page is marked Uptodate before allocating space. 
That means that if we ++ * end up here trying to GC the *same* page that jffs2_write_begin() is ++ * trying to write out, read_cache_page() will not deadlock. */ ++ mutex_unlock(&f->sem); + pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg); ++ mutex_lock(&f->sem); + + if (IS_ERR(pg_ptr)) { + pr_warn("read_cache_page() returned error: %ld\n", diff --git a/queue-4.4/libata-align-ata_device-s-id-on-a-cacheline.patch b/queue-4.4/libata-align-ata_device-s-id-on-a-cacheline.patch new file mode 100644 index 00000000000..2bdb6bdd8db --- /dev/null +++ b/queue-4.4/libata-align-ata_device-s-id-on-a-cacheline.patch @@ -0,0 +1,39 @@ +From 4ee34ea3a12396f35b26d90a094c75db95080baa Mon Sep 17 00:00:00 2001 +From: Harvey Hunt +Date: Wed, 24 Feb 2016 15:16:43 +0000 +Subject: libata: Align ata_device's id on a cacheline + +From: Harvey Hunt + +commit 4ee34ea3a12396f35b26d90a094c75db95080baa upstream. + +The id buffer in ata_device is a DMA target, but it isn't explicitly +cacheline aligned. Due to this, adjacent fields can be overwritten with +stale data from memory on non coherent architectures. As a result, the +kernel is sometimes unable to communicate with an ATA device. + +Fix this by ensuring that the id buffer is cacheline aligned. + +This issue is similar to that fixed by Commit 84bda12af31f +("libata: align ap->sector_buf"). 
+ +Signed-off-by: Harvey Hunt +Cc: linux-kernel@vger.kernel.org +Signed-off-by: Tejun Heo +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/libata.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/include/linux/libata.h ++++ b/include/linux/libata.h +@@ -718,7 +718,7 @@ struct ata_device { + union { + u16 id[ATA_ID_WORDS]; /* IDENTIFY xxx DEVICE data */ + u32 gscr[SATA_PMP_GSCR_DWORDS]; /* PMP GSCR block */ +- }; ++ } ____cacheline_aligned; + + /* DEVSLP Timing Variables from Identify Device Data Log */ + u8 devslp_timing[ATA_LOG_DEVSLP_SIZE]; diff --git a/queue-4.4/libata-fix-hdio_get_32bit-ioctl.patch b/queue-4.4/libata-fix-hdio_get_32bit-ioctl.patch new file mode 100644 index 00000000000..64ebd6cfcba --- /dev/null +++ b/queue-4.4/libata-fix-hdio_get_32bit-ioctl.patch @@ -0,0 +1,96 @@ +From 287e6611ab1eac76c2c5ebf6e345e04c80ca9c61 Mon Sep 17 00:00:00 2001 +From: Arnd Bergmann +Date: Thu, 11 Feb 2016 14:16:27 +0100 +Subject: libata: fix HDIO_GET_32BIT ioctl + +From: Arnd Bergmann + +commit 287e6611ab1eac76c2c5ebf6e345e04c80ca9c61 upstream. + +As reported by Soohoon Lee, the HDIO_GET_32BIT ioctl does not +work correctly in compat mode with libata. + +I have investigated the issue further and found multiple problems +that all appeared with the same commit that originally introduced +HDIO_GET_32BIT handling in libata back in linux-2.6.8 and presumably +also linux-2.4, as the code uses "copy_to_user(arg, &val, 1)" to copy +a 'long' variable containing either 0 or 1 to user space. + +The problems with this are: + +* On big-endian machines, this will always write a zero because it + stores the wrong byte into user space. + +* In compat mode, the upper three bytes of the variable are updated + by the compat_hdio_ioctl() function, but they now contain + uninitialized stack data. + +* The hdparm tool calling this ioctl uses a 'static long' variable + to store the result. 
This means at least the upper bytes are + initialized to zero, but calling another ioctl like HDIO_GET_MULTCOUNT + would fill them with data that remains stale when the low byte + is overwritten. Fortunately libata doesn't implement any of the + affected ioctl commands, so this would only happen when we query + both an IDE and an ATA device in the same command such as + "hdparm -N -c /dev/hda /dev/sda" + +* The libata code for unknown reasons started using ATA_IOC_GET_IO32 + and ATA_IOC_SET_IO32 as aliases for HDIO_GET_32BIT and HDIO_SET_32BIT, + while the ioctl commands that were added later use the normal + HDIO_* names. This is harmless but rather confusing. + +This addresses all four issues by changing the code to use put_user() +on an 'unsigned long' variable in HDIO_GET_32BIT, like the IDE subsystem +does, and by clarifying the names of the ioctl commands. + +Signed-off-by: Arnd Bergmann +Reported-by: Soohoon Lee +Tested-by: Soohoon Lee +Signed-off-by: Tejun Heo +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/ata/libata-scsi.c | 11 +++++------ + include/linux/ata.h | 4 ++-- + 2 files changed, 7 insertions(+), 8 deletions(-) + +--- a/drivers/ata/libata-scsi.c ++++ b/drivers/ata/libata-scsi.c +@@ -675,19 +675,18 @@ static int ata_ioc32(struct ata_port *ap + int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *scsidev, + int cmd, void __user *arg) + { +- int val = -EINVAL, rc = -EINVAL; ++ unsigned long val; ++ int rc = -EINVAL; + unsigned long flags; + + switch (cmd) { +- case ATA_IOC_GET_IO32: ++ case HDIO_GET_32BIT: + spin_lock_irqsave(ap->lock, flags); + val = ata_ioc32(ap); + spin_unlock_irqrestore(ap->lock, flags); +- if (copy_to_user(arg, &val, 1)) +- return -EFAULT; +- return 0; ++ return put_user(val, (unsigned long __user *)arg); + +- case ATA_IOC_SET_IO32: ++ case HDIO_SET_32BIT: + val = (unsigned long) arg; + rc = 0; + spin_lock_irqsave(ap->lock, flags); +--- a/include/linux/ata.h ++++ b/include/linux/ata.h +@@ -487,8 +487,8 @@ enum 
ata_tf_protocols { + }; + + enum ata_ioctls { +- ATA_IOC_GET_IO32 = 0x309, +- ATA_IOC_SET_IO32 = 0x324, ++ ATA_IOC_GET_IO32 = 0x309, /* HDIO_GET_32BIT */ ++ ATA_IOC_SET_IO32 = 0x324, /* HDIO_SET_32BIT */ + }; + + /* core structures */ diff --git a/queue-4.4/pata-rb532-cf-get-rid-of-the-irq_to_gpio-call.patch b/queue-4.4/pata-rb532-cf-get-rid-of-the-irq_to_gpio-call.patch new file mode 100644 index 00000000000..424b0038877 --- /dev/null +++ b/queue-4.4/pata-rb532-cf-get-rid-of-the-irq_to_gpio-call.patch @@ -0,0 +1,80 @@ +From 018361767a21fb2d5ebd3ac182c04baf8a8b4e08 Mon Sep 17 00:00:00 2001 +From: Gabor Juhos +Date: Wed, 17 Feb 2016 12:58:20 +0100 +Subject: pata-rb532-cf: get rid of the irq_to_gpio() call + +From: Gabor Juhos + +commit 018361767a21fb2d5ebd3ac182c04baf8a8b4e08 upstream. + +The RB532 platform specific irq_to_gpio() implementation has been +removed with commit 832f5dacfa0b ("MIPS: Remove all the uses of +custom gpio.h"). Now the platform uses the generic stub which causes +the following error: + + pata-rb532-cf pata-rb532-cf: no GPIO found for irq149 + pata-rb532-cf: probe of pata-rb532-cf failed with error -2 + +Drop the irq_to_gpio() call and get the GPIO number from platform +data instead. 
After this change, the driver works again: + + scsi host0: pata-rb532-cf + ata1: PATA max PIO4 irq 149 + ata1.00: CFA: CF 1GB, 20080820, max MWDMA4 + ata1.00: 1989792 sectors, multi 0: LBA + ata1.00: configured for PIO4 + scsi 0:0:0:0: Direct-Access ATA CF 1GB 0820 PQ: 0\ + ANSI: 5 + sd 0:0:0:0: [sda] 1989792 512-byte logical blocks: (1.01 GB/971 MiB) + sd 0:0:0:0: [sda] Write Protect is off + sd 0:0:0:0: [sda] Write cache: disabled, read cache: enabled, doesn't\ + support DPO or FUA + sda: sda1 sda2 + sd 0:0:0:0: [sda] Attached SCSI disk + +Fixes: 832f5dacfa0b ("MIPS: Remove all the uses of custom gpio.h") +Cc: Alban Bedel +Cc: Ralf Baechle +Cc: Arnd Bergmann +Signed-off-by: Gabor Juhos +Signed-off-by: Tejun Heo +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/ata/pata_rb532_cf.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +--- a/drivers/ata/pata_rb532_cf.c ++++ b/drivers/ata/pata_rb532_cf.c +@@ -32,6 +32,8 @@ + #include + #include + ++#include ++ + #define DRV_NAME "pata-rb532-cf" + #define DRV_VERSION "0.1.0" + #define DRV_DESC "PATA driver for RouterBOARD 532 Compact Flash" +@@ -107,6 +109,7 @@ static int rb532_pata_driver_probe(struc + int gpio; + struct resource *res; + struct ata_host *ah; ++ struct cf_device *pdata; + struct rb532_cf_info *info; + int ret; + +@@ -122,7 +125,13 @@ static int rb532_pata_driver_probe(struc + return -ENOENT; + } + +- gpio = irq_to_gpio(irq); ++ pdata = dev_get_platdata(&pdev->dev); ++ if (!pdata) { ++ dev_err(&pdev->dev, "no platform data specified\n"); ++ return -EINVAL; ++ } ++ ++ gpio = pdata->gpio_pin; + if (gpio < 0) { + dev_err(&pdev->dev, "no GPIO found for irq%d\n", irq); + return -ENOENT; diff --git a/queue-4.4/pm-sleep-x86-fix-crash-on-graph-trace-through-x86-suspend.patch b/queue-4.4/pm-sleep-x86-fix-crash-on-graph-trace-through-x86-suspend.patch new file mode 100644 index 00000000000..0380ee934d2 --- /dev/null +++ b/queue-4.4/pm-sleep-x86-fix-crash-on-graph-trace-through-x86-suspend.patch 
@@ -0,0 +1,51 @@ +From 92f9e179a702a6adbc11e2fedc76ecd6ffc9e3f7 Mon Sep 17 00:00:00 2001 +From: Todd E Brandt +Date: Wed, 2 Mar 2016 16:05:29 -0800 +Subject: PM / sleep / x86: Fix crash on graph trace through x86 suspend + +From: Todd E Brandt + +commit 92f9e179a702a6adbc11e2fedc76ecd6ffc9e3f7 upstream. + +Pause/unpause graph tracing around do_suspend_lowlevel as it has +inconsistent call/return info after it jumps to the wakeup vector. +The graph trace buffer will otherwise become misaligned and +may eventually crash and hang on suspend. + +To reproduce the issue and test the fix: +Run a function_graph trace over suspend/resume and set the graph +function to suspend_devices_and_enter. This consistently hangs the +system without this fix. + +Signed-off-by: Todd Brandt +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/acpi/sleep.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/arch/x86/kernel/acpi/sleep.c ++++ b/arch/x86/kernel/acpi/sleep.c +@@ -16,6 +16,7 @@ + #include + #include + ++#include + #include "../../realmode/rm/wakeup.h" + #include "sleep.h" + +@@ -107,7 +108,13 @@ int x86_acpi_suspend_lowlevel(void) + saved_magic = 0x123456789abcdef0L; + #endif /* CONFIG_64BIT */ + ++ /* ++ * Pause/unpause graph tracing around do_suspend_lowlevel as it has ++ * inconsistent call/return info after it jumps to the wakeup vector. 
++ */ ++ pause_graph_tracing(); + do_suspend_lowlevel(); ++ unpause_graph_tracing(); + return 0; + } + diff --git a/queue-4.4/revert-jffs2-fix-lock-acquisition-order-bug-in-jffs2_write_begin.patch b/queue-4.4/revert-jffs2-fix-lock-acquisition-order-bug-in-jffs2_write_begin.patch new file mode 100644 index 00000000000..151abdbc1b7 --- /dev/null +++ b/queue-4.4/revert-jffs2-fix-lock-acquisition-order-bug-in-jffs2_write_begin.patch @@ -0,0 +1,133 @@ +From 157078f64b8a9cd7011b6b900b2f2498df850748 Mon Sep 17 00:00:00 2001 +From: Thomas Betker +Date: Tue, 10 Nov 2015 22:18:15 +0100 +Subject: Revert "jffs2: Fix lock acquisition order bug in jffs2_write_begin" + +From: Thomas Betker + +commit 157078f64b8a9cd7011b6b900b2f2498df850748 upstream. + +This reverts commit 5ffd3412ae55 +("jffs2: Fix lock acquisition order bug in jffs2_write_begin"). + +The commit modified jffs2_write_begin() to remove a deadlock with +jffs2_garbage_collect_live(), but this introduced new deadlocks found +by multiple users. page_lock() actually has to be called before +mutex_lock(&c->alloc_sem) or mutex_lock(&f->sem) because +jffs2_write_end() and jffs2_readpage() are called with the page locked, +and they acquire c->alloc_sem and f->sem, resp. + +In other words, the lock order in jffs2_write_begin() was correct, and +it is the jffs2_garbage_collect_live() path that has to be changed. + +Revert the commit to get rid of the new deadlocks, and to clear the way +for a better fix of the original deadlock. 
+ +Reported-by: Deng Chao +Reported-by: Ming Liu +Reported-by: wangzaiwei +Signed-off-by: Thomas Betker +Signed-off-by: David Woodhouse +Signed-off-by: Greg Kroah-Hartman + +--- + fs/jffs2/file.c | 39 ++++++++++++++++++--------------------- + 1 file changed, 18 insertions(+), 21 deletions(-) + +--- a/fs/jffs2/file.c ++++ b/fs/jffs2/file.c +@@ -137,39 +137,33 @@ static int jffs2_write_begin(struct file + struct page *pg; + struct inode *inode = mapping->host; + struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); +- struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); +- struct jffs2_raw_inode ri; +- uint32_t alloc_len = 0; + pgoff_t index = pos >> PAGE_CACHE_SHIFT; + uint32_t pageofs = index << PAGE_CACHE_SHIFT; + int ret = 0; + +- jffs2_dbg(1, "%s()\n", __func__); +- +- if (pageofs > inode->i_size) { +- ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, +- ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); +- if (ret) +- return ret; +- } +- +- mutex_lock(&f->sem); + pg = grab_cache_page_write_begin(mapping, index, flags); +- if (!pg) { +- if (alloc_len) +- jffs2_complete_reservation(c); +- mutex_unlock(&f->sem); ++ if (!pg) + return -ENOMEM; +- } + *pagep = pg; + +- if (alloc_len) { ++ jffs2_dbg(1, "%s()\n", __func__); ++ ++ if (pageofs > inode->i_size) { + /* Make new hole frag from old EOF to new page */ ++ struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); ++ struct jffs2_raw_inode ri; + struct jffs2_full_dnode *fn; ++ uint32_t alloc_len; + + jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n", + (unsigned int)inode->i_size, pageofs); + ++ ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, ++ ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); ++ if (ret) ++ goto out_page; ++ ++ mutex_lock(&f->sem); + memset(&ri, 0, sizeof(ri)); + + ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); +@@ -196,6 +190,7 @@ static int jffs2_write_begin(struct file + if (IS_ERR(fn)) { + ret = PTR_ERR(fn); + jffs2_complete_reservation(c); ++ mutex_unlock(&f->sem); + goto 
out_page; + } + ret = jffs2_add_full_dnode_to_inode(c, f, fn); +@@ -210,10 +205,12 @@ static int jffs2_write_begin(struct file + jffs2_mark_node_obsolete(c, fn->raw); + jffs2_free_full_dnode(fn); + jffs2_complete_reservation(c); ++ mutex_unlock(&f->sem); + goto out_page; + } + jffs2_complete_reservation(c); + inode->i_size = pageofs; ++ mutex_unlock(&f->sem); + } + + /* +@@ -222,18 +219,18 @@ static int jffs2_write_begin(struct file + * case of a short-copy. + */ + if (!PageUptodate(pg)) { ++ mutex_lock(&f->sem); + ret = jffs2_do_readpage_nolock(inode, pg); ++ mutex_unlock(&f->sem); + if (ret) + goto out_page; + } +- mutex_unlock(&f->sem); + jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags); + return ret; + + out_page: + unlock_page(pg); + page_cache_release(pg); +- mutex_unlock(&f->sem); + return ret; + } + diff --git a/queue-4.4/series b/queue-4.4/series index 2407707a8a5..894b79ccd80 100644 --- a/queue-4.4/series +++ b/queue-4.4/series @@ -19,3 +19,24 @@ arm-arm64-kvm-fix-ioctl-error-handling.patch iommu-amd-apply-workaround-for-ats-write-permission-check.patch iommu-amd-fix-boot-warning-when-device-00-00.0-is-not-iommu-covered.patch iommu-vt-d-use-bus_notify_removed_device-in-hotplug-path.patch +target-fix-write_same-discard-conversion-to-linux-512b-sectors.patch +drm-ast-fix-incorrect-register-check-for-dram-width.patch +drm-radeon-pm-update-current-crtc-info-after-setting-the-powerstate.patch +drm-amdgpu-pm-update-current-crtc-info-after-setting-the-powerstate.patch +drm-amdgpu-apply-gfx_v8-fixes-to-gfx_v7-as-well.patch +drm-amdgpu-gfx8-specify-which-engine-to-wait-before-vm-flush.patch +drm-amdgpu-return-from-atombios_dp_get_dpcd-only-when-error.patch +libata-fix-hdio_get_32bit-ioctl.patch +libata-align-ata_device-s-id-on-a-cacheline.patch +block-bio-introduce-helpers-to-get-the-1st-and-last-bvec.patch +writeback-flush-inode-cgroup-wb-switches-instead-of-pinning-super_block.patch +adding-intel-lewisburg-device-ids-for-sata.patch 
+arm64-vmemmap-use-virtual-projection-of-linear-region.patch +pm-sleep-x86-fix-crash-on-graph-trace-through-x86-suspend.patch +ata-ahci-don-t-mark-hotplugcapable-ports-as-external-removable.patch +tracing-do-not-have-comm-filter-override-event-comm-field.patch +pata-rb532-cf-get-rid-of-the-irq_to_gpio-call.patch +btrfs-fix-loading-of-orphan-roots-leading-to-bug_on.patch +revert-jffs2-fix-lock-acquisition-order-bug-in-jffs2_write_begin.patch +jffs2-fix-page-lock-f-sem-deadlock.patch +fix-directory-hardlinks-from-deleted-directories.patch diff --git a/queue-4.4/target-fix-write_same-discard-conversion-to-linux-512b-sectors.patch b/queue-4.4/target-fix-write_same-discard-conversion-to-linux-512b-sectors.patch new file mode 100644 index 00000000000..35af5d4232b --- /dev/null +++ b/queue-4.4/target-fix-write_same-discard-conversion-to-linux-512b-sectors.patch @@ -0,0 +1,268 @@ +From 8a9ebe717a133ba7bc90b06047f43cc6b8bcb8b3 Mon Sep 17 00:00:00 2001 +From: Mike Christie +Date: Mon, 18 Jan 2016 14:09:27 -0600 +Subject: target: Fix WRITE_SAME/DISCARD conversion to linux 512b sectors + +From: Mike Christie + +commit 8a9ebe717a133ba7bc90b06047f43cc6b8bcb8b3 upstream. + +In a couple places we are not converting to/from the Linux +block layer 512 byte sectors. + +1. + +The request queue values and what we do are a mismatch of +things: + +max_discard_sectors - This is in linux block layer 512 byte +sectors. We are just copying this to max_unmap_lba_count. + +discard_granularity - This is in bytes. We are converting it +to Linux block layer 512 byte sectors. + +discard_alignment - This is in bytes. We are just copying +this over. + +The problem is that the core LIO code exports these values in +spc_emulate_evpd_b0 and we use them to test request arguments +in sbc_execute_unmap, but we never convert to the block size +we export to the initiator. If we are not using 512 byte sectors +then we are exporting the wrong values or our checks are off.
+And, for the discard_alignment/bytes case we are just plain messed +up. + +2. + +blkdev_issue_discard's start and number of sector arguments +are supposed to be in linux block layer 512 byte sectors. We are +currently passing in the values we get from the initiator which +might be based on some other sector size. + +There is a similar problem in iblock_execute_write_same where +the bio functions want values in 512 byte sectors but we are +passing in what we got from the initiator. + +Signed-off-by: Mike Christie +Signed-off-by: Nicholas Bellinger +[ kamal: backport to 4.4-stable: no unmap_zeroes_data ] +Signed-off-by: Kamal Mostafa +Signed-off-by: Greg Kroah-Hartman +--- + drivers/target/target_core_device.c | 43 ++++++++++++++++++++++++++ + drivers/target/target_core_file.c | 29 +++++------------- + drivers/target/target_core_iblock.c | 56 ++++++++--------------------------- + include/target/target_core_backend.h | 3 + + 4 files changed, 69 insertions(+), 62 deletions(-) + +--- a/drivers/target/target_core_device.c ++++ b/drivers/target/target_core_device.c +@@ -826,6 +826,49 @@ struct se_device *target_alloc_device(st + return dev; + } + ++/* ++ * Check if the underlying struct block_device request_queue supports ++ * the QUEUE_FLAG_DISCARD bit for UNMAP/WRITE_SAME in SCSI + TRIM ++ * in ATA and we need to set TPE=1 ++ */ ++bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib, ++ struct request_queue *q, int block_size) ++{ ++ if (!blk_queue_discard(q)) ++ return false; ++ ++ attrib->max_unmap_lba_count = (q->limits.max_discard_sectors << 9) / ++ block_size; ++ /* ++ * Currently hardcoded to 1 in Linux/SCSI code.. 
++ */ ++ attrib->max_unmap_block_desc_count = 1; ++ attrib->unmap_granularity = q->limits.discard_granularity / block_size; ++ attrib->unmap_granularity_alignment = q->limits.discard_alignment / ++ block_size; ++ return true; ++} ++EXPORT_SYMBOL(target_configure_unmap_from_queue); ++ ++/* ++ * Convert from blocksize advertised to the initiator to the 512 byte ++ * units unconditionally used by the Linux block layer. ++ */ ++sector_t target_to_linux_sector(struct se_device *dev, sector_t lb) ++{ ++ switch (dev->dev_attrib.block_size) { ++ case 4096: ++ return lb << 3; ++ case 2048: ++ return lb << 2; ++ case 1024: ++ return lb << 1; ++ default: ++ return lb; ++ } ++} ++EXPORT_SYMBOL(target_to_linux_sector); ++ + int target_configure_device(struct se_device *dev) + { + struct se_hba *hba = dev->se_hba; +--- a/drivers/target/target_core_file.c ++++ b/drivers/target/target_core_file.c +@@ -160,25 +160,11 @@ static int fd_configure_device(struct se + " block_device blocks: %llu logical_block_size: %d\n", + dev_size, div_u64(dev_size, fd_dev->fd_block_size), + fd_dev->fd_block_size); +- /* +- * Check if the underlying struct block_device request_queue supports +- * the QUEUE_FLAG_DISCARD bit for UNMAP/WRITE_SAME in SCSI + TRIM +- * in ATA and we need to set TPE=1 +- */ +- if (blk_queue_discard(q)) { +- dev->dev_attrib.max_unmap_lba_count = +- q->limits.max_discard_sectors; +- /* +- * Currently hardcoded to 1 in Linux/SCSI code.. 
+- */ +- dev->dev_attrib.max_unmap_block_desc_count = 1; +- dev->dev_attrib.unmap_granularity = +- q->limits.discard_granularity >> 9; +- dev->dev_attrib.unmap_granularity_alignment = +- q->limits.discard_alignment; ++ ++ if (target_configure_unmap_from_queue(&dev->dev_attrib, q, ++ fd_dev->fd_block_size)) + pr_debug("IFILE: BLOCK Discard support available," +- " disabled by default\n"); +- } ++ " disabled by default\n"); + /* + * Enable write same emulation for IBLOCK and use 0xFFFF as + * the smaller WRITE_SAME(10) only has a two-byte block count. +@@ -490,9 +476,12 @@ fd_execute_unmap(struct se_cmd *cmd, sec + if (S_ISBLK(inode->i_mode)) { + /* The backend is block device, use discard */ + struct block_device *bdev = inode->i_bdev; ++ struct se_device *dev = cmd->se_dev; + +- ret = blkdev_issue_discard(bdev, lba, +- nolb, GFP_KERNEL, 0); ++ ret = blkdev_issue_discard(bdev, ++ target_to_linux_sector(dev, lba), ++ target_to_linux_sector(dev, nolb), ++ GFP_KERNEL, 0); + if (ret < 0) { + pr_warn("FILEIO: blkdev_issue_discard() failed: %d\n", + ret); +--- a/drivers/target/target_core_iblock.c ++++ b/drivers/target/target_core_iblock.c +@@ -121,27 +121,11 @@ static int iblock_configure_device(struc + dev->dev_attrib.hw_max_sectors = queue_max_hw_sectors(q); + dev->dev_attrib.hw_queue_depth = q->nr_requests; + +- /* +- * Check if the underlying struct block_device request_queue supports +- * the QUEUE_FLAG_DISCARD bit for UNMAP/WRITE_SAME in SCSI + TRIM +- * in ATA and we need to set TPE=1 +- */ +- if (blk_queue_discard(q)) { +- dev->dev_attrib.max_unmap_lba_count = +- q->limits.max_discard_sectors; +- +- /* +- * Currently hardcoded to 1 in Linux/SCSI code.. 
+- */ +- dev->dev_attrib.max_unmap_block_desc_count = 1; +- dev->dev_attrib.unmap_granularity = +- q->limits.discard_granularity >> 9; +- dev->dev_attrib.unmap_granularity_alignment = +- q->limits.discard_alignment; +- ++ if (target_configure_unmap_from_queue(&dev->dev_attrib, q, ++ dev->dev_attrib.hw_block_size)) + pr_debug("IBLOCK: BLOCK Discard support available," +- " disabled by default\n"); +- } ++ " disabled by default\n"); ++ + /* + * Enable write same emulation for IBLOCK and use 0xFFFF as + * the smaller WRITE_SAME(10) only has a two-byte block count. +@@ -413,9 +397,13 @@ static sense_reason_t + iblock_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb) + { + struct block_device *bdev = IBLOCK_DEV(cmd->se_dev)->ibd_bd; ++ struct se_device *dev = cmd->se_dev; + int ret; + +- ret = blkdev_issue_discard(bdev, lba, nolb, GFP_KERNEL, 0); ++ ret = blkdev_issue_discard(bdev, ++ target_to_linux_sector(dev, lba), ++ target_to_linux_sector(dev, nolb), ++ GFP_KERNEL, 0); + if (ret < 0) { + pr_err("blkdev_issue_discard() failed: %d\n", ret); + return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; +@@ -431,8 +419,10 @@ iblock_execute_write_same(struct se_cmd + struct scatterlist *sg; + struct bio *bio; + struct bio_list list; +- sector_t block_lba = cmd->t_task_lba; +- sector_t sectors = sbc_get_write_same_sectors(cmd); ++ struct se_device *dev = cmd->se_dev; ++ sector_t block_lba = target_to_linux_sector(dev, cmd->t_task_lba); ++ sector_t sectors = target_to_linux_sector(dev, ++ sbc_get_write_same_sectors(cmd)); + + if (cmd->prot_op) { + pr_err("WRITE_SAME: Protection information with IBLOCK" +@@ -646,12 +636,12 @@ iblock_execute_rw(struct se_cmd *cmd, st + enum dma_data_direction data_direction) + { + struct se_device *dev = cmd->se_dev; ++ sector_t block_lba = target_to_linux_sector(dev, cmd->t_task_lba); + struct iblock_req *ibr; + struct bio *bio, *bio_start; + struct bio_list list; + struct scatterlist *sg; + u32 sg_num = sgl_nents; +- sector_t block_lba; + 
unsigned bio_cnt; + int rw = 0; + int i; +@@ -677,24 +667,6 @@ iblock_execute_rw(struct se_cmd *cmd, st + rw = READ; + } + +- /* +- * Convert the blocksize advertised to the initiator to the 512 byte +- * units unconditionally used by the Linux block layer. +- */ +- if (dev->dev_attrib.block_size == 4096) +- block_lba = (cmd->t_task_lba << 3); +- else if (dev->dev_attrib.block_size == 2048) +- block_lba = (cmd->t_task_lba << 2); +- else if (dev->dev_attrib.block_size == 1024) +- block_lba = (cmd->t_task_lba << 1); +- else if (dev->dev_attrib.block_size == 512) +- block_lba = cmd->t_task_lba; +- else { +- pr_err("Unsupported SCSI -> BLOCK LBA conversion:" +- " %u\n", dev->dev_attrib.block_size); +- return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; +- } +- + ibr = kzalloc(sizeof(struct iblock_req), GFP_KERNEL); + if (!ibr) + goto fail; +--- a/include/target/target_core_backend.h ++++ b/include/target/target_core_backend.h +@@ -94,5 +94,8 @@ sense_reason_t passthrough_parse_cdb(str + sense_reason_t (*exec_cmd)(struct se_cmd *cmd)); + + bool target_sense_desc_format(struct se_device *dev); ++sector_t target_to_linux_sector(struct se_device *dev, sector_t lb); ++bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib, ++ struct request_queue *q, int block_size); + + #endif /* TARGET_CORE_BACKEND_H */ diff --git a/queue-4.4/tracing-do-not-have-comm-filter-override-event-comm-field.patch b/queue-4.4/tracing-do-not-have-comm-filter-override-event-comm-field.patch new file mode 100644 index 00000000000..ab367845fc3 --- /dev/null +++ b/queue-4.4/tracing-do-not-have-comm-filter-override-event-comm-field.patch @@ -0,0 +1,124 @@ +From e57cbaf0eb006eaa207395f3bfd7ce52c1b5539c Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (Red Hat)" +Date: Thu, 3 Mar 2016 17:18:20 -0500 +Subject: tracing: Do not have 'comm' filter override event 'comm' field + +From: Steven Rostedt (Red Hat) + +commit e57cbaf0eb006eaa207395f3bfd7ce52c1b5539c upstream. 
+ +Commit 9f61668073a8d "tracing: Allow triggers to filter for CPU ids and +process names" added a 'comm' filter that will filter events based on the +current task struct's 'comm'. But this now hides the ability to filter events +that have a 'comm' field too. For example, sched_migrate_task trace event. +That has a 'comm' field of the task to be migrated. + + echo 'comm == "bash"' > events/sched_migrate_task/filter + +will now filter all sched_migrate_task events for tasks named "bash" that +migrate other tasks (in interrupt context), instead of seeing when "bash" +itself gets migrated. + +This fix requires a couple of changes. + +1) Change the look up order for filter predicates to look at the event's + fields before looking at the generic filters. + +2) Instead of basing the filter function off of the "comm" name, have the + generic "comm" filter have its own filter_type (FILTER_COMM). Test + against the type instead of the name to assign the filter function. + +3) Add a new "COMM" filter that works just like "comm" but will filter based + on the current task, even if the trace event contains a "comm" field. + +Do the same for "cpu" field, adding a FILTER_CPU and a filter "CPU". 
+ +Fixes: 9f61668073a8d "tracing: Allow triggers to filter for CPU ids and process names" +Reported-by: Matt Fleming +Signed-off-by: Steven Rostedt +Signed-off-by: Greg Kroah-Hartman + +diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h +index 429fdfc3baf5..925730bc9fc1 100644 +--- a/include/linux/trace_events.h ++++ b/include/linux/trace_events.h +@@ -568,6 +568,8 @@ enum { + FILTER_DYN_STRING, + FILTER_PTR_STRING, + FILTER_TRACE_FN, ++ FILTER_COMM, ++ FILTER_CPU, + }; + + extern int trace_event_raw_init(struct trace_event_call *call); +diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c +index ab09829d3b97..05ddc0820771 100644 +--- a/kernel/trace/trace_events.c ++++ b/kernel/trace/trace_events.c +@@ -97,16 +97,16 @@ trace_find_event_field(struct trace_event_call *call, char *name) + struct ftrace_event_field *field; + struct list_head *head; + +- field = __find_event_field(&ftrace_generic_fields, name); ++ head = trace_get_fields(call); ++ field = __find_event_field(head, name); + if (field) + return field; + +- field = __find_event_field(&ftrace_common_fields, name); ++ field = __find_event_field(&ftrace_generic_fields, name); + if (field) + return field; + +- head = trace_get_fields(call); +- return __find_event_field(head, name); ++ return __find_event_field(&ftrace_common_fields, name); + } + + static int __trace_define_field(struct list_head *head, const char *type, +@@ -171,8 +171,10 @@ static int trace_define_generic_fields(void) + { + int ret; + +- __generic_field(int, cpu, FILTER_OTHER); +- __generic_field(char *, comm, FILTER_PTR_STRING); ++ __generic_field(int, CPU, FILTER_CPU); ++ __generic_field(int, cpu, FILTER_CPU); ++ __generic_field(char *, COMM, FILTER_COMM); ++ __generic_field(char *, comm, FILTER_COMM); + + return ret; + } +diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c +index f93a219b18da..6816302542b2 100644 +--- a/kernel/trace/trace_events_filter.c ++++ 
b/kernel/trace/trace_events_filter.c +@@ -1043,13 +1043,14 @@ static int init_pred(struct filter_parse_state *ps, + return -EINVAL; + } + +- if (is_string_field(field)) { ++ if (field->filter_type == FILTER_COMM) { ++ filter_build_regex(pred); ++ fn = filter_pred_comm; ++ pred->regex.field_len = TASK_COMM_LEN; ++ } else if (is_string_field(field)) { + filter_build_regex(pred); + +- if (!strcmp(field->name, "comm")) { +- fn = filter_pred_comm; +- pred->regex.field_len = TASK_COMM_LEN; +- } else if (field->filter_type == FILTER_STATIC_STRING) { ++ if (field->filter_type == FILTER_STATIC_STRING) { + fn = filter_pred_string; + pred->regex.field_len = field->size; + } else if (field->filter_type == FILTER_DYN_STRING) +@@ -1072,7 +1073,7 @@ static int init_pred(struct filter_parse_state *ps, + } + pred->val = val; + +- if (!strcmp(field->name, "cpu")) ++ if (field->filter_type == FILTER_CPU) + fn = filter_pred_cpu; + else + fn = select_comparison_fn(pred->op, field->size, diff --git a/queue-4.4/writeback-flush-inode-cgroup-wb-switches-instead-of-pinning-super_block.patch b/queue-4.4/writeback-flush-inode-cgroup-wb-switches-instead-of-pinning-super_block.patch new file mode 100644 index 00000000000..8f09df40b3f --- /dev/null +++ b/queue-4.4/writeback-flush-inode-cgroup-wb-switches-instead-of-pinning-super_block.patch @@ -0,0 +1,188 @@ +From a1a0e23e49037c23ea84bc8cc146a03584d13577 Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Mon, 29 Feb 2016 18:28:53 -0500 +Subject: writeback: flush inode cgroup wb switches instead of pinning super_block + +From: Tejun Heo + +commit a1a0e23e49037c23ea84bc8cc146a03584d13577 upstream. + +If cgroup writeback is in use, inodes can be scheduled for +asynchronous wb switching. Before 5ff8eaac1636 ("writeback: keep +superblock pinned during cgroup writeback association switches"), this +could race with umount leading to super_block being destroyed while +inodes are pinned for wb switching. 
5ff8eaac1636 fixed it by bumping +s_active while wb switches are in flight; however, this allowed +in-flight wb switches to make umounts asynchronous when the userland +expected synchronicity - e.g. fsck immediately following umount may +fail because the device is still busy. + +This patch removes the problematic super_block pinning and instead +makes generic_shutdown_super() flush in-flight wb switches. wb +switches are now executed on a dedicated isw_wq so that they can be +flushed and isw_nr_in_flight keeps track of the number of in-flight wb +switches so that flushing can be avoided in most cases. + +v2: Move cgroup_writeback_umount() further below and add MS_ACTIVE + check in inode_switch_wbs() as Jan and Al suggested. + +Signed-off-by: Tejun Heo +Reported-by: Tahsin Erdogan +Cc: Jan Kara +Cc: Al Viro +Link: http://lkml.kernel.org/g/CAAeU0aNCq7LGODvVGRU-oU_o-6enii5ey0p1c26D1ZzYwkDc5A@mail.gmail.com +Fixes: 5ff8eaac1636 ("writeback: keep superblock pinned during cgroup writeback association switches") +Reviewed-by: Jan Kara +Tested-by: Tahsin Erdogan +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + fs/fs-writeback.c | 54 ++++++++++++++++++++++++++++++++++------------ + fs/super.c | 1 + include/linux/writeback.h | 5 ++++ + 3 files changed, 47 insertions(+), 13 deletions(-) + +--- a/fs/fs-writeback.c ++++ b/fs/fs-writeback.c +@@ -223,6 +223,9 @@ static void wb_wait_for_completion(struc + #define WB_FRN_HIST_MAX_SLOTS (WB_FRN_HIST_THR_SLOTS / 2 + 1) + /* one round can affect upto 5 slots */ + ++static atomic_t isw_nr_in_flight = ATOMIC_INIT(0); ++static struct workqueue_struct *isw_wq; ++ + void __inode_attach_wb(struct inode *inode, struct page *page) + { + struct backing_dev_info *bdi = inode_to_bdi(inode); +@@ -317,7 +320,6 @@ static void inode_switch_wbs_work_fn(str + struct inode_switch_wbs_context *isw = + container_of(work, struct inode_switch_wbs_context, work); + struct inode *inode = isw->inode; +- struct super_block *sb = 
inode->i_sb; + struct address_space *mapping = inode->i_mapping; + struct bdi_writeback *old_wb = inode->i_wb; + struct bdi_writeback *new_wb = isw->new_wb; +@@ -424,8 +426,9 @@ skip_switch: + wb_put(new_wb); + + iput(inode); +- deactivate_super(sb); + kfree(isw); ++ ++ atomic_dec(&isw_nr_in_flight); + } + + static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head) +@@ -435,7 +438,7 @@ static void inode_switch_wbs_rcu_fn(stru + + /* needs to grab bh-unsafe locks, bounce to work item */ + INIT_WORK(&isw->work, inode_switch_wbs_work_fn); +- schedule_work(&isw->work); ++ queue_work(isw_wq, &isw->work); + } + + /** +@@ -471,20 +474,20 @@ static void inode_switch_wbs(struct inod + + /* while holding I_WB_SWITCH, no one else can update the association */ + spin_lock(&inode->i_lock); +- +- if (inode->i_state & (I_WB_SWITCH | I_FREEING) || +- inode_to_wb(inode) == isw->new_wb) +- goto out_unlock; +- +- if (!atomic_inc_not_zero(&inode->i_sb->s_active)) +- goto out_unlock; +- ++ if (!(inode->i_sb->s_flags & MS_ACTIVE) || ++ inode->i_state & (I_WB_SWITCH | I_FREEING) || ++ inode_to_wb(inode) == isw->new_wb) { ++ spin_unlock(&inode->i_lock); ++ goto out_free; ++ } + inode->i_state |= I_WB_SWITCH; + spin_unlock(&inode->i_lock); + + ihold(inode); + isw->inode = inode; + ++ atomic_inc(&isw_nr_in_flight); ++ + /* + * In addition to synchronizing among switchers, I_WB_SWITCH tells + * the RCU protected stat update paths to grab the mapping's +@@ -494,8 +497,6 @@ static void inode_switch_wbs(struct inod + call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn); + return; + +-out_unlock: +- spin_unlock(&inode->i_lock); + out_free: + if (isw->new_wb) + wb_put(isw->new_wb); +@@ -849,6 +850,33 @@ restart: + wb_put(last_wb); + } + ++/** ++ * cgroup_writeback_umount - flush inode wb switches for umount ++ * ++ * This function is called when a super_block is about to be destroyed and ++ * flushes in-flight inode wb switches. 
An inode wb switch goes through ++ * RCU and then workqueue, so the two need to be flushed in order to ensure ++ * that all previously scheduled switches are finished. As wb switches are ++ * rare occurrences and synchronize_rcu() can take a while, perform ++ * flushing iff wb switches are in flight. ++ */ ++void cgroup_writeback_umount(void) ++{ ++ if (atomic_read(&isw_nr_in_flight)) { ++ synchronize_rcu(); ++ flush_workqueue(isw_wq); ++ } ++} ++ ++static int __init cgroup_writeback_init(void) ++{ ++ isw_wq = alloc_workqueue("inode_switch_wbs", 0, 0); ++ if (!isw_wq) ++ return -ENOMEM; ++ return 0; ++} ++fs_initcall(cgroup_writeback_init); ++ + #else /* CONFIG_CGROUP_WRITEBACK */ + + static struct bdi_writeback * +--- a/fs/super.c ++++ b/fs/super.c +@@ -415,6 +415,7 @@ void generic_shutdown_super(struct super + sb->s_flags &= ~MS_ACTIVE; + + fsnotify_unmount_inodes(sb); ++ cgroup_writeback_umount(); + + evict_inodes(sb); + +--- a/include/linux/writeback.h ++++ b/include/linux/writeback.h +@@ -198,6 +198,7 @@ void wbc_attach_and_unlock_inode(struct + void wbc_detach_inode(struct writeback_control *wbc); + void wbc_account_io(struct writeback_control *wbc, struct page *page, + size_t bytes); ++void cgroup_writeback_umount(void); + + /** + * inode_attach_wb - associate an inode with its wb +@@ -301,6 +302,10 @@ static inline void wbc_account_io(struct + { + } + ++static inline void cgroup_writeback_umount(void) ++{ ++} ++ + #endif /* CONFIG_CGROUP_WRITEBACK */ + + /*