From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 5 Mar 2016 19:45:08 +0000 (-0800)
Subject: 4.4-stable patches
X-Git-Tag: v3.10.100~13
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=4e362751dc079531ef2b95bcd5c71e12f78e1ae6;p=thirdparty%2Fkernel%2Fstable-queue.git

4.4-stable patches

added patches:
	adding-intel-lewisburg-device-ids-for-sata.patch
	arm64-vmemmap-use-virtual-projection-of-linear-region.patch
	ata-ahci-don-t-mark-hotplugcapable-ports-as-external-removable.patch
	block-bio-introduce-helpers-to-get-the-1st-and-last-bvec.patch
	btrfs-fix-loading-of-orphan-roots-leading-to-bug_on.patch
	drm-amdgpu-apply-gfx_v8-fixes-to-gfx_v7-as-well.patch
	drm-amdgpu-gfx8-specify-which-engine-to-wait-before-vm-flush.patch
	drm-amdgpu-pm-update-current-crtc-info-after-setting-the-powerstate.patch
	drm-amdgpu-return-from-atombios_dp_get_dpcd-only-when-error.patch
	drm-ast-fix-incorrect-register-check-for-dram-width.patch
	drm-radeon-pm-update-current-crtc-info-after-setting-the-powerstate.patch
	fix-directory-hardlinks-from-deleted-directories.patch
	jffs2-fix-page-lock-f-sem-deadlock.patch
	libata-align-ata_device-s-id-on-a-cacheline.patch
	libata-fix-hdio_get_32bit-ioctl.patch
	pata-rb532-cf-get-rid-of-the-irq_to_gpio-call.patch
	pm-sleep-x86-fix-crash-on-graph-trace-through-x86-suspend.patch
	revert-jffs2-fix-lock-acquisition-order-bug-in-jffs2_write_begin.patch
	target-fix-write_same-discard-conversion-to-linux-512b-sectors.patch
	tracing-do-not-have-comm-filter-override-event-comm-field.patch
	writeback-flush-inode-cgroup-wb-switches-instead-of-pinning-super_block.patch
---

diff --git a/queue-4.4/adding-intel-lewisburg-device-ids-for-sata.patch b/queue-4.4/adding-intel-lewisburg-device-ids-for-sata.patch
new file mode 100644
index 00000000000..157ba04ba0c
--- /dev/null
+++ b/queue-4.4/adding-intel-lewisburg-device-ids-for-sata.patch
@@ -0,0 +1,44 @@
+From f5bdd66c705484b4bc77eb914be15c1b7881fae7 Mon Sep 17 00:00:00 2001
+From: Alexandra Yates <alexandra.yates@linux.intel.com>
+Date: Wed, 17 Feb 2016 19:36:20 -0800
+Subject: Adding Intel Lewisburg device IDs for SATA
+
+From: Alexandra Yates <alexandra.yates@linux.intel.com>
+
+commit f5bdd66c705484b4bc77eb914be15c1b7881fae7 upstream.
+
+This patch complements the list of device IDs previously
+added for lewisburg sata.
+
+Signed-off-by: Alexandra Yates <alexandra.yates@linux.intel.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/ata/ahci.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/ata/ahci.c
++++ b/drivers/ata/ahci.c
+@@ -367,15 +367,21 @@ static const struct pci_device_id ahci_p
+ 	{ PCI_VDEVICE(INTEL, 0xa107), board_ahci }, /* Sunrise Point-H RAID */
+ 	{ PCI_VDEVICE(INTEL, 0xa10f), board_ahci }, /* Sunrise Point-H RAID */
+ 	{ PCI_VDEVICE(INTEL, 0x2822), board_ahci }, /* Lewisburg RAID*/
++	{ PCI_VDEVICE(INTEL, 0x2823), board_ahci }, /* Lewisburg AHCI*/
+ 	{ PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* Lewisburg RAID*/
++	{ PCI_VDEVICE(INTEL, 0x2827), board_ahci }, /* Lewisburg RAID*/
+ 	{ PCI_VDEVICE(INTEL, 0xa182), board_ahci }, /* Lewisburg AHCI*/
+ 	{ PCI_VDEVICE(INTEL, 0xa184), board_ahci }, /* Lewisburg RAID*/
+ 	{ PCI_VDEVICE(INTEL, 0xa186), board_ahci }, /* Lewisburg RAID*/
+ 	{ PCI_VDEVICE(INTEL, 0xa18e), board_ahci }, /* Lewisburg RAID*/
++	{ PCI_VDEVICE(INTEL, 0xa1d2), board_ahci }, /* Lewisburg RAID*/
++	{ PCI_VDEVICE(INTEL, 0xa1d6), board_ahci }, /* Lewisburg RAID*/
+ 	{ PCI_VDEVICE(INTEL, 0xa202), board_ahci }, /* Lewisburg AHCI*/
+ 	{ PCI_VDEVICE(INTEL, 0xa204), board_ahci }, /* Lewisburg RAID*/
+ 	{ PCI_VDEVICE(INTEL, 0xa206), board_ahci }, /* Lewisburg RAID*/
+ 	{ PCI_VDEVICE(INTEL, 0xa20e), board_ahci }, /* Lewisburg RAID*/
++	{ PCI_VDEVICE(INTEL, 0xa252), board_ahci }, /* Lewisburg RAID*/
++	{ PCI_VDEVICE(INTEL, 0xa256), board_ahci }, /* Lewisburg RAID*/
+ 
+ 	/* JMicron 360/1/3/5/6, match class to avoid IDE function */
+ 	{ PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
diff --git a/queue-4.4/arm64-vmemmap-use-virtual-projection-of-linear-region.patch b/queue-4.4/arm64-vmemmap-use-virtual-projection-of-linear-region.patch
new file mode 100644
index 00000000000..a5b4b89eb70
--- /dev/null
+++ b/queue-4.4/arm64-vmemmap-use-virtual-projection-of-linear-region.patch
@@ -0,0 +1,77 @@
+From dfd55ad85e4a7fbaa82df12467515ac3c81e8a3e Mon Sep 17 00:00:00 2001
+From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+Date: Fri, 26 Feb 2016 17:57:13 +0100
+Subject: arm64: vmemmap: use virtual projection of linear region
+
+From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+
+commit dfd55ad85e4a7fbaa82df12467515ac3c81e8a3e upstream.
+
+Commit dd006da21646 ("arm64: mm: increase VA range of identity map") made
+some changes to the memory mapping code to allow physical memory to reside
+at an offset that exceeds the size of the virtual mapping.
+
+However, since the size of the vmemmap area is proportional to the size of
+the VA area, but it is populated relative to the physical space, we may
+end up with the struct page array being mapped outside of the vmemmap
+region. For instance, on my Seattle A0 box, I can see the following output
+in the dmesg log.
+
+   vmemmap : 0xffffffbdc0000000 - 0xffffffbfc0000000   (     8 GB maximum)
+             0xffffffbfc0000000 - 0xffffffbfd0000000   (   256 MB actual)
+
+We can fix this by deciding that the vmemmap region is not a projection of
+the physical space, but of the virtual space above PAGE_OFFSET, i.e., the
+linear region. This way, we are guaranteed that the vmemmap region is of
+sufficient size, and we can even reduce the size by half.
+
+Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/include/asm/pgtable.h |    7 ++++---
+ arch/arm64/mm/init.c             |    4 ++--
+ 2 files changed, 6 insertions(+), 5 deletions(-)
+
+--- a/arch/arm64/include/asm/pgtable.h
++++ b/arch/arm64/include/asm/pgtable.h
+@@ -34,13 +34,13 @@
+ /*
+  * VMALLOC and SPARSEMEM_VMEMMAP ranges.
+  *
+- * VMEMAP_SIZE: allows the whole VA space to be covered by a struct page array
++ * VMEMAP_SIZE: allows the whole linear region to be covered by a struct page array
+  *	(rounded up to PUD_SIZE).
+  * VMALLOC_START: beginning of the kernel VA space
+  * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space,
+  *	fixed mappings and modules
+  */
+-#define VMEMMAP_SIZE		ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE)
++#define VMEMMAP_SIZE		ALIGN((1UL << (VA_BITS - PAGE_SHIFT - 1)) * sizeof(struct page), PUD_SIZE)
+ 
+ #ifndef CONFIG_KASAN
+ #define VMALLOC_START		(VA_START)
+@@ -51,7 +51,8 @@
+ 
+ #define VMALLOC_END		(PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
+ 
+-#define vmemmap			((struct page *)(VMALLOC_END + SZ_64K))
++#define VMEMMAP_START		(VMALLOC_END + SZ_64K)
++#define vmemmap			((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
+ 
+ #define FIRST_USER_ADDRESS	0UL
+ 
+--- a/arch/arm64/mm/init.c
++++ b/arch/arm64/mm/init.c
+@@ -319,8 +319,8 @@ void __init mem_init(void)
+ #endif
+ 		  MLG(VMALLOC_START, VMALLOC_END),
+ #ifdef CONFIG_SPARSEMEM_VMEMMAP
+-		  MLG((unsigned long)vmemmap,
+-		      (unsigned long)vmemmap + VMEMMAP_SIZE),
++		  MLG(VMEMMAP_START,
++		      VMEMMAP_START + VMEMMAP_SIZE),
+ 		  MLM((unsigned long)virt_to_page(PAGE_OFFSET),
+ 		      (unsigned long)virt_to_page(high_memory)),
+ #endif
diff --git a/queue-4.4/ata-ahci-don-t-mark-hotplugcapable-ports-as-external-removable.patch b/queue-4.4/ata-ahci-don-t-mark-hotplugcapable-ports-as-external-removable.patch
new file mode 100644
index 00000000000..2491986ce4f
--- /dev/null
+++ b/queue-4.4/ata-ahci-don-t-mark-hotplugcapable-ports-as-external-removable.patch
@@ -0,0 +1,44 @@
+From dc8b4afc4a04fac8ee55a19b59f2356a25e7e778 Mon Sep 17 00:00:00 2001
+From: Manuel Lauss <manuel.lauss@gmail.com>
+Date: Sat, 27 Feb 2016 16:10:05 +0100
+Subject: ata: ahci: don't mark HotPlugCapable Ports as external/removable
+
+From: Manuel Lauss <manuel.lauss@gmail.com>
+
+commit dc8b4afc4a04fac8ee55a19b59f2356a25e7e778 upstream.
+
+The HPCP bit is set by bioses for on-board sata ports either because
+they think sata is hotplug capable in general or to allow Windows
+to display a "device eject" icon on ports which are routed to an
+external connector bracket.
+
+However in Redhat Bugzilla #1310682, users report that with kernel 4.4,
+where this bit test first appeared, a lot of partitions on sata drives
+are now mounted automatically.
+
+This patch should fix redhat and a lot of other distros which
+unconditionally automount all devices which have the "removable"
+bit set.
+
+Signed-off-by: Manuel Lauss <manuel.lauss@gmail.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Fixes: 8a3e33cf92c7 ("ata: ahci: find eSATA ports and flag them as removable" changes userspace behavior)
+Link: http://lkml.kernel.org/g/56CF35FA.1070500@redhat.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/ata/libahci.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/ata/libahci.c
++++ b/drivers/ata/libahci.c
+@@ -1142,8 +1142,7 @@ static void ahci_port_init(struct device
+ 
+ 	/* mark esata ports */
+ 	tmp = readl(port_mmio + PORT_CMD);
+-	if ((tmp & PORT_CMD_HPCP) ||
+-	    ((tmp & PORT_CMD_ESP) && (hpriv->cap & HOST_CAP_SXS)))
++	if ((tmp & PORT_CMD_ESP) && (hpriv->cap & HOST_CAP_SXS))
+ 		ap->pflags |= ATA_PFLAG_EXTERNAL;
+ }
+ 
diff --git a/queue-4.4/block-bio-introduce-helpers-to-get-the-1st-and-last-bvec.patch b/queue-4.4/block-bio-introduce-helpers-to-get-the-1st-and-last-bvec.patch
new file mode 100644
index 00000000000..bd21afbc6f7
--- /dev/null
+++ b/queue-4.4/block-bio-introduce-helpers-to-get-the-1st-and-last-bvec.patch
@@ -0,0 +1,77 @@
+From 7bcd79ac50d9d83350a835bdb91c04ac9e098412 Mon Sep 17 00:00:00 2001
+From: Ming Lei <ming.lei@canonical.com>
+Date: Fri, 26 Feb 2016 23:40:50 +0800
+Subject: block: bio: introduce helpers to get the 1st and last bvec
+
+From: Ming Lei <ming.lei@canonical.com>
+
+commit 7bcd79ac50d9d83350a835bdb91c04ac9e098412 upstream.
+
+The bio passed to bio_will_gap() may be fast cloned from upper
+layer(dm, md, bcache, fs, ...), or from bio splitting in block
+core.
+
+Unfortunately bio_will_gap() just figures out the last bvec via
+'bi_io_vec[prev->bi_vcnt - 1]' directly, and this way is obviously
+wrong.
+
+This patch introduces two helpers for getting the first and last
+bvec of one bio for fixing the issue.
+
+Reported-by: Sagi Grimberg <sagig@dev.mellanox.co.il>
+Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Ming Lei <ming.lei@canonical.com>
+Signed-off-by: Jens Axboe <axboe@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/bio.h |   37 +++++++++++++++++++++++++++++++++++++
+ 1 file changed, 37 insertions(+)
+
+--- a/include/linux/bio.h
++++ b/include/linux/bio.h
+@@ -310,6 +310,43 @@ static inline void bio_clear_flag(struct
+ 	bio->bi_flags &= ~(1U << bit);
+ }
+ 
++static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv)
++{
++	*bv = bio_iovec(bio);
++}
++
++static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
++{
++	struct bvec_iter iter = bio->bi_iter;
++	int idx;
++
++	if (!bio_flagged(bio, BIO_CLONED)) {
++		*bv = bio->bi_io_vec[bio->bi_vcnt - 1];
++		return;
++	}
++
++	if (unlikely(!bio_multiple_segments(bio))) {
++		*bv = bio_iovec(bio);
++		return;
++	}
++
++	bio_advance_iter(bio, &iter, iter.bi_size);
++
++	if (!iter.bi_bvec_done)
++		idx = iter.bi_idx - 1;
++	else	/* in the middle of bvec */
++		idx = iter.bi_idx;
++
++	*bv = bio->bi_io_vec[idx];
++
++	/*
++	 * iter.bi_bvec_done records actual length of the last bvec
++	 * if this bio ends in the middle of one io vector
++	 */
++	if (iter.bi_bvec_done)
++		bv->bv_len = iter.bi_bvec_done;
++}
++
+ enum bip_flags {
+ 	BIP_BLOCK_INTEGRITY	= 1 << 0, /* block layer owns integrity data */
+ 	BIP_MAPPED_INTEGRITY	= 1 << 1, /* ref tag has been remapped */
diff --git a/queue-4.4/btrfs-fix-loading-of-orphan-roots-leading-to-bug_on.patch b/queue-4.4/btrfs-fix-loading-of-orphan-roots-leading-to-bug_on.patch
new file mode 100644
index 00000000000..1b6a3688e72
--- /dev/null
+++ b/queue-4.4/btrfs-fix-loading-of-orphan-roots-leading-to-bug_on.patch
@@ -0,0 +1,184 @@
+From 909c3a22da3b8d2cfd3505ca5658f0176859d400 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Wed, 2 Mar 2016 15:49:38 +0000
+Subject: Btrfs: fix loading of orphan roots leading to BUG_ON
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 909c3a22da3b8d2cfd3505ca5658f0176859d400 upstream.
+
+When looking for orphan roots during mount we can end up hitting a
+BUG_ON() (at root-item.c:btrfs_find_orphan_roots()) if a log tree is
+replayed and qgroups are enabled. This is because after a log tree is
+replayed, a transaction commit is made, which triggers qgroup extent
+accounting which in turn does backref walking which ends up reading and
+inserting all roots in the radix tree fs_info->fs_root_radix, including
+orphan roots (deleted snapshots). So after the log tree is replayed, when
+finding orphan roots we hit the BUG_ON with the following trace:
+
+[118209.182438] ------------[ cut here ]------------
+[118209.183279] kernel BUG at fs/btrfs/root-tree.c:314!
+[118209.184074] invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC
+[118209.185123] Modules linked in: btrfs dm_flakey dm_mod crc32c_generic ppdev xor raid6_pq evdev sg parport_pc parport acpi_cpufreq tpm_tis tpm psmouse
+processor i2c_piix4 serio_raw pcspkr i2c_core button loop autofs4 ext4 crc16 mbcache jbd2 sd_mod sr_mod cdrom ata_generic virtio_scsi ata_piix libata
+virtio_pci virtio_ring virtio scsi_mod e1000 floppy [last unloaded: btrfs]
+[118209.186318] CPU: 14 PID: 28428 Comm: mount Tainted: G        W       4.5.0-rc5-btrfs-next-24+ #1
+[118209.186318] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS by qemu-project.org 04/01/2014
+[118209.186318] task: ffff8801ec131040 ti: ffff8800af34c000 task.ti: ffff8800af34c000
+[118209.186318] RIP: 0010:[<ffffffffa04237d7>]  [<ffffffffa04237d7>] btrfs_find_orphan_roots+0x1fc/0x244 [btrfs]
+[118209.186318] RSP: 0018:ffff8800af34faa8  EFLAGS: 00010246
+[118209.186318] RAX: 00000000ffffffef RBX: 00000000ffffffef RCX: 0000000000000001
+[118209.186318] RDX: 0000000080000000 RSI: 0000000000000001 RDI: 00000000ffffffff
+[118209.186318] RBP: ffff8800af34fb08 R08: 0000000000000001 R09: 0000000000000000
+[118209.186318] R10: ffff8800af34f9f0 R11: 6db6db6db6db6db7 R12: ffff880171b97000
+[118209.186318] R13: ffff8801ca9d65e0 R14: ffff8800afa2e000 R15: 0000160000000000
+[118209.186318] FS:  00007f5bcb914840(0000) GS:ffff88023edc0000(0000) knlGS:0000000000000000
+[118209.186318] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
+[118209.186318] CR2: 00007f5bcaceb5d9 CR3: 00000000b49b5000 CR4: 00000000000006e0
+[118209.186318] Stack:
+[118209.186318]  fffffbffffffffff 010230ffffffffff 0101000000000000 ff84000000000000
+[118209.186318]  fbffffffffffffff 30ffffffffffffff 0000000000000101 ffff880082348000
+[118209.186318]  0000000000000000 ffff8800afa2e000 ffff8800afa2e000 0000000000000000
+[118209.186318] Call Trace:
+[118209.186318]  [<ffffffffa042e2db>] open_ctree+0x1e37/0x21b9 [btrfs]
+[118209.186318]  [<ffffffffa040a753>] btrfs_mount+0x97e/0xaed [btrfs]
+[118209.186318]  [<ffffffff8108e1c0>] ? trace_hardirqs_on+0xd/0xf
+[118209.186318]  [<ffffffff8117b87e>] mount_fs+0x67/0x131
+[118209.186318]  [<ffffffff81192d2b>] vfs_kern_mount+0x6c/0xde
+[118209.186318]  [<ffffffffa0409f81>] btrfs_mount+0x1ac/0xaed [btrfs]
+[118209.186318]  [<ffffffff8108e1c0>] ? trace_hardirqs_on+0xd/0xf
+[118209.186318]  [<ffffffff8108c26b>] ? lockdep_init_map+0xb9/0x1b3
+[118209.186318]  [<ffffffff8117b87e>] mount_fs+0x67/0x131
+[118209.186318]  [<ffffffff81192d2b>] vfs_kern_mount+0x6c/0xde
+[118209.186318]  [<ffffffff81195637>] do_mount+0x8a6/0x9e8
+[118209.186318]  [<ffffffff8119598d>] SyS_mount+0x77/0x9f
+[118209.186318]  [<ffffffff81493017>] entry_SYSCALL_64_fastpath+0x12/0x6b
+[118209.186318] Code: 64 00 00 85 c0 89 c3 75 24 f0 41 80 4c 24 20 20 49 8b bc 24 f0 01 00 00 4c 89 e6 e8 e8 65 00 00 85 c0 89 c3 74 11 83 f8 ef 75 02 <0f> 0b
+4c 89 e7 e8 da 72 00 00 eb 1c 41 83 bc 24 00 01 00 00 00
+[118209.186318] RIP  [<ffffffffa04237d7>] btrfs_find_orphan_roots+0x1fc/0x244 [btrfs]
+[118209.186318]  RSP <ffff8800af34faa8>
+[118209.230735] ---[ end trace 83938f987d85d477 ]---
+
+So fix this by not treating the error -EEXIST, returned when attempting
+to insert a root already inserted by the backref walking code, as an error.
+
+The following test case for xfstests reproduces the bug:
+
+  seq=`basename $0`
+  seqres=$RESULT_DIR/$seq
+  echo "QA output created by $seq"
+  tmp=/tmp/$$
+  status=1	# failure is the default!
+  trap "_cleanup; exit \$status" 0 1 2 3 15
+
+  _cleanup()
+  {
+      _cleanup_flakey
+      cd /
+      rm -f $tmp.*
+  }
+
+  # get standard environment, filters and checks
+  . ./common/rc
+  . ./common/filter
+  . ./common/dmflakey
+
+  # real QA test starts here
+  _supported_fs btrfs
+  _supported_os Linux
+  _require_scratch
+  _require_dm_target flakey
+  _require_metadata_journaling $SCRATCH_DEV
+
+  rm -f $seqres.full
+
+  _scratch_mkfs >>$seqres.full 2>&1
+  _init_flakey
+  _mount_flakey
+
+  _run_btrfs_util_prog quota enable $SCRATCH_MNT
+
+  # Create 2 directories with one file in one of them.
+  # We use these just to trigger a transaction commit later, moving the file from
+  # directory a to directory b and doing an fsync against directory a.
+  mkdir $SCRATCH_MNT/a
+  mkdir $SCRATCH_MNT/b
+  touch $SCRATCH_MNT/a/f
+  sync
+
+  # Create our test file with 2 4K extents.
+  $XFS_IO_PROG -f -s -c "pwrite -S 0xaa 0 8K" $SCRATCH_MNT/foobar | _filter_xfs_io
+
+  # Create a snapshot and delete it. This doesn't really delete the snapshot
+  # immediately, just makes it inaccessible and invisible to user space, the
+  # snapshot is deleted later by a dedicated kernel thread (cleaner kthread)
+  # which is woke up at the next transaction commit.
+  # A root orphan item is inserted into the tree of tree roots, so that if a
+  # power failure happens before the dedicated kernel thread does the snapshot
+  # deletion, the next time the filesystem is mounted it resumes the snapshot
+  # deletion.
+  _run_btrfs_util_prog subvolume snapshot $SCRATCH_MNT $SCRATCH_MNT/snap
+  _run_btrfs_util_prog subvolume delete $SCRATCH_MNT/snap
+
+  # Now overwrite half of the extents we wrote before. Because we made a snapshot
+  # before, which isn't really deleted yet (since no transaction commit happened
+  # after we did the snapshot delete request), the non overwritten extents get
+  # referenced twice, once by the default subvolume and once by the snapshot.
+  $XFS_IO_PROG -c "pwrite -S 0xbb 4K 8K" $SCRATCH_MNT/foobar | _filter_xfs_io
+
+  # Now move file f from directory a to directory b and fsync directory a.
+  # The fsync on the directory a triggers a transaction commit (because a file
+  # was moved from it to another directory) and the file fsync leaves a log tree
+  # with file extent items to replay.
+  mv $SCRATCH_MNT/a/f $SCRATCH_MNT/a/b
+  $XFS_IO_PROG -c "fsync" $SCRATCH_MNT/a
+  $XFS_IO_PROG -c "fsync" $SCRATCH_MNT/foobar
+
+  echo "File digest before power failure:"
+  md5sum $SCRATCH_MNT/foobar | _filter_scratch
+
+  # Now simulate a power failure and mount the filesystem to replay the log tree.
+  # After the log tree was replayed, we used to hit a BUG_ON() when processing
+  # the root orphan item for the deleted snapshot. This is because when processing
+  # an orphan root the code expected to be the first code inserting the root into
+  # the fs_info->fs_root_radix radix tree, while in reality it was the second
+  # caller attempting to do it - the first caller was the transaction commit that
+  # took place after replaying the log tree, when updating the qgroup counters.
+  _flakey_drop_and_remount
+
+  echo "File digest before after failure:"
+  # Must match what we got before the power failure.
+  md5sum $SCRATCH_MNT/foobar | _filter_scratch
+
+  _unmount_flakey
+  status=0
+  exit
+
+Fixes: 2d9e97761087 ("Btrfs: use btrfs_get_fs_root in resolve_indirect_ref")
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
+index 7cf8509deda7..2c849b08a91b 100644
+--- a/fs/btrfs/root-tree.c
++++ b/fs/btrfs/root-tree.c
+@@ -310,8 +310,16 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
+ 		set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state);
+ 
+ 		err = btrfs_insert_fs_root(root->fs_info, root);
++		/*
++		 * The root might have been inserted already, as before we look
++		 * for orphan roots, log replay might have happened, which
++		 * triggers a transaction commit and qgroup accounting, which
++		 * in turn reads and inserts fs roots while doing backref
++		 * walking.
++		 */
++		if (err == -EEXIST)
++			err = 0;
+ 		if (err) {
+-			BUG_ON(err == -EEXIST);
+ 			btrfs_free_fs_root(root);
+ 			break;
+ 		}
diff --git a/queue-4.4/drm-amdgpu-apply-gfx_v8-fixes-to-gfx_v7-as-well.patch b/queue-4.4/drm-amdgpu-apply-gfx_v8-fixes-to-gfx_v7-as-well.patch
new file mode 100644
index 00000000000..9d45cec31ce
--- /dev/null
+++ b/queue-4.4/drm-amdgpu-apply-gfx_v8-fixes-to-gfx_v7-as-well.patch
@@ -0,0 +1,44 @@
+From feebe91aa9a9d99d9ec157612a614fadb79beb99 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
+Date: Fri, 26 Feb 2016 16:18:15 +0100
+Subject: drm/amdgpu: apply gfx_v8 fixes to gfx_v7 as well
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Christian König <christian.koenig@amd.com>
+
+commit feebe91aa9a9d99d9ec157612a614fadb79beb99 upstream.
+
+We never ported that back to CIK, so we could run into VM faults here.
+
+Signed-off-by: Christian König <christian.koenig@amd.com>
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c |   13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+@@ -3628,6 +3628,19 @@ static void gfx_v7_0_ring_emit_vm_flush(
+ 					unsigned vm_id, uint64_t pd_addr)
+ {
+ 	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
++	uint32_t seq = ring->fence_drv.sync_seq;
++	uint64_t addr = ring->fence_drv.gpu_addr;
++
++	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
++	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
++				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
++				 WAIT_REG_MEM_ENGINE(usepfp)));   /* pfp or me */
++	amdgpu_ring_write(ring, addr & 0xfffffffc);
++	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
++	amdgpu_ring_write(ring, seq);
++	amdgpu_ring_write(ring, 0xffffffff);
++	amdgpu_ring_write(ring, 4); /* poll interval */
++
+ 	if (usepfp) {
+ 		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
+ 		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
diff --git a/queue-4.4/drm-amdgpu-gfx8-specify-which-engine-to-wait-before-vm-flush.patch b/queue-4.4/drm-amdgpu-gfx8-specify-which-engine-to-wait-before-vm-flush.patch
new file mode 100644
index 00000000000..7fb49fbc2c1
--- /dev/null
+++ b/queue-4.4/drm-amdgpu-gfx8-specify-which-engine-to-wait-before-vm-flush.patch
@@ -0,0 +1,35 @@
+From 9cac537332f5502c103415b25609548c276a09f8 Mon Sep 17 00:00:00 2001
+From: Chunming Zhou <David1.Zhou@amd.com>
+Date: Mon, 29 Feb 2016 14:12:38 +0800
+Subject: drm/amdgpu/gfx8: specify which engine to wait before vm flush
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Chunming Zhou <David1.Zhou@amd.com>
+
+commit 9cac537332f5502c103415b25609548c276a09f8 upstream.
+
+Select between me and pfp properly.
+
+Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+@@ -4681,7 +4681,8 @@ static void gfx_v8_0_ring_emit_vm_flush(
+ 
+ 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+ 	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
+-		 WAIT_REG_MEM_FUNCTION(3))); /* equal */
++				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
++				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
+ 	amdgpu_ring_write(ring, addr & 0xfffffffc);
+ 	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+ 	amdgpu_ring_write(ring, seq);
diff --git a/queue-4.4/drm-amdgpu-pm-update-current-crtc-info-after-setting-the-powerstate.patch b/queue-4.4/drm-amdgpu-pm-update-current-crtc-info-after-setting-the-powerstate.patch
new file mode 100644
index 00000000000..8fa7201592b
--- /dev/null
+++ b/queue-4.4/drm-amdgpu-pm-update-current-crtc-info-after-setting-the-powerstate.patch
@@ -0,0 +1,49 @@
+From eda1d1cf8d18383f19cd2b752f786120efa4768f Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexander.deucher@amd.com>
+Date: Wed, 24 Feb 2016 17:18:25 -0500
+Subject: drm/amdgpu/pm: update current crtc info after setting the powerstate
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+commit eda1d1cf8d18383f19cd2b752f786120efa4768f upstream.
+
+On CI, we need to see if the number of crtcs changes to determine
+whether or not we need to upload the mclk table again.  In practice
+we don't currently upload the mclk table again after the initial load.
+The only reason you would do so would be to add new states, e.g., for
+arbitrary mclk setting which is not currently supported.
+
+Acked-by: Jordan Lazare <Jordan.Lazare@amd.com>
+Acked-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+@@ -596,9 +596,6 @@ force:
+ 	/* update display watermarks based on new power state */
+ 	amdgpu_display_bandwidth_update(adev);
+ 
+-	adev->pm.dpm.current_active_crtcs = adev->pm.dpm.new_active_crtcs;
+-	adev->pm.dpm.current_active_crtc_count = adev->pm.dpm.new_active_crtc_count;
+-
+ 	/* wait for the rings to drain */
+ 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+ 		struct amdgpu_ring *ring = adev->rings[i];
+@@ -617,6 +614,9 @@ force:
+ 	/* update displays */
+ 	amdgpu_dpm_display_configuration_changed(adev);
+ 
++	adev->pm.dpm.current_active_crtcs = adev->pm.dpm.new_active_crtcs;
++	adev->pm.dpm.current_active_crtc_count = adev->pm.dpm.new_active_crtc_count;
++
+ 	if (adev->pm.funcs->force_performance_level) {
+ 		if (adev->pm.dpm.thermal_active) {
+ 			enum amdgpu_dpm_forced_level level = adev->pm.dpm.forced_level;
diff --git a/queue-4.4/drm-amdgpu-return-from-atombios_dp_get_dpcd-only-when-error.patch b/queue-4.4/drm-amdgpu-return-from-atombios_dp_get_dpcd-only-when-error.patch
new file mode 100644
index 00000000000..8f8dd3dc29d
--- /dev/null
+++ b/queue-4.4/drm-amdgpu-return-from-atombios_dp_get_dpcd-only-when-error.patch
@@ -0,0 +1,37 @@
+From 0b39c531cfa12dad54eac238c2e303b994df1ef7 Mon Sep 17 00:00:00 2001
+From: Arindam Nath <arindam.nath@amd.com>
+Date: Wed, 2 Mar 2016 17:19:01 +0530
+Subject: drm/amdgpu: return from atombios_dp_get_dpcd only when error
+
+From: Arindam Nath <arindam.nath@amd.com>
+
+commit 0b39c531cfa12dad54eac238c2e303b994df1ef7 upstream.
+
+In amdgpu_connector_hotplug(), we need to start DP link
+training only after we have received DPCD. The function
+amdgpu_atombios_dp_get_dpcd() returns non-zero value only
+when an error condition is met, otherwise returns zero.
+So in case the function encounters an error, we need to
+skip rest of the code and return from amdgpu_connector_hotplug()
+immediately. Only when we are successful in reading DPCD
+pin, we should carry on with turning-on the monitor.
+
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+@@ -77,7 +77,7 @@ void amdgpu_connector_hotplug(struct drm
+ 			} else if (amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) {
+ 				/* Don't try to start link training before we
+ 				 * have the dpcd */
+-				if (!amdgpu_atombios_dp_get_dpcd(amdgpu_connector))
++				if (amdgpu_atombios_dp_get_dpcd(amdgpu_connector))
+ 					return;
+ 
+ 				/* set it to OFF so that drm_helper_connector_dpms()
diff --git a/queue-4.4/drm-ast-fix-incorrect-register-check-for-dram-width.patch b/queue-4.4/drm-ast-fix-incorrect-register-check-for-dram-width.patch
new file mode 100644
index 00000000000..8b290d37982
--- /dev/null
+++ b/queue-4.4/drm-ast-fix-incorrect-register-check-for-dram-width.patch
@@ -0,0 +1,34 @@
+From 2d02b8bdba322b527c5f5168ce1ca10c2d982a78 Mon Sep 17 00:00:00 2001
+From: Timothy Pearson <tpearson@raptorengineeringinc.com>
+Date: Fri, 26 Feb 2016 15:29:32 -0600
+Subject: drm/ast: Fix incorrect register check for DRAM width
+
+From: Timothy Pearson <tpearson@raptorengineeringinc.com>
+
+commit 2d02b8bdba322b527c5f5168ce1ca10c2d982a78 upstream.
+
+During DRAM initialization on certain ASpeed devices, an incorrect
+bit (bit 10) was checked in the "SDRAM Bus Width Status" register
+to determine DRAM width.
+
+Query bit 6 instead in accordance with the Aspeed AST2050 datasheet v1.05.
+
+Signed-off-by: Timothy Pearson <tpearson@raptorengineeringinc.com>
+Signed-off-by: Dave Airlie <airlied@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/ast/ast_main.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/ast/ast_main.c
++++ b/drivers/gpu/drm/ast/ast_main.c
+@@ -227,7 +227,7 @@ static int ast_get_dram_info(struct drm_
+ 	} while (ast_read32(ast, 0x10000) != 0x01);
+ 	data = ast_read32(ast, 0x10004);
+ 
+-	if (data & 0x400)
++	if (data & 0x40)
+ 		ast->dram_bus_width = 16;
+ 	else
+ 		ast->dram_bus_width = 32;
diff --git a/queue-4.4/drm-radeon-pm-update-current-crtc-info-after-setting-the-powerstate.patch b/queue-4.4/drm-radeon-pm-update-current-crtc-info-after-setting-the-powerstate.patch
new file mode 100644
index 00000000000..ff71345f035
--- /dev/null
+++ b/queue-4.4/drm-radeon-pm-update-current-crtc-info-after-setting-the-powerstate.patch
@@ -0,0 +1,50 @@
+From 5e031d9fe8b0741f11d49667dfc3ebf5454121fd Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexander.deucher@amd.com>
+Date: Wed, 24 Feb 2016 17:38:38 -0500
+Subject: drm/radeon/pm: update current crtc info after setting the powerstate
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+commit 5e031d9fe8b0741f11d49667dfc3ebf5454121fd upstream.
+
+On CI, we need to see if the number of crtcs changes to determine
+whether or not we need to upload the mclk table again.  In practice
+we don't currently upload the mclk table again after the initial load.
+The only reason you would would be to add new states, e.g., for
+arbitrary mclk setting which is not currently supported.
+
+Acked-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/radeon/radeon_pm.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/gpu/drm/radeon/radeon_pm.c
++++ b/drivers/gpu/drm/radeon/radeon_pm.c
+@@ -1076,10 +1076,6 @@ force:
+ 	/* update display watermarks based on new power state */
+ 	radeon_bandwidth_update(rdev);
+ 
+-	rdev->pm.dpm.current_active_crtcs = rdev->pm.dpm.new_active_crtcs;
+-	rdev->pm.dpm.current_active_crtc_count = rdev->pm.dpm.new_active_crtc_count;
+-	rdev->pm.dpm.single_display = single_display;
+-
+ 	/* wait for the rings to drain */
+ 	for (i = 0; i < RADEON_NUM_RINGS; i++) {
+ 		struct radeon_ring *ring = &rdev->ring[i];
+@@ -1098,6 +1094,10 @@ force:
+ 	/* update displays */
+ 	radeon_dpm_display_configuration_changed(rdev);
+ 
++	rdev->pm.dpm.current_active_crtcs = rdev->pm.dpm.new_active_crtcs;
++	rdev->pm.dpm.current_active_crtc_count = rdev->pm.dpm.new_active_crtc_count;
++	rdev->pm.dpm.single_display = single_display;
++
+ 	if (rdev->asic->dpm.force_performance_level) {
+ 		if (rdev->pm.dpm.thermal_active) {
+ 			enum radeon_dpm_forced_level level = rdev->pm.dpm.forced_level;
diff --git a/queue-4.4/fix-directory-hardlinks-from-deleted-directories.patch b/queue-4.4/fix-directory-hardlinks-from-deleted-directories.patch
new file mode 100644
index 00000000000..6c9736e7f9c
--- /dev/null
+++ b/queue-4.4/fix-directory-hardlinks-from-deleted-directories.patch
@@ -0,0 +1,193 @@
+From be629c62a603e5935f8177fd8a19e014100a259e Mon Sep 17 00:00:00 2001
+From: David Woodhouse <David.Woodhouse@intel.com>
+Date: Mon, 1 Feb 2016 14:04:46 +0000
+Subject: Fix directory hardlinks from deleted directories
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: David Woodhouse <David.Woodhouse@intel.com>
+
+commit be629c62a603e5935f8177fd8a19e014100a259e upstream.
+
+When a directory is deleted, we don't take too much care about killing off
+all the dirents that belong to it — on the basis that on remount, the scan
+will conclude that the directory is dead anyway.
+
+This doesn't work though, when the deleted directory contained a child
+directory which was moved *out*. In the early stages of the fs build
+we can then end up with an apparent hard link, with the child directory
+appearing both in its true location, and as a child of the original
+directory which at this stage of the mount process we don't *yet* know
+is defunct.
+
+To resolve this, take out the early special-casing of the "directories
+shall not have hard links" rule in jffs2_build_inode_pass1(), and let the
+normal nlink processing happen for directories as well as other inodes.
+
+Then later in the build process we can set ic->pino_nlink to the parent
+inode#, as is required for directories during normal operation, instead
+of the nlink. And complain only *then* about hard links which are still
+in evidence even after killing off all the unreachable paths.
+
+Reported-by: Liu Song <liu.song11@zte.com.cn>
+Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/jffs2/build.c    |   75 +++++++++++++++++++++++++++++++++++++++-------------
+ fs/jffs2/nodelist.h |    6 +++-
+ 2 files changed, 62 insertions(+), 19 deletions(-)
+
+--- a/fs/jffs2/build.c
++++ b/fs/jffs2/build.c
+@@ -49,7 +49,8 @@ next_inode(int *i, struct jffs2_inode_ca
+ 
+ 
+ static void jffs2_build_inode_pass1(struct jffs2_sb_info *c,
+-				    struct jffs2_inode_cache *ic)
++				    struct jffs2_inode_cache *ic,
++				    int *dir_hardlinks)
+ {
+ 	struct jffs2_full_dirent *fd;
+ 
+@@ -68,19 +69,21 @@ static void jffs2_build_inode_pass1(stru
+ 			dbg_fsbuild("child \"%s\" (ino #%u) of dir ino #%u doesn't exist!\n",
+ 				  fd->name, fd->ino, ic->ino);
+ 			jffs2_mark_node_obsolete(c, fd->raw);
++			/* Clear the ic/raw union so it doesn't cause problems later. */
++			fd->ic = NULL;
+ 			continue;
+ 		}
+ 
++		/* From this point, fd->raw is no longer used so we can set fd->ic */
++		fd->ic = child_ic;
++		child_ic->pino_nlink++;
++		/* If we appear (at this stage) to have hard-linked directories,
++		 * set a flag to trigger a scan later */
+ 		if (fd->type == DT_DIR) {
+-			if (child_ic->pino_nlink) {
+-				JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u appears to be a hard link\n",
+-					    fd->name, fd->ino, ic->ino);
+-				/* TODO: What do we do about it? */
+-			} else {
+-				child_ic->pino_nlink = ic->ino;
+-			}
+-		} else
+-			child_ic->pino_nlink++;
++			child_ic->flags |= INO_FLAGS_IS_DIR;
++			if (child_ic->pino_nlink > 1)
++				*dir_hardlinks = 1;
++		}
+ 
+ 		dbg_fsbuild("increased nlink for child \"%s\" (ino #%u)\n", fd->name, fd->ino);
+ 		/* Can't free scan_dents so far. We might need them in pass 2 */
+@@ -94,8 +97,7 @@ static void jffs2_build_inode_pass1(stru
+ */
+ static int jffs2_build_filesystem(struct jffs2_sb_info *c)
+ {
+-	int ret;
+-	int i;
++	int ret, i, dir_hardlinks = 0;
+ 	struct jffs2_inode_cache *ic;
+ 	struct jffs2_full_dirent *fd;
+ 	struct jffs2_full_dirent *dead_fds = NULL;
+@@ -119,7 +121,7 @@ static int jffs2_build_filesystem(struct
+ 	/* Now scan the directory tree, increasing nlink according to every dirent found. */
+ 	for_each_inode(i, c, ic) {
+ 		if (ic->scan_dents) {
+-			jffs2_build_inode_pass1(c, ic);
++			jffs2_build_inode_pass1(c, ic, &dir_hardlinks);
+ 			cond_resched();
+ 		}
+ 	}
+@@ -155,6 +157,20 @@ static int jffs2_build_filesystem(struct
+ 	}
+ 
+ 	dbg_fsbuild("pass 2a complete\n");
++
++	if (dir_hardlinks) {
++		/* If we detected directory hardlinks earlier, *hopefully*
++		 * they are gone now because some of the links were from
++		 * dead directories which still had some old dirents lying
++		 * around and not yet garbage-collected, but which have
++		 * been discarded above. So clear the pino_nlink field
++		 * in each directory, so that the final scan below can
++		 * print appropriate warnings. */
++		for_each_inode(i, c, ic) {
++			if (ic->flags & INO_FLAGS_IS_DIR)
++				ic->pino_nlink = 0;
++		}
++	}
+ 	dbg_fsbuild("freeing temporary data structures\n");
+ 
+ 	/* Finally, we can scan again and free the dirent structs */
+@@ -162,6 +178,33 @@ static int jffs2_build_filesystem(struct
+ 		while(ic->scan_dents) {
+ 			fd = ic->scan_dents;
+ 			ic->scan_dents = fd->next;
++			/* We do use the pino_nlink field to count nlink of
++			 * directories during fs build, so set it to the
++			 * parent ino# now. Now that there's hopefully only
++			 * one. */
++			if (fd->type == DT_DIR) {
++				if (!fd->ic) {
++					/* We'll have complained about it and marked the corresponding
++					   raw node obsolete already. Just skip it. */
++					continue;
++				}
++
++				/* We *have* to have set this in jffs2_build_inode_pass1() */
++				BUG_ON(!(fd->ic->flags & INO_FLAGS_IS_DIR));
++
++				/* We clear ic->pino_nlink ∀ directories' ic *only* if dir_hardlinks
++				 * is set. Otherwise, we know this should never trigger anyway, so
++				 * we don't do the check. And ic->pino_nlink still contains the nlink
++				 * value (which is 1). */
++				if (dir_hardlinks && fd->ic->pino_nlink) {
++					JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u is also hard linked from dir ino #%u\n",
++						    fd->name, fd->ino, ic->ino, fd->ic->pino_nlink);
++					/* Should we unlink it from its previous parent? */
++				}
++
++				/* For directories, ic->pino_nlink holds that parent inode # */
++				fd->ic->pino_nlink = ic->ino;
++			}
+ 			jffs2_free_full_dirent(fd);
+ 		}
+ 		ic->scan_dents = NULL;
+@@ -240,11 +283,7 @@ static void jffs2_build_remove_unlinked_
+ 
+ 			/* Reduce nlink of the child. If it's now zero, stick it on the
+ 			   dead_fds list to be cleaned up later. Else just free the fd */
+-
+-			if (fd->type == DT_DIR)
+-				child_ic->pino_nlink = 0;
+-			else
+-				child_ic->pino_nlink--;
++			child_ic->pino_nlink--;
+ 
+ 			if (!child_ic->pino_nlink) {
+ 				dbg_fsbuild("inode #%u (\"%s\") now has no links; adding to dead_fds list.\n",
+--- a/fs/jffs2/nodelist.h
++++ b/fs/jffs2/nodelist.h
+@@ -194,6 +194,7 @@ struct jffs2_inode_cache {
+ #define INO_STATE_CLEARING	6	/* In clear_inode() */
+ 
+ #define INO_FLAGS_XATTR_CHECKED	0x01	/* has no duplicate xattr_ref */
++#define INO_FLAGS_IS_DIR	0x02	/* is a directory */
+ 
+ #define RAWNODE_CLASS_INODE_CACHE	0
+ #define RAWNODE_CLASS_XATTR_DATUM	1
+@@ -249,7 +250,10 @@ struct jffs2_readinode_info
+ 
+ struct jffs2_full_dirent
+ {
+-	struct jffs2_raw_node_ref *raw;
++	union {
++		struct jffs2_raw_node_ref *raw;
++		struct jffs2_inode_cache *ic; /* Just during part of build */
++	};
+ 	struct jffs2_full_dirent *next;
+ 	uint32_t version;
+ 	uint32_t ino; /* == zero for unlink */
diff --git a/queue-4.4/jffs2-fix-page-lock-f-sem-deadlock.patch b/queue-4.4/jffs2-fix-page-lock-f-sem-deadlock.patch
new file mode 100644
index 00000000000..7bda632f175
--- /dev/null
+++ b/queue-4.4/jffs2-fix-page-lock-f-sem-deadlock.patch
@@ -0,0 +1,73 @@
+From 49e91e7079febe59a20ca885a87dd1c54240d0f1 Mon Sep 17 00:00:00 2001
+From: David Woodhouse <David.Woodhouse@intel.com>
+Date: Mon, 1 Feb 2016 12:37:20 +0000
+Subject: jffs2: Fix page lock / f->sem deadlock
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: David Woodhouse <David.Woodhouse@intel.com>
+
+commit 49e91e7079febe59a20ca885a87dd1c54240d0f1 upstream.
+
+With this fix, all code paths should now be obtaining the page lock before
+f->sem.
+
+Reported-by: Szabó Tamás <sztomi89@gmail.com>
+Tested-by: Thomas Betker <thomas.betker@rohde-schwarz.com>
+Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/jffs2/README.Locking |    5 +----
+ fs/jffs2/gc.c           |   17 ++++++++++-------
+ 2 files changed, 11 insertions(+), 11 deletions(-)
+
+--- a/fs/jffs2/README.Locking
++++ b/fs/jffs2/README.Locking
+@@ -2,10 +2,6 @@
+ 	JFFS2 LOCKING DOCUMENTATION
+ 	---------------------------
+ 
+-At least theoretically, JFFS2 does not require the Big Kernel Lock
+-(BKL), which was always helpfully obtained for it by Linux 2.4 VFS
+-code. It has its own locking, as described below.
+-
+ This document attempts to describe the existing locking rules for
+ JFFS2. It is not expected to remain perfectly up to date, but ought to
+ be fairly close.
+@@ -69,6 +65,7 @@ Ordering constraints:
+ 	   any f->sem held.
+ 	2. Never attempt to lock two file mutexes in one thread.
+ 	   No ordering rules have been made for doing so.
++	3. Never lock a page cache page with f->sem held.
+ 
+ 
+ 	erase_completion_lock spinlock
+--- a/fs/jffs2/gc.c
++++ b/fs/jffs2/gc.c
+@@ -1296,14 +1296,17 @@ static int jffs2_garbage_collect_dnode(s
+ 		BUG_ON(start > orig_start);
+ 	}
+ 
+-	/* First, use readpage() to read the appropriate page into the page cache */
+-	/* Q: What happens if we actually try to GC the _same_ page for which commit_write()
+-	 *    triggered garbage collection in the first place?
+-	 * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
+-	 *    page OK. We'll actually write it out again in commit_write, which is a little
+-	 *    suboptimal, but at least we're correct.
+-	 */
++	/* The rules state that we must obtain the page lock *before* f->sem, so
++	 * drop f->sem temporarily. Since we also hold c->alloc_sem, nothing's
++	 * actually going to *change* so we're safe; we only allow reading.
++	 *
++	 * It is important to note that jffs2_write_begin() will ensure that its
++	 * page is marked Uptodate before allocating space. That means that if we
++	 * end up here trying to GC the *same* page that jffs2_write_begin() is
++	 * trying to write out, read_cache_page() will not deadlock. */
++	mutex_unlock(&f->sem);
+ 	pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
++	mutex_lock(&f->sem);
+ 
+ 	if (IS_ERR(pg_ptr)) {
+ 		pr_warn("read_cache_page() returned error: %ld\n",
diff --git a/queue-4.4/libata-align-ata_device-s-id-on-a-cacheline.patch b/queue-4.4/libata-align-ata_device-s-id-on-a-cacheline.patch
new file mode 100644
index 00000000000..2bdb6bdd8db
--- /dev/null
+++ b/queue-4.4/libata-align-ata_device-s-id-on-a-cacheline.patch
@@ -0,0 +1,39 @@
+From 4ee34ea3a12396f35b26d90a094c75db95080baa Mon Sep 17 00:00:00 2001
+From: Harvey Hunt <harvey.hunt@imgtec.com>
+Date: Wed, 24 Feb 2016 15:16:43 +0000
+Subject: libata: Align ata_device's id on a cacheline
+
+From: Harvey Hunt <harvey.hunt@imgtec.com>
+
+commit 4ee34ea3a12396f35b26d90a094c75db95080baa upstream.
+
+The id buffer in ata_device is a DMA target, but it isn't explicitly
+cacheline aligned. Due to this, adjacent fields can be overwritten with
+stale data from memory on non coherent architectures. As a result, the
+kernel is sometimes unable to communicate with an ATA device.
+
+Fix this by ensuring that the id buffer is cacheline aligned.
+
+This issue is similar to that fixed by Commit 84bda12af31f
+("libata: align ap->sector_buf").
+
+Signed-off-by: Harvey Hunt <harvey.hunt@imgtec.com>
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/libata.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/linux/libata.h
++++ b/include/linux/libata.h
+@@ -718,7 +718,7 @@ struct ata_device {
+ 	union {
+ 		u16		id[ATA_ID_WORDS]; /* IDENTIFY xxx DEVICE data */
+ 		u32		gscr[SATA_PMP_GSCR_DWORDS]; /* PMP GSCR block */
+-	};
++	} ____cacheline_aligned;
+ 
+ 	/* DEVSLP Timing Variables from Identify Device Data Log */
+ 	u8			devslp_timing[ATA_LOG_DEVSLP_SIZE];
diff --git a/queue-4.4/libata-fix-hdio_get_32bit-ioctl.patch b/queue-4.4/libata-fix-hdio_get_32bit-ioctl.patch
new file mode 100644
index 00000000000..64ebd6cfcba
--- /dev/null
+++ b/queue-4.4/libata-fix-hdio_get_32bit-ioctl.patch
@@ -0,0 +1,96 @@
+From 287e6611ab1eac76c2c5ebf6e345e04c80ca9c61 Mon Sep 17 00:00:00 2001
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Thu, 11 Feb 2016 14:16:27 +0100
+Subject: libata: fix HDIO_GET_32BIT ioctl
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+commit 287e6611ab1eac76c2c5ebf6e345e04c80ca9c61 upstream.
+
+As reported by Soohoon Lee, the HDIO_GET_32BIT ioctl does not
+work correctly in compat mode with libata.
+
+I have investigated the issue further and found multiple problems
+that all appeared with the same commit that originally introduced
+HDIO_GET_32BIT handling in libata back in linux-2.6.8 and presumably
+also linux-2.4, as the code uses "copy_to_user(arg, &val, 1)" to copy
+a 'long' variable containing either 0 or 1 to user space.
+
+The problems with this are:
+
+* On big-endian machines, this will always write a zero because it
+  stores the wrong byte into user space.
+
+* In compat mode, the upper three bytes of the variable are updated
+  by the compat_hdio_ioctl() function, but they now contain
+  uninitialized stack data.
+
+* The hdparm tool calling this ioctl uses a 'static long' variable
+  to store the result. This means at least the upper bytes are
+  initialized to zero, but calling another ioctl like HDIO_GET_MULTCOUNT
+  would fill them with data that remains stale when the low byte
+  is overwritten. Fortunately libata doesn't implement any of the
+  affected ioctl commands, so this would only happen when we query
+  both an IDE and an ATA device in the same command such as
+  "hdparm -N -c /dev/hda /dev/sda"
+
+* The libata code for unknown reasons started using ATA_IOC_GET_IO32
+  and ATA_IOC_SET_IO32 as aliases for HDIO_GET_32BIT and HDIO_SET_32BIT,
+  while the ioctl commands that were added later use the normal
+  HDIO_* names. This is harmless but rather confusing.
+
+This addresses all four issues by changing the code to use put_user()
+on an 'unsigned long' variable in HDIO_GET_32BIT, like the IDE subsystem
+does, and by clarifying the names of the ioctl commands.
+
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Reported-by: Soohoon Lee <Soohoon.Lee@f5.com>
+Tested-by: Soohoon Lee <Soohoon.Lee@f5.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/ata/libata-scsi.c |   11 +++++------
+ include/linux/ata.h       |    4 ++--
+ 2 files changed, 7 insertions(+), 8 deletions(-)
+
+--- a/drivers/ata/libata-scsi.c
++++ b/drivers/ata/libata-scsi.c
+@@ -675,19 +675,18 @@ static int ata_ioc32(struct ata_port *ap
+ int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *scsidev,
+ 		     int cmd, void __user *arg)
+ {
+-	int val = -EINVAL, rc = -EINVAL;
++	unsigned long val;
++	int rc = -EINVAL;
+ 	unsigned long flags;
+ 
+ 	switch (cmd) {
+-	case ATA_IOC_GET_IO32:
++	case HDIO_GET_32BIT:
+ 		spin_lock_irqsave(ap->lock, flags);
+ 		val = ata_ioc32(ap);
+ 		spin_unlock_irqrestore(ap->lock, flags);
+-		if (copy_to_user(arg, &val, 1))
+-			return -EFAULT;
+-		return 0;
++		return put_user(val, (unsigned long __user *)arg);
+ 
+-	case ATA_IOC_SET_IO32:
++	case HDIO_SET_32BIT:
+ 		val = (unsigned long) arg;
+ 		rc = 0;
+ 		spin_lock_irqsave(ap->lock, flags);
+--- a/include/linux/ata.h
++++ b/include/linux/ata.h
+@@ -487,8 +487,8 @@ enum ata_tf_protocols {
+ };
+ 
+ enum ata_ioctls {
+-	ATA_IOC_GET_IO32	= 0x309,
+-	ATA_IOC_SET_IO32	= 0x324,
++	ATA_IOC_GET_IO32	= 0x309, /* HDIO_GET_32BIT */
++	ATA_IOC_SET_IO32	= 0x324, /* HDIO_SET_32BIT */
+ };
+ 
+ /* core structures */
diff --git a/queue-4.4/pata-rb532-cf-get-rid-of-the-irq_to_gpio-call.patch b/queue-4.4/pata-rb532-cf-get-rid-of-the-irq_to_gpio-call.patch
new file mode 100644
index 00000000000..424b0038877
--- /dev/null
+++ b/queue-4.4/pata-rb532-cf-get-rid-of-the-irq_to_gpio-call.patch
@@ -0,0 +1,80 @@
+From 018361767a21fb2d5ebd3ac182c04baf8a8b4e08 Mon Sep 17 00:00:00 2001
+From: Gabor Juhos <juhosg@openwrt.org>
+Date: Wed, 17 Feb 2016 12:58:20 +0100
+Subject: pata-rb532-cf: get rid of the irq_to_gpio() call
+
+From: Gabor Juhos <juhosg@openwrt.org>
+
+commit 018361767a21fb2d5ebd3ac182c04baf8a8b4e08 upstream.
+
+The RB532 platform specific irq_to_gpio() implementation has been
+removed with commit 832f5dacfa0b ("MIPS: Remove all the uses of
+custom gpio.h"). Now the platform uses the generic stub which causes
+the following error:
+
+  pata-rb532-cf pata-rb532-cf: no GPIO found for irq149
+  pata-rb532-cf: probe of pata-rb532-cf failed with error -2
+
+Drop the irq_to_gpio() call and get the GPIO number from platform
+data instead. After this change, the driver works again:
+
+  scsi host0: pata-rb532-cf
+  ata1: PATA max PIO4 irq 149
+  ata1.00: CFA: CF 1GB, 20080820, max MWDMA4
+  ata1.00: 1989792 sectors, multi 0: LBA
+  ata1.00: configured for PIO4
+  scsi 0:0:0:0: Direct-Access     ATA      CF 1GB           0820 PQ: 0\
+  ANSI: 5
+  sd 0:0:0:0: [sda] 1989792 512-byte logical blocks: (1.01 GB/971 MiB)
+  sd 0:0:0:0: [sda] Write Protect is off
+  sd 0:0:0:0: [sda] Write cache: disabled, read cache: enabled, doesn't\
+  support DPO or FUA
+   sda: sda1 sda2
+  sd 0:0:0:0: [sda] Attached SCSI disk
+
+Fixes: 832f5dacfa0b ("MIPS: Remove all the uses of custom gpio.h")
+Cc: Alban Bedel <albeu@free.fr>
+Cc: Ralf Baechle <ralf@linux-mips.org>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Gabor Juhos <juhosg@openwrt.org>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/ata/pata_rb532_cf.c |   11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+--- a/drivers/ata/pata_rb532_cf.c
++++ b/drivers/ata/pata_rb532_cf.c
+@@ -32,6 +32,8 @@
+ #include <linux/libata.h>
+ #include <scsi/scsi_host.h>
+ 
++#include <asm/mach-rc32434/rb.h>
++
+ #define DRV_NAME	"pata-rb532-cf"
+ #define DRV_VERSION	"0.1.0"
+ #define DRV_DESC	"PATA driver for RouterBOARD 532 Compact Flash"
+@@ -107,6 +109,7 @@ static int rb532_pata_driver_probe(struc
+ 	int gpio;
+ 	struct resource *res;
+ 	struct ata_host *ah;
++	struct cf_device *pdata;
+ 	struct rb532_cf_info *info;
+ 	int ret;
+ 
+@@ -122,7 +125,13 @@ static int rb532_pata_driver_probe(struc
+ 		return -ENOENT;
+ 	}
+ 
+-	gpio = irq_to_gpio(irq);
++	pdata = dev_get_platdata(&pdev->dev);
++	if (!pdata) {
++		dev_err(&pdev->dev, "no platform data specified\n");
++		return -EINVAL;
++	}
++
++	gpio = pdata->gpio_pin;
+ 	if (gpio < 0) {
+ 		dev_err(&pdev->dev, "no GPIO found for irq%d\n", irq);
+ 		return -ENOENT;
diff --git a/queue-4.4/pm-sleep-x86-fix-crash-on-graph-trace-through-x86-suspend.patch b/queue-4.4/pm-sleep-x86-fix-crash-on-graph-trace-through-x86-suspend.patch
new file mode 100644
index 00000000000..0380ee934d2
--- /dev/null
+++ b/queue-4.4/pm-sleep-x86-fix-crash-on-graph-trace-through-x86-suspend.patch
@@ -0,0 +1,51 @@
+From 92f9e179a702a6adbc11e2fedc76ecd6ffc9e3f7 Mon Sep 17 00:00:00 2001
+From: Todd E Brandt <todd.e.brandt@linux.intel.com>
+Date: Wed, 2 Mar 2016 16:05:29 -0800
+Subject: PM / sleep / x86: Fix crash on graph trace through x86 suspend
+
+From: Todd E Brandt <todd.e.brandt@linux.intel.com>
+
+commit 92f9e179a702a6adbc11e2fedc76ecd6ffc9e3f7 upstream.
+
+Pause/unpause graph tracing around do_suspend_lowlevel as it has
+inconsistent call/return info after it jumps to the wakeup vector.
+The graph trace buffer will otherwise become misaligned and
+may eventually crash and hang on suspend.
+
+To reproduce the issue and test the fix:
+Run a function_graph trace over suspend/resume and set the graph
+function to suspend_devices_and_enter. This consistently hangs the
+system without this fix.
+
+Signed-off-by: Todd Brandt <todd.e.brandt@linux.intel.com>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/acpi/sleep.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/arch/x86/kernel/acpi/sleep.c
++++ b/arch/x86/kernel/acpi/sleep.c
+@@ -16,6 +16,7 @@
+ #include <asm/cacheflush.h>
+ #include <asm/realmode.h>
+ 
++#include <linux/ftrace.h>
+ #include "../../realmode/rm/wakeup.h"
+ #include "sleep.h"
+ 
+@@ -107,7 +108,13 @@ int x86_acpi_suspend_lowlevel(void)
+        saved_magic = 0x123456789abcdef0L;
+ #endif /* CONFIG_64BIT */
+ 
++	/*
++	 * Pause/unpause graph tracing around do_suspend_lowlevel as it has
++	 * inconsistent call/return info after it jumps to the wakeup vector.
++	 */
++	pause_graph_tracing();
+ 	do_suspend_lowlevel();
++	unpause_graph_tracing();
+ 	return 0;
+ }
+ 
diff --git a/queue-4.4/revert-jffs2-fix-lock-acquisition-order-bug-in-jffs2_write_begin.patch b/queue-4.4/revert-jffs2-fix-lock-acquisition-order-bug-in-jffs2_write_begin.patch
new file mode 100644
index 00000000000..151abdbc1b7
--- /dev/null
+++ b/queue-4.4/revert-jffs2-fix-lock-acquisition-order-bug-in-jffs2_write_begin.patch
@@ -0,0 +1,133 @@
+From 157078f64b8a9cd7011b6b900b2f2498df850748 Mon Sep 17 00:00:00 2001
+From: Thomas Betker <thomas.betker@rohde-schwarz.com>
+Date: Tue, 10 Nov 2015 22:18:15 +0100
+Subject: Revert "jffs2: Fix lock acquisition order bug in jffs2_write_begin"
+
+From: Thomas Betker <thomas.betker@rohde-schwarz.com>
+
+commit 157078f64b8a9cd7011b6b900b2f2498df850748 upstream.
+
+This reverts commit 5ffd3412ae55
+("jffs2: Fix lock acquisition order bug in jffs2_write_begin").
+
+The commit modified jffs2_write_begin() to remove a deadlock with
+jffs2_garbage_collect_live(), but this introduced new deadlocks found
+by multiple users. page_lock() actually has to be called before
+mutex_lock(&c->alloc_sem) or mutex_lock(&f->sem) because
+jffs2_write_end() and jffs2_readpage() are called with the page locked,
+and they acquire c->alloc_sem and f->sem, resp.
+
+In other words, the lock order in jffs2_write_begin() was correct, and
+it is the jffs2_garbage_collect_live() path that has to be changed.
+
+Revert the commit to get rid of the new deadlocks, and to clear the way
+for a better fix of the original deadlock.
+
+Reported-by: Deng Chao <deng.chao1@zte.com.cn>
+Reported-by: Ming Liu <liu.ming50@gmail.com>
+Reported-by: wangzaiwei <wangzaiwei@top-vision.cn>
+Signed-off-by: Thomas Betker <thomas.betker@rohde-schwarz.com>
+Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/jffs2/file.c |   39 ++++++++++++++++++---------------------
+ 1 file changed, 18 insertions(+), 21 deletions(-)
+
+--- a/fs/jffs2/file.c
++++ b/fs/jffs2/file.c
+@@ -137,39 +137,33 @@ static int jffs2_write_begin(struct file
+ 	struct page *pg;
+ 	struct inode *inode = mapping->host;
+ 	struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
+-	struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
+-	struct jffs2_raw_inode ri;
+-	uint32_t alloc_len = 0;
+ 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ 	uint32_t pageofs = index << PAGE_CACHE_SHIFT;
+ 	int ret = 0;
+ 
+-	jffs2_dbg(1, "%s()\n", __func__);
+-
+-	if (pageofs > inode->i_size) {
+-		ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
+-					  ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
+-		if (ret)
+-			return ret;
+-	}
+-
+-	mutex_lock(&f->sem);
+ 	pg = grab_cache_page_write_begin(mapping, index, flags);
+-	if (!pg) {
+-		if (alloc_len)
+-			jffs2_complete_reservation(c);
+-		mutex_unlock(&f->sem);
++	if (!pg)
+ 		return -ENOMEM;
+-	}
+ 	*pagep = pg;
+ 
+-	if (alloc_len) {
++	jffs2_dbg(1, "%s()\n", __func__);
++
++	if (pageofs > inode->i_size) {
+ 		/* Make new hole frag from old EOF to new page */
++		struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
++		struct jffs2_raw_inode ri;
+ 		struct jffs2_full_dnode *fn;
++		uint32_t alloc_len;
+ 
+ 		jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n",
+ 			  (unsigned int)inode->i_size, pageofs);
+ 
++		ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
++					  ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
++		if (ret)
++			goto out_page;
++
++		mutex_lock(&f->sem);
+ 		memset(&ri, 0, sizeof(ri));
+ 
+ 		ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
+@@ -196,6 +190,7 @@ static int jffs2_write_begin(struct file
+ 		if (IS_ERR(fn)) {
+ 			ret = PTR_ERR(fn);
+ 			jffs2_complete_reservation(c);
++			mutex_unlock(&f->sem);
+ 			goto out_page;
+ 		}
+ 		ret = jffs2_add_full_dnode_to_inode(c, f, fn);
+@@ -210,10 +205,12 @@ static int jffs2_write_begin(struct file
+ 			jffs2_mark_node_obsolete(c, fn->raw);
+ 			jffs2_free_full_dnode(fn);
+ 			jffs2_complete_reservation(c);
++			mutex_unlock(&f->sem);
+ 			goto out_page;
+ 		}
+ 		jffs2_complete_reservation(c);
+ 		inode->i_size = pageofs;
++		mutex_unlock(&f->sem);
+ 	}
+ 
+ 	/*
+@@ -222,18 +219,18 @@ static int jffs2_write_begin(struct file
+ 	 * case of a short-copy.
+ 	 */
+ 	if (!PageUptodate(pg)) {
++		mutex_lock(&f->sem);
+ 		ret = jffs2_do_readpage_nolock(inode, pg);
++		mutex_unlock(&f->sem);
+ 		if (ret)
+ 			goto out_page;
+ 	}
+-	mutex_unlock(&f->sem);
+ 	jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags);
+ 	return ret;
+ 
+ out_page:
+ 	unlock_page(pg);
+ 	page_cache_release(pg);
+-	mutex_unlock(&f->sem);
+ 	return ret;
+ }
+ 
diff --git a/queue-4.4/series b/queue-4.4/series
index 2407707a8a5..894b79ccd80 100644
--- a/queue-4.4/series
+++ b/queue-4.4/series
@@ -19,3 +19,24 @@ arm-arm64-kvm-fix-ioctl-error-handling.patch
 iommu-amd-apply-workaround-for-ats-write-permission-check.patch
 iommu-amd-fix-boot-warning-when-device-00-00.0-is-not-iommu-covered.patch
 iommu-vt-d-use-bus_notify_removed_device-in-hotplug-path.patch
+target-fix-write_same-discard-conversion-to-linux-512b-sectors.patch
+drm-ast-fix-incorrect-register-check-for-dram-width.patch
+drm-radeon-pm-update-current-crtc-info-after-setting-the-powerstate.patch
+drm-amdgpu-pm-update-current-crtc-info-after-setting-the-powerstate.patch
+drm-amdgpu-apply-gfx_v8-fixes-to-gfx_v7-as-well.patch
+drm-amdgpu-gfx8-specify-which-engine-to-wait-before-vm-flush.patch
+drm-amdgpu-return-from-atombios_dp_get_dpcd-only-when-error.patch
+libata-fix-hdio_get_32bit-ioctl.patch
+libata-align-ata_device-s-id-on-a-cacheline.patch
+block-bio-introduce-helpers-to-get-the-1st-and-last-bvec.patch
+writeback-flush-inode-cgroup-wb-switches-instead-of-pinning-super_block.patch
+adding-intel-lewisburg-device-ids-for-sata.patch
+arm64-vmemmap-use-virtual-projection-of-linear-region.patch
+pm-sleep-x86-fix-crash-on-graph-trace-through-x86-suspend.patch
+ata-ahci-don-t-mark-hotplugcapable-ports-as-external-removable.patch
+tracing-do-not-have-comm-filter-override-event-comm-field.patch
+pata-rb532-cf-get-rid-of-the-irq_to_gpio-call.patch
+btrfs-fix-loading-of-orphan-roots-leading-to-bug_on.patch
+revert-jffs2-fix-lock-acquisition-order-bug-in-jffs2_write_begin.patch
+jffs2-fix-page-lock-f-sem-deadlock.patch
+fix-directory-hardlinks-from-deleted-directories.patch
diff --git a/queue-4.4/target-fix-write_same-discard-conversion-to-linux-512b-sectors.patch b/queue-4.4/target-fix-write_same-discard-conversion-to-linux-512b-sectors.patch
new file mode 100644
index 00000000000..35af5d4232b
--- /dev/null
+++ b/queue-4.4/target-fix-write_same-discard-conversion-to-linux-512b-sectors.patch
@@ -0,0 +1,268 @@
+From 8a9ebe717a133ba7bc90b06047f43cc6b8bcb8b3 Mon Sep 17 00:00:00 2001
+From: Mike Christie <mchristi@redhat.com>
+Date: Mon, 18 Jan 2016 14:09:27 -0600
+Subject: target: Fix WRITE_SAME/DISCARD conversion to linux 512b sectors
+
+From: Mike Christie <mchristi@redhat.com>
+
+commit 8a9ebe717a133ba7bc90b06047f43cc6b8bcb8b3 upstream.
+
+In a couple places we are not converting to/from the Linux
+block layer 512 bytes sectors.
+
+1.
+
+The request queue values and what we do are a mismatch of
+things:
+
+max_discard_sectors - This is in linux block layer 512 byte
+sectors. We are just copying this to max_unmap_lba_count.
+
+discard_granularity - This is in bytes. We are converting it
+to Linux block layer 512 byte sectors.
+
+discard_alignment - This is in bytes. We are just copying
+this over.
+
+The problem is that the core LIO code exports these values in
+spc_emulate_evpd_b0 and we use them to test request arguments
+in sbc_execute_unmap, but we never convert to the block size
+we export to the initiator. If we are not using 512 byte sectors
+then we are exporting the wrong values or our checks are off.
+And, for the discard_alignment/bytes case we are just plain messed
+up.
+
+2.
+
+blkdev_issue_discard's start and number of sector arguments
+are supposed to be in linux block layer 512 byte sectors. We are
+currently passing in the values we get from the initiator which
+might be based on some other sector size.
+
+There is a similar problem in iblock_execute_write_same where
+the bio functions want values in 512 byte sectors but we are
+passing in what we got from the initiator.
+
+Signed-off-by: Mike Christie <mchristi@redhat.com>
+Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
+[ kamal: backport to 4.4-stable: no unmap_zeroes_data ]
+Signed-off-by: Kamal Mostafa <kamal@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/target/target_core_device.c  |   43 ++++++++++++++++++++++++++
+ drivers/target/target_core_file.c    |   29 +++++-------------
+ drivers/target/target_core_iblock.c  |   56 ++++++++---------------------------
+ include/target/target_core_backend.h |    3 +
+ 4 files changed, 69 insertions(+), 62 deletions(-)
+
+--- a/drivers/target/target_core_device.c
++++ b/drivers/target/target_core_device.c
+@@ -826,6 +826,49 @@ struct se_device *target_alloc_device(st
+ 	return dev;
+ }
+ 
++/*
++ * Check if the underlying struct block_device request_queue supports
++ * the QUEUE_FLAG_DISCARD bit for UNMAP/WRITE_SAME in SCSI + TRIM
++ * in ATA and we need to set TPE=1
++ */
++bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib,
++				       struct request_queue *q, int block_size)
++{
++	if (!blk_queue_discard(q))
++		return false;
++
++	attrib->max_unmap_lba_count = (q->limits.max_discard_sectors << 9) /
++								block_size;
++	/*
++	 * Currently hardcoded to 1 in Linux/SCSI code..
++	 */
++	attrib->max_unmap_block_desc_count = 1;
++	attrib->unmap_granularity = q->limits.discard_granularity / block_size;
++	attrib->unmap_granularity_alignment = q->limits.discard_alignment /
++								block_size;
++	return true;
++}
++EXPORT_SYMBOL(target_configure_unmap_from_queue);
++
++/*
++ * Convert from blocksize advertised to the initiator to the 512 byte
++ * units unconditionally used by the Linux block layer.
++ */
++sector_t target_to_linux_sector(struct se_device *dev, sector_t lb)
++{
++	switch (dev->dev_attrib.block_size) {
++	case 4096:
++		return lb << 3;
++	case 2048:
++		return lb << 2;
++	case 1024:
++		return lb << 1;
++	default:
++		return lb;
++	}
++}
++EXPORT_SYMBOL(target_to_linux_sector);
++
+ int target_configure_device(struct se_device *dev)
+ {
+ 	struct se_hba *hba = dev->se_hba;
+--- a/drivers/target/target_core_file.c
++++ b/drivers/target/target_core_file.c
+@@ -160,25 +160,11 @@ static int fd_configure_device(struct se
+ 			" block_device blocks: %llu logical_block_size: %d\n",
+ 			dev_size, div_u64(dev_size, fd_dev->fd_block_size),
+ 			fd_dev->fd_block_size);
+-		/*
+-		 * Check if the underlying struct block_device request_queue supports
+-		 * the QUEUE_FLAG_DISCARD bit for UNMAP/WRITE_SAME in SCSI + TRIM
+-		 * in ATA and we need to set TPE=1
+-		 */
+-		if (blk_queue_discard(q)) {
+-			dev->dev_attrib.max_unmap_lba_count =
+-				q->limits.max_discard_sectors;
+-			/*
+-			 * Currently hardcoded to 1 in Linux/SCSI code..
+-			 */
+-			dev->dev_attrib.max_unmap_block_desc_count = 1;
+-			dev->dev_attrib.unmap_granularity =
+-				q->limits.discard_granularity >> 9;
+-			dev->dev_attrib.unmap_granularity_alignment =
+-				q->limits.discard_alignment;
++
++		if (target_configure_unmap_from_queue(&dev->dev_attrib, q,
++						      fd_dev->fd_block_size))
+ 			pr_debug("IFILE: BLOCK Discard support available,"
+-					" disabled by default\n");
+-		}
++				 " disabled by default\n");
+ 		/*
+ 		 * Enable write same emulation for IBLOCK and use 0xFFFF as
+ 		 * the smaller WRITE_SAME(10) only has a two-byte block count.
+@@ -490,9 +476,12 @@ fd_execute_unmap(struct se_cmd *cmd, sec
+ 	if (S_ISBLK(inode->i_mode)) {
+ 		/* The backend is block device, use discard */
+ 		struct block_device *bdev = inode->i_bdev;
++		struct se_device *dev = cmd->se_dev;
+ 
+-		ret = blkdev_issue_discard(bdev, lba,
+-				nolb, GFP_KERNEL, 0);
++		ret = blkdev_issue_discard(bdev,
++					   target_to_linux_sector(dev, lba),
++					   target_to_linux_sector(dev,  nolb),
++					   GFP_KERNEL, 0);
+ 		if (ret < 0) {
+ 			pr_warn("FILEIO: blkdev_issue_discard() failed: %d\n",
+ 				ret);
+--- a/drivers/target/target_core_iblock.c
++++ b/drivers/target/target_core_iblock.c
+@@ -121,27 +121,11 @@ static int iblock_configure_device(struc
+ 	dev->dev_attrib.hw_max_sectors = queue_max_hw_sectors(q);
+ 	dev->dev_attrib.hw_queue_depth = q->nr_requests;
+ 
+-	/*
+-	 * Check if the underlying struct block_device request_queue supports
+-	 * the QUEUE_FLAG_DISCARD bit for UNMAP/WRITE_SAME in SCSI + TRIM
+-	 * in ATA and we need to set TPE=1
+-	 */
+-	if (blk_queue_discard(q)) {
+-		dev->dev_attrib.max_unmap_lba_count =
+-				q->limits.max_discard_sectors;
+-
+-		/*
+-		 * Currently hardcoded to 1 in Linux/SCSI code..
+-		 */
+-		dev->dev_attrib.max_unmap_block_desc_count = 1;
+-		dev->dev_attrib.unmap_granularity =
+-				q->limits.discard_granularity >> 9;
+-		dev->dev_attrib.unmap_granularity_alignment =
+-				q->limits.discard_alignment;
+-
++	if (target_configure_unmap_from_queue(&dev->dev_attrib, q,
++					      dev->dev_attrib.hw_block_size))
+ 		pr_debug("IBLOCK: BLOCK Discard support available,"
+-				" disabled by default\n");
+-	}
++			 " disabled by default\n");
++
+ 	/*
+ 	 * Enable write same emulation for IBLOCK and use 0xFFFF as
+ 	 * the smaller WRITE_SAME(10) only has a two-byte block count.
+@@ -413,9 +397,13 @@ static sense_reason_t
+ iblock_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb)
+ {
+ 	struct block_device *bdev = IBLOCK_DEV(cmd->se_dev)->ibd_bd;
++	struct se_device *dev = cmd->se_dev;
+ 	int ret;
+ 
+-	ret = blkdev_issue_discard(bdev, lba, nolb, GFP_KERNEL, 0);
++	ret = blkdev_issue_discard(bdev,
++				   target_to_linux_sector(dev, lba),
++				   target_to_linux_sector(dev,  nolb),
++				   GFP_KERNEL, 0);
+ 	if (ret < 0) {
+ 		pr_err("blkdev_issue_discard() failed: %d\n", ret);
+ 		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+@@ -431,8 +419,10 @@ iblock_execute_write_same(struct se_cmd
+ 	struct scatterlist *sg;
+ 	struct bio *bio;
+ 	struct bio_list list;
+-	sector_t block_lba = cmd->t_task_lba;
+-	sector_t sectors = sbc_get_write_same_sectors(cmd);
++	struct se_device *dev = cmd->se_dev;
++	sector_t block_lba = target_to_linux_sector(dev, cmd->t_task_lba);
++	sector_t sectors = target_to_linux_sector(dev,
++					sbc_get_write_same_sectors(cmd));
+ 
+ 	if (cmd->prot_op) {
+ 		pr_err("WRITE_SAME: Protection information with IBLOCK"
+@@ -646,12 +636,12 @@ iblock_execute_rw(struct se_cmd *cmd, st
+ 		  enum dma_data_direction data_direction)
+ {
+ 	struct se_device *dev = cmd->se_dev;
++	sector_t block_lba = target_to_linux_sector(dev, cmd->t_task_lba);
+ 	struct iblock_req *ibr;
+ 	struct bio *bio, *bio_start;
+ 	struct bio_list list;
+ 	struct scatterlist *sg;
+ 	u32 sg_num = sgl_nents;
+-	sector_t block_lba;
+ 	unsigned bio_cnt;
+ 	int rw = 0;
+ 	int i;
+@@ -677,24 +667,6 @@ iblock_execute_rw(struct se_cmd *cmd, st
+ 		rw = READ;
+ 	}
+ 
+-	/*
+-	 * Convert the blocksize advertised to the initiator to the 512 byte
+-	 * units unconditionally used by the Linux block layer.
+-	 */
+-	if (dev->dev_attrib.block_size == 4096)
+-		block_lba = (cmd->t_task_lba << 3);
+-	else if (dev->dev_attrib.block_size == 2048)
+-		block_lba = (cmd->t_task_lba << 2);
+-	else if (dev->dev_attrib.block_size == 1024)
+-		block_lba = (cmd->t_task_lba << 1);
+-	else if (dev->dev_attrib.block_size == 512)
+-		block_lba = cmd->t_task_lba;
+-	else {
+-		pr_err("Unsupported SCSI -> BLOCK LBA conversion:"
+-				" %u\n", dev->dev_attrib.block_size);
+-		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+-	}
+-
+ 	ibr = kzalloc(sizeof(struct iblock_req), GFP_KERNEL);
+ 	if (!ibr)
+ 		goto fail;
+--- a/include/target/target_core_backend.h
++++ b/include/target/target_core_backend.h
+@@ -94,5 +94,8 @@ sense_reason_t passthrough_parse_cdb(str
+ 	sense_reason_t (*exec_cmd)(struct se_cmd *cmd));
+ 
+ bool target_sense_desc_format(struct se_device *dev);
++sector_t target_to_linux_sector(struct se_device *dev, sector_t lb);
++bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib,
++				       struct request_queue *q, int block_size);
+ 
+ #endif /* TARGET_CORE_BACKEND_H */
diff --git a/queue-4.4/tracing-do-not-have-comm-filter-override-event-comm-field.patch b/queue-4.4/tracing-do-not-have-comm-filter-override-event-comm-field.patch
new file mode 100644
index 00000000000..ab367845fc3
--- /dev/null
+++ b/queue-4.4/tracing-do-not-have-comm-filter-override-event-comm-field.patch
@@ -0,0 +1,124 @@
+From e57cbaf0eb006eaa207395f3bfd7ce52c1b5539c Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
+Date: Thu, 3 Mar 2016 17:18:20 -0500
+Subject: tracing: Do not have 'comm' filter override event 'comm' field
+
+From: Steven Rostedt (Red Hat) <rostedt@goodmis.org>
+
+commit e57cbaf0eb006eaa207395f3bfd7ce52c1b5539c upstream.
+
+Commit 9f61668073a8d "tracing: Allow triggers to filter for CPU ids and
+process names" added a 'comm' filter that will filter events based on the
+current tasks struct 'comm'. But this now hides the ability to filter events
+that have a 'comm' field too. For example, sched_migrate_task trace event.
+That has a 'comm' field of the task to be migrated.
+
+ echo 'comm == "bash"' > events/sched_migrate_task/filter
+
+will now filter all sched_migrate_task events for tasks named "bash" that
+migrate other tasks (in interrupt context), instead of seeing when "bash"
+itself gets migrated.
+
+This fix requires a couple of changes.
+
+1) Change the look up order for filter predicates to look at the events
+   fields before looking at the generic filters.
+
+2) Instead of basing the filter function off of the "comm" name, have the
+   generic "comm" filter have its own filter_type (FILTER_COMM). Test
+   against the type instead of the name to assign the filter function.
+
+3) Add a new "COMM" filter that works just like "comm" but will filter based
+   on the current task, even if the trace event contains a "comm" field.
+
+Do the same for "cpu" field, adding a FILTER_CPU and a filter "CPU".
+
+Fixes: 9f61668073a8d "tracing: Allow triggers to filter for CPU ids and process names"
+Reported-by: Matt Fleming <matt@codeblueprint.co.uk>
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
+index 429fdfc3baf5..925730bc9fc1 100644
+--- a/include/linux/trace_events.h
++++ b/include/linux/trace_events.h
+@@ -568,6 +568,8 @@ enum {
+ 	FILTER_DYN_STRING,
+ 	FILTER_PTR_STRING,
+ 	FILTER_TRACE_FN,
++	FILTER_COMM,
++	FILTER_CPU,
+ };
+ 
+ extern int trace_event_raw_init(struct trace_event_call *call);
+diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
+index ab09829d3b97..05ddc0820771 100644
+--- a/kernel/trace/trace_events.c
++++ b/kernel/trace/trace_events.c
+@@ -97,16 +97,16 @@ trace_find_event_field(struct trace_event_call *call, char *name)
+ 	struct ftrace_event_field *field;
+ 	struct list_head *head;
+ 
+-	field = __find_event_field(&ftrace_generic_fields, name);
++	head = trace_get_fields(call);
++	field = __find_event_field(head, name);
+ 	if (field)
+ 		return field;
+ 
+-	field = __find_event_field(&ftrace_common_fields, name);
++	field = __find_event_field(&ftrace_generic_fields, name);
+ 	if (field)
+ 		return field;
+ 
+-	head = trace_get_fields(call);
+-	return __find_event_field(head, name);
++	return __find_event_field(&ftrace_common_fields, name);
+ }
+ 
+ static int __trace_define_field(struct list_head *head, const char *type,
+@@ -171,8 +171,10 @@ static int trace_define_generic_fields(void)
+ {
+ 	int ret;
+ 
+-	__generic_field(int, cpu, FILTER_OTHER);
+-	__generic_field(char *, comm, FILTER_PTR_STRING);
++	__generic_field(int, CPU, FILTER_CPU);
++	__generic_field(int, cpu, FILTER_CPU);
++	__generic_field(char *, COMM, FILTER_COMM);
++	__generic_field(char *, comm, FILTER_COMM);
+ 
+ 	return ret;
+ }
+diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
+index f93a219b18da..6816302542b2 100644
+--- a/kernel/trace/trace_events_filter.c
++++ b/kernel/trace/trace_events_filter.c
+@@ -1043,13 +1043,14 @@ static int init_pred(struct filter_parse_state *ps,
+ 		return -EINVAL;
+ 	}
+ 
+-	if (is_string_field(field)) {
++	if (field->filter_type == FILTER_COMM) {
++		filter_build_regex(pred);
++		fn = filter_pred_comm;
++		pred->regex.field_len = TASK_COMM_LEN;
++	} else if (is_string_field(field)) {
+ 		filter_build_regex(pred);
+ 
+-		if (!strcmp(field->name, "comm")) {
+-			fn = filter_pred_comm;
+-			pred->regex.field_len = TASK_COMM_LEN;
+-		} else if (field->filter_type == FILTER_STATIC_STRING) {
++		if (field->filter_type == FILTER_STATIC_STRING) {
+ 			fn = filter_pred_string;
+ 			pred->regex.field_len = field->size;
+ 		} else if (field->filter_type == FILTER_DYN_STRING)
+@@ -1072,7 +1073,7 @@ static int init_pred(struct filter_parse_state *ps,
+ 		}
+ 		pred->val = val;
+ 
+-		if (!strcmp(field->name, "cpu"))
++		if (field->filter_type == FILTER_CPU)
+ 			fn = filter_pred_cpu;
+ 		else
+ 			fn = select_comparison_fn(pred->op, field->size,
diff --git a/queue-4.4/writeback-flush-inode-cgroup-wb-switches-instead-of-pinning-super_block.patch b/queue-4.4/writeback-flush-inode-cgroup-wb-switches-instead-of-pinning-super_block.patch
new file mode 100644
index 00000000000..8f09df40b3f
--- /dev/null
+++ b/queue-4.4/writeback-flush-inode-cgroup-wb-switches-instead-of-pinning-super_block.patch
@@ -0,0 +1,188 @@
+From a1a0e23e49037c23ea84bc8cc146a03584d13577 Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Mon, 29 Feb 2016 18:28:53 -0500
+Subject: writeback: flush inode cgroup wb switches instead of pinning super_block
+
+From: Tejun Heo <tj@kernel.org>
+
+commit a1a0e23e49037c23ea84bc8cc146a03584d13577 upstream.
+
+If cgroup writeback is in use, inodes can be scheduled for
+asynchronous wb switching.  Before 5ff8eaac1636 ("writeback: keep
+superblock pinned during cgroup writeback association switches"), this
+could race with umount leading to super_block being destroyed while
+inodes are pinned for wb switching.  5ff8eaac1636 fixed it by bumping
+s_active while wb switches are in flight; however, this allowed
+in-flight wb switches to make umounts asynchronous when the userland
+expected synchronicity - e.g. fsck immediately following umount may
+fail because the device is still busy.
+
+This patch removes the problematic super_block pinning and instead
+makes generic_shutdown_super() flush in-flight wb switches.  wb
+switches are now executed on a dedicated isw_wq so that they can be
+flushed and isw_nr_in_flight keeps track of the number of in-flight wb
+switches so that flushing can be avoided in most cases.
+
+v2: Move cgroup_writeback_umount() further below and add MS_ACTIVE
+    check in inode_switch_wbs() as Jan and Al suggested.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Reported-by: Tahsin Erdogan <tahsin@google.com>
+Cc: Jan Kara <jack@suse.cz>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Link: http://lkml.kernel.org/g/CAAeU0aNCq7LGODvVGRU-oU_o-6enii5ey0p1c26D1ZzYwkDc5A@mail.gmail.com
+Fixes: 5ff8eaac1636 ("writeback: keep superblock pinned during cgroup writeback association switches")
+Reviewed-by: Jan Kara <jack@suse.cz>
+Tested-by: Tahsin Erdogan <tahsin@google.com>
+Signed-off-by: Jens Axboe <axboe@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/fs-writeback.c         |   54 ++++++++++++++++++++++++++++++++++------------
+ fs/super.c                |    1 
+ include/linux/writeback.h |    5 ++++
+ 3 files changed, 47 insertions(+), 13 deletions(-)
+
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -223,6 +223,9 @@ static void wb_wait_for_completion(struc
+ #define WB_FRN_HIST_MAX_SLOTS	(WB_FRN_HIST_THR_SLOTS / 2 + 1)
+ 					/* one round can affect upto 5 slots */
+ 
++static atomic_t isw_nr_in_flight = ATOMIC_INIT(0);
++static struct workqueue_struct *isw_wq;
++
+ void __inode_attach_wb(struct inode *inode, struct page *page)
+ {
+ 	struct backing_dev_info *bdi = inode_to_bdi(inode);
+@@ -317,7 +320,6 @@ static void inode_switch_wbs_work_fn(str
+ 	struct inode_switch_wbs_context *isw =
+ 		container_of(work, struct inode_switch_wbs_context, work);
+ 	struct inode *inode = isw->inode;
+-	struct super_block *sb = inode->i_sb;
+ 	struct address_space *mapping = inode->i_mapping;
+ 	struct bdi_writeback *old_wb = inode->i_wb;
+ 	struct bdi_writeback *new_wb = isw->new_wb;
+@@ -424,8 +426,9 @@ skip_switch:
+ 	wb_put(new_wb);
+ 
+ 	iput(inode);
+-	deactivate_super(sb);
+ 	kfree(isw);
++
++	atomic_dec(&isw_nr_in_flight);
+ }
+ 
+ static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head)
+@@ -435,7 +438,7 @@ static void inode_switch_wbs_rcu_fn(stru
+ 
+ 	/* needs to grab bh-unsafe locks, bounce to work item */
+ 	INIT_WORK(&isw->work, inode_switch_wbs_work_fn);
+-	schedule_work(&isw->work);
++	queue_work(isw_wq, &isw->work);
+ }
+ 
+ /**
+@@ -471,20 +474,20 @@ static void inode_switch_wbs(struct inod
+ 
+ 	/* while holding I_WB_SWITCH, no one else can update the association */
+ 	spin_lock(&inode->i_lock);
+-
+-	if (inode->i_state & (I_WB_SWITCH | I_FREEING) ||
+-	    inode_to_wb(inode) == isw->new_wb)
+-		goto out_unlock;
+-
+-	if (!atomic_inc_not_zero(&inode->i_sb->s_active))
+-		goto out_unlock;
+-
++	if (!(inode->i_sb->s_flags & MS_ACTIVE) ||
++	    inode->i_state & (I_WB_SWITCH | I_FREEING) ||
++	    inode_to_wb(inode) == isw->new_wb) {
++		spin_unlock(&inode->i_lock);
++		goto out_free;
++	}
+ 	inode->i_state |= I_WB_SWITCH;
+ 	spin_unlock(&inode->i_lock);
+ 
+ 	ihold(inode);
+ 	isw->inode = inode;
+ 
++	atomic_inc(&isw_nr_in_flight);
++
+ 	/*
+ 	 * In addition to synchronizing among switchers, I_WB_SWITCH tells
+ 	 * the RCU protected stat update paths to grab the mapping's
+@@ -494,8 +497,6 @@ static void inode_switch_wbs(struct inod
+ 	call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
+ 	return;
+ 
+-out_unlock:
+-	spin_unlock(&inode->i_lock);
+ out_free:
+ 	if (isw->new_wb)
+ 		wb_put(isw->new_wb);
+@@ -849,6 +850,33 @@ restart:
+ 		wb_put(last_wb);
+ }
+ 
++/**
++ * cgroup_writeback_umount - flush inode wb switches for umount
++ *
++ * This function is called when a super_block is about to be destroyed and
++ * flushes in-flight inode wb switches.  An inode wb switch goes through
++ * RCU and then workqueue, so the two need to be flushed in order to ensure
++ * that all previously scheduled switches are finished.  As wb switches are
++ * rare occurrences and synchronize_rcu() can take a while, perform
++ * flushing iff wb switches are in flight.
++ */
++void cgroup_writeback_umount(void)
++{
++	if (atomic_read(&isw_nr_in_flight)) {
++		synchronize_rcu();
++		flush_workqueue(isw_wq);
++	}
++}
++
++static int __init cgroup_writeback_init(void)
++{
++	isw_wq = alloc_workqueue("inode_switch_wbs", 0, 0);
++	if (!isw_wq)
++		return -ENOMEM;
++	return 0;
++}
++fs_initcall(cgroup_writeback_init);
++
+ #else	/* CONFIG_CGROUP_WRITEBACK */
+ 
+ static struct bdi_writeback *
+--- a/fs/super.c
++++ b/fs/super.c
+@@ -415,6 +415,7 @@ void generic_shutdown_super(struct super
+ 		sb->s_flags &= ~MS_ACTIVE;
+ 
+ 		fsnotify_unmount_inodes(sb);
++		cgroup_writeback_umount();
+ 
+ 		evict_inodes(sb);
+ 
+--- a/include/linux/writeback.h
++++ b/include/linux/writeback.h
+@@ -198,6 +198,7 @@ void wbc_attach_and_unlock_inode(struct
+ void wbc_detach_inode(struct writeback_control *wbc);
+ void wbc_account_io(struct writeback_control *wbc, struct page *page,
+ 		    size_t bytes);
++void cgroup_writeback_umount(void);
+ 
+ /**
+  * inode_attach_wb - associate an inode with its wb
+@@ -301,6 +302,10 @@ static inline void wbc_account_io(struct
+ {
+ }
+ 
++static inline void cgroup_writeback_umount(void)
++{
++}
++
+ #endif	/* CONFIG_CGROUP_WRITEBACK */
+ 
+ /*