]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.5-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 10 Apr 2016 18:12:21 +0000 (11:12 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 10 Apr 2016 18:12:21 +0000 (11:12 -0700)
added patches:
input-ati_remote2-fix-crashes-on-detecting-device-with-invalid-descriptor.patch
input-ims-pcu-sanity-check-against-missing-interfaces.patch
input-synaptics-handle-spurious-release-of-trackstick-buttons-again.patch
intel_idle-prevent-skl-h-boot-failure-when-c8-c9-c10-enabled.patch
mm-page_alloc-prevent-merging-between-isolated-and-other-pageblocks.patch
mtd-onenand-fix-deadlock-in-onenand_block_markbad.patch
ocfs2-dlm-fix-bug-in-dlm_move_lockres_to_recovery_list.patch
ocfs2-dlm-fix-race-between-convert-and-recovery.patch
ocfs2-o2hb-fix-double-free-bug.patch
pm-sleep-clear-pm_suspend_global_flags-upon-hibernate.patch
scsi_common-do-not-clobber-fixed-sense-information.patch
writeback-cgroup-fix-premature-wb_put-in-locked_inode_to_wb_and_lock_list.patch
writeback-cgroup-fix-use-of-the-wrong-bdi_writeback-which-mismatches-the-inode.patch

14 files changed:
queue-4.5/input-ati_remote2-fix-crashes-on-detecting-device-with-invalid-descriptor.patch [new file with mode: 0644]
queue-4.5/input-ims-pcu-sanity-check-against-missing-interfaces.patch [new file with mode: 0644]
queue-4.5/input-synaptics-handle-spurious-release-of-trackstick-buttons-again.patch [new file with mode: 0644]
queue-4.5/intel_idle-prevent-skl-h-boot-failure-when-c8-c9-c10-enabled.patch [new file with mode: 0644]
queue-4.5/mm-page_alloc-prevent-merging-between-isolated-and-other-pageblocks.patch [new file with mode: 0644]
queue-4.5/mtd-onenand-fix-deadlock-in-onenand_block_markbad.patch [new file with mode: 0644]
queue-4.5/ocfs2-dlm-fix-bug-in-dlm_move_lockres_to_recovery_list.patch [new file with mode: 0644]
queue-4.5/ocfs2-dlm-fix-race-between-convert-and-recovery.patch [new file with mode: 0644]
queue-4.5/ocfs2-o2hb-fix-double-free-bug.patch [new file with mode: 0644]
queue-4.5/pm-sleep-clear-pm_suspend_global_flags-upon-hibernate.patch [new file with mode: 0644]
queue-4.5/scsi_common-do-not-clobber-fixed-sense-information.patch [new file with mode: 0644]
queue-4.5/series
queue-4.5/writeback-cgroup-fix-premature-wb_put-in-locked_inode_to_wb_and_lock_list.patch [new file with mode: 0644]
queue-4.5/writeback-cgroup-fix-use-of-the-wrong-bdi_writeback-which-mismatches-the-inode.patch [new file with mode: 0644]

diff --git a/queue-4.5/input-ati_remote2-fix-crashes-on-detecting-device-with-invalid-descriptor.patch b/queue-4.5/input-ati_remote2-fix-crashes-on-detecting-device-with-invalid-descriptor.patch
new file mode 100644 (file)
index 0000000..425d532
--- /dev/null
@@ -0,0 +1,108 @@
+From 950336ba3e4a1ffd2ca60d29f6ef386dd2c7351d Mon Sep 17 00:00:00 2001
+From: Vladis Dronov <vdronov@redhat.com>
+Date: Wed, 23 Mar 2016 11:53:46 -0700
+Subject: Input: ati_remote2 - fix crashes on detecting device with invalid descriptor
+
+From: Vladis Dronov <vdronov@redhat.com>
+
+commit 950336ba3e4a1ffd2ca60d29f6ef386dd2c7351d upstream.
+
+The ati_remote2 driver expects at least two interfaces with one
+endpoint each. If given a malicious descriptor that specifies one
+interface or no endpoints, it will crash in the probe function.
+Ensure there are at least two interfaces and one endpoint for each
+interface before using it.
+
+The full disclosure: http://seclists.org/bugtraq/2016/Mar/90
+
+Reported-by: Ralf Spenneberg <ralf@spenneberg.net>
+Signed-off-by: Vladis Dronov <vdronov@redhat.com>
+Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/input/misc/ati_remote2.c |   36 ++++++++++++++++++++++++++++++------
+ 1 file changed, 30 insertions(+), 6 deletions(-)
+
+--- a/drivers/input/misc/ati_remote2.c
++++ b/drivers/input/misc/ati_remote2.c
+@@ -817,26 +817,49 @@ static int ati_remote2_probe(struct usb_
+       ar2->udev = udev;
++      /* Sanity check, first interface must have an endpoint */
++      if (alt->desc.bNumEndpoints < 1 || !alt->endpoint) {
++              dev_err(&interface->dev,
++                      "%s(): interface 0 must have an endpoint\n", __func__);
++              r = -ENODEV;
++              goto fail1;
++      }
+       ar2->intf[0] = interface;
+       ar2->ep[0] = &alt->endpoint[0].desc;
++      /* Sanity check, the device must have two interfaces */
+       ar2->intf[1] = usb_ifnum_to_if(udev, 1);
++      if ((udev->actconfig->desc.bNumInterfaces < 2) || !ar2->intf[1]) {
++              dev_err(&interface->dev, "%s(): need 2 interfaces, found %d\n",
++                      __func__, udev->actconfig->desc.bNumInterfaces);
++              r = -ENODEV;
++              goto fail1;
++      }
++
+       r = usb_driver_claim_interface(&ati_remote2_driver, ar2->intf[1], ar2);
+       if (r)
+               goto fail1;
++
++      /* Sanity check, second interface must have an endpoint */
+       alt = ar2->intf[1]->cur_altsetting;
++      if (alt->desc.bNumEndpoints < 1 || !alt->endpoint) {
++              dev_err(&interface->dev,
++                      "%s(): interface 1 must have an endpoint\n", __func__);
++              r = -ENODEV;
++              goto fail2;
++      }
+       ar2->ep[1] = &alt->endpoint[0].desc;
+       r = ati_remote2_urb_init(ar2);
+       if (r)
+-              goto fail2;
++              goto fail3;
+       ar2->channel_mask = channel_mask;
+       ar2->mode_mask = mode_mask;
+       r = ati_remote2_setup(ar2, ar2->channel_mask);
+       if (r)
+-              goto fail2;
++              goto fail3;
+       usb_make_path(udev, ar2->phys, sizeof(ar2->phys));
+       strlcat(ar2->phys, "/input0", sizeof(ar2->phys));
+@@ -845,11 +868,11 @@ static int ati_remote2_probe(struct usb_
+       r = sysfs_create_group(&udev->dev.kobj, &ati_remote2_attr_group);
+       if (r)
+-              goto fail2;
++              goto fail3;
+       r = ati_remote2_input_init(ar2);
+       if (r)
+-              goto fail3;
++              goto fail4;
+       usb_set_intfdata(interface, ar2);
+@@ -857,10 +880,11 @@ static int ati_remote2_probe(struct usb_
+       return 0;
+- fail3:
++ fail4:
+       sysfs_remove_group(&udev->dev.kobj, &ati_remote2_attr_group);
+- fail2:
++ fail3:
+       ati_remote2_urb_cleanup(ar2);
++ fail2:
+       usb_driver_release_interface(&ati_remote2_driver, ar2->intf[1]);
+  fail1:
+       kfree(ar2);
diff --git a/queue-4.5/input-ims-pcu-sanity-check-against-missing-interfaces.patch b/queue-4.5/input-ims-pcu-sanity-check-against-missing-interfaces.patch
new file mode 100644 (file)
index 0000000..15f21eb
--- /dev/null
@@ -0,0 +1,40 @@
+From a0ad220c96692eda76b2e3fd7279f3dcd1d8a8ff Mon Sep 17 00:00:00 2001
+From: Oliver Neukum <oneukum@suse.com>
+Date: Thu, 17 Mar 2016 14:00:17 -0700
+Subject: Input: ims-pcu - sanity check against missing interfaces
+
+From: Oliver Neukum <oneukum@suse.com>
+
+commit a0ad220c96692eda76b2e3fd7279f3dcd1d8a8ff upstream.
+
+A malicious device with a missing interface can make the driver oops.
+Add sanity checking.
+
+Signed-off-by: Oliver Neukum <ONeukum@suse.com>
+Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/input/misc/ims-pcu.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/input/misc/ims-pcu.c
++++ b/drivers/input/misc/ims-pcu.c
+@@ -1663,6 +1663,8 @@ static int ims_pcu_parse_cdc_data(struct
+       pcu->ctrl_intf = usb_ifnum_to_if(pcu->udev,
+                                        union_desc->bMasterInterface0);
++      if (!pcu->ctrl_intf)
++              return -EINVAL;
+       alt = pcu->ctrl_intf->cur_altsetting;
+       pcu->ep_ctrl = &alt->endpoint[0].desc;
+@@ -1670,6 +1672,8 @@ static int ims_pcu_parse_cdc_data(struct
+       pcu->data_intf = usb_ifnum_to_if(pcu->udev,
+                                        union_desc->bSlaveInterface0);
++      if (!pcu->data_intf)
++              return -EINVAL;
+       alt = pcu->data_intf->cur_altsetting;
+       if (alt->desc.bNumEndpoints != 2) {
diff --git a/queue-4.5/input-synaptics-handle-spurious-release-of-trackstick-buttons-again.patch b/queue-4.5/input-synaptics-handle-spurious-release-of-trackstick-buttons-again.patch
new file mode 100644 (file)
index 0000000..bfc5239
--- /dev/null
@@ -0,0 +1,35 @@
+From 82be788c96ed5978d3cb4a00079e26b981a3df3f Mon Sep 17 00:00:00 2001
+From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
+Date: Thu, 17 Mar 2016 17:12:54 -0700
+Subject: Input: synaptics - handle spurious release of trackstick buttons, again
+
+From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
+
+commit 82be788c96ed5978d3cb4a00079e26b981a3df3f upstream.
+
+Looks like the firmware 8.2 still has the extra buttons spurious release
+bug.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=114321
+Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
+Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/input/mouse/synaptics.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/input/mouse/synaptics.c
++++ b/drivers/input/mouse/synaptics.c
+@@ -862,8 +862,9 @@ static void synaptics_report_ext_buttons
+       if (!SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap))
+               return;
+-      /* Bug in FW 8.1, buttons are reported only when ExtBit is 1 */
+-      if (SYN_ID_FULL(priv->identity) == 0x801 &&
++      /* Bug in FW 8.1 & 8.2, buttons are reported only when ExtBit is 1 */
++      if ((SYN_ID_FULL(priv->identity) == 0x801 ||
++           SYN_ID_FULL(priv->identity) == 0x802) &&
+           !((psmouse->packet[0] ^ psmouse->packet[3]) & 0x02))
+               return;
diff --git a/queue-4.5/intel_idle-prevent-skl-h-boot-failure-when-c8-c9-c10-enabled.patch b/queue-4.5/intel_idle-prevent-skl-h-boot-failure-when-c8-c9-c10-enabled.patch
new file mode 100644 (file)
index 0000000..382dc69
--- /dev/null
@@ -0,0 +1,167 @@
+From d70e28f57e14a481977436695b0c9ba165472431 Mon Sep 17 00:00:00 2001
+From: Len Brown <len.brown@intel.com>
+Date: Sun, 13 Mar 2016 00:33:48 -0500
+Subject: intel_idle: prevent SKL-H boot failure when C8+C9+C10 enabled
+
+From: Len Brown <len.brown@intel.com>
+
+commit d70e28f57e14a481977436695b0c9ba165472431 upstream.
+
+Some SKL-H configurations require "intel_idle.max_cstate=7" to boot.
+While that is an effective workaround, it disables C10.
+
+This patch detects the problematic configuration,
+and disables C8 and C9, keeping C10 enabled.
+
+Note that enabling SGX in BIOS SETUP can also prevent this issue,
+if the system BIOS provides that option.
+
+https://bugzilla.kernel.org/show_bug.cgi?id=109081
+"Freezes with Intel i7 6700HQ (Skylake), unless intel_idle.max_cstate=7"
+
+Signed-off-by: Len Brown <len.brown@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/idle/intel_idle.c |  106 ++++++++++++++++++++++++++++++++++++----------
+ 1 file changed, 85 insertions(+), 21 deletions(-)
+
+--- a/drivers/idle/intel_idle.c
++++ b/drivers/idle/intel_idle.c
+@@ -65,7 +65,7 @@
+ #include <asm/mwait.h>
+ #include <asm/msr.h>
+-#define INTEL_IDLE_VERSION "0.4"
++#define INTEL_IDLE_VERSION "0.4.1"
+ #define PREFIX "intel_idle: "
+ static struct cpuidle_driver intel_idle_driver = {
+@@ -994,36 +994,92 @@ static void intel_idle_cpuidle_devices_u
+ }
+ /*
+- * intel_idle_state_table_update()
+- *
+- * Update the default state_table for this CPU-id
++ * ivt_idle_state_table_update(void)
+  *
+- * Currently used to access tuned IVT multi-socket targets
++ * Tune IVT multi-socket targets
+  * Assumption: num_sockets == (max_package_num + 1)
+  */
+-void intel_idle_state_table_update(void)
++static void ivt_idle_state_table_update(void)
+ {
+       /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
+-      if (boot_cpu_data.x86_model == 0x3e) { /* IVT */
+-              int cpu, package_num, num_sockets = 1;
++      int cpu, package_num, num_sockets = 1;
+-              for_each_online_cpu(cpu) {
+-                      package_num = topology_physical_package_id(cpu);
+-                      if (package_num + 1 > num_sockets) {
+-                              num_sockets = package_num + 1;
+-
+-                              if (num_sockets > 4) {
+-                                      cpuidle_state_table = ivt_cstates_8s;
+-                                      return;
+-                              }
++      for_each_online_cpu(cpu) {
++              package_num = topology_physical_package_id(cpu);
++              if (package_num + 1 > num_sockets) {
++                      num_sockets = package_num + 1;
++
++                      if (num_sockets > 4) {
++                              cpuidle_state_table = ivt_cstates_8s;
++                              return;
+                       }
+               }
++      }
++
++      if (num_sockets > 2)
++              cpuidle_state_table = ivt_cstates_4s;
++
++      /* else, 1 and 2 socket systems use default ivt_cstates */
++}
++/*
++ * sklh_idle_state_table_update(void)
++ *
++ * On SKL-H (model 0x5e) disable C8 and C9 if:
++ * C10 is enabled and SGX disabled
++ */
++static void sklh_idle_state_table_update(void)
++{
++      unsigned long long msr;
++      unsigned int eax, ebx, ecx, edx;
++
++
++      /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
++      if (max_cstate <= 7)
++              return;
++
++      /* if PC10 not present in CPUID.MWAIT.EDX */
++      if ((mwait_substates & (0xF << 28)) == 0)
++              return;
++
++      rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr);
++
++      /* PC10 is not enabled in PKG C-state limit */
++      if ((msr & 0xF) != 8)
++              return;
++
++      ecx = 0;
++      cpuid(7, &eax, &ebx, &ecx, &edx);
++
++      /* if SGX is present */
++      if (ebx & (1 << 2)) {
++
++              rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
+-              if (num_sockets > 2)
+-                      cpuidle_state_table = ivt_cstates_4s;
+-              /* else, 1 and 2 socket systems use default ivt_cstates */
++              /* if SGX is enabled */
++              if (msr & (1 << 18))
++                      return;
++      }
++
++      skl_cstates[5].disabled = 1;    /* C8-SKL */
++      skl_cstates[6].disabled = 1;    /* C9-SKL */
++}
++/*
++ * intel_idle_state_table_update()
++ *
++ * Update the default state_table for this CPU-id
++ */
++
++static void intel_idle_state_table_update(void)
++{
++      switch (boot_cpu_data.x86_model) {
++
++      case 0x3e: /* IVT */
++              ivt_idle_state_table_update();
++              break;
++      case 0x5e: /* SKL-H */
++              sklh_idle_state_table_update();
++              break;
+       }
+-      return;
+ }
+ /*
+@@ -1063,6 +1119,14 @@ static int __init intel_idle_cpuidle_dri
+               if (num_substates == 0)
+                       continue;
++              /* if state marked as disabled, skip it */
++              if (cpuidle_state_table[cstate].disabled != 0) {
++                      pr_debug(PREFIX "state %s is disabled",
++                              cpuidle_state_table[cstate].name);
++                      continue;
++              }
++
++
+               if (((mwait_cstate + 1) > 2) &&
+                       !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
+                       mark_tsc_unstable("TSC halts in idle"
diff --git a/queue-4.5/mm-page_alloc-prevent-merging-between-isolated-and-other-pageblocks.patch b/queue-4.5/mm-page_alloc-prevent-merging-between-isolated-and-other-pageblocks.patch
new file mode 100644 (file)
index 0000000..2268338
--- /dev/null
@@ -0,0 +1,183 @@
+From d9dddbf556674bf125ecd925b24e43a5cf2a568a Mon Sep 17 00:00:00 2001
+From: Vlastimil Babka <vbabka@suse.cz>
+Date: Fri, 25 Mar 2016 14:21:50 -0700
+Subject: mm/page_alloc: prevent merging between isolated and other pageblocks
+
+From: Vlastimil Babka <vbabka@suse.cz>
+
+commit d9dddbf556674bf125ecd925b24e43a5cf2a568a upstream.
+
+Hanjun Guo has reported that a CMA stress test causes broken accounting of
+CMA and free pages:
+
+> Before the test, I got:
+> -bash-4.3# cat /proc/meminfo | grep Cma
+> CmaTotal:         204800 kB
+> CmaFree:          195044 kB
+>
+>
+> After running the test:
+> -bash-4.3# cat /proc/meminfo | grep Cma
+> CmaTotal:         204800 kB
+> CmaFree:         6602584 kB
+>
+> So the freed CMA memory is more than total..
+>
+> Also the MemFree is more than mem total:
+>
+> -bash-4.3# cat /proc/meminfo
+> MemTotal:       16342016 kB
+> MemFree:        22367268 kB
+> MemAvailable:   22370528 kB
+
+Laura Abbott has confirmed the issue and suspected the freepage accounting
+rewrite around 3.18/4.0 by Joonsoo Kim.  Joonsoo had a theory that this is
+caused by unexpected merging between MIGRATE_ISOLATE and MIGRATE_CMA
+pageblocks:
+
+> CMA isolates MAX_ORDER aligned blocks, but, during the process,
+> partially isolated block exists. If MAX_ORDER is 11 and
+> pageblock_order is 9, two pageblocks make up MAX_ORDER
+> aligned block and I can think following scenario because pageblock
+> (un)isolation would be done one by one.
+>
+> (each character means one pageblock. 'C', 'I' means MIGRATE_CMA,
+> MIGRATE_ISOLATE, respectively.
+>
+> CC -> IC -> II (Isolation)
+> II -> CI -> CC (Un-isolation)
+>
+> If some pages are freed at this intermediate state such as IC or CI,
+> that page could be merged to the other page that is resident on
+> different type of pageblock and it will cause wrong freepage count.
+
+This was supposed to be prevented by CMA operating on MAX_ORDER blocks,
+but since it doesn't hold the zone->lock between pageblocks, a race
+window does exist.
+
+It's also likely that unexpected merging can occur between
+MIGRATE_ISOLATE and non-CMA pageblocks.  This should be prevented in
+__free_one_page() since commit 3c605096d315 ("mm/page_alloc: restrict
+max order of merging on isolated pageblock").  However, we only check
+the migratetype of the pageblock where buddy merging has been initiated,
+not the migratetype of the buddy pageblock (or group of pageblocks)
+which can be MIGRATE_ISOLATE.
+
+Joonsoo has suggested checking for buddy migratetype as part of
+page_is_buddy(), but that would add extra checks in allocator hotpath
+and bloat-o-meter has shown significant code bloat (the function is
+inline).
+
+This patch reduces the bloat at some expense of more complicated code.
+The buddy-merging while-loop in __free_one_page() is initially bounded
+to pageblock_border and without any migratetype checks.  The checks are
+placed outside, bumping the max_order if merging is allowed, and
+returning to the while-loop with a statement which can't be possibly
+considered harmful.
+
+This fixes the accounting bug and also removes the arguably weird state
+in the original commit 3c605096d315 where buddies could be left
+unmerged.
+
+Fixes: 3c605096d315 ("mm/page_alloc: restrict max order of merging on isolated pageblock")
+Link: https://lkml.org/lkml/2016/3/2/280
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Reported-by: Hanjun Guo <guohanjun@huawei.com>
+Tested-by: Hanjun Guo <guohanjun@huawei.com>
+Acked-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Debugged-by: Laura Abbott <labbott@redhat.com>
+Debugged-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
+Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
+Cc: Michal Nazarewicz <mina86@mina86.com>
+Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/page_alloc.c |   46 +++++++++++++++++++++++++++++++++-------------
+ 1 file changed, 33 insertions(+), 13 deletions(-)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -660,34 +660,28 @@ static inline void __free_one_page(struc
+       unsigned long combined_idx;
+       unsigned long uninitialized_var(buddy_idx);
+       struct page *buddy;
+-      unsigned int max_order = MAX_ORDER;
++      unsigned int max_order;
++
++      max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
+       VM_BUG_ON(!zone_is_initialized(zone));
+       VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page);
+       VM_BUG_ON(migratetype == -1);
+-      if (is_migrate_isolate(migratetype)) {
+-              /*
+-               * We restrict max order of merging to prevent merge
+-               * between freepages on isolate pageblock and normal
+-               * pageblock. Without this, pageblock isolation
+-               * could cause incorrect freepage accounting.
+-               */
+-              max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
+-      } else {
++      if (likely(!is_migrate_isolate(migratetype)))
+               __mod_zone_freepage_state(zone, 1 << order, migratetype);
+-      }
+-      page_idx = pfn & ((1 << max_order) - 1);
++      page_idx = pfn & ((1 << MAX_ORDER) - 1);
+       VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page);
+       VM_BUG_ON_PAGE(bad_range(zone, page), page);
++continue_merging:
+       while (order < max_order - 1) {
+               buddy_idx = __find_buddy_index(page_idx, order);
+               buddy = page + (buddy_idx - page_idx);
+               if (!page_is_buddy(page, buddy, order))
+-                      break;
++                      goto done_merging;
+               /*
+                * Our buddy is free or it is CONFIG_DEBUG_PAGEALLOC guard page,
+                * merge with it and move up one order.
+@@ -704,6 +698,32 @@ static inline void __free_one_page(struc
+               page_idx = combined_idx;
+               order++;
+       }
++      if (max_order < MAX_ORDER) {
++              /* If we are here, it means order is >= pageblock_order.
++               * We want to prevent merge between freepages on isolate
++               * pageblock and normal pageblock. Without this, pageblock
++               * isolation could cause incorrect freepage or CMA accounting.
++               *
++               * We don't want to hit this code for the more frequent
++               * low-order merging.
++               */
++              if (unlikely(has_isolate_pageblock(zone))) {
++                      int buddy_mt;
++
++                      buddy_idx = __find_buddy_index(page_idx, order);
++                      buddy = page + (buddy_idx - page_idx);
++                      buddy_mt = get_pageblock_migratetype(buddy);
++
++                      if (migratetype != buddy_mt
++                                      && (is_migrate_isolate(migratetype) ||
++                                              is_migrate_isolate(buddy_mt)))
++                              goto done_merging;
++              }
++              max_order++;
++              goto continue_merging;
++      }
++
++done_merging:
+       set_page_order(page, order);
+       /*
diff --git a/queue-4.5/mtd-onenand-fix-deadlock-in-onenand_block_markbad.patch b/queue-4.5/mtd-onenand-fix-deadlock-in-onenand_block_markbad.patch
new file mode 100644 (file)
index 0000000..1783a6d
--- /dev/null
@@ -0,0 +1,43 @@
+From 5e64c29e98bfbba1b527b0a164f9493f3db9e8cb Mon Sep 17 00:00:00 2001
+From: Aaro Koskinen <aaro.koskinen@iki.fi>
+Date: Sat, 20 Feb 2016 22:27:48 +0200
+Subject: mtd: onenand: fix deadlock in onenand_block_markbad
+
+From: Aaro Koskinen <aaro.koskinen@iki.fi>
+
+commit 5e64c29e98bfbba1b527b0a164f9493f3db9e8cb upstream.
+
+Commit 5942ddbc500d ("mtd: introduce mtd_block_markbad interface")
+incorrectly changed onenand_block_markbad() to call mtd_block_markbad
+instead of onenand_chip's block_markbad function. As a result the function
+will now recurse and deadlock. Fix by reverting the change.
+
+Fixes: 5942ddbc500d ("mtd: introduce mtd_block_markbad interface")
+Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
+Acked-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
+Signed-off-by: Brian Norris <computersforpeace@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/mtd/onenand/onenand_base.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/mtd/onenand/onenand_base.c
++++ b/drivers/mtd/onenand/onenand_base.c
+@@ -2599,6 +2599,7 @@ static int onenand_default_block_markbad
+  */
+ static int onenand_block_markbad(struct mtd_info *mtd, loff_t ofs)
+ {
++      struct onenand_chip *this = mtd->priv;
+       int ret;
+       ret = onenand_block_isbad(mtd, ofs);
+@@ -2610,7 +2611,7 @@ static int onenand_block_markbad(struct
+       }
+       onenand_get_device(mtd, FL_WRITING);
+-      ret = mtd_block_markbad(mtd, ofs);
++      ret = this->block_markbad(mtd, ofs);
+       onenand_release_device(mtd);
+       return ret;
+ }
diff --git a/queue-4.5/ocfs2-dlm-fix-bug-in-dlm_move_lockres_to_recovery_list.patch b/queue-4.5/ocfs2-dlm-fix-bug-in-dlm_move_lockres_to_recovery_list.patch
new file mode 100644 (file)
index 0000000..2367b09
--- /dev/null
@@ -0,0 +1,66 @@
+From be12b299a83fc807bbaccd2bcb8ec50cbb0cb55c Mon Sep 17 00:00:00 2001
+From: Joseph Qi <joseph.qi@huawei.com>
+Date: Fri, 25 Mar 2016 14:21:29 -0700
+Subject: ocfs2/dlm: fix BUG in dlm_move_lockres_to_recovery_list
+
+From: Joseph Qi <joseph.qi@huawei.com>
+
+commit be12b299a83fc807bbaccd2bcb8ec50cbb0cb55c upstream.
+
+When the master handles a convert request, it queues the ast first and
+then returns the status.  The ast may be sent before the request status
+because the two messages are sent by two different threads.  If the
+master goes down right after the ast is sent, it may trigger the BUG in
+dlm_move_lockres_to_recovery_list on the requesting node, because the ast
+handler moves the lock to the grant list without clearing
+lock->convert_pending.  So remove the BUG_ON statement and check whether
+the ast has already been processed in dlmconvert_remote.
+
+Signed-off-by: Joseph Qi <joseph.qi@huawei.com>
+Reported-by: Yiwen Jiang <jiangyiwen@huawei.com>
+Cc: Junxiao Bi <junxiao.bi@oracle.com>
+Cc: Mark Fasheh <mfasheh@suse.de>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Tariq Saeed <tariq.x.saeed@oracle.com>
+Cc: Junxiao Bi <junxiao.bi@oracle.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/dlm/dlmconvert.c  |   13 +++++++++++++
+ fs/ocfs2/dlm/dlmrecovery.c |    1 -
+ 2 files changed, 13 insertions(+), 1 deletion(-)
+
+--- a/fs/ocfs2/dlm/dlmconvert.c
++++ b/fs/ocfs2/dlm/dlmconvert.c
+@@ -288,6 +288,19 @@ enum dlm_status dlmconvert_remote(struct
+               status = DLM_DENIED;
+               goto bail;
+       }
++
++      if (lock->ml.type == type && lock->ml.convert_type == LKM_IVMODE) {
++              mlog(0, "last convert request returned DLM_RECOVERING, but "
++                   "owner has already queued and sent ast to me. res %.*s, "
++                   "(cookie=%u:%llu, type=%d, conv=%d)\n",
++                   res->lockname.len, res->lockname.name,
++                   dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
++                   dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
++                   lock->ml.type, lock->ml.convert_type);
++              status = DLM_NORMAL;
++              goto bail;
++      }
++
+       res->state |= DLM_LOCK_RES_IN_PROGRESS;
+       /* move lock to local convert queue */
+       /* do not alter lock refcount.  switching lists. */
+--- a/fs/ocfs2/dlm/dlmrecovery.c
++++ b/fs/ocfs2/dlm/dlmrecovery.c
+@@ -2071,7 +2071,6 @@ void dlm_move_lockres_to_recovery_list(s
+                       dlm_lock_get(lock);
+                       if (lock->convert_pending) {
+                               /* move converting lock back to granted */
+-                              BUG_ON(i != DLM_CONVERTING_LIST);
+                               mlog(0, "node died with convert pending "
+                                    "on %.*s. move back to granted list.\n",
+                                    res->lockname.len, res->lockname.name);
diff --git a/queue-4.5/ocfs2-dlm-fix-race-between-convert-and-recovery.patch b/queue-4.5/ocfs2-dlm-fix-race-between-convert-and-recovery.patch
new file mode 100644 (file)
index 0000000..e366704
--- /dev/null
@@ -0,0 +1,87 @@
+From ac7cf246dfdbec3d8fed296c7bf30e16f5099dac Mon Sep 17 00:00:00 2001
+From: Joseph Qi <joseph.qi@huawei.com>
+Date: Fri, 25 Mar 2016 14:21:26 -0700
+Subject: ocfs2/dlm: fix race between convert and recovery
+
+From: Joseph Qi <joseph.qi@huawei.com>
+
+commit ac7cf246dfdbec3d8fed296c7bf30e16f5099dac upstream.
+
+There is a race window between dlmconvert_remote and
+dlm_move_lockres_to_recovery_list, which can leave a lock with
+OCFS2_LOCK_BUSY set on the grant list, so the system hangs.
+
+dlmconvert_remote
+{
+        spin_lock(&res->spinlock);
+        list_move_tail(&lock->list, &res->converting);
+        lock->convert_pending = 1;
+        spin_unlock(&res->spinlock);
+
+        status = dlm_send_remote_convert_request();
+        >>>>>> race window, master has queued ast and return DLM_NORMAL,
+               and then down before sending ast.
+               this node detects master down and calls
+               dlm_move_lockres_to_recovery_list, which will revert the
+               lock to grant list.
+               Then OCFS2_LOCK_BUSY won't be cleared as new master won't
+               send ast any more because it thinks already be authorized.
+
+        spin_lock(&res->spinlock);
+        lock->convert_pending = 0;
+        if (status != DLM_NORMAL)
+                dlm_revert_pending_convert(res, lock);
+        spin_unlock(&res->spinlock);
+}
+
+In this case, check if res->state has DLM_LOCK_RES_RECOVERING bit set
+(res is still in recovering) or res master changed (new master has
+finished recovery), reset the status to DLM_RECOVERING, then it will
+retry convert.
+
+Signed-off-by: Joseph Qi <joseph.qi@huawei.com>
+Reported-by: Yiwen Jiang <jiangyiwen@huawei.com>
+Reviewed-by: Junxiao Bi <junxiao.bi@oracle.com>
+Cc: Mark Fasheh <mfasheh@suse.de>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Tariq Saeed <tariq.x.saeed@oracle.com>
+Cc: Junxiao Bi <junxiao.bi@oracle.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/dlm/dlmconvert.c |   11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+--- a/fs/ocfs2/dlm/dlmconvert.c
++++ b/fs/ocfs2/dlm/dlmconvert.c
+@@ -262,6 +262,7 @@ enum dlm_status dlmconvert_remote(struct
+                                 struct dlm_lock *lock, int flags, int type)
+ {
+       enum dlm_status status;
++      u8 old_owner = res->owner;
+       mlog(0, "type=%d, convert_type=%d, busy=%d\n", lock->ml.type,
+            lock->ml.convert_type, res->state & DLM_LOCK_RES_IN_PROGRESS);
+@@ -316,11 +317,19 @@ enum dlm_status dlmconvert_remote(struct
+       spin_lock(&res->spinlock);
+       res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
+       lock->convert_pending = 0;
+-      /* if it failed, move it back to granted queue */
++      /* if it failed, move it back to granted queue.
++       * if master returns DLM_NORMAL and then down before sending ast,
++       * it may have already been moved to granted queue, reset to
++       * DLM_RECOVERING and retry convert */
+       if (status != DLM_NORMAL) {
+               if (status != DLM_NOTQUEUED)
+                       dlm_error(status);
+               dlm_revert_pending_convert(res, lock);
++      } else if ((res->state & DLM_LOCK_RES_RECOVERING) ||
++                      (old_owner != res->owner)) {
++              mlog(0, "res %.*s is in recovering or has been recovered.\n",
++                              res->lockname.len, res->lockname.name);
++              status = DLM_RECOVERING;
+       }
+ bail:
+       spin_unlock(&res->spinlock);
diff --git a/queue-4.5/ocfs2-o2hb-fix-double-free-bug.patch b/queue-4.5/ocfs2-o2hb-fix-double-free-bug.patch
new file mode 100644 (file)
index 0000000..e62849a
--- /dev/null
@@ -0,0 +1,71 @@
+From 9e13f1f9de1cb143fbae6f1170f26c8544b64cff Mon Sep 17 00:00:00 2001
+From: Junxiao Bi <junxiao.bi@oracle.com>
+Date: Fri, 25 Mar 2016 14:20:50 -0700
+Subject: ocfs2: o2hb: fix double free bug
+
+From: Junxiao Bi <junxiao.bi@oracle.com>
+
+commit 9e13f1f9de1cb143fbae6f1170f26c8544b64cff upstream.
+
+This is a regression that causes the following kernel panic when running
+the ocfs2 multiple test.
+
+  BUG: unable to handle kernel paging request at 00000002000800c0
+  IP: [<ffffffff81192978>] kmem_cache_alloc+0x78/0x160
+  PGD 7bbe5067 PUD 0
+  Oops: 0000 [#1] SMP
+  Modules linked in: ocfs2_dlmfs ocfs2_stack_o2cb ocfs2_dlm ocfs2_nodemanager ocfs2_stackglue iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi xen_kbdfront xen_netfront xen_fbfront xen_blkfront
+  CPU: 2 PID: 4044 Comm: mpirun Not tainted 4.5.0-rc5-next-20160225 #1
+  Hardware name: Xen HVM domU, BIOS 4.3.1OVM 05/14/2014
+  task: ffff88007a521a80 ti: ffff88007aed0000 task.ti: ffff88007aed0000
+  RIP: 0010:[<ffffffff81192978>]  [<ffffffff81192978>] kmem_cache_alloc+0x78/0x160
+  RSP: 0018:ffff88007aed3a48  EFLAGS: 00010282
+  RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000001991
+  RDX: 0000000000001990 RSI: 00000000024000c0 RDI: 000000000001b330
+  RBP: ffff88007aed3a98 R08: ffff88007d29b330 R09: 00000002000800c0
+  R10: 0000000c51376d87 R11: ffff8800792cac38 R12: ffff88007cc30f00
+  R13: 00000000024000c0 R14: ffffffff811b053f R15: ffff88007aed3ce7
+  FS:  0000000000000000(0000) GS:ffff88007d280000(0000) knlGS:0000000000000000
+  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: 00000002000800c0 CR3: 000000007aeb2000 CR4: 00000000000406e0
+  Call Trace:
+    __d_alloc+0x2f/0x1a0
+    d_alloc+0x17/0x80
+    lookup_dcache+0x8a/0xc0
+    path_openat+0x3c3/0x1210
+    do_filp_open+0x80/0xe0
+    do_sys_open+0x110/0x200
+    SyS_open+0x19/0x20
+    do_syscall_64+0x72/0x230
+    entry_SYSCALL64_slow_path+0x25/0x25
+  Code: 05 e6 77 e7 7e 4d 8b 08 49 8b 40 10 4d 85 c9 0f 84 dd 00 00 00 48 85 c0 0f 84 d4 00 00 00 49 63 44 24 20 49 8b 3c 24 48 8d 4a 01 <49> 8b 1c 01 4c 89 c8 65 48 0f c7 0f 0f 94 c0 3c 01 75 b6 49 63
+  RIP   kmem_cache_alloc+0x78/0x160
+  CR2: 00000002000800c0
+  ---[ end trace 823969e602e4aaac ]---
+
+Fixes: a4a1dfa4bb8b ("ocfs2/cluster: fix memory leak in o2hb_region_release")
+Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
+Reviewed-by: Joseph Qi <joseph.qi@huawei.com>
+Cc: Mark Fasheh <mfasheh@suse.de>
+Cc: Joel Becker <jlbec@evilplan.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/cluster/heartbeat.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/ocfs2/cluster/heartbeat.c
++++ b/fs/ocfs2/cluster/heartbeat.c
+@@ -1445,8 +1445,8 @@ static void o2hb_region_release(struct c
+       debugfs_remove(reg->hr_debug_dir);
+       kfree(reg->hr_db_livenodes);
+       kfree(reg->hr_db_regnum);
+-      kfree(reg->hr_debug_elapsed_time);
+-      kfree(reg->hr_debug_pinned);
++      kfree(reg->hr_db_elapsed_time);
++      kfree(reg->hr_db_pinned);
+       spin_lock(&o2hb_live_lock);
+       list_del(&reg->hr_all_item);
diff --git a/queue-4.5/pm-sleep-clear-pm_suspend_global_flags-upon-hibernate.patch b/queue-4.5/pm-sleep-clear-pm_suspend_global_flags-upon-hibernate.patch
new file mode 100644 (file)
index 0000000..6819d35
--- /dev/null
@@ -0,0 +1,39 @@
+From 276142730c39c9839465a36a90e5674a8c34e839 Mon Sep 17 00:00:00 2001
+From: Lukas Wunner <lukas@wunner.de>
+Date: Wed, 23 Mar 2016 00:11:20 +0100
+Subject: PM / sleep: Clear pm_suspend_global_flags upon hibernate
+
+From: Lukas Wunner <lukas@wunner.de>
+
+commit 276142730c39c9839465a36a90e5674a8c34e839 upstream.
+
+When suspending to RAM, waking up and later suspending to disk,
+we gratuitously runtime resume devices after the thaw phase.
+This does not occur if we always suspend to RAM or always to disk.
+
+pm_complete_with_resume_check(), which gets called from
+pci_pm_complete() among others, schedules a runtime resume
+if PM_SUSPEND_FLAG_FW_RESUME is set. The flag is set during
+a suspend-to-RAM cycle. It is cleared at the beginning of
+the suspend-to-RAM cycle but not afterwards and it is not
+cleared during a suspend-to-disk cycle at all. Fix it.
+
+Fixes: ef25ba047601 (PM / sleep: Add flags to indicate platform firmware involvement)
+Signed-off-by: Lukas Wunner <lukas@wunner.de>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/power/hibernate.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/kernel/power/hibernate.c
++++ b/kernel/power/hibernate.c
+@@ -339,6 +339,7 @@ int hibernation_snapshot(int platform_mo
+       pm_message_t msg;
+       int error;
++      pm_suspend_clear_flags();
+       error = platform_begin(platform_mode);
+       if (error)
+               goto Close;
diff --git a/queue-4.5/scsi_common-do-not-clobber-fixed-sense-information.patch b/queue-4.5/scsi_common-do-not-clobber-fixed-sense-information.patch
new file mode 100644 (file)
index 0000000..874bfd5
--- /dev/null
@@ -0,0 +1,45 @@
+From ba08311647892cc7912de74525fd78416caf544a Mon Sep 17 00:00:00 2001
+From: Hannes Reinecke <hare@suse.de>
+Date: Fri, 18 Mar 2016 14:55:38 +0100
+Subject: scsi_common: do not clobber fixed sense information
+
+From: Hannes Reinecke <hare@suse.de>
+
+commit ba08311647892cc7912de74525fd78416caf544a upstream.
+
+For fixed sense the information field is 32 bits, so we need to truncate
+the information field to avoid clobbering the sense code.
+
+Fixes: a1524f226a02 ("libata-eh: Set 'information' field for autosense")
+Signed-off-by: Hannes Reinecke <hare@suse.com>
+Reviewed-by: Lee Duncan <lduncan@suse.com>
+Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>
+Reviewed-by: Ewan D. Milne <emilne@redhat.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/scsi/scsi_common.c |   12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+--- a/drivers/scsi/scsi_common.c
++++ b/drivers/scsi/scsi_common.c
+@@ -278,8 +278,16 @@ int scsi_set_sense_information(u8 *buf,
+               ucp[3] = 0;
+               put_unaligned_be64(info, &ucp[4]);
+       } else if ((buf[0] & 0x7f) == 0x70) {
+-              buf[0] |= 0x80;
+-              put_unaligned_be64(info, &buf[3]);
++              /*
++               * Only set the 'VALID' bit if we can represent the value
++               * correctly; otherwise just fill out the lower bytes and
++               * clear the 'VALID' flag.
++               */
++              if (info <= 0xffffffffUL)
++                      buf[0] |= 0x80;
++              else
++                      buf[0] &= 0x7f;
++              put_unaligned_be32((u32)info, &buf[3]);
+       }
+       return 0;
index da895b040d7b3e00256cb0f308b886ebb17ccf56..c8430c5fbf1e73dadee090f9d801879a95b8ee29 100644 (file)
@@ -217,3 +217,16 @@ nfsd-fix-deadlock-secinfo-readdir-compound.patch
 arm-dts-at91-sama5d3-xplained-don-t-disable-hsmci-regulator.patch
 arm-dts-at91-sama5d4-xplained-don-t-disable-hsmci-regulator.patch
 acpi-pm-runtime-resume-devices-when-waking-from-hibernate.patch
+writeback-cgroup-fix-premature-wb_put-in-locked_inode_to_wb_and_lock_list.patch
+writeback-cgroup-fix-use-of-the-wrong-bdi_writeback-which-mismatches-the-inode.patch
+input-synaptics-handle-spurious-release-of-trackstick-buttons-again.patch
+input-ims-pcu-sanity-check-against-missing-interfaces.patch
+input-ati_remote2-fix-crashes-on-detecting-device-with-invalid-descriptor.patch
+ocfs2-o2hb-fix-double-free-bug.patch
+ocfs2-dlm-fix-race-between-convert-and-recovery.patch
+ocfs2-dlm-fix-bug-in-dlm_move_lockres_to_recovery_list.patch
+mm-page_alloc-prevent-merging-between-isolated-and-other-pageblocks.patch
+mtd-onenand-fix-deadlock-in-onenand_block_markbad.patch
+intel_idle-prevent-skl-h-boot-failure-when-c8-c9-c10-enabled.patch
+pm-sleep-clear-pm_suspend_global_flags-upon-hibernate.patch
+scsi_common-do-not-clobber-fixed-sense-information.patch
diff --git a/queue-4.5/writeback-cgroup-fix-premature-wb_put-in-locked_inode_to_wb_and_lock_list.patch b/queue-4.5/writeback-cgroup-fix-premature-wb_put-in-locked_inode_to_wb_and_lock_list.patch
new file mode 100644 (file)
index 0000000..2d8ff79
--- /dev/null
@@ -0,0 +1,56 @@
+From 614a4e3773148a31f58dc174bbf578ceb63510c2 Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Fri, 18 Mar 2016 13:50:03 -0400
+Subject: writeback, cgroup: fix premature wb_put() in locked_inode_to_wb_and_lock_list()
+
+From: Tejun Heo <tj@kernel.org>
+
+commit 614a4e3773148a31f58dc174bbf578ceb63510c2 upstream.
+
+locked_inode_to_wb_and_lock_list() wb_get()'s the wb associated with
+the target inode, unlocks inode, locks the wb's list_lock and verifies
+that the inode is still associated with the wb.  To prevent the wb
+going away between dropping inode lock and acquiring list_lock, the wb
+is pinned while inode lock is held.  The wb reference is put right
+after acquiring list_lock citing that the wb won't be dereferenced
+anymore.
+
+This isn't true.  If the inode is still associated with the wb, the
+inode has reference and it's safe to return the wb; however, if inode
+has been switched, the wb still needs to be unlocked which is a
+dereference and can lead to use-after-free if it races with wb
+destruction.
+
+Fix it by putting the reference after releasing list_lock.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Fixes: 87e1d789bf55 ("writeback: implement [locked_]inode_to_wb_and_lock_list()")
+Tested-by: Tahsin Erdogan <tahsin@google.com>
+Signed-off-by: Jens Axboe <axboe@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/fs-writeback.c |    8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -281,13 +281,15 @@ locked_inode_to_wb_and_lock_list(struct
+               wb_get(wb);
+               spin_unlock(&inode->i_lock);
+               spin_lock(&wb->list_lock);
+-              wb_put(wb);             /* not gonna deref it anymore */
+               /* i_wb may have changed inbetween, can't use inode_to_wb() */
+-              if (likely(wb == inode->i_wb))
+-                      return wb;      /* @inode already has ref */
++              if (likely(wb == inode->i_wb)) {
++                      wb_put(wb);     /* @inode already has ref */
++                      return wb;
++              }
+               spin_unlock(&wb->list_lock);
++              wb_put(wb);
+               cpu_relax();
+               spin_lock(&inode->i_lock);
+       }
diff --git a/queue-4.5/writeback-cgroup-fix-use-of-the-wrong-bdi_writeback-which-mismatches-the-inode.patch b/queue-4.5/writeback-cgroup-fix-use-of-the-wrong-bdi_writeback-which-mismatches-the-inode.patch
new file mode 100644 (file)
index 0000000..09489c8
--- /dev/null
@@ -0,0 +1,136 @@
+From aaf2559332ba272671bb870464a99b909b29a3a1 Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Fri, 18 Mar 2016 13:52:04 -0400
+Subject: writeback, cgroup: fix use of the wrong bdi_writeback which mismatches the inode
+
+From: Tejun Heo <tj@kernel.org>
+
+commit aaf2559332ba272671bb870464a99b909b29a3a1 upstream.
+
+When cgroup writeback is in use, there can be multiple wb's
+(bdi_writeback's) per bdi and an inode may switch among them
+dynamically.  In a couple places, the wrong wb was used leading to
+performing operations on the wrong list under the wrong lock
+corrupting the io lists.
+
+* writeback_single_inode() was taking @wb parameter and used it to
+  remove the inode from io lists if it becomes clean after writeback.
+  The callers of this function were always passing in the root wb
+  regardless of the actual wb that the inode was associated with,
+  which could also change while writeback is in progress.
+
+  Fix it by dropping the @wb parameter and using
+  inode_to_wb_and_lock_list() to determine and lock the associated wb.
+
+* After writeback_sb_inodes() writes out an inode, it re-locks @wb and
+  inode to remove it from or move it to the right io list.  It assumes
+  that the inode is still associated with @wb; however, the inode may
+  have switched to another wb while writeback was in progress.
+
+  Fix it by using inode_to_wb_and_lock_list() to determine and lock
+  the associated wb after writeback is complete.  As the function
+  requires the original @wb->list_lock locked for the next iteration,
+  in the unlikely case where the inode has changed association, switch
+  the locks.
+
+Kudos to Tahsin for pinpointing these subtle breakages.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Fixes: d10c80955265 ("writeback: implement foreign cgroup inode bdi_writeback switching")
+Link: http://lkml.kernel.org/g/CAAeU0aMYeM_39Y2+PaRvyB1nqAPYZSNngJ1eBRmrxn7gKAt2Mg@mail.gmail.com
+Reported-and-diagnosed-by: Tahsin Erdogan <tahsin@google.com>
+Tested-by: Tahsin Erdogan <tahsin@google.com>
+Signed-off-by: Jens Axboe <axboe@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/fs-writeback.c |   29 +++++++++++++++++++----------
+ 1 file changed, 19 insertions(+), 10 deletions(-)
+
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -1339,10 +1339,10 @@ __writeback_single_inode(struct inode *i
+  * we go e.g. from filesystem. Flusher thread uses __writeback_single_inode()
+  * and does more profound writeback list handling in writeback_sb_inodes().
+  */
+-static int
+-writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
+-                     struct writeback_control *wbc)
++static int writeback_single_inode(struct inode *inode,
++                                struct writeback_control *wbc)
+ {
++      struct bdi_writeback *wb;
+       int ret = 0;
+       spin_lock(&inode->i_lock);
+@@ -1380,7 +1380,8 @@ writeback_single_inode(struct inode *ino
+       ret = __writeback_single_inode(inode, wbc);
+       wbc_detach_inode(wbc);
+-      spin_lock(&wb->list_lock);
++
++      wb = inode_to_wb_and_lock_list(inode);
+       spin_lock(&inode->i_lock);
+       /*
+        * If inode is clean, remove it from writeback lists. Otherwise don't
+@@ -1455,6 +1456,7 @@ static long writeback_sb_inodes(struct s
+       while (!list_empty(&wb->b_io)) {
+               struct inode *inode = wb_inode(wb->b_io.prev);
++              struct bdi_writeback *tmp_wb;
+               if (inode->i_sb != sb) {
+                       if (work->sb) {
+@@ -1545,15 +1547,23 @@ static long writeback_sb_inodes(struct s
+                       cond_resched();
+               }
+-
+-              spin_lock(&wb->list_lock);
++              /*
++               * Requeue @inode if still dirty.  Be careful as @inode may
++               * have been switched to another wb in the meantime.
++               */
++              tmp_wb = inode_to_wb_and_lock_list(inode);
+               spin_lock(&inode->i_lock);
+               if (!(inode->i_state & I_DIRTY_ALL))
+                       wrote++;
+-              requeue_inode(inode, wb, &wbc);
++              requeue_inode(inode, tmp_wb, &wbc);
+               inode_sync_complete(inode);
+               spin_unlock(&inode->i_lock);
++              if (unlikely(tmp_wb != wb)) {
++                      spin_unlock(&tmp_wb->list_lock);
++                      spin_lock(&wb->list_lock);
++              }
++
+               /*
+                * bail out to wb_writeback() often enough to check
+                * background threshold and other termination conditions.
+@@ -2340,7 +2350,6 @@ EXPORT_SYMBOL(sync_inodes_sb);
+  */
+ int write_inode_now(struct inode *inode, int sync)
+ {
+-      struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
+       struct writeback_control wbc = {
+               .nr_to_write = LONG_MAX,
+               .sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE,
+@@ -2352,7 +2361,7 @@ int write_inode_now(struct inode *inode,
+               wbc.nr_to_write = 0;
+       might_sleep();
+-      return writeback_single_inode(inode, wb, &wbc);
++      return writeback_single_inode(inode, &wbc);
+ }
+ EXPORT_SYMBOL(write_inode_now);
+@@ -2369,7 +2378,7 @@ EXPORT_SYMBOL(write_inode_now);
+  */
+ int sync_inode(struct inode *inode, struct writeback_control *wbc)
+ {
+-      return writeback_single_inode(inode, &inode_to_bdi(inode)->wb, wbc);
++      return writeback_single_inode(inode, wbc);
+ }
+ EXPORT_SYMBOL(sync_inode);