3.19-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>  Fri, 17 Apr 2015 13:22:59 +0000 (15:22 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>  Fri, 17 Apr 2015 13:22:59 +0000 (15:22 +0200)
added patches:
drm-i915-push-vblank-enable-disable-past-encoder-enable-disable.patch
ext4-fix-indirect-punch-hole-corruption.patch
kvm-avoid-page-allocation-failure-in-kvm_set_memory_region.patch
timers-tick-broadcast-hrtimer-fix-suspicious-rcu-usage-in-idle-loop.patch
xfs-ensure-truncate-forces-zeroed-blocks-to-disk.patch

queue-3.19/drm-i915-push-vblank-enable-disable-past-encoder-enable-disable.patch [new file with mode: 0644]
queue-3.19/ext4-fix-indirect-punch-hole-corruption.patch [new file with mode: 0644]
queue-3.19/kvm-avoid-page-allocation-failure-in-kvm_set_memory_region.patch [new file with mode: 0644]
queue-3.19/series
queue-3.19/timers-tick-broadcast-hrtimer-fix-suspicious-rcu-usage-in-idle-loop.patch [new file with mode: 0644]
queue-3.19/xfs-ensure-truncate-forces-zeroed-blocks-to-disk.patch [new file with mode: 0644]

diff --git a/queue-3.19/drm-i915-push-vblank-enable-disable-past-encoder-enable-disable.patch b/queue-3.19/drm-i915-push-vblank-enable-disable-past-encoder-enable-disable.patch
new file mode 100644 (file)
index 0000000..7138df7
--- /dev/null
@@ -0,0 +1,176 @@
+From f9b61ff6bce9a44555324b29e593fdffc9a115bc Mon Sep 17 00:00:00 2001
+From: Daniel Vetter <daniel.vetter@ffwll.ch>
+Date: Wed, 7 Jan 2015 13:54:39 +0100
+Subject: drm/i915: Push vblank enable/disable past encoder->enable/disable
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Daniel Vetter <daniel.vetter@ffwll.ch>
+
+commit f9b61ff6bce9a44555324b29e593fdffc9a115bc upstream.
+
+It is platform/output dependent when exactly the pipe will start
+running. Sometimes we just need the (cpu) pipe enabled, in other cases
+the pch transcoder is enough and in yet other cases the (DP) port is
+sending the frame start signal.
+
+In a perfect world we'd put the drm_crtc_vblank_on call exactly where
+the pipe starts running, but due to cloning and similar things this
+will get messy. And the current approach of picking the most
+conservative place for all combinations also doesn't work since that
+results in legit vblank waits (in encoder->enable hooks, e.g. the 2
+vblank waits for sdvo) failing.
+
+Completely going back to the old world before
+
+commit 51e31d49c89055299e34b8f44d13f70e19aaaad1
+Author: Daniel Vetter <daniel.vetter@ffwll.ch>
+Date:   Mon Sep 15 12:36:02 2014 +0200
+
+    drm/i915: Use generic vblank wait
+
+isn't great either, since screaming when a vblank wait fails because
+the pipe is off is kinda nice.
+
+Pick a compromise and move the drm_crtc_vblank_on right before the
+encoder->enable call. This is a lie on some outputs/platforms, but
+after the ->enable callback the pipe is guaranteed to run everywhere.
+So not that bad really. Suggested by Ville.
+
+v2: Same treatment for drm_crtc_vblank_off and encoder->disable: I'd
+missed the ibx pipe B select w/a, which also has a vblank wait in the
+disable function (while the pipe is obviously still running).
+
+Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
+Cc: Chris Wilson <chris@chris-wilson.co.uk>
+Acked-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
+Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
+Cc: Jani Nikula <jani.nikula@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/i915/intel_display.c |   42 +++++++++++++++++------------------
+ 1 file changed, 21 insertions(+), 21 deletions(-)
+
+--- a/drivers/gpu/drm/i915/intel_display.c
++++ b/drivers/gpu/drm/i915/intel_display.c
+@@ -4372,15 +4372,15 @@ static void ironlake_crtc_enable(struct
+       if (intel_crtc->config.has_pch_encoder)
+               ironlake_pch_enable(crtc);
++      assert_vblank_disabled(crtc);
++      drm_crtc_vblank_on(crtc);
++
+       for_each_encoder_on_crtc(dev, crtc, encoder)
+               encoder->enable(encoder);
+       if (HAS_PCH_CPT(dev))
+               cpt_verify_modeset(dev, intel_crtc->pipe);
+-      assert_vblank_disabled(crtc);
+-      drm_crtc_vblank_on(crtc);
+-
+       intel_crtc_enable_planes(crtc);
+ }
+@@ -4492,14 +4492,14 @@ static void haswell_crtc_enable(struct d
+       if (intel_crtc->config.dp_encoder_is_mst)
+               intel_ddi_set_vc_payload_alloc(crtc, true);
++      assert_vblank_disabled(crtc);
++      drm_crtc_vblank_on(crtc);
++
+       for_each_encoder_on_crtc(dev, crtc, encoder) {
+               encoder->enable(encoder);
+               intel_opregion_notify_encoder(encoder, true);
+       }
+-      assert_vblank_disabled(crtc);
+-      drm_crtc_vblank_on(crtc);
+-
+       /* If we change the relative order between pipe/planes enabling, we need
+        * to change the workaround. */
+       haswell_mode_set_planes_workaround(intel_crtc);
+@@ -4550,12 +4550,12 @@ static void ironlake_crtc_disable(struct
+       intel_crtc_disable_planes(crtc);
+-      drm_crtc_vblank_off(crtc);
+-      assert_vblank_disabled(crtc);
+-
+       for_each_encoder_on_crtc(dev, crtc, encoder)
+               encoder->disable(encoder);
++      drm_crtc_vblank_off(crtc);
++      assert_vblank_disabled(crtc);
++
+       if (intel_crtc->config.has_pch_encoder)
+               intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, false);
+@@ -4614,14 +4614,14 @@ static void haswell_crtc_disable(struct
+       intel_crtc_disable_planes(crtc);
+-      drm_crtc_vblank_off(crtc);
+-      assert_vblank_disabled(crtc);
+-
+       for_each_encoder_on_crtc(dev, crtc, encoder) {
+               intel_opregion_notify_encoder(encoder, false);
+               encoder->disable(encoder);
+       }
++      drm_crtc_vblank_off(crtc);
++      assert_vblank_disabled(crtc);
++
+       if (intel_crtc->config.has_pch_encoder)
+               intel_set_pch_fifo_underrun_reporting(dev_priv, TRANSCODER_A,
+                                                     false);
+@@ -5089,12 +5089,12 @@ static void valleyview_crtc_enable(struc
+       intel_update_watermarks(crtc);
+       intel_enable_pipe(intel_crtc);
+-      for_each_encoder_on_crtc(dev, crtc, encoder)
+-              encoder->enable(encoder);
+-
+       assert_vblank_disabled(crtc);
+       drm_crtc_vblank_on(crtc);
++      for_each_encoder_on_crtc(dev, crtc, encoder)
++              encoder->enable(encoder);
++
+       intel_crtc_enable_planes(crtc);
+       /* Underruns don't raise interrupts, so check manually. */
+@@ -5150,12 +5150,12 @@ static void i9xx_crtc_enable(struct drm_
+       intel_update_watermarks(crtc);
+       intel_enable_pipe(intel_crtc);
+-      for_each_encoder_on_crtc(dev, crtc, encoder)
+-              encoder->enable(encoder);
+-
+       assert_vblank_disabled(crtc);
+       drm_crtc_vblank_on(crtc);
++      for_each_encoder_on_crtc(dev, crtc, encoder)
++              encoder->enable(encoder);
++
+       intel_crtc_enable_planes(crtc);
+       /*
+@@ -5227,12 +5227,12 @@ static void i9xx_crtc_disable(struct drm
+        */
+       intel_wait_for_vblank(dev, pipe);
+-      drm_crtc_vblank_off(crtc);
+-      assert_vblank_disabled(crtc);
+-
+       for_each_encoder_on_crtc(dev, crtc, encoder)
+               encoder->disable(encoder);
++      drm_crtc_vblank_off(crtc);
++      assert_vblank_disabled(crtc);
++
+       intel_disable_pipe(intel_crtc);
+       i9xx_pfit_disable(intel_crtc);
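The resulting invariant is easier to see stripped of driver detail: vblanks are
switched on immediately before the encoder ->enable hooks run, and switched off
immediately after the ->disable hooks return, so any vblank wait issued from a
hook happens while the pipe is guaranteed to be running. Below is a minimal
standalone sketch of that ordering; every type and helper in it is a made-up
stub standing in for the i915 function of the same role, not the real code.

/* Standalone sketch of the enable/disable ordering; all helpers are
 * hypothetical stubs, not the actual i915 implementations. */
#include <stdio.h>

struct crtc {
	int pipe_running;
	int vblank_enabled;
};

static void enable_pipe(struct crtc *c)         { c->pipe_running = 1; }
static void disable_pipe(struct crtc *c)        { c->pipe_running = 0; }
static void drm_crtc_vblank_on(struct crtc *c)  { c->vblank_enabled = 1; }
static void drm_crtc_vblank_off(struct crtc *c) { c->vblank_enabled = 0; }

/* Encoder hooks may legitimately wait for vblanks (e.g. SDVO waits for
 * two of them), so vblanks must already be enabled when they run. */
static void encoder_enable(struct crtc *c)
{
	if (!c->vblank_enabled)
		printf("BUG: vblank wait in ->enable would time out\n");
}

static void encoder_disable(struct crtc *c)
{
	if (!c->vblank_enabled)
		printf("BUG: vblank wait in ->disable would time out\n");
}

static void crtc_enable(struct crtc *c)
{
	enable_pipe(c);
	drm_crtc_vblank_on(c);   /* on *before* the ->enable hooks */
	encoder_enable(c);
}

static void crtc_disable(struct crtc *c)
{
	encoder_disable(c);
	drm_crtc_vblank_off(c);  /* off *after* the ->disable hooks */
	disable_pipe(c);
}

int main(void)
{
	struct crtc c = { 0, 0 };

	crtc_enable(&c);   /* prints nothing: the ordering is correct */
	crtc_disable(&c);
	return 0;
}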
diff --git a/queue-3.19/ext4-fix-indirect-punch-hole-corruption.patch b/queue-3.19/ext4-fix-indirect-punch-hole-corruption.patch
new file mode 100644 (file)
index 0000000..e6b1879
--- /dev/null
@@ -0,0 +1,191 @@
+From 6f30b7e37a8239f9d27db626a1d3427bc7951908 Mon Sep 17 00:00:00 2001
+From: Omar Sandoval <osandov@osandov.com>
+Date: Sat, 14 Feb 2015 20:08:51 -0500
+Subject: ext4: fix indirect punch hole corruption
+
+From: Omar Sandoval <osandov@osandov.com>
+
+commit 6f30b7e37a8239f9d27db626a1d3427bc7951908 upstream.
+
+Commit 4f579ae7de56 (ext4: fix punch hole on files with indirect
+mapping) rewrote FALLOC_FL_PUNCH_HOLE for ext4 files with indirect
+mapping. However, there are bugs in several corner cases. This fixes 5
+distinct bugs:
+
+1. When there is at least one entire level of indirection between the
+start and end of the punch range and the end of the punch range is the
+first block of its level, we can't return early; we have to free the
+intervening levels.
+
+2. When the end is at a higher level of indirection than the start and
+ext4_find_shared returns a top branch for the end, we still need to free
+the rest of the shared branch it returns; we can't decrement partial2.
+
+3. When a punch happens within one level of indirection, we need to
+converge on an indirect block that contains the start and end. However,
+because the branches returned from ext4_find_shared do not necessarily
+start at the same level (e.g., the partial2 chain will be shallower if
+the last block occurs at the beginning of an indirect group), the walks
+of the two chains can end up "missing" each other, freeing a bunch of
+extra blocks in the process. This mismatch can be handled by first
+making sure that the chains are at the same level, then walking them
+together until they converge.
+
+4. When the punch happens within one level of indirection and
+ext4_find_shared returns a top branch for the start, we must free it,
+but only if the end does not occur within that branch.
+
+5. When the punch happens within one level of indirection and
+ext4_find_shared returns a top branch for the end, then we shouldn't
+free the block referenced by the end of the returned chain (this mirrors
+the different levels case).
+
+Signed-off-by: Omar Sandoval <osandov@osandov.com>
+Cc: Chris J Arges <chris.j.arges@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/indirect.c |  105 +++++++++++++++++++++++++++++++++++------------------
+ 1 file changed, 71 insertions(+), 34 deletions(-)
+
+--- a/fs/ext4/indirect.c
++++ b/fs/ext4/indirect.c
+@@ -1393,10 +1393,7 @@ end_range:
+                                * to free. Everything was covered by the start
+                                * of the range.
+                                */
+-                              return 0;
+-                      } else {
+-                              /* Shared branch grows from an indirect block */
+-                              partial2--;
++                              goto do_indirects;
+                       }
+               } else {
+                       /*
+@@ -1427,56 +1424,96 @@ end_range:
+       /* Punch happened within the same level (n == n2) */
+       partial = ext4_find_shared(inode, n, offsets, chain, &nr);
+       partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
+-      /*
+-       * ext4_find_shared returns Indirect structure which
+-       * points to the last element which should not be
+-       * removed by truncate. But this is end of the range
+-       * in punch_hole so we need to point to the next element
+-       */
+-      partial2->p++;
+-      while ((partial > chain) || (partial2 > chain2)) {
+-              /* We're at the same block, so we're almost finished */
+-              if ((partial->bh && partial2->bh) &&
+-                  (partial->bh->b_blocknr == partial2->bh->b_blocknr)) {
+-                      if ((partial > chain) && (partial2 > chain2)) {
++
++      /* Free top, but only if partial2 isn't its subtree. */
++      if (nr) {
++              int level = min(partial - chain, partial2 - chain2);
++              int i;
++              int subtree = 1;
++
++              for (i = 0; i <= level; i++) {
++                      if (offsets[i] != offsets2[i]) {
++                              subtree = 0;
++                              break;
++                      }
++              }
++
++              if (!subtree) {
++                      if (partial == chain) {
++                              /* Shared branch grows from the inode */
++                              ext4_free_branches(handle, inode, NULL,
++                                                 &nr, &nr+1,
++                                                 (chain+n-1) - partial);
++                              *partial->p = 0;
++                      } else {
++                              /* Shared branch grows from an indirect block */
++                              BUFFER_TRACE(partial->bh, "get_write_access");
+                               ext4_free_branches(handle, inode, partial->bh,
+-                                                 partial->p + 1,
+-                                                 partial2->p,
++                                                 partial->p,
++                                                 partial->p+1,
+                                                  (chain+n-1) - partial);
+-                              BUFFER_TRACE(partial->bh, "call brelse");
+-                              brelse(partial->bh);
+-                              BUFFER_TRACE(partial2->bh, "call brelse");
+-                              brelse(partial2->bh);
+                       }
+-                      return 0;
+               }
++      }
++
++      if (!nr2) {
+               /*
+-               * Clear the ends of indirect blocks on the shared branch
+-               * at the start of the range
++               * ext4_find_shared returns Indirect structure which
++               * points to the last element which should not be
++               * removed by truncate. But this is end of the range
++               * in punch_hole so we need to point to the next element
+                */
+-              if (partial > chain) {
++              partial2->p++;
++      }
++
++      while (partial > chain || partial2 > chain2) {
++              int depth = (chain+n-1) - partial;
++              int depth2 = (chain2+n2-1) - partial2;
++
++              if (partial > chain && partial2 > chain2 &&
++                  partial->bh->b_blocknr == partial2->bh->b_blocknr) {
++                      /*
++                       * We've converged on the same block. Clear the range,
++                       * then we're done.
++                       */
+                       ext4_free_branches(handle, inode, partial->bh,
+-                                 partial->p + 1,
+-                                 (__le32 *)partial->bh->b_data+addr_per_block,
+-                                 (chain+n-1) - partial);
++                                         partial->p + 1,
++                                         partial2->p,
++                                         (chain+n-1) - partial);
+                       BUFFER_TRACE(partial->bh, "call brelse");
+                       brelse(partial->bh);
+-                      partial--;
++                      BUFFER_TRACE(partial2->bh, "call brelse");
++                      brelse(partial2->bh);
++                      return 0;
+               }
++
+               /*
+-               * Clear the ends of indirect blocks on the shared branch
+-               * at the end of the range
++               * The start and end partial branches may not be at the same
++               * level even though the punch happened within one level. So, we
++               * give them a chance to arrive at the same level, then walk
++               * them in step with each other until we converge on the same
++               * block.
+                */
+-              if (partial2 > chain2) {
++              if (partial > chain && depth <= depth2) {
++                      ext4_free_branches(handle, inode, partial->bh,
++                                         partial->p + 1,
++                                         (__le32 *)partial->bh->b_data+addr_per_block,
++                                         (chain+n-1) - partial);
++                      BUFFER_TRACE(partial->bh, "call brelse");
++                      brelse(partial->bh);
++                      partial--;
++              }
++              if (partial2 > chain2 && depth2 <= depth) {
+                       ext4_free_branches(handle, inode, partial2->bh,
+                                          (__le32 *)partial2->bh->b_data,
+                                          partial2->p,
+-                                         (chain2+n-1) - partial2);
++                                         (chain2+n2-1) - partial2);
+                       BUFFER_TRACE(partial2->bh, "call brelse");
+                       brelse(partial2->bh);
+                       partial2--;
+               }
+       }
++      return 0;
+ do_indirects:
+       /* Kill the remaining (whole) subtrees */
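Bug 3 is the subtle one, and the convergence walk it introduces can be
illustrated outside of ext4 entirely: the two partial chains may sit at
different depths, so the loop lets the lagging side catch up before stepping
both sides in lockstep until they reference the same block. Below is a toy
model of that loop; the chains are reduced to arrays of made-up block numbers
and p/p2 mimic the partial/partial2 cursors, so treat it as a sketch only.

/* Toy model of the punch-hole convergence walk (bug 3). Index 0 is the
 * root end of each chain; all block numbers are invented. */
#include <stdio.h>

static void free_level(const char *which, int blk)
{
	/* Stands in for one ext4_free_branches() + brelse() step. */
	printf("free tail of %s chain at block %d\n", which, blk);
}

int main(void)
{
	int chain[]  = { 10, 42, 77, 81 };  /* path to start of hole */
	int chain2[] = { 10, 42, 88, 95 };  /* path to end of hole   */
	int n = 4, n2 = 4;                  /* same level (n == n2)  */
	int p = 3, p2 = 2;                  /* end cursor came back shallower */

	while (p > 0 || p2 > 0) {
		int depth  = (n  - 1) - p;  /* how far each cursor has walked up */
		int depth2 = (n2 - 1) - p2;

		if (p > 0 && p2 > 0 && chain[p] == chain2[p2]) {
			printf("converged on block %d; clear the range, done\n",
			       chain[p]);
			return 0;
		}
		/* Let the lagging side catch up, then walk both in step. */
		if (p > 0 && depth <= depth2)
			free_level("start", chain[p--]);
		if (p2 > 0 && depth2 <= depth)
			free_level("end", chain2[p2--]);
	}
	return 0;
}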
diff --git a/queue-3.19/kvm-avoid-page-allocation-failure-in-kvm_set_memory_region.patch b/queue-3.19/kvm-avoid-page-allocation-failure-in-kvm_set_memory_region.patch
new file mode 100644 (file)
index 0000000..edc9946
--- /dev/null
@@ -0,0 +1,99 @@
+From 744961341d472db6272ed9b42319a90f5a2aa7c4 Mon Sep 17 00:00:00 2001
+From: Igor Mammedov <imammedo@redhat.com>
+Date: Fri, 20 Mar 2015 12:21:37 +0000
+Subject: kvm: avoid page allocation failure in kvm_set_memory_region()
+
+From: Igor Mammedov <imammedo@redhat.com>
+
+commit 744961341d472db6272ed9b42319a90f5a2aa7c4 upstream.
+
+KVM guest can fail to startup with following trace on host:
+
+qemu-system-x86: page allocation failure: order:4, mode:0x40d0
+Call Trace:
+  dump_stack+0x47/0x67
+  warn_alloc_failed+0xee/0x150
+  __alloc_pages_direct_compact+0x14a/0x150
+  __alloc_pages_nodemask+0x776/0xb80
+  alloc_kmem_pages+0x3a/0x110
+  kmalloc_order+0x13/0x50
+  kmemdup+0x1b/0x40
+  __kvm_set_memory_region+0x24a/0x9f0 [kvm]
+  kvm_set_ioapic+0x130/0x130 [kvm]
+  kvm_set_memory_region+0x21/0x40 [kvm]
+  kvm_vm_ioctl+0x43f/0x750 [kvm]
+
+The failure happens when attempting to allocate pages for
+'struct kvm_memslots'. However, the structure doesn't have
+to live in physically contiguous (kmalloc-ed) address space,
+so change the allocation to kvm_kvzalloc(); that way it is
+vmalloc-ed when its size is more than a page.
+
+Signed-off-by: Igor Mammedov <imammedo@redhat.com>
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ virt/kvm/kvm_main.c |   14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -466,7 +466,7 @@ static struct kvm *kvm_create_vm(unsigne
+       BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
+       r = -ENOMEM;
+-      kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
++      kvm->memslots = kvm_kvzalloc(sizeof(struct kvm_memslots));
+       if (!kvm->memslots)
+               goto out_err_no_srcu;
+@@ -517,7 +517,7 @@ out_err_no_srcu:
+ out_err_no_disable:
+       for (i = 0; i < KVM_NR_BUSES; i++)
+               kfree(kvm->buses[i]);
+-      kfree(kvm->memslots);
++      kvfree(kvm->memslots);
+       kvm_arch_free_vm(kvm);
+       return ERR_PTR(r);
+ }
+@@ -573,7 +573,7 @@ static void kvm_free_physmem(struct kvm
+       kvm_for_each_memslot(memslot, slots)
+               kvm_free_physmem_slot(kvm, memslot, NULL);
+-      kfree(kvm->memslots);
++      kvfree(kvm->memslots);
+ }
+ static void kvm_destroy_devices(struct kvm *kvm)
+@@ -865,10 +865,10 @@ int __kvm_set_memory_region(struct kvm *
+                       goto out_free;
+       }
+-      slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
+-                      GFP_KERNEL);
++      slots = kvm_kvzalloc(sizeof(struct kvm_memslots));
+       if (!slots)
+               goto out_free;
++      memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
+       if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
+               slot = id_to_memslot(slots, mem->slot);
+@@ -911,7 +911,7 @@ int __kvm_set_memory_region(struct kvm *
+       kvm_arch_commit_memory_region(kvm, mem, &old, change);
+       kvm_free_physmem_slot(kvm, &old, &new);
+-      kfree(old_memslots);
++      kvfree(old_memslots);
+       /*
+        * IOMMU mapping:  New slots need to be mapped.  Old slots need to be
+@@ -930,7 +930,7 @@ int __kvm_set_memory_region(struct kvm *
+       return 0;
+ out_slots:
+-      kfree(slots);
++      kvfree(slots);
+ out_free:
+       kvm_free_physmem_slot(kvm, &new, &old);
+ out:
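The whole fix hinges on the kvm_kvzalloc()/kvfree() pair degrading gracefully
from kmalloc to vmalloc. For reference, the 3.19-era helpers look roughly like
the sketch below (paraphrased from memory of virt/kvm/kvm_main.c and
mm/util.c, so treat it as an approximation rather than the exact tree
contents):

/* Approximate shape of the helpers the patch switches to. */
void *kvm_kvzalloc(unsigned long size)
{
	if (size > PAGE_SIZE)
		return vzalloc(size);              /* virtually contiguous  */
	else
		return kzalloc(size, GFP_KERNEL);  /* physically contiguous */
}

void kvfree(const void *addr)
{
	if (is_vmalloc_addr(addr))
		vfree(addr);
	else
		kfree(addr);
}

Since struct kvm_memslots spans several pages here, vzalloc() sidesteps the
order-4 contiguous allocation that failed in the trace. Note also the
memcpy() the patch adds: kvm_kvzalloc() returns zeroed memory, whereas the
kmemdup() it replaces copied the old slots as part of the allocation.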
diff --git a/queue-3.19/series b/queue-3.19/series
index b7f3d42e24ed505d54fc745f9366677364fa5b84..2e1bf37e375b846d4c001cef2bce1de31b6d5382 100644 (file)
--- a/queue-3.19/series
@@ -94,3 +94,8 @@ vb2-fix-unbalanced-warnings-when-calling-vb2_thread_stop.patch
 clk-divider-fix-selection-of-divider-when-rounding-to-closest.patch
 clk-divider-fix-calculation-of-maximal-parent-rate-for-a-given-divider.patch
 ib-mlx4-saturate-roce-port-pma-counters-in-case-of-overflow.patch
+timers-tick-broadcast-hrtimer-fix-suspicious-rcu-usage-in-idle-loop.patch
+ext4-fix-indirect-punch-hole-corruption.patch
+xfs-ensure-truncate-forces-zeroed-blocks-to-disk.patch
+drm-i915-push-vblank-enable-disable-past-encoder-enable-disable.patch
+kvm-avoid-page-allocation-failure-in-kvm_set_memory_region.patch
diff --git a/queue-3.19/timers-tick-broadcast-hrtimer-fix-suspicious-rcu-usage-in-idle-loop.patch b/queue-3.19/timers-tick-broadcast-hrtimer-fix-suspicious-rcu-usage-in-idle-loop.patch
new file mode 100644 (file)
index 0000000..3e75778
--- /dev/null
@@ -0,0 +1,71 @@
+From a127d2bcf1fbc8c8e0b5cf0dab54f7d3ff50ce47 Mon Sep 17 00:00:00 2001
+From: Preeti U Murthy <preeti@linux.vnet.ibm.com>
+Date: Wed, 18 Mar 2015 16:19:27 +0530
+Subject: timers/tick/broadcast-hrtimer: Fix suspicious RCU usage in idle loop
+
+From: Preeti U Murthy <preeti@linux.vnet.ibm.com>
+
+commit a127d2bcf1fbc8c8e0b5cf0dab54f7d3ff50ce47 upstream.
+
+The hrtimer mode of broadcast queues hrtimers in the idle entry
+path so as to wake up CPUs in deep idle states. The associated
+call graph is:
+
+       cpuidle_idle_call()
+       |____ clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, ....))
+            |_____tick_broadcast_set_event()
+                  |____clockevents_program_event()
+                       |____bc_set_next()
+
+The hrtimer_{start/cancel} functions call into tracing, which uses RCU.
+But it is not legal to call into RCU from cpuidle, because idle is one of
+the quiescent states. Hence protect this region with RCU_NONIDLE, which
+informs RCU that the CPU is momentarily non-idle.
+
+As an aside, it is helpful to point out that the clock event device that is
+programmed here is not a per-cpu clock device; it is a
+pseudo clock device, used by the broadcast framework alone.
+The per-cpu clock device programming never goes through bc_set_next().
+
+Signed-off-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+Cc: linuxppc-dev@ozlabs.org
+Cc: mpe@ellerman.id.au
+Cc: tglx@linutronix.de
+Link: http://lkml.kernel.org/r/20150318104705.17763.56668.stgit@preeti.in.ibm.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/time/tick-broadcast-hrtimer.c |   11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+--- a/kernel/time/tick-broadcast-hrtimer.c
++++ b/kernel/time/tick-broadcast-hrtimer.c
+@@ -49,6 +49,7 @@ static void bc_set_mode(enum clock_event
+  */
+ static int bc_set_next(ktime_t expires, struct clock_event_device *bc)
+ {
++      int bc_moved;
+       /*
+        * We try to cancel the timer first. If the callback is on
+        * flight on some other cpu then we let it handle it. If we
+@@ -60,9 +61,15 @@ static int bc_set_next(ktime_t expires,
+        * restart the timer because we are in the callback, but we
+        * can set the expiry time and let the callback return
+        * HRTIMER_RESTART.
++       *
++       * Since we are in the idle loop at this point and because
++       * hrtimer_{start/cancel} functions call into tracing,
++       * calls to these functions must be bound within RCU_NONIDLE.
+        */
+-      if (hrtimer_try_to_cancel(&bctimer) >= 0) {
+-              hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED);
++      RCU_NONIDLE(bc_moved = (hrtimer_try_to_cancel(&bctimer) >= 0) ?
++              !hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED) :
++                      0);
++      if (bc_moved) {
+               /* Bind the "device" to the cpu */
+               bc->bound_on = smp_processor_id();
+       } else if (bc->bound_on == smp_processor_id()) {
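The pattern generalizes beyond this one callsite: any tracing-capable call
issued from the idle loop must be bracketed by RCU_NONIDLE(), which marks the
CPU non-idle for exactly that expression. A hedged sketch of the usage
follows; do_traced_work() is a hypothetical stand-in for the hrtimer calls,
not a real kernel function.

/* Sketch of the RCU_NONIDLE() usage pattern. do_traced_work() is a
 * hypothetical stand-in for any call that can reach tracepoints (and
 * therefore RCU), such as hrtimer_try_to_cancel()/hrtimer_start(). */
#include <linux/rcupdate.h>

static int do_traced_work(void);   /* may fire tracepoints */

static void called_from_idle_loop(void)
{
	int ret;

	/*
	 * Calling do_traced_work() bare here would use RCU from an
	 * extended quiescent state. Bounding it with RCU_NONIDLE()
	 * momentarily marks the CPU non-idle, as the patch above does
	 * for the whole cancel-then-restart expression.
	 */
	RCU_NONIDLE(ret = do_traced_work());

	(void)ret;
}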
diff --git a/queue-3.19/xfs-ensure-truncate-forces-zeroed-blocks-to-disk.patch b/queue-3.19/xfs-ensure-truncate-forces-zeroed-blocks-to-disk.patch
new file mode 100644 (file)
index 0000000..b5642a5
--- /dev/null
@@ -0,0 +1,186 @@
+From 5885ebda878b47c4b4602d4b0410cb4b282af024 Mon Sep 17 00:00:00 2001
+From: Dave Chinner <dchinner@redhat.com>
+Date: Mon, 23 Feb 2015 22:37:08 +1100
+Subject: xfs: ensure truncate forces zeroed blocks to disk
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 5885ebda878b47c4b4602d4b0410cb4b282af024 upstream.
+
+A new fsync vs power fail test in xfstests indicated that XFS can
+have unreliable data consistency when doing extending truncates that
+require block zeroing. The blocks beyond EOF get zeroed in memory,
+but we never force those changes to disk before we run the
+transaction that extends the file size and exposes those blocks to
+userspace. This can result in the blocks not being correctly zeroed
+after a crash.
+
+Because in-memory behaviour is correct, tools like fsx don't pick up
+any coherency problems - it's not until the filesystem is shut down
+or the system crashes after writing the truncate transaction to the
+journal but before the zeroed data in the page cache is flushed that
+the issue is exposed.
+
+Fix this by also flushing the dirty in-memory data in the region
+between the old size and the new size when we've found blocks that
+need zeroing in the truncate process.
+
+Reported-by: Liu Bo <bo.li.liu@oracle.com>
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+
+---
+ fs/xfs/xfs_file.c  |   14 ++++++++++----
+ fs/xfs/xfs_inode.h |    5 +++--
+ fs/xfs/xfs_iops.c  |   36 ++++++++++++++----------------------
+ 3 files changed, 27 insertions(+), 28 deletions(-)
+
+--- a/fs/xfs/xfs_file.c
++++ b/fs/xfs/xfs_file.c
+@@ -360,7 +360,8 @@ STATIC int                         /* error (positive) */
+ xfs_zero_last_block(
+       struct xfs_inode        *ip,
+       xfs_fsize_t             offset,
+-      xfs_fsize_t             isize)
++      xfs_fsize_t             isize,
++      bool                    *did_zeroing)
+ {
+       struct xfs_mount        *mp = ip->i_mount;
+       xfs_fileoff_t           last_fsb = XFS_B_TO_FSBT(mp, isize);
+@@ -388,6 +389,7 @@ xfs_zero_last_block(
+       zero_len = mp->m_sb.sb_blocksize - zero_offset;
+       if (isize + zero_len > offset)
+               zero_len = offset - isize;
++      *did_zeroing = true;
+       return xfs_iozero(ip, isize, zero_len);
+ }
+@@ -406,7 +408,8 @@ int                                        /* error (positive) */
+ xfs_zero_eof(
+       struct xfs_inode        *ip,
+       xfs_off_t               offset,         /* starting I/O offset */
+-      xfs_fsize_t             isize)          /* current inode size */
++      xfs_fsize_t             isize,          /* current inode size */
++      bool                    *did_zeroing)
+ {
+       struct xfs_mount        *mp = ip->i_mount;
+       xfs_fileoff_t           start_zero_fsb;
+@@ -428,7 +431,7 @@ xfs_zero_eof(
+        * We only zero a part of that block so it is handled specially.
+        */
+       if (XFS_B_FSB_OFFSET(mp, isize) != 0) {
+-              error = xfs_zero_last_block(ip, offset, isize);
++              error = xfs_zero_last_block(ip, offset, isize, did_zeroing);
+               if (error)
+                       return error;
+       }
+@@ -488,6 +491,7 @@ xfs_zero_eof(
+               if (error)
+                       return error;
++              *did_zeroing = true;
+               start_zero_fsb = imap.br_startoff + imap.br_blockcount;
+               ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
+       }
+@@ -526,13 +530,15 @@ restart:
+        * having to redo all checks before.
+        */
+       if (*pos > i_size_read(inode)) {
++              bool    zero = false;
++
+               if (*iolock == XFS_IOLOCK_SHARED) {
+                       xfs_rw_iunlock(ip, *iolock);
+                       *iolock = XFS_IOLOCK_EXCL;
+                       xfs_rw_ilock(ip, *iolock);
+                       goto restart;
+               }
+-              error = xfs_zero_eof(ip, *pos, i_size_read(inode));
++              error = xfs_zero_eof(ip, *pos, i_size_read(inode), &zero);
+               if (error)
+                       return error;
+       }
+--- a/fs/xfs/xfs_inode.h
++++ b/fs/xfs/xfs_inode.h
+@@ -377,8 +377,9 @@ int                xfs_droplink(struct xfs_trans *, st
+ int           xfs_bumplink(struct xfs_trans *, struct xfs_inode *);
+ /* from xfs_file.c */
+-int           xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t);
+-int           xfs_iozero(struct xfs_inode *, loff_t, size_t);
++int   xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
++                   xfs_fsize_t isize, bool *did_zeroing);
++int   xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count);
+ #define IHOLD(ip) \
+--- a/fs/xfs/xfs_iops.c
++++ b/fs/xfs/xfs_iops.c
+@@ -741,6 +741,7 @@ xfs_setattr_size(
+       int                     error;
+       uint                    lock_flags = 0;
+       uint                    commit_flags = 0;
++      bool                    did_zeroing = false;
+       trace_xfs_setattr(ip);
+@@ -784,20 +785,16 @@ xfs_setattr_size(
+               return error;
+       /*
+-       * Now we can make the changes.  Before we join the inode to the
+-       * transaction, take care of the part of the truncation that must be
+-       * done without the inode lock.  This needs to be done before joining
+-       * the inode to the transaction, because the inode cannot be unlocked
+-       * once it is a part of the transaction.
++       * File data changes must be complete before we start the transaction to
++       * modify the inode.  This needs to be done before joining the inode to
++       * the transaction because the inode cannot be unlocked once it is a
++       * part of the transaction.
++       *
++       * Start with zeroing any data block beyond EOF that we may expose on
++       * file extension.
+        */
+       if (newsize > oldsize) {
+-              /*
+-               * Do the first part of growing a file: zero any data in the
+-               * last block that is beyond the old EOF.  We need to do this
+-               * before the inode is joined to the transaction to modify
+-               * i_size.
+-               */
+-              error = xfs_zero_eof(ip, newsize, oldsize);
++              error = xfs_zero_eof(ip, newsize, oldsize, &did_zeroing);
+               if (error)
+                       return error;
+       }
+@@ -807,23 +804,18 @@ xfs_setattr_size(
+        * any previous writes that are beyond the on disk EOF and the new
+        * EOF that have not been written out need to be written here.  If we
+        * do not write the data out, we expose ourselves to the null files
+-       * problem.
+-       *
+-       * Only flush from the on disk size to the smaller of the in memory
+-       * file size or the new size as that's the range we really care about
+-       * here and prevents waiting for other data not within the range we
+-       * care about here.
++       * problem. Note that this includes any block zeroing we did above;
++       * otherwise those blocks may not be zeroed after a crash.
+        */
+-      if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) {
++      if (newsize > ip->i_d.di_size &&
++          (oldsize != ip->i_d.di_size || did_zeroing)) {
+               error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
+                                                     ip->i_d.di_size, newsize);
+               if (error)
+                       return error;
+       }
+-      /*
+-       * Wait for all direct I/O to complete.
+-       */
++      /* Now wait for all direct I/O to complete. */
+       inode_dio_wait(inode);
+       /*
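Condensed, the control flow this xfs fix establishes is: the zeroing helpers
report through a bool out-parameter whether they dirtied any page-cache
pages, and the truncate path then writes that range back before the
size-changing transaction can expose it. The sketch below compresses the flow
into shortened, hypothetical names; it is not the literal kernel code.

/* Condensed sketch of the did_zeroing pattern; every name here is a
 * shortened stand-in for the xfs function or field in the patch above. */
#include <stdbool.h>

struct isketch {
	long long disk_size; /* on-disk EOF (ip->i_d.di_size) */
	long long mem_size;  /* in-memory size (oldsize)      */
};

/* Zero in-core blocks beyond EOF, reporting whether pages got dirtied. */
static int zero_eof(struct isketch *ip, long long newsize, bool *did_zeroing)
{
	if (newsize > ip->mem_size)
		*did_zeroing = true; /* zeroed pages now sit dirty in memory */
	return 0;
}

/* Stands in for filemap_write_and_wait_range(). */
static int flush_range(struct isketch *ip, long long from, long long to)
{
	(void)ip; (void)from; (void)to;
	return 0;
}

static int setattr_size(struct isketch *ip, long long newsize)
{
	long long oldsize = ip->mem_size;
	bool did_zeroing = false;
	int error = 0;

	if (newsize > oldsize)
		error = zero_eof(ip, newsize, &did_zeroing);
	if (error)
		return error;

	/*
	 * The actual fix: flush not only when the in-memory and on-disk
	 * sizes already disagreed, but also when zeroing dirtied pages,
	 * so the zeroed blocks are durable before the transaction
	 * exposes them past the new EOF.
	 */
	if (newsize > ip->disk_size &&
	    (oldsize != ip->disk_size || did_zeroing))
		error = flush_range(ip, ip->disk_size, newsize);

	return error;
}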