git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.8-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 2 Apr 2013 20:10:46 +0000 (13:10 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 2 Apr 2013 20:10:46 +0000 (13:10 -0700)
added patches:
btrfs-fix-space-leak-when-we-fail-to-reserve-metadata-space.patch
efivars-explicitly-calculate-length-of-variablename.patch
efivars-handle-duplicate-names-from-get_next_variable.patch
iwlwifi-dvm-don-t-send-hcmd-in-restart-flow.patch
tracing-prevent-buffer-overwrite-disabled-for-latency-tracers.patch
xen-events-avoid-race-with-raising-an-event-in-unmask_evtchn.patch

queue-3.8/btrfs-fix-space-leak-when-we-fail-to-reserve-metadata-space.patch [new file with mode: 0644]
queue-3.8/efivars-explicitly-calculate-length-of-variablename.patch [new file with mode: 0644]
queue-3.8/efivars-handle-duplicate-names-from-get_next_variable.patch [new file with mode: 0644]
queue-3.8/iwlwifi-dvm-don-t-send-hcmd-in-restart-flow.patch [new file with mode: 0644]
queue-3.8/series
queue-3.8/tracing-prevent-buffer-overwrite-disabled-for-latency-tracers.patch [new file with mode: 0644]
queue-3.8/xen-events-avoid-race-with-raising-an-event-in-unmask_evtchn.patch [new file with mode: 0644]

diff --git a/queue-3.8/btrfs-fix-space-leak-when-we-fail-to-reserve-metadata-space.patch b/queue-3.8/btrfs-fix-space-leak-when-we-fail-to-reserve-metadata-space.patch
new file mode 100644 (file)
index 0000000..9f158b5
--- /dev/null
@@ -0,0 +1,87 @@
+From f4881bc7a83eff263789dd524b7c269d138d4af5 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <jbacik@fusionio.com>
+Date: Mon, 25 Mar 2013 16:03:35 -0400
+Subject: Btrfs: fix space leak when we fail to reserve metadata space
+
+From: Josef Bacik <jbacik@fusionio.com>
+
+commit f4881bc7a83eff263789dd524b7c269d138d4af5 upstream.
+
+Dave reported a warning when running xfstest 275.  We have been leaking delalloc
+metadata space when our reservations fail.  This is because we were improperly
+calculating how much space to free for our checksum reservations.  The problem
+is we would sometimes free up space that had already been freed in another
+thread and we would end up with negative usage for the delalloc space.  This
+patch fixes the problem by calculating how much space the other threads would
+have already freed, then calculating how much space we need to free had we not
+done the reservation at all, and then freeing any excess space.  This makes
+xfstests 275 no longer have leaked space.  Thanks
+
+Reported-by: David Sterba <dsterba@suse.cz>
+Signed-off-by: Josef Bacik <jbacik@fusionio.com>
+Signed-off-by: Lingzhu Xiang <lxiang@redhat.com>
+Reviewed-by: CAI Qian <caiqian@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent-tree.c |   47 +++++++++++++++++++++++++++++++++++++++++------
+ 1 file changed, 41 insertions(+), 6 deletions(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -4601,14 +4601,49 @@ int btrfs_delalloc_reserve_metadata(stru
+                * If the inodes csum_bytes is the same as the original
+                * csum_bytes then we know we haven't raced with any free()ers
+                * so we can just reduce our inodes csum bytes and carry on.
+-               * Otherwise we have to do the normal free thing to account for
+-               * the case that the free side didn't free up its reserve
+-               * because of this outstanding reservation.
+                */
+-              if (BTRFS_I(inode)->csum_bytes == csum_bytes)
++              if (BTRFS_I(inode)->csum_bytes == csum_bytes) {
+                       calc_csum_metadata_size(inode, num_bytes, 0);
+-              else
+-                      to_free = calc_csum_metadata_size(inode, num_bytes, 0);
++              } else {
++                      u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes;
++                      u64 bytes;
++
++                      /*
++                       * This is tricky, but first we need to figure out how much we
++                       * free'd from any free-ers that occured during this
++                       * reservation, so we reset ->csum_bytes to the csum_bytes
++                       * before we dropped our lock, and then call the free for the
++                       * number of bytes that were freed while we were trying our
++                       * reservation.
++                       */
++                      bytes = csum_bytes - BTRFS_I(inode)->csum_bytes;
++                      BTRFS_I(inode)->csum_bytes = csum_bytes;
++                      to_free = calc_csum_metadata_size(inode, bytes, 0);
++
++
++                      /*
++                       * Now we need to see how much we would have freed had we not
++                       * been making this reservation and our ->csum_bytes were not
++                       * artificially inflated.
++                       */
++                      BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes;
++                      bytes = csum_bytes - orig_csum_bytes;
++                      bytes = calc_csum_metadata_size(inode, bytes, 0);
++
++                      /*
++                       * Now reset ->csum_bytes to what it should be.  If bytes is
++                       * more than to_free then we would have free'd more space had we
++                       * not had an artificially high ->csum_bytes, so we need to free
++                       * the remainder.  If bytes is the same or less then we don't
++                       * need to do anything, the other free-ers did the correct
++                       * thing.
++                       */
++                      BTRFS_I(inode)->csum_bytes = orig_csum_bytes - num_bytes;
++                      if (bytes > to_free)
++                              to_free = bytes - to_free;
++                      else
++                              to_free = 0;
++              }
+               spin_unlock(&BTRFS_I(inode)->lock);
+               if (dropped)
+                       to_free += btrfs_calc_trans_metadata_size(root, dropped);
diff --git a/queue-3.8/efivars-explicitly-calculate-length-of-variablename.patch b/queue-3.8/efivars-explicitly-calculate-length-of-variablename.patch
new file mode 100644 (file)
index 0000000..eff3d97
--- /dev/null
@@ -0,0 +1,96 @@
+From ec50bd32f1672d38ddce10fb1841cbfda89cfe9a Mon Sep 17 00:00:00 2001
+From: Matt Fleming <matt.fleming@intel.com>
+Date: Fri, 1 Mar 2013 14:49:12 +0000
+Subject: efivars: explicitly calculate length of VariableName
+
+From: Matt Fleming <matt.fleming@intel.com>
+
+commit ec50bd32f1672d38ddce10fb1841cbfda89cfe9a upstream.
+
+It's not wise to assume VariableNameSize represents the length of
+VariableName, as not all firmware updates VariableNameSize in the same
+way (some don't update it at all if EFI_SUCCESS is returned). There
+are even implementations out there that update VariableNameSize with
+values that are both larger than the string returned in VariableName
+and smaller than the buffer passed to GetNextVariableName(), which
+resulted in the following bug report from Michael Schroeder,
+
+  > On HP z220 system (firmware version 1.54), some EFI variables are
+  > incorrectly named :
+  >
+  > ls -d /sys/firmware/efi/vars/*8be4d* | grep -v -- -8be returns
+  > /sys/firmware/efi/vars/dbxDefault-pport8be4df61-93ca-11d2-aa0d-00e098032b8c
+  > /sys/firmware/efi/vars/KEKDefault-pport8be4df61-93ca-11d2-aa0d-00e098032b8c
+  > /sys/firmware/efi/vars/SecureBoot-pport8be4df61-93ca-11d2-aa0d-00e098032b8c
+  > /sys/firmware/efi/vars/SetupMode-Information8be4df61-93ca-11d2-aa0d-00e098032b8c
+
+The issue here is that because we blindly use VariableNameSize without
+verifying its value, we can potentially read garbage values from the
+buffer containing VariableName if VariableNameSize is larger than the
+length of VariableName.
+
+Since VariableName is a string, we can calculate its size by searching
+for the terminating NULL character.
+
+[Backported for 3.8-stable. Removed workqueue code added in
+a93bc0c 3.9-rc1.]
+
+Reported-by: Frederic Crozat <fcrozat@suse.com>
+Cc: Matthew Garrett <mjg59@srcf.ucam.org>
+Cc: Josh Boyer <jwboyer@redhat.com>
+Cc: Michael Schroeder <mls@suse.com>
+Cc: Lee, Chun-Yi <jlee@suse.com>
+Cc: Lingzhu Xiang <lxiang@redhat.com>
+Cc: Seiji Aguchi <seiji.aguchi@hds.com>
+Signed-off-by: Matt Fleming <matt.fleming@intel.com>
+Signed-off-by: Lingzhu Xiang <lxiang@redhat.com>
+Reviewed-by: CAI Qian <caiqian@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/firmware/efivars.c |   27 +++++++++++++++++++++++++++
+ 1 file changed, 27 insertions(+)
+
+--- a/drivers/firmware/efivars.c
++++ b/drivers/firmware/efivars.c
+@@ -1670,6 +1670,31 @@ static ssize_t efivar_delete(struct file
+ }
+ /*
++ * Returns the size of variable_name, in bytes, including the
++ * terminating NULL character, or variable_name_size if no NULL
++ * character is found among the first variable_name_size bytes.
++ */
++static unsigned long var_name_strnsize(efi_char16_t *variable_name,
++                                     unsigned long variable_name_size)
++{
++      unsigned long len;
++      efi_char16_t c;
++
++      /*
++       * The variable name is, by definition, a NULL-terminated
++       * string, so make absolutely sure that variable_name_size is
++       * the value we expect it to be. If not, return the real size.
++       */
++      for (len = 2; len <= variable_name_size; len += sizeof(c)) {
++              c = variable_name[(len / sizeof(c)) - 1];
++              if (!c)
++                      break;
++      }
++
++      return min(len, variable_name_size);
++}
++
++/*
+  * Let's not leave out systab information that snuck into
+  * the efivars driver
+  */
+@@ -1912,6 +1937,8 @@ int register_efivars(struct efivars *efi
+                                               &vendor_guid);
+               switch (status) {
+               case EFI_SUCCESS:
++                      variable_name_size = var_name_strnsize(variable_name,
++                                                             variable_name_size);
+                       efivar_create_sysfs_entry(efivars,
+                                                 variable_name_size,
+                                                 variable_name,
diff --git a/queue-3.8/efivars-handle-duplicate-names-from-get_next_variable.patch b/queue-3.8/efivars-handle-duplicate-names-from-get_next_variable.patch
new file mode 100644 (file)
index 0000000..1fd2092
--- /dev/null
@@ -0,0 +1,170 @@
+From e971318bbed610e28bb3fde9d548e6aaf0a6b02e Mon Sep 17 00:00:00 2001
+From: Matt Fleming <matt.fleming@intel.com>
+Date: Thu, 7 Mar 2013 11:59:14 +0000
+Subject: efivars: Handle duplicate names from get_next_variable()
+
+From: Matt Fleming <matt.fleming@intel.com>
+
+commit e971318bbed610e28bb3fde9d548e6aaf0a6b02e upstream.
+
+Some firmware exhibits a bug where the same VariableName and
+VendorGuid values are returned on multiple invocations of
+GetNextVariableName(). See,
+
+    https://bugzilla.kernel.org/show_bug.cgi?id=47631
+
+As a consequence of such a bug, Andre reports hitting the following
+WARN_ON() in the sysfs code after updating the BIOS on his, "Gigabyte
+Technology Co., Ltd. To be filled by O.E.M./Z77X-UD3H, BIOS F19e
+11/21/2012)" machine,
+
+[    0.581554] EFI Variables Facility v0.08 2004-May-17
+[    0.584914] ------------[ cut here ]------------
+[    0.585639] WARNING: at /home/andre/linux/fs/sysfs/dir.c:536 sysfs_add_one+0xd4/0x100()
+[    0.586381] Hardware name: To be filled by O.E.M.
+[    0.587123] sysfs: cannot create duplicate filename '/firmware/efi/vars/SbAslBufferPtrVar-01f33c25-764d-43ea-aeea-6b5a41f3f3e8'
+[    0.588694] Modules linked in:
+[    0.589484] Pid: 1, comm: swapper/0 Not tainted 3.8.0+ #7
+[    0.590280] Call Trace:
+[    0.591066]  [<ffffffff81208954>] ? sysfs_add_one+0xd4/0x100
+[    0.591861]  [<ffffffff810587bf>] warn_slowpath_common+0x7f/0xc0
+[    0.592650]  [<ffffffff810588bc>] warn_slowpath_fmt+0x4c/0x50
+[    0.593429]  [<ffffffff8134dd85>] ? strlcat+0x65/0x80
+[    0.594203]  [<ffffffff81208954>] sysfs_add_one+0xd4/0x100
+[    0.594979]  [<ffffffff81208b78>] create_dir+0x78/0xd0
+[    0.595753]  [<ffffffff81208ec6>] sysfs_create_dir+0x86/0xe0
+[    0.596532]  [<ffffffff81347e4c>] kobject_add_internal+0x9c/0x220
+[    0.597310]  [<ffffffff81348307>] kobject_init_and_add+0x67/0x90
+[    0.598083]  [<ffffffff81584a71>] ? efivar_create_sysfs_entry+0x61/0x1c0
+[    0.598859]  [<ffffffff81584b2b>] efivar_create_sysfs_entry+0x11b/0x1c0
+[    0.599631]  [<ffffffff8158517e>] register_efivars+0xde/0x420
+[    0.600395]  [<ffffffff81d430a7>] ? edd_init+0x2f5/0x2f5
+[    0.601150]  [<ffffffff81d4315f>] efivars_init+0xb8/0x104
+[    0.601903]  [<ffffffff8100215a>] do_one_initcall+0x12a/0x180
+[    0.602659]  [<ffffffff81d05d80>] kernel_init_freeable+0x13e/0x1c6
+[    0.603418]  [<ffffffff81d05586>] ? loglevel+0x31/0x31
+[    0.604183]  [<ffffffff816a6530>] ? rest_init+0x80/0x80
+[    0.604936]  [<ffffffff816a653e>] kernel_init+0xe/0xf0
+[    0.605681]  [<ffffffff816ce7ec>] ret_from_fork+0x7c/0xb0
+[    0.606414]  [<ffffffff816a6530>] ? rest_init+0x80/0x80
+[    0.607143] ---[ end trace 1609741ab737eb29 ]---
+
+There's not much we can do to work around and keep traversing the
+variable list once we hit this firmware bug. Our only solution is to
+terminate the loop because, as Lingzhu reports, some machines get
+stuck when they encounter duplicate names,
+
+  > I had an IBM System x3100 M4 and x3850 X5 on which kernel would
+  > get stuck in infinite loop creating duplicate sysfs files because,
+  > for some reason, there are several duplicate boot entries in nvram
+  > getting GetNextVariableName into a circle of iteration (with
+  > period > 2).
+
+Also disable the workqueue, as efivar_update_sysfs_entries() uses
+GetNextVariableName() to figure out which variables have been created
+since the last iteration. That algorithm isn't going to work if
+GetNextVariableName() returns duplicates. Note that we don't disable
+EFI variable creation completely on the affected machines, it's just
+that any pstore dump-* files won't appear in sysfs until the next
+boot.
+
+[Backported for 3.8-stable. Removed code related to pstore
+workqueue but pulled in helper function variable_is_present
+from a93bc0c.]
+
+Reported-by: Andre Heider <a.heider@gmail.com>
+Reported-by: Lingzhu Xiang <lxiang@redhat.com>
+Tested-by: Lingzhu Xiang <lxiang@redhat.com>
+Cc: Seiji Aguchi <seiji.aguchi@hds.com>
+Signed-off-by: Matt Fleming <matt.fleming@intel.com>
+Signed-off-by: Lingzhu Xiang <lxiang@redhat.com>
+Reviewed-by: CAI Qian <caiqian@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/firmware/efivars.c |   60 +++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 60 insertions(+)
+
+--- a/drivers/firmware/efivars.c
++++ b/drivers/firmware/efivars.c
+@@ -1669,6 +1669,28 @@ static ssize_t efivar_delete(struct file
+       return count;
+ }
++static bool variable_is_present(efi_char16_t *variable_name, efi_guid_t *vendor)
++{
++      struct efivar_entry *entry, *n;
++      struct efivars *efivars = &__efivars;
++      unsigned long strsize1, strsize2;
++      bool found = false;
++
++      strsize1 = utf16_strsize(variable_name, 1024);
++      list_for_each_entry_safe(entry, n, &efivars->list, list) {
++              strsize2 = utf16_strsize(entry->var.VariableName, 1024);
++              if (strsize1 == strsize2 &&
++                      !memcmp(variable_name, &(entry->var.VariableName),
++                              strsize2) &&
++                      !efi_guidcmp(entry->var.VendorGuid,
++                              *vendor)) {
++                      found = true;
++                      break;
++              }
++      }
++      return found;
++}
++
+ /*
+  * Returns the size of variable_name, in bytes, including the
+  * terminating NULL character, or variable_name_size if no NULL
+@@ -1889,6 +1911,28 @@ void unregister_efivars(struct efivars *
+ }
+ EXPORT_SYMBOL_GPL(unregister_efivars);
++/*
++ * Print a warning when duplicate EFI variables are encountered and
++ * disable the sysfs workqueue since the firmware is buggy.
++ */
++static void dup_variable_bug(efi_char16_t *s16, efi_guid_t *vendor_guid,
++                           unsigned long len16)
++{
++      size_t i, len8 = len16 / sizeof(efi_char16_t);
++      char *s8;
++
++      s8 = kzalloc(len8, GFP_KERNEL);
++      if (!s8)
++              return;
++
++      for (i = 0; i < len8; i++)
++              s8[i] = s16[i];
++
++      printk(KERN_WARNING "efivars: duplicate variable: %s-%pUl\n",
++             s8, vendor_guid);
++      kfree(s8);
++}
++
+ int register_efivars(struct efivars *efivars,
+                    const struct efivar_operations *ops,
+                    struct kobject *parent_kobj)
+@@ -1939,6 +1983,22 @@ int register_efivars(struct efivars *efi
+               case EFI_SUCCESS:
+                       variable_name_size = var_name_strnsize(variable_name,
+                                                              variable_name_size);
++
++                      /*
++                       * Some firmware implementations return the
++                       * same variable name on multiple calls to
++                       * get_next_variable(). Terminate the loop
++                       * immediately as there is no guarantee that
++                       * we'll ever see a different variable name,
++                       * and may end up looping here forever.
++                       */
++                      if (variable_is_present(variable_name, &vendor_guid)) {
++                              dup_variable_bug(variable_name, &vendor_guid,
++                                               variable_name_size);
++                              status = EFI_NOT_FOUND;
++                              break;
++                      }
++
+                       efivar_create_sysfs_entry(efivars,
+                                                 variable_name_size,
+                                                 variable_name,
diff --git a/queue-3.8/iwlwifi-dvm-don-t-send-hcmd-in-restart-flow.patch b/queue-3.8/iwlwifi-dvm-don-t-send-hcmd-in-restart-flow.patch
new file mode 100644 (file)
index 0000000..abe9c24
--- /dev/null
@@ -0,0 +1,64 @@
+From 2d5d50ee596361566f7f84300117cba7d7672bc5 Mon Sep 17 00:00:00 2001
+From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
+Date: Thu, 31 Jan 2013 15:03:55 +0200
+Subject: iwlwifi: dvm: don't send HCMD in restart flow
+
+From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
+
+commit 2d5d50ee596361566f7f84300117cba7d7672bc5 upstream.
+
+There is a race between the restart flow and the workers.
+The workers are cancelled after the fw is already killed
+and might send HCMD when there is no fw to handle them.
+Simply check that there is a fw to which the HCMD can be
+sent before actually sending it.
+
+Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Lingzhu Xiang <lxiang@redhat.com>
+Reviewed-by: CAI Qian <caiqian@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/wireless/iwlwifi/dvm/lib.c   |    9 +++++++++
+ drivers/net/wireless/iwlwifi/dvm/ucode.c |    4 ++--
+ 2 files changed, 11 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/wireless/iwlwifi/dvm/lib.c
++++ b/drivers/net/wireless/iwlwifi/dvm/lib.c
+@@ -1262,6 +1262,15 @@ int iwl_dvm_send_cmd(struct iwl_priv *pr
+       }
+       /*
++       * This can happen upon FW ASSERT: we clear the STATUS_FW_ERROR flag
++       * in iwl_down but cancel the workers only later.
++       */
++      if (!priv->ucode_loaded) {
++              IWL_ERR(priv, "Fw not loaded - dropping CMD: %x\n", cmd->id);
++              return -EIO;
++      }
++
++      /*
+        * Synchronous commands from this op-mode must hold
+        * the mutex, this ensures we don't try to send two
+        * (or more) synchronous commands at a time.
+--- a/drivers/net/wireless/iwlwifi/dvm/ucode.c
++++ b/drivers/net/wireless/iwlwifi/dvm/ucode.c
+@@ -450,6 +450,8 @@ int iwl_load_ucode_wait_alive(struct iwl
+               return -EIO;
+       }
++      priv->ucode_loaded = true;
++
+       /*
+        * This step takes a long time (60-80ms!!) and
+        * WoWLAN image should be loaded quickly, so
+@@ -474,8 +476,6 @@ int iwl_load_ucode_wait_alive(struct iwl
+               return ret;
+       }
+-      priv->ucode_loaded = true;
+-
+       return 0;
+ }
index 983afc9eb41757b3a105d47bf4d37be055b23fe1..4239bfd71807c7de93cb5ab5960ac9a600aa505b 100644 (file)
@@ -87,3 +87,9 @@ virtio-console-add-locking-around-c_ovq-operations.patch
 nfsd4-reject-negative-acl-lengths.patch
 drm-i915-use-the-fixed-pixel-clock-for-edp-in-intel_dp_set_m_n.patch
 drm-i915-don-t-clobber-crtc-fb-when-queue_flip-fails.patch
+iwlwifi-dvm-don-t-send-hcmd-in-restart-flow.patch
+btrfs-fix-space-leak-when-we-fail-to-reserve-metadata-space.patch
+xen-events-avoid-race-with-raising-an-event-in-unmask_evtchn.patch
+tracing-prevent-buffer-overwrite-disabled-for-latency-tracers.patch
+efivars-explicitly-calculate-length-of-variablename.patch
+efivars-handle-duplicate-names-from-get_next_variable.patch
diff --git a/queue-3.8/tracing-prevent-buffer-overwrite-disabled-for-latency-tracers.patch b/queue-3.8/tracing-prevent-buffer-overwrite-disabled-for-latency-tracers.patch
new file mode 100644 (file)
index 0000000..41ebc1b
--- /dev/null
@@ -0,0 +1,286 @@
+From 613f04a0f51e6e68ac6fe571ab79da3c0a5eb4da Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
+Date: Thu, 14 Mar 2013 15:03:53 -0400
+Subject: tracing: Prevent buffer overwrite disabled for latency tracers
+
+From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
+
+commit 613f04a0f51e6e68ac6fe571ab79da3c0a5eb4da upstream.
+
+The latency tracers require the buffers to be in overwrite mode,
+otherwise they get screwed up. Force the buffers to stay in overwrite
+mode when latency tracers are enabled.
+
+Added a flag_changed() method to the tracer structure to allow
+the tracers to see what flags are being changed, and also be able
+to prevent the change from happening.
+
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Lingzhu Xiang <lxiang@redhat.com>
+Reviewed-by: CAI Qian <caiqian@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/trace/trace.c              |   38 ++++++++++++++++++++++++++++++++------
+ kernel/trace/trace.h              |    6 ++++++
+ kernel/trace/trace_irqsoff.c      |   19 ++++++++++++++-----
+ kernel/trace/trace_sched_wakeup.c |   18 +++++++++++++-----
+ 4 files changed, 65 insertions(+), 16 deletions(-)
+
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -2836,11 +2836,25 @@ static int set_tracer_option(struct trac
+       return -EINVAL;
+ }
+-static void set_tracer_flags(unsigned int mask, int enabled)
++/* Some tracers require overwrite to stay enabled */
++int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
++{
++      if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
++              return -1;
++
++      return 0;
++}
++
++int set_tracer_flag(unsigned int mask, int enabled)
+ {
+       /* do nothing if flag is already set */
+       if (!!(trace_flags & mask) == !!enabled)
+-              return;
++              return 0;
++
++      /* Give the tracer a chance to approve the change */
++      if (current_trace->flag_changed)
++              if (current_trace->flag_changed(current_trace, mask, !!enabled))
++                      return -EINVAL;
+       if (enabled)
+               trace_flags |= mask;
+@@ -2859,13 +2873,15 @@ static void set_tracer_flags(unsigned in
+       if (mask == TRACE_ITER_PRINTK)
+               trace_printk_start_stop_comm(enabled);
++
++      return 0;
+ }
+ static int trace_set_options(char *option)
+ {
+       char *cmp;
+       int neg = 0;
+-      int ret = 0;
++      int ret = -ENODEV;
+       int i;
+       cmp = strstrip(option);
+@@ -2879,7 +2895,7 @@ static int trace_set_options(char *optio
+       for (i = 0; trace_options[i]; i++) {
+               if (strcmp(cmp, trace_options[i]) == 0) {
+-                      set_tracer_flags(1 << i, !neg);
++                      ret = set_tracer_flag(1 << i, !neg);
+                       break;
+               }
+       }
+@@ -2898,6 +2914,7 @@ tracing_trace_options_write(struct file
+                       size_t cnt, loff_t *ppos)
+ {
+       char buf[64];
++      int ret;
+       if (cnt >= sizeof(buf))
+               return -EINVAL;
+@@ -2907,7 +2924,9 @@ tracing_trace_options_write(struct file
+       buf[cnt] = 0;
+-      trace_set_options(buf);
++      ret = trace_set_options(buf);
++      if (ret < 0)
++              return ret;
+       *ppos += cnt;
+@@ -3213,6 +3232,9 @@ static int tracing_set_tracer(const char
+               goto out;
+       trace_branch_disable();
++
++      current_trace->enabled = false;
++
+       if (current_trace && current_trace->reset)
+               current_trace->reset(tr);
+       if (current_trace && current_trace->use_max_tr) {
+@@ -3244,6 +3266,7 @@ static int tracing_set_tracer(const char
+       }
+       current_trace = t;
++      current_trace->enabled = true;
+       trace_branch_enable(tr);
+  out:
+       mutex_unlock(&trace_types_lock);
+@@ -4648,9 +4671,12 @@ trace_options_core_write(struct file *fi
+               return -EINVAL;
+       mutex_lock(&trace_types_lock);
+-      set_tracer_flags(1 << index, val);
++      ret = set_tracer_flag(1 << index, val);
+       mutex_unlock(&trace_types_lock);
++      if (ret < 0)
++              return ret;
++
+       *ppos += cnt;
+       return cnt;
+--- a/kernel/trace/trace.h
++++ b/kernel/trace/trace.h
+@@ -283,10 +283,14 @@ struct tracer {
+       enum print_line_t       (*print_line)(struct trace_iterator *iter);
+       /* If you handled the flag setting, return 0 */
+       int                     (*set_flag)(u32 old_flags, u32 bit, int set);
++      /* Return 0 if OK with change, else return non-zero */
++      int                     (*flag_changed)(struct tracer *tracer,
++                                              u32 mask, int set);
+       struct tracer           *next;
+       struct tracer_flags     *flags;
+       bool                    print_max;
+       bool                    use_max_tr;
++      bool                    enabled;
+ };
+@@ -835,6 +839,8 @@ extern const char *__stop___trace_bprint
+ void trace_printk_init_buffers(void);
+ void trace_printk_start_comm(void);
++int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set);
++int set_tracer_flag(unsigned int mask, int enabled);
+ #undef FTRACE_ENTRY
+ #define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter)   \
+--- a/kernel/trace/trace_irqsoff.c
++++ b/kernel/trace/trace_irqsoff.c
+@@ -32,7 +32,7 @@ enum {
+ static int trace_type __read_mostly;
+-static int save_lat_flag;
++static int save_flags;
+ static void stop_irqsoff_tracer(struct trace_array *tr, int graph);
+ static int start_irqsoff_tracer(struct trace_array *tr, int graph);
+@@ -558,8 +558,11 @@ static void stop_irqsoff_tracer(struct t
+ static void __irqsoff_tracer_init(struct trace_array *tr)
+ {
+-      save_lat_flag = trace_flags & TRACE_ITER_LATENCY_FMT;
+-      trace_flags |= TRACE_ITER_LATENCY_FMT;
++      save_flags = trace_flags;
++
++      /* non overwrite screws up the latency tracers */
++      set_tracer_flag(TRACE_ITER_OVERWRITE, 1);
++      set_tracer_flag(TRACE_ITER_LATENCY_FMT, 1);
+       tracing_max_latency = 0;
+       irqsoff_trace = tr;
+@@ -573,10 +576,13 @@ static void __irqsoff_tracer_init(struct
+ static void irqsoff_tracer_reset(struct trace_array *tr)
+ {
++      int lat_flag = save_flags & TRACE_ITER_LATENCY_FMT;
++      int overwrite_flag = save_flags & TRACE_ITER_OVERWRITE;
++
+       stop_irqsoff_tracer(tr, is_graph());
+-      if (!save_lat_flag)
+-              trace_flags &= ~TRACE_ITER_LATENCY_FMT;
++      set_tracer_flag(TRACE_ITER_LATENCY_FMT, lat_flag);
++      set_tracer_flag(TRACE_ITER_OVERWRITE, overwrite_flag);
+ }
+ static void irqsoff_tracer_start(struct trace_array *tr)
+@@ -609,6 +615,7 @@ static struct tracer irqsoff_tracer __re
+       .print_line     = irqsoff_print_line,
+       .flags          = &tracer_flags,
+       .set_flag       = irqsoff_set_flag,
++      .flag_changed   = trace_keep_overwrite,
+ #ifdef CONFIG_FTRACE_SELFTEST
+       .selftest    = trace_selftest_startup_irqsoff,
+ #endif
+@@ -642,6 +649,7 @@ static struct tracer preemptoff_tracer _
+       .print_line     = irqsoff_print_line,
+       .flags          = &tracer_flags,
+       .set_flag       = irqsoff_set_flag,
++      .flag_changed   = trace_keep_overwrite,
+ #ifdef CONFIG_FTRACE_SELFTEST
+       .selftest    = trace_selftest_startup_preemptoff,
+ #endif
+@@ -677,6 +685,7 @@ static struct tracer preemptirqsoff_trac
+       .print_line     = irqsoff_print_line,
+       .flags          = &tracer_flags,
+       .set_flag       = irqsoff_set_flag,
++      .flag_changed   = trace_keep_overwrite,
+ #ifdef CONFIG_FTRACE_SELFTEST
+       .selftest    = trace_selftest_startup_preemptirqsoff,
+ #endif
+--- a/kernel/trace/trace_sched_wakeup.c
++++ b/kernel/trace/trace_sched_wakeup.c
+@@ -36,7 +36,7 @@ static void __wakeup_reset(struct trace_
+ static int wakeup_graph_entry(struct ftrace_graph_ent *trace);
+ static void wakeup_graph_return(struct ftrace_graph_ret *trace);
+-static int save_lat_flag;
++static int save_flags;
+ #define TRACE_DISPLAY_GRAPH     1
+@@ -540,8 +540,11 @@ static void stop_wakeup_tracer(struct tr
+ static int __wakeup_tracer_init(struct trace_array *tr)
+ {
+-      save_lat_flag = trace_flags & TRACE_ITER_LATENCY_FMT;
+-      trace_flags |= TRACE_ITER_LATENCY_FMT;
++      save_flags = trace_flags;
++
++      /* non overwrite screws up the latency tracers */
++      set_tracer_flag(TRACE_ITER_OVERWRITE, 1);
++      set_tracer_flag(TRACE_ITER_LATENCY_FMT, 1);
+       tracing_max_latency = 0;
+       wakeup_trace = tr;
+@@ -563,12 +566,15 @@ static int wakeup_rt_tracer_init(struct
+ static void wakeup_tracer_reset(struct trace_array *tr)
+ {
++      int lat_flag = save_flags & TRACE_ITER_LATENCY_FMT;
++      int overwrite_flag = save_flags & TRACE_ITER_OVERWRITE;
++
+       stop_wakeup_tracer(tr);
+       /* make sure we put back any tasks we are tracing */
+       wakeup_reset(tr);
+-      if (!save_lat_flag)
+-              trace_flags &= ~TRACE_ITER_LATENCY_FMT;
++      set_tracer_flag(TRACE_ITER_LATENCY_FMT, lat_flag);
++      set_tracer_flag(TRACE_ITER_OVERWRITE, overwrite_flag);
+ }
+ static void wakeup_tracer_start(struct trace_array *tr)
+@@ -594,6 +600,7 @@ static struct tracer wakeup_tracer __rea
+       .print_line     = wakeup_print_line,
+       .flags          = &tracer_flags,
+       .set_flag       = wakeup_set_flag,
++      .flag_changed   = trace_keep_overwrite,
+ #ifdef CONFIG_FTRACE_SELFTEST
+       .selftest    = trace_selftest_startup_wakeup,
+ #endif
+@@ -615,6 +622,7 @@ static struct tracer wakeup_rt_tracer __
+       .print_line     = wakeup_print_line,
+       .flags          = &tracer_flags,
+       .set_flag       = wakeup_set_flag,
++      .flag_changed   = trace_keep_overwrite,
+ #ifdef CONFIG_FTRACE_SELFTEST
+       .selftest    = trace_selftest_startup_wakeup,
+ #endif
diff --git a/queue-3.8/xen-events-avoid-race-with-raising-an-event-in-unmask_evtchn.patch b/queue-3.8/xen-events-avoid-race-with-raising-an-event-in-unmask_evtchn.patch
new file mode 100644 (file)
index 0000000..aea70c3
--- /dev/null
@@ -0,0 +1,76 @@
+From c26377e62f4e6bfb4d99ef88526047209701a83f Mon Sep 17 00:00:00 2001
+From: David Vrabel <david.vrabel@citrix.com>
+Date: Mon, 25 Mar 2013 14:11:19 +0000
+Subject: xen/events: avoid race with raising an event in unmask_evtchn()
+
+From: David Vrabel <david.vrabel@citrix.com>
+
+commit c26377e62f4e6bfb4d99ef88526047209701a83f upstream.
+
+In unmask_evtchn(), when the mask bit is cleared after testing for
+pending and the event becomes pending between the test and clear, then
+the upcall will not become pending and the event may be lost or
+delayed.
+
+Avoid this by always clearing the mask bit before checking for
+pending.  If a hypercall is needed, remask the event as
+EVTCHNOP_unmask will only retrigger pending events if they were
+masked.
+
+This fixes a regression introduced in 3.7 by
+b5e579232d635b79a3da052964cb357ccda8d9ea (xen/events: fix
+unmask_evtchn for PV on HVM guests) which reordered the clear mask and
+check pending operations.
+
+Changes in v2:
+- set mask before hypercall.
+
+Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
+Signed-off-by: David Vrabel <david.vrabel@citrix.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Lingzhu Xiang <lxiang@redhat.com>
+Reviewed-by: CAI Qian <caiqian@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/xen/events.c |   20 +++++++++++++++-----
+ 1 file changed, 15 insertions(+), 5 deletions(-)
+
+--- a/drivers/xen/events.c
++++ b/drivers/xen/events.c
+@@ -388,11 +388,23 @@ static void unmask_evtchn(int port)
+       if (unlikely((cpu != cpu_from_evtchn(port))))
+               do_hypercall = 1;
+-      else
++      else {
++              /*
++               * Need to clear the mask before checking pending to
++               * avoid a race with an event becoming pending.
++               *
++               * EVTCHNOP_unmask will only trigger an upcall if the
++               * mask bit was set, so if a hypercall is needed
++               * remask the event.
++               */
++              sync_clear_bit(port, &s->evtchn_mask[0]);
+               evtchn_pending = sync_test_bit(port, &s->evtchn_pending[0]);
+-      if (unlikely(evtchn_pending && xen_hvm_domain()))
+-              do_hypercall = 1;
++              if (unlikely(evtchn_pending && xen_hvm_domain())) {
++                      sync_set_bit(port, &s->evtchn_mask[0]);
++                      do_hypercall = 1;
++              }
++      }
+       /* Slow path (hypercall) if this is a non-local port or if this is
+        * an hvm domain and an event is pending (hvm domains don't have
+@@ -403,8 +415,6 @@ static void unmask_evtchn(int port)
+       } else {
+               struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+-              sync_clear_bit(port, &s->evtchn_mask[0]);
+-
+               /*
+                * The following is basically the equivalent of
+                * 'hw_resend_irq'. Just like a real IO-APIC we 'lose