From dc65ee8b892ad7d0877a35bfd111f520047963b9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 2 Apr 2013 13:10:46 -0700 Subject: [PATCH] 3.8-stable patches added patches: btrfs-fix-space-leak-when-we-fail-to-reserve-metadata-space.patch efivars-explicitly-calculate-length-of-variablename.patch efivars-handle-duplicate-names-from-get_next_variable.patch iwlwifi-dvm-don-t-send-hcmd-in-restart-flow.patch tracing-prevent-buffer-overwrite-disabled-for-latency-tracers.patch xen-events-avoid-race-with-raising-an-event-in-unmask_evtchn.patch --- ...en-we-fail-to-reserve-metadata-space.patch | 87 ++++++ ...tly-calculate-length-of-variablename.patch | 96 ++++++ ...plicate-names-from-get_next_variable.patch | 170 +++++++++++ ...-dvm-don-t-send-hcmd-in-restart-flow.patch | 64 ++++ queue-3.8/series | 6 + ...erwrite-disabled-for-latency-tracers.patch | 286 ++++++++++++++++++ ...th-raising-an-event-in-unmask_evtchn.patch | 76 +++++ 7 files changed, 785 insertions(+) create mode 100644 queue-3.8/btrfs-fix-space-leak-when-we-fail-to-reserve-metadata-space.patch create mode 100644 queue-3.8/efivars-explicitly-calculate-length-of-variablename.patch create mode 100644 queue-3.8/efivars-handle-duplicate-names-from-get_next_variable.patch create mode 100644 queue-3.8/iwlwifi-dvm-don-t-send-hcmd-in-restart-flow.patch create mode 100644 queue-3.8/tracing-prevent-buffer-overwrite-disabled-for-latency-tracers.patch create mode 100644 queue-3.8/xen-events-avoid-race-with-raising-an-event-in-unmask_evtchn.patch diff --git a/queue-3.8/btrfs-fix-space-leak-when-we-fail-to-reserve-metadata-space.patch b/queue-3.8/btrfs-fix-space-leak-when-we-fail-to-reserve-metadata-space.patch new file mode 100644 index 00000000000..9f158b5d842 --- /dev/null +++ b/queue-3.8/btrfs-fix-space-leak-when-we-fail-to-reserve-metadata-space.patch @@ -0,0 +1,87 @@ +From f4881bc7a83eff263789dd524b7c269d138d4af5 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Mon, 25 Mar 2013 16:03:35 -0400 +Subject: 
Btrfs: fix space leak when we fail to reserve metadata space + +From: Josef Bacik + +commit f4881bc7a83eff263789dd524b7c269d138d4af5 upstream. + +Dave reported a warning when running xfstest 275. We have been leaking delalloc +metadata space when our reservations fail. This is because we were improperly +calculating how much space to free for our checksum reservations. The problem +is we would sometimes free up space that had already been freed in another +thread and we would end up with negative usage for the delalloc space. This +patch fixes the problem by calculating how much space the other threads would +have already freed, and then calculate how much space we need to free had we not +done the reservation at all, and then freeing any excess space. This makes +xfstests 275 no longer have leaked space. Thanks + +Reported-by: David Sterba +Signed-off-by: Josef Bacik +Signed-off-by: Lingzhu Xiang +Reviewed-by: CAI Qian +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/extent-tree.c | 47 +++++++++++++++++++++++++++++++++++++++++------ + 1 file changed, 41 insertions(+), 6 deletions(-) + +--- a/fs/btrfs/extent-tree.c ++++ b/fs/btrfs/extent-tree.c +@@ -4601,14 +4601,49 @@ int btrfs_delalloc_reserve_metadata(stru + * If the inodes csum_bytes is the same as the original + * csum_bytes then we know we haven't raced with any free()ers + * so we can just reduce our inodes csum bytes and carry on. +- * Otherwise we have to do the normal free thing to account for +- * the case that the free side didn't free up its reserve +- * because of this outstanding reservation. 
+ */ +- if (BTRFS_I(inode)->csum_bytes == csum_bytes) ++ if (BTRFS_I(inode)->csum_bytes == csum_bytes) { + calc_csum_metadata_size(inode, num_bytes, 0); +- else +- to_free = calc_csum_metadata_size(inode, num_bytes, 0); ++ } else { ++ u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes; ++ u64 bytes; ++ ++ /* ++ * This is tricky, but first we need to figure out how much we ++ * free'd from any free-ers that occured during this ++ * reservation, so we reset ->csum_bytes to the csum_bytes ++ * before we dropped our lock, and then call the free for the ++ * number of bytes that were freed while we were trying our ++ * reservation. ++ */ ++ bytes = csum_bytes - BTRFS_I(inode)->csum_bytes; ++ BTRFS_I(inode)->csum_bytes = csum_bytes; ++ to_free = calc_csum_metadata_size(inode, bytes, 0); ++ ++ ++ /* ++ * Now we need to see how much we would have freed had we not ++ * been making this reservation and our ->csum_bytes were not ++ * artificially inflated. ++ */ ++ BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes; ++ bytes = csum_bytes - orig_csum_bytes; ++ bytes = calc_csum_metadata_size(inode, bytes, 0); ++ ++ /* ++ * Now reset ->csum_bytes to what it should be. If bytes is ++ * more than to_free then we would have free'd more space had we ++ * not had an artificially high ->csum_bytes, so we need to free ++ * the remainder. If bytes is the same or less then we don't ++ * need to do anything, the other free-ers did the correct ++ * thing. 
++ */ ++ BTRFS_I(inode)->csum_bytes = orig_csum_bytes - num_bytes; ++ if (bytes > to_free) ++ to_free = bytes - to_free; ++ else ++ to_free = 0; ++ } + spin_unlock(&BTRFS_I(inode)->lock); + if (dropped) + to_free += btrfs_calc_trans_metadata_size(root, dropped); diff --git a/queue-3.8/efivars-explicitly-calculate-length-of-variablename.patch b/queue-3.8/efivars-explicitly-calculate-length-of-variablename.patch new file mode 100644 index 00000000000..eff3d97eec1 --- /dev/null +++ b/queue-3.8/efivars-explicitly-calculate-length-of-variablename.patch @@ -0,0 +1,96 @@ +From ec50bd32f1672d38ddce10fb1841cbfda89cfe9a Mon Sep 17 00:00:00 2001 +From: Matt Fleming +Date: Fri, 1 Mar 2013 14:49:12 +0000 +Subject: efivars: explicitly calculate length of VariableName + +From: Matt Fleming + +commit ec50bd32f1672d38ddce10fb1841cbfda89cfe9a upstream. + +It's not wise to assume VariableNameSize represents the length of +VariableName, as not all firmware updates VariableNameSize in the same +way (some don't update it at all if EFI_SUCCESS is returned). 
There +are even implementations out there that update VariableNameSize with +values that are both larger than the string returned in VariableName +and smaller than the buffer passed to GetNextVariableName(), which +resulted in the following bug report from Michael Schroeder, + + > On HP z220 system (firmware version 1.54), some EFI variables are + > incorrectly named : + > + > ls -d /sys/firmware/efi/vars/*8be4d* | grep -v -- -8be returns + > /sys/firmware/efi/vars/dbxDefault-pport8be4df61-93ca-11d2-aa0d-00e098032b8c + > /sys/firmware/efi/vars/KEKDefault-pport8be4df61-93ca-11d2-aa0d-00e098032b8c + > /sys/firmware/efi/vars/SecureBoot-pport8be4df61-93ca-11d2-aa0d-00e098032b8c + > /sys/firmware/efi/vars/SetupMode-Information8be4df61-93ca-11d2-aa0d-00e098032b8c + +The issue here is that because we blindly use VariableNameSize without +verifying its value, we can potentially read garbage values from the +buffer containing VariableName if VariableNameSize is larger than the +length of VariableName. + +Since VariableName is a string, we can calculate its size by searching +for the terminating NULL character. + +[Backported for 3.8-stable. Removed workqueue code added in +a93bc0c 3.9-rc1.] + +Reported-by: Frederic Crozat +Cc: Matthew Garrett +Cc: Josh Boyer +Cc: Michael Schroeder +Cc: Lee, Chun-Yi +Cc: Lingzhu Xiang +Cc: Seiji Aguchi +Signed-off-by: Matt Fleming +Signed-off-by: Lingzhu Xiang +Reviewed-by: CAI Qian +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/firmware/efivars.c | 27 +++++++++++++++++++++++++++ + 1 file changed, 27 insertions(+) + +--- a/drivers/firmware/efivars.c ++++ b/drivers/firmware/efivars.c +@@ -1670,6 +1670,31 @@ static ssize_t efivar_delete(struct file + } + + /* ++ * Returns the size of variable_name, in bytes, including the ++ * terminating NULL character, or variable_name_size if no NULL ++ * character is found among the first variable_name_size bytes. 
++ */ ++static unsigned long var_name_strnsize(efi_char16_t *variable_name, ++ unsigned long variable_name_size) ++{ ++ unsigned long len; ++ efi_char16_t c; ++ ++ /* ++ * The variable name is, by definition, a NULL-terminated ++ * string, so make absolutely sure that variable_name_size is ++ * the value we expect it to be. If not, return the real size. ++ */ ++ for (len = 2; len <= variable_name_size; len += sizeof(c)) { ++ c = variable_name[(len / sizeof(c)) - 1]; ++ if (!c) ++ break; ++ } ++ ++ return min(len, variable_name_size); ++} ++ ++/* + * Let's not leave out systab information that snuck into + * the efivars driver + */ +@@ -1912,6 +1937,8 @@ int register_efivars(struct efivars *efi + &vendor_guid); + switch (status) { + case EFI_SUCCESS: ++ variable_name_size = var_name_strnsize(variable_name, ++ variable_name_size); + efivar_create_sysfs_entry(efivars, + variable_name_size, + variable_name, diff --git a/queue-3.8/efivars-handle-duplicate-names-from-get_next_variable.patch b/queue-3.8/efivars-handle-duplicate-names-from-get_next_variable.patch new file mode 100644 index 00000000000..1fd20924658 --- /dev/null +++ b/queue-3.8/efivars-handle-duplicate-names-from-get_next_variable.patch @@ -0,0 +1,170 @@ +From e971318bbed610e28bb3fde9d548e6aaf0a6b02e Mon Sep 17 00:00:00 2001 +From: Matt Fleming +Date: Thu, 7 Mar 2013 11:59:14 +0000 +Subject: efivars: Handle duplicate names from get_next_variable() + +From: Matt Fleming + +commit e971318bbed610e28bb3fde9d548e6aaf0a6b02e upstream. + +Some firmware exhibits a bug where the same VariableName and +VendorGuid values are returned on multiple invocations of +GetNextVariableName(). See, + + https://bugzilla.kernel.org/show_bug.cgi?id=47631 + +As a consequence of such a bug, Andre reports hitting the following +WARN_ON() in the sysfs code after updating the BIOS on his, "Gigabyte +Technology Co., Ltd. 
To be filled by O.E.M./Z77X-UD3H, BIOS F19e +11/21/2012)" machine, + +[ 0.581554] EFI Variables Facility v0.08 2004-May-17 +[ 0.584914] ------------[ cut here ]------------ +[ 0.585639] WARNING: at /home/andre/linux/fs/sysfs/dir.c:536 sysfs_add_one+0xd4/0x100() +[ 0.586381] Hardware name: To be filled by O.E.M. +[ 0.587123] sysfs: cannot create duplicate filename '/firmware/efi/vars/SbAslBufferPtrVar-01f33c25-764d-43ea-aeea-6b5a41f3f3e8' +[ 0.588694] Modules linked in: +[ 0.589484] Pid: 1, comm: swapper/0 Not tainted 3.8.0+ #7 +[ 0.590280] Call Trace: +[ 0.591066] [] ? sysfs_add_one+0xd4/0x100 +[ 0.591861] [] warn_slowpath_common+0x7f/0xc0 +[ 0.592650] [] warn_slowpath_fmt+0x4c/0x50 +[ 0.593429] [] ? strlcat+0x65/0x80 +[ 0.594203] [] sysfs_add_one+0xd4/0x100 +[ 0.594979] [] create_dir+0x78/0xd0 +[ 0.595753] [] sysfs_create_dir+0x86/0xe0 +[ 0.596532] [] kobject_add_internal+0x9c/0x220 +[ 0.597310] [] kobject_init_and_add+0x67/0x90 +[ 0.598083] [] ? efivar_create_sysfs_entry+0x61/0x1c0 +[ 0.598859] [] efivar_create_sysfs_entry+0x11b/0x1c0 +[ 0.599631] [] register_efivars+0xde/0x420 +[ 0.600395] [] ? edd_init+0x2f5/0x2f5 +[ 0.601150] [] efivars_init+0xb8/0x104 +[ 0.601903] [] do_one_initcall+0x12a/0x180 +[ 0.602659] [] kernel_init_freeable+0x13e/0x1c6 +[ 0.603418] [] ? loglevel+0x31/0x31 +[ 0.604183] [] ? rest_init+0x80/0x80 +[ 0.604936] [] kernel_init+0xe/0xf0 +[ 0.605681] [] ret_from_fork+0x7c/0xb0 +[ 0.606414] [] ? rest_init+0x80/0x80 +[ 0.607143] ---[ end trace 1609741ab737eb29 ]--- + +There's not much we can do to work around and keep traversing the +variable list once we hit this firmware bug. 
Our only solution is to +terminate the loop because, as Lingzhu reports, some machines get +stuck when they encounter duplicate names, + + > I had an IBM System x3100 M4 and x3850 X5 on which kernel would + > get stuck in infinite loop creating duplicate sysfs files because, + > for some reason, there are several duplicate boot entries in nvram + > getting GetNextVariableName into a circle of iteration (with + > period > 2). + +Also disable the workqueue, as efivar_update_sysfs_entries() uses +GetNextVariableName() to figure out which variables have been created +since the last iteration. That algorithm isn't going to work if +GetNextVariableName() returns duplicates. Note that we don't disable +EFI variable creation completely on the affected machines, it's just +that any pstore dump-* files won't appear in sysfs until the next +boot. + +[Backported for 3.8-stable. Removed code related to pstore +workqueue but pulled in helper function variable_is_present +from a93bc0c.] + +Reported-by: Andre Heider +Reported-by: Lingzhu Xiang +Tested-by: Lingzhu Xiang +Cc: Seiji Aguchi +Signed-off-by: Matt Fleming +Signed-off-by: Lingzhu Xiang +Reviewed-by: CAI Qian +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/firmware/efivars.c | 60 +++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 60 insertions(+) + +--- a/drivers/firmware/efivars.c ++++ b/drivers/firmware/efivars.c +@@ -1669,6 +1669,28 @@ static ssize_t efivar_delete(struct file + return count; + } + ++static bool variable_is_present(efi_char16_t *variable_name, efi_guid_t *vendor) ++{ ++ struct efivar_entry *entry, *n; ++ struct efivars *efivars = &__efivars; ++ unsigned long strsize1, strsize2; ++ bool found = false; ++ ++ strsize1 = utf16_strsize(variable_name, 1024); ++ list_for_each_entry_safe(entry, n, &efivars->list, list) { ++ strsize2 = utf16_strsize(entry->var.VariableName, 1024); ++ if (strsize1 == strsize2 && ++ !memcmp(variable_name, &(entry->var.VariableName), ++ strsize2) && ++ 
!efi_guidcmp(entry->var.VendorGuid, ++ *vendor)) { ++ found = true; ++ break; ++ } ++ } ++ return found; ++} ++ + /* + * Returns the size of variable_name, in bytes, including the + * terminating NULL character, or variable_name_size if no NULL +@@ -1889,6 +1911,28 @@ void unregister_efivars(struct efivars * + } + EXPORT_SYMBOL_GPL(unregister_efivars); + ++/* ++ * Print a warning when duplicate EFI variables are encountered and ++ * disable the sysfs workqueue since the firmware is buggy. ++ */ ++static void dup_variable_bug(efi_char16_t *s16, efi_guid_t *vendor_guid, ++ unsigned long len16) ++{ ++ size_t i, len8 = len16 / sizeof(efi_char16_t); ++ char *s8; ++ ++ s8 = kzalloc(len8, GFP_KERNEL); ++ if (!s8) ++ return; ++ ++ for (i = 0; i < len8; i++) ++ s8[i] = s16[i]; ++ ++ printk(KERN_WARNING "efivars: duplicate variable: %s-%pUl\n", ++ s8, vendor_guid); ++ kfree(s8); ++} ++ + int register_efivars(struct efivars *efivars, + const struct efivar_operations *ops, + struct kobject *parent_kobj) +@@ -1939,6 +1983,22 @@ int register_efivars(struct efivars *efi + case EFI_SUCCESS: + variable_name_size = var_name_strnsize(variable_name, + variable_name_size); ++ ++ /* ++ * Some firmware implementations return the ++ * same variable name on multiple calls to ++ * get_next_variable(). Terminate the loop ++ * immediately as there is no guarantee that ++ * we'll ever see a different variable name, ++ * and may end up looping here forever. 
++ */ ++ if (variable_is_present(variable_name, &vendor_guid)) { ++ dup_variable_bug(variable_name, &vendor_guid, ++ variable_name_size); ++ status = EFI_NOT_FOUND; ++ break; ++ } ++ + efivar_create_sysfs_entry(efivars, + variable_name_size, + variable_name, diff --git a/queue-3.8/iwlwifi-dvm-don-t-send-hcmd-in-restart-flow.patch b/queue-3.8/iwlwifi-dvm-don-t-send-hcmd-in-restart-flow.patch new file mode 100644 index 00000000000..abe9c2458bb --- /dev/null +++ b/queue-3.8/iwlwifi-dvm-don-t-send-hcmd-in-restart-flow.patch @@ -0,0 +1,64 @@ +From 2d5d50ee596361566f7f84300117cba7d7672bc5 Mon Sep 17 00:00:00 2001 +From: Emmanuel Grumbach +Date: Thu, 31 Jan 2013 15:03:55 +0200 +Subject: iwlwifi: dvm: don't send HCMD in restart flow + +From: Emmanuel Grumbach + +commit 2d5d50ee596361566f7f84300117cba7d7672bc5 upstream. + +There is a race between the restart flow and the workers. +The workers are cancelled after the fw is already killed +and might send HCMD when there is no fw to handle them. +Simply check that there is a fw to which the HCMD can be +sent before actually sending it. + +Signed-off-by: Emmanuel Grumbach +Signed-off-by: Johannes Berg +Signed-off-by: Lingzhu Xiang +Reviewed-by: CAI Qian +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/wireless/iwlwifi/dvm/lib.c | 9 +++++++++ + drivers/net/wireless/iwlwifi/dvm/ucode.c | 4 ++-- + 2 files changed, 11 insertions(+), 2 deletions(-) + +--- a/drivers/net/wireless/iwlwifi/dvm/lib.c ++++ b/drivers/net/wireless/iwlwifi/dvm/lib.c +@@ -1262,6 +1262,15 @@ int iwl_dvm_send_cmd(struct iwl_priv *pr + } + + /* ++ * This can happen upon FW ASSERT: we clear the STATUS_FW_ERROR flag ++ * in iwl_down but cancel the workers only later. ++ */ ++ if (!priv->ucode_loaded) { ++ IWL_ERR(priv, "Fw not loaded - dropping CMD: %x\n", cmd->id); ++ return -EIO; ++ } ++ ++ /* + * Synchronous commands from this op-mode must hold + * the mutex, this ensures we don't try to send two + * (or more) synchronous commands at a time. 
+--- a/drivers/net/wireless/iwlwifi/dvm/ucode.c ++++ b/drivers/net/wireless/iwlwifi/dvm/ucode.c +@@ -450,6 +450,8 @@ int iwl_load_ucode_wait_alive(struct iwl + return -EIO; + } + ++ priv->ucode_loaded = true; ++ + /* + * This step takes a long time (60-80ms!!) and + * WoWLAN image should be loaded quickly, so +@@ -474,8 +476,6 @@ int iwl_load_ucode_wait_alive(struct iwl + return ret; + } + +- priv->ucode_loaded = true; +- + return 0; + } + diff --git a/queue-3.8/series b/queue-3.8/series index 983afc9eb41..4239bfd7180 100644 --- a/queue-3.8/series +++ b/queue-3.8/series @@ -87,3 +87,9 @@ virtio-console-add-locking-around-c_ovq-operations.patch nfsd4-reject-negative-acl-lengths.patch drm-i915-use-the-fixed-pixel-clock-for-edp-in-intel_dp_set_m_n.patch drm-i915-don-t-clobber-crtc-fb-when-queue_flip-fails.patch +iwlwifi-dvm-don-t-send-hcmd-in-restart-flow.patch +btrfs-fix-space-leak-when-we-fail-to-reserve-metadata-space.patch +xen-events-avoid-race-with-raising-an-event-in-unmask_evtchn.patch +tracing-prevent-buffer-overwrite-disabled-for-latency-tracers.patch +efivars-explicitly-calculate-length-of-variablename.patch +efivars-handle-duplicate-names-from-get_next_variable.patch diff --git a/queue-3.8/tracing-prevent-buffer-overwrite-disabled-for-latency-tracers.patch b/queue-3.8/tracing-prevent-buffer-overwrite-disabled-for-latency-tracers.patch new file mode 100644 index 00000000000..41ebc1b9516 --- /dev/null +++ b/queue-3.8/tracing-prevent-buffer-overwrite-disabled-for-latency-tracers.patch @@ -0,0 +1,286 @@ +From 613f04a0f51e6e68ac6fe571ab79da3c0a5eb4da Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (Red Hat)" +Date: Thu, 14 Mar 2013 15:03:53 -0400 +Subject: tracing: Prevent buffer overwrite disabled for latency tracers + +From: "Steven Rostedt (Red Hat)" + +commit 613f04a0f51e6e68ac6fe571ab79da3c0a5eb4da upstream. + +The latency tracers require the buffers to be in overwrite mode, +otherwise they get screwed up. 
Force the buffers to stay in overwrite +mode when latency tracers are enabled. + +Added a flag_changed() method to the tracer structure to allow +the tracers to see what flags are being changed, and also be able +to prevent the change from happening. + +Signed-off-by: Steven Rostedt +Signed-off-by: Lingzhu Xiang +Reviewed-by: CAI Qian +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/trace/trace.c | 38 ++++++++++++++++++++++++++++++++------ + kernel/trace/trace.h | 6 ++++++ + kernel/trace/trace_irqsoff.c | 19 ++++++++++++++----- + kernel/trace/trace_sched_wakeup.c | 18 +++++++++++++----- + 4 files changed, 65 insertions(+), 16 deletions(-) + +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -2836,11 +2836,25 @@ static int set_tracer_option(struct trac + return -EINVAL; + } + +-static void set_tracer_flags(unsigned int mask, int enabled) ++/* Some tracers require overwrite to stay enabled */ ++int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set) ++{ ++ if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set) ++ return -1; ++ ++ return 0; ++} ++ ++int set_tracer_flag(unsigned int mask, int enabled) + { + /* do nothing if flag is already set */ + if (!!(trace_flags & mask) == !!enabled) +- return; ++ return 0; ++ ++ /* Give the tracer a chance to approve the change */ ++ if (current_trace->flag_changed) ++ if (current_trace->flag_changed(current_trace, mask, !!enabled)) ++ return -EINVAL; + + if (enabled) + trace_flags |= mask; +@@ -2859,13 +2873,15 @@ static void set_tracer_flags(unsigned in + + if (mask == TRACE_ITER_PRINTK) + trace_printk_start_stop_comm(enabled); ++ ++ return 0; + } + + static int trace_set_options(char *option) + { + char *cmp; + int neg = 0; +- int ret = 0; ++ int ret = -ENODEV; + int i; + + cmp = strstrip(option); +@@ -2879,7 +2895,7 @@ static int trace_set_options(char *optio + + for (i = 0; trace_options[i]; i++) { + if (strcmp(cmp, trace_options[i]) == 0) { +- set_tracer_flags(1 << i, !neg); ++ ret = 
set_tracer_flag(1 << i, !neg); + break; + } + } +@@ -2898,6 +2914,7 @@ tracing_trace_options_write(struct file + size_t cnt, loff_t *ppos) + { + char buf[64]; ++ int ret; + + if (cnt >= sizeof(buf)) + return -EINVAL; +@@ -2907,7 +2924,9 @@ tracing_trace_options_write(struct file + + buf[cnt] = 0; + +- trace_set_options(buf); ++ ret = trace_set_options(buf); ++ if (ret < 0) ++ return ret; + + *ppos += cnt; + +@@ -3213,6 +3232,9 @@ static int tracing_set_tracer(const char + goto out; + + trace_branch_disable(); ++ ++ current_trace->enabled = false; ++ + if (current_trace && current_trace->reset) + current_trace->reset(tr); + if (current_trace && current_trace->use_max_tr) { +@@ -3244,6 +3266,7 @@ static int tracing_set_tracer(const char + } + + current_trace = t; ++ current_trace->enabled = true; + trace_branch_enable(tr); + out: + mutex_unlock(&trace_types_lock); +@@ -4648,9 +4671,12 @@ trace_options_core_write(struct file *fi + return -EINVAL; + + mutex_lock(&trace_types_lock); +- set_tracer_flags(1 << index, val); ++ ret = set_tracer_flag(1 << index, val); + mutex_unlock(&trace_types_lock); + ++ if (ret < 0) ++ return ret; ++ + *ppos += cnt; + + return cnt; +--- a/kernel/trace/trace.h ++++ b/kernel/trace/trace.h +@@ -283,10 +283,14 @@ struct tracer { + enum print_line_t (*print_line)(struct trace_iterator *iter); + /* If you handled the flag setting, return 0 */ + int (*set_flag)(u32 old_flags, u32 bit, int set); ++ /* Return 0 if OK with change, else return non-zero */ ++ int (*flag_changed)(struct tracer *tracer, ++ u32 mask, int set); + struct tracer *next; + struct tracer_flags *flags; + bool print_max; + bool use_max_tr; ++ bool enabled; + }; + + +@@ -835,6 +839,8 @@ extern const char *__stop___trace_bprint + + void trace_printk_init_buffers(void); + void trace_printk_start_comm(void); ++int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set); ++int set_tracer_flag(unsigned int mask, int enabled); + + #undef FTRACE_ENTRY + #define 
FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \ +--- a/kernel/trace/trace_irqsoff.c ++++ b/kernel/trace/trace_irqsoff.c +@@ -32,7 +32,7 @@ enum { + + static int trace_type __read_mostly; + +-static int save_lat_flag; ++static int save_flags; + + static void stop_irqsoff_tracer(struct trace_array *tr, int graph); + static int start_irqsoff_tracer(struct trace_array *tr, int graph); +@@ -558,8 +558,11 @@ static void stop_irqsoff_tracer(struct t + + static void __irqsoff_tracer_init(struct trace_array *tr) + { +- save_lat_flag = trace_flags & TRACE_ITER_LATENCY_FMT; +- trace_flags |= TRACE_ITER_LATENCY_FMT; ++ save_flags = trace_flags; ++ ++ /* non overwrite screws up the latency tracers */ ++ set_tracer_flag(TRACE_ITER_OVERWRITE, 1); ++ set_tracer_flag(TRACE_ITER_LATENCY_FMT, 1); + + tracing_max_latency = 0; + irqsoff_trace = tr; +@@ -573,10 +576,13 @@ static void __irqsoff_tracer_init(struct + + static void irqsoff_tracer_reset(struct trace_array *tr) + { ++ int lat_flag = save_flags & TRACE_ITER_LATENCY_FMT; ++ int overwrite_flag = save_flags & TRACE_ITER_OVERWRITE; ++ + stop_irqsoff_tracer(tr, is_graph()); + +- if (!save_lat_flag) +- trace_flags &= ~TRACE_ITER_LATENCY_FMT; ++ set_tracer_flag(TRACE_ITER_LATENCY_FMT, lat_flag); ++ set_tracer_flag(TRACE_ITER_OVERWRITE, overwrite_flag); + } + + static void irqsoff_tracer_start(struct trace_array *tr) +@@ -609,6 +615,7 @@ static struct tracer irqsoff_tracer __re + .print_line = irqsoff_print_line, + .flags = &tracer_flags, + .set_flag = irqsoff_set_flag, ++ .flag_changed = trace_keep_overwrite, + #ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_irqsoff, + #endif +@@ -642,6 +649,7 @@ static struct tracer preemptoff_tracer _ + .print_line = irqsoff_print_line, + .flags = &tracer_flags, + .set_flag = irqsoff_set_flag, ++ .flag_changed = trace_keep_overwrite, + #ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_preemptoff, + #endif +@@ -677,6 +685,7 @@ static struct tracer 
preemptirqsoff_trac + .print_line = irqsoff_print_line, + .flags = &tracer_flags, + .set_flag = irqsoff_set_flag, ++ .flag_changed = trace_keep_overwrite, + #ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_preemptirqsoff, + #endif +--- a/kernel/trace/trace_sched_wakeup.c ++++ b/kernel/trace/trace_sched_wakeup.c +@@ -36,7 +36,7 @@ static void __wakeup_reset(struct trace_ + static int wakeup_graph_entry(struct ftrace_graph_ent *trace); + static void wakeup_graph_return(struct ftrace_graph_ret *trace); + +-static int save_lat_flag; ++static int save_flags; + + #define TRACE_DISPLAY_GRAPH 1 + +@@ -540,8 +540,11 @@ static void stop_wakeup_tracer(struct tr + + static int __wakeup_tracer_init(struct trace_array *tr) + { +- save_lat_flag = trace_flags & TRACE_ITER_LATENCY_FMT; +- trace_flags |= TRACE_ITER_LATENCY_FMT; ++ save_flags = trace_flags; ++ ++ /* non overwrite screws up the latency tracers */ ++ set_tracer_flag(TRACE_ITER_OVERWRITE, 1); ++ set_tracer_flag(TRACE_ITER_LATENCY_FMT, 1); + + tracing_max_latency = 0; + wakeup_trace = tr; +@@ -563,12 +566,15 @@ static int wakeup_rt_tracer_init(struct + + static void wakeup_tracer_reset(struct trace_array *tr) + { ++ int lat_flag = save_flags & TRACE_ITER_LATENCY_FMT; ++ int overwrite_flag = save_flags & TRACE_ITER_OVERWRITE; ++ + stop_wakeup_tracer(tr); + /* make sure we put back any tasks we are tracing */ + wakeup_reset(tr); + +- if (!save_lat_flag) +- trace_flags &= ~TRACE_ITER_LATENCY_FMT; ++ set_tracer_flag(TRACE_ITER_LATENCY_FMT, lat_flag); ++ set_tracer_flag(TRACE_ITER_OVERWRITE, overwrite_flag); + } + + static void wakeup_tracer_start(struct trace_array *tr) +@@ -594,6 +600,7 @@ static struct tracer wakeup_tracer __rea + .print_line = wakeup_print_line, + .flags = &tracer_flags, + .set_flag = wakeup_set_flag, ++ .flag_changed = trace_keep_overwrite, + #ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_wakeup, + #endif +@@ -615,6 +622,7 @@ static struct tracer wakeup_rt_tracer __ 
+ .print_line = wakeup_print_line, + .flags = &tracer_flags, + .set_flag = wakeup_set_flag, ++ .flag_changed = trace_keep_overwrite, + #ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_wakeup, + #endif diff --git a/queue-3.8/xen-events-avoid-race-with-raising-an-event-in-unmask_evtchn.patch b/queue-3.8/xen-events-avoid-race-with-raising-an-event-in-unmask_evtchn.patch new file mode 100644 index 00000000000..aea70c37d85 --- /dev/null +++ b/queue-3.8/xen-events-avoid-race-with-raising-an-event-in-unmask_evtchn.patch @@ -0,0 +1,76 @@ +From c26377e62f4e6bfb4d99ef88526047209701a83f Mon Sep 17 00:00:00 2001 +From: David Vrabel +Date: Mon, 25 Mar 2013 14:11:19 +0000 +Subject: xen/events: avoid race with raising an event in unmask_evtchn() + +From: David Vrabel + +commit c26377e62f4e6bfb4d99ef88526047209701a83f upstream. + +In unmask_evtchn(), when the mask bit is cleared after testing for +pending and the event becomes pending between the test and clear, then +the upcall will not become pending and the event may be lost or +delayed. + +Avoid this by always clearing the mask bit before checking for +pending. If a hypercall is needed, remask the event as +EVTCHNOP_unmask will only retrigger pending events if they were +masked. + +This fixes a regression introduced in 3.7 by +b5e579232d635b79a3da052964cb357ccda8d9ea (xen/events: fix +unmask_evtchn for PV on HVM guests) which reordered the clear mask and +check pending operations. + +Changes in v2: +- set mask before hypercall. 
+ +Acked-by: Stefano Stabellini +Signed-off-by: David Vrabel +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Lingzhu Xiang +Reviewed-by: CAI Qian +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/xen/events.c | 20 +++++++++++++++----- + 1 file changed, 15 insertions(+), 5 deletions(-) + +--- a/drivers/xen/events.c ++++ b/drivers/xen/events.c +@@ -388,11 +388,23 @@ static void unmask_evtchn(int port) + + if (unlikely((cpu != cpu_from_evtchn(port)))) + do_hypercall = 1; +- else ++ else { ++ /* ++ * Need to clear the mask before checking pending to ++ * avoid a race with an event becoming pending. ++ * ++ * EVTCHNOP_unmask will only trigger an upcall if the ++ * mask bit was set, so if a hypercall is needed ++ * remask the event. ++ */ ++ sync_clear_bit(port, &s->evtchn_mask[0]); + evtchn_pending = sync_test_bit(port, &s->evtchn_pending[0]); + +- if (unlikely(evtchn_pending && xen_hvm_domain())) +- do_hypercall = 1; ++ if (unlikely(evtchn_pending && xen_hvm_domain())) { ++ sync_set_bit(port, &s->evtchn_mask[0]); ++ do_hypercall = 1; ++ } ++ } + + /* Slow path (hypercall) if this is a non-local port or if this is + * an hvm domain and an event is pending (hvm domains don't have +@@ -403,8 +415,6 @@ static void unmask_evtchn(int port) + } else { + struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); + +- sync_clear_bit(port, &s->evtchn_mask[0]); +- + /* + * The following is basically the equivalent of + * 'hw_resend_irq'. Just like a real IO-APIC we 'lose -- 2.47.3