From: Greg Kroah-Hartman Date: Fri, 17 Apr 2015 10:05:54 +0000 (+0200) Subject: 3.19-stable patches X-Git-Tag: v3.10.75~21 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=0668ed5f8793618463bf80704ac23bd335facf76;p=thirdparty%2Fkernel%2Fstable-queue.git 3.19-stable patches added patches: btrfs-simplify-insert_orphan_item.patch cpufreq-schedule-work-for-the-first-online-cpu-on-resume.patch ib-uverbs-prevent-integer-overflow-in-ib_umem_get-address-arithmetic.patch iwlwifi-dvm-run-init-firmware-again-upon-.start.patch libata-blacklist-queued-trim-on-samsung-ssd-850-pro.patch libata-update-crucial-micron-blacklist.patch mac80211-fix-rx-a-mpdu-session-reorder-timer-deletion.patch mm-fix-anon_vma-degree-underflow-in-anon_vma-endless-growing-prevention.patch mm-memory-hotplug-postpone-the-reset-of-obsolete-pgdat.patch mm-page_alloc.c-call-kernel_map_pages-in-unset_migrateype_isolate.patch nbd-fix-possible-memory-leak.patch nfsd-return-correct-lockowner-when-there-is-a-race-on-hash-insert.patch nfsd-return-correct-openowner-when-there-is-a-race-to-put-one-in-the-hash.patch powerpc-fix-memory-corruption-by-pnv_alloc_idle_core_states.patch powerpc-re-enable-dynticks.patch radeon-do-not-directly-dereference-pointers-to-bios-area.patch sched-fix-rlimit_rttime-when-pi-boosting-to-rt.patch sunrpc-make-debugfs-file-creation-failure-non-fatal.patch writeback-add-missing-initial_jiffies-init-in-global_update_bandwidth.patch writeback-fix-possible-underflow-in-write-bandwidth-calculation.patch x86-xen-prepare-p2m-list-for-memory-hotplug.patch xen-balloon-before-adding-hotplugged-memory-set-frames-to-invalid.patch --- diff --git a/queue-3.19/btrfs-simplify-insert_orphan_item.patch b/queue-3.19/btrfs-simplify-insert_orphan_item.patch new file mode 100644 index 00000000000..ebf2b149ef2 --- /dev/null +++ b/queue-3.19/btrfs-simplify-insert_orphan_item.patch @@ -0,0 +1,50 @@ +From 9c4f61f01d269815bb7c37be3ede59c5587747c6 Mon Sep 17 00:00:00 2001 +From: David Sterba +Date: Fri, 2 Jan 
2015 19:12:57 +0100 +Subject: btrfs: simplify insert_orphan_item + +From: David Sterba + +commit 9c4f61f01d269815bb7c37be3ede59c5587747c6 upstream. + +We can search and add the orphan item in one go, +btrfs_insert_orphan_item will find out if the item already exists. + +Signed-off-by: David Sterba +Cc: Chris Mason +Cc: Roman Mamedov +Signed-off-by: Greg Kroah-Hartman + + +--- + fs/btrfs/tree-log.c | 16 ++++------------ + 1 file changed, 4 insertions(+), 12 deletions(-) + +--- a/fs/btrfs/tree-log.c ++++ b/fs/btrfs/tree-log.c +@@ -1266,21 +1266,13 @@ out: + } + + static int insert_orphan_item(struct btrfs_trans_handle *trans, +- struct btrfs_root *root, u64 offset) ++ struct btrfs_root *root, u64 ino) + { + int ret; +- struct btrfs_path *path; + +- path = btrfs_alloc_path(); +- if (!path) +- return -ENOMEM; +- +- ret = btrfs_find_item(root, path, BTRFS_ORPHAN_OBJECTID, +- offset, BTRFS_ORPHAN_ITEM_KEY, NULL); +- if (ret > 0) +- ret = btrfs_insert_orphan_item(trans, root, offset); +- +- btrfs_free_path(path); ++ ret = btrfs_insert_orphan_item(trans, root, ino); ++ if (ret == -EEXIST) ++ ret = 0; + + return ret; + } diff --git a/queue-3.19/cpufreq-schedule-work-for-the-first-online-cpu-on-resume.patch b/queue-3.19/cpufreq-schedule-work-for-the-first-online-cpu-on-resume.patch new file mode 100644 index 00000000000..4f4457eb539 --- /dev/null +++ b/queue-3.19/cpufreq-schedule-work-for-the-first-online-cpu-on-resume.patch @@ -0,0 +1,68 @@ +From c75de0ac0756d4b442f460e10461720c7c2412c2 Mon Sep 17 00:00:00 2001 +From: Viresh Kumar +Date: Thu, 2 Apr 2015 10:21:33 +0530 +Subject: cpufreq: Schedule work for the first-online CPU on resume + +From: Viresh Kumar + +commit c75de0ac0756d4b442f460e10461720c7c2412c2 upstream. + +All CPUs leaving the first-online CPU are hotplugged out on suspend and +cpufreq core stops managing them. 
+ +On resume, we need to call cpufreq_update_policy() for this CPU's policy +to make sure its frequency is in sync with cpufreq's cached value, as it +might have got updated by hardware during suspend/resume. + +The policies are always added to the top of the policy-list. So, in +normal circumstances, CPU 0's policy will be the last one in the list. +And so the code checks for the last policy. + +But there are cases where it will fail. Consider quad-core system, with +policy-per core. If CPU0 is hotplugged out and added back again, the +last policy will be on CPU1 :( + +To fix this in a proper way, always look for the policy of the first +online CPU. That way we will be sure that we are calling +cpufreq_update_policy() for the only CPU that wasn't hotplugged out. + +Fixes: 2f0aea936360 ("cpufreq: suspend governors on system suspend/hibernate") +Reported-by: Saravana Kannan +Signed-off-by: Viresh Kumar +Acked-by: Saravana Kannan +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/cpufreq/cpufreq.c | 19 +++++++++++-------- + 1 file changed, 11 insertions(+), 8 deletions(-) + +--- a/drivers/cpufreq/cpufreq.c ++++ b/drivers/cpufreq/cpufreq.c +@@ -1724,15 +1724,18 @@ void cpufreq_resume(void) + || __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS)) + pr_err("%s: Failed to start governor for policy: %p\n", + __func__, policy); +- +- /* +- * schedule call cpufreq_update_policy() for boot CPU, i.e. last +- * policy in list. It will verify that the current freq is in +- * sync with what we believe it to be. +- */ +- if (list_is_last(&policy->policy_list, &cpufreq_policy_list)) +- schedule_work(&policy->update); + } ++ ++ /* ++ * schedule call cpufreq_update_policy() for first-online CPU, as that ++ * wouldn't be hotplugged-out on suspend. It will verify that the ++ * current freq is in sync with what we believe it to be. 
++ */ ++ policy = cpufreq_cpu_get_raw(cpumask_first(cpu_online_mask)); ++ if (WARN_ON(!policy)) ++ return; ++ ++ schedule_work(&policy->update); + } + + /** diff --git a/queue-3.19/ib-uverbs-prevent-integer-overflow-in-ib_umem_get-address-arithmetic.patch b/queue-3.19/ib-uverbs-prevent-integer-overflow-in-ib_umem_get-address-arithmetic.patch new file mode 100644 index 00000000000..9f302273d55 --- /dev/null +++ b/queue-3.19/ib-uverbs-prevent-integer-overflow-in-ib_umem_get-address-arithmetic.patch @@ -0,0 +1,47 @@ +From 8494057ab5e40df590ef6ef7d66324d3ae33356b Mon Sep 17 00:00:00 2001 +From: Shachar Raindel +Date: Wed, 18 Mar 2015 17:39:08 +0000 +Subject: IB/uverbs: Prevent integer overflow in ib_umem_get address arithmetic + +From: Shachar Raindel + +commit 8494057ab5e40df590ef6ef7d66324d3ae33356b upstream. + +Properly verify that the resulting page aligned end address is larger +than both the start address and the length of the memory area requested. + +Both the start and length arguments for ib_umem_get are controlled by +the user. A misbehaving user can provide values which will cause an +integer overflow when calculating the page aligned end address. + +This overflow can cause also miscalculation of the number of pages +mapped, and additional logic issues. + +Addresses: CVE-2014-8159 +Signed-off-by: Shachar Raindel +Signed-off-by: Jack Morgenstein +Signed-off-by: Or Gerlitz +Signed-off-by: Roland Dreier +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/infiniband/core/umem.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/drivers/infiniband/core/umem.c ++++ b/drivers/infiniband/core/umem.c +@@ -99,6 +99,14 @@ struct ib_umem *ib_umem_get(struct ib_uc + if (dmasync) + dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs); + ++ /* ++ * If the combination of the addr and size requested for this memory ++ * region causes an integer overflow, return error. 
++ */ ++ if ((PAGE_ALIGN(addr + size) <= size) || ++ (PAGE_ALIGN(addr + size) <= addr)) ++ return ERR_PTR(-EINVAL); ++ + if (!can_do_mlock()) + return ERR_PTR(-EPERM); + diff --git a/queue-3.19/iwlwifi-dvm-run-init-firmware-again-upon-.start.patch b/queue-3.19/iwlwifi-dvm-run-init-firmware-again-upon-.start.patch new file mode 100644 index 00000000000..d3bd0a14b8b --- /dev/null +++ b/queue-3.19/iwlwifi-dvm-run-init-firmware-again-upon-.start.patch @@ -0,0 +1,58 @@ +From 9c8928f5176766bec79f272bd47b7124e11cccbd Mon Sep 17 00:00:00 2001 +From: Emmanuel Grumbach +Date: Mon, 16 Mar 2015 09:08:07 +0200 +Subject: iwlwifi: dvm: run INIT firmware again upon .start() + +From: Emmanuel Grumbach + +commit 9c8928f5176766bec79f272bd47b7124e11cccbd upstream. + +The assumption before this patch was that we don't need to +run again the INIT firmware after the system booted. The +INIT firmware runs calibrations which impact the physical +layer's behavior. +Users reported that it may be helpful to run these +calibrations again every time the interface is brought up. +The penalty is minimal, since the calibrations run fast. 
+This fixes: +https://bugzilla.kernel.org/show_bug.cgi?id=94341 + +Signed-off-by: Emmanuel Grumbach +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/wireless/iwlwifi/dvm/dev.h | 1 - + drivers/net/wireless/iwlwifi/dvm/ucode.c | 5 ----- + 2 files changed, 6 deletions(-) + +--- a/drivers/net/wireless/iwlwifi/dvm/dev.h ++++ b/drivers/net/wireless/iwlwifi/dvm/dev.h +@@ -708,7 +708,6 @@ struct iwl_priv { + unsigned long reload_jiffies; + int reload_count; + bool ucode_loaded; +- bool init_ucode_run; /* Don't run init uCode again */ + + u8 plcp_delta_threshold; + +--- a/drivers/net/wireless/iwlwifi/dvm/ucode.c ++++ b/drivers/net/wireless/iwlwifi/dvm/ucode.c +@@ -418,9 +418,6 @@ int iwl_run_init_ucode(struct iwl_priv * + if (!priv->fw->img[IWL_UCODE_INIT].sec[0].len) + return 0; + +- if (priv->init_ucode_run) +- return 0; +- + iwl_init_notification_wait(&priv->notif_wait, &calib_wait, + calib_complete, ARRAY_SIZE(calib_complete), + iwlagn_wait_calib, priv); +@@ -440,8 +437,6 @@ int iwl_run_init_ucode(struct iwl_priv * + */ + ret = iwl_wait_notification(&priv->notif_wait, &calib_wait, + UCODE_CALIB_TIMEOUT); +- if (!ret) +- priv->init_ucode_run = true; + + goto out; + diff --git a/queue-3.19/libata-blacklist-queued-trim-on-samsung-ssd-850-pro.patch b/queue-3.19/libata-blacklist-queued-trim-on-samsung-ssd-850-pro.patch new file mode 100644 index 00000000000..c6c3e92a3fb --- /dev/null +++ b/queue-3.19/libata-blacklist-queued-trim-on-samsung-ssd-850-pro.patch @@ -0,0 +1,31 @@ +From 6fc4d97a4987c5d247655a157a9377996626221a Mon Sep 17 00:00:00 2001 +From: "Martin K. Petersen" +Date: Fri, 27 Mar 2015 15:17:21 -0400 +Subject: libata: Blacklist queued TRIM on Samsung SSD 850 Pro + +From: "Martin K. Petersen" + +commit 6fc4d97a4987c5d247655a157a9377996626221a upstream. + +Blacklist queued TRIM on this drive for now. + +Reported-by: Stefan Keller +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Tejun Heo +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/ata/libata-core.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/ata/libata-core.c ++++ b/drivers/ata/libata-core.c +@@ -4243,6 +4243,8 @@ static const struct ata_blacklist_entry + ATA_HORKAGE_ZERO_AFTER_TRIM, }, + { "Crucial_CT*MX100*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | + ATA_HORKAGE_ZERO_AFTER_TRIM, }, ++ { "Samsung SSD 850 PRO*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ++ ATA_HORKAGE_ZERO_AFTER_TRIM, }, + + /* + * As defined, the DRAT (Deterministic Read After Trim) and RZAT diff --git a/queue-3.19/libata-update-crucial-micron-blacklist.patch b/queue-3.19/libata-update-crucial-micron-blacklist.patch new file mode 100644 index 00000000000..804afbeb825 --- /dev/null +++ b/queue-3.19/libata-update-crucial-micron-blacklist.patch @@ -0,0 +1,54 @@ +From ff7f53fb82a7801a778e5902bdbbc5e195ab0de0 Mon Sep 17 00:00:00 2001 +From: "Martin K. Petersen" +Date: Fri, 27 Mar 2015 15:17:20 -0400 +Subject: libata: Update Crucial/Micron blacklist + +From: "Martin K. Petersen" + +commit ff7f53fb82a7801a778e5902bdbbc5e195ab0de0 upstream. + +Micron has released an updated firmware (MU02) for M510/M550/MX100 +drives to fix the issues with queued TRIM. Queued TRIM remains broken on +M500 but is working fine on later drives such as M600 and MX200. + +Tweak our blacklist to reflect the above. + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=71371 +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Tejun Heo +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/ata/libata-core.c | 13 +++++++++++-- + 1 file changed, 11 insertions(+), 2 deletions(-) + +--- a/drivers/ata/libata-core.c ++++ b/drivers/ata/libata-core.c +@@ -4233,9 +4233,16 @@ static const struct ata_blacklist_entry + { "PIONEER DVD-RW DVR-216D", NULL, ATA_HORKAGE_NOSETXFER }, + + /* devices that don't properly handle queued TRIM commands */ +- { "Micron_M[56]*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ++ { "Micron_M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ++ ATA_HORKAGE_ZERO_AFTER_TRIM, }, ++ { "Crucial_CT*M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ++ ATA_HORKAGE_ZERO_AFTER_TRIM, }, ++ { "Micron_M5[15]0*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | ++ ATA_HORKAGE_ZERO_AFTER_TRIM, }, ++ { "Crucial_CT*M550*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | ++ ATA_HORKAGE_ZERO_AFTER_TRIM, }, ++ { "Crucial_CT*MX100*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | + ATA_HORKAGE_ZERO_AFTER_TRIM, }, +- { "Crucial_CT*SSD*", NULL, ATA_HORKAGE_NO_NCQ_TRIM, }, + + /* + * As defined, the DRAT (Deterministic Read After Trim) and RZAT +@@ -4255,6 +4262,8 @@ static const struct ata_blacklist_entry + */ + { "INTEL*SSDSC2MH*", NULL, 0, }, + ++ { "Micron*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, ++ { "Crucial*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, + { "INTEL*SSD*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, + { "SSD*INTEL*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, + { "Samsung*SSD*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, diff --git a/queue-3.19/mac80211-fix-rx-a-mpdu-session-reorder-timer-deletion.patch b/queue-3.19/mac80211-fix-rx-a-mpdu-session-reorder-timer-deletion.patch new file mode 100644 index 00000000000..8f89d46b3e4 --- /dev/null +++ b/queue-3.19/mac80211-fix-rx-a-mpdu-session-reorder-timer-deletion.patch @@ -0,0 +1,99 @@ +From 788211d81bfdf9b6a547d0530f206ba6ee76b107 Mon Sep 17 00:00:00 2001 +From: Johannes Berg +Date: Wed, 1 Apr 2015 14:20:42 +0200 +Subject: mac80211: fix RX A-MPDU session reorder timer deletion + +From: Johannes 
Berg + +commit 788211d81bfdf9b6a547d0530f206ba6ee76b107 upstream. + +There's an issue with the way the RX A-MPDU reorder timer is +deleted that can cause a kernel crash like this: + + * tid_rx is removed - call_rcu(ieee80211_free_tid_rx) + * station is destroyed + * reorder timer fires before ieee80211_free_tid_rx() runs, + accessing the station, thus potentially crashing due to + the use-after-free + +The station deletion is protected by synchronize_net(), but +that isn't enough -- ieee80211_free_tid_rx() need not have +run when that returns (it deletes the timer.) We could use +rcu_barrier() instead of synchronize_net(), but that's much +more expensive. + +Instead, to fix this, add a field tracking that the session +is being deleted. In this case, the only re-arming of the +timer happens with the reorder spinlock held, so make that +code not rearm it if the session is being deleted and also +delete the timer after setting that field. This ensures the +timer cannot fire after ___ieee80211_stop_rx_ba_session() +returns, which fixes the problem. 
+ +Signed-off-by: Johannes Berg +Signed-off-by: Greg Kroah-Hartman + +--- + net/mac80211/agg-rx.c | 8 ++++++-- + net/mac80211/rx.c | 7 ++++--- + net/mac80211/sta_info.h | 2 ++ + 3 files changed, 12 insertions(+), 5 deletions(-) + +--- a/net/mac80211/agg-rx.c ++++ b/net/mac80211/agg-rx.c +@@ -49,8 +49,6 @@ static void ieee80211_free_tid_rx(struct + container_of(h, struct tid_ampdu_rx, rcu_head); + int i; + +- del_timer_sync(&tid_rx->reorder_timer); +- + for (i = 0; i < tid_rx->buf_size; i++) + __skb_queue_purge(&tid_rx->reorder_buf[i]); + kfree(tid_rx->reorder_buf); +@@ -93,6 +91,12 @@ void ___ieee80211_stop_rx_ba_session(str + + del_timer_sync(&tid_rx->session_timer); + ++ /* make sure ieee80211_sta_reorder_release() doesn't re-arm the timer */ ++ spin_lock_bh(&tid_rx->reorder_lock); ++ tid_rx->removed = true; ++ spin_unlock_bh(&tid_rx->reorder_lock); ++ del_timer_sync(&tid_rx->reorder_timer); ++ + call_rcu(&tid_rx->rcu_head, ieee80211_free_tid_rx); + } + +--- a/net/mac80211/rx.c ++++ b/net/mac80211/rx.c +@@ -870,9 +870,10 @@ static void ieee80211_sta_reorder_releas + + set_release_timer: + +- mod_timer(&tid_agg_rx->reorder_timer, +- tid_agg_rx->reorder_time[j] + 1 + +- HT_RX_REORDER_BUF_TIMEOUT); ++ if (!tid_agg_rx->removed) ++ mod_timer(&tid_agg_rx->reorder_timer, ++ tid_agg_rx->reorder_time[j] + 1 + ++ HT_RX_REORDER_BUF_TIMEOUT); + } else { + del_timer(&tid_agg_rx->reorder_timer); + } +--- a/net/mac80211/sta_info.h ++++ b/net/mac80211/sta_info.h +@@ -175,6 +175,7 @@ struct tid_ampdu_tx { + * @reorder_lock: serializes access to reorder buffer, see below. + * @auto_seq: used for offloaded BA sessions to automatically pick head_seq_and + * and ssn. ++ * @removed: this session is removed (but might have been found due to RCU) + * + * This structure's lifetime is managed by RCU, assignments to + * the array holding it must hold the aggregation mutex. 
+@@ -199,6 +200,7 @@ struct tid_ampdu_rx { + u16 timeout; + u8 dialog_token; + bool auto_seq; ++ bool removed; + }; + + /** diff --git a/queue-3.19/mm-fix-anon_vma-degree-underflow-in-anon_vma-endless-growing-prevention.patch b/queue-3.19/mm-fix-anon_vma-degree-underflow-in-anon_vma-endless-growing-prevention.patch new file mode 100644 index 00000000000..32c666c414a --- /dev/null +++ b/queue-3.19/mm-fix-anon_vma-degree-underflow-in-anon_vma-endless-growing-prevention.patch @@ -0,0 +1,75 @@ +From 3fe89b3e2a7bbf3e97657104b9b33a9d81b950b3 Mon Sep 17 00:00:00 2001 +From: Leon Yu +Date: Wed, 25 Mar 2015 15:55:11 -0700 +Subject: mm: fix anon_vma->degree underflow in anon_vma endless growing prevention + +From: Leon Yu + +commit 3fe89b3e2a7bbf3e97657104b9b33a9d81b950b3 upstream. + +I have constantly stumbled upon "kernel BUG at mm/rmap.c:399!" after +upgrading to 3.19 and had no luck with 4.0-rc1 neither. + +So, after looking into new logic introduced by commit 7a3ef208e662 ("mm: +prevent endless growth of anon_vma hierarchy"), I found chances are that +unlink_anon_vmas() is called without incrementing dst->anon_vma->degree +in anon_vma_clone() due to allocation failure. If dst->anon_vma is not +NULL in error path, its degree will be incorrectly decremented in +unlink_anon_vmas() and eventually underflow when exiting as a result of +another call to unlink_anon_vmas(). That's how "kernel BUG at +mm/rmap.c:399!" is triggered for me. + +This patch fixes the underflow by dropping dst->anon_vma when allocation +fails. It's safe to do so regardless of original value of dst->anon_vma +because dst->anon_vma doesn't have valid meaning if anon_vma_clone() +fails. Besides, callers don't care dst->anon_vma in such case neither. + +Also suggested by Michal Hocko, we can clean up vma_adjust() a bit as +anon_vma_clone() now does the work. 
+ +[akpm@linux-foundation.org: tweak comment] +Fixes: 7a3ef208e662 ("mm: prevent endless growth of anon_vma hierarchy") +Signed-off-by: Leon Yu +Signed-off-by: Konstantin Khlebnikov +Reviewed-by: Michal Hocko +Acked-by: Rik van Riel +Acked-by: David Rientjes +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/mmap.c | 4 +--- + mm/rmap.c | 7 +++++++ + 2 files changed, 8 insertions(+), 3 deletions(-) + +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -780,10 +780,8 @@ again: remove_next = 1 + (end > next-> + + importer->anon_vma = exporter->anon_vma; + error = anon_vma_clone(importer, exporter); +- if (error) { +- importer->anon_vma = NULL; ++ if (error) + return error; +- } + } + } + +--- a/mm/rmap.c ++++ b/mm/rmap.c +@@ -287,6 +287,13 @@ int anon_vma_clone(struct vm_area_struct + return 0; + + enomem_failure: ++ /* ++ * dst->anon_vma is dropped here otherwise its degree can be incorrectly ++ * decremented in unlink_anon_vmas(). ++ * We can safely do this because callers of anon_vma_clone() don't care ++ * about dst->anon_vma if anon_vma_clone() failed. ++ */ ++ dst->anon_vma = NULL; + unlink_anon_vmas(dst); + return -ENOMEM; + } diff --git a/queue-3.19/mm-memory-hotplug-postpone-the-reset-of-obsolete-pgdat.patch b/queue-3.19/mm-memory-hotplug-postpone-the-reset-of-obsolete-pgdat.patch new file mode 100644 index 00000000000..7c41909c762 --- /dev/null +++ b/queue-3.19/mm-memory-hotplug-postpone-the-reset-of-obsolete-pgdat.patch @@ -0,0 +1,111 @@ +From b0dc3a342af36f95a68fe229b8f0f73552c5ca08 Mon Sep 17 00:00:00 2001 +From: Gu Zheng +Date: Wed, 25 Mar 2015 15:55:20 -0700 +Subject: mm/memory hotplug: postpone the reset of obsolete pgdat + +From: Gu Zheng + +commit b0dc3a342af36f95a68fe229b8f0f73552c5ca08 upstream. 
+ +Qiu Xishi reported the following BUG when testing hot-add/hot-remove node under +stress condition: + + BUG: unable to handle kernel paging request at 0000000000025f60 + IP: next_online_pgdat+0x1/0x50 + PGD 0 + Oops: 0000 [#1] SMP + ACPI: Device does not support D3cold + Modules linked in: fuse nls_iso8859_1 nls_cp437 vfat fat loop dm_mod coretemp mperf crc32c_intel ghash_clmulni_intel aesni_intel ablk_helper cryptd lrw gf128mul glue_helper aes_x86_64 pcspkr microcode igb dca i2c_algo_bit ipv6 megaraid_sas iTCO_wdt i2c_i801 i2c_core iTCO_vendor_support tg3 sg hwmon ptp lpc_ich pps_core mfd_core acpi_pad rtc_cmos button ext3 jbd mbcache sd_mod crc_t10dif scsi_dh_alua scsi_dh_rdac scsi_dh_hp_sw scsi_dh_emc scsi_dh ahci libahci libata scsi_mod [last unloaded: rasf] + CPU: 23 PID: 238 Comm: kworker/23:1 Tainted: G O 3.10.15-5885-euler0302 #1 + Hardware name: HUAWEI TECHNOLOGIES CO.,LTD. Huawei N1/Huawei N1, BIOS V100R001 03/02/2015 + Workqueue: events vmstat_update + task: ffffa800d32c0000 ti: ffffa800d32ae000 task.ti: ffffa800d32ae000 + RIP: 0010: next_online_pgdat+0x1/0x50 + RSP: 0018:ffffa800d32afce8 EFLAGS: 00010286 + RAX: 0000000000001440 RBX: ffffffff81da53b8 RCX: 0000000000000082 + RDX: 0000000000000000 RSI: 0000000000000082 RDI: 0000000000000000 + RBP: ffffa800d32afd28 R08: ffffffff81c93bfc R09: ffffffff81cbdc96 + R10: 00000000000040ec R11: 00000000000000a0 R12: ffffa800fffb3440 + R13: ffffa800d32afd38 R14: 0000000000000017 R15: ffffa800e6616800 + FS: 0000000000000000(0000) GS:ffffa800e6600000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 0000000000025f60 CR3: 0000000001a0b000 CR4: 00000000001407e0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + Call Trace: + refresh_cpu_vm_stats+0xd0/0x140 + vmstat_update+0x11/0x50 + process_one_work+0x194/0x3d0 + worker_thread+0x12b/0x410 + kthread+0xc6/0xd0 + ret_from_fork+0x7c/0xb0 + +The 
cause is the "memset(pgdat, 0, sizeof(*pgdat))" at the end of +try_offline_node, which will reset all the content of pgdat to 0, as the +pgdat is accessed lock-free, so that the users still using the pgdat +will panic, such as the vmstat_update routine. + +process A: offline node XX: + +vmstat_update() + refresh_cpu_vm_stats() + for_each_populated_zone() + find online node XX + cond_resched() + offline cpu and memory, then try_offline_node() + node_set_offline(nid), and memset(pgdat, 0, sizeof(*pgdat)) + zone = next_zone(zone) + pg_data_t *pgdat = zone->zone_pgdat; // here pgdat is NULL now + next_online_pgdat(pgdat) + next_online_node(pgdat->node_id); // NULL pointer access + +So the solution here is postponing the reset of obsolete pgdat from +try_offline_node() to hotadd_new_pgdat(), and just resetting +pgdat->nr_zones and pgdat->classzone_idx to be 0 rather than the memset +0 to avoid breaking pointer information in pgdat. + +Signed-off-by: Gu Zheng +Reported-by: Xishi Qiu +Suggested-by: KAMEZAWA Hiroyuki +Cc: David Rientjes +Cc: Yasuaki Ishimatsu +Cc: Taku Izumi +Cc: Tang Chen +Cc: Xie XiuQi +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/memory_hotplug.c | 13 ++++--------- + 1 file changed, 4 insertions(+), 9 deletions(-) + +--- a/mm/memory_hotplug.c ++++ b/mm/memory_hotplug.c +@@ -1092,6 +1092,10 @@ static pg_data_t __ref *hotadd_new_pgdat + return NULL; + + arch_refresh_nodedata(nid, pgdat); ++ } else { ++ /* Reset the nr_zones and classzone_idx to 0 before reuse */ ++ pgdat->nr_zones = 0; ++ pgdat->classzone_idx = 0; + } + + /* we can use NODE_DATA(nid) from here */ +@@ -1977,15 +1981,6 @@ void try_offline_node(int nid) + if (is_vmalloc_addr(zone->wait_table)) + vfree(zone->wait_table); + } +- +- /* +- * Since there is no way to guarentee the address of pgdat/zone is not +- * on stack of any kernel threads or used by other kernel objects +- * without reference counting or other symchronizing 
method, do not +- * reset node_data and free pgdat here. Just reset it to 0 and reuse +- * the memory when the node is online again. +- */ +- memset(pgdat, 0, sizeof(*pgdat)); + } + EXPORT_SYMBOL(try_offline_node); + diff --git a/queue-3.19/mm-page_alloc.c-call-kernel_map_pages-in-unset_migrateype_isolate.patch b/queue-3.19/mm-page_alloc.c-call-kernel_map_pages-in-unset_migrateype_isolate.patch new file mode 100644 index 00000000000..be37166fc26 --- /dev/null +++ b/queue-3.19/mm-page_alloc.c-call-kernel_map_pages-in-unset_migrateype_isolate.patch @@ -0,0 +1,66 @@ +From cfa869438282be84ad4110bba5027ef1fbbe71e4 Mon Sep 17 00:00:00 2001 +From: Laura Abbott +Date: Wed, 25 Mar 2015 15:55:26 -0700 +Subject: mm/page_alloc.c: call kernel_map_pages in unset_migrateype_isolate + +From: Laura Abbott + +commit cfa869438282be84ad4110bba5027ef1fbbe71e4 upstream. + +Commit 3c605096d315 ("mm/page_alloc: restrict max order of merging on +isolated pageblock") changed the logic of unset_migratetype_isolate to +check the buddy allocator and explicitly call __free_pages to merge. + +The page that is being freed in this path never had prep_new_page called +so set_page_refcounted is called explicitly but there is no call to +kernel_map_pages. 
With the default kernel_map_pages this is mostly +harmless but if kernel_map_pages does any manipulation of the page +tables (unmapping or setting pages to read only) this may trigger a +fault: + + alloc_contig_range test_pages_isolated(ceb00, ced00) failed + Unable to handle kernel paging request at virtual address ffffffc0cec00000 + pgd = ffffffc045fc4000 + [ffffffc0cec00000] *pgd=0000000000000000 + Internal error: Oops: 9600004f [#1] PREEMPT SMP + Modules linked in: exfatfs + CPU: 1 PID: 23237 Comm: TimedEventQueue Not tainted 3.10.49-gc72ad36-dirty #1 + task: ffffffc03de52100 ti: ffffffc015388000 task.ti: ffffffc015388000 + PC is at memset+0xc8/0x1c0 + LR is at kernel_map_pages+0x1ec/0x244 + +Fix this by calling kernel_map_pages to ensure the page is set in the +page table properly + +Fixes: 3c605096d315 ("mm/page_alloc: restrict max order of merging on isolated pageblock") +Signed-off-by: Laura Abbott +Cc: Naoya Horiguchi +Cc: Mel Gorman +Acked-by: Rik van Riel +Cc: Yasuaki Ishimatsu +Cc: Zhang Yanfei +Cc: Xishi Qiu +Cc: Vladimir Davydov +Acked-by: Joonsoo Kim +Cc: Gioh Kim +Cc: Michal Nazarewicz +Cc: Marek Szyprowski +Cc: Vlastimil Babka +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/page_isolation.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/mm/page_isolation.c ++++ b/mm/page_isolation.c +@@ -103,6 +103,7 @@ void unset_migratetype_isolate(struct pa + + if (!is_migrate_isolate_page(buddy)) { + __isolate_free_page(page, order); ++ kernel_map_pages(page, (1 << order), 1); + set_page_refcounted(page); + isolated_page = page; + } diff --git a/queue-3.19/nbd-fix-possible-memory-leak.patch b/queue-3.19/nbd-fix-possible-memory-leak.patch new file mode 100644 index 00000000000..3e67ba23e94 --- /dev/null +++ b/queue-3.19/nbd-fix-possible-memory-leak.patch @@ -0,0 +1,45 @@ +From ff6b8090e26ef7649ef0cc6b42389141ef48b0cf Mon Sep 17 00:00:00 2001 +From: Sudip Mukherjee +Date: Tue, 27 Jan 2015 18:08:22 +0530 
+Subject: nbd: fix possible memory leak + +From: Sudip Mukherjee + +commit ff6b8090e26ef7649ef0cc6b42389141ef48b0cf upstream. + +we have already allocated memory for nbd_dev, but we were not +releasing that memory and just returning the error value. + +Signed-off-by: Sudip Mukherjee +Acked-by: Paul Clements +Signed-off-by: Markus Pargmann +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/block/nbd.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/block/nbd.c ++++ b/drivers/block/nbd.c +@@ -803,10 +803,6 @@ static int __init nbd_init(void) + return -EINVAL; + } + +- nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL); +- if (!nbd_dev) +- return -ENOMEM; +- + part_shift = 0; + if (max_part > 0) { + part_shift = fls(max_part); +@@ -828,6 +824,10 @@ static int __init nbd_init(void) + if (nbds_max > 1UL << (MINORBITS - part_shift)) + return -EINVAL; + ++ nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL); ++ if (!nbd_dev) ++ return -ENOMEM; ++ + for (i = 0; i < nbds_max; i++) { + struct gendisk *disk = alloc_disk(1 << part_shift); + if (!disk) diff --git a/queue-3.19/nfsd-return-correct-lockowner-when-there-is-a-race-on-hash-insert.patch b/queue-3.19/nfsd-return-correct-lockowner-when-there-is-a-race-on-hash-insert.patch new file mode 100644 index 00000000000..bf5889f1fce --- /dev/null +++ b/queue-3.19/nfsd-return-correct-lockowner-when-there-is-a-race-on-hash-insert.patch @@ -0,0 +1,37 @@ +From 340f0ba1c6c8412aa35fd6476044836b84361ea6 Mon Sep 17 00:00:00 2001 +From: "J. Bruce Fields" +Date: Mon, 23 Mar 2015 11:02:30 -0400 +Subject: nfsd: return correct lockowner when there is a race on hash insert + +From: "J. Bruce Fields" + +commit 340f0ba1c6c8412aa35fd6476044836b84361ea6 upstream. + +alloc_init_lock_stateowner can return an already freed entry if there is +a race to put openowners in the hashtable. + +Noticed by inspection after Jeff Layton fixed the same bug for open +owners. 
Depending on client behavior, this one may be trickier to +trigger in practice. + +Fixes: c58c6610ec24 "nfsd: Protect adding/removing lock owners using client_lock" +Cc: Trond Myklebust +Acked-by: Jeff Layton +Signed-off-by: J. Bruce Fields +Signed-off-by: Greg Kroah-Hartman + +--- + fs/nfsd/nfs4state.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/nfsd/nfs4state.c ++++ b/fs/nfsd/nfs4state.c +@@ -5065,7 +5065,7 @@ alloc_init_lock_stateowner(unsigned int + } else + nfs4_free_lockowner(&lo->lo_owner); + spin_unlock(&clp->cl_lock); +- return lo; ++ return ret; + } + + static void diff --git a/queue-3.19/nfsd-return-correct-openowner-when-there-is-a-race-to-put-one-in-the-hash.patch b/queue-3.19/nfsd-return-correct-openowner-when-there-is-a-race-to-put-one-in-the-hash.patch new file mode 100644 index 00000000000..7d6f700e697 --- /dev/null +++ b/queue-3.19/nfsd-return-correct-openowner-when-there-is-a-race-to-put-one-in-the-hash.patch @@ -0,0 +1,46 @@ +From c5952338bfc234e54deda45b7228f610a545e28a Mon Sep 17 00:00:00 2001 +From: Jeff Layton +Date: Mon, 23 Mar 2015 10:53:42 -0400 +Subject: nfsd: return correct openowner when there is a race to put one in the hash + +From: Jeff Layton + +commit c5952338bfc234e54deda45b7228f610a545e28a upstream. + +alloc_init_open_stateowner can return an already freed entry if there is +a race to put openowners in the hashtable. + +In commit 7ffb588086e9, we changed it so that we allocate and initialize +an openowner, and then check to see if a matching one got stuffed into +the hashtable in the meantime. If it did, then we free the one we just +allocated and take a reference on the one already there. There is a bug +here though. The code will then return the pointer to the one that was +allocated (and has now been freed). + +This wasn't evident before as this race almost never occurred. The Linux +kernel client used to serialize requests for a single openowner. 
That +has changed now with v4.0 kernels, and this race can now easily occur. + +Fixes: 7ffb588086e9 +Cc: Trond Myklebust +Reported-by: Christoph Hellwig +Reviewed-by: Christoph Hellwig +Signed-off-by: Jeff Layton +Signed-off-by: J. Bruce Fields +Signed-off-by: Greg Kroah-Hartman + +--- + fs/nfsd/nfs4state.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/nfsd/nfs4state.c ++++ b/fs/nfsd/nfs4state.c +@@ -3228,7 +3228,7 @@ alloc_init_open_stateowner(unsigned int + } else + nfs4_free_openowner(&oo->oo_owner); + spin_unlock(&clp->cl_lock); +- return oo; ++ return ret; + } + + static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { diff --git a/queue-3.19/powerpc-fix-memory-corruption-by-pnv_alloc_idle_core_states.patch b/queue-3.19/powerpc-fix-memory-corruption-by-pnv_alloc_idle_core_states.patch new file mode 100644 index 00000000000..7376ededeab --- /dev/null +++ b/queue-3.19/powerpc-fix-memory-corruption-by-pnv_alloc_idle_core_states.patch @@ -0,0 +1,37 @@ +From d52356e7f48e400ca258c6763a232a92fa82ff68 Mon Sep 17 00:00:00 2001 +From: Jan Stancek +Date: Tue, 31 Mar 2015 18:11:46 +0200 +Subject: powerpc: fix memory corruption by pnv_alloc_idle_core_states + +From: Jan Stancek + +commit d52356e7f48e400ca258c6763a232a92fa82ff68 upstream. + +Space allocated for paca is based off nr_cpu_ids, +but pnv_alloc_idle_core_states() iterates paca with +cpu_nr_cores()*threads_per_core, which is using NR_CPUS. + +This causes pnv_alloc_idle_core_states() to write over memory, +which is outside of paca array and may later lead to various panics. + +Fixes: 7cba160ad789 (powernv/cpuidle: Redesign idle states management) +Signed-off-by: Jan Stancek +Signed-off-by: Michael Ellerman +Signed-off-by: Preet U. 
Murthy +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/include/asm/cputhreads.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/powerpc/include/asm/cputhreads.h ++++ b/arch/powerpc/include/asm/cputhreads.h +@@ -55,7 +55,7 @@ static inline cpumask_t cpu_thread_mask_ + + static inline int cpu_nr_cores(void) + { +- return NR_CPUS >> threads_shift; ++ return nr_cpu_ids >> threads_shift; + } + + static inline cpumask_t cpu_online_cores_map(void) diff --git a/queue-3.19/powerpc-re-enable-dynticks.patch b/queue-3.19/powerpc-re-enable-dynticks.patch new file mode 100644 index 00000000000..0e585847e51 --- /dev/null +++ b/queue-3.19/powerpc-re-enable-dynticks.patch @@ -0,0 +1,58 @@ +From fea559f303567e558bfab9c8ba4a2af5b309205a Mon Sep 17 00:00:00 2001 +From: Paul Clarke +Date: Fri, 20 Feb 2015 11:13:33 -0600 +Subject: powerpc: Re-enable dynticks + +From: Paul Clarke + +commit fea559f303567e558bfab9c8ba4a2af5b309205a upstream. + +Implement arch_irq_work_has_interrupt() for powerpc + +Commit 9b01f5bf3 introduced a dependency on "IRQ work self-IPIs" for +full dynamic ticks to be enabled, by expecting architectures to +implement a suitable arch_irq_work_has_interrupt() routine. + +Several arches have implemented this routine, including x86 (3010279f) +and arm (09f6edd4), but powerpc was omitted. + +This patch implements this routine for powerpc. + +The symptom, at boot (on powerpc systems) with "nohz_full=" +is displayed: + + NO_HZ: Can't run full dynticks because arch doesn't support irq work self-IPIs + +after this patch: + + NO_HZ: Full dynticks CPUs: . + +Tested against 3.19. + +powerpc implements "IRQ work self-IPIs" by setting the decrementer to 1 in +arch_irq_work_raise(), which causes a decrementer exception on the next +timebase tick. We then handle the work in __timer_interrupt(). + +CC: Frederic Weisbecker +Signed-off-by: Paul A. Clarke +Reviewed-by: Paul E. 
McKenney +[mpe: Flesh out change log, fix ws & include guards, remove include of processor.h] +Signed-off-by: Michael Ellerman +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/include/asm/irq_work.h | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- /dev/null ++++ b/arch/powerpc/include/asm/irq_work.h +@@ -0,0 +1,9 @@ ++#ifndef _ASM_POWERPC_IRQ_WORK_H ++#define _ASM_POWERPC_IRQ_WORK_H ++ ++static inline bool arch_irq_work_has_interrupt(void) ++{ ++ return true; ++} ++ ++#endif /* _ASM_POWERPC_IRQ_WORK_H */ diff --git a/queue-3.19/radeon-do-not-directly-dereference-pointers-to-bios-area.patch b/queue-3.19/radeon-do-not-directly-dereference-pointers-to-bios-area.patch new file mode 100644 index 00000000000..b47caada00a --- /dev/null +++ b/queue-3.19/radeon-do-not-directly-dereference-pointers-to-bios-area.patch @@ -0,0 +1,56 @@ +From f2c9e560b406f2f6b14b345c7da33467dee9cdf2 Mon Sep 17 00:00:00 2001 +From: David Miller +Date: Wed, 18 Mar 2015 23:18:40 -0400 +Subject: radeon: Do not directly dereference pointers to BIOS area. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: David Miller + +commit f2c9e560b406f2f6b14b345c7da33467dee9cdf2 upstream. + +Use readb() and memcpy_fromio() accessors instead. + +Reviewed-by: Christian König +Signed-off-by: David S. 
Miller +Signed-off-by: Alex Deucher +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/radeon/radeon_bios.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +--- a/drivers/gpu/drm/radeon/radeon_bios.c ++++ b/drivers/gpu/drm/radeon/radeon_bios.c +@@ -76,7 +76,7 @@ static bool igp_read_bios_from_vram(stru + + static bool radeon_read_bios(struct radeon_device *rdev) + { +- uint8_t __iomem *bios; ++ uint8_t __iomem *bios, val1, val2; + size_t size; + + rdev->bios = NULL; +@@ -86,15 +86,19 @@ static bool radeon_read_bios(struct rade + return false; + } + +- if (size == 0 || bios[0] != 0x55 || bios[1] != 0xaa) { ++ val1 = readb(&bios[0]); ++ val2 = readb(&bios[1]); ++ ++ if (size == 0 || val1 != 0x55 || val2 != 0xaa) { + pci_unmap_rom(rdev->pdev, bios); + return false; + } +- rdev->bios = kmemdup(bios, size, GFP_KERNEL); ++ rdev->bios = kzalloc(size, GFP_KERNEL); + if (rdev->bios == NULL) { + pci_unmap_rom(rdev->pdev, bios); + return false; + } ++ memcpy_fromio(rdev->bios, bios, size); + pci_unmap_rom(rdev->pdev, bios); + return true; + } diff --git a/queue-3.19/sched-fix-rlimit_rttime-when-pi-boosting-to-rt.patch b/queue-3.19/sched-fix-rlimit_rttime-when-pi-boosting-to-rt.patch new file mode 100644 index 00000000000..b395146fe02 --- /dev/null +++ b/queue-3.19/sched-fix-rlimit_rttime-when-pi-boosting-to-rt.patch @@ -0,0 +1,47 @@ +From 746db9443ea57fd9c059f62c4bfbf41cf224fe13 Mon Sep 17 00:00:00 2001 +From: Brian Silverman +Date: Wed, 18 Feb 2015 16:23:56 -0800 +Subject: sched: Fix RLIMIT_RTTIME when PI-boosting to RT + +From: Brian Silverman + +commit 746db9443ea57fd9c059f62c4bfbf41cf224fe13 upstream. + +When non-realtime tasks get priority-inheritance boosted to a realtime +scheduling class, RLIMIT_RTTIME starts to apply to them. However, the +counter used for checking this (the same one used for SCHED_RR +timeslices) was not getting reset. 
This meant that tasks running with a +non-realtime scheduling class which are repeatedly boosted to a realtime +one, but never block while they are running realtime, eventually hit the +timeout without ever running for a time over the limit. This patch +resets the realtime timeslice counter when un-PI-boosting from an RT to +a non-RT scheduling class. + +I have some test code with two threads and a shared PTHREAD_PRIO_INHERIT +mutex which induces priority boosting and spins while boosted that gets +killed by a SIGXCPU on non-fixed kernels but doesn't with this patch +applied. It happens much faster with a CONFIG_PREEMPT_RT kernel, and +does happen eventually with PREEMPT_VOLUNTARY kernels. + +Signed-off-by: Brian Silverman +Signed-off-by: Peter Zijlstra (Intel) +Cc: austin@peloton-tech.com +Link: http://lkml.kernel.org/r/1424305436-6716-1-git-send-email-brian@peloton-tech.com +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/sched/core.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -3079,6 +3079,8 @@ void rt_mutex_setprio(struct task_struct + } else { + if (dl_prio(oldprio)) + p->dl.dl_boosted = 0; ++ if (rt_prio(oldprio)) ++ p->rt.timeout = 0; + p->sched_class = &fair_sched_class; + } + diff --git a/queue-3.19/series b/queue-3.19/series index 662b5a05c04..c8f4bae593f 100644 --- a/queue-3.19/series +++ b/queue-3.19/series @@ -28,3 +28,25 @@ drm-i915-vlv-save-restore-the-power-context-base-reg.patch drm-i915-vlv-remove-wait-for-previous-gfx-clk-disable-request.patch drm-amdkfd-initialize-only-amdkfd-s-assigned-pipelines.patch drm-i915-align-initial-plane-backing-objects-correctly.patch +btrfs-simplify-insert_orphan_item.patch +ib-uverbs-prevent-integer-overflow-in-ib_umem_get-address-arithmetic.patch +iwlwifi-dvm-run-init-firmware-again-upon-.start.patch +x86-xen-prepare-p2m-list-for-memory-hotplug.patch +xen-balloon-before-adding-hotplugged-memory-set-frames-to-invalid.patch 
+nfsd-return-correct-openowner-when-there-is-a-race-to-put-one-in-the-hash.patch +nfsd-return-correct-lockowner-when-there-is-a-race-on-hash-insert.patch +sunrpc-make-debugfs-file-creation-failure-non-fatal.patch +powerpc-fix-memory-corruption-by-pnv_alloc_idle_core_states.patch +powerpc-re-enable-dynticks.patch +nbd-fix-possible-memory-leak.patch +mac80211-fix-rx-a-mpdu-session-reorder-timer-deletion.patch +mm-fix-anon_vma-degree-underflow-in-anon_vma-endless-growing-prevention.patch +mm-memory-hotplug-postpone-the-reset-of-obsolete-pgdat.patch +mm-page_alloc.c-call-kernel_map_pages-in-unset_migrateype_isolate.patch +sched-fix-rlimit_rttime-when-pi-boosting-to-rt.patch +cpufreq-schedule-work-for-the-first-online-cpu-on-resume.patch +writeback-add-missing-initial_jiffies-init-in-global_update_bandwidth.patch +writeback-fix-possible-underflow-in-write-bandwidth-calculation.patch +libata-update-crucial-micron-blacklist.patch +libata-blacklist-queued-trim-on-samsung-ssd-850-pro.patch +radeon-do-not-directly-dereference-pointers-to-bios-area.patch diff --git a/queue-3.19/sunrpc-make-debugfs-file-creation-failure-non-fatal.patch b/queue-3.19/sunrpc-make-debugfs-file-creation-failure-non-fatal.patch new file mode 100644 index 00000000000..c0ab566ff8f --- /dev/null +++ b/queue-3.19/sunrpc-make-debugfs-file-creation-failure-non-fatal.patch @@ -0,0 +1,296 @@ +From f9c72d10d6fbf949558cd088389a42213ed7b12d Mon Sep 17 00:00:00 2001 +From: Jeff Layton +Date: Tue, 31 Mar 2015 12:03:28 -0400 +Subject: sunrpc: make debugfs file creation failure non-fatal + +From: Jeff Layton + +commit f9c72d10d6fbf949558cd088389a42213ed7b12d upstream. + +We currently have a problem that SELinux policy is being enforced when +creating debugfs files. If a debugfs file is created as a side effect of +doing some syscall, then that creation can fail if the SELinux policy +for that process prevents it. + +This seems wrong. 
We don't do that for files under /proc, for instance, +so Bruce has proposed a patch to fix that. + +While discussing that patch however, Greg K.H. stated: + + "No kernel code should care / fail if a debugfs function fails, so + please fix up the sunrpc code first." + +This patch converts all of the sunrpc debugfs setup code to be void +return functions, and the callers to not look for errors from those +functions. + +This should allow rpc_clnt and rpc_xprt creation to work, even if the +kernel fails to create debugfs files for some reason. + +Symptoms were failing krb5 mounts on systems using gss-proxy and +selinux. + +Fixes: 388f0c776781 "sunrpc: add a debugfs rpc_xprt directory..." +Signed-off-by: Jeff Layton +Acked-by: Greg Kroah-Hartman +Signed-off-by: J. Bruce Fields +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/sunrpc/debug.h | 18 +++++++------- + net/sunrpc/clnt.c | 4 --- + net/sunrpc/debugfs.c | 52 +++++++++++++++++++++++-------------------- + net/sunrpc/sunrpc_syms.c | 7 ----- + net/sunrpc/xprt.c | 7 ----- + 5 files changed, 41 insertions(+), 47 deletions(-) + +--- a/include/linux/sunrpc/debug.h ++++ b/include/linux/sunrpc/debug.h +@@ -60,17 +60,17 @@ struct rpc_xprt; + #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) + void rpc_register_sysctl(void); + void rpc_unregister_sysctl(void); +-int sunrpc_debugfs_init(void); ++void sunrpc_debugfs_init(void); + void sunrpc_debugfs_exit(void); +-int rpc_clnt_debugfs_register(struct rpc_clnt *); ++void rpc_clnt_debugfs_register(struct rpc_clnt *); + void rpc_clnt_debugfs_unregister(struct rpc_clnt *); +-int rpc_xprt_debugfs_register(struct rpc_xprt *); ++void rpc_xprt_debugfs_register(struct rpc_xprt *); + void rpc_xprt_debugfs_unregister(struct rpc_xprt *); + #else +-static inline int ++static inline void + sunrpc_debugfs_init(void) + { +- return 0; ++ return; + } + + static inline void +@@ -79,10 +79,10 @@ sunrpc_debugfs_exit(void) + return; + } + +-static inline int ++static inline void +
rpc_clnt_debugfs_register(struct rpc_clnt *clnt) + { +- return 0; ++ return; + } + + static inline void +@@ -91,10 +91,10 @@ rpc_clnt_debugfs_unregister(struct rpc_c + return; + } + +-static inline int ++static inline void + rpc_xprt_debugfs_register(struct rpc_xprt *xprt) + { +- return 0; ++ return; + } + + static inline void +--- a/net/sunrpc/clnt.c ++++ b/net/sunrpc/clnt.c +@@ -303,9 +303,7 @@ static int rpc_client_register(struct rp + struct super_block *pipefs_sb; + int err; + +- err = rpc_clnt_debugfs_register(clnt); +- if (err) +- return err; ++ rpc_clnt_debugfs_register(clnt); + + pipefs_sb = rpc_get_sb_net(net); + if (pipefs_sb) { +--- a/net/sunrpc/debugfs.c ++++ b/net/sunrpc/debugfs.c +@@ -129,48 +129,52 @@ static const struct file_operations task + .release = tasks_release, + }; + +-int ++void + rpc_clnt_debugfs_register(struct rpc_clnt *clnt) + { +- int len, err; ++ int len; + char name[24]; /* enough for "../../rpc_xprt/ + 8 hex digits + NULL */ ++ struct rpc_xprt *xprt; + + /* Already registered? */ +- if (clnt->cl_debugfs) +- return 0; ++ if (clnt->cl_debugfs || !rpc_clnt_dir) ++ return; + + len = snprintf(name, sizeof(name), "%x", clnt->cl_clid); + if (len >= sizeof(name)) +- return -EINVAL; ++ return; + + /* make the per-client dir */ + clnt->cl_debugfs = debugfs_create_dir(name, rpc_clnt_dir); + if (!clnt->cl_debugfs) +- return -ENOMEM; ++ return; + + /* make tasks file */ +- err = -ENOMEM; + if (!debugfs_create_file("tasks", S_IFREG | S_IRUSR, clnt->cl_debugfs, + clnt, &tasks_fops)) + goto out_err; + +- err = -EINVAL; + rcu_read_lock(); ++ xprt = rcu_dereference(clnt->cl_xprt); ++ /* no "debugfs" dentry? Don't bother with the symlink. 
*/ ++ if (!xprt->debugfs) { ++ rcu_read_unlock(); ++ return; ++ } + len = snprintf(name, sizeof(name), "../../rpc_xprt/%s", +- rcu_dereference(clnt->cl_xprt)->debugfs->d_name.name); ++ xprt->debugfs->d_name.name); + rcu_read_unlock(); ++ + if (len >= sizeof(name)) + goto out_err; + +- err = -ENOMEM; + if (!debugfs_create_symlink("xprt", clnt->cl_debugfs, name)) + goto out_err; + +- return 0; ++ return; + out_err: + debugfs_remove_recursive(clnt->cl_debugfs); + clnt->cl_debugfs = NULL; +- return err; + } + + void +@@ -226,33 +230,33 @@ static const struct file_operations xprt + .release = xprt_info_release, + }; + +-int ++void + rpc_xprt_debugfs_register(struct rpc_xprt *xprt) + { + int len, id; + static atomic_t cur_id; + char name[9]; /* 8 hex digits + NULL term */ + ++ if (!rpc_xprt_dir) ++ return; ++ + id = (unsigned int)atomic_inc_return(&cur_id); + + len = snprintf(name, sizeof(name), "%x", id); + if (len >= sizeof(name)) +- return -EINVAL; ++ return; + + /* make the per-client dir */ + xprt->debugfs = debugfs_create_dir(name, rpc_xprt_dir); + if (!xprt->debugfs) +- return -ENOMEM; ++ return; + + /* make tasks file */ + if (!debugfs_create_file("info", S_IFREG | S_IRUSR, xprt->debugfs, + xprt, &xprt_info_fops)) { + debugfs_remove_recursive(xprt->debugfs); + xprt->debugfs = NULL; +- return -ENOMEM; + } +- +- return 0; + } + + void +@@ -266,14 +270,17 @@ void __exit + sunrpc_debugfs_exit(void) + { + debugfs_remove_recursive(topdir); ++ topdir = NULL; ++ rpc_clnt_dir = NULL; ++ rpc_xprt_dir = NULL; + } + +-int __init ++void __init + sunrpc_debugfs_init(void) + { + topdir = debugfs_create_dir("sunrpc", NULL); + if (!topdir) +- goto out; ++ return; + + rpc_clnt_dir = debugfs_create_dir("rpc_clnt", topdir); + if (!rpc_clnt_dir) +@@ -283,10 +290,9 @@ sunrpc_debugfs_init(void) + if (!rpc_xprt_dir) + goto out_remove; + +- return 0; ++ return; + out_remove: + debugfs_remove_recursive(topdir); + topdir = NULL; +-out: +- return -ENOMEM; ++ rpc_clnt_dir = NULL; + } +--- 
a/net/sunrpc/sunrpc_syms.c ++++ b/net/sunrpc/sunrpc_syms.c +@@ -98,10 +98,7 @@ init_sunrpc(void) + if (err) + goto out4; + +- err = sunrpc_debugfs_init(); +- if (err) +- goto out5; +- ++ sunrpc_debugfs_init(); + #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) + rpc_register_sysctl(); + #endif +@@ -109,8 +106,6 @@ init_sunrpc(void) + init_socket_xprt(); /* clnt sock transport */ + return 0; + +-out5: +- unregister_rpc_pipefs(); + out4: + unregister_pernet_subsys(&sunrpc_net_ops); + out3: +--- a/net/sunrpc/xprt.c ++++ b/net/sunrpc/xprt.c +@@ -1303,7 +1303,6 @@ static void xprt_init(struct rpc_xprt *x + */ + struct rpc_xprt *xprt_create_transport(struct xprt_create *args) + { +- int err; + struct rpc_xprt *xprt; + struct xprt_class *t; + +@@ -1344,11 +1343,7 @@ found: + return ERR_PTR(-ENOMEM); + } + +- err = rpc_xprt_debugfs_register(xprt); +- if (err) { +- xprt_destroy(xprt); +- return ERR_PTR(err); +- } ++ rpc_xprt_debugfs_register(xprt); + + dprintk("RPC: created transport %p with %u slots\n", xprt, + xprt->max_reqs); diff --git a/queue-3.19/writeback-add-missing-initial_jiffies-init-in-global_update_bandwidth.patch b/queue-3.19/writeback-add-missing-initial_jiffies-init-in-global_update_bandwidth.patch new file mode 100644 index 00000000000..18b4db78eba --- /dev/null +++ b/queue-3.19/writeback-add-missing-initial_jiffies-init-in-global_update_bandwidth.patch @@ -0,0 +1,44 @@ +From 7d70e15480c0450d2bfafaad338a32e884fc215e Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Wed, 4 Mar 2015 10:37:43 -0500 +Subject: writeback: add missing INITIAL_JIFFIES init in global_update_bandwidth() + +From: Tejun Heo + +commit 7d70e15480c0450d2bfafaad338a32e884fc215e upstream. + +global_update_bandwidth() uses static variable update_time as the +timestamp for the last update but forgets to initialize it to +INITIAL_JIFFIES. + +This means that global_dirty_limit will be 5 mins into the future on +32bit and some large amount jiffies into the past on 64bit. 
This +isn't critical as the only effect is that global_dirty_limit won't be +updated for the first 5 mins after booting on 32bit machines, +especially given the auxiliary nature of global_dirty_limit's role - +protecting against global dirty threshold's sudden dips; however, it +does lead to unintended suboptimal behavior. Fix it. + +Fixes: c42843f2f0bb ("writeback: introduce smoothed global dirty limit") +Signed-off-by: Tejun Heo +Acked-by: Jan Kara +Cc: Wu Fengguang +Cc: Jens Axboe +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + mm/page-writeback.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/page-writeback.c ++++ b/mm/page-writeback.c +@@ -922,7 +922,7 @@ static void global_update_bandwidth(unsi + unsigned long now) + { + static DEFINE_SPINLOCK(dirty_lock); +- static unsigned long update_time; ++ static unsigned long update_time = INITIAL_JIFFIES; + + /* + * check locklessly first to optimize away locking for the most time diff --git a/queue-3.19/writeback-fix-possible-underflow-in-write-bandwidth-calculation.patch b/queue-3.19/writeback-fix-possible-underflow-in-write-bandwidth-calculation.patch new file mode 100644 index 00000000000..209d4206cc8 --- /dev/null +++ b/queue-3.19/writeback-fix-possible-underflow-in-write-bandwidth-calculation.patch @@ -0,0 +1,56 @@ +From c72efb658f7c8b27ca3d0efb5cfd5ded9fcac89e Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Mon, 23 Mar 2015 00:18:48 -0400 +Subject: writeback: fix possible underflow in write bandwidth calculation + +From: Tejun Heo + +commit c72efb658f7c8b27ca3d0efb5cfd5ded9fcac89e upstream. + +From 1ebf33901ecc75d9496862dceb1ef0377980587c Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Mon, 23 Mar 2015 00:08:19 -0400 + +2f800fbd777b ("writeback: fix dirtied pages accounting on redirty") +introduced account_page_redirty() which reverts stat updates for a +redirtied page, making BDI_DIRTIED no longer monotonically increasing. 
+ +bdi_update_write_bandwidth() uses the delta in BDI_DIRTIED as the +basis for bandwidth calculation. While unlikely, since the above +patch, the newer value may be lower than the recorded past value and +underflow the bandwidth calculation leading to a wild result. + +Fix it by subtracting min of the old and new values when calculating +delta. AFAIK, there hasn't been any report of it happening but the +resulting erratic behavior would be non-critical and temporary, so +it's possible that the issue is happening without being reported. The +risk of the fix is very low, so tagged for -stable. + +Signed-off-by: Tejun Heo +Cc: Jens Axboe +Cc: Jan Kara +Cc: Wu Fengguang +Cc: Greg Thelen +Fixes: 2f800fbd777b ("writeback: fix dirtied pages accounting on redirty") +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + mm/page-writeback.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/mm/page-writeback.c ++++ b/mm/page-writeback.c +@@ -857,8 +857,11 @@ static void bdi_update_write_bandwidth(s + * bw * elapsed + write_bandwidth * (period - elapsed) + * write_bandwidth = --------------------------------------------------- + * period ++ * ++ * @written may have decreased due to account_page_redirty(). ++ * Avoid underflowing @bw calculation. 
+ */ +- bw = written - bdi->written_stamp; ++ bw = written - min(written, bdi->written_stamp); + bw *= HZ; + if (unlikely(elapsed > period)) { + do_div(bw, elapsed); diff --git a/queue-3.19/x86-xen-prepare-p2m-list-for-memory-hotplug.patch b/queue-3.19/x86-xen-prepare-p2m-list-for-memory-hotplug.patch new file mode 100644 index 00000000000..f6e4809c878 --- /dev/null +++ b/queue-3.19/x86-xen-prepare-p2m-list-for-memory-hotplug.patch @@ -0,0 +1,83 @@ +From 633d6f17cd91ad5bf2370265946f716e42d388c6 Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Fri, 20 Mar 2015 13:55:38 +0100 +Subject: x86/xen: prepare p2m list for memory hotplug + +From: Juergen Gross + +commit 633d6f17cd91ad5bf2370265946f716e42d388c6 upstream. + +Commit 054954eb051f35e74b75a566a96fe756015352c8 ("xen: switch to linear +virtual mapped sparse p2m list") introduced a regression regarding to +memory hotplug for a pv-domain: as the virtual space for the p2m list +is allocated for the to be expected memory size of the domain only, +hotplugged memory above that size will not be usable by the domain. + +Correct this by using a configurable size for the p2m list in case of +memory hotplug enabled (default supported memory size is 512 GB for +64 bit domains and 4 GB for 32 bit domains). 
+ +Signed-off-by: Juergen Gross +Reviewed-by: Daniel Kiper +Signed-off-by: David Vrabel +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/xen/p2m.c | 10 +++++++++- + drivers/xen/Kconfig | 17 +++++++++++++++++ + 2 files changed, 26 insertions(+), 1 deletion(-) + +--- a/arch/x86/xen/p2m.c ++++ b/arch/x86/xen/p2m.c +@@ -93,6 +93,12 @@ EXPORT_SYMBOL_GPL(xen_p2m_size); + unsigned long xen_max_p2m_pfn __read_mostly; + EXPORT_SYMBOL_GPL(xen_max_p2m_pfn); + ++#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT ++#define P2M_LIMIT CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT ++#else ++#define P2M_LIMIT 0 ++#endif ++ + static DEFINE_SPINLOCK(p2m_update_lock); + + static unsigned long *p2m_mid_missing_mfn; +@@ -387,9 +393,11 @@ static void __init xen_rebuild_p2m_list( + void __init xen_vmalloc_p2m_tree(void) + { + static struct vm_struct vm; ++ unsigned long p2m_limit; + ++ p2m_limit = (phys_addr_t)P2M_LIMIT * 1024 * 1024 * 1024 / PAGE_SIZE; + vm.flags = VM_ALLOC; +- vm.size = ALIGN(sizeof(unsigned long) * xen_max_p2m_pfn, ++ vm.size = ALIGN(sizeof(unsigned long) * max(xen_max_p2m_pfn, p2m_limit), + PMD_SIZE * PMDS_PER_MID_PAGE); + vm_area_register_early(&vm, PMD_SIZE * PMDS_PER_MID_PAGE); + pr_notice("p2m virtual area at %p, size is %lx\n", vm.addr, vm.size); +--- a/drivers/xen/Kconfig ++++ b/drivers/xen/Kconfig +@@ -55,6 +55,23 @@ config XEN_BALLOON_MEMORY_HOTPLUG + + In that case step 3 should be omitted. + ++config XEN_BALLOON_MEMORY_HOTPLUG_LIMIT ++ int "Hotplugged memory limit (in GiB) for a PV guest" ++ default 512 if X86_64 ++ default 4 if X86_32 ++ range 0 64 if X86_32 ++ depends on XEN_HAVE_PVMMU ++ depends on XEN_BALLOON_MEMORY_HOTPLUG ++ help ++ Maxmium amount of memory (in GiB) that a PV guest can be ++ expanded to when using memory hotplug. ++ ++ A PV guest can have more memory than this limit if is ++ started with a larger maximum. ++ ++ This value is used to allocate enough space in internal ++ tables needed for physical memory administration. 
++ + config XEN_SCRUB_PAGES + bool "Scrub pages before returning them to system" + depends on XEN_BALLOON diff --git a/queue-3.19/xen-balloon-before-adding-hotplugged-memory-set-frames-to-invalid.patch b/queue-3.19/xen-balloon-before-adding-hotplugged-memory-set-frames-to-invalid.patch new file mode 100644 index 00000000000..1bf80674066 --- /dev/null +++ b/queue-3.19/xen-balloon-before-adding-hotplugged-memory-set-frames-to-invalid.patch @@ -0,0 +1,58 @@ +From 3c56b3a12ce52f361468cbdd2f79b2f3b8da0ea6 Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Fri, 20 Mar 2015 13:55:39 +0100 +Subject: xen/balloon: before adding hotplugged memory, set frames to invalid + +From: Juergen Gross + +commit 3c56b3a12ce52f361468cbdd2f79b2f3b8da0ea6 upstream. + +Commit 25b884a83d487fd62c3de7ac1ab5549979188482 ("x86/xen: set +regions above the end of RAM as 1:1") introduced a regression. + +To be able to add memory pages which were added via memory hotplug to +a pv domain, the pages must be "invalid" instead of "identity" in the +p2m list before they can be added. + +Suggested-by: David Vrabel +Signed-off-by: Juergen Gross +Reviewed-by: Daniel Kiper +Signed-off-by: David Vrabel +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/xen/balloon.c | 23 +++++++++++++++++++++++ + 1 file changed, 23 insertions(+) + +--- a/drivers/xen/balloon.c ++++ b/drivers/xen/balloon.c +@@ -230,6 +230,29 @@ static enum bp_state reserve_additional_ + balloon_hotplug = round_up(balloon_hotplug, PAGES_PER_SECTION); + nid = memory_add_physaddr_to_nid(hotplug_start_paddr); + ++#ifdef CONFIG_XEN_HAVE_PVMMU ++ /* ++ * add_memory() will build page tables for the new memory so ++ * the p2m must contain invalid entries so the correct ++ * non-present PTEs will be written. ++ * ++ * If a failure occurs, the original (identity) p2m entries ++ * are not restored since this region is now known not to ++ * conflict with any devices. 
++ */ ++ if (!xen_feature(XENFEAT_auto_translated_physmap)) { ++ unsigned long pfn, i; ++ ++ pfn = PFN_DOWN(hotplug_start_paddr); ++ for (i = 0; i < balloon_hotplug; i++) { ++ if (!set_phys_to_machine(pfn + i, INVALID_P2M_ENTRY)) { ++ pr_warn("set_phys_to_machine() failed, no memory added\n"); ++ return BP_ECANCELED; ++ } ++ } ++ } ++#endif ++ + rc = add_memory(nid, hotplug_start_paddr, balloon_hotplug << PAGE_SHIFT); + + if (rc) {