]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.19-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 17 Apr 2015 10:05:54 +0000 (12:05 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 17 Apr 2015 10:05:54 +0000 (12:05 +0200)
added patches:
btrfs-simplify-insert_orphan_item.patch
cpufreq-schedule-work-for-the-first-online-cpu-on-resume.patch
ib-uverbs-prevent-integer-overflow-in-ib_umem_get-address-arithmetic.patch
iwlwifi-dvm-run-init-firmware-again-upon-.start.patch
libata-blacklist-queued-trim-on-samsung-ssd-850-pro.patch
libata-update-crucial-micron-blacklist.patch
mac80211-fix-rx-a-mpdu-session-reorder-timer-deletion.patch
mm-fix-anon_vma-degree-underflow-in-anon_vma-endless-growing-prevention.patch
mm-memory-hotplug-postpone-the-reset-of-obsolete-pgdat.patch
mm-page_alloc.c-call-kernel_map_pages-in-unset_migrateype_isolate.patch
nbd-fix-possible-memory-leak.patch
nfsd-return-correct-lockowner-when-there-is-a-race-on-hash-insert.patch
nfsd-return-correct-openowner-when-there-is-a-race-to-put-one-in-the-hash.patch
powerpc-fix-memory-corruption-by-pnv_alloc_idle_core_states.patch
powerpc-re-enable-dynticks.patch
radeon-do-not-directly-dereference-pointers-to-bios-area.patch
sched-fix-rlimit_rttime-when-pi-boosting-to-rt.patch
sunrpc-make-debugfs-file-creation-failure-non-fatal.patch
writeback-add-missing-initial_jiffies-init-in-global_update_bandwidth.patch
writeback-fix-possible-underflow-in-write-bandwidth-calculation.patch
x86-xen-prepare-p2m-list-for-memory-hotplug.patch
xen-balloon-before-adding-hotplugged-memory-set-frames-to-invalid.patch

23 files changed:
queue-3.19/btrfs-simplify-insert_orphan_item.patch [new file with mode: 0644]
queue-3.19/cpufreq-schedule-work-for-the-first-online-cpu-on-resume.patch [new file with mode: 0644]
queue-3.19/ib-uverbs-prevent-integer-overflow-in-ib_umem_get-address-arithmetic.patch [new file with mode: 0644]
queue-3.19/iwlwifi-dvm-run-init-firmware-again-upon-.start.patch [new file with mode: 0644]
queue-3.19/libata-blacklist-queued-trim-on-samsung-ssd-850-pro.patch [new file with mode: 0644]
queue-3.19/libata-update-crucial-micron-blacklist.patch [new file with mode: 0644]
queue-3.19/mac80211-fix-rx-a-mpdu-session-reorder-timer-deletion.patch [new file with mode: 0644]
queue-3.19/mm-fix-anon_vma-degree-underflow-in-anon_vma-endless-growing-prevention.patch [new file with mode: 0644]
queue-3.19/mm-memory-hotplug-postpone-the-reset-of-obsolete-pgdat.patch [new file with mode: 0644]
queue-3.19/mm-page_alloc.c-call-kernel_map_pages-in-unset_migrateype_isolate.patch [new file with mode: 0644]
queue-3.19/nbd-fix-possible-memory-leak.patch [new file with mode: 0644]
queue-3.19/nfsd-return-correct-lockowner-when-there-is-a-race-on-hash-insert.patch [new file with mode: 0644]
queue-3.19/nfsd-return-correct-openowner-when-there-is-a-race-to-put-one-in-the-hash.patch [new file with mode: 0644]
queue-3.19/powerpc-fix-memory-corruption-by-pnv_alloc_idle_core_states.patch [new file with mode: 0644]
queue-3.19/powerpc-re-enable-dynticks.patch [new file with mode: 0644]
queue-3.19/radeon-do-not-directly-dereference-pointers-to-bios-area.patch [new file with mode: 0644]
queue-3.19/sched-fix-rlimit_rttime-when-pi-boosting-to-rt.patch [new file with mode: 0644]
queue-3.19/series
queue-3.19/sunrpc-make-debugfs-file-creation-failure-non-fatal.patch [new file with mode: 0644]
queue-3.19/writeback-add-missing-initial_jiffies-init-in-global_update_bandwidth.patch [new file with mode: 0644]
queue-3.19/writeback-fix-possible-underflow-in-write-bandwidth-calculation.patch [new file with mode: 0644]
queue-3.19/x86-xen-prepare-p2m-list-for-memory-hotplug.patch [new file with mode: 0644]
queue-3.19/xen-balloon-before-adding-hotplugged-memory-set-frames-to-invalid.patch [new file with mode: 0644]

diff --git a/queue-3.19/btrfs-simplify-insert_orphan_item.patch b/queue-3.19/btrfs-simplify-insert_orphan_item.patch
new file mode 100644 (file)
index 0000000..ebf2b14
--- /dev/null
@@ -0,0 +1,50 @@
+From 9c4f61f01d269815bb7c37be3ede59c5587747c6 Mon Sep 17 00:00:00 2001
+From: David Sterba <dsterba@suse.cz>
+Date: Fri, 2 Jan 2015 19:12:57 +0100
+Subject: btrfs: simplify insert_orphan_item
+
+From: David Sterba <dsterba@suse.cz>
+
+commit 9c4f61f01d269815bb7c37be3ede59c5587747c6 upstream.
+
+We can search and add the orphan item in one go,
+btrfs_insert_orphan_item will find out if the item already exists.
+
+Signed-off-by: David Sterba <dsterba@suse.cz>
+Cc: Chris Mason <clm@fb.com>
+Cc: Roman Mamedov <rm@romanrm.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+
+---
+ fs/btrfs/tree-log.c |   16 ++++------------
+ 1 file changed, 4 insertions(+), 12 deletions(-)
+
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -1266,21 +1266,13 @@ out:
+ }
+ static int insert_orphan_item(struct btrfs_trans_handle *trans,
+-                            struct btrfs_root *root, u64 offset)
++                            struct btrfs_root *root, u64 ino)
+ {
+       int ret;
+-      struct btrfs_path *path;
+-      path = btrfs_alloc_path();
+-      if (!path)
+-              return -ENOMEM;
+-
+-      ret = btrfs_find_item(root, path, BTRFS_ORPHAN_OBJECTID,
+-                      offset, BTRFS_ORPHAN_ITEM_KEY, NULL);
+-      if (ret > 0)
+-              ret = btrfs_insert_orphan_item(trans, root, offset);
+-
+-      btrfs_free_path(path);
++      ret = btrfs_insert_orphan_item(trans, root, ino);
++      if (ret == -EEXIST)
++              ret = 0;
+       return ret;
+ }
diff --git a/queue-3.19/cpufreq-schedule-work-for-the-first-online-cpu-on-resume.patch b/queue-3.19/cpufreq-schedule-work-for-the-first-online-cpu-on-resume.patch
new file mode 100644 (file)
index 0000000..4f4457e
--- /dev/null
@@ -0,0 +1,68 @@
+From c75de0ac0756d4b442f460e10461720c7c2412c2 Mon Sep 17 00:00:00 2001
+From: Viresh Kumar <viresh.kumar@linaro.org>
+Date: Thu, 2 Apr 2015 10:21:33 +0530
+Subject: cpufreq: Schedule work for the first-online CPU on resume
+
+From: Viresh Kumar <viresh.kumar@linaro.org>
+
+commit c75de0ac0756d4b442f460e10461720c7c2412c2 upstream.
+
+All CPUs leaving the first-online CPU are hotplugged out on suspend and
+cpufreq core stops managing them.
+
+On resume, we need to call cpufreq_update_policy() for this CPU's policy
+to make sure its frequency is in sync with cpufreq's cached value, as it
+might have got updated by hardware during suspend/resume.
+
+The policies are always added to the top of the policy-list. So, in
+normal circumstances, CPU 0's policy will be the last one in the list.
+And so the code checks for the last policy.
+
+But there are cases where it will fail. Consider quad-core system, with
+policy-per core. If CPU0 is hotplugged out and added back again, the
+last policy will be on CPU1 :(
+
+To fix this in a proper way, always look for the policy of the first
+online CPU. That way we will be sure that we are calling
+cpufreq_update_policy() for the only CPU that wasn't hotplugged out.
+
+Fixes: 2f0aea936360 ("cpufreq: suspend governors on system suspend/hibernate")
+Reported-by: Saravana Kannan <skannan@codeaurora.org>
+Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
+Acked-by: Saravana Kannan <skannan@codeaurora.org>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/cpufreq/cpufreq.c |   19 +++++++++++--------
+ 1 file changed, 11 insertions(+), 8 deletions(-)
+
+--- a/drivers/cpufreq/cpufreq.c
++++ b/drivers/cpufreq/cpufreq.c
+@@ -1724,15 +1724,18 @@ void cpufreq_resume(void)
+                   || __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS))
+                       pr_err("%s: Failed to start governor for policy: %p\n",
+                               __func__, policy);
+-
+-              /*
+-               * schedule call cpufreq_update_policy() for boot CPU, i.e. last
+-               * policy in list. It will verify that the current freq is in
+-               * sync with what we believe it to be.
+-               */
+-              if (list_is_last(&policy->policy_list, &cpufreq_policy_list))
+-                      schedule_work(&policy->update);
+       }
++
++      /*
++       * schedule call cpufreq_update_policy() for first-online CPU, as that
++       * wouldn't be hotplugged-out on suspend. It will verify that the
++       * current freq is in sync with what we believe it to be.
++       */
++      policy = cpufreq_cpu_get_raw(cpumask_first(cpu_online_mask));
++      if (WARN_ON(!policy))
++              return;
++
++      schedule_work(&policy->update);
+ }
+ /**
diff --git a/queue-3.19/ib-uverbs-prevent-integer-overflow-in-ib_umem_get-address-arithmetic.patch b/queue-3.19/ib-uverbs-prevent-integer-overflow-in-ib_umem_get-address-arithmetic.patch
new file mode 100644 (file)
index 0000000..9f30227
--- /dev/null
@@ -0,0 +1,47 @@
+From 8494057ab5e40df590ef6ef7d66324d3ae33356b Mon Sep 17 00:00:00 2001
+From: Shachar Raindel <raindel@mellanox.com>
+Date: Wed, 18 Mar 2015 17:39:08 +0000
+Subject: IB/uverbs: Prevent integer overflow in ib_umem_get address arithmetic
+
+From: Shachar Raindel <raindel@mellanox.com>
+
+commit 8494057ab5e40df590ef6ef7d66324d3ae33356b upstream.
+
+Properly verify that the resulting page aligned end address is larger
+than both the start address and the length of the memory area requested.
+
+Both the start and length arguments for ib_umem_get are controlled by
+the user. A misbehaving user can provide values which will cause an
+integer overflow when calculating the page aligned end address.
+
+This overflow can also cause miscalculation of the number of pages
+mapped, and additional logic issues.
+
+Addresses: CVE-2014-8159
+Signed-off-by: Shachar Raindel <raindel@mellanox.com>
+Signed-off-by: Jack Morgenstein <jackm@mellanox.com>
+Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/core/umem.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/drivers/infiniband/core/umem.c
++++ b/drivers/infiniband/core/umem.c
+@@ -99,6 +99,14 @@ struct ib_umem *ib_umem_get(struct ib_uc
+       if (dmasync)
+               dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);
++      /*
++       * If the combination of the addr and size requested for this memory
++       * region causes an integer overflow, return error.
++       */
++      if ((PAGE_ALIGN(addr + size) <= size) ||
++          (PAGE_ALIGN(addr + size) <= addr))
++              return ERR_PTR(-EINVAL);
++
+       if (!can_do_mlock())
+               return ERR_PTR(-EPERM);
diff --git a/queue-3.19/iwlwifi-dvm-run-init-firmware-again-upon-.start.patch b/queue-3.19/iwlwifi-dvm-run-init-firmware-again-upon-.start.patch
new file mode 100644 (file)
index 0000000..d3bd0a1
--- /dev/null
@@ -0,0 +1,58 @@
+From 9c8928f5176766bec79f272bd47b7124e11cccbd Mon Sep 17 00:00:00 2001
+From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
+Date: Mon, 16 Mar 2015 09:08:07 +0200
+Subject: iwlwifi: dvm: run INIT firmware again upon .start()
+
+From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
+
+commit 9c8928f5176766bec79f272bd47b7124e11cccbd upstream.
+
+The assumption before this patch was that we don't need to
+run again the INIT firmware after the system booted. The
+INIT firmware runs calibrations which impact the physical
+layer's behavior.
+Users reported that it may be helpful to run these
+calibrations again every time the interface is brought up.
+The penalty is minimal, since the calibrations run fast.
+This fixes:
+https://bugzilla.kernel.org/show_bug.cgi?id=94341
+
+Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/wireless/iwlwifi/dvm/dev.h   |    1 -
+ drivers/net/wireless/iwlwifi/dvm/ucode.c |    5 -----
+ 2 files changed, 6 deletions(-)
+
+--- a/drivers/net/wireless/iwlwifi/dvm/dev.h
++++ b/drivers/net/wireless/iwlwifi/dvm/dev.h
+@@ -708,7 +708,6 @@ struct iwl_priv {
+       unsigned long reload_jiffies;
+       int reload_count;
+       bool ucode_loaded;
+-      bool init_ucode_run;            /* Don't run init uCode again */
+       u8 plcp_delta_threshold;
+--- a/drivers/net/wireless/iwlwifi/dvm/ucode.c
++++ b/drivers/net/wireless/iwlwifi/dvm/ucode.c
+@@ -418,9 +418,6 @@ int iwl_run_init_ucode(struct iwl_priv *
+       if (!priv->fw->img[IWL_UCODE_INIT].sec[0].len)
+               return 0;
+-      if (priv->init_ucode_run)
+-              return 0;
+-
+       iwl_init_notification_wait(&priv->notif_wait, &calib_wait,
+                                  calib_complete, ARRAY_SIZE(calib_complete),
+                                  iwlagn_wait_calib, priv);
+@@ -440,8 +437,6 @@ int iwl_run_init_ucode(struct iwl_priv *
+        */
+       ret = iwl_wait_notification(&priv->notif_wait, &calib_wait,
+                                       UCODE_CALIB_TIMEOUT);
+-      if (!ret)
+-              priv->init_ucode_run = true;
+       goto out;
diff --git a/queue-3.19/libata-blacklist-queued-trim-on-samsung-ssd-850-pro.patch b/queue-3.19/libata-blacklist-queued-trim-on-samsung-ssd-850-pro.patch
new file mode 100644 (file)
index 0000000..c6c3e92
--- /dev/null
@@ -0,0 +1,31 @@
+From 6fc4d97a4987c5d247655a157a9377996626221a Mon Sep 17 00:00:00 2001
+From: "Martin K. Petersen" <martin.petersen@oracle.com>
+Date: Fri, 27 Mar 2015 15:17:21 -0400
+Subject: libata: Blacklist queued TRIM on Samsung SSD 850 Pro
+
+From: "Martin K. Petersen" <martin.petersen@oracle.com>
+
+commit 6fc4d97a4987c5d247655a157a9377996626221a upstream.
+
+Blacklist queued TRIM on this drive for now.
+
+Reported-by: Stefan Keller <linux-list@zahlenfresser.de>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/ata/libata-core.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/ata/libata-core.c
++++ b/drivers/ata/libata-core.c
+@@ -4243,6 +4243,8 @@ static const struct ata_blacklist_entry
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM, },
+       { "Crucial_CT*MX100*",          "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM, },
++      { "Samsung SSD 850 PRO*",       NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
++                                              ATA_HORKAGE_ZERO_AFTER_TRIM, },
+       /*
+        * As defined, the DRAT (Deterministic Read After Trim) and RZAT
diff --git a/queue-3.19/libata-update-crucial-micron-blacklist.patch b/queue-3.19/libata-update-crucial-micron-blacklist.patch
new file mode 100644 (file)
index 0000000..804afbe
--- /dev/null
@@ -0,0 +1,54 @@
+From ff7f53fb82a7801a778e5902bdbbc5e195ab0de0 Mon Sep 17 00:00:00 2001
+From: "Martin K. Petersen" <martin.petersen@oracle.com>
+Date: Fri, 27 Mar 2015 15:17:20 -0400
+Subject: libata: Update Crucial/Micron blacklist
+
+From: "Martin K. Petersen" <martin.petersen@oracle.com>
+
+commit ff7f53fb82a7801a778e5902bdbbc5e195ab0de0 upstream.
+
+Micron has released an updated firmware (MU02) for M510/M550/MX100
+drives to fix the issues with queued TRIM. Queued TRIM remains broken on
+M500 but is working fine on later drives such as M600 and MX200.
+
+Tweak our blacklist to reflect the above.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=71371
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/ata/libata-core.c |   13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+--- a/drivers/ata/libata-core.c
++++ b/drivers/ata/libata-core.c
+@@ -4233,9 +4233,16 @@ static const struct ata_blacklist_entry
+       { "PIONEER DVD-RW  DVR-216D",   NULL,   ATA_HORKAGE_NOSETXFER },
+       /* devices that don't properly handle queued TRIM commands */
+-      { "Micron_M[56]*",              NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
++      { "Micron_M500*",               NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
++                                              ATA_HORKAGE_ZERO_AFTER_TRIM, },
++      { "Crucial_CT*M500*",           NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
++                                              ATA_HORKAGE_ZERO_AFTER_TRIM, },
++      { "Micron_M5[15]0*",            "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
++                                              ATA_HORKAGE_ZERO_AFTER_TRIM, },
++      { "Crucial_CT*M550*",           "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
++                                              ATA_HORKAGE_ZERO_AFTER_TRIM, },
++      { "Crucial_CT*MX100*",          "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM, },
+-      { "Crucial_CT*SSD*",            NULL,   ATA_HORKAGE_NO_NCQ_TRIM, },
+       /*
+        * As defined, the DRAT (Deterministic Read After Trim) and RZAT
+@@ -4255,6 +4262,8 @@ static const struct ata_blacklist_entry
+        */
+       { "INTEL*SSDSC2MH*",            NULL,   0, },
++      { "Micron*",                    NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM, },
++      { "Crucial*",                   NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM, },
+       { "INTEL*SSD*",                 NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM, },
+       { "SSD*INTEL*",                 NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM, },
+       { "Samsung*SSD*",               NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM, },
diff --git a/queue-3.19/mac80211-fix-rx-a-mpdu-session-reorder-timer-deletion.patch b/queue-3.19/mac80211-fix-rx-a-mpdu-session-reorder-timer-deletion.patch
new file mode 100644 (file)
index 0000000..8f89d46
--- /dev/null
@@ -0,0 +1,99 @@
+From 788211d81bfdf9b6a547d0530f206ba6ee76b107 Mon Sep 17 00:00:00 2001
+From: Johannes Berg <johannes.berg@intel.com>
+Date: Wed, 1 Apr 2015 14:20:42 +0200
+Subject: mac80211: fix RX A-MPDU session reorder timer deletion
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+commit 788211d81bfdf9b6a547d0530f206ba6ee76b107 upstream.
+
+There's an issue with the way the RX A-MPDU reorder timer is
+deleted that can cause a kernel crash like this:
+
+ * tid_rx is removed - call_rcu(ieee80211_free_tid_rx)
+ * station is destroyed
+ * reorder timer fires before ieee80211_free_tid_rx() runs,
+   accessing the station, thus potentially crashing due to
+   the use-after-free
+
+The station deletion is protected by synchronize_net(), but
+that isn't enough -- ieee80211_free_tid_rx() need not have
+run when that returns (it deletes the timer.) We could use
+rcu_barrier() instead of synchronize_net(), but that's much
+more expensive.
+
+Instead, to fix this, add a field tracking that the session
+is being deleted. In this case, the only re-arming of the
+timer happens with the reorder spinlock held, so make that
+code not rearm it if the session is being deleted and also
+delete the timer after setting that field. This ensures the
+timer cannot fire after ___ieee80211_stop_rx_ba_session()
+returns, which fixes the problem.
+
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/mac80211/agg-rx.c   |    8 ++++++--
+ net/mac80211/rx.c       |    7 ++++---
+ net/mac80211/sta_info.h |    2 ++
+ 3 files changed, 12 insertions(+), 5 deletions(-)
+
+--- a/net/mac80211/agg-rx.c
++++ b/net/mac80211/agg-rx.c
+@@ -49,8 +49,6 @@ static void ieee80211_free_tid_rx(struct
+               container_of(h, struct tid_ampdu_rx, rcu_head);
+       int i;
+-      del_timer_sync(&tid_rx->reorder_timer);
+-
+       for (i = 0; i < tid_rx->buf_size; i++)
+               __skb_queue_purge(&tid_rx->reorder_buf[i]);
+       kfree(tid_rx->reorder_buf);
+@@ -93,6 +91,12 @@ void ___ieee80211_stop_rx_ba_session(str
+       del_timer_sync(&tid_rx->session_timer);
++      /* make sure ieee80211_sta_reorder_release() doesn't re-arm the timer */
++      spin_lock_bh(&tid_rx->reorder_lock);
++      tid_rx->removed = true;
++      spin_unlock_bh(&tid_rx->reorder_lock);
++      del_timer_sync(&tid_rx->reorder_timer);
++
+       call_rcu(&tid_rx->rcu_head, ieee80211_free_tid_rx);
+ }
+--- a/net/mac80211/rx.c
++++ b/net/mac80211/rx.c
+@@ -870,9 +870,10 @@ static void ieee80211_sta_reorder_releas
+  set_release_timer:
+-              mod_timer(&tid_agg_rx->reorder_timer,
+-                        tid_agg_rx->reorder_time[j] + 1 +
+-                        HT_RX_REORDER_BUF_TIMEOUT);
++              if (!tid_agg_rx->removed)
++                      mod_timer(&tid_agg_rx->reorder_timer,
++                                tid_agg_rx->reorder_time[j] + 1 +
++                                HT_RX_REORDER_BUF_TIMEOUT);
+       } else {
+               del_timer(&tid_agg_rx->reorder_timer);
+       }
+--- a/net/mac80211/sta_info.h
++++ b/net/mac80211/sta_info.h
+@@ -175,6 +175,7 @@ struct tid_ampdu_tx {
+  * @reorder_lock: serializes access to reorder buffer, see below.
+  * @auto_seq: used for offloaded BA sessions to automatically pick head_seq_and
+  *    and ssn.
++ * @removed: this session is removed (but might have been found due to RCU)
+  *
+  * This structure's lifetime is managed by RCU, assignments to
+  * the array holding it must hold the aggregation mutex.
+@@ -199,6 +200,7 @@ struct tid_ampdu_rx {
+       u16 timeout;
+       u8 dialog_token;
+       bool auto_seq;
++      bool removed;
+ };
+ /**
diff --git a/queue-3.19/mm-fix-anon_vma-degree-underflow-in-anon_vma-endless-growing-prevention.patch b/queue-3.19/mm-fix-anon_vma-degree-underflow-in-anon_vma-endless-growing-prevention.patch
new file mode 100644 (file)
index 0000000..32c666c
--- /dev/null
@@ -0,0 +1,75 @@
+From 3fe89b3e2a7bbf3e97657104b9b33a9d81b950b3 Mon Sep 17 00:00:00 2001
+From: Leon Yu <chianglungyu@gmail.com>
+Date: Wed, 25 Mar 2015 15:55:11 -0700
+Subject: mm: fix anon_vma->degree underflow in anon_vma endless growing prevention
+
+From: Leon Yu <chianglungyu@gmail.com>
+
+commit 3fe89b3e2a7bbf3e97657104b9b33a9d81b950b3 upstream.
+
+I have constantly stumbled upon "kernel BUG at mm/rmap.c:399!" after
+upgrading to 3.19 and had no luck with 4.0-rc1 either.
+
+So, after looking into new logic introduced by commit 7a3ef208e662 ("mm:
+prevent endless growth of anon_vma hierarchy"), I found chances are that
+unlink_anon_vmas() is called without incrementing dst->anon_vma->degree
+in anon_vma_clone() due to allocation failure.  If dst->anon_vma is not
+NULL in error path, its degree will be incorrectly decremented in
+unlink_anon_vmas() and eventually underflow when exiting as a result of
+another call to unlink_anon_vmas().  That's how "kernel BUG at
+mm/rmap.c:399!" is triggered for me.
+
+This patch fixes the underflow by dropping dst->anon_vma when allocation
+fails.  It's safe to do so regardless of original value of dst->anon_vma
+because dst->anon_vma doesn't have valid meaning if anon_vma_clone()
+fails.  Besides, callers don't care about dst->anon_vma in such case either.
+
+Also suggested by Michal Hocko, we can clean up vma_adjust() a bit as
+anon_vma_clone() now does the work.
+
+[akpm@linux-foundation.org: tweak comment]
+Fixes: 7a3ef208e662 ("mm: prevent endless growth of anon_vma hierarchy")
+Signed-off-by: Leon Yu <chianglungyu@gmail.com>
+Signed-off-by: Konstantin Khlebnikov <koct9i@gmail.com>
+Reviewed-by: Michal Hocko <mhocko@suse.cz>
+Acked-by: Rik van Riel <riel@redhat.com>
+Acked-by: David Rientjes <rientjes@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/mmap.c |    4 +---
+ mm/rmap.c |    7 +++++++
+ 2 files changed, 8 insertions(+), 3 deletions(-)
+
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -780,10 +780,8 @@ again:                    remove_next = 1 + (end > next->
+                       importer->anon_vma = exporter->anon_vma;
+                       error = anon_vma_clone(importer, exporter);
+-                      if (error) {
+-                              importer->anon_vma = NULL;
++                      if (error)
+                               return error;
+-                      }
+               }
+       }
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -287,6 +287,13 @@ int anon_vma_clone(struct vm_area_struct
+       return 0;
+  enomem_failure:
++      /*
++       * dst->anon_vma is dropped here otherwise its degree can be incorrectly
++       * decremented in unlink_anon_vmas().
++       * We can safely do this because callers of anon_vma_clone() don't care
++       * about dst->anon_vma if anon_vma_clone() failed.
++       */
++      dst->anon_vma = NULL;
+       unlink_anon_vmas(dst);
+       return -ENOMEM;
+ }
diff --git a/queue-3.19/mm-memory-hotplug-postpone-the-reset-of-obsolete-pgdat.patch b/queue-3.19/mm-memory-hotplug-postpone-the-reset-of-obsolete-pgdat.patch
new file mode 100644 (file)
index 0000000..7c41909
--- /dev/null
@@ -0,0 +1,111 @@
+From b0dc3a342af36f95a68fe229b8f0f73552c5ca08 Mon Sep 17 00:00:00 2001
+From: Gu Zheng <guz.fnst@cn.fujitsu.com>
+Date: Wed, 25 Mar 2015 15:55:20 -0700
+Subject: mm/memory hotplug: postpone the reset of obsolete pgdat
+
+From: Gu Zheng <guz.fnst@cn.fujitsu.com>
+
+commit b0dc3a342af36f95a68fe229b8f0f73552c5ca08 upstream.
+
+Qiu Xishi reported the following BUG when testing hot-add/hot-remove node under
+stress condition:
+
+  BUG: unable to handle kernel paging request at 0000000000025f60
+  IP: next_online_pgdat+0x1/0x50
+  PGD 0
+  Oops: 0000 [#1] SMP
+  ACPI: Device does not support D3cold
+  Modules linked in: fuse nls_iso8859_1 nls_cp437 vfat fat loop dm_mod coretemp mperf crc32c_intel ghash_clmulni_intel aesni_intel ablk_helper cryptd lrw gf128mul glue_helper aes_x86_64 pcspkr microcode igb dca i2c_algo_bit ipv6 megaraid_sas iTCO_wdt i2c_i801 i2c_core iTCO_vendor_support tg3 sg hwmon ptp lpc_ich pps_core mfd_core acpi_pad rtc_cmos button ext3 jbd mbcache sd_mod crc_t10dif scsi_dh_alua scsi_dh_rdac scsi_dh_hp_sw scsi_dh_emc scsi_dh ahci libahci libata scsi_mod [last unloaded: rasf]
+  CPU: 23 PID: 238 Comm: kworker/23:1 Tainted: G           O 3.10.15-5885-euler0302 #1
+  Hardware name: HUAWEI TECHNOLOGIES CO.,LTD. Huawei N1/Huawei N1, BIOS V100R001 03/02/2015
+  Workqueue: events vmstat_update
+  task: ffffa800d32c0000 ti: ffffa800d32ae000 task.ti: ffffa800d32ae000
+  RIP: 0010: next_online_pgdat+0x1/0x50
+  RSP: 0018:ffffa800d32afce8  EFLAGS: 00010286
+  RAX: 0000000000001440 RBX: ffffffff81da53b8 RCX: 0000000000000082
+  RDX: 0000000000000000 RSI: 0000000000000082 RDI: 0000000000000000
+  RBP: ffffa800d32afd28 R08: ffffffff81c93bfc R09: ffffffff81cbdc96
+  R10: 00000000000040ec R11: 00000000000000a0 R12: ffffa800fffb3440
+  R13: ffffa800d32afd38 R14: 0000000000000017 R15: ffffa800e6616800
+  FS:  0000000000000000(0000) GS:ffffa800e6600000(0000) knlGS:0000000000000000
+  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: 0000000000025f60 CR3: 0000000001a0b000 CR4: 00000000001407e0
+  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+  Call Trace:
+    refresh_cpu_vm_stats+0xd0/0x140
+    vmstat_update+0x11/0x50
+    process_one_work+0x194/0x3d0
+    worker_thread+0x12b/0x410
+    kthread+0xc6/0xd0
+    ret_from_fork+0x7c/0xb0
+
+The cause is the "memset(pgdat, 0, sizeof(*pgdat))" at the end of
+try_offline_node, which will reset all the content of pgdat to 0, as the
+pgdat is accessed lock-free, so that the users still using the pgdat
+will panic, such as the vmstat_update routine.
+
+process A:                             offline node XX:
+
+vmstat_update()
+   refresh_cpu_vm_stats()
+     for_each_populated_zone()
+       find online node XX
+     cond_resched()
+                                       offline cpu and memory, then try_offline_node()
+                                       node_set_offline(nid), and memset(pgdat, 0, sizeof(*pgdat))
+       zone = next_zone(zone)
+         pg_data_t *pgdat = zone->zone_pgdat;  // here pgdat is NULL now
+           next_online_pgdat(pgdat)
+             next_online_node(pgdat->node_id);  // NULL pointer access
+
+So the solution here is postponing the reset of obsolete pgdat from
+try_offline_node() to hotadd_new_pgdat(), and just resetting
+pgdat->nr_zones and pgdat->classzone_idx to be 0 rather than the memset
+0 to avoid breaking pointer information in pgdat.
+
+Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
+Reported-by: Xishi Qiu <qiuxishi@huawei.com>
+Suggested-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
+Cc: Taku Izumi <izumi.taku@jp.fujitsu.com>
+Cc: Tang Chen <tangchen@cn.fujitsu.com>
+Cc: Xie XiuQi <xiexiuqi@huawei.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memory_hotplug.c |   13 ++++---------
+ 1 file changed, 4 insertions(+), 9 deletions(-)
+
+--- a/mm/memory_hotplug.c
++++ b/mm/memory_hotplug.c
+@@ -1092,6 +1092,10 @@ static pg_data_t __ref *hotadd_new_pgdat
+                       return NULL;
+               arch_refresh_nodedata(nid, pgdat);
++      } else {
++              /* Reset the nr_zones and classzone_idx to 0 before reuse */
++              pgdat->nr_zones = 0;
++              pgdat->classzone_idx = 0;
+       }
+       /* we can use NODE_DATA(nid) from here */
+@@ -1977,15 +1981,6 @@ void try_offline_node(int nid)
+               if (is_vmalloc_addr(zone->wait_table))
+                       vfree(zone->wait_table);
+       }
+-
+-      /*
+-       * Since there is no way to guarentee the address of pgdat/zone is not
+-       * on stack of any kernel threads or used by other kernel objects
+-       * without reference counting or other symchronizing method, do not
+-       * reset node_data and free pgdat here. Just reset it to 0 and reuse
+-       * the memory when the node is online again.
+-       */
+-      memset(pgdat, 0, sizeof(*pgdat));
+ }
+ EXPORT_SYMBOL(try_offline_node);
diff --git a/queue-3.19/mm-page_alloc.c-call-kernel_map_pages-in-unset_migrateype_isolate.patch b/queue-3.19/mm-page_alloc.c-call-kernel_map_pages-in-unset_migrateype_isolate.patch
new file mode 100644 (file)
index 0000000..be37166
--- /dev/null
@@ -0,0 +1,66 @@
+From cfa869438282be84ad4110bba5027ef1fbbe71e4 Mon Sep 17 00:00:00 2001
+From: Laura Abbott <lauraa@codeaurora.org>
+Date: Wed, 25 Mar 2015 15:55:26 -0700
+Subject: mm/page_alloc.c: call kernel_map_pages in unset_migrateype_isolate
+
+From: Laura Abbott <lauraa@codeaurora.org>
+
+commit cfa869438282be84ad4110bba5027ef1fbbe71e4 upstream.
+
+Commit 3c605096d315 ("mm/page_alloc: restrict max order of merging on
+isolated pageblock") changed the logic of unset_migratetype_isolate to
+check the buddy allocator and explicitly call __free_pages to merge.
+
+The page that is being freed in this path never had prep_new_page called
+so set_page_refcounted is called explicitly but there is no call to
+kernel_map_pages.  With the default kernel_map_pages this is mostly
+harmless but if kernel_map_pages does any manipulation of the page
+tables (unmapping or setting pages to read only) this may trigger a
+fault:
+
+    alloc_contig_range test_pages_isolated(ceb00, ced00) failed
+    Unable to handle kernel paging request at virtual address ffffffc0cec00000
+    pgd = ffffffc045fc4000
+    [ffffffc0cec00000] *pgd=0000000000000000
+    Internal error: Oops: 9600004f [#1] PREEMPT SMP
+    Modules linked in: exfatfs
+    CPU: 1 PID: 23237 Comm: TimedEventQueue Not tainted 3.10.49-gc72ad36-dirty #1
+    task: ffffffc03de52100 ti: ffffffc015388000 task.ti: ffffffc015388000
+    PC is at memset+0xc8/0x1c0
+    LR is at kernel_map_pages+0x1ec/0x244
+
+Fix this by calling kernel_map_pages to ensure the page is set in the
+page table properly.
+
+Fixes: 3c605096d315 ("mm/page_alloc: restrict max order of merging on isolated pageblock")
+Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
+Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Acked-by: Rik van Riel <riel@redhat.com>
+Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
+Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
+Cc: Xishi Qiu <qiuxishi@huawei.com>
+Cc: Vladimir Davydov <vdavydov@parallels.com>
+Acked-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Gioh Kim <gioh.kim@lge.com>
+Cc: Michal Nazarewicz <mina86@mina86.com>
+Cc: Marek Szyprowski <m.szyprowski@samsung.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/page_isolation.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/mm/page_isolation.c
++++ b/mm/page_isolation.c
+@@ -103,6 +103,7 @@ void unset_migratetype_isolate(struct pa
+                       if (!is_migrate_isolate_page(buddy)) {
+                               __isolate_free_page(page, order);
++                              kernel_map_pages(page, (1 << order), 1);
+                               set_page_refcounted(page);
+                               isolated_page = page;
+                       }
diff --git a/queue-3.19/nbd-fix-possible-memory-leak.patch b/queue-3.19/nbd-fix-possible-memory-leak.patch
new file mode 100644 (file)
index 0000000..3e67ba2
--- /dev/null
@@ -0,0 +1,45 @@
+From ff6b8090e26ef7649ef0cc6b42389141ef48b0cf Mon Sep 17 00:00:00 2001
+From: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+Date: Tue, 27 Jan 2015 18:08:22 +0530
+Subject: nbd: fix possible memory leak
+
+From: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+
+commit ff6b8090e26ef7649ef0cc6b42389141ef48b0cf upstream.
+
+we have already allocated memory for nbd_dev, but we were not
+releasing that memory and just returning the error value.
+
+Signed-off-by: Sudip Mukherjee <sudip@vectorindia.org>
+Acked-by: Paul Clements <Paul.Clements@SteelEye.com>
+Signed-off-by: Markus Pargmann <mpa@pengutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/nbd.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/block/nbd.c
++++ b/drivers/block/nbd.c
+@@ -803,10 +803,6 @@ static int __init nbd_init(void)
+               return -EINVAL;
+       }
+-      nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL);
+-      if (!nbd_dev)
+-              return -ENOMEM;
+-
+       part_shift = 0;
+       if (max_part > 0) {
+               part_shift = fls(max_part);
+@@ -828,6 +824,10 @@ static int __init nbd_init(void)
+       if (nbds_max > 1UL << (MINORBITS - part_shift))
+               return -EINVAL;
++      nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL);
++      if (!nbd_dev)
++              return -ENOMEM;
++
+       for (i = 0; i < nbds_max; i++) {
+               struct gendisk *disk = alloc_disk(1 << part_shift);
+               if (!disk)
diff --git a/queue-3.19/nfsd-return-correct-lockowner-when-there-is-a-race-on-hash-insert.patch b/queue-3.19/nfsd-return-correct-lockowner-when-there-is-a-race-on-hash-insert.patch
new file mode 100644 (file)
index 0000000..bf5889f
--- /dev/null
@@ -0,0 +1,37 @@
+From 340f0ba1c6c8412aa35fd6476044836b84361ea6 Mon Sep 17 00:00:00 2001
+From: "J. Bruce Fields" <bfields@redhat.com>
+Date: Mon, 23 Mar 2015 11:02:30 -0400
+Subject: nfsd: return correct lockowner when there is a race on hash insert
+
+From: "J. Bruce Fields" <bfields@redhat.com>
+
+commit 340f0ba1c6c8412aa35fd6476044836b84361ea6 upstream.
+
+alloc_init_lock_stateowner can return an already freed entry if there is
+a race to put lockowners in the hashtable.
+
+Noticed by inspection after Jeff Layton fixed the same bug for open
+owners.  Depending on client behavior, this one may be trickier to
+trigger in practice.
+
+Fixes: c58c6610ec24 "nfsd: Protect adding/removing lock owners using client_lock"
+Cc: Trond Myklebust <trond.myklebust@primarydata.com>
+Acked-by: Jeff Layton <jeff.layton@primarydata.com>
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfsd/nfs4state.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -5065,7 +5065,7 @@ alloc_init_lock_stateowner(unsigned int
+       } else
+               nfs4_free_lockowner(&lo->lo_owner);
+       spin_unlock(&clp->cl_lock);
+-      return lo;
++      return ret;
+ }
+ static void
diff --git a/queue-3.19/nfsd-return-correct-openowner-when-there-is-a-race-to-put-one-in-the-hash.patch b/queue-3.19/nfsd-return-correct-openowner-when-there-is-a-race-to-put-one-in-the-hash.patch
new file mode 100644 (file)
index 0000000..7d6f700
--- /dev/null
@@ -0,0 +1,46 @@
+From c5952338bfc234e54deda45b7228f610a545e28a Mon Sep 17 00:00:00 2001
+From: Jeff Layton <jlayton@poochiereds.net>
+Date: Mon, 23 Mar 2015 10:53:42 -0400
+Subject: nfsd: return correct openowner when there is a race to put one in the hash
+
+From: Jeff Layton <jlayton@poochiereds.net>
+
+commit c5952338bfc234e54deda45b7228f610a545e28a upstream.
+
+alloc_init_open_stateowner can return an already freed entry if there is
+a race to put openowners in the hashtable.
+
+In commit 7ffb588086e9, we changed it so that we allocate and initialize
+an openowner, and then check to see if a matching one got stuffed into
+the hashtable in the meantime. If it did, then we free the one we just
+allocated and take a reference on the one already there. There is a bug
+here though. The code will then return the pointer to the one that was
+allocated (and has now been freed).
+
+This wasn't evident before as this race almost never occurred. The Linux
+kernel client used to serialize requests for a single openowner.  That
+has changed now with v4.0 kernels, and this race can now easily occur.
+
+Fixes: 7ffb588086e9
+Cc: Trond Myklebust <trond.myklebust@primarydata.com>
+Reported-by: Christoph Hellwig <hch@infradead.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfsd/nfs4state.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -3228,7 +3228,7 @@ alloc_init_open_stateowner(unsigned int
+       } else
+               nfs4_free_openowner(&oo->oo_owner);
+       spin_unlock(&clp->cl_lock);
+-      return oo;
++      return ret;
+ }
+ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) {
diff --git a/queue-3.19/powerpc-fix-memory-corruption-by-pnv_alloc_idle_core_states.patch b/queue-3.19/powerpc-fix-memory-corruption-by-pnv_alloc_idle_core_states.patch
new file mode 100644 (file)
index 0000000..7376ede
--- /dev/null
@@ -0,0 +1,37 @@
+From d52356e7f48e400ca258c6763a232a92fa82ff68 Mon Sep 17 00:00:00 2001
+From: Jan Stancek <jstancek@redhat.com>
+Date: Tue, 31 Mar 2015 18:11:46 +0200
+Subject: powerpc: fix memory corruption by pnv_alloc_idle_core_states
+
+From: Jan Stancek <jstancek@redhat.com>
+
+commit d52356e7f48e400ca258c6763a232a92fa82ff68 upstream.
+
+Space allocated for paca is based off nr_cpu_ids,
+but pnv_alloc_idle_core_states() iterates paca with
+cpu_nr_cores()*threads_per_core, which is using NR_CPUS.
+
+This causes pnv_alloc_idle_core_states() to write over memory,
+which is outside of paca array and may later lead to various panics.
+
+Fixes: 7cba160ad789 (powernv/cpuidle: Redesign idle states management)
+Signed-off-by: Jan Stancek <jstancek@redhat.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Preet U. Murthy <preeti@linux.vnet.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/include/asm/cputhreads.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/powerpc/include/asm/cputhreads.h
++++ b/arch/powerpc/include/asm/cputhreads.h
+@@ -55,7 +55,7 @@ static inline cpumask_t cpu_thread_mask_
+ static inline int cpu_nr_cores(void)
+ {
+-      return NR_CPUS >> threads_shift;
++      return nr_cpu_ids >> threads_shift;
+ }
+ static inline cpumask_t cpu_online_cores_map(void)
diff --git a/queue-3.19/powerpc-re-enable-dynticks.patch b/queue-3.19/powerpc-re-enable-dynticks.patch
new file mode 100644 (file)
index 0000000..0e58584
--- /dev/null
@@ -0,0 +1,58 @@
+From fea559f303567e558bfab9c8ba4a2af5b309205a Mon Sep 17 00:00:00 2001
+From: Paul Clarke <pc@us.ibm.com>
+Date: Fri, 20 Feb 2015 11:13:33 -0600
+Subject: powerpc: Re-enable dynticks
+
+From: Paul Clarke <pc@us.ibm.com>
+
+commit fea559f303567e558bfab9c8ba4a2af5b309205a upstream.
+
+Implement arch_irq_work_has_interrupt() for powerpc
+
+Commit 9b01f5bf3 introduced a dependency on "IRQ work self-IPIs" for
+full dynamic ticks to be enabled, by expecting architectures to
+implement a suitable arch_irq_work_has_interrupt() routine.
+
+Several arches have implemented this routine, including x86 (3010279f)
+and arm (09f6edd4), but powerpc was omitted.
+
+This patch implements this routine for powerpc.
+
+The symptom: at boot (on powerpc systems) with "nohz_full=<CPU list>",
+the following message is displayed:
+
+     NO_HZ: Can't run full dynticks because arch doesn't support irq work self-IPIs
+
+after this patch:
+
+     NO_HZ: Full dynticks CPUs: <CPU list>.
+
+Tested against 3.19.
+
+powerpc implements "IRQ work self-IPIs" by setting the decrementer to 1 in
+arch_irq_work_raise(), which causes a decrementer exception on the next
+timebase tick. We then handle the work in __timer_interrupt().
+
+CC: Frederic Weisbecker <fweisbec@gmail.com>
+Signed-off-by: Paul A. Clarke <pc@us.ibm.com>
+Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+[mpe: Flesh out change log, fix ws & include guards, remove include of processor.h]
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/include/asm/irq_work.h |    9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- /dev/null
++++ b/arch/powerpc/include/asm/irq_work.h
+@@ -0,0 +1,9 @@
++#ifndef _ASM_POWERPC_IRQ_WORK_H
++#define _ASM_POWERPC_IRQ_WORK_H
++
++static inline bool arch_irq_work_has_interrupt(void)
++{
++      return true;
++}
++
++#endif /* _ASM_POWERPC_IRQ_WORK_H */
diff --git a/queue-3.19/radeon-do-not-directly-dereference-pointers-to-bios-area.patch b/queue-3.19/radeon-do-not-directly-dereference-pointers-to-bios-area.patch
new file mode 100644 (file)
index 0000000..b47caad
--- /dev/null
@@ -0,0 +1,56 @@
+From f2c9e560b406f2f6b14b345c7da33467dee9cdf2 Mon Sep 17 00:00:00 2001
+From: David Miller <davem@davemloft.net>
+Date: Wed, 18 Mar 2015 23:18:40 -0400
+Subject: radeon: Do not directly dereference pointers to BIOS area.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: David Miller <davem@davemloft.net>
+
+commit f2c9e560b406f2f6b14b345c7da33467dee9cdf2 upstream.
+
+Use readb() and memcpy_fromio() accessors instead.
+
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/radeon/radeon_bios.c |   10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+--- a/drivers/gpu/drm/radeon/radeon_bios.c
++++ b/drivers/gpu/drm/radeon/radeon_bios.c
+@@ -76,7 +76,7 @@ static bool igp_read_bios_from_vram(stru
+ static bool radeon_read_bios(struct radeon_device *rdev)
+ {
+-      uint8_t __iomem *bios;
++      uint8_t __iomem *bios, val1, val2;
+       size_t size;
+       rdev->bios = NULL;
+@@ -86,15 +86,19 @@ static bool radeon_read_bios(struct rade
+               return false;
+       }
+-      if (size == 0 || bios[0] != 0x55 || bios[1] != 0xaa) {
++      val1 = readb(&bios[0]);
++      val2 = readb(&bios[1]);
++
++      if (size == 0 || val1 != 0x55 || val2 != 0xaa) {
+               pci_unmap_rom(rdev->pdev, bios);
+               return false;
+       }
+-      rdev->bios = kmemdup(bios, size, GFP_KERNEL);
++      rdev->bios = kzalloc(size, GFP_KERNEL);
+       if (rdev->bios == NULL) {
+               pci_unmap_rom(rdev->pdev, bios);
+               return false;
+       }
++      memcpy_fromio(rdev->bios, bios, size);
+       pci_unmap_rom(rdev->pdev, bios);
+       return true;
+ }
diff --git a/queue-3.19/sched-fix-rlimit_rttime-when-pi-boosting-to-rt.patch b/queue-3.19/sched-fix-rlimit_rttime-when-pi-boosting-to-rt.patch
new file mode 100644 (file)
index 0000000..b395146
--- /dev/null
@@ -0,0 +1,47 @@
+From 746db9443ea57fd9c059f62c4bfbf41cf224fe13 Mon Sep 17 00:00:00 2001
+From: Brian Silverman <brian@peloton-tech.com>
+Date: Wed, 18 Feb 2015 16:23:56 -0800
+Subject: sched: Fix RLIMIT_RTTIME when PI-boosting to RT
+
+From: Brian Silverman <brian@peloton-tech.com>
+
+commit 746db9443ea57fd9c059f62c4bfbf41cf224fe13 upstream.
+
+When non-realtime tasks get priority-inheritance boosted to a realtime
+scheduling class, RLIMIT_RTTIME starts to apply to them. However, the
+counter used for checking this (the same one used for SCHED_RR
+timeslices) was not getting reset. This meant that tasks running with a
+non-realtime scheduling class which are repeatedly boosted to a realtime
+one, but never block while they are running realtime, eventually hit the
+timeout without ever running for a time over the limit. This patch
+resets the realtime timeslice counter when un-PI-boosting from an RT to
+a non-RT scheduling class.
+
+I have some test code with two threads and a shared PTHREAD_PRIO_INHERIT
+mutex which induces priority boosting and spins while boosted that gets
+killed by a SIGXCPU on non-fixed kernels but doesn't with this patch
+applied. It happens much faster with a CONFIG_PREEMPT_RT kernel, and
+does happen eventually with PREEMPT_VOLUNTARY kernels.
+
+Signed-off-by: Brian Silverman <brian@peloton-tech.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: austin@peloton-tech.com
+Link: http://lkml.kernel.org/r/1424305436-6716-1-git-send-email-brian@peloton-tech.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/sched/core.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -3079,6 +3079,8 @@ void rt_mutex_setprio(struct task_struct
+       } else {
+               if (dl_prio(oldprio))
+                       p->dl.dl_boosted = 0;
++              if (rt_prio(oldprio))
++                      p->rt.timeout = 0;
+               p->sched_class = &fair_sched_class;
+       }
index 662b5a05c04a69f5f638d1e64803c0aa427b08aa..c8f4bae593f630069d08d707d14d81b83ce6b3c6 100644 (file)
@@ -28,3 +28,25 @@ drm-i915-vlv-save-restore-the-power-context-base-reg.patch
 drm-i915-vlv-remove-wait-for-previous-gfx-clk-disable-request.patch
 drm-amdkfd-initialize-only-amdkfd-s-assigned-pipelines.patch
 drm-i915-align-initial-plane-backing-objects-correctly.patch
+btrfs-simplify-insert_orphan_item.patch
+ib-uverbs-prevent-integer-overflow-in-ib_umem_get-address-arithmetic.patch
+iwlwifi-dvm-run-init-firmware-again-upon-.start.patch
+x86-xen-prepare-p2m-list-for-memory-hotplug.patch
+xen-balloon-before-adding-hotplugged-memory-set-frames-to-invalid.patch
+nfsd-return-correct-openowner-when-there-is-a-race-to-put-one-in-the-hash.patch
+nfsd-return-correct-lockowner-when-there-is-a-race-on-hash-insert.patch
+sunrpc-make-debugfs-file-creation-failure-non-fatal.patch
+powerpc-fix-memory-corruption-by-pnv_alloc_idle_core_states.patch
+powerpc-re-enable-dynticks.patch
+nbd-fix-possible-memory-leak.patch
+mac80211-fix-rx-a-mpdu-session-reorder-timer-deletion.patch
+mm-fix-anon_vma-degree-underflow-in-anon_vma-endless-growing-prevention.patch
+mm-memory-hotplug-postpone-the-reset-of-obsolete-pgdat.patch
+mm-page_alloc.c-call-kernel_map_pages-in-unset_migrateype_isolate.patch
+sched-fix-rlimit_rttime-when-pi-boosting-to-rt.patch
+cpufreq-schedule-work-for-the-first-online-cpu-on-resume.patch
+writeback-add-missing-initial_jiffies-init-in-global_update_bandwidth.patch
+writeback-fix-possible-underflow-in-write-bandwidth-calculation.patch
+libata-update-crucial-micron-blacklist.patch
+libata-blacklist-queued-trim-on-samsung-ssd-850-pro.patch
+radeon-do-not-directly-dereference-pointers-to-bios-area.patch
diff --git a/queue-3.19/sunrpc-make-debugfs-file-creation-failure-non-fatal.patch b/queue-3.19/sunrpc-make-debugfs-file-creation-failure-non-fatal.patch
new file mode 100644 (file)
index 0000000..c0ab566
--- /dev/null
@@ -0,0 +1,296 @@
+From f9c72d10d6fbf949558cd088389a42213ed7b12d Mon Sep 17 00:00:00 2001
+From: Jeff Layton <jlayton@poochiereds.net>
+Date: Tue, 31 Mar 2015 12:03:28 -0400
+Subject: sunrpc: make debugfs file creation failure non-fatal
+
+From: Jeff Layton <jlayton@poochiereds.net>
+
+commit f9c72d10d6fbf949558cd088389a42213ed7b12d upstream.
+
+We currently have a problem that SELinux policy is being enforced when
+creating debugfs files. If a debugfs file is created as a side effect of
+doing some syscall, then that creation can fail if the SELinux policy
+for that process prevents it.
+
+This seems wrong. We don't do that for files under /proc, for instance,
+so Bruce has proposed a patch to fix that.
+
+While discussing that patch however, Greg K.H. stated:
+
+    "No kernel code should care / fail if a debugfs function fails, so
+     please fix up the sunrpc code first."
+
+This patch converts all of the sunrpc debugfs setup code to be void
+return functions, and the callers to not look for errors from those
+functions.
+
+This should allow rpc_clnt and rpc_xprt creation to work, even if the
+kernel fails to create debugfs files for some reason.
+
+Symptoms were failing krb5 mounts on systems using gss-proxy and
+selinux.
+
+Fixes: 388f0c776781 "sunrpc: add a debugfs rpc_xprt directory..."
+Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
+Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/sunrpc/debug.h |   18 +++++++-------
+ net/sunrpc/clnt.c            |    4 ---
+ net/sunrpc/debugfs.c         |   52 +++++++++++++++++++++++--------------------
+ net/sunrpc/sunrpc_syms.c     |    7 -----
+ net/sunrpc/xprt.c            |    7 -----
+ 5 files changed, 41 insertions(+), 47 deletions(-)
+
+--- a/include/linux/sunrpc/debug.h
++++ b/include/linux/sunrpc/debug.h
+@@ -60,17 +60,17 @@ struct rpc_xprt;
+ #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+ void          rpc_register_sysctl(void);
+ void          rpc_unregister_sysctl(void);
+-int           sunrpc_debugfs_init(void);
++void          sunrpc_debugfs_init(void);
+ void          sunrpc_debugfs_exit(void);
+-int           rpc_clnt_debugfs_register(struct rpc_clnt *);
++void          rpc_clnt_debugfs_register(struct rpc_clnt *);
+ void          rpc_clnt_debugfs_unregister(struct rpc_clnt *);
+-int           rpc_xprt_debugfs_register(struct rpc_xprt *);
++void          rpc_xprt_debugfs_register(struct rpc_xprt *);
+ void          rpc_xprt_debugfs_unregister(struct rpc_xprt *);
+ #else
+-static inline int
++static inline void
+ sunrpc_debugfs_init(void)
+ {
+-      return 0;
++      return;
+ }
+ static inline void
+@@ -79,10 +79,10 @@ sunrpc_debugfs_exit(void)
+       return;
+ }
+-static inline int
++static inline void
+ rpc_clnt_debugfs_register(struct rpc_clnt *clnt)
+ {
+-      return 0;
++      return;
+ }
+ static inline void
+@@ -91,10 +91,10 @@ rpc_clnt_debugfs_unregister(struct rpc_c
+       return;
+ }
+-static inline int
++static inline void
+ rpc_xprt_debugfs_register(struct rpc_xprt *xprt)
+ {
+-      return 0;
++      return;
+ }
+ static inline void
+--- a/net/sunrpc/clnt.c
++++ b/net/sunrpc/clnt.c
+@@ -303,9 +303,7 @@ static int rpc_client_register(struct rp
+       struct super_block *pipefs_sb;
+       int err;
+-      err = rpc_clnt_debugfs_register(clnt);
+-      if (err)
+-              return err;
++      rpc_clnt_debugfs_register(clnt);
+       pipefs_sb = rpc_get_sb_net(net);
+       if (pipefs_sb) {
+--- a/net/sunrpc/debugfs.c
++++ b/net/sunrpc/debugfs.c
+@@ -129,48 +129,52 @@ static const struct file_operations task
+       .release        = tasks_release,
+ };
+-int
++void
+ rpc_clnt_debugfs_register(struct rpc_clnt *clnt)
+ {
+-      int len, err;
++      int len;
+       char name[24]; /* enough for "../../rpc_xprt/ + 8 hex digits + NULL */
++      struct rpc_xprt *xprt;
+       /* Already registered? */
+-      if (clnt->cl_debugfs)
+-              return 0;
++      if (clnt->cl_debugfs || !rpc_clnt_dir)
++              return;
+       len = snprintf(name, sizeof(name), "%x", clnt->cl_clid);
+       if (len >= sizeof(name))
+-              return -EINVAL;
++              return;
+       /* make the per-client dir */
+       clnt->cl_debugfs = debugfs_create_dir(name, rpc_clnt_dir);
+       if (!clnt->cl_debugfs)
+-              return -ENOMEM;
++              return;
+       /* make tasks file */
+-      err = -ENOMEM;
+       if (!debugfs_create_file("tasks", S_IFREG | S_IRUSR, clnt->cl_debugfs,
+                                clnt, &tasks_fops))
+               goto out_err;
+-      err = -EINVAL;
+       rcu_read_lock();
++      xprt = rcu_dereference(clnt->cl_xprt);
++      /* no "debugfs" dentry? Don't bother with the symlink. */
++      if (!xprt->debugfs) {
++              rcu_read_unlock();
++              return;
++      }
+       len = snprintf(name, sizeof(name), "../../rpc_xprt/%s",
+-                      rcu_dereference(clnt->cl_xprt)->debugfs->d_name.name);
++                      xprt->debugfs->d_name.name);
+       rcu_read_unlock();
++
+       if (len >= sizeof(name))
+               goto out_err;
+-      err = -ENOMEM;
+       if (!debugfs_create_symlink("xprt", clnt->cl_debugfs, name))
+               goto out_err;
+-      return 0;
++      return;
+ out_err:
+       debugfs_remove_recursive(clnt->cl_debugfs);
+       clnt->cl_debugfs = NULL;
+-      return err;
+ }
+ void
+@@ -226,33 +230,33 @@ static const struct file_operations xprt
+       .release        = xprt_info_release,
+ };
+-int
++void
+ rpc_xprt_debugfs_register(struct rpc_xprt *xprt)
+ {
+       int len, id;
+       static atomic_t cur_id;
+       char            name[9]; /* 8 hex digits + NULL term */
++      if (!rpc_xprt_dir)
++              return;
++
+       id = (unsigned int)atomic_inc_return(&cur_id);
+       len = snprintf(name, sizeof(name), "%x", id);
+       if (len >= sizeof(name))
+-              return -EINVAL;
++              return;
+       /* make the per-client dir */
+       xprt->debugfs = debugfs_create_dir(name, rpc_xprt_dir);
+       if (!xprt->debugfs)
+-              return -ENOMEM;
++              return;
+       /* make tasks file */
+       if (!debugfs_create_file("info", S_IFREG | S_IRUSR, xprt->debugfs,
+                                xprt, &xprt_info_fops)) {
+               debugfs_remove_recursive(xprt->debugfs);
+               xprt->debugfs = NULL;
+-              return -ENOMEM;
+       }
+-
+-      return 0;
+ }
+ void
+@@ -266,14 +270,17 @@ void __exit
+ sunrpc_debugfs_exit(void)
+ {
+       debugfs_remove_recursive(topdir);
++      topdir = NULL;
++      rpc_clnt_dir = NULL;
++      rpc_xprt_dir = NULL;
+ }
+-int __init
++void __init
+ sunrpc_debugfs_init(void)
+ {
+       topdir = debugfs_create_dir("sunrpc", NULL);
+       if (!topdir)
+-              goto out;
++              return;
+       rpc_clnt_dir = debugfs_create_dir("rpc_clnt", topdir);
+       if (!rpc_clnt_dir)
+@@ -283,10 +290,9 @@ sunrpc_debugfs_init(void)
+       if (!rpc_xprt_dir)
+               goto out_remove;
+-      return 0;
++      return;
+ out_remove:
+       debugfs_remove_recursive(topdir);
+       topdir = NULL;
+-out:
+-      return -ENOMEM;
++      rpc_clnt_dir = NULL;
+ }
+--- a/net/sunrpc/sunrpc_syms.c
++++ b/net/sunrpc/sunrpc_syms.c
+@@ -98,10 +98,7 @@ init_sunrpc(void)
+       if (err)
+               goto out4;
+-      err = sunrpc_debugfs_init();
+-      if (err)
+-              goto out5;
+-
++      sunrpc_debugfs_init();
+ #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+       rpc_register_sysctl();
+ #endif
+@@ -109,8 +106,6 @@ init_sunrpc(void)
+       init_socket_xprt();     /* clnt sock transport */
+       return 0;
+-out5:
+-      unregister_rpc_pipefs();
+ out4:
+       unregister_pernet_subsys(&sunrpc_net_ops);
+ out3:
+--- a/net/sunrpc/xprt.c
++++ b/net/sunrpc/xprt.c
+@@ -1303,7 +1303,6 @@ static void xprt_init(struct rpc_xprt *x
+  */
+ struct rpc_xprt *xprt_create_transport(struct xprt_create *args)
+ {
+-      int err;
+       struct rpc_xprt *xprt;
+       struct xprt_class *t;
+@@ -1344,11 +1343,7 @@ found:
+               return ERR_PTR(-ENOMEM);
+       }
+-      err = rpc_xprt_debugfs_register(xprt);
+-      if (err) {
+-              xprt_destroy(xprt);
+-              return ERR_PTR(err);
+-      }
++      rpc_xprt_debugfs_register(xprt);
+       dprintk("RPC:       created transport %p with %u slots\n", xprt,
+                       xprt->max_reqs);
diff --git a/queue-3.19/writeback-add-missing-initial_jiffies-init-in-global_update_bandwidth.patch b/queue-3.19/writeback-add-missing-initial_jiffies-init-in-global_update_bandwidth.patch
new file mode 100644 (file)
index 0000000..18b4db7
--- /dev/null
@@ -0,0 +1,44 @@
+From 7d70e15480c0450d2bfafaad338a32e884fc215e Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Wed, 4 Mar 2015 10:37:43 -0500
+Subject: writeback: add missing INITIAL_JIFFIES init in global_update_bandwidth()
+
+From: Tejun Heo <tj@kernel.org>
+
+commit 7d70e15480c0450d2bfafaad338a32e884fc215e upstream.
+
+global_update_bandwidth() uses static variable update_time as the
+timestamp for the last update but forgets to initialize it to
+INITIAL_JIFFIES.
+
+This means that global_dirty_limit will be 5 mins into the future on
+32bit and some large amount of jiffies into the past on 64bit.  This
+isn't critical as the only effect is that global_dirty_limit won't be
+updated for the first 5 mins after booting on 32bit machines,
+especially given the auxiliary nature of global_dirty_limit's role -
+protecting against global dirty threshold's sudden dips; however, it
+does lead to unintended suboptimal behavior.  Fix it.
+
+Fixes: c42843f2f0bb ("writeback: introduce smoothed global dirty limit")
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Acked-by: Jan Kara <jack@suse.cz>
+Cc: Wu Fengguang <fengguang.wu@intel.com>
+Cc: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Jens Axboe <axboe@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/page-writeback.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/page-writeback.c
++++ b/mm/page-writeback.c
+@@ -922,7 +922,7 @@ static void global_update_bandwidth(unsi
+                                   unsigned long now)
+ {
+       static DEFINE_SPINLOCK(dirty_lock);
+-      static unsigned long update_time;
++      static unsigned long update_time = INITIAL_JIFFIES;
+       /*
+        * check locklessly first to optimize away locking for the most time
diff --git a/queue-3.19/writeback-fix-possible-underflow-in-write-bandwidth-calculation.patch b/queue-3.19/writeback-fix-possible-underflow-in-write-bandwidth-calculation.patch
new file mode 100644 (file)
index 0000000..209d420
--- /dev/null
@@ -0,0 +1,56 @@
+From c72efb658f7c8b27ca3d0efb5cfd5ded9fcac89e Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Mon, 23 Mar 2015 00:18:48 -0400
+Subject: writeback: fix possible underflow in write bandwidth calculation
+
+From: Tejun Heo <tj@kernel.org>
+
+commit c72efb658f7c8b27ca3d0efb5cfd5ded9fcac89e upstream.
+
+From 1ebf33901ecc75d9496862dceb1ef0377980587c Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Mon, 23 Mar 2015 00:08:19 -0400
+
+2f800fbd777b ("writeback: fix dirtied pages accounting on redirty")
+introduced account_page_redirty() which reverts stat updates for a
+redirtied page, making BDI_DIRTIED no longer monotonically increasing.
+
+bdi_update_write_bandwidth() uses the delta in BDI_DIRTIED as the
+basis for bandwidth calculation.  While unlikely, since the above
+patch, the newer value may be lower than the recorded past value and
+underflow the bandwidth calculation leading to a wild result.
+
+Fix it by subtracting the min of the old and new values when calculating
+delta.  AFAIK, there hasn't been any report of it happening but the
+resulting erratic behavior would be non-critical and temporary, so
+it's possible that the issue is happening without being reported.  The
+risk of the fix is very low, so tagged for -stable.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Jan Kara <jack@suse.cz>
+Cc: Wu Fengguang <fengguang.wu@intel.com>
+Cc: Greg Thelen <gthelen@google.com>
+Fixes: 2f800fbd777b ("writeback: fix dirtied pages accounting on redirty")
+Signed-off-by: Jens Axboe <axboe@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/page-writeback.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/mm/page-writeback.c
++++ b/mm/page-writeback.c
+@@ -857,8 +857,11 @@ static void bdi_update_write_bandwidth(s
+        *                   bw * elapsed + write_bandwidth * (period - elapsed)
+        * write_bandwidth = ---------------------------------------------------
+        *                                          period
++       *
++       * @written may have decreased due to account_page_redirty().
++       * Avoid underflowing @bw calculation.
+        */
+-      bw = written - bdi->written_stamp;
++      bw = written - min(written, bdi->written_stamp);
+       bw *= HZ;
+       if (unlikely(elapsed > period)) {
+               do_div(bw, elapsed);
diff --git a/queue-3.19/x86-xen-prepare-p2m-list-for-memory-hotplug.patch b/queue-3.19/x86-xen-prepare-p2m-list-for-memory-hotplug.patch
new file mode 100644 (file)
index 0000000..f6e4809
--- /dev/null
@@ -0,0 +1,83 @@
+From 633d6f17cd91ad5bf2370265946f716e42d388c6 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Fri, 20 Mar 2015 13:55:38 +0100
+Subject: x86/xen: prepare p2m list for memory hotplug
+
+From: Juergen Gross <jgross@suse.com>
+
+commit 633d6f17cd91ad5bf2370265946f716e42d388c6 upstream.
+
+Commit 054954eb051f35e74b75a566a96fe756015352c8 ("xen: switch to linear
+virtual mapped sparse p2m list") introduced a regression regarding
+memory hotplug for a pv-domain: as the virtual space for the p2m list
+is allocated for the to be expected memory size of the domain only,
+hotplugged memory above that size will not be usable by the domain.
+
+Correct this by using a configurable size for the p2m list in case of
+memory hotplug enabled (default supported memory size is 512 GB for
+64 bit domains and 4 GB for 32 bit domains).
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Daniel Kiper <daniel.kiper@oracle.com>
+Signed-off-by: David Vrabel <david.vrabel@citrix.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/xen/p2m.c  |   10 +++++++++-
+ drivers/xen/Kconfig |   17 +++++++++++++++++
+ 2 files changed, 26 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/xen/p2m.c
++++ b/arch/x86/xen/p2m.c
+@@ -93,6 +93,12 @@ EXPORT_SYMBOL_GPL(xen_p2m_size);
+ unsigned long xen_max_p2m_pfn __read_mostly;
+ EXPORT_SYMBOL_GPL(xen_max_p2m_pfn);
++#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT
++#define P2M_LIMIT CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT
++#else
++#define P2M_LIMIT 0
++#endif
++
+ static DEFINE_SPINLOCK(p2m_update_lock);
+ static unsigned long *p2m_mid_missing_mfn;
+@@ -387,9 +393,11 @@ static void __init xen_rebuild_p2m_list(
+ void __init xen_vmalloc_p2m_tree(void)
+ {
+       static struct vm_struct vm;
++      unsigned long p2m_limit;
++      p2m_limit = (phys_addr_t)P2M_LIMIT * 1024 * 1024 * 1024 / PAGE_SIZE;
+       vm.flags = VM_ALLOC;
+-      vm.size = ALIGN(sizeof(unsigned long) * xen_max_p2m_pfn,
++      vm.size = ALIGN(sizeof(unsigned long) * max(xen_max_p2m_pfn, p2m_limit),
+                       PMD_SIZE * PMDS_PER_MID_PAGE);
+       vm_area_register_early(&vm, PMD_SIZE * PMDS_PER_MID_PAGE);
+       pr_notice("p2m virtual area at %p, size is %lx\n", vm.addr, vm.size);
+--- a/drivers/xen/Kconfig
++++ b/drivers/xen/Kconfig
+@@ -55,6 +55,23 @@ config XEN_BALLOON_MEMORY_HOTPLUG
+         In that case step 3 should be omitted.
++config XEN_BALLOON_MEMORY_HOTPLUG_LIMIT
++      int "Hotplugged memory limit (in GiB) for a PV guest"
++      default 512 if X86_64
++      default 4 if X86_32
++      range 0 64 if X86_32
++      depends on XEN_HAVE_PVMMU
++      depends on XEN_BALLOON_MEMORY_HOTPLUG
++      help
+        Maximum amount of memory (in GiB) that a PV guest can be
++        expanded to when using memory hotplug.
++
+        A PV guest can have more memory than this limit if it is
++        started with a larger maximum.
++
++        This value is used to allocate enough space in internal
++        tables needed for physical memory administration.
++
+ config XEN_SCRUB_PAGES
+       bool "Scrub pages before returning them to system"
+       depends on XEN_BALLOON
diff --git a/queue-3.19/xen-balloon-before-adding-hotplugged-memory-set-frames-to-invalid.patch b/queue-3.19/xen-balloon-before-adding-hotplugged-memory-set-frames-to-invalid.patch
new file mode 100644 (file)
index 0000000..1bf8067
--- /dev/null
@@ -0,0 +1,58 @@
+From 3c56b3a12ce52f361468cbdd2f79b2f3b8da0ea6 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Fri, 20 Mar 2015 13:55:39 +0100
+Subject: xen/balloon: before adding hotplugged memory, set frames to invalid
+
+From: Juergen Gross <jgross@suse.com>
+
+commit 3c56b3a12ce52f361468cbdd2f79b2f3b8da0ea6 upstream.
+
+Commit 25b884a83d487fd62c3de7ac1ab5549979188482 ("x86/xen: set
+regions above the end of RAM as 1:1") introduced a regression.
+
+To be able to add memory pages which were added via memory hotplug to
+a pv domain, the pages must be "invalid" instead of "identity" in the
+p2m list before they can be added.
+
+Suggested-by: David Vrabel <david.vrabel@citrix.com>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Daniel Kiper <daniel.kiper@oracle.com>
+Signed-off-by: David Vrabel <david.vrabel@citrix.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/xen/balloon.c |   23 +++++++++++++++++++++++
+ 1 file changed, 23 insertions(+)
+
+--- a/drivers/xen/balloon.c
++++ b/drivers/xen/balloon.c
+@@ -230,6 +230,29 @@ static enum bp_state reserve_additional_
+       balloon_hotplug = round_up(balloon_hotplug, PAGES_PER_SECTION);
+       nid = memory_add_physaddr_to_nid(hotplug_start_paddr);
++#ifdef CONFIG_XEN_HAVE_PVMMU
++        /*
++         * add_memory() will build page tables for the new memory so
++         * the p2m must contain invalid entries so the correct
++         * non-present PTEs will be written.
++         *
++         * If a failure occurs, the original (identity) p2m entries
++         * are not restored since this region is now known not to
++         * conflict with any devices.
++         */
++      if (!xen_feature(XENFEAT_auto_translated_physmap)) {
++              unsigned long pfn, i;
++
++              pfn = PFN_DOWN(hotplug_start_paddr);
++              for (i = 0; i < balloon_hotplug; i++) {
++                      if (!set_phys_to_machine(pfn + i, INVALID_P2M_ENTRY)) {
++                              pr_warn("set_phys_to_machine() failed, no memory added\n");
++                              return BP_ECANCELED;
++                      }
++                }
++      }
++#endif
++
+       rc = add_memory(nid, hotplug_start_paddr, balloon_hotplug << PAGE_SHIFT);
+       if (rc) {