]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.0 patches
authorGreg Kroah-Hartman <gregkh@suse.de>
Fri, 19 Aug 2011 16:58:51 +0000 (09:58 -0700)
committerGreg Kroah-Hartman <gregkh@suse.de>
Fri, 19 Aug 2011 16:58:51 +0000 (09:58 -0700)
queue-3.0/nfsv4.1-fix-the-callback-highest_used_slotid-behaviour.patch [new file with mode: 0644]
queue-3.0/nfsv4.1-return-nfs4err_badsession-to-callbacks-during.patch [new file with mode: 0644]
queue-3.0/pata_via-disable-atapi-dma-on-averatec-3200.patch [new file with mode: 0644]
queue-3.0/pnfs-obj-bug-when-we-are-running-out-of-bio.patch [new file with mode: 0644]
queue-3.0/pnfs-obj-fix-the-comp_index-0-case.patch [new file with mode: 0644]
queue-3.0/series
queue-3.0/x86-mtrr-lock-stop-machine-during-mtrr-rendezvous-sequence.patch [new file with mode: 0644]

diff --git a/queue-3.0/nfsv4.1-fix-the-callback-highest_used_slotid-behaviour.patch b/queue-3.0/nfsv4.1-fix-the-callback-highest_used_slotid-behaviour.patch
new file mode 100644 (file)
index 0000000..3c53bf3
--- /dev/null
@@ -0,0 +1,179 @@
+From 55a673990ec04cf63005318bcf08c2b0046e5778 Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <Trond.Myklebust@netapp.com>
+Date: Tue, 2 Aug 2011 14:46:29 -0400
+Subject: NFSv4.1: Fix the callback 'highest_used_slotid' behaviour
+
+From: Trond Myklebust <Trond.Myklebust@netapp.com>
+
+commit 55a673990ec04cf63005318bcf08c2b0046e5778 upstream.
+
+Currently, there is no guarantee that we will call nfs4_cb_take_slot() even
+though nfs4_callback_compound() will consistently call
+nfs4_cb_free_slot() provided the cb_process_state has set the 'clp' field.
+The result is that we can trigger the BUG_ON() upon the next call to
+nfs4_cb_take_slot().
+
+This patch fixes the above problem by using the slot id that was taken in
+the CB_SEQUENCE operation as a flag for whether or not we need to call
+nfs4_cb_free_slot().
+It also fixes an atomicity problem: we need to set tbl->highest_used_slotid
+atomically with the check for NFS4_SESSION_DRAINING, otherwise we end up
+racing with the various tests in nfs4_begin_drain_session().
+
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/nfs/callback.h      |    2 +-
+ fs/nfs/callback_proc.c |   20 ++++++++++++++------
+ fs/nfs/callback_xdr.c  |   24 +++++++-----------------
+ 3 files changed, 22 insertions(+), 24 deletions(-)
+
+--- a/fs/nfs/callback.h
++++ b/fs/nfs/callback.h
+@@ -38,6 +38,7 @@ enum nfs4_callback_opnum {
+ struct cb_process_state {
+       __be32                  drc_status;
+       struct nfs_client       *clp;
++      int                     slotid;
+ };
+ struct cb_compound_hdr_arg {
+@@ -166,7 +167,6 @@ extern unsigned nfs4_callback_layoutreca
+       void *dummy, struct cb_process_state *cps);
+ extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses);
+-extern void nfs4_cb_take_slot(struct nfs_client *clp);
+ struct cb_devicenotifyitem {
+       uint32_t                cbd_notify_type;
+--- a/fs/nfs/callback_proc.c
++++ b/fs/nfs/callback_proc.c
+@@ -333,7 +333,7 @@ validate_seqid(struct nfs4_slot_table *t
+       /* Normal */
+       if (likely(args->csa_sequenceid == slot->seq_nr + 1)) {
+               slot->seq_nr++;
+-              return htonl(NFS4_OK);
++              goto out_ok;
+       }
+       /* Replay */
+@@ -352,11 +352,14 @@ validate_seqid(struct nfs4_slot_table *t
+       /* Wraparound */
+       if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) {
+               slot->seq_nr = 1;
+-              return htonl(NFS4_OK);
++              goto out_ok;
+       }
+       /* Misordered request */
+       return htonl(NFS4ERR_SEQ_MISORDERED);
++out_ok:
++      tbl->highest_used_slotid = args->csa_slotid;
++      return htonl(NFS4_OK);
+ }
+ /*
+@@ -418,26 +421,32 @@ __be32 nfs4_callback_sequence(struct cb_
+                             struct cb_sequenceres *res,
+                             struct cb_process_state *cps)
+ {
++      struct nfs4_slot_table *tbl;
+       struct nfs_client *clp;
+       int i;
+       __be32 status = htonl(NFS4ERR_BADSESSION);
+-      cps->clp = NULL;
+-
+       clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid);
+       if (clp == NULL)
+               goto out;
++      tbl = &clp->cl_session->bc_slot_table;
++
++      spin_lock(&tbl->slot_tbl_lock);
+       /* state manager is resetting the session */
+       if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) {
+-              status = NFS4ERR_DELAY;
++              spin_unlock(&tbl->slot_tbl_lock);
++              status = htonl(NFS4ERR_DELAY);
+               goto out;
+       }
+       status = validate_seqid(&clp->cl_session->bc_slot_table, args);
++      spin_unlock(&tbl->slot_tbl_lock);
+       if (status)
+               goto out;
++      cps->slotid = args->csa_slotid;
++
+       /*
+        * Check for pending referring calls.  If a match is found, a
+        * related callback was received before the response to the original
+@@ -454,7 +463,6 @@ __be32 nfs4_callback_sequence(struct cb_
+       res->csr_slotid = args->csa_slotid;
+       res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
+       res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
+-      nfs4_cb_take_slot(clp);
+ out:
+       cps->clp = clp; /* put in nfs4_callback_compound */
+--- a/fs/nfs/callback_xdr.c
++++ b/fs/nfs/callback_xdr.c
+@@ -754,26 +754,15 @@ static void nfs4_callback_free_slot(stru
+        * Let the state manager know callback processing done.
+        * A single slot, so highest used slotid is either 0 or -1
+        */
+-      tbl->highest_used_slotid--;
++      tbl->highest_used_slotid = -1;
+       nfs4_check_drain_bc_complete(session);
+       spin_unlock(&tbl->slot_tbl_lock);
+ }
+-static void nfs4_cb_free_slot(struct nfs_client *clp)
++static void nfs4_cb_free_slot(struct cb_process_state *cps)
+ {
+-      if (clp && clp->cl_session)
+-              nfs4_callback_free_slot(clp->cl_session);
+-}
+-
+-/* A single slot, so highest used slotid is either 0 or -1 */
+-void nfs4_cb_take_slot(struct nfs_client *clp)
+-{
+-      struct nfs4_slot_table *tbl = &clp->cl_session->bc_slot_table;
+-
+-      spin_lock(&tbl->slot_tbl_lock);
+-      tbl->highest_used_slotid++;
+-      BUG_ON(tbl->highest_used_slotid != 0);
+-      spin_unlock(&tbl->slot_tbl_lock);
++      if (cps->slotid != -1)
++              nfs4_callback_free_slot(cps->clp->cl_session);
+ }
+ #else /* CONFIG_NFS_V4_1 */
+@@ -784,7 +773,7 @@ preprocess_nfs41_op(int nop, unsigned in
+       return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
+ }
+-static void nfs4_cb_free_slot(struct nfs_client *clp)
++static void nfs4_cb_free_slot(struct cb_process_state *cps)
+ {
+ }
+ #endif /* CONFIG_NFS_V4_1 */
+@@ -866,6 +855,7 @@ static __be32 nfs4_callback_compound(str
+       struct cb_process_state cps = {
+               .drc_status = 0,
+               .clp = NULL,
++              .slotid = -1,
+       };
+       unsigned int nops = 0;
+@@ -906,7 +896,7 @@ static __be32 nfs4_callback_compound(str
+       *hdr_res.status = status;
+       *hdr_res.nops = htonl(nops);
+-      nfs4_cb_free_slot(cps.clp);
++      nfs4_cb_free_slot(&cps);
+       nfs_put_client(cps.clp);
+       dprintk("%s: done, status = %u\n", __func__, ntohl(status));
+       return rpc_success;
diff --git a/queue-3.0/nfsv4.1-return-nfs4err_badsession-to-callbacks-during.patch b/queue-3.0/nfsv4.1-return-nfs4err_badsession-to-callbacks-during.patch
new file mode 100644 (file)
index 0000000..48806c3
--- /dev/null
@@ -0,0 +1,44 @@
+From 910ac68a2b80c7de95bc8488734067b1bb15d583 Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <Trond.Myklebust@netapp.com>
+Date: Tue, 2 Aug 2011 14:46:52 -0400
+Subject: NFSv4.1: Return NFS4ERR_BADSESSION to callbacks during
+ session resets
+
+From: Trond Myklebust <Trond.Myklebust@netapp.com>
+
+commit 910ac68a2b80c7de95bc8488734067b1bb15d583 upstream.
+
+If the client is in the process of resetting the session when it receives
+a callback, then returning NFS4ERR_DELAY may cause a deadlock with the
+DESTROY_SESSION call.
+
+Basically, if the client returns NFS4ERR_DELAY in response to the
+CB_SEQUENCE call, then the server is entitled to believe that the
+client is busy because it is already processing that call. In that
+case, the server is perfectly entitled to respond with a
+NFS4ERR_BACK_CHAN_BUSY to any DESTROY_SESSION call.
+
+Fix this by having the client reply with a NFS4ERR_BADSESSION in
+response to the callback if it is resetting the session.
+
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/nfs/callback_proc.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/fs/nfs/callback_proc.c
++++ b/fs/nfs/callback_proc.c
+@@ -437,6 +437,11 @@ __be32 nfs4_callback_sequence(struct cb_
+       if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) {
+               spin_unlock(&tbl->slot_tbl_lock);
+               status = htonl(NFS4ERR_DELAY);
++              /* Return NFS4ERR_BADSESSION if we're draining the session
++               * in order to reset it.
++               */
++              if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state))
++                      status = htonl(NFS4ERR_BADSESSION);
+               goto out;
+       }
diff --git a/queue-3.0/pata_via-disable-atapi-dma-on-averatec-3200.patch b/queue-3.0/pata_via-disable-atapi-dma-on-averatec-3200.patch
new file mode 100644 (file)
index 0000000..065f37e
--- /dev/null
@@ -0,0 +1,59 @@
+From 6d0e194d2eefcaab6dbdca1f639748660144acb5 Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Thu, 4 Aug 2011 11:15:07 +0200
+Subject: pata_via: disable ATAPI DMA on AVERATEC 3200
+
+From: Tejun Heo <tj@kernel.org>
+
+commit 6d0e194d2eefcaab6dbdca1f639748660144acb5 upstream.
+
+On AVERATEC 3200, pata_via causes memory corruption with ATAPI DMA,
+which often leads to random kernel oops.  The cause of the problem is
+not well understood yet and only small subset of machines using the
+controller seem affected.  Blacklist ATAPI DMA on the machine.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=11426
+Reported-and-tested-by: Jim Bray <jimsantelmo@gmail.com>
+Cc: Alan Cox <alan@linux.intel.com>
+Signed-off-by: Jeff Garzik <jgarzik@pobox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/ata/pata_via.c |   18 ++++++++++++++++++
+ 1 file changed, 18 insertions(+)
+
+--- a/drivers/ata/pata_via.c
++++ b/drivers/ata/pata_via.c
+@@ -124,6 +124,17 @@ static const struct via_isa_bridge {
+       { NULL }
+ };
++static const struct dmi_system_id no_atapi_dma_dmi_table[] = {
++      {
++              .ident = "AVERATEC 3200",
++              .matches = {
++                      DMI_MATCH(DMI_BOARD_VENDOR, "AVERATEC"),
++                      DMI_MATCH(DMI_BOARD_NAME, "3200"),
++              },
++      },
++      { }
++};
++
+ struct via_port {
+       u8 cached_device;
+ };
+@@ -355,6 +366,13 @@ static unsigned long via_mode_filter(str
+                       mask &= ~ ATA_MASK_UDMA;
+               }
+       }
++
++      if (dev->class == ATA_DEV_ATAPI &&
++          dmi_check_system(no_atapi_dma_dmi_table)) {
++              ata_dev_warn(dev, "controller locks up on ATAPI DMA, forcing PIO\n");
++              mask &= ATA_MASK_PIO;
++      }
++
+       return mask;
+ }
diff --git a/queue-3.0/pnfs-obj-bug-when-we-are-running-out-of-bio.patch b/queue-3.0/pnfs-obj-bug-when-we-are-running-out-of-bio.patch
new file mode 100644 (file)
index 0000000..a9db245
--- /dev/null
@@ -0,0 +1,69 @@
+From 20618b21da0796115e81906d24ff1601552701b7 Mon Sep 17 00:00:00 2001
+From: Boaz Harrosh <bharrosh@panasas.com>
+Date: Wed, 3 Aug 2011 21:54:33 -0700
+Subject: pnfs-obj: Bug when we are running out of bio
+
+From: Boaz Harrosh <bharrosh@panasas.com>
+
+commit 20618b21da0796115e81906d24ff1601552701b7 upstream.
+
+When we have a situation that the number of pages we want
+to encode is bigger than the size of the bio. (Which can
+currently happen only when all IO is going to a single device
+e.g. group_width==1) then the IO is submitted short and we
+report back only the amount of bytes we actually wrote/read
+and all is fine. BUT ...
+
+There was a bug that the current length counter was advanced
+before the fail to add the extra page, and we come to a situation
+that the CDB length was one-page longer than the actual bio size,
+which is of course rejected by the osd-target.
+
+While here also fix the bio size calculation, in the case
+that we received more than one group of devices.
+
+Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/nfs/objlayout/objio_osd.c |   12 +++++-------
+ 1 file changed, 5 insertions(+), 7 deletions(-)
+
+--- a/fs/nfs/objlayout/objio_osd.c
++++ b/fs/nfs/objlayout/objio_osd.c
+@@ -587,22 +587,19 @@ static void _calc_stripe_info(struct obj
+ }
+ static int _add_stripe_unit(struct objio_state *ios,  unsigned *cur_pg,
+-              unsigned pgbase, struct _objio_per_comp *per_dev, int cur_len,
++              unsigned pgbase, struct _objio_per_comp *per_dev, int len,
+               gfp_t gfp_flags)
+ {
+       unsigned pg = *cur_pg;
++      int cur_len = len;
+       struct request_queue *q =
+                       osd_request_queue(_io_od(ios, per_dev->dev));
+-      per_dev->length += cur_len;
+-
+       if (per_dev->bio == NULL) {
+-              unsigned stripes = ios->layout->num_comps /
+-                                                   ios->layout->mirrors_p1;
+-              unsigned pages_in_stripe = stripes *
++              unsigned pages_in_stripe = ios->layout->group_width *
+                                     (ios->layout->stripe_unit / PAGE_SIZE);
+               unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) /
+-                                  stripes;
++                                  ios->layout->group_width;
+               if (BIO_MAX_PAGES_KMALLOC < bio_size)
+                       bio_size = BIO_MAX_PAGES_KMALLOC;
+@@ -630,6 +627,7 @@ static int _add_stripe_unit(struct objio
+       }
+       BUG_ON(cur_len);
++      per_dev->length += len;
+       *cur_pg = pg;
+       return 0;
+ }
diff --git a/queue-3.0/pnfs-obj-fix-the-comp_index-0-case.patch b/queue-3.0/pnfs-obj-fix-the-comp_index-0-case.patch
new file mode 100644 (file)
index 0000000..6d10741
--- /dev/null
@@ -0,0 +1,97 @@
+From 9af7db3228acc286c50e3a0f054ec982efdbc6c6 Mon Sep 17 00:00:00 2001
+From: Boaz Harrosh <bharrosh@panasas.com>
+Date: Wed, 3 Aug 2011 21:52:51 -0700
+Subject: pnfs-obj: Fix the comp_index != 0 case
+
+From: Boaz Harrosh <bharrosh@panasas.com>
+
+commit 9af7db3228acc286c50e3a0f054ec982efdbc6c6 upstream.
+
+There were bugs in the case of partial layout where olo_comp_index
+is not zero. This used to work and was tested but one of the later
+cleanup SQUASHMEs broke it and was not tested since.
+
+Also add a dprint that specify those received layout parameters.
+Everything else was already printed.
+
+Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/nfs/objlayout/objio_osd.c        |   16 +++++++---------
+ fs/nfs/objlayout/pnfs_osd_xdr_cli.c |    3 +++
+ 2 files changed, 10 insertions(+), 9 deletions(-)
+
+--- a/fs/nfs/objlayout/objio_osd.c
++++ b/fs/nfs/objlayout/objio_osd.c
+@@ -479,7 +479,6 @@ static int _io_check(struct objio_state
+       for (i = 0; i <  ios->numdevs; i++) {
+               struct osd_sense_info osi;
+               struct osd_request *or = ios->per_dev[i].or;
+-              unsigned dev;
+               int ret;
+               if (!or)
+@@ -500,9 +499,8 @@ static int _io_check(struct objio_state
+                       continue; /* we recovered */
+               }
+-              dev = ios->per_dev[i].dev;
+-              objlayout_io_set_result(&ios->ol_state, dev,
+-                                      &ios->layout->comps[dev].oc_object_id,
++              objlayout_io_set_result(&ios->ol_state, i,
++                                      &ios->layout->comps[i].oc_object_id,
+                                       osd_pri_2_pnfs_err(osi.osd_err_pri),
+                                       ios->per_dev[i].offset,
+                                       ios->per_dev[i].length,
+@@ -650,7 +648,7 @@ static int _prepare_one_group(struct obj
+       int ret = 0;
+       while (length) {
+-              struct _objio_per_comp *per_dev = &ios->per_dev[dev];
++              struct _objio_per_comp *per_dev = &ios->per_dev[dev - first_dev];
+               unsigned cur_len, page_off = 0;
+               if (!per_dev->length) {
+@@ -670,8 +668,8 @@ static int _prepare_one_group(struct obj
+                               cur_len = stripe_unit;
+                       }
+-                      if (max_comp < dev)
+-                              max_comp = dev;
++                      if (max_comp < dev - first_dev)
++                              max_comp = dev - first_dev;
+               } else {
+                       cur_len = stripe_unit;
+               }
+@@ -806,7 +804,7 @@ static int _read_mirrors(struct objio_st
+       struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp];
+       unsigned dev = per_dev->dev;
+       struct pnfs_osd_object_cred *cred =
+-                      &ios->layout->comps[dev];
++                      &ios->layout->comps[cur_comp];
+       struct osd_obj_id obj = {
+               .partition = cred->oc_object_id.oid_partition_id,
+               .id = cred->oc_object_id.oid_object_id,
+@@ -904,7 +902,7 @@ static int _write_mirrors(struct objio_s
+       for (; cur_comp < last_comp; ++cur_comp, ++dev) {
+               struct osd_request *or = NULL;
+               struct pnfs_osd_object_cred *cred =
+-                                      &ios->layout->comps[dev];
++                                      &ios->layout->comps[cur_comp];
+               struct osd_obj_id obj = {
+                       .partition = cred->oc_object_id.oid_partition_id,
+                       .id = cred->oc_object_id.oid_object_id,
+--- a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
++++ b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
+@@ -170,6 +170,9 @@ int pnfs_osd_xdr_decode_layout_map(struc
+       p = _osd_xdr_decode_data_map(p, &layout->olo_map);
+       layout->olo_comps_index = be32_to_cpup(p++);
+       layout->olo_num_comps = be32_to_cpup(p++);
++      dprintk("%s: olo_comps_index=%d olo_num_comps=%d\n", __func__,
++              layout->olo_comps_index, layout->olo_num_comps);
++
+       iter->total_comps = layout->olo_num_comps;
+       return 0;
+ }
index 7a038408476ef1a2cec7666ee9e9afae10c92358..07bb57d8819a6379141cdf421798cc1a445966bd 100644 (file)
@@ -4,3 +4,9 @@ befs-validate-length-of-long-symbolic-links.patch
 i7core_edac-fixed-typo-in-error-count-calculation.patch
 possible-memory-corruption-on-mount.patch
 x86-intel-power-correct-the-msr_ia32_energy_perf_bias.patch
+pata_via-disable-atapi-dma-on-averatec-3200.patch
+pnfs-obj-fix-the-comp_index-0-case.patch
+pnfs-obj-bug-when-we-are-running-out-of-bio.patch
+nfsv4.1-fix-the-callback-highest_used_slotid-behaviour.patch
+nfsv4.1-return-nfs4err_badsession-to-callbacks-during.patch
+x86-mtrr-lock-stop-machine-during-mtrr-rendezvous-sequence.patch
diff --git a/queue-3.0/x86-mtrr-lock-stop-machine-during-mtrr-rendezvous-sequence.patch b/queue-3.0/x86-mtrr-lock-stop-machine-during-mtrr-rendezvous-sequence.patch
new file mode 100644 (file)
index 0000000..02094b1
--- /dev/null
@@ -0,0 +1,109 @@
+From 6d3321e8e2b3bf6a5892e2ef673c7bf536e3f904 Mon Sep 17 00:00:00 2001
+From: Suresh Siddha <suresh.b.siddha@intel.com>
+Date: Thu, 23 Jun 2011 11:19:26 -0700
+Subject: x86, mtrr: lock stop machine during MTRR rendezvous sequence
+
+From: Suresh Siddha <suresh.b.siddha@intel.com>
+
+commit 6d3321e8e2b3bf6a5892e2ef673c7bf536e3f904 upstream.
+
+MTRR rendezvous sequence using stop_one_cpu_nowait() can potentially
+happen in parallel with another system wide rendezvous using
+stop_machine(). This can lead to deadlock (The order in which
+works are queued can be different on different cpu's. Some cpu's
+will be running the first rendezvous handler and others will be running
+the second rendezvous handler. Each set waiting for the other set to join
+for the system wide rendezvous, leading to a deadlock).
+
+MTRR rendezvous sequence is not implemented using stop_machine() as this
+gets called both from the process context as well as the cpu online paths
+(where the cpu has not come online and the interrupts are disabled etc).
+stop_machine() works with only online cpus.
+
+For now, take the stop_machine mutex in the MTRR rendezvous sequence that
+gets called from an online cpu (here we are in the process context
+and can potentially sleep while taking the mutex). And the MTRR rendezvous
+that gets triggered during cpu online doesn't need to take this stop_machine
+lock (as the stop_machine() already ensures that there is no cpu hotplug
+going on in parallel by doing get_online_cpus())
+
+    TBD: Pursue a cleaner solution of extending the stop_machine()
+         infrastructure to handle the case where the calling cpu is
+         still not online and use this for MTRR rendezvous sequence.
+
+fixes: https://bugzilla.novell.com/show_bug.cgi?id=672008
+
+Reported-by: Vadim Kotelnikov <vadimuzzz@inbox.ru>
+Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
+Link: http://lkml.kernel.org/r/20110623182056.807230326@sbsiddha-MOBL3.sc.intel.com
+Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/kernel/cpu/mtrr/main.c |   23 +++++++++++++++++++++++
+ include/linux/stop_machine.h    |    2 ++
+ kernel/stop_machine.c           |    2 +-
+ 3 files changed, 26 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/cpu/mtrr/main.c
++++ b/arch/x86/kernel/cpu/mtrr/main.c
+@@ -248,6 +248,25 @@ set_mtrr(unsigned int reg, unsigned long
+       unsigned long flags;
+       int cpu;
++#ifdef CONFIG_SMP
++      /*
++       * If this cpu is not yet active, we are in the cpu online path. There
++       * can be no stop_machine() in parallel, as stop machine ensures this
++       * by using get_online_cpus(). We can skip taking the stop_cpus_mutex,
++       * as we don't need it and also we can't afford to block while waiting
++       * for the mutex.
++       *
++       * If this cpu is active, we need to prevent stop_machine() happening
++       * in parallel by taking the stop cpus mutex.
++       *
++       * Also, this is called in the context of cpu online path or in the
++       * context where cpu hotplug is prevented. So checking the active status
++       * of the raw_smp_processor_id() is safe.
++       */
++      if (cpu_active(raw_smp_processor_id()))
++              mutex_lock(&stop_cpus_mutex);
++#endif
++
+       preempt_disable();
+       data.smp_reg = reg;
+@@ -330,6 +349,10 @@ set_mtrr(unsigned int reg, unsigned long
+       local_irq_restore(flags);
+       preempt_enable();
++#ifdef CONFIG_SMP
++      if (cpu_active(raw_smp_processor_id()))
++              mutex_unlock(&stop_cpus_mutex);
++#endif
+ }
+ /**
+--- a/include/linux/stop_machine.h
++++ b/include/linux/stop_machine.h
+@@ -27,6 +27,8 @@ struct cpu_stop_work {
+       struct cpu_stop_done    *done;
+ };
++extern struct mutex stop_cpus_mutex;
++
+ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg);
+ void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
+                        struct cpu_stop_work *work_buf);
+--- a/kernel/stop_machine.c
++++ b/kernel/stop_machine.c
+@@ -132,8 +132,8 @@ void stop_one_cpu_nowait(unsigned int cp
+       cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), work_buf);
+ }
++DEFINE_MUTEX(stop_cpus_mutex);
+ /* static data for stop_cpus */
+-static DEFINE_MUTEX(stop_cpus_mutex);
+ static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);
+ int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)