git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.7-stable patches
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 18 Aug 2016 09:58:51 +0000 (11:58 +0200)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 18 Aug 2016 09:58:51 +0000 (11:58 +0200)
added patches:
btrfs-fix-delalloc-accounting-after-copy_from_user-faults.patch
nfs-don-t-create-zero-length-requests.patch
nfsd-don-t-return-an-unhashed-lock-stateid-after-taking-mutex.patch
nfsd-fix-race-between-free_stateid-and-lock.patch
pnfs-fix-layoutget-handling-of-nfs4err_bad_stateid-and-nfs4err_expired.patch
pnfs-fix-post-layoutget-error-handling-in-pnfs_update_layout.patch
pnfs-handle-nfs4err_recallconflict-correctly-in-layoutget.patch
pnfs-separate-handling-of-nfs4err_layouttrylater-and-recallconflict.patch
powerpc-tm-fix-stack-pointer-corruption-in-__tm_recheckpoint.patch

queue-4.7/btrfs-fix-delalloc-accounting-after-copy_from_user-faults.patch [new file with mode: 0644]
queue-4.7/nfs-don-t-create-zero-length-requests.patch [new file with mode: 0644]
queue-4.7/nfsd-don-t-return-an-unhashed-lock-stateid-after-taking-mutex.patch [new file with mode: 0644]
queue-4.7/nfsd-fix-race-between-free_stateid-and-lock.patch [new file with mode: 0644]
queue-4.7/pnfs-fix-layoutget-handling-of-nfs4err_bad_stateid-and-nfs4err_expired.patch [new file with mode: 0644]
queue-4.7/pnfs-fix-post-layoutget-error-handling-in-pnfs_update_layout.patch [new file with mode: 0644]
queue-4.7/pnfs-handle-nfs4err_recallconflict-correctly-in-layoutget.patch [new file with mode: 0644]
queue-4.7/pnfs-separate-handling-of-nfs4err_layouttrylater-and-recallconflict.patch [new file with mode: 0644]
queue-4.7/powerpc-tm-fix-stack-pointer-corruption-in-__tm_recheckpoint.patch [new file with mode: 0644]
queue-4.7/series

diff --git a/queue-4.7/btrfs-fix-delalloc-accounting-after-copy_from_user-faults.patch b/queue-4.7/btrfs-fix-delalloc-accounting-after-copy_from_user-faults.patch
new file mode 100644 (file)
index 0000000..b6a5e4a
--- /dev/null
@@ -0,0 +1,49 @@
+From 8b8b08cbfb9021af4b54b4175fc4c51d655aac8c Mon Sep 17 00:00:00 2001
+From: Chris Mason <clm@fb.com>
+Date: Tue, 19 Jul 2016 05:52:36 -0700
+Subject: Btrfs: fix delalloc accounting after copy_from_user faults
+
+From: Chris Mason <clm@fb.com>
+
+commit 8b8b08cbfb9021af4b54b4175fc4c51d655aac8c upstream.
+
+Commit 56244ef151c3cd11 was almost but not quite enough to fix the
+reservation math after btrfs_copy_from_user returned partial copies.
+
+Some users are still seeing warnings in btrfs_destroy_inode, and with a
+long enough test run I'm able to trigger them as well.
+
+This patch fixes the accounting math again, bringing it much closer to
+the way it was before the sectorsize conversion Chandan did.  The
+problem is accounting for the offset into the page/sector when we do a
+partial copy.  This one just uses the dirty_sectors variable which
+should already be updated properly.
+
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/file.c |   12 +++++-------
+ 1 file changed, 5 insertions(+), 7 deletions(-)
+
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -1629,13 +1629,11 @@ again:
+                * managed to copy.
+                */
+               if (num_sectors > dirty_sectors) {
+-                      /*
+-                       * we round down because we don't want to count
+-                       * any partial blocks actually sent through the
+-                       * IO machines
+-                       */
+-                      release_bytes = round_down(release_bytes - copied,
+-                                    root->sectorsize);
++
++                      /* release everything except the sectors we dirtied */
++                      release_bytes -= dirty_sectors <<
++                              root->fs_info->sb->s_blocksize_bits;
++
+                       if (copied > 0) {
+                               spin_lock(&BTRFS_I(inode)->lock);
+                               BTRFS_I(inode)->outstanding_extents++;
diff --git a/queue-4.7/nfs-don-t-create-zero-length-requests.patch b/queue-4.7/nfs-don-t-create-zero-length-requests.patch
new file mode 100644 (file)
index 0000000..dd4d5f7
--- /dev/null
@@ -0,0 +1,44 @@
+From 149a4fddd0a72d526abbeac0c8deaab03559836a Mon Sep 17 00:00:00 2001
+From: Benjamin Coddington <bcodding@redhat.com>
+Date: Mon, 18 Jul 2016 10:41:57 -0400
+Subject: nfs: don't create zero-length requests
+
+From: Benjamin Coddington <bcodding@redhat.com>
+
+commit 149a4fddd0a72d526abbeac0c8deaab03559836a upstream.
+
+NFS doesn't expect requests with wb_bytes set to zero and may make
+unexpected decisions about how to handle that request at the page IO layer.
+Skip request creation if we won't have any wb_bytes in the request.
+
+Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
+Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
+Reviewed-by: Weston Andros Adamson <dros@primarydata.com>
+Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfs/write.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/fs/nfs/write.c
++++ b/fs/nfs/write.c
+@@ -1289,6 +1289,9 @@ int nfs_updatepage(struct file *file, st
+       dprintk("NFS:       nfs_updatepage(%pD2 %d@%lld)\n",
+               file, count, (long long)(page_file_offset(page) + offset));
++      if (!count)
++              goto out;
++
+       if (nfs_can_extend_write(file, page, inode)) {
+               count = max(count + offset, nfs_page_length(page));
+               offset = 0;
+@@ -1299,7 +1302,7 @@ int nfs_updatepage(struct file *file, st
+               nfs_set_pageerror(page);
+       else
+               __set_page_dirty_nobuffers(page);
+-
++out:
+       dprintk("NFS:       nfs_updatepage returns %d (isize %lld)\n",
+                       status, (long long)i_size_read(inode));
+       return status;
diff --git a/queue-4.7/nfsd-don-t-return-an-unhashed-lock-stateid-after-taking-mutex.patch b/queue-4.7/nfsd-don-t-return-an-unhashed-lock-stateid-after-taking-mutex.patch
new file mode 100644 (file)
index 0000000..2aa9092
--- /dev/null
@@ -0,0 +1,88 @@
+From dd257933fa4b9fea66a1195f8a15111029810abc Mon Sep 17 00:00:00 2001
+From: Jeff Layton <jlayton@redhat.com>
+Date: Thu, 11 Aug 2016 10:37:39 -0400
+Subject: nfsd: don't return an unhashed lock stateid after taking mutex
+
+From: Jeff Layton <jlayton@redhat.com>
+
+commit dd257933fa4b9fea66a1195f8a15111029810abc upstream.
+
+nfsd4_lock will take the st_mutex before working with the stateid it
+gets, but between the time when we drop the cl_lock and take the mutex,
+the stateid could become unhashed (a'la FREE_STATEID). If that happens
+the lock stateid returned to the client will be forgotten.
+
+Fix this by first moving the st_mutex acquisition into
+lookup_or_create_lock_state. Then, have it check to see if the lock
+stateid is still hashed after taking the mutex. If it's not, then put
+the stateid and try the find/create again.
+
+Signed-off-by: Jeff Layton <jlayton@redhat.com>
+Tested-by: Alexey Kodanev <alexey.kodanev@oracle.com>
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfsd/nfs4state.c |   25 ++++++++++++++++++++-----
+ 1 file changed, 20 insertions(+), 5 deletions(-)
+
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -5526,7 +5526,7 @@ static __be32
+ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
+                           struct nfs4_ol_stateid *ost,
+                           struct nfsd4_lock *lock,
+-                          struct nfs4_ol_stateid **lst, bool *new)
++                          struct nfs4_ol_stateid **plst, bool *new)
+ {
+       __be32 status;
+       struct nfs4_file *fi = ost->st_stid.sc_file;
+@@ -5534,7 +5534,9 @@ lookup_or_create_lock_state(struct nfsd4
+       struct nfs4_client *cl = oo->oo_owner.so_client;
+       struct inode *inode = d_inode(cstate->current_fh.fh_dentry);
+       struct nfs4_lockowner *lo;
++      struct nfs4_ol_stateid *lst;
+       unsigned int strhashval;
++      bool hashed;
+       lo = find_lockowner_str(cl, &lock->lk_new_owner);
+       if (!lo) {
+@@ -5550,12 +5552,27 @@ lookup_or_create_lock_state(struct nfsd4
+                       goto out;
+       }
+-      *lst = find_or_create_lock_stateid(lo, fi, inode, ost, new);
+-      if (*lst == NULL) {
++retry:
++      lst = find_or_create_lock_stateid(lo, fi, inode, ost, new);
++      if (lst == NULL) {
+               status = nfserr_jukebox;
+               goto out;
+       }
++
++      mutex_lock(&lst->st_mutex);
++
++      /* See if it's still hashed to avoid race with FREE_STATEID */
++      spin_lock(&cl->cl_lock);
++      hashed = !list_empty(&lst->st_perfile);
++      spin_unlock(&cl->cl_lock);
++
++      if (!hashed) {
++              mutex_unlock(&lst->st_mutex);
++              nfs4_put_stid(&lst->st_stid);
++              goto retry;
++      }
+       status = nfs_ok;
++      *plst = lst;
+ out:
+       nfs4_put_stateowner(&lo->lo_owner);
+       return status;
+@@ -5622,8 +5639,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struc
+                       goto out;
+               status = lookup_or_create_lock_state(cstate, open_stp, lock,
+                                                       &lock_stp, &new);
+-              if (status == nfs_ok)
+-                      mutex_lock(&lock_stp->st_mutex);
+       } else {
+               status = nfs4_preprocess_seqid_op(cstate,
+                                      lock->lk_old_lock_seqid,
diff --git a/queue-4.7/nfsd-fix-race-between-free_stateid-and-lock.patch b/queue-4.7/nfsd-fix-race-between-free_stateid-and-lock.patch
new file mode 100644 (file)
index 0000000..ad3e84d
--- /dev/null
@@ -0,0 +1,111 @@
+From 42691398be08bd1fe99326911a0aa31f2c041d53 Mon Sep 17 00:00:00 2001
+From: Chuck Lever <chuck.lever@oracle.com>
+Date: Thu, 11 Aug 2016 10:37:30 -0400
+Subject: nfsd: Fix race between FREE_STATEID and LOCK
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+commit 42691398be08bd1fe99326911a0aa31f2c041d53 upstream.
+
+When running LTP's nfslock01 test, the Linux client can send a LOCK
+and a FREE_STATEID request at the same time. The outcome is:
+
+Frame 324    R OPEN stateid [2,O]
+
+Frame 115004 C LOCK lockowner_is_new stateid [2,O] offset 672000 len 64
+Frame 115008 R LOCK stateid [1,L]
+Frame 115012 C WRITE stateid [0,L] offset 672000 len 64
+Frame 115016 R WRITE NFS4_OK
+Frame 115019 C LOCKU stateid [1,L] offset 672000 len 64
+Frame 115022 R LOCKU NFS4_OK
+Frame 115025 C FREE_STATEID stateid [2,L]
+Frame 115026 C LOCK lockowner_is_new stateid [2,O] offset 672128 len 64
+Frame 115029 R FREE_STATEID NFS4_OK
+Frame 115030 R LOCK stateid [3,L]
+Frame 115034 C WRITE stateid [0,L] offset 672128 len 64
+Frame 115038 R WRITE NFS4ERR_BAD_STATEID
+
+In other words, the server returns stateid L in a successful LOCK
+reply, but it has already released it. Subsequent uses of stateid L
+fail.
+
+To address this, protect the generation check in nfsd4_free_stateid
+with the st_mutex. This should guarantee that only one of two
+outcomes occurs: either LOCK returns a fresh valid stateid, or
+FREE_STATEID returns NFS4ERR_LOCKS_HELD.
+
+Reported-by: Alexey Kodanev <alexey.kodanev@oracle.com>
+Fix-suggested-by: Jeff Layton <jlayton@redhat.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Tested-by: Alexey Kodanev <alexey.kodanev@oracle.com>
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfsd/nfs4state.c |   40 ++++++++++++++++++++++++++++------------
+ 1 file changed, 28 insertions(+), 12 deletions(-)
+
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -4906,6 +4906,32 @@ nfsd4_test_stateid(struct svc_rqst *rqst
+       return nfs_ok;
+ }
++static __be32
++nfsd4_free_lock_stateid(stateid_t *stateid, struct nfs4_stid *s)
++{
++      struct nfs4_ol_stateid *stp = openlockstateid(s);
++      __be32 ret;
++
++      mutex_lock(&stp->st_mutex);
++
++      ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
++      if (ret)
++              goto out;
++
++      ret = nfserr_locks_held;
++      if (check_for_locks(stp->st_stid.sc_file,
++                          lockowner(stp->st_stateowner)))
++              goto out;
++
++      release_lock_stateid(stp);
++      ret = nfs_ok;
++
++out:
++      mutex_unlock(&stp->st_mutex);
++      nfs4_put_stid(s);
++      return ret;
++}
++
+ __be32
+ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+                  struct nfsd4_free_stateid *free_stateid)
+@@ -4913,7 +4939,6 @@ nfsd4_free_stateid(struct svc_rqst *rqst
+       stateid_t *stateid = &free_stateid->fr_stateid;
+       struct nfs4_stid *s;
+       struct nfs4_delegation *dp;
+-      struct nfs4_ol_stateid *stp;
+       struct nfs4_client *cl = cstate->session->se_client;
+       __be32 ret = nfserr_bad_stateid;
+@@ -4932,18 +4957,9 @@ nfsd4_free_stateid(struct svc_rqst *rqst
+               ret = nfserr_locks_held;
+               break;
+       case NFS4_LOCK_STID:
+-              ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
+-              if (ret)
+-                      break;
+-              stp = openlockstateid(s);
+-              ret = nfserr_locks_held;
+-              if (check_for_locks(stp->st_stid.sc_file,
+-                                  lockowner(stp->st_stateowner)))
+-                      break;
+-              WARN_ON(!unhash_lock_stateid(stp));
++              atomic_inc(&s->sc_count);
+               spin_unlock(&cl->cl_lock);
+-              nfs4_put_stid(s);
+-              ret = nfs_ok;
++              ret = nfsd4_free_lock_stateid(stateid, s);
+               goto out;
+       case NFS4_REVOKED_DELEG_STID:
+               dp = delegstateid(s);
diff --git a/queue-4.7/pnfs-fix-layoutget-handling-of-nfs4err_bad_stateid-and-nfs4err_expired.patch b/queue-4.7/pnfs-fix-layoutget-handling-of-nfs4err_bad_stateid-and-nfs4err_expired.patch
new file mode 100644 (file)
index 0000000..25cb948
--- /dev/null
@@ -0,0 +1,77 @@
+From f7db0b283868411dc6bc8a223fd032b211d2d91f Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <trond.myklebust@primarydata.com>
+Date: Thu, 14 Jul 2016 15:14:02 -0400
+Subject: pNFS: Fix LAYOUTGET handling of NFS4ERR_BAD_STATEID and NFS4ERR_EXPIRED
+
+From: Trond Myklebust <trond.myklebust@primarydata.com>
+
+commit f7db0b283868411dc6bc8a223fd032b211d2d91f upstream.
+
+We want to recover the open stateid if there is no layout stateid
+and/or the stateid argument matches an open stateid.
+Otherwise throw out the existing layout and recover from scratch, as
+the layout stateid is bad.
+
+Fixes: 183d9e7b112aa ("pnfs: rework LAYOUTGET retry handling")
+Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
+Reviewed-by: Jeff Layton <jlayton@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfs/nfs4proc.c |   32 ++++++++++++++------------------
+ 1 file changed, 14 insertions(+), 18 deletions(-)
+
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -7872,6 +7872,7 @@ nfs4_layoutget_handle_exception(struct r
+       struct pnfs_layout_hdr *lo;
+       int nfs4err = task->tk_status;
+       int err, status = 0;
++      LIST_HEAD(head);
+       dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status);
+@@ -7916,30 +7917,25 @@ nfs4_layoutget_handle_exception(struct r
+       case -NFS4ERR_BAD_STATEID:
+               exception->timeout = 0;
+               spin_lock(&inode->i_lock);
+-              if (nfs4_stateid_match(&lgp->args.stateid,
++              lo = NFS_I(inode)->layout;
++              /* If the open stateid was bad, then recover it. */
++              if (!lo || test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) ||
++                  nfs4_stateid_match_other(&lgp->args.stateid,
+                                       &lgp->args.ctx->state->stateid)) {
+                       spin_unlock(&inode->i_lock);
+-                      /* If the open stateid was bad, then recover it. */
+                       exception->state = lgp->args.ctx->state;
+                       break;
+               }
+-              lo = NFS_I(inode)->layout;
+-              if (lo && !test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) &&
+-                  nfs4_stateid_match_other(&lgp->args.stateid, &lo->plh_stateid)) {
+-                      LIST_HEAD(head);
+-                      /*
+-                       * Mark the bad layout state as invalid, then retry
+-                       * with the current stateid.
+-                       */
+-                      set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
+-                      pnfs_mark_matching_lsegs_invalid(lo, &head, NULL, 0);
+-                      spin_unlock(&inode->i_lock);
+-                      pnfs_free_lseg_list(&head);
+-                      status = -EAGAIN;
+-                      goto out;
+-              } else
+-                      spin_unlock(&inode->i_lock);
++              /*
++               * Mark the bad layout state as invalid, then retry
++               */
++              set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
++              pnfs_mark_matching_lsegs_invalid(lo, &head, NULL, 0);
++              spin_unlock(&inode->i_lock);
++              pnfs_free_lseg_list(&head);
++              status = -EAGAIN;
++              goto out;
+       }
+       err = nfs4_handle_exception(server, nfs4err, exception);
diff --git a/queue-4.7/pnfs-fix-post-layoutget-error-handling-in-pnfs_update_layout.patch b/queue-4.7/pnfs-fix-post-layoutget-error-handling-in-pnfs_update_layout.patch
new file mode 100644 (file)
index 0000000..642ce00
--- /dev/null
@@ -0,0 +1,72 @@
+From 56b38a1f7c781519eef09c1668a3c97ea911f86b Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <trond.myklebust@primarydata.com>
+Date: Thu, 14 Jul 2016 18:34:12 -0400
+Subject: pNFS: Fix post-layoutget error handling in pnfs_update_layout()
+
+From: Trond Myklebust <trond.myklebust@primarydata.com>
+
+commit 56b38a1f7c781519eef09c1668a3c97ea911f86b upstream.
+
+The non-retry error path is currently broken and ends up releasing the
+reference to the layout twice. It also can end up clearing the
+NFS_LAYOUT_FIRST_LAYOUTGET flag twice, causing a race.
+
+In addition, the retry path will fail to decrement the plh_outstanding
+counter.
+
+Fixes: 183d9e7b112aa ("pnfs: rework LAYOUTGET retry handling")
+Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
+Reviewed-by: Jeff Layton <jlayton@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfs/pnfs.c |   21 +++++++++++----------
+ 1 file changed, 11 insertions(+), 10 deletions(-)
+
+--- a/fs/nfs/pnfs.c
++++ b/fs/nfs/pnfs.c
+@@ -1645,6 +1645,7 @@ lookup_again:
+       lseg = send_layoutget(lo, ctx, &stateid, &arg, &timeout, gfp_flags);
+       trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
+                                PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET);
++      atomic_dec(&lo->plh_outstanding);
+       if (IS_ERR(lseg)) {
+               switch(PTR_ERR(lseg)) {
+               case -EBUSY:
+@@ -1653,26 +1654,26 @@ lookup_again:
+                               lseg = NULL;
+                       /* Fallthrough */
+               case -EAGAIN:
+-                      pnfs_put_layout_hdr(lo);
+-                      if (first)
+-                              pnfs_clear_first_layoutget(lo);
+-                      if (lseg) {
+-                              trace_pnfs_update_layout(ino, pos, count,
+-                                      iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY);
+-                              goto lookup_again;
+-                      }
+-                      /* Fallthrough */
++                      break;
+               default:
+                       if (!nfs_error_is_fatal(PTR_ERR(lseg))) {
+                               pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
+                               lseg = NULL;
+                       }
++                      goto out_put_layout_hdr;
++              }
++              if (lseg) {
++                      if (first)
++                              pnfs_clear_first_layoutget(lo);
++                      trace_pnfs_update_layout(ino, pos, count,
++                              iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY);
++                      pnfs_put_layout_hdr(lo);
++                      goto lookup_again;
+               }
+       } else {
+               pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
+       }
+-      atomic_dec(&lo->plh_outstanding);
+ out_put_layout_hdr:
+       if (first)
+               pnfs_clear_first_layoutget(lo);
diff --git a/queue-4.7/pnfs-handle-nfs4err_recallconflict-correctly-in-layoutget.patch b/queue-4.7/pnfs-handle-nfs4err_recallconflict-correctly-in-layoutget.patch
new file mode 100644 (file)
index 0000000..5f52181
--- /dev/null
@@ -0,0 +1,57 @@
+From 66b53f325876703b7ab815c482cd104609f8772c Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <trond.myklebust@primarydata.com>
+Date: Thu, 14 Jul 2016 14:28:31 -0400
+Subject: pNFS: Handle NFS4ERR_RECALLCONFLICT correctly in LAYOUTGET
+
+From: Trond Myklebust <trond.myklebust@primarydata.com>
+
+commit 66b53f325876703b7ab815c482cd104609f8772c upstream.
+
+Instead of giving up altogether and falling back to doing I/O
+through the MDS, which may make the situation worse, wait for
+2 lease periods for the callback to resolve itself, and then
+try destroying the existing layout.
+
+Only if this was an attempt at getting a first layout, do we
+give up altogether, as the server is clearly crazy.
+
+Fixes: 183d9e7b112aa ("pnfs: rework LAYOUTGET retry handling")
+Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
+Reviewed-by: Jeff Layton <jlayton@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfs/pnfs.c |   13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+--- a/fs/nfs/pnfs.c
++++ b/fs/nfs/pnfs.c
+@@ -1505,7 +1505,7 @@ pnfs_update_layout(struct inode *ino,
+       struct pnfs_layout_segment *lseg = NULL;
+       nfs4_stateid stateid;
+       long timeout = 0;
+-      unsigned long giveup = jiffies + rpc_get_timeout(server->client);
++      unsigned long giveup = jiffies + (clp->cl_lease_time << 1);
+       bool first;
+       if (!pnfs_enabled_sb(NFS_SERVER(ino))) {
+@@ -1649,9 +1649,18 @@ lookup_again:
+       if (IS_ERR(lseg)) {
+               switch(PTR_ERR(lseg)) {
+               case -EBUSY:
+-              case -ERECALLCONFLICT:
+                       if (time_after(jiffies, giveup))
+                               lseg = NULL;
++                      break;
++              case -ERECALLCONFLICT:
++                      /* Huh? We hold no layouts, how is there a recall? */
++                      if (first) {
++                              lseg = NULL;
++                              break;
++                      }
++                      /* Destroy the existing layout and start over */
++                      if (time_after(jiffies, giveup))
++                              pnfs_destroy_layout(NFS_I(ino));
+                       /* Fallthrough */
+               case -EAGAIN:
+                       break;
diff --git a/queue-4.7/pnfs-separate-handling-of-nfs4err_layouttrylater-and-recallconflict.patch b/queue-4.7/pnfs-separate-handling-of-nfs4err_layouttrylater-and-recallconflict.patch
new file mode 100644 (file)
index 0000000..ade87d1
--- /dev/null
@@ -0,0 +1,89 @@
+From e85d7ee42003314652ab3ae2c60e3b8cd793b65f Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <trond.myklebust@primarydata.com>
+Date: Thu, 14 Jul 2016 18:46:24 -0400
+Subject: pNFS: Separate handling of NFS4ERR_LAYOUTTRYLATER and RECALLCONFLICT
+
+From: Trond Myklebust <trond.myklebust@primarydata.com>
+
+commit e85d7ee42003314652ab3ae2c60e3b8cd793b65f upstream.
+
+They are not the same error, and need to be handled differently.
+
+Fixes: 183d9e7b112aa ("pnfs: rework LAYOUTGET retry handling")
+Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
+Reviewed-by: Jeff Layton <jlayton@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfs/nfs4proc.c |   23 ++++++++++++++---------
+ fs/nfs/pnfs.c     |    1 +
+ 2 files changed, 15 insertions(+), 9 deletions(-)
+
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -427,6 +427,7 @@ static int nfs4_do_handle_exception(stru
+               case -NFS4ERR_DELAY:
+                       nfs_inc_server_stats(server, NFSIOS_DELAY);
+               case -NFS4ERR_GRACE:
++              case -NFS4ERR_LAYOUTTRYLATER:
+               case -NFS4ERR_RECALLCONFLICT:
+                       exception->delay = 1;
+                       return 0;
+@@ -7869,11 +7870,12 @@ nfs4_layoutget_handle_exception(struct r
+       struct inode *inode = lgp->args.inode;
+       struct nfs_server *server = NFS_SERVER(inode);
+       struct pnfs_layout_hdr *lo;
+-      int status = task->tk_status;
++      int nfs4err = task->tk_status;
++      int err, status = 0;
+       dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status);
+-      switch (status) {
++      switch (nfs4err) {
+       case 0:
+               goto out;
+@@ -7905,12 +7907,11 @@ nfs4_layoutget_handle_exception(struct r
+                       status = -EOVERFLOW;
+                       goto out;
+               }
+-              /* Fallthrough */
++              status = -EBUSY;
++              break;
+       case -NFS4ERR_RECALLCONFLICT:
+-              nfs4_handle_exception(server, -NFS4ERR_RECALLCONFLICT,
+-                                      exception);
+               status = -ERECALLCONFLICT;
+-              goto out;
++              break;
+       case -NFS4ERR_EXPIRED:
+       case -NFS4ERR_BAD_STATEID:
+               exception->timeout = 0;
+@@ -7941,9 +7942,13 @@ nfs4_layoutget_handle_exception(struct r
+                       spin_unlock(&inode->i_lock);
+       }
+-      status = nfs4_handle_exception(server, status, exception);
+-      if (exception->retry)
+-              status = -EAGAIN;
++      err = nfs4_handle_exception(server, nfs4err, exception);
++      if (!status) {
++              if (exception->retry)
++                      status = -EAGAIN;
++              else
++                      status = err;
++      }
+ out:
+       dprintk("<-- %s\n", __func__);
+       return status;
+--- a/fs/nfs/pnfs.c
++++ b/fs/nfs/pnfs.c
+@@ -1647,6 +1647,7 @@ lookup_again:
+                                PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET);
+       if (IS_ERR(lseg)) {
+               switch(PTR_ERR(lseg)) {
++              case -EBUSY:
+               case -ERECALLCONFLICT:
+                       if (time_after(jiffies, giveup))
+                               lseg = NULL;
diff --git a/queue-4.7/powerpc-tm-fix-stack-pointer-corruption-in-__tm_recheckpoint.patch b/queue-4.7/powerpc-tm-fix-stack-pointer-corruption-in-__tm_recheckpoint.patch
new file mode 100644 (file)
index 0000000..45a8f30
--- /dev/null
@@ -0,0 +1,81 @@
+From 6bcb80143e792becfd2b9cc6a339ce523e4e2219 Mon Sep 17 00:00:00 2001
+From: Michael Neuling <mikey@neuling.org>
+Date: Wed, 6 Jul 2016 14:58:06 +1000
+Subject: powerpc/tm: Fix stack pointer corruption in __tm_recheckpoint()
+
+From: Michael Neuling <mikey@neuling.org>
+
+commit 6bcb80143e792becfd2b9cc6a339ce523e4e2219 upstream.
+
+At the start of __tm_recheckpoint() we save the kernel stack pointer
+(r1) in SPRG SCRATCH0 (SPRG2) so that we can restore it after the
+trecheckpoint.
+
+Unfortunately, the same SPRG is used in the SLB miss handler.  If an
+SLB miss is taken between the save and restore of r1 to the SPRG, the
+SPRG is changed and hence r1 is also corrupted.  We can end up with
+the following crash when we start using r1 again after the restore
+from the SPRG:
+
+  Oops: Bad kernel stack pointer, sig: 6 [#1]
+  SMP NR_CPUS=2048 NUMA pSeries
+  CPU: 658 PID: 143777 Comm: htm_demo Tainted: G            EL   X 4.4.13-0-default #1
+  task: c0000b56993a7810 ti: c00000000cfec000 task.ti: c0000b56993bc000
+  NIP: c00000000004f188 LR: 00000000100040b8 CTR: 0000000010002570
+  REGS: c00000000cfefd40 TRAP: 0300   Tainted: G            EL   X  (4.4.13-0-default)
+  MSR: 8000000300001033 <SF,ME,IR,DR,RI,LE>  CR: 02000424  XER: 20000000
+  CFAR: c000000000008468 DAR: 00003ffd84e66880 DSISR: 40000000 SOFTE: 0
+  PACATMSCRATCH: 00003ffbc865e680
+  GPR00: fffffffcfabc4268 00003ffd84e667a0 00000000100d8c38 000000030544bb80
+  GPR04: 0000000000000002 00000000100cf200 0000000000000449 00000000100cf100
+  GPR08: 000000000000c350 0000000000002569 0000000000002569 00000000100d6c30
+  GPR12: 00000000100d6c28 c00000000e6a6b00 00003ffd84660000 0000000000000000
+  GPR16: 0000000000000003 0000000000000449 0000000010002570 0000010009684f20
+  GPR20: 0000000000800000 00003ffd84e5f110 00003ffd84e5f7a0 00000000100d0f40
+  GPR24: 0000000000000000 0000000000000000 0000000000000000 00003ffff0673f50
+  GPR28: 00003ffd84e5e960 00000000003d0f00 00003ffd84e667a0 00003ffd84e5e680
+  NIP [c00000000004f188] restore_gprs+0x110/0x17c
+  LR [00000000100040b8] 0x100040b8
+  Call Trace:
+  Instruction dump:
+  f8a1fff0 e8e700a8 38a00000 7ca10164 e8a1fff8 e821fff0 7c0007dd 7c421378
+  7db142a6 7c3242a6 38800002 7c810164 <e9c100e0> e9e100e8 ea0100f0 ea2100f8
+
+We hit this on large memory machines (> 2TB) but it can also be hit on
+smaller machines when 1TB segments are disabled.
+
+To hit this, you also need to be virtualised to ensure SLBs are
+periodically removed by the hypervisor.
+
+This patch moves the saving of r1 to the SPRG to the region where we
+are guaranteed not to take any further SLB misses.
+
+Fixes: 98ae22e15b43 ("powerpc: Add helper functions for transactional memory context switching")
+Signed-off-by: Michael Neuling <mikey@neuling.org>
+Acked-by: Cyril Bur <cyrilbur@gmail.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/kernel/tm.S |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/arch/powerpc/kernel/tm.S
++++ b/arch/powerpc/kernel/tm.S
+@@ -338,8 +338,6 @@ _GLOBAL(__tm_recheckpoint)
+        */
+       subi    r7, r7, STACK_FRAME_OVERHEAD
+-      SET_SCRATCH0(r1)
+-
+       mfmsr   r6
+       /* R4 = original MSR to indicate whether thread used FP/Vector etc. */
+@@ -468,6 +466,7 @@ restore_gprs:
+        * until we turn MSR RI back on.
+        */
++      SET_SCRATCH0(r1)
+       ld      r5, -8(r1)
+       ld      r1, -16(r1)
diff --git a/queue-4.7/series b/queue-4.7/series
index cf3312a4af61cd6dc3b77e232abb9ab9b9c62275..4679925902a3e198e90c7352dfd0f10828948505 100644 (file)
@@ -86,3 +86,12 @@ mips-kvm-fix-mapped-fault-broken-commpage-handling.patch
 mips-kvm-add-missing-gfn-range-check.patch
 mips-kvm-fix-gfn-range-check-in-kseg0-tlb-faults.patch
 mips-kvm-propagate-kseg0-mapped-tlb-fault-errors.patch
+powerpc-tm-fix-stack-pointer-corruption-in-__tm_recheckpoint.patch
+btrfs-fix-delalloc-accounting-after-copy_from_user-faults.patch
+nfs-don-t-create-zero-length-requests.patch
+nfsd-fix-race-between-free_stateid-and-lock.patch
+nfsd-don-t-return-an-unhashed-lock-stateid-after-taking-mutex.patch
+pnfs-separate-handling-of-nfs4err_layouttrylater-and-recallconflict.patch
+pnfs-fix-post-layoutget-error-handling-in-pnfs_update_layout.patch
+pnfs-handle-nfs4err_recallconflict-correctly-in-layoutget.patch
+pnfs-fix-layoutget-handling-of-nfs4err_bad_stateid-and-nfs4err_expired.patch