--- /dev/null
+From cb7323fffa85df37161f4d3be45e1f787808309c Mon Sep 17 00:00:00 2001
+From: Stanislav Kinsbursky <skinsbursky@parallels.com>
+Date: Tue, 18 Sep 2012 13:37:23 +0400
+Subject: lockd: create and use per-net NSM RPC clients on MON/UNMON requests
+
+From: Stanislav Kinsbursky <skinsbursky@parallels.com>
+
+commit cb7323fffa85df37161f4d3be45e1f787808309c upstream.
+
+An NSM RPC client can be required on NFSv3 umount, when the child reaper is
+dying (and destroying its mount namespace). This means that current->nsproxy
+is already NULL, but creating an RPC client requires the UTS namespace to
+obtain the hostname string.
+
+This patch creates reference-counted per-net NSM client on first monitor
+request and destroys it after last unmonitor request.
+
+Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/lockd/mon.c | 37 ++++++++++++++++++++-----------------
+ 1 file changed, 20 insertions(+), 17 deletions(-)
+
+--- a/fs/lockd/mon.c
++++ b/fs/lockd/mon.c
+@@ -7,7 +7,6 @@
+ */
+
+ #include <linux/types.h>
+-#include <linux/utsname.h>
+ #include <linux/kernel.h>
+ #include <linux/ktime.h>
+ #include <linux/slab.h>
+@@ -86,7 +85,7 @@ static struct rpc_clnt *nsm_create(struc
+ return rpc_create(&args);
+ }
+
+-__maybe_unused static struct rpc_clnt *nsm_client_get(struct net *net)
++static struct rpc_clnt *nsm_client_get(struct net *net)
+ {
+ static DEFINE_MUTEX(nsm_create_mutex);
+ struct rpc_clnt *clnt;
+@@ -113,7 +112,7 @@ out:
+ return clnt;
+ }
+
+-__maybe_unused static void nsm_client_put(struct net *net)
++static void nsm_client_put(struct net *net)
+ {
+ struct lockd_net *ln = net_generic(net, lockd_net_id);
+ struct rpc_clnt *clnt = ln->nsm_clnt;
+@@ -132,9 +131,8 @@ __maybe_unused static void nsm_client_pu
+ }
+
+ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res,
+- struct net *net)
++ struct rpc_clnt *clnt)
+ {
+- struct rpc_clnt *clnt;
+ int status;
+ struct nsm_args args = {
+ .priv = &nsm->sm_priv,
+@@ -142,20 +140,14 @@ static int nsm_mon_unmon(struct nsm_hand
+ .vers = 3,
+ .proc = NLMPROC_NSM_NOTIFY,
+ .mon_name = nsm->sm_mon_name,
+- .nodename = utsname()->nodename,
++ .nodename = clnt->cl_nodename,
+ };
+ struct rpc_message msg = {
+ .rpc_argp = &args,
+ .rpc_resp = res,
+ };
+
+- clnt = nsm_create(net);
+- if (IS_ERR(clnt)) {
+- status = PTR_ERR(clnt);
+- dprintk("lockd: failed to create NSM upcall transport, "
+- "status=%d\n", status);
+- goto out;
+- }
++ BUG_ON(clnt == NULL);
+
+ memset(res, 0, sizeof(*res));
+
+@@ -166,8 +158,6 @@ static int nsm_mon_unmon(struct nsm_hand
+ status);
+ else
+ status = 0;
+- rpc_shutdown_client(clnt);
+- out:
+ return status;
+ }
+
+@@ -187,6 +177,7 @@ int nsm_monitor(const struct nlm_host *h
+ struct nsm_handle *nsm = host->h_nsmhandle;
+ struct nsm_res res;
+ int status;
++ struct rpc_clnt *clnt;
+
+ dprintk("lockd: nsm_monitor(%s)\n", nsm->sm_name);
+
+@@ -199,7 +190,15 @@ int nsm_monitor(const struct nlm_host *h
+ */
+ nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf;
+
+- status = nsm_mon_unmon(nsm, NSMPROC_MON, &res, host->net);
++ clnt = nsm_client_get(host->net);
++ if (IS_ERR(clnt)) {
++ status = PTR_ERR(clnt);
++ dprintk("lockd: failed to create NSM upcall transport, "
++ "status=%d, net=%p\n", status, host->net);
++ return status;
++ }
++
++ status = nsm_mon_unmon(nsm, NSMPROC_MON, &res, clnt);
+ if (unlikely(res.status != 0))
+ status = -EIO;
+ if (unlikely(status < 0)) {
+@@ -231,9 +230,11 @@ void nsm_unmonitor(const struct nlm_host
+
+ if (atomic_read(&nsm->sm_count) == 1
+ && nsm->sm_monitored && !nsm->sm_sticky) {
++ struct lockd_net *ln = net_generic(host->net, lockd_net_id);
++
+ dprintk("lockd: nsm_unmonitor(%s)\n", nsm->sm_name);
+
+- status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res, host->net);
++ status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res, ln->nsm_clnt);
+ if (res.status != 0)
+ status = -EIO;
+ if (status < 0)
+@@ -241,6 +242,8 @@ void nsm_unmonitor(const struct nlm_host
+ nsm->sm_name);
+ else
+ nsm->sm_monitored = 0;
++
++ nsm_client_put(host->net);
+ }
+ }
+
--- /dev/null
+From e9406db20fecbfcab646bad157b4cfdc7cadddfb Mon Sep 17 00:00:00 2001
+From: Stanislav Kinsbursky <skinsbursky@parallels.com>
+Date: Tue, 18 Sep 2012 13:37:12 +0400
+Subject: lockd: per-net NSM client creation and destruction helpers introduced
+
+From: Stanislav Kinsbursky <skinsbursky@parallels.com>
+
+commit e9406db20fecbfcab646bad157b4cfdc7cadddfb upstream.
+
+An NSM RPC client can be required on NFSv3 umount, when the child reaper is
+dying (and destroying its mount namespace). This means that current->nsproxy
+is already NULL, but creating an RPC client requires the UTS namespace to
+obtain the hostname string.
+This patch introduces helpers for creating and destroying reference-counted
+NSM RPC clients (similar to the RPCBIND RPC clients).
+
+Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/lockd/mon.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++--
+ fs/lockd/netns.h | 4 ++++
+ fs/lockd/svc.c | 1 +
+ 3 files changed, 54 insertions(+), 2 deletions(-)
+
+--- a/fs/lockd/mon.c
++++ b/fs/lockd/mon.c
+@@ -19,6 +19,8 @@
+
+ #include <asm/unaligned.h>
+
++#include "netns.h"
++
+ #define NLMDBG_FACILITY NLMDBG_MONITOR
+ #define NSM_PROGRAM 100024
+ #define NSM_VERSION 1
+@@ -70,7 +72,7 @@ static struct rpc_clnt *nsm_create(struc
+ };
+ struct rpc_create_args args = {
+ .net = net,
+- .protocol = XPRT_TRANSPORT_UDP,
++ .protocol = XPRT_TRANSPORT_TCP,
+ .address = (struct sockaddr *)&sin,
+ .addrsize = sizeof(sin),
+ .servername = "rpc.statd",
+@@ -83,6 +85,51 @@ static struct rpc_clnt *nsm_create(struc
+ return rpc_create(&args);
+ }
+
++__maybe_unused static struct rpc_clnt *nsm_client_get(struct net *net)
++{
++	static DEFINE_MUTEX(nsm_create_mutex);
++	struct rpc_clnt *clnt;
++	struct lockd_net *ln = net_generic(net, lockd_net_id);
++
++	/* Hold the mutex across lookup and creation so that racing first
++	 * users cannot each install a client and reset nsm_users to 1. */
++	mutex_lock(&nsm_create_mutex);
++	spin_lock(&ln->nsm_clnt_lock);
++	clnt = ln->nsm_users ? ln->nsm_clnt : NULL;
++	if (clnt)
++		ln->nsm_users++;
++	spin_unlock(&ln->nsm_clnt_lock);
++	if (!clnt) {
++		clnt = nsm_create(net);	/* created outside the spinlock */
++		if (!IS_ERR(clnt)) {
++			spin_lock(&ln->nsm_clnt_lock);
++			ln->nsm_clnt = clnt;
++			ln->nsm_users = 1;
++			spin_unlock(&ln->nsm_clnt_lock);
++		}
++	}
++	mutex_unlock(&nsm_create_mutex);
++	return clnt;
++}
++
++__maybe_unused static void nsm_client_put(struct net *net)
++{
++	struct lockd_net *ln = net_generic(net, lockd_net_id);
++	struct rpc_clnt *clnt = ln->nsm_clnt;
++	int shutdown = 0;
++
++	spin_lock(&ln->nsm_clnt_lock);
++	if (ln->nsm_users) {
++		if (--ln->nsm_users == 0)	/* last user: forget the client */
++			ln->nsm_clnt = NULL;
++		shutdown = !ln->nsm_users;
++	}
++	spin_unlock(&ln->nsm_clnt_lock);
++	/* Shut down outside the spinlock, using the pointer saved above. */
++	if (shutdown)
++		rpc_shutdown_client(clnt);
++}
++
+ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res,
+ struct net *net)
+ {
+@@ -111,7 +158,7 @@ static int nsm_mon_unmon(struct nsm_hand
+ memset(res, 0, sizeof(*res));
+
+ msg.rpc_proc = &clnt->cl_procinfo[proc];
+- status = rpc_call_sync(clnt, &msg, 0);
++ status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFTCONN);
+ if (status < 0)
+ dprintk("lockd: NSM upcall RPC failed, status=%d\n",
+ status);
+--- a/fs/lockd/netns.h
++++ b/fs/lockd/netns.h
+@@ -12,6 +12,10 @@ struct lockd_net {
+ struct delayed_work grace_period_end;
+ struct lock_manager lockd_manager;
+ struct list_head grace_list;
++
++ spinlock_t nsm_clnt_lock;
++ unsigned int nsm_users;
++ struct rpc_clnt *nsm_clnt;
+ };
+
+ extern int lockd_net_id;
+--- a/fs/lockd/svc.c
++++ b/fs/lockd/svc.c
+@@ -596,6 +596,7 @@ static int lockd_init_net(struct net *ne
+
+ INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender);
+ INIT_LIST_HEAD(&ln->grace_list);
++ spin_lock_init(&ln->nsm_clnt_lock);
+ return 0;
+ }
+
--- /dev/null
+From 303a7ce92064c285a04c870f2dc0192fdb2968cb Mon Sep 17 00:00:00 2001
+From: Stanislav Kinsbursky <skinsbursky@parallels.com>
+Date: Tue, 18 Sep 2012 13:37:18 +0400
+Subject: lockd: use rpc client's cl_nodename for id encoding
+
+From: Stanislav Kinsbursky <skinsbursky@parallels.com>
+
+commit 303a7ce92064c285a04c870f2dc0192fdb2968cb upstream.
+
+Taking the hostname from the UTS namespace is not safe, because this could be
+performed during a umount operation on child reaper death. In that case
+current->nsproxy is already NULL.
+
+Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/lockd/mon.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/lockd/mon.c
++++ b/fs/lockd/mon.c
+@@ -42,6 +42,7 @@ struct nsm_args {
+ u32 proc;
+
+ char *mon_name;
++ char *nodename;
+ };
+
+ struct nsm_res {
+@@ -141,6 +142,7 @@ static int nsm_mon_unmon(struct nsm_hand
+ .vers = 3,
+ .proc = NLMPROC_NSM_NOTIFY,
+ .mon_name = nsm->sm_mon_name,
++ .nodename = utsname()->nodename,
+ };
+ struct rpc_message msg = {
+ .rpc_argp = &args,
+@@ -477,7 +479,7 @@ static void encode_my_id(struct xdr_stre
+ {
+ __be32 *p;
+
+- encode_nsm_string(xdr, utsname()->nodename);
++ encode_nsm_string(xdr, argp->nodename);
+ p = xdr_reserve_space(xdr, 4 + 4 + 4);
+ *p++ = cpu_to_be32(argp->prog);
+ *p++ = cpu_to_be32(argp->vers);
--- /dev/null
+From 6938867edba929a65a167a97581231e76aeb10b4 Mon Sep 17 00:00:00 2001
+From: Bryan Schumaker <bjschuma@netapp.com>
+Date: Wed, 26 Sep 2012 15:25:52 -0400
+Subject: NFS: Remove bad delegations during open recovery
+
+From: Bryan Schumaker <bjschuma@netapp.com>
+
+commit 6938867edba929a65a167a97581231e76aeb10b4 upstream.
+
+I put the client into an open recovery loop by:
+ Client: Open file
+ read half
+ Server: Expire client (echo 0 > /sys/kernel/debug/nfsd/forget_clients)
+ Client: Drop vm cache (echo 3 > /proc/sys/vm/drop_caches)
+ finish reading file
+
+This causes a loop because the client never updates the nfs4_state after
+discovering that the delegation is invalid. This means it will keep
+trying to read using the bad delegation rather than attempting to re-open
+the file.
+
+Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfs/nfs4proc.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -1774,7 +1774,11 @@ static void nfs41_clear_delegation_state
+ * informs us the stateid is unrecognized. */
+ if (status != -NFS4ERR_BAD_STATEID)
+ nfs41_free_stateid(server, stateid);
++ nfs_remove_bad_delegation(state->inode);
+
++ write_seqlock(&state->seqlock);
++ nfs4_stateid_copy(&state->stateid, &state->open_stateid);
++ write_sequnlock(&state->seqlock);
+ clear_bit(NFS_DELEGATED_STATE, &state->flags);
+ }
+ }
--- /dev/null
+From dc182549d439f60c332bf74d7f220a1bccf37da6 Mon Sep 17 00:00:00 2001
+From: Peng Tao <bergwolf@gmail.com>
+Date: Fri, 24 Aug 2012 00:27:49 +0800
+Subject: NFS41: fix error of setting blocklayoutdriver
+
+From: Peng Tao <bergwolf@gmail.com>
+
+commit dc182549d439f60c332bf74d7f220a1bccf37da6 upstream.
+
+After commit e38eb650 (NFS: set_pnfs_layoutdriver() from
+nfs4_proc_fsinfo()), set_pnfs_layoutdriver() is called inside
+nfs4_proc_fsinfo(), but pnfs_blksize is not yet set at that point. As a
+result, setting the blocklayout driver fails and so does the pnfsblock mount.
+
+Signed-off-by: Peng Tao <tao.peng@emc.com>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfs/client.c | 1 -
+ fs/nfs/nfs4proc.c | 5 ++++-
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+--- a/fs/nfs/client.c
++++ b/fs/nfs/client.c
+@@ -855,7 +855,6 @@ static void nfs_server_set_fsinfo(struct
+ if (server->wsize > NFS_MAX_FILE_IO_SIZE)
+ server->wsize = NFS_MAX_FILE_IO_SIZE;
+ server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+- server->pnfs_blksize = fsinfo->blksize;
+
+ server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
+
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -3362,8 +3362,11 @@ static int nfs4_proc_fsinfo(struct nfs_s
+
+ nfs_fattr_init(fsinfo->fattr);
+ error = nfs4_do_fsinfo(server, fhandle, fsinfo);
+- if (error == 0)
++ if (error == 0) {
++ /* block layout checks this! */
++ server->pnfs_blksize = fsinfo->blksize;
+ set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype);
++ }
+
+ return error;
+ }
--- /dev/null
+From 9959ba0c241a71c7ed8133401cfbbee2720da0b5 Mon Sep 17 00:00:00 2001
+From: Malahal Naineni <malahal@us.ibm.com>
+Date: Sun, 9 Sep 2012 10:25:47 -0500
+Subject: NFSD: pass null terminated buf to kstrtouint()
+
+From: Malahal Naineni <malahal@us.ibm.com>
+
+commit 9959ba0c241a71c7ed8133401cfbbee2720da0b5 upstream.
+
+The 'buf' is prepared with null termination with intention of using it for
+this purpose, but 'name' is passed instead!
+
+Signed-off-by: Malahal Naineni <malahal@us.ibm.com>
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfsd/nfs4idmap.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/nfsd/nfs4idmap.c
++++ b/fs/nfsd/nfs4idmap.c
+@@ -598,7 +598,7 @@ numeric_name_to_id(struct svc_rqst *rqst
+ /* Just to make sure it's null-terminated: */
+ memcpy(buf, name, namelen);
+ buf[namelen] = '\0';
+- ret = kstrtouint(name, 10, id);
++ ret = kstrtouint(buf, 10, id);
+ return ret == 0;
+ }
+
--- /dev/null
+From 68eb35081e297b37db49d854cda144c6a3397699 Mon Sep 17 00:00:00 2001
+From: "J. Bruce Fields" <bfields@redhat.com>
+Date: Tue, 21 Aug 2012 12:48:30 -0400
+Subject: nfsd4: don't pin clientids to pseudoflavors
+
+From: "J. Bruce Fields" <bfields@redhat.com>
+
+commit 68eb35081e297b37db49d854cda144c6a3397699 upstream.
+
+I added cr_flavor to the data compared in same_creds without any
+justification, in d5497fc693a446ce9100fcf4117c3f795ddfd0d2 "nfsd4: move
+rq_flavor into svc_cred".
+
+Recent client changes then started making
+
+ mount -osec=krb5 server:/export /mnt/
+ echo "hello" >/mnt/TMP
+ umount /mnt/
+ mount -osec=krb5i server:/export /mnt/
+ echo "hello" >/mnt/TMP
+
+to fail due to a clid_inuse on the second open.
+
+Mounting sequentially like this with different flavors probably isn't
+that common outside artificial tests. Also, the real bug here may be
+that the server isn't just destroying the former clientid in this case
+(because it isn't good enough at recognizing when the old state is
+gone). But it prompted some discussion and a look back at the spec, and
+I think the check was probably wrong. Fix and document.
+
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfsd/nfs4state.c | 18 +++++++++++++++++-
+ 1 file changed, 17 insertions(+), 1 deletion(-)
+
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -1223,10 +1223,26 @@ static bool groups_equal(struct group_in
+ return true;
+ }
+
++/*
++ * RFC 3530 language requires clid_inuse be returned when the
++ * "principal" associated with a request differs from that previously
++ * used. We use uid, gids, and gss principal string as our best
++ * approximation. We also don't want to allow non-gss use of a client
++ * established using gss: in theory cr_principal should catch that
++ * change, but in practice cr_principal can be null even in the gss case
++ * since gssd doesn't always pass down a principal string.
++ */
++static bool is_gss_cred(struct svc_cred *cr)
++{
++ /* Is cr_flavor one of the gss "pseudoflavors"?: */
++ return (cr->cr_flavor > RPC_AUTH_MAXFLAVOR);
++}
++
++
+ static bool
+ same_creds(struct svc_cred *cr1, struct svc_cred *cr2)
+ {
+- if ((cr1->cr_flavor != cr2->cr_flavor)
++ if ((is_gss_cred(cr1) != is_gss_cred(cr2))
+ || (cr1->cr_uid != cr2->cr_uid)
+ || (cr1->cr_gid != cr2->cr_gid)
+ || !groups_equal(cr1->cr_group_info, cr2->cr_group_info))
--- /dev/null
+From cf9182e90b2af04245ac4fae497fe73fc71285b4 Mon Sep 17 00:00:00 2001
+From: "J. Bruce Fields" <bfields@redhat.com>
+Date: Wed, 29 Aug 2012 15:21:58 -0700
+Subject: nfsd4: fix nfs4 stateid leak
+
+From: "J. Bruce Fields" <bfields@redhat.com>
+
+commit cf9182e90b2af04245ac4fae497fe73fc71285b4 upstream.
+
+Processes that open and close multiple files may end up setting this
+oo_last_closed_stid without freeing what was previously pointed to.
+This can result in a major leak, visible for example by watching the
+nfsd4_stateids line of /proc/slabinfo.
+
+Reported-by: Cyril B. <cbay@excellency.fr>
+Tested-by: Cyril B. <cbay@excellency.fr>
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfsd/nfs4state.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -3782,6 +3782,7 @@ nfsd4_close(struct svc_rqst *rqstp, stru
+ memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
+
+ nfsd4_close_open_stateid(stp);
++ release_last_closed_stateid(oo);
+ oo->oo_last_closed_stid = stp;
+
+ if (list_empty(&oo->oo_owner.so_stateids)) {
--- /dev/null
+From fe6e1e8d9fad86873eb74a26e80a8f91f9e870b5 Mon Sep 17 00:00:00 2001
+From: Peng Tao <bergwolf@gmail.com>
+Date: Fri, 24 Aug 2012 00:27:51 +0800
+Subject: pnfsblock: fix partial page buffer wirte
+
+From: Peng Tao <bergwolf@gmail.com>
+
+commit fe6e1e8d9fad86873eb74a26e80a8f91f9e870b5 upstream.
+
+If applications use flock to protect their write ranges, generic NFS
+will not do a read-modify-write cycle at the page cache level. Therefore
+the layout driver (LD) must know how to handle non-sector-aligned writes.
+Otherwise there will be data corruption.
+
+Signed-off-by: Peng Tao <tao.peng@emc.com>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfs/blocklayout/blocklayout.c | 177 ++++++++++++++++++++++++++++++++++++---
+ fs/nfs/blocklayout/blocklayout.h | 1
+ 2 files changed, 166 insertions(+), 12 deletions(-)
+
+--- a/fs/nfs/blocklayout/blocklayout.c
++++ b/fs/nfs/blocklayout/blocklayout.c
+@@ -162,25 +162,39 @@ static struct bio *bl_alloc_init_bio(int
+ return bio;
+ }
+
+-static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw,
++static struct bio *do_add_page_to_bio(struct bio *bio, int npg, int rw,
+ sector_t isect, struct page *page,
+ struct pnfs_block_extent *be,
+ void (*end_io)(struct bio *, int err),
+- struct parallel_io *par)
++ struct parallel_io *par,
++ unsigned int offset, int len)
+ {
++ isect = isect + (offset >> SECTOR_SHIFT);
++ dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__,
++ npg, rw, (unsigned long long)isect, offset, len);
+ retry:
+ if (!bio) {
+ bio = bl_alloc_init_bio(npg, isect, be, end_io, par);
+ if (!bio)
+ return ERR_PTR(-ENOMEM);
+ }
+- if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
++ if (bio_add_page(bio, page, len, offset) < len) {
+ bio = bl_submit_bio(rw, bio);
+ goto retry;
+ }
+ return bio;
+ }
+
++static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw,
++ sector_t isect, struct page *page,
++ struct pnfs_block_extent *be,
++ void (*end_io)(struct bio *, int err),
++ struct parallel_io *par)
++{
++ return do_add_page_to_bio(bio, npg, rw, isect, page, be,
++ end_io, par, 0, PAGE_CACHE_SIZE); /* whole page, from offset 0 */
++}
++
+ /* This is basically copied from mpage_end_io_read */
+ static void bl_end_io_read(struct bio *bio, int err)
+ {
+@@ -461,6 +475,106 @@ map_block(struct buffer_head *bh, sector
+ return;
+ }
+
++static void
++bl_read_single_end_io(struct bio *bio, int error)
++{
++ struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
++ struct page *page = bvec->bv_page;
++
++ /* Single-page bio: unlock its page so wait_on_page_locked() returns */
++ unlock_page(page);
++}
++
++static int
++bl_do_readpage_sync(struct page *page, struct pnfs_block_extent *be,
++		    unsigned int offset, unsigned int len)
++{
++	struct bio *bio;
++	struct page *shadow_page;
++	sector_t isect;
++	char *kaddr, *kshadow_addr;
++	int ret = -ENOMEM;
++
++	dprintk("%s: offset %u len %u\n", __func__, offset, len);
++	/* Read into a scratch page; only [offset, offset + len) reaches @page. */
++	shadow_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
++	if (shadow_page == NULL)
++		return ret;
++
++	bio = bio_alloc(GFP_NOIO, 1);
++	if (bio == NULL)
++		goto out_free_page;	/* do not leak shadow_page */
++
++	isect = ((sector_t)page->index << PAGE_CACHE_SECTOR_SHIFT) +
++		(offset / SECTOR_SIZE);
++
++	bio->bi_sector = isect - be->be_f_offset + be->be_v_offset;
++	bio->bi_bdev = be->be_mdev;
++	bio->bi_end_io = bl_read_single_end_io;
++
++	ret = -EIO;
++	lock_page(shadow_page);
++	if (bio_add_page(bio, shadow_page,
++			 SECTOR_SIZE, round_down(offset, SECTOR_SIZE)) == 0) {
++		unlock_page(shadow_page);
++		goto out_put_bio;
++	}
++
++	submit_bio(READ, bio);
++	wait_on_page_locked(shadow_page);	/* end_io unlocks the page */
++	if (likely(test_bit(BIO_UPTODATE, &bio->bi_flags))) {
++		kaddr = kmap_atomic(page);
++		kshadow_addr = kmap_atomic(shadow_page);
++		memcpy(kaddr + offset, kshadow_addr + offset, len);
++		kunmap_atomic(kshadow_addr);
++		kunmap_atomic(kaddr);
++		ret = 0;
++	}
++out_put_bio:
++	bio_put(bio);
++out_free_page:
++	__free_page(shadow_page);
++	return ret;
++}
++
++static int
++bl_read_partial_page_sync(struct page *page, struct pnfs_block_extent *be,
++ unsigned int dirty_offset, unsigned int dirty_len,
++ bool full_page)
++{
++ int ret = 0;
++ unsigned int start, end;
++
++ if (full_page) { /* complete the whole page around the dirty range */
++ start = 0;
++ end = PAGE_CACHE_SIZE;
++ } else { /* complete only the sectors the dirty range touches */
++ start = round_down(dirty_offset, SECTOR_SIZE);
++ end = round_up(dirty_offset + dirty_len, SECTOR_SIZE);
++ }
++
++ dprintk("%s: offset %u len %d\n", __func__, dirty_offset, dirty_len);
++ if (!be) { /* no extent to read from: zero both gaps instead */
++ zero_user_segments(page, start, dirty_offset,
++ dirty_offset + dirty_len, end);
++ if (start == 0 && end == PAGE_CACHE_SIZE &&
++ trylock_page(page)) {
++ SetPageUptodate(page);
++ unlock_page(page);
++ }
++ return ret;
++ }
++ /* Read the gap before the dirty range, then the gap after it. */
++ if (start != dirty_offset)
++ ret = bl_do_readpage_sync(page, be, start, dirty_offset - start);
++ /* NOTE(review): with full_page a gap may exceed one sector, yet bl_do_readpage_sync() adds only SECTOR_SIZE to its bio - confirm */
++ if (!ret && (dirty_offset + dirty_len < end))
++ ret = bl_do_readpage_sync(page, be, dirty_offset + dirty_len,
++ end - dirty_offset - dirty_len);
++
++ return ret;
++}
++
+ /* Given an unmapped page, zero it or read in page for COW, page is locked
+ * by caller.
+ */
+@@ -494,7 +608,6 @@ init_page_for_write(struct page *page, s
+ SetPageUptodate(page);
+
+ cleanup:
+- bl_put_extent(cow_read);
+ if (bh)
+ free_buffer_head(bh);
+ if (ret) {
+@@ -566,6 +679,7 @@ bl_write_pagelist(struct nfs_write_data
+ struct parallel_io *par = NULL;
+ loff_t offset = wdata->args.offset;
+ size_t count = wdata->args.count;
++ unsigned int pg_offset, pg_len, saved_len;
+ struct page **pages = wdata->args.pages;
+ struct page *page;
+ pgoff_t index;
+@@ -674,10 +788,11 @@ next_page:
+ if (!extent_length) {
+ /* We've used up the previous extent */
+ bl_put_extent(be);
++ bl_put_extent(cow_read);
+ bio = bl_submit_bio(WRITE, bio);
+ /* Get the next one */
+ be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg),
+- isect, NULL);
++ isect, &cow_read);
+ if (!be || !is_writable(be, isect)) {
+ header->pnfs_error = -EINVAL;
+ goto out;
+@@ -694,7 +809,26 @@ next_page:
+ extent_length = be->be_length -
+ (isect - be->be_f_offset);
+ }
+- if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
++
++ dprintk("%s offset %lld count %Zu\n", __func__, offset, count);
++ pg_offset = offset & ~PAGE_CACHE_MASK;
++ if (pg_offset + count > PAGE_CACHE_SIZE)
++ pg_len = PAGE_CACHE_SIZE - pg_offset;
++ else
++ pg_len = count;
++
++ saved_len = pg_len;
++ if (be->be_state == PNFS_BLOCK_INVALID_DATA &&
++ !bl_is_sector_init(be->be_inval, isect)) {
++ ret = bl_read_partial_page_sync(pages[i], cow_read,
++ pg_offset, pg_len, true);
++ if (ret) {
++ dprintk("%s bl_read_partial_page_sync fail %d\n",
++ __func__, ret);
++ header->pnfs_error = ret;
++ goto out;
++ }
++
+ ret = bl_mark_sectors_init(be->be_inval, isect,
+ PAGE_CACHE_SECTORS);
+ if (unlikely(ret)) {
+@@ -703,15 +837,35 @@ next_page:
+ header->pnfs_error = ret;
+ goto out;
+ }
++
++ /* Expand to full page write */
++ pg_offset = 0;
++ pg_len = PAGE_CACHE_SIZE;
++ } else if ((pg_offset & (SECTOR_SIZE - 1)) ||
++ (pg_len & (SECTOR_SIZE - 1))){
++ /* ahh, nasty case. We have to do sync full sector
++ * read-modify-write cycles.
++ */
++ unsigned int saved_offset = pg_offset;
++ ret = bl_read_partial_page_sync(pages[i], be, pg_offset,
++ pg_len, false);
++ pg_offset = round_down(pg_offset, SECTOR_SIZE);
++ pg_len = round_up(saved_offset + pg_len, SECTOR_SIZE)
++ - pg_offset;
+ }
+- bio = bl_add_page_to_bio(bio, wdata->pages.npages - i, WRITE,
++
++
++ bio = do_add_page_to_bio(bio, wdata->pages.npages - i, WRITE,
+ isect, pages[i], be,
+- bl_end_io_write, par);
++ bl_end_io_write, par,
++ pg_offset, pg_len);
+ if (IS_ERR(bio)) {
+ header->pnfs_error = PTR_ERR(bio);
+ bio = NULL;
+ goto out;
+ }
++ offset += saved_len;
++ count -= saved_len;
+ isect += PAGE_CACHE_SECTORS;
+ last_isect = isect;
+ extent_length -= PAGE_CACHE_SECTORS;
+@@ -729,17 +883,16 @@ next_page:
+ }
+
+ write_done:
+- wdata->res.count = (last_isect << SECTOR_SHIFT) - (offset);
+- if (count < wdata->res.count) {
+- wdata->res.count = count;
+- }
++ wdata->res.count = wdata->args.count;
+ out:
+ bl_put_extent(be);
++ bl_put_extent(cow_read);
+ bl_submit_bio(WRITE, bio);
+ put_parallel(par);
+ return PNFS_ATTEMPTED;
+ out_mds:
+ bl_put_extent(be);
++ bl_put_extent(cow_read);
+ kfree(par);
+ return PNFS_NOT_ATTEMPTED;
+ }
+--- a/fs/nfs/blocklayout/blocklayout.h
++++ b/fs/nfs/blocklayout/blocklayout.h
+@@ -41,6 +41,7 @@
+
+ #define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT)
+ #define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT)
++#define SECTOR_SIZE (1 << SECTOR_SHIFT)
+
+ struct block_mount_id {
+ spinlock_t bm_lock; /* protects list */
arm-vfp-fix-saving-d16-d31-vfp-registers-on-v6-kernels.patch
+pnfsblock-fix-partial-page-buffer-wirte.patch
+nfs41-fix-error-of-setting-blocklayoutdriver.patch
+nfs-remove-bad-delegations-during-open-recovery.patch
+nfsd4-don-t-pin-clientids-to-pseudoflavors.patch
+nfsd4-fix-nfs4-stateid-leak.patch
+nfsd-pass-null-terminated-buf-to-kstrtouint.patch
+lockd-per-net-nsm-client-creation-and-destruction-helpers-introduced.patch
+lockd-use-rpc-client-s-cl_nodename-for-id-encoding.patch
+lockd-create-and-use-per-net-nsm-rpc-clients-on-mon-unmon-requests.patch