--- /dev/null
+From stable+bounces-3094-greg=kroah.com@vger.kernel.org Tue Nov 28 22:02:05 2023
+From: Chuck Lever <cel@kernel.org>
+Date: Tue, 28 Nov 2023 17:01:36 -0500
+Subject: NFSD: Fix checksum mismatches in the duplicate reply cache
+To: stable@vger.kernel.org
+Cc: linux-nfs@vger.kernel.org
+Message-ID: <170120889657.1725.7300213662876041857.stgit@klimt.1015granger.net>
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit bf51c52a1f3c238d72c64e14d5e7702d3a245b82 ]
+
+nfsd_cache_csum() currently assumes that the server's RPC layer has
+been advancing rq_arg.head[0].iov_base as it decodes an incoming
+request, because that's the way it used to work. On entry, it
+expects that buf->head[0].iov_base points to the start of the NFS
+header, and excludes the already-decoded RPC header.
+
+These days, however, head[0].iov_base points to the start of the
+RPC header during all processing. It no longer points at the NFS
+Call header when execution arrives at nfsd_cache_csum().
+
+In a retransmitted RPC the XID and the NFS header are supposed to
+be the same as the original message, but the contents of the
+retransmitted RPC header can be different. For example, for krb5,
+the GSS sequence number will be different between the two. Thus if
+the RPC header is always included in the DRC checksum computation,
+the checksum of the retransmitted message might not match the
+checksum of the original message, even though the NFS part of these
+messages is identical.
+
+The result is that, even if a matching XID is found in the DRC,
+the checksum mismatch causes the server to execute the
+retransmitted RPC transaction again.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Tested-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/cache.h | 3 +-
+ fs/nfsd/nfscache.c | 65 +++++++++++++++++++++++++++++++++++------------------
+ fs/nfsd/nfssvc.c | 11 ++++++++
+ 3 files changed, 56 insertions(+), 23 deletions(-)
+
+--- a/fs/nfsd/cache.h
++++ b/fs/nfsd/cache.h
+@@ -82,7 +82,8 @@ int nfsd_drc_slab_create(void);
+ void nfsd_drc_slab_free(void);
+ int nfsd_reply_cache_init(struct nfsd_net *);
+ void nfsd_reply_cache_shutdown(struct nfsd_net *);
+-int nfsd_cache_lookup(struct svc_rqst *);
++int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
++ unsigned int len);
+ void nfsd_cache_update(struct svc_rqst *, int, __be32 *);
+ int nfsd_reply_cache_stats_show(struct seq_file *m, void *v);
+
+--- a/fs/nfsd/nfscache.c
++++ b/fs/nfsd/nfscache.c
+@@ -311,33 +311,53 @@ nfsd_reply_cache_scan(struct shrinker *s
+
+ return prune_cache_entries(nn);
+ }
+-/*
+- * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes
++
++/**
++ * nfsd_cache_csum - Checksum incoming NFS Call arguments
++ * @buf: buffer containing a whole RPC Call message
++ * @start: starting byte of the NFS Call header
++ * @remaining: size of the NFS Call header, in bytes
++ *
++ * Compute a weak checksum of the leading bytes of an NFS procedure
++ * call header to help verify that a retransmitted Call matches an
++ * entry in the duplicate reply cache.
++ *
++ * To avoid assumptions about how the RPC message is laid out in
++ * @buf and what else it might contain (eg, a GSS MIC suffix), the
++ * caller passes us the exact location and length of the NFS Call
++ * header.
++ *
++ * Returns a 32-bit checksum value, as defined in RFC 793.
+ */
+-static __wsum
+-nfsd_cache_csum(struct svc_rqst *rqstp)
++static __wsum nfsd_cache_csum(struct xdr_buf *buf, unsigned int start,
++ unsigned int remaining)
+ {
++ unsigned int base, len;
++ struct xdr_buf subbuf;
++ __wsum csum = 0;
++ void *p;
+ int idx;
+- unsigned int base;
+- __wsum csum;
+- struct xdr_buf *buf = &rqstp->rq_arg;
+- const unsigned char *p = buf->head[0].iov_base;
+- size_t csum_len = min_t(size_t, buf->head[0].iov_len + buf->page_len,
+- RC_CSUMLEN);
+- size_t len = min(buf->head[0].iov_len, csum_len);
++
++ if (remaining > RC_CSUMLEN)
++ remaining = RC_CSUMLEN;
++ if (xdr_buf_subsegment(buf, &subbuf, start, remaining))
++ return csum;
+
+ /* rq_arg.head first */
+- csum = csum_partial(p, len, 0);
+- csum_len -= len;
++ if (subbuf.head[0].iov_len) {
++ len = min_t(unsigned int, subbuf.head[0].iov_len, remaining);
++ csum = csum_partial(subbuf.head[0].iov_base, len, csum);
++ remaining -= len;
++ }
+
+ /* Continue into page array */
+- idx = buf->page_base / PAGE_SIZE;
+- base = buf->page_base & ~PAGE_MASK;
+- while (csum_len) {
+- p = page_address(buf->pages[idx]) + base;
+- len = min_t(size_t, PAGE_SIZE - base, csum_len);
++ idx = subbuf.page_base / PAGE_SIZE;
++ base = subbuf.page_base & ~PAGE_MASK;
++ while (remaining) {
++ p = page_address(subbuf.pages[idx]) + base;
++ len = min_t(unsigned int, PAGE_SIZE - base, remaining);
+ csum = csum_partial(p, len, csum);
+- csum_len -= len;
++ remaining -= len;
+ base = 0;
+ ++idx;
+ }
+@@ -408,6 +428,8 @@ out:
+ /**
+ * nfsd_cache_lookup - Find an entry in the duplicate reply cache
+ * @rqstp: Incoming Call to find
++ * @start: starting byte in @rqstp->rq_arg of the NFS Call header
++ * @len: size of the NFS Call header, in bytes
+ *
+ * Try to find an entry matching the current call in the cache. When none
+ * is found, we try to grab the oldest expired entry off the LRU list. If
+@@ -420,7 +442,8 @@ out:
+ * %RC_REPLY: Reply from cache
+ * %RC_DROPIT: Do not process the request further
+ */
+-int nfsd_cache_lookup(struct svc_rqst *rqstp)
++int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
++ unsigned int len)
+ {
+ struct nfsd_net *nn;
+ struct svc_cacherep *rp, *found;
+@@ -435,7 +458,7 @@ int nfsd_cache_lookup(struct svc_rqst *r
+ goto out;
+ }
+
+- csum = nfsd_cache_csum(rqstp);
++ csum = nfsd_cache_csum(&rqstp->rq_arg, start, len);
+
+ /*
+ * Since the common case is a cache miss followed by an insert,
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -1027,6 +1027,7 @@ out:
+ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
+ {
+ const struct svc_procedure *proc = rqstp->rq_procinfo;
++ unsigned int start, len;
+ __be32 *nfs_reply;
+
+ /*
+@@ -1036,10 +1037,18 @@ int nfsd_dispatch(struct svc_rqst *rqstp
+ rqstp->rq_cachetype = proc->pc_cachetype;
+
+ svcxdr_init_decode(rqstp);
++
++ /*
++ * ->pc_decode advances the argument stream past the NFS
++ * Call header, so grab the header's starting location and
++ * size now for the call to nfsd_cache_lookup().
++ */
++ start = xdr_stream_pos(&rqstp->rq_arg_stream);
++ len = xdr_stream_remaining(&rqstp->rq_arg_stream);
+ if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream))
+ goto out_decode_err;
+
+- switch (nfsd_cache_lookup(rqstp)) {
++ switch (nfsd_cache_lookup(rqstp, start, len)) {
+ case RC_DOIT:
+ break;
+ case RC_REPLY:
--- /dev/null
+From stable+bounces-3092-greg=kroah.com@vger.kernel.org Tue Nov 28 22:01:38 2023
+From: Chuck Lever <cel@kernel.org>
+Date: Tue, 28 Nov 2023 17:01:30 -0500
+Subject: NFSD: Fix "start of NFS reply" pointer passed to nfsd_cache_update()
+To: stable@vger.kernel.org
+Cc: linux-nfs@vger.kernel.org
+Message-ID: <170120889023.1725.6734851513409877139.stgit@klimt.1015granger.net>
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 1caf5f61dd8430ae5a0b4538afe4953ce7517cbb ]
+
+The "statp + 1" pointer that is passed to nfsd_cache_update() is
+supposed to point to the start of the egress NFS Reply header. In
+fact, it does point there for AUTH_SYS and RPCSEC_GSS_KRB5 requests.
+
+But both krb5i and krb5p add fields between the RPC header's
+accept_stat field and the start of the NFS Reply header. In those
+cases, "statp + 1" points at the extra fields instead of the Reply.
+The result is that nfsd_cache_update() caches what looks to the
+client like garbage.
+
+A connection break can occur for a number of reasons, but the most
+common reason when using krb5i/p is a GSS sequence number window
+underrun. When an underrun is detected, the server is obliged to
+drop the RPC and the connection to force a retransmit with a fresh
+GSS sequence number. The client presents the same XID, it hits in
+the server's DRC, and the server returns the garbage cache entry.
+
+The "statp + 1" argument has been used since the oldest changeset
+in the kernel history repo, so it has been in nfsd_dispatch()
+literally since before history began. The problem arose only when
+the server-side GSS implementation was added twenty years ago.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Tested-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/nfssvc.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -1027,6 +1027,7 @@ out:
+ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
+ {
+ const struct svc_procedure *proc = rqstp->rq_procinfo;
++ __be32 *nfs_reply;
+
+ /*
+ * Give the xdr decoder a chance to change this if it wants
+@@ -1053,6 +1054,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp
+ */
+ svcxdr_init_encode(rqstp);
+
++ nfs_reply = xdr_inline_decode(&rqstp->rq_res_stream, 0);
+ *statp = proc->pc_func(rqstp);
+ if (*statp == rpc_drop_reply || test_bit(RQ_DROPME, &rqstp->rq_flags))
+ goto out_update_drop;
+@@ -1060,7 +1062,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp
+ if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream))
+ goto out_encode_err;
+
+- nfsd_cache_update(rqstp, rqstp->rq_cachetype, statp + 1);
++ nfsd_cache_update(rqstp, rqstp->rq_cachetype, nfs_reply);
+ out_cached_reply:
+ return 1;
+