From: Greg Kroah-Hartman Date: Thu, 21 Mar 2019 08:48:24 +0000 (+0100) Subject: 4.19-stable patches X-Git-Tag: v3.18.137~28 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=495c55a2102c1da9c43f85ac798be0a1c0d2e43d;p=thirdparty%2Fkernel%2Fstable-queue.git 4.19-stable patches added patches: bcache-never-writeback-a-discard-operation.patch cpcap-charger-generate-events-for-userspace.patch mfd-sm501-fix-potential-null-pointer-dereference.patch nfs-don-t-recoalesce-on-error-in-nfs_pageio_complete_mirror.patch nfs-fix-an-i-o-request-leakage-in-nfs_do_recoalesce.patch nfs-fix-i-o-request-leakages.patch nfsd-fix-memory-corruption-caused-by-readdir.patch nfsd-fix-performance-limiting-session-calculation.patch nfsd-fix-wrong-check-in-write_v4_end_grace.patch nfsv4.1-reinitialise-sequence-results-before-retransmitting-a-request.patch pm-wakeup-rework-wakeup-source-timer-cancellation.patch svcrpc-fix-udp-on-servers-with-lots-of-threads.patch --- diff --git a/queue-4.19/bcache-never-writeback-a-discard-operation.patch b/queue-4.19/bcache-never-writeback-a-discard-operation.patch new file mode 100644 index 00000000000..1e0022e0ff1 --- /dev/null +++ b/queue-4.19/bcache-never-writeback-a-discard-operation.patch @@ -0,0 +1,133 @@ +From 9951379b0ca88c95876ad9778b9099e19a95d566 Mon Sep 17 00:00:00 2001 +From: Daniel Axtens +Date: Sat, 9 Feb 2019 12:52:53 +0800 +Subject: bcache: never writeback a discard operation + +From: Daniel Axtens + +commit 9951379b0ca88c95876ad9778b9099e19a95d566 upstream. 
+ +Some users see panics like the following when performing fstrim on a +bcached volume: + +[ 529.803060] BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 +[ 530.183928] #PF error: [normal kernel read fault] +[ 530.412392] PGD 8000001f42163067 P4D 8000001f42163067 PUD 1f42168067 PMD 0 +[ 530.750887] Oops: 0000 [#1] SMP PTI +[ 530.920869] CPU: 10 PID: 4167 Comm: fstrim Kdump: loaded Not tainted 5.0.0-rc1+ #3 +[ 531.290204] Hardware name: HP ProLiant DL360 Gen9/ProLiant DL360 Gen9, BIOS P89 12/27/2015 +[ 531.693137] RIP: 0010:blk_queue_split+0x148/0x620 +[ 531.922205] Code: 60 38 89 55 a0 45 31 db 45 31 f6 45 31 c9 31 ff 89 4d 98 85 db 0f 84 7f 04 00 00 44 8b 6d 98 4c 89 ee 48 c1 e6 04 49 03 70 78 <8b> 46 08 44 8b 56 0c 48 +8b 16 44 29 e0 39 d8 48 89 55 a8 0f 47 c3 +[ 532.838634] RSP: 0018:ffffb9b708df39b0 EFLAGS: 00010246 +[ 533.093571] RAX: 00000000ffffffff RBX: 0000000000046000 RCX: 0000000000000000 +[ 533.441865] RDX: 0000000000000200 RSI: 0000000000000000 RDI: 0000000000000000 +[ 533.789922] RBP: ffffb9b708df3a48 R08: ffff940d3b3fdd20 R09: 0000000000000000 +[ 534.137512] R10: ffffb9b708df3958 R11: 0000000000000000 R12: 0000000000000000 +[ 534.485329] R13: 0000000000000000 R14: 0000000000000000 R15: ffff940d39212020 +[ 534.833319] FS: 00007efec26e3840(0000) GS:ffff940d1f480000(0000) knlGS:0000000000000000 +[ 535.224098] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 535.504318] CR2: 0000000000000008 CR3: 0000001f4e256004 CR4: 00000000001606e0 +[ 535.851759] Call Trace: +[ 535.970308] ? mempool_alloc_slab+0x15/0x20 +[ 536.174152] ? bch_data_insert+0x42/0xd0 [bcache] +[ 536.403399] blk_mq_make_request+0x97/0x4f0 +[ 536.607036] generic_make_request+0x1e2/0x410 +[ 536.819164] submit_bio+0x73/0x150 +[ 536.980168] ? submit_bio+0x73/0x150 +[ 537.149731] ? bio_associate_blkg_from_css+0x3b/0x60 +[ 537.391595] ? 
_cond_resched+0x1a/0x50 +[ 537.573774] submit_bio_wait+0x59/0x90 +[ 537.756105] blkdev_issue_discard+0x80/0xd0 +[ 537.959590] ext4_trim_fs+0x4a9/0x9e0 +[ 538.137636] ? ext4_trim_fs+0x4a9/0x9e0 +[ 538.324087] ext4_ioctl+0xea4/0x1530 +[ 538.497712] ? _copy_to_user+0x2a/0x40 +[ 538.679632] do_vfs_ioctl+0xa6/0x600 +[ 538.853127] ? __do_sys_newfstat+0x44/0x70 +[ 539.051951] ksys_ioctl+0x6d/0x80 +[ 539.212785] __x64_sys_ioctl+0x1a/0x20 +[ 539.394918] do_syscall_64+0x5a/0x110 +[ 539.568674] entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +We have observed it where both: +1) LVM/devmapper is involved (bcache backing device is LVM volume) and +2) writeback cache is involved (bcache cache_mode is writeback) + +On one machine, we can reliably reproduce it with: + + # echo writeback > /sys/block/bcache0/bcache/cache_mode + (not sure whether above line is required) + # mount /dev/bcache0 /test + # for i in {0..10}; do + file="$(mktemp /test/zero.XXX)" + dd if=/dev/zero of="$file" bs=1M count=256 + sync + rm $file + done + # fstrim -v /test + +Observing this with tracepoints on, we see the following writes: + +fstrim-18019 [022] .... 91107.302026: bcache_write: 73f95583-561c-408f-a93a-4cbd2498f5c8 inode 0 DS 4260112 + 196352 hit 0 bypass 1 +fstrim-18019 [022] .... 91107.302050: bcache_write: 73f95583-561c-408f-a93a-4cbd2498f5c8 inode 0 DS 4456464 + 262144 hit 0 bypass 1 +fstrim-18019 [022] .... 91107.302075: bcache_write: 73f95583-561c-408f-a93a-4cbd2498f5c8 inode 0 DS 4718608 + 81920 hit 0 bypass 1 +fstrim-18019 [022] .... 91107.302094: bcache_write: 73f95583-561c-408f-a93a-4cbd2498f5c8 inode 0 DS 5324816 + 180224 hit 0 bypass 1 +fstrim-18019 [022] .... 91107.302121: bcache_write: 73f95583-561c-408f-a93a-4cbd2498f5c8 inode 0 DS 5505040 + 262144 hit 0 bypass 1 +fstrim-18019 [022] .... 91107.302145: bcache_write: 73f95583-561c-408f-a93a-4cbd2498f5c8 inode 0 DS 5767184 + 81920 hit 0 bypass 1 +fstrim-18019 [022] .... 
91107.308777: bcache_write: 73f95583-561c-408f-a93a-4cbd2498f5c8 inode 0 DS 6373392 + 180224 hit 1 bypass 0 + + +Note the final one has different hit/bypass flags. + +This is because in should_writeback(), we were hitting a case where +the partial stripe condition was returning true and so +should_writeback() was returning true early. + +If that hadn't been the case, it would have hit the would_skip test, and +as would_skip == s->iop.bypass == true, should_writeback() would have +returned false. + +Looking at the git history from 'commit 72c270612bd3 ("bcache: Write out +full stripes")', it looks like the idea was to optimise for raid5/6: + + * If a stripe is already dirty, force writes to that stripe to + writeback mode - to help build up full stripes of dirty data + +To fix this issue, make sure that should_writeback() on a discard op +never returns true. + +More details of debugging: +https://www.spinics.net/lists/linux-bcache/msg06996.html + +Previous reports: + - https://bugzilla.kernel.org/show_bug.cgi?id=201051 + - https://bugzilla.kernel.org/show_bug.cgi?id=196103 + - https://www.spinics.net/lists/linux-bcache/msg06885.html + +(Coly Li: minor modification to follow maximum 75 chars per line rule) + +Cc: Kent Overstreet +Cc: stable@vger.kernel.org +Fixes: 72c270612bd3 ("bcache: Write out full stripes") +Signed-off-by: Daniel Axtens +Signed-off-by: Coly Li +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/bcache/writeback.h | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/md/bcache/writeback.h ++++ b/drivers/md/bcache/writeback.h +@@ -63,6 +63,9 @@ static inline bool should_writeback(stru + in_use > CUTOFF_WRITEBACK_SYNC) + return false; + ++ if (bio_op(bio) == REQ_OP_DISCARD) ++ return false; ++ + if (dc->partial_stripes_expensive && + bcache_dev_stripe_dirty(dc, bio->bi_iter.bi_sector, + bio_sectors(bio))) diff --git a/queue-4.19/cpcap-charger-generate-events-for-userspace.patch 
b/queue-4.19/cpcap-charger-generate-events-for-userspace.patch new file mode 100644 index 00000000000..7890bc2e923 --- /dev/null +++ b/queue-4.19/cpcap-charger-generate-events-for-userspace.patch @@ -0,0 +1,37 @@ +From fd10606f93a149a9f3d37574e5385b083b4a7b32 Mon Sep 17 00:00:00 2001 +From: Pavel Machek +Date: Thu, 27 Dec 2018 20:52:21 +0100 +Subject: cpcap-charger: generate events for userspace + +From: Pavel Machek + +commit fd10606f93a149a9f3d37574e5385b083b4a7b32 upstream. + +The driver doesn't generate uevents on charger connect/disconnect. +This leads to UPower not detecting when AC is on or off... and that is +bad. + +Reported by Arthur D. on github ( +https://github.com/maemo-leste/bugtracker/issues/206 ), thanks to +Merlijn Wajer for suggesting a fix. + +Cc: stable@kernel.org +Signed-off-by: Pavel Machek +Acked-by: Tony Lindgren +Signed-off-by: Sebastian Reichel +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/power/supply/cpcap-charger.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/power/supply/cpcap-charger.c ++++ b/drivers/power/supply/cpcap-charger.c +@@ -458,6 +458,7 @@ static void cpcap_usb_detect(struct work + goto out_err; + } + ++ power_supply_changed(ddata->usb); + return; + + out_err: diff --git a/queue-4.19/mfd-sm501-fix-potential-null-pointer-dereference.patch b/queue-4.19/mfd-sm501-fix-potential-null-pointer-dereference.patch new file mode 100644 index 00000000000..3cdeae28036 --- /dev/null +++ b/queue-4.19/mfd-sm501-fix-potential-null-pointer-dereference.patch @@ -0,0 +1,38 @@ +From ae7b8eda27b33b1f688dfdebe4d46f690a8f9162 Mon Sep 17 00:00:00 2001 +From: "Gustavo A. R. Silva" +Date: Tue, 22 Jan 2019 10:56:36 -0600 +Subject: mfd: sm501: Fix potential NULL pointer dereference + +From: Gustavo A. R. Silva + +commit ae7b8eda27b33b1f688dfdebe4d46f690a8f9162 upstream. + +There is a potential NULL pointer dereference in case devm_kzalloc() +fails and returns NULL. 
+ +Fix this by adding a NULL check on *lookup* + +This bug was detected with the help of Coccinelle. + +Fixes: b2e63555592f ("i2c: gpio: Convert to use descriptors") +Cc: stable@vger.kernel.org +Signed-off-by: Gustavo A. R. Silva +Signed-off-by: Lee Jones +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/mfd/sm501.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/mfd/sm501.c ++++ b/drivers/mfd/sm501.c +@@ -1145,6 +1145,9 @@ static int sm501_register_gpio_i2c_insta + lookup = devm_kzalloc(&pdev->dev, + sizeof(*lookup) + 3 * sizeof(struct gpiod_lookup), + GFP_KERNEL); ++ if (!lookup) ++ return -ENOMEM; ++ + lookup->dev_id = "i2c-gpio"; + if (iic->pin_sda < 32) + lookup->table[0].chip_label = "SM501-LOW"; diff --git a/queue-4.19/nfs-don-t-recoalesce-on-error-in-nfs_pageio_complete_mirror.patch b/queue-4.19/nfs-don-t-recoalesce-on-error-in-nfs_pageio_complete_mirror.patch new file mode 100644 index 00000000000..342232ad1ac --- /dev/null +++ b/queue-4.19/nfs-don-t-recoalesce-on-error-in-nfs_pageio_complete_mirror.patch @@ -0,0 +1,32 @@ +From 8127d82705998568b52ac724e28e00941538083d Mon Sep 17 00:00:00 2001 +From: Trond Myklebust +Date: Fri, 15 Feb 2019 16:08:25 -0500 +Subject: NFS: Don't recoalesce on error in nfs_pageio_complete_mirror() + +From: Trond Myklebust + +commit 8127d82705998568b52ac724e28e00941538083d upstream. + +If the I/O completion failed with a fatal error, then we should just +exit nfs_pageio_complete_mirror() rather than try to recoalesce. 
+ +Fixes: a7d42ddb3099 ("nfs: add mirroring support to pgio layer") +Signed-off-by: Trond Myklebust +Cc: stable@vger.kernel.org # v4.0+ +Signed-off-by: Greg Kroah-Hartman + +--- + fs/nfs/pagelist.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/nfs/pagelist.c ++++ b/fs/nfs/pagelist.c +@@ -1214,7 +1214,7 @@ static void nfs_pageio_complete_mirror(s + desc->pg_mirror_idx = mirror_idx; + for (;;) { + nfs_pageio_doio(desc); +- if (!mirror->pg_recoalesce) ++ if (desc->pg_error < 0 || !mirror->pg_recoalesce) + break; + if (!nfs_do_recoalesce(desc)) + break; diff --git a/queue-4.19/nfs-fix-an-i-o-request-leakage-in-nfs_do_recoalesce.patch b/queue-4.19/nfs-fix-an-i-o-request-leakage-in-nfs_do_recoalesce.patch new file mode 100644 index 00000000000..e6d8abe9dbb --- /dev/null +++ b/queue-4.19/nfs-fix-an-i-o-request-leakage-in-nfs_do_recoalesce.patch @@ -0,0 +1,31 @@ +From 4d91969ed4dbcefd0e78f77494f0cb8fada9048a Mon Sep 17 00:00:00 2001 +From: Trond Myklebust +Date: Fri, 15 Feb 2019 14:59:52 -0500 +Subject: NFS: Fix an I/O request leakage in nfs_do_recoalesce + +From: Trond Myklebust + +commit 4d91969ed4dbcefd0e78f77494f0cb8fada9048a upstream. + +Whether we need to exit early, or just reprocess the list, we +must not lose track of the request which failed to get recoalesced. 
+ +Fixes: 03d5eb65b538 ("NFS: Fix a memory leak in nfs_do_recoalesce") +Signed-off-by: Trond Myklebust +Cc: stable@vger.kernel.org # v4.0+ +Signed-off-by: Greg Kroah-Hartman + +--- + fs/nfs/pagelist.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/fs/nfs/pagelist.c ++++ b/fs/nfs/pagelist.c +@@ -1093,7 +1093,6 @@ static int nfs_do_recoalesce(struct nfs_ + struct nfs_page *req; + + req = list_first_entry(&head, struct nfs_page, wb_list); +- nfs_list_remove_request(req); + if (__nfs_pageio_add_request(desc, req)) + continue; + if (desc->pg_error < 0) { diff --git a/queue-4.19/nfs-fix-i-o-request-leakages.patch b/queue-4.19/nfs-fix-i-o-request-leakages.patch new file mode 100644 index 00000000000..7ed32854d91 --- /dev/null +++ b/queue-4.19/nfs-fix-i-o-request-leakages.patch @@ -0,0 +1,89 @@ +From f57dcf4c72113c745d83f1c65f7291299f65c14f Mon Sep 17 00:00:00 2001 +From: Trond Myklebust +Date: Wed, 13 Feb 2019 09:21:38 -0500 +Subject: NFS: Fix I/O request leakages + +From: Trond Myklebust + +commit f57dcf4c72113c745d83f1c65f7291299f65c14f upstream. + +When we fail to add the request to the I/O queue, we currently leave it +to the caller to free the failed request. However since some of the +requests that fail are actually created by nfs_pageio_add_request() +itself, and are not passed back to the caller, this leads to a leakage +issue, which can again cause page locks to leak. 
+ +This commit addresses the leakage by freeing the created requests on +error, using desc->pg_completion_ops->error_cleanup() + +Signed-off-by: Trond Myklebust +Fixes: a7d42ddb30997 ("nfs: add mirroring support to pgio layer") +Cc: stable@vger.kernel.org # v4.0: c18b96a1b862: nfs: clean up rest of reqs +Cc: stable@vger.kernel.org # v4.0: d600ad1f2bdb: NFS41: pop some layoutget +Cc: stable@vger.kernel.org # v4.0+ +Signed-off-by: Greg Kroah-Hartman + +--- + fs/nfs/pagelist.c | 26 +++++++++++++++++++++----- + 1 file changed, 21 insertions(+), 5 deletions(-) + +--- a/fs/nfs/pagelist.c ++++ b/fs/nfs/pagelist.c +@@ -989,6 +989,17 @@ static void nfs_pageio_doio(struct nfs_p + } + } + ++static void ++nfs_pageio_cleanup_request(struct nfs_pageio_descriptor *desc, ++ struct nfs_page *req) ++{ ++ LIST_HEAD(head); ++ ++ nfs_list_remove_request(req); ++ nfs_list_add_request(req, &head); ++ desc->pg_completion_ops->error_cleanup(&head); ++} ++ + /** + * nfs_pageio_add_request - Attempt to coalesce a request into a page list. 
+ * @desc: destination io descriptor +@@ -1026,10 +1037,8 @@ static int __nfs_pageio_add_request(stru + nfs_page_group_unlock(req); + desc->pg_moreio = 1; + nfs_pageio_doio(desc); +- if (desc->pg_error < 0) +- return 0; +- if (mirror->pg_recoalesce) +- return 0; ++ if (desc->pg_error < 0 || mirror->pg_recoalesce) ++ goto out_cleanup_subreq; + /* retry add_request for this subreq */ + nfs_page_group_lock(req); + continue; +@@ -1062,6 +1071,10 @@ err_ptr: + desc->pg_error = PTR_ERR(subreq); + nfs_page_group_unlock(req); + return 0; ++out_cleanup_subreq: ++ if (req != subreq) ++ nfs_pageio_cleanup_request(desc, subreq); ++ return 0; + } + + static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) +@@ -1169,11 +1182,14 @@ int nfs_pageio_add_request(struct nfs_pa + if (nfs_pgio_has_mirroring(desc)) + desc->pg_mirror_idx = midx; + if (!nfs_pageio_add_request_mirror(desc, dupreq)) +- goto out_failed; ++ goto out_cleanup_subreq; + } + + return 1; + ++out_cleanup_subreq: ++ if (req != dupreq) ++ nfs_pageio_cleanup_request(desc, dupreq); + out_failed: + /* remember fatal errors */ + if (nfs_error_is_fatal(desc->pg_error)) diff --git a/queue-4.19/nfsd-fix-memory-corruption-caused-by-readdir.patch b/queue-4.19/nfsd-fix-memory-corruption-caused-by-readdir.patch new file mode 100644 index 00000000000..0c6259271c7 --- /dev/null +++ b/queue-4.19/nfsd-fix-memory-corruption-caused-by-readdir.patch @@ -0,0 +1,98 @@ +From b602345da6cbb135ba68cf042df8ec9a73da7981 Mon Sep 17 00:00:00 2001 +From: NeilBrown +Date: Mon, 4 Mar 2019 14:08:22 +1100 +Subject: nfsd: fix memory corruption caused by readdir + +From: NeilBrown + +commit b602345da6cbb135ba68cf042df8ec9a73da7981 upstream. + +If the result of an NFSv3 readdir{,plus} request results in the +"offset" on one entry having to be split across 2 pages, and is sized +so that the next directory entry doesn't fit in the requested size, +then memory corruption can happen. 
+ +When encode_entry() is called after encoding the last entry that fits, +it notices that ->offset and ->offset1 are set, and so stores the +offset value in the two pages as required. It clears ->offset1 but +*does not* clear ->offset. + +Normally this omission doesn't matter as encode_entry_baggage() will +be called, and will set ->offset to a suitable value (not on a page +boundary). +But in the case where cd->buflen < elen and nfserr_toosmall is +returned, ->offset is not reset. + +This means that nfsd3proc_readdirplus will see ->offset with a value 4 +bytes before the end of a page, and ->offset1 set to NULL. +It will try to write 8 bytes to ->offset. +If we are lucky, the next page will be read-only, and the system will + BUG: unable to handle kernel paging request at... + +If we are unlucky, some innocent page will have the first 4 bytes +corrupted. + +nfsd3proc_readdir() doesn't even check for ->offset1, it just blindly +writes 8 bytes to the offset wherever it is. + +Fix this by clearing ->offset after it is used, and copying the +->offset handling code from nfsd3_proc_readdirplus into +nfsd3_proc_readdir. + +(Note that the commit hash in the Fixes tag is from the 'history' + tree - this bug predates git). + +Fixes: 0b1d57cf7654 ("[PATCH] kNFSd: Fix nfs3 dentry encoding") +Fixes-URL: https://git.kernel.org/pub/scm/linux/kernel/git/history/history.git/commit/?id=0b1d57cf7654 +Cc: stable@vger.kernel.org (v2.6.12+) +Signed-off-by: NeilBrown +Signed-off-by: J. 
Bruce Fields +Signed-off-by: Greg Kroah-Hartman + +--- + fs/nfsd/nfs3proc.c | 16 ++++++++++++++-- + fs/nfsd/nfs3xdr.c | 1 + + 2 files changed, 15 insertions(+), 2 deletions(-) + +--- a/fs/nfsd/nfs3proc.c ++++ b/fs/nfsd/nfs3proc.c +@@ -463,8 +463,19 @@ nfsd3_proc_readdir(struct svc_rqst *rqst + &resp->common, nfs3svc_encode_entry); + memcpy(resp->verf, argp->verf, 8); + resp->count = resp->buffer - argp->buffer; +- if (resp->offset) +- xdr_encode_hyper(resp->offset, argp->cookie); ++ if (resp->offset) { ++ loff_t offset = argp->cookie; ++ ++ if (unlikely(resp->offset1)) { ++ /* we ended up with offset on a page boundary */ ++ *resp->offset = htonl(offset >> 32); ++ *resp->offset1 = htonl(offset & 0xffffffff); ++ resp->offset1 = NULL; ++ } else { ++ xdr_encode_hyper(resp->offset, offset); ++ } ++ resp->offset = NULL; ++ } + + RETURN_STATUS(nfserr); + } +@@ -533,6 +544,7 @@ nfsd3_proc_readdirplus(struct svc_rqst * + } else { + xdr_encode_hyper(resp->offset, offset); + } ++ resp->offset = NULL; + } + + RETURN_STATUS(nfserr); +--- a/fs/nfsd/nfs3xdr.c ++++ b/fs/nfsd/nfs3xdr.c +@@ -921,6 +921,7 @@ encode_entry(struct readdir_cd *ccd, con + } else { + xdr_encode_hyper(cd->offset, offset64); + } ++ cd->offset = NULL; + } + + /* diff --git a/queue-4.19/nfsd-fix-performance-limiting-session-calculation.patch b/queue-4.19/nfsd-fix-performance-limiting-session-calculation.patch new file mode 100644 index 00000000000..cfec164ff3f --- /dev/null +++ b/queue-4.19/nfsd-fix-performance-limiting-session-calculation.patch @@ -0,0 +1,53 @@ +From c54f24e338ed2a35218f117a4a1afb5f9e2b4e64 Mon Sep 17 00:00:00 2001 +From: "J. Bruce Fields" +Date: Thu, 21 Feb 2019 10:47:00 -0500 +Subject: nfsd: fix performance-limiting session calculation + +From: J. Bruce Fields + +commit c54f24e338ed2a35218f117a4a1afb5f9e2b4e64 upstream. + +We're unintentionally limiting the number of slots per nfsv4.1 session +to 10. Often more than 10 simultaneous RPCs are needed for the best +performance. 
+ +This calculation was meant to prevent any one client from using up more +than a third of the limit we set for total memory use across all clients +and sessions. Instead, it's limiting the client to a third of the +maximum for a single session. + +Fix this. + +Reported-by: Chris Tracy +Cc: stable@vger.kernel.org +Fixes: de766e570413 "nfsd: give out fewer session slots as limit approaches" +Signed-off-by: J. Bruce Fields +Signed-off-by: Greg Kroah-Hartman + +--- + fs/nfsd/nfs4state.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/fs/nfsd/nfs4state.c ++++ b/fs/nfsd/nfs4state.c +@@ -1514,16 +1514,16 @@ static u32 nfsd4_get_drc_mem(struct nfsd + { + u32 slotsize = slot_bytes(ca); + u32 num = ca->maxreqs; +- int avail; ++ unsigned long avail, total_avail; + + spin_lock(&nfsd_drc_lock); +- avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, +- nfsd_drc_max_mem - nfsd_drc_mem_used); ++ total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used; ++ avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, total_avail); + /* + * Never use more than a third of the remaining memory, + * unless it's the only way to give this client a slot: + */ +- avail = clamp_t(int, avail, slotsize, avail/3); ++ avail = clamp_t(int, avail, slotsize, total_avail/3); + num = min_t(int, num, avail / slotsize); + nfsd_drc_mem_used += num * slotsize; + spin_unlock(&nfsd_drc_lock); diff --git a/queue-4.19/nfsd-fix-wrong-check-in-write_v4_end_grace.patch b/queue-4.19/nfsd-fix-wrong-check-in-write_v4_end_grace.patch new file mode 100644 index 00000000000..c034331855f --- /dev/null +++ b/queue-4.19/nfsd-fix-wrong-check-in-write_v4_end_grace.patch @@ -0,0 +1,35 @@ +From dd838821f0a29781b185cd8fb8e48d5c177bd838 Mon Sep 17 00:00:00 2001 +From: Yihao Wu +Date: Wed, 6 Mar 2019 21:03:50 +0800 +Subject: nfsd: fix wrong check in write_v4_end_grace() + +From: Yihao Wu + +commit dd838821f0a29781b185cd8fb8e48d5c177bd838 upstream. 
+ +Commit 62a063b8e7d1 "nfsd4: fix crash on writing v4_end_grace before +nfsd startup" is trying to fix a NULL dereference issue, but it +mistakenly checks if the nfsd server is started. So fix it. + +Fixes: 62a063b8e7d1 "nfsd4: fix crash on writing v4_end_grace before nfsd startup" +Cc: stable@vger.kernel.org +Reviewed-by: Joseph Qi +Signed-off-by: Yihao Wu +Signed-off-by: J. Bruce Fields +Signed-off-by: Greg Kroah-Hartman + +--- + fs/nfsd/nfsctl.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/nfsd/nfsctl.c ++++ b/fs/nfsd/nfsctl.c +@@ -1126,7 +1126,7 @@ static ssize_t write_v4_end_grace(struct + case 'Y': + case 'y': + case '1': +- if (nn->nfsd_serv) ++ if (!nn->nfsd_serv) + return -EBUSY; + nfsd4_end_grace(nn); + break; diff --git a/queue-4.19/nfsv4.1-reinitialise-sequence-results-before-retransmitting-a-request.patch b/queue-4.19/nfsv4.1-reinitialise-sequence-results-before-retransmitting-a-request.patch new file mode 100644 index 00000000000..c56019e59f8 --- /dev/null +++ b/queue-4.19/nfsv4.1-reinitialise-sequence-results-before-retransmitting-a-request.patch @@ -0,0 +1,56 @@ +From c1dffe0bf7f9c3d57d9f237a7cb2a81e62babd2b Mon Sep 17 00:00:00 2001 +From: Trond Myklebust +Date: Fri, 1 Mar 2019 12:13:34 -0500 +Subject: NFSv4.1: Reinitialise sequence results before retransmitting a request + +From: Trond Myklebust + +commit c1dffe0bf7f9c3d57d9f237a7cb2a81e62babd2b upstream. + +If we have to retransmit a request, we should ensure that we reinitialise +the sequence results structure, since in the event of a signal +we need to treat the request as if it had not been sent. 
+ +Signed-off-by: Trond Myklebust +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + fs/nfs/nfs4proc.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -947,6 +947,13 @@ nfs4_sequence_process_interrupted(struct + + #endif /* !CONFIG_NFS_V4_1 */ + ++static void nfs41_sequence_res_init(struct nfs4_sequence_res *res) ++{ ++ res->sr_timestamp = jiffies; ++ res->sr_status_flags = 0; ++ res->sr_status = 1; ++} ++ + static + void nfs4_sequence_attach_slot(struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, +@@ -958,10 +965,6 @@ void nfs4_sequence_attach_slot(struct nf + args->sa_slot = slot; + + res->sr_slot = slot; +- res->sr_timestamp = jiffies; +- res->sr_status_flags = 0; +- res->sr_status = 1; +- + } + + int nfs4_setup_sequence(struct nfs_client *client, +@@ -1007,6 +1010,7 @@ int nfs4_setup_sequence(struct nfs_clien + + trace_nfs4_setup_sequence(session, args); + out_start: ++ nfs41_sequence_res_init(res); + rpc_call_start(task); + return 0; + diff --git a/queue-4.19/pm-wakeup-rework-wakeup-source-timer-cancellation.patch b/queue-4.19/pm-wakeup-rework-wakeup-source-timer-cancellation.patch new file mode 100644 index 00000000000..929106efe4b --- /dev/null +++ b/queue-4.19/pm-wakeup-rework-wakeup-source-timer-cancellation.patch @@ -0,0 +1,55 @@ +From 1fad17fb1bbcd73159c2b992668a6957ecc5af8a Mon Sep 17 00:00:00 2001 +From: Viresh Kumar +Date: Fri, 8 Mar 2019 15:23:11 +0530 +Subject: PM / wakeup: Rework wakeup source timer cancellation + +From: Viresh Kumar + +commit 1fad17fb1bbcd73159c2b992668a6957ecc5af8a upstream. + +If wakeup_source_add() is called right after wakeup_source_remove() +for the same wakeup source, timer_setup() may be called for a +potentially scheduled timer which is incorrect. + +To avoid that, move the wakeup source timer cancellation from +wakeup_source_drop() to wakeup_source_remove(). 
+ +Moreover, make wakeup_source_remove() clear the timer function after +canceling the timer to let wakeup_source_not_registered() treat +unregistered wakeup sources in the same way as the ones that have +never been registered. + +Signed-off-by: Viresh Kumar +Cc: 4.4+ # 4.4+ +[ rjw: Subject, changelog, merged two patches together ] +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/base/power/wakeup.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/drivers/base/power/wakeup.c ++++ b/drivers/base/power/wakeup.c +@@ -118,7 +118,6 @@ void wakeup_source_drop(struct wakeup_so + if (!ws) + return; + +- del_timer_sync(&ws->timer); + __pm_relax(ws); + } + EXPORT_SYMBOL_GPL(wakeup_source_drop); +@@ -205,6 +204,13 @@ void wakeup_source_remove(struct wakeup_ + list_del_rcu(&ws->entry); + raw_spin_unlock_irqrestore(&events_lock, flags); + synchronize_srcu(&wakeup_srcu); ++ ++ del_timer_sync(&ws->timer); ++ /* ++ * Clear timer.function to make wakeup_source_not_registered() treat ++ * this wakeup source as not registered. 
++ */ ++ ws->timer.function = NULL; + } + EXPORT_SYMBOL_GPL(wakeup_source_remove); + diff --git a/queue-4.19/series b/queue-4.19/series index f8989ebb4fc..e2abd7bb3da 100644 --- a/queue-4.19/series +++ b/queue-4.19/series @@ -235,3 +235,15 @@ arm64-kvm-fix-architecturally-invalid-reset-value-for-fpexc32_el2.patch ipmi_si-fix-use-after-free-of-resource-name.patch dm-fix-to_sector-for-32bit.patch dm-integrity-limit-the-rate-of-error-messages.patch +mfd-sm501-fix-potential-null-pointer-dereference.patch +cpcap-charger-generate-events-for-userspace.patch +nfs-fix-i-o-request-leakages.patch +nfs-fix-an-i-o-request-leakage-in-nfs_do_recoalesce.patch +nfs-don-t-recoalesce-on-error-in-nfs_pageio_complete_mirror.patch +nfsd-fix-performance-limiting-session-calculation.patch +nfsd-fix-memory-corruption-caused-by-readdir.patch +nfsd-fix-wrong-check-in-write_v4_end_grace.patch +nfsv4.1-reinitialise-sequence-results-before-retransmitting-a-request.patch +svcrpc-fix-udp-on-servers-with-lots-of-threads.patch +pm-wakeup-rework-wakeup-source-timer-cancellation.patch +bcache-never-writeback-a-discard-operation.patch diff --git a/queue-4.19/svcrpc-fix-udp-on-servers-with-lots-of-threads.patch b/queue-4.19/svcrpc-fix-udp-on-servers-with-lots-of-threads.patch new file mode 100644 index 00000000000..58050475b08 --- /dev/null +++ b/queue-4.19/svcrpc-fix-udp-on-servers-with-lots-of-threads.patch @@ -0,0 +1,76 @@ +From b7e5034cbecf5a65b7bfdc2b20a8378039577706 Mon Sep 17 00:00:00 2001 +From: "J. Bruce Fields" +Date: Wed, 20 Feb 2019 12:54:50 -0500 +Subject: svcrpc: fix UDP on servers with lots of threads + +From: J. Bruce Fields + +commit b7e5034cbecf5a65b7bfdc2b20a8378039577706 upstream. + +James Pearson found that an NFS server stopped responding to UDP +requests if started with more than 1017 threads. 
+ +sv_max_mesg is about 2^20, so that is probably where the calculation +performed by + + svc_sock_setbufsize(svsk->sk_sock, + (serv->sv_nrthreads+3) * serv->sv_max_mesg, + (serv->sv_nrthreads+3) * serv->sv_max_mesg); + +starts to overflow an int. + +Reported-by: James Pearson +Tested-by: James Pearson +Cc: stable@vger.kernel.org +Signed-off-by: J. Bruce Fields +Signed-off-by: Greg Kroah-Hartman + +--- + net/sunrpc/svcsock.c | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +--- a/net/sunrpc/svcsock.c ++++ b/net/sunrpc/svcsock.c +@@ -381,12 +381,16 @@ static int svc_partial_recvfrom(struct s + /* + * Set socket snd and rcv buffer lengths + */ +-static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, +- unsigned int rcv) ++static void svc_sock_setbufsize(struct svc_sock *svsk, unsigned int nreqs) + { ++ unsigned int max_mesg = svsk->sk_xprt.xpt_server->sv_max_mesg; ++ struct socket *sock = svsk->sk_sock; ++ ++ nreqs = min(nreqs, INT_MAX / 2 / max_mesg); ++ + lock_sock(sock->sk); +- sock->sk->sk_sndbuf = snd * 2; +- sock->sk->sk_rcvbuf = rcv * 2; ++ sock->sk->sk_sndbuf = nreqs * max_mesg * 2; ++ sock->sk->sk_rcvbuf = nreqs * max_mesg * 2; + sock->sk->sk_write_space(sock->sk); + release_sock(sock->sk); + } +@@ -548,9 +552,7 @@ static int svc_udp_recvfrom(struct svc_r + * provides an upper bound on the number of threads + * which will access the socket. + */ +- svc_sock_setbufsize(svsk->sk_sock, +- (serv->sv_nrthreads+3) * serv->sv_max_mesg, +- (serv->sv_nrthreads+3) * serv->sv_max_mesg); ++ svc_sock_setbufsize(svsk, serv->sv_nrthreads + 3); + + clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); + skb = NULL; +@@ -718,9 +720,7 @@ static void svc_udp_init(struct svc_sock + * receive and respond to one request. 
+ * svc_udp_recvfrom will re-adjust if necessary + */ +- svc_sock_setbufsize(svsk->sk_sock, +- 3 * svsk->sk_xprt.xpt_server->sv_max_mesg, +- 3 * svsk->sk_xprt.xpt_server->sv_max_mesg); ++ svc_sock_setbufsize(svsk, 3); + + /* data might have come in before data_ready set up */ + set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);