From ffeb0c6282faef937348ad626431a5f7cf49ff86 Mon Sep 17 00:00:00 2001
From: Sasha Levin
Date: Tue, 25 Jun 2019 23:39:24 -0400
Subject: [PATCH] fixes for 4.14

Signed-off-by: Sasha Levin
---
 ...a-lower-level-bio_add_page-interface.patch | 183 ++++++++++++++++++
 ...er_get_pages-pin-more-pages-for-mult.patch |  98 ++++++++++
 ...close-psm-sdma_progress-sleep-window.patch |  89 +++++++++
 ...ss-ftrace-fix-ftrace_likely_update-v.patch |  45 +++++
 queue-4.14/series                             |   4 +
 5 files changed, 419 insertions(+)
 create mode 100644 queue-4.14/block-add-a-lower-level-bio_add_page-interface.patch
 create mode 100644 queue-4.14/block-bio_iov_iter_get_pages-pin-more-pages-for-mult.patch
 create mode 100644 queue-4.14/ib-hfi1-close-psm-sdma_progress-sleep-window.patch
 create mode 100644 queue-4.14/revert-x86-uaccess-ftrace-fix-ftrace_likely_update-v.patch

diff --git a/queue-4.14/block-add-a-lower-level-bio_add_page-interface.patch b/queue-4.14/block-add-a-lower-level-bio_add_page-interface.patch
new file mode 100644
index 00000000000..aabdce3ba9a
--- /dev/null
+++ b/queue-4.14/block-add-a-lower-level-bio_add_page-interface.patch
@@ -0,0 +1,183 @@
+From e7ae19aa4e17413195a9b709868bc81246f1885b Mon Sep 17 00:00:00 2001
+From: Christoph Hellwig
+Date: Fri, 1 Jun 2018 09:03:05 -0700
+Subject: block: add a lower-level bio_add_page interface
+
+[ Upstream commit 0aa69fd32a5f766e997ca8ab4723c5a1146efa8b ]
+
+For the upcoming removal of buffer heads in XFS we need to keep track of
+the number of outstanding writeback requests per page. For this we need
+to know if bio_add_page merged a region with the previous bvec or not.
+Instead of adding additional arguments this refactors bio_add_page to
+be implemented using three lower level helpers which users like XFS can
+use directly if they care about the merge decisions.
+
+Signed-off-by: Christoph Hellwig
+Reviewed-by: Jens Axboe
+Reviewed-by: Ming Lei
+Reviewed-by: Darrick J. Wong
+Signed-off-by: Darrick J. Wong
+Signed-off-by: Sasha Levin
+---
+ block/bio.c         | 96 +++++++++++++++++++++++++++++----------------
+ include/linux/bio.h |  9 +++++
+ 2 files changed, 72 insertions(+), 33 deletions(-)
+
+diff --git a/block/bio.c b/block/bio.c
+index d01ab919b313..c1386ce2c014 100644
+--- a/block/bio.c
++++ b/block/bio.c
+@@ -773,7 +773,7 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page
+ 		return 0;
+ 	}
+ 
+-	if (bio->bi_vcnt >= bio->bi_max_vecs)
++	if (bio_full(bio))
+ 		return 0;
+ 
+ 	/*
+@@ -821,52 +821,82 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page
+ EXPORT_SYMBOL(bio_add_pc_page);
+ 
+ /**
+- * bio_add_page - attempt to add page to bio
+- * @bio: destination bio
+- * @page: page to add
+- * @len: vec entry length
+- * @offset: vec entry offset
++ * __bio_try_merge_page - try appending data to an existing bvec.
++ * @bio: destination bio
++ * @page: page to add
++ * @len: length of the data to add
++ * @off: offset of the data in @page
+  *
+- * Attempt to add a page to the bio_vec maplist. This will only fail
+- * if either bio->bi_vcnt == bio->bi_max_vecs or it's a cloned bio.
++ * Try to add the data at @page + @off to the last bvec of @bio. This is a
++ * useful optimisation for file systems with a block size smaller than the
++ * page size.
++ *
++ * Return %true on success or %false on failure.
+  */
+-int bio_add_page(struct bio *bio, struct page *page,
+-		 unsigned int len, unsigned int offset)
++bool __bio_try_merge_page(struct bio *bio, struct page *page,
++		unsigned int len, unsigned int off)
+ {
+-	struct bio_vec *bv;
+-
+-	/*
+-	 * cloned bio must not modify vec list
+-	 */
+ 	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
+-		return 0;
++		return false;
+ 
+-	/*
+-	 * For filesystems with a blocksize smaller than the pagesize
+-	 * we will often be called with the same page as last time and
+-	 * a consecutive offset. Optimize this special case.
+-	 */
+ 	if (bio->bi_vcnt > 0) {
+-		bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
++		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
+ 
+-		if (page == bv->bv_page &&
+-		    offset == bv->bv_offset + bv->bv_len) {
++		if (page == bv->bv_page && off == bv->bv_offset + bv->bv_len) {
+ 			bv->bv_len += len;
+-			goto done;
++			bio->bi_iter.bi_size += len;
++			return true;
+ 		}
+ 	}
++	return false;
++}
++EXPORT_SYMBOL_GPL(__bio_try_merge_page);
+ 
+-	if (bio->bi_vcnt >= bio->bi_max_vecs)
+-		return 0;
++/**
++ * __bio_add_page - add page to a bio in a new segment
++ * @bio: destination bio
++ * @page: page to add
++ * @len: length of the data to add
++ * @off: offset of the data in @page
++ *
++ * Add the data at @page + @off to @bio as a new bvec. The caller must ensure
++ * that @bio has space for another bvec.
++ */
++void __bio_add_page(struct bio *bio, struct page *page,
++		unsigned int len, unsigned int off)
++{
++	struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt];
+ 
+-	bv = &bio->bi_io_vec[bio->bi_vcnt];
+-	bv->bv_page = page;
+-	bv->bv_len = len;
+-	bv->bv_offset = offset;
++	WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
++	WARN_ON_ONCE(bio_full(bio));
++
++	bv->bv_page = page;
++	bv->bv_offset = off;
++	bv->bv_len = len;
+ 
+-	bio->bi_vcnt++;
+-done:
+ 	bio->bi_iter.bi_size += len;
++	bio->bi_vcnt++;
++}
++EXPORT_SYMBOL_GPL(__bio_add_page);
++
++/**
++ * bio_add_page - attempt to add page to bio
++ * @bio: destination bio
++ * @page: page to add
++ * @len: vec entry length
++ * @offset: vec entry offset
++ *
++ * Attempt to add a page to the bio_vec maplist. This will only fail
++ * if either bio->bi_vcnt == bio->bi_max_vecs or it's a cloned bio.
++ */
++int bio_add_page(struct bio *bio, struct page *page,
++		unsigned int len, unsigned int offset)
++{
++	if (!__bio_try_merge_page(bio, page, len, offset)) {
++		if (bio_full(bio))
++			return 0;
++		__bio_add_page(bio, page, len, offset);
++	}
+ 	return len;
+ }
+ EXPORT_SYMBOL(bio_add_page);
+diff --git a/include/linux/bio.h b/include/linux/bio.h
+index d4b39caf081d..e260f000b9ac 100644
+--- a/include/linux/bio.h
++++ b/include/linux/bio.h
+@@ -123,6 +123,11 @@ static inline void *bio_data(struct bio *bio)
+ 	return NULL;
+ }
+ 
++static inline bool bio_full(struct bio *bio)
++{
++	return bio->bi_vcnt >= bio->bi_max_vecs;
++}
++
+ /*
+  * will die
+  */
+@@ -459,6 +464,10 @@ void bio_chain(struct bio *, struct bio *);
+ extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
+ extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
+ 			   unsigned int, unsigned int);
++bool __bio_try_merge_page(struct bio *bio, struct page *page,
++		unsigned int len, unsigned int off);
++void __bio_add_page(struct bio *bio, struct page *page,
++		unsigned int len, unsigned int off);
+ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
+ struct rq_map_data;
+ extern struct bio *bio_map_user_iov(struct request_queue *,
+-- 
+2.20.1
+
diff --git a/queue-4.14/block-bio_iov_iter_get_pages-pin-more-pages-for-mult.patch b/queue-4.14/block-bio_iov_iter_get_pages-pin-more-pages-for-mult.patch
new file mode 100644
index 00000000000..dcb4aa8539e
--- /dev/null
+++ b/queue-4.14/block-bio_iov_iter_get_pages-pin-more-pages-for-mult.patch
@@ -0,0 +1,98 @@
+From 22adc2787d8bc10d4e7842ac6408c0102f9da104 Mon Sep 17 00:00:00 2001
+From: Martin Wilck
+Date: Wed, 25 Jul 2018 23:15:09 +0200
+Subject: block: bio_iov_iter_get_pages: pin more pages for multi-segment IOs
+
+[ Upstream commit 17d51b10d7773e4618bcac64648f30f12d4078fb ]
+
+bio_iov_iter_get_pages() currently only adds pages for the next non-zero
+segment from the iov_iter to the bio. That's suboptimal for callers,
+which typically try to pin as many pages as fit into the bio. This patch
+converts the current bio_iov_iter_get_pages() into a static helper, and
+introduces a new helper that allocates as many pages as
+
+ 1) fit into the bio,
+ 2) are present in the iov_iter,
+ 3) and can be pinned by MM.
+
+Error is returned only if zero pages could be pinned. Because of 3), a
+zero return value doesn't necessarily mean all pages have been pinned.
+Callers that have to pin every page in the iov_iter must still call this
+function in a loop (this is currently the case).
+
+This change matters most for __blkdev_direct_IO_simple(), which calls
+bio_iov_iter_get_pages() only once. If it obtains fewer pages than
+requested, it returns a "short write" or "short read", and
+__generic_file_write_iter() falls back to buffered writes, which may
+lead to data corruption.
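+
+To make that caller contract concrete, here is a minimal, hypothetical
+sketch (not part of this patch) of a caller that must pin every page in
+the iov_iter: it keeps feeding fresh bios to bio_iov_iter_get_pages()
+until the iterator is drained. Device, sector and bi_opf setup as well
+as asynchronous completion handling are deliberately elided:
+
+	static int submit_all(struct iov_iter *iter)
+	{
+		while (iov_iter_count(iter)) {
+			struct bio *bio = bio_alloc(GFP_KERNEL, BIO_MAX_PAGES);
+			struct bio_vec *bv;
+			int i, ret;
+
+			if (!bio)
+				return -ENOMEM;
+			/* bio_set_dev(), bi_sector and bi_opf setup omitted */
+			ret = bio_iov_iter_get_pages(bio, iter);
+			if (ret) {
+				/* not a single page could be pinned */
+				bio_put(bio);
+				return ret;
+			}
+			/* the bio may be full before *iter is drained */
+			ret = submit_bio_wait(bio);
+			/* drop the page references taken while pinning */
+			bio_for_each_segment_all(bv, bio, i)
+				put_page(bv->bv_page);
+			bio_put(bio);
+			if (ret)
+				return ret;
+		}
+		return 0;
+	}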
+
+Fixes: 72ecad22d9f1 ("block: support a full bio worth of IO for simplified bdev direct-io")
+Reviewed-by: Christoph Hellwig
+Signed-off-by: Martin Wilck
+Signed-off-by: Jens Axboe
+Signed-off-by: Sasha Levin
+---
+ block/bio.c | 35 ++++++++++++++++++++++++++++++++---
+ 1 file changed, 32 insertions(+), 3 deletions(-)
+
+diff --git a/block/bio.c b/block/bio.c
+index c1386ce2c014..1384f9790882 100644
+--- a/block/bio.c
++++ b/block/bio.c
+@@ -902,14 +902,16 @@ int bio_add_page(struct bio *bio, struct page *page,
+ EXPORT_SYMBOL(bio_add_page);
+ 
+ /**
+- * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
++ * __bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
+  * @bio: bio to add pages to
+  * @iter: iov iterator describing the region to be mapped
+  *
+- * Pins as many pages from *iter and appends them to @bio's bvec array. The
++ * Pins pages from *iter and appends them to @bio's bvec array. The
+  * pages will have to be released using put_page() when done.
++ * For multi-segment *iter, this function only adds pages from the
++ * next non-empty segment of the iov iterator.
+  */
+-int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
++static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
+ {
+ 	unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt, idx;
+ 	struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
+@@ -946,6 +948,33 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
+ 	iov_iter_advance(iter, size);
+ 	return 0;
+ }
++
++/**
++ * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
++ * @bio: bio to add pages to
++ * @iter: iov iterator describing the region to be mapped
++ *
++ * Pins pages from *iter and appends them to @bio's bvec array. The
++ * pages will have to be released using put_page() when done.
++ * The function tries, but does not guarantee, to pin as many pages as
++ * fit into the bio, or are requested in *iter, whichever is smaller.
++ * If MM encounters an error pinning the requested pages, it stops.
++ * Error is returned only if 0 pages could be pinned.
++ */
++int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
++{
++	unsigned short orig_vcnt = bio->bi_vcnt;
++
++	do {
++		int ret = __bio_iov_iter_get_pages(bio, iter);
++
++		if (unlikely(ret))
++			return bio->bi_vcnt > orig_vcnt ? 0 : ret;
++
++	} while (iov_iter_count(iter) && !bio_full(bio));
++
++	return 0;
++}
+ EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages);
+ 
+ struct submit_bio_ret {
+-- 
+2.20.1
+
diff --git a/queue-4.14/ib-hfi1-close-psm-sdma_progress-sleep-window.patch b/queue-4.14/ib-hfi1-close-psm-sdma_progress-sleep-window.patch
new file mode 100644
index 00000000000..6502b03b813
--- /dev/null
+++ b/queue-4.14/ib-hfi1-close-psm-sdma_progress-sleep-window.patch
@@ -0,0 +1,89 @@
+From 4e4e40c66638a459fc7e3cfd3376f0347816c291 Mon Sep 17 00:00:00 2001
+From: Mike Marciniszyn
+Date: Mon, 24 Jun 2019 16:17:36 -0400
+Subject: IB/hfi1: Close PSM sdma_progress sleep window
+
+commit da9de5f8527f4b9efc82f967d29a583318c034c7 upstream.
+
+sdma_progress() is called outside the wait lock.
+
+In this case, there is a race condition where sdma_progress() can return
+false and the sdma_engine can idle. If that happens, there will be no
+more sdma interrupts to cause the wakeup and the user_sdma xmit will hang.
+
+Fix by moving the lock to enclose the sdma_progress() call.
+
+Also, delete busycount. The need for this was removed by:
+commit bcad29137a97 ("IB/hfi1: Serve the most starved iowait entry first")
+
+Ported to linux-4.14.y.
+
+Cc:
+Fixes: 7724105686e7 ("IB/hfi1: add driver files")
+Reviewed-by: Gary Leshner
+Signed-off-by: Mike Marciniszyn
+Signed-off-by: Dennis Dalessandro
+Signed-off-by: Jason Gunthorpe
+Signed-off-by: Sasha Levin
+---
+ drivers/infiniband/hw/hfi1/user_sdma.c | 12 ++++--------
+ drivers/infiniband/hw/hfi1/user_sdma.h |  1 -
+ 2 files changed, 4 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
+index cbe5ab26d95b..75275f9e363d 100644
+--- a/drivers/infiniband/hw/hfi1/user_sdma.c
++++ b/drivers/infiniband/hw/hfi1/user_sdma.c
+@@ -132,25 +132,22 @@ static int defer_packet_queue(
+ 	struct hfi1_user_sdma_pkt_q *pq =
+ 		container_of(wait, struct hfi1_user_sdma_pkt_q, busy);
+ 	struct hfi1_ibdev *dev = &pq->dd->verbs_dev;
+-	struct user_sdma_txreq *tx =
+-		container_of(txreq, struct user_sdma_txreq, txreq);
+ 
+-	if (sdma_progress(sde, seq, txreq)) {
+-		if (tx->busycount++ < MAX_DEFER_RETRY_COUNT)
+-			goto eagain;
+-	}
++	write_seqlock(&dev->iowait_lock);
++	if (sdma_progress(sde, seq, txreq))
++		goto eagain;
+ 	/*
+ 	 * We are assuming that if the list is enqueued somewhere, it
+ 	 * is to the dmawait list since that is the only place where
+ 	 * it is supposed to be enqueued.
+ 	 */
+ 	xchg(&pq->state, SDMA_PKT_Q_DEFERRED);
+-	write_seqlock(&dev->iowait_lock);
+ 	if (list_empty(&pq->busy.list))
+ 		iowait_queue(pkts_sent, &pq->busy, &sde->dmawait);
+ 	write_sequnlock(&dev->iowait_lock);
+ 	return -EBUSY;
+ eagain:
++	write_sequnlock(&dev->iowait_lock);
+ 	return -EAGAIN;
+ }
+@@ -803,7 +800,6 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
+ 
+ 	tx->flags = 0;
+ 	tx->req = req;
+-	tx->busycount = 0;
+ 	INIT_LIST_HEAD(&tx->list);
+ 
+ 	/*
+diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
+index 2b5326d6db53..87b0c567f442 100644
+--- a/drivers/infiniband/hw/hfi1/user_sdma.h
++++ b/drivers/infiniband/hw/hfi1/user_sdma.h
+@@ -236,7 +236,6 @@ struct user_sdma_txreq {
+ 	struct list_head list;
+ 	struct user_sdma_request *req;
+ 	u16 flags;
+-	unsigned int busycount;
+ 	u64 seqnum;
+ };
+ 
+-- 
+2.20.1
+
diff --git a/queue-4.14/revert-x86-uaccess-ftrace-fix-ftrace_likely_update-v.patch b/queue-4.14/revert-x86-uaccess-ftrace-fix-ftrace_likely_update-v.patch
new file mode 100644
index 00000000000..f9769b78f56
--- /dev/null
+++ b/queue-4.14/revert-x86-uaccess-ftrace-fix-ftrace_likely_update-v.patch
@@ -0,0 +1,45 @@
+From 4b66424c2b1a3ae7287a539278f9a06a532e6c97 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Tue, 25 Jun 2019 07:36:40 -0400
+Subject: Revert "x86/uaccess, ftrace: Fix ftrace_likely_update() vs. SMAP"
+
+This reverts commit 8190d6fbb1e9b7fa4eb41fe7aa337c46ca514e79, which was
+upstream commit 4a6c91fbdef846ec7250b82f2eeeb87ac5f18cf9.
+
+On Tue, Jun 25, 2019 at 09:39:45AM +0200, Sebastian Andrzej Siewior wrote:
+>Please backport commit e74deb11931ff682b59d5b9d387f7115f689698e to
+>stable _or_ revert the backport of commit 4a6c91fbdef84 ("x86/uaccess,
+>ftrace: Fix ftrace_likely_update() vs. SMAP"). It uses
+>user_access_{save|restore}() which has been introduced in the following
+>commit.
+
+Signed-off-by: Sasha Levin
+---
+ kernel/trace/trace_branch.c | 4 ----
+ 1 file changed, 4 deletions(-)
+
+diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
+index 3ea65cdff30d..4ad967453b6f 100644
+--- a/kernel/trace/trace_branch.c
++++ b/kernel/trace/trace_branch.c
+@@ -205,8 +205,6 @@ void trace_likely_condition(struct ftrace_likely_data *f, int val, int expect)
+ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
+ 			  int expect, int is_constant)
+ {
+-	unsigned long flags = user_access_save();
+-
+ 	/* A constant is always correct */
+ 	if (is_constant) {
+ 		f->constant++;
+@@ -225,8 +223,6 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
+ 		f->data.correct++;
+ 	else
+ 		f->data.incorrect++;
+-
+-	user_access_restore(flags);
+ }
+ EXPORT_SYMBOL(ftrace_likely_update);
+ 
+-- 
+2.20.1
+
diff --git a/queue-4.14/series b/queue-4.14/series
index 1419671bd60..3fe3123e809 100644
--- a/queue-4.14/series
+++ b/queue-4.14/series
@@ -1,3 +1,7 @@
 perf-ui-helpline-use-strlcpy-as-a-shorter-form-of-strncpy-explicit-set-nul.patch
 perf-help-remove-needless-use-of-strncpy.patch
 perf-header-fix-unchecked-usage-of-strncpy.patch
+revert-x86-uaccess-ftrace-fix-ftrace_likely_update-v.patch
+ib-hfi1-close-psm-sdma_progress-sleep-window.patch
+block-add-a-lower-level-bio_add_page-interface.patch
+block-bio_iov_iter_get_pages-pin-more-pages-for-mult.patch
-- 
2.47.2