]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blame - releases/5.0.19/md-batch-flush-requests.patch
4.9-stable patches
[thirdparty/kernel/stable-queue.git] / releases / 5.0.19 / md-batch-flush-requests.patch
CommitLineData
427cc94b
GKH
1From 2bc13b83e6298486371761de503faeffd15b7534 Mon Sep 17 00:00:00 2001
2From: NeilBrown <neilb@suse.com>
3Date: Fri, 29 Mar 2019 10:46:17 -0700
4Subject: md: batch flush requests.
5
6From: NeilBrown <neilb@suse.com>
7
8commit 2bc13b83e6298486371761de503faeffd15b7534 upstream.
9
10Currently if many flush requests are submitted to an md device in quick
11succession, they are serialized and can take a long time to process them all.
12We don't really need to call flush all those times - a single flush call
13can satisfy all requests submitted before it started.
14So keep track of when the current flush started and when it finished,
15allow any pending flush that was requested before the flush started
16to complete without waiting any more.
17
18Test results from Xiao:
19
20Test is done on a raid10 device which is created by 4 SSDs. The tool is
21dbench.
22
231. The latest linux stable kernel
24 Operation Count AvgLat MaxLat
25 --------------------------------------------------
26 Deltree 768 10.509 78.305
27 Flush 2078376 0.013 10.094
28 Close 21787697 0.019 18.821
29 LockX 96580 0.007 3.184
30 Mkdir 384 0.008 0.062
31 Rename 1255883 0.191 23.534
32 ReadX 46495589 0.020 14.230
33 WriteX 14790591 7.123 60.706
34 Unlink 5989118 0.440 54.551
35 UnlockX 96580 0.005 2.736
36 FIND_FIRST 10393845 0.042 12.079
37 SET_FILE_INFORMATION 2415558 0.129 10.088
38 QUERY_FILE_INFORMATION 4711725 0.005 8.462
39 QUERY_PATH_INFORMATION 26883327 0.032 21.715
40 QUERY_FS_INFORMATION 4929409 0.010 8.238
41 NTCreateX 29660080 0.100 53.268
42
43Throughput 1034.88 MB/sec (sync open) 128 clients 128 procs
44max_latency=60.712 ms
45
462. With patch1 "Revert "MD: fix lock contention for flush bios""
47 Operation Count AvgLat MaxLat
48 --------------------------------------------------
49 Deltree 256 8.326 36.761
50 Flush 693291 3.974 180.269
51 Close 7266404 0.009 36.929
52 LockX 32160 0.006 0.840
53 Mkdir 128 0.008 0.021
54 Rename 418755 0.063 29.945
55 ReadX 15498708 0.007 7.216
56 WriteX 4932310 22.482 267.928
57 Unlink 1997557 0.109 47.553
58 UnlockX 32160 0.004 1.110
59 FIND_FIRST 3465791 0.036 7.320
60 SET_FILE_INFORMATION 805825 0.015 1.561
61 QUERY_FILE_INFORMATION 1570950 0.005 2.403
62 QUERY_PATH_INFORMATION 8965483 0.013 14.277
63 QUERY_FS_INFORMATION 1643626 0.009 3.314
64 NTCreateX 9892174 0.061 41.278
65
66Throughput 345.009 MB/sec (sync open) 128 clients 128 procs
67max_latency=267.939 ms
68
693. With patch1 and patch2
70 Operation Count AvgLat MaxLat
71 --------------------------------------------------
72 Deltree 768 9.570 54.588
73 Flush 2061354 0.666 15.102
74 Close 21604811 0.012 25.697
75 LockX 95770 0.007 1.424
76 Mkdir 384 0.008 0.053
77 Rename 1245411 0.096 12.263
78 ReadX 46103198 0.011 12.116
79 WriteX 14667988 7.375 60.069
80 Unlink 5938936 0.173 30.905
81 UnlockX 95770 0.005 4.147
82 FIND_FIRST 10306407 0.041 11.715
83 SET_FILE_INFORMATION 2395987 0.048 7.640
84 QUERY_FILE_INFORMATION 4672371 0.005 9.291
85 QUERY_PATH_INFORMATION 26656735 0.018 19.719
86 QUERY_FS_INFORMATION 4887940 0.010 7.654
87 NTCreateX 29410811 0.059 28.551
88
89Throughput 1026.21 MB/sec (sync open) 128 clients 128 procs
90max_latency=60.075 ms
91
92Cc: <stable@vger.kernel.org> # v4.19+
93Tested-by: Xiao Ni <xni@redhat.com>
94Signed-off-by: NeilBrown <neilb@suse.com>
95Signed-off-by: Song Liu <songliubraving@fb.com>
96Signed-off-by: Jens Axboe <axboe@kernel.dk>
97Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
98
99---
100 drivers/md/md.c | 27 +++++++++++++++++++++++----
101 drivers/md/md.h | 3 +++
102 2 files changed, 26 insertions(+), 4 deletions(-)
103
104--- a/drivers/md/md.c
105+++ b/drivers/md/md.c
106@@ -427,6 +427,7 @@ static void submit_flushes(struct work_s
107 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
108 struct md_rdev *rdev;
109
110+ mddev->start_flush = ktime_get_boottime();
111 INIT_WORK(&mddev->flush_work, md_submit_flush_data);
112 atomic_set(&mddev->flush_pending, 1);
113 rcu_read_lock();
114@@ -467,6 +468,7 @@ static void md_submit_flush_data(struct
115 * could wait for this and below md_handle_request could wait for those
116 * bios because of suspend check
117 */
118+ mddev->last_flush = mddev->start_flush;
119 mddev->flush_bio = NULL;
120 wake_up(&mddev->sb_wait);
121
122@@ -481,15 +483,32 @@ static void md_submit_flush_data(struct
123
124 void md_flush_request(struct mddev *mddev, struct bio *bio)
125 {
126+ ktime_t start = ktime_get_boottime();
127 spin_lock_irq(&mddev->lock);
128 wait_event_lock_irq(mddev->sb_wait,
129- !mddev->flush_bio,
130+ !mddev->flush_bio ||
131+ ktime_after(mddev->last_flush, start),
132 mddev->lock);
133- mddev->flush_bio = bio;
134+ if (!ktime_after(mddev->last_flush, start)) {
135+ WARN_ON(mddev->flush_bio);
136+ mddev->flush_bio = bio;
137+ bio = NULL;
138+ }
139 spin_unlock_irq(&mddev->lock);
140
141- INIT_WORK(&mddev->flush_work, submit_flushes);
142- queue_work(md_wq, &mddev->flush_work);
143+ if (!bio) {
144+ INIT_WORK(&mddev->flush_work, submit_flushes);
145+ queue_work(md_wq, &mddev->flush_work);
146+ } else {
147+ /* flush was performed for some other bio while we waited. */
148+ if (bio->bi_iter.bi_size == 0)
149+ /* an empty barrier - all done */
150+ bio_endio(bio);
151+ else {
152+ bio->bi_opf &= ~REQ_PREFLUSH;
153+ mddev->pers->make_request(mddev, bio);
154+ }
155+ }
156 }
157 EXPORT_SYMBOL(md_flush_request);
158
159--- a/drivers/md/md.h
160+++ b/drivers/md/md.h
161@@ -463,6 +463,9 @@ struct mddev {
162 */
163 struct bio *flush_bio;
164 atomic_t flush_pending;
165+ ktime_t start_flush, last_flush; /* last_flush is when the last completed
166+ * flush was started.
167+ */
168 struct work_struct flush_work;
169 struct work_struct event_work; /* used by dm to report failure event */
170 void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);