+From 69635a5b9099e2bf0766c6c4ff33c2be992e2d9b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 8 Sep 2019 20:15:18 -0700
+Subject: fuse: fix deadlock with aio poll and fuse_iqueue::waitq.lock
+
+From: Eric Biggers <ebiggers@google.com>
+
+[ Upstream commit 76e43c8ccaa35c30d5df853013561145a0f750a5 ]
+
+When IOCB_CMD_POLL is used on the FUSE device, aio_poll() disables IRQs
+and takes kioctx::ctx_lock, then fuse_iqueue::waitq.lock.
+
+This may have to wait for fuse_iqueue::waitq.lock to be released by one
+of many places that take it with IRQs enabled. Since the IRQ handler
+may take kioctx::ctx_lock, lockdep reports that a deadlock is possible.
+
+Fix it by protecting the state of struct fuse_iqueue with a separate
+spinlock, and only accessing fuse_iqueue::waitq using the versions of
+the waitqueue functions which do IRQ-safe locking internally.
+
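+As an illustration (a condensed sketch of the fuse_queue_forget()
+conversion in the diff below, not part of the patch): state previously
+serialized by the waitqueue's internal lock moves under the new
+fiq->lock, and wakeups switch to the helpers that take waitq.lock with
+IRQs disabled internally:
+
+    /* before: waitq.lock doubles as the iqueue state lock */
+    spin_lock(&fiq->waitq.lock);
+    fiq->forget_list_tail->next = forget;
+    wake_up_locked(&fiq->waitq);   /* caller already holds waitq.lock */
+    spin_unlock(&fiq->waitq.lock);
+
+    /* after: dedicated lock; wake_up() does IRQ-safe locking itself */
+    spin_lock(&fiq->lock);
+    fiq->forget_list_tail->next = forget;
+    wake_up(&fiq->waitq);
+    spin_unlock(&fiq->lock);
+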
+Reproducer:
+
+ #include <fcntl.h>
+ #include <stdio.h>
+ #include <sys/mount.h>
+ #include <sys/stat.h>
+ #include <sys/syscall.h>
+ #include <unistd.h>
+ #include <linux/aio_abi.h>
+
+ int main()
+ {
+ char opts[128];
+ int fd = open("/dev/fuse", O_RDWR);
+ aio_context_t ctx = 0;
+ struct iocb cb = { .aio_lio_opcode = IOCB_CMD_POLL, .aio_fildes = fd };
+ struct iocb *cbp = &cb;
+
+ sprintf(opts, "fd=%d,rootmode=040000,user_id=0,group_id=0", fd);
+ mkdir("mnt", 0700);
+ mount("foo", "mnt", "fuse", 0, opts);
+ syscall(__NR_io_setup, 1, &ctx);
+ syscall(__NR_io_submit, ctx, 1, &cbp);
+ }
+
+Beginning of lockdep output:
+
+ =====================================================
+ WARNING: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected
+ 5.3.0-rc5 #9 Not tainted
+ -----------------------------------------------------
+ syz_fuse/135 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire:
+ 000000003590ceda (&fiq->waitq){+.+.}, at: spin_lock include/linux/spinlock.h:338 [inline]
+ 000000003590ceda (&fiq->waitq){+.+.}, at: aio_poll fs/aio.c:1751 [inline]
+ 000000003590ceda (&fiq->waitq){+.+.}, at: __io_submit_one.constprop.0+0x203/0x5b0 fs/aio.c:1825
+
+ and this task is already holding:
+ 0000000075037284 (&(&ctx->ctx_lock)->rlock){..-.}, at: spin_lock_irq include/linux/spinlock.h:363 [inline]
+ 0000000075037284 (&(&ctx->ctx_lock)->rlock){..-.}, at: aio_poll fs/aio.c:1749 [inline]
+ 0000000075037284 (&(&ctx->ctx_lock)->rlock){..-.}, at: __io_submit_one.constprop.0+0x1f4/0x5b0 fs/aio.c:1825
+ which would create a new lock dependency:
+ (&(&ctx->ctx_lock)->rlock){..-.} -> (&fiq->waitq){+.+.}
+
+ but this new dependency connects a SOFTIRQ-irq-safe lock:
+ (&(&ctx->ctx_lock)->rlock){..-.}
+
+ [...]
+
+Reported-by: syzbot+af05535bb79520f95431@syzkaller.appspotmail.com
+Reported-by: syzbot+d86c4426a01f60feddc7@syzkaller.appspotmail.com
+Fixes: bfe4037e722e ("aio: implement IOCB_CMD_POLL")
+Cc: <stable@vger.kernel.org> # v4.19+
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/fuse/dev.c | 89 +++++++++++++++++++++++++-----------------------
+ fs/fuse/fuse_i.h | 3 ++
+ fs/fuse/inode.c | 1 +
+ 3 files changed, 50 insertions(+), 43 deletions(-)
+
+diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
+index 6ee471b72a34d..c0d59a86ada2e 100644
+--- a/fs/fuse/dev.c
++++ b/fs/fuse/dev.c
+@@ -331,7 +331,7 @@ static void queue_request(struct fuse_iqueue *fiq, struct fuse_req *req)
+ req->in.h.len = sizeof(struct fuse_in_header) +
+ len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
+ list_add_tail(&req->list, &fiq->pending);
+- wake_up_locked(&fiq->waitq);
++ wake_up(&fiq->waitq);
+ kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
+ }
+
+@@ -343,16 +343,16 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
+ forget->forget_one.nodeid = nodeid;
+ forget->forget_one.nlookup = nlookup;
+
+- spin_lock(&fiq->waitq.lock);
++ spin_lock(&fiq->lock);
+ if (fiq->connected) {
+ fiq->forget_list_tail->next = forget;
+ fiq->forget_list_tail = forget;
+- wake_up_locked(&fiq->waitq);
++ wake_up(&fiq->waitq);
+ kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
+ } else {
+ kfree(forget);
+ }
+- spin_unlock(&fiq->waitq.lock);
++ spin_unlock(&fiq->lock);
+ }
+
+ static void flush_bg_queue(struct fuse_conn *fc)
+@@ -365,10 +365,10 @@ static void flush_bg_queue(struct fuse_conn *fc)
+ req = list_entry(fc->bg_queue.next, struct fuse_req, list);
+ list_del(&req->list);
+ fc->active_background++;
+- spin_lock(&fiq->waitq.lock);
++ spin_lock(&fiq->lock);
+ req->in.h.unique = fuse_get_unique(fiq);
+ queue_request(fiq, req);
+- spin_unlock(&fiq->waitq.lock);
++ spin_unlock(&fiq->lock);
+ }
+ }
+
+@@ -387,9 +387,9 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
+ if (test_and_set_bit(FR_FINISHED, &req->flags))
+ goto put_request;
+
+- spin_lock(&fiq->waitq.lock);
++ spin_lock(&fiq->lock);
+ list_del_init(&req->intr_entry);
+- spin_unlock(&fiq->waitq.lock);
++ spin_unlock(&fiq->lock);
+ WARN_ON(test_bit(FR_PENDING, &req->flags));
+ WARN_ON(test_bit(FR_SENT, &req->flags));
+ if (test_bit(FR_BACKGROUND, &req->flags)) {
+@@ -427,16 +427,16 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
+
+ static void queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
+ {
+- spin_lock(&fiq->waitq.lock);
++ spin_lock(&fiq->lock);
+ if (test_bit(FR_FINISHED, &req->flags)) {
+- spin_unlock(&fiq->waitq.lock);
++ spin_unlock(&fiq->lock);
+ return;
+ }
+ if (list_empty(&req->intr_entry)) {
+ list_add_tail(&req->intr_entry, &fiq->interrupts);
+- wake_up_locked(&fiq->waitq);
++ wake_up(&fiq->waitq);
+ }
+- spin_unlock(&fiq->waitq.lock);
++ spin_unlock(&fiq->lock);
+ kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
+ }
+
+@@ -466,16 +466,16 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
+ if (!err)
+ return;
+
+- spin_lock(&fiq->waitq.lock);
++ spin_lock(&fiq->lock);
+ /* Request is not yet in userspace, bail out */
+ if (test_bit(FR_PENDING, &req->flags)) {
+ list_del(&req->list);
+- spin_unlock(&fiq->waitq.lock);
++ spin_unlock(&fiq->lock);
+ __fuse_put_request(req);
+ req->out.h.error = -EINTR;
+ return;
+ }
+- spin_unlock(&fiq->waitq.lock);
++ spin_unlock(&fiq->lock);
+ }
+
+ /*
+@@ -490,9 +490,9 @@ static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
+ struct fuse_iqueue *fiq = &fc->iq;
+
+ BUG_ON(test_bit(FR_BACKGROUND, &req->flags));
+- spin_lock(&fiq->waitq.lock);
++ spin_lock(&fiq->lock);
+ if (!fiq->connected) {
+- spin_unlock(&fiq->waitq.lock);
++ spin_unlock(&fiq->lock);
+ req->out.h.error = -ENOTCONN;
+ } else {
+ req->in.h.unique = fuse_get_unique(fiq);
+@@ -500,7 +500,7 @@ static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
+ /* acquire extra reference, since request is still needed
+ after request_end() */
+ __fuse_get_request(req);
+- spin_unlock(&fiq->waitq.lock);
++ spin_unlock(&fiq->lock);
+
+ request_wait_answer(fc, req);
+ /* Pairs with smp_wmb() in request_end() */
+@@ -633,12 +633,12 @@ static int fuse_request_send_notify_reply(struct fuse_conn *fc,
+
+ __clear_bit(FR_ISREPLY, &req->flags);
+ req->in.h.unique = unique;
+- spin_lock(&fiq->waitq.lock);
++ spin_lock(&fiq->lock);
+ if (fiq->connected) {
+ queue_request(fiq, req);
+ err = 0;
+ }
+- spin_unlock(&fiq->waitq.lock);
++ spin_unlock(&fiq->lock);
+
+ return err;
+ }
+@@ -1082,12 +1082,12 @@ static int request_pending(struct fuse_iqueue *fiq)
+ * Unlike other requests this is assembled on demand, without a need
+ * to allocate a separate fuse_req structure.
+ *
+- * Called with fiq->waitq.lock held, releases it
++ * Called with fiq->lock held, releases it
+ */
+ static int fuse_read_interrupt(struct fuse_iqueue *fiq,
+ struct fuse_copy_state *cs,
+ size_t nbytes, struct fuse_req *req)
+-__releases(fiq->waitq.lock)
++__releases(fiq->lock)
+ {
+ struct fuse_in_header ih;
+ struct fuse_interrupt_in arg;
+@@ -1103,7 +1103,7 @@ __releases(fiq->waitq.lock)
+ ih.unique = req->intr_unique;
+ arg.unique = req->in.h.unique;
+
+- spin_unlock(&fiq->waitq.lock);
++ spin_unlock(&fiq->lock);
+ if (nbytes < reqsize)
+ return -EINVAL;
+
+@@ -1140,7 +1140,7 @@ static struct fuse_forget_link *dequeue_forget(struct fuse_iqueue *fiq,
+ static int fuse_read_single_forget(struct fuse_iqueue *fiq,
+ struct fuse_copy_state *cs,
+ size_t nbytes)
+-__releases(fiq->waitq.lock)
++__releases(fiq->lock)
+ {
+ int err;
+ struct fuse_forget_link *forget = dequeue_forget(fiq, 1, NULL);
+@@ -1154,7 +1154,7 @@ __releases(fiq->waitq.lock)
+ .len = sizeof(ih) + sizeof(arg),
+ };
+
+- spin_unlock(&fiq->waitq.lock);
++ spin_unlock(&fiq->lock);
+ kfree(forget);
+ if (nbytes < ih.len)
+ return -EINVAL;
+@@ -1172,7 +1172,7 @@ __releases(fiq->waitq.lock)
+
+ static int fuse_read_batch_forget(struct fuse_iqueue *fiq,
+ struct fuse_copy_state *cs, size_t nbytes)
+-__releases(fiq->waitq.lock)
++__releases(fiq->lock)
+ {
+ int err;
+ unsigned max_forgets;
+@@ -1186,13 +1186,13 @@ __releases(fiq->waitq.lock)
+ };
+
+ if (nbytes < ih.len) {
+- spin_unlock(&fiq->waitq.lock);
++ spin_unlock(&fiq->lock);
+ return -EINVAL;
+ }
+
+ max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
+ head = dequeue_forget(fiq, max_forgets, &count);
+- spin_unlock(&fiq->waitq.lock);
++ spin_unlock(&fiq->lock);
+
+ arg.count = count;
+ ih.len += count * sizeof(struct fuse_forget_one);
+@@ -1222,7 +1222,7 @@ __releases(fiq->waitq.lock)
+ static int fuse_read_forget(struct fuse_conn *fc, struct fuse_iqueue *fiq,
+ struct fuse_copy_state *cs,
+ size_t nbytes)
+-__releases(fiq->waitq.lock)
++__releases(fiq->lock)
+ {
+ if (fc->minor < 16 || fiq->forget_list_head.next->next == NULL)
+ return fuse_read_single_forget(fiq, cs, nbytes);
+@@ -1251,16 +1251,19 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
+ unsigned reqsize;
+
+ restart:
+- spin_lock(&fiq->waitq.lock);
+- err = -EAGAIN;
+- if ((file->f_flags & O_NONBLOCK) && fiq->connected &&
+- !request_pending(fiq))
+- goto err_unlock;
++ for (;;) {
++ spin_lock(&fiq->lock);
++ if (!fiq->connected || request_pending(fiq))
++ break;
++ spin_unlock(&fiq->lock);
+
+- err = wait_event_interruptible_exclusive_locked(fiq->waitq,
++ if (file->f_flags & O_NONBLOCK)
++ return -EAGAIN;
++ err = wait_event_interruptible_exclusive(fiq->waitq,
+ !fiq->connected || request_pending(fiq));
+- if (err)
+- goto err_unlock;
++ if (err)
++ return err;
++ }
+
+ if (!fiq->connected) {
+ err = (fc->aborted && fc->abort_err) ? -ECONNABORTED : -ENODEV;
+@@ -1284,7 +1287,7 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
+ req = list_entry(fiq->pending.next, struct fuse_req, list);
+ clear_bit(FR_PENDING, &req->flags);
+ list_del_init(&req->list);
+- spin_unlock(&fiq->waitq.lock);
++ spin_unlock(&fiq->lock);
+
+ in = &req->in;
+ reqsize = in->h.len;
+@@ -1341,7 +1344,7 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
+ return err;
+
+ err_unlock:
+- spin_unlock(&fiq->waitq.lock);
++ spin_unlock(&fiq->lock);
+ return err;
+ }
+
+@@ -2054,12 +2057,12 @@ static __poll_t fuse_dev_poll(struct file *file, poll_table *wait)
+ fiq = &fud->fc->iq;
+ poll_wait(file, &fiq->waitq, wait);
+
+- spin_lock(&fiq->waitq.lock);
++ spin_lock(&fiq->lock);
+ if (!fiq->connected)
+ mask = EPOLLERR;
+ else if (request_pending(fiq))
+ mask |= EPOLLIN | EPOLLRDNORM;
+- spin_unlock(&fiq->waitq.lock);
++ spin_unlock(&fiq->lock);
+
+ return mask;
+ }
+@@ -2150,15 +2153,15 @@ void fuse_abort_conn(struct fuse_conn *fc, bool is_abort)
+ fc->max_background = UINT_MAX;
+ flush_bg_queue(fc);
+
+- spin_lock(&fiq->waitq.lock);
++ spin_lock(&fiq->lock);
+ fiq->connected = 0;
+ list_for_each_entry(req, &fiq->pending, list)
+ clear_bit(FR_PENDING, &req->flags);
+ list_splice_tail_init(&fiq->pending, &to_end);
+ while (forget_pending(fiq))
+ kfree(dequeue_forget(fiq, 1, NULL));
+- wake_up_all_locked(&fiq->waitq);
+- spin_unlock(&fiq->waitq.lock);
++ wake_up_all(&fiq->waitq);
++ spin_unlock(&fiq->lock);
+ kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
+ end_polls(fc);
+ wake_up_all(&fc->blocked_waitq);
+diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
+index cec8b8e749695..900bdcf79bfc0 100644
+--- a/fs/fuse/fuse_i.h
++++ b/fs/fuse/fuse_i.h
+@@ -388,6 +388,9 @@ struct fuse_iqueue {
+ /** Connection established */
+ unsigned connected;
+
++ /** Lock protecting accesses to members of this structure */
++ spinlock_t lock;
++
+ /** Readers of the connection are waiting on this */
+ wait_queue_head_t waitq;
+
+diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
+index db9e60b7eb691..cb018315ecaf5 100644
+--- a/fs/fuse/inode.c
++++ b/fs/fuse/inode.c
+@@ -585,6 +585,7 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root)
+ static void fuse_iqueue_init(struct fuse_iqueue *fiq)
+ {
+ memset(fiq, 0, sizeof(struct fuse_iqueue));
++ spin_lock_init(&fiq->lock);
+ init_waitqueue_head(&fiq->waitq);
+ INIT_LIST_HEAD(&fiq->pending);
+ INIT_LIST_HEAD(&fiq->interrupts);
+--
+2.20.1
+
+From f641c74d0a1623d4c8e07aa2492d825e416190da Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 9 Sep 2019 16:30:02 +1000
+Subject: md/raid0: avoid RAID0 data corruption due to layout confusion.
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit c84a1372df929033cb1a0441fb57bd3932f39ac9 ]
+
+If the drives in a RAID0 are not all the same size, the array is
+divided into zones.
+The first zone covers all drives, to the size of the smallest.
+The second zone covers all drives larger than the smallest, up to
+the size of the second smallest - etc.
+
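+An illustrative example (drive sizes invented): with drives of 100G,
+200G and 200G,
+
+    zone 0: the first 100G of all three drives (striped across 3)
+    zone 1: the last 100G of the two larger drives (striped across 2)
+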
+A change in Linux 3.14 unintentionally changed the layout for the
+second and subsequent zones. All the correct data is still stored, but
+each chunk may be assigned to a different device than in pre-3.14 kernels.
+This can lead to data corruption.
+
+It is not possible to determine what layout to use - it depends on which
+kernel the data was written by.
+So we add a module parameter to allow the old (1) or new (2) layout to be
+specified, and refuse to assemble an affected array if that parameter is
+not set.
+
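+For example (a sketch; the values follow the enum added below), an
+administrator assembling an affected array picks the layout matching
+the kernel that wrote the data:
+
+    # data written by a pre-3.14 kernel:
+    modprobe raid0 default_layout=1
+    # data written by a 3.14 or later kernel:
+    modprobe raid0 default_layout=2
+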
+Fixes: 20d0189b1012 ("block: Introduce new bio_split()")
+cc: stable@vger.kernel.org (3.14+)
+Acked-by: Guoqing Jiang <guoqing.jiang@cloud.ionos.com>
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Song Liu <songliubraving@fb.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/md/raid0.c | 33 ++++++++++++++++++++++++++++++++-
+ drivers/md/raid0.h | 14 ++++++++++++++
+ 2 files changed, 46 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
+index f4daa56d204dd..43fa7dbf844b0 100644
+--- a/drivers/md/raid0.c
++++ b/drivers/md/raid0.c
+@@ -26,6 +26,9 @@
+ #include "raid0.h"
+ #include "raid5.h"
+
++static int default_layout = 0;
++module_param(default_layout, int, 0644);
++
+ #define UNSUPPORTED_MDDEV_FLAGS \
+ ((1L << MD_HAS_JOURNAL) | \
+ (1L << MD_JOURNAL_CLEAN) | \
+@@ -146,6 +149,19 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
+ }
+ pr_debug("md/raid0:%s: FINAL %d zones\n",
+ mdname(mddev), conf->nr_strip_zones);
++
++ if (conf->nr_strip_zones == 1) {
++ conf->layout = RAID0_ORIG_LAYOUT;
++ } else if (default_layout == RAID0_ORIG_LAYOUT ||
++ default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
++ conf->layout = default_layout;
++ } else {
++ pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
++ mdname(mddev));
++ pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
++ err = -ENOTSUPP;
++ goto abort;
++ }
+ /*
+ * now since we have the hard sector sizes, we can make sure
+ * chunk size is a multiple of that sector size
+@@ -555,10 +571,12 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
+
+ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
+ {
++ struct r0conf *conf = mddev->private;
+ struct strip_zone *zone;
+ struct md_rdev *tmp_dev;
+ sector_t bio_sector;
+ sector_t sector;
++ sector_t orig_sector;
+ unsigned chunk_sects;
+ unsigned sectors;
+
+@@ -592,8 +610,21 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
+ bio = split;
+ }
+
++ orig_sector = sector;
+ zone = find_zone(mddev->private, &sector);
+- tmp_dev = map_sector(mddev, zone, sector, &sector);
++ switch (conf->layout) {
++ case RAID0_ORIG_LAYOUT:
++ tmp_dev = map_sector(mddev, zone, orig_sector, &sector);
++ break;
++ case RAID0_ALT_MULTIZONE_LAYOUT:
++ tmp_dev = map_sector(mddev, zone, sector, &sector);
++ break;
++ default:
++ WARN(1, "md/raid0:%s: Invalid layout\n", mdname(mddev));
++ bio_io_error(bio);
++ return true;
++ }
++
+ bio_set_dev(bio, tmp_dev->bdev);
+ bio->bi_iter.bi_sector = sector + zone->dev_start +
+ tmp_dev->data_offset;
+diff --git a/drivers/md/raid0.h b/drivers/md/raid0.h
+index 540e65d92642d..3816e5477db1e 100644
+--- a/drivers/md/raid0.h
++++ b/drivers/md/raid0.h
+@@ -8,11 +8,25 @@ struct strip_zone {
+ int nb_dev; /* # of devices attached to the zone */
+ };
+
++/* Linux 3.14 (20d0189b101) made an unintended change to
++ * the RAID0 layout for multi-zone arrays (where devices aren't all
+ * the same size).
++ * RAID0_ORIG_LAYOUT restores the original layout
++ * RAID0_ALT_MULTIZONE_LAYOUT uses the altered layout
++ * The layouts are identical when there is only one zone (all
++ * devices the same size).
++ */
++
++enum r0layout {
++ RAID0_ORIG_LAYOUT = 1,
++ RAID0_ALT_MULTIZONE_LAYOUT = 2,
++};
+ struct r0conf {
+ struct strip_zone *strip_zone;
+ struct md_rdev **devlist; /* lists of rdevs, pointed to
+ * by strip_zone->dev */
+ int nr_strip_zones;
++ enum r0layout layout;
+ };
+
+ #endif
+--
+2.20.1
+
+From 4c02ea4c03ce0756262037cf56839c40a6e79b84 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 23 Sep 2019 15:36:54 -0700
+Subject: mm/compaction.c: clear total_{migrate,free}_scanned before scanning a
+ new zone
+
+From: Yafang Shao <laoar.shao@gmail.com>
+
+[ Upstream commit a94b525241c0fff3598809131d7cfcfe1d572d8c ]
+
+total_{migrate,free}_scanned will be added to COMPACTMIGRATE_SCANNED and
+COMPACTFREE_SCANNED in compact_zone(). We should clear them before
+scanning a new zone. In proc-triggered compaction, we forgot to clear
+them.
+
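+A sketch of the problem (simplified; compact_node() in the diff below
+has the same shape): one compact_control is reused for every zone, so
+totals left over from the previous zone get accounted again:
+
+    struct compact_control cc = { .order = -1, /* ... */ };
+    struct zone *zone;
+
+    for_each_populated_zone(zone) {
+        cc.zone = zone;
+        /* compact_zone() adds cc.total_{migrate,free}_scanned to the
+         * COMPACT*_SCANNED vmstat counters; without clearing them
+         * first, the next zone double-counts the previous zone's work.
+         */
+        compact_zone(zone, &cc);
+    }
+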
+[laoar.shao@gmail.com: introduce a helper compact_zone_counters_init()]
+ Link: http://lkml.kernel.org/r/1563869295-25748-1-git-send-email-laoar.shao@gmail.com
+[akpm@linux-foundation.org: expand compact_zone_counters_init() into its single callsite, per mhocko]
+[vbabka@suse.cz: squash compact_zone() list_head init as well]
+ Link: http://lkml.kernel.org/r/1fb6f7da-f776-9e42-22f8-bbb79b030b98@suse.cz
+[akpm@linux-foundation.org: kcompactd_do_work(): avoid unnecessary initialization of cc.zone]
+Link: http://lkml.kernel.org/r/1563789275-9639-1-git-send-email-laoar.shao@gmail.com
+Fixes: 7f354a548d1c ("mm, compaction: add vmstats for kcompactd work")
+Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Yafang Shao <shaoyafang@didiglobal.com>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/compaction.c | 35 +++++++++++++----------------------
+ 1 file changed, 13 insertions(+), 22 deletions(-)
+
+diff --git a/mm/compaction.c b/mm/compaction.c
+index faca45ebe62df..5079ddbec8f9e 100644
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -1540,6 +1540,17 @@ static enum compact_result compact_zone(struct zone *zone, struct compact_contro
+ unsigned long end_pfn = zone_end_pfn(zone);
+ const bool sync = cc->mode != MIGRATE_ASYNC;
+
++ /*
++ * These counters track activities during zone compaction. Initialize
++ * them before compacting a new zone.
++ */
++ cc->total_migrate_scanned = 0;
++ cc->total_free_scanned = 0;
++ cc->nr_migratepages = 0;
++ cc->nr_freepages = 0;
++ INIT_LIST_HEAD(&cc->freepages);
++ INIT_LIST_HEAD(&cc->migratepages);
++
+ cc->migratetype = gfpflags_to_migratetype(cc->gfp_mask);
+ ret = compaction_suitable(zone, cc->order, cc->alloc_flags,
+ cc->classzone_idx);
+@@ -1703,10 +1714,6 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
+ {
+ enum compact_result ret;
+ struct compact_control cc = {
+- .nr_freepages = 0,
+- .nr_migratepages = 0,
+- .total_migrate_scanned = 0,
+- .total_free_scanned = 0,
+ .order = order,
+ .gfp_mask = gfp_mask,
+ .zone = zone,
+@@ -1719,8 +1726,6 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
+ .ignore_skip_hint = (prio == MIN_COMPACT_PRIORITY),
+ .ignore_block_suitable = (prio == MIN_COMPACT_PRIORITY)
+ };
+- INIT_LIST_HEAD(&cc.freepages);
+- INIT_LIST_HEAD(&cc.migratepages);
+
+ ret = compact_zone(zone, &cc);
+
+@@ -1819,8 +1824,6 @@ static void compact_node(int nid)
+ struct zone *zone;
+ struct compact_control cc = {
+ .order = -1,
+- .total_migrate_scanned = 0,
+- .total_free_scanned = 0,
+ .mode = MIGRATE_SYNC,
+ .ignore_skip_hint = true,
+ .whole_zone = true,
+@@ -1834,11 +1837,7 @@ static void compact_node(int nid)
+ if (!populated_zone(zone))
+ continue;
+
+- cc.nr_freepages = 0;
+- cc.nr_migratepages = 0;
+ cc.zone = zone;
+- INIT_LIST_HEAD(&cc.freepages);
+- INIT_LIST_HEAD(&cc.migratepages);
+
+ compact_zone(zone, &cc);
+
+@@ -1947,8 +1946,6 @@ static void kcompactd_do_work(pg_data_t *pgdat)
+ struct zone *zone;
+ struct compact_control cc = {
+ .order = pgdat->kcompactd_max_order,
+- .total_migrate_scanned = 0,
+- .total_free_scanned = 0,
+ .classzone_idx = pgdat->kcompactd_classzone_idx,
+ .mode = MIGRATE_SYNC_LIGHT,
+ .ignore_skip_hint = false,
+@@ -1972,16 +1969,10 @@ static void kcompactd_do_work(pg_data_t *pgdat)
+ COMPACT_CONTINUE)
+ continue;
+
+- cc.nr_freepages = 0;
+- cc.nr_migratepages = 0;
+- cc.total_migrate_scanned = 0;
+- cc.total_free_scanned = 0;
+- cc.zone = zone;
+- INIT_LIST_HEAD(&cc.freepages);
+- INIT_LIST_HEAD(&cc.migratepages);
+-
+ if (kthread_should_stop())
+ return;
++
++ cc.zone = zone;
+ status = compact_zone(zone, &cc);
+
+ if (status == COMPACT_SUCCESS) {
+--
+2.20.1
+