serial-samsung-removes-the-irq-not-found-warning.patch
serial-pl011-don-t-leak-amba_ports-entry-on-driver-register-error.patch
serial-8250-change-lock-order-in-serial8250_do_startup.patch
+writeback-protect-inode-i_io_list-with-inode-i_lock.patch
+writeback-avoid-skipping-inode-writeback.patch
+writeback-fix-sync-livelock-due-to-b_dirty_time-processing.patch
--- /dev/null
+From 5afced3bf28100d81fb2fe7e98918632a08feaf5 Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Fri, 29 May 2020 15:05:22 +0200
+Subject: writeback: Avoid skipping inode writeback
+
+From: Jan Kara <jack@suse.cz>
+
+commit 5afced3bf28100d81fb2fe7e98918632a08feaf5 upstream.
+
+Inode's i_io_list list head is used to attach inode to several different
+lists - wb->{b_dirty, b_dirty_time, b_io, b_more_io}. When flush worker
+prepares a list of inodes to writeback e.g. for sync(2), it moves inodes
+to b_io list. Thus it is critical for sync(2) data integrity guarantees
+that inode is not requeued to any other writeback list when inode is
+queued for processing by flush worker. That's the reason why
+writeback_single_inode() does not touch i_io_list (unless the inode is
+completely clean) and why __mark_inode_dirty() does not touch i_io_list
+if I_SYNC flag is set.
+
+However there are two flaws in the current logic:
+
+1) When inode has only I_DIRTY_TIME set but it is already queued in b_io
+list due to sync(2), concurrent __mark_inode_dirty(inode, I_DIRTY_SYNC)
+can still move inode back to b_dirty list resulting in skipping
+writeback of inode time stamps during sync(2).
+
+2) When inode is on b_dirty_time list and writeback_single_inode() races
+with __mark_inode_dirty() like:
+
+writeback_single_inode() __mark_inode_dirty(inode, I_DIRTY_PAGES)
+ inode->i_state |= I_SYNC
+ __writeback_single_inode()
+ inode->i_state |= I_DIRTY_PAGES;
+ if (inode->i_state & I_SYNC)
+ bail
+ if (!(inode->i_state & I_DIRTY_ALL))
+ - not true so nothing done
+
+We end up with I_DIRTY_PAGES inode on b_dirty_time list and thus
+standard background writeback will not writeback this inode leading to
+possible dirty throttling stalls etc. (thanks to Martijn Coenen for this
+analysis).
+
+Fix these problems by tracking whether inode is queued in b_io or
+b_more_io lists in a new I_SYNC_QUEUED flag. When this flag is set, we
+know flush worker has queued inode and we should not touch i_io_list.
+On the other hand we also know that once flush worker is done with the
+inode it will requeue the inode to appropriate dirty list. When
+I_SYNC_QUEUED is not set, __mark_inode_dirty() can (and must) move inode
+to appropriate dirty list.
+
+Reported-by: Martijn Coenen <maco@android.com>
+Reviewed-by: Martijn Coenen <maco@android.com>
+Tested-by: Martijn Coenen <maco@android.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Fixes: 0ae45f63d4ef ("vfs: add support for a lazytime mount option")
+CC: stable@vger.kernel.org
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/fs-writeback.c | 17 ++++++++++++-----
+ include/linux/fs.h | 8 ++++++--
+ 2 files changed, 18 insertions(+), 7 deletions(-)
+
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -162,6 +162,7 @@ static void inode_io_list_del_locked(str
+ assert_spin_locked(&wb->list_lock);
+ assert_spin_locked(&inode->i_lock);
+
++ inode->i_state &= ~I_SYNC_QUEUED;
+ list_del_init(&inode->i_io_list);
+ wb_io_lists_depopulated(wb);
+ }
+@@ -1062,6 +1063,7 @@ static void redirty_tail_locked(struct i
+ inode->dirtied_when = jiffies;
+ }
+ inode_io_list_move_locked(inode, wb, &wb->b_dirty);
++ inode->i_state &= ~I_SYNC_QUEUED;
+ }
+
+ static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
+@@ -1137,8 +1139,11 @@ static int move_expired_inodes(struct li
+ break;
+ list_move(&inode->i_io_list, &tmp);
+ moved++;
++ spin_lock(&inode->i_lock);
+ if (flags & EXPIRE_DIRTY_ATIME)
+- set_bit(__I_DIRTY_TIME_EXPIRED, &inode->i_state);
++ inode->i_state |= I_DIRTY_TIME_EXPIRED;
++ inode->i_state |= I_SYNC_QUEUED;
++ spin_unlock(&inode->i_lock);
+ if (sb_is_blkdev_sb(inode->i_sb))
+ continue;
+ if (sb && sb != inode->i_sb)
+@@ -1313,6 +1318,7 @@ static void requeue_inode(struct inode *
+ } else if (inode->i_state & I_DIRTY_TIME) {
+ inode->dirtied_when = jiffies;
+ inode_io_list_move_locked(inode, wb, &wb->b_dirty_time);
++ inode->i_state &= ~I_SYNC_QUEUED;
+ } else {
+ /* The inode is clean. Remove from writeback lists. */
+ inode_io_list_del_locked(inode, wb);
+@@ -2140,11 +2146,12 @@ void __mark_inode_dirty(struct inode *in
+ inode->i_state |= flags;
+
+ /*
+- * If the inode is being synced, just update its dirty state.
+- * The unlocker will place the inode on the appropriate
+- * superblock list, based upon its state.
++ * If the inode is queued for writeback by flush worker, just
++ * update its dirty state. Once the flush worker is done with
++ * the inode it will place it on the appropriate superblock
++ * list, based upon its state.
+ */
+- if (inode->i_state & I_SYNC)
++ if (inode->i_state & I_SYNC_QUEUED)
+ goto out_unlock_inode;
+
+ /*
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -1882,6 +1882,10 @@ struct super_operations {
+ * wb stat updates to grab mapping->tree_lock. See
+ * inode_switch_wb_work_fn() for details.
+ *
++ * I_SYNC_QUEUED Inode is queued in b_io or b_more_io writeback lists.
++ * Used to detect that mark_inode_dirty() should not move
++ * inode between dirty lists.
++ *
+ * Q: What is the difference between I_WILL_FREE and I_FREEING?
+ */
+ #define I_DIRTY_SYNC (1 << 0)
+@@ -1899,9 +1903,9 @@ struct super_operations {
+ #define I_DIO_WAKEUP (1 << __I_DIO_WAKEUP)
+ #define I_LINKABLE (1 << 10)
+ #define I_DIRTY_TIME (1 << 11)
+-#define __I_DIRTY_TIME_EXPIRED 12
+-#define I_DIRTY_TIME_EXPIRED (1 << __I_DIRTY_TIME_EXPIRED)
++#define I_DIRTY_TIME_EXPIRED (1 << 12)
+ #define I_WB_SWITCH (1 << 13)
++#define I_SYNC_QUEUED (1 << 17)
+
+ #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
+ #define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME)
--- /dev/null
+From f9cae926f35e8230330f28c7b743ad088611a8de Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Fri, 29 May 2020 16:08:58 +0200
+Subject: writeback: Fix sync livelock due to b_dirty_time processing
+
+From: Jan Kara <jack@suse.cz>
+
+commit f9cae926f35e8230330f28c7b743ad088611a8de upstream.
+
+When we are processing writeback for sync(2), move_expired_inodes()
+didn't set any inode expiry value (older_than_this). This can result in
+writeback never completing if there's steady stream of inodes added to
+b_dirty_time list as writeback rechecks dirty lists after each writeback
+round whether there's more work to be done. Fix the problem by using
+sync(2) start time is inode expiry value when processing b_dirty_time
+list similarly as for ordinarily dirtied inodes. This requires some
+refactoring of older_than_this handling which simplifies the code
+noticeably as a bonus.
+
+Fixes: 0ae45f63d4ef ("vfs: add support for a lazytime mount option")
+CC: stable@vger.kernel.org
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/fs-writeback.c | 44 +++++++++++++++------------------------
+ include/trace/events/writeback.h | 13 +++++------
+ 2 files changed, 23 insertions(+), 34 deletions(-)
+
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -45,7 +45,6 @@ struct wb_completion {
+ struct wb_writeback_work {
+ long nr_pages;
+ struct super_block *sb;
+- unsigned long *older_than_this;
+ enum writeback_sync_modes sync_mode;
+ unsigned int tagged_writepages:1;
+ unsigned int for_kupdate:1;
+@@ -1109,16 +1108,13 @@ static bool inode_dirtied_after(struct i
+ #define EXPIRE_DIRTY_ATIME 0x0001
+
+ /*
+- * Move expired (dirtied before work->older_than_this) dirty inodes from
++ * Move expired (dirtied before dirtied_before) dirty inodes from
+ * @delaying_queue to @dispatch_queue.
+ */
+ static int move_expired_inodes(struct list_head *delaying_queue,
+ struct list_head *dispatch_queue,
+- int flags,
+- struct wb_writeback_work *work)
++ int flags, unsigned long dirtied_before)
+ {
+- unsigned long *older_than_this = NULL;
+- unsigned long expire_time;
+ LIST_HEAD(tmp);
+ struct list_head *pos, *node;
+ struct super_block *sb = NULL;
+@@ -1126,16 +1122,9 @@ static int move_expired_inodes(struct li
+ int do_sb_sort = 0;
+ int moved = 0;
+
+- if ((flags & EXPIRE_DIRTY_ATIME) == 0)
+- older_than_this = work->older_than_this;
+- else if (!work->for_sync) {
+- expire_time = jiffies - (dirtytime_expire_interval * HZ);
+- older_than_this = &expire_time;
+- }
+ while (!list_empty(delaying_queue)) {
+ inode = wb_inode(delaying_queue->prev);
+- if (older_than_this &&
+- inode_dirtied_after(inode, *older_than_this))
++ if (inode_dirtied_after(inode, dirtied_before))
+ break;
+ list_move(&inode->i_io_list, &tmp);
+ moved++;
+@@ -1181,18 +1170,22 @@ out:
+ * |
+ * +--> dequeue for IO
+ */
+-static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work)
++static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work,
++ unsigned long dirtied_before)
+ {
+ int moved;
++ unsigned long time_expire_jif = dirtied_before;
+
+ assert_spin_locked(&wb->list_lock);
+ list_splice_init(&wb->b_more_io, &wb->b_io);
+- moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, work);
++ moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, dirtied_before);
++ if (!work->for_sync)
++ time_expire_jif = jiffies - dirtytime_expire_interval * HZ;
+ moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io,
+- EXPIRE_DIRTY_ATIME, work);
++ EXPIRE_DIRTY_ATIME, time_expire_jif);
+ if (moved)
+ wb_io_lists_populated(wb);
+- trace_writeback_queue_io(wb, work, moved);
++ trace_writeback_queue_io(wb, work, dirtied_before, moved);
+ }
+
+ static int write_inode(struct inode *inode, struct writeback_control *wbc)
+@@ -1703,7 +1696,7 @@ static long writeback_inodes_wb(struct b
+ blk_start_plug(&plug);
+ spin_lock(&wb->list_lock);
+ if (list_empty(&wb->b_io))
+- queue_io(wb, &work);
++ queue_io(wb, &work, jiffies);
+ __writeback_inodes_wb(wb, &work);
+ spin_unlock(&wb->list_lock);
+ blk_finish_plug(&plug);
+@@ -1723,7 +1716,7 @@ static long writeback_inodes_wb(struct b
+ * takes longer than a dirty_writeback_interval interval, then leave a
+ * one-second gap.
+ *
+- * older_than_this takes precedence over nr_to_write. So we'll only write back
++ * dirtied_before takes precedence over nr_to_write. So we'll only write back
+ * all dirty pages if they are all attached to "old" mappings.
+ */
+ static long wb_writeback(struct bdi_writeback *wb,
+@@ -1731,14 +1724,11 @@ static long wb_writeback(struct bdi_writ
+ {
+ unsigned long wb_start = jiffies;
+ long nr_pages = work->nr_pages;
+- unsigned long oldest_jif;
++ unsigned long dirtied_before = jiffies;
+ struct inode *inode;
+ long progress;
+ struct blk_plug plug;
+
+- oldest_jif = jiffies;
+- work->older_than_this = &oldest_jif;
+-
+ blk_start_plug(&plug);
+ spin_lock(&wb->list_lock);
+ for (;;) {
+@@ -1772,14 +1762,14 @@ static long wb_writeback(struct bdi_writ
+ * safe.
+ */
+ if (work->for_kupdate) {
+- oldest_jif = jiffies -
++ dirtied_before = jiffies -
+ msecs_to_jiffies(dirty_expire_interval * 10);
+ } else if (work->for_background)
+- oldest_jif = jiffies;
++ dirtied_before = jiffies;
+
+ trace_writeback_start(wb, work);
+ if (list_empty(&wb->b_io))
+- queue_io(wb, work);
++ queue_io(wb, work, dirtied_before);
+ if (work->sb)
+ progress = writeback_sb_inodes(work->sb, wb, work);
+ else
+--- a/include/trace/events/writeback.h
++++ b/include/trace/events/writeback.h
+@@ -390,8 +390,9 @@ DEFINE_WBC_EVENT(wbc_writepage);
+ TRACE_EVENT(writeback_queue_io,
+ TP_PROTO(struct bdi_writeback *wb,
+ struct wb_writeback_work *work,
++ unsigned long dirtied_before,
+ int moved),
+- TP_ARGS(wb, work, moved),
++ TP_ARGS(wb, work, dirtied_before, moved),
+ TP_STRUCT__entry(
+ __array(char, name, 32)
+ __field(unsigned long, older)
+@@ -401,19 +402,17 @@ TRACE_EVENT(writeback_queue_io,
+ __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
+ ),
+ TP_fast_assign(
+- unsigned long *older_than_this = work->older_than_this;
+ strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
+- __entry->older = older_than_this ? *older_than_this : 0;
+- __entry->age = older_than_this ?
+- (jiffies - *older_than_this) * 1000 / HZ : -1;
++ __entry->older = dirtied_before;
++ __entry->age = (jiffies - dirtied_before) * 1000 / HZ;
+ __entry->moved = moved;
+ __entry->reason = work->reason;
+ __trace_wb_assign_cgroup(__get_str(cgroup), wb);
+ ),
+ TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup=%s",
+ __entry->name,
+- __entry->older, /* older_than_this in jiffies */
+- __entry->age, /* older_than_this in relative milliseconds */
++ __entry->older, /* dirtied_before in jiffies */
++ __entry->age, /* dirtied_before in relative milliseconds */
+ __entry->moved,
+ __print_symbolic(__entry->reason, WB_WORK_REASON),
+ __get_str(cgroup)
--- /dev/null
+From b35250c0816c7cf7d0a8de92f5fafb6a7508a708 Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Wed, 10 Jun 2020 17:36:03 +0200
+Subject: writeback: Protect inode->i_io_list with inode->i_lock
+
+From: Jan Kara <jack@suse.cz>
+
+commit b35250c0816c7cf7d0a8de92f5fafb6a7508a708 upstream.
+
+Currently, operations on inode->i_io_list are protected by
+wb->list_lock. In the following patches we'll need to maintain
+consistency between inode->i_state and inode->i_io_list so change the
+code so that inode->i_lock protects also all inode's i_io_list handling.
+
+Reviewed-by: Martijn Coenen <maco@android.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+CC: stable@vger.kernel.org # Prerequisite for "writeback: Avoid skipping inode writeback"
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/fs-writeback.c | 22 +++++++++++++++++-----
+ 1 file changed, 17 insertions(+), 5 deletions(-)
+
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -160,6 +160,7 @@ static void inode_io_list_del_locked(str
+ struct bdi_writeback *wb)
+ {
+ assert_spin_locked(&wb->list_lock);
++ assert_spin_locked(&inode->i_lock);
+
+ list_del_init(&inode->i_io_list);
+ wb_io_lists_depopulated(wb);
+@@ -1034,7 +1035,9 @@ void inode_io_list_del(struct inode *ino
+ struct bdi_writeback *wb;
+
+ wb = inode_to_wb_and_lock_list(inode);
++ spin_lock(&inode->i_lock);
+ inode_io_list_del_locked(inode, wb);
++ spin_unlock(&inode->i_lock);
+ spin_unlock(&wb->list_lock);
+ }
+
+@@ -1047,8 +1050,10 @@ void inode_io_list_del(struct inode *ino
+ * the case then the inode must have been redirtied while it was being written
+ * out and we don't reset its dirtied_when.
+ */
+-static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
++static void redirty_tail_locked(struct inode *inode, struct bdi_writeback *wb)
+ {
++ assert_spin_locked(&inode->i_lock);
++
+ if (!list_empty(&wb->b_dirty)) {
+ struct inode *tail;
+
+@@ -1059,6 +1064,13 @@ static void redirty_tail(struct inode *i
+ inode_io_list_move_locked(inode, wb, &wb->b_dirty);
+ }
+
++static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
++{
++ spin_lock(&inode->i_lock);
++ redirty_tail_locked(inode, wb);
++ spin_unlock(&inode->i_lock);
++}
++
+ /*
+ * requeue inode for re-scanning after bdi->b_io list is exhausted.
+ */
+@@ -1269,7 +1281,7 @@ static void requeue_inode(struct inode *
+ * writeback is not making progress due to locked
+ * buffers. Skip this inode for now.
+ */
+- redirty_tail(inode, wb);
++ redirty_tail_locked(inode, wb);
+ return;
+ }
+
+@@ -1289,7 +1301,7 @@ static void requeue_inode(struct inode *
+ * retrying writeback of the dirty page/inode
+ * that cannot be performed immediately.
+ */
+- redirty_tail(inode, wb);
++ redirty_tail_locked(inode, wb);
+ }
+ } else if (inode->i_state & I_DIRTY) {
+ /*
+@@ -1297,7 +1309,7 @@ static void requeue_inode(struct inode *
+ * such as delayed allocation during submission or metadata
+ * updates after data IO completion.
+ */
+- redirty_tail(inode, wb);
++ redirty_tail_locked(inode, wb);
+ } else if (inode->i_state & I_DIRTY_TIME) {
+ inode->dirtied_when = jiffies;
+ inode_io_list_move_locked(inode, wb, &wb->b_dirty_time);
+@@ -1543,8 +1555,8 @@ static long writeback_sb_inodes(struct s
+ */
+ spin_lock(&inode->i_lock);
+ if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
++ redirty_tail_locked(inode, wb);
+ spin_unlock(&inode->i_lock);
+- redirty_tail(inode, wb);
+ continue;
+ }
+ if ((inode->i_state & I_SYNC) && wbc.sync_mode != WB_SYNC_ALL) {