]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 4.4
author: Sasha Levin <sashal@kernel.org>
Wed, 13 May 2020 00:51:01 +0000 (20:51 -0400)
committer: Sasha Levin <sashal@kernel.org>
Wed, 13 May 2020 00:51:01 +0000 (20:51 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
18 files changed:
queue-4.4/blktrace-fix-dereference-after-null-check.patch [new file with mode: 0644]
queue-4.4/blktrace-fix-potential-deadlock-between-delete-sysfs.patch [new file with mode: 0644]
queue-4.4/blktrace-fix-trace-mutex-deadlock.patch [new file with mode: 0644]
queue-4.4/blktrace-fix-unlocked-access-to-init-start-stop-tear.patch [new file with mode: 0644]
queue-4.4/blktrace-protect-q-blk_trace-with-rcu.patch [new file with mode: 0644]
queue-4.4/chardev-add-helper-function-to-register-char-devs-wi.patch [new file with mode: 0644]
queue-4.4/ext4-add-cond_resched-to-ext4_protect_reserved_inode.patch [new file with mode: 0644]
queue-4.4/net-ipv6-add-net-argument-to-ip6_dst_lookup_flow.patch [new file with mode: 0644]
queue-4.4/net-ipv6_stub-use-ip6_dst_lookup_flow-instead-of-ip6.patch [new file with mode: 0644]
queue-4.4/phy-micrel-disable-auto-negotiation-on-startup.patch [new file with mode: 0644]
queue-4.4/phy-micrel-ensure-interrupts-are-reenabled-on-resume.patch [new file with mode: 0644]
queue-4.4/ptp-create-pins-together-with-the-rest-of-attributes.patch [new file with mode: 0644]
queue-4.4/ptp-do-not-explicitly-set-drvdata-in-ptp_clock_regis.patch [new file with mode: 0644]
queue-4.4/ptp-fix-pass-zero-to-err_ptr-in-ptp_clock_register.patch [new file with mode: 0644]
queue-4.4/ptp-fix-the-race-between-the-release-of-ptp_clock-an.patch [new file with mode: 0644]
queue-4.4/ptp-free-ptp-device-pin-descriptors-properly.patch [new file with mode: 0644]
queue-4.4/ptp-use-is_visible-method-to-hide-unused-attributes.patch [new file with mode: 0644]
queue-4.4/series

diff --git a/queue-4.4/blktrace-fix-dereference-after-null-check.patch b/queue-4.4/blktrace-fix-dereference-after-null-check.patch
new file mode 100644 (file)
index 0000000..54d5784
--- /dev/null
@@ -0,0 +1,70 @@
+From af666a41a5823e30f88387ff0dc85cd3d6d0cbe4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 4 Mar 2020 13:58:19 +0300
+Subject: blktrace: fix dereference after null check
+
+From: Cengiz Can <cengiz@kernel.wtf>
+
+commit 153031a301bb07194e9c37466cfce8eacb977621 upstream.
+
+There was a recent change in blktrace.c that added a RCU protection to
+`q->blk_trace` in order to fix a use-after-free issue during access.
+
+However the change missed an edge case that can lead to dereferencing of
+`bt` pointer even when it's NULL:
+
+Coverity static analyzer marked this as a FORWARD_NULL issue with CID
+1460458.
+
+```
+/kernel/trace/blktrace.c: 1904 in sysfs_blk_trace_attr_store()
+1898            ret = 0;
+1899            if (bt == NULL)
+1900                    ret = blk_trace_setup_queue(q, bdev);
+1901
+1902            if (ret == 0) {
+1903                    if (attr == &dev_attr_act_mask)
+>>>     CID 1460458:  Null pointer dereferences  (FORWARD_NULL)
+>>>     Dereferencing null pointer "bt".
+1904                            bt->act_mask = value;
+1905                    else if (attr == &dev_attr_pid)
+1906                            bt->pid = value;
+1907                    else if (attr == &dev_attr_start_lba)
+1908                            bt->start_lba = value;
+1909                    else if (attr == &dev_attr_end_lba)
+```
+
+Added a reassignment with RCU annotation to fix the issue.
+
+Fixes: c780e86dd48 ("blktrace: Protect q->blk_trace with RCU")
+Reviewed-by: Ming Lei <ming.lei@redhat.com>
+Reviewed-by: Bob Liu <bob.liu@oracle.com>
+Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Cengiz Can <cengiz@kernel.wtf>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/blktrace.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
+index bf9bf3f22edb5..6737564680193 100644
+--- a/kernel/trace/blktrace.c
++++ b/kernel/trace/blktrace.c
+@@ -1822,8 +1822,11 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
+       }
+       ret = 0;
+-      if (bt == NULL)
++      if (bt == NULL) {
+               ret = blk_trace_setup_queue(q, bdev);
++              bt = rcu_dereference_protected(q->blk_trace,
++                              lockdep_is_held(&q->blk_trace_mutex));
++      }
+       if (ret == 0) {
+               if (attr == &dev_attr_act_mask)
+-- 
+2.20.1
+
diff --git a/queue-4.4/blktrace-fix-potential-deadlock-between-delete-sysfs.patch b/queue-4.4/blktrace-fix-potential-deadlock-between-delete-sysfs.patch
new file mode 100644 (file)
index 0000000..bf5488e
--- /dev/null
@@ -0,0 +1,155 @@
+From 8f7d1986c7ee15bdfbb32b33a45da62f94583f01 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Sep 2017 13:12:20 -0600
+Subject: blktrace: Fix potential deadlock between delete & sysfs ops
+
+From: Waiman Long <longman@redhat.com>
+
+commit 5acb3cc2c2e9d3020a4fee43763c6463767f1572 upstream.
+
+The lockdep code had reported the following unsafe locking scenario:
+
+       CPU0                    CPU1
+       ----                    ----
+  lock(s_active#228);
+                               lock(&bdev->bd_mutex/1);
+                               lock(s_active#228);
+  lock(&bdev->bd_mutex);
+
+ *** DEADLOCK ***
+
+The deadlock may happen when one task (CPU1) is trying to delete a
+partition in a block device and another task (CPU0) is accessing
+tracing sysfs file (e.g. /sys/block/dm-1/trace/act_mask) in that
+partition.
+
+The s_active isn't an actual lock. It is a reference count (kn->count)
+on the sysfs (kernfs) file. Removal of a sysfs file, however, requires
+a wait until all the references are gone. The reference count is
+treated like a rwsem using lockdep instrumentation code.
+
+The fact that a thread is in the sysfs callback method or in the
+ioctl call means there is a reference to the opened sysfs or device
+file. That should prevent the underlying block structure from being
+removed.
+
+Instead of using bd_mutex in the block_device structure, a new
+blk_trace_mutex is now added to the request_queue structure to protect
+access to the blk_trace structure.
+
+Suggested-by: Christoph Hellwig <hch@infradead.org>
+Signed-off-by: Waiman Long <longman@redhat.com>
+Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+
+Fix typo in patch subject line, and prune a comment detailing how
+the code used to work.
+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-core.c        |  3 +++
+ include/linux/blkdev.h  |  1 +
+ kernel/trace/blktrace.c | 18 ++++++++++++------
+ 3 files changed, 16 insertions(+), 6 deletions(-)
+
+diff --git a/block/blk-core.c b/block/blk-core.c
+index 7662f97dded69..dc4119a1e1229 100644
+--- a/block/blk-core.c
++++ b/block/blk-core.c
+@@ -719,6 +719,9 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
+       kobject_init(&q->kobj, &blk_queue_ktype);
++#ifdef CONFIG_BLK_DEV_IO_TRACE
++      mutex_init(&q->blk_trace_mutex);
++#endif
+       mutex_init(&q->sysfs_lock);
+       spin_lock_init(&q->__queue_lock);
+diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
+index 4ae5d6ecd7275..ab819210e0112 100644
+--- a/include/linux/blkdev.h
++++ b/include/linux/blkdev.h
+@@ -432,6 +432,7 @@ struct request_queue {
+       int                     node;
+ #ifdef CONFIG_BLK_DEV_IO_TRACE
+       struct blk_trace        *blk_trace;
++      struct mutex            blk_trace_mutex;
+ #endif
+       /*
+        * for flush operations
+diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
+index 210b8e726a974..5e6fc28414028 100644
+--- a/kernel/trace/blktrace.c
++++ b/kernel/trace/blktrace.c
+@@ -644,6 +644,12 @@ int blk_trace_startstop(struct request_queue *q, int start)
+ }
+ EXPORT_SYMBOL_GPL(blk_trace_startstop);
++/*
++ * When reading or writing the blktrace sysfs files, the references to the
++ * opened sysfs or device files should prevent the underlying block device
++ * from being removed. So no further delete protection is really needed.
++ */
++
+ /**
+  * blk_trace_ioctl: - handle the ioctls associated with tracing
+  * @bdev:     the block device
+@@ -661,7 +667,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
+       if (!q)
+               return -ENXIO;
+-      mutex_lock(&bdev->bd_mutex);
++      mutex_lock(&q->blk_trace_mutex);
+       switch (cmd) {
+       case BLKTRACESETUP:
+@@ -687,7 +693,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
+               break;
+       }
+-      mutex_unlock(&bdev->bd_mutex);
++      mutex_unlock(&q->blk_trace_mutex);
+       return ret;
+ }
+@@ -1652,7 +1658,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
+       if (q == NULL)
+               goto out_bdput;
+-      mutex_lock(&bdev->bd_mutex);
++      mutex_lock(&q->blk_trace_mutex);
+       if (attr == &dev_attr_enable) {
+               ret = sprintf(buf, "%u\n", !!q->blk_trace);
+@@ -1671,7 +1677,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
+               ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
+ out_unlock_bdev:
+-      mutex_unlock(&bdev->bd_mutex);
++      mutex_unlock(&q->blk_trace_mutex);
+ out_bdput:
+       bdput(bdev);
+ out:
+@@ -1713,7 +1719,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
+       if (q == NULL)
+               goto out_bdput;
+-      mutex_lock(&bdev->bd_mutex);
++      mutex_lock(&q->blk_trace_mutex);
+       if (attr == &dev_attr_enable) {
+               if (!!value == !!q->blk_trace) {
+@@ -1743,7 +1749,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
+       }
+ out_unlock_bdev:
+-      mutex_unlock(&bdev->bd_mutex);
++      mutex_unlock(&q->blk_trace_mutex);
+ out_bdput:
+       bdput(bdev);
+ out:
+-- 
+2.20.1
+
diff --git a/queue-4.4/blktrace-fix-trace-mutex-deadlock.patch b/queue-4.4/blktrace-fix-trace-mutex-deadlock.patch
new file mode 100644 (file)
index 0000000..b02497a
--- /dev/null
@@ -0,0 +1,47 @@
+From 3ce9f765dac1051141b48809f8608d840c655b7e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 19 Nov 2017 11:52:55 -0700
+Subject: blktrace: fix trace mutex deadlock
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 2967acbb257a6a9bf912f4778b727e00972eac9b upstream.
+
+A previous commit changed the locking around registration/cleanup,
+but direct callers of blk_trace_remove() were missed. This means
+that if we hit the error path in setup, we will deadlock on
+attempting to re-acquire the queue trace mutex.
+
+Fixes: 1f2cac107c59 ("blktrace: fix unlocked access to init/start-stop/teardown")
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/blktrace.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
+index dc5fd20429d8e..ea18c6997eb23 100644
+--- a/kernel/trace/blktrace.c
++++ b/kernel/trace/blktrace.c
+@@ -572,7 +572,7 @@ static int __blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
+               return ret;
+       if (copy_to_user(arg, &buts, sizeof(buts))) {
+-              blk_trace_remove(q);
++              __blk_trace_remove(q);
+               return -EFAULT;
+       }
+       return 0;
+@@ -618,7 +618,7 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
+               return ret;
+       if (copy_to_user(arg, &buts.name, ARRAY_SIZE(buts.name))) {
+-              blk_trace_remove(q);
++              __blk_trace_remove(q);
+               return -EFAULT;
+       }
+-- 
+2.20.1
+
diff --git a/queue-4.4/blktrace-fix-unlocked-access-to-init-start-stop-tear.patch b/queue-4.4/blktrace-fix-unlocked-access-to-init-start-stop-tear.patch
new file mode 100644 (file)
index 0000000..cf1c26d
--- /dev/null
@@ -0,0 +1,156 @@
+From bf5b9cf66d2ef05afd456e3b524edc72d1ae1452 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 5 Nov 2017 09:13:48 -0700
+Subject: blktrace: fix unlocked access to init/start-stop/teardown
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 1f2cac107c591c24b60b115d6050adc213d10fc0 upstream.
+
+sg.c calls into the blktrace functions without holding the proper queue
+mutex for doing setup, start/stop, or teardown.
+
+Add internal unlocked variants, and export the ones that do the proper
+locking.
+
+Fixes: 6da127ad0918 ("blktrace: Add blktrace ioctls to SCSI generic devices")
+Tested-by: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/blktrace.c | 58 ++++++++++++++++++++++++++++++++++-------
+ 1 file changed, 48 insertions(+), 10 deletions(-)
+
+diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
+index 5e6fc28414028..dc5fd20429d8e 100644
+--- a/kernel/trace/blktrace.c
++++ b/kernel/trace/blktrace.c
+@@ -323,7 +323,7 @@ static void blk_trace_cleanup(struct blk_trace *bt)
+       put_probe_ref();
+ }
+-int blk_trace_remove(struct request_queue *q)
++static int __blk_trace_remove(struct request_queue *q)
+ {
+       struct blk_trace *bt;
+@@ -336,6 +336,17 @@ int blk_trace_remove(struct request_queue *q)
+       return 0;
+ }
++
++int blk_trace_remove(struct request_queue *q)
++{
++      int ret;
++
++      mutex_lock(&q->blk_trace_mutex);
++      ret = __blk_trace_remove(q);
++      mutex_unlock(&q->blk_trace_mutex);
++
++      return ret;
++}
+ EXPORT_SYMBOL_GPL(blk_trace_remove);
+ static ssize_t blk_dropped_read(struct file *filp, char __user *buffer,
+@@ -546,9 +557,8 @@ err:
+       return ret;
+ }
+-int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
+-                  struct block_device *bdev,
+-                  char __user *arg)
++static int __blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
++                           struct block_device *bdev, char __user *arg)
+ {
+       struct blk_user_trace_setup buts;
+       int ret;
+@@ -567,6 +577,19 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
+       }
+       return 0;
+ }
++
++int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
++                  struct block_device *bdev,
++                  char __user *arg)
++{
++      int ret;
++
++      mutex_lock(&q->blk_trace_mutex);
++      ret = __blk_trace_setup(q, name, dev, bdev, arg);
++      mutex_unlock(&q->blk_trace_mutex);
++
++      return ret;
++}
+ EXPORT_SYMBOL_GPL(blk_trace_setup);
+ #if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64)
+@@ -603,7 +626,7 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
+ }
+ #endif
+-int blk_trace_startstop(struct request_queue *q, int start)
++static int __blk_trace_startstop(struct request_queue *q, int start)
+ {
+       int ret;
+       struct blk_trace *bt = q->blk_trace;
+@@ -642,6 +665,17 @@ int blk_trace_startstop(struct request_queue *q, int start)
+       return ret;
+ }
++
++int blk_trace_startstop(struct request_queue *q, int start)
++{
++      int ret;
++
++      mutex_lock(&q->blk_trace_mutex);
++      ret = __blk_trace_startstop(q, start);
++      mutex_unlock(&q->blk_trace_mutex);
++
++      return ret;
++}
+ EXPORT_SYMBOL_GPL(blk_trace_startstop);
+ /*
+@@ -672,7 +706,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
+       switch (cmd) {
+       case BLKTRACESETUP:
+               bdevname(bdev, b);
+-              ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
++              ret = __blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
+               break;
+ #if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64)
+       case BLKTRACESETUP32:
+@@ -683,10 +717,10 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
+       case BLKTRACESTART:
+               start = 1;
+       case BLKTRACESTOP:
+-              ret = blk_trace_startstop(q, start);
++              ret = __blk_trace_startstop(q, start);
+               break;
+       case BLKTRACETEARDOWN:
+-              ret = blk_trace_remove(q);
++              ret = __blk_trace_remove(q);
+               break;
+       default:
+               ret = -ENOTTY;
+@@ -704,10 +738,14 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
+  **/
+ void blk_trace_shutdown(struct request_queue *q)
+ {
++      mutex_lock(&q->blk_trace_mutex);
++
+       if (q->blk_trace) {
+-              blk_trace_startstop(q, 0);
+-              blk_trace_remove(q);
++              __blk_trace_startstop(q, 0);
++              __blk_trace_remove(q);
+       }
++
++      mutex_unlock(&q->blk_trace_mutex);
+ }
+ /*
+-- 
+2.20.1
+
diff --git a/queue-4.4/blktrace-protect-q-blk_trace-with-rcu.patch b/queue-4.4/blktrace-protect-q-blk_trace-with-rcu.patch
new file mode 100644 (file)
index 0000000..db1d0de
--- /dev/null
@@ -0,0 +1,406 @@
+From 983c9cde9359d5198ebd9adfcb1b9378b98d64c8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Feb 2020 15:28:12 +0100
+Subject: blktrace: Protect q->blk_trace with RCU
+
+From: Jan Kara <jack@suse.cz>
+
+commit c780e86dd48ef6467a1146cf7d0fe1e05a635039 upstream.
+
+KASAN is reporting that __blk_add_trace() has a use-after-free issue
+when accessing q->blk_trace. Indeed the switching of block tracing (and
+thus eventual freeing of q->blk_trace) is completely unsynchronized with
+the currently running tracing and thus it can happen that the blk_trace
+structure is being freed just while __blk_add_trace() works on it.
+Protect accesses to q->blk_trace by RCU during tracing and make sure we
+wait for the end of RCU grace period when shutting down tracing. Luckily
+that is a rare enough event that we can afford that. Note that postponing
+the freeing of blk_trace to an RCU callback should better be avoided as
+it could have unexpected user visible side-effects as debugfs files
+would still exist for a short while after block tracing has been shut
+down.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=205711
+CC: stable@vger.kernel.org
+Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
+Reviewed-by: Ming Lei <ming.lei@redhat.com>
+Tested-by: Ming Lei <ming.lei@redhat.com>
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Reported-by: Tristan Madani <tristmd@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+[bwh: Backported to 4.4:
+ - Drop changes in blk_trace_note_message_enabled(), blk_trace_bio_get_cgid()
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/blkdev.h       |   2 +-
+ include/linux/blktrace_api.h |   6 +-
+ kernel/trace/blktrace.c      | 110 +++++++++++++++++++++++++----------
+ 3 files changed, 86 insertions(+), 32 deletions(-)
+
+diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
+index ab819210e0112..6e83ea61436a7 100644
+--- a/include/linux/blkdev.h
++++ b/include/linux/blkdev.h
+@@ -431,7 +431,7 @@ struct request_queue {
+       unsigned int            sg_reserved_size;
+       int                     node;
+ #ifdef CONFIG_BLK_DEV_IO_TRACE
+-      struct blk_trace        *blk_trace;
++      struct blk_trace __rcu  *blk_trace;
+       struct mutex            blk_trace_mutex;
+ #endif
+       /*
+diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
+index afc1343df3c7a..e644bfe50019c 100644
+--- a/include/linux/blktrace_api.h
++++ b/include/linux/blktrace_api.h
+@@ -51,9 +51,13 @@ void __trace_note_message(struct blk_trace *, const char *fmt, ...);
+  **/
+ #define blk_add_trace_msg(q, fmt, ...)                                        \
+       do {                                                            \
+-              struct blk_trace *bt = (q)->blk_trace;                  \
++              struct blk_trace *bt;                                   \
++                                                                      \
++              rcu_read_lock();                                        \
++              bt = rcu_dereference((q)->blk_trace);                   \
+               if (unlikely(bt))                                       \
+                       __trace_note_message(bt, fmt, ##__VA_ARGS__);   \
++              rcu_read_unlock();                                      \
+       } while (0)
+ #define BLK_TN_MAX_MSG                128
+diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
+index ea18c6997eb23..bf9bf3f22edb5 100644
+--- a/kernel/trace/blktrace.c
++++ b/kernel/trace/blktrace.c
+@@ -319,6 +319,7 @@ static void put_probe_ref(void)
+ static void blk_trace_cleanup(struct blk_trace *bt)
+ {
++      synchronize_rcu();
+       blk_trace_free(bt);
+       put_probe_ref();
+ }
+@@ -629,8 +630,10 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
+ static int __blk_trace_startstop(struct request_queue *q, int start)
+ {
+       int ret;
+-      struct blk_trace *bt = q->blk_trace;
++      struct blk_trace *bt;
++      bt = rcu_dereference_protected(q->blk_trace,
++                                     lockdep_is_held(&q->blk_trace_mutex));
+       if (bt == NULL)
+               return -EINVAL;
+@@ -739,8 +742,8 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
+ void blk_trace_shutdown(struct request_queue *q)
+ {
+       mutex_lock(&q->blk_trace_mutex);
+-
+-      if (q->blk_trace) {
++      if (rcu_dereference_protected(q->blk_trace,
++                                    lockdep_is_held(&q->blk_trace_mutex))) {
+               __blk_trace_startstop(q, 0);
+               __blk_trace_remove(q);
+       }
+@@ -766,10 +769,14 @@ void blk_trace_shutdown(struct request_queue *q)
+ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
+                            unsigned int nr_bytes, u32 what)
+ {
+-      struct blk_trace *bt = q->blk_trace;
++      struct blk_trace *bt;
+-      if (likely(!bt))
++      rcu_read_lock();
++      bt = rcu_dereference(q->blk_trace);
++      if (likely(!bt)) {
++              rcu_read_unlock();
+               return;
++      }
+       if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
+               what |= BLK_TC_ACT(BLK_TC_PC);
+@@ -780,6 +787,7 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
+               __blk_add_trace(bt, blk_rq_pos(rq), nr_bytes,
+                               rq->cmd_flags, what, rq->errors, 0, NULL);
+       }
++      rcu_read_unlock();
+ }
+ static void blk_add_trace_rq_abort(void *ignore,
+@@ -829,13 +837,18 @@ static void blk_add_trace_rq_complete(void *ignore,
+ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
+                             u32 what, int error)
+ {
+-      struct blk_trace *bt = q->blk_trace;
++      struct blk_trace *bt;
+-      if (likely(!bt))
++      rcu_read_lock();
++      bt = rcu_dereference(q->blk_trace);
++      if (likely(!bt)) {
++              rcu_read_unlock();
+               return;
++      }
+       __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
+                       bio->bi_rw, what, error, 0, NULL);
++      rcu_read_unlock();
+ }
+ static void blk_add_trace_bio_bounce(void *ignore,
+@@ -880,10 +893,13 @@ static void blk_add_trace_getrq(void *ignore,
+       if (bio)
+               blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0);
+       else {
+-              struct blk_trace *bt = q->blk_trace;
++              struct blk_trace *bt;
++              rcu_read_lock();
++              bt = rcu_dereference(q->blk_trace);
+               if (bt)
+                       __blk_add_trace(bt, 0, 0, rw, BLK_TA_GETRQ, 0, 0, NULL);
++              rcu_read_unlock();
+       }
+ }
+@@ -895,27 +911,35 @@ static void blk_add_trace_sleeprq(void *ignore,
+       if (bio)
+               blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0);
+       else {
+-              struct blk_trace *bt = q->blk_trace;
++              struct blk_trace *bt;
++              rcu_read_lock();
++              bt = rcu_dereference(q->blk_trace);
+               if (bt)
+                       __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ,
+                                       0, 0, NULL);
++              rcu_read_unlock();
+       }
+ }
+ static void blk_add_trace_plug(void *ignore, struct request_queue *q)
+ {
+-      struct blk_trace *bt = q->blk_trace;
++      struct blk_trace *bt;
++      rcu_read_lock();
++      bt = rcu_dereference(q->blk_trace);
+       if (bt)
+               __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL);
++      rcu_read_unlock();
+ }
+ static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
+                                   unsigned int depth, bool explicit)
+ {
+-      struct blk_trace *bt = q->blk_trace;
++      struct blk_trace *bt;
++      rcu_read_lock();
++      bt = rcu_dereference(q->blk_trace);
+       if (bt) {
+               __be64 rpdu = cpu_to_be64(depth);
+               u32 what;
+@@ -927,14 +951,17 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
+               __blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu);
+       }
++      rcu_read_unlock();
+ }
+ static void blk_add_trace_split(void *ignore,
+                               struct request_queue *q, struct bio *bio,
+                               unsigned int pdu)
+ {
+-      struct blk_trace *bt = q->blk_trace;
++      struct blk_trace *bt;
++      rcu_read_lock();
++      bt = rcu_dereference(q->blk_trace);
+       if (bt) {
+               __be64 rpdu = cpu_to_be64(pdu);
+@@ -942,6 +969,7 @@ static void blk_add_trace_split(void *ignore,
+                               bio->bi_iter.bi_size, bio->bi_rw, BLK_TA_SPLIT,
+                               bio->bi_error, sizeof(rpdu), &rpdu);
+       }
++      rcu_read_unlock();
+ }
+ /**
+@@ -961,11 +989,15 @@ static void blk_add_trace_bio_remap(void *ignore,
+                                   struct request_queue *q, struct bio *bio,
+                                   dev_t dev, sector_t from)
+ {
+-      struct blk_trace *bt = q->blk_trace;
++      struct blk_trace *bt;
+       struct blk_io_trace_remap r;
+-      if (likely(!bt))
++      rcu_read_lock();
++      bt = rcu_dereference(q->blk_trace);
++      if (likely(!bt)) {
++              rcu_read_unlock();
+               return;
++      }
+       r.device_from = cpu_to_be32(dev);
+       r.device_to   = cpu_to_be32(bio->bi_bdev->bd_dev);
+@@ -974,6 +1006,7 @@ static void blk_add_trace_bio_remap(void *ignore,
+       __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
+                       bio->bi_rw, BLK_TA_REMAP, bio->bi_error,
+                       sizeof(r), &r);
++      rcu_read_unlock();
+ }
+ /**
+@@ -994,11 +1027,15 @@ static void blk_add_trace_rq_remap(void *ignore,
+                                  struct request *rq, dev_t dev,
+                                  sector_t from)
+ {
+-      struct blk_trace *bt = q->blk_trace;
++      struct blk_trace *bt;
+       struct blk_io_trace_remap r;
+-      if (likely(!bt))
++      rcu_read_lock();
++      bt = rcu_dereference(q->blk_trace);
++      if (likely(!bt)) {
++              rcu_read_unlock();
+               return;
++      }
+       r.device_from = cpu_to_be32(dev);
+       r.device_to   = cpu_to_be32(disk_devt(rq->rq_disk));
+@@ -1007,6 +1044,7 @@ static void blk_add_trace_rq_remap(void *ignore,
+       __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
+                       rq_data_dir(rq), BLK_TA_REMAP, !!rq->errors,
+                       sizeof(r), &r);
++      rcu_read_unlock();
+ }
+ /**
+@@ -1024,10 +1062,14 @@ void blk_add_driver_data(struct request_queue *q,
+                        struct request *rq,
+                        void *data, size_t len)
+ {
+-      struct blk_trace *bt = q->blk_trace;
++      struct blk_trace *bt;
+-      if (likely(!bt))
++      rcu_read_lock();
++      bt = rcu_dereference(q->blk_trace);
++      if (likely(!bt)) {
++              rcu_read_unlock();
+               return;
++      }
+       if (rq->cmd_type == REQ_TYPE_BLOCK_PC)
+               __blk_add_trace(bt, 0, blk_rq_bytes(rq), 0,
+@@ -1035,6 +1077,7 @@ void blk_add_driver_data(struct request_queue *q,
+       else
+               __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), 0,
+                               BLK_TA_DRV_DATA, rq->errors, len, data);
++      rcu_read_unlock();
+ }
+ EXPORT_SYMBOL_GPL(blk_add_driver_data);
+@@ -1526,6 +1569,7 @@ static int blk_trace_remove_queue(struct request_queue *q)
+               return -EINVAL;
+       put_probe_ref();
++      synchronize_rcu();
+       blk_trace_free(bt);
+       return 0;
+ }
+@@ -1686,6 +1730,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
+       struct hd_struct *p = dev_to_part(dev);
+       struct request_queue *q;
+       struct block_device *bdev;
++      struct blk_trace *bt;
+       ssize_t ret = -ENXIO;
+       bdev = bdget(part_devt(p));
+@@ -1698,21 +1743,23 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
+       mutex_lock(&q->blk_trace_mutex);
++      bt = rcu_dereference_protected(q->blk_trace,
++                                     lockdep_is_held(&q->blk_trace_mutex));
+       if (attr == &dev_attr_enable) {
+-              ret = sprintf(buf, "%u\n", !!q->blk_trace);
++              ret = sprintf(buf, "%u\n", !!bt);
+               goto out_unlock_bdev;
+       }
+-      if (q->blk_trace == NULL)
++      if (bt == NULL)
+               ret = sprintf(buf, "disabled\n");
+       else if (attr == &dev_attr_act_mask)
+-              ret = blk_trace_mask2str(buf, q->blk_trace->act_mask);
++              ret = blk_trace_mask2str(buf, bt->act_mask);
+       else if (attr == &dev_attr_pid)
+-              ret = sprintf(buf, "%u\n", q->blk_trace->pid);
++              ret = sprintf(buf, "%u\n", bt->pid);
+       else if (attr == &dev_attr_start_lba)
+-              ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba);
++              ret = sprintf(buf, "%llu\n", bt->start_lba);
+       else if (attr == &dev_attr_end_lba)
+-              ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
++              ret = sprintf(buf, "%llu\n", bt->end_lba);
+ out_unlock_bdev:
+       mutex_unlock(&q->blk_trace_mutex);
+@@ -1729,6 +1776,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
+       struct block_device *bdev;
+       struct request_queue *q;
+       struct hd_struct *p;
++      struct blk_trace *bt;
+       u64 value;
+       ssize_t ret = -EINVAL;
+@@ -1759,8 +1807,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
+       mutex_lock(&q->blk_trace_mutex);
++      bt = rcu_dereference_protected(q->blk_trace,
++                                     lockdep_is_held(&q->blk_trace_mutex));
+       if (attr == &dev_attr_enable) {
+-              if (!!value == !!q->blk_trace) {
++              if (!!value == !!bt) {
+                       ret = 0;
+                       goto out_unlock_bdev;
+               }
+@@ -1772,18 +1822,18 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
+       }
+       ret = 0;
+-      if (q->blk_trace == NULL)
++      if (bt == NULL)
+               ret = blk_trace_setup_queue(q, bdev);
+       if (ret == 0) {
+               if (attr == &dev_attr_act_mask)
+-                      q->blk_trace->act_mask = value;
++                      bt->act_mask = value;
+               else if (attr == &dev_attr_pid)
+-                      q->blk_trace->pid = value;
++                      bt->pid = value;
+               else if (attr == &dev_attr_start_lba)
+-                      q->blk_trace->start_lba = value;
++                      bt->start_lba = value;
+               else if (attr == &dev_attr_end_lba)
+-                      q->blk_trace->end_lba = value;
++                      bt->end_lba = value;
+       }
+ out_unlock_bdev:
+-- 
+2.20.1
+
diff --git a/queue-4.4/chardev-add-helper-function-to-register-char-devs-wi.patch b/queue-4.4/chardev-add-helper-function-to-register-char-devs-wi.patch
new file mode 100644 (file)
index 0000000..f408164
--- /dev/null
@@ -0,0 +1,218 @@
+From 97a8c1f2fc99b003af733258a9da0e81f7a28f65 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 17 Mar 2017 12:48:08 -0600
+Subject: chardev: add helper function to register char devs with a struct
+ device
+
+From: Logan Gunthorpe <logang@deltatee.com>
+
+commit 233ed09d7fdacf592ee91e6c97ce5f4364fbe7c0 upstream.
+
+Credit for this patch is shared with Dan Williams [1]. I've
+taken things one step further to make the helper function more
+useful and clean up calling code.
+
+There's a common pattern in the kernel whereby a struct cdev is placed
+in a structure along side a struct device which manages the life-cycle
+of both. In the naive approach, the reference counting is broken and
+the struct device can free everything before the chardev code
+is entirely released.
+
+Many developers have solved this problem by linking the internal kobjs
+in this fashion:
+
+cdev.kobj.parent = &parent_dev.kobj;
+
+The cdev code explicitly gets and puts a reference to its kobj parent.
+So this seems like it was intended to be used this way. Dmitry Torokhov
+first put this in place in 2012 with this commit:
+
+2f0157f char_dev: pin parent kobject
+
+and the first instance of the fix was then done in the input subsystem
+in the following commit:
+
+4a215aa Input: fix use-after-free introduced with dynamic minor changes
+
+Subsequently over the years, however, this issue seems to have tripped
+up multiple developers independently. For example, see these commits:
+
+0d5b7da iio: Prevent race between IIO chardev opening and IIO device
+(by Lars-Peter Clausen in 2013)
+
+ba0ef85 tpm: Fix initialization of the cdev
+(by Jason Gunthorpe in 2015)
+
+5b28dde [media] media: fix use-after-free in cdev_put() when app exits
+after driver unbind
+(by Shuah Khan in 2016)
+
+This technique is similarly done in at least 15 places within the kernel
+and probably should have been done so in another, at least, 5 places.
+The kobj line also looks very suspect in that one would not expect
+drivers to have to mess with kobject internals in this way.
+Even highly experienced kernel developers can be surprised by this
+code, as seen in [2].
+
+To help alleviate this situation, and hopefully prevent future
+wasted effort on this problem, this patch introduces a helper function
+to register a char device along with its parent struct device.
+This creates a more regular API for tying a char device to its parent
+without the developer having to set members in the underlying kobject.
+
+This patch introduces cdev_device_add and cdev_device_del which
+replace a common pattern including setting the kobj parent, calling
+cdev_add and then calling device_add. It also introduces cdev_set_parent
+for the few cases that set the kobject parent without using device_add.
+
+[1] https://lkml.org/lkml/2017/2/13/700
+[2] https://lkml.org/lkml/2017/2/10/370
+
+Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Reviewed-by: Hans Verkuil <hans.verkuil@cisco.com>
+Reviewed-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/char_dev.c        | 86 ++++++++++++++++++++++++++++++++++++++++++++
+ include/linux/cdev.h |  5 +++
+ 2 files changed, 91 insertions(+)
+
+diff --git a/fs/char_dev.c b/fs/char_dev.c
+index f1f3bb8127997..9154a2d7b195a 100644
+--- a/fs/char_dev.c
++++ b/fs/char_dev.c
+@@ -472,6 +472,85 @@ int cdev_add(struct cdev *p, dev_t dev, unsigned count)
+       return 0;
+ }
++/**
++ * cdev_set_parent() - set the parent kobject for a char device
++ * @p: the cdev structure
++ * @kobj: the kobject to take a reference to
++ *
++ * cdev_set_parent() sets a parent kobject which will be referenced
++ * appropriately so the parent is not freed before the cdev. This
++ * should be called before cdev_add.
++ */
++void cdev_set_parent(struct cdev *p, struct kobject *kobj)
++{
++      WARN_ON(!kobj->state_initialized);
++      p->kobj.parent = kobj;
++}
++
++/**
++ * cdev_device_add() - add a char device and its corresponding
++ *    struct device, linking them
++ * @dev: the device structure
++ * @cdev: the cdev structure
++ *
++ * cdev_device_add() adds the char device represented by @cdev to the system,
++ * just as cdev_add does. It then adds @dev to the system using device_add.
++ * The dev_t for the char device will be taken from the struct device which
++ * needs to be initialized first. This helper function correctly takes a
++ * reference to the parent device so the parent will not get released until
++ * all references to the cdev are released.
++ *
++ * This helper uses dev->devt for the device number. If it is not set
++ * it will not add the cdev and it will be equivalent to device_add.
++ *
++ * This function should be used whenever the struct cdev and the
++ * struct device are members of the same structure whose lifetime is
++ * managed by the struct device.
++ *
++ * NOTE: Callers must assume that userspace was able to open the cdev and
++ * can call cdev fops callbacks at any time, even if this function fails.
++ */
++int cdev_device_add(struct cdev *cdev, struct device *dev)
++{
++      int rc = 0;
++
++      if (dev->devt) {
++              cdev_set_parent(cdev, &dev->kobj);
++
++              rc = cdev_add(cdev, dev->devt, 1);
++              if (rc)
++                      return rc;
++      }
++
++      rc = device_add(dev);
++      if (rc)
++              cdev_del(cdev);
++
++      return rc;
++}
++
++/**
++ * cdev_device_del() - inverse of cdev_device_add
++ * @dev: the device structure
++ * @cdev: the cdev structure
++ *
++ * cdev_device_del() is a helper function to call cdev_del and device_del.
++ * It should be used whenever cdev_device_add is used.
++ *
++ * If dev->devt is not set it will not remove the cdev and will be equivalent
++ * to device_del.
++ *
++ * NOTE: This guarantees that associated sysfs callbacks are not running
++ * or runnable, however any cdevs already open will remain and their fops
++ * will still be callable even after this function returns.
++ */
++void cdev_device_del(struct cdev *cdev, struct device *dev)
++{
++      device_del(dev);
++      if (dev->devt)
++              cdev_del(cdev);
++}
++
+ static void cdev_unmap(dev_t dev, unsigned count)
+ {
+       kobj_unmap(cdev_map, dev, count);
+@@ -483,6 +562,10 @@ static void cdev_unmap(dev_t dev, unsigned count)
+  *
+  * cdev_del() removes @p from the system, possibly freeing the structure
+  * itself.
++ *
++ * NOTE: This guarantees that cdev device will no longer be able to be
++ * opened, however any cdevs already open will remain and their fops will
++ * still be callable even after cdev_del returns.
+  */
+ void cdev_del(struct cdev *p)
+ {
+@@ -571,5 +654,8 @@ EXPORT_SYMBOL(cdev_init);
+ EXPORT_SYMBOL(cdev_alloc);
+ EXPORT_SYMBOL(cdev_del);
+ EXPORT_SYMBOL(cdev_add);
++EXPORT_SYMBOL(cdev_set_parent);
++EXPORT_SYMBOL(cdev_device_add);
++EXPORT_SYMBOL(cdev_device_del);
+ EXPORT_SYMBOL(__register_chrdev);
+ EXPORT_SYMBOL(__unregister_chrdev);
+diff --git a/include/linux/cdev.h b/include/linux/cdev.h
+index f8763615a5f2d..408bc09ce497b 100644
+--- a/include/linux/cdev.h
++++ b/include/linux/cdev.h
+@@ -4,6 +4,7 @@
+ #include <linux/kobject.h>
+ #include <linux/kdev_t.h>
+ #include <linux/list.h>
++#include <linux/device.h>
+ struct file_operations;
+ struct inode;
+@@ -26,6 +27,10 @@ void cdev_put(struct cdev *p);
+ int cdev_add(struct cdev *, dev_t, unsigned);
++void cdev_set_parent(struct cdev *p, struct kobject *kobj);
++int cdev_device_add(struct cdev *cdev, struct device *dev);
++void cdev_device_del(struct cdev *cdev, struct device *dev);
++
+ void cdev_del(struct cdev *);
+ void cd_forget(struct inode *);
+-- 
+2.20.1
+
diff --git a/queue-4.4/ext4-add-cond_resched-to-ext4_protect_reserved_inode.patch b/queue-4.4/ext4-add-cond_resched-to-ext4_protect_reserved_inode.patch
new file mode 100644 (file)
index 0000000..f4f2fa3
--- /dev/null
@@ -0,0 +1,69 @@
+From 0aab6dfe4fbb5946ad60d17d44809b87c6db0f7f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 Feb 2020 20:17:52 -0500
+Subject: ext4: add cond_resched() to ext4_protect_reserved_inode
+
+From: Shijie Luo <luoshijie1@huawei.com>
+
+commit af133ade9a40794a37104ecbcc2827c0ea373a3c upstream.
+
+When journal size is set too big by "mkfs.ext4 -J size=", or when
+we mount a crafted image to make journal inode->i_size too big,
+the loop, "while (i < num)", holds cpu too long. This could cause
+soft lockup.
+
+[  529.357541] Call trace:
+[  529.357551]  dump_backtrace+0x0/0x198
+[  529.357555]  show_stack+0x24/0x30
+[  529.357562]  dump_stack+0xa4/0xcc
+[  529.357568]  watchdog_timer_fn+0x300/0x3e8
+[  529.357574]  __hrtimer_run_queues+0x114/0x358
+[  529.357576]  hrtimer_interrupt+0x104/0x2d8
+[  529.357580]  arch_timer_handler_virt+0x38/0x58
+[  529.357584]  handle_percpu_devid_irq+0x90/0x248
+[  529.357588]  generic_handle_irq+0x34/0x50
+[  529.357590]  __handle_domain_irq+0x68/0xc0
+[  529.357593]  gic_handle_irq+0x6c/0x150
+[  529.357595]  el1_irq+0xb8/0x140
+[  529.357599]  __ll_sc_atomic_add_return_acquire+0x14/0x20
+[  529.357668]  ext4_map_blocks+0x64/0x5c0 [ext4]
+[  529.357693]  ext4_setup_system_zone+0x330/0x458 [ext4]
+[  529.357717]  ext4_fill_super+0x2170/0x2ba8 [ext4]
+[  529.357722]  mount_bdev+0x1a8/0x1e8
+[  529.357746]  ext4_mount+0x44/0x58 [ext4]
+[  529.357748]  mount_fs+0x50/0x170
+[  529.357752]  vfs_kern_mount.part.9+0x54/0x188
+[  529.357755]  do_mount+0x5ac/0xd78
+[  529.357758]  ksys_mount+0x9c/0x118
+[  529.357760]  __arm64_sys_mount+0x28/0x38
+[  529.357764]  el0_svc_common+0x78/0x130
+[  529.357766]  el0_svc_handler+0x38/0x78
+[  529.357769]  el0_svc+0x8/0xc
+[  541.356516] watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [mount:18674]
+
+Link: https://lore.kernel.org/r/20200211011752.29242-1-luoshijie1@huawei.com
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Shijie Luo <luoshijie1@huawei.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/block_validity.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
+index ccd80f2b3b19b..d5055b3adcccd 100644
+--- a/fs/ext4/block_validity.c
++++ b/fs/ext4/block_validity.c
+@@ -152,6 +152,7 @@ static int ext4_protect_reserved_inode(struct super_block *sb, u32 ino)
+               return PTR_ERR(inode);
+       num = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
+       while (i < num) {
++              cond_resched();
+               map.m_lblk = i;
+               map.m_len = num - i;
+               n = ext4_map_blocks(NULL, inode, &map, 0);
+-- 
+2.20.1
+
diff --git a/queue-4.4/net-ipv6-add-net-argument-to-ip6_dst_lookup_flow.patch b/queue-4.4/net-ipv6-add-net-argument-to-ip6_dst_lookup_flow.patch
new file mode 100644 (file)
index 0000000..8dd6f27
--- /dev/null
@@ -0,0 +1,249 @@
+From 8a8ddda4cb5f9ea6bcc6b32d5938a978b73ee835 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 4 Dec 2019 15:35:52 +0100
+Subject: net: ipv6: add net argument to ip6_dst_lookup_flow
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+commit c4e85f73afb6384123e5ef1bba3315b2e3ad031e upstream.
+
+This will be used in the conversion of ipv6_stub to ip6_dst_lookup_flow,
+as some modules currently pass a net argument without a socket to
+ip6_dst_lookup. This is equivalent to commit 343d60aada5a ("ipv6: change
+ipv6_stub_impl.ipv6_dst_lookup to take net argument").
+
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+[bwh: Backported to 4.4: adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/ipv6.h               | 2 +-
+ net/dccp/ipv6.c                  | 6 +++---
+ net/ipv6/af_inet6.c              | 2 +-
+ net/ipv6/datagram.c              | 2 +-
+ net/ipv6/inet6_connection_sock.c | 4 ++--
+ net/ipv6/ip6_output.c            | 8 ++++----
+ net/ipv6/raw.c                   | 2 +-
+ net/ipv6/syncookies.c            | 2 +-
+ net/ipv6/tcp_ipv6.c              | 4 ++--
+ net/l2tp/l2tp_ip6.c              | 2 +-
+ net/sctp/ipv6.c                  | 4 ++--
+ 11 files changed, 19 insertions(+), 19 deletions(-)
+
+diff --git a/include/net/ipv6.h b/include/net/ipv6.h
+index c07cf9596b6fb..6258264a0bf73 100644
+--- a/include/net/ipv6.h
++++ b/include/net/ipv6.h
+@@ -853,7 +853,7 @@ static inline struct sk_buff *ip6_finish_skb(struct sock *sk)
+ int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
+                  struct flowi6 *fl6);
+-struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
++struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
+                                     const struct in6_addr *final_dst);
+ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
+                                        const struct in6_addr *final_dst);
+diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
+index 10eabd1a60aa7..736cc95b52011 100644
+--- a/net/dccp/ipv6.c
++++ b/net/dccp/ipv6.c
+@@ -209,7 +209,7 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req
+       final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
+       rcu_read_unlock();
+-      dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
++      dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
+       if (IS_ERR(dst)) {
+               err = PTR_ERR(dst);
+               dst = NULL;
+@@ -276,7 +276,7 @@ static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
+       security_skb_classify_flow(rxskb, flowi6_to_flowi(&fl6));
+       /* sk = NULL, but it is safe for now. RST socket required. */
+-      dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
++      dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
+       if (!IS_ERR(dst)) {
+               skb_dst_set(skb, dst);
+               ip6_xmit(ctl_sk, skb, &fl6, NULL, 0);
+@@ -879,7 +879,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
+       opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+       final_p = fl6_update_dst(&fl6, opt, &final);
+-      dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
++      dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
+       if (IS_ERR(dst)) {
+               err = PTR_ERR(dst);
+               goto failure;
+diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
+index 46bf78a2843ac..9d04393e87019 100644
+--- a/net/ipv6/af_inet6.c
++++ b/net/ipv6/af_inet6.c
+@@ -683,7 +683,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
+                                        &final);
+               rcu_read_unlock();
+-              dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
++              dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
+               if (IS_ERR(dst)) {
+                       sk->sk_route_caps = 0;
+                       sk->sk_err_soft = -PTR_ERR(dst);
+diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
+index 27cdf543c5390..f33154365b643 100644
+--- a/net/ipv6/datagram.c
++++ b/net/ipv6/datagram.c
+@@ -179,7 +179,7 @@ ipv4_connected:
+       final_p = fl6_update_dst(&fl6, opt, &final);
+       rcu_read_unlock();
+-      dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
++      dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
+       err = 0;
+       if (IS_ERR(dst)) {
+               err = PTR_ERR(dst);
+diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
+index a7ca2cde2ecbc..b31ab511c7671 100644
+--- a/net/ipv6/inet6_connection_sock.c
++++ b/net/ipv6/inet6_connection_sock.c
+@@ -88,7 +88,7 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk,
+       fl6->fl6_sport = htons(ireq->ir_num);
+       security_req_classify_flow(req, flowi6_to_flowi(fl6));
+-      dst = ip6_dst_lookup_flow(sk, fl6, final_p);
++      dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p);
+       if (IS_ERR(dst))
+               return NULL;
+@@ -142,7 +142,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
+       dst = __inet6_csk_dst_check(sk, np->dst_cookie);
+       if (!dst) {
+-              dst = ip6_dst_lookup_flow(sk, fl6, final_p);
++              dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p);
+               if (!IS_ERR(dst))
+                       ip6_dst_store(sk, dst, NULL, NULL);
+diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
+index e39dc94486b2c..1e2b8d33d303b 100644
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -1057,13 +1057,13 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup);
+  *    It returns a valid dst pointer on success, or a pointer encoded
+  *    error code.
+  */
+-struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
++struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
+                                     const struct in6_addr *final_dst)
+ {
+       struct dst_entry *dst = NULL;
+       int err;
+-      err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
++      err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
+       if (err)
+               return ERR_PTR(err);
+       if (final_dst)
+@@ -1071,7 +1071,7 @@ struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
+       if (!fl6->flowi6_oif)
+               fl6->flowi6_oif = l3mdev_fib_oif(dst->dev);
+-      return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
++      return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
+ }
+ EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
+@@ -1096,7 +1096,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
+       dst = ip6_sk_dst_check(sk, dst, fl6);
+       if (!dst)
+-              dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
++              dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
+       return dst;
+ }
+diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
+index 86c75e97cfec3..67cdcd3d644fa 100644
+--- a/net/ipv6/raw.c
++++ b/net/ipv6/raw.c
+@@ -889,7 +889,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+       if (hdrincl)
+               fl6.flowi6_flags |= FLOWI_FLAG_KNOWN_NH;
+-      dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
++      dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
+       if (IS_ERR(dst)) {
+               err = PTR_ERR(dst);
+               goto out;
+diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
+index aee87282d3521..fb3ba2a511196 100644
+--- a/net/ipv6/syncookies.c
++++ b/net/ipv6/syncookies.c
+@@ -231,7 +231,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
+               fl6.fl6_sport = inet_sk(sk)->inet_sport;
+               security_req_classify_flow(req, flowi6_to_flowi(&fl6));
+-              dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
++              dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
+               if (IS_ERR(dst))
+                       goto out_free;
+       }
+diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
+index 6a36fcc5c4e11..b4ffcec732b49 100644
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -245,7 +245,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
+       security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+-      dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
++      dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
+       if (IS_ERR(dst)) {
+               err = PTR_ERR(dst);
+               goto failure;
+@@ -831,7 +831,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
+        * Underlying function will use this to retrieve the network
+        * namespace
+        */
+-      dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
++      dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
+       if (!IS_ERR(dst)) {
+               skb_dst_set(buff, dst);
+               ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass);
+diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
+index af04a8a682697..2b5230ef8536a 100644
+--- a/net/l2tp/l2tp_ip6.c
++++ b/net/l2tp/l2tp_ip6.c
+@@ -619,7 +619,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+       security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+-      dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
++      dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
+       if (IS_ERR(dst)) {
+               err = PTR_ERR(dst);
+               goto out;
+diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
+index dd097e065f39d..1a6849add0e34 100644
+--- a/net/sctp/ipv6.c
++++ b/net/sctp/ipv6.c
+@@ -268,7 +268,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
+       final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
+       rcu_read_unlock();
+-      dst = ip6_dst_lookup_flow(sk, fl6, final_p);
++      dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p);
+       if (!asoc || saddr) {
+               t->dst = dst;
+               memcpy(fl, &_fl, sizeof(_fl));
+@@ -326,7 +326,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
+               fl6->saddr = laddr->a.v6.sin6_addr;
+               fl6->fl6_sport = laddr->a.v6.sin6_port;
+               final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
+-              bdst = ip6_dst_lookup_flow(sk, fl6, final_p);
++              bdst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p);
+               if (IS_ERR(bdst))
+                       continue;
+-- 
+2.20.1
+
diff --git a/queue-4.4/net-ipv6_stub-use-ip6_dst_lookup_flow-instead-of-ip6.patch b/queue-4.4/net-ipv6_stub-use-ip6_dst_lookup_flow-instead-of-ip6.patch
new file mode 100644 (file)
index 0000000..ea49ee3
--- /dev/null
@@ -0,0 +1,206 @@
+From e3a32350aae0a76746648667d9515e286910ed50 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 4 Dec 2019 15:35:53 +0100
+Subject: net: ipv6_stub: use ip6_dst_lookup_flow instead of ip6_dst_lookup
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+commit 6c8991f41546c3c472503dff1ea9daaddf9331c2 upstream.
+
+ipv6_stub uses the ip6_dst_lookup function to allow other modules to
+perform IPv6 lookups. However, this function skips the XFRM layer
+entirely.
+
+All users of ipv6_stub->ip6_dst_lookup use ip_route_output_flow (via the
+ip_route_output_key and ip_route_output helpers) for their IPv4 lookups,
+which calls xfrm_lookup_route(). This patch fixes this inconsistent
+behavior by switching the stub to ip6_dst_lookup_flow, which also calls
+xfrm_lookup_route().
+
+This requires some changes in all the callers, as these two functions
+take different arguments and have different return types.
+
+Fixes: 5f81bd2e5d80 ("ipv6: export a stub for IPv6 symbols used by vxlan")
+Reported-by: Xiumei Mu <xmu@redhat.com>
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+[bwh: Backported to 4.4:
+ - Drop changes in lwt_bpf.c, mlx5, and rxe
+ - Adjust filename, context, indentation]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/infiniband/core/addr.c |  6 +++---
+ drivers/net/geneve.c           |  4 +++-
+ drivers/net/vxlan.c            | 10 ++++------
+ include/net/addrconf.h         |  6 ++++--
+ net/ipv6/addrconf_core.c       | 11 ++++++-----
+ net/ipv6/af_inet6.c            |  2 +-
+ net/mpls/af_mpls.c             |  7 +++----
+ net/tipc/udp_media.c           |  9 ++++++---
+ 8 files changed, 30 insertions(+), 25 deletions(-)
+
+diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
+index 68835de07e071..a8349100854ef 100644
+--- a/drivers/infiniband/core/addr.c
++++ b/drivers/infiniband/core/addr.c
+@@ -293,9 +293,9 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
+       fl6.saddr = src_in->sin6_addr;
+       fl6.flowi6_oif = addr->bound_dev_if;
+-      ret = ipv6_stub->ipv6_dst_lookup(addr->net, NULL, &dst, &fl6);
+-      if (ret < 0)
+-              goto put;
++      dst = ipv6_stub->ipv6_dst_lookup_flow(addr->net, NULL, &fl6, NULL);
++      if (IS_ERR(dst))
++              return PTR_ERR(dst);
+       if (ipv6_addr_any(&fl6.saddr)) {
+               ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev,
+diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
+index 1988bc00de3c7..ec13e2ae6d16e 100644
+--- a/drivers/net/geneve.c
++++ b/drivers/net/geneve.c
+@@ -781,7 +781,9 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
+               fl6->daddr = geneve->remote.sin6.sin6_addr;
+       }
+-      if (ipv6_stub->ipv6_dst_lookup(geneve->net, gs6->sock->sk, &dst, fl6)) {
++      dst = ipv6_stub->ipv6_dst_lookup_flow(geneve->net, gs6->sock->sk, fl6,
++                                            NULL);
++      if (IS_ERR(dst)) {
+               netdev_dbg(dev, "no route to %pI6\n", &fl6->daddr);
+               return ERR_PTR(-ENETUNREACH);
+       }
+diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
+index 752f44a0e3afc..d6ae6d3c98ed8 100644
+--- a/drivers/net/vxlan.c
++++ b/drivers/net/vxlan.c
+@@ -1864,7 +1864,6 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
+ {
+       struct dst_entry *ndst;
+       struct flowi6 fl6;
+-      int err;
+       memset(&fl6, 0, sizeof(fl6));
+       fl6.flowi6_oif = oif;
+@@ -1873,11 +1872,10 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
+       fl6.flowi6_mark = skb->mark;
+       fl6.flowi6_proto = IPPROTO_UDP;
+-      err = ipv6_stub->ipv6_dst_lookup(vxlan->net,
+-                                       vxlan->vn6_sock->sock->sk,
+-                                       &ndst, &fl6);
+-      if (err < 0)
+-              return ERR_PTR(err);
++      ndst = ipv6_stub->ipv6_dst_lookup_flow(vxlan->net, vxlan->vn6_sock->sock->sk,
++                                             &fl6, NULL);
++      if (unlikely(IS_ERR(ndst)))
++              return ERR_PTR(-ENETUNREACH);
+       *saddr = fl6.saddr;
+       return ndst;
+diff --git a/include/net/addrconf.h b/include/net/addrconf.h
+index af032e5405f62..27a1833c7b00f 100644
+--- a/include/net/addrconf.h
++++ b/include/net/addrconf.h
+@@ -192,8 +192,10 @@ struct ipv6_stub {
+                                const struct in6_addr *addr);
+       int (*ipv6_sock_mc_drop)(struct sock *sk, int ifindex,
+                                const struct in6_addr *addr);
+-      int (*ipv6_dst_lookup)(struct net *net, struct sock *sk,
+-                             struct dst_entry **dst, struct flowi6 *fl6);
++      struct dst_entry *(*ipv6_dst_lookup_flow)(struct net *net,
++                                                const struct sock *sk,
++                                                struct flowi6 *fl6,
++                                                const struct in6_addr *final_dst);
+       void (*udpv6_encap_enable)(void);
+       void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr,
+                             const struct in6_addr *solicited_addr,
+diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
+index bfa941fc11650..129324b36fb60 100644
+--- a/net/ipv6/addrconf_core.c
++++ b/net/ipv6/addrconf_core.c
+@@ -107,15 +107,16 @@ int inet6addr_notifier_call_chain(unsigned long val, void *v)
+ }
+ EXPORT_SYMBOL(inet6addr_notifier_call_chain);
+-static int eafnosupport_ipv6_dst_lookup(struct net *net, struct sock *u1,
+-                                      struct dst_entry **u2,
+-                                      struct flowi6 *u3)
++static struct dst_entry *eafnosupport_ipv6_dst_lookup_flow(struct net *net,
++                                                         const struct sock *sk,
++                                                         struct flowi6 *fl6,
++                                                         const struct in6_addr *final_dst)
+ {
+-      return -EAFNOSUPPORT;
++      return ERR_PTR(-EAFNOSUPPORT);
+ }
+ const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
+-      .ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup,
++      .ipv6_dst_lookup_flow = eafnosupport_ipv6_dst_lookup_flow,
+ };
+ EXPORT_SYMBOL_GPL(ipv6_stub);
+diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
+index 9d04393e87019..37a562fc13d5b 100644
+--- a/net/ipv6/af_inet6.c
++++ b/net/ipv6/af_inet6.c
+@@ -841,7 +841,7 @@ static struct pernet_operations inet6_net_ops = {
+ static const struct ipv6_stub ipv6_stub_impl = {
+       .ipv6_sock_mc_join = ipv6_sock_mc_join,
+       .ipv6_sock_mc_drop = ipv6_sock_mc_drop,
+-      .ipv6_dst_lookup = ip6_dst_lookup,
++      .ipv6_dst_lookup_flow = ip6_dst_lookup_flow,
+       .udpv6_encap_enable = udpv6_encap_enable,
+       .ndisc_send_na = ndisc_send_na,
+       .nd_tbl = &nd_tbl,
+diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
+index c2ce7dec51980..50d9138b2a1cc 100644
+--- a/net/mpls/af_mpls.c
++++ b/net/mpls/af_mpls.c
+@@ -470,16 +470,15 @@ static struct net_device *inet6_fib_lookup_dev(struct net *net,
+       struct net_device *dev;
+       struct dst_entry *dst;
+       struct flowi6 fl6;
+-      int err;
+       if (!ipv6_stub)
+               return ERR_PTR(-EAFNOSUPPORT);
+       memset(&fl6, 0, sizeof(fl6));
+       memcpy(&fl6.daddr, addr, sizeof(struct in6_addr));
+-      err = ipv6_stub->ipv6_dst_lookup(net, NULL, &dst, &fl6);
+-      if (err)
+-              return ERR_PTR(err);
++      dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
++      if (IS_ERR(dst))
++              return ERR_CAST(dst);
+       dev = dst->dev;
+       dev_hold(dev);
+diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
+index cb39f1c4251ea..ac20794392420 100644
+--- a/net/tipc/udp_media.c
++++ b/net/tipc/udp_media.c
+@@ -200,10 +200,13 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
+                       .saddr = src->ipv6,
+                       .flowi6_proto = IPPROTO_UDP
+               };
+-              err = ipv6_stub->ipv6_dst_lookup(net, ub->ubsock->sk, &ndst,
+-                                               &fl6);
+-              if (err)
++              ndst = ipv6_stub->ipv6_dst_lookup_flow(net,
++                                                     ub->ubsock->sk,
++                                                     &fl6, NULL);
++              if (IS_ERR(ndst)) {
++                      err = PTR_ERR(ndst);
+                       goto tx_error;
++              }
+               ttl = ip6_dst_hoplimit(ndst);
+               err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb,
+                                          ndst->dev, &src->ipv6,
+-- 
+2.20.1
+
diff --git a/queue-4.4/phy-micrel-disable-auto-negotiation-on-startup.patch b/queue-4.4/phy-micrel-disable-auto-negotiation-on-startup.patch
new file mode 100644 (file)
index 0000000..8b4ad31
--- /dev/null
@@ -0,0 +1,58 @@
+From c19c2ad01c9ed92507f536b2c63f7eb755cb277a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 Feb 2016 19:18:23 +0100
+Subject: phy: micrel: Disable auto negotiation on startup
+
+From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
+
+[ Upstream commit 99f81afc139c6edd14d77a91ee91685a414a1c66 ]
+
+Disable auto negotiation on init to properly detect an already plugged
+cable at boot.
+
+At boot, when the phy is started, it is in the PHY_UP state.
+However, if a cable is plugged at boot, because auto negotiation is already
+enabled at the time we get the first interrupt, the phy is already running.
+But the state machine then switches from PHY_UP to PHY_AN and calls
+phy_start_aneg(). phy_start_aneg() will not do anything because aneg is
+already enabled on the phy. It will then wait for an interrupt before going
+further. This interrupt will never happen unless the cable is unplugged and
+then replugged.
+
+It was working properly before 321beec5047a (net: phy: Use interrupts when
+available in NOLINK state) because switching to NOLINK meant starting
+polling the phy, even if IRQs were enabled.
+
+Fixes: 321beec5047a (net: phy: Use interrupts when available in NOLINK state)
+Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/micrel.c | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
+index 4eba646789c30..98166e144f2dd 100644
+--- a/drivers/net/phy/micrel.c
++++ b/drivers/net/phy/micrel.c
+@@ -285,6 +285,17 @@ static int kszphy_config_init(struct phy_device *phydev)
+       if (priv->led_mode >= 0)
+               kszphy_setup_led(phydev, type->led_mode_reg, priv->led_mode);
++      if (phy_interrupt_is_valid(phydev)) {
++              int ctl = phy_read(phydev, MII_BMCR);
++
++              if (ctl < 0)
++                      return ctl;
++
++              ret = phy_write(phydev, MII_BMCR, ctl & ~BMCR_ANENABLE);
++              if (ret < 0)
++                      return ret;
++      }
++
+       return 0;
+ }
+-- 
+2.20.1
+
diff --git a/queue-4.4/phy-micrel-ensure-interrupts-are-reenabled-on-resume.patch b/queue-4.4/phy-micrel-ensure-interrupts-are-reenabled-on-resume.patch
new file mode 100644 (file)
index 0000000..584f10c
--- /dev/null
@@ -0,0 +1,62 @@
+From 192c93e5cbb1c3f6f5f563397fd47bdcb14b1f55 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 Feb 2016 19:18:22 +0100
+Subject: phy: micrel: Ensure interrupts are reenabled on resume
+
+From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
+
+[ Upstream commit f5aba91d7f186cba84af966a741a0346de603cd4 ]
+
+At least on ksz8081, when getting back from power down, interrupts are
+disabled. Ensure they are reenabled if they were previously enabled.
+
+This fixes resuming which is failing on the xplained boards from atmel
+since 321beec5047a (net: phy: Use interrupts when available in NOLINK
+state)
+
+Fixes: 321beec5047a (net: phy: Use interrupts when available in NOLINK state)
+Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/micrel.c | 17 ++++++++++++++++-
+ 1 file changed, 16 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
+index 98166e144f2dd..48788ef0ac639 100644
+--- a/drivers/net/phy/micrel.c
++++ b/drivers/net/phy/micrel.c
+@@ -603,6 +603,21 @@ ksz9021_wr_mmd_phyreg(struct phy_device *phydev, int ptrad, int devnum,
+ {
+ }
++static int kszphy_resume(struct phy_device *phydev)
++{
++      int value;
++
++      mutex_lock(&phydev->lock);
++
++      value = phy_read(phydev, MII_BMCR);
++      phy_write(phydev, MII_BMCR, value & ~BMCR_PDOWN);
++
++      kszphy_config_intr(phydev);
++      mutex_unlock(&phydev->lock);
++
++      return 0;
++}
++
+ static int kszphy_probe(struct phy_device *phydev)
+ {
+       const struct kszphy_type *type = phydev->drv->driver_data;
+@@ -794,7 +809,7 @@ static struct phy_driver ksphy_driver[] = {
+       .ack_interrupt  = kszphy_ack_interrupt,
+       .config_intr    = kszphy_config_intr,
+       .suspend        = genphy_suspend,
+-      .resume         = genphy_resume,
++      .resume         = kszphy_resume,
+       .driver         = { .owner = THIS_MODULE,},
+ }, {
+       .phy_id         = PHY_ID_KSZ8061,
+-- 
+2.20.1
+
diff --git a/queue-4.4/ptp-create-pins-together-with-the-rest-of-attributes.patch b/queue-4.4/ptp-create-pins-together-with-the-rest-of-attributes.patch
new file mode 100644 (file)
index 0000000..6efaee1
--- /dev/null
@@ -0,0 +1,171 @@
+From 1d25f0caeabb1ff4e0494022f4905583ccc3e9ad Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Feb 2017 10:23:34 -0800
+Subject: ptp: create "pins" together with the rest of attributes
+
+From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+
+commit 85a66e55019583da1e0f18706b7a8281c9f6de5b upstream.
+
+Let's switch to using device_create_with_groups(), which will allow us to
+create "pins" attribute group together with the rest of ptp device
+attributes, and before userspace gets notified about ptp device creation.
+
+Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+[bwh: Backported to 4.9: adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ptp/ptp_clock.c   | 20 +++++++++++---------
+ drivers/ptp/ptp_private.h |  7 ++++---
+ drivers/ptp/ptp_sysfs.c   | 39 +++++++++------------------------------
+ 3 files changed, 24 insertions(+), 42 deletions(-)
+
+diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
+index 113b66f1fc9ed..5f5d54f5153fb 100644
+--- a/drivers/ptp/ptp_clock.c
++++ b/drivers/ptp/ptp_clock.c
+@@ -214,16 +214,17 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
+       mutex_init(&ptp->pincfg_mux);
+       init_waitqueue_head(&ptp->tsev_wq);
++      err = ptp_populate_pin_groups(ptp);
++      if (err)
++              goto no_pin_groups;
++
+       /* Create a new device in our class. */
+-      ptp->dev = device_create(ptp_class, parent, ptp->devid, ptp,
+-                               "ptp%d", ptp->index);
++      ptp->dev = device_create_with_groups(ptp_class, parent, ptp->devid,
++                                           ptp, ptp->pin_attr_groups,
++                                           "ptp%d", ptp->index);
+       if (IS_ERR(ptp->dev))
+               goto no_device;
+-      err = ptp_populate_sysfs(ptp);
+-      if (err)
+-              goto no_sysfs;
+-
+       /* Register a new PPS source. */
+       if (info->pps) {
+               struct pps_source_info pps;
+@@ -251,10 +252,10 @@ no_clock:
+       if (ptp->pps_source)
+               pps_unregister_source(ptp->pps_source);
+ no_pps:
+-      ptp_cleanup_sysfs(ptp);
+-no_sysfs:
+       device_destroy(ptp_class, ptp->devid);
+ no_device:
++      ptp_cleanup_pin_groups(ptp);
++no_pin_groups:
+       mutex_destroy(&ptp->tsevq_mux);
+       mutex_destroy(&ptp->pincfg_mux);
+ no_slot:
+@@ -272,8 +273,9 @@ int ptp_clock_unregister(struct ptp_clock *ptp)
+       /* Release the clock's resources. */
+       if (ptp->pps_source)
+               pps_unregister_source(ptp->pps_source);
+-      ptp_cleanup_sysfs(ptp);
++
+       device_destroy(ptp_class, ptp->devid);
++      ptp_cleanup_pin_groups(ptp);
+       posix_clock_unregister(&ptp->clock);
+       return 0;
+diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h
+index 9c5d41421b651..d95888974d0c6 100644
+--- a/drivers/ptp/ptp_private.h
++++ b/drivers/ptp/ptp_private.h
+@@ -54,6 +54,8 @@ struct ptp_clock {
+       struct device_attribute *pin_dev_attr;
+       struct attribute **pin_attr;
+       struct attribute_group pin_attr_group;
++      /* 1st entry is a pointer to the real group, 2nd is NULL terminator */
++      const struct attribute_group *pin_attr_groups[2];
+ };
+ /*
+@@ -94,8 +96,7 @@ uint ptp_poll(struct posix_clock *pc,
+ extern const struct attribute_group *ptp_groups[];
+-int ptp_cleanup_sysfs(struct ptp_clock *ptp);
+-
+-int ptp_populate_sysfs(struct ptp_clock *ptp);
++int ptp_populate_pin_groups(struct ptp_clock *ptp);
++void ptp_cleanup_pin_groups(struct ptp_clock *ptp);
+ #endif
+diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c
+index a55a6eb4dfde9..731d0423c8aa7 100644
+--- a/drivers/ptp/ptp_sysfs.c
++++ b/drivers/ptp/ptp_sysfs.c
+@@ -268,25 +268,14 @@ static ssize_t ptp_pin_store(struct device *dev, struct device_attribute *attr,
+       return count;
+ }
+-int ptp_cleanup_sysfs(struct ptp_clock *ptp)
++int ptp_populate_pin_groups(struct ptp_clock *ptp)
+ {
+-      struct device *dev = ptp->dev;
+-      struct ptp_clock_info *info = ptp->info;
+-
+-      if (info->n_pins) {
+-              sysfs_remove_group(&dev->kobj, &ptp->pin_attr_group);
+-              kfree(ptp->pin_attr);
+-              kfree(ptp->pin_dev_attr);
+-      }
+-      return 0;
+-}
+-
+-static int ptp_populate_pins(struct ptp_clock *ptp)
+-{
+-      struct device *dev = ptp->dev;
+       struct ptp_clock_info *info = ptp->info;
+       int err = -ENOMEM, i, n_pins = info->n_pins;
++      if (!n_pins)
++              return 0;
++
+       ptp->pin_dev_attr = kzalloc(n_pins * sizeof(*ptp->pin_dev_attr),
+                                   GFP_KERNEL);
+       if (!ptp->pin_dev_attr)
+@@ -310,28 +299,18 @@ static int ptp_populate_pins(struct ptp_clock *ptp)
+       ptp->pin_attr_group.name = "pins";
+       ptp->pin_attr_group.attrs = ptp->pin_attr;
+-      err = sysfs_create_group(&dev->kobj, &ptp->pin_attr_group);
+-      if (err)
+-              goto no_group;
++      ptp->pin_attr_groups[0] = &ptp->pin_attr_group;
++
+       return 0;
+-no_group:
+-      kfree(ptp->pin_attr);
+ no_pin_attr:
+       kfree(ptp->pin_dev_attr);
+ no_dev_attr:
+       return err;
+ }
+-int ptp_populate_sysfs(struct ptp_clock *ptp)
++void ptp_cleanup_pin_groups(struct ptp_clock *ptp)
+ {
+-      struct ptp_clock_info *info = ptp->info;
+-      int err;
+-
+-      if (info->n_pins) {
+-              err = ptp_populate_pins(ptp);
+-              if (err)
+-                      return err;
+-      }
+-      return 0;
++      kfree(ptp->pin_attr);
++      kfree(ptp->pin_dev_attr);
+ }
+-- 
+2.20.1
+
diff --git a/queue-4.4/ptp-do-not-explicitly-set-drvdata-in-ptp_clock_regis.patch b/queue-4.4/ptp-do-not-explicitly-set-drvdata-in-ptp_clock_regis.patch
new file mode 100644 (file)
index 0000000..c48e31a
--- /dev/null
@@ -0,0 +1,37 @@
+From e07336a2fcddd851f89dfcbfda427edebd1291c5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Feb 2017 10:23:31 -0800
+Subject: ptp: do not explicitly set drvdata in ptp_clock_register()
+
+From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+
+commit 882f312dc0751c973db26478f07f082c584d16aa upstream.
+
+We do not need to explicitly call dev_set_drvdata(), as it is done for us by
+device_create().
+
+Acked-by: Richard Cochran <richardcochran@gmail.com>
+Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ptp/ptp_clock.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
+index 60a5e0c63a136..113b66f1fc9ed 100644
+--- a/drivers/ptp/ptp_clock.c
++++ b/drivers/ptp/ptp_clock.c
+@@ -220,8 +220,6 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
+       if (IS_ERR(ptp->dev))
+               goto no_device;
+-      dev_set_drvdata(ptp->dev, ptp);
+-
+       err = ptp_populate_sysfs(ptp);
+       if (err)
+               goto no_sysfs;
+-- 
+2.20.1
+
diff --git a/queue-4.4/ptp-fix-pass-zero-to-err_ptr-in-ptp_clock_register.patch b/queue-4.4/ptp-fix-pass-zero-to-err_ptr-in-ptp_clock_register.patch
new file mode 100644 (file)
index 0000000..876af2f
--- /dev/null
@@ -0,0 +1,54 @@
+From 0e637f58378032270601f378e5a23b473527d9a3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Nov 2018 09:54:55 +0800
+Subject: ptp: Fix pass zero to ERR_PTR() in ptp_clock_register
+
+From: YueHaibing <yuehaibing@huawei.com>
+
+commit aea0a897af9e44c258e8ab9296fad417f1bc063a upstream.
+
+Fix smatch warning:
+
+drivers/ptp/ptp_clock.c:298 ptp_clock_register() warn:
+ passing zero to 'ERR_PTR'
+
+'err' should be set when device_create_with_groups() or
+pps_register_source() fails.
+
+Fixes: 85a66e550195 ("ptp: create "pins" together with the rest of attributes")
+Signed-off-by: YueHaibing <yuehaibing@huawei.com>
+Acked-by: Richard Cochran <richardcochran@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ptp/ptp_clock.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
+index 5f5d54f5153fb..a29772667f79b 100644
+--- a/drivers/ptp/ptp_clock.c
++++ b/drivers/ptp/ptp_clock.c
+@@ -222,8 +222,10 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
+       ptp->dev = device_create_with_groups(ptp_class, parent, ptp->devid,
+                                            ptp, ptp->pin_attr_groups,
+                                            "ptp%d", ptp->index);
+-      if (IS_ERR(ptp->dev))
++      if (IS_ERR(ptp->dev)) {
++              err = PTR_ERR(ptp->dev);
+               goto no_device;
++      }
+       /* Register a new PPS source. */
+       if (info->pps) {
+@@ -234,6 +236,7 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
+               pps.owner = info->owner;
+               ptp->pps_source = pps_register_source(&pps, PTP_PPS_DEFAULTS);
+               if (!ptp->pps_source) {
++                      err = -EINVAL;
+                       pr_err("failed to register pps source\n");
+                       goto no_pps;
+               }
+-- 
+2.20.1
+
diff --git a/queue-4.4/ptp-fix-the-race-between-the-release-of-ptp_clock-an.patch b/queue-4.4/ptp-fix-the-race-between-the-release-of-ptp_clock-an.patch
new file mode 100644 (file)
index 0000000..0d43e62
--- /dev/null
@@ -0,0 +1,329 @@
+From fa40eab18c05963de6eaefd898a7ee28e1899c2a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 27 Dec 2019 03:26:27 +0100
+Subject: ptp: fix the race between the release of ptp_clock and cdev
+
+From: Vladis Dronov <vdronov@redhat.com>
+
+commit a33121e5487b424339636b25c35d3a180eaa5f5e upstream.
+
+In a case when a ptp chardev (like /dev/ptp0) is open but an underlying
+device is removed, closing this file leads to a race. This reproduces
+easily in a kvm virtual machine:
+
+ts# cat openptp0.c
+int main() { ... fp = fopen("/dev/ptp0", "r"); ... sleep(10); }
+ts# uname -r
+5.5.0-rc3-46cf053e
+ts# cat /proc/cmdline
+... slub_debug=FZP
+ts# modprobe ptp_kvm
+ts# ./openptp0 &
+[1] 670
+opened /dev/ptp0, sleeping 10s...
+ts# rmmod ptp_kvm
+ts# ls /dev/ptp*
+ls: cannot access '/dev/ptp*': No such file or directory
+ts# ...woken up
+[   48.010809] general protection fault: 0000 [#1] SMP
+[   48.012502] CPU: 6 PID: 658 Comm: openptp0 Not tainted 5.5.0-rc3-46cf053e #25
+[   48.014624] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), ...
+[   48.016270] RIP: 0010:module_put.part.0+0x7/0x80
+[   48.017939] RSP: 0018:ffffb3850073be00 EFLAGS: 00010202
+[   48.018339] RAX: 000000006b6b6b6b RBX: 6b6b6b6b6b6b6b6b RCX: ffff89a476c00ad0
+[   48.018936] RDX: fffff65a08d3ea08 RSI: 0000000000000247 RDI: 6b6b6b6b6b6b6b6b
+[   48.019470] ...                                              ^^^ a slub poison
+[   48.023854] Call Trace:
+[   48.024050]  __fput+0x21f/0x240
+[   48.024288]  task_work_run+0x79/0x90
+[   48.024555]  do_exit+0x2af/0xab0
+[   48.024799]  ? vfs_write+0x16a/0x190
+[   48.025082]  do_group_exit+0x35/0x90
+[   48.025387]  __x64_sys_exit_group+0xf/0x10
+[   48.025737]  do_syscall_64+0x3d/0x130
+[   48.026056]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[   48.026479] RIP: 0033:0x7f53b12082f6
+[   48.026792] ...
+[   48.030945] Modules linked in: ptp i6300esb watchdog [last unloaded: ptp_kvm]
+[   48.045001] Fixing recursive fault but reboot is needed!
+
+This happens in:
+
+static void __fput(struct file *file)
+{   ...
+    if (file->f_op->release)
+        file->f_op->release(inode, file); <<< cdev is kfree'd here
+    if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
+             !(mode & FMODE_PATH))) {
+        cdev_put(inode->i_cdev); <<< cdev fields are accessed here
+
+Namely:
+
+__fput()
+  posix_clock_release()
+    kref_put(&clk->kref, delete_clock) <<< the last reference
+      delete_clock()
+        delete_ptp_clock()
+          kfree(ptp) <<< cdev is embedded in ptp
+  cdev_put
+    module_put(p->owner) <<< *p is kfree'd, bang!
+
+Here cdev is embedded in posix_clock which is embedded in ptp_clock.
+The race happens because ptp_clock's lifetime is controlled by two
+refcounts: kref and cdev.kobj in posix_clock. This is wrong.
+
+Make ptp_clock's sysfs device a parent of cdev with cdev_device_add()
+created especially for such cases. This way the parent device with its
+ptp_clock is not released until all references to the cdev are released.
+This adds a requirement that an initialized but not exposed struct
+device should be provided to posix_clock_register() by a caller instead
+of a simple dev_t.
+
+This approach was adopted from the commit 72139dfa2464 ("watchdog: Fix
+the race between the release of watchdog_core_data and cdev"). See
+details of the implementation in the commit 233ed09d7fda ("chardev: add
+helper function to register char devs with a struct device").
+
+Link: https://lore.kernel.org/linux-fsdevel/20191125125342.6189-1-vdronov@redhat.com/T/#u
+Analyzed-by: Stephen Johnston <sjohnsto@redhat.com>
+Analyzed-by: Vern Lovejoy <vlovejoy@redhat.com>
+Signed-off-by: Vladis Dronov <vdronov@redhat.com>
+Acked-by: Richard Cochran <richardcochran@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ptp/ptp_clock.c     | 31 ++++++++++++++-----------------
+ drivers/ptp/ptp_private.h   |  2 +-
+ include/linux/posix-clock.h | 19 +++++++++++--------
+ kernel/time/posix-clock.c   | 31 +++++++++++++------------------
+ 4 files changed, 39 insertions(+), 44 deletions(-)
+
+diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
+index a29772667f79b..977b52dbcbba2 100644
+--- a/drivers/ptp/ptp_clock.c
++++ b/drivers/ptp/ptp_clock.c
+@@ -171,9 +171,9 @@ static struct posix_clock_operations ptp_clock_ops = {
+       .read           = ptp_read,
+ };
+-static void delete_ptp_clock(struct posix_clock *pc)
++static void ptp_clock_release(struct device *dev)
+ {
+-      struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
++      struct ptp_clock *ptp = container_of(dev, struct ptp_clock, dev);
+       mutex_destroy(&ptp->tsevq_mux);
+       mutex_destroy(&ptp->pincfg_mux);
+@@ -205,7 +205,6 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
+       }
+       ptp->clock.ops = ptp_clock_ops;
+-      ptp->clock.release = delete_ptp_clock;
+       ptp->info = info;
+       ptp->devid = MKDEV(major, index);
+       ptp->index = index;
+@@ -218,15 +217,6 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
+       if (err)
+               goto no_pin_groups;
+-      /* Create a new device in our class. */
+-      ptp->dev = device_create_with_groups(ptp_class, parent, ptp->devid,
+-                                           ptp, ptp->pin_attr_groups,
+-                                           "ptp%d", ptp->index);
+-      if (IS_ERR(ptp->dev)) {
+-              err = PTR_ERR(ptp->dev);
+-              goto no_device;
+-      }
+-
+       /* Register a new PPS source. */
+       if (info->pps) {
+               struct pps_source_info pps;
+@@ -242,8 +232,18 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
+               }
+       }
+-      /* Create a posix clock. */
+-      err = posix_clock_register(&ptp->clock, ptp->devid);
++      /* Initialize a new device of our class in our clock structure. */
++      device_initialize(&ptp->dev);
++      ptp->dev.devt = ptp->devid;
++      ptp->dev.class = ptp_class;
++      ptp->dev.parent = parent;
++      ptp->dev.groups = ptp->pin_attr_groups;
++      ptp->dev.release = ptp_clock_release;
++      dev_set_drvdata(&ptp->dev, ptp);
++      dev_set_name(&ptp->dev, "ptp%d", ptp->index);
++
++      /* Create a posix clock and link it to the device. */
++      err = posix_clock_register(&ptp->clock, &ptp->dev);
+       if (err) {
+               pr_err("failed to create posix clock\n");
+               goto no_clock;
+@@ -255,8 +255,6 @@ no_clock:
+       if (ptp->pps_source)
+               pps_unregister_source(ptp->pps_source);
+ no_pps:
+-      device_destroy(ptp_class, ptp->devid);
+-no_device:
+       ptp_cleanup_pin_groups(ptp);
+ no_pin_groups:
+       mutex_destroy(&ptp->tsevq_mux);
+@@ -277,7 +275,6 @@ int ptp_clock_unregister(struct ptp_clock *ptp)
+       if (ptp->pps_source)
+               pps_unregister_source(ptp->pps_source);
+-      device_destroy(ptp_class, ptp->devid);
+       ptp_cleanup_pin_groups(ptp);
+       posix_clock_unregister(&ptp->clock);
+diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h
+index d95888974d0c6..15346e840caa9 100644
+--- a/drivers/ptp/ptp_private.h
++++ b/drivers/ptp/ptp_private.h
+@@ -40,7 +40,7 @@ struct timestamp_event_queue {
+ struct ptp_clock {
+       struct posix_clock clock;
+-      struct device *dev;
++      struct device dev;
+       struct ptp_clock_info *info;
+       dev_t devid;
+       int index; /* index into clocks.map */
+diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h
+index 83b22ae9ae12a..b39420a0321c3 100644
+--- a/include/linux/posix-clock.h
++++ b/include/linux/posix-clock.h
+@@ -104,29 +104,32 @@ struct posix_clock_operations {
+  *
+  * @ops:     Functional interface to the clock
+  * @cdev:    Character device instance for this clock
+- * @kref:    Reference count.
++ * @dev:     Pointer to the clock's device.
+  * @rwsem:   Protects the 'zombie' field from concurrent access.
+  * @zombie:  If 'zombie' is true, then the hardware has disappeared.
+- * @release: A function to free the structure when the reference count reaches
+- *           zero. May be NULL if structure is statically allocated.
+  *
+  * Drivers should embed their struct posix_clock within a private
+  * structure, obtaining a reference to it during callbacks using
+  * container_of().
++ *
++ * Drivers should supply an initialized but not exposed struct device
++ * to posix_clock_register(). It is used to manage lifetime of the
++ * driver's private structure. It's 'release' field should be set to
++ * a release function for this private structure.
+  */
+ struct posix_clock {
+       struct posix_clock_operations ops;
+       struct cdev cdev;
+-      struct kref kref;
++      struct device *dev;
+       struct rw_semaphore rwsem;
+       bool zombie;
+-      void (*release)(struct posix_clock *clk);
+ };
+ /**
+  * posix_clock_register() - register a new clock
+- * @clk:   Pointer to the clock. Caller must provide 'ops' and 'release'
+- * @devid: Allocated device id
++ * @clk:   Pointer to the clock. Caller must provide 'ops' field
++ * @dev:   Pointer to the initialized device. Caller must provide
++ *         'release' field
+  *
+  * A clock driver calls this function to register itself with the
+  * clock device subsystem. If 'clk' points to dynamically allocated
+@@ -135,7 +138,7 @@ struct posix_clock {
+  *
+  * Returns zero on success, non-zero otherwise.
+  */
+-int posix_clock_register(struct posix_clock *clk, dev_t devid);
++int posix_clock_register(struct posix_clock *clk, struct device *dev);
+ /**
+  * posix_clock_unregister() - unregister a clock
+diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c
+index e24008c098c6b..45a0a26023d4b 100644
+--- a/kernel/time/posix-clock.c
++++ b/kernel/time/posix-clock.c
+@@ -25,8 +25,6 @@
+ #include <linux/syscalls.h>
+ #include <linux/uaccess.h>
+-static void delete_clock(struct kref *kref);
+-
+ /*
+  * Returns NULL if the posix_clock instance attached to 'fp' is old and stale.
+  */
+@@ -168,7 +166,7 @@ static int posix_clock_open(struct inode *inode, struct file *fp)
+               err = 0;
+       if (!err) {
+-              kref_get(&clk->kref);
++              get_device(clk->dev);
+               fp->private_data = clk;
+       }
+ out:
+@@ -184,7 +182,7 @@ static int posix_clock_release(struct inode *inode, struct file *fp)
+       if (clk->ops.release)
+               err = clk->ops.release(clk);
+-      kref_put(&clk->kref, delete_clock);
++      put_device(clk->dev);
+       fp->private_data = NULL;
+@@ -206,38 +204,35 @@ static const struct file_operations posix_clock_file_operations = {
+ #endif
+ };
+-int posix_clock_register(struct posix_clock *clk, dev_t devid)
++int posix_clock_register(struct posix_clock *clk, struct device *dev)
+ {
+       int err;
+-      kref_init(&clk->kref);
+       init_rwsem(&clk->rwsem);
+       cdev_init(&clk->cdev, &posix_clock_file_operations);
++      err = cdev_device_add(&clk->cdev, dev);
++      if (err) {
++              pr_err("%s unable to add device %d:%d\n",
++                      dev_name(dev), MAJOR(dev->devt), MINOR(dev->devt));
++              return err;
++      }
+       clk->cdev.owner = clk->ops.owner;
+-      err = cdev_add(&clk->cdev, devid, 1);
++      clk->dev = dev;
+-      return err;
++      return 0;
+ }
+ EXPORT_SYMBOL_GPL(posix_clock_register);
+-static void delete_clock(struct kref *kref)
+-{
+-      struct posix_clock *clk = container_of(kref, struct posix_clock, kref);
+-
+-      if (clk->release)
+-              clk->release(clk);
+-}
+-
+ void posix_clock_unregister(struct posix_clock *clk)
+ {
+-      cdev_del(&clk->cdev);
++      cdev_device_del(&clk->cdev, clk->dev);
+       down_write(&clk->rwsem);
+       clk->zombie = true;
+       up_write(&clk->rwsem);
+-      kref_put(&clk->kref, delete_clock);
++      put_device(clk->dev);
+ }
+ EXPORT_SYMBOL_GPL(posix_clock_unregister);
+-- 
+2.20.1
+
diff --git a/queue-4.4/ptp-free-ptp-device-pin-descriptors-properly.patch b/queue-4.4/ptp-free-ptp-device-pin-descriptors-properly.patch
new file mode 100644 (file)
index 0000000..b09dc02
--- /dev/null
@@ -0,0 +1,57 @@
+From cff306c18d002a4a0ea187cae2115fc75d7f073a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Jan 2020 14:00:09 +0100
+Subject: ptp: free ptp device pin descriptors properly
+
+From: Vladis Dronov <vdronov@redhat.com>
+
+commit 75718584cb3c64e6269109d4d54f888ac5a5fd15 upstream.
+
+There is a bug in ptp_clock_unregister(), where ptp_cleanup_pin_groups()
+first frees ptp->pin_{,dev_}attr, but then posix_clock_unregister() needs
+them to destroy a related sysfs device.
+
+These functions can not be just swapped, as posix_clock_unregister() frees
+ptp which is needed in the ptp_cleanup_pin_groups(). Fix this by calling
+ptp_cleanup_pin_groups() in ptp_clock_release(), right before ptp is freed.
+
+This makes this patch fix an UAF bug in a patch which fixes an UAF bug.
+
+Reported-by: Antti Laakso <antti.laakso@intel.com>
+Fixes: a33121e5487b ("ptp: fix the race between the release of ptp_clock and cdev")
+Link: https://lore.kernel.org/netdev/3d2bd09735dbdaf003585ca376b7c1e5b69a19bd.camel@intel.com/
+Signed-off-by: Vladis Dronov <vdronov@redhat.com>
+Acked-by: Richard Cochran <richardcochran@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ptp/ptp_clock.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
+index 977b52dbcbba2..efe68b13704d8 100644
+--- a/drivers/ptp/ptp_clock.c
++++ b/drivers/ptp/ptp_clock.c
+@@ -175,6 +175,7 @@ static void ptp_clock_release(struct device *dev)
+ {
+       struct ptp_clock *ptp = container_of(dev, struct ptp_clock, dev);
++      ptp_cleanup_pin_groups(ptp);
+       mutex_destroy(&ptp->tsevq_mux);
+       mutex_destroy(&ptp->pincfg_mux);
+       ida_simple_remove(&ptp_clocks_map, ptp->index);
+@@ -275,9 +276,8 @@ int ptp_clock_unregister(struct ptp_clock *ptp)
+       if (ptp->pps_source)
+               pps_unregister_source(ptp->pps_source);
+-      ptp_cleanup_pin_groups(ptp);
+-
+       posix_clock_unregister(&ptp->clock);
++
+       return 0;
+ }
+ EXPORT_SYMBOL(ptp_clock_unregister);
+-- 
+2.20.1
+
diff --git a/queue-4.4/ptp-use-is_visible-method-to-hide-unused-attributes.patch b/queue-4.4/ptp-use-is_visible-method-to-hide-unused-attributes.patch
new file mode 100644 (file)
index 0000000..339fb86
--- /dev/null
@@ -0,0 +1,215 @@
+From f988e8551a81f003ad32e2c403ea528e953b566f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Feb 2017 10:23:33 -0800
+Subject: ptp: use is_visible method to hide unused attributes
+
+From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+
+commit af59e717d5ff9c8dbf9bcc581c0dfb3b2a9c9030 upstream.
+
+Instead of creating selected attributes after the device is created (and
+after userspace has potentially seen the uevent), let's use the attribute
+group is_visible() method to control which attributes are shown. This will
+allow us to create all attributes (except the "pins" group, which will be
+taken care of later) before userspace is notified of the new ptp class device.
+
+Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ptp/ptp_sysfs.c | 125 ++++++++++++++++++----------------------
+ 1 file changed, 55 insertions(+), 70 deletions(-)
+
+diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c
+index 302e626fe6b01..a55a6eb4dfde9 100644
+--- a/drivers/ptp/ptp_sysfs.c
++++ b/drivers/ptp/ptp_sysfs.c
+@@ -46,27 +46,6 @@ PTP_SHOW_INT(n_periodic_outputs, n_per_out);
+ PTP_SHOW_INT(n_programmable_pins, n_pins);
+ PTP_SHOW_INT(pps_available, pps);
+-static struct attribute *ptp_attrs[] = {
+-      &dev_attr_clock_name.attr,
+-      &dev_attr_max_adjustment.attr,
+-      &dev_attr_n_alarms.attr,
+-      &dev_attr_n_external_timestamps.attr,
+-      &dev_attr_n_periodic_outputs.attr,
+-      &dev_attr_n_programmable_pins.attr,
+-      &dev_attr_pps_available.attr,
+-      NULL,
+-};
+-
+-static const struct attribute_group ptp_group = {
+-      .attrs = ptp_attrs,
+-};
+-
+-const struct attribute_group *ptp_groups[] = {
+-      &ptp_group,
+-      NULL,
+-};
+-
+-
+ static ssize_t extts_enable_store(struct device *dev,
+                                 struct device_attribute *attr,
+                                 const char *buf, size_t count)
+@@ -91,6 +70,7 @@ static ssize_t extts_enable_store(struct device *dev,
+ out:
+       return err;
+ }
++static DEVICE_ATTR(extts_enable, 0220, NULL, extts_enable_store);
+ static ssize_t extts_fifo_show(struct device *dev,
+                              struct device_attribute *attr, char *page)
+@@ -124,6 +104,7 @@ out:
+       mutex_unlock(&ptp->tsevq_mux);
+       return cnt;
+ }
++static DEVICE_ATTR(fifo, 0444, extts_fifo_show, NULL);
+ static ssize_t period_store(struct device *dev,
+                           struct device_attribute *attr,
+@@ -151,6 +132,7 @@ static ssize_t period_store(struct device *dev,
+ out:
+       return err;
+ }
++static DEVICE_ATTR(period, 0220, NULL, period_store);
+ static ssize_t pps_enable_store(struct device *dev,
+                               struct device_attribute *attr,
+@@ -177,6 +159,57 @@ static ssize_t pps_enable_store(struct device *dev,
+ out:
+       return err;
+ }
++static DEVICE_ATTR(pps_enable, 0220, NULL, pps_enable_store);
++
++static struct attribute *ptp_attrs[] = {
++      &dev_attr_clock_name.attr,
++
++      &dev_attr_max_adjustment.attr,
++      &dev_attr_n_alarms.attr,
++      &dev_attr_n_external_timestamps.attr,
++      &dev_attr_n_periodic_outputs.attr,
++      &dev_attr_n_programmable_pins.attr,
++      &dev_attr_pps_available.attr,
++
++      &dev_attr_extts_enable.attr,
++      &dev_attr_fifo.attr,
++      &dev_attr_period.attr,
++      &dev_attr_pps_enable.attr,
++      NULL
++};
++
++static umode_t ptp_is_attribute_visible(struct kobject *kobj,
++                                      struct attribute *attr, int n)
++{
++      struct device *dev = kobj_to_dev(kobj);
++      struct ptp_clock *ptp = dev_get_drvdata(dev);
++      struct ptp_clock_info *info = ptp->info;
++      umode_t mode = attr->mode;
++
++      if (attr == &dev_attr_extts_enable.attr ||
++          attr == &dev_attr_fifo.attr) {
++              if (!info->n_ext_ts)
++                      mode = 0;
++      } else if (attr == &dev_attr_period.attr) {
++              if (!info->n_per_out)
++                      mode = 0;
++      } else if (attr == &dev_attr_pps_enable.attr) {
++              if (!info->pps)
++                      mode = 0;
++      }
++
++      return mode;
++}
++
++static const struct attribute_group ptp_group = {
++      .is_visible     = ptp_is_attribute_visible,
++      .attrs          = ptp_attrs,
++};
++
++const struct attribute_group *ptp_groups[] = {
++      &ptp_group,
++      NULL
++};
+ static int ptp_pin_name2index(struct ptp_clock *ptp, const char *name)
+ {
+@@ -235,26 +268,11 @@ static ssize_t ptp_pin_store(struct device *dev, struct device_attribute *attr,
+       return count;
+ }
+-static DEVICE_ATTR(extts_enable, 0220, NULL, extts_enable_store);
+-static DEVICE_ATTR(fifo,         0444, extts_fifo_show, NULL);
+-static DEVICE_ATTR(period,       0220, NULL, period_store);
+-static DEVICE_ATTR(pps_enable,   0220, NULL, pps_enable_store);
+-
+ int ptp_cleanup_sysfs(struct ptp_clock *ptp)
+ {
+       struct device *dev = ptp->dev;
+       struct ptp_clock_info *info = ptp->info;
+-      if (info->n_ext_ts) {
+-              device_remove_file(dev, &dev_attr_extts_enable);
+-              device_remove_file(dev, &dev_attr_fifo);
+-      }
+-      if (info->n_per_out)
+-              device_remove_file(dev, &dev_attr_period);
+-
+-      if (info->pps)
+-              device_remove_file(dev, &dev_attr_pps_enable);
+-
+       if (info->n_pins) {
+               sysfs_remove_group(&dev->kobj, &ptp->pin_attr_group);
+               kfree(ptp->pin_attr);
+@@ -307,46 +325,13 @@ no_dev_attr:
+ int ptp_populate_sysfs(struct ptp_clock *ptp)
+ {
+-      struct device *dev = ptp->dev;
+       struct ptp_clock_info *info = ptp->info;
+       int err;
+-      if (info->n_ext_ts) {
+-              err = device_create_file(dev, &dev_attr_extts_enable);
+-              if (err)
+-                      goto out1;
+-              err = device_create_file(dev, &dev_attr_fifo);
+-              if (err)
+-                      goto out2;
+-      }
+-      if (info->n_per_out) {
+-              err = device_create_file(dev, &dev_attr_period);
+-              if (err)
+-                      goto out3;
+-      }
+-      if (info->pps) {
+-              err = device_create_file(dev, &dev_attr_pps_enable);
+-              if (err)
+-                      goto out4;
+-      }
+       if (info->n_pins) {
+               err = ptp_populate_pins(ptp);
+               if (err)
+-                      goto out5;
++                      return err;
+       }
+       return 0;
+-out5:
+-      if (info->pps)
+-              device_remove_file(dev, &dev_attr_pps_enable);
+-out4:
+-      if (info->n_per_out)
+-              device_remove_file(dev, &dev_attr_period);
+-out3:
+-      if (info->n_ext_ts)
+-              device_remove_file(dev, &dev_attr_fifo);
+-out2:
+-      if (info->n_ext_ts)
+-              device_remove_file(dev, &dev_attr_extts_enable);
+-out1:
+-      return err;
+ }
+-- 
+2.20.1
+
index 425c38f424de7ab775af57b7bd73d82bea2d0b46..583c4851b77e1f9f7ed83bb7a720df7b3d41c900 100644 (file)
@@ -14,3 +14,20 @@ usb-uas-add-quirk-for-lacie-2big-quadra.patch
 usb-serial-garmin_gps-add-sanity-checking-for-data-length.patch
 batman-adv-fix-batadv_nc_random_weight_tq.patch
 scripts-decodecode-fix-trapping-instruction-formatting.patch
+phy-micrel-disable-auto-negotiation-on-startup.patch
+phy-micrel-ensure-interrupts-are-reenabled-on-resume.patch
+ext4-add-cond_resched-to-ext4_protect_reserved_inode.patch
+net-ipv6-add-net-argument-to-ip6_dst_lookup_flow.patch
+net-ipv6_stub-use-ip6_dst_lookup_flow-instead-of-ip6.patch
+blktrace-fix-potential-deadlock-between-delete-sysfs.patch
+blktrace-fix-unlocked-access-to-init-start-stop-tear.patch
+blktrace-fix-trace-mutex-deadlock.patch
+blktrace-protect-q-blk_trace-with-rcu.patch
+blktrace-fix-dereference-after-null-check.patch
+ptp-do-not-explicitly-set-drvdata-in-ptp_clock_regis.patch
+ptp-use-is_visible-method-to-hide-unused-attributes.patch
+ptp-create-pins-together-with-the-rest-of-attributes.patch
+chardev-add-helper-function-to-register-char-devs-wi.patch
+ptp-fix-pass-zero-to-err_ptr-in-ptp_clock_register.patch
+ptp-fix-the-race-between-the-release-of-ptp_clock-an.patch
+ptp-free-ptp-device-pin-descriptors-properly.patch