[thirdparty/kernel/stable-queue.git] / releases / 4.4.103 / autofs-don-t-fail-mount-for-transient-error.patch

From ecc0c469f27765ed1e2b967be0aa17cee1a60b76 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Fri, 17 Nov 2017 15:29:13 -0800
Subject: autofs: don't fail mount for transient error

From: NeilBrown <neilb@suse.com>

commit ecc0c469f27765ed1e2b967be0aa17cee1a60b76 upstream.

Currently if the autofs kernel module gets an error when writing to the
pipe which links to the daemon, then it marks the whole mountpoint as
catatonic, and it will stop working.

It is possible that the error is transient.  This can happen if the
daemon is slow and more than 16 requests queue up.  If a subsequent
process tries to queue a request, and is then signalled, the write to
the pipe will return -ERESTARTSYS and autofs will take that as total
failure.

So change the code to assess -ERESTARTSYS and -ENOMEM as transient
failures which only abort the current request, not the whole mountpoint.

It isn't a crash or a data corruption, but having autofs mountpoints
suddenly stop working is rather inconvenient.

Ian said:

: And given the problems with a half dozen (or so) user space applications
: consuming large amounts of CPU under heavy mount and umount activity this
: could happen more easily than we expect.

Link: http://lkml.kernel.org/r/87y3norvgp.fsf@notabene.neil.brown.name
Signed-off-by: NeilBrown <neilb@suse.com>
Acked-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 fs/autofs4/waitq.c |   15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -87,7 +87,8 @@ static int autofs4_write(struct autofs_s
 		spin_unlock_irqrestore(&current->sighand->siglock, flags);
 	}
 
-	return (bytes > 0);
+	/* if 'wr' returned 0 (impossible) we assume -EIO (safe) */
+	return bytes == 0 ? 0 : wr < 0 ? wr : -EIO;
 }
 	
 static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
@@ -101,6 +102,7 @@ static void autofs4_notify_daemon(struct
 	} pkt;
 	struct file *pipe = NULL;
 	size_t pktsz;
+	int ret;
 
 	DPRINTK("wait id = 0x%08lx, name = %.*s, type=%d",
 		(unsigned long) wq->wait_queue_token, wq->name.len, wq->name.name, type);
@@ -173,7 +175,18 @@ static void autofs4_notify_daemon(struct
 	mutex_unlock(&sbi->wq_mutex);
 
 	if (autofs4_write(sbi, pipe, &pkt, pktsz))
+	switch (ret = autofs4_write(sbi, pipe, &pkt, pktsz)) {
+	case 0:
+		break;
+	case -ENOMEM:
+	case -ERESTARTSYS:
+		/* Just fail this one */
+		autofs4_wait_release(sbi, wq->wait_queue_token, ret);
+		break;
+	default:
 		autofs4_catatonic_mode(sbi);
+		break;
+	}
 	fput(pipe);
 }
Commit	Line	Data
d431789d GKH	1	From ecc0c469f27765ed1e2b967be0aa17cee1a60b76 Mon Sep 17 00:00:00 2001
	2	From: NeilBrown <neilb@suse.com>
	3	Date: Fri, 17 Nov 2017 15:29:13 -0800
	4	Subject: autofs: don't fail mount for transient error
	5
	6	From: NeilBrown <neilb@suse.com>
	7
	8	commit ecc0c469f27765ed1e2b967be0aa17cee1a60b76 upstream.
	9
	10	Currently if the autofs kernel module gets an error when writing to the
	11	pipe which links to the daemon, then it marks the whole mountpoint as
	12	catatonic, and it will stop working.
	13
	14	It is possible that the error is transient. This can happen if the
	15	daemon is slow and more than 16 requests queue up. If a subsequent
	16	process tries to queue a request, and is then signalled, the write to
	17	the pipe will return -ERESTARTSYS and autofs will take that as total
	18	failure.
	19
	20	So change the code to assess -ERESTARTSYS and -ENOMEM as transient
	21	failures which only abort the current request, not the whole mountpoint.
	22
	23	It isn't a crash or a data corruption, but having autofs mountpoints
	24	suddenly stop working is rather inconvenient.
	25
	26	Ian said:
	27
	28	: And given the problems with a half dozen (or so) user space applications
	29	: consuming large amounts of CPU under heavy mount and umount activity this
	30	: could happen more easily than we expect.
	31
	32	Link: http://lkml.kernel.org/r/87y3norvgp.fsf@notabene.neil.brown.name
	33	Signed-off-by: NeilBrown <neilb@suse.com>
	34	Acked-by: Ian Kent <raven@themaw.net>
	35	Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
	36	Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
	37	Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	38
	39	---
	40	fs/autofs4/waitq.c | 15 ++++++++++++++-
	41	1 file changed, 14 insertions(+), 1 deletion(-)
	42
	43	--- a/fs/autofs4/waitq.c
	44	+++ b/fs/autofs4/waitq.c
	45	@@ -87,7 +87,8 @@ static int autofs4_write(struct autofs_s
	46	spin_unlock_irqrestore(&current->sighand->siglock, flags);
	47	}
	48
	49	- return (bytes > 0);
	50	+ /* if 'wr' returned 0 (impossible) we assume -EIO (safe) */
	51	+ return bytes == 0 ? 0 : wr < 0 ? wr : -EIO;
	52	}
	53
	54	static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
	55	@@ -101,6 +102,7 @@ static void autofs4_notify_daemon(struct
	56	} pkt;
	57	struct file *pipe = NULL;
	58	size_t pktsz;
	59	+ int ret;
	60
	61	DPRINTK("wait id = 0x%08lx, name = %.*s, type=%d",
	62	(unsigned long) wq->wait_queue_token, wq->name.len, wq->name.name, type);
	63	@@ -173,7 +175,18 @@ static void autofs4_notify_daemon(struct
	64	mutex_unlock(&sbi->wq_mutex);
	65
	66	if (autofs4_write(sbi, pipe, &pkt, pktsz))
67	+ switch (ret = autofs4_write(sbi, pipe, &pkt, pktsz)) {
68	+ case 0:
69	+ break;
70	+ case -ENOMEM:
71	+ case -ERESTARTSYS:
72	+ /* Just fail this one */
73	+ autofs4_wait_release(sbi, wq->wait_queue_token, ret);
74	+ break;
75	+ default:
76	autofs4_catatonic_mode(sbi);
77	+ break;
78	+ }
79	fput(pipe);
80	}
81