git.ipfire.org Git - ipfire-2.x.git/blame - src/patches/suse-2.6.27.39/patches.suse/SoN-30-nfs-swap_ops.patch
Imported linux-2.6.27.39 suse/xen patches.
[ipfire-2.x.git] / src / patches / suse-2.6.27.39 / patches.suse / SoN-30-nfs-swap_ops.patch
CommitLineData
2cb7cef9
BS
1From: Peter Zijlstra <a.p.zijlstra@chello.nl>
2Subject: nfs: enable swap on NFS
3Patch-mainline: No
4References: FATE#303834
5
6Implement all the new swapfile a_ops for NFS. This sets SOCK_MEMALLOC on the
7NFS socket, runs socket reconnect under PF_MEMALLOC, and re-applies
8SOCK_MEMALLOC before invoking the protocol's ->connect() method.
9
10PF_MEMALLOC should allow the allocation of struct socket and related objects,
11and the early (re)setting of SOCK_MEMALLOC should allow us to receive the
12packets required to establish the TCP connection.
13
14(swapping continues over a server reset during heavy network traffic)
15
16Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
17Acked-by: Neil Brown <neilb@suse.de>
18Acked-by: Suresh Jayaraman <sjayaraman@suse.de>
19
20---
21 fs/Kconfig | 17 ++++++++++
22 fs/nfs/file.c | 18 ++++++++++
23 fs/nfs/write.c | 22 +++++++++++++
24 include/linux/nfs_fs.h | 2 +
25 include/linux/sunrpc/xprt.h | 5 ++-
26 net/sunrpc/sched.c | 9 ++++-
27 net/sunrpc/xprtsock.c | 73 ++++++++++++++++++++++++++++++++++++++++++++
28 7 files changed, 143 insertions(+), 3 deletions(-)
29
30--- a/fs/Kconfig
31+++ b/fs/Kconfig
32@@ -1748,6 +1748,18 @@ config ROOT_NFS
33
34 Most people say N here.
35
36+config NFS_SWAP
37+ bool "Provide swap over NFS support"
38+ default n
39+ depends on NFS_FS
40+ select SUNRPC_SWAP
41+ help
42+ This option enables swapon to work on files located on NFS mounts.
43+
44+ For more details, see Documentation/network-swap.txt
45+
46+ If unsure, say N.
47+
48 config NFSD
49 tristate "NFS server support"
50 depends on INET
51@@ -1869,6 +1881,11 @@ config SUNRPC_XPRT_RDMA
52
53 If unsure, say N.
54
55+config SUNRPC_SWAP
56+ def_bool n
57+ depends on SUNRPC
58+ select NETVM
59+
60 config RPCSEC_GSS_KRB5
61 tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)"
62 depends on SUNRPC && EXPERIMENTAL
63--- a/fs/nfs/file.c
64+++ b/fs/nfs/file.c
65@@ -434,6 +434,18 @@ static int nfs_launder_page(struct page
66 return nfs_wb_page(inode, page);
67 }
68
69+#ifdef CONFIG_NFS_SWAP
70+static int nfs_swapon(struct file *file)
71+{
72+ return xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 1);
73+}
74+
75+static int nfs_swapoff(struct file *file)
76+{
77+ return xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 0);
78+}
79+#endif
80+
81 const struct address_space_operations nfs_file_aops = {
82 .readpage = nfs_readpage,
83 .readpages = nfs_readpages,
84@@ -446,6 +458,12 @@ const struct address_space_operations nf
85 .releasepage = nfs_release_page,
86 .direct_IO = nfs_direct_IO,
87 .launder_page = nfs_launder_page,
88+#ifdef CONFIG_NFS_SWAP
89+ .swapon = nfs_swapon,
90+ .swapoff = nfs_swapoff,
91+ .swap_out = nfs_swap_out,
92+ .swap_in = nfs_readpage,
93+#endif
94 };
95
96 static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
97--- a/fs/nfs/write.c
98+++ b/fs/nfs/write.c
99@@ -336,6 +336,28 @@ int nfs_writepage(struct page *page, str
100 return ret;
101 }
102
103+static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
104+ unsigned int offset, unsigned int count);
105+
106+int nfs_swap_out(struct file *file, struct page *page,
107+ struct writeback_control *wbc)
108+{
109+ struct nfs_open_context *ctx = nfs_file_open_context(file);
110+ int status;
111+
112+ status = nfs_writepage_setup(ctx, page, 0, nfs_page_length(page));
113+ if (status < 0) {
114+ nfs_set_pageerror(page);
115+ goto out;
116+ }
117+
118+ status = nfs_writepage_locked(page, wbc);
119+
120+out:
121+ unlock_page(page);
122+ return status;
123+}
124+
125 static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data)
126 {
127 int ret;
128--- a/include/linux/nfs_fs.h
129+++ b/include/linux/nfs_fs.h
130@@ -464,6 +464,8 @@ extern int nfs_writepages(struct addres
131 extern int nfs_flush_incompatible(struct file *file, struct page *page);
132 extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
133 extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
134+extern int nfs_swap_out(struct file *file, struct page *page,
135+ struct writeback_control *wbc);
136
137 /*
138 * Try to write back everything synchronously (but check the
139--- a/include/linux/sunrpc/xprt.h
140+++ b/include/linux/sunrpc/xprt.h
141@@ -147,7 +147,9 @@ struct rpc_xprt {
142 unsigned int max_reqs; /* total slots */
143 unsigned long state; /* transport state */
144 unsigned char shutdown : 1, /* being shut down */
145- resvport : 1; /* use a reserved port */
146+ resvport : 1, /* use a reserved port */
147+ swapper : 1; /* we're swapping over this
148+ transport */
149 unsigned int bind_index; /* bind function index */
150
151 /*
152@@ -249,6 +251,7 @@ void xprt_release_rqst_cong(struct rpc
153 void xprt_disconnect_done(struct rpc_xprt *xprt);
154 void xprt_force_disconnect(struct rpc_xprt *xprt);
155 void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie);
156+int xs_swapper(struct rpc_xprt *xprt, int enable);
157
158 /*
159 * Reserved bit positions in xprt->state
160--- a/net/sunrpc/sched.c
161+++ b/net/sunrpc/sched.c
162@@ -729,7 +729,10 @@ struct rpc_buffer {
163 void *rpc_malloc(struct rpc_task *task, size_t size)
164 {
165 struct rpc_buffer *buf;
166- gfp_t gfp = RPC_IS_SWAPPER(task) ? GFP_ATOMIC : GFP_NOWAIT;
167+ gfp_t gfp = GFP_NOWAIT;
168+
169+ if (RPC_IS_SWAPPER(task))
170+ gfp |= __GFP_MEMALLOC;
171
172 size += sizeof(struct rpc_buffer);
173 if (size <= RPC_BUFFER_MAXSIZE)
174@@ -800,6 +803,8 @@ static void rpc_init_task(struct rpc_tas
175 kref_get(&task->tk_client->cl_kref);
176 if (task->tk_client->cl_softrtry)
177 task->tk_flags |= RPC_TASK_SOFT;
178+ if (task->tk_client->cl_xprt->swapper)
179+ task->tk_flags |= RPC_TASK_SWAPPER;
180 }
181
182 if (task->tk_ops->rpc_call_prepare != NULL)
183@@ -825,7 +830,7 @@ static void rpc_init_task(struct rpc_tas
184 static struct rpc_task *
185 rpc_alloc_task(void)
186 {
187- return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS);
188+ return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOIO);
189 }
190
191 /*
192--- a/net/sunrpc/xprtsock.c
193+++ b/net/sunrpc/xprtsock.c
194@@ -1454,6 +1454,55 @@ static inline void xs_reclassify_socket6
195 }
196 #endif
197
198+#ifdef CONFIG_SUNRPC_SWAP
199+static void xs_set_memalloc(struct rpc_xprt *xprt)
200+{
201+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
202+
203+ if (xprt->swapper)
204+ sk_set_memalloc(transport->inet);
205+}
206+
207+#define RPC_BUF_RESERVE_PAGES \
208+ kmalloc_estimate_objs(sizeof(struct rpc_rqst), GFP_KERNEL, RPC_MAX_SLOT_TABLE)
209+#define RPC_RESERVE_PAGES (RPC_BUF_RESERVE_PAGES + TX_RESERVE_PAGES)
210+
211+/**
212+ * xs_swapper - Tag this transport as being used for swap.
213+ * @xprt: transport to tag
214+ * @enable: enable/disable
215+ *
216+ */
217+int xs_swapper(struct rpc_xprt *xprt, int enable)
218+{
219+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
220+ int err = 0;
221+
222+ if (enable) {
223+ /*
224+ * keep one extra sock reference so the reserve won't dip
225+ * when the socket gets reconnected.
226+ */
227+ err = sk_adjust_memalloc(1, RPC_RESERVE_PAGES);
228+ if (!err) {
229+ xprt->swapper = 1;
230+ xs_set_memalloc(xprt);
231+ }
232+ } else if (xprt->swapper) {
233+ xprt->swapper = 0;
234+ sk_clear_memalloc(transport->inet);
235+ sk_adjust_memalloc(-1, -RPC_RESERVE_PAGES);
236+ }
237+
238+ return err;
239+}
240+EXPORT_SYMBOL_GPL(xs_swapper);
241+#else
242+static void xs_set_memalloc(struct rpc_xprt *xprt)
243+{
244+}
245+#endif
246+
247 static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
248 {
249 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
250@@ -1478,6 +1527,8 @@ static void xs_udp_finish_connecting(str
251 transport->sock = sock;
252 transport->inet = sk;
253
254+ xs_set_memalloc(xprt);
255+
256 write_unlock_bh(&sk->sk_callback_lock);
257 }
258 xs_udp_do_set_buffer_size(xprt);
259@@ -1495,11 +1546,15 @@ static void xs_udp_connect_worker4(struc
260 container_of(work, struct sock_xprt, connect_worker.work);
261 struct rpc_xprt *xprt = &transport->xprt;
262 struct socket *sock = transport->sock;
263+ unsigned long pflags = current->flags;
264 int err, status = -EIO;
265
266 if (xprt->shutdown || !xprt_bound(xprt))
267 goto out;
268
269+ if (xprt->swapper)
270+ current->flags |= PF_MEMALLOC;
271+
272 /* Start by resetting any existing state */
273 xs_close(xprt);
274
275@@ -1522,6 +1577,7 @@ static void xs_udp_connect_worker4(struc
276 out:
277 xprt_wake_pending_tasks(xprt, status);
278 xprt_clear_connecting(xprt);
279+ tsk_restore_flags(current, pflags, PF_MEMALLOC);
280 }
281
282 /**
283@@ -1536,11 +1592,15 @@ static void xs_udp_connect_worker6(struc
284 container_of(work, struct sock_xprt, connect_worker.work);
285 struct rpc_xprt *xprt = &transport->xprt;
286 struct socket *sock = transport->sock;
287+ unsigned long pflags = current->flags;
288 int err, status = -EIO;
289
290 if (xprt->shutdown || !xprt_bound(xprt))
291 goto out;
292
293+ if (xprt->swapper)
294+ current->flags |= PF_MEMALLOC;
295+
296 /* Start by resetting any existing state */
297 xs_close(xprt);
298
299@@ -1563,6 +1623,7 @@ static void xs_udp_connect_worker6(struc
300 out:
301 xprt_wake_pending_tasks(xprt, status);
302 xprt_clear_connecting(xprt);
303+ tsk_restore_flags(current, pflags, PF_MEMALLOC);
304 }
305
306 /*
307@@ -1632,6 +1693,8 @@ static int xs_tcp_finish_connecting(stru
308 write_unlock_bh(&sk->sk_callback_lock);
309 }
310
311+ xs_set_memalloc(xprt);
312+
313 /* Tell the socket layer to start connecting... */
314 xprt->stat.connect_count++;
315 xprt->stat.connect_start = jiffies;
316@@ -1650,11 +1713,15 @@ static void xs_tcp_connect_worker4(struc
317 container_of(work, struct sock_xprt, connect_worker.work);
318 struct rpc_xprt *xprt = &transport->xprt;
319 struct socket *sock = transport->sock;
320+ unsigned long pflags = current->flags;
321 int err, status = -EIO;
322
323 if (xprt->shutdown || !xprt_bound(xprt))
324 goto out;
325
326+ if (xprt->swapper)
327+ current->flags |= PF_MEMALLOC;
328+
329 if (!sock) {
330 /* start from scratch */
331 if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
332@@ -1696,6 +1763,7 @@ out:
333 xprt_wake_pending_tasks(xprt, status);
334 out_clear:
335 xprt_clear_connecting(xprt);
336+ tsk_restore_flags(current, pflags, PF_MEMALLOC);
337 }
338
339 /**
340@@ -1710,11 +1778,15 @@ static void xs_tcp_connect_worker6(struc
341 container_of(work, struct sock_xprt, connect_worker.work);
342 struct rpc_xprt *xprt = &transport->xprt;
343 struct socket *sock = transport->sock;
344+ unsigned long pflags = current->flags;
345 int err, status = -EIO;
346
347 if (xprt->shutdown || !xprt_bound(xprt))
348 goto out;
349
350+ if (xprt->swapper)
351+ current->flags |= PF_MEMALLOC;
352+
353 if (!sock) {
354 /* start from scratch */
355 if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
356@@ -1755,6 +1827,7 @@ out:
357 xprt_wake_pending_tasks(xprt, status);
358 out_clear:
359 xprt_clear_connecting(xprt);
360+ tsk_restore_flags(current, pflags, PF_MEMALLOC);
361 }
362
363 /**