git.ipfire.org Git - people/teissler/ipfire-2.x.git/blob - src/patches/suse-2.6.27.31/patches.suse/SoN-30-nfs-swap_ops.patch
Reenabled linux-xen, added patches for Xen Kernel Version 2.6.27.31,
[people/teissler/ipfire-2.x.git] / src / patches / suse-2.6.27.31 / patches.suse / SoN-30-nfs-swap_ops.patch
1 From: Peter Zijlstra <a.p.zijlstra@chello.nl>
2 Subject: nfs: enable swap on NFS
3 Patch-mainline: No
4 References: FATE#303834
5
6 Implement all the new swapfile a_ops for NFS. This sets SOCK_MEMALLOC on the
7 NFS socket, runs socket reconnects under PF_MEMALLOC, and sets SOCK_MEMALLOC
8 again on the socket before invoking the protocol's ->connect() method.
9
10 PF_MEMALLOC should allow the allocation of struct socket and related objects,
11 and the early (re)setting of SOCK_MEMALLOC should allow us to receive the
12 packets required to establish the TCP connection.
13
14 (swapping continues over a server reset during heavy network traffic)
15
16 Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
17 Acked-by: Neil Brown <neilb@suse.de>
18 Acked-by: Suresh Jayaraman <sjayaraman@suse.de>
19
20 ---
21 fs/Kconfig | 17 ++++++++++
22 fs/nfs/file.c | 18 ++++++++++
23 fs/nfs/write.c | 22 +++++++++++++
24 include/linux/nfs_fs.h | 2 +
25 include/linux/sunrpc/xprt.h | 5 ++-
26 net/sunrpc/sched.c | 9 ++++-
27 net/sunrpc/xprtsock.c | 73 ++++++++++++++++++++++++++++++++++++++++++++
28 7 files changed, 143 insertions(+), 3 deletions(-)
29
30 --- a/fs/Kconfig
31 +++ b/fs/Kconfig
32 @@ -1748,6 +1748,18 @@ config ROOT_NFS
33
34 Most people say N here.
35
36 +config NFS_SWAP
37 + bool "Provide swap over NFS support"
38 + default n
39 + depends on NFS_FS
40 + select SUNRPC_SWAP
41 + help
42 + This option enables swapon to work on files located on NFS mounts.
43 +
44 + For more details, see Documentation/network-swap.txt
45 +
46 + If unsure, say N.
47 +
48 config NFSD
49 tristate "NFS server support"
50 depends on INET
51 @@ -1869,6 +1881,11 @@ config SUNRPC_XPRT_RDMA
52
53 If unsure, say N.
54
55 +config SUNRPC_SWAP
56 + def_bool n
57 + depends on SUNRPC
58 + select NETVM
59 +
60 config RPCSEC_GSS_KRB5
61 tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)"
62 depends on SUNRPC && EXPERIMENTAL
63 --- a/fs/nfs/file.c
64 +++ b/fs/nfs/file.c
65 @@ -434,6 +434,18 @@ static int nfs_launder_page(struct page
66 return nfs_wb_page(inode, page);
67 }
68
69 +#ifdef CONFIG_NFS_SWAP
70 +static int nfs_swapon(struct file *file)
71 +{
72 + return xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 1);
73 +}
74 +
75 +static int nfs_swapoff(struct file *file)
76 +{
77 + return xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 0);
78 +}
79 +#endif
80 +
81 const struct address_space_operations nfs_file_aops = {
82 .readpage = nfs_readpage,
83 .readpages = nfs_readpages,
84 @@ -446,6 +458,12 @@ const struct address_space_operations nf
85 .releasepage = nfs_release_page,
86 .direct_IO = nfs_direct_IO,
87 .launder_page = nfs_launder_page,
88 +#ifdef CONFIG_NFS_SWAP
89 + .swapon = nfs_swapon,
90 + .swapoff = nfs_swapoff,
91 + .swap_out = nfs_swap_out,
92 + .swap_in = nfs_readpage,
93 +#endif
94 };
95
96 static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
97 --- a/fs/nfs/write.c
98 +++ b/fs/nfs/write.c
99 @@ -336,6 +336,28 @@ int nfs_writepage(struct page *page, str
100 return ret;
101 }
102
103 +static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
104 + unsigned int offset, unsigned int count);
105 +
106 +int nfs_swap_out(struct file *file, struct page *page,
107 + struct writeback_control *wbc)
108 +{
109 + struct nfs_open_context *ctx = nfs_file_open_context(file);
110 + int status;
111 +
112 + status = nfs_writepage_setup(ctx, page, 0, nfs_page_length(page));
113 + if (status < 0) {
114 + nfs_set_pageerror(page);
115 + goto out;
116 + }
117 +
118 + status = nfs_writepage_locked(page, wbc);
119 +
120 +out:
121 + unlock_page(page);
122 + return status;
123 +}
124 +
125 static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data)
126 {
127 int ret;
128 --- a/include/linux/nfs_fs.h
129 +++ b/include/linux/nfs_fs.h
130 @@ -464,6 +464,8 @@ extern int nfs_writepages(struct addres
131 extern int nfs_flush_incompatible(struct file *file, struct page *page);
132 extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
133 extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
134 +extern int nfs_swap_out(struct file *file, struct page *page,
135 + struct writeback_control *wbc);
136
137 /*
138 * Try to write back everything synchronously (but check the
139 --- a/include/linux/sunrpc/xprt.h
140 +++ b/include/linux/sunrpc/xprt.h
141 @@ -147,7 +147,9 @@ struct rpc_xprt {
142 unsigned int max_reqs; /* total slots */
143 unsigned long state; /* transport state */
144 unsigned char shutdown : 1, /* being shut down */
145 - resvport : 1; /* use a reserved port */
146 + resvport : 1, /* use a reserved port */
147 + swapper : 1; /* we're swapping over this
148 + transport */
149 unsigned int bind_index; /* bind function index */
150
151 /*
152 @@ -249,6 +251,7 @@ void xprt_release_rqst_cong(struct rpc
153 void xprt_disconnect_done(struct rpc_xprt *xprt);
154 void xprt_force_disconnect(struct rpc_xprt *xprt);
155 void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie);
156 +int xs_swapper(struct rpc_xprt *xprt, int enable);
157
158 /*
159 * Reserved bit positions in xprt->state
160 --- a/net/sunrpc/sched.c
161 +++ b/net/sunrpc/sched.c
162 @@ -729,7 +729,10 @@ struct rpc_buffer {
163 void *rpc_malloc(struct rpc_task *task, size_t size)
164 {
165 struct rpc_buffer *buf;
166 - gfp_t gfp = RPC_IS_SWAPPER(task) ? GFP_ATOMIC : GFP_NOWAIT;
167 + gfp_t gfp = GFP_NOWAIT;
168 +
169 + if (RPC_IS_SWAPPER(task))
170 + gfp |= __GFP_MEMALLOC;
171
172 size += sizeof(struct rpc_buffer);
173 if (size <= RPC_BUFFER_MAXSIZE)
174 @@ -800,6 +803,8 @@ static void rpc_init_task(struct rpc_tas
175 kref_get(&task->tk_client->cl_kref);
176 if (task->tk_client->cl_softrtry)
177 task->tk_flags |= RPC_TASK_SOFT;
178 + if (task->tk_client->cl_xprt->swapper)
179 + task->tk_flags |= RPC_TASK_SWAPPER;
180 }
181
182 if (task->tk_ops->rpc_call_prepare != NULL)
183 @@ -825,7 +830,7 @@ static void rpc_init_task(struct rpc_tas
184 static struct rpc_task *
185 rpc_alloc_task(void)
186 {
187 - return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS);
188 + return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOIO);
189 }
190
191 /*
192 --- a/net/sunrpc/xprtsock.c
193 +++ b/net/sunrpc/xprtsock.c
194 @@ -1454,6 +1454,55 @@ static inline void xs_reclassify_socket6
195 }
196 #endif
197
198 +#ifdef CONFIG_SUNRPC_SWAP
199 +static void xs_set_memalloc(struct rpc_xprt *xprt)
200 +{
201 + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
202 +
203 + if (xprt->swapper)
204 + sk_set_memalloc(transport->inet);
205 +}
206 +
207 +#define RPC_BUF_RESERVE_PAGES \
208 + kmalloc_estimate_objs(sizeof(struct rpc_rqst), GFP_KERNEL, RPC_MAX_SLOT_TABLE)
209 +#define RPC_RESERVE_PAGES (RPC_BUF_RESERVE_PAGES + TX_RESERVE_PAGES)
210 +
211 +/**
212 + * xs_swapper - Tag (or untag) this transport as being used for swap.
213 + * @xprt: transport to tag
214 + * @enable: non-zero to tag the transport for swap, zero to untag it
215 + *
216 + */
217 +int xs_swapper(struct rpc_xprt *xprt, int enable)
218 +{
219 + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
220 + int err = 0;
221 +
222 + if (enable) {
223 + /*
224 + * keep one extra sock reference so the reserve won't dip
225 + * when the socket gets reconnected.
226 + */
227 + err = sk_adjust_memalloc(1, RPC_RESERVE_PAGES);
228 + if (!err) {
229 + xprt->swapper = 1;
230 + xs_set_memalloc(xprt);
231 + }
232 + } else if (xprt->swapper) {
233 + xprt->swapper = 0;
234 + sk_clear_memalloc(transport->inet);
235 + sk_adjust_memalloc(-1, -RPC_RESERVE_PAGES);
236 + }
237 +
238 + return err;
239 +}
240 +EXPORT_SYMBOL_GPL(xs_swapper);
241 +#else
242 +static void xs_set_memalloc(struct rpc_xprt *xprt)
243 +{
244 +}
245 +#endif
246 +
247 static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
248 {
249 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
250 @@ -1478,6 +1527,8 @@ static void xs_udp_finish_connecting(str
251 transport->sock = sock;
252 transport->inet = sk;
253
254 + xs_set_memalloc(xprt);
255 +
256 write_unlock_bh(&sk->sk_callback_lock);
257 }
258 xs_udp_do_set_buffer_size(xprt);
259 @@ -1495,11 +1546,15 @@ static void xs_udp_connect_worker4(struc
260 container_of(work, struct sock_xprt, connect_worker.work);
261 struct rpc_xprt *xprt = &transport->xprt;
262 struct socket *sock = transport->sock;
263 + unsigned long pflags = current->flags;
264 int err, status = -EIO;
265
266 if (xprt->shutdown || !xprt_bound(xprt))
267 goto out;
268
269 + if (xprt->swapper)
270 + current->flags |= PF_MEMALLOC;
271 +
272 /* Start by resetting any existing state */
273 xs_close(xprt);
274
275 @@ -1522,6 +1577,7 @@ static void xs_udp_connect_worker4(struc
276 out:
277 xprt_wake_pending_tasks(xprt, status);
278 xprt_clear_connecting(xprt);
279 + tsk_restore_flags(current, pflags, PF_MEMALLOC);
280 }
281
282 /**
283 @@ -1536,11 +1592,15 @@ static void xs_udp_connect_worker6(struc
284 container_of(work, struct sock_xprt, connect_worker.work);
285 struct rpc_xprt *xprt = &transport->xprt;
286 struct socket *sock = transport->sock;
287 + unsigned long pflags = current->flags;
288 int err, status = -EIO;
289
290 if (xprt->shutdown || !xprt_bound(xprt))
291 goto out;
292
293 + if (xprt->swapper)
294 + current->flags |= PF_MEMALLOC;
295 +
296 /* Start by resetting any existing state */
297 xs_close(xprt);
298
299 @@ -1563,6 +1623,7 @@ static void xs_udp_connect_worker6(struc
300 out:
301 xprt_wake_pending_tasks(xprt, status);
302 xprt_clear_connecting(xprt);
303 + tsk_restore_flags(current, pflags, PF_MEMALLOC);
304 }
305
306 /*
307 @@ -1632,6 +1693,8 @@ static int xs_tcp_finish_connecting(stru
308 write_unlock_bh(&sk->sk_callback_lock);
309 }
310
311 + xs_set_memalloc(xprt);
312 +
313 /* Tell the socket layer to start connecting... */
314 xprt->stat.connect_count++;
315 xprt->stat.connect_start = jiffies;
316 @@ -1650,11 +1713,15 @@ static void xs_tcp_connect_worker4(struc
317 container_of(work, struct sock_xprt, connect_worker.work);
318 struct rpc_xprt *xprt = &transport->xprt;
319 struct socket *sock = transport->sock;
320 + unsigned long pflags = current->flags;
321 int err, status = -EIO;
322
323 if (xprt->shutdown || !xprt_bound(xprt))
324 goto out;
325
326 + if (xprt->swapper)
327 + current->flags |= PF_MEMALLOC;
328 +
329 if (!sock) {
330 /* start from scratch */
331 if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
332 @@ -1696,6 +1763,7 @@ out:
333 xprt_wake_pending_tasks(xprt, status);
334 out_clear:
335 xprt_clear_connecting(xprt);
336 + tsk_restore_flags(current, pflags, PF_MEMALLOC);
337 }
338
339 /**
340 @@ -1710,11 +1778,15 @@ static void xs_tcp_connect_worker6(struc
341 container_of(work, struct sock_xprt, connect_worker.work);
342 struct rpc_xprt *xprt = &transport->xprt;
343 struct socket *sock = transport->sock;
344 + unsigned long pflags = current->flags;
345 int err, status = -EIO;
346
347 if (xprt->shutdown || !xprt_bound(xprt))
348 goto out;
349
350 + if (xprt->swapper)
351 + current->flags |= PF_MEMALLOC;
352 +
353 if (!sock) {
354 /* start from scratch */
355 if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
356 @@ -1755,6 +1827,7 @@ out:
357 xprt_wake_pending_tasks(xprt, status);
358 out_clear:
359 xprt_clear_connecting(xprt);
360 + tsk_restore_flags(current, pflags, PF_MEMALLOC);
361 }
362
363 /**