]> git.ipfire.org Git - people/ms/linux.git/blame - fs/afs/volume.c
[AFS]: Handle multiple mounts of an AFS superblock correctly.
[people/ms/linux.git] / fs / afs / volume.c
CommitLineData
ec26815a 1/* AFS volume management
1da177e4 2 *
08e0e7c8 3 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
1da177e4
LT
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/kernel.h>
13#include <linux/module.h>
14#include <linux/init.h>
15#include <linux/slab.h>
16#include <linux/fs.h>
17#include <linux/pagemap.h>
1da177e4
LT
18#include "internal.h"
19
/*
 * printable names for the volume types, indexed by volume type
 * - the table is never modified, so both the strings and the pointers to them
 *   are const
 */
static const char *const afs_voltypes[] = { "R/W", "R/O", "BAK" };
1da177e4 21
1da177e4
LT
22/*
23 * lookup a volume by name
24 * - this can be one of the following:
25 * "%[cell:]volume[.]" R/W volume
26 * "#[cell:]volume[.]" R/O or R/W volume (rwparent=0),
27 * or R/W (rwparent=1) volume
28 * "%[cell:]volume.readonly" R/O volume
29 * "#[cell:]volume.readonly" R/O volume
30 * "%[cell:]volume.backup" Backup volume
31 * "#[cell:]volume.backup" Backup volume
32 *
33 * The cell name is optional, and defaults to the current cell.
34 *
35 * See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin
36 * Guide
37 * - Rule 1: Explicit type suffix forces access of that type or nothing
38 * (no suffix, then use Rule 2 & 3)
39 * - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W
40 * if not available
41 * - Rule 3: If parent volume is R/W, then only mount R/W volume unless
42 * explicitly told otherwise
43 */
08e0e7c8
DH
44struct afs_volume *afs_volume_lookup(const char *name, struct afs_cell *cell,
45 int rwpath)
1da177e4
LT
46{
47 struct afs_vlocation *vlocation = NULL;
48 struct afs_volume *volume = NULL;
08e0e7c8 49 struct afs_server *server = NULL;
1da177e4
LT
50 afs_voltype_t type;
51 const char *cellname, *volname, *suffix;
52 char srvtmask;
53 int force, ret, loop, cellnamesz, volnamesz;
54
55 _enter("%s,,%d,", name, rwpath);
56
57 if (!name || (name[0] != '%' && name[0] != '#') || !name[1]) {
58 printk("kAFS: unparsable volume name\n");
08e0e7c8 59 return ERR_PTR(-EINVAL);
1da177e4
LT
60 }
61
62 /* determine the type of volume we're looking for */
63 force = 0;
64 type = AFSVL_ROVOL;
65
66 if (rwpath || name[0] == '%') {
67 type = AFSVL_RWVOL;
68 force = 1;
69 }
70
71 suffix = strrchr(name, '.');
72 if (suffix) {
73 if (strcmp(suffix, ".readonly") == 0) {
74 type = AFSVL_ROVOL;
75 force = 1;
ec26815a 76 } else if (strcmp(suffix, ".backup") == 0) {
1da177e4
LT
77 type = AFSVL_BACKVOL;
78 force = 1;
ec26815a
DH
79 } else if (suffix[1] == 0) {
80 } else {
1da177e4
LT
81 suffix = NULL;
82 }
83 }
84
85 /* split the cell and volume names */
86 name++;
87 volname = strchr(name, ':');
88 if (volname) {
89 cellname = name;
90 cellnamesz = volname - name;
91 volname++;
ec26815a 92 } else {
1da177e4
LT
93 volname = name;
94 cellname = NULL;
95 cellnamesz = 0;
96 }
97
98 volnamesz = suffix ? suffix - volname : strlen(volname);
99
100 _debug("CELL:%*.*s [%p] VOLUME:%*.*s SUFFIX:%s TYPE:%d%s",
101 cellnamesz, cellnamesz, cellname ?: "", cell,
102 volnamesz, volnamesz, volname, suffix ?: "-",
103 type,
104 force ? " FORCE" : "");
105
106 /* lookup the cell record */
107 if (cellname || !cell) {
08e0e7c8
DH
108 cell = afs_cell_lookup(cellname, cellnamesz);
109 if (IS_ERR(cell)) {
110 ret = PTR_ERR(cell);
1da177e4
LT
111 printk("kAFS: unable to lookup cell '%s'\n",
112 cellname ?: "");
113 goto error;
114 }
ec26815a 115 } else {
1da177e4
LT
116 afs_get_cell(cell);
117 }
118
119 /* lookup the volume location record */
08e0e7c8
DH
120 vlocation = afs_vlocation_lookup(cell, volname, volnamesz);
121 if (IS_ERR(vlocation)) {
122 ret = PTR_ERR(vlocation);
123 vlocation = NULL;
1da177e4 124 goto error;
08e0e7c8 125 }
1da177e4
LT
126
127 /* make the final decision on the type we want */
128 ret = -ENOMEDIUM;
129 if (force && !(vlocation->vldb.vidmask & (1 << type)))
130 goto error;
131
132 srvtmask = 0;
133 for (loop = 0; loop < vlocation->vldb.nservers; loop++)
134 srvtmask |= vlocation->vldb.srvtmask[loop];
135
136 if (force) {
137 if (!(srvtmask & (1 << type)))
138 goto error;
ec26815a 139 } else if (srvtmask & AFS_VOL_VTM_RO) {
1da177e4 140 type = AFSVL_ROVOL;
ec26815a 141 } else if (srvtmask & AFS_VOL_VTM_RW) {
1da177e4 142 type = AFSVL_RWVOL;
ec26815a 143 } else {
1da177e4
LT
144 goto error;
145 }
146
147 down_write(&cell->vl_sem);
148
149 /* is the volume already active? */
150 if (vlocation->vols[type]) {
151 /* yes - re-use it */
152 volume = vlocation->vols[type];
153 afs_get_volume(volume);
154 goto success;
155 }
156
157 /* create a new volume record */
158 _debug("creating new volume record");
159
160 ret = -ENOMEM;
f8314dc6 161 volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL);
1da177e4
LT
162 if (!volume)
163 goto error_up;
164
1da177e4
LT
165 atomic_set(&volume->usage, 1);
166 volume->type = type;
167 volume->type_force = force;
168 volume->cell = cell;
169 volume->vid = vlocation->vldb.vid[type];
170
171 init_rwsem(&volume->server_sem);
172
173 /* look up all the applicable server records */
174 for (loop = 0; loop < 8; loop++) {
175 if (vlocation->vldb.srvtmask[loop] & (1 << volume->type)) {
08e0e7c8
DH
176 server = afs_lookup_server(
177 volume->cell, &vlocation->vldb.servers[loop]);
178 if (IS_ERR(server)) {
179 ret = PTR_ERR(server);
1da177e4 180 goto error_discard;
08e0e7c8 181 }
1da177e4 182
08e0e7c8 183 volume->servers[volume->nservers] = server;
1da177e4
LT
184 volume->nservers++;
185 }
186 }
187
188 /* attach the cache and volume location */
189#ifdef AFS_CACHING_SUPPORT
190 cachefs_acquire_cookie(vlocation->cache,
191 &afs_vnode_cache_index_def,
192 volume,
193 &volume->cache);
194#endif
195
196 afs_get_vlocation(vlocation);
197 volume->vlocation = vlocation;
198
199 vlocation->vols[type] = volume;
200
ec26815a 201success:
1da177e4
LT
202 _debug("kAFS selected %s volume %08x",
203 afs_voltypes[volume->type], volume->vid);
08e0e7c8
DH
204 up_write(&cell->vl_sem);
205 afs_put_vlocation(vlocation);
206 afs_put_cell(cell);
207 _leave(" = %p", volume);
208 return volume;
1da177e4
LT
209
210 /* clean up */
ec26815a 211error_up:
1da177e4 212 up_write(&cell->vl_sem);
ec26815a 213error:
1da177e4
LT
214 afs_put_vlocation(vlocation);
215 afs_put_cell(cell);
08e0e7c8
DH
216 _leave(" = %d", ret);
217 return ERR_PTR(ret);
1da177e4 218
ec26815a 219error_discard:
1da177e4
LT
220 up_write(&cell->vl_sem);
221
222 for (loop = volume->nservers - 1; loop >= 0; loop--)
223 afs_put_server(volume->servers[loop]);
224
225 kfree(volume);
226 goto error;
ec26815a 227}
1da177e4 228
1da177e4
LT
229/*
230 * destroy a volume record
231 */
232void afs_put_volume(struct afs_volume *volume)
233{
234 struct afs_vlocation *vlocation;
235 int loop;
236
237 if (!volume)
238 return;
239
240 _enter("%p", volume);
241
08e0e7c8 242 ASSERTCMP(atomic_read(&volume->usage), >, 0);
1da177e4 243
08e0e7c8 244 vlocation = volume->vlocation;
1da177e4
LT
245
246 /* to prevent a race, the decrement and the dequeue must be effectively
247 * atomic */
248 down_write(&vlocation->cell->vl_sem);
249
250 if (likely(!atomic_dec_and_test(&volume->usage))) {
251 up_write(&vlocation->cell->vl_sem);
252 _leave("");
253 return;
254 }
255
256 vlocation->vols[volume->type] = NULL;
257
258 up_write(&vlocation->cell->vl_sem);
259
260 /* finish cleaning up the volume */
261#ifdef AFS_CACHING_SUPPORT
262 cachefs_relinquish_cookie(volume->cache, 0);
263#endif
264 afs_put_vlocation(vlocation);
265
266 for (loop = volume->nservers - 1; loop >= 0; loop--)
267 afs_put_server(volume->servers[loop]);
268
269 kfree(volume);
270
271 _leave(" [destroyed]");
ec26815a 272}
1da177e4 273
1da177e4
LT
274/*
275 * pick a server to use to try accessing this volume
276 * - returns with an elevated usage count on the server chosen
277 */
08e0e7c8 278struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode)
1da177e4 279{
08e0e7c8 280 struct afs_volume *volume = vnode->volume;
1da177e4
LT
281 struct afs_server *server;
282 int ret, state, loop;
283
284 _enter("%s", volume->vlocation->vldb.name);
285
08e0e7c8
DH
286 /* stick with the server we're already using if we can */
287 if (vnode->server && vnode->server->fs_state == 0) {
288 afs_get_server(vnode->server);
289 _leave(" = %p [current]", vnode->server);
290 return vnode->server;
291 }
292
1da177e4
LT
293 down_read(&volume->server_sem);
294
295 /* handle the no-server case */
296 if (volume->nservers == 0) {
297 ret = volume->rjservers ? -ENOMEDIUM : -ESTALE;
298 up_read(&volume->server_sem);
299 _leave(" = %d [no servers]", ret);
08e0e7c8 300 return ERR_PTR(ret);
1da177e4
LT
301 }
302
303 /* basically, just search the list for the first live server and use
304 * that */
305 ret = 0;
306 for (loop = 0; loop < volume->nservers; loop++) {
307 server = volume->servers[loop];
308 state = server->fs_state;
309
08e0e7c8
DH
310 _debug("consider %d [%d]", loop, state);
311
1da177e4
LT
312 switch (state) {
313 /* found an apparently healthy server */
314 case 0:
315 afs_get_server(server);
316 up_read(&volume->server_sem);
08e0e7c8
DH
317 _leave(" = %p (picked %08x)",
318 server, ntohl(server->addr.s_addr));
319 return server;
1da177e4
LT
320
321 case -ENETUNREACH:
322 if (ret == 0)
323 ret = state;
324 break;
325
326 case -EHOSTUNREACH:
327 if (ret == 0 ||
328 ret == -ENETUNREACH)
329 ret = state;
330 break;
331
332 case -ECONNREFUSED:
333 if (ret == 0 ||
334 ret == -ENETUNREACH ||
335 ret == -EHOSTUNREACH)
336 ret = state;
337 break;
338
339 default:
340 case -EREMOTEIO:
341 if (ret == 0 ||
342 ret == -ENETUNREACH ||
343 ret == -EHOSTUNREACH ||
344 ret == -ECONNREFUSED)
345 ret = state;
346 break;
347 }
348 }
349
350 /* no available servers
351 * - TODO: handle the no active servers case better
352 */
353 up_read(&volume->server_sem);
354 _leave(" = %d", ret);
08e0e7c8 355 return ERR_PTR(ret);
ec26815a 356}
1da177e4 357
1da177e4
LT
358/*
359 * release a server after use
360 * - releases the ref on the server struct that was acquired by picking
361 * - records result of using a particular server to access a volume
362 * - return 0 to try again, 1 if okay or to issue error
363 */
08e0e7c8 364int afs_volume_release_fileserver(struct afs_vnode *vnode,
1da177e4
LT
365 struct afs_server *server,
366 int result)
367{
08e0e7c8 368 struct afs_volume *volume = vnode->volume;
1da177e4
LT
369 unsigned loop;
370
371 _enter("%s,%08x,%d",
372 volume->vlocation->vldb.name, ntohl(server->addr.s_addr),
373 result);
374
375 switch (result) {
376 /* success */
377 case 0:
378 server->fs_act_jif = jiffies;
08e0e7c8 379 server->fs_state = 0;
1da177e4
LT
380 break;
381
382 /* the fileserver denied all knowledge of the volume */
383 case -ENOMEDIUM:
384 server->fs_act_jif = jiffies;
385 down_write(&volume->server_sem);
386
08e0e7c8 387 /* firstly, find where the server is in the active list (if it
1da177e4
LT
388 * is) */
389 for (loop = 0; loop < volume->nservers; loop++)
390 if (volume->servers[loop] == server)
391 goto present;
392
393 /* no longer there - may have been discarded by another op */
394 goto try_next_server_upw;
395
396 present:
397 volume->nservers--;
398 memmove(&volume->servers[loop],
399 &volume->servers[loop + 1],
400 sizeof(volume->servers[loop]) *
401 (volume->nservers - loop));
402 volume->servers[volume->nservers] = NULL;
403 afs_put_server(server);
404 volume->rjservers++;
405
406 if (volume->nservers > 0)
407 /* another server might acknowledge its existence */
408 goto try_next_server_upw;
409
410 /* handle the case where all the fileservers have rejected the
411 * volume
412 * - TODO: try asking the fileservers for volume information
413 * - TODO: contact the VL server again to see if the volume is
414 * no longer registered
415 */
416 up_write(&volume->server_sem);
417 afs_put_server(server);
418 _leave(" [completely rejected]");
419 return 1;
420
421 /* problem reaching the server */
422 case -ENETUNREACH:
423 case -EHOSTUNREACH:
424 case -ECONNREFUSED:
08e0e7c8 425 case -ETIME:
1da177e4
LT
426 case -ETIMEDOUT:
427 case -EREMOTEIO:
428 /* mark the server as dead
429 * TODO: vary dead timeout depending on error
430 */
431 spin_lock(&server->fs_lock);
432 if (!server->fs_state) {
433 server->fs_dead_jif = jiffies + HZ * 10;
434 server->fs_state = result;
435 printk("kAFS: SERVER DEAD state=%d\n", result);
436 }
437 spin_unlock(&server->fs_lock);
438 goto try_next_server;
439
440 /* miscellaneous error */
441 default:
442 server->fs_act_jif = jiffies;
443 case -ENOMEM:
444 case -ENONET:
445 break;
446 }
447
448 /* tell the caller to accept the result */
449 afs_put_server(server);
450 _leave("");
451 return 1;
452
453 /* tell the caller to loop around and try the next server */
ec26815a 454try_next_server_upw:
1da177e4 455 up_write(&volume->server_sem);
ec26815a 456try_next_server:
1da177e4
LT
457 afs_put_server(server);
458 _leave(" [try next server]");
459 return 0;
ec26815a 460}