]> git.ipfire.org Git - people/ms/linux.git/blob - fs/jfs/jfs_logmgr.h
Linux-2.6.12-rc2
[people/ms/linux.git] / fs / jfs / jfs_logmgr.h
1 /*
2 * Copyright (C) International Business Machines Corp., 2000-2004
3 * Portions Copyright (C) Christoph Hellwig, 2001-2002
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19 #ifndef _H_JFS_LOGMGR
20 #define _H_JFS_LOGMGR
21
22 #include "jfs_filsys.h"
23 #include "jfs_lock.h"
24
25 /*
26 * log manager configuration parameters
27 */
28
29 /* log page size in bytes, and its base-2 logarithm (1 << 12 == 4096) */
30 #define LOGPSIZE 4096
31 #define L2LOGPSIZE 12
32
33 #define LOGPAGES 16 /* Log pages per mounted file system */
34
35 /*
36 * log logical volume
37 *
38 * a log is used to make the commit operation on journalled
39 * files within the same logical volume group atomic.
40 * a log is implemented with a logical volume.
41 * there is one log per logical volume group.
42 *
43 * block 0 of the log logical volume is not used (ipl etc).
44 * block 1 contains a log "superblock" and is used by logFormat(),
45 * lmLogInit(), lmLogShutdown(), and logRedo() to record status
46 * of the log but is not otherwise used during normal processing.
47 * blocks 2 - (N-1) are used to contain log records.
48 *
49 * when a volume group is varied-on-line, logRedo() must have
50 * been executed before the file systems (logical volumes) in
51 * the volume group can be mounted.
52 */
53 /*
54 * log superblock (block 1 of logical volume) and first log record block
55 */
56 #define LOGSUPER_B 1 /* block number holding the log superblock */
57 #define LOGSTART_B 2 /* first block used to contain log records */
58
59 #define LOGMAGIC 0x87654321 /* presumably the value expected in logsuper.magic - TODO confirm */
60 #define LOGVERSION 1 /* presumably the value expected in logsuper.version - TODO confirm */
61
62 #define MAX_ACTIVE 128 /* Max active file systems sharing log; sizes logsuper.active[] */
63
64 struct logsuper { /* log superblock layout; integer fields are little-endian (__le32) */
65 __le32 magic; /* 4: log lv identifier */
66 __le32 version; /* 4: version number */
67 __le32 serial; /* 4: log open/mount counter */
68 __le32 size; /* 4: size in number of LOGPSIZE blocks */
69 __le32 bsize; /* 4: logical block size in bytes */
70 __le32 l2bsize; /* 4: log2 of bsize */
71
72 __le32 flag; /* 4: option */
73 __le32 state; /* 4: state - see the "log state" values that follow this struct */
74
75 __le32 end; /* 4: addr of last log record set by logredo */
76 char uuid[16]; /* 16: 128-bit journal uuid */
77 char label[16]; /* 16: journal label */
78 struct {
79 char uuid[16];
80 } active[MAX_ACTIVE]; /* 2048: active file systems list (MAX_ACTIVE entries of 16 bytes) */
81 };
82
83 #define NULL_UUID "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" /* 16 explicit zero bytes, the same width as the uuid[16] fields */
84
85 /* log flag: commit option (see jfs_filsys.h) */
86
87 /* log state (see the state field of struct logsuper) */
88 #define LOGMOUNT 0 /* log mounted by lmLogInit() */
89 #define LOGREDONE 1 /* log shutdown by lmLogShutdown().
90 * log redo completed by logredo().
91 */
92 #define LOGWRAP 2 /* log wrapped */
93 #define LOGREADERR 3 /* log read error detected in logredo() */
94
95
96 /*
97 * log logical page
98 *
99 * (this comment should be rewritten !)
100 * the header and trailer structures (h,t) will normally have
101 * the same page and eor value.
102 * An exception to this occurs when a complete page write is not
103 * accomplished on a power failure. Since the hardware may "split write"
104 * sectors in the page, any out of order sequence may occur during powerfail
105 * and needs to be recognized during log replay. The xor value is
106 * an "exclusive or" of all log words in the page up to eor. This
107 * 32 bit xor is stored with the top 16 bits in the header and the
108 * bottom 16 bits in the trailer. logredo can easily recognize pages
109 * that were not completed by reconstructing this xor and checking
110 * the log page.
111 *
112 * Previous versions of the operating system did not allow split
113 * writes and detected partially written records in logredo by
114 * ordering the updates to the header, trailer, and the move of data
115 * into the logdata area. The order: (1) data is moved (2) header
116 * is updated (3) trailer is updated. In logredo, when the header
117 * differed from the trailer, the header and trailer were reconciled
118 * as follows: if h.page != t.page they were set to the smaller of
119 * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only)
120 * h.eor != t.eor they were set to the smaller of their two values.
121 */
122 struct logpage { /* one LOGPSIZE-byte log page: 8-byte header, record area, 8-byte trailer */
123 struct { /* header */
124 __le32 page; /* 4: log sequence page number */
125 __le16 rsrvd; /* 2: */
126 __le16 eor; /* 2: end-of-log offset of last record write */
127 } h;
128
129 __le32 data[LOGPSIZE / 4 - 4]; /* log record area: LOGPSIZE/4 words minus the 4 words (16 bytes) of header + trailer */
130
131 struct { /* trailer */
132 __le32 page; /* 4: normally the same as h.page */
133 __le16 rsrvd; /* 2: */
134 __le16 eor; /* 2: normally the same as h.eor */
135 } t;
136 };
137
138 #define LOGPHDRSIZE 8 /* log page header size in bytes (page + rsrvd + eor of h) */
139 #define LOGPTLRSIZE 8 /* log page trailer size in bytes (page + rsrvd + eor of t) */
140
141
142 /*
143 * log record
144 *
145 * (this comment should be rewritten !)
146 * jfs uses only "after" log records (only a single writer is allowed
147 * in a page, pages are written to temporary paging space if
148 * they must be written to disk before commit, and i/o is
149 * scheduled for modified pages to their home location after
150 * the log records containing the after values and the commit
151 * record is written to the log on disk, undo discards the copy
152 * in main-memory.)
153 *
154 * a log record consists of a data area of variable length followed by
155 * a descriptor of fixed size LOGRDSIZE bytes.
156 * the data area is rounded up to an integral number of 4-bytes and
157 * must be no longer than LOGPSIZE.
158 * the descriptor is of size of multiple of 4-bytes and aligned on a
159 * 4-byte boundary.
160 * records are packed one after the other in the data area of log pages.
161 * (sometimes a DUMMY record is inserted so that at least one record ends
162 * on every page or the longest record is placed on at most two pages).
163 * the field eor in page header/trailer points to the byte following
164 * the last record on a page.
165 */
166
167 /* log record types (16-bit values; presumably stored in lrd.type - TODO confirm) */
168 #define LOG_COMMIT 0x8000
169 #define LOG_SYNCPT 0x4000
170 #define LOG_MOUNT 0x2000
171 #define LOG_REDOPAGE 0x0800
172 #define LOG_NOREDOPAGE 0x0080
173 #define LOG_NOREDOINOEXT 0x0040
174 #define LOG_UPDATEMAP 0x0008
175 #define LOG_NOREDOFILE 0x0001
176
177 /* REDOPAGE/NOREDOPAGE log record data type (see the type field of lrd.log.redopage / lrd.log.noredopage) */
178 #define LOG_INODE 0x0001
179 #define LOG_XTREE 0x0002
180 #define LOG_DTREE 0x0004
181 #define LOG_BTROOT 0x0010
182 #define LOG_EA 0x0020
183 #define LOG_ACL 0x0040
184 #define LOG_DATA 0x0080
185 #define LOG_NEW 0x0100
186 #define LOG_EXTEND 0x0200
187 #define LOG_RELOCATE 0x0400
188 #define LOG_DIR_XTREE 0x0800 /* Xtree is in directory inode */
189
190 /* UPDATEMAP log record descriptor type (see the type field of lrd.log.updatemap) */
191 #define LOG_ALLOCXADLIST 0x0080
192 #define LOG_ALLOCPXDLIST 0x0040
193 #define LOG_ALLOCXAD 0x0020
194 #define LOG_ALLOCPXD 0x0010
195 #define LOG_FREEXADLIST 0x0008
196 #define LOG_FREEPXDLIST 0x0004
197 #define LOG_FREEXAD 0x0002
198 #define LOG_FREEPXD 0x0001
199
200
201 struct lrd { /* log record descriptor: 16-byte common area followed by a 20-byte type-dependent union */
202 /*
203 * type independent area
204 */
205 __le32 logtid; /* 4: log transaction identifier */
206 __le32 backchain; /* 4: ptr to prev record of same transaction */
207 __le16 type; /* 2: record type */
208 __le16 length; /* 2: length of data in record (in bytes) */
209 __le32 aggregate; /* 4: file system lv/aggregate */
210 /* (16) */
211
212 /*
213 * type dependent area (20)
214 */
215 union {
216
217 /*
218 * COMMIT: commit
219 *
220 * transaction commit: no type-dependent information;
221 */
222
223 /*
224 * REDOPAGE: after-image
225 *
226 * apply after-image;
227 *
228 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
229 */
230 struct {
231 __le32 fileset; /* 4: fileset number */
232 __le32 inode; /* 4: inode number */
233 __le16 type; /* 2: REDOPAGE record type */
234 __le16 l2linesize; /* 2: log2 of line size */
235 pxd_t pxd; /* 8: on-disk page pxd */
236 } redopage; /* (20) */
237
238 /*
239 * NOREDOPAGE: the page is freed
240 *
241 * do not apply after-image records which precede this record
242 * in the log with the same page block number to this page.
243 *
244 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
245 */
246 struct {
247 __le32 fileset; /* 4: fileset number */
248 __le32 inode; /* 4: inode number */
249 __le16 type; /* 2: NOREDOPAGE record type */
250 __le16 rsrvd; /* 2: reserved */
251 pxd_t pxd; /* 8: on-disk page pxd */
252 } noredopage; /* (20) */
253
254 /*
255 * UPDATEMAP: update block allocation map
256 *
257 * either in-line PXD,
258 * or out-of-line XADLIST;
259 *
260 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
261 */
262 struct {
263 __le32 fileset; /* 4: fileset number */
264 __le32 inode; /* 4: inode number */
265 __le16 type; /* 2: UPDATEMAP record type */
266 __le16 nxd; /* 2: number of extents */
267 pxd_t pxd; /* 8: pxd */
268 } updatemap; /* (20) */
269
270 /*
271 * NOREDOINOEXT: the inode extent is freed
272 *
273 * do not apply after-image records which precede this
274 * record in the log with any of the 4 page block
275 * numbers in this inode extent.
276 *
277 * NOTE: The fileset and pxd fields MUST remain at the
278 * same offsets as in the REDOPAGE record format.
279 *
280 */
281 struct {
282 __le32 fileset; /* 4: fileset number */
283 __le32 iagnum; /* 4: IAG number */
284 __le32 inoext_idx; /* 4: inode extent index */
285 pxd_t pxd; /* 8: on-disk page pxd */
286 } noredoinoext; /* (20) */
287
288 /*
289 * SYNCPT: log sync point
290 *
291 * replay log up to syncpt address specified;
292 */
293 struct {
294 __le32 sync; /* 4: syncpt address (0 = here) */
295 } syncpt;
296
297 /*
298 * MOUNT: file system mount
299 *
300 * file system mount: no type-dependent information;
301 */
302
303 /*
304 * ? FREEXTENT: free specified extent(s)
305 *
306 * free specified extent(s) from block allocation map
307 * N.B.: nextent should be length of data/sizeof(xad_t)
308 */
309 struct {
310 __le32 type; /* 4: FREEXTENT record type */
311 __le32 nextent; /* 4: number of extents */
312
313 /* data: PXD or XAD list */
314 } freextent;
315
316 /*
317 * ? NOREDOFILE: this file is freed
318 *
319 * do not apply records which precede this record in the log
320 * with the same inode number.
321 *
322 * NOREDOFILE must be the first to be written at commit
323 * (last to be read in logredo()) - it prevents
324 * replay of preceding updates of all preceding generations
325 * of the inumber esp. the on-disk inode itself,
326 * but does NOT prevent
327 * replay of the ... (NOTE(review): this sentence is truncated in the original comment)
328 */
329 struct {
330 __le32 fileset; /* 4: fileset number */
331 __le32 inode; /* 4: inode number */
332 } noredofile;
333
334 /*
335 * ? NEWPAGE:
336 *
337 * metadata type dependent
338 */
339 struct {
340 __le32 fileset; /* 4: fileset number */
341 __le32 inode; /* 4: inode number */
342 __le32 type; /* 4: NEWPAGE record type */
343 pxd_t pxd; /* 8: on-disk page pxd */
344 } newpage;
345
346 /*
347 * ? DUMMY: filler
348 *
349 * no type-dependent information
350 */
351 } log;
352 }; /* (36) = 16 type-independent + 20 type-dependent bytes */
353
354 #define LOGRDSIZE (sizeof(struct lrd)) /* fixed size in bytes of a log record descriptor */
355
356 /*
357 * line vector descriptor: a little-endian (offset, length) pair
358 */
359 struct lvd {
360 __le16 offset; /* 2: NOTE(review): units are not stated in this header - confirm against users */
361 __le16 length; /* 2: NOTE(review): units are not stated in this header - confirm against users */
362 };
363
364
365 /*
366 * log logical volume: per-log runtime state (pointers, locks and wait queues)
367 */
368 struct jfs_log { /* NOTE(review): the "N:" size notes below appear to assume 32-bit pointers and longs - confirm */
369
370 struct list_head sb_list;/* This is used to sync metadata
371 * before writing syncpt.
372 */
373 struct list_head journal_list; /* Global list */
374 struct block_device *bdev; /* 4: log lv pointer */
375 int serial; /* 4: log mount serial number */
376
377 s64 base; /* @8: log extent address (inline log ) */
378 int size; /* 4: log size in log page (in page) */
379 int l2bsize; /* 4: log2 of bsize */
380
381 long flag; /* 4: flag (presumably holds the log_* "Log flag" values defined after this struct - TODO confirm) */
382
383 struct lbuf *lbuf_free; /* 4: free lbufs */
384 wait_queue_head_t free_wait; /* 4: */
385
386 /* log write */
387 int logtid; /* 4: log tid */
388 int page; /* 4: page number of eol page */
389 int eor; /* 4: eor of last record in eol page */
390 struct lbuf *bp; /* 4: current log page buffer */
391
392 struct semaphore loglock; /* 4: log write serialization lock */
393
394 /* syncpt */
395 int nextsync; /* 4: bytes to write before next syncpt */
396 int active; /* 4: */
397 wait_queue_head_t syncwait; /* 4: */
398
399 /* commit */
400 uint cflag; /* 4: */
401 struct list_head cqueue; /* FIFO commit queue */
402 struct tblock *flush_tblk; /* tblk we're waiting on for flush */
403 int gcrtc; /* 4: GC_READY transaction count */
404 struct tblock *gclrt; /* 4: latest GC_READY transaction */
405 spinlock_t gclock; /* 4: group commit lock */
406 int logsize; /* 4: log data area size in bytes (wrap modulus used by logdiff()) */
407 int lsn; /* 4: end-of-log */
408 int clsn; /* 4: clsn */
409 int syncpt; /* 4: addr of last syncpt record (origin used by logdiff()) */
410 int sync; /* 4: addr from last logsync() */
411 struct list_head synclist; /* 8: logsynclist anchor */
412 spinlock_t synclock; /* 4: synclist lock (taken by the LOGSYNC_LOCK macros) */
413 struct lbuf *wqueue; /* 4: log pageout queue */
414 int count; /* 4: count */
415 char uuid[16]; /* 16: 128-bit uuid of log device */
416
417 int no_integrity; /* 3: flag to disable journaling to disk */
418 };
419
420 /*
421 * Log flag (sequential values rather than bit masks; presumably bit numbers within jfs_log.flag - TODO confirm)
422 */
423 #define log_INLINELOG 1
424 #define log_SYNCBARRIER 2
425 #define log_QUIESCE 3
426 #define log_FLUSH 4
427
428 /*
429 * group commit flag (single-bit masks)
430 */
431 /* jfs_log (presumably kept in jfs_log.cflag - TODO confirm) */
432 #define logGC_PAGEOUT 0x00000001
433
434 /* tblock/lbuf */
435 #define tblkGC_QUEUE 0x0001
436 #define tblkGC_READY 0x0002
437 #define tblkGC_COMMIT 0x0004
438 #define tblkGC_COMMITTED 0x0008
439 #define tblkGC_EOP 0x0010
440 #define tblkGC_FREE 0x0020
441 #define tblkGC_LEADER 0x0040
442 #define tblkGC_ERROR 0x0080
443 #define tblkGC_LAZY 0x0100 /* D230860 */
444 #define tblkGC_UNLOCKED 0x0200 /* D230860 */
445
446 /*
447 * log cache buffer header
448 */
449 struct lbuf { /* header describing one cached log page buffer */
450 struct jfs_log *l_log; /* 4: log associated with buffer */
451
452 /*
453 * data buffer base area
454 */
455 uint l_flag; /* 4: pageout control flags */
456
457 struct lbuf *l_wqnext; /* 4: write queue link */
458 struct lbuf *l_freelist; /* 4: freelist link (also used as l_redrive_next) */
459
460 int l_pn; /* 4: log page number */
461 int l_eor; /* 4: log record eor */
462 int l_ceor; /* 4: committed log record eor */
463
464 s64 l_blkno; /* 8: log page block number */
465 caddr_t l_ldata; /* 4: data page */
466
467 wait_queue_head_t l_ioevent; /* 4: i/o done event */
468 struct page *l_page; /* The page itself */
469 };
470
471 /* Reuse l_freelist for redrive list: l_redrive_next is an alias for the same struct lbuf member */
472 #define l_redrive_next l_freelist
473
474 /*
475 * logsynclist block
476 *
477 * common logsyncblk prefix for jbuf_t and tblock
478 */
479 struct logsyncblk { /* common leading fields for objects linked on a log's synclist */
480 u16 xflag; /* flags */
481 u16 flag; /* only meaningful in tblock */
482 lid_t lid; /* lock id */
483 s32 lsn; /* log sequence number */
484 struct list_head synclist; /* log sync list link */
485 };
486
487 /*
488 * logsynclist serialization (per log): thin wrappers around the synclock spinlock of the given log
489 */
490
491 #define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock) /* initialize log->synclock */
492 #define LOGSYNC_LOCK(log) spin_lock(&(log)->synclock) /* acquire log->synclock */
493 #define LOGSYNC_UNLOCK(log) spin_unlock(&(log)->synclock) /* release log->synclock */
494
/*
 * logdiff - compute the difference in bytes of lsn from the sync point
 *
 * diff: signed integer lvalue that receives the result
 * lsn:  log address to measure
 * log:  pointer to an object with 'syncpt' and 'logsize' members
 *
 * Sets diff to lsn - log->syncpt; when that is negative, log->logsize
 * is added once to account for wrap-around.
 *
 * The body is wrapped in do { } while (0) so that "logdiff(...);" is
 * exactly one statement and can be used as the body of an unbraced
 * if/else.  'diff' and 'log' are expanded more than once, so pass
 * expressions without side effects.
 */
#define logdiff(diff, lsn, log)\
do {\
	(diff) = (lsn) - (log)->syncpt;\
	if ((diff) < 0)\
		(diff) += (log)->logsize;\
} while (0)
502
503 extern int lmLogOpen(struct super_block *sb); /* NOTE(review): return-value convention is not visible in this header */
504 extern int lmLogClose(struct super_block *sb);
505 extern int lmLogShutdown(struct jfs_log * log); /* the log state notes in this header tie LOGREDONE to this routine */
506 extern int lmLogInit(struct jfs_log * log); /* the log state notes in this header tie LOGMOUNT to this routine */
507 extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize); /* NOTE(review): units of logAddress/logSize are not stated here - check the definition */
508 extern void jfs_flush_journal(struct jfs_log * log, int wait); /* NOTE(review): meaning of 'wait' is defined by the implementation, not visible here */
509
510 #endif /* _H_JFS_LOGMGR */