]> git.ipfire.org Git - people/ms/linux.git/blob - fs/reiserfs/file.c
Linux-2.6.12-rc2
[people/ms/linux.git] / fs / reiserfs / file.c
1 /*
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */
4
5
6 #include <linux/time.h>
7 #include <linux/reiserfs_fs.h>
8 #include <linux/reiserfs_acl.h>
9 #include <linux/reiserfs_xattr.h>
10 #include <linux/smp_lock.h>
11 #include <asm/uaccess.h>
12 #include <linux/pagemap.h>
13 #include <linux/swap.h>
14 #include <linux/writeback.h>
15 #include <linux/blkdev.h>
16 #include <linux/buffer_head.h>
17 #include <linux/quotaops.h>
18
19 /*
20 ** We pack the tails of files on file close, not at the time they are written.
21 ** This implies an unnecessary copy of the tail and an unnecessary indirect item
22 ** insertion/balancing, for files that are written in one write.
23 ** It avoids unnecessary tail packings (balances) for files that are written in
24 ** multiple writes and are small enough to have tails.
25 **
26 ** file_release is called by the VFS layer when the file is closed. If
27 ** this is the last open file descriptor, and the file
28 ** small enough to have a tail, and the tail is currently in an
29 ** unformatted node, the tail is converted back into a direct item.
30 **
31 ** We use reiserfs_truncate_file to pack the tail, since it already has
32 ** all the conditions coded.
33 */
34 static int reiserfs_file_release (struct inode * inode, struct file * filp)
35 {
36
37 struct reiserfs_transaction_handle th ;
38 int err;
39 int jbegin_failure = 0;
40
41 if (!S_ISREG (inode->i_mode))
42 BUG ();
43
44 /* fast out for when nothing needs to be done */
45 if ((atomic_read(&inode->i_count) > 1 ||
46 !(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) ||
47 !tail_has_to_be_packed(inode)) &&
48 REISERFS_I(inode)->i_prealloc_count <= 0) {
49 return 0;
50 }
51
52 reiserfs_write_lock(inode->i_sb);
53 down (&inode->i_sem);
54 /* freeing preallocation only involves relogging blocks that
55 * are already in the current transaction. preallocation gets
56 * freed at the end of each transaction, so it is impossible for
57 * us to log any additional blocks (including quota blocks)
58 */
59 err = journal_begin(&th, inode->i_sb, 1);
60 if (err) {
61 /* uh oh, we can't allow the inode to go away while there
62 * is still preallocation blocks pending. Try to join the
63 * aborted transaction
64 */
65 jbegin_failure = err;
66 err = journal_join_abort(&th, inode->i_sb, 1);
67
68 if (err) {
69 /* hmpf, our choices here aren't good. We can pin the inode
70 * which will disallow unmount from every happening, we can
71 * do nothing, which will corrupt random memory on unmount,
72 * or we can forcibly remove the file from the preallocation
73 * list, which will leak blocks on disk. Lets pin the inode
74 * and let the admin know what is going on.
75 */
76 igrab(inode);
77 reiserfs_warning(inode->i_sb, "pinning inode %lu because the "
78 "preallocation can't be freed");
79 goto out;
80 }
81 }
82 reiserfs_update_inode_transaction(inode) ;
83
84 #ifdef REISERFS_PREALLOCATE
85 reiserfs_discard_prealloc (&th, inode);
86 #endif
87 err = journal_end(&th, inode->i_sb, 1);
88
89 /* copy back the error code from journal_begin */
90 if (!err)
91 err = jbegin_failure;
92
93 if (!err && atomic_read(&inode->i_count) <= 1 &&
94 (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) &&
95 tail_has_to_be_packed (inode)) {
96 /* if regular file is released by last holder and it has been
97 appended (we append by unformatted node only) or its direct
98 item(s) had to be converted, then it may have to be
99 indirect2direct converted */
100 err = reiserfs_truncate_file(inode, 0) ;
101 }
102 out:
103 up (&inode->i_sem);
104 reiserfs_write_unlock(inode->i_sb);
105 return err;
106 }
107
/*
 * Truncate entry point with the (struct inode *) -> void shape:
 * forwards to reiserfs_truncate_file() with its second argument set to 1
 * and discards the return value.
 */
static void reiserfs_vfs_truncate_file(struct inode *inode)
{
	reiserfs_truncate_file(inode, 1);
}
111
112 /* Sync a reiserfs file. */
113
114 /*
115 * FIXME: sync_mapping_buffers() never has anything to sync. Can
116 * be removed...
117 */
118
119 static int reiserfs_sync_file(
120 struct file * p_s_filp,
121 struct dentry * p_s_dentry,
122 int datasync
123 ) {
124 struct inode * p_s_inode = p_s_dentry->d_inode;
125 int n_err;
126 int barrier_done;
127
128 if (!S_ISREG(p_s_inode->i_mode))
129 BUG ();
130 n_err = sync_mapping_buffers(p_s_inode->i_mapping) ;
131 reiserfs_write_lock(p_s_inode->i_sb);
132 barrier_done = reiserfs_commit_for_inode(p_s_inode);
133 reiserfs_write_unlock(p_s_inode->i_sb);
134 if (barrier_done != 1)
135 blkdev_issue_flush(p_s_inode->i_sb->s_bdev, NULL);
136 if (barrier_done < 0)
137 return barrier_done;
138 return ( n_err < 0 ) ? -EIO : 0;
139 }
140
/* I really do not want to play with memory shortage right now, so
   to simplify the code, we are not going to write more than this many pages
   at a time.  This still should considerably improve performance compared
   to the 4k at a time case.  This is 32 pages of 4k size (128 KiB in total).
   The whole expansion is parenthesized so that the macro behaves as a
   single value inside larger expressions (e.g. "x % MACRO"). */
#define REISERFS_WRITE_PAGES_AT_A_TIME ((128 * 1024) / PAGE_CACHE_SIZE)
146
147 /* Allocates blocks for a file to fulfil write request.
148 Maps all unmapped but prepared pages from the list.
149 Updates metadata with newly allocated blocknumbers as needed */
150 static int reiserfs_allocate_blocks_for_region(
151 struct reiserfs_transaction_handle *th,
152 struct inode *inode, /* Inode we work with */
153 loff_t pos, /* Writing position */
154 int num_pages, /* number of pages write going
155 to touch */
156 int write_bytes, /* amount of bytes to write */
157 struct page **prepared_pages, /* array of
158 prepared pages
159 */
160 int blocks_to_allocate /* Amount of blocks we
161 need to allocate to
162 fit the data into file
163 */
164 )
165 {
166 struct cpu_key key; // cpu key of item that we are going to deal with
167 struct item_head *ih; // pointer to item head that we are going to deal with
168 struct buffer_head *bh; // Buffer head that contains items that we are going to deal with
169 __u32 * item; // pointer to item we are going to deal with
170 INITIALIZE_PATH(path); // path to item, that we are going to deal with.
171 b_blocknr_t *allocated_blocks; // Pointer to a place where allocated blocknumbers would be stored.
172 reiserfs_blocknr_hint_t hint; // hint structure for block allocator.
173 size_t res; // return value of various functions that we call.
174 int curr_block; // current block used to keep track of unmapped blocks.
175 int i; // loop counter
176 int itempos; // position in item
177 unsigned int from = (pos & (PAGE_CACHE_SIZE - 1)); // writing position in
178 // first page
179 unsigned int to = ((pos + write_bytes - 1) & (PAGE_CACHE_SIZE - 1)) + 1; /* last modified byte offset in last page */
180 __u64 hole_size ; // amount of blocks for a file hole, if it needed to be created.
181 int modifying_this_item = 0; // Flag for items traversal code to keep track
182 // of the fact that we already prepared
183 // current block for journal
184 int will_prealloc = 0;
185 RFALSE(!blocks_to_allocate, "green-9004: tried to allocate zero blocks?");
186
187 /* only preallocate if this is a small write */
188 if (REISERFS_I(inode)->i_prealloc_count ||
189 (!(write_bytes & (inode->i_sb->s_blocksize -1)) &&
190 blocks_to_allocate <
191 REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize))
192 will_prealloc = REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize;
193
194 allocated_blocks = kmalloc((blocks_to_allocate + will_prealloc) *
195 sizeof(b_blocknr_t), GFP_NOFS);
196
197 /* First we compose a key to point at the writing position, we want to do
198 that outside of any locking region. */
199 make_cpu_key (&key, inode, pos+1, TYPE_ANY, 3/*key length*/);
200
201 /* If we came here, it means we absolutely need to open a transaction,
202 since we need to allocate some blocks */
203 reiserfs_write_lock(inode->i_sb); // Journaling stuff and we need that.
204 res = journal_begin(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS); // Wish I know if this number enough
205 if (res)
206 goto error_exit;
207 reiserfs_update_inode_transaction(inode) ;
208
209 /* Look for the in-tree position of our write, need path for block allocator */
210 res = search_for_position_by_key(inode->i_sb, &key, &path);
211 if ( res == IO_ERROR ) {
212 res = -EIO;
213 goto error_exit;
214 }
215
216 /* Allocate blocks */
217 /* First fill in "hint" structure for block allocator */
218 hint.th = th; // transaction handle.
219 hint.path = &path; // Path, so that block allocator can determine packing locality or whatever it needs to determine.
220 hint.inode = inode; // Inode is needed by block allocator too.
221 hint.search_start = 0; // We have no hint on where to search free blocks for block allocator.
222 hint.key = key.on_disk_key; // on disk key of file.
223 hint.block = inode->i_blocks>>(inode->i_sb->s_blocksize_bits-9); // Number of disk blocks this file occupies already.
224 hint.formatted_node = 0; // We are allocating blocks for unformatted node.
225 hint.preallocate = will_prealloc;
226
227 /* Call block allocator to allocate blocks */
228 res = reiserfs_allocate_blocknrs(&hint, allocated_blocks, blocks_to_allocate, blocks_to_allocate);
229 if ( res != CARRY_ON ) {
230 if ( res == NO_DISK_SPACE ) {
231 /* We flush the transaction in case of no space. This way some
232 blocks might become free */
233 SB_JOURNAL(inode->i_sb)->j_must_wait = 1;
234 res = restart_transaction(th, inode, &path);
235 if (res)
236 goto error_exit;
237
238 /* We might have scheduled, so search again */
239 res = search_for_position_by_key(inode->i_sb, &key, &path);
240 if ( res == IO_ERROR ) {
241 res = -EIO;
242 goto error_exit;
243 }
244
245 /* update changed info for hint structure. */
246 res = reiserfs_allocate_blocknrs(&hint, allocated_blocks, blocks_to_allocate, blocks_to_allocate);
247 if ( res != CARRY_ON ) {
248 res = -ENOSPC;
249 pathrelse(&path);
250 goto error_exit;
251 }
252 } else {
253 res = -ENOSPC;
254 pathrelse(&path);
255 goto error_exit;
256 }
257 }
258
259 #ifdef __BIG_ENDIAN
260 // Too bad, I have not found any way to convert a given region from
261 // cpu format to little endian format
262 {
263 int i;
264 for ( i = 0; i < blocks_to_allocate ; i++)
265 allocated_blocks[i]=cpu_to_le32(allocated_blocks[i]);
266 }
267 #endif
268
269 /* Blocks allocating well might have scheduled and tree might have changed,
270 let's search the tree again */
271 /* find where in the tree our write should go */
272 res = search_for_position_by_key(inode->i_sb, &key, &path);
273 if ( res == IO_ERROR ) {
274 res = -EIO;
275 goto error_exit_free_blocks;
276 }
277
278 bh = get_last_bh( &path ); // Get a bufferhead for last element in path.
279 ih = get_ih( &path ); // Get a pointer to last item head in path.
280 item = get_item( &path ); // Get a pointer to last item in path
281
282 /* Let's see what we have found */
283 if ( res != POSITION_FOUND ) { /* position not found, this means that we
284 might need to append file with holes
285 first */
286 // Since we are writing past the file's end, we need to find out if
287 // there is a hole that needs to be inserted before our writing
288 // position, and how many blocks it is going to cover (we need to
289 // populate pointers to file blocks representing the hole with zeros)
290
291 {
292 int item_offset = 1;
293 /*
294 * if ih is stat data, its offset is 0 and we don't want to
295 * add 1 to pos in the hole_size calculation
296 */
297 if (is_statdata_le_ih(ih))
298 item_offset = 0;
299 hole_size = (pos + item_offset -
300 (le_key_k_offset( get_inode_item_key_version(inode),
301 &(ih->ih_key)) +
302 op_bytes_number(ih, inode->i_sb->s_blocksize))) >>
303 inode->i_sb->s_blocksize_bits;
304 }
305
306 if ( hole_size > 0 ) {
307 int to_paste = min_t(__u64, hole_size, MAX_ITEM_LEN(inode->i_sb->s_blocksize)/UNFM_P_SIZE ); // How much data to insert first time.
308 /* area filled with zeroes, to supply as list of zero blocknumbers
309 We allocate it outside of loop just in case loop would spin for
310 several iterations. */
311 char *zeros = kmalloc(to_paste*UNFM_P_SIZE, GFP_ATOMIC); // We cannot insert more than MAX_ITEM_LEN bytes anyway.
312 if ( !zeros ) {
313 res = -ENOMEM;
314 goto error_exit_free_blocks;
315 }
316 memset ( zeros, 0, to_paste*UNFM_P_SIZE);
317 do {
318 to_paste = min_t(__u64, hole_size, MAX_ITEM_LEN(inode->i_sb->s_blocksize)/UNFM_P_SIZE );
319 if ( is_indirect_le_ih(ih) ) {
320 /* Ok, there is existing indirect item already. Need to append it */
321 /* Calculate position past inserted item */
322 make_cpu_key( &key, inode, le_key_k_offset( get_inode_item_key_version(inode), &(ih->ih_key)) + op_bytes_number(ih, inode->i_sb->s_blocksize), TYPE_INDIRECT, 3);
323 res = reiserfs_paste_into_item( th, &path, &key, inode, (char *)zeros, UNFM_P_SIZE*to_paste);
324 if ( res ) {
325 kfree(zeros);
326 goto error_exit_free_blocks;
327 }
328 } else if ( is_statdata_le_ih(ih) ) {
329 /* No existing item, create it */
330 /* item head for new item */
331 struct item_head ins_ih;
332
333 /* create a key for our new item */
334 make_cpu_key( &key, inode, 1, TYPE_INDIRECT, 3);
335
336 /* Create new item head for our new item */
337 make_le_item_head (&ins_ih, &key, key.version, 1,
338 TYPE_INDIRECT, to_paste*UNFM_P_SIZE,
339 0 /* free space */);
340
341 /* Find where such item should live in the tree */
342 res = search_item (inode->i_sb, &key, &path);
343 if ( res != ITEM_NOT_FOUND ) {
344 /* item should not exist, otherwise we have error */
345 if ( res != -ENOSPC ) {
346 reiserfs_warning (inode->i_sb,
347 "green-9008: search_by_key (%K) returned %d",
348 &key, res);
349 }
350 res = -EIO;
351 kfree(zeros);
352 goto error_exit_free_blocks;
353 }
354 res = reiserfs_insert_item( th, &path, &key, &ins_ih, inode, (char *)zeros);
355 } else {
356 reiserfs_panic(inode->i_sb, "green-9011: Unexpected key type %K\n", &key);
357 }
358 if ( res ) {
359 kfree(zeros);
360 goto error_exit_free_blocks;
361 }
362 /* Now we want to check if transaction is too full, and if it is
363 we restart it. This will also free the path. */
364 if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
365 res = restart_transaction(th, inode, &path);
366 if (res) {
367 pathrelse (&path);
368 kfree(zeros);
369 goto error_exit;
370 }
371 }
372
373 /* Well, need to recalculate path and stuff */
374 set_cpu_key_k_offset( &key, cpu_key_k_offset(&key) + (to_paste << inode->i_blkbits));
375 res = search_for_position_by_key(inode->i_sb, &key, &path);
376 if ( res == IO_ERROR ) {
377 res = -EIO;
378 kfree(zeros);
379 goto error_exit_free_blocks;
380 }
381 bh=get_last_bh(&path);
382 ih=get_ih(&path);
383 item = get_item(&path);
384 hole_size -= to_paste;
385 } while ( hole_size );
386 kfree(zeros);
387 }
388 }
389
390 // Go through existing indirect items first
391 // replace all zeroes with blocknumbers from list
392 // Note that if no corresponding item was found, by previous search,
393 // it means there are no existing in-tree representation for file area
394 // we are going to overwrite, so there is nothing to scan through for holes.
395 for ( curr_block = 0, itempos = path.pos_in_item ; curr_block < blocks_to_allocate && res == POSITION_FOUND ; ) {
396 retry:
397
398 if ( itempos >= ih_item_len(ih)/UNFM_P_SIZE ) {
399 /* We run out of data in this indirect item, let's look for another
400 one. */
401 /* First if we are already modifying current item, log it */
402 if ( modifying_this_item ) {
403 journal_mark_dirty (th, inode->i_sb, bh);
404 modifying_this_item = 0;
405 }
406 /* Then set the key to look for a new indirect item (offset of old
407 item is added to old item length */
408 set_cpu_key_k_offset( &key, le_key_k_offset( get_inode_item_key_version(inode), &(ih->ih_key)) + op_bytes_number(ih, inode->i_sb->s_blocksize));
409 /* Search ofor position of new key in the tree. */
410 res = search_for_position_by_key(inode->i_sb, &key, &path);
411 if ( res == IO_ERROR) {
412 res = -EIO;
413 goto error_exit_free_blocks;
414 }
415 bh=get_last_bh(&path);
416 ih=get_ih(&path);
417 item = get_item(&path);
418 itempos = path.pos_in_item;
419 continue; // loop to check all kinds of conditions and so on.
420 }
421 /* Ok, we have correct position in item now, so let's see if it is
422 representing file hole (blocknumber is zero) and fill it if needed */
423 if ( !item[itempos] ) {
424 /* Ok, a hole. Now we need to check if we already prepared this
425 block to be journaled */
426 while ( !modifying_this_item ) { // loop until succeed
427 /* Well, this item is not journaled yet, so we must prepare
428 it for journal first, before we can change it */
429 struct item_head tmp_ih; // We copy item head of found item,
430 // here to detect if fs changed under
431 // us while we were preparing for
432 // journal.
433 int fs_gen; // We store fs generation here to find if someone
434 // changes fs under our feet
435
436 copy_item_head (&tmp_ih, ih); // Remember itemhead
437 fs_gen = get_generation (inode->i_sb); // remember fs generation
438 reiserfs_prepare_for_journal(inode->i_sb, bh, 1); // Prepare a buffer within which indirect item is stored for changing.
439 if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
440 // Sigh, fs was changed under us, we need to look for new
441 // location of item we are working with
442
443 /* unmark prepaerd area as journaled and search for it's
444 new position */
445 reiserfs_restore_prepared_buffer(inode->i_sb, bh);
446 res = search_for_position_by_key(inode->i_sb, &key, &path);
447 if ( res == IO_ERROR) {
448 res = -EIO;
449 goto error_exit_free_blocks;
450 }
451 bh=get_last_bh(&path);
452 ih=get_ih(&path);
453 item = get_item(&path);
454 itempos = path.pos_in_item;
455 goto retry;
456 }
457 modifying_this_item = 1;
458 }
459 item[itempos] = allocated_blocks[curr_block]; // Assign new block
460 curr_block++;
461 }
462 itempos++;
463 }
464
465 if ( modifying_this_item ) { // We need to log last-accessed block, if it
466 // was modified, but not logged yet.
467 journal_mark_dirty (th, inode->i_sb, bh);
468 }
469
470 if ( curr_block < blocks_to_allocate ) {
471 // Oh, well need to append to indirect item, or to create indirect item
472 // if there weren't any
473 if ( is_indirect_le_ih(ih) ) {
474 // Existing indirect item - append. First calculate key for append
475 // position. We do not need to recalculate path as it should
476 // already point to correct place.
477 make_cpu_key( &key, inode, le_key_k_offset( get_inode_item_key_version(inode), &(ih->ih_key)) + op_bytes_number(ih, inode->i_sb->s_blocksize), TYPE_INDIRECT, 3);
478 res = reiserfs_paste_into_item( th, &path, &key, inode, (char *)(allocated_blocks+curr_block), UNFM_P_SIZE*(blocks_to_allocate-curr_block));
479 if ( res ) {
480 goto error_exit_free_blocks;
481 }
482 } else if (is_statdata_le_ih(ih) ) {
483 // Last found item was statdata. That means we need to create indirect item.
484 struct item_head ins_ih; /* itemhead for new item */
485
486 /* create a key for our new item */
487 make_cpu_key( &key, inode, 1, TYPE_INDIRECT, 3); // Position one,
488 // because that's
489 // where first
490 // indirect item
491 // begins
492 /* Create new item head for our new item */
493 make_le_item_head (&ins_ih, &key, key.version, 1, TYPE_INDIRECT,
494 (blocks_to_allocate-curr_block)*UNFM_P_SIZE,
495 0 /* free space */);
496 /* Find where such item should live in the tree */
497 res = search_item (inode->i_sb, &key, &path);
498 if ( res != ITEM_NOT_FOUND ) {
499 /* Well, if we have found such item already, or some error
500 occured, we need to warn user and return error */
501 if ( res != -ENOSPC ) {
502 reiserfs_warning (inode->i_sb,
503 "green-9009: search_by_key (%K) "
504 "returned %d", &key, res);
505 }
506 res = -EIO;
507 goto error_exit_free_blocks;
508 }
509 /* Insert item into the tree with the data as its body */
510 res = reiserfs_insert_item( th, &path, &key, &ins_ih, inode, (char *)(allocated_blocks+curr_block));
511 } else {
512 reiserfs_panic(inode->i_sb, "green-9010: unexpected item type for key %K\n",&key);
513 }
514 }
515
516 // the caller is responsible for closing the transaction
517 // unless we return an error, they are also responsible for logging
518 // the inode.
519 //
520 pathrelse(&path);
521 /*
522 * cleanup prellocation from previous writes
523 * if this is a partial block write
524 */
525 if (write_bytes & (inode->i_sb->s_blocksize -1))
526 reiserfs_discard_prealloc(th, inode);
527 reiserfs_write_unlock(inode->i_sb);
528
529 // go through all the pages/buffers and map the buffers to newly allocated
530 // blocks (so that system knows where to write these pages later).
531 curr_block = 0;
532 for ( i = 0; i < num_pages ; i++ ) {
533 struct page *page=prepared_pages[i]; //current page
534 struct buffer_head *head = page_buffers(page);// first buffer for a page
535 int block_start, block_end; // in-page offsets for buffers.
536
537 if (!page_buffers(page))
538 reiserfs_panic(inode->i_sb, "green-9005: No buffers for prepared page???");
539
540 /* For each buffer in page */
541 for(bh = head, block_start = 0; bh != head || !block_start;
542 block_start=block_end, bh = bh->b_this_page) {
543 if (!bh)
544 reiserfs_panic(inode->i_sb, "green-9006: Allocated but absent buffer for a page?");
545 block_end = block_start+inode->i_sb->s_blocksize;
546 if (i == 0 && block_end <= from )
547 /* if this buffer is before requested data to map, skip it */
548 continue;
549 if (i == num_pages - 1 && block_start >= to)
550 /* If this buffer is after requested data to map, abort
551 processing of current page */
552 break;
553
554 if ( !buffer_mapped(bh) ) { // Ok, unmapped buffer, need to map it
555 map_bh( bh, inode->i_sb, le32_to_cpu(allocated_blocks[curr_block]));
556 curr_block++;
557 set_buffer_new(bh);
558 }
559 }
560 }
561
562 RFALSE( curr_block > blocks_to_allocate, "green-9007: Used too many blocks? weird");
563
564 kfree(allocated_blocks);
565 return 0;
566
567 // Need to deal with transaction here.
568 error_exit_free_blocks:
569 pathrelse(&path);
570 // free blocks
571 for( i = 0; i < blocks_to_allocate; i++ )
572 reiserfs_free_block(th, inode, le32_to_cpu(allocated_blocks[i]), 1);
573
574 error_exit:
575 if (th->t_trans_id) {
576 int err;
577 // update any changes we made to blk count
578 reiserfs_update_sd(th, inode);
579 err = journal_end(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS);
580 if (err)
581 res = err;
582 }
583 reiserfs_write_unlock(inode->i_sb);
584 kfree(allocated_blocks);
585
586 return res;
587 }
588
589 /* Unlock pages prepared by reiserfs_prepare_file_region_for_write */
590 static void reiserfs_unprepare_pages(struct page **prepared_pages, /* list of locked pages */
591 size_t num_pages /* amount of pages */) {
592 int i; // loop counter
593
594 for (i=0; i < num_pages ; i++) {
595 struct page *page = prepared_pages[i];
596
597 try_to_free_buffers(page);
598 unlock_page(page);
599 page_cache_release(page);
600 }
601 }
602
603 /* This function will copy data from userspace to specified pages within
604 supplied byte range */
605 static int reiserfs_copy_from_user_to_file_region(
606 loff_t pos, /* In-file position */
607 int num_pages, /* Number of pages affected */
608 int write_bytes, /* Amount of bytes to write */
609 struct page **prepared_pages, /* pointer to
610 array to
611 prepared pages
612 */
613 const char __user *buf /* Pointer to user-supplied
614 data*/
615 )
616 {
617 long page_fault=0; // status of copy_from_user.
618 int i; // loop counter.
619 int offset; // offset in page
620
621 for ( i = 0, offset = (pos & (PAGE_CACHE_SIZE-1)); i < num_pages ; i++,offset=0) {
622 size_t count = min_t(size_t,PAGE_CACHE_SIZE-offset,write_bytes); // How much of bytes to write to this page
623 struct page *page=prepared_pages[i]; // Current page we process.
624
625 fault_in_pages_readable( buf, count);
626
627 /* Copy data from userspace to the current page */
628 kmap(page);
629 page_fault = __copy_from_user(page_address(page)+offset, buf, count); // Copy the data.
630 /* Flush processor's dcache for this page */
631 flush_dcache_page(page);
632 kunmap(page);
633 buf+=count;
634 write_bytes-=count;
635
636 if (page_fault)
637 break; // Was there a fault? abort.
638 }
639
640 return page_fault?-EFAULT:0;
641 }
642
643 /* taken fs/buffer.c:__block_commit_write */
644 int reiserfs_commit_page(struct inode *inode, struct page *page,
645 unsigned from, unsigned to)
646 {
647 unsigned block_start, block_end;
648 int partial = 0;
649 unsigned blocksize;
650 struct buffer_head *bh, *head;
651 unsigned long i_size_index = inode->i_size >> PAGE_CACHE_SHIFT;
652 int new;
653 int logit = reiserfs_file_data_log(inode);
654 struct super_block *s = inode->i_sb;
655 int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize;
656 struct reiserfs_transaction_handle th;
657 int ret = 0;
658
659 th.t_trans_id = 0;
660 blocksize = 1 << inode->i_blkbits;
661
662 if (logit) {
663 reiserfs_write_lock(s);
664 ret = journal_begin(&th, s, bh_per_page + 1);
665 if (ret)
666 goto drop_write_lock;
667 reiserfs_update_inode_transaction(inode);
668 }
669 for(bh = head = page_buffers(page), block_start = 0;
670 bh != head || !block_start;
671 block_start=block_end, bh = bh->b_this_page)
672 {
673
674 new = buffer_new(bh);
675 clear_buffer_new(bh);
676 block_end = block_start + blocksize;
677 if (block_end <= from || block_start >= to) {
678 if (!buffer_uptodate(bh))
679 partial = 1;
680 } else {
681 set_buffer_uptodate(bh);
682 if (logit) {
683 reiserfs_prepare_for_journal(s, bh, 1);
684 journal_mark_dirty(&th, s, bh);
685 } else if (!buffer_dirty(bh)) {
686 mark_buffer_dirty(bh);
687 /* do data=ordered on any page past the end
688 * of file and any buffer marked BH_New.
689 */
690 if (reiserfs_data_ordered(inode->i_sb) &&
691 (new || page->index >= i_size_index)) {
692 reiserfs_add_ordered_list(inode, bh);
693 }
694 }
695 }
696 }
697 if (logit) {
698 ret = journal_end(&th, s, bh_per_page + 1);
699 drop_write_lock:
700 reiserfs_write_unlock(s);
701 }
702 /*
703 * If this is a partial write which happened to make all buffers
704 * uptodate then we can optimize away a bogus readpage() for
705 * the next read(). Here we 'discover' whether the page went
706 * uptodate as a result of this (potentially partial) write.
707 */
708 if (!partial)
709 SetPageUptodate(page);
710 return ret;
711 }
712
713
714 /* Submit pages for write. This was separated from actual file copying
715 because we might want to allocate block numbers in-between.
716 This function assumes that caller will adjust file size to correct value. */
717 static int reiserfs_submit_file_region_for_write(
718 struct reiserfs_transaction_handle *th,
719 struct inode *inode,
720 loff_t pos, /* Writing position offset */
721 size_t num_pages, /* Number of pages to write */
722 size_t write_bytes, /* number of bytes to write */
723 struct page **prepared_pages /* list of pages */
724 )
725 {
726 int status; // return status of block_commit_write.
727 int retval = 0; // Return value we are going to return.
728 int i; // loop counter
729 int offset; // Writing offset in page.
730 int orig_write_bytes = write_bytes;
731 int sd_update = 0;
732
733 for ( i = 0, offset = (pos & (PAGE_CACHE_SIZE-1)); i < num_pages ; i++,offset=0) {
734 int count = min_t(int,PAGE_CACHE_SIZE-offset,write_bytes); // How much of bytes to write to this page
735 struct page *page=prepared_pages[i]; // Current page we process.
736
737 status = reiserfs_commit_page(inode, page, offset, offset+count);
738 if ( status )
739 retval = status; // To not overcomplicate matters We are going to
740 // submit all the pages even if there was error.
741 // we only remember error status to report it on
742 // exit.
743 write_bytes-=count;
744 }
745 /* now that we've gotten all the ordered buffers marked dirty,
746 * we can safely update i_size and close any running transaction
747 */
748 if ( pos + orig_write_bytes > inode->i_size) {
749 inode->i_size = pos + orig_write_bytes; // Set new size
750 /* If the file have grown so much that tail packing is no
751 * longer possible, reset "need to pack" flag */
752 if ( (have_large_tails (inode->i_sb) &&
753 inode->i_size > i_block_size (inode)*4) ||
754 (have_small_tails (inode->i_sb) &&
755 inode->i_size > i_block_size(inode)) )
756 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask ;
757 else if ( (have_large_tails (inode->i_sb) &&
758 inode->i_size < i_block_size (inode)*4) ||
759 (have_small_tails (inode->i_sb) &&
760 inode->i_size < i_block_size(inode)) )
761 REISERFS_I(inode)->i_flags |= i_pack_on_close_mask ;
762
763 if (th->t_trans_id) {
764 reiserfs_write_lock(inode->i_sb);
765 reiserfs_update_sd(th, inode); // And update on-disk metadata
766 reiserfs_write_unlock(inode->i_sb);
767 } else
768 inode->i_sb->s_op->dirty_inode(inode);
769
770 sd_update = 1;
771 }
772 if (th->t_trans_id) {
773 reiserfs_write_lock(inode->i_sb);
774 if (!sd_update)
775 reiserfs_update_sd(th, inode);
776 status = journal_end(th, th->t_super, th->t_blocks_allocated);
777 if (status)
778 retval = status;
779 reiserfs_write_unlock(inode->i_sb);
780 }
781 th->t_trans_id = 0;
782
783 /*
784 * we have to unlock the pages after updating i_size, otherwise
785 * we race with writepage
786 */
787 for ( i = 0; i < num_pages ; i++) {
788 struct page *page=prepared_pages[i];
789 unlock_page(page);
790 mark_page_accessed(page);
791 page_cache_release(page);
792 }
793 return retval;
794 }
795
796 /* Look if passed writing region is going to touch file's tail
797 (if it is present). And if it is, convert the tail to unformatted node */
798 static int reiserfs_check_for_tail_and_convert( struct inode *inode, /* inode to deal with */
799 loff_t pos, /* Writing position */
800 int write_bytes /* amount of bytes to write */
801 )
802 {
803 INITIALIZE_PATH(path); // needed for search_for_position
804 struct cpu_key key; // Key that would represent last touched writing byte.
805 struct item_head *ih; // item header of found block;
806 int res; // Return value of various functions we call.
807 int cont_expand_offset; // We will put offset for generic_cont_expand here
808 // This can be int just because tails are created
809 // only for small files.
810
811 /* this embodies a dependency on a particular tail policy */
812 if ( inode->i_size >= inode->i_sb->s_blocksize*4 ) {
813 /* such a big files do not have tails, so we won't bother ourselves
814 to look for tails, simply return */
815 return 0;
816 }
817
818 reiserfs_write_lock(inode->i_sb);
819 /* find the item containing the last byte to be written, or if
820 * writing past the end of the file then the last item of the
821 * file (and then we check its type). */
822 make_cpu_key (&key, inode, pos+write_bytes+1, TYPE_ANY, 3/*key length*/);
823 res = search_for_position_by_key(inode->i_sb, &key, &path);
824 if ( res == IO_ERROR ) {
825 reiserfs_write_unlock(inode->i_sb);
826 return -EIO;
827 }
828 ih = get_ih(&path);
829 res = 0;
830 if ( is_direct_le_ih(ih) ) {
831 /* Ok, closest item is file tail (tails are stored in "direct"
832 * items), so we need to unpack it. */
833 /* To not overcomplicate matters, we just call generic_cont_expand
834 which will in turn call other stuff and finally will boil down to
835 reiserfs_get_block() that would do necessary conversion. */
836 cont_expand_offset = le_key_k_offset(get_inode_item_key_version(inode), &(ih->ih_key));
837 pathrelse(&path);
838 res = generic_cont_expand( inode, cont_expand_offset);
839 } else
840 pathrelse(&path);
841
842 reiserfs_write_unlock(inode->i_sb);
843 return res;
844 }
845
846 /* Locks @num_pages page-cache pages of @inode, starting with the page that
847 contains @pos, and stores them in the @prepared_pages array. Empty buffers
848 are attached to every page that has none yet.
849 A buffer of the first or last page that is only partially overwritten is
850 read from disk if it is mapped; if it is not mapped (file hole or file
851 append), the part of it that lies outside the write area is zeroed instead.
852 Returns the number of not-yet-allocated blocks that must be allocated to
853 cover the new file data, or a negative errno:
   -EFAULT if @num_pages is zero (no page is touched),
   -ENOMEM if a page could not be grabbed,
   -EIO    if one of the partially overwritten buffers could not be read.
   On -ENOMEM and -EIO the pages grabbed so far are handed to
   reiserfs_unprepare_pages() before returning. */
854 static int reiserfs_prepare_file_region_for_write(
855 struct inode *inode /* Inode of the file */,
856 loff_t pos, /* position in the file */
857 size_t num_pages, /* number of pages to
858 prepare */
859 size_t write_bytes, /* Amount of bytes to be
860 overwritten from
861 @pos */
862 struct page **prepared_pages /* pointer to array
863 where to store
864 prepared pages */
865 )
866 {
867 int res=0; // Return values of different functions we call; error code on the failure paths.
868 unsigned long index = pos >> PAGE_CACHE_SHIFT; // Offset in file in pages.
869 int from = (pos & (PAGE_CACHE_SIZE - 1)); // Writing offset in first page
870 int to = ((pos + write_bytes - 1) & (PAGE_CACHE_SIZE - 1)) + 1;
871 /* offset just past the last modified byte in the last
872 page (so it is in the range 1..PAGE_CACHE_SIZE) */
873 struct address_space *mapping = inode->i_mapping; // Pages are mapped here.
874 int i; // Simple counter
875 int blocks = 0; /* Return value (blocks that should be allocated) */
876 struct buffer_head *bh, *head; // Current bufferhead and first bufferhead
877 // of a page.
878 unsigned block_start, block_end; // Starting and ending offsets of current
879 // buffer in the page.
880 struct buffer_head *wait[2], **wait_bh=wait; // Buffers we issued reads for
881 // because they are partially
882 // overwritten. Note how we have
883 // at most 2 such buffers: at
884 // most one partially
885 // overwritten buffer at
886 // the beginning of the write area
887 // and one at the end.
888 // Everything in the middle gets overwritten totally.
889
890 struct cpu_key key; // cpu key of item that we are going to deal with
891 struct item_head *ih = NULL; // pointer to item head that we are going to deal with
892 struct buffer_head *itembuf=NULL; // Buffer head that contains items that we are going to deal with
893 INITIALIZE_PATH(path); // path to item, that we are going to deal with.
894 __u32 * item=NULL; // pointer to item we are going to deal with
895 int item_pos=-1; /* Position in indirect item */
896
897
898 if ( num_pages < 1 ) {
899 reiserfs_warning (inode->i_sb,
900 "green-9001: reiserfs_prepare_file_region_for_write "
901 "called with zero number of pages to process");
902 return -EFAULT;
903 }
904
905 /* We have 2 loops for pages. In first loop we grab and lock the pages, so
906 that nobody would touch these until we release the pages. Then
907 we'd start to deal with mapping buffers to blocks. */
908 for ( i = 0; i < num_pages; i++) {
909 prepared_pages[i] = grab_cache_page(mapping, index + i); // locks the page
910 if ( !prepared_pages[i]) {
911 res = -ENOMEM;
912 goto failed_page_grabbing;
913 }
914 if (!page_has_buffers(prepared_pages[i]))
915 create_empty_buffers(prepared_pages[i], inode->i_sb->s_blocksize, 0);
916 }
917
918 /* Let's count the amount of blocks for the case where all the blocks
919 overwritten are new (we will subtract already allocated blocks later)*/
920 if ( num_pages > 2 )
921 /* These are fully overwritten pages, so all the blocks in
922 these pages are counted as needing to be allocated */
923 blocks = (num_pages - 2) << (PAGE_CACHE_SHIFT - inode->i_blkbits);
924
925 /* count blocks needed for first page (possibly partially written) */
926 blocks += ((PAGE_CACHE_SIZE - from) >> inode->i_blkbits) +
927 !!(from & (inode->i_sb->s_blocksize-1)); /* roundup */
928
929 /* Now we account for the last page. If last page == first page (we
930 overwrite only one page), we subtract all the blocks past the
931 last writing position in a page out of the already calculated number
932 of blocks */
933 blocks += ((num_pages > 1) << (PAGE_CACHE_SHIFT-inode->i_blkbits)) -
934 ((PAGE_CACHE_SIZE - to) >> inode->i_blkbits);
935 /* Note how we do not roundup here since partial blocks still
936 should be allocated */
937
938 /* Now if all the write area lies past the file end (the first page of the
939 area starts beyond i_size), there is no point in mapping blocks, since
940 there are none, so we just zero out the remaining
   parts of the first and last pages in the write area (if needed) */
941 if ( (pos & ~((loff_t)PAGE_CACHE_SIZE - 1)) > inode->i_size ) {
942 if ( from != 0 ) {/* First page needs to be partially zeroed */
943 char *kaddr = kmap_atomic(prepared_pages[0], KM_USER0);
944 memset(kaddr, 0, from);
945 kunmap_atomic( kaddr, KM_USER0);
946 }
947 if ( to != PAGE_CACHE_SIZE ) { /* Last page needs to be partially zeroed */
948 char *kaddr = kmap_atomic(prepared_pages[num_pages-1], KM_USER0);
949 memset(kaddr+to, 0, PAGE_CACHE_SIZE - to);
950 kunmap_atomic( kaddr, KM_USER0);
951 }
952
953 /* Since all blocks are new - use already calculated value */
954 return blocks;
955 }
956
957 /* Well, since we write somewhere into the middle of a file, there is a
958 possibility we are writing over some already allocated blocks, so
959 let's map these blocks and subtract the number of such blocks out of the
960 blocks we need to allocate (calculated above) */
961 /* Mask write position to start on blocksize, we do it out of the
962 loop for performance reasons. Note that @pos stays rounded down for
   the rest of the function. */
963 pos &= ~((loff_t) inode->i_sb->s_blocksize - 1);
964 /* Set cpu key to the starting position in a file (on left block boundary).
   The second masking of pos here is redundant: pos was already rounded
   down on the previous statement. */
965 make_cpu_key (&key, inode, 1 + ((pos) & ~((loff_t) inode->i_sb->s_blocksize - 1)), TYPE_ANY, 3/*key length*/);
966
967 reiserfs_write_lock(inode->i_sb); // We need that for at least search_by_key()
968 for ( i = 0; i < num_pages ; i++ ) {
969
970 head = page_buffers(prepared_pages[i]);
971 /* For each buffer in the page. The buffers form a ring via
   b_this_page; !block_start lets the very first iteration through. */
972 for(bh = head, block_start = 0; bh != head || !block_start;
973 block_start=block_end, bh = bh->b_this_page) {
974 if (!bh)
975 reiserfs_panic(inode->i_sb, "green-9002: Allocated but absent buffer for a page?");
976 /* Find where this buffer ends */
977 block_end = block_start+inode->i_sb->s_blocksize;
978 if (i == 0 && block_end <= from )
979 /* if this buffer is before requested data to map, skip it*/
980 continue;
981
982 if (i == num_pages - 1 && block_start >= to) {
983 /* If this buffer is after requested data to map, abort
984 processing of current page */
985 break;
986 }
987
988 if ( buffer_mapped(bh) && bh->b_blocknr !=0 ) {
989 /* This is an optimisation for the case where the buffer is mapped
990 and has a block number assigned. In case a significant amount
991 of such buffers are present, we may avoid some amount
992 of search_by_key calls.
993 Probably it would be possible to move parts of this code
994 out of BKL, but I am afraid that would overcomplicate the code
995 without any noticeable benefit.
996 */
997 item_pos++;
998 /* Update the key */
999 set_cpu_key_k_offset( &key, cpu_key_k_offset(&key) + inode->i_sb->s_blocksize);
1000 blocks--; // Decrease the amount of blocks that need to be
1001 // allocated
1002 continue; // Go to the next buffer
1003 }
1004
1005 if ( !itembuf || /* if first iteration */
1006 item_pos >= ih_item_len(ih)/UNFM_P_SIZE)
1007 { /* or if we progressed past the
1008 current unformatted_item */
1009 /* Try to find next item */
1010 res = search_for_position_by_key(inode->i_sb, &key, &path);
1011 /* Abort if no more items. This break leaves only the per-buffer
   loop; the per-page loop goes on with the next page. */
1012 if ( res != POSITION_FOUND ) {
1013 /* make sure later loops don't use this item */
1014 itembuf = NULL;
1015 item = NULL;
1016 break;
1017 }
1018
1019 /* Update information about current indirect item */
1020 itembuf = get_last_bh( &path );
1021 ih = get_ih( &path );
1022 item = get_item( &path );
1023 item_pos = path.pos_in_item;
1024
1025 RFALSE( !is_indirect_le_ih (ih), "green-9003: indirect item expected");
1026 }
1027
1028 /* See if there is some block associated with the file
1029 at that position, map the buffer to this block */
1030 if ( get_block_num(item,item_pos) ) {
1031 map_bh(bh, inode->i_sb, get_block_num(item,item_pos));
1032 blocks--; // Decrease the amount of blocks that need to be
1033 // allocated
1034 }
1035 item_pos++;
1036 /* Update the key */
1037 set_cpu_key_k_offset( &key, cpu_key_k_offset(&key) + inode->i_sb->s_blocksize);
1038 }
1039 }
1040 pathrelse(&path); // Free the path
1041 reiserfs_write_unlock(inode->i_sb);
1042
1043 /* Now zero out unmapped buffers for the first and last pages of the
1044 write area or issue read requests if the buffer is mapped. */
1045 /* First page, see if it is not uptodate */
1046 if ( !PageUptodate(prepared_pages[0]) ) {
1047 head = page_buffers(prepared_pages[0]);
1048
1049 /* For each buffer in page */
1050 for(bh = head, block_start = 0; bh != head || !block_start;
1051 block_start=block_end, bh = bh->b_this_page) {
1052
1053 if (!bh)
1054 reiserfs_panic(inode->i_sb, "green-9002: Allocated but absent buffer for a page?");
1055 /* Find where this buffer ends */
1056 block_end = block_start+inode->i_sb->s_blocksize;
1057 if ( block_end <= from )
1058 /* if this buffer is before requested data to map, skip it*/
1059 continue;
1060 if ( block_start < from ) { /* Aha, our partial buffer */
1061 if ( buffer_mapped(bh) ) { /* If it is mapped, we need to
1062 issue READ request for it to
1063 not lose data */
1064 ll_rw_block(READ, 1, &bh);
1065 *wait_bh++=bh;
1066 } else { /* Not mapped, zero the part in front of the write area */
1067 char *kaddr = kmap_atomic(prepared_pages[0], KM_USER0);
1068 memset(kaddr+block_start, 0, from-block_start);
1069 kunmap_atomic( kaddr, KM_USER0);
1070 set_buffer_uptodate(bh);
1071 }
1072 }
1073 }
1074 }
1075
1076 /* Last page, see if it is not uptodate, or if the last page is past the end of the file.
   NOTE(review): @pos was rounded down to a block boundary above, so
   pos+write_bytes here is not the exact end of the write area -- confirm
   that this is intended. */
1077 if ( !PageUptodate(prepared_pages[num_pages-1]) ||
1078 ((pos+write_bytes)>>PAGE_CACHE_SHIFT) > (inode->i_size>>PAGE_CACHE_SHIFT) ) {
1079 head = page_buffers(prepared_pages[num_pages-1]);
1080
1081 /* for each buffer in page */
1082 for(bh = head, block_start = 0; bh != head || !block_start;
1083 block_start=block_end, bh = bh->b_this_page) {
1084
1085 if (!bh)
1086 reiserfs_panic(inode->i_sb, "green-9002: Allocated but absent buffer for a page?");
1087 /* Find where this buffer ends */
1088 block_end = block_start+inode->i_sb->s_blocksize;
1089 if ( block_start >= to )
1090 /* if this buffer is after requested data to map, stop here */
1091 break;
1092 if ( block_end > to ) { /* Aha, our partial buffer */
1093 if ( buffer_mapped(bh) ) { /* If it is mapped, we need to
1094 issue READ request for it to
1095 not lose data */
1096 ll_rw_block(READ, 1, &bh);
1097 *wait_bh++=bh;
1098 } else { /* Not mapped, zero the part behind the write area */
1099 char *kaddr = kmap_atomic(prepared_pages[num_pages-1], KM_USER0);
1100 memset(kaddr+to, 0, block_end-to);
1101 kunmap_atomic( kaddr, KM_USER0);
1102 set_buffer_uptodate(bh);
1103 }
1104 }
1105 }
1106 }
1107
1108 /* Wait for read requests we made to happen, if necessary */
1109 while(wait_bh > wait) {
1110 wait_on_buffer(*--wait_bh);
1111 if (!buffer_uptodate(*wait_bh)) {
1112 res = -EIO;
1113 goto failed_read;
1114 }
1115 }
1116
1117 return blocks;
/* Only the first i pages were grabbed; hand back just those. */
1118 failed_page_grabbing:
1119 num_pages = i;
/* All num_pages pages were grabbed; hand all of them back. */
1120 failed_read:
1121 reiserfs_unprepare_pages(prepared_pages, num_pages);
1122 return res;
1123 }
1124
1125 /* Write @count bytes at position @ppos in a file indicated by @file
1126 from the buffer @buf.
1127
1128 generic_file_write() is only appropriate for filesystems that are not seeking to optimize performance and want
1129 something simple that works. It is not for serious use by general purpose filesystems, excepting the one that it was
1130 written for (ext2/3). This is for several reasons:
1131
1132 * It has no understanding of any filesystem specific optimizations.
1133
1134 * It enters the filesystem repeatedly for each page that is written.
1135
1136 * It depends on reiserfs_get_block() function which if implemented by reiserfs performs costly search_by_key
1137 * operation for each page it is supplied with. By contrast reiserfs_file_write() feeds as much as possible at a time
1138 * to reiserfs which allows for fewer tree traversals.
1139
1140 * Each indirect pointer insertion takes a lot of cpu, because it involves memory moves inside of blocks.
1141
1142 * Asking the block allocation code for blocks one at a time is slightly less efficient.
1143
1144 All of these reasons for not using only generic file write were understood back when reiserfs was first miscoded to
1145 use it, but we were in a hurry to make code freeze, and so it couldn't be revised then. This new code should make
1146 things right finally.
1147
1148 Future Features: providing search_by_key with hints.
1149
1150 */
1151 static ssize_t reiserfs_file_write( struct file *file, /* the file we are going to write into */
1152 const char __user *buf, /* pointer to user supplied data
1153 (in userspace) */
1154 size_t count, /* amount of bytes to write */
1155 loff_t *ppos /* pointer to position in file that we start writing at. Should be updated to
1156 * new current position before returning. */ )
1157 {
1158 size_t already_written = 0; // Number of bytes already written to the file.
1159 loff_t pos; // Current position in the file.
1160 ssize_t res; // return value of various functions that we call.
1161 int err = 0;
1162 struct inode *inode = file->f_dentry->d_inode; // Inode of the file that we are writing to.
1163 /* To simplify coding at this time, we store
1164 locked pages in array for now */
1165 struct page * prepared_pages[REISERFS_WRITE_PAGES_AT_A_TIME];
1166 struct reiserfs_transaction_handle th;
1167 th.t_trans_id = 0;
1168
1169 if ( file->f_flags & O_DIRECT) { // Direct IO needs treatment
1170 ssize_t result, after_file_end = 0;
1171 if ( (*ppos + count >= inode->i_size) || (file->f_flags & O_APPEND) ) {
1172 /* If we are appending a file, we need to put this savelink in here.
1173 If we will crash while doing direct io, finish_unfinished will
1174 cut the garbage from the file end. */
1175 reiserfs_write_lock(inode->i_sb);
1176 err = journal_begin(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT );
1177 if (err) {
1178 reiserfs_write_unlock (inode->i_sb);
1179 return err;
1180 }
1181 reiserfs_update_inode_transaction(inode);
1182 add_save_link (&th, inode, 1 /* Truncate */);
1183 after_file_end = 1;
1184 err = journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT );
1185 reiserfs_write_unlock(inode->i_sb);
1186 if (err)
1187 return err;
1188 }
1189 result = generic_file_write(file, buf, count, ppos);
1190
1191 if ( after_file_end ) { /* Now update i_size and remove the savelink */
1192 struct reiserfs_transaction_handle th;
1193 reiserfs_write_lock(inode->i_sb);
1194 err = journal_begin(&th, inode->i_sb, 1);
1195 if (err) {
1196 reiserfs_write_unlock (inode->i_sb);
1197 return err;
1198 }
1199 reiserfs_update_inode_transaction(inode);
1200 reiserfs_update_sd(&th, inode);
1201 err = journal_end(&th, inode->i_sb, 1);
1202 if (err) {
1203 reiserfs_write_unlock (inode->i_sb);
1204 return err;
1205 }
1206 err = remove_save_link (inode, 1/* truncate */);
1207 reiserfs_write_unlock(inode->i_sb);
1208 if (err)
1209 return err;
1210 }
1211
1212 return result;
1213 }
1214
1215 if ( unlikely((ssize_t) count < 0 ))
1216 return -EINVAL;
1217
1218 if (unlikely(!access_ok(VERIFY_READ, buf, count)))
1219 return -EFAULT;
1220
1221 down(&inode->i_sem); // locks the entire file for just us
1222
1223 pos = *ppos;
1224
1225 /* Check if we can write to specified region of file, file
1226 is not overly big and this kind of stuff. Adjust pos and
1227 count, if needed */
1228 res = generic_write_checks(file, &pos, &count, 0);
1229 if (res)
1230 goto out;
1231
1232 if ( count == 0 )
1233 goto out;
1234
1235 res = remove_suid(file->f_dentry);
1236 if (res)
1237 goto out;
1238
1239 inode_update_time(inode, 1); /* Both mtime and ctime */
1240
1241 // Ok, we are done with all the checks.
1242
1243 // Now we should start real work
1244
1245 /* If we are going to write past the file's packed tail or if we are going
1246 to overwrite part of the tail, we need that tail to be converted into
1247 unformatted node */
1248 res = reiserfs_check_for_tail_and_convert( inode, pos, count);
1249 if (res)
1250 goto out;
1251
1252 while ( count > 0) {
1253 /* This is the main loop in which we running until some error occures
1254 or until we write all of the data. */
1255 size_t num_pages;/* amount of pages we are going to write this iteration */
1256 size_t write_bytes; /* amount of bytes to write during this iteration */
1257 size_t blocks_to_allocate; /* how much blocks we need to allocate for this iteration */
1258
1259 /* (pos & (PAGE_CACHE_SIZE-1)) is an idiom for offset into a page of pos*/
1260 num_pages = !!((pos+count) & (PAGE_CACHE_SIZE - 1)) + /* round up partial
1261 pages */
1262 ((count + (pos & (PAGE_CACHE_SIZE-1))) >> PAGE_CACHE_SHIFT);
1263 /* convert size to amount of
1264 pages */
1265 reiserfs_write_lock(inode->i_sb);
1266 if ( num_pages > REISERFS_WRITE_PAGES_AT_A_TIME
1267 || num_pages > reiserfs_can_fit_pages(inode->i_sb) ) {
1268 /* If we were asked to write more data than we want to or if there
1269 is not that much space, then we shorten amount of data to write
1270 for this iteration. */
1271 num_pages = min_t(size_t, REISERFS_WRITE_PAGES_AT_A_TIME, reiserfs_can_fit_pages(inode->i_sb));
1272 /* Also we should not forget to set size in bytes accordingly */
1273 write_bytes = (num_pages << PAGE_CACHE_SHIFT) -
1274 (pos & (PAGE_CACHE_SIZE-1));
1275 /* If position is not on the
1276 start of the page, we need
1277 to substract the offset
1278 within page */
1279 } else
1280 write_bytes = count;
1281
1282 /* reserve the blocks to be allocated later, so that later on
1283 we still have the space to write the blocks to */
1284 reiserfs_claim_blocks_to_be_allocated(inode->i_sb, num_pages << (PAGE_CACHE_SHIFT - inode->i_blkbits));
1285 reiserfs_write_unlock(inode->i_sb);
1286
1287 if ( !num_pages ) { /* If we do not have enough space even for */
1288 res = -ENOSPC; /* single page, return -ENOSPC */
1289 if ( pos > (inode->i_size & (inode->i_sb->s_blocksize-1)))
1290 break; // In case we are writing past the file end, break.
1291 // Otherwise we are possibly overwriting the file, so
1292 // let's set write size to be equal or less than blocksize.
1293 // This way we get it correctly for file holes.
1294 // But overwriting files on absolutelly full volumes would not
1295 // be very efficient. Well, people are not supposed to fill
1296 // 100% of disk space anyway.
1297 write_bytes = min_t(size_t, count, inode->i_sb->s_blocksize - (pos & (inode->i_sb->s_blocksize - 1)));
1298 num_pages = 1;
1299 // No blocks were claimed before, so do it now.
1300 reiserfs_claim_blocks_to_be_allocated(inode->i_sb, 1 << (PAGE_CACHE_SHIFT - inode->i_blkbits));
1301 }
1302
1303 /* Prepare for writing into the region, read in all the
1304 partially overwritten pages, if needed. And lock the pages,
1305 so that nobody else can access these until we are done.
1306 We get number of actual blocks needed as a result.*/
1307 blocks_to_allocate = reiserfs_prepare_file_region_for_write(inode, pos, num_pages, write_bytes, prepared_pages);
1308 if ( blocks_to_allocate < 0 ) {
1309 res = blocks_to_allocate;
1310 reiserfs_release_claimed_blocks(inode->i_sb, num_pages << (PAGE_CACHE_SHIFT - inode->i_blkbits));
1311 break;
1312 }
1313
1314 /* First we correct our estimate of how many blocks we need */
1315 reiserfs_release_claimed_blocks(inode->i_sb, (num_pages << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits)) - blocks_to_allocate );
1316
1317 if ( blocks_to_allocate > 0) {/*We only allocate blocks if we need to*/
1318 /* Fill in all the possible holes and append the file if needed */
1319 res = reiserfs_allocate_blocks_for_region(&th, inode, pos, num_pages, write_bytes, prepared_pages, blocks_to_allocate);
1320 }
1321
1322 /* well, we have allocated the blocks, so it is time to free
1323 the reservation we made earlier. */
1324 reiserfs_release_claimed_blocks(inode->i_sb, blocks_to_allocate);
1325 if ( res ) {
1326 reiserfs_unprepare_pages(prepared_pages, num_pages);
1327 break;
1328 }
1329
1330 /* NOTE that allocating blocks and filling blocks can be done in reverse order
1331 and probably we would do that just to get rid of garbage in files after a
1332 crash */
1333
1334 /* Copy data from user-supplied buffer to file's pages */
1335 res = reiserfs_copy_from_user_to_file_region(pos, num_pages, write_bytes, prepared_pages, buf);
1336 if ( res ) {
1337 reiserfs_unprepare_pages(prepared_pages, num_pages);
1338 break;
1339 }
1340
1341 /* Send the pages to disk and unlock them. */
1342 res = reiserfs_submit_file_region_for_write(&th, inode, pos, num_pages,
1343 write_bytes,prepared_pages);
1344 if ( res )
1345 break;
1346
1347 already_written += write_bytes;
1348 buf += write_bytes;
1349 *ppos = pos += write_bytes;
1350 count -= write_bytes;
1351 balance_dirty_pages_ratelimited(inode->i_mapping);
1352 }
1353
1354 /* this is only true on error */
1355 if (th.t_trans_id) {
1356 reiserfs_write_lock(inode->i_sb);
1357 err = journal_end(&th, th.t_super, th.t_blocks_allocated);
1358 reiserfs_write_unlock(inode->i_sb);
1359 if (err) {
1360 res = err;
1361 goto out;
1362 }
1363 }
1364
1365 if ((file->f_flags & O_SYNC) || IS_SYNC(inode))
1366 res = generic_osync_inode(inode, file->f_mapping, OSYNC_METADATA|OSYNC_DATA);
1367
1368 up(&inode->i_sem);
1369 reiserfs_async_progress_wait(inode->i_sb);
1370 return (already_written != 0)?already_written:res;
1371
1372 out:
1373 up(&inode->i_sem); // unlock the file on exit.
1374 return res;
1375 }
1376
1377 static ssize_t reiserfs_aio_write(struct kiocb *iocb, const char __user *buf,
1378 size_t count, loff_t pos)
1379 {
1380 return generic_file_aio_write(iocb, buf, count, pos);
1381 }
1382
1383
1384
1385 struct file_operations reiserfs_file_operations = {
1386 .read = generic_file_read,
1387 .write = reiserfs_file_write,
1388 .ioctl = reiserfs_ioctl,
1389 .mmap = generic_file_mmap,
1390 .release = reiserfs_file_release,
1391 .fsync = reiserfs_sync_file,
1392 .sendfile = generic_file_sendfile,
1393 .aio_read = generic_file_aio_read,
1394 .aio_write = reiserfs_aio_write,
1395 };
1396
1397
1398 struct inode_operations reiserfs_file_inode_operations = {
1399 .truncate = reiserfs_vfs_truncate_file,
1400 .setattr = reiserfs_setattr,
1401 .setxattr = reiserfs_setxattr,
1402 .getxattr = reiserfs_getxattr,
1403 .listxattr = reiserfs_listxattr,
1404 .removexattr = reiserfs_removexattr,
1405 .permission = reiserfs_permission,
1406 };
1407
1408