git.ipfire.org Git - people/pmueller/ipfire-2.x.git/blame - src/patches/suse-2.6.27.39/patches.fixes/zisofs-large-pagesize-read.patch
Fix oinkmaster patch.
[people/pmueller/ipfire-2.x.git] / src / patches / suse-2.6.27.39 / patches.fixes / zisofs-large-pagesize-read.patch
CommitLineData
82094b55
AF
1From: Jan Kara <jack@suse.cz>
2Subject: [PATCH] zisofs: Implement reading of compressed files when PAGE_CACHE_SIZE > compress block size
3Patch-mainline: 2.6.33
4References: bnc#540349
5
6Also split and clean up zisofs_readpage() while we are changing it anyway.
7
8Signed-off-by: Jan Kara <jack@suse.cz>
9---
10 fs/isofs/compress.c | 533 +++++++++++++++++++++++++++------------------------
11 fs/isofs/rock.c | 3 +-
12 2 files changed, 286 insertions(+), 250 deletions(-)
13
14diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
15index defb932..0b3fa79 100644
16--- a/fs/isofs/compress.c
17+++ b/fs/isofs/compress.c
18@@ -36,286 +36,323 @@ static void *zisofs_zlib_workspace;
19 static DEFINE_MUTEX(zisofs_zlib_lock);
20
21 /*
22- * When decompressing, we typically obtain more than one page
23- * per reference. We inject the additional pages into the page
24- * cache as a form of readahead.
25+ * Read data of @inode from @block_start to @block_end and uncompress
26+ * to one zisofs block. Store the data in the @pages array with @pcount
27+ * entries. Start storing at offset @poffset of the first page.
28 */
29-static int zisofs_readpage(struct file *file, struct page *page)
30+static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start,
31+ loff_t block_end, int pcount,
32+ struct page **pages, unsigned poffset,
33+ int *errp)
34 {
35- struct inode *inode = file->f_path.dentry->d_inode;
36- struct address_space *mapping = inode->i_mapping;
37- unsigned int maxpage, xpage, fpage, blockindex;
38- unsigned long offset;
39- unsigned long blockptr, blockendptr, cstart, cend, csize;
40- struct buffer_head *bh, *ptrbh[2];
41- unsigned long bufsize = ISOFS_BUFFER_SIZE(inode);
42- unsigned int bufshift = ISOFS_BUFFER_BITS(inode);
43- unsigned long bufmask = bufsize - 1;
44- int err = -EIO;
45- int i;
46- unsigned int header_size = ISOFS_I(inode)->i_format_parm[0];
47 unsigned int zisofs_block_shift = ISOFS_I(inode)->i_format_parm[1];
48- /* unsigned long zisofs_block_size = 1UL << zisofs_block_shift; */
49- unsigned int zisofs_block_page_shift = zisofs_block_shift-PAGE_CACHE_SHIFT;
50- unsigned long zisofs_block_pages = 1UL << zisofs_block_page_shift;
51- unsigned long zisofs_block_page_mask = zisofs_block_pages-1;
52- struct page *pages[zisofs_block_pages];
53- unsigned long index = page->index;
54- int indexblocks;
55-
56- /* We have already been given one page, this is the one
57- we must do. */
58- xpage = index & zisofs_block_page_mask;
59- pages[xpage] = page;
60-
61- /* The remaining pages need to be allocated and inserted */
62- offset = index & ~zisofs_block_page_mask;
63- blockindex = offset >> zisofs_block_page_shift;
64- maxpage = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
65-
66- /*
67- * If this page is wholly outside i_size we just return zero;
68- * do_generic_file_read() will handle this for us
69- */
70- if (page->index >= maxpage) {
71- SetPageUptodate(page);
72- unlock_page(page);
73+ unsigned int bufsize = ISOFS_BUFFER_SIZE(inode);
74+ unsigned int bufshift = ISOFS_BUFFER_BITS(inode);
75+ unsigned int bufmask = bufsize - 1;
76+ int i, block_size = block_end - block_start;
77+ z_stream stream = { .total_out = 0,
78+ .avail_in = 0,
79+ .avail_out = 0, };
80+ int zerr;
81+ int needblocks = (block_size + (block_start & bufmask) + bufmask)
82+ >> bufshift;
83+ int haveblocks;
84+ blkcnt_t blocknum;
85+ struct buffer_head *bhs[needblocks + 1];
86+ int curbh, curpage;
87+
88+ if (block_size > deflateBound(1UL << zisofs_block_shift)) {
89+ *errp = -EIO;
90 return 0;
91 }
92-
93- maxpage = min(zisofs_block_pages, maxpage-offset);
94-
95- for ( i = 0 ; i < maxpage ; i++, offset++ ) {
96- if ( i != xpage ) {
97- pages[i] = grab_cache_page_nowait(mapping, offset);
98- }
99- page = pages[i];
100- if ( page ) {
101- ClearPageError(page);
102- kmap(page);
103+ /* Empty block? */
104+ if (block_size == 0) {
105+ for ( i = 0 ; i < pcount ; i++ ) {
106+ if (!pages[i])
107+ continue;
108+ memset(page_address(pages[i]), 0, PAGE_CACHE_SIZE);
109+ flush_dcache_page(pages[i]);
110+ SetPageUptodate(pages[i]);
111 }
112+ return ((loff_t)pcount) << PAGE_CACHE_SHIFT;
113 }
114
115- /* This is the last page filled, plus one; used in case of abort. */
116- fpage = 0;
117+ /* Because zlib is not thread-safe, do all the I/O at the top. */
118+ blocknum = block_start >> bufshift;
119+ memset(bhs, 0, (needblocks + 1) * sizeof(struct buffer_head *));
120+ haveblocks = isofs_get_blocks(inode, blocknum, bhs, needblocks);
121+ ll_rw_block(READ, haveblocks, bhs);
122
123- /* Find the pointer to this specific chunk */
124- /* Note: we're not using isonum_731() here because the data is known aligned */
125- /* Note: header_size is in 32-bit words (4 bytes) */
126- blockptr = (header_size + blockindex) << 2;
127- blockendptr = blockptr + 4;
128+ curbh = 0;
129+ curpage = 0;
130+ /*
131+ * First block is special since it may be fractional. We also wait for
132+ * it before grabbing the zlib mutex; odds are that the subsequent
133+ * blocks are going to come in in short order so we don't hold the zlib
134+ * mutex longer than necessary.
135+ */
136
137- indexblocks = ((blockptr^blockendptr) >> bufshift) ? 2 : 1;
138- ptrbh[0] = ptrbh[1] = NULL;
139+ if (!bhs[0])
140+ goto b_eio;
141
142- if ( isofs_get_blocks(inode, blockptr >> bufshift, ptrbh, indexblocks) != indexblocks ) {
143- if ( ptrbh[0] ) brelse(ptrbh[0]);
144- printk(KERN_DEBUG "zisofs: Null buffer on reading block table, inode = %lu, block = %lu\n",
145- inode->i_ino, blockptr >> bufshift);
146- goto eio;
147- }
148- ll_rw_block(READ, indexblocks, ptrbh);
149-
150- bh = ptrbh[0];
151- if ( !bh || (wait_on_buffer(bh), !buffer_uptodate(bh)) ) {
152- printk(KERN_DEBUG "zisofs: Failed to read block table, inode = %lu, block = %lu\n",
153- inode->i_ino, blockptr >> bufshift);
154- if ( ptrbh[1] )
155- brelse(ptrbh[1]);
156- goto eio;
157- }
158- cstart = le32_to_cpu(*(__le32 *)(bh->b_data + (blockptr & bufmask)));
159-
160- if ( indexblocks == 2 ) {
161- /* We just crossed a block boundary. Switch to the next block */
162- brelse(bh);
163- bh = ptrbh[1];
164- if ( !bh || (wait_on_buffer(bh), !buffer_uptodate(bh)) ) {
165- printk(KERN_DEBUG "zisofs: Failed to read block table, inode = %lu, block = %lu\n",
166- inode->i_ino, blockendptr >> bufshift);
167- goto eio;
168- }
169+ wait_on_buffer(bhs[0]);
170+ if (!buffer_uptodate(bhs[0])) {
171+ *errp = -EIO;
172+ goto b_eio;
173 }
174- cend = le32_to_cpu(*(__le32 *)(bh->b_data + (blockendptr & bufmask)));
175- brelse(bh);
176
177- if (cstart > cend)
178- goto eio;
179+ stream.workspace = zisofs_zlib_workspace;
180+ mutex_lock(&zisofs_zlib_lock);
181
182- csize = cend-cstart;
183-
184- if (csize > deflateBound(1UL << zisofs_block_shift))
185- goto eio;
186-
187- /* Now page[] contains an array of pages, any of which can be NULL,
188- and the locks on which we hold. We should now read the data and
189- release the pages. If the pages are NULL the decompressed data
190- for that particular page should be discarded. */
191-
192- if ( csize == 0 ) {
193- /* This data block is empty. */
194-
195- for ( fpage = 0 ; fpage < maxpage ; fpage++ ) {
196- if ( (page = pages[fpage]) != NULL ) {
197- memset(page_address(page), 0, PAGE_CACHE_SIZE);
198-
199- flush_dcache_page(page);
200- SetPageUptodate(page);
201- kunmap(page);
202- unlock_page(page);
203- if ( fpage == xpage )
204- err = 0; /* The critical page */
205- else
206- page_cache_release(page);
207+ zerr = zlib_inflateInit(&stream);
208+ if (zerr != Z_OK) {
209+ if (zerr == Z_MEM_ERROR)
210+ *errp = -ENOMEM;
211+ else
212+ *errp = -EIO;
213+ printk(KERN_DEBUG "zisofs: zisofs_inflateInit returned %d\n",
214+ zerr);
215+ goto z_eio;
216+ }
217+
218+ while (curpage < pcount && curbh < haveblocks &&
219+ zerr != Z_STREAM_END) {
220+ if (!stream.avail_out) {
221+ if (pages[curpage]) {
222+ stream.next_out = page_address(pages[curpage])
223+ + poffset;
224+ stream.avail_out = PAGE_CACHE_SIZE - poffset;
225+ poffset = 0;
226+ } else {
227+ stream.next_out = (void *)&zisofs_sink_page;
228+ stream.avail_out = PAGE_CACHE_SIZE;
229 }
230 }
231- } else {
232- /* This data block is compressed. */
233- z_stream stream;
234- int bail = 0, left_out = -1;
235- int zerr;
236- int needblocks = (csize + (cstart & bufmask) + bufmask) >> bufshift;
237- int haveblocks;
238- struct buffer_head *bhs[needblocks+1];
239- struct buffer_head **bhptr;
240-
241- /* Because zlib is not thread-safe, do all the I/O at the top. */
242-
243- blockptr = cstart >> bufshift;
244- memset(bhs, 0, (needblocks+1)*sizeof(struct buffer_head *));
245- haveblocks = isofs_get_blocks(inode, blockptr, bhs, needblocks);
246- ll_rw_block(READ, haveblocks, bhs);
247-
248- bhptr = &bhs[0];
249- bh = *bhptr++;
250-
251- /* First block is special since it may be fractional.
252- We also wait for it before grabbing the zlib
253- mutex; odds are that the subsequent blocks are
254- going to come in in short order so we don't hold
255- the zlib mutex longer than necessary. */
256-
257- if ( !bh || (wait_on_buffer(bh), !buffer_uptodate(bh)) ) {
258- printk(KERN_DEBUG "zisofs: Hit null buffer, fpage = %d, xpage = %d, csize = %ld\n",
259- fpage, xpage, csize);
260- goto b_eio;
261- }
262- stream.next_in = bh->b_data + (cstart & bufmask);
263- stream.avail_in = min(bufsize-(cstart & bufmask), csize);
264- csize -= stream.avail_in;
265-
266- stream.workspace = zisofs_zlib_workspace;
267- mutex_lock(&zisofs_zlib_lock);
268-
269- zerr = zlib_inflateInit(&stream);
270- if ( zerr != Z_OK ) {
271- if ( err && zerr == Z_MEM_ERROR )
272- err = -ENOMEM;
273- printk(KERN_DEBUG "zisofs: zisofs_inflateInit returned %d\n",
274- zerr);
275- goto z_eio;
276+ if (!stream.avail_in) {
277+ wait_on_buffer(bhs[curbh]);
278+ if (!buffer_uptodate(bhs[curbh])) {
279+ *errp = -EIO;
280+ break;
281+ }
282+ stream.next_in = bhs[curbh]->b_data +
283+ (block_start & bufmask);
284+ stream.avail_in = min_t(unsigned, bufsize -
285+ (block_start & bufmask),
286+ block_size);
287+ block_size -= stream.avail_in;
288+ block_start = 0;
289 }
290
291- while ( !bail && fpage < maxpage ) {
292- page = pages[fpage];
293- if ( page )
294- stream.next_out = page_address(page);
295- else
296- stream.next_out = (void *)&zisofs_sink_page;
297- stream.avail_out = PAGE_CACHE_SIZE;
298-
299- while ( stream.avail_out ) {
300- int ao, ai;
301- if ( stream.avail_in == 0 && left_out ) {
302- if ( !csize ) {
303- printk(KERN_WARNING "zisofs: ZF read beyond end of input\n");
304- bail = 1;
305- break;
306- } else {
307- bh = *bhptr++;
308- if ( !bh ||
309- (wait_on_buffer(bh), !buffer_uptodate(bh)) ) {
310- /* Reached an EIO */
311- printk(KERN_DEBUG "zisofs: Hit null buffer, fpage = %d, xpage = %d, csize = %ld\n",
312- fpage, xpage, csize);
313-
314- bail = 1;
315- break;
316- }
317- stream.next_in = bh->b_data;
318- stream.avail_in = min(csize,bufsize);
319- csize -= stream.avail_in;
320- }
321- }
322- ao = stream.avail_out; ai = stream.avail_in;
323- zerr = zlib_inflate(&stream, Z_SYNC_FLUSH);
324- left_out = stream.avail_out;
325- if ( zerr == Z_BUF_ERROR && stream.avail_in == 0 )
326- continue;
327- if ( zerr != Z_OK ) {
328- /* EOF, error, or trying to read beyond end of input */
329- if ( err && zerr == Z_MEM_ERROR )
330- err = -ENOMEM;
331- if ( zerr != Z_STREAM_END )
332- printk(KERN_DEBUG "zisofs: zisofs_inflate returned %d, inode = %lu, index = %lu, fpage = %d, xpage = %d, avail_in = %d, avail_out = %d, ai = %d, ao = %d\n",
333- zerr, inode->i_ino, index,
334- fpage, xpage,
335- stream.avail_in, stream.avail_out,
336- ai, ao);
337- bail = 1;
338- break;
339+ while (stream.avail_out && stream.avail_in) {
340+ zerr = zlib_inflate(&stream, Z_SYNC_FLUSH);
341+ if (zerr == Z_BUF_ERROR && stream.avail_in == 0)
342+ break;
343+ if (zerr == Z_STREAM_END)
344+ break;
345+ if (zerr != Z_OK) {
346+ /* EOF, error, or trying to read beyond end of input */
347+ if (zerr == Z_MEM_ERROR)
348+ *errp = -ENOMEM;
349+ else {
350+ printk(KERN_DEBUG
351+ "zisofs: zisofs_inflate returned"
352+ " %d, inode = %lu,"
353+ " page idx = %d, bh idx = %d,"
354+ " avail_in = %d,"
355+ " avail_out = %d\n",
356+ zerr, inode->i_ino, curpage,
357+ curbh, stream.avail_in,
358+ stream.avail_out);
359+ *errp = -EIO;
360 }
361+ goto inflate_out;
362 }
363+ }
364
365- if ( stream.avail_out && zerr == Z_STREAM_END ) {
366- /* Fractional page written before EOF. This may
367- be the last page in the file. */
368- memset(stream.next_out, 0, stream.avail_out);
369- stream.avail_out = 0;
370+ if (!stream.avail_out) {
371+ /* This page completed */
372+ if (pages[curpage]) {
373+ flush_dcache_page(pages[curpage]);
374+ SetPageUptodate(pages[curpage]);
375 }
376+ curpage++;
377+ }
378+ if (!stream.avail_in)
379+ curbh++;
380+ }
381+inflate_out:
382+ zlib_inflateEnd(&stream);
383
384- if ( !stream.avail_out ) {
385- /* This page completed */
386- if ( page ) {
387- flush_dcache_page(page);
388- SetPageUptodate(page);
389- kunmap(page);
390- unlock_page(page);
391- if ( fpage == xpage )
392- err = 0; /* The critical page */
393- else
394- page_cache_release(page);
395- }
396- fpage++;
397- }
398+z_eio:
399+ mutex_unlock(&zisofs_zlib_lock);
400+
401+b_eio:
402+ for (i = 0; i < haveblocks; i++)
403+ brelse(bhs[i]);
404+ return stream.total_out;
405+}
406+
407+/*
408+ * Uncompress data so that pages[full_page] is fully uptodate, and possibly
409+ * fill in the other @pages entries we have data for. Returns 0 or a negative errno.
410+ */
411+static int zisofs_fill_pages(struct inode *inode, int full_page, int pcount,
412+ struct page **pages)
413+{
414+ loff_t start_off, end_off; /* byte range of the wanted page; end is clamped to i_size */
415+ loff_t block_start, block_end; /* consecutive pointer-table values handed to zisofs_uncompress_block() */
416+ unsigned int header_size = ISOFS_I(inode)->i_format_parm[0];
417+ unsigned int zisofs_block_shift = ISOFS_I(inode)->i_format_parm[1];
418+ unsigned int blockptr; /* byte offset of the current 4-byte pointer-table entry */
419+ loff_t poffset = 0; /* output offset carried between iterations */
420+ blkcnt_t cstart_block, cend_block; /* compressed blocks to process: [cstart_block, cend_block) */
421+ struct buffer_head *bh;
422+ unsigned int blkbits = ISOFS_BUFFER_BITS(inode);
423+ unsigned int blksize = 1 << blkbits;
424+ int err;
425+ loff_t ret;
426+
427+ BUG_ON(!pages[full_page]);
428+
429+ /*
430+ * We must read at least the page at index 'full_page'. Because we have
431+ * to uncompress the whole compression block anyway, also fill the
432+ * surrounding pages with the data we obtain along the way.
433+ */
434+ start_off = page_offset(pages[full_page]);
435+ end_off = min_t(loff_t, start_off + PAGE_CACHE_SIZE, inode->i_size);
436+
437+ cstart_block = start_off >> zisofs_block_shift;
438+ cend_block = (end_off + (1 << zisofs_block_shift) - 1)
439+ >> zisofs_block_shift;
440+
441+ WARN_ON(start_off - (full_page << PAGE_CACHE_SHIFT) !=
442+ ((cstart_block << zisofs_block_shift) & PAGE_CACHE_MASK));
443+
444+ /* Find the pointer to this specific chunk */
445+ /* Note: we're not using isonum_731() here because the data is known to be aligned */
446+ /* Note: header_size is in 32-bit words (4 bytes) */
447+ blockptr = (header_size + cstart_block) << 2;
448+ bh = isofs_bread(inode, blockptr >> blkbits);
449+ if (!bh)
450+ return -EIO;
451+ block_start = le32_to_cpu(*(__le32 *)
452+ (bh->b_data + (blockptr & (blksize - 1))));
453+
454+ while (cstart_block < cend_block && pcount > 0) {
455+ /* Load the end pointer of this compressed block from the next table entry */
456+ blockptr += 4;
457+ /* Did the table entry cross into the next buffer block? */
458+ if (!(blockptr & (blksize - 1))) {
459+ brelse(bh);
460+
461+ bh = isofs_bread(inode, blockptr >> blkbits);
462+ if (!bh)
463+ return -EIO;
464+ }
465+ block_end = le32_to_cpu(*(__le32 *)
466+ (bh->b_data + (blockptr & (blksize - 1))));
467+ if (block_start > block_end) {
468+ brelse(bh);
469+ return -EIO;
470+ }
471+ err = 0;
472+ ret = zisofs_uncompress_block(inode, block_start, block_end,
473+ pcount, pages, poffset, &err);
474+ poffset += ret; /* add the byte count returned by zisofs_uncompress_block() */
475+ pages += poffset >> PAGE_CACHE_SHIFT; /* step past the whole pages covered so far */
476+ pcount -= poffset >> PAGE_CACHE_SHIFT;
477+ full_page -= poffset >> PAGE_CACHE_SHIFT; /* goes negative once the wanted page is behind us */
478+ poffset &= ~PAGE_CACHE_MASK; /* presumably keeps only the in-page remainder -- confirm PAGE_CACHE_MASK definition */
479+
480+ if (err) {
481+ brelse(bh);
482+ /*
483+ * Did we finish reading the page we really wanted
484+ * to read? If so, the error is not reported.
485+ */
486+ if (full_page < 0)
487+ return 0;
488+ return err;
489 }
490- zlib_inflateEnd(&stream);
491
492- z_eio:
493- mutex_unlock(&zisofs_zlib_lock);
494+ block_start = block_end;
495+ cstart_block++;
496+ }
497+
498+ if (poffset && *pages) { /* zero the unwritten tail of a partially filled page */
499+ memset(page_address(*pages) + poffset, 0,
500+ PAGE_CACHE_SIZE - poffset);
501+ flush_dcache_page(*pages);
502+ SetPageUptodate(*pages);
503+ }
504+ return 0; /* NOTE(review): bh is not brelse()d on this path, unlike the error returns above -- confirm whether a release is missing */
505+}
506
507- b_eio:
508- for ( i = 0 ; i < haveblocks ; i++ ) {
509- if ( bhs[i] )
510- brelse(bhs[i]);
511+/*
512+ * When decompressing, we typically obtain more than one page
513+ * per reference. We inject the additional pages into the page
514+ * cache as a form of readahead.
515+ */
516+static int zisofs_readpage(struct file *file, struct page *page)
517+{
518+ struct inode *inode = file->f_path.dentry->d_inode;
519+ struct address_space *mapping = inode->i_mapping;
520+ int err;
521+ int i, pcount, full_page;
522+ unsigned int zisofs_block_shift = ISOFS_I(inode)->i_format_parm[1];
523+ unsigned int zisofs_pages_per_cblock =
524+ PAGE_CACHE_SHIFT <= zisofs_block_shift ?
525+ (1 << (zisofs_block_shift - PAGE_CACHE_SHIFT)) : 0;
526+ struct page *pages[max_t(unsigned, zisofs_pages_per_cblock, 1)];
527+ pgoff_t index = page->index, end_index;
528+
529+ end_index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
530+ /*
531+ * If this page is wholly outside i_size we just return zero;
532+ * do_generic_file_read() will handle this for us
533+ */
534+ if (index >= end_index) {
535+ SetPageUptodate(page);
536+ unlock_page(page);
537+ return 0;
538+ }
539+
540+ if (PAGE_CACHE_SHIFT <= zisofs_block_shift) {
541+ /* We have already been given one page, this is the one
542+ we must do. */
543+ full_page = index & (zisofs_pages_per_cblock - 1);
544+ pcount = min_t(int, zisofs_pages_per_cblock,
545+ end_index - (index & ~(zisofs_pages_per_cblock - 1)));
546+ index -= full_page;
547+ } else {
548+ full_page = 0;
549+ pcount = 1;
550+ }
551+ pages[full_page] = page;
552+
553+ for (i = 0; i < pcount; i++, index++) {
554+ if (i != full_page)
555+ pages[i] = grab_cache_page_nowait(mapping, index);
556+ if (pages[i]) {
557+ ClearPageError(pages[i]);
558+ kmap(pages[i]);
559 }
560 }
561
562-eio:
563+ err = zisofs_fill_pages(inode, full_page, pcount, pages);
564
565 /* Release any residual pages, do not SetPageUptodate */
566- while ( fpage < maxpage ) {
567- page = pages[fpage];
568- if ( page ) {
569- flush_dcache_page(page);
570- if ( fpage == xpage )
571- SetPageError(page);
572- kunmap(page);
573- unlock_page(page);
574- if ( fpage != xpage )
575- page_cache_release(page);
576+ for (i = 0; i < pcount; i++) {
577+ if (pages[i]) {
578+ flush_dcache_page(pages[i]);
579+ if (i == full_page && err)
580+ SetPageError(pages[i]);
581+ kunmap(pages[i]);
582+ unlock_page(pages[i]);
583+ if (i != full_page)
584+ page_cache_release(pages[i]);
585 }
586- fpage++;
587 }
588
589 /* At this point, err contains 0 or -EIO depending on the "critical" page */
590diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
591index c2fb2dd..96a685c 100644
592--- a/fs/isofs/rock.c
593+++ b/fs/isofs/rock.c
594@@ -518,8 +518,7 @@ repeat:
595 if (algo == SIG('p', 'z')) {
596 int block_shift =
597 isonum_711(&rr->u.ZF.parms[1]);
598- if (block_shift < PAGE_CACHE_SHIFT
599- || block_shift > 17) {
600+ if (block_shift > 17) {
601 printk(KERN_WARNING "isofs: "
602 "Can't handle ZF block "
603 "size of 2^%d\n",
604--
6051.6.0.2
606