From: Greg Kroah-Hartman Date: Fri, 25 Jun 2010 23:31:52 +0000 (-0700) Subject: .27 patches X-Git-Tag: v2.6.31.14~13 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=4d8b0b18ebd0a7b358fc07203eab41878ab04334;p=thirdparty%2Fkernel%2Fstable-queue.git .27 patches --- diff --git a/queue-2.6.27/ext4-check-s_log_groups_per_flex-in-online-resize-code.patch b/queue-2.6.27/ext4-check-s_log_groups_per_flex-in-online-resize-code.patch new file mode 100644 index 00000000000..f2fa336afd1 --- /dev/null +++ b/queue-2.6.27/ext4-check-s_log_groups_per_flex-in-online-resize-code.patch @@ -0,0 +1,49 @@ +From 42007efd569f1cf3bfb9a61da60ef6c2179508ca Mon Sep 17 00:00:00 2001 +From: Eric Sandeen +Date: Sun, 16 May 2010 01:00:00 -0400 +Subject: ext4: check s_log_groups_per_flex in online resize code + +From: Eric Sandeen + +commit 42007efd569f1cf3bfb9a61da60ef6c2179508ca upstream. + +If groups_per_flex < 2, sbi->s_flex_groups[] doesn't get filled out, +and every other access to this first tests s_log_groups_per_flex; +same thing needs to happen in resize or we'll wander off into +a null pointer when doing an online resize of the file system. 
+ +Thanks to Christoph Biedl, who came up with the trivial testcase: + +# truncate --size 128M fsfile +# mkfs.ext3 -F fsfile +# tune2fs -O extents,uninit_bg,dir_index,flex_bg,huge_file,dir_nlink,extra_isize fsfile +# e2fsck -yDf -C0 fsfile +# truncate --size 132M fsfile +# losetup /dev/loop0 fsfile +# mount /dev/loop0 mnt +# resize2fs -p /dev/loop0 + + https://bugzilla.kernel.org/show_bug.cgi?id=13549 + +Reported-by: Alessandro Polverini +Test-case-by: Christoph Biedl +Signed-off-by: Eric Sandeen +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/resize.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/ext4/resize.c ++++ b/fs/ext4/resize.c +@@ -935,7 +935,8 @@ int ext4_group_add(struct super_block *s + percpu_counter_add(&sbi->s_freeinodes_counter, + EXT4_INODES_PER_GROUP(sb)); + +- if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { ++ if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) && ++ sbi->s_log_groups_per_flex) { + ext4_group_t flex_group; + flex_group = ext4_flex_group(sbi, input->group); + sbi->s_flex_groups[flex_group].free_blocks += diff --git a/queue-2.6.27/ext4-fix-file-fragmentation-during-large-file-write.patch b/queue-2.6.27/ext4-fix-file-fragmentation-during-large-file-write.patch new file mode 100644 index 00000000000..7f6d1a2a775 --- /dev/null +++ b/queue-2.6.27/ext4-fix-file-fragmentation-during-large-file-write.patch @@ -0,0 +1,189 @@ +From dev@jaysonking.com Fri Jun 25 15:33:09 2010 +From: Aneesh Kumar K.V +Date: Fri, 28 May 2010 14:26:57 -0500 +Subject: ext4: Fix file fragmentation during large file write. +Cc: "Jayson R. King" , Theodore Ts'o , "Aneesh Kumar K.V" , Dave Chinner , Ext4 Developers List , Kay Diederichs +Message-ID: <4C001901.1070207@jaysonking.com> + + +From: Aneesh Kumar K.V + +commit 22208dedbd7626e5fc4339c417f8d24cc21f79d7 upstream. + +The range_cyclic writeback mode uses the address_space writeback_index +as the start index for writeback. 
With delayed allocation we were +updating writeback_index wrongly resulting in highly fragmented file. +This patch reduces the number of extents reduced from 4000 to 27 for a +3GB file. + +Signed-off-by: Aneesh Kumar K.V +Signed-off-by: Theodore Ts'o +[dev@jaysonking.com: Some changed lines from the original version of this patch were dropped, since they were rolled up with another cherry-picked patch applied to 2.6.27.y earlier.] +[dev@jaysonking.com: Use of wbc->no_nrwrite_index_update was dropped, since write_cache_pages_da() implies it.] +Signed-off-by: Jayson R. King +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/inode.c | 79 ++++++++++++++++++++++++++++++++------------------------ + 1 file changed, 46 insertions(+), 33 deletions(-) + +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -1721,7 +1721,11 @@ static int mpage_da_submit_io(struct mpa + + pages_skipped = mpd->wbc->pages_skipped; + err = mapping->a_ops->writepage(page, mpd->wbc); +- if (!err) ++ if (!err && (pages_skipped == mpd->wbc->pages_skipped)) ++ /* ++ * have successfully written the page ++ * without skipping the same ++ */ + mpd->pages_written++; + /* + * In error case, we have to continue because +@@ -2295,7 +2299,6 @@ static int mpage_da_writepages(struct ad + struct writeback_control *wbc, + struct mpage_da_data *mpd) + { +- long to_write; + int ret; + + if (!mpd->get_block) +@@ -2310,19 +2313,18 @@ static int mpage_da_writepages(struct ad + mpd->pages_written = 0; + mpd->retval = 0; + +- to_write = wbc->nr_to_write; +- + ret = write_cache_pages_da(mapping, wbc, mpd); +- + /* + * Handle last extent of pages + */ + if (!mpd->io_done && mpd->next_page != mpd->first_page) { + if (mpage_da_map_blocks(mpd) == 0) + mpage_da_submit_io(mpd); +- } + +- wbc->nr_to_write = to_write - mpd->pages_written; ++ mpd->io_done = 1; ++ ret = MPAGE_DA_EXTENT_TAIL; ++ } ++ wbc->nr_to_write -= mpd->pages_written; + return ret; + } + +@@ -2567,11 +2569,13 @@ static int ext4_da_writepages_trans_bloc + static 
int ext4_da_writepages(struct address_space *mapping, + struct writeback_control *wbc) + { ++ pgoff_t index; ++ int range_whole = 0; + handle_t *handle = NULL; + struct mpage_da_data mpd; + struct inode *inode = mapping->host; ++ long pages_written = 0, pages_skipped; + int needed_blocks, ret = 0, nr_to_writebump = 0; +- long to_write, pages_skipped = 0; + struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); + + /* +@@ -2605,16 +2609,20 @@ static int ext4_da_writepages(struct add + nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write; + wbc->nr_to_write = sbi->s_mb_stream_request; + } ++ if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) ++ range_whole = 1; + +- +- pages_skipped = wbc->pages_skipped; ++ if (wbc->range_cyclic) ++ index = mapping->writeback_index; ++ else ++ index = wbc->range_start >> PAGE_CACHE_SHIFT; + + mpd.wbc = wbc; + mpd.inode = mapping->host; + +-restart_loop: +- to_write = wbc->nr_to_write; +- while (!ret && to_write > 0) { ++ pages_skipped = wbc->pages_skipped; ++ ++ while (!ret && wbc->nr_to_write > 0) { + + /* + * we insert one extent at a time. 
So we need +@@ -2647,46 +2655,51 @@ restart_loop: + goto out_writepages; + } + } +- to_write -= wbc->nr_to_write; +- + mpd.get_block = ext4_da_get_block_write; + ret = mpage_da_writepages(mapping, wbc, &mpd); + + ext4_journal_stop(handle); + +- if (mpd.retval == -ENOSPC) ++ if (mpd.retval == -ENOSPC) { ++ /* commit the transaction which would ++ * free blocks released in the transaction ++ * and try again ++ */ + jbd2_journal_force_commit_nested(sbi->s_journal); +- +- /* reset the retry count */ +- if (ret == MPAGE_DA_EXTENT_TAIL) { ++ wbc->pages_skipped = pages_skipped; ++ ret = 0; ++ } else if (ret == MPAGE_DA_EXTENT_TAIL) { + /* + * got one extent now try with + * rest of the pages + */ +- to_write += wbc->nr_to_write; ++ pages_written += mpd.pages_written; ++ wbc->pages_skipped = pages_skipped; + ret = 0; +- } else if (wbc->nr_to_write) { ++ } else if (wbc->nr_to_write) + /* + * There is no more writeout needed + * or we requested for a noblocking writeout + * and we found the device congested + */ +- to_write += wbc->nr_to_write; + break; +- } +- wbc->nr_to_write = to_write; +- } +- +- if (!wbc->range_cyclic && (pages_skipped != wbc->pages_skipped)) { +- /* We skipped pages in this loop */ +- wbc->nr_to_write = to_write + +- wbc->pages_skipped - pages_skipped; +- wbc->pages_skipped = pages_skipped; +- goto restart_loop; + } ++ if (pages_skipped != wbc->pages_skipped) ++ printk(KERN_EMERG "This should not happen leaving %s " ++ "with nr_to_write = %ld ret = %d\n", ++ __func__, wbc->nr_to_write, ret); ++ ++ /* Update index */ ++ index += pages_written; ++ if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) ++ /* ++ * set the writeback_index so that range_cyclic ++ * mode will write it back later ++ */ ++ mapping->writeback_index = index; + + out_writepages: +- wbc->nr_to_write = to_write - nr_to_writebump; ++ wbc->nr_to_write -= nr_to_writebump; + return ret; + } + diff --git 
a/queue-2.6.27/ext4-implement-range_cyclic-in-ext4_da_writepages-instead-of-write_cache_pages.patch b/queue-2.6.27/ext4-implement-range_cyclic-in-ext4_da_writepages-instead-of-write_cache_pages.patch new file mode 100644 index 00000000000..d551b7acdac --- /dev/null +++ b/queue-2.6.27/ext4-implement-range_cyclic-in-ext4_da_writepages-instead-of-write_cache_pages.patch @@ -0,0 +1,102 @@ +From dev@jaysonking.com Fri Jun 25 15:33:41 2010 +From: Aneesh Kumar K.V +Date: Fri, 28 May 2010 14:27:23 -0500 +Subject: ext4: Implement range_cyclic in ext4_da_writepages instead of write_cache_pages +To: Stable team , LKML , Greg Kroah-Hartman +Cc: "Theodore Ts'o" , Dave Chinner , "Jayson R. King" , Kay Diederichs , Ext4 Developers List , "Aneesh Kumar K.V" +Message-ID: <4C00191B.3030702@jaysonking.com> + + +From: Aneesh Kumar K.V + +commit 2acf2c261b823d9d9ed954f348b97620297a36b5 upstream. + +With delayed allocation we lock the page in write_cache_pages() and +try to build an in memory extent of contiguous blocks. This is needed +so that we can get large contiguous blocks request. If range_cyclic +mode is enabled, write_cache_pages() will loop back to the 0 index if +no I/O has been done yet, and try to start writing from the beginning +of the range. That causes an attempt to take the page lock of lower +index page while holding the page lock of higher index page, which can +cause a dead lock with another writeback thread. + +The solution is to implement the range_cyclic behavior in +ext4_da_writepages() instead. + +http://bugzilla.kernel.org/show_bug.cgi?id=12579 + +Signed-off-by: Aneesh Kumar K.V +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Jayson R. 
King +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/inode.c | 21 +++++++++++++++++++-- + 1 file changed, 19 insertions(+), 2 deletions(-) + +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -2575,6 +2575,7 @@ static int ext4_da_writepages(struct add + struct mpage_da_data mpd; + struct inode *inode = mapping->host; + long pages_written = 0, pages_skipped; ++ int range_cyclic, cycled = 1, io_done = 0; + int needed_blocks, ret = 0, nr_to_writebump = 0; + struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); + +@@ -2612,9 +2613,15 @@ static int ext4_da_writepages(struct add + if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) + range_whole = 1; + +- if (wbc->range_cyclic) ++ range_cyclic = wbc->range_cyclic; ++ if (wbc->range_cyclic) { + index = mapping->writeback_index; +- else ++ if (index) ++ cycled = 0; ++ wbc->range_start = index << PAGE_CACHE_SHIFT; ++ wbc->range_end = LLONG_MAX; ++ wbc->range_cyclic = 0; ++ } else + index = wbc->range_start >> PAGE_CACHE_SHIFT; + + mpd.wbc = wbc; +@@ -2622,6 +2629,7 @@ static int ext4_da_writepages(struct add + + pages_skipped = wbc->pages_skipped; + ++retry: + while (!ret && wbc->nr_to_write > 0) { + + /* +@@ -2676,6 +2684,7 @@ static int ext4_da_writepages(struct add + pages_written += mpd.pages_written; + wbc->pages_skipped = pages_skipped; + ret = 0; ++ io_done = 1; + } else if (wbc->nr_to_write) + /* + * There is no more writeout needed +@@ -2684,6 +2693,13 @@ static int ext4_da_writepages(struct add + */ + break; + } ++ if (!io_done && !cycled) { ++ cycled = 1; ++ index = 0; ++ wbc->range_start = index << PAGE_CACHE_SHIFT; ++ wbc->range_end = mapping->writeback_index - 1; ++ goto retry; ++ } + if (pages_skipped != wbc->pages_skipped) + printk(KERN_EMERG "This should not happen leaving %s " + "with nr_to_write = %ld ret = %d\n", +@@ -2691,6 +2707,7 @@ static int ext4_da_writepages(struct add + + /* Update index */ + index += pages_written; ++ wbc->range_cyclic = range_cyclic; + if (wbc->range_cyclic || 
(range_whole && wbc->nr_to_write > 0)) + /* + * set the writeback_index so that range_cyclic diff --git a/queue-2.6.27/ext4-use-our-own-write_cache_pages.patch b/queue-2.6.27/ext4-use-our-own-write_cache_pages.patch new file mode 100644 index 00000000000..7b1596ecc51 --- /dev/null +++ b/queue-2.6.27/ext4-use-our-own-write_cache_pages.patch @@ -0,0 +1,206 @@ +From dev@jaysonking.com Fri Jun 25 15:32:26 2010 +From: Theodore Ts'o +Date: Fri, 28 May 2010 14:26:25 -0500 +Subject: ext4: Use our own write_cache_pages() +Cc: "Theodore Ts'o" , Dave Chinner , "Jayson R. King" , Kay Diederichs , Ext4 Developers List , "Aneesh Kumar K.V" +Message-ID: <4C0018E1.5060007@jaysonking.com> + + +From: Theodore Ts'o + +commit 8e48dcfbd7c0892b4cfd064d682cc4c95a29df32 upstream. + +Make a copy of write_cache_pages() for the benefit of +ext4_da_writepages(). This allows us to simplify the code some, and +will allow us to further customize the code in future patches. + +There are some nasty hacks in write_cache_pages(), which Linus has +(correctly) characterized as vile. I've just copied it into +write_cache_pages_da(), without trying to clean those bits up lest I +break something in the ext4's delalloc implementation, which is a bit +fragile right now. This will allow Dave Chinner to clean up +write_cache_pages() in mm/page-writeback.c, without worrying about +breaking ext4. Eventually write_cache_pages_da() will go away when I +rewrite ext4's delayed allocation and create a general +ext4_writepages() which is used for all of ext4's writeback. Until +now this is the lowest risk way to clean up the core +write_cache_pages() function. 
+ +Signed-off-by: "Theodore Ts'o" +Cc: Dave Chinner +[dev@jaysonking.com: Dropped the hunks which reverted the use of no_nrwrite_index_update, since those lines weren't ever created on 2.6.27.y] +[dev@jaysonking.com: Copied from 2.6.27.y's version of write_cache_pages(), plus the changes to it from patch "vfs: Add no_nrwrite_index_update writeback control flag"] +Signed-off-by: Jayson R. King +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/inode.c | 144 +++++++++++++++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 132 insertions(+), 12 deletions(-) + +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -2059,17 +2059,6 @@ static int __mpage_da_writepage(struct p + struct buffer_head *bh, *head, fake; + sector_t logical; + +- if (mpd->io_done) { +- /* +- * Rest of the page in the page_vec +- * redirty then and skip then. We will +- * try to to write them again after +- * starting a new transaction +- */ +- redirty_page_for_writepage(wbc, page); +- unlock_page(page); +- return MPAGE_DA_EXTENT_TAIL; +- } + /* + * Can we merge this page to current extent? + */ +@@ -2160,6 +2149,137 @@ static int __mpage_da_writepage(struct p + } + + /* ++ * write_cache_pages_da - walk the list of dirty pages of the given ++ * address space and call the callback function (which usually writes ++ * the pages). ++ * ++ * This is a forked version of write_cache_pages(). Differences: ++ * Range cyclic is ignored. 
++ * no_nrwrite_index_update is always presumed true ++ */ ++static int write_cache_pages_da(struct address_space *mapping, ++ struct writeback_control *wbc, ++ struct mpage_da_data *mpd) ++{ ++ struct backing_dev_info *bdi = mapping->backing_dev_info; ++ int ret = 0; ++ int done = 0; ++ struct pagevec pvec; ++ int nr_pages; ++ pgoff_t index; ++ pgoff_t end; /* Inclusive */ ++ long nr_to_write = wbc->nr_to_write; ++ ++ if (wbc->nonblocking && bdi_write_congested(bdi)) { ++ wbc->encountered_congestion = 1; ++ return 0; ++ } ++ ++ pagevec_init(&pvec, 0); ++ index = wbc->range_start >> PAGE_CACHE_SHIFT; ++ end = wbc->range_end >> PAGE_CACHE_SHIFT; ++ ++ while (!done && (index <= end)) { ++ int i; ++ ++ nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, ++ PAGECACHE_TAG_DIRTY, ++ min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); ++ if (nr_pages == 0) ++ break; ++ ++ for (i = 0; i < nr_pages; i++) { ++ struct page *page = pvec.pages[i]; ++ ++ /* ++ * At this point, the page may be truncated or ++ * invalidated (changing page->mapping to NULL), or ++ * even swizzled back from swapper_space to tmpfs file ++ * mapping. However, page->index will not change ++ * because we have a reference on the page. ++ */ ++ if (page->index > end) { ++ done = 1; ++ break; ++ } ++ ++ lock_page(page); ++ ++ /* ++ * Page truncated or invalidated. We can freely skip it ++ * then, even for data integrity operations: the page ++ * has disappeared concurrently, so there could be no ++ * real expectation of this data interity operation ++ * even if there is now a new, dirty page at the same ++ * pagecache address. 
++ */ ++ if (unlikely(page->mapping != mapping)) { ++continue_unlock: ++ unlock_page(page); ++ continue; ++ } ++ ++ if (!PageDirty(page)) { ++ /* someone wrote it for us */ ++ goto continue_unlock; ++ } ++ ++ if (PageWriteback(page)) { ++ if (wbc->sync_mode != WB_SYNC_NONE) ++ wait_on_page_writeback(page); ++ else ++ goto continue_unlock; ++ } ++ ++ BUG_ON(PageWriteback(page)); ++ if (!clear_page_dirty_for_io(page)) ++ goto continue_unlock; ++ ++ ret = __mpage_da_writepage(page, wbc, mpd); ++ ++ if (unlikely(ret)) { ++ if (ret == AOP_WRITEPAGE_ACTIVATE) { ++ unlock_page(page); ++ ret = 0; ++ } else { ++ done = 1; ++ break; ++ } ++ } ++ ++ if (nr_to_write > 0) { ++ nr_to_write--; ++ if (nr_to_write == 0 && ++ wbc->sync_mode == WB_SYNC_NONE) { ++ /* ++ * We stop writing back only if we are ++ * not doing integrity sync. In case of ++ * integrity sync we have to keep going ++ * because someone may be concurrently ++ * dirtying pages, and we might have ++ * synced a lot of newly appeared dirty ++ * pages, but have not synced all of the ++ * old dirty pages. 
++ */ ++ done = 1; ++ break; ++ } ++ } ++ ++ if (wbc->nonblocking && bdi_write_congested(bdi)) { ++ wbc->encountered_congestion = 1; ++ done = 1; ++ break; ++ } ++ } ++ pagevec_release(&pvec); ++ cond_resched(); ++ } ++ return ret; ++} ++ ++ ++/* + * mpage_da_writepages - walk the list of dirty pages of the given + * address space, allocates non-allocated blocks, maps newly-allocated + * blocks to existing bhs and issue IO them +@@ -2192,7 +2312,7 @@ static int mpage_da_writepages(struct ad + + to_write = wbc->nr_to_write; + +- ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd); ++ ret = write_cache_pages_da(mapping, wbc, mpd); + + /* + * Handle last extent of pages diff --git a/queue-2.6.27/keys-find_keyring_by_name-can-gain-access-to-a-freed-keyring.patch b/queue-2.6.27/keys-find_keyring_by_name-can-gain-access-to-a-freed-keyring.patch new file mode 100644 index 00000000000..6b1506cb147 --- /dev/null +++ b/queue-2.6.27/keys-find_keyring_by_name-can-gain-access-to-a-freed-keyring.patch @@ -0,0 +1,191 @@ +From cea7daa3589d6b550546a8c8963599f7c1a3ae5c Mon Sep 17 00:00:00 2001 +From: Toshiyuki Okajima +Date: Fri, 30 Apr 2010 14:32:13 +0100 +Subject: KEYS: find_keyring_by_name() can gain access to a freed keyring + +From: Toshiyuki Okajima + +commit cea7daa3589d6b550546a8c8963599f7c1a3ae5c upstream. + +find_keyring_by_name() can gain access to a keyring that has had its reference +count reduced to zero, and is thus ready to be freed. This then allows the +dead keyring to be brought back into use whilst it is being destroyed. + +The following timeline illustrates the process: + +|(cleaner) (user) +| +| free_user(user) sys_keyctl() +| | | +| key_put(user->session_keyring) keyctl_get_keyring_ID() +| || //=> keyring->usage = 0 | +| |schedule_work(&key_cleanup_task) lookup_user_key() +| || | +| kmem_cache_free(,user) | +| . |[KEY_SPEC_USER_KEYRING] +| . install_user_keyrings() +| . 
|| +| key_cleanup() [<= worker_thread()] || +| | || +| [spin_lock(&key_serial_lock)] |[mutex_lock(&key_user_keyr..mutex)] +| | || +| atomic_read() == 0 || +| |{ rb_ease(&key->serial_node,) } || +| | || +| [spin_unlock(&key_serial_lock)] |find_keyring_by_name() +| | ||| +| keyring_destroy(keyring) ||[read_lock(&keyring_name_lock)] +| || ||| +| |[write_lock(&keyring_name_lock)] ||atomic_inc(&keyring->usage) +| |. ||| *** GET freeing keyring *** +| |. ||[read_unlock(&keyring_name_lock)] +| || || +| |list_del() |[mutex_unlock(&key_user_k..mutex)] +| || | +| |[write_unlock(&keyring_name_lock)] ** INVALID keyring is returned ** +| | . +| kmem_cache_free(,keyring) . +| . +| atomic_dec(&keyring->usage) +v *** DESTROYED *** +TIME + +If CONFIG_SLUB_DEBUG=y then we may see the following message generated: + + ============================================================================= + BUG key_jar: Poison overwritten + ----------------------------------------------------------------------------- + + INFO: 0xffff880197a7e200-0xffff880197a7e200. First byte 0x6a instead of 0x6b + INFO: Allocated in key_alloc+0x10b/0x35f age=25 cpu=1 pid=5086 + INFO: Freed in key_cleanup+0xd0/0xd5 age=12 cpu=1 pid=10 + INFO: Slab 0xffffea000592cb90 objects=16 used=2 fp=0xffff880197a7e200 flags=0x200000000000c3 + INFO: Object 0xffff880197a7e200 @offset=512 fp=0xffff880197a7e300 + + Bytes b4 0xffff880197a7e1f0: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ + Object 0xffff880197a7e200: 6a 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b jkkkkkkkkkkkkkkk + +Alternatively, we may see a system panic happen, such as: + + BUG: unable to handle kernel NULL pointer dereference at 0000000000000001 + IP: [] kmem_cache_alloc+0x5b/0xe9 + PGD 6b2b4067 PUD 6a80d067 PMD 0 + Oops: 0000 [#1] SMP + last sysfs file: /sys/kernel/kexec_crash_loaded + CPU 1 + ... 
+ Pid: 31245, comm: su Not tainted 2.6.34-rc5-nofixed-nodebug #2 D2089/PRIMERGY + RIP: 0010:[] [] kmem_cache_alloc+0x5b/0xe9 + RSP: 0018:ffff88006af3bd98 EFLAGS: 00010002 + RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffff88007d19900b + RDX: 0000000100000000 RSI: 00000000000080d0 RDI: ffffffff81828430 + RBP: ffffffff81828430 R08: ffff88000a293750 R09: 0000000000000000 + R10: 0000000000000001 R11: 0000000000100000 R12: 00000000000080d0 + R13: 00000000000080d0 R14: 0000000000000296 R15: ffffffff810f20ce + FS: 00007f97116bc700(0000) GS:ffff88000a280000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 0000000000000001 CR3: 000000006a91c000 CR4: 00000000000006e0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 + Process su (pid: 31245, threadinfo ffff88006af3a000, task ffff8800374414c0) + Stack: + 0000000512e0958e 0000000000008000 ffff880037f8d180 0000000000000001 + 0000000000000000 0000000000008001 ffff88007d199000 ffffffff810f20ce + 0000000000008000 ffff88006af3be48 0000000000000024 ffffffff810face3 + Call Trace: + [] ? get_empty_filp+0x70/0x12f + [] ? do_filp_open+0x145/0x590 + [] ? tlb_finish_mmu+0x2a/0x33 + [] ? unmap_region+0xd3/0xe2 + [] ? virt_to_head_page+0x9/0x2d + [] ? alloc_fd+0x69/0x10e + [] ? do_sys_open+0x56/0xfc + [] ? system_call_fastpath+0x16/0x1b + Code: 0f 1f 44 00 00 49 89 c6 fa 66 0f 1f 44 00 00 65 4c 8b 04 25 60 e8 00 00 48 8b 45 00 49 01 c0 49 8b 18 48 85 db 74 0d 48 63 45 18 <48> 8b 04 03 49 89 00 eb 14 4c 89 f9 83 ca ff 44 89 e6 48 89 ef + RIP [] kmem_cache_alloc+0x5b/0xe9 + +This problem is that find_keyring_by_name does not confirm that the keyring is +valid before accepting it. + +Skipping keyrings that have been reduced to a zero count seems the way to go. +To this end, use atomic_inc_not_zero() to increment the usage count and skip +the candidate keyring if that returns false. 
+ +The following script _may_ cause the bug to happen, but there's no guarantee +as the window of opportunity is small: + + #!/bin/sh + LOOP=100000 + USER=dummy_user + /bin/su -c "exit;" $USER || { /usr/sbin/adduser -m $USER; add=1; } + for ((i=0; i /dev/null" $USER + done + (( add == 1 )) && /usr/sbin/userdel -r $USER + exit + +Note that the nominated user must not be in use. + +An alternative way of testing this may be: + + for ((i=0; i<100000; i++)) + do + keyctl session foo /bin/true || break + done >&/dev/null + +as that uses a keyring named "foo" rather than relying on the user and +user-session named keyrings. + +Reported-by: Toshiyuki Okajima +Signed-off-by: David Howells +Tested-by: Toshiyuki Okajima +Acked-by: Serge Hallyn +Signed-off-by: James Morris +Cc: Ben Hutchings +Cc: Chuck Ebbert +Signed-off-by: Greg Kroah-Hartman + +--- + security/keys/keyring.c | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +--- a/security/keys/keyring.c ++++ b/security/keys/keyring.c +@@ -523,9 +523,8 @@ struct key *find_keyring_by_name(const c + struct key *keyring; + int bucket; + +- keyring = ERR_PTR(-EINVAL); + if (!name) +- goto error; ++ return ERR_PTR(-EINVAL); + + bucket = keyring_hash(name); + +@@ -549,17 +548,18 @@ struct key *find_keyring_by_name(const c + KEY_SEARCH) < 0) + continue; + +- /* we've got a match */ +- atomic_inc(&keyring->usage); +- read_unlock(&keyring_name_lock); +- goto error; ++ /* we've got a match but we might end up racing with ++ * key_cleanup() if the keyring is currently 'dead' ++ * (ie. 
it has a zero usage count) */ ++ if (!atomic_inc_not_zero(&keyring->usage)) ++ continue; ++ goto out; + } + } + +- read_unlock(&keyring_name_lock); + keyring = ERR_PTR(-ENOKEY); +- +- error: ++out: ++ read_unlock(&keyring_name_lock); + return keyring; + + } /* end find_keyring_by_name() */ diff --git a/queue-2.6.27/keys-return-more-accurate-error-codes.patch b/queue-2.6.27/keys-return-more-accurate-error-codes.patch new file mode 100644 index 00000000000..383962aa96e --- /dev/null +++ b/queue-2.6.27/keys-return-more-accurate-error-codes.patch @@ -0,0 +1,46 @@ +From 4d09ec0f705cf88a12add029c058b53f288cfaa2 Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Mon, 17 May 2010 14:42:35 +0100 +Subject: KEYS: Return more accurate error codes + +From: Dan Carpenter + +commit 4d09ec0f705cf88a12add029c058b53f288cfaa2 upstream. + +We were using the wrong variable here so the error codes weren't being returned +properly. The original code returns -ENOKEY. + +Signed-off-by: Dan Carpenter +Signed-off-by: David Howells +Signed-off-by: James Morris +Signed-off-by: Greg Kroah-Hartman + +--- a/security/keys/process_keys.c ++++ b/security/keys/process_keys.c +@@ -508,7 +508,7 @@ try_again: + + ret = install_thread_keyring(); + if (ret < 0) { +- key = ERR_PTR(ret); ++ key_ref = ERR_PTR(ret); + goto error; + } + goto reget_creds; +@@ -526,7 +526,7 @@ try_again: + + ret = install_process_keyring(); + if (ret < 0) { +- key = ERR_PTR(ret); ++ key_ref = ERR_PTR(ret); + goto error; + } + goto reget_creds; +@@ -585,7 +585,7 @@ try_again: + + case KEY_SPEC_GROUP_KEYRING: + /* group keyrings are not yet supported */ +- key = ERR_PTR(-EINVAL); ++ key_ref = ERR_PTR(-EINVAL); + goto error; + + case KEY_SPEC_REQKEY_AUTH_KEY: diff --git a/queue-2.6.27/parisc-clear-floating-point-exception-flag-on-sigfpe-signal.patch b/queue-2.6.27/parisc-clear-floating-point-exception-flag-on-sigfpe-signal.patch new file mode 100644 index 00000000000..3080f86c577 --- /dev/null +++ 
b/queue-2.6.27/parisc-clear-floating-point-exception-flag-on-sigfpe-signal.patch @@ -0,0 +1,34 @@ +From 550f0d922286556c7ea43974bb7921effb5a5278 Mon Sep 17 00:00:00 2001 +From: Helge Deller +Date: Mon, 3 May 2010 20:44:21 +0000 +Subject: parisc: clear floating point exception flag on SIGFPE signal + +From: Helge Deller + +commit 550f0d922286556c7ea43974bb7921effb5a5278 upstream. + +Clear the floating point exception flag before returning to +user space. This is needed, else the libc trampoline handler +may hit the same SIGFPE again while building up a trampoline +to a signal handler. + +Fixes debian bug #559406. + +Signed-off-by: Helge Deller +Signed-off-by: Kyle McMartin +Signed-off-by: Greg Kroah-Hartman + +--- + arch/parisc/math-emu/decode_exc.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/parisc/math-emu/decode_exc.c ++++ b/arch/parisc/math-emu/decode_exc.c +@@ -342,6 +342,7 @@ decode_fpu(unsigned int Fpu_register[], + return SIGNALCODE(SIGFPE, FPE_FLTINV); + case DIVISIONBYZEROEXCEPTION: + update_trap_counts(Fpu_register, aflags, bflags, trap_counts); ++ Clear_excp_register(exception_index); + return SIGNALCODE(SIGFPE, FPE_FLTDIV); + case INEXACTEXCEPTION: + update_trap_counts(Fpu_register, aflags, bflags, trap_counts); diff --git a/queue-2.6.27/sctp-fix-skb_over_panic-resulting-from-multiple-invalid-parameter-errors-cve-2010-1173-v4.patch b/queue-2.6.27/sctp-fix-skb_over_panic-resulting-from-multiple-invalid-parameter-errors-cve-2010-1173-v4.patch new file mode 100644 index 00000000000..36a78a2698d --- /dev/null +++ b/queue-2.6.27/sctp-fix-skb_over_panic-resulting-from-multiple-invalid-parameter-errors-cve-2010-1173-v4.patch @@ -0,0 +1,223 @@ +From 5fa782c2f5ef6c2e4f04d3e228412c9b4a4c8809 Mon Sep 17 00:00:00 2001 +From: Neil Horman +Date: Wed, 28 Apr 2010 10:30:59 +0000 +Subject: sctp: Fix skb_over_panic resulting from multiple invalid parameter errors (CVE-2010-1173) (v4) + +From: Neil Horman + +commit 5fa782c2f5ef6c2e4f04d3e228412c9b4a4c8809 
upstream. + +Ok, version 4 + +Change Notes: +1) Minor cleanups, from Vlads notes + +Summary: + +Hey- + Recently, it was reported to me that the kernel could oops in the +following way: + +<5> kernel BUG at net/core/skbuff.c:91! +<5> invalid operand: 0000 [#1] +<5> Modules linked in: sctp netconsole nls_utf8 autofs4 sunrpc iptable_filter +ip_tables cpufreq_powersave parport_pc lp parport vmblock(U) vsock(U) vmci(U) +vmxnet(U) vmmemctl(U) vmhgfs(U) acpiphp dm_mirror dm_mod button battery ac md5 +ipv6 uhci_hcd ehci_hcd snd_ens1371 snd_rawmidi snd_seq_device snd_pcm_oss +snd_mixer_oss snd_pcm snd_timer snd_page_alloc snd_ac97_codec snd soundcore +pcnet32 mii floppy ext3 jbd ata_piix libata mptscsih mptsas mptspi mptscsi +mptbase sd_mod scsi_mod +<5> CPU: 0 +<5> EIP: 0060:[] Not tainted VLI +<5> EFLAGS: 00010216 (2.6.9-89.0.25.EL) +<5> EIP is at skb_over_panic+0x1f/0x2d +<5> eax: 0000002c ebx: c033f461 ecx: c0357d96 edx: c040fd44 +<5> esi: c033f461 edi: df653280 ebp: 00000000 esp: c040fd40 +<5> ds: 007b es: 007b ss: 0068 +<5> Process swapper (pid: 0, threadinfo=c040f000 task=c0370be0) +<5> Stack: c0357d96 e0c29478 00000084 00000004 c033f461 df653280 d7883180 +e0c2947d +<5> 00000000 00000080 df653490 00000004 de4f1ac0 de4f1ac0 00000004 +df653490 +<5> 00000001 e0c2877a 08000800 de4f1ac0 df653490 00000000 e0c29d2e +00000004 +<5> Call Trace: +<5> [] sctp_addto_chunk+0xb0/0x128 [sctp] +<5> [] sctp_addto_chunk+0xb5/0x128 [sctp] +<5> [] sctp_init_cause+0x3f/0x47 [sctp] +<5> [] sctp_process_unk_param+0xac/0xb8 [sctp] +<5> [] sctp_verify_init+0xcc/0x134 [sctp] +<5> [] sctp_sf_do_5_1B_init+0x83/0x28e [sctp] +<5> [] sctp_do_sm+0x41/0x77 [sctp] +<5> [] cache_grow+0x140/0x233 +<5> [] sctp_endpoint_bh_rcv+0xc5/0x108 [sctp] +<5> [] sctp_inq_push+0xe/0x10 [sctp] +<5> [] sctp_rcv+0x454/0x509 [sctp] +<5> [] ipt_hook+0x17/0x1c [iptable_filter] +<5> [] nf_iterate+0x40/0x81 +<5> [] ip_local_deliver_finish+0x0/0x151 +<5> [] ip_local_deliver_finish+0xc6/0x151 +<5> [] nf_hook_slow+0x83/0xb5 
+<5> [] ip_local_deliver+0x1a2/0x1a9 +<5> [] ip_local_deliver_finish+0x0/0x151 +<5> [] ip_rcv+0x334/0x3b4 +<5> [] netif_receive_skb+0x320/0x35b +<5> [] init_stall_timer+0x67/0x6a [uhci_hcd] +<5> [] process_backlog+0x6c/0xd9 +<5> [] net_rx_action+0xfe/0x1f8 +<5> [] __do_softirq+0x35/0x79 +<5> [] handle_IRQ_event+0x0/0x4f +<5> [] do_softirq+0x46/0x4d + +Its an skb_over_panic BUG halt that results from processing an init chunk in +which too many of its variable length parameters are in some way malformed. + +The problem is in sctp_process_unk_param: +if (NULL == *errp) + *errp = sctp_make_op_error_space(asoc, chunk, + ntohs(chunk->chunk_hdr->length)); + + if (*errp) { + sctp_init_cause(*errp, SCTP_ERROR_UNKNOWN_PARAM, + WORD_ROUND(ntohs(param.p->length))); + sctp_addto_chunk(*errp, + WORD_ROUND(ntohs(param.p->length)), + param.v); + +When we allocate an error chunk, we assume that the worst case scenario requires +that we have chunk_hdr->length data allocated, which would be correct nominally, +given that we call sctp_addto_chunk for the violating parameter. Unfortunately, +we also, in sctp_init_cause insert a sctp_errhdr_t structure into the error +chunk, so the worst case situation in which all parameters are in violation +requires chunk_hdr->length+(sizeof(sctp_errhdr_t)*param_count) bytes of data. + +The result of this error is that a deliberately malformed packet sent to a +listening host can cause a remote DOS, described in CVE-2010-1173: +http://cve.mitre.org/cgi-bin/cvename.cgi?name=2010-1173 + +I've tested the below fix and confirmed that it fixes the issue. We move to a +strategy whereby we allocate a fixed size error chunk and ignore errors we don't +have space to report. Tested by me successfully + +Signed-off-by: Neil Horman +Acked-by: Vlad Yasevich +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + include/net/sctp/structs.h | 1 + net/sctp/sm_make_chunk.c | 62 +++++++++++++++++++++++++++++++++++++++++---- + 2 files changed, 58 insertions(+), 5 deletions(-) + +--- a/include/net/sctp/structs.h ++++ b/include/net/sctp/structs.h +@@ -753,6 +753,7 @@ int sctp_user_addto_chunk(struct sctp_ch + struct iovec *data); + void sctp_chunk_free(struct sctp_chunk *); + void *sctp_addto_chunk(struct sctp_chunk *, int len, const void *data); ++void *sctp_addto_chunk_fixed(struct sctp_chunk *, int len, const void *data); + struct sctp_chunk *sctp_chunkify(struct sk_buff *, + const struct sctp_association *, + struct sock *); +--- a/net/sctp/sm_make_chunk.c ++++ b/net/sctp/sm_make_chunk.c +@@ -107,7 +107,7 @@ static const struct sctp_paramhdr prsctp + __constant_htons(sizeof(struct sctp_paramhdr)), + }; + +-/* A helper to initialize to initialize an op error inside a ++/* A helper to initialize an op error inside a + * provided chunk, as most cause codes will be embedded inside an + * abort chunk. + */ +@@ -124,6 +124,29 @@ void sctp_init_cause(struct sctp_chunk + chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(sctp_errhdr_t), &err); + } + ++/* A helper to initialize an op error inside a ++ * provided chunk, as most cause codes will be embedded inside an ++ * abort chunk. Differs from sctp_init_cause in that it won't oops ++ * if there isn't enough space in the op error chunk ++ */ ++int sctp_init_cause_fixed(struct sctp_chunk *chunk, __be16 cause_code, ++ size_t paylen) ++{ ++ sctp_errhdr_t err; ++ __u16 len; ++ ++ /* Cause code constants are now defined in network order. 
*/
++ err.cause = cause_code;
++ len = sizeof(sctp_errhdr_t) + paylen;
++ err.length = htons(len);
++
++ if (skb_tailroom(chunk->skb) < len)
++ return -ENOSPC;
++ chunk->subh.err_hdr = sctp_addto_chunk_fixed(chunk,
++ sizeof(sctp_errhdr_t),
++ &err);
++ return 0;
++}
+ /* 3.3.2 Initiation (INIT) (1)
+ *
+ * This chunk is used to initiate a SCTP association between two
+@@ -1114,6 +1137,24 @@ nodata:
+ return retval;
+ }
+
++/* Create an Operation Error chunk of a fixed size,
++ * specifically, max(asoc->pathmtu, SCTP_DEFAULT_MAXSEGMENT)
++ * This is a helper function to allocate an error chunk for
++ * for those invalid parameter codes in which we may not want
++ * to report all the errors, if the incomming chunk is large
++ */
++static inline struct sctp_chunk *sctp_make_op_error_fixed(
++ const struct sctp_association *asoc,
++ const struct sctp_chunk *chunk)
++{
++ size_t size = asoc ? asoc->pathmtu : 0;
++
++ if (!size)
++ size = SCTP_DEFAULT_MAXSEGMENT;
++
++ return sctp_make_op_error_space(asoc, chunk, size);
++}
++
+ /* Create an Operation Error chunk. */
+ struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk,
+@@ -1354,6 +1395,18 @@ void *sctp_addto_chunk(struct sctp_chunk
+ return target;
+ }
+
++/* Append bytes to the end of a chunk. Returns NULL if there isn't sufficient
++ * space in the chunk
++ */
++void *sctp_addto_chunk_fixed(struct sctp_chunk *chunk,
++ int len, const void *data)
++{
++ if (skb_tailroom(chunk->skb) >= len)
++ return sctp_addto_chunk(chunk, len, data);
++ else
++ return NULL;
++}
++
+ /* Append bytes from user space to the end of a chunk. Will panic if
+ * chunk is not big enough.
+ * Returns a kernel err value.
+@@ -1957,13 +2010,12 @@ static sctp_ierror_t sctp_process_unk_pa
+ * returning multiple unknown parameters.
+ */ + if (NULL == *errp) +- *errp = sctp_make_op_error_space(asoc, chunk, +- ntohs(chunk->chunk_hdr->length)); ++ *errp = sctp_make_op_error_fixed(asoc, chunk); + + if (*errp) { +- sctp_init_cause(*errp, SCTP_ERROR_UNKNOWN_PARAM, ++ sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM, + WORD_ROUND(ntohs(param.p->length))); +- sctp_addto_chunk(*errp, ++ sctp_addto_chunk_fixed(*errp, + WORD_ROUND(ntohs(param.p->length)), + param.v); + } else { diff --git a/queue-2.6.27/series b/queue-2.6.27/series index 55e3b8df82a..0bbf3de9a8c 100644 --- a/queue-2.6.27/series +++ b/queue-2.6.27/series @@ -10,3 +10,13 @@ md-set-mddev-readonly-flag-on-blkdev-blkroset-ioctl.patch do_generic_file_read-clear-page-errors-when-issuing-a-fresh-read-of-the-page.patch ipmi-handle-run_to_completion-properly-in-deliver_recv_msg.patch gconfig-fix-build-failure-on-fedora-13.patch +ext4-check-s_log_groups_per_flex-in-online-resize-code.patch +ext4-use-our-own-write_cache_pages.patch +ext4-fix-file-fragmentation-during-large-file-write.patch +ext4-implement-range_cyclic-in-ext4_da_writepages-instead-of-write_cache_pages.patch +sctp-fix-skb_over_panic-resulting-from-multiple-invalid-parameter-errors-cve-2010-1173-v4.patch +vfs-add-nofollow-flag-to-umount-2.patch +tipc-fix-oops-on-send-prior-to-entering-networked-mode-v3.patch +parisc-clear-floating-point-exception-flag-on-sigfpe-signal.patch +keys-return-more-accurate-error-codes.patch +keys-find_keyring_by_name-can-gain-access-to-a-freed-keyring.patch diff --git a/queue-2.6.27/tipc-fix-oops-on-send-prior-to-entering-networked-mode-v3.patch b/queue-2.6.27/tipc-fix-oops-on-send-prior-to-entering-networked-mode-v3.patch new file mode 100644 index 00000000000..38be33697a9 --- /dev/null +++ b/queue-2.6.27/tipc-fix-oops-on-send-prior-to-entering-networked-mode-v3.patch @@ -0,0 +1,208 @@ +From d0021b252eaf65ca07ed14f0d66425dd9ccab9a6 Mon Sep 17 00:00:00 2001 +From: Neil Horman +Date: Wed, 3 Mar 2010 08:31:23 +0000 +Subject: tipc: Fix oops on send prior 
to entering networked mode (v3) + +From: Neil Horman + +commit d0021b252eaf65ca07ed14f0d66425dd9ccab9a6 upstream. + +Fix TIPC to disallow sending to remote addresses prior to entering NET_MODE + +user programs can oops the kernel by sending datagrams via AF_TIPC prior to +entering networked mode. The following backtrace has been observed: + +ID: 13459 TASK: ffff810014640040 CPU: 0 COMMAND: "tipc-client" +[exception RIP: tipc_node_select_next_hop+90] +RIP: ffffffff8869d3c3 RSP: ffff81002d9a5ab8 RFLAGS: 00010202 +RAX: 0000000000000001 RBX: 0000000000000001 RCX: 0000000000000001 +RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000001001001 +RBP: 0000000001001001 R8: 0074736575716552 R9: 0000000000000000 +R10: ffff81003fbd0680 R11: 00000000000000c8 R12: 0000000000000008 +R13: 0000000000000001 R14: 0000000000000001 R15: ffff810015c6ca00 +ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 +RIP: 0000003cbd8d49a3 RSP: 00007fffc84e0be8 RFLAGS: 00010206 +RAX: 000000000000002c RBX: ffffffff8005d116 RCX: 0000000000000000 +RDX: 0000000000000008 RSI: 00007fffc84e0c00 RDI: 0000000000000003 +RBP: 0000000000000000 R8: 00007fffc84e0c10 R9: 0000000000000010 +R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 +R13: 00007fffc84e0d10 R14: 0000000000000000 R15: 00007fffc84e0c30 +ORIG_RAX: 000000000000002c CS: 0033 SS: 002b + +What happens is that, when the tipc module in inserted it enters a standalone +node mode in which communication to its own address is allowed <0.0.0> but not +to other addresses, since the appropriate data structures have not been +allocated yet (specifically the tipc_net pointer). There is nothing stopping a +client from trying to send such a message however, and if that happens, we +attempt to dereference tipc_net.zones while the pointer is still NULL, and +explode. The fix is pretty straightforward. 
Since these oopses all arise from +the dereference of global pointers prior to their assignment to allocated +values, and since these allocations are small (about 2k total), lets convert +these pointers to static arrays of the appropriate size. All the accesses to +these bits consider 0/NULL to be a non match when searching, so all the lookups +still work properly, and there is no longer a chance of a bad dererence +anywhere. As a bonus, this lets us eliminate the setup/teardown routines for +those pointers, and elimnates the need to preform any locking around them to +prevent access while their being allocated/freed. + +I've updated the tipc_net structure to behave this way to fix the exact reported +problem, and also fixed up the tipc_bearers and media_list arrays to fix an +obvious simmilar problem that arises from issuing tipc-config commands to +manipulate bearers/links prior to entering networked mode + +I've tested this for a few hours by running the sanity tests and stress test +with the tipcutils suite, and nothing has fallen over. There have been a few +lockdep warnings, but those were there before, and can be addressed later, as +they didn't actually result in any deadlock. + +Signed-off-by: Neil Horman +CC: Allan Stephens +CC: David S. Miller +CC: tipc-discussion@lists.sourceforge.net +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + net/tipc/bearer.c | 37 ++++++------------------------------- + net/tipc/bearer.h | 2 +- + net/tipc/net.c | 25 ++++--------------------- + 3 files changed, 11 insertions(+), 53 deletions(-) + +--- a/net/tipc/bearer.c ++++ b/net/tipc/bearer.c +@@ -45,10 +45,10 @@ + + #define MAX_ADDR_STR 32 + +-static struct media *media_list = NULL; ++static struct media media_list[MAX_MEDIA]; + static u32 media_count = 0; + +-struct bearer *tipc_bearers = NULL; ++struct bearer tipc_bearers[MAX_BEARERS]; + + /** + * media_name_valid - validate media name +@@ -108,9 +108,11 @@ int tipc_register_media(u32 media_type, + int res = -EINVAL; + + write_lock_bh(&tipc_net_lock); +- if (!media_list) +- goto exit; + ++ if (tipc_mode != TIPC_NET_MODE) { ++ warn("Media <%s> rejected, not in networked mode yet\n", name); ++ goto exit; ++ } + if (!media_name_valid(name)) { + warn("Media <%s> rejected, illegal name\n", name); + goto exit; +@@ -660,33 +662,10 @@ int tipc_disable_bearer(const char *name + + + +-int tipc_bearer_init(void) +-{ +- int res; +- +- write_lock_bh(&tipc_net_lock); +- tipc_bearers = kcalloc(MAX_BEARERS, sizeof(struct bearer), GFP_ATOMIC); +- media_list = kcalloc(MAX_MEDIA, sizeof(struct media), GFP_ATOMIC); +- if (tipc_bearers && media_list) { +- res = 0; +- } else { +- kfree(tipc_bearers); +- kfree(media_list); +- tipc_bearers = NULL; +- media_list = NULL; +- res = -ENOMEM; +- } +- write_unlock_bh(&tipc_net_lock); +- return res; +-} +- + void tipc_bearer_stop(void) + { + u32 i; + +- if (!tipc_bearers) +- return; +- + for (i = 0; i < MAX_BEARERS; i++) { + if (tipc_bearers[i].active) + tipc_bearers[i].publ.blocked = 1; +@@ -695,10 +674,6 @@ void tipc_bearer_stop(void) + if (tipc_bearers[i].active) + bearer_disable(tipc_bearers[i].publ.name); + } +- kfree(tipc_bearers); +- kfree(media_list); +- tipc_bearers = NULL; +- media_list = NULL; + media_count = 0; + } + +--- a/net/tipc/bearer.h ++++ b/net/tipc/bearer.h +@@ -114,7 +114,7 
@@ struct bearer_name { + + struct link; + +-extern struct bearer *tipc_bearers; ++extern struct bearer tipc_bearers[]; + + void tipc_media_addr_printf(struct print_buf *pb, struct tipc_media_addr *a); + struct sk_buff *tipc_media_get_names(void); +--- a/net/tipc/net.c ++++ b/net/tipc/net.c +@@ -116,7 +116,8 @@ + */ + + DEFINE_RWLOCK(tipc_net_lock); +-struct network tipc_net = { NULL }; ++struct _zone *tipc_zones[256] = { NULL, }; ++struct network tipc_net = { tipc_zones }; + + struct tipc_node *tipc_net_select_remote_node(u32 addr, u32 ref) + { +@@ -158,28 +159,12 @@ void tipc_net_send_external_routes(u32 d + } + } + +-static int net_init(void) +-{ +- memset(&tipc_net, 0, sizeof(tipc_net)); +- tipc_net.zones = kcalloc(tipc_max_zones + 1, sizeof(struct _zone *), GFP_ATOMIC); +- if (!tipc_net.zones) { +- return -ENOMEM; +- } +- return 0; +-} +- + static void net_stop(void) + { + u32 z_num; + +- if (!tipc_net.zones) +- return; +- +- for (z_num = 1; z_num <= tipc_max_zones; z_num++) { ++ for (z_num = 1; z_num <= tipc_max_zones; z_num++) + tipc_zone_delete(tipc_net.zones[z_num]); +- } +- kfree(tipc_net.zones); +- tipc_net.zones = NULL; + } + + static void net_route_named_msg(struct sk_buff *buf) +@@ -282,9 +267,7 @@ int tipc_net_start(u32 addr) + tipc_named_reinit(); + tipc_port_reinit(); + +- if ((res = tipc_bearer_init()) || +- (res = net_init()) || +- (res = tipc_cltr_init()) || ++ if ((res = tipc_cltr_init()) || + (res = tipc_bclink_init())) { + return res; + } diff --git a/queue-2.6.27/vfs-add-nofollow-flag-to-umount-2.patch b/queue-2.6.27/vfs-add-nofollow-flag-to-umount-2.patch new file mode 100644 index 00000000000..1d4ea0f1ceb --- /dev/null +++ b/queue-2.6.27/vfs-add-nofollow-flag-to-umount-2.patch @@ -0,0 +1,57 @@ +From db1f05bb85d7966b9176e293f3ceead1cb8b5d79 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Wed, 10 Feb 2010 12:15:53 +0100 +Subject: vfs: add NOFOLLOW flag to umount(2) + +From: Miklos Szeredi + +commit 
db1f05bb85d7966b9176e293f3ceead1cb8b5d79 upstream. + +Add a new UMOUNT_NOFOLLOW flag to umount(2). This is needed to prevent +symlink attacks in unprivileged unmounts (fuse, samba, ncpfs). + +Additionally, return -EINVAL if an unknown flag is used (and specify +an explicitly unused flag: UMOUNT_UNUSED). This makes it possible for +the caller to determine if a flag is supported or not. + +CC: Eugene Teo +CC: Michael Kerrisk +Signed-off-by: Miklos Szeredi +Signed-off-by: Al Viro +Signed-off-by: Greg Kroah-Hartman + +--- + fs/namespace.c | 9 ++++++++- + include/linux/fs.h | 2 ++ + 2 files changed, 10 insertions(+), 1 deletion(-) + +--- a/fs/namespace.c ++++ b/fs/namespace.c +@@ -1132,8 +1132,15 @@ SYSCALL_DEFINE2(umount, char __user *, n + { + struct path path; + int retval; ++ int lookup_flags = 0; + +- retval = user_path(name, &path); ++ if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW)) ++ return -EINVAL; ++ ++ if (!(flags & UMOUNT_NOFOLLOW)) ++ lookup_flags |= LOOKUP_FOLLOW; ++ ++ retval = user_path_at(AT_FDCWD, name, lookup_flags, &path); + if (retval) + goto out; + retval = -EINVAL; +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -1061,6 +1061,8 @@ extern int send_sigurg(struct fown_struc + #define MNT_FORCE 0x00000001 /* Attempt to forcibily umount */ + #define MNT_DETACH 0x00000002 /* Just detach from the tree */ + #define MNT_EXPIRE 0x00000004 /* Mark for expiry */ ++#define UMOUNT_NOFOLLOW 0x00000008 /* Don't follow symlink on umount */ ++#define UMOUNT_UNUSED 0x80000000 /* Flag guaranteed to be unused */ + + extern struct list_head super_blocks; + extern spinlock_t sb_lock;