--- /dev/null
+From 42007efd569f1cf3bfb9a61da60ef6c2179508ca Mon Sep 17 00:00:00 2001
+From: Eric Sandeen <sandeen@redhat.com>
+Date: Sun, 16 May 2010 01:00:00 -0400
+Subject: ext4: check s_log_groups_per_flex in online resize code
+
+From: Eric Sandeen <sandeen@redhat.com>
+
+commit 42007efd569f1cf3bfb9a61da60ef6c2179508ca upstream.
+
+If groups_per_flex < 2, sbi->s_flex_groups[] doesn't get filled out,
+and every other access to this first tests s_log_groups_per_flex;
+same thing needs to happen in resize or we'll wander off into
+a null pointer when doing an online resize of the file system.
+
+Thanks to Christoph Biedl, who came up with the trivial testcase:
+
+# truncate --size 128M fsfile
+# mkfs.ext3 -F fsfile
+# tune2fs -O extents,uninit_bg,dir_index,flex_bg,huge_file,dir_nlink,extra_isize fsfile
+# e2fsck -yDf -C0 fsfile
+# truncate --size 132M fsfile
+# losetup /dev/loop0 fsfile
+# mount /dev/loop0 mnt
+# resize2fs -p /dev/loop0
+
+ https://bugzilla.kernel.org/show_bug.cgi?id=13549
+
+Reported-by: Alessandro Polverini <alex@nibbles.it>
+Test-case-by: Christoph Biedl <bugzilla.kernel.bpeb@manchmal.in-ulm.de>
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/resize.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/resize.c
++++ b/fs/ext4/resize.c
+@@ -935,7 +935,8 @@ int ext4_group_add(struct super_block *s
+ percpu_counter_add(&sbi->s_freeinodes_counter,
+ EXT4_INODES_PER_GROUP(sb));
+
+- if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
++ if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
++ sbi->s_log_groups_per_flex) {
+ ext4_group_t flex_group;
+ flex_group = ext4_flex_group(sbi, input->group);
+ sbi->s_flex_groups[flex_group].free_blocks +=
--- /dev/null
+From dev@jaysonking.com Fri Jun 25 15:33:09 2010
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Date: Fri, 28 May 2010 14:26:57 -0500
+Subject: ext4: Fix file fragmentation during large file write.
+Cc: "Jayson R. King" <dev@jaysonking.com>, Theodore Ts'o <tytso@mit.edu>, "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>, Dave Chinner <david@fromorbit.com>, Ext4 Developers List <linux-ext4@vger.kernel.org>, Kay Diederichs <Kay.Diederichs@uni-konstanz.de>
+Message-ID: <4C001901.1070207@jaysonking.com>
+
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+
+commit 22208dedbd7626e5fc4339c417f8d24cc21f79d7 upstream.
+
+The range_cyclic writeback mode uses the address_space writeback_index
+as the start index for writeback. With delayed allocation we were
+updating writeback_index wrongly resulting in highly fragmented file.
+This patch reduces the number of extents from 4000 to 27 for a
+3GB file.
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+[dev@jaysonking.com: Some changed lines from the original version of this patch were dropped, since they were rolled up with another cherry-picked patch applied to 2.6.27.y earlier.]
+[dev@jaysonking.com: Use of wbc->no_nrwrite_index_update was dropped, since write_cache_pages_da() implies it.]
+Signed-off-by: Jayson R. King <dev@jaysonking.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/inode.c | 79 ++++++++++++++++++++++++++++++++------------------------
+ 1 file changed, 46 insertions(+), 33 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1721,7 +1721,11 @@ static int mpage_da_submit_io(struct mpa
+
+ pages_skipped = mpd->wbc->pages_skipped;
+ err = mapping->a_ops->writepage(page, mpd->wbc);
+- if (!err)
++ if (!err && (pages_skipped == mpd->wbc->pages_skipped))
++ /*
++ * have successfully written the page
++ * without skipping the same
++ */
+ mpd->pages_written++;
+ /*
+ * In error case, we have to continue because
+@@ -2295,7 +2299,6 @@ static int mpage_da_writepages(struct ad
+ struct writeback_control *wbc,
+ struct mpage_da_data *mpd)
+ {
+- long to_write;
+ int ret;
+
+ if (!mpd->get_block)
+@@ -2310,19 +2313,18 @@ static int mpage_da_writepages(struct ad
+ mpd->pages_written = 0;
+ mpd->retval = 0;
+
+- to_write = wbc->nr_to_write;
+-
+ ret = write_cache_pages_da(mapping, wbc, mpd);
+-
+ /*
+ * Handle last extent of pages
+ */
+ if (!mpd->io_done && mpd->next_page != mpd->first_page) {
+ if (mpage_da_map_blocks(mpd) == 0)
+ mpage_da_submit_io(mpd);
+- }
+
+- wbc->nr_to_write = to_write - mpd->pages_written;
++ mpd->io_done = 1;
++ ret = MPAGE_DA_EXTENT_TAIL;
++ }
++ wbc->nr_to_write -= mpd->pages_written;
+ return ret;
+ }
+
+@@ -2567,11 +2569,13 @@ static int ext4_da_writepages_trans_bloc
+ static int ext4_da_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+ {
++ pgoff_t index;
++ int range_whole = 0;
+ handle_t *handle = NULL;
+ struct mpage_da_data mpd;
+ struct inode *inode = mapping->host;
++ long pages_written = 0, pages_skipped;
+ int needed_blocks, ret = 0, nr_to_writebump = 0;
+- long to_write, pages_skipped = 0;
+ struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
+
+ /*
+@@ -2605,16 +2609,20 @@ static int ext4_da_writepages(struct add
+ nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
+ wbc->nr_to_write = sbi->s_mb_stream_request;
+ }
++ if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
++ range_whole = 1;
+
+-
+- pages_skipped = wbc->pages_skipped;
++ if (wbc->range_cyclic)
++ index = mapping->writeback_index;
++ else
++ index = wbc->range_start >> PAGE_CACHE_SHIFT;
+
+ mpd.wbc = wbc;
+ mpd.inode = mapping->host;
+
+-restart_loop:
+- to_write = wbc->nr_to_write;
+- while (!ret && to_write > 0) {
++ pages_skipped = wbc->pages_skipped;
++
++ while (!ret && wbc->nr_to_write > 0) {
+
+ /*
+ * we insert one extent at a time. So we need
+@@ -2647,46 +2655,51 @@ restart_loop:
+ goto out_writepages;
+ }
+ }
+- to_write -= wbc->nr_to_write;
+-
+ mpd.get_block = ext4_da_get_block_write;
+ ret = mpage_da_writepages(mapping, wbc, &mpd);
+
+ ext4_journal_stop(handle);
+
+- if (mpd.retval == -ENOSPC)
++ if (mpd.retval == -ENOSPC) {
++ /* commit the transaction which would
++ * free blocks released in the transaction
++ * and try again
++ */
+ jbd2_journal_force_commit_nested(sbi->s_journal);
+-
+- /* reset the retry count */
+- if (ret == MPAGE_DA_EXTENT_TAIL) {
++ wbc->pages_skipped = pages_skipped;
++ ret = 0;
++ } else if (ret == MPAGE_DA_EXTENT_TAIL) {
+ /*
+ * got one extent now try with
+ * rest of the pages
+ */
+- to_write += wbc->nr_to_write;
++ pages_written += mpd.pages_written;
++ wbc->pages_skipped = pages_skipped;
+ ret = 0;
+- } else if (wbc->nr_to_write) {
++ } else if (wbc->nr_to_write)
+ /*
+ * There is no more writeout needed
+ * or we requested for a noblocking writeout
+ * and we found the device congested
+ */
+- to_write += wbc->nr_to_write;
+ break;
+- }
+- wbc->nr_to_write = to_write;
+- }
+-
+- if (!wbc->range_cyclic && (pages_skipped != wbc->pages_skipped)) {
+- /* We skipped pages in this loop */
+- wbc->nr_to_write = to_write +
+- wbc->pages_skipped - pages_skipped;
+- wbc->pages_skipped = pages_skipped;
+- goto restart_loop;
+ }
++ if (pages_skipped != wbc->pages_skipped)
++ printk(KERN_EMERG "This should not happen leaving %s "
++ "with nr_to_write = %ld ret = %d\n",
++ __func__, wbc->nr_to_write, ret);
++
++ /* Update index */
++ index += pages_written;
++ if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
++ /*
++ * set the writeback_index so that range_cyclic
++ * mode will write it back later
++ */
++ mapping->writeback_index = index;
+
+ out_writepages:
+- wbc->nr_to_write = to_write - nr_to_writebump;
++ wbc->nr_to_write -= nr_to_writebump;
+ return ret;
+ }
+
--- /dev/null
+From dev@jaysonking.com Fri Jun 25 15:33:41 2010
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Date: Fri, 28 May 2010 14:27:23 -0500
+Subject: ext4: Implement range_cyclic in ext4_da_writepages instead of write_cache_pages
+To: Stable team <stable@kernel.org>, LKML <linux-kernel@vger.kernel.org>, Greg Kroah-Hartman <gregkh@suse.de>
+Cc: "Theodore Ts'o" <tytso@mit.edu>, Dave Chinner <david@fromorbit.com>, "Jayson R. King" <dev@jaysonking.com>, Kay Diederichs <Kay.Diederichs@uni-konstanz.de>, Ext4 Developers List <linux-ext4@vger.kernel.org>, "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Message-ID: <4C00191B.3030702@jaysonking.com>
+
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+
+commit 2acf2c261b823d9d9ed954f348b97620297a36b5 upstream.
+
+With delayed allocation we lock the page in write_cache_pages() and
+try to build an in memory extent of contiguous blocks. This is needed
+so that we can get large contiguous blocks request. If range_cyclic
+mode is enabled, write_cache_pages() will loop back to the 0 index if
+no I/O has been done yet, and try to start writing from the beginning
+of the range. That causes an attempt to take the page lock of lower
+index page while holding the page lock of higher index page, which can
+cause a dead lock with another writeback thread.
+
+The solution is to implement the range_cyclic behavior in
+ext4_da_writepages() instead.
+
+http://bugzilla.kernel.org/show_bug.cgi?id=12579
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Jayson R. King <dev@jaysonking.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/inode.c | 21 +++++++++++++++++++--
+ 1 file changed, 19 insertions(+), 2 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -2575,6 +2575,7 @@ static int ext4_da_writepages(struct add
+ struct mpage_da_data mpd;
+ struct inode *inode = mapping->host;
+ long pages_written = 0, pages_skipped;
++ int range_cyclic, cycled = 1, io_done = 0;
+ int needed_blocks, ret = 0, nr_to_writebump = 0;
+ struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
+
+@@ -2612,9 +2613,15 @@ static int ext4_da_writepages(struct add
+ if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+ range_whole = 1;
+
+- if (wbc->range_cyclic)
++ range_cyclic = wbc->range_cyclic;
++ if (wbc->range_cyclic) {
+ index = mapping->writeback_index;
+- else
++ if (index)
++ cycled = 0;
++ wbc->range_start = index << PAGE_CACHE_SHIFT;
++ wbc->range_end = LLONG_MAX;
++ wbc->range_cyclic = 0;
++ } else
+ index = wbc->range_start >> PAGE_CACHE_SHIFT;
+
+ mpd.wbc = wbc;
+@@ -2622,6 +2629,7 @@ static int ext4_da_writepages(struct add
+
+ pages_skipped = wbc->pages_skipped;
+
++retry:
+ while (!ret && wbc->nr_to_write > 0) {
+
+ /*
+@@ -2676,6 +2684,7 @@ static int ext4_da_writepages(struct add
+ pages_written += mpd.pages_written;
+ wbc->pages_skipped = pages_skipped;
+ ret = 0;
++ io_done = 1;
+ } else if (wbc->nr_to_write)
+ /*
+ * There is no more writeout needed
+@@ -2684,6 +2693,13 @@ static int ext4_da_writepages(struct add
+ */
+ break;
+ }
++ if (!io_done && !cycled) {
++ cycled = 1;
++ index = 0;
++ wbc->range_start = index << PAGE_CACHE_SHIFT;
++ wbc->range_end = mapping->writeback_index - 1;
++ goto retry;
++ }
+ if (pages_skipped != wbc->pages_skipped)
+ printk(KERN_EMERG "This should not happen leaving %s "
+ "with nr_to_write = %ld ret = %d\n",
+@@ -2691,6 +2707,7 @@ static int ext4_da_writepages(struct add
+
+ /* Update index */
+ index += pages_written;
++ wbc->range_cyclic = range_cyclic;
+ if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+ /*
+ * set the writeback_index so that range_cyclic
--- /dev/null
+From dev@jaysonking.com Fri Jun 25 15:32:26 2010
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Fri, 28 May 2010 14:26:25 -0500
+Subject: ext4: Use our own write_cache_pages()
+Cc: "Theodore Ts'o" <tytso@mit.edu>, Dave Chinner <david@fromorbit.com>, "Jayson R. King" <dev@jaysonking.com>, Kay Diederichs <Kay.Diederichs@uni-konstanz.de>, Ext4 Developers List <linux-ext4@vger.kernel.org>, "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Message-ID: <4C0018E1.5060007@jaysonking.com>
+
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+commit 8e48dcfbd7c0892b4cfd064d682cc4c95a29df32 upstream.
+
+Make a copy of write_cache_pages() for the benefit of
+ext4_da_writepages(). This allows us to simplify the code some, and
+will allow us to further customize the code in future patches.
+
+There are some nasty hacks in write_cache_pages(), which Linus has
+(correctly) characterized as vile. I've just copied it into
+write_cache_pages_da(), without trying to clean those bits up lest I
+break something in the ext4's delalloc implementation, which is a bit
+fragile right now. This will allow Dave Chinner to clean up
+write_cache_pages() in mm/page-writeback.c, without worrying about
+breaking ext4. Eventually write_cache_pages_da() will go away when I
+rewrite ext4's delayed allocation and create a general
+ext4_writepages() which is used for all of ext4's writeback. Until
+now this is the lowest risk way to clean up the core
+write_cache_pages() function.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Cc: Dave Chinner <david@fromorbit.com>
+[dev@jaysonking.com: Dropped the hunks which reverted the use of no_nrwrite_index_update, since those lines weren't ever created on 2.6.27.y]
+[dev@jaysonking.com: Copied from 2.6.27.y's version of write_cache_pages(), plus the changes to it from patch "vfs: Add no_nrwrite_index_update writeback control flag"]
+Signed-off-by: Jayson R. King <dev@jaysonking.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/inode.c | 144 +++++++++++++++++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 132 insertions(+), 12 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -2059,17 +2059,6 @@ static int __mpage_da_writepage(struct p
+ struct buffer_head *bh, *head, fake;
+ sector_t logical;
+
+- if (mpd->io_done) {
+- /*
+- * Rest of the page in the page_vec
+- * redirty then and skip then. We will
+- * try to to write them again after
+- * starting a new transaction
+- */
+- redirty_page_for_writepage(wbc, page);
+- unlock_page(page);
+- return MPAGE_DA_EXTENT_TAIL;
+- }
+ /*
+ * Can we merge this page to current extent?
+ */
+@@ -2160,6 +2149,137 @@ static int __mpage_da_writepage(struct p
+ }
+
+ /*
++ * write_cache_pages_da - walk the list of dirty pages of the given
++ * address space and call the callback function (which usually writes
++ * the pages).
++ *
++ * This is a forked version of write_cache_pages(). Differences:
++ * Range cyclic is ignored.
++ * no_nrwrite_index_update is always presumed true
++ */
++static int write_cache_pages_da(struct address_space *mapping,
++ struct writeback_control *wbc,
++ struct mpage_da_data *mpd)
++{
++ struct backing_dev_info *bdi = mapping->backing_dev_info;
++ int ret = 0;
++ int done = 0;
++ struct pagevec pvec;
++ int nr_pages;
++ pgoff_t index;
++ pgoff_t end; /* Inclusive */
++ long nr_to_write = wbc->nr_to_write;
++
++ if (wbc->nonblocking && bdi_write_congested(bdi)) {
++ wbc->encountered_congestion = 1;
++ return 0;
++ }
++
++ pagevec_init(&pvec, 0);
++ index = wbc->range_start >> PAGE_CACHE_SHIFT;
++ end = wbc->range_end >> PAGE_CACHE_SHIFT;
++
++ while (!done && (index <= end)) {
++ int i;
++
++ nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
++ PAGECACHE_TAG_DIRTY,
++ min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
++ if (nr_pages == 0)
++ break;
++
++ for (i = 0; i < nr_pages; i++) {
++ struct page *page = pvec.pages[i];
++
++ /*
++ * At this point, the page may be truncated or
++ * invalidated (changing page->mapping to NULL), or
++ * even swizzled back from swapper_space to tmpfs file
++ * mapping. However, page->index will not change
++ * because we have a reference on the page.
++ */
++ if (page->index > end) {
++ done = 1;
++ break;
++ }
++
++ lock_page(page);
++
++ /*
++ * Page truncated or invalidated. We can freely skip it
++ * then, even for data integrity operations: the page
++ * has disappeared concurrently, so there could be no
++ * real expectation of this data integrity operation
++ * even if there is now a new, dirty page at the same
++ * pagecache address.
++ */
++ if (unlikely(page->mapping != mapping)) {
++continue_unlock:
++ unlock_page(page);
++ continue;
++ }
++
++ if (!PageDirty(page)) {
++ /* someone wrote it for us */
++ goto continue_unlock;
++ }
++
++ if (PageWriteback(page)) {
++ if (wbc->sync_mode != WB_SYNC_NONE)
++ wait_on_page_writeback(page);
++ else
++ goto continue_unlock;
++ }
++
++ BUG_ON(PageWriteback(page));
++ if (!clear_page_dirty_for_io(page))
++ goto continue_unlock;
++
++ ret = __mpage_da_writepage(page, wbc, mpd);
++
++ if (unlikely(ret)) {
++ if (ret == AOP_WRITEPAGE_ACTIVATE) {
++ unlock_page(page);
++ ret = 0;
++ } else {
++ done = 1;
++ break;
++ }
++ }
++
++ if (nr_to_write > 0) {
++ nr_to_write--;
++ if (nr_to_write == 0 &&
++ wbc->sync_mode == WB_SYNC_NONE) {
++ /*
++ * We stop writing back only if we are
++ * not doing integrity sync. In case of
++ * integrity sync we have to keep going
++ * because someone may be concurrently
++ * dirtying pages, and we might have
++ * synced a lot of newly appeared dirty
++ * pages, but have not synced all of the
++ * old dirty pages.
++ */
++ done = 1;
++ break;
++ }
++ }
++
++ if (wbc->nonblocking && bdi_write_congested(bdi)) {
++ wbc->encountered_congestion = 1;
++ done = 1;
++ break;
++ }
++ }
++ pagevec_release(&pvec);
++ cond_resched();
++ }
++ return ret;
++}
++
++
++/*
+ * mpage_da_writepages - walk the list of dirty pages of the given
+ * address space, allocates non-allocated blocks, maps newly-allocated
+ * blocks to existing bhs and issue IO them
+@@ -2192,7 +2312,7 @@ static int mpage_da_writepages(struct ad
+
+ to_write = wbc->nr_to_write;
+
+- ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);
++ ret = write_cache_pages_da(mapping, wbc, mpd);
+
+ /*
+ * Handle last extent of pages
--- /dev/null
+From cea7daa3589d6b550546a8c8963599f7c1a3ae5c Mon Sep 17 00:00:00 2001
+From: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com>
+Date: Fri, 30 Apr 2010 14:32:13 +0100
+Subject: KEYS: find_keyring_by_name() can gain access to a freed keyring
+
+From: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com>
+
+commit cea7daa3589d6b550546a8c8963599f7c1a3ae5c upstream.
+
+find_keyring_by_name() can gain access to a keyring that has had its reference
+count reduced to zero, and is thus ready to be freed. This then allows the
+dead keyring to be brought back into use whilst it is being destroyed.
+
+The following timeline illustrates the process:
+
+|(cleaner) (user)
+|
+| free_user(user) sys_keyctl()
+| | |
+| key_put(user->session_keyring) keyctl_get_keyring_ID()
+| || //=> keyring->usage = 0 |
+| |schedule_work(&key_cleanup_task) lookup_user_key()
+| || |
+| kmem_cache_free(,user) |
+| . |[KEY_SPEC_USER_KEYRING]
+| . install_user_keyrings()
+| . ||
+| key_cleanup() [<= worker_thread()] ||
+| | ||
+| [spin_lock(&key_serial_lock)] |[mutex_lock(&key_user_keyr..mutex)]
+| | ||
+| atomic_read() == 0 ||
+| |{ rb_erase(&key->serial_node,) } ||
+| | ||
+| [spin_unlock(&key_serial_lock)] |find_keyring_by_name()
+| | |||
+| keyring_destroy(keyring) ||[read_lock(&keyring_name_lock)]
+| || |||
+| |[write_lock(&keyring_name_lock)] ||atomic_inc(&keyring->usage)
+| |. ||| *** GET freeing keyring ***
+| |. ||[read_unlock(&keyring_name_lock)]
+| || ||
+| |list_del() |[mutex_unlock(&key_user_k..mutex)]
+| || |
+| |[write_unlock(&keyring_name_lock)] ** INVALID keyring is returned **
+| | .
+| kmem_cache_free(,keyring) .
+| .
+| atomic_dec(&keyring->usage)
+v *** DESTROYED ***
+TIME
+
+If CONFIG_SLUB_DEBUG=y then we may see the following message generated:
+
+ =============================================================================
+ BUG key_jar: Poison overwritten
+ -----------------------------------------------------------------------------
+
+ INFO: 0xffff880197a7e200-0xffff880197a7e200. First byte 0x6a instead of 0x6b
+ INFO: Allocated in key_alloc+0x10b/0x35f age=25 cpu=1 pid=5086
+ INFO: Freed in key_cleanup+0xd0/0xd5 age=12 cpu=1 pid=10
+ INFO: Slab 0xffffea000592cb90 objects=16 used=2 fp=0xffff880197a7e200 flags=0x200000000000c3
+ INFO: Object 0xffff880197a7e200 @offset=512 fp=0xffff880197a7e300
+
+ Bytes b4 0xffff880197a7e1f0: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ
+ Object 0xffff880197a7e200: 6a 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b jkkkkkkkkkkkkkkk
+
+Alternatively, we may see a system panic happen, such as:
+
+ BUG: unable to handle kernel NULL pointer dereference at 0000000000000001
+ IP: [<ffffffff810e61a3>] kmem_cache_alloc+0x5b/0xe9
+ PGD 6b2b4067 PUD 6a80d067 PMD 0
+ Oops: 0000 [#1] SMP
+ last sysfs file: /sys/kernel/kexec_crash_loaded
+ CPU 1
+ ...
+ Pid: 31245, comm: su Not tainted 2.6.34-rc5-nofixed-nodebug #2 D2089/PRIMERGY
+ RIP: 0010:[<ffffffff810e61a3>] [<ffffffff810e61a3>] kmem_cache_alloc+0x5b/0xe9
+ RSP: 0018:ffff88006af3bd98 EFLAGS: 00010002
+ RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffff88007d19900b
+ RDX: 0000000100000000 RSI: 00000000000080d0 RDI: ffffffff81828430
+ RBP: ffffffff81828430 R08: ffff88000a293750 R09: 0000000000000000
+ R10: 0000000000000001 R11: 0000000000100000 R12: 00000000000080d0
+ R13: 00000000000080d0 R14: 0000000000000296 R15: ffffffff810f20ce
+ FS: 00007f97116bc700(0000) GS:ffff88000a280000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 0000000000000001 CR3: 000000006a91c000 CR4: 00000000000006e0
+ DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
+ Process su (pid: 31245, threadinfo ffff88006af3a000, task ffff8800374414c0)
+ Stack:
+ 0000000512e0958e 0000000000008000 ffff880037f8d180 0000000000000001
+ 0000000000000000 0000000000008001 ffff88007d199000 ffffffff810f20ce
+ 0000000000008000 ffff88006af3be48 0000000000000024 ffffffff810face3
+ Call Trace:
+ [<ffffffff810f20ce>] ? get_empty_filp+0x70/0x12f
+ [<ffffffff810face3>] ? do_filp_open+0x145/0x590
+ [<ffffffff810ce208>] ? tlb_finish_mmu+0x2a/0x33
+ [<ffffffff810ce43c>] ? unmap_region+0xd3/0xe2
+ [<ffffffff810e4393>] ? virt_to_head_page+0x9/0x2d
+ [<ffffffff81103916>] ? alloc_fd+0x69/0x10e
+ [<ffffffff810ef4ed>] ? do_sys_open+0x56/0xfc
+ [<ffffffff81008a02>] ? system_call_fastpath+0x16/0x1b
+ Code: 0f 1f 44 00 00 49 89 c6 fa 66 0f 1f 44 00 00 65 4c 8b 04 25 60 e8 00 00 48 8b 45 00 49 01 c0 49 8b 18 48 85 db 74 0d 48 63 45 18 <48> 8b 04 03 49 89 00 eb 14 4c 89 f9 83 ca ff 44 89 e6 48 89 ef
+ RIP [<ffffffff810e61a3>] kmem_cache_alloc+0x5b/0xe9
+
+The problem is that find_keyring_by_name does not confirm that the keyring is
+valid before accepting it.
+
+Skipping keyrings that have been reduced to a zero count seems the way to go.
+To this end, use atomic_inc_not_zero() to increment the usage count and skip
+the candidate keyring if that returns false.
+
+The following script _may_ cause the bug to happen, but there's no guarantee
+as the window of opportunity is small:
+
+ #!/bin/sh
+ LOOP=100000
+ USER=dummy_user
+ /bin/su -c "exit;" $USER || { /usr/sbin/adduser -m $USER; add=1; }
+ for ((i=0; i<LOOP; i++))
+ do
+ /bin/su -c "echo '$i' > /dev/null" $USER
+ done
+ (( add == 1 )) && /usr/sbin/userdel -r $USER
+ exit
+
+Note that the nominated user must not be in use.
+
+An alternative way of testing this may be:
+
+ for ((i=0; i<100000; i++))
+ do
+ keyctl session foo /bin/true || break
+ done >&/dev/null
+
+as that uses a keyring named "foo" rather than relying on the user and
+user-session named keyrings.
+
+Reported-by: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Tested-by: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com>
+Acked-by: Serge Hallyn <serue@us.ibm.com>
+Signed-off-by: James Morris <jmorris@namei.org>
+Cc: Ben Hutchings <ben@decadent.org.uk>
+Cc: Chuck Ebbert <cebbert@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ security/keys/keyring.c | 18 +++++++++---------
+ 1 file changed, 9 insertions(+), 9 deletions(-)
+
+--- a/security/keys/keyring.c
++++ b/security/keys/keyring.c
+@@ -523,9 +523,8 @@ struct key *find_keyring_by_name(const c
+ struct key *keyring;
+ int bucket;
+
+- keyring = ERR_PTR(-EINVAL);
+ if (!name)
+- goto error;
++ return ERR_PTR(-EINVAL);
+
+ bucket = keyring_hash(name);
+
+@@ -549,17 +548,18 @@ struct key *find_keyring_by_name(const c
+ KEY_SEARCH) < 0)
+ continue;
+
+- /* we've got a match */
+- atomic_inc(&keyring->usage);
+- read_unlock(&keyring_name_lock);
+- goto error;
++ /* we've got a match but we might end up racing with
++ * key_cleanup() if the keyring is currently 'dead'
++ * (ie. it has a zero usage count) */
++ if (!atomic_inc_not_zero(&keyring->usage))
++ continue;
++ goto out;
+ }
+ }
+
+- read_unlock(&keyring_name_lock);
+ keyring = ERR_PTR(-ENOKEY);
+-
+- error:
++out:
++ read_unlock(&keyring_name_lock);
+ return keyring;
+
+ } /* end find_keyring_by_name() */
--- /dev/null
+From 4d09ec0f705cf88a12add029c058b53f288cfaa2 Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <error27@gmail.com>
+Date: Mon, 17 May 2010 14:42:35 +0100
+Subject: KEYS: Return more accurate error codes
+
+From: Dan Carpenter <error27@gmail.com>
+
+commit 4d09ec0f705cf88a12add029c058b53f288cfaa2 upstream.
+
+We were using the wrong variable here so the error codes weren't being returned
+properly. The original code returns -ENOKEY.
+
+Signed-off-by: Dan Carpenter <error27@gmail.com>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: James Morris <jmorris@namei.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+--- a/security/keys/process_keys.c
++++ b/security/keys/process_keys.c
+@@ -508,7 +508,7 @@ try_again:
+
+ ret = install_thread_keyring();
+ if (ret < 0) {
+- key = ERR_PTR(ret);
++ key_ref = ERR_PTR(ret);
+ goto error;
+ }
+ goto reget_creds;
+@@ -526,7 +526,7 @@ try_again:
+
+ ret = install_process_keyring();
+ if (ret < 0) {
+- key = ERR_PTR(ret);
++ key_ref = ERR_PTR(ret);
+ goto error;
+ }
+ goto reget_creds;
+@@ -585,7 +585,7 @@ try_again:
+
+ case KEY_SPEC_GROUP_KEYRING:
+ /* group keyrings are not yet supported */
+- key = ERR_PTR(-EINVAL);
++ key_ref = ERR_PTR(-EINVAL);
+ goto error;
+
+ case KEY_SPEC_REQKEY_AUTH_KEY:
--- /dev/null
+From 550f0d922286556c7ea43974bb7921effb5a5278 Mon Sep 17 00:00:00 2001
+From: Helge Deller <deller@gmx.de>
+Date: Mon, 3 May 2010 20:44:21 +0000
+Subject: parisc: clear floating point exception flag on SIGFPE signal
+
+From: Helge Deller <deller@gmx.de>
+
+commit 550f0d922286556c7ea43974bb7921effb5a5278 upstream.
+
+Clear the floating point exception flag before returning to
+user space. This is needed, else the libc trampoline handler
+may hit the same SIGFPE again while building up a trampoline
+to a signal handler.
+
+Fixes debian bug #559406.
+
+Signed-off-by: Helge Deller <deller@gmx.de>
+Signed-off-by: Kyle McMartin <kyle@mcmartin.ca>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/parisc/math-emu/decode_exc.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/parisc/math-emu/decode_exc.c
++++ b/arch/parisc/math-emu/decode_exc.c
+@@ -342,6 +342,7 @@ decode_fpu(unsigned int Fpu_register[],
+ return SIGNALCODE(SIGFPE, FPE_FLTINV);
+ case DIVISIONBYZEROEXCEPTION:
+ update_trap_counts(Fpu_register, aflags, bflags, trap_counts);
++ Clear_excp_register(exception_index);
+ return SIGNALCODE(SIGFPE, FPE_FLTDIV);
+ case INEXACTEXCEPTION:
+ update_trap_counts(Fpu_register, aflags, bflags, trap_counts);
--- /dev/null
+From 5fa782c2f5ef6c2e4f04d3e228412c9b4a4c8809 Mon Sep 17 00:00:00 2001
+From: Neil Horman <nhorman@tuxdriver.com>
+Date: Wed, 28 Apr 2010 10:30:59 +0000
+Subject: sctp: Fix skb_over_panic resulting from multiple invalid parameter errors (CVE-2010-1173) (v4)
+
+From: Neil Horman <nhorman@tuxdriver.com>
+
+commit 5fa782c2f5ef6c2e4f04d3e228412c9b4a4c8809 upstream.
+
+Ok, version 4
+
+Change Notes:
+1) Minor cleanups, from Vlads notes
+
+Summary:
+
+Hey-
+ Recently, it was reported to me that the kernel could oops in the
+following way:
+
+<5> kernel BUG at net/core/skbuff.c:91!
+<5> invalid operand: 0000 [#1]
+<5> Modules linked in: sctp netconsole nls_utf8 autofs4 sunrpc iptable_filter
+ip_tables cpufreq_powersave parport_pc lp parport vmblock(U) vsock(U) vmci(U)
+vmxnet(U) vmmemctl(U) vmhgfs(U) acpiphp dm_mirror dm_mod button battery ac md5
+ipv6 uhci_hcd ehci_hcd snd_ens1371 snd_rawmidi snd_seq_device snd_pcm_oss
+snd_mixer_oss snd_pcm snd_timer snd_page_alloc snd_ac97_codec snd soundcore
+pcnet32 mii floppy ext3 jbd ata_piix libata mptscsih mptsas mptspi mptscsi
+mptbase sd_mod scsi_mod
+<5> CPU: 0
+<5> EIP: 0060:[<c02bff27>] Not tainted VLI
+<5> EFLAGS: 00010216 (2.6.9-89.0.25.EL)
+<5> EIP is at skb_over_panic+0x1f/0x2d
+<5> eax: 0000002c ebx: c033f461 ecx: c0357d96 edx: c040fd44
+<5> esi: c033f461 edi: df653280 ebp: 00000000 esp: c040fd40
+<5> ds: 007b es: 007b ss: 0068
+<5> Process swapper (pid: 0, threadinfo=c040f000 task=c0370be0)
+<5> Stack: c0357d96 e0c29478 00000084 00000004 c033f461 df653280 d7883180
+e0c2947d
+<5> 00000000 00000080 df653490 00000004 de4f1ac0 de4f1ac0 00000004
+df653490
+<5> 00000001 e0c2877a 08000800 de4f1ac0 df653490 00000000 e0c29d2e
+00000004
+<5> Call Trace:
+<5> [<e0c29478>] sctp_addto_chunk+0xb0/0x128 [sctp]
+<5> [<e0c2947d>] sctp_addto_chunk+0xb5/0x128 [sctp]
+<5> [<e0c2877a>] sctp_init_cause+0x3f/0x47 [sctp]
+<5> [<e0c29d2e>] sctp_process_unk_param+0xac/0xb8 [sctp]
+<5> [<e0c29e90>] sctp_verify_init+0xcc/0x134 [sctp]
+<5> [<e0c20322>] sctp_sf_do_5_1B_init+0x83/0x28e [sctp]
+<5> [<e0c25333>] sctp_do_sm+0x41/0x77 [sctp]
+<5> [<c01555a4>] cache_grow+0x140/0x233
+<5> [<e0c26ba1>] sctp_endpoint_bh_rcv+0xc5/0x108 [sctp]
+<5> [<e0c2b863>] sctp_inq_push+0xe/0x10 [sctp]
+<5> [<e0c34600>] sctp_rcv+0x454/0x509 [sctp]
+<5> [<e084e017>] ipt_hook+0x17/0x1c [iptable_filter]
+<5> [<c02d005e>] nf_iterate+0x40/0x81
+<5> [<c02e0bb9>] ip_local_deliver_finish+0x0/0x151
+<5> [<c02e0c7f>] ip_local_deliver_finish+0xc6/0x151
+<5> [<c02d0362>] nf_hook_slow+0x83/0xb5
+<5> [<c02e0bb2>] ip_local_deliver+0x1a2/0x1a9
+<5> [<c02e0bb9>] ip_local_deliver_finish+0x0/0x151
+<5> [<c02e103e>] ip_rcv+0x334/0x3b4
+<5> [<c02c66fd>] netif_receive_skb+0x320/0x35b
+<5> [<e0a0928b>] init_stall_timer+0x67/0x6a [uhci_hcd]
+<5> [<c02c67a4>] process_backlog+0x6c/0xd9
+<5> [<c02c690f>] net_rx_action+0xfe/0x1f8
+<5> [<c012a7b1>] __do_softirq+0x35/0x79
+<5> [<c0107efb>] handle_IRQ_event+0x0/0x4f
+<5> [<c01094de>] do_softirq+0x46/0x4d
+
+Its an skb_over_panic BUG halt that results from processing an init chunk in
+which too many of its variable length parameters are in some way malformed.
+
+The problem is in sctp_process_unk_param:
+if (NULL == *errp)
+ *errp = sctp_make_op_error_space(asoc, chunk,
+ ntohs(chunk->chunk_hdr->length));
+
+ if (*errp) {
+ sctp_init_cause(*errp, SCTP_ERROR_UNKNOWN_PARAM,
+ WORD_ROUND(ntohs(param.p->length)));
+ sctp_addto_chunk(*errp,
+ WORD_ROUND(ntohs(param.p->length)),
+ param.v);
+
+When we allocate an error chunk, we assume that the worst case scenario requires
+that we have chunk_hdr->length data allocated, which would be correct nominally,
+given that we call sctp_addto_chunk for the violating parameter. Unfortunately,
+we also, in sctp_init_cause insert a sctp_errhdr_t structure into the error
+chunk, so the worst case situation in which all parameters are in violation
+requires chunk_hdr->length+(sizeof(sctp_errhdr_t)*param_count) bytes of data.
+
+The result of this error is that a deliberately malformed packet sent to a
+listening host can cause a remote DOS, described in CVE-2010-1173:
+http://cve.mitre.org/cgi-bin/cvename.cgi?name=2010-1173
+
+I've tested the below fix and confirmed that it fixes the issue. We move to a
+strategy whereby we allocate a fixed size error chunk and ignore errors we don't
+have space to report. Tested by me successfully
+
+Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
+Acked-by: Vlad Yasevich <vladislav.yasevich@hp.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ include/net/sctp/structs.h | 1
+ net/sctp/sm_make_chunk.c | 62 +++++++++++++++++++++++++++++++++++++++++----
+ 2 files changed, 58 insertions(+), 5 deletions(-)
+
+--- a/include/net/sctp/structs.h
++++ b/include/net/sctp/structs.h
+@@ -753,6 +753,7 @@ int sctp_user_addto_chunk(struct sctp_ch
+ struct iovec *data);
+ void sctp_chunk_free(struct sctp_chunk *);
+ void *sctp_addto_chunk(struct sctp_chunk *, int len, const void *data);
++void *sctp_addto_chunk_fixed(struct sctp_chunk *, int len, const void *data);
+ struct sctp_chunk *sctp_chunkify(struct sk_buff *,
+ const struct sctp_association *,
+ struct sock *);
+--- a/net/sctp/sm_make_chunk.c
++++ b/net/sctp/sm_make_chunk.c
+@@ -107,7 +107,7 @@ static const struct sctp_paramhdr prsctp
+ __constant_htons(sizeof(struct sctp_paramhdr)),
+ };
+
+-/* A helper to initialize to initialize an op error inside a
++/* A helper to initialize an op error inside a
+ * provided chunk, as most cause codes will be embedded inside an
+ * abort chunk.
+ */
+@@ -124,6 +124,29 @@ void sctp_init_cause(struct sctp_chunk
+ chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(sctp_errhdr_t), &err);
+ }
+
++/* A helper to initialize an op error inside a
++ * provided chunk, as most cause codes will be embedded inside an
++ * abort chunk. Differs from sctp_init_cause in that it won't oops
++ * if there isn't enough space in the op error chunk
++ */
++int sctp_init_cause_fixed(struct sctp_chunk *chunk, __be16 cause_code,
++ size_t paylen)
++{
++ sctp_errhdr_t err;
++ __u16 len;
++
++ /* Cause code constants are now defined in network order. */
++ err.cause = cause_code;
++ len = sizeof(sctp_errhdr_t) + paylen;
++ err.length = htons(len);
++
++ if (skb_tailroom(chunk->skb) > len)
++ return -ENOSPC;
++ chunk->subh.err_hdr = sctp_addto_chunk_fixed(chunk,
++ sizeof(sctp_errhdr_t),
++ &err);
++ return 0;
++}
+ /* 3.3.2 Initiation (INIT) (1)
+ *
+ * This chunk is used to initiate a SCTP association between two
+@@ -1114,6 +1137,24 @@ nodata:
+ return retval;
+ }
+
++/* Create an Operation Error chunk of a fixed size,
++ * specifically, max(asoc->pathmtu, SCTP_DEFAULT_MAXSEGMENT)
++ * This is a helper function to allocate an error chunk for
++ * for those invalid parameter codes in which we may not want
++ * to report all the errors, if the incomming chunk is large
++ */
++static inline struct sctp_chunk *sctp_make_op_error_fixed(
++ const struct sctp_association *asoc,
++ const struct sctp_chunk *chunk)
++{
++ size_t size = asoc ? asoc->pathmtu : 0;
++
++ if (!size)
++ size = SCTP_DEFAULT_MAXSEGMENT;
++
++ return sctp_make_op_error_space(asoc, chunk, size);
++}
++
+ /* Create an Operation Error chunk. */
+ struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk,
+@@ -1354,6 +1395,18 @@ void *sctp_addto_chunk(struct sctp_chunk
+ return target;
+ }
+
++/* Append bytes to the end of a chunk. Returns NULL if there isn't sufficient
++ * space in the chunk
++ */
++void *sctp_addto_chunk_fixed(struct sctp_chunk *chunk,
++ int len, const void *data)
++{
++ if (skb_tailroom(chunk->skb) > len)
++ return sctp_addto_chunk(chunk, len, data);
++ else
++ return NULL;
++}
++
+ /* Append bytes from user space to the end of a chunk. Will panic if
+ * chunk is not big enough.
+ * Returns a kernel err value.
+@@ -1957,13 +2010,12 @@ static sctp_ierror_t sctp_process_unk_pa
+ * returning multiple unknown parameters.
+ */
+ if (NULL == *errp)
+- *errp = sctp_make_op_error_space(asoc, chunk,
+- ntohs(chunk->chunk_hdr->length));
++ *errp = sctp_make_op_error_fixed(asoc, chunk);
+
+ if (*errp) {
+- sctp_init_cause(*errp, SCTP_ERROR_UNKNOWN_PARAM,
++ sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM,
+ WORD_ROUND(ntohs(param.p->length)));
+- sctp_addto_chunk(*errp,
++ sctp_addto_chunk_fixed(*errp,
+ WORD_ROUND(ntohs(param.p->length)),
+ param.v);
+ } else {
do_generic_file_read-clear-page-errors-when-issuing-a-fresh-read-of-the-page.patch
ipmi-handle-run_to_completion-properly-in-deliver_recv_msg.patch
gconfig-fix-build-failure-on-fedora-13.patch
+ext4-check-s_log_groups_per_flex-in-online-resize-code.patch
+ext4-use-our-own-write_cache_pages.patch
+ext4-fix-file-fragmentation-during-large-file-write.patch
+ext4-implement-range_cyclic-in-ext4_da_writepages-instead-of-write_cache_pages.patch
+sctp-fix-skb_over_panic-resulting-from-multiple-invalid-parameter-errors-cve-2010-1173-v4.patch
+vfs-add-nofollow-flag-to-umount-2.patch
+tipc-fix-oops-on-send-prior-to-entering-networked-mode-v3.patch
+parisc-clear-floating-point-exception-flag-on-sigfpe-signal.patch
+keys-return-more-accurate-error-codes.patch
+keys-find_keyring_by_name-can-gain-access-to-a-freed-keyring.patch
--- /dev/null
+From d0021b252eaf65ca07ed14f0d66425dd9ccab9a6 Mon Sep 17 00:00:00 2001
+From: Neil Horman <nhorman@tuxdriver.com>
+Date: Wed, 3 Mar 2010 08:31:23 +0000
+Subject: tipc: Fix oops on send prior to entering networked mode (v3)
+
+From: Neil Horman <nhorman@tuxdriver.com>
+
+commit d0021b252eaf65ca07ed14f0d66425dd9ccab9a6 upstream.
+
+Fix TIPC to disallow sending to remote addresses prior to entering NET_MODE
+
+user programs can oops the kernel by sending datagrams via AF_TIPC prior to
+entering networked mode. The following backtrace has been observed:
+
+ID: 13459 TASK: ffff810014640040 CPU: 0 COMMAND: "tipc-client"
+[exception RIP: tipc_node_select_next_hop+90]
+RIP: ffffffff8869d3c3 RSP: ffff81002d9a5ab8 RFLAGS: 00010202
+RAX: 0000000000000001 RBX: 0000000000000001 RCX: 0000000000000001
+RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000001001001
+RBP: 0000000001001001 R8: 0074736575716552 R9: 0000000000000000
+R10: ffff81003fbd0680 R11: 00000000000000c8 R12: 0000000000000008
+R13: 0000000000000001 R14: 0000000000000001 R15: ffff810015c6ca00
+ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
+RIP: 0000003cbd8d49a3 RSP: 00007fffc84e0be8 RFLAGS: 00010206
+RAX: 000000000000002c RBX: ffffffff8005d116 RCX: 0000000000000000
+RDX: 0000000000000008 RSI: 00007fffc84e0c00 RDI: 0000000000000003
+RBP: 0000000000000000 R8: 00007fffc84e0c10 R9: 0000000000000010
+R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
+R13: 00007fffc84e0d10 R14: 0000000000000000 R15: 00007fffc84e0c30
+ORIG_RAX: 000000000000002c CS: 0033 SS: 002b
+
+What happens is that, when the tipc module in inserted it enters a standalone
+node mode in which communication to its own address is allowed <0.0.0> but not
+to other addresses, since the appropriate data structures have not been
+allocated yet (specifically the tipc_net pointer). There is nothing stopping a
+client from trying to send such a message however, and if that happens, we
+attempt to dereference tipc_net.zones while the pointer is still NULL, and
+explode. The fix is pretty straightforward. Since these oopses all arise from
+the dereference of global pointers prior to their assignment to allocated
+values, and since these allocations are small (about 2k total), lets convert
+these pointers to static arrays of the appropriate size. All the accesses to
+these bits consider 0/NULL to be a non match when searching, so all the lookups
+still work properly, and there is no longer a chance of a bad dereference
+anywhere. As a bonus, this lets us eliminate the setup/teardown routines for
+those pointers, and eliminates the need to perform any locking around them to
+prevent access while they're being allocated/freed.
+
+I've updated the tipc_net structure to behave this way to fix the exact reported
+problem, and also fixed up the tipc_bearers and media_list arrays to fix an
+obvious similar problem that arises from issuing tipc-config commands to
+manipulate bearers/links prior to entering networked mode
+
+I've tested this for a few hours by running the sanity tests and stress test
+with the tipcutils suite, and nothing has fallen over. There have been a few
+lockdep warnings, but those were there before, and can be addressed later, as
+they didn't actually result in any deadlock.
+
+Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
+CC: Allan Stephens <allan.stephens@windriver.com>
+CC: David S. Miller <davem@davemloft.net>
+CC: tipc-discussion@lists.sourceforge.net
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ net/tipc/bearer.c | 37 ++++++-------------------------------
+ net/tipc/bearer.h | 2 +-
+ net/tipc/net.c | 25 ++++---------------------
+ 3 files changed, 11 insertions(+), 53 deletions(-)
+
+--- a/net/tipc/bearer.c
++++ b/net/tipc/bearer.c
+@@ -45,10 +45,10 @@
+
+ #define MAX_ADDR_STR 32
+
+-static struct media *media_list = NULL;
++static struct media media_list[MAX_MEDIA];
+ static u32 media_count = 0;
+
+-struct bearer *tipc_bearers = NULL;
++struct bearer tipc_bearers[MAX_BEARERS];
+
+ /**
+ * media_name_valid - validate media name
+@@ -108,9 +108,11 @@ int tipc_register_media(u32 media_type,
+ int res = -EINVAL;
+
+ write_lock_bh(&tipc_net_lock);
+- if (!media_list)
+- goto exit;
+
++ if (tipc_mode != TIPC_NET_MODE) {
++ warn("Media <%s> rejected, not in networked mode yet\n", name);
++ goto exit;
++ }
+ if (!media_name_valid(name)) {
+ warn("Media <%s> rejected, illegal name\n", name);
+ goto exit;
+@@ -660,33 +662,10 @@ int tipc_disable_bearer(const char *name
+
+
+
+-int tipc_bearer_init(void)
+-{
+- int res;
+-
+- write_lock_bh(&tipc_net_lock);
+- tipc_bearers = kcalloc(MAX_BEARERS, sizeof(struct bearer), GFP_ATOMIC);
+- media_list = kcalloc(MAX_MEDIA, sizeof(struct media), GFP_ATOMIC);
+- if (tipc_bearers && media_list) {
+- res = 0;
+- } else {
+- kfree(tipc_bearers);
+- kfree(media_list);
+- tipc_bearers = NULL;
+- media_list = NULL;
+- res = -ENOMEM;
+- }
+- write_unlock_bh(&tipc_net_lock);
+- return res;
+-}
+-
+ void tipc_bearer_stop(void)
+ {
+ u32 i;
+
+- if (!tipc_bearers)
+- return;
+-
+ for (i = 0; i < MAX_BEARERS; i++) {
+ if (tipc_bearers[i].active)
+ tipc_bearers[i].publ.blocked = 1;
+@@ -695,10 +674,6 @@ void tipc_bearer_stop(void)
+ if (tipc_bearers[i].active)
+ bearer_disable(tipc_bearers[i].publ.name);
+ }
+- kfree(tipc_bearers);
+- kfree(media_list);
+- tipc_bearers = NULL;
+- media_list = NULL;
+ media_count = 0;
+ }
+
+--- a/net/tipc/bearer.h
++++ b/net/tipc/bearer.h
+@@ -114,7 +114,7 @@ struct bearer_name {
+
+ struct link;
+
+-extern struct bearer *tipc_bearers;
++extern struct bearer tipc_bearers[];
+
+ void tipc_media_addr_printf(struct print_buf *pb, struct tipc_media_addr *a);
+ struct sk_buff *tipc_media_get_names(void);
+--- a/net/tipc/net.c
++++ b/net/tipc/net.c
+@@ -116,7 +116,8 @@
+ */
+
+ DEFINE_RWLOCK(tipc_net_lock);
+-struct network tipc_net = { NULL };
++struct _zone *tipc_zones[256] = { NULL, };
++struct network tipc_net = { tipc_zones };
+
+ struct tipc_node *tipc_net_select_remote_node(u32 addr, u32 ref)
+ {
+@@ -158,28 +159,12 @@ void tipc_net_send_external_routes(u32 d
+ }
+ }
+
+-static int net_init(void)
+-{
+- memset(&tipc_net, 0, sizeof(tipc_net));
+- tipc_net.zones = kcalloc(tipc_max_zones + 1, sizeof(struct _zone *), GFP_ATOMIC);
+- if (!tipc_net.zones) {
+- return -ENOMEM;
+- }
+- return 0;
+-}
+-
+ static void net_stop(void)
+ {
+ u32 z_num;
+
+- if (!tipc_net.zones)
+- return;
+-
+- for (z_num = 1; z_num <= tipc_max_zones; z_num++) {
++ for (z_num = 1; z_num <= tipc_max_zones; z_num++)
+ tipc_zone_delete(tipc_net.zones[z_num]);
+- }
+- kfree(tipc_net.zones);
+- tipc_net.zones = NULL;
+ }
+
+ static void net_route_named_msg(struct sk_buff *buf)
+@@ -282,9 +267,7 @@ int tipc_net_start(u32 addr)
+ tipc_named_reinit();
+ tipc_port_reinit();
+
+- if ((res = tipc_bearer_init()) ||
+- (res = net_init()) ||
+- (res = tipc_cltr_init()) ||
++ if ((res = tipc_cltr_init()) ||
+ (res = tipc_bclink_init())) {
+ return res;
+ }
--- /dev/null
+From db1f05bb85d7966b9176e293f3ceead1cb8b5d79 Mon Sep 17 00:00:00 2001
+From: Miklos Szeredi <mszeredi@suse.cz>
+Date: Wed, 10 Feb 2010 12:15:53 +0100
+Subject: vfs: add NOFOLLOW flag to umount(2)
+
+From: Miklos Szeredi <mszeredi@suse.cz>
+
+commit db1f05bb85d7966b9176e293f3ceead1cb8b5d79 upstream.
+
+Add a new UMOUNT_NOFOLLOW flag to umount(2). This is needed to prevent
+symlink attacks in unprivileged unmounts (fuse, samba, ncpfs).
+
+Additionally, return -EINVAL if an unknown flag is used (and specify
+an explicitly unused flag: UMOUNT_UNUSED). This makes it possible for
+the caller to determine if a flag is supported or not.
+
+CC: Eugene Teo <eugene@redhat.com>
+CC: Michael Kerrisk <mtk.manpages@gmail.com>
+Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/namespace.c | 9 ++++++++-
+ include/linux/fs.h | 2 ++
+ 2 files changed, 10 insertions(+), 1 deletion(-)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -1132,8 +1132,15 @@ SYSCALL_DEFINE2(umount, char __user *, n
+ {
+ struct path path;
+ int retval;
++ int lookup_flags = 0;
+
+- retval = user_path(name, &path);
++ if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
++ return -EINVAL;
++
++ if (!(flags & UMOUNT_NOFOLLOW))
++ lookup_flags |= LOOKUP_FOLLOW;
++
++ retval = user_path_at(AT_FDCWD, name, lookup_flags, &path);
+ if (retval)
+ goto out;
+ retval = -EINVAL;
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -1061,6 +1061,8 @@ extern int send_sigurg(struct fown_struc
+ #define MNT_FORCE 0x00000001 /* Attempt to forcibily umount */
+ #define MNT_DETACH 0x00000002 /* Just detach from the tree */
+ #define MNT_EXPIRE 0x00000004 /* Mark for expiry */
++#define UMOUNT_NOFOLLOW 0x00000008 /* Don't follow symlink on umount */
++#define UMOUNT_UNUSED 0x80000000 /* Flag guaranteed to be unused */
+
+ extern struct list_head super_blocks;
+ extern spinlock_t sb_lock;