]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
actually commit the .29 patches...
authorGreg Kroah-Hartman <gregkh@suse.de>
Tue, 9 Jun 2009 09:42:02 +0000 (02:42 -0700)
committerGreg Kroah-Hartman <gregkh@suse.de>
Tue, 9 Jun 2009 09:42:02 +0000 (02:42 -0700)
32 files changed:
queue-2.6.29/alsa-hda-fix-audio-on-hp-tx25xx-series-notebooks.patch [new file with mode: 0644]
queue-2.6.29/cdc-acm-fix-long-standing-abuse-of-tty-low_latency.patch [new file with mode: 0644]
queue-2.6.29/crypto-api-fix-algorithm-module-auto-loading.patch [new file with mode: 0644]
queue-2.6.29/crypto-padlock-revert-aes-all-alias-to-aes.patch [new file with mode: 0644]
queue-2.6.29/ext4-add-auto_da_alloc-mount-option.patch [new file with mode: 0644]
queue-2.6.29/ext4-add-ext4_ioc_alloc_da_blks-ioctl.patch [new file with mode: 0644]
queue-2.6.29/ext4-add-fine-print-for-the-32000-subdirectory-limit.patch [new file with mode: 0644]
queue-2.6.29/ext4-automatically-allocate-delay-allocated-blocks-on-close.patch [new file with mode: 0644]
queue-2.6.29/ext4-automatically-allocate-delay-allocated-blocks-on-rename.patch [new file with mode: 0644]
queue-2.6.29/ext4-check-for-an-valid-i_mode-when-reading-the-inode-from-disk.patch [new file with mode: 0644]
queue-2.6.29/ext4-clear-the-unwritten-buffer_head-flag-after-the-extent-is-initialized.patch [new file with mode: 0644]
queue-2.6.29/ext4-don-t-inherit-inappropriate-inode-flags-from-parent.patch [new file with mode: 0644]
queue-2.6.29/ext4-fix-discard-of-inode-prealloc-space-with-delayed-allocation.patch [new file with mode: 0644]
queue-2.6.29/ext4-fix-race-in-ext4_inode_info.i_cached_extent.patch [new file with mode: 0644]
queue-2.6.29/ext4-fix-softlockup-caused-by-illegal-i_file_acl-value-in-on-disk-inode.patch [new file with mode: 0644]
queue-2.6.29/ext4-fix-sub-block-zeroing-for-writes-into-preallocated-extents.patch [new file with mode: 0644]
queue-2.6.29/ext4-ignore-i_file_acl_high-unless-ext4_feature_incompat_64bit-is-present.patch [new file with mode: 0644]
queue-2.6.29/ext4-really-print-the-find_group_flex-fallback-warning-only-once.patch [new file with mode: 0644]
queue-2.6.29/ext4-return-eio-not-estale-on-directory-traversal-through-deleted-inode.patch [new file with mode: 0644]
queue-2.6.29/ext4-tighten-restrictions-on-inode-flags.patch [new file with mode: 0644]
queue-2.6.29/ext4-use-a-fake-block-number-for-delayed-new-buffer_head.patch [new file with mode: 0644]
queue-2.6.29/jbd2-update-locking-coments.patch [new file with mode: 0644]
queue-2.6.29/keys-handle-there-being-no-fallback-destination-keyring-for-request_key.patch [new file with mode: 0644]
queue-2.6.29/ptrace-fix-possible-zombie-leak-on-ptrace_detach.patch [new file with mode: 0644]
queue-2.6.29/ptrace-kill-__ptrace_detach-fix-exit_state-check.patch [new file with mode: 0644]
queue-2.6.29/ptrace-reintroduce-__ptrace_detach-as-a-callee-of-ptrace_exit.patch [new file with mode: 0644]
queue-2.6.29/ptrace-simplify-ptrace_exit-ignoring_children-path.patch [new file with mode: 0644]
queue-2.6.29/tcp-fix-2-iw-selection.patch
queue-2.6.29/tcp-fix-msg_peek-race-check.patch
queue-2.6.29/x86-hpet-fix-periodic-mode-programming-on-amd-81xx.patch [new file with mode: 0644]
queue-2.6.29/x86-hpet-provide-separate-functions-to-stop-and-start-the-counter.patch [new file with mode: 0644]
queue-2.6.29/x86-hpet-stop-hpet_counter-when-programming-periodic-mode.patch [new file with mode: 0644]

diff --git a/queue-2.6.29/alsa-hda-fix-audio-on-hp-tx25xx-series-notebooks.patch b/queue-2.6.29/alsa-hda-fix-audio-on-hp-tx25xx-series-notebooks.patch
new file mode 100644 (file)
index 0000000..1b6ad7d
--- /dev/null
@@ -0,0 +1,32 @@
+From 87488957a68293357a94c8142de7d0ae17914912 Mon Sep 17 00:00:00 2001
+From: Adam Williamson <awilliam@redhat.com>
+Date: Thu, 21 May 2009 18:32:59 -0400
+Subject: ALSA: hda - fix audio on HP TX25xx series notebooks
+
+From: Adam Williamson <awilliam@redhat.com>
+
+commit 87488957a68293357a94c8142de7d0ae17914912 upstream.
+
+Fixes https://bugtrack.alsa-project.org/alsa-bug/view.php?id=4121
+
+Taken from https://bugzilla.redhat.com/show_bug.cgi?id=498060
+
+Signed-off-by: Adam Williamson <awilliam@redhat.com>
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Cc: Chuck Ebbert <cebbert@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ sound/pci/hda/patch_realtek.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -11710,6 +11710,7 @@ static struct snd_pci_quirk alc268_cfg_t
+       SND_PCI_QUIRK(0x1028, 0x0253, "Dell OEM", ALC268_DELL),
+       SND_PCI_QUIRK(0x1028, 0x02b0, "Dell Inspiron Mini9", ALC268_DELL),
+       SND_PCI_QUIRK(0x103c, 0x30cc, "TOSHIBA", ALC268_TOSHIBA),
++      SND_PCI_QUIRK(0x103c, 0x30f1, "HP TX25xx series", ALC268_TOSHIBA),
+       SND_PCI_QUIRK(0x1043, 0x1205, "ASUS W7J", ALC268_3ST),
+       SND_PCI_QUIRK(0x1179, 0xff10, "TOSHIBA A205", ALC268_TOSHIBA),
+       SND_PCI_QUIRK(0x1179, 0xff50, "TOSHIBA A305", ALC268_TOSHIBA),
diff --git a/queue-2.6.29/cdc-acm-fix-long-standing-abuse-of-tty-low_latency.patch b/queue-2.6.29/cdc-acm-fix-long-standing-abuse-of-tty-low_latency.patch
new file mode 100644 (file)
index 0000000..ce46d09
--- /dev/null
@@ -0,0 +1,39 @@
+From 7a9a65ced11ece416b730d6f21040a18e62d78a8 Mon Sep 17 00:00:00 2001
+From: Alan Cox <alan@lxorguk.ukuu.org.uk>
+Date: Tue, 14 Apr 2009 14:57:36 +0100
+Subject: cdc-acm: Fix long standing abuse of tty->low_latency
+
+From: Alan Cox <alan@lxorguk.ukuu.org.uk>
+
+commit 7a9a65ced11ece416b730d6f21040a18e62d78a8 upstream.
+
+ACM sets the low latency flag but calls the flip buffer routines from
+IRQ context which isn't permitted (and as of 2.6.29 causes a warning
+hence this one was caught)
+
+Fortunately ACM doesn't need to set this flag in the first place as it
+only set it to work around problems in ancient (pre tty flip rewrite)
+kernels.
+
+Reported-by: Chuck Ebbert <cebbert@redhat.com>
+Signed-off-by: Alan Cox <alan@lxorguk.ukuu.org.uk>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/usb/class/cdc-acm.c |    4 ----
+ 1 file changed, 4 deletions(-)
+
+--- a/drivers/usb/class/cdc-acm.c
++++ b/drivers/usb/class/cdc-acm.c
+@@ -546,10 +546,6 @@ static int acm_tty_open(struct tty_struc
+       tty->driver_data = acm;
+       acm->tty = tty;
+-      /* force low_latency on so that our tty_push actually forces the data through,
+-         otherwise it is scheduled, and with high data rates data can get lost. */
+-      tty->low_latency = 1;
+-
+       if (usb_autopm_get_interface(acm->control) < 0)
+               goto early_bail;
+       else
diff --git a/queue-2.6.29/crypto-api-fix-algorithm-module-auto-loading.patch b/queue-2.6.29/crypto-api-fix-algorithm-module-auto-loading.patch
new file mode 100644 (file)
index 0000000..25b04d3
--- /dev/null
@@ -0,0 +1,36 @@
+From 37fc334cc8eb84f5fe0a5a1cbe6a6a68049e142a Mon Sep 17 00:00:00 2001
+From: Herbert Xu <herbert@gondor.apana.org.au>
+Date: Tue, 21 Apr 2009 13:27:16 +0800
+Subject: crypto: api - Fix algorithm module auto-loading
+
+From: Herbert Xu <herbert@gondor.apana.org.au>
+
+commit 37fc334cc8eb84f5fe0a5a1cbe6a6a68049e142a upstream.
+
+The commit a760a6656e6f00bb0144a42a048cf0266646e22c (crypto:
+api - Fix module load deadlock with fallback algorithms) broke
+the auto-loading of algorithms that require fallbacks.  The
+problem is that the fallback mask check is missing an AND, which
+causes bits that should be considered to interfere with the
+result.
+
+Reported-by: Chuck Ebbert <cebbert@redhat.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ crypto/api.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/crypto/api.c
++++ b/crypto/api.c
+@@ -221,7 +221,8 @@ struct crypto_alg *crypto_larval_lookup(
+               request_module(name);
+-              if (!((type ^ CRYPTO_ALG_NEED_FALLBACK) & mask) &&
++              if (!((type ^ CRYPTO_ALG_NEED_FALLBACK) & mask &
++                    CRYPTO_ALG_NEED_FALLBACK) &&
+                   snprintf(tmp, sizeof(tmp), "%s-all", name) < sizeof(tmp))
+                       request_module(tmp);
diff --git a/queue-2.6.29/crypto-padlock-revert-aes-all-alias-to-aes.patch b/queue-2.6.29/crypto-padlock-revert-aes-all-alias-to-aes.patch
new file mode 100644 (file)
index 0000000..349db2b
--- /dev/null
@@ -0,0 +1,29 @@
+From acd246b7494c629aa617da49716409566cf52149 Mon Sep 17 00:00:00 2001
+From: Herbert Xu <herbert@gondor.apana.org.au>
+Date: Tue, 21 Apr 2009 13:55:20 +0800
+Subject: crypto: padlock - Revert aes-all alias to aes
+
+From: Herbert Xu <herbert@gondor.apana.org.au>
+
+commit acd246b7494c629aa617da49716409566cf52149 upstream.
+
+Since the padlock-aes driver doesn't require a fallback (it's
+only padlock-sha that does), it should use the aes alias rather
+than aes-all so that ones that do need a fallback can use it.
+
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Cc: Chuck Ebbert <cebbert@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/crypto/padlock-aes.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/crypto/padlock-aes.c
++++ b/drivers/crypto/padlock-aes.c
+@@ -489,4 +489,4 @@ MODULE_DESCRIPTION("VIA PadLock AES algo
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Michal Ludvig");
+-MODULE_ALIAS("aes-all");
++MODULE_ALIAS("aes");
diff --git a/queue-2.6.29/ext4-add-auto_da_alloc-mount-option.patch b/queue-2.6.29/ext4-add-auto_da_alloc-mount-option.patch
new file mode 100644 (file)
index 0000000..12e1b05
--- /dev/null
@@ -0,0 +1,147 @@
+From stable-bounces@linux.kernel.org  Tue Jun  9 02:27:37 2009
+From: "Theodore Ts'o" <tytso@mit.edu>
+Date: Tue,  2 Jun 2009 08:07:50 -0400
+Subject: ext4: Add auto_da_alloc mount option
+To: stable@kernel.org
+Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1243944479-20574-9-git-send-email-tytso@mit.edu>
+
+From: "Theodore Ts'o" <tytso@mit.edu>
+
+(cherry picked from commit afd4672dc7610b7feef5190168aa917cc2e417e4)
+
+Add a mount option which allows the user to disable the automatic
+allocation of delay-allocated blocks when a previously truncated file
+is closed, or when the file is renamed over an
+existing file.  This feature is intended to save users from the
+effects of naive application writers, but it reduces the effectiveness
+of the delayed allocation code.  This mount option disables this
+safety feature, which may be desirable for production systems where
+the risk of unclean shutdowns or unexpected system crashes is low.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/ext4.h  |    2 +-
+ fs/ext4/inode.c |    2 +-
+ fs/ext4/namei.c |    3 ++-
+ fs/ext4/super.c |   25 +++++++++++++------------
+ 4 files changed, 17 insertions(+), 15 deletions(-)
+
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -557,7 +557,7 @@ do {                                                                              \
+ #define EXT4_MOUNT_NO_UID32           0x02000  /* Disable 32-bit UIDs */
+ #define EXT4_MOUNT_XATTR_USER         0x04000 /* Extended user attributes */
+ #define EXT4_MOUNT_POSIX_ACL          0x08000 /* POSIX Access Control Lists */
+-#define EXT4_MOUNT_RESERVATION                0x10000 /* Preallocation */
++#define EXT4_MOUNT_NO_AUTO_DA_ALLOC   0x10000 /* No auto delalloc mapping */
+ #define EXT4_MOUNT_BARRIER            0x20000 /* Use block barriers */
+ #define EXT4_MOUNT_NOBH                       0x40000 /* No bufferheads */
+ #define EXT4_MOUNT_QUOTA              0x80000 /* Some quota option set */
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -3887,7 +3887,7 @@ void ext4_truncate(struct inode *inode)
+       if (!ext4_can_truncate(inode))
+               return;
+-      if (inode->i_size == 0)
++      if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
+               ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE;
+       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -2457,7 +2457,8 @@ static int ext4_rename(struct inode *old
+               ext4_mark_inode_dirty(handle, new_inode);
+               if (!new_inode->i_nlink)
+                       ext4_orphan_add(handle, new_inode);
+-              force_da_alloc = 1;
++              if (!test_opt(new_dir->i_sb, NO_AUTO_DA_ALLOC))
++                      force_da_alloc = 1;
+       }
+       retval = 0;
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -803,8 +803,6 @@ static int ext4_show_options(struct seq_
+       if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL))
+               seq_puts(seq, ",noacl");
+ #endif
+-      if (!test_opt(sb, RESERVATION))
+-              seq_puts(seq, ",noreservation");
+       if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
+               seq_printf(seq, ",commit=%u",
+                          (unsigned) (sbi->s_commit_interval / HZ));
+@@ -855,6 +853,9 @@ static int ext4_show_options(struct seq_
+       if (test_opt(sb, DATA_ERR_ABORT))
+               seq_puts(seq, ",data_err=abort");
++      if (test_opt(sb, NO_AUTO_DA_ALLOC))
++              seq_puts(seq, ",auto_da_alloc=0");
++
+       ext4_show_quota_options(seq, sb);
+       return 0;
+ }
+@@ -1002,7 +1003,7 @@ enum {
+       Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
+       Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov,
+       Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
+-      Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
++      Opt_auto_da_alloc, Opt_noload, Opt_nobh, Opt_bh,
+       Opt_commit, Opt_min_batch_time, Opt_max_batch_time,
+       Opt_journal_update, Opt_journal_dev,
+       Opt_journal_checksum, Opt_journal_async_commit,
+@@ -1037,8 +1038,6 @@ static const match_table_t tokens = {
+       {Opt_nouser_xattr, "nouser_xattr"},
+       {Opt_acl, "acl"},
+       {Opt_noacl, "noacl"},
+-      {Opt_reservation, "reservation"},
+-      {Opt_noreservation, "noreservation"},
+       {Opt_noload, "noload"},
+       {Opt_nobh, "nobh"},
+       {Opt_bh, "bh"},
+@@ -1073,6 +1072,7 @@ static const match_table_t tokens = {
+       {Opt_nodelalloc, "nodelalloc"},
+       {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
+       {Opt_journal_ioprio, "journal_ioprio=%u"},
++      {Opt_auto_da_alloc, "auto_da_alloc=%u"},
+       {Opt_err, NULL},
+ };
+@@ -1205,12 +1205,6 @@ static int parse_options(char *options, 
+                              "not supported\n");
+                       break;
+ #endif
+-              case Opt_reservation:
+-                      set_opt(sbi->s_mount_opt, RESERVATION);
+-                      break;
+-              case Opt_noreservation:
+-                      clear_opt(sbi->s_mount_opt, RESERVATION);
+-                      break;
+               case Opt_journal_update:
+                       /* @@@ FIXME */
+                       /* Eventually we will want to be able to create
+@@ -1471,6 +1465,14 @@ set_qf_format:
+                       *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE,
+                                                           option);
+                       break;
++              case Opt_auto_da_alloc:
++                      if (match_int(&args[0], &option))
++                              return 0;
++                      if (option)
++                              clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
++                      else
++                              set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
++                      break;
+               default:
+                       printk(KERN_ERR
+                              "EXT4-fs: Unrecognized mount option \"%s\" "
+@@ -2099,7 +2101,6 @@ static int ext4_fill_super(struct super_
+       sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
+       sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
+-      set_opt(sbi->s_mount_opt, RESERVATION);
+       set_opt(sbi->s_mount_opt, BARRIER);
+       /*
diff --git a/queue-2.6.29/ext4-add-ext4_ioc_alloc_da_blks-ioctl.patch b/queue-2.6.29/ext4-add-ext4_ioc_alloc_da_blks-ioctl.patch
new file mode 100644 (file)
index 0000000..fb7a961
--- /dev/null
@@ -0,0 +1,119 @@
+From stable-bounces@linux.kernel.org  Tue Jun  9 02:26:06 2009
+From: "Theodore Ts'o" <tytso@mit.edu>
+Date: Tue,  2 Jun 2009 08:07:46 -0400
+Subject: ext4: add EXT4_IOC_ALLOC_DA_BLKS ioctl
+To: stable@kernel.org
+Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1243944479-20574-5-git-send-email-tytso@mit.edu>
+
+From: "Theodore Ts'o" <tytso@mit.edu>
+
+(cherry picked from commit ccd2506bd43113659aa904d5bea5d1300605e2a6)
+
+Add an ioctl which forces all of the delay allocated blocks to be
+allocated.  This also provides a function ext4_alloc_da_blocks() which
+will be used by the following commits to force files to be fully
+allocated to preserve application-expected ext3 behaviour.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/ext4.h  |    3 +++
+ fs/ext4/inode.c |   42 ++++++++++++++++++++++++++++++++++++++++++
+ fs/ext4/ioctl.c |   14 ++++++++++++++
+ 3 files changed, 59 insertions(+)
+
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -326,7 +326,9 @@ struct ext4_new_group_data {
+ #define EXT4_IOC_GROUP_EXTEND         _IOW('f', 7, unsigned long)
+ #define EXT4_IOC_GROUP_ADD            _IOW('f', 8, struct ext4_new_group_input)
+ #define EXT4_IOC_MIGRATE              _IO('f', 9)
++ /* note ioctl 10 reserved for an early version of the FIEMAP ioctl */
+  /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
++#define EXT4_IOC_ALLOC_DA_BLKS                _IO('f', 12)
+ /*
+  * ioctl commands in 32 bit emulation
+@@ -1115,6 +1117,7 @@ extern int ext4_can_truncate(struct inod
+ extern void ext4_truncate(struct inode *);
+ extern void ext4_set_inode_flags(struct inode *);
+ extern void ext4_get_inode_flags(struct ext4_inode_info *);
++extern int ext4_alloc_da_blocks(struct inode *inode);
+ extern void ext4_set_aops(struct inode *inode);
+ extern int ext4_writepage_trans_blocks(struct inode *);
+ extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -2816,6 +2816,48 @@ out:
+       return;
+ }
++/*
++ * Force all delayed allocation blocks to be allocated for a given inode.
++ */
++int ext4_alloc_da_blocks(struct inode *inode)
++{
++      if (!EXT4_I(inode)->i_reserved_data_blocks &&
++          !EXT4_I(inode)->i_reserved_meta_blocks)
++              return 0;
++
++      /*
++       * We do something simple for now.  The filemap_flush() will
++       * also start triggering a write of the data blocks, which is
++       * not strictly speaking necessary (and for users of
++       * laptop_mode, not even desirable).  However, to do otherwise
++       * would require replicating code paths in:
++       *
++       * ext4_da_writepages() ->
++       *    write_cache_pages() ---> (via passed in callback function)
++       *        __mpage_da_writepage() -->
++       *           mpage_add_bh_to_extent()
++       *           mpage_da_map_blocks()
++       *
++       * The problem is that write_cache_pages(), located in
++       * mm/page-writeback.c, marks pages clean in preparation for
++       * doing I/O, which is not desirable if we're not planning on
++       * doing I/O at all.
++       *
++       * We could call write_cache_pages(), and then redirty all of
++       * the pages by calling redirty_page_for_writeback() but that
++       * would be ugly in the extreme.  So instead we would need to
++       * replicate parts of the code in the above functions,
++       * simplifying them because we wouldn't actually intend to
++       * write out the pages, but rather only collect contiguous
++       * logical block extents, call the multi-block allocator, and
++       * then update the buffer heads with the block allocations.
++       *
++       * For now, though, we'll cheat by calling filemap_flush(),
++       * which will map the blocks, and start the I/O, but not
++       * actually wait for the I/O to complete.
++       */
++      return filemap_flush(inode->i_mapping);
++}
+ /*
+  * bmap() is special.  It gets used by applications such as lilo and by
+--- a/fs/ext4/ioctl.c
++++ b/fs/ext4/ioctl.c
+@@ -262,6 +262,20 @@ setversion_out:
+               return err;
+       }
++      case EXT4_IOC_ALLOC_DA_BLKS:
++      {
++              int err;
++              if (!is_owner_or_cap(inode))
++                      return -EACCES;
++
++              err = mnt_want_write(filp->f_path.mnt);
++              if (err)
++                      return err;
++              err = ext4_alloc_da_blocks(inode);
++              mnt_drop_write(filp->f_path.mnt);
++              return err;
++      }
++
+       default:
+               return -ENOTTY;
+       }
diff --git a/queue-2.6.29/ext4-add-fine-print-for-the-32000-subdirectory-limit.patch b/queue-2.6.29/ext4-add-fine-print-for-the-32000-subdirectory-limit.patch
new file mode 100644 (file)
index 0000000..66dfeae
--- /dev/null
@@ -0,0 +1,45 @@
+From stable-bounces@linux.kernel.org  Tue Jun  9 02:25:28 2009
+From: "Theodore Ts'o" <tytso@mit.edu>
+Date: Tue,  2 Jun 2009 08:07:45 -0400
+Subject: ext4: Add fine print for the 32000 subdirectory limit
+To: stable@kernel.org
+Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1243944479-20574-4-git-send-email-tytso@mit.edu>
+
+
+From: "Theodore Ts'o" <tytso@mit.edu>
+
+(cherry picked from commit 722bde6875bfb49a0c84e5601eb82dd7ac02d27c)
+
+Some people are reading the ext4 feature list too literally and create
+dubious test cases involving very long filenames and 1k blocksize and
+then complain when they run into an htree-imposed limit.  So add fine
+print to the "fix 32000 subdirectory limit" ext4 feature.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ Documentation/filesystems/ext4.txt |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/Documentation/filesystems/ext4.txt
++++ b/Documentation/filesystems/ext4.txt
+@@ -85,7 +85,7 @@ Note: More extensive information for get
+ * extent format more robust in face of on-disk corruption due to magics,
+ * internal redundancy in tree
+ * improved file allocation (multi-block alloc)
+-* fix 32000 subdirectory limit
++* lift 32000 subdirectory limit imposed by i_links_count[1]
+ * nsec timestamps for mtime, atime, ctime, create time
+ * inode version field on disk (NFSv4, Lustre)
+ * reduced e2fsck time via uninit_bg feature
+@@ -100,6 +100,9 @@ Note: More extensive information for get
+ * efficent new ordered mode in JBD2 and ext4(avoid using buffer head to force
+   the ordering)
++[1] Filesystems with a block size of 1k may see a limit imposed by the
++directory hash tree having a maximum depth of two.
++
+ 2.2 Candidate features for future inclusion
+ * Online defrag (patches available but not well tested)
diff --git a/queue-2.6.29/ext4-automatically-allocate-delay-allocated-blocks-on-close.patch b/queue-2.6.29/ext4-automatically-allocate-delay-allocated-blocks-on-close.patch
new file mode 100644 (file)
index 0000000..26fc4b1
--- /dev/null
@@ -0,0 +1,62 @@
+From stable-bounces@linux.kernel.org  Tue Jun  9 02:26:29 2009
+From: "Theodore Ts'o" <tytso@mit.edu>
+Date: Tue,  2 Jun 2009 08:07:47 -0400
+Subject: ext4: Automatically allocate delay allocated blocks on close
+To: stable@kernel.org
+Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1243944479-20574-6-git-send-email-tytso@mit.edu>
+
+From: "Theodore Ts'o" <tytso@mit.edu>
+
+(cherry picked from commit 7d8f9f7d150dded7b68e61ca6403a1f166fb4edf)
+
+When closing a file that had been previously truncated, force any
+delay allocated blocks to be allocated so that if the filesystem
+is mounted with data=ordered, the data blocks will be pushed out to
+disk along with the journal commit.  Many application programs expect
+this, so we do this to avoid zero length files if the system crashes
+unexpectedly.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/ext4.h  |    1 +
+ fs/ext4/file.c  |    4 ++++
+ fs/ext4/inode.c |    3 +++
+ 3 files changed, 8 insertions(+)
+
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -279,6 +279,7 @@ static inline __u32 ext4_mask_flags(umod
+ #define EXT4_STATE_NEW                        0x00000002 /* inode is newly created */
+ #define EXT4_STATE_XATTR              0x00000004 /* has in-inode xattrs */
+ #define EXT4_STATE_NO_EXPAND          0x00000008 /* No space for expansion */
++#define EXT4_STATE_DA_ALLOC_CLOSE     0x00000010 /* Alloc DA blks on close */
+ /* Used to pass group descriptor data when online resize is done */
+ struct ext4_new_group_input {
+--- a/fs/ext4/file.c
++++ b/fs/ext4/file.c
+@@ -33,6 +33,10 @@
+  */
+ static int ext4_release_file(struct inode *inode, struct file *filp)
+ {
++      if (EXT4_I(inode)->i_state & EXT4_STATE_DA_ALLOC_CLOSE) {
++              ext4_alloc_da_blocks(inode);
++              EXT4_I(inode)->i_state &= ~EXT4_STATE_DA_ALLOC_CLOSE;
++      }
+       /* if we are the last writer on the inode, drop the block reservation */
+       if ((filp->f_mode & FMODE_WRITE) &&
+                       (atomic_read(&inode->i_writecount) == 1))
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -3880,6 +3880,9 @@ void ext4_truncate(struct inode *inode)
+       if (!ext4_can_truncate(inode))
+               return;
++      if (inode->i_size == 0)
++              ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE;
++
+       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
+               ext4_ext_truncate(inode);
+               return;
diff --git a/queue-2.6.29/ext4-automatically-allocate-delay-allocated-blocks-on-rename.patch b/queue-2.6.29/ext4-automatically-allocate-delay-allocated-blocks-on-rename.patch
new file mode 100644 (file)
index 0000000..ab12c10
--- /dev/null
@@ -0,0 +1,53 @@
+From stable-bounces@linux.kernel.org  Tue Jun  9 02:26:49 2009
+From: "Theodore Ts'o" <tytso@mit.edu>
+Date: Tue,  2 Jun 2009 08:07:48 -0400
+Subject: ext4: Automatically allocate delay allocated blocks on rename
+To: stable@kernel.org
+Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1243944479-20574-7-git-send-email-tytso@mit.edu>
+
+From: "Theodore Ts'o" <tytso@mit.edu>
+
+(cherry picked from commit 8750c6d5fcbd3342b3d908d157f81d345c5325a7)
+
+When renaming a file such that a link to another inode is overwritten,
+force any delay allocated blocks to be allocated so that if the
+filesystem is mounted with data=ordered, the data blocks will be
+pushed out to disk along with the journal commit.  Many application
+programs expect this, so we do this to avoid zero length files if the
+system crashes unexpectedly.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/namei.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -2319,7 +2319,7 @@ static int ext4_rename(struct inode *old
+       struct inode *old_inode, *new_inode;
+       struct buffer_head *old_bh, *new_bh, *dir_bh;
+       struct ext4_dir_entry_2 *old_de, *new_de;
+-      int retval;
++      int retval, force_da_alloc = 0;
+       old_bh = new_bh = dir_bh = NULL;
+@@ -2457,6 +2457,7 @@ static int ext4_rename(struct inode *old
+               ext4_mark_inode_dirty(handle, new_inode);
+               if (!new_inode->i_nlink)
+                       ext4_orphan_add(handle, new_inode);
++              force_da_alloc = 1;
+       }
+       retval = 0;
+@@ -2465,6 +2466,8 @@ end_rename:
+       brelse(old_bh);
+       brelse(new_bh);
+       ext4_journal_stop(handle);
++      if (retval == 0 && force_da_alloc)
++              ext4_alloc_da_blocks(old_inode);
+       return retval;
+ }
diff --git a/queue-2.6.29/ext4-check-for-an-valid-i_mode-when-reading-the-inode-from-disk.patch b/queue-2.6.29/ext4-check-for-an-valid-i_mode-when-reading-the-inode-from-disk.patch
new file mode 100644 (file)
index 0000000..e23d9fa
--- /dev/null
@@ -0,0 +1,44 @@
+From stable-bounces@linux.kernel.org  Tue Jun  9 02:27:56 2009
+From: "Theodore Ts'o" <tytso@mit.edu>
+Date: Tue,  2 Jun 2009 08:07:51 -0400
+Subject: ext4: Check for an valid i_mode when reading the inode from disk
+To: stable@kernel.org
+Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1243944479-20574-10-git-send-email-tytso@mit.edu>
+
+From: "Theodore Ts'o" <tytso@mit.edu>
+
+(cherry picked from commit 563bdd61fe4dbd6b58cf7eb06f8d8f14479ae1dc)
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/inode.c |   10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -4367,7 +4367,8 @@ struct inode *ext4_iget(struct super_blo
+                       inode->i_op = &ext4_symlink_inode_operations;
+                       ext4_set_aops(inode);
+               }
+-      } else {
++      } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
++            S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
+               inode->i_op = &ext4_special_inode_operations;
+               if (raw_inode->i_block[0])
+                       init_special_inode(inode, inode->i_mode,
+@@ -4375,6 +4376,13 @@ struct inode *ext4_iget(struct super_blo
+               else
+                       init_special_inode(inode, inode->i_mode,
+                          new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
++      } else {
++              brelse(bh);
++              ret = -EIO;
++              ext4_error(inode->i_sb, __func__,
++                         "bogus i_mode (%o) for inode=%lu",
++                         inode->i_mode, inode->i_ino);
++              goto bad_inode;
+       }
+       brelse(iloc.bh);
+       ext4_set_inode_flags(inode);
diff --git a/queue-2.6.29/ext4-clear-the-unwritten-buffer_head-flag-after-the-extent-is-initialized.patch b/queue-2.6.29/ext4-clear-the-unwritten-buffer_head-flag-after-the-extent-is-initialized.patch
new file mode 100644 (file)
index 0000000..ade7ee2
--- /dev/null
@@ -0,0 +1,62 @@
+From stable-bounces@linux.kernel.org  Tue Jun  9 02:30:30 2009
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Date: Tue,  2 Jun 2009 08:07:58 -0400
+Subject: ext4: Clear the unwritten buffer_head flag after the extent is initialized
+To: stable@kernel.org
+Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>, "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Message-ID: <1243944479-20574-17-git-send-email-tytso@mit.edu>
+
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+
+(cherry picked from commit 2a8964d63d50dd2d65d71d342bc7fb6ef4117614)
+
+The BH_Unwritten flag indicates that the buffer is allocated on disk
+but has not been written; that is, the disk was part of a persistent
+preallocation area.  That flag should only be set when a get_blocks()
+function is looking up an inode's logical to physical block mapping.
+
+When ext4_get_blocks_wrap() is called with create=1, the uninitialized
+extent is converted into an initialized one, so the BH_Unwritten flag
+is no longer appropriate.  Hence, we need to make sure the
+BH_Unwritten is not left set, since the combination of BH_Mapped and
+BH_Unwritten is not allowed; among other things, it will cause ext4's
+get_block() to be called over and over again during the write_begin
+phase of write(2).
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/inode.c |   13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1076,6 +1076,7 @@ int ext4_get_blocks_wrap(handle_t *handl
+       int retval;
+       clear_buffer_mapped(bh);
++      clear_buffer_unwritten(bh);
+       /*
+        * Try to see if we can get  the block without requesting
+@@ -1106,6 +1107,18 @@ int ext4_get_blocks_wrap(handle_t *handl
+               return retval;
+       /*
++       * When we call get_blocks without the create flag, the
++       * BH_Unwritten flag could have gotten set if the blocks
++       * requested were part of a uninitialized extent.  We need to
++       * clear this flag now that we are committed to convert all or
++       * part of the uninitialized extent to be an initialized
++       * extent.  This is because we need to avoid the combination
++       * of BH_Unwritten and BH_Mapped flags being simultaneously
++       * set on the buffer_head.
++       */
++      clear_buffer_unwritten(bh);
++
++      /*
+        * New blocks allocate and/or writing to uninitialized extent
+        * will possibly result in updating i_data, so we take
+        * the write lock of i_data_sem, and call get_blocks()
diff --git a/queue-2.6.29/ext4-don-t-inherit-inappropriate-inode-flags-from-parent.patch b/queue-2.6.29/ext4-don-t-inherit-inappropriate-inode-flags-from-parent.patch
new file mode 100644 (file)
index 0000000..427f0d4
--- /dev/null
@@ -0,0 +1,61 @@
+From stable-bounces@linux.kernel.org  Tue Jun  9 02:24:00 2009
+From: Duane Griffin <duaneg@dghda.com>
+Date: Tue,  2 Jun 2009 08:07:42 -0400
+Subject: ext4: don't inherit inappropriate inode flags from parent
+To: stable@kernel.org
+Cc: Andrew Morton <akpm@linux-foundation.org>, linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>, Duane Griffin <duaneg@dghda.com>
+Message-ID: <1243944479-20574-1-git-send-email-tytso@mit.edu>
+
+
+From: Duane Griffin <duaneg@dghda.com>
+
+(cherry picked from commit 8fa43a81b97853fc69417bb6054182e78f95cbeb)
+
+At present INDEX and EXTENTS are the only flags that new ext4 inodes do
+NOT inherit from their parent.  In addition prevent the flags DIRTY,
+ECOMPR, IMAGIC, TOPDIR, HUGE_FILE and EXT_MIGRATE from being inherited.
+List inheritable flags explicitly to prevent future flags from
+accidentally being inherited.
+
+This fixes the TOPDIR flag inheritance bug reported at
+http://bugzilla.kernel.org/show_bug.cgi?id=9866.
+
+Signed-off-by: Duane Griffin <duaneg@dghda.com>
+Acked-by: Andreas Dilger <adilger@sun.com>
+Cc: <linux-ext4@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/ext4.h   |    7 +++++++
+ fs/ext4/ialloc.c |    2 +-
+ 2 files changed, 8 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -248,6 +248,13 @@ struct flex_groups {
+ #define EXT4_FL_USER_VISIBLE          0x000BDFFF /* User visible flags */
+ #define EXT4_FL_USER_MODIFIABLE               0x000B80FF /* User modifiable flags */
++/* Flags that should be inherited by new inodes from their parent. */
++#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
++                         EXT4_SYNC_FL | EXT4_IMMUTABLE_FL | EXT4_APPEND_FL |\
++                         EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
++                         EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
++                         EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
++
+ /*
+  * Inode dynamic state flags
+  */
+--- a/fs/ext4/ialloc.c
++++ b/fs/ext4/ialloc.c
+@@ -889,7 +889,7 @@ got:
+        * newly created directory and file only if -o extent mount option is
+        * specified
+        */
+-      ei->i_flags = EXT4_I(dir)->i_flags & ~(EXT4_INDEX_FL|EXT4_EXTENTS_FL);
++      ei->i_flags = EXT4_I(dir)->i_flags & EXT4_FL_INHERITED;
+       if (S_ISLNK(mode))
+               ei->i_flags &= ~(EXT4_IMMUTABLE_FL|EXT4_APPEND_FL);
+       /* dirsync only applies to directories */
diff --git a/queue-2.6.29/ext4-fix-discard-of-inode-prealloc-space-with-delayed-allocation.patch b/queue-2.6.29/ext4-fix-discard-of-inode-prealloc-space-with-delayed-allocation.patch
new file mode 100644 (file)
index 0000000..0488d7b
--- /dev/null
@@ -0,0 +1,58 @@
+From stable-bounces@linux.kernel.org  Tue Jun  9 02:27:11 2009
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Date: Tue,  2 Jun 2009 08:07:49 -0400
+Subject: ext4: Fix discard of inode prealloc space with delayed allocation.
+To: stable@kernel.org
+Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>, "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Message-ID: <1243944479-20574-8-git-send-email-tytso@mit.edu>
+
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+
+(cherry picked from commit d6014301b5599fba395c42a1e96a7fe86f7d0b2d)
+
+With delayed allocation we should not/cannot discard inode prealloc
+space during file close. We would still have dirty pages for which we
+haven't allocated blocks yet. With this fix, after each get_blocks
+request we check whether we have zero reserved blocks and, if so and
+there are no writers on the file, we discard the inode prealloc space.
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/file.c  |    3 ++-
+ fs/ext4/inode.c |    9 ++++++++-
+ 2 files changed, 10 insertions(+), 2 deletions(-)
+
+--- a/fs/ext4/file.c
++++ b/fs/ext4/file.c
+@@ -39,7 +39,8 @@ static int ext4_release_file(struct inod
+       }
+       /* if we are the last writer on the inode, drop the block reservation */
+       if ((filp->f_mode & FMODE_WRITE) &&
+-                      (atomic_read(&inode->i_writecount) == 1))
++                      (atomic_read(&inode->i_writecount) == 1) &&
++                      !EXT4_I(inode)->i_reserved_data_blocks)
+       {
+               down_write(&EXT4_I(inode)->i_data_sem);
+               ext4_discard_preallocations(inode);
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1036,8 +1036,15 @@ static void ext4_da_update_reserve_space
+       /* update per-inode reservations */
+       BUG_ON(used  > EXT4_I(inode)->i_reserved_data_blocks);
+       EXT4_I(inode)->i_reserved_data_blocks -= used;
+-
+       spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
++
++      /*
++       * If we have done all the pending block allocations and if
++       * there aren't any writers on the inode, we can discard the
++       * inode's preallocations.
++       */
++      if (!total && (atomic_read(&inode->i_writecount) == 0))
++              ext4_discard_preallocations(inode);
+ }
+ /*
diff --git a/queue-2.6.29/ext4-fix-race-in-ext4_inode_info.i_cached_extent.patch b/queue-2.6.29/ext4-fix-race-in-ext4_inode_info.i_cached_extent.patch
new file mode 100644 (file)
index 0000000..4bd1236
--- /dev/null
@@ -0,0 +1,85 @@
+From stable-bounces@linux.kernel.org  Tue Jun  9 02:30:53 2009
+From: "Theodore Ts'o" <tytso@mit.edu>
+Date: Tue,  2 Jun 2009 08:07:59 -0400
+Subject: ext4: Fix race in ext4_inode_info.i_cached_extent
+To: stable@kernel.org
+Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1243944479-20574-18-git-send-email-tytso@mit.edu>
+
+From: "Theodore Ts'o" <tytso@mit.edu>
+
+(cherry picked from commit 2ec0ae3acec47f628179ee95fe2c4da01b5e9fc4)
+
+If two CPUs call ext4_ext_get_blocks() at the same
+time, there is nothing protecting the i_cached_extent structure from
+being used and updated at the same time.  This could potentially cause
+the wrong location on disk to be read or written to, including
+potentially causing the corruption of the block group descriptors
+and/or inode table.
+
+This bug has been in the ext4 code since almost the very beginning of
+ext4's development.  Fortunately once the data is stored in the page
+cache, ext4_get_blocks() doesn't need to be called, so trying to
+replicate this problem to the point where we could identify its root
+cause was *extremely* difficult.  Many thanks to Kevin Shanahan for
+working over several months to be able to reproduce this easily so we
+could finally nail down the cause of the corruption.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Reviewed-by: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/extents.c |   17 ++++++++++++-----
+ 1 file changed, 12 insertions(+), 5 deletions(-)
+
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -1740,11 +1740,13 @@ ext4_ext_put_in_cache(struct inode *inod
+ {
+       struct ext4_ext_cache *cex;
+       BUG_ON(len == 0);
++      spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+       cex = &EXT4_I(inode)->i_cached_extent;
+       cex->ec_type = type;
+       cex->ec_block = block;
+       cex->ec_len = len;
+       cex->ec_start = start;
++      spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+ }
+ /*
+@@ -1801,12 +1803,17 @@ ext4_ext_in_cache(struct inode *inode, e
+                       struct ext4_extent *ex)
+ {
+       struct ext4_ext_cache *cex;
++      int ret = EXT4_EXT_CACHE_NO;
++      /*
++       * We borrow i_block_reservation_lock to protect i_cached_extent
++       */
++      spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+       cex = &EXT4_I(inode)->i_cached_extent;
+       /* has cache valid data? */
+       if (cex->ec_type == EXT4_EXT_CACHE_NO)
+-              return EXT4_EXT_CACHE_NO;
++              goto errout;
+       BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP &&
+                       cex->ec_type != EXT4_EXT_CACHE_EXTENT);
+@@ -1817,11 +1824,11 @@ ext4_ext_in_cache(struct inode *inode, e
+               ext_debug("%u cached by %u:%u:%llu\n",
+                               block,
+                               cex->ec_block, cex->ec_len, cex->ec_start);
+-              return cex->ec_type;
++              ret = cex->ec_type;
+       }
+-
+-      /* not in cache */
+-      return EXT4_EXT_CACHE_NO;
++errout:
++      spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
++      return ret;
+ }
+ /*
diff --git a/queue-2.6.29/ext4-fix-softlockup-caused-by-illegal-i_file_acl-value-in-on-disk-inode.patch b/queue-2.6.29/ext4-fix-softlockup-caused-by-illegal-i_file_acl-value-in-on-disk-inode.patch
new file mode 100644 (file)
index 0000000..84370e4
--- /dev/null
@@ -0,0 +1,49 @@
+From stable-bounces@linux.kernel.org  Tue Jun  9 02:29:03 2009
+From: "Theodore Ts'o" <tytso@mit.edu>
+Date: Tue,  2 Jun 2009 08:07:54 -0400
+Subject: ext4: Fix softlockup caused by illegal i_file_acl value in on-disk inode
+To: stable@kernel.org
+Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1243944479-20574-13-git-send-email-tytso@mit.edu>
+
+From: "Theodore Ts'o" <tytso@mit.edu>
+
+(cherry picked from commit 485c26ec70f823f2a9cf45982b724893e53a859e)
+
+If the block number of the external extended attribute block (which is
+stored in i_file_acl and i_file_acl_high) is beyond the end of the
+on-disk filesystem, the process which tried to access the extended
+attributes will endlessly issue kernel printks complaining that
+"__find_get_block_slow() failed", locking up that CPU until the system
+is forcibly rebooted.
+
+So when we read in the inode, make sure the i_file_acl value is legal,
+and if not, flag the filesystem as being corrupted.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/inode.c |   12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -4351,6 +4351,18 @@ struct inode *ext4_iget(struct super_blo
+                       (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;
+       }
++      if (ei->i_file_acl &&
++          ((ei->i_file_acl <
++            (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
++             EXT4_SB(sb)->s_gdb_count)) ||
++           (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
++              ext4_error(sb, __func__,
++                         "bad extended attribute block %llu in inode #%lu",
++                         ei->i_file_acl, inode->i_ino);
++              ret = -EIO;
++              goto bad_inode;
++      }
++
+       if (S_ISREG(inode->i_mode)) {
+               inode->i_op = &ext4_file_inode_operations;
+               inode->i_fop = &ext4_file_operations;
diff --git a/queue-2.6.29/ext4-fix-sub-block-zeroing-for-writes-into-preallocated-extents.patch b/queue-2.6.29/ext4-fix-sub-block-zeroing-for-writes-into-preallocated-extents.patch
new file mode 100644 (file)
index 0000000..508f266
--- /dev/null
@@ -0,0 +1,57 @@
+From stable-bounces@linux.kernel.org  Tue Jun  9 02:29:47 2009
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Date: Tue,  2 Jun 2009 08:07:56 -0400
+Subject: ext4: Fix sub-block zeroing for writes into preallocated extents
+To: stable@kernel.org
+Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>, "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Message-ID: <1243944479-20574-15-git-send-email-tytso@mit.edu>
+
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+
+(cherry picked from commit 9c1ee184a30394e54165fa4c15923cabd952c106)
+
+We need to mark the buffer_head mapping preallocated space as new
+during write_begin. Otherwise we don't zero out the page cache content
+properly for a partial write. This will cause file corruption with
+preallocation.
+
+Now that we mark the buffer_head new we also need to have a valid
+buffer_head blocknr so that unmap_underlying_metadata() unmaps the
+correct block.
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/extents.c |    2 ++
+ fs/ext4/inode.c   |    7 +++++++
+ 2 files changed, 9 insertions(+)
+
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -2776,6 +2776,8 @@ int ext4_ext_get_blocks(handle_t *handle
+                               if (allocated > max_blocks)
+                                       allocated = max_blocks;
+                               set_buffer_unwritten(bh_result);
++                              bh_result->b_bdev = inode->i_sb->s_bdev;
++                              bh_result->b_blocknr = newblock;
+                               goto out2;
+                       }
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -2246,6 +2246,13 @@ static int ext4_da_get_block_prep(struct
+               set_buffer_delay(bh_result);
+       } else if (ret > 0) {
+               bh_result->b_size = (ret << inode->i_blkbits);
++              /*
++               * With sub-block writes into unwritten extents
++               * we also need to mark the buffer as new so that
++               * the unwritten parts of the buffer gets correctly zeroed.
++               */
++              if (buffer_unwritten(bh_result))
++                      set_buffer_new(bh_result);
+               ret = 0;
+       }
diff --git a/queue-2.6.29/ext4-ignore-i_file_acl_high-unless-ext4_feature_incompat_64bit-is-present.patch b/queue-2.6.29/ext4-ignore-i_file_acl_high-unless-ext4_feature_incompat_64bit-is-present.patch
new file mode 100644 (file)
index 0000000..38cb30a
--- /dev/null
@@ -0,0 +1,40 @@
+From stable-bounces@linux.kernel.org  Tue Jun  9 02:29:25 2009
+From: "Theodore Ts'o" <tytso@mit.edu>
+Date: Tue,  2 Jun 2009 08:07:55 -0400
+Subject: ext4: Ignore i_file_acl_high unless EXT4_FEATURE_INCOMPAT_64BIT is present
+To: stable@kernel.org
+Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1243944479-20574-14-git-send-email-tytso@mit.edu>
+
+From: "Theodore Ts'o" <tytso@mit.edu>
+
+(cherry picked from commit a9e817425dc0baede8ebe5fbc9984a640257432b)
+
+Don't try to look at i_file_acl_high unless the INCOMPAT_64BIT feature
+bit is set.  The field is normally zero, but older versions of e2fsck
+didn't automatically check to make sure of this, so in the spirit of
+"be liberal in what you accept", don't look at i_file_acl_high unless
+we are using a 64-bit filesystem.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/inode.c |    4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -4300,11 +4300,9 @@ struct inode *ext4_iget(struct super_blo
+       ei->i_flags = le32_to_cpu(raw_inode->i_flags);
+       inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
+       ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
+-      if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
+-          cpu_to_le32(EXT4_OS_HURD)) {
++      if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT))
+               ei->i_file_acl |=
+                       ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
+-      }
+       inode->i_size = ext4_isize(raw_inode);
+       ei->i_disksize = inode->i_size;
+       inode->i_generation = le32_to_cpu(raw_inode->i_generation);
diff --git a/queue-2.6.29/ext4-really-print-the-find_group_flex-fallback-warning-only-once.patch b/queue-2.6.29/ext4-really-print-the-find_group_flex-fallback-warning-only-once.patch
new file mode 100644 (file)
index 0000000..ad016bb
--- /dev/null
@@ -0,0 +1,38 @@
+From stable-bounces@linux.kernel.org  Tue Jun  9 02:28:42 2009
+From: Chuck Ebbert <cebbert@redhat.com>
+Date: Tue,  2 Jun 2009 08:07:53 -0400
+Subject: ext4: really print the find_group_flex fallback warning only once
+To: stable@kernel.org
+Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>, Chuck Ebbert <cebbert@redhat.com>
+Message-ID: <1243944479-20574-12-git-send-email-tytso@mit.edu>
+
+
+From: Chuck Ebbert <cebbert@redhat.com>
+
+(cherry picked from commit 6b82f3cb2d480b7714eb0ff61aee99c22160389e)
+
+Missing braces caused the warning to print more than once.
+
+Signed-Off-By: Chuck Ebbert <cebbert@redhat.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/ialloc.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/ialloc.c
++++ b/fs/ext4/ialloc.c
+@@ -720,11 +720,12 @@ struct inode *ext4_new_inode(handle_t *h
+               ret2 = find_group_flex(sb, dir, &group);
+               if (ret2 == -1) {
+                       ret2 = find_group_other(sb, dir, &group);
+-                      if (ret2 == 0 && once)
++                      if (ret2 == 0 && once) {
+                               once = 0;
+                               printk(KERN_NOTICE "ext4: find_group_flex "
+                                      "failed, fallback succeeded dir %lu\n",
+                                      dir->i_ino);
++                      }
+               }
+               goto got_group;
+       }
diff --git a/queue-2.6.29/ext4-return-eio-not-estale-on-directory-traversal-through-deleted-inode.patch b/queue-2.6.29/ext4-return-eio-not-estale-on-directory-traversal-through-deleted-inode.patch
new file mode 100644 (file)
index 0000000..1cc3592
--- /dev/null
@@ -0,0 +1,59 @@
+From stable-bounces@linux.kernel.org  Tue Jun  9 02:25:08 2009
+From: Bryan Donlan <bdonlan@gmail.com>
+Date: Tue,  2 Jun 2009 08:07:44 -0400
+Subject: ext4: return -EIO not -ESTALE on directory traversal through deleted inode
+To: stable@kernel.org
+Cc: "Theodore Ts'o" <tytso@mit.edu>, Andrew Morton <akpm@linux-foundation.org>, linux-ext4@vger.kernel.org, Bryan Donlan <bdonlan@gmail.com>
+Message-ID: <1243944479-20574-3-git-send-email-tytso@mit.edu>
+
+
+From: Bryan Donlan <bdonlan@gmail.com>
+
+(cherry picked from commit e6f009b0b45220c004672d41a58865e94946104d)
+
+ext4_iget() returns -ESTALE if invoked on a deleted inode, in order to
+report errors to NFS properly.  However, in ext4_lookup(), this
+-ESTALE can be propagated to userspace if the filesystem is corrupted
+such that a directory entry references a deleted inode.  This leads to
+a misleading error message - "Stale NFS file handle" - and confusion
+on the part of the admin.
+
+The bug can be easily reproduced by creating a new filesystem, making
+a link to an unused inode using debugfs, then mounting and attempting
+to ls -l said link.
+
+This patch thus changes ext4_lookup to return -EIO if it receives
+-ESTALE from ext4_iget(), as ext4 does for other filesystem metadata
+corruption; and also invokes the appropriate ext*_error functions when
+this case is detected.
+
+Signed-off-by: Bryan Donlan <bdonlan@gmail.com>
+Cc: <linux-ext4@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/namei.c |   12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -1052,8 +1052,16 @@ static struct dentry *ext4_lookup(struct
+                       return ERR_PTR(-EIO);
+               }
+               inode = ext4_iget(dir->i_sb, ino);
+-              if (IS_ERR(inode))
+-                      return ERR_CAST(inode);
++              if (unlikely(IS_ERR(inode))) {
++                      if (PTR_ERR(inode) == -ESTALE) {
++                              ext4_error(dir->i_sb, __func__,
++                                              "deleted inode referenced: %u",
++                                              ino);
++                              return ERR_PTR(-EIO);
++                      } else {
++                              return ERR_CAST(inode);
++                      }
++              }
+       }
+       return d_splice_alias(inode, dentry);
+ }
diff --git a/queue-2.6.29/ext4-tighten-restrictions-on-inode-flags.patch b/queue-2.6.29/ext4-tighten-restrictions-on-inode-flags.patch
new file mode 100644 (file)
index 0000000..d75afd7
--- /dev/null
@@ -0,0 +1,97 @@
+From stable-bounces@linux.kernel.org  Tue Jun  9 02:24:29 2009
+From: Duane Griffin <duaneg@dghda.com>
+Date: Tue,  2 Jun 2009 08:07:43 -0400
+Subject: ext4: tighten restrictions on inode flags
+To: stable@kernel.org
+Cc: Andrew Morton <akpm@linux-foundation.org>, linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>, Duane Griffin <duaneg@dghda.com>
+Message-ID: <1243944479-20574-2-git-send-email-tytso@mit.edu>
+
+
+From: Duane Griffin <duaneg@dghda.com>
+
+(cherry picked from commit 2dc6b0d48ca0599837df21b14bb8393d0804af57)
+
+At the moment there are few restrictions on which flags may be set on
+which inodes.  Specifically DIRSYNC may only be set on directories and
+IMMUTABLE and APPEND may not be set on links.  Tighten that to disallow
+TOPDIR being set on non-directories, and to allow only NODUMP and NOATIME
+on inodes that are neither regular files nor directories.
+
+Introduce a flags masking function which masks flags based on mode and
+use it during inode creation and when flags are set via the ioctl to
+facilitate future consistency.
+
+Signed-off-by: Duane Griffin <duaneg@dghda.com>
+Acked-by: Andreas Dilger <adilger@sun.com>
+Cc: <linux-ext4@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/ext4.h   |   17 +++++++++++++++++
+ fs/ext4/ialloc.c |   14 +++++---------
+ fs/ext4/ioctl.c  |    3 +--
+ 3 files changed, 23 insertions(+), 11 deletions(-)
+
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -255,6 +255,23 @@ struct flex_groups {
+                          EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
+                          EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
++/* Flags that are appropriate for regular files (all but dir-specific ones). */
++#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))
++
++/* Flags that are appropriate for non-directories/regular files. */
++#define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL)
++
++/* Mask out flags that are inappropriate for the given type of inode. */
++static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
++{
++      if (S_ISDIR(mode))
++              return flags;
++      else if (S_ISREG(mode))
++              return flags & EXT4_REG_FLMASK;
++      else
++              return flags & EXT4_OTHER_FLMASK;
++}
++
+ /*
+  * Inode dynamic state flags
+  */
+--- a/fs/ext4/ialloc.c
++++ b/fs/ext4/ialloc.c
+@@ -885,16 +885,12 @@ got:
+       ei->i_disksize = 0;
+       /*
+-       * Don't inherit extent flag from directory. We set extent flag on
+-       * newly created directory and file only if -o extent mount option is
+-       * specified
++       * Don't inherit extent flag from directory, amongst others. We set
++       * extent flag on newly created directory and file only if -o extent
++       * mount option is specified
+        */
+-      ei->i_flags = EXT4_I(dir)->i_flags & EXT4_FL_INHERITED;
+-      if (S_ISLNK(mode))
+-              ei->i_flags &= ~(EXT4_IMMUTABLE_FL|EXT4_APPEND_FL);
+-      /* dirsync only applies to directories */
+-      if (!S_ISDIR(mode))
+-              ei->i_flags &= ~EXT4_DIRSYNC_FL;
++      ei->i_flags =
++              ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED);
+       ei->i_file_acl = 0;
+       ei->i_dtime = 0;
+       ei->i_block_group = group;
+--- a/fs/ext4/ioctl.c
++++ b/fs/ext4/ioctl.c
+@@ -48,8 +48,7 @@ long ext4_ioctl(struct file *filp, unsig
+               if (err)
+                       return err;
+-              if (!S_ISDIR(inode->i_mode))
+-                      flags &= ~EXT4_DIRSYNC_FL;
++              flags = ext4_mask_flags(inode->i_mode, flags);
+               err = -EPERM;
+               mutex_lock(&inode->i_mutex);
diff --git a/queue-2.6.29/ext4-use-a-fake-block-number-for-delayed-new-buffer_head.patch b/queue-2.6.29/ext4-use-a-fake-block-number-for-delayed-new-buffer_head.patch
new file mode 100644 (file)
index 0000000..5bf185c
--- /dev/null
@@ -0,0 +1,46 @@
+From stable-bounces@linux.kernel.org  Tue Jun  9 02:30:08 2009
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Date: Tue,  2 Jun 2009 08:07:57 -0400
+Subject: ext4: Use a fake block number for delayed new buffer_head
+To: stable@kernel.org
+Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>, "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Message-ID: <1243944479-20574-16-git-send-email-tytso@mit.edu>
+
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+
+(cherry picked from commit 33b9817e2ae097c7b8d256e3510ac6c54fc6d9d0)
+
+Use a very large unsigned number (~0xffff) as the fake block number
+for the delayed new buffer. The VFS should never try to write out this
+number, but if it does, this will make it obvious.
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/inode.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -2220,6 +2220,10 @@ static int ext4_da_get_block_prep(struct
+                                 struct buffer_head *bh_result, int create)
+ {
+       int ret = 0;
++      sector_t invalid_block = ~((sector_t) 0xffff);
++
++      if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
++              invalid_block = ~0;
+       BUG_ON(create == 0);
+       BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
+@@ -2241,7 +2245,7 @@ static int ext4_da_get_block_prep(struct
+                       /* not enough space to reserve */
+                       return ret;
+-              map_bh(bh_result, inode->i_sb, 0);
++              map_bh(bh_result, inode->i_sb, invalid_block);
+               set_buffer_new(bh_result);
+               set_buffer_delay(bh_result);
+       } else if (ret > 0) {
diff --git a/queue-2.6.29/jbd2-update-locking-coments.patch b/queue-2.6.29/jbd2-update-locking-coments.patch
new file mode 100644 (file)
index 0000000..b447a58
--- /dev/null
@@ -0,0 +1,72 @@
+From stable-bounces@linux.kernel.org  Tue Jun  9 02:28:16 2009
+From: Jan Kara <jack@suse.cz>
+Date: Tue,  2 Jun 2009 08:07:52 -0400
+Subject: jbd2: Update locking coments
+To: stable@kernel.org
+Cc: "Theodore Ts'o" <tytso@mit.edu>, linux-ext4@vger.kernel.org, Jan Kara <jack@suse.cz>
+Message-ID: <1243944479-20574-11-git-send-email-tytso@mit.edu>
+
+
+From: Jan Kara <jack@suse.cz>
+
+(cherry picked from commit 86db97c87f744364d5889ca8a4134ca2048b8f83)
+
+Update information about locking in JBD2 revoke code. Inconsistency in
+comments found by Lin Tan <tammy000@gmail.com>
+
+CC: Lin Tan <tammy000@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/jbd2/revoke.c |   24 +++++++++++++++++++-----
+ 1 file changed, 19 insertions(+), 5 deletions(-)
+
+--- a/fs/jbd2/revoke.c
++++ b/fs/jbd2/revoke.c
+@@ -55,6 +55,25 @@
+  *                    need do nothing.
+  * RevokeValid set, Revoked set:
+  *                    buffer has been revoked.
++ *
++ * Locking rules:
++ * We keep two hash tables of revoke records. One hashtable belongs to the
++ * running transaction (is pointed to by journal->j_revoke), the other one
++ * belongs to the committing transaction. Accesses to the second hash table
++ * happen only from the kjournald and no other thread touches this table.  Also
++ * journal_switch_revoke_table() which switches which hashtable belongs to the
++ * running and which to the committing transaction is called only from
++ * kjournald. Therefore we need no locks when accessing the hashtable belonging
++ * to the committing transaction.
++ *
++ * All users operating on the hash table belonging to the running transaction
++ * have a handle to the transaction. Therefore they are safe from kjournald
++ * switching hash tables under them. For operations on the lists of entries in
++ * the hash table j_revoke_lock is used.
++ *
++ * Finally, also replay code uses the hash tables but at this moment noone else
++ * can touch them (filesystem isn't mounted yet) and hence no locking is
++ * needed.
+  */
+ #ifndef __KERNEL__
+@@ -401,8 +420,6 @@ int jbd2_journal_revoke(handle_t *handle
+  * the second time we would still have a pending revoke to cancel.  So,
+  * do not trust the Revoked bit on buffers unless RevokeValid is also
+  * set.
+- *
+- * The caller must have the journal locked.
+  */
+ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
+ {
+@@ -480,10 +497,7 @@ void jbd2_journal_switch_revoke_table(jo
+ /*
+  * Write revoke records to the journal for all entries in the current
+  * revoke hash, deleting the entries as we go.
+- *
+- * Called with the journal lock held.
+  */
+-
+ void jbd2_journal_write_revoke_records(journal_t *journal,
+                                 transaction_t *transaction)
+ {
diff --git a/queue-2.6.29/keys-handle-there-being-no-fallback-destination-keyring-for-request_key.patch b/queue-2.6.29/keys-handle-there-being-no-fallback-destination-keyring-for-request_key.patch
new file mode 100644 (file)
index 0000000..a40aa5c
--- /dev/null
@@ -0,0 +1,62 @@
+From 34574dd10b6d0697b86703388d6d6af9cbf4bb48 Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells@redhat.com>
+Date: Thu, 9 Apr 2009 17:14:05 +0100
+Subject: keys: Handle there being no fallback destination keyring for request_key()
+
+From: David Howells <dhowells@redhat.com>
+
+commit 34574dd10b6d0697b86703388d6d6af9cbf4bb48 upstream.
+
+When request_key() is called, without there being any standard process
+keyrings on which to fall back if a destination keyring is not specified, an
+oops is liable to occur when construct_alloc_key() calls down_write() on
+dest_keyring's semaphore.
+
+Due to function inlining this may be seen as an oops in down_write() as called
+from request_key_and_link().
+
+This situation crops up during boot, where request_key() is called from within
+the kernel (such as in CIFS mounts) where nobody is actually logged in, and so
+PAM has not had a chance to create a session keyring and user keyrings to act
+as the fallback.
+
+To fix this, make construct_alloc_key() not attempt to cache a key if there is
+no fallback keyring and no destination keyring is given specifically.
+
+Signed-off-by: David Howells <dhowells@redhat.com>
+Tested-by: Jeff Layton <jlayton@redhat.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Chuck Ebbert <cebbert@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ security/keys/request_key.c |    9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/security/keys/request_key.c
++++ b/security/keys/request_key.c
+@@ -311,7 +311,8 @@ static int construct_alloc_key(struct ke
+       set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags);
+-      down_write(&dest_keyring->sem);
++      if (dest_keyring)
++              down_write(&dest_keyring->sem);
+       /* attach the key to the destination keyring under lock, but we do need
+        * to do another check just in case someone beat us to it whilst we
+@@ -322,10 +323,12 @@ static int construct_alloc_key(struct ke
+       if (!IS_ERR(key_ref))
+               goto key_already_present;
+-      __key_link(dest_keyring, key);
++      if (dest_keyring)
++              __key_link(dest_keyring, key);
+       mutex_unlock(&key_construction_mutex);
+-      up_write(&dest_keyring->sem);
++      if (dest_keyring)
++              up_write(&dest_keyring->sem);
+       mutex_unlock(&user->cons_lock);
+       *_key = key;
+       kleave(" = 0 [%d]", key_serial(key));
diff --git a/queue-2.6.29/ptrace-fix-possible-zombie-leak-on-ptrace_detach.patch b/queue-2.6.29/ptrace-fix-possible-zombie-leak-on-ptrace_detach.patch
new file mode 100644 (file)
index 0000000..f760ef1
--- /dev/null
@@ -0,0 +1,75 @@
+From 4576145c1ecdaaea9ef8976a48335206aa1ebf91 Mon Sep 17 00:00:00 2001
+From: Oleg Nesterov <oleg@redhat.com>
+Date: Thu, 2 Apr 2009 16:58:14 -0700
+Subject: ptrace: fix possible zombie leak on PTRACE_DETACH
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+commit 4576145c1ecdaaea9ef8976a48335206aa1ebf91 upstream.
+
+When ptrace_detach() takes tasklist, the tracee can be SIGKILL'ed.  If it
+has already passed exit_notify() we can leak a zombie, because a) ptracing
+disables the auto-reaping logic, and b) ->real_parent was not notified
+about the child's death.
+
+ptrace_detach() should follow the ptrace_exit's logic, change the code
+accordingly.
+
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Cc: Jerome Marchand <jmarchan@redhat.com>
+Cc: Roland McGrath <roland@redhat.com>
+Tested-by: Denys Vlasenko <dvlasenk@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ include/linux/ptrace.h |    1 +
+ kernel/ptrace.c        |    9 +++++++--
+ 2 files changed, 8 insertions(+), 2 deletions(-)
+
+--- a/include/linux/ptrace.h
++++ b/include/linux/ptrace.h
+@@ -94,6 +94,7 @@ extern void ptrace_notify(int exit_code)
+ extern void __ptrace_link(struct task_struct *child,
+                         struct task_struct *new_parent);
+ extern void __ptrace_unlink(struct task_struct *child);
++extern int __ptrace_detach(struct task_struct *tracer, struct task_struct *p);
+ extern void ptrace_fork(struct task_struct *task, unsigned long clone_flags);
+ #define PTRACE_MODE_READ   1
+ #define PTRACE_MODE_ATTACH 2
+--- a/kernel/ptrace.c
++++ b/kernel/ptrace.c
+@@ -237,6 +237,8 @@ out:
+ int ptrace_detach(struct task_struct *child, unsigned int data)
+ {
++      int dead = 0;
++
+       if (!valid_signal(data))
+               return -EIO;
+@@ -244,18 +246,21 @@ int ptrace_detach(struct task_struct *ch
+       ptrace_disable(child);
+       clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+-      /* protect against de_thread()->release_task() */
+       write_lock_irq(&tasklist_lock);
++      /* protect against de_thread()->release_task() */
+       if (child->ptrace) {
+               child->exit_code = data;
+-              __ptrace_unlink(child);
++              dead = __ptrace_detach(current, child);
+               if (!child->exit_state)
+                       wake_up_process(child);
+       }
+       write_unlock_irq(&tasklist_lock);
++      if (unlikely(dead))
++              release_task(child);
++
+       return 0;
+ }
diff --git a/queue-2.6.29/ptrace-kill-__ptrace_detach-fix-exit_state-check.patch b/queue-2.6.29/ptrace-kill-__ptrace_detach-fix-exit_state-check.patch
new file mode 100644 (file)
index 0000000..ffde1d7
--- /dev/null
@@ -0,0 +1,67 @@
+From 95c3eb76dc07fd81289888ffc42948196b34b444 Mon Sep 17 00:00:00 2001
+From: Oleg Nesterov <oleg@redhat.com>
+Date: Thu, 2 Apr 2009 16:58:11 -0700
+Subject: ptrace: kill __ptrace_detach(), fix ->exit_state check
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+commit 95c3eb76dc07fd81289888ffc42948196b34b444 upstream.
+
+Move the code from __ptrace_detach() to its single caller and kill this
+helper.
+
+Also, fix the ->exit_state check, we shouldn't wake up EXIT_DEAD tasks.
+Actually, I think task_is_stopped_or_traced() makes more sense, but this
+needs another patch.
+
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Cc: Jerome Marchand <jmarchan@redhat.com>
+Cc: Roland McGrath <roland@redhat.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ kernel/ptrace.c |   22 +++++++++-------------
+ 1 file changed, 9 insertions(+), 13 deletions(-)
+
+--- a/kernel/ptrace.c
++++ b/kernel/ptrace.c
+@@ -235,16 +235,6 @@ out:
+       return retval;
+ }
+-static inline void __ptrace_detach(struct task_struct *child, unsigned int data)
+-{
+-      child->exit_code = data;
+-      /* .. re-parent .. */
+-      __ptrace_unlink(child);
+-      /* .. and wake it up. */
+-      if (child->exit_state != EXIT_ZOMBIE)
+-              wake_up_process(child);
+-}
+-
+ int ptrace_detach(struct task_struct *child, unsigned int data)
+ {
+       if (!valid_signal(data))
+@@ -254,10 +244,16 @@ int ptrace_detach(struct task_struct *ch
+       ptrace_disable(child);
+       clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+-      write_lock_irq(&tasklist_lock);
+       /* protect against de_thread()->release_task() */
+-      if (child->ptrace)
+-              __ptrace_detach(child, data);
++      write_lock_irq(&tasklist_lock);
++      if (child->ptrace) {
++              child->exit_code = data;
++
++              __ptrace_unlink(child);
++
++              if (!child->exit_state)
++                      wake_up_process(child);
++      }
+       write_unlock_irq(&tasklist_lock);
+       return 0;
diff --git a/queue-2.6.29/ptrace-reintroduce-__ptrace_detach-as-a-callee-of-ptrace_exit.patch b/queue-2.6.29/ptrace-reintroduce-__ptrace_detach-as-a-callee-of-ptrace_exit.patch
new file mode 100644 (file)
index 0000000..f73274c
--- /dev/null
@@ -0,0 +1,105 @@
+From b1b4c6799fb59e710454bfe0ab477cb8523a8667 Mon Sep 17 00:00:00 2001
+From: Oleg Nesterov <oleg@redhat.com>
+Date: Thu, 2 Apr 2009 16:58:13 -0700
+Subject: ptrace: reintroduce __ptrace_detach() as a callee of ptrace_exit()
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+commit b1b4c6799fb59e710454bfe0ab477cb8523a8667 upstream.
+
+No functional changes, preparation for the next patch.
+
+Move the "should we release this child" logic into the separate handler,
+__ptrace_detach().
+
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Cc: Jerome Marchand <jmarchan@redhat.com>
+Cc: Roland McGrath <roland@redhat.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ kernel/exit.c |   62 ++++++++++++++++++++++++++++++----------------------------
+ 1 file changed, 33 insertions(+), 29 deletions(-)
+
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -715,6 +715,38 @@ static int ignoring_children(struct sigh
+       return ret;
+ }
++/* Returns nonzero if the tracee should be released. */
++int __ptrace_detach(struct task_struct *tracer, struct task_struct *p)
++{
++      __ptrace_unlink(p);
++
++      if (p->exit_state != EXIT_ZOMBIE)
++              return 0;
++      /*
++       * If it's a zombie, our attachedness prevented normal
++       * parent notification or self-reaping.  Do notification
++       * now if it would have happened earlier.  If it should
++       * reap itself we return true.
++       *
++       * If it's our own child, there is no notification to do.
++       * But if our normal children self-reap, then this child
++       * was prevented by ptrace and we must reap it now.
++       */
++      if (!task_detached(p) && thread_group_empty(p)) {
++              if (!same_thread_group(p->real_parent, tracer))
++                      do_notify_parent(p, p->exit_signal);
++              else if (ignoring_children(tracer->sighand))
++                      p->exit_signal = -1;
++      }
++
++      if (!task_detached(p))
++              return 0;
++
++      /* Mark it as in the process of being reaped. */
++      p->exit_state = EXIT_DEAD;
++      return 1;
++}
++
+ /*
+  * Detach all tasks we were using ptrace on.
+  * Any that need to be release_task'd are put on the @dead list.
+@@ -726,36 +758,8 @@ static void ptrace_exit(struct task_stru
+       struct task_struct *p, *n;
+       list_for_each_entry_safe(p, n, &parent->ptraced, ptrace_entry) {
+-              __ptrace_unlink(p);
+-
+-              if (p->exit_state != EXIT_ZOMBIE)
+-                      continue;
+-
+-              /*
+-               * If it's a zombie, our attachedness prevented normal
+-               * parent notification or self-reaping.  Do notification
+-               * now if it would have happened earlier.  If it should
+-               * reap itself, add it to the @dead list.  We can't call
+-               * release_task() here because we already hold tasklist_lock.
+-               *
+-               * If it's our own child, there is no notification to do.
+-               * But if our normal children self-reap, then this child
+-               * was prevented by ptrace and we must reap it now.
+-               */
+-              if (!task_detached(p) && thread_group_empty(p)) {
+-                      if (!same_thread_group(p->real_parent, parent))
+-                              do_notify_parent(p, p->exit_signal);
+-                      else if (ignoring_children(parent->sighand))
+-                              p->exit_signal = -1;
+-              }
+-
+-              if (task_detached(p)) {
+-                      /*
+-                       * Mark it as in the process of being reaped.
+-                       */
+-                      p->exit_state = EXIT_DEAD;
++              if (__ptrace_detach(parent, p))
+                       list_add(&p->ptrace_entry, dead);
+-              }
+       }
+ }
diff --git a/queue-2.6.29/ptrace-simplify-ptrace_exit-ignoring_children-path.patch b/queue-2.6.29/ptrace-simplify-ptrace_exit-ignoring_children-path.patch
new file mode 100644 (file)
index 0000000..3912197
--- /dev/null
@@ -0,0 +1,84 @@
+From 6d69cb87f05eef3b02370b2f7bae608ad2301a00 Mon Sep 17 00:00:00 2001
+From: Oleg Nesterov <oleg@redhat.com>
+Date: Thu, 2 Apr 2009 16:58:12 -0700
+Subject: ptrace: simplify ptrace_exit()->ignoring_children() path
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+commit 6d69cb87f05eef3b02370b2f7bae608ad2301a00 upstream.
+
+ignoring_children() takes parent->sighand->siglock and checks
+k_sigaction[SIGCHLD] atomically.  But this buys nothing, we can't get the
+"really" wrong result even if we race with sigaction(SIGCHLD).  If we read
+the "stale" sa_handler/sa_flags we can pretend it was changed right after
+the check.
+
+Remove spin_lock(->siglock), and kill "int ign" which caches the result of
+ignoring_children() which becomes rather trivial.
+
+Perhaps it makes sense to export this helper, do_notify_parent() can use
+it too.
+
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Cc: Jerome Marchand <jmarchan@redhat.com>
+Cc: Roland McGrath <roland@redhat.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ kernel/exit.c |   25 ++++++++-----------------
+ 1 file changed, 8 insertions(+), 17 deletions(-)
+
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -703,19 +703,15 @@ static void exit_mm(struct task_struct *
+ }
+ /*
+- * Return nonzero if @parent's children should reap themselves.
+- *
+- * Called with write_lock_irq(&tasklist_lock) held.
++ * Called with irqs disabled, returns true if children should reap themselves.
+  */
+-static int ignoring_children(struct task_struct *parent)
++static int ignoring_children(struct sighand_struct *sigh)
+ {
+       int ret;
+-      struct sighand_struct *psig = parent->sighand;
+-      unsigned long flags;
+-      spin_lock_irqsave(&psig->siglock, flags);
+-      ret = (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN ||
+-             (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT));
+-      spin_unlock_irqrestore(&psig->siglock, flags);
++      spin_lock(&sigh->siglock);
++      ret = (sigh->action[SIGCHLD-1].sa.sa_handler == SIG_IGN) ||
++            (sigh->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT);
++      spin_unlock(&sigh->siglock);
+       return ret;
+ }
+@@ -728,7 +724,6 @@ static int ignoring_children(struct task
+ static void ptrace_exit(struct task_struct *parent, struct list_head *dead)
+ {
+       struct task_struct *p, *n;
+-      int ign = -1;
+       list_for_each_entry_safe(p, n, &parent->ptraced, ptrace_entry) {
+               __ptrace_unlink(p);
+@@ -750,12 +745,8 @@ static void ptrace_exit(struct task_stru
+               if (!task_detached(p) && thread_group_empty(p)) {
+                       if (!same_thread_group(p->real_parent, parent))
+                               do_notify_parent(p, p->exit_signal);
+-                      else {
+-                              if (ign < 0)
+-                                      ign = ignoring_children(parent);
+-                              if (ign)
+-                                      p->exit_signal = -1;
+-                      }
++                      else if (ignoring_children(parent->sighand))
++                              p->exit_signal = -1;
+               }
+               if (task_detached(p)) {
index 84f7c06ea23ff7c6f09e9d28f5f92ec2e703279e..c357455ce5649f4f36dce0e49a771b8c12ced6e7 100644 (file)
@@ -3,14 +3,14 @@ From: Ilpo J
 Date: Tue, 14 Apr 2009 02:08:53 -0700
 Subject: [PATCH 03/16] tcp: fix >2 iw selection
 
-From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
+From: Ilpo Jarvinen <ilpo.jarvinen@helsinki.fi>
 
 [ Upstream commit 86bcebafc5e7f5163ccf828792fe694b112ed6fa ]
 
 A long-standing feature in tcp_init_metrics() is such that
 any of its goto reset prevents call to tcp_init_cwnd().
 
-Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
+Signed-off-by: Ilpo Jarvinen <ilpo.jarvinen@helsinki.fi>
 Signed-off-by: David S. Miller <davem@davemloft.net>
 Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
 
index 721a9fffda28a42968c16630d404153b770a6663..349d08410ee21505af04dc5f85956fb0ae94ffd6 100644 (file)
@@ -3,7 +3,7 @@ From: Ilpo J
 Date: Sun, 10 May 2009 20:32:34 +0000
 Subject: tcp: fix MSG_PEEK race check
 
-From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
+From: Ilpo Jarvinen <ilpo.jarvinen@helsinki.fi>
 
 [ Upstream commit 775273131810caa41dfc7f9e552ea5d8508caf40 ]
 
@@ -20,7 +20,7 @@ here:
 by using additional offset by one but I certainly have very
 little interest in testing that part.
 
-Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
+Signed-off-by: Ilpo Jarvinen <ilpo.jarvinen@helsinki.fi>
 Tested-by: Frans Pop <elendil@planet.nl>
 Tested-by: Ian Zimmermann <itz@buug.org>
 Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/queue-2.6.29/x86-hpet-fix-periodic-mode-programming-on-amd-81xx.patch b/queue-2.6.29/x86-hpet-fix-periodic-mode-programming-on-amd-81xx.patch
new file mode 100644 (file)
index 0000000..dd027af
--- /dev/null
@@ -0,0 +1,90 @@
+From 7a6f9cbb37120c745fc187083fb5c3de4dca4f97 Mon Sep 17 00:00:00 2001
+From: Andreas Herrmann <andreas.herrmann3@amd.com>
+Date: Tue, 21 Apr 2009 20:00:37 +0200
+Subject: x86: hpet: fix periodic mode programming on AMD 81xx
+
+From: Andreas Herrmann <andreas.herrmann3@amd.com>
+
+commit 7a6f9cbb37120c745fc187083fb5c3de4dca4f97 upstream.
+
+(See http://bugzilla.kernel.org/show_bug.cgi?id=12961)
+
+It partially reverts commit c23e253e67c9d8a91a0ffa33c1f571a17f0a2403
+(x86: hpet: stop HPET_COUNTER when programming periodic mode)
+
+HPET on AMD 81xx chipset needs a second write (with HPET_TN_SETVAL
+cleared) to T0_CMP register to set the period in periodic mode.
+
+With this patch HPET_COUNTER is still stopped but not reset when HPET
+is programmed in periodic mode. This should help to avoid races when
+HPET is programmed in periodic mode and fixes a boot time hang that
+I've observed on a machine when using 1000HZ.
+
+[ Impact: fix boot time hang on machines with AMD 81xx chipset ]
+
+Reported-by: Jeff Mahoney <jeffm@suse.com>
+Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
+Tested-by: Jeff Mahoney <jeffm@suse.com>
+LKML-Reference: <20090421180037.GA2763@alberich.amd.com>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Cc: Chuck Ebbert <cebbert@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/kernel/hpet.c |   18 +++++++++++++++++-
+ 1 file changed, 17 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/hpet.c
++++ b/arch/x86/kernel/hpet.c
+@@ -196,6 +196,10 @@ static void hpet_stop_counter(void)
+       unsigned long cfg = hpet_readl(HPET_CFG);
+       cfg &= ~HPET_CFG_ENABLE;
+       hpet_writel(cfg, HPET_CFG);
++}
++
++static void hpet_reset_counter(void)
++{
+       hpet_writel(0, HPET_COUNTER);
+       hpet_writel(0, HPET_COUNTER + 4);
+ }
+@@ -210,6 +214,7 @@ static void hpet_start_counter(void)
+ static void hpet_restart_counter(void)
+ {
+       hpet_stop_counter();
++      hpet_reset_counter();
+       hpet_start_counter();
+ }
+@@ -269,7 +274,7 @@ static int hpet_setup_msi_irq(unsigned i
+ static void hpet_set_mode(enum clock_event_mode mode,
+                         struct clock_event_device *evt, int timer)
+ {
+-      unsigned long cfg;
++      unsigned long cfg, cmp, now;
+       uint64_t delta;
+       switch (mode) {
+@@ -277,12 +282,23 @@ static void hpet_set_mode(enum clock_eve
+               hpet_stop_counter();
+               delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult;
+               delta >>= evt->shift;
++              now = hpet_readl(HPET_COUNTER);
++              cmp = now + (unsigned long) delta;
+               cfg = hpet_readl(HPET_Tn_CFG(timer));
+               /* Make sure we use edge triggered interrupts */
+               cfg &= ~HPET_TN_LEVEL;
+               cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
+                      HPET_TN_SETVAL | HPET_TN_32BIT;
+               hpet_writel(cfg, HPET_Tn_CFG(timer));
++              hpet_writel(cmp, HPET_Tn_CMP(timer));
++              udelay(1);
++              /*
++               * HPET on AMD 81xx needs a second write (with HPET_TN_SETVAL
++               * cleared) to T0_CMP to set the period. The HPET_TN_SETVAL
++               * bit is automatically cleared after the first write.
++               * (See AMD-8111 HyperTransport I/O Hub Data Sheet,
++               * Publication # 24674)
++               */
+               hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer));
+               hpet_start_counter();
+               break;
diff --git a/queue-2.6.29/x86-hpet-provide-separate-functions-to-stop-and-start-the-counter.patch b/queue-2.6.29/x86-hpet-provide-separate-functions-to-stop-and-start-the-counter.patch
new file mode 100644 (file)
index 0000000..9ab16ad
--- /dev/null
@@ -0,0 +1,84 @@
+From 8d6f0c8214928f7c5083dd54ecb69c5d615b516e Mon Sep 17 00:00:00 2001
+From: Andreas Herrmann <andreas.herrmann3@amd.com>
+Date: Sat, 21 Feb 2009 00:10:44 +0100
+Subject: x86: hpet: provide separate functions to stop and start the counter
+
+From: Andreas Herrmann <andreas.herrmann3@amd.com>
+
+commit 8d6f0c8214928f7c5083dd54ecb69c5d615b516e upstream.
+
+By splitting up existing hpet_start_counter function.
+
+Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
+Cc: Mark Hounschell <markh@compro.net>
+Cc: Borislav Petkov <borislav.petkov@amd.com>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Cc: Chuck Ebbert <cebbert@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/kernel/hpet.c |   22 ++++++++++++++++------
+ 1 file changed, 16 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/kernel/hpet.c
++++ b/arch/x86/kernel/hpet.c
+@@ -191,27 +191,37 @@ static struct clock_event_device hpet_cl
+       .rating         = 50,
+ };
+-static void hpet_start_counter(void)
++static void hpet_stop_counter(void)
+ {
+       unsigned long cfg = hpet_readl(HPET_CFG);
+-
+       cfg &= ~HPET_CFG_ENABLE;
+       hpet_writel(cfg, HPET_CFG);
+       hpet_writel(0, HPET_COUNTER);
+       hpet_writel(0, HPET_COUNTER + 4);
++}
++
++static void hpet_start_counter(void)
++{
++      unsigned long cfg = hpet_readl(HPET_CFG);
+       cfg |= HPET_CFG_ENABLE;
+       hpet_writel(cfg, HPET_CFG);
+ }
++static void hpet_restart_counter(void)
++{
++      hpet_stop_counter();
++      hpet_start_counter();
++}
++
+ static void hpet_resume_device(void)
+ {
+       force_hpet_resume();
+ }
+-static void hpet_restart_counter(void)
++static void hpet_resume_counter(void)
+ {
+       hpet_resume_device();
+-      hpet_start_counter();
++      hpet_restart_counter();
+ }
+ static void hpet_enable_legacy_int(void)
+@@ -695,7 +705,7 @@ static struct clocksource clocksource_hp
+       .mask           = HPET_MASK,
+       .shift          = HPET_SHIFT,
+       .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
+-      .resume         = hpet_restart_counter,
++      .resume         = hpet_resume_counter,
+ #ifdef CONFIG_X86_64
+       .vread          = vread_hpet,
+ #endif
+@@ -707,7 +717,7 @@ static int hpet_clocksource_register(voi
+       cycle_t t1;
+       /* Start the counter */
+-      hpet_start_counter();
++      hpet_restart_counter();
+       /* Verify whether hpet counter works */
+       t1 = read_hpet();
diff --git a/queue-2.6.29/x86-hpet-stop-hpet_counter-when-programming-periodic-mode.patch b/queue-2.6.29/x86-hpet-stop-hpet_counter-when-programming-periodic-mode.patch
new file mode 100644 (file)
index 0000000..8ba4cb7
--- /dev/null
@@ -0,0 +1,74 @@
+From stable-bounces@linux.kernel.org  Tue Jun  9 02:17:53 2009
+From: Andreas Herrmann <andreas.herrmann3@amd.com>
+Date: Fri, 29 May 2009 17:35:14 -0400
+Subject: x86: hpet: stop HPET_COUNTER when programming periodic mode
+To: stable@kernel.org
+Cc: Ingo Molnar <mingo@elte.hu>
+
+
+From: Andreas Herrmann <andreas.herrmann3@amd.com>
+
+commit c23e253e67c9d8a91a0ffa33c1f571a17f0a2403 upstream
+
+x86: hpet: stop HPET_COUNTER when programming periodic mode
+
+Impact: fix system hang on some systems operating with HZ_1000
+
+On a system that stalled with HZ_1000, the first value written to
+T0_CMP (when the main counter was not stopped) did not trigger an
+interrupt. Instead after the main counter wrapped around (after
+several minutes) an interrupt was triggered and afterwards the
+periodic interrupt took effect.
+
+This can be fixed by implementing HPET spec recommendation for
+programming the periodic mode (i.e. stopping the main counter).
+
+[cebbert@redhat.com: backport to 2.6.29]
+
+Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
+Cc: Mark Hounschell <markh@compro.net>
+Cc: Borislav Petkov <borislav.petkov@amd.com>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Cc: Chuck Ebbert <cebbert@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+
+---
+ arch/x86/kernel/hpet.c |   13 +++----------
+ 1 file changed, 3 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/kernel/hpet.c
++++ b/arch/x86/kernel/hpet.c
+@@ -269,29 +269,22 @@ static int hpet_setup_msi_irq(unsigned i
+ static void hpet_set_mode(enum clock_event_mode mode,
+                         struct clock_event_device *evt, int timer)
+ {
+-      unsigned long cfg, cmp, now;
++      unsigned long cfg;
+       uint64_t delta;
+       switch (mode) {
+       case CLOCK_EVT_MODE_PERIODIC:
++              hpet_stop_counter();
+               delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult;
+               delta >>= evt->shift;
+-              now = hpet_readl(HPET_COUNTER);
+-              cmp = now + (unsigned long) delta;
+               cfg = hpet_readl(HPET_Tn_CFG(timer));
+               /* Make sure we use edge triggered interrupts */
+               cfg &= ~HPET_TN_LEVEL;
+               cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
+                      HPET_TN_SETVAL | HPET_TN_32BIT;
+               hpet_writel(cfg, HPET_Tn_CFG(timer));
+-              /*
+-               * The first write after writing TN_SETVAL to the
+-               * config register sets the counter value, the second
+-               * write sets the period.
+-               */
+-              hpet_writel(cmp, HPET_Tn_CMP(timer));
+-              udelay(1);
+               hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer));
++              hpet_start_counter();
+               break;
+       case CLOCK_EVT_MODE_ONESHOT: