]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.0-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 1 Jun 2012 08:34:22 +0000 (16:34 +0800)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 1 Jun 2012 08:34:22 +0000 (16:34 +0800)
added patches:
cifs-fix-oops-while-traversing-open-file-list-try-4.patch
cifs-include-backup-intent-search-flags-during-searches-try-2.patch
iwlwifi-update-bt-traffic-load-states-correctly.patch
mm-fix-faulty-initialization-in-vmalloc_init.patch
mm-pmd_read_atomic-fix-32bit-pae-pmd-walk-vs-pmd_populate-smp-race-condition.patch

queue-3.0/cifs-fix-oops-while-traversing-open-file-list-try-4.patch [new file with mode: 0644]
queue-3.0/cifs-include-backup-intent-search-flags-during-searches-try-2.patch [new file with mode: 0644]
queue-3.0/iwlwifi-update-bt-traffic-load-states-correctly.patch [new file with mode: 0644]
queue-3.0/mm-fix-faulty-initialization-in-vmalloc_init.patch [new file with mode: 0644]
queue-3.0/mm-pmd_read_atomic-fix-32bit-pae-pmd-walk-vs-pmd_populate-smp-race-condition.patch [new file with mode: 0644]
queue-3.0/series

diff --git a/queue-3.0/cifs-fix-oops-while-traversing-open-file-list-try-4.patch b/queue-3.0/cifs-fix-oops-while-traversing-open-file-list-try-4.patch
new file mode 100644 (file)
index 0000000..bbdd150
--- /dev/null
@@ -0,0 +1,136 @@
+From 2c0c2a08bed7a3b791f88d09d16ace56acb3dd98 Mon Sep 17 00:00:00 2001
+From: Shirish Pargaonkar <shirishpargaonkar@gmail.com>
+Date: Mon, 21 May 2012 09:20:12 -0500
+Subject: cifs: fix oops while traversing open file list (try #4)
+
+From: Shirish Pargaonkar <shirishpargaonkar@gmail.com>
+
+commit 2c0c2a08bed7a3b791f88d09d16ace56acb3dd98 upstream.
+
+While traversing the linked list of open file handles, if the identified
+file handle is invalid, a reopen is attempted and if it fails, we
+resume traversing where we stopped and cifs can oops while accessing
+invalid next element, for list might have changed.
+
+So mark the invalid file handle and attempt reopen if no
+valid file handle is found in rest of the list.
+If reopen fails, move the invalid file handle to the end of the list
+and start traversing the list again from the beginning.
+Repeat this four times before giving up and returning an error if
+file reopen keeps failing.
+
+Signed-off-by: Shirish Pargaonkar <shirishpargaonkar@gmail.com>
+Reviewed-by: Jeff Layton <jlayton@redhat.com>
+Signed-off-by: Steve French <sfrench@us.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/cifsglob.h |    1 
+ fs/cifs/file.c     |   57 ++++++++++++++++++++++++++++++-----------------------
+ 2 files changed, 34 insertions(+), 24 deletions(-)
+
+--- a/fs/cifs/cifsglob.h
++++ b/fs/cifs/cifsglob.h
+@@ -43,6 +43,7 @@
+ #define CIFS_MIN_RCV_POOL 4
++#define MAX_REOPEN_ATT        5 /* these many maximum attempts to reopen a file */
+ /*
+  * default attribute cache timeout (jiffies)
+  */
+--- a/fs/cifs/file.c
++++ b/fs/cifs/file.c
+@@ -973,10 +973,11 @@ struct cifsFileInfo *find_readable_file(
+ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
+                                       bool fsuid_only)
+ {
+-      struct cifsFileInfo *open_file;
++      struct cifsFileInfo *open_file, *inv_file = NULL;
+       struct cifs_sb_info *cifs_sb;
+       bool any_available = false;
+       int rc;
++      unsigned int refind = 0;
+       /* Having a null inode here (because mapping->host was set to zero by
+       the VFS or MM) should not happen but we had reports of on oops (due to
+@@ -996,40 +997,25 @@ struct cifsFileInfo *find_writable_file(
+       spin_lock(&cifs_file_list_lock);
+ refind_writable:
++      if (refind > MAX_REOPEN_ATT) {
++              spin_unlock(&cifs_file_list_lock);
++              return NULL;
++      }
+       list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
+               if (!any_available && open_file->pid != current->tgid)
+                       continue;
+               if (fsuid_only && open_file->uid != current_fsuid())
+                       continue;
+               if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
+-                      cifsFileInfo_get(open_file);
+-
+                       if (!open_file->invalidHandle) {
+                               /* found a good writable file */
++                              cifsFileInfo_get(open_file);
+                               spin_unlock(&cifs_file_list_lock);
+                               return open_file;
++                      } else {
++                              if (!inv_file)
++                                      inv_file = open_file;
+                       }
+-
+-                      spin_unlock(&cifs_file_list_lock);
+-
+-                      /* Had to unlock since following call can block */
+-                      rc = cifs_reopen_file(open_file, false);
+-                      if (!rc)
+-                              return open_file;
+-
+-                      /* if it fails, try another handle if possible */
+-                      cFYI(1, "wp failed on reopen file");
+-                      cifsFileInfo_put(open_file);
+-
+-                      spin_lock(&cifs_file_list_lock);
+-
+-                      /* else we simply continue to the next entry. Thus
+-                         we do not loop on reopen errors.  If we
+-                         can not reopen the file, for example if we
+-                         reconnected to a server with another client
+-                         racing to delete or lock the file we would not
+-                         make progress if we restarted before the beginning
+-                         of the loop here. */
+               }
+       }
+       /* couldn't find useable FH with same pid, try any available */
+@@ -1037,7 +1023,30 @@ refind_writable:
+               any_available = true;
+               goto refind_writable;
+       }
++
++      if (inv_file) {
++              any_available = false;
++              cifsFileInfo_get(inv_file);
++      }
++
+       spin_unlock(&cifs_file_list_lock);
++
++      if (inv_file) {
++              rc = cifs_reopen_file(inv_file, false);
++              if (!rc)
++                      return inv_file;
++              else {
++                      spin_lock(&cifs_file_list_lock);
++                      list_move_tail(&inv_file->flist,
++                                      &cifs_inode->openFileList);
++                      spin_unlock(&cifs_file_list_lock);
++                      cifsFileInfo_put(inv_file);
++                      spin_lock(&cifs_file_list_lock);
++                      ++refind;
++                      goto refind_writable;
++              }
++      }
++
+       return NULL;
+ }
diff --git a/queue-3.0/cifs-include-backup-intent-search-flags-during-searches-try-2.patch b/queue-3.0/cifs-include-backup-intent-search-flags-during-searches-try-2.patch
new file mode 100644 (file)
index 0000000..8fee418
--- /dev/null
@@ -0,0 +1,145 @@
+From 2608bee744a92d60d15ff4e6e0b913d8b406aedd Mon Sep 17 00:00:00 2001
+From: Shirish Pargaonkar <shirishpargaonkar@gmail.com>
+Date: Tue, 15 May 2012 10:19:16 -0500
+Subject: cifs: Include backup intent search flags during searches {try #2)
+
+From: Shirish Pargaonkar <shirishpargaonkar@gmail.com>
+
+commit 2608bee744a92d60d15ff4e6e0b913d8b406aedd upstream.
+
+As observed and suggested by Tushar Gosavi...
+
+---------
+readdir calls these function to send TRANS2_FIND_FIRST and
+TRANS2_FIND_NEXT command to the server. The current cifs module is
+not specifying CIFS_SEARCH_BACKUP_SEARCH flag while sending these
+command when backupuid/backupgid is specified. This can be resolved
+by specifying CIFS_SEARCH_BACKUP_SEARCH flag.
+---------
+
+Reported-and-Tested-by: Tushar Gosavi <tugosavi@in.ibm.com>
+Signed-off-by: Shirish Pargaonkar <shirishpargaonkar@gmail.com>
+Acked-by: Jeff Layton <jlayton@redhat.com>
+Signed-off-by: Steve French <sfrench@us.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/cifsproto.h |    6 ++++--
+ fs/cifs/cifssmb.c   |   12 +++++-------
+ fs/cifs/readdir.c   |   15 +++++++++++++--
+ 3 files changed, 22 insertions(+), 11 deletions(-)
+
+--- a/fs/cifs/cifsproto.h
++++ b/fs/cifs/cifsproto.h
+@@ -175,11 +175,13 @@ extern int CIFSTCon(unsigned int xid, st
+ extern int CIFSFindFirst(const int xid, struct cifs_tcon *tcon,
+               const char *searchName, const struct nls_table *nls_codepage,
+-              __u16 *searchHandle, struct cifs_search_info *psrch_inf,
++              __u16 *searchHandle, __u16 search_flags,
++              struct cifs_search_info *psrch_inf,
+               int map, const char dirsep);
+ extern int CIFSFindNext(const int xid, struct cifs_tcon *tcon,
+-              __u16 searchHandle, struct cifs_search_info *psrch_inf);
++              __u16 searchHandle, __u16 search_flags,
++              struct cifs_search_info *psrch_inf);
+ extern int CIFSFindClose(const int, struct cifs_tcon *tcon,
+                       const __u16 search_handle);
+--- a/fs/cifs/cifssmb.c
++++ b/fs/cifs/cifssmb.c
+@@ -3926,7 +3926,7 @@ int
+ CIFSFindFirst(const int xid, struct cifs_tcon *tcon,
+             const char *searchName,
+             const struct nls_table *nls_codepage,
+-            __u16 *pnetfid,
++            __u16 *pnetfid, __u16 search_flags,
+             struct cifs_search_info *psrch_inf, int remap, const char dirsep)
+ {
+ /* level 257 SMB_ */
+@@ -3999,8 +3999,7 @@ findFirstRetry:
+           cpu_to_le16(ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM |
+                       ATTR_DIRECTORY);
+       pSMB->SearchCount = cpu_to_le16(CIFSMaxBufSize/sizeof(FILE_UNIX_INFO));
+-      pSMB->SearchFlags = cpu_to_le16(CIFS_SEARCH_CLOSE_AT_END |
+-              CIFS_SEARCH_RETURN_RESUME);
++      pSMB->SearchFlags = cpu_to_le16(search_flags);
+       pSMB->InformationLevel = cpu_to_le16(psrch_inf->info_level);
+       /* BB what should we set StorageType to? Does it matter? BB */
+@@ -4071,8 +4070,8 @@ findFirstRetry:
+       return rc;
+ }
+-int CIFSFindNext(const int xid, struct cifs_tcon *tcon,
+-               __u16 searchHandle, struct cifs_search_info *psrch_inf)
++int CIFSFindNext(const int xid, struct cifs_tcon *tcon, __u16 searchHandle,
++               __u16 search_flags, struct cifs_search_info *psrch_inf)
+ {
+       TRANSACTION2_FNEXT_REQ *pSMB = NULL;
+       TRANSACTION2_FNEXT_RSP *pSMBr = NULL;
+@@ -4117,8 +4116,7 @@ int CIFSFindNext(const int xid, struct c
+               cpu_to_le16(CIFSMaxBufSize / sizeof(FILE_UNIX_INFO));
+       pSMB->InformationLevel = cpu_to_le16(psrch_inf->info_level);
+       pSMB->ResumeKey = psrch_inf->resume_key;
+-      pSMB->SearchFlags =
+-            cpu_to_le16(CIFS_SEARCH_CLOSE_AT_END | CIFS_SEARCH_RETURN_RESUME);
++      pSMB->SearchFlags = cpu_to_le16(search_flags);
+       name_len = psrch_inf->resume_name_len;
+       params += name_len;
+--- a/fs/cifs/readdir.c
++++ b/fs/cifs/readdir.c
+@@ -218,6 +218,7 @@ int get_symlink_reparse_path(char *full_
+ static int initiate_cifs_search(const int xid, struct file *file)
+ {
++      __u16 search_flags;
+       int rc = 0;
+       char *full_path = NULL;
+       struct cifsFileInfo *cifsFile;
+@@ -269,8 +270,12 @@ ffirst_retry:
+               cifsFile->srch_inf.info_level = SMB_FIND_FILE_DIRECTORY_INFO;
+       }
++      search_flags = CIFS_SEARCH_CLOSE_AT_END | CIFS_SEARCH_RETURN_RESUME;
++      if (backup_cred(cifs_sb))
++              search_flags |= CIFS_SEARCH_BACKUP_SEARCH;
++
+       rc = CIFSFindFirst(xid, pTcon, full_path, cifs_sb->local_nls,
+-              &cifsFile->netfid, &cifsFile->srch_inf,
++              &cifsFile->netfid, search_flags, &cifsFile->srch_inf,
+               cifs_sb->mnt_cifs_flags &
+                       CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb));
+       if (rc == 0)
+@@ -499,11 +504,13 @@ static int cifs_save_resume_key(const ch
+ static int find_cifs_entry(const int xid, struct cifs_tcon *pTcon,
+       struct file *file, char **ppCurrentEntry, int *num_to_ret)
+ {
++      __u16 search_flags;
+       int rc = 0;
+       int pos_in_buf = 0;
+       loff_t first_entry_in_buffer;
+       loff_t index_to_find = file->f_pos;
+       struct cifsFileInfo *cifsFile = file->private_data;
++      struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+       /* check if index in the buffer */
+       if ((cifsFile == NULL) || (ppCurrentEntry == NULL) ||
+@@ -554,10 +561,14 @@ static int find_cifs_entry(const int xid
+               cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile);
+       }
++      search_flags = CIFS_SEARCH_CLOSE_AT_END | CIFS_SEARCH_RETURN_RESUME;
++      if (backup_cred(cifs_sb))
++              search_flags |= CIFS_SEARCH_BACKUP_SEARCH;
++
+       while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) &&
+             (rc == 0) && !cifsFile->srch_inf.endOfSearch) {
+               cFYI(1, "calling findnext2");
+-              rc = CIFSFindNext(xid, pTcon, cifsFile->netfid,
++              rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, search_flags,
+                                 &cifsFile->srch_inf);
+               cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile);
+               if (rc)
diff --git a/queue-3.0/iwlwifi-update-bt-traffic-load-states-correctly.patch b/queue-3.0/iwlwifi-update-bt-traffic-load-states-correctly.patch
new file mode 100644 (file)
index 0000000..4fb4eda
--- /dev/null
@@ -0,0 +1,41 @@
+From 882dde8eb0d49ce0f853f8f4084dde56a21fe55f Mon Sep 17 00:00:00 2001
+From: Meenakshi Venkataraman <meenakshi.venkataraman@intel.com>
+Date: Wed, 16 May 2012 22:35:57 +0200
+Subject: iwlwifi: update BT traffic load states correctly
+
+From: Meenakshi Venkataraman <meenakshi.venkataraman@intel.com>
+
+commit 882dde8eb0d49ce0f853f8f4084dde56a21fe55f upstream.
+
+When BT traffic load changes from its
+previous state, a new LQ command needs to be
+sent down to the firmware. This needs to
+be done only once per change. The state
+variable that keeps track of this change is
+last_bt_traffic_load. However, it was not
+being updated when the change had been
+handled. Not updating this variable was
+causing a flood of advanced BT config
+commands to be sent to the firmware. Fix
+this.
+
+Signed-off-by: Meenakshi Venkataraman <meenakshi.venkataraman@intel.com>
+Signed-off-by: Wey-Yi Guy <wey-yi.w.guy@intel.com>
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: John W. Linville <linville@tuxdriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/wireless/iwlwifi/iwl-agn-rs.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
++++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
+@@ -878,6 +878,7 @@ static void rs_bt_update_lq(struct iwl_p
+       if ((priv->bt_traffic_load != priv->last_bt_traffic_load) ||
+           (priv->bt_full_concurrent != full_concurrent)) {
+               priv->bt_full_concurrent = full_concurrent;
++              priv->last_bt_traffic_load = priv->bt_traffic_load;
+               /* Update uCode's rate table. */
+               tbl = &(lq_sta->lq_info[lq_sta->active_tbl]);
diff --git a/queue-3.0/mm-fix-faulty-initialization-in-vmalloc_init.patch b/queue-3.0/mm-fix-faulty-initialization-in-vmalloc_init.patch
new file mode 100644 (file)
index 0000000..1f781dd
--- /dev/null
@@ -0,0 +1,49 @@
+From dbda591d920b4c7692725b13e3f68ecb251e9080 Mon Sep 17 00:00:00 2001
+From: KyongHo <pullip.cho@samsung.com>
+Date: Tue, 29 May 2012 15:06:49 -0700
+Subject: mm: fix faulty initialization in vmalloc_init()
+
+From: KyongHo <pullip.cho@samsung.com>
+
+commit dbda591d920b4c7692725b13e3f68ecb251e9080 upstream.
+
+The transfer of ->flags causes some of the static mapping virtual
+addresses to be prematurely freed (before the mapping is removed) because
+VM_LAZY_FREE gets "set" if tmp->flags has VM_IOREMAP set.  This might
+cause subsequent vmalloc/ioremap calls to fail because it might allocate
+one of the freed virtual address ranges that aren't unmapped.
+
+va->flags has different types of flags from tmp->flags.  If a region with
+VM_IOREMAP set is registered with vm_area_add_early(), it will be removed
+by __purge_vmap_area_lazy().
+
+Fix vmalloc_init() to correctly initialize vmap_area for the given
+vm_struct.
+
+Also initialise va->vm.  If it is not set, find_vm_area() for the early
+vm regions will always fail.
+
+Signed-off-by: KyongHo Cho <pullip.cho@samsung.com>
+Cc: "Olav Haugan" <ohaugan@codeaurora.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/vmalloc.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -1174,9 +1174,10 @@ void __init vmalloc_init(void)
+       /* Import existing vmlist entries. */
+       for (tmp = vmlist; tmp; tmp = tmp->next) {
+               va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT);
+-              va->flags = tmp->flags | VM_VM_AREA;
++              va->flags = VM_VM_AREA;
+               va->va_start = (unsigned long)tmp->addr;
+               va->va_end = va->va_start + tmp->size;
++              va->vm = tmp;
+               __insert_vmap_area(va);
+       }
diff --git a/queue-3.0/mm-pmd_read_atomic-fix-32bit-pae-pmd-walk-vs-pmd_populate-smp-race-condition.patch b/queue-3.0/mm-pmd_read_atomic-fix-32bit-pae-pmd-walk-vs-pmd_populate-smp-race-condition.patch
new file mode 100644 (file)
index 0000000..c22fa01
--- /dev/null
@@ -0,0 +1,213 @@
+From 26c191788f18129af0eb32a358cdaea0c7479626 Mon Sep 17 00:00:00 2001
+From: Andrea Arcangeli <aarcange@redhat.com>
+Date: Tue, 29 May 2012 15:06:49 -0700
+Subject: mm: pmd_read_atomic: fix 32bit PAE pmd walk vs pmd_populate SMP race condition
+
+From: Andrea Arcangeli <aarcange@redhat.com>
+
+commit 26c191788f18129af0eb32a358cdaea0c7479626 upstream.
+
+When holding the mmap_sem for reading, pmd_offset_map_lock should only
+run on a pmd_t that has been read atomically from the pmdp pointer,
+otherwise we may read only half of it leading to this crash.
+
+PID: 11679  TASK: f06e8000  CPU: 3   COMMAND: "do_race_2_panic"
+ #0 [f06a9dd8] crash_kexec at c049b5ec
+ #1 [f06a9e2c] oops_end at c083d1c2
+ #2 [f06a9e40] no_context at c0433ded
+ #3 [f06a9e64] bad_area_nosemaphore at c043401a
+ #4 [f06a9e6c] __do_page_fault at c0434493
+ #5 [f06a9eec] do_page_fault at c083eb45
+ #6 [f06a9f04] error_code (via page_fault) at c083c5d5
+    EAX: 01fb470c EBX: fff35000 ECX: 00000003 EDX: 00000100 EBP:
+    00000000
+    DS:  007b     ESI: 9e201000 ES:  007b     EDI: 01fb4700 GS:  00e0
+    CS:  0060     EIP: c083bc14 ERR: ffffffff EFLAGS: 00010246
+ #7 [f06a9f38] _spin_lock at c083bc14
+ #8 [f06a9f44] sys_mincore at c0507b7d
+ #9 [f06a9fb0] system_call at c083becd
+                         start           len
+    EAX: ffffffda  EBX: 9e200000  ECX: 00001000  EDX: 6228537f
+    DS:  007b      ESI: 00000000  ES:  007b      EDI: 003d0f00
+    SS:  007b      ESP: 62285354  EBP: 62285388  GS:  0033
+    CS:  0073      EIP: 00291416  ERR: 000000da  EFLAGS: 00000286
+
+This should be a longstanding bug affecting x86 32bit PAE without THP.
+Only archs with 64bit large pmd_t and 32bit unsigned long should be
+affected.
+
+With THP enabled the barrier() in pmd_none_or_trans_huge_or_clear_bad()
+would partly hide the bug when the pmd transitions from none to stable,
+by forcing a re-read of the *pmd in pmd_offset_map_lock, but when THP is
+enabled a new set of problems arises from the fact that the pmd could then
+transition freely in any of the none, pmd_trans_huge or pmd_trans_stable states.
+So making the barrier in pmd_none_or_trans_huge_or_clear_bad()
+unconditional isn't a good idea and it would be a flaky solution.
+
+This should be fully fixed by introducing a pmd_read_atomic that reads
+the pmd in order with THP disabled, or by reading the pmd atomically
+with cmpxchg8b with THP enabled.
+
+Luckily this new race condition only triggers in the places that must
+already be covered by pmd_none_or_trans_huge_or_clear_bad() so the fix
+is localized there but this bug is not related to THP.
+
+NOTE: this can trigger on x86 32bit systems with PAE enabled with more
+than 4G of ram, otherwise the high part of the pmd will never risk to be
+truncated because it would be zero at all times, in turn so hiding the
+SMP race.
+
+This bug was discovered and fully debugged by Ulrich, quote:
+
+----
+[..]
+pmd_none_or_trans_huge_or_clear_bad() loads the content of edx and
+eax.
+
+    496 static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t
+    *pmd)
+    497 {
+    498         /* depend on compiler for an atomic pmd read */
+    499         pmd_t pmdval = *pmd;
+
+                                // edi = pmd pointer
+0xc0507a74 <sys_mincore+548>:   mov    0x8(%esp),%edi
+...
+                                // edx = PTE page table high address
+0xc0507a84 <sys_mincore+564>:   mov    0x4(%edi),%edx
+...
+                                // eax = PTE page table low address
+0xc0507a8e <sys_mincore+574>:   mov    (%edi),%eax
+
+[..]
+
+Please note that the PMD is not read atomically. These are two "mov"
+instructions where the high order bits of the PMD entry are fetched
+first. Hence, the above machine code is prone to the following race.
+
+-  The PMD entry {high|low} is 0x0000000000000000.
+   The "mov" at 0xc0507a84 loads 0x00000000 into edx.
+
+-  A page fault (on another CPU) sneaks in between the two "mov"
+   instructions and instantiates the PMD.
+
+-  The PMD entry {high|low} is now 0x00000003fda38067.
+   The "mov" at 0xc0507a8e loads 0xfda38067 into eax.
+----
+
+Reported-by: Ulrich Obergfell <uobergfe@redhat.com>
+Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Larry Woodman <lwoodman@redhat.com>
+Cc: Petr Matousek <pmatouse@redhat.com>
+Cc: Rik van Riel <riel@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/pgtable-3level.h |   50 ++++++++++++++++++++++++++++++++++
+ include/asm-generic/pgtable.h         |   22 +++++++++++++-
+ 2 files changed, 70 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable-3level.h
++++ b/arch/x86/include/asm/pgtable-3level.h
+@@ -31,6 +31,56 @@ static inline void native_set_pte(pte_t
+       ptep->pte_low = pte.pte_low;
+ }
++#define pmd_read_atomic pmd_read_atomic
++/*
++ * pte_offset_map_lock on 32bit PAE kernels was reading the pmd_t with
++ * a "*pmdp" dereference done by gcc. Problem is, in certain places
++ * where pte_offset_map_lock is called, concurrent page faults are
++ * allowed, if the mmap_sem is hold for reading. An example is mincore
++ * vs page faults vs MADV_DONTNEED. On the page fault side
++ * pmd_populate rightfully does a set_64bit, but if we're reading the
++ * pmd_t with a "*pmdp" on the mincore side, a SMP race can happen
++ * because gcc will not read the 64bit of the pmd atomically. To fix
++ * this all places running pmd_offset_map_lock() while holding the
++ * mmap_sem in read mode, shall read the pmdp pointer using this
++ * function to know if the pmd is null nor not, and in turn to know if
++ * they can run pmd_offset_map_lock or pmd_trans_huge or other pmd
++ * operations.
++ *
++ * Without THP if the mmap_sem is hold for reading, the
++ * pmd can only transition from null to not null while pmd_read_atomic runs.
++ * So there's no need of literally reading it atomically.
++ *
++ * With THP if the mmap_sem is hold for reading, the pmd can become
++ * THP or null or point to a pte (and in turn become "stable") at any
++ * time under pmd_read_atomic, so it's mandatory to read it atomically
++ * with cmpxchg8b.
++ */
++#ifndef CONFIG_TRANSPARENT_HUGEPAGE
++static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
++{
++      pmdval_t ret;
++      u32 *tmp = (u32 *)pmdp;
++
++      ret = (pmdval_t) (*tmp);
++      if (ret) {
++              /*
++               * If the low part is null, we must not read the high part
++               * or we can end up with a partial pmd.
++               */
++              smp_rmb();
++              ret |= ((pmdval_t)*(tmp + 1)) << 32;
++      }
++
++      return (pmd_t) { ret };
++}
++#else /* CONFIG_TRANSPARENT_HUGEPAGE */
++static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
++{
++      return (pmd_t) { atomic64_read((atomic64_t *)pmdp) };
++}
++#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
++
+ static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
+ {
+       set_64bit((unsigned long long *)(ptep), native_pte_val(pte));
+--- a/include/asm-generic/pgtable.h
++++ b/include/asm-generic/pgtable.h
+@@ -445,6 +445,18 @@ static inline int pmd_write(pmd_t pmd)
+ #endif /* __HAVE_ARCH_PMD_WRITE */
+ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
++#ifndef pmd_read_atomic
++static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
++{
++      /*
++       * Depend on compiler for an atomic pmd read. NOTE: this is
++       * only going to work, if the pmdval_t isn't larger than
++       * an unsigned long.
++       */
++      return *pmdp;
++}
++#endif
++
+ /*
+  * This function is meant to be used by sites walking pagetables with
+  * the mmap_sem hold in read mode to protect against MADV_DONTNEED and
+@@ -458,11 +470,17 @@ static inline int pmd_write(pmd_t pmd)
+  * undefined so behaving like if the pmd was none is safe (because it
+  * can return none anyway). The compiler level barrier() is critically
+  * important to compute the two checks atomically on the same pmdval.
++ *
++ * For 32bit kernels with a 64bit large pmd_t this automatically takes
++ * care of reading the pmd atomically to avoid SMP race conditions
++ * against pmd_populate() when the mmap_sem is hold for reading by the
++ * caller (a special atomic read not done by "gcc" as in the generic
++ * version above, is also needed when THP is disabled because the page
++ * fault can populate the pmd from under us).
+  */
+ static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
+ {
+-      /* depend on compiler for an atomic pmd read */
+-      pmd_t pmdval = *pmd;
++      pmd_t pmdval = pmd_read_atomic(pmd);
+       /*
+        * The barrier will stabilize the pmdval in a register or on
+        * the stack so that it will stop changing under the code.
index b6921bb5b52ee3a85d3b325e8f45305a581e2b11..801dd57e2164e65acd3f705f487720cafb41f47e 100644 (file)
@@ -1,3 +1,8 @@
 scsi-fix-scsi_wait_scan.patch
 scsi-fix-dm-multipath-starvation-when-scsi-host-is-busy.patch
 mm-consider-all-swapped-back-pages-in-used-once-logic.patch
+mm-pmd_read_atomic-fix-32bit-pae-pmd-walk-vs-pmd_populate-smp-race-condition.patch
+mm-fix-faulty-initialization-in-vmalloc_init.patch
+iwlwifi-update-bt-traffic-load-states-correctly.patch
+cifs-include-backup-intent-search-flags-during-searches-try-2.patch
+cifs-fix-oops-while-traversing-open-file-list-try-4.patch