From: Greg Kroah-Hartman Date: Fri, 1 Jun 2012 08:34:22 +0000 (+0800) Subject: 3.0-stable patches X-Git-Tag: v3.0.34~33 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=f56a435f07242d0de0d0210f6277d7ef471f46b5;p=thirdparty%2Fkernel%2Fstable-queue.git 3.0-stable patches added patches: cifs-fix-oops-while-traversing-open-file-list-try-4.patch cifs-include-backup-intent-search-flags-during-searches-try-2.patch iwlwifi-update-bt-traffic-load-states-correctly.patch mm-fix-faulty-initialization-in-vmalloc_init.patch mm-pmd_read_atomic-fix-32bit-pae-pmd-walk-vs-pmd_populate-smp-race-condition.patch --- diff --git a/queue-3.0/cifs-fix-oops-while-traversing-open-file-list-try-4.patch b/queue-3.0/cifs-fix-oops-while-traversing-open-file-list-try-4.patch new file mode 100644 index 00000000000..bbdd1501f3b --- /dev/null +++ b/queue-3.0/cifs-fix-oops-while-traversing-open-file-list-try-4.patch @@ -0,0 +1,136 @@ +From 2c0c2a08bed7a3b791f88d09d16ace56acb3dd98 Mon Sep 17 00:00:00 2001 +From: Shirish Pargaonkar +Date: Mon, 21 May 2012 09:20:12 -0500 +Subject: cifs: fix oops while traversing open file list (try #4) + +From: Shirish Pargaonkar + +commit 2c0c2a08bed7a3b791f88d09d16ace56acb3dd98 upstream. + +While traversing the linked list of open file handles, if the identified +file handle is invalid, a reopen is attempted and if it fails, we +resume traversing where we stopped and cifs can oops while accessing +invalid next element, for list might have changed. + +So mark the invalid file handle and attempt reopen if no +valid file handle is found in rest of the list. +If reopen fails, move the invalid file handle to the end of the list +and start traversing the list again from the beginning. +Repeat this four times before giving up and returning an error if +file reopen keeps failing.
+ +Signed-off-by: Shirish Pargaonkar +Reviewed-by: Jeff Layton +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman + +--- + fs/cifs/cifsglob.h | 1 + fs/cifs/file.c | 57 ++++++++++++++++++++++++++++++----------------------- + 2 files changed, 34 insertions(+), 24 deletions(-) + +--- a/fs/cifs/cifsglob.h ++++ b/fs/cifs/cifsglob.h +@@ -43,6 +43,7 @@ + + #define CIFS_MIN_RCV_POOL 4 + ++#define MAX_REOPEN_ATT 5 /* these many maximum attempts to reopen a file */ + /* + * default attribute cache timeout (jiffies) + */ +--- a/fs/cifs/file.c ++++ b/fs/cifs/file.c +@@ -973,10 +973,11 @@ struct cifsFileInfo *find_readable_file( + struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode, + bool fsuid_only) + { +- struct cifsFileInfo *open_file; ++ struct cifsFileInfo *open_file, *inv_file = NULL; + struct cifs_sb_info *cifs_sb; + bool any_available = false; + int rc; ++ unsigned int refind = 0; + + /* Having a null inode here (because mapping->host was set to zero by + the VFS or MM) should not happen but we had reports of on oops (due to +@@ -996,40 +997,25 @@ struct cifsFileInfo *find_writable_file( + + spin_lock(&cifs_file_list_lock); + refind_writable: ++ if (refind > MAX_REOPEN_ATT) { ++ spin_unlock(&cifs_file_list_lock); ++ return NULL; ++ } + list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { + if (!any_available && open_file->pid != current->tgid) + continue; + if (fsuid_only && open_file->uid != current_fsuid()) + continue; + if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { +- cifsFileInfo_get(open_file); +- + if (!open_file->invalidHandle) { + /* found a good writable file */ ++ cifsFileInfo_get(open_file); + spin_unlock(&cifs_file_list_lock); + return open_file; ++ } else { ++ if (!inv_file) ++ inv_file = open_file; + } +- +- spin_unlock(&cifs_file_list_lock); +- +- /* Had to unlock since following call can block */ +- rc = cifs_reopen_file(open_file, false); +- if (!rc) +- return open_file; +- +- /* if it fails, try 
another handle if possible */ +- cFYI(1, "wp failed on reopen file"); +- cifsFileInfo_put(open_file); +- +- spin_lock(&cifs_file_list_lock); +- +- /* else we simply continue to the next entry. Thus +- we do not loop on reopen errors. If we +- can not reopen the file, for example if we +- reconnected to a server with another client +- racing to delete or lock the file we would not +- make progress if we restarted before the beginning +- of the loop here. */ + } + } + /* couldn't find useable FH with same pid, try any available */ +@@ -1037,7 +1023,30 @@ refind_writable: + any_available = true; + goto refind_writable; + } ++ ++ if (inv_file) { ++ any_available = false; ++ cifsFileInfo_get(inv_file); ++ } ++ + spin_unlock(&cifs_file_list_lock); ++ ++ if (inv_file) { ++ rc = cifs_reopen_file(inv_file, false); ++ if (!rc) ++ return inv_file; ++ else { ++ spin_lock(&cifs_file_list_lock); ++ list_move_tail(&inv_file->flist, ++ &cifs_inode->openFileList); ++ spin_unlock(&cifs_file_list_lock); ++ cifsFileInfo_put(inv_file); ++ spin_lock(&cifs_file_list_lock); ++ ++refind; ++ goto refind_writable; ++ } ++ } ++ + return NULL; + } + diff --git a/queue-3.0/cifs-include-backup-intent-search-flags-during-searches-try-2.patch b/queue-3.0/cifs-include-backup-intent-search-flags-during-searches-try-2.patch new file mode 100644 index 00000000000..8fee41846a2 --- /dev/null +++ b/queue-3.0/cifs-include-backup-intent-search-flags-during-searches-try-2.patch @@ -0,0 +1,145 @@ +From 2608bee744a92d60d15ff4e6e0b913d8b406aedd Mon Sep 17 00:00:00 2001 +From: Shirish Pargaonkar +Date: Tue, 15 May 2012 10:19:16 -0500 +Subject: cifs: Include backup intent search flags during searches {try #2) + +From: Shirish Pargaonkar + +commit 2608bee744a92d60d15ff4e6e0b913d8b406aedd upstream. + +As observed and suggested by Tushar Gosavi... + +--------- +readdir calls these function to send TRANS2_FIND_FIRST and +TRANS2_FIND_NEXT command to the server. 
The current cifs module is +not specifying CIFS_SEARCH_BACKUP_SEARCH flag while sending these +command when backupuid/backupgid is specified. This can be resolved +by specifying CIFS_SEARCH_BACKUP_SEARCH flag. +--------- + +Reported-and-Tested-by: Tushar Gosavi +Signed-off-by: Shirish Pargaonkar +Acked-by: Jeff Layton +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman + +--- + fs/cifs/cifsproto.h | 6 ++++-- + fs/cifs/cifssmb.c | 12 +++++------- + fs/cifs/readdir.c | 15 +++++++++++++-- + 3 files changed, 22 insertions(+), 11 deletions(-) + +--- a/fs/cifs/cifsproto.h ++++ b/fs/cifs/cifsproto.h +@@ -175,11 +175,13 @@ extern int CIFSTCon(unsigned int xid, st + + extern int CIFSFindFirst(const int xid, struct cifs_tcon *tcon, + const char *searchName, const struct nls_table *nls_codepage, +- __u16 *searchHandle, struct cifs_search_info *psrch_inf, ++ __u16 *searchHandle, __u16 search_flags, ++ struct cifs_search_info *psrch_inf, + int map, const char dirsep); + + extern int CIFSFindNext(const int xid, struct cifs_tcon *tcon, +- __u16 searchHandle, struct cifs_search_info *psrch_inf); ++ __u16 searchHandle, __u16 search_flags, ++ struct cifs_search_info *psrch_inf); + + extern int CIFSFindClose(const int, struct cifs_tcon *tcon, + const __u16 search_handle); +--- a/fs/cifs/cifssmb.c ++++ b/fs/cifs/cifssmb.c +@@ -3926,7 +3926,7 @@ int + CIFSFindFirst(const int xid, struct cifs_tcon *tcon, + const char *searchName, + const struct nls_table *nls_codepage, +- __u16 *pnetfid, ++ __u16 *pnetfid, __u16 search_flags, + struct cifs_search_info *psrch_inf, int remap, const char dirsep) + { + /* level 257 SMB_ */ +@@ -3999,8 +3999,7 @@ findFirstRetry: + cpu_to_le16(ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM | + ATTR_DIRECTORY); + pSMB->SearchCount = cpu_to_le16(CIFSMaxBufSize/sizeof(FILE_UNIX_INFO)); +- pSMB->SearchFlags = cpu_to_le16(CIFS_SEARCH_CLOSE_AT_END | +- CIFS_SEARCH_RETURN_RESUME); ++ pSMB->SearchFlags = cpu_to_le16(search_flags); + pSMB->InformationLevel = 
cpu_to_le16(psrch_inf->info_level); + + /* BB what should we set StorageType to? Does it matter? BB */ +@@ -4071,8 +4070,8 @@ findFirstRetry: + return rc; + } + +-int CIFSFindNext(const int xid, struct cifs_tcon *tcon, +- __u16 searchHandle, struct cifs_search_info *psrch_inf) ++int CIFSFindNext(const int xid, struct cifs_tcon *tcon, __u16 searchHandle, ++ __u16 search_flags, struct cifs_search_info *psrch_inf) + { + TRANSACTION2_FNEXT_REQ *pSMB = NULL; + TRANSACTION2_FNEXT_RSP *pSMBr = NULL; +@@ -4117,8 +4116,7 @@ int CIFSFindNext(const int xid, struct c + cpu_to_le16(CIFSMaxBufSize / sizeof(FILE_UNIX_INFO)); + pSMB->InformationLevel = cpu_to_le16(psrch_inf->info_level); + pSMB->ResumeKey = psrch_inf->resume_key; +- pSMB->SearchFlags = +- cpu_to_le16(CIFS_SEARCH_CLOSE_AT_END | CIFS_SEARCH_RETURN_RESUME); ++ pSMB->SearchFlags = cpu_to_le16(search_flags); + + name_len = psrch_inf->resume_name_len; + params += name_len; +--- a/fs/cifs/readdir.c ++++ b/fs/cifs/readdir.c +@@ -218,6 +218,7 @@ int get_symlink_reparse_path(char *full_ + + static int initiate_cifs_search(const int xid, struct file *file) + { ++ __u16 search_flags; + int rc = 0; + char *full_path = NULL; + struct cifsFileInfo *cifsFile; +@@ -269,8 +270,12 @@ ffirst_retry: + cifsFile->srch_inf.info_level = SMB_FIND_FILE_DIRECTORY_INFO; + } + ++ search_flags = CIFS_SEARCH_CLOSE_AT_END | CIFS_SEARCH_RETURN_RESUME; ++ if (backup_cred(cifs_sb)) ++ search_flags |= CIFS_SEARCH_BACKUP_SEARCH; ++ + rc = CIFSFindFirst(xid, pTcon, full_path, cifs_sb->local_nls, +- &cifsFile->netfid, &cifsFile->srch_inf, ++ &cifsFile->netfid, search_flags, &cifsFile->srch_inf, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb)); + if (rc == 0) +@@ -499,11 +504,13 @@ static int cifs_save_resume_key(const ch + static int find_cifs_entry(const int xid, struct cifs_tcon *pTcon, + struct file *file, char **ppCurrentEntry, int *num_to_ret) + { ++ __u16 search_flags; + int rc = 0; + int pos_in_buf = 0; + loff_t 
first_entry_in_buffer; + loff_t index_to_find = file->f_pos; + struct cifsFileInfo *cifsFile = file->private_data; ++ struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); + /* check if index in the buffer */ + + if ((cifsFile == NULL) || (ppCurrentEntry == NULL) || +@@ -554,10 +561,14 @@ static int find_cifs_entry(const int xid + cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile); + } + ++ search_flags = CIFS_SEARCH_CLOSE_AT_END | CIFS_SEARCH_RETURN_RESUME; ++ if (backup_cred(cifs_sb)) ++ search_flags |= CIFS_SEARCH_BACKUP_SEARCH; ++ + while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) && + (rc == 0) && !cifsFile->srch_inf.endOfSearch) { + cFYI(1, "calling findnext2"); +- rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, ++ rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, search_flags, + &cifsFile->srch_inf); + cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile); + if (rc) diff --git a/queue-3.0/iwlwifi-update-bt-traffic-load-states-correctly.patch b/queue-3.0/iwlwifi-update-bt-traffic-load-states-correctly.patch new file mode 100644 index 00000000000..4fb4eda4e2e --- /dev/null +++ b/queue-3.0/iwlwifi-update-bt-traffic-load-states-correctly.patch @@ -0,0 +1,41 @@ +From 882dde8eb0d49ce0f853f8f4084dde56a21fe55f Mon Sep 17 00:00:00 2001 +From: Meenakshi Venkataraman +Date: Wed, 16 May 2012 22:35:57 +0200 +Subject: iwlwifi: update BT traffic load states correctly + +From: Meenakshi Venkataraman + +commit 882dde8eb0d49ce0f853f8f4084dde56a21fe55f upstream. + +When BT traffic load changes from its +previous state, a new LQ command needs to be +sent down to the firmware. This needs to +be done only once per change. The state +variable that keeps track of this change is +last_bt_traffic_load. However, it was not +being updated when the change had been +handled. Not updating this variable was +causing a flood of advanced BT config +commands to be sent to the firmware. Fix +this. 
+ +Signed-off-by: Meenakshi Venkataraman +Signed-off-by: Wey-Yi Guy +Signed-off-by: Johannes Berg +Signed-off-by: John W. Linville +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/wireless/iwlwifi/iwl-agn-rs.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c ++++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c +@@ -878,6 +878,7 @@ static void rs_bt_update_lq(struct iwl_p + if ((priv->bt_traffic_load != priv->last_bt_traffic_load) || + (priv->bt_full_concurrent != full_concurrent)) { + priv->bt_full_concurrent = full_concurrent; ++ priv->last_bt_traffic_load = priv->bt_traffic_load; + + /* Update uCode's rate table. */ + tbl = &(lq_sta->lq_info[lq_sta->active_tbl]); diff --git a/queue-3.0/mm-fix-faulty-initialization-in-vmalloc_init.patch b/queue-3.0/mm-fix-faulty-initialization-in-vmalloc_init.patch new file mode 100644 index 00000000000..1f781dd4b9b --- /dev/null +++ b/queue-3.0/mm-fix-faulty-initialization-in-vmalloc_init.patch @@ -0,0 +1,49 @@ +From dbda591d920b4c7692725b13e3f68ecb251e9080 Mon Sep 17 00:00:00 2001 +From: KyongHo +Date: Tue, 29 May 2012 15:06:49 -0700 +Subject: mm: fix faulty initialization in vmalloc_init() + +From: KyongHo + +commit dbda591d920b4c7692725b13e3f68ecb251e9080 upstream. + +The transfer of ->flags causes some of the static mapping virtual +addresses to be prematurely freed (before the mapping is removed) because +VM_LAZY_FREE gets "set" if tmp->flags has VM_IOREMAP set. This might +cause subsequent vmalloc/ioremap calls to fail because it might allocate +one of the freed virtual address ranges that aren't unmapped. + +va->flags has different types of flags from tmp->flags. If a region with +VM_IOREMAP set is registered with vm_area_add_early(), it will be removed +by __purge_vmap_area_lazy(). + +Fix vmalloc_init() to correctly initialize vmap_area for the given +vm_struct. + +Also initialise va->vm. If it is not set, find_vm_area() for the early +vm regions will always fail. 
+ +Signed-off-by: KyongHo Cho +Cc: "Olav Haugan" +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/vmalloc.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/mm/vmalloc.c ++++ b/mm/vmalloc.c +@@ -1174,9 +1174,10 @@ void __init vmalloc_init(void) + /* Import existing vmlist entries. */ + for (tmp = vmlist; tmp; tmp = tmp->next) { + va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT); +- va->flags = tmp->flags | VM_VM_AREA; ++ va->flags = VM_VM_AREA; + va->va_start = (unsigned long)tmp->addr; + va->va_end = va->va_start + tmp->size; ++ va->vm = tmp; + __insert_vmap_area(va); + } + diff --git a/queue-3.0/mm-pmd_read_atomic-fix-32bit-pae-pmd-walk-vs-pmd_populate-smp-race-condition.patch b/queue-3.0/mm-pmd_read_atomic-fix-32bit-pae-pmd-walk-vs-pmd_populate-smp-race-condition.patch new file mode 100644 index 00000000000..c22fa012d30 --- /dev/null +++ b/queue-3.0/mm-pmd_read_atomic-fix-32bit-pae-pmd-walk-vs-pmd_populate-smp-race-condition.patch @@ -0,0 +1,213 @@ +From 26c191788f18129af0eb32a358cdaea0c7479626 Mon Sep 17 00:00:00 2001 +From: Andrea Arcangeli +Date: Tue, 29 May 2012 15:06:49 -0700 +Subject: mm: pmd_read_atomic: fix 32bit PAE pmd walk vs pmd_populate SMP race condition + +From: Andrea Arcangeli + +commit 26c191788f18129af0eb32a358cdaea0c7479626 upstream. + +When holding the mmap_sem for reading, pmd_offset_map_lock should only +run on a pmd_t that has been read atomically from the pmdp pointer, +otherwise we may read only half of it leading to this crash. 
+ +PID: 11679 TASK: f06e8000 CPU: 3 COMMAND: "do_race_2_panic" + #0 [f06a9dd8] crash_kexec at c049b5ec + #1 [f06a9e2c] oops_end at c083d1c2 + #2 [f06a9e40] no_context at c0433ded + #3 [f06a9e64] bad_area_nosemaphore at c043401a + #4 [f06a9e6c] __do_page_fault at c0434493 + #5 [f06a9eec] do_page_fault at c083eb45 + #6 [f06a9f04] error_code (via page_fault) at c083c5d5 + EAX: 01fb470c EBX: fff35000 ECX: 00000003 EDX: 00000100 EBP: + 00000000 + DS: 007b ESI: 9e201000 ES: 007b EDI: 01fb4700 GS: 00e0 + CS: 0060 EIP: c083bc14 ERR: ffffffff EFLAGS: 00010246 + #7 [f06a9f38] _spin_lock at c083bc14 + #8 [f06a9f44] sys_mincore at c0507b7d + #9 [f06a9fb0] system_call at c083becd + start len + EAX: ffffffda EBX: 9e200000 ECX: 00001000 EDX: 6228537f + DS: 007b ESI: 00000000 ES: 007b EDI: 003d0f00 + SS: 007b ESP: 62285354 EBP: 62285388 GS: 0033 + CS: 0073 EIP: 00291416 ERR: 000000da EFLAGS: 00000286 + +This should be a longstanding bug affecting x86 32bit PAE without THP. +Only archs with 64bit large pmd_t and 32bit unsigned long should be +affected. + +With THP enabled the barrier() in pmd_none_or_trans_huge_or_clear_bad() +would partly hide the bug when the pmd transitions from none to stable, +by forcing a re-read of the *pmd in pmd_offset_map_lock, but when THP is +enabled a new set of problems arises from the fact that the pmd could then +transition freely in any of the none, pmd_trans_huge or pmd_trans_stable states. +So making the barrier in pmd_none_or_trans_huge_or_clear_bad() +unconditional isn't a good idea and it would be a flaky solution. + +This should be fully fixed by introducing a pmd_read_atomic that reads +the pmd in order with THP disabled, or by reading the pmd atomically +with cmpxchg8b with THP enabled. + +Luckily this new race condition only triggers in the places that must +already be covered by pmd_none_or_trans_huge_or_clear_bad() so the fix +is localized there but this bug is not related to THP.
+ +NOTE: this can trigger on x86 32bit systems with PAE enabled with more +than 4G of ram, otherwise the high part of the pmd will never risk to be +truncated because it would be zero at all times, in turn so hiding the +SMP race. + +This bug was discovered and fully debugged by Ulrich, quote: + +---- +[..] +pmd_none_or_trans_huge_or_clear_bad() loads the content of edx and +eax. + + 496 static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t + *pmd) + 497 { + 498 /* depend on compiler for an atomic pmd read */ + 499 pmd_t pmdval = *pmd; + + // edi = pmd pointer +0xc0507a74 : mov 0x8(%esp),%edi +... + // edx = PTE page table high address +0xc0507a84 : mov 0x4(%edi),%edx +... + // eax = PTE page table low address +0xc0507a8e : mov (%edi),%eax + +[..] + +Please note that the PMD is not read atomically. These are two "mov" +instructions where the high order bits of the PMD entry are fetched +first. Hence, the above machine code is prone to the following race. + +- The PMD entry {high|low} is 0x0000000000000000. + The "mov" at 0xc0507a84 loads 0x00000000 into edx. + +- A page fault (on another CPU) sneaks in between the two "mov" + instructions and instantiates the PMD. + +- The PMD entry {high|low} is now 0x00000003fda38067. + The "mov" at 0xc0507a8e loads 0xfda38067 into eax. 
+---- + +Reported-by: Ulrich Obergfell +Signed-off-by: Andrea Arcangeli +Cc: Mel Gorman +Cc: Hugh Dickins +Cc: Larry Woodman +Cc: Petr Matousek +Cc: Rik van Riel +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/pgtable-3level.h | 50 ++++++++++++++++++++++++++++++++++ + include/asm-generic/pgtable.h | 22 +++++++++++++- + 2 files changed, 70 insertions(+), 2 deletions(-) + +--- a/arch/x86/include/asm/pgtable-3level.h ++++ b/arch/x86/include/asm/pgtable-3level.h +@@ -31,6 +31,56 @@ static inline void native_set_pte(pte_t + ptep->pte_low = pte.pte_low; + } + ++#define pmd_read_atomic pmd_read_atomic ++/* ++ * pte_offset_map_lock on 32bit PAE kernels was reading the pmd_t with ++ * a "*pmdp" dereference done by gcc. Problem is, in certain places ++ * where pte_offset_map_lock is called, concurrent page faults are ++ * allowed, if the mmap_sem is hold for reading. An example is mincore ++ * vs page faults vs MADV_DONTNEED. On the page fault side ++ * pmd_populate rightfully does a set_64bit, but if we're reading the ++ * pmd_t with a "*pmdp" on the mincore side, a SMP race can happen ++ * because gcc will not read the 64bit of the pmd atomically. To fix ++ * this all places running pmd_offset_map_lock() while holding the ++ * mmap_sem in read mode, shall read the pmdp pointer using this ++ * function to know if the pmd is null nor not, and in turn to know if ++ * they can run pmd_offset_map_lock or pmd_trans_huge or other pmd ++ * operations. ++ * ++ * Without THP if the mmap_sem is hold for reading, the ++ * pmd can only transition from null to not null while pmd_read_atomic runs. ++ * So there's no need of literally reading it atomically. ++ * ++ * With THP if the mmap_sem is hold for reading, the pmd can become ++ * THP or null or point to a pte (and in turn become "stable") at any ++ * time under pmd_read_atomic, so it's mandatory to read it atomically ++ * with cmpxchg8b. 
++ */ ++#ifndef CONFIG_TRANSPARENT_HUGEPAGE ++static inline pmd_t pmd_read_atomic(pmd_t *pmdp) ++{ ++ pmdval_t ret; ++ u32 *tmp = (u32 *)pmdp; ++ ++ ret = (pmdval_t) (*tmp); ++ if (ret) { ++ /* ++ * If the low part is null, we must not read the high part ++ * or we can end up with a partial pmd. ++ */ ++ smp_rmb(); ++ ret |= ((pmdval_t)*(tmp + 1)) << 32; ++ } ++ ++ return (pmd_t) { ret }; ++} ++#else /* CONFIG_TRANSPARENT_HUGEPAGE */ ++static inline pmd_t pmd_read_atomic(pmd_t *pmdp) ++{ ++ return (pmd_t) { atomic64_read((atomic64_t *)pmdp) }; ++} ++#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ ++ + static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) + { + set_64bit((unsigned long long *)(ptep), native_pte_val(pte)); +--- a/include/asm-generic/pgtable.h ++++ b/include/asm-generic/pgtable.h +@@ -445,6 +445,18 @@ static inline int pmd_write(pmd_t pmd) + #endif /* __HAVE_ARCH_PMD_WRITE */ + #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + ++#ifndef pmd_read_atomic ++static inline pmd_t pmd_read_atomic(pmd_t *pmdp) ++{ ++ /* ++ * Depend on compiler for an atomic pmd read. NOTE: this is ++ * only going to work, if the pmdval_t isn't larger than ++ * an unsigned long. ++ */ ++ return *pmdp; ++} ++#endif ++ + /* + * This function is meant to be used by sites walking pagetables with + * the mmap_sem hold in read mode to protect against MADV_DONTNEED and +@@ -458,11 +470,17 @@ static inline int pmd_write(pmd_t pmd) + * undefined so behaving like if the pmd was none is safe (because it + * can return none anyway). The compiler level barrier() is critically + * important to compute the two checks atomically on the same pmdval. 
++ * ++ * For 32bit kernels with a 64bit large pmd_t this automatically takes ++ * care of reading the pmd atomically to avoid SMP race conditions ++ * against pmd_populate() when the mmap_sem is hold for reading by the ++ * caller (a special atomic read not done by "gcc" as in the generic ++ * version above, is also needed when THP is disabled because the page ++ * fault can populate the pmd from under us). + */ + static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd) + { +- /* depend on compiler for an atomic pmd read */ +- pmd_t pmdval = *pmd; ++ pmd_t pmdval = pmd_read_atomic(pmd); + /* + * The barrier will stabilize the pmdval in a register or on + * the stack so that it will stop changing under the code. diff --git a/queue-3.0/series b/queue-3.0/series index b6921bb5b52..801dd57e216 100644 --- a/queue-3.0/series +++ b/queue-3.0/series @@ -1,3 +1,8 @@ scsi-fix-scsi_wait_scan.patch scsi-fix-dm-multipath-starvation-when-scsi-host-is-busy.patch mm-consider-all-swapped-back-pages-in-used-once-logic.patch +mm-pmd_read_atomic-fix-32bit-pae-pmd-walk-vs-pmd_populate-smp-race-condition.patch +mm-fix-faulty-initialization-in-vmalloc_init.patch +iwlwifi-update-bt-traffic-load-states-correctly.patch +cifs-include-backup-intent-search-flags-during-searches-try-2.patch +cifs-fix-oops-while-traversing-open-file-list-try-4.patch