From: Greg Kroah-Hartman Date: Thu, 18 May 2017 07:44:56 +0000 (+0200) Subject: 4.11-stable patches X-Git-Tag: v3.18.54~17 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=12ecb97d92f6ef1654c0daee2fc9d7c2401d7502;p=thirdparty%2Fkernel%2Fstable-queue.git 4.11-stable patches added patches: bluetooth-fix-user-channel-for-32bit-userspace-on-64bit-kernel.patch bluetooth-hci_bcm-add-missing-tty-device-sanity-check.patch bluetooth-hci_intel-add-missing-tty-device-sanity-check.patch cgroup-fix-spurious-warnings-on-cgroup_is_dead-from-cgroup_sk_alloc.patch dax-fix-pmd-data-corruption-when-fault-races-with-write.patch dax-prevent-invalidation-of-mapped-dax-entries.patch device-dax-fix-cdev-leak.patch device-dax-fix-sysfs-attribute-deadlock.patch ext4-return-to-starting-transaction-in-ext4_dax_huge_fault.patch f2fs-check-entire-encrypted-bigname-when-finding-a-dentry.patch f2fs-fix-fs-corruption-due-to-zero-inode-page.patch f2fs-fix-multiple-f2fs_add_link-having-same-name-for-inline-dentry.patch f2fs-fix-wrong-max-cost-initialization.patch f2fs-make-flush-bios-explicitely-sync.patch fscrypt-avoid-collisions-when-presenting-long-encrypted-filenames.patch fscrypt-fix-context-consistency-check-when-key-s-unavailable.patch initramfs-always-do-fput-and-load-modules-after-rootfs-populate.patch initramfs-avoid-label-at-end-of-compound-statement-error.patch libata-reject-passthrough-write-same-requests.patch mm-fix-data-corruption-due-to-stale-mmap-reads.patch revert-f2fs-put-allocate_segment-after-refresh_sit_entry.patch serial-omap-fix-runtime-pm-handling-on-unbind.patch serial-omap-suspend-device-on-probe-errors.patch serial-samsung-add-missing-checks-for-dma_map_single-failure.patch serial-samsung-use-right-device-for-dma-mapping-calls.patch tty-pl011-use-qdf2400_e44-as-the-earlycon-name-for-qdf2400-e44.patch tty-pty-fix-ldisc-flush-after-userspace-become-aware-of-the-data-already.patch --- diff --git a/queue-4.11/bluetooth-fix-user-channel-for-32bit-userspace-on-64bit-kernel.patch b/queue-4.11/bluetooth-fix-user-channel-for-32bit-userspace-on-64bit-kernel.patch new file mode 100644 index 00000000000..1d0fd9f6c4f --- /dev/null +++ b/queue-4.11/bluetooth-fix-user-channel-for-32bit-userspace-on-64bit-kernel.patch @@ -0,0 +1,35 @@ +From ab89f0bdd63a3721f7cd3f064f39fc4ac7ca14d4 Mon Sep 17 00:00:00 2001 +From: Szymon Janc +Date: Mon, 24 Apr 2017 18:25:04 -0700 +Subject: Bluetooth: Fix user channel for 32bit userspace on 64bit kernel + +From: Szymon Janc + +commit ab89f0bdd63a3721f7cd3f064f39fc4ac7ca14d4 upstream. + +Running 32bit userspace on 64bit kernel results in MSG_CMSG_COMPAT being +defined as 0x80000000. This results in sendmsg failure if used from 32bit +userspace running on 64bit kernel. Fix this by accounting for MSG_CMSG_COMPAT +in flags check in hci_sock_sendmsg. 
+ +Signed-off-by: Szymon Janc +Signed-off-by: Marko Kiiskila +Signed-off-by: Marcel Holtmann +Signed-off-by: Greg Kroah-Hartman + +--- + net/bluetooth/hci_sock.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/bluetooth/hci_sock.c ++++ b/net/bluetooth/hci_sock.c +@@ -1680,7 +1680,8 @@ static int hci_sock_sendmsg(struct socke + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + +- if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_NOSIGNAL|MSG_ERRQUEUE)) ++ if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_NOSIGNAL|MSG_ERRQUEUE| ++ MSG_CMSG_COMPAT)) + return -EINVAL; + + if (len < 4 || len > HCI_MAX_FRAME_SIZE) diff --git a/queue-4.11/bluetooth-hci_bcm-add-missing-tty-device-sanity-check.patch b/queue-4.11/bluetooth-hci_bcm-add-missing-tty-device-sanity-check.patch new file mode 100644 index 00000000000..ac09d2ebb12 --- /dev/null +++ b/queue-4.11/bluetooth-hci_bcm-add-missing-tty-device-sanity-check.patch @@ -0,0 +1,44 @@ +From 95065a61e9bf25fb85295127fba893200c2bbbd8 Mon Sep 17 00:00:00 2001 +From: Johan Hovold +Date: Wed, 29 Mar 2017 18:15:27 +0200 +Subject: Bluetooth: hci_bcm: add missing tty-device sanity check + +From: Johan Hovold + +commit 95065a61e9bf25fb85295127fba893200c2bbbd8 upstream. + +Make sure to check the tty-device pointer before looking up the sibling +platform device to avoid dereferencing a NULL-pointer when the tty is +one end of a Unix98 pty. + +Fixes: 0395ffc1ee05 ("Bluetooth: hci_bcm: Add PM for BCM devices") +Cc: Frederic Danis +Signed-off-by: Johan Hovold +Signed-off-by: Marcel Holtmann +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/bluetooth/hci_bcm.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/bluetooth/hci_bcm.c ++++ b/drivers/bluetooth/hci_bcm.c +@@ -287,6 +287,9 @@ static int bcm_open(struct hci_uart *hu) + + hu->priv = bcm; + ++ if (!hu->tty->dev) ++ goto out; ++ + mutex_lock(&bcm_device_lock); + list_for_each(p, &bcm_device_list) { + struct bcm_device *dev = list_entry(p, struct bcm_device, list); +@@ -307,7 +310,7 @@ static int bcm_open(struct hci_uart *hu) + } + + mutex_unlock(&bcm_device_lock); +- ++out: + return 0; + } + diff --git a/queue-4.11/bluetooth-hci_intel-add-missing-tty-device-sanity-check.patch b/queue-4.11/bluetooth-hci_intel-add-missing-tty-device-sanity-check.patch new file mode 100644 index 00000000000..95ff1b10937 --- /dev/null +++ b/queue-4.11/bluetooth-hci_intel-add-missing-tty-device-sanity-check.patch @@ -0,0 +1,74 @@ +From dcb9cfaa5ea9aa0ec08aeb92582ccfe3e4c719a9 Mon Sep 17 00:00:00 2001 +From: Johan Hovold +Date: Wed, 29 Mar 2017 18:15:28 +0200 +Subject: Bluetooth: hci_intel: add missing tty-device sanity check + +From: Johan Hovold + +commit dcb9cfaa5ea9aa0ec08aeb92582ccfe3e4c719a9 upstream. + +Make sure to check the tty-device pointer before looking up the sibling +platform device to avoid dereferencing a NULL-pointer when the tty is +one end of a Unix98 pty. 
+ +Fixes: 74cdad37cd24 ("Bluetooth: hci_intel: Add runtime PM support") +Fixes: 1ab1f239bf17 ("Bluetooth: hci_intel: Add support for platform driver") +Cc: Loic Poulain +Signed-off-by: Johan Hovold +Signed-off-by: Marcel Holtmann +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/bluetooth/hci_intel.c | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +--- a/drivers/bluetooth/hci_intel.c ++++ b/drivers/bluetooth/hci_intel.c +@@ -307,6 +307,9 @@ static int intel_set_power(struct hci_ua + struct list_head *p; + int err = -ENODEV; + ++ if (!hu->tty->dev) ++ return err; ++ + mutex_lock(&intel_device_list_lock); + + list_for_each(p, &intel_device_list) { +@@ -379,6 +382,9 @@ static void intel_busy_work(struct work_ + struct intel_data *intel = container_of(work, struct intel_data, + busy_work); + ++ if (!intel->hu->tty->dev) ++ return; ++ + /* Link is busy, delay the suspend */ + mutex_lock(&intel_device_list_lock); + list_for_each(p, &intel_device_list) { +@@ -889,6 +895,8 @@ done: + list_for_each(p, &intel_device_list) { + struct intel_device *dev = list_entry(p, struct intel_device, + list); ++ if (!hu->tty->dev) ++ break; + if (hu->tty->dev->parent == dev->pdev->dev.parent) { + if (device_may_wakeup(&dev->pdev->dev)) { + set_bit(STATE_LPM_ENABLED, &intel->flags); +@@ -1056,6 +1064,9 @@ static int intel_enqueue(struct hci_uart + + BT_DBG("hu %p skb %p", hu, skb); + ++ if (!hu->tty->dev) ++ goto out_enqueue; ++ + /* Be sure our controller is resumed and potential LPM transaction + * completed before enqueuing any packet. + */ +@@ -1072,7 +1083,7 @@ static int intel_enqueue(struct hci_uart + } + } + mutex_unlock(&intel_device_list_lock); +- ++out_enqueue: + skb_queue_tail(&intel->txq, skb); + + return 0; diff --git a/queue-4.11/cgroup-fix-spurious-warnings-on-cgroup_is_dead-from-cgroup_sk_alloc.patch b/queue-4.11/cgroup-fix-spurious-warnings-on-cgroup_is_dead-from-cgroup_sk_alloc.patch new file mode 100644 index 00000000000..4507259afcb --- /dev/null +++ b/queue-4.11/cgroup-fix-spurious-warnings-on-cgroup_is_dead-from-cgroup_sk_alloc.patch @@ -0,0 +1,143 @@ +From a590b90d472f2c176c140576ee3ab44df7f67839 Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Fri, 28 Apr 2017 15:14:55 -0400 +Subject: cgroup: fix spurious warnings on cgroup_is_dead() from cgroup_sk_alloc() + +From: Tejun Heo + +commit a590b90d472f2c176c140576ee3ab44df7f67839 upstream. + +cgroup_get() expected to be called only on live cgroups and triggers +warning on a dead cgroup; however, cgroup_sk_alloc() may be called +while cloning a socket which is left in an empty and removed cgroup +and thus may legitimately duplicate its reference on a dead cgroup. +This currently triggers the following warning spuriously. + + WARNING: CPU: 14 PID: 0 at kernel/cgroup.c:490 cgroup_get+0x55/0x60 + ... 
+ [] __warn+0xd3/0xf0 + [] warn_slowpath_null+0x1e/0x20 + [] cgroup_get+0x55/0x60 + [] cgroup_sk_alloc+0x51/0xe0 + [] sk_clone_lock+0x2db/0x390 + [] inet_csk_clone_lock+0x16/0xc0 + [] tcp_create_openreq_child+0x23/0x4b0 + [] tcp_v6_syn_recv_sock+0x91/0x670 + [] tcp_check_req+0x3a6/0x4e0 + [] tcp_v6_rcv+0x693/0xa00 + [] ip6_input_finish+0x59/0x3e0 + [] ip6_input+0x32/0xb0 + [] ip6_rcv_finish+0x57/0xa0 + [] ipv6_rcv+0x318/0x4d0 + [] __netif_receive_skb_core+0x2d7/0x9a0 + [] __netif_receive_skb+0x16/0x70 + [] netif_receive_skb_internal+0x23/0x80 + [] napi_gro_frags+0x208/0x270 + [] mlx4_en_process_rx_cq+0x74c/0xf40 + [] mlx4_en_poll_rx_cq+0x30/0x90 + [] net_rx_action+0x210/0x350 + [] __do_softirq+0x106/0x2c7 + [] irq_exit+0x9d/0xa0 [] do_IRQ+0x54/0xd0 + [] common_interrupt+0x7f/0x7f + [] cpuidle_enter+0x17/0x20 + [] cpu_startup_entry+0x2a9/0x2f0 + [] start_secondary+0xf1/0x100 + +This patch renames the existing cgroup_get() with the dead cgroup +warning to cgroup_get_live() after cgroup_kn_lock_live() and +introduces the new cgroup_get() which doesn't check whether the cgroup +is live or dead. + +All existing cgroup_get() users except for cgroup_sk_alloc() are +converted to use cgroup_get_live(). + +Fixes: d979a39d7242 ("cgroup: duplicate cgroup reference when cloning sockets") +Cc: Johannes Weiner +Reported-by: Chris Mason +Signed-off-by: Tejun Heo +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/cgroup/cgroup.c | 22 ++++++++++++++++------ + 1 file changed, 16 insertions(+), 6 deletions(-) + +--- a/kernel/cgroup/cgroup.c ++++ b/kernel/cgroup/cgroup.c +@@ -438,6 +438,11 @@ out_unlock: + + static void cgroup_get(struct cgroup *cgrp) + { ++ css_get(&cgrp->self); ++} ++ ++static void cgroup_get_live(struct cgroup *cgrp) ++{ + WARN_ON_ONCE(cgroup_is_dead(cgrp)); + css_get(&cgrp->self); + } +@@ -932,7 +937,7 @@ static void link_css_set(struct list_hea + list_add_tail(&link->cgrp_link, &cset->cgrp_links); + + if (cgroup_parent(cgrp)) +- cgroup_get(cgrp); ++ cgroup_get_live(cgrp); + } + + /** +@@ -1802,7 +1807,7 @@ static struct dentry *cgroup_mount(struc + return ERR_PTR(-EINVAL); + } + cgrp_dfl_visible = true; +- cgroup_get(&cgrp_dfl_root.cgrp); ++ cgroup_get_live(&cgrp_dfl_root.cgrp); + + dentry = cgroup_do_mount(&cgroup2_fs_type, flags, &cgrp_dfl_root, + CGROUP2_SUPER_MAGIC, ns); +@@ -2576,7 +2581,7 @@ restart: + if (!css || !percpu_ref_is_dying(&css->refcnt)) + continue; + +- cgroup_get(dsct); ++ cgroup_get_live(dsct); + prepare_to_wait(&dsct->offline_waitq, &wait, + TASK_UNINTERRUPTIBLE); + +@@ -3947,7 +3952,7 @@ static void init_and_link_css(struct cgr + { + lockdep_assert_held(&cgroup_mutex); + +- cgroup_get(cgrp); ++ cgroup_get_live(cgrp); + + memset(css, 0, sizeof(*css)); + css->cgroup = cgrp; +@@ -4123,7 +4128,7 @@ static struct cgroup *cgroup_create(stru + /* allocation complete, commit to creation */ + list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children); + atomic_inc(&root->nr_cgrps); +- cgroup_get(parent); ++ cgroup_get_live(parent); + + /* + * @cgrp is now fully operational. If something fails after this +@@ -4947,7 +4952,7 @@ struct cgroup *cgroup_get_from_path(cons + if (kn) { + if (kernfs_type(kn) == KERNFS_DIR) { + cgrp = kn->priv; +- cgroup_get(cgrp); ++ cgroup_get_live(cgrp); + } else { + cgrp = ERR_PTR(-ENOTDIR); + } +@@ -5027,6 +5032,11 @@ void cgroup_sk_alloc(struct sock_cgroup_ + + /* Socket clone path */ + if (skcd->val) { ++ /* ++ * We might be cloning a socket which is left in an empty ++ * cgroup and the cgroup might have already been rmdir'd. 
++ * Don't use cgroup_get_live(). ++ */ + cgroup_get(sock_cgroup_ptr(skcd)); + return; + } diff --git a/queue-4.11/dax-fix-pmd-data-corruption-when-fault-races-with-write.patch b/queue-4.11/dax-fix-pmd-data-corruption-when-fault-races-with-write.patch new file mode 100644 index 00000000000..8e45f266d79 --- /dev/null +++ b/queue-4.11/dax-fix-pmd-data-corruption-when-fault-races-with-write.patch @@ -0,0 +1,121 @@ +From 876f29460cbd4086b43475890c1bf2488fa11d40 Mon Sep 17 00:00:00 2001 +From: Ross Zwisler +Date: Fri, 12 May 2017 15:47:00 -0700 +Subject: dax: fix PMD data corruption when fault races with write + +From: Ross Zwisler + +commit 876f29460cbd4086b43475890c1bf2488fa11d40 upstream. + +This is based on a patch from Jan Kara that fixed the equivalent race in +the DAX PTE fault path. + +Currently DAX PMD read fault can race with write(2) in the following +way: + +CPU1 - write(2) CPU2 - read fault + dax_iomap_pmd_fault() + ->iomap_begin() - sees hole + +dax_iomap_rw() + iomap_apply() + ->iomap_begin - allocates blocks + dax_iomap_actor() + invalidate_inode_pages2_range() + - there's nothing to invalidate + + grab_mapping_entry() + - we add huge zero page to the radix tree + and map it to page tables + +The result is that hole page is mapped into page tables (and thus zeros +are seen in mmap) while file has data written in that place. + +Fix the problem by locking exception entry before mapping blocks for the +fault. That way we are sure invalidate_inode_pages2_range() call for +racing write will either block on entry lock waiting for the fault to +finish (and unmap stale page tables after that) or read fault will see +already allocated blocks by write(2). + +Fixes: 9f141d6ef6258 ("dax: Call ->iomap_begin without entry lock during dax fault") +Link: http://lkml.kernel.org/r/20170510172700.18991-1-ross.zwisler@linux.intel.com +Signed-off-by: Ross Zwisler +Reviewed-by: Jan Kara +Cc: Dan Williams +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/dax.c | 28 ++++++++++++++-------------- + 1 file changed, 14 insertions(+), 14 deletions(-) + +--- a/fs/dax.c ++++ b/fs/dax.c +@@ -1353,6 +1353,16 @@ static int dax_iomap_pmd_fault(struct vm + goto fallback; + + /* ++ * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX ++ * PMD or a HZP entry. If it can't (because a 4k page is already in ++ * the tree, for instance), it will return -EEXIST and we just fall ++ * back to 4k entries. ++ */ ++ entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD); ++ if (IS_ERR(entry)) ++ goto fallback; ++ ++ /* + * Note that we don't use iomap_apply here. We aren't doing I/O, only + * setting up a mapping, so really we're using iomap_begin() as a way + * to look up our filesystem block. +@@ -1360,21 +1370,11 @@ static int dax_iomap_pmd_fault(struct vm + pos = (loff_t)pgoff << PAGE_SHIFT; + error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap); + if (error) +- goto fallback; ++ goto unlock_entry; + + if (iomap.offset + iomap.length < pos + PMD_SIZE) + goto finish_iomap; + +- /* +- * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX +- * PMD or a HZP entry. If it can't (because a 4k page is already in +- * the tree, for instance), it will return -EEXIST and we just fall +- * back to 4k entries. 
+- */ +- entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD); +- if (IS_ERR(entry)) +- goto finish_iomap; +- + switch (iomap.type) { + case IOMAP_MAPPED: + result = dax_pmd_insert_mapping(vmf, &iomap, pos, &entry); +@@ -1382,7 +1382,7 @@ static int dax_iomap_pmd_fault(struct vm + case IOMAP_UNWRITTEN: + case IOMAP_HOLE: + if (WARN_ON_ONCE(write)) +- goto unlock_entry; ++ break; + result = dax_pmd_load_hole(vmf, &iomap, &entry); + break; + default: +@@ -1390,8 +1390,6 @@ static int dax_iomap_pmd_fault(struct vm + break; + } + +- unlock_entry: +- put_locked_mapping_entry(mapping, pgoff, entry); + finish_iomap: + if (ops->iomap_end) { + int copied = PMD_SIZE; +@@ -1407,6 +1405,8 @@ static int dax_iomap_pmd_fault(struct vm + ops->iomap_end(inode, pos, PMD_SIZE, copied, iomap_flags, + &iomap); + } ++ unlock_entry: ++ put_locked_mapping_entry(mapping, pgoff, entry); + fallback: + if (result == VM_FAULT_FALLBACK) { + split_huge_pmd(vma, vmf->pmd, vmf->address); diff --git a/queue-4.11/dax-prevent-invalidation-of-mapped-dax-entries.patch b/queue-4.11/dax-prevent-invalidation-of-mapped-dax-entries.patch new file mode 100644 index 00000000000..9117129477f --- /dev/null +++ b/queue-4.11/dax-prevent-invalidation-of-mapped-dax-entries.patch @@ -0,0 +1,134 @@ +From 4636e70bb0a8b871998b6841a2e4b205cf2bc863 Mon Sep 17 00:00:00 2001 +From: Ross Zwisler +Date: Fri, 12 May 2017 15:46:47 -0700 +Subject: dax: prevent invalidation of mapped DAX entries + +From: Ross Zwisler + +commit 4636e70bb0a8b871998b6841a2e4b205cf2bc863 upstream. + +Patch series "mm,dax: Fix data corruption due to mmap inconsistency", +v4. + +This series fixes data corruption that can happen for DAX mounts when +page faults race with write(2) and as a result page tables get out of +sync with block mappings in the filesystem and thus data seen through +mmap is different from data seen through read(2). + +The series passes testing with t_mmap_stale test program from Ross and +also other mmap related tests on DAX filesystem. + +This patch (of 4): + +dax_invalidate_mapping_entry() currently removes DAX exceptional entries +only if they are clean and unlocked. This is done via: + + invalidate_mapping_pages() + invalidate_exceptional_entry() + dax_invalidate_mapping_entry() + +However, for page cache pages removed in invalidate_mapping_pages() +there is an additional criteria which is that the page must not be +mapped. This is noted in the comments above invalidate_mapping_pages() +and is checked in invalidate_inode_page(). + +For DAX entries this means that we can can end up in a situation where a +DAX exceptional entry, either a huge zero page or a regular DAX entry, +could end up mapped but without an associated radix tree entry. This is +inconsistent with the rest of the DAX code and with what happens in the +page cache case. + +We aren't able to unmap the DAX exceptional entry because according to +its comments invalidate_mapping_pages() isn't allowed to block, and +unmap_mapping_range() takes a write lock on the mapping->i_mmap_rwsem. + +Since we essentially never have unmapped DAX entries to evict from the +radix tree, just remove dax_invalidate_mapping_entry(). 
+ +Fixes: c6dcf52c23d2 ("mm: Invalidate DAX radix tree entries only if appropriate") +Link: http://lkml.kernel.org/r/20170510085419.27601-2-jack@suse.cz +Signed-off-by: Ross Zwisler +Signed-off-by: Jan Kara +Reported-by: Jan Kara +Cc: Dan Williams +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/dax.c | 29 ----------------------------- + include/linux/dax.h | 1 - + mm/truncate.c | 9 +++------ + 3 files changed, 3 insertions(+), 36 deletions(-) + +--- a/fs/dax.c ++++ b/fs/dax.c +@@ -507,35 +507,6 @@ int dax_delete_mapping_entry(struct addr + } + + /* +- * Invalidate exceptional DAX entry if easily possible. This handles DAX +- * entries for invalidate_inode_pages() so we evict the entry only if we can +- * do so without blocking. +- */ +-int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index) +-{ +- int ret = 0; +- void *entry, **slot; +- struct radix_tree_root *page_tree = &mapping->page_tree; +- +- spin_lock_irq(&mapping->tree_lock); +- entry = __radix_tree_lookup(page_tree, index, NULL, &slot); +- if (!entry || !radix_tree_exceptional_entry(entry) || +- slot_locked(mapping, slot)) +- goto out; +- if (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) || +- radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE)) +- goto out; +- radix_tree_delete(page_tree, index); +- mapping->nrexceptional--; +- ret = 1; +-out: +- spin_unlock_irq(&mapping->tree_lock); +- if (ret) +- dax_wake_mapping_entry_waiter(mapping, index, entry, true); +- return ret; +-} +- +-/* + * Invalidate exceptional DAX entry if it is clean. + */ + int dax_invalidate_mapping_entry_sync(struct address_space *mapping, +--- a/include/linux/dax.h ++++ b/include/linux/dax.h +@@ -41,7 +41,6 @@ ssize_t dax_iomap_rw(struct kiocb *iocb, + int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, + const struct iomap_ops *ops); + int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); +-int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index); + int dax_invalidate_mapping_entry_sync(struct address_space *mapping, + pgoff_t index); + void dax_wake_mapping_entry_waiter(struct address_space *mapping, +--- a/mm/truncate.c ++++ b/mm/truncate.c +@@ -67,17 +67,14 @@ static void truncate_exceptional_entry(s + + /* + * Invalidate exceptional entry if easily possible. This handles exceptional +- * entries for invalidate_inode_pages() so for DAX it evicts only unlocked and +- * clean entries. ++ * entries for invalidate_inode_pages(). + */ + static int invalidate_exceptional_entry(struct address_space *mapping, + pgoff_t index, void *entry) + { +- /* Handled by shmem itself */ +- if (shmem_mapping(mapping)) ++ /* Handled by shmem itself, or for DAX we do nothing. */ ++ if (shmem_mapping(mapping) || dax_mapping(mapping)) + return 1; +- if (dax_mapping(mapping)) +- return dax_invalidate_mapping_entry(mapping, index); + clear_shadow_entry(mapping, index, entry); + return 1; + } diff --git a/queue-4.11/device-dax-fix-cdev-leak.patch b/queue-4.11/device-dax-fix-cdev-leak.patch new file mode 100644 index 00000000000..2f7a5471cad --- /dev/null +++ b/queue-4.11/device-dax-fix-cdev-leak.patch @@ -0,0 +1,70 @@ +From ed01e50acdd3e4a640cf9ebd28a7e810c3ceca97 Mon Sep 17 00:00:00 2001 +From: Dan Williams +Date: Fri, 17 Mar 2017 12:48:09 -0600 +Subject: device-dax: fix cdev leak + +From: Dan Williams + +commit ed01e50acdd3e4a640cf9ebd28a7e810c3ceca97 upstream. + +If device_add() fails, cleanup the cdev. 
Otherwise, we leak a kobj_map() +with a stale device number. + +As Jason points out, there is a small possibility that userspace has +opened and mapped the device in the time between cdev_add() and the +device_add() failure. We need a new kill_dax_dev() helper to invalidate +any established mappings. + +Fixes: ba09c01d2fa8 ("dax: convert to the cdev api") +Reported-by: Jason Gunthorpe +Signed-off-by: Dan Williams +Signed-off-by: Logan Gunthorpe +Reviewed-by: Johannes Thumshirn +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/dax/dax.c | 15 +++++++++++---- + 1 file changed, 11 insertions(+), 4 deletions(-) + +--- a/drivers/dax/dax.c ++++ b/drivers/dax/dax.c +@@ -703,13 +703,10 @@ static void dax_dev_release(struct devic + kfree(dax_dev); + } + +-static void unregister_dax_dev(void *dev) ++static void kill_dax_dev(struct dax_dev *dax_dev) + { +- struct dax_dev *dax_dev = to_dax_dev(dev); + struct cdev *cdev = &dax_dev->cdev; + +- dev_dbg(dev, "%s\n", __func__); +- + /* + * Note, rcu is not protecting the liveness of dax_dev, rcu is + * ensuring that any fault handlers that might have seen +@@ -721,6 +718,15 @@ static void unregister_dax_dev(void *dev + synchronize_srcu(&dax_srcu); + unmap_mapping_range(dax_dev->inode->i_mapping, 0, 0, 1); + cdev_del(cdev); ++} ++ ++static void unregister_dax_dev(void *dev) ++{ ++ struct dax_dev *dax_dev = to_dax_dev(dev); ++ ++ dev_dbg(dev, "%s\n", __func__); ++ ++ kill_dax_dev(dax_dev); + device_unregister(dev); + } + +@@ -797,6 +803,7 @@ struct dax_dev *devm_create_dax_dev(stru + dev_set_name(dev, "dax%d.%d", dax_region->id, dax_dev->id); + rc = device_add(dev); + if (rc) { ++ kill_dax_dev(dax_dev); + put_device(dev); + return ERR_PTR(rc); + } diff --git a/queue-4.11/device-dax-fix-sysfs-attribute-deadlock.patch b/queue-4.11/device-dax-fix-sysfs-attribute-deadlock.patch new file mode 100644 index 00000000000..ae5fa2609b1 --- /dev/null +++ b/queue-4.11/device-dax-fix-sysfs-attribute-deadlock.patch @@ -0,0 +1,129 @@ +From 565851c972b50612f3a4542e26879ffb3e906fc2 Mon Sep 17 00:00:00 2001 +From: Dan Williams +Date: Sun, 30 Apr 2017 06:57:01 -0700 +Subject: device-dax: fix sysfs attribute deadlock + +From: Dan Williams + +commit 565851c972b50612f3a4542e26879ffb3e906fc2 upstream. + +Usage of device_lock() for dax_region attributes is unnecessary and +deadlock prone. It's unnecessary because the order of registration / +un-registration guarantees that drvdata is always valid. It's deadlock +prone because it sets up this situation: + + ndctl D 0 2170 2082 0x00000000 + Call Trace: + __schedule+0x31f/0x980 + schedule+0x3d/0x90 + schedule_preempt_disabled+0x15/0x20 + __mutex_lock+0x402/0x980 + ? __mutex_lock+0x158/0x980 + ? align_show+0x2b/0x80 [dax] + ? kernfs_seq_start+0x2f/0x90 + mutex_lock_nested+0x1b/0x20 + align_show+0x2b/0x80 [dax] + dev_attr_show+0x20/0x50 + + ndctl D 0 2186 2079 0x00000000 + Call Trace: + __schedule+0x31f/0x980 + schedule+0x3d/0x90 + __kernfs_remove+0x1f6/0x340 + ? kernfs_remove_by_name_ns+0x45/0xa0 + ? 
remove_wait_queue+0x70/0x70 + kernfs_remove_by_name_ns+0x45/0xa0 + remove_files.isra.1+0x35/0x70 + sysfs_remove_group+0x44/0x90 + sysfs_remove_groups+0x2e/0x50 + dax_region_unregister+0x25/0x40 [dax] + devm_action_release+0xf/0x20 + release_nodes+0x16d/0x2b0 + devres_release_all+0x3c/0x60 + device_release_driver_internal+0x17d/0x220 + device_release_driver+0x12/0x20 + unbind_store+0x112/0x160 + +ndctl/2170 is trying to acquire the device_lock() to read an attribute, +and ndctl/2186 is holding the device_lock() while trying to drain all +active attribute readers. + +Thanks to Yi Zhang for the reproduction script. + +Fixes: d7fe1a67f658 ("dax: add region 'id', 'size', and 'align' attributes") +Reported-by: Yi Zhang +Signed-off-by: Dan Williams +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/dax/dax.c | 40 ++++++++++++---------------------------- + 1 file changed, 12 insertions(+), 28 deletions(-) + +--- a/drivers/dax/dax.c ++++ b/drivers/dax/dax.c +@@ -77,36 +77,27 @@ struct dax_dev { + struct resource res[0]; + }; + ++/* ++ * Rely on the fact that drvdata is set before the attributes are ++ * registered, and that the attributes are unregistered before drvdata ++ * is cleared to assume that drvdata is always valid. ++ */ + static ssize_t id_show(struct device *dev, + struct device_attribute *attr, char *buf) + { +- struct dax_region *dax_region; +- ssize_t rc = -ENXIO; ++ struct dax_region *dax_region = dev_get_drvdata(dev); + +- device_lock(dev); +- dax_region = dev_get_drvdata(dev); +- if (dax_region) +- rc = sprintf(buf, "%d\n", dax_region->id); +- device_unlock(dev); +- +- return rc; ++ return sprintf(buf, "%d\n", dax_region->id); + } + static DEVICE_ATTR_RO(id); + + static ssize_t region_size_show(struct device *dev, + struct device_attribute *attr, char *buf) + { +- struct dax_region *dax_region; +- ssize_t rc = -ENXIO; +- +- device_lock(dev); +- dax_region = dev_get_drvdata(dev); +- if (dax_region) +- rc = sprintf(buf, "%llu\n", (unsigned long long) +- resource_size(&dax_region->res)); +- device_unlock(dev); ++ struct dax_region *dax_region = dev_get_drvdata(dev); + +- return rc; ++ return sprintf(buf, "%llu\n", (unsigned long long) ++ resource_size(&dax_region->res)); + } + static struct device_attribute dev_attr_region_size = __ATTR(size, 0444, + region_size_show, NULL); +@@ -114,16 +105,9 @@ static struct device_attribute dev_attr_ + static ssize_t align_show(struct device *dev, + struct device_attribute *attr, char *buf) + { +- struct dax_region *dax_region; +- ssize_t rc = -ENXIO; +- +- device_lock(dev); +- dax_region = dev_get_drvdata(dev); +- if (dax_region) +- rc = sprintf(buf, "%u\n", dax_region->align); +- device_unlock(dev); ++ struct dax_region *dax_region = dev_get_drvdata(dev); + +- return rc; ++ return sprintf(buf, "%u\n", dax_region->align); + } + static DEVICE_ATTR_RO(align); + diff --git a/queue-4.11/ext4-return-to-starting-transaction-in-ext4_dax_huge_fault.patch b/queue-4.11/ext4-return-to-starting-transaction-in-ext4_dax_huge_fault.patch new file mode 100644 index 00000000000..4361b1f7573 --- /dev/null +++ b/queue-4.11/ext4-return-to-starting-transaction-in-ext4_dax_huge_fault.patch @@ -0,0 +1,66 @@ +From fb26a1cbed8c90025572d48bc9eabe59f7571e88 Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Fri, 12 May 2017 15:46:54 -0700 +Subject: ext4: return to starting transaction in ext4_dax_huge_fault() + +From: Jan Kara + +commit fb26a1cbed8c90025572d48bc9eabe59f7571e88 upstream. 
+ +DAX will return to locking exceptional entry before mapping blocks for a +page fault to fix possible races with concurrent writes. To avoid lock +inversion between exceptional entry lock and transaction start, start +the transaction already in ext4_dax_huge_fault(). + +Fixes: 9f141d6ef6258a3a37a045842d9ba7e68f368956 +Link: http://lkml.kernel.org/r/20170510085419.27601-4-jack@suse.cz +Signed-off-by: Jan Kara +Cc: Ross Zwisler +Cc: Dan Williams +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/file.c | 21 +++++++++++++++++---- + 1 file changed, 17 insertions(+), 4 deletions(-) + +--- a/fs/ext4/file.c ++++ b/fs/ext4/file.c +@@ -257,6 +257,7 @@ static int ext4_dax_huge_fault(struct vm + enum page_entry_size pe_size) + { + int result; ++ handle_t *handle = NULL; + struct inode *inode = file_inode(vmf->vma->vm_file); + struct super_block *sb = inode->i_sb; + bool write = vmf->flags & FAULT_FLAG_WRITE; +@@ -264,12 +265,24 @@ static int ext4_dax_huge_fault(struct vm + if (write) { + sb_start_pagefault(sb); + file_update_time(vmf->vma->vm_file); ++ down_read(&EXT4_I(inode)->i_mmap_sem); ++ handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, ++ EXT4_DATA_TRANS_BLOCKS(sb)); ++ } else { ++ down_read(&EXT4_I(inode)->i_mmap_sem); + } +- down_read(&EXT4_I(inode)->i_mmap_sem); +- result = dax_iomap_fault(vmf, pe_size, &ext4_iomap_ops); +- up_read(&EXT4_I(inode)->i_mmap_sem); +- if (write) ++ if (!IS_ERR(handle)) ++ result = dax_iomap_fault(vmf, pe_size, &ext4_iomap_ops); ++ else ++ result = VM_FAULT_SIGBUS; ++ if (write) { ++ if (!IS_ERR(handle)) ++ ext4_journal_stop(handle); ++ up_read(&EXT4_I(inode)->i_mmap_sem); + sb_end_pagefault(sb); ++ } else { ++ up_read(&EXT4_I(inode)->i_mmap_sem); ++ } + + return result; + } diff --git a/queue-4.11/f2fs-check-entire-encrypted-bigname-when-finding-a-dentry.patch b/queue-4.11/f2fs-check-entire-encrypted-bigname-when-finding-a-dentry.patch new file mode 100644 index 00000000000..c34aa266f10 --- /dev/null +++ b/queue-4.11/f2fs-check-entire-encrypted-bigname-when-finding-a-dentry.patch @@ -0,0 +1,158 @@ +From 6332cd32c8290a80e929fc044dc5bdba77396e33 Mon Sep 17 00:00:00 2001 +From: Jaegeuk Kim +Date: Mon, 24 Apr 2017 10:00:08 -0700 +Subject: f2fs: check entire encrypted bigname when finding a dentry + +From: Jaegeuk Kim + +commit 6332cd32c8290a80e929fc044dc5bdba77396e33 upstream. + +If user has no key under an encrypted dir, fscrypt gives digested dentries. +Previously, when looking up a dentry, f2fs only checks its hash value with +first 4 bytes of the digested dentry, which didn't handle hash collisions fully. +This patch enhances to check entire dentry bytes likewise ext4. 
+ +Eric reported how to reproduce this issue by: + + # seq -f "edir/abcdefghijklmnopqrstuvwxyz012345%.0f" 100000 | xargs touch + # find edir -type f | xargs stat -c %i | sort | uniq | wc -l +100000 + # sync + # echo 3 > /proc/sys/vm/drop_caches + # keyctl new_session + # find edir -type f | xargs stat -c %i | sort | uniq | wc -l +99999 + +Reported-by: Eric Biggers +Signed-off-by: Jaegeuk Kim +(fixed f2fs_dentry_hash() to work even when the hash is 0) +Signed-off-by: Eric Biggers +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman + +--- + fs/f2fs/dir.c | 37 +++++++++++++++++++++---------------- + fs/f2fs/f2fs.h | 3 ++- + fs/f2fs/hash.c | 7 ++++++- + fs/f2fs/inline.c | 4 ++-- + 4 files changed, 31 insertions(+), 20 deletions(-) + +--- a/fs/f2fs/dir.c ++++ b/fs/f2fs/dir.c +@@ -130,19 +130,29 @@ struct f2fs_dir_entry *find_target_dentr + continue; + } + +- /* encrypted case */ ++ if (de->hash_code != namehash) ++ goto not_match; ++ + de_name.name = d->filename[bit_pos]; + de_name.len = le16_to_cpu(de->name_len); + +- /* show encrypted name */ +- if (fname->hash) { +- if (de->hash_code == cpu_to_le32(fname->hash)) +- goto found; +- } else if (de_name.len == name->len && +- de->hash_code == namehash && +- !memcmp(de_name.name, name->name, name->len)) ++#ifdef CONFIG_F2FS_FS_ENCRYPTION ++ if (unlikely(!name->name)) { ++ if (fname->usr_fname->name[0] == '_') { ++ if (de_name.len >= 16 && ++ !memcmp(de_name.name + de_name.len - 16, ++ fname->crypto_buf.name + 8, 16)) ++ goto found; ++ goto not_match; ++ } ++ name->name = fname->crypto_buf.name; ++ name->len = fname->crypto_buf.len; ++ } ++#endif ++ if (de_name.len == name->len && ++ !memcmp(de_name.name, name->name, name->len)) + goto found; +- ++not_match: + if (max_slots && max_len > *max_slots) + *max_slots = max_len; + max_len = 0; +@@ -170,12 +180,7 @@ static struct f2fs_dir_entry *find_in_le + struct f2fs_dir_entry *de = NULL; + bool room = false; + int max_slots; +- f2fs_hash_t namehash; +- +- if(fname->hash) +- namehash = cpu_to_le32(fname->hash); +- else +- namehash = f2fs_dentry_hash(&name); ++ f2fs_hash_t namehash = f2fs_dentry_hash(&name, fname); + + nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); + nblock = bucket_blocks(level); +@@ -541,7 +546,7 @@ int f2fs_add_regular_entry(struct inode + + level = 0; + slots = GET_DENTRY_SLOTS(new_name->len); +- dentry_hash = f2fs_dentry_hash(new_name); ++ dentry_hash = f2fs_dentry_hash(new_name, NULL); + + current_depth = F2FS_I(dir)->i_current_depth; + if (F2FS_I(dir)->chash == dentry_hash) { +--- a/fs/f2fs/f2fs.h ++++ b/fs/f2fs/f2fs.h +@@ -2133,7 +2133,8 @@ int sanity_check_ckpt(struct f2fs_sb_inf + /* + * hash.c + */ +-f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info); ++f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info, ++ struct fscrypt_name *fname); + + /* + * node.c +--- a/fs/f2fs/hash.c ++++ b/fs/f2fs/hash.c +@@ -70,7 +70,8 @@ static void str2hashbuf(const unsigned c + *buf++ = pad; + } + +-f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info) ++f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info, ++ struct fscrypt_name *fname) + { + __u32 hash; + f2fs_hash_t f2fs_hash; +@@ -79,6 +80,10 @@ f2fs_hash_t f2fs_dentry_hash(const struc + const unsigned char *name = name_info->name; + size_t len = name_info->len; + ++ /* encrypted bigname case */ ++ if (fname && !fname->disk_name.name) ++ return cpu_to_le32(fname->hash); ++ + if (is_dot_dotdot(name_info)) + return 0; + +--- a/fs/f2fs/inline.c ++++ b/fs/f2fs/inline.c +@@ -296,7 +296,7 @@ struct 
f2fs_dir_entry *find_in_inline_di + return NULL; + } + +- namehash = f2fs_dentry_hash(&name); ++ namehash = f2fs_dentry_hash(&name, fname); + + inline_dentry = inline_data_addr(ipage); + +@@ -533,7 +533,7 @@ int f2fs_add_inline_entry(struct inode * + + f2fs_wait_on_page_writeback(ipage, NODE, true); + +- name_hash = f2fs_dentry_hash(new_name); ++ name_hash = f2fs_dentry_hash(new_name, NULL); + make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2); + f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos); + diff --git a/queue-4.11/f2fs-fix-fs-corruption-due-to-zero-inode-page.patch b/queue-4.11/f2fs-fix-fs-corruption-due-to-zero-inode-page.patch new file mode 100644 index 00000000000..4044d315ab0 --- /dev/null +++ b/queue-4.11/f2fs-fix-fs-corruption-due-to-zero-inode-page.patch @@ -0,0 +1,152 @@ +From 9bb02c3627f46e50246bf7ab957b56ffbef623cb Mon Sep 17 00:00:00 2001 +From: Jaegeuk Kim +Date: Tue, 11 Apr 2017 19:01:26 -0700 +Subject: f2fs: fix fs corruption due to zero inode page + +From: Jaegeuk Kim + +commit 9bb02c3627f46e50246bf7ab957b56ffbef623cb upstream. + +This patch fixes the following scenario. + +- f2fs_create/f2fs_mkdir - write_checkpoint + - f2fs_mark_inode_dirty_sync - block_operations + - f2fs_lock_all + - f2fs_sync_inode_meta + - f2fs_unlock_all + - sync_inode_metadata + - f2fs_lock_op + - f2fs_write_inode + - update_inode_page + - get_node_page + return -ENOENT + - new_inode_page + - fill_node_footer + - f2fs_mark_inode_dirty_sync + - ... + - f2fs_unlock_op + - f2fs_inode_synced + - f2fs_lock_all + - do_checkpoint + +In this checkpoint, we can get an inode page which contains zeros having valid +node footer only. + +Signed-off-by: Jaegeuk Kim +Signed-off-by: Greg Kroah-Hartman + +--- + fs/f2fs/inode.c | 2 +- + fs/f2fs/namei.c | 20 ++++++++++---------- + 2 files changed, 11 insertions(+), 11 deletions(-) + +--- a/fs/f2fs/inode.c ++++ b/fs/f2fs/inode.c +@@ -316,7 +316,6 @@ retry: + } else if (err != -ENOENT) { + f2fs_stop_checkpoint(sbi, false); + } +- f2fs_inode_synced(inode); + return 0; + } + ret = update_inode(inode, node_page); +@@ -448,6 +447,7 @@ void handle_failed_inode(struct inode *i + * in a panic when flushing dirty inodes in gdirty_list. + */ + update_inode_page(inode); ++ f2fs_inode_synced(inode); + + /* don't make bad inode, since it becomes a regular file. 
*/ + unlock_new_inode(inode); +--- a/fs/f2fs/namei.c ++++ b/fs/f2fs/namei.c +@@ -148,8 +148,6 @@ static int f2fs_create(struct inode *dir + inode->i_mapping->a_ops = &f2fs_dblock_aops; + ino = inode->i_ino; + +- f2fs_balance_fs(sbi, true); +- + f2fs_lock_op(sbi); + err = f2fs_add_link(dentry, inode); + if (err) +@@ -163,6 +161,8 @@ static int f2fs_create(struct inode *dir + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); ++ ++ f2fs_balance_fs(sbi, true); + return 0; + out: + handle_failed_inode(inode); +@@ -423,8 +423,6 @@ static int f2fs_symlink(struct inode *di + inode_nohighmem(inode); + inode->i_mapping->a_ops = &f2fs_dblock_aops; + +- f2fs_balance_fs(sbi, true); +- + f2fs_lock_op(sbi); + err = f2fs_add_link(dentry, inode); + if (err) +@@ -487,6 +485,8 @@ err_out: + } + + kfree(sd); ++ ++ f2fs_balance_fs(sbi, true); + return err; + out: + handle_failed_inode(inode); +@@ -508,8 +508,6 @@ static int f2fs_mkdir(struct inode *dir, + inode->i_mapping->a_ops = &f2fs_dblock_aops; + mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO); + +- f2fs_balance_fs(sbi, true); +- + set_inode_flag(inode, FI_INC_LINK); + f2fs_lock_op(sbi); + err = f2fs_add_link(dentry, inode); +@@ -524,6 +522,8 @@ static int f2fs_mkdir(struct inode *dir, + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); ++ ++ f2fs_balance_fs(sbi, true); + return 0; + + out_fail: +@@ -554,8 +554,6 @@ static int f2fs_mknod(struct inode *dir, + init_special_inode(inode, inode->i_mode, rdev); + inode->i_op = &f2fs_special_inode_operations; + +- f2fs_balance_fs(sbi, true); +- + f2fs_lock_op(sbi); + err = f2fs_add_link(dentry, inode); + if (err) +@@ -569,6 +567,8 @@ static int f2fs_mknod(struct inode *dir, + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); ++ ++ f2fs_balance_fs(sbi, true); + return 0; + out: + handle_failed_inode(inode); +@@ -595,8 +595,6 @@ static int __f2fs_tmpfile(struct inode * + inode->i_mapping->a_ops = &f2fs_dblock_aops; + } + +- f2fs_balance_fs(sbi, true); +- + f2fs_lock_op(sbi); + err = acquire_orphan_inode(sbi); + if (err) +@@ -622,6 +620,8 @@ static int __f2fs_tmpfile(struct inode * + /* link_count was changed by d_tmpfile as well. */ + f2fs_unlock_op(sbi); + unlock_new_inode(inode); ++ ++ f2fs_balance_fs(sbi, true); + return 0; + + release_out: diff --git a/queue-4.11/f2fs-fix-multiple-f2fs_add_link-having-same-name-for-inline-dentry.patch b/queue-4.11/f2fs-fix-multiple-f2fs_add_link-having-same-name-for-inline-dentry.patch new file mode 100644 index 00000000000..9779a744aa5 --- /dev/null +++ b/queue-4.11/f2fs-fix-multiple-f2fs_add_link-having-same-name-for-inline-dentry.patch @@ -0,0 +1,56 @@ +From d3bb910c15d75ee3340311c64a1c05985bb663a3 Mon Sep 17 00:00:00 2001 +From: Sheng Yong +Date: Sat, 22 Apr 2017 10:39:20 +0800 +Subject: f2fs: fix multiple f2fs_add_link() having same name for inline dentry + +From: Sheng Yong + +commit d3bb910c15d75ee3340311c64a1c05985bb663a3 upstream. + +Commit 88c5c13a5027 (f2fs: fix multiple f2fs_add_link() calls having +same name) does not cover the scenario where inline dentry is enabled. +In that case, F2FS_I(dir)->task will be NULL, and __f2fs_add_link will +lookup dentries one more time. + +This patch fixes it by moving the assigment of current task to a upper +level to cover both normal and inline dentry. 
+ +Fixes: 88c5c13a5027 (f2fs: fix multiple f2fs_add_link() calls having same name) +Signed-off-by: Sheng Yong +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Greg Kroah-Hartman + +--- + fs/f2fs/dir.c | 13 ++++++------- + 1 file changed, 6 insertions(+), 7 deletions(-) + +--- a/fs/f2fs/dir.c ++++ b/fs/f2fs/dir.c +@@ -207,13 +207,9 @@ static struct f2fs_dir_entry *find_in_le + f2fs_put_page(dentry_page, 0); + } + +- /* This is to increase the speed of f2fs_create */ +- if (!de && room) { +- F2FS_I(dir)->task = current; +- if (F2FS_I(dir)->chash != namehash) { +- F2FS_I(dir)->chash = namehash; +- F2FS_I(dir)->clevel = level; +- } ++ if (!de && room && F2FS_I(dir)->chash != namehash) { ++ F2FS_I(dir)->chash = namehash; ++ F2FS_I(dir)->clevel = level; + } + + return de; +@@ -254,6 +250,9 @@ struct f2fs_dir_entry *__f2fs_find_entry + break; + } + out: ++ /* This is to increase the speed of f2fs_create */ ++ if (!de) ++ F2FS_I(dir)->task = current; + return de; + } + diff --git a/queue-4.11/f2fs-fix-wrong-max-cost-initialization.patch b/queue-4.11/f2fs-fix-wrong-max-cost-initialization.patch new file mode 100644 index 00000000000..6a28be3bc71 --- /dev/null +++ b/queue-4.11/f2fs-fix-wrong-max-cost-initialization.patch @@ -0,0 +1,31 @@ +From c541a51b8ce81d003b02ed67ad3604a2e6220e3e Mon Sep 17 00:00:00 2001 +From: Jaegeuk Kim +Date: Sat, 25 Mar 2017 00:03:02 -0700 +Subject: f2fs: fix wrong max cost initialization + +From: Jaegeuk Kim + +commit c541a51b8ce81d003b02ed67ad3604a2e6220e3e upstream. + +This patch fixes missing increased max cost caused by a patch that we increased +cose of data segments in greedy algorithm. + +Fixes: b9cd20619 "f2fs: node segment is prior to data segment selected victim" +Signed-off-by: Jaegeuk Kim +Signed-off-by: Greg Kroah-Hartman + +--- + fs/f2fs/gc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/f2fs/gc.c ++++ b/fs/f2fs/gc.c +@@ -182,7 +182,7 @@ static unsigned int get_max_cost(struct + if (p->alloc_mode == SSR) + return sbi->blocks_per_seg; + if (p->gc_mode == GC_GREEDY) +- return sbi->blocks_per_seg * p->ofs_unit; ++ return 2 * sbi->blocks_per_seg * p->ofs_unit; + else if (p->gc_mode == GC_CB) + return UINT_MAX; + else /* No other gc_mode */ diff --git a/queue-4.11/f2fs-make-flush-bios-explicitely-sync.patch b/queue-4.11/f2fs-make-flush-bios-explicitely-sync.patch new file mode 100644 index 00000000000..bef46bccacd --- /dev/null +++ b/queue-4.11/f2fs-make-flush-bios-explicitely-sync.patch @@ -0,0 +1,66 @@ +From 3adc5fcb7edf5f8dfe8d37dcb50ba6b30077c905 Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Tue, 2 May 2017 17:03:47 +0200 +Subject: f2fs: Make flush bios explicitely sync + +From: Jan Kara + +commit 3adc5fcb7edf5f8dfe8d37dcb50ba6b30077c905 upstream. + +Commit b685d3d65ac7 "block: treat REQ_FUA and REQ_PREFLUSH as +synchronous" removed REQ_SYNC flag from WRITE_{FUA|PREFLUSH|...} +definitions. generic_make_request_checks() however strips REQ_FUA and +REQ_PREFLUSH flags from a bio when the storage doesn't report volatile +write cache and thus write effectively becomes asynchronous which can +lead to performance regressions. + +Fix the problem by making sure all bios which are synchronous are +properly marked with REQ_SYNC. 
+ +Fixes: b685d3d65ac791406e0dfd8779cc9b3707fea5a3 +CC: Jaegeuk Kim +CC: linux-f2fs-devel@lists.sourceforge.net +Signed-off-by: Jan Kara +Acked-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Greg Kroah-Hartman + +--- + fs/f2fs/data.c | 2 +- + fs/f2fs/segment.c | 2 +- + fs/f2fs/super.c | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +--- a/fs/f2fs/data.c ++++ b/fs/f2fs/data.c +@@ -309,7 +309,7 @@ static void __f2fs_submit_merged_bio(str + if (type >= META_FLUSH) { + io->fio.type = META_FLUSH; + io->fio.op = REQ_OP_WRITE; +- io->fio.op_flags = REQ_META | REQ_PRIO; ++ io->fio.op_flags = REQ_META | REQ_PRIO | REQ_SYNC; + if (!test_opt(sbi, NOBARRIER)) + io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA; + } +--- a/fs/f2fs/segment.c ++++ b/fs/f2fs/segment.c +@@ -416,7 +416,7 @@ static int __submit_flush_wait(struct bl + struct bio *bio = f2fs_bio_alloc(0); + int ret; + +- bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; ++ bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH; + bio->bi_bdev = bdev; + ret = submit_bio_wait(bio); + bio_put(bio); +--- a/fs/f2fs/super.c ++++ b/fs/f2fs/super.c +@@ -1307,7 +1307,7 @@ static int __f2fs_commit_super(struct bu + unlock_buffer(bh); + + /* it's rare case, we can do fua all the time */ +- return __sync_dirty_buffer(bh, REQ_PREFLUSH | REQ_FUA); ++ return __sync_dirty_buffer(bh, REQ_SYNC | REQ_PREFLUSH | REQ_FUA); + } + + static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, diff --git a/queue-4.11/fscrypt-avoid-collisions-when-presenting-long-encrypted-filenames.patch b/queue-4.11/fscrypt-avoid-collisions-when-presenting-long-encrypted-filenames.patch new file mode 100644 index 00000000000..a0d036d5668 --- /dev/null +++ b/queue-4.11/fscrypt-avoid-collisions-when-presenting-long-encrypted-filenames.patch @@ -0,0 +1,111 @@ +From 6b06cdee81d68a8a829ad8e8d0f31d6836744af9 Mon Sep 17 00:00:00 2001 +From: Eric Biggers +Date: Mon, 24 Apr 2017 10:00:09 -0700 +Subject: fscrypt: avoid collisions when presenting long encrypted filenames + +From: Eric Biggers + +commit 6b06cdee81d68a8a829ad8e8d0f31d6836744af9 upstream. + +When accessing an encrypted directory without the key, userspace must +operate on filenames derived from the ciphertext names, which contain +arbitrary bytes. Since we must support filenames as long as NAME_MAX, +we can't always just base64-encode the ciphertext, since that may make +it too long. Currently, this is solved by presenting long names in an +abbreviated form containing any needed filesystem-specific hashes (e.g. +to identify a directory block), then the last 16 bytes of ciphertext. +This needs to be sufficient to identify the actual name on lookup. + +However, there is a bug. It seems to have been assumed that due to the +use of a CBC (ciphertext block chaining)-based encryption mode, the last +16 bytes (i.e. the AES block size) of ciphertext would depend on the +full plaintext, preventing collisions. However, we actually use CBC +with ciphertext stealing (CTS), which handles the last two blocks +specially, causing them to appear "flipped". Thus, it's actually the +second-to-last block which depends on the full plaintext. + +This caused long filenames that differ only near the end of their +plaintexts to, when observed without the key, point to the wrong inode +and be undeletable. 
For example, with ext4: + + # echo pass | e4crypt add_key -p 16 edir/ + # seq -f "edir/abcdefghijklmnopqrstuvwxyz012345%.0f" 100000 | xargs touch + # find edir/ -type f | xargs stat -c %i | sort | uniq | wc -l + 100000 + # sync + # echo 3 > /proc/sys/vm/drop_caches + # keyctl new_session + # find edir/ -type f | xargs stat -c %i | sort | uniq | wc -l + 2004 + # rm -rf edir/ + rm: cannot remove 'edir/_A7nNFi3rhkEQlJ6P,hdzluhODKOeWx5V': Structure needs cleaning + ... + +To fix this, when presenting long encrypted filenames, encode the +second-to-last block of ciphertext rather than the last 16 bytes. + +Although it would be nice to solve this without depending on a specific +encryption mode, that would mean doing a cryptographic hash like SHA-256 +which would be much less efficient. This way is sufficient for now, and +it's still compatible with encryption modes like HEH which are strong +pseudorandom permutations. Also, changing the presented names is still +allowed at any time because they are only provided to allow applications +to do things like delete encrypted directories. They're not designed to +be used to persistently identify files --- which would be hard to do +anyway, given that they're encrypted after all. + +For ease of backports, this patch only makes the minimal fix to both +ext4 and f2fs. It leaves ubifs as-is, since ubifs doesn't compare the +ciphertext block yet. Follow-on patches will clean things up properly +and make the filesystems use a shared helper function. + +Fixes: 5de0b4d0cd15 ("ext4 crypto: simplify and speed up filename encryption") +Reported-by: Gwendal Grignou +Signed-off-by: Eric Biggers +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman + +--- + fs/crypto/fname.c | 2 +- + fs/ext4/namei.c | 4 ++-- + fs/f2fs/dir.c | 4 ++-- + 3 files changed, 5 insertions(+), 5 deletions(-) + +--- a/fs/crypto/fname.c ++++ b/fs/crypto/fname.c +@@ -300,7 +300,7 @@ int fscrypt_fname_disk_to_usr(struct ino + } else { + memset(buf, 0, 8); + } +- memcpy(buf + 8, iname->name + iname->len - 16, 16); ++ memcpy(buf + 8, iname->name + ((iname->len - 17) & ~15), 16); + oname->name[0] = '_'; + oname->len = 1 + digest_encode(buf, 24, oname->name + 1); + return 0; +--- a/fs/ext4/namei.c ++++ b/fs/ext4/namei.c +@@ -1255,9 +1255,9 @@ static inline int ext4_match(struct ext4 + if (unlikely(!name)) { + if (fname->usr_fname->name[0] == '_') { + int ret; +- if (de->name_len < 16) ++ if (de->name_len <= 32) + return 0; +- ret = memcmp(de->name + de->name_len - 16, ++ ret = memcmp(de->name + ((de->name_len - 17) & ~15), + fname->crypto_buf.name + 8, 16); + return (ret == 0) ? 
1 : 0; + } +--- a/fs/f2fs/dir.c ++++ b/fs/f2fs/dir.c +@@ -139,8 +139,8 @@ struct f2fs_dir_entry *find_target_dentr + #ifdef CONFIG_F2FS_FS_ENCRYPTION + if (unlikely(!name->name)) { + if (fname->usr_fname->name[0] == '_') { +- if (de_name.len >= 16 && +- !memcmp(de_name.name + de_name.len - 16, ++ if (de_name.len > 32 && ++ !memcmp(de_name.name + ((de_name.len - 17) & ~15), + fname->crypto_buf.name + 8, 16)) + goto found; + goto not_match; diff --git a/queue-4.11/fscrypt-fix-context-consistency-check-when-key-s-unavailable.patch b/queue-4.11/fscrypt-fix-context-consistency-check-when-key-s-unavailable.patch new file mode 100644 index 00000000000..6df1795bd0c --- /dev/null +++ b/queue-4.11/fscrypt-fix-context-consistency-check-when-key-s-unavailable.patch @@ -0,0 +1,166 @@ +From 272f98f6846277378e1758a49a49d7bf39343c02 Mon Sep 17 00:00:00 2001 +From: Eric Biggers +Date: Fri, 7 Apr 2017 10:58:37 -0700 +Subject: fscrypt: fix context consistency check when key(s) unavailable + +From: Eric Biggers + +commit 272f98f6846277378e1758a49a49d7bf39343c02 upstream. + +To mitigate some types of offline attacks, filesystem encryption is +designed to enforce that all files in an encrypted directory tree use +the same encryption policy (i.e. the same encryption context excluding +the nonce). However, the fscrypt_has_permitted_context() function which +enforces this relies on comparing struct fscrypt_info's, which are only +available when we have the encryption keys. This can cause two +incorrect behaviors: + +1. If we have the parent directory's key but not the child's key, or + vice versa, then fscrypt_has_permitted_context() returned false, + causing applications to see EPERM or ENOKEY. This is incorrect if + the encryption contexts are in fact consistent. Although we'd + normally have either both keys or neither key in that case since the + master_key_descriptors would be the same, this is not guaranteed + because keys can be added or removed from keyrings at any time. + +2. If we have neither the parent's key nor the child's key, then + fscrypt_has_permitted_context() returned true, causing applications + to see no error (or else an error for some other reason). This is + incorrect if the encryption contexts are in fact inconsistent, since + in that case we should deny access. + +To fix this, retrieve and compare the fscrypt_contexts if we are unable +to set up both fscrypt_infos. + +While this slightly hurts performance when accessing an encrypted +directory tree without the key, this isn't a case we really need to be +optimizing for; access *with* the key is much more important. +Furthermore, the performance hit is barely noticeable given that we are +already retrieving the fscrypt_context and doing two keyring searches in +fscrypt_get_encryption_info(). If we ever actually wanted to optimize +this case we might start by caching the fscrypt_contexts. + +Signed-off-by: Eric Biggers +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman + +--- + fs/crypto/policy.c | 87 +++++++++++++++++++++++++++++++++++++++++------------ + 1 file changed, 68 insertions(+), 19 deletions(-) + +--- a/fs/crypto/policy.c ++++ b/fs/crypto/policy.c +@@ -143,27 +143,61 @@ int fscrypt_ioctl_get_policy(struct file + } + EXPORT_SYMBOL(fscrypt_ioctl_get_policy); + ++/** ++ * fscrypt_has_permitted_context() - is a file's encryption policy permitted ++ * within its directory? 
++ * ++ * @parent: inode for parent directory ++ * @child: inode for file being looked up, opened, or linked into @parent ++ * ++ * Filesystems must call this before permitting access to an inode in a ++ * situation where the parent directory is encrypted (either before allowing ++ * ->lookup() to succeed, or for a regular file before allowing it to be opened) ++ * and before any operation that involves linking an inode into an encrypted ++ * directory, including link, rename, and cross rename. It enforces the ++ * constraint that within a given encrypted directory tree, all files use the ++ * same encryption policy. The pre-access check is needed to detect potentially ++ * malicious offline violations of this constraint, while the link and rename ++ * checks are needed to prevent online violations of this constraint. ++ * ++ * Return: 1 if permitted, 0 if forbidden. If forbidden, the caller must fail ++ * the filesystem operation with EPERM. ++ */ + int fscrypt_has_permitted_context(struct inode *parent, struct inode *child) + { +- struct fscrypt_info *parent_ci, *child_ci; ++ const struct fscrypt_operations *cops = parent->i_sb->s_cop; ++ const struct fscrypt_info *parent_ci, *child_ci; ++ struct fscrypt_context parent_ctx, child_ctx; + int res; + +- if ((parent == NULL) || (child == NULL)) { +- printk(KERN_ERR "parent %p child %p\n", parent, child); +- BUG_ON(1); +- } +- + /* No restrictions on file types which are never encrypted */ + if (!S_ISREG(child->i_mode) && !S_ISDIR(child->i_mode) && + !S_ISLNK(child->i_mode)) + return 1; + +- /* no restrictions if the parent directory is not encrypted */ +- if (!parent->i_sb->s_cop->is_encrypted(parent)) ++ /* No restrictions if the parent directory is unencrypted */ ++ if (!cops->is_encrypted(parent)) + return 1; +- /* if the child directory is not encrypted, this is always a problem */ +- if (!parent->i_sb->s_cop->is_encrypted(child)) ++ ++ /* Encrypted directories must not contain unencrypted files */ ++ if (!cops->is_encrypted(child)) + return 0; ++ ++ /* ++ * Both parent and child are encrypted, so verify they use the same ++ * encryption policy. Compare the fscrypt_info structs if the keys are ++ * available, otherwise retrieve and compare the fscrypt_contexts. ++ * ++ * Note that the fscrypt_context retrieval will be required frequently ++ * when accessing an encrypted directory tree without the key. ++ * Performance-wise this is not a big deal because we already don't ++ * really optimize for file access without the key (to the extent that ++ * such access is even possible), given that any attempted access ++ * already causes a fscrypt_context retrieval and keyring search. ++ * ++ * In any case, if an unexpected error occurs, fall back to "forbidden". 
++ */ ++ + res = fscrypt_get_encryption_info(parent); + if (res) + return 0; +@@ -172,17 +206,32 @@ int fscrypt_has_permitted_context(struct + return 0; + parent_ci = parent->i_crypt_info; + child_ci = child->i_crypt_info; +- if (!parent_ci && !child_ci) +- return 1; +- if (!parent_ci || !child_ci) ++ ++ if (parent_ci && child_ci) { ++ return memcmp(parent_ci->ci_master_key, child_ci->ci_master_key, ++ FS_KEY_DESCRIPTOR_SIZE) == 0 && ++ (parent_ci->ci_data_mode == child_ci->ci_data_mode) && ++ (parent_ci->ci_filename_mode == ++ child_ci->ci_filename_mode) && ++ (parent_ci->ci_flags == child_ci->ci_flags); ++ } ++ ++ res = cops->get_context(parent, &parent_ctx, sizeof(parent_ctx)); ++ if (res != sizeof(parent_ctx)) ++ return 0; ++ ++ res = cops->get_context(child, &child_ctx, sizeof(child_ctx)); ++ if (res != sizeof(child_ctx)) + return 0; + +- return (memcmp(parent_ci->ci_master_key, +- child_ci->ci_master_key, +- FS_KEY_DESCRIPTOR_SIZE) == 0 && +- (parent_ci->ci_data_mode == child_ci->ci_data_mode) && +- (parent_ci->ci_filename_mode == child_ci->ci_filename_mode) && +- (parent_ci->ci_flags == child_ci->ci_flags)); ++ return memcmp(parent_ctx.master_key_descriptor, ++ child_ctx.master_key_descriptor, ++ FS_KEY_DESCRIPTOR_SIZE) == 0 && ++ (parent_ctx.contents_encryption_mode == ++ child_ctx.contents_encryption_mode) && ++ (parent_ctx.filenames_encryption_mode == ++ child_ctx.filenames_encryption_mode) && ++ (parent_ctx.flags == child_ctx.flags); + } + EXPORT_SYMBOL(fscrypt_has_permitted_context); + diff --git a/queue-4.11/initramfs-always-do-fput-and-load-modules-after-rootfs-populate.patch b/queue-4.11/initramfs-always-do-fput-and-load-modules-after-rootfs-populate.patch new file mode 100644 index 00000000000..729d101ed0b --- /dev/null +++ b/queue-4.11/initramfs-always-do-fput-and-load-modules-after-rootfs-populate.patch @@ -0,0 +1,66 @@ +From 17a9be31747535184f2af156b1f080ec4c92a952 Mon Sep 17 00:00:00 2001 +From: Stafford Horne +Date: Thu, 4 May 2017 21:15:56 +0900 +Subject: initramfs: Always do fput() and load modules after rootfs populate + +From: Stafford Horne + +commit 17a9be31747535184f2af156b1f080ec4c92a952 upstream. + +In OpenRISC we do not have a bootloader passed initrd, but the built in +initramfs does contain the /init and other binaries, including modules. +The previous commit 08865514805d2 ("initramfs: finish fput() before +accessing any binary from initramfs") made a change to only call fput() +if the bootloader initrd was available, this caused intermittent crashes +for OpenRISC. + +This patch changes the fput() to happen unconditionally if any rootfs is +loaded. Also, I added some comments to make it a bit more clear why we +call unpack_to_rootfs() multiple times. 
+ +Fixes: 08865514805d2 ("initramfs: finish fput() before accessing any binary from initramfs") +Cc: Lokesh Vutla +Cc: Al Viro +Acked-by: Al Viro +Signed-off-by: Stafford Horne +Signed-off-by: Greg Kroah-Hartman + +--- + init/initramfs.c | 15 +++++++++------ + 1 file changed, 9 insertions(+), 6 deletions(-) + +--- a/init/initramfs.c ++++ b/init/initramfs.c +@@ -608,9 +608,11 @@ static void __init clean_rootfs(void) + + static int __init populate_rootfs(void) + { ++ /* Load the built in initramfs */ + char *err = unpack_to_rootfs(__initramfs_start, __initramfs_size); + if (err) + panic("%s", err); /* Failed to decompress INTERNAL initramfs */ ++ /* If available load the bootloader supplied initrd */ + if (initrd_start) { + #ifdef CONFIG_BLK_DEV_RAM + int fd; +@@ -648,13 +650,14 @@ static int __init populate_rootfs(void) + printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err); + free_initrd(); + #endif +- flush_delayed_fput(); +- /* +- * Try loading default modules from initramfs. This gives +- * us a chance to load before device_initcalls. +- */ +- load_default_modules(); + } ++ flush_delayed_fput(); ++ /* ++ * Try loading default modules from initramfs. This gives ++ * us a chance to load before device_initcalls. ++ */ ++ load_default_modules(); ++ + return 0; + } + rootfs_initcall(populate_rootfs); diff --git a/queue-4.11/initramfs-avoid-label-at-end-of-compound-statement-error.patch b/queue-4.11/initramfs-avoid-label-at-end-of-compound-statement-error.patch new file mode 100644 index 00000000000..dad51445b9e --- /dev/null +++ b/queue-4.11/initramfs-avoid-label-at-end-of-compound-statement-error.patch @@ -0,0 +1,47 @@ +From 394e4f5d5834b610ee032cceb20a1b1f45b01d28 Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Sat, 6 May 2017 10:27:13 -0700 +Subject: initramfs: avoid "label at end of compound statement" error +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Linus Torvalds + +commit 394e4f5d5834b610ee032cceb20a1b1f45b01d28 upstream. + +Commit 17a9be317475 ("initramfs: Always do fput() and load modules after +rootfs populate") introduced an error for the + + CONFIG_BLK_DEV_RAM=y + +case, because even though the code looks fine, the compiler really wants +a statement after a label, or you'll get complaints: + + init/initramfs.c: In function 'populate_rootfs': + init/initramfs.c:644:2: error: label at end of compound statement + +That commit moved the subsequent statements to outside the compound +statement, leaving the label without any associated statements. 
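As a minimal standalone sketch (illustration only, not taken from init/initramfs.c), the rule the compiler enforces looks like this: a label must be followed by at least one statement, so an empty statement keeps a label that now sits at the end of a block legal:

	void cleanup_example(int err)
	{
		if (err)
			goto done;
		/* ... normal path ... */
	done:
		;	/* empty statement: a label may not end a compound statement */
	}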
+ +Reported-by: Jörg Otte +Fixes: 17a9be317475 ("initramfs: Always do fput() and load modules after rootfs populate") +Cc: Al Viro +Cc: Stafford Horne +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + init/initramfs.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/init/initramfs.c ++++ b/init/initramfs.c +@@ -642,6 +642,7 @@ static int __init populate_rootfs(void) + free_initrd(); + } + done: ++ /* empty statement */; + #else + printk(KERN_INFO "Unpacking initramfs...\n"); + err = unpack_to_rootfs((char *)initrd_start, diff --git a/queue-4.11/libata-reject-passthrough-write-same-requests.patch b/queue-4.11/libata-reject-passthrough-write-same-requests.patch new file mode 100644 index 00000000000..dcf94bb2e18 --- /dev/null +++ b/queue-4.11/libata-reject-passthrough-write-same-requests.patch @@ -0,0 +1,41 @@ +From c6ade20f5e50e188d20b711a618b20dd1d50457e Mon Sep 17 00:00:00 2001 +From: Christoph Hellwig +Date: Tue, 25 Apr 2017 13:39:54 +0200 +Subject: libata: reject passthrough WRITE SAME requests + +From: Christoph Hellwig + +commit c6ade20f5e50e188d20b711a618b20dd1d50457e upstream. + +The WRITE SAME to TRIM translation rewrites the DATA OUT buffer. While +the SCSI code accomodates for this by passing a read-writable buffer +userspace applications don't cater for this behavior. In fact it can +be used to rewrite e.g. a readonly file through mmap and should be +considered as a security fix. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Martin K. Petersen +Signed-off-by: Tejun Heo +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/ata/libata-scsi.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/drivers/ata/libata-scsi.c ++++ b/drivers/ata/libata-scsi.c +@@ -3462,6 +3462,14 @@ static unsigned int ata_scsi_write_same_ + if (unlikely(!dev->dma_mode)) + goto invalid_opcode; + ++ /* ++ * We only allow sending this command through the block layer, ++ * as it modifies the DATA OUT buffer, which would corrupt user ++ * memory for SG_IO commands. ++ */ ++ if (unlikely(blk_rq_is_passthrough(scmd->request))) ++ goto invalid_opcode; ++ + if (unlikely(scmd->cmd_len < 16)) { + fp = 15; + goto invalid_fld; diff --git a/queue-4.11/mm-fix-data-corruption-due-to-stale-mmap-reads.patch b/queue-4.11/mm-fix-data-corruption-due-to-stale-mmap-reads.patch new file mode 100644 index 00000000000..aeaa5fd2c76 --- /dev/null +++ b/queue-4.11/mm-fix-data-corruption-due-to-stale-mmap-reads.patch @@ -0,0 +1,76 @@ +From cd656375f94632d7b5af57bf67b7b5c0270c591c Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Fri, 12 May 2017 15:46:50 -0700 +Subject: mm: fix data corruption due to stale mmap reads + +From: Jan Kara + +commit cd656375f94632d7b5af57bf67b7b5c0270c591c upstream. + +Currently, we didn't invalidate page tables during invalidate_inode_pages2() +for DAX. That could result in e.g. 2MiB zero page being mapped into +page tables while there were already underlying blocks allocated and +thus data seen through mmap were different from data seen by read(2). +The following sequence reproduces the problem: + + - open an mmap over a 2MiB hole + + - read from a 2MiB hole, faulting in a 2MiB zero page + + - write to the hole with write(3p). The write succeeds but we + incorrectly leave the 2MiB zero page mapping intact. + + - via the mmap, read the data that was just written. Since the zero + page mapping is still intact we read back zeroes instead of the new + data. 
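That sequence can be sketched as a minimal user-space program (an illustration only; /mnt/dax/file is a hypothetical file on a DAX-mounted filesystem, assumed to start out as a hole):

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		const size_t len = 2UL << 20;	/* 2MiB */
		char buf[4096];
		char *map;
		int fd = open("/mnt/dax/file", O_RDWR | O_CREAT, 0600);

		if (fd < 0)
			return 1;
		if (ftruncate(fd, len))			/* leave a 2MiB hole */
			return 1;
		map = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
		if (map == MAP_FAILED)
			return 1;
		memcpy(buf, map, sizeof(buf));		/* fault in the zero page */
		memset(buf, 0xab, sizeof(buf));
		if (pwrite(fd, buf, sizeof(buf), 0) < 0)	/* allocate blocks via write */
			return 1;
		/* before the fix this can still show 0x00 through the stale mapping */
		printf("first byte via mmap: 0x%02x\n", (unsigned char)map[0]);
		return 0;
	}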
+ +Fix the problem by unconditionally calling invalidate_inode_pages2_range() +in dax_iomap_actor() for new block allocations and by properly +invalidating page tables in invalidate_inode_pages2_range() for DAX +mappings. + +Fixes: c6dcf52c23d2d3fb5235cec42d7dd3f786b87d55 +Link: http://lkml.kernel.org/r/20170510085419.27601-3-jack@suse.cz +Signed-off-by: Jan Kara +Signed-off-by: Ross Zwisler +Cc: Dan Williams +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/dax.c | 2 +- + mm/truncate.c | 11 +++++++++++ + 2 files changed, 12 insertions(+), 1 deletion(-) + +--- a/fs/dax.c ++++ b/fs/dax.c +@@ -1003,7 +1003,7 @@ dax_iomap_actor(struct inode *inode, lof + * into page tables. We have to tear down these mappings so that data + * written by write(2) is visible in mmap. + */ +- if ((iomap->flags & IOMAP_F_NEW) && inode->i_mapping->nrpages) { ++ if (iomap->flags & IOMAP_F_NEW) { + invalidate_inode_pages2_range(inode->i_mapping, + pos >> PAGE_SHIFT, + (end - 1) >> PAGE_SHIFT); +--- a/mm/truncate.c ++++ b/mm/truncate.c +@@ -683,6 +683,17 @@ int invalidate_inode_pages2_range(struct + cond_resched(); + index++; + } ++ /* ++ * For DAX we invalidate page tables after invalidating radix tree. We ++ * could invalidate page tables while invalidating each entry however ++ * that would be expensive. And doing range unmapping before doesn't ++ * work as we have no cheap way to find whether radix tree entry didn't ++ * get remapped later. ++ */ ++ if (dax_mapping(mapping)) { ++ unmap_mapping_range(mapping, (loff_t)start << PAGE_SHIFT, ++ (loff_t)(end - start + 1) << PAGE_SHIFT, 0); ++ } + cleancache_invalidate_inode(mapping); + return ret; + } diff --git a/queue-4.11/revert-f2fs-put-allocate_segment-after-refresh_sit_entry.patch b/queue-4.11/revert-f2fs-put-allocate_segment-after-refresh_sit_entry.patch new file mode 100644 index 00000000000..674f6f92cf4 --- /dev/null +++ b/queue-4.11/revert-f2fs-put-allocate_segment-after-refresh_sit_entry.patch @@ -0,0 +1,45 @@ +From c6f82fe90d7458e5fa190a6820bfc24f96b0de4e Mon Sep 17 00:00:00 2001 +From: Jaegeuk Kim +Date: Tue, 4 Apr 2017 16:45:30 -0700 +Subject: Revert "f2fs: put allocate_segment after refresh_sit_entry" + +From: Jaegeuk Kim + +commit c6f82fe90d7458e5fa190a6820bfc24f96b0de4e upstream. + +This reverts commit 3436c4bdb30de421d46f58c9174669fbcfd40ce0. + +This makes a leak to register dirty segments. I reproduced the issue by +modified postmark which injects a lot of file create/delete/update and +finally triggers huge number of SSR allocations. + +[Jaegeuk Kim: Change missing incorrect comment] +Signed-off-by: Jaegeuk Kim +Signed-off-by: Greg Kroah-Hartman + +--- + fs/f2fs/segment.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +--- a/fs/f2fs/segment.c ++++ b/fs/f2fs/segment.c +@@ -1788,15 +1788,14 @@ void allocate_data_block(struct f2fs_sb_ + + stat_inc_block_count(sbi, curseg); + ++ if (!__has_curseg_space(sbi, type)) ++ sit_i->s_ops->allocate_segment(sbi, type, false); + /* +- * SIT information should be updated before segment allocation, +- * since SSR needs latest valid block information. ++ * SIT information should be updated after segment allocation, ++ * since we need to keep dirty segments precisely under SSR. 
+ */ + refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); + +- if (!__has_curseg_space(sbi, type)) +- sit_i->s_ops->allocate_segment(sbi, type, false); +- + mutex_unlock(&sit_i->sentry_lock); + + if (page && IS_NODESEG(type)) diff --git a/queue-4.11/serial-omap-fix-runtime-pm-handling-on-unbind.patch b/queue-4.11/serial-omap-fix-runtime-pm-handling-on-unbind.patch new file mode 100644 index 00000000000..e0178055022 --- /dev/null +++ b/queue-4.11/serial-omap-fix-runtime-pm-handling-on-unbind.patch @@ -0,0 +1,66 @@ +From 099bd73dc17ed77aa8c98323e043613b6e8f54fc Mon Sep 17 00:00:00 2001 +From: Johan Hovold +Date: Mon, 10 Apr 2017 11:21:38 +0200 +Subject: serial: omap: fix runtime-pm handling on unbind + +From: Johan Hovold + +commit 099bd73dc17ed77aa8c98323e043613b6e8f54fc upstream. + +An unbalanced and misplaced synchronous put was used to suspend the +device on driver unbind, something which with a likewise misplaced +pm_runtime_disable leads to external aborts when an open port is being +removed. + +Unhandled fault: external abort on non-linefetch (0x1028) at 0xfa024010 +... +[] (serial_omap_set_mctrl) from [] (uart_update_mctrl+0x50/0x60) +[] (uart_update_mctrl) from [] (uart_shutdown+0xbc/0x138) +[] (uart_shutdown) from [] (uart_hangup+0x94/0x190) +[] (uart_hangup) from [] (__tty_hangup+0x404/0x41c) +[] (__tty_hangup) from [] (tty_vhangup+0x1c/0x20) +[] (tty_vhangup) from [] (uart_remove_one_port+0xec/0x260) +[] (uart_remove_one_port) from [] (serial_omap_remove+0x40/0x60) +[] (serial_omap_remove) from [] (platform_drv_remove+0x34/0x4c) + +Fix this up by resuming the device before deregistering the port and by +suspending and disabling runtime pm only after the port has been +removed. + +Also make sure to disable autosuspend before disabling runtime pm so +that the usage count is balanced and device actually suspended before +returning. + +Note that due to a negative autosuspend delay being set in probe, the +unbalanced put would actually suspend the device on first driver unbind, +while rebinding and again unbinding would result in a negative +power.usage_count. + +Fixes: 7e9c8e7dbf3b ("serial: omap: make sure to suspend device before remove") +Cc: Felipe Balbi +Cc: Santosh Shilimkar +Signed-off-by: Johan Hovold +Acked-by: Tony Lindgren +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/tty/serial/omap-serial.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/drivers/tty/serial/omap-serial.c ++++ b/drivers/tty/serial/omap-serial.c +@@ -1780,9 +1780,13 @@ static int serial_omap_remove(struct pla + { + struct uart_omap_port *up = platform_get_drvdata(dev); + ++ pm_runtime_get_sync(up->dev); ++ ++ uart_remove_one_port(&serial_omap_reg, &up->port); ++ ++ pm_runtime_dont_use_autosuspend(up->dev); + pm_runtime_put_sync(up->dev); + pm_runtime_disable(up->dev); +- uart_remove_one_port(&serial_omap_reg, &up->port); + pm_qos_remove_request(&up->pm_qos_request); + device_init_wakeup(&dev->dev, false); + diff --git a/queue-4.11/serial-omap-suspend-device-on-probe-errors.patch b/queue-4.11/serial-omap-suspend-device-on-probe-errors.patch new file mode 100644 index 00000000000..8e773799486 --- /dev/null +++ b/queue-4.11/serial-omap-suspend-device-on-probe-errors.patch @@ -0,0 +1,38 @@ +From 77e6fe7fd2b7cba0bf2f2dc8cde51d7b9a35bf74 Mon Sep 17 00:00:00 2001 +From: Johan Hovold +Date: Mon, 10 Apr 2017 11:21:39 +0200 +Subject: serial: omap: suspend device on probe errors + +From: Johan Hovold + +commit 77e6fe7fd2b7cba0bf2f2dc8cde51d7b9a35bf74 upstream. 
+ +Make sure to actually suspend the device before returning after a failed +(or deferred) probe. + +Note that autosuspend must be disabled before runtime pm is disabled in +order to balance the usage count due to a negative autosuspend delay as +well as to make the final put suspend the device synchronously. + +Fixes: 388bc2622680 ("omap-serial: Fix the error handling in the omap_serial probe") +Cc: Shubhrajyoti D +Signed-off-by: Johan Hovold +Acked-by: Tony Lindgren +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/tty/serial/omap-serial.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/tty/serial/omap-serial.c ++++ b/drivers/tty/serial/omap-serial.c +@@ -1767,7 +1767,8 @@ static int serial_omap_probe(struct plat + return 0; + + err_add_port: +- pm_runtime_put(&pdev->dev); ++ pm_runtime_dont_use_autosuspend(&pdev->dev); ++ pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); + pm_qos_remove_request(&up->pm_qos_request); + device_init_wakeup(up->dev, false); diff --git a/queue-4.11/serial-samsung-add-missing-checks-for-dma_map_single-failure.patch b/queue-4.11/serial-samsung-add-missing-checks-for-dma_map_single-failure.patch new file mode 100644 index 00000000000..6a79b0f2291 --- /dev/null +++ b/queue-4.11/serial-samsung-add-missing-checks-for-dma_map_single-failure.patch @@ -0,0 +1,114 @@ +From 500fcc08a32bfd54f11951ba81530775df15c474 Mon Sep 17 00:00:00 2001 +From: Marek Szyprowski +Date: Mon, 3 Apr 2017 08:21:00 +0200 +Subject: serial: samsung: Add missing checks for dma_map_single failure + +From: Marek Szyprowski + +commit 500fcc08a32bfd54f11951ba81530775df15c474 upstream. + +This patch adds missing checks for dma_map_single() failure and proper error +reporting. Although this issue was harmless on ARM architecture, it is always +good to use the DMA mapping API in a proper way. 
This patch fixes the following +DMA API debug warning: + +WARNING: CPU: 1 PID: 3785 at lib/dma-debug.c:1171 check_unmap+0x8a0/0xf28 +dma-pl330 121a0000.pdma: DMA-API: device driver failed to check map error[device address=0x000000006e0f9000] [size=4096 bytes] [mapped as single] +Modules linked in: +CPU: 1 PID: 3785 Comm: (agetty) Tainted: G W 4.11.0-rc1-00137-g07ca963-dirty #59 +Hardware name: SAMSUNG EXYNOS (Flattened Device Tree) +[] (unwind_backtrace) from [] (show_stack+0x20/0x24) +[] (show_stack) from [] (dump_stack+0x84/0xa0) +[] (dump_stack) from [] (__warn+0x14c/0x180) +[] (__warn) from [] (warn_slowpath_fmt+0x48/0x50) +[] (warn_slowpath_fmt) from [] (check_unmap+0x8a0/0xf28) +[] (check_unmap) from [] (debug_dma_unmap_page+0x98/0xc8) +[] (debug_dma_unmap_page) from [] (s3c24xx_serial_shutdown+0x314/0x52c) +[] (s3c24xx_serial_shutdown) from [] (uart_port_shutdown+0x54/0x88) +[] (uart_port_shutdown) from [] (uart_shutdown+0xd4/0x110) +[] (uart_shutdown) from [] (uart_hangup+0x9c/0x208) +[] (uart_hangup) from [] (__tty_hangup+0x49c/0x634) +[] (__tty_hangup) from [] (tty_ioctl+0xc88/0x16e4) +[] (tty_ioctl) from [] (do_vfs_ioctl+0xc4/0xd10) +[] (do_vfs_ioctl) from [] (SyS_ioctl+0x7c/0x8c) +[] (SyS_ioctl) from [] (ret_fast_syscall+0x0/0x3c) + +Reported-by: Seung-Woo Kim +Fixes: 62c37eedb74c8 ("serial: samsung: add dma reqest/release functions") +Signed-off-by: Marek Szyprowski +Reviewed-by: Bartlomiej Zolnierkiewicz +Reviewed-by: Shuah Khan +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/tty/serial/samsung.c | 31 ++++++++++++++++++++++++------- + 1 file changed, 24 insertions(+), 7 deletions(-) + +--- a/drivers/tty/serial/samsung.c ++++ b/drivers/tty/serial/samsung.c +@@ -859,7 +859,7 @@ static void s3c24xx_serial_break_ctl(str + static int s3c24xx_serial_request_dma(struct s3c24xx_uart_port *p) + { + struct s3c24xx_uart_dma *dma = p->dma; +- unsigned long flags; ++ int ret; + + /* Default slave configuration parameters */ + dma->rx_conf.direction = DMA_DEV_TO_MEM; +@@ -884,8 +884,8 @@ static int s3c24xx_serial_request_dma(st + + dma->tx_chan = dma_request_chan(p->port.dev, "tx"); + if (IS_ERR(dma->tx_chan)) { +- dma_release_channel(dma->rx_chan); +- return PTR_ERR(dma->tx_chan); ++ ret = PTR_ERR(dma->tx_chan); ++ goto err_release_rx; + } + + dmaengine_slave_config(dma->tx_chan, &dma->tx_conf); +@@ -894,15 +894,17 @@ static int s3c24xx_serial_request_dma(st + dma->rx_size = PAGE_SIZE; + + dma->rx_buf = kmalloc(dma->rx_size, GFP_KERNEL); +- + if (!dma->rx_buf) { +- dma_release_channel(dma->rx_chan); +- dma_release_channel(dma->tx_chan); +- return -ENOMEM; ++ ret = -ENOMEM; ++ goto err_release_tx; + } + + dma->rx_addr = dma_map_single(p->port.dev, dma->rx_buf, + dma->rx_size, DMA_FROM_DEVICE); ++ if (dma_mapping_error(p->port.dev, dma->rx_addr)) { ++ ret = -EIO; ++ goto err_free_rx; ++ } + + spin_lock_irqsave(&p->port.lock, flags); + +@@ -911,8 +913,23 @@ static int s3c24xx_serial_request_dma(st + UART_XMIT_SIZE, DMA_TO_DEVICE); + + spin_unlock_irqrestore(&p->port.lock, flags); ++ if (dma_mapping_error(p->port.dev, dma->tx_addr)) { ++ ret = -EIO; ++ goto err_unmap_rx; ++ } + + return 0; ++ ++err_unmap_rx: ++ dma_unmap_single(p->port.dev, dma->rx_addr, dma->rx_size, ++ DMA_FROM_DEVICE); ++err_free_rx: ++ kfree(dma->rx_buf); ++err_release_tx: ++ dma_release_channel(dma->tx_chan); ++err_release_rx: ++ dma_release_channel(dma->rx_chan); ++ return ret; + } + + static void s3c24xx_serial_release_dma(struct s3c24xx_uart_port *p) diff --git 
a/queue-4.11/serial-samsung-use-right-device-for-dma-mapping-calls.patch b/queue-4.11/serial-samsung-use-right-device-for-dma-mapping-calls.patch new file mode 100644 index 00000000000..a6f590e56a1 --- /dev/null +++ b/queue-4.11/serial-samsung-use-right-device-for-dma-mapping-calls.patch @@ -0,0 +1,78 @@ +From 768d64f491a530062ddad50e016fb27125f8bd7c Mon Sep 17 00:00:00 2001 +From: Marek Szyprowski +Date: Mon, 3 Apr 2017 08:20:59 +0200 +Subject: serial: samsung: Use right device for DMA-mapping calls + +From: Marek Szyprowski + +commit 768d64f491a530062ddad50e016fb27125f8bd7c upstream. + +Driver should provide its own struct device for all DMA-mapping calls instead +of extracting device pointer from DMA engine channel. Although this is harmless +from the driver operation perspective on ARM architecture, it is always good +to use the DMA mapping API in a proper way. This patch fixes following DMA API +debug warning: + +WARNING: CPU: 0 PID: 0 at lib/dma-debug.c:1241 check_sync+0x520/0x9f4 +samsung-uart 12c20000.serial: DMA-API: device driver tries to sync DMA memory it has not allocated [device address=0x000000006df0f580] [size=64 bytes] +Modules linked in: +CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.11.0-rc1-00137-g07ca963 #51 +Hardware name: SAMSUNG EXYNOS (Flattened Device Tree) +[] (unwind_backtrace) from [] (show_stack+0x20/0x24) +[] (show_stack) from [] (dump_stack+0x84/0xa0) +[] (dump_stack) from [] (__warn+0x14c/0x180) +[] (__warn) from [] (warn_slowpath_fmt+0x48/0x50) +[] (warn_slowpath_fmt) from [] (check_sync+0x520/0x9f4) +[] (check_sync) from [] (debug_dma_sync_single_for_device+0x88/0xc8) +[] (debug_dma_sync_single_for_device) from [] (s3c24xx_serial_start_tx_dma+0x100/0x2f8) +[] (s3c24xx_serial_start_tx_dma) from [] (s3c24xx_serial_tx_chars+0x198/0x33c) + +Reported-by: Seung-Woo Kim +Fixes: 62c37eedb74c8 ("serial: samsung: add dma reqest/release functions") +Signed-off-by: Marek Szyprowski +Reviewed-by: Bartlomiej Zolnierkiewicz +Reviewed-by: Krzysztof Kozlowski +Reviewed-by: Shuah Khan +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/tty/serial/samsung.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +--- a/drivers/tty/serial/samsung.c ++++ b/drivers/tty/serial/samsung.c +@@ -901,14 +901,13 @@ static int s3c24xx_serial_request_dma(st + return -ENOMEM; + } + +- dma->rx_addr = dma_map_single(dma->rx_chan->device->dev, dma->rx_buf, ++ dma->rx_addr = dma_map_single(p->port.dev, dma->rx_buf, + dma->rx_size, DMA_FROM_DEVICE); + + spin_lock_irqsave(&p->port.lock, flags); + + /* TX buffer */ +- dma->tx_addr = dma_map_single(dma->tx_chan->device->dev, +- p->port.state->xmit.buf, ++ dma->tx_addr = dma_map_single(p->port.dev, p->port.state->xmit.buf, + UART_XMIT_SIZE, DMA_TO_DEVICE); + + spin_unlock_irqrestore(&p->port.lock, flags); +@@ -922,7 +921,7 @@ static void s3c24xx_serial_release_dma(s + + if (dma->rx_chan) { + dmaengine_terminate_all(dma->rx_chan); +- dma_unmap_single(dma->rx_chan->device->dev, dma->rx_addr, ++ dma_unmap_single(p->port.dev, dma->rx_addr, + dma->rx_size, DMA_FROM_DEVICE); + kfree(dma->rx_buf); + dma_release_channel(dma->rx_chan); +@@ -931,7 +930,7 @@ static void s3c24xx_serial_release_dma(s + + if (dma->tx_chan) { + dmaengine_terminate_all(dma->tx_chan); +- dma_unmap_single(dma->tx_chan->device->dev, dma->tx_addr, ++ dma_unmap_single(p->port.dev, dma->tx_addr, + UART_XMIT_SIZE, DMA_TO_DEVICE); + dma_release_channel(dma->tx_chan); + dma->tx_chan = NULL; diff --git a/queue-4.11/series b/queue-4.11/series index c33aa1749b6..6e91451fb3b 100644 
--- a/queue-4.11/series +++ b/queue-4.11/series @@ -77,3 +77,30 @@ cifs-add-misssing-sfm-mapping-for-doublequote.patch ovl-do-not-set-overlay.opaque-on-non-dir-create.patch padata-free-correct-variable.patch md-raid1-avoid-reusing-a-resync-bio-after-error-handling.patch +device-dax-fix-cdev-leak.patch +device-dax-fix-sysfs-attribute-deadlock.patch +dax-prevent-invalidation-of-mapped-dax-entries.patch +mm-fix-data-corruption-due-to-stale-mmap-reads.patch +ext4-return-to-starting-transaction-in-ext4_dax_huge_fault.patch +dax-fix-pmd-data-corruption-when-fault-races-with-write.patch +f2fs-fix-wrong-max-cost-initialization.patch +revert-f2fs-put-allocate_segment-after-refresh_sit_entry.patch +f2fs-fix-fs-corruption-due-to-zero-inode-page.patch +f2fs-fix-multiple-f2fs_add_link-having-same-name-for-inline-dentry.patch +f2fs-check-entire-encrypted-bigname-when-finding-a-dentry.patch +f2fs-make-flush-bios-explicitely-sync.patch +initramfs-always-do-fput-and-load-modules-after-rootfs-populate.patch +initramfs-avoid-label-at-end-of-compound-statement-error.patch +fscrypt-fix-context-consistency-check-when-key-s-unavailable.patch +fscrypt-avoid-collisions-when-presenting-long-encrypted-filenames.patch +serial-samsung-use-right-device-for-dma-mapping-calls.patch +serial-samsung-add-missing-checks-for-dma_map_single-failure.patch +serial-omap-fix-runtime-pm-handling-on-unbind.patch +serial-omap-suspend-device-on-probe-errors.patch +tty-pty-fix-ldisc-flush-after-userspace-become-aware-of-the-data-already.patch +tty-pl011-use-qdf2400_e44-as-the-earlycon-name-for-qdf2400-e44.patch +bluetooth-fix-user-channel-for-32bit-userspace-on-64bit-kernel.patch +bluetooth-hci_bcm-add-missing-tty-device-sanity-check.patch +bluetooth-hci_intel-add-missing-tty-device-sanity-check.patch +cgroup-fix-spurious-warnings-on-cgroup_is_dead-from-cgroup_sk_alloc.patch +libata-reject-passthrough-write-same-requests.patch diff --git a/queue-4.11/tty-pl011-use-qdf2400_e44-as-the-earlycon-name-for-qdf2400-e44.patch b/queue-4.11/tty-pl011-use-qdf2400_e44-as-the-earlycon-name-for-qdf2400-e44.patch new file mode 100644 index 00000000000..b0fac26c64b --- /dev/null +++ b/queue-4.11/tty-pl011-use-qdf2400_e44-as-the-earlycon-name-for-qdf2400-e44.patch @@ -0,0 +1,73 @@ +From 5a0722b898f851b9ef108ea7babc529e4efc773d Mon Sep 17 00:00:00 2001 +From: Timur Tabi +Date: Thu, 13 Apr 2017 08:55:08 -0500 +Subject: tty: pl011: use "qdf2400_e44" as the earlycon name for QDF2400 E44 + +From: Timur Tabi + +commit 5a0722b898f851b9ef108ea7babc529e4efc773d upstream. + +Define a new early console name for Qualcomm Datacenter Technologies +QDF2400 SOCs affected by erratum 44, instead of piggy-backing on "pl011". +Previously, to enable traditional (non-SPCR) earlycon, the documentation +said to specify "earlycon=pl011,
,qdf2400_e44", but the code was +broken and this didn't actually work. + +So instead, the method for specifying the E44 work-around with traditional +earlycon is "earlycon=qdf2400_e44,
". Both methods of earlycon +are now enabled with the same function. + +Fixes: e53e597fd4c4 ("tty: pl011: fix earlycon work-around for QDF2400 erratum 44") +Signed-off-by: Timur Tabi +Tested-by: Shanker Donthineni +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/tty/serial/amba-pl011.c | 31 +++++++++++++++++++++++-------- + 1 file changed, 23 insertions(+), 8 deletions(-) + +--- a/drivers/tty/serial/amba-pl011.c ++++ b/drivers/tty/serial/amba-pl011.c +@@ -2470,19 +2470,34 @@ static int __init pl011_early_console_se + if (!device->port.membase) + return -ENODEV; + +- /* On QDF2400 SOCs affected by Erratum 44, the "qdf2400_e44" must +- * also be specified, e.g. "earlycon=pl011,
,qdf2400_e44". +- */ +- if (!strcmp(device->options, "qdf2400_e44")) +- device->con->write = qdf2400_e44_early_write; +- else +- device->con->write = pl011_early_write; ++ device->con->write = pl011_early_write; + + return 0; + } + OF_EARLYCON_DECLARE(pl011, "arm,pl011", pl011_early_console_setup); + OF_EARLYCON_DECLARE(pl011, "arm,sbsa-uart", pl011_early_console_setup); +-EARLYCON_DECLARE(qdf2400_e44, pl011_early_console_setup); ++ ++/* ++ * On Qualcomm Datacenter Technologies QDF2400 SOCs affected by ++ * Erratum 44, traditional earlycon can be enabled by specifying ++ * "earlycon=qdf2400_e44,
". Any options are ignored. ++ * ++ * Alternatively, you can just specify "earlycon", and the early console ++ * will be enabled with the information from the SPCR table. In this ++ * case, the SPCR code will detect the need for the E44 work-around, ++ * and set the console name to "qdf2400_e44". ++ */ ++static int __init ++qdf2400_e44_early_console_setup(struct earlycon_device *device, ++ const char *opt) ++{ ++ if (!device->port.membase) ++ return -ENODEV; ++ ++ device->con->write = qdf2400_e44_early_write; ++ return 0; ++} ++EARLYCON_DECLARE(qdf2400_e44, qdf2400_e44_early_console_setup); + + #else + #define AMBA_CONSOLE NULL diff --git a/queue-4.11/tty-pty-fix-ldisc-flush-after-userspace-become-aware-of-the-data-already.patch b/queue-4.11/tty-pty-fix-ldisc-flush-after-userspace-become-aware-of-the-data-already.patch new file mode 100644 index 00000000000..dafc9a4a375 --- /dev/null +++ b/queue-4.11/tty-pty-fix-ldisc-flush-after-userspace-become-aware-of-the-data-already.patch @@ -0,0 +1,83 @@ +From 77dae6134440420bac334581a3ccee94cee1c054 Mon Sep 17 00:00:00 2001 +From: Wang YanQing +Date: Wed, 22 Feb 2017 19:37:08 +0800 +Subject: tty: pty: Fix ldisc flush after userspace become aware of the data already + +From: Wang YanQing + +commit 77dae6134440420bac334581a3ccee94cee1c054 upstream. + +While using emacs, cat or others' commands in konsole with recent +kernels, I have met many times that CTRL-C freeze konsole. After +konsole freeze I can't type anything, then I have to open a new one, +it is very annoying. + +See bug report: +https://bugs.kde.org/show_bug.cgi?id=175283 + +The platform in that bug report is Solaris, but now the pty in linux +has the same problem or the same behavior as Solaris :) + +It has high possibility to trigger the problem follow steps below: +Note: In my test, BigFile is a text file whose size is bigger than 1G +1:open konsole +1:cat BigFile +2:CTRL-C + +After some digging, I find out the reason is that commit 1d1d14da12e7 +("pty: Fix buffer flush deadlock") changes the behavior of pty_flush_buffer. + +Thread A Thread B +-------- -------- +1:n_tty_poll return POLLIN + 2:CTRL-C trigger pty_flush_buffer + tty_buffer_flush + n_tty_flush_buffer +3:attempt to check count of chars: + ioctl(fd, TIOCINQ, &available) + available is equal to 0 + +4:read(fd, buffer, avaiable) + return 0 + +5:konsole close fd + +Yes, I know we could use the same patch included in the BUG report as +a workaround for linux platform too. But I think the data in ldisc is +belong to application of another side, we shouldn't clear it when we +want to flush write buffer of this side in pty_flush_buffer. So I think +it is better to disable ldisc flush in pty_flush_buffer, because its new +hehavior bring no benefit except that it mess up the behavior between +POLLIN, and TIOCINQ or FIONREAD. + +Also I find no flush_buffer function in others' tty driver has the +same behavior as current pty_flush_buffer. 
+ +Fixes: 1d1d14da12e7 ("pty: Fix buffer flush deadlock") +Signed-off-by: Wang YanQing +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/tty/pty.c | 7 +------ + 1 file changed, 1 insertion(+), 6 deletions(-) + +--- a/drivers/tty/pty.c ++++ b/drivers/tty/pty.c +@@ -216,16 +216,11 @@ static int pty_signal(struct tty_struct + static void pty_flush_buffer(struct tty_struct *tty) + { + struct tty_struct *to = tty->link; +- struct tty_ldisc *ld; + + if (!to) + return; + +- ld = tty_ldisc_ref(to); +- tty_buffer_flush(to, ld); +- if (ld) +- tty_ldisc_deref(ld); +- ++ tty_buffer_flush(to, NULL); + if (to->packet) { + spin_lock_irq(&tty->ctrl_lock); + tty->ctrl_status |= TIOCPKT_FLUSHWRITE;